summaryrefslogtreecommitdiffstats
path: root/block/blk-mq.c
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2017-07-04 01:50:31 +0200
committerLinus Torvalds <torvalds@linux-foundation.org>2017-07-04 01:50:31 +0200
commit03ffbcdd7898c0b5299efeb9f18de927487ec1cf (patch)
tree0569222e4dc9db22049d7d8d15920cc085a194f6 /block/blk-mq.c
parentMerge branch 'timers-core-for-linus' of git://git.kernel.org/pub/scm/linux/ke... (diff)
parentMerge tag 'irqchip-4.13-2' of git://git.kernel.org/pub/scm/linux/kernel/git/m... (diff)
downloadlinux-03ffbcdd7898c0b5299efeb9f18de927487ec1cf.tar.xz
linux-03ffbcdd7898c0b5299efeb9f18de927487ec1cf.zip
Merge branch 'irq-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull irq updates from Thomas Gleixner: "The irq department delivers: - Expand the generic infrastructure handling the irq migration on CPU hotplug and convert X86 over to it. (Thomas Gleixner) Aside of consolidating code this is a preparatory change for: - Finalizing the affinity management for multi-queue devices. The main change here is to shut down interrupts which are affine to a outgoing CPU and reenabling them when the CPU comes online again. That avoids moving interrupts pointlessly around and breaking and reestablishing affinities for no value. (Christoph Hellwig) Note: This contains also the BLOCK-MQ and NVME changes which depend on the rework of the irq core infrastructure. Jens acked them and agreed that they should go with the irq changes. - Consolidation of irq domain code (Marc Zyngier) - State tracking consolidation in the core code (Jeffy Chen) - Add debug infrastructure for hierarchical irq domains (Thomas Gleixner) - Infrastructure enhancement for managing generic interrupt chips via devmem (Bartosz Golaszewski) - Constification work all over the place (Tobias Klauser) - Two new interrupt controller drivers for MVEBU (Thomas Petazzoni) - The usual set of fixes, updates and enhancements all over the place" * 'irq-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (112 commits) irqchip/or1k-pic: Fix interrupt acknowledgement irqchip/irq-mvebu-gicp: Allocate enough memory for spi_bitmap irqchip/gic-v3: Fix out-of-bound access in gic_set_affinity nvme: Allocate queues for all possible CPUs blk-mq: Create hctx for each present CPU blk-mq: Include all present CPUs in the default queue mapping genirq: Avoid unnecessary low level irq function calls genirq: Set irq masked state when initializing irq_desc genirq/timings: Add infrastructure for estimating the next interrupt arrival time genirq/timings: Add infrastructure to track the interrupt timings genirq/debugfs: Remove pointless NULL pointer check irqchip/gic-v3-its: Don't assume GICv3 hardware supports 16bit INTID irqchip/gic-v3-its: Add ACPI NUMA node mapping irqchip/gic-v3-its-platform-msi: Make of_device_ids const irqchip/gic-v3-its: Make of_device_ids const irqchip/irq-mvebu-icu: Add new driver for Marvell ICU irqchip/irq-mvebu-gicp: Add new driver for Marvell GICP dt-bindings/interrupt-controller: Add DT binding for the Marvell ICU genirq/irqdomain: Remove auto-recursive hierarchy support irqchip/MSI: Use irq_domain_update_bus_token instead of an open coded access ...
Diffstat (limited to 'block/blk-mq.c')
-rw-r--r--block/blk-mq.c120
1 files changed, 11 insertions, 109 deletions
diff --git a/block/blk-mq.c b/block/blk-mq.c
index ced2b000ca02..6cef42f419a5 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -37,9 +37,6 @@
#include "blk-wbt.h"
#include "blk-mq-sched.h"
-static DEFINE_MUTEX(all_q_mutex);
-static LIST_HEAD(all_q_list);
-
static void blk_mq_poll_stats_start(struct request_queue *q);
static void blk_mq_poll_stats_fn(struct blk_stat_callback *cb);
@@ -1984,8 +1981,8 @@ static void blk_mq_init_cpu_queues(struct request_queue *q,
INIT_LIST_HEAD(&__ctx->rq_list);
__ctx->queue = q;
- /* If the cpu isn't online, the cpu is mapped to first hctx */
- if (!cpu_online(i))
+ /* If the cpu isn't present, the cpu is mapped to first hctx */
+ if (!cpu_present(i))
continue;
hctx = blk_mq_map_queue(q, i);
@@ -2028,8 +2025,7 @@ static void blk_mq_free_map_and_requests(struct blk_mq_tag_set *set,
}
}
-static void blk_mq_map_swqueue(struct request_queue *q,
- const struct cpumask *online_mask)
+static void blk_mq_map_swqueue(struct request_queue *q)
{
unsigned int i, hctx_idx;
struct blk_mq_hw_ctx *hctx;
@@ -2047,13 +2043,11 @@ static void blk_mq_map_swqueue(struct request_queue *q,
}
/*
- * Map software to hardware queues
+ * Map software to hardware queues.
+ *
+ * If the cpu isn't present, the cpu is mapped to first hctx.
*/
- for_each_possible_cpu(i) {
- /* If the cpu isn't online, the cpu is mapped to first hctx */
- if (!cpumask_test_cpu(i, online_mask))
- continue;
-
+ for_each_present_cpu(i) {
hctx_idx = q->mq_map[i];
/* unmapped hw queue can be remapped after CPU topo changed */
if (!set->tags[hctx_idx] &&
@@ -2363,16 +2357,8 @@ struct request_queue *blk_mq_init_allocated_queue(struct blk_mq_tag_set *set,
blk_queue_softirq_done(q, set->ops->complete);
blk_mq_init_cpu_queues(q, set->nr_hw_queues);
-
- get_online_cpus();
- mutex_lock(&all_q_mutex);
-
- list_add_tail(&q->all_q_node, &all_q_list);
blk_mq_add_queue_tag_set(set, q);
- blk_mq_map_swqueue(q, cpu_online_mask);
-
- mutex_unlock(&all_q_mutex);
- put_online_cpus();
+ blk_mq_map_swqueue(q);
if (!(set->flags & BLK_MQ_F_NO_SCHED)) {
int ret;
@@ -2398,18 +2384,12 @@ void blk_mq_free_queue(struct request_queue *q)
{
struct blk_mq_tag_set *set = q->tag_set;
- mutex_lock(&all_q_mutex);
- list_del_init(&q->all_q_node);
- mutex_unlock(&all_q_mutex);
-
blk_mq_del_queue_tag_set(q);
-
blk_mq_exit_hw_queues(q, set, set->nr_hw_queues);
}
/* Basically redo blk_mq_init_queue with queue frozen */
-static void blk_mq_queue_reinit(struct request_queue *q,
- const struct cpumask *online_mask)
+static void blk_mq_queue_reinit(struct request_queue *q)
{
WARN_ON_ONCE(!atomic_read(&q->mq_freeze_depth));
@@ -2422,76 +2402,12 @@ static void blk_mq_queue_reinit(struct request_queue *q,
* involves free and re-allocate memory, worthy doing?)
*/
- blk_mq_map_swqueue(q, online_mask);
+ blk_mq_map_swqueue(q);
blk_mq_sysfs_register(q);
blk_mq_debugfs_register_hctxs(q);
}
-/*
- * New online cpumask which is going to be set in this hotplug event.
- * Declare this cpumasks as global as cpu-hotplug operation is invoked
- * one-by-one and dynamically allocating this could result in a failure.
- */
-static struct cpumask cpuhp_online_new;
-
-static void blk_mq_queue_reinit_work(void)
-{
- struct request_queue *q;
-
- mutex_lock(&all_q_mutex);
- /*
- * We need to freeze and reinit all existing queues. Freezing
- * involves synchronous wait for an RCU grace period and doing it
- * one by one may take a long time. Start freezing all queues in
- * one swoop and then wait for the completions so that freezing can
- * take place in parallel.
- */
- list_for_each_entry(q, &all_q_list, all_q_node)
- blk_freeze_queue_start(q);
- list_for_each_entry(q, &all_q_list, all_q_node)
- blk_mq_freeze_queue_wait(q);
-
- list_for_each_entry(q, &all_q_list, all_q_node)
- blk_mq_queue_reinit(q, &cpuhp_online_new);
-
- list_for_each_entry(q, &all_q_list, all_q_node)
- blk_mq_unfreeze_queue(q);
-
- mutex_unlock(&all_q_mutex);
-}
-
-static int blk_mq_queue_reinit_dead(unsigned int cpu)
-{
- cpumask_copy(&cpuhp_online_new, cpu_online_mask);
- blk_mq_queue_reinit_work();
- return 0;
-}
-
-/*
- * Before hotadded cpu starts handling requests, new mappings must be
- * established. Otherwise, these requests in hw queue might never be
- * dispatched.
- *
- * For example, there is a single hw queue (hctx) and two CPU queues (ctx0
- * for CPU0, and ctx1 for CPU1).
- *
- * Now CPU1 is just onlined and a request is inserted into ctx1->rq_list
- * and set bit0 in pending bitmap as ctx1->index_hw is still zero.
- *
- * And then while running hw queue, blk_mq_flush_busy_ctxs() finds bit0 is set
- * in pending bitmap and tries to retrieve requests in hctx->ctxs[0]->rq_list.
- * But htx->ctxs[0] is a pointer to ctx0, so the request in ctx1->rq_list is
- * ignored.
- */
-static int blk_mq_queue_reinit_prepare(unsigned int cpu)
-{
- cpumask_copy(&cpuhp_online_new, cpu_online_mask);
- cpumask_set_cpu(cpu, &cpuhp_online_new);
- blk_mq_queue_reinit_work();
- return 0;
-}
-
static int __blk_mq_alloc_rq_maps(struct blk_mq_tag_set *set)
{
int i;
@@ -2702,7 +2618,7 @@ static void __blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set,
blk_mq_update_queue_map(set);
list_for_each_entry(q, &set->tag_list, tag_set_list) {
blk_mq_realloc_hw_ctxs(set, q);
- blk_mq_queue_reinit(q, cpu_online_mask);
+ blk_mq_queue_reinit(q);
}
list_for_each_entry(q, &set->tag_list, tag_set_list)
@@ -2918,24 +2834,10 @@ bool blk_mq_poll(struct request_queue *q, blk_qc_t cookie)
}
EXPORT_SYMBOL_GPL(blk_mq_poll);
-void blk_mq_disable_hotplug(void)
-{
- mutex_lock(&all_q_mutex);
-}
-
-void blk_mq_enable_hotplug(void)
-{
- mutex_unlock(&all_q_mutex);
-}
-
static int __init blk_mq_init(void)
{
cpuhp_setup_state_multi(CPUHP_BLK_MQ_DEAD, "block/mq:dead", NULL,
blk_mq_hctx_notify_dead);
-
- cpuhp_setup_state_nocalls(CPUHP_BLK_MQ_PREPARE, "block/mq:prepare",
- blk_mq_queue_reinit_prepare,
- blk_mq_queue_reinit_dead);
return 0;
}
subsys_initcall(blk_mq_init);