Diffstat (limited to 'block')
-rw-r--r--  block/Kconfig         |   5
-rw-r--r--  block/Makefile        |   1
-rw-r--r--  block/blk-cgroup.c    |   1
-rw-r--r--  block/blk-core.c      |  28
-rw-r--r--  block/blk-ioc.c       |  45
-rw-r--r--  block/blk-map.c       |   1
-rw-r--r--  block/blk-mq-sched.c  |  16
-rw-r--r--  block/blk-mq-sysfs.c  |  40
-rw-r--r--  block/blk-mq-tag.c    |   2
-rw-r--r--  block/blk-mq-tag.h    |   6
-rw-r--r--  block/blk-mq-virtio.c |  54
-rw-r--r--  block/blk-mq.c        | 150
-rw-r--r--  block/blk-mq.h        |  12
-rw-r--r--  block/blk-softirq.c   |   1
-rw-r--r--  block/blk-sysfs.c     |   2
-rw-r--r--  block/cfq-iosched.c   |   1
-rw-r--r--  block/elevator.c      |   2
-rw-r--r--  block/genhd.c         |  32
-rw-r--r--  block/ioprio.c        |   3
-rw-r--r--  block/sed-opal.c      |  10
20 files changed, 273 insertions, 139 deletions
diff --git a/block/Kconfig b/block/Kconfig
index a2a92e57a87d..e9f780f815f5 100644
--- a/block/Kconfig
+++ b/block/Kconfig
@@ -189,4 +189,9 @@ config BLK_MQ_PCI
 	depends on BLOCK && PCI
 	default y
 
+config BLK_MQ_VIRTIO
+	bool
+	depends on BLOCK && VIRTIO
+	default y
+
 source block/Kconfig.iosched
diff --git a/block/Makefile b/block/Makefile
index 2ad7c304e3f5..081bb680789b 100644
--- a/block/Makefile
+++ b/block/Makefile
@@ -25,6 +25,7 @@ obj-$(CONFIG_BLOCK_COMPAT)	+= compat_ioctl.o
 obj-$(CONFIG_BLK_CMDLINE_PARSER)	+= cmdline-parser.o
 obj-$(CONFIG_BLK_DEV_INTEGRITY)	+= bio-integrity.o blk-integrity.o t10-pi.o
 obj-$(CONFIG_BLK_MQ_PCI)	+= blk-mq-pci.o
+obj-$(CONFIG_BLK_MQ_VIRTIO)	+= blk-mq-virtio.o
 obj-$(CONFIG_BLK_DEV_ZONED)	+= blk-zoned.o
 obj-$(CONFIG_BLK_WBT)	+= blk-wbt.o
 obj-$(CONFIG_BLK_DEBUG_FS)	+= blk-mq-debugfs.o
diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
index 295e98c2c8cc..bbe7ee00bd3d 100644
--- a/block/blk-cgroup.c
+++ b/block/blk-cgroup.c
@@ -17,6 +17,7 @@
 #include <linux/ioprio.h>
 #include <linux/kdev_t.h>
 #include <linux/module.h>
+#include <linux/sched/signal.h>
 #include <linux/err.h>
 #include <linux/blkdev.h>
 #include <linux/backing-dev.h>
diff --git a/block/blk-core.c b/block/blk-core.c
index b9e857f4afe8..0eeb99ef654f 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -578,9 +578,6 @@ void blk_cleanup_queue(struct request_queue *q)
 	q->queue_lock = &q->__queue_lock;
 	spin_unlock_irq(lock);
 
-	bdi_unregister(q->backing_dev_info);
-	put_disk_devt(q->disk_devt);
-
 	/* @q is and will stay empty, shutdown and put */
 	blk_put_queue(q);
 }
@@ -2018,17 +2015,34 @@ blk_qc_t generic_make_request(struct bio *bio)
 		struct request_queue *q = bdev_get_queue(bio->bi_bdev);
 
 		if (likely(blk_queue_enter(q, false) == 0)) {
+			struct bio_list hold;
+			struct bio_list lower, same;
+
+			/* Create a fresh bio_list for all subordinate requests */
+			hold = bio_list_on_stack;
+			bio_list_init(&bio_list_on_stack);
 			ret = q->make_request_fn(q, bio);
 
 			blk_queue_exit(q);
 
-			bio = bio_list_pop(current->bio_list);
+			/* sort new bios into those for a lower level
+			 * and those for the same level
+			 */
+			bio_list_init(&lower);
+			bio_list_init(&same);
+			while ((bio = bio_list_pop(&bio_list_on_stack)) != NULL)
+				if (q == bdev_get_queue(bio->bi_bdev))
+					bio_list_add(&same, bio);
+				else
+					bio_list_add(&lower, bio);
+			/* now assemble so we handle the lowest level first */
+			bio_list_merge(&bio_list_on_stack, &lower);
+			bio_list_merge(&bio_list_on_stack, &same);
+			bio_list_merge(&bio_list_on_stack, &hold);
 		} else {
-			struct bio *bio_next = bio_list_pop(current->bio_list);
-
 			bio_io_error(bio);
-			bio = bio_next;
 		}
+		bio = bio_list_pop(current->bio_list);
 	} while (bio);
 
 	current->bio_list = NULL; /* deactivate */
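The generic_make_request() rework above (NeilBrown's fix for deadlocks in stacked block devices) redistributes the bios a ->make_request_fn call generates: bios aimed at a lower-level device are handled before bios resubmitted to the same queue, which keeps the on-stack list from growing without bound. Below is a compilable userspace model of that splice ordering; struct bio, the list helpers, and the level field are simplified stand-ins, not the kernel types.

#include <stdio.h>

/* Simplified stand-ins for struct bio and struct bio_list. */
struct bio {
	int level;		/* 0 = current queue, 1 = lower-level queue */
	struct bio *next;
};

struct bio_list {
	struct bio *head, *tail;
};

static void bio_list_init(struct bio_list *bl)
{
	bl->head = bl->tail = NULL;
}

static void bio_list_add(struct bio_list *bl, struct bio *bio)
{
	bio->next = NULL;
	if (bl->tail)
		bl->tail->next = bio;
	else
		bl->head = bio;
	bl->tail = bio;
}

static struct bio *bio_list_pop(struct bio_list *bl)
{
	struct bio *bio = bl->head;

	if (bio) {
		bl->head = bio->next;
		if (!bl->head)
			bl->tail = NULL;
		bio->next = NULL;
	}
	return bio;
}

static void bio_list_merge(struct bio_list *dst, const struct bio_list *src)
{
	if (!src->head)
		return;
	if (dst->tail)
		dst->tail->next = src->head;
	else
		dst->head = src->head;
	dst->tail = src->tail;
}

int main(void)
{
	/* Bios "generated" by one ->make_request_fn call, mixed levels. */
	struct bio bios[4] = {
		{ .level = 0 }, { .level = 1 }, { .level = 0 }, { .level = 1 },
	};
	struct bio_list on_stack, hold, lower, same;
	struct bio *bio;
	int i;

	bio_list_init(&on_stack);
	bio_list_init(&hold);	/* bios queued before this call; empty here */
	for (i = 0; i < 4; i++)
		bio_list_add(&on_stack, &bios[i]);

	/* Sort the new bios into lower-level and same-level lists... */
	bio_list_init(&lower);
	bio_list_init(&same);
	while ((bio = bio_list_pop(&on_stack)) != NULL)
		bio_list_add(bio->level ? &lower : &same, bio);

	/* ...then reassemble so the lowest level is handled first. */
	bio_list_merge(&on_stack, &lower);
	bio_list_merge(&on_stack, &same);
	bio_list_merge(&on_stack, &hold);

	while ((bio = bio_list_pop(&on_stack)) != NULL)
		printf("dispatch bio at level %d\n", bio->level);
	return 0;
}

Running it dispatches the two level-1 bios before the level-0 ones, exactly the reordering the merged loop performs.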
diff --git a/block/blk-ioc.c b/block/blk-ioc.c
index b12f9c87b4c3..63898d229cb9 100644
--- a/block/blk-ioc.c
+++ b/block/blk-ioc.c
@@ -7,6 +7,7 @@
 #include <linux/bio.h>
 #include <linux/blkdev.h>
 #include <linux/slab.h>
+#include <linux/sched/task.h>
 
 #include "blk.h"
 
@@ -36,8 +37,8 @@ static void icq_free_icq_rcu(struct rcu_head *head)
 }
 
 /*
- * Exit an icq. Called with both ioc and q locked for sq, only ioc locked for
- * mq.
+ * Exit an icq. Called with ioc locked for blk-mq, and with both ioc
+ * and queue locked for legacy.
  */
 static void ioc_exit_icq(struct io_cq *icq)
 {
@@ -54,7 +55,10 @@ static void ioc_exit_icq(struct io_cq *icq)
 	icq->flags |= ICQ_EXITED;
 }
 
-/* Release an icq. Called with both ioc and q locked. */
+/*
+ * Release an icq. Called with ioc locked for blk-mq, and with both ioc
+ * and queue locked for legacy.
+ */
 static void ioc_destroy_icq(struct io_cq *icq)
 {
 	struct io_context *ioc = icq->ioc;
@@ -62,7 +66,6 @@ static void ioc_destroy_icq(struct io_cq *icq)
 	struct elevator_type *et = q->elevator->type;
 
 	lockdep_assert_held(&ioc->lock);
-	lockdep_assert_held(q->queue_lock);
 
 	radix_tree_delete(&ioc->icq_tree, icq->q->id);
 	hlist_del_init(&icq->ioc_node);
@@ -222,24 +225,40 @@ void exit_io_context(struct task_struct *task)
 	put_io_context_active(ioc);
 }
 
+static void __ioc_clear_queue(struct list_head *icq_list)
+{
+	unsigned long flags;
+
+	while (!list_empty(icq_list)) {
+		struct io_cq *icq = list_entry(icq_list->next,
+					       struct io_cq, q_node);
+		struct io_context *ioc = icq->ioc;
+
+		spin_lock_irqsave(&ioc->lock, flags);
+		ioc_destroy_icq(icq);
+		spin_unlock_irqrestore(&ioc->lock, flags);
+	}
+}
+
 /**
  * ioc_clear_queue - break any ioc association with the specified queue
  * @q: request_queue being cleared
  *
- * Walk @q->icq_list and exit all io_cq's.  Must be called with @q locked.
+ * Walk @q->icq_list and exit all io_cq's.
  */
 void ioc_clear_queue(struct request_queue *q)
 {
-	lockdep_assert_held(q->queue_lock);
+	LIST_HEAD(icq_list);
 
-	while (!list_empty(&q->icq_list)) {
-		struct io_cq *icq = list_entry(q->icq_list.next,
-					       struct io_cq, q_node);
-		struct io_context *ioc = icq->ioc;
+	spin_lock_irq(q->queue_lock);
+	list_splice_init(&q->icq_list, &icq_list);
 
-		spin_lock(&ioc->lock);
-		ioc_destroy_icq(icq);
-		spin_unlock(&ioc->lock);
+	if (q->mq_ops) {
+		spin_unlock_irq(q->queue_lock);
+		__ioc_clear_queue(&icq_list);
+	} else {
+		__ioc_clear_queue(&icq_list);
+		spin_unlock_irq(q->queue_lock);
 	}
 }
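The blk-ioc.c rework above fixes a lock-ordering problem: ioc_clear_queue() now detaches the whole icq list while holding q->queue_lock and, for blk-mq, destroys the entries afterwards with only the per-ioc lock taken. Here is the detach-under-lock, destroy-outside-lock pattern in isolation, as a compilable POSIX-threads sketch (the list primitives and icq type are simplified stand-ins):

#include <pthread.h>
#include <stdio.h>

/* Minimal list primitives in the spirit of <linux/list.h>. */
struct list_head { struct list_head *next, *prev; };

static void INIT_LIST_HEAD(struct list_head *h) { h->next = h->prev = h; }
static int list_empty(const struct list_head *h) { return h->next == h; }

static void list_add_tail(struct list_head *e, struct list_head *h)
{
	e->prev = h->prev;
	e->next = h;
	h->prev->next = e;
	h->prev = e;
}

static void list_del(struct list_head *e)
{
	e->prev->next = e->next;
	e->next->prev = e->prev;
}

static void list_splice_init(struct list_head *list, struct list_head *head)
{
	if (!list_empty(list)) {
		struct list_head *first = list->next, *last = list->prev;

		first->prev = head;
		last->next = head->next;
		head->next->prev = last;
		head->next = first;
		INIT_LIST_HEAD(list);
	}
}

struct icq { struct list_head q_node; int id; };	/* q_node must be first */

static pthread_mutex_t queue_lock = PTHREAD_MUTEX_INITIALIZER;
static struct list_head icq_list;			/* protected by queue_lock */

static void clear_queue(void)
{
	struct list_head detached;

	INIT_LIST_HEAD(&detached);

	/* Detach the whole list under the queue lock... */
	pthread_mutex_lock(&queue_lock);
	list_splice_init(&icq_list, &detached);
	pthread_mutex_unlock(&queue_lock);

	/* ...then tear the entries down without it, as blk-mq now does. */
	while (!list_empty(&detached)) {
		struct icq *icq = (struct icq *)detached.next;

		list_del(&icq->q_node);
		printf("destroying icq %d\n", icq->id);
	}
}

int main(void)
{
	struct icq a = { .id = 1 }, b = { .id = 2 };

	INIT_LIST_HEAD(&icq_list);
	list_add_tail(&a.q_node, &icq_list);
	list_add_tail(&b.q_node, &icq_list);
	clear_queue();
	return 0;
}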
diff --git a/block/blk-map.c b/block/blk-map.c
index 2f18c2a0be1b..3b5cb863318f 100644
--- a/block/blk-map.c
+++ b/block/blk-map.c
@@ -2,6 +2,7 @@
  * Functions related to mapping data to requests
  */
 #include <linux/kernel.h>
+#include <linux/sched/task_stack.h>
 #include <linux/module.h>
 #include <linux/bio.h>
 #include <linux/blkdev.h>
diff --git a/block/blk-mq-sched.c b/block/blk-mq-sched.c
index 98c7b061781e..09af8ff18719 100644
--- a/block/blk-mq-sched.c
+++ b/block/blk-mq-sched.c
@@ -110,15 +110,14 @@ struct request *blk_mq_sched_get_request(struct request_queue *q,
 					 struct blk_mq_alloc_data *data)
 {
 	struct elevator_queue *e = q->elevator;
-	struct blk_mq_hw_ctx *hctx;
-	struct blk_mq_ctx *ctx;
 	struct request *rq;
 
 	blk_queue_enter_live(q);
-	ctx = blk_mq_get_ctx(q);
-	hctx = blk_mq_map_queue(q, ctx->cpu);
-
-	blk_mq_set_alloc_data(data, q, data->flags, ctx, hctx);
+	data->q = q;
+	if (likely(!data->ctx))
+		data->ctx = blk_mq_get_ctx(q);
+	if (likely(!data->hctx))
+		data->hctx = blk_mq_map_queue(q, data->ctx->cpu);
 
 	if (e) {
 		data->flags |= BLK_MQ_REQ_INTERNAL;
@@ -135,8 +134,6 @@ struct request *blk_mq_sched_get_request(struct request_queue *q,
 		rq = __blk_mq_alloc_request(data, op);
 	} else {
 		rq = __blk_mq_alloc_request(data, op);
-		if (rq)
-			data->hctx->tags->rqs[rq->tag] = rq;
 	}
 
 	if (rq) {
@@ -454,7 +451,8 @@ int blk_mq_sched_setup(struct request_queue *q)
 	 */
 	ret = 0;
 	queue_for_each_hw_ctx(q, hctx, i) {
-		hctx->sched_tags = blk_mq_alloc_rq_map(set, i, q->nr_requests, 0);
+		hctx->sched_tags = blk_mq_alloc_rq_map(set, i,
+				q->nr_requests, set->reserved_tags);
 		if (!hctx->sched_tags) {
 			ret = -ENOMEM;
 			break;
diff --git a/block/blk-mq-sysfs.c b/block/blk-mq-sysfs.c
index 295e69670c39..d745ab81033a 100644
--- a/block/blk-mq-sysfs.c
+++ b/block/blk-mq-sysfs.c
@@ -17,6 +17,15 @@ static void blk_mq_sysfs_release(struct kobject *kobj)
 {
 }
 
+static void blk_mq_hw_sysfs_release(struct kobject *kobj)
+{
+	struct blk_mq_hw_ctx *hctx = container_of(kobj, struct blk_mq_hw_ctx,
+						  kobj);
+	free_cpumask_var(hctx->cpumask);
+	kfree(hctx->ctxs);
+	kfree(hctx);
+}
+
 struct blk_mq_ctx_sysfs_entry {
 	struct attribute attr;
 	ssize_t (*show)(struct blk_mq_ctx *, char *);
@@ -200,7 +209,7 @@ static struct kobj_type blk_mq_ctx_ktype = {
 static struct kobj_type blk_mq_hw_ktype = {
 	.sysfs_ops	= &blk_mq_hw_sysfs_ops,
 	.default_attrs	= default_hw_ctx_attrs,
-	.release	= blk_mq_sysfs_release,
+	.release	= blk_mq_hw_sysfs_release,
 };
 
 static void blk_mq_unregister_hctx(struct blk_mq_hw_ctx *hctx)
@@ -242,24 +251,15 @@ static int blk_mq_register_hctx(struct blk_mq_hw_ctx *hctx)
 static void __blk_mq_unregister_dev(struct device *dev, struct request_queue *q)
 {
 	struct blk_mq_hw_ctx *hctx;
-	struct blk_mq_ctx *ctx;
-	int i, j;
+	int i;
 
-	queue_for_each_hw_ctx(q, hctx, i) {
+	queue_for_each_hw_ctx(q, hctx, i)
 		blk_mq_unregister_hctx(hctx);
 
-		hctx_for_each_ctx(hctx, ctx, j)
-			kobject_put(&ctx->kobj);
-
-		kobject_put(&hctx->kobj);
-	}
-
 	blk_mq_debugfs_unregister_hctxs(q);
 
 	kobject_uevent(&q->mq_kobj, KOBJ_REMOVE);
 	kobject_del(&q->mq_kobj);
-	kobject_put(&q->mq_kobj);
-
 	kobject_put(&dev->kobj);
 
 	q->mq_sysfs_init_done = false;
@@ -277,7 +277,19 @@ void blk_mq_hctx_kobj_init(struct blk_mq_hw_ctx *hctx)
 	kobject_init(&hctx->kobj, &blk_mq_hw_ktype);
 }
 
-static void blk_mq_sysfs_init(struct request_queue *q)
+void blk_mq_sysfs_deinit(struct request_queue *q)
+{
+	struct blk_mq_ctx *ctx;
+	int cpu;
+
+	for_each_possible_cpu(cpu) {
+		ctx = per_cpu_ptr(q->queue_ctx, cpu);
+		kobject_put(&ctx->kobj);
+	}
+	kobject_put(&q->mq_kobj);
+}
+
+void blk_mq_sysfs_init(struct request_queue *q)
 {
 	struct blk_mq_ctx *ctx;
 	int cpu;
@@ -297,8 +309,6 @@ int blk_mq_register_dev(struct device *dev, struct request_queue *q)
 
 	blk_mq_disable_hotplug();
 
-	blk_mq_sysfs_init(q);
-
 	ret = kobject_add(&q->mq_kobj, kobject_get(&dev->kobj), "%s", "mq");
 	if (ret < 0)
 		goto out;
diff --git a/block/blk-mq-tag.c b/block/blk-mq-tag.c
index 54c84363c1b2..e48bc2c72615 100644
--- a/block/blk-mq-tag.c
+++ b/block/blk-mq-tag.c
@@ -181,7 +181,7 @@ found_tag:
 void blk_mq_put_tag(struct blk_mq_hw_ctx *hctx, struct blk_mq_tags *tags,
 		    struct blk_mq_ctx *ctx, unsigned int tag)
 {
-	if (tag >= tags->nr_reserved_tags) {
+	if (!blk_mq_tag_is_reserved(tags, tag)) {
 		const int real_tag = tag - tags->nr_reserved_tags;
 
 		BUG_ON(real_tag >= tags->nr_tags);
diff --git a/block/blk-mq-tag.h b/block/blk-mq-tag.h
index 63497423c5cd..5cb51e53cc03 100644
--- a/block/blk-mq-tag.h
+++ b/block/blk-mq-tag.h
@@ -85,4 +85,10 @@ static inline void blk_mq_tag_set_rq(struct blk_mq_hw_ctx *hctx,
 	hctx->tags->rqs[tag] = rq;
 }
 
+static inline bool blk_mq_tag_is_reserved(struct blk_mq_tags *tags,
+					  unsigned int tag)
+{
+	return tag < tags->nr_reserved_tags;
+}
+
 #endif
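The new blk_mq_tag_is_reserved() helper above replaces the open-coded comparison in blk_mq_put_tag(), and blk-mq-sched.c now passes set->reserved_tags when allocating sched_tags so reserved requests keep working with a scheduler attached. The tag-space layout it encodes: tags [0, nr_reserved_tags) are reserved, and everything above maps onto the normal bitmap at tag - nr_reserved_tags. A small standalone sketch of that layout (sizes are illustrative):

#include <assert.h>
#include <stdbool.h>
#include <stdio.h>

/* Reduced model of struct blk_mq_tags: just the sizing fields. */
struct tags {
	unsigned int nr_tags;		/* total tag space */
	unsigned int nr_reserved_tags;	/* tags [0, nr_reserved) are reserved */
};

static bool tag_is_reserved(const struct tags *tags, unsigned int tag)
{
	return tag < tags->nr_reserved_tags;
}

int main(void)
{
	struct tags tags = { .nr_tags = 64, .nr_reserved_tags = 4 };
	unsigned int samples[] = { 0, 3, 4, 10, 63 };
	size_t i;

	for (i = 0; i < sizeof(samples) / sizeof(samples[0]); i++) {
		unsigned int tag = samples[i];

		if (tag_is_reserved(&tags, tag)) {
			printf("tag %2u: reserved pool\n", tag);
		} else {
			/* Same real_tag computation as blk_mq_put_tag(). */
			unsigned int real_tag = tag - tags.nr_reserved_tags;

			assert(real_tag < tags.nr_tags);	/* mirrors the BUG_ON */
			printf("tag %2u: normal pool, bit %u\n", tag, real_tag);
		}
	}
	return 0;
}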
diff --git a/block/blk-mq-virtio.c b/block/blk-mq-virtio.c
new file mode 100644
index 000000000000..c3afbca11299
--- /dev/null
+++ b/block/blk-mq-virtio.c
@@ -0,0 +1,54 @@
+/*
+ * Copyright (c) 2016 Christoph Hellwig.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ */
+#include <linux/device.h>
+#include <linux/blk-mq.h>
+#include <linux/blk-mq-virtio.h>
+#include <linux/virtio_config.h>
+#include <linux/module.h>
+#include "blk-mq.h"
+
+/**
+ * blk_mq_virtio_map_queues - provide a default queue mapping for virtio device
+ * @set:	tagset to provide the mapping for
+ * @vdev:	virtio device associated with @set.
+ * @first_vec:	first interrupt vector to use for queues (usually 0)
+ *
+ * This function assumes the virtio device @vdev has at least as many available
+ * interrupt vectors as @set has queues.  It will then query the vector
+ * corresponding to each queue for its affinity mask and build a queue mapping
+ * that maps a queue to the CPUs that have irq affinity for the corresponding
+ * vector.
+ */
+int blk_mq_virtio_map_queues(struct blk_mq_tag_set *set,
+		struct virtio_device *vdev, int first_vec)
+{
+	const struct cpumask *mask;
+	unsigned int queue, cpu;
+
+	if (!vdev->config->get_vq_affinity)
+		goto fallback;
+
+	for (queue = 0; queue < set->nr_hw_queues; queue++) {
+		mask = vdev->config->get_vq_affinity(vdev, first_vec + queue);
+		if (!mask)
+			goto fallback;
+
+		for_each_cpu(cpu, mask)
+			set->mq_map[cpu] = queue;
+	}
+
+	return 0;
+fallback:
+	return blk_mq_map_queues(set);
+}
+EXPORT_SYMBOL_GPL(blk_mq_virtio_map_queues);
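Drivers such as virtio-blk can point their ->map_queues callback at blk_mq_virtio_map_queues(); when the transport cannot report per-virtqueue affinity, the helper falls back to the generic blk_mq_map_queues() spread. Below is a userspace model of the mapping loop, with plain arrays standing in for cpumasks and the get_vq_affinity config op (all values illustrative):

#include <stdio.h>

#define NR_CPUS		8
#define NR_HW_QUEUES	2

/*
 * Stand-in for vdev->config->get_vq_affinity(): one row per queue, one
 * flag per CPU.  A NULL mask would trigger the generic fallback instead.
 */
static const int vq_affinity[NR_HW_QUEUES][NR_CPUS] = {
	{ 1, 1, 1, 1, 0, 0, 0, 0 },	/* vector for queue 0 -> CPUs 0-3 */
	{ 0, 0, 0, 0, 1, 1, 1, 1 },	/* vector for queue 1 -> CPUs 4-7 */
};

int main(void)
{
	unsigned int mq_map[NR_CPUS];	/* cpu -> hw queue, as in the tag set */
	unsigned int queue, cpu;

	/* Same shape as the loop in blk_mq_virtio_map_queues(). */
	for (queue = 0; queue < NR_HW_QUEUES; queue++)
		for (cpu = 0; cpu < NR_CPUS; cpu++)
			if (vq_affinity[queue][cpu])
				mq_map[cpu] = queue;

	for (cpu = 0; cpu < NR_CPUS; cpu++)
		printf("CPU %u -> hw queue %u\n", cpu, mq_map[cpu]);
	return 0;
}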
diff --git a/block/blk-mq.c b/block/blk-mq.c
index 9e6b064e5339..159187a28d66 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -20,6 +20,8 @@
 #include <linux/cpu.h>
 #include <linux/cache.h>
 #include <linux/sched/sysctl.h>
+#include <linux/sched/topology.h>
+#include <linux/sched/signal.h>
 #include <linux/delay.h>
 #include <linux/crash_dump.h>
 #include <linux/prefetch.h>
@@ -75,10 +77,20 @@ void blk_mq_freeze_queue_start(struct request_queue *q)
 }
 EXPORT_SYMBOL_GPL(blk_mq_freeze_queue_start);
 
-static void blk_mq_freeze_queue_wait(struct request_queue *q)
+void blk_mq_freeze_queue_wait(struct request_queue *q)
 {
 	wait_event(q->mq_freeze_wq, percpu_ref_is_zero(&q->q_usage_counter));
 }
+EXPORT_SYMBOL_GPL(blk_mq_freeze_queue_wait);
+
+int blk_mq_freeze_queue_wait_timeout(struct request_queue *q,
+				     unsigned long timeout)
+{
+	return wait_event_timeout(q->mq_freeze_wq,
+					percpu_ref_is_zero(&q->q_usage_counter),
+					timeout);
+}
+EXPORT_SYMBOL_GPL(blk_mq_freeze_queue_wait_timeout);
 
 /*
  * Guarantee no request is in use, so we can change any data structure of
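blk_mq_freeze_queue_wait() is now exported, and the new blk_mq_freeze_queue_wait_timeout() lets a driver (NVMe's reset handling was the intended user) bound how long it waits for q_usage_counter to drain. A userspace model of wait-for-zero-with-deadline using pthread_cond_timedwait(); the kernel implementation is wait_event_timeout() on mq_freeze_wq, so everything below is a simplified analogue:

#include <pthread.h>
#include <stdio.h>
#include <time.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t drained = PTHREAD_COND_INITIALIZER;
static int in_flight = 1;	/* pretend one request never completes */

/* Returns 1 if the queue drained, 0 on timeout. */
static int freeze_wait_timeout(unsigned int timeout_ms)
{
	struct timespec deadline;
	int ret = 1;

	clock_gettime(CLOCK_REALTIME, &deadline);
	deadline.tv_sec += timeout_ms / 1000;
	deadline.tv_nsec += (long)(timeout_ms % 1000) * 1000000L;
	if (deadline.tv_nsec >= 1000000000L) {
		deadline.tv_sec++;
		deadline.tv_nsec -= 1000000000L;
	}

	pthread_mutex_lock(&lock);
	while (in_flight > 0 && ret)
		if (pthread_cond_timedwait(&drained, &lock, &deadline))
			ret = 0;	/* ETIMEDOUT: give up waiting */
	pthread_mutex_unlock(&lock);
	return ret;
}

int main(void)
{
	/* No completion ever signals "drained", so this times out. */
	printf("drained: %d\n", freeze_wait_timeout(100));
	return 0;
}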
@@ -234,6 +246,7 @@ struct request *__blk_mq_alloc_request(struct blk_mq_alloc_data *data,
 		}
 		rq->tag = tag;
 		rq->internal_tag = -1;
+		data->hctx->tags->rqs[rq->tag] = rq;
 	}
 
 	blk_mq_rq_ctx_init(data->q, data->ctx, rq, op);
@@ -273,10 +286,9 @@ EXPORT_SYMBOL(blk_mq_alloc_request);
 struct request *blk_mq_alloc_request_hctx(struct request_queue *q, int rw,
 		unsigned int flags, unsigned int hctx_idx)
 {
-	struct blk_mq_hw_ctx *hctx;
-	struct blk_mq_ctx *ctx;
+	struct blk_mq_alloc_data alloc_data = { .flags = flags };
 	struct request *rq;
-	struct blk_mq_alloc_data alloc_data;
+	unsigned int cpu;
 	int ret;
 
 	/*
@@ -299,25 +311,23 @@ struct request *blk_mq_alloc_request_hctx(struct request_queue *q, int rw,
 	 * Check if the hardware context is actually mapped to anything.
 	 * If not tell the caller that it should skip this queue.
 	 */
-	hctx = q->queue_hw_ctx[hctx_idx];
-	if (!blk_mq_hw_queue_mapped(hctx)) {
-		ret = -EXDEV;
-		goto out_queue_exit;
-	}
-	ctx = __blk_mq_get_ctx(q, cpumask_first(hctx->cpumask));
-
-	blk_mq_set_alloc_data(&alloc_data, q, flags, ctx, hctx);
-	rq = __blk_mq_alloc_request(&alloc_data, rw);
-	if (!rq) {
-		ret = -EWOULDBLOCK;
-		goto out_queue_exit;
+	alloc_data.hctx = q->queue_hw_ctx[hctx_idx];
+	if (!blk_mq_hw_queue_mapped(alloc_data.hctx)) {
+		blk_queue_exit(q);
+		return ERR_PTR(-EXDEV);
 	}
+	cpu = cpumask_first(alloc_data.hctx->cpumask);
+	alloc_data.ctx = __blk_mq_get_ctx(q, cpu);
 
-	return rq;
+	rq = blk_mq_sched_get_request(q, NULL, rw, &alloc_data);
 
-out_queue_exit:
+	blk_mq_put_ctx(alloc_data.ctx);
 	blk_queue_exit(q);
-	return ERR_PTR(ret);
+
+	if (!rq)
+		return ERR_PTR(-EWOULDBLOCK);
+
+	return rq;
 }
 EXPORT_SYMBOL_GPL(blk_mq_alloc_request_hctx);
@@ -852,6 +862,9 @@ done:
 		return true;
 	}
 
+	if (blk_mq_tag_is_reserved(data.hctx->sched_tags, rq->internal_tag))
+		data.flags |= BLK_MQ_REQ_RESERVED;
+
 	rq->tag = blk_mq_get_tag(&data);
 	if (rq->tag >= 0) {
 		if (blk_mq_tag_busy(data.hctx)) {
@@ -865,12 +878,9 @@ done:
 	return false;
 }
 
-static void blk_mq_put_driver_tag(struct blk_mq_hw_ctx *hctx,
-				  struct request *rq)
+static void __blk_mq_put_driver_tag(struct blk_mq_hw_ctx *hctx,
+				    struct request *rq)
 {
-	if (rq->tag == -1 || rq->internal_tag == -1)
-		return;
-
 	blk_mq_put_tag(hctx, hctx->tags, rq->mq_ctx, rq->tag);
 	rq->tag = -1;
 
@@ -880,6 +890,26 @@ static void blk_mq_put_driver_tag(struct blk_mq_hw_ctx *hctx,
 	}
 }
 
+static void blk_mq_put_driver_tag_hctx(struct blk_mq_hw_ctx *hctx,
+				       struct request *rq)
+{
+	if (rq->tag == -1 || rq->internal_tag == -1)
+		return;
+
+	__blk_mq_put_driver_tag(hctx, rq);
+}
+
+static void blk_mq_put_driver_tag(struct request *rq)
+{
+	struct blk_mq_hw_ctx *hctx;
+
+	if (rq->tag == -1 || rq->internal_tag == -1)
+		return;
+
+	hctx = blk_mq_map_queue(rq->q, rq->mq_ctx->cpu);
+	__blk_mq_put_driver_tag(hctx, rq);
+}
+
 /*
  * If we fail getting a driver tag because all the driver tags are already
  * assigned and on the dispatch list, BUT the first entry does not have a
@@ -989,7 +1019,19 @@ bool blk_mq_dispatch_rq_list(struct blk_mq_hw_ctx *hctx, struct list_head *list)
 
 		bd.rq = rq;
 		bd.list = dptr;
-		bd.last = list_empty(list);
+
+		/*
+		 * Flag last if we have no more requests, or if we have more
+		 * but can't assign a driver tag to it.
+		 */
+		if (list_empty(list))
+			bd.last = true;
+		else {
+			struct request *nxt;
+
+			nxt = list_first_entry(list, struct request, queuelist);
+			bd.last = !blk_mq_get_driver_tag(nxt, NULL, false);
+		}
 
 		ret = q->mq_ops->queue_rq(hctx, &bd);
 		switch (ret) {
@@ -997,7 +1039,7 @@ bool blk_mq_dispatch_rq_list(struct blk_mq_hw_ctx *hctx, struct list_head *list)
 			queued++;
 			break;
 		case BLK_MQ_RQ_QUEUE_BUSY:
-			blk_mq_put_driver_tag(hctx, rq);
+			blk_mq_put_driver_tag_hctx(hctx, rq);
 			list_add(&rq->queuelist, list);
 			__blk_mq_requeue_request(rq);
 			break;
@@ -1027,6 +1069,13 @@ bool blk_mq_dispatch_rq_list(struct blk_mq_hw_ctx *hctx, struct list_head *list)
 	 * that is where we will continue on next queue run.
 	 */
 	if (!list_empty(list)) {
+		/*
+		 * If we got a driver tag for the next request already,
+		 * free it again.
+		 */
+		rq = list_first_entry(list, struct request, queuelist);
+		blk_mq_put_driver_tag(rq);
+
 		spin_lock(&hctx->lock);
 		list_splice_init(list, &hctx->dispatch);
 		spin_unlock(&hctx->lock);
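The dispatch changes above compute bd.last by peeking ahead: the driver sees last == true only when the list is empty or the next request cannot get a driver tag, so it knows exactly when to kick the hardware, and a prefetched tag is returned if requests are left over for the dispatch list. Here is the peek-ahead pattern in isolation, with a small token pool standing in for driver tags (compilable sketch):

#include <stdbool.h>
#include <stdio.h>

#define NR_REQS		5
#define NR_DRIVER_TAGS	3	/* stand-in for the hardware tag pool */

static int tags_in_use;

static bool get_driver_tag(void)
{
	if (tags_in_use >= NR_DRIVER_TAGS)
		return false;
	tags_in_use++;
	return true;
}

int main(void)
{
	bool tagged[NR_REQS] = { false };
	int i;

	/* Tag the first request up front, as the dispatch loop does. */
	tagged[0] = get_driver_tag();

	for (i = 0; i < NR_REQS && tagged[i]; i++) {
		bool last;

		/*
		 * Flag last if there is no next request, or if the next
		 * request cannot get a driver tag, mirroring the new
		 * bd.last computation.  Tags stay held because requests
		 * complete asynchronously.
		 */
		if (i + 1 == NR_REQS)
			last = true;
		else {
			tagged[i + 1] = get_driver_tag();
			last = !tagged[i + 1];
		}

		printf("queue_rq(req %d, last=%s)\n", i, last ? "true" : "false");
	}

	printf("%d request(s) left for the dispatch list\n", NR_REQS - i);
	return 0;
}

With three tags, requests 0 and 1 go out with last=false, request 2 flushes the batch with last=true, and the remaining two requests would be spliced back onto the dispatch list.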
@@ -1713,16 +1762,20 @@ struct blk_mq_tags *blk_mq_alloc_rq_map(struct blk_mq_tag_set *set,
 					unsigned int reserved_tags)
 {
 	struct blk_mq_tags *tags;
+	int node;
+
+	node = blk_mq_hw_queue_to_node(set->mq_map, hctx_idx);
+	if (node == NUMA_NO_NODE)
+		node = set->numa_node;
 
-	tags = blk_mq_init_tags(nr_tags, reserved_tags,
-				set->numa_node,
+	tags = blk_mq_init_tags(nr_tags, reserved_tags, node,
 				BLK_MQ_FLAG_TO_ALLOC_POLICY(set->flags));
 	if (!tags)
 		return NULL;
 
 	tags->rqs = kzalloc_node(nr_tags * sizeof(struct request *),
 				 GFP_NOIO | __GFP_NOWARN | __GFP_NORETRY,
-				 set->numa_node);
+				 node);
 	if (!tags->rqs) {
 		blk_mq_free_tags(tags);
 		return NULL;
@@ -1730,7 +1783,7 @@ struct blk_mq_tags *blk_mq_alloc_rq_map(struct blk_mq_tag_set *set,
 
 	tags->static_rqs = kzalloc_node(nr_tags * sizeof(struct request *),
 				 GFP_NOIO | __GFP_NOWARN | __GFP_NORETRY,
-				 set->numa_node);
+				 node);
 	if (!tags->static_rqs) {
 		kfree(tags->rqs);
 		blk_mq_free_tags(tags);
@@ -1750,6 +1803,11 @@ int blk_mq_alloc_rqs(struct blk_mq_tag_set *set, struct blk_mq_tags *tags,
 {
 	unsigned int i, j, entries_per_page, max_order = 4;
 	size_t rq_size, left;
+	int node;
+
+	node = blk_mq_hw_queue_to_node(set->mq_map, hctx_idx);
+	if (node == NUMA_NO_NODE)
+		node = set->numa_node;
 
 	INIT_LIST_HEAD(&tags->page_list);
 
@@ -1771,7 +1829,7 @@ int blk_mq_alloc_rqs(struct blk_mq_tag_set *set, struct blk_mq_tags *tags,
 		this_order--;
 
 		do {
-			page = alloc_pages_node(set->numa_node,
+			page = alloc_pages_node(node,
 				GFP_NOIO | __GFP_NOWARN | __GFP_NORETRY | __GFP_ZERO,
 				this_order);
 			if (page)
@@ -1804,7 +1862,7 @@ int blk_mq_alloc_rqs(struct blk_mq_tag_set *set, struct blk_mq_tags *tags,
 			if (set->ops->init_request) {
 				if (set->ops->init_request(set->driver_data,
 						rq, hctx_idx, i,
-						set->numa_node)) {
+						node)) {
 					tags->static_rqs[i] = NULL;
 					goto fail;
 				}
@@ -1897,16 +1955,6 @@ static void blk_mq_exit_hw_queues(struct request_queue *q,
 	}
 }
 
-static void blk_mq_free_hw_queues(struct request_queue *q,
-				  struct blk_mq_tag_set *set)
-{
-	struct blk_mq_hw_ctx *hctx;
-	unsigned int i;
-
-	queue_for_each_hw_ctx(q, hctx, i)
-		free_cpumask_var(hctx->cpumask);
-}
-
 static int blk_mq_init_hctx(struct request_queue *q,
 		struct blk_mq_tag_set *set,
 		struct blk_mq_hw_ctx *hctx, unsigned hctx_idx)
@@ -1987,7 +2035,6 @@ static void blk_mq_init_cpu_queues(struct request_queue *q,
 		struct blk_mq_ctx *__ctx = per_cpu_ptr(q->queue_ctx, i);
 		struct blk_mq_hw_ctx *hctx;
 
-		memset(__ctx, 0, sizeof(*__ctx));
 		__ctx->cpu = i;
 		spin_lock_init(&__ctx->lock);
 		INIT_LIST_HEAD(&__ctx->rq_list);
@@ -2199,15 +2246,19 @@ void blk_mq_release(struct request_queue *q)
 	queue_for_each_hw_ctx(q, hctx, i) {
 		if (!hctx)
 			continue;
-		kfree(hctx->ctxs);
-		kfree(hctx);
+		kobject_put(&hctx->kobj);
 	}
 
 	q->mq_map = NULL;
 
 	kfree(q->queue_hw_ctx);
 
-	/* ctx kobj stays in queue_ctx */
+	/*
+	 * release .mq_kobj and sw queue's kobject now because
+	 * both share lifetime with request queue.
+	 */
+	blk_mq_sysfs_deinit(q);
+
 	free_percpu(q->queue_ctx);
 }
@@ -2272,10 +2323,7 @@ static void blk_mq_realloc_hw_ctxs(struct blk_mq_tag_set *set,
 			if (hctx->tags)
 				blk_mq_free_map_and_requests(set, j);
 			blk_mq_exit_hctx(q, set, hctx, j);
-			free_cpumask_var(hctx->cpumask);
 			kobject_put(&hctx->kobj);
-			kfree(hctx->ctxs);
-			kfree(hctx);
 			hctxs[j] = NULL;
 
 		}
@@ -2294,6 +2342,9 @@ struct request_queue *blk_mq_init_allocated_queue(struct blk_mq_tag_set *set,
 	if (!q->queue_ctx)
 		goto err_exit;
 
+	/* init q->mq_kobj and sw queues' kobjects */
+	blk_mq_sysfs_init(q);
+
 	q->queue_hw_ctx = kzalloc_node(nr_cpu_ids * sizeof(*(q->queue_hw_ctx)),
 						GFP_KERNEL, set->numa_node);
 	if (!q->queue_hw_ctx)
@@ -2384,7 +2435,6 @@ void blk_mq_free_queue(struct request_queue *q)
 
 	blk_mq_del_queue_tag_set(q);
 	blk_mq_exit_hw_queues(q, set, set->nr_hw_queues);
-	blk_mq_free_hw_queues(q, set);
 }
 
 /* Basically redo blk_mq_init_queue with queue frozen */
diff --git a/block/blk-mq.h b/block/blk-mq.h
index 24b2256186f3..b79f9a7d8cf6 100644
--- a/block/blk-mq.h
+++ b/block/blk-mq.h
@@ -77,6 +77,8 @@ static inline struct blk_mq_hw_ctx *blk_mq_map_queue(struct request_queue *q,
 /*
  * sysfs helpers
  */
+extern void blk_mq_sysfs_init(struct request_queue *q);
+extern void blk_mq_sysfs_deinit(struct request_queue *q);
 extern int blk_mq_sysfs_register(struct request_queue *q);
 extern void blk_mq_sysfs_unregister(struct request_queue *q);
 extern void blk_mq_hctx_kobj_init(struct blk_mq_hw_ctx *hctx);
@@ -146,16 +148,6 @@ struct blk_mq_alloc_data {
 	struct blk_mq_hw_ctx *hctx;
 };
 
-static inline void blk_mq_set_alloc_data(struct blk_mq_alloc_data *data,
-		struct request_queue *q, unsigned int flags,
-		struct blk_mq_ctx *ctx, struct blk_mq_hw_ctx *hctx)
-{
-	data->q = q;
-	data->flags = flags;
-	data->ctx = ctx;
-	data->hctx = hctx;
-}
-
 static inline struct blk_mq_tags *blk_mq_tags_from_data(struct blk_mq_alloc_data *data)
 {
 	if (data->flags & BLK_MQ_REQ_INTERNAL)
diff --git a/block/blk-softirq.c b/block/blk-softirq.c
index 06cf9807f49a..87b7df4851bf 100644
--- a/block/blk-softirq.c
+++ b/block/blk-softirq.c
@@ -9,6 +9,7 @@
 #include <linux/interrupt.h>
 #include <linux/cpu.h>
 #include <linux/sched.h>
+#include <linux/sched/topology.h>
 
 #include "blk.h"
 
diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
index 002af836aa87..c44b321335f3 100644
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@ -815,9 +815,7 @@ static void blk_release_queue(struct kobject *kobj)
 	blkcg_exit_queue(q);
 
 	if (q->elevator) {
-		spin_lock_irq(q->queue_lock);
 		ioc_clear_queue(q);
-		spin_unlock_irq(q->queue_lock);
 		elevator_exit(q->elevator);
 	}
 
diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
index 137944777859..440b95ee593c 100644
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -8,6 +8,7 @@
  */
 #include <linux/module.h>
 #include <linux/slab.h>
+#include <linux/sched/clock.h>
 #include <linux/blkdev.h>
 #include <linux/elevator.h>
 #include <linux/ktime.h>
diff --git a/block/elevator.c b/block/elevator.c
index ac1c9f481a98..01139f549b5b 100644
--- a/block/elevator.c
+++ b/block/elevator.c
@@ -983,9 +983,7 @@ static int elevator_switch(struct request_queue *q, struct elevator_type *new_e)
 		if (old_registered)
 			elv_unregister_queue(q);
 
-		spin_lock_irq(q->queue_lock);
 		ioc_clear_queue(q);
-		spin_unlock_irq(q->queue_lock);
 	}
 
 	/* allocate, init and register new elevator */
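The remaining blk-mq.c and blk-mq-sysfs.c changes move freeing of hctx->cpumask, hctx->ctxs and the hctx itself into the hardware queue's kobject release handler, and tie the mq kobject and software-queue kobjects to the request queue's lifetime via blk_mq_sysfs_init()/blk_mq_sysfs_deinit(). A minimal refcount-and-release model of why that is safer than kfree()ing directly (a hand-rolled stand-in, not the kernel kobject API):

#include <stdio.h>
#include <stdlib.h>

struct kobj {
	int refcount;
	void (*release)(struct kobj *kobj);
};

struct hw_ctx {
	struct kobj kobj;	/* must come first for the container cast */
	int *ctxs;		/* per-CPU context table, freed on release */
};

static void kobj_get(struct kobj *k) { k->refcount++; }

static void kobj_put(struct kobj *k)
{
	if (--k->refcount == 0)
		k->release(k);
}

static void hw_ctx_release(struct kobj *k)
{
	struct hw_ctx *hctx = (struct hw_ctx *)k;

	printf("releasing hctx\n");
	free(hctx->ctxs);
	free(hctx);
}

int main(void)
{
	struct hw_ctx *hctx = malloc(sizeof(*hctx));

	hctx->kobj.refcount = 1;	/* creator's reference */
	hctx->kobj.release = hw_ctx_release;
	hctx->ctxs = malloc(4 * sizeof(int));

	kobj_get(&hctx->kobj);	/* e.g. sysfs still holds a reference */
	kobj_put(&hctx->kobj);	/* teardown path drops its reference... */
	kobj_put(&hctx->kobj);	/* ...and the object is freed on the last put */
	return 0;
}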
diff --git a/block/genhd.c b/block/genhd.c
index 2f444b87a5f2..a9c516a8b37d 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -572,20 +572,6 @@ exit:
 	disk_part_iter_exit(&piter);
 }
 
-void put_disk_devt(struct disk_devt *disk_devt)
-{
-	if (disk_devt && atomic_dec_and_test(&disk_devt->count))
-		disk_devt->release(disk_devt);
-}
-EXPORT_SYMBOL(put_disk_devt);
-
-void get_disk_devt(struct disk_devt *disk_devt)
-{
-	if (disk_devt)
-		atomic_inc(&disk_devt->count);
-}
-EXPORT_SYMBOL(get_disk_devt);
-
 /**
  * device_add_disk - add partitioning information to kernel list
  * @parent: parent device for the disk
@@ -626,13 +612,6 @@ void device_add_disk(struct device *parent, struct gendisk *disk)
 
 	disk_alloc_events(disk);
 
-	/*
-	 * Take a reference on the devt and assign it to queue since it
-	 * must not be reallocated while the bdi is registered
-	 */
-	disk->queue->disk_devt = disk->disk_devt;
-	get_disk_devt(disk->disk_devt);
-
 	/* Register BDI before referencing it from bdev */
 	bdi = disk->queue->backing_dev_info;
 	bdi_register_owner(bdi, disk_to_dev(disk));
@@ -681,7 +660,16 @@ void del_gendisk(struct gendisk *disk)
 	disk->flags &= ~GENHD_FL_UP;
 
 	sysfs_remove_link(&disk_to_dev(disk)->kobj, "bdi");
-	blk_unregister_queue(disk);
+	if (disk->queue) {
+		/*
+		 * Unregister bdi before releasing device numbers (as they can
+		 * get reused and we'd get clashes in sysfs).
+		 */
+		bdi_unregister(disk->queue->backing_dev_info);
+		blk_unregister_queue(disk);
+	} else {
+		WARN_ON(1);
+	}
 	blk_unregister_region(disk_devt(disk), disk->minors);
 
 	part_stat_set_all(&disk->part0, 0);
diff --git a/block/ioprio.c b/block/ioprio.c
index 3790669232ff..0c47a00f92a8 100644
--- a/block/ioprio.c
+++ b/block/ioprio.c
@@ -23,8 +23,11 @@
 #include <linux/kernel.h>
 #include <linux/export.h>
 #include <linux/ioprio.h>
+#include <linux/cred.h>
 #include <linux/blkdev.h>
 #include <linux/capability.h>
+#include <linux/sched/user.h>
+#include <linux/sched/task.h>
 #include <linux/syscalls.h>
 #include <linux/security.h>
 #include <linux/pid_namespace.h>
diff --git a/block/sed-opal.c b/block/sed-opal.c
index 1e18dca360fc..14035f826b5e 100644
--- a/block/sed-opal.c
+++ b/block/sed-opal.c
@@ -1023,7 +1023,6 @@ static int finalize_and_send(struct opal_dev *dev, cont_fn cont)
 
 static int gen_key(struct opal_dev *dev, void *data)
 {
-	const u8 *method;
 	u8 uid[OPAL_UID_LENGTH];
 	int err = 0;
 
@@ -1031,7 +1030,6 @@ static int gen_key(struct opal_dev *dev, void *data)
 	set_comid(dev, dev->comid);
 
 	memcpy(uid, dev->prev_data, min(sizeof(uid), dev->prev_d_len));
-	method = opalmethod[OPAL_GENKEY];
 	kfree(dev->prev_data);
 	dev->prev_data = NULL;
 
@@ -1669,7 +1667,6 @@ static int add_user_to_lr(struct opal_dev *dev, void *data)
 static int lock_unlock_locking_range(struct opal_dev *dev, void *data)
 {
 	u8 lr_buffer[OPAL_UID_LENGTH];
-	const u8 *method;
 	struct opal_lock_unlock *lkul = data;
 	u8 read_locked = 1, write_locked = 1;
 	int err = 0;
@@ -1677,7 +1674,6 @@ static int lock_unlock_locking_range(struct opal_dev *dev, void *data)
 	clear_opal_cmd(dev);
 	set_comid(dev, dev->comid);
 
-	method = opalmethod[OPAL_SET];
 	if (build_locking_range(lr_buffer, sizeof(lr_buffer),
 				lkul->session.opal_key.lr) < 0)
 		return -ERANGE;
@@ -1733,14 +1729,12 @@ static int lock_unlock_locking_range_sum(struct opal_dev *dev, void *data)
 {
 	u8 lr_buffer[OPAL_UID_LENGTH];
 	u8 read_locked = 1, write_locked = 1;
-	const u8 *method;
 	struct opal_lock_unlock *lkul = data;
 	int ret;
 
 	clear_opal_cmd(dev);
 	set_comid(dev, dev->comid);
 
-	method = opalmethod[OPAL_SET];
 	if (build_locking_range(lr_buffer, sizeof(lr_buffer),
 				lkul->session.opal_key.lr) < 0)
 		return -ERANGE;
@@ -2133,7 +2127,7 @@ static int opal_add_user_to_lr(struct opal_dev *dev,
 		pr_err("Locking state was not RO or RW\n");
 		return -EINVAL;
 	}
-	if (lk_unlk->session.who < OPAL_USER1 &&
+	if (lk_unlk->session.who < OPAL_USER1 ||
 	    lk_unlk->session.who > OPAL_USER9) {
 		pr_err("Authority was not within the range of users: %d\n",
 		       lk_unlk->session.who);
@@ -2316,7 +2310,7 @@ static int opal_activate_user(struct opal_dev *dev,
 	int ret;
 
 	/* We can't activate Admin1 it's active as manufactured */
-	if (opal_session->who < OPAL_USER1 &&
+	if (opal_session->who < OPAL_USER1 ||
 	    opal_session->who > OPAL_USER9) {
 		pr_err("Who was not a valid user: %d\n", opal_session->who);
 		return -EINVAL;
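The sed-opal changes drop write-only method lookups and fix two inverted range checks: who < OPAL_USER1 && who > OPAL_USER9 can never be true, so out-of-range authorities were accepted; with || anything outside [OPAL_USER1, OPAL_USER9] is rejected. A minimal demonstration (the enum values here are illustrative, not the real opal_user values):

#include <stdbool.h>
#include <stdio.h>

/* Illustrative stand-ins for the opal_user enum range. */
enum { OPAL_USER1 = 2, OPAL_USER9 = 10 };

static bool buggy_out_of_range(int who)
{
	return who < OPAL_USER1 && who > OPAL_USER9;	/* always false */
}

static bool fixed_out_of_range(int who)
{
	return who < OPAL_USER1 || who > OPAL_USER9;
}

int main(void)
{
	int who;

	for (who = 0; who <= 12; who += 6)
		printf("who=%2d  buggy rejects: %d  fixed rejects: %d\n",
		       who, buggy_out_of_range(who), fixed_out_of_range(who));
	return 0;
}

The buggy predicate rejects nothing; the fixed one rejects who=0 and who=12 while accepting who=6, which is the intended behavior.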