summaryrefslogtreecommitdiffstats
path: root/block/blk-mq.c
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2022-08-02 22:46:35 +0200
committerLinus Torvalds <torvalds@linux-foundation.org>2022-08-02 22:46:35 +0200
commitc013d0af81f60cc7dbe357c4e2a925fb6738dbfe (patch)
tree171dfdf928d0450a3fa98a58b2297d857804bb35 /block/blk-mq.c
parentMerge tag 'for-5.20/io_uring-zerocopy-send-2022-07-29' of git://git.kernel.dk... (diff)
parentublk_drv: fix double shift bug (diff)
downloadlinux-c013d0af81f60cc7dbe357c4e2a925fb6738dbfe.tar.xz
linux-c013d0af81f60cc7dbe357c4e2a925fb6738dbfe.zip
Merge tag 'for-5.20/block-2022-07-29' of git://git.kernel.dk/linux-block
Pull block updates from Jens Axboe: - Improve the type checking of request flags (Bart) - Ensure queue mapping for a single queues always picks the right queue (Bart) - Sanitize the io priority handling (Jan) - rq-qos race fix (Jinke) - Reserved tags handling improvements (John) - Separate memory alignment from file/disk offset aligment for O_DIRECT (Keith) - Add new ublk driver, userspace block driver using io_uring for communication with the userspace backend (Ming) - Use try_cmpxchg() to cleanup the code in various spots (Uros) - Finally remove bdevname() (Christoph) - Clean up the zoned device handling (Christoph) - Clean up independent access range support (Christoph) - Clean up and improve block sysfs handling (Christoph) - Clean up and improve teardown of block devices. This turns the usual two step process into something that is simpler to implement and handle in block drivers (Christoph) - Clean up chunk size handling (Christoph) - Misc cleanups and fixes (Bart, Bo, Dan, GuoYong, Jason, Keith, Liu, Ming, Sebastian, Yang, Ying) * tag 'for-5.20/block-2022-07-29' of git://git.kernel.dk/linux-block: (178 commits) ublk_drv: fix double shift bug ublk_drv: make sure that correct flags(features) returned to userspace ublk_drv: fix error handling of ublk_add_dev ublk_drv: fix lockdep warning block: remove __blk_get_queue block: call blk_mq_exit_queue from disk_release for never added disks blk-mq: fix error handling in __blk_mq_alloc_disk ublk: defer disk allocation ublk: rewrite ublk_ctrl_get_queue_affinity to not rely on hctx->cpumask ublk: fold __ublk_create_dev into ublk_ctrl_add_dev ublk: cleanup ublk_ctrl_uring_cmd ublk: simplify ublk_ch_open and ublk_ch_release ublk: remove the empty open and release block device operations ublk: remove UBLK_IO_F_PREFLUSH ublk: add a MAINTAINERS entry block: don't allow the same type rq_qos add more than once mmc: fix disk/queue leak in case of adding disk failure ublk_drv: fix an IS_ERR() vs NULL check ublk: remove UBLK_IO_F_INTEGRITY ublk_drv: remove unneeded semicolon ...
Diffstat (limited to 'block/blk-mq.c')
-rw-r--r--block/blk-mq.c105
1 files changed, 78 insertions, 27 deletions
diff --git a/block/blk-mq.c b/block/blk-mq.c
index 93d9d60980fb..70177ee74295 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -42,6 +42,7 @@
#include "blk-stat.h"
#include "blk-mq-sched.h"
#include "blk-rq-qos.h"
+#include "blk-ioprio.h"
static DEFINE_PER_CPU(struct llist_head, blk_cpu_done);
@@ -128,8 +129,7 @@ struct mq_inflight {
unsigned int inflight[2];
};
-static bool blk_mq_check_inflight(struct request *rq, void *priv,
- bool reserved)
+static bool blk_mq_check_inflight(struct request *rq, void *priv)
{
struct mq_inflight *mi = priv;
@@ -474,6 +474,9 @@ retry:
if (!(data->rq_flags & RQF_ELV))
blk_mq_tag_busy(data->hctx);
+ if (data->flags & BLK_MQ_REQ_RESERVED)
+ data->rq_flags |= RQF_RESV;
+
/*
* Try batched alloc if we want more than 1 tag.
*/
@@ -507,13 +510,13 @@ retry:
alloc_time_ns);
}
-struct request *blk_mq_alloc_request(struct request_queue *q, unsigned int op,
+struct request *blk_mq_alloc_request(struct request_queue *q, blk_opf_t opf,
blk_mq_req_flags_t flags)
{
struct blk_mq_alloc_data data = {
.q = q,
.flags = flags,
- .cmd_flags = op,
+ .cmd_flags = opf,
.nr_tags = 1,
};
struct request *rq;
@@ -537,12 +540,12 @@ out_queue_exit:
EXPORT_SYMBOL(blk_mq_alloc_request);
struct request *blk_mq_alloc_request_hctx(struct request_queue *q,
- unsigned int op, blk_mq_req_flags_t flags, unsigned int hctx_idx)
+ blk_opf_t opf, blk_mq_req_flags_t flags, unsigned int hctx_idx)
{
struct blk_mq_alloc_data data = {
.q = q,
.flags = flags,
- .cmd_flags = op,
+ .cmd_flags = opf,
.nr_tags = 1,
};
u64 alloc_time_ns = 0;
@@ -588,6 +591,9 @@ struct request *blk_mq_alloc_request_hctx(struct request_queue *q,
else
data.rq_flags |= RQF_ELV;
+ if (flags & BLK_MQ_REQ_RESERVED)
+ data.rq_flags |= RQF_RESV;
+
ret = -EWOULDBLOCK;
tag = blk_mq_get_tag(&data);
if (tag == BLK_MQ_NO_TAG)
@@ -654,7 +660,7 @@ void blk_dump_rq_flags(struct request *rq, char *msg)
{
printk(KERN_INFO "%s: dev %s: flags=%llx\n", msg,
rq->q->disk ? rq->q->disk->disk_name : "?",
- (unsigned long long) rq->cmd_flags);
+ (__force unsigned long long) rq->cmd_flags);
printk(KERN_INFO " sector %llu, nr/cnr %u/%u\n",
(unsigned long long)blk_rq_pos(rq),
@@ -707,8 +713,9 @@ static void blk_print_req_error(struct request *req, blk_status_t status)
"phys_seg %u prio class %u\n",
blk_status_to_str(status),
req->q->disk ? req->q->disk->disk_name : "?",
- blk_rq_pos(req), req_op(req), blk_op_str(req_op(req)),
- req->cmd_flags & ~REQ_OP_MASK,
+ blk_rq_pos(req), (__force u32)req_op(req),
+ blk_op_str(req_op(req)),
+ (__force u32)(req->cmd_flags & ~REQ_OP_MASK),
req->nr_phys_segments,
IOPRIO_PRIO_CLASS(req->ioprio));
}
@@ -1393,8 +1400,7 @@ void blk_mq_delay_kick_requeue_list(struct request_queue *q,
}
EXPORT_SYMBOL(blk_mq_delay_kick_requeue_list);
-static bool blk_mq_rq_inflight(struct request *rq, void *priv,
- bool reserved)
+static bool blk_mq_rq_inflight(struct request *rq, void *priv)
{
/*
* If we find a request that isn't idle we know the queue is busy
@@ -1420,13 +1426,13 @@ bool blk_mq_queue_inflight(struct request_queue *q)
}
EXPORT_SYMBOL_GPL(blk_mq_queue_inflight);
-static void blk_mq_rq_timed_out(struct request *req, bool reserved)
+static void blk_mq_rq_timed_out(struct request *req)
{
req->rq_flags |= RQF_TIMED_OUT;
if (req->q->mq_ops->timeout) {
enum blk_eh_timer_return ret;
- ret = req->q->mq_ops->timeout(req, reserved);
+ ret = req->q->mq_ops->timeout(req);
if (ret == BLK_EH_DONE)
return;
WARN_ON_ONCE(ret != BLK_EH_RESET_TIMER);
@@ -1463,7 +1469,7 @@ void blk_mq_put_rq_ref(struct request *rq)
__blk_mq_free_request(rq);
}
-static bool blk_mq_check_expired(struct request *rq, void *priv, bool reserved)
+static bool blk_mq_check_expired(struct request *rq, void *priv)
{
unsigned long *next = priv;
@@ -1475,7 +1481,7 @@ static bool blk_mq_check_expired(struct request *rq, void *priv, bool reserved)
* from blk_mq_check_expired().
*/
if (blk_mq_req_expired(rq, next))
- blk_mq_rq_timed_out(rq, reserved);
+ blk_mq_rq_timed_out(rq);
return true;
}
@@ -2085,14 +2091,10 @@ static void __blk_mq_delay_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async,
return;
if (!async && !(hctx->flags & BLK_MQ_F_BLOCKING)) {
- int cpu = get_cpu();
- if (cpumask_test_cpu(cpu, hctx->cpumask)) {
+ if (cpumask_test_cpu(raw_smp_processor_id(), hctx->cpumask)) {
__blk_mq_run_hw_queue(hctx);
- put_cpu();
return;
}
-
- put_cpu();
}
kblockd_mod_delayed_work_on(blk_mq_hctx_next_cpu(hctx), &hctx->run_work,
@@ -2156,7 +2158,7 @@ static struct blk_mq_hw_ctx *blk_mq_get_sq_hctx(struct request_queue *q)
* just causes lock contention inside the scheduler and pointless cache
* bouncing.
*/
- struct blk_mq_hw_ctx *hctx = blk_mq_map_queue(q, 0, ctx);
+ struct blk_mq_hw_ctx *hctx = ctx->hctxs[HCTX_TYPE_DEFAULT];
if (!blk_mq_hctx_stopped(hctx))
return hctx;
@@ -2783,6 +2785,14 @@ static inline struct request *blk_mq_get_cached_request(struct request_queue *q,
return rq;
}
+static void bio_set_ioprio(struct bio *bio)
+{
+ /* Nobody set ioprio so far? Initialize it based on task's nice value */
+ if (IOPRIO_PRIO_CLASS(bio->bi_ioprio) == IOPRIO_CLASS_NONE)
+ bio->bi_ioprio = get_current_ioprio();
+ blkcg_set_ioprio(bio);
+}
+
/**
* blk_mq_submit_bio - Create and send a request to block device.
* @bio: Bio pointer.
@@ -2799,7 +2809,7 @@ static inline struct request *blk_mq_get_cached_request(struct request_queue *q,
void blk_mq_submit_bio(struct bio *bio)
{
struct request_queue *q = bdev_get_queue(bio->bi_bdev);
- struct blk_plug *plug = blk_mq_plug(q, bio);
+ struct blk_plug *plug = blk_mq_plug(bio);
const int is_sync = op_is_sync(bio->bi_opf);
struct request *rq;
unsigned int nr_segs = 1;
@@ -2812,6 +2822,8 @@ void blk_mq_submit_bio(struct bio *bio)
if (!bio_integrity_prep(bio))
return;
+ bio_set_ioprio(bio);
+
rq = blk_mq_get_cached_request(q, plug, &bio, nr_segs);
if (!rq) {
if (!bio)
@@ -3276,7 +3288,7 @@ struct rq_iter_data {
bool has_rq;
};
-static bool blk_mq_has_request(struct request *rq, void *data, bool reserved)
+static bool blk_mq_has_request(struct request *rq, void *data)
{
struct rq_iter_data *iter_data = data;
@@ -3895,7 +3907,7 @@ static struct request_queue *blk_mq_init_queue_data(struct blk_mq_tag_set *set,
q->queuedata = queuedata;
ret = blk_mq_init_allocated_queue(set, q);
if (ret) {
- blk_cleanup_queue(q);
+ blk_put_queue(q);
return ERR_PTR(ret);
}
return q;
@@ -3907,6 +3919,35 @@ struct request_queue *blk_mq_init_queue(struct blk_mq_tag_set *set)
}
EXPORT_SYMBOL(blk_mq_init_queue);
+/**
+ * blk_mq_destroy_queue - shutdown a request queue
+ * @q: request queue to shutdown
+ *
+ * This shuts down a request queue allocated by blk_mq_init_queue() and drops
+ * the initial reference. All future requests will failed with -ENODEV.
+ *
+ * Context: can sleep
+ */
+void blk_mq_destroy_queue(struct request_queue *q)
+{
+ WARN_ON_ONCE(!queue_is_mq(q));
+ WARN_ON_ONCE(blk_queue_registered(q));
+
+ might_sleep();
+
+ blk_queue_flag_set(QUEUE_FLAG_DYING, q);
+ blk_queue_start_drain(q);
+ blk_freeze_queue(q);
+
+ blk_sync_queue(q);
+ blk_mq_cancel_work_sync(q);
+ blk_mq_exit_queue(q);
+
+ /* @q is and will stay empty, shutdown and put */
+ blk_put_queue(q);
+}
+EXPORT_SYMBOL(blk_mq_destroy_queue);
+
struct gendisk *__blk_mq_alloc_disk(struct blk_mq_tag_set *set, void *queuedata,
struct lock_class_key *lkclass)
{
@@ -3919,13 +3960,23 @@ struct gendisk *__blk_mq_alloc_disk(struct blk_mq_tag_set *set, void *queuedata,
disk = __alloc_disk_node(q, set->numa_node, lkclass);
if (!disk) {
- blk_cleanup_queue(q);
+ blk_mq_destroy_queue(q);
return ERR_PTR(-ENOMEM);
}
+ set_bit(GD_OWNS_QUEUE, &disk->state);
return disk;
}
EXPORT_SYMBOL(__blk_mq_alloc_disk);
+struct gendisk *blk_mq_alloc_disk_for_queue(struct request_queue *q,
+ struct lock_class_key *lkclass)
+{
+ if (!blk_get_queue(q))
+ return NULL;
+ return __alloc_disk_node(q, NUMA_NO_NODE, lkclass);
+}
+EXPORT_SYMBOL(blk_mq_alloc_disk_for_queue);
+
static struct blk_mq_hw_ctx *blk_mq_alloc_and_init_hctx(
struct blk_mq_tag_set *set, struct request_queue *q,
int hctx_idx, int node)
@@ -4513,7 +4564,7 @@ static void __blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set,
list_for_each_entry(q, &set->tag_list, tag_set_list) {
blk_mq_debugfs_unregister_hctxs(q);
- blk_mq_sysfs_unregister(q);
+ blk_mq_sysfs_unregister_hctxs(q);
}
prev_nr_hw_queues = set->nr_hw_queues;
@@ -4544,7 +4595,7 @@ fallback:
reregister:
list_for_each_entry(q, &set->tag_list, tag_set_list) {
- blk_mq_sysfs_register(q);
+ blk_mq_sysfs_register_hctxs(q);
blk_mq_debugfs_register_hctxs(q);
}