summaryrefslogtreecommitdiffstats
path: root/block/blk-core.c
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2017-11-15 00:32:19 +0100
committerLinus Torvalds <torvalds@linux-foundation.org>2017-11-15 00:32:19 +0100
commite2c5923c349c1738fe8fda980874d93f6fb2e5b6 (patch)
treeb97a90170c45211bcc437761653aa8016c34afcd /block/blk-core.c
parentMerge tag 'configfs-for-4.15' of git://git.infradead.org/users/hch/configfs (diff)
parentnvme: fix visibility of "uuid" ns attribute (diff)
downloadlinux-e2c5923c349c1738fe8fda980874d93f6fb2e5b6.tar.xz
linux-e2c5923c349c1738fe8fda980874d93f6fb2e5b6.zip
Merge branch 'for-4.15/block' of git://git.kernel.dk/linux-block
Pull core block layer updates from Jens Axboe: "This is the main pull request for block storage for 4.15-rc1. Nothing out of the ordinary in here, and no API changes or anything like that. Just various new features for drivers, core changes, etc. In particular, this pull request contains: - A patch series from Bart, closing the whole on blk/scsi-mq queue quescing. - A series from Christoph, building towards hidden gendisks (for multipath) and ability to move bio chains around. - NVMe - Support for native multipath for NVMe (Christoph). - Userspace notifications for AENs (Keith). - Command side-effects support (Keith). - SGL support (Chaitanya Kulkarni) - FC fixes and improvements (James Smart) - Lots of fixes and tweaks (Various) - bcache - New maintainer (Michael Lyle) - Writeback control improvements (Michael) - Various fixes (Coly, Elena, Eric, Liang, et al) - lightnvm updates, mostly centered around the pblk interface (Javier, Hans, and Rakesh). - Removal of unused bio/bvec kmap atomic interfaces (me, Christoph) - Writeback series that fix the much discussed hundreds of millions of sync-all units. This goes all the way, as discussed previously (me). - Fix for missing wakeup on writeback timer adjustments (Yafang Shao). - Fix laptop mode on blk-mq (me). - {mq,name} tupple lookup for IO schedulers, allowing us to have alias names. This means you can use 'deadline' on both !mq and on mq (where it's called mq-deadline). (me). - blktrace race fix, oopsing on sg load (me). - blk-mq optimizations (me). - Obscure waitqueue race fix for kyber (Omar). - NBD fixes (Josef). - Disable writeback throttling by default on bfq, like we do on cfq (Luca Miccio). - Series from Ming that enable us to treat flush requests on blk-mq like any other request. This is a really nice cleanup. - Series from Ming that improves merging on blk-mq with schedulers, getting us closer to flipping the switch on scsi-mq again. - BFQ updates (Paolo). - blk-mq atomic flags memory ordering fixes (Peter Z). - Loop cgroup support (Shaohua). - Lots of minor fixes from lots of different folks, both for core and driver code" * 'for-4.15/block' of git://git.kernel.dk/linux-block: (294 commits) nvme: fix visibility of "uuid" ns attribute blk-mq: fixup some comment typos and lengths ide: ide-atapi: fix compile error with defining macro DEBUG blk-mq: improve tag waiting setup for non-shared tags brd: remove unused brd_mutex blk-mq: only run the hardware queue if IO is pending block: avoid null pointer dereference on null disk fs: guard_bio_eod() needs to consider partitions xtensa/simdisk: fix compile error nvme: expose subsys attribute to sysfs nvme: create 'slaves' and 'holders' entries for hidden controllers block: create 'slaves' and 'holders' entries for hidden gendisks nvme: also expose the namespace identification sysfs files for mpath nodes nvme: implement multipath access to nvme subsystems nvme: track shared namespaces nvme: introduce a nvme_ns_ids structure nvme: track subsystems block, nvme: Introduce blk_mq_req_flags_t block, scsi: Make SCSI quiesce and resume work reliably block: Add the QUEUE_FLAG_PREEMPT_ONLY request queue flag ...
Diffstat (limited to 'block/blk-core.c')
-rw-r--r--block/blk-core.c274
1 files changed, 236 insertions, 38 deletions
diff --git a/block/blk-core.c b/block/blk-core.c
index 048be4aa6024..7c54c195e79e 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -333,11 +333,13 @@ EXPORT_SYMBOL(blk_stop_queue);
void blk_sync_queue(struct request_queue *q)
{
del_timer_sync(&q->timeout);
+ cancel_work_sync(&q->timeout_work);
if (q->mq_ops) {
struct blk_mq_hw_ctx *hctx;
int i;
+ cancel_delayed_work_sync(&q->requeue_work);
queue_for_each_hw_ctx(q, hctx, i)
cancel_delayed_work_sync(&hctx->run_work);
} else {
@@ -347,6 +349,37 @@ void blk_sync_queue(struct request_queue *q)
EXPORT_SYMBOL(blk_sync_queue);
/**
+ * blk_set_preempt_only - set QUEUE_FLAG_PREEMPT_ONLY
+ * @q: request queue pointer
+ *
+ * Returns the previous value of the PREEMPT_ONLY flag - 0 if the flag was not
+ * set and 1 if the flag was already set.
+ */
+int blk_set_preempt_only(struct request_queue *q)
+{
+ unsigned long flags;
+ int res;
+
+ spin_lock_irqsave(q->queue_lock, flags);
+ res = queue_flag_test_and_set(QUEUE_FLAG_PREEMPT_ONLY, q);
+ spin_unlock_irqrestore(q->queue_lock, flags);
+
+ return res;
+}
+EXPORT_SYMBOL_GPL(blk_set_preempt_only);
+
+void blk_clear_preempt_only(struct request_queue *q)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(q->queue_lock, flags);
+ queue_flag_clear(QUEUE_FLAG_PREEMPT_ONLY, q);
+ wake_up_all(&q->mq_freeze_wq);
+ spin_unlock_irqrestore(q->queue_lock, flags);
+}
+EXPORT_SYMBOL_GPL(blk_clear_preempt_only);
+
+/**
* __blk_run_queue_uncond - run a queue whether or not it has been stopped
* @q: The queue to run
*
@@ -610,6 +643,9 @@ void blk_set_queue_dying(struct request_queue *q)
}
spin_unlock_irq(q->queue_lock);
}
+
+ /* Make blk_queue_enter() reexamine the DYING flag. */
+ wake_up_all(&q->mq_freeze_wq);
}
EXPORT_SYMBOL_GPL(blk_set_queue_dying);
@@ -718,7 +754,7 @@ static void free_request_size(void *element, void *data)
int blk_init_rl(struct request_list *rl, struct request_queue *q,
gfp_t gfp_mask)
{
- if (unlikely(rl->rq_pool))
+ if (unlikely(rl->rq_pool) || q->mq_ops)
return 0;
rl->q = q;
@@ -760,15 +796,38 @@ struct request_queue *blk_alloc_queue(gfp_t gfp_mask)
}
EXPORT_SYMBOL(blk_alloc_queue);
-int blk_queue_enter(struct request_queue *q, bool nowait)
+/**
+ * blk_queue_enter() - try to increase q->q_usage_counter
+ * @q: request queue pointer
+ * @flags: BLK_MQ_REQ_NOWAIT and/or BLK_MQ_REQ_PREEMPT
+ */
+int blk_queue_enter(struct request_queue *q, blk_mq_req_flags_t flags)
{
+ const bool preempt = flags & BLK_MQ_REQ_PREEMPT;
+
while (true) {
+ bool success = false;
int ret;
- if (percpu_ref_tryget_live(&q->q_usage_counter))
+ rcu_read_lock_sched();
+ if (percpu_ref_tryget_live(&q->q_usage_counter)) {
+ /*
+ * The code that sets the PREEMPT_ONLY flag is
+ * responsible for ensuring that that flag is globally
+ * visible before the queue is unfrozen.
+ */
+ if (preempt || !blk_queue_preempt_only(q)) {
+ success = true;
+ } else {
+ percpu_ref_put(&q->q_usage_counter);
+ }
+ }
+ rcu_read_unlock_sched();
+
+ if (success)
return 0;
- if (nowait)
+ if (flags & BLK_MQ_REQ_NOWAIT)
return -EBUSY;
/*
@@ -781,7 +840,8 @@ int blk_queue_enter(struct request_queue *q, bool nowait)
smp_rmb();
ret = wait_event_interruptible(q->mq_freeze_wq,
- !atomic_read(&q->mq_freeze_depth) ||
+ (atomic_read(&q->mq_freeze_depth) == 0 &&
+ (preempt || !blk_queue_preempt_only(q))) ||
blk_queue_dying(q));
if (blk_queue_dying(q))
return -ENODEV;
@@ -844,6 +904,7 @@ struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id)
setup_timer(&q->backing_dev_info->laptop_mode_wb_timer,
laptop_mode_timer_fn, (unsigned long) q);
setup_timer(&q->timeout, blk_rq_timed_out_timer, (unsigned long) q);
+ INIT_WORK(&q->timeout_work, NULL);
INIT_LIST_HEAD(&q->queue_head);
INIT_LIST_HEAD(&q->timeout_list);
INIT_LIST_HEAD(&q->icq_list);
@@ -1154,7 +1215,7 @@ int blk_update_nr_requests(struct request_queue *q, unsigned int nr)
* @rl: request list to allocate from
* @op: operation and flags
* @bio: bio to allocate request for (can be %NULL)
- * @gfp_mask: allocation mask
+ * @flags: BLQ_MQ_REQ_* flags
*
* Get a free request from @q. This function may fail under memory
* pressure or if @q is dead.
@@ -1164,7 +1225,7 @@ int blk_update_nr_requests(struct request_queue *q, unsigned int nr)
* Returns request pointer on success, with @q->queue_lock *not held*.
*/
static struct request *__get_request(struct request_list *rl, unsigned int op,
- struct bio *bio, gfp_t gfp_mask)
+ struct bio *bio, blk_mq_req_flags_t flags)
{
struct request_queue *q = rl->q;
struct request *rq;
@@ -1173,6 +1234,8 @@ static struct request *__get_request(struct request_list *rl, unsigned int op,
struct io_cq *icq = NULL;
const bool is_sync = op_is_sync(op);
int may_queue;
+ gfp_t gfp_mask = flags & BLK_MQ_REQ_NOWAIT ? GFP_ATOMIC :
+ __GFP_DIRECT_RECLAIM;
req_flags_t rq_flags = RQF_ALLOCED;
lockdep_assert_held(q->queue_lock);
@@ -1255,6 +1318,8 @@ static struct request *__get_request(struct request_list *rl, unsigned int op,
blk_rq_set_rl(rq, rl);
rq->cmd_flags = op;
rq->rq_flags = rq_flags;
+ if (flags & BLK_MQ_REQ_PREEMPT)
+ rq->rq_flags |= RQF_PREEMPT;
/* init elvpriv */
if (rq_flags & RQF_ELVPRIV) {
@@ -1333,7 +1398,7 @@ rq_starved:
* @q: request_queue to allocate request from
* @op: operation and flags
* @bio: bio to allocate request for (can be %NULL)
- * @gfp_mask: allocation mask
+ * @flags: BLK_MQ_REQ_* flags.
*
* Get a free request from @q. If %__GFP_DIRECT_RECLAIM is set in @gfp_mask,
* this function keeps retrying under memory pressure and fails iff @q is dead.
@@ -1343,7 +1408,7 @@ rq_starved:
* Returns request pointer on success, with @q->queue_lock *not held*.
*/
static struct request *get_request(struct request_queue *q, unsigned int op,
- struct bio *bio, gfp_t gfp_mask)
+ struct bio *bio, blk_mq_req_flags_t flags)
{
const bool is_sync = op_is_sync(op);
DEFINE_WAIT(wait);
@@ -1355,7 +1420,7 @@ static struct request *get_request(struct request_queue *q, unsigned int op,
rl = blk_get_rl(q, bio); /* transferred to @rq on success */
retry:
- rq = __get_request(rl, op, bio, gfp_mask);
+ rq = __get_request(rl, op, bio, flags);
if (!IS_ERR(rq))
return rq;
@@ -1364,7 +1429,7 @@ retry:
return ERR_PTR(-EAGAIN);
}
- if (!gfpflags_allow_blocking(gfp_mask) || unlikely(blk_queue_dying(q))) {
+ if ((flags & BLK_MQ_REQ_NOWAIT) || unlikely(blk_queue_dying(q))) {
blk_put_rl(rl);
return rq;
}
@@ -1391,20 +1456,28 @@ retry:
goto retry;
}
+/* flags: BLK_MQ_REQ_PREEMPT and/or BLK_MQ_REQ_NOWAIT. */
static struct request *blk_old_get_request(struct request_queue *q,
- unsigned int op, gfp_t gfp_mask)
+ unsigned int op, blk_mq_req_flags_t flags)
{
struct request *rq;
+ gfp_t gfp_mask = flags & BLK_MQ_REQ_NOWAIT ? GFP_ATOMIC :
+ __GFP_DIRECT_RECLAIM;
+ int ret = 0;
WARN_ON_ONCE(q->mq_ops);
/* create ioc upfront */
create_io_context(gfp_mask, q->node);
+ ret = blk_queue_enter(q, flags);
+ if (ret)
+ return ERR_PTR(ret);
spin_lock_irq(q->queue_lock);
- rq = get_request(q, op, NULL, gfp_mask);
+ rq = get_request(q, op, NULL, flags);
if (IS_ERR(rq)) {
spin_unlock_irq(q->queue_lock);
+ blk_queue_exit(q);
return rq;
}
@@ -1415,25 +1488,40 @@ static struct request *blk_old_get_request(struct request_queue *q,
return rq;
}
-struct request *blk_get_request(struct request_queue *q, unsigned int op,
- gfp_t gfp_mask)
+/**
+ * blk_get_request_flags - allocate a request
+ * @q: request queue to allocate a request for
+ * @op: operation (REQ_OP_*) and REQ_* flags, e.g. REQ_SYNC.
+ * @flags: BLK_MQ_REQ_* flags, e.g. BLK_MQ_REQ_NOWAIT.
+ */
+struct request *blk_get_request_flags(struct request_queue *q, unsigned int op,
+ blk_mq_req_flags_t flags)
{
struct request *req;
+ WARN_ON_ONCE(op & REQ_NOWAIT);
+ WARN_ON_ONCE(flags & ~(BLK_MQ_REQ_NOWAIT | BLK_MQ_REQ_PREEMPT));
+
if (q->mq_ops) {
- req = blk_mq_alloc_request(q, op,
- (gfp_mask & __GFP_DIRECT_RECLAIM) ?
- 0 : BLK_MQ_REQ_NOWAIT);
+ req = blk_mq_alloc_request(q, op, flags);
if (!IS_ERR(req) && q->mq_ops->initialize_rq_fn)
q->mq_ops->initialize_rq_fn(req);
} else {
- req = blk_old_get_request(q, op, gfp_mask);
+ req = blk_old_get_request(q, op, flags);
if (!IS_ERR(req) && q->initialize_rq_fn)
q->initialize_rq_fn(req);
}
return req;
}
+EXPORT_SYMBOL(blk_get_request_flags);
+
+struct request *blk_get_request(struct request_queue *q, unsigned int op,
+ gfp_t gfp_mask)
+{
+ return blk_get_request_flags(q, op, gfp_mask & __GFP_DIRECT_RECLAIM ?
+ 0 : BLK_MQ_REQ_NOWAIT);
+}
EXPORT_SYMBOL(blk_get_request);
/**
@@ -1576,6 +1664,7 @@ void __blk_put_request(struct request_queue *q, struct request *req)
blk_free_request(rl, req);
freed_request(rl, sync, rq_flags);
blk_put_rl(rl);
+ blk_queue_exit(q);
}
}
EXPORT_SYMBOL_GPL(__blk_put_request);
@@ -1857,8 +1946,10 @@ get_rq:
* Grab a free request. This is might sleep but can not fail.
* Returns with the queue unlocked.
*/
- req = get_request(q, bio->bi_opf, bio, GFP_NOIO);
+ blk_queue_enter_live(q);
+ req = get_request(q, bio->bi_opf, bio, 0);
if (IS_ERR(req)) {
+ blk_queue_exit(q);
__wbt_done(q->rq_wb, wb_acct);
if (PTR_ERR(req) == -ENOMEM)
bio->bi_status = BLK_STS_RESOURCE;
@@ -2200,8 +2291,10 @@ blk_qc_t generic_make_request(struct bio *bio)
current->bio_list = bio_list_on_stack;
do {
struct request_queue *q = bio->bi_disk->queue;
+ blk_mq_req_flags_t flags = bio->bi_opf & REQ_NOWAIT ?
+ BLK_MQ_REQ_NOWAIT : 0;
- if (likely(blk_queue_enter(q, bio->bi_opf & REQ_NOWAIT) == 0)) {
+ if (likely(blk_queue_enter(q, flags) == 0)) {
struct bio_list lower, same;
/* Create a fresh bio_list for all subordinate requests */
@@ -2242,6 +2335,40 @@ out:
EXPORT_SYMBOL(generic_make_request);
/**
+ * direct_make_request - hand a buffer directly to its device driver for I/O
+ * @bio: The bio describing the location in memory and on the device.
+ *
+ * This function behaves like generic_make_request(), but does not protect
+ * against recursion. Must only be used if the called driver is known
+ * to not call generic_make_request (or direct_make_request) again from
+ * its make_request function. (Calling direct_make_request again from
+ * a workqueue is perfectly fine as that doesn't recurse).
+ */
+blk_qc_t direct_make_request(struct bio *bio)
+{
+ struct request_queue *q = bio->bi_disk->queue;
+ bool nowait = bio->bi_opf & REQ_NOWAIT;
+ blk_qc_t ret;
+
+ if (!generic_make_request_checks(bio))
+ return BLK_QC_T_NONE;
+
+ if (unlikely(blk_queue_enter(q, nowait ? BLK_MQ_REQ_NOWAIT : 0))) {
+ if (nowait && !blk_queue_dying(q))
+ bio->bi_status = BLK_STS_AGAIN;
+ else
+ bio->bi_status = BLK_STS_IOERR;
+ bio_endio(bio);
+ return BLK_QC_T_NONE;
+ }
+
+ ret = q->make_request_fn(q, bio);
+ blk_queue_exit(q);
+ return ret;
+}
+EXPORT_SYMBOL_GPL(direct_make_request);
+
+/**
* submit_bio - submit a bio to the block device layer for I/O
* @bio: The &struct bio which describes the I/O
*
@@ -2285,6 +2412,17 @@ blk_qc_t submit_bio(struct bio *bio)
}
EXPORT_SYMBOL(submit_bio);
+bool blk_poll(struct request_queue *q, blk_qc_t cookie)
+{
+ if (!q->poll_fn || !blk_qc_t_valid(cookie))
+ return false;
+
+ if (current->plug)
+ blk_flush_plug_list(current->plug, false);
+ return q->poll_fn(q, cookie);
+}
+EXPORT_SYMBOL_GPL(blk_poll);
+
/**
* blk_cloned_rq_check_limits - Helper function to check a cloned request
* for new the queue limits
@@ -2350,7 +2488,7 @@ blk_status_t blk_insert_cloned_request(struct request_queue *q, struct request *
* bypass a potential scheduler on the bottom device for
* insert.
*/
- blk_mq_request_bypass_insert(rq);
+ blk_mq_request_bypass_insert(rq, true);
return BLK_STS_OK;
}
@@ -2464,20 +2602,22 @@ void blk_account_io_done(struct request *req)
* Don't process normal requests when queue is suspended
* or in the process of suspending/resuming
*/
-static struct request *blk_pm_peek_request(struct request_queue *q,
- struct request *rq)
+static bool blk_pm_allow_request(struct request *rq)
{
- if (q->dev && (q->rpm_status == RPM_SUSPENDED ||
- (q->rpm_status != RPM_ACTIVE && !(rq->rq_flags & RQF_PM))))
- return NULL;
- else
- return rq;
+ switch (rq->q->rpm_status) {
+ case RPM_RESUMING:
+ case RPM_SUSPENDING:
+ return rq->rq_flags & RQF_PM;
+ case RPM_SUSPENDED:
+ return false;
+ }
+
+ return true;
}
#else
-static inline struct request *blk_pm_peek_request(struct request_queue *q,
- struct request *rq)
+static bool blk_pm_allow_request(struct request *rq)
{
- return rq;
+ return true;
}
#endif
@@ -2517,6 +2657,48 @@ void blk_account_io_start(struct request *rq, bool new_io)
part_stat_unlock();
}
+static struct request *elv_next_request(struct request_queue *q)
+{
+ struct request *rq;
+ struct blk_flush_queue *fq = blk_get_flush_queue(q, NULL);
+
+ WARN_ON_ONCE(q->mq_ops);
+
+ while (1) {
+ list_for_each_entry(rq, &q->queue_head, queuelist) {
+ if (blk_pm_allow_request(rq))
+ return rq;
+
+ if (rq->rq_flags & RQF_SOFTBARRIER)
+ break;
+ }
+
+ /*
+ * Flush request is running and flush request isn't queueable
+ * in the drive, we can hold the queue till flush request is
+ * finished. Even we don't do this, driver can't dispatch next
+ * requests and will requeue them. And this can improve
+ * throughput too. For example, we have request flush1, write1,
+ * flush 2. flush1 is dispatched, then queue is hold, write1
+ * isn't inserted to queue. After flush1 is finished, flush2
+ * will be dispatched. Since disk cache is already clean,
+ * flush2 will be finished very soon, so looks like flush2 is
+ * folded to flush1.
+ * Since the queue is hold, a flag is set to indicate the queue
+ * should be restarted later. Please see flush_end_io() for
+ * details.
+ */
+ if (fq->flush_pending_idx != fq->flush_running_idx &&
+ !queue_flush_queueable(q)) {
+ fq->flush_queue_delayed = 1;
+ return NULL;
+ }
+ if (unlikely(blk_queue_bypass(q)) ||
+ !q->elevator->type->ops.sq.elevator_dispatch_fn(q, 0))
+ return NULL;
+ }
+}
+
/**
* blk_peek_request - peek at the top of a request queue
* @q: request queue to peek at
@@ -2538,12 +2720,7 @@ struct request *blk_peek_request(struct request_queue *q)
lockdep_assert_held(q->queue_lock);
WARN_ON_ONCE(q->mq_ops);
- while ((rq = __elv_next_request(q)) != NULL) {
-
- rq = blk_pm_peek_request(q, rq);
- if (!rq)
- break;
-
+ while ((rq = elv_next_request(q)) != NULL) {
if (!(rq->rq_flags & RQF_STARTED)) {
/*
* This is the first time the device driver
@@ -2695,6 +2872,27 @@ struct request *blk_fetch_request(struct request_queue *q)
}
EXPORT_SYMBOL(blk_fetch_request);
+/*
+ * Steal bios from a request and add them to a bio list.
+ * The request must not have been partially completed before.
+ */
+void blk_steal_bios(struct bio_list *list, struct request *rq)
+{
+ if (rq->bio) {
+ if (list->tail)
+ list->tail->bi_next = rq->bio;
+ else
+ list->head = rq->bio;
+ list->tail = rq->biotail;
+
+ rq->bio = NULL;
+ rq->biotail = NULL;
+ }
+
+ rq->__data_len = 0;
+}
+EXPORT_SYMBOL_GPL(blk_steal_bios);
+
/**
* blk_update_request - Special helper function for request stacking drivers
* @req: the request being processed