author		Linus Torvalds <torvalds@linux-foundation.org>	2018-01-29 20:51:49 +0100
committer	Linus Torvalds <torvalds@linux-foundation.org>	2018-01-29 20:51:49 +0100
commit		0a4b6e2f80aad46fb55a5cf7b1664c0aef030ee0 (patch)
tree		cefccd67dc1f27bb45830f6b8065dd4a1c05e83b /block/deadline-iosched.c
parent		Merge tag 'edac_for_4.16' of git://git.kernel.org/pub/scm/linux/kernel/git/bp/bp (diff)
parent		block: remove smart1,2.h (diff)
Merge branch 'for-4.16/block' of git://git.kernel.dk/linux-block
Pull block updates from Jens Axboe:
"This is the main pull request for block IO related changes for the
4.16 kernel. Nothing major in this pull request, but a good amount of
improvements and fixes all over the map. This contains:
- BFQ improvements, fixes, and cleanups from Angelo, Chiara, and
Paolo.
- Support for SMR zones for deadline and mq-deadline from Damien and
Christoph.
- Set of fixes for bcache by way of Michael Lyle, including fixes
from himself, Kent, Rui, Tang, and Coly.
- Series from Matias for lightnvm with fixes from Hans Holmberg,
Javier, and Matias. Mostly centered around pblk, and removing
rrpc 1.2 in preparation for supporting 2.0.
- A couple of NVMe pull requests from Christoph. Nothing major in
here, just fixes and cleanups, and support for command tracing from
Johannes.
- Support for blk-throttle for tracking reads and writes separately.
From Joseph Qi. A few cleanups/fixes also for blk-throttle from
Weiping.
- Series from Mike Snitzer that enables dm to register its queue more
logically, something that's always been problematic on dm since
it's a stacked device.
- Series from Ming cleaning up some of the bio accessor use, in
preparation for supporting multipage bvecs.
- Various fixes from Ming closing up holes around queue mapping and
quiescing.
- BSD partition fix from Richard Narron, fixing a problem where we
can't mount newer (10/11) FreeBSD partitions.
- Series from Tejun reworking blk-mq timeout handling. The previous
scheme relied on atomic bits, but it had races where we would think
a request had timed out if it got reused at the wrong time.
- null_blk now supports faking timeouts, to enable us to better
exercise and test that functionality separately. From me.
- Kill the separate atomic poll bit in the request struct. After
this, we don't use the atomic bits on blk-mq anymore at all. From
me.
- sgl_alloc/free helpers from Bart (see the sketch after this list).
- Heavily contended tag case scalability improvement from me.
- Various little fixes and cleanups from Arnd, Bart, Corentin,
Douglas, Eryu, Goldwyn, and myself"
* 'for-4.16/block' of git://git.kernel.dk/linux-block: (186 commits)
block: remove smart1,2.h
nvme: add tracepoint for nvme_complete_rq
nvme: add tracepoint for nvme_setup_cmd
nvme-pci: introduce RECONNECTING state to mark initializing procedure
nvme-rdma: remove redundant boolean for inline_data
nvme: don't free uuid pointer before printing it
nvme-pci: Suspend queues after deleting them
bsg: use pr_debug instead of hand crafted macros
blk-mq-debugfs: don't allow write on attributes with seq_operations set
nvme-pci: Fix queue double allocations
block: Set BIO_TRACE_COMPLETION on new bio during split
blk-throttle: use queue_is_rq_based
block: Remove kblockd_schedule_delayed_work{,_on}()
blk-mq: Avoid that blk_mq_delay_run_hw_queue() introduces unintended delays
blk-mq: Rename blk_mq_request_direct_issue() into blk_mq_request_issue_directly()
lib/scatterlist: Fix chaining support in sgl_alloc_order()
blk-throttle: track read and write request individually
block: add bdev_read_only() checks to common helpers
block: fail op_is_write() requests to read-only partitions
blk-throttle: export io_serviced_recursive, io_service_bytes_recursive
...
Diffstat (limited to 'block/deadline-iosched.c')
-rw-r--r--	block/deadline-iosched.c	114
1 file changed, 103 insertions(+), 11 deletions(-)
diff --git a/block/deadline-iosched.c b/block/deadline-iosched.c
index b83f77460d28..9de9f156e203 100644
--- a/block/deadline-iosched.c
+++ b/block/deadline-iosched.c
@@ -50,8 +50,6 @@ struct deadline_data {
 	int front_merges;
 };
 
-static void deadline_move_request(struct deadline_data *, struct request *);
-
 static inline struct rb_root *
 deadline_rb_root(struct deadline_data *dd, struct request *rq)
 {
@@ -100,6 +98,12 @@ deadline_add_request(struct request_queue *q, struct request *rq)
 	struct deadline_data *dd = q->elevator->elevator_data;
 	const int data_dir = rq_data_dir(rq);
 
+	/*
+	 * This may be a requeue of a write request that has locked its
+	 * target zone. If it is the case, this releases the zone lock.
+	 */
+	blk_req_zone_write_unlock(rq);
+
 	deadline_add_rq_rb(dd, rq);
 
 	/*
@@ -190,6 +194,12 @@ deadline_move_to_dispatch(struct deadline_data *dd, struct request *rq)
 {
 	struct request_queue *q = rq->q;
 
+	/*
+	 * For a zoned block device, write requests must write lock their
+	 * target zone.
+	 */
+	blk_req_zone_write_lock(rq);
+
 	deadline_remove_request(q, rq);
 	elv_dispatch_add_tail(q, rq);
 }
@@ -231,6 +241,69 @@ static inline int deadline_check_fifo(struct deadline_data *dd, int ddir)
 }
 
 /*
+ * For the specified data direction, return the next request to dispatch using
+ * arrival ordered lists.
+ */
+static struct request *
+deadline_fifo_request(struct deadline_data *dd, int data_dir)
+{
+	struct request *rq;
+
+	if (WARN_ON_ONCE(data_dir != READ && data_dir != WRITE))
+		return NULL;
+
+	if (list_empty(&dd->fifo_list[data_dir]))
+		return NULL;
+
+	rq = rq_entry_fifo(dd->fifo_list[data_dir].next);
+	if (data_dir == READ || !blk_queue_is_zoned(rq->q))
+		return rq;
+
+	/*
+	 * Look for a write request that can be dispatched, that is one with
+	 * an unlocked target zone.
+	 */
+	list_for_each_entry(rq, &dd->fifo_list[WRITE], queuelist) {
+		if (blk_req_can_dispatch_to_zone(rq))
+			return rq;
+	}
+
+	return NULL;
+}
+
+/*
+ * For the specified data direction, return the next request to dispatch using
+ * sector position sorted lists.
+ */
+static struct request *
+deadline_next_request(struct deadline_data *dd, int data_dir)
+{
+	struct request *rq;
+
+	if (WARN_ON_ONCE(data_dir != READ && data_dir != WRITE))
+		return NULL;
+
+	rq = dd->next_rq[data_dir];
+	if (!rq)
+		return NULL;
+
+	if (data_dir == READ || !blk_queue_is_zoned(rq->q))
+		return rq;
+
+	/*
+	 * Look for a write request that can be dispatched, that is one with
+	 * an unlocked target zone.
+	 */
+	while (rq) {
+		if (blk_req_can_dispatch_to_zone(rq))
+			return rq;
+		rq = deadline_latter_request(rq);
+	}
+
+	return NULL;
+}
+
+/*
  * deadline_dispatch_requests selects the best request according to
  * read/write expire, fifo_batch, etc
  */
@@ -239,16 +312,15 @@ static int deadline_dispatch_requests(struct request_queue *q, int force)
 	struct deadline_data *dd = q->elevator->elevator_data;
 	const int reads = !list_empty(&dd->fifo_list[READ]);
 	const int writes = !list_empty(&dd->fifo_list[WRITE]);
-	struct request *rq;
+	struct request *rq, *next_rq;
 	int data_dir;
 
 	/*
 	 * batches are currently reads XOR writes
 	 */
-	if (dd->next_rq[WRITE])
-		rq = dd->next_rq[WRITE];
-	else
-		rq = dd->next_rq[READ];
+	rq = deadline_next_request(dd, WRITE);
+	if (!rq)
+		rq = deadline_next_request(dd, READ);
 
 	if (rq && dd->batching < dd->fifo_batch)
 		/* we have a next request are still entitled to batch */
@@ -262,7 +334,8 @@ static int deadline_dispatch_requests(struct request_queue *q, int force)
 	if (reads) {
 		BUG_ON(RB_EMPTY_ROOT(&dd->sort_list[READ]));
 
-		if (writes && (dd->starved++ >= dd->writes_starved))
+		if (deadline_fifo_request(dd, WRITE) &&
+		    (dd->starved++ >= dd->writes_starved))
 			goto dispatch_writes;
 
 		data_dir = READ;
@@ -291,21 +364,29 @@ dispatch_find_request:
 	/*
 	 * we are not running a batch, find best request for selected data_dir
 	 */
-	if (deadline_check_fifo(dd, data_dir) || !dd->next_rq[data_dir]) {
+	next_rq = deadline_next_request(dd, data_dir);
+	if (deadline_check_fifo(dd, data_dir) || !next_rq) {
 		/*
 		 * A deadline has expired, the last request was in the other
 		 * direction, or we have run out of higher-sectored requests.
 		 * Start again from the request with the earliest expiry time.
 		 */
-		rq = rq_entry_fifo(dd->fifo_list[data_dir].next);
+		rq = deadline_fifo_request(dd, data_dir);
 	} else {
 		/*
 		 * The last req was the same dir and we have a next request in
 		 * sort order. No expired requests so continue on from here.
 		 */
-		rq = dd->next_rq[data_dir];
+		rq = next_rq;
 	}
 
+	/*
+	 * For a zoned block device, if we only have writes queued and none of
+	 * them can be dispatched, rq will be NULL.
+	 */
+	if (!rq)
+		return 0;
+
 	dd->batching = 0;
 
 dispatch_request:
@@ -318,6 +399,16 @@ dispatch_request:
 	return 1;
 }
 
+/*
+ * For zoned block devices, write unlock the target zone of completed
+ * write requests.
+ */
+static void
+deadline_completed_request(struct request_queue *q, struct request *rq)
+{
+	blk_req_zone_write_unlock(rq);
+}
+
 static void deadline_exit_queue(struct elevator_queue *e)
 {
 	struct deadline_data *dd = e->elevator_data;
@@ -439,6 +530,7 @@ static struct elevator_type iosched_deadline = {
 	.elevator_merged_fn =		deadline_merged_request,
 	.elevator_merge_req_fn =	deadline_merged_requests,
 	.elevator_dispatch_fn =		deadline_dispatch_requests,
+	.elevator_completed_req_fn =	deadline_completed_request,
 	.elevator_add_req_fn =		deadline_add_request,
 	.elevator_former_req_fn =	elv_rb_former_request,
 	.elevator_latter_req_fn =	elv_rb_latter_request,
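The change above amounts to a small per-zone protocol for zoned (SMR) devices: a write request write-locks its target zone when it is moved to the dispatch queue, requests whose target zone is already locked are skipped when the scheduler picks the next FIFO or sector-sorted request, and the lock is released when the write completes or is requeued. The userspace C sketch below models that protocol with one boolean per zone; all names and types here are illustrative stand-ins, not the kernel's structures.

#include <stdbool.h>
#include <stdio.h>

#define NR_ZONES 8

/* One write lock per zone; the kernel keeps equivalent per-zone state. */
static bool zone_write_locked[NR_ZONES];

struct toy_request {
	bool is_write;
	unsigned int zone;	/* target zone of the request */
};

/* Counterpart of the blk_req_can_dispatch_to_zone() test in the diff:
 * reads always pass, writes pass only if their zone is unlocked. */
static bool can_dispatch(const struct toy_request *rq)
{
	return !rq->is_write || !zone_write_locked[rq->zone];
}

/* Counterpart of deadline_move_to_dispatch() taking the zone write lock. */
static void dispatch(struct toy_request *rq)
{
	if (rq->is_write)
		zone_write_locked[rq->zone] = true;
	printf("dispatched %s to zone %u\n",
	       rq->is_write ? "write" : "read", rq->zone);
}

/* Counterpart of deadline_completed_request() releasing the zone lock. */
static void complete_request(struct toy_request *rq)
{
	if (rq->is_write)
		zone_write_locked[rq->zone] = false;
}

int main(void)
{
	struct toy_request w1 = { .is_write = true, .zone = 3 };
	struct toy_request w2 = { .is_write = true, .zone = 3 };

	dispatch(&w1);						/* locks zone 3 */
	printf("w2 dispatchable: %d\n", can_dispatch(&w2));	/* 0: zone locked */
	complete_request(&w1);					/* unlocks zone 3 */
	printf("w2 dispatchable: %d\n", can_dispatch(&w2));	/* 1 */
	return 0;
}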