summaryrefslogtreecommitdiffstats
path: root/block/bfq-iosched.c
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2018-08-14 19:23:25 +0200
committerLinus Torvalds <torvalds@linux-foundation.org>2018-08-14 19:23:25 +0200
commit73ba2fb33c492916853dfe63e3b3163da0be661d (patch)
treec2fda8ca1273744d2e884d24189a15ac1a7d63c2 /block/bfq-iosched.c
parentMerge branch 'l1tf-final' of git://git.kernel.org/pub/scm/linux/kernel/git/ti... (diff)
parentblkcg: Make blkg_root_lookup() work for queues in bypass mode (diff)
downloadlinux-73ba2fb33c492916853dfe63e3b3163da0be661d.tar.xz
linux-73ba2fb33c492916853dfe63e3b3163da0be661d.zip
Merge tag 'for-4.19/block-20180812' of git://git.kernel.dk/linux-block
Pull block updates from Jens Axboe: "First pull request for this merge window, there will also be a followup request with some stragglers. This pull request contains: - Fix for a thundering heard issue in the wbt block code (Anchal Agarwal) - A few NVMe pull requests: * Improved tracepoints (Keith) * Larger inline data support for RDMA (Steve Wise) * RDMA setup/teardown fixes (Sagi) * Effects log suppor for NVMe target (Chaitanya Kulkarni) * Buffered IO suppor for NVMe target (Chaitanya Kulkarni) * TP4004 (ANA) support (Christoph) * Various NVMe fixes - Block io-latency controller support. Much needed support for properly containing block devices. (Josef) - Series improving how we handle sense information on the stack (Kees) - Lightnvm fixes and updates/improvements (Mathias/Javier et al) - Zoned device support for null_blk (Matias) - AIX partition fixes (Mauricio Faria de Oliveira) - DIF checksum code made generic (Max Gurtovoy) - Add support for discard in iostats (Michael Callahan / Tejun) - Set of updates for BFQ (Paolo) - Removal of async write support for bsg (Christoph) - Bio page dirtying and clone fixups (Christoph) - Set of bcache fix/changes (via Coly) - Series improving blk-mq queue setup/teardown speed (Ming) - Series improving merging performance on blk-mq (Ming) - Lots of other fixes and cleanups from a slew of folks" * tag 'for-4.19/block-20180812' of git://git.kernel.dk/linux-block: (190 commits) blkcg: Make blkg_root_lookup() work for queues in bypass mode bcache: fix error setting writeback_rate through sysfs interface null_blk: add lock drop/acquire annotation Blk-throttle: reduce tail io latency when iops limit is enforced block: paride: pd: mark expected switch fall-throughs block: Ensure that a request queue is dissociated from the cgroup controller block: Introduce blk_exit_queue() blkcg: Introduce blkg_root_lookup() block: Remove two superfluous #include directives blk-mq: count the hctx as active before allocating tag block: bvec_nr_vecs() returns value for wrong slab bcache: trivial - remove tailing backslash in macro BTREE_FLAG bcache: make the pr_err statement used for ENOENT only in sysfs_attatch section bcache: set max writeback rate when I/O request is idle bcache: add code comments for bset.c bcache: fix mistaken comments in request.c bcache: fix mistaken code comments in bcache.h bcache: add a comment in super.c bcache: avoid unncessary cache prefetch bch_btree_node_get() bcache: display rate debug parameters to 0 when writeback is not running ...
Diffstat (limited to 'block/bfq-iosched.c')
-rw-r--r--block/bfq-iosched.c131
1 files changed, 110 insertions, 21 deletions
diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c
index 495b9ddb3355..41d9036b1822 100644
--- a/block/bfq-iosched.c
+++ b/block/bfq-iosched.c
@@ -634,7 +634,7 @@ static bool bfq_differentiated_weights(struct bfq_data *bfqd)
* The following function returns true if every queue must receive the
* same share of the throughput (this condition is used when deciding
* whether idling may be disabled, see the comments in the function
- * bfq_bfqq_may_idle()).
+ * bfq_better_to_idle()).
*
* Such a scenario occurs when:
* 1) all active queues have the same weight,
@@ -742,8 +742,9 @@ inc_counter:
* See the comments to the function bfq_weights_tree_add() for considerations
* about overhead.
*/
-void bfq_weights_tree_remove(struct bfq_data *bfqd, struct bfq_entity *entity,
- struct rb_root *root)
+void __bfq_weights_tree_remove(struct bfq_data *bfqd,
+ struct bfq_entity *entity,
+ struct rb_root *root)
{
if (!entity->weight_counter)
return;
@@ -760,6 +761,43 @@ reset_entity_pointer:
}
/*
+ * Invoke __bfq_weights_tree_remove on bfqq and all its inactive
+ * parent entities.
+ */
+void bfq_weights_tree_remove(struct bfq_data *bfqd,
+ struct bfq_queue *bfqq)
+{
+ struct bfq_entity *entity = bfqq->entity.parent;
+
+ __bfq_weights_tree_remove(bfqd, &bfqq->entity,
+ &bfqd->queue_weights_tree);
+
+ for_each_entity(entity) {
+ struct bfq_sched_data *sd = entity->my_sched_data;
+
+ if (sd->next_in_service || sd->in_service_entity) {
+ /*
+ * entity is still active, because either
+ * next_in_service or in_service_entity is not
+ * NULL (see the comments on the definition of
+ * next_in_service for details on why
+ * in_service_entity must be checked too).
+ *
+ * As a consequence, the weight of entity is
+ * not to be removed. In addition, if entity
+ * is active, then its parent entities are
+ * active as well, and thus their weights are
+ * not to be removed either. In the end, this
+ * loop must stop here.
+ */
+ break;
+ }
+ __bfq_weights_tree_remove(bfqd, entity,
+ &bfqd->group_weights_tree);
+ }
+}
+
+/*
* Return expired entry, or NULL to just start from scratch in rbtree.
*/
static struct request *bfq_check_fifo(struct bfq_queue *bfqq,
@@ -1344,18 +1382,30 @@ static bool bfq_bfqq_update_budg_for_activation(struct bfq_data *bfqd,
* remain unchanged after such an expiration, and the
* following statement therefore assigns to
* entity->budget the remaining budget on such an
- * expiration. For clarity, entity->service is not
- * updated on expiration in any case, and, in normal
- * operation, is reset only when bfqq is selected for
- * service (see bfq_get_next_queue).
+ * expiration.
*/
entity->budget = min_t(unsigned long,
bfq_bfqq_budget_left(bfqq),
bfqq->max_budget);
+ /*
+ * At this point, we have used entity->service to get
+ * the budget left (needed for updating
+ * entity->budget). Thus we finally can, and have to,
+ * reset entity->service. The latter must be reset
+ * because bfqq would otherwise be charged again for
+ * the service it has received during its previous
+ * service slot(s).
+ */
+ entity->service = 0;
+
return true;
}
+ /*
+ * We can finally complete expiration, by setting service to 0.
+ */
+ entity->service = 0;
entity->budget = max_t(unsigned long, bfqq->max_budget,
bfq_serv_to_charge(bfqq->next_rq, bfqq));
bfq_clear_bfqq_non_blocking_wait_rq(bfqq);
@@ -3233,11 +3283,21 @@ void bfq_bfqq_expire(struct bfq_data *bfqd,
ref = bfqq->ref;
__bfq_bfqq_expire(bfqd, bfqq);
+ if (ref == 1) /* bfqq is gone, no more actions on it */
+ return;
+
/* mark bfqq as waiting a request only if a bic still points to it */
- if (ref > 1 && !bfq_bfqq_busy(bfqq) &&
+ if (!bfq_bfqq_busy(bfqq) &&
reason != BFQQE_BUDGET_TIMEOUT &&
- reason != BFQQE_BUDGET_EXHAUSTED)
+ reason != BFQQE_BUDGET_EXHAUSTED) {
bfq_mark_bfqq_non_blocking_wait_rq(bfqq);
+ /*
+ * Not setting service to 0, because, if the next rq
+ * arrives in time, the queue will go on receiving
+ * service with this same budget (as if it never expired)
+ */
+ } else
+ entity->service = 0;
}
/*
@@ -3295,7 +3355,7 @@ static bool bfq_may_expire_for_budg_timeout(struct bfq_queue *bfqq)
* issues taken into account are not trivial. We discuss these issues
* individually while introducing the variables.
*/
-static bool bfq_bfqq_may_idle(struct bfq_queue *bfqq)
+static bool bfq_better_to_idle(struct bfq_queue *bfqq)
{
struct bfq_data *bfqd = bfqq->bfqd;
bool rot_without_queueing =
@@ -3528,19 +3588,19 @@ static bool bfq_bfqq_may_idle(struct bfq_queue *bfqq)
}
/*
- * If the in-service queue is empty but the function bfq_bfqq_may_idle
+ * If the in-service queue is empty but the function bfq_better_to_idle
* returns true, then:
* 1) the queue must remain in service and cannot be expired, and
* 2) the device must be idled to wait for the possible arrival of a new
* request for the queue.
- * See the comments on the function bfq_bfqq_may_idle for the reasons
+ * See the comments on the function bfq_better_to_idle for the reasons
* why performing device idling is the best choice to boost the throughput
- * and preserve service guarantees when bfq_bfqq_may_idle itself
+ * and preserve service guarantees when bfq_better_to_idle itself
* returns true.
*/
static bool bfq_bfqq_must_idle(struct bfq_queue *bfqq)
{
- return RB_EMPTY_ROOT(&bfqq->sort_list) && bfq_bfqq_may_idle(bfqq);
+ return RB_EMPTY_ROOT(&bfqq->sort_list) && bfq_better_to_idle(bfqq);
}
/*
@@ -3559,8 +3619,14 @@ static struct bfq_queue *bfq_select_queue(struct bfq_data *bfqd)
bfq_log_bfqq(bfqd, bfqq, "select_queue: already in-service queue");
+ /*
+ * Do not expire bfqq for budget timeout if bfqq may be about
+ * to enjoy device idling. The reason why, in this case, we
+ * prevent bfqq from expiring is the same as in the comments
+ * on the case where bfq_bfqq_must_idle() returns true, in
+ * bfq_completed_request().
+ */
if (bfq_may_expire_for_budg_timeout(bfqq) &&
- !bfq_bfqq_wait_request(bfqq) &&
!bfq_bfqq_must_idle(bfqq))
goto expire;
@@ -3620,7 +3686,7 @@ check_queue:
* may idle after their completion, then keep it anyway.
*/
if (bfq_bfqq_wait_request(bfqq) ||
- (bfqq->dispatched != 0 && bfq_bfqq_may_idle(bfqq))) {
+ (bfqq->dispatched != 0 && bfq_better_to_idle(bfqq))) {
bfqq = NULL;
goto keep_queue;
}
@@ -4582,8 +4648,7 @@ static void bfq_completed_request(struct bfq_queue *bfqq, struct bfq_data *bfqd)
*/
bfqq->budget_timeout = jiffies;
- bfq_weights_tree_remove(bfqd, &bfqq->entity,
- &bfqd->queue_weights_tree);
+ bfq_weights_tree_remove(bfqd, bfqq);
}
now_ns = ktime_get_ns();
@@ -4637,15 +4702,39 @@ static void bfq_completed_request(struct bfq_queue *bfqq, struct bfq_data *bfqd)
* or if we want to idle in case it has no pending requests.
*/
if (bfqd->in_service_queue == bfqq) {
- if (bfqq->dispatched == 0 && bfq_bfqq_must_idle(bfqq)) {
- bfq_arm_slice_timer(bfqd);
+ if (bfq_bfqq_must_idle(bfqq)) {
+ if (bfqq->dispatched == 0)
+ bfq_arm_slice_timer(bfqd);
+ /*
+ * If we get here, we do not expire bfqq, even
+ * if bfqq was in budget timeout or had no
+ * more requests (as controlled in the next
+ * conditional instructions). The reason for
+ * not expiring bfqq is as follows.
+ *
+ * Here bfqq->dispatched > 0 holds, but
+ * bfq_bfqq_must_idle() returned true. This
+ * implies that, even if no request arrives
+ * for bfqq before bfqq->dispatched reaches 0,
+ * bfqq will, however, not be expired on the
+ * completion event that causes bfqq->dispatch
+ * to reach zero. In contrast, on this event,
+ * bfqq will start enjoying device idling
+ * (I/O-dispatch plugging).
+ *
+ * But, if we expired bfqq here, bfqq would
+ * not have the chance to enjoy device idling
+ * when bfqq->dispatched finally reaches
+ * zero. This would expose bfqq to violation
+ * of its reserved service guarantees.
+ */
return;
} else if (bfq_may_expire_for_budg_timeout(bfqq))
bfq_bfqq_expire(bfqd, bfqq, false,
BFQQE_BUDGET_TIMEOUT);
else if (RB_EMPTY_ROOT(&bfqq->sort_list) &&
(bfqq->dispatched == 0 ||
- !bfq_bfqq_may_idle(bfqq)))
+ !bfq_better_to_idle(bfqq)))
bfq_bfqq_expire(bfqd, bfqq, false,
BFQQE_NO_MORE_REQUESTS);
}