summaryrefslogtreecommitdiffstats
path: root/block/bfq-iosched.h
diff options
context:
space:
mode:
authorPaolo Valente <paolo.valente@linaro.org>2018-12-06 19:18:18 +0100
committerJens Axboe <axboe@kernel.dk>2018-12-07 15:40:07 +0100
commitba7aeae5539c7a7cccc4cf07a2bc61281a93c50e (patch)
tree3c629f7194a500adb395329925ea0ce83c2a28d3 /block/bfq-iosched.h
parentblk-mq: fix corruption with direct issue (diff)
downloadlinux-ba7aeae5539c7a7cccc4cf07a2bc61281a93c50e.tar.xz
linux-ba7aeae5539c7a7cccc4cf07a2bc61281a93c50e.zip
block, bfq: fix decrement of num_active_groups
Since commit '2d29c9f89fcd ("block, bfq: improve asymmetric scenarios detection")', if there are process groups with I/O requests waiting for completion, then BFQ tags the scenario as 'asymmetric'. This detection is needed for preserving service guarantees (for details, see comments on the computation * of the variable asymmetric_scenario in the function bfq_better_to_idle). Unfortunately, commit '2d29c9f89fcd ("block, bfq: improve asymmetric scenarios detection")' contains an error exactly in the updating of the number of groups with I/O requests waiting for completion: if a group has more than one descendant process, then the above number of groups, which is renamed from num_active_groups to a more appropriate num_groups_with_pending_reqs by this commit, may happen to be wrongly decremented multiple times, namely every time one of the descendant processes gets all its pending I/O requests completed. A correct, complete solution should work as follows. Consider a group that is inactive, i.e., that has no descendant process with pending I/O inside BFQ queues. Then suppose that num_groups_with_pending_reqs is still accounting for this group, because the group still has some descendant process with some I/O request still in flight. num_groups_with_pending_reqs should be decremented when the in-flight request of the last descendant process is finally completed (assuming that nothing else has changed for the group in the meantime, in terms of composition of the group and active/inactive state of child groups and processes). To accomplish this, an additional pending-request counter must be added to entities, and must be updated correctly. To avoid this additional field and operations, this commit resorts to the following tradeoff between simplicity and accuracy: for an inactive group that is still counted in num_groups_with_pending_reqs, this commit decrements num_groups_with_pending_reqs when the first descendant process of the group remains with no request waiting for completion. This simplified scheme provides a fix to the unbalanced decrements introduced by 2d29c9f89fcd. Since this error was also caused by lack of comments on this non-trivial issue, this commit also adds related comments. Fixes: 2d29c9f89fcd ("block, bfq: improve asymmetric scenarios detection") Reported-by: Steven Barrett <steven@liquorix.net> Tested-by: Steven Barrett <steven@liquorix.net> Tested-by: Lucjan Lucjanov <lucjan.lucjanov@gmail.com> Reviewed-by: Federico Motta <federico@willer.it> Signed-off-by: Paolo Valente <paolo.valente@linaro.org> Signed-off-by: Jens Axboe <axboe@kernel.dk>
Diffstat (limited to 'block/bfq-iosched.h')
-rw-r--r--block/bfq-iosched.h51
1 files changed, 49 insertions, 2 deletions
diff --git a/block/bfq-iosched.h b/block/bfq-iosched.h
index 77651d817ecd..0b02bf302de0 100644
--- a/block/bfq-iosched.h
+++ b/block/bfq-iosched.h
@@ -196,6 +196,9 @@ struct bfq_entity {
/* flag, set to request a weight, ioprio or ioprio_class change */
int prio_changed;
+
+ /* flag, set if the entity is counted in groups_with_pending_reqs */
+ bool in_groups_with_pending_reqs;
};
struct bfq_group;
@@ -448,10 +451,54 @@ struct bfq_data {
* bfq_weights_tree_[add|remove] for further details).
*/
struct rb_root queue_weights_tree;
+
/*
- * number of groups with requests still waiting for completion
+ * Number of groups with at least one descendant process that
+ * has at least one request waiting for completion. Note that
+ * this accounts for also requests already dispatched, but not
+ * yet completed. Therefore this number of groups may differ
+ * (be larger) than the number of active groups, as a group is
+ * considered active only if its corresponding entity has
+ * descendant queues with at least one request queued. This
+ * number is used to decide whether a scenario is symmetric.
+ * For a detailed explanation see comments on the computation
+ * of the variable asymmetric_scenario in the function
+ * bfq_better_to_idle().
+ *
+ * However, it is hard to compute this number exactly, for
+ * groups with multiple descendant processes. Consider a group
+ * that is inactive, i.e., that has no descendant process with
+ * pending I/O inside BFQ queues. Then suppose that
+ * num_groups_with_pending_reqs is still accounting for this
+ * group, because the group has descendant processes with some
+ * I/O request still in flight. num_groups_with_pending_reqs
+ * should be decremented when the in-flight request of the
+ * last descendant process is finally completed (assuming that
+ * nothing else has changed for the group in the meantime, in
+ * terms of composition of the group and active/inactive state of child
+ * groups and processes). To accomplish this, an additional
+ * pending-request counter must be added to entities, and must
+ * be updated correctly. To avoid this additional field and operations,
+ * we resort to the following tradeoff between simplicity and
+ * accuracy: for an inactive group that is still counted in
+ * num_groups_with_pending_reqs, we decrement
+ * num_groups_with_pending_reqs when the first descendant
+ * process of the group remains with no request waiting for
+ * completion.
+ *
+ * Even this simpler decrement strategy requires a little
+ * carefulness: to avoid multiple decrements, we flag a group,
+ * more precisely an entity representing a group, as still
+ * counted in num_groups_with_pending_reqs when it becomes
+ * inactive. Then, when the first descendant queue of the
+ * entity remains with no request waiting for completion,
+ * num_groups_with_pending_reqs is decremented, and this flag
+ * is reset. After this flag is reset for the entity,
+ * num_groups_with_pending_reqs won't be decremented any
+ * longer in case a new descendant queue of the entity remains
+ * with no request waiting for completion.
*/
- unsigned int num_active_groups;
+ unsigned int num_groups_with_pending_reqs;
/*
* Number of bfq_queues containing requests (including the