diff options
Diffstat (limited to 'block')
-rw-r--r-- | block/bfq-iosched.c | 76 | ||||
-rw-r--r-- | block/bfq-iosched.h | 51 | ||||
-rw-r--r-- | block/bfq-wf2q.c | 5 | ||||
-rw-r--r-- | block/blk-mq.c | 7 |
4 files changed, 111 insertions, 28 deletions
diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c index 3d1f319fe977..cd307767a134 100644 --- a/block/bfq-iosched.c +++ b/block/bfq-iosched.c @@ -638,7 +638,7 @@ static bool bfq_varied_queue_weights_or_active_groups(struct bfq_data *bfqd) bfqd->queue_weights_tree.rb_node->rb_right) #ifdef CONFIG_BFQ_GROUP_IOSCHED ) || - (bfqd->num_active_groups > 0 + (bfqd->num_groups_with_pending_reqs > 0 #endif ); } @@ -802,7 +802,21 @@ void bfq_weights_tree_remove(struct bfq_data *bfqd, */ break; } - bfqd->num_active_groups--; + + /* + * The decrement of num_groups_with_pending_reqs is + * not performed immediately upon the deactivation of + * entity, but it is delayed to when it also happens + * that the first leaf descendant bfqq of entity gets + * all its pending requests completed. The following + * instructions perform this delayed decrement, if + * needed. See the comments on + * num_groups_with_pending_reqs for details. + */ + if (entity->in_groups_with_pending_reqs) { + entity->in_groups_with_pending_reqs = false; + bfqd->num_groups_with_pending_reqs--; + } } } @@ -3529,27 +3543,44 @@ static bool bfq_better_to_idle(struct bfq_queue *bfqq) * fact, if there are active groups, then, for condition (i) * to become false, it is enough that an active group contains * more active processes or sub-groups than some other active - * group. We address this issue with the following bi-modal - * behavior, implemented in the function + * group. More precisely, for condition (i) to hold because of + * such a group, it is not even necessary that the group is + * (still) active: it is sufficient that, even if the group + * has become inactive, some of its descendant processes still + * have some request already dispatched but still waiting for + * completion. In fact, requests have still to be guaranteed + * their share of the throughput even after being + * dispatched. In this respect, it is easy to show that, if a + * group frequently becomes inactive while still having + * in-flight requests, and if, when this happens, the group is + * not considered in the calculation of whether the scenario + * is asymmetric, then the group may fail to be guaranteed its + * fair share of the throughput (basically because idling may + * not be performed for the descendant processes of the group, + * but it had to be). We address this issue with the + * following bi-modal behavior, implemented in the function * bfq_symmetric_scenario(). * - * If there are active groups, then the scenario is tagged as + * If there are groups with requests waiting for completion + * (as commented above, some of these groups may even be + * already inactive), then the scenario is tagged as * asymmetric, conservatively, without checking any of the * conditions (i) and (ii). So the device is idled for bfqq. * This behavior matches also the fact that groups are created - * exactly if controlling I/O (to preserve bandwidth and - * latency guarantees) is a primary concern. + * exactly if controlling I/O is a primary concern (to + * preserve bandwidth and latency guarantees). * - * On the opposite end, if there are no active groups, then - * only condition (i) is actually controlled, i.e., provided - * that condition (i) holds, idling is not performed, - * regardless of whether condition (ii) holds. In other words, - * only if condition (i) does not hold, then idling is - * allowed, and the device tends to be prevented from queueing - * many requests, possibly of several processes. Since there - * are no active groups, then, to control condition (i) it is - * enough to check whether all active queues have the same - * weight. + * On the opposite end, if there are no groups with requests + * waiting for completion, then only condition (i) is actually + * controlled, i.e., provided that condition (i) holds, idling + * is not performed, regardless of whether condition (ii) + * holds. In other words, only if condition (i) does not hold, + * then idling is allowed, and the device tends to be + * prevented from queueing many requests, possibly of several + * processes. Since there are no groups with requests waiting + * for completion, then, to control condition (i) it is enough + * to check just whether all the queues with requests waiting + * for completion also have the same weight. * * Not checking condition (ii) evidently exposes bfqq to the * risk of getting less throughput than its fair share. @@ -3607,10 +3638,11 @@ static bool bfq_better_to_idle(struct bfq_queue *bfqq) * bfqq is weight-raised is checked explicitly here. More * precisely, the compound condition below takes into account * also the fact that, even if bfqq is being weight-raised, - * the scenario is still symmetric if all active queues happen - * to be weight-raised. Actually, we should be even more - * precise here, and differentiate between interactive weight - * raising and soft real-time weight raising. + * the scenario is still symmetric if all queues with requests + * waiting for completion happen to be + * weight-raised. Actually, we should be even more precise + * here, and differentiate between interactive weight raising + * and soft real-time weight raising. * * As a side note, it is worth considering that the above * device-idling countermeasures may however fail in the @@ -5417,7 +5449,7 @@ static int bfq_init_queue(struct request_queue *q, struct elevator_type *e) bfqd->idle_slice_timer.function = bfq_idle_slice_timer; bfqd->queue_weights_tree = RB_ROOT; - bfqd->num_active_groups = 0; + bfqd->num_groups_with_pending_reqs = 0; INIT_LIST_HEAD(&bfqd->active_list); INIT_LIST_HEAD(&bfqd->idle_list); diff --git a/block/bfq-iosched.h b/block/bfq-iosched.h index 77651d817ecd..0b02bf302de0 100644 --- a/block/bfq-iosched.h +++ b/block/bfq-iosched.h @@ -196,6 +196,9 @@ struct bfq_entity { /* flag, set to request a weight, ioprio or ioprio_class change */ int prio_changed; + + /* flag, set if the entity is counted in groups_with_pending_reqs */ + bool in_groups_with_pending_reqs; }; struct bfq_group; @@ -448,10 +451,54 @@ struct bfq_data { * bfq_weights_tree_[add|remove] for further details). */ struct rb_root queue_weights_tree; + /* - * number of groups with requests still waiting for completion + * Number of groups with at least one descendant process that + * has at least one request waiting for completion. Note that + * this accounts for also requests already dispatched, but not + * yet completed. Therefore this number of groups may differ + * (be larger) than the number of active groups, as a group is + * considered active only if its corresponding entity has + * descendant queues with at least one request queued. This + * number is used to decide whether a scenario is symmetric. + * For a detailed explanation see comments on the computation + * of the variable asymmetric_scenario in the function + * bfq_better_to_idle(). + * + * However, it is hard to compute this number exactly, for + * groups with multiple descendant processes. Consider a group + * that is inactive, i.e., that has no descendant process with + * pending I/O inside BFQ queues. Then suppose that + * num_groups_with_pending_reqs is still accounting for this + * group, because the group has descendant processes with some + * I/O request still in flight. num_groups_with_pending_reqs + * should be decremented when the in-flight request of the + * last descendant process is finally completed (assuming that + * nothing else has changed for the group in the meantime, in + * terms of composition of the group and active/inactive state of child + * groups and processes). To accomplish this, an additional + * pending-request counter must be added to entities, and must + * be updated correctly. To avoid this additional field and operations, + * we resort to the following tradeoff between simplicity and + * accuracy: for an inactive group that is still counted in + * num_groups_with_pending_reqs, we decrement + * num_groups_with_pending_reqs when the first descendant + * process of the group remains with no request waiting for + * completion. + * + * Even this simpler decrement strategy requires a little + * carefulness: to avoid multiple decrements, we flag a group, + * more precisely an entity representing a group, as still + * counted in num_groups_with_pending_reqs when it becomes + * inactive. Then, when the first descendant queue of the + * entity remains with no request waiting for completion, + * num_groups_with_pending_reqs is decremented, and this flag + * is reset. After this flag is reset for the entity, + * num_groups_with_pending_reqs won't be decremented any + * longer in case a new descendant queue of the entity remains + * with no request waiting for completion. */ - unsigned int num_active_groups; + unsigned int num_groups_with_pending_reqs; /* * Number of bfq_queues containing requests (including the diff --git a/block/bfq-wf2q.c b/block/bfq-wf2q.c index 4b0d5fb69160..63e0f12be7c9 100644 --- a/block/bfq-wf2q.c +++ b/block/bfq-wf2q.c @@ -1012,7 +1012,10 @@ static void __bfq_activate_entity(struct bfq_entity *entity, container_of(entity, struct bfq_group, entity); struct bfq_data *bfqd = bfqg->bfqd; - bfqd->num_active_groups++; + if (!entity->in_groups_with_pending_reqs) { + entity->in_groups_with_pending_reqs = true; + bfqd->num_groups_with_pending_reqs++; + } } #endif diff --git a/block/blk-mq.c b/block/blk-mq.c index 7f478ae288af..b645275dfe5f 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -1831,7 +1831,7 @@ insert: if (bypass_insert) return BLK_STS_RESOURCE; - blk_mq_sched_insert_request(rq, false, run_queue, false); + blk_mq_request_bypass_insert(rq, run_queue); return BLK_STS_OK; } @@ -1847,7 +1847,7 @@ static void blk_mq_try_issue_directly(struct blk_mq_hw_ctx *hctx, ret = __blk_mq_try_issue_directly(hctx, rq, cookie, false, true); if (ret == BLK_STS_RESOURCE || ret == BLK_STS_DEV_RESOURCE) - blk_mq_sched_insert_request(rq, false, true, false); + blk_mq_request_bypass_insert(rq, true); else if (ret != BLK_STS_OK) blk_mq_end_request(rq, ret); @@ -1881,7 +1881,8 @@ void blk_mq_try_issue_list_directly(struct blk_mq_hw_ctx *hctx, if (ret != BLK_STS_OK) { if (ret == BLK_STS_RESOURCE || ret == BLK_STS_DEV_RESOURCE) { - list_add(&rq->queuelist, list); + blk_mq_request_bypass_insert(rq, + list_empty(list)); break; } blk_mq_end_request(rq, ret); |