commit 8c1f560c1ea3f19e22ba356f62680d9d449c9ec2
Author:     Xuewen Yan <xuewen.yan@unisoc.com>
AuthorDate: 2020-12-18 10:27:52 +0100
Commit:     Peter Zijlstra <peterz@infradead.org>
CommitDate: 2021-01-14 11:20:10 +0100
sched/fair: Avoid stale CPU util_est value for schedutil in task dequeue
CPU (root cfs_rq) estimated utilization (util_est) is currently used in
dequeue_task_fair() to drive frequency selection before it is updated.
with:

  CPU_util        : rq->cfs.avg.util_avg
  CPU_util_est    : rq->cfs.avg.util_est
  CPU_utilization : max(CPU_util, CPU_util_est)

  task_util       : p->se.avg.util_avg
  task_util_est   : p->se.avg.util_est
dequeue_task_fair():

    /* (1) CPU_util and task_util update + inform schedutil about
           CPU_utilization changes */
    for_each_sched_entity() /* 2 loops */
        (dequeue_entity() ->) update_load_avg() -> cfs_rq_util_change()
         -> cpufreq_update_util() -> ... -> sugov_update_[shared|single]
         -> sugov_get_util() -> cpu_util_cfs()

    /* (2) CPU_util_est and task_util_est update */
    util_est_dequeue()
cpu_util_cfs() uses CPU_utilization, which can lead to a falsely high
utilization value for schedutil in task ramp-down or ramp-up scenarios
during task dequeue.
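To make the failure mode concrete, here is a condensed userspace model of
the value schedutil ends up reading, patterned after cpu_util_cfs(); the
struct, helper name and numbers below are illustrative, not kernel code:

#include <stdio.h>

/* Simplified stand-in for rq->cfs.avg: only the two signals that feed
 * CPU_utilization = max(CPU_util, CPU_util_est). */
struct cfs_avg {
	unsigned long util_avg;		 /* CPU_util */
	unsigned long util_est_enqueued; /* CPU_util_est */
};

/* Model of cpu_util_cfs(): frequency selection is driven by the max of
 * the running average and the (possibly stale) estimate. */
static unsigned long cpu_utilization(const struct cfs_avg *avg)
{
	unsigned long util = avg->util_avg;

	if (avg->util_est_enqueued > util)
		util = avg->util_est_enqueued;
	return util;
}

int main(void)
{
	/* Ramp-down dequeue before this patch: (1) has already removed
	 * the task's PELT contribution (util_avg is down to 50), but (2)
	 * has not yet removed its 300-unit estimate, so schedutil is
	 * driven by the stale 300 instead of 50. */
	struct cfs_avg avg = { .util_avg = 50, .util_est_enqueued = 300 };

	printf("schedutil sees %lu\n", cpu_utilization(&avg)); /* 300 */
	return 0;
}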
To mitigate the issue, split the util_est update (2) into:

  (A) CPU_util_est update in util_est_dequeue()
  (B) task_util_est update in util_est_update()

Place (A) before (1) and keep (B) where (2) is. The latter is necessary,
since (B) relies on the task_util update in (1).
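A minimal model of the subtraction performed in (A), mirroring the new
util_est_dequeue() in the diff below (the standalone function and the
driver are illustrative): the min_t()-style clamp keeps the unsigned
per-CPU estimate from wrapping around when the task's estimate exceeds
what is currently accounted on the root cfs_rq.

#include <stdio.h>

/* Model of util_est_dequeue(): remove the task's estimated utilization
 * from the root cfs_rq's enqueued estimate, clamped at zero. */
static unsigned int dequeue_util_est(unsigned int cfs_enqueued,
				     unsigned int task_est)
{
	/* clamp: never subtract more than is currently accounted */
	cfs_enqueued -= task_est < cfs_enqueued ? task_est : cfs_enqueued;
	return cfs_enqueued;
}

int main(void)
{
	printf("%u\n", dequeue_util_est(500, 300)); /* 200 */
	printf("%u\n", dequeue_util_est(200, 300)); /* 0, no wraparound */
	return 0;
}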
Fixes: 7f65ea42eb00 ("sched/fair: Add util_est on top of PELT")
Signed-off-by: Xuewen Yan <xuewen.yan@unisoc.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Dietmar Eggemann <dietmar.eggemann@arm.com>
Reviewed-by: Vincent Guittot <vincent.guittot@linaro.org>
Link: https://lkml.kernel.org/r/1608283672-18240-1-git-send-email-xuewen.yan94@gmail.com
Diffstat (limited to 'kernel')
 kernel/sched/fair.c | 43 ++++++++++++++++++++++++++-----------------
 1 file changed, 28 insertions(+), 15 deletions(-)
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 389cb58655c0..40d3ebfb5a48 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -3943,6 +3943,22 @@ static inline void util_est_enqueue(struct cfs_rq *cfs_rq,
 	trace_sched_util_est_cfs_tp(cfs_rq);
 }
 
+static inline void util_est_dequeue(struct cfs_rq *cfs_rq,
+				    struct task_struct *p)
+{
+	unsigned int enqueued;
+
+	if (!sched_feat(UTIL_EST))
+		return;
+
+	/* Update root cfs_rq's estimated utilization */
+	enqueued  = cfs_rq->avg.util_est.enqueued;
+	enqueued -= min_t(unsigned int, enqueued, _task_util_est(p));
+	WRITE_ONCE(cfs_rq->avg.util_est.enqueued, enqueued);
+
+	trace_sched_util_est_cfs_tp(cfs_rq);
+}
+
 /*
  * Check if a (signed) value is within a specified (unsigned) margin,
  * based on the observation that:
@@ -3956,23 +3972,16 @@ static inline bool within_margin(int value, int margin)
 	return ((unsigned int)(value + margin - 1) < (2 * margin - 1));
 }
 
-static void
-util_est_dequeue(struct cfs_rq *cfs_rq, struct task_struct *p, bool task_sleep)
+static inline void util_est_update(struct cfs_rq *cfs_rq,
+				   struct task_struct *p,
+				   bool task_sleep)
 {
 	long last_ewma_diff;
 	struct util_est ue;
-	int cpu;
 
 	if (!sched_feat(UTIL_EST))
 		return;
 
-	/* Update root cfs_rq's estimated utilization */
-	ue.enqueued  = cfs_rq->avg.util_est.enqueued;
-	ue.enqueued -= min_t(unsigned int, ue.enqueued, _task_util_est(p));
-	WRITE_ONCE(cfs_rq->avg.util_est.enqueued, ue.enqueued);
-
-	trace_sched_util_est_cfs_tp(cfs_rq);
-
 	/*
 	 * Skip update of task's estimated utilization when the task has not
 	 * yet completed an activation, e.g. being migrated.
@@ -4012,8 +4021,7 @@ util_est_dequeue(struct cfs_rq *cfs_rq, struct task_struct *p, bool task_sleep)
 	 * To avoid overestimation of actual task utilization, skip updates if
 	 * we cannot grant there is idle time in this CPU.
 	 */
-	cpu = cpu_of(rq_of(cfs_rq));
-	if (task_util(p) > capacity_orig_of(cpu))
+	if (task_util(p) > capacity_orig_of(cpu_of(rq_of(cfs_rq))))
 		return;
 
 	/*
@@ -4096,8 +4104,11 @@ static inline void
 util_est_enqueue(struct cfs_rq *cfs_rq, struct task_struct *p) {}
 
 static inline void
-util_est_dequeue(struct cfs_rq *cfs_rq, struct task_struct *p,
-		 bool task_sleep) {}
+util_est_dequeue(struct cfs_rq *cfs_rq, struct task_struct *p) {}
+
+static inline void
+util_est_update(struct cfs_rq *cfs_rq, struct task_struct *p,
+		bool task_sleep) {}
 
 static inline void update_misfit_status(struct task_struct *p, struct rq *rq) {}
 #endif /* CONFIG_SMP */
@@ -5609,6 +5620,8 @@ static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags)
 	int idle_h_nr_running = task_has_idle_policy(p);
 	bool was_sched_idle = sched_idle_rq(rq);
 
+	util_est_dequeue(&rq->cfs, p);
+
 	for_each_sched_entity(se) {
 		cfs_rq = cfs_rq_of(se);
 		dequeue_entity(cfs_rq, se, flags);
@@ -5659,7 +5672,7 @@ static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags)
 		rq->next_balance = jiffies;
 
 dequeue_throttle:
-	util_est_dequeue(&rq->cfs, p, task_sleep);
+	util_est_update(&rq->cfs, p, task_sleep);
 	hrtick_update(rq);
 }
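Two details of the ordering are worth noting. util_est_dequeue() is placed
before the for_each_sched_entity() loop so that the cfs_rq_util_change() ->
cpufreq_update_util() path invoked from (1) already sees the root cfs_rq
estimate without the departing task, while util_est_update() remains at the
dequeue_throttle: tail because it reads task_util(p), which is only current
once update_load_avg() has run in (1). As secondary cleanups, the patch
folds the cpu local variable into a direct
capacity_orig_of(cpu_of(rq_of(cfs_rq))) call and adjusts the !CONFIG_SMP
stubs: util_est_dequeue() loses its task_sleep parameter and a matching
no-op util_est_update() stub is added.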