diff options
Diffstat (limited to 'kernel/sched/fair.c')
-rw-r--r-- | kernel/sched/fair.c | 122 |
1 files changed, 116 insertions, 6 deletions
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 3582117e1580..22b59a7facd2 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -3873,6 +3873,113 @@ static inline unsigned long cfs_rq_load_avg(struct cfs_rq *cfs_rq) static int idle_balance(struct rq *this_rq, struct rq_flags *rf); +static inline unsigned long task_util(struct task_struct *p) +{ + return READ_ONCE(p->se.avg.util_avg); +} + +static inline unsigned long _task_util_est(struct task_struct *p) +{ + struct util_est ue = READ_ONCE(p->se.avg.util_est); + + return max(ue.ewma, ue.enqueued); +} + +static inline unsigned long task_util_est(struct task_struct *p) +{ + return max(task_util(p), _task_util_est(p)); +} + +static inline void util_est_enqueue(struct cfs_rq *cfs_rq, + struct task_struct *p) +{ + unsigned int enqueued; + + if (!sched_feat(UTIL_EST)) + return; + + /* Update root cfs_rq's estimated utilization */ + enqueued = cfs_rq->avg.util_est.enqueued; + enqueued += _task_util_est(p); + WRITE_ONCE(cfs_rq->avg.util_est.enqueued, enqueued); +} + +/* + * Check if a (signed) value is within a specified (unsigned) margin, + * based on the observation that: + * + * abs(x) < y := (unsigned)(x + y - 1) < (2 * y - 1) + * + * NOTE: this only works when value + maring < INT_MAX. + */ +static inline bool within_margin(int value, int margin) +{ + return ((unsigned int)(value + margin - 1) < (2 * margin - 1)); +} + +static void +util_est_dequeue(struct cfs_rq *cfs_rq, struct task_struct *p, bool task_sleep) +{ + long last_ewma_diff; + struct util_est ue; + + if (!sched_feat(UTIL_EST)) + return; + + /* + * Update root cfs_rq's estimated utilization + * + * If *p is the last task then the root cfs_rq's estimated utilization + * of a CPU is 0 by definition. + */ + ue.enqueued = 0; + if (cfs_rq->nr_running) { + ue.enqueued = cfs_rq->avg.util_est.enqueued; + ue.enqueued -= min_t(unsigned int, ue.enqueued, + _task_util_est(p)); + } + WRITE_ONCE(cfs_rq->avg.util_est.enqueued, ue.enqueued); + + /* + * Skip update of task's estimated utilization when the task has not + * yet completed an activation, e.g. being migrated. + */ + if (!task_sleep) + return; + + /* + * Skip update of task's estimated utilization when its EWMA is + * already ~1% close to its last activation value. + */ + ue = p->se.avg.util_est; + ue.enqueued = task_util(p); + last_ewma_diff = ue.enqueued - ue.ewma; + if (within_margin(last_ewma_diff, (SCHED_CAPACITY_SCALE / 100))) + return; + + /* + * Update Task's estimated utilization + * + * When *p completes an activation we can consolidate another sample + * of the task size. This is done by storing the current PELT value + * as ue.enqueued and by using this value to update the Exponential + * Weighted Moving Average (EWMA): + * + * ewma(t) = w * task_util(p) + (1-w) * ewma(t-1) + * = w * task_util(p) + ewma(t-1) - w * ewma(t-1) + * = w * (task_util(p) - ewma(t-1)) + ewma(t-1) + * = w * ( last_ewma_diff ) + ewma(t-1) + * = w * (last_ewma_diff + ewma(t-1) / w) + * + * Where 'w' is the weight of new samples, which is configured to be + * 0.25, thus making w=1/4 ( >>= UTIL_EST_WEIGHT_SHIFT) + */ + ue.ewma <<= UTIL_EST_WEIGHT_SHIFT; + ue.ewma += last_ewma_diff; + ue.ewma >>= UTIL_EST_WEIGHT_SHIFT; + WRITE_ONCE(p->se.avg.util_est, ue); +} + #else /* CONFIG_SMP */ static inline int @@ -3902,6 +4009,13 @@ static inline int idle_balance(struct rq *rq, struct rq_flags *rf) return 0; } +static inline void +util_est_enqueue(struct cfs_rq *cfs_rq, struct task_struct *p) {} + +static inline void +util_est_dequeue(struct cfs_rq *cfs_rq, struct task_struct *p, + bool task_sleep) {} + #endif /* CONFIG_SMP */ static void check_spread(struct cfs_rq *cfs_rq, struct sched_entity *se) @@ -5249,6 +5363,7 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags) if (!se) add_nr_running(rq, 1); + util_est_enqueue(&rq->cfs, p); hrtick_update(rq); } @@ -5308,6 +5423,7 @@ static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags) if (!se) sub_nr_running(rq, 1); + util_est_dequeue(&rq->cfs, p, task_sleep); hrtick_update(rq); } @@ -5835,7 +5951,6 @@ static int wake_affine(struct sched_domain *sd, struct task_struct *p, return target; } -static inline unsigned long task_util(struct task_struct *p); static unsigned long cpu_util_wake(int cpu, struct task_struct *p); static unsigned long capacity_spare_wake(int cpu, struct task_struct *p) @@ -6351,11 +6466,6 @@ static unsigned long cpu_util(int cpu) return (util >= capacity) ? capacity : util; } -static inline unsigned long task_util(struct task_struct *p) -{ - return p->se.avg.util_avg; -} - /* * cpu_util_wake: Compute CPU utilization with any contributions from * the waking task p removed. |