author    | Peter Zijlstra <peterz@infradead.org> | 2024-05-31 15:49:40 +0200
committer | Peter Zijlstra <peterz@infradead.org> | 2024-08-17 11:06:45 +0200
commit    | 82e9d0456e06cebe2c89f3c73cdbc9e3805e9437
tree      | e13b9ee47e5d1ad45104110d714bbe52e3be612e
parent    | sched/eevdf: Fixup PELT vs DELAYED_DEQUEUE
sched/fair: Avoid re-setting virtual deadline on 'migrations'
During OSPM24 Youssef noted that migrations are re-setting the virtual
deadline. Notably everything that does a dequeue/enqueue, like setting
nice, changing preferred numa-node, and a myriad of other random crap,
will cause this to happen.

This shouldn't be. Preserve the relative virtual deadline across such
dequeue/enqueue cycles.
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Valentin Schneider <vschneid@redhat.com>
Tested-by: Valentin Schneider <vschneid@redhat.com>
Link: https://lkml.kernel.org/r/20240727105030.625119246@infradead.org
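The core trick is easiest to see outside the kernel: on a dequeue that is not
a sleep, the virtual deadline is stored relative to the entity's vruntime, and
the next placement rebases it onto whatever vruntime the entity receives
there, so the remaining portion of the deadline survives the move. Below is a
minimal user-space sketch of that conversion; the struct layout, harness, and
the dequeue_rel()/place_rel() helpers are invented for illustration — only the
field names (vruntime, deadline, rel_deadline) mirror the patch.

```c
/*
 * User-space sketch (NOT kernel code) of the relative-deadline trick:
 * convert the virtual deadline to a vruntime-relative value on dequeue,
 * rebase it on the next placement. Helper names are hypothetical.
 */
#include <assert.h>
#include <stdio.h>

typedef unsigned long long u64;

struct sched_entity {
	u64 vruntime;
	u64 deadline;
	unsigned char rel_deadline;
};

/* Non-sleep dequeue: make the deadline relative (cf. dequeue_entity()). */
static void dequeue_rel(struct sched_entity *se)
{
	se->deadline -= se->vruntime;
	se->rel_deadline = 1;
}

/* Placement: rebase the preserved deadline (cf. place_entity()). */
static void place_rel(struct sched_entity *se, u64 new_vruntime)
{
	se->vruntime = new_vruntime;
	if (se->rel_deadline) {
		se->deadline += se->vruntime;
		se->rel_deadline = 0;
	}
}

int main(void)
{
	struct sched_entity se = { .vruntime = 1000, .deadline = 1300 };

	dequeue_rel(&se);	/* deadline becomes a relative +300 */
	place_rel(&se, 5000);	/* rebased: 5000 + 300 = 5300       */

	/* The 300 units of remaining virtual deadline were preserved. */
	assert(se.deadline - se.vruntime == 300);
	printf("deadline - vruntime = %llu\n", se.deadline - se.vruntime);
	return 0;
}
```

Note also the early return place_entity() takes on the rebase path in the
patch below: an entity with a preserved deadline must skip the placement
logic that would otherwise compute a fresh one.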
-rw-r--r-- | include/linux/sched.h   |  6
-rw-r--r-- | kernel/sched/fair.c     | 23
-rw-r--r-- | kernel/sched/features.h |  4
3 files changed, 26 insertions, 7 deletions
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 8a3a389bd623..d25e1cfd5766 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -544,8 +544,10 @@ struct sched_entity {
 	u64				min_vruntime;
 
 	struct list_head		group_node;
-	unsigned int			on_rq;
-	unsigned int			sched_delayed;
+	unsigned char			on_rq;
+	unsigned char			sched_delayed;
+	unsigned char			rel_deadline;
+					/* hole */
 
 	u64				exec_start;
 	u64				sum_exec_runtime;
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 0eb1bbf7f269..fef0e1f26cd8 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -5270,6 +5270,12 @@ place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
 
 	se->vruntime = vruntime - lag;
 
+	if (sched_feat(PLACE_REL_DEADLINE) && se->rel_deadline) {
+		se->deadline += se->vruntime;
+		se->rel_deadline = 0;
+		return;
+	}
+
 	/*
 	 * When joining the competition; the existing tasks will be,
 	 * on average, halfway through their slice, as such start tasks
@@ -5382,23 +5388,24 @@ static __always_inline void return_cfs_rq_runtime(struct cfs_rq *cfs_rq);
 static bool
 dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
 {
+	bool sleep = flags & DEQUEUE_SLEEP;
+
 	update_curr(cfs_rq);
 
 	if (flags & DEQUEUE_DELAYED) {
 		SCHED_WARN_ON(!se->sched_delayed);
 	} else {
-		bool sleep = flags & DEQUEUE_SLEEP;
-
+		bool delay = sleep;
 		/*
 		 * DELAY_DEQUEUE relies on spurious wakeups, special task
 		 * states must not suffer spurious wakeups, excempt them.
 		 */
 		if (flags & DEQUEUE_SPECIAL)
-			sleep = false;
+			delay = false;
 
-		SCHED_WARN_ON(sleep && se->sched_delayed);
+		SCHED_WARN_ON(delay && se->sched_delayed);
 
-		if (sched_feat(DELAY_DEQUEUE) && sleep &&
+		if (sched_feat(DELAY_DEQUEUE) && delay &&
 		    !entity_eligible(cfs_rq, se)) {
 			if (cfs_rq->next == se)
 				cfs_rq->next = NULL;
@@ -5429,6 +5436,11 @@ dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
 	clear_buddies(cfs_rq, se);
 
 	update_entity_lag(cfs_rq, se);
+	if (sched_feat(PLACE_REL_DEADLINE) && !sleep) {
+		se->deadline -= se->vruntime;
+		se->rel_deadline = 1;
+	}
+
 	if (se != cfs_rq->curr)
 		__dequeue_entity(cfs_rq, se);
 	se->on_rq = 0;
@@ -12992,6 +13004,7 @@ static void switched_from_fair(struct rq *rq, struct task_struct *p)
 	if (p->se.sched_delayed) {
 		dequeue_task(rq, p, DEQUEUE_NOCLOCK | DEQUEUE_SLEEP);
 		p->se.sched_delayed = 0;
+		p->se.rel_deadline = 0;
 		if (sched_feat(DELAY_ZERO) && p->se.vlag > 0)
 			p->se.vlag = 0;
 	}
diff --git a/kernel/sched/features.h b/kernel/sched/features.h
index 7fdeb5576188..caa4d7221d52 100644
--- a/kernel/sched/features.h
+++ b/kernel/sched/features.h
@@ -10,6 +10,10 @@ SCHED_FEAT(PLACE_LAG, true)
  */
 SCHED_FEAT(PLACE_DEADLINE_INITIAL, true)
 /*
+ * Preserve relative virtual deadline on 'migration'.
+ */
+SCHED_FEAT(PLACE_REL_DEADLINE, true)
+/*
  * Inhibit (wakeup) preemption until the current task has either matched the
  * 0-lag point or until is has exhausted it's slice.
  */
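Like the other placement heuristics in features.h, the new behaviour sits
behind a SCHED_FEAT() toggle, so on a kernel built with CONFIG_SCHED_DEBUG it
can be flipped at runtime for A/B comparison. A rough sketch, assuming debugfs
is mounted at its conventional path (older kernels exposed this as
/sys/kernel/debug/sched_features instead):

```sh
# Show the current feature mask; the flag reads PLACE_REL_DEADLINE when
# enabled and NO_PLACE_REL_DEADLINE when disabled.
cat /sys/kernel/debug/sched/features

# Disable, then re-enable, relative-deadline preservation.
echo NO_PLACE_REL_DEADLINE > /sys/kernel/debug/sched/features
echo PLACE_REL_DEADLINE > /sys/kernel/debug/sched/features
```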