diff options
author | Ingo Molnar <mingo@kernel.org> | 2023-01-31 15:01:20 +0100 |
---|---|---|
committer | Ingo Molnar <mingo@kernel.org> | 2023-01-31 15:01:20 +0100 |
commit | 57a30218fa25c469ed507964bbf028b7a064309a (patch) | |
tree | 231c7ab6b08f567077b96ed760aaf4c685c899f3 /kernel/sched | |
parent | KVM: selftests: Fix build of rseq test (diff) | |
parent | Linux 6.2-rc6 (diff) | |
download | linux-57a30218fa25c469ed507964bbf028b7a064309a.tar.xz linux-57a30218fa25c469ed507964bbf028b7a064309a.zip |
Merge tag 'v6.2-rc6' into sched/core, to pick up fixes
Pick up fixes before merging another batch of cpuidle updates.
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Diffstat (limited to 'kernel/sched')
-rw-r--r-- | kernel/sched/core.c | 77 | ||||
-rw-r--r-- | kernel/sched/fair.c | 48 |
2 files changed, 94 insertions, 31 deletions
diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 03b8529db73f..4580fe3e1d0c 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -2606,27 +2606,71 @@ void do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask) .user_mask = NULL, .flags = SCA_USER, /* clear the user requested mask */ }; + union cpumask_rcuhead { + cpumask_t cpumask; + struct rcu_head rcu; + }; __do_set_cpus_allowed(p, &ac); - kfree(ac.user_mask); + + /* + * Because this is called with p->pi_lock held, it is not possible + * to use kfree() here (when PREEMPT_RT=y), therefore punt to using + * kfree_rcu(). + */ + kfree_rcu((union cpumask_rcuhead *)ac.user_mask, rcu); +} + +static cpumask_t *alloc_user_cpus_ptr(int node) +{ + /* + * See do_set_cpus_allowed() above for the rcu_head usage. + */ + int size = max_t(int, cpumask_size(), sizeof(struct rcu_head)); + + return kmalloc_node(size, GFP_KERNEL, node); } int dup_user_cpus_ptr(struct task_struct *dst, struct task_struct *src, int node) { + cpumask_t *user_mask; unsigned long flags; - if (!src->user_cpus_ptr) + /* + * Always clear dst->user_cpus_ptr first as their user_cpus_ptr's + * may differ by now due to racing. + */ + dst->user_cpus_ptr = NULL; + + /* + * This check is racy and losing the race is a valid situation. + * It is not worth the extra overhead of taking the pi_lock on + * every fork/clone. + */ + if (data_race(!src->user_cpus_ptr)) return 0; - dst->user_cpus_ptr = kmalloc_node(cpumask_size(), GFP_KERNEL, node); - if (!dst->user_cpus_ptr) + user_mask = alloc_user_cpus_ptr(node); + if (!user_mask) return -ENOMEM; - /* Use pi_lock to protect content of user_cpus_ptr */ + /* + * Use pi_lock to protect content of user_cpus_ptr + * + * Though unlikely, user_cpus_ptr can be reset to NULL by a concurrent + * do_set_cpus_allowed(). + */ raw_spin_lock_irqsave(&src->pi_lock, flags); - cpumask_copy(dst->user_cpus_ptr, src->user_cpus_ptr); + if (src->user_cpus_ptr) { + swap(dst->user_cpus_ptr, user_mask); + cpumask_copy(dst->user_cpus_ptr, src->user_cpus_ptr); + } raw_spin_unlock_irqrestore(&src->pi_lock, flags); + + if (unlikely(user_mask)) + kfree(user_mask); + return 0; } @@ -3583,6 +3627,11 @@ static inline bool rq_has_pinned_tasks(struct rq *rq) return false; } +static inline cpumask_t *alloc_user_cpus_ptr(int node) +{ + return NULL; +} + #endif /* !CONFIG_SMP */ static void @@ -5512,7 +5561,9 @@ void scheduler_tick(void) unsigned long thermal_pressure; u64 resched_latency; - arch_scale_freq_tick(); + if (housekeeping_cpu(cpu, HK_TYPE_TICK)) + arch_scale_freq_tick(); + sched_clock_tick(); rq_lock(rq, &rf); @@ -8247,12 +8298,18 @@ long sched_setaffinity(pid_t pid, const struct cpumask *in_mask) if (retval) goto out_put_task; - user_mask = kmalloc(cpumask_size(), GFP_KERNEL); - if (!user_mask) { + /* + * With non-SMP configs, user_cpus_ptr/user_mask isn't used and + * alloc_user_cpus_ptr() returns NULL. + */ + user_mask = alloc_user_cpus_ptr(NUMA_NO_NODE); + if (user_mask) { + cpumask_copy(user_mask, in_mask); + } else if (IS_ENABLED(CONFIG_SMP)) { retval = -ENOMEM; goto out_put_task; } - cpumask_copy(user_mask, in_mask); + ac = (struct affinity_context){ .new_mask = in_mask, .user_mask = user_mask, diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index d4db72f8f84e..7c46485d65d7 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -7359,10 +7359,10 @@ static int find_energy_efficient_cpu(struct task_struct *p, int prev_cpu) eenv_task_busy_time(&eenv, p, prev_cpu); for (; pd; pd = pd->next) { + unsigned long util_min = p_util_min, util_max = p_util_max; unsigned long cpu_cap, cpu_thermal_cap, util; unsigned long cur_delta, max_spare_cap = 0; unsigned long rq_util_min, rq_util_max; - unsigned long util_min, util_max; unsigned long prev_spare_cap = 0; int max_spare_cap_cpu = -1; unsigned long base_energy; @@ -7381,6 +7381,8 @@ static int find_energy_efficient_cpu(struct task_struct *p, int prev_cpu) eenv.pd_cap = 0; for_each_cpu(cpu, cpus) { + struct rq *rq = cpu_rq(cpu); + eenv.pd_cap += cpu_thermal_cap; if (!cpumask_test_cpu(cpu, sched_domain_span(sd))) @@ -7399,24 +7401,19 @@ static int find_energy_efficient_cpu(struct task_struct *p, int prev_cpu) * much capacity we can get out of the CPU; this is * aligned with sched_cpu_util(). */ - if (uclamp_is_used()) { - if (uclamp_rq_is_idle(cpu_rq(cpu))) { - util_min = p_util_min; - util_max = p_util_max; - } else { - /* - * Open code uclamp_rq_util_with() except for - * the clamp() part. Ie: apply max aggregation - * only. util_fits_cpu() logic requires to - * operate on non clamped util but must use the - * max-aggregated uclamp_{min, max}. - */ - rq_util_min = uclamp_rq_get(cpu_rq(cpu), UCLAMP_MIN); - rq_util_max = uclamp_rq_get(cpu_rq(cpu), UCLAMP_MAX); - - util_min = max(rq_util_min, p_util_min); - util_max = max(rq_util_max, p_util_max); - } + if (uclamp_is_used() && !uclamp_rq_is_idle(rq)) { + /* + * Open code uclamp_rq_util_with() except for + * the clamp() part. Ie: apply max aggregation + * only. util_fits_cpu() logic requires to + * operate on non clamped util but must use the + * max-aggregated uclamp_{min, max}. + */ + rq_util_min = uclamp_rq_get(rq, UCLAMP_MIN); + rq_util_max = uclamp_rq_get(rq, UCLAMP_MAX); + + util_min = max(rq_util_min, p_util_min); + util_max = max(rq_util_max, p_util_max); } if (!util_fits_cpu(util, util_min, util_max, cpu)) continue; @@ -9001,16 +8998,23 @@ static void update_cpu_capacity(struct sched_domain *sd, int cpu) * * Thermal pressure will impact all cpus in this perf domain * equally. */ - if (static_branch_unlikely(&sched_asym_cpucapacity)) { + if (sched_energy_enabled()) { unsigned long inv_cap = capacity_orig - thermal_load_avg(rq); - struct perf_domain *pd = rcu_dereference(rq->rd->pd); + struct perf_domain *pd; + rcu_read_lock(); + + pd = rcu_dereference(rq->rd->pd); rq->cpu_capacity_inverted = 0; for (; pd; pd = pd->next) { struct cpumask *pd_span = perf_domain_span(pd); unsigned long pd_cap_orig, pd_cap; + /* We can't be inverted against our own pd */ + if (cpumask_test_cpu(cpu_of(rq), pd_span)) + continue; + cpu = cpumask_any(pd_span); pd_cap_orig = arch_scale_cpu_capacity(cpu); @@ -9035,6 +9039,8 @@ static void update_cpu_capacity(struct sched_domain *sd, int cpu) break; } } + + rcu_read_unlock(); } trace_sched_cpu_capacity_tp(rq); |