30 files changed, 94 insertions, 496 deletions
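The bulk of this series replaces direct task_struct::cpus_allowed accesses with a two-part scheme: a writable cpus_mask plus a const cpus_ptr that normally points back at that mask, so readers go through the pointer while set_cpus_allowed_common() keeps updating the mask, and dup_task_struct() re-points the child's cpus_ptr at its own cpus_mask whenever the parent's pointer was self-referential. The stand-alone C sketch below only illustrates that indirection pattern; struct task, cpu_allowed() and set_cpus_allowed() are invented names for the example, not kernel API.

/* Toy illustration of the cpus_ptr/cpus_mask split; not kernel code. */
#include <stdbool.h>
#include <stdio.h>

#define NR_CPUS 8

struct cpumask { unsigned long bits; };            /* toy stand-in for cpumask_t    */

struct task {
	const struct cpumask *cpus_ptr;            /* what the read side follows    */
	struct cpumask cpus_mask;                  /* what affinity changes update  */
};

static void task_init(struct task *p)
{
	p->cpus_mask.bits = (1UL << NR_CPUS) - 1;  /* CPU_MASK_ALL equivalent       */
	p->cpus_ptr = &p->cpus_mask;               /* normally self-referential     */
}

static bool cpu_allowed(const struct task *p, int cpu)
{
	return p->cpus_ptr->bits & (1UL << cpu);   /* readers: pointer only         */
}

static void set_cpus_allowed(struct task *p, unsigned long bits)
{
	p->cpus_mask.bits = bits;                  /* writers: mask only            */
}

int main(void)
{
	struct task p;
	struct cpumask pinned = { .bits = 1UL << 3 };

	task_init(&p);
	set_cpus_allowed(&p, 0x0fUL);
	printf("cpu 5 allowed: %d\n", cpu_allowed(&p, 5));               /* 0 */

	/* A temporary override retargets the pointer; cpus_mask is untouched. */
	p.cpus_ptr = &pinned;
	printf("cpu 3 allowed while pinned: %d\n", cpu_allowed(&p, 3));  /* 1 */

	p.cpus_ptr = &p.cpus_mask;                 /* restore the normal view       */
	printf("cpu 1 allowed after restore: %d\n", cpu_allowed(&p, 1)); /* 1 */
	return 0;
}

Keeping every reader on the pointer is what lets a narrower mask be installed and later withdrawn without disturbing the user-requested cpus_mask, which is why the fork path below only has to fix up the pointer when it was self-referential.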
diff --git a/arch/ia64/kernel/mca.c b/arch/ia64/kernel/mca.c index 6a52d761854b..79190d877fa7 100644 --- a/arch/ia64/kernel/mca.c +++ b/arch/ia64/kernel/mca.c @@ -1831,7 +1831,7 @@ format_mca_init_stack(void *mca_data, unsigned long offset, ti->cpu = cpu; p->stack = ti; p->state = TASK_UNINTERRUPTIBLE; - cpumask_set_cpu(cpu, &p->cpus_allowed); + cpumask_set_cpu(cpu, &p->cpus_mask); INIT_LIST_HEAD(&p->tasks); p->parent = p->real_parent = p->group_leader = p; INIT_LIST_HEAD(&p->children); diff --git a/arch/mips/include/asm/switch_to.h b/arch/mips/include/asm/switch_to.h index 0f813bb753c6..09cbe9042828 100644 --- a/arch/mips/include/asm/switch_to.h +++ b/arch/mips/include/asm/switch_to.h @@ -42,7 +42,7 @@ extern struct task_struct *ll_task; * inline to try to keep the overhead down. If we have been forced to run on * a "CPU" with an FPU because of a previous high level of FP computation, * but did not actually use the FPU during the most recent time-slice (CU1 - * isn't set), we undo the restriction on cpus_allowed. + * isn't set), we undo the restriction on cpus_mask. * * We're not calling set_cpus_allowed() here, because we have no need to * force prompt migration - we're already switching the current CPU to a @@ -57,7 +57,7 @@ do { \ test_ti_thread_flag(__prev_ti, TIF_FPUBOUND) && \ (!(KSTK_STATUS(prev) & ST0_CU1))) { \ clear_ti_thread_flag(__prev_ti, TIF_FPUBOUND); \ - prev->cpus_allowed = prev->thread.user_cpus_allowed; \ + prev->cpus_mask = prev->thread.user_cpus_allowed; \ } \ next->thread.emulated_fp = 0; \ } while(0) diff --git a/arch/mips/kernel/mips-mt-fpaff.c b/arch/mips/kernel/mips-mt-fpaff.c index a7c0f97e4b0d..1a08428eedcf 100644 --- a/arch/mips/kernel/mips-mt-fpaff.c +++ b/arch/mips/kernel/mips-mt-fpaff.c @@ -177,7 +177,7 @@ asmlinkage long mipsmt_sys_sched_getaffinity(pid_t pid, unsigned int len, if (retval) goto out_unlock; - cpumask_or(&allowed, &p->thread.user_cpus_allowed, &p->cpus_allowed); + cpumask_or(&allowed, &p->thread.user_cpus_allowed, p->cpus_ptr); cpumask_and(&mask, &allowed, cpu_active_mask); out_unlock: diff --git a/arch/mips/kernel/traps.c b/arch/mips/kernel/traps.c index c52766a5b85f..ac7159263da0 100644 --- a/arch/mips/kernel/traps.c +++ b/arch/mips/kernel/traps.c @@ -891,12 +891,12 @@ static void mt_ase_fp_affinity(void) * restricted the allowed set to exclude any CPUs with FPUs, * we'll skip the procedure. */ - if (cpumask_intersects(¤t->cpus_allowed, &mt_fpu_cpumask)) { + if (cpumask_intersects(¤t->cpus_mask, &mt_fpu_cpumask)) { cpumask_t tmask; current->thread.user_cpus_allowed - = current->cpus_allowed; - cpumask_and(&tmask, ¤t->cpus_allowed, + = current->cpus_mask; + cpumask_and(&tmask, ¤t->cpus_mask, &mt_fpu_cpumask); set_cpus_allowed_ptr(current, &tmask); set_thread_flag(TIF_FPUBOUND); diff --git a/arch/powerpc/platforms/cell/spufs/sched.c b/arch/powerpc/platforms/cell/spufs/sched.c index e56b553de27b..f18d5067cd0f 100644 --- a/arch/powerpc/platforms/cell/spufs/sched.c +++ b/arch/powerpc/platforms/cell/spufs/sched.c @@ -128,7 +128,7 @@ void __spu_update_sched_info(struct spu_context *ctx) * runqueue. The context will be rescheduled on the proper node * if it is timesliced or preempted. */ - cpumask_copy(&ctx->cpus_allowed, ¤t->cpus_allowed); + cpumask_copy(&ctx->cpus_allowed, current->cpus_ptr); /* Save the current cpu id for spu interrupt routing. 
*/ ctx->last_ran = raw_smp_processor_id(); diff --git a/arch/x86/kernel/cpu/resctrl/pseudo_lock.c b/arch/x86/kernel/cpu/resctrl/pseudo_lock.c index 604c0e3bcc83..f68baccc69f0 100644 --- a/arch/x86/kernel/cpu/resctrl/pseudo_lock.c +++ b/arch/x86/kernel/cpu/resctrl/pseudo_lock.c @@ -1503,7 +1503,7 @@ static int pseudo_lock_dev_mmap(struct file *filp, struct vm_area_struct *vma) * may be scheduled elsewhere and invalidate entries in the * pseudo-locked region. */ - if (!cpumask_subset(¤t->cpus_allowed, &plr->d->cpu_mask)) { + if (!cpumask_subset(current->cpus_ptr, &plr->d->cpu_mask)) { mutex_unlock(&rdtgroup_mutex); return -EINVAL; } diff --git a/drivers/infiniband/hw/hfi1/affinity.c b/drivers/infiniband/hw/hfi1/affinity.c index 4fe662c3bbc1..c142b23bb401 100644 --- a/drivers/infiniband/hw/hfi1/affinity.c +++ b/drivers/infiniband/hw/hfi1/affinity.c @@ -1038,7 +1038,7 @@ int hfi1_get_proc_affinity(int node) struct hfi1_affinity_node *entry; cpumask_var_t diff, hw_thread_mask, available_mask, intrs_mask; const struct cpumask *node_mask, - *proc_mask = ¤t->cpus_allowed; + *proc_mask = current->cpus_ptr; struct hfi1_affinity_node_list *affinity = &node_affinity; struct cpu_mask_set *set = &affinity->proc; @@ -1046,7 +1046,7 @@ int hfi1_get_proc_affinity(int node) * check whether process/context affinity has already * been set */ - if (cpumask_weight(proc_mask) == 1) { + if (current->nr_cpus_allowed == 1) { hfi1_cdbg(PROC, "PID %u %s affinity set to CPU %*pbl", current->pid, current->comm, cpumask_pr_args(proc_mask)); @@ -1057,7 +1057,7 @@ int hfi1_get_proc_affinity(int node) cpu = cpumask_first(proc_mask); cpumask_set_cpu(cpu, &set->used); goto done; - } else if (cpumask_weight(proc_mask) < cpumask_weight(&set->mask)) { + } else if (current->nr_cpus_allowed < cpumask_weight(&set->mask)) { hfi1_cdbg(PROC, "PID %u %s affinity set to CPU set(s) %*pbl", current->pid, current->comm, cpumask_pr_args(proc_mask)); diff --git a/drivers/infiniband/hw/hfi1/sdma.c b/drivers/infiniband/hw/hfi1/sdma.c index b0110728f541..7e8139ee0cc1 100644 --- a/drivers/infiniband/hw/hfi1/sdma.c +++ b/drivers/infiniband/hw/hfi1/sdma.c @@ -855,14 +855,13 @@ struct sdma_engine *sdma_select_user_engine(struct hfi1_devdata *dd, { struct sdma_rht_node *rht_node; struct sdma_engine *sde = NULL; - const struct cpumask *current_mask = ¤t->cpus_allowed; unsigned long cpu_id; /* * To ensure that always the same sdma engine(s) will be * selected make sure the process is pinned to this CPU only. 
*/ - if (cpumask_weight(current_mask) != 1) + if (current->nr_cpus_allowed != 1) goto out; cpu_id = smp_processor_id(); diff --git a/drivers/infiniband/hw/qib/qib_file_ops.c b/drivers/infiniband/hw/qib/qib_file_ops.c index 78fa634de98a..27b6e664e59d 100644 --- a/drivers/infiniband/hw/qib/qib_file_ops.c +++ b/drivers/infiniband/hw/qib/qib_file_ops.c @@ -1142,7 +1142,7 @@ static __poll_t qib_poll(struct file *fp, struct poll_table_struct *pt) static void assign_ctxt_affinity(struct file *fp, struct qib_devdata *dd) { struct qib_filedata *fd = fp->private_data; - const unsigned int weight = cpumask_weight(¤t->cpus_allowed); + const unsigned int weight = current->nr_cpus_allowed; const struct cpumask *local_mask = cpumask_of_pcibus(dd->pcidev->bus); int local_cpu; @@ -1623,9 +1623,8 @@ static int qib_assign_ctxt(struct file *fp, const struct qib_user_info *uinfo) ret = find_free_ctxt(i_minor - 1, fp, uinfo); else { int unit; - const unsigned int cpu = cpumask_first(¤t->cpus_allowed); - const unsigned int weight = - cpumask_weight(¤t->cpus_allowed); + const unsigned int cpu = cpumask_first(current->cpus_ptr); + const unsigned int weight = current->nr_cpus_allowed; if (weight == 1 && !test_bit(cpu, qib_cpulist)) if (!find_hca(cpu, &unit) && unit >= 0) diff --git a/fs/proc/array.c b/fs/proc/array.c index 2edbb657f859..84908556ea58 100644 --- a/fs/proc/array.c +++ b/fs/proc/array.c @@ -381,9 +381,9 @@ static inline void task_context_switch_counts(struct seq_file *m, static void task_cpus_allowed(struct seq_file *m, struct task_struct *task) { seq_printf(m, "Cpus_allowed:\t%*pb\n", - cpumask_pr_args(&task->cpus_allowed)); + cpumask_pr_args(task->cpus_ptr)); seq_printf(m, "Cpus_allowed_list:\t%*pbl\n", - cpumask_pr_args(&task->cpus_allowed)); + cpumask_pr_args(task->cpus_ptr)); } static inline void task_core_dumping(struct seq_file *m, struct mm_struct *mm) diff --git a/include/linux/sched.h b/include/linux/sched.h index 11837410690f..1b2590a8d038 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -651,7 +651,8 @@ struct task_struct { unsigned int policy; int nr_cpus_allowed; - cpumask_t cpus_allowed; + const cpumask_t *cpus_ptr; + cpumask_t cpus_mask; #ifdef CONFIG_PREEMPT_RCU int rcu_read_lock_nesting; @@ -1399,7 +1400,7 @@ extern struct pid *cad_pid; #define PF_SWAPWRITE 0x00800000 /* Allowed to write to swap */ #define PF_MEMSTALL 0x01000000 /* Stalled due to lack of memory */ #define PF_UMH 0x02000000 /* I'm an Usermodehelper process */ -#define PF_NO_SETAFFINITY 0x04000000 /* Userland is not allowed to meddle with cpus_allowed */ +#define PF_NO_SETAFFINITY 0x04000000 /* Userland is not allowed to meddle with cpus_mask */ #define PF_MCE_EARLY 0x08000000 /* Early kill for mce process policy */ #define PF_MEMALLOC_NOCMA 0x10000000 /* All allocation request will have _GFP_MOVABLE cleared */ #define PF_FREEZER_SKIP 0x40000000 /* Freezer should not count it as freezable */ diff --git a/include/linux/sched/nohz.h b/include/linux/sched/nohz.h index b36f4cf38111..1abe91ff6e4a 100644 --- a/include/linux/sched/nohz.h +++ b/include/linux/sched/nohz.h @@ -7,14 +7,6 @@ */ #if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ_COMMON) -extern void cpu_load_update_nohz_start(void); -extern void cpu_load_update_nohz_stop(void); -#else -static inline void cpu_load_update_nohz_start(void) { } -static inline void cpu_load_update_nohz_stop(void) { } -#endif - -#if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ_COMMON) extern void nohz_balance_enter_idle(int cpu); extern int get_nohz_timer_target(void); #else 
diff --git a/include/linux/sched/topology.h b/include/linux/sched/topology.h index cfc0a89a7159..53afbe07354a 100644 --- a/include/linux/sched/topology.h +++ b/include/linux/sched/topology.h @@ -84,11 +84,6 @@ struct sched_domain { unsigned int busy_factor; /* less balancing by factor if busy */ unsigned int imbalance_pct; /* No balance until over watermark */ unsigned int cache_nice_tries; /* Leave cache hot tasks for # tries */ - unsigned int busy_idx; - unsigned int idle_idx; - unsigned int newidle_idx; - unsigned int wake_idx; - unsigned int forkexec_idx; int nohz_idle; /* NOHZ IDLE status */ int flags; /* See SD_* */ diff --git a/init/init_task.c b/init/init_task.c index c70ef656d0f4..3c27c0efa316 100644 --- a/init/init_task.c +++ b/init/init_task.c @@ -72,7 +72,8 @@ struct task_struct init_task .static_prio = MAX_PRIO - 20, .normal_prio = MAX_PRIO - 20, .policy = SCHED_NORMAL, - .cpus_allowed = CPU_MASK_ALL, + .cpus_ptr = &init_task.cpus_mask, + .cpus_mask = CPU_MASK_ALL, .nr_cpus_allowed= NR_CPUS, .mm = NULL, .active_mm = &init_mm, diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c index 515525ff1cfd..a1590e244f5f 100644 --- a/kernel/cgroup/cpuset.c +++ b/kernel/cgroup/cpuset.c @@ -2829,7 +2829,7 @@ static void cpuset_fork(struct task_struct *task) if (task_css_is_root(task, cpuset_cgrp_id)) return; - set_cpus_allowed_ptr(task, ¤t->cpus_allowed); + set_cpus_allowed_ptr(task, current->cpus_ptr); task->mems_allowed = current->mems_allowed; } diff --git a/kernel/fork.c b/kernel/fork.c index 75675b9bf6df..6be686283e55 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -894,6 +894,8 @@ static struct task_struct *dup_task_struct(struct task_struct *orig, int node) #ifdef CONFIG_STACKPROTECTOR tsk->stack_canary = get_random_canary(); #endif + if (orig->cpus_ptr == &orig->cpus_mask) + tsk->cpus_ptr = &tsk->cpus_mask; /* * One for us, one for whoever does the "release_task()" (usually diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 874c427742a9..29984d8c41f0 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -930,7 +930,7 @@ static inline bool is_per_cpu_kthread(struct task_struct *p) */ static inline bool is_cpu_allowed(struct task_struct *p, int cpu) { - if (!cpumask_test_cpu(cpu, &p->cpus_allowed)) + if (!cpumask_test_cpu(cpu, p->cpus_ptr)) return false; if (is_per_cpu_kthread(p)) @@ -1025,7 +1025,7 @@ static int migration_cpu_stop(void *data) local_irq_disable(); /* * We need to explicitly wake pending tasks before running - * __migrate_task() such that we will not miss enforcing cpus_allowed + * __migrate_task() such that we will not miss enforcing cpus_ptr * during wakeups, see set_cpus_allowed_ptr()'s TASK_WAKING test. 
*/ sched_ttwu_pending(); @@ -1056,7 +1056,7 @@ static int migration_cpu_stop(void *data) */ void set_cpus_allowed_common(struct task_struct *p, const struct cpumask *new_mask) { - cpumask_copy(&p->cpus_allowed, new_mask); + cpumask_copy(&p->cpus_mask, new_mask); p->nr_cpus_allowed = cpumask_weight(new_mask); } @@ -1126,7 +1126,7 @@ static int __set_cpus_allowed_ptr(struct task_struct *p, goto out; } - if (cpumask_equal(&p->cpus_allowed, new_mask)) + if (cpumask_equal(p->cpus_ptr, new_mask)) goto out; if (!cpumask_intersects(new_mask, cpu_valid_mask)) { @@ -1286,10 +1286,10 @@ static int migrate_swap_stop(void *data) if (task_cpu(arg->src_task) != arg->src_cpu) goto unlock; - if (!cpumask_test_cpu(arg->dst_cpu, &arg->src_task->cpus_allowed)) + if (!cpumask_test_cpu(arg->dst_cpu, arg->src_task->cpus_ptr)) goto unlock; - if (!cpumask_test_cpu(arg->src_cpu, &arg->dst_task->cpus_allowed)) + if (!cpumask_test_cpu(arg->src_cpu, arg->dst_task->cpus_ptr)) goto unlock; __migrate_swap_task(arg->src_task, arg->dst_cpu); @@ -1331,10 +1331,10 @@ int migrate_swap(struct task_struct *cur, struct task_struct *p, if (!cpu_active(arg.src_cpu) || !cpu_active(arg.dst_cpu)) goto out; - if (!cpumask_test_cpu(arg.dst_cpu, &arg.src_task->cpus_allowed)) + if (!cpumask_test_cpu(arg.dst_cpu, arg.src_task->cpus_ptr)) goto out; - if (!cpumask_test_cpu(arg.src_cpu, &arg.dst_task->cpus_allowed)) + if (!cpumask_test_cpu(arg.src_cpu, arg.dst_task->cpus_ptr)) goto out; trace_sched_swap_numa(cur, arg.src_cpu, p, arg.dst_cpu); @@ -1479,7 +1479,7 @@ void kick_process(struct task_struct *p) EXPORT_SYMBOL_GPL(kick_process); /* - * ->cpus_allowed is protected by both rq->lock and p->pi_lock + * ->cpus_ptr is protected by both rq->lock and p->pi_lock * * A few notes on cpu_active vs cpu_online: * @@ -1519,14 +1519,14 @@ static int select_fallback_rq(int cpu, struct task_struct *p) for_each_cpu(dest_cpu, nodemask) { if (!cpu_active(dest_cpu)) continue; - if (cpumask_test_cpu(dest_cpu, &p->cpus_allowed)) + if (cpumask_test_cpu(dest_cpu, p->cpus_ptr)) return dest_cpu; } } for (;;) { /* Any allowed, online CPU? */ - for_each_cpu(dest_cpu, &p->cpus_allowed) { + for_each_cpu(dest_cpu, p->cpus_ptr) { if (!is_cpu_allowed(p, dest_cpu)) continue; @@ -1570,7 +1570,7 @@ out: } /* - * The caller (fork, wakeup) owns p->pi_lock, ->cpus_allowed is stable. + * The caller (fork, wakeup) owns p->pi_lock, ->cpus_ptr is stable. */ static inline int select_task_rq(struct task_struct *p, int cpu, int sd_flags, int wake_flags) @@ -1580,11 +1580,11 @@ int select_task_rq(struct task_struct *p, int cpu, int sd_flags, int wake_flags) if (p->nr_cpus_allowed > 1) cpu = p->sched_class->select_task_rq(p, cpu, sd_flags, wake_flags); else - cpu = cpumask_any(&p->cpus_allowed); + cpu = cpumask_any(p->cpus_ptr); /* * In order not to call set_task_cpu() on a blocking task we need - * to rely on ttwu() to place the task on a valid ->cpus_allowed + * to rely on ttwu() to place the task on a valid ->cpus_ptr * CPU. * * Since this is common to all placement strategies, this lives here. 
@@ -2395,7 +2395,7 @@ void wake_up_new_task(struct task_struct *p) #ifdef CONFIG_SMP /* * Fork balancing, do it here and not earlier because: - * - cpus_allowed can change in the fork path + * - cpus_ptr can change in the fork path * - any previously selected CPU might disappear through hotplug * * Use __set_task_cpu() to avoid calling sched_class::migrate_task_rq, @@ -3033,7 +3033,6 @@ void scheduler_tick(void) update_rq_clock(rq); curr->sched_class->task_tick(rq, curr, 0); - cpu_load_update_active(rq); calc_global_load_tick(rq); psi_task_tick(rq); @@ -4267,7 +4266,7 @@ change: * the entire root_domain to become SCHED_DEADLINE. We * will also fail if there's no bandwidth available. */ - if (!cpumask_subset(span, &p->cpus_allowed) || + if (!cpumask_subset(span, p->cpus_ptr) || rq->rd->dl_bw.bw == 0) { task_rq_unlock(rq, p, &rf); return -EPERM; @@ -4866,7 +4865,7 @@ long sched_getaffinity(pid_t pid, struct cpumask *mask) goto out_unlock; raw_spin_lock_irqsave(&p->pi_lock, flags); - cpumask_and(mask, &p->cpus_allowed, cpu_active_mask); + cpumask_and(mask, &p->cpus_mask, cpu_active_mask); raw_spin_unlock_irqrestore(&p->pi_lock, flags); out_unlock: @@ -5443,7 +5442,7 @@ int task_can_attach(struct task_struct *p, * allowed nodes is unnecessary. Thus, cpusets are not * applicable for such threads. This prevents checking for * success of set_cpus_allowed_ptr() on all attached tasks - * before cpus_allowed may be changed. + * before cpus_mask may be changed. */ if (p->flags & PF_NO_SETAFFINITY) { ret = -EINVAL; @@ -5470,7 +5469,7 @@ int migrate_task_to(struct task_struct *p, int target_cpu) if (curr_cpu == target_cpu) return 0; - if (!cpumask_test_cpu(target_cpu, &p->cpus_allowed)) + if (!cpumask_test_cpu(target_cpu, p->cpus_ptr)) return -EINVAL; /* TODO: This is not properly updating schedstats */ @@ -5608,7 +5607,7 @@ static void migrate_tasks(struct rq *dead_rq, struct rq_flags *rf) put_prev_task(rq, next); /* - * Rules for changing task_struct::cpus_allowed are holding + * Rules for changing task_struct::cpus_mask are holding * both pi_lock and rq->lock, such that holding either * stabilizes the mask. 
* @@ -5902,8 +5901,8 @@ DECLARE_PER_CPU(cpumask_var_t, select_idle_mask); void __init sched_init(void) { - int i, j; unsigned long alloc_size = 0, ptr; + int i; wait_bit_init(); @@ -6005,10 +6004,6 @@ void __init sched_init(void) #ifdef CONFIG_RT_GROUP_SCHED init_tg_rt_entry(&root_task_group, &rq->rt, NULL, i, NULL); #endif - - for (j = 0; j < CPU_LOAD_IDX_MAX; j++) - rq->cpu_load[j] = 0; - #ifdef CONFIG_SMP rq->sd = NULL; rq->rd = NULL; diff --git a/kernel/sched/cpudeadline.c b/kernel/sched/cpudeadline.c index ec4e4a9aab5f..5cc4012572ec 100644 --- a/kernel/sched/cpudeadline.c +++ b/kernel/sched/cpudeadline.c @@ -120,14 +120,14 @@ int cpudl_find(struct cpudl *cp, struct task_struct *p, const struct sched_dl_entity *dl_se = &p->dl; if (later_mask && - cpumask_and(later_mask, cp->free_cpus, &p->cpus_allowed)) { + cpumask_and(later_mask, cp->free_cpus, p->cpus_ptr)) { return 1; } else { int best_cpu = cpudl_maximum(cp); WARN_ON(best_cpu != -1 && !cpu_present(best_cpu)); - if (cpumask_test_cpu(best_cpu, &p->cpus_allowed) && + if (cpumask_test_cpu(best_cpu, p->cpus_ptr) && dl_time_before(dl_se->deadline, cp->elements[0].dl)) { if (later_mask) cpumask_set_cpu(best_cpu, later_mask); diff --git a/kernel/sched/cpupri.c b/kernel/sched/cpupri.c index 9c6480e6d62d..b7abca987d94 100644 --- a/kernel/sched/cpupri.c +++ b/kernel/sched/cpupri.c @@ -94,11 +94,11 @@ int cpupri_find(struct cpupri *cp, struct task_struct *p, if (skip) continue; - if (cpumask_any_and(&p->cpus_allowed, vec->mask) >= nr_cpu_ids) + if (cpumask_any_and(p->cpus_ptr, vec->mask) >= nr_cpu_ids) continue; if (lowest_mask) { - cpumask_and(lowest_mask, &p->cpus_allowed, vec->mask); + cpumask_and(lowest_mask, p->cpus_ptr, vec->mask); /* * We have to ensure that we have at least one bit diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c index 43901fa3f269..c1ef30861068 100644 --- a/kernel/sched/deadline.c +++ b/kernel/sched/deadline.c @@ -538,7 +538,7 @@ static struct rq *dl_task_offline_migration(struct rq *rq, struct task_struct *p * If we cannot preempt any rq, fall back to pick any * online CPU: */ - cpu = cpumask_any_and(cpu_active_mask, &p->cpus_allowed); + cpu = cpumask_any_and(cpu_active_mask, p->cpus_ptr); if (cpu >= nr_cpu_ids) { /* * Failed to find any suitable CPU. @@ -1824,7 +1824,7 @@ static void set_curr_task_dl(struct rq *rq) static int pick_dl_task(struct rq *rq, struct task_struct *p, int cpu) { if (!task_running(rq, p) && - cpumask_test_cpu(cpu, &p->cpus_allowed)) + cpumask_test_cpu(cpu, p->cpus_ptr)) return 1; return 0; } @@ -1974,7 +1974,7 @@ static struct rq *find_lock_later_rq(struct task_struct *task, struct rq *rq) /* Retry if something changed. 
*/ if (double_lock_balance(rq, later_rq)) { if (unlikely(task_rq(task) != rq || - !cpumask_test_cpu(later_rq->cpu, &task->cpus_allowed) || + !cpumask_test_cpu(later_rq->cpu, task->cpus_ptr) || task_running(rq, task) || !dl_task(task) || !task_on_rq_queued(task))) { diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c index 678bfb9bd87f..7ffde8ce82fd 100644 --- a/kernel/sched/debug.c +++ b/kernel/sched/debug.c @@ -236,49 +236,35 @@ static void sd_free_ctl_entry(struct ctl_table **tablep) *tablep = NULL; } -static int min_load_idx = 0; -static int max_load_idx = CPU_LOAD_IDX_MAX-1; - static void set_table_entry(struct ctl_table *entry, const char *procname, void *data, int maxlen, - umode_t mode, proc_handler *proc_handler, - bool load_idx) + umode_t mode, proc_handler *proc_handler) { entry->procname = procname; entry->data = data; entry->maxlen = maxlen; entry->mode = mode; entry->proc_handler = proc_handler; - - if (load_idx) { - entry->extra1 = &min_load_idx; - entry->extra2 = &max_load_idx; - } } static struct ctl_table * sd_alloc_ctl_domain_table(struct sched_domain *sd) { - struct ctl_table *table = sd_alloc_ctl_entry(14); + struct ctl_table *table = sd_alloc_ctl_entry(9); if (table == NULL) return NULL; - set_table_entry(&table[0] , "min_interval", &sd->min_interval, sizeof(long), 0644, proc_doulongvec_minmax, false); - set_table_entry(&table[1] , "max_interval", &sd->max_interval, sizeof(long), 0644, proc_doulongvec_minmax, false); - set_table_entry(&table[2] , "busy_idx", &sd->busy_idx, sizeof(int) , 0644, proc_dointvec_minmax, true ); - set_table_entry(&table[3] , "idle_idx", &sd->idle_idx, sizeof(int) , 0644, proc_dointvec_minmax, true ); - set_table_entry(&table[4] , "newidle_idx", &sd->newidle_idx, sizeof(int) , 0644, proc_dointvec_minmax, true ); - set_table_entry(&table[5] , "wake_idx", &sd->wake_idx, sizeof(int) , 0644, proc_dointvec_minmax, true ); - set_table_entry(&table[6] , "forkexec_idx", &sd->forkexec_idx, sizeof(int) , 0644, proc_dointvec_minmax, true ); - set_table_entry(&table[7] , "busy_factor", &sd->busy_factor, sizeof(int) , 0644, proc_dointvec_minmax, false); - set_table_entry(&table[8] , "imbalance_pct", &sd->imbalance_pct, sizeof(int) , 0644, proc_dointvec_minmax, false); - set_table_entry(&table[9] , "cache_nice_tries", &sd->cache_nice_tries, sizeof(int) , 0644, proc_dointvec_minmax, false); - set_table_entry(&table[10], "flags", &sd->flags, sizeof(int) , 0644, proc_dointvec_minmax, false); - set_table_entry(&table[11], "max_newidle_lb_cost", &sd->max_newidle_lb_cost, sizeof(long), 0644, proc_doulongvec_minmax, false); - set_table_entry(&table[12], "name", sd->name, CORENAME_MAX_SIZE, 0444, proc_dostring, false); - /* &table[13] is terminator */ + set_table_entry(&table[0], "min_interval", &sd->min_interval, sizeof(long), 0644, proc_doulongvec_minmax); + set_table_entry(&table[1], "max_interval", &sd->max_interval, sizeof(long), 0644, proc_doulongvec_minmax); + set_table_entry(&table[2], "busy_factor", &sd->busy_factor, sizeof(int), 0644, proc_dointvec_minmax); + set_table_entry(&table[3], "imbalance_pct", &sd->imbalance_pct, sizeof(int), 0644, proc_dointvec_minmax); + set_table_entry(&table[4], "cache_nice_tries", &sd->cache_nice_tries, sizeof(int), 0644, proc_dointvec_minmax); + set_table_entry(&table[5], "flags", &sd->flags, sizeof(int), 0644, proc_dointvec_minmax); + set_table_entry(&table[6], "max_newidle_lb_cost", &sd->max_newidle_lb_cost, sizeof(long), 0644, proc_doulongvec_minmax); + set_table_entry(&table[7], "name", sd->name, CORENAME_MAX_SIZE, 
0444, proc_dostring); + /* &table[8] is terminator */ return table; } @@ -656,8 +642,6 @@ do { \ SEQ_printf(m, " .%-30s: %Ld.%06ld\n", #x, SPLIT_NS(rq->x)) P(nr_running); - SEQ_printf(m, " .%-30s: %lu\n", "load", - rq->load.weight); P(nr_switches); P(nr_load_updates); P(nr_uninterruptible); @@ -665,11 +649,6 @@ do { \ SEQ_printf(m, " .%-30s: %ld\n", "curr->pid", (long)(task_pid_nr(rq->curr))); PN(clock); PN(clock_task); - P(cpu_load[0]); - P(cpu_load[1]); - P(cpu_load[2]); - P(cpu_load[3]); - P(cpu_load[4]); #undef P #undef PN diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index f35930f5e528..7f8d477f90fe 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -1467,8 +1467,6 @@ bool should_numa_migrate_memory(struct task_struct *p, struct page * page, } static unsigned long weighted_cpuload(struct rq *rq); -static unsigned long source_load(int cpu, int type); -static unsigned long target_load(int cpu, int type); /* Cached statistics for all CPUs within a node */ struct numa_stats { @@ -1621,7 +1619,7 @@ static void task_numa_compare(struct task_numa_env *env, * be incurred if the tasks were swapped. */ /* Skip this swap candidate if cannot move to the source cpu */ - if (!cpumask_test_cpu(env->src_cpu, &cur->cpus_allowed)) + if (!cpumask_test_cpu(env->src_cpu, cur->cpus_ptr)) goto unlock; /* @@ -1718,7 +1716,7 @@ static void task_numa_find_cpu(struct task_numa_env *env, for_each_cpu(cpu, cpumask_of_node(env->dst_nid)) { /* Skip this CPU if the source task cannot migrate */ - if (!cpumask_test_cpu(cpu, &env->p->cpus_allowed)) + if (!cpumask_test_cpu(cpu, env->p->cpus_ptr)) continue; env->dst_cpu = cpu; @@ -2686,8 +2684,6 @@ static void account_entity_enqueue(struct cfs_rq *cfs_rq, struct sched_entity *se) { update_load_add(&cfs_rq->load, se->load.weight); - if (!parent_entity(se)) - update_load_add(&rq_of(cfs_rq)->load, se->load.weight); #ifdef CONFIG_SMP if (entity_is_task(se)) { struct rq *rq = rq_of(cfs_rq); @@ -2703,8 +2699,6 @@ static void account_entity_dequeue(struct cfs_rq *cfs_rq, struct sched_entity *se) { update_load_sub(&cfs_rq->load, se->load.weight); - if (!parent_entity(se)) - update_load_sub(&rq_of(cfs_rq)->load, se->load.weight); #ifdef CONFIG_SMP if (entity_is_task(se)) { account_numa_dequeue(rq_of(cfs_rq), task_of(se)); @@ -4100,7 +4094,8 @@ set_next_entity(struct cfs_rq *cfs_rq, struct sched_entity *se) * least twice that of our own weight (i.e. dont track it * when there are only lesser-weight tasks around): */ - if (schedstat_enabled() && rq_of(cfs_rq)->load.weight >= 2*se->load.weight) { + if (schedstat_enabled() && + rq_of(cfs_rq)->cfs.load.weight >= 2*se->load.weight) { schedstat_set(se->statistics.slice_max, max((u64)schedstat_val(se->statistics.slice_max), se->sum_exec_runtime - se->prev_sum_exec_runtime)); @@ -5325,71 +5320,6 @@ DEFINE_PER_CPU(cpumask_var_t, load_balance_mask); DEFINE_PER_CPU(cpumask_var_t, select_idle_mask); #ifdef CONFIG_NO_HZ_COMMON -/* - * per rq 'load' arrray crap; XXX kill this. - */ - -/* - * The exact cpuload calculated at every tick would be: - * - * load' = (1 - 1/2^i) * load + (1/2^i) * cur_load - * - * If a CPU misses updates for n ticks (as it was idle) and update gets - * called on the n+1-th tick when CPU may be busy, then we have: - * - * load_n = (1 - 1/2^i)^n * load_0 - * load_n+1 = (1 - 1/2^i) * load_n + (1/2^i) * cur_load - * - * decay_load_missed() below does efficient calculation of - * - * load' = (1 - 1/2^i)^n * load - * - * Because x^(n+m) := x^n * x^m we can decompose any x^n in power-of-2 factors. 
- * This allows us to precompute the above in said factors, thereby allowing the - * reduction of an arbitrary n in O(log_2 n) steps. (See also - * fixed_power_int()) - * - * The calculation is approximated on a 128 point scale. - */ -#define DEGRADE_SHIFT 7 - -static const u8 degrade_zero_ticks[CPU_LOAD_IDX_MAX] = {0, 8, 32, 64, 128}; -static const u8 degrade_factor[CPU_LOAD_IDX_MAX][DEGRADE_SHIFT + 1] = { - { 0, 0, 0, 0, 0, 0, 0, 0 }, - { 64, 32, 8, 0, 0, 0, 0, 0 }, - { 96, 72, 40, 12, 1, 0, 0, 0 }, - { 112, 98, 75, 43, 15, 1, 0, 0 }, - { 120, 112, 98, 76, 45, 16, 2, 0 } -}; - -/* - * Update cpu_load for any missed ticks, due to tickless idle. The backlog - * would be when CPU is idle and so we just decay the old load without - * adding any new load. - */ -static unsigned long -decay_load_missed(unsigned long load, unsigned long missed_updates, int idx) -{ - int j = 0; - - if (!missed_updates) - return load; - - if (missed_updates >= degrade_zero_ticks[idx]) - return 0; - - if (idx == 1) - return load >> missed_updates; - - while (missed_updates) { - if (missed_updates % 2) - load = (load * degrade_factor[idx][j]) >> DEGRADE_SHIFT; - - missed_updates >>= 1; - j++; - } - return load; -} static struct { cpumask_var_t idle_cpus_mask; @@ -5401,234 +5331,11 @@ static struct { #endif /* CONFIG_NO_HZ_COMMON */ -/** - * __cpu_load_update - update the rq->cpu_load[] statistics - * @this_rq: The rq to update statistics for - * @this_load: The current load - * @pending_updates: The number of missed updates - * - * Update rq->cpu_load[] statistics. This function is usually called every - * scheduler tick (TICK_NSEC). - * - * This function computes a decaying average: - * - * load[i]' = (1 - 1/2^i) * load[i] + (1/2^i) * load - * - * Because of NOHZ it might not get called on every tick which gives need for - * the @pending_updates argument. - * - * load[i]_n = (1 - 1/2^i) * load[i]_n-1 + (1/2^i) * load_n-1 - * = A * load[i]_n-1 + B ; A := (1 - 1/2^i), B := (1/2^i) * load - * = A * (A * load[i]_n-2 + B) + B - * = A * (A * (A * load[i]_n-3 + B) + B) + B - * = A^3 * load[i]_n-3 + (A^2 + A + 1) * B - * = A^n * load[i]_0 + (A^(n-1) + A^(n-2) + ... + 1) * B - * = A^n * load[i]_0 + ((1 - A^n) / (1 - A)) * B - * = (1 - 1/2^i)^n * (load[i]_0 - load) + load - * - * In the above we've assumed load_n := load, which is true for NOHZ_FULL as - * any change in load would have resulted in the tick being turned back on. - * - * For regular NOHZ, this reduces to: - * - * load[i]_n = (1 - 1/2^i)^n * load[i]_0 - * - * see decay_load_misses(). For NOHZ_FULL we get to subtract and add the extra - * term. - */ -static void cpu_load_update(struct rq *this_rq, unsigned long this_load, - unsigned long pending_updates) -{ - unsigned long __maybe_unused tickless_load = this_rq->cpu_load[0]; - int i, scale; - - this_rq->nr_load_updates++; - - /* Update our load: */ - this_rq->cpu_load[0] = this_load; /* Fasttrack for idx 0 */ - for (i = 1, scale = 2; i < CPU_LOAD_IDX_MAX; i++, scale += scale) { - unsigned long old_load, new_load; - - /* scale is effectively 1 << i now, and >> i divides by scale */ - - old_load = this_rq->cpu_load[i]; -#ifdef CONFIG_NO_HZ_COMMON - old_load = decay_load_missed(old_load, pending_updates - 1, i); - if (tickless_load) { - old_load -= decay_load_missed(tickless_load, pending_updates - 1, i); - /* - * old_load can never be a negative value because a - * decayed tickless_load cannot be greater than the - * original tickless_load. 
- */ - old_load += tickless_load; - } -#endif - new_load = this_load; - /* - * Round up the averaging division if load is increasing. This - * prevents us from getting stuck on 9 if the load is 10, for - * example. - */ - if (new_load > old_load) - new_load += scale - 1; - - this_rq->cpu_load[i] = (old_load * (scale - 1) + new_load) >> i; - } -} - -/* Used instead of source_load when we know the type == 0 */ static unsigned long weighted_cpuload(struct rq *rq) { return cfs_rq_runnable_load_avg(&rq->cfs); } -#ifdef CONFIG_NO_HZ_COMMON -/* - * There is no sane way to deal with nohz on smp when using jiffies because the - * CPU doing the jiffies update might drift wrt the CPU doing the jiffy reading - * causing off-by-one errors in observed deltas; {0,2} instead of {1,1}. - * - * Therefore we need to avoid the delta approach from the regular tick when - * possible since that would seriously skew the load calculation. This is why we - * use cpu_load_update_periodic() for CPUs out of nohz. However we'll rely on - * jiffies deltas for updates happening while in nohz mode (idle ticks, idle - * loop exit, nohz_idle_balance, nohz full exit...) - * - * This means we might still be one tick off for nohz periods. - */ - -static void cpu_load_update_nohz(struct rq *this_rq, - unsigned long curr_jiffies, - unsigned long load) -{ - unsigned long pending_updates; - - pending_updates = curr_jiffies - this_rq->last_load_update_tick; - if (pending_updates) { - this_rq->last_load_update_tick = curr_jiffies; - /* - * In the regular NOHZ case, we were idle, this means load 0. - * In the NOHZ_FULL case, we were non-idle, we should consider - * its weighted load. - */ - cpu_load_update(this_rq, load, pending_updates); - } -} - -/* - * Called from nohz_idle_balance() to update the load ratings before doing the - * idle balance. - */ -static void cpu_load_update_idle(struct rq *this_rq) -{ - /* - * bail if there's load or we're actually up-to-date. - */ - if (weighted_cpuload(this_rq)) - return; - - cpu_load_update_nohz(this_rq, READ_ONCE(jiffies), 0); -} - -/* - * Record CPU load on nohz entry so we know the tickless load to account - * on nohz exit. cpu_load[0] happens then to be updated more frequently - * than other cpu_load[idx] but it should be fine as cpu_load readers - * shouldn't rely into synchronized cpu_load[*] updates. - */ -void cpu_load_update_nohz_start(void) -{ - struct rq *this_rq = this_rq(); - - /* - * This is all lockless but should be fine. If weighted_cpuload changes - * concurrently we'll exit nohz. And cpu_load write can race with - * cpu_load_update_idle() but both updater would be writing the same. - */ - this_rq->cpu_load[0] = weighted_cpuload(this_rq); -} - -/* - * Account the tickless load in the end of a nohz frame. - */ -void cpu_load_update_nohz_stop(void) -{ - unsigned long curr_jiffies = READ_ONCE(jiffies); - struct rq *this_rq = this_rq(); - unsigned long load; - struct rq_flags rf; - - if (curr_jiffies == this_rq->last_load_update_tick) - return; - - load = weighted_cpuload(this_rq); - rq_lock(this_rq, &rf); - update_rq_clock(this_rq); - cpu_load_update_nohz(this_rq, curr_jiffies, load); - rq_unlock(this_rq, &rf); -} -#else /* !CONFIG_NO_HZ_COMMON */ -static inline void cpu_load_update_nohz(struct rq *this_rq, - unsigned long curr_jiffies, - unsigned long load) { } -#endif /* CONFIG_NO_HZ_COMMON */ - -static void cpu_load_update_periodic(struct rq *this_rq, unsigned long load) -{ -#ifdef CONFIG_NO_HZ_COMMON - /* See the mess around cpu_load_update_nohz(). 
*/ - this_rq->last_load_update_tick = READ_ONCE(jiffies); -#endif - cpu_load_update(this_rq, load, 1); -} - -/* - * Called from scheduler_tick() - */ -void cpu_load_update_active(struct rq *this_rq) -{ - unsigned long load = weighted_cpuload(this_rq); - - if (tick_nohz_tick_stopped()) - cpu_load_update_nohz(this_rq, READ_ONCE(jiffies), load); - else - cpu_load_update_periodic(this_rq, load); -} - -/* - * Return a low guess at the load of a migration-source CPU weighted - * according to the scheduling class and "nice" value. - * - * We want to under-estimate the load of migration sources, to - * balance conservatively. - */ -static unsigned long source_load(int cpu, int type) -{ - struct rq *rq = cpu_rq(cpu); - unsigned long total = weighted_cpuload(rq); - - if (type == 0 || !sched_feat(LB_BIAS)) - return total; - - return min(rq->cpu_load[type-1], total); -} - -/* - * Return a high guess at the load of a migration-target CPU weighted - * according to the scheduling class and "nice" value. - */ -static unsigned long target_load(int cpu, int type) -{ - struct rq *rq = cpu_rq(cpu); - unsigned long total = weighted_cpuload(rq); - - if (type == 0 || !sched_feat(LB_BIAS)) - return total; - - return max(rq->cpu_load[type-1], total); -} - static unsigned long capacity_of(int cpu) { return cpu_rq(cpu)->cpu_capacity; @@ -5736,7 +5443,7 @@ wake_affine_weight(struct sched_domain *sd, struct task_struct *p, s64 this_eff_load, prev_eff_load; unsigned long task_load; - this_eff_load = target_load(this_cpu, sd->wake_idx); + this_eff_load = weighted_cpuload(cpu_rq(this_cpu)); if (sync) { unsigned long current_load = task_h_load(current); @@ -5754,7 +5461,7 @@ wake_affine_weight(struct sched_domain *sd, struct task_struct *p, this_eff_load *= 100; this_eff_load *= capacity_of(prev_cpu); - prev_eff_load = source_load(prev_cpu, sd->wake_idx); + prev_eff_load = weighted_cpuload(cpu_rq(prev_cpu)); prev_eff_load -= task_load; if (sched_feat(WA_BIAS)) prev_eff_load *= 100 + (sd->imbalance_pct - 100) / 2; @@ -5815,14 +5522,10 @@ find_idlest_group(struct sched_domain *sd, struct task_struct *p, unsigned long this_runnable_load = ULONG_MAX; unsigned long min_avg_load = ULONG_MAX, this_avg_load = ULONG_MAX; unsigned long most_spare = 0, this_spare = 0; - int load_idx = sd->forkexec_idx; int imbalance_scale = 100 + (sd->imbalance_pct-100)/2; unsigned long imbalance = scale_load_down(NICE_0_LOAD) * (sd->imbalance_pct-100) / 100; - if (sd_flag & SD_BALANCE_WAKE) - load_idx = sd->wake_idx; - do { unsigned long load, avg_load, runnable_load; unsigned long spare_cap, max_spare_cap; @@ -5831,7 +5534,7 @@ find_idlest_group(struct sched_domain *sd, struct task_struct *p, /* Skip over this group if it has no CPUs allowed */ if (!cpumask_intersects(sched_group_span(group), - &p->cpus_allowed)) + p->cpus_ptr)) continue; local_group = cpumask_test_cpu(this_cpu, @@ -5846,12 +5549,7 @@ find_idlest_group(struct sched_domain *sd, struct task_struct *p, max_spare_cap = 0; for_each_cpu(i, sched_group_span(group)) { - /* Bias balancing toward CPUs of our domain */ - if (local_group) - load = source_load(i, load_idx); - else - load = target_load(i, load_idx); - + load = weighted_cpuload(cpu_rq(i)); runnable_load += load; avg_load += cfs_rq_load_avg(&cpu_rq(i)->cfs); @@ -5963,7 +5661,7 @@ find_idlest_group_cpu(struct sched_group *group, struct task_struct *p, int this return cpumask_first(sched_group_span(group)); /* Traverse only the allowed CPUs */ - for_each_cpu_and(i, sched_group_span(group), &p->cpus_allowed) { + for_each_cpu_and(i, 
sched_group_span(group), p->cpus_ptr) { if (available_idle_cpu(i)) { struct rq *rq = cpu_rq(i); struct cpuidle_state *idle = idle_get_state(rq); @@ -6003,7 +5701,7 @@ static inline int find_idlest_cpu(struct sched_domain *sd, struct task_struct *p { int new_cpu = cpu; - if (!cpumask_intersects(sched_domain_span(sd), &p->cpus_allowed)) + if (!cpumask_intersects(sched_domain_span(sd), p->cpus_ptr)) return prev_cpu; /* @@ -6120,7 +5818,7 @@ static int select_idle_core(struct task_struct *p, struct sched_domain *sd, int if (!test_idle_cores(target, false)) return -1; - cpumask_and(cpus, sched_domain_span(sd), &p->cpus_allowed); + cpumask_and(cpus, sched_domain_span(sd), p->cpus_ptr); for_each_cpu_wrap(core, cpus, target) { bool idle = true; @@ -6154,7 +5852,7 @@ static int select_idle_smt(struct task_struct *p, int target) return -1; for_each_cpu(cpu, cpu_smt_mask(target)) { - if (!cpumask_test_cpu(cpu, &p->cpus_allowed)) + if (!cpumask_test_cpu(cpu, p->cpus_ptr)) continue; if (available_idle_cpu(cpu)) return cpu; @@ -6217,7 +5915,7 @@ static int select_idle_cpu(struct task_struct *p, struct sched_domain *sd, int t for_each_cpu_wrap(cpu, sched_domain_span(sd), target) { if (!--nr) return -1; - if (!cpumask_test_cpu(cpu, &p->cpus_allowed)) + if (!cpumask_test_cpu(cpu, p->cpus_ptr)) continue; if (available_idle_cpu(cpu)) break; @@ -6254,7 +5952,7 @@ static int select_idle_sibling(struct task_struct *p, int prev, int target) recent_used_cpu != target && cpus_share_cache(recent_used_cpu, target) && available_idle_cpu(recent_used_cpu) && - cpumask_test_cpu(p->recent_used_cpu, &p->cpus_allowed)) { + cpumask_test_cpu(p->recent_used_cpu, p->cpus_ptr)) { /* * Replace recent_used_cpu with prev as it is a potential * candidate for the next wake: @@ -6600,7 +6298,7 @@ static int find_energy_efficient_cpu(struct task_struct *p, int prev_cpu) int max_spare_cap_cpu = -1; for_each_cpu_and(cpu, perf_domain_span(pd), sched_domain_span(sd)) { - if (!cpumask_test_cpu(cpu, &p->cpus_allowed)) + if (!cpumask_test_cpu(cpu, p->cpus_ptr)) continue; /* Skip CPUs that will be overutilized. */ @@ -6689,7 +6387,7 @@ select_task_rq_fair(struct task_struct *p, int prev_cpu, int sd_flag, int wake_f } want_affine = !wake_wide(p) && !wake_cap(p, cpu, prev_cpu) && - cpumask_test_cpu(cpu, &p->cpus_allowed); + cpumask_test_cpu(cpu, p->cpus_ptr); } rcu_read_lock(); @@ -7445,14 +7143,14 @@ int can_migrate_task(struct task_struct *p, struct lb_env *env) /* * We do not migrate tasks that are: * 1) throttled_lb_pair, or - * 2) cannot be migrated to this CPU due to cpus_allowed, or + * 2) cannot be migrated to this CPU due to cpus_ptr, or * 3) running (obviously), or * 4) are cache-hot on their current CPU. 
*/ if (throttled_lb_pair(task_group(p), env->src_cpu, env->dst_cpu)) return 0; - if (!cpumask_test_cpu(env->dst_cpu, &p->cpus_allowed)) { + if (!cpumask_test_cpu(env->dst_cpu, p->cpus_ptr)) { int cpu; schedstat_inc(p->se.statistics.nr_failed_migrations_affine); @@ -7472,7 +7170,7 @@ int can_migrate_task(struct task_struct *p, struct lb_env *env) /* Prevent to re-select dst_cpu via env's CPUs: */ for_each_cpu_and(cpu, env->dst_grpmask, env->cpus) { - if (cpumask_test_cpu(cpu, &p->cpus_allowed)) { + if (cpumask_test_cpu(cpu, p->cpus_ptr)) { env->flags |= LBF_DST_PINNED; env->new_dst_cpu = cpu; break; @@ -7879,7 +7577,6 @@ static unsigned long task_h_load(struct task_struct *p) struct sg_lb_stats { unsigned long avg_load; /*Avg load across the CPUs of the group */ unsigned long group_load; /* Total load over the CPUs of the group */ - unsigned long sum_weighted_load; /* Weighted load of group's tasks */ unsigned long load_per_task; unsigned long group_capacity; unsigned long group_util; /* Total utilization of the group */ @@ -7933,34 +7630,6 @@ static inline void init_sd_lb_stats(struct sd_lb_stats *sds) }; } -/** - * get_sd_load_idx - Obtain the load index for a given sched domain. - * @sd: The sched_domain whose load_idx is to be obtained. - * @idle: The idle status of the CPU for whose sd load_idx is obtained. - * - * Return: The load index. - */ -static inline int get_sd_load_idx(struct sched_domain *sd, - enum cpu_idle_type idle) -{ - int load_idx; - - switch (idle) { - case CPU_NOT_IDLE: - load_idx = sd->busy_idx; - break; - - case CPU_NEWLY_IDLE: - load_idx = sd->newidle_idx; - break; - default: - load_idx = sd->idle_idx; - break; - } - - return load_idx; -} - static unsigned long scale_rt_capacity(struct sched_domain *sd, int cpu) { struct rq *rq = cpu_rq(cpu); @@ -8099,7 +7768,7 @@ static inline int check_misfit_status(struct rq *rq, struct sched_domain *sd) /* * Group imbalance indicates (and tries to solve) the problem where balancing - * groups is inadequate due to ->cpus_allowed constraints. + * groups is inadequate due to ->cpus_ptr constraints. * * Imagine a situation of two groups of 4 CPUs each and 4 tasks each with a * cpumask covering 1 CPU of the first group and 3 CPUs of the second group. 
@@ -8249,9 +7918,6 @@ static inline void update_sg_lb_stats(struct lb_env *env, struct sg_lb_stats *sgs, int *sg_status) { - int local_group = cpumask_test_cpu(env->dst_cpu, sched_group_span(group)); - int load_idx = get_sd_load_idx(env->sd, env->idle); - unsigned long load; int i, nr_running; memset(sgs, 0, sizeof(*sgs)); @@ -8262,13 +7928,7 @@ static inline void update_sg_lb_stats(struct lb_env *env, if ((env->flags & LBF_NOHZ_STATS) && update_nohz_stats(rq, false)) env->flags |= LBF_NOHZ_AGAIN; - /* Bias balancing toward CPUs of our domain: */ - if (local_group) - load = target_load(i, load_idx); - else - load = source_load(i, load_idx); - - sgs->group_load += load; + sgs->group_load += weighted_cpuload(rq); sgs->group_util += cpu_util(i); sgs->sum_nr_running += rq->cfs.h_nr_running; @@ -8283,7 +7943,6 @@ static inline void update_sg_lb_stats(struct lb_env *env, sgs->nr_numa_running += rq->nr_numa_running; sgs->nr_preferred_running += rq->nr_preferred_running; #endif - sgs->sum_weighted_load += weighted_cpuload(rq); /* * No need to call idle_cpu() if nr_running is not 0 */ @@ -8302,7 +7961,7 @@ static inline void update_sg_lb_stats(struct lb_env *env, sgs->avg_load = (sgs->group_load*SCHED_CAPACITY_SCALE) / sgs->group_capacity; if (sgs->sum_nr_running) - sgs->load_per_task = sgs->sum_weighted_load / sgs->sum_nr_running; + sgs->load_per_task = sgs->group_load / sgs->sum_nr_running; sgs->group_weight = group->group_weight; @@ -8768,7 +8427,7 @@ static struct sched_group *find_busiest_group(struct lb_env *env) /* * If the busiest group is imbalanced the below checks don't * work because they assume all things are equal, which typically - * isn't true due to cpus_allowed constraints and the like. + * isn't true due to cpus_ptr constraints and the like. */ if (busiest->group_type == group_imbalanced) goto force_balance; @@ -9210,7 +8869,7 @@ more_balance: * if the curr task on busiest CPU can't be * moved to this_cpu: */ - if (!cpumask_test_cpu(this_cpu, &busiest->curr->cpus_allowed)) { + if (!cpumask_test_cpu(this_cpu, busiest->curr->cpus_ptr)) { raw_spin_unlock_irqrestore(&busiest->lock, flags); env.flags |= LBF_ALL_PINNED; @@ -9879,7 +9538,6 @@ static bool _nohz_idle_balance(struct rq *this_rq, unsigned int flags, rq_lock_irqsave(rq, &rf); update_rq_clock(rq); - cpu_load_update_idle(rq); rq_unlock_irqrestore(rq, &rf); if (flags & NOHZ_BALANCE_KICK) diff --git a/kernel/sched/features.h b/kernel/sched/features.h index 858589b83377..2410db5e9a35 100644 --- a/kernel/sched/features.h +++ b/kernel/sched/features.h @@ -39,7 +39,6 @@ SCHED_FEAT(WAKEUP_PREEMPTION, true) SCHED_FEAT(HRTICK, false) SCHED_FEAT(DOUBLE_TICK, false) -SCHED_FEAT(LB_BIAS, false) /* * Decrement CPU capacity based on time not spent running tasks diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c index 1e6b909dca36..63ad7c90822c 100644 --- a/kernel/sched/rt.c +++ b/kernel/sched/rt.c @@ -1614,7 +1614,7 @@ static void put_prev_task_rt(struct rq *rq, struct task_struct *p) static int pick_rt_task(struct rq *rq, struct task_struct *p, int cpu) { if (!task_running(rq, p) && - cpumask_test_cpu(cpu, &p->cpus_allowed)) + cpumask_test_cpu(cpu, p->cpus_ptr)) return 1; return 0; @@ -1751,7 +1751,7 @@ static struct rq *find_lock_lowest_rq(struct task_struct *task, struct rq *rq) * Also make sure that it wasn't scheduled on its rq. 
*/ if (unlikely(task_rq(task) != rq || - !cpumask_test_cpu(lowest_rq->cpu, &task->cpus_allowed) || + !cpumask_test_cpu(lowest_rq->cpu, task->cpus_ptr) || task_running(rq, task) || !rt_task(task) || !task_on_rq_queued(task))) { diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index b52ed1ada0be..607859a18b2a 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -96,12 +96,6 @@ extern atomic_long_t calc_load_tasks; extern void calc_global_load_tick(struct rq *this_rq); extern long calc_load_fold_active(struct rq *this_rq, long adjust); -#ifdef CONFIG_SMP -extern void cpu_load_update_active(struct rq *this_rq); -#else -static inline void cpu_load_update_active(struct rq *this_rq) { } -#endif - /* * Helpers for converting nanosecond timing to jiffy resolution */ @@ -818,8 +812,6 @@ struct rq { unsigned int nr_preferred_running; unsigned int numa_migrate_on; #endif - #define CPU_LOAD_IDX_MAX 5 - unsigned long cpu_load[CPU_LOAD_IDX_MAX]; #ifdef CONFIG_NO_HZ_COMMON #ifdef CONFIG_SMP unsigned long last_load_update_tick; @@ -830,8 +822,6 @@ struct rq { atomic_t nohz_flags; #endif /* CONFIG_NO_HZ_COMMON */ - /* capture load from *all* tasks on this CPU: */ - struct load_weight load; unsigned long nr_load_updates; u64 nr_switches; diff --git a/kernel/sched/topology.c b/kernel/sched/topology.c index f53f89df837d..63184cf0d0d7 100644 --- a/kernel/sched/topology.c +++ b/kernel/sched/topology.c @@ -1344,11 +1344,6 @@ sd_init(struct sched_domain_topology_level *tl, .imbalance_pct = 125, .cache_nice_tries = 0, - .busy_idx = 0, - .idle_idx = 0, - .newidle_idx = 0, - .wake_idx = 0, - .forkexec_idx = 0, .flags = 1*SD_LOAD_BALANCE | 1*SD_BALANCE_NEWIDLE @@ -1400,13 +1395,10 @@ sd_init(struct sched_domain_topology_level *tl, } else if (sd->flags & SD_SHARE_PKG_RESOURCES) { sd->imbalance_pct = 117; sd->cache_nice_tries = 1; - sd->busy_idx = 2; #ifdef CONFIG_NUMA } else if (sd->flags & SD_NUMA) { sd->cache_nice_tries = 2; - sd->busy_idx = 3; - sd->idle_idx = 2; sd->flags &= ~SD_PREFER_SIBLING; sd->flags |= SD_SERIALIZE; @@ -1419,8 +1411,6 @@ sd_init(struct sched_domain_topology_level *tl, #endif } else { sd->cache_nice_tries = 1; - sd->busy_idx = 2; - sd->idle_idx = 1; } /* diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index f4ee1a3428ae..be9707f68024 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -782,7 +782,6 @@ static void tick_nohz_stop_tick(struct tick_sched *ts, int cpu) */ if (!ts->tick_stopped) { calc_load_nohz_start(); - cpu_load_update_nohz_start(); quiet_vmstat(); ts->last_tick = hrtimer_get_expires(&ts->sched_timer); @@ -829,7 +828,6 @@ static void tick_nohz_restart_sched_tick(struct tick_sched *ts, ktime_t now) { /* Update jiffies first */ tick_do_update_jiffies64(now); - cpu_load_update_nohz_stop(); /* * Clear the timer idle flag, so we avoid IPIs on remote queueing and diff --git a/kernel/trace/trace_hwlat.c b/kernel/trace/trace_hwlat.c index 1e6db9cbe4dc..fa95139445b2 100644 --- a/kernel/trace/trace_hwlat.c +++ b/kernel/trace/trace_hwlat.c @@ -277,7 +277,7 @@ static void move_to_next_cpu(void) * of this thread, than stop migrating for the duration * of the current test. 
*/ - if (!cpumask_equal(current_mask, &current->cpus_allowed)) + if (!cpumask_equal(current_mask, current->cpus_ptr)) goto disable; get_online_cpus(); diff --git a/lib/smp_processor_id.c b/lib/smp_processor_id.c index 157d9e31f6c2..60ba93fc42ce 100644 --- a/lib/smp_processor_id.c +++ b/lib/smp_processor_id.c @@ -23,7 +23,7 @@ unsigned int check_preemption_disabled(const char *what1, const char *what2) * Kernel threads bound to a single CPU can safely use * smp_processor_id(): */ - if (cpumask_equal(&current->cpus_allowed, cpumask_of(this_cpu))) + if (cpumask_equal(current->cpus_ptr, cpumask_of(this_cpu))) goto out; /* diff --git a/samples/trace_events/trace-events-sample.c b/samples/trace_events/trace-events-sample.c index 1da597aa6141..1a72b7d95cdc 100644 --- a/samples/trace_events/trace-events-sample.c +++ b/samples/trace_events/trace-events-sample.c @@ -34,7 +34,7 @@ static void simple_thread_func(int cnt) /* Silly tracepoints */ trace_foo_bar("hello", cnt, array, random_strings[len], - &current->cpus_allowed); + current->cpus_ptr); trace_foo_with_template_simple("HELLO", cnt);
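The other half of the series deletes the rq->cpu_load[] indexed-load machinery (source_load(), target_load(), get_sd_load_idx(), the LB_BIAS feature and the cpu_load_update_* hooks) and has wake_affine_weight(), find_idlest_group() and update_sg_lb_stats() read weighted_cpuload(), i.e. the CFS runnable load average, directly. For reference, the snippet below is a stand-alone copy of the decay_load_missed() helper removed from kernel/sched/fair.c, which evaluated the deleted comment's formula load' = (1 - 1/2^i)^n * load in O(log2 n) steps via the 128-point degrade_factor[] table; the main() driver is added here only to show one sample evaluation.

/* Stand-alone copy of the removed decay_load_missed(); main() added for illustration. */
#include <stdio.h>

#define CPU_LOAD_IDX_MAX 5
#define DEGRADE_SHIFT    7

static const unsigned char degrade_zero_ticks[CPU_LOAD_IDX_MAX] = { 0, 8, 32, 64, 128 };
static const unsigned char degrade_factor[CPU_LOAD_IDX_MAX][DEGRADE_SHIFT + 1] = {
	{   0,   0,  0,  0,  0,  0, 0, 0 },
	{  64,  32,  8,  0,  0,  0, 0, 0 },
	{  96,  72, 40, 12,  1,  0, 0, 0 },
	{ 112,  98, 75, 43, 15,  1, 0, 0 },
	{ 120, 112, 98, 76, 45, 16, 2, 0 },
};

/* Decay 'load' for 'missed_updates' idle ticks at cpu_load index 'idx'. */
static unsigned long decay_load_missed(unsigned long load,
				       unsigned long missed_updates, int idx)
{
	int j = 0;

	if (!missed_updates)
		return load;

	if (missed_updates >= degrade_zero_ticks[idx])
		return 0;

	if (idx == 1)
		return load >> missed_updates;

	/* Decompose n into powers of two: x^(a+b) = x^a * x^b. */
	while (missed_updates) {
		if (missed_updates % 2)
			load = (load * degrade_factor[idx][j]) >> DEGRADE_SHIFT;

		missed_updates >>= 1;
		j++;
	}
	return load;
}

int main(void)
{
	/* Load 1024 at index 2 after 5 missed ticks: table gives 240, vs. (3/4)^5 * 1024 ~= 243. */
	printf("%lu\n", decay_load_missed(1024, 5, 2));
	return 0;
}

With per-cfs_rq runnable averages already maintained by PELT, re-deriving these decayed snapshots every tick is redundant, which is why the series can drop the whole cpu_load[] array together with its per-domain load_idx fields and sysctl entries.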