Diffstat (limited to 'kernel/sched')
-rw-r--r-- | kernel/sched/auto_group.c | 68
-rw-r--r-- | kernel/sched/auto_group.h |  9
-rw-r--r-- | kernel/sched/cputime.c    | 98
-rw-r--r-- | kernel/sched/fair.c       |  2
-rw-r--r-- | kernel/sched/features.h   |  5
5 files changed, 88 insertions, 94 deletions
diff --git a/kernel/sched/auto_group.c b/kernel/sched/auto_group.c
index 0984a21076a3..0f1bacb005a4 100644
--- a/kernel/sched/auto_group.c
+++ b/kernel/sched/auto_group.c
@@ -110,6 +110,9 @@ out_fail:
 
 bool task_wants_autogroup(struct task_struct *p, struct task_group *tg)
 {
+	if (!sysctl_sched_autogroup_enabled)
+		return false;
+
 	if (tg != &root_task_group)
 		return false;
 
@@ -143,15 +146,11 @@ autogroup_move_group(struct task_struct *p, struct autogroup *ag)
 
 	p->signal->autogroup = autogroup_kref_get(ag);
 
-	if (!ACCESS_ONCE(sysctl_sched_autogroup_enabled))
-		goto out;
-
 	t = p;
 	do {
 		sched_move_task(t);
 	} while_each_thread(p, t);
 
-out:
 	unlock_task_sighand(p, &flags);
 	autogroup_kref_put(prev);
 }
@@ -159,8 +158,11 @@ out:
 /* Allocates GFP_KERNEL, cannot be called under any spinlock */
 void sched_autogroup_create_attach(struct task_struct *p)
 {
-	struct autogroup *ag = autogroup_create();
+	struct autogroup *ag;
 
+	if (!sysctl_sched_autogroup_enabled)
+		return;
+	ag = autogroup_create();
 	autogroup_move_group(p, ag);
 	/* drop extra reference added by autogroup_create() */
 	autogroup_kref_put(ag);
@@ -176,11 +178,15 @@ EXPORT_SYMBOL(sched_autogroup_detach);
 
 void sched_autogroup_fork(struct signal_struct *sig)
 {
+	if (!sysctl_sched_autogroup_enabled)
+		return;
 	sig->autogroup = autogroup_task_get(current);
 }
 
 void sched_autogroup_exit(struct signal_struct *sig)
 {
+	if (!sysctl_sched_autogroup_enabled)
+		return;
 	autogroup_kref_put(sig->autogroup);
 }
 
@@ -193,58 +199,6 @@ static int __init setup_autogroup(char *str)
 
 __setup("noautogroup", setup_autogroup);
 
-#ifdef CONFIG_PROC_FS
-
-int proc_sched_autogroup_set_nice(struct task_struct *p, int nice)
-{
-	static unsigned long next = INITIAL_JIFFIES;
-	struct autogroup *ag;
-	int err;
-
-	if (nice < -20 || nice > 19)
-		return -EINVAL;
-
-	err = security_task_setnice(current, nice);
-	if (err)
-		return err;
-
-	if (nice < 0 && !can_nice(current, nice))
-		return -EPERM;
-
-	/* this is a heavy operation taking global locks.. */
-	if (!capable(CAP_SYS_ADMIN) && time_before(jiffies, next))
-		return -EAGAIN;
-
-	next = HZ / 10 + jiffies;
-	ag = autogroup_task_get(p);
-
-	down_write(&ag->lock);
-	err = sched_group_set_shares(ag->tg, prio_to_weight[nice + 20]);
-	if (!err)
-		ag->nice = nice;
-	up_write(&ag->lock);
-
-	autogroup_kref_put(ag);
-
-	return err;
-}
-
-void proc_sched_autogroup_show_task(struct task_struct *p, struct seq_file *m)
-{
-	struct autogroup *ag = autogroup_task_get(p);
-
-	if (!task_group_is_autogroup(ag->tg))
-		goto out;
-
-	down_read(&ag->lock);
-	seq_printf(m, "/autogroup-%ld nice %d\n", ag->id, ag->nice);
-	up_read(&ag->lock);
-
-out:
-	autogroup_kref_put(ag);
-}
-#endif /* CONFIG_PROC_FS */
-
 #ifdef CONFIG_SCHED_DEBUG
 int autogroup_path(struct task_group *tg, char *buf, int buflen)
 {
diff --git a/kernel/sched/auto_group.h b/kernel/sched/auto_group.h
index 8bd047142816..4552c6bf79d2 100644
--- a/kernel/sched/auto_group.h
+++ b/kernel/sched/auto_group.h
@@ -4,11 +4,6 @@
 #include <linux/rwsem.h>
 
 struct autogroup {
-	/*
-	 * reference doesn't mean how many thread attach to this
-	 * autogroup now. It just stands for the number of task
-	 * could use this autogroup.
-	 */
 	struct kref kref;
 	struct task_group *tg;
 	struct rw_semaphore lock;
@@ -29,9 +24,7 @@ extern bool task_wants_autogroup(struct task_struct *p, struct task_group *tg);
 static inline struct task_group *
 autogroup_task_group(struct task_struct *p, struct task_group *tg)
 {
-	int enabled = ACCESS_ONCE(sysctl_sched_autogroup_enabled);
-
-	if (enabled && task_wants_autogroup(p, tg))
+	if (task_wants_autogroup(p, tg))
 		return p->signal->autogroup->tg;
 
 	return tg;
diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c
index 80b2fd5a7cf0..293b202fcf79 100644
--- a/kernel/sched/cputime.c
+++ b/kernel/sched/cputime.c
@@ -288,6 +288,34 @@ static __always_inline bool steal_account_process_tick(void)
 	return false;
 }
 
+/*
+ * Accumulate raw cputime values of dead tasks (sig->[us]time) and live
+ * tasks (sum on group iteration) belonging to @tsk's group.
+ */
+void thread_group_cputime(struct task_struct *tsk, struct task_cputime *times)
+{
+	struct signal_struct *sig = tsk->signal;
+	struct task_struct *t;
+
+	times->utime = sig->utime;
+	times->stime = sig->stime;
+	times->sum_exec_runtime = sig->sum_sched_runtime;
+
+	rcu_read_lock();
+	/* make sure we can trust tsk->thread_group list */
+	if (!likely(pid_alive(tsk)))
+		goto out;
+
+	t = tsk;
+	do {
+		times->utime += t->utime;
+		times->stime += t->stime;
+		times->sum_exec_runtime += task_sched_runtime(t);
+	} while_each_thread(tsk, t);
+out:
+	rcu_read_unlock();
+}
+
 #ifndef CONFIG_VIRT_CPU_ACCOUNTING
 
 #ifdef CONFIG_IRQ_TIME_ACCOUNTING
@@ -417,13 +445,13 @@ void account_idle_ticks(unsigned long ticks)
  * Use precise platform statistics if available:
  */
 #ifdef CONFIG_VIRT_CPU_ACCOUNTING
-void task_times(struct task_struct *p, cputime_t *ut, cputime_t *st)
+void task_cputime_adjusted(struct task_struct *p, cputime_t *ut, cputime_t *st)
 {
 	*ut = p->utime;
 	*st = p->stime;
 }
 
-void thread_group_times(struct task_struct *p, cputime_t *ut, cputime_t *st)
+void thread_group_cputime_adjusted(struct task_struct *p, cputime_t *ut, cputime_t *st)
 {
 	struct task_cputime cputime;
 
@@ -495,14 +523,30 @@ static cputime_t scale_utime(cputime_t utime, cputime_t rtime, cputime_t total)
 	return (__force cputime_t) temp;
 }
 
-void task_times(struct task_struct *p, cputime_t *ut, cputime_t *st)
+/*
+ * Adjust tick based cputime random precision against scheduler
+ * runtime accounting.
+ */
+static void cputime_adjust(struct task_cputime *curr,
+			   struct cputime *prev,
+			   cputime_t *ut, cputime_t *st)
 {
-	cputime_t rtime, utime = p->utime, total = utime + p->stime;
+	cputime_t rtime, utime, total;
+
+	utime = curr->utime;
+	total = utime + curr->stime;
 
 	/*
-	 * Use CFS's precise accounting:
+	 * Tick based cputime accounting depend on random scheduling
+	 * timeslices of a task to be interrupted or not by the timer.
+	 * Depending on these circumstances, the number of these interrupts
+	 * may be over or under-optimistic, matching the real user and system
+	 * cputime with a variable precision.
+	 *
+	 * Fix this by scaling these tick based values against the total
+	 * runtime accounted by the CFS scheduler.
 	 */
-	rtime = nsecs_to_cputime(p->se.sum_exec_runtime);
+	rtime = nsecs_to_cputime(curr->sum_exec_runtime);
 
 	if (total)
 		utime = scale_utime(utime, rtime, total);
@@ -510,38 +554,36 @@ void task_times(struct task_struct *p, cputime_t *ut, cputime_t *st)
 		utime = rtime;
 
 	/*
-	 * Compare with previous values, to keep monotonicity:
+	 * If the tick based count grows faster than the scheduler one,
+	 * the result of the scaling may go backward.
+	 * Let's enforce monotonicity.
 	 */
-	p->prev_utime = max(p->prev_utime, utime);
-	p->prev_stime = max(p->prev_stime, rtime - p->prev_utime);
+	prev->utime = max(prev->utime, utime);
+	prev->stime = max(prev->stime, rtime - prev->utime);
 
-	*ut = p->prev_utime;
-	*st = p->prev_stime;
+	*ut = prev->utime;
+	*st = prev->stime;
+}
+
+void task_cputime_adjusted(struct task_struct *p, cputime_t *ut, cputime_t *st)
+{
+	struct task_cputime cputime = {
+		.utime = p->utime,
+		.stime = p->stime,
+		.sum_exec_runtime = p->se.sum_exec_runtime,
+	};
+
+	cputime_adjust(&cputime, &p->prev_cputime, ut, st);
 }
 
 /*
  * Must be called with siglock held.
  */
-void thread_group_times(struct task_struct *p, cputime_t *ut, cputime_t *st)
+void thread_group_cputime_adjusted(struct task_struct *p, cputime_t *ut, cputime_t *st)
 {
-	struct signal_struct *sig = p->signal;
 	struct task_cputime cputime;
-	cputime_t rtime, utime, total;
 
 	thread_group_cputime(p, &cputime);
-
-	total = cputime.utime + cputime.stime;
-	rtime = nsecs_to_cputime(cputime.sum_exec_runtime);
-
-	if (total)
-		utime = scale_utime(cputime.utime, rtime, total);
-	else
-		utime = rtime;
-
-	sig->prev_utime = max(sig->prev_utime, utime);
-	sig->prev_stime = max(sig->prev_stime, rtime - sig->prev_utime);
-
-	*ut = sig->prev_utime;
-	*st = sig->prev_stime;
+	cputime_adjust(&cputime, &p->signal->prev_cputime, ut, st);
 }
 #endif
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index a319d56c7605..59e072b2db97 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -3330,7 +3330,7 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int wake_
 	 * Batch and idle tasks do not preempt non-idle tasks (their preemption
 	 * is driven by the tick):
 	 */
-	if (unlikely(p->policy != SCHED_NORMAL))
+	if (unlikely(p->policy != SCHED_NORMAL) || !sched_feat(WAKEUP_PREEMPTION))
 		return;
 
 	find_matching_se(&se, &pse);
diff --git a/kernel/sched/features.h b/kernel/sched/features.h
index eebefcad7027..e68e69ab917d 100644
--- a/kernel/sched/features.h
+++ b/kernel/sched/features.h
@@ -32,6 +32,11 @@ SCHED_FEAT(LAST_BUDDY, true)
 SCHED_FEAT(CACHE_HOT_BUDDY, true)
 
 /*
+ * Allow wakeup-time preemption of the current task:
+ */
+SCHED_FEAT(WAKEUP_PREEMPTION, true)
+
+/*
  * Use arch dependent cpu power functions
  */
 SCHED_FEAT(ARCH_POWER, true)
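
A note on the cputime consolidation above: task_cputime_adjusted() and thread_group_cputime_adjusted() now share a single helper, cputime_adjust(), which rescales the imprecise tick-based utime/stime split against the scheduler's precise runtime and clamps the result against the previously reported values so successive reads never go backward. The following is a minimal userspace sketch of that technique, not kernel code; the struct names, the naive scaling (which skips the overflow care scale_utime() takes), and main() are illustrative assumptions.

#include <stdio.h>
#include <stdint.h>

/* Hypothetical stand-ins for the kernel's cputime bookkeeping. */
struct sample { uint64_t utime, stime, rtime; }; /* tick counts + precise runtime */
struct prev   { uint64_t utime, stime; };        /* last values reported */

static uint64_t max_u64(uint64_t a, uint64_t b) { return a > b ? a : b; }

/*
 * Same idea as the kernel's cputime_adjust(): split the precise runtime
 * (rtime) in the utime:stime ratio observed by the tick, then enforce
 * monotonicity against the previously reported values.
 */
static void cputime_adjust(const struct sample *cur, struct prev *prev,
                           uint64_t *ut, uint64_t *st)
{
	uint64_t total = cur->utime + cur->stime;
	uint64_t utime;

	if (total)
		utime = cur->rtime * cur->utime / total; /* naive scale; can overflow */
	else
		utime = cur->rtime; /* no ticks seen: attribute everything to utime */

	/* Scaling may regress if ticks outpace rtime; clamp to the last report. */
	prev->utime = max_u64(prev->utime, utime);
	prev->stime = max_u64(prev->stime, cur->rtime - prev->utime);

	*ut = prev->utime;
	*st = prev->stime;
}

int main(void)
{
	struct prev p = { 0, 0 };
	struct sample s1 = { .utime = 30, .stime = 10, .rtime = 100 };
	struct sample s2 = { .utime = 31, .stime = 10, .rtime = 102 };
	uint64_t ut, st;

	cputime_adjust(&s1, &p, &ut, &st); /* reports ut=75 st=25 */
	printf("report 1: ut=%llu st=%llu\n", (unsigned long long)ut, (unsigned long long)st);
	cputime_adjust(&s2, &p, &ut, &st); /* reports ut=77 st=25, never below report 1 */
	printf("report 2: ut=%llu st=%llu\n", (unsigned long long)ut, (unsigned long long)st);
	return 0;
}

Centralizing this in one helper is what lets the per-task and per-thread-group paths share the same monotonic state (p->prev_cputime and p->signal->prev_cputime respectively) instead of duplicating the scaling logic as the removed thread_group_times() did.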
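On the WAKEUP_PREEMPTION bit added to features.h: SCHED_FEAT() entries are compiled-in defaults, and with CONFIG_SCHED_DEBUG they can be flipped at runtime by writing the feature name, or the name prefixed with NO_, to /sys/kernel/debug/sched_features. A small sketch of toggling the new bit from userspace, assuming debugfs is mounted at the conventional path and the caller has the needed privileges:

#include <stdio.h>

/*
 * Toggle a scheduler feature bit via debugfs (CONFIG_SCHED_DEBUG only).
 * Writing "NO_<FEAT>" clears the bit, writing "<FEAT>" sets it again.
 */
static int set_sched_feat(const char *name)
{
	FILE *f = fopen("/sys/kernel/debug/sched_features", "w");

	if (!f) {
		perror("sched_features");
		return -1;
	}
	fprintf(f, "%s", name);
	return fclose(f) ? -1 : 0;
}

int main(void)
{
	/* Disable wakeup preemption, e.g. for a throughput-sensitive test. */
	if (set_sched_feat("NO_WAKEUP_PREEMPTION"))
		return 1;
	/* ... run the workload ... */
	return set_sched_feat("WAKEUP_PREEMPTION") ? 1 : 0;
}

With the feature clear, check_preempt_wakeup() returns before find_matching_se(), so a waking SCHED_NORMAL task no longer preempts the running one and preemption falls back to being tick-driven.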