summary | refs | log | tree | commit | diff | stats
path: root/kernel/sched
diff options
context:
space:
mode:
Diffstat (limited to 'kernel/sched')
-rw-r--r-- kernel/sched/auto_group.c 68
-rw-r--r-- kernel/sched/auto_group.h 9
-rw-r--r-- kernel/sched/cputime.c 98
-rw-r--r-- kernel/sched/fair.c 2
-rw-r--r-- kernel/sched/features.h 5
5 files changed, 88 insertions, 94 deletions
diff --git a/kernel/sched/auto_group.c b/kernel/sched/auto_group.c
index 0984a21076a3..0f1bacb005a4 100644
--- a/kernel/sched/auto_group.c
+++ b/kernel/sched/auto_group.c
@@ -110,6 +110,9 @@ out_fail:
bool task_wants_autogroup(struct task_struct *p, struct task_group *tg)
{
+ if (!sysctl_sched_autogroup_enabled)
+ return false;
+
if (tg != &root_task_group)
return false;
@@ -143,15 +146,11 @@ autogroup_move_group(struct task_struct *p, struct autogroup *ag)
p->signal->autogroup = autogroup_kref_get(ag);
- if (!ACCESS_ONCE(sysctl_sched_autogroup_enabled))
- goto out;
-
t = p;
do {
sched_move_task(t);
} while_each_thread(p, t);
-out:
unlock_task_sighand(p, &flags);
autogroup_kref_put(prev);
}
@@ -159,8 +158,11 @@ out:
/* Allocates GFP_KERNEL, cannot be called under any spinlock */
void sched_autogroup_create_attach(struct task_struct *p)
{
- struct autogroup *ag = autogroup_create();
+ struct autogroup *ag;
+ if (!sysctl_sched_autogroup_enabled)
+ return;
+ ag = autogroup_create();
autogroup_move_group(p, ag);
/* drop extra reference added by autogroup_create() */
autogroup_kref_put(ag);
@@ -176,11 +178,15 @@ EXPORT_SYMBOL(sched_autogroup_detach);
void sched_autogroup_fork(struct signal_struct *sig)
{
+ if (!sysctl_sched_autogroup_enabled)
+ return;
sig->autogroup = autogroup_task_get(current);
}
void sched_autogroup_exit(struct signal_struct *sig)
{
+ if (!sysctl_sched_autogroup_enabled)
+ return;
autogroup_kref_put(sig->autogroup);
}
@@ -193,58 +199,6 @@ static int __init setup_autogroup(char *str)
__setup("noautogroup", setup_autogroup);
-#ifdef CONFIG_PROC_FS
-
-int proc_sched_autogroup_set_nice(struct task_struct *p, int nice)
-{
- static unsigned long next = INITIAL_JIFFIES;
- struct autogroup *ag;
- int err;
-
- if (nice < -20 || nice > 19)
- return -EINVAL;
-
- err = security_task_setnice(current, nice);
- if (err)
- return err;
-
- if (nice < 0 && !can_nice(current, nice))
- return -EPERM;
-
- /* this is a heavy operation taking global locks.. */
- if (!capable(CAP_SYS_ADMIN) && time_before(jiffies, next))
- return -EAGAIN;
-
- next = HZ / 10 + jiffies;
- ag = autogroup_task_get(p);
-
- down_write(&ag->lock);
- err = sched_group_set_shares(ag->tg, prio_to_weight[nice + 20]);
- if (!err)
- ag->nice = nice;
- up_write(&ag->lock);
-
- autogroup_kref_put(ag);
-
- return err;
-}
-
-void proc_sched_autogroup_show_task(struct task_struct *p, struct seq_file *m)
-{
- struct autogroup *ag = autogroup_task_get(p);
-
- if (!task_group_is_autogroup(ag->tg))
- goto out;
-
- down_read(&ag->lock);
- seq_printf(m, "/autogroup-%ld nice %d\n", ag->id, ag->nice);
- up_read(&ag->lock);
-
-out:
- autogroup_kref_put(ag);
-}
-#endif /* CONFIG_PROC_FS */
-
#ifdef CONFIG_SCHED_DEBUG
int autogroup_path(struct task_group *tg, char *buf, int buflen)
{
diff --git a/kernel/sched/auto_group.h b/kernel/sched/auto_group.h
index 8bd047142816..4552c6bf79d2 100644
--- a/kernel/sched/auto_group.h
+++ b/kernel/sched/auto_group.h
@@ -4,11 +4,6 @@
#include <linux/rwsem.h>
struct autogroup {
- /*
- * reference doesn't mean how many thread attach to this
- * autogroup now. It just stands for the number of task
- * could use this autogroup.
- */
struct kref kref;
struct task_group *tg;
struct rw_semaphore lock;
@@ -29,9 +24,7 @@ extern bool task_wants_autogroup(struct task_struct *p, struct task_group *tg);
static inline struct task_group *
autogroup_task_group(struct task_struct *p, struct task_group *tg)
{
- int enabled = ACCESS_ONCE(sysctl_sched_autogroup_enabled);
-
- if (enabled && task_wants_autogroup(p, tg))
+ if (task_wants_autogroup(p, tg))
return p->signal->autogroup->tg;
return tg;
diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c
index 80b2fd5a7cf0..293b202fcf79 100644
--- a/kernel/sched/cputime.c
+++ b/kernel/sched/cputime.c
@@ -288,6 +288,34 @@ static __always_inline bool steal_account_process_tick(void)
return false;
}
+/*
+ * Accumulate raw cputime values of dead tasks (sig->[us]time) and live
+ * tasks (sum on group iteration) belonging to @tsk's group.
+ */
+void thread_group_cputime(struct task_struct *tsk, struct task_cputime *times)
+{
+ struct signal_struct *sig = tsk->signal;
+ struct task_struct *t;
+
+ times->utime = sig->utime;
+ times->stime = sig->stime;
+ times->sum_exec_runtime = sig->sum_sched_runtime;
+
+ rcu_read_lock();
+ /* make sure we can trust tsk->thread_group list */
+ if (!likely(pid_alive(tsk)))
+ goto out;
+
+ t = tsk;
+ do {
+ times->utime += t->utime;
+ times->stime += t->stime;
+ times->sum_exec_runtime += task_sched_runtime(t);
+ } while_each_thread(tsk, t);
+out:
+ rcu_read_unlock();
+}
+
#ifndef CONFIG_VIRT_CPU_ACCOUNTING
#ifdef CONFIG_IRQ_TIME_ACCOUNTING
@@ -417,13 +445,13 @@ void account_idle_ticks(unsigned long ticks)
* Use precise platform statistics if available:
*/
#ifdef CONFIG_VIRT_CPU_ACCOUNTING
-void task_times(struct task_struct *p, cputime_t *ut, cputime_t *st)
+void task_cputime_adjusted(struct task_struct *p, cputime_t *ut, cputime_t *st)
{
*ut = p->utime;
*st = p->stime;
}
-void thread_group_times(struct task_struct *p, cputime_t *ut, cputime_t *st)
+void thread_group_cputime_adjusted(struct task_struct *p, cputime_t *ut, cputime_t *st)
{
struct task_cputime cputime;
@@ -495,14 +523,30 @@ static cputime_t scale_utime(cputime_t utime, cputime_t rtime, cputime_t total)
return (__force cputime_t) temp;
}
-void task_times(struct task_struct *p, cputime_t *ut, cputime_t *st)
+/*
+ * Adjust the imprecise tick based cputime values against the
+ * scheduler's runtime accounting.
+ */
+static void cputime_adjust(struct task_cputime *curr,
+ struct cputime *prev,
+ cputime_t *ut, cputime_t *st)
{
- cputime_t rtime, utime = p->utime, total = utime + p->stime;
+ cputime_t rtime, utime, total;
+
+ utime = curr->utime;
+ total = utime + curr->stime;
/*
- * Use CFS's precise accounting:
+ * Tick based cputime accounting depends on whether the random
+ * scheduling timeslices of a task happen to be interrupted by the
+ * timer or not. Depending on these circumstances, the number of
+ * ticks charged may over- or under-estimate the real user and system
+ * cputime, so the precision of the tick based values varies.
+ *
+ * Fix this by scaling these tick based values against the total
+ * runtime accounted by the CFS scheduler.
*/
- rtime = nsecs_to_cputime(p->se.sum_exec_runtime);
+ rtime = nsecs_to_cputime(curr->sum_exec_runtime);
if (total)
utime = scale_utime(utime, rtime, total);
@@ -510,38 +554,36 @@ void task_times(struct task_struct *p, cputime_t *ut, cputime_t *st)
utime = rtime;
/*
- * Compare with previous values, to keep monotonicity:
+ * If the tick based count grows faster than the scheduler one,
+ * the result of the scaling may go backward.
+ * Let's enforce monotonicity.
*/
- p->prev_utime = max(p->prev_utime, utime);
- p->prev_stime = max(p->prev_stime, rtime - p->prev_utime);
+ prev->utime = max(prev->utime, utime);
+ prev->stime = max(prev->stime, rtime - prev->utime);
- *ut = p->prev_utime;
- *st = p->prev_stime;
+ *ut = prev->utime;
+ *st = prev->stime;
+}
+
+void task_cputime_adjusted(struct task_struct *p, cputime_t *ut, cputime_t *st)
+{
+ struct task_cputime cputime = {
+ .utime = p->utime,
+ .stime = p->stime,
+ .sum_exec_runtime = p->se.sum_exec_runtime,
+ };
+
+ cputime_adjust(&cputime, &p->prev_cputime, ut, st);
}
/*
* Must be called with siglock held.
*/
-void thread_group_times(struct task_struct *p, cputime_t *ut, cputime_t *st)
+void thread_group_cputime_adjusted(struct task_struct *p, cputime_t *ut, cputime_t *st)
{
- struct signal_struct *sig = p->signal;
struct task_cputime cputime;
- cputime_t rtime, utime, total;
thread_group_cputime(p, &cputime);
-
- total = cputime.utime + cputime.stime;
- rtime = nsecs_to_cputime(cputime.sum_exec_runtime);
-
- if (total)
- utime = scale_utime(cputime.utime, rtime, total);
- else
- utime = rtime;
-
- sig->prev_utime = max(sig->prev_utime, utime);
- sig->prev_stime = max(sig->prev_stime, rtime - sig->prev_utime);
-
- *ut = sig->prev_utime;
- *st = sig->prev_stime;
+ cputime_adjust(&cputime, &p->signal->prev_cputime, ut, st);
}
#endif
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index a319d56c7605..59e072b2db97 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -3330,7 +3330,7 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int wake_
* Batch and idle tasks do not preempt non-idle tasks (their preemption
* is driven by the tick):
*/
- if (unlikely(p->policy != SCHED_NORMAL))
+ if (unlikely(p->policy != SCHED_NORMAL) || !sched_feat(WAKEUP_PREEMPTION))
return;
find_matching_se(&se, &pse);
diff --git a/kernel/sched/features.h b/kernel/sched/features.h
index eebefcad7027..e68e69ab917d 100644
--- a/kernel/sched/features.h
+++ b/kernel/sched/features.h
@@ -32,6 +32,11 @@ SCHED_FEAT(LAST_BUDDY, true)
SCHED_FEAT(CACHE_HOT_BUDDY, true)
/*
+ * Allow wakeup-time preemption of the current task:
+ */
+SCHED_FEAT(WAKEUP_PREEMPTION, true)
+
+/*
* Use arch dependent cpu power functions
*/
SCHED_FEAT(ARCH_POWER, true)