summaryrefslogtreecommitdiffstats
path: root/kernel
diff options
context:
space:
mode:
authorDavid S. Miller <davem@davemloft.net>2009-02-15 08:12:00 +0100
committerDavid S. Miller <davem@davemloft.net>2009-02-15 08:12:00 +0100
commit5e30589521518bff36fd2638b3c3d69679c50436 (patch)
tree6ac985658a06b0787e4354d0d16d380ea9b16a5a /kernel
parentMerge branch 'master' of git://git.kernel.org/pub/scm/linux/kernel/git/linvil... (diff)
parentLinux 2.6.29-rc5 (diff)
downloadlinux-5e30589521518bff36fd2638b3c3d69679c50436.tar.xz
linux-5e30589521518bff36fd2638b3c3d69679c50436.zip
Merge branch 'master' of /home/davem/src/GIT/linux-2.6/
Conflicts: drivers/net/wireless/iwlwifi/iwl-agn.c drivers/net/wireless/iwlwifi/iwl3945-base.c
Diffstat (limited to 'kernel')
-rw-r--r--kernel/async.c94
-rw-r--r--kernel/cgroup.c3
-rw-r--r--kernel/exit.c3
-rw-r--r--kernel/fork.c6
-rw-r--r--kernel/irq/numa_migrate.c7
-rw-r--r--kernel/itimer.c4
-rw-r--r--kernel/posix-cpu-timers.c117
-rw-r--r--kernel/power/main.c26
-rw-r--r--kernel/profile.c3
-rw-r--r--kernel/sched.c31
-rw-r--r--kernel/sched_fair.c11
-rw-r--r--kernel/sched_stats.h45
-rw-r--r--kernel/signal.c8
-rw-r--r--kernel/sys.c16
-rw-r--r--kernel/sysctl.c5
-rw-r--r--kernel/trace/ftrace.c5
-rw-r--r--kernel/user.c3
-rw-r--r--kernel/wait.c59
18 files changed, 339 insertions, 107 deletions
diff --git a/kernel/async.c b/kernel/async.c
index 608b32b42812..f565891f2c9b 100644
--- a/kernel/async.c
+++ b/kernel/async.c
@@ -54,6 +54,7 @@ asynchronous and synchronous parts of the kernel.
#include <linux/sched.h>
#include <linux/init.h>
#include <linux/kthread.h>
+#include <linux/delay.h>
#include <asm/atomic.h>
static async_cookie_t next_cookie = 1;
@@ -132,21 +133,23 @@ static void run_one_entry(void)
entry = list_first_entry(&async_pending, struct async_entry, list);
/* 2) move it to the running queue */
- list_del(&entry->list);
- list_add_tail(&entry->list, &async_running);
+ list_move_tail(&entry->list, entry->running);
spin_unlock_irqrestore(&async_lock, flags);
/* 3) run it (and print duration)*/
if (initcall_debug && system_state == SYSTEM_BOOTING) {
- printk("calling %lli_%pF @ %i\n", entry->cookie, entry->func, task_pid_nr(current));
+ printk("calling %lli_%pF @ %i\n", (long long)entry->cookie,
+ entry->func, task_pid_nr(current));
calltime = ktime_get();
}
entry->func(entry->data, entry->cookie);
if (initcall_debug && system_state == SYSTEM_BOOTING) {
rettime = ktime_get();
delta = ktime_sub(rettime, calltime);
- printk("initcall %lli_%pF returned 0 after %lld usecs\n", entry->cookie,
- entry->func, ktime_to_ns(delta) >> 10);
+ printk("initcall %lli_%pF returned 0 after %lld usecs\n",
+ (long long)entry->cookie,
+ entry->func,
+ (long long)ktime_to_ns(delta) >> 10);
}
/* 4) remove it from the running queue */
@@ -205,18 +208,44 @@ static async_cookie_t __async_schedule(async_func_ptr *ptr, void *data, struct l
return newcookie;
}
+/**
+ * async_schedule - schedule a function for asynchronous execution
+ * @ptr: function to execute asynchronously
+ * @data: data pointer to pass to the function
+ *
+ * Returns an async_cookie_t that may be used for checkpointing later.
+ * Note: This function may be called from atomic or non-atomic contexts.
+ */
async_cookie_t async_schedule(async_func_ptr *ptr, void *data)
{
- return __async_schedule(ptr, data, &async_pending);
+ return __async_schedule(ptr, data, &async_running);
}
EXPORT_SYMBOL_GPL(async_schedule);
-async_cookie_t async_schedule_special(async_func_ptr *ptr, void *data, struct list_head *running)
+/**
+ * async_schedule_domain - schedule a function for asynchronous execution within a certain domain
+ * @ptr: function to execute asynchronously
+ * @data: data pointer to pass to the function
+ * @running: running list for the domain
+ *
+ * Returns an async_cookie_t that may be used for checkpointing later.
+ * @running may be used in the async_synchronize_*_domain() functions
+ * to wait within a certain synchronization domain rather than globally.
+ * A synchronization domain is specified via the running queue @running to use.
+ * Note: This function may be called from atomic or non-atomic contexts.
+ */
+async_cookie_t async_schedule_domain(async_func_ptr *ptr, void *data,
+ struct list_head *running)
{
return __async_schedule(ptr, data, running);
}
-EXPORT_SYMBOL_GPL(async_schedule_special);
+EXPORT_SYMBOL_GPL(async_schedule_domain);
+/**
+ * async_synchronize_full - synchronize all asynchronous function calls
+ *
+ * This function waits until all asynchronous function calls have been done.
+ */
void async_synchronize_full(void)
{
do {
@@ -225,13 +254,30 @@ void async_synchronize_full(void)
}
EXPORT_SYMBOL_GPL(async_synchronize_full);
-void async_synchronize_full_special(struct list_head *list)
+/**
+ * async_synchronize_full_domain - synchronize all asynchronous function within a certain domain
+ * @list: running list to synchronize on
+ *
+ * This function waits until all asynchronous function calls for the
+ * synchronization domain specified by the running list @list have been done.
+ */
+void async_synchronize_full_domain(struct list_head *list)
{
- async_synchronize_cookie_special(next_cookie, list);
+ async_synchronize_cookie_domain(next_cookie, list);
}
-EXPORT_SYMBOL_GPL(async_synchronize_full_special);
+EXPORT_SYMBOL_GPL(async_synchronize_full_domain);
-void async_synchronize_cookie_special(async_cookie_t cookie, struct list_head *running)
+/**
+ * async_synchronize_cookie_domain - synchronize asynchronous function calls within a certain domain with cookie checkpointing
+ * @cookie: async_cookie_t to use as checkpoint
+ * @running: running list to synchronize on
+ *
+ * This function waits until all asynchronous function calls for the
+ * synchronization domain specified by the running list @list submitted
+ * prior to @cookie have been done.
+ */
+void async_synchronize_cookie_domain(async_cookie_t cookie,
+ struct list_head *running)
{
ktime_t starttime, delta, endtime;
@@ -247,14 +293,22 @@ void async_synchronize_cookie_special(async_cookie_t cookie, struct list_head *r
delta = ktime_sub(endtime, starttime);
printk("async_continuing @ %i after %lli usec\n",
- task_pid_nr(current), ktime_to_ns(delta) >> 10);
+ task_pid_nr(current),
+ (long long)ktime_to_ns(delta) >> 10);
}
}
-EXPORT_SYMBOL_GPL(async_synchronize_cookie_special);
+EXPORT_SYMBOL_GPL(async_synchronize_cookie_domain);
+/**
+ * async_synchronize_cookie - synchronize asynchronous function calls with cookie checkpointing
+ * @cookie: async_cookie_t to use as checkpoint
+ *
+ * This function waits until all asynchronous function calls prior to @cookie
+ * have been done.
+ */
void async_synchronize_cookie(async_cookie_t cookie)
{
- async_synchronize_cookie_special(cookie, &async_running);
+ async_synchronize_cookie_domain(cookie, &async_running);
}
EXPORT_SYMBOL_GPL(async_synchronize_cookie);
@@ -315,7 +369,11 @@ static int async_manager_thread(void *unused)
ec = atomic_read(&entry_count);
while (tc < ec && tc < MAX_THREADS) {
- kthread_run(async_thread, NULL, "async/%i", tc);
+ if (IS_ERR(kthread_run(async_thread, NULL, "async/%i",
+ tc))) {
+ msleep(100);
+ continue;
+ }
atomic_inc(&thread_count);
tc++;
}
@@ -330,7 +388,9 @@ static int async_manager_thread(void *unused)
static int __init async_init(void)
{
if (async_enabled)
- kthread_run(async_manager_thread, NULL, "async/mgr");
+ if (IS_ERR(kthread_run(async_manager_thread, NULL,
+ "async/mgr")))
+ async_enabled = 0;
return 0;
}
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 5a54ff42874e..e14db9c089b9 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -2351,7 +2351,7 @@ static void cgroup_lock_hierarchy(struct cgroupfs_root *root)
for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
struct cgroup_subsys *ss = subsys[i];
if (ss->root == root)
- mutex_lock_nested(&ss->hierarchy_mutex, i);
+ mutex_lock(&ss->hierarchy_mutex);
}
}
@@ -2637,6 +2637,7 @@ static void __init cgroup_init_subsys(struct cgroup_subsys *ss)
BUG_ON(!list_empty(&init_task.tasks));
mutex_init(&ss->hierarchy_mutex);
+ lockdep_set_class(&ss->hierarchy_mutex, &ss->subsys_key);
ss->active = 1;
}
diff --git a/kernel/exit.c b/kernel/exit.c
index f80dec3f1875..efd30ccf3858 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -118,6 +118,8 @@ static void __exit_signal(struct task_struct *tsk)
* We won't ever get here for the group leader, since it
* will have been the last reference on the signal_struct.
*/
+ sig->utime = cputime_add(sig->utime, task_utime(tsk));
+ sig->stime = cputime_add(sig->stime, task_stime(tsk));
sig->gtime = cputime_add(sig->gtime, task_gtime(tsk));
sig->min_flt += tsk->min_flt;
sig->maj_flt += tsk->maj_flt;
@@ -126,6 +128,7 @@ static void __exit_signal(struct task_struct *tsk)
sig->inblock += task_io_get_inblock(tsk);
sig->oublock += task_io_get_oublock(tsk);
task_io_accounting_add(&sig->ioac, &tsk->ioac);
+ sig->sum_sched_runtime += tsk->se.sum_exec_runtime;
sig = NULL; /* Marker for below. */
}
diff --git a/kernel/fork.c b/kernel/fork.c
index 242a706e7721..a66fbde20715 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -851,13 +851,14 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk)
sig->tty_old_pgrp = NULL;
sig->tty = NULL;
- sig->cutime = sig->cstime = cputime_zero;
+ sig->utime = sig->stime = sig->cutime = sig->cstime = cputime_zero;
sig->gtime = cputime_zero;
sig->cgtime = cputime_zero;
sig->nvcsw = sig->nivcsw = sig->cnvcsw = sig->cnivcsw = 0;
sig->min_flt = sig->maj_flt = sig->cmin_flt = sig->cmaj_flt = 0;
sig->inblock = sig->oublock = sig->cinblock = sig->coublock = 0;
task_io_accounting_init(&sig->ioac);
+ sig->sum_sched_runtime = 0;
taskstats_tgid_init(sig);
task_lock(current->group_leader);
@@ -1005,6 +1006,7 @@ static struct task_struct *copy_process(unsigned long clone_flags,
* triggers too late. This doesn't hurt, the check is only there
* to stop root fork bombs.
*/
+ retval = -EAGAIN;
if (nr_threads >= max_threads)
goto bad_fork_cleanup_count;
@@ -1093,7 +1095,7 @@ static struct task_struct *copy_process(unsigned long clone_flags,
#ifdef CONFIG_DEBUG_MUTEXES
p->blocked_on = NULL; /* not blocked yet */
#endif
- if (unlikely(ptrace_reparented(current)))
+ if (unlikely(current->ptrace))
ptrace_fork(p, clone_flags);
/* Perform scheduler related setup. Assign this task to a CPU. */
diff --git a/kernel/irq/numa_migrate.c b/kernel/irq/numa_migrate.c
index ecf765c6a77a..acd88356ac76 100644
--- a/kernel/irq/numa_migrate.c
+++ b/kernel/irq/numa_migrate.c
@@ -71,7 +71,7 @@ static struct irq_desc *__real_move_irq_desc(struct irq_desc *old_desc,
desc = irq_desc_ptrs[irq];
if (desc && old_desc != desc)
- goto out_unlock;
+ goto out_unlock;
node = cpu_to_node(cpu);
desc = kzalloc_node(sizeof(*desc), GFP_ATOMIC, node);
@@ -84,10 +84,15 @@ static struct irq_desc *__real_move_irq_desc(struct irq_desc *old_desc,
init_copy_one_irq_desc(irq, old_desc, desc, cpu);
irq_desc_ptrs[irq] = desc;
+ spin_unlock_irqrestore(&sparse_irq_lock, flags);
/* free the old one */
free_one_irq_desc(old_desc, desc);
+ spin_unlock(&old_desc->lock);
kfree(old_desc);
+ spin_lock(&desc->lock);
+
+ return desc;
out_unlock:
spin_unlock_irqrestore(&sparse_irq_lock, flags);
diff --git a/kernel/itimer.c b/kernel/itimer.c
index 6a5fe93dd8bd..58762f7077ec 100644
--- a/kernel/itimer.c
+++ b/kernel/itimer.c
@@ -62,7 +62,7 @@ int do_getitimer(int which, struct itimerval *value)
struct task_cputime cputime;
cputime_t utime;
- thread_group_cputime(tsk, &cputime);
+ thread_group_cputimer(tsk, &cputime);
utime = cputime.utime;
if (cputime_le(cval, utime)) { /* about to fire */
cval = jiffies_to_cputime(1);
@@ -82,7 +82,7 @@ int do_getitimer(int which, struct itimerval *value)
struct task_cputime times;
cputime_t ptime;
- thread_group_cputime(tsk, &times);
+ thread_group_cputimer(tsk, &times);
ptime = cputime_add(times.utime, times.stime);
if (cputime_le(cval, ptime)) { /* about to fire */
cval = jiffies_to_cputime(1);
diff --git a/kernel/posix-cpu-timers.c b/kernel/posix-cpu-timers.c
index fa07da94d7be..2313a4cc14ea 100644
--- a/kernel/posix-cpu-timers.c
+++ b/kernel/posix-cpu-timers.c
@@ -230,6 +230,71 @@ static int cpu_clock_sample(const clockid_t which_clock, struct task_struct *p,
return 0;
}
+void thread_group_cputime(struct task_struct *tsk, struct task_cputime *times)
+{
+ struct sighand_struct *sighand;
+ struct signal_struct *sig;
+ struct task_struct *t;
+
+ *times = INIT_CPUTIME;
+
+ rcu_read_lock();
+ sighand = rcu_dereference(tsk->sighand);
+ if (!sighand)
+ goto out;
+
+ sig = tsk->signal;
+
+ t = tsk;
+ do {
+ times->utime = cputime_add(times->utime, t->utime);
+ times->stime = cputime_add(times->stime, t->stime);
+ times->sum_exec_runtime += t->se.sum_exec_runtime;
+
+ t = next_thread(t);
+ } while (t != tsk);
+
+ times->utime = cputime_add(times->utime, sig->utime);
+ times->stime = cputime_add(times->stime, sig->stime);
+ times->sum_exec_runtime += sig->sum_sched_runtime;
+out:
+ rcu_read_unlock();
+}
+
+static void update_gt_cputime(struct task_cputime *a, struct task_cputime *b)
+{
+ if (cputime_gt(b->utime, a->utime))
+ a->utime = b->utime;
+
+ if (cputime_gt(b->stime, a->stime))
+ a->stime = b->stime;
+
+ if (b->sum_exec_runtime > a->sum_exec_runtime)
+ a->sum_exec_runtime = b->sum_exec_runtime;
+}
+
+void thread_group_cputimer(struct task_struct *tsk, struct task_cputime *times)
+{
+ struct thread_group_cputimer *cputimer = &tsk->signal->cputimer;
+ struct task_cputime sum;
+ unsigned long flags;
+
+ spin_lock_irqsave(&cputimer->lock, flags);
+ if (!cputimer->running) {
+ cputimer->running = 1;
+ /*
+ * The POSIX timer interface allows for absolute time expiry
+ * values through the TIMER_ABSTIME flag, therefore we have
+ * to synchronize the timer to the clock every time we start
+ * it.
+ */
+ thread_group_cputime(tsk, &sum);
+ update_gt_cputime(&cputimer->cputime, &sum);
+ }
+ *times = cputimer->cputime;
+ spin_unlock_irqrestore(&cputimer->lock, flags);
+}
+
/*
* Sample a process (thread group) clock for the given group_leader task.
* Must be called with tasklist_lock held for reading.
@@ -457,7 +522,7 @@ void posix_cpu_timers_exit_group(struct task_struct *tsk)
{
struct task_cputime cputime;
- thread_group_cputime(tsk, &cputime);
+ thread_group_cputimer(tsk, &cputime);
cleanup_timers(tsk->signal->cpu_timers,
cputime.utime, cputime.stime, cputime.sum_exec_runtime);
}
@@ -964,6 +1029,19 @@ static void check_thread_timers(struct task_struct *tsk,
}
}
+static void stop_process_timers(struct task_struct *tsk)
+{
+ struct thread_group_cputimer *cputimer = &tsk->signal->cputimer;
+ unsigned long flags;
+
+ if (!cputimer->running)
+ return;
+
+ spin_lock_irqsave(&cputimer->lock, flags);
+ cputimer->running = 0;
+ spin_unlock_irqrestore(&cputimer->lock, flags);
+}
+
/*
* Check for any per-thread CPU timers that have fired and move them
* off the tsk->*_timers list onto the firing list. Per-thread timers
@@ -987,13 +1065,15 @@ static void check_process_timers(struct task_struct *tsk,
sig->rlim[RLIMIT_CPU].rlim_cur == RLIM_INFINITY &&
list_empty(&timers[CPUCLOCK_VIRT]) &&
cputime_eq(sig->it_virt_expires, cputime_zero) &&
- list_empty(&timers[CPUCLOCK_SCHED]))
+ list_empty(&timers[CPUCLOCK_SCHED])) {
+ stop_process_timers(tsk);
return;
+ }
/*
* Collect the current process totals.
*/
- thread_group_cputime(tsk, &cputime);
+ thread_group_cputimer(tsk, &cputime);
utime = cputime.utime;
ptime = cputime_add(utime, cputime.stime);
sum_sched_runtime = cputime.sum_exec_runtime;
@@ -1259,7 +1339,7 @@ static inline int fastpath_timer_check(struct task_struct *tsk)
if (!task_cputime_zero(&sig->cputime_expires)) {
struct task_cputime group_sample;
- thread_group_cputime(tsk, &group_sample);
+ thread_group_cputimer(tsk, &group_sample);
if (task_cputime_expired(&group_sample, &sig->cputime_expires))
return 1;
}
@@ -1329,6 +1409,33 @@ void run_posix_cpu_timers(struct task_struct *tsk)
}
/*
+ * Sample a process (thread group) timer for the given group_leader task.
+ * Must be called with tasklist_lock held for reading.
+ */
+static int cpu_timer_sample_group(const clockid_t which_clock,
+ struct task_struct *p,
+ union cpu_time_count *cpu)
+{
+ struct task_cputime cputime;
+
+ thread_group_cputimer(p, &cputime);
+ switch (CPUCLOCK_WHICH(which_clock)) {
+ default:
+ return -EINVAL;
+ case CPUCLOCK_PROF:
+ cpu->cpu = cputime_add(cputime.utime, cputime.stime);
+ break;
+ case CPUCLOCK_VIRT:
+ cpu->cpu = cputime.utime;
+ break;
+ case CPUCLOCK_SCHED:
+ cpu->sched = cputime.sum_exec_runtime + task_delta_exec(p);
+ break;
+ }
+ return 0;
+}
+
+/*
* Set one of the process-wide special case CPU timers.
* The tsk->sighand->siglock must be held by the caller.
* The *newval argument is relative and we update it to be absolute, *oldval
@@ -1341,7 +1448,7 @@ void set_process_cpu_timer(struct task_struct *tsk, unsigned int clock_idx,
struct list_head *head;
BUG_ON(clock_idx == CPUCLOCK_SCHED);
- cpu_clock_sample_group(clock_idx, tsk, &now);
+ cpu_timer_sample_group(clock_idx, tsk, &now);
if (oldval) {
if (!cputime_eq(*oldval, cputime_zero)) {
diff --git a/kernel/power/main.c b/kernel/power/main.c
index 239988873971..b4d219016b6c 100644
--- a/kernel/power/main.c
+++ b/kernel/power/main.c
@@ -57,16 +57,6 @@ int pm_notifier_call_chain(unsigned long val)
#ifdef CONFIG_PM_DEBUG
int pm_test_level = TEST_NONE;
-static int suspend_test(int level)
-{
- if (pm_test_level == level) {
- printk(KERN_INFO "suspend debug: Waiting for 5 seconds.\n");
- mdelay(5000);
- return 1;
- }
- return 0;
-}
-
static const char * const pm_tests[__TEST_AFTER_LAST] = {
[TEST_NONE] = "none",
[TEST_CORE] = "core",
@@ -125,14 +115,24 @@ static ssize_t pm_test_store(struct kobject *kobj, struct kobj_attribute *attr,
}
power_attr(pm_test);
-#else /* !CONFIG_PM_DEBUG */
-static inline int suspend_test(int level) { return 0; }
-#endif /* !CONFIG_PM_DEBUG */
+#endif /* CONFIG_PM_DEBUG */
#endif /* CONFIG_PM_SLEEP */
#ifdef CONFIG_SUSPEND
+static int suspend_test(int level)
+{
+#ifdef CONFIG_PM_DEBUG
+ if (pm_test_level == level) {
+ printk(KERN_INFO "suspend debug: Waiting for 5 seconds.\n");
+ mdelay(5000);
+ return 1;
+ }
+#endif /* !CONFIG_PM_DEBUG */
+ return 0;
+}
+
#ifdef CONFIG_PM_TEST_SUSPEND
/*
diff --git a/kernel/profile.c b/kernel/profile.c
index 784933acf5b8..7724e0409bae 100644
--- a/kernel/profile.c
+++ b/kernel/profile.c
@@ -114,12 +114,15 @@ int __ref profile_init(void)
if (!slab_is_available()) {
prof_buffer = alloc_bootmem(buffer_bytes);
alloc_bootmem_cpumask_var(&prof_cpu_mask);
+ cpumask_copy(prof_cpu_mask, cpu_possible_mask);
return 0;
}
if (!alloc_cpumask_var(&prof_cpu_mask, GFP_KERNEL))
return -ENOMEM;
+ cpumask_copy(prof_cpu_mask, cpu_possible_mask);
+
prof_buffer = kzalloc(buffer_bytes, GFP_KERNEL);
if (prof_buffer)
return 0;
diff --git a/kernel/sched.c b/kernel/sched.c
index 242d0d47a70d..c1d0ed360088 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -2266,16 +2266,6 @@ static int try_to_wake_up(struct task_struct *p, unsigned int state, int sync)
if (!sched_feat(SYNC_WAKEUPS))
sync = 0;
- if (!sync) {
- if (current->se.avg_overlap < sysctl_sched_migration_cost &&
- p->se.avg_overlap < sysctl_sched_migration_cost)
- sync = 1;
- } else {
- if (current->se.avg_overlap >= sysctl_sched_migration_cost ||
- p->se.avg_overlap >= sysctl_sched_migration_cost)
- sync = 0;
- }
-
#ifdef CONFIG_SMP
if (sched_feat(LB_WAKEUP_UPDATE)) {
struct sched_domain *sd;
@@ -3890,19 +3880,24 @@ int select_nohz_load_balancer(int stop_tick)
int cpu = smp_processor_id();
if (stop_tick) {
- cpumask_set_cpu(cpu, nohz.cpu_mask);
cpu_rq(cpu)->in_nohz_recently = 1;
- /*
- * If we are going offline and still the leader, give up!
- */
- if (!cpu_active(cpu) &&
- atomic_read(&nohz.load_balancer) == cpu) {
+ if (!cpu_active(cpu)) {
+ if (atomic_read(&nohz.load_balancer) != cpu)
+ return 0;
+
+ /*
+ * If we are going offline and still the leader,
+ * give up!
+ */
if (atomic_cmpxchg(&nohz.load_balancer, cpu, -1) != cpu)
BUG();
+
return 0;
}
+ cpumask_set_cpu(cpu, nohz.cpu_mask);
+
/* time for ilb owner also to sleep */
if (cpumask_weight(nohz.cpu_mask) == num_online_cpus()) {
if (atomic_read(&nohz.load_balancer) == cpu)
@@ -4697,8 +4692,8 @@ EXPORT_SYMBOL(default_wake_function);
* started to run but is not in state TASK_RUNNING. try_to_wake_up() returns
* zero in this (rare) case, and we handle it by continuing to scan the queue.
*/
-static void __wake_up_common(wait_queue_head_t *q, unsigned int mode,
- int nr_exclusive, int sync, void *key)
+void __wake_up_common(wait_queue_head_t *q, unsigned int mode,
+ int nr_exclusive, int sync, void *key)
{
wait_queue_t *curr, *next;
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index a7e50ba185ac..0566f2a03c42 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -1191,15 +1191,20 @@ wake_affine(struct sched_domain *this_sd, struct rq *this_rq,
int idx, unsigned long load, unsigned long this_load,
unsigned int imbalance)
{
+ struct task_struct *curr = this_rq->curr;
+ struct task_group *tg;
unsigned long tl = this_load;
unsigned long tl_per_task;
- struct task_group *tg;
unsigned long weight;
int balanced;
if (!(this_sd->flags & SD_WAKE_AFFINE) || !sched_feat(AFFINE_WAKEUPS))
return 0;
+ if (sync && (curr->se.avg_overlap > sysctl_sched_migration_cost ||
+ p->se.avg_overlap > sysctl_sched_migration_cost))
+ sync = 0;
+
/*
* If sync wakeup then subtract the (maximum possible)
* effect of the currently running task from the load
@@ -1426,7 +1431,9 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int sync)
if (!sched_feat(WAKEUP_PREEMPT))
return;
- if (sched_feat(WAKEUP_OVERLAP) && sync) {
+ if (sched_feat(WAKEUP_OVERLAP) && (sync ||
+ (se->avg_overlap < sysctl_sched_migration_cost &&
+ pse->avg_overlap < sysctl_sched_migration_cost))) {
resched_task(curr);
return;
}
diff --git a/kernel/sched_stats.h b/kernel/sched_stats.h
index 8ab0cef8ecab..a8f93dd374e1 100644
--- a/kernel/sched_stats.h
+++ b/kernel/sched_stats.h
@@ -296,19 +296,21 @@ sched_info_switch(struct task_struct *prev, struct task_struct *next)
static inline void account_group_user_time(struct task_struct *tsk,
cputime_t cputime)
{
- struct task_cputime *times;
- struct signal_struct *sig;
+ struct thread_group_cputimer *cputimer;
/* tsk == current, ensure it is safe to use ->signal */
if (unlikely(tsk->exit_state))
return;
- sig = tsk->signal;
- times = &sig->cputime.totals;
+ cputimer = &tsk->signal->cputimer;
- spin_lock(&times->lock);
- times->utime = cputime_add(times->utime, cputime);
- spin_unlock(&times->lock);
+ if (!cputimer->running)
+ return;
+
+ spin_lock(&cputimer->lock);
+ cputimer->cputime.utime =
+ cputime_add(cputimer->cputime.utime, cputime);
+ spin_unlock(&cputimer->lock);
}
/**
@@ -324,19 +326,21 @@ static inline void account_group_user_time(struct task_struct *tsk,
static inline void account_group_system_time(struct task_struct *tsk,
cputime_t cputime)
{
- struct task_cputime *times;
- struct signal_struct *sig;
+ struct thread_group_cputimer *cputimer;
/* tsk == current, ensure it is safe to use ->signal */
if (unlikely(tsk->exit_state))
return;
- sig = tsk->signal;
- times = &sig->cputime.totals;
+ cputimer = &tsk->signal->cputimer;
+
+ if (!cputimer->running)
+ return;
- spin_lock(&times->lock);
- times->stime = cputime_add(times->stime, cputime);
- spin_unlock(&times->lock);
+ spin_lock(&cputimer->lock);
+ cputimer->cputime.stime =
+ cputime_add(cputimer->cputime.stime, cputime);
+ spin_unlock(&cputimer->lock);
}
/**
@@ -352,7 +356,7 @@ static inline void account_group_system_time(struct task_struct *tsk,
static inline void account_group_exec_runtime(struct task_struct *tsk,
unsigned long long ns)
{
- struct task_cputime *times;
+ struct thread_group_cputimer *cputimer;
struct signal_struct *sig;
sig = tsk->signal;
@@ -361,9 +365,12 @@ static inline void account_group_exec_runtime(struct task_struct *tsk,
if (unlikely(!sig))
return;
- times = &sig->cputime.totals;
+ cputimer = &sig->cputimer;
+
+ if (!cputimer->running)
+ return;
- spin_lock(&times->lock);
- times->sum_exec_runtime += ns;
- spin_unlock(&times->lock);
+ spin_lock(&cputimer->lock);
+ cputimer->cputime.sum_exec_runtime += ns;
+ spin_unlock(&cputimer->lock);
}
diff --git a/kernel/signal.c b/kernel/signal.c
index b6b36768b758..2a74fe87c0dd 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -1367,7 +1367,6 @@ int do_notify_parent(struct task_struct *tsk, int sig)
struct siginfo info;
unsigned long flags;
struct sighand_struct *psig;
- struct task_cputime cputime;
int ret = sig;
BUG_ON(sig == -1);
@@ -1397,9 +1396,10 @@ int do_notify_parent(struct task_struct *tsk, int sig)
info.si_uid = __task_cred(tsk)->uid;
rcu_read_unlock();
- thread_group_cputime(tsk, &cputime);
- info.si_utime = cputime_to_jiffies(cputime.utime);
- info.si_stime = cputime_to_jiffies(cputime.stime);
+ info.si_utime = cputime_to_clock_t(cputime_add(tsk->utime,
+ tsk->signal->utime));
+ info.si_stime = cputime_to_clock_t(cputime_add(tsk->stime,
+ tsk->signal->stime));
info.si_status = tsk->exit_code & 0x7f;
if (tsk->exit_code & 0x80)
diff --git a/kernel/sys.c b/kernel/sys.c
index e7dc0e10a485..f145c415bc16 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -1525,22 +1525,14 @@ SYSCALL_DEFINE2(setrlimit, unsigned int, resource, struct rlimit __user *, rlim)
return -EINVAL;
if (copy_from_user(&new_rlim, rlim, sizeof(*rlim)))
return -EFAULT;
+ if (new_rlim.rlim_cur > new_rlim.rlim_max)
+ return -EINVAL;
old_rlim = current->signal->rlim + resource;
if ((new_rlim.rlim_max > old_rlim->rlim_max) &&
!capable(CAP_SYS_RESOURCE))
return -EPERM;
-
- if (resource == RLIMIT_NOFILE) {
- if (new_rlim.rlim_max == RLIM_INFINITY)
- new_rlim.rlim_max = sysctl_nr_open;
- if (new_rlim.rlim_cur == RLIM_INFINITY)
- new_rlim.rlim_cur = sysctl_nr_open;
- if (new_rlim.rlim_max > sysctl_nr_open)
- return -EPERM;
- }
-
- if (new_rlim.rlim_cur > new_rlim.rlim_max)
- return -EINVAL;
+ if (resource == RLIMIT_NOFILE && new_rlim.rlim_max > sysctl_nr_open)
+ return -EPERM;
retval = security_task_setrlimit(resource, &new_rlim);
if (retval)
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 790f9d785663..c5ef44ff850f 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -101,6 +101,7 @@ static int two = 2;
static int zero;
static int one = 1;
+static unsigned long one_ul = 1;
static int one_hundred = 100;
/* this is needed for the proc_dointvec_minmax for [fs_]overflow UID and GID */
@@ -974,7 +975,7 @@ static struct ctl_table vm_table[] = {
.mode = 0644,
.proc_handler = &dirty_background_bytes_handler,
.strategy = &sysctl_intvec,
- .extra1 = &one,
+ .extra1 = &one_ul,
},
{
.ctl_name = VM_DIRTY_RATIO,
@@ -995,7 +996,7 @@ static struct ctl_table vm_table[] = {
.mode = 0644,
.proc_handler = &dirty_bytes_handler,
.strategy = &sysctl_intvec,
- .extra1 = &one,
+ .extra1 = &one_ul,
},
{
.procname = "dirty_writeback_centisecs",
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 7dcf6e9f2b04..9a236ffe2aa4 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -1737,9 +1737,12 @@ static void clear_ftrace_pid(struct pid *pid)
{
struct task_struct *p;
+ rcu_read_lock();
do_each_pid_task(pid, PIDTYPE_PID, p) {
clear_tsk_trace_trace(p);
} while_each_pid_task(pid, PIDTYPE_PID, p);
+ rcu_read_unlock();
+
put_pid(pid);
}
@@ -1747,9 +1750,11 @@ static void set_ftrace_pid(struct pid *pid)
{
struct task_struct *p;
+ rcu_read_lock();
do_each_pid_task(pid, PIDTYPE_PID, p) {
set_tsk_trace_trace(p);
} while_each_pid_task(pid, PIDTYPE_PID, p);
+ rcu_read_unlock();
}
static void clear_ftrace_pid_task(struct pid **pid)
diff --git a/kernel/user.c b/kernel/user.c
index 477b6660f447..3551ac742395 100644
--- a/kernel/user.c
+++ b/kernel/user.c
@@ -72,6 +72,7 @@ static void uid_hash_insert(struct user_struct *up, struct hlist_head *hashent)
static void uid_hash_remove(struct user_struct *up)
{
hlist_del_init(&up->uidhash_node);
+ put_user_ns(up->user_ns);
}
static struct user_struct *uid_hash_find(uid_t uid, struct hlist_head *hashent)
@@ -334,7 +335,6 @@ static void free_user(struct user_struct *up, unsigned long flags)
atomic_inc(&up->__count);
spin_unlock_irqrestore(&uidhash_lock, flags);
- put_user_ns(up->user_ns);
INIT_WORK(&up->work, remove_user_sysfs_dir);
schedule_work(&up->work);
}
@@ -357,7 +357,6 @@ static void free_user(struct user_struct *up, unsigned long flags)
sched_destroy_user(up);
key_put(up->uid_keyring);
key_put(up->session_keyring);
- put_user_ns(up->user_ns);
kmem_cache_free(uid_cachep, up);
}
diff --git a/kernel/wait.c b/kernel/wait.c
index cd87131f2fc2..42a2dbc181c8 100644
--- a/kernel/wait.c
+++ b/kernel/wait.c
@@ -91,6 +91,15 @@ prepare_to_wait_exclusive(wait_queue_head_t *q, wait_queue_t *wait, int state)
}
EXPORT_SYMBOL(prepare_to_wait_exclusive);
+/*
+ * finish_wait - clean up after waiting in a queue
+ * @q: waitqueue waited on
+ * @wait: wait descriptor
+ *
+ * Sets current thread back to running state and removes
+ * the wait descriptor from the given waitqueue if still
+ * queued.
+ */
void finish_wait(wait_queue_head_t *q, wait_queue_t *wait)
{
unsigned long flags;
@@ -117,6 +126,39 @@ void finish_wait(wait_queue_head_t *q, wait_queue_t *wait)
}
EXPORT_SYMBOL(finish_wait);
+/*
+ * abort_exclusive_wait - abort exclusive waiting in a queue
+ * @q: waitqueue waited on
+ * @wait: wait descriptor
+ * @state: runstate of the waiter to be woken
+ * @key: key to identify a wait bit queue or %NULL
+ *
+ * Sets current thread back to running state and removes
+ * the wait descriptor from the given waitqueue if still
+ * queued.
+ *
+ * Wakes up the next waiter if the caller is concurrently
+ * woken up through the queue.
+ *
+ * This prevents waiter starvation where an exclusive waiter
+ * aborts and is woken up concurrently and noone wakes up
+ * the next waiter.
+ */
+void abort_exclusive_wait(wait_queue_head_t *q, wait_queue_t *wait,
+ unsigned int mode, void *key)
+{
+ unsigned long flags;
+
+ __set_current_state(TASK_RUNNING);
+ spin_lock_irqsave(&q->lock, flags);
+ if (!list_empty(&wait->task_list))
+ list_del_init(&wait->task_list);
+ else if (waitqueue_active(q))
+ __wake_up_common(q, mode, 1, 0, key);
+ spin_unlock_irqrestore(&q->lock, flags);
+}
+EXPORT_SYMBOL(abort_exclusive_wait);
+
int autoremove_wake_function(wait_queue_t *wait, unsigned mode, int sync, void *key)
{
int ret = default_wake_function(wait, mode, sync, key);
@@ -177,17 +219,20 @@ int __sched
__wait_on_bit_lock(wait_queue_head_t *wq, struct wait_bit_queue *q,
int (*action)(void *), unsigned mode)
{
- int ret = 0;
-
do {
+ int ret;
+
prepare_to_wait_exclusive(wq, &q->wait, mode);
- if (test_bit(q->key.bit_nr, q->key.flags)) {
- if ((ret = (*action)(q->key.flags)))
- break;
- }
+ if (!test_bit(q->key.bit_nr, q->key.flags))
+ continue;
+ ret = action(q->key.flags);
+ if (!ret)
+ continue;
+ abort_exclusive_wait(wq, &q->wait, mode, &q->key);
+ return ret;
} while (test_and_set_bit(q->key.bit_nr, q->key.flags));
finish_wait(wq, &q->wait);
- return ret;
+ return 0;
}
EXPORT_SYMBOL(__wait_on_bit_lock);