path: root/kernel
author     David Woodhouse <dwmw2@infradead.org>  2006-06-26 17:35:44 +0200
committer  David Woodhouse <dwmw2@infradead.org>  2006-06-26 17:35:44 +0200
commit     62ed948cb1405fe95d61d8c6445c102e0c9da0a6 (patch)
tree       f139adcc861a05e7cc09cdb387a271a652fc2d07 /kernel
parent     [MTD] Initialize 'writesize' (diff)
parent     [PATCH] uclinux: use PER_LINUX_32BIT in binfmt_flat (diff)
download   linux-62ed948cb1405fe95d61d8c6445c102e0c9da0a6.tar.xz
           linux-62ed948cb1405fe95d61d8c6445c102e0c9da0a6.zip
Merge branch 'master' of git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux-2.6
Diffstat (limited to 'kernel')
-rw-r--r--  kernel/acct.c            117
-rw-r--r--  kernel/auditsc.c           1
-rw-r--r--  kernel/compat.c           30
-rw-r--r--  kernel/cpuset.c           16
-rw-r--r--  kernel/exit.c             14
-rw-r--r--  kernel/fork.c              6
-rw-r--r--  kernel/futex.c             8
-rw-r--r--  kernel/hrtimer.c          15
-rw-r--r--  kernel/irq/handle.c        5
-rw-r--r--  kernel/irq/migration.c     4
-rw-r--r--  kernel/irq/proc.c          3
-rw-r--r--  kernel/irq/spurious.c     12
-rw-r--r--  kernel/kexec.c             6
-rw-r--r--  kernel/ksysfs.c           19
-rw-r--r--  kernel/kthread.c          61
-rw-r--r--  kernel/module.c            2
-rw-r--r--  kernel/power/Kconfig       9
-rw-r--r--  kernel/power/disk.c        2
-rw-r--r--  kernel/power/main.c        6
-rw-r--r--  kernel/power/power.h       2
-rw-r--r--  kernel/power/snapshot.c  148
-rw-r--r--  kernel/power/swsusp.c     20
-rw-r--r--  kernel/printk.c           52
-rw-r--r--  kernel/rcupdate.c         13
-rw-r--r--  kernel/sched.c            18
-rw-r--r--  kernel/softirq.c           2
-rw-r--r--  kernel/softlockup.c        4
-rw-r--r--  kernel/stop_machine.c     17
-rw-r--r--  kernel/sys.c              80
-rw-r--r--  kernel/sys_ni.c            2
-rw-r--r--  kernel/sysctl.c           22
-rw-r--r--  kernel/timer.c            32
-rw-r--r--  kernel/user.c              2
-rw-r--r--  kernel/workqueue.c        34
34 files changed, 499 insertions, 285 deletions
diff --git a/kernel/acct.c b/kernel/acct.c
index b327f4d20104..368c4f03fe0e 100644
--- a/kernel/acct.c
+++ b/kernel/acct.c
@@ -75,7 +75,7 @@ int acct_parm[3] = {4, 2, 30};
/*
* External references and all of the globals.
*/
-static void do_acct_process(long, struct file *);
+static void do_acct_process(struct file *);
/*
* This structure is used so that all the data protected by lock
@@ -118,7 +118,7 @@ static int check_free_space(struct file *file)
spin_unlock(&acct_globals.lock);
/* May block */
- if (vfs_statfs(file->f_dentry->d_inode->i_sb, &sbuf))
+ if (vfs_statfs(file->f_dentry, &sbuf))
return res;
suspend = sbuf.f_blocks * SUSPEND;
resume = sbuf.f_blocks * RESUME;
@@ -196,7 +196,7 @@ static void acct_file_reopen(struct file *file)
if (old_acct) {
mnt_unpin(old_acct->f_vfsmnt);
spin_unlock(&acct_globals.lock);
- do_acct_process(0, old_acct);
+ do_acct_process(old_acct);
filp_close(old_acct, NULL);
spin_lock(&acct_globals.lock);
}
@@ -419,16 +419,15 @@ static u32 encode_float(u64 value)
/*
* do_acct_process does all actual work. Caller holds the reference to file.
*/
-static void do_acct_process(long exitcode, struct file *file)
+static void do_acct_process(struct file *file)
{
+ struct pacct_struct *pacct = &current->signal->pacct;
acct_t ac;
mm_segment_t fs;
- unsigned long vsize;
unsigned long flim;
u64 elapsed;
u64 run_time;
struct timespec uptime;
- unsigned long jiffies;
/*
* First check to see if there is enough free_space to continue
@@ -469,12 +468,6 @@ static void do_acct_process(long exitcode, struct file *file)
#endif
do_div(elapsed, AHZ);
ac.ac_btime = xtime.tv_sec - elapsed;
- jiffies = cputime_to_jiffies(cputime_add(current->utime,
- current->signal->utime));
- ac.ac_utime = encode_comp_t(jiffies_to_AHZ(jiffies));
- jiffies = cputime_to_jiffies(cputime_add(current->stime,
- current->signal->stime));
- ac.ac_stime = encode_comp_t(jiffies_to_AHZ(jiffies));
/* we really need to bite the bullet and change layout */
ac.ac_uid = current->uid;
ac.ac_gid = current->gid;
@@ -496,37 +489,18 @@ static void do_acct_process(long exitcode, struct file *file)
old_encode_dev(tty_devnum(current->signal->tty)) : 0;
read_unlock(&tasklist_lock);
- ac.ac_flag = 0;
- if (current->flags & PF_FORKNOEXEC)
- ac.ac_flag |= AFORK;
- if (current->flags & PF_SUPERPRIV)
- ac.ac_flag |= ASU;
- if (current->flags & PF_DUMPCORE)
- ac.ac_flag |= ACORE;
- if (current->flags & PF_SIGNALED)
- ac.ac_flag |= AXSIG;
-
- vsize = 0;
- if (current->mm) {
- struct vm_area_struct *vma;
- down_read(&current->mm->mmap_sem);
- vma = current->mm->mmap;
- while (vma) {
- vsize += vma->vm_end - vma->vm_start;
- vma = vma->vm_next;
- }
- up_read(&current->mm->mmap_sem);
- }
- vsize = vsize / 1024;
- ac.ac_mem = encode_comp_t(vsize);
+ spin_lock(&current->sighand->siglock);
+ ac.ac_utime = encode_comp_t(jiffies_to_AHZ(cputime_to_jiffies(pacct->ac_utime)));
+ ac.ac_stime = encode_comp_t(jiffies_to_AHZ(cputime_to_jiffies(pacct->ac_stime)));
+ ac.ac_flag = pacct->ac_flag;
+ ac.ac_mem = encode_comp_t(pacct->ac_mem);
+ ac.ac_minflt = encode_comp_t(pacct->ac_minflt);
+ ac.ac_majflt = encode_comp_t(pacct->ac_majflt);
+ ac.ac_exitcode = pacct->ac_exitcode;
+ spin_unlock(&current->sighand->siglock);
ac.ac_io = encode_comp_t(0 /* current->io_usage */); /* %% */
ac.ac_rw = encode_comp_t(ac.ac_io / 1024);
- ac.ac_minflt = encode_comp_t(current->signal->min_flt +
- current->min_flt);
- ac.ac_majflt = encode_comp_t(current->signal->maj_flt +
- current->maj_flt);
ac.ac_swaps = encode_comp_t(0);
- ac.ac_exitcode = exitcode;
/*
* Kernel segment override to datasegment and write it
@@ -546,12 +520,63 @@ static void do_acct_process(long exitcode, struct file *file)
}
/**
+ * acct_init_pacct - initialize a new pacct_struct
+ */
+void acct_init_pacct(struct pacct_struct *pacct)
+{
+ memset(pacct, 0, sizeof(struct pacct_struct));
+ pacct->ac_utime = pacct->ac_stime = cputime_zero;
+}
+
+/**
+ * acct_collect - collect accounting information into pacct_struct
+ * @exitcode: task exit code
+ * @group_dead: not 0, if this thread is the last one in the process.
+ */
+void acct_collect(long exitcode, int group_dead)
+{
+ struct pacct_struct *pacct = &current->signal->pacct;
+ unsigned long vsize = 0;
+
+ if (group_dead && current->mm) {
+ struct vm_area_struct *vma;
+ down_read(&current->mm->mmap_sem);
+ vma = current->mm->mmap;
+ while (vma) {
+ vsize += vma->vm_end - vma->vm_start;
+ vma = vma->vm_next;
+ }
+ up_read(&current->mm->mmap_sem);
+ }
+
+ spin_lock_irq(&current->sighand->siglock);
+ if (group_dead)
+ pacct->ac_mem = vsize / 1024;
+ if (thread_group_leader(current)) {
+ pacct->ac_exitcode = exitcode;
+ if (current->flags & PF_FORKNOEXEC)
+ pacct->ac_flag |= AFORK;
+ }
+ if (current->flags & PF_SUPERPRIV)
+ pacct->ac_flag |= ASU;
+ if (current->flags & PF_DUMPCORE)
+ pacct->ac_flag |= ACORE;
+ if (current->flags & PF_SIGNALED)
+ pacct->ac_flag |= AXSIG;
+ pacct->ac_utime = cputime_add(pacct->ac_utime, current->utime);
+ pacct->ac_stime = cputime_add(pacct->ac_stime, current->stime);
+ pacct->ac_minflt += current->min_flt;
+ pacct->ac_majflt += current->maj_flt;
+ spin_unlock_irq(&current->sighand->siglock);
+}
+
+/**
* acct_process - now just a wrapper around do_acct_process
* @exitcode: task exit code
*
* handles process accounting for an exiting task
*/
-void acct_process(long exitcode)
+void acct_process()
{
struct file *file = NULL;
@@ -570,7 +595,7 @@ void acct_process(long exitcode)
get_file(file);
spin_unlock(&acct_globals.lock);
- do_acct_process(exitcode, file);
+ do_acct_process(file);
fput(file);
}
@@ -599,9 +624,7 @@ void acct_update_integrals(struct task_struct *tsk)
*/
void acct_clear_integrals(struct task_struct *tsk)
{
- if (tsk) {
- tsk->acct_stimexpd = 0;
- tsk->acct_rss_mem1 = 0;
- tsk->acct_vm_mem1 = 0;
- }
+ tsk->acct_stimexpd = 0;
+ tsk->acct_rss_mem1 = 0;
+ tsk->acct_vm_mem1 = 0;
}
diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index b097ccb4eb7e..9ebd96fda295 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -1558,6 +1558,7 @@ int __audit_ipc_obj(struct kern_ipc_perm *ipcp)
* @uid: msgq user id
* @gid: msgq group id
* @mode: msgq mode (permissions)
+ * @ipcp: in-kernel IPC permissions
*
* Returns 0 for success or NULL context or < 0 on error.
*/
diff --git a/kernel/compat.c b/kernel/compat.c
index c1601a84f8d8..126dee9530aa 100644
--- a/kernel/compat.c
+++ b/kernel/compat.c
@@ -21,6 +21,7 @@
#include <linux/unistd.h>
#include <linux/security.h>
#include <linux/timex.h>
+#include <linux/migrate.h>
#include <asm/uaccess.h>
@@ -729,17 +730,10 @@ void
sigset_from_compat (sigset_t *set, compat_sigset_t *compat)
{
switch (_NSIG_WORDS) {
-#if defined (__COMPAT_ENDIAN_SWAP__)
- case 4: set->sig[3] = compat->sig[7] | (((long)compat->sig[6]) << 32 );
- case 3: set->sig[2] = compat->sig[5] | (((long)compat->sig[4]) << 32 );
- case 2: set->sig[1] = compat->sig[3] | (((long)compat->sig[2]) << 32 );
- case 1: set->sig[0] = compat->sig[1] | (((long)compat->sig[0]) << 32 );
-#else
case 4: set->sig[3] = compat->sig[6] | (((long)compat->sig[7]) << 32 );
case 3: set->sig[2] = compat->sig[4] | (((long)compat->sig[5]) << 32 );
case 2: set->sig[1] = compat->sig[2] | (((long)compat->sig[3]) << 32 );
case 1: set->sig[0] = compat->sig[0] | (((long)compat->sig[1]) << 32 );
-#endif
}
}
@@ -934,3 +928,25 @@ asmlinkage long compat_sys_adjtimex(struct compat_timex __user *utp)
return ret;
}
+
+#ifdef CONFIG_NUMA
+asmlinkage long compat_sys_move_pages(pid_t pid, unsigned long nr_pages,
+ compat_uptr_t __user *pages32,
+ const int __user *nodes,
+ int __user *status,
+ int flags)
+{
+ const void __user * __user *pages;
+ int i;
+
+ pages = compat_alloc_user_space(nr_pages * sizeof(void *));
+ for (i = 0; i < nr_pages; i++) {
+ compat_uptr_t p;
+
+ if (get_user(p, pages32 + i) ||
+ put_user(compat_ptr(p), pages + i))
+ return -EFAULT;
+ }
+ return sys_move_pages(pid, nr_pages, pages, nodes, status, flags);
+}
+#endif
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index ab81fdd4572b..b602f73fb38d 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -41,6 +41,7 @@
#include <linux/rcupdate.h>
#include <linux/sched.h>
#include <linux/seq_file.h>
+#include <linux/security.h>
#include <linux/slab.h>
#include <linux/smp_lock.h>
#include <linux/spinlock.h>
@@ -392,11 +393,11 @@ static int cpuset_fill_super(struct super_block *sb, void *unused_data,
return 0;
}
-static struct super_block *cpuset_get_sb(struct file_system_type *fs_type,
- int flags, const char *unused_dev_name,
- void *data)
+static int cpuset_get_sb(struct file_system_type *fs_type,
+ int flags, const char *unused_dev_name,
+ void *data, struct vfsmount *mnt)
{
- return get_sb_single(fs_type, flags, data, cpuset_fill_super);
+ return get_sb_single(fs_type, flags, data, cpuset_fill_super, mnt);
}
static struct file_system_type cpuset_fs_type = {
@@ -1177,6 +1178,7 @@ static int attach_task(struct cpuset *cs, char *pidbuf, char **ppathbuf)
cpumask_t cpus;
nodemask_t from, to;
struct mm_struct *mm;
+ int retval;
if (sscanf(pidbuf, "%d", &pid) != 1)
return -EIO;
@@ -1205,6 +1207,12 @@ static int attach_task(struct cpuset *cs, char *pidbuf, char **ppathbuf)
get_task_struct(tsk);
}
+ retval = security_task_setscheduler(tsk, 0, NULL);
+ if (retval) {
+ put_task_struct(tsk);
+ return retval;
+ }
+
mutex_lock(&callback_mutex);
task_lock(tsk);
diff --git a/kernel/exit.c b/kernel/exit.c
index e06d0c10a24e..e76bd02e930e 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -36,6 +36,7 @@
#include <linux/compat.h>
#include <linux/pipe_fs_i.h>
#include <linux/audit.h> /* for audit_free() */
+#include <linux/resource.h>
#include <asm/uaccess.h>
#include <asm/unistd.h>
@@ -45,8 +46,6 @@
extern void sem_exit (void);
extern struct task_struct *child_reaper;
-int getrusage(struct task_struct *, int, struct rusage __user *);
-
static void exit_mm(struct task_struct * tsk);
static void __unhash_process(struct task_struct *p)
@@ -579,7 +578,7 @@ static void exit_mm(struct task_struct * tsk)
down_read(&mm->mmap_sem);
}
atomic_inc(&mm->mm_count);
- if (mm != tsk->active_mm) BUG();
+ BUG_ON(mm != tsk->active_mm);
/* more a memory barrier than a real lock */
task_lock(tsk);
tsk->mm = NULL;
@@ -895,11 +894,11 @@ fastcall NORET_TYPE void do_exit(long code)
if (group_dead) {
hrtimer_cancel(&tsk->signal->real_timer);
exit_itimers(tsk->signal);
- acct_process(code);
}
+ acct_collect(code, group_dead);
if (unlikely(tsk->robust_list))
exit_robust_list(tsk);
-#ifdef CONFIG_COMPAT
+#if defined(CONFIG_FUTEX) && defined(CONFIG_COMPAT)
if (unlikely(tsk->compat_robust_list))
compat_exit_robust_list(tsk);
#endif
@@ -907,6 +906,8 @@ fastcall NORET_TYPE void do_exit(long code)
audit_free(tsk);
exit_mm(tsk);
+ if (group_dead)
+ acct_process();
exit_sem(tsk);
__exit_files(tsk);
__exit_fs(tsk);
@@ -1530,8 +1531,7 @@ check_continued:
if (options & __WNOTHREAD)
break;
tsk = next_thread(tsk);
- if (tsk->signal != current->signal)
- BUG();
+ BUG_ON(tsk->signal != current->signal);
} while (tsk != current);
read_unlock(&tasklist_lock);
diff --git a/kernel/fork.c b/kernel/fork.c
index ac8100e3088a..dfd10cb370c3 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -368,6 +368,8 @@ void fastcall __mmdrop(struct mm_struct *mm)
*/
void mmput(struct mm_struct *mm)
{
+ might_sleep();
+
if (atomic_dec_and_test(&mm->mm_users)) {
exit_aio(mm);
exit_mmap(mm);
@@ -623,6 +625,7 @@ out:
/*
* Allocate a new files structure and copy contents from the
* passed in files structure.
+ * errorp will be valid only when the returned files_struct is NULL.
*/
static struct files_struct *dup_fd(struct files_struct *oldf, int *errorp)
{
@@ -631,6 +634,7 @@ static struct files_struct *dup_fd(struct files_struct *oldf, int *errorp)
int open_files, size, i, expand;
struct fdtable *old_fdt, *new_fdt;
+ *errorp = -ENOMEM;
newf = alloc_files();
if (!newf)
goto out;
@@ -744,7 +748,6 @@ static int copy_files(unsigned long clone_flags, struct task_struct * tsk)
* break this.
*/
tsk->files = NULL;
- error = -ENOMEM;
newf = dup_fd(oldf, &error);
if (!newf)
goto out;
@@ -871,6 +874,7 @@ static inline int copy_signal(unsigned long clone_flags, struct task_struct * ts
tsk->it_prof_expires =
secs_to_cputime(sig->rlim[RLIMIT_CPU].rlim_cur);
}
+ acct_init_pacct(&sig->pacct);
return 0;
}
diff --git a/kernel/futex.c b/kernel/futex.c
index 5699c512057b..e1a380c77a5a 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -1056,11 +1056,11 @@ asmlinkage long sys_futex(u32 __user *uaddr, int op, int val,
(unsigned long)uaddr2, val2, val3);
}
-static struct super_block *
-futexfs_get_sb(struct file_system_type *fs_type,
- int flags, const char *dev_name, void *data)
+static int futexfs_get_sb(struct file_system_type *fs_type,
+ int flags, const char *dev_name, void *data,
+ struct vfsmount *mnt)
{
- return get_sb_pseudo(fs_type, "futex", NULL, 0xBAD1DEA);
+ return get_sb_pseudo(fs_type, "futex", NULL, 0xBAD1DEA, mnt);
}
static struct file_system_type futex_fs_type = {
diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c
index 18324305724a..55601b3ce60e 100644
--- a/kernel/hrtimer.c
+++ b/kernel/hrtimer.c
@@ -98,7 +98,6 @@ static DEFINE_PER_CPU(struct hrtimer_base, hrtimer_bases[MAX_HRTIMER_BASES]) =
/**
* ktime_get_ts - get the monotonic clock in timespec format
- *
* @ts: pointer to timespec variable
*
* The function calculates the monotonic clock from the realtime
@@ -238,7 +237,6 @@ lock_hrtimer_base(const struct hrtimer *timer, unsigned long *flags)
# ifndef CONFIG_KTIME_SCALAR
/**
* ktime_add_ns - Add a scalar nanoseconds value to a ktime_t variable
- *
* @kt: addend
* @nsec: the scalar nsec value to add
*
@@ -299,7 +297,6 @@ void unlock_hrtimer_base(const struct hrtimer *timer, unsigned long *flags)
/**
* hrtimer_forward - forward the timer expiry
- *
* @timer: hrtimer to forward
* @now: forward past this time
* @interval: the interval to forward
@@ -411,7 +408,6 @@ remove_hrtimer(struct hrtimer *timer, struct hrtimer_base *base)
/**
* hrtimer_start - (re)start an relative timer on the current CPU
- *
* @timer: the timer to be added
* @tim: expiry time
* @mode: expiry mode: absolute (HRTIMER_ABS) or relative (HRTIMER_REL)
@@ -460,14 +456,13 @@ EXPORT_SYMBOL_GPL(hrtimer_start);
/**
* hrtimer_try_to_cancel - try to deactivate a timer
- *
* @timer: hrtimer to stop
*
* Returns:
* 0 when the timer was not active
* 1 when the timer was active
* -1 when the timer is currently excuting the callback function and
- * can not be stopped
+ * cannot be stopped
*/
int hrtimer_try_to_cancel(struct hrtimer *timer)
{
@@ -489,7 +484,6 @@ EXPORT_SYMBOL_GPL(hrtimer_try_to_cancel);
/**
* hrtimer_cancel - cancel a timer and wait for the handler to finish.
- *
* @timer: the timer to be cancelled
*
* Returns:
@@ -510,7 +504,6 @@ EXPORT_SYMBOL_GPL(hrtimer_cancel);
/**
* hrtimer_get_remaining - get remaining time for the timer
- *
* @timer: the timer to read
*/
ktime_t hrtimer_get_remaining(const struct hrtimer *timer)
@@ -564,7 +557,6 @@ ktime_t hrtimer_get_next_event(void)
/**
* hrtimer_init - initialize a timer to the given clock
- *
* @timer: the timer to be initialized
* @clock_id: the clock to be used
* @mode: timer mode abs/rel
@@ -576,7 +568,7 @@ void hrtimer_init(struct hrtimer *timer, clockid_t clock_id,
memset(timer, 0, sizeof(struct hrtimer));
- bases = per_cpu(hrtimer_bases, raw_smp_processor_id());
+ bases = __raw_get_cpu_var(hrtimer_bases);
if (clock_id == CLOCK_REALTIME && mode != HRTIMER_ABS)
clock_id = CLOCK_MONOTONIC;
@@ -588,7 +580,6 @@ EXPORT_SYMBOL_GPL(hrtimer_init);
/**
* hrtimer_get_res - get the timer resolution for a clock
- *
* @which_clock: which clock to query
* @tp: pointer to timespec variable to store the resolution
*
@@ -599,7 +590,7 @@ int hrtimer_get_res(const clockid_t which_clock, struct timespec *tp)
{
struct hrtimer_base *bases;
- bases = per_cpu(hrtimer_bases, raw_smp_processor_id());
+ bases = __raw_get_cpu_var(hrtimer_bases);
*tp = ktime_to_timespec(bases[which_clock].resolution);
return 0;
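
[Editor's note: the hrtimer.c hunks above replace per_cpu(..., raw_smp_processor_id()) with __raw_get_cpu_var(). A minimal sketch of the two access forms, assuming a hypothetical per-CPU variable my_counter:]

	#include <linux/percpu.h>
	#include <linux/smp.h>

	DEFINE_PER_CPU(int, my_counter);

	static void bump_this_cpu(void)
	{
		/* old form: look up the CPU number, then index its per-CPU area */
		per_cpu(my_counter, raw_smp_processor_id())++;

		/* new form: resolve "this CPU's copy" in one step, without the
		 * separate smp_processor_id() lookup */
		__raw_get_cpu_var(my_counter)++;
	}
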
diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c
index 51df337b37db..0f6530117105 100644
--- a/kernel/irq/handle.c
+++ b/kernel/irq/handle.c
@@ -76,10 +76,11 @@ irqreturn_t no_action(int cpl, void *dev_id, struct pt_regs *regs)
/*
* Have got an event to handle:
*/
-fastcall int handle_IRQ_event(unsigned int irq, struct pt_regs *regs,
+fastcall irqreturn_t handle_IRQ_event(unsigned int irq, struct pt_regs *regs,
struct irqaction *action)
{
- int ret, retval = 0, status = 0;
+ irqreturn_t ret, retval = IRQ_NONE;
+ unsigned int status = 0;
if (!(action->flags & SA_INTERRUPT))
local_irq_enable();
diff --git a/kernel/irq/migration.c b/kernel/irq/migration.c
index 134f9f2e0e39..a12d00eb5e7c 100644
--- a/kernel/irq/migration.c
+++ b/kernel/irq/migration.c
@@ -30,7 +30,7 @@ void move_native_irq(int irq)
desc->move_irq = 0;
- if (likely(cpus_empty(pending_irq_cpumask[irq])))
+ if (unlikely(cpus_empty(pending_irq_cpumask[irq])))
return;
if (!desc->handler->set_affinity)
@@ -49,7 +49,7 @@ void move_native_irq(int irq)
* cause some ioapics to mal-function.
* Being paranoid i guess!
*/
- if (unlikely(!cpus_empty(tmp))) {
+ if (likely(!cpus_empty(tmp))) {
if (likely(!(desc->status & IRQ_DISABLED)))
desc->handler->disable(irq);
diff --git a/kernel/irq/proc.c b/kernel/irq/proc.c
index d03b5eef8ce0..afacd6f585fa 100644
--- a/kernel/irq/proc.c
+++ b/kernel/irq/proc.c
@@ -24,6 +24,8 @@ static struct proc_dir_entry *smp_affinity_entry[NR_IRQS];
#ifdef CONFIG_GENERIC_PENDING_IRQ
void proc_set_irq_affinity(unsigned int irq, cpumask_t mask_val)
{
+ set_balance_irq_affinity(irq, mask_val);
+
/*
* Save these away for later use. Re-progam when the
* interrupt is pending
@@ -33,6 +35,7 @@ void proc_set_irq_affinity(unsigned int irq, cpumask_t mask_val)
#else
void proc_set_irq_affinity(unsigned int irq, cpumask_t mask_val)
{
+ set_balance_irq_affinity(irq, mask_val);
irq_affinity[irq] = mask_val;
irq_desc[irq].handler->set_affinity(irq, mask_val);
}
diff --git a/kernel/irq/spurious.c b/kernel/irq/spurious.c
index 7df9abd5ec86..b2fb3c18d06b 100644
--- a/kernel/irq/spurious.c
+++ b/kernel/irq/spurious.c
@@ -11,7 +11,7 @@
#include <linux/kallsyms.h>
#include <linux/interrupt.h>
-static int irqfixup;
+static int irqfixup __read_mostly;
/*
* Recovery handler for misrouted interrupts.
@@ -136,9 +136,9 @@ static void report_bad_irq(unsigned int irq, irq_desc_t *desc, irqreturn_t actio
void note_interrupt(unsigned int irq, irq_desc_t *desc, irqreturn_t action_ret,
struct pt_regs *regs)
{
- if (action_ret != IRQ_HANDLED) {
+ if (unlikely(action_ret != IRQ_HANDLED)) {
desc->irqs_unhandled++;
- if (action_ret != IRQ_NONE)
+ if (unlikely(action_ret != IRQ_NONE))
report_bad_irq(irq, desc, action_ret);
}
@@ -152,11 +152,11 @@ void note_interrupt(unsigned int irq, irq_desc_t *desc, irqreturn_t action_ret,
}
desc->irq_count++;
- if (desc->irq_count < 100000)
+ if (likely(desc->irq_count < 100000))
return;
desc->irq_count = 0;
- if (desc->irqs_unhandled > 99900) {
+ if (unlikely(desc->irqs_unhandled > 99900)) {
/*
* The interrupt is stuck
*/
@@ -171,7 +171,7 @@ void note_interrupt(unsigned int irq, irq_desc_t *desc, irqreturn_t action_ret,
desc->irqs_unhandled = 0;
}
-int noirqdebug;
+int noirqdebug __read_mostly;
int __init noirqdebug_setup(char *str)
{
diff --git a/kernel/kexec.c b/kernel/kexec.c
index bf39d28e4c0e..58f0f382597c 100644
--- a/kernel/kexec.c
+++ b/kernel/kexec.c
@@ -902,14 +902,14 @@ static int kimage_load_segment(struct kimage *image,
* kexec does not sync, or unmount filesystems so if you need
* that to happen you need to do that yourself.
*/
-struct kimage *kexec_image = NULL;
-static struct kimage *kexec_crash_image = NULL;
+struct kimage *kexec_image;
+struct kimage *kexec_crash_image;
/*
* A home grown binary mutex.
* Nothing can wait so this mutex is safe to use
* in interrupt context :)
*/
-static int kexec_lock = 0;
+static int kexec_lock;
asmlinkage long sys_kexec_load(unsigned long entry, unsigned long nr_segments,
struct kexec_segment __user *segments,
diff --git a/kernel/ksysfs.c b/kernel/ksysfs.c
index f119e098e67b..9e28478a17a5 100644
--- a/kernel/ksysfs.c
+++ b/kernel/ksysfs.c
@@ -14,6 +14,7 @@
#include <linux/sysfs.h>
#include <linux/module.h>
#include <linux/init.h>
+#include <linux/kexec.h>
#define KERNEL_ATTR_RO(_name) \
static struct subsys_attribute _name##_attr = __ATTR_RO(_name)
@@ -48,6 +49,20 @@ static ssize_t uevent_helper_store(struct subsystem *subsys, const char *page, s
KERNEL_ATTR_RW(uevent_helper);
#endif
+#ifdef CONFIG_KEXEC
+static ssize_t kexec_loaded_show(struct subsystem *subsys, char *page)
+{
+ return sprintf(page, "%d\n", !!kexec_image);
+}
+KERNEL_ATTR_RO(kexec_loaded);
+
+static ssize_t kexec_crash_loaded_show(struct subsystem *subsys, char *page)
+{
+ return sprintf(page, "%d\n", !!kexec_crash_image);
+}
+KERNEL_ATTR_RO(kexec_crash_loaded);
+#endif /* CONFIG_KEXEC */
+
decl_subsys(kernel, NULL, NULL);
EXPORT_SYMBOL_GPL(kernel_subsys);
@@ -56,6 +71,10 @@ static struct attribute * kernel_attrs[] = {
&uevent_seqnum_attr.attr,
&uevent_helper_attr.attr,
#endif
+#ifdef CONFIG_KEXEC
+ &kexec_loaded_attr.attr,
+ &kexec_crash_loaded_attr.attr,
+#endif
NULL
};
diff --git a/kernel/kthread.c b/kernel/kthread.c
index c5f3c6613b6d..24be714b04c7 100644
--- a/kernel/kthread.c
+++ b/kernel/kthread.c
@@ -45,6 +45,13 @@ struct kthread_stop_info
static DEFINE_MUTEX(kthread_stop_lock);
static struct kthread_stop_info kthread_stop_info;
+/**
+ * kthread_should_stop - should this kthread return now?
+ *
+ * When someone calls kthread_stop on your kthread, it will be woken
+ * and this will return true. You should then return, and your return
+ * value will be passed through to kthread_stop().
+ */
int kthread_should_stop(void)
{
return (kthread_stop_info.k == current);
@@ -122,6 +129,25 @@ static void keventd_create_kthread(void *_create)
complete(&create->done);
}
+/**
+ * kthread_create - create a kthread.
+ * @threadfn: the function to run until signal_pending(current).
+ * @data: data ptr for @threadfn.
+ * @namefmt: printf-style name for the thread.
+ *
+ * Description: This helper function creates and names a kernel
+ * thread. The thread will be stopped: use wake_up_process() to start
+ * it. See also kthread_run(), kthread_create_on_cpu().
+ *
+ * When woken, the thread will run @threadfn() with @data as its
+ * argument. @threadfn can either call do_exit() directly if it is a
+ * standalone thread for which noone will call kthread_stop(), or
+ * return when 'kthread_should_stop()' is true (which means
+ * kthread_stop() has been called). The return value should be zero
+ * or a negative error number; it will be passed to kthread_stop().
+ *
+ * Returns a task_struct or ERR_PTR(-ENOMEM).
+ */
struct task_struct *kthread_create(int (*threadfn)(void *data),
void *data,
const char namefmt[],
@@ -156,6 +182,15 @@ struct task_struct *kthread_create(int (*threadfn)(void *data),
}
EXPORT_SYMBOL(kthread_create);
+/**
+ * kthread_bind - bind a just-created kthread to a cpu.
+ * @k: thread created by kthread_create().
+ * @cpu: cpu (might not be online, must be possible) for @k to run on.
+ *
+ * Description: This function is equivalent to set_cpus_allowed(),
+ * except that @cpu doesn't need to be online, and the thread must be
+ * stopped (i.e., just returned from kthread_create().
+ */
void kthread_bind(struct task_struct *k, unsigned int cpu)
{
BUG_ON(k->state != TASK_INTERRUPTIBLE);
@@ -166,12 +201,36 @@ void kthread_bind(struct task_struct *k, unsigned int cpu)
}
EXPORT_SYMBOL(kthread_bind);
+/**
+ * kthread_stop - stop a thread created by kthread_create().
+ * @k: thread created by kthread_create().
+ *
+ * Sets kthread_should_stop() for @k to return true, wakes it, and
+ * waits for it to exit. Your threadfn() must not call do_exit()
+ * itself if you use this function! This can also be called after
+ * kthread_create() instead of calling wake_up_process(): the thread
+ * will exit without calling threadfn().
+ *
+ * Returns the result of threadfn(), or %-EINTR if wake_up_process()
+ * was never called.
+ */
int kthread_stop(struct task_struct *k)
{
return kthread_stop_sem(k, NULL);
}
EXPORT_SYMBOL(kthread_stop);
+/**
+ * kthread_stop_sem - stop a thread created by kthread_create().
+ * @k: thread created by kthread_create().
+ * @s: semaphore that @k waits on while idle.
+ *
+ * Does essentially the same thing as kthread_stop() above, but wakes
+ * @k by calling up(@s).
+ *
+ * Returns the result of threadfn(), or %-EINTR if wake_up_process()
+ * was never called.
+ */
int kthread_stop_sem(struct task_struct *k, struct semaphore *s)
{
int ret;
@@ -210,5 +269,5 @@ static __init int helper_init(void)
return 0;
}
-core_initcall(helper_init);
+core_initcall(helper_init);
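
[Editor's note: the kerneldoc added above spells out the kthread_create()/kthread_stop() contract. A minimal usage sketch — my_thread_fn, my_start and my_stop are hypothetical names — for a thread that loops until it is told to stop:]

	#include <linux/kthread.h>
	#include <linux/delay.h>
	#include <linux/err.h>

	static struct task_struct *my_task;

	static int my_thread_fn(void *data)
	{
		while (!kthread_should_stop()) {
			/* ... periodic work ... */
			msleep(100);
		}
		return 0;			/* handed back to kthread_stop() */
	}

	static int my_start(void)
	{
		my_task = kthread_create(my_thread_fn, NULL, "my_thread");
		if (IS_ERR(my_task))
			return PTR_ERR(my_task);
		wake_up_process(my_task);	/* kthread_create() leaves it stopped */
		return 0;
	}

	static void my_stop(void)
	{
		kthread_stop(my_task);		/* wakes the thread, waits for its return value */
	}
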
diff --git a/kernel/module.c b/kernel/module.c
index bbe04862e1b0..d75275de1c28 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -1326,7 +1326,7 @@ int is_exported(const char *name, const struct module *mod)
if (!mod && lookup_symbol(name, __start___ksymtab, __stop___ksymtab))
return 1;
else
- if (lookup_symbol(name, mod->syms, mod->syms + mod->num_syms))
+ if (mod && lookup_symbol(name, mod->syms, mod->syms + mod->num_syms))
return 1;
else
return 0;
diff --git a/kernel/power/Kconfig b/kernel/power/Kconfig
index ce0dfb8f4a4e..fc311a4673a2 100644
--- a/kernel/power/Kconfig
+++ b/kernel/power/Kconfig
@@ -36,6 +36,15 @@ config PM_DEBUG
code. This is helpful when debugging and reporting various PM bugs,
like suspend support.
+config PM_TRACE
+ bool "Suspend/resume event tracing"
+ depends on PM && PM_DEBUG && X86_32
+ default y
+ ---help---
+ This enables some cheesy code to save the last PM event point in the
+ RTC across reboots, so that you can debug a machine that just hangs
+ during suspend (or more commonly, during resume).
+
config SOFTWARE_SUSPEND
bool "Software Suspend"
depends on PM && SWAP && (X86 && (!SMP || SUSPEND_SMP)) || ((FRV || PPC32) && !SMP)
diff --git a/kernel/power/disk.c b/kernel/power/disk.c
index 81d4d982f3f0..e13e74067845 100644
--- a/kernel/power/disk.c
+++ b/kernel/power/disk.c
@@ -231,7 +231,7 @@ static int software_resume(void)
late_initcall(software_resume);
-static char * pm_disk_modes[] = {
+static const char * const pm_disk_modes[] = {
[PM_DISK_FIRMWARE] = "firmware",
[PM_DISK_PLATFORM] = "platform",
[PM_DISK_SHUTDOWN] = "shutdown",
diff --git a/kernel/power/main.c b/kernel/power/main.c
index 0a907f0dc56b..6d295c776794 100644
--- a/kernel/power/main.c
+++ b/kernel/power/main.c
@@ -15,7 +15,7 @@
#include <linux/errno.h>
#include <linux/init.h>
#include <linux/pm.h>
-
+#include <linux/console.h>
#include "power.h"
@@ -145,7 +145,7 @@ static void suspend_finish(suspend_state_t state)
-static char *pm_states[PM_SUSPEND_MAX] = {
+static const char * const pm_states[PM_SUSPEND_MAX] = {
[PM_SUSPEND_STANDBY] = "standby",
[PM_SUSPEND_MEM] = "mem",
#ifdef CONFIG_SOFTWARE_SUSPEND
@@ -262,7 +262,7 @@ static ssize_t state_show(struct subsystem * subsys, char * buf)
static ssize_t state_store(struct subsystem * subsys, const char * buf, size_t n)
{
suspend_state_t state = PM_SUSPEND_STANDBY;
- char ** s;
+ const char * const *s;
char *p;
int error;
int len;
diff --git a/kernel/power/power.h b/kernel/power/power.h
index f06f12f21767..57a792982fb9 100644
--- a/kernel/power/power.h
+++ b/kernel/power/power.h
@@ -55,7 +55,7 @@ struct snapshot_handle {
unsigned int page;
unsigned int page_offset;
unsigned int prev;
- struct pbe *pbe;
+ struct pbe *pbe, *last_pbe;
void *buffer;
unsigned int buf_offset;
};
diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c
index 3eeedbb13b78..24c96f354231 100644
--- a/kernel/power/snapshot.c
+++ b/kernel/power/snapshot.c
@@ -150,6 +150,10 @@ int restore_highmem(void)
}
return 0;
}
+#else
+static inline unsigned int count_highmem_pages(void) {return 0;}
+static inline int save_highmem(void) {return 0;}
+static inline int restore_highmem(void) {return 0;}
#endif
static int pfn_is_nosave(unsigned long pfn)
@@ -293,62 +297,29 @@ static inline void create_pbe_list(struct pbe *pblist, unsigned int nr_pages)
}
}
-/**
- * On resume it is necessary to trace and eventually free the unsafe
- * pages that have been allocated, because they are needed for I/O
- * (on x86-64 we likely will "eat" these pages once again while
- * creating the temporary page translation tables)
- */
-
-struct eaten_page {
- struct eaten_page *next;
- char padding[PAGE_SIZE - sizeof(void *)];
-};
-
-static struct eaten_page *eaten_pages = NULL;
-
-static void release_eaten_pages(void)
-{
- struct eaten_page *p, *q;
-
- p = eaten_pages;
- while (p) {
- q = p->next;
- /* We don't want swsusp_free() to free this page again */
- ClearPageNosave(virt_to_page(p));
- free_page((unsigned long)p);
- p = q;
- }
- eaten_pages = NULL;
-}
+static unsigned int unsafe_pages;
/**
* @safe_needed - on resume, for storing the PBE list and the image,
* we can only use memory pages that do not conflict with the pages
- * which had been used before suspend.
+ * used before suspend.
*
* The unsafe pages are marked with the PG_nosave_free flag
- *
- * Allocated but unusable (ie eaten) memory pages should be marked
- * so that swsusp_free() can release them
+ * and we count them using unsafe_pages
*/
static inline void *alloc_image_page(gfp_t gfp_mask, int safe_needed)
{
void *res;
+ res = (void *)get_zeroed_page(gfp_mask);
if (safe_needed)
- do {
+ while (res && PageNosaveFree(virt_to_page(res))) {
+ /* The page is unsafe, mark it for swsusp_free() */
+ SetPageNosave(virt_to_page(res));
+ unsafe_pages++;
res = (void *)get_zeroed_page(gfp_mask);
- if (res && PageNosaveFree(virt_to_page(res))) {
- /* This is for swsusp_free() */
- SetPageNosave(virt_to_page(res));
- ((struct eaten_page *)res)->next = eaten_pages;
- eaten_pages = res;
- }
- } while (res && PageNosaveFree(virt_to_page(res)));
- else
- res = (void *)get_zeroed_page(gfp_mask);
+ }
if (res) {
SetPageNosave(virt_to_page(res));
SetPageNosaveFree(virt_to_page(res));
@@ -374,7 +345,8 @@ unsigned long get_safe_page(gfp_t gfp_mask)
* On each page we set up a list of struct_pbe elements.
*/
-struct pbe *alloc_pagedir(unsigned int nr_pages, gfp_t gfp_mask, int safe_needed)
+static struct pbe *alloc_pagedir(unsigned int nr_pages, gfp_t gfp_mask,
+ int safe_needed)
{
unsigned int num;
struct pbe *pblist, *pbe;
@@ -642,6 +614,8 @@ static int mark_unsafe_pages(struct pbe *pblist)
return -EFAULT;
}
+ unsafe_pages = 0;
+
return 0;
}
@@ -719,42 +693,99 @@ static inline struct pbe *unpack_orig_addresses(unsigned long *buf,
}
/**
- * create_image - use metadata contained in the PBE list
+ * prepare_image - use metadata contained in the PBE list
* pointed to by pagedir_nosave to mark the pages that will
* be overwritten in the process of restoring the system
- * memory state from the image and allocate memory for
- * the image avoiding these pages
+ * memory state from the image ("unsafe" pages) and allocate
+ * memory for the image
+ *
+ * The idea is to allocate the PBE list first and then
+ * allocate as many pages as it's needed for the image data,
+ * but not to assign these pages to the PBEs initially.
+ * Instead, we just mark them as allocated and create a list
+ * of "safe" which will be used later
*/
-static int create_image(struct snapshot_handle *handle)
+struct safe_page {
+ struct safe_page *next;
+ char padding[PAGE_SIZE - sizeof(void *)];
+};
+
+static struct safe_page *safe_pages;
+
+static int prepare_image(struct snapshot_handle *handle)
{
int error = 0;
- struct pbe *p, *pblist;
+ unsigned int nr_pages = nr_copy_pages;
+ struct pbe *p, *pblist = NULL;
p = pagedir_nosave;
error = mark_unsafe_pages(p);
if (!error) {
- pblist = alloc_pagedir(nr_copy_pages, GFP_ATOMIC, 1);
+ pblist = alloc_pagedir(nr_pages, GFP_ATOMIC, 1);
if (pblist)
copy_page_backup_list(pblist, p);
free_pagedir(p, 0);
if (!pblist)
error = -ENOMEM;
}
- if (!error)
- error = alloc_data_pages(pblist, GFP_ATOMIC, 1);
+ safe_pages = NULL;
+ if (!error && nr_pages > unsafe_pages) {
+ nr_pages -= unsafe_pages;
+ while (nr_pages--) {
+ struct safe_page *ptr;
+
+ ptr = (struct safe_page *)get_zeroed_page(GFP_ATOMIC);
+ if (!ptr) {
+ error = -ENOMEM;
+ break;
+ }
+ if (!PageNosaveFree(virt_to_page(ptr))) {
+ /* The page is "safe", add it to the list */
+ ptr->next = safe_pages;
+ safe_pages = ptr;
+ }
+ /* Mark the page as allocated */
+ SetPageNosave(virt_to_page(ptr));
+ SetPageNosaveFree(virt_to_page(ptr));
+ }
+ }
if (!error) {
- release_eaten_pages();
pagedir_nosave = pblist;
} else {
- pagedir_nosave = NULL;
handle->pbe = NULL;
- nr_copy_pages = 0;
- nr_meta_pages = 0;
+ swsusp_free();
}
return error;
}
+static void *get_buffer(struct snapshot_handle *handle)
+{
+ struct pbe *pbe = handle->pbe, *last = handle->last_pbe;
+ struct page *page = virt_to_page(pbe->orig_address);
+
+ if (PageNosave(page) && PageNosaveFree(page)) {
+ /*
+ * We have allocated the "original" page frame and we can
+ * use it directly to store the read page
+ */
+ pbe->address = 0;
+ if (last && last->next)
+ last->next = NULL;
+ return (void *)pbe->orig_address;
+ }
+ /*
+ * The "original" page frame has not been allocated and we have to
+ * use a "safe" page frame to store the read page
+ */
+ pbe->address = (unsigned long)safe_pages;
+ safe_pages = safe_pages->next;
+ if (last)
+ last->next = pbe;
+ handle->last_pbe = pbe;
+ return (void *)pbe->address;
+}
+
/**
* snapshot_write_next - used for writing the system memory snapshot.
*
@@ -799,15 +830,16 @@ int snapshot_write_next(struct snapshot_handle *handle, size_t count)
} else if (handle->prev <= nr_meta_pages) {
handle->pbe = unpack_orig_addresses(buffer, handle->pbe);
if (!handle->pbe) {
- error = create_image(handle);
+ error = prepare_image(handle);
if (error)
return error;
handle->pbe = pagedir_nosave;
- handle->buffer = (void *)handle->pbe->address;
+ handle->last_pbe = NULL;
+ handle->buffer = get_buffer(handle);
}
} else {
handle->pbe = handle->pbe->next;
- handle->buffer = (void *)handle->pbe->address;
+ handle->buffer = get_buffer(handle);
}
handle->prev = handle->page;
}
diff --git a/kernel/power/swsusp.c b/kernel/power/swsusp.c
index c4016cbbd3e0..17f669c83012 100644
--- a/kernel/power/swsusp.c
+++ b/kernel/power/swsusp.c
@@ -67,9 +67,9 @@ unsigned int count_highmem_pages(void);
int save_highmem(void);
int restore_highmem(void);
#else
-static int save_highmem(void) { return 0; }
-static int restore_highmem(void) { return 0; }
-static unsigned int count_highmem_pages(void) { return 0; }
+static inline int save_highmem(void) { return 0; }
+static inline int restore_highmem(void) { return 0; }
+static inline unsigned int count_highmem_pages(void) { return 0; }
#endif
/**
@@ -175,6 +175,12 @@ void free_all_swap_pages(int swap, struct bitmap_page *bitmap)
*/
#define SHRINK_BITE 10000
+static inline unsigned long __shrink_memory(long tmp)
+{
+ if (tmp > SHRINK_BITE)
+ tmp = SHRINK_BITE;
+ return shrink_all_memory(tmp);
+}
int swsusp_shrink_memory(void)
{
@@ -192,15 +198,17 @@ int swsusp_shrink_memory(void)
PAGES_FOR_IO;
tmp = size;
for_each_zone (zone)
- if (!is_highmem(zone))
+ if (!is_highmem(zone) && populated_zone(zone)) {
tmp -= zone->free_pages;
+ tmp += zone->lowmem_reserve[ZONE_NORMAL];
+ }
if (tmp > 0) {
- tmp = shrink_all_memory(SHRINK_BITE);
+ tmp = __shrink_memory(tmp);
if (!tmp)
return -ENOMEM;
pages += tmp;
} else if (size > image_size / PAGE_SIZE) {
- tmp = shrink_all_memory(SHRINK_BITE);
+ tmp = __shrink_memory(size - (image_size / PAGE_SIZE));
pages += tmp;
}
printk("\b%c", p[i++%4]);
diff --git a/kernel/printk.c b/kernel/printk.c
index 19a955619294..95b7fe17f124 100644
--- a/kernel/printk.c
+++ b/kernel/printk.c
@@ -24,6 +24,7 @@
#include <linux/console.h>
#include <linux/init.h>
#include <linux/module.h>
+#include <linux/moduleparam.h>
#include <linux/interrupt.h> /* For in_interrupt() */
#include <linux/config.h>
#include <linux/delay.h>
@@ -327,7 +328,9 @@ static void __call_console_drivers(unsigned long start, unsigned long end)
struct console *con;
for (con = console_drivers; con; con = con->next) {
- if ((con->flags & CON_ENABLED) && con->write)
+ if ((con->flags & CON_ENABLED) && con->write &&
+ (cpu_online(smp_processor_id()) ||
+ (con->flags & CON_ANYTIME)))
con->write(con, &LOG_BUF(start), end - start);
}
}
@@ -437,6 +440,7 @@ static int printk_time = 1;
#else
static int printk_time = 0;
#endif
+module_param(printk_time, int, S_IRUGO | S_IWUSR);
static int __init printk_time_setup(char *str)
{
@@ -453,6 +457,18 @@ __attribute__((weak)) unsigned long long printk_clock(void)
return sched_clock();
}
+/* Check if we have any console registered that can be called early in boot. */
+static int have_callable_console(void)
+{
+ struct console *con;
+
+ for (con = console_drivers; con; con = con->next)
+ if (con->flags & CON_ANYTIME)
+ return 1;
+
+ return 0;
+}
+
/**
* printk - print a kernel message
* @fmt: format string
@@ -566,27 +582,29 @@ asmlinkage int vprintk(const char *fmt, va_list args)
log_level_unknown = 1;
}
- if (!cpu_online(smp_processor_id())) {
+ if (!down_trylock(&console_sem)) {
/*
- * Some console drivers may assume that per-cpu resources have
- * been allocated. So don't allow them to be called by this
- * CPU until it is officially up. We shouldn't be calling into
- * random console drivers on a CPU which doesn't exist yet..
+ * We own the drivers. We can drop the spinlock and
+ * let release_console_sem() print the text, maybe ...
*/
+ console_locked = 1;
printk_cpu = UINT_MAX;
spin_unlock_irqrestore(&logbuf_lock, flags);
- goto out;
- }
- if (!down_trylock(&console_sem)) {
- console_locked = 1;
+
/*
- * We own the drivers. We can drop the spinlock and let
- * release_console_sem() print the text
+ * Console drivers may assume that per-cpu resources have
+ * been allocated. So unless they're explicitly marked as
+ * being able to cope (CON_ANYTIME) don't call them until
+ * this CPU is officially up.
*/
- printk_cpu = UINT_MAX;
- spin_unlock_irqrestore(&logbuf_lock, flags);
- console_may_schedule = 0;
- release_console_sem();
+ if (cpu_online(smp_processor_id()) || have_callable_console()) {
+ console_may_schedule = 0;
+ release_console_sem();
+ } else {
+ /* Release by hand to avoid flushing the buffer. */
+ console_locked = 0;
+ up(&console_sem);
+ }
} else {
/*
* Someone else owns the drivers. We drop the spinlock, which
@@ -596,7 +614,7 @@ asmlinkage int vprintk(const char *fmt, va_list args)
printk_cpu = UINT_MAX;
spin_unlock_irqrestore(&logbuf_lock, flags);
}
-out:
+
preempt_enable();
return printed_len;
}
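
[Editor's note: the CON_ANYTIME handling above lets vprintk() call a console before its CPU is fully online, but only for drivers that opt in. A sketch of such a console — the driver names and the putc helper are hypothetical:]

	#include <linux/console.h>

	static void early_uart_putc(char c)
	{
		/* hypothetical: poke the UART data register directly */
	}

	static void early_uart_write(struct console *con, const char *s, unsigned n)
	{
		/* must not depend on per-cpu state that is set up at CPU-online time */
		while (n--)
			early_uart_putc(*s++);
	}

	static struct console early_uart_console = {
		.name	= "earlyuart",
		.write	= early_uart_write,
		.flags	= CON_PRINTBUFFER | CON_ANYTIME,
		.index	= -1,
	};

	/* registered as usual with register_console(&early_uart_console) */
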
diff --git a/kernel/rcupdate.c b/kernel/rcupdate.c
index 2058f88c7bbb..20e9710fc21c 100644
--- a/kernel/rcupdate.c
+++ b/kernel/rcupdate.c
@@ -612,14 +612,6 @@ void synchronize_rcu(void)
wait_for_completion(&rcu.completion);
}
-/*
- * Deprecated, use synchronize_rcu() or synchronize_sched() instead.
- */
-void synchronize_kernel(void)
-{
- synchronize_rcu();
-}
-
module_param(blimit, int, 0);
module_param(qhimark, int, 0);
module_param(qlowmark, int, 0);
@@ -627,7 +619,6 @@ module_param(qlowmark, int, 0);
module_param(rsinterval, int, 0);
#endif
EXPORT_SYMBOL_GPL(rcu_batches_completed);
-EXPORT_SYMBOL_GPL_FUTURE(call_rcu); /* WARNING: GPL-only in April 2006. */
-EXPORT_SYMBOL_GPL_FUTURE(call_rcu_bh); /* WARNING: GPL-only in April 2006. */
+EXPORT_SYMBOL_GPL(call_rcu);
+EXPORT_SYMBOL_GPL(call_rcu_bh);
EXPORT_SYMBOL_GPL(synchronize_rcu);
-EXPORT_SYMBOL_GPL_FUTURE(synchronize_kernel); /* WARNING: GPL-only in April 2006. */
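
[Editor's note: call_rcu() and call_rcu_bh(), whose exports are tightened above, queue a callback that runs after a grace period. A typical usage sketch — struct my_obj and its helpers are hypothetical:]

	#include <linux/rcupdate.h>
	#include <linux/slab.h>

	struct my_obj {
		int data;
		struct rcu_head rcu;
	};

	static void my_obj_free_rcu(struct rcu_head *head)
	{
		kfree(container_of(head, struct my_obj, rcu));
	}

	static void my_obj_release(struct my_obj *obj)
	{
		/* defer the free until all pre-existing RCU readers have finished */
		call_rcu(&obj->rcu, my_obj_free_rcu);
	}
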
diff --git a/kernel/sched.c b/kernel/sched.c
index c13f1bd2df7d..f06d059edef5 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -3886,6 +3886,10 @@ long sched_setaffinity(pid_t pid, cpumask_t new_mask)
!capable(CAP_SYS_NICE))
goto out_unlock;
+ retval = security_task_setscheduler(p, 0, NULL);
+ if (retval)
+ goto out_unlock;
+
cpus_allowed = cpuset_cpus_allowed(p);
cpus_and(new_mask, new_mask, cpus_allowed);
retval = set_cpus_allowed(p, new_mask);
@@ -3954,7 +3958,10 @@ long sched_getaffinity(pid_t pid, cpumask_t *mask)
if (!p)
goto out_unlock;
- retval = 0;
+ retval = security_task_getscheduler(p);
+ if (retval)
+ goto out_unlock;
+
cpus_and(*mask, p->cpus_allowed, cpu_online_map);
out_unlock:
@@ -4046,6 +4053,9 @@ asmlinkage long sys_sched_yield(void)
static inline void __cond_resched(void)
{
+#ifdef CONFIG_DEBUG_SPINLOCK_SLEEP
+ __might_sleep(__FILE__, __LINE__);
+#endif
/*
* The BKS might be reacquired before we have dropped
* PREEMPT_ACTIVE, which could trigger a second
@@ -4142,7 +4152,7 @@ EXPORT_SYMBOL(yield);
*/
void __sched io_schedule(void)
{
- struct runqueue *rq = &per_cpu(runqueues, raw_smp_processor_id());
+ struct runqueue *rq = &__raw_get_cpu_var(runqueues);
atomic_inc(&rq->nr_iowait);
schedule();
@@ -4153,7 +4163,7 @@ EXPORT_SYMBOL(io_schedule);
long __sched io_schedule_timeout(long timeout)
{
- struct runqueue *rq = &per_cpu(runqueues, raw_smp_processor_id());
+ struct runqueue *rq = &__raw_get_cpu_var(runqueues);
long ret;
atomic_inc(&rq->nr_iowait);
@@ -4746,6 +4756,8 @@ static int migration_call(struct notifier_block *nfb, unsigned long action,
break;
#ifdef CONFIG_HOTPLUG_CPU
case CPU_UP_CANCELED:
+ if (!cpu_rq(cpu)->migration_thread)
+ break;
/* Unbind it from offline cpu so it can run. Fall thru. */
kthread_bind(cpu_rq(cpu)->migration_thread,
any_online_cpu(cpu_online_map));
diff --git a/kernel/softirq.c b/kernel/softirq.c
index 336f92d64e2e..9e2f1c6e73d7 100644
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -470,6 +470,8 @@ static int cpu_callback(struct notifier_block *nfb,
break;
#ifdef CONFIG_HOTPLUG_CPU
case CPU_UP_CANCELED:
+ if (!per_cpu(ksoftirqd, hotcpu))
+ break;
/* Unbind so it can run. Fall thru. */
kthread_bind(per_cpu(ksoftirqd, hotcpu),
any_online_cpu(cpu_online_map));
diff --git a/kernel/softlockup.c b/kernel/softlockup.c
index 14c7faf02909..b5c3b94e01ce 100644
--- a/kernel/softlockup.c
+++ b/kernel/softlockup.c
@@ -36,7 +36,7 @@ static struct notifier_block panic_block = {
void touch_softlockup_watchdog(void)
{
- per_cpu(touch_timestamp, raw_smp_processor_id()) = jiffies;
+ __raw_get_cpu_var(touch_timestamp) = jiffies;
}
EXPORT_SYMBOL(touch_softlockup_watchdog);
@@ -127,6 +127,8 @@ cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu)
break;
#ifdef CONFIG_HOTPLUG_CPU
case CPU_UP_CANCELED:
+ if (!per_cpu(watchdog_task, hotcpu))
+ break;
/* Unbind so it can run. Fall thru. */
kthread_bind(per_cpu(watchdog_task, hotcpu),
any_online_cpu(cpu_online_map));
diff --git a/kernel/stop_machine.c b/kernel/stop_machine.c
index dcfb5d731466..2c0aacc37c55 100644
--- a/kernel/stop_machine.c
+++ b/kernel/stop_machine.c
@@ -4,6 +4,7 @@
#include <linux/cpu.h>
#include <linux/err.h>
#include <linux/syscalls.h>
+#include <linux/kthread.h>
#include <asm/atomic.h>
#include <asm/semaphore.h>
#include <asm/uaccess.h>
@@ -25,13 +26,11 @@ static unsigned int stopmachine_num_threads;
static atomic_t stopmachine_thread_ack;
static DECLARE_MUTEX(stopmachine_mutex);
-static int stopmachine(void *cpu)
+static int stopmachine(void *unused)
{
int irqs_disabled = 0;
int prepared = 0;
- set_cpus_allowed(current, cpumask_of_cpu((int)(long)cpu));
-
/* Ack: we are alive */
smp_mb(); /* Theoretically the ack = 0 might not be on this CPU yet. */
atomic_inc(&stopmachine_thread_ack);
@@ -85,7 +84,8 @@ static void stopmachine_set_state(enum stopmachine_state state)
static int stop_machine(void)
{
- int i, ret = 0;
+ int ret = 0;
+ unsigned int i;
struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 };
/* One high-prio thread per cpu. We'll do this one. */
@@ -96,11 +96,16 @@ static int stop_machine(void)
stopmachine_state = STOPMACHINE_WAIT;
for_each_online_cpu(i) {
+ struct task_struct *tsk;
if (i == raw_smp_processor_id())
continue;
- ret = kernel_thread(stopmachine, (void *)(long)i,CLONE_KERNEL);
- if (ret < 0)
+ tsk = kthread_create(stopmachine, NULL, "stopmachine");
+ if (IS_ERR(tsk)) {
+ ret = PTR_ERR(tsk);
break;
+ }
+ kthread_bind(tsk, i);
+ wake_up_process(tsk);
stopmachine_num_threads++;
}
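
[Editor's note: the hunk above replaces kernel_thread() plus set_cpus_allowed() with the kthread API. The general create/bind/wake sequence, sketched with a hypothetical per-CPU thread function:]

	#include <linux/kthread.h>
	#include <linux/sched.h>
	#include <linux/err.h>

	static int my_percpu_fn(void *unused)
	{
		/* runs pinned to one CPU until kthread_stop() */
		while (!kthread_should_stop()) {
			set_current_state(TASK_INTERRUPTIBLE);
			schedule();
		}
		return 0;
	}

	static int start_on_cpu(unsigned int cpu)
	{
		struct task_struct *tsk;

		tsk = kthread_create(my_percpu_fn, NULL, "my_thread/%u", cpu);
		if (IS_ERR(tsk))
			return PTR_ERR(tsk);
		kthread_bind(tsk, cpu);		/* must precede the first wake-up */
		wake_up_process(tsk);
		return 0;
	}
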
diff --git a/kernel/sys.c b/kernel/sys.c
index 0b6ec0e7936f..2d5179c67cec 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -13,7 +13,6 @@
#include <linux/notifier.h>
#include <linux/reboot.h>
#include <linux/prctl.h>
-#include <linux/init.h>
#include <linux/highuid.h>
#include <linux/fs.h>
#include <linux/kernel.h>
@@ -57,6 +56,12 @@
#ifndef GET_FPEXC_CTL
# define GET_FPEXC_CTL(a,b) (-EINVAL)
#endif
+#ifndef GET_ENDIAN
+# define GET_ENDIAN(a,b) (-EINVAL)
+#endif
+#ifndef SET_ENDIAN
+# define SET_ENDIAN(a,b) (-EINVAL)
+#endif
/*
* this is where the system-wide overflow UID and GID are defined, for
@@ -132,14 +137,15 @@ static int __kprobes notifier_call_chain(struct notifier_block **nl,
unsigned long val, void *v)
{
int ret = NOTIFY_DONE;
- struct notifier_block *nb;
+ struct notifier_block *nb, *next_nb;
nb = rcu_dereference(*nl);
while (nb) {
+ next_nb = rcu_dereference(nb->next);
ret = nb->notifier_call(nb, val, v);
if ((ret & NOTIFY_STOP_MASK) == NOTIFY_STOP_MASK)
break;
- nb = rcu_dereference(nb->next);
+ nb = next_nb;
}
return ret;
}
@@ -583,7 +589,7 @@ void emergency_restart(void)
}
EXPORT_SYMBOL_GPL(emergency_restart);
-void kernel_restart_prepare(char *cmd)
+static void kernel_restart_prepare(char *cmd)
{
blocking_notifier_call_chain(&reboot_notifier_list, SYS_RESTART, cmd);
system_state = SYSTEM_RESTART;
@@ -617,7 +623,7 @@ EXPORT_SYMBOL_GPL(kernel_restart);
* Move into place and start executing a preloaded standalone
* executable. If nothing was preloaded return an error.
*/
-void kernel_kexec(void)
+static void kernel_kexec(void)
{
#ifdef CONFIG_KEXEC
struct kimage *image;
@@ -631,7 +637,6 @@ void kernel_kexec(void)
machine_kexec(image);
#endif
}
-EXPORT_SYMBOL_GPL(kernel_kexec);
void kernel_shutdown_prepare(enum system_states state)
{
@@ -1860,23 +1865,20 @@ out:
* fields when reaping, so a sample either gets all the additions of a
* given child after it's reaped, or none so this sample is before reaping.
*
- * tasklist_lock locking optimisation:
- * If we are current and single threaded, we do not need to take the tasklist
- * lock or the siglock. No one else can take our signal_struct away,
- * no one else can reap the children to update signal->c* counters, and
- * no one else can race with the signal-> fields.
- * If we do not take the tasklist_lock, the signal-> fields could be read
- * out of order while another thread was just exiting. So we place a
- * read memory barrier when we avoid the lock. On the writer side,
- * write memory barrier is implied in __exit_signal as __exit_signal releases
- * the siglock spinlock after updating the signal-> fields.
- *
- * We don't really need the siglock when we access the non c* fields
- * of the signal_struct (for RUSAGE_SELF) even in multithreaded
- * case, since we take the tasklist lock for read and the non c* signal->
- * fields are updated only in __exit_signal, which is called with
- * tasklist_lock taken for write, hence these two threads cannot execute
- * concurrently.
+ * Locking:
+ * We need to take the siglock for CHILDEREN, SELF and BOTH
+ * for the cases current multithreaded, non-current single threaded
+ * non-current multithreaded. Thread traversal is now safe with
+ * the siglock held.
+ * Strictly speaking, we donot need to take the siglock if we are current and
+ * single threaded, as no one else can take our signal_struct away, no one
+ * else can reap the children to update signal->c* counters, and no one else
+ * can race with the signal-> fields. If we do not take any lock, the
+ * signal-> fields could be read out of order while another thread was just
+ * exiting. So we should place a read memory barrier when we avoid the lock.
+ * On the writer side, write memory barrier is implied in __exit_signal
+ * as __exit_signal releases the siglock spinlock after updating the signal->
+ * fields. But we don't do this yet to keep things simple.
*
*/
@@ -1885,35 +1887,25 @@ static void k_getrusage(struct task_struct *p, int who, struct rusage *r)
struct task_struct *t;
unsigned long flags;
cputime_t utime, stime;
- int need_lock = 0;
memset((char *) r, 0, sizeof *r);
utime = stime = cputime_zero;
- if (p != current || !thread_group_empty(p))
- need_lock = 1;
-
- if (need_lock) {
- read_lock(&tasklist_lock);
- if (unlikely(!p->signal)) {
- read_unlock(&tasklist_lock);
- return;
- }
- } else
- /* See locking comments above */
- smp_rmb();
+ rcu_read_lock();
+ if (!lock_task_sighand(p, &flags)) {
+ rcu_read_unlock();
+ return;
+ }
switch (who) {
case RUSAGE_BOTH:
case RUSAGE_CHILDREN:
- spin_lock_irqsave(&p->sighand->siglock, flags);
utime = p->signal->cutime;
stime = p->signal->cstime;
r->ru_nvcsw = p->signal->cnvcsw;
r->ru_nivcsw = p->signal->cnivcsw;
r->ru_minflt = p->signal->cmin_flt;
r->ru_majflt = p->signal->cmaj_flt;
- spin_unlock_irqrestore(&p->sighand->siglock, flags);
if (who == RUSAGE_CHILDREN)
break;
@@ -1941,8 +1933,9 @@ static void k_getrusage(struct task_struct *p, int who, struct rusage *r)
BUG();
}
- if (need_lock)
- read_unlock(&tasklist_lock);
+ unlock_task_sighand(p, &flags);
+ rcu_read_unlock();
+
cputime_to_timeval(utime, &r->ru_utime);
cputime_to_timeval(stime, &r->ru_stime);
}
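
[Editor's note: lock_task_sighand(), used in the hunk above, takes the target's siglock only if the task still has a sighand, and pairs with unlock_task_sighand(); rcu_read_lock() keeps the sighand structure from disappearing underneath us. A stripped-down sketch of the same pattern (hypothetical helper):]

	#include <linux/sched.h>
	#include <linux/rcupdate.h>

	static cputime_t sample_signal_utime(struct task_struct *p)
	{
		unsigned long flags;
		cputime_t utime = cputime_zero;

		rcu_read_lock();
		if (lock_task_sighand(p, &flags)) {
			/* p->signal is stable and cannot be reaped here */
			utime = p->signal->utime;
			unlock_task_sighand(p, &flags);
		}
		rcu_read_unlock();
		return utime;
	}
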
@@ -2057,6 +2050,13 @@ asmlinkage long sys_prctl(int option, unsigned long arg2, unsigned long arg3,
return -EFAULT;
return 0;
}
+ case PR_GET_ENDIAN:
+ error = GET_ENDIAN(current, arg2);
+ break;
+ case PR_SET_ENDIAN:
+ error = SET_ENDIAN(current, arg2);
+ break;
+
default:
error = -EINVAL;
break;
diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c
index 5433195040f1..6991bece67e8 100644
--- a/kernel/sys_ni.c
+++ b/kernel/sys_ni.c
@@ -87,6 +87,7 @@ cond_syscall(sys_inotify_init);
cond_syscall(sys_inotify_add_watch);
cond_syscall(sys_inotify_rm_watch);
cond_syscall(sys_migrate_pages);
+cond_syscall(sys_move_pages);
cond_syscall(sys_chown16);
cond_syscall(sys_fchown16);
cond_syscall(sys_getegid16);
@@ -132,3 +133,4 @@ cond_syscall(sys_mincore);
cond_syscall(sys_madvise);
cond_syscall(sys_mremap);
cond_syscall(sys_remap_file_pages);
+cond_syscall(compat_sys_move_pages);
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 0d656e61621d..2c0e65819448 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -59,6 +59,7 @@ extern int proc_nr_files(ctl_table *table, int write, struct file *filp,
extern int C_A_D;
extern int sysctl_overcommit_memory;
extern int sysctl_overcommit_ratio;
+extern int sysctl_panic_on_oom;
extern int max_threads;
extern int sysrq_enabled;
extern int core_uses_pid;
@@ -142,7 +143,6 @@ static struct ctl_table_header root_table_header =
static ctl_table kern_table[];
static ctl_table vm_table[];
-static ctl_table proc_table[];
static ctl_table fs_table[];
static ctl_table debug_table[];
static ctl_table dev_table[];
@@ -202,12 +202,6 @@ static ctl_table root_table[] = {
},
#endif
{
- .ctl_name = CTL_PROC,
- .procname = "proc",
- .mode = 0555,
- .child = proc_table,
- },
- {
.ctl_name = CTL_FS,
.procname = "fs",
.mode = 0555,
@@ -398,7 +392,7 @@ static ctl_table kern_table[] = {
.strategy = &sysctl_string,
},
#endif
-#ifdef CONFIG_HOTPLUG
+#if defined(CONFIG_HOTPLUG) && defined(CONFIG_NET)
{
.ctl_name = KERN_HOTPLUG,
.procname = "hotplug",
@@ -702,6 +696,14 @@ static ctl_table vm_table[] = {
.proc_handler = &proc_dointvec,
},
{
+ .ctl_name = VM_PANIC_ON_OOM,
+ .procname = "panic_on_oom",
+ .data = &sysctl_panic_on_oom,
+ .maxlen = sizeof(sysctl_panic_on_oom),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec,
+ },
+ {
.ctl_name = VM_OVERCOMMIT_RATIO,
.procname = "overcommit_ratio",
.data = &sysctl_overcommit_ratio,
@@ -918,10 +920,6 @@ static ctl_table vm_table[] = {
{ .ctl_name = 0 }
};
-static ctl_table proc_table[] = {
- { .ctl_name = 0 }
-};
-
static ctl_table fs_table[] = {
{
.ctl_name = FS_NRINODE,
diff --git a/kernel/timer.c b/kernel/timer.c
index 9e49deed468c..eb97371b87d8 100644
--- a/kernel/timer.c
+++ b/kernel/timer.c
@@ -146,7 +146,7 @@ static void internal_add_timer(tvec_base_t *base, struct timer_list *timer)
void fastcall init_timer(struct timer_list *timer)
{
timer->entry.next = NULL;
- timer->base = per_cpu(tvec_bases, raw_smp_processor_id());
+ timer->base = __raw_get_cpu_var(tvec_bases);
}
EXPORT_SYMBOL(init_timer);
@@ -383,23 +383,19 @@ EXPORT_SYMBOL(del_timer_sync);
static int cascade(tvec_base_t *base, tvec_t *tv, int index)
{
/* cascade all the timers from tv up one level */
- struct list_head *head, *curr;
+ struct timer_list *timer, *tmp;
+ struct list_head tv_list;
+
+ list_replace_init(tv->vec + index, &tv_list);
- head = tv->vec + index;
- curr = head->next;
/*
- * We are removing _all_ timers from the list, so we don't have to
- * detach them individually, just clear the list afterwards.
+ * We are removing _all_ timers from the list, so we
+ * don't have to detach them individually.
*/
- while (curr != head) {
- struct timer_list *tmp;
-
- tmp = list_entry(curr, struct timer_list, entry);
- BUG_ON(tmp->base != base);
- curr = curr->next;
- internal_add_timer(base, tmp);
+ list_for_each_entry_safe(timer, tmp, &tv_list, entry) {
+ BUG_ON(timer->base != base);
+ internal_add_timer(base, timer);
}
- INIT_LIST_HEAD(head);
return index;
}
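
[Editor's note: list_replace_init(), introduced above in place of the open-coded loop, takes over the whole list in O(1) and reinitialises the old head. The same detach-under-lock, process-unlocked pattern, sketched with hypothetical types:]

	#include <linux/list.h>
	#include <linux/spinlock.h>

	struct my_item {
		struct list_head entry;
		/* ... payload ... */
	};

	static void drain_pending(struct list_head *pending, spinlock_t *lock)
	{
		struct list_head local;
		struct my_item *item, *tmp;

		spin_lock(lock);
		list_replace_init(pending, &local);	/* 'pending' is now empty */
		spin_unlock(lock);

		list_for_each_entry_safe(item, tmp, &local, entry) {
			list_del(&item->entry);
			/* ... handle 'item' without holding the lock ... */
		}
	}
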
@@ -419,10 +415,10 @@ static inline void __run_timers(tvec_base_t *base)
spin_lock_irq(&base->lock);
while (time_after_eq(jiffies, base->timer_jiffies)) {
- struct list_head work_list = LIST_HEAD_INIT(work_list);
+ struct list_head work_list;
struct list_head *head = &work_list;
int index = base->timer_jiffies & TVR_MASK;
-
+
/*
* Cascade timers:
*/
@@ -431,8 +427,8 @@ static inline void __run_timers(tvec_base_t *base)
(!cascade(base, &base->tv3, INDEX(1))) &&
!cascade(base, &base->tv4, INDEX(2)))
cascade(base, &base->tv5, INDEX(3));
- ++base->timer_jiffies;
- list_splice_init(base->tv1.vec + index, &work_list);
+ ++base->timer_jiffies;
+ list_replace_init(base->tv1.vec + index, &work_list);
while (!list_empty(head)) {
void (*fn)(unsigned long);
unsigned long data;
diff --git a/kernel/user.c b/kernel/user.c
index 4b1eb745afa1..6408c0424291 100644
--- a/kernel/user.c
+++ b/kernel/user.c
@@ -148,7 +148,7 @@ struct user_struct * alloc_uid(uid_t uid)
new->mq_bytes = 0;
new->locked_shm = 0;
- if (alloc_uid_keyring(new) < 0) {
+ if (alloc_uid_keyring(new, current) < 0) {
kmem_cache_free(uid_cachep, new);
return NULL;
}
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 880fb415a8f6..565cf7a1febd 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -428,22 +428,34 @@ int schedule_delayed_work_on(int cpu,
return ret;
}
-int schedule_on_each_cpu(void (*func) (void *info), void *info)
+/**
+ * schedule_on_each_cpu - call a function on each online CPU from keventd
+ * @func: the function to call
+ * @info: a pointer to pass to func()
+ *
+ * Returns zero on success.
+ * Returns -ve errno on failure.
+ *
+ * Appears to be racy against CPU hotplug.
+ *
+ * schedule_on_each_cpu() is very slow.
+ */
+int schedule_on_each_cpu(void (*func)(void *info), void *info)
{
int cpu;
- struct work_struct *work;
+ struct work_struct *works;
- work = kmalloc(NR_CPUS * sizeof(struct work_struct), GFP_KERNEL);
-
- if (!work)
+ works = alloc_percpu(struct work_struct);
+ if (!works)
return -ENOMEM;
+
for_each_online_cpu(cpu) {
- INIT_WORK(work + cpu, func, info);
+ INIT_WORK(per_cpu_ptr(works, cpu), func, info);
__queue_work(per_cpu_ptr(keventd_wq->cpu_wq, cpu),
- work + cpu);
+ per_cpu_ptr(works, cpu));
}
flush_workqueue(keventd_wq);
- kfree(work);
+ free_percpu(works);
return 0;
}
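
[Editor's note: a usage sketch for the helper documented above; the counting function is hypothetical. Because schedule_on_each_cpu() only returns after flush_workqueue(), the on-stack counter stays valid for the duration:]

	#include <linux/workqueue.h>
	#include <asm/atomic.h>

	static void count_cpu(void *info)
	{
		atomic_inc((atomic_t *)info);	/* runs once on every online CPU */
	}

	static int count_online_cpus_via_keventd(void)
	{
		atomic_t n = ATOMIC_INIT(0);
		int err;

		err = schedule_on_each_cpu(count_cpu, &n);
		if (err)
			return err;
		return atomic_read(&n);
	}
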
@@ -531,11 +543,11 @@ int current_is_keventd(void)
static void take_over_work(struct workqueue_struct *wq, unsigned int cpu)
{
struct cpu_workqueue_struct *cwq = per_cpu_ptr(wq->cpu_wq, cpu);
- LIST_HEAD(list);
+ struct list_head list;
struct work_struct *work;
spin_lock_irq(&cwq->lock);
- list_splice_init(&cwq->worklist, &list);
+ list_replace_init(&cwq->worklist, &list);
while (!list_empty(&list)) {
printk("Taking work for %s\n", wq->name);
@@ -578,6 +590,8 @@ static int workqueue_cpu_callback(struct notifier_block *nfb,
case CPU_UP_CANCELED:
list_for_each_entry(wq, &workqueues, list) {
+ if (!per_cpu_ptr(wq->cpu_wq, hotcpu)->thread)
+ continue;
/* Unbind so it can run. */
kthread_bind(per_cpu_ptr(wq->cpu_wq, hotcpu)->thread,
any_online_cpu(cpu_online_map));