diff options
Diffstat (limited to 'kernel')
-rw-r--r-- | kernel/cgroup.c | 2 | ||||
-rw-r--r-- | kernel/exit.c | 53 | ||||
-rw-r--r-- | kernel/fork.c | 76 | ||||
-rw-r--r-- | kernel/irq/chip.c | 12 | ||||
-rw-r--r-- | kernel/irq/manage.c | 3 | ||||
-rw-r--r-- | kernel/kexec.c | 104 | ||||
-rw-r--r-- | kernel/kthread.c | 2 | ||||
-rw-r--r-- | kernel/power/main.c | 7 | ||||
-rw-r--r-- | kernel/power/power.h | 2 | ||||
-rw-r--r-- | kernel/ptrace.c | 2 | ||||
-rw-r--r-- | kernel/relay.c | 170 | ||||
-rw-r--r-- | kernel/sched.c | 34 | ||||
-rw-r--r-- | kernel/signal.c | 99 | ||||
-rw-r--r-- | kernel/smp.c | 4 | ||||
-rw-r--r-- | kernel/softirq.c | 3 | ||||
-rw-r--r-- | kernel/softlockup.c | 25 | ||||
-rw-r--r-- | kernel/sys.c | 31 | ||||
-rw-r--r-- | kernel/trace/trace.c | 3 | ||||
-rw-r--r-- | kernel/trace/trace_irqsoff.c | 8 | ||||
-rw-r--r-- | kernel/trace/trace_sched_wakeup.c | 27 |
20 files changed, 455 insertions, 212 deletions
diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 66ec9fd21e0c..89bd6fb7894f 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -1529,7 +1529,7 @@ static int cgroup_seqfile_show(struct seq_file *m, void *arg) return cft->read_seq_string(state->cgroup, cft, m); } -int cgroup_seqfile_release(struct inode *inode, struct file *file) +static int cgroup_seqfile_release(struct inode *inode, struct file *file) { struct seq_file *seq = file->private_data; kfree(seq->private); diff --git a/kernel/exit.c b/kernel/exit.c index ad933bb29ec7..6cdf60712bd2 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -46,6 +46,7 @@ #include <linux/resource.h> #include <linux/blkdev.h> #include <linux/task_io_accounting_ops.h> +#include <linux/tracehook.h> #include <asm/uaccess.h> #include <asm/unistd.h> @@ -162,27 +163,17 @@ static void delayed_put_task_struct(struct rcu_head *rhp) put_task_struct(container_of(rhp, struct task_struct, rcu)); } -/* - * Do final ptrace-related cleanup of a zombie being reaped. - * - * Called with write_lock(&tasklist_lock) held. - */ -static void ptrace_release_task(struct task_struct *p) -{ - BUG_ON(!list_empty(&p->ptraced)); - ptrace_unlink(p); - BUG_ON(!list_empty(&p->ptrace_entry)); -} void release_task(struct task_struct * p) { struct task_struct *leader; int zap_leader; repeat: + tracehook_prepare_release_task(p); atomic_dec(&p->user->processes); proc_flush_task(p); write_lock_irq(&tasklist_lock); - ptrace_release_task(p); + tracehook_finish_release_task(p); __exit_signal(p); /* @@ -204,6 +195,13 @@ repeat: * that case. */ zap_leader = task_detached(leader); + + /* + * This maintains the invariant that release_task() + * only runs on a task in EXIT_DEAD, just for sanity. + */ + if (zap_leader) + leader->exit_state = EXIT_DEAD; } write_unlock_irq(&tasklist_lock); @@ -887,7 +885,8 @@ static void forget_original_parent(struct task_struct *father) */ static void exit_notify(struct task_struct *tsk, int group_dead) { - int state; + int signal; + void *cookie; /* * This does two things: @@ -924,22 +923,11 @@ static void exit_notify(struct task_struct *tsk, int group_dead) !capable(CAP_KILL)) tsk->exit_signal = SIGCHLD; - /* If something other than our normal parent is ptracing us, then - * send it a SIGCHLD instead of honoring exit_signal. exit_signal - * only has special meaning to our real parent. - */ - if (!task_detached(tsk) && thread_group_empty(tsk)) { - int signal = ptrace_reparented(tsk) ? - SIGCHLD : tsk->exit_signal; - do_notify_parent(tsk, signal); - } else if (tsk->ptrace) { - do_notify_parent(tsk, SIGCHLD); - } + signal = tracehook_notify_death(tsk, &cookie, group_dead); + if (signal > 0) + signal = do_notify_parent(tsk, signal); - state = EXIT_ZOMBIE; - if (task_detached(tsk) && likely(!tsk->ptrace)) - state = EXIT_DEAD; - tsk->exit_state = state; + tsk->exit_state = signal < 0 ? EXIT_DEAD : EXIT_ZOMBIE; /* mt-exec, de_thread() is waiting for us */ if (thread_group_leader(tsk) && @@ -949,8 +937,10 @@ static void exit_notify(struct task_struct *tsk, int group_dead) write_unlock_irq(&tasklist_lock); + tracehook_report_death(tsk, signal, cookie, group_dead); + /* If the process is dead, release it - nobody will wait for it */ - if (state == EXIT_DEAD) + if (signal < 0) release_task(tsk); } @@ -1029,10 +1019,7 @@ NORET_TYPE void do_exit(long code) if (unlikely(!tsk->pid)) panic("Attempted to kill the idle task!"); - if (unlikely(current->ptrace & PT_TRACE_EXIT)) { - current->ptrace_message = code; - ptrace_notify((PTRACE_EVENT_EXIT << 8) | SIGTRAP); - } + tracehook_report_exit(&code); /* * We're taking recursive faults here in do_exit. Safest is to just diff --git a/kernel/fork.c b/kernel/fork.c index b99d73e971a4..abb3ed6298f6 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -37,6 +37,7 @@ #include <linux/swap.h> #include <linux/syscalls.h> #include <linux/jiffies.h> +#include <linux/tracehook.h> #include <linux/futex.h> #include <linux/task_io_accounting_ops.h> #include <linux/rcupdate.h> @@ -865,8 +866,7 @@ static void copy_flags(unsigned long clone_flags, struct task_struct *p) new_flags &= ~PF_SUPERPRIV; new_flags |= PF_FORKNOEXEC; - if (!(clone_flags & CLONE_PTRACE)) - p->ptrace = 0; + new_flags |= PF_STARTING; p->flags = new_flags; clear_freeze_flag(p); } @@ -907,7 +907,8 @@ static struct task_struct *copy_process(unsigned long clone_flags, struct pt_regs *regs, unsigned long stack_size, int __user *child_tidptr, - struct pid *pid) + struct pid *pid, + int trace) { int retval; struct task_struct *p; @@ -1163,8 +1164,6 @@ static struct task_struct *copy_process(unsigned long clone_flags, */ p->group_leader = p; INIT_LIST_HEAD(&p->thread_group); - INIT_LIST_HEAD(&p->ptrace_entry); - INIT_LIST_HEAD(&p->ptraced); /* Now that the task is set up, run cgroup callbacks if * necessary. We need to run them before the task is visible @@ -1195,7 +1194,6 @@ static struct task_struct *copy_process(unsigned long clone_flags, p->real_parent = current->real_parent; else p->real_parent = current; - p->parent = p->real_parent; spin_lock(¤t->sighand->siglock); @@ -1237,8 +1235,7 @@ static struct task_struct *copy_process(unsigned long clone_flags, if (likely(p->pid)) { list_add_tail(&p->sibling, &p->real_parent->children); - if (unlikely(p->ptrace & PT_PTRACED)) - __ptrace_link(p, current->parent); + tracehook_finish_clone(p, clone_flags, trace); if (thread_group_leader(p)) { if (clone_flags & CLONE_NEWPID) @@ -1323,29 +1320,13 @@ struct task_struct * __cpuinit fork_idle(int cpu) struct pt_regs regs; task = copy_process(CLONE_VM, 0, idle_regs(®s), 0, NULL, - &init_struct_pid); + &init_struct_pid, 0); if (!IS_ERR(task)) init_idle(task, cpu); return task; } -static int fork_traceflag(unsigned clone_flags) -{ - if (clone_flags & CLONE_UNTRACED) - return 0; - else if (clone_flags & CLONE_VFORK) { - if (current->ptrace & PT_TRACE_VFORK) - return PTRACE_EVENT_VFORK; - } else if ((clone_flags & CSIGNAL) != SIGCHLD) { - if (current->ptrace & PT_TRACE_CLONE) - return PTRACE_EVENT_CLONE; - } else if (current->ptrace & PT_TRACE_FORK) - return PTRACE_EVENT_FORK; - - return 0; -} - /* * Ok, this is the main fork-routine. * @@ -1380,14 +1361,14 @@ long do_fork(unsigned long clone_flags, } } - if (unlikely(current->ptrace)) { - trace = fork_traceflag (clone_flags); - if (trace) - clone_flags |= CLONE_PTRACE; - } + /* + * When called from kernel_thread, don't do user tracing stuff. + */ + if (likely(user_mode(regs))) + trace = tracehook_prepare_clone(clone_flags); p = copy_process(clone_flags, stack_start, regs, stack_size, - child_tidptr, NULL); + child_tidptr, NULL, trace); /* * Do this prior waking up the new thread - the thread pointer * might get invalid after that point, if the thread exits quickly. @@ -1405,32 +1386,35 @@ long do_fork(unsigned long clone_flags, init_completion(&vfork); } - if ((p->ptrace & PT_PTRACED) || (clone_flags & CLONE_STOPPED)) { + tracehook_report_clone(trace, regs, clone_flags, nr, p); + + /* + * We set PF_STARTING at creation in case tracing wants to + * use this to distinguish a fully live task from one that + * hasn't gotten to tracehook_report_clone() yet. Now we + * clear it and set the child going. + */ + p->flags &= ~PF_STARTING; + + if (unlikely(clone_flags & CLONE_STOPPED)) { /* * We'll start up with an immediate SIGSTOP. */ sigaddset(&p->pending.signal, SIGSTOP); set_tsk_thread_flag(p, TIF_SIGPENDING); - } - - if (!(clone_flags & CLONE_STOPPED)) - wake_up_new_task(p, clone_flags); - else __set_task_state(p, TASK_STOPPED); - - if (unlikely (trace)) { - current->ptrace_message = nr; - ptrace_notify ((trace << 8) | SIGTRAP); + } else { + wake_up_new_task(p, clone_flags); } + tracehook_report_clone_complete(trace, regs, + clone_flags, nr, p); + if (clone_flags & CLONE_VFORK) { freezer_do_not_count(); wait_for_completion(&vfork); freezer_count(); - if (unlikely (current->ptrace & PT_TRACE_VFORK_DONE)) { - current->ptrace_message = nr; - ptrace_notify ((PTRACE_EVENT_VFORK_DONE << 8) | SIGTRAP); - } + tracehook_report_vfork_done(p, nr); } } else { nr = PTR_ERR(p); @@ -1442,7 +1426,7 @@ long do_fork(unsigned long clone_flags, #define ARCH_MIN_MMSTRUCT_ALIGN 0 #endif -static void sighand_ctor(struct kmem_cache *cachep, void *data) +static void sighand_ctor(void *data) { struct sighand_struct *sighand = data; diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c index 964964baefa2..3cd441ebf5d2 100644 --- a/kernel/irq/chip.c +++ b/kernel/irq/chip.c @@ -28,8 +28,7 @@ void dynamic_irq_init(unsigned int irq) unsigned long flags; if (irq >= NR_IRQS) { - printk(KERN_ERR "Trying to initialize invalid IRQ%d\n", irq); - WARN_ON(1); + WARN(1, KERN_ERR "Trying to initialize invalid IRQ%d\n", irq); return; } @@ -62,8 +61,7 @@ void dynamic_irq_cleanup(unsigned int irq) unsigned long flags; if (irq >= NR_IRQS) { - printk(KERN_ERR "Trying to cleanup invalid IRQ%d\n", irq); - WARN_ON(1); + WARN(1, KERN_ERR "Trying to cleanup invalid IRQ%d\n", irq); return; } @@ -71,9 +69,8 @@ void dynamic_irq_cleanup(unsigned int irq) spin_lock_irqsave(&desc->lock, flags); if (desc->action) { spin_unlock_irqrestore(&desc->lock, flags); - printk(KERN_ERR "Destroying IRQ%d without calling free_irq\n", + WARN(1, KERN_ERR "Destroying IRQ%d without calling free_irq\n", irq); - WARN_ON(1); return; } desc->msi_desc = NULL; @@ -96,8 +93,7 @@ int set_irq_chip(unsigned int irq, struct irq_chip *chip) unsigned long flags; if (irq >= NR_IRQS) { - printk(KERN_ERR "Trying to install chip for IRQ%d\n", irq); - WARN_ON(1); + WARN(1, KERN_ERR "Trying to install chip for IRQ%d\n", irq); return -EINVAL; } diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c index f8914b92b664..152abfd3589f 100644 --- a/kernel/irq/manage.c +++ b/kernel/irq/manage.c @@ -177,8 +177,7 @@ static void __enable_irq(struct irq_desc *desc, unsigned int irq) { switch (desc->depth) { case 0: - printk(KERN_WARNING "Unbalanced enable for IRQ %d\n", irq); - WARN_ON(1); + WARN(1, KERN_WARNING "Unbalanced enable for IRQ %d\n", irq); break; case 1: { unsigned int status = desc->status & ~IRQ_DISABLED; diff --git a/kernel/kexec.c b/kernel/kexec.c index 1c5fcacbcf33..c8a4370e2a34 100644 --- a/kernel/kexec.c +++ b/kernel/kexec.c @@ -24,6 +24,12 @@ #include <linux/utsrelease.h> #include <linux/utsname.h> #include <linux/numa.h> +#include <linux/suspend.h> +#include <linux/device.h> +#include <linux/freezer.h> +#include <linux/pm.h> +#include <linux/cpu.h> +#include <linux/console.h> #include <asm/page.h> #include <asm/uaccess.h> @@ -242,6 +248,12 @@ static int kimage_normal_alloc(struct kimage **rimage, unsigned long entry, goto out; } + image->swap_page = kimage_alloc_control_pages(image, 0); + if (!image->swap_page) { + printk(KERN_ERR "Could not allocate swap buffer\n"); + goto out; + } + result = 0; out: if (result == 0) @@ -589,14 +601,12 @@ static void kimage_free_extra_pages(struct kimage *image) kimage_free_page_list(&image->unuseable_pages); } -static int kimage_terminate(struct kimage *image) +static void kimage_terminate(struct kimage *image) { if (*image->entry != 0) image->entry++; *image->entry = IND_DONE; - - return 0; } #define for_each_kimage_entry(image, ptr, entry) \ @@ -988,6 +998,8 @@ asmlinkage long sys_kexec_load(unsigned long entry, unsigned long nr_segments, if (result) goto out; + if (flags & KEXEC_PRESERVE_CONTEXT) + image->preserve_context = 1; result = machine_kexec_prepare(image); if (result) goto out; @@ -997,9 +1009,7 @@ asmlinkage long sys_kexec_load(unsigned long entry, unsigned long nr_segments, if (result) goto out; } - result = kimage_terminate(image); - if (result) - goto out; + kimage_terminate(image); } /* Install the new kernel, and Uninstall the old */ image = xchg(dest_image, image); @@ -1415,3 +1425,85 @@ static int __init crash_save_vmcoreinfo_init(void) } module_init(crash_save_vmcoreinfo_init) + +/** + * kernel_kexec - reboot the system + * + * Move into place and start executing a preloaded standalone + * executable. If nothing was preloaded return an error. + */ +int kernel_kexec(void) +{ + int error = 0; + + if (xchg(&kexec_lock, 1)) + return -EBUSY; + if (!kexec_image) { + error = -EINVAL; + goto Unlock; + } + + if (kexec_image->preserve_context) { +#ifdef CONFIG_KEXEC_JUMP + mutex_lock(&pm_mutex); + pm_prepare_console(); + error = freeze_processes(); + if (error) { + error = -EBUSY; + goto Restore_console; + } + suspend_console(); + error = device_suspend(PMSG_FREEZE); + if (error) + goto Resume_console; + error = disable_nonboot_cpus(); + if (error) + goto Resume_devices; + local_irq_disable(); + /* At this point, device_suspend() has been called, + * but *not* device_power_down(). We *must* + * device_power_down() now. Otherwise, drivers for + * some devices (e.g. interrupt controllers) become + * desynchronized with the actual state of the + * hardware at resume time, and evil weirdness ensues. + */ + error = device_power_down(PMSG_FREEZE); + if (error) + goto Enable_irqs; + save_processor_state(); +#endif + } else { + blocking_notifier_call_chain(&reboot_notifier_list, + SYS_RESTART, NULL); + system_state = SYSTEM_RESTART; + device_shutdown(); + sysdev_shutdown(); + printk(KERN_EMERG "Starting new kernel\n"); + machine_shutdown(); + } + + machine_kexec(kexec_image); + + if (kexec_image->preserve_context) { +#ifdef CONFIG_KEXEC_JUMP + restore_processor_state(); + device_power_up(PMSG_RESTORE); + Enable_irqs: + local_irq_enable(); + enable_nonboot_cpus(); + Resume_devices: + device_resume(PMSG_RESTORE); + Resume_console: + resume_console(); + thaw_processes(); + Restore_console: + pm_restore_console(); + mutex_unlock(&pm_mutex); +#endif + } + + Unlock: + xchg(&kexec_lock, 0); + + return error; +} diff --git a/kernel/kthread.c b/kernel/kthread.c index 6111c27491b1..96cff2f8710b 100644 --- a/kernel/kthread.c +++ b/kernel/kthread.c @@ -176,7 +176,7 @@ void kthread_bind(struct task_struct *k, unsigned int cpu) return; } /* Must have done schedule() in kthread() before we set_task_cpu */ - wait_task_inactive(k); + wait_task_inactive(k, 0); set_task_cpu(k, cpu); k->cpus_allowed = cpumask_of_cpu(cpu); k->rt.nr_cpus_allowed = 1; diff --git a/kernel/power/main.c b/kernel/power/main.c index 95bff23ecdaa..0b7476f5d2a6 100644 --- a/kernel/power/main.c +++ b/kernel/power/main.c @@ -635,6 +635,13 @@ static void __init test_wakealarm(struct rtc_device *rtc, suspend_state_t state) } if (status < 0) printk(err_suspend, status); + + /* Some platforms can't detect that the alarm triggered the + * wakeup, or (accordingly) disable it after it afterwards. + * It's supposed to give oneshot behavior; cope. + */ + alm.enabled = false; + rtc_set_alarm(rtc, &alm); } static int __init has_wakealarm(struct device *dev, void *name_ptr) diff --git a/kernel/power/power.h b/kernel/power/power.h index 700f44ec8406..acc0c101dbd5 100644 --- a/kernel/power/power.h +++ b/kernel/power/power.h @@ -53,8 +53,6 @@ extern int hibernation_platform_enter(void); extern int pfn_is_nosave(unsigned long); -extern struct mutex pm_mutex; - #define power_attr(_name) \ static struct kobj_attribute _name##_attr = { \ .attr = { \ diff --git a/kernel/ptrace.c b/kernel/ptrace.c index 8392a9da6450..082b3fcb32a0 100644 --- a/kernel/ptrace.c +++ b/kernel/ptrace.c @@ -107,7 +107,7 @@ int ptrace_check_attach(struct task_struct *child, int kill) read_unlock(&tasklist_lock); if (!ret && !kill) - wait_task_inactive(child); + ret = wait_task_inactive(child, TASK_TRACED) ? 0 : -ESRCH; /* All systems go.. */ return ret; diff --git a/kernel/relay.c b/kernel/relay.c index 7de644cdec43..04006ef970b8 100644 --- a/kernel/relay.c +++ b/kernel/relay.c @@ -407,6 +407,35 @@ void relay_reset(struct rchan *chan) } EXPORT_SYMBOL_GPL(relay_reset); +static inline void relay_set_buf_dentry(struct rchan_buf *buf, + struct dentry *dentry) +{ + buf->dentry = dentry; + buf->dentry->d_inode->i_size = buf->early_bytes; +} + +static struct dentry *relay_create_buf_file(struct rchan *chan, + struct rchan_buf *buf, + unsigned int cpu) +{ + struct dentry *dentry; + char *tmpname; + + tmpname = kzalloc(NAME_MAX + 1, GFP_KERNEL); + if (!tmpname) + return NULL; + snprintf(tmpname, NAME_MAX, "%s%d", chan->base_filename, cpu); + + /* Create file in fs */ + dentry = chan->cb->create_buf_file(tmpname, chan->parent, + S_IRUSR, buf, + &chan->is_global); + + kfree(tmpname); + + return dentry; +} + /* * relay_open_buf - create a new relay channel buffer * @@ -416,45 +445,34 @@ static struct rchan_buf *relay_open_buf(struct rchan *chan, unsigned int cpu) { struct rchan_buf *buf = NULL; struct dentry *dentry; - char *tmpname; if (chan->is_global) return chan->buf[0]; - tmpname = kzalloc(NAME_MAX + 1, GFP_KERNEL); - if (!tmpname) - goto end; - snprintf(tmpname, NAME_MAX, "%s%d", chan->base_filename, cpu); - buf = relay_create_buf(chan); if (!buf) - goto free_name; + return NULL; + + if (chan->has_base_filename) { + dentry = relay_create_buf_file(chan, buf, cpu); + if (!dentry) + goto free_buf; + relay_set_buf_dentry(buf, dentry); + } buf->cpu = cpu; __relay_reset(buf, 1); - /* Create file in fs */ - dentry = chan->cb->create_buf_file(tmpname, chan->parent, S_IRUSR, - buf, &chan->is_global); - if (!dentry) - goto free_buf; - - buf->dentry = dentry; - if(chan->is_global) { chan->buf[0] = buf; buf->cpu = 0; } - goto free_name; + return buf; free_buf: relay_destroy_buf(buf); - buf = NULL; -free_name: - kfree(tmpname); -end: - return buf; + return NULL; } /** @@ -537,8 +555,8 @@ static int __cpuinit relay_hotcpu_callback(struct notifier_block *nb, /** * relay_open - create a new relay channel - * @base_filename: base name of files to create - * @parent: dentry of parent directory, %NULL for root directory + * @base_filename: base name of files to create, %NULL for buffering only + * @parent: dentry of parent directory, %NULL for root directory or buffer * @subbuf_size: size of sub-buffers * @n_subbufs: number of sub-buffers * @cb: client callback functions @@ -560,8 +578,6 @@ struct rchan *relay_open(const char *base_filename, { unsigned int i; struct rchan *chan; - if (!base_filename) - return NULL; if (!(subbuf_size && n_subbufs)) return NULL; @@ -576,7 +592,10 @@ struct rchan *relay_open(const char *base_filename, chan->alloc_size = FIX_SIZE(subbuf_size * n_subbufs); chan->parent = parent; chan->private_data = private_data; - strlcpy(chan->base_filename, base_filename, NAME_MAX); + if (base_filename) { + chan->has_base_filename = 1; + strlcpy(chan->base_filename, base_filename, NAME_MAX); + } setup_callbacks(chan, cb); kref_init(&chan->kref); @@ -604,6 +623,94 @@ free_bufs: } EXPORT_SYMBOL_GPL(relay_open); +struct rchan_percpu_buf_dispatcher { + struct rchan_buf *buf; + struct dentry *dentry; +}; + +/* Called in atomic context. */ +static void __relay_set_buf_dentry(void *info) +{ + struct rchan_percpu_buf_dispatcher *p = info; + + relay_set_buf_dentry(p->buf, p->dentry); +} + +/** + * relay_late_setup_files - triggers file creation + * @chan: channel to operate on + * @base_filename: base name of files to create + * @parent: dentry of parent directory, %NULL for root directory + * + * Returns 0 if successful, non-zero otherwise. + * + * Use to setup files for a previously buffer-only channel. + * Useful to do early tracing in kernel, before VFS is up, for example. + */ +int relay_late_setup_files(struct rchan *chan, + const char *base_filename, + struct dentry *parent) +{ + int err = 0; + unsigned int i, curr_cpu; + unsigned long flags; + struct dentry *dentry; + struct rchan_percpu_buf_dispatcher disp; + + if (!chan || !base_filename) + return -EINVAL; + + strlcpy(chan->base_filename, base_filename, NAME_MAX); + + mutex_lock(&relay_channels_mutex); + /* Is chan already set up? */ + if (unlikely(chan->has_base_filename)) + return -EEXIST; + chan->has_base_filename = 1; + chan->parent = parent; + curr_cpu = get_cpu(); + /* + * The CPU hotplug notifier ran before us and created buffers with + * no files associated. So it's safe to call relay_setup_buf_file() + * on all currently online CPUs. + */ + for_each_online_cpu(i) { + if (unlikely(!chan->buf[i])) { + printk(KERN_ERR "relay_late_setup_files: CPU %u " + "has no buffer, it must have!\n", i); + BUG(); + err = -EINVAL; + break; + } + + dentry = relay_create_buf_file(chan, chan->buf[i], i); + if (unlikely(!dentry)) { + err = -EINVAL; + break; + } + + if (curr_cpu == i) { + local_irq_save(flags); + relay_set_buf_dentry(chan->buf[i], dentry); + local_irq_restore(flags); + } else { + disp.buf = chan->buf[i]; + disp.dentry = dentry; + smp_mb(); + /* relay_channels_mutex must be held, so wait. */ + err = smp_call_function_single(i, + __relay_set_buf_dentry, + &disp, 1); + } + if (unlikely(err)) + break; + } + put_cpu(); + mutex_unlock(&relay_channels_mutex); + + return err; +} + /** * relay_switch_subbuf - switch to a new sub-buffer * @buf: channel buffer @@ -627,8 +734,13 @@ size_t relay_switch_subbuf(struct rchan_buf *buf, size_t length) old_subbuf = buf->subbufs_produced % buf->chan->n_subbufs; buf->padding[old_subbuf] = buf->prev_padding; buf->subbufs_produced++; - buf->dentry->d_inode->i_size += buf->chan->subbuf_size - - buf->padding[old_subbuf]; + if (buf->dentry) + buf->dentry->d_inode->i_size += + buf->chan->subbuf_size - + buf->padding[old_subbuf]; + else + buf->early_bytes += buf->chan->subbuf_size - + buf->padding[old_subbuf]; smp_mb(); if (waitqueue_active(&buf->read_wait)) /* @@ -1237,4 +1349,4 @@ static __init int relay_init(void) return 0; } -module_init(relay_init); +early_initcall(relay_init); diff --git a/kernel/sched.c b/kernel/sched.c index 0047bd9b96aa..0236958addcb 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -1867,16 +1867,24 @@ migrate_task(struct task_struct *p, int dest_cpu, struct migration_req *req) /* * wait_task_inactive - wait for a thread to unschedule. * + * If @match_state is nonzero, it's the @p->state value just checked and + * not expected to change. If it changes, i.e. @p might have woken up, + * then return zero. When we succeed in waiting for @p to be off its CPU, + * we return a positive number (its total switch count). If a second call + * a short while later returns the same number, the caller can be sure that + * @p has remained unscheduled the whole time. + * * The caller must ensure that the task *will* unschedule sometime soon, * else this function might spin for a *long* time. This function can't * be called with interrupts off, or it may introduce deadlock with * smp_call_function() if an IPI is sent by the same process we are * waiting to become inactive. */ -void wait_task_inactive(struct task_struct *p) +unsigned long wait_task_inactive(struct task_struct *p, long match_state) { unsigned long flags; int running, on_rq; + unsigned long ncsw; struct rq *rq; for (;;) { @@ -1899,8 +1907,11 @@ void wait_task_inactive(struct task_struct *p) * return false if the runqueue has changed and p * is actually now running somewhere else! */ - while (task_running(rq, p)) + while (task_running(rq, p)) { + if (match_state && unlikely(p->state != match_state)) + return 0; cpu_relax(); + } /* * Ok, time to look more closely! We need the rq @@ -1910,9 +1921,21 @@ void wait_task_inactive(struct task_struct *p) rq = task_rq_lock(p, &flags); running = task_running(rq, p); on_rq = p->se.on_rq; + ncsw = 0; + if (!match_state || p->state == match_state) { + ncsw = p->nivcsw + p->nvcsw; + if (unlikely(!ncsw)) + ncsw = 1; + } task_rq_unlock(rq, &flags); /* + * If it changed from the expected state, bail out now. + */ + if (unlikely(!ncsw)) + break; + + /* * Was it really running after all now that we * checked with the proper locks actually held? * @@ -1944,6 +1967,8 @@ void wait_task_inactive(struct task_struct *p) */ break; } + + return ncsw; } /*** @@ -6389,7 +6414,7 @@ static struct notifier_block __cpuinitdata migration_notifier = { .priority = 10 }; -void __init migration_init(void) +static int __init migration_init(void) { void *cpu = (void *)(long)smp_processor_id(); int err; @@ -6399,7 +6424,10 @@ void __init migration_init(void) BUG_ON(err == NOTIFY_BAD); migration_call(&migration_notifier, CPU_ONLINE, cpu); register_cpu_notifier(&migration_notifier); + + return err; } +early_initcall(migration_init); #endif #ifdef CONFIG_SMP diff --git a/kernel/signal.c b/kernel/signal.c index 82c3545596c5..954f77d7e3bc 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -22,6 +22,7 @@ #include <linux/ptrace.h> #include <linux/signal.h> #include <linux/signalfd.h> +#include <linux/tracehook.h> #include <linux/capability.h> #include <linux/freezer.h> #include <linux/pid_namespace.h> @@ -39,24 +40,21 @@ static struct kmem_cache *sigqueue_cachep; -static int __sig_ignored(struct task_struct *t, int sig) +static void __user *sig_handler(struct task_struct *t, int sig) { - void __user *handler; + return t->sighand->action[sig - 1].sa.sa_handler; +} +static int sig_handler_ignored(void __user *handler, int sig) +{ /* Is it explicitly or implicitly ignored? */ - - handler = t->sighand->action[sig - 1].sa.sa_handler; return handler == SIG_IGN || (handler == SIG_DFL && sig_kernel_ignore(sig)); } static int sig_ignored(struct task_struct *t, int sig) { - /* - * Tracers always want to know about signals.. - */ - if (t->ptrace & PT_PTRACED) - return 0; + void __user *handler; /* * Blocked signals are never ignored, since the @@ -66,7 +64,14 @@ static int sig_ignored(struct task_struct *t, int sig) if (sigismember(&t->blocked, sig) || sigismember(&t->real_blocked, sig)) return 0; - return __sig_ignored(t, sig); + handler = sig_handler(t, sig); + if (!sig_handler_ignored(handler, sig)) + return 0; + + /* + * Tracers may want to know about even ignored signals. + */ + return !tracehook_consider_ignored_signal(t, sig, handler); } /* @@ -129,7 +134,9 @@ void recalc_sigpending_and_wake(struct task_struct *t) void recalc_sigpending(void) { - if (!recalc_sigpending_tsk(current) && !freezing(current)) + if (unlikely(tracehook_force_sigpending())) + set_thread_flag(TIF_SIGPENDING); + else if (!recalc_sigpending_tsk(current) && !freezing(current)) clear_thread_flag(TIF_SIGPENDING); } @@ -295,12 +302,12 @@ flush_signal_handlers(struct task_struct *t, int force_default) int unhandled_signal(struct task_struct *tsk, int sig) { + void __user *handler = tsk->sighand->action[sig-1].sa.sa_handler; if (is_global_init(tsk)) return 1; - if (tsk->ptrace & PT_PTRACED) + if (handler != SIG_IGN && handler != SIG_DFL) return 0; - return (tsk->sighand->action[sig-1].sa.sa_handler == SIG_IGN) || - (tsk->sighand->action[sig-1].sa.sa_handler == SIG_DFL); + return !tracehook_consider_fatal_signal(tsk, sig, handler); } @@ -591,9 +598,6 @@ static int check_kill_permission(int sig, struct siginfo *info, return security_task_kill(t, info, sig, 0); } -/* forward decl */ -static void do_notify_parent_cldstop(struct task_struct *tsk, int why); - /* * Handle magic process-wide effects of stop/continue signals. Unlike * the signal actions, these happen immediately at signal-generation @@ -756,7 +760,8 @@ static void complete_signal(int sig, struct task_struct *p, int group) if (sig_fatal(p, sig) && !(signal->flags & (SIGNAL_UNKILLABLE | SIGNAL_GROUP_EXIT)) && !sigismember(&t->real_blocked, sig) && - (sig == SIGKILL || !(t->ptrace & PT_PTRACED))) { + (sig == SIGKILL || + !tracehook_consider_fatal_signal(t, sig, SIG_DFL))) { /* * This signal will be fatal to the whole group. */ @@ -1323,9 +1328,11 @@ static inline void __wake_up_parent(struct task_struct *p, /* * Let a parent know about the death of a child. * For a stopped/continued status change, use do_notify_parent_cldstop instead. + * + * Returns -1 if our parent ignored us and so we've switched to + * self-reaping, or else @sig. */ - -void do_notify_parent(struct task_struct *tsk, int sig) +int do_notify_parent(struct task_struct *tsk, int sig) { struct siginfo info; unsigned long flags; @@ -1396,12 +1403,14 @@ void do_notify_parent(struct task_struct *tsk, int sig) */ tsk->exit_signal = -1; if (psig->action[SIGCHLD-1].sa.sa_handler == SIG_IGN) - sig = 0; + sig = -1; } if (valid_signal(sig) && sig > 0) __group_send_sig_info(sig, &info, tsk->parent); __wake_up_parent(tsk, tsk->parent); spin_unlock_irqrestore(&psig->siglock, flags); + + return sig; } static void do_notify_parent_cldstop(struct task_struct *tsk, int why) @@ -1599,7 +1608,7 @@ finish_stop(int stop_count) * a group stop in progress and we are the last to stop, * report to the parent. When ptraced, every thread reports itself. */ - if (stop_count == 0 || (current->ptrace & PT_PTRACED)) { + if (tracehook_notify_jctl(stop_count == 0, CLD_STOPPED)) { read_lock(&tasklist_lock); do_notify_parent_cldstop(current, CLD_STOPPED); read_unlock(&tasklist_lock); @@ -1735,6 +1744,9 @@ relock: signal->flags &= ~SIGNAL_CLD_MASK; spin_unlock_irq(&sighand->siglock); + if (unlikely(!tracehook_notify_jctl(1, why))) + goto relock; + read_lock(&tasklist_lock); do_notify_parent_cldstop(current->group_leader, why); read_unlock(&tasklist_lock); @@ -1748,17 +1760,33 @@ relock: do_signal_stop(0)) goto relock; - signr = dequeue_signal(current, ¤t->blocked, info); - if (!signr) - break; /* will return 0 */ + /* + * Tracing can induce an artifical signal and choose sigaction. + * The return value in @signr determines the default action, + * but @info->si_signo is the signal number we will report. + */ + signr = tracehook_get_signal(current, regs, info, return_ka); + if (unlikely(signr < 0)) + goto relock; + if (unlikely(signr != 0)) + ka = return_ka; + else { + signr = dequeue_signal(current, ¤t->blocked, + info); - if (signr != SIGKILL) { - signr = ptrace_signal(signr, info, regs, cookie); if (!signr) - continue; + break; /* will return 0 */ + + if (signr != SIGKILL) { + signr = ptrace_signal(signr, info, + regs, cookie); + if (!signr) + continue; + } + + ka = &sighand->action[signr-1]; } - ka = &sighand->action[signr-1]; if (ka->sa.sa_handler == SIG_IGN) /* Do nothing. */ continue; if (ka->sa.sa_handler != SIG_DFL) { @@ -1806,7 +1834,7 @@ relock: spin_lock_irq(&sighand->siglock); } - if (likely(do_signal_stop(signr))) { + if (likely(do_signal_stop(info->si_signo))) { /* It released the siglock. */ goto relock; } @@ -1827,7 +1855,7 @@ relock: if (sig_kernel_coredump(signr)) { if (print_fatal_signals) - print_fatal_signal(regs, signr); + print_fatal_signal(regs, info->si_signo); /* * If it was able to dump core, this kills all * other threads in the group and synchronizes with @@ -1836,13 +1864,13 @@ relock: * first and our do_group_exit call below will use * that value and ignore the one we pass it. */ - do_coredump((long)signr, signr, regs); + do_coredump(info->si_signo, info->si_signo, regs); } /* * Death signals, no core dump. */ - do_group_exit(signr); + do_group_exit(info->si_signo); /* NOTREACHED */ } spin_unlock_irq(&sighand->siglock); @@ -1884,7 +1912,7 @@ void exit_signals(struct task_struct *tsk) out: spin_unlock_irq(&tsk->sighand->siglock); - if (unlikely(group_stop)) { + if (unlikely(group_stop) && tracehook_notify_jctl(1, CLD_STOPPED)) { read_lock(&tasklist_lock); do_notify_parent_cldstop(tsk, CLD_STOPPED); read_unlock(&tasklist_lock); @@ -1895,7 +1923,6 @@ EXPORT_SYMBOL(recalc_sigpending); EXPORT_SYMBOL_GPL(dequeue_signal); EXPORT_SYMBOL(flush_signals); EXPORT_SYMBOL(force_sig); -EXPORT_SYMBOL(ptrace_notify); EXPORT_SYMBOL(send_sig); EXPORT_SYMBOL(send_sig_info); EXPORT_SYMBOL(sigprocmask); @@ -2299,7 +2326,7 @@ int do_sigaction(int sig, struct k_sigaction *act, struct k_sigaction *oact) * (for example, SIGCHLD), shall cause the pending signal to * be discarded, whether or not it is blocked" */ - if (__sig_ignored(t, sig)) { + if (sig_handler_ignored(sig_handler(t, sig), sig)) { sigemptyset(&mask); sigaddset(&mask, sig); rm_from_queue_full(&mask, &t->signal->shared_pending); diff --git a/kernel/smp.c b/kernel/smp.c index 462c785ca1ee..96fc7c0edc59 100644 --- a/kernel/smp.c +++ b/kernel/smp.c @@ -33,7 +33,7 @@ struct call_single_queue { spinlock_t lock; }; -void __cpuinit init_call_single_data(void) +static int __cpuinit init_call_single_data(void) { int i; @@ -43,7 +43,9 @@ void __cpuinit init_call_single_data(void) spin_lock_init(&q->lock); INIT_LIST_HEAD(&q->list); } + return 0; } +early_initcall(init_call_single_data); static void csd_flag_wait(struct call_single_data *data) { diff --git a/kernel/softirq.c b/kernel/softirq.c index f6b03d56c2bf..c506f266a6b9 100644 --- a/kernel/softirq.c +++ b/kernel/softirq.c @@ -630,7 +630,7 @@ static struct notifier_block __cpuinitdata cpu_nfb = { .notifier_call = cpu_callback }; -__init int spawn_ksoftirqd(void) +static __init int spawn_ksoftirqd(void) { void *cpu = (void *)(long)smp_processor_id(); int err = cpu_callback(&cpu_nfb, CPU_UP_PREPARE, cpu); @@ -640,6 +640,7 @@ __init int spawn_ksoftirqd(void) register_cpu_notifier(&cpu_nfb); return 0; } +early_initcall(spawn_ksoftirqd); #ifdef CONFIG_SMP /* diff --git a/kernel/softlockup.c b/kernel/softlockup.c index 7bd8d1aadd5d..b75b492fbfcf 100644 --- a/kernel/softlockup.c +++ b/kernel/softlockup.c @@ -338,14 +338,33 @@ static struct notifier_block __cpuinitdata cpu_nfb = { .notifier_call = cpu_callback }; -__init void spawn_softlockup_task(void) +static int __initdata nosoftlockup; + +static int __init nosoftlockup_setup(char *str) +{ + nosoftlockup = 1; + return 1; +} +__setup("nosoftlockup", nosoftlockup_setup); + +static int __init spawn_softlockup_task(void) { void *cpu = (void *)(long)smp_processor_id(); - int err = cpu_callback(&cpu_nfb, CPU_UP_PREPARE, cpu); + int err; - BUG_ON(err == NOTIFY_BAD); + if (nosoftlockup) + return 0; + + err = cpu_callback(&cpu_nfb, CPU_UP_PREPARE, cpu); + if (err == NOTIFY_BAD) { + BUG(); + return 1; + } cpu_callback(&cpu_nfb, CPU_ONLINE, cpu); register_cpu_notifier(&cpu_nfb); atomic_notifier_chain_register(&panic_notifier_list, &panic_block); + + return 0; } +early_initcall(spawn_softlockup_task); diff --git a/kernel/sys.c b/kernel/sys.c index 0c9d3fa1f5ff..c01858090a98 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -301,26 +301,6 @@ void kernel_restart(char *cmd) } EXPORT_SYMBOL_GPL(kernel_restart); -/** - * kernel_kexec - reboot the system - * - * Move into place and start executing a preloaded standalone - * executable. If nothing was preloaded return an error. - */ -static void kernel_kexec(void) -{ -#ifdef CONFIG_KEXEC - struct kimage *image; - image = xchg(&kexec_image, NULL); - if (!image) - return; - kernel_restart_prepare(NULL); - printk(KERN_EMERG "Starting new kernel\n"); - machine_shutdown(); - machine_kexec(image); -#endif -} - static void kernel_shutdown_prepare(enum system_states state) { blocking_notifier_call_chain(&reboot_notifier_list, @@ -425,10 +405,15 @@ asmlinkage long sys_reboot(int magic1, int magic2, unsigned int cmd, void __user kernel_restart(buffer); break; +#ifdef CONFIG_KEXEC case LINUX_REBOOT_CMD_KEXEC: - kernel_kexec(); - unlock_kernel(); - return -EINVAL; + { + int ret; + ret = kernel_kexec(); + unlock_kernel(); + return ret; + } +#endif #ifdef CONFIG_HIBERNATION case LINUX_REBOOT_CMD_SW_SUSPEND: diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 868e121c8e38..fc20e09a6cb1 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -1203,9 +1203,6 @@ static void *s_next(struct seq_file *m, void *v, loff_t *pos) iter->pos = *pos; - if (last_ent && !ent) - seq_puts(m, "\n\nvim:ft=help\n"); - return ent; } diff --git a/kernel/trace/trace_irqsoff.c b/kernel/trace/trace_irqsoff.c index 421d6fe3650e..ece6cfb649fa 100644 --- a/kernel/trace/trace_irqsoff.c +++ b/kernel/trace/trace_irqsoff.c @@ -253,12 +253,14 @@ void start_critical_timings(void) if (preempt_trace() || irq_trace()) start_critical_timing(CALLER_ADDR0, CALLER_ADDR1); } +EXPORT_SYMBOL_GPL(start_critical_timings); void stop_critical_timings(void) { if (preempt_trace() || irq_trace()) stop_critical_timing(CALLER_ADDR0, CALLER_ADDR1); } +EXPORT_SYMBOL_GPL(stop_critical_timings); #ifdef CONFIG_IRQSOFF_TRACER #ifdef CONFIG_PROVE_LOCKING @@ -337,12 +339,14 @@ EXPORT_SYMBOL(trace_hardirqs_off_caller); #ifdef CONFIG_PREEMPT_TRACER void trace_preempt_on(unsigned long a0, unsigned long a1) { - stop_critical_timing(a0, a1); + if (preempt_trace()) + stop_critical_timing(a0, a1); } void trace_preempt_off(unsigned long a0, unsigned long a1) { - start_critical_timing(a0, a1); + if (preempt_trace()) + start_critical_timing(a0, a1); } #endif /* CONFIG_PREEMPT_TRACER */ diff --git a/kernel/trace/trace_sched_wakeup.c b/kernel/trace/trace_sched_wakeup.c index 3c8d61df4474..e303ccb62cdf 100644 --- a/kernel/trace/trace_sched_wakeup.c +++ b/kernel/trace/trace_sched_wakeup.c @@ -26,7 +26,8 @@ static struct task_struct *wakeup_task; static int wakeup_cpu; static unsigned wakeup_prio = -1; -static DEFINE_SPINLOCK(wakeup_lock); +static raw_spinlock_t wakeup_lock = + (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED; static void __wakeup_reset(struct trace_array *tr); @@ -56,7 +57,8 @@ wakeup_tracer_call(unsigned long ip, unsigned long parent_ip) if (unlikely(disabled != 1)) goto out; - spin_lock_irqsave(&wakeup_lock, flags); + local_irq_save(flags); + __raw_spin_lock(&wakeup_lock); if (unlikely(!wakeup_task)) goto unlock; @@ -71,7 +73,8 @@ wakeup_tracer_call(unsigned long ip, unsigned long parent_ip) trace_function(tr, data, ip, parent_ip, flags); unlock: - spin_unlock_irqrestore(&wakeup_lock, flags); + __raw_spin_unlock(&wakeup_lock); + local_irq_restore(flags); out: atomic_dec(&data->disabled); @@ -145,7 +148,8 @@ wakeup_sched_switch(void *private, void *rq, struct task_struct *prev, if (likely(disabled != 1)) goto out; - spin_lock_irqsave(&wakeup_lock, flags); + local_irq_save(flags); + __raw_spin_lock(&wakeup_lock); /* We could race with grabbing wakeup_lock */ if (unlikely(!tracer_enabled || next != wakeup_task)) @@ -174,7 +178,8 @@ wakeup_sched_switch(void *private, void *rq, struct task_struct *prev, out_unlock: __wakeup_reset(tr); - spin_unlock_irqrestore(&wakeup_lock, flags); + __raw_spin_unlock(&wakeup_lock); + local_irq_restore(flags); out: atomic_dec(&tr->data[cpu]->disabled); } @@ -209,8 +214,6 @@ static void __wakeup_reset(struct trace_array *tr) struct trace_array_cpu *data; int cpu; - assert_spin_locked(&wakeup_lock); - for_each_possible_cpu(cpu) { data = tr->data[cpu]; tracing_reset(data); @@ -229,9 +232,11 @@ static void wakeup_reset(struct trace_array *tr) { unsigned long flags; - spin_lock_irqsave(&wakeup_lock, flags); + local_irq_save(flags); + __raw_spin_lock(&wakeup_lock); __wakeup_reset(tr); - spin_unlock_irqrestore(&wakeup_lock, flags); + __raw_spin_unlock(&wakeup_lock); + local_irq_restore(flags); } static void @@ -252,7 +257,7 @@ wakeup_check_start(struct trace_array *tr, struct task_struct *p, goto out; /* interrupts should be off from try_to_wake_up */ - spin_lock(&wakeup_lock); + __raw_spin_lock(&wakeup_lock); /* check for races. */ if (!tracer_enabled || p->prio >= wakeup_prio) @@ -274,7 +279,7 @@ wakeup_check_start(struct trace_array *tr, struct task_struct *p, CALLER_ADDR1, CALLER_ADDR2, flags); out_locked: - spin_unlock(&wakeup_lock); + __raw_spin_unlock(&wakeup_lock); out: atomic_dec(&tr->data[cpu]->disabled); } |