diff options
Diffstat (limited to 'kernel')
-rw-r--r-- | kernel/compat.c | 23 | ||||
-rw-r--r-- | kernel/debug/kdb/kdb_main.c | 2 | ||||
-rw-r--r-- | kernel/fork.c | 6 | ||||
-rw-r--r-- | kernel/module.c | 154 | ||||
-rw-r--r-- | kernel/ptrace.c | 74 | ||||
-rw-r--r-- | kernel/sched/core.c | 3 | ||||
-rw-r--r-- | kernel/signal.c | 24 |
7 files changed, 202 insertions, 84 deletions
diff --git a/kernel/compat.c b/kernel/compat.c index f6150e92dfc9..36700e9e2be9 100644 --- a/kernel/compat.c +++ b/kernel/compat.c @@ -535,9 +535,11 @@ asmlinkage long compat_sys_getrusage(int who, struct compat_rusage __user *ru) return 0; } -asmlinkage long -compat_sys_wait4(compat_pid_t pid, compat_uint_t __user *stat_addr, int options, - struct compat_rusage __user *ru) +COMPAT_SYSCALL_DEFINE4(wait4, + compat_pid_t, pid, + compat_uint_t __user *, stat_addr, + int, options, + struct compat_rusage __user *, ru) { if (!ru) { return sys_wait4(pid, stat_addr, options, NULL); @@ -564,9 +566,10 @@ compat_sys_wait4(compat_pid_t pid, compat_uint_t __user *stat_addr, int options, } } -asmlinkage long compat_sys_waitid(int which, compat_pid_t pid, - struct compat_siginfo __user *uinfo, int options, - struct compat_rusage __user *uru) +COMPAT_SYSCALL_DEFINE5(waitid, + int, which, compat_pid_t, pid, + struct compat_siginfo __user *, uinfo, int, options, + struct compat_rusage __user *, uru) { siginfo_t info; struct rusage ru; @@ -584,7 +587,11 @@ asmlinkage long compat_sys_waitid(int which, compat_pid_t pid, return ret; if (uru) { - ret = put_compat_rusage(&ru, uru); + /* sys_waitid() overwrites everything in ru */ + if (COMPAT_USE_64BIT_TIME) + ret = copy_to_user(uru, &ru, sizeof(ru)); + else + ret = put_compat_rusage(&ru, uru); if (ret) return ret; } @@ -994,7 +1001,7 @@ compat_sys_rt_sigtimedwait (compat_sigset_t __user *uthese, sigset_from_compat(&s, &s32); if (uts) { - if (get_compat_timespec(&t, uts)) + if (compat_get_timespec(&t, uts)) return -EFAULT; } diff --git a/kernel/debug/kdb/kdb_main.c b/kernel/debug/kdb/kdb_main.c index 4d5f8d5612f3..8875254120b6 100644 --- a/kernel/debug/kdb/kdb_main.c +++ b/kernel/debug/kdb/kdb_main.c @@ -1970,6 +1970,8 @@ static int kdb_lsmod(int argc, const char **argv) kdb_printf("Module Size modstruct Used by\n"); list_for_each_entry(mod, kdb_modules, list) { + if (mod->state == MODULE_STATE_UNFORMED) + continue; kdb_printf("%-20s%8u 0x%p ", mod->name, mod->core_size, (void *)mod); diff --git a/kernel/fork.c b/kernel/fork.c index 65ca6d27f24e..c535f33bbb9c 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1668,8 +1668,10 @@ SYSCALL_DEFINE5(clone, unsigned long, clone_flags, unsigned long, newsp, int, tls_val) #endif { - return do_fork(clone_flags, newsp, 0, - parent_tidptr, child_tidptr); + long ret = do_fork(clone_flags, newsp, 0, parent_tidptr, child_tidptr); + asmlinkage_protect(5, ret, clone_flags, newsp, + parent_tidptr, child_tidptr, tls_val); + return ret; } #endif diff --git a/kernel/module.c b/kernel/module.c index b10b048367e1..eab08274ec9b 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -188,6 +188,7 @@ struct load_info { ongoing or failed initialization etc. */ static inline int strong_try_module_get(struct module *mod) { + BUG_ON(mod && mod->state == MODULE_STATE_UNFORMED); if (mod && mod->state == MODULE_STATE_COMING) return -EBUSY; if (try_module_get(mod)) @@ -343,6 +344,9 @@ bool each_symbol_section(bool (*fn)(const struct symsearch *arr, #endif }; + if (mod->state == MODULE_STATE_UNFORMED) + continue; + if (each_symbol_in_section(arr, ARRAY_SIZE(arr), mod, fn, data)) return true; } @@ -450,16 +454,24 @@ const struct kernel_symbol *find_symbol(const char *name, EXPORT_SYMBOL_GPL(find_symbol); /* Search for module by name: must hold module_mutex. */ -struct module *find_module(const char *name) +static struct module *find_module_all(const char *name, + bool even_unformed) { struct module *mod; list_for_each_entry(mod, &modules, list) { + if (!even_unformed && mod->state == MODULE_STATE_UNFORMED) + continue; if (strcmp(mod->name, name) == 0) return mod; } return NULL; } + +struct module *find_module(const char *name) +{ + return find_module_all(name, false); +} EXPORT_SYMBOL_GPL(find_module); #ifdef CONFIG_SMP @@ -525,6 +537,8 @@ bool is_module_percpu_address(unsigned long addr) preempt_disable(); list_for_each_entry_rcu(mod, &modules, list) { + if (mod->state == MODULE_STATE_UNFORMED) + continue; if (!mod->percpu_size) continue; for_each_possible_cpu(cpu) { @@ -1048,6 +1062,8 @@ static ssize_t show_initstate(struct module_attribute *mattr, case MODULE_STATE_GOING: state = "going"; break; + default: + BUG(); } return sprintf(buffer, "%s\n", state); } @@ -1786,6 +1802,8 @@ void set_all_modules_text_rw(void) mutex_lock(&module_mutex); list_for_each_entry_rcu(mod, &modules, list) { + if (mod->state == MODULE_STATE_UNFORMED) + continue; if ((mod->module_core) && (mod->core_text_size)) { set_page_attributes(mod->module_core, mod->module_core + mod->core_text_size, @@ -1807,6 +1825,8 @@ void set_all_modules_text_ro(void) mutex_lock(&module_mutex); list_for_each_entry_rcu(mod, &modules, list) { + if (mod->state == MODULE_STATE_UNFORMED) + continue; if ((mod->module_core) && (mod->core_text_size)) { set_page_attributes(mod->module_core, mod->module_core + mod->core_text_size, @@ -2527,6 +2547,13 @@ static int copy_module_from_fd(int fd, struct load_info *info) err = -EFBIG; goto out; } + + /* Don't hand 0 to vmalloc, it whines. */ + if (stat.size == 0) { + err = -EINVAL; + goto out; + } + info->hdr = vmalloc(stat.size); if (!info->hdr) { err = -ENOMEM; @@ -2990,8 +3017,9 @@ static bool finished_loading(const char *name) bool ret; mutex_lock(&module_mutex); - mod = find_module(name); - ret = !mod || mod->state != MODULE_STATE_COMING; + mod = find_module_all(name, true); + ret = !mod || mod->state == MODULE_STATE_LIVE + || mod->state == MODULE_STATE_GOING; mutex_unlock(&module_mutex); return ret; @@ -3136,6 +3164,32 @@ static int load_module(struct load_info *info, const char __user *uargs, goto free_copy; } + /* + * We try to place it in the list now to make sure it's unique + * before we dedicate too many resources. In particular, + * temporary percpu memory exhaustion. + */ + mod->state = MODULE_STATE_UNFORMED; +again: + mutex_lock(&module_mutex); + if ((old = find_module_all(mod->name, true)) != NULL) { + if (old->state == MODULE_STATE_COMING + || old->state == MODULE_STATE_UNFORMED) { + /* Wait in case it fails to load. */ + mutex_unlock(&module_mutex); + err = wait_event_interruptible(module_wq, + finished_loading(mod->name)); + if (err) + goto free_module; + goto again; + } + err = -EEXIST; + mutex_unlock(&module_mutex); + goto free_module; + } + list_add_rcu(&mod->list, &modules); + mutex_unlock(&module_mutex); + #ifdef CONFIG_MODULE_SIG mod->sig_ok = info->sig_ok; if (!mod->sig_ok) @@ -3145,7 +3199,7 @@ static int load_module(struct load_info *info, const char __user *uargs, /* Now module is in final location, initialize linked lists, etc. */ err = module_unload_init(mod); if (err) - goto free_module; + goto unlink_mod; /* Now we've got everything in the final locations, we can * find optional sections. */ @@ -3180,54 +3234,33 @@ static int load_module(struct load_info *info, const char __user *uargs, goto free_arch_cleanup; } - /* Mark state as coming so strong_try_module_get() ignores us. */ - mod->state = MODULE_STATE_COMING; - - /* Now sew it into the lists so we can get lockdep and oops - * info during argument parsing. No one should access us, since - * strong_try_module_get() will fail. - * lockdep/oops can run asynchronous, so use the RCU list insertion - * function to insert in a way safe to concurrent readers. - * The mutex protects against concurrent writers. - */ -again: - mutex_lock(&module_mutex); - if ((old = find_module(mod->name)) != NULL) { - if (old->state == MODULE_STATE_COMING) { - /* Wait in case it fails to load. */ - mutex_unlock(&module_mutex); - err = wait_event_interruptible(module_wq, - finished_loading(mod->name)); - if (err) - goto free_arch_cleanup; - goto again; - } - err = -EEXIST; - goto unlock; - } - - /* This has to be done once we're sure module name is unique. */ dynamic_debug_setup(info->debug, info->num_debug); - /* Find duplicate symbols */ + mutex_lock(&module_mutex); + /* Find duplicate symbols (must be called under lock). */ err = verify_export_symbols(mod); if (err < 0) - goto ddebug; + goto ddebug_cleanup; + /* This relies on module_mutex for list integrity. */ module_bug_finalize(info->hdr, info->sechdrs, mod); - list_add_rcu(&mod->list, &modules); + + /* Mark state as coming so strong_try_module_get() ignores us, + * but kallsyms etc. can see us. */ + mod->state = MODULE_STATE_COMING; + mutex_unlock(&module_mutex); /* Module is ready to execute: parsing args may do that. */ err = parse_args(mod->name, mod->args, mod->kp, mod->num_kp, -32768, 32767, &ddebug_dyndbg_module_param_cb); if (err < 0) - goto unlink; + goto bug_cleanup; /* Link in to syfs. */ err = mod_sysfs_setup(mod, info, mod->kp, mod->num_kp); if (err < 0) - goto unlink; + goto bug_cleanup; /* Get rid of temporary copy. */ free_copy(info); @@ -3237,16 +3270,13 @@ again: return do_init_module(mod); - unlink: + bug_cleanup: + /* module_bug_cleanup needs module_mutex protection */ mutex_lock(&module_mutex); - /* Unlink carefully: kallsyms could be walking list. */ - list_del_rcu(&mod->list); module_bug_cleanup(mod); - wake_up_all(&module_wq); - ddebug: - dynamic_debug_remove(info->debug); - unlock: + ddebug_cleanup: mutex_unlock(&module_mutex); + dynamic_debug_remove(info->debug); synchronize_sched(); kfree(mod->args); free_arch_cleanup: @@ -3255,6 +3285,12 @@ again: free_modinfo(mod); free_unload: module_unload_free(mod); + unlink_mod: + mutex_lock(&module_mutex); + /* Unlink carefully: kallsyms could be walking list. */ + list_del_rcu(&mod->list); + wake_up_all(&module_wq); + mutex_unlock(&module_mutex); free_module: module_deallocate(mod, info); free_copy: @@ -3377,6 +3413,8 @@ const char *module_address_lookup(unsigned long addr, preempt_disable(); list_for_each_entry_rcu(mod, &modules, list) { + if (mod->state == MODULE_STATE_UNFORMED) + continue; if (within_module_init(addr, mod) || within_module_core(addr, mod)) { if (modname) @@ -3400,6 +3438,8 @@ int lookup_module_symbol_name(unsigned long addr, char *symname) preempt_disable(); list_for_each_entry_rcu(mod, &modules, list) { + if (mod->state == MODULE_STATE_UNFORMED) + continue; if (within_module_init(addr, mod) || within_module_core(addr, mod)) { const char *sym; @@ -3424,6 +3464,8 @@ int lookup_module_symbol_attrs(unsigned long addr, unsigned long *size, preempt_disable(); list_for_each_entry_rcu(mod, &modules, list) { + if (mod->state == MODULE_STATE_UNFORMED) + continue; if (within_module_init(addr, mod) || within_module_core(addr, mod)) { const char *sym; @@ -3451,6 +3493,8 @@ int module_get_kallsym(unsigned int symnum, unsigned long *value, char *type, preempt_disable(); list_for_each_entry_rcu(mod, &modules, list) { + if (mod->state == MODULE_STATE_UNFORMED) + continue; if (symnum < mod->num_symtab) { *value = mod->symtab[symnum].st_value; *type = mod->symtab[symnum].st_info; @@ -3493,9 +3537,12 @@ unsigned long module_kallsyms_lookup_name(const char *name) ret = mod_find_symname(mod, colon+1); *colon = ':'; } else { - list_for_each_entry_rcu(mod, &modules, list) + list_for_each_entry_rcu(mod, &modules, list) { + if (mod->state == MODULE_STATE_UNFORMED) + continue; if ((ret = mod_find_symname(mod, name)) != 0) break; + } } preempt_enable(); return ret; @@ -3510,6 +3557,8 @@ int module_kallsyms_on_each_symbol(int (*fn)(void *, const char *, int ret; list_for_each_entry(mod, &modules, list) { + if (mod->state == MODULE_STATE_UNFORMED) + continue; for (i = 0; i < mod->num_symtab; i++) { ret = fn(data, mod->strtab + mod->symtab[i].st_name, mod, mod->symtab[i].st_value); @@ -3525,6 +3574,7 @@ static char *module_flags(struct module *mod, char *buf) { int bx = 0; + BUG_ON(mod->state == MODULE_STATE_UNFORMED); if (mod->taints || mod->state == MODULE_STATE_GOING || mod->state == MODULE_STATE_COMING) { @@ -3566,6 +3616,10 @@ static int m_show(struct seq_file *m, void *p) struct module *mod = list_entry(p, struct module, list); char buf[8]; + /* We always ignore unformed modules. */ + if (mod->state == MODULE_STATE_UNFORMED) + return 0; + seq_printf(m, "%s %u", mod->name, mod->init_size + mod->core_size); print_unload_info(m, mod); @@ -3626,6 +3680,8 @@ const struct exception_table_entry *search_module_extables(unsigned long addr) preempt_disable(); list_for_each_entry_rcu(mod, &modules, list) { + if (mod->state == MODULE_STATE_UNFORMED) + continue; if (mod->num_exentries == 0) continue; @@ -3674,10 +3730,13 @@ struct module *__module_address(unsigned long addr) if (addr < module_addr_min || addr > module_addr_max) return NULL; - list_for_each_entry_rcu(mod, &modules, list) + list_for_each_entry_rcu(mod, &modules, list) { + if (mod->state == MODULE_STATE_UNFORMED) + continue; if (within_module_core(addr, mod) || within_module_init(addr, mod)) return mod; + } return NULL; } EXPORT_SYMBOL_GPL(__module_address); @@ -3730,8 +3789,11 @@ void print_modules(void) printk(KERN_DEFAULT "Modules linked in:"); /* Most callers should already have preempt disabled, but make sure */ preempt_disable(); - list_for_each_entry_rcu(mod, &modules, list) + list_for_each_entry_rcu(mod, &modules, list) { + if (mod->state == MODULE_STATE_UNFORMED) + continue; printk(" %s%s", mod->name, module_flags(mod, buf)); + } preempt_enable(); if (last_unloaded_module[0]) printk(" [last unloaded: %s]", last_unloaded_module); diff --git a/kernel/ptrace.c b/kernel/ptrace.c index 1599157336a6..6cbeaae4406d 100644 --- a/kernel/ptrace.c +++ b/kernel/ptrace.c @@ -117,11 +117,45 @@ void __ptrace_unlink(struct task_struct *child) * TASK_KILLABLE sleeps. */ if (child->jobctl & JOBCTL_STOP_PENDING || task_is_traced(child)) - signal_wake_up(child, task_is_traced(child)); + ptrace_signal_wake_up(child, true); spin_unlock(&child->sighand->siglock); } +/* Ensure that nothing can wake it up, even SIGKILL */ +static bool ptrace_freeze_traced(struct task_struct *task) +{ + bool ret = false; + + /* Lockless, nobody but us can set this flag */ + if (task->jobctl & JOBCTL_LISTENING) + return ret; + + spin_lock_irq(&task->sighand->siglock); + if (task_is_traced(task) && !__fatal_signal_pending(task)) { + task->state = __TASK_TRACED; + ret = true; + } + spin_unlock_irq(&task->sighand->siglock); + + return ret; +} + +static void ptrace_unfreeze_traced(struct task_struct *task) +{ + if (task->state != __TASK_TRACED) + return; + + WARN_ON(!task->ptrace || task->parent != current); + + spin_lock_irq(&task->sighand->siglock); + if (__fatal_signal_pending(task)) + wake_up_state(task, __TASK_TRACED); + else + task->state = TASK_TRACED; + spin_unlock_irq(&task->sighand->siglock); +} + /** * ptrace_check_attach - check whether ptracee is ready for ptrace operation * @child: ptracee to check for @@ -139,7 +173,7 @@ void __ptrace_unlink(struct task_struct *child) * RETURNS: * 0 on success, -ESRCH if %child is not ready. */ -int ptrace_check_attach(struct task_struct *child, bool ignore_state) +static int ptrace_check_attach(struct task_struct *child, bool ignore_state) { int ret = -ESRCH; @@ -151,24 +185,29 @@ int ptrace_check_attach(struct task_struct *child, bool ignore_state) * be changed by us so it's not changing right after this. */ read_lock(&tasklist_lock); - if ((child->ptrace & PT_PTRACED) && child->parent == current) { + if (child->ptrace && child->parent == current) { + WARN_ON(child->state == __TASK_TRACED); /* * child->sighand can't be NULL, release_task() * does ptrace_unlink() before __exit_signal(). */ - spin_lock_irq(&child->sighand->siglock); - WARN_ON_ONCE(task_is_stopped(child)); - if (ignore_state || (task_is_traced(child) && - !(child->jobctl & JOBCTL_LISTENING))) + if (ignore_state || ptrace_freeze_traced(child)) ret = 0; - spin_unlock_irq(&child->sighand->siglock); } read_unlock(&tasklist_lock); - if (!ret && !ignore_state) - ret = wait_task_inactive(child, TASK_TRACED) ? 0 : -ESRCH; + if (!ret && !ignore_state) { + if (!wait_task_inactive(child, __TASK_TRACED)) { + /* + * This can only happen if may_ptrace_stop() fails and + * ptrace_stop() changes ->state back to TASK_RUNNING, + * so we should not worry about leaking __TASK_TRACED. + */ + WARN_ON(child->state == __TASK_TRACED); + ret = -ESRCH; + } + } - /* All systems go.. */ return ret; } @@ -317,7 +356,7 @@ static int ptrace_attach(struct task_struct *task, long request, */ if (task_is_stopped(task) && task_set_jobctl_pending(task, JOBCTL_TRAP_STOP | JOBCTL_TRAPPING)) - signal_wake_up(task, 1); + signal_wake_up_state(task, __TASK_STOPPED); spin_unlock(&task->sighand->siglock); @@ -737,7 +776,7 @@ int ptrace_request(struct task_struct *child, long request, * tracee into STOP. */ if (likely(task_set_jobctl_pending(child, JOBCTL_TRAP_STOP))) - signal_wake_up(child, child->jobctl & JOBCTL_LISTENING); + ptrace_signal_wake_up(child, child->jobctl & JOBCTL_LISTENING); unlock_task_sighand(child, &flags); ret = 0; @@ -763,7 +802,7 @@ int ptrace_request(struct task_struct *child, long request, * start of this trap and now. Trigger re-trap. */ if (child->jobctl & JOBCTL_TRAP_NOTIFY) - signal_wake_up(child, true); + ptrace_signal_wake_up(child, true); ret = 0; } unlock_task_sighand(child, &flags); @@ -900,6 +939,8 @@ SYSCALL_DEFINE4(ptrace, long, request, long, pid, unsigned long, addr, goto out_put_task_struct; ret = arch_ptrace(child, request, addr, data); + if (ret || request != PTRACE_DETACH) + ptrace_unfreeze_traced(child); out_put_task_struct: put_task_struct(child); @@ -1039,8 +1080,11 @@ asmlinkage long compat_sys_ptrace(compat_long_t request, compat_long_t pid, ret = ptrace_check_attach(child, request == PTRACE_KILL || request == PTRACE_INTERRUPT); - if (!ret) + if (!ret) { ret = compat_arch_ptrace(child, request, addr, data); + if (ret || request != PTRACE_DETACH) + ptrace_unfreeze_traced(child); + } out_put_task_struct: put_task_struct(child); diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 257002c13bb0..26058d0bebba 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -1523,7 +1523,8 @@ out: */ int wake_up_process(struct task_struct *p) { - return try_to_wake_up(p, TASK_ALL, 0); + WARN_ON(task_is_stopped_or_traced(p)); + return try_to_wake_up(p, TASK_NORMAL, 0); } EXPORT_SYMBOL(wake_up_process); diff --git a/kernel/signal.c b/kernel/signal.c index 372771e948c2..3d09cf6cde75 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -680,23 +680,17 @@ int dequeue_signal(struct task_struct *tsk, sigset_t *mask, siginfo_t *info) * No need to set need_resched since signal event passing * goes through ->blocked */ -void signal_wake_up(struct task_struct *t, int resume) +void signal_wake_up_state(struct task_struct *t, unsigned int state) { - unsigned int mask; - set_tsk_thread_flag(t, TIF_SIGPENDING); - /* - * For SIGKILL, we want to wake it up in the stopped/traced/killable + * TASK_WAKEKILL also means wake it up in the stopped/traced/killable * case. We don't check t->state here because there is a race with it * executing another processor and just now entering stopped state. * By using wake_up_state, we ensure the process will wake up and * handle its death signal. */ - mask = TASK_INTERRUPTIBLE; - if (resume) - mask |= TASK_WAKEKILL; - if (!wake_up_state(t, mask)) + if (!wake_up_state(t, state | TASK_INTERRUPTIBLE)) kick_process(t); } @@ -844,7 +838,7 @@ static void ptrace_trap_notify(struct task_struct *t) assert_spin_locked(&t->sighand->siglock); task_set_jobctl_pending(t, JOBCTL_TRAP_NOTIFY); - signal_wake_up(t, t->jobctl & JOBCTL_LISTENING); + ptrace_signal_wake_up(t, t->jobctl & JOBCTL_LISTENING); } /* @@ -1800,6 +1794,10 @@ static inline int may_ptrace_stop(void) * If SIGKILL was already sent before the caller unlocked * ->siglock we must see ->core_state != NULL. Otherwise it * is safe to enter schedule(). + * + * This is almost outdated, a task with the pending SIGKILL can't + * block in TASK_TRACED. But PTRACE_EVENT_EXIT can be reported + * after SIGKILL was already dequeued. */ if (unlikely(current->mm->core_state) && unlikely(current->mm == current->parent->mm)) @@ -1925,6 +1923,7 @@ static void ptrace_stop(int exit_code, int why, int clear_code, siginfo_t *info) if (gstop_done) do_notify_parent_cldstop(current, false, why); + /* tasklist protects us from ptrace_freeze_traced() */ __set_current_state(TASK_RUNNING); if (clear_code) current->exit_code = 0; @@ -3116,8 +3115,9 @@ int __save_altstack(stack_t __user *uss, unsigned long sp) #ifdef CONFIG_COMPAT #ifdef CONFIG_GENERIC_SIGALTSTACK -asmlinkage long compat_sys_sigaltstack(const compat_stack_t __user *uss_ptr, - compat_stack_t __user *uoss_ptr) +COMPAT_SYSCALL_DEFINE2(sigaltstack, + const compat_stack_t __user *, uss_ptr, + compat_stack_t __user *, uoss_ptr) { stack_t uss, uoss; int ret; |