diff options
Diffstat (limited to 'kernel')
35 files changed, 469 insertions, 289 deletions
diff --git a/kernel/audit.c b/kernel/audit.c index e4956244ae50..939500317066 100644 --- a/kernel/audit.c +++ b/kernel/audit.c @@ -74,6 +74,8 @@ static int audit_initialized; int audit_enabled; int audit_ever_enabled; +EXPORT_SYMBOL_GPL(audit_enabled); + /* Default state when kernel boots without any parameters. */ static int audit_default; @@ -671,9 +673,9 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh) pid = NETLINK_CREDS(skb)->pid; uid = NETLINK_CREDS(skb)->uid; - loginuid = NETLINK_CB(skb).loginuid; - sessionid = NETLINK_CB(skb).sessionid; - sid = NETLINK_CB(skb).sid; + loginuid = audit_get_loginuid(current); + sessionid = audit_get_sessionid(current); + security_task_getsecid(current, &sid); seq = nlh->nlmsg_seq; data = NLMSG_DATA(nlh); diff --git a/kernel/auditfilter.c b/kernel/auditfilter.c index add2819af71b..f8277c80d678 100644 --- a/kernel/auditfilter.c +++ b/kernel/auditfilter.c @@ -1238,6 +1238,7 @@ static int audit_filter_user_rules(struct netlink_skb_parms *cb, for (i = 0; i < rule->field_count; i++) { struct audit_field *f = &rule->fields[i]; int result = 0; + u32 sid; switch (f->type) { case AUDIT_PID: @@ -1250,19 +1251,22 @@ static int audit_filter_user_rules(struct netlink_skb_parms *cb, result = audit_comparator(cb->creds.gid, f->op, f->val); break; case AUDIT_LOGINUID: - result = audit_comparator(cb->loginuid, f->op, f->val); + result = audit_comparator(audit_get_loginuid(current), + f->op, f->val); break; case AUDIT_SUBJ_USER: case AUDIT_SUBJ_ROLE: case AUDIT_SUBJ_TYPE: case AUDIT_SUBJ_SEN: case AUDIT_SUBJ_CLR: - if (f->lsm_rule) - result = security_audit_rule_match(cb->sid, + if (f->lsm_rule) { + security_task_getsecid(current, &sid); + result = security_audit_rule_match(sid, f->type, f->op, f->lsm_rule, NULL); + } break; } diff --git a/kernel/capability.c b/kernel/capability.c index 2f05303715a5..9e9385f132c8 100644 --- a/kernel/capability.c +++ b/kernel/capability.c @@ -306,7 +306,7 @@ int capable(int cap) BUG(); } - if (security_capable(cap) == 0) { + if (security_capable(current_cred(), cap) == 0) { current->flags |= PF_SUPERPRIV; return 1; } diff --git a/kernel/cpuset.c b/kernel/cpuset.c index 4349935c2ad8..e92e98189032 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c @@ -1575,8 +1575,10 @@ static int cpuset_write_resmask(struct cgroup *cgrp, struct cftype *cft, return -ENODEV; trialcs = alloc_trial_cpuset(cs); - if (!trialcs) - return -ENOMEM; + if (!trialcs) { + retval = -ENOMEM; + goto out; + } switch (cft->private) { case FILE_CPULIST: @@ -1591,6 +1593,7 @@ static int cpuset_write_resmask(struct cgroup *cgrp, struct cftype *cft, } free_trial_cpuset(trialcs); +out: cgroup_unlock(); return retval; } diff --git a/kernel/cred.c b/kernel/cred.c index 6a1aa004e376..3a9d6dd53a6c 100644 --- a/kernel/cred.c +++ b/kernel/cred.c @@ -252,13 +252,13 @@ struct cred *cred_alloc_blank(void) #endif atomic_set(&new->usage, 1); +#ifdef CONFIG_DEBUG_CREDENTIALS + new->magic = CRED_MAGIC; +#endif if (security_cred_alloc_blank(new, GFP_KERNEL) < 0) goto error; -#ifdef CONFIG_DEBUG_CREDENTIALS - new->magic = CRED_MAGIC; -#endif return new; error: @@ -657,6 +657,8 @@ struct cred *prepare_kernel_cred(struct task_struct *daemon) validate_creds(old); *new = *old; + atomic_set(&new->usage, 1); + set_cred_subscribers(new, 0); get_uid(new->user); get_group_info(new->group_info); @@ -674,8 +676,6 @@ struct cred *prepare_kernel_cred(struct task_struct *daemon) if (security_prepare_creds(new, old, GFP_KERNEL) < 0) goto error; - atomic_set(&new->usage, 1); - set_cred_subscribers(new, 0); put_cred(old); validate_creds(new); return new; @@ -748,7 +748,11 @@ bool creds_are_invalid(const struct cred *cred) if (cred->magic != CRED_MAGIC) return true; #ifdef CONFIG_SECURITY_SELINUX - if (selinux_is_enabled()) { + /* + * cred->security == NULL if security_cred_alloc_blank() or + * security_prepare_creds() returned an error. + */ + if (selinux_is_enabled() && cred->security) { if ((unsigned long) cred->security < PAGE_SIZE) return true; if ((*(u32 *)cred->security & 0xffffff00) == diff --git a/kernel/irq/internals.h b/kernel/irq/internals.h index 4571ae7e085a..99c3bc8a6fb4 100644 --- a/kernel/irq/internals.h +++ b/kernel/irq/internals.h @@ -3,6 +3,12 @@ */ #include <linux/irqdesc.h> +#ifdef CONFIG_SPARSE_IRQ +# define IRQ_BITMAP_BITS (NR_IRQS + 8196) +#else +# define IRQ_BITMAP_BITS NR_IRQS +#endif + extern int noirqdebug; #define irq_data_to_desc(data) container_of(data, struct irq_desc, irq_data) diff --git a/kernel/irq/irqdesc.c b/kernel/irq/irqdesc.c index 282f20230e67..2039bea31bdf 100644 --- a/kernel/irq/irqdesc.c +++ b/kernel/irq/irqdesc.c @@ -94,7 +94,7 @@ int nr_irqs = NR_IRQS; EXPORT_SYMBOL_GPL(nr_irqs); static DEFINE_MUTEX(sparse_irq_lock); -static DECLARE_BITMAP(allocated_irqs, NR_IRQS); +static DECLARE_BITMAP(allocated_irqs, IRQ_BITMAP_BITS); #ifdef CONFIG_SPARSE_IRQ @@ -217,6 +217,15 @@ int __init early_irq_init(void) initcnt = arch_probe_nr_irqs(); printk(KERN_INFO "NR_IRQS:%d nr_irqs:%d %d\n", NR_IRQS, nr_irqs, initcnt); + if (WARN_ON(nr_irqs > IRQ_BITMAP_BITS)) + nr_irqs = IRQ_BITMAP_BITS; + + if (WARN_ON(initcnt > IRQ_BITMAP_BITS)) + initcnt = IRQ_BITMAP_BITS; + + if (initcnt > nr_irqs) + nr_irqs = initcnt; + for (i = 0; i < initcnt; i++) { desc = alloc_desc(i, node); set_bit(i, allocated_irqs); diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c index 538fce2db51c..b0c900520517 100644 --- a/kernel/irq/manage.c +++ b/kernel/irq/manage.c @@ -1182,7 +1182,7 @@ int request_threaded_irq(unsigned int irq, irq_handler_t handler, if (retval) kfree(action); -#ifdef CONFIG_DEBUG_SHIRQ +#ifdef CONFIG_DEBUG_SHIRQ_FIXME if (!retval && (irqflags & IRQF_SHARED)) { /* * It's a shared IRQ -- the driver ought to be prepared for it diff --git a/kernel/irq/migration.c b/kernel/irq/migration.c index 1d2541940480..441fd629ff04 100644 --- a/kernel/irq/migration.c +++ b/kernel/irq/migration.c @@ -56,6 +56,7 @@ void move_masked_irq(int irq) void move_native_irq(int irq) { struct irq_desc *desc = irq_to_desc(irq); + bool masked; if (likely(!(desc->status & IRQ_MOVE_PENDING))) return; @@ -63,8 +64,15 @@ void move_native_irq(int irq) if (unlikely(desc->status & IRQ_DISABLED)) return; - desc->irq_data.chip->irq_mask(&desc->irq_data); + /* + * Be careful vs. already masked interrupts. If this is a + * threaded interrupt with ONESHOT set, we can end up with an + * interrupt storm. + */ + masked = desc->status & IRQ_MASKED; + if (!masked) + desc->irq_data.chip->irq_mask(&desc->irq_data); move_masked_irq(irq); - desc->irq_data.chip->irq_unmask(&desc->irq_data); + if (!masked) + desc->irq_data.chip->irq_unmask(&desc->irq_data); } - diff --git a/kernel/irq/resend.c b/kernel/irq/resend.c index 891115a929aa..dc49358b73fa 100644 --- a/kernel/irq/resend.c +++ b/kernel/irq/resend.c @@ -23,7 +23,7 @@ #ifdef CONFIG_HARDIRQS_SW_RESEND /* Bitmap to handle software resend of interrupts: */ -static DECLARE_BITMAP(irqs_resend, NR_IRQS); +static DECLARE_BITMAP(irqs_resend, IRQ_BITMAP_BITS); /* * Run software resends of IRQ's diff --git a/kernel/module.c b/kernel/module.c index 34e00b708fad..efa290ea94bf 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -2460,9 +2460,9 @@ static void find_module_sections(struct module *mod, struct load_info *info) #endif #ifdef CONFIG_TRACEPOINTS - mod->tracepoints = section_objs(info, "__tracepoints", - sizeof(*mod->tracepoints), - &mod->num_tracepoints); + mod->tracepoints_ptrs = section_objs(info, "__tracepoints_ptrs", + sizeof(*mod->tracepoints_ptrs), + &mod->num_tracepoints); #endif #ifdef HAVE_JUMP_LABEL mod->jump_entries = section_objs(info, "__jump_table", @@ -3393,7 +3393,7 @@ void module_layout(struct module *mod, struct modversion_info *ver, struct kernel_param *kp, struct kernel_symbol *ks, - struct tracepoint *tp) + struct tracepoint * const *tp) { } EXPORT_SYMBOL(module_layout); @@ -3407,8 +3407,8 @@ void module_update_tracepoints(void) mutex_lock(&module_mutex); list_for_each_entry(mod, &modules, list) if (!mod->taints) - tracepoint_update_probe_range(mod->tracepoints, - mod->tracepoints + mod->num_tracepoints); + tracepoint_update_probe_range(mod->tracepoints_ptrs, + mod->tracepoints_ptrs + mod->num_tracepoints); mutex_unlock(&module_mutex); } @@ -3432,8 +3432,8 @@ int module_get_iter_tracepoints(struct tracepoint_iter *iter) else if (iter_mod > iter->module) iter->tracepoint = NULL; found = tracepoint_get_iter_range(&iter->tracepoint, - iter_mod->tracepoints, - iter_mod->tracepoints + iter_mod->tracepoints_ptrs, + iter_mod->tracepoints_ptrs + iter_mod->num_tracepoints); if (found) { iter->module = iter_mod; diff --git a/kernel/params.c b/kernel/params.c index 08107d181758..0da1411222b9 100644 --- a/kernel/params.c +++ b/kernel/params.c @@ -719,9 +719,7 @@ void destroy_params(const struct kernel_param *params, unsigned num) params[i].ops->free(params[i].arg); } -static void __init kernel_add_sysfs_param(const char *name, - struct kernel_param *kparam, - unsigned int name_skip) +static struct module_kobject * __init locate_module_kobject(const char *name) { struct module_kobject *mk; struct kobject *kobj; @@ -729,10 +727,7 @@ static void __init kernel_add_sysfs_param(const char *name, kobj = kset_find_obj(module_kset, name); if (kobj) { - /* We already have one. Remove params so we can add more. */ mk = to_module_kobject(kobj); - /* We need to remove it before adding parameters. */ - sysfs_remove_group(&mk->kobj, &mk->mp->grp); } else { mk = kzalloc(sizeof(struct module_kobject), GFP_KERNEL); BUG_ON(!mk); @@ -743,15 +738,36 @@ static void __init kernel_add_sysfs_param(const char *name, "%s", name); if (err) { kobject_put(&mk->kobj); - printk(KERN_ERR "Module '%s' failed add to sysfs, " - "error number %d\n", name, err); - printk(KERN_ERR "The system will be unstable now.\n"); - return; + printk(KERN_ERR + "Module '%s' failed add to sysfs, error number %d\n", + name, err); + printk(KERN_ERR + "The system will be unstable now.\n"); + return NULL; } - /* So that exit path is even. */ + + /* So that we hold reference in both cases. */ kobject_get(&mk->kobj); } + return mk; +} + +static void __init kernel_add_sysfs_param(const char *name, + struct kernel_param *kparam, + unsigned int name_skip) +{ + struct module_kobject *mk; + int err; + + mk = locate_module_kobject(name); + if (!mk) + return; + + /* We need to remove old parameters before adding more. */ + if (mk->mp) + sysfs_remove_group(&mk->kobj, &mk->mp->grp); + /* These should not fail at boot. */ err = add_sysfs_param(mk, kparam, kparam->name + name_skip); BUG_ON(err); @@ -796,6 +812,32 @@ static void __init param_sysfs_builtin(void) } } +ssize_t __modver_version_show(struct module_attribute *mattr, + struct module *mod, char *buf) +{ + struct module_version_attribute *vattr = + container_of(mattr, struct module_version_attribute, mattr); + + return sprintf(buf, "%s\n", vattr->version); +} + +extern struct module_version_attribute __start___modver[], __stop___modver[]; + +static void __init version_sysfs_builtin(void) +{ + const struct module_version_attribute *vattr; + struct module_kobject *mk; + int err; + + for (vattr = __start___modver; vattr < __stop___modver; vattr++) { + mk = locate_module_kobject(vattr->module_name); + if (mk) { + err = sysfs_create_file(&mk->kobj, &vattr->mattr.attr); + kobject_uevent(&mk->kobj, KOBJ_ADD); + kobject_put(&mk->kobj); + } + } +} /* module-related sysfs stuff */ @@ -875,6 +917,7 @@ static int __init param_sysfs_init(void) } module_sysfs_initialized = 1; + version_sysfs_builtin(); param_sysfs_builtin(); return 0; diff --git a/kernel/perf_event.c b/kernel/perf_event.c index 84522c796987..656222fcf767 100644 --- a/kernel/perf_event.c +++ b/kernel/perf_event.c @@ -782,6 +782,10 @@ retry: raw_spin_unlock_irq(&ctx->lock); } +#define MAX_INTERRUPTS (~0ULL) + +static void perf_log_throttle(struct perf_event *event, int enable); + static int event_sched_in(struct perf_event *event, struct perf_cpu_context *cpuctx, @@ -794,6 +798,17 @@ event_sched_in(struct perf_event *event, event->state = PERF_EVENT_STATE_ACTIVE; event->oncpu = smp_processor_id(); + + /* + * Unthrottle events, since we scheduled we might have missed several + * ticks already, also for a heavily scheduling task there is little + * guarantee it'll get a tick in a timely manner. + */ + if (unlikely(event->hw.interrupts == MAX_INTERRUPTS)) { + perf_log_throttle(event, 1); + event->hw.interrupts = 0; + } + /* * The new state must be visible before we turn it on in the hardware: */ @@ -1596,10 +1611,6 @@ void __perf_event_task_sched_in(struct task_struct *task) } } -#define MAX_INTERRUPTS (~0ULL) - -static void perf_log_throttle(struct perf_event *event, int enable); - static u64 perf_calculate_period(struct perf_event *event, u64 nsec, u64 count) { u64 frequency = event->attr.sample_freq; @@ -1901,11 +1912,12 @@ static void __perf_event_read(void *info) return; raw_spin_lock(&ctx->lock); - update_context_time(ctx); + if (ctx->is_active) + update_context_time(ctx); update_event_times(event); + if (event->state == PERF_EVENT_STATE_ACTIVE) + event->pmu->read(event); raw_spin_unlock(&ctx->lock); - - event->pmu->read(event); } static inline u64 perf_event_count(struct perf_event *event) @@ -1999,8 +2011,7 @@ static int alloc_callchain_buffers(void) * accessed from NMI. Use a temporary manual per cpu allocation * until that gets sorted out. */ - size = sizeof(*entries) + sizeof(struct perf_callchain_entry *) * - num_possible_cpus(); + size = offsetof(struct callchain_cpus_entries, cpu_entries[nr_cpu_ids]); entries = kzalloc(size, GFP_KERNEL); if (!entries) @@ -2201,13 +2212,6 @@ find_lively_task_by_vpid(pid_t vpid) if (!task) return ERR_PTR(-ESRCH); - /* - * Can't attach events to a dying task. - */ - err = -ESRCH; - if (task->flags & PF_EXITING) - goto errout; - /* Reuse ptrace permission checks for now. */ err = -EACCES; if (!ptrace_may_access(task, PTRACE_MODE_READ)) @@ -2268,14 +2272,27 @@ retry: get_ctx(ctx); - if (cmpxchg(&task->perf_event_ctxp[ctxn], NULL, ctx)) { - /* - * We raced with some other task; use - * the context they set. - */ + err = 0; + mutex_lock(&task->perf_event_mutex); + /* + * If it has already passed perf_event_exit_task(). + * we must see PF_EXITING, it takes this mutex too. + */ + if (task->flags & PF_EXITING) + err = -ESRCH; + else if (task->perf_event_ctxp[ctxn]) + err = -EAGAIN; + else + rcu_assign_pointer(task->perf_event_ctxp[ctxn], ctx); + mutex_unlock(&task->perf_event_mutex); + + if (unlikely(err)) { put_task_struct(task); kfree(ctx); - goto retry; + + if (err == -EAGAIN) + goto retry; + goto errout; } } @@ -5374,6 +5391,8 @@ free_dev: goto out; } +static struct lock_class_key cpuctx_mutex; + int perf_pmu_register(struct pmu *pmu, char *name, int type) { int cpu, ret; @@ -5422,6 +5441,7 @@ skip_type: cpuctx = per_cpu_ptr(pmu->pmu_cpu_context, cpu); __perf_event_init_context(&cpuctx->ctx); + lockdep_set_class(&cpuctx->ctx.mutex, &cpuctx_mutex); cpuctx->ctx.type = cpu_context; cpuctx->ctx.pmu = pmu; cpuctx->jiffies_interval = 1; @@ -6127,7 +6147,7 @@ static void perf_event_exit_task_context(struct task_struct *child, int ctxn) * scheduled, so we are now safe from rescheduling changing * our context. */ - child_ctx = child->perf_event_ctxp[ctxn]; + child_ctx = rcu_dereference_raw(child->perf_event_ctxp[ctxn]); task_ctx_sched_out(child_ctx, EVENT_ALL); /* @@ -6440,11 +6460,6 @@ int perf_event_init_context(struct task_struct *child, int ctxn) unsigned long flags; int ret = 0; - child->perf_event_ctxp[ctxn] = NULL; - - mutex_init(&child->perf_event_mutex); - INIT_LIST_HEAD(&child->perf_event_list); - if (likely(!parent->perf_event_ctxp[ctxn])) return 0; @@ -6533,6 +6548,10 @@ int perf_event_init_task(struct task_struct *child) { int ctxn, ret; + memset(child->perf_event_ctxp, 0, sizeof(child->perf_event_ctxp)); + mutex_init(&child->perf_event_mutex); + INIT_LIST_HEAD(&child->perf_event_list); + for_each_task_context_nr(ctxn) { ret = perf_event_init_context(child, ctxn); if (ret) diff --git a/kernel/power/main.c b/kernel/power/main.c index 7b5db6a8561e..701853042c28 100644 --- a/kernel/power/main.c +++ b/kernel/power/main.c @@ -326,7 +326,7 @@ EXPORT_SYMBOL_GPL(pm_wq); static int __init pm_start_workqueue(void) { - pm_wq = alloc_workqueue("pm", WQ_FREEZEABLE, 0); + pm_wq = alloc_workqueue("pm", WQ_FREEZABLE, 0); return pm_wq ? 0 : -ENOMEM; } diff --git a/kernel/power/process.c b/kernel/power/process.c index d6d2a10320e0..0cf3a27a6c9d 100644 --- a/kernel/power/process.c +++ b/kernel/power/process.c @@ -22,7 +22,7 @@ */ #define TIMEOUT (20 * HZ) -static inline int freezeable(struct task_struct * p) +static inline int freezable(struct task_struct * p) { if ((p == current) || (p->flags & PF_NOFREEZE) || @@ -53,7 +53,7 @@ static int try_to_freeze_tasks(bool sig_only) todo = 0; read_lock(&tasklist_lock); do_each_thread(g, p) { - if (frozen(p) || !freezeable(p)) + if (frozen(p) || !freezable(p)) continue; if (!freeze_task(p, sig_only)) @@ -167,7 +167,7 @@ static void thaw_tasks(bool nosig_only) read_lock(&tasklist_lock); do_each_thread(g, p) { - if (!freezeable(p)) + if (!freezable(p)) continue; if (nosig_only && should_send_signal(p)) diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c index 0dac75ea4456..64db648ff911 100644 --- a/kernel/power/snapshot.c +++ b/kernel/power/snapshot.c @@ -1519,11 +1519,8 @@ static int swsusp_alloc(struct memory_bitmap *orig_bm, struct memory_bitmap *copy_bm, unsigned int nr_pages, unsigned int nr_highmem) { - int error = 0; - if (nr_highmem > 0) { - error = get_highmem_buffer(PG_ANY); - if (error) + if (get_highmem_buffer(PG_ANY)) goto err_out; if (nr_highmem > alloc_highmem) { nr_highmem -= alloc_highmem; @@ -1546,7 +1543,7 @@ swsusp_alloc(struct memory_bitmap *orig_bm, struct memory_bitmap *copy_bm, err_out: swsusp_free(); - return error; + return -ENOMEM; } asmlinkage int swsusp_save(void) diff --git a/kernel/printk.c b/kernel/printk.c index 53d9a9ec88e6..36231525e22f 100644 --- a/kernel/printk.c +++ b/kernel/printk.c @@ -97,7 +97,7 @@ static int console_locked, console_suspended; /* * logbuf_lock protects log_buf, log_start, log_end, con_start and logged_chars * It is also used in interesting ways to provide interlocking in - * release_console_sem(). + * console_unlock();. */ static DEFINE_SPINLOCK(logbuf_lock); @@ -262,25 +262,47 @@ int dmesg_restrict = 1; int dmesg_restrict; #endif +static int syslog_action_restricted(int type) +{ + if (dmesg_restrict) + return 1; + /* Unless restricted, we allow "read all" and "get buffer size" for everybody */ + return type != SYSLOG_ACTION_READ_ALL && type != SYSLOG_ACTION_SIZE_BUFFER; +} + +static int check_syslog_permissions(int type, bool from_file) +{ + /* + * If this is from /proc/kmsg and we've already opened it, then we've + * already done the capabilities checks at open time. + */ + if (from_file && type != SYSLOG_ACTION_OPEN) + return 0; + + if (syslog_action_restricted(type)) { + if (capable(CAP_SYSLOG)) + return 0; + /* For historical reasons, accept CAP_SYS_ADMIN too, with a warning */ + if (capable(CAP_SYS_ADMIN)) { + WARN_ONCE(1, "Attempt to access syslog with CAP_SYS_ADMIN " + "but no CAP_SYSLOG (deprecated).\n"); + return 0; + } + return -EPERM; + } + return 0; +} + int do_syslog(int type, char __user *buf, int len, bool from_file) { unsigned i, j, limit, count; int do_clear = 0; char c; - int error = 0; + int error; - /* - * If this is from /proc/kmsg we only do the capabilities checks - * at open time. - */ - if (type == SYSLOG_ACTION_OPEN || !from_file) { - if (dmesg_restrict && !capable(CAP_SYSLOG)) - goto warn; /* switch to return -EPERM after 2.6.39 */ - if ((type != SYSLOG_ACTION_READ_ALL && - type != SYSLOG_ACTION_SIZE_BUFFER) && - !capable(CAP_SYSLOG)) - goto warn; /* switch to return -EPERM after 2.6.39 */ - } + error = check_syslog_permissions(type, from_file); + if (error) + goto out; error = security_syslog(type); if (error) @@ -423,12 +445,6 @@ int do_syslog(int type, char __user *buf, int len, bool from_file) } out: return error; -warn: - /* remove after 2.6.39 */ - if (capable(CAP_SYS_ADMIN)) - WARN_ONCE(1, "Attempt to access syslog with CAP_SYS_ADMIN " - "but no CAP_SYSLOG (deprecated and denied).\n"); - return -EPERM; } SYSCALL_DEFINE3(syslog, int, type, char __user *, buf, int, len) @@ -501,7 +517,7 @@ static void _call_console_drivers(unsigned start, /* * Call the console drivers, asking them to write out * log_buf[start] to log_buf[end - 1]. - * The console_sem must be held. + * The console_lock must be held. */ static void call_console_drivers(unsigned start, unsigned end) { @@ -604,11 +620,11 @@ static int have_callable_console(void) * * This is printk(). It can be called from any context. We want it to work. * - * We try to grab the console_sem. If we succeed, it's easy - we log the output and + * We try to grab the console_lock. If we succeed, it's easy - we log the output and * call the console drivers. If we fail to get the semaphore we place the output * into the log buffer and return. The current holder of the console_sem will - * notice the new output in release_console_sem() and will send it to the - * consoles before releasing the semaphore. + * notice the new output in console_unlock(); and will send it to the + * consoles before releasing the lock. * * One effect of this deferred printing is that code which calls printk() and * then changes console_loglevel may break. This is because console_loglevel @@ -659,19 +675,19 @@ static inline int can_use_console(unsigned int cpu) /* * Try to get console ownership to actually show the kernel * messages from a 'printk'. Return true (and with the - * console_semaphore held, and 'console_locked' set) if it + * console_lock held, and 'console_locked' set) if it * is successful, false otherwise. * * This gets called with the 'logbuf_lock' spinlock held and * interrupts disabled. It should return with 'lockbuf_lock' * released but interrupts still disabled. */ -static int acquire_console_semaphore_for_printk(unsigned int cpu) +static int console_trylock_for_printk(unsigned int cpu) __releases(&logbuf_lock) { int retval = 0; - if (!try_acquire_console_sem()) { + if (console_trylock()) { retval = 1; /* @@ -827,12 +843,12 @@ asmlinkage int vprintk(const char *fmt, va_list args) * actual magic (print out buffers, wake up klogd, * etc). * - * The acquire_console_semaphore_for_printk() function + * The console_trylock_for_printk() function * will release 'logbuf_lock' regardless of whether it * actually gets the semaphore or not. */ - if (acquire_console_semaphore_for_printk(this_cpu)) - release_console_sem(); + if (console_trylock_for_printk(this_cpu)) + console_unlock(); lockdep_on(); out_restore_irqs: @@ -993,7 +1009,7 @@ void suspend_console(void) if (!console_suspend_enabled) return; printk("Suspending console(s) (use no_console_suspend to debug)\n"); - acquire_console_sem(); + console_lock(); console_suspended = 1; up(&console_sem); } @@ -1004,7 +1020,7 @@ void resume_console(void) return; down(&console_sem); console_suspended = 0; - release_console_sem(); + console_unlock(); } /** @@ -1027,21 +1043,21 @@ static int __cpuinit console_cpu_notify(struct notifier_block *self, case CPU_DYING: case CPU_DOWN_FAILED: case CPU_UP_CANCELED: - acquire_console_sem(); - release_console_sem(); + console_lock(); + console_unlock(); } return NOTIFY_OK; } /** - * acquire_console_sem - lock the console system for exclusive use. + * console_lock - lock the console system for exclusive use. * - * Acquires a semaphore which guarantees that the caller has + * Acquires a lock which guarantees that the caller has * exclusive access to the console system and the console_drivers list. * * Can sleep, returns nothing. */ -void acquire_console_sem(void) +void console_lock(void) { BUG_ON(in_interrupt()); down(&console_sem); @@ -1050,21 +1066,29 @@ void acquire_console_sem(void) console_locked = 1; console_may_schedule = 1; } -EXPORT_SYMBOL(acquire_console_sem); +EXPORT_SYMBOL(console_lock); -int try_acquire_console_sem(void) +/** + * console_trylock - try to lock the console system for exclusive use. + * + * Tried to acquire a lock which guarantees that the caller has + * exclusive access to the console system and the console_drivers list. + * + * returns 1 on success, and 0 on failure to acquire the lock. + */ +int console_trylock(void) { if (down_trylock(&console_sem)) - return -1; + return 0; if (console_suspended) { up(&console_sem); - return -1; + return 0; } console_locked = 1; console_may_schedule = 0; - return 0; + return 1; } -EXPORT_SYMBOL(try_acquire_console_sem); +EXPORT_SYMBOL(console_trylock); int is_console_locked(void) { @@ -1095,20 +1119,20 @@ void wake_up_klogd(void) } /** - * release_console_sem - unlock the console system + * console_unlock - unlock the console system * - * Releases the semaphore which the caller holds on the console system + * Releases the console_lock which the caller holds on the console system * and the console driver list. * - * While the semaphore was held, console output may have been buffered - * by printk(). If this is the case, release_console_sem() emits - * the output prior to releasing the semaphore. + * While the console_lock was held, console output may have been buffered + * by printk(). If this is the case, console_unlock(); emits + * the output prior to releasing the lock. * * If there is output waiting for klogd, we wake it up. * - * release_console_sem() may be called from any context. + * console_unlock(); may be called from any context. */ -void release_console_sem(void) +void console_unlock(void) { unsigned long flags; unsigned _con_start, _log_end; @@ -1141,7 +1165,7 @@ void release_console_sem(void) if (wake_klogd) wake_up_klogd(); } -EXPORT_SYMBOL(release_console_sem); +EXPORT_SYMBOL(console_unlock); /** * console_conditional_schedule - yield the CPU if required @@ -1150,7 +1174,7 @@ EXPORT_SYMBOL(release_console_sem); * if this CPU should yield the CPU to another task, do * so here. * - * Must be called within acquire_console_sem(). + * Must be called within console_lock();. */ void __sched console_conditional_schedule(void) { @@ -1171,14 +1195,14 @@ void console_unblank(void) if (down_trylock(&console_sem) != 0) return; } else - acquire_console_sem(); + console_lock(); console_locked = 1; console_may_schedule = 0; for_each_console(c) if ((c->flags & CON_ENABLED) && c->unblank) c->unblank(); - release_console_sem(); + console_unlock(); } /* @@ -1189,7 +1213,7 @@ struct tty_driver *console_device(int *index) struct console *c; struct tty_driver *driver = NULL; - acquire_console_sem(); + console_lock(); for_each_console(c) { if (!c->device) continue; @@ -1197,7 +1221,7 @@ struct tty_driver *console_device(int *index) if (driver) break; } - release_console_sem(); + console_unlock(); return driver; } @@ -1208,17 +1232,17 @@ struct tty_driver *console_device(int *index) */ void console_stop(struct console *console) { - acquire_console_sem(); + console_lock(); console->flags &= ~CON_ENABLED; - release_console_sem(); + console_unlock(); } EXPORT_SYMBOL(console_stop); void console_start(struct console *console) { - acquire_console_sem(); + console_lock(); console->flags |= CON_ENABLED; - release_console_sem(); + console_unlock(); } EXPORT_SYMBOL(console_start); @@ -1340,7 +1364,7 @@ void register_console(struct console *newcon) * Put this console in the list - keep the * preferred driver at the head of the list. */ - acquire_console_sem(); + console_lock(); if ((newcon->flags & CON_CONSDEV) || console_drivers == NULL) { newcon->next = console_drivers; console_drivers = newcon; @@ -1352,14 +1376,14 @@ void register_console(struct console *newcon) } if (newcon->flags & CON_PRINTBUFFER) { /* - * release_console_sem() will print out the buffered messages + * console_unlock(); will print out the buffered messages * for us. */ spin_lock_irqsave(&logbuf_lock, flags); con_start = log_start; spin_unlock_irqrestore(&logbuf_lock, flags); } - release_console_sem(); + console_unlock(); console_sysfs_notify(); /* @@ -1396,7 +1420,7 @@ int unregister_console(struct console *console) return braille_unregister_console(console); #endif - acquire_console_sem(); + console_lock(); if (console_drivers == console) { console_drivers=console->next; res = 0; @@ -1418,7 +1442,7 @@ int unregister_console(struct console *console) if (console_drivers != NULL && console->flags & CON_CONSDEV) console_drivers->flags |= CON_CONSDEV; - release_console_sem(); + console_unlock(); console_sysfs_notify(); return res; } diff --git a/kernel/ptrace.c b/kernel/ptrace.c index 99bbaa3e5b0d..e2302e40b360 100644 --- a/kernel/ptrace.c +++ b/kernel/ptrace.c @@ -163,7 +163,7 @@ bool ptrace_may_access(struct task_struct *task, unsigned int mode) return !err; } -int ptrace_attach(struct task_struct *task) +static int ptrace_attach(struct task_struct *task) { int retval; @@ -219,7 +219,7 @@ out: * Performs checks and sets PT_PTRACED. * Should be used by all ptrace implementations for PTRACE_TRACEME. */ -int ptrace_traceme(void) +static int ptrace_traceme(void) { int ret = -EPERM; @@ -293,7 +293,7 @@ static bool __ptrace_detach(struct task_struct *tracer, struct task_struct *p) return false; } -int ptrace_detach(struct task_struct *child, unsigned int data) +static int ptrace_detach(struct task_struct *child, unsigned int data) { bool dead = false; @@ -313,7 +313,7 @@ int ptrace_detach(struct task_struct *child, unsigned int data) child->exit_code = data; dead = __ptrace_detach(current, child); if (!child->exit_state) - wake_up_process(child); + wake_up_state(child, TASK_TRACED | TASK_STOPPED); } write_unlock_irq(&tasklist_lock); diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c index 77e9166d7bbf..0c26e2df450e 100644 --- a/kernel/sched_fair.c +++ b/kernel/sched_fair.c @@ -699,7 +699,8 @@ account_entity_dequeue(struct cfs_rq *cfs_rq, struct sched_entity *se) cfs_rq->nr_running--; } -#if defined CONFIG_SMP && defined CONFIG_FAIR_GROUP_SCHED +#ifdef CONFIG_FAIR_GROUP_SCHED +# ifdef CONFIG_SMP static void update_cfs_rq_load_contribution(struct cfs_rq *cfs_rq, int global_update) { @@ -721,10 +722,10 @@ static void update_cfs_load(struct cfs_rq *cfs_rq, int global_update) u64 now, delta; unsigned long load = cfs_rq->load.weight; - if (!cfs_rq) + if (cfs_rq->tg == &root_task_group) return; - now = rq_of(cfs_rq)->clock; + now = rq_of(cfs_rq)->clock_task; delta = now - cfs_rq->load_stamp; /* truncate load history at 4 idle periods */ @@ -762,6 +763,51 @@ static void update_cfs_load(struct cfs_rq *cfs_rq, int global_update) list_del_leaf_cfs_rq(cfs_rq); } +static long calc_cfs_shares(struct cfs_rq *cfs_rq, struct task_group *tg, + long weight_delta) +{ + long load_weight, load, shares; + + load = cfs_rq->load.weight + weight_delta; + + load_weight = atomic_read(&tg->load_weight); + load_weight -= cfs_rq->load_contribution; + load_weight += load; + + shares = (tg->shares * load); + if (load_weight) + shares /= load_weight; + + if (shares < MIN_SHARES) + shares = MIN_SHARES; + if (shares > tg->shares) + shares = tg->shares; + + return shares; +} + +static void update_entity_shares_tick(struct cfs_rq *cfs_rq) +{ + if (cfs_rq->load_unacc_exec_time > sysctl_sched_shares_window) { + update_cfs_load(cfs_rq, 0); + update_cfs_shares(cfs_rq, 0); + } +} +# else /* CONFIG_SMP */ +static void update_cfs_load(struct cfs_rq *cfs_rq, int global_update) +{ +} + +static inline long calc_cfs_shares(struct cfs_rq *cfs_rq, struct task_group *tg, + long weight_delta) +{ + return tg->shares; +} + +static inline void update_entity_shares_tick(struct cfs_rq *cfs_rq) +{ +} +# endif /* CONFIG_SMP */ static void reweight_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, unsigned long weight) { @@ -782,41 +828,20 @@ static void update_cfs_shares(struct cfs_rq *cfs_rq, long weight_delta) { struct task_group *tg; struct sched_entity *se; - long load_weight, load, shares; - - if (!cfs_rq) - return; + long shares; tg = cfs_rq->tg; se = tg->se[cpu_of(rq_of(cfs_rq))]; if (!se) return; - - load = cfs_rq->load.weight + weight_delta; - - load_weight = atomic_read(&tg->load_weight); - load_weight -= cfs_rq->load_contribution; - load_weight += load; - - shares = (tg->shares * load); - if (load_weight) - shares /= load_weight; - - if (shares < MIN_SHARES) - shares = MIN_SHARES; - if (shares > tg->shares) - shares = tg->shares; +#ifndef CONFIG_SMP + if (likely(se->load.weight == tg->shares)) + return; +#endif + shares = calc_cfs_shares(cfs_rq, tg, weight_delta); reweight_entity(cfs_rq_of(se), se, shares); } - -static void update_entity_shares_tick(struct cfs_rq *cfs_rq) -{ - if (cfs_rq->load_unacc_exec_time > sysctl_sched_shares_window) { - update_cfs_load(cfs_rq, 0); - update_cfs_shares(cfs_rq, 0); - } -} #else /* CONFIG_FAIR_GROUP_SCHED */ static void update_cfs_load(struct cfs_rq *cfs_rq, int global_update) { @@ -1404,7 +1429,7 @@ static inline unsigned long effective_load(struct task_group *tg, int cpu, static int wake_affine(struct sched_domain *sd, struct task_struct *p, int sync) { - unsigned long this_load, load; + s64 this_load, load; int idx, this_cpu, prev_cpu; unsigned long tl_per_task; struct task_group *tg; @@ -1443,8 +1468,8 @@ static int wake_affine(struct sched_domain *sd, struct task_struct *p, int sync) * Otherwise check if either cpus are near enough in load to allow this * task to be woken on this_cpu. */ - if (this_load) { - unsigned long this_eff_load, prev_eff_load; + if (this_load > 0) { + s64 this_eff_load, prev_eff_load; this_eff_load = 100; this_eff_load *= power_of(prev_cpu); diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c index c914ec747ca6..01f75a5f17af 100644 --- a/kernel/sched_rt.c +++ b/kernel/sched_rt.c @@ -210,11 +210,12 @@ static void dequeue_rt_entity(struct sched_rt_entity *rt_se); static void sched_rt_rq_enqueue(struct rt_rq *rt_rq) { - int this_cpu = smp_processor_id(); struct task_struct *curr = rq_of_rt_rq(rt_rq)->curr; struct sched_rt_entity *rt_se; - rt_se = rt_rq->tg->rt_se[this_cpu]; + int cpu = cpu_of(rq_of_rt_rq(rt_rq)); + + rt_se = rt_rq->tg->rt_se[cpu]; if (rt_rq->rt_nr_running) { if (rt_se && !on_rt_rq(rt_se)) @@ -226,10 +227,10 @@ static void sched_rt_rq_enqueue(struct rt_rq *rt_rq) static void sched_rt_rq_dequeue(struct rt_rq *rt_rq) { - int this_cpu = smp_processor_id(); struct sched_rt_entity *rt_se; + int cpu = cpu_of(rq_of_rt_rq(rt_rq)); - rt_se = rt_rq->tg->rt_se[this_cpu]; + rt_se = rt_rq->tg->rt_se[cpu]; if (rt_se && on_rt_rq(rt_se)) dequeue_rt_entity(rt_se); @@ -565,8 +566,11 @@ static int do_sched_rt_period_timer(struct rt_bandwidth *rt_b, int overrun) if (rt_rq->rt_time || rt_rq->rt_nr_running) idle = 0; raw_spin_unlock(&rt_rq->rt_runtime_lock); - } else if (rt_rq->rt_nr_running) + } else if (rt_rq->rt_nr_running) { idle = 0; + if (!rt_rq_throttled(rt_rq)) + enqueue = 1; + } if (enqueue) sched_rt_rq_enqueue(rt_rq); @@ -625,7 +629,7 @@ static void update_curr_rt(struct rq *rq) struct rt_rq *rt_rq = rt_rq_of_se(rt_se); u64 delta_exec; - if (!task_has_rt_policy(curr)) + if (curr->sched_class != &rt_sched_class) return; delta_exec = rq->clock_task - curr->se.exec_start; diff --git a/kernel/sys.c b/kernel/sys.c index 31b71a276b40..18da702ec813 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -1385,7 +1385,8 @@ static int check_prlimit_permission(struct task_struct *task) const struct cred *cred = current_cred(), *tcred; tcred = __task_cred(task); - if ((cred->uid != tcred->euid || + if (current != task && + (cred->uid != tcred->euid || cred->uid != tcred->suid || cred->uid != tcred->uid || cred->gid != tcred->egid || diff --git a/kernel/sysctl.c b/kernel/sysctl.c index bc86bb32e126..4eed0af5d144 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -170,7 +170,8 @@ static int proc_taint(struct ctl_table *table, int write, #endif #ifdef CONFIG_MAGIC_SYSRQ -static int __sysrq_enabled; /* Note: sysrq code ises it's own private copy */ +/* Note: sysrq code uses it's own private copy */ +static int __sysrq_enabled = SYSRQ_DEFAULT_ENABLE; static int sysrq_sysctl_handler(ctl_table *table, int write, void __user *buffer, size_t *lenp, @@ -193,9 +194,9 @@ static int sysrq_sysctl_handler(ctl_table *table, int write, static struct ctl_table root_table[]; static struct ctl_table_root sysctl_table_root; static struct ctl_table_header root_table_header = { - .count = 1, + {{.count = 1, .ctl_table = root_table, - .ctl_entry = LIST_HEAD_INIT(sysctl_table_root.default_set.list), + .ctl_entry = LIST_HEAD_INIT(sysctl_table_root.default_set.list),}}, .root = &sysctl_table_root, .set = &sysctl_table_root.default_set, }; @@ -1566,11 +1567,16 @@ void sysctl_head_get(struct ctl_table_header *head) spin_unlock(&sysctl_lock); } +static void free_head(struct rcu_head *rcu) +{ + kfree(container_of(rcu, struct ctl_table_header, rcu)); +} + void sysctl_head_put(struct ctl_table_header *head) { spin_lock(&sysctl_lock); if (!--head->count) - kfree(head); + call_rcu(&head->rcu, free_head); spin_unlock(&sysctl_lock); } @@ -1947,10 +1953,10 @@ void unregister_sysctl_table(struct ctl_table_header * header) start_unregistering(header); if (!--header->parent->count) { WARN_ON(1); - kfree(header->parent); + call_rcu(&header->parent->rcu, free_head); } if (!--header->count) - kfree(header); + call_rcu(&header->rcu, free_head); spin_unlock(&sysctl_lock); } diff --git a/kernel/time/tick-broadcast.c b/kernel/time/tick-broadcast.c index 48b2761b5668..a3b5aff62606 100644 --- a/kernel/time/tick-broadcast.c +++ b/kernel/time/tick-broadcast.c @@ -600,4 +600,14 @@ int tick_broadcast_oneshot_active(void) return tick_broadcast_device.mode == TICKDEV_MODE_ONESHOT; } +/* + * Check whether the broadcast device supports oneshot. + */ +bool tick_broadcast_oneshot_available(void) +{ + struct clock_event_device *bc = tick_broadcast_device.evtdev; + + return bc ? bc->features & CLOCK_EVT_FEAT_ONESHOT : false; +} + #endif diff --git a/kernel/time/tick-common.c b/kernel/time/tick-common.c index 051bc80a0c43..ed228ef6f6b8 100644 --- a/kernel/time/tick-common.c +++ b/kernel/time/tick-common.c @@ -51,7 +51,11 @@ int tick_is_oneshot_available(void) { struct clock_event_device *dev = __this_cpu_read(tick_cpu_device.evtdev); - return dev && (dev->features & CLOCK_EVT_FEAT_ONESHOT); + if (!dev || !(dev->features & CLOCK_EVT_FEAT_ONESHOT)) + return 0; + if (!(dev->features & CLOCK_EVT_FEAT_C3STOP)) + return 1; + return tick_broadcast_oneshot_available(); } /* diff --git a/kernel/time/tick-internal.h b/kernel/time/tick-internal.h index 290eefbc1f60..f65d3a723a64 100644 --- a/kernel/time/tick-internal.h +++ b/kernel/time/tick-internal.h @@ -36,6 +36,7 @@ extern void tick_shutdown_broadcast_oneshot(unsigned int *cpup); extern int tick_resume_broadcast_oneshot(struct clock_event_device *bc); extern int tick_broadcast_oneshot_active(void); extern void tick_check_oneshot_broadcast(int cpu); +bool tick_broadcast_oneshot_available(void); # else /* BROADCAST */ static inline void tick_broadcast_setup_oneshot(struct clock_event_device *bc) { @@ -46,6 +47,7 @@ static inline void tick_broadcast_switch_to_oneshot(void) { } static inline void tick_shutdown_broadcast_oneshot(unsigned int *cpup) { } static inline int tick_broadcast_oneshot_active(void) { return 0; } static inline void tick_check_oneshot_broadcast(int cpu) { } +static inline bool tick_broadcast_oneshot_available(void) { return true; } # endif /* !BROADCAST */ #else /* !ONESHOT */ @@ -76,6 +78,7 @@ static inline int tick_resume_broadcast_oneshot(struct clock_event_device *bc) return 0; } static inline int tick_broadcast_oneshot_active(void) { return 0; } +static inline bool tick_broadcast_oneshot_available(void) { return false; } #endif /* !TICK_ONESHOT */ /* diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index 3e216e01bbd1..c55ea2433471 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -642,8 +642,7 @@ static void tick_nohz_switch_to_nohz(void) } local_irq_enable(); - printk(KERN_INFO "Switched to NOHz mode on CPU #%d\n", - smp_processor_id()); + printk(KERN_INFO "Switched to NOHz mode on CPU #%d\n", smp_processor_id()); } /* @@ -795,8 +794,10 @@ void tick_setup_sched_timer(void) } #ifdef CONFIG_NO_HZ - if (tick_nohz_enabled) + if (tick_nohz_enabled) { ts->nohz_mode = NOHZ_MODE_HIGHRES; + printk(KERN_INFO "Switched to NOHz mode on CPU #%d\n", smp_processor_id()); + } #endif } #endif /* HIGH_RES_TIMERS */ diff --git a/kernel/time/timer_list.c b/kernel/time/timer_list.c index 32a19f9397fc..3258455549f4 100644 --- a/kernel/time/timer_list.c +++ b/kernel/time/timer_list.c @@ -41,7 +41,7 @@ static void print_name_offset(struct seq_file *m, void *sym) char symname[KSYM_NAME_LEN]; if (lookup_symbol_name((unsigned long)sym, symname) < 0) - SEQ_printf(m, "<%p>", sym); + SEQ_printf(m, "<%pK>", sym); else SEQ_printf(m, "%s", symname); } @@ -112,7 +112,7 @@ next_one: static void print_base(struct seq_file *m, struct hrtimer_clock_base *base, u64 now) { - SEQ_printf(m, " .base: %p\n", base); + SEQ_printf(m, " .base: %pK\n", base); SEQ_printf(m, " .index: %d\n", base->index); SEQ_printf(m, " .resolution: %Lu nsecs\n", diff --git a/kernel/timer.c b/kernel/timer.c index 43ca9936f2d0..d6459923d245 100644 --- a/kernel/timer.c +++ b/kernel/timer.c @@ -959,7 +959,7 @@ EXPORT_SYMBOL(try_to_del_timer_sync); * * Synchronization rules: Callers must prevent restarting of the timer, * otherwise this function is meaningless. It must not be called from - * hardirq contexts. The caller must not hold locks which would prevent + * interrupt contexts. The caller must not hold locks which would prevent * completion of the timer's handler. The timer's handler must not call * add_timer_on(). Upon exit the timer is not queued and the handler is * not running on any CPU. @@ -969,10 +969,12 @@ EXPORT_SYMBOL(try_to_del_timer_sync); int del_timer_sync(struct timer_list *timer) { #ifdef CONFIG_LOCKDEP - local_bh_disable(); + unsigned long flags; + + local_irq_save(flags); lock_map_acquire(&timer->lockdep_map); lock_map_release(&timer->lockdep_map); - local_bh_enable(); + local_irq_restore(flags); #endif /* * don't use it in hardirq context, because it diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c index 153562d0b93c..cbafed7d4f38 100644 --- a/kernel/trace/blktrace.c +++ b/kernel/trace/blktrace.c @@ -138,6 +138,13 @@ void __trace_note_message(struct blk_trace *bt, const char *fmt, ...) !blk_tracer_enabled)) return; + /* + * If the BLK_TC_NOTIFY action mask isn't set, don't send any note + * message to the trace. + */ + if (!(bt->act_mask & BLK_TC_NOTIFY)) + return; + local_irq_save(flags); buf = per_cpu_ptr(bt->msg_data, smp_processor_id()); va_start(args, fmt); @@ -1820,21 +1827,5 @@ void blk_fill_rwbs(char *rwbs, u32 rw, int bytes) rwbs[i] = '\0'; } -void blk_fill_rwbs_rq(char *rwbs, struct request *rq) -{ - int rw = rq->cmd_flags & 0x03; - int bytes; - - if (rq->cmd_flags & REQ_DISCARD) - rw |= REQ_DISCARD; - - if (rq->cmd_flags & REQ_SECURE) - rw |= REQ_SECURE; - - bytes = blk_rq_bytes(rq); - - blk_fill_rwbs(rwbs, rw, bytes); -} - #endif /* CONFIG_EVENT_TRACING */ diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index 35fde09b81de..5f499e0438a4 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -1284,7 +1284,7 @@ trace_create_file_ops(struct module *mod) static void trace_module_add_events(struct module *mod) { struct ftrace_module_file_ops *file_ops = NULL; - struct ftrace_event_call *call, *start, *end; + struct ftrace_event_call **call, **start, **end; start = mod->trace_events; end = mod->trace_events + mod->num_trace_events; @@ -1297,7 +1297,7 @@ static void trace_module_add_events(struct module *mod) return; for_each_event(call, start, end) { - __trace_add_event_call(call, mod, + __trace_add_event_call(*call, mod, &file_ops->id, &file_ops->enable, &file_ops->filter, &file_ops->format); } @@ -1367,8 +1367,8 @@ static struct notifier_block trace_module_nb = { .priority = 0, }; -extern struct ftrace_event_call __start_ftrace_events[]; -extern struct ftrace_event_call __stop_ftrace_events[]; +extern struct ftrace_event_call *__start_ftrace_events[]; +extern struct ftrace_event_call *__stop_ftrace_events[]; static char bootup_event_buf[COMMAND_LINE_SIZE] __initdata; @@ -1384,7 +1384,7 @@ __setup("trace_event=", setup_trace_event); static __init int event_trace_init(void) { - struct ftrace_event_call *call; + struct ftrace_event_call **call; struct dentry *d_tracer; struct dentry *entry; struct dentry *d_events; @@ -1430,7 +1430,7 @@ static __init int event_trace_init(void) pr_warning("tracing: Failed to allocate common fields"); for_each_event(call, __start_ftrace_events, __stop_ftrace_events) { - __trace_add_event_call(call, NULL, &ftrace_event_id_fops, + __trace_add_event_call(*call, NULL, &ftrace_event_id_fops, &ftrace_enable_fops, &ftrace_event_filter_fops, &ftrace_event_format_fops); diff --git a/kernel/trace/trace_export.c b/kernel/trace/trace_export.c index 4b74d71705c0..bbeec31e0ae3 100644 --- a/kernel/trace/trace_export.c +++ b/kernel/trace/trace_export.c @@ -161,13 +161,13 @@ struct ftrace_event_class event_class_ftrace_##call = { \ .fields = LIST_HEAD_INIT(event_class_ftrace_##call.fields),\ }; \ \ -struct ftrace_event_call __used \ -__attribute__((__aligned__(4))) \ -__attribute__((section("_ftrace_events"))) event_##call = { \ +struct ftrace_event_call __used event_##call = { \ .name = #call, \ .event.type = etype, \ .class = &event_class_ftrace_##call, \ .print_fmt = print, \ }; \ +struct ftrace_event_call __used \ +__attribute__((section("_ftrace_events"))) *__event_##call = &event_##call; #include "trace_entries.h" diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c index b706529b4fc7..5c9fe08d2093 100644 --- a/kernel/trace/trace_syscalls.c +++ b/kernel/trace/trace_syscalls.c @@ -55,20 +55,21 @@ struct ftrace_event_class event_class_syscall_exit = { .raw_init = init_syscall_trace, }; -extern unsigned long __start_syscalls_metadata[]; -extern unsigned long __stop_syscalls_metadata[]; +extern struct syscall_metadata *__start_syscalls_metadata[]; +extern struct syscall_metadata *__stop_syscalls_metadata[]; static struct syscall_metadata **syscalls_metadata; -static struct syscall_metadata *find_syscall_meta(unsigned long syscall) +static __init struct syscall_metadata * +find_syscall_meta(unsigned long syscall) { - struct syscall_metadata *start; - struct syscall_metadata *stop; + struct syscall_metadata **start; + struct syscall_metadata **stop; char str[KSYM_SYMBOL_LEN]; - start = (struct syscall_metadata *)__start_syscalls_metadata; - stop = (struct syscall_metadata *)__stop_syscalls_metadata; + start = __start_syscalls_metadata; + stop = __stop_syscalls_metadata; kallsyms_lookup(syscall, NULL, NULL, NULL, str); for ( ; start < stop; start++) { @@ -78,8 +79,8 @@ static struct syscall_metadata *find_syscall_meta(unsigned long syscall) * with "SyS" instead of "sys", leading to an unwanted * mismatch. */ - if (start->name && !strcmp(start->name + 3, str + 3)) - return start; + if ((*start)->name && !strcmp((*start)->name + 3, str + 3)) + return *start; } return NULL; } diff --git a/kernel/tracepoint.c b/kernel/tracepoint.c index e95ee7f31d43..68187af4889e 100644 --- a/kernel/tracepoint.c +++ b/kernel/tracepoint.c @@ -27,8 +27,8 @@ #include <linux/sched.h> #include <linux/jump_label.h> -extern struct tracepoint __start___tracepoints[]; -extern struct tracepoint __stop___tracepoints[]; +extern struct tracepoint * const __start___tracepoints_ptrs[]; +extern struct tracepoint * const __stop___tracepoints_ptrs[]; /* Set to 1 to enable tracepoint debug output */ static const int tracepoint_debug; @@ -298,10 +298,10 @@ static void disable_tracepoint(struct tracepoint *elem) * * Updates the probe callback corresponding to a range of tracepoints. */ -void -tracepoint_update_probe_range(struct tracepoint *begin, struct tracepoint *end) +void tracepoint_update_probe_range(struct tracepoint * const *begin, + struct tracepoint * const *end) { - struct tracepoint *iter; + struct tracepoint * const *iter; struct tracepoint_entry *mark_entry; if (!begin) @@ -309,12 +309,12 @@ tracepoint_update_probe_range(struct tracepoint *begin, struct tracepoint *end) mutex_lock(&tracepoints_mutex); for (iter = begin; iter < end; iter++) { - mark_entry = get_tracepoint(iter->name); + mark_entry = get_tracepoint((*iter)->name); if (mark_entry) { - set_tracepoint(&mark_entry, iter, + set_tracepoint(&mark_entry, *iter, !!mark_entry->refcount); } else { - disable_tracepoint(iter); + disable_tracepoint(*iter); } } mutex_unlock(&tracepoints_mutex); @@ -326,8 +326,8 @@ tracepoint_update_probe_range(struct tracepoint *begin, struct tracepoint *end) static void tracepoint_update_probes(void) { /* Core kernel tracepoints */ - tracepoint_update_probe_range(__start___tracepoints, - __stop___tracepoints); + tracepoint_update_probe_range(__start___tracepoints_ptrs, + __stop___tracepoints_ptrs); /* tracepoints in modules. */ module_update_tracepoints(); } @@ -514,8 +514,8 @@ EXPORT_SYMBOL_GPL(tracepoint_probe_update_all); * Will return the first tracepoint in the range if the input tracepoint is * NULL. */ -int tracepoint_get_iter_range(struct tracepoint **tracepoint, - struct tracepoint *begin, struct tracepoint *end) +int tracepoint_get_iter_range(struct tracepoint * const **tracepoint, + struct tracepoint * const *begin, struct tracepoint * const *end) { if (!*tracepoint && begin != end) { *tracepoint = begin; @@ -534,7 +534,8 @@ static void tracepoint_get_iter(struct tracepoint_iter *iter) /* Core kernel tracepoints */ if (!iter->module) { found = tracepoint_get_iter_range(&iter->tracepoint, - __start___tracepoints, __stop___tracepoints); + __start___tracepoints_ptrs, + __stop___tracepoints_ptrs); if (found) goto end; } @@ -585,8 +586,8 @@ int tracepoint_module_notify(struct notifier_block *self, switch (val) { case MODULE_STATE_COMING: case MODULE_STATE_GOING: - tracepoint_update_probe_range(mod->tracepoints, - mod->tracepoints + mod->num_tracepoints); + tracepoint_update_probe_range(mod->tracepoints_ptrs, + mod->tracepoints_ptrs + mod->num_tracepoints); break; } return 0; diff --git a/kernel/watchdog.c b/kernel/watchdog.c index d7ebdf4cea98..18bb15776c57 100644 --- a/kernel/watchdog.c +++ b/kernel/watchdog.c @@ -27,7 +27,7 @@ #include <asm/irq_regs.h> #include <linux/perf_event.h> -int watchdog_enabled; +int watchdog_enabled = 1; int __read_mostly softlockup_thresh = 60; static DEFINE_PER_CPU(unsigned long, watchdog_touch_ts); @@ -43,9 +43,6 @@ static DEFINE_PER_CPU(unsigned long, hrtimer_interrupts_saved); static DEFINE_PER_CPU(struct perf_event *, watchdog_ev); #endif -static int no_watchdog; - - /* boot commands */ /* * Should we panic when a soft-lockup or hard-lockup occurs: @@ -58,7 +55,7 @@ static int __init hardlockup_panic_setup(char *str) if (!strncmp(str, "panic", 5)) hardlockup_panic = 1; else if (!strncmp(str, "0", 1)) - no_watchdog = 1; + watchdog_enabled = 0; return 1; } __setup("nmi_watchdog=", hardlockup_panic_setup); @@ -77,7 +74,7 @@ __setup("softlockup_panic=", softlockup_panic_setup); static int __init nowatchdog_setup(char *str) { - no_watchdog = 1; + watchdog_enabled = 0; return 1; } __setup("nowatchdog", nowatchdog_setup); @@ -85,7 +82,7 @@ __setup("nowatchdog", nowatchdog_setup); /* deprecated */ static int __init nosoftlockup_setup(char *str) { - no_watchdog = 1; + watchdog_enabled = 0; return 1; } __setup("nosoftlockup", nosoftlockup_setup); @@ -366,8 +363,14 @@ static int watchdog_nmi_enable(int cpu) goto out_save; } - printk(KERN_ERR "NMI watchdog disabled for cpu%i: unable to create perf event: %ld\n", - cpu, PTR_ERR(event)); + + /* vary the KERN level based on the returned errno */ + if (PTR_ERR(event) == -EOPNOTSUPP) + printk(KERN_INFO "NMI watchdog disabled (cpu%i): not supported (no LAPIC?)\n", cpu); + else if (PTR_ERR(event) == -ENOENT) + printk(KERN_WARNING "NMI watchdog disabled (cpu%i): hardware events not enabled\n", cpu); + else + printk(KERN_ERR "NMI watchdog disabled (cpu%i): unable to create perf event: %ld\n", cpu, PTR_ERR(event)); return PTR_ERR(event); /* success path */ @@ -432,9 +435,6 @@ static int watchdog_enable(int cpu) wake_up_process(p); } - /* if any cpu succeeds, watchdog is considered enabled for the system */ - watchdog_enabled = 1; - return 0; } @@ -462,12 +462,16 @@ static void watchdog_disable(int cpu) static void watchdog_enable_all_cpus(void) { int cpu; - int result = 0; + + watchdog_enabled = 0; for_each_online_cpu(cpu) - result += watchdog_enable(cpu); + if (!watchdog_enable(cpu)) + /* if any cpu succeeds, watchdog is considered + enabled for the system */ + watchdog_enabled = 1; - if (result) + if (!watchdog_enabled) printk(KERN_ERR "watchdog: failed to be enabled on some cpus\n"); } @@ -476,9 +480,6 @@ static void watchdog_disable_all_cpus(void) { int cpu; - if (no_watchdog) - return; - for_each_online_cpu(cpu) watchdog_disable(cpu); @@ -498,10 +499,12 @@ int proc_dowatchdog_enabled(struct ctl_table *table, int write, { proc_dointvec(table, write, buffer, length, ppos); - if (watchdog_enabled) - watchdog_enable_all_cpus(); - else - watchdog_disable_all_cpus(); + if (write) { + if (watchdog_enabled) + watchdog_enable_all_cpus(); + else + watchdog_disable_all_cpus(); + } return 0; } @@ -530,7 +533,8 @@ cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu) break; case CPU_ONLINE: case CPU_ONLINE_FROZEN: - err = watchdog_enable(hotcpu); + if (watchdog_enabled) + err = watchdog_enable(hotcpu); break; #ifdef CONFIG_HOTPLUG_CPU case CPU_UP_CANCELED: @@ -555,9 +559,6 @@ void __init lockup_detector_init(void) void *cpu = (void *)(long)smp_processor_id(); int err; - if (no_watchdog) - return; - err = cpu_callback(&cpu_nfb, CPU_UP_PREPARE, cpu); WARN_ON(notifier_to_errno(err)); diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 11869faa6819..ee6578b578ad 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -79,7 +79,9 @@ enum { MAX_IDLE_WORKERS_RATIO = 4, /* 1/4 of busy can be idle */ IDLE_WORKER_TIMEOUT = 300 * HZ, /* keep idle ones for 5 mins */ - MAYDAY_INITIAL_TIMEOUT = HZ / 100, /* call for help after 10ms */ + MAYDAY_INITIAL_TIMEOUT = HZ / 100 >= 2 ? HZ / 100 : 2, + /* call for help after 10ms + (min two ticks) */ MAYDAY_INTERVAL = HZ / 10, /* and then every 100ms */ CREATE_COOLDOWN = HZ, /* time to breath after fail */ TRUSTEE_COOLDOWN = HZ / 10, /* for trustee draining */ @@ -2047,6 +2049,15 @@ repeat: move_linked_works(work, scheduled, &n); process_scheduled_works(rescuer); + + /* + * Leave this gcwq. If keep_working() is %true, notify a + * regular worker; otherwise, we end up with 0 concurrency + * and stalling the execution. + */ + if (keep_working(gcwq)) + wake_up_worker(gcwq); + spin_unlock_irq(&gcwq->lock); } @@ -2956,7 +2967,7 @@ struct workqueue_struct *__alloc_workqueue_key(const char *name, */ spin_lock(&workqueue_lock); - if (workqueue_freezing && wq->flags & WQ_FREEZEABLE) + if (workqueue_freezing && wq->flags & WQ_FREEZABLE) for_each_cwq_cpu(cpu, wq) get_cwq(cpu, wq)->max_active = 0; @@ -3068,7 +3079,7 @@ void workqueue_set_max_active(struct workqueue_struct *wq, int max_active) spin_lock_irq(&gcwq->lock); - if (!(wq->flags & WQ_FREEZEABLE) || + if (!(wq->flags & WQ_FREEZABLE) || !(gcwq->flags & GCWQ_FREEZING)) get_cwq(gcwq->cpu, wq)->max_active = max_active; @@ -3318,7 +3329,7 @@ static int __cpuinit trustee_thread(void *__gcwq) * want to get it over with ASAP - spam rescuers, wake up as * many idlers as necessary and create new ones till the * worklist is empty. Note that if the gcwq is frozen, there - * may be frozen works in freezeable cwqs. Don't declare + * may be frozen works in freezable cwqs. Don't declare * completion while frozen. */ while (gcwq->nr_workers != gcwq->nr_idle || @@ -3576,9 +3587,9 @@ EXPORT_SYMBOL_GPL(work_on_cpu); /** * freeze_workqueues_begin - begin freezing workqueues * - * Start freezing workqueues. After this function returns, all - * freezeable workqueues will queue new works to their frozen_works - * list instead of gcwq->worklist. + * Start freezing workqueues. After this function returns, all freezable + * workqueues will queue new works to their frozen_works list instead of + * gcwq->worklist. * * CONTEXT: * Grabs and releases workqueue_lock and gcwq->lock's. @@ -3604,7 +3615,7 @@ void freeze_workqueues_begin(void) list_for_each_entry(wq, &workqueues, list) { struct cpu_workqueue_struct *cwq = get_cwq(cpu, wq); - if (cwq && wq->flags & WQ_FREEZEABLE) + if (cwq && wq->flags & WQ_FREEZABLE) cwq->max_active = 0; } @@ -3615,7 +3626,7 @@ void freeze_workqueues_begin(void) } /** - * freeze_workqueues_busy - are freezeable workqueues still busy? + * freeze_workqueues_busy - are freezable workqueues still busy? * * Check whether freezing is complete. This function must be called * between freeze_workqueues_begin() and thaw_workqueues(). @@ -3624,8 +3635,8 @@ void freeze_workqueues_begin(void) * Grabs and releases workqueue_lock. * * RETURNS: - * %true if some freezeable workqueues are still busy. %false if - * freezing is complete. + * %true if some freezable workqueues are still busy. %false if freezing + * is complete. */ bool freeze_workqueues_busy(void) { @@ -3645,7 +3656,7 @@ bool freeze_workqueues_busy(void) list_for_each_entry(wq, &workqueues, list) { struct cpu_workqueue_struct *cwq = get_cwq(cpu, wq); - if (!cwq || !(wq->flags & WQ_FREEZEABLE)) + if (!cwq || !(wq->flags & WQ_FREEZABLE)) continue; BUG_ON(cwq->nr_active < 0); @@ -3690,7 +3701,7 @@ void thaw_workqueues(void) list_for_each_entry(wq, &workqueues, list) { struct cpu_workqueue_struct *cwq = get_cwq(cpu, wq); - if (!cwq || !(wq->flags & WQ_FREEZEABLE)) + if (!cwq || !(wq->flags & WQ_FREEZABLE)) continue; /* restore max_active and repopulate worklist */ |