diff options
Diffstat (limited to 'kernel')
-rw-r--r-- | kernel/Makefile | 6 | ||||
-rw-r--r-- | kernel/audit.c | 49 | ||||
-rw-r--r-- | kernel/audit.h | 3 | ||||
-rw-r--r-- | kernel/audit_tree.c | 92 | ||||
-rw-r--r-- | kernel/audit_watch.c | 14 | ||||
-rw-r--r-- | kernel/auditsc.c | 15 | ||||
-rw-r--r-- | kernel/bpf/core.c | 12 | ||||
-rw-r--r-- | kernel/events/core.c | 41 | ||||
-rw-r--r-- | kernel/irq/dummychip.c | 1 | ||||
-rw-r--r-- | kernel/kexec.c | 2 | ||||
-rw-r--r-- | kernel/locking/rtmutex.c | 12 | ||||
-rw-r--r-- | kernel/module.c | 3 | ||||
-rw-r--r-- | kernel/rcu/tree.c | 16 | ||||
-rw-r--r-- | kernel/relay.c | 4 | ||||
-rw-r--r-- | kernel/sched/core.c | 74 | ||||
-rw-r--r-- | kernel/sched/idle.c | 16 | ||||
-rw-r--r-- | kernel/time/clockevents.c | 6 | ||||
-rw-r--r-- | kernel/time/hrtimer.c | 14 | ||||
-rw-r--r-- | kernel/trace/trace.c | 2 | ||||
-rw-r--r-- | kernel/trace/trace_events.c | 4 | ||||
-rw-r--r-- | kernel/trace/trace_output.c | 3 | ||||
-rw-r--r-- | kernel/trace/trace_uprobe.c | 2 | ||||
-rw-r--r-- | kernel/watchdog.c | 20 |
23 files changed, 231 insertions, 180 deletions
diff --git a/kernel/Makefile b/kernel/Makefile index 0f8f8b0bc1bf..60c302cfb4d3 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -197,9 +197,9 @@ x509.genkey: @echo >>x509.genkey "x509_extensions = myexts" @echo >>x509.genkey @echo >>x509.genkey "[ req_distinguished_name ]" - @echo >>x509.genkey "O = Magrathea" - @echo >>x509.genkey "CN = Glacier signing key" - @echo >>x509.genkey "emailAddress = slartibartfast@magrathea.h2g2" + @echo >>x509.genkey "#O = Unspecified company" + @echo >>x509.genkey "CN = Build time autogenerated kernel key" + @echo >>x509.genkey "#emailAddress = unspecified.user@unspecified.company" @echo >>x509.genkey @echo >>x509.genkey "[ myexts ]" @echo >>x509.genkey "basicConstraints=critical,CA:FALSE" diff --git a/kernel/audit.c b/kernel/audit.c index 72ab759a0b43..1c13e4267de6 100644 --- a/kernel/audit.c +++ b/kernel/audit.c @@ -43,6 +43,7 @@ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt +#include <linux/file.h> #include <linux/init.h> #include <linux/types.h> #include <linux/atomic.h> @@ -107,6 +108,7 @@ static u32 audit_rate_limit; * When set to zero, this means unlimited. */ static u32 audit_backlog_limit = 64; #define AUDIT_BACKLOG_WAIT_TIME (60 * HZ) +static u32 audit_backlog_wait_time_master = AUDIT_BACKLOG_WAIT_TIME; static u32 audit_backlog_wait_time = AUDIT_BACKLOG_WAIT_TIME; static u32 audit_backlog_wait_overflow = 0; @@ -338,13 +340,13 @@ static int audit_set_backlog_limit(u32 limit) static int audit_set_backlog_wait_time(u32 timeout) { return audit_do_config_change("audit_backlog_wait_time", - &audit_backlog_wait_time, timeout); + &audit_backlog_wait_time_master, timeout); } static int audit_set_enabled(u32 state) { int rc; - if (state < AUDIT_OFF || state > AUDIT_LOCKED) + if (state > AUDIT_LOCKED) return -EINVAL; rc = audit_do_config_change("audit_enabled", &audit_enabled, state); @@ -663,7 +665,7 @@ static int audit_netlink_ok(struct sk_buff *skb, u16 msg_type) case AUDIT_MAKE_EQUIV: /* Only support auditd and auditctl in initial pid namespace * for now. */ - if ((task_active_pid_ns(current) != &init_pid_ns)) + if (task_active_pid_ns(current) != &init_pid_ns) return -EPERM; if (!netlink_capable(skb, CAP_AUDIT_CONTROL)) @@ -834,7 +836,7 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh) s.lost = atomic_read(&audit_lost); s.backlog = skb_queue_len(&audit_skb_queue); s.feature_bitmap = AUDIT_FEATURE_BITMAP_ALL; - s.backlog_wait_time = audit_backlog_wait_time; + s.backlog_wait_time = audit_backlog_wait_time_master; audit_send_reply(skb, seq, AUDIT_GET, 0, 0, &s, sizeof(s)); break; } @@ -877,8 +879,7 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh) if (s.mask & AUDIT_STATUS_BACKLOG_WAIT_TIME) { if (sizeof(s) > (size_t)nlh->nlmsg_len) return -EINVAL; - if (s.backlog_wait_time < 0 || - s.backlog_wait_time > 10*AUDIT_BACKLOG_WAIT_TIME) + if (s.backlog_wait_time > 10*AUDIT_BACKLOG_WAIT_TIME) return -EINVAL; err = audit_set_backlog_wait_time(s.backlog_wait_time); if (err < 0) @@ -1385,7 +1386,8 @@ struct audit_buffer *audit_log_start(struct audit_context *ctx, gfp_t gfp_mask, return NULL; } - audit_backlog_wait_time = AUDIT_BACKLOG_WAIT_TIME; + if (!reserve) + audit_backlog_wait_time = audit_backlog_wait_time_master; ab = audit_buffer_alloc(ctx, gfp_mask, type); if (!ab) { @@ -1759,7 +1761,7 @@ void audit_log_name(struct audit_context *context, struct audit_names *n, } else audit_log_format(ab, " name=(null)"); - if (n->ino != (unsigned long)-1) { + if (n->ino != (unsigned long)-1) audit_log_format(ab, " inode=%lu" " dev=%02x:%02x mode=%#ho" " ouid=%u ogid=%u rdev=%02x:%02x", @@ -1771,7 +1773,6 @@ void audit_log_name(struct audit_context *context, struct audit_names *n, from_kgid(&init_user_ns, n->gid), MAJOR(n->rdev), MINOR(n->rdev)); - } if (n->osid != 0) { char *ctx = NULL; u32 len; @@ -1838,11 +1839,29 @@ error_path: } EXPORT_SYMBOL(audit_log_task_context); +void audit_log_d_path_exe(struct audit_buffer *ab, + struct mm_struct *mm) +{ + struct file *exe_file; + + if (!mm) + goto out_null; + + exe_file = get_mm_exe_file(mm); + if (!exe_file) + goto out_null; + + audit_log_d_path(ab, " exe=", &exe_file->f_path); + fput(exe_file); + return; +out_null: + audit_log_format(ab, " exe=(null)"); +} + void audit_log_task_info(struct audit_buffer *ab, struct task_struct *tsk) { const struct cred *cred; char comm[sizeof(tsk->comm)]; - struct mm_struct *mm = tsk->mm; char *tty; if (!ab) @@ -1878,13 +1897,7 @@ void audit_log_task_info(struct audit_buffer *ab, struct task_struct *tsk) audit_log_format(ab, " comm="); audit_log_untrustedstring(ab, get_task_comm(comm, tsk)); - if (mm) { - down_read(&mm->mmap_sem); - if (mm->exe_file) - audit_log_d_path(ab, " exe=", &mm->exe_file->f_path); - up_read(&mm->mmap_sem); - } else - audit_log_format(ab, " exe=(null)"); + audit_log_d_path_exe(ab, tsk->mm); audit_log_task_context(ab); } EXPORT_SYMBOL(audit_log_task_info); @@ -1915,7 +1928,7 @@ void audit_log_link_denied(const char *operation, struct path *link) /* Generate AUDIT_PATH record with object. */ name->type = AUDIT_TYPE_NORMAL; - audit_copy_inode(name, link->dentry, link->dentry->d_inode); + audit_copy_inode(name, link->dentry, d_backing_inode(link->dentry)); audit_log_name(current->audit_context, name, link, 0, NULL); out: kfree(name); diff --git a/kernel/audit.h b/kernel/audit.h index 1caa0d345d90..d641f9bb3ed0 100644 --- a/kernel/audit.h +++ b/kernel/audit.h @@ -257,6 +257,9 @@ extern struct list_head audit_filter_list[]; extern struct audit_entry *audit_dupe_rule(struct audit_krule *old); +extern void audit_log_d_path_exe(struct audit_buffer *ab, + struct mm_struct *mm); + /* audit watch functions */ #ifdef CONFIG_AUDIT_WATCH extern void audit_put_watch(struct audit_watch *watch); diff --git a/kernel/audit_tree.c b/kernel/audit_tree.c index 2e0c97427b33..b0f9877273fc 100644 --- a/kernel/audit_tree.c +++ b/kernel/audit_tree.c @@ -37,6 +37,7 @@ struct audit_chunk { static LIST_HEAD(tree_list); static LIST_HEAD(prune_list); +static struct task_struct *prune_thread; /* * One struct chunk is attached to each inode of interest. @@ -576,7 +577,7 @@ int audit_remove_tree_rule(struct audit_krule *rule) static int compare_root(struct vfsmount *mnt, void *arg) { - return mnt->mnt_root->d_inode == arg; + return d_backing_inode(mnt->mnt_root) == arg; } void audit_trim_trees(void) @@ -648,7 +649,58 @@ void audit_put_tree(struct audit_tree *tree) static int tag_mount(struct vfsmount *mnt, void *arg) { - return tag_chunk(mnt->mnt_root->d_inode, arg); + return tag_chunk(d_backing_inode(mnt->mnt_root), arg); +} + +/* + * That gets run when evict_chunk() ends up needing to kill audit_tree. + * Runs from a separate thread. + */ +static int prune_tree_thread(void *unused) +{ + for (;;) { + set_current_state(TASK_INTERRUPTIBLE); + if (list_empty(&prune_list)) + schedule(); + __set_current_state(TASK_RUNNING); + + mutex_lock(&audit_cmd_mutex); + mutex_lock(&audit_filter_mutex); + + while (!list_empty(&prune_list)) { + struct audit_tree *victim; + + victim = list_entry(prune_list.next, + struct audit_tree, list); + list_del_init(&victim->list); + + mutex_unlock(&audit_filter_mutex); + + prune_one(victim); + + mutex_lock(&audit_filter_mutex); + } + + mutex_unlock(&audit_filter_mutex); + mutex_unlock(&audit_cmd_mutex); + } + return 0; +} + +static int audit_launch_prune(void) +{ + if (prune_thread) + return 0; + prune_thread = kthread_create(prune_tree_thread, NULL, + "audit_prune_tree"); + if (IS_ERR(prune_thread)) { + pr_err("cannot start thread audit_prune_tree"); + prune_thread = NULL; + return -ENOMEM; + } else { + wake_up_process(prune_thread); + return 0; + } } /* called with audit_filter_mutex */ @@ -674,6 +726,12 @@ int audit_add_tree_rule(struct audit_krule *rule) /* do not set rule->tree yet */ mutex_unlock(&audit_filter_mutex); + if (unlikely(!prune_thread)) { + err = audit_launch_prune(); + if (err) + goto Err; + } + err = kern_path(tree->pathname, 0, &path); if (err) goto Err; @@ -811,36 +869,10 @@ int audit_tag_tree(char *old, char *new) return failed; } -/* - * That gets run when evict_chunk() ends up needing to kill audit_tree. - * Runs from a separate thread. - */ -static int prune_tree_thread(void *unused) -{ - mutex_lock(&audit_cmd_mutex); - mutex_lock(&audit_filter_mutex); - - while (!list_empty(&prune_list)) { - struct audit_tree *victim; - - victim = list_entry(prune_list.next, struct audit_tree, list); - list_del_init(&victim->list); - - mutex_unlock(&audit_filter_mutex); - - prune_one(victim); - - mutex_lock(&audit_filter_mutex); - } - - mutex_unlock(&audit_filter_mutex); - mutex_unlock(&audit_cmd_mutex); - return 0; -} static void audit_schedule_prune(void) { - kthread_run(prune_tree_thread, NULL, "audit_prune_tree"); + wake_up_process(prune_thread); } /* @@ -907,9 +939,9 @@ static void evict_chunk(struct audit_chunk *chunk) for (n = 0; n < chunk->count; n++) list_del_init(&chunk->owners[n].list); spin_unlock(&hash_lock); + mutex_unlock(&audit_filter_mutex); if (need_prune) audit_schedule_prune(); - mutex_unlock(&audit_filter_mutex); } static int audit_tree_handle_event(struct fsnotify_group *group, diff --git a/kernel/audit_watch.c b/kernel/audit_watch.c index ad9c1682f616..6e30024d9aac 100644 --- a/kernel/audit_watch.c +++ b/kernel/audit_watch.c @@ -146,7 +146,7 @@ int audit_watch_compare(struct audit_watch *watch, unsigned long ino, dev_t dev) /* Initialize a parent watch entry. */ static struct audit_parent *audit_init_parent(struct path *path) { - struct inode *inode = path->dentry->d_inode; + struct inode *inode = d_backing_inode(path->dentry); struct audit_parent *parent; int ret; @@ -361,11 +361,11 @@ static int audit_get_nd(struct audit_watch *watch, struct path *parent) struct dentry *d = kern_path_locked(watch->path, parent); if (IS_ERR(d)) return PTR_ERR(d); - mutex_unlock(&parent->dentry->d_inode->i_mutex); - if (d->d_inode) { + mutex_unlock(&d_backing_inode(parent->dentry)->i_mutex); + if (d_is_positive(d)) { /* update watch filter fields */ - watch->dev = d->d_inode->i_sb->s_dev; - watch->ino = d->d_inode->i_ino; + watch->dev = d_backing_inode(d)->i_sb->s_dev; + watch->ino = d_backing_inode(d)->i_ino; } dput(d); return 0; @@ -426,7 +426,7 @@ int audit_add_watch(struct audit_krule *krule, struct list_head **list) return ret; /* either find an old parent or attach a new one */ - parent = audit_find_parent(parent_path.dentry->d_inode); + parent = audit_find_parent(d_backing_inode(parent_path.dentry)); if (!parent) { parent = audit_init_parent(&parent_path); if (IS_ERR(parent)) { @@ -482,7 +482,7 @@ static int audit_watch_handle_event(struct fsnotify_group *group, switch (data_type) { case (FSNOTIFY_EVENT_PATH): - inode = ((struct path *)data)->dentry->d_inode; + inode = d_backing_inode(((struct path *)data)->dentry); break; case (FSNOTIFY_EVENT_INODE): inode = (struct inode *)data; diff --git a/kernel/auditsc.c b/kernel/auditsc.c index dc4ae70a7413..9fb9d1cb83ce 100644 --- a/kernel/auditsc.c +++ b/kernel/auditsc.c @@ -1629,7 +1629,7 @@ retry: rcu_read_lock(); seq = read_seqbegin(&rename_lock); for(;;) { - struct inode *inode = d->d_inode; + struct inode *inode = d_backing_inode(d); if (inode && unlikely(!hlist_empty(&inode->i_fsnotify_marks))) { struct audit_chunk *chunk; chunk = audit_tree_lookup(inode); @@ -1754,7 +1754,7 @@ void __audit_inode(struct filename *name, const struct dentry *dentry, unsigned int flags) { struct audit_context *context = current->audit_context; - const struct inode *inode = dentry->d_inode; + const struct inode *inode = d_backing_inode(dentry); struct audit_names *n; bool parent = flags & AUDIT_INODE_PARENT; @@ -1853,7 +1853,7 @@ void __audit_inode_child(const struct inode *parent, const unsigned char type) { struct audit_context *context = current->audit_context; - const struct inode *inode = dentry->d_inode; + const struct inode *inode = d_backing_inode(dentry); const char *dname = dentry->d_name.name; struct audit_names *n, *found_parent = NULL, *found_child = NULL; @@ -2361,7 +2361,6 @@ static void audit_log_task(struct audit_buffer *ab) kuid_t auid, uid; kgid_t gid; unsigned int sessionid; - struct mm_struct *mm = current->mm; char comm[sizeof(current->comm)]; auid = audit_get_loginuid(current); @@ -2376,13 +2375,7 @@ static void audit_log_task(struct audit_buffer *ab) audit_log_task_context(ab); audit_log_format(ab, " pid=%d comm=", task_pid_nr(current)); audit_log_untrustedstring(ab, get_task_comm(comm, current)); - if (mm) { - down_read(&mm->mmap_sem); - if (mm->exe_file) - audit_log_d_path(ab, " exe=", &mm->exe_file->f_path); - up_read(&mm->mmap_sem); - } else - audit_log_format(ab, " exe=(null)"); + audit_log_d_path_exe(ab, current->mm); } /** diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index 4139a0f8b558..54f0e7fcd0e2 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -357,8 +357,8 @@ select_insn: ALU64_MOD_X: if (unlikely(SRC == 0)) return 0; - tmp = DST; - DST = do_div(tmp, SRC); + div64_u64_rem(DST, SRC, &tmp); + DST = tmp; CONT; ALU_MOD_X: if (unlikely(SRC == 0)) @@ -367,8 +367,8 @@ select_insn: DST = do_div(tmp, (u32) SRC); CONT; ALU64_MOD_K: - tmp = DST; - DST = do_div(tmp, IMM); + div64_u64_rem(DST, IMM, &tmp); + DST = tmp; CONT; ALU_MOD_K: tmp = (u32) DST; @@ -377,7 +377,7 @@ select_insn: ALU64_DIV_X: if (unlikely(SRC == 0)) return 0; - do_div(DST, SRC); + DST = div64_u64(DST, SRC); CONT; ALU_DIV_X: if (unlikely(SRC == 0)) @@ -387,7 +387,7 @@ select_insn: DST = (u32) tmp; CONT; ALU64_DIV_K: - do_div(DST, IMM); + DST = div64_u64(DST, IMM); CONT; ALU_DIV_K: tmp = (u32) DST; diff --git a/kernel/events/core.c b/kernel/events/core.c index 81aa3a4ece9f..1a3bf48743ce 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -913,10 +913,30 @@ static void put_ctx(struct perf_event_context *ctx) * Those places that change perf_event::ctx will hold both * perf_event_ctx::mutex of the 'old' and 'new' ctx value. * - * Lock ordering is by mutex address. There is one other site where - * perf_event_context::mutex nests and that is put_event(). But remember that - * that is a parent<->child context relation, and migration does not affect - * children, therefore these two orderings should not interact. + * Lock ordering is by mutex address. There are two other sites where + * perf_event_context::mutex nests and those are: + * + * - perf_event_exit_task_context() [ child , 0 ] + * __perf_event_exit_task() + * sync_child_event() + * put_event() [ parent, 1 ] + * + * - perf_event_init_context() [ parent, 0 ] + * inherit_task_group() + * inherit_group() + * inherit_event() + * perf_event_alloc() + * perf_init_event() + * perf_try_init_event() [ child , 1 ] + * + * While it appears there is an obvious deadlock here -- the parent and child + * nesting levels are inverted between the two. This is in fact safe because + * life-time rules separate them. That is an exiting task cannot fork, and a + * spawning task cannot (yet) exit. + * + * But remember that that these are parent<->child context relations, and + * migration does not affect children, therefore these two orderings should not + * interact. * * The change in perf_event::ctx does not affect children (as claimed above) * because the sys_perf_event_open() case will install a new event and break @@ -3657,9 +3677,6 @@ static void perf_remove_from_owner(struct perf_event *event) } } -/* - * Called when the last reference to the file is gone. - */ static void put_event(struct perf_event *event) { struct perf_event_context *ctx; @@ -3697,6 +3714,9 @@ int perf_event_release_kernel(struct perf_event *event) } EXPORT_SYMBOL_GPL(perf_event_release_kernel); +/* + * Called when the last reference to the file is gone. + */ static int perf_release(struct inode *inode, struct file *file) { put_event(file->private_data); @@ -7364,7 +7384,12 @@ static int perf_try_init_event(struct pmu *pmu, struct perf_event *event) return -ENODEV; if (event->group_leader != event) { - ctx = perf_event_ctx_lock(event->group_leader); + /* + * This ctx->mutex can nest when we're called through + * inheritance. See the perf_event_ctx_lock_nested() comment. + */ + ctx = perf_event_ctx_lock_nested(event->group_leader, + SINGLE_DEPTH_NESTING); BUG_ON(!ctx); } diff --git a/kernel/irq/dummychip.c b/kernel/irq/dummychip.c index 988dc58e8847..2feb6feca0cc 100644 --- a/kernel/irq/dummychip.c +++ b/kernel/irq/dummychip.c @@ -57,5 +57,6 @@ struct irq_chip dummy_irq_chip = { .irq_ack = noop, .irq_mask = noop, .irq_unmask = noop, + .flags = IRQCHIP_SKIP_SET_WAKE, }; EXPORT_SYMBOL_GPL(dummy_irq_chip); diff --git a/kernel/kexec.c b/kernel/kexec.c index 38c25b1f2fd5..7a36fdcca5bf 100644 --- a/kernel/kexec.c +++ b/kernel/kexec.c @@ -707,7 +707,7 @@ static struct page *kimage_alloc_normal_control_pages(struct kimage *image, do { unsigned long pfn, epfn, addr, eaddr; - pages = kimage_alloc_pages(GFP_KERNEL, order); + pages = kimage_alloc_pages(KEXEC_CONTROL_MEMORY_GFP, order); if (!pages) break; pfn = page_to_pfn(pages); diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c index b73279367087..b025295f4966 100644 --- a/kernel/locking/rtmutex.c +++ b/kernel/locking/rtmutex.c @@ -265,15 +265,17 @@ struct task_struct *rt_mutex_get_top_task(struct task_struct *task) } /* - * Called by sched_setscheduler() to check whether the priority change - * is overruled by a possible priority boosting. + * Called by sched_setscheduler() to get the priority which will be + * effective after the change. */ -int rt_mutex_check_prio(struct task_struct *task, int newprio) +int rt_mutex_get_effective_prio(struct task_struct *task, int newprio) { if (!task_has_pi_waiters(task)) - return 0; + return newprio; - return task_top_pi_waiter(task)->task->prio <= newprio; + if (task_top_pi_waiter(task)->task->prio <= newprio) + return task_top_pi_waiter(task)->task->prio; + return newprio; } /* diff --git a/kernel/module.c b/kernel/module.c index 42a1d2afb217..cfc9e843a924 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -3370,6 +3370,9 @@ static int load_module(struct load_info *info, const char __user *uargs, module_bug_cleanup(mod); mutex_unlock(&module_mutex); + blocking_notifier_call_chain(&module_notify_list, + MODULE_STATE_GOING, mod); + /* we can't deallocate the module until we clear memory protection */ unset_module_init_ro_nx(mod); unset_module_core_ro_nx(mod); diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index 233165da782f..8cf7304b2867 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -162,11 +162,14 @@ static void invoke_rcu_callbacks(struct rcu_state *rsp, struct rcu_data *rdp); static int kthread_prio = CONFIG_RCU_KTHREAD_PRIO; module_param(kthread_prio, int, 0644); -/* Delay in jiffies for grace-period initialization delays. */ -static int gp_init_delay = IS_ENABLED(CONFIG_RCU_TORTURE_TEST_SLOW_INIT) - ? CONFIG_RCU_TORTURE_TEST_SLOW_INIT_DELAY - : 0; +/* Delay in jiffies for grace-period initialization delays, debug only. */ +#ifdef CONFIG_RCU_TORTURE_TEST_SLOW_INIT +static int gp_init_delay = CONFIG_RCU_TORTURE_TEST_SLOW_INIT_DELAY; module_param(gp_init_delay, int, 0644); +#else /* #ifdef CONFIG_RCU_TORTURE_TEST_SLOW_INIT */ +static const int gp_init_delay; +#endif /* #else #ifdef CONFIG_RCU_TORTURE_TEST_SLOW_INIT */ +#define PER_RCU_NODE_PERIOD 10 /* Number of grace periods between delays. */ /* * Track the rcutorture test sequence number and the update version @@ -1843,9 +1846,8 @@ static int rcu_gp_init(struct rcu_state *rsp) raw_spin_unlock_irq(&rnp->lock); cond_resched_rcu_qs(); ACCESS_ONCE(rsp->gp_activity) = jiffies; - if (IS_ENABLED(CONFIG_RCU_TORTURE_TEST_SLOW_INIT) && - gp_init_delay > 0 && - !(rsp->gpnum % (rcu_num_nodes * 10))) + if (gp_init_delay > 0 && + !(rsp->gpnum % (rcu_num_nodes * PER_RCU_NODE_PERIOD))) schedule_timeout_uninterruptible(gp_init_delay); } diff --git a/kernel/relay.c b/kernel/relay.c index 5a56d3c8dc03..e9dbaeb8fd65 100644 --- a/kernel/relay.c +++ b/kernel/relay.c @@ -407,7 +407,7 @@ static inline void relay_set_buf_dentry(struct rchan_buf *buf, struct dentry *dentry) { buf->dentry = dentry; - buf->dentry->d_inode->i_size = buf->early_bytes; + d_inode(buf->dentry)->i_size = buf->early_bytes; } static struct dentry *relay_create_buf_file(struct rchan *chan, @@ -733,7 +733,7 @@ size_t relay_switch_subbuf(struct rchan_buf *buf, size_t length) buf->padding[old_subbuf] = buf->prev_padding; buf->subbufs_produced++; if (buf->dentry) - buf->dentry->d_inode->i_size += + d_inode(buf->dentry)->i_size += buf->chan->subbuf_size - buf->padding[old_subbuf]; else diff --git a/kernel/sched/core.c b/kernel/sched/core.c index f9123a82cbb6..123673291ffb 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -1016,13 +1016,6 @@ void check_preempt_curr(struct rq *rq, struct task_struct *p, int flags) rq_clock_skip_update(rq, true); } -static ATOMIC_NOTIFIER_HEAD(task_migration_notifier); - -void register_task_migration_notifier(struct notifier_block *n) -{ - atomic_notifier_chain_register(&task_migration_notifier, n); -} - #ifdef CONFIG_SMP void set_task_cpu(struct task_struct *p, unsigned int new_cpu) { @@ -1053,18 +1046,10 @@ void set_task_cpu(struct task_struct *p, unsigned int new_cpu) trace_sched_migrate_task(p, new_cpu); if (task_cpu(p) != new_cpu) { - struct task_migration_notifier tmn; - if (p->sched_class->migrate_task_rq) p->sched_class->migrate_task_rq(p, new_cpu); p->se.nr_migrations++; perf_sw_event_sched(PERF_COUNT_SW_CPU_MIGRATIONS, 1, 0); - - tmn.task = p; - tmn.from_cpu = task_cpu(p); - tmn.to_cpu = new_cpu; - - atomic_notifier_call_chain(&task_migration_notifier, 0, &tmn); } __set_task_cpu(p, new_cpu); @@ -3315,15 +3300,18 @@ static void __setscheduler_params(struct task_struct *p, /* Actually do priority change: must hold pi & rq lock. */ static void __setscheduler(struct rq *rq, struct task_struct *p, - const struct sched_attr *attr) + const struct sched_attr *attr, bool keep_boost) { __setscheduler_params(p, attr); /* - * If we get here, there was no pi waiters boosting the - * task. It is safe to use the normal prio. + * Keep a potential priority boosting if called from + * sched_setscheduler(). */ - p->prio = normal_prio(p); + if (keep_boost) + p->prio = rt_mutex_get_effective_prio(p, normal_prio(p)); + else + p->prio = normal_prio(p); if (dl_prio(p->prio)) p->sched_class = &dl_sched_class; @@ -3423,7 +3411,7 @@ static int __sched_setscheduler(struct task_struct *p, int newprio = dl_policy(attr->sched_policy) ? MAX_DL_PRIO - 1 : MAX_RT_PRIO - 1 - attr->sched_priority; int retval, oldprio, oldpolicy = -1, queued, running; - int policy = attr->sched_policy; + int new_effective_prio, policy = attr->sched_policy; unsigned long flags; const struct sched_class *prev_class; struct rq *rq; @@ -3605,15 +3593,14 @@ change: oldprio = p->prio; /* - * Special case for priority boosted tasks. - * - * If the new priority is lower or equal (user space view) - * than the current (boosted) priority, we just store the new + * Take priority boosted tasks into account. If the new + * effective priority is unchanged, we just store the new * normal parameters and do not touch the scheduler class and * the runqueue. This will be done when the task deboost * itself. */ - if (rt_mutex_check_prio(p, newprio)) { + new_effective_prio = rt_mutex_get_effective_prio(p, newprio); + if (new_effective_prio == oldprio) { __setscheduler_params(p, attr); task_rq_unlock(rq, p, &flags); return 0; @@ -3627,7 +3614,7 @@ change: put_prev_task(rq, p); prev_class = p->sched_class; - __setscheduler(rq, p, attr); + __setscheduler(rq, p, attr, true); if (running) p->sched_class->set_curr_task(rq); @@ -4402,10 +4389,7 @@ long __sched io_schedule_timeout(long timeout) long ret; current->in_iowait = 1; - if (old_iowait) - blk_schedule_flush_plug(current); - else - blk_flush_plug(current); + blk_schedule_flush_plug(current); delayacct_blkio_start(); rq = raw_rq(); @@ -7012,27 +6996,23 @@ static int cpuset_cpu_inactive(struct notifier_block *nfb, unsigned long action, unsigned long flags; long cpu = (long)hcpu; struct dl_bw *dl_b; + bool overflow; + int cpus; - switch (action & ~CPU_TASKS_FROZEN) { + switch (action) { case CPU_DOWN_PREPARE: - /* explicitly allow suspend */ - if (!(action & CPU_TASKS_FROZEN)) { - bool overflow; - int cpus; - - rcu_read_lock_sched(); - dl_b = dl_bw_of(cpu); + rcu_read_lock_sched(); + dl_b = dl_bw_of(cpu); - raw_spin_lock_irqsave(&dl_b->lock, flags); - cpus = dl_bw_cpus(cpu); - overflow = __dl_overflow(dl_b, cpus, 0, 0); - raw_spin_unlock_irqrestore(&dl_b->lock, flags); + raw_spin_lock_irqsave(&dl_b->lock, flags); + cpus = dl_bw_cpus(cpu); + overflow = __dl_overflow(dl_b, cpus, 0, 0); + raw_spin_unlock_irqrestore(&dl_b->lock, flags); - rcu_read_unlock_sched(); + rcu_read_unlock_sched(); - if (overflow) - return notifier_from_errno(-EBUSY); - } + if (overflow) + return notifier_from_errno(-EBUSY); cpuset_update_active_cpus(false); break; case CPU_DOWN_PREPARE_FROZEN: @@ -7361,7 +7341,7 @@ static void normalize_task(struct rq *rq, struct task_struct *p) queued = task_on_rq_queued(p); if (queued) dequeue_task(rq, p, 0); - __setscheduler(rq, p, &attr); + __setscheduler(rq, p, &attr, false); if (queued) { enqueue_task(rq, p, 0); resched_curr(rq); diff --git a/kernel/sched/idle.c b/kernel/sched/idle.c index deef1caa94c6..fefcb1fa5160 100644 --- a/kernel/sched/idle.c +++ b/kernel/sched/idle.c @@ -81,7 +81,6 @@ static void cpuidle_idle_call(void) struct cpuidle_device *dev = __this_cpu_read(cpuidle_devices); struct cpuidle_driver *drv = cpuidle_get_cpu_driver(dev); int next_state, entered_state; - unsigned int broadcast; bool reflect; /* @@ -150,17 +149,6 @@ static void cpuidle_idle_call(void) goto exit_idle; } - broadcast = drv->states[next_state].flags & CPUIDLE_FLAG_TIMER_STOP; - - /* - * Tell the time framework to switch to a broadcast timer - * because our local timer will be shutdown. If a local timer - * is used from another cpu as a broadcast timer, this call may - * fail if it is not available - */ - if (broadcast && tick_broadcast_enter()) - goto use_default; - /* Take note of the planned idle state. */ idle_set_state(this_rq(), &drv->states[next_state]); @@ -174,8 +162,8 @@ static void cpuidle_idle_call(void) /* The cpu is no longer idle or about to enter idle. */ idle_set_state(this_rq(), NULL); - if (broadcast) - tick_broadcast_exit(); + if (entered_state == -EBUSY) + goto use_default; /* * Give the governor an opportunity to reflect on the outcome diff --git a/kernel/time/clockevents.c b/kernel/time/clockevents.c index 11dc22a6983b..637a09461c1d 100644 --- a/kernel/time/clockevents.c +++ b/kernel/time/clockevents.c @@ -117,11 +117,7 @@ static int __clockevents_set_state(struct clock_event_device *dev, /* Transition with new state-specific callbacks */ switch (state) { case CLOCK_EVT_STATE_DETACHED: - /* - * This is an internal state, which is guaranteed to go from - * SHUTDOWN to DETACHED. No driver interaction required. - */ - return 0; + /* The clockevent device is getting replaced. Shut it down. */ case CLOCK_EVT_STATE_SHUTDOWN: return dev->set_state_shutdown(dev); diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c index 76d4bd962b19..93ef7190bdea 100644 --- a/kernel/time/hrtimer.c +++ b/kernel/time/hrtimer.c @@ -266,21 +266,23 @@ lock_hrtimer_base(const struct hrtimer *timer, unsigned long *flags) /* * Divide a ktime value by a nanosecond value */ -u64 __ktime_divns(const ktime_t kt, s64 div) +s64 __ktime_divns(const ktime_t kt, s64 div) { - u64 dclc; int sft = 0; + s64 dclc; + u64 tmp; dclc = ktime_to_ns(kt); + tmp = dclc < 0 ? -dclc : dclc; + /* Make sure the divisor is less than 2^32: */ while (div >> 32) { sft++; div >>= 1; } - dclc >>= sft; - do_div(dclc, (unsigned long) div); - - return dclc; + tmp >>= sft; + do_div(tmp, (unsigned long) div); + return dclc < 0 ? -tmp : tmp; } EXPORT_SYMBOL_GPL(__ktime_divns); #endif /* BITS_PER_LONG >= 64 */ diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 91eecaaa43e0..05330494a0df 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -6079,7 +6079,7 @@ trace_create_cpu_file(const char *name, umode_t mode, struct dentry *parent, struct dentry *ret = trace_create_file(name, mode, parent, data, fops); if (ret) /* See tracing_get_cpu() */ - ret->d_inode->i_cdev = (void *)(cpu + 1); + d_inode(ret)->i_cdev = (void *)(cpu + 1); return ret; } diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index 3ab69fb72b85..c4de47fc5cca 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -494,8 +494,8 @@ static void remove_event_file_dir(struct ftrace_event_file *file) if (dir) { spin_lock(&dir->d_lock); /* probably unneeded */ list_for_each_entry(child, &dir->d_subdirs, d_child) { - if (child->d_inode) /* probably unneeded */ - child->d_inode->i_private = NULL; + if (d_really_is_positive(child)) /* probably unneeded */ + d_inode(child)->i_private = NULL; } spin_unlock(&dir->d_lock); diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c index 692bf7184c8c..25a086bcb700 100644 --- a/kernel/trace/trace_output.c +++ b/kernel/trace/trace_output.c @@ -178,12 +178,13 @@ ftrace_print_hex_seq(struct trace_seq *p, const unsigned char *buf, int buf_len) EXPORT_SYMBOL(ftrace_print_hex_seq); const char * -ftrace_print_array_seq(struct trace_seq *p, const void *buf, int buf_len, +ftrace_print_array_seq(struct trace_seq *p, const void *buf, int count, size_t el_size) { const char *ret = trace_seq_buffer_ptr(p); const char *prefix = ""; void *ptr = (void *)buf; + size_t buf_len = count * el_size; trace_seq_putc(p, '{'); diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c index d60fe62ec4fa..6dd022c7b5bc 100644 --- a/kernel/trace/trace_uprobe.c +++ b/kernel/trace/trace_uprobe.c @@ -443,7 +443,7 @@ static int create_trace_uprobe(int argc, char **argv) if (ret) goto fail_address_parse; - inode = igrab(path.dentry->d_inode); + inode = igrab(d_inode(path.dentry)); path_put(&path); if (!inode || !S_ISREG(inode->i_mode)) { diff --git a/kernel/watchdog.c b/kernel/watchdog.c index 2316f50b07a4..581a68a04c64 100644 --- a/kernel/watchdog.c +++ b/kernel/watchdog.c @@ -41,6 +41,8 @@ #define NMI_WATCHDOG_ENABLED (1 << NMI_WATCHDOG_ENABLED_BIT) #define SOFT_WATCHDOG_ENABLED (1 << SOFT_WATCHDOG_ENABLED_BIT) +static DEFINE_MUTEX(watchdog_proc_mutex); + #ifdef CONFIG_HARDLOCKUP_DETECTOR static unsigned long __read_mostly watchdog_enabled = SOFT_WATCHDOG_ENABLED|NMI_WATCHDOG_ENABLED; #else @@ -608,26 +610,36 @@ void watchdog_nmi_enable_all(void) { int cpu; - if (!watchdog_user_enabled) - return; + mutex_lock(&watchdog_proc_mutex); + + if (!(watchdog_enabled & NMI_WATCHDOG_ENABLED)) + goto unlock; get_online_cpus(); for_each_online_cpu(cpu) watchdog_nmi_enable(cpu); put_online_cpus(); + +unlock: + mutex_unlock(&watchdog_proc_mutex); } void watchdog_nmi_disable_all(void) { int cpu; + mutex_lock(&watchdog_proc_mutex); + if (!watchdog_running) - return; + goto unlock; get_online_cpus(); for_each_online_cpu(cpu) watchdog_nmi_disable(cpu); put_online_cpus(); + +unlock: + mutex_unlock(&watchdog_proc_mutex); } #else static int watchdog_nmi_enable(unsigned int cpu) { return 0; } @@ -744,8 +756,6 @@ static int proc_watchdog_update(void) } -static DEFINE_MUTEX(watchdog_proc_mutex); - /* * common function for watchdog, nmi_watchdog and soft_watchdog parameter * |