summaryrefslogtreecommitdiffstats
path: root/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'kernel')
-rw-r--r--kernel/events/ring_buffer.c7
-rw-r--r--kernel/kthread.c50
-rw-r--r--kernel/locking/rwsem-xadd.c19
-rw-r--r--kernel/locking/rwsem.c2
-rw-r--r--kernel/locking/rwsem.h30
-rw-r--r--kernel/module.c5
-rw-r--r--kernel/sched/autogroup.c7
-rw-r--r--kernel/sched/core.c56
-rw-r--r--kernel/sched/deadline.c4
-rw-r--r--kernel/sched/fair.c59
-rw-r--r--kernel/sched/rt.c2
-rw-r--r--kernel/sched/sched.h5
-rw-r--r--kernel/signal.c17
-rw-r--r--kernel/stop_machine.c19
-rw-r--r--kernel/time/tick-broadcast.c8
15 files changed, 139 insertions, 151 deletions
diff --git a/kernel/events/ring_buffer.c b/kernel/events/ring_buffer.c
index 6c6b3c48db71..1d8ca9ea9979 100644
--- a/kernel/events/ring_buffer.c
+++ b/kernel/events/ring_buffer.c
@@ -14,6 +14,7 @@
#include <linux/slab.h>
#include <linux/circ_buf.h>
#include <linux/poll.h>
+#include <linux/nospec.h>
#include "internal.h"
@@ -867,8 +868,10 @@ perf_mmap_to_page(struct ring_buffer *rb, unsigned long pgoff)
return NULL;
/* AUX space */
- if (pgoff >= rb->aux_pgoff)
- return virt_to_page(rb->aux_pages[pgoff - rb->aux_pgoff]);
+ if (pgoff >= rb->aux_pgoff) {
+ int aux_pgoff = array_index_nospec(pgoff - rb->aux_pgoff, rb->aux_nr_pages);
+ return virt_to_page(rb->aux_pages[aux_pgoff]);
+ }
}
return __perf_mmap_to_page(rb, pgoff);
diff --git a/kernel/kthread.c b/kernel/kthread.c
index cd50e99202b0..2017a39ab490 100644
--- a/kernel/kthread.c
+++ b/kernel/kthread.c
@@ -55,7 +55,6 @@ enum KTHREAD_BITS {
KTHREAD_IS_PER_CPU = 0,
KTHREAD_SHOULD_STOP,
KTHREAD_SHOULD_PARK,
- KTHREAD_IS_PARKED,
};
static inline void set_kthread_struct(void *kthread)
@@ -177,14 +176,12 @@ void *kthread_probe_data(struct task_struct *task)
static void __kthread_parkme(struct kthread *self)
{
- __set_current_state(TASK_PARKED);
- while (test_bit(KTHREAD_SHOULD_PARK, &self->flags)) {
- if (!test_and_set_bit(KTHREAD_IS_PARKED, &self->flags))
- complete(&self->parked);
+ for (;;) {
+ set_current_state(TASK_PARKED);
+ if (!test_bit(KTHREAD_SHOULD_PARK, &self->flags))
+ break;
schedule();
- __set_current_state(TASK_PARKED);
}
- clear_bit(KTHREAD_IS_PARKED, &self->flags);
__set_current_state(TASK_RUNNING);
}
@@ -194,6 +191,11 @@ void kthread_parkme(void)
}
EXPORT_SYMBOL_GPL(kthread_parkme);
+void kthread_park_complete(struct task_struct *k)
+{
+ complete(&to_kthread(k)->parked);
+}
+
static int kthread(void *_create)
{
/* Copy data: it's on kthread's stack */
@@ -450,22 +452,15 @@ void kthread_unpark(struct task_struct *k)
{
struct kthread *kthread = to_kthread(k);
- clear_bit(KTHREAD_SHOULD_PARK, &kthread->flags);
/*
- * We clear the IS_PARKED bit here as we don't wait
- * until the task has left the park code. So if we'd
- * park before that happens we'd see the IS_PARKED bit
- * which might be about to be cleared.
+ * Newly created kthread was parked when the CPU was offline.
+ * The binding was lost and we need to set it again.
*/
- if (test_and_clear_bit(KTHREAD_IS_PARKED, &kthread->flags)) {
- /*
- * Newly created kthread was parked when the CPU was offline.
- * The binding was lost and we need to set it again.
- */
- if (test_bit(KTHREAD_IS_PER_CPU, &kthread->flags))
- __kthread_bind(k, kthread->cpu, TASK_PARKED);
- wake_up_state(k, TASK_PARKED);
- }
+ if (test_bit(KTHREAD_IS_PER_CPU, &kthread->flags))
+ __kthread_bind(k, kthread->cpu, TASK_PARKED);
+
+ clear_bit(KTHREAD_SHOULD_PARK, &kthread->flags);
+ wake_up_state(k, TASK_PARKED);
}
EXPORT_SYMBOL_GPL(kthread_unpark);
@@ -488,12 +483,13 @@ int kthread_park(struct task_struct *k)
if (WARN_ON(k->flags & PF_EXITING))
return -ENOSYS;
- if (!test_bit(KTHREAD_IS_PARKED, &kthread->flags)) {
- set_bit(KTHREAD_SHOULD_PARK, &kthread->flags);
- if (k != current) {
- wake_up_process(k);
- wait_for_completion(&kthread->parked);
- }
+ if (WARN_ON_ONCE(test_bit(KTHREAD_SHOULD_PARK, &kthread->flags)))
+ return -EBUSY;
+
+ set_bit(KTHREAD_SHOULD_PARK, &kthread->flags);
+ if (k != current) {
+ wake_up_process(k);
+ wait_for_completion(&kthread->parked);
}
return 0;
diff --git a/kernel/locking/rwsem-xadd.c b/kernel/locking/rwsem-xadd.c
index e795908f3607..a90336779375 100644
--- a/kernel/locking/rwsem-xadd.c
+++ b/kernel/locking/rwsem-xadd.c
@@ -352,16 +352,15 @@ static inline bool rwsem_can_spin_on_owner(struct rw_semaphore *sem)
struct task_struct *owner;
bool ret = true;
+ BUILD_BUG_ON(!rwsem_has_anonymous_owner(RWSEM_OWNER_UNKNOWN));
+
if (need_resched())
return false;
rcu_read_lock();
owner = READ_ONCE(sem->owner);
- if (!rwsem_owner_is_writer(owner)) {
- /*
- * Don't spin if the rwsem is readers owned.
- */
- ret = !rwsem_owner_is_reader(owner);
+ if (!owner || !is_rwsem_owner_spinnable(owner)) {
+ ret = !owner; /* !owner is spinnable */
goto done;
}
@@ -382,11 +381,11 @@ static noinline bool rwsem_spin_on_owner(struct rw_semaphore *sem)
{
struct task_struct *owner = READ_ONCE(sem->owner);
- if (!rwsem_owner_is_writer(owner))
- goto out;
+ if (!is_rwsem_owner_spinnable(owner))
+ return false;
rcu_read_lock();
- while (sem->owner == owner) {
+ while (owner && (READ_ONCE(sem->owner) == owner)) {
/*
* Ensure we emit the owner->on_cpu, dereference _after_
* checking sem->owner still matches owner, if that fails,
@@ -408,12 +407,12 @@ static noinline bool rwsem_spin_on_owner(struct rw_semaphore *sem)
cpu_relax();
}
rcu_read_unlock();
-out:
+
/*
* If there is a new owner or the owner is not set, we continue
* spinning.
*/
- return !rwsem_owner_is_reader(READ_ONCE(sem->owner));
+ return is_rwsem_owner_spinnable(READ_ONCE(sem->owner));
}
static bool rwsem_optimistic_spin(struct rw_semaphore *sem)
diff --git a/kernel/locking/rwsem.c b/kernel/locking/rwsem.c
index 30465a2f2b6c..bc1e507be9ff 100644
--- a/kernel/locking/rwsem.c
+++ b/kernel/locking/rwsem.c
@@ -221,5 +221,3 @@ void up_read_non_owner(struct rw_semaphore *sem)
EXPORT_SYMBOL(up_read_non_owner);
#endif
-
-
diff --git a/kernel/locking/rwsem.h b/kernel/locking/rwsem.h
index a17cba8d94bb..b9d0e72aa80f 100644
--- a/kernel/locking/rwsem.h
+++ b/kernel/locking/rwsem.h
@@ -1,20 +1,24 @@
/* SPDX-License-Identifier: GPL-2.0 */
/*
* The owner field of the rw_semaphore structure will be set to
- * RWSEM_READ_OWNED when a reader grabs the lock. A writer will clear
+ * RWSEM_READER_OWNED when a reader grabs the lock. A writer will clear
* the owner field when it unlocks. A reader, on the other hand, will
* not touch the owner field when it unlocks.
*
- * In essence, the owner field now has the following 3 states:
+ * In essence, the owner field now has the following 4 states:
* 1) 0
* - lock is free or the owner hasn't set the field yet
* 2) RWSEM_READER_OWNED
* - lock is currently or previously owned by readers (lock is free
* or not set by owner yet)
- * 3) Other non-zero value
- * - a writer owns the lock
+ * 3) RWSEM_ANONYMOUSLY_OWNED bit set with some other bits set as well
+ * - lock is owned by an anonymous writer, so spinning on the lock
+ * owner should be disabled.
+ * 4) Other non-zero value
+ * - a writer owns the lock and other writers can spin on the lock owner.
*/
-#define RWSEM_READER_OWNED ((struct task_struct *)1UL)
+#define RWSEM_ANONYMOUSLY_OWNED (1UL << 0)
+#define RWSEM_READER_OWNED ((struct task_struct *)RWSEM_ANONYMOUSLY_OWNED)
#ifdef CONFIG_DEBUG_RWSEMS
# define DEBUG_RWSEMS_WARN_ON(c) DEBUG_LOCKS_WARN_ON(c)
@@ -51,14 +55,22 @@ static inline void rwsem_set_reader_owned(struct rw_semaphore *sem)
WRITE_ONCE(sem->owner, RWSEM_READER_OWNED);
}
-static inline bool rwsem_owner_is_writer(struct task_struct *owner)
+/*
+ * Return true if the a rwsem waiter can spin on the rwsem's owner
+ * and steal the lock, i.e. the lock is not anonymously owned.
+ * N.B. !owner is considered spinnable.
+ */
+static inline bool is_rwsem_owner_spinnable(struct task_struct *owner)
{
- return owner && owner != RWSEM_READER_OWNED;
+ return !((unsigned long)owner & RWSEM_ANONYMOUSLY_OWNED);
}
-static inline bool rwsem_owner_is_reader(struct task_struct *owner)
+/*
+ * Return true if rwsem is owned by an anonymous writer or readers.
+ */
+static inline bool rwsem_has_anonymous_owner(struct task_struct *owner)
{
- return owner == RWSEM_READER_OWNED;
+ return (unsigned long)owner & RWSEM_ANONYMOUSLY_OWNED;
}
#else
static inline void rwsem_set_owner(struct rw_semaphore *sem)
diff --git a/kernel/module.c b/kernel/module.c
index ce8066b88178..c9bea7f2b43e 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -3517,6 +3517,11 @@ static noinline int do_init_module(struct module *mod)
* walking this with preempt disabled. In all the failure paths, we
* call synchronize_sched(), but we don't want to slow down the success
* path, so use actual RCU here.
+ * Note that module_alloc() on most architectures creates W+X page
+ * mappings which won't be cleaned up until do_free_init() runs. Any
+ * code such as mark_rodata_ro() which depends on those mappings to
+ * be cleaned up needs to sync with the queued work - ie
+ * rcu_barrier_sched()
*/
call_rcu_sched(&freeinit->rcu, do_free_init);
mutex_unlock(&module_mutex);
diff --git a/kernel/sched/autogroup.c b/kernel/sched/autogroup.c
index 6be6c575b6cd..2d4ff5353ded 100644
--- a/kernel/sched/autogroup.c
+++ b/kernel/sched/autogroup.c
@@ -2,6 +2,7 @@
/*
* Auto-group scheduling implementation:
*/
+#include <linux/nospec.h>
#include "sched.h"
unsigned int __read_mostly sysctl_sched_autogroup_enabled = 1;
@@ -209,7 +210,7 @@ int proc_sched_autogroup_set_nice(struct task_struct *p, int nice)
static unsigned long next = INITIAL_JIFFIES;
struct autogroup *ag;
unsigned long shares;
- int err;
+ int err, idx;
if (nice < MIN_NICE || nice > MAX_NICE)
return -EINVAL;
@@ -227,7 +228,9 @@ int proc_sched_autogroup_set_nice(struct task_struct *p, int nice)
next = HZ / 10 + jiffies;
ag = autogroup_task_get(p);
- shares = scale_load(sched_prio_to_weight[nice + 20]);
+
+ idx = array_index_nospec(nice + 20, 40);
+ shares = scale_load(sched_prio_to_weight[idx]);
down_write(&ag->lock);
err = sched_group_set_shares(ag->tg, shares);
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 5e10aaeebfcc..092f7c4de903 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -7,6 +7,9 @@
*/
#include "sched.h"
+#include <linux/kthread.h>
+#include <linux/nospec.h>
+
#include <asm/switch_to.h>
#include <asm/tlb.h>
@@ -2718,20 +2721,28 @@ static struct rq *finish_task_switch(struct task_struct *prev)
membarrier_mm_sync_core_before_usermode(mm);
mmdrop(mm);
}
- if (unlikely(prev_state == TASK_DEAD)) {
- if (prev->sched_class->task_dead)
- prev->sched_class->task_dead(prev);
+ if (unlikely(prev_state & (TASK_DEAD|TASK_PARKED))) {
+ switch (prev_state) {
+ case TASK_DEAD:
+ if (prev->sched_class->task_dead)
+ prev->sched_class->task_dead(prev);
- /*
- * Remove function-return probe instances associated with this
- * task and put them back on the free list.
- */
- kprobe_flush_task(prev);
+ /*
+ * Remove function-return probe instances associated with this
+ * task and put them back on the free list.
+ */
+ kprobe_flush_task(prev);
+
+ /* Task is done with its stack. */
+ put_task_stack(prev);
- /* Task is done with its stack. */
- put_task_stack(prev);
+ put_task_struct(prev);
+ break;
- put_task_struct(prev);
+ case TASK_PARKED:
+ kthread_park_complete(prev);
+ break;
+ }
}
tick_nohz_task_switch();
@@ -3498,23 +3509,8 @@ static void __sched notrace __schedule(bool preempt)
void __noreturn do_task_dead(void)
{
- /*
- * The setting of TASK_RUNNING by try_to_wake_up() may be delayed
- * when the following two conditions become true.
- * - There is race condition of mmap_sem (It is acquired by
- * exit_mm()), and
- * - SMI occurs before setting TASK_RUNINNG.
- * (or hypervisor of virtual machine switches to other guest)
- * As a result, we may become TASK_RUNNING after becoming TASK_DEAD
- *
- * To avoid it, we have to wait for releasing tsk->pi_lock which
- * is held by try_to_wake_up()
- */
- raw_spin_lock_irq(&current->pi_lock);
- raw_spin_unlock_irq(&current->pi_lock);
-
/* Causes final put_task_struct in finish_task_switch(): */
- __set_current_state(TASK_DEAD);
+ set_special_state(TASK_DEAD);
/* Tell freezer to ignore us: */
current->flags |= PF_NOFREEZE;
@@ -6928,11 +6924,15 @@ static int cpu_weight_nice_write_s64(struct cgroup_subsys_state *css,
struct cftype *cft, s64 nice)
{
unsigned long weight;
+ int idx;
if (nice < MIN_NICE || nice > MAX_NICE)
return -ERANGE;
- weight = sched_prio_to_weight[NICE_TO_PRIO(nice) - MAX_RT_PRIO];
+ idx = NICE_TO_PRIO(nice) - MAX_RT_PRIO;
+ idx = array_index_nospec(idx, 40);
+ weight = sched_prio_to_weight[idx];
+
return sched_group_set_shares(css_tg(css), scale_load(weight));
}
#endif
diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
index e7b3008b85bb..1356afd1eeb6 100644
--- a/kernel/sched/deadline.c
+++ b/kernel/sched/deadline.c
@@ -1117,7 +1117,7 @@ extern bool sched_rt_bandwidth_account(struct rt_rq *rt_rq);
* should be larger than 2^(64 - 20 - 8), which is more than 64 seconds.
* So, overflow is not an issue here.
*/
-u64 grub_reclaim(u64 delta, struct rq *rq, struct sched_dl_entity *dl_se)
+static u64 grub_reclaim(u64 delta, struct rq *rq, struct sched_dl_entity *dl_se)
{
u64 u_inact = rq->dl.this_bw - rq->dl.running_bw; /* Utot - Uact */
u64 u_act;
@@ -2731,8 +2731,6 @@ bool dl_cpu_busy(unsigned int cpu)
#endif
#ifdef CONFIG_SCHED_DEBUG
-extern void print_dl_rq(struct seq_file *m, int cpu, struct dl_rq *dl_rq);
-
void print_dl_stats(struct seq_file *m, int cpu)
{
print_dl_rq(m, cpu, &cpu_rq(cpu)->dl);
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 54dc31e7ab9b..79f574dba096 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -1854,7 +1854,6 @@ static int task_numa_migrate(struct task_struct *p)
static void numa_migrate_preferred(struct task_struct *p)
{
unsigned long interval = HZ;
- unsigned long numa_migrate_retry;
/* This task has no NUMA fault statistics yet */
if (unlikely(p->numa_preferred_nid == -1 || !p->numa_faults))
@@ -1862,18 +1861,7 @@ static void numa_migrate_preferred(struct task_struct *p)
/* Periodically retry migrating the task to the preferred node */
interval = min(interval, msecs_to_jiffies(p->numa_scan_period) / 16);
- numa_migrate_retry = jiffies + interval;
-
- /*
- * Check that the new retry threshold is after the current one. If
- * the retry is in the future, it implies that wake_affine has
- * temporarily asked NUMA balancing to backoff from placement.
- */
- if (numa_migrate_retry > p->numa_migrate_retry)
- return;
-
- /* Safe to try placing the task on the preferred node */
- p->numa_migrate_retry = numa_migrate_retry;
+ p->numa_migrate_retry = jiffies + interval;
/* Success if task is already running on preferred CPU */
if (task_node(p) == p->numa_preferred_nid)
@@ -5922,48 +5910,6 @@ wake_affine_weight(struct sched_domain *sd, struct task_struct *p,
return this_eff_load < prev_eff_load ? this_cpu : nr_cpumask_bits;
}
-#ifdef CONFIG_NUMA_BALANCING
-static void
-update_wa_numa_placement(struct task_struct *p, int prev_cpu, int target)
-{
- unsigned long interval;
-
- if (!static_branch_likely(&sched_numa_balancing))
- return;
-
- /* If balancing has no preference then continue gathering data */
- if (p->numa_preferred_nid == -1)
- return;
-
- /*
- * If the wakeup is not affecting locality then it is neutral from
- * the perspective of NUMA balacing so continue gathering data.
- */
- if (cpu_to_node(prev_cpu) == cpu_to_node(target))
- return;
-
- /*
- * Temporarily prevent NUMA balancing trying to place waker/wakee after
- * wakee has been moved by wake_affine. This will potentially allow
- * related tasks to converge and update their data placement. The
- * 4 * numa_scan_period is to allow the two-pass filter to migrate
- * hot data to the wakers node.
- */
- interval = max(sysctl_numa_balancing_scan_delay,
- p->numa_scan_period << 2);
- p->numa_migrate_retry = jiffies + msecs_to_jiffies(interval);
-
- interval = max(sysctl_numa_balancing_scan_delay,
- current->numa_scan_period << 2);
- current->numa_migrate_retry = jiffies + msecs_to_jiffies(interval);
-}
-#else
-static void
-update_wa_numa_placement(struct task_struct *p, int prev_cpu, int target)
-{
-}
-#endif
-
static int wake_affine(struct sched_domain *sd, struct task_struct *p,
int this_cpu, int prev_cpu, int sync)
{
@@ -5979,7 +5925,6 @@ static int wake_affine(struct sched_domain *sd, struct task_struct *p,
if (target == nr_cpumask_bits)
return prev_cpu;
- update_wa_numa_placement(p, prev_cpu, target);
schedstat_inc(sd->ttwu_move_affine);
schedstat_inc(p->se.statistics.nr_wakeups_affine);
return target;
@@ -9847,6 +9792,7 @@ static int idle_balance(struct rq *this_rq, struct rq_flags *rf)
if (curr_cost > this_rq->max_idle_balance_cost)
this_rq->max_idle_balance_cost = curr_cost;
+out:
/*
* While browsing the domains, we released the rq lock, a task could
* have been enqueued in the meantime. Since we're not going idle,
@@ -9855,7 +9801,6 @@ static int idle_balance(struct rq *this_rq, struct rq_flags *rf)
if (this_rq->cfs.h_nr_running && !pulled_task)
pulled_task = 1;
-out:
/* Move the next balance forward */
if (time_after(this_rq->next_balance, next_balance))
this_rq->next_balance = next_balance;
diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c
index 7aef6b4e885a..ef3c4e6f5345 100644
--- a/kernel/sched/rt.c
+++ b/kernel/sched/rt.c
@@ -2701,8 +2701,6 @@ int sched_rr_handler(struct ctl_table *table, int write,
}
#ifdef CONFIG_SCHED_DEBUG
-extern void print_rt_rq(struct seq_file *m, int cpu, struct rt_rq *rt_rq);
-
void print_rt_stats(struct seq_file *m, int cpu)
{
rt_rq_iter_t iter;
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 15750c222ca2..1f0a4bc6a39d 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -2025,8 +2025,9 @@ extern bool sched_debug_enabled;
extern void print_cfs_stats(struct seq_file *m, int cpu);
extern void print_rt_stats(struct seq_file *m, int cpu);
extern void print_dl_stats(struct seq_file *m, int cpu);
-extern void
-print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq);
+extern void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq);
+extern void print_rt_rq(struct seq_file *m, int cpu, struct rt_rq *rt_rq);
+extern void print_dl_rq(struct seq_file *m, int cpu, struct dl_rq *dl_rq);
#ifdef CONFIG_NUMA_BALANCING
extern void
show_numa_stats(struct task_struct *p, struct seq_file *m);
diff --git a/kernel/signal.c b/kernel/signal.c
index d4ccea599692..9c33163a6165 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -1961,14 +1961,27 @@ static void ptrace_stop(int exit_code, int why, int clear_code, siginfo_t *info)
return;
}
+ set_special_state(TASK_TRACED);
+
/*
* We're committing to trapping. TRACED should be visible before
* TRAPPING is cleared; otherwise, the tracer might fail do_wait().
* Also, transition to TRACED and updates to ->jobctl should be
* atomic with respect to siglock and should be done after the arch
* hook as siglock is released and regrabbed across it.
+ *
+ * TRACER TRACEE
+ *
+ * ptrace_attach()
+ * [L] wait_on_bit(JOBCTL_TRAPPING) [S] set_special_state(TRACED)
+ * do_wait()
+ * set_current_state() smp_wmb();
+ * ptrace_do_wait()
+ * wait_task_stopped()
+ * task_stopped_code()
+ * [L] task_is_traced() [S] task_clear_jobctl_trapping();
*/
- set_current_state(TASK_TRACED);
+ smp_wmb();
current->last_siginfo = info;
current->exit_code = exit_code;
@@ -2176,7 +2189,7 @@ static bool do_signal_stop(int signr)
if (task_participate_group_stop(current))
notify = CLD_STOPPED;
- __set_current_state(TASK_STOPPED);
+ set_special_state(TASK_STOPPED);
spin_unlock_irq(&current->sighand->siglock);
/*
diff --git a/kernel/stop_machine.c b/kernel/stop_machine.c
index b7591261652d..64c0291b579c 100644
--- a/kernel/stop_machine.c
+++ b/kernel/stop_machine.c
@@ -21,6 +21,7 @@
#include <linux/smpboot.h>
#include <linux/atomic.h>
#include <linux/nmi.h>
+#include <linux/sched/wake_q.h>
/*
* Structure to determine completion condition and record errors. May
@@ -65,27 +66,31 @@ static void cpu_stop_signal_done(struct cpu_stop_done *done)
}
static void __cpu_stop_queue_work(struct cpu_stopper *stopper,
- struct cpu_stop_work *work)
+ struct cpu_stop_work *work,
+ struct wake_q_head *wakeq)
{
list_add_tail(&work->list, &stopper->works);
- wake_up_process(stopper->thread);
+ wake_q_add(wakeq, stopper->thread);
}
/* queue @work to @stopper. if offline, @work is completed immediately */
static bool cpu_stop_queue_work(unsigned int cpu, struct cpu_stop_work *work)
{
struct cpu_stopper *stopper = &per_cpu(cpu_stopper, cpu);
+ DEFINE_WAKE_Q(wakeq);
unsigned long flags;
bool enabled;
spin_lock_irqsave(&stopper->lock, flags);
enabled = stopper->enabled;
if (enabled)
- __cpu_stop_queue_work(stopper, work);
+ __cpu_stop_queue_work(stopper, work, &wakeq);
else if (work->done)
cpu_stop_signal_done(work->done);
spin_unlock_irqrestore(&stopper->lock, flags);
+ wake_up_q(&wakeq);
+
return enabled;
}
@@ -229,6 +234,7 @@ static int cpu_stop_queue_two_works(int cpu1, struct cpu_stop_work *work1,
{
struct cpu_stopper *stopper1 = per_cpu_ptr(&cpu_stopper, cpu1);
struct cpu_stopper *stopper2 = per_cpu_ptr(&cpu_stopper, cpu2);
+ DEFINE_WAKE_Q(wakeq);
int err;
retry:
spin_lock_irq(&stopper1->lock);
@@ -252,8 +258,8 @@ retry:
goto unlock;
err = 0;
- __cpu_stop_queue_work(stopper1, work1);
- __cpu_stop_queue_work(stopper2, work2);
+ __cpu_stop_queue_work(stopper1, work1, &wakeq);
+ __cpu_stop_queue_work(stopper2, work2, &wakeq);
unlock:
spin_unlock(&stopper2->lock);
spin_unlock_irq(&stopper1->lock);
@@ -263,6 +269,9 @@ unlock:
cpu_relax();
goto retry;
}
+
+ wake_up_q(&wakeq);
+
return err;
}
/**
diff --git a/kernel/time/tick-broadcast.c b/kernel/time/tick-broadcast.c
index b398c2ea69b2..aa2094d5dd27 100644
--- a/kernel/time/tick-broadcast.c
+++ b/kernel/time/tick-broadcast.c
@@ -612,6 +612,14 @@ static void tick_handle_oneshot_broadcast(struct clock_event_device *dev)
now = ktime_get();
/* Find all expired events */
for_each_cpu(cpu, tick_broadcast_oneshot_mask) {
+ /*
+ * Required for !SMP because for_each_cpu() reports
+ * unconditionally CPU0 as set on UP kernels.
+ */
+ if (!IS_ENABLED(CONFIG_SMP) &&
+ cpumask_empty(tick_broadcast_oneshot_mask))
+ break;
+
td = &per_cpu(tick_cpu_device, cpu);
if (td->evtdev->next_event <= now) {
cpumask_set_cpu(cpu, tmpmask);