summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- Documentation/RCU/checklist.txt | 6
-rw-r--r-- Documentation/RCU/stallwarn.txt | 16
-rw-r--r-- Documentation/RCU/whatisRCU.txt | 9
-rw-r--r-- arch/alpha/kernel/process.c | 6
-rw-r--r-- arch/alpha/kernel/smp.c | 1
-rw-r--r-- arch/cris/kernel/process.c | 3
-rw-r--r-- arch/frv/kernel/process.c | 3
-rw-r--r-- arch/h8300/kernel/process.c | 3
-rw-r--r-- arch/ia64/kernel/process.c | 3
-rw-r--r-- arch/m32r/kernel/process.c | 3
-rw-r--r-- arch/m68k/kernel/process.c | 3
-rw-r--r-- arch/mn10300/kernel/process.c | 3
-rw-r--r-- arch/parisc/kernel/process.c | 3
-rw-r--r-- arch/score/kernel/process.c | 4
-rw-r--r-- arch/x86/kernel/cpuid.c | 5
-rw-r--r-- arch/x86/kernel/msr.c | 5
-rw-r--r-- arch/xtensa/kernel/process.c | 3
-rw-r--r-- include/linux/interrupt.h | 2
-rw-r--r-- include/linux/rcupdate.h | 6
-rw-r--r-- kernel/rcupdate.c | 4
-rw-r--r-- kernel/rcutiny.c | 33
-rw-r--r-- kernel/rcutiny_plugin.h | 10
-rw-r--r-- kernel/rcutorture.c | 159
-rw-r--r-- kernel/rcutree.c | 166
-rw-r--r-- kernel/rcutree.h | 3
-rw-r--r-- kernel/rcutree_plugin.h | 47
-rw-r--r-- kernel/rcutree_trace.c | 4
-rw-r--r-- kernel/sched/core.c | 41
-rw-r--r-- kernel/time/tick-sched.c | 3
-rw-r--r-- lib/Kconfig.debug | 14
-rw-r--r-- mm/kmemleak.c | 6
31 files changed, 316 insertions, 261 deletions
diff --git a/Documentation/RCU/checklist.txt b/Documentation/RCU/checklist.txt
index fc103d7a0474..cdb20d41a44a 100644
--- a/Documentation/RCU/checklist.txt
+++ b/Documentation/RCU/checklist.txt
@@ -310,6 +310,12 @@ over a rather long period of time, but improvements are always welcome!
code under the influence of preempt_disable(), you instead
need to use synchronize_irq() or synchronize_sched().
+ This same limitation also applies to synchronize_rcu_bh()
+ and synchronize_srcu(), as well as to the asynchronous and
+ expedited forms of the three primitives, namely call_rcu(),
+ call_rcu_bh(), call_srcu(), synchronize_rcu_expedited(),
+ synchronize_rcu_bh_expedited(), and synchronize_srcu_expedited().
+
12. Any lock acquired by an RCU callback must be acquired elsewhere
with softirq disabled, e.g., via spin_lock_irqsave(),
spin_lock_bh(), etc. Failing to disable irq on a given
diff --git a/Documentation/RCU/stallwarn.txt b/Documentation/RCU/stallwarn.txt
index 523364e4e1f1..1927151b386b 100644
--- a/Documentation/RCU/stallwarn.txt
+++ b/Documentation/RCU/stallwarn.txt
@@ -99,7 +99,7 @@ In kernels with CONFIG_RCU_FAST_NO_HZ, even more information is
printed:
INFO: rcu_preempt detected stall on CPU
- 0: (64628 ticks this GP) idle=dd5/3fffffffffffffff/0 drain=0 . timer=-1
+ 0: (64628 ticks this GP) idle=dd5/3fffffffffffffff/0 drain=0 . timer not pending
(t=65000 jiffies)
The "(64628 ticks this GP)" indicates that this CPU has taken more
@@ -116,13 +116,13 @@ number between the two "/"s is the value of the nesting, which will
be a small positive number if in the idle loop and a very large positive
number (as shown above) otherwise.
-For CONFIG_RCU_FAST_NO_HZ kernels, the "drain=0" indicates that the
-CPU is not in the process of trying to force itself into dyntick-idle
-state, the "." indicates that the CPU has not given up forcing RCU
-into dyntick-idle mode (it would be "H" otherwise), and the "timer=-1"
-indicates that the CPU has not recented forced RCU into dyntick-idle
-mode (it would otherwise indicate the number of microseconds remaining
-in this forced state).
+For CONFIG_RCU_FAST_NO_HZ kernels, the "drain=0" indicates that the CPU is
+not in the process of trying to force itself into dyntick-idle state, the
+"." indicates that the CPU has not given up forcing RCU into dyntick-idle
+mode (it would be "H" otherwise), and the "timer not pending" indicates
+that the CPU has not recently forced RCU into dyntick-idle mode (it
+would otherwise indicate the number of microseconds remaining in this
+forced state).
Multiple Warnings From One Stall
diff --git a/Documentation/RCU/whatisRCU.txt b/Documentation/RCU/whatisRCU.txt
index 69ee188515e7..bf0f6de2aa00 100644
--- a/Documentation/RCU/whatisRCU.txt
+++ b/Documentation/RCU/whatisRCU.txt
@@ -873,7 +873,7 @@ d. Do you need to treat NMI handlers, hardirq handlers,
and code segments with preemption disabled (whether
via preempt_disable(), local_irq_save(), local_bh_disable(),
or some other mechanism) as if they were explicit RCU readers?
- If so, you need RCU-sched.
+ If so, RCU-sched is the only choice that will work for you.
e. Do you need RCU grace periods to complete even in the face
of softirq monopolization of one or more of the CPUs? For
@@ -884,7 +884,12 @@ f. Is your workload too update-intensive for normal use of
RCU, but inappropriate for other synchronization mechanisms?
If so, consider SLAB_DESTROY_BY_RCU. But please be careful!
-g. Otherwise, use RCU.
+g. Do you need read-side critical sections that are respected
+ even though they are in the middle of the idle loop, during
+ user-mode execution, or on an offlined CPU? If so, SRCU is the
+ only choice that will work for you.
+
+h. Otherwise, use RCU.
Of course, this all assumes that you have determined that RCU is in fact
the right tool for your job.
diff --git a/arch/alpha/kernel/process.c b/arch/alpha/kernel/process.c
index d6fde98b74b3..83638aa096d5 100644
--- a/arch/alpha/kernel/process.c
+++ b/arch/alpha/kernel/process.c
@@ -28,6 +28,7 @@
#include <linux/tty.h>
#include <linux/console.h>
#include <linux/slab.h>
+#include <linux/rcupdate.h>
#include <asm/reg.h>
#include <asm/uaccess.h>
@@ -54,9 +55,12 @@ cpu_idle(void)
/* FIXME -- EV6 and LCA45 know how to power down
the CPU. */
+ rcu_idle_enter();
while (!need_resched())
cpu_relax();
- schedule();
+
+ rcu_idle_exit();
+ schedule_preempt_disabled();
}
}
diff --git a/arch/alpha/kernel/smp.c b/arch/alpha/kernel/smp.c
index 35ddc02bfa4a..a41ad90a97a6 100644
--- a/arch/alpha/kernel/smp.c
+++ b/arch/alpha/kernel/smp.c
@@ -166,6 +166,7 @@ smp_callin(void)
DBGS(("smp_callin: commencing CPU %d current %p active_mm %p\n",
cpuid, current, current->active_mm));
+ preempt_disable();
/* Do nothing. */
cpu_idle();
}
diff --git a/arch/cris/kernel/process.c b/arch/cris/kernel/process.c
index 66fd01728790..7f65be6f7f17 100644
--- a/arch/cris/kernel/process.c
+++ b/arch/cris/kernel/process.c
@@ -25,6 +25,7 @@
#include <linux/elfcore.h>
#include <linux/mqueue.h>
#include <linux/reboot.h>
+#include <linux/rcupdate.h>
//#define DEBUG
@@ -74,6 +75,7 @@ void cpu_idle (void)
{
/* endless idle loop with no priority at all */
while (1) {
+ rcu_idle_enter();
while (!need_resched()) {
void (*idle)(void);
/*
@@ -86,6 +88,7 @@ void cpu_idle (void)
idle = default_idle;
idle();
}
+ rcu_idle_exit();
schedule_preempt_disabled();
}
}
diff --git a/arch/frv/kernel/process.c b/arch/frv/kernel/process.c
index ff95f50efea5..2eb7fa5bf9d8 100644
--- a/arch/frv/kernel/process.c
+++ b/arch/frv/kernel/process.c
@@ -25,6 +25,7 @@
#include <linux/reboot.h>
#include <linux/interrupt.h>
#include <linux/pagemap.h>
+#include <linux/rcupdate.h>
#include <asm/asm-offsets.h>
#include <asm/uaccess.h>
@@ -69,12 +70,14 @@ void cpu_idle(void)
{
/* endless idle loop with no priority at all */
while (1) {
+ rcu_idle_enter();
while (!need_resched()) {
check_pgt_cache();
if (!frv_dma_inprogress && idle)
idle();
}
+ rcu_idle_exit();
schedule_preempt_disabled();
}
diff --git a/arch/h8300/kernel/process.c b/arch/h8300/kernel/process.c
index 0e9c315be104..f153ed1a4c08 100644
--- a/arch/h8300/kernel/process.c
+++ b/arch/h8300/kernel/process.c
@@ -36,6 +36,7 @@
#include <linux/reboot.h>
#include <linux/fs.h>
#include <linux/slab.h>
+#include <linux/rcupdate.h>
#include <asm/uaccess.h>
#include <asm/traps.h>
@@ -78,8 +79,10 @@ void (*idle)(void) = default_idle;
void cpu_idle(void)
{
while (1) {
+ rcu_idle_enter();
while (!need_resched())
idle();
+ rcu_idle_exit();
schedule_preempt_disabled();
}
}
diff --git a/arch/ia64/kernel/process.c b/arch/ia64/kernel/process.c
index dd6fc1449741..3e316ec0b835 100644
--- a/arch/ia64/kernel/process.c
+++ b/arch/ia64/kernel/process.c
@@ -29,6 +29,7 @@
#include <linux/kdebug.h>
#include <linux/utsname.h>
#include <linux/tracehook.h>
+#include <linux/rcupdate.h>
#include <asm/cpu.h>
#include <asm/delay.h>
@@ -279,6 +280,7 @@ cpu_idle (void)
/* endless idle loop with no priority at all */
while (1) {
+ rcu_idle_enter();
if (can_do_pal_halt) {
current_thread_info()->status &= ~TS_POLLING;
/*
@@ -309,6 +311,7 @@ cpu_idle (void)
normal_xtp();
#endif
}
+ rcu_idle_exit();
schedule_preempt_disabled();
check_pgt_cache();
if (cpu_is_offline(cpu))
diff --git a/arch/m32r/kernel/process.c b/arch/m32r/kernel/process.c
index 3a4a32b27208..384e63f3a4c4 100644
--- a/arch/m32r/kernel/process.c
+++ b/arch/m32r/kernel/process.c
@@ -26,6 +26,7 @@
#include <linux/ptrace.h>
#include <linux/unistd.h>
#include <linux/hardirq.h>
+#include <linux/rcupdate.h>
#include <asm/io.h>
#include <asm/uaccess.h>
@@ -82,6 +83,7 @@ void cpu_idle (void)
{
/* endless idle loop with no priority at all */
while (1) {
+ rcu_idle_enter();
while (!need_resched()) {
void (*idle)(void) = pm_idle;
@@ -90,6 +92,7 @@ void cpu_idle (void)
idle();
}
+ rcu_idle_exit();
schedule_preempt_disabled();
}
}
diff --git a/arch/m68k/kernel/process.c b/arch/m68k/kernel/process.c
index c488e3cfab53..ac2892e49c7c 100644
--- a/arch/m68k/kernel/process.c
+++ b/arch/m68k/kernel/process.c
@@ -25,6 +25,7 @@
#include <linux/reboot.h>
#include <linux/init_task.h>
#include <linux/mqueue.h>
+#include <linux/rcupdate.h>
#include <asm/uaccess.h>
#include <asm/traps.h>
@@ -75,8 +76,10 @@ void cpu_idle(void)
{
/* endless idle loop with no priority at all */
while (1) {
+ rcu_idle_enter();
while (!need_resched())
idle();
+ rcu_idle_exit();
schedule_preempt_disabled();
}
}
diff --git a/arch/mn10300/kernel/process.c b/arch/mn10300/kernel/process.c
index 7dab0cd36466..e9cceba193b6 100644
--- a/arch/mn10300/kernel/process.c
+++ b/arch/mn10300/kernel/process.c
@@ -25,6 +25,7 @@
#include <linux/err.h>
#include <linux/fs.h>
#include <linux/slab.h>
+#include <linux/rcupdate.h>
#include <asm/uaccess.h>
#include <asm/pgtable.h>
#include <asm/io.h>
@@ -107,6 +108,7 @@ void cpu_idle(void)
{
/* endless idle loop with no priority at all */
for (;;) {
+ rcu_idle_enter();
while (!need_resched()) {
void (*idle)(void);
@@ -121,6 +123,7 @@ void cpu_idle(void)
}
idle();
}
+ rcu_idle_exit();
schedule_preempt_disabled();
}
diff --git a/arch/parisc/kernel/process.c b/arch/parisc/kernel/process.c
index 2c05a9292a81..8c6b6b6561f0 100644
--- a/arch/parisc/kernel/process.c
+++ b/arch/parisc/kernel/process.c
@@ -48,6 +48,7 @@
#include <linux/unistd.h>
#include <linux/kallsyms.h>
#include <linux/uaccess.h>
+#include <linux/rcupdate.h>
#include <asm/io.h>
#include <asm/asm-offsets.h>
@@ -69,8 +70,10 @@ void cpu_idle(void)
/* endless idle loop with no priority at all */
while (1) {
+ rcu_idle_enter();
while (!need_resched())
barrier();
+ rcu_idle_exit();
schedule_preempt_disabled();
check_pgt_cache();
}
diff --git a/arch/score/kernel/process.c b/arch/score/kernel/process.c
index 2707023c7563..637970cfd3f4 100644
--- a/arch/score/kernel/process.c
+++ b/arch/score/kernel/process.c
@@ -27,6 +27,7 @@
#include <linux/reboot.h>
#include <linux/elfcore.h>
#include <linux/pm.h>
+#include <linux/rcupdate.h>
void (*pm_power_off)(void);
EXPORT_SYMBOL(pm_power_off);
@@ -50,9 +51,10 @@ void __noreturn cpu_idle(void)
{
/* endless idle loop with no priority at all */
while (1) {
+ rcu_idle_enter();
while (!need_resched())
barrier();
-
+ rcu_idle_exit();
schedule_preempt_disabled();
}
}
diff --git a/arch/x86/kernel/cpuid.c b/arch/x86/kernel/cpuid.c
index 39472dd2323f..60c78917190c 100644
--- a/arch/x86/kernel/cpuid.c
+++ b/arch/x86/kernel/cpuid.c
@@ -199,12 +199,14 @@ static int __init cpuid_init(void)
goto out_chrdev;
}
cpuid_class->devnode = cpuid_devnode;
+ get_online_cpus();
for_each_online_cpu(i) {
err = cpuid_device_create(i);
if (err != 0)
goto out_class;
}
register_hotcpu_notifier(&cpuid_class_cpu_notifier);
+ put_online_cpus();
err = 0;
goto out;
@@ -214,6 +216,7 @@ out_class:
for_each_online_cpu(i) {
cpuid_device_destroy(i);
}
+ put_online_cpus();
class_destroy(cpuid_class);
out_chrdev:
__unregister_chrdev(CPUID_MAJOR, 0, NR_CPUS, "cpu/cpuid");
@@ -225,11 +228,13 @@ static void __exit cpuid_exit(void)
{
int cpu = 0;
+ get_online_cpus();
for_each_online_cpu(cpu)
cpuid_device_destroy(cpu);
class_destroy(cpuid_class);
__unregister_chrdev(CPUID_MAJOR, 0, NR_CPUS, "cpu/cpuid");
unregister_hotcpu_notifier(&cpuid_class_cpu_notifier);
+ put_online_cpus();
}
module_init(cpuid_init);
diff --git a/arch/x86/kernel/msr.c b/arch/x86/kernel/msr.c
index eb113693f043..a7c5661f8496 100644
--- a/arch/x86/kernel/msr.c
+++ b/arch/x86/kernel/msr.c
@@ -257,12 +257,14 @@ static int __init msr_init(void)
goto out_chrdev;
}
msr_class->devnode = msr_devnode;
+ get_online_cpus();
for_each_online_cpu(i) {
err = msr_device_create(i);
if (err != 0)
goto out_class;
}
register_hotcpu_notifier(&msr_class_cpu_notifier);
+ put_online_cpus();
err = 0;
goto out;
@@ -271,6 +273,7 @@ out_class:
i = 0;
for_each_online_cpu(i)
msr_device_destroy(i);
+ put_online_cpus();
class_destroy(msr_class);
out_chrdev:
__unregister_chrdev(MSR_MAJOR, 0, NR_CPUS, "cpu/msr");
@@ -281,11 +284,13 @@ out:
static void __exit msr_exit(void)
{
int cpu = 0;
+ get_online_cpus();
for_each_online_cpu(cpu)
msr_device_destroy(cpu);
class_destroy(msr_class);
__unregister_chrdev(MSR_MAJOR, 0, NR_CPUS, "cpu/msr");
unregister_hotcpu_notifier(&msr_class_cpu_notifier);
+ put_online_cpus();
}
module_init(msr_init);
diff --git a/arch/xtensa/kernel/process.c b/arch/xtensa/kernel/process.c
index 2c8d6a3d250a..bc44311aa18c 100644
--- a/arch/xtensa/kernel/process.c
+++ b/arch/xtensa/kernel/process.c
@@ -31,6 +31,7 @@
#include <linux/mqueue.h>
#include <linux/fs.h>
#include <linux/slab.h>
+#include <linux/rcupdate.h>
#include <asm/pgtable.h>
#include <asm/uaccess.h>
@@ -110,8 +111,10 @@ void cpu_idle(void)
/* endless idle loop with no priority at all */
while (1) {
+ rcu_idle_enter();
while (!need_resched())
platform_idle();
+ rcu_idle_exit();
schedule_preempt_disabled();
}
}
diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h
index c5f856a040b9..5e4e6170f43a 100644
--- a/include/linux/interrupt.h
+++ b/include/linux/interrupt.h
@@ -430,6 +430,8 @@ enum
NR_SOFTIRQS
};
+#define SOFTIRQ_STOP_IDLE_MASK (~(1 << RCU_SOFTIRQ))
+
/* map softirq index to softirq name. update 'softirq_to_name' in
* kernel/softirq.c when adding a new softirq.
*/
diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index 115ead2b5155..0fbbd52e01f9 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -210,14 +210,12 @@ extern void exit_rcu(void);
* to nest RCU_NONIDLE() wrappers, but the nesting level is currently
* quite limited. If deeper nesting is required, it will be necessary
* to adjust DYNTICK_TASK_NESTING_VALUE accordingly.
- *
- * This macro may be used from process-level code only.
*/
#define RCU_NONIDLE(a) \
do { \
- rcu_idle_exit(); \
+ rcu_irq_enter(); \
do { a; } while (0); \
- rcu_idle_enter(); \
+ rcu_irq_exit(); \
} while (0)
/*
diff --git a/kernel/rcupdate.c b/kernel/rcupdate.c
index 4e6a61b15e86..29ca1c6da594 100644
--- a/kernel/rcupdate.c
+++ b/kernel/rcupdate.c
@@ -45,6 +45,7 @@
#include <linux/mutex.h>
#include <linux/export.h>
#include <linux/hardirq.h>
+#include <linux/delay.h>
#define CREATE_TRACE_POINTS
#include <trace/events/rcu.h>
@@ -81,6 +82,9 @@ void __rcu_read_unlock(void)
} else {
barrier(); /* critical section before exit code. */
t->rcu_read_lock_nesting = INT_MIN;
+#ifdef CONFIG_PROVE_RCU_DELAY
+ udelay(10); /* Make preemption more probable. */
+#endif /* #ifdef CONFIG_PROVE_RCU_DELAY */
barrier(); /* assign before ->rcu_read_unlock_special load */
if (unlikely(ACCESS_ONCE(t->rcu_read_unlock_special)))
rcu_read_unlock_special(t);
diff --git a/kernel/rcutiny.c b/kernel/rcutiny.c
index 547b1fe5b052..e4c6a598d6f7 100644
--- a/kernel/rcutiny.c
+++ b/kernel/rcutiny.c
@@ -56,25 +56,28 @@ static void __call_rcu(struct rcu_head *head,
static long long rcu_dynticks_nesting = DYNTICK_TASK_EXIT_IDLE;
/* Common code for rcu_idle_enter() and rcu_irq_exit(), see kernel/rcutree.c. */
-static void rcu_idle_enter_common(long long oldval)
+static void rcu_idle_enter_common(long long newval)
{
- if (rcu_dynticks_nesting) {
+ if (newval) {
RCU_TRACE(trace_rcu_dyntick("--=",
- oldval, rcu_dynticks_nesting));
+ rcu_dynticks_nesting, newval));
+ rcu_dynticks_nesting = newval;
return;
}
- RCU_TRACE(trace_rcu_dyntick("Start", oldval, rcu_dynticks_nesting));
+ RCU_TRACE(trace_rcu_dyntick("Start", rcu_dynticks_nesting, newval));
if (!is_idle_task(current)) {
struct task_struct *idle = idle_task(smp_processor_id());
RCU_TRACE(trace_rcu_dyntick("Error on entry: not idle task",
- oldval, rcu_dynticks_nesting));
+ rcu_dynticks_nesting, newval));
ftrace_dump(DUMP_ALL);
WARN_ONCE(1, "Current pid: %d comm: %s / Idle pid: %d comm: %s",
current->pid, current->comm,
idle->pid, idle->comm); /* must be idle task! */
}
rcu_sched_qs(0); /* implies rcu_bh_qsctr_inc(0) */
+ barrier();
+ rcu_dynticks_nesting = newval;
}
/*
@@ -84,17 +87,16 @@ static void rcu_idle_enter_common(long long oldval)
void rcu_idle_enter(void)
{
unsigned long flags;
- long long oldval;
+ long long newval;
local_irq_save(flags);
- oldval = rcu_dynticks_nesting;
WARN_ON_ONCE((rcu_dynticks_nesting & DYNTICK_TASK_NEST_MASK) == 0);
if ((rcu_dynticks_nesting & DYNTICK_TASK_NEST_MASK) ==
DYNTICK_TASK_NEST_VALUE)
- rcu_dynticks_nesting = 0;
+ newval = 0;
else
- rcu_dynticks_nesting -= DYNTICK_TASK_NEST_VALUE;
- rcu_idle_enter_common(oldval);
+ newval = rcu_dynticks_nesting - DYNTICK_TASK_NEST_VALUE;
+ rcu_idle_enter_common(newval);
local_irq_restore(flags);
}
EXPORT_SYMBOL_GPL(rcu_idle_enter);
@@ -105,15 +107,15 @@ EXPORT_SYMBOL_GPL(rcu_idle_enter);
void rcu_irq_exit(void)
{
unsigned long flags;
- long long oldval;
+ long long newval;
local_irq_save(flags);
- oldval = rcu_dynticks_nesting;
- rcu_dynticks_nesting--;
- WARN_ON_ONCE(rcu_dynticks_nesting < 0);
- rcu_idle_enter_common(oldval);
+ newval = rcu_dynticks_nesting - 1;
+ WARN_ON_ONCE(newval < 0);
+ rcu_idle_enter_common(newval);
local_irq_restore(flags);
}
+EXPORT_SYMBOL_GPL(rcu_irq_exit);
/* Common code for rcu_idle_exit() and rcu_irq_enter(), see kernel/rcutree.c. */
static void rcu_idle_exit_common(long long oldval)
@@ -171,6 +173,7 @@ void rcu_irq_enter(void)
rcu_idle_exit_common(oldval);
local_irq_restore(flags);
}
+EXPORT_SYMBOL_GPL(rcu_irq_enter);
#ifdef CONFIG_DEBUG_LOCK_ALLOC
diff --git a/kernel/rcutiny_plugin.h b/kernel/rcutiny_plugin.h
index 918fd1e8509c..3d0190282204 100644
--- a/kernel/rcutiny_plugin.h
+++ b/kernel/rcutiny_plugin.h
@@ -278,7 +278,7 @@ static int rcu_boost(void)
rcu_preempt_ctrlblk.exp_tasks == NULL)
return 0; /* Nothing to boost. */
- raw_local_irq_save(flags);
+ local_irq_save(flags);
/*
* Recheck with irqs disabled: all tasks in need of boosting
@@ -287,7 +287,7 @@ static int rcu_boost(void)
*/
if (rcu_preempt_ctrlblk.boost_tasks == NULL &&
rcu_preempt_ctrlblk.exp_tasks == NULL) {
- raw_local_irq_restore(flags);
+ local_irq_restore(flags);
return 0;
}
@@ -317,7 +317,7 @@ static int rcu_boost(void)
t = container_of(tb, struct task_struct, rcu_node_entry);
rt_mutex_init_proxy_locked(&mtx, t);
t->rcu_boost_mutex = &mtx;
- raw_local_irq_restore(flags);
+ local_irq_restore(flags);
rt_mutex_lock(&mtx);
rt_mutex_unlock(&mtx); /* Keep lockdep happy. */
@@ -991,9 +991,9 @@ static void rcu_trace_sub_qlen(struct rcu_ctrlblk *rcp, int n)
{
unsigned long flags;
- raw_local_irq_save(flags);
+ local_irq_save(flags);
rcp->qlen -= n;
- raw_local_irq_restore(flags);
+ local_irq_restore(flags);
}
/*
diff --git a/kernel/rcutorture.c b/kernel/rcutorture.c
index 25b15033c61f..aaa7b9f3532a 100644
--- a/kernel/rcutorture.c
+++ b/kernel/rcutorture.c
@@ -53,10 +53,11 @@ MODULE_AUTHOR("Paul E. McKenney <paulmck@us.ibm.com> and Josh Triplett <josh@fre
static int nreaders = -1; /* # reader threads, defaults to 2*ncpus */
static int nfakewriters = 4; /* # fake writer threads */
-static int stat_interval; /* Interval between stats, in seconds. */
- /* Defaults to "only at end of test". */
+static int stat_interval = 60; /* Interval between stats, in seconds. */
+ /* Zero means "only at end of test". */
static bool verbose; /* Print more debug info. */
-static bool test_no_idle_hz; /* Test RCU's support for tickless idle CPUs. */
+static bool test_no_idle_hz = true;
+ /* Test RCU support for tickless idle CPUs. */
static int shuffle_interval = 3; /* Interval between shuffles (in sec)*/
static int stutter = 5; /* Start/stop testing interval (in sec) */
static int irqreader = 1; /* RCU readers from irq (timers). */
@@ -119,11 +120,11 @@ MODULE_PARM_DESC(torture_type, "Type of RCU to torture (rcu, rcu_bh, srcu)");
#define TORTURE_FLAG "-torture:"
#define PRINTK_STRING(s) \
- do { printk(KERN_ALERT "%s" TORTURE_FLAG s "\n", torture_type); } while (0)
+ do { pr_alert("%s" TORTURE_FLAG s "\n", torture_type); } while (0)
#define VERBOSE_PRINTK_STRING(s) \
- do { if (verbose) printk(KERN_ALERT "%s" TORTURE_FLAG s "\n", torture_type); } while (0)
+ do { if (verbose) pr_alert("%s" TORTURE_FLAG s "\n", torture_type); } while (0)
#define VERBOSE_PRINTK_ERRSTRING(s) \
- do { if (verbose) printk(KERN_ALERT "%s" TORTURE_FLAG "!!! " s "\n", torture_type); } while (0)
+ do { if (verbose) pr_alert("%s" TORTURE_FLAG "!!! " s "\n", torture_type); } while (0)
static char printk_buf[4096];
@@ -176,8 +177,14 @@ static long n_rcu_torture_boosts;
static long n_rcu_torture_timers;
static long n_offline_attempts;
static long n_offline_successes;
+static unsigned long sum_offline;
+static int min_offline = -1;
+static int max_offline;
static long n_online_attempts;
static long n_online_successes;
+static unsigned long sum_online;
+static int min_online = -1;
+static int max_online;
static long n_barrier_attempts;
static long n_barrier_successes;
static struct list_head rcu_torture_removed;
@@ -235,7 +242,7 @@ rcutorture_shutdown_notify(struct notifier_block *unused1,
if (fullstop == FULLSTOP_DONTSTOP)
fullstop = FULLSTOP_SHUTDOWN;
else
- printk(KERN_WARNING /* but going down anyway, so... */
+ pr_warn(/* but going down anyway, so... */
"Concurrent 'rmmod rcutorture' and shutdown illegal!\n");
mutex_unlock(&fullstop_mutex);
return NOTIFY_DONE;
@@ -248,7 +255,7 @@ rcutorture_shutdown_notify(struct notifier_block *unused1,
static void rcutorture_shutdown_absorb(char *title)
{
if (ACCESS_ONCE(fullstop) == FULLSTOP_SHUTDOWN) {
- printk(KERN_NOTICE
+ pr_notice(
"rcutorture thread %s parking due to system shutdown\n",
title);
schedule_timeout_uninterruptible(MAX_SCHEDULE_TIMEOUT);
@@ -1214,11 +1221,13 @@ rcu_torture_printk(char *page)
n_rcu_torture_boost_failure,
n_rcu_torture_boosts,
n_rcu_torture_timers);
- cnt += sprintf(&page[cnt], "onoff: %ld/%ld:%ld/%ld ",
- n_online_successes,
- n_online_attempts,
- n_offline_successes,
- n_offline_attempts);
+ cnt += sprintf(&page[cnt],
+ "onoff: %ld/%ld:%ld/%ld %d,%d:%d,%d %lu:%lu (HZ=%d) ",
+ n_online_successes, n_online_attempts,
+ n_offline_successes, n_offline_attempts,
+ min_online, max_online,
+ min_offline, max_offline,
+ sum_online, sum_offline, HZ);
cnt += sprintf(&page[cnt], "barrier: %ld/%ld:%ld",
n_barrier_successes,
n_barrier_attempts,
@@ -1267,7 +1276,7 @@ rcu_torture_stats_print(void)
int cnt;
cnt = rcu_torture_printk(printk_buf);
- printk(KERN_ALERT "%s", printk_buf);
+ pr_alert("%s", printk_buf);
}
/*
@@ -1380,20 +1389,20 @@ rcu_torture_stutter(void *arg)
static inline void
rcu_torture_print_module_parms(struct rcu_torture_ops *cur_ops, char *tag)
{
- printk(KERN_ALERT "%s" TORTURE_FLAG
- "--- %s: nreaders=%d nfakewriters=%d "
- "stat_interval=%d verbose=%d test_no_idle_hz=%d "
- "shuffle_interval=%d stutter=%d irqreader=%d "
- "fqs_duration=%d fqs_holdoff=%d fqs_stutter=%d "
- "test_boost=%d/%d test_boost_interval=%d "
- "test_boost_duration=%d shutdown_secs=%d "
- "onoff_interval=%d onoff_holdoff=%d\n",
- torture_type, tag, nrealreaders, nfakewriters,
- stat_interval, verbose, test_no_idle_hz, shuffle_interval,
- stutter, irqreader, fqs_duration, fqs_holdoff, fqs_stutter,
- test_boost, cur_ops->can_boost,
- test_boost_interval, test_boost_duration, shutdown_secs,
- onoff_interval, onoff_holdoff);
+ pr_alert("%s" TORTURE_FLAG
+ "--- %s: nreaders=%d nfakewriters=%d "
+ "stat_interval=%d verbose=%d test_no_idle_hz=%d "
+ "shuffle_interval=%d stutter=%d irqreader=%d "
+ "fqs_duration=%d fqs_holdoff=%d fqs_stutter=%d "
+ "test_boost=%d/%d test_boost_interval=%d "
+ "test_boost_duration=%d shutdown_secs=%d "
+ "onoff_interval=%d onoff_holdoff=%d\n",
+ torture_type, tag, nrealreaders, nfakewriters,
+ stat_interval, verbose, test_no_idle_hz, shuffle_interval,
+ stutter, irqreader, fqs_duration, fqs_holdoff, fqs_stutter,
+ test_boost, cur_ops->can_boost,
+ test_boost_interval, test_boost_duration, shutdown_secs,
+ onoff_interval, onoff_holdoff);
}
static struct notifier_block rcutorture_shutdown_nb = {
@@ -1460,9 +1469,9 @@ rcu_torture_shutdown(void *arg)
!kthread_should_stop()) {
delta = shutdown_time - jiffies_snap;
if (verbose)
- printk(KERN_ALERT "%s" TORTURE_FLAG
- "rcu_torture_shutdown task: %lu jiffies remaining\n",
- torture_type, delta);
+ pr_alert("%s" TORTURE_FLAG
+ "rcu_torture_shutdown task: %lu jiffies remaining\n",
+ torture_type, delta);
schedule_timeout_interruptible(delta);
jiffies_snap = ACCESS_ONCE(jiffies);
}
@@ -1490,8 +1499,10 @@ static int __cpuinit
rcu_torture_onoff(void *arg)
{
int cpu;
+ unsigned long delta;
int maxcpu = -1;
DEFINE_RCU_RANDOM(rand);
+ unsigned long starttime;
VERBOSE_PRINTK_STRING("rcu_torture_onoff task started");
for_each_online_cpu(cpu)
@@ -1506,29 +1517,51 @@ rcu_torture_onoff(void *arg)
cpu = (rcu_random(&rand) >> 4) % (maxcpu + 1);
if (cpu_online(cpu) && cpu_is_hotpluggable(cpu)) {
if (verbose)
- printk(KERN_ALERT "%s" TORTURE_FLAG
- "rcu_torture_onoff task: offlining %d\n",
- torture_type, cpu);
+ pr_alert("%s" TORTURE_FLAG
+ "rcu_torture_onoff task: offlining %d\n",
+ torture_type, cpu);
+ starttime = jiffies;
n_offline_attempts++;
if (cpu_down(cpu) == 0) {
if (verbose)
- printk(KERN_ALERT "%s" TORTURE_FLAG
- "rcu_torture_onoff task: offlined %d\n",
- torture_type, cpu);
+ pr_alert("%s" TORTURE_FLAG
+ "rcu_torture_onoff task: offlined %d\n",
+ torture_type, cpu);
n_offline_successes++;
+ delta = jiffies - starttime;
+ sum_offline += delta;
+ if (min_offline < 0) {
+ min_offline = delta;
+ max_offline = delta;
+ }
+ if (min_offline > delta)
+ min_offline = delta;
+ if (max_offline < delta)
+ max_offline = delta;
}
} else if (cpu_is_hotpluggable(cpu)) {
if (verbose)
- printk(KERN_ALERT "%s" TORTURE_FLAG
- "rcu_torture_onoff task: onlining %d\n",
- torture_type, cpu);
+ pr_alert("%s" TORTURE_FLAG
+ "rcu_torture_onoff task: onlining %d\n",
+ torture_type, cpu);
+ starttime = jiffies;
n_online_attempts++;
if (cpu_up(cpu) == 0) {
if (verbose)
- printk(KERN_ALERT "%s" TORTURE_FLAG
- "rcu_torture_onoff task: onlined %d\n",
- torture_type, cpu);
+ pr_alert("%s" TORTURE_FLAG
+ "rcu_torture_onoff task: onlined %d\n",
+ torture_type, cpu);
n_online_successes++;
+ delta = jiffies - starttime;
+ sum_online += delta;
+ if (min_online < 0) {
+ min_online = delta;
+ max_online = delta;
+ }
+ if (min_online > delta)
+ min_online = delta;
+ if (max_online < delta)
+ max_online = delta;
}
}
schedule_timeout_interruptible(onoff_interval * HZ);
@@ -1593,14 +1626,14 @@ static int __cpuinit rcu_torture_stall(void *args)
if (!kthread_should_stop()) {
stop_at = get_seconds() + stall_cpu;
/* RCU CPU stall is expected behavior in following code. */
- printk(KERN_ALERT "rcu_torture_stall start.\n");
+ pr_alert("rcu_torture_stall start.\n");
rcu_read_lock();
preempt_disable();
while (ULONG_CMP_LT(get_seconds(), stop_at))
continue; /* Induce RCU CPU stall warning. */
preempt_enable();
rcu_read_unlock();
- printk(KERN_ALERT "rcu_torture_stall end.\n");
+ pr_alert("rcu_torture_stall end.\n");
}
rcutorture_shutdown_absorb("rcu_torture_stall");
while (!kthread_should_stop())
@@ -1716,12 +1749,12 @@ static int rcu_torture_barrier_init(void)
if (n_barrier_cbs == 0)
return 0;
if (cur_ops->call == NULL || cur_ops->cb_barrier == NULL) {
- printk(KERN_ALERT "%s" TORTURE_FLAG
- " Call or barrier ops missing for %s,\n",
- torture_type, cur_ops->name);
- printk(KERN_ALERT "%s" TORTURE_FLAG
- " RCU barrier testing omitted from run.\n",
- torture_type);
+ pr_alert("%s" TORTURE_FLAG
+ " Call or barrier ops missing for %s,\n",
+ torture_type, cur_ops->name);
+ pr_alert("%s" TORTURE_FLAG
+ " RCU barrier testing omitted from run.\n",
+ torture_type);
return 0;
}
atomic_set(&barrier_cbs_count, 0);
@@ -1814,7 +1847,7 @@ rcu_torture_cleanup(void)
mutex_lock(&fullstop_mutex);
rcutorture_record_test_transition();
if (fullstop == FULLSTOP_SHUTDOWN) {
- printk(KERN_WARNING /* but going down anyway, so... */
+ pr_warn(/* but going down anyway, so... */
"Concurrent 'rmmod rcutorture' and shutdown illegal!\n");
mutex_unlock(&fullstop_mutex);
schedule_timeout_uninterruptible(10);
@@ -1938,17 +1971,17 @@ rcu_torture_init(void)
break;
}
if (i == ARRAY_SIZE(torture_ops)) {
- printk(KERN_ALERT "rcu-torture: invalid torture type: \"%s\"\n",
- torture_type);
- printk(KERN_ALERT "rcu-torture types:");
+ pr_alert("rcu-torture: invalid torture type: \"%s\"\n",
+ torture_type);
+ pr_alert("rcu-torture types:");
for (i = 0; i < ARRAY_SIZE(torture_ops); i++)
- printk(KERN_ALERT " %s", torture_ops[i]->name);
- printk(KERN_ALERT "\n");
+ pr_alert(" %s", torture_ops[i]->name);
+ pr_alert("\n");
mutex_unlock(&fullstop_mutex);
return -EINVAL;
}
if (cur_ops->fqs == NULL && fqs_duration != 0) {
- printk(KERN_ALERT "rcu-torture: ->fqs NULL and non-zero fqs_duration, fqs disabled.\n");
+ pr_alert("rcu-torture: ->fqs NULL and non-zero fqs_duration, fqs disabled.\n");
fqs_duration = 0;
}
if (cur_ops->init)
@@ -1996,14 +2029,15 @@ rcu_torture_init(void)
/* Start up the kthreads. */
VERBOSE_PRINTK_STRING("Creating rcu_torture_writer task");
- writer_task = kthread_run(rcu_torture_writer, NULL,
- "rcu_torture_writer");
+ writer_task = kthread_create(rcu_torture_writer, NULL,
+ "rcu_torture_writer");
if (IS_ERR(writer_task)) {
firsterr = PTR_ERR(writer_task);
VERBOSE_PRINTK_ERRSTRING("Failed to create writer");
writer_task = NULL;
goto unwind;
}
+ wake_up_process(writer_task);
fakewriter_tasks = kzalloc(nfakewriters * sizeof(fakewriter_tasks[0]),
GFP_KERNEL);
if (fakewriter_tasks == NULL) {
@@ -2118,14 +2152,15 @@ rcu_torture_init(void)
}
if (shutdown_secs > 0) {
shutdown_time = jiffies + shutdown_secs * HZ;
- shutdown_task = kthread_run(rcu_torture_shutdown, NULL,
- "rcu_torture_shutdown");
+ shutdown_task = kthread_create(rcu_torture_shutdown, NULL,
+ "rcu_torture_shutdown");
if (IS_ERR(shutdown_task)) {
firsterr = PTR_ERR(shutdown_task);
VERBOSE_PRINTK_ERRSTRING("Failed to create shutdown");
shutdown_task = NULL;
goto unwind;
}
+ wake_up_process(shutdown_task);
}
i = rcu_torture_onoff_init();
if (i != 0) {
diff --git a/kernel/rcutree.c b/kernel/rcutree.c
index b703989148e4..80fd02e5d115 100644
--- a/kernel/rcutree.c
+++ b/kernel/rcutree.c
@@ -323,35 +323,6 @@ static struct rcu_node *rcu_get_root(struct rcu_state *rsp)
}
/*
- * If the specified CPU is offline, tell the caller that it is in
- * a quiescent state. Otherwise, whack it with a reschedule IPI.
- * Grace periods can end up waiting on an offline CPU when that
- * CPU is in the process of coming online -- it will be added to the
- * rcu_node bitmasks before it actually makes it online. The same thing
- * can happen while a CPU is in the process of coming online. Because this
- * race is quite rare, we check for it after detecting that the grace
- * period has been delayed rather than checking each and every CPU
- * each and every time we start a new grace period.
- */
-static int rcu_implicit_offline_qs(struct rcu_data *rdp)
-{
- /*
- * If the CPU is offline for more than a jiffy, it is in a quiescent
- * state. We can trust its state not to change because interrupts
- * are disabled. The reason for the jiffy's worth of slack is to
- * handle CPUs initializing on the way up and finding their way
- * to the idle loop on the way down.
- */
- if (cpu_is_offline(rdp->cpu) &&
- ULONG_CMP_LT(rdp->rsp->gp_start + 2, jiffies)) {
- trace_rcu_fqs(rdp->rsp->name, rdp->gpnum, rdp->cpu, "ofl");
- rdp->offline_fqs++;
- return 1;
- }
- return 0;
-}
-
-/*
* rcu_idle_enter_common - inform RCU that current CPU is moving towards idle
*
* If the new value of the ->dynticks_nesting counter now is zero,
@@ -679,7 +650,7 @@ static int dyntick_save_progress_counter(struct rcu_data *rdp)
* Return true if the specified CPU has passed through a quiescent
* state by virtue of being in or having passed through an dynticks
* idle state since the last call to dyntick_save_progress_counter()
- * for this same CPU.
+ * for this same CPU, or by virtue of having been offline.
*/
static int rcu_implicit_dynticks_qs(struct rcu_data *rdp)
{
@@ -703,8 +674,26 @@ static int rcu_implicit_dynticks_qs(struct rcu_data *rdp)
return 1;
}
- /* Go check for the CPU being offline. */
- return rcu_implicit_offline_qs(rdp);
+ /*
+ * Check for the CPU being offline, but only if the grace period
+ * is old enough. We don't need to worry about the CPU changing
+ * state: If we see it offline even once, it has been through a
+ * quiescent state.
+ *
+ * The reason for insisting that the grace period be at least
+ * one jiffy old is that CPUs that are not quite online, or that
+ * have just gone offline, can still execute RCU read-side critical
+ * sections.
+ */
+ if (ULONG_CMP_GE(rdp->rsp->gp_start + 2, jiffies))
+ return 0; /* Grace period is not old enough. */
+ barrier();
+ if (cpu_is_offline(rdp->cpu)) {
+ trace_rcu_fqs(rdp->rsp->name, rdp->gpnum, rdp->cpu, "ofl");
+ rdp->offline_fqs++;
+ return 1;
+ }
+ return 0;
}
static int jiffies_till_stall_check(void)
@@ -761,14 +750,15 @@ static void print_other_cpu_stall(struct rcu_state *rsp)
rcu_for_each_leaf_node(rsp, rnp) {
raw_spin_lock_irqsave(&rnp->lock, flags);
ndetected += rcu_print_task_stall(rnp);
+ if (rnp->qsmask != 0) {
+ for (cpu = 0; cpu <= rnp->grphi - rnp->grplo; cpu++)
+ if (rnp->qsmask & (1UL << cpu)) {
+ print_cpu_stall_info(rsp,
+ rnp->grplo + cpu);
+ ndetected++;
+ }
+ }
raw_spin_unlock_irqrestore(&rnp->lock, flags);
- if (rnp->qsmask == 0)
- continue;
- for (cpu = 0; cpu <= rnp->grphi - rnp->grplo; cpu++)
- if (rnp->qsmask & (1UL << cpu)) {
- print_cpu_stall_info(rsp, rnp->grplo + cpu);
- ndetected++;
- }
}
/*
@@ -833,7 +823,8 @@ static void check_cpu_stall(struct rcu_state *rsp, struct rcu_data *rdp)
j = ACCESS_ONCE(jiffies);
js = ACCESS_ONCE(rsp->jiffies_stall);
rnp = rdp->mynode;
- if ((ACCESS_ONCE(rnp->qsmask) & rdp->grpmask) && ULONG_CMP_GE(j, js)) {
+ if (rcu_gp_in_progress(rsp) &&
+ (ACCESS_ONCE(rnp->qsmask) & rdp->grpmask) && ULONG_CMP_GE(j, js)) {
/* We haven't checked in, so go dump stack. */
print_cpu_stall(rsp);
@@ -1486,17 +1477,6 @@ static void rcu_adopt_orphan_cbs(struct rcu_state *rsp)
int i;
struct rcu_data *rdp = __this_cpu_ptr(rsp->rda);
- /*
- * If there is an rcu_barrier() operation in progress, then
- * only the task doing that operation is permitted to adopt
- * callbacks. To do otherwise breaks rcu_barrier() and friends
- * by causing them to fail to wait for the callbacks in the
- * orphanage.
- */
- if (rsp->rcu_barrier_in_progress &&
- rsp->rcu_barrier_in_progress != current)
- return;
-
/* Do the accounting first. */
rdp->qlen_lazy += rsp->qlen_lazy;
rdp->qlen += rsp->qlen;
@@ -1551,9 +1531,8 @@ static void rcu_cleanup_dying_cpu(struct rcu_state *rsp)
* The CPU has been completely removed, and some other CPU is reporting
* this fact from process context. Do the remainder of the cleanup,
* including orphaning the outgoing CPU's RCU callbacks, and also
- * adopting them, if there is no _rcu_barrier() instance running.
- * There can only be one CPU hotplug operation at a time, so no other
- * CPU can be attempting to update rcu_cpu_kthread_task.
+ * adopting them. There can only be one CPU hotplug operation at a time,
+ * so no other CPU can be attempting to update rcu_cpu_kthread_task.
*/
static void rcu_cleanup_dead_cpu(int cpu, struct rcu_state *rsp)
{
@@ -1611,14 +1590,13 @@ static void rcu_cleanup_dead_cpu(int cpu, struct rcu_state *rsp)
WARN_ONCE(rdp->qlen != 0 || rdp->nxtlist != NULL,
"rcu_cleanup_dead_cpu: Callbacks on offline CPU %d: qlen=%lu, nxtlist=%p\n",
cpu, rdp->qlen, rdp->nxtlist);
+ init_callback_list(rdp);
+ /* Disallow further callbacks on this CPU. */
+ rdp->nxttail[RCU_NEXT_TAIL] = NULL;
}
#else /* #ifdef CONFIG_HOTPLUG_CPU */
-static void rcu_adopt_orphan_cbs(struct rcu_state *rsp)
-{
-}
-
static void rcu_cleanup_dying_cpu(struct rcu_state *rsp)
{
}
@@ -1987,8 +1965,6 @@ __call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu),
head->func = func;
head->next = NULL;
- smp_mb(); /* Ensure RCU update seen before callback registry. */
-
/*
* Opportunistically note grace-period endings and beginnings.
* Note that we might see a beginning right after we see an
@@ -1999,6 +1975,12 @@ __call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu),
rdp = this_cpu_ptr(rsp->rda);
/* Add the callback to our list. */
+ if (unlikely(rdp->nxttail[RCU_NEXT_TAIL] == NULL)) {
+ /* __call_rcu() is illegal on offline CPU; leak the callback. */
+ WARN_ON_ONCE(1);
+ local_irq_restore(flags);
+ return;
+ }
ACCESS_ONCE(rdp->qlen)++;
if (lazy)
rdp->qlen_lazy++;
@@ -2367,13 +2349,10 @@ static void rcu_barrier_func(void *type)
static void _rcu_barrier(struct rcu_state *rsp)
{
int cpu;
- unsigned long flags;
struct rcu_data *rdp;
- struct rcu_data rd;
unsigned long snap = ACCESS_ONCE(rsp->n_barrier_done);
unsigned long snap_done;
- init_rcu_head_on_stack(&rd.barrier_head);
_rcu_barrier_trace(rsp, "Begin", -1, snap);
/* Take mutex to serialize concurrent rcu_barrier() requests. */
@@ -2413,70 +2392,30 @@ static void _rcu_barrier(struct rcu_state *rsp)
/*
* Initialize the count to one rather than to zero in order to
* avoid a too-soon return to zero in case of a short grace period
- * (or preemption of this task). Also flag this task as doing
- * an rcu_barrier(). This will prevent anyone else from adopting
- * orphaned callbacks, which could cause otherwise failure if a
- * CPU went offline and quickly came back online. To see this,
- * consider the following sequence of events:
- *
- * 1. We cause CPU 0 to post an rcu_barrier_callback() callback.
- * 2. CPU 1 goes offline, orphaning its callbacks.
- * 3. CPU 0 adopts CPU 1's orphaned callbacks.
- * 4. CPU 1 comes back online.
- * 5. We cause CPU 1 to post an rcu_barrier_callback() callback.
- * 6. Both rcu_barrier_callback() callbacks are invoked, awakening
- * us -- but before CPU 1's orphaned callbacks are invoked!!!
+ * (or preemption of this task). Exclude CPU-hotplug operations
+ * to ensure that no offline CPU has callbacks queued.
*/
init_completion(&rsp->barrier_completion);
atomic_set(&rsp->barrier_cpu_count, 1);
- raw_spin_lock_irqsave(&rsp->onofflock, flags);
- rsp->rcu_barrier_in_progress = current;
- raw_spin_unlock_irqrestore(&rsp->onofflock, flags);
+ get_online_cpus();
/*
- * Force every CPU with callbacks to register a new callback
- * that will tell us when all the preceding callbacks have
- * been invoked. If an offline CPU has callbacks, wait for
- * it to either come back online or to finish orphaning those
- * callbacks.
+ * Force each CPU with callbacks to register a new callback.
+ * When that callback is invoked, we will know that all of the
+ * corresponding CPU's preceding callbacks have been invoked.
*/
- for_each_possible_cpu(cpu) {
- preempt_disable();
+ for_each_online_cpu(cpu) {
rdp = per_cpu_ptr(rsp->rda, cpu);
- if (cpu_is_offline(cpu)) {
- _rcu_barrier_trace(rsp, "Offline", cpu,
- rsp->n_barrier_done);
- preempt_enable();
- while (cpu_is_offline(cpu) && ACCESS_ONCE(rdp->qlen))
- schedule_timeout_interruptible(1);
- } else if (ACCESS_ONCE(rdp->qlen)) {
+ if (ACCESS_ONCE(rdp->qlen)) {
_rcu_barrier_trace(rsp, "OnlineQ", cpu,
rsp->n_barrier_done);
smp_call_function_single(cpu, rcu_barrier_func, rsp, 1);
- preempt_enable();
} else {
_rcu_barrier_trace(rsp, "OnlineNQ", cpu,
rsp->n_barrier_done);
- preempt_enable();
}
}
-
- /*
- * Now that all online CPUs have rcu_barrier_callback() callbacks
- * posted, we can adopt all of the orphaned callbacks and place
- * an rcu_barrier_callback() callback after them. When that is done,
- * we are guaranteed to have an rcu_barrier_callback() callback
- * following every callback that could possibly have been
- * registered before _rcu_barrier() was called.
- */
- raw_spin_lock_irqsave(&rsp->onofflock, flags);
- rcu_adopt_orphan_cbs(rsp);
- rsp->rcu_barrier_in_progress = NULL;
- raw_spin_unlock_irqrestore(&rsp->onofflock, flags);
- atomic_inc(&rsp->barrier_cpu_count);
- smp_mb__after_atomic_inc(); /* Ensure atomic_inc() before callback. */
- rd.rsp = rsp;
- rsp->call(&rd.barrier_head, rcu_barrier_callback);
+ put_online_cpus();
/*
* Now that we have an rcu_barrier_callback() callback on each
@@ -2497,8 +2436,6 @@ static void _rcu_barrier(struct rcu_state *rsp)
/* Other rcu_barrier() invocations can now safely proceed. */
mutex_unlock(&rsp->barrier_mutex);
-
- destroy_rcu_head_on_stack(&rd.barrier_head);
}
/**
@@ -2564,6 +2501,7 @@ rcu_init_percpu_data(int cpu, struct rcu_state *rsp, int preemptible)
rdp->qlen_last_fqs_check = 0;
rdp->n_force_qs_snap = rsp->n_force_qs;
rdp->blimit = blimit;
+ init_callback_list(rdp); /* Re-enable callbacks on this CPU. */
rdp->dynticks->dynticks_nesting = DYNTICK_TASK_EXIT_IDLE;
atomic_set(&rdp->dynticks->dynticks,
(atomic_read(&rdp->dynticks->dynticks) & ~0x1) + 1);
diff --git a/kernel/rcutree.h b/kernel/rcutree.h
index 935dd4ca6816..3cf71ce206e0 100644
--- a/kernel/rcutree.h
+++ b/kernel/rcutree.h
@@ -396,9 +396,6 @@ struct rcu_state {
struct rcu_head **orphan_donetail; /* Tail of above. */
long qlen_lazy; /* Number of lazy callbacks. */
long qlen; /* Total number of callbacks. */
- struct task_struct *rcu_barrier_in_progress;
- /* Task doing rcu_barrier(), */
- /* or NULL if no barrier. */
struct mutex barrier_mutex; /* Guards barrier fields. */
atomic_t barrier_cpu_count; /* # CPUs waiting on. */
struct completion barrier_completion; /* Wake at barrier end. */
diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h
index 4734afbea73a..46d7d6cf16db 100644
--- a/kernel/rcutree_plugin.h
+++ b/kernel/rcutree_plugin.h
@@ -421,9 +421,11 @@ static void rcu_print_detail_task_stall_rnp(struct rcu_node *rnp)
unsigned long flags;
struct task_struct *t;
- if (!rcu_preempt_blocked_readers_cgp(rnp))
- return;
raw_spin_lock_irqsave(&rnp->lock, flags);
+ if (!rcu_preempt_blocked_readers_cgp(rnp)) {
+ raw_spin_unlock_irqrestore(&rnp->lock, flags);
+ return;
+ }
t = list_entry(rnp->gp_tasks,
struct task_struct, rcu_node_entry);
list_for_each_entry_continue(t, &rnp->blkd_tasks, rcu_node_entry)
@@ -583,17 +585,23 @@ static int rcu_preempt_offline_tasks(struct rcu_state *rsp,
raw_spin_unlock(&rnp_root->lock); /* irqs still disabled */
}
+ rnp->gp_tasks = NULL;
+ rnp->exp_tasks = NULL;
#ifdef CONFIG_RCU_BOOST
- /* In case root is being boosted and leaf is not. */
+ rnp->boost_tasks = NULL;
+ /*
+ * In case root is being boosted and leaf was not. Make sure
+ * that we boost the tasks blocking the current grace period
+ * in this case.
+ */
raw_spin_lock(&rnp_root->lock); /* irqs already disabled */
if (rnp_root->boost_tasks != NULL &&
- rnp_root->boost_tasks != rnp_root->gp_tasks)
+ rnp_root->boost_tasks != rnp_root->gp_tasks &&
+ rnp_root->boost_tasks != rnp_root->exp_tasks)
rnp_root->boost_tasks = rnp_root->gp_tasks;
raw_spin_unlock(&rnp_root->lock); /* irqs still disabled */
#endif /* #ifdef CONFIG_RCU_BOOST */
- rnp->gp_tasks = NULL;
- rnp->exp_tasks = NULL;
return retval;
}
@@ -1204,9 +1212,9 @@ static int rcu_boost_kthread(void *arg)
* kthread to start boosting them. If there is an expedited grace
* period in progress, it is always time to boost.
*
- * The caller must hold rnp->lock, which this function releases,
- * but irqs remain disabled. The ->boost_kthread_task is immortal,
- * so we don't need to worry about it going away.
+ * The caller must hold rnp->lock, which this function releases.
+ * The ->boost_kthread_task is immortal, so we don't need to worry
+ * about it going away.
*/
static void rcu_initiate_boost(struct rcu_node *rnp, unsigned long flags)
{
@@ -2217,11 +2225,15 @@ static void print_cpu_stall_fast_no_hz(char *cp, int cpu)
{
struct rcu_dynticks *rdtp = &per_cpu(rcu_dynticks, cpu);
struct timer_list *tltp = &rdtp->idle_gp_timer;
+ char c;
- sprintf(cp, "drain=%d %c timer=%lu",
- rdtp->dyntick_drain,
- rdtp->dyntick_holdoff == jiffies ? 'H' : '.',
- timer_pending(tltp) ? tltp->expires - jiffies : -1);
+ c = rdtp->dyntick_holdoff == jiffies ? 'H' : '.';
+ if (timer_pending(tltp))
+ sprintf(cp, "drain=%d %c timer=%lu",
+ rdtp->dyntick_drain, c, tltp->expires - jiffies);
+ else
+ sprintf(cp, "drain=%d %c timer not pending",
+ rdtp->dyntick_drain, c);
}
#else /* #ifdef CONFIG_RCU_FAST_NO_HZ */
@@ -2289,11 +2301,10 @@ static void zero_cpu_stall_ticks(struct rcu_data *rdp)
/* Increment ->ticks_this_gp for all flavors of RCU. */
static void increment_cpu_stall_ticks(void)
{
- __get_cpu_var(rcu_sched_data).ticks_this_gp++;
- __get_cpu_var(rcu_bh_data).ticks_this_gp++;
-#ifdef CONFIG_TREE_PREEMPT_RCU
- __get_cpu_var(rcu_preempt_data).ticks_this_gp++;
-#endif /* #ifdef CONFIG_TREE_PREEMPT_RCU */
+ struct rcu_state *rsp;
+
+ for_each_rcu_flavor(rsp)
+ __this_cpu_ptr(rsp->rda)->ticks_this_gp++;
}
#else /* #ifdef CONFIG_RCU_CPU_STALL_INFO */
diff --git a/kernel/rcutree_trace.c b/kernel/rcutree_trace.c
index bd4df13d4afb..7340efdcd324 100644
--- a/kernel/rcutree_trace.c
+++ b/kernel/rcutree_trace.c
@@ -51,8 +51,8 @@ static int show_rcubarrier(struct seq_file *m, void *unused)
struct rcu_state *rsp;
for_each_rcu_flavor(rsp)
- seq_printf(m, "%s: %c bcc: %d nbd: %lu\n",
- rsp->name, rsp->rcu_barrier_in_progress ? 'B' : '.',
+ seq_printf(m, "%s: bcc: %d nbd: %lu\n",
+ rsp->name,
atomic_read(&rsp->barrier_cpu_count),
rsp->n_barrier_done);
return 0;
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index fbf1fd098dc6..8c38b5e7ce47 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -5304,27 +5304,17 @@ void idle_task_exit(void)
}
/*
- * While a dead CPU has no uninterruptible tasks queued at this point,
- * it might still have a nonzero ->nr_uninterruptible counter, because
- * for performance reasons the counter is not stricly tracking tasks to
- * their home CPUs. So we just add the counter to another CPU's counter,
- * to keep the global sum constant after CPU-down:
- */
-static void migrate_nr_uninterruptible(struct rq *rq_src)
-{
- struct rq *rq_dest = cpu_rq(cpumask_any(cpu_active_mask));
-
- rq_dest->nr_uninterruptible += rq_src->nr_uninterruptible;
- rq_src->nr_uninterruptible = 0;
-}
-
-/*
- * remove the tasks which were accounted by rq from calc_load_tasks.
+ * Since this CPU is going 'away' for a while, fold any nr_active delta
+ * we might have. Assumes we're called after migrate_tasks() so that the
+ * nr_active count is stable.
+ *
+ * Also see the comment "Global load-average calculations".
*/
-static void calc_global_load_remove(struct rq *rq)
+static void calc_load_migrate(struct rq *rq)
{
- atomic_long_sub(rq->calc_load_active, &calc_load_tasks);
- rq->calc_load_active = 0;
+ long delta = calc_load_fold_active(rq);
+ if (delta)
+ atomic_long_add(delta, &calc_load_tasks);
}
/*
@@ -5617,9 +5607,18 @@ migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu)
migrate_tasks(cpu);
BUG_ON(rq->nr_running != 1); /* the migration thread */
raw_spin_unlock_irqrestore(&rq->lock, flags);
+ break;
- migrate_nr_uninterruptible(rq);
- calc_global_load_remove(rq);
+ case CPU_DEAD:
+ {
+ struct rq *dest_rq;
+
+ local_irq_save(flags);
+ dest_rq = cpu_rq(smp_processor_id());
+ raw_spin_lock(&dest_rq->lock);
+ calc_load_migrate(rq);
+ raw_spin_unlock_irqrestore(&dest_rq->lock, flags);
+ }
break;
#endif
}
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index 024540f97f74..4b1785a7bb83 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -436,7 +436,8 @@ static bool can_stop_idle_tick(int cpu, struct tick_sched *ts)
if (unlikely(local_softirq_pending() && cpu_online(cpu))) {
static int ratelimit;
- if (ratelimit < 10) {
+ if (ratelimit < 10 &&
+ (local_softirq_pending() & SOFTIRQ_STOP_IDLE_MASK)) {
printk(KERN_ERR "NOHZ: local_softirq_pending %02x\n",
(unsigned int) local_softirq_pending());
ratelimit++;
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 2403a63b5da5..dacbbe4d7a80 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -629,6 +629,20 @@ config PROVE_RCU_REPEATEDLY
Say N if you are unsure.
+config PROVE_RCU_DELAY
+ bool "RCU debugging: preemptible RCU race provocation"
+ depends on DEBUG_KERNEL && PREEMPT_RCU
+ default n
+ help
+ There is a class of races that involve an unlikely preemption
+ of __rcu_read_unlock() just after ->rcu_read_lock_nesting has
+ been set to INT_MIN. This feature inserts a delay at that
+ point to increase the probability of these races.
+
+ Say Y to increase probability of preemption of __rcu_read_unlock().
+
+ Say N if you are unsure.
+
config SPARSE_RCU_POINTER
bool "RCU debugging: sparse-based checks for pointer usage"
default n
diff --git a/mm/kmemleak.c b/mm/kmemleak.c
index 45eb6217bf38..0de83b4541e9 100644
--- a/mm/kmemleak.c
+++ b/mm/kmemleak.c
@@ -1483,13 +1483,11 @@ static void *kmemleak_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
struct kmemleak_object *prev_obj = v;
struct kmemleak_object *next_obj = NULL;
- struct list_head *n = &prev_obj->object_list;
+ struct kmemleak_object *obj = prev_obj;
++(*pos);
- list_for_each_continue_rcu(n, &object_list) {
- struct kmemleak_object *obj =
- list_entry(n, struct kmemleak_object, object_list);
+ list_for_each_entry_continue_rcu(obj, &object_list, object_list) {
if (get_object(obj)) {
next_obj = obj;
break;