diff options
author | Paul E. McKenney <paulmck@linux.vnet.ibm.com> | 2013-11-08 18:03:10 +0100 |
---|---|---|
committer | Paul E. McKenney <paulmck@linux.vnet.ibm.com> | 2013-12-12 21:34:15 +0100 |
commit | a096932f0c9c9dca9cce72f1c0fb2395df8f2dff (patch) | |
tree | 8ba0b24f12aa60152240f5f7445668f414c48787 /kernel/rcu | |
parent | rcu: Warn on allegedly impossible rcu_read_unlock_special() from irq (diff) | |
download | linux-a096932f0c9c9dca9cce72f1c0fb2395df8f2dff.tar.xz linux-a096932f0c9c9dca9cce72f1c0fb2395df8f2dff.zip |
rcu: Don't activate RCU core on NO_HZ_FULL CPUs
Whenever a CPU receives a scheduling-clock interrupt, RCU checks to see
if the RCU core needs anything from this CPU. If so, RCU raises
RCU_SOFTIRQ to carry out any needed processing.
This approach has worked well historically, but it is undesirable on
NO_HZ_FULL CPUs. Such CPUs are expected to spend almost all of their time
in userspace, so that scheduling-clock interrupts can be disabled while
there is only one runnable task on the CPU in question. Unfortunately,
raising any softirq has the potential to wake up ksoftirqd, which would
provide the second runnable task on that CPU, preventing disabling of
scheduling-clock interrupts.
What is needed instead is for RCU to leave NO_HZ_FULL CPUs alone,
relying on the grace-period kthreads' quiescent-state forcing to
do any needed RCU work on behalf of those CPUs.
This commit therefore refrains from raising RCU_SOFTIRQ on any
NO_HZ_FULL CPUs during any grace periods that have been in effect
for less than one second. The one-second limit handles the case
where an inappropriate workload is running on a NO_HZ_FULL CPU
that features lots of scheduling-clock interrupts, but no idle
or userspace time.
Reported-by: Mike Galbraith <bitbucket@online.de>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Tested-by: Mike Galbraith <bitbucket@online.de>
Toasted-by: Frederic Weisbecker <fweisbec@gmail.com>
Diffstat (limited to 'kernel/rcu')
-rw-r--r-- | kernel/rcu/tree.c | 4 | ||||
-rw-r--r-- | kernel/rcu/tree.h | 1 | ||||
-rw-r--r-- | kernel/rcu/tree_plugin.h | 20 |
3 files changed, 25 insertions, 0 deletions
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index db1a9fdaeab8..e37bd561c26f 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -2796,6 +2796,10 @@ static int __rcu_pending(struct rcu_state *rsp, struct rcu_data *rdp) /* Check for CPU stalls, if enabled. */ check_cpu_stall(rsp, rdp); + /* Is this CPU a NO_HZ_FULL CPU that should ignore RCU? */ + if (rcu_nohz_full_cpu(rsp)) + return 0; + /* Is the RCU core waiting for a quiescent state from this CPU? */ if (rcu_scheduler_fully_active && rdp->qs_pending && !rdp->passed_quiesce) { diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h index a87adfc2916b..8c19873f1ac9 100644 --- a/kernel/rcu/tree.h +++ b/kernel/rcu/tree.h @@ -571,6 +571,7 @@ static void rcu_sysidle_report_gp(struct rcu_state *rsp, int isidle, unsigned long maxj); static void rcu_bind_gp_kthread(void); static void rcu_sysidle_init_percpu_data(struct rcu_dynticks *rdtp); +static bool rcu_nohz_full_cpu(struct rcu_state *rsp); #endif /* #ifndef RCU_TREE_NONCORE */ diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h index fa7a18b62253..e0885cb6c599 100644 --- a/kernel/rcu/tree_plugin.h +++ b/kernel/rcu/tree_plugin.h @@ -2872,3 +2872,23 @@ static void rcu_sysidle_init_percpu_data(struct rcu_dynticks *rdtp) } #endif /* #else #ifdef CONFIG_NO_HZ_FULL_SYSIDLE */ + +/* + * Is this CPU a NO_HZ_FULL CPU that should ignore RCU so that the + * grace-period kthread will do force_quiescent_state() processing? + * The idea is to avoid waking up RCU core processing on such a + * CPU unless the grace period has extended for too long. + * + * This code relies on the fact that all NO_HZ_FULL CPUs are also + * CONFIG_RCU_NOCB_CPUs. + */ +static bool rcu_nohz_full_cpu(struct rcu_state *rsp) +{ +#ifdef CONFIG_NO_HZ_FULL + if (tick_nohz_full_cpu(smp_processor_id()) && + (!rcu_gp_in_progress(rsp) || + ULONG_CMP_LT(jiffies, ACCESS_ONCE(rsp->gp_start) + HZ))) + return 1; +#endif /* #ifdef CONFIG_NO_HZ_FULL */ + return 0; +} |