diff options
Diffstat (limited to 'kernel')
-rw-r--r-- | kernel/rcupdate.c | 4 | ||||
-rw-r--r-- | kernel/rcutiny.c | 33 | ||||
-rw-r--r-- | kernel/rcutiny_plugin.h | 10 | ||||
-rw-r--r-- | kernel/rcutorture.c | 159 | ||||
-rw-r--r-- | kernel/rcutree.c | 597 | ||||
-rw-r--r-- | kernel/rcutree.h | 28 | ||||
-rw-r--r-- | kernel/rcutree_plugin.h | 174 | ||||
-rw-r--r-- | kernel/rcutree_trace.c | 15 | ||||
-rw-r--r-- | kernel/time/tick-sched.c | 3 |
9 files changed, 614 insertions, 409 deletions
diff --git a/kernel/rcupdate.c b/kernel/rcupdate.c index 4e6a61b15e86..29ca1c6da594 100644 --- a/kernel/rcupdate.c +++ b/kernel/rcupdate.c @@ -45,6 +45,7 @@ #include <linux/mutex.h> #include <linux/export.h> #include <linux/hardirq.h> +#include <linux/delay.h> #define CREATE_TRACE_POINTS #include <trace/events/rcu.h> @@ -81,6 +82,9 @@ void __rcu_read_unlock(void) } else { barrier(); /* critical section before exit code. */ t->rcu_read_lock_nesting = INT_MIN; +#ifdef CONFIG_PROVE_RCU_DELAY + udelay(10); /* Make preemption more probable. */ +#endif /* #ifdef CONFIG_PROVE_RCU_DELAY */ barrier(); /* assign before ->rcu_read_unlock_special load */ if (unlikely(ACCESS_ONCE(t->rcu_read_unlock_special))) rcu_read_unlock_special(t); diff --git a/kernel/rcutiny.c b/kernel/rcutiny.c index 547b1fe5b052..e4c6a598d6f7 100644 --- a/kernel/rcutiny.c +++ b/kernel/rcutiny.c @@ -56,25 +56,28 @@ static void __call_rcu(struct rcu_head *head, static long long rcu_dynticks_nesting = DYNTICK_TASK_EXIT_IDLE; /* Common code for rcu_idle_enter() and rcu_irq_exit(), see kernel/rcutree.c. */ -static void rcu_idle_enter_common(long long oldval) +static void rcu_idle_enter_common(long long newval) { - if (rcu_dynticks_nesting) { + if (newval) { RCU_TRACE(trace_rcu_dyntick("--=", - oldval, rcu_dynticks_nesting)); + rcu_dynticks_nesting, newval)); + rcu_dynticks_nesting = newval; return; } - RCU_TRACE(trace_rcu_dyntick("Start", oldval, rcu_dynticks_nesting)); + RCU_TRACE(trace_rcu_dyntick("Start", rcu_dynticks_nesting, newval)); if (!is_idle_task(current)) { struct task_struct *idle = idle_task(smp_processor_id()); RCU_TRACE(trace_rcu_dyntick("Error on entry: not idle task", - oldval, rcu_dynticks_nesting)); + rcu_dynticks_nesting, newval)); ftrace_dump(DUMP_ALL); WARN_ONCE(1, "Current pid: %d comm: %s / Idle pid: %d comm: %s", current->pid, current->comm, idle->pid, idle->comm); /* must be idle task! */ } rcu_sched_qs(0); /* implies rcu_bh_qsctr_inc(0) */ + barrier(); + rcu_dynticks_nesting = newval; } /* @@ -84,17 +87,16 @@ static void rcu_idle_enter_common(long long oldval) void rcu_idle_enter(void) { unsigned long flags; - long long oldval; + long long newval; local_irq_save(flags); - oldval = rcu_dynticks_nesting; WARN_ON_ONCE((rcu_dynticks_nesting & DYNTICK_TASK_NEST_MASK) == 0); if ((rcu_dynticks_nesting & DYNTICK_TASK_NEST_MASK) == DYNTICK_TASK_NEST_VALUE) - rcu_dynticks_nesting = 0; + newval = 0; else - rcu_dynticks_nesting -= DYNTICK_TASK_NEST_VALUE; - rcu_idle_enter_common(oldval); + newval = rcu_dynticks_nesting - DYNTICK_TASK_NEST_VALUE; + rcu_idle_enter_common(newval); local_irq_restore(flags); } EXPORT_SYMBOL_GPL(rcu_idle_enter); @@ -105,15 +107,15 @@ EXPORT_SYMBOL_GPL(rcu_idle_enter); void rcu_irq_exit(void) { unsigned long flags; - long long oldval; + long long newval; local_irq_save(flags); - oldval = rcu_dynticks_nesting; - rcu_dynticks_nesting--; - WARN_ON_ONCE(rcu_dynticks_nesting < 0); - rcu_idle_enter_common(oldval); + newval = rcu_dynticks_nesting - 1; + WARN_ON_ONCE(newval < 0); + rcu_idle_enter_common(newval); local_irq_restore(flags); } +EXPORT_SYMBOL_GPL(rcu_irq_exit); /* Common code for rcu_idle_exit() and rcu_irq_enter(), see kernel/rcutree.c. */ static void rcu_idle_exit_common(long long oldval) @@ -171,6 +173,7 @@ void rcu_irq_enter(void) rcu_idle_exit_common(oldval); local_irq_restore(flags); } +EXPORT_SYMBOL_GPL(rcu_irq_enter); #ifdef CONFIG_DEBUG_LOCK_ALLOC diff --git a/kernel/rcutiny_plugin.h b/kernel/rcutiny_plugin.h index 918fd1e8509c..3d0190282204 100644 --- a/kernel/rcutiny_plugin.h +++ b/kernel/rcutiny_plugin.h @@ -278,7 +278,7 @@ static int rcu_boost(void) rcu_preempt_ctrlblk.exp_tasks == NULL) return 0; /* Nothing to boost. */ - raw_local_irq_save(flags); + local_irq_save(flags); /* * Recheck with irqs disabled: all tasks in need of boosting @@ -287,7 +287,7 @@ static int rcu_boost(void) */ if (rcu_preempt_ctrlblk.boost_tasks == NULL && rcu_preempt_ctrlblk.exp_tasks == NULL) { - raw_local_irq_restore(flags); + local_irq_restore(flags); return 0; } @@ -317,7 +317,7 @@ static int rcu_boost(void) t = container_of(tb, struct task_struct, rcu_node_entry); rt_mutex_init_proxy_locked(&mtx, t); t->rcu_boost_mutex = &mtx; - raw_local_irq_restore(flags); + local_irq_restore(flags); rt_mutex_lock(&mtx); rt_mutex_unlock(&mtx); /* Keep lockdep happy. */ @@ -991,9 +991,9 @@ static void rcu_trace_sub_qlen(struct rcu_ctrlblk *rcp, int n) { unsigned long flags; - raw_local_irq_save(flags); + local_irq_save(flags); rcp->qlen -= n; - raw_local_irq_restore(flags); + local_irq_restore(flags); } /* diff --git a/kernel/rcutorture.c b/kernel/rcutorture.c index 25b15033c61f..aaa7b9f3532a 100644 --- a/kernel/rcutorture.c +++ b/kernel/rcutorture.c @@ -53,10 +53,11 @@ MODULE_AUTHOR("Paul E. McKenney <paulmck@us.ibm.com> and Josh Triplett <josh@fre static int nreaders = -1; /* # reader threads, defaults to 2*ncpus */ static int nfakewriters = 4; /* # fake writer threads */ -static int stat_interval; /* Interval between stats, in seconds. */ - /* Defaults to "only at end of test". */ +static int stat_interval = 60; /* Interval between stats, in seconds. */ + /* Zero means "only at end of test". */ static bool verbose; /* Print more debug info. */ -static bool test_no_idle_hz; /* Test RCU's support for tickless idle CPUs. */ +static bool test_no_idle_hz = true; + /* Test RCU support for tickless idle CPUs. */ static int shuffle_interval = 3; /* Interval between shuffles (in sec)*/ static int stutter = 5; /* Start/stop testing interval (in sec) */ static int irqreader = 1; /* RCU readers from irq (timers). */ @@ -119,11 +120,11 @@ MODULE_PARM_DESC(torture_type, "Type of RCU to torture (rcu, rcu_bh, srcu)"); #define TORTURE_FLAG "-torture:" #define PRINTK_STRING(s) \ - do { printk(KERN_ALERT "%s" TORTURE_FLAG s "\n", torture_type); } while (0) + do { pr_alert("%s" TORTURE_FLAG s "\n", torture_type); } while (0) #define VERBOSE_PRINTK_STRING(s) \ - do { if (verbose) printk(KERN_ALERT "%s" TORTURE_FLAG s "\n", torture_type); } while (0) + do { if (verbose) pr_alert("%s" TORTURE_FLAG s "\n", torture_type); } while (0) #define VERBOSE_PRINTK_ERRSTRING(s) \ - do { if (verbose) printk(KERN_ALERT "%s" TORTURE_FLAG "!!! " s "\n", torture_type); } while (0) + do { if (verbose) pr_alert("%s" TORTURE_FLAG "!!! " s "\n", torture_type); } while (0) static char printk_buf[4096]; @@ -176,8 +177,14 @@ static long n_rcu_torture_boosts; static long n_rcu_torture_timers; static long n_offline_attempts; static long n_offline_successes; +static unsigned long sum_offline; +static int min_offline = -1; +static int max_offline; static long n_online_attempts; static long n_online_successes; +static unsigned long sum_online; +static int min_online = -1; +static int max_online; static long n_barrier_attempts; static long n_barrier_successes; static struct list_head rcu_torture_removed; @@ -235,7 +242,7 @@ rcutorture_shutdown_notify(struct notifier_block *unused1, if (fullstop == FULLSTOP_DONTSTOP) fullstop = FULLSTOP_SHUTDOWN; else - printk(KERN_WARNING /* but going down anyway, so... */ + pr_warn(/* but going down anyway, so... */ "Concurrent 'rmmod rcutorture' and shutdown illegal!\n"); mutex_unlock(&fullstop_mutex); return NOTIFY_DONE; @@ -248,7 +255,7 @@ rcutorture_shutdown_notify(struct notifier_block *unused1, static void rcutorture_shutdown_absorb(char *title) { if (ACCESS_ONCE(fullstop) == FULLSTOP_SHUTDOWN) { - printk(KERN_NOTICE + pr_notice( "rcutorture thread %s parking due to system shutdown\n", title); schedule_timeout_uninterruptible(MAX_SCHEDULE_TIMEOUT); @@ -1214,11 +1221,13 @@ rcu_torture_printk(char *page) n_rcu_torture_boost_failure, n_rcu_torture_boosts, n_rcu_torture_timers); - cnt += sprintf(&page[cnt], "onoff: %ld/%ld:%ld/%ld ", - n_online_successes, - n_online_attempts, - n_offline_successes, - n_offline_attempts); + cnt += sprintf(&page[cnt], + "onoff: %ld/%ld:%ld/%ld %d,%d:%d,%d %lu:%lu (HZ=%d) ", + n_online_successes, n_online_attempts, + n_offline_successes, n_offline_attempts, + min_online, max_online, + min_offline, max_offline, + sum_online, sum_offline, HZ); cnt += sprintf(&page[cnt], "barrier: %ld/%ld:%ld", n_barrier_successes, n_barrier_attempts, @@ -1267,7 +1276,7 @@ rcu_torture_stats_print(void) int cnt; cnt = rcu_torture_printk(printk_buf); - printk(KERN_ALERT "%s", printk_buf); + pr_alert("%s", printk_buf); } /* @@ -1380,20 +1389,20 @@ rcu_torture_stutter(void *arg) static inline void rcu_torture_print_module_parms(struct rcu_torture_ops *cur_ops, char *tag) { - printk(KERN_ALERT "%s" TORTURE_FLAG - "--- %s: nreaders=%d nfakewriters=%d " - "stat_interval=%d verbose=%d test_no_idle_hz=%d " - "shuffle_interval=%d stutter=%d irqreader=%d " - "fqs_duration=%d fqs_holdoff=%d fqs_stutter=%d " - "test_boost=%d/%d test_boost_interval=%d " - "test_boost_duration=%d shutdown_secs=%d " - "onoff_interval=%d onoff_holdoff=%d\n", - torture_type, tag, nrealreaders, nfakewriters, - stat_interval, verbose, test_no_idle_hz, shuffle_interval, - stutter, irqreader, fqs_duration, fqs_holdoff, fqs_stutter, - test_boost, cur_ops->can_boost, - test_boost_interval, test_boost_duration, shutdown_secs, - onoff_interval, onoff_holdoff); + pr_alert("%s" TORTURE_FLAG + "--- %s: nreaders=%d nfakewriters=%d " + "stat_interval=%d verbose=%d test_no_idle_hz=%d " + "shuffle_interval=%d stutter=%d irqreader=%d " + "fqs_duration=%d fqs_holdoff=%d fqs_stutter=%d " + "test_boost=%d/%d test_boost_interval=%d " + "test_boost_duration=%d shutdown_secs=%d " + "onoff_interval=%d onoff_holdoff=%d\n", + torture_type, tag, nrealreaders, nfakewriters, + stat_interval, verbose, test_no_idle_hz, shuffle_interval, + stutter, irqreader, fqs_duration, fqs_holdoff, fqs_stutter, + test_boost, cur_ops->can_boost, + test_boost_interval, test_boost_duration, shutdown_secs, + onoff_interval, onoff_holdoff); } static struct notifier_block rcutorture_shutdown_nb = { @@ -1460,9 +1469,9 @@ rcu_torture_shutdown(void *arg) !kthread_should_stop()) { delta = shutdown_time - jiffies_snap; if (verbose) - printk(KERN_ALERT "%s" TORTURE_FLAG - "rcu_torture_shutdown task: %lu jiffies remaining\n", - torture_type, delta); + pr_alert("%s" TORTURE_FLAG + "rcu_torture_shutdown task: %lu jiffies remaining\n", + torture_type, delta); schedule_timeout_interruptible(delta); jiffies_snap = ACCESS_ONCE(jiffies); } @@ -1490,8 +1499,10 @@ static int __cpuinit rcu_torture_onoff(void *arg) { int cpu; + unsigned long delta; int maxcpu = -1; DEFINE_RCU_RANDOM(rand); + unsigned long starttime; VERBOSE_PRINTK_STRING("rcu_torture_onoff task started"); for_each_online_cpu(cpu) @@ -1506,29 +1517,51 @@ rcu_torture_onoff(void *arg) cpu = (rcu_random(&rand) >> 4) % (maxcpu + 1); if (cpu_online(cpu) && cpu_is_hotpluggable(cpu)) { if (verbose) - printk(KERN_ALERT "%s" TORTURE_FLAG - "rcu_torture_onoff task: offlining %d\n", - torture_type, cpu); + pr_alert("%s" TORTURE_FLAG + "rcu_torture_onoff task: offlining %d\n", + torture_type, cpu); + starttime = jiffies; n_offline_attempts++; if (cpu_down(cpu) == 0) { if (verbose) - printk(KERN_ALERT "%s" TORTURE_FLAG - "rcu_torture_onoff task: offlined %d\n", - torture_type, cpu); + pr_alert("%s" TORTURE_FLAG + "rcu_torture_onoff task: offlined %d\n", + torture_type, cpu); n_offline_successes++; + delta = jiffies - starttime; + sum_offline += delta; + if (min_offline < 0) { + min_offline = delta; + max_offline = delta; + } + if (min_offline > delta) + min_offline = delta; + if (max_offline < delta) + max_offline = delta; } } else if (cpu_is_hotpluggable(cpu)) { if (verbose) - printk(KERN_ALERT "%s" TORTURE_FLAG - "rcu_torture_onoff task: onlining %d\n", - torture_type, cpu); + pr_alert("%s" TORTURE_FLAG + "rcu_torture_onoff task: onlining %d\n", + torture_type, cpu); + starttime = jiffies; n_online_attempts++; if (cpu_up(cpu) == 0) { if (verbose) - printk(KERN_ALERT "%s" TORTURE_FLAG - "rcu_torture_onoff task: onlined %d\n", - torture_type, cpu); + pr_alert("%s" TORTURE_FLAG + "rcu_torture_onoff task: onlined %d\n", + torture_type, cpu); n_online_successes++; + delta = jiffies - starttime; + sum_online += delta; + if (min_online < 0) { + min_online = delta; + max_online = delta; + } + if (min_online > delta) + min_online = delta; + if (max_online < delta) + max_online = delta; } } schedule_timeout_interruptible(onoff_interval * HZ); @@ -1593,14 +1626,14 @@ static int __cpuinit rcu_torture_stall(void *args) if (!kthread_should_stop()) { stop_at = get_seconds() + stall_cpu; /* RCU CPU stall is expected behavior in following code. */ - printk(KERN_ALERT "rcu_torture_stall start.\n"); + pr_alert("rcu_torture_stall start.\n"); rcu_read_lock(); preempt_disable(); while (ULONG_CMP_LT(get_seconds(), stop_at)) continue; /* Induce RCU CPU stall warning. */ preempt_enable(); rcu_read_unlock(); - printk(KERN_ALERT "rcu_torture_stall end.\n"); + pr_alert("rcu_torture_stall end.\n"); } rcutorture_shutdown_absorb("rcu_torture_stall"); while (!kthread_should_stop()) @@ -1716,12 +1749,12 @@ static int rcu_torture_barrier_init(void) if (n_barrier_cbs == 0) return 0; if (cur_ops->call == NULL || cur_ops->cb_barrier == NULL) { - printk(KERN_ALERT "%s" TORTURE_FLAG - " Call or barrier ops missing for %s,\n", - torture_type, cur_ops->name); - printk(KERN_ALERT "%s" TORTURE_FLAG - " RCU barrier testing omitted from run.\n", - torture_type); + pr_alert("%s" TORTURE_FLAG + " Call or barrier ops missing for %s,\n", + torture_type, cur_ops->name); + pr_alert("%s" TORTURE_FLAG + " RCU barrier testing omitted from run.\n", + torture_type); return 0; } atomic_set(&barrier_cbs_count, 0); @@ -1814,7 +1847,7 @@ rcu_torture_cleanup(void) mutex_lock(&fullstop_mutex); rcutorture_record_test_transition(); if (fullstop == FULLSTOP_SHUTDOWN) { - printk(KERN_WARNING /* but going down anyway, so... */ + pr_warn(/* but going down anyway, so... */ "Concurrent 'rmmod rcutorture' and shutdown illegal!\n"); mutex_unlock(&fullstop_mutex); schedule_timeout_uninterruptible(10); @@ -1938,17 +1971,17 @@ rcu_torture_init(void) break; } if (i == ARRAY_SIZE(torture_ops)) { - printk(KERN_ALERT "rcu-torture: invalid torture type: \"%s\"\n", - torture_type); - printk(KERN_ALERT "rcu-torture types:"); + pr_alert("rcu-torture: invalid torture type: \"%s\"\n", + torture_type); + pr_alert("rcu-torture types:"); for (i = 0; i < ARRAY_SIZE(torture_ops); i++) - printk(KERN_ALERT " %s", torture_ops[i]->name); - printk(KERN_ALERT "\n"); + pr_alert(" %s", torture_ops[i]->name); + pr_alert("\n"); mutex_unlock(&fullstop_mutex); return -EINVAL; } if (cur_ops->fqs == NULL && fqs_duration != 0) { - printk(KERN_ALERT "rcu-torture: ->fqs NULL and non-zero fqs_duration, fqs disabled.\n"); + pr_alert("rcu-torture: ->fqs NULL and non-zero fqs_duration, fqs disabled.\n"); fqs_duration = 0; } if (cur_ops->init) @@ -1996,14 +2029,15 @@ rcu_torture_init(void) /* Start up the kthreads. */ VERBOSE_PRINTK_STRING("Creating rcu_torture_writer task"); - writer_task = kthread_run(rcu_torture_writer, NULL, - "rcu_torture_writer"); + writer_task = kthread_create(rcu_torture_writer, NULL, + "rcu_torture_writer"); if (IS_ERR(writer_task)) { firsterr = PTR_ERR(writer_task); VERBOSE_PRINTK_ERRSTRING("Failed to create writer"); writer_task = NULL; goto unwind; } + wake_up_process(writer_task); fakewriter_tasks = kzalloc(nfakewriters * sizeof(fakewriter_tasks[0]), GFP_KERNEL); if (fakewriter_tasks == NULL) { @@ -2118,14 +2152,15 @@ rcu_torture_init(void) } if (shutdown_secs > 0) { shutdown_time = jiffies + shutdown_secs * HZ; - shutdown_task = kthread_run(rcu_torture_shutdown, NULL, - "rcu_torture_shutdown"); + shutdown_task = kthread_create(rcu_torture_shutdown, NULL, + "rcu_torture_shutdown"); if (IS_ERR(shutdown_task)) { firsterr = PTR_ERR(shutdown_task); VERBOSE_PRINTK_ERRSTRING("Failed to create shutdown"); shutdown_task = NULL; goto unwind; } + wake_up_process(shutdown_task); } i = rcu_torture_onoff_init(); if (i != 0) { diff --git a/kernel/rcutree.c b/kernel/rcutree.c index be76c80a14d1..80fd02e5d115 100644 --- a/kernel/rcutree.c +++ b/kernel/rcutree.c @@ -52,6 +52,7 @@ #include <linux/prefetch.h> #include <linux/delay.h> #include <linux/stop_machine.h> +#include <linux/random.h> #include "rcutree.h" #include <trace/events/rcu.h> @@ -61,6 +62,7 @@ /* Data structures. */ static struct lock_class_key rcu_node_class[RCU_NUM_LVLS]; +static struct lock_class_key rcu_fqs_class[RCU_NUM_LVLS]; #define RCU_STATE_INITIALIZER(sname, cr) { \ .level = { &sname##_state.node[0] }, \ @@ -72,7 +74,6 @@ static struct lock_class_key rcu_node_class[RCU_NUM_LVLS]; .orphan_nxttail = &sname##_state.orphan_nxtlist, \ .orphan_donetail = &sname##_state.orphan_donelist, \ .barrier_mutex = __MUTEX_INITIALIZER(sname##_state.barrier_mutex), \ - .fqslock = __RAW_SPIN_LOCK_UNLOCKED(&sname##_state.fqslock), \ .name = #sname, \ } @@ -88,7 +89,7 @@ LIST_HEAD(rcu_struct_flavors); /* Increase (but not decrease) the CONFIG_RCU_FANOUT_LEAF at boot time. */ static int rcu_fanout_leaf = CONFIG_RCU_FANOUT_LEAF; -module_param(rcu_fanout_leaf, int, 0); +module_param(rcu_fanout_leaf, int, 0444); int rcu_num_lvls __read_mostly = RCU_NUM_LVLS; static int num_rcu_lvl[] = { /* Number of rcu_nodes at specified level. */ NUM_RCU_LVL_0, @@ -175,8 +176,6 @@ void rcu_sched_qs(int cpu) { struct rcu_data *rdp = &per_cpu(rcu_sched_data, cpu); - rdp->passed_quiesce_gpnum = rdp->gpnum; - barrier(); if (rdp->passed_quiesce == 0) trace_rcu_grace_period("rcu_sched", rdp->gpnum, "cpuqs"); rdp->passed_quiesce = 1; @@ -186,8 +185,6 @@ void rcu_bh_qs(int cpu) { struct rcu_data *rdp = &per_cpu(rcu_bh_data, cpu); - rdp->passed_quiesce_gpnum = rdp->gpnum; - barrier(); if (rdp->passed_quiesce == 0) trace_rcu_grace_period("rcu_bh", rdp->gpnum, "cpuqs"); rdp->passed_quiesce = 1; @@ -216,9 +213,9 @@ static int blimit = 10; /* Maximum callbacks per rcu_do_batch. */ static int qhimark = 10000; /* If this many pending, ignore blimit. */ static int qlowmark = 100; /* Once only this many pending, use blimit. */ -module_param(blimit, int, 0); -module_param(qhimark, int, 0); -module_param(qlowmark, int, 0); +module_param(blimit, int, 0444); +module_param(qhimark, int, 0444); +module_param(qlowmark, int, 0444); int rcu_cpu_stall_suppress __read_mostly; /* 1 = suppress stall warnings. */ int rcu_cpu_stall_timeout __read_mostly = CONFIG_RCU_CPU_STALL_TIMEOUT; @@ -226,7 +223,14 @@ int rcu_cpu_stall_timeout __read_mostly = CONFIG_RCU_CPU_STALL_TIMEOUT; module_param(rcu_cpu_stall_suppress, int, 0644); module_param(rcu_cpu_stall_timeout, int, 0644); -static void force_quiescent_state(struct rcu_state *rsp, int relaxed); +static ulong jiffies_till_first_fqs = RCU_JIFFIES_TILL_FORCE_QS; +static ulong jiffies_till_next_fqs = RCU_JIFFIES_TILL_FORCE_QS; + +module_param(jiffies_till_first_fqs, ulong, 0644); +module_param(jiffies_till_next_fqs, ulong, 0644); + +static void force_qs_rnp(struct rcu_state *rsp, int (*f)(struct rcu_data *)); +static void force_quiescent_state(struct rcu_state *rsp); static int rcu_pending(int cpu); /* @@ -252,7 +256,7 @@ EXPORT_SYMBOL_GPL(rcu_batches_completed_bh); */ void rcu_bh_force_quiescent_state(void) { - force_quiescent_state(&rcu_bh_state, 0); + force_quiescent_state(&rcu_bh_state); } EXPORT_SYMBOL_GPL(rcu_bh_force_quiescent_state); @@ -286,7 +290,7 @@ EXPORT_SYMBOL_GPL(rcutorture_record_progress); */ void rcu_sched_force_quiescent_state(void) { - force_quiescent_state(&rcu_sched_state, 0); + force_quiescent_state(&rcu_sched_state); } EXPORT_SYMBOL_GPL(rcu_sched_force_quiescent_state); @@ -319,35 +323,6 @@ static struct rcu_node *rcu_get_root(struct rcu_state *rsp) } /* - * If the specified CPU is offline, tell the caller that it is in - * a quiescent state. Otherwise, whack it with a reschedule IPI. - * Grace periods can end up waiting on an offline CPU when that - * CPU is in the process of coming online -- it will be added to the - * rcu_node bitmasks before it actually makes it online. The same thing - * can happen while a CPU is in the process of coming online. Because this - * race is quite rare, we check for it after detecting that the grace - * period has been delayed rather than checking each and every CPU - * each and every time we start a new grace period. - */ -static int rcu_implicit_offline_qs(struct rcu_data *rdp) -{ - /* - * If the CPU is offline for more than a jiffy, it is in a quiescent - * state. We can trust its state not to change because interrupts - * are disabled. The reason for the jiffy's worth of slack is to - * handle CPUs initializing on the way up and finding their way - * to the idle loop on the way down. - */ - if (cpu_is_offline(rdp->cpu) && - ULONG_CMP_LT(rdp->rsp->gp_start + 2, jiffies)) { - trace_rcu_fqs(rdp->rsp->name, rdp->gpnum, rdp->cpu, "ofl"); - rdp->offline_fqs++; - return 1; - } - return 0; -} - -/* * rcu_idle_enter_common - inform RCU that current CPU is moving towards idle * * If the new value of the ->dynticks_nesting counter now is zero, @@ -675,7 +650,7 @@ static int dyntick_save_progress_counter(struct rcu_data *rdp) * Return true if the specified CPU has passed through a quiescent * state by virtue of being in or having passed through an dynticks * idle state since the last call to dyntick_save_progress_counter() - * for this same CPU. + * for this same CPU, or by virtue of having been offline. */ static int rcu_implicit_dynticks_qs(struct rcu_data *rdp) { @@ -699,8 +674,26 @@ static int rcu_implicit_dynticks_qs(struct rcu_data *rdp) return 1; } - /* Go check for the CPU being offline. */ - return rcu_implicit_offline_qs(rdp); + /* + * Check for the CPU being offline, but only if the grace period + * is old enough. We don't need to worry about the CPU changing + * state: If we see it offline even once, it has been through a + * quiescent state. + * + * The reason for insisting that the grace period be at least + * one jiffy old is that CPUs that are not quite online and that + * have just gone offline can still execute RCU read-side critical + * sections. + */ + if (ULONG_CMP_GE(rdp->rsp->gp_start + 2, jiffies)) + return 0; /* Grace period is not old enough. */ + barrier(); + if (cpu_is_offline(rdp->cpu)) { + trace_rcu_fqs(rdp->rsp->name, rdp->gpnum, rdp->cpu, "ofl"); + rdp->offline_fqs++; + return 1; + } + return 0; } static int jiffies_till_stall_check(void) @@ -757,14 +750,15 @@ static void print_other_cpu_stall(struct rcu_state *rsp) rcu_for_each_leaf_node(rsp, rnp) { raw_spin_lock_irqsave(&rnp->lock, flags); ndetected += rcu_print_task_stall(rnp); + if (rnp->qsmask != 0) { + for (cpu = 0; cpu <= rnp->grphi - rnp->grplo; cpu++) + if (rnp->qsmask & (1UL << cpu)) { + print_cpu_stall_info(rsp, + rnp->grplo + cpu); + ndetected++; + } + } raw_spin_unlock_irqrestore(&rnp->lock, flags); - if (rnp->qsmask == 0) - continue; - for (cpu = 0; cpu <= rnp->grphi - rnp->grplo; cpu++) - if (rnp->qsmask & (1UL << cpu)) { - print_cpu_stall_info(rsp, rnp->grplo + cpu); - ndetected++; - } } /* @@ -784,11 +778,11 @@ static void print_other_cpu_stall(struct rcu_state *rsp) else if (!trigger_all_cpu_backtrace()) dump_stack(); - /* If so configured, complain about tasks blocking the grace period. */ + /* Complain about tasks blocking the grace period. */ rcu_print_detail_task_stall(rsp); - force_quiescent_state(rsp, 0); /* Kick them all. */ + force_quiescent_state(rsp); /* Kick them all. */ } static void print_cpu_stall(struct rcu_state *rsp) @@ -829,7 +823,8 @@ static void check_cpu_stall(struct rcu_state *rsp, struct rcu_data *rdp) j = ACCESS_ONCE(jiffies); js = ACCESS_ONCE(rsp->jiffies_stall); rnp = rdp->mynode; - if ((ACCESS_ONCE(rnp->qsmask) & rdp->grpmask) && ULONG_CMP_GE(j, js)) { + if (rcu_gp_in_progress(rsp) && + (ACCESS_ONCE(rnp->qsmask) & rdp->grpmask) && ULONG_CMP_GE(j, js)) { /* We haven't checked in, so go dump stack. */ print_cpu_stall(rsp); @@ -891,12 +886,8 @@ static void __note_new_gpnum(struct rcu_state *rsp, struct rcu_node *rnp, struct */ rdp->gpnum = rnp->gpnum; trace_rcu_grace_period(rsp->name, rdp->gpnum, "cpustart"); - if (rnp->qsmask & rdp->grpmask) { - rdp->qs_pending = 1; - rdp->passed_quiesce = 0; - } else { - rdp->qs_pending = 0; - } + rdp->passed_quiesce = 0; + rdp->qs_pending = !!(rnp->qsmask & rdp->grpmask); zero_cpu_stall_ticks(rdp); } } @@ -976,10 +967,13 @@ __rcu_process_gp_end(struct rcu_state *rsp, struct rcu_node *rnp, struct rcu_dat * our behalf. Catch up with this state to avoid noting * spurious new grace periods. If another grace period * has started, then rnp->gpnum will have advanced, so - * we will detect this later on. + * we will detect this later on. Of course, any quiescent + * states we found for the old GP are now invalid. */ - if (ULONG_CMP_LT(rdp->gpnum, rdp->completed)) + if (ULONG_CMP_LT(rdp->gpnum, rdp->completed)) { rdp->gpnum = rdp->completed; + rdp->passed_quiesce = 0; + } /* * If RCU does not need a quiescent state from this CPU, @@ -1023,97 +1017,56 @@ rcu_start_gp_per_cpu(struct rcu_state *rsp, struct rcu_node *rnp, struct rcu_dat /* Prior grace period ended, so advance callbacks for current CPU. */ __rcu_process_gp_end(rsp, rnp, rdp); - /* - * Because this CPU just now started the new grace period, we know - * that all of its callbacks will be covered by this upcoming grace - * period, even the ones that were registered arbitrarily recently. - * Therefore, advance all outstanding callbacks to RCU_WAIT_TAIL. - * - * Other CPUs cannot be sure exactly when the grace period started. - * Therefore, their recently registered callbacks must pass through - * an additional RCU_NEXT_READY stage, so that they will be handled - * by the next RCU grace period. - */ - rdp->nxttail[RCU_NEXT_READY_TAIL] = rdp->nxttail[RCU_NEXT_TAIL]; - rdp->nxttail[RCU_WAIT_TAIL] = rdp->nxttail[RCU_NEXT_TAIL]; - /* Set state so that this CPU will detect the next quiescent state. */ __note_new_gpnum(rsp, rnp, rdp); } /* - * Start a new RCU grace period if warranted, re-initializing the hierarchy - * in preparation for detecting the next grace period. The caller must hold - * the root node's ->lock, which is released before return. Hard irqs must - * be disabled. - * - * Note that it is legal for a dying CPU (which is marked as offline) to - * invoke this function. This can happen when the dying CPU reports its - * quiescent state. + * Initialize a new grace period. */ -static void -rcu_start_gp(struct rcu_state *rsp, unsigned long flags) - __releases(rcu_get_root(rsp)->lock) +static int rcu_gp_init(struct rcu_state *rsp) { - struct rcu_data *rdp = this_cpu_ptr(rsp->rda); + struct rcu_data *rdp; struct rcu_node *rnp = rcu_get_root(rsp); - if (!rcu_scheduler_fully_active || - !cpu_needs_another_gp(rsp, rdp)) { - /* - * Either the scheduler hasn't yet spawned the first - * non-idle task or this CPU does not need another - * grace period. Either way, don't start a new grace - * period. - */ - raw_spin_unlock_irqrestore(&rnp->lock, flags); - return; - } + raw_spin_lock_irq(&rnp->lock); + rsp->gp_flags = 0; /* Clear all flags: New grace period. */ - if (rsp->fqs_active) { - /* - * This CPU needs a grace period, but force_quiescent_state() - * is running. Tell it to start one on this CPU's behalf. - */ - rsp->fqs_need_gp = 1; - raw_spin_unlock_irqrestore(&rnp->lock, flags); - return; + if (rcu_gp_in_progress(rsp)) { + /* Grace period already in progress, don't start another. */ + raw_spin_unlock_irq(&rnp->lock); + return 0; } /* Advance to a new grace period and initialize state. */ rsp->gpnum++; trace_rcu_grace_period(rsp->name, rsp->gpnum, "start"); - WARN_ON_ONCE(rsp->fqs_state == RCU_GP_INIT); - rsp->fqs_state = RCU_GP_INIT; /* Hold off force_quiescent_state. */ - rsp->jiffies_force_qs = jiffies + RCU_JIFFIES_TILL_FORCE_QS; record_gp_stall_check_time(rsp); - raw_spin_unlock(&rnp->lock); /* leave irqs disabled. */ + raw_spin_unlock_irq(&rnp->lock); /* Exclude any concurrent CPU-hotplug operations. */ - raw_spin_lock(&rsp->onofflock); /* irqs already disabled. */ + get_online_cpus(); /* * Set the quiescent-state-needed bits in all the rcu_node - * structures for all currently online CPUs in breadth-first - * order, starting from the root rcu_node structure. This - * operation relies on the layout of the hierarchy within the - * rsp->node[] array. Note that other CPUs will access only - * the leaves of the hierarchy, which still indicate that no + * structures for all currently online CPUs in breadth-first order, + * starting from the root rcu_node structure, relying on the layout + * of the tree within the rsp->node[] array. Note that other CPUs + * will access only the leaves of the hierarchy, thus seeing that no * grace period is in progress, at least until the corresponding * leaf node has been initialized. In addition, we have excluded * CPU-hotplug operations. * - * Note that the grace period cannot complete until we finish - * the initialization process, as there will be at least one - * qsmask bit set in the root node until that time, namely the - * one corresponding to this CPU, due to the fact that we have - * irqs disabled. + * The grace period cannot complete until the initialization + * process finishes, because this kthread handles both. */ rcu_for_each_node_breadth_first(rsp, rnp) { - raw_spin_lock(&rnp->lock); /* irqs already disabled. */ + raw_spin_lock_irq(&rnp->lock); + rdp = this_cpu_ptr(rsp->rda); rcu_preempt_check_blocked_tasks(rnp); rnp->qsmask = rnp->qsmaskinit; rnp->gpnum = rsp->gpnum; + WARN_ON_ONCE(rnp->completed != rsp->completed); rnp->completed = rsp->completed; if (rnp == rdp->mynode) rcu_start_gp_per_cpu(rsp, rnp, rdp); @@ -1121,37 +1074,54 @@ rcu_start_gp(struct rcu_state *rsp, unsigned long flags) trace_rcu_grace_period_init(rsp->name, rnp->gpnum, rnp->level, rnp->grplo, rnp->grphi, rnp->qsmask); - raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */ + raw_spin_unlock_irq(&rnp->lock); +#ifdef CONFIG_PROVE_RCU_DELAY + if ((random32() % (rcu_num_nodes * 8)) == 0) + schedule_timeout_uninterruptible(2); +#endif /* #ifdef CONFIG_PROVE_RCU_DELAY */ + cond_resched(); } - rnp = rcu_get_root(rsp); - raw_spin_lock(&rnp->lock); /* irqs already disabled. */ - rsp->fqs_state = RCU_SIGNAL_INIT; /* force_quiescent_state now OK. */ - raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */ - raw_spin_unlock_irqrestore(&rsp->onofflock, flags); + put_online_cpus(); + return 1; } /* - * Report a full set of quiescent states to the specified rcu_state - * data structure. This involves cleaning up after the prior grace - * period and letting rcu_start_gp() start up the next grace period - * if one is needed. Note that the caller must hold rnp->lock, as - * required by rcu_start_gp(), which will release it. + * Do one round of quiescent-state forcing. */ -static void rcu_report_qs_rsp(struct rcu_state *rsp, unsigned long flags) - __releases(rcu_get_root(rsp)->lock) +int rcu_gp_fqs(struct rcu_state *rsp, int fqs_state_in) { - unsigned long gp_duration; + int fqs_state = fqs_state_in; struct rcu_node *rnp = rcu_get_root(rsp); - struct rcu_data *rdp = this_cpu_ptr(rsp->rda); - WARN_ON_ONCE(!rcu_gp_in_progress(rsp)); + rsp->n_force_qs++; + if (fqs_state == RCU_SAVE_DYNTICK) { + /* Collect dyntick-idle snapshots. */ + force_qs_rnp(rsp, dyntick_save_progress_counter); + fqs_state = RCU_FORCE_QS; + } else { + /* Handle dyntick-idle and offline CPUs. */ + force_qs_rnp(rsp, rcu_implicit_dynticks_qs); + } + /* Clear flag to prevent immediate re-entry. */ + if (ACCESS_ONCE(rsp->gp_flags) & RCU_GP_FLAG_FQS) { + raw_spin_lock_irq(&rnp->lock); + rsp->gp_flags &= ~RCU_GP_FLAG_FQS; + raw_spin_unlock_irq(&rnp->lock); + } + return fqs_state; +} - /* - * Ensure that all grace-period and pre-grace-period activity - * is seen before the assignment to rsp->completed. - */ - smp_mb(); /* See above block comment. */ +/* + * Clean up after the old grace period. + */ +static void rcu_gp_cleanup(struct rcu_state *rsp) +{ + unsigned long gp_duration; + struct rcu_data *rdp; + struct rcu_node *rnp = rcu_get_root(rsp); + + raw_spin_lock_irq(&rnp->lock); gp_duration = jiffies - rsp->gp_start; if (gp_duration > rsp->gp_max) rsp->gp_max = gp_duration; @@ -1163,35 +1133,149 @@ static void rcu_report_qs_rsp(struct rcu_state *rsp, unsigned long flags) * they can do to advance the grace period. It is therefore * safe for us to drop the lock in order to mark the grace * period as completed in all of the rcu_node structures. - * - * But if this CPU needs another grace period, it will take - * care of this while initializing the next grace period. - * We use RCU_WAIT_TAIL instead of the usual RCU_DONE_TAIL - * because the callbacks have not yet been advanced: Those - * callbacks are waiting on the grace period that just now - * completed. */ - if (*rdp->nxttail[RCU_WAIT_TAIL] == NULL) { - raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */ + raw_spin_unlock_irq(&rnp->lock); - /* - * Propagate new ->completed value to rcu_node structures - * so that other CPUs don't have to wait until the start - * of the next grace period to process their callbacks. - */ - rcu_for_each_node_breadth_first(rsp, rnp) { - raw_spin_lock(&rnp->lock); /* irqs already disabled. */ - rnp->completed = rsp->gpnum; - raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */ - } - rnp = rcu_get_root(rsp); - raw_spin_lock(&rnp->lock); /* irqs already disabled. */ + /* + * Propagate new ->completed value to rcu_node structures so + * that other CPUs don't have to wait until the start of the next + * grace period to process their callbacks. This also avoids + * some nasty RCU grace-period initialization races by forcing + * the end of the current grace period to be completely recorded in + * all of the rcu_node structures before the beginning of the next + * grace period is recorded in any of the rcu_node structures. + */ + rcu_for_each_node_breadth_first(rsp, rnp) { + raw_spin_lock_irq(&rnp->lock); + rnp->completed = rsp->gpnum; + raw_spin_unlock_irq(&rnp->lock); + cond_resched(); } + rnp = rcu_get_root(rsp); + raw_spin_lock_irq(&rnp->lock); - rsp->completed = rsp->gpnum; /* Declare the grace period complete. */ + rsp->completed = rsp->gpnum; /* Declare grace period done. */ trace_rcu_grace_period(rsp->name, rsp->completed, "end"); rsp->fqs_state = RCU_GP_IDLE; - rcu_start_gp(rsp, flags); /* releases root node's rnp->lock. */ + rdp = this_cpu_ptr(rsp->rda); + if (cpu_needs_another_gp(rsp, rdp)) + rsp->gp_flags = 1; + raw_spin_unlock_irq(&rnp->lock); +} + +/* + * Body of kthread that handles grace periods. + */ +static int __noreturn rcu_gp_kthread(void *arg) +{ + int fqs_state; + unsigned long j; + int ret; + struct rcu_state *rsp = arg; + struct rcu_node *rnp = rcu_get_root(rsp); + + for (;;) { + + /* Handle grace-period start. */ + for (;;) { + wait_event_interruptible(rsp->gp_wq, + rsp->gp_flags & + RCU_GP_FLAG_INIT); + if ((rsp->gp_flags & RCU_GP_FLAG_INIT) && + rcu_gp_init(rsp)) + break; + cond_resched(); + flush_signals(current); + } + + /* Handle quiescent-state forcing. */ + fqs_state = RCU_SAVE_DYNTICK; + j = jiffies_till_first_fqs; + if (j > HZ) { + j = HZ; + jiffies_till_first_fqs = HZ; + } + for (;;) { + rsp->jiffies_force_qs = jiffies + j; + ret = wait_event_interruptible_timeout(rsp->gp_wq, + (rsp->gp_flags & RCU_GP_FLAG_FQS) || + (!ACCESS_ONCE(rnp->qsmask) && + !rcu_preempt_blocked_readers_cgp(rnp)), + j); + /* If grace period done, leave loop. */ + if (!ACCESS_ONCE(rnp->qsmask) && + !rcu_preempt_blocked_readers_cgp(rnp)) + break; + /* If time for quiescent-state forcing, do it. */ + if (ret == 0 || (rsp->gp_flags & RCU_GP_FLAG_FQS)) { + fqs_state = rcu_gp_fqs(rsp, fqs_state); + cond_resched(); + } else { + /* Deal with stray signal. */ + cond_resched(); + flush_signals(current); + } + j = jiffies_till_next_fqs; + if (j > HZ) { + j = HZ; + jiffies_till_next_fqs = HZ; + } else if (j < 1) { + j = 1; + jiffies_till_next_fqs = 1; + } + } + + /* Handle grace-period end. */ + rcu_gp_cleanup(rsp); + } +} + +/* + * Start a new RCU grace period if warranted, re-initializing the hierarchy + * in preparation for detecting the next grace period. The caller must hold + * the root node's ->lock, which is released before return. Hard irqs must + * be disabled. + * + * Note that it is legal for a dying CPU (which is marked as offline) to + * invoke this function. This can happen when the dying CPU reports its + * quiescent state. + */ +static void +rcu_start_gp(struct rcu_state *rsp, unsigned long flags) + __releases(rcu_get_root(rsp)->lock) +{ + struct rcu_data *rdp = this_cpu_ptr(rsp->rda); + struct rcu_node *rnp = rcu_get_root(rsp); + + if (!rsp->gp_kthread || + !cpu_needs_another_gp(rsp, rdp)) { + /* + * Either we have not yet spawned the grace-period + * task or this CPU does not need another grace period. + * Either way, don't start a new grace period. + */ + raw_spin_unlock_irqrestore(&rnp->lock, flags); + return; + } + + rsp->gp_flags = RCU_GP_FLAG_INIT; + raw_spin_unlock_irqrestore(&rnp->lock, flags); + wake_up(&rsp->gp_wq); +} + +/* + * Report a full set of quiescent states to the specified rcu_state + * data structure. This involves cleaning up after the prior grace + * period and letting rcu_start_gp() start up the next grace period + * if one is needed. Note that the caller must hold rnp->lock, as + * required by rcu_start_gp(), which will release it. + */ +static void rcu_report_qs_rsp(struct rcu_state *rsp, unsigned long flags) + __releases(rcu_get_root(rsp)->lock) +{ + WARN_ON_ONCE(!rcu_gp_in_progress(rsp)); + raw_spin_unlock_irqrestore(&rcu_get_root(rsp)->lock, flags); + wake_up(&rsp->gp_wq); /* Memory barrier implied by wake_up() path. */ } /* @@ -1260,7 +1344,7 @@ rcu_report_qs_rnp(unsigned long mask, struct rcu_state *rsp, * based on quiescent states detected in an earlier grace period! */ static void -rcu_report_qs_rdp(int cpu, struct rcu_state *rsp, struct rcu_data *rdp, long lastgp) +rcu_report_qs_rdp(int cpu, struct rcu_state *rsp, struct rcu_data *rdp) { unsigned long flags; unsigned long mask; @@ -1268,7 +1352,8 @@ rcu_report_qs_rdp(int cpu, struct rcu_state *rsp, struct rcu_data *rdp, long las rnp = rdp->mynode; raw_spin_lock_irqsave(&rnp->lock, flags); - if (lastgp != rnp->gpnum || rnp->completed == rnp->gpnum) { + if (rdp->passed_quiesce == 0 || rdp->gpnum != rnp->gpnum || + rnp->completed == rnp->gpnum) { /* * The grace period in which this quiescent state was @@ -1327,7 +1412,7 @@ rcu_check_quiescent_state(struct rcu_state *rsp, struct rcu_data *rdp) * Tell RCU we are done (but rcu_report_qs_rdp() will be the * judge of that). */ - rcu_report_qs_rdp(rdp->cpu, rsp, rdp, rdp->passed_quiesce_gpnum); + rcu_report_qs_rdp(rdp->cpu, rsp, rdp); } #ifdef CONFIG_HOTPLUG_CPU @@ -1676,6 +1761,7 @@ static void force_qs_rnp(struct rcu_state *rsp, int (*f)(struct rcu_data *)) struct rcu_node *rnp; rcu_for_each_leaf_node(rsp, rnp) { + cond_resched(); mask = 0; raw_spin_lock_irqsave(&rnp->lock, flags); if (!rcu_gp_in_progress(rsp)) { @@ -1712,72 +1798,39 @@ static void force_qs_rnp(struct rcu_state *rsp, int (*f)(struct rcu_data *)) * Force quiescent states on reluctant CPUs, and also detect which * CPUs are in dyntick-idle mode. */ -static void force_quiescent_state(struct rcu_state *rsp, int relaxed) +static void force_quiescent_state(struct rcu_state *rsp) { unsigned long flags; - struct rcu_node *rnp = rcu_get_root(rsp); - - trace_rcu_utilization("Start fqs"); - if (!rcu_gp_in_progress(rsp)) { - trace_rcu_utilization("End fqs"); - return; /* No grace period in progress, nothing to force. */ - } - if (!raw_spin_trylock_irqsave(&rsp->fqslock, flags)) { - rsp->n_force_qs_lh++; /* Inexact, can lose counts. Tough! */ - trace_rcu_utilization("End fqs"); - return; /* Someone else is already on the job. */ - } - if (relaxed && ULONG_CMP_GE(rsp->jiffies_force_qs, jiffies)) - goto unlock_fqs_ret; /* no emergency and done recently. */ - rsp->n_force_qs++; - raw_spin_lock(&rnp->lock); /* irqs already disabled */ - rsp->jiffies_force_qs = jiffies + RCU_JIFFIES_TILL_FORCE_QS; - if(!rcu_gp_in_progress(rsp)) { - rsp->n_force_qs_ngp++; - raw_spin_unlock(&rnp->lock); /* irqs remain disabled */ - goto unlock_fqs_ret; /* no GP in progress, time updated. */ - } - rsp->fqs_active = 1; - switch (rsp->fqs_state) { - case RCU_GP_IDLE: - case RCU_GP_INIT: - - break; /* grace period idle or initializing, ignore. */ - - case RCU_SAVE_DYNTICK: - - raw_spin_unlock(&rnp->lock); /* irqs remain disabled */ - - /* Record dyntick-idle state. */ - force_qs_rnp(rsp, dyntick_save_progress_counter); - raw_spin_lock(&rnp->lock); /* irqs already disabled */ - if (rcu_gp_in_progress(rsp)) - rsp->fqs_state = RCU_FORCE_QS; - break; - - case RCU_FORCE_QS: - - /* Check dyntick-idle state, send IPI to laggarts. */ - raw_spin_unlock(&rnp->lock); /* irqs remain disabled */ - force_qs_rnp(rsp, rcu_implicit_dynticks_qs); - - /* Leave state in case more forcing is required. */ - - raw_spin_lock(&rnp->lock); /* irqs already disabled */ - break; + bool ret; + struct rcu_node *rnp; + struct rcu_node *rnp_old = NULL; + + /* Funnel through hierarchy to reduce memory contention. */ + rnp = per_cpu_ptr(rsp->rda, raw_smp_processor_id())->mynode; + for (; rnp != NULL; rnp = rnp->parent) { + ret = (ACCESS_ONCE(rsp->gp_flags) & RCU_GP_FLAG_FQS) || + !raw_spin_trylock(&rnp->fqslock); + if (rnp_old != NULL) + raw_spin_unlock(&rnp_old->fqslock); + if (ret) { + rsp->n_force_qs_lh++; + return; + } + rnp_old = rnp; } - rsp->fqs_active = 0; - if (rsp->fqs_need_gp) { - raw_spin_unlock(&rsp->fqslock); /* irqs remain disabled */ - rsp->fqs_need_gp = 0; - rcu_start_gp(rsp, flags); /* releases rnp->lock */ - trace_rcu_utilization("End fqs"); - return; + /* rnp_old == rcu_get_root(rsp), rnp == NULL. */ + + /* Reached the root of the rcu_node tree, acquire lock. */ + raw_spin_lock_irqsave(&rnp_old->lock, flags); + raw_spin_unlock(&rnp_old->fqslock); + if (ACCESS_ONCE(rsp->gp_flags) & RCU_GP_FLAG_FQS) { + rsp->n_force_qs_lh++; + raw_spin_unlock_irqrestore(&rnp_old->lock, flags); + return; /* Someone beat us to it. */ } - raw_spin_unlock(&rnp->lock); /* irqs remain disabled */ -unlock_fqs_ret: - raw_spin_unlock_irqrestore(&rsp->fqslock, flags); - trace_rcu_utilization("End fqs"); + rsp->gp_flags |= RCU_GP_FLAG_FQS; + raw_spin_unlock_irqrestore(&rnp_old->lock, flags); + wake_up(&rsp->gp_wq); /* Memory barrier implied by wake_up() path. */ } /* @@ -1794,13 +1847,6 @@ __rcu_process_callbacks(struct rcu_state *rsp) WARN_ON_ONCE(rdp->beenonline == 0); /* - * If an RCU GP has gone long enough, go check for dyntick - * idle CPUs and, if needed, send resched IPIs. - */ - if (ULONG_CMP_LT(ACCESS_ONCE(rsp->jiffies_force_qs), jiffies)) - force_quiescent_state(rsp, 1); - - /* * Advance callbacks in response to end of earlier grace * period that some other CPU ended. */ @@ -1827,6 +1873,8 @@ static void rcu_process_callbacks(struct softirq_action *unused) { struct rcu_state *rsp; + if (cpu_is_offline(smp_processor_id())) + return; trace_rcu_utilization("Start RCU core"); for_each_rcu_flavor(rsp) __rcu_process_callbacks(rsp); @@ -1898,12 +1946,11 @@ static void __call_rcu_core(struct rcu_state *rsp, struct rcu_data *rdp, rdp->blimit = LONG_MAX; if (rsp->n_force_qs == rdp->n_force_qs_snap && *rdp->nxttail[RCU_DONE_TAIL] != head) - force_quiescent_state(rsp, 0); + force_quiescent_state(rsp); rdp->n_force_qs_snap = rsp->n_force_qs; rdp->qlen_last_fqs_check = rdp->qlen; } - } else if (ULONG_CMP_LT(ACCESS_ONCE(rsp->jiffies_force_qs), jiffies)) - force_quiescent_state(rsp, 1); + } } static void @@ -1918,8 +1965,6 @@ __call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu), head->func = func; head->next = NULL; - smp_mb(); /* Ensure RCU update seen before callback registry. */ - /* * Opportunistically note grace-period endings and beginnings. * Note that we might see a beginning right after we see an @@ -2190,17 +2235,7 @@ static int __rcu_pending(struct rcu_state *rsp, struct rcu_data *rdp) /* Is the RCU core waiting for a quiescent state from this CPU? */ if (rcu_scheduler_fully_active && rdp->qs_pending && !rdp->passed_quiesce) { - - /* - * If force_quiescent_state() coming soon and this CPU - * needs a quiescent state, and this is either RCU-sched - * or RCU-bh, force a local reschedule. - */ rdp->n_rp_qs_pending++; - if (!rdp->preemptible && - ULONG_CMP_LT(ACCESS_ONCE(rsp->jiffies_force_qs) - 1, - jiffies)) - set_need_resched(); } else if (rdp->qs_pending && rdp->passed_quiesce) { rdp->n_rp_report_qs++; return 1; @@ -2230,13 +2265,6 @@ static int __rcu_pending(struct rcu_state *rsp, struct rcu_data *rdp) return 1; } - /* Has an RCU GP gone long enough to send resched IPIs &c? */ - if (rcu_gp_in_progress(rsp) && - ULONG_CMP_LT(ACCESS_ONCE(rsp->jiffies_force_qs), jiffies)) { - rdp->n_rp_need_fqs++; - return 1; - } - /* nothing to do */ rdp->n_rp_need_nothing++; return 0; @@ -2506,7 +2534,6 @@ rcu_init_percpu_data(int cpu, struct rcu_state *rsp, int preemptible) rdp->completed = rnp->completed; rdp->passed_quiesce = 0; rdp->qs_pending = 0; - rdp->passed_quiesce_gpnum = rnp->gpnum - 1; trace_rcu_grace_period(rsp->name, rdp->gpnum, "cpuonl"); } raw_spin_unlock(&rnp->lock); /* irqs already disabled. */ @@ -2578,6 +2605,28 @@ static int __cpuinit rcu_cpu_notify(struct notifier_block *self, } /* + * Spawn the kthread that handles this RCU flavor's grace periods. + */ +static int __init rcu_spawn_gp_kthread(void) +{ + unsigned long flags; + struct rcu_node *rnp; + struct rcu_state *rsp; + struct task_struct *t; + + for_each_rcu_flavor(rsp) { + t = kthread_run(rcu_gp_kthread, rsp, rsp->name); + BUG_ON(IS_ERR(t)); + rnp = rcu_get_root(rsp); + raw_spin_lock_irqsave(&rnp->lock, flags); + rsp->gp_kthread = t; + raw_spin_unlock_irqrestore(&rnp->lock, flags); + } + return 0; +} +early_initcall(rcu_spawn_gp_kthread); + +/* * This function is invoked towards the end of the scheduler's initialization * process. Before this is called, the idle task might contain * RCU read-side critical sections (during which time, this idle @@ -2612,7 +2661,7 @@ static void __init rcu_init_levelspread(struct rcu_state *rsp) int cprv; int i; - cprv = NR_CPUS; + cprv = nr_cpu_ids; for (i = rcu_num_lvls - 1; i >= 0; i--) { ccur = rsp->levelcnt[i]; rsp->levelspread[i] = (cprv + ccur - 1) / ccur; @@ -2627,10 +2676,14 @@ static void __init rcu_init_levelspread(struct rcu_state *rsp) static void __init rcu_init_one(struct rcu_state *rsp, struct rcu_data __percpu *rda) { - static char *buf[] = { "rcu_node_level_0", - "rcu_node_level_1", - "rcu_node_level_2", - "rcu_node_level_3" }; /* Match MAX_RCU_LVLS */ + static char *buf[] = { "rcu_node_0", + "rcu_node_1", + "rcu_node_2", + "rcu_node_3" }; /* Match MAX_RCU_LVLS */ + static char *fqs[] = { "rcu_node_fqs_0", + "rcu_node_fqs_1", + "rcu_node_fqs_2", + "rcu_node_fqs_3" }; /* Match MAX_RCU_LVLS */ int cpustride = 1; int i; int j; @@ -2655,7 +2708,11 @@ static void __init rcu_init_one(struct rcu_state *rsp, raw_spin_lock_init(&rnp->lock); lockdep_set_class_and_name(&rnp->lock, &rcu_node_class[i], buf[i]); - rnp->gpnum = 0; + raw_spin_lock_init(&rnp->fqslock); + lockdep_set_class_and_name(&rnp->fqslock, + &rcu_fqs_class[i], fqs[i]); + rnp->gpnum = rsp->gpnum; + rnp->completed = rsp->completed; rnp->qsmask = 0; rnp->qsmaskinit = 0; rnp->grplo = j * cpustride; @@ -2678,6 +2735,7 @@ static void __init rcu_init_one(struct rcu_state *rsp, } rsp->rda = rda; + init_waitqueue_head(&rsp->gp_wq); rnp = rsp->level[rcu_num_lvls - 1]; for_each_possible_cpu(i) { while (i > rnp->grphi) @@ -2701,7 +2759,8 @@ static void __init rcu_init_geometry(void) int rcu_capacity[MAX_RCU_LVLS + 1]; /* If the compile-time values are accurate, just leave. */ - if (rcu_fanout_leaf == CONFIG_RCU_FANOUT_LEAF) + if (rcu_fanout_leaf == CONFIG_RCU_FANOUT_LEAF && + nr_cpu_ids == NR_CPUS) return; /* diff --git a/kernel/rcutree.h b/kernel/rcutree.h index 94dfdf1f31f5..3cf71ce206e0 100644 --- a/kernel/rcutree.h +++ b/kernel/rcutree.h @@ -202,6 +202,7 @@ struct rcu_node { /* per-CPU kthreads as needed. */ unsigned int node_kthread_status; /* State of node_kthread_task for tracing. */ + raw_spinlock_t fqslock ____cacheline_internodealigned_in_smp; } ____cacheline_internodealigned_in_smp; /* @@ -245,8 +246,6 @@ struct rcu_data { /* in order to detect GP end. */ unsigned long gpnum; /* Highest gp number that this CPU */ /* is aware of having started. */ - unsigned long passed_quiesce_gpnum; - /* gpnum at time of quiescent state. */ bool passed_quiesce; /* User-mode/idle loop etc. */ bool qs_pending; /* Core waits for quiesc state. */ bool beenonline; /* CPU online at least once. */ @@ -312,11 +311,13 @@ struct rcu_data { unsigned long n_rp_cpu_needs_gp; unsigned long n_rp_gp_completed; unsigned long n_rp_gp_started; - unsigned long n_rp_need_fqs; unsigned long n_rp_need_nothing; - /* 6) _rcu_barrier() callback. */ + /* 6) _rcu_barrier() and OOM callbacks. */ struct rcu_head barrier_head; +#ifdef CONFIG_RCU_FAST_NO_HZ + struct rcu_head oom_head; +#endif /* #ifdef CONFIG_RCU_FAST_NO_HZ */ int cpu; struct rcu_state *rsp; @@ -375,20 +376,17 @@ struct rcu_state { u8 fqs_state ____cacheline_internodealigned_in_smp; /* Force QS state. */ - u8 fqs_active; /* force_quiescent_state() */ - /* is running. */ - u8 fqs_need_gp; /* A CPU was prevented from */ - /* starting a new grace */ - /* period because */ - /* force_quiescent_state() */ - /* was running. */ u8 boost; /* Subject to priority boost. */ unsigned long gpnum; /* Current gp number. */ unsigned long completed; /* # of last completed gp. */ + struct task_struct *gp_kthread; /* Task for grace periods. */ + wait_queue_head_t gp_wq; /* Where GP task waits. */ + int gp_flags; /* Commands for GP task. */ /* End of fields guarded by root rcu_node's lock. */ - raw_spinlock_t onofflock; /* exclude on/offline and */ + raw_spinlock_t onofflock ____cacheline_internodealigned_in_smp; + /* exclude on/offline and */ /* starting new GP. */ struct rcu_head *orphan_nxtlist; /* Orphaned callbacks that */ /* need a grace period. */ @@ -403,8 +401,6 @@ struct rcu_state { struct completion barrier_completion; /* Wake at barrier end. */ unsigned long n_barrier_done; /* ++ at start and end of */ /* _rcu_barrier(). */ - raw_spinlock_t fqslock; /* Only one task forcing */ - /* quiescent states. */ unsigned long jiffies_force_qs; /* Time at which to invoke */ /* force_quiescent_state(). */ unsigned long n_force_qs; /* Number of calls to */ @@ -423,6 +419,10 @@ struct rcu_state { struct list_head flavors; /* List of RCU flavors. */ }; +/* Values for rcu_state structure's gp_flags field. */ +#define RCU_GP_FLAG_INIT 0x1 /* Need grace-period initialization. */ +#define RCU_GP_FLAG_FQS 0x2 /* Need grace-period quiescent-state forcing. */ + extern struct list_head rcu_struct_flavors; #define for_each_rcu_flavor(rsp) \ list_for_each_entry((rsp), &rcu_struct_flavors, flavors) diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h index 7f3244c0df01..46d7d6cf16db 100644 --- a/kernel/rcutree_plugin.h +++ b/kernel/rcutree_plugin.h @@ -25,6 +25,7 @@ */ #include <linux/delay.h> +#include <linux/oom.h> #define RCU_KTHREAD_PRIO 1 @@ -118,7 +119,7 @@ EXPORT_SYMBOL_GPL(rcu_batches_completed); */ void rcu_force_quiescent_state(void) { - force_quiescent_state(&rcu_preempt_state, 0); + force_quiescent_state(&rcu_preempt_state); } EXPORT_SYMBOL_GPL(rcu_force_quiescent_state); @@ -136,8 +137,6 @@ static void rcu_preempt_qs(int cpu) { struct rcu_data *rdp = &per_cpu(rcu_preempt_data, cpu); - rdp->passed_quiesce_gpnum = rdp->gpnum; - barrier(); if (rdp->passed_quiesce == 0) trace_rcu_grace_period("rcu_preempt", rdp->gpnum, "cpuqs"); rdp->passed_quiesce = 1; @@ -422,9 +421,11 @@ static void rcu_print_detail_task_stall_rnp(struct rcu_node *rnp) unsigned long flags; struct task_struct *t; - if (!rcu_preempt_blocked_readers_cgp(rnp)) - return; raw_spin_lock_irqsave(&rnp->lock, flags); + if (!rcu_preempt_blocked_readers_cgp(rnp)) { + raw_spin_unlock_irqrestore(&rnp->lock, flags); + return; + } t = list_entry(rnp->gp_tasks, struct task_struct, rcu_node_entry); list_for_each_entry_continue(t, &rnp->blkd_tasks, rcu_node_entry) @@ -584,17 +585,23 @@ static int rcu_preempt_offline_tasks(struct rcu_state *rsp, raw_spin_unlock(&rnp_root->lock); /* irqs still disabled */ } + rnp->gp_tasks = NULL; + rnp->exp_tasks = NULL; #ifdef CONFIG_RCU_BOOST - /* In case root is being boosted and leaf is not. */ + rnp->boost_tasks = NULL; + /* + * In case root is being boosted and leaf was not. Make sure + * that we boost the tasks blocking the current grace period + * in this case. + */ raw_spin_lock(&rnp_root->lock); /* irqs already disabled */ if (rnp_root->boost_tasks != NULL && - rnp_root->boost_tasks != rnp_root->gp_tasks) + rnp_root->boost_tasks != rnp_root->gp_tasks && + rnp_root->boost_tasks != rnp_root->exp_tasks) rnp_root->boost_tasks = rnp_root->gp_tasks; raw_spin_unlock(&rnp_root->lock); /* irqs still disabled */ #endif /* #ifdef CONFIG_RCU_BOOST */ - rnp->gp_tasks = NULL; - rnp->exp_tasks = NULL; return retval; } @@ -676,7 +683,7 @@ void synchronize_rcu(void) EXPORT_SYMBOL_GPL(synchronize_rcu); static DECLARE_WAIT_QUEUE_HEAD(sync_rcu_preempt_exp_wq); -static long sync_rcu_preempt_exp_count; +static unsigned long sync_rcu_preempt_exp_count; static DEFINE_MUTEX(sync_rcu_preempt_exp_mutex); /* @@ -791,7 +798,7 @@ void synchronize_rcu_expedited(void) unsigned long flags; struct rcu_node *rnp; struct rcu_state *rsp = &rcu_preempt_state; - long snap; + unsigned long snap; int trycount = 0; smp_mb(); /* Caller's modifications seen first by other CPUs. */ @@ -799,33 +806,47 @@ void synchronize_rcu_expedited(void) smp_mb(); /* Above access cannot bleed into critical section. */ /* + * Block CPU-hotplug operations. This means that any CPU-hotplug + * operation that finds an rcu_node structure with tasks in the + * process of being boosted will know that all tasks blocking + * this expedited grace period will already be in the process of + * being boosted. This simplifies the process of moving tasks + * from leaf to root rcu_node structures. + */ + get_online_cpus(); + + /* * Acquire lock, falling back to synchronize_rcu() if too many * lock-acquisition failures. Of course, if someone does the * expedited grace period for us, just leave. */ while (!mutex_trylock(&sync_rcu_preempt_exp_mutex)) { + if (ULONG_CMP_LT(snap, + ACCESS_ONCE(sync_rcu_preempt_exp_count))) { + put_online_cpus(); + goto mb_ret; /* Others did our work for us. */ + } if (trycount++ < 10) { udelay(trycount * num_online_cpus()); } else { + put_online_cpus(); synchronize_rcu(); return; } - if ((ACCESS_ONCE(sync_rcu_preempt_exp_count) - snap) > 0) - goto mb_ret; /* Others did our work for us. */ } - if ((ACCESS_ONCE(sync_rcu_preempt_exp_count) - snap) > 0) + if (ULONG_CMP_LT(snap, ACCESS_ONCE(sync_rcu_preempt_exp_count))) { + put_online_cpus(); goto unlock_mb_ret; /* Others did our work for us. */ + } /* force all RCU readers onto ->blkd_tasks lists. */ synchronize_sched_expedited(); - raw_spin_lock_irqsave(&rsp->onofflock, flags); - /* Initialize ->expmask for all non-leaf rcu_node structures. */ rcu_for_each_nonleaf_node_breadth_first(rsp, rnp) { - raw_spin_lock(&rnp->lock); /* irqs already disabled. */ + raw_spin_lock_irqsave(&rnp->lock, flags); rnp->expmask = rnp->qsmaskinit; - raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */ + raw_spin_unlock_irqrestore(&rnp->lock, flags); } /* Snapshot current state of ->blkd_tasks lists. */ @@ -834,7 +855,7 @@ void synchronize_rcu_expedited(void) if (NUM_RCU_NODES > 1) sync_rcu_preempt_exp_init(rsp, rcu_get_root(rsp)); - raw_spin_unlock_irqrestore(&rsp->onofflock, flags); + put_online_cpus(); /* Wait for snapshotted ->blkd_tasks lists to drain. */ rnp = rcu_get_root(rsp); @@ -1191,9 +1212,9 @@ static int rcu_boost_kthread(void *arg) * kthread to start boosting them. If there is an expedited grace * period in progress, it is always time to boost. * - * The caller must hold rnp->lock, which this function releases, - * but irqs remain disabled. The ->boost_kthread_task is immortal, - * so we don't need to worry about it going away. + * The caller must hold rnp->lock, which this function releases. + * The ->boost_kthread_task is immortal, so we don't need to worry + * about it going away. */ static void rcu_initiate_boost(struct rcu_node *rnp, unsigned long flags) { @@ -2075,16 +2096,16 @@ static void rcu_prepare_for_idle(int cpu) #ifdef CONFIG_TREE_PREEMPT_RCU if (per_cpu(rcu_preempt_data, cpu).nxtlist) { rcu_preempt_qs(cpu); - force_quiescent_state(&rcu_preempt_state, 0); + force_quiescent_state(&rcu_preempt_state); } #endif /* #ifdef CONFIG_TREE_PREEMPT_RCU */ if (per_cpu(rcu_sched_data, cpu).nxtlist) { rcu_sched_qs(cpu); - force_quiescent_state(&rcu_sched_state, 0); + force_quiescent_state(&rcu_sched_state); } if (per_cpu(rcu_bh_data, cpu).nxtlist) { rcu_bh_qs(cpu); - force_quiescent_state(&rcu_bh_state, 0); + force_quiescent_state(&rcu_bh_state); } /* @@ -2112,6 +2133,88 @@ static void rcu_idle_count_callbacks_posted(void) __this_cpu_add(rcu_dynticks.nonlazy_posted, 1); } +/* + * Data for flushing lazy RCU callbacks at OOM time. + */ +static atomic_t oom_callback_count; +static DECLARE_WAIT_QUEUE_HEAD(oom_callback_wq); + +/* + * RCU OOM callback -- decrement the outstanding count and deliver the + * wake-up if we are the last one. + */ +static void rcu_oom_callback(struct rcu_head *rhp) +{ + if (atomic_dec_and_test(&oom_callback_count)) + wake_up(&oom_callback_wq); +} + +/* + * Post an rcu_oom_notify callback on the current CPU if it has at + * least one lazy callback. This will unnecessarily post callbacks + * to CPUs that already have a non-lazy callback at the end of their + * callback list, but this is an infrequent operation, so accept some + * extra overhead to keep things simple. + */ +static void rcu_oom_notify_cpu(void *unused) +{ + struct rcu_state *rsp; + struct rcu_data *rdp; + + for_each_rcu_flavor(rsp) { + rdp = __this_cpu_ptr(rsp->rda); + if (rdp->qlen_lazy != 0) { + atomic_inc(&oom_callback_count); + rsp->call(&rdp->oom_head, rcu_oom_callback); + } + } +} + +/* + * If low on memory, ensure that each CPU has a non-lazy callback. + * This will wake up CPUs that have only lazy callbacks, in turn + * ensuring that they free up the corresponding memory in a timely manner. + * Because an uncertain amount of memory will be freed in some uncertain + * timeframe, we do not claim to have freed anything. + */ +static int rcu_oom_notify(struct notifier_block *self, + unsigned long notused, void *nfreed) +{ + int cpu; + + /* Wait for callbacks from earlier instance to complete. */ + wait_event(oom_callback_wq, atomic_read(&oom_callback_count) == 0); + + /* + * Prevent premature wakeup: ensure that all increments happen + * before there is a chance of the counter reaching zero. + */ + atomic_set(&oom_callback_count, 1); + + get_online_cpus(); + for_each_online_cpu(cpu) { + smp_call_function_single(cpu, rcu_oom_notify_cpu, NULL, 1); + cond_resched(); + } + put_online_cpus(); + + /* Unconditionally decrement: no need to wake ourselves up. */ + atomic_dec(&oom_callback_count); + + return NOTIFY_OK; +} + +static struct notifier_block rcu_oom_nb = { + .notifier_call = rcu_oom_notify +}; + +static int __init rcu_register_oom_notifier(void) +{ + register_oom_notifier(&rcu_oom_nb); + return 0; +} +early_initcall(rcu_register_oom_notifier); + #endif /* #else #if !defined(CONFIG_RCU_FAST_NO_HZ) */ #ifdef CONFIG_RCU_CPU_STALL_INFO @@ -2122,11 +2225,15 @@ static void print_cpu_stall_fast_no_hz(char *cp, int cpu) { struct rcu_dynticks *rdtp = &per_cpu(rcu_dynticks, cpu); struct timer_list *tltp = &rdtp->idle_gp_timer; + char c; - sprintf(cp, "drain=%d %c timer=%lu", - rdtp->dyntick_drain, - rdtp->dyntick_holdoff == jiffies ? 'H' : '.', - timer_pending(tltp) ? tltp->expires - jiffies : -1); + c = rdtp->dyntick_holdoff == jiffies ? 'H' : '.'; + if (timer_pending(tltp)) + sprintf(cp, "drain=%d %c timer=%lu", + rdtp->dyntick_drain, c, tltp->expires - jiffies); + else + sprintf(cp, "drain=%d %c timer not pending", + rdtp->dyntick_drain, c); } #else /* #ifdef CONFIG_RCU_FAST_NO_HZ */ @@ -2194,11 +2301,10 @@ static void zero_cpu_stall_ticks(struct rcu_data *rdp) /* Increment ->ticks_this_gp for all flavors of RCU. */ static void increment_cpu_stall_ticks(void) { - __get_cpu_var(rcu_sched_data).ticks_this_gp++; - __get_cpu_var(rcu_bh_data).ticks_this_gp++; -#ifdef CONFIG_TREE_PREEMPT_RCU - __get_cpu_var(rcu_preempt_data).ticks_this_gp++; -#endif /* #ifdef CONFIG_TREE_PREEMPT_RCU */ + struct rcu_state *rsp; + + for_each_rcu_flavor(rsp) + __this_cpu_ptr(rsp->rda)->ticks_this_gp++; } #else /* #ifdef CONFIG_RCU_CPU_STALL_INFO */ diff --git a/kernel/rcutree_trace.c b/kernel/rcutree_trace.c index 6a2e52a85d77..7340efdcd324 100644 --- a/kernel/rcutree_trace.c +++ b/kernel/rcutree_trace.c @@ -86,12 +86,11 @@ static void print_one_rcu_data(struct seq_file *m, struct rcu_data *rdp) { if (!rdp->beenonline) return; - seq_printf(m, "%3d%cc=%lu g=%lu pq=%d pgp=%lu qp=%d", + seq_printf(m, "%3d%cc=%lu g=%lu pq=%d qp=%d", rdp->cpu, cpu_is_offline(rdp->cpu) ? '!' : ' ', rdp->completed, rdp->gpnum, - rdp->passed_quiesce, rdp->passed_quiesce_gpnum, - rdp->qs_pending); + rdp->passed_quiesce, rdp->qs_pending); seq_printf(m, " dt=%d/%llx/%d df=%lu", atomic_read(&rdp->dynticks->dynticks), rdp->dynticks->dynticks_nesting, @@ -150,12 +149,11 @@ static void print_one_rcu_data_csv(struct seq_file *m, struct rcu_data *rdp) { if (!rdp->beenonline) return; - seq_printf(m, "%d,%s,%lu,%lu,%d,%lu,%d", + seq_printf(m, "%d,%s,%lu,%lu,%d,%d", rdp->cpu, cpu_is_offline(rdp->cpu) ? "\"N\"" : "\"Y\"", rdp->completed, rdp->gpnum, - rdp->passed_quiesce, rdp->passed_quiesce_gpnum, - rdp->qs_pending); + rdp->passed_quiesce, rdp->qs_pending); seq_printf(m, ",%d,%llx,%d,%lu", atomic_read(&rdp->dynticks->dynticks), rdp->dynticks->dynticks_nesting, @@ -186,7 +184,7 @@ static int show_rcudata_csv(struct seq_file *m, void *unused) int cpu; struct rcu_state *rsp; - seq_puts(m, "\"CPU\",\"Online?\",\"c\",\"g\",\"pq\",\"pgp\",\"pq\","); + seq_puts(m, "\"CPU\",\"Online?\",\"c\",\"g\",\"pq\",\"pq\","); seq_puts(m, "\"dt\",\"dt nesting\",\"dt NMI nesting\",\"df\","); seq_puts(m, "\"of\",\"qll\",\"ql\",\"qs\""); #ifdef CONFIG_RCU_BOOST @@ -386,10 +384,9 @@ static void print_one_rcu_pending(struct seq_file *m, struct rcu_data *rdp) rdp->n_rp_report_qs, rdp->n_rp_cb_ready, rdp->n_rp_cpu_needs_gp); - seq_printf(m, "gpc=%ld gps=%ld nf=%ld nn=%ld\n", + seq_printf(m, "gpc=%ld gps=%ld nn=%ld\n", rdp->n_rp_gp_completed, rdp->n_rp_gp_started, - rdp->n_rp_need_fqs, rdp->n_rp_need_nothing); } diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index 024540f97f74..4b1785a7bb83 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -436,7 +436,8 @@ static bool can_stop_idle_tick(int cpu, struct tick_sched *ts) if (unlikely(local_softirq_pending() && cpu_online(cpu))) { static int ratelimit; - if (ratelimit < 10) { + if (ratelimit < 10 && + (local_softirq_pending() & SOFTIRQ_STOP_IDLE_MASK)) { printk(KERN_ERR "NOHZ: local_softirq_pending %02x\n", (unsigned int) local_softirq_pending()); ratelimit++; |