From 2431774f04d1050292054c763070021bade7b151 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Tue, 20 Jul 2021 06:16:27 -0700 Subject: rcu: Mark accesses to rcu_state.n_force_qs This commit marks accesses to the rcu_state.n_force_qs. These data races are hard to make happen, but syzkaller was equal to the task. Reported-by: syzbot+e08a83a1940ec3846cd5@syzkaller.appspotmail.com Acked-by: Marco Elver Signed-off-by: Paul E. McKenney --- kernel/rcu/tree.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'kernel/rcu') diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index bce848e50512..c89f5e6c4154 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -1907,7 +1907,7 @@ static void rcu_gp_fqs(bool first_time) struct rcu_node *rnp = rcu_get_root(); WRITE_ONCE(rcu_state.gp_activity, jiffies); - rcu_state.n_force_qs++; + WRITE_ONCE(rcu_state.n_force_qs, rcu_state.n_force_qs + 1); if (first_time) { /* Collect dyntick-idle snapshots. */ force_qs_rnp(dyntick_save_progress_counter); @@ -2550,7 +2550,7 @@ static void rcu_do_batch(struct rcu_data *rdp) /* Reset ->qlen_last_fqs_check trigger if enough CBs have drained. */ if (count == 0 && rdp->qlen_last_fqs_check != 0) { rdp->qlen_last_fqs_check = 0; - rdp->n_force_qs_snap = rcu_state.n_force_qs; + rdp->n_force_qs_snap = READ_ONCE(rcu_state.n_force_qs); } else if (count < rdp->qlen_last_fqs_check - qhimark) rdp->qlen_last_fqs_check = count; @@ -2898,10 +2898,10 @@ static void __call_rcu_core(struct rcu_data *rdp, struct rcu_head *head, } else { /* Give the grace period a kick. */ rdp->blimit = DEFAULT_MAX_RCU_BLIMIT; - if (rcu_state.n_force_qs == rdp->n_force_qs_snap && + if (READ_ONCE(rcu_state.n_force_qs) == rdp->n_force_qs_snap && rcu_segcblist_first_pend_cb(&rdp->cblist) != head) rcu_force_quiescent_state(); - rdp->n_force_qs_snap = rcu_state.n_force_qs; + rdp->n_force_qs_snap = READ_ONCE(rcu_state.n_force_qs); rdp->qlen_last_fqs_check = rcu_segcblist_n_cbs(&rdp->cblist); } } @@ -4128,7 +4128,7 @@ int rcutree_prepare_cpu(unsigned int cpu) /* Set up local state, ensuring consistent view of global state. */ raw_spin_lock_irqsave_rcu_node(rnp, flags); rdp->qlen_last_fqs_check = 0; - rdp->n_force_qs_snap = rcu_state.n_force_qs; + rdp->n_force_qs_snap = READ_ONCE(rcu_state.n_force_qs); rdp->blimit = blimit; rdp->dynticks_nesting = 1; /* CPU not up, no tearing. */ rcu_dynticks_eqs_online(); -- cgit v1.2.3 From 52b030aa278642194f5d25872c33360013b0167e Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Wed, 21 Jul 2021 13:24:35 -0700 Subject: rcu-nocb: Fix a couple of tree_nocb code-style nits This commit removes a non-value-returning "return" statement at the end of __call_rcu_nocb_wake() and adds a blank line following declarations in nocb_cb_can_run(). Signed-off-by: Paul E. 
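As a toy model of what the n_force_qs patch at the top of this series is doing (plain userspace C, not kernel code; READ_ONCE()/WRITE_ONCE() below are simplified volatile-cast stand-ins for the kernel's macros, and the thread functions are invented for the example): the lone updater marks its store and the lockless reader marks its load, which documents the race as intentional and keeps the compiler from tearing or refetching the accesses.

    #include <pthread.h>
    #include <stdio.h>

    /* Simplified stand-ins for the kernel's READ_ONCE()/WRITE_ONCE(). */
    #define READ_ONCE(x)      (*(volatile __typeof__(x) *)&(x))
    #define WRITE_ONCE(x, v)  (*(volatile __typeof__(x) *)&(x) = (v))

    static unsigned long n_force_qs;    /* stands in for rcu_state.n_force_qs */

    static void *updater(void *arg)     /* plays the grace-period kthread */
    {
        for (int i = 0; i < 1000000; i++)
            WRITE_ONCE(n_force_qs, n_force_qs + 1);  /* marked store; plain read is OK with a single updater */
        return NULL;
    }

    static void *snapshotter(void *arg) /* plays a lockless reader such as rcu_do_batch() */
    {
        unsigned long snap = 0;

        while (snap < 1000000)
            snap = READ_ONCE(n_force_qs);            /* marked load */
        printf("final snapshot: %lu\n", snap);
        return NULL;
    }

    int main(void)
    {
        pthread_t u, s;

        pthread_create(&u, NULL, updater, NULL);
        pthread_create(&s, NULL, snapshotter, NULL);
        pthread_join(u, NULL);
        pthread_join(s, NULL);
        return 0;
    }

Build with gcc -pthread; dropping the two markings reintroduces exactly the kind of plain-access data race the patch above annotates.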
McKenney --- kernel/rcu/tree_nocb.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel/rcu') diff --git a/kernel/rcu/tree_nocb.h b/kernel/rcu/tree_nocb.h index 8fdf44f8523f..368ef7b9af4f 100644 --- a/kernel/rcu/tree_nocb.h +++ b/kernel/rcu/tree_nocb.h @@ -549,7 +549,6 @@ static void __call_rcu_nocb_wake(struct rcu_data *rdp, bool was_alldone, rcu_nocb_unlock_irqrestore(rdp, flags); trace_rcu_nocb_wake(rcu_state.name, rdp->cpu, TPS("WakeNot")); } - return; } /* @@ -767,6 +766,7 @@ static int rcu_nocb_gp_kthread(void *arg) static inline bool nocb_cb_can_run(struct rcu_data *rdp) { u8 flags = SEGCBLIST_OFFLOADED | SEGCBLIST_KTHREAD_CB; + return rcu_segcblist_test_flags(&rdp->cblist, flags); } -- cgit v1.2.3 From 88ee23ef1c129e40309f4612f80dd74be4590c03 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Thu, 22 Jul 2021 15:49:05 -0700 Subject: rcu: Eliminate rcu_implicit_dynticks_qs() local variable rnhqp The rcu_implicit_dynticks_qs() function's local variable rnhqp references the ->rcu_need_heavy_qs field in the rcu_data structure referenced by the function parameter rdp, with a rather odd method for computing the pointer to this field. This commit therefore simplifies things and saves a few lines of code by replacing each instance of rnhqp with &rdp->rcu_need_heavy_qs. Signed-off-by: Paul E. McKenney --- kernel/rcu/tree.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'kernel/rcu') diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index c89f5e6c4154..18d2f35d1450 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -1219,7 +1219,6 @@ static int dyntick_save_progress_counter(struct rcu_data *rdp) static int rcu_implicit_dynticks_qs(struct rcu_data *rdp) { unsigned long jtsq; - bool *rnhqp; bool *ruqp; struct rcu_node *rnp = rdp->mynode; @@ -1286,12 +1285,11 @@ static int rcu_implicit_dynticks_qs(struct rcu_data *rdp) */ jtsq = READ_ONCE(jiffies_to_sched_qs); ruqp = per_cpu_ptr(&rcu_data.rcu_urgent_qs, rdp->cpu); - rnhqp = per_cpu_ptr(&rcu_data.rcu_need_heavy_qs, rdp->cpu); - if (!READ_ONCE(*rnhqp) && + if (!READ_ONCE(rdp->rcu_need_heavy_qs) && (time_after(jiffies, rcu_state.gp_start + jtsq * 2) || time_after(jiffies, rcu_state.jiffies_resched) || rcu_state.cbovld)) { - WRITE_ONCE(*rnhqp, true); + WRITE_ONCE(rdp->rcu_need_heavy_qs, true); /* Store rcu_need_heavy_qs before rcu_urgent_qs. */ smp_store_release(ruqp, true); } else if (time_after(jiffies, rcu_state.gp_start + jtsq)) { -- cgit v1.2.3 From 9424b867a759febc2b67b6777bfa27f0f830d437 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Thu, 22 Jul 2021 16:47:42 -0700 Subject: rcu: Eliminate rcu_implicit_dynticks_qs() local variable ruqp The rcu_implicit_dynticks_qs() function's local variable ruqp references the ->rcu_urgent_qs field in the rcu_data structure referenced by the function parameter rdp, with a rather odd method for computing the pointer to this field. This commit therefore simplifies things and saves a couple of lines of code by replacing each instance of ruqp with &rdp->rcu_urgent_qs. Signed-off-by: Paul E.
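The point of these two rcu_implicit_dynticks_qs() cleanups can be reduced to a small sketch. The structure and functions below are invented userspace C, not the rcu_data code: when a function is already handed a pointer to a given CPU's data, recomputing the address of one of that structure's fields from the CPU number is a needless detour, and the field can simply be reached through the pointer.

    #include <stdbool.h>
    #include <stdio.h>

    #define NR_CPUS 4

    struct cpu_data {
        int cpu;
        bool urgent_qs;
    };

    static struct cpu_data cpu_data[NR_CPUS];

    /* Before: take a detour through the CPU number to find our own field. */
    static void mark_urgent_old(struct cpu_data *cdp)
    {
        bool *ruqp = &cpu_data[cdp->cpu].urgent_qs;

        *ruqp = true;
    }

    /* After: the pointer we were given already reaches the field. */
    static void mark_urgent_new(struct cpu_data *cdp)
    {
        cdp->urgent_qs = true;
    }

    int main(void)
    {
        for (int i = 0; i < NR_CPUS; i++)
            cpu_data[i].cpu = i;
        mark_urgent_old(&cpu_data[1]);
        mark_urgent_new(&cpu_data[2]);
        printf("cpu1=%d cpu2=%d\n", cpu_data[1].urgent_qs, cpu_data[2].urgent_qs);
        return 0;
    }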
McKenney --- kernel/rcu/tree.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) (limited to 'kernel/rcu') diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index 18d2f35d1450..0bfebec94277 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -1219,7 +1219,6 @@ static int dyntick_save_progress_counter(struct rcu_data *rdp) static int rcu_implicit_dynticks_qs(struct rcu_data *rdp) { unsigned long jtsq; - bool *ruqp; struct rcu_node *rnp = rdp->mynode; /* @@ -1284,16 +1283,15 @@ static int rcu_implicit_dynticks_qs(struct rcu_data *rdp) * is set way high. */ jtsq = READ_ONCE(jiffies_to_sched_qs); - ruqp = per_cpu_ptr(&rcu_data.rcu_urgent_qs, rdp->cpu); if (!READ_ONCE(rdp->rcu_need_heavy_qs) && (time_after(jiffies, rcu_state.gp_start + jtsq * 2) || time_after(jiffies, rcu_state.jiffies_resched) || rcu_state.cbovld)) { WRITE_ONCE(rdp->rcu_need_heavy_qs, true); /* Store rcu_need_heavy_qs before rcu_urgent_qs. */ - smp_store_release(ruqp, true); + smp_store_release(&rdp->rcu_urgent_qs, true); } else if (time_after(jiffies, rcu_state.gp_start + jtsq)) { - WRITE_ONCE(*ruqp, true); + WRITE_ONCE(rdp->rcu_urgent_qs, true); } /* @@ -1307,7 +1305,7 @@ static int rcu_implicit_dynticks_qs(struct rcu_data *rdp) if (tick_nohz_full_cpu(rdp->cpu) && (time_after(jiffies, READ_ONCE(rdp->last_fqs_resched) + jtsq * 3) || rcu_state.cbovld)) { - WRITE_ONCE(*ruqp, true); + WRITE_ONCE(rdp->rcu_urgent_qs, true); resched_cpu(rdp->cpu); WRITE_ONCE(rdp->last_fqs_resched, jiffies); } -- cgit v1.2.3 From 3ac858785231e2573d2964950738aac087ebf30c Mon Sep 17 00:00:00 2001 From: Zhouyi Zhou Date: Mon, 26 Jul 2021 05:43:33 +0800 Subject: rcu: Fix undefined Kconfig macros Invoking scripts/checkkconfigsymbols.py in the Linux-kernel source tree located the following issues: 1. TREE_PREEMPT_RCU Referencing files: arch/sh/configs/sdk7786_defconfig It should now be CONFIG_PREEMPT_RCU. Except that the CONFIG_PREEMPT=y in that same file implies CONFIG_PREEMPT_RCU=y. Therefore, delete the CONFIG_TREE_PREEMPT_RCU=y line. The reason is as follows: In kernel/rcu/Kconfig, we have config PREEMPT_RCU bool default y if PREEMPTION https://www.kernel.org/doc/Documentation/kbuild/kconfig-language.txt says, "The default value is only assigned to the config symbol if no other value was set by the user (via the input prompt above)." there is no prompt in config PREEMPT_RCU entry, so we are guaranteed to get CONFIG_PREEMPT_RCU=y when CONFIG_PREEMPT is present. 2. RCU_CPU_STALL_INFO Referencing files: arch/xtensa/configs/nommu_kc705_defconfig The old Kconfig option RCU_CPU_STALL_INFO was removed by commit 75c27f119b64 ("rcu: Remove CONFIG_RCU_CPU_STALL_INFO"), and the kernel now acts as if this Kconfig option was unconditionally enabled. 3. RCU_NOCB_CPU_ALL Referencing files: Documentation/RCU/Design/Memory-Ordering/Tree-RCU-Memory-Ordering.rst This is an old snapshot of the code. I update this from the real rcu_prepare_for_idle() function in kernel/rcu/tree_plugin.h. This change was tested by invoking "make htmldocs". 4. RCU_TORTURE_TESTS Referencing files: kernel/rcu/rcutorture.c Forward-progress checking conflicts with CPU-stall testing, so we should complain at "modprobe rcutorture" when both are enabled. Signed-off-by: Zhouyi Zhou Signed-off-by: Paul E. 
McKenney --- .../Memory-Ordering/Tree-RCU-Memory-Ordering.rst | 69 ++++++++++------------ arch/sh/configs/sdk7786_defconfig | 1 - arch/xtensa/configs/nommu_kc705_defconfig | 1 - kernel/rcu/rcutorture.c | 2 +- 4 files changed, 33 insertions(+), 40 deletions(-) (limited to 'kernel/rcu') diff --git a/Documentation/RCU/Design/Memory-Ordering/Tree-RCU-Memory-Ordering.rst b/Documentation/RCU/Design/Memory-Ordering/Tree-RCU-Memory-Ordering.rst index eeb351296df1..7fdf151a8680 100644 --- a/Documentation/RCU/Design/Memory-Ordering/Tree-RCU-Memory-Ordering.rst +++ b/Documentation/RCU/Design/Memory-Ordering/Tree-RCU-Memory-Ordering.rst @@ -202,49 +202,44 @@ newly arrived RCU callbacks against future grace periods: 1 static void rcu_prepare_for_idle(void) 2 { 3 bool needwake; - 4 struct rcu_data *rdp; - 5 struct rcu_dynticks *rdtp = this_cpu_ptr(&rcu_dynticks); - 6 struct rcu_node *rnp; - 7 struct rcu_state *rsp; - 8 int tne; - 9 - 10 if (IS_ENABLED(CONFIG_RCU_NOCB_CPU_ALL) || - 11 rcu_is_nocb_cpu(smp_processor_id())) - 12 return; + 4 struct rcu_data *rdp = this_cpu_ptr(&rcu_data); + 5 struct rcu_node *rnp; + 6 int tne; + 7 + 8 lockdep_assert_irqs_disabled(); + 9 if (rcu_rdp_is_offloaded(rdp)) + 10 return; + 11 + 12 /* Handle nohz enablement switches conservatively. */ 13 tne = READ_ONCE(tick_nohz_active); - 14 if (tne != rdtp->tick_nohz_enabled_snap) { - 15 if (rcu_cpu_has_callbacks(NULL)) - 16 invoke_rcu_core(); - 17 rdtp->tick_nohz_enabled_snap = tne; + 14 if (tne != rdp->tick_nohz_enabled_snap) { + 15 if (!rcu_segcblist_empty(&rdp->cblist)) + 16 invoke_rcu_core(); /* force nohz to see update. */ + 17 rdp->tick_nohz_enabled_snap = tne; 18 return; - 19 } + 19 } 20 if (!tne) 21 return; - 22 if (rdtp->all_lazy && - 23 rdtp->nonlazy_posted != rdtp->nonlazy_posted_snap) { - 24 rdtp->all_lazy = false; - 25 rdtp->nonlazy_posted_snap = rdtp->nonlazy_posted; - 26 invoke_rcu_core(); - 27 return; - 28 } - 29 if (rdtp->last_accelerate == jiffies) - 30 return; - 31 rdtp->last_accelerate = jiffies; - 32 for_each_rcu_flavor(rsp) { - 33 rdp = this_cpu_ptr(rsp->rda); - 34 if (rcu_segcblist_pend_cbs(&rdp->cblist)) - 35 continue; - 36 rnp = rdp->mynode; - 37 raw_spin_lock_rcu_node(rnp); - 38 needwake = rcu_accelerate_cbs(rsp, rnp, rdp); - 39 raw_spin_unlock_rcu_node(rnp); - 40 if (needwake) - 41 rcu_gp_kthread_wake(rsp); - 42 } - 43 } + 22 + 23 /* + 24 * If we have not yet accelerated this jiffy, accelerate all + 25 * callbacks on this CPU. + 26 */ + 27 if (rdp->last_accelerate == jiffies) + 28 return; + 29 rdp->last_accelerate = jiffies; + 30 if (rcu_segcblist_pend_cbs(&rdp->cblist)) { + 31 rnp = rdp->mynode; + 32 raw_spin_lock_rcu_node(rnp); /* irqs already disabled. */ + 33 needwake = rcu_accelerate_cbs(rnp, rdp); + 34 raw_spin_unlock_rcu_node(rnp); /* irqs remain disabled. */ + 35 if (needwake) + 36 rcu_gp_kthread_wake(); + 37 } + 38 } But the only part of ``rcu_prepare_for_idle()`` that really matters for -this discussion are lines 37–39. We will therefore abbreviate this +this discussion are lines 32–34. We will therefore abbreviate this function as follows: .. 
kernel-figure:: rcu_node-lock.svg diff --git a/arch/sh/configs/sdk7786_defconfig b/arch/sh/configs/sdk7786_defconfig index f776a1d0d277..a8662b6927ec 100644 --- a/arch/sh/configs/sdk7786_defconfig +++ b/arch/sh/configs/sdk7786_defconfig @@ -5,7 +5,6 @@ CONFIG_BSD_PROCESS_ACCT=y CONFIG_BSD_PROCESS_ACCT_V3=y CONFIG_AUDIT=y CONFIG_AUDITSYSCALL=y -CONFIG_TREE_PREEMPT_RCU=y CONFIG_RCU_TRACE=y CONFIG_IKCONFIG=y CONFIG_IKCONFIG_PROC=y diff --git a/arch/xtensa/configs/nommu_kc705_defconfig b/arch/xtensa/configs/nommu_kc705_defconfig index 88b2e222d4bf..fcb620ef3799 100644 --- a/arch/xtensa/configs/nommu_kc705_defconfig +++ b/arch/xtensa/configs/nommu_kc705_defconfig @@ -119,7 +119,6 @@ CONFIG_DEBUG_SPINLOCK=y CONFIG_DEBUG_MUTEXES=y CONFIG_DEBUG_ATOMIC_SLEEP=y CONFIG_STACKTRACE=y -# CONFIG_RCU_CPU_STALL_INFO is not set CONFIG_RCU_TRACE=y # CONFIG_FTRACE is not set # CONFIG_LD_NO_RELAX is not set diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c index ab4215266ebe..f640cbd9262c 100644 --- a/kernel/rcu/rcutorture.c +++ b/kernel/rcu/rcutorture.c @@ -2449,7 +2449,7 @@ static int __init rcu_torture_fwd_prog_init(void) } if (stall_cpu > 0) { VERBOSE_TOROUT_STRING("rcu_torture_fwd_prog_init: Disabled, conflicts with CPU-stall testing"); - if (IS_MODULE(CONFIG_RCU_TORTURE_TESTS)) + if (IS_MODULE(CONFIG_RCU_TORTURE_TEST)) return -EINVAL; /* In module, can fail back to user. */ WARN_ON(1); /* Make sure rcutorture notices conflict. */ return 0; -- cgit v1.2.3 From ebc88ad491362e6a4fae5bfb1c23c06c876f70be Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 26 Jul 2021 11:57:39 -0700 Subject: rcu: Comment rcu_gp_init() code waiting for CPU-hotplug operations Near the beginning of rcu_gp_init() is a per-rcu_node loop that waits for CPU-hotplug operations that might have started before the new grace period did. This commit adds a comment explaining that this wait does not exclude CPU-hotplug operations. Signed-off-by: Paul E. McKenney --- kernel/rcu/tree.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'kernel/rcu') diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index 0bfebec94277..e6e1b9281530 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -1775,6 +1775,8 @@ static noinline_for_stack bool rcu_gp_init(void) */ WRITE_ONCE(rcu_state.gp_state, RCU_GP_ONOFF); rcu_for_each_leaf_node(rnp) { + // Wait for CPU-hotplug operations that might have + // started before this grace period did. smp_mb(); // Pair with barriers used when updating ->ofl_seq to odd values. firstseq = READ_ONCE(rnp->ofl_seq); if (firstseq & 0x1) -- cgit v1.2.3 From 2caebefb00f03b5ba13d44aa6cc3723759b43822 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Wed, 28 Jul 2021 12:38:42 -0700 Subject: rcu: Move rcu_dynticks_eqs_online() to rcu_cpu_starting() The purpose of rcu_dynticks_eqs_online() is to adjust the ->dynticks counter of an incoming CPU when required. It is currently invoked from rcutree_prepare_cpu(), which runs before the incoming CPU is running, and thus on some other CPU. This makes the per-CPU accesses in rcu_dynticks_eqs_online() iffy at best, and it all "works" only because the running CPU cannot possibly be in dyntick-idle mode, which means that rcu_dynticks_eqs_online() never has any effect. It is currently OK for rcu_dynticks_eqs_online() to have no effect, but only because the CPU-offline process just happens to leave ->dynticks in the correct state. 
After all, if ->dynticks were in the wrong state on a just-onlined CPU, rcutorture would complain bitterly the next time that CPU went idle, at least in kernels built with CONFIG_RCU_EQS_DEBUG=y, for example, those built by rcutorture scenario TREE04. One could argue that this means that rcu_dynticks_eqs_online() is unnecessary, however, removing it would make the CPU-online process vulnerable to slight changes in the CPU-offline process. One could also ask why it is safe to move the rcu_dynticks_eqs_online() call so late in the CPU-online process. Indeed, there was a time when it would not have been safe, which does much to explain its current location. However, the marking of a CPU as online from an RCU perspective has long since moved from rcutree_prepare_cpu() to rcu_cpu_starting(), and all that is required is that ->dynticks be set correctly by the time that the CPU is marked as online from an RCU perspective. After all, the RCU grace-period kthread does not check to see if offline CPUs are also idle. (In case you were curious, this is one reason why there is quiescent-state reporting as part of the offlining process.) This commit therefore moves the call to rcu_dynticks_eqs_online() from rcutree_prepare_cpu() to rcu_cpu_starting(), this latter being guaranteed to be running on the incoming CPU. The call to this function must of course be placed before this rcu_cpu_starting() announces this CPU's presence to RCU. Reported-by: Mathieu Desnoyers Signed-off-by: Paul E. McKenney --- kernel/rcu/tree.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel/rcu') diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index e6e1b9281530..801075e36515 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -4129,7 +4129,6 @@ int rcutree_prepare_cpu(unsigned int cpu) rdp->n_force_qs_snap = READ_ONCE(rcu_state.n_force_qs); rdp->blimit = blimit; rdp->dynticks_nesting = 1; /* CPU not up, no tearing. */ - rcu_dynticks_eqs_online(); raw_spin_unlock_rcu_node(rnp); /* irqs remain disabled. */ /* @@ -4249,6 +4248,7 @@ void rcu_cpu_starting(unsigned int cpu) mask = rdp->grpmask; WRITE_ONCE(rnp->ofl_seq, rnp->ofl_seq + 1); WARN_ON_ONCE(!(rnp->ofl_seq & 0x1)); + rcu_dynticks_eqs_online(); smp_mb(); // Pair with rcu_gp_cleanup()'s ->ofl_seq barrier(). raw_spin_lock_irqsave_rcu_node(rnp, flags); WRITE_ONCE(rnp->qsmaskinitnext, rnp->qsmaskinitnext | mask); -- cgit v1.2.3 From 768f5d50e6ad88363291f96a2e230442b8d633bc Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Thu, 29 Jul 2021 15:35:21 -0700 Subject: rcu: Simplify rcu_report_dead() call to rcu_report_exp_rdp() Currently, rcu_report_dead() disables preemption across its call to rcu_report_exp_rdp(), but this is pointless because interrupts are already disabled by the caller. In addition, rcu_report_dead() computes the address of the outgoing CPU's rcu_data structure, which is also pointless because this address is already present in local variable rdp. This commit therefore drops the preemption disabling and passes rdp to rcu_report_exp_rdp(). Signed-off-by: Paul E. McKenney --- kernel/rcu/tree.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'kernel/rcu') diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index 801075e36515..dc2968473593 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -4294,9 +4294,7 @@ void rcu_report_dead(unsigned int cpu) do_nocb_deferred_wakeup(rdp); /* QS for any half-done expedited grace period. 
*/ - preempt_disable(); - rcu_report_exp_rdp(this_cpu_ptr(&rcu_data)); - preempt_enable(); + rcu_report_exp_rdp(rdp); rcu_preempt_deferred_qs(current); /* Remove outgoing CPU from mask in the leaf rcu_node structure. */ -- cgit v1.2.3 From 4aa846f97c0c0d9740d120f9ac3e2fba1522ac0c Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Thu, 29 Jul 2021 20:30:32 -0700 Subject: rcu: Make rcutree_dying_cpu() use its "cpu" parameter The CPU-hotplug functions take a "cpu" parameter, but rcutree_dying_cpu() ignores it in favor of this_cpu_ptr(). This works at the moment, but it would be better to be consistent. This might also work better given some possible future changes. This commit therefore uses per_cpu_ptr() to avoid ignoring the rcutree_dying_cpu() function's argument. Signed-off-by: Paul E. McKenney --- kernel/rcu/tree.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel/rcu') diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index dc2968473593..6a1e9d3374db 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -2356,7 +2356,7 @@ rcu_check_quiescent_state(struct rcu_data *rdp) int rcutree_dying_cpu(unsigned int cpu) { bool blkd; - struct rcu_data *rdp = this_cpu_ptr(&rcu_data); + struct rcu_data *rdp = per_cpu_ptr(&rcu_data, cpu); struct rcu_node *rnp = rdp->mynode; if (!IS_ENABLED(CONFIG_HOTPLUG_CPU)) -- cgit v1.2.3 From ebb6d30d9ed1fe7137486e1be2ae9d621e918c4a Mon Sep 17 00:00:00 2001 From: Juri Lelli Date: Tue, 10 Aug 2021 10:48:15 +0200 Subject: rcu: Make rcu_normal_after_boot writable again Certain configurations (e.g., systems that make heavy use of netns) need to use synchronize_rcu_expedited() to service RCU grace periods even after boot. Even though synchronize_rcu_expedited() has been traditionally considered harmful for RT for the heavy use of IPIs, it is perfectly usable under certain conditions (e.g. nohz_full). Make rcupdate.rcu_normal_after_boot= again writeable on RT (if NO_HZ_ FULL is defined), but keep its default value to 1 (enabled) to avoid regressions. Users who need synchronize_rcu_expedited() will boot with rcupdate.rcu_normal_after_ boot=0 in the kernel cmdline. Reflect the change in synchronize_rcu_expedited_wait() by removing the WARN related to CONFIG_PREEMPT_RT. Signed-off-by: Juri Lelli Signed-off-by: Paul E. 
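A little background on what the guard in the patch below controls may help. The fragment that follows is a hypothetical miniature, not the actual update.c: module_param() is what registers a variable as a boot/module parameter in the first place, and its permission argument decides whether the parameter also appears under /sys/module/<module>/parameters/. Compiling the registration out, as the old PREEMPT_RT-only guard did, removes the ability to override the value from the kernel command line; a permission of 0 keeps it settable at boot but invisible in sysfs, while 0444 (adopted by a later commit in this series) additionally makes the current value world-readable there.

    #include <linux/module.h>
    #include <linux/moduleparam.h>

    /* Hypothetical parameter, defaulting on when PREEMPT_RT is enabled. */
    static int demo_normal_after_boot = IS_ENABLED(CONFIG_PREEMPT_RT);

    #if !defined(CONFIG_PREEMPT_RT) || defined(CONFIG_NO_HZ_FULL)
    /* Registered: settable at boot as "<prefix>.demo_normal_after_boot=".
     * Permission 0 hides it from sysfs; 0444 would expose it read-only. */
    module_param(demo_normal_after_boot, int, 0);
    #endif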
McKenney --- kernel/rcu/tree_exp.h | 1 - kernel/rcu/update.c | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) (limited to 'kernel/rcu') diff --git a/kernel/rcu/tree_exp.h b/kernel/rcu/tree_exp.h index 2796084ef85a..d9e4f8eb9ae2 100644 --- a/kernel/rcu/tree_exp.h +++ b/kernel/rcu/tree_exp.h @@ -512,7 +512,6 @@ static void synchronize_rcu_expedited_wait(void) j = READ_ONCE(jiffies_till_first_fqs); if (synchronize_rcu_expedited_wait_once(j + HZ)) return; - WARN_ON_ONCE(IS_ENABLED(CONFIG_PREEMPT_RT)); } for (;;) { diff --git a/kernel/rcu/update.c b/kernel/rcu/update.c index c21b38cc25e9..bd551134e2f4 100644 --- a/kernel/rcu/update.c +++ b/kernel/rcu/update.c @@ -57,7 +57,7 @@ module_param(rcu_expedited, int, 0); module_param(rcu_normal, int, 0); static int rcu_normal_after_boot = IS_ENABLED(CONFIG_PREEMPT_RT); -#ifndef CONFIG_PREEMPT_RT +#if !defined(CONFIG_PREEMPT_RT) || defined(CONFIG_NO_HZ_FULL) module_param(rcu_normal_after_boot, int, 0); #endif #endif /* #ifndef CONFIG_TINY_RCU */ -- cgit v1.2.3 From 1eac0075ebeecbf5c972f575ac448a0ea92e4f3a Mon Sep 17 00:00:00 2001 From: Juri Lelli Date: Tue, 10 Aug 2021 10:48:16 +0200 Subject: rcu: Make rcu update module parameters world-readable rcu update module parameters currently don't appear in sysfs and this is a serviceability issue as it might be needed to access their default values at runtime. Fix this issue by changing rcu update module parameters permissions to world-readable. Suggested-by: Paul E. McKenney Signed-off-by: Juri Lelli Signed-off-by: Paul E. McKenney --- kernel/rcu/update.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'kernel/rcu') diff --git a/kernel/rcu/update.c b/kernel/rcu/update.c index bd551134e2f4..94282dc12bab 100644 --- a/kernel/rcu/update.c +++ b/kernel/rcu/update.c @@ -54,11 +54,11 @@ #define MODULE_PARAM_PREFIX "rcupdate." #ifndef CONFIG_TINY_RCU -module_param(rcu_expedited, int, 0); -module_param(rcu_normal, int, 0); +module_param(rcu_expedited, int, 0444); +module_param(rcu_normal, int, 0444); static int rcu_normal_after_boot = IS_ENABLED(CONFIG_PREEMPT_RT); #if !defined(CONFIG_PREEMPT_RT) || defined(CONFIG_NO_HZ_FULL) -module_param(rcu_normal_after_boot, int, 0); +module_param(rcu_normal_after_boot, int, 0444); #endif #endif /* #ifndef CONFIG_TINY_RCU */ -- cgit v1.2.3 From f0b2b2df5423fb369ac762c77900bc7765496d58 Mon Sep 17 00:00:00 2001 From: Neeraj Upadhyay Date: Wed, 18 Aug 2021 13:34:00 +0530 Subject: rcu: Fix existing exp request check in sync_sched_exp_online_cleanup() The sync_sched_exp_online_cleanup() checks to see if RCU needs an expedited quiescent state from the incoming CPU, sending it an IPI if so. Before sending IPI, it checks whether expedited qs need has been already requested for the incoming CPU, by checking rcu_data.cpu_no_qs.b.exp for the current cpu, on which sync_sched_exp_online_cleanup() is running. This works for the case where incoming CPU is same as self. However, for the case where incoming CPU is different from self, expedited request won't get marked, which can potentially delay reporting of expedited quiescent state for the incoming CPU. Fixes: e015a3411220 ("rcu: Avoid self-IPI in sync_sched_exp_online_cleanup()") Signed-off-by: Neeraj Upadhyay Signed-off-by: Paul E. 
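Since the same mistake appears in two forms in this series (rcutree_dying_cpu() above, and the expedited-grace-period check fixed below), a compact hypothetical sketch of the bug class may be useful; the structure and functions here are invented, though per_cpu() and __this_cpu_read() are the real per-CPU accessors. A function that is told which CPU it is acting on must consult that CPU's per-CPU state, not the state of whichever CPU it happens to be running on; the two only coincide in the self-targeted case.

    #include <linux/percpu.h>
    #include <linux/types.h>

    struct demo_pcpu {
        bool qs_requested;
    };
    static DEFINE_PER_CPU(struct demo_pcpu, demo_pcpu);

    /* Fragile: reads the *running* CPU's flag even though @cpu may differ. */
    static bool demo_qs_already_requested_fragile(int cpu)
    {
        return __this_cpu_read(demo_pcpu.qs_requested);
    }

    /* Correct: reads the flag of the CPU actually being asked about. */
    static bool demo_qs_already_requested(int cpu)
    {
        return per_cpu(demo_pcpu.qs_requested, cpu);
    }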
McKenney --- kernel/rcu/tree_exp.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel/rcu') diff --git a/kernel/rcu/tree_exp.h b/kernel/rcu/tree_exp.h index d9e4f8eb9ae2..f3947c49eee7 100644 --- a/kernel/rcu/tree_exp.h +++ b/kernel/rcu/tree_exp.h @@ -759,7 +759,7 @@ static void sync_sched_exp_online_cleanup(int cpu) my_cpu = get_cpu(); /* Quiescent state either not needed or already requested, leave. */ if (!(READ_ONCE(rnp->expmask) & rdp->grpmask) || - __this_cpu_read(rcu_data.cpu_no_qs.b.exp)) { + rdp->cpu_no_qs.b.exp) { put_cpu(); return; } -- cgit v1.2.3 From cbe0d8d91415c9692fe88191940d98952b6855d9 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Fri, 30 Jul 2021 12:17:59 -0700 Subject: rcu-tasks: Wait for trc_read_check_handler() IPIs Currently, RCU Tasks Trace initializes the trc_n_readers_need_end counter to the value one, increments it before each trc_read_check_handler() IPI, then decrements it within trc_read_check_handler() if the target task was in a quiescent state (or if the target task moved to some other CPU while the IPI was in flight), complaining if the new value was zero. The rationale for complaining is that the initial value of one must be decremented away before zero can be reached, and this decrement has not yet happened. Except that trc_read_check_handler() is initiated with an asynchronous smp_call_function_single(), which might be significantly delayed. This can result in false-positive complaints about the counter reaching zero. This commit therefore waits for in-flight IPI handlers to complete before decrementing away the initial value of one from the trc_n_readers_need_end counter. Signed-off-by: Paul E. McKenney --- kernel/rcu/tasks.h | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'kernel/rcu') diff --git a/kernel/rcu/tasks.h b/kernel/rcu/tasks.h index 806160c44b17..3b2f8038064a 100644 --- a/kernel/rcu/tasks.h +++ b/kernel/rcu/tasks.h @@ -1150,14 +1150,28 @@ static void check_all_holdout_tasks_trace(struct list_head *hop, } } +static void rcu_tasks_trace_empty_fn(void *unused) +{ +} + /* Wait for grace period to complete and provide ordering. */ static void rcu_tasks_trace_postgp(struct rcu_tasks *rtp) { + int cpu; bool firstreport; struct task_struct *g, *t; LIST_HEAD(holdouts); long ret; + // Wait for any lingering IPI handlers to complete. Note that + // if a CPU has gone offline or transitioned to userspace in the + // meantime, all IPI handlers should have been drained beforehand. + // Yes, this assumes that CPUs process IPIs in order. If that ever + // changes, there will need to be a recheck and/or timed wait. + for_each_online_cpu(cpu) + if (smp_load_acquire(per_cpu_ptr(&trc_ipi_to_cpu, cpu))) + smp_call_function_single(cpu, rcu_tasks_trace_empty_fn, NULL, 1); + // Remove the safety count. smp_mb__before_atomic(); // Order vs. earlier atomics atomic_dec(&trc_n_readers_need_end); -- cgit v1.2.3 From fda84866b1e68ab409074e7fcf1a7db800615445 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Tue, 3 Aug 2021 17:42:25 -0700 Subject: rcutorture: Suppressing read-exit testing is not an error Currently, specifying the rcutorture.read_exit_burst=0 kernel boot parameter will result in a -EINVAL exit code that will stop the rcutorture test run before it has fully initialized. This commit therefore uses a zero exit code in that case, thus allowing rcutorture.read_exit_burst=0 to complete normally. Signed-off-by: Paul E. 
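The read-exit change above is small, but the convention it enforces is worth spelling out. The sketch below uses invented names in plain userspace C, not the rcutorture code: an init function should reserve negative error codes for genuine failures and report success when a sub-feature is merely disabled by the user, so the rest of the test run can proceed.

    #include <errno.h>
    #include <stdbool.h>

    static int demo_read_exit_burst;        /* 0 means the user disabled the feature */

    static bool demo_spawn_child(void)      /* stands in for kthread creation */
    {
        return true;
    }

    static int demo_read_exit_init(void)
    {
        if (demo_read_exit_burst <= 0)
            return 0;                       /* disabled on purpose: success, not -EINVAL */
        if (!demo_spawn_child())
            return -ENOMEM;                 /* genuine failure: report an error */
        return 0;
    }

    int main(void)
    {
        return demo_read_exit_init() ? 1 : 0;
    }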
McKenney --- kernel/rcu/rcutorture.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel/rcu') diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c index ab4215266ebe..59254fa15cc6 100644 --- a/kernel/rcu/rcutorture.c +++ b/kernel/rcu/rcutorture.c @@ -2741,7 +2741,7 @@ static int rcu_torture_read_exit(void *unused) static int rcu_torture_read_exit_init(void) { if (read_exit_burst <= 0) - return -EINVAL; + return 0; init_waitqueue_head(&read_exit_wq); read_exit_child_stop = false; read_exit_child_stopped = false; -- cgit v1.2.3 From efeff6b39b9de4480572c7b0c5eb77204795cb57 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Thu, 5 Aug 2021 13:28:24 -0700 Subject: rcutorture: Warn on individual rcu_torture_init() error conditions When running rcutorture as a module, any rcu_torture_init() issues will be reflected in the error code from modprobe or insmod, as the case may be. However, these error codes are not available when running rcutorture built-in, for example, when using the kvm.sh script. This commit therefore adds WARN_ON_ONCE() to allow distinguishing rcu_torture_init() errors when running rcutorture built-in. Signed-off-by: Paul E. McKenney --- include/linux/torture.h | 8 ++++++++ kernel/rcu/rcutorture.c | 30 +++++++++++++++--------------- 2 files changed, 23 insertions(+), 15 deletions(-) (limited to 'kernel/rcu') diff --git a/include/linux/torture.h b/include/linux/torture.h index 0910c5803f35..24f58e50a94b 100644 --- a/include/linux/torture.h +++ b/include/linux/torture.h @@ -47,6 +47,14 @@ do { \ } while (0) void verbose_torout_sleep(void); +#define torture_init_error(firsterr) \ +({ \ + int ___firsterr = (firsterr); \ + \ + WARN_ONCE(!IS_MODULE(CONFIG_RCU_TORTURE_TEST) && ___firsterr < 0, "Torture-test initialization failed with error code %d\n", ___firsterr); \ + ___firsterr < 0; \ +}) + /* Definitions for online/offline exerciser. 
*/ #ifdef CONFIG_HOTPLUG_CPU int torture_num_online_cpus(void); diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c index 59254fa15cc6..b90cd4d98a20 100644 --- a/kernel/rcu/rcutorture.c +++ b/kernel/rcu/rcutorture.c @@ -3037,7 +3037,7 @@ rcu_torture_init(void) rcu_torture_write_types(); firsterr = torture_create_kthread(rcu_torture_writer, NULL, writer_task); - if (firsterr) + if (torture_init_error(firsterr)) goto unwind; if (nfakewriters > 0) { fakewriter_tasks = kcalloc(nfakewriters, @@ -3052,7 +3052,7 @@ rcu_torture_init(void) for (i = 0; i < nfakewriters; i++) { firsterr = torture_create_kthread(rcu_torture_fakewriter, NULL, fakewriter_tasks[i]); - if (firsterr) + if (torture_init_error(firsterr)) goto unwind; } reader_tasks = kcalloc(nrealreaders, sizeof(reader_tasks[0]), @@ -3068,7 +3068,7 @@ rcu_torture_init(void) rcu_torture_reader_mbchk[i].rtc_chkrdr = -1; firsterr = torture_create_kthread(rcu_torture_reader, (void *)i, reader_tasks[i]); - if (firsterr) + if (torture_init_error(firsterr)) goto unwind; } nrealnocbers = nocbs_nthreads; @@ -3088,18 +3088,18 @@ rcu_torture_init(void) } for (i = 0; i < nrealnocbers; i++) { firsterr = torture_create_kthread(rcu_nocb_toggle, NULL, nocb_tasks[i]); - if (firsterr) + if (torture_init_error(firsterr)) goto unwind; } if (stat_interval > 0) { firsterr = torture_create_kthread(rcu_torture_stats, NULL, stats_task); - if (firsterr) + if (torture_init_error(firsterr)) goto unwind; } if (test_no_idle_hz && shuffle_interval > 0) { firsterr = torture_shuffle_init(shuffle_interval * HZ); - if (firsterr) + if (torture_init_error(firsterr)) goto unwind; } if (stutter < 0) @@ -3109,7 +3109,7 @@ rcu_torture_init(void) t = cur_ops->stall_dur ? cur_ops->stall_dur() : stutter * HZ; firsterr = torture_stutter_init(stutter * HZ, t); - if (firsterr) + if (torture_init_error(firsterr)) goto unwind; } if (fqs_duration < 0) @@ -3118,7 +3118,7 @@ rcu_torture_init(void) /* Create the fqs thread */ firsterr = torture_create_kthread(rcu_torture_fqs, NULL, fqs_task); - if (firsterr) + if (torture_init_error(firsterr)) goto unwind; } if (test_boost_interval < 1) @@ -3132,7 +3132,7 @@ rcu_torture_init(void) firsterr = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "RCU_TORTURE", rcutorture_booster_init, rcutorture_booster_cleanup); - if (firsterr < 0) + if (torture_init_error(firsterr)) goto unwind; rcutor_hp = firsterr; @@ -3153,23 +3153,23 @@ rcu_torture_init(void) } shutdown_jiffies = jiffies + shutdown_secs * HZ; firsterr = torture_shutdown_init(shutdown_secs, rcu_torture_cleanup); - if (firsterr) + if (torture_init_error(firsterr)) goto unwind; firsterr = torture_onoff_init(onoff_holdoff * HZ, onoff_interval, rcutorture_sync); - if (firsterr) + if (torture_init_error(firsterr)) goto unwind; firsterr = rcu_torture_stall_init(); - if (firsterr) + if (torture_init_error(firsterr)) goto unwind; firsterr = rcu_torture_fwd_prog_init(); - if (firsterr) + if (torture_init_error(firsterr)) goto unwind; firsterr = rcu_torture_barrier_init(); - if (firsterr) + if (torture_init_error(firsterr)) goto unwind; firsterr = rcu_torture_read_exit_init(); - if (firsterr) + if (torture_init_error(firsterr)) goto unwind; if (object_debug) rcu_test_debug_objects(); -- cgit v1.2.3 From ed60ad733aa49b70720c9d8dded1b18374ec5022 Mon Sep 17 00:00:00 2001 From: "Paul E. 
McKenney" Date: Thu, 5 Aug 2021 15:57:12 -0700 Subject: refscale: Warn on individual ref_scale_init() error conditions When running refscale as a module, any ref_scale_init() issues will be reflected in the error code from modprobe or insmod, as the case may be. However, these error codes are not available when running refscale built-in, for example, when using the kvm.sh script. This commit therefore adds WARN_ON_ONCE() to allow distinguishing ref_scale_init() errors when running refscale built-in. Signed-off-by: Paul E. McKenney --- kernel/rcu/refscale.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'kernel/rcu') diff --git a/kernel/rcu/refscale.c b/kernel/rcu/refscale.c index 66dc14cf5687..1631ef8a138d 100644 --- a/kernel/rcu/refscale.c +++ b/kernel/rcu/refscale.c @@ -824,7 +824,7 @@ ref_scale_init(void) init_waitqueue_head(&shutdown_wq); firsterr = torture_create_kthread(ref_scale_shutdown, NULL, shutdown_task); - if (firsterr) + if (torture_init_error(firsterr)) goto unwind; schedule_timeout_uninterruptible(1); } @@ -851,7 +851,7 @@ ref_scale_init(void) for (i = 0; i < nreaders; i++) { firsterr = torture_create_kthread(ref_scale_reader, (void *)i, reader_tasks[i].task); - if (firsterr) + if (torture_init_error(firsterr)) goto unwind; init_waitqueue_head(&(reader_tasks[i].wq)); @@ -860,7 +860,7 @@ ref_scale_init(void) // Main Task init_waitqueue_head(&main_wq); firsterr = torture_create_kthread(main_func, NULL, main_task); - if (firsterr) + if (torture_init_error(firsterr)) goto unwind; torture_init_end(); -- cgit v1.2.3 From eb77abfdeed29dd032c923e16fe8d91fa95cd316 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Thu, 5 Aug 2021 15:58:53 -0700 Subject: rcuscale: Warn on individual rcu_scale_init() error conditions When running rcuscale as a module, any rcu_scale_init() issues will be reflected in the error code from modprobe or insmod, as the case may be. However, these error codes are not available when running rcuscale built-in, for example, when using the kvm.sh script. This commit therefore adds WARN_ON_ONCE() to allow distinguishing rcu_scale_init() errors when running rcuscale built-in. Signed-off-by: Paul E. 
McKenney --- kernel/rcu/rcuscale.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'kernel/rcu') diff --git a/kernel/rcu/rcuscale.c b/kernel/rcu/rcuscale.c index 2cc34a22a506..228f143bf935 100644 --- a/kernel/rcu/rcuscale.c +++ b/kernel/rcu/rcuscale.c @@ -758,7 +758,7 @@ kfree_scale_init(void) init_waitqueue_head(&shutdown_wq); firsterr = torture_create_kthread(kfree_scale_shutdown, NULL, shutdown_task); - if (firsterr) + if (torture_init_error(firsterr)) goto unwind; schedule_timeout_uninterruptible(1); } @@ -775,7 +775,7 @@ kfree_scale_init(void) for (i = 0; i < kfree_nrealthreads; i++) { firsterr = torture_create_kthread(kfree_scale_thread, (void *)i, kfree_reader_tasks[i]); - if (firsterr) + if (torture_init_error(firsterr)) goto unwind; } @@ -838,7 +838,7 @@ rcu_scale_init(void) init_waitqueue_head(&shutdown_wq); firsterr = torture_create_kthread(rcu_scale_shutdown, NULL, shutdown_task); - if (firsterr) + if (torture_init_error(firsterr)) goto unwind; schedule_timeout_uninterruptible(1); } @@ -852,7 +852,7 @@ rcu_scale_init(void) for (i = 0; i < nrealreaders; i++) { firsterr = torture_create_kthread(rcu_scale_reader, (void *)i, reader_tasks[i]); - if (firsterr) + if (torture_init_error(firsterr)) goto unwind; } while (atomic_read(&n_rcu_scale_reader_started) < nrealreaders) @@ -879,7 +879,7 @@ rcu_scale_init(void) } firsterr = torture_create_kthread(rcu_scale_writer, (void *)i, writer_tasks[i]); - if (firsterr) + if (torture_init_error(firsterr)) goto unwind; } torture_init_end(); -- cgit v1.2.3 From fd13fe16db0d82612b260640f4e26f6d9d1e11fd Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Fri, 6 Aug 2021 08:57:26 -0700 Subject: rcutorture: Don't cpuhp_remove_state() if cpuhp_setup_state() failed Currently, in CONFIG_RCU_BOOST kernels, if the rcu_torture_init() function's call to cpuhp_setup_state() fails, rcu_torture_cleanup() gamely passes nonsense to cpuhp_remove_state(). This results in strange and misleading splats. This commit therefore ensures that if the rcu_torture_init() function's call to cpuhp_setup_state() fails, rcu_torture_cleanup() avoids invoking cpuhp_remove_state(). Signed-off-by: Paul E. McKenney --- kernel/rcu/rcutorture.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'kernel/rcu') diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c index b90cd4d98a20..424184764ef0 100644 --- a/kernel/rcu/rcutorture.c +++ b/kernel/rcu/rcutorture.c @@ -2819,7 +2819,7 @@ rcu_torture_cleanup(void) rcutorture_seq_diff(gp_seq, start_gp_seq)); torture_stop_kthread(rcu_torture_stats, stats_task); torture_stop_kthread(rcu_torture_fqs, fqs_task); - if (rcu_torture_can_boost()) + if (rcu_torture_can_boost() && rcutor_hp >= 0) cpuhp_remove_state(rcutor_hp); /* @@ -3132,9 +3132,9 @@ rcu_torture_init(void) firsterr = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "RCU_TORTURE", rcutorture_booster_init, rcutorture_booster_cleanup); + rcutor_hp = firsterr; if (torture_init_error(firsterr)) goto unwind; - rcutor_hp = firsterr; // Testing RCU priority boosting requires rcutorture do // some serious abuse. Counter this by running ksoftirqd -- cgit v1.2.3 From 71921a9606ddbcc1d98c00eca7ae82c373d1fecd Mon Sep 17 00:00:00 2001 From: Scott Wood Date: Fri, 20 Aug 2021 09:42:36 +0200 Subject: rcutorture: Avoid problematic critical section nesting on PREEMPT_RT rcutorture is generating some nesting scenarios that are not compatible on PREEMPT_RT. 
For example: preempt_disable(); rcu_read_lock_bh(); preempt_enable(); rcu_read_unlock_bh(); The problem here is that on PREEMPT_RT the bottom halves have to be disabled and enabled in preemptible context. Reorder locking: start with BH locking and continue with then with disabling preemption or interrupts. In the unlocking do it reverse by first enabling interrupts and preemption and BH at the very end. Ensure that on PREEMPT_RT BH locking remains unchanged if in non-preemptible context. Link: https://lkml.kernel.org/r/20190911165729.11178-6-swood@redhat.com Link: https://lkml.kernel.org/r/20210819182035.GF4126399@paulmck-ThinkPad-P17-Gen-1 Signed-off-by: Scott Wood [bigeasy: Drop ATOM_BH, make it only about changing BH in atomic context. Allow enabling RCU in IRQ-off section. Reword commit message.] Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Paul E. McKenney --- kernel/rcu/rcutorture.c | 48 ++++++++++++++++++++++++++++++++++++------------ 1 file changed, 36 insertions(+), 12 deletions(-) (limited to 'kernel/rcu') diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c index 424184764ef0..fb079b78c232 100644 --- a/kernel/rcu/rcutorture.c +++ b/kernel/rcu/rcutorture.c @@ -1432,28 +1432,34 @@ static void rcutorture_one_extend(int *readstate, int newstate, /* First, put new protection in place to avoid critical-section gap. */ if (statesnew & RCUTORTURE_RDR_BH) local_bh_disable(); + if (statesnew & RCUTORTURE_RDR_RBH) + rcu_read_lock_bh(); if (statesnew & RCUTORTURE_RDR_IRQ) local_irq_disable(); if (statesnew & RCUTORTURE_RDR_PREEMPT) preempt_disable(); - if (statesnew & RCUTORTURE_RDR_RBH) - rcu_read_lock_bh(); if (statesnew & RCUTORTURE_RDR_SCHED) rcu_read_lock_sched(); if (statesnew & RCUTORTURE_RDR_RCU) idxnew = cur_ops->readlock() << RCUTORTURE_RDR_SHIFT; - /* Next, remove old protection, irq first due to bh conflict. */ + /* + * Next, remove old protection, in decreasing order of strength + * to avoid unlock paths that aren't safe in the stronger + * context. Namely: BH can not be enabled with disabled interrupts. + * Additionally PREEMPT_RT requires that BH is enabled in preemptible + * context. + */ if (statesold & RCUTORTURE_RDR_IRQ) local_irq_enable(); - if (statesold & RCUTORTURE_RDR_BH) - local_bh_enable(); if (statesold & RCUTORTURE_RDR_PREEMPT) preempt_enable(); - if (statesold & RCUTORTURE_RDR_RBH) - rcu_read_unlock_bh(); if (statesold & RCUTORTURE_RDR_SCHED) rcu_read_unlock_sched(); + if (statesold & RCUTORTURE_RDR_BH) + local_bh_enable(); + if (statesold & RCUTORTURE_RDR_RBH) + rcu_read_unlock_bh(); if (statesold & RCUTORTURE_RDR_RCU) { bool lockit = !statesnew && !(torture_random(trsp) & 0xffff); @@ -1496,6 +1502,9 @@ rcutorture_extend_mask(int oldmask, struct torture_random_state *trsp) int mask = rcutorture_extend_mask_max(); unsigned long randmask1 = torture_random(trsp) >> 8; unsigned long randmask2 = randmask1 >> 3; + unsigned long preempts = RCUTORTURE_RDR_PREEMPT | RCUTORTURE_RDR_SCHED; + unsigned long preempts_irq = preempts | RCUTORTURE_RDR_IRQ; + unsigned long bhs = RCUTORTURE_RDR_BH | RCUTORTURE_RDR_RBH; WARN_ON_ONCE(mask >> RCUTORTURE_RDR_SHIFT); /* Mostly only one bit (need preemption!), sometimes lots of bits. */ @@ -1503,11 +1512,26 @@ rcutorture_extend_mask(int oldmask, struct torture_random_state *trsp) mask = mask & randmask2; else mask = mask & (1 << (randmask2 % RCUTORTURE_RDR_NBITS)); - /* Can't enable bh w/irq disabled. 
*/ - if ((mask & RCUTORTURE_RDR_IRQ) && - ((!(mask & RCUTORTURE_RDR_BH) && (oldmask & RCUTORTURE_RDR_BH)) || - (!(mask & RCUTORTURE_RDR_RBH) && (oldmask & RCUTORTURE_RDR_RBH)))) - mask |= RCUTORTURE_RDR_BH | RCUTORTURE_RDR_RBH; + + /* + * Can't enable bh w/irq disabled. + */ + if (mask & RCUTORTURE_RDR_IRQ) + mask |= oldmask & bhs; + + /* + * Ideally these sequences would be detected in debug builds + * (regardless of RT), but until then don't stop testing + * them on non-RT. + */ + if (IS_ENABLED(CONFIG_PREEMPT_RT)) { + /* Can't modify BH in atomic context */ + if (oldmask & preempts_irq) + mask &= ~bhs; + if ((oldmask | mask) & preempts_irq) + mask |= oldmask & bhs; + } + return mask ?: RCUTORTURE_RDR_RCU; } -- cgit v1.2.3 From 96017bf9039763a2e02dcc6adaa18592cd73a39d Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Wed, 28 Jul 2021 10:53:41 -0700 Subject: rcu-tasks: Simplify trc_read_check_handler() atomic operations Currently, trc_wait_for_one_reader() atomically increments the trc_n_readers_need_end counter before sending the IPI invoking trc_read_check_handler(). All failure paths out of trc_read_check_handler() and also from the smp_call_function_single() within trc_wait_for_one_reader() must carefully atomically decrement this counter. This is more complex than it needs to be. This commit therefore simplifies things and saves a few lines of code by dispensing with the atomic decrements in favor of having trc_read_check_handler() do the atomic increment only in the success case. In theory, this represents no change in functionality. Signed-off-by: Paul E. McKenney --- kernel/rcu/tasks.h | 20 +++----------------- 1 file changed, 3 insertions(+), 17 deletions(-) (limited to 'kernel/rcu') diff --git a/kernel/rcu/tasks.h b/kernel/rcu/tasks.h index 3b2f8038064a..c9d8583ffe59 100644 --- a/kernel/rcu/tasks.h +++ b/kernel/rcu/tasks.h @@ -890,32 +890,24 @@ static void trc_read_check_handler(void *t_in) // If the task is no longer running on this CPU, leave. if (unlikely(texp != t)) { - if (WARN_ON_ONCE(atomic_dec_and_test(&trc_n_readers_need_end))) - wake_up(&trc_wait); goto reset_ipi; // Already on holdout list, so will check later. } // If the task is not in a read-side critical section, and // if this is the last reader, awaken the grace-period kthread. if (likely(!READ_ONCE(t->trc_reader_nesting))) { - if (WARN_ON_ONCE(atomic_dec_and_test(&trc_n_readers_need_end))) - wake_up(&trc_wait); - // Mark as checked after decrement to avoid false - // positives on the above WARN_ON_ONCE(). WRITE_ONCE(t->trc_reader_checked, true); goto reset_ipi; } // If we are racing with an rcu_read_unlock_trace(), try again later. - if (unlikely(READ_ONCE(t->trc_reader_nesting) < 0)) { - if (WARN_ON_ONCE(atomic_dec_and_test(&trc_n_readers_need_end))) - wake_up(&trc_wait); + if (unlikely(READ_ONCE(t->trc_reader_nesting) < 0)) goto reset_ipi; - } WRITE_ONCE(t->trc_reader_checked, true); // Get here if the task is in a read-side critical section. Set // its state so that it will awaken the grace-period kthread upon // exit from that critical section. + atomic_inc(&trc_n_readers_need_end); // One more to wait on. 
WARN_ON_ONCE(READ_ONCE(t->trc_reader_special.b.need_qs)); WRITE_ONCE(t->trc_reader_special.b.need_qs, true); @@ -1015,21 +1007,15 @@ static void trc_wait_for_one_reader(struct task_struct *t, if (per_cpu(trc_ipi_to_cpu, cpu) || t->trc_ipi_to_cpu >= 0) return; - atomic_inc(&trc_n_readers_need_end); per_cpu(trc_ipi_to_cpu, cpu) = true; t->trc_ipi_to_cpu = cpu; rcu_tasks_trace.n_ipis++; - if (smp_call_function_single(cpu, - trc_read_check_handler, t, 0)) { + if (smp_call_function_single(cpu, trc_read_check_handler, t, 0)) { // Just in case there is some other reason for // failure than the target CPU being offline. rcu_tasks_trace.n_ipis_fails++; per_cpu(trc_ipi_to_cpu, cpu) = false; t->trc_ipi_to_cpu = cpu; - if (atomic_dec_and_test(&trc_n_readers_need_end)) { - WARN_ON_ONCE(1); - wake_up(&trc_wait); - } } } } -- cgit v1.2.3 From 18f08e758f34e6dfe0668bee51bd2af7adacf381 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Wed, 28 Jul 2021 11:32:28 -0700 Subject: rcu-tasks: Add trc_inspect_reader() checks for exiting critical section Currently, trc_inspect_reader() treats a task exiting its RCU Tasks Trace read-side critical section the same as being within that critical section. However, this can fail because that task might have already checked its .need_qs field, which means that it might never decrement the all-important trc_n_readers_need_end counter. Of course, for that to happen, the task would need to never again execute an RCU Tasks Trace read-side critical section, but this really could happen if the system's last trampoline was removed. Note that exit from such a critical section cannot be treated as a quiescent state due to the possibility of nested critical sections. This means that if trc_inspect_reader() sees a negative nesting value, it must set up to try again later. This commit therefore ignores tasks that are exiting their RCU Tasks Trace read-side critical sections so that they will be rechecked later. [ paulmck: Apply feedback from Neeraj Upadhyay and Boqun Feng. ] Signed-off-by: Paul E. McKenney --- kernel/rcu/tasks.h | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) (limited to 'kernel/rcu') diff --git a/kernel/rcu/tasks.h b/kernel/rcu/tasks.h index c9d8583ffe59..8387e70e6b00 100644 --- a/kernel/rcu/tasks.h +++ b/kernel/rcu/tasks.h @@ -923,7 +923,7 @@ reset_ipi: static bool trc_inspect_reader(struct task_struct *t, void *arg) { int cpu = task_cpu(t); - bool in_qs = false; + int nesting; bool ofl = cpu_is_offline(cpu); if (task_curr(t)) { @@ -943,18 +943,18 @@ static bool trc_inspect_reader(struct task_struct *t, void *arg) n_heavy_reader_updates++; if (ofl) n_heavy_reader_ofl_updates++; - in_qs = true; + nesting = 0; } else { // The task is not running, so C-language access is safe. - in_qs = likely(!t->trc_reader_nesting); + nesting = t->trc_reader_nesting; } - // Mark as checked so that the grace-period kthread will - // remove it from the holdout list. - t->trc_reader_checked = true; - - if (in_qs) - return true; // Already in quiescent state, done!!! + // If not exiting a read-side critical section, mark as checked + // so that the grace-period kthread will remove it from the + // holdout list. + t->trc_reader_checked = nesting >= 0; + if (nesting <= 0) + return !nesting; // If in QS, done, otherwise try again later. 
// The task is in a read-side critical section, so set up its // state so that it will awaken the grace-period kthread upon exit -- cgit v1.2.3 From a5c071ccfa1728508f31e61213ee795e4529d0d4 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Wed, 28 Jul 2021 12:28:27 -0700 Subject: rcu-tasks: Remove second argument of rcu_read_unlock_trace_special() The second argument of rcu_read_unlock_trace_special() is always zero. When called from exit_tasks_rcu_finish_trace(), it is the constant zero, and rcu_read_unlock_trace_special() doesn't get called from rcu_read_unlock_trace() unless the value of local variable "nesting" is zero because in that case the early return is taken instead. This commit therefore removes the "nesting" argument from the rcu_read_unlock_trace_special() function, substituting the constant zero within that function. This commit also adds a WARN_ON_ONCE() to rcu_read_lock_trace_held() in case non-zeroness some day appears. Signed-off-by: Paul E. McKenney --- include/linux/rcupdate_trace.h | 5 +++-- kernel/rcu/tasks.h | 6 +++--- 2 files changed, 6 insertions(+), 5 deletions(-) (limited to 'kernel/rcu') diff --git a/include/linux/rcupdate_trace.h b/include/linux/rcupdate_trace.h index 86c8f6c98412..6f9c35817398 100644 --- a/include/linux/rcupdate_trace.h +++ b/include/linux/rcupdate_trace.h @@ -31,7 +31,7 @@ static inline int rcu_read_lock_trace_held(void) #ifdef CONFIG_TASKS_TRACE_RCU -void rcu_read_unlock_trace_special(struct task_struct *t, int nesting); +void rcu_read_unlock_trace_special(struct task_struct *t); /** * rcu_read_lock_trace - mark beginning of RCU-trace read-side critical section @@ -80,7 +80,8 @@ static inline void rcu_read_unlock_trace(void) WRITE_ONCE(t->trc_reader_nesting, nesting); return; // We assume shallow reader nesting. } - rcu_read_unlock_trace_special(t, nesting); + WARN_ON_ONCE(nesting != 0); + rcu_read_unlock_trace_special(t); } void call_rcu_tasks_trace(struct rcu_head *rhp, rcu_callback_t func); diff --git a/kernel/rcu/tasks.h b/kernel/rcu/tasks.h index 8387e70e6b00..a3f4f9bd8c67 100644 --- a/kernel/rcu/tasks.h +++ b/kernel/rcu/tasks.h @@ -848,7 +848,7 @@ static void rcu_read_unlock_iw(struct irq_work *iwp) static DEFINE_IRQ_WORK(rcu_tasks_trace_iw, rcu_read_unlock_iw); /* If we are the last reader, wake up the grace-period kthread. */ -void rcu_read_unlock_trace_special(struct task_struct *t, int nesting) +void rcu_read_unlock_trace_special(struct task_struct *t) { int nq = READ_ONCE(t->trc_reader_special.b.need_qs); @@ -858,7 +858,7 @@ void rcu_read_unlock_trace_special(struct task_struct *t, int nesting) // Update .need_qs before ->trc_reader_nesting for irq/NMI handlers. if (nq) WRITE_ONCE(t->trc_reader_special.b.need_qs, false); - WRITE_ONCE(t->trc_reader_nesting, nesting); + WRITE_ONCE(t->trc_reader_nesting, 0); if (nq && atomic_dec_and_test(&trc_n_readers_need_end)) irq_work_queue(&rcu_tasks_trace_iw); } @@ -1200,7 +1200,7 @@ static void exit_tasks_rcu_finish_trace(struct task_struct *t) WARN_ON_ONCE(READ_ONCE(t->trc_reader_nesting)); WRITE_ONCE(t->trc_reader_nesting, 0); if (WARN_ON_ONCE(READ_ONCE(t->trc_reader_special.b.need_qs))) - rcu_read_unlock_trace_special(t, 0); + rcu_read_unlock_trace_special(t); } /** -- cgit v1.2.3 From c4f113ac450afc9c7c4d2ce84a08f516dbec69b1 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Thu, 5 Aug 2021 09:54:45 -0700 Subject: rcu-tasks: Fix s/instruction/instructions/ typo in comment Signed-off-by: Paul E. 
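Stepping back, a deliberately simplified model of the reader-nesting convention that the two rcu-tasks commits above manipulate may help; everything below is invented userspace C, not the real rcupdate_trace.h/tasks.h code. A positive counter means the task is inside a read-side critical section, zero means it is quiescent, and a transiently negative value means it is part-way through exiting, a state that an inspector such as trc_inspect_reader() must treat as "try again later" rather than as a quiescent state.

    #include <stdio.h>

    static int reader_nesting;  /* models t->trc_reader_nesting for one task */

    static void toy_read_lock(void)
    {
        reader_nesting++;
    }

    static void toy_read_unlock(void)
    {
        int nesting = reader_nesting - 1;

        reader_nesting = -100 + nesting;  /* go negative while exit-path bookkeeping runs */
        /* ...exit-path work would happen here... */
        reader_nesting = nesting;         /* then settle at the real count */
    }

    /* Models the three-way decision an inspector has to make. */
    static const char *toy_inspect(void)
    {
        int nesting = reader_nesting;

        if (nesting > 0)
            return "in critical section: wait for it";
        if (nesting < 0)
            return "exiting: recheck later";
        return "quiescent: done";
    }

    int main(void)
    {
        printf("%s\n", toy_inspect());   /* quiescent */
        toy_read_lock();
        printf("%s\n", toy_inspect());   /* in critical section */
        reader_nesting = -1;             /* what a concurrent inspector may see mid-exit */
        printf("%s\n", toy_inspect());   /* exiting: recheck later */
        reader_nesting = 1;              /* restore before a proper unlock */
        toy_read_unlock();
        printf("%s\n", toy_inspect());   /* quiescent again */
        return 0;
    }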
McKenney --- kernel/rcu/tasks.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel/rcu') diff --git a/kernel/rcu/tasks.h b/kernel/rcu/tasks.h index a3f4f9bd8c67..43c0f715ac63 100644 --- a/kernel/rcu/tasks.h +++ b/kernel/rcu/tasks.h @@ -758,7 +758,7 @@ EXPORT_SYMBOL_GPL(show_rcu_tasks_rude_gp_kthread); // 2. Protects code in the idle loop, exception entry/exit, and // CPU-hotplug code paths, similar to the capabilities of SRCU. // -// 3. Avoids expensive read-side instruction, having overhead similar +// 3. Avoids expensive read-side instructions, having overhead similar // to that of Preemptible RCU. // // There are of course downsides. The grace-period code can send IPIs to -- cgit v1.2.3 From 0db7c32ad3160ae06f497d48a74bd46a2a35e6bf Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Wed, 11 Aug 2021 09:07:44 -0700 Subject: rcu-tasks: Move RTGS_WAIT_CBS to beginning of rcu_tasks_kthread() loop Early in debugging, it made some sense to differentiate the first iteration from subsequent iterations, but now this just causes confusion. This commit therefore moves the "set_tasks_gp_state(rtp, RTGS_WAIT_CBS)" statement to the beginning of the "for" loop in rcu_tasks_kthread(). Reported-by: Neeraj Upadhyay Signed-off-by: Paul E. McKenney --- kernel/rcu/tasks.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'kernel/rcu') diff --git a/kernel/rcu/tasks.h b/kernel/rcu/tasks.h index 43c0f715ac63..7e2641783e43 100644 --- a/kernel/rcu/tasks.h +++ b/kernel/rcu/tasks.h @@ -197,6 +197,7 @@ static int __noreturn rcu_tasks_kthread(void *arg) * This loop is terminated by the system going down. ;-) */ for (;;) { + set_tasks_gp_state(rtp, RTGS_WAIT_CBS); /* Pick up any new callbacks. */ raw_spin_lock_irqsave(&rtp->cbs_lock, flags); @@ -236,8 +237,6 @@ static int __noreturn rcu_tasks_kthread(void *arg) } /* Paranoid sleep to keep this from entering a tight loop */ schedule_timeout_idle(rtp->gp_sleep); - - set_tasks_gp_state(rtp, RTGS_WAIT_CBS); } } -- cgit v1.2.3 From d0a85858569ead8d39ba5b41501cd99bc7d7e7bd Mon Sep 17 00:00:00 2001 From: Neeraj Upadhyay Date: Wed, 18 Aug 2021 12:58:39 +0530 Subject: rcu-tasks: Fix s/rcu_add_holdout/trc_add_holdout/ typo in comment Signed-off-by: Neeraj Upadhyay Signed-off-by: Paul E. McKenney --- kernel/rcu/tasks.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel/rcu') diff --git a/kernel/rcu/tasks.h b/kernel/rcu/tasks.h index 7e2641783e43..75e7888b3fc9 100644 --- a/kernel/rcu/tasks.h +++ b/kernel/rcu/tasks.h @@ -991,7 +991,7 @@ static void trc_wait_for_one_reader(struct task_struct *t, // If this task is not yet on the holdout list, then we are in // an RCU read-side critical section. Otherwise, the invocation of - // rcu_add_holdout() that added it to the list did the necessary + // trc_add_holdout() that added it to the list did the necessary // get_task_struct(). Either way, the task cannot be freed out // from under this code. -- cgit v1.2.3 From 89401176daf0a44ab517d9a0e296adb85af246df Mon Sep 17 00:00:00 2001 From: Neeraj Upadhyay Date: Wed, 18 Aug 2021 12:58:40 +0530 Subject: rcu-tasks: Correct firstreport usage in check_all_holdout_tasks_trace In check_all_holdout_tasks_trace(), firstreport is a pointer argument; so, check the dereferenced value, instead of checking the pointer. Signed-off-by: Neeraj Upadhyay Signed-off-by: Paul E. 
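The firstreport fix above is a classic pointer-versus-pointee slip, so a minimal illustration may be worthwhile; the names below are invented and this is ordinary userspace C rather than the tasks.h stall-reporting code. Because the "print the header only once" flag is passed by pointer, the test must dereference it: testing the pointer itself is always true, so the header would be emitted on every call instead of once per report.

    #include <stdbool.h>
    #include <stdio.h>

    static void report_stall(int id, bool *firstreport)
    {
        if (*firstreport) {     /* the buggy version tested "if (firstreport)" */
            printf("INFO: stalled tasks detected:\n");
            *firstreport = false;
        }
        printf("  task %d is holding things up\n", id);
    }

    int main(void)
    {
        bool firstreport = true;

        for (int id = 1; id <= 3; id++)
            report_stall(id, &firstreport);
        return 0;
    }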
McKenney --- kernel/rcu/tasks.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel/rcu') diff --git a/kernel/rcu/tasks.h b/kernel/rcu/tasks.h index 75e7888b3fc9..e2ec548fc0c4 100644 --- a/kernel/rcu/tasks.h +++ b/kernel/rcu/tasks.h @@ -1129,7 +1129,7 @@ static void check_all_holdout_tasks_trace(struct list_head *hop, cpus_read_unlock(); if (needreport) { - if (firstreport) + if (*firstreport) pr_err("INFO: rcu_tasks_trace detected stalls? (Late IPI?)\n"); show_stalled_ipi_trace(); } -- cgit v1.2.3 From d39ec8f3c12abe3710f7031ce3d5564bda12b19e Mon Sep 17 00:00:00 2001 From: Neeraj Upadhyay Date: Wed, 18 Aug 2021 12:58:41 +0530 Subject: rcu-tasks: Correct comparisons for CPU numbers in show_stalled_task_trace Valid CPU numbers can be zero or greater, but the checks for ->trc_ipi_to_cpu and tick_nohz_full_cpu()'s argument are for strictly greater than. This commit therefore corrects the check for no_hz_full cpu in show_stalled_task_trace() so as to include cpu 0. Signed-off-by: Neeraj Upadhyay Signed-off-by: Paul E. McKenney --- kernel/rcu/tasks.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'kernel/rcu') diff --git a/kernel/rcu/tasks.h b/kernel/rcu/tasks.h index e2ec548fc0c4..af7388849bed 100644 --- a/kernel/rcu/tasks.h +++ b/kernel/rcu/tasks.h @@ -1084,9 +1084,9 @@ static void show_stalled_task_trace(struct task_struct *t, bool *firstreport) cpu = task_cpu(t); pr_alert("P%d: %c%c%c nesting: %d%c cpu: %d\n", t->pid, - ".I"[READ_ONCE(t->trc_ipi_to_cpu) > 0], + ".I"[READ_ONCE(t->trc_ipi_to_cpu) >= 0], ".i"[is_idle_task(t)], - ".N"[cpu > 0 && tick_nohz_full_cpu(cpu)], + ".N"[cpu >= 0 && tick_nohz_full_cpu(cpu)], READ_ONCE(t->trc_reader_nesting), " N"[!!READ_ONCE(t->trc_reader_special.b.need_qs)], cpu); -- cgit v1.2.3 From a6517e9ce0115e33617062c9e73b4c5e6f787525 Mon Sep 17 00:00:00 2001 From: Neeraj Upadhyay Date: Wed, 18 Aug 2021 12:58:43 +0530 Subject: rcu-tasks: Clarify read side section info for rcu_tasks_rude GP primitives RCU tasks rude variant does not check whether the current running context on a CPU is usermode. Read side critical section ends on transition to usermode execution, by the virtue of usermode execution being schedulable. Clarify this in comments for call_rcu_tasks_rude() and synchronize_rcu_tasks_rude(). Signed-off-by: Neeraj Upadhyay Signed-off-by: Paul E. McKenney --- kernel/rcu/tasks.h | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'kernel/rcu') diff --git a/kernel/rcu/tasks.h b/kernel/rcu/tasks.h index af7388849bed..8c63b4d23829 100644 --- a/kernel/rcu/tasks.h +++ b/kernel/rcu/tasks.h @@ -677,11 +677,11 @@ DEFINE_RCU_TASKS(rcu_tasks_rude, rcu_tasks_rude_wait_gp, call_rcu_tasks_rude, * period elapses, in other words after all currently executing RCU * read-side critical sections have completed. call_rcu_tasks_rude() * assumes that the read-side critical sections end at context switch, - * cond_resched_rcu_qs(), or transition to usermode execution. As such, - * there are no read-side primitives analogous to rcu_read_lock() and - * rcu_read_unlock() because this primitive is intended to determine - * that all tasks have passed through a safe state, not so much for - * data-structure synchronization. + * cond_resched_rcu_qs(), or transition to usermode execution (as + * usermode execution is schedulable). 
As such, there are no read-side + * primitives analogous to rcu_read_lock() and rcu_read_unlock() because + * this primitive is intended to determine that all tasks have passed + * through a safe state, not so much for data-structure synchronization. * * See the description of call_rcu() for more detailed information on * memory ordering guarantees. @@ -699,8 +699,8 @@ EXPORT_SYMBOL_GPL(call_rcu_tasks_rude); * grace period has elapsed, in other words after all currently * executing rcu-tasks read-side critical sections have elapsed. These * read-side critical sections are delimited by calls to schedule(), - * cond_resched_tasks_rcu_qs(), userspace execution, and (in theory, - * anyway) cond_resched(). + * cond_resched_tasks_rcu_qs(), userspace execution (which is a schedulable + * context), and (in theory, anyway) cond_resched(). * * This is a very specialized primitive, intended only for a few uses in * tracing and other situations requiring manipulation of function preambles -- cgit v1.2.3 From ed42c38067129c85ab1bda39f2fd91924a432dc0 Mon Sep 17 00:00:00 2001 From: Neeraj Upadhyay Date: Wed, 25 Aug 2021 12:40:50 +0530 Subject: rcu-tasks: Fix read-side primitives comment for call_rcu_tasks_trace call_rcu_tasks_trace() does have read-side primitives - rcu_read_lock_trace() and rcu_read_unlock_trace(). Fix this information in the comments. Signed-off-by: Neeraj Upadhyay Signed-off-by: Paul E. McKenney --- kernel/rcu/tasks.h | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) (limited to 'kernel/rcu') diff --git a/kernel/rcu/tasks.h b/kernel/rcu/tasks.h index 8c63b4d23829..47a29e411217 100644 --- a/kernel/rcu/tasks.h +++ b/kernel/rcu/tasks.h @@ -1207,15 +1207,11 @@ static void exit_tasks_rcu_finish_trace(struct task_struct *t) * @rhp: structure to be used for queueing the RCU updates. * @func: actual callback function to be invoked after the grace period * - * The callback function will be invoked some time after a full grace - * period elapses, in other words after all currently executing RCU - * read-side critical sections have completed. call_rcu_tasks_trace() - * assumes that the read-side critical sections end at context switch, - * cond_resched_rcu_qs(), or transition to usermode execution. As such, - * there are no read-side primitives analogous to rcu_read_lock() and - * rcu_read_unlock() because this primitive is intended to determine - * that all tasks have passed through a safe state, not so much for - * data-structure synchronization. + * The callback function will be invoked some time after a trace rcu-tasks + * grace period elapses, in other words after all currently executing + * trace rcu-tasks read-side critical sections have completed. These + * read-side critical sections are delimited by calls to rcu_read_lock_trace() + * and rcu_read_unlock_trace(). * * See the description of call_rcu() for more detailed information on * memory ordering guarantees. @@ -1231,7 +1227,7 @@ EXPORT_SYMBOL_GPL(call_rcu_tasks_trace); * * Control will return to the caller some time after a trace rcu-tasks * grace period has elapsed, in other words after all currently executing - * rcu-tasks read-side critical sections have elapsed. These read-side + * trace rcu-tasks read-side critical sections have elapsed. These read-side * critical sections are delimited by calls to rcu_read_lock_trace() * and rcu_read_unlock_trace(). 
* -- cgit v1.2.3 From 46aa886c483f57ef13cd5ea0a85e70b93eb1d381 Mon Sep 17 00:00:00 2001 From: Neeraj Upadhyay Date: Fri, 27 Aug 2021 13:43:35 +0530 Subject: rcu-tasks: Fix IPI failure handling in trc_wait_for_one_reader The trc_wait_for_one_reader() function is called at multiple stages of the trace rcu-tasks GP function, rcu_tasks_wait_gp(): - First, it is called as part of the per-task function rcu_tasks_trace_pertask(), for all non-idle tasks. As part of per-task processing, this function adds the task to the holdout list and, if the task is currently running on a CPU, sends an IPI to the task's CPU. The IPI handler takes action depending on whether the task is in a trace rcu-tasks read-side critical section or not: - a. If the task is in a trace rcu-tasks read-side critical section (t->trc_reader_nesting != 0), the IPI handler sets the task's ->trc_reader_special.b.need_qs, so that this task notifies the GP handling function of the exit from its outermost read-side critical section (by decrementing trc_n_readers_need_end). trc_wait_for_one_reader() also increments trc_n_readers_need_end, so that the trace rcu-tasks GP handler function waits for this task's read-side exit notification. The IPI handler also sets t->trc_reader_checked to true, so that no further IPIs are sent for this task during this trace rcu-tasks grace period and the task can be removed from the holdout list. - b. If the task is in the process of exiting its trace rcu-tasks read-side critical section (t->trc_reader_nesting < 0), defer this task's processing to future calls to trc_wait_for_one_reader(). - c. If the task is not in a trace rcu-tasks read-side critical section (t->trc_reader_nesting == 0), ->trc_reader_checked is set for this task, so that this task is removed from the holdout list. - Second, trc_wait_for_one_reader() is called as part of the post scan, in rcu_tasks_trace_postscan(), for all idle tasks. - Third, in check_all_holdout_tasks_trace(), this function is called for each task in the holdout list, but only if there isn't a pending IPI for the task (->trc_ipi_to_cpu == -1). check_all_holdout_tasks_trace() removes the task from the holdout list if the IPI handler has completed the required work, ensuring that the current trace rcu-tasks grace period either waits for this task or this task is not in a trace rcu-tasks read-side critical section. Now consider the scenario where smp_call_function_single() fails in the first case, inside rcu_tasks_trace_pertask(). In this case, ->trc_ipi_to_cpu is set to the current CPU for that task. This results in trc_wait_for_one_reader() getting skipped in the third case, inside check_all_holdout_tasks_trace(), for this task. This further results in ->trc_reader_checked never getting set for this task, and the task not getting removed from the holdout list. This can cause the current trace rcu-tasks grace period to stall. Fix the above problem by resetting ->trc_ipi_to_cpu to -1 on smp_call_function_single() failure, so that future IPIs can be sent for this task. Note that all three of the trc_wait_for_one_reader() function's callers (rcu_tasks_trace_pertask(), rcu_tasks_trace_postscan(), check_all_holdout_tasks_trace()) hold cpus_read_lock(). This means that smp_call_function_single() cannot race with CPU hotplug, and thus should never fail. Therefore, also add a warning in order to report any such failure in case smp_call_function_single() grows some other reason for failure. Signed-off-by: Neeraj Upadhyay Signed-off-by: Paul E. 
McKenney --- kernel/rcu/tasks.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'kernel/rcu') diff --git a/kernel/rcu/tasks.h b/kernel/rcu/tasks.h index 47a29e411217..0c10c8407dca 100644 --- a/kernel/rcu/tasks.h +++ b/kernel/rcu/tasks.h @@ -1012,9 +1012,11 @@ static void trc_wait_for_one_reader(struct task_struct *t, if (smp_call_function_single(cpu, trc_read_check_handler, t, 0)) { // Just in case there is some other reason for // failure than the target CPU being offline. + WARN_ONCE(1, "%s(): smp_call_function_single() failed for CPU: %d\n", + __func__, cpu); rcu_tasks_trace.n_ipis_fails++; per_cpu(trc_ipi_to_cpu, cpu) = false; - t->trc_ipi_to_cpu = cpu; + t->trc_ipi_to_cpu = -1; } } } -- cgit v1.2.3 From 8af9e2c7826a67a26c2c7a0cd3ce09a5acaf8035 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Wed, 15 Sep 2021 09:24:18 -0700 Subject: rcu-tasks: Update comments to cond_resched_tasks_rcu_qs() The cond_resched_rcu_qs() function no longer exists, despite being mentioned several times in kernel/rcu/tasks.h. This commit therefore updates it to the current cond_resched_tasks_rcu_qs(). Reported-by: Neeraj Upadhyay Signed-off-by: Paul E. McKenney --- kernel/rcu/tasks.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'kernel/rcu') diff --git a/kernel/rcu/tasks.h b/kernel/rcu/tasks.h index 0c10c8407dca..66e7586a33e9 100644 --- a/kernel/rcu/tasks.h +++ b/kernel/rcu/tasks.h @@ -368,7 +368,7 @@ static void rcu_tasks_wait_gp(struct rcu_tasks *rtp) //////////////////////////////////////////////////////////////////////// // // Simple variant of RCU whose quiescent states are voluntary context -// switch, cond_resched_rcu_qs(), user-space execution, and idle. +// switch, cond_resched_tasks_rcu_qs(), user-space execution, and idle. // As such, grace periods can take one good long time. There are no // read-side primitives similar to rcu_read_lock() and rcu_read_unlock() // because this implementation is intended to get the system into a safe @@ -539,7 +539,7 @@ DEFINE_RCU_TASKS(rcu_tasks, rcu_tasks_wait_gp, call_rcu_tasks, "RCU Tasks"); * period elapses, in other words after all currently executing RCU * read-side critical sections have completed. call_rcu_tasks() assumes * that the read-side critical sections end at a voluntary context - * switch (not a preemption!), cond_resched_rcu_qs(), entry into idle, + * switch (not a preemption!), cond_resched_tasks_rcu_qs(), entry into idle, * or transition to usermode execution. As such, there are no read-side * primitives analogous to rcu_read_lock() and rcu_read_unlock() because * this primitive is intended to determine that all tasks have passed @@ -677,7 +677,7 @@ DEFINE_RCU_TASKS(rcu_tasks_rude, rcu_tasks_rude_wait_gp, call_rcu_tasks_rude, * period elapses, in other words after all currently executing RCU * read-side critical sections have completed. call_rcu_tasks_rude() * assumes that the read-side critical sections end at context switch, - * cond_resched_rcu_qs(), or transition to usermode execution (as + * cond_resched_tasks_rcu_qs(), or transition to usermode execution (as * usermode execution is schedulable). 
As such, there are no read-side * primitives analogous to rcu_read_lock() and rcu_read_unlock() because * this primitive is intended to determine that all tasks have passed -- cgit v1.2.3 From 925da92ba5cb0c82d07cdd5049a07e40f54e9c44 Mon Sep 17 00:00:00 2001 From: Waiman Long Date: Thu, 26 Aug 2021 22:21:22 -0400 Subject: rcu: Avoid unneeded function call in rcu_read_unlock() Since commit aa40c138cc8f3 ("rcu: Report QS for outermost PREEMPT=n rcu_read_unlock() for strict GPs") the function rcu_read_unlock_strict() is invoked by the inlined rcu_read_unlock() function. However, rcu_read_unlock_strict() is an empty function in production kernels, which are built with CONFIG_RCU_STRICT_GRACE_PERIOD=n. There is a mention of rcu_read_unlock_strict() in the BPF verifier, but this is in a deny-list, meaning that BPF does not care whether rcu_read_unlock_strict() is ever called. This commit therefore provides a slight performance improvement by hoisting the check of CONFIG_RCU_STRICT_GRACE_PERIOD from rcu_read_unlock_strict() into rcu_read_unlock(), thus avoiding the pointless call to an empty function. Cc: Alexei Starovoitov Acked-by: Andrii Nakryiko Signed-off-by: Waiman Long Signed-off-by: Paul E. McKenney --- include/linux/rcupdate.h | 3 ++- kernel/rcu/tree_plugin.h | 3 +-- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'kernel/rcu') diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 434d12fe2d4f..5e0beb5c5659 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -71,7 +71,8 @@ static inline void __rcu_read_lock(void) static inline void __rcu_read_unlock(void) { preempt_enable(); - rcu_read_unlock_strict(); + if (IS_ENABLED(CONFIG_RCU_STRICT_GRACE_PERIOD)) + rcu_read_unlock_strict(); } static inline int rcu_preempt_depth(void) diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h index d070059163d7..1a6fdb03d0a5 100644 --- a/kernel/rcu/tree_plugin.h +++ b/kernel/rcu/tree_plugin.h @@ -814,8 +814,7 @@ void rcu_read_unlock_strict(void) { struct rcu_data *rdp; - if (!IS_ENABLED(CONFIG_RCU_STRICT_GRACE_PERIOD) || - irqs_disabled() || preempt_count() || !rcu_state.gp_kthread) + if (irqs_disabled() || preempt_count() || !rcu_state.gp_kthread) return; rdp = this_cpu_ptr(&rcu_data); rcu_report_qs_rdp(rdp); -- cgit v1.2.3 From 7663ad9a5dbcc27f3090e6bfd192c7e59222709f Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 28 Sep 2021 10:40:21 +0200 Subject: rcu: Always inline rcu_dynticks_task*_{enter,exit}() RCU managed to grow a few noinstr violations: vmlinux.o: warning: objtool: rcu_dynticks_eqs_enter()+0x0: call to rcu_dynticks_task_trace_enter() leaves .noinstr.text section vmlinux.o: warning: objtool: rcu_dynticks_eqs_exit()+0xe: call to rcu_dynticks_task_trace_exit() leaves .noinstr.text section Fix them by adding __always_inline to the relevant trivial functions. Also replace the noinstr with __always_inline for the existing rcu_dynticks_task_*() functions since noinstr would force noinline them, even when empty, which seems silly. Fixes: 7d0c9c50c5a1 ("rcu-tasks: Avoid IPIing userspace/idle tasks if kernel is so built") Reported-by: Stephen Rothwell Reviewed-by: Thomas Gleixner Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Paul E. 
McKenney --- kernel/rcu/tree_plugin.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'kernel/rcu') diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h index 1a6fdb03d0a5..5199559fbbf0 100644 --- a/kernel/rcu/tree_plugin.h +++ b/kernel/rcu/tree_plugin.h @@ -1479,7 +1479,7 @@ static void rcu_bind_gp_kthread(void) } /* Record the current task on dyntick-idle entry. */ -static void noinstr rcu_dynticks_task_enter(void) +static __always_inline void rcu_dynticks_task_enter(void) { #if defined(CONFIG_TASKS_RCU) && defined(CONFIG_NO_HZ_FULL) WRITE_ONCE(current->rcu_tasks_idle_cpu, smp_processor_id()); @@ -1487,7 +1487,7 @@ static void noinstr rcu_dynticks_task_enter(void) } /* Record no current task on dyntick-idle exit. */ -static void noinstr rcu_dynticks_task_exit(void) +static __always_inline void rcu_dynticks_task_exit(void) { #if defined(CONFIG_TASKS_RCU) && defined(CONFIG_NO_HZ_FULL) WRITE_ONCE(current->rcu_tasks_idle_cpu, -1); @@ -1495,7 +1495,7 @@ static void noinstr rcu_dynticks_task_exit(void) } /* Turn on heavyweight RCU tasks trace readers on idle/user entry. */ -static void rcu_dynticks_task_trace_enter(void) +static __always_inline void rcu_dynticks_task_trace_enter(void) { #ifdef CONFIG_TASKS_TRACE_RCU if (IS_ENABLED(CONFIG_TASKS_TRACE_RCU_READ_MB)) @@ -1504,7 +1504,7 @@ static void rcu_dynticks_task_trace_enter(void) } /* Turn off heavyweight RCU tasks trace readers on idle/user exit. */ -static void rcu_dynticks_task_trace_exit(void) +static __always_inline void rcu_dynticks_task_trace_exit(void) { #ifdef CONFIG_TASKS_TRACE_RCU if (IS_ENABLED(CONFIG_TASKS_TRACE_RCU_READ_MB)) -- cgit v1.2.3 From 74aece72f95f399dd29363669dc32a1344c8fab4 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 28 Sep 2021 10:40:22 +0200 Subject: rcu: Fix rcu_dynticks_curr_cpu_in_eqs() vs noinstr vmlinux.o: warning: objtool: rcu_nmi_enter()+0x36: call to __kasan_check_read() leaves .noinstr.text section noinstr cannot have atomic_*() functions in because they're explicitly annotated, use arch_atomic_*(). Fixes: 2be57f732889 ("rcu: Weaken ->dynticks accesses and updates") Reported-by: Stephen Rothwell Reviewed-by: Thomas Gleixner Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Paul E. McKenney --- kernel/rcu/tree.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel/rcu') diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index 6a1e9d3374db..ef8d36f580fc 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -327,7 +327,7 @@ static void rcu_dynticks_eqs_online(void) */ static __always_inline bool rcu_dynticks_curr_cpu_in_eqs(void) { - return !(atomic_read(this_cpu_ptr(&rcu_data.dynticks)) & 0x1); + return !(arch_atomic_read(this_cpu_ptr(&rcu_data.dynticks)) & 0x1); } /* -- cgit v1.2.3
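As background for the last patch above: code in the .noinstr.text section must not call instrumented helpers, and the generic atomic_*() wrappers are explicitly instrumented (hence the reported call to __kasan_check_read()), which is what objtool flags; the raw arch_atomic_*() accessors avoid that instrumentation. The short sketch below is illustrative only and is not taken from any patch in this series; the function and variable names are hypothetical.

#include <linux/atomic.h>
#include <linux/compiler_types.h>
#include <linux/types.h>

static atomic_t example_state;	/* hypothetical state word, low bit used as a flag */

/*
 * Callable from noinstr context: arch_atomic_read() is the raw,
 * uninstrumented accessor, so no KASAN check call is emitted into
 * the .noinstr.text section.
 */
static noinstr bool example_low_bit_set(void)
{
	return arch_atomic_read(&example_state) & 0x1;
}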