summaryrefslogtreecommitdiffstats
path: root/kernel/context_tracking.c
diff options
context:
space:
mode:
authorFrederic Weisbecker <frederic@kernel.org>2022-06-08 16:40:33 +0200
committerPaul E. McKenney <paulmck@kernel.org>2022-07-05 22:32:59 +0200
commit172114552701b85d5c3b1a089a73ee85d0d7786b (patch)
treeed4d0e8f6df0783fe036827770ed701c4f199587 /kernel/context_tracking.c
parentrcu/context-tracking: Move deferred nocb resched to context tracking (diff)
downloadlinux-172114552701b85d5c3b1a089a73ee85d0d7786b.tar.xz
linux-172114552701b85d5c3b1a089a73ee85d0d7786b.zip
rcu/context-tracking: Move RCU-dynticks internal functions to context_tracking
Move the core RCU eqs/dynticks functions to context tracking so that we can later merge all that code within context tracking. Acked-by: Paul E. McKenney <paulmck@kernel.org> Signed-off-by: Frederic Weisbecker <frederic@kernel.org> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Thomas Gleixner <tglx@linutronix.de> Cc: Neeraj Upadhyay <quic_neeraju@quicinc.com> Cc: Uladzislau Rezki <uladzislau.rezki@sony.com> Cc: Joel Fernandes <joel@joelfernandes.org> Cc: Boqun Feng <boqun.feng@gmail.com> Cc: Nicolas Saenz Julienne <nsaenz@kernel.org> Cc: Marcelo Tosatti <mtosatti@redhat.com> Cc: Xiongfeng Wang <wangxiongfeng2@huawei.com> Cc: Yu Liao <liaoyu15@huawei.com> Cc: Phil Auld <pauld@redhat.com> Cc: Paul Gortmaker<paul.gortmaker@windriver.com> Cc: Alex Belits <abelits@marvell.com> Signed-off-by: Paul E. McKenney <paulmck@kernel.org> Reviewed-by: Nicolas Saenz Julienne <nsaenzju@redhat.com> Tested-by: Nicolas Saenz Julienne <nsaenzju@redhat.com>
Diffstat (limited to 'kernel/context_tracking.c')
-rw-r--r--kernel/context_tracking.c336
1 files changed, 336 insertions, 0 deletions
diff --git a/kernel/context_tracking.c b/kernel/context_tracking.c
index 8cf59d8a6af6..072c4b6044b3 100644
--- a/kernel/context_tracking.c
+++ b/kernel/context_tracking.c
@@ -21,6 +21,7 @@
#include <linux/hardirq.h>
#include <linux/export.h>
#include <linux/kprobes.h>
+#include <trace/events/rcu.h>
DEFINE_PER_CPU(struct context_tracking, context_tracking) = {
@@ -33,6 +34,309 @@ DEFINE_PER_CPU(struct context_tracking, context_tracking) = {
EXPORT_SYMBOL_GPL(context_tracking);
#ifdef CONFIG_CONTEXT_TRACKING_IDLE
+#define TPS(x) tracepoint_string(x)
+
+/* Record the current task on dyntick-idle entry. */
+static __always_inline void rcu_dynticks_task_enter(void)
+{
+#if defined(CONFIG_TASKS_RCU) && defined(CONFIG_NO_HZ_FULL)
+ WRITE_ONCE(current->rcu_tasks_idle_cpu, smp_processor_id());
+#endif /* #if defined(CONFIG_TASKS_RCU) && defined(CONFIG_NO_HZ_FULL) */
+}
+
+/* Record no current task on dyntick-idle exit. */
+static __always_inline void rcu_dynticks_task_exit(void)
+{
+#if defined(CONFIG_TASKS_RCU) && defined(CONFIG_NO_HZ_FULL)
+ WRITE_ONCE(current->rcu_tasks_idle_cpu, -1);
+#endif /* #if defined(CONFIG_TASKS_RCU) && defined(CONFIG_NO_HZ_FULL) */
+}
+
+/* Turn on heavyweight RCU tasks trace readers on idle/user entry. */
+static __always_inline void rcu_dynticks_task_trace_enter(void)
+{
+#ifdef CONFIG_TASKS_TRACE_RCU
+ if (IS_ENABLED(CONFIG_TASKS_TRACE_RCU_READ_MB))
+ current->trc_reader_special.b.need_mb = true;
+#endif /* #ifdef CONFIG_TASKS_TRACE_RCU */
+}
+
+/* Turn off heavyweight RCU tasks trace readers on idle/user exit. */
+static __always_inline void rcu_dynticks_task_trace_exit(void)
+{
+#ifdef CONFIG_TASKS_TRACE_RCU
+ if (IS_ENABLED(CONFIG_TASKS_TRACE_RCU_READ_MB))
+ current->trc_reader_special.b.need_mb = false;
+#endif /* #ifdef CONFIG_TASKS_TRACE_RCU */
+}
+
+/*
+ * Record entry into an extended quiescent state. This is only to be
+ * called when not already in an extended quiescent state, that is,
+ * RCU is watching prior to the call to this function and is no longer
+ * watching upon return.
+ */
+static noinstr void rcu_dynticks_eqs_enter(void)
+{
+ int seq;
+
+ /*
+ * CPUs seeing atomic_add_return() must see prior RCU read-side
+ * critical sections, and we also must force ordering with the
+ * next idle sojourn.
+ */
+ rcu_dynticks_task_trace_enter(); // Before ->dynticks update!
+ seq = rcu_dynticks_inc(1);
+ // RCU is no longer watching. Better be in extended quiescent state!
+ WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && (seq & 0x1));
+}
+
+/*
+ * Record exit from an extended quiescent state. This is only to be
+ * called from an extended quiescent state, that is, RCU is not watching
+ * prior to the call to this function and is watching upon return.
+ */
+static noinstr void rcu_dynticks_eqs_exit(void)
+{
+ int seq;
+
+ /*
+ * CPUs seeing atomic_add_return() must see prior idle sojourns,
+ * and we also must force ordering with the next RCU read-side
+ * critical section.
+ */
+ seq = rcu_dynticks_inc(1);
+ // RCU is now watching. Better not be in an extended quiescent state!
+ rcu_dynticks_task_trace_exit(); // After ->dynticks update!
+ WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && !(seq & 0x1));
+}
+
+/*
+ * Enter an RCU extended quiescent state, which can be either the
+ * idle loop or adaptive-tickless usermode execution.
+ *
+ * We crowbar the ->dynticks_nmi_nesting field to zero to allow for
+ * the possibility of usermode upcalls having messed up our count
+ * of interrupt nesting level during the prior busy period.
+ */
+static void noinstr rcu_eqs_enter(bool user)
+{
+ struct context_tracking *ct = this_cpu_ptr(&context_tracking);
+
+ WARN_ON_ONCE(ct_dynticks_nmi_nesting() != DYNTICK_IRQ_NONIDLE);
+ WRITE_ONCE(ct->dynticks_nmi_nesting, 0);
+ WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) &&
+ ct_dynticks_nesting() == 0);
+ if (ct_dynticks_nesting() != 1) {
+ // RCU will still be watching, so just do accounting and leave.
+ ct->dynticks_nesting--;
+ return;
+ }
+
+ instrumentation_begin();
+ lockdep_assert_irqs_disabled();
+ trace_rcu_dyntick(TPS("Start"), ct_dynticks_nesting(), 0, ct_dynticks());
+ WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && !user && !is_idle_task(current));
+ rcu_preempt_deferred_qs(current);
+
+ // instrumentation for the noinstr rcu_dynticks_eqs_enter()
+ instrument_atomic_write(&ct->dynticks, sizeof(ct->dynticks));
+
+ instrumentation_end();
+ WRITE_ONCE(ct->dynticks_nesting, 0); /* Avoid irq-access tearing. */
+ // RCU is watching here ...
+ rcu_dynticks_eqs_enter();
+ // ... but is no longer watching here.
+ rcu_dynticks_task_enter();
+}
+
+/*
+ * Exit an RCU extended quiescent state, which can be either the
+ * idle loop or adaptive-tickless usermode execution.
+ *
+ * We crowbar the ->dynticks_nmi_nesting field to DYNTICK_IRQ_NONIDLE to
+ * allow for the possibility of usermode upcalls messing up our count of
+ * interrupt nesting level during the busy period that is just now starting.
+ */
+static void noinstr rcu_eqs_exit(bool user)
+{
+ struct context_tracking *ct = this_cpu_ptr(&context_tracking);
+ long oldval;
+
+ WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && !raw_irqs_disabled());
+ oldval = ct_dynticks_nesting();
+ WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && oldval < 0);
+ if (oldval) {
+ // RCU was already watching, so just do accounting and leave.
+ ct->dynticks_nesting++;
+ return;
+ }
+ rcu_dynticks_task_exit();
+ // RCU is not watching here ...
+ rcu_dynticks_eqs_exit();
+ // ... but is watching here.
+ instrumentation_begin();
+
+ // instrumentation for the noinstr rcu_dynticks_eqs_exit()
+ instrument_atomic_write(&ct->dynticks, sizeof(ct->dynticks));
+
+ trace_rcu_dyntick(TPS("End"), ct_dynticks_nesting(), 1, ct_dynticks());
+ WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && !user && !is_idle_task(current));
+ WRITE_ONCE(ct->dynticks_nesting, 1);
+ WARN_ON_ONCE(ct_dynticks_nmi_nesting());
+ WRITE_ONCE(ct->dynticks_nmi_nesting, DYNTICK_IRQ_NONIDLE);
+ instrumentation_end();
+}
+
+/**
+ * rcu_nmi_exit - inform RCU of exit from NMI context
+ *
+ * If we are returning from the outermost NMI handler that interrupted an
+ * RCU-idle period, update ct->dynticks and ct->dynticks_nmi_nesting
+ * to let the RCU grace-period handling know that the CPU is back to
+ * being RCU-idle.
+ *
+ * If you add or remove a call to rcu_nmi_exit(), be sure to test
+ * with CONFIG_RCU_EQS_DEBUG=y.
+ */
+void noinstr rcu_nmi_exit(void)
+{
+ struct context_tracking *ct = this_cpu_ptr(&context_tracking);
+
+ instrumentation_begin();
+ /*
+ * Check for ->dynticks_nmi_nesting underflow and bad ->dynticks.
+ * (We are exiting an NMI handler, so RCU better be paying attention
+ * to us!)
+ */
+ WARN_ON_ONCE(ct_dynticks_nmi_nesting() <= 0);
+ WARN_ON_ONCE(rcu_dynticks_curr_cpu_in_eqs());
+
+ /*
+ * If the nesting level is not 1, the CPU wasn't RCU-idle, so
+ * leave it in non-RCU-idle state.
+ */
+ if (ct_dynticks_nmi_nesting() != 1) {
+ trace_rcu_dyntick(TPS("--="), ct_dynticks_nmi_nesting(), ct_dynticks_nmi_nesting() - 2,
+ ct_dynticks());
+ WRITE_ONCE(ct->dynticks_nmi_nesting, /* No store tearing. */
+ ct_dynticks_nmi_nesting() - 2);
+ instrumentation_end();
+ return;
+ }
+
+ /* This NMI interrupted an RCU-idle CPU, restore RCU-idleness. */
+ trace_rcu_dyntick(TPS("Startirq"), ct_dynticks_nmi_nesting(), 0, ct_dynticks());
+ WRITE_ONCE(ct->dynticks_nmi_nesting, 0); /* Avoid store tearing. */
+
+ // instrumentation for the noinstr rcu_dynticks_eqs_enter()
+ instrument_atomic_write(&ct->dynticks, sizeof(ct->dynticks));
+ instrumentation_end();
+
+ // RCU is watching here ...
+ rcu_dynticks_eqs_enter();
+ // ... but is no longer watching here.
+
+ if (!in_nmi())
+ rcu_dynticks_task_enter();
+}
+
+/**
+ * rcu_nmi_enter - inform RCU of entry to NMI context
+ *
+ * If the CPU was idle from RCU's viewpoint, update ct->dynticks and
+ * ct->dynticks_nmi_nesting to let the RCU grace-period handling know
+ * that the CPU is active. This implementation permits nested NMIs, as
+ * long as the nesting level does not overflow an int. (You will probably
+ * run out of stack space first.)
+ *
+ * If you add or remove a call to rcu_nmi_enter(), be sure to test
+ * with CONFIG_RCU_EQS_DEBUG=y.
+ */
+void noinstr rcu_nmi_enter(void)
+{
+ long incby = 2;
+ struct context_tracking *ct = this_cpu_ptr(&context_tracking);
+
+ /* Complain about underflow. */
+ WARN_ON_ONCE(ct_dynticks_nmi_nesting() < 0);
+
+ /*
+ * If idle from RCU viewpoint, atomically increment ->dynticks
+ * to mark non-idle and increment ->dynticks_nmi_nesting by one.
+ * Otherwise, increment ->dynticks_nmi_nesting by two. This means
+ * if ->dynticks_nmi_nesting is equal to one, we are guaranteed
+ * to be in the outermost NMI handler that interrupted an RCU-idle
+ * period (observation due to Andy Lutomirski).
+ */
+ if (rcu_dynticks_curr_cpu_in_eqs()) {
+
+ if (!in_nmi())
+ rcu_dynticks_task_exit();
+
+ // RCU is not watching here ...
+ rcu_dynticks_eqs_exit();
+ // ... but is watching here.
+
+ instrumentation_begin();
+ // instrumentation for the noinstr rcu_dynticks_curr_cpu_in_eqs()
+ instrument_atomic_read(&ct->dynticks, sizeof(ct->dynticks));
+ // instrumentation for the noinstr rcu_dynticks_eqs_exit()
+ instrument_atomic_write(&ct->dynticks, sizeof(ct->dynticks));
+
+ incby = 1;
+ } else if (!in_nmi()) {
+ instrumentation_begin();
+ rcu_irq_enter_check_tick();
+ } else {
+ instrumentation_begin();
+ }
+
+ trace_rcu_dyntick(incby == 1 ? TPS("Endirq") : TPS("++="),
+ ct_dynticks_nmi_nesting(),
+ ct_dynticks_nmi_nesting() + incby, ct_dynticks());
+ instrumentation_end();
+ WRITE_ONCE(ct->dynticks_nmi_nesting, /* Prevent store tearing. */
+ ct_dynticks_nmi_nesting() + incby);
+ barrier();
+}
+
+/**
+ * rcu_idle_enter - inform RCU that current CPU is entering idle
+ *
+ * Enter idle mode, in other words, -leave- the mode in which RCU
+ * read-side critical sections can occur. (Though RCU read-side
+ * critical sections can occur in irq handlers in idle, a possibility
+ * handled by irq_enter() and irq_exit().)
+ *
+ * If you add or remove a call to rcu_idle_enter(), be sure to test with
+ * CONFIG_RCU_EQS_DEBUG=y.
+ */
+void noinstr rcu_idle_enter(void)
+{
+ WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && !raw_irqs_disabled());
+ rcu_eqs_enter(false);
+}
+
+/**
+ * rcu_idle_exit - inform RCU that current CPU is leaving idle
+ *
+ * Exit idle mode, in other words, -enter- the mode in which RCU
+ * read-side critical sections can occur.
+ *
+ * If you add or remove a call to rcu_idle_exit(), be sure to test with
+ * CONFIG_RCU_EQS_DEBUG=y.
+ */
+void noinstr rcu_idle_exit(void)
+{
+ unsigned long flags;
+
+ raw_local_irq_save(flags);
+ rcu_eqs_exit(false);
+ raw_local_irq_restore(flags);
+}
+EXPORT_SYMBOL_GPL(rcu_idle_exit);
+
noinstr void ct_idle_enter(void)
{
rcu_idle_enter();
@@ -139,6 +443,38 @@ noinstr void ct_nmi_exit(void)
}
#endif /* #ifdef CONFIG_CONTEXT_TRACKING_IDLE */
+#ifdef CONFIG_NO_HZ_FULL
+/**
+ * rcu_user_enter - inform RCU that we are resuming userspace.
+ *
+ * Enter RCU idle mode right before resuming userspace. No use of RCU
+ * is permitted between this call and rcu_user_exit(). This way the
+ * CPU doesn't need to maintain the tick for RCU maintenance purposes
+ * when the CPU runs in userspace.
+ *
+ * If you add or remove a call to rcu_user_enter(), be sure to test with
+ * CONFIG_RCU_EQS_DEBUG=y.
+ */
+noinstr void rcu_user_enter(void)
+{
+ rcu_eqs_enter(true);
+}
+
+/**
+ * rcu_user_exit - inform RCU that we are exiting userspace.
+ *
+ * Exit RCU idle mode while entering the kernel because it can
+ * run a RCU read side critical section anytime.
+ *
+ * If you add or remove a call to rcu_user_exit(), be sure to test with
+ * CONFIG_RCU_EQS_DEBUG=y.
+ */
+void noinstr rcu_user_exit(void)
+{
+ rcu_eqs_exit(true);
+}
+#endif /* #ifdef CONFIG_NO_HZ_FULL */
+
#ifdef CONFIG_CONTEXT_TRACKING_USER
#define CREATE_TRACE_POINTS