diff options
Diffstat (limited to 'kernel')
-rw-r--r-- | kernel/rcu/rcu.h | 27 | ||||
-rw-r--r-- | kernel/rcu/rcuperf.c | 9 | ||||
-rw-r--r-- | kernel/rcu/rcutorture.c | 6 | ||||
-rw-r--r-- | kernel/rcu/srcutree.c | 8 | ||||
-rw-r--r-- | kernel/rcu/tiny.c | 4 | ||||
-rw-r--r-- | kernel/rcu/tree.c | 132 | ||||
-rw-r--r-- | kernel/rcu/tree.h | 1 | ||||
-rw-r--r-- | kernel/rcu/tree_plugin.h | 55 | ||||
-rw-r--r-- | kernel/rcu/update.c | 45 |
9 files changed, 177 insertions, 110 deletions
diff --git a/kernel/rcu/rcu.h b/kernel/rcu/rcu.h index bee070979970..4d04683c31b2 100644 --- a/kernel/rcu/rcu.h +++ b/kernel/rcu/rcu.h @@ -91,7 +91,17 @@ static inline void rcu_seq_end(unsigned long *sp) WRITE_ONCE(*sp, rcu_seq_endval(sp)); } -/* Take a snapshot of the update side's sequence number. */ +/* + * rcu_seq_snap - Take a snapshot of the update side's sequence number. + * + * This function returns the earliest value of the grace-period sequence number + * that will indicate that a full grace period has elapsed since the current + * time. Once the grace-period sequence number has reached this value, it will + * be safe to invoke all callbacks that have been registered prior to the + * current time. This value is the current grace-period number plus two to the + * power of the number of low-order bits reserved for state, then rounded up to + * the next value in which the state bits are all zero. + */ static inline unsigned long rcu_seq_snap(unsigned long *sp) { unsigned long s; @@ -148,7 +158,20 @@ static inline bool rcu_seq_new_gp(unsigned long old, unsigned long new) */ static inline unsigned long rcu_seq_diff(unsigned long new, unsigned long old) { - return (new - old) >> RCU_SEQ_CTR_SHIFT; + unsigned long rnd_diff; + + if (old == new) + return 0; + /* + * Compute the number of grace periods (still shifted up), plus + * one if either of new and old is not an exact grace period. + */ + rnd_diff = (new & ~RCU_SEQ_STATE_MASK) - + ((old + RCU_SEQ_STATE_MASK) & ~RCU_SEQ_STATE_MASK) + + ((new & RCU_SEQ_STATE_MASK) || (old & RCU_SEQ_STATE_MASK)); + if (ULONG_CMP_GE(RCU_SEQ_STATE_MASK, rnd_diff)) + return 1; /* Definitely no grace period has elapsed. */ + return ((rnd_diff - RCU_SEQ_STATE_MASK - 1) >> RCU_SEQ_CTR_SHIFT) + 2; } /* diff --git a/kernel/rcu/rcuperf.c b/kernel/rcu/rcuperf.c index 06eb5e42726d..34244523550e 100644 --- a/kernel/rcu/rcuperf.c +++ b/kernel/rcu/rcuperf.c @@ -531,7 +531,7 @@ retry: return 0; } -static inline void +static void rcu_perf_print_module_parms(struct rcu_perf_ops *cur_ops, const char *tag) { pr_alert("%s" PERF_FLAG @@ -675,12 +675,11 @@ rcu_perf_init(void) break; } if (i == ARRAY_SIZE(perf_ops)) { - pr_alert("rcu-perf: invalid perf type: \"%s\"\n", - perf_type); + pr_alert("rcu-perf: invalid perf type: \"%s\"\n", perf_type); pr_alert("rcu-perf types:"); for (i = 0; i < ARRAY_SIZE(perf_ops); i++) - pr_alert(" %s", perf_ops[i]->name); - pr_alert("\n"); + pr_cont(" %s", perf_ops[i]->name); + pr_cont("\n"); firsterr = -EINVAL; goto unwind; } diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c index bdc86cdf3b8b..c596c6f1e457 100644 --- a/kernel/rcu/rcutorture.c +++ b/kernel/rcu/rcutorture.c @@ -1539,7 +1539,7 @@ rcu_torture_stats(void *arg) return 0; } -static inline void +static void rcu_torture_print_module_parms(struct rcu_torture_ops *cur_ops, const char *tag) { pr_alert("%s" TORTURE_FLAG @@ -1961,8 +1961,8 @@ rcu_torture_init(void) torture_type); pr_alert("rcu-torture types:"); for (i = 0; i < ARRAY_SIZE(torture_ops); i++) - pr_alert(" %s", torture_ops[i]->name); - pr_alert("\n"); + pr_cont(" %s", torture_ops[i]->name); + pr_cont("\n"); firsterr = -EINVAL; goto unwind; } diff --git a/kernel/rcu/srcutree.c b/kernel/rcu/srcutree.c index d6d6ea9738c0..6c9866a854b1 100644 --- a/kernel/rcu/srcutree.c +++ b/kernel/rcu/srcutree.c @@ -26,6 +26,8 @@ * */ +#define pr_fmt(fmt) "rcu: " fmt + #include <linux/export.h> #include <linux/mutex.h> #include <linux/percpu.h> @@ -390,7 +392,8 @@ void _cleanup_srcu_struct(struct srcu_struct *sp, bool quiesced) } if (WARN_ON(rcu_seq_state(READ_ONCE(sp->srcu_gp_seq)) != SRCU_STATE_IDLE) || WARN_ON(srcu_readers_active(sp))) { - pr_info("%s: Active srcu_struct %p state: %d\n", __func__, sp, rcu_seq_state(READ_ONCE(sp->srcu_gp_seq))); + pr_info("%s: Active srcu_struct %p state: %d\n", + __func__, sp, rcu_seq_state(READ_ONCE(sp->srcu_gp_seq))); return; /* Caller forgot to stop doing call_srcu()? */ } free_percpu(sp->sda); @@ -1265,7 +1268,8 @@ void srcu_torture_stats_print(struct srcu_struct *sp, char *tt, char *tf) unsigned long s0 = 0, s1 = 0; idx = sp->srcu_idx & 0x1; - pr_alert("%s%s Tree SRCU per-CPU(idx=%d):", tt, tf, idx); + pr_alert("%s%s Tree SRCU g%ld per-CPU(idx=%d):", + tt, tf, rcu_seq_current(&sp->srcu_gp_seq), idx); for_each_possible_cpu(cpu) { unsigned long l0, l1; unsigned long u0, u1; diff --git a/kernel/rcu/tiny.c b/kernel/rcu/tiny.c index a64eee0db39e..befc9321a89c 100644 --- a/kernel/rcu/tiny.c +++ b/kernel/rcu/tiny.c @@ -122,10 +122,8 @@ void rcu_check_callbacks(int user) { if (user) rcu_sched_qs(); - else if (!in_softirq()) + if (user || !in_softirq()) rcu_bh_qs(); - if (user) - rcu_note_voluntary_context_switch(current); } /* diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index b4bcb5e21ca6..6930934e8b9f 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -27,6 +27,9 @@ * For detailed explanation of Read-Copy Update mechanism see - * Documentation/RCU */ + +#define pr_fmt(fmt) "rcu: " fmt + #include <linux/types.h> #include <linux/kernel.h> #include <linux/init.h> @@ -378,20 +381,6 @@ static bool rcu_dynticks_in_eqs_since(struct rcu_dynticks *rdtp, int snap) } /* - * Do a double-increment of the ->dynticks counter to emulate a - * momentary idle-CPU quiescent state. - */ -static void rcu_dynticks_momentary_idle(void) -{ - struct rcu_dynticks *rdtp = this_cpu_ptr(&rcu_dynticks); - int special = atomic_add_return(2 * RCU_DYNTICK_CTRL_CTR, - &rdtp->dynticks); - - /* It is illegal to call this from idle state. */ - WARN_ON_ONCE(!(special & RCU_DYNTICK_CTRL_CTR)); -} - -/* * Set the special (bottom) bit of the specified CPU so that it * will take special action (such as flushing its TLB) on the * next exit from an extended quiescent state. Returns true if @@ -422,12 +411,17 @@ bool rcu_eqs_special_set(int cpu) * * We inform the RCU core by emulating a zero-duration dyntick-idle period. * - * The caller must have disabled interrupts. + * The caller must have disabled interrupts and must not be idle. */ static void rcu_momentary_dyntick_idle(void) { + struct rcu_dynticks *rdtp = this_cpu_ptr(&rcu_dynticks); + int special; + raw_cpu_write(rcu_dynticks.rcu_need_heavy_qs, false); - rcu_dynticks_momentary_idle(); + special = atomic_add_return(2 * RCU_DYNTICK_CTRL_CTR, &rdtp->dynticks); + /* It is illegal to call this from idle state. */ + WARN_ON_ONCE(!(special & RCU_DYNTICK_CTRL_CTR)); } /* @@ -449,7 +443,7 @@ void rcu_note_context_switch(bool preempt) rcu_momentary_dyntick_idle(); this_cpu_inc(rcu_dynticks.rcu_qs_ctr); if (!preempt) - rcu_note_voluntary_context_switch_lite(current); + rcu_tasks_qs(current); out: trace_rcu_utilization(TPS("End context switch")); barrier(); /* Avoid RCU read-side critical sections leaking up. */ @@ -511,8 +505,38 @@ static ulong jiffies_till_first_fqs = ULONG_MAX; static ulong jiffies_till_next_fqs = ULONG_MAX; static bool rcu_kick_kthreads; -module_param(jiffies_till_first_fqs, ulong, 0644); -module_param(jiffies_till_next_fqs, ulong, 0644); +static int param_set_first_fqs_jiffies(const char *val, const struct kernel_param *kp) +{ + ulong j; + int ret = kstrtoul(val, 0, &j); + + if (!ret) + WRITE_ONCE(*(ulong *)kp->arg, (j > HZ) ? HZ : j); + return ret; +} + +static int param_set_next_fqs_jiffies(const char *val, const struct kernel_param *kp) +{ + ulong j; + int ret = kstrtoul(val, 0, &j); + + if (!ret) + WRITE_ONCE(*(ulong *)kp->arg, (j > HZ) ? HZ : (j ?: 1)); + return ret; +} + +static struct kernel_param_ops first_fqs_jiffies_ops = { + .set = param_set_first_fqs_jiffies, + .get = param_get_ulong, +}; + +static struct kernel_param_ops next_fqs_jiffies_ops = { + .set = param_set_next_fqs_jiffies, + .get = param_get_ulong, +}; + +module_param_cb(jiffies_till_first_fqs, &first_fqs_jiffies_ops, &jiffies_till_first_fqs, 0644); +module_param_cb(jiffies_till_next_fqs, &next_fqs_jiffies_ops, &jiffies_till_next_fqs, 0644); module_param(rcu_kick_kthreads, bool, 0644); /* @@ -607,11 +631,32 @@ EXPORT_SYMBOL_GPL(rcu_sched_force_quiescent_state); */ void show_rcu_gp_kthreads(void) { + int cpu; + struct rcu_data *rdp; + struct rcu_node *rnp; struct rcu_state *rsp; for_each_rcu_flavor(rsp) { pr_info("%s: wait state: %d ->state: %#lx\n", rsp->name, rsp->gp_state, rsp->gp_kthread->state); + rcu_for_each_node_breadth_first(rsp, rnp) { + if (ULONG_CMP_GE(rsp->gp_seq, rnp->gp_seq_needed)) + continue; + pr_info("\trcu_node %d:%d ->gp_seq %lu ->gp_seq_needed %lu\n", + rnp->grplo, rnp->grphi, rnp->gp_seq, + rnp->gp_seq_needed); + if (!rcu_is_leaf_node(rnp)) + continue; + for_each_leaf_node_possible_cpu(rnp, cpu) { + rdp = per_cpu_ptr(rsp->rda, cpu); + if (rdp->gpwrap || + ULONG_CMP_GE(rsp->gp_seq, + rdp->gp_seq_needed)) + continue; + pr_info("\tcpu %d ->gp_seq_needed %lu\n", + cpu, rdp->gp_seq_needed); + } + } /* sched_show_task(rsp->gp_kthread); */ } } @@ -1318,7 +1363,7 @@ static void rcu_stall_kick_kthreads(struct rcu_state *rsp) } } -static inline void panic_on_rcu_stall(void) +static void panic_on_rcu_stall(void) { if (sysctl_panic_on_rcu_stall) panic("RCU Stall\n"); @@ -1344,8 +1389,7 @@ static void print_other_cpu_stall(struct rcu_state *rsp, unsigned long gp_seq) * See Documentation/RCU/stallwarn.txt for info on how to debug * RCU CPU stall warnings. */ - pr_err("INFO: %s detected stalls on CPUs/tasks:", - rsp->name); + pr_err("INFO: %s detected stalls on CPUs/tasks:", rsp->name); print_cpu_stall_info_begin(); rcu_for_each_leaf_node(rsp, rnp) { raw_spin_lock_irqsave_rcu_node(rnp, flags); @@ -2136,10 +2180,6 @@ static int __noreturn rcu_gp_kthread(void *arg) /* Handle quiescent-state forcing. */ first_gp_fqs = true; j = jiffies_till_first_fqs; - if (j > HZ) { - j = HZ; - jiffies_till_first_fqs = HZ; - } ret = 0; for (;;) { if (!ret) { @@ -2174,13 +2214,6 @@ static int __noreturn rcu_gp_kthread(void *arg) WRITE_ONCE(rsp->gp_activity, jiffies); ret = 0; /* Force full wait till next FQS. */ j = jiffies_till_next_fqs; - if (j > HZ) { - j = HZ; - jiffies_till_next_fqs = HZ; - } else if (j < 1) { - j = 1; - jiffies_till_next_fqs = 1; - } } else { /* Deal with stray signal. */ cond_resched_tasks_rcu_qs(); @@ -2615,6 +2648,7 @@ void rcu_check_callbacks(int user) rcu_sched_qs(); rcu_bh_qs(); + rcu_note_voluntary_context_switch(current); } else if (!in_softirq()) { @@ -2630,8 +2664,7 @@ void rcu_check_callbacks(int user) rcu_preempt_check_callbacks(); if (rcu_pending()) invoke_rcu_core(); - if (user) - rcu_note_voluntary_context_switch(current); + trace_rcu_utilization(TPS("End scheduler-tick")); } @@ -2730,6 +2763,7 @@ static void rcu_check_gp_start_stall(struct rcu_state *rsp, struct rcu_node *rnp, struct rcu_data *rdp) { + const unsigned long gpssdelay = rcu_jiffies_till_stall_check() * HZ; unsigned long flags; unsigned long j; struct rcu_node *rnp_root = rcu_get_root(rsp); @@ -2739,8 +2773,8 @@ rcu_check_gp_start_stall(struct rcu_state *rsp, struct rcu_node *rnp, ULONG_CMP_GE(rnp_root->gp_seq, rnp_root->gp_seq_needed)) return; j = jiffies; /* Expensive access, and in common case don't get here. */ - if (time_before(j, READ_ONCE(rsp->gp_req_activity) + HZ) || - time_before(j, READ_ONCE(rsp->gp_activity) + HZ) || + if (time_before(j, READ_ONCE(rsp->gp_req_activity) + gpssdelay) || + time_before(j, READ_ONCE(rsp->gp_activity) + gpssdelay) || atomic_read(&warned)) return; @@ -2748,8 +2782,8 @@ rcu_check_gp_start_stall(struct rcu_state *rsp, struct rcu_node *rnp, j = jiffies; if (rcu_gp_in_progress(rsp) || ULONG_CMP_GE(rnp_root->gp_seq, rnp_root->gp_seq_needed) || - time_before(j, READ_ONCE(rsp->gp_req_activity) + HZ) || - time_before(j, READ_ONCE(rsp->gp_activity) + HZ) || + time_before(j, READ_ONCE(rsp->gp_req_activity) + gpssdelay) || + time_before(j, READ_ONCE(rsp->gp_activity) + gpssdelay) || atomic_read(&warned)) { raw_spin_unlock_irqrestore_rcu_node(rnp, flags); return; @@ -2761,18 +2795,18 @@ rcu_check_gp_start_stall(struct rcu_state *rsp, struct rcu_node *rnp, j = jiffies; if (rcu_gp_in_progress(rsp) || ULONG_CMP_GE(rnp_root->gp_seq, rnp_root->gp_seq_needed) || - time_before(j, rsp->gp_req_activity + HZ) || - time_before(j, rsp->gp_activity + HZ) || + time_before(j, rsp->gp_req_activity + gpssdelay) || + time_before(j, rsp->gp_activity + gpssdelay) || atomic_xchg(&warned, 1)) { raw_spin_unlock_rcu_node(rnp_root); /* irqs remain disabled. */ raw_spin_unlock_irqrestore_rcu_node(rnp, flags); return; } - pr_alert("%s: g%ld->%ld gar:%lu ga:%lu f%#x %s->state:%#lx\n", + pr_alert("%s: g%ld->%ld gar:%lu ga:%lu f%#x gs:%d %s->state:%#lx\n", __func__, (long)READ_ONCE(rsp->gp_seq), (long)READ_ONCE(rnp_root->gp_seq_needed), j - rsp->gp_req_activity, j - rsp->gp_activity, - rsp->gp_flags, rsp->name, + rsp->gp_flags, rsp->gp_state, rsp->name, rsp->gp_kthread ? rsp->gp_kthread->state : 0x1ffffL); WARN_ON(1); if (rnp_root != rnp) @@ -3057,7 +3091,7 @@ EXPORT_SYMBOL_GPL(kfree_call_rcu); * when there was in fact only one the whole time, as this just adds * some overhead: RCU still operates correctly. */ -static inline int rcu_blocking_is_gp(void) +static int rcu_blocking_is_gp(void) { int ret; @@ -3292,7 +3326,7 @@ static int rcu_pending(void) * non-NULL, store an indication of whether all callbacks are lazy. * (If there are no callbacks, all of them are deemed to be lazy.) */ -static bool __maybe_unused rcu_cpu_has_callbacks(bool *all_lazy) +static bool rcu_cpu_has_callbacks(bool *all_lazy) { bool al = true; bool hc = false; @@ -3859,12 +3893,16 @@ static int __init rcu_spawn_gp_kthread(void) struct task_struct *t; /* Force priority into range. */ - if (IS_ENABLED(CONFIG_RCU_BOOST) && kthread_prio < 1) + if (IS_ENABLED(CONFIG_RCU_BOOST) && kthread_prio < 2 + && IS_BUILTIN(CONFIG_RCU_TORTURE_TEST)) + kthread_prio = 2; + else if (IS_ENABLED(CONFIG_RCU_BOOST) && kthread_prio < 1) kthread_prio = 1; else if (kthread_prio < 0) kthread_prio = 0; else if (kthread_prio > 99) kthread_prio = 99; + if (kthread_prio != kthread_prio_in) pr_alert("rcu_spawn_gp_kthread(): Limited prio to %d from %d\n", kthread_prio, kthread_prio_in); @@ -4018,7 +4056,7 @@ static void __init rcu_init_geometry(void) if (rcu_fanout_leaf == RCU_FANOUT_LEAF && nr_cpu_ids == NR_CPUS) return; - pr_info("RCU: Adjusting geometry for rcu_fanout_leaf=%d, nr_cpu_ids=%u\n", + pr_info("Adjusting geometry for rcu_fanout_leaf=%d, nr_cpu_ids=%u\n", rcu_fanout_leaf, nr_cpu_ids); /* diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h index d51e6edc8e83..4e74df768c57 100644 --- a/kernel/rcu/tree.h +++ b/kernel/rcu/tree.h @@ -483,7 +483,6 @@ static void __init rcu_spawn_nocb_kthreads(void); #ifdef CONFIG_RCU_NOCB_CPU static void __init rcu_organize_nocb_kthreads(struct rcu_state *rsp); #endif /* #ifdef CONFIG_RCU_NOCB_CPU */ -static void __maybe_unused rcu_kick_nohz_cpu(int cpu); static bool init_nocb_callback_list(struct rcu_data *rdp); static void rcu_bind_gp_kthread(void); static bool rcu_nohz_full_cpu(struct rcu_state *rsp); diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h index 613372246a07..c1b17f5b9361 100644 --- a/kernel/rcu/tree_plugin.h +++ b/kernel/rcu/tree_plugin.h @@ -74,8 +74,8 @@ static void __init rcu_bootup_announce_oddness(void) pr_info("\tRCU event tracing is enabled.\n"); if ((IS_ENABLED(CONFIG_64BIT) && RCU_FANOUT != 64) || (!IS_ENABLED(CONFIG_64BIT) && RCU_FANOUT != 32)) - pr_info("\tCONFIG_RCU_FANOUT set to non-default value of %d\n", - RCU_FANOUT); + pr_info("\tCONFIG_RCU_FANOUT set to non-default value of %d.\n", + RCU_FANOUT); if (rcu_fanout_exact) pr_info("\tHierarchical RCU autobalancing is disabled.\n"); if (IS_ENABLED(CONFIG_RCU_FAST_NO_HZ)) @@ -88,11 +88,13 @@ static void __init rcu_bootup_announce_oddness(void) pr_info("\tBuild-time adjustment of leaf fanout to %d.\n", RCU_FANOUT_LEAF); if (rcu_fanout_leaf != RCU_FANOUT_LEAF) - pr_info("\tBoot-time adjustment of leaf fanout to %d.\n", rcu_fanout_leaf); + pr_info("\tBoot-time adjustment of leaf fanout to %d.\n", + rcu_fanout_leaf); if (nr_cpu_ids != NR_CPUS) pr_info("\tRCU restricting CPUs from NR_CPUS=%d to nr_cpu_ids=%u.\n", NR_CPUS, nr_cpu_ids); #ifdef CONFIG_RCU_BOOST - pr_info("\tRCU priority boosting: priority %d delay %d ms.\n", kthread_prio, CONFIG_RCU_BOOST_DELAY); + pr_info("\tRCU priority boosting: priority %d delay %d ms.\n", + kthread_prio, CONFIG_RCU_BOOST_DELAY); #endif if (blimit != DEFAULT_RCU_BLIMIT) pr_info("\tBoot-time adjustment of callback invocation limit to %ld.\n", blimit); @@ -127,6 +129,7 @@ static struct rcu_data __percpu *const rcu_data_p = &rcu_preempt_data; static void rcu_report_exp_rnp(struct rcu_state *rsp, struct rcu_node *rnp, bool wake); +static void rcu_read_unlock_special(struct task_struct *t); /* * Tell them what RCU they are running. @@ -291,13 +294,17 @@ static void rcu_preempt_ctxt_queue(struct rcu_node *rnp, struct rcu_data *rdp) } /* - * Record a preemptible-RCU quiescent state for the specified CPU. Note - * that this just means that the task currently running on the CPU is - * not in a quiescent state. There might be any number of tasks blocked - * while in an RCU read-side critical section. + * Record a preemptible-RCU quiescent state for the specified CPU. + * Note that this does not necessarily mean that the task currently running + * on the CPU is in a quiescent state: Instead, it means that the current + * grace period need not wait on any RCU read-side critical section that + * starts later on this CPU. It also means that if the current task is + * in an RCU read-side critical section, it has already added itself to + * some leaf rcu_node structure's ->blkd_tasks list. In addition to the + * current task, there might be any number of other tasks blocked while + * in an RCU read-side critical section. * - * As with the other rcu_*_qs() functions, callers to this function - * must disable preemption. + * Callers to this function must disable preemption. */ static void rcu_preempt_qs(void) { @@ -461,7 +468,7 @@ static bool rcu_preempt_has_tasks(struct rcu_node *rnp) * notify RCU core processing or task having blocked during the RCU * read-side critical section. */ -void rcu_read_unlock_special(struct task_struct *t) +static void rcu_read_unlock_special(struct task_struct *t) { bool empty_exp; bool empty_norm; @@ -727,6 +734,7 @@ rcu_preempt_check_blocked_tasks(struct rcu_state *rsp, struct rcu_node *rnp) */ static void rcu_preempt_check_callbacks(void) { + struct rcu_state *rsp = &rcu_preempt_state; struct task_struct *t = current; if (t->rcu_read_lock_nesting == 0) { @@ -735,7 +743,9 @@ static void rcu_preempt_check_callbacks(void) } if (t->rcu_read_lock_nesting > 0 && __this_cpu_read(rcu_data_p->core_needs_qs) && - __this_cpu_read(rcu_data_p->cpu_no_qs.b.norm)) + __this_cpu_read(rcu_data_p->cpu_no_qs.b.norm) && + !t->rcu_read_unlock_special.b.need_qs && + time_after(jiffies, rsp->gp_start + HZ)) t->rcu_read_unlock_special.b.need_qs = true; } @@ -1791,7 +1801,7 @@ static void print_cpu_stall_info(struct rcu_state *rsp, int cpu) } print_cpu_stall_fast_no_hz(fast_no_hz, cpu); delta = rcu_seq_ctr(rdp->mynode->gp_seq - rdp->rcu_iw_gp_seq); - pr_err("\t%d-%c%c%c%c: (%lu %s) idle=%03x/%ld/%ld softirq=%u/%u fqs=%ld %s\n", + pr_err("\t%d-%c%c%c%c: (%lu %s) idle=%03x/%ld/%#lx softirq=%u/%u fqs=%ld %s\n", cpu, "O."[!!cpu_online(cpu)], "o."[!!(rdp->grpmask & rdp->mynode->qsmaskinit)], @@ -2636,23 +2646,6 @@ static bool init_nocb_callback_list(struct rcu_data *rdp) #endif /* #else #ifdef CONFIG_RCU_NOCB_CPU */ /* - * An adaptive-ticks CPU can potentially execute in kernel mode for an - * arbitrarily long period of time with the scheduling-clock tick turned - * off. RCU will be paying attention to this CPU because it is in the - * kernel, but the CPU cannot be guaranteed to be executing the RCU state - * machine because the scheduling-clock tick has been disabled. Therefore, - * if an adaptive-ticks CPU is failing to respond to the current grace - * period and has not be idle from an RCU perspective, kick it. - */ -static void __maybe_unused rcu_kick_nohz_cpu(int cpu) -{ -#ifdef CONFIG_NO_HZ_FULL - if (tick_nohz_full_cpu(cpu)) - smp_send_reschedule(cpu); -#endif /* #ifdef CONFIG_NO_HZ_FULL */ -} - -/* * Is this CPU a NO_HZ_FULL CPU that should ignore RCU so that the * grace-period kthread will do force_quiescent_state() processing? * The idea is to avoid waking up RCU core processing on such a @@ -2677,8 +2670,6 @@ static bool rcu_nohz_full_cpu(struct rcu_state *rsp) */ static void rcu_bind_gp_kthread(void) { - int __maybe_unused cpu; - if (!tick_nohz_full_enabled()) return; housekeeping_affine(current, HK_FLAG_RCU); diff --git a/kernel/rcu/update.c b/kernel/rcu/update.c index 4c230a60ece4..39cb23d22109 100644 --- a/kernel/rcu/update.c +++ b/kernel/rcu/update.c @@ -507,14 +507,15 @@ early_initcall(check_cpu_stall_init); #ifdef CONFIG_TASKS_RCU /* - * Simple variant of RCU whose quiescent states are voluntary context switch, - * user-space execution, and idle. As such, grace periods can take one good - * long time. There are no read-side primitives similar to rcu_read_lock() - * and rcu_read_unlock() because this implementation is intended to get - * the system into a safe state for some of the manipulations involved in - * tracing and the like. Finally, this implementation does not support - * high call_rcu_tasks() rates from multiple CPUs. If this is required, - * per-CPU callback lists will be needed. + * Simple variant of RCU whose quiescent states are voluntary context + * switch, cond_resched_rcu_qs(), user-space execution, and idle. + * As such, grace periods can take one good long time. There are no + * read-side primitives similar to rcu_read_lock() and rcu_read_unlock() + * because this implementation is intended to get the system into a safe + * state for some of the manipulations involved in tracing and the like. + * Finally, this implementation does not support high call_rcu_tasks() + * rates from multiple CPUs. If this is required, per-CPU callback lists + * will be needed. */ /* Global list of callbacks and associated lock. */ @@ -542,11 +543,11 @@ static struct task_struct *rcu_tasks_kthread_ptr; * period elapses, in other words after all currently executing RCU * read-side critical sections have completed. call_rcu_tasks() assumes * that the read-side critical sections end at a voluntary context - * switch (not a preemption!), entry into idle, or transition to usermode - * execution. As such, there are no read-side primitives analogous to - * rcu_read_lock() and rcu_read_unlock() because this primitive is intended - * to determine that all tasks have passed through a safe state, not so - * much for data-strcuture synchronization. + * switch (not a preemption!), cond_resched_rcu_qs(), entry into idle, + * or transition to usermode execution. As such, there are no read-side + * primitives analogous to rcu_read_lock() and rcu_read_unlock() because + * this primitive is intended to determine that all tasks have passed + * through a safe state, not so much for data-strcuture synchronization. * * See the description of call_rcu() for more detailed information on * memory ordering guarantees. @@ -667,6 +668,7 @@ static int __noreturn rcu_tasks_kthread(void *arg) struct rcu_head *list; struct rcu_head *next; LIST_HEAD(rcu_tasks_holdouts); + int fract; /* Run on housekeeping CPUs by default. Sysadm can move if desired. */ housekeeping_affine(current, HK_FLAG_RCU); @@ -748,13 +750,25 @@ static int __noreturn rcu_tasks_kthread(void *arg) * holdouts. When the list is empty, we are done. */ lastreport = jiffies; - while (!list_empty(&rcu_tasks_holdouts)) { + + /* Start off with HZ/10 wait and slowly back off to 1 HZ wait*/ + fract = 10; + + for (;;) { bool firstreport; bool needreport; int rtst; struct task_struct *t1; - schedule_timeout_interruptible(HZ); + if (list_empty(&rcu_tasks_holdouts)) + break; + + /* Slowly back off waiting for holdouts */ + schedule_timeout_interruptible(HZ/fract); + + if (fract > 1) + fract--; + rtst = READ_ONCE(rcu_task_stall_timeout); needreport = rtst > 0 && time_after(jiffies, lastreport + rtst); @@ -800,6 +814,7 @@ static int __noreturn rcu_tasks_kthread(void *arg) list = next; cond_resched(); } + /* Paranoid sleep to keep this from entering a tight loop */ schedule_timeout_uninterruptible(HZ/10); } } |