diff options
author | Paul E. McKenney <paulmck@kernel.org> | 2023-02-03 01:40:07 +0100 |
---|---|---|
committer | Paul E. McKenney <paulmck@kernel.org> | 2023-02-03 01:40:07 +0100 |
commit | bba8d3d17dc2678f9647962900aa421a18c25320 (patch) | |
tree | 6d174ba5d2e9210c79355991d06188bd4820382c /kernel | |
parent | Merge branches 'doc.2023.01.05a', 'fixes.2023.01.23a', 'kvfree.2023.01.03a', ... (diff) | |
parent | rcu: Allow up to five minutes expedited RCU CPU stall-warning timeouts (diff) | |
download | linux-bba8d3d17dc2678f9647962900aa421a18c25320.tar.xz linux-bba8d3d17dc2678f9647962900aa421a18c25320.zip |
Merge branch 'stall.2023.01.09a' into HEAD
stall.2023.01.09a: RCU CPU stall-warning updates.
Diffstat (limited to 'kernel')
-rw-r--r-- | kernel/rcu/Kconfig.debug | 15 | ||||
-rw-r--r-- | kernel/rcu/rcu.h | 1 | ||||
-rw-r--r-- | kernel/rcu/tree.c | 18 | ||||
-rw-r--r-- | kernel/rcu/tree.h | 19 | ||||
-rw-r--r-- | kernel/rcu/tree_stall.h | 37 | ||||
-rw-r--r-- | kernel/rcu/update.c | 2 | ||||
-rw-r--r-- | kernel/sched/core.c | 5 |
7 files changed, 93 insertions, 4 deletions
diff --git a/kernel/rcu/Kconfig.debug b/kernel/rcu/Kconfig.debug index 232e29fe3e5e..2984de629f74 100644 --- a/kernel/rcu/Kconfig.debug +++ b/kernel/rcu/Kconfig.debug @@ -82,7 +82,7 @@ config RCU_CPU_STALL_TIMEOUT config RCU_EXP_CPU_STALL_TIMEOUT int "Expedited RCU CPU stall timeout in milliseconds" depends on RCU_STALL_COMMON - range 0 21000 + range 0 300000 default 0 help If a given expedited RCU grace period extends more than the @@ -92,6 +92,19 @@ config RCU_EXP_CPU_STALL_TIMEOUT says to use the RCU_CPU_STALL_TIMEOUT value converted from seconds to milliseconds. +config RCU_CPU_STALL_CPUTIME + bool "Provide additional RCU stall debug information" + depends on RCU_STALL_COMMON + default n + help + Collect statistics during the sampling period, such as the number of + (hard interrupts, soft interrupts, task switches) and the cputime of + (hard interrupts, soft interrupts, kernel tasks) are added to the + RCU stall report. For multiple continuous RCU stalls, all sampling + periods begin at half of the first RCU stall timeout. + The boot option rcupdate.rcu_cpu_stall_cputime has the same function + as this one, but will override this if it exists. + config RCU_TRACE bool "Enable tracing for RCU" depends on DEBUG_KERNEL diff --git a/kernel/rcu/rcu.h b/kernel/rcu/rcu.h index 95a0038c9218..115616ac3bfa 100644 --- a/kernel/rcu/rcu.h +++ b/kernel/rcu/rcu.h @@ -224,6 +224,7 @@ extern int rcu_cpu_stall_ftrace_dump; extern int rcu_cpu_stall_suppress; extern int rcu_cpu_stall_timeout; extern int rcu_exp_cpu_stall_timeout; +extern int rcu_cpu_stall_cputime; extern bool rcu_exp_stall_task_details __read_mostly; int rcu_jiffies_till_stall_check(void); int rcu_exp_jiffies_till_stall_check(void); diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index 739219a78b84..8e880c09ab59 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -866,6 +866,24 @@ static int rcu_implicit_dynticks_qs(struct rcu_data *rdp) rdp->rcu_iw_gp_seq = rnp->gp_seq; irq_work_queue_on(&rdp->rcu_iw, rdp->cpu); } + + if (rcu_cpu_stall_cputime && rdp->snap_record.gp_seq != rdp->gp_seq) { + int cpu = rdp->cpu; + struct rcu_snap_record *rsrp; + struct kernel_cpustat *kcsp; + + kcsp = &kcpustat_cpu(cpu); + + rsrp = &rdp->snap_record; + rsrp->cputime_irq = kcpustat_field(kcsp, CPUTIME_IRQ, cpu); + rsrp->cputime_softirq = kcpustat_field(kcsp, CPUTIME_SOFTIRQ, cpu); + rsrp->cputime_system = kcpustat_field(kcsp, CPUTIME_SYSTEM, cpu); + rsrp->nr_hardirqs = kstat_cpu_irqs_sum(rdp->cpu); + rsrp->nr_softirqs = kstat_cpu_softirqs_sum(rdp->cpu); + rsrp->nr_csw = nr_context_switches_cpu(rdp->cpu); + rsrp->jiffies = jiffies; + rsrp->gp_seq = rdp->gp_seq; + } } return 0; diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h index fcb5d696eb17..192536916f9a 100644 --- a/kernel/rcu/tree.h +++ b/kernel/rcu/tree.h @@ -158,6 +158,23 @@ union rcu_noqs { u16 s; /* Set of bits, aggregate OR here. */ }; +/* + * Record the snapshot of the core stats at half of the first RCU stall timeout. + * The member gp_seq is used to ensure that all members are updated only once + * during the sampling period. The snapshot is taken only if this gp_seq is not + * equal to rdp->gp_seq. + */ +struct rcu_snap_record { + unsigned long gp_seq; /* Track rdp->gp_seq counter */ + u64 cputime_irq; /* Accumulated cputime of hard irqs */ + u64 cputime_softirq;/* Accumulated cputime of soft irqs */ + u64 cputime_system; /* Accumulated cputime of kernel tasks */ + unsigned long nr_hardirqs; /* Accumulated number of hard irqs */ + unsigned int nr_softirqs; /* Accumulated number of soft irqs */ + unsigned long long nr_csw; /* Accumulated number of task switches */ + unsigned long jiffies; /* Track jiffies value */ +}; + /* Per-CPU data for read-copy update. */ struct rcu_data { /* 1) quiescent-state and grace-period handling : */ @@ -262,6 +279,8 @@ struct rcu_data { short rcu_onl_gp_flags; /* ->gp_flags at last online. */ unsigned long last_fqs_resched; /* Time of last rcu_resched(). */ unsigned long last_sched_clock; /* Jiffies of last rcu_sched_clock_irq(). */ + struct rcu_snap_record snap_record; /* Snapshot of core stats at half of */ + /* the first RCU stall timeout */ long lazy_len; /* Length of buffered lazy callbacks. */ int cpu; diff --git a/kernel/rcu/tree_stall.h b/kernel/rcu/tree_stall.h index 5653560573e2..b10b8349bb2a 100644 --- a/kernel/rcu/tree_stall.h +++ b/kernel/rcu/tree_stall.h @@ -39,7 +39,7 @@ int rcu_exp_jiffies_till_stall_check(void) // CONFIG_RCU_EXP_CPU_STALL_TIMEOUT, so check the allowed range. // The minimum clamped value is "2UL", because at least one full // tick has to be guaranteed. - till_stall_check = clamp(msecs_to_jiffies(cpu_stall_timeout), 2UL, 21UL * HZ); + till_stall_check = clamp(msecs_to_jiffies(cpu_stall_timeout), 2UL, 300UL * HZ); if (cpu_stall_timeout && jiffies_to_msecs(till_stall_check) != cpu_stall_timeout) WRITE_ONCE(rcu_exp_cpu_stall_timeout, jiffies_to_msecs(till_stall_check)); @@ -428,6 +428,35 @@ static bool rcu_is_rcuc_kthread_starving(struct rcu_data *rdp, unsigned long *jp return j > 2 * HZ; } +static void print_cpu_stat_info(int cpu) +{ + struct rcu_snap_record rsr, *rsrp; + struct rcu_data *rdp = per_cpu_ptr(&rcu_data, cpu); + struct kernel_cpustat *kcsp = &kcpustat_cpu(cpu); + + if (!rcu_cpu_stall_cputime) + return; + + rsrp = &rdp->snap_record; + if (rsrp->gp_seq != rdp->gp_seq) + return; + + rsr.cputime_irq = kcpustat_field(kcsp, CPUTIME_IRQ, cpu); + rsr.cputime_softirq = kcpustat_field(kcsp, CPUTIME_SOFTIRQ, cpu); + rsr.cputime_system = kcpustat_field(kcsp, CPUTIME_SYSTEM, cpu); + + pr_err("\t hardirqs softirqs csw/system\n"); + pr_err("\t number: %8ld %10d %12lld\n", + kstat_cpu_irqs_sum(cpu) - rsrp->nr_hardirqs, + kstat_cpu_softirqs_sum(cpu) - rsrp->nr_softirqs, + nr_context_switches_cpu(cpu) - rsrp->nr_csw); + pr_err("\tcputime: %8lld %10lld %12lld ==> %d(ms)\n", + div_u64(rsr.cputime_irq - rsrp->cputime_irq, NSEC_PER_MSEC), + div_u64(rsr.cputime_softirq - rsrp->cputime_softirq, NSEC_PER_MSEC), + div_u64(rsr.cputime_system - rsrp->cputime_system, NSEC_PER_MSEC), + jiffies_to_msecs(jiffies - rsrp->jiffies)); +} + /* * Print out diagnostic information for the specified stalled CPU. * @@ -484,6 +513,8 @@ static void print_cpu_stall_info(int cpu) data_race(rcu_state.n_force_qs) - rcu_state.n_force_qs_gpstart, rcuc_starved ? buf : "", falsepositive ? " (false positive?)" : ""); + + print_cpu_stat_info(cpu); } /* Complain about starvation of grace-period kthread. */ @@ -588,7 +619,7 @@ static void print_other_cpu_stall(unsigned long gp_seq, unsigned long gps) for_each_possible_cpu(cpu) totqlen += rcu_get_n_cbs_cpu(cpu); - pr_cont("\t(detected by %d, t=%ld jiffies, g=%ld, q=%lu ncpus=%d)\n", + pr_err("\t(detected by %d, t=%ld jiffies, g=%ld, q=%lu ncpus=%d)\n", smp_processor_id(), (long)(jiffies - gps), (long)rcu_seq_current(&rcu_state.gp_seq), totqlen, rcu_state.n_online_cpus); if (ndetected) { @@ -649,7 +680,7 @@ static void print_cpu_stall(unsigned long gps) raw_spin_unlock_irqrestore_rcu_node(rdp->mynode, flags); for_each_possible_cpu(cpu) totqlen += rcu_get_n_cbs_cpu(cpu); - pr_cont("\t(t=%lu jiffies g=%ld q=%lu ncpus=%d)\n", + pr_err("\t(t=%lu jiffies g=%ld q=%lu ncpus=%d)\n", jiffies - gps, (long)rcu_seq_current(&rcu_state.gp_seq), totqlen, rcu_state.n_online_cpus); diff --git a/kernel/rcu/update.c b/kernel/rcu/update.c index 624ff75e08ab..19bf6fa3ee6a 100644 --- a/kernel/rcu/update.c +++ b/kernel/rcu/update.c @@ -547,6 +547,8 @@ int rcu_cpu_stall_timeout __read_mostly = CONFIG_RCU_CPU_STALL_TIMEOUT; module_param(rcu_cpu_stall_timeout, int, 0644); int rcu_exp_cpu_stall_timeout __read_mostly = CONFIG_RCU_EXP_CPU_STALL_TIMEOUT; module_param(rcu_exp_cpu_stall_timeout, int, 0644); +int rcu_cpu_stall_cputime __read_mostly = IS_ENABLED(CONFIG_RCU_CPU_STALL_CPUTIME); +module_param(rcu_cpu_stall_cputime, int, 0644); bool rcu_exp_stall_task_details __read_mostly; module_param(rcu_exp_stall_task_details, bool, 0644); #endif /* #ifdef CONFIG_RCU_STALL_COMMON */ diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 25b582b6ee5f..2e40a6c116e1 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -5282,6 +5282,11 @@ bool single_task_running(void) } EXPORT_SYMBOL(single_task_running); +unsigned long long nr_context_switches_cpu(int cpu) +{ + return cpu_rq(cpu)->nr_switches; +} + unsigned long long nr_context_switches(void) { int i; |