diff options
author | Paul E. McKenney <paulmck@linux.vnet.ibm.com> | 2015-10-01 19:26:24 +0200 |
---|---|---|
committer | Paul E. McKenney <paulmck@linux.vnet.ibm.com> | 2015-12-04 21:26:52 +0100 |
commit | df5bd5144a80a9f6c3807383b11f735dae9caf9d (patch) | |
tree | fce44d0970a70446c5fd6b50f2e764db0efb8e56 /kernel/rcu/tree_trace.c | |
parent | rcu: Invert sync_rcu_exp_select_cpus() "if" statement (diff) | |
download | linux-df5bd5144a80a9f6c3807383b11f735dae9caf9d.tar.xz linux-df5bd5144a80a9f6c3807383b11f735dae9caf9d.zip |
rcu: Reduce expedited GP memory contention via per-CPU variables
Currently, the piggybacked-work checks carried out by sync_exp_work_done()
atomically increment a small set of variables (the ->expedited_workdone0,
->expedited_workdone1, ->expedited_workdone2, ->expedited_workdone3
fields in the rcu_state structure), which will form a memory-contention
bottleneck given a sufficiently large number of CPUs concurrently invoking
either synchronize_rcu_expedited() or synchronize_sched_expedited().
This commit therefore moves these for fields to the per-CPU rcu_data
structure, eliminating the memory contention. The show_rcuexp() function
also changes to sum up each field in the rcu_data structures.
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Diffstat (limited to 'kernel/rcu/tree_trace.c')
-rw-r--r-- | kernel/rcu/tree_trace.c | 18 |
1 files changed, 12 insertions, 6 deletions
diff --git a/kernel/rcu/tree_trace.c b/kernel/rcu/tree_trace.c index 8efaba870d96..d43649450ea4 100644 --- a/kernel/rcu/tree_trace.c +++ b/kernel/rcu/tree_trace.c @@ -183,14 +183,20 @@ static const struct file_operations rcudata_fops = { static int show_rcuexp(struct seq_file *m, void *v) { + int cpu; struct rcu_state *rsp = (struct rcu_state *)m->private; - + struct rcu_data *rdp; + unsigned long s0 = 0, s1 = 0, s2 = 0, s3 = 0; + + for_each_possible_cpu(cpu) { + rdp = per_cpu_ptr(rsp->rda, cpu); + s0 += atomic_long_read(&rdp->expedited_workdone0); + s1 += atomic_long_read(&rdp->expedited_workdone1); + s2 += atomic_long_read(&rdp->expedited_workdone2); + s3 += atomic_long_read(&rdp->expedited_workdone3); + } seq_printf(m, "s=%lu wd0=%lu wd1=%lu wd2=%lu wd3=%lu n=%lu enq=%d sc=%lu\n", - rsp->expedited_sequence, - atomic_long_read(&rsp->expedited_workdone0), - atomic_long_read(&rsp->expedited_workdone1), - atomic_long_read(&rsp->expedited_workdone2), - atomic_long_read(&rsp->expedited_workdone3), + rsp->expedited_sequence, s0, s1, s2, s3, atomic_long_read(&rsp->expedited_normal), atomic_read(&rsp->expedited_need_qs), rsp->expedited_sequence / 2); |