diff options
author | Paul E. McKenney <paulmck@linux.ibm.com> | 2019-07-03 01:03:33 +0200 |
---|---|---|
committer | Paul E. McKenney <paulmck@linux.ibm.com> | 2019-08-13 23:37:32 +0200 |
commit | d1b222c6be1f8bfc77099e034219732ecaeaaf96 (patch) | |
tree | e9da6ce477c530ed6584bd922484ad992f994cda /kernel/rcu/tree.h | |
parent | rcu/nocb: Atomic ->len field in rcu_segcblist structure (diff) | |
download | linux-d1b222c6be1f8bfc77099e034219732ecaeaaf96.tar.xz linux-d1b222c6be1f8bfc77099e034219732ecaeaaf96.zip |
rcu/nocb: Add bypass callback queueing
Use of the rcu_data structure's segmented ->cblist for no-CBs CPUs
takes advantage of unrelated grace periods, thus reducing the memory
footprint in the face of floods of call_rcu() invocations. However,
the ->cblist field is a more-complex rcu_segcblist structure which must
be protected via locking. Even though there are only three entities
which can acquire this lock (the CPU invoking call_rcu(), the no-CBs
grace-period kthread, and the no-CBs callbacks kthread), the contention
on this lock is excessive under heavy stress.
This commit therefore greatly reduces contention by provisioning
an rcu_cblist structure field named ->nocb_bypass within the
rcu_data structure. Each no-CBs CPU is permitted only a limited
number of enqueues onto the ->cblist per jiffy, controlled by a new
nocb_nobypass_lim_per_jiffy kernel boot parameter that defaults to
about 16 enqueues per millisecond (that is, 16 * 1000 / HZ enqueues per jiffy). When that limit is
exceeded, the CPU instead enqueues onto the new ->nocb_bypass.
The ->nocb_bypass is flushed into the ->cblist every jiffy or when
the number of callbacks on ->nocb_bypass exceeds qhimark, whichever
happens first. During call_rcu() floods, this flushing is carried out
by the CPU during the course of its call_rcu() invocations. However,
a CPU could simply stop invoking call_rcu() at any time. The no-CBs
grace-period kthread therefore carries out less-aggressive flushing
(every few jiffies or when the number of callbacks on ->nocb_bypass
exceeds (2 * qhimark), whichever comes first). This means that the
no-CBs grace-period kthread cannot be permitted to do unbounded waits
while there are callbacks on ->nocb_bypass. A ->nocb_bypass_timer is
used to provide the needed wakeups.
[ paulmck: Apply Coverity feedback reported by Colin Ian King. ]
Signed-off-by: Paul E. McKenney <paulmck@linux.ibm.com>
Diffstat (limited to 'kernel/rcu/tree.h')
-rw-r--r-- | kernel/rcu/tree.h | 28 |
1 file changed, 18 insertions, 10 deletions
diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h
index 2c3e9068671c..e4df86db8137 100644
--- a/kernel/rcu/tree.h
+++ b/kernel/rcu/tree.h
@@ -200,18 +200,26 @@ struct rcu_data {
 	atomic_t nocb_lock_contended;	/* Contention experienced. */
 	int nocb_defer_wakeup;		/* Defer wakeup of nocb_kthread. */
 	struct timer_list nocb_timer;	/* Enforce finite deferral. */
+	unsigned long nocb_gp_adv_time;	/* Last call_rcu() CB adv (jiffies). */
+
+	/* The following fields are used by call_rcu, hence own cacheline. */
+	raw_spinlock_t nocb_bypass_lock ____cacheline_internodealigned_in_smp;
+	struct rcu_cblist nocb_bypass;	/* Lock-contention-bypass CB list. */
+	unsigned long nocb_bypass_first; /* Time (jiffies) of first enqueue. */
+	unsigned long nocb_nobypass_last; /* Last ->cblist enqueue (jiffies). */
+	int nocb_nobypass_count;	/* # ->cblist enqueues at ^^^ time. */
 
 	/* The following fields are used by GP kthread, hence own cacheline. */
 	raw_spinlock_t nocb_gp_lock ____cacheline_internodealigned_in_smp;
-	bool nocb_gp_sleep;
-					/* Is the nocb GP thread asleep? */
+	struct timer_list nocb_bypass_timer; /* Force nocb_bypass flush. */
+	bool nocb_gp_sleep;		/* Is the nocb GP thread asleep? */
 	struct swait_queue_head nocb_gp_wq; /* For nocb kthreads to sleep on. */
 	bool nocb_cb_sleep;		/* Is the nocb CB thread asleep? */
 	struct task_struct *nocb_cb_kthread;
 	struct rcu_data *nocb_next_cb_rdp;
 					/* Next rcu_data in wakeup chain. */
 
-	/* The following fields are used by CB kthread, hence new cachline. */
+	/* The following fields are used by CB kthread, hence new cacheline. */
 	struct rcu_data *nocb_gp_rdp ____cacheline_internodealigned_in_smp;
 					/* GP rdp takes GP-end wakeups. */
 #endif /* #ifdef CONFIG_RCU_NOCB_CPU */
@@ -419,6 +427,10 @@ static void zero_cpu_stall_ticks(struct rcu_data *rdp);
 static struct swait_queue_head *rcu_nocb_gp_get(struct rcu_node *rnp);
 static void rcu_nocb_gp_cleanup(struct swait_queue_head *sq);
 static void rcu_init_one_nocb(struct rcu_node *rnp);
+static bool rcu_nocb_flush_bypass(struct rcu_data *rdp, struct rcu_head *rhp,
+				  unsigned long j);
+static bool rcu_nocb_try_bypass(struct rcu_data *rdp, struct rcu_head *rhp,
+				bool *was_alldone, unsigned long flags);
 static void __call_rcu_nocb_wake(struct rcu_data *rdp, bool was_empty,
 				 unsigned long flags);
 static int rcu_nocb_need_deferred_wakeup(struct rcu_data *rdp);
@@ -430,19 +442,15 @@ static void rcu_nocb_lock(struct rcu_data *rdp);
 static void rcu_nocb_unlock(struct rcu_data *rdp);
 static void rcu_nocb_unlock_irqrestore(struct rcu_data *rdp,
 				       unsigned long flags);
+static void rcu_lockdep_assert_cblist_protected(struct rcu_data *rdp);
 #ifdef CONFIG_RCU_NOCB_CPU
 static void __init rcu_organize_nocb_kthreads(void);
 #define rcu_nocb_lock_irqsave(rdp, flags)				\
 do {									\
-	if (!rcu_segcblist_is_offloaded(&(rdp)->cblist)) {		\
+	if (!rcu_segcblist_is_offloaded(&(rdp)->cblist))		\
 		local_irq_save(flags);					\
-	} else if (!raw_spin_trylock_irqsave(&(rdp)->nocb_lock, (flags))) {\
-		atomic_inc(&(rdp)->nocb_lock_contended);		\
-		smp_mb__after_atomic(); /* atomic_inc() before lock. */	\
+	else								\
 		raw_spin_lock_irqsave(&(rdp)->nocb_lock, (flags));	\
-		smp_mb__before_atomic(); /* atomic_dec() after lock. */	\
-		atomic_dec(&(rdp)->nocb_lock_contended);		\
-	}								\
 } while (0)
 #else /* #ifdef CONFIG_RCU_NOCB_CPU */
 #define rcu_nocb_lock_irqsave(rdp, flags) local_irq_save(flags)