From 18108ebfebe9e871d0a9af830baf8f5df69eb5fc Mon Sep 17 00:00:00 2001 From: Lai Jiangshan Date: Mon, 27 Feb 2012 09:28:10 -0800 Subject: rcu: Improve SRCU's wait_idx() comments The safety of SRCU is provided byy wait_idx() rather than flipping. The flipping actually prevents starvation. This commit therefore updates the comments to more accurately and precisely describe what is going on. Signed-off-by: Lai Jiangshan Signed-off-by: Paul E. McKenney --- kernel/srcu.c | 77 ++++++++++++++++++++++++++++------------------------------- 1 file changed, 37 insertions(+), 40 deletions(-) diff --git a/kernel/srcu.c b/kernel/srcu.c index b6b9ea2eb51c..1fecb4d858ed 100644 --- a/kernel/srcu.c +++ b/kernel/srcu.c @@ -249,6 +249,10 @@ EXPORT_SYMBOL_GPL(__srcu_read_unlock); */ #define SYNCHRONIZE_SRCU_READER_DELAY 5 +/* + * Wait until all pre-existing readers complete. Such readers + * will have used the index specified by "idx". + */ static void wait_idx(struct srcu_struct *sp, int idx, bool expedited) { int trycount = 0; @@ -291,24 +295,9 @@ static void wait_idx(struct srcu_struct *sp, int idx, bool expedited) smp_mb(); /* E */ } -/* - * Flip the readers' index by incrementing ->completed, then wait - * until there are no more readers using the counters referenced by - * the old index value. (Recall that the index is the bottom bit - * of ->completed.) - * - * Of course, it is possible that a reader might be delayed for the - * full duration of flip_idx_and_wait() between fetching the - * index and incrementing its counter. This possibility is handled - * by the next __synchronize_srcu() invoking wait_idx() for such readers - * before starting a new grace period. - */ -static void flip_idx_and_wait(struct srcu_struct *sp, bool expedited) +static void srcu_flip(struct srcu_struct *sp) { - int idx; - - idx = sp->completed++ & 0x1; - wait_idx(sp, idx, expedited); + sp->completed++; } /* @@ -316,6 +305,8 @@ static void flip_idx_and_wait(struct srcu_struct *sp, bool expedited) */ static void __synchronize_srcu(struct srcu_struct *sp, bool expedited) { + int busy_idx; + rcu_lockdep_assert(!lock_is_held(&sp->dep_map) && !lock_is_held(&rcu_bh_lock_map) && !lock_is_held(&rcu_lock_map) && @@ -323,8 +314,28 @@ static void __synchronize_srcu(struct srcu_struct *sp, bool expedited) "Illegal synchronize_srcu() in same-type SRCU (or RCU) read-side critical section"); mutex_lock(&sp->mutex); + busy_idx = sp->completed & 0X1UL; /* + * If we recently flipped the index, there will be some readers + * using idx=0 and others using idx=1. Therefore, two calls to + * wait_idx()s suffice to ensure that all pre-existing readers + * have completed: + * + * __synchronize_srcu() { + * wait_idx(sp, 0, expedited); + * wait_idx(sp, 1, expedited); + * } + * + * Starvation is prevented by the fact that we flip the index. + * While we wait on one index to clear out, almost all new readers + * will be using the other index. The number of new readers using the + * index we are waiting on is sharply bounded by roughly the number + * of CPUs. + * + * How can new readers possibly using the old pre-flip value of + * the index? Consider the following sequence of events: + * * Suppose that during the previous grace period, a reader * picked up the old value of the index, but did not increment * its counter until after the previous instance of @@ -333,31 +344,17 @@ static void __synchronize_srcu(struct srcu_struct *sp, bool expedited) * not start until after the grace period started, so the grace * period was not obligated to wait for that reader. * - * However, the current SRCU grace period does have to wait for - * that reader. This is handled by invoking wait_idx() on the - * non-active set of counters (hence sp->completed - 1). Once - * wait_idx() returns, we know that all readers that picked up - * the old value of ->completed and that already incremented their - * counter will have completed. - * - * But what about readers that picked up the old value of - * ->completed, but -still- have not managed to increment their - * counter? We do not need to wait for those readers, because - * they will have started their SRCU read-side critical section - * after the current grace period starts. - * - * Because it is unlikely that readers will be preempted between - * fetching ->completed and incrementing their counter, wait_idx() - * will normally not need to wait. + * However, this sequence of events is quite improbable, so + * this call to wait_idx(), which waits on really old readers + * describe in this comment above, will almost never need to wait. */ - wait_idx(sp, (sp->completed - 1) & 0x1, expedited); + wait_idx(sp, 1 - busy_idx, expedited); - /* - * Now that wait_idx() has waited for the really old readers, - * invoke flip_idx_and_wait() to flip the counter and wait - * for current SRCU readers. - */ - flip_idx_and_wait(sp, expedited); + /* Flip the index to avoid reader-induced starvation. */ + srcu_flip(sp); + + /* Wait for recent pre-existing readers. */ + wait_idx(sp, busy_idx, expedited); mutex_unlock(&sp->mutex); } -- cgit v1.2.3