summaryrefslogtreecommitdiffstats
path: root/kernel
diff options
context:
space:
mode:
authorPaul E. McKenney <paulmck@linux.vnet.ibm.com>2014-06-11 01:31:55 +0200
committerPaul E. McKenney <paulmck@linux.vnet.ibm.com>2014-07-09 18:15:00 +0200
commitdfeb9765ce3c33cb3cbc5f16db423f1c58a4cc55 (patch)
treebeffed3c9b871cb3ce35e1271820da5cc16c8f00 /kernel
parentrcu: Loosen __call_rcu()'s rcu_head alignment constraint (diff)
downloadlinux-dfeb9765ce3c33cb3cbc5f16db423f1c58a4cc55.tar.xz
linux-dfeb9765ce3c33cb3cbc5f16db423f1c58a4cc55.zip
rcu: Allow post-unlock reference for rt_mutex
The current approach to RCU priority boosting uses an rt_mutex strictly for its priority-boosting side effects. The rt_mutex_init_proxy_locked() function is used by the booster to initialize the lock as held by the boostee. The booster then uses rt_mutex_lock() to acquire this rt_mutex, which priority-boosts the boostee. When the boostee reaches the end of its outermost RCU read-side critical section, it checks a field in its task structure to see whether it has been boosted, and, if so, uses rt_mutex_unlock() to release the rt_mutex. The booster can then go on to boost the next task that is blocking the current RCU grace period. But reasonable implementations of rt_mutex_unlock() might result in the boostee referencing the rt_mutex's data after releasing it. But the booster might have re-initialized the rt_mutex between the time that the boostee released it and the time that it later referenced it. This is clearly asking for trouble, so this commit introduces a completion that forces the booster to wait until the boostee has completely finished with the rt_mutex, thus avoiding the case where the booster is re-initializing the rt_mutex before the last boostee's last reference to that rt_mutex. This of course does introduce some overhead, but the priority-boosting code paths are miles from any possible fastpath, and the overhead of executing the completion will normally be quite small compared to the overhead of priority boosting and deboosting, so this should be OK. Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Diffstat (limited to 'kernel')
-rw-r--r--kernel/rcu/tree.h5
-rw-r--r--kernel/rcu/tree_plugin.h8
2 files changed, 12 insertions, 1 deletions
diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h
index 0f69a79c5b7d..3eeb919e26a2 100644
--- a/kernel/rcu/tree.h
+++ b/kernel/rcu/tree.h
@@ -172,6 +172,11 @@ struct rcu_node {
/* queued on this rcu_node structure that */
/* are blocking the current grace period, */
/* there can be no such task. */
+ struct completion boost_completion;
+ /* Used to ensure that the rt_mutex used */
+ /* to carry out the boosting is fully */
+ /* released with no future boostee accesses */
+ /* before that rt_mutex is re-initialized. */
unsigned long boost_time;
/* When to start boosting (jiffies). */
struct task_struct *boost_kthread_task;
diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h
index 5da9f9b3abc9..9c811879d31e 100644
--- a/kernel/rcu/tree_plugin.h
+++ b/kernel/rcu/tree_plugin.h
@@ -427,8 +427,10 @@ void rcu_read_unlock_special(struct task_struct *t)
#ifdef CONFIG_RCU_BOOST
/* Unboost if we were boosted. */
- if (rbmp)
+ if (rbmp) {
rt_mutex_unlock(rbmp);
+ complete(&rnp->boost_completion);
+ }
#endif /* #ifdef CONFIG_RCU_BOOST */
/*
@@ -1202,10 +1204,14 @@ static int rcu_boost(struct rcu_node *rnp)
t = container_of(tb, struct task_struct, rcu_node_entry);
rt_mutex_init_proxy_locked(&mtx, t);
t->rcu_boost_mutex = &mtx;
+ init_completion(&rnp->boost_completion);
raw_spin_unlock_irqrestore(&rnp->lock, flags);
rt_mutex_lock(&mtx); /* Side effect: boosts task t's priority. */
rt_mutex_unlock(&mtx); /* Keep lockdep happy. */
+ /* Wait until boostee is done accessing mtx before reinitializing. */
+ wait_for_completion(&rnp->boost_completion);
+
return ACCESS_ONCE(rnp->exp_tasks) != NULL ||
ACCESS_ONCE(rnp->boost_tasks) != NULL;
}