summaryrefslogtreecommitdiffstats
path: root/kernel/rcu/sync.c
blob: be10036fa621556f29b850a1de8db0009ee4bbe5 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
/*
 * RCU-based infrastructure for lightweight reader-writer locking
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, you can access it online at
 * http://www.gnu.org/licenses/gpl-2.0.html.
 *
 * Copyright (c) 2015, Red Hat, Inc.
 *
 * Author: Oleg Nesterov <oleg@redhat.com>
 */

#include <linux/rcu_sync.h>
#include <linux/sched.h>

#ifdef CONFIG_PROVE_RCU
#define __INIT_HELD(func)	.held = func,
#else
#define __INIT_HELD(func)
#endif

static const struct {
	void (*sync)(void);
	void (*call)(struct rcu_head *, void (*)(struct rcu_head *));
	void (*wait)(void);
#ifdef CONFIG_PROVE_RCU
	int  (*held)(void);
#endif
} gp_ops[] = {
	[RCU_SYNC] = {
		.sync = synchronize_rcu,
		.call = call_rcu,
		.wait = rcu_barrier,
		__INIT_HELD(rcu_read_lock_held)
	},
	[RCU_SCHED_SYNC] = {
		.sync = synchronize_rcu,
		.call = call_rcu,
		.wait = rcu_barrier,
		__INIT_HELD(rcu_read_lock_sched_held)
	},
	[RCU_BH_SYNC] = {
		.sync = synchronize_rcu,
		.call = call_rcu,
		.wait = rcu_barrier,
		__INIT_HELD(rcu_read_lock_bh_held)
	},
};

enum { GP_IDLE = 0, GP_PENDING, GP_PASSED };
enum { CB_IDLE = 0, CB_PENDING, CB_REPLAY };

#define	rss_lock	gp_wait.lock

#ifdef CONFIG_PROVE_RCU
void rcu_sync_lockdep_assert(struct rcu_sync *rsp)
{
	RCU_LOCKDEP_WARN(!gp_ops[rsp->gp_type].held(),
			 "suspicious rcu_sync_is_idle() usage");
}

EXPORT_SYMBOL_GPL(rcu_sync_lockdep_assert);
#endif

/**
 * rcu_sync_init() - Initialize an rcu_sync structure
 * @rsp: Pointer to rcu_sync structure to be initialized
 * @type: Flavor of RCU with which to synchronize rcu_sync structure
 */
void rcu_sync_init(struct rcu_sync *rsp, enum rcu_sync_type type)
{
	memset(rsp, 0, sizeof(*rsp));
	init_waitqueue_head(&rsp->gp_wait);
	rsp->gp_type = type;
}

/**
 * rcu_sync_enter_start - Force readers onto slow path for multiple updates
 * @rsp: Pointer to rcu_sync structure to use for synchronization
 *
 * Must be called after rcu_sync_init() and before first use.
 *
 * Ensures rcu_sync_is_idle() returns false and rcu_sync_{enter,exit}()
 * pairs turn into NO-OPs.
 */
void rcu_sync_enter_start(struct rcu_sync *rsp)
{
	rsp->gp_count++;
	rsp->gp_state = GP_PASSED;
}

/**
 * rcu_sync_enter() - Force readers onto slowpath
 * @rsp: Pointer to rcu_sync structure to use for synchronization
 *
 * This function is used by updaters who need readers to make use of
 * a slowpath during the update.  After this function returns, all
 * subsequent calls to rcu_sync_is_idle() will return false, which
 * tells readers to stay off their fastpaths.  A later call to
 * rcu_sync_exit() re-enables reader slowpaths.
 *
 * When called in isolation, rcu_sync_enter() must wait for a grace
 * period, however, closely spaced calls to rcu_sync_enter() can
 * optimize away the grace-period wait via a state machine implemented
 * by rcu_sync_enter(), rcu_sync_exit(), and rcu_sync_func().
 */
void rcu_sync_enter(struct rcu_sync *rsp)
{
	bool need_wait, need_sync;

	spin_lock_irq(&rsp->rss_lock);
	need_wait = rsp->gp_count++;
	need_sync = rsp->gp_state == GP_IDLE;
	if (need_sync)
		rsp->gp_state = GP_PENDING;
	spin_unlock_irq(&rsp->rss_lock);

	WARN_ON_ONCE(need_wait && need_sync);
	if (need_sync) {
		gp_ops[rsp->gp_type].sync();
		rsp->gp_state = GP_PASSED;
		wake_up_all(&rsp->gp_wait);
	} else if (need_wait) {
		wait_event(rsp->gp_wait, rsp->gp_state == GP_PASSED);
	} else {
		/*
		 * Possible when there's a pending CB from a rcu_sync_exit().
		 * Nobody has yet been allowed the 'fast' path and thus we can
		 * avoid doing any sync(). The callback will get 'dropped'.
		 */
		WARN_ON_ONCE(rsp->gp_state != GP_PASSED);
	}
}

/**
 * rcu_sync_func() - Callback function managing reader access to fastpath
 * @rhp: Pointer to rcu_head in rcu_sync structure to use for synchronization
 *
 * This function is passed to one of the call_rcu() functions by
 * rcu_sync_exit(), so that it is invoked after a grace period following the
 * that invocation of rcu_sync_exit().  It takes action based on events that
 * have taken place in the meantime, so that closely spaced rcu_sync_enter()
 * and rcu_sync_exit() pairs need not wait for a grace period.
 *
 * If another rcu_sync_enter() is invoked before the grace period
 * ended, reset state to allow the next rcu_sync_exit() to let the
 * readers back onto their fastpaths (after a grace period).  If both
 * another rcu_sync_enter() and its matching rcu_sync_exit() are invoked
 * before the grace period ended, re-invoke call_rcu() on behalf of that
 * rcu_sync_exit().  Otherwise, set all state back to idle so that readers
 * can again use their fastpaths.
 */
static void rcu_sync_func(struct rcu_head *rhp)
{
	struct rcu_sync *rsp = container_of(rhp, struct rcu_sync, cb_head);
	unsigned long flags;

	WARN_ON_ONCE(rsp->gp_state != GP_PASSED);
	WARN_ON_ONCE(rsp->cb_state == CB_IDLE);

	spin_lock_irqsave(&rsp->rss_lock, flags);
	if (rsp->gp_count) {
		/*
		 * A new rcu_sync_begin() has happened; drop the callback.
		 */
		rsp->cb_state = CB_IDLE;
	} else if (rsp->cb_state == CB_REPLAY) {
		/*
		 * A new rcu_sync_exit() has happened; requeue the callback
		 * to catch a later GP.
		 */
		rsp->cb_state = CB_PENDING;
		gp_ops[rsp->gp_type].call(&rsp->cb_head, rcu_sync_func);
	} else {
		/*
		 * We're at least a GP after rcu_sync_exit(); eveybody will now
		 * have observed the write side critical section. Let 'em rip!.
		 */
		rsp->cb_state = CB_IDLE;
		rsp->gp_state = GP_IDLE;
	}
	spin_unlock_irqrestore(&rsp->rss_lock, flags);
}

/**
 * rcu_sync_exit() - Allow readers back onto fast patch after grace period
 * @rsp: Pointer to rcu_sync structure to use for synchronization
 *
 * This function is used by updaters who have completed, and can therefore
 * now allow readers to make use of their fastpaths after a grace period
 * has elapsed.  After this grace period has completed, all subsequent
 * calls to rcu_sync_is_idle() will return true, which tells readers that
 * they can once again use their fastpaths.
 */
void rcu_sync_exit(struct rcu_sync *rsp)
{
	spin_lock_irq(&rsp->rss_lock);
	if (!--rsp->gp_count) {
		if (rsp->cb_state == CB_IDLE) {
			rsp->cb_state = CB_PENDING;
			gp_ops[rsp->gp_type].call(&rsp->cb_head, rcu_sync_func);
		} else if (rsp->cb_state == CB_PENDING) {
			rsp->cb_state = CB_REPLAY;
		}
	}
	spin_unlock_irq(&rsp->rss_lock);
}

/**
 * rcu_sync_dtor() - Clean up an rcu_sync structure
 * @rsp: Pointer to rcu_sync structure to be cleaned up
 */
void rcu_sync_dtor(struct rcu_sync *rsp)
{
	int cb_state;

	WARN_ON_ONCE(rsp->gp_count);

	spin_lock_irq(&rsp->rss_lock);
	if (rsp->cb_state == CB_REPLAY)
		rsp->cb_state = CB_PENDING;
	cb_state = rsp->cb_state;
	spin_unlock_irq(&rsp->rss_lock);

	if (cb_state != CB_IDLE) {
		gp_ops[rsp->gp_type].wait();
		WARN_ON_ONCE(rsp->cb_state != CB_IDLE);
	}
}