1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
|
#ifndef _LINUX_CLOSURE_H
#define _LINUX_CLOSURE_H
#include <linux/llist.h>
#include <linux/sched.h>
#include <linux/workqueue.h>
/*
* Closure is perhaps the most overused and abused term in computer science, but
* since I've been unable to come up with anything better you're stuck with it
* again.
*
* What are closures?
*
* They embed a refcount. The basic idea is they count "things that are in
* progress" - in flight bios, some other thread that's doing something else -
* anything you might want to wait on.
*
* The refcount may be manipulated with closure_get() and closure_put().
* closure_put() is where many of the interesting things happen, when it causes
* the refcount to go to 0.
*
* Closures can be used to wait on things both synchronously and asynchronously,
* and synchronous and asynchronous use can be mixed without restriction. To
* wait synchronously, use closure_sync() - you will sleep until your closure's
* refcount hits 1.
*
* To wait asynchronously, use
* continue_at(cl, next_function, workqueue);
*
* passing it, as you might expect, the function to run when nothing is pending
* and the workqueue to run that function out of.
*
* continue_at() also, critically, is a macro that returns the calling function.
* There's good reason for this.
*
* To use safely closures asynchronously, they must always have a refcount while
* they are running owned by the thread that is running them. Otherwise, suppose
* you submit some bios and wish to have a function run when they all complete:
*
* foo_endio(struct bio *bio, int error)
* {
* closure_put(cl);
* }
*
* closure_init(cl);
*
* do_stuff();
* closure_get(cl);
* bio1->bi_endio = foo_endio;
* bio_submit(bio1);
*
* do_more_stuff();
* closure_get(cl);
* bio2->bi_endio = foo_endio;
* bio_submit(bio2);
*
* continue_at(cl, complete_some_read, system_wq);
*
* If closure's refcount started at 0, complete_some_read() could run before the
* second bio was submitted - which is almost always not what you want! More
* importantly, it wouldn't be possible to say whether the original thread or
* complete_some_read()'s thread owned the closure - and whatever state it was
* associated with!
*
* So, closure_init() initializes a closure's refcount to 1 - and when a
* closure_fn is run, the refcount will be reset to 1 first.
*
* Then, the rule is - if you got the refcount with closure_get(), release it
* with closure_put() (i.e, in a bio->bi_endio function). If you have a refcount
* on a closure because you called closure_init() or you were run out of a
* closure - _always_ use continue_at(). Doing so consistently will help
* eliminate an entire class of particularly pernicious races.
*
* For a closure to wait on an arbitrary event, we need to introduce waitlists:
*
* struct closure_waitlist list;
* closure_wait_event(list, cl, condition);
* closure_wake_up(wait_list);
*
* These work analagously to wait_event() and wake_up() - except that instead of
* operating on the current thread (for wait_event()) and lists of threads, they
* operate on an explicit closure and lists of closures.
*
* Because it's a closure we can now wait either synchronously or
* asynchronously. closure_wait_event() returns the current value of the
* condition, and if it returned false continue_at() or closure_sync() can be
* used to wait for it to become true.
*
* It's useful for waiting on things when you can't sleep in the context in
* which you must check the condition (perhaps a spinlock held, or you might be
* beneath generic_make_request() - in which case you can't sleep on IO).
*
* closure_wait_event() will wait either synchronously or asynchronously,
* depending on whether the closure is in blocking mode or not. You can pick a
* mode explicitly with closure_wait_event_sync() and
* closure_wait_event_async(), which do just what you might expect.
*
* Lastly, you might have a wait list dedicated to a specific event, and have no
* need for specifying the condition - you just want to wait until someone runs
* closure_wake_up() on the appropriate wait list. In that case, just use
* closure_wait(). It will return either true or false, depending on whether the
* closure was already on a wait list or not - a closure can only be on one wait
* list at a time.
*
* Parents:
*
* closure_init() takes two arguments - it takes the closure to initialize, and
* a (possibly null) parent.
*
* If parent is non null, the new closure will have a refcount for its lifetime;
* a closure is considered to be "finished" when its refcount hits 0 and the
* function to run is null. Hence
*
* continue_at(cl, NULL, NULL);
*
* returns up the (spaghetti) stack of closures, precisely like normal return
* returns up the C stack. continue_at() with non null fn is better thought of
* as doing a tail call.
*
* All this implies that a closure should typically be embedded in a particular
* struct (which its refcount will normally control the lifetime of), and that
* struct can very much be thought of as a stack frame.
*
* Locking:
*
* Closures are based on work items but they can be thought of as more like
* threads - in that like threads and unlike work items they have a well
* defined lifetime; they are created (with closure_init()) and eventually
* complete after a continue_at(cl, NULL, NULL).
*
* Suppose you've got some larger structure with a closure embedded in it that's
* used for periodically doing garbage collection. You only want one garbage
* collection happening at a time, so the natural thing to do is protect it with
* a lock. However, it's difficult to use a lock protecting a closure correctly
* because the unlock should come after the last continue_to() (additionally, if
* you're using the closure asynchronously a mutex won't work since a mutex has
* to be unlocked by the same process that locked it).
*
* So to make it less error prone and more efficient, we also have the ability
* to use closures as locks:
*
* closure_init_unlocked();
* closure_trylock();
*
* That's all we need for trylock() - the last closure_put() implicitly unlocks
* it for you. But for closure_lock(), we also need a wait list:
*
* struct closure_with_waitlist frobnicator_cl;
*
* closure_init_unlocked(&frobnicator_cl);
* closure_lock(&frobnicator_cl);
*
* A closure_with_waitlist embeds a closure and a wait list - much like struct
* delayed_work embeds a work item and a timer_list. The important thing is, use
* it exactly like you would a regular closure and closure_put() will magically
* handle everything for you.
*/
struct closure;
typedef void (closure_fn) (struct closure *);
struct closure_waitlist {
struct llist_head list;
};
enum closure_type {
TYPE_closure = 0,
TYPE_closure_with_waitlist = 1,
MAX_CLOSURE_TYPE = 1,
};
enum closure_state {
/*
* CLOSURE_WAITING: Set iff the closure is on a waitlist. Must be set by
* the thread that owns the closure, and cleared by the thread that's
* waking up the closure.
*
* CLOSURE_SLEEPING: Must be set before a thread uses a closure to sleep
* - indicates that cl->task is valid and closure_put() may wake it up.
* Only set or cleared by the thread that owns the closure.
*
* The rest are for debugging and don't affect behaviour:
*
* CLOSURE_RUNNING: Set when a closure is running (i.e. by
* closure_init() and when closure_put() runs then next function), and
* must be cleared before remaining hits 0. Primarily to help guard
* against incorrect usage and accidentally transferring references.
* continue_at() and closure_return() clear it for you, if you're doing
* something unusual you can use closure_set_dead() which also helps
* annotate where references are being transferred.
*
* CLOSURE_STACK: Sanity check - remaining should never hit 0 on a
* closure with this flag set
*/
CLOSURE_BITS_START = (1 << 23),
CLOSURE_DESTRUCTOR = (1 << 23),
CLOSURE_WAITING = (1 << 25),
CLOSURE_SLEEPING = (1 << 27),
CLOSURE_RUNNING = (1 << 29),
CLOSURE_STACK = (1 << 31),
};
#define CLOSURE_GUARD_MASK \
((CLOSURE_DESTRUCTOR|CLOSURE_WAITING|CLOSURE_SLEEPING| \
CLOSURE_RUNNING|CLOSURE_STACK) << 1)
#define CLOSURE_REMAINING_MASK (CLOSURE_BITS_START - 1)
#define CLOSURE_REMAINING_INITIALIZER (1|CLOSURE_RUNNING)
struct closure {
union {
struct {
struct workqueue_struct *wq;
struct task_struct *task;
struct llist_node list;
closure_fn *fn;
};
struct work_struct work;
};
struct closure *parent;
atomic_t remaining;
enum closure_type type;
#ifdef CONFIG_BCACHE_CLOSURES_DEBUG
#define CLOSURE_MAGIC_DEAD 0xc054dead
#define CLOSURE_MAGIC_ALIVE 0xc054a11e
unsigned magic;
struct list_head all;
unsigned long ip;
unsigned long waiting_on;
#endif
};
struct closure_with_waitlist {
struct closure cl;
struct closure_waitlist wait;
};
extern unsigned invalid_closure_type(void);
#define __CLOSURE_TYPE(cl, _t) \
__builtin_types_compatible_p(typeof(cl), struct _t) \
? TYPE_ ## _t : \
#define __closure_type(cl) \
( \
__CLOSURE_TYPE(cl, closure) \
__CLOSURE_TYPE(cl, closure_with_waitlist) \
invalid_closure_type() \
)
void closure_sub(struct closure *cl, int v);
void closure_put(struct closure *cl);
void __closure_wake_up(struct closure_waitlist *list);
bool closure_wait(struct closure_waitlist *list, struct closure *cl);
void closure_sync(struct closure *cl);
bool closure_trylock(struct closure *cl, struct closure *parent);
void __closure_lock(struct closure *cl, struct closure *parent,
struct closure_waitlist *wait_list);
#ifdef CONFIG_BCACHE_CLOSURES_DEBUG
void closure_debug_init(void);
void closure_debug_create(struct closure *cl);
void closure_debug_destroy(struct closure *cl);
#else
static inline void closure_debug_init(void) {}
static inline void closure_debug_create(struct closure *cl) {}
static inline void closure_debug_destroy(struct closure *cl) {}
#endif
static inline void closure_set_ip(struct closure *cl)
{
#ifdef CONFIG_BCACHE_CLOSURES_DEBUG
cl->ip = _THIS_IP_;
#endif
}
static inline void closure_set_ret_ip(struct closure *cl)
{
#ifdef CONFIG_BCACHE_CLOSURES_DEBUG
cl->ip = _RET_IP_;
#endif
}
static inline void closure_get(struct closure *cl)
{
#ifdef CONFIG_BCACHE_CLOSURES_DEBUG
BUG_ON((atomic_inc_return(&cl->remaining) &
CLOSURE_REMAINING_MASK) <= 1);
#else
atomic_inc(&cl->remaining);
#endif
}
static inline void closure_set_stopped(struct closure *cl)
{
atomic_sub(CLOSURE_RUNNING, &cl->remaining);
}
static inline bool closure_is_unlocked(struct closure *cl)
{
return atomic_read(&cl->remaining) == -1;
}
static inline void do_closure_init(struct closure *cl, struct closure *parent,
bool running)
{
cl->parent = parent;
if (parent)
closure_get(parent);
if (running) {
closure_debug_create(cl);
atomic_set(&cl->remaining, CLOSURE_REMAINING_INITIALIZER);
} else
atomic_set(&cl->remaining, -1);
closure_set_ip(cl);
}
/*
* Hack to get at the embedded closure if there is one, by doing an unsafe cast:
* the result of __closure_type() is thrown away, it's used merely for type
* checking.
*/
#define __to_internal_closure(cl) \
({ \
BUILD_BUG_ON(__closure_type(*cl) > MAX_CLOSURE_TYPE); \
(struct closure *) cl; \
})
#define closure_init_type(cl, parent, running) \
do { \
struct closure *_cl = __to_internal_closure(cl); \
_cl->type = __closure_type(*(cl)); \
do_closure_init(_cl, parent, running); \
} while (0)
/**
* closure_init() - Initialize a closure, setting the refcount to 1
* @cl: closure to initialize
* @parent: parent of the new closure. cl will take a refcount on it for its
* lifetime; may be NULL.
*/
#define closure_init(cl, parent) \
closure_init_type(cl, parent, true)
static inline void closure_init_stack(struct closure *cl)
{
memset(cl, 0, sizeof(struct closure));
atomic_set(&cl->remaining, CLOSURE_REMAINING_INITIALIZER|CLOSURE_STACK);
}
/**
* closure_init_unlocked() - Initialize a closure but leave it unlocked.
* @cl: closure to initialize
*
* For when the closure will be used as a lock. The closure may not be used
* until after a closure_lock() or closure_trylock().
*/
#define closure_init_unlocked(cl) \
do { \
memset((cl), 0, sizeof(*(cl))); \
closure_init_type(cl, NULL, false); \
} while (0)
/**
* closure_lock() - lock and initialize a closure.
* @cl: the closure to lock
* @parent: the new parent for this closure
*
* The closure must be of one of the types that has a waitlist (otherwise we
* wouldn't be able to sleep on contention).
*
* @parent has exactly the same meaning as in closure_init(); if non null, the
* closure will take a reference on @parent which will be released when it is
* unlocked.
*/
#define closure_lock(cl, parent) \
__closure_lock(__to_internal_closure(cl), parent, &(cl)->wait)
static inline void __closure_end_sleep(struct closure *cl)
{
__set_current_state(TASK_RUNNING);
if (atomic_read(&cl->remaining) & CLOSURE_SLEEPING)
atomic_sub(CLOSURE_SLEEPING, &cl->remaining);
}
static inline void __closure_start_sleep(struct closure *cl)
{
closure_set_ip(cl);
cl->task = current;
set_current_state(TASK_UNINTERRUPTIBLE);
if (!(atomic_read(&cl->remaining) & CLOSURE_SLEEPING))
atomic_add(CLOSURE_SLEEPING, &cl->remaining);
}
/**
* closure_wake_up() - wake up all closures on a wait list.
*/
static inline void closure_wake_up(struct closure_waitlist *list)
{
smp_mb();
__closure_wake_up(list);
}
/*
* Wait on an event, synchronously or asynchronously - analogous to wait_event()
* but for closures.
*
* The loop is oddly structured so as to avoid a race; we must check the
* condition again after we've added ourself to the waitlist. We know if we were
* already on the waitlist because closure_wait() returns false; thus, we only
* schedule or break if closure_wait() returns false. If it returns true, we
* just loop again - rechecking the condition.
*
* The __closure_wake_up() is necessary because we may race with the event
* becoming true; i.e. we see event false -> wait -> recheck condition, but the
* thread that made the event true may have called closure_wake_up() before we
* added ourself to the wait list.
*
* We have to call closure_sync() at the end instead of just
* __closure_end_sleep() because a different thread might've called
* closure_wake_up() before us and gotten preempted before they dropped the
* refcount on our closure. If this was a stack allocated closure, that would be
* bad.
*/
#define closure_wait_event(list, cl, condition) \
({ \
typeof(condition) ret; \
\
while (1) { \
ret = (condition); \
if (ret) { \
__closure_wake_up(list); \
closure_sync(cl); \
break; \
} \
\
__closure_start_sleep(cl); \
\
if (!closure_wait(list, cl)) \
schedule(); \
} \
\
ret; \
})
static inline void closure_queue(struct closure *cl)
{
struct workqueue_struct *wq = cl->wq;
if (wq) {
INIT_WORK(&cl->work, cl->work.func);
BUG_ON(!queue_work(wq, &cl->work));
} else
cl->fn(cl);
}
static inline void set_closure_fn(struct closure *cl, closure_fn *fn,
struct workqueue_struct *wq)
{
BUG_ON(object_is_on_stack(cl));
closure_set_ip(cl);
cl->fn = fn;
cl->wq = wq;
/* between atomic_dec() in closure_put() */
smp_mb__before_atomic_dec();
}
#define continue_at(_cl, _fn, _wq) \
do { \
set_closure_fn(_cl, _fn, _wq); \
closure_sub(_cl, CLOSURE_RUNNING + 1); \
return; \
} while (0)
#define closure_return(_cl) continue_at((_cl), NULL, NULL)
#define continue_at_nobarrier(_cl, _fn, _wq) \
do { \
set_closure_fn(_cl, _fn, _wq); \
closure_queue(_cl); \
return; \
} while (0)
#define closure_return_with_destructor(_cl, _destructor) \
do { \
set_closure_fn(_cl, _destructor, NULL); \
closure_sub(_cl, CLOSURE_RUNNING - CLOSURE_DESTRUCTOR + 1); \
return; \
} while (0)
static inline void closure_call(struct closure *cl, closure_fn fn,
struct workqueue_struct *wq,
struct closure *parent)
{
closure_init(cl, parent);
continue_at_nobarrier(cl, fn, wq);
}
static inline void closure_trylock_call(struct closure *cl, closure_fn fn,
struct workqueue_struct *wq,
struct closure *parent)
{
if (closure_trylock(cl, parent))
continue_at_nobarrier(cl, fn, wq);
}
#endif /* _LINUX_CLOSURE_H */
|