fs/bcachefs/btree_gc.h

/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _BCACHEFS_BTREE_GC_H
#define _BCACHEFS_BTREE_GC_H

#include "btree_types.h"

/*
 * GC entry points declared here; their definitions are not in this header
 * (presumably btree_gc.c - confirm).
 */
void bch2_coalesce(struct bch_fs *);

/* Forward declaration: only a pointer to struct journal_keys is used below. */
struct journal_keys;
/*
 * NOTE(review): the meaning of the two bool arguments is not visible from this
 * header - check the definition before calling.
 */
int bch2_gc(struct bch_fs *, struct journal_keys *, bool, bool);
/* Stop/start pair for the gc thread; start returns an int (presumably 0 or a negative errno - confirm). */
void bch2_gc_thread_stop(struct bch_fs *);
int bch2_gc_thread_start(struct bch_fs *);
/* NOTE(review): the unsigned argument's meaning is not visible here (possibly flags - confirm). */
void bch2_mark_dev_superblock(struct bch_fs *, struct bch_dev *, unsigned);

/*
 * For concurrent mark and sweep (with other index updates), we define a total
 * ordering of _all_ references GC walks, as implemented by gc_pos_cmp(): first
 * by gc phase, then by position, then by level.
 *
 * Note that some references will have the same GC position as others - e.g.
 * everything within the same btree node; in those cases we're relying on
 * whatever locking exists for where those references live, i.e. the write lock
 * on a btree node.
 *
 * That locking is also required to ensure GC doesn't pass the updater in
 * between the updater adding/removing the reference and updating the GC marks;
 * without that, we would at best double count sometimes.
 *
 * That part is important - whenever calling bch2_mark_pointers(), a lock _must_
 * be held that prevents GC from passing the position the updater is at.
 *
 * (What about the start of gc, when we're clearing all the marks? GC clears the
 * mark with the gc pos seqlock held, and bch_mark_bucket checks against the gc
 * position inside its cmpxchg loop, so crap magically works).
 */

/* Position of (the start of) a gc phase: */
static inline struct gc_pos gc_phase(enum gc_phase phase)
{
	return (struct gc_pos) {
		.phase	= phase,
		.pos	= POS_MIN,
		.level	= 0,
	};
}

/*
 * Three-way comparison of two gc positions: returns a negative value, zero or
 * a positive value when @l sorts before, equal to or after @r.
 *
 * Positions are ordered by phase first, then by pos (via bkey_cmp()), and
 * finally by level.
 */
static inline int gc_pos_cmp(struct gc_pos l, struct gc_pos r)
{
	int pos_order;

	if (l.phase < r.phase)
		return -1;
	if (l.phase > r.phase)
		return 1;

	/* Same phase: fall back to the key position. */
	pos_order = bkey_cmp(l.pos, r.pos);
	if (pos_order)
		return pos_order;

	/* Same phase and position: the level is the final tie breaker. */
	if (l.level < r.level)
		return -1;
	if (l.level > r.level)
		return 1;

	return 0;
}

/*
 * Map a btree id to its gc phase: BTREE_ID_<n> maps to GC_PHASE_BTREE_<n>.
 * An id with no generated case below hits BUG().
 */
static inline enum gc_phase btree_id_to_gc_phase(enum btree_id id)
{
	switch (id) {
/*
 * x() emits one case label per invocation by BCH_BTREE_IDS(); that list is
 * defined elsewhere (presumably one entry per btree type - confirm).
 */
#define x(n, v, s) case BTREE_ID_##n: return GC_PHASE_BTREE_##n;
	BCH_BTREE_IDS()
#undef x
	default:
		BUG();
	}
}

static inline struct gc_pos gc_pos_btree(enum btree_id id,
					 struct bpos pos, unsigned level)
{
	return (struct gc_pos) {
		.phase	= btree_id_to_gc_phase(id),
		.pos	= pos,
		.level	= level,
	};
}

/*
 * GC position of the pointers within a btree node: note, _not_ for &b->key
 * itself, that lives in the parent node:
 */
static inline struct gc_pos gc_pos_btree_node(struct btree *b)
{
	return gc_pos_btree(b->btree_id, b->key.k.p, b->level);
}

/*
 * GC position of the pointer to the root of btree @id.
 *
 * This deliberately does not use gc_pos_pointer_to_btree_node(): btree_split()
 * can increase the tree depth, and the new root would then have a higher level
 * - and so a greater gc position - than the old root. That would be incorrect,
 * because once gc has marked the root it is not coming back. The root pointer
 * therefore always gets the fixed position (POS_MAX, BTREE_MAX_DEPTH).
 */
static inline struct gc_pos gc_pos_btree_root(enum btree_id id)
{
	const unsigned root_level = BTREE_MAX_DEPTH;

	return gc_pos_btree(id, POS_MAX, root_level);
}

static inline struct gc_pos gc_pos_alloc(struct bch_fs *c, struct open_bucket *ob)
{
	return (struct gc_pos) {
		.phase	= GC_PHASE_ALLOC,
		.pos	= POS(ob ? ob - c->open_buckets : 0, 0),
	};
}

/*
 * Returns true if @pos compares less than or equal to the filesystem's current
 * gc position (c->gc_pos) under gc_pos_cmp().
 *
 * c->gc_pos is read inside a c->gc_pos_lock seqcount read section, and the
 * comparison is redone whenever read_seqcount_retry() reports that the section
 * has to be retried.
 */
static inline bool gc_visited(struct bch_fs *c, struct gc_pos pos)
{
	unsigned seq;
	bool ret;

	do {
		seq = read_seqcount_begin(&c->gc_pos_lock);
		/* true when pos <= c->gc_pos in the gc ordering */
		ret = gc_pos_cmp(pos, c->gc_pos) <= 0;
	} while (read_seqcount_retry(&c->gc_pos_lock, seq));

	return ret;
}

#endif /* _BCACHEFS_BTREE_GC_H */