diff options
author | Kent Overstreet <kent.overstreet@linux.dev> | 2023-07-12 19:55:03 +0200 |
---|---|---|
committer | Kent Overstreet <kent.overstreet@linux.dev> | 2023-10-22 23:10:07 +0200 |
commit | 8479938d7a0f6c6cf6362c72880e753b3d7a707a (patch) | |
tree | 2fcef58d98c02334138389be77b5177437e30a70 /fs/bcachefs | |
parent | bcachefs: Add a race_fault() for write buffer slowpath (diff) | |
download | linux-8479938d7a0f6c6cf6362c72880e753b3d7a707a.tar.xz linux-8479938d7a0f6c6cf6362c72880e753b3d7a707a.zip |
bcachefs: Convert snapshot table to RCU array
This switches the generic radix tree for the in-memory table of snapshot
nodes to a simple rcu array. This means we have to add new locking to
deal with reallocations, but is faster than traversing the radix tree.
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
Diffstat (limited to 'fs/bcachefs')
-rw-r--r-- | fs/bcachefs/bcachefs.h | 5 | ||||
-rw-r--r-- | fs/bcachefs/btree_update_leaf.c | 4 | ||||
-rw-r--r-- | fs/bcachefs/fsck.c | 2 | ||||
-rw-r--r-- | fs/bcachefs/quota.c | 2 | ||||
-rw-r--r-- | fs/bcachefs/subvolume.c | 142 | ||||
-rw-r--r-- | fs/bcachefs/subvolume.h | 95 | ||||
-rw-r--r-- | fs/bcachefs/subvolume_types.h | 4 |
7 files changed, 207 insertions, 47 deletions
diff --git a/fs/bcachefs/bcachefs.h b/fs/bcachefs/bcachefs.h index d8c020644f54..445d010c83b3 100644 --- a/fs/bcachefs/bcachefs.h +++ b/fs/bcachefs/bcachefs.h @@ -774,9 +774,10 @@ struct bch_fs { struct mutex sb_lock; /* snapshot.c: */ - GENRADIX(struct snapshot_t) snapshots; - struct bch_snapshot_table __rcu *snapshot_table; + struct snapshot_table __rcu *snapshots; + size_t snapshot_table_size; struct mutex snapshot_table_lock; + struct work_struct snapshot_delete_work; struct work_struct snapshot_wait_for_pagecache_and_delete_work; snapshot_id_list snapshots_unlinked; diff --git a/fs/bcachefs/btree_update_leaf.c b/fs/bcachefs/btree_update_leaf.c index 53219fdcff66..3638cef211b2 100644 --- a/fs/bcachefs/btree_update_leaf.c +++ b/fs/bcachefs/btree_update_leaf.c @@ -311,7 +311,7 @@ static inline void btree_insert_entry_checks(struct btree_trans *trans, !(i->flags & BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE) && test_bit(JOURNAL_REPLAY_DONE, &trans->c->journal.flags) && i->k->k.p.snapshot && - bch2_snapshot_internal_node(trans->c, i->k->k.p.snapshot)); + bch2_snapshot_is_internal_node(trans->c, i->k->k.p.snapshot)); } static noinline int @@ -1229,7 +1229,7 @@ static inline int check_pos_snapshot_overwritten(struct btree_trans *trans, struct bpos pos) { if (!btree_type_has_snapshots(id) || - !snapshot_t(trans->c, pos.snapshot)->children[0]) + bch2_snapshot_is_leaf(trans->c, pos.snapshot)) return 0; return __check_pos_snapshot_overwritten(trans, id, pos); diff --git a/fs/bcachefs/fsck.c b/fs/bcachefs/fsck.c index ddc2782fc5b1..bc769b3e932a 100644 --- a/fs/bcachefs/fsck.c +++ b/fs/bcachefs/fsck.c @@ -894,7 +894,7 @@ static int check_inode(struct btree_trans *trans, * particular is not atomic, so on the internal snapshot nodes * we can see inodes marked for deletion after a clean shutdown */ - if (bch2_snapshot_internal_node(c, k.k->p.snapshot)) + if (bch2_snapshot_is_internal_node(c, k.k->p.snapshot)) return 0; if (!bkey_is_inode(k.k)) diff --git a/fs/bcachefs/quota.c b/fs/bcachefs/quota.c index d90db3fb823e..4f0654ff816f 100644 --- a/fs/bcachefs/quota.c +++ b/fs/bcachefs/quota.c @@ -562,7 +562,7 @@ static int bch2_fs_quota_read_inode(struct btree_trans *trans, int ret; ret = bch2_snapshot_tree_lookup(trans, - snapshot_t(c, k.k->p.snapshot)->tree, &s_t); + bch2_snapshot_tree(c, k.k->p.snapshot), &s_t); bch2_fs_inconsistent_on(bch2_err_matches(ret, ENOENT), c, "%s: snapshot tree %u not found", __func__, snapshot_t(c, k.k->p.snapshot)->tree); diff --git a/fs/bcachefs/subvolume.c b/fs/bcachefs/subvolume.c index cdaaf49d3b3e..c2c2cfd74e71 100644 --- a/fs/bcachefs/subvolume.c +++ b/fs/bcachefs/subvolume.c @@ -12,9 +12,9 @@ static int bch2_subvolume_delete(struct btree_trans *, u32); -static inline u32 get_ancestor_below(struct bch_fs *c, u32 id, u32 ancestor) +static inline u32 get_ancestor_below(struct snapshot_table *t, u32 id, u32 ancestor) { - struct snapshot_t *s = snapshot_t(c, id); + const struct snapshot_t *s = __snapshot_t(t, id); if (s->skip[2] <= ancestor) return s->skip[2]; @@ -27,22 +27,102 @@ static inline u32 get_ancestor_below(struct bch_fs *c, u32 id, u32 ancestor) bool bch2_snapshot_is_ancestor(struct bch_fs *c, u32 id, u32 ancestor) { + struct snapshot_table *t; + EBUG_ON(c->curr_recovery_pass <= BCH_RECOVERY_PASS_check_snapshots); + rcu_read_lock(); + t = rcu_dereference(c->snapshots); + while (id && id < ancestor) - id = get_ancestor_below(c, id, ancestor); + id = get_ancestor_below(t, id, ancestor); + rcu_read_unlock(); return id == ancestor; } static bool bch2_snapshot_is_ancestor_early(struct bch_fs *c, u32 id, u32 ancestor) { + struct snapshot_table *t; + + rcu_read_lock(); + t = rcu_dereference(c->snapshots); + while (id && id < ancestor) - id = snapshot_t(c, id)->parent; + id = __snapshot_t(t, id)->parent; + rcu_read_unlock(); return id == ancestor; } +static inline u32 bch2_snapshot_depth(struct bch_fs *c, u32 parent) +{ + u32 depth; + + rcu_read_lock(); + depth = parent ? snapshot_t(c, parent)->depth + 1 : 0; + rcu_read_unlock(); + + return depth; +} + +struct snapshot_t_free_rcu { + struct rcu_head rcu; + struct snapshot_table *t; +}; + +static void snapshot_t_free_rcu(struct rcu_head *rcu) +{ + struct snapshot_t_free_rcu *free_rcu = + container_of(rcu, struct snapshot_t_free_rcu, rcu); + + kvfree(free_rcu->t); + kfree(free_rcu); +} + +static noinline struct snapshot_t *__snapshot_t_mut(struct bch_fs *c, u32 id) +{ + size_t idx = U32_MAX - id; + size_t new_size; + struct snapshot_table *new, *old; + + new_size = max(16UL, roundup_pow_of_two(idx + 1)); + + new = kvzalloc(struct_size(new, s, new_size), GFP_KERNEL); + if (!new) + return NULL; + + old = rcu_dereference_protected(c->snapshots, true); + if (old) + memcpy(new->s, + rcu_dereference_protected(c->snapshots, true)->s, + sizeof(new->s[0]) * c->snapshot_table_size); + + rcu_assign_pointer(c->snapshots, new); + c->snapshot_table_size = new_size; + if (old) { + struct snapshot_t_free_rcu *rcu = + kmalloc(sizeof(*rcu), GFP_KERNEL|__GFP_NOFAIL); + + rcu->t = old; + call_rcu(&rcu->rcu, snapshot_t_free_rcu); + } + + return &rcu_dereference_protected(c->snapshots, true)->s[idx]; +} + +static inline struct snapshot_t *snapshot_t_mut(struct bch_fs *c, u32 id) +{ + size_t idx = U32_MAX - id; + + lockdep_assert_held(&c->snapshot_table_lock); + + if (likely(idx < c->snapshot_table_size)) + return &rcu_dereference_protected(c->snapshots, true)->s[idx]; + + return __snapshot_t_mut(c, id); +} + /* Snapshot tree: */ void bch2_snapshot_tree_to_text(struct printbuf *out, struct bch_fs *c, @@ -209,12 +289,15 @@ int bch2_mark_snapshot(struct btree_trans *trans, { struct bch_fs *c = trans->c; struct snapshot_t *t; + int ret = 0; - t = genradix_ptr_alloc(&c->snapshots, - U32_MAX - new.k->p.offset, - GFP_KERNEL); - if (!t) - return -BCH_ERR_ENOMEM_mark_snapshot; + mutex_lock(&c->snapshot_table_lock); + + t = snapshot_t_mut(c, new.k->p.offset); + if (!t) { + ret = -BCH_ERR_ENOMEM_mark_snapshot; + goto err; + } if (new.k->type == KEY_TYPE_snapshot) { struct bkey_s_c_snapshot s = bkey_s_c_to_snapshot(new); @@ -246,8 +329,9 @@ int bch2_mark_snapshot(struct btree_trans *trans, t->subvol = 0; t->tree = 0; } - - return 0; +err: + mutex_unlock(&c->snapshot_table_lock); + return ret; } static int snapshot_lookup(struct btree_trans *trans, u32 id, @@ -300,9 +384,14 @@ static int bch2_snapshot_set_equiv(struct btree_trans *trans, struct bkey_s_c k) nr_live += ret; } - snapshot_t(c, id)->equiv = nr_live == 1 - ? snapshot_t(c, child[live_idx])->equiv + mutex_lock(&c->snapshot_table_lock); + + snapshot_t_mut(c, id)->equiv = nr_live == 1 + ? snapshot_t_mut(c, child[live_idx])->equiv : id; + + mutex_unlock(&c->snapshot_table_lock); + return 0; } @@ -520,16 +609,18 @@ static int snapshot_tree_ptr_good(struct btree_trans *trans, static u32 snapshot_skiplist_get(struct bch_fs *c, u32 id) { - struct snapshot_t *s; + const struct snapshot_t *s; if (!id) return 0; + rcu_read_lock(); s = snapshot_t(c, id); - if (!s->parent) - return id; + if (s->parent) + id = bch2_snapshot_nth_parent(c, id, get_random_u32_below(s->depth)); + rcu_read_unlock(); - return bch2_snapshot_nth_parent(c, id, get_random_u32_below(s->depth)); + return id; } static int snapshot_skiplist_good(struct btree_trans *trans, struct bch_snapshot s) @@ -633,9 +724,6 @@ static int check_snapshot(struct btree_trans *trans, struct bkey_i_snapshot *u; u32 parent_id = bch2_snapshot_parent_early(c, k.k->p.offset); u32 real_depth; - struct snapshot_t *parent = parent_id - ? snapshot_t(c, parent_id) - : NULL; struct printbuf buf = PRINTBUF; bool should_have_subvol; u32 i, id; @@ -726,7 +814,7 @@ static int check_snapshot(struct btree_trans *trans, } ret = 0; - real_depth = parent ? parent->depth + 1 : 0; + real_depth = bch2_snapshot_depth(c, parent_id); if (le32_to_cpu(s.depth) != real_depth && (c->sb.version_upgrade_complete < bcachefs_metadata_version_snapshot_skiplists || @@ -823,9 +911,13 @@ static int check_subvol(struct btree_trans *trans, if (!BCH_SUBVOLUME_SNAP(subvol.v)) { u32 snapshot_root = bch2_snapshot_root(c, le32_to_cpu(subvol.v->snapshot)); - u32 snapshot_tree = snapshot_t(c, snapshot_root)->tree; + u32 snapshot_tree; struct bch_snapshot_tree st; + rcu_read_lock(); + snapshot_tree = snapshot_t(c, snapshot_root)->tree; + rcu_read_unlock(); + ret = bch2_snapshot_tree_lookup(trans, snapshot_tree, &st); bch2_fs_inconsistent_on(bch2_err_matches(ret, ENOENT), c, @@ -869,7 +961,7 @@ int bch2_check_subvols(struct bch_fs *c) void bch2_fs_snapshots_exit(struct bch_fs *c) { - genradix_free(&c->snapshots); + kfree(rcu_dereference_protected(c->snapshots, true)); } int bch2_snapshots_read(struct bch_fs *c) @@ -1011,7 +1103,7 @@ static int create_snapids(struct btree_trans *trans, u32 parent, u32 tree, struct bkey_i_snapshot *n; struct bkey_s_c k; unsigned i, j; - u32 depth = parent ? snapshot_t(c, parent)->depth + 1 : 0; + u32 depth = bch2_snapshot_depth(c, parent); int ret; bch2_trans_iter_init(trans, &iter, BTREE_ID_snapshots, @@ -1150,7 +1242,7 @@ static int snapshot_delete_key(struct btree_trans *trans, struct bpos *last_pos) { struct bch_fs *c = trans->c; - u32 equiv = snapshot_t(c, k.k->p.snapshot)->equiv; + u32 equiv = bch2_snapshot_equiv(c, k.k->p.snapshot); if (!bkey_eq(k.k->p, *last_pos)) equiv_seen->nr = 0; diff --git a/fs/bcachefs/subvolume.h b/fs/bcachefs/subvolume.h index ab0b4a6de255..12a08a34e9bb 100644 --- a/fs/bcachefs/subvolume.h +++ b/fs/bcachefs/subvolume.h @@ -32,17 +32,40 @@ int bch2_mark_snapshot(struct btree_trans *, enum btree_id, unsigned, .min_val_size = 24, \ }) -static inline struct snapshot_t *snapshot_t(struct bch_fs *c, u32 id) +static inline struct snapshot_t *__snapshot_t(struct snapshot_table *t, u32 id) { - return genradix_ptr(&c->snapshots, U32_MAX - id); + return &t->s[U32_MAX - id]; } -static inline u32 bch2_snapshot_parent_early(struct bch_fs *c, u32 id) +static inline const struct snapshot_t *snapshot_t(struct bch_fs *c, u32 id) +{ + return __snapshot_t(rcu_dereference(c->snapshots), id); +} + +static inline u32 bch2_snapshot_tree(struct bch_fs *c, u32 id) +{ + rcu_read_lock(); + id = snapshot_t(c, id)->tree; + rcu_read_unlock(); + + return id; +} + +static inline u32 __bch2_snapshot_parent_early(struct bch_fs *c, u32 id) { return snapshot_t(c, id)->parent; } -static inline u32 bch2_snapshot_parent(struct bch_fs *c, u32 id) +static inline u32 bch2_snapshot_parent_early(struct bch_fs *c, u32 id) +{ + rcu_read_lock(); + id = __bch2_snapshot_parent_early(c, id); + rcu_read_unlock(); + + return id; +} + +static inline u32 __bch2_snapshot_parent(struct bch_fs *c, u32 id) { #ifdef CONFIG_BCACHEFS_DEBUG u32 parent = snapshot_t(c, id)->parent; @@ -59,10 +82,21 @@ static inline u32 bch2_snapshot_parent(struct bch_fs *c, u32 id) #endif } +static inline u32 bch2_snapshot_parent(struct bch_fs *c, u32 id) +{ + rcu_read_lock(); + id = __bch2_snapshot_parent(c, id); + rcu_read_unlock(); + + return id; +} + static inline u32 bch2_snapshot_nth_parent(struct bch_fs *c, u32 id, u32 n) { + rcu_read_lock(); while (n--) - id = bch2_snapshot_parent(c, id); + id = __bch2_snapshot_parent(c, id); + rcu_read_unlock(); return id; } @@ -71,37 +105,60 @@ static inline u32 bch2_snapshot_root(struct bch_fs *c, u32 id) { u32 parent; - while ((parent = bch2_snapshot_parent(c, id))) + rcu_read_lock(); + while ((parent = __bch2_snapshot_parent(c, id))) id = parent; + rcu_read_unlock(); + return id; } -static inline u32 bch2_snapshot_equiv(struct bch_fs *c, u32 id) +static inline u32 __bch2_snapshot_equiv(struct bch_fs *c, u32 id) { return snapshot_t(c, id)->equiv; } +static inline u32 bch2_snapshot_equiv(struct bch_fs *c, u32 id) +{ + rcu_read_lock(); + id = __bch2_snapshot_equiv(c, id); + rcu_read_unlock(); + + return id; +} + static inline bool bch2_snapshot_is_equiv(struct bch_fs *c, u32 id) { - return id == snapshot_t(c, id)->equiv; + return id == bch2_snapshot_equiv(c, id); } -static inline u32 bch2_snapshot_internal_node(struct bch_fs *c, u32 id) +static inline bool bch2_snapshot_is_internal_node(struct bch_fs *c, u32 id) { - struct snapshot_t *s = snapshot_t(c, id); + const struct snapshot_t *s; + bool ret; + + rcu_read_lock(); + s = snapshot_t(c, id); + ret = s->children[0]; + rcu_read_unlock(); - return s->children[0] || s->children[1]; + return ret; +} + +static inline u32 bch2_snapshot_is_leaf(struct bch_fs *c, u32 id) +{ + return !bch2_snapshot_is_internal_node(c, id); } static inline u32 bch2_snapshot_sibling(struct bch_fs *c, u32 id) { - struct snapshot_t *s; - u32 parent = bch2_snapshot_parent(c, id); + const struct snapshot_t *s; + u32 parent = __bch2_snapshot_parent(c, id); if (!parent) return 0; - s = snapshot_t(c, bch2_snapshot_parent(c, id)); + s = snapshot_t(c, __bch2_snapshot_parent(c, id)); if (id == s->children[0]) return s->children[1]; if (id == s->children[1]) @@ -113,9 +170,15 @@ bool bch2_snapshot_is_ancestor(struct bch_fs *, u32, u32); static inline bool bch2_snapshot_has_children(struct bch_fs *c, u32 id) { - struct snapshot_t *t = snapshot_t(c, id); + const struct snapshot_t *t; + bool ret; - return (t->children[0]|t->children[1]) != 0; + rcu_read_lock(); + t = snapshot_t(c, id); + ret = (t->children[0]|t->children[1]) != 0; + rcu_read_unlock(); + + return ret; } static inline bool snapshot_list_has_id(snapshot_id_list *s, u32 id) diff --git a/fs/bcachefs/subvolume_types.h b/fs/bcachefs/subvolume_types.h index 750d975ac468..c596e4270690 100644 --- a/fs/bcachefs/subvolume_types.h +++ b/fs/bcachefs/subvolume_types.h @@ -16,6 +16,10 @@ struct snapshot_t { u32 equiv; }; +struct snapshot_table { + struct snapshot_t s[0]; +}; + typedef struct { u32 subvol; u64 inum; |