diff options
-rw-r--r-- | fs/bcachefs/btree_journal_iter.c | 23 | ||||
-rw-r--r-- | fs/bcachefs/btree_journal_iter.h | 15 | ||||
-rw-r--r-- | fs/bcachefs/btree_trans_commit.c | 26 | ||||
-rw-r--r-- | fs/bcachefs/btree_update.h | 14 | ||||
-rw-r--r-- | fs/bcachefs/recovery.c | 72 |
5 files changed, 126 insertions, 24 deletions
diff --git a/fs/bcachefs/btree_journal_iter.c b/fs/bcachefs/btree_journal_iter.c index 332dbf164929..74933490aaba 100644 --- a/fs/bcachefs/btree_journal_iter.c +++ b/fs/bcachefs/btree_journal_iter.c @@ -16,21 +16,6 @@ * operations for the regular btree iter code to use: */ -static int __journal_key_cmp(enum btree_id l_btree_id, - unsigned l_level, - struct bpos l_pos, - const struct journal_key *r) -{ - return (cmp_int(l_btree_id, r->btree_id) ?: - cmp_int(l_level, r->level) ?: - bpos_cmp(l_pos, r->k->k.p)); -} - -static int journal_key_cmp(const struct journal_key *l, const struct journal_key *r) -{ - return __journal_key_cmp(l->btree_id, l->level, l->k->k.p, r); -} - static inline size_t idx_to_pos(struct journal_keys *keys, size_t idx) { size_t gap_size = keys->size - keys->nr; @@ -548,7 +533,13 @@ static void __journal_keys_sort(struct journal_keys *keys) struct journal_key *dst = keys->data; darray_for_each(*keys, src) { - if (src + 1 < &darray_top(*keys) && + /* + * We don't accumulate accounting keys here because we have to + * compare each individual accounting key against the version in + * the btree during replay: + */ + if (src->k->k.type != KEY_TYPE_accounting && + src + 1 < &darray_top(*keys) && !journal_key_cmp(src, src + 1)) continue; diff --git a/fs/bcachefs/btree_journal_iter.h b/fs/bcachefs/btree_journal_iter.h index 1ba4a79b0ef9..5b66c8f85fc1 100644 --- a/fs/bcachefs/btree_journal_iter.h +++ b/fs/bcachefs/btree_journal_iter.h @@ -26,6 +26,21 @@ struct btree_and_journal_iter { bool prefetch; }; +static inline int __journal_key_cmp(enum btree_id l_btree_id, + unsigned l_level, + struct bpos l_pos, + const struct journal_key *r) +{ + return (cmp_int(l_btree_id, r->btree_id) ?: + cmp_int(l_level, r->level) ?: + bpos_cmp(l_pos, r->k->k.p)); +} + +static inline int journal_key_cmp(const struct journal_key *l, const struct journal_key *r) +{ + return __journal_key_cmp(l->btree_id, l->level, l->k->k.p, r); +} + struct bkey_i *bch2_journal_keys_peek_upto(struct bch_fs *, enum btree_id, unsigned, struct bpos, struct bpos, size_t *); struct bkey_i *bch2_journal_keys_peek_slot(struct bch_fs *, enum btree_id, diff --git a/fs/bcachefs/btree_trans_commit.c b/fs/bcachefs/btree_trans_commit.c index 5e67dcb30f33..05e819174697 100644 --- a/fs/bcachefs/btree_trans_commit.c +++ b/fs/bcachefs/btree_trans_commit.c @@ -785,8 +785,15 @@ revert_fs_usage: static noinline void bch2_drop_overwrites_from_journal(struct btree_trans *trans) { + /* + * Accounting keys aren't deduped in the journal: we have to compare + * each individual update against what's in the btree to see if it has + * been applied yet, and accounting updates also don't overwrite, + * they're deltas that accumulate. + */ trans_for_each_update(trans, i) - bch2_journal_key_overwritten(trans->c, i->btree_id, i->level, i->k->k.p); + if (i->k->k.type != KEY_TYPE_accounting) + bch2_journal_key_overwritten(trans->c, i->btree_id, i->level, i->k->k.p); } static noinline int bch2_trans_commit_bkey_invalid(struct btree_trans *trans, @@ -993,15 +1000,24 @@ static noinline int do_bch2_trans_commit_to_journal_replay(struct btree_trans *trans) { struct bch_fs *c = trans->c; - int ret = 0; trans_for_each_update(trans, i) { - ret = bch2_journal_key_insert(c, i->btree_id, i->level, i->k); + int ret = bch2_journal_key_insert(c, i->btree_id, i->level, i->k); if (ret) - break; + return ret; } - return ret; + for (struct jset_entry *i = trans->journal_entries; + i != (void *) ((u64 *) trans->journal_entries + trans->journal_entries_u64s); + i = vstruct_next(i)) + if (i->type == BCH_JSET_ENTRY_btree_keys || + i->type == BCH_JSET_ENTRY_write_buffer_keys) { + int ret = bch2_journal_key_insert(c, i->btree_id, i->level, i->start); + if (ret) + return ret; + } + + return 0; } int __bch2_trans_commit(struct btree_trans *trans, unsigned flags) diff --git a/fs/bcachefs/btree_update.h b/fs/bcachefs/btree_update.h index cbe0ee3c7168..3758af7bbde8 100644 --- a/fs/bcachefs/btree_update.h +++ b/fs/bcachefs/btree_update.h @@ -130,7 +130,19 @@ static inline int __must_check bch2_trans_update_buffered(struct btree_trans *tr enum btree_id btree, struct bkey_i *k) { - if (unlikely(trans->journal_replay_not_finished)) + /* + * Most updates skip the btree write buffer until journal replay is + * finished because synchronization with journal replay relies on having + * a btree node locked - if we're overwriting a key in the journal that + * journal replay hasn't yet replayed, we have to mark it as + * overwritten. + * + * But accounting updates don't overwrite, they're deltas, and they have + * to be flushed to the btree strictly in order for journal replay to be + * able to tell which updates need to be applied: + */ + if (k->k.type != KEY_TYPE_accounting && + unlikely(trans->journal_replay_not_finished)) return bch2_btree_insert_clone_trans(trans, btree, k); struct jset_entry *e = bch2_trans_jset_entry_alloc(trans, jset_u64s(k->k.u64s)); diff --git a/fs/bcachefs/recovery.c b/fs/bcachefs/recovery.c index d336a7c69edd..0091af3beeef 100644 --- a/fs/bcachefs/recovery.c +++ b/fs/bcachefs/recovery.c @@ -10,6 +10,7 @@ #include "btree_io.h" #include "buckets.h" #include "dirent.h" +#include "disk_accounting.h" #include "errcode.h" #include "error.h" #include "fs-common.h" @@ -135,6 +136,47 @@ static void replay_now_at(struct journal *j, u64 seq) bch2_journal_pin_put(j, j->replay_journal_seq++); } +static int bch2_journal_replay_accounting_key(struct btree_trans *trans, + struct journal_key *k) +{ + struct journal_keys *keys = &trans->c->journal_keys; + + struct btree_iter iter; + bch2_trans_node_iter_init(trans, &iter, k->btree_id, k->k->k.p, + BTREE_MAX_DEPTH, k->level, + BTREE_ITER_intent); + int ret = bch2_btree_iter_traverse(&iter); + if (ret) + goto out; + + struct bkey u; + struct bkey_s_c old = bch2_btree_path_peek_slot(btree_iter_path(trans, &iter), &u); + + /* Has this delta already been applied to the btree? */ + if (bversion_cmp(old.k->version, k->k->k.version) >= 0) { + ret = 0; + goto out; + } + + struct bkey_i *new = k->k; + if (old.k->type == KEY_TYPE_accounting) { + new = bch2_bkey_make_mut_noupdate(trans, bkey_i_to_s_c(k->k)); + ret = PTR_ERR_OR_ZERO(new); + if (ret) + goto out; + + bch2_accounting_accumulate(bkey_i_to_accounting(new), + bkey_s_c_to_accounting(old)); + } + + trans->journal_res.seq = k->journal_seq; + + ret = bch2_trans_update(trans, &iter, new, BTREE_TRIGGER_norun); +out: + bch2_trans_iter_exit(trans, &iter); + return ret; +} + static int bch2_journal_replay_key(struct btree_trans *trans, struct journal_key *k) { @@ -185,6 +227,11 @@ static int bch2_journal_replay_key(struct btree_trans *trans, if (k->overwritten) goto out; + if (k->k->k.type == KEY_TYPE_accounting) { + ret = bch2_trans_update_buffered(trans, BTREE_ID_accounting, k->k); + goto out; + } + ret = bch2_trans_update(trans, &iter, k->k, update_flags); out: bch2_trans_iter_exit(trans, &iter); @@ -223,6 +270,27 @@ int bch2_journal_replay(struct bch_fs *c) trans = bch2_trans_get(c); /* + * Replay accounting keys first: we can't allow the write buffer to + * flush accounting keys until we're done + */ + darray_for_each(*keys, k) { + if (!(k->k->k.type == KEY_TYPE_accounting && !k->allocated)) + continue; + + cond_resched(); + + ret = commit_do(trans, NULL, NULL, + BCH_TRANS_COMMIT_no_enospc| + BCH_TRANS_COMMIT_journal_reclaim| + BCH_TRANS_COMMIT_no_journal_res, + bch2_journal_replay_accounting_key(trans, k)); + if (bch2_fs_fatal_err_on(ret, c, "error replaying accounting; %s", bch2_err_str(ret))) + goto err; + + k->overwritten = true; + } + + /* * First, attempt to replay keys in sorted order. This is more * efficient - better locality of btree access - but some might fail if * that would cause a journal deadlock. @@ -244,7 +312,7 @@ int bch2_journal_replay(struct bch_fs *c) BCH_TRANS_COMMIT_journal_reclaim| (!k->allocated ? BCH_TRANS_COMMIT_no_journal_res : 0), bch2_journal_replay_key(trans, k)); - BUG_ON(!ret && !k->overwritten); + BUG_ON(!ret && !k->overwritten && k->k->k.type != KEY_TYPE_accounting); if (ret) { ret = darray_push(&keys_sorted, k); if (ret) @@ -281,7 +349,7 @@ int bch2_journal_replay(struct bch_fs *c) if (ret) goto err; - BUG_ON(!k->overwritten); + BUG_ON(k->btree_id != BTREE_ID_accounting && !k->overwritten); } /* |