diff options
Diffstat (limited to 'fs')
-rw-r--r-- | fs/bcachefs/Makefile | 1 | ||||
-rw-r--r-- | fs/bcachefs/alloc_background.c | 104 | ||||
-rw-r--r-- | fs/bcachefs/alloc_background.h | 16 | ||||
-rw-r--r-- | fs/bcachefs/alloc_foreground.c | 23 | ||||
-rw-r--r-- | fs/bcachefs/backpointers.c | 799 | ||||
-rw-r--r-- | fs/bcachefs/backpointers.h | 131 | ||||
-rw-r--r-- | fs/bcachefs/bcachefs.h | 7 | ||||
-rw-r--r-- | fs/bcachefs/bcachefs_format.h | 62 | ||||
-rw-r--r-- | fs/bcachefs/bkey_methods.c | 4 | ||||
-rw-r--r-- | fs/bcachefs/buckets.c | 48 | ||||
-rw-r--r-- | fs/bcachefs/buckets.h | 19 | ||||
-rw-r--r-- | fs/bcachefs/errcode.h | 1 | ||||
-rw-r--r-- | fs/bcachefs/recovery.c | 35 | ||||
-rw-r--r-- | fs/bcachefs/super.c | 2 |
14 files changed, 1186 insertions, 66 deletions
diff --git a/fs/bcachefs/Makefile b/fs/bcachefs/Makefile index c0e715760c8b..456d540441ce 100644 --- a/fs/bcachefs/Makefile +++ b/fs/bcachefs/Makefile @@ -4,6 +4,7 @@ obj-$(CONFIG_BCACHEFS_FS) += bcachefs.o bcachefs-y := \ alloc_background.o \ alloc_foreground.o \ + backpointers.o \ bkey.o \ bkey_methods.o \ bkey_sort.o \ diff --git a/fs/bcachefs/alloc_background.c b/fs/bcachefs/alloc_background.c index f75d05beaf31..58ec650a512c 100644 --- a/fs/bcachefs/alloc_background.c +++ b/fs/bcachefs/alloc_background.c @@ -2,6 +2,7 @@ #include "bcachefs.h" #include "alloc_background.h" #include "alloc_foreground.h" +#include "backpointers.h" #include "btree_cache.h" #include "btree_io.h" #include "btree_key_cache.h" @@ -266,12 +267,34 @@ int bch2_alloc_v4_invalid(const struct bch_fs *c, struct bkey_s_c k, { struct bkey_s_c_alloc_v4 a = bkey_s_c_to_alloc_v4(k); - if (bkey_val_bytes(k.k) != sizeof(struct bch_alloc_v4)) { - prt_printf(err, "bad val size (%zu != %zu)", - bkey_val_bytes(k.k), sizeof(struct bch_alloc_v4)); + if (alloc_v4_u64s(a.v) != bkey_val_u64s(k.k)) { + prt_printf(err, "bad val size (%lu != %u)", + bkey_val_u64s(k.k), alloc_v4_u64s(a.v)); return -BCH_ERR_invalid_bkey; } + if (!BCH_ALLOC_V4_BACKPOINTERS_START(a.v) && + BCH_ALLOC_V4_NR_BACKPOINTERS(a.v)) { + prt_printf(err, "invalid backpointers_start"); + return -BCH_ERR_invalid_bkey; + } + + /* + * XXX this is wrong, we'll be checking updates that happened from + * before BCH_FS_CHECK_BACKPOINTERS_DONE + */ + if (rw == WRITE && test_bit(BCH_FS_CHECK_BACKPOINTERS_DONE, &c->flags)) { + unsigned i, bp_len = 0; + + for (i = 0; i < BCH_ALLOC_V4_NR_BACKPOINTERS(a.v); i++) + bp_len += alloc_v4_backpointers_c(a.v)[i].bucket_len; + + if (bp_len > a.v->dirty_sectors) { + prt_printf(err, "too many backpointers"); + return -BCH_ERR_invalid_bkey; + } + } + if (rw == WRITE) { if (alloc_data_type(*a.v, a.v->data_type) != a.v->data_type) { prt_printf(err, "invalid data type (got %u should be %u)", @@ -328,9 +351,19 @@ int 
bch2_alloc_v4_invalid(const struct bch_fs *c, struct bkey_s_c k, return 0; } +static inline u64 swab40(u64 x) +{ + return (((x & 0x00000000ffULL) << 32)| + ((x & 0x000000ff00ULL) << 16)| + ((x & 0x0000ff0000ULL) >> 0)| + ((x & 0x00ff000000ULL) >> 16)| + ((x & 0xff00000000ULL) >> 32)); +} + void bch2_alloc_v4_swab(struct bkey_s k) { struct bch_alloc_v4 *a = bkey_s_to_alloc_v4(k).v; + struct bch_backpointer *bp, *bps; a->journal_seq = swab64(a->journal_seq); a->flags = swab32(a->flags); @@ -340,12 +373,20 @@ void bch2_alloc_v4_swab(struct bkey_s k) a->io_time[1] = swab64(a->io_time[1]); a->stripe = swab32(a->stripe); a->nr_external_backpointers = swab32(a->nr_external_backpointers); + + bps = alloc_v4_backpointers(a); + for (bp = bps; bp < bps + BCH_ALLOC_V4_NR_BACKPOINTERS(a); bp++) { + bp->bucket_offset = swab40(bp->bucket_offset); + bp->bucket_len = swab32(bp->bucket_len); + bch2_bpos_swab(&bp->pos); + } } void bch2_alloc_to_text(struct printbuf *out, struct bch_fs *c, struct bkey_s_c k) { struct bch_alloc_v4 _a; const struct bch_alloc_v4 *a = bch2_alloc_to_v4(k, &_a); + unsigned i; prt_newline(out); printbuf_indent_add(out, 2); @@ -374,14 +415,25 @@ void bch2_alloc_to_text(struct printbuf *out, struct bch_fs *c, struct bkey_s_c prt_newline(out); prt_printf(out, "io_time[WRITE] %llu", a->io_time[WRITE]); prt_newline(out); - prt_printf(out, "backpointers: %llu", BCH_ALLOC_V4_NR_BACKPOINTERS(a)); + prt_printf(out, "bp_start %llu", BCH_ALLOC_V4_BACKPOINTERS_START(a)); + prt_newline(out); - printbuf_indent_sub(out, 2); -} + if (BCH_ALLOC_V4_NR_BACKPOINTERS(a)) { + struct bkey_s_c_alloc_v4 a_raw = bkey_s_c_to_alloc_v4(k); + const struct bch_backpointer *bps = alloc_v4_backpointers_c(a_raw.v); -static inline void *alloc_v4_backpointers(struct bch_alloc_v4 *a) -{ - return (void *) ((u64 *) &a->v + BCH_ALLOC_V4_BACKPOINTERS_START(a)); + prt_printf(out, "backpointers: %llu", BCH_ALLOC_V4_NR_BACKPOINTERS(a_raw.v)); + printbuf_indent_add(out, 2); + + for (i = 0; i < 
BCH_ALLOC_V4_NR_BACKPOINTERS(a_raw.v); i++) { + prt_newline(out); + bch2_backpointer_to_text(out, &bps[i]); + } + + printbuf_indent_sub(out, 2); + } + + printbuf_indent_sub(out, 2); } void __bch2_alloc_to_v4(struct bkey_s_c k, struct bch_alloc_v4 *out) @@ -422,12 +474,18 @@ static noinline struct bkey_i_alloc_v4 * __bch2_alloc_to_v4_mut(struct btree_trans *trans, struct bkey_s_c k) { struct bkey_i_alloc_v4 *ret; - if (k.k->type == KEY_TYPE_alloc_v4) { - unsigned bytes = min(sizeof(struct bkey_i_alloc_v4), bkey_bytes(k.k)); + struct bkey_s_c_alloc_v4 a = bkey_s_c_to_alloc_v4(k); + unsigned bytes = sizeof(struct bkey_i_alloc_v4) + + BCH_ALLOC_V4_NR_BACKPOINTERS(a.v) * + sizeof(struct bch_backpointer); void *src, *dst; - ret = bch2_trans_kmalloc(trans, bytes); + /* + * Reserve space for one more backpointer here: + * Not sketchy at doing it this way, nope... + */ + ret = bch2_trans_kmalloc(trans, bytes + sizeof(struct bch_backpointer)); if (IS_ERR(ret)) return ret; @@ -437,16 +495,20 @@ __bch2_alloc_to_v4_mut(struct btree_trans *trans, struct bkey_s_c k) SET_BCH_ALLOC_V4_BACKPOINTERS_START(&ret->v, BCH_ALLOC_V4_U64s); dst = alloc_v4_backpointers(&ret->v); + memmove(dst, src, BCH_ALLOC_V4_NR_BACKPOINTERS(&ret->v) * + sizeof(struct bch_backpointer)); if (src < dst) memset(src, 0, dst - src); set_alloc_v4_u64s(ret); } else { - ret = bch2_trans_kmalloc(trans, sizeof(*ret)); - if (!IS_ERR(ret)) { - bkey_alloc_v4_init(&ret->k_i); - ret->k.p = k.k->p; - bch2_alloc_to_v4(k, &ret->v); - } + ret = bch2_trans_kmalloc(trans, sizeof(struct bkey_i_alloc_v4) + + sizeof(struct bch_backpointer)); + if (IS_ERR(ret)) + return ret; + + bkey_alloc_v4_init(&ret->k_i); + ret->k.p = k.k->p; + bch2_alloc_to_v4(k, &ret->v); } return ret; } @@ -455,8 +517,12 @@ static inline struct bkey_i_alloc_v4 *bch2_alloc_to_v4_mut_inlined(struct btree_ { if (likely(k.k->type == KEY_TYPE_alloc_v4) && BCH_ALLOC_V4_BACKPOINTERS_START(bkey_s_c_to_alloc_v4(k).v) == BCH_ALLOC_V4_U64s) { + /* + * Reserve space 
for one more backpointer here: + * Not sketchy at doing it this way, nope... + */ struct bkey_i_alloc_v4 *ret = - bch2_trans_kmalloc_nomemzero(trans, bkey_bytes(k.k)); + bch2_trans_kmalloc_nomemzero(trans, bkey_bytes(k.k) + sizeof(struct bch_backpointer)); if (!IS_ERR(ret)) bkey_reassemble(&ret->k_i, k); return ret; diff --git a/fs/bcachefs/alloc_background.h b/fs/bcachefs/alloc_background.h index c562aff3ac33..b843316d3846 100644 --- a/fs/bcachefs/alloc_background.h +++ b/fs/bcachefs/alloc_background.h @@ -73,7 +73,9 @@ static inline struct bpos alloc_freespace_pos(struct bpos pos, struct bch_alloc_ static inline unsigned alloc_v4_u64s(const struct bch_alloc_v4 *a) { unsigned ret = (BCH_ALLOC_V4_BACKPOINTERS_START(a) ?: - BCH_ALLOC_V4_U64s_V0); + BCH_ALLOC_V4_U64s_V0) + + BCH_ALLOC_V4_NR_BACKPOINTERS(a) * + (sizeof(struct bch_backpointer) / sizeof(u64)); BUG_ON(ret > U8_MAX - BKEY_U64s); return ret; @@ -175,6 +177,18 @@ static inline u64 should_invalidate_buckets(struct bch_dev *ca, void bch2_do_invalidates(struct bch_fs *); +static inline struct bch_backpointer *alloc_v4_backpointers(struct bch_alloc_v4 *a) +{ + return (void *) ((u64 *) &a->v + + (BCH_ALLOC_V4_BACKPOINTERS_START(a) ?: + BCH_ALLOC_V4_U64s_V0)); +} + +static inline const struct bch_backpointer *alloc_v4_backpointers_c(const struct bch_alloc_v4 *a) +{ + return (void *) ((u64 *) &a->v + BCH_ALLOC_V4_BACKPOINTERS_START(a)); +} + int bch2_fs_freespace_init(struct bch_fs *); void bch2_recalc_capacity(struct bch_fs *); diff --git a/fs/bcachefs/alloc_foreground.c b/fs/bcachefs/alloc_foreground.c index ba14cfe06515..5988aa288c98 100644 --- a/fs/bcachefs/alloc_foreground.c +++ b/fs/bcachefs/alloc_foreground.c @@ -14,6 +14,7 @@ #include "bcachefs.h" #include "alloc_background.h" #include "alloc_foreground.h" +#include "backpointers.h" #include "btree_iter.h" #include "btree_update.h" #include "btree_gc.h" @@ -346,6 +347,28 @@ static struct open_bucket *try_alloc_bucket(struct btree_trans *trans, struct bc } 
+ if (!test_bit(BCH_FS_CHECK_BACKPOINTERS_DONE, &c->flags)) { + struct bch_backpointer bp; + u64 bp_offset = 0; + + ret = bch2_get_next_backpointer(trans, POS(ca->dev_idx, b), -1, + &bp_offset, &bp); + if (ret) { + ob = ERR_PTR(ret); + goto err; + } + + if (bp_offset != U64_MAX) { + /* + * Bucket may have data in it - we don't call + * bc2h_trans_inconnsistent() because fsck hasn't + * finished yet + */ + ob = NULL; + goto err; + } + } + ob = __try_alloc_bucket(c, ca, b, reserve, a, s, cl); if (!ob) iter.path->preserve = false; diff --git a/fs/bcachefs/backpointers.c b/fs/bcachefs/backpointers.c new file mode 100644 index 000000000000..6efc286cd6ba --- /dev/null +++ b/fs/bcachefs/backpointers.c @@ -0,0 +1,799 @@ +// SPDX-License-Identifier: GPL-2.0 +#include "bcachefs.h" +#include "alloc_background.h" +#include "backpointers.h" +#include "btree_cache.h" +#include "btree_update.h" +#include "error.h" + +static bool extent_matches_bp(struct bch_fs *c, + enum btree_id btree_id, unsigned level, + struct bkey_s_c k, + struct bpos bucket, + struct bch_backpointer bp) +{ + struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); + const union bch_extent_entry *entry; + struct extent_ptr_decoded p; + + bkey_for_each_ptr_decode(k.k, ptrs, p, entry) { + struct bpos bucket2; + struct bch_backpointer bp2; + + if (p.ptr.cached) + continue; + + bch2_extent_ptr_to_bp(c, btree_id, level, k, p, + &bucket2, &bp2); + if (bpos_eq(bucket, bucket2) && + !memcmp(&bp, &bp2, sizeof(bp))) + return true; + } + + return false; +} + +int bch2_backpointer_invalid(const struct bch_fs *c, struct bkey_s_c k, + int rw, struct printbuf *err) +{ + struct bkey_s_c_backpointer bp = bkey_s_c_to_backpointer(k); + struct bpos bucket = bp_pos_to_bucket(c, bp.k->p); + + if (bkey_val_bytes(bp.k) < sizeof(*bp.v)) { + prt_str(err, "incorrect value size"); + return -BCH_ERR_invalid_bkey; + } + + if (!bpos_eq(bp.k->p, bucket_pos_to_bp(c, bucket, bp.v->bucket_offset))) { + prt_str(err, "backpointer at wrong pos"); + return 
-BCH_ERR_invalid_bkey; + } + + return 0; +} + +void bch2_backpointer_to_text(struct printbuf *out, const struct bch_backpointer *bp) +{ + prt_printf(out, "btree=%s l=%u offset=%llu:%u len=%u pos=", + bch2_btree_ids[bp->btree_id], + bp->level, + (u64) (bp->bucket_offset >> MAX_EXTENT_COMPRESS_RATIO_SHIFT), + (u32) bp->bucket_offset & ~(~0U << MAX_EXTENT_COMPRESS_RATIO_SHIFT), + bp->bucket_len); + bch2_bpos_to_text(out, bp->pos); +} + +void bch2_backpointer_k_to_text(struct printbuf *out, struct bch_fs *c, struct bkey_s_c k) +{ + bch2_backpointer_to_text(out, bkey_s_c_to_backpointer(k).v); +} + +void bch2_backpointer_swab(struct bkey_s k) +{ + struct bkey_s_backpointer bp = bkey_s_to_backpointer(k); + + bp.v->bucket_offset = swab32(bp.v->bucket_offset); + bp.v->bucket_len = swab32(bp.v->bucket_len); + bch2_bpos_swab(&bp.v->pos); +} + +#define BACKPOINTER_OFFSET_MAX ((1ULL << 40) - 1) + +static inline int backpointer_cmp(struct bch_backpointer l, struct bch_backpointer r) +{ + return cmp_int(l.bucket_offset, r.bucket_offset); +} + +static int bch2_backpointer_del_by_offset(struct btree_trans *trans, + struct bpos bucket, + u64 bp_offset, + struct bch_backpointer bp) +{ + struct bch_fs *c = trans->c; + struct btree_iter iter; + struct bkey_s_c k; + int ret; + + if (bp_offset < BACKPOINTER_OFFSET_MAX) { + struct bch_backpointer *bps; + struct bkey_i_alloc_v4 *a; + unsigned i, nr; + + bch2_trans_iter_init(trans, &iter, BTREE_ID_alloc, + bucket, + BTREE_ITER_INTENT| + BTREE_ITER_SLOTS| + BTREE_ITER_WITH_UPDATES); + k = bch2_btree_iter_peek_slot(&iter); + ret = bkey_err(k); + if (ret) + goto err; + + if (k.k->type != KEY_TYPE_alloc_v4) { + ret = -ENOENT; + goto err; + } + + a = bch2_alloc_to_v4_mut(trans, k); + ret = PTR_ERR_OR_ZERO(a); + if (ret) + goto err; + bps = alloc_v4_backpointers(&a->v); + nr = BCH_ALLOC_V4_NR_BACKPOINTERS(&a->v); + + for (i = 0; i < nr; i++) { + if (bps[i].bucket_offset == bp_offset) + goto found; + if (bps[i].bucket_offset > bp_offset) + break; 
+ } + + ret = -ENOENT; + goto err; +found: + if (memcmp(&bps[i], &bp, sizeof(bp))) { + ret = -ENOENT; + goto err; + } + array_remove_item(bps, nr, i); + SET_BCH_ALLOC_V4_NR_BACKPOINTERS(&a->v, nr); + set_alloc_v4_u64s(a); + ret = bch2_trans_update(trans, &iter, &a->k_i, 0); + } else { + bp_offset -= BACKPOINTER_OFFSET_MAX; + + bch2_trans_iter_init(trans, &iter, BTREE_ID_backpointers, + bucket_pos_to_bp(c, bucket, bp_offset), + BTREE_ITER_INTENT| + BTREE_ITER_SLOTS| + BTREE_ITER_WITH_UPDATES); + k = bch2_btree_iter_peek_slot(&iter); + ret = bkey_err(k); + if (ret) + goto err; + + if (k.k->type != KEY_TYPE_backpointer || + memcmp(bkey_s_c_to_backpointer(k).v, &bp, sizeof(bp))) { + ret = -ENOENT; + goto err; + } + + ret = bch2_btree_delete_at(trans, &iter, 0); + } +err: + bch2_trans_iter_exit(trans, &iter); + return ret; +} + +bool bch2_bucket_backpointer_del(struct btree_trans *trans, + struct bkey_i_alloc_v4 *a, + struct bch_backpointer bp) +{ + struct bch_backpointer *bps = alloc_v4_backpointers(&a->v); + unsigned i, nr = BCH_ALLOC_V4_NR_BACKPOINTERS(&a->v); + + for (i = 0; i < nr; i++) { + int cmp = backpointer_cmp(bps[i], bp) ?: + memcmp(&bps[i], &bp, sizeof(bp)); + if (!cmp) { + array_remove_item(bps, nr, i); + SET_BCH_ALLOC_V4_NR_BACKPOINTERS(&a->v, nr); + set_alloc_v4_u64s(a); + return true; + } + if (cmp >= 0) + break; + } + + return false; +} + +static noinline int backpointer_mod_err(struct btree_trans *trans, + struct bch_backpointer bp, + struct bkey_s_c bp_k, + struct bkey_s_c orig_k, + bool insert) +{ + struct bch_fs *c = trans->c; + struct printbuf buf = PRINTBUF; + + if (insert) { + prt_printf(&buf, "existing backpointer found when inserting "); + bch2_backpointer_to_text(&buf, &bp); + prt_newline(&buf); + printbuf_indent_add(&buf, 2); + + prt_printf(&buf, "found "); + bch2_bkey_val_to_text(&buf, c, bp_k); + prt_newline(&buf); + + prt_printf(&buf, "for "); + bch2_bkey_val_to_text(&buf, c, orig_k); + + bch_err(c, "%s", buf.buf); + } else if 
(test_bit(BCH_FS_CHECK_BACKPOINTERS_DONE, &c->flags)) { + prt_printf(&buf, "backpointer not found when deleting"); + prt_newline(&buf); + printbuf_indent_add(&buf, 2); + + prt_printf(&buf, "searching for "); + bch2_backpointer_to_text(&buf, &bp); + prt_newline(&buf); + + prt_printf(&buf, "got "); + bch2_bkey_val_to_text(&buf, c, bp_k); + prt_newline(&buf); + + prt_printf(&buf, "for "); + bch2_bkey_val_to_text(&buf, c, orig_k); + + bch_err(c, "%s", buf.buf); + } + + printbuf_exit(&buf); + + if (test_bit(BCH_FS_CHECK_BACKPOINTERS_DONE, &c->flags)) { + bch2_inconsistent_error(c); + return -EIO; + } else { + return 0; + } +} + +int bch2_bucket_backpointer_mod_nowritebuffer(struct btree_trans *trans, + struct bkey_i_alloc_v4 *a, + struct bch_backpointer bp, + struct bkey_s_c orig_k, + bool insert) +{ + struct bch_fs *c = trans->c; + struct bkey_i_backpointer *bp_k; + struct btree_iter bp_iter; + struct bkey_s_c k; + int ret; + + bp_k = bch2_trans_kmalloc_nomemzero(trans, sizeof(struct bkey_i_backpointer)); + ret = PTR_ERR_OR_ZERO(bp_k); + if (ret) + return ret; + + bkey_backpointer_init(&bp_k->k_i); + bp_k->k.p = bucket_pos_to_bp(c, a->k.p, bp.bucket_offset); + bp_k->v = bp; + + if (!insert) { + bp_k->k.type = KEY_TYPE_deleted; + set_bkey_val_u64s(&bp_k->k, 0); + } + + bch2_trans_iter_init(trans, &bp_iter, BTREE_ID_backpointers, + bucket_pos_to_bp(c, a->k.p, bp.bucket_offset), + BTREE_ITER_INTENT| + BTREE_ITER_SLOTS| + BTREE_ITER_WITH_UPDATES); + k = bch2_btree_iter_peek_slot(&bp_iter); + ret = bkey_err(k); + if (ret) + goto err; + + if (insert + ? 
k.k->type + : (k.k->type != KEY_TYPE_backpointer || + memcmp(bkey_s_c_to_backpointer(k).v, &bp, sizeof(bp)))) { + ret = backpointer_mod_err(trans, bp, k, orig_k, insert); + if (ret) + goto err; + } + + ret = bch2_trans_update(trans, &bp_iter, &bp_k->k_i, 0); +err: + bch2_trans_iter_exit(trans, &bp_iter); + return ret; +} + +/* + * Find the next backpointer >= *bp_offset: + */ +int bch2_get_next_backpointer(struct btree_trans *trans, + struct bpos bucket, int gen, + u64 *bp_offset, + struct bch_backpointer *dst) +{ + struct bch_fs *c = trans->c; + struct bpos bp_pos, bp_end_pos; + struct btree_iter alloc_iter, bp_iter = { NULL }; + struct bkey_s_c k; + struct bkey_s_c_alloc_v4 a; + size_t i; + int ret; + + if (*bp_offset == U64_MAX) + return 0; + + bp_pos = bucket_pos_to_bp(c, bucket, + max(*bp_offset, BACKPOINTER_OFFSET_MAX) - BACKPOINTER_OFFSET_MAX); + bp_end_pos = bucket_pos_to_bp(c, bpos_nosnap_successor(bucket), 0); + + bch2_trans_iter_init(trans, &alloc_iter, BTREE_ID_alloc, + bucket, BTREE_ITER_CACHED); + k = bch2_btree_iter_peek_slot(&alloc_iter); + ret = bkey_err(k); + if (ret) + goto out; + + if (k.k->type != KEY_TYPE_alloc_v4) + goto done; + + a = bkey_s_c_to_alloc_v4(k); + if (gen >= 0 && a.v->gen != gen) + goto done; + + for (i = 0; i < BCH_ALLOC_V4_NR_BACKPOINTERS(a.v); i++) { + if (alloc_v4_backpointers_c(a.v)[i].bucket_offset < *bp_offset) + continue; + + *dst = alloc_v4_backpointers_c(a.v)[i]; + *bp_offset = dst->bucket_offset; + goto out; + } + + for_each_btree_key_norestart(trans, bp_iter, BTREE_ID_backpointers, + bp_pos, 0, k, ret) { + if (bpos_ge(k.k->p, bp_end_pos)) + break; + + if (k.k->type != KEY_TYPE_backpointer) + continue; + + *dst = *bkey_s_c_to_backpointer(k).v; + *bp_offset = dst->bucket_offset + BACKPOINTER_OFFSET_MAX; + goto out; + } +done: + *bp_offset = U64_MAX; +out: + bch2_trans_iter_exit(trans, &bp_iter); + bch2_trans_iter_exit(trans, &alloc_iter); + return ret; +} + +static void backpointer_not_found(struct btree_trans *trans, 
+ struct bpos bucket, + u64 bp_offset, + struct bch_backpointer bp, + struct bkey_s_c k, + const char *thing_it_points_to) +{ + struct bch_fs *c = trans->c; + struct printbuf buf = PRINTBUF; + + if (likely(!bch2_backpointers_no_use_write_buffer)) + return; + + prt_printf(&buf, "backpointer doesn't match %s it points to:\n ", + thing_it_points_to); + prt_printf(&buf, "bucket: "); + bch2_bpos_to_text(&buf, bucket); + prt_printf(&buf, "\n "); + + if (bp_offset >= BACKPOINTER_OFFSET_MAX) { + struct bpos bp_pos = + bucket_pos_to_bp(c, bucket, + bp_offset - BACKPOINTER_OFFSET_MAX); + prt_printf(&buf, "backpointer pos: "); + bch2_bpos_to_text(&buf, bp_pos); + prt_printf(&buf, "\n "); + } + + bch2_backpointer_to_text(&buf, &bp); + prt_printf(&buf, "\n "); + bch2_bkey_val_to_text(&buf, c, k); + if (!test_bit(BCH_FS_CHECK_BACKPOINTERS_DONE, &c->flags)) + bch_err_ratelimited(c, "%s", buf.buf); + else + bch2_trans_inconsistent(trans, "%s", buf.buf); + + printbuf_exit(&buf); +} + +struct bkey_s_c bch2_backpointer_get_key(struct btree_trans *trans, + struct btree_iter *iter, + struct bpos bucket, + u64 bp_offset, + struct bch_backpointer bp) +{ + struct bch_fs *c = trans->c; + struct bkey_s_c k; + + bch2_trans_node_iter_init(trans, iter, + bp.btree_id, + bp.pos, + 0, + min(bp.level, c->btree_roots[bp.btree_id].level), + 0); + k = bch2_btree_iter_peek_slot(iter); + if (bkey_err(k)) { + bch2_trans_iter_exit(trans, iter); + return k; + } + + if (bp.level == c->btree_roots[bp.btree_id].level + 1) + k = bkey_i_to_s_c(&c->btree_roots[bp.btree_id].key); + + if (k.k && extent_matches_bp(c, bp.btree_id, bp.level, k, bucket, bp)) + return k; + + bch2_trans_iter_exit(trans, iter); + + if (unlikely(bch2_backpointers_no_use_write_buffer)) { + if (bp.level) { + struct btree *b; + + /* + * If a backpointer for a btree node wasn't found, it may be + * because it was overwritten by a new btree node that hasn't + * been written out yet - backpointer_get_node() checks for + * this: + */ + b = 
bch2_backpointer_get_node(trans, iter, bucket, bp_offset, bp); + if (!IS_ERR_OR_NULL(b)) + return bkey_i_to_s_c(&b->key); + + bch2_trans_iter_exit(trans, iter); + + if (IS_ERR(b)) + return bkey_s_c_err(PTR_ERR(b)); + return bkey_s_c_null; + } + + backpointer_not_found(trans, bucket, bp_offset, bp, k, "extent"); + } + + return bkey_s_c_null; +} + +struct btree *bch2_backpointer_get_node(struct btree_trans *trans, + struct btree_iter *iter, + struct bpos bucket, + u64 bp_offset, + struct bch_backpointer bp) +{ + struct bch_fs *c = trans->c; + struct btree *b; + + BUG_ON(!bp.level); + + bch2_trans_node_iter_init(trans, iter, + bp.btree_id, + bp.pos, + 0, + bp.level - 1, + 0); + b = bch2_btree_iter_peek_node(iter); + if (IS_ERR(b)) + goto err; + + if (b && extent_matches_bp(c, bp.btree_id, bp.level, + bkey_i_to_s_c(&b->key), + bucket, bp)) + return b; + + if (b && btree_node_will_make_reachable(b)) { + b = ERR_PTR(-BCH_ERR_backpointer_to_overwritten_btree_node); + } else { + backpointer_not_found(trans, bucket, bp_offset, bp, + bkey_i_to_s_c(&b->key), "btree node"); + b = NULL; + } +err: + bch2_trans_iter_exit(trans, iter); + return b; +} + +static int bch2_check_btree_backpointer(struct btree_trans *trans, struct btree_iter *bp_iter, + struct bkey_s_c k) +{ + struct bch_fs *c = trans->c; + struct btree_iter alloc_iter = { NULL }; + struct bch_dev *ca; + struct bkey_s_c alloc_k; + struct printbuf buf = PRINTBUF; + int ret = 0; + + if (fsck_err_on(!bch2_dev_exists2(c, k.k->p.inode), c, + "backpointer for mising device:\n%s", + (bch2_bkey_val_to_text(&buf, c, k), buf.buf))) { + ret = bch2_btree_delete_at(trans, bp_iter, 0); + goto out; + } + + ca = bch_dev_bkey_exists(c, k.k->p.inode); + + bch2_trans_iter_init(trans, &alloc_iter, BTREE_ID_alloc, + bp_pos_to_bucket(c, k.k->p), 0); + + alloc_k = bch2_btree_iter_peek_slot(&alloc_iter); + ret = bkey_err(alloc_k); + if (ret) + goto out; + + if (fsck_err_on(alloc_k.k->type != KEY_TYPE_alloc_v4, c, + "backpointer for 
nonexistent alloc key: %llu:%llu:0\n%s", + alloc_iter.pos.inode, alloc_iter.pos.offset, + (bch2_bkey_val_to_text(&buf, c, alloc_k), buf.buf))) { + ret = bch2_btree_delete_at(trans, bp_iter, 0); + goto out; + } +out: +fsck_err: + bch2_trans_iter_exit(trans, &alloc_iter); + printbuf_exit(&buf); + return ret; +} + +/* verify that every backpointer has a corresponding alloc key */ +int bch2_check_btree_backpointers(struct bch_fs *c) +{ + struct btree_iter iter; + struct bkey_s_c k; + + return bch2_trans_run(c, + for_each_btree_key_commit(&trans, iter, + BTREE_ID_backpointers, POS_MIN, 0, k, + NULL, NULL, BTREE_INSERT_LAZY_RW|BTREE_INSERT_NOFAIL, + bch2_check_btree_backpointer(&trans, &iter, k))); +} + +static int check_bp_exists(struct btree_trans *trans, + struct bpos bucket_pos, + struct bch_backpointer bp, + struct bkey_s_c orig_k) +{ + struct bch_fs *c = trans->c; + struct btree_iter alloc_iter, bp_iter = { NULL }; + struct printbuf buf = PRINTBUF; + struct bkey_s_c alloc_k, bp_k; + int ret; + + bch2_trans_iter_init(trans, &alloc_iter, BTREE_ID_alloc, bucket_pos, 0); + alloc_k = bch2_btree_iter_peek_slot(&alloc_iter); + ret = bkey_err(alloc_k); + if (ret) + goto err; + + if (alloc_k.k->type == KEY_TYPE_alloc_v4) { + struct bkey_s_c_alloc_v4 a = bkey_s_c_to_alloc_v4(alloc_k); + const struct bch_backpointer *bps = alloc_v4_backpointers_c(a.v); + unsigned i, nr = BCH_ALLOC_V4_NR_BACKPOINTERS(a.v); + + for (i = 0; i < nr; i++) { + int cmp = backpointer_cmp(bps[i], bp) ?: + memcmp(&bps[i], &bp, sizeof(bp)); + if (!cmp) + goto out; + if (cmp >= 0) + break; + } + } else { + goto missing; + } + + bch2_trans_iter_init(trans, &bp_iter, BTREE_ID_backpointers, + bucket_pos_to_bp(c, bucket_pos, bp.bucket_offset), + 0); + bp_k = bch2_btree_iter_peek_slot(&bp_iter); + ret = bkey_err(bp_k); + if (ret) + goto err; + + if (bp_k.k->type != KEY_TYPE_backpointer || + memcmp(bkey_s_c_to_backpointer(bp_k).v, &bp, sizeof(bp))) + goto missing; +out: +err: +fsck_err: + 
bch2_trans_iter_exit(trans, &bp_iter); + bch2_trans_iter_exit(trans, &alloc_iter); + printbuf_exit(&buf); + return ret; +missing: + prt_printf(&buf, "missing backpointer for btree=%s l=%u ", + bch2_btree_ids[bp.btree_id], bp.level); + bch2_bkey_val_to_text(&buf, c, orig_k); + prt_printf(&buf, "\nbp pos "); + bch2_bpos_to_text(&buf, bp_iter.pos); + + if (c->sb.version < bcachefs_metadata_version_backpointers || + c->opts.reconstruct_alloc || + fsck_err(c, "%s", buf.buf)) { + struct bkey_i_alloc_v4 *a = bch2_alloc_to_v4_mut(trans, alloc_k); + + ret = PTR_ERR_OR_ZERO(a) ?: + bch2_bucket_backpointer_mod(trans, a, bp, orig_k, true); + } + + goto out; +} + +static int check_extent_to_backpointers(struct btree_trans *trans, + struct btree_iter *iter) +{ + struct bch_fs *c = trans->c; + struct bkey_ptrs_c ptrs; + const union bch_extent_entry *entry; + struct extent_ptr_decoded p; + struct bkey_s_c k; + int ret; + + k = bch2_btree_iter_peek_all_levels(iter); + ret = bkey_err(k); + if (ret) + return ret; + if (!k.k) + return 0; + + ptrs = bch2_bkey_ptrs_c(k); + bkey_for_each_ptr_decode(k.k, ptrs, p, entry) { + struct bpos bucket_pos; + struct bch_backpointer bp; + + if (p.ptr.cached) + continue; + + bch2_extent_ptr_to_bp(c, iter->btree_id, iter->path->level, + k, p, &bucket_pos, &bp); + + ret = check_bp_exists(trans, bucket_pos, bp, k); + if (ret) + return ret; + } + + return 0; +} + +static int check_btree_root_to_backpointers(struct btree_trans *trans, + enum btree_id btree_id) +{ + struct bch_fs *c = trans->c; + struct btree_iter iter; + struct btree *b; + struct bkey_s_c k; + struct bkey_ptrs_c ptrs; + struct extent_ptr_decoded p; + const union bch_extent_entry *entry; + int ret; + + bch2_trans_node_iter_init(trans, &iter, btree_id, POS_MIN, 0, + c->btree_roots[btree_id].level, 0); + b = bch2_btree_iter_peek_node(&iter); + ret = PTR_ERR_OR_ZERO(b); + if (ret) + goto err; + + BUG_ON(b != btree_node_root(c, b)); + + k = bkey_i_to_s_c(&b->key); + ptrs = bch2_bkey_ptrs_c(k); 
+ bkey_for_each_ptr_decode(k.k, ptrs, p, entry) { + struct bpos bucket_pos; + struct bch_backpointer bp; + + if (p.ptr.cached) + continue; + + bch2_extent_ptr_to_bp(c, iter.btree_id, iter.path->level + 1, + k, p, &bucket_pos, &bp); + + ret = check_bp_exists(trans, bucket_pos, bp, k); + if (ret) + goto err; + } +err: + bch2_trans_iter_exit(trans, &iter); + return ret; +} + +int bch2_check_extents_to_backpointers(struct bch_fs *c) +{ + struct btree_trans trans; + struct btree_iter iter; + enum btree_id btree_id; + int ret = 0; + + bch2_trans_init(&trans, c, 0, 0); + for (btree_id = 0; btree_id < BTREE_ID_NR; btree_id++) { + unsigned depth = btree_type_has_ptrs(btree_id) ? 0 : 1; + + bch2_trans_node_iter_init(&trans, &iter, btree_id, POS_MIN, 0, + depth, + BTREE_ITER_ALL_LEVELS| + BTREE_ITER_PREFETCH); + + do { + ret = commit_do(&trans, NULL, NULL, + BTREE_INSERT_LAZY_RW| + BTREE_INSERT_NOFAIL, + check_extent_to_backpointers(&trans, &iter)); + if (ret) + break; + } while (!bch2_btree_iter_advance(&iter)); + + bch2_trans_iter_exit(&trans, &iter); + + if (ret) + break; + + ret = commit_do(&trans, NULL, NULL, + BTREE_INSERT_LAZY_RW| + BTREE_INSERT_NOFAIL, + check_btree_root_to_backpointers(&trans, btree_id)); + if (ret) + break; + } + bch2_trans_exit(&trans); + return ret; +} + +static int check_one_backpointer(struct btree_trans *trans, + struct bpos bucket, + u64 *bp_offset) +{ + struct btree_iter iter; + struct bch_backpointer bp; + struct bkey_s_c k; + struct printbuf buf = PRINTBUF; + int ret; + + ret = bch2_get_next_backpointer(trans, bucket, -1, + bp_offset, &bp); + if (ret || *bp_offset == U64_MAX) + return ret; + + k = bch2_backpointer_get_key(trans, &iter, bucket, *bp_offset, bp); + ret = bkey_err(k); + if (ret == -BCH_ERR_backpointer_to_overwritten_btree_node) + return 0; + if (ret) + return ret; + + if (fsck_err_on(!k.k, trans->c, + "%s backpointer points to missing extent\n%s", + *bp_offset < BACKPOINTER_OFFSET_MAX ? 
"alloc" : "btree", + (bch2_backpointer_to_text(&buf, &bp), buf.buf))) { + ret = bch2_backpointer_del_by_offset(trans, bucket, *bp_offset, bp); + if (ret == -ENOENT) + bch_err(trans->c, "backpointer at %llu not found", *bp_offset); + } + + bch2_trans_iter_exit(trans, &iter); +fsck_err: + printbuf_exit(&buf); + return ret; +} + +int bch2_check_backpointers_to_extents(struct bch_fs *c) +{ + struct btree_trans trans; + struct btree_iter iter; + struct bkey_s_c k; + int ret = 0; + + bch2_trans_init(&trans, c, 0, 0); + for_each_btree_key(&trans, iter, BTREE_ID_alloc, POS_MIN, + BTREE_ITER_PREFETCH, k, ret) { + u64 bp_offset = 0; + + while (!(ret = commit_do(&trans, NULL, NULL, + BTREE_INSERT_LAZY_RW| + BTREE_INSERT_NOFAIL, + check_one_backpointer(&trans, iter.pos, &bp_offset))) && + bp_offset < U64_MAX) + bp_offset++; + + if (ret) + break; + } + bch2_trans_iter_exit(&trans, &iter); + bch2_trans_exit(&trans); + return ret < 0 ? ret : 0; +} diff --git a/fs/bcachefs/backpointers.h b/fs/bcachefs/backpointers.h new file mode 100644 index 000000000000..e1506492f022 --- /dev/null +++ b/fs/bcachefs/backpointers.h @@ -0,0 +1,131 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _BCACHEFS_BACKPOINTERS_BACKGROUND_H +#define _BCACHEFS_BACKPOINTERS_BACKGROUND_H + +#include "btree_iter.h" +#include "btree_update.h" +#include "buckets.h" +#include "super.h" + +int bch2_backpointer_invalid(const struct bch_fs *, struct bkey_s_c k, + int, struct printbuf *); +void bch2_backpointer_to_text(struct printbuf *, const struct bch_backpointer *); +void bch2_backpointer_k_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); +void bch2_backpointer_swab(struct bkey_s); + +#define bch2_bkey_ops_backpointer ((struct bkey_ops) { \ + .key_invalid = bch2_backpointer_invalid, \ + .val_to_text = bch2_backpointer_k_to_text, \ + .swab = bch2_backpointer_swab, \ +}) + +#define MAX_EXTENT_COMPRESS_RATIO_SHIFT 10 + +/* + * Convert from pos in backpointer btree to pos of corresponding bucket in 
alloc + * btree: + */ +static inline struct bpos bp_pos_to_bucket(const struct bch_fs *c, + struct bpos bp_pos) +{ + struct bch_dev *ca = bch_dev_bkey_exists(c, bp_pos.inode); + u64 bucket_sector = bp_pos.offset >> MAX_EXTENT_COMPRESS_RATIO_SHIFT; +/* + * The right shift undoes the left shift applied by bucket_pos_to_bp(), + * dropping the low MAX_EXTENT_COMPRESS_RATIO_SHIFT bits of the offset. + */ + return POS(bp_pos.inode, sector_to_bucket(ca, bucket_sector)); +} + +/* + * Convert from pos in alloc btree + bucket offset to pos in backpointer btree: + */ +static inline struct bpos bucket_pos_to_bp(const struct bch_fs *c, + struct bpos bucket, + u64 bucket_offset) +{ + struct bch_dev *ca = bch_dev_bkey_exists(c, bucket.inode); + struct bpos ret; + + ret = POS(bucket.inode, + (bucket_to_sector(ca, bucket.offset) << + MAX_EXTENT_COMPRESS_RATIO_SHIFT) + bucket_offset); +/* + * Round-trip check: converting ret back with bp_pos_to_bucket() must + * give the bucket we started from. + */ + BUG_ON(!bkey_eq(bucket, bp_pos_to_bucket(c, ret))); + + return ret; +} + +bool bch2_bucket_backpointer_del(struct btree_trans *, + struct bkey_i_alloc_v4 *, + struct bch_backpointer); + +int bch2_bucket_backpointer_mod_nowritebuffer(struct btree_trans *, + struct bkey_i_alloc_v4 *, + struct bch_backpointer, struct bkey_s_c, bool); +/* + * Add (insert == true) or remove (insert == false) backpointer bp for the + * bucket whose alloc key is a. + * + * On removal, when BCH_ALLOC_V4_NR_BACKPOINTERS(&a->v) is nonzero, + * bch2_bucket_backpointer_del() is tried first and a true return ends the + * call with 0. If bch2_backpointers_no_use_write_buffer is set the work is + * handed to bch2_bucket_backpointer_mod_nowritebuffer(). Otherwise a key at + * bucket_pos_to_bp(c, a->k.p, bp.bucket_offset) is queued with + * bch2_trans_update_buffered() on BTREE_ID_backpointers; for removal that key + * is a KEY_TYPE_deleted key with zero value u64s. + * + * Returns 0 on success or the error from the allocation or the called helper. + */ +static inline int bch2_bucket_backpointer_mod(struct btree_trans *trans, + struct bkey_i_alloc_v4 *a, + struct bch_backpointer bp, + struct bkey_s_c orig_k, + bool insert) +{ + struct bch_fs *c = trans->c; + struct bkey_i_backpointer *bp_k; + int ret; + + if (!insert && + unlikely(BCH_ALLOC_V4_NR_BACKPOINTERS(&a->v)) && + bch2_bucket_backpointer_del(trans, a, bp)) + return 0; + + if (unlikely(bch2_backpointers_no_use_write_buffer)) + return bch2_bucket_backpointer_mod_nowritebuffer(trans, a, bp, orig_k, insert); + + bp_k = bch2_trans_kmalloc_nomemzero(trans, sizeof(struct bkey_i_backpointer)); + ret = PTR_ERR_OR_ZERO(bp_k); + if (ret) + return ret; + + bkey_backpointer_init(&bp_k->k_i); + bp_k->k.p = bucket_pos_to_bp(c, a->k.p, bp.bucket_offset); + bp_k->v = bp; + + if (!insert) { + bp_k->k.type = KEY_TYPE_deleted; + set_bkey_val_u64s(&bp_k->k, 0); + } + + return bch2_trans_update_buffered(trans, BTREE_ID_backpointers, 
&bp_k->k_i); +} +/* + * Fill in *bucket_pos and *bp for extent pointer p of key k. + * + * A nonzero level marks a btree node: data_type is BCH_DATA_btree and the + * length is btree_sectors(c); level 0 gives BCH_DATA_user and k.k->size. + * bucket_len is that length passed through ptr_disk_sectors(), and + * bucket_offset is the offset returned by PTR_BUCKET_POS_OFFSET() shifted left + * by MAX_EXTENT_COMPRESS_RATIO_SHIFT plus p.crc.offset. + */ +static inline void bch2_extent_ptr_to_bp(struct bch_fs *c, + enum btree_id btree_id, unsigned level, + struct bkey_s_c k, struct extent_ptr_decoded p, + struct bpos *bucket_pos, struct bch_backpointer *bp) +{ + enum bch_data_type data_type = level ? BCH_DATA_btree : BCH_DATA_user; + s64 sectors = level ? btree_sectors(c) : k.k->size; + u32 bucket_offset; + + *bucket_pos = PTR_BUCKET_POS_OFFSET(c, &p.ptr, &bucket_offset); + *bp = (struct bch_backpointer) { + .btree_id = btree_id, + .level = level, + .data_type = data_type, + .bucket_offset = ((u64) bucket_offset << MAX_EXTENT_COMPRESS_RATIO_SHIFT) + + p.crc.offset, + .bucket_len = ptr_disk_sectors(sectors, p), + .pos = k.k->p, + }; +} + +int bch2_get_next_backpointer(struct btree_trans *, struct bpos, int, + u64 *, struct bch_backpointer *); +struct bkey_s_c bch2_backpointer_get_key(struct btree_trans *, struct btree_iter *, + struct bpos, u64, struct bch_backpointer); +struct btree *bch2_backpointer_get_node(struct btree_trans *, struct btree_iter *, + struct bpos, u64, struct bch_backpointer); + +int bch2_check_btree_backpointers(struct bch_fs *); +int bch2_check_extents_to_backpointers(struct bch_fs *); +int bch2_check_backpointers_to_extents(struct bch_fs *); + +#endif /* _BCACHEFS_BACKPOINTERS_BACKGROUND_H */ diff --git a/fs/bcachefs/bcachefs.h b/fs/bcachefs/bcachefs.h index 91f635faccb0..6d048e5d8843 100644 --- a/fs/bcachefs/bcachefs.h +++ b/fs/bcachefs/bcachefs.h @@ -315,7 +315,10 @@ do { \ "done in memory") \ BCH_DEBUG_PARAM(verify_all_btree_replicas, \ "When reading btree nodes, read all replicas and " \ - "compare them") + "compare them") \ + BCH_DEBUG_PARAM(backpointers_no_use_write_buffer, \ + "Don't use the write buffer for backpointers, enabling "\ + "extra runtime checks") /* Parameters that should only be compiled in debug mode: */ #define BCH_DEBUG_PARAMS_DEBUG() \ @@ -435,6 +438,7 @@ enum gc_phase { GC_PHASE_BTREE_lru, GC_PHASE_BTREE_freespace, GC_PHASE_BTREE_need_discard, + 
GC_PHASE_BTREE_backpointers, GC_PHASE_PENDING_DELETE, }; @@ -552,6 +556,7 @@ enum { BCH_FS_INITIAL_GC_DONE, /* kill when we enumerate fsck passes */ BCH_FS_CHECK_ALLOC_DONE, BCH_FS_CHECK_LRUS_DONE, + BCH_FS_CHECK_BACKPOINTERS_DONE, BCH_FS_CHECK_ALLOC_TO_LRU_REFS_DONE, BCH_FS_FSCK_DONE, BCH_FS_INITIAL_GC_UNFIXED, /* kill when we enumerate fsck errors */ diff --git a/fs/bcachefs/bcachefs_format.h b/fs/bcachefs/bcachefs_format.h index 8e070402e73f..66c885186160 100644 --- a/fs/bcachefs/bcachefs_format.h +++ b/fs/bcachefs/bcachefs_format.h @@ -369,7 +369,8 @@ static inline void bkey_init(struct bkey *k) x(alloc_v3, 24) \ x(set, 25) \ x(lru, 26) \ - x(alloc_v4, 27) + x(alloc_v4, 27) \ + x(backpointer, 28) enum bch_bkey_type { #define x(name, nr) KEY_TYPE_##name = nr, @@ -890,6 +891,12 @@ struct bch_alloc { x(stripe, 32) \ x(stripe_redundancy, 8) +enum { +#define x(name, _bits) BCH_ALLOC_FIELD_V1_##name, + BCH_ALLOC_FIELDS_V1() +#undef x +}; + struct bch_alloc_v2 { struct bch_val v; __u8 nr_fields; @@ -918,6 +925,9 @@ struct bch_alloc_v3 { __u8 data[]; } __packed __aligned(8); +LE32_BITMASK(BCH_ALLOC_V3_NEED_DISCARD,struct bch_alloc_v3, flags, 0, 1) +LE32_BITMASK(BCH_ALLOC_V3_NEED_INC_GEN,struct bch_alloc_v3, flags, 1, 2) + struct bch_alloc_v4 { struct bch_val v; __u64 journal_seq; @@ -931,25 +941,27 @@ struct bch_alloc_v4 { __u64 io_time[2]; __u32 stripe; __u32 nr_external_backpointers; - struct bpos backpointers[0]; } __packed __aligned(8); #define BCH_ALLOC_V4_U64s_V0 6 #define BCH_ALLOC_V4_U64s (sizeof(struct bch_alloc_v4) / sizeof(u64)) -LE32_BITMASK(BCH_ALLOC_V3_NEED_DISCARD,struct bch_alloc_v3, flags, 0, 1) -LE32_BITMASK(BCH_ALLOC_V3_NEED_INC_GEN,struct bch_alloc_v3, flags, 1, 2) - BITMASK(BCH_ALLOC_V4_NEED_DISCARD, struct bch_alloc_v4, flags, 0, 1) BITMASK(BCH_ALLOC_V4_NEED_INC_GEN, struct bch_alloc_v4, flags, 1, 2) BITMASK(BCH_ALLOC_V4_BACKPOINTERS_START,struct bch_alloc_v4, flags, 2, 8) BITMASK(BCH_ALLOC_V4_NR_BACKPOINTERS, struct bch_alloc_v4, flags, 8, 14) 
-enum { -#define x(name, _bits) BCH_ALLOC_FIELD_V1_##name, - BCH_ALLOC_FIELDS_V1() -#undef x -}; +#define BCH_ALLOC_V4_NR_BACKPOINTERS_MAX 40 +/* + * Value of a backpointer key (KEY_TYPE_backpointer). btree_id, level and + * pos are copied from the key that owns the pointer; bucket_offset (a 40 bit + * field) and bucket_len are computed from the pointer. See + * bch2_extent_ptr_to_bp() for how each field is filled in. + */ +struct bch_backpointer { + struct bch_val v; + __u8 btree_id; + __u8 level; + __u8 data_type; + __u64 bucket_offset:40; + __u32 bucket_len; + struct bpos pos; +} __packed __aligned(8); /* Quotas: */ @@ -1486,7 +1498,8 @@ struct bch_sb_field_journal_seq_blacklist { x(inode_v2, 18) \ x(freespace, 19) \ x(alloc_v4, 20) \ - x(new_data_types, 21) + x(new_data_types, 21) \ + x(backpointers, 22) enum bcachefs_metadata_version { bcachefs_metadata_version_min = 9, @@ -2007,19 +2020,20 @@ LE32_BITMASK(JSET_NO_FLUSH, struct jset, flags, 5, 6); /* Btree: */ #define BCH_BTREE_IDS() \ - x(extents, 0) \ - x(inodes, 1) \ - x(dirents, 2) \ - x(xattrs, 3) \ - x(alloc, 4) \ - x(quotas, 5) \ - x(stripes, 6) \ - x(reflink, 7) \ - x(subvolumes, 8) \ - x(snapshots, 9) \ - x(lru, 10) \ - x(freespace, 11) \ - x(need_discard, 12) + x(extents, 0) \ + x(inodes, 1) \ + x(dirents, 2) \ + x(xattrs, 3) \ + x(alloc, 4) \ + x(quotas, 5) \ + x(stripes, 6) \ + x(reflink, 7) \ + x(subvolumes, 8) \ + x(snapshots, 9) \ + x(lru, 10) \ + x(freespace, 11) \ + x(need_discard, 12) \ + x(backpointers, 13) enum btree_id { #define x(kwd, val) BTREE_ID_##kwd = val, diff --git a/fs/bcachefs/bkey_methods.c b/fs/bcachefs/bkey_methods.c index 29809da5e9cf..45c8b2c61c5b 100644 --- a/fs/bcachefs/bkey_methods.c +++ b/fs/bcachefs/bkey_methods.c @@ -1,6 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 #include "bcachefs.h" +#include "backpointers.h" #include "bkey_methods.h" #include "btree_types.h" #include "alloc_background.h" @@ -191,6 +192,9 @@ static unsigned bch2_key_types_allowed[] = { [BKEY_TYPE_need_discard] = (1U << KEY_TYPE_deleted)| (1U << KEY_TYPE_set), + [BKEY_TYPE_backpointers] = + (1U << KEY_TYPE_deleted)| + (1U << KEY_TYPE_backpointer), [BKEY_TYPE_btree] = (1U << KEY_TYPE_deleted)| (1U << KEY_TYPE_btree_ptr)| diff --git a/fs/bcachefs/buckets.c b/fs/bcachefs/buckets.c 
index 86f48f5762dd..b657f8545a3b 100644 --- a/fs/bcachefs/buckets.c +++ b/fs/bcachefs/buckets.c @@ -7,6 +7,7 @@ #include "bcachefs.h" #include "alloc_background.h" +#include "backpointers.h" #include "bset.h" #include "btree_gc.h" #include "btree_update.h" @@ -662,16 +663,6 @@ err: return ret; } -static s64 ptr_disk_sectors(s64 sectors, struct extent_ptr_decoded p) -{ - EBUG_ON(sectors < 0); - - return crc_is_compressed(p.crc) - ? DIV_ROUND_UP_ULL(sectors * p.crc.compressed_size, - p.crc.uncompressed_size) - : sectors; -} - static int check_bucket_ref(struct bch_fs *c, struct bkey_s_c k, const struct bch_extent_ptr *ptr, @@ -1399,22 +1390,42 @@ need_mark: /* trans_mark: */ -static int bch2_trans_mark_pointer(struct btree_trans *trans, - struct bkey_s_c k, struct extent_ptr_decoded p, - s64 sectors, enum bch_data_type data_type) +static inline int bch2_trans_mark_pointer(struct btree_trans *trans, + enum btree_id btree_id, unsigned level, + struct bkey_s_c k, struct extent_ptr_decoded p, + unsigned flags) { + bool insert = !(flags & BTREE_TRIGGER_OVERWRITE); struct btree_iter iter; struct bkey_i_alloc_v4 *a; + struct bpos bucket_pos; + struct bch_backpointer bp; + s64 sectors; int ret; - a = bch2_trans_start_alloc_update(trans, &iter, PTR_BUCKET_POS(trans->c, &p.ptr)); + bch2_extent_ptr_to_bp(trans->c, btree_id, level, k, p, &bucket_pos, &bp); + sectors = bp.bucket_len; + if (!insert) + sectors = -sectors; +/* + * sectors is now the signed delta: bp.bucket_len when inserting, its + * negation when flags has BTREE_TRIGGER_OVERWRITE set. + */ + a = bch2_trans_start_alloc_update(trans, &iter, bucket_pos); if (IS_ERR(a)) return PTR_ERR(a); - ret = __mark_pointer(trans, k, &p.ptr, sectors, data_type, + ret = __mark_pointer(trans, k, &p.ptr, sectors, bp.data_type, a->v.gen, &a->v.data_type, - &a->v.dirty_sectors, &a->v.cached_sectors) ?: - bch2_trans_update(trans, &iter, &a->k_i, 0); + &a->v.dirty_sectors, &a->v.cached_sectors); + if (ret) + goto err; +/* + * Only non-cached pointers get a backpointer update. + */ + if (!p.ptr.cached) { + ret = bch2_bucket_backpointer_mod(trans, a, bp, k, insert); + if (ret) + goto err; + } + + ret = bch2_trans_update(trans, &iter, 
&a->k_i, 0); +err: bch2_trans_iter_exit(trans, &iter); return ret; } @@ -1497,8 +1508,7 @@ int bch2_trans_mark_extent(struct btree_trans *trans, if (flags & BTREE_TRIGGER_OVERWRITE) disk_sectors = -disk_sectors; - ret = bch2_trans_mark_pointer(trans, k, p, - disk_sectors, data_type); + ret = bch2_trans_mark_pointer(trans, btree_id, level, k, p, flags); if (ret < 0) return ret; diff --git a/fs/bcachefs/buckets.h b/fs/bcachefs/buckets.h index e8e3a3b09714..3398c9c3a81b 100644 --- a/fs/bcachefs/buckets.h +++ b/fs/bcachefs/buckets.h @@ -75,6 +75,15 @@ static inline struct bpos PTR_BUCKET_POS(const struct bch_fs *c, return POS(ptr->dev, PTR_BUCKET_NR(ca, ptr)); } +static inline struct bpos PTR_BUCKET_POS_OFFSET(const struct bch_fs *c, + const struct bch_extent_ptr *ptr, + u32 *bucket_offset) +{ + struct bch_dev *ca = bch_dev_bkey_exists(c, ptr->dev); +/* + * bucket_offset is handed straight to sector_to_bucket_and_offset(). + * NOTE(review): presumably that helper stores the offset of ptr->offset + * within its bucket there - confirm against its definition. + */ + return POS(ptr->dev, sector_to_bucket_and_offset(ca, ptr->offset, bucket_offset)); +} + static inline struct bucket *PTR_GC_BUCKET(struct bch_dev *ca, const struct bch_extent_ptr *ptr) { @@ -90,6 +99,16 @@ static inline enum bch_data_type ptr_data_type(const struct bkey *k, return ptr->cached ? BCH_DATA_cached : BCH_DATA_user; } +static inline s64 ptr_disk_sectors(s64 sectors, struct extent_ptr_decoded p) +{ + EBUG_ON(sectors < 0); +/* + * Compressed extents are scaled by compressed_size / uncompressed_size, + * rounded up; anything else is returned unchanged. + */ + return crc_is_compressed(p.crc) + ? 
DIV_ROUND_UP_ULL(sectors * p.crc.compressed_size, + p.crc.uncompressed_size) + : sectors; +} + static inline int gen_cmp(u8 a, u8 b) { return (s8) (a - b); diff --git a/fs/bcachefs/errcode.h b/fs/bcachefs/errcode.h index 7a6448f48fca..804bc15dce31 100644 --- a/fs/bcachefs/errcode.h +++ b/fs/bcachefs/errcode.h @@ -60,6 +60,7 @@ x(BCH_ERR_btree_insert_fail, btree_insert_need_journal_res) \ x(BCH_ERR_btree_insert_fail, btree_insert_need_journal_reclaim) \ x(BCH_ERR_btree_insert_fail, btree_insert_need_flush_buffer) \ + x(0, backpointer_to_overwritten_btree_node) \ x(0, lock_fail_root_changed) \ x(0, journal_reclaim_would_deadlock) \ x(0, fsck) \ diff --git a/fs/bcachefs/recovery.c b/fs/bcachefs/recovery.c index 61890755d335..55356c117737 100644 --- a/fs/bcachefs/recovery.c +++ b/fs/bcachefs/recovery.c @@ -1,6 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 #include "bcachefs.h" +#include "backpointers.h" #include "bkey_buf.h" #include "alloc_background.h" #include "btree_gc.h" @@ -925,6 +926,7 @@ static bool btree_id_is_alloc(enum btree_id id) { switch (id) { case BTREE_ID_alloc: + case BTREE_ID_backpointers: case BTREE_ID_need_discard: case BTREE_ID_freespace: return true; @@ -1091,8 +1093,8 @@ int bch2_fs_recovery(struct bch_fs *c) } if (!c->opts.nochanges) { - if (c->sb.version < bcachefs_metadata_version_new_data_types) { - bch_info(c, "version prior to new_data_types, upgrade and fsck required"); + if (c->sb.version < bcachefs_metadata_version_backpointers) { + bch_info(c, "version prior to backpointers, upgrade and fsck required"); c->opts.version_upgrade = true; c->opts.fsck = true; c->opts.fix_errors = FSCK_OPT_YES; @@ -1301,6 +1303,28 @@ use_clean: bch_verbose(c, "done checking lrus"); set_bit(BCH_FS_CHECK_LRUS_DONE, &c->flags); + bch_info(c, "checking backpointers to alloc keys"); + err = "error checking backpointers to alloc keys"; + ret = bch2_check_btree_backpointers(c); + if (ret) + goto err; + bch_verbose(c, "done checking backpointers to alloc keys"); + 
bch_info(c, "checking backpointers to extents"); + err = "error checking backpointers to extents"; + ret = bch2_check_backpointers_to_extents(c); + if (ret) + goto err; + bch_verbose(c, "done checking backpointers to extents"); +/* + * Last of the backpointer checks added here; + * BCH_FS_CHECK_BACKPOINTERS_DONE is set only after it returns 0. + */ + bch_info(c, "checking extents to backpointers"); + err = "error checking extents to backpointers"; + ret = bch2_check_extents_to_backpointers(c); + if (ret) + goto err; + bch_verbose(c, "done checking extents to backpointers"); + set_bit(BCH_FS_CHECK_BACKPOINTERS_DONE, &c->flags); + bch_info(c, "checking alloc to lru refs"); err = "error checking alloc to lru refs"; ret = bch2_check_alloc_to_lru_refs(c); @@ -1312,6 +1336,7 @@ use_clean: set_bit(BCH_FS_INITIAL_GC_DONE, &c->flags); set_bit(BCH_FS_CHECK_ALLOC_DONE, &c->flags); set_bit(BCH_FS_CHECK_LRUS_DONE, &c->flags); + set_bit(BCH_FS_CHECK_BACKPOINTERS_DONE, &c->flags); set_bit(BCH_FS_CHECK_ALLOC_TO_LRU_REFS_DONE, &c->flags); set_bit(BCH_FS_FSCK_DONE, &c->flags); @@ -1471,6 +1496,9 @@ int bch2_fs_initialize(struct bch_fs *c) c->disk_sb.sb->compat[0] |= cpu_to_le64(1ULL << BCH_COMPAT_extents_above_btree_updates_done); c->disk_sb.sb->compat[0] |= cpu_to_le64(1ULL << BCH_COMPAT_bformat_overflow_done); + if (c->sb.version < bcachefs_metadata_version_backpointers) + c->opts.version_upgrade = true; + if (c->opts.version_upgrade) { c->disk_sb.sb->version = cpu_to_le16(bcachefs_metadata_version_current); c->disk_sb.sb->features[0] |= cpu_to_le64(BCH_SB_FEATURES_ALL); @@ -1479,6 +1507,9 @@ int bch2_fs_initialize(struct bch_fs *c) mutex_unlock(&c->sb_lock); set_bit(BCH_FS_INITIAL_GC_DONE, &c->flags); + set_bit(BCH_FS_CHECK_LRUS_DONE, &c->flags); + set_bit(BCH_FS_CHECK_BACKPOINTERS_DONE, &c->flags); + set_bit(BCH_FS_CHECK_ALLOC_TO_LRU_REFS_DONE, &c->flags); set_bit(BCH_FS_MAY_GO_RW, &c->flags); set_bit(BCH_FS_FSCK_DONE, &c->flags); diff --git a/fs/bcachefs/super.c b/fs/bcachefs/super.c index ade8d074e887..c5efaa7d38a8 100644 --- a/fs/bcachefs/super.c +++ b/fs/bcachefs/super.c @@ -1431,6 +1431,8 @@ 
static int bch2_dev_remove_alloc(struct bch_fs *c, struct bch_dev *ca) BTREE_TRIGGER_NORUN, NULL) ?: bch2_btree_delete_range(c, BTREE_ID_freespace, start, end, BTREE_TRIGGER_NORUN, NULL) ?: + bch2_btree_delete_range(c, BTREE_ID_backpointers, start, end, + BTREE_TRIGGER_NORUN, NULL) ?: bch2_btree_delete_range(c, BTREE_ID_alloc, start, end, BTREE_TRIGGER_NORUN, NULL); if (ret) |