diff options
author | Kent Overstreet <kent.overstreet@gmail.com> | 2022-01-01 02:03:29 +0100 |
---|---|---|
committer | Kent Overstreet <kent.overstreet@linux.dev> | 2023-10-22 23:09:29 +0200 |
commit | 3d48a7f85f83a51a0eb0d0a6537be26a20691260 (patch) | |
tree | 20187f3ae7c67dde71f213cfa29c203fb6aa3451 | |
parent | bcachefs: LRU btree (diff) | |
download | linux-3d48a7f85f83a51a0eb0d0a6537be26a20691260.tar.xz linux-3d48a7f85f83a51a0eb0d0a6537be26a20691260.zip |
bcachefs: KEY_TYPE_alloc_v4
This introduces a new alloc key which doesn't use varints. Soon we'll be
adding backpointers and storing them in alloc keys, which means our
pack/unpack workflow for alloc keys won't really work - we'll need to be
mutating alloc keys in place.
Instead of bch2_alloc_unpack(), we now have bch2_alloc_to_v4() that
converts older types of alloc keys to v4 if needed.
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
-rw-r--r-- | fs/bcachefs/alloc_background.c | 285 | ||||
-rw-r--r-- | fs/bcachefs/alloc_background.h | 51 | ||||
-rw-r--r-- | fs/bcachefs/bcachefs_format.h | 42 | ||||
-rw-r--r-- | fs/bcachefs/bkey_methods.c | 3 | ||||
-rw-r--r-- | fs/bcachefs/btree_gc.c | 76 | ||||
-rw-r--r-- | fs/bcachefs/btree_types.h | 1 | ||||
-rw-r--r-- | fs/bcachefs/buckets.c | 186 | ||||
-rw-r--r-- | fs/bcachefs/buckets.h | 8 | ||||
-rw-r--r-- | fs/bcachefs/buckets_types.h | 2 | ||||
-rw-r--r-- | fs/bcachefs/movinggc.c | 26 |
10 files changed, 379 insertions, 301 deletions
diff --git a/fs/bcachefs/alloc_background.c b/fs/bcachefs/alloc_background.c index 33b2e4d7da3b..099be1290b4c 100644 --- a/fs/bcachefs/alloc_background.c +++ b/fs/bcachefs/alloc_background.c @@ -33,13 +33,27 @@ const char * const bch2_allocator_states[] = { NULL }; +/* Persistent alloc info: */ + static const unsigned BCH_ALLOC_V1_FIELD_BYTES[] = { #define x(name, bits) [BCH_ALLOC_FIELD_V1_##name] = bits / 8, BCH_ALLOC_FIELDS_V1() #undef x }; -/* Persistent alloc info: */ +struct bkey_alloc_unpacked { + u64 journal_seq; + u64 bucket; + u8 dev; + u8 gen; + u8 oldest_gen; + u8 data_type; + bool need_discard:1; + bool need_inc_gen:1; +#define x(_name, _bits) u##_bits _name; + BCH_ALLOC_FIELDS_V2() +#undef x +}; static inline u64 alloc_field_v1_get(const struct bch_alloc *a, const void **p, unsigned field) @@ -161,6 +175,8 @@ static int bch2_alloc_unpack_v3(struct bkey_alloc_unpacked *out, out->gen = a.v->gen; out->oldest_gen = a.v->oldest_gen; out->data_type = a.v->data_type; + out->need_discard = BCH_ALLOC_V3_NEED_DISCARD(a.v); + out->need_inc_gen = BCH_ALLOC_V3_NEED_INC_GEN(a.v); out->journal_seq = le64_to_cpu(a.v->journal_seq); #define x(_name, _bits) \ @@ -182,47 +198,7 @@ static int bch2_alloc_unpack_v3(struct bkey_alloc_unpacked *out, return 0; } -static void bch2_alloc_pack_v3(struct bkey_alloc_buf *dst, - const struct bkey_alloc_unpacked src) -{ - struct bkey_i_alloc_v3 *a = bkey_alloc_v3_init(&dst->k); - unsigned nr_fields = 0, last_nonzero_fieldnr = 0; - u8 *out = a->v.data; - u8 *end = (void *) &dst[1]; - u8 *last_nonzero_field = out; - unsigned bytes; - - a->k.p = POS(src.dev, src.bucket); - a->v.gen = src.gen; - a->v.oldest_gen = src.oldest_gen; - a->v.data_type = src.data_type; - a->v.journal_seq = cpu_to_le64(src.journal_seq); - -#define x(_name, _bits) \ - nr_fields++; \ - \ - if (src._name) { \ - out += bch2_varint_encode_fast(out, src._name); \ - \ - last_nonzero_field = out; \ - last_nonzero_fieldnr = nr_fields; \ - } else { \ - *out++ = 0; \ - } - - BCH_ALLOC_FIELDS_V2() -#undef x - BUG_ON(out > end); - - out = last_nonzero_field; - a->v.nr_fields = last_nonzero_fieldnr; - - bytes = (u8 *) out - (u8 *) &a->v; - set_bkey_val_bytes(&a->k, bytes); - memset_u64s_tail(&a->v, 0, bytes); -} - -struct bkey_alloc_unpacked bch2_alloc_unpack(struct bkey_s_c k) +static struct bkey_alloc_unpacked bch2_alloc_unpack(struct bkey_s_c k) { struct bkey_alloc_unpacked ret = { .dev = k.k->p.inode, @@ -245,32 +221,71 @@ struct bkey_alloc_unpacked bch2_alloc_unpack(struct bkey_s_c k) return ret; } -struct bkey_alloc_buf *bch2_alloc_pack(struct btree_trans *trans, - const struct bkey_alloc_unpacked src) +void bch2_alloc_to_v4(struct bkey_s_c k, struct bch_alloc_v4 *out) { - struct bkey_alloc_buf *dst; + if (k.k->type == KEY_TYPE_alloc_v4) { + *out = *bkey_s_c_to_alloc_v4(k).v; + } else { + struct bkey_alloc_unpacked u = bch2_alloc_unpack(k); + + *out = (struct bch_alloc_v4) { + .journal_seq = u.journal_seq, + .flags = u.need_discard, + .gen = u.gen, + .oldest_gen = u.oldest_gen, + .data_type = u.data_type, + .stripe_redundancy = u.stripe_redundancy, + .dirty_sectors = u.dirty_sectors, + .cached_sectors = u.cached_sectors, + .io_time[READ] = u.read_time, + .io_time[WRITE] = u.write_time, + .stripe = u.stripe, + }; + } +} - dst = bch2_trans_kmalloc(trans, sizeof(struct bkey_alloc_buf)); - if (!IS_ERR(dst)) - bch2_alloc_pack_v3(dst, src); +struct bkey_i_alloc_v4 *bch2_alloc_to_v4_mut(struct btree_trans *trans, struct bkey_s_c k) +{ + struct bkey_i_alloc_v4 *ret; - return dst; + if (k.k->type == KEY_TYPE_alloc_v4) { + ret = bch2_trans_kmalloc(trans, bkey_bytes(k.k)); + if (!IS_ERR(ret)) + bkey_reassemble(&ret->k_i, k); + } else { + ret = bch2_trans_kmalloc(trans, sizeof(*ret)); + if (!IS_ERR(ret)) { + bkey_alloc_v4_init(&ret->k_i); + ret->k.p = k.k->p; + bch2_alloc_to_v4(k, &ret->v); + } + } + return ret; } -int bch2_alloc_write(struct btree_trans *trans, struct btree_iter *iter, - struct bkey_alloc_unpacked *u, unsigned trigger_flags) +struct bkey_i_alloc_v4 * +bch2_trans_start_alloc_update(struct btree_trans *trans, struct btree_iter *iter, + struct bpos pos) { - struct bkey_alloc_buf *a = bch2_alloc_pack(trans, *u); + struct bkey_s_c k; + struct bkey_i_alloc_v4 *a; + int ret; - /* - * Without BTREE_UPDATE_NO_KEY_CACHE_COHERENCY, we may end up updating - * the btree instead of the key cache - this can casue the allocator to - * self-deadlock, since updating the btree may require allocating new - * btree nodes: - */ - return PTR_ERR_OR_ZERO(a) ?: - bch2_trans_update(trans, iter, &a->k, trigger_flags| - BTREE_UPDATE_NO_KEY_CACHE_COHERENCY); + bch2_trans_iter_init(trans, iter, BTREE_ID_alloc, pos, + BTREE_ITER_WITH_UPDATES| + BTREE_ITER_CACHED| + BTREE_ITER_INTENT); + k = bch2_btree_iter_peek_slot(iter); + ret = bkey_err(k); + if (ret) { + bch2_trans_iter_exit(trans, iter); + return ERR_PTR(ret); + } + + a = bch2_alloc_to_v4_mut(trans, k); + if (IS_ERR(a)) + bch2_trans_iter_exit(trans, iter); + return a; } static unsigned bch_alloc_v1_val_u64s(const struct bch_alloc *a) @@ -316,28 +331,70 @@ const char *bch2_alloc_v2_invalid(const struct bch_fs *c, struct bkey_s_c k) const char *bch2_alloc_v3_invalid(const struct bch_fs *c, struct bkey_s_c k) { struct bkey_alloc_unpacked u; + struct bch_dev *ca; if (k.k->p.inode >= c->sb.nr_devices || !c->devs[k.k->p.inode]) return "invalid device"; + ca = bch_dev_bkey_exists(c, k.k->p.inode); + + if (k.k->p.offset < ca->mi.first_bucket || + k.k->p.offset >= ca->mi.nbuckets) + return "invalid bucket"; + if (bch2_alloc_unpack_v3(&u, k)) return "unpack error"; return NULL; } -void bch2_alloc_to_text(struct printbuf *out, struct bch_fs *c, - struct bkey_s_c k) +const char *bch2_alloc_v4_invalid(const struct bch_fs *c, struct bkey_s_c k) { - struct bkey_alloc_unpacked u = bch2_alloc_unpack(k); + struct bch_dev *ca; - pr_buf(out, "gen %u oldest_gen %u data_type %s journal_seq %llu", - u.gen, u.oldest_gen, bch2_data_types[u.data_type], - u.journal_seq); -#define x(_name, ...) pr_buf(out, " " #_name " %llu", (u64) u._name); - BCH_ALLOC_FIELDS_V2() -#undef x + if (k.k->p.inode >= c->sb.nr_devices || + !c->devs[k.k->p.inode]) + return "invalid device"; + + ca = bch_dev_bkey_exists(c, k.k->p.inode); + + if (k.k->p.offset < ca->mi.first_bucket || + k.k->p.offset >= ca->mi.nbuckets) + return "invalid bucket"; + + return NULL; +} + +void bch2_alloc_v4_swab(struct bkey_s k) +{ + struct bch_alloc_v4 *a = bkey_s_to_alloc_v4(k).v; + + a->journal_seq = swab64(a->journal_seq); + a->flags = swab32(a->flags); + a->dirty_sectors = swab32(a->dirty_sectors); + a->cached_sectors = swab32(a->cached_sectors); + a->io_time[0] = swab64(a->io_time[0]); + a->io_time[1] = swab64(a->io_time[1]); + a->stripe = swab32(a->stripe); + a->nr_external_backpointers = swab32(a->nr_external_backpointers); +} + +void bch2_alloc_to_text(struct printbuf *out, struct bch_fs *c, struct bkey_s_c k) +{ + struct bch_alloc_v4 a; + + bch2_alloc_to_v4(k, &a); + + pr_buf(out, "gen %u oldest_gen %u data_type %s journal_seq %llu need_discard %llu", + a.gen, a.oldest_gen, bch2_data_types[a.data_type], + a.journal_seq, BCH_ALLOC_V4_NEED_DISCARD(&a)); + pr_buf(out, " dirty_sectors %u", a.dirty_sectors); + pr_buf(out, " cached_sectors %u", a.cached_sectors); + pr_buf(out, " stripe %u", a.stripe); + pr_buf(out, " stripe_redundancy %u", a.stripe_redundancy); + pr_buf(out, " read_time %llu", a.io_time[READ]); + pr_buf(out, " write_time %llu", a.io_time[WRITE]); } int bch2_alloc_read(struct bch_fs *c, bool gc, bool metadata_only) @@ -345,9 +402,9 @@ int bch2_alloc_read(struct bch_fs *c, bool gc, bool metadata_only) struct btree_trans trans; struct btree_iter iter; struct bkey_s_c k; + struct bch_alloc_v4 a; struct bch_dev *ca; struct bucket *g; - struct bkey_alloc_unpacked u; int ret; bch2_trans_init(&trans, c, 0, 0); @@ -356,28 +413,28 @@ int bch2_alloc_read(struct bch_fs *c, bool gc, bool metadata_only) BTREE_ITER_PREFETCH, k, ret) { ca = bch_dev_bkey_exists(c, k.k->p.inode); g = __bucket(ca, k.k->p.offset, gc); - u = bch2_alloc_unpack(k); + bch2_alloc_to_v4(k, &a); if (!gc) - *bucket_gen(ca, k.k->p.offset) = u.gen; + *bucket_gen(ca, k.k->p.offset) = a.gen; - g->_mark.gen = u.gen; - g->io_time[READ] = u.read_time; - g->io_time[WRITE] = u.write_time; - g->oldest_gen = !gc ? u.oldest_gen : u.gen; + g->_mark.gen = a.gen; + g->io_time[READ] = a.io_time[READ]; + g->io_time[WRITE] = a.io_time[WRITE]; + g->oldest_gen = !gc ? a.oldest_gen : a.gen; g->gen_valid = 1; if (!gc || (metadata_only && - (u.data_type == BCH_DATA_user || - u.data_type == BCH_DATA_cached || - u.data_type == BCH_DATA_parity))) { - g->_mark.data_type = u.data_type; - g->_mark.dirty_sectors = u.dirty_sectors; - g->_mark.cached_sectors = u.cached_sectors; - g->_mark.stripe = u.stripe != 0; - g->stripe = u.stripe; - g->stripe_redundancy = u.stripe_redundancy; + (a.data_type == BCH_DATA_user || + a.data_type == BCH_DATA_cached || + a.data_type == BCH_DATA_parity))) { + g->_mark.data_type = a.data_type; + g->_mark.dirty_sectors = a.dirty_sectors; + g->_mark.cached_sectors = a.cached_sectors; + g->_mark.stripe = a.stripe != 0; + g->stripe = a.stripe; + g->stripe_redundancy = a.stripe_redundancy; } } @@ -398,29 +455,22 @@ int bch2_bucket_io_time_reset(struct btree_trans *trans, unsigned dev, { struct bch_fs *c = trans->c; struct btree_iter iter; - struct bkey_s_c k; - struct bkey_alloc_unpacked u; - u64 *time, now; + struct bkey_i_alloc_v4 *a; + u64 now; int ret = 0; - bch2_trans_iter_init(trans, &iter, BTREE_ID_alloc, POS(dev, bucket_nr), - BTREE_ITER_CACHED| - BTREE_ITER_INTENT); - k = bch2_btree_iter_peek_slot(&iter); - ret = bkey_err(k); + a = bch2_trans_start_alloc_update(trans, &iter, POS(dev, bucket_nr)); + ret = PTR_ERR_OR_ZERO(a); if (ret) - goto out; - - u = bch2_alloc_unpack(k); + return ret; - time = rw == READ ? &u.read_time : &u.write_time; now = atomic64_read(&c->io_clock[rw].now); - if (*time == now) + if (a->v.io_time[rw] == now) goto out; - *time = now; + a->v.io_time[rw] = now; - ret = bch2_alloc_write(trans, &iter, &u, 0) ?: + ret = bch2_trans_update(trans, &iter, &a->k_i, 0) ?: bch2_trans_commit(trans, NULL, NULL, 0); out: bch2_trans_iter_exit(trans, &iter); @@ -604,7 +654,7 @@ static size_t find_reclaimable_buckets(struct bch_fs *c, struct bch_dev *ca) static int bucket_invalidate_btree(struct btree_trans *trans, struct bch_dev *ca, u64 b, - struct bkey_alloc_unpacked *u) + struct bkey_i_alloc_v4 *a) { struct bch_fs *c = trans->c; struct btree_iter iter; @@ -621,16 +671,19 @@ static int bucket_invalidate_btree(struct btree_trans *trans, if (ret) goto err; - *u = bch2_alloc_unpack(k); - u->gen++; - u->data_type = 0; - u->dirty_sectors = 0; - u->cached_sectors = 0; - u->read_time = atomic64_read(&c->io_clock[READ].now); - u->write_time = atomic64_read(&c->io_clock[WRITE].now); - - ret = bch2_alloc_write(trans, &iter, u, - BTREE_TRIGGER_BUCKET_INVALIDATE); + bkey_alloc_v4_init(&a->k_i); + a->k.p = iter.pos; + bch2_alloc_to_v4(k, &a->v); + a->v.gen++; + a->v.data_type = 0; + a->v.dirty_sectors = 0; + a->v.cached_sectors = 0; + a->v.io_time[READ] = atomic64_read(&c->io_clock[READ].now); + a->v.io_time[WRITE] = atomic64_read(&c->io_clock[WRITE].now); + + ret = bch2_trans_update(trans, &iter, &a->k_i, + BTREE_TRIGGER_BUCKET_INVALIDATE| + BTREE_UPDATE_NO_KEY_CACHE_COHERENCY); err: bch2_trans_iter_exit(trans, &iter); return ret; @@ -639,7 +692,7 @@ err: static int bch2_invalidate_one_bucket(struct bch_fs *c, struct bch_dev *ca, u64 *journal_seq, unsigned flags) { - struct bkey_alloc_unpacked u; + struct bkey_i_alloc_v4 a; size_t b; u64 commit_seq = 0; int ret = 0; @@ -671,7 +724,7 @@ static int bch2_invalidate_one_bucket(struct bch_fs *c, struct bch_dev *ca, BTREE_INSERT_NOCHECK_RW| BTREE_INSERT_NOFAIL| flags, - bucket_invalidate_btree(&trans, ca, b, &u)); + bucket_invalidate_btree(&trans, ca, b, &a)); if (!ret) { /* remove from alloc_heap: */ @@ -687,14 +740,14 @@ static int bch2_invalidate_one_bucket(struct bch_fs *c, struct bch_dev *ca, * If we invalidating cached data then we need to wait on the * journal commit: */ - if (u.data_type) + if (a.v.data_type) *journal_seq = max(*journal_seq, commit_seq); /* * We already waiting on u.alloc_seq when we filtered out * buckets that need journal commit: */ - BUG_ON(*journal_seq > u.journal_seq); + BUG_ON(*journal_seq > a.v.journal_seq); } else { size_t b2; diff --git a/fs/bcachefs/alloc_background.h b/fs/bcachefs/alloc_background.h index 3eaa6d204286..b66c8cf0341e 100644 --- a/fs/bcachefs/alloc_background.h +++ b/fs/bcachefs/alloc_background.h @@ -10,48 +10,14 @@ extern const char * const bch2_allocator_states[]; -struct bkey_alloc_unpacked { - u64 journal_seq; - u64 bucket; - u8 dev; - u8 gen; - u8 oldest_gen; - u8 data_type; -#define x(_name, _bits) u##_bits _name; - BCH_ALLOC_FIELDS_V2() -#undef x -}; - /* How out of date a pointer gen is allowed to be: */ #define BUCKET_GC_GEN_MAX 96U -/* returns true if not equal */ -static inline bool bkey_alloc_unpacked_cmp(struct bkey_alloc_unpacked l, - struct bkey_alloc_unpacked r) -{ - return l.gen != r.gen || - l.oldest_gen != r.oldest_gen || - l.data_type != r.data_type -#define x(_name, ...) || l._name != r._name - BCH_ALLOC_FIELDS_V2() -#undef x - ; -} - -struct bkey_alloc_buf { - struct bkey_i k; - struct bch_alloc_v3 v; - -#define x(_name, _bits) + _bits / 8 - u8 _pad[0 + BCH_ALLOC_FIELDS_V2()]; -#undef x -} __attribute__((packed, aligned(8))); +struct bkey_i_alloc_v4 * +bch2_trans_start_alloc_update(struct btree_trans *, struct btree_iter *, struct bpos); -struct bkey_alloc_unpacked bch2_alloc_unpack(struct bkey_s_c); -struct bkey_alloc_buf *bch2_alloc_pack(struct btree_trans *, - const struct bkey_alloc_unpacked); -int bch2_alloc_write(struct btree_trans *, struct btree_iter *, - struct bkey_alloc_unpacked *, unsigned); +void bch2_alloc_to_v4(struct bkey_s_c, struct bch_alloc_v4 *); +struct bkey_i_alloc_v4 *bch2_alloc_to_v4_mut(struct btree_trans *, struct bkey_s_c); int bch2_bucket_io_time_reset(struct btree_trans *, unsigned, size_t, int); @@ -60,6 +26,8 @@ int bch2_bucket_io_time_reset(struct btree_trans *, unsigned, size_t, int); const char *bch2_alloc_v1_invalid(const struct bch_fs *, struct bkey_s_c); const char *bch2_alloc_v2_invalid(const struct bch_fs *, struct bkey_s_c); const char *bch2_alloc_v3_invalid(const struct bch_fs *, struct bkey_s_c); +const char *bch2_alloc_v4_invalid(const struct bch_fs *, struct bkey_s_c k); +void bch2_alloc_v4_swab(struct bkey_s); void bch2_alloc_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); #define bch2_bkey_ops_alloc (struct bkey_ops) { \ @@ -80,6 +48,13 @@ void bch2_alloc_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); .atomic_trigger = bch2_mark_alloc, \ } +#define bch2_bkey_ops_alloc_v4 (struct bkey_ops) { \ + .key_invalid = bch2_alloc_v4_invalid, \ + .val_to_text = bch2_alloc_to_text, \ + .swab = bch2_alloc_v4_swab, \ + .atomic_trigger = bch2_mark_alloc, \ +} + static inline bool bkey_is_alloc(const struct bkey *k) { return k->type == KEY_TYPE_alloc || diff --git a/fs/bcachefs/bcachefs_format.h b/fs/bcachefs/bcachefs_format.h index 982409ed940e..a640a45a123a 100644 --- a/fs/bcachefs/bcachefs_format.h +++ b/fs/bcachefs/bcachefs_format.h @@ -82,6 +82,21 @@ typedef uuid_t __uuid_t; #endif +#define BITMASK(name, type, field, offset, end) \ +static const unsigned name##_OFFSET = offset; \ +static const unsigned name##_BITS = (end - offset); \ + \ +static inline __u64 name(const type *k) \ +{ \ + return (k->field >> offset) & ~(~0ULL << (end - offset)); \ +} \ + \ +static inline void SET_##name(type *k, __u64 v) \ +{ \ + k->field &= ~(~(~0ULL << (end - offset)) << offset); \ + k->field |= (v & ~(~0ULL << (end - offset))) << offset; \ +} + #define LE_BITMASK(_bits, name, type, field, offset, end) \ static const unsigned name##_OFFSET = offset; \ static const unsigned name##_BITS = (end - offset); \ @@ -353,7 +368,8 @@ static inline void bkey_init(struct bkey *k) x(inode_v2, 23) \ x(alloc_v3, 24) \ x(set, 25) \ - x(lru, 26) + x(lru, 26) \ + x(alloc_v4, 27) enum bch_bkey_type { #define x(name, nr) KEY_TYPE_##name = nr, @@ -903,6 +919,30 @@ struct bch_alloc_v3 { __u8 data[]; } __attribute__((packed, aligned(8))); +struct bch_alloc_v4 { + struct bch_val v; + __u64 journal_seq; + __u32 flags; + __u8 gen; + __u8 oldest_gen; + __u8 data_type; + __u8 stripe_redundancy; + __u32 dirty_sectors; + __u32 cached_sectors; + __u64 io_time[2]; + __u32 stripe; + __u32 nr_external_backpointers; + struct bpos backpointers[0]; +} __attribute__((packed, aligned(8))); + +LE32_BITMASK(BCH_ALLOC_V3_NEED_DISCARD,struct bch_alloc_v3, flags, 0, 1) +LE32_BITMASK(BCH_ALLOC_V3_NEED_INC_GEN,struct bch_alloc_v3, flags, 1, 2) + +BITMASK(BCH_ALLOC_V4_NEED_DISCARD, struct bch_alloc_v4, flags, 0, 1) +BITMASK(BCH_ALLOC_V4_NEED_INC_GEN, struct bch_alloc_v4, flags, 1, 2) +BITMASK(BCH_ALLOC_V4_BACKPOINTERS_START,struct bch_alloc_v4, flags, 2, 8) +BITMASK(BCH_ALLOC_V4_NR_BACKPOINTERS, struct bch_alloc_v4, flags, 8, 14) + enum { #define x(name, _bits) BCH_ALLOC_FIELD_V1_##name, BCH_ALLOC_FIELDS_V1() diff --git a/fs/bcachefs/bkey_methods.c b/fs/bcachefs/bkey_methods.c index 8757218e571b..62774e4f6dbb 100644 --- a/fs/bcachefs/bkey_methods.c +++ b/fs/bcachefs/bkey_methods.c @@ -149,7 +149,8 @@ static unsigned bch2_key_types_allowed[] = { (1U << KEY_TYPE_deleted)| (1U << KEY_TYPE_alloc)| (1U << KEY_TYPE_alloc_v2)| - (1U << KEY_TYPE_alloc_v3), + (1U << KEY_TYPE_alloc_v3)| + (1U << KEY_TYPE_alloc_v4), [BKEY_TYPE_quotas] = (1U << KEY_TYPE_deleted)| (1U << KEY_TYPE_quota), diff --git a/fs/bcachefs/btree_gc.c b/fs/bcachefs/btree_gc.c index 8ec9c43d98e1..c3d6c62ef062 100644 --- a/fs/bcachefs/btree_gc.c +++ b/fs/bcachefs/btree_gc.c @@ -1306,6 +1306,19 @@ static int bch2_gc_start(struct bch_fs *c, return 0; } +/* returns true if not equal */ +static inline bool bch2_alloc_v4_cmp(struct bch_alloc_v4 l, + struct bch_alloc_v4 r) +{ + return l.gen != r.gen || + l.oldest_gen != r.oldest_gen || + l.data_type != r.data_type || + l.dirty_sectors != r.dirty_sectors || + l.cached_sectors != r.cached_sectors || + l.stripe_redundancy != r.stripe_redundancy || + l.stripe != r.stripe; +} + static int bch2_alloc_write_key(struct btree_trans *trans, struct btree_iter *iter, bool metadata_only) @@ -1314,8 +1327,8 @@ static int bch2_alloc_write_key(struct btree_trans *trans, struct bch_dev *ca = bch_dev_bkey_exists(c, iter->pos.inode); struct bucket *g; struct bkey_s_c k; - struct bkey_alloc_unpacked old_u, new_u, gc_u; - struct bkey_alloc_buf *a; + struct bkey_i_alloc_v4 *a; + struct bch_alloc_v4 old, new, gc; int ret; k = bch2_btree_iter_peek_slot(iter); @@ -1323,60 +1336,61 @@ static int bch2_alloc_write_key(struct btree_trans *trans, if (ret) return ret; - old_u = new_u = bch2_alloc_unpack(k); + bch2_alloc_to_v4(k, &old); + new = old; percpu_down_read(&c->mark_lock); g = gc_bucket(ca, iter->pos.offset); - gc_u = (struct bkey_alloc_unpacked) { - .dev = iter->pos.inode, - .bucket = iter->pos.offset, + gc = (struct bch_alloc_v4) { .gen = g->mark.gen, .data_type = g->mark.data_type, .dirty_sectors = g->mark.dirty_sectors, .cached_sectors = g->mark.cached_sectors, - .read_time = g->io_time[READ], - .write_time = g->io_time[WRITE], + .io_time[READ] = g->io_time[READ], + .io_time[WRITE] = g->io_time[WRITE], .stripe = g->stripe, .stripe_redundancy = g->stripe_redundancy, }; percpu_up_read(&c->mark_lock); if (metadata_only && - gc_u.data_type != BCH_DATA_sb && - gc_u.data_type != BCH_DATA_journal && - gc_u.data_type != BCH_DATA_btree) + gc.data_type != BCH_DATA_sb && + gc.data_type != BCH_DATA_journal && + gc.data_type != BCH_DATA_btree) return 0; - if (gen_after(old_u.gen, gc_u.gen)) + if (gen_after(old.gen, gc.gen)) return 0; #define copy_bucket_field(_f) \ - if (fsck_err_on(new_u._f != gc_u._f, c, \ + if (fsck_err_on(new._f != gc._f, c, \ "bucket %llu:%llu gen %u data type %s has wrong " #_f \ ": got %u, should be %u", \ iter->pos.inode, iter->pos.offset, \ - new_u.gen, \ - bch2_data_types[new_u.data_type], \ - new_u._f, gc_u._f)) \ - new_u._f = gc_u._f; \ + new.gen, \ + bch2_data_types[new.data_type], \ + new._f, gc._f)) \ + new._f = gc._f; \ copy_bucket_field(gen); copy_bucket_field(data_type); - copy_bucket_field(stripe); copy_bucket_field(dirty_sectors); copy_bucket_field(cached_sectors); copy_bucket_field(stripe_redundancy); copy_bucket_field(stripe); #undef copy_bucket_field - if (!bkey_alloc_unpacked_cmp(old_u, new_u)) + if (!bch2_alloc_v4_cmp(old, new)) return 0; - a = bch2_alloc_pack(trans, new_u); - if (IS_ERR(a)) - return PTR_ERR(a); + a = bch2_alloc_to_v4_mut(trans, k); + ret = PTR_ERR_OR_ZERO(a); + if (ret) + return ret; + + a->v = new; - ret = bch2_trans_update(trans, iter, &a->k, BTREE_TRIGGER_NORUN); + ret = bch2_trans_update(trans, iter, &a->k_i, BTREE_TRIGGER_NORUN); fsck_err: return ret; } @@ -1873,7 +1887,8 @@ static int bch2_alloc_write_oldest_gen(struct btree_trans *trans, struct btree_i { struct bch_dev *ca = bch_dev_bkey_exists(trans->c, iter->pos.inode); struct bkey_s_c k; - struct bkey_alloc_unpacked u; + struct bch_alloc_v4 a; + struct bkey_i_alloc_v4 *a_mut; int ret; k = bch2_btree_iter_peek_slot(iter); @@ -1881,14 +1896,19 @@ static int bch2_alloc_write_oldest_gen(struct btree_trans *trans, struct btree_i if (ret) return ret; - u = bch2_alloc_unpack(k); + bch2_alloc_to_v4(k, &a); - if (u.oldest_gen == ca->oldest_gen[iter->pos.offset]) + if (a.oldest_gen == ca->oldest_gen[iter->pos.offset]) return 0; - u.oldest_gen = ca->oldest_gen[iter->pos.offset]; + a_mut = bch2_alloc_to_v4_mut(trans, k); + ret = PTR_ERR_OR_ZERO(a_mut); + if (ret) + return ret; + + a_mut->v.oldest_gen = ca->oldest_gen[iter->pos.offset]; - return bch2_alloc_write(trans, iter, &u, BTREE_TRIGGER_NORUN); + return bch2_trans_update(trans, iter, &a_mut->k_i, 0); } int bch2_gc_gens(struct bch_fs *c) diff --git a/fs/bcachefs/btree_types.h b/fs/bcachefs/btree_types.h index e848b153ae93..d5be6004071a 100644 --- a/fs/bcachefs/btree_types.h +++ b/fs/bcachefs/btree_types.h @@ -678,6 +678,7 @@ enum btree_update_flags { ((1U << KEY_TYPE_alloc)| \ (1U << KEY_TYPE_alloc_v2)| \ (1U << KEY_TYPE_alloc_v3)| \ + (1U << KEY_TYPE_alloc_v4)| \ (1U << KEY_TYPE_stripe)| \ (1U << KEY_TYPE_inode)| \ (1U << KEY_TYPE_inode_v2)| \ diff --git a/fs/bcachefs/buckets.c b/fs/bcachefs/buckets.c index 8eeabb5a66bd..b4252c2f028a 100644 --- a/fs/bcachefs/buckets.c +++ b/fs/bcachefs/buckets.c @@ -508,15 +508,14 @@ int bch2_mark_alloc(struct btree_trans *trans, bool gc = flags & BTREE_TRIGGER_GC; u64 journal_seq = trans->journal_res.seq; struct bch_fs *c = trans->c; - struct bkey_alloc_unpacked old_u = bch2_alloc_unpack(old); - struct bkey_alloc_unpacked new_u = bch2_alloc_unpack(new); - struct bch_dev *ca = bch_dev_bkey_exists(c, new_u.dev); + struct bch_alloc_v4 old_a, new_a; + struct bch_dev *ca = bch_dev_bkey_exists(c, new.k->p.inode); struct bucket *g; struct bucket_mark old_m, m; int ret = 0; - if (bch2_trans_inconsistent_on(new_u.bucket < ca->mi.first_bucket || - new_u.bucket >= ca->mi.nbuckets, trans, + if (bch2_trans_inconsistent_on(new.k->p.offset < ca->mi.first_bucket || + new.k->p.offset >= ca->mi.nbuckets, trans, "alloc key outside range of device's buckets")) return -EIO; @@ -527,11 +526,13 @@ int bch2_mark_alloc(struct btree_trans *trans, !(flags & BTREE_TRIGGER_BUCKET_INVALIDATE)) return 0; + bch2_alloc_to_v4(old, &old_a); + bch2_alloc_to_v4(new, &new_a); + if ((flags & BTREE_TRIGGER_INSERT) && - !old_u.data_type != !new_u.data_type && - new.k->type == KEY_TYPE_alloc_v3) { - struct bch_alloc_v3 *v = (struct bch_alloc_v3 *) new.v; - u64 old_journal_seq = le64_to_cpu(v->journal_seq); + !old_a.data_type != !new_a.data_type && + new.k->type == KEY_TYPE_alloc_v4) { + struct bch_alloc_v4 *v = (struct bch_alloc_v4 *) new.v; BUG_ON(!journal_seq); @@ -540,18 +541,18 @@ int bch2_mark_alloc(struct btree_trans *trans, * before the bucket became empty again, then the we don't have * to wait on a journal flush before we can reuse the bucket: */ - new_u.journal_seq = !new_u.data_type && - (journal_seq == old_journal_seq || - bch2_journal_noflush_seq(&c->journal, old_journal_seq)) + new_a.journal_seq = !new_a.data_type && + (journal_seq == v->journal_seq || + bch2_journal_noflush_seq(&c->journal, v->journal_seq)) ? 0 : journal_seq; - v->journal_seq = cpu_to_le64(new_u.journal_seq); + v->journal_seq = new_a.journal_seq; } - if (old_u.data_type && !new_u.data_type && new_u.journal_seq) { + if (old_a.data_type && !new_a.data_type && new_a.journal_seq) { ret = bch2_set_bucket_needs_journal_commit(&c->buckets_waiting_for_journal, c->journal.flushed_seq_ondisk, - new_u.dev, new_u.bucket, - new_u.journal_seq); + new.k->p.inode, new.k->p.offset, + new_a.journal_seq); if (ret) { bch2_fs_fatal_error(c, "error setting bucket_needs_journal_commit: %i", ret); @@ -560,27 +561,27 @@ int bch2_mark_alloc(struct btree_trans *trans, } percpu_down_read(&c->mark_lock); - if (!gc && new_u.gen != old_u.gen) - *bucket_gen(ca, new_u.bucket) = new_u.gen; + if (!gc && new_a.gen != old_a.gen) + *bucket_gen(ca, new.k->p.offset) = new_a.gen; - g = __bucket(ca, new_u.bucket, gc); + g = __bucket(ca, new.k->p.offset, gc); old_m = bucket_cmpxchg(g, m, ({ - m.gen = new_u.gen; - m.data_type = new_u.data_type; - m.dirty_sectors = new_u.dirty_sectors; - m.cached_sectors = new_u.cached_sectors; - m.stripe = new_u.stripe != 0; + m.gen = new_a.gen; + m.data_type = new_a.data_type; + m.dirty_sectors = new_a.dirty_sectors; + m.cached_sectors = new_a.cached_sectors; + m.stripe = new_a.stripe != 0; })); bch2_dev_usage_update(c, ca, old_m, m, journal_seq, gc); - g->io_time[READ] = new_u.read_time; - g->io_time[WRITE] = new_u.write_time; - g->oldest_gen = new_u.oldest_gen; + g->io_time[READ] = new_a.io_time[READ]; + g->io_time[WRITE] = new_a.io_time[WRITE]; + g->oldest_gen = new_a.oldest_gen; g->gen_valid = 1; - g->stripe = new_u.stripe; - g->stripe_redundancy = new_u.stripe_redundancy; + g->stripe = new_a.stripe; + g->stripe_redundancy = new_a.stripe_redundancy; percpu_up_read(&c->mark_lock); /* @@ -598,7 +599,7 @@ int bch2_mark_alloc(struct btree_trans *trans, return ret; } - trace_invalidate(ca, bucket_to_sector(ca, new_u.bucket), + trace_invalidate(ca, bucket_to_sector(ca, new.k->p.offset), old_m.cached_sectors); } @@ -1378,50 +1379,32 @@ need_mark: /* trans_mark: */ -static int bch2_trans_start_alloc_update(struct btree_trans *trans, struct btree_iter *iter, - const struct bch_extent_ptr *ptr, - struct bkey_alloc_unpacked *u) -{ - struct bch_fs *c = trans->c; - struct bch_dev *ca = bch_dev_bkey_exists(c, ptr->dev); - struct bkey_s_c k; - int ret; - - bch2_trans_iter_init(trans, iter, BTREE_ID_alloc, - POS(ptr->dev, PTR_BUCKET_NR(ca, ptr)), - BTREE_ITER_WITH_UPDATES| - BTREE_ITER_CACHED| - BTREE_ITER_INTENT); - k = bch2_btree_iter_peek_slot(iter); - ret = bkey_err(k); - if (ret) { - bch2_trans_iter_exit(trans, iter); - return ret; - } - - *u = bch2_alloc_unpack(k); - return 0; -} - static int bch2_trans_mark_pointer(struct btree_trans *trans, struct bkey_s_c k, struct extent_ptr_decoded p, s64 sectors, enum bch_data_type data_type) { struct btree_iter iter; - struct bkey_alloc_unpacked u; + struct bkey_i_alloc_v4 *a; + u16 dirty_sectors, cached_sectors; int ret; - ret = bch2_trans_start_alloc_update(trans, &iter, &p.ptr, &u); - if (ret) - return ret; + a = bch2_trans_start_alloc_update(trans, &iter, PTR_BUCKET_POS(trans->c, &p.ptr)); + if (IS_ERR(a)) + return PTR_ERR(a); + + dirty_sectors = a->v.dirty_sectors; + cached_sectors = a->v.cached_sectors; ret = __mark_pointer(trans, k, &p.ptr, sectors, data_type, - u.gen, &u.data_type, - &u.dirty_sectors, &u.cached_sectors); + a->v.gen, &a->v.data_type, + &dirty_sectors, &cached_sectors); if (ret) goto out; - ret = bch2_alloc_write(trans, &iter, &u, 0); + a->v.dirty_sectors = dirty_sectors; + a->v.cached_sectors = cached_sectors; + + ret = bch2_trans_update(trans, &iter, &a->k_i, 0); if (ret) goto out; out: @@ -1554,7 +1537,7 @@ static int bch2_trans_mark_stripe_bucket(struct btree_trans *trans, struct bch_fs *c = trans->c; const struct bch_extent_ptr *ptr = &s.v->ptrs[idx]; struct btree_iter iter; - struct bkey_alloc_unpacked u; + struct bkey_i_alloc_v4 *a; enum bch_data_type data_type = idx >= s.v->nr_blocks - s.v->nr_redundant ? BCH_DATA_parity : 0; s64 sectors = data_type ? le16_to_cpu(s.v->sectors) : 0; @@ -1563,59 +1546,59 @@ static int bch2_trans_mark_stripe_bucket(struct btree_trans *trans, if (deleting) sectors = -sectors; - ret = bch2_trans_start_alloc_update(trans, &iter, ptr, &u); - if (ret) - return ret; + a = bch2_trans_start_alloc_update(trans, &iter, PTR_BUCKET_POS(c, ptr)); + if (IS_ERR(a)) + return PTR_ERR(a); ret = check_bucket_ref(c, s.s_c, ptr, sectors, data_type, - u.gen, u.data_type, - u.dirty_sectors, u.cached_sectors); + a->v.gen, a->v.data_type, + a->v.dirty_sectors, a->v.cached_sectors); if (ret) goto err; if (!deleting) { - if (bch2_trans_inconsistent_on(u.stripe || - u.stripe_redundancy, trans, + if (bch2_trans_inconsistent_on(a->v.stripe || + a->v.stripe_redundancy, trans, "bucket %llu:%llu gen %u data type %s dirty_sectors %u: multiple stripes using same bucket (%u, %llu)", - iter.pos.inode, iter.pos.offset, u.gen, - bch2_data_types[u.data_type], - u.dirty_sectors, - u.stripe, s.k->p.offset)) { + iter.pos.inode, iter.pos.offset, a->v.gen, + bch2_data_types[a->v.data_type], + a->v.dirty_sectors, + a->v.stripe, s.k->p.offset)) { ret = -EIO; goto err; } - if (bch2_trans_inconsistent_on(data_type && u.dirty_sectors, trans, + if (bch2_trans_inconsistent_on(data_type && a->v.dirty_sectors, trans, "bucket %llu:%llu gen %u data type %s dirty_sectors %u: data already in stripe bucket %llu", - iter.pos.inode, iter.pos.offset, u.gen, - bch2_data_types[u.data_type], - u.dirty_sectors, + iter.pos.inode, iter.pos.offset, a->v.gen, + bch2_data_types[a->v.data_type], + a->v.dirty_sectors, s.k->p.offset)) { ret = -EIO; goto err; } - u.stripe = s.k->p.offset; - u.stripe_redundancy = s.v->nr_redundant; + a->v.stripe = s.k->p.offset; + a->v.stripe_redundancy = s.v->nr_redundant; } else { - if (bch2_trans_inconsistent_on(u.stripe != s.k->p.offset || - u.stripe_redundancy != s.v->nr_redundant, trans, + if (bch2_trans_inconsistent_on(a->v.stripe != s.k->p.offset || + a->v.stripe_redundancy != s.v->nr_redundant, trans, "bucket %llu:%llu gen %u: not marked as stripe when deleting stripe %llu (got %u)", - iter.pos.inode, iter.pos.offset, u.gen, - s.k->p.offset, u.stripe)) { + iter.pos.inode, iter.pos.offset, a->v.gen, + s.k->p.offset, a->v.stripe)) { ret = -EIO; goto err; } - u.stripe = 0; - u.stripe_redundancy = 0; + a->v.stripe = 0; + a->v.stripe_redundancy = 0; } - u.dirty_sectors += sectors; + a->v.dirty_sectors += sectors; if (data_type) - u.data_type = !deleting ? data_type : 0; + a->v.data_type = !deleting ? data_type : 0; - ret = bch2_alloc_write(trans, &iter, &u, 0); + ret = bch2_trans_update(trans, &iter, &a->k_i, 0); if (ret) goto err; err: @@ -1845,11 +1828,7 @@ static int __bch2_trans_mark_metadata_bucket(struct btree_trans *trans, { struct bch_fs *c = trans->c; struct btree_iter iter; - struct bkey_alloc_unpacked u; - struct bch_extent_ptr ptr = { - .dev = ca->dev_idx, - .offset = bucket_to_sector(ca, b), - }; + struct bkey_i_alloc_v4 *a; int ret = 0; /* @@ -1858,26 +1837,27 @@ static int __bch2_trans_mark_metadata_bucket(struct btree_trans *trans, if (b >= ca->mi.nbuckets) return 0; - ret = bch2_trans_start_alloc_update(trans, &iter, &ptr, &u); - if (ret) - return ret; + a = bch2_trans_start_alloc_update(trans, &iter, POS(ca->dev_idx, b)); + if (IS_ERR(a)) + return PTR_ERR(a); - if (u.data_type && u.data_type != type) { + if (a->v.data_type && a->v.data_type != type) { bch2_fsck_err(c, FSCK_CAN_IGNORE|FSCK_NEED_FSCK, "bucket %llu:%llu gen %u different types of data in same bucket: %s, %s\n" "while marking %s", - iter.pos.inode, iter.pos.offset, u.gen, - bch2_data_types[u.data_type], + iter.pos.inode, iter.pos.offset, a->v.gen, + bch2_data_types[a->v.data_type], bch2_data_types[type], bch2_data_types[type]); ret = -EIO; goto out; } - u.data_type = type; - u.dirty_sectors = sectors; + a->v.data_type = type; + a->v.dirty_sectors = sectors; - ret = bch2_alloc_write(trans, &iter, &u, 0); + ret = bch2_trans_update(trans, &iter, &a->k_i, + BTREE_UPDATE_NO_KEY_CACHE_COHERENCY); if (ret) goto out; out: diff --git a/fs/bcachefs/buckets.h b/fs/bcachefs/buckets.h index 90f53e677281..4937d7939c2b 100644 --- a/fs/bcachefs/buckets.h +++ b/fs/bcachefs/buckets.h @@ -97,6 +97,14 @@ static inline size_t PTR_BUCKET_NR(const struct bch_dev *ca, return sector_to_bucket(ca, ptr->offset); } +static inline struct bpos PTR_BUCKET_POS(const struct bch_fs *c, + const struct bch_extent_ptr *ptr) +{ + struct bch_dev *ca = bch_dev_bkey_exists(c, ptr->dev); + + return POS(ptr->dev, PTR_BUCKET_NR(ca, ptr)); +} + static inline struct bucket *PTR_GC_BUCKET(struct bch_dev *ca, const struct bch_extent_ptr *ptr) { diff --git a/fs/bcachefs/buckets_types.h b/fs/bcachefs/buckets_types.h index 2c73dc60b838..4f7018398385 100644 --- a/fs/bcachefs/buckets_types.h +++ b/fs/bcachefs/buckets_types.h @@ -111,7 +111,7 @@ struct copygc_heap_entry { u8 dev; u8 gen; u8 replicas; - u16 fragmentation; + u32 fragmentation; u32 sectors; u64 offset; }; diff --git a/fs/bcachefs/movinggc.c b/fs/bcachefs/movinggc.c index a54a83d3247b..aecec55eb421 100644 --- a/fs/bcachefs/movinggc.c +++ b/fs/bcachefs/movinggc.c @@ -129,7 +129,7 @@ static int walk_buckets_to_copygc(struct bch_fs *c) struct btree_trans trans; struct btree_iter iter; struct bkey_s_c k; - struct bkey_alloc_unpacked u; + struct bch_alloc_v4 a; int ret; bch2_trans_init(&trans, c, 0, 0); @@ -139,20 +139,20 @@ static int walk_buckets_to_copygc(struct bch_fs *c) struct bch_dev *ca = bch_dev_bkey_exists(c, iter.pos.inode); struct copygc_heap_entry e; - u = bch2_alloc_unpack(k); + bch2_alloc_to_v4(k, &a); - if (u.data_type != BCH_DATA_user || - u.dirty_sectors >= ca->mi.bucket_size || + if (a.data_type != BCH_DATA_user || + a.dirty_sectors >= ca->mi.bucket_size || bch2_bucket_is_open(c, iter.pos.inode, iter.pos.offset)) continue; e = (struct copygc_heap_entry) { .dev = iter.pos.inode, - .gen = u.gen, - .replicas = 1 + u.stripe_redundancy, - .fragmentation = u.dirty_sectors * (1U << 15) - / ca->mi.bucket_size, - .sectors = u.dirty_sectors, + .gen = a.gen, + .replicas = 1 + a.stripe_redundancy, + .fragmentation = div_u64((u64) a.dirty_sectors * (1ULL << 31), + ca->mi.bucket_size), + .sectors = a.dirty_sectors, .offset = bucket_to_sector(ca, iter.pos.offset), }; heap_add_or_replace(h, e, -fragmentation_cmp, NULL); @@ -180,7 +180,7 @@ static int check_copygc_was_done(struct bch_fs *c, struct btree_trans trans; struct btree_iter iter; struct bkey_s_c k; - struct bkey_alloc_unpacked u; + struct bch_alloc_v4 a; struct copygc_heap_entry *i; int ret = 0; @@ -199,10 +199,10 @@ static int check_copygc_was_done(struct bch_fs *c, if (ret) break; - u = bch2_alloc_unpack(k); + bch2_alloc_to_v4(k, &a); - if (u.gen == i->gen && u.dirty_sectors) { - *sectors_not_moved += u.dirty_sectors; + if (a.gen == i->gen && a.dirty_sectors) { + *sectors_not_moved += a.dirty_sectors; *buckets_not_moved += 1; } } |