author | Kent Overstreet <kent.overstreet@linux.dev> | 2022-12-05 16:24:19 +0100
committer | Kent Overstreet <kent.overstreet@linux.dev> | 2023-10-22 23:09:53 +0200
commit | 80c33085783656617d0d07e1bc9fba70a592ce5c (patch)
tree | 2f2a6b43a3b6caab092c4f74df9f5a582e1a60b0 /fs/bcachefs/movinggc.c
parent | bcachefs: Use btree write buffer for LRU btree (diff)
bcachefs: Fragmentation LRU
Now that we have much more efficient updates to the LRU btree, this
patch adds a new LRU that indexes buckets by fragmentation.
This means copygc no longer has to scan every bucket to find buckets
that need to be evacuated.
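As background for the two paragraphs above, here is a minimal standalone sketch (plain C, not bcachefs code; `frag_score()` is a made-up name) of the fixed-point fullness score such an LRU can be keyed by. It mirrors the `dirty_sectors * 2^31 / bucket_size` calculation visible in the removed heap code in the diff below: emptier buckets get lower scores, so an in-order walk of a btree keyed this way visits the cheapest-to-evacuate buckets first.

```c
/* Illustrative only, not bcachefs code: a fixed-point score that a
 * fragmentation LRU can be keyed by. Emptier buckets get lower scores,
 * so they sort first and are the cheapest to evacuate. */
#include <stdint.h>
#include <stdio.h>

static uint64_t frag_score(uint64_t dirty_sectors, uint64_t bucket_size)
{
	return dirty_sectors * (UINT64_C(1) << 31) / bucket_size;
}

int main(void)
{
	/* A nearly empty bucket sorts well before a half-full one: */
	printf("mostly empty: %llu\n", (unsigned long long) frag_score(16, 512));
	printf("half full:    %llu\n", (unsigned long long) frag_score(256, 512));
	return 0;
}
```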
Changes:
- A new field in bch_alloc_v4, fragmentation_lru - this corresponds to
the bucket's position in the fragmentation LRU. We add a new field
for this instead of calculating it as needed because we may make the
fragmentation LRU optional; this field indicates whether a bucket is
on the fragmentation LRU.
Also, zoned devices will introduce variable bucket sizes; explicitly
recording the LRU position will be safer for them.
- A new copygc path that uses the fragmentation LRU instead of
scanning every bucket and building up an in-memory heap (a simplified
sketch of this flow follows this list).
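To make the second item concrete, here is a hedged, self-contained sketch of the new flow, again plain C rather than bcachefs code: a sorted array stands in for the LRU btree, and `bucket_is_movable()`/`evacuate()` are made-up stand-ins for `bch2_bucket_is_movable()` and `__bch2_evacuate_bucket()`. The point is that candidates arrive already ordered by fragmentation, so there is no per-bucket scan and no heap:

```c
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

struct lru_entry {
	uint64_t frag;		/* fragmentation score, the LRU "time" */
	uint64_t bucket;
};

/* Stand-in for bch2_bucket_is_movable(): the real check re-reads the
 * bucket's alloc key and confirms the LRU entry is still current. */
static bool bucket_is_movable(const struct lru_entry *e)
{
	return e->frag != 0;
}

/* Stand-in for __bch2_evacuate_bucket(). */
static void evacuate(uint64_t bucket)
{
	printf("evacuating bucket %llu\n", (unsigned long long) bucket);
}

int main(void)
{
	struct lru_entry lru[] = {	/* already in key order */
		{ 10, 7 }, { 100, 3 }, { 900, 12 },
	};
	unsigned nr_evacuated = 0;

	/* Walk the LRU in order, the analogue of the BTREE_ID_lru
	 * iteration in the patch, stopping after a fixed batch like
	 * bch2_copygc()'s limit of 32 buckets per pass: */
	for (size_t i = 0; i < sizeof(lru) / sizeof(lru[0]) &&
			   nr_evacuated < 32; i++) {
		if (!bucket_is_movable(&lru[i]))
			continue;
		evacuate(lru[i].bucket);
		nr_evacuated++;
	}
	return 0;
}
```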
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
Diffstat (limited to 'fs/bcachefs/movinggc.c')
-rw-r--r-- | fs/bcachefs/movinggc.c | 171 |
1 file changed, 70 insertions, 101 deletions
diff --git a/fs/bcachefs/movinggc.c b/fs/bcachefs/movinggc.c
index b420b79edb36..74e57f6ea148 100644
--- a/fs/bcachefs/movinggc.c
+++ b/fs/bcachefs/movinggc.c
@@ -10,6 +10,7 @@
 #include "alloc_foreground.h"
 #include "btree_iter.h"
 #include "btree_update.h"
+#include "btree_write_buffer.h"
 #include "buckets.h"
 #include "clock.h"
 #include "disk_groups.h"
@@ -19,6 +20,7 @@
 #include "eytzinger.h"
 #include "io.h"
 #include "keylist.h"
+#include "lru.h"
 #include "move.h"
 #include "movinggc.h"
 #include "super-io.h"
@@ -31,138 +33,105 @@
 #include <linux/sort.h>
 #include <linux/wait.h>
 
-static inline int fragmentation_cmp(copygc_heap *heap,
-				    struct copygc_heap_entry l,
-				    struct copygc_heap_entry r)
+static int bch2_bucket_is_movable(struct btree_trans *trans,
+				  struct bpos bucket, u64 time, u8 *gen)
 {
-	return cmp_int(l.fragmentation, r.fragmentation);
-}
-
-static int find_buckets_to_copygc(struct bch_fs *c)
-{
-	copygc_heap *h = &c->copygc_heap;
-	struct btree_trans trans;
 	struct btree_iter iter;
 	struct bkey_s_c k;
+	struct bch_alloc_v4 _a;
+	const struct bch_alloc_v4 *a;
 	int ret;
 
-	bch2_trans_init(&trans, c, 0, 0);
+	if (bch2_bucket_is_open(trans->c, bucket.inode, bucket.offset))
+		return 0;
 
-	/*
-	 * Find buckets with lowest sector counts, skipping completely
-	 * empty buckets, by building a maxheap sorted by sector count,
-	 * and repeatedly replacing the maximum element until all
-	 * buckets have been visited.
-	 */
-	h->used = 0;
-
-	for_each_btree_key(&trans, iter, BTREE_ID_alloc, POS_MIN,
-			   BTREE_ITER_PREFETCH, k, ret) {
-		struct bch_dev *ca = bch_dev_bkey_exists(c, iter.pos.inode);
-		struct copygc_heap_entry e;
-		struct bch_alloc_v4 a_convert;
-		const struct bch_alloc_v4 *a;
-
-		a = bch2_alloc_to_v4(k, &a_convert);
-
-		if ((a->data_type != BCH_DATA_btree &&
-		     a->data_type != BCH_DATA_user) ||
-		    a->dirty_sectors >= ca->mi.bucket_size ||
-		    bch2_bucket_is_open(c, iter.pos.inode, iter.pos.offset))
-			continue;
+	bch2_trans_iter_init(trans, &iter, BTREE_ID_alloc, bucket, 0);
+	k = bch2_btree_iter_peek_slot(&iter);
+	ret = bkey_err(k);
+	bch2_trans_iter_exit(trans, &iter);
 
-		e = (struct copygc_heap_entry) {
-			.dev		= iter.pos.inode,
-			.gen		= a->gen,
-			.replicas	= 1 + a->stripe_redundancy,
-			.fragmentation	= div_u64((u64) a->dirty_sectors * (1ULL << 31),
-						  ca->mi.bucket_size),
-			.sectors	= a->dirty_sectors,
-			.bucket		= iter.pos.offset,
-		};
-		heap_add_or_replace(h, e, -fragmentation_cmp, NULL);
+	if (ret)
+		return ret;
+
+	a = bch2_alloc_to_v4(k, &_a);
+	*gen = a->gen;
+	ret = (a->data_type == BCH_DATA_btree ||
+	       a->data_type == BCH_DATA_user) &&
+		a->fragmentation_lru &&
+		a->fragmentation_lru <= time;
+
+	if (ret) {
+		struct printbuf buf = PRINTBUF;
+
+		bch2_bkey_val_to_text(&buf, trans->c, k);
+		pr_debug("%s", buf.buf);
+		printbuf_exit(&buf);
 	}
-	bch2_trans_iter_exit(&trans, &iter);
 
-	bch2_trans_exit(&trans);
 	return ret;
 }
 
+static int bch2_copygc_next_bucket(struct btree_trans *trans,
+			struct bpos *bucket, u8 *gen, struct bpos *pos)
+{
+	struct btree_iter iter;
+	struct bkey_s_c k;
+	int ret;
+
+	ret = for_each_btree_key2_upto(trans, iter, BTREE_ID_lru,
+				       bpos_max(*pos, lru_pos(BCH_LRU_FRAGMENTATION_START, 0, 0)),
+				       lru_pos(BCH_LRU_FRAGMENTATION_START, U64_MAX, LRU_TIME_MAX),
+				       0, k, ({
+		*bucket = u64_to_bucket(k.k->p.offset);
+
+		bch2_bucket_is_movable(trans, *bucket, lru_pos_time(k.k->p), gen);
+	}));
+
+	*pos = iter.pos;
+	if (ret < 0)
+		return ret;
+	return ret ? 0 : -ENOENT;
+}
+
 static int bch2_copygc(struct bch_fs *c)
 {
-	copygc_heap *h = &c->copygc_heap;
-	struct copygc_heap_entry e;
 	struct bch_move_stats move_stats;
-	struct bch_dev *ca;
-	unsigned dev_idx;
-	size_t heap_size = 0;
+	struct btree_trans trans;
 	struct moving_context ctxt;
 	struct data_update_opts data_opts = {
 		.btree_insert_flags = BTREE_INSERT_USE_RESERVE|JOURNAL_WATERMARK_copygc,
 	};
+	struct bpos bucket;
+	struct bpos pos;
+	u8 gen = 0;
+	unsigned nr_evacuated;
 	int ret = 0;
 
 	bch2_move_stats_init(&move_stats, "copygc");
-
-	for_each_rw_member(ca, c, dev_idx)
-		heap_size += ca->mi.nbuckets >> 7;
-
-	if (h->size < heap_size) {
-		free_heap(&c->copygc_heap);
-		if (!init_heap(&c->copygc_heap, heap_size, GFP_KERNEL)) {
-			bch_err(c, "error allocating copygc heap");
-			return 0;
-		}
-	}
-
-	ret = find_buckets_to_copygc(c);
-	if (ret) {
-		bch2_fs_fatal_error(c, "error walking buckets to copygc!");
-		return ret;
-	}
-
-	if (!h->used) {
-		s64 wait = S64_MAX, dev_wait;
-		u64 dev_min_wait_fragmented = 0;
-		u64 dev_min_wait_allowed = 0;
-		int dev_min_wait = -1;
-
-		for_each_rw_member(ca, c, dev_idx) {
-			struct bch_dev_usage usage = bch2_dev_usage_read(ca);
-			s64 allowed = ((__dev_buckets_available(ca, usage, RESERVE_none) *
-					ca->mi.bucket_size) >> 1);
-			s64 fragmented = usage.d[BCH_DATA_user].fragmented;
-
-			dev_wait = max(0LL, allowed - fragmented);
-
-			if (dev_min_wait < 0 || dev_wait < wait) {
-				dev_min_wait = dev_idx;
-				dev_min_wait_fragmented = fragmented;
-				dev_min_wait_allowed	= allowed;
-			}
-		}
-
-		bch_err_ratelimited(c, "copygc requested to run but found no buckets to move! dev %u fragmented %llu allowed %llu",
-				    dev_min_wait, dev_min_wait_fragmented, dev_min_wait_allowed);
-		return 0;
-	}
-
-	heap_resort(h, fragmentation_cmp, NULL);
-
 	bch2_moving_ctxt_init(&ctxt, c, NULL, &move_stats,
 			      writepoint_ptr(&c->copygc_write_point),
 			      false);
+	bch2_trans_init(&trans, c, 0, 0);
+
+	ret = bch2_btree_write_buffer_flush(&trans);
+	BUG_ON(ret);
 
-	/* not correct w.r.t. device removal */
-	while (h->used && !ret) {
-		BUG_ON(!heap_pop(h, e, -fragmentation_cmp, NULL));
-		ret = __bch2_evacuate_bucket(&ctxt, POS(e.dev, e.bucket), e.gen,
-					     data_opts);
+	for (nr_evacuated = 0, pos = POS_MIN;
+	     nr_evacuated < 32 && !ret;
+	     nr_evacuated++, pos = bpos_nosnap_successor(pos)) {
+		ret = bch2_copygc_next_bucket(&trans, &bucket, &gen, &pos) ?:
+			__bch2_evacuate_bucket(&trans, &ctxt, bucket, gen, data_opts);
+		if (bkey_eq(pos, POS_MAX))
+			break;
 	}
 
+	bch2_trans_exit(&trans);
 	bch2_moving_ctxt_exit(&ctxt);
 
+	/* no entries in LRU btree found, or got to end: */
+	if (ret == -ENOENT)
+		ret = 0;
+
 	if (ret < 0 && !bch2_err_matches(ret, EROFS))
 		bch_err(c, "error from bch2_move_data() in copygc: %s", bch2_err_str(ret));