diff options
-rw-r--r-- | fs/bcachefs/bcachefs.h | 11 | ||||
-rw-r--r-- | fs/bcachefs/btree_gc.c | 202 | ||||
-rw-r--r-- | fs/bcachefs/buckets.c | 152 | ||||
-rw-r--r-- | fs/bcachefs/reflink.c | 2 | ||||
-rw-r--r-- | fs/bcachefs/reflink.h | 24 |
5 files changed, 359 insertions, 32 deletions
Review notes on this revision of the patch (hunk line counts are unchanged):
 * bch2_gc_reflink_done(): the online (non-initial) walk now advances its
   index into reflink_gc_table (idx++), so each refcounted key is compared
   against its own table entry instead of always against entry 0.
 * bch2_gc_reflink_start(): the online walk now returns ret, so -ENOMEM from
   genradix_ptr_alloc() and btree iteration errors reach bch2_gc() instead
   of being dropped by an unconditional "return 0".

diff --git a/fs/bcachefs/bcachefs.h b/fs/bcachefs/bcachefs.h
index 6962b3ddf575..9bd60369703f 100644
--- a/fs/bcachefs/bcachefs.h
+++ b/fs/bcachefs/bcachefs.h
@@ -391,6 +391,14 @@ struct gc_pos {
 	unsigned		level;
 };
 
+struct reflink_gc {
+	u64		offset;
+	u32		size;
+	u32		refcount;
+};
+
+typedef GENRADIX(struct reflink_gc) reflink_gc_table;
+
 struct io_count {
 	u64			sectors[2][BCH_DATA_NR];
 };
@@ -806,6 +814,9 @@ mempool_t bio_bounce_pages;
 
 	/* REFLINK */
 	u64			reflink_hint;
+	reflink_gc_table	reflink_gc_table;
+	size_t			reflink_gc_nr;
+	size_t			reflink_gc_idx;
 
 	/* VFS IO PATH - fs-io.c */
 	struct bio_set		writepage_bioset;
diff --git a/fs/bcachefs/btree_gc.c b/fs/bcachefs/btree_gc.c
index 5b839cca8a9d..5a2acaba04c9 100644
--- a/fs/bcachefs/btree_gc.c
+++ b/fs/bcachefs/btree_gc.c
@@ -23,6 +23,7 @@
 #include "keylist.h"
 #include "move.h"
 #include "recovery.h"
+#include "reflink.h"
 #include "replicas.h"
 #include "super-io.h"
 #include "trace.h"
@@ -1285,6 +1286,201 @@ static int bch2_gc_start(struct bch_fs *c,
 	return 0;
 }
 
+static int bch2_gc_reflink_done_initial_fn(struct bch_fs *c, struct bkey_s_c k)
+{
+	struct reflink_gc *r;
+	const __le64 *refcount = bkey_refcount_c(k);
+	char buf[200];
+	int ret = 0;
+
+	if (!refcount)
+		return 0;
+
+	r = genradix_ptr(&c->reflink_gc_table, c->reflink_gc_idx++);
+	if (!r)
+		return -ENOMEM;
+
+	if (!r ||
+	    r->offset != k.k->p.offset ||
+	    r->size != k.k->size) {
+		bch_err(c, "unexpected inconsistency walking reflink table at gc finish");
+		return -EINVAL;
+	}
+
+	if (fsck_err_on(r->refcount != le64_to_cpu(*refcount), c,
+			"reflink key has wrong refcount:\n"
+			" %s\n"
+			" should be %u",
+			(bch2_bkey_val_to_text(&PBUF(buf), c, k), buf),
+			r->refcount)) {
+		struct bkey_i *new;
+
+		new = kmalloc(bkey_bytes(k.k), GFP_KERNEL);
+		if (!new) {
+			ret = -ENOMEM;
+			goto fsck_err;
+		}
+
+		bkey_reassemble(new, k);
+
+		if (!r->refcount) {
+			new->k.type = KEY_TYPE_deleted;
+			new->k.size = 0;
+		} else {
+			*bkey_refcount(new) = cpu_to_le64(r->refcount);
+		}
+
+		ret = bch2_journal_key_insert(c, BTREE_ID_reflink, 0, new);
+		if (ret)
+			kfree(new);
+	}
+fsck_err:
+	return ret;
+}
+
+static int bch2_gc_reflink_done(struct bch_fs *c, bool initial,
+				bool metadata_only)
+{
+	struct btree_trans trans;
+	struct btree_iter *iter;
+	struct bkey_s_c k;
+	struct reflink_gc *r;
+	size_t idx = 0;
+	char buf[200];
+	int ret = 0;
+
+	if (metadata_only)
+		return 0;
+
+	if (initial) {
+		c->reflink_gc_idx = 0;
+
+		ret = bch2_btree_and_journal_walk(c, BTREE_ID_reflink,
+				bch2_gc_reflink_done_initial_fn);
+		goto out;
+	}
+
+	bch2_trans_init(&trans, c, 0, 0);
+
+	for_each_btree_key(&trans, iter, BTREE_ID_reflink, POS_MIN,
+			   BTREE_ITER_PREFETCH, k, ret) {
+		const __le64 *refcount = bkey_refcount_c(k);
+
+		if (!refcount)
+			continue;
+
+		r = genradix_ptr(&c->reflink_gc_table, idx++);
+		if (!r ||
+		    r->offset != k.k->p.offset ||
+		    r->size != k.k->size) {
+			bch_err(c, "unexpected inconsistency walking reflink table at gc finish");
+			ret = -EINVAL;
+			break;
+		}
+
+		if (fsck_err_on(r->refcount != le64_to_cpu(*refcount), c,
+				"reflink key has wrong refcount:\n"
+				" %s\n"
+				" should be %u",
+				(bch2_bkey_val_to_text(&PBUF(buf), c, k), buf),
+				r->refcount)) {
+			struct bkey_i *new;
+
+			new = kmalloc(bkey_bytes(k.k), GFP_KERNEL);
+			if (!new) {
+				ret = -ENOMEM;
+				break;
+			}
+
+			bkey_reassemble(new, k);
+
+			if (!r->refcount)
+				new->k.type = KEY_TYPE_deleted;
+			else
+				*bkey_refcount(new) = cpu_to_le64(r->refcount);
+
+			ret = __bch2_trans_do(&trans, NULL, NULL, 0,
+					__bch2_btree_insert(&trans, BTREE_ID_reflink, new));
+			kfree(new);
+
+			if (ret)
+				break;
+		}
+	}
+fsck_err:
+	bch2_trans_iter_put(&trans, iter);
+	bch2_trans_exit(&trans);
+out:
+	genradix_free(&c->reflink_gc_table);
+	c->reflink_gc_nr = 0;
+	return ret;
+}
+
+static int bch2_gc_reflink_start_initial_fn(struct bch_fs *c, struct bkey_s_c k)
+{
+
+	struct reflink_gc *r;
+	const __le64 *refcount = bkey_refcount_c(k);
+
+	if (!refcount)
+		return 0;
+
+	r = genradix_ptr_alloc(&c->reflink_gc_table, c->reflink_gc_nr++,
+			       GFP_KERNEL);
+	if (!r)
+		return -ENOMEM;
+
+	r->offset = k.k->p.offset;
+	r->size = k.k->size;
+	r->refcount = 0;
+	return 0;
+}
+
+static int bch2_gc_reflink_start(struct bch_fs *c, bool initial,
+				 bool metadata_only)
+{
+	struct btree_trans trans;
+	struct btree_iter *iter;
+	struct bkey_s_c k;
+	struct reflink_gc *r;
+	int ret;
+
+	if (metadata_only)
+		return 0;
+
+	genradix_free(&c->reflink_gc_table);
+	c->reflink_gc_nr = 0;
+
+	if (initial)
+		return bch2_btree_and_journal_walk(c, BTREE_ID_reflink,
+				bch2_gc_reflink_start_initial_fn);
+
+	bch2_trans_init(&trans, c, 0, 0);
+
+	for_each_btree_key(&trans, iter, BTREE_ID_reflink, POS_MIN,
+			   BTREE_ITER_PREFETCH, k, ret) {
+		const __le64 *refcount = bkey_refcount_c(k);
+
+		if (!refcount)
+			continue;
+
+		r = genradix_ptr_alloc(&c->reflink_gc_table, c->reflink_gc_nr++,
+				       GFP_KERNEL);
+		if (!r) {
+			ret = -ENOMEM;
+			break;
+		}
+
+		r->offset = k.k->p.offset;
+		r->size = k.k->size;
+		r->refcount = 0;
+	}
+	bch2_trans_iter_put(&trans, iter);
+
+	bch2_trans_exit(&trans);
+	return ret;
+}
+
 /**
  * bch2_gc - walk _all_ references to buckets, and recompute them:
  *
@@ -1319,7 +1515,8 @@ int bch2_gc(struct bch_fs *c, bool initial, bool metadata_only)
 	closure_wait_event(&c->btree_interior_update_wait,
 			   !bch2_btree_interior_updates_nr_pending(c));
 again:
-	ret = bch2_gc_start(c, metadata_only);
+	ret = bch2_gc_start(c, metadata_only) ?:
+		bch2_gc_reflink_start(c, initial, metadata_only);
 	if (ret)
 		goto out;
 
@@ -1381,7 +1578,8 @@ out:
 		bch2_journal_block(&c->journal);
 
 		percpu_down_write(&c->mark_lock);
-		ret = bch2_gc_done(c, initial, metadata_only);
+		ret = bch2_gc_reflink_done(c, initial, metadata_only) ?:
+			bch2_gc_done(c, initial, metadata_only);
 
 		bch2_journal_unblock(&c->journal);
 	} else {
diff --git a/fs/bcachefs/buckets.c b/fs/bcachefs/buckets.c
index b452ff003e6c..ba6b1e770dcf 100644
--- a/fs/bcachefs/buckets.c
+++ b/fs/bcachefs/buckets.c
@@ -14,6 +14,7 @@
 #include "ec.h"
 #include "error.h"
 #include "movinggc.h"
+#include "reflink.h"
 #include "replicas.h"
 #include "trace.h"
 
@@ -1076,6 +1077,124 @@ static int bch2_mark_stripe(struct bch_fs *c,
 	return 0;
 }
 
+static int __reflink_p_frag_references(struct bkey_s_c_reflink_p p,
+				       u64 p_start, u64 p_end,
+				       u64 v_start, u64 v_end)
+{
+	if (p_start == p_end)
+		return false;
+
+	p_start += le64_to_cpu(p.v->idx);
+	p_end += le64_to_cpu(p.v->idx);
+
+	if (p_end <= v_start)
+		return false;
+	if (p_start >= v_end)
+		return false;
+	return true;
+}
+
+static int reflink_p_frag_references(struct bkey_s_c_reflink_p p,
+				     u64 start, u64 end,
+				     struct bkey_s_c k)
+{
+	return __reflink_p_frag_references(p, start, end,
+					   bkey_start_offset(k.k),
+					   k.k->p.offset);
+}
+
+static int __bch2_mark_reflink_p(struct bch_fs *c,
+				 struct bkey_s_c_reflink_p p,
+				 u64 idx, unsigned sectors,
+				 unsigned front_frag,
+				 unsigned back_frag,
+				 unsigned flags,
+				 size_t *r_idx)
+{
+	struct reflink_gc *r;
+	int add = !(flags & BTREE_TRIGGER_OVERWRITE) ? 1 : -1;
+	int frags_referenced;
+
+	while (1) {
+		if (*r_idx >= c->reflink_gc_nr)
+			goto not_found;
+		r = genradix_ptr(&c->reflink_gc_table, *r_idx);
+		BUG_ON(!r);
+
+		if (r->offset > idx)
+			break;
+		(*r_idx)++;
+	}
+
+	frags_referenced =
+		__reflink_p_frag_references(p, 0, front_frag,
+					    r->offset - r->size, r->offset) +
+		__reflink_p_frag_references(p, back_frag, p.k->size,
+					    r->offset - r->size, r->offset);
+
+	if (frags_referenced == 2) {
+		BUG_ON(!(flags & BTREE_TRIGGER_OVERWRITE_SPLIT));
+		add = -add;
+	} else if (frags_referenced == 1) {
+		BUG_ON(!(flags & BTREE_TRIGGER_OVERWRITE));
+		add = 0;
+	}
+
+	BUG_ON((s64) r->refcount + add < 0);
+
+	r->refcount += add;
+	return min_t(u64, sectors, r->offset - idx);
+not_found:
+	bch2_fs_inconsistent(c,
+		"%llu:%llu len %u points to nonexistent indirect extent %llu",
+		p.k->p.inode, p.k->p.offset, p.k->size, idx);
+	bch2_inconsistent_error(c);
+	return -EIO;
+}
+
+static int bch2_mark_reflink_p(struct bch_fs *c,
+			       struct bkey_s_c_reflink_p p, unsigned offset,
+			       s64 sectors, unsigned flags)
+{
+	u64 idx = le64_to_cpu(p.v->idx) + offset;
+	struct reflink_gc *ref;
+	size_t l, r, m;
+	unsigned front_frag, back_frag;
+	s64 ret = 0;
+
+	if (sectors < 0)
+		sectors = -sectors;
+
+	BUG_ON(offset + sectors > p.k->size);
+
+	front_frag = offset;
+	back_frag = offset + sectors;
+
+	l = 0;
+	r = c->reflink_gc_nr;
+	while (l < r) {
+		m = l + (r - l) / 2;
+
+		ref = genradix_ptr(&c->reflink_gc_table, m);
+		if (ref->offset <= idx)
+			l = m + 1;
+		else
+			r = m;
+	}
+
+	while (sectors) {
+		ret = __bch2_mark_reflink_p(c, p, idx, sectors,
+				front_frag, back_frag, flags, &l);
+		if (ret < 0)
+			return ret;
+
+		idx += ret;
+		sectors -= ret;
+	}
+
+	return 0;
+}
+
 static int bch2_mark_key_locked(struct bch_fs *c,
 		   struct bkey_s_c old,
 		   struct bkey_s_c new,
@@ -1131,6 +1250,10 @@ static int bch2_mark_key_locked(struct bch_fs *c,
 		fs_usage->persistent_reserved[replicas - 1] += sectors;
 		break;
 	}
+	case KEY_TYPE_reflink_p:
+		ret = bch2_mark_reflink_p(c, bkey_s_c_to_reflink_p(k),
+					  offset, sectors, flags);
+		break;
 	}
 
 	preempt_enable();
@@ -1693,35 +1816,6 @@ static int bch2_trans_mark_stripe(struct btree_trans *trans,
 	return ret;
 }
 
-static __le64 *bkey_refcount(struct bkey_i *k)
-{
-	switch (k->k.type) {
-	case KEY_TYPE_reflink_v:
-		return &bkey_i_to_reflink_v(k)->v.refcount;
-	case KEY_TYPE_indirect_inline_data:
-		return &bkey_i_to_indirect_inline_data(k)->v.refcount;
-	default:
-		return NULL;
-	}
-}
-
-static bool reflink_p_frag_references(struct bkey_s_c_reflink_p p,
-				      u64 start, u64 end,
-				      struct bkey_s_c k)
-{
-	if (start == end)
-		return false;
-
-	start += le64_to_cpu(p.v->idx);
-	end += le64_to_cpu(p.v->idx);
-
-	if (end <= bkey_start_offset(k.k))
-		return false;
-	if (start >= k.k->p.offset)
-		return false;
-	return true;
-}
-
 static int __bch2_trans_mark_reflink_p(struct btree_trans *trans,
 			struct bkey_s_c_reflink_p p,
 			u64 idx, unsigned sectors,
diff --git a/fs/bcachefs/reflink.c b/fs/bcachefs/reflink.c
index c624fabe1e1c..e986b5284d37 100644
--- a/fs/bcachefs/reflink.c
+++ b/fs/bcachefs/reflink.c
@@ -151,7 +151,7 @@ static int bch2_make_extent_indirect(struct btree_trans *trans,
 
 	set_bkey_val_bytes(&r_v->k, sizeof(__le64) + bkey_val_bytes(&orig->k));
 
-	refcount = (void *) &r_v->v;
+	refcount = bkey_refcount(r_v);
 	*refcount = 0;
 	memcpy(refcount + 1, &orig->v, bkey_val_bytes(&orig->k));
 
diff --git a/fs/bcachefs/reflink.h b/fs/bcachefs/reflink.h
index 9d5e7dc58f2b..bfc785619ee8 100644
--- a/fs/bcachefs/reflink.h
+++ b/fs/bcachefs/reflink.h
@@ -34,6 +34,30 @@ void bch2_indirect_inline_data_to_text(struct printbuf *,
 	.val_to_text	= bch2_indirect_inline_data_to_text,	\
 }
 
+static inline const __le64 *bkey_refcount_c(struct bkey_s_c k)
+{
+	switch (k.k->type) {
+	case KEY_TYPE_reflink_v:
+		return &bkey_s_c_to_reflink_v(k).v->refcount;
+	case KEY_TYPE_indirect_inline_data:
+		return &bkey_s_c_to_indirect_inline_data(k).v->refcount;
+	default:
+		return NULL;
+	}
+}
+
+static inline __le64 *bkey_refcount(struct bkey_i *k)
+{
+	switch (k->k.type) {
+	case KEY_TYPE_reflink_v:
+		return &bkey_i_to_reflink_v(k)->v.refcount;
+	case KEY_TYPE_indirect_inline_data:
+		return &bkey_i_to_indirect_inline_data(k)->v.refcount;
+	default:
+		return NULL;
+	}
+}
+
 s64 bch2_remap_range(struct bch_fs *, struct bpos, struct bpos,
 		     u64, u64 *, u64, s64 *);
 