author		Kent Overstreet <kent.overstreet@gmail.com>	2019-11-15 21:52:28 +0100
committer	Kent Overstreet <kent.overstreet@linux.dev>	2023-10-22 23:08:32 +0200
commit		08c07fea7b437f0a841f85cc9b670b60dacd85cf (patch)
tree		278d64703656939e47e339e1cf1846b8df659c34 /fs/bcachefs
parent		bcachefs: Rework of cut_front & cut_back (diff)
bcachefs: Split out extent_update.c
Signed-off-by: Kent Overstreet <kent.overstreet@gmail.com>
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
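
[Editor's note: the code being split out revolves around classifying how an
incoming extent overlaps an existing key — the four BCH_EXTENT_OVERLAP_* cases
that extent_squash() switches on in the diff below. The following is a minimal
standalone sketch of that classification under toy types; struct toy_extent,
enum toy_overlap and classify() are illustrative names, not bcachefs API.]

#include <stdint.h>
#include <stdio.h>

/* Toy model: an extent covers [start, end); bcachefs indexes extents by
 * their end position, which this mirrors. */
struct toy_extent {
	uint64_t start;
	uint64_t end;
};

/* Same four cases as BCH_EXTENT_OVERLAP_{FRONT,BACK,ALL,MIDDLE}. */
enum toy_overlap {
	TOY_OVERLAP_FRONT,	/* insert covers the front of k */
	TOY_OVERLAP_BACK,	/* insert covers the back of k */
	TOY_OVERLAP_ALL,	/* insert covers all of k */
	TOY_OVERLAP_MIDDLE,	/* insert sits strictly inside k */
};

static enum toy_overlap classify(struct toy_extent insert, struct toy_extent k)
{
	int ends_inside   = insert.end < k.end;
	int starts_inside = insert.start > k.start;

	if (ends_inside && !starts_inside)
		return TOY_OVERLAP_FRONT;
	if (!ends_inside && starts_inside)
		return TOY_OVERLAP_BACK;
	if (!ends_inside && !starts_inside)
		return TOY_OVERLAP_ALL;
	return TOY_OVERLAP_MIDDLE;
}

int main(void)
{
	struct toy_extent k = { 10, 20 };

	printf("%d %d %d %d\n",
	       classify((struct toy_extent){  5, 15 }, k),  /* FRONT  (0) */
	       classify((struct toy_extent){ 15, 25 }, k),  /* BACK   (1) */
	       classify((struct toy_extent){  5, 25 }, k),  /* ALL    (2) */
	       classify((struct toy_extent){ 12, 18 }, k)); /* MIDDLE (3) */
	return 0;
}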
Diffstat (limited to 'fs/bcachefs')
-rw-r--r--	fs/bcachefs/Makefile            |   1
-rw-r--r--	fs/bcachefs/btree_update_leaf.c |   2
-rw-r--r--	fs/bcachefs/extent_update.c     | 532
-rw-r--r--	fs/bcachefs/extent_update.h     |  18
-rw-r--r--	fs/bcachefs/extents.c           | 525
-rw-r--r--	fs/bcachefs/extents.h           |  11
-rw-r--r--	fs/bcachefs/fs-io.c             |   1
-rw-r--r--	fs/bcachefs/io.c                |   2
8 files changed, 555 insertions, 537 deletions
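
[Editor's note: the trickiest case handled by the moved extent_squash() is
BCH_EXTENT_OVERLAP_MIDDLE, where the insert splits an existing key in three.
A hedged sketch of just that arithmetic, before the diff itself — struct span
and split_middle() are illustrative stand-ins; the real code operates on bkeys
via bch2_cut_back() on a reassembled copy and bch2_cut_front_s() on the
original.]

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

/* A toy extent covering [start, end), in sectors. Illustrative only. */
struct span { uint64_t start, end; };

/*
 * Split existing key k around an insert lying strictly inside it:
 * front keeps [k.start, insert.start), back keeps [insert.end, k.end),
 * and the insert replaces the middle.
 */
static void split_middle(struct span k, struct span insert,
			 struct span *front, struct span *back)
{
	assert(insert.start > k.start && insert.end < k.end);

	front->start	= k.start;
	front->end	= insert.start;
	back->start	= insert.end;
	back->end	= k.end;
}

int main(void)
{
	struct span front, back;

	split_middle((struct span){ 0, 100 }, (struct span){ 40, 60 },
		     &front, &back);
	/* prints: front [0,40) back [60,100) */
	printf("front [%llu,%llu) back [%llu,%llu)\n",
	       (unsigned long long) front.start, (unsigned long long) front.end,
	       (unsigned long long) back.start, (unsigned long long) back.end);
	return 0;
}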
diff --git a/fs/bcachefs/Makefile b/fs/bcachefs/Makefile
index 9d120936703a..6d5ad877fb07 100644
--- a/fs/bcachefs/Makefile
+++ b/fs/bcachefs/Makefile
@@ -26,6 +26,7 @@ bcachefs-y := \
 	ec.o \
 	error.o \
 	extents.o \
+	extent_update.o \
 	fs.o \
 	fs-common.o \
 	fs-ioctl.o \
diff --git a/fs/bcachefs/btree_update_leaf.c b/fs/bcachefs/btree_update_leaf.c
index a774fce027c2..6e5405f0b372 100644
--- a/fs/bcachefs/btree_update_leaf.c
+++ b/fs/bcachefs/btree_update_leaf.c
@@ -10,7 +10,7 @@
 #include "buckets.h"
 #include "debug.h"
 #include "error.h"
-#include "extents.h"
+#include "extent_update.h"
 #include "journal.h"
 #include "journal_reclaim.h"
 #include "keylist.h"
diff --git a/fs/bcachefs/extent_update.c b/fs/bcachefs/extent_update.c
new file mode 100644
index 000000000000..21426e01c395
--- /dev/null
+++ b/fs/bcachefs/extent_update.c
@@ -0,0 +1,532 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "bcachefs.h"
+#include "bkey_on_stack.h"
+#include "btree_update.h"
+#include "btree_update_interior.h"
+#include "buckets.h"
+#include "debug.h"
+#include "extents.h"
+#include "extent_update.h"
+
+/*
+ * This counts the number of iterators to the alloc & ec btrees we'll need
+ * inserting/removing this extent:
+ */
+static unsigned bch2_bkey_nr_alloc_ptrs(struct bkey_s_c k)
+{
+	struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
+	const union bch_extent_entry *entry;
+	unsigned ret = 0;
+
+	bkey_extent_entry_for_each(ptrs, entry) {
+		switch (__extent_entry_type(entry)) {
+		case BCH_EXTENT_ENTRY_ptr:
+		case BCH_EXTENT_ENTRY_stripe_ptr:
+			ret++;
+		}
+	}
+
+	return ret;
+}
+
+static int count_iters_for_insert(struct btree_trans *trans,
+				  struct bkey_s_c k,
+				  unsigned offset,
+				  struct bpos *end,
+				  unsigned *nr_iters,
+				  unsigned max_iters,
+				  bool overwrite)
+{
+	int ret = 0;
+
+	switch (k.k->type) {
+	case KEY_TYPE_extent:
+	case KEY_TYPE_reflink_v:
+		*nr_iters += bch2_bkey_nr_alloc_ptrs(k);
+
+		if (*nr_iters >= max_iters) {
+			*end = bpos_min(*end, k.k->p);
+			ret = 1;
+		}
+
+		break;
+	case KEY_TYPE_reflink_p: {
+		struct bkey_s_c_reflink_p p = bkey_s_c_to_reflink_p(k);
+		u64 idx = le64_to_cpu(p.v->idx);
+		unsigned sectors = bpos_min(*end, p.k->p).offset -
+			bkey_start_offset(p.k);
+		struct btree_iter *iter;
+		struct bkey_s_c r_k;
+
+		for_each_btree_key(trans, iter,
+				   BTREE_ID_REFLINK, POS(0, idx + offset),
+				   BTREE_ITER_SLOTS, r_k, ret) {
+			if (bkey_cmp(bkey_start_pos(r_k.k),
+				     POS(0, idx + sectors)) >= 0)
+				break;
+
+			*nr_iters += 1 + bch2_bkey_nr_alloc_ptrs(r_k);
+
+			if (*nr_iters >= max_iters) {
+				struct bpos pos = bkey_start_pos(k.k);
+				pos.offset += r_k.k->p.offset - idx;
+
+				*end = bpos_min(*end, pos);
+				ret = 1;
+				break;
+			}
+		}
+
+		bch2_trans_iter_put(trans, iter);
+		break;
+	}
+	}
+
+	return ret;
+}
+
+#define EXTENT_ITERS_MAX	(BTREE_ITER_MAX / 3)
+
+int bch2_extent_atomic_end(struct btree_iter *iter,
+			   struct bkey_i *insert,
+			   struct bpos *end)
+{
+	struct btree_trans *trans = iter->trans;
+	struct btree *b;
+	struct btree_node_iter	node_iter;
+	struct bkey_packed	*_k;
+	unsigned		nr_iters = 0;
+	int ret;
+
+	ret = bch2_btree_iter_traverse(iter);
+	if (ret)
+		return ret;
+
+	b = iter->l[0].b;
+	node_iter = iter->l[0].iter;
+
+	BUG_ON(bkey_cmp(bkey_start_pos(&insert->k), b->data->min_key) < 0);
+
+	*end = bpos_min(insert->k.p, b->key.k.p);
+
+	ret = count_iters_for_insert(trans, bkey_i_to_s_c(insert), 0, end,
+				     &nr_iters, EXTENT_ITERS_MAX / 2, false);
+	if (ret < 0)
+		return ret;
+
+	while ((_k = bch2_btree_node_iter_peek_filter(&node_iter, b,
+						      KEY_TYPE_discard))) {
+		struct bkey	unpacked;
+		struct bkey_s_c	k = bkey_disassemble(b, _k, &unpacked);
+		unsigned offset = 0;
+
+		if (bkey_cmp(bkey_start_pos(k.k), *end) >= 0)
+			break;
+
+		if (bkey_cmp(bkey_start_pos(&insert->k),
+			     bkey_start_pos(k.k)) > 0)
+			offset = bkey_start_offset(&insert->k) -
+				bkey_start_offset(k.k);
+
+		ret = count_iters_for_insert(trans, k, offset, end,
+					     &nr_iters, EXTENT_ITERS_MAX, true);
+		if (ret)
+			break;
+
+		bch2_btree_node_iter_advance(&node_iter, b);
+	}
+
+	return ret < 0 ? ret : 0;
+}
+
+int bch2_extent_trim_atomic(struct bkey_i *k, struct btree_iter *iter)
+{
+	struct bpos end;
+	int ret;
+
+	ret = bch2_extent_atomic_end(iter, k, &end);
+	if (ret)
+		return ret;
+
+	bch2_cut_back(end, k);
+	return 0;
+}
+
+int bch2_extent_is_atomic(struct bkey_i *k, struct btree_iter *iter)
+{
+	struct bpos end;
+	int ret;
+
+	ret = bch2_extent_atomic_end(iter, k, &end);
+	if (ret)
+		return ret;
+
+	return !bkey_cmp(end, k->k.p);
+}
+
+enum btree_insert_ret
+bch2_extent_can_insert(struct btree_trans *trans,
+		       struct btree_insert_entry *insert,
+		       unsigned *u64s)
+{
+	struct btree_iter_level *l = &insert->iter->l[0];
+	struct btree_node_iter node_iter = l->iter;
+	enum bch_extent_overlap overlap;
+	struct bkey_packed *_k;
+	struct bkey unpacked;
+	struct bkey_s_c k;
+	int sectors;
+
+	/*
+	 * We avoid creating whiteouts whenever possible when deleting, but
+	 * those optimizations mean we may potentially insert two whiteouts
+	 * instead of one (when we overlap with the front of one extent and the
+	 * back of another):
+	 */
+	if (bkey_whiteout(&insert->k->k))
+		*u64s += BKEY_U64s;
+
+	_k = bch2_btree_node_iter_peek_filter(&node_iter, l->b,
+					      KEY_TYPE_discard);
+	if (!_k)
+		return BTREE_INSERT_OK;
+
+	k = bkey_disassemble(l->b, _k, &unpacked);
+
+	overlap = bch2_extent_overlap(&insert->k->k, k.k);
+
+	/* account for having to split existing extent: */
+	if (overlap == BCH_EXTENT_OVERLAP_MIDDLE)
+		*u64s += _k->u64s;
+
+	if (overlap == BCH_EXTENT_OVERLAP_MIDDLE &&
+	    (sectors = bch2_extent_is_compressed(k))) {
+		int flags = trans->flags & BTREE_INSERT_NOFAIL
+			? BCH_DISK_RESERVATION_NOFAIL : 0;
+
+		switch (bch2_disk_reservation_add(trans->c,
+						  trans->disk_res,
+						  sectors, flags)) {
+		case 0:
+			break;
+		case -ENOSPC:
+			return BTREE_INSERT_ENOSPC;
+		default:
+			BUG();
+		}
+	}
+
+	return BTREE_INSERT_OK;
+}
+
+static void verify_extent_nonoverlapping(struct bch_fs *c,
+					 struct btree *b,
+					 struct btree_node_iter *_iter,
+					 struct bkey_i *insert)
+{
+#ifdef CONFIG_BCACHEFS_DEBUG
+	struct btree_node_iter iter;
+	struct bkey_packed *k;
+	struct bkey uk;
+
+	if (!expensive_debug_checks(c))
+		return;
+
+	iter = *_iter;
+	k = bch2_btree_node_iter_prev_filter(&iter, b, KEY_TYPE_discard);
+	BUG_ON(k &&
+	       (uk = bkey_unpack_key(b, k),
+		bkey_cmp(uk.p, bkey_start_pos(&insert->k)) > 0));
+
+	iter = *_iter;
+	k = bch2_btree_node_iter_peek_filter(&iter, b, KEY_TYPE_discard);
+#if 0
+	BUG_ON(k &&
+	       (uk = bkey_unpack_key(b, k),
+		bkey_cmp(insert->k.p, bkey_start_pos(&uk))) > 0);
+#else
+	if (k &&
+	    (uk = bkey_unpack_key(b, k),
+	     bkey_cmp(insert->k.p, bkey_start_pos(&uk))) > 0) {
+		char buf1[100];
+		char buf2[100];
+
+		bch2_bkey_to_text(&PBUF(buf1), &insert->k);
+		bch2_bkey_to_text(&PBUF(buf2), &uk);
+
+		bch2_dump_btree_node(b);
+		panic("insert > next :\n"
+		      "insert %s\n"
+		      "next %s\n",
+		      buf1, buf2);
+	}
+#endif
+
+#endif
+}
+
+static void extent_bset_insert(struct bch_fs *c, struct btree_iter *iter,
+			       struct bkey_i *insert)
+{
+	struct btree_iter_level *l = &iter->l[0];
+	struct bkey_packed *k =
+		bch2_btree_node_iter_bset_pos(&l->iter, l->b, bset_tree_last(l->b));
+
+	BUG_ON(insert->k.u64s > bch_btree_keys_u64s_remaining(c, l->b));
+
+	EBUG_ON(bkey_deleted(&insert->k) || !insert->k.size);
+	verify_extent_nonoverlapping(c, l->b, &l->iter, insert);
+
+	if (debug_check_bkeys(c))
+		bch2_bkey_debugcheck(c, l->b, bkey_i_to_s_c(insert));
+
+	bch2_bset_insert(l->b, &l->iter, k, insert, 0);
+	bch2_btree_node_iter_fix(iter, l->b, &l->iter, k, 0, k->u64s);
+}
+
+static void
+extent_squash(struct bch_fs *c, struct btree_iter *iter,
+	      struct bkey_i *insert,
+	      struct bkey_packed *_k, struct bkey_s k,
+	      enum bch_extent_overlap overlap)
+{
+	struct btree_iter_level *l = &iter->l[0];
+	int u64s_delta;
+
+	switch (overlap) {
+	case BCH_EXTENT_OVERLAP_FRONT:
+		/* insert overlaps with start of k: */
+		u64s_delta = bch2_cut_front_s(insert->k.p, k);
+		btree_keys_account_val_delta(l->b, _k, u64s_delta);
+
+		EBUG_ON(bkey_deleted(k.k));
+		extent_save(l->b, _k, k.k);
+		bch2_btree_iter_fix_key_modified(iter, l->b, _k);
+		break;
+
+	case BCH_EXTENT_OVERLAP_BACK:
+		/* insert overlaps with end of k: */
+		u64s_delta = bch2_cut_back_s(bkey_start_pos(&insert->k), k);
+		btree_keys_account_val_delta(l->b, _k, u64s_delta);
+
+		EBUG_ON(bkey_deleted(k.k));
+		extent_save(l->b, _k, k.k);
+
+		/*
+		 * As the auxiliary tree is indexed by the end of the
+		 * key and we've just changed the end, update the
+		 * auxiliary tree.
+		 */
+		bch2_bset_fix_invalidated_key(l->b, _k);
+		bch2_btree_node_iter_fix(iter, l->b, &l->iter,
+					 _k, _k->u64s, _k->u64s);
+		break;
+
+	case BCH_EXTENT_OVERLAP_ALL: {
+		/* The insert key completely covers k, invalidate k */
+		if (!bkey_whiteout(k.k))
+			btree_account_key_drop(l->b, _k);
+
+		k.k->size = 0;
+		k.k->type = KEY_TYPE_deleted;
+
+		if (_k >= btree_bset_last(l->b)->start) {
+			unsigned u64s = _k->u64s;
+
+			bch2_bset_delete(l->b, _k, _k->u64s);
+			bch2_btree_node_iter_fix(iter, l->b, &l->iter,
+						 _k, u64s, 0);
+		} else {
+			extent_save(l->b, _k, k.k);
+			bch2_btree_iter_fix_key_modified(iter, l->b, _k);
+		}
+
+		break;
+	}
+	case BCH_EXTENT_OVERLAP_MIDDLE: {
+		struct bkey_on_stack split;
+
+		bkey_on_stack_init(&split);
+		bkey_on_stack_realloc(&split, c, k.k->u64s);
+
+		/*
+		 * The insert key falls 'in the middle' of k
+		 * The insert key splits k in 3:
+		 * - start only in k, preserve
+		 * - middle common section, invalidate in k
+		 * - end only in k, preserve
+		 *
+		 * We update the old key to preserve the start,
+		 * insert will be the new common section,
+		 * we manually insert the end that we are preserving.
+		 *
+		 * modify k _before_ doing the insert (which will move
+		 * what k points to)
+		 */
+		bkey_reassemble(split.k, k.s_c);
+		split.k->k.needs_whiteout |= bkey_written(l->b, _k);
+
+		bch2_cut_back(bkey_start_pos(&insert->k), split.k);
+		BUG_ON(bkey_deleted(&split.k->k));
+
+		u64s_delta = bch2_cut_front_s(insert->k.p, k);
+		btree_keys_account_val_delta(l->b, _k, u64s_delta);
+
+		BUG_ON(bkey_deleted(k.k));
+		extent_save(l->b, _k, k.k);
+		bch2_btree_iter_fix_key_modified(iter, l->b, _k);
+
+		extent_bset_insert(c, iter, split.k);
+		bkey_on_stack_exit(&split, c);
+		break;
+	}
+	}
+}
+
+/**
+ * bch_extent_insert_fixup - insert a new extent and deal with overlaps
+ *
+ * this may result in not actually doing the insert, or inserting some subset
+ * of the insert key. For cmpxchg operations this is where that logic lives.
+ *
+ * All subsets of @insert that need to be inserted are inserted using
+ * bch2_btree_insert_and_journal(). If @b or @res fills up, this function
+ * returns false, setting @iter->pos for the prefix of @insert that actually
+ * got inserted.
+ *
+ * BSET INVARIANTS: this function is responsible for maintaining all the
+ * invariants for bsets of extents in memory. things get really hairy with 0
+ * size extents
+ *
+ * within one bset:
+ *
+ * bkey_start_pos(bkey_next(k)) >= k
+ * or bkey_start_offset(bkey_next(k)) >= k->offset
+ *
+ * i.e. strict ordering, no overlapping extents.
+ *
+ * multiple bsets (i.e. full btree node):
+ *
+ * ∀ k, j
+ *   k.size != 0 ∧ j.size != 0 →
+ *     ¬ (k > bkey_start_pos(j) ∧ k < j)
+ *
+ * i.e. no two overlapping keys _of nonzero size_
+ *
+ * We can't realistically maintain this invariant for zero size keys because of
+ * the key merging done in bch2_btree_insert_key() - for two mergeable keys k, j
+ * there may be another 0 size key between them in another bset, and it will
+ * thus overlap with the merged key.
+ *
+ * In addition, the end of iter->pos indicates how much has been processed.
+ * If the end of iter->pos is not the same as the end of insert, then
+ * key insertion needs to continue/be retried.
+ */
+void bch2_insert_fixup_extent(struct btree_trans *trans,
+			      struct btree_insert_entry *insert_entry)
+{
+	struct bch_fs *c = trans->c;
+	struct btree_iter *iter	= insert_entry->iter;
+	struct bkey_i *insert	= insert_entry->k;
+	struct btree_iter_level *l = &iter->l[0];
+	struct btree_node_iter node_iter = l->iter;
+	bool deleting		= bkey_whiteout(&insert->k);
+	bool update_journal	= !deleting;
+	bool update_btree	= !deleting;
+	struct bkey_i whiteout	= *insert;
+	struct bkey_packed *_k;
+	struct bkey unpacked;
+
+	EBUG_ON(iter->level);
+	EBUG_ON(!insert->k.size);
+	EBUG_ON(bkey_cmp(iter->pos, bkey_start_pos(&insert->k)));
+
+	while ((_k = bch2_btree_node_iter_peek_filter(&l->iter, l->b,
+						      KEY_TYPE_discard))) {
+		struct bkey_s k = __bkey_disassemble(l->b, _k, &unpacked);
+		struct bpos cur_end = bpos_min(insert->k.p, k.k->p);
+		enum bch_extent_overlap overlap =
+			bch2_extent_overlap(&insert->k, k.k);
+
+		if (bkey_cmp(bkey_start_pos(k.k), insert->k.p) >= 0)
+			break;
+
+		if (!bkey_whiteout(k.k))
+			update_journal = true;
+
+		if (!update_journal) {
+			bch2_cut_front(cur_end, insert);
+			bch2_cut_front(cur_end, &whiteout);
+			bch2_btree_iter_set_pos_same_leaf(iter, cur_end);
+			goto next;
+		}
+
+		/*
+		 * When deleting, if possible just do it by switching the type
+		 * of the key we're deleting, instead of creating and inserting
+		 * a new whiteout:
+		 */
+		if (deleting &&
+		    !update_btree &&
+		    !bkey_cmp(insert->k.p, k.k->p) &&
+		    !bkey_cmp(bkey_start_pos(&insert->k), bkey_start_pos(k.k))) {
+			if (!bkey_whiteout(k.k)) {
+				btree_account_key_drop(l->b, _k);
+				_k->type = KEY_TYPE_discard;
+				reserve_whiteout(l->b, _k);
+				bch2_btree_iter_fix_key_modified(iter,
+								 l->b, _k);
+			}
+			break;
+		}
+
+		if (k.k->needs_whiteout || bkey_written(l->b, _k)) {
+			insert->k.needs_whiteout = true;
+			update_btree = true;
+		}
+
+		if (update_btree &&
+		    overlap == BCH_EXTENT_OVERLAP_ALL &&
+		    bkey_whiteout(k.k) &&
+		    k.k->needs_whiteout) {
+			unreserve_whiteout(l->b, _k);
+			_k->needs_whiteout = false;
+		}
+
+		extent_squash(c, iter, insert, _k, k, overlap);
+
+		if (!update_btree)
+			bch2_cut_front(cur_end, insert);
+next:
+		node_iter = l->iter;
+
+		if (overlap == BCH_EXTENT_OVERLAP_FRONT ||
+		    overlap == BCH_EXTENT_OVERLAP_MIDDLE)
+			break;
+	}
+
+	l->iter = node_iter;
+	bch2_btree_iter_set_pos_same_leaf(iter, insert->k.p);
+
+	if (update_btree) {
+		if (deleting)
+			insert->k.type = KEY_TYPE_discard;
+
+		EBUG_ON(bkey_deleted(&insert->k) || !insert->k.size);
+
+		extent_bset_insert(c, iter, insert);
+	}
+
+	if (update_journal) {
+		struct bkey_i *k = !deleting ? insert : &whiteout;
+
+		if (deleting)
+			k->k.type = KEY_TYPE_discard;
+
+		EBUG_ON(bkey_deleted(&k->k) || !k->k.size);
+
+		bch2_btree_journal_key(trans, iter, k);
+	}
+
+	bch2_cut_front(insert->k.p, insert);
+}
diff --git a/fs/bcachefs/extent_update.h b/fs/bcachefs/extent_update.h
new file mode 100644
index 000000000000..89d18e4b6758
--- /dev/null
+++ b/fs/bcachefs/extent_update.h
@@ -0,0 +1,18 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _BCACHEFS_EXTENT_UPDATE_H
+#define _BCACHEFS_EXTENT_UPDATE_H
+
+#include "bcachefs.h"
+
+int bch2_extent_atomic_end(struct btree_iter *, struct bkey_i *,
+			   struct bpos *);
+int bch2_extent_trim_atomic(struct bkey_i *, struct btree_iter *);
+int bch2_extent_is_atomic(struct bkey_i *, struct btree_iter *);
+
+enum btree_insert_ret
+bch2_extent_can_insert(struct btree_trans *, struct btree_insert_entry *,
+		       unsigned *);
+void bch2_insert_fixup_extent(struct btree_trans *,
+			      struct btree_insert_entry *);
+
+#endif /* _BCACHEFS_EXTENT_UPDATE_H */
diff --git a/fs/bcachefs/extents.c b/fs/bcachefs/extents.c
index 6c1cc90ab320..8f511760102a 100644
--- a/fs/bcachefs/extents.c
+++ b/fs/bcachefs/extents.c
@@ -8,14 +8,11 @@
 
 #include "bcachefs.h"
 #include "bkey_methods.h"
-#include "bkey_on_stack.h"
 #include "btree_gc.h"
-#include "btree_update.h"
-#include "btree_update_interior.h"
+#include "btree_iter.h"
 #include "buckets.h"
 #include "checksum.h"
 #include "debug.h"
-#include "dirent.h"
 #include "disk_groups.h"
 #include "error.h"
 #include "extents.h"
@@ -26,7 +23,6 @@
 #include "super-io.h"
 #include "trace.h"
 #include "util.h"
-#include "xattr.h"
 
 unsigned bch2_bkey_nr_ptrs(struct bkey_s_c k)
 {
@@ -826,525 +822,6 @@ int bch2_cut_back_s(struct bpos where, struct bkey_s k)
 	return -val_u64s_delta;
 }
 
-static unsigned bch2_bkey_nr_alloc_ptrs(struct bkey_s_c k)
-{
-	struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
-	const union bch_extent_entry *entry;
-	unsigned ret = 0;
-
-	bkey_extent_entry_for_each(ptrs, entry) {
-		switch (__extent_entry_type(entry)) {
-		case BCH_EXTENT_ENTRY_ptr:
-		case BCH_EXTENT_ENTRY_stripe_ptr:
-			ret++;
-		}
-	}
-
-	return ret;
-}
-
-static int count_iters_for_insert(struct btree_trans *trans,
-				  struct bkey_s_c k,
-				  unsigned offset,
-				  struct bpos *end,
-				  unsigned *nr_iters,
-				  unsigned max_iters,
-				  bool overwrite)
-{
-	int ret = 0;
-
-	switch (k.k->type) {
-	case KEY_TYPE_extent:
-	case KEY_TYPE_reflink_v:
-		*nr_iters += bch2_bkey_nr_alloc_ptrs(k);
-
-		if (*nr_iters >= max_iters) {
-			*end = bpos_min(*end, k.k->p);
-			ret = 1;
-		}
-
-		break;
-	case KEY_TYPE_reflink_p: {
-		struct bkey_s_c_reflink_p p = bkey_s_c_to_reflink_p(k);
-		u64 idx = le64_to_cpu(p.v->idx);
-		unsigned sectors = bpos_min(*end, p.k->p).offset -
-			bkey_start_offset(p.k);
-		struct btree_iter *iter;
-		struct bkey_s_c r_k;
-
-		for_each_btree_key(trans, iter,
-				   BTREE_ID_REFLINK, POS(0, idx + offset),
-				   BTREE_ITER_SLOTS, r_k, ret) {
-			if (bkey_cmp(bkey_start_pos(r_k.k),
-				     POS(0, idx + sectors)) >= 0)
-				break;
-
-			*nr_iters += 1 + bch2_bkey_nr_alloc_ptrs(r_k);
-
-			if (*nr_iters >= max_iters) {
-				struct bpos pos = bkey_start_pos(k.k);
-				pos.offset += r_k.k->p.offset - idx;
-
-				*end = bpos_min(*end, pos);
-				ret = 1;
-				break;
-			}
-		}
-
-		bch2_trans_iter_put(trans, iter);
-		break;
-	}
-	}
-
-	return ret;
-}
-
-#define EXTENT_ITERS_MAX	(BTREE_ITER_MAX / 3)
-
-int bch2_extent_atomic_end(struct btree_iter *iter,
-			   struct bkey_i *insert,
-			   struct bpos *end)
-{
-	struct btree_trans *trans = iter->trans;
-	struct btree *b;
-	struct btree_node_iter	node_iter;
-	struct bkey_packed	*_k;
-	unsigned		nr_iters = 0;
-	int ret;
-
-	ret = bch2_btree_iter_traverse(iter);
-	if (ret)
-		return ret;
-
-	b = iter->l[0].b;
-	node_iter = iter->l[0].iter;
-
-	BUG_ON(bkey_cmp(bkey_start_pos(&insert->k), b->data->min_key) < 0);
-
-	*end = bpos_min(insert->k.p, b->key.k.p);
-
-	ret = count_iters_for_insert(trans, bkey_i_to_s_c(insert), 0, end,
-				     &nr_iters, EXTENT_ITERS_MAX / 2, false);
-	if (ret < 0)
-		return ret;
-
-	while ((_k = bch2_btree_node_iter_peek_filter(&node_iter, b,
-						      KEY_TYPE_discard))) {
-		struct bkey	unpacked;
-		struct bkey_s_c	k = bkey_disassemble(b, _k, &unpacked);
-		unsigned offset = 0;
-
-		if (bkey_cmp(bkey_start_pos(k.k), *end) >= 0)
-			break;
-
-		if (bkey_cmp(bkey_start_pos(&insert->k),
-			     bkey_start_pos(k.k)) > 0)
-			offset = bkey_start_offset(&insert->k) -
-				bkey_start_offset(k.k);
-
-		ret = count_iters_for_insert(trans, k, offset, end,
-					     &nr_iters, EXTENT_ITERS_MAX, true);
-		if (ret)
-			break;
-
-		bch2_btree_node_iter_advance(&node_iter, b);
-	}
-
-	return ret < 0 ? ret : 0;
-}
-
-int bch2_extent_trim_atomic(struct bkey_i *k, struct btree_iter *iter)
-{
-	struct bpos end;
-	int ret;
-
-	ret = bch2_extent_atomic_end(iter, k, &end);
-	if (ret)
-		return ret;
-
-	bch2_cut_back(end, k);
-	return 0;
-}
-
-int bch2_extent_is_atomic(struct bkey_i *k, struct btree_iter *iter)
-{
-	struct bpos end;
-	int ret;
-
-	ret = bch2_extent_atomic_end(iter, k, &end);
-	if (ret)
-		return ret;
-
-	return !bkey_cmp(end, k->k.p);
-}
-
-enum btree_insert_ret
-bch2_extent_can_insert(struct btree_trans *trans,
-		       struct btree_insert_entry *insert,
-		       unsigned *u64s)
-{
-	struct btree_iter_level *l = &insert->iter->l[0];
-	struct btree_node_iter node_iter = l->iter;
-	enum bch_extent_overlap overlap;
-	struct bkey_packed *_k;
-	struct bkey unpacked;
-	struct bkey_s_c k;
-	int sectors;
-
-	/*
-	 * We avoid creating whiteouts whenever possible when deleting, but
-	 * those optimizations mean we may potentially insert two whiteouts
-	 * instead of one (when we overlap with the front of one extent and the
-	 * back of another):
-	 */
-	if (bkey_whiteout(&insert->k->k))
-		*u64s += BKEY_U64s;
-
-	_k = bch2_btree_node_iter_peek_filter(&node_iter, l->b,
-					      KEY_TYPE_discard);
-	if (!_k)
-		return BTREE_INSERT_OK;
-
-	k = bkey_disassemble(l->b, _k, &unpacked);
-
-	overlap = bch2_extent_overlap(&insert->k->k, k.k);
-
-	/* account for having to split existing extent: */
-	if (overlap == BCH_EXTENT_OVERLAP_MIDDLE)
-		*u64s += _k->u64s;
-
-	if (overlap == BCH_EXTENT_OVERLAP_MIDDLE &&
-	    (sectors = bch2_extent_is_compressed(k))) {
-		int flags = trans->flags & BTREE_INSERT_NOFAIL
-			? BCH_DISK_RESERVATION_NOFAIL : 0;
-
-		switch (bch2_disk_reservation_add(trans->c,
-						  trans->disk_res,
-						  sectors, flags)) {
-		case 0:
-			break;
-		case -ENOSPC:
-			return BTREE_INSERT_ENOSPC;
-		default:
-			BUG();
-		}
-	}
-
-	return BTREE_INSERT_OK;
-}
-
-static void verify_extent_nonoverlapping(struct bch_fs *c,
-					 struct btree *b,
-					 struct btree_node_iter *_iter,
-					 struct bkey_i *insert)
-{
-#ifdef CONFIG_BCACHEFS_DEBUG
-	struct btree_node_iter iter;
-	struct bkey_packed *k;
-	struct bkey uk;
-
-	if (!expensive_debug_checks(c))
-		return;
-
-	iter = *_iter;
-	k = bch2_btree_node_iter_prev_filter(&iter, b, KEY_TYPE_discard);
-	BUG_ON(k &&
-	       (uk = bkey_unpack_key(b, k),
-		bkey_cmp(uk.p, bkey_start_pos(&insert->k)) > 0));
-
-	iter = *_iter;
-	k = bch2_btree_node_iter_peek_filter(&iter, b, KEY_TYPE_discard);
-#if 0
-	BUG_ON(k &&
-	       (uk = bkey_unpack_key(b, k),
-		bkey_cmp(insert->k.p, bkey_start_pos(&uk))) > 0);
-#else
-	if (k &&
-	    (uk = bkey_unpack_key(b, k),
-	     bkey_cmp(insert->k.p, bkey_start_pos(&uk))) > 0) {
-		char buf1[100];
-		char buf2[100];
-
-		bch2_bkey_to_text(&PBUF(buf1), &insert->k);
-		bch2_bkey_to_text(&PBUF(buf2), &uk);
-
-		bch2_dump_btree_node(b);
-		panic("insert > next :\n"
-		      "insert %s\n"
-		      "next %s\n",
-		      buf1, buf2);
-	}
-#endif
-
-#endif
-}
-
-static void extent_bset_insert(struct bch_fs *c, struct btree_iter *iter,
-			       struct bkey_i *insert)
-{
-	struct btree_iter_level *l = &iter->l[0];
-	struct bkey_packed *k =
-		bch2_btree_node_iter_bset_pos(&l->iter, l->b, bset_tree_last(l->b));
-
-	BUG_ON(insert->k.u64s > bch_btree_keys_u64s_remaining(c, l->b));
-
-	EBUG_ON(bkey_deleted(&insert->k) || !insert->k.size);
-	verify_extent_nonoverlapping(c, l->b, &l->iter, insert);
-
-	if (debug_check_bkeys(c))
-		bch2_bkey_debugcheck(c, l->b, bkey_i_to_s_c(insert));
-
-	bch2_bset_insert(l->b, &l->iter, k, insert, 0);
-	bch2_btree_node_iter_fix(iter, l->b, &l->iter, k, 0, k->u64s);
-}
-
-static void
-extent_squash(struct bch_fs *c, struct btree_iter *iter,
-	      struct bkey_i *insert,
-	      struct bkey_packed *_k, struct bkey_s k,
-	      enum bch_extent_overlap overlap)
-{
-	struct btree_iter_level *l = &iter->l[0];
-	int u64s_delta;
-
-	switch (overlap) {
-	case BCH_EXTENT_OVERLAP_FRONT:
-		/* insert overlaps with start of k: */
-		u64s_delta = bch2_cut_front_s(insert->k.p, k);
-		btree_keys_account_val_delta(l->b, _k, u64s_delta);
-
-		EBUG_ON(bkey_deleted(k.k));
-		extent_save(l->b, _k, k.k);
-		bch2_btree_iter_fix_key_modified(iter, l->b, _k);
-		break;
-
-	case BCH_EXTENT_OVERLAP_BACK:
-		/* insert overlaps with end of k: */
-		u64s_delta = bch2_cut_back_s(bkey_start_pos(&insert->k), k);
-		btree_keys_account_val_delta(l->b, _k, u64s_delta);
-
-		EBUG_ON(bkey_deleted(k.k));
-		extent_save(l->b, _k, k.k);
-
-		/*
-		 * As the auxiliary tree is indexed by the end of the
-		 * key and we've just changed the end, update the
-		 * auxiliary tree.
-		 */
-		bch2_bset_fix_invalidated_key(l->b, _k);
-		bch2_btree_node_iter_fix(iter, l->b, &l->iter,
-					 _k, _k->u64s, _k->u64s);
-		break;
-
-	case BCH_EXTENT_OVERLAP_ALL: {
-		/* The insert key completely covers k, invalidate k */
-		if (!bkey_whiteout(k.k))
-			btree_account_key_drop(l->b, _k);
-
-		k.k->size = 0;
-		k.k->type = KEY_TYPE_deleted;
-
-		if (_k >= btree_bset_last(l->b)->start) {
-			unsigned u64s = _k->u64s;
-
-			bch2_bset_delete(l->b, _k, _k->u64s);
-			bch2_btree_node_iter_fix(iter, l->b, &l->iter,
-						 _k, u64s, 0);
-		} else {
-			extent_save(l->b, _k, k.k);
-			bch2_btree_iter_fix_key_modified(iter, l->b, _k);
-		}
-
-		break;
-	}
-	case BCH_EXTENT_OVERLAP_MIDDLE: {
-		struct bkey_on_stack split;
-
-		bkey_on_stack_init(&split);
-		bkey_on_stack_realloc(&split, c, k.k->u64s);
-
-		/*
-		 * The insert key falls 'in the middle' of k
-		 * The insert key splits k in 3:
-		 * - start only in k, preserve
-		 * - middle common section, invalidate in k
-		 * - end only in k, preserve
-		 *
-		 * We update the old key to preserve the start,
-		 * insert will be the new common section,
-		 * we manually insert the end that we are preserving.
-		 *
-		 * modify k _before_ doing the insert (which will move
-		 * what k points to)
-		 */
-		bkey_reassemble(split.k, k.s_c);
-		split.k->k.needs_whiteout |= bkey_written(l->b, _k);
-
-		bch2_cut_back(bkey_start_pos(&insert->k), split.k);
-		BUG_ON(bkey_deleted(&split.k->k));
-
-		u64s_delta = bch2_cut_front_s(insert->k.p, k);
-		btree_keys_account_val_delta(l->b, _k, u64s_delta);
-
-		BUG_ON(bkey_deleted(k.k));
-		extent_save(l->b, _k, k.k);
-		bch2_btree_iter_fix_key_modified(iter, l->b, _k);
-
-		extent_bset_insert(c, iter, split.k);
-		bkey_on_stack_exit(&split, c);
-		break;
-	}
-	}
-}
-
-/**
- * bch_extent_insert_fixup - insert a new extent and deal with overlaps
- *
- * this may result in not actually doing the insert, or inserting some subset
- * of the insert key. For cmpxchg operations this is where that logic lives.
- *
- * All subsets of @insert that need to be inserted are inserted using
- * bch2_btree_insert_and_journal(). If @b or @res fills up, this function
- * returns false, setting @iter->pos for the prefix of @insert that actually
- * got inserted.
- *
- * BSET INVARIANTS: this function is responsible for maintaining all the
- * invariants for bsets of extents in memory. things get really hairy with 0
- * size extents
- *
- * within one bset:
- *
- * bkey_start_pos(bkey_next(k)) >= k
- * or bkey_start_offset(bkey_next(k)) >= k->offset
- *
- * i.e. strict ordering, no overlapping extents.
- *
- * multiple bsets (i.e. full btree node):
- *
- * ∀ k, j
- *   k.size != 0 ∧ j.size != 0 →
- *     ¬ (k > bkey_start_pos(j) ∧ k < j)
- *
- * i.e. no two overlapping keys _of nonzero size_
- *
- * We can't realistically maintain this invariant for zero size keys because of
- * the key merging done in bch2_btree_insert_key() - for two mergeable keys k, j
- * there may be another 0 size key between them in another bset, and it will
- * thus overlap with the merged key.
- *
- * In addition, the end of iter->pos indicates how much has been processed.
- * If the end of iter->pos is not the same as the end of insert, then
- * key insertion needs to continue/be retried.
- */
-void bch2_insert_fixup_extent(struct btree_trans *trans,
-			      struct btree_insert_entry *insert_entry)
-{
-	struct bch_fs *c = trans->c;
-	struct btree_iter *iter	= insert_entry->iter;
-	struct bkey_i *insert	= insert_entry->k;
-	struct btree_iter_level *l = &iter->l[0];
-	struct btree_node_iter node_iter = l->iter;
-	bool deleting		= bkey_whiteout(&insert->k);
-	bool update_journal	= !deleting;
-	bool update_btree	= !deleting;
-	struct bkey_i whiteout	= *insert;
-	struct bkey_packed *_k;
-	struct bkey unpacked;
-
-	EBUG_ON(iter->level);
-	EBUG_ON(!insert->k.size);
-	EBUG_ON(bkey_cmp(iter->pos, bkey_start_pos(&insert->k)));
-
-	while ((_k = bch2_btree_node_iter_peek_filter(&l->iter, l->b,
-						      KEY_TYPE_discard))) {
-		struct bkey_s k = __bkey_disassemble(l->b, _k, &unpacked);
-		struct bpos cur_end = bpos_min(insert->k.p, k.k->p);
-		enum bch_extent_overlap overlap =
-			bch2_extent_overlap(&insert->k, k.k);
-
-		if (bkey_cmp(bkey_start_pos(k.k), insert->k.p) >= 0)
-			break;
-
-		if (!bkey_whiteout(k.k))
-			update_journal = true;
-
-		if (!update_journal) {
-			bch2_cut_front(cur_end, insert);
-			bch2_cut_front(cur_end, &whiteout);
-			bch2_btree_iter_set_pos_same_leaf(iter, cur_end);
-			goto next;
-		}
-
-		/*
-		 * When deleting, if possible just do it by switching the type
-		 * of the key we're deleting, instead of creating and inserting
-		 * a new whiteout:
-		 */
-		if (deleting &&
-		    !update_btree &&
-		    !bkey_cmp(insert->k.p, k.k->p) &&
-		    !bkey_cmp(bkey_start_pos(&insert->k), bkey_start_pos(k.k))) {
-			if (!bkey_whiteout(k.k)) {
-				btree_account_key_drop(l->b, _k);
-				_k->type = KEY_TYPE_discard;
-				reserve_whiteout(l->b, _k);
-				bch2_btree_iter_fix_key_modified(iter,
-								 l->b, _k);
-			}
-			break;
-		}
-
-		if (k.k->needs_whiteout || bkey_written(l->b, _k)) {
-			insert->k.needs_whiteout = true;
-			update_btree = true;
-		}
-
-		if (update_btree &&
-		    overlap == BCH_EXTENT_OVERLAP_ALL &&
-		    bkey_whiteout(k.k) &&
-		    k.k->needs_whiteout) {
-			unreserve_whiteout(l->b, _k);
-			_k->needs_whiteout = false;
-		}
-
-		extent_squash(c, iter, insert, _k, k, overlap);
-
-		if (!update_btree)
-			bch2_cut_front(cur_end, insert);
-next:
-		node_iter = l->iter;
-
-		if (overlap == BCH_EXTENT_OVERLAP_FRONT ||
-		    overlap == BCH_EXTENT_OVERLAP_MIDDLE)
-			break;
-	}
-
-	l->iter = node_iter;
-	bch2_btree_iter_set_pos_same_leaf(iter, insert->k.p);
-
-	if (update_btree) {
-		if (deleting)
-			insert->k.type = KEY_TYPE_discard;
-
-		EBUG_ON(bkey_deleted(&insert->k) || !insert->k.size);
-
-		extent_bset_insert(c, iter, insert);
-	}
-
-	if (update_journal) {
-		struct bkey_i *k = !deleting ? insert : &whiteout;
-
-		if (deleting)
-			k->k.type = KEY_TYPE_discard;
-
-		EBUG_ON(bkey_deleted(&k->k) || !k->k.size);
-
-		bch2_btree_journal_key(trans, iter, k);
-	}
-
-	bch2_cut_front(insert->k.p, insert);
-}
-
 const char *bch2_extent_invalid(const struct bch_fs *c, struct bkey_s_c k)
 {
 	return bch2_bkey_ptrs_invalid(c, k);
diff --git a/fs/bcachefs/extents.h b/fs/bcachefs/extents.h
index 6e893c37c287..e360e1989812 100644
--- a/fs/bcachefs/extents.h
+++ b/fs/bcachefs/extents.h
@@ -433,17 +433,6 @@ enum merge_result bch2_reservation_merge(struct bch_fs *,
 	.key_merge	= bch2_reservation_merge,	\
 }
 
-int bch2_extent_atomic_end(struct btree_iter *, struct bkey_i *,
-			   struct bpos *);
-int bch2_extent_trim_atomic(struct bkey_i *, struct btree_iter *);
-int bch2_extent_is_atomic(struct bkey_i *, struct btree_iter *);
-
-enum btree_insert_ret
-bch2_extent_can_insert(struct btree_trans *, struct btree_insert_entry *,
-		       unsigned *);
-void bch2_insert_fixup_extent(struct btree_trans *,
-			      struct btree_insert_entry *);
-
 void bch2_bkey_mark_replicas_cached(struct bch_fs *, struct bkey_s,
 				    unsigned, unsigned);
diff --git a/fs/bcachefs/fs-io.c b/fs/bcachefs/fs-io.c
index 8b8442f9a81c..fab952856e36 100644
--- a/fs/bcachefs/fs-io.c
+++ b/fs/bcachefs/fs-io.c
@@ -9,6 +9,7 @@
 #include "clock.h"
 #include "error.h"
 #include "extents.h"
+#include "extent_update.h"
 #include "fs.h"
 #include "fs-io.h"
 #include "fsck.h"
diff --git a/fs/bcachefs/io.c b/fs/bcachefs/io.c
index 0f1be5c5543d..ef953499e66c 100644
--- a/fs/bcachefs/io.c
+++ b/fs/bcachefs/io.c
@@ -19,7 +19,7 @@
 #include "disk_groups.h"
 #include "ec.h"
 #include "error.h"
-#include "extents.h"
+#include "extent_update.h"
 #include "inode.h"
 #include "io.h"
 #include "journal.h"
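
[Editor's note: the atomicity helpers moved here — bch2_extent_atomic_end()
driving count_iters_for_insert() — bound how many alloc/ec btree iterators a
single extent update may require, trimming the insert's end where the budget
(EXTENT_ITERS_MAX) runs out so the remainder can be retried. A simplified,
self-contained model of that trimming follows; struct toy_key, TOY_ITERS_MAX
and atomic_end() are illustrative stand-ins, not the kernel code.]

#include <stdint.h>
#include <stdio.h>

/* A toy key covering [start, end) with some number of pointers, each of
 * which would cost one alloc-btree iterator to update. */
struct toy_key { uint64_t start, end; unsigned nr_ptrs; };

#define TOY_ITERS_MAX 4	/* stand-in for EXTENT_ITERS_MAX */

static uint64_t atomic_end(uint64_t insert_start, uint64_t insert_end,
			   const struct toy_key *keys, unsigned nr_keys)
{
	uint64_t end = insert_end;
	unsigned nr_iters = 0;
	unsigned i;

	for (i = 0; i < nr_keys; i++) {
		if (keys[i].start >= end)
			break;

		nr_iters += keys[i].nr_ptrs;
		if (nr_iters >= TOY_ITERS_MAX) {
			/* like *end = bpos_min(*end, k.k->p) */
			end = keys[i].end < end ? keys[i].end : end;
			break;
		}
	}
	return end;
}

int main(void)
{
	const struct toy_key keys[] = {
		{  0, 10, 2 },
		{ 10, 20, 2 },
		{ 20, 30, 2 },
	};

	/* The budget is exhausted at the second key, so an insert spanning
	 * [0, 30) is trimmed back to end at 20; prints 20. */
	printf("%llu\n", (unsigned long long) atomic_end(0, 30, keys, 3));
	return 0;
}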