author     Kent Overstreet <kent.overstreet@linux.dev>  2023-10-21 21:03:05 +0200
committer  Kent Overstreet <kent.overstreet@linux.dev>  2023-10-31 17:18:37 +0100
commit     8480905765c3729025331720d23735ce085ef070 (patch)
tree       f66b78b6b97cd8b851a07ec5b15b6b08f0401f25 /fs/bcachefs
parent     bcachefs: Ensure devices are always correctly initialized (diff)
bcachefs: Improve io option handling in data move path
The data move path now correctly picks IO options when inodes in
different snapshots have different options applied.
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
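
As a reading aid (not part of the commit), here is a minimal sketch of how a data-move loop is expected to drive the per-snapshot IO option cache introduced by this patch. The helpers per_snapshot_io_opts_init(), bch2_move_get_io_opts() and per_snapshot_io_opts_exit() are the ones added in move.c/move.h below; the wrapper function name and the elided extent iteration are purely illustrative.

/*
 * Illustrative sketch only: the surrounding function and the elided
 * iteration are hypothetical; the helpers come from this patch.
 */
static int example_move_loop(struct btree_trans *trans, struct bch_fs *c)
{
	struct per_snapshot_io_opts snapshot_io_opts;
	int ret = 0;

	/* seed the cache with the filesystem-wide default options */
	per_snapshot_io_opts_init(&snapshot_io_opts, c);

	/*
	 * For each extent key k being moved (iteration elided):
	 *
	 *	struct bch_io_opts *io_opts =
	 *		bch2_move_get_io_opts(trans, &snapshot_io_opts, k);
	 *	ret = PTR_ERR_OR_ZERO(io_opts);
	 *	if (ret)
	 *		break;
	 *	// *io_opts now holds the options of the inode matching k's
	 *	// snapshot, falling back to the filesystem-wide options when
	 *	// no matching inode is found
	 */

	per_snapshot_io_opts_exit(&snapshot_io_opts);
	return ret;
}

This mirrors the new __bch2_move_data() loop in the diff below; __bch2_evacuate_bucket() uses the simpler bch2_move_get_io_opts_one() helper instead.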
Diffstat
-rw-r--r--	fs/bcachefs/move.c	131
-rw-r--r--	fs/bcachefs/move.h	 26
2 files changed, 107 insertions, 50 deletions
diff --git a/fs/bcachefs/move.c b/fs/bcachefs/move.c
index 82f60c7883ba..38b076ff1906 100644
--- a/fs/bcachefs/move.c
+++ b/fs/bcachefs/move.c
@@ -20,6 +20,7 @@
 #include "keylist.h"
 #include "move.h"
 #include "replicas.h"
+#include "snapshot.h"
 #include "super-io.h"
 #include "trace.h"
 
@@ -413,35 +414,87 @@ err:
 	return ret;
 }
 
-static int lookup_inode(struct btree_trans *trans, struct bpos pos,
-			struct bch_inode_unpacked *inode)
+struct bch_io_opts *bch2_move_get_io_opts(struct btree_trans *trans,
+			  struct per_snapshot_io_opts *io_opts,
+			  struct bkey_s_c extent_k)
+{
+	struct bch_fs *c = trans->c;
+	u32 restart_count = trans->restart_count;
+	int ret = 0;
+
+	if (io_opts->cur_inum != extent_k.k->p.inode) {
+		struct btree_iter iter;
+		struct bkey_s_c k;
+
+		io_opts->d.nr = 0;
+
+		for_each_btree_key(trans, iter, BTREE_ID_inodes, POS(0, extent_k.k->p.inode),
+				   BTREE_ITER_ALL_SNAPSHOTS, k, ret) {
+			if (k.k->p.offset != extent_k.k->p.inode)
+				break;
+
+			if (!bkey_is_inode(k.k))
+				continue;
+
+			struct bch_inode_unpacked inode;
+			BUG_ON(bch2_inode_unpack(k, &inode));
+
+			struct snapshot_io_opts_entry e = { .snapshot = k.k->p.snapshot };
+			bch2_inode_opts_get(&e.io_opts, trans->c, &inode);
+
+			ret = darray_push(&io_opts->d, e);
+			if (ret)
+				break;
+		}
+		bch2_trans_iter_exit(trans, &iter);
+		io_opts->cur_inum = extent_k.k->p.inode;
+	}
+
+	ret = ret ?: trans_was_restarted(trans, restart_count);
+	if (ret)
+		return ERR_PTR(ret);
+
+	if (extent_k.k->p.snapshot) {
+		struct snapshot_io_opts_entry *i;
+		darray_for_each(io_opts->d, i)
+			if (bch2_snapshot_is_ancestor(c, extent_k.k->p.snapshot, i->snapshot))
+				return &i->io_opts;
+	}
+
+	return &io_opts->fs_io_opts;
+}
+
+static int bch2_move_get_io_opts_one(struct btree_trans *trans,
+				     struct bch_io_opts *io_opts,
+				     struct bkey_s_c extent_k)
 {
 	struct btree_iter iter;
 	struct bkey_s_c k;
 	int ret;
 
-	bch2_trans_iter_init(trans, &iter, BTREE_ID_inodes, pos,
-			     BTREE_ITER_ALL_SNAPSHOTS);
-	k = bch2_btree_iter_peek(&iter);
+	/* reflink btree? */
+	if (!extent_k.k->p.inode) {
+		*io_opts = bch2_opts_to_inode_opts(trans->c->opts);
+		return 0;
+	}
+
+	k = bch2_bkey_get_iter(trans, &iter, BTREE_ID_inodes,
+			       SPOS(0, extent_k.k->p.inode, extent_k.k->p.snapshot),
+			       BTREE_ITER_CACHED);
 	ret = bkey_err(k);
-	if (ret)
-		goto err;
+	if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
+		return ret;
 
-	if (!k.k || !bkey_eq(k.k->p, pos)) {
-		ret = -BCH_ERR_ENOENT_inode;
-		goto err;
+	if (!ret && bkey_is_inode(k.k)) {
+		struct bch_inode_unpacked inode;
+		bch2_inode_unpack(k, &inode);
+		bch2_inode_opts_get(io_opts, trans->c, &inode);
+	} else {
+		*io_opts = bch2_opts_to_inode_opts(trans->c->opts);
 	}
 
-	ret = bkey_is_inode(k.k) ? 0 : -EIO;
-	if (ret)
-		goto err;
-
-	ret = bch2_inode_unpack(k, inode);
-	if (ret)
-		goto err;
-err:
 	bch2_trans_iter_exit(trans, &iter);
-	return ret;
+	return 0;
 }
 
 static int move_ratelimit(struct btree_trans *trans,
@@ -492,30 +545,6 @@ static int move_ratelimit(struct btree_trans *trans,
 	return 0;
 }
 
-static int move_get_io_opts(struct btree_trans *trans,
-			    struct bch_io_opts *io_opts,
-			    struct bkey_s_c k, u64 *cur_inum)
-{
-	struct bch_inode_unpacked inode;
-	int ret;
-
-	if (*cur_inum == k.k->p.inode)
-		return 0;
-
-	ret = lookup_inode(trans,
-			   SPOS(0, k.k->p.inode, k.k->p.snapshot),
-			   &inode);
-	if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
-		return ret;
-
-	if (!ret)
-		bch2_inode_opts_get(io_opts, trans->c, &inode);
-	else
-		*io_opts = bch2_opts_to_inode_opts(trans->c->opts);
-	*cur_inum = k.k->p.inode;
-	return 0;
-}
-
 static int __bch2_move_data(struct moving_context *ctxt,
 			    struct bpos start,
 			    struct bpos end,
@@ -523,15 +552,16 @@ static int __bch2_move_data(struct moving_context *ctxt,
 			    enum btree_id btree_id)
 {
 	struct bch_fs *c = ctxt->c;
-	struct bch_io_opts io_opts = bch2_opts_to_inode_opts(c->opts);
+	struct per_snapshot_io_opts snapshot_io_opts;
+	struct bch_io_opts *io_opts;
 	struct bkey_buf sk;
 	struct btree_trans *trans = bch2_trans_get(c);
 	struct btree_iter iter;
 	struct bkey_s_c k;
 	struct data_update_opts data_opts;
-	u64 cur_inum = U64_MAX;
 	int ret = 0, ret2;
 
+	per_snapshot_io_opts_init(&snapshot_io_opts, c);
 	bch2_bkey_buf_init(&sk);
 
 	if (ctxt->stats) {
@@ -569,12 +599,13 @@ static int __bch2_move_data(struct moving_context *ctxt,
 		if (!bkey_extent_is_direct_data(k.k))
 			goto next_nondata;
 
-		ret = move_get_io_opts(trans, &io_opts, k, &cur_inum);
+		io_opts = bch2_move_get_io_opts(trans, &snapshot_io_opts, k);
+		ret = PTR_ERR_OR_ZERO(io_opts);
 		if (ret)
 			continue;
 
 		memset(&data_opts, 0, sizeof(data_opts));
-		if (!pred(c, arg, k, &io_opts, &data_opts))
+		if (!pred(c, arg, k, io_opts, &data_opts))
 			goto next;
 
 		/*
@@ -585,7 +616,7 @@ static int __bch2_move_data(struct moving_context *ctxt,
 		k = bkey_i_to_s_c(sk.k);
 
 		ret2 = bch2_move_extent(trans, &iter, ctxt, NULL,
-					io_opts, btree_id, k, data_opts);
+					*io_opts, btree_id, k, data_opts);
 		if (ret2) {
 			if (bch2_err_matches(ret2, BCH_ERR_transaction_restart))
 				continue;
@@ -612,6 +643,7 @@ next_nondata:
 	bch2_trans_iter_exit(trans, &iter);
 	bch2_trans_put(trans);
 	bch2_bkey_buf_exit(&sk, c);
+	per_snapshot_io_opts_exit(&snapshot_io_opts);
 
 	return ret;
 }
@@ -673,7 +705,6 @@ int __bch2_evacuate_bucket(struct btree_trans *trans,
 	struct data_update_opts data_opts;
 	unsigned dirty_sectors, bucket_size;
 	u64 fragmentation;
-	u64 cur_inum = U64_MAX;
 	struct bpos bp_pos = POS_MIN;
 	int ret = 0;
 
@@ -737,7 +768,7 @@ int __bch2_evacuate_bucket(struct btree_trans *trans,
 			bch2_bkey_buf_reassemble(&sk, c, k);
 			k = bkey_i_to_s_c(sk.k);
 
-			ret = move_get_io_opts(trans, &io_opts, k, &cur_inum);
+			ret = bch2_move_get_io_opts_one(trans, &io_opts, k);
 			if (ret) {
 				bch2_trans_iter_exit(trans, &iter);
 				continue;
diff --git a/fs/bcachefs/move.h b/fs/bcachefs/move.h
index cbdd58db8782..aa4b65c4f960 100644
--- a/fs/bcachefs/move.h
+++ b/fs/bcachefs/move.h
@@ -62,6 +62,32 @@ struct moving_io *bch2_moving_ctxt_next_pending_write(struct moving_context *);
 void bch2_moving_ctxt_do_pending_writes(struct moving_context *,
 					struct btree_trans *);
 
+/* Inodes in different snapshots may have different IO options: */
+struct snapshot_io_opts_entry {
+	u32			snapshot;
+	struct bch_io_opts	io_opts;
+};
+
+struct per_snapshot_io_opts {
+	u64			cur_inum;
+	struct bch_io_opts	fs_io_opts;
+	DARRAY(struct snapshot_io_opts_entry) d;
+};
+
+static inline void per_snapshot_io_opts_init(struct per_snapshot_io_opts *io_opts, struct bch_fs *c)
+{
+	memset(io_opts, 0, sizeof(*io_opts));
+	io_opts->fs_io_opts = bch2_opts_to_inode_opts(c->opts);
+}
+
+static inline void per_snapshot_io_opts_exit(struct per_snapshot_io_opts *io_opts)
+{
+	darray_exit(&io_opts->d);
+}
+
+struct bch_io_opts *bch2_move_get_io_opts(struct btree_trans *,
+					  struct per_snapshot_io_opts *, struct bkey_s_c);
+
 int bch2_scan_old_btree_nodes(struct bch_fs *, struct bch_move_stats *);
 
 int bch2_move_data(struct bch_fs *,