summary | refs | log | tree | commit | diff | stats
path: root/fs/bcachefs
diff options
context:
space:
mode:
author: Kent Overstreet <kent.overstreet@linux.dev>, 2023-10-21 21:03:05 +0200
committer: Kent Overstreet <kent.overstreet@linux.dev>, 2023-10-31 17:18:37 +0100
commit: 8480905765c3729025331720d23735ce085ef070 (patch)
tree: f66b78b6b97cd8b851a07ec5b15b6b08f0401f25 /fs/bcachefs
parent: bcachefs: Ensure devices are always correctly initialized (diff)
download: linux-8480905765c3729025331720d23735ce085ef070.tar.xz
          linux-8480905765c3729025331720d23735ce085ef070.zip
bcachefs: Improve io option handling in data move path
The data move path now correctly picks IO options when inodes in different snapshots have different options applied.

Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
Diffstat (limited to '')
-rw-r--r-- fs/bcachefs/move.c | 131
-rw-r--r-- fs/bcachefs/move.h | 26
2 files changed, 107 insertions, 50 deletions
diff --git a/fs/bcachefs/move.c b/fs/bcachefs/move.c
index 82f60c7883ba..38b076ff1906 100644
--- a/fs/bcachefs/move.c
+++ b/fs/bcachefs/move.c
@@ -20,6 +20,7 @@
#include "keylist.h"
#include "move.h"
#include "replicas.h"
+#include "snapshot.h"
#include "super-io.h"
#include "trace.h"
@@ -413,35 +414,87 @@ err:
return ret;
}
-static int lookup_inode(struct btree_trans *trans, struct bpos pos,
- struct bch_inode_unpacked *inode)
+struct bch_io_opts *bch2_move_get_io_opts(struct btree_trans *trans,
+ struct per_snapshot_io_opts *io_opts,
+ struct bkey_s_c extent_k)
+{
+ struct bch_fs *c = trans->c;
+ u32 restart_count = trans->restart_count;
+ int ret = 0;
+
+ if (io_opts->cur_inum != extent_k.k->p.inode) {
+ struct btree_iter iter;
+ struct bkey_s_c k;
+
+ io_opts->d.nr = 0;
+
+ for_each_btree_key(trans, iter, BTREE_ID_inodes, POS(0, extent_k.k->p.inode),
+ BTREE_ITER_ALL_SNAPSHOTS, k, ret) {
+ if (k.k->p.offset != extent_k.k->p.inode)
+ break;
+
+ if (!bkey_is_inode(k.k))
+ continue;
+
+ struct bch_inode_unpacked inode;
+ BUG_ON(bch2_inode_unpack(k, &inode));
+
+ struct snapshot_io_opts_entry e = { .snapshot = k.k->p.snapshot };
+ bch2_inode_opts_get(&e.io_opts, trans->c, &inode);
+
+ ret = darray_push(&io_opts->d, e);
+ if (ret)
+ break;
+ }
+ bch2_trans_iter_exit(trans, &iter);
+ io_opts->cur_inum = extent_k.k->p.inode;
+ }
+
+ ret = ret ?: trans_was_restarted(trans, restart_count);
+ if (ret)
+ return ERR_PTR(ret);
+
+ if (extent_k.k->p.snapshot) {
+ struct snapshot_io_opts_entry *i;
+ darray_for_each(io_opts->d, i)
+ if (bch2_snapshot_is_ancestor(c, extent_k.k->p.snapshot, i->snapshot))
+ return &i->io_opts;
+ }
+
+ return &io_opts->fs_io_opts;
+}
+
+static int bch2_move_get_io_opts_one(struct btree_trans *trans,
+ struct bch_io_opts *io_opts,
+ struct bkey_s_c extent_k)
{
struct btree_iter iter;
struct bkey_s_c k;
int ret;
- bch2_trans_iter_init(trans, &iter, BTREE_ID_inodes, pos,
- BTREE_ITER_ALL_SNAPSHOTS);
- k = bch2_btree_iter_peek(&iter);
+ /* reflink btree? */
+ if (!extent_k.k->p.inode) {
+ *io_opts = bch2_opts_to_inode_opts(trans->c->opts);
+ return 0;
+ }
+
+ k = bch2_bkey_get_iter(trans, &iter, BTREE_ID_inodes,
+ SPOS(0, extent_k.k->p.inode, extent_k.k->p.snapshot),
+ BTREE_ITER_CACHED);
ret = bkey_err(k);
- if (ret)
- goto err;
+ if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
+ return ret;
- if (!k.k || !bkey_eq(k.k->p, pos)) {
- ret = -BCH_ERR_ENOENT_inode;
- goto err;
+ if (!ret && bkey_is_inode(k.k)) {
+ struct bch_inode_unpacked inode;
+ bch2_inode_unpack(k, &inode);
+ bch2_inode_opts_get(io_opts, trans->c, &inode);
+ } else {
+ *io_opts = bch2_opts_to_inode_opts(trans->c->opts);
}
- ret = bkey_is_inode(k.k) ? 0 : -EIO;
- if (ret)
- goto err;
-
- ret = bch2_inode_unpack(k, inode);
- if (ret)
- goto err;
-err:
bch2_trans_iter_exit(trans, &iter);
- return ret;
+ return 0;
}
static int move_ratelimit(struct btree_trans *trans,
@@ -492,30 +545,6 @@ static int move_ratelimit(struct btree_trans *trans,
return 0;
}
-static int move_get_io_opts(struct btree_trans *trans,
- struct bch_io_opts *io_opts,
- struct bkey_s_c k, u64 *cur_inum)
-{
- struct bch_inode_unpacked inode;
- int ret;
-
- if (*cur_inum == k.k->p.inode)
- return 0;
-
- ret = lookup_inode(trans,
- SPOS(0, k.k->p.inode, k.k->p.snapshot),
- &inode);
- if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
- return ret;
-
- if (!ret)
- bch2_inode_opts_get(io_opts, trans->c, &inode);
- else
- *io_opts = bch2_opts_to_inode_opts(trans->c->opts);
- *cur_inum = k.k->p.inode;
- return 0;
-}
-
static int __bch2_move_data(struct moving_context *ctxt,
struct bpos start,
struct bpos end,
@@ -523,15 +552,16 @@ static int __bch2_move_data(struct moving_context *ctxt,
enum btree_id btree_id)
{
struct bch_fs *c = ctxt->c;
- struct bch_io_opts io_opts = bch2_opts_to_inode_opts(c->opts);
+ struct per_snapshot_io_opts snapshot_io_opts;
+ struct bch_io_opts *io_opts;
struct bkey_buf sk;
struct btree_trans *trans = bch2_trans_get(c);
struct btree_iter iter;
struct bkey_s_c k;
struct data_update_opts data_opts;
- u64 cur_inum = U64_MAX;
int ret = 0, ret2;
+ per_snapshot_io_opts_init(&snapshot_io_opts, c);
bch2_bkey_buf_init(&sk);
if (ctxt->stats) {
@@ -569,12 +599,13 @@ static int __bch2_move_data(struct moving_context *ctxt,
if (!bkey_extent_is_direct_data(k.k))
goto next_nondata;
- ret = move_get_io_opts(trans, &io_opts, k, &cur_inum);
+ io_opts = bch2_move_get_io_opts(trans, &snapshot_io_opts, k);
+ ret = PTR_ERR_OR_ZERO(io_opts);
if (ret)
continue;
memset(&data_opts, 0, sizeof(data_opts));
- if (!pred(c, arg, k, &io_opts, &data_opts))
+ if (!pred(c, arg, k, io_opts, &data_opts))
goto next;
/*
@@ -585,7 +616,7 @@ static int __bch2_move_data(struct moving_context *ctxt,
k = bkey_i_to_s_c(sk.k);
ret2 = bch2_move_extent(trans, &iter, ctxt, NULL,
- io_opts, btree_id, k, data_opts);
+ *io_opts, btree_id, k, data_opts);
if (ret2) {
if (bch2_err_matches(ret2, BCH_ERR_transaction_restart))
continue;
@@ -612,6 +643,7 @@ next_nondata:
bch2_trans_iter_exit(trans, &iter);
bch2_trans_put(trans);
bch2_bkey_buf_exit(&sk, c);
+ per_snapshot_io_opts_exit(&snapshot_io_opts);
return ret;
}
@@ -673,7 +705,6 @@ int __bch2_evacuate_bucket(struct btree_trans *trans,
struct data_update_opts data_opts;
unsigned dirty_sectors, bucket_size;
u64 fragmentation;
- u64 cur_inum = U64_MAX;
struct bpos bp_pos = POS_MIN;
int ret = 0;
@@ -737,7 +768,7 @@ int __bch2_evacuate_bucket(struct btree_trans *trans,
bch2_bkey_buf_reassemble(&sk, c, k);
k = bkey_i_to_s_c(sk.k);
- ret = move_get_io_opts(trans, &io_opts, k, &cur_inum);
+ ret = bch2_move_get_io_opts_one(trans, &io_opts, k);
if (ret) {
bch2_trans_iter_exit(trans, &iter);
continue;
diff --git a/fs/bcachefs/move.h b/fs/bcachefs/move.h
index cbdd58db8782..aa4b65c4f960 100644
--- a/fs/bcachefs/move.h
+++ b/fs/bcachefs/move.h
@@ -62,6 +62,32 @@ struct moving_io *bch2_moving_ctxt_next_pending_write(struct moving_context *);
void bch2_moving_ctxt_do_pending_writes(struct moving_context *,
struct btree_trans *);
+/* Inodes in different snapshots may have different IO options: */
+struct snapshot_io_opts_entry {
+ u32 snapshot;
+ struct bch_io_opts io_opts;
+};
+
+struct per_snapshot_io_opts {
+ u64 cur_inum;
+ struct bch_io_opts fs_io_opts;
+ DARRAY(struct snapshot_io_opts_entry) d;
+};
+
+static inline void per_snapshot_io_opts_init(struct per_snapshot_io_opts *io_opts, struct bch_fs *c)
+{
+ memset(io_opts, 0, sizeof(*io_opts));
+ io_opts->fs_io_opts = bch2_opts_to_inode_opts(c->opts);
+}
+
+static inline void per_snapshot_io_opts_exit(struct per_snapshot_io_opts *io_opts)
+{
+ darray_exit(&io_opts->d);
+}
+
+struct bch_io_opts *bch2_move_get_io_opts(struct btree_trans *,
+ struct per_snapshot_io_opts *, struct bkey_s_c);
+
int bch2_scan_old_btree_nodes(struct bch_fs *, struct bch_move_stats *);
int bch2_move_data(struct bch_fs *,