diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2022-10-18 20:25:50 +0200 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2022-10-18 20:25:50 +0200 |
commit | aae703b02f92bde9264366c545e87cec451de471 (patch) | |
tree | 09d5276405b9f1ece0602d2007da516a860bac51 /fs | |
parent | Merge tag 'erofs-for-6.1-rc2-fixes' of git://git.kernel.org/pub/scm/linux/ker... (diff) | |
parent | MAINTAINERS: update btrfs website links and files (diff) | |
download | linux-aae703b02f92bde9264366c545e87cec451de471.tar.xz linux-aae703b02f92bde9264366c545e87cec451de471.zip |
Merge tag 'for-6.1-rc1-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux
Pull btrfs fixes from David Sterba:
- fiemap fixes:
- add missing path cache update
- fix processing of delayed data and tree refs during backref
walking, this could lead to reporting incorrect extent sharing
- fix extent range locking under heavy contention to avoid deadlocks
- make it possible to test send v3 in debugging mode
- update links in MAINTAINERS
* tag 'for-6.1-rc1-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux:
MAINTAINERS: update btrfs website links and files
btrfs: ignore fiemap path cache if we have multiple leaves for a data extent
btrfs: fix processing of delayed tree block refs during backref walking
btrfs: fix processing of delayed data refs during backref walking
btrfs: delete stale comments after merge conflict resolution
btrfs: unlock locked extent area if we have contention
btrfs: send: update command for protocol version check
btrfs: send: allow protocol version 3 with CONFIG_BTRFS_DEBUG
btrfs: add missing path cache update during fiemap
Diffstat (limited to 'fs')
-rw-r--r-- | fs/btrfs/backref.c | 84 | ||||
-rw-r--r-- | fs/btrfs/backref.h | 1 | ||||
-rw-r--r-- | fs/btrfs/block-group.c | 2 | ||||
-rw-r--r-- | fs/btrfs/extent-io-tree.c | 15 | ||||
-rw-r--r-- | fs/btrfs/send.c | 5 | ||||
-rw-r--r-- | fs/btrfs/send.h | 5 |
6 files changed, 87 insertions, 25 deletions
diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c index dce3a16996b9..4ec18ceb2f21 100644 --- a/fs/btrfs/backref.c +++ b/fs/btrfs/backref.c @@ -138,6 +138,7 @@ struct share_check { u64 root_objectid; u64 inum; int share_count; + bool have_delayed_delete_refs; }; static inline int extent_is_shared(struct share_check *sc) @@ -820,16 +821,11 @@ static int add_delayed_refs(const struct btrfs_fs_info *fs_info, struct preftrees *preftrees, struct share_check *sc) { struct btrfs_delayed_ref_node *node; - struct btrfs_delayed_extent_op *extent_op = head->extent_op; struct btrfs_key key; - struct btrfs_key tmp_op_key; struct rb_node *n; int count; int ret = 0; - if (extent_op && extent_op->update_key) - btrfs_disk_key_to_cpu(&tmp_op_key, &extent_op->key); - spin_lock(&head->lock); for (n = rb_first_cached(&head->ref_tree); n; n = rb_next(n)) { node = rb_entry(n, struct btrfs_delayed_ref_node, @@ -855,10 +851,16 @@ static int add_delayed_refs(const struct btrfs_fs_info *fs_info, case BTRFS_TREE_BLOCK_REF_KEY: { /* NORMAL INDIRECT METADATA backref */ struct btrfs_delayed_tree_ref *ref; + struct btrfs_key *key_ptr = NULL; + + if (head->extent_op && head->extent_op->update_key) { + btrfs_disk_key_to_cpu(&key, &head->extent_op->key); + key_ptr = &key; + } ref = btrfs_delayed_node_to_tree_ref(node); ret = add_indirect_ref(fs_info, preftrees, ref->root, - &tmp_op_key, ref->level + 1, + key_ptr, ref->level + 1, node->bytenr, count, sc, GFP_ATOMIC); break; @@ -884,13 +886,22 @@ static int add_delayed_refs(const struct btrfs_fs_info *fs_info, key.offset = ref->offset; /* - * Found a inum that doesn't match our known inum, we - * know it's shared. + * If we have a share check context and a reference for + * another inode, we can't exit immediately. This is + * because even if this is a BTRFS_ADD_DELAYED_REF + * reference we may find next a BTRFS_DROP_DELAYED_REF + * which cancels out this ADD reference. + * + * If this is a DROP reference and there was no previous + * ADD reference, then we need to signal that when we + * process references from the extent tree (through + * add_inline_refs() and add_keyed_refs()), we should + * not exit early if we find a reference for another + * inode, because one of the delayed DROP references + * may cancel that reference in the extent tree. */ - if (sc && sc->inum && ref->objectid != sc->inum) { - ret = BACKREF_FOUND_SHARED; - goto out; - } + if (sc && count < 0) + sc->have_delayed_delete_refs = true; ret = add_indirect_ref(fs_info, preftrees, ref->root, &key, 0, node->bytenr, count, sc, @@ -920,7 +931,7 @@ static int add_delayed_refs(const struct btrfs_fs_info *fs_info, } if (!ret) ret = extent_is_shared(sc); -out: + spin_unlock(&head->lock); return ret; } @@ -1023,7 +1034,8 @@ static int add_inline_refs(const struct btrfs_fs_info *fs_info, key.type = BTRFS_EXTENT_DATA_KEY; key.offset = btrfs_extent_data_ref_offset(leaf, dref); - if (sc && sc->inum && key.objectid != sc->inum) { + if (sc && sc->inum && key.objectid != sc->inum && + !sc->have_delayed_delete_refs) { ret = BACKREF_FOUND_SHARED; break; } @@ -1033,6 +1045,7 @@ static int add_inline_refs(const struct btrfs_fs_info *fs_info, ret = add_indirect_ref(fs_info, preftrees, root, &key, 0, bytenr, count, sc, GFP_NOFS); + break; } default: @@ -1122,7 +1135,8 @@ static int add_keyed_refs(struct btrfs_root *extent_root, key.type = BTRFS_EXTENT_DATA_KEY; key.offset = btrfs_extent_data_ref_offset(leaf, dref); - if (sc && sc->inum && key.objectid != sc->inum) { + if (sc && sc->inum && key.objectid != sc->inum && + !sc->have_delayed_delete_refs) { ret = BACKREF_FOUND_SHARED; break; } @@ -1522,6 +1536,9 @@ static bool lookup_backref_shared_cache(struct btrfs_backref_shared_cache *cache { struct btrfs_backref_shared_cache_entry *entry; + if (!cache->use_cache) + return false; + if (WARN_ON_ONCE(level >= BTRFS_MAX_LEVEL)) return false; @@ -1557,6 +1574,19 @@ static bool lookup_backref_shared_cache(struct btrfs_backref_shared_cache *cache return false; *is_shared = entry->is_shared; + /* + * If the node at this level is shared, than all nodes below are also + * shared. Currently some of the nodes below may be marked as not shared + * because we have just switched from one leaf to another, and switched + * also other nodes above the leaf and below the current level, so mark + * them as shared. + */ + if (*is_shared) { + for (int i = 0; i < level; i++) { + cache->entries[i].is_shared = true; + cache->entries[i].gen = entry->gen; + } + } return true; } @@ -1573,6 +1603,9 @@ static void store_backref_shared_cache(struct btrfs_backref_shared_cache *cache, struct btrfs_backref_shared_cache_entry *entry; u64 gen; + if (!cache->use_cache) + return; + if (WARN_ON_ONCE(level >= BTRFS_MAX_LEVEL)) return; @@ -1648,6 +1681,7 @@ int btrfs_is_data_extent_shared(struct btrfs_root *root, u64 inum, u64 bytenr, .root_objectid = root->root_key.objectid, .inum = inum, .share_count = 0, + .have_delayed_delete_refs = false, }; int level; @@ -1669,6 +1703,7 @@ int btrfs_is_data_extent_shared(struct btrfs_root *root, u64 inum, u64 bytenr, /* -1 means we are in the bytenr of the data extent. */ level = -1; ULIST_ITER_INIT(&uiter); + cache->use_cache = true; while (1) { bool is_shared; bool cached; @@ -1698,6 +1733,24 @@ int btrfs_is_data_extent_shared(struct btrfs_root *root, u64 inum, u64 bytenr, extent_gen > btrfs_root_last_snapshot(&root->root_item)) break; + /* + * If our data extent was not directly shared (without multiple + * reference items), than it might have a single reference item + * with a count > 1 for the same offset, which means there are 2 + * (or more) file extent items that point to the data extent - + * this happens when a file extent item needs to be split and + * then one item gets moved to another leaf due to a b+tree leaf + * split when inserting some item. In this case the file extent + * items may be located in different leaves and therefore some + * of the leaves may be referenced through shared subtrees while + * others are not. Since our extent buffer cache only works for + * a single path (by far the most common case and simpler to + * deal with), we can not use it if we have multiple leaves + * (which implies multiple paths). + */ + if (level == -1 && tmp->nnodes > 1) + cache->use_cache = false; + if (level >= 0) store_backref_shared_cache(cache, root, bytenr, level, false); @@ -1713,6 +1766,7 @@ int btrfs_is_data_extent_shared(struct btrfs_root *root, u64 inum, u64 bytenr, break; } shared.share_count = 0; + shared.have_delayed_delete_refs = false; cond_resched(); } diff --git a/fs/btrfs/backref.h b/fs/btrfs/backref.h index 52ae6957b414..8e69584d538d 100644 --- a/fs/btrfs/backref.h +++ b/fs/btrfs/backref.h @@ -29,6 +29,7 @@ struct btrfs_backref_shared_cache { * a given data extent should never exceed the maximum b+tree height. */ struct btrfs_backref_shared_cache_entry entries[BTRFS_MAX_LEVEL]; + bool use_cache; }; typedef int (iterate_extent_inodes_t)(u64 inum, u64 offset, u64 root, diff --git a/fs/btrfs/block-group.c b/fs/btrfs/block-group.c index 32c415cfbdfe..deebc8ddbd93 100644 --- a/fs/btrfs/block-group.c +++ b/fs/btrfs/block-group.c @@ -774,10 +774,8 @@ int btrfs_cache_block_group(struct btrfs_block_group *cache, bool wait) btrfs_queue_work(fs_info->caching_workers, &caching_ctl->work); out: - /* REVIEW */ if (wait && caching_ctl) ret = btrfs_caching_ctl_wait_done(cache, caching_ctl); - /* wait_event(caching_ctl->wait, space_cache_v1_done(cache)); */ if (caching_ctl) btrfs_put_caching_control(caching_ctl); diff --git a/fs/btrfs/extent-io-tree.c b/fs/btrfs/extent-io-tree.c index 618275af19c4..83cb0378096f 100644 --- a/fs/btrfs/extent-io-tree.c +++ b/fs/btrfs/extent-io-tree.c @@ -1641,16 +1641,17 @@ int lock_extent(struct extent_io_tree *tree, u64 start, u64 end, int err; u64 failed_start; - while (1) { + err = __set_extent_bit(tree, start, end, EXTENT_LOCKED, &failed_start, + cached_state, NULL, GFP_NOFS); + while (err == -EEXIST) { + if (failed_start != start) + clear_extent_bit(tree, start, failed_start - 1, + EXTENT_LOCKED, cached_state); + + wait_extent_bit(tree, failed_start, end, EXTENT_LOCKED); err = __set_extent_bit(tree, start, end, EXTENT_LOCKED, &failed_start, cached_state, NULL, GFP_NOFS); - if (err == -EEXIST) { - wait_extent_bit(tree, failed_start, end, EXTENT_LOCKED); - start = failed_start; - } else - break; - WARN_ON(start > end); } return err; } diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c index 4ef4167072b8..ec6e1752af2c 100644 --- a/fs/btrfs/send.c +++ b/fs/btrfs/send.c @@ -348,6 +348,7 @@ static bool proto_cmd_ok(const struct send_ctx *sctx, int cmd) switch (sctx->proto) { case 1: return cmd <= BTRFS_SEND_C_MAX_V1; case 2: return cmd <= BTRFS_SEND_C_MAX_V2; + case 3: return cmd <= BTRFS_SEND_C_MAX_V3; default: return false; } } @@ -6469,7 +6470,9 @@ static int finish_inode_if_needed(struct send_ctx *sctx, int at_end) if (ret < 0) goto out; } - if (sctx->cur_inode_needs_verity) { + + if (proto_cmd_ok(sctx, BTRFS_SEND_C_ENABLE_VERITY) + && sctx->cur_inode_needs_verity) { ret = process_verity(sctx); if (ret < 0) goto out; diff --git a/fs/btrfs/send.h b/fs/btrfs/send.h index 0a4537775e0c..f7585cfa7e52 100644 --- a/fs/btrfs/send.h +++ b/fs/btrfs/send.h @@ -10,7 +10,12 @@ #include <linux/types.h> #define BTRFS_SEND_STREAM_MAGIC "btrfs-stream" +/* Conditional support for the upcoming protocol version. */ +#ifdef CONFIG_BTRFS_DEBUG +#define BTRFS_SEND_STREAM_VERSION 3 +#else #define BTRFS_SEND_STREAM_VERSION 2 +#endif /* * In send stream v1, no command is larger than 64K. In send stream v2, no limit |