diff options
author | Qu Wenruo <wqu@suse.com> | 2018-03-29 03:08:11 +0200 |
---|---|---|
committer | David Sterba <dsterba@suse.com> | 2018-03-31 02:01:06 +0200 |
commit | 581c1760415c48cca9349b198bba52dd38750765 (patch) | |
tree | 90e5aa035bfcab9a05fdec79eaa4ce90dc7aa267 /fs/btrfs/disk-io.c | |
parent | btrfs: tests/qgroup: Fix wrong tree backref level (diff) | |
download | linux-581c1760415c48cca9349b198bba52dd38750765.tar.xz linux-581c1760415c48cca9349b198bba52dd38750765.zip |
btrfs: Validate child tree block's level and first key
We have several reports about node pointer points to incorrect child
tree blocks, which could have even wrong owner and level but still with
valid generation and checksum.
Although btrfs check could handle it and print error message like:
leaf parent key incorrect 60670574592
Kernel doesn't have enough check on this type of corruption correctly.
At least add such check to read_tree_block() and btrfs_read_buffer(),
where we need two new parameters @level and @first_key to verify the
child tree block.
The new @level check is mandatory and all call sites are already
modified to extract expected level from its call chain.
While @first_key is optional, the following call sites are skipping such
check:
1) Root node/leaf
As ROOT_ITEM doesn't contain the first key, skip @first_key check.
2) Direct backref
Only parent bytenr and level is known and we need to resolve the key
all by ourselves, skip @first_key check.
Another note of this verification is, it needs extra info from nodeptr
or ROOT_ITEM, so it can't fit into current tree-checker framework, which
is limited to node/leaf boundary.
Signed-off-by: Qu Wenruo <wqu@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
Diffstat (limited to 'fs/btrfs/disk-io.c')
-rw-r--r-- | fs/btrfs/disk-io.c | 95 |
1 files changed, 82 insertions, 13 deletions
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 269374261e36..a2f3a0c67a99 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -427,13 +427,59 @@ static int btrfs_check_super_csum(struct btrfs_fs_info *fs_info, return ret; } +static int verify_level_key(struct btrfs_fs_info *fs_info, + struct extent_buffer *eb, int level, + struct btrfs_key *first_key) +{ + int found_level; + struct btrfs_key found_key; + int ret; + + found_level = btrfs_header_level(eb); + if (found_level != level) { +#ifdef CONFIG_BTRFS_DEBUG + WARN_ON(1); + btrfs_err(fs_info, +"tree level mismatch detected, bytenr=%llu level expected=%u has=%u", + eb->start, level, found_level); +#endif + return -EIO; + } + + if (!first_key) + return 0; + + if (found_level) + btrfs_node_key_to_cpu(eb, &found_key, 0); + else + btrfs_item_key_to_cpu(eb, &found_key, 0); + ret = btrfs_comp_cpu_keys(first_key, &found_key); + +#ifdef CONFIG_BTRFS_DEBUG + if (ret) { + WARN_ON(1); + btrfs_err(fs_info, +"tree first key mismatch detected, bytenr=%llu key expected=(%llu, %u, %llu) has=(%llu, %u, %llu)", + eb->start, first_key->objectid, first_key->type, + first_key->offset, found_key.objectid, + found_key.type, found_key.offset); + } +#endif + return ret; +} + /* * helper to read a given tree block, doing retries as required when * the checksums don't match and we have alternate mirrors to try. + * + * @parent_transid: expected transid, skip check if 0 + * @level: expected level, mandatory check + * @first_key: expected key of first slot, skip check if NULL */ static int btree_read_extent_buffer_pages(struct btrfs_fs_info *fs_info, struct extent_buffer *eb, - u64 parent_transid) + u64 parent_transid, int level, + struct btrfs_key *first_key) { struct extent_io_tree *io_tree; int failed = 0; @@ -448,11 +494,14 @@ static int btree_read_extent_buffer_pages(struct btrfs_fs_info *fs_info, ret = read_extent_buffer_pages(io_tree, eb, WAIT_COMPLETE, mirror_num); if (!ret) { - if (!verify_parent_transid(io_tree, eb, + if (verify_parent_transid(io_tree, eb, parent_transid, 0)) - break; - else ret = -EIO; + else if (verify_level_key(fs_info, eb, level, + first_key)) + ret = -EUCLEAN; + else + break; } /* @@ -460,7 +509,8 @@ static int btree_read_extent_buffer_pages(struct btrfs_fs_info *fs_info, * there is no reason to read the other copies, they won't be * any less wrong. */ - if (test_bit(EXTENT_BUFFER_CORRUPT, &eb->bflags)) + if (test_bit(EXTENT_BUFFER_CORRUPT, &eb->bflags) || + ret == -EUCLEAN) break; num_copies = btrfs_num_copies(fs_info, @@ -1049,8 +1099,17 @@ void btrfs_wait_tree_block_writeback(struct extent_buffer *buf) buf->start, buf->start + buf->len - 1); } +/* + * Read tree block at logical address @bytenr and do variant basic but critical + * verification. + * + * @parent_transid: expected transid of this tree block, skip check if 0 + * @level: expected level, mandatory check + * @first_key: expected key in slot 0, skip check if NULL + */ struct extent_buffer *read_tree_block(struct btrfs_fs_info *fs_info, u64 bytenr, - u64 parent_transid) + u64 parent_transid, int level, + struct btrfs_key *first_key) { struct extent_buffer *buf = NULL; int ret; @@ -1059,7 +1118,8 @@ struct extent_buffer *read_tree_block(struct btrfs_fs_info *fs_info, u64 bytenr, if (IS_ERR(buf)) return buf; - ret = btree_read_extent_buffer_pages(fs_info, buf, parent_transid); + ret = btree_read_extent_buffer_pages(fs_info, buf, parent_transid, + level, first_key); if (ret) { free_extent_buffer(buf); return ERR_PTR(ret); @@ -1388,6 +1448,7 @@ static struct btrfs_root *btrfs_read_tree_root(struct btrfs_root *tree_root, struct btrfs_path *path; u64 generation; int ret; + int level; path = btrfs_alloc_path(); if (!path) @@ -1410,9 +1471,10 @@ static struct btrfs_root *btrfs_read_tree_root(struct btrfs_root *tree_root, } generation = btrfs_root_generation(&root->root_item); + level = btrfs_root_level(&root->root_item); root->node = read_tree_block(fs_info, btrfs_root_bytenr(&root->root_item), - generation); + generation, level, NULL); if (IS_ERR(root->node)) { ret = PTR_ERR(root->node); goto find_fail; @@ -2261,6 +2323,7 @@ static int btrfs_replay_log(struct btrfs_fs_info *fs_info, struct btrfs_root *log_tree_root; struct btrfs_super_block *disk_super = fs_info->super_copy; u64 bytenr = btrfs_super_log_root(disk_super); + int level = btrfs_super_log_root_level(disk_super); if (fs_devices->rw_devices == 0) { btrfs_warn(fs_info, "log replay required on RO media"); @@ -2274,7 +2337,8 @@ static int btrfs_replay_log(struct btrfs_fs_info *fs_info, __setup_root(log_tree_root, fs_info, BTRFS_TREE_LOG_OBJECTID); log_tree_root->node = read_tree_block(fs_info, bytenr, - fs_info->generation + 1); + fs_info->generation + 1, + level, NULL); if (IS_ERR(log_tree_root->node)) { btrfs_warn(fs_info, "failed to read log tree"); ret = PTR_ERR(log_tree_root->node); @@ -2390,6 +2454,7 @@ int open_ctree(struct super_block *sb, int num_backups_tried = 0; int backup_index = 0; int clear_free_space_tree = 0; + int level; tree_root = fs_info->tree_root = btrfs_alloc_root(fs_info, GFP_KERNEL); chunk_root = fs_info->chunk_root = btrfs_alloc_root(fs_info, GFP_KERNEL); @@ -2725,12 +2790,13 @@ int open_ctree(struct super_block *sb, } generation = btrfs_super_chunk_root_generation(disk_super); + level = btrfs_super_chunk_root_level(disk_super); __setup_root(chunk_root, fs_info, BTRFS_CHUNK_TREE_OBJECTID); chunk_root->node = read_tree_block(fs_info, btrfs_super_chunk_root(disk_super), - generation); + generation, level, NULL); if (IS_ERR(chunk_root->node) || !extent_buffer_uptodate(chunk_root->node)) { btrfs_err(fs_info, "failed to read chunk root"); @@ -2764,10 +2830,11 @@ int open_ctree(struct super_block *sb, retry_root_backup: generation = btrfs_super_generation(disk_super); + level = btrfs_super_root_level(disk_super); tree_root->node = read_tree_block(fs_info, btrfs_super_root(disk_super), - generation); + generation, level, NULL); if (IS_ERR(tree_root->node) || !extent_buffer_uptodate(tree_root->node)) { btrfs_warn(fs_info, "failed to read tree root"); @@ -3887,12 +3954,14 @@ void btrfs_btree_balance_dirty_nodelay(struct btrfs_fs_info *fs_info) __btrfs_btree_balance_dirty(fs_info, 0); } -int btrfs_read_buffer(struct extent_buffer *buf, u64 parent_transid) +int btrfs_read_buffer(struct extent_buffer *buf, u64 parent_transid, int level, + struct btrfs_key *first_key) { struct btrfs_root *root = BTRFS_I(buf->pages[0]->mapping->host)->root; struct btrfs_fs_info *fs_info = root->fs_info; - return btree_read_extent_buffer_pages(fs_info, buf, parent_transid); + return btree_read_extent_buffer_pages(fs_info, buf, parent_transid, + level, first_key); } static int btrfs_check_super_valid(struct btrfs_fs_info *fs_info) |