diff options
Diffstat (limited to 'fs/btrfs/disk-io.c')
-rw-r--r-- | fs/btrfs/disk-io.c | 229 |
1 files changed, 119 insertions, 110 deletions
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 6fe9197f6ee4..deb74a8c191a 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -260,15 +260,12 @@ void btrfs_csum_final(u32 crc, u8 *result) } /* - * compute the csum for a btree block, and either verify it or write it - * into the csum field of the block. + * Compute the csum of a btree block and store the result to provided buffer. + * + * Returns error if the extent buffer cannot be mapped. */ -static int csum_tree_block(struct btrfs_fs_info *fs_info, - struct extent_buffer *buf, - int verify) +static int csum_tree_block(struct extent_buffer *buf, u8 *result) { - u16 csum_size = btrfs_super_csum_size(fs_info->super_copy); - char result[BTRFS_CSUM_SIZE]; unsigned long len; unsigned long cur_len; unsigned long offset = BTRFS_CSUM_SIZE; @@ -288,7 +285,7 @@ static int csum_tree_block(struct btrfs_fs_info *fs_info, */ err = map_private_extent_buffer(buf, offset, 32, &kaddr, &map_start, &map_len); - if (err) + if (WARN_ON(err)) return err; cur_len = min(len, map_len - (offset - map_start)); crc = btrfs_csum_data(kaddr + offset - map_start, @@ -300,23 +297,6 @@ static int csum_tree_block(struct btrfs_fs_info *fs_info, btrfs_csum_final(crc, result); - if (verify) { - if (memcmp_extent_buffer(buf, result, 0, csum_size)) { - u32 val; - u32 found = 0; - memcpy(&found, result, csum_size); - - read_extent_buffer(buf, &val, 0, csum_size); - btrfs_warn_rl(fs_info, - "%s checksum verify failed on %llu wanted %X found %X level %d", - fs_info->sb->s_id, buf->start, - val, found, btrfs_header_level(buf)); - return -EUCLEAN; - } - } else { - write_extent_buffer(buf, result, 0, csum_size); - } - return 0; } @@ -414,22 +394,21 @@ static int btrfs_check_super_csum(struct btrfs_fs_info *fs_info, return ret; } -static int verify_level_key(struct btrfs_fs_info *fs_info, - struct extent_buffer *eb, int level, - struct btrfs_key *first_key, u64 parent_transid) +int btrfs_verify_level_key(struct extent_buffer *eb, int level, + struct btrfs_key *first_key, u64 parent_transid) { + struct btrfs_fs_info *fs_info = eb->fs_info; int found_level; struct btrfs_key found_key; int ret; found_level = btrfs_header_level(eb); if (found_level != level) { -#ifdef CONFIG_BTRFS_DEBUG - WARN_ON(1); + WARN(IS_ENABLED(CONFIG_BTRFS_DEBUG), + KERN_ERR "BTRFS: tree level check failed\n"); btrfs_err(fs_info, "tree level mismatch detected, bytenr=%llu level expected=%u has=%u", eb->start, level, found_level); -#endif return -EIO; } @@ -450,9 +429,9 @@ static int verify_level_key(struct btrfs_fs_info *fs_info, btrfs_item_key_to_cpu(eb, &found_key, 0); ret = btrfs_comp_cpu_keys(first_key, &found_key); -#ifdef CONFIG_BTRFS_DEBUG if (ret) { - WARN_ON(1); + WARN(IS_ENABLED(CONFIG_BTRFS_DEBUG), + KERN_ERR "BTRFS: tree first key check failed\n"); btrfs_err(fs_info, "tree first key mismatch detected, bytenr=%llu parent_transid=%llu key expected=(%llu,%u,%llu) has=(%llu,%u,%llu)", eb->start, parent_transid, first_key->objectid, @@ -460,7 +439,6 @@ static int verify_level_key(struct btrfs_fs_info *fs_info, found_key.objectid, found_key.type, found_key.offset); } -#endif return ret; } @@ -472,11 +450,11 @@ static int verify_level_key(struct btrfs_fs_info *fs_info, * @level: expected level, mandatory check * @first_key: expected key of first slot, skip check if NULL */ -static int btree_read_extent_buffer_pages(struct btrfs_fs_info *fs_info, - struct extent_buffer *eb, +static int btree_read_extent_buffer_pages(struct extent_buffer *eb, u64 parent_transid, int level, struct btrfs_key *first_key) { + struct btrfs_fs_info *fs_info = eb->fs_info; struct extent_io_tree *io_tree; int failed = 0; int ret; @@ -487,14 +465,13 @@ static int btree_read_extent_buffer_pages(struct btrfs_fs_info *fs_info, io_tree = &BTRFS_I(fs_info->btree_inode)->io_tree; while (1) { clear_bit(EXTENT_BUFFER_CORRUPT, &eb->bflags); - ret = read_extent_buffer_pages(io_tree, eb, WAIT_COMPLETE, - mirror_num); + ret = read_extent_buffer_pages(eb, WAIT_COMPLETE, mirror_num); if (!ret) { if (verify_parent_transid(io_tree, eb, parent_transid, 0)) ret = -EIO; - else if (verify_level_key(fs_info, eb, level, - first_key, parent_transid)) + else if (btrfs_verify_level_key(eb, level, + first_key, parent_transid)) ret = -EUCLEAN; else break; @@ -519,7 +496,7 @@ static int btree_read_extent_buffer_pages(struct btrfs_fs_info *fs_info, } if (failed && !ret && failed_mirror) - repair_eb_io_failure(fs_info, eb, failed_mirror); + btrfs_repair_eb_io_failure(eb, failed_mirror); return ret; } @@ -533,7 +510,10 @@ static int csum_dirty_buffer(struct btrfs_fs_info *fs_info, struct page *page) { u64 start = page_offset(page); u64 found_start; + u8 result[BTRFS_CSUM_SIZE]; + u16 csum_size = btrfs_super_csum_size(fs_info->super_copy); struct extent_buffer *eb; + int ret; eb = (struct extent_buffer *)page->private; if (page != eb->pages[0]) @@ -552,12 +532,28 @@ static int csum_dirty_buffer(struct btrfs_fs_info *fs_info, struct page *page) ASSERT(memcmp_extent_buffer(eb, fs_info->fs_devices->metadata_uuid, btrfs_header_fsid(), BTRFS_FSID_SIZE) == 0); - return csum_tree_block(fs_info, eb, 0); + if (csum_tree_block(eb, result)) + return -EINVAL; + + if (btrfs_header_level(eb)) + ret = btrfs_check_node(eb); + else + ret = btrfs_check_leaf_full(eb); + + if (ret < 0) { + btrfs_err(fs_info, + "block=%llu write time tree block corruption detected", + eb->start); + return ret; + } + write_extent_buffer(eb, result, 0, csum_size); + + return 0; } -static int check_tree_block_fsid(struct btrfs_fs_info *fs_info, - struct extent_buffer *eb) +static int check_tree_block_fsid(struct extent_buffer *eb) { + struct btrfs_fs_info *fs_info = eb->fs_info; struct btrfs_fs_devices *fs_devices = fs_info->fs_devices; u8 fsid[BTRFS_FSID_SIZE]; int ret = 1; @@ -595,7 +591,9 @@ static int btree_readpage_end_io_hook(struct btrfs_io_bio *io_bio, struct extent_buffer *eb; struct btrfs_root *root = BTRFS_I(page->mapping->host)->root; struct btrfs_fs_info *fs_info = root->fs_info; + u16 csum_size = btrfs_super_csum_size(fs_info->super_copy); int ret = 0; + u8 result[BTRFS_CSUM_SIZE]; int reads_done; if (!page->private) @@ -625,7 +623,7 @@ static int btree_readpage_end_io_hook(struct btrfs_io_bio *io_bio, ret = -EIO; goto err; } - if (check_tree_block_fsid(fs_info, eb)) { + if (check_tree_block_fsid(eb)) { btrfs_err_rl(fs_info, "bad fsid on block %llu", eb->start); ret = -EIO; @@ -642,25 +640,44 @@ static int btree_readpage_end_io_hook(struct btrfs_io_bio *io_bio, btrfs_set_buffer_lockdep_class(btrfs_header_owner(eb), eb, found_level); - ret = csum_tree_block(fs_info, eb, 1); + ret = csum_tree_block(eb, result); if (ret) goto err; + if (memcmp_extent_buffer(eb, result, 0, csum_size)) { + u32 val; + u32 found = 0; + + memcpy(&found, result, csum_size); + + read_extent_buffer(eb, &val, 0, csum_size); + btrfs_warn_rl(fs_info, + "%s checksum verify failed on %llu wanted %x found %x level %d", + fs_info->sb->s_id, eb->start, + val, found, btrfs_header_level(eb)); + ret = -EUCLEAN; + goto err; + } + /* * If this is a leaf block and it is corrupt, set the corrupt bit so * that we don't try and read the other copies of this block, just * return -EIO. */ - if (found_level == 0 && btrfs_check_leaf_full(fs_info, eb)) { + if (found_level == 0 && btrfs_check_leaf_full(eb)) { set_bit(EXTENT_BUFFER_CORRUPT, &eb->bflags); ret = -EIO; } - if (found_level > 0 && btrfs_check_node(fs_info, eb)) + if (found_level > 0 && btrfs_check_node(eb)) ret = -EIO; if (!ret) set_extent_buffer_uptodate(eb); + else + btrfs_err(fs_info, + "block=%llu read time tree block corruption detected", + eb->start); err: if (reads_done && test_and_clear_bit(EXTENT_BUFFER_READAHEAD, &eb->bflags)) @@ -832,11 +849,11 @@ static blk_status_t btree_csum_one_bio(struct bio *bio) { struct bio_vec *bvec; struct btrfs_root *root; - int i, ret = 0; + int ret = 0; struct bvec_iter_all iter_all; ASSERT(!bio_flagged(bio, BIO_CLONED)); - bio_for_each_segment_all(bvec, bio, i, iter_all) { + bio_for_each_segment_all(bvec, bio, iter_all) { root = BTRFS_I(bvec->bv_page->mapping->host)->root; ret = csum_dirty_buffer(root->fs_info, bvec->bv_page); if (ret) @@ -867,11 +884,10 @@ static int check_async_write(struct btrfs_inode *bi) return 1; } -static blk_status_t btree_submit_bio_hook(void *private_data, struct bio *bio, - int mirror_num, unsigned long bio_flags, - u64 bio_offset) +static blk_status_t btree_submit_bio_hook(struct inode *inode, struct bio *bio, + int mirror_num, + unsigned long bio_flags) { - struct inode *inode = private_data; struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); int async = check_async_write(BTRFS_I(inode)); blk_status_t ret; @@ -897,8 +913,7 @@ static blk_status_t btree_submit_bio_hook(void *private_data, struct bio *bio, * checksumming can happen in parallel across all CPUs */ ret = btrfs_wq_submit_bio(fs_info, bio, mirror_num, 0, - bio_offset, private_data, - btree_submit_bio_start); + 0, inode, btree_submit_bio_start); } if (ret) @@ -1017,22 +1032,23 @@ static const struct address_space_operations btree_aops = { void readahead_tree_block(struct btrfs_fs_info *fs_info, u64 bytenr) { struct extent_buffer *buf = NULL; - struct inode *btree_inode = fs_info->btree_inode; + int ret; buf = btrfs_find_create_tree_block(fs_info, bytenr); if (IS_ERR(buf)) return; - read_extent_buffer_pages(&BTRFS_I(btree_inode)->io_tree, - buf, WAIT_NONE, 0); - free_extent_buffer(buf); + + ret = read_extent_buffer_pages(buf, WAIT_NONE, 0); + if (ret < 0) + free_extent_buffer_stale(buf); + else + free_extent_buffer(buf); } int reada_tree_block_flagged(struct btrfs_fs_info *fs_info, u64 bytenr, int mirror_num, struct extent_buffer **eb) { struct extent_buffer *buf = NULL; - struct inode *btree_inode = fs_info->btree_inode; - struct extent_io_tree *io_tree = &BTRFS_I(btree_inode)->io_tree; int ret; buf = btrfs_find_create_tree_block(fs_info, bytenr); @@ -1041,15 +1057,14 @@ int reada_tree_block_flagged(struct btrfs_fs_info *fs_info, u64 bytenr, set_bit(EXTENT_BUFFER_READAHEAD, &buf->bflags); - ret = read_extent_buffer_pages(io_tree, buf, WAIT_PAGE_LOCK, - mirror_num); + ret = read_extent_buffer_pages(buf, WAIT_PAGE_LOCK, mirror_num); if (ret) { - free_extent_buffer(buf); + free_extent_buffer_stale(buf); return ret; } if (test_bit(EXTENT_BUFFER_CORRUPT, &buf->bflags)) { - free_extent_buffer(buf); + free_extent_buffer_stale(buf); return -EIO; } else if (extent_buffer_uptodate(buf)) { *eb = buf; @@ -1068,19 +1083,6 @@ struct extent_buffer *btrfs_find_create_tree_block( return alloc_extent_buffer(fs_info, bytenr); } - -int btrfs_write_tree_block(struct extent_buffer *buf) -{ - return filemap_fdatawrite_range(buf->pages[0]->mapping, buf->start, - buf->start + buf->len - 1); -} - -void btrfs_wait_tree_block_writeback(struct extent_buffer *buf) -{ - filemap_fdatawait_range(buf->pages[0]->mapping, - buf->start, buf->start + buf->len - 1); -} - /* * Read tree block at logical address @bytenr and do variant basic but critical * verification. @@ -1100,19 +1102,19 @@ struct extent_buffer *read_tree_block(struct btrfs_fs_info *fs_info, u64 bytenr, if (IS_ERR(buf)) return buf; - ret = btree_read_extent_buffer_pages(fs_info, buf, parent_transid, + ret = btree_read_extent_buffer_pages(buf, parent_transid, level, first_key); if (ret) { - free_extent_buffer(buf); + free_extent_buffer_stale(buf); return ERR_PTR(ret); } return buf; } -void clean_tree_block(struct btrfs_fs_info *fs_info, - struct extent_buffer *buf) +void btrfs_clean_tree_block(struct extent_buffer *buf) { + struct btrfs_fs_info *fs_info = buf->fs_info; if (btrfs_header_generation(buf) == fs_info->running_transaction->transid) { btrfs_assert_tree_locked(buf); @@ -1208,7 +1210,8 @@ static void __setup_root(struct btrfs_root *root, struct btrfs_fs_info *fs_info, root->log_transid_committed = -1; root->last_log_commit = 0; if (!dummy) - extent_io_tree_init(&root->dirty_log_pages, NULL); + extent_io_tree_init(fs_info, &root->dirty_log_pages, + IO_TREE_ROOT_DIRTY_LOG_PAGES, NULL); memset(&root->root_key, 0, sizeof(root->root_key)); memset(&root->root_item, 0, sizeof(root->root_item)); @@ -1255,9 +1258,9 @@ struct btrfs_root *btrfs_alloc_dummy_root(struct btrfs_fs_info *fs_info) #endif struct btrfs_root *btrfs_create_tree(struct btrfs_trans_handle *trans, - struct btrfs_fs_info *fs_info, u64 objectid) { + struct btrfs_fs_info *fs_info = trans->fs_info; struct extent_buffer *leaf; struct btrfs_root *tree_root = fs_info->tree_root; struct btrfs_root *root; @@ -2138,8 +2141,9 @@ static void btrfs_init_btree_inode(struct btrfs_fs_info *fs_info) inode->i_mapping->a_ops = &btree_aops; RB_CLEAR_NODE(&BTRFS_I(inode)->rb_node); - extent_io_tree_init(&BTRFS_I(inode)->io_tree, inode); - BTRFS_I(inode)->io_tree.track_uptodate = 0; + extent_io_tree_init(fs_info, &BTRFS_I(inode)->io_tree, + IO_TREE_INODE_IO, inode); + BTRFS_I(inode)->io_tree.track_uptodate = false; extent_map_tree_init(&BTRFS_I(inode)->extent_tree); BTRFS_I(inode)->io_tree.ops = &btree_extent_io_ops; @@ -2162,7 +2166,6 @@ static void btrfs_init_qgroup(struct btrfs_fs_info *fs_info) spin_lock_init(&fs_info->qgroup_lock); mutex_init(&fs_info->qgroup_ioctl_lock); fs_info->qgroup_tree = RB_ROOT; - fs_info->qgroup_op_tree = RB_ROOT; INIT_LIST_HEAD(&fs_info->dirty_qgroups); fs_info->qgroup_seq = 1; fs_info->qgroup_ulist = NULL; @@ -2630,11 +2633,17 @@ int open_ctree(struct super_block *sb, goto fail; } - ret = percpu_counter_init(&fs_info->dirty_metadata_bytes, 0, GFP_KERNEL); + ret = percpu_counter_init(&fs_info->dio_bytes, 0, GFP_KERNEL); if (ret) { err = ret; goto fail_srcu; } + + ret = percpu_counter_init(&fs_info->dirty_metadata_bytes, 0, GFP_KERNEL); + if (ret) { + err = ret; + goto fail_dio_bytes; + } fs_info->dirty_metadata_batch = PAGE_SIZE * (1 + ilog2(nr_cpu_ids)); @@ -2667,7 +2676,6 @@ int open_ctree(struct super_block *sb, spin_lock_init(&fs_info->defrag_inodes_lock); spin_lock_init(&fs_info->tree_mod_seq_lock); spin_lock_init(&fs_info->super_lock); - spin_lock_init(&fs_info->qgroup_op_lock); spin_lock_init(&fs_info->buffer_lock); spin_lock_init(&fs_info->unused_bgs_lock); rwlock_init(&fs_info->tree_mod_log_lock); @@ -2694,7 +2702,6 @@ int open_ctree(struct super_block *sb, atomic_set(&fs_info->async_delalloc_pages, 0); atomic_set(&fs_info->defrag_running, 0); - atomic_set(&fs_info->qgroup_op_seq, 0); atomic_set(&fs_info->reada_works_cnt, 0); atomic_set(&fs_info->nr_delayed_iputs, 0); atomic64_set(&fs_info->tree_mod_seq, 0); @@ -2748,8 +2755,10 @@ int open_ctree(struct super_block *sb, fs_info->block_group_cache_tree = RB_ROOT; fs_info->first_logical_byte = (u64)-1; - extent_io_tree_init(&fs_info->freed_extents[0], NULL); - extent_io_tree_init(&fs_info->freed_extents[1], NULL); + extent_io_tree_init(fs_info, &fs_info->freed_extents[0], + IO_TREE_FS_INFO_FREED_EXTENTS0, NULL); + extent_io_tree_init(fs_info, &fs_info->freed_extents[1], + IO_TREE_FS_INFO_FREED_EXTENTS1, NULL); fs_info->pinned_extents = &fs_info->freed_extents[0]; set_bit(BTRFS_FS_BARRIER, &fs_info->flags); @@ -2776,8 +2785,6 @@ int open_ctree(struct super_block *sb, init_waitqueue_head(&fs_info->async_submit_wait); init_waitqueue_head(&fs_info->delayed_iputs_wait); - INIT_LIST_HEAD(&fs_info->pinned_chunks); - /* Usable values until the real ones are cached from the superblock */ fs_info->nodesize = 4096; fs_info->sectorsize = 4096; @@ -3335,6 +3342,8 @@ fail_delalloc_bytes: percpu_counter_destroy(&fs_info->delalloc_bytes); fail_dirty_metadata_bytes: percpu_counter_destroy(&fs_info->dirty_metadata_bytes); +fail_dio_bytes: + percpu_counter_destroy(&fs_info->dio_bytes); fail_srcu: cleanup_srcu_struct(&fs_info->subvol_srcu); fail: @@ -4016,6 +4025,10 @@ void close_ctree(struct btrfs_fs_info *fs_info) percpu_counter_sum(&fs_info->delalloc_bytes)); } + if (percpu_counter_sum(&fs_info->dio_bytes)) + btrfs_info(fs_info, "at unmount dio bytes count %lld", + percpu_counter_sum(&fs_info->dio_bytes)); + btrfs_sysfs_remove_mounted(fs_info); btrfs_sysfs_remove_fsid(fs_info->fs_devices); @@ -4042,25 +4055,17 @@ void close_ctree(struct btrfs_fs_info *fs_info) btrfsic_unmount(fs_info->fs_devices); #endif - btrfs_close_devices(fs_info->fs_devices); btrfs_mapping_tree_free(&fs_info->mapping_tree); + btrfs_close_devices(fs_info->fs_devices); percpu_counter_destroy(&fs_info->dirty_metadata_bytes); percpu_counter_destroy(&fs_info->delalloc_bytes); + percpu_counter_destroy(&fs_info->dio_bytes); percpu_counter_destroy(&fs_info->dev_replace.bio_counter); cleanup_srcu_struct(&fs_info->subvol_srcu); btrfs_free_stripe_hash_table(fs_info); btrfs_free_ref_cache(fs_info); - - while (!list_empty(&fs_info->pinned_chunks)) { - struct extent_map *em; - - em = list_first_entry(&fs_info->pinned_chunks, - struct extent_map, list); - list_del_init(&em->list); - free_extent_map(em); - } } int btrfs_buffer_uptodate(struct extent_buffer *buf, u64 parent_transid, @@ -4114,7 +4119,7 @@ void btrfs_mark_buffer_dirty(struct extent_buffer *buf) * So here we should only check item pointers, not item data. */ if (btrfs_header_level(buf) == 0 && - btrfs_check_leaf_relaxed(fs_info, buf)) { + btrfs_check_leaf_relaxed(buf)) { btrfs_print_leaf(buf); ASSERT(0); } @@ -4157,10 +4162,7 @@ void btrfs_btree_balance_dirty_nodelay(struct btrfs_fs_info *fs_info) int btrfs_read_buffer(struct extent_buffer *buf, u64 parent_transid, int level, struct btrfs_key *first_key) { - struct btrfs_root *root = BTRFS_I(buf->pages[0]->mapping->host)->root; - struct btrfs_fs_info *fs_info = root->fs_info; - - return btree_read_extent_buffer_pages(fs_info, buf, parent_transid, + return btree_read_extent_buffer_pages(buf, parent_transid, level, first_key); } @@ -4484,10 +4486,17 @@ void btrfs_cleanup_dirty_bgs(struct btrfs_transaction *cur_trans, void btrfs_cleanup_one_transaction(struct btrfs_transaction *cur_trans, struct btrfs_fs_info *fs_info) { + struct btrfs_device *dev, *tmp; + btrfs_cleanup_dirty_bgs(cur_trans, fs_info); ASSERT(list_empty(&cur_trans->dirty_bgs)); ASSERT(list_empty(&cur_trans->io_bgs)); + list_for_each_entry_safe(dev, tmp, &cur_trans->dev_update_list, + post_commit_list) { + list_del_init(&dev->post_commit_list); + } + btrfs_destroy_delayed_refs(cur_trans, fs_info); cur_trans->state = TRANS_STATE_COMMIT_START; |