diff options
Diffstat (limited to 'fs/btrfs/disk-io.c')
-rw-r--r-- | fs/btrfs/disk-io.c | 987 |
1 files changed, 570 insertions, 417 deletions
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 60ce1190307b..37a31b12bb0c 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -68,15 +68,15 @@ static int btrfs_check_super_valid(struct btrfs_fs_info *fs_info, int read_only); static void btrfs_destroy_ordered_extents(struct btrfs_root *root); static int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans, - struct btrfs_root *root); + struct btrfs_fs_info *fs_info); static void btrfs_destroy_delalloc_inodes(struct btrfs_root *root); -static int btrfs_destroy_marked_extents(struct btrfs_root *root, +static int btrfs_destroy_marked_extents(struct btrfs_fs_info *fs_info, struct extent_io_tree *dirty_pages, int mark); -static int btrfs_destroy_pinned_extent(struct btrfs_root *root, +static int btrfs_destroy_pinned_extent(struct btrfs_fs_info *fs_info, struct extent_io_tree *pinned_extents); -static int btrfs_cleanup_transaction(struct btrfs_root *root); -static void btrfs_error_commit_super(struct btrfs_root *root); +static int btrfs_cleanup_transaction(struct btrfs_fs_info *fs_info); +static void btrfs_error_commit_super(struct btrfs_fs_info *fs_info); /* * btrfs_end_io_wq structs are used to do processing in task context when an IO @@ -101,7 +101,7 @@ int __init btrfs_end_io_wq_init(void) btrfs_end_io_wq_cache = kmem_cache_create("btrfs_end_io_wq", sizeof(struct btrfs_end_io_wq), 0, - SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, + SLAB_MEM_SPREAD, NULL); if (!btrfs_end_io_wq_cache) return -ENOMEM; @@ -124,7 +124,6 @@ struct async_submit_bio { struct list_head list; extent_submit_bio_hook_t *submit_bio_start; extent_submit_bio_hook_t *submit_bio_done; - int rw; int mirror_num; unsigned long bio_flags; /* @@ -225,6 +224,7 @@ static struct extent_map *btree_get_extent(struct inode *inode, struct page *page, size_t pg_offset, u64 start, u64 len, int create) { + struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; struct extent_map *em; int ret; @@ -232,8 +232,7 @@ static struct extent_map *btree_get_extent(struct inode *inode, read_lock(&em_tree->lock); em = lookup_extent_mapping(em_tree, start, len); if (em) { - em->bdev = - BTRFS_I(inode)->root->fs_info->fs_devices->latest_bdev; + em->bdev = fs_info->fs_devices->latest_bdev; read_unlock(&em_tree->lock); goto out; } @@ -248,7 +247,7 @@ static struct extent_map *btree_get_extent(struct inode *inode, em->len = (u64)-1; em->block_len = (u64)-1; em->block_start = 0; - em->bdev = BTRFS_I(inode)->root->fs_info->fs_devices->latest_bdev; + em->bdev = fs_info->fs_devices->latest_bdev; write_lock(&em_tree->lock); ret = add_extent_mapping(em_tree, em, 0); @@ -272,7 +271,7 @@ u32 btrfs_csum_data(char *data, u32 seed, size_t len) return btrfs_crc32c(seed, data, len); } -void btrfs_csum_final(u32 crc, char *result) +void btrfs_csum_final(u32 crc, u8 *result) { put_unaligned_le32(~crc, result); } @@ -327,8 +326,7 @@ static int csum_tree_block(struct btrfs_fs_info *fs_info, read_extent_buffer(buf, &val, 0, csum_size); btrfs_warn_rl(fs_info, - "%s checksum verify failed on %llu wanted %X found %X " - "level %d", + "%s checksum verify failed on %llu wanted %X found %X level %d", fs_info->sb->s_id, buf->start, val, found, btrfs_header_level(buf)); if (result != (char *)&inline_result) @@ -403,7 +401,8 @@ out: * Return 0 if the superblock checksum type matches the checksum value of that * algorithm. Pass the raw disk superblock data. */ -static int btrfs_check_super_csum(char *raw_disk_sb) +static int btrfs_check_super_csum(struct btrfs_fs_info *fs_info, + char *raw_disk_sb) { struct btrfs_super_block *disk_sb = (struct btrfs_super_block *)raw_disk_sb; @@ -429,7 +428,7 @@ static int btrfs_check_super_csum(char *raw_disk_sb) } if (csum_type >= ARRAY_SIZE(btrfs_csum_sizes)) { - printk(KERN_ERR "BTRFS: unsupported checksum algorithm %u\n", + btrfs_err(fs_info, "unsupported checksum algorithm %u", csum_type); ret = 1; } @@ -441,9 +440,9 @@ static int btrfs_check_super_csum(char *raw_disk_sb) * helper to read a given tree block, doing retries as required when * the checksums don't match and we have alternate mirrors to try. */ -static int btree_read_extent_buffer_pages(struct btrfs_root *root, +static int btree_read_extent_buffer_pages(struct btrfs_fs_info *fs_info, struct extent_buffer *eb, - u64 start, u64 parent_transid) + u64 parent_transid) { struct extent_io_tree *io_tree; int failed = 0; @@ -453,10 +452,9 @@ static int btree_read_extent_buffer_pages(struct btrfs_root *root, int failed_mirror = 0; clear_bit(EXTENT_BUFFER_CORRUPT, &eb->bflags); - io_tree = &BTRFS_I(root->fs_info->btree_inode)->io_tree; + io_tree = &BTRFS_I(fs_info->btree_inode)->io_tree; while (1) { - ret = read_extent_buffer_pages(io_tree, eb, start, - WAIT_COMPLETE, + ret = read_extent_buffer_pages(io_tree, eb, WAIT_COMPLETE, btree_get_extent, mirror_num); if (!ret) { if (!verify_parent_transid(io_tree, eb, @@ -474,7 +472,7 @@ static int btree_read_extent_buffer_pages(struct btrfs_root *root, if (test_bit(EXTENT_BUFFER_CORRUPT, &eb->bflags)) break; - num_copies = btrfs_num_copies(root->fs_info, + num_copies = btrfs_num_copies(fs_info, eb->start, eb->len); if (num_copies == 1) break; @@ -493,7 +491,7 @@ static int btree_read_extent_buffer_pages(struct btrfs_root *root, } if (failed && !ret && failed_mirror) - repair_eb_io_failure(root, eb, failed_mirror); + repair_eb_io_failure(fs_info, eb, failed_mirror); return ret; } @@ -547,25 +545,63 @@ static int check_tree_block_fsid(struct btrfs_fs_info *fs_info, return ret; } -#define CORRUPT(reason, eb, root, slot) \ - btrfs_crit(root->fs_info, "corrupt leaf, %s: block=%llu," \ - "root=%llu, slot=%d", reason, \ - btrfs_header_bytenr(eb), root->objectid, slot) +#define CORRUPT(reason, eb, root, slot) \ + btrfs_crit(root->fs_info, \ + "corrupt %s, %s: block=%llu, root=%llu, slot=%d", \ + btrfs_header_level(eb) == 0 ? "leaf" : "node", \ + reason, btrfs_header_bytenr(eb), root->objectid, slot) static noinline int check_leaf(struct btrfs_root *root, struct extent_buffer *leaf) { + struct btrfs_fs_info *fs_info = root->fs_info; struct btrfs_key key; struct btrfs_key leaf_key; u32 nritems = btrfs_header_nritems(leaf); int slot; + /* + * Extent buffers from a relocation tree have a owner field that + * corresponds to the subvolume tree they are based on. So just from an + * extent buffer alone we can not find out what is the id of the + * corresponding subvolume tree, so we can not figure out if the extent + * buffer corresponds to the root of the relocation tree or not. So skip + * this check for relocation trees. + */ + if (nritems == 0 && !btrfs_header_flag(leaf, BTRFS_HEADER_FLAG_RELOC)) { + struct btrfs_root *check_root; + + key.objectid = btrfs_header_owner(leaf); + key.type = BTRFS_ROOT_ITEM_KEY; + key.offset = (u64)-1; + + check_root = btrfs_get_fs_root(fs_info, &key, false); + /* + * The only reason we also check NULL here is that during + * open_ctree() some roots has not yet been set up. + */ + if (!IS_ERR_OR_NULL(check_root)) { + struct extent_buffer *eb; + + eb = btrfs_root_node(check_root); + /* if leaf is the root, then it's fine */ + if (leaf != eb) { + CORRUPT("non-root leaf's nritems is 0", + leaf, check_root, 0); + free_extent_buffer(eb); + return -EIO; + } + free_extent_buffer(eb); + } + return 0; + } + if (nritems == 0) return 0; /* Check the 0 item */ if (btrfs_item_offset_nr(leaf, 0) + btrfs_item_size_nr(leaf, 0) != - BTRFS_LEAF_DATA_SIZE(root)) { + BTRFS_LEAF_DATA_SIZE(fs_info)) { CORRUPT("invalid item offset size pair", leaf, root, 0); return -EIO; } @@ -604,7 +640,7 @@ static noinline int check_leaf(struct btrfs_root *root, * all point outside of the leaf. */ if (btrfs_item_end_nr(leaf, slot) > - BTRFS_LEAF_DATA_SIZE(root)) { + BTRFS_LEAF_DATA_SIZE(fs_info)) { CORRUPT("slot end outside of leaf", leaf, root, slot); return -EIO; } @@ -613,6 +649,42 @@ static noinline int check_leaf(struct btrfs_root *root, return 0; } +static int check_node(struct btrfs_root *root, struct extent_buffer *node) +{ + unsigned long nr = btrfs_header_nritems(node); + struct btrfs_key key, next_key; + int slot; + u64 bytenr; + int ret = 0; + + if (nr == 0 || nr > BTRFS_NODEPTRS_PER_BLOCK(root->fs_info)) { + btrfs_crit(root->fs_info, + "corrupt node: block %llu root %llu nritems %lu", + node->start, root->objectid, nr); + return -EIO; + } + + for (slot = 0; slot < nr - 1; slot++) { + bytenr = btrfs_node_blockptr(node, slot); + btrfs_node_key_to_cpu(node, &key, slot); + btrfs_node_key_to_cpu(node, &next_key, slot + 1); + + if (!bytenr) { + CORRUPT("invalid item slot", node, root, slot); + ret = -EIO; + goto out; + } + + if (btrfs_comp_cpu_keys(&key, &next_key) >= 0) { + CORRUPT("bad key order", node, root, slot); + ret = -EIO; + goto out; + } + } +out: + return ret; +} + static int btree_readpage_end_io_hook(struct btrfs_io_bio *io_bio, u64 phy_offset, struct page *page, u64 start, u64 end, int mirror) @@ -683,12 +755,15 @@ static int btree_readpage_end_io_hook(struct btrfs_io_bio *io_bio, ret = -EIO; } + if (found_level > 0 && check_node(root, eb)) + ret = -EIO; + if (!ret) set_extent_buffer_uptodate(eb); err: if (reads_done && test_and_clear_bit(EXTENT_BUFFER_READAHEAD, &eb->bflags)) - btree_readahead_hook(fs_info, eb, eb->start, ret); + btree_readahead_hook(fs_info, eb, ret); if (ret) { /* @@ -713,7 +788,7 @@ static int btree_io_failed_hook(struct page *page, int failed_mirror) eb->read_mirror = failed_mirror; atomic_dec(&eb->io_pages); if (test_and_clear_bit(EXTENT_BUFFER_READAHEAD, &eb->bflags)) - btree_readahead_hook(eb->fs_info, eb, eb->start, -EIO); + btree_readahead_hook(eb->fs_info, eb, -EIO); return -EIO; /* we fixed nothing */ } @@ -727,7 +802,7 @@ static void end_workqueue_bio(struct bio *bio) fs_info = end_io_wq->info; end_io_wq->error = bio->bi_error; - if (bio->bi_rw & REQ_WRITE) { + if (bio_op(bio) == REQ_OP_WRITE) { if (end_io_wq->metadata == BTRFS_WQ_ENDIO_METADATA) { wq = fs_info->endio_meta_write_workers; func = btrfs_endio_meta_write_helper; @@ -797,7 +872,7 @@ static void run_one_async_start(struct btrfs_work *work) int ret; async = container_of(work, struct async_submit_bio, work); - ret = async->submit_bio_start(async->inode, async->rw, async->bio, + ret = async->submit_bio_start(async->inode, async->bio, async->mirror_num, async->bio_flags, async->bio_offset); if (ret) @@ -830,9 +905,8 @@ static void run_one_async_done(struct btrfs_work *work) return; } - async->submit_bio_done(async->inode, async->rw, async->bio, - async->mirror_num, async->bio_flags, - async->bio_offset); + async->submit_bio_done(async->inode, async->bio, async->mirror_num, + async->bio_flags, async->bio_offset); } static void run_one_async_free(struct btrfs_work *work) @@ -844,7 +918,7 @@ static void run_one_async_free(struct btrfs_work *work) } int btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info, struct inode *inode, - int rw, struct bio *bio, int mirror_num, + struct bio *bio, int mirror_num, unsigned long bio_flags, u64 bio_offset, extent_submit_bio_hook_t *submit_bio_start, @@ -857,7 +931,6 @@ int btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info, struct inode *inode, return -ENOMEM; async->inode = inode; - async->rw = rw; async->bio = bio; async->mirror_num = mirror_num; async->submit_bio_start = submit_bio_start; @@ -873,7 +946,7 @@ int btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info, struct inode *inode, atomic_inc(&fs_info->nr_async_submits); - if (rw & REQ_SYNC) + if (op_is_sync(bio->bi_opf)) btrfs_set_work_high_priority(&async->work); btrfs_queue_work(fs_info->workers, &async->work); @@ -903,9 +976,8 @@ static int btree_csum_one_bio(struct bio *bio) return ret; } -static int __btree_submit_bio_start(struct inode *inode, int rw, - struct bio *bio, int mirror_num, - unsigned long bio_flags, +static int __btree_submit_bio_start(struct inode *inode, struct bio *bio, + int mirror_num, unsigned long bio_flags, u64 bio_offset) { /* @@ -915,7 +987,7 @@ static int __btree_submit_bio_start(struct inode *inode, int rw, return btree_csum_one_bio(bio); } -static int __btree_submit_bio_done(struct inode *inode, int rw, struct bio *bio, +static int __btree_submit_bio_done(struct inode *inode, struct bio *bio, int mirror_num, unsigned long bio_flags, u64 bio_offset) { @@ -925,7 +997,7 @@ static int __btree_submit_bio_done(struct inode *inode, int rw, struct bio *bio, * when we're called for a write, we're already in the async * submission context. Just jump into btrfs_map_bio */ - ret = btrfs_map_bio(BTRFS_I(inode)->root, rw, bio, mirror_num, 1); + ret = btrfs_map_bio(btrfs_sb(inode->i_sb), bio, mirror_num, 1); if (ret) { bio->bi_error = ret; bio_endio(bio); @@ -944,37 +1016,35 @@ static int check_async_write(struct inode *inode, unsigned long bio_flags) return 1; } -static int btree_submit_bio_hook(struct inode *inode, int rw, struct bio *bio, +static int btree_submit_bio_hook(struct inode *inode, struct bio *bio, int mirror_num, unsigned long bio_flags, u64 bio_offset) { + struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); int async = check_async_write(inode, bio_flags); int ret; - if (!(rw & REQ_WRITE)) { + if (bio_op(bio) != REQ_OP_WRITE) { /* * called for a read, do the setup so that checksum validation * can happen in the async kernel threads */ - ret = btrfs_bio_wq_end_io(BTRFS_I(inode)->root->fs_info, - bio, BTRFS_WQ_ENDIO_METADATA); + ret = btrfs_bio_wq_end_io(fs_info, bio, + BTRFS_WQ_ENDIO_METADATA); if (ret) goto out_w_error; - ret = btrfs_map_bio(BTRFS_I(inode)->root, rw, bio, - mirror_num, 0); + ret = btrfs_map_bio(fs_info, bio, mirror_num, 0); } else if (!async) { ret = btree_csum_one_bio(bio); if (ret) goto out_w_error; - ret = btrfs_map_bio(BTRFS_I(inode)->root, rw, bio, - mirror_num, 0); + ret = btrfs_map_bio(fs_info, bio, mirror_num, 0); } else { /* * kthread helpers are used to submit writes so that * checksumming can happen in parallel across all CPUs */ - ret = btrfs_wq_submit_bio(BTRFS_I(inode)->root->fs_info, - inode, rw, bio, mirror_num, 0, + ret = btrfs_wq_submit_bio(fs_info, inode, bio, mirror_num, 0, bio_offset, __btree_submit_bio_start, __btree_submit_bio_done); @@ -1092,34 +1162,34 @@ static const struct address_space_operations btree_aops = { .set_page_dirty = btree_set_page_dirty, }; -void readahead_tree_block(struct btrfs_root *root, u64 bytenr) +void readahead_tree_block(struct btrfs_fs_info *fs_info, u64 bytenr) { struct extent_buffer *buf = NULL; - struct inode *btree_inode = root->fs_info->btree_inode; + struct inode *btree_inode = fs_info->btree_inode; - buf = btrfs_find_create_tree_block(root, bytenr); + buf = btrfs_find_create_tree_block(fs_info, bytenr); if (IS_ERR(buf)) return; read_extent_buffer_pages(&BTRFS_I(btree_inode)->io_tree, - buf, 0, WAIT_NONE, btree_get_extent, 0); + buf, WAIT_NONE, btree_get_extent, 0); free_extent_buffer(buf); } -int reada_tree_block_flagged(struct btrfs_root *root, u64 bytenr, +int reada_tree_block_flagged(struct btrfs_fs_info *fs_info, u64 bytenr, int mirror_num, struct extent_buffer **eb) { struct extent_buffer *buf = NULL; - struct inode *btree_inode = root->fs_info->btree_inode; + struct inode *btree_inode = fs_info->btree_inode; struct extent_io_tree *io_tree = &BTRFS_I(btree_inode)->io_tree; int ret; - buf = btrfs_find_create_tree_block(root, bytenr); + buf = btrfs_find_create_tree_block(fs_info, bytenr); if (IS_ERR(buf)) return 0; set_bit(EXTENT_BUFFER_READAHEAD, &buf->bflags); - ret = read_extent_buffer_pages(io_tree, buf, 0, WAIT_PAGE_LOCK, + ret = read_extent_buffer_pages(io_tree, buf, WAIT_PAGE_LOCK, btree_get_extent, mirror_num); if (ret) { free_extent_buffer(buf); @@ -1137,19 +1207,13 @@ int reada_tree_block_flagged(struct btrfs_root *root, u64 bytenr, return 0; } -struct extent_buffer *btrfs_find_tree_block(struct btrfs_fs_info *fs_info, - u64 bytenr) +struct extent_buffer *btrfs_find_create_tree_block( + struct btrfs_fs_info *fs_info, + u64 bytenr) { - return find_extent_buffer(fs_info, bytenr); -} - -struct extent_buffer *btrfs_find_create_tree_block(struct btrfs_root *root, - u64 bytenr) -{ - if (btrfs_test_is_dummy_root(root)) - return alloc_test_extent_buffer(root->fs_info, bytenr, - root->nodesize); - return alloc_extent_buffer(root->fs_info, bytenr); + if (btrfs_is_testing(fs_info)) + return alloc_test_extent_buffer(fs_info, bytenr); + return alloc_extent_buffer(fs_info, bytenr); } @@ -1165,17 +1229,17 @@ int btrfs_wait_tree_block_writeback(struct extent_buffer *buf) buf->start, buf->start + buf->len - 1); } -struct extent_buffer *read_tree_block(struct btrfs_root *root, u64 bytenr, +struct extent_buffer *read_tree_block(struct btrfs_fs_info *fs_info, u64 bytenr, u64 parent_transid) { struct extent_buffer *buf = NULL; int ret; - buf = btrfs_find_create_tree_block(root, bytenr); + buf = btrfs_find_create_tree_block(fs_info, bytenr); if (IS_ERR(buf)) return buf; - ret = btree_read_extent_buffer_pages(root, buf, 0, parent_transid); + ret = btree_read_extent_buffer_pages(fs_info, buf, parent_transid); if (ret) { free_extent_buffer(buf); return ERR_PTR(ret); @@ -1229,15 +1293,12 @@ btrfs_free_subvolume_writers(struct btrfs_subvolume_writers *writers) kfree(writers); } -static void __setup_root(u32 nodesize, u32 sectorsize, u32 stripesize, - struct btrfs_root *root, struct btrfs_fs_info *fs_info, +static void __setup_root(struct btrfs_root *root, struct btrfs_fs_info *fs_info, u64 objectid) { + bool dummy = test_bit(BTRFS_FS_STATE_DUMMY_FS_INFO, &fs_info->fs_state); root->node = NULL; root->commit_root = NULL; - root->sectorsize = sectorsize; - root->nodesize = nodesize; - root->stripesize = stripesize; root->state = 0; root->orphan_cleanup_state = 0; @@ -1287,14 +1348,14 @@ static void __setup_root(u32 nodesize, u32 sectorsize, u32 stripesize, root->log_transid = 0; root->log_transid_committed = -1; root->last_log_commit = 0; - if (fs_info) + if (!dummy) extent_io_tree_init(&root->dirty_log_pages, fs_info->btree_inode->i_mapping); memset(&root->root_key, 0, sizeof(root->root_key)); memset(&root->root_item, 0, sizeof(root->root_item)); memset(&root->defrag_progress, 0, sizeof(root->defrag_progress)); - if (fs_info) + if (!dummy) root->defrag_trans_start = fs_info->generation; else root->defrag_trans_start = 0; @@ -1315,17 +1376,19 @@ static struct btrfs_root *btrfs_alloc_root(struct btrfs_fs_info *fs_info, #ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS /* Should only be used by the testing infrastructure */ -struct btrfs_root *btrfs_alloc_dummy_root(u32 sectorsize, u32 nodesize) +struct btrfs_root *btrfs_alloc_dummy_root(struct btrfs_fs_info *fs_info) { struct btrfs_root *root; - root = btrfs_alloc_root(NULL, GFP_KERNEL); + if (!fs_info) + return ERR_PTR(-EINVAL); + + root = btrfs_alloc_root(fs_info, GFP_KERNEL); if (!root) return ERR_PTR(-ENOMEM); + /* We don't use the stripesize in selftest, set it as sectorsize */ - __setup_root(nodesize, sectorsize, sectorsize, root, NULL, - BTRFS_ROOT_TREE_OBJECTID); - set_bit(BTRFS_ROOT_DUMMY_ROOT, &root->state); + __setup_root(root, fs_info, BTRFS_ROOT_TREE_OBJECTID); root->alloc_bytenr = 0; return root; @@ -1347,8 +1410,7 @@ struct btrfs_root *btrfs_create_tree(struct btrfs_trans_handle *trans, if (!root) return ERR_PTR(-ENOMEM); - __setup_root(tree_root->nodesize, tree_root->sectorsize, - tree_root->stripesize, root, fs_info, objectid); + __setup_root(root, fs_info, objectid); root->root_key.objectid = objectid; root->root_key.type = BTRFS_ROOT_ITEM_KEY; root->root_key.offset = 0; @@ -1360,18 +1422,15 @@ struct btrfs_root *btrfs_create_tree(struct btrfs_trans_handle *trans, goto fail; } - memset_extent_buffer(leaf, 0, 0, sizeof(struct btrfs_header)); + memzero_extent_buffer(leaf, 0, sizeof(struct btrfs_header)); btrfs_set_header_bytenr(leaf, leaf->start); btrfs_set_header_generation(leaf, trans->transid); btrfs_set_header_backref_rev(leaf, BTRFS_MIXED_BACKREF_REV); btrfs_set_header_owner(leaf, objectid); root->node = leaf; - write_extent_buffer(leaf, fs_info->fsid, btrfs_header_fsid(), - BTRFS_FSID_SIZE); - write_extent_buffer(leaf, fs_info->chunk_tree_uuid, - btrfs_header_chunk_tree_uuid(leaf), - BTRFS_UUID_SIZE); + write_extent_buffer_fsid(leaf, fs_info->fsid); + write_extent_buffer_chunk_tree_uuid(leaf, fs_info->chunk_tree_uuid); btrfs_mark_buffer_dirty(leaf); root->commit_root = btrfs_root_node(root); @@ -1416,16 +1475,13 @@ static struct btrfs_root *alloc_log_tree(struct btrfs_trans_handle *trans, struct btrfs_fs_info *fs_info) { struct btrfs_root *root; - struct btrfs_root *tree_root = fs_info->tree_root; struct extent_buffer *leaf; root = btrfs_alloc_root(fs_info, GFP_NOFS); if (!root) return ERR_PTR(-ENOMEM); - __setup_root(tree_root->nodesize, tree_root->sectorsize, - tree_root->stripesize, root, fs_info, - BTRFS_TREE_LOG_OBJECTID); + __setup_root(root, fs_info, BTRFS_TREE_LOG_OBJECTID); root->root_key.objectid = BTRFS_TREE_LOG_OBJECTID; root->root_key.type = BTRFS_ROOT_ITEM_KEY; @@ -1447,15 +1503,14 @@ static struct btrfs_root *alloc_log_tree(struct btrfs_trans_handle *trans, return ERR_CAST(leaf); } - memset_extent_buffer(leaf, 0, 0, sizeof(struct btrfs_header)); + memzero_extent_buffer(leaf, 0, sizeof(struct btrfs_header)); btrfs_set_header_bytenr(leaf, leaf->start); btrfs_set_header_generation(leaf, trans->transid); btrfs_set_header_backref_rev(leaf, BTRFS_MIXED_BACKREF_REV); btrfs_set_header_owner(leaf, BTRFS_TREE_LOG_OBJECTID); root->node = leaf; - write_extent_buffer(root->node, root->fs_info->fsid, - btrfs_header_fsid(), BTRFS_FSID_SIZE); + write_extent_buffer_fsid(root->node, fs_info->fsid); btrfs_mark_buffer_dirty(root->node); btrfs_tree_unlock(root->node); return root; @@ -1477,10 +1532,11 @@ int btrfs_init_log_root_tree(struct btrfs_trans_handle *trans, int btrfs_add_log_tree(struct btrfs_trans_handle *trans, struct btrfs_root *root) { + struct btrfs_fs_info *fs_info = root->fs_info; struct btrfs_root *log_root; struct btrfs_inode_item *inode_item; - log_root = alloc_log_tree(trans, root->fs_info); + log_root = alloc_log_tree(trans, fs_info); if (IS_ERR(log_root)) return PTR_ERR(log_root); @@ -1491,7 +1547,8 @@ int btrfs_add_log_tree(struct btrfs_trans_handle *trans, btrfs_set_stack_inode_generation(inode_item, 1); btrfs_set_stack_inode_size(inode_item, 3); btrfs_set_stack_inode_nlink(inode_item, 1); - btrfs_set_stack_inode_nbytes(inode_item, root->nodesize); + btrfs_set_stack_inode_nbytes(inode_item, + fs_info->nodesize); btrfs_set_stack_inode_mode(inode_item, S_IFDIR | 0755); btrfs_set_root_node(&log_root->root_item, log_root->node); @@ -1523,8 +1580,7 @@ static struct btrfs_root *btrfs_read_tree_root(struct btrfs_root *tree_root, goto alloc_fail; } - __setup_root(tree_root->nodesize, tree_root->sectorsize, - tree_root->stripesize, root, fs_info, key->objectid); + __setup_root(root, fs_info, key->objectid); ret = btrfs_find_root(tree_root, key, path, &root->root_item, &root->root_key); @@ -1535,7 +1591,8 @@ static struct btrfs_root *btrfs_read_tree_root(struct btrfs_root *tree_root, } generation = btrfs_root_generation(&root->root_item); - root->node = read_tree_block(root, btrfs_root_bytenr(&root->root_item), + root->node = read_tree_block(fs_info, + btrfs_root_bytenr(&root->root_item), generation); if (IS_ERR(root->node)) { ret = PTR_ERR(root->node); @@ -1600,14 +1657,14 @@ int btrfs_init_fs_root(struct btrfs_root *root) ret = get_anon_bdev(&root->anon_dev); if (ret) - goto free_writers; + goto fail; mutex_lock(&root->objectid_mutex); ret = btrfs_find_highest_objectid(root, &root->highest_objectid); if (ret) { mutex_unlock(&root->objectid_mutex); - goto free_root_dev; + goto fail; } ASSERT(root->highest_objectid <= BTRFS_LAST_FREE_OBJECTID); @@ -1615,19 +1672,13 @@ int btrfs_init_fs_root(struct btrfs_root *root) mutex_unlock(&root->objectid_mutex); return 0; - -free_root_dev: - free_anon_bdev(root->anon_dev); -free_writers: - btrfs_free_subvolume_writers(root->subv_writers); fail: - kfree(root->free_ino_ctl); - kfree(root->free_ino_pinned); + /* the caller is responsible to call free_fs_root */ return ret; } -static struct btrfs_root *btrfs_lookup_fs_root(struct btrfs_fs_info *fs_info, - u64 root_id) +struct btrfs_root *btrfs_lookup_fs_root(struct btrfs_fs_info *fs_info, + u64 root_id) { struct btrfs_root *root; @@ -1749,7 +1800,7 @@ static int btrfs_congested_fn(void *congested_data, int bdi_bits) list_for_each_entry_rcu(device, &info->fs_devices->devices, dev_list) { if (!device->bdev) continue; - bdi = blk_get_backing_dev_info(device->bdev); + bdi = device->bdev->bd_bdi; if (bdi_congested(bdi, bdi_bits)) { ret = 1; break; @@ -1796,6 +1847,7 @@ static void end_workqueue_fn(struct btrfs_work *work) static int cleaner_kthread(void *arg) { struct btrfs_root *root = arg; + struct btrfs_fs_info *fs_info = root->fs_info; int again; struct btrfs_trans_handle *trans; @@ -1803,40 +1855,40 @@ static int cleaner_kthread(void *arg) again = 0; /* Make the cleaner go to sleep early. */ - if (btrfs_need_cleaner_sleep(root)) + if (btrfs_need_cleaner_sleep(fs_info)) goto sleep; /* * Do not do anything if we might cause open_ctree() to block * before we have finished mounting the filesystem. */ - if (!root->fs_info->open) + if (!test_bit(BTRFS_FS_OPEN, &fs_info->flags)) goto sleep; - if (!mutex_trylock(&root->fs_info->cleaner_mutex)) + if (!mutex_trylock(&fs_info->cleaner_mutex)) goto sleep; /* * Avoid the problem that we change the status of the fs * during the above check and trylock. */ - if (btrfs_need_cleaner_sleep(root)) { - mutex_unlock(&root->fs_info->cleaner_mutex); + if (btrfs_need_cleaner_sleep(fs_info)) { + mutex_unlock(&fs_info->cleaner_mutex); goto sleep; } - mutex_lock(&root->fs_info->cleaner_delayed_iput_mutex); - btrfs_run_delayed_iputs(root); - mutex_unlock(&root->fs_info->cleaner_delayed_iput_mutex); + mutex_lock(&fs_info->cleaner_delayed_iput_mutex); + btrfs_run_delayed_iputs(fs_info); + mutex_unlock(&fs_info->cleaner_delayed_iput_mutex); again = btrfs_clean_one_deleted_snapshot(root); - mutex_unlock(&root->fs_info->cleaner_mutex); + mutex_unlock(&fs_info->cleaner_mutex); /* * The defragger has dealt with the R/O remount and umount, * needn't do anything special here. */ - btrfs_run_defrag_inodes(root->fs_info); + btrfs_run_defrag_inodes(fs_info); /* * Acquires fs_info->delete_unused_bgs_mutex to avoid racing @@ -1846,7 +1898,7 @@ static int cleaner_kthread(void *arg) * can't hold, nor need to, fs_info->cleaner_mutex when deleting * unused block groups. */ - btrfs_delete_unused_bgs(root->fs_info); + btrfs_delete_unused_bgs(fs_info); sleep: if (!again) { set_current_state(TASK_INTERRUPTIBLE); @@ -1870,15 +1922,15 @@ sleep: trans = btrfs_attach_transaction(root); if (IS_ERR(trans)) { if (PTR_ERR(trans) != -ENOENT) - btrfs_err(root->fs_info, + btrfs_err(fs_info, "cleaner transaction attach returned %ld", PTR_ERR(trans)); } else { int ret; - ret = btrfs_commit_transaction(trans, root); + ret = btrfs_commit_transaction(trans); if (ret) - btrfs_err(root->fs_info, + btrfs_err(fs_info, "cleaner open transaction commit returned %d", ret); } @@ -1889,6 +1941,7 @@ sleep: static int transaction_kthread(void *arg) { struct btrfs_root *root = arg; + struct btrfs_fs_info *fs_info = root->fs_info; struct btrfs_trans_handle *trans; struct btrfs_transaction *cur; u64 transid; @@ -1898,26 +1951,26 @@ static int transaction_kthread(void *arg) do { cannot_commit = false; - delay = HZ * root->fs_info->commit_interval; - mutex_lock(&root->fs_info->transaction_kthread_mutex); + delay = HZ * fs_info->commit_interval; + mutex_lock(&fs_info->transaction_kthread_mutex); - spin_lock(&root->fs_info->trans_lock); - cur = root->fs_info->running_transaction; + spin_lock(&fs_info->trans_lock); + cur = fs_info->running_transaction; if (!cur) { - spin_unlock(&root->fs_info->trans_lock); + spin_unlock(&fs_info->trans_lock); goto sleep; } now = get_seconds(); if (cur->state < TRANS_STATE_BLOCKED && (now < cur->start_time || - now - cur->start_time < root->fs_info->commit_interval)) { - spin_unlock(&root->fs_info->trans_lock); + now - cur->start_time < fs_info->commit_interval)) { + spin_unlock(&fs_info->trans_lock); delay = HZ * 5; goto sleep; } transid = cur->transid; - spin_unlock(&root->fs_info->trans_lock); + spin_unlock(&fs_info->trans_lock); /* If the file system is aborted, this will always fail. */ trans = btrfs_attach_transaction(root); @@ -1927,20 +1980,20 @@ static int transaction_kthread(void *arg) goto sleep; } if (transid == trans->transid) { - btrfs_commit_transaction(trans, root); + btrfs_commit_transaction(trans); } else { - btrfs_end_transaction(trans, root); + btrfs_end_transaction(trans); } sleep: - wake_up_process(root->fs_info->cleaner_kthread); - mutex_unlock(&root->fs_info->transaction_kthread_mutex); + wake_up_process(fs_info->cleaner_kthread); + mutex_unlock(&fs_info->transaction_kthread_mutex); if (unlikely(test_bit(BTRFS_FS_STATE_ERROR, - &root->fs_info->fs_state))) - btrfs_cleanup_transaction(root); + &fs_info->fs_state))) + btrfs_cleanup_transaction(fs_info); set_current_state(TASK_INTERRUPTIBLE); if (!kthread_should_stop() && - (!btrfs_transaction_blocked(root->fs_info) || + (!btrfs_transaction_blocked(fs_info) || cannot_commit)) schedule_timeout(delay); __set_current_state(TASK_RUNNING); @@ -2227,8 +2280,7 @@ void btrfs_free_fs_roots(struct btrfs_fs_info *fs_info) if (test_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state)) { btrfs_free_log_root_tree(NULL, fs_info); - btrfs_destroy_pinned_extent(fs_info->tree_root, - fs_info->pinned_extents); + btrfs_destroy_pinned_extent(fs_info, fs_info->pinned_extents); } } @@ -2254,33 +2306,31 @@ static void btrfs_init_balance(struct btrfs_fs_info *fs_info) init_waitqueue_head(&fs_info->balance_wait_q); } -static void btrfs_init_btree_inode(struct btrfs_fs_info *fs_info, - struct btrfs_root *tree_root) +static void btrfs_init_btree_inode(struct btrfs_fs_info *fs_info) { - fs_info->btree_inode->i_ino = BTRFS_BTREE_INODE_OBJECTID; - set_nlink(fs_info->btree_inode, 1); + struct inode *inode = fs_info->btree_inode; + + inode->i_ino = BTRFS_BTREE_INODE_OBJECTID; + set_nlink(inode, 1); /* * we set the i_size on the btree inode to the max possible int. * the real end of the address space is determined by all of * the devices in the system */ - fs_info->btree_inode->i_size = OFFSET_MAX; - fs_info->btree_inode->i_mapping->a_ops = &btree_aops; + inode->i_size = OFFSET_MAX; + inode->i_mapping->a_ops = &btree_aops; - RB_CLEAR_NODE(&BTRFS_I(fs_info->btree_inode)->rb_node); - extent_io_tree_init(&BTRFS_I(fs_info->btree_inode)->io_tree, - fs_info->btree_inode->i_mapping); - BTRFS_I(fs_info->btree_inode)->io_tree.track_uptodate = 0; - extent_map_tree_init(&BTRFS_I(fs_info->btree_inode)->extent_tree); + RB_CLEAR_NODE(&BTRFS_I(inode)->rb_node); + extent_io_tree_init(&BTRFS_I(inode)->io_tree, inode->i_mapping); + BTRFS_I(inode)->io_tree.track_uptodate = 0; + extent_map_tree_init(&BTRFS_I(inode)->extent_tree); - BTRFS_I(fs_info->btree_inode)->io_tree.ops = &btree_extent_io_ops; + BTRFS_I(inode)->io_tree.ops = &btree_extent_io_ops; - BTRFS_I(fs_info->btree_inode)->root = tree_root; - memset(&BTRFS_I(fs_info->btree_inode)->location, 0, - sizeof(struct btrfs_key)); - set_bit(BTRFS_INODE_DUMMY, - &BTRFS_I(fs_info->btree_inode)->runtime_flags); - btrfs_insert_inode_hash(fs_info->btree_inode); + BTRFS_I(inode)->root = fs_info->tree_root; + memset(&BTRFS_I(inode)->location, 0, sizeof(struct btrfs_key)); + set_bit(BTRFS_INODE_DUMMY, &BTRFS_I(inode)->runtime_flags); + btrfs_insert_inode_hash(inode); } static void btrfs_init_dev_replace_locks(struct btrfs_fs_info *fs_info) @@ -2303,9 +2353,8 @@ static void btrfs_init_qgroup(struct btrfs_fs_info *fs_info) fs_info->qgroup_op_tree = RB_ROOT; INIT_LIST_HEAD(&fs_info->dirty_qgroups); fs_info->qgroup_seq = 1; - fs_info->quota_enabled = 0; - fs_info->pending_quota_state = 0; fs_info->qgroup_ulist = NULL; + fs_info->qgroup_rescan_running = false; mutex_init(&fs_info->qgroup_rescan_lock); } @@ -2316,17 +2365,19 @@ static int btrfs_init_workqueues(struct btrfs_fs_info *fs_info, unsigned int flags = WQ_MEM_RECLAIM | WQ_FREEZABLE | WQ_UNBOUND; fs_info->workers = - btrfs_alloc_workqueue("worker", flags | WQ_HIGHPRI, - max_active, 16); + btrfs_alloc_workqueue(fs_info, "worker", + flags | WQ_HIGHPRI, max_active, 16); fs_info->delalloc_workers = - btrfs_alloc_workqueue("delalloc", flags, max_active, 2); + btrfs_alloc_workqueue(fs_info, "delalloc", + flags, max_active, 2); fs_info->flush_workers = - btrfs_alloc_workqueue("flush_delalloc", flags, max_active, 0); + btrfs_alloc_workqueue(fs_info, "flush_delalloc", + flags, max_active, 0); fs_info->caching_workers = - btrfs_alloc_workqueue("cache", flags, max_active, 0); + btrfs_alloc_workqueue(fs_info, "cache", flags, max_active, 0); /* * a higher idle thresh on the submit workers makes it much more @@ -2334,41 +2385,48 @@ static int btrfs_init_workqueues(struct btrfs_fs_info *fs_info, * devices */ fs_info->submit_workers = - btrfs_alloc_workqueue("submit", flags, + btrfs_alloc_workqueue(fs_info, "submit", flags, min_t(u64, fs_devices->num_devices, max_active), 64); fs_info->fixup_workers = - btrfs_alloc_workqueue("fixup", flags, 1, 0); + btrfs_alloc_workqueue(fs_info, "fixup", flags, 1, 0); /* * endios are largely parallel and should have a very * low idle thresh */ fs_info->endio_workers = - btrfs_alloc_workqueue("endio", flags, max_active, 4); + btrfs_alloc_workqueue(fs_info, "endio", flags, max_active, 4); fs_info->endio_meta_workers = - btrfs_alloc_workqueue("endio-meta", flags, max_active, 4); + btrfs_alloc_workqueue(fs_info, "endio-meta", flags, + max_active, 4); fs_info->endio_meta_write_workers = - btrfs_alloc_workqueue("endio-meta-write", flags, max_active, 2); + btrfs_alloc_workqueue(fs_info, "endio-meta-write", flags, + max_active, 2); fs_info->endio_raid56_workers = - btrfs_alloc_workqueue("endio-raid56", flags, max_active, 4); + btrfs_alloc_workqueue(fs_info, "endio-raid56", flags, + max_active, 4); fs_info->endio_repair_workers = - btrfs_alloc_workqueue("endio-repair", flags, 1, 0); + btrfs_alloc_workqueue(fs_info, "endio-repair", flags, 1, 0); fs_info->rmw_workers = - btrfs_alloc_workqueue("rmw", flags, max_active, 2); + btrfs_alloc_workqueue(fs_info, "rmw", flags, max_active, 2); fs_info->endio_write_workers = - btrfs_alloc_workqueue("endio-write", flags, max_active, 2); + btrfs_alloc_workqueue(fs_info, "endio-write", flags, + max_active, 2); fs_info->endio_freespace_worker = - btrfs_alloc_workqueue("freespace-write", flags, max_active, 0); + btrfs_alloc_workqueue(fs_info, "freespace-write", flags, + max_active, 0); fs_info->delayed_workers = - btrfs_alloc_workqueue("delayed-meta", flags, max_active, 0); + btrfs_alloc_workqueue(fs_info, "delayed-meta", flags, + max_active, 0); fs_info->readahead_workers = - btrfs_alloc_workqueue("readahead", flags, max_active, 2); + btrfs_alloc_workqueue(fs_info, "readahead", flags, + max_active, 2); fs_info->qgroup_rescan_workers = - btrfs_alloc_workqueue("qgroup-rescan", flags, 1, 0); + btrfs_alloc_workqueue(fs_info, "qgroup-rescan", flags, 1, 0); fs_info->extent_workers = - btrfs_alloc_workqueue("extent-refs", flags, + btrfs_alloc_workqueue(fs_info, "extent-refs", flags, min_t(u64, fs_devices->num_devices, max_active), 8); @@ -2393,7 +2451,6 @@ static int btrfs_replay_log(struct btrfs_fs_info *fs_info, struct btrfs_fs_devices *fs_devices) { int ret; - struct btrfs_root *tree_root = fs_info->tree_root; struct btrfs_root *log_tree_root; struct btrfs_super_block *disk_super = fs_info->super_copy; u64 bytenr = btrfs_super_log_root(disk_super); @@ -2407,12 +2464,10 @@ static int btrfs_replay_log(struct btrfs_fs_info *fs_info, if (!log_tree_root) return -ENOMEM; - __setup_root(tree_root->nodesize, tree_root->sectorsize, - tree_root->stripesize, log_tree_root, fs_info, - BTRFS_TREE_LOG_OBJECTID); + __setup_root(log_tree_root, fs_info, BTRFS_TREE_LOG_OBJECTID); - log_tree_root->node = read_tree_block(tree_root, bytenr, - fs_info->generation + 1); + log_tree_root->node = read_tree_block(fs_info, bytenr, + fs_info->generation + 1); if (IS_ERR(log_tree_root->node)) { btrfs_warn(fs_info, "failed to read log tree"); ret = PTR_ERR(log_tree_root->node); @@ -2427,15 +2482,15 @@ static int btrfs_replay_log(struct btrfs_fs_info *fs_info, /* returns with log_tree_root freed on success */ ret = btrfs_recover_log_trees(log_tree_root); if (ret) { - btrfs_handle_fs_error(tree_root->fs_info, ret, - "Failed to recover log tree"); + btrfs_handle_fs_error(fs_info, ret, + "Failed to recover log tree"); free_extent_buffer(log_tree_root->node); kfree(log_tree_root); return ret; } if (fs_info->sb->s_flags & MS_RDONLY) { - ret = btrfs_commit_super(tree_root); + ret = btrfs_commit_super(fs_info); if (ret) return ret; } @@ -2443,13 +2498,15 @@ static int btrfs_replay_log(struct btrfs_fs_info *fs_info, return 0; } -static int btrfs_read_roots(struct btrfs_fs_info *fs_info, - struct btrfs_root *tree_root) +static int btrfs_read_roots(struct btrfs_fs_info *fs_info) { + struct btrfs_root *tree_root = fs_info->tree_root; struct btrfs_root *root; struct btrfs_key location; int ret; + BUG_ON(!fs_info->tree_root); + location.objectid = BTRFS_EXTENT_TREE_OBJECTID; location.type = BTRFS_ROOT_ITEM_KEY; location.offset = 0; @@ -2479,8 +2536,7 @@ static int btrfs_read_roots(struct btrfs_fs_info *fs_info, root = btrfs_read_tree_root(tree_root, &location); if (!IS_ERR(root)) { set_bit(BTRFS_ROOT_TRACK_DIRTY, &root->state); - fs_info->quota_enabled = 1; - fs_info->pending_quota_state = 1; + set_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags); fs_info->quota_root = root; } @@ -2527,6 +2583,7 @@ int open_ctree(struct super_block *sb, int num_backups_tried = 0; int backup_index = 0; int max_active; + int clear_free_space_tree = 0; tree_root = fs_info->tree_root = btrfs_alloc_root(fs_info, GFP_KERNEL); chunk_root = fs_info->chunk_root = btrfs_alloc_root(fs_info, GFP_KERNEL); @@ -2623,6 +2680,7 @@ int open_ctree(struct super_block *sb, atomic_set(&fs_info->qgroup_op_seq, 0); atomic_set(&fs_info->reada_works_cnt, 0); atomic64_set(&fs_info->tree_mod_seq, 0); + fs_info->fs_frozen = 0; fs_info->sb = sb; fs_info->max_inline = BTRFS_DEFAULT_MAX_INLINE; fs_info->metadata_ratio = 0; @@ -2659,7 +2717,7 @@ int open_ctree(struct super_block *sb, sb->s_blocksize_bits = blksize_bits(4096); sb->s_bdi = &fs_info->bdi; - btrfs_init_btree_inode(fs_info, tree_root); + btrfs_init_btree_inode(fs_info); spin_lock_init(&fs_info->block_group_cache_lock); fs_info->block_group_cache_tree = RB_ROOT; @@ -2670,8 +2728,7 @@ int open_ctree(struct super_block *sb, extent_io_tree_init(&fs_info->freed_extents[1], fs_info->btree_inode->i_mapping); fs_info->pinned_extents = &fs_info->freed_extents[0]; - fs_info->do_barriers = 1; - + set_bit(BTRFS_FS_BARRIER, &fs_info->flags); mutex_init(&fs_info->ordered_operations_mutex); mutex_init(&fs_info->tree_log_mutex); @@ -2698,14 +2755,18 @@ int open_ctree(struct super_block *sb, INIT_LIST_HEAD(&fs_info->pinned_chunks); + /* Usable values until the real ones are cached from the superblock */ + fs_info->nodesize = 4096; + fs_info->sectorsize = 4096; + fs_info->stripesize = 4096; + ret = btrfs_alloc_stripe_hash_table(fs_info); if (ret) { err = ret; goto fail_alloc; } - __setup_root(4096, 4096, 4096, tree_root, - fs_info, BTRFS_ROOT_TREE_OBJECTID); + __setup_root(tree_root, fs_info, BTRFS_ROOT_TREE_OBJECTID); invalidate_bdev(fs_devices->latest_bdev); @@ -2722,7 +2783,7 @@ int open_ctree(struct super_block *sb, * We want to check superblock checksum, the type is stored inside. * Pass the whole disk block of size BTRFS_SUPER_INFO_SIZE (4k). */ - if (btrfs_check_super_csum(bh->b_data)) { + if (btrfs_check_super_csum(fs_info, bh->b_data)) { btrfs_err(fs_info, "superblock checksum mismatch"); err = -EINVAL; brelse(bh); @@ -2769,7 +2830,7 @@ int open_ctree(struct super_block *sb, */ fs_info->compress_type = BTRFS_COMPRESS_ZLIB; - ret = btrfs_parse_options(tree_root, options, sb->s_flags); + ret = btrfs_parse_options(fs_info, options, sb->s_flags); if (ret) { err = ret; goto fail_alloc; @@ -2787,7 +2848,7 @@ int open_ctree(struct super_block *sb, features = btrfs_super_incompat_flags(disk_super); features |= BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF; - if (tree_root->fs_info->compress_type == BTRFS_COMPRESS_LZO) + if (fs_info->compress_type == BTRFS_COMPRESS_LZO) features |= BTRFS_FEATURE_INCOMPAT_COMPRESS_LZO; if (features & BTRFS_FEATURE_INCOMPAT_SKINNY_METADATA) @@ -2810,6 +2871,11 @@ int open_ctree(struct super_block *sb, fs_info->dirty_metadata_batch = nodesize * (1 + ilog2(nr_cpu_ids)); fs_info->delalloc_batch = sectorsize * 512 * (1 + ilog2(nr_cpu_ids)); + /* Cache block sizes */ + fs_info->nodesize = nodesize; + fs_info->sectorsize = sectorsize; + fs_info->stripesize = stripesize; + /* * mixed block groups end up with duplicate but slightly offset * extent buffers for the same range. It leads to corruptions @@ -2850,15 +2916,11 @@ int open_ctree(struct super_block *sb, fs_info->bdi.ra_pages = max(fs_info->bdi.ra_pages, SZ_4M / PAGE_SIZE); - tree_root->nodesize = nodesize; - tree_root->sectorsize = sectorsize; - tree_root->stripesize = stripesize; - sb->s_blocksize = sectorsize; sb->s_blocksize_bits = blksize_bits(sectorsize); mutex_lock(&fs_info->chunk_mutex); - ret = btrfs_read_sys_array(tree_root); + ret = btrfs_read_sys_array(fs_info); mutex_unlock(&fs_info->chunk_mutex); if (ret) { btrfs_err(fs_info, "failed to read the system array: %d", ret); @@ -2867,10 +2929,9 @@ int open_ctree(struct super_block *sb, generation = btrfs_super_chunk_root_generation(disk_super); - __setup_root(nodesize, sectorsize, stripesize, chunk_root, - fs_info, BTRFS_CHUNK_TREE_OBJECTID); + __setup_root(chunk_root, fs_info, BTRFS_CHUNK_TREE_OBJECTID); - chunk_root->node = read_tree_block(chunk_root, + chunk_root->node = read_tree_block(fs_info, btrfs_super_chunk_root(disk_super), generation); if (IS_ERR(chunk_root->node) || @@ -2887,7 +2948,7 @@ int open_ctree(struct super_block *sb, read_extent_buffer(chunk_root->node, fs_info->chunk_tree_uuid, btrfs_header_chunk_tree_uuid(chunk_root->node), BTRFS_UUID_SIZE); - ret = btrfs_read_chunk_tree(chunk_root); + ret = btrfs_read_chunk_tree(fs_info); if (ret) { btrfs_err(fs_info, "failed to read chunk tree: %d", ret); goto fail_tree_roots; @@ -2907,7 +2968,7 @@ int open_ctree(struct super_block *sb, retry_root_backup: generation = btrfs_super_generation(disk_super); - tree_root->node = read_tree_block(tree_root, + tree_root->node = read_tree_block(fs_info, btrfs_super_root(disk_super), generation); if (IS_ERR(tree_root->node) || @@ -2935,7 +2996,7 @@ retry_root_backup: mutex_unlock(&tree_root->objectid_mutex); - ret = btrfs_read_roots(fs_info, tree_root); + ret = btrfs_read_roots(fs_info); if (ret) goto recovery_tree_root; @@ -2988,7 +3049,7 @@ retry_root_backup: goto fail_sysfs; } - ret = btrfs_read_block_groups(fs_info->extent_root); + ret = btrfs_read_block_groups(fs_info); if (ret) { btrfs_err(fs_info, "failed to read block groups: %d", ret); goto fail_sysfs; @@ -3016,8 +3077,8 @@ retry_root_backup: if (IS_ERR(fs_info->transaction_kthread)) goto fail_cleaner; - if (!btrfs_test_opt(tree_root, SSD) && - !btrfs_test_opt(tree_root, NOSSD) && + if (!btrfs_test_opt(fs_info, SSD) && + !btrfs_test_opt(fs_info, NOSSD) && !fs_info->fs_devices->rotating) { btrfs_info(fs_info, "detected SSD devices, enabling SSD mode"); btrfs_set_opt(fs_info->mount_opt, SSD); @@ -3030,9 +3091,9 @@ retry_root_backup: btrfs_apply_pending_changes(fs_info); #ifdef CONFIG_BTRFS_FS_CHECK_INTEGRITY - if (btrfs_test_opt(tree_root, CHECK_INTEGRITY)) { - ret = btrfsic_mount(tree_root, fs_devices, - btrfs_test_opt(tree_root, + if (btrfs_test_opt(fs_info, CHECK_INTEGRITY)) { + ret = btrfsic_mount(fs_info, fs_devices, + btrfs_test_opt(fs_info, CHECK_INTEGRITY_INCLUDING_EXTENT_DATA) ? 1 : 0, fs_info->check_integrity_print_mask); @@ -3048,7 +3109,7 @@ retry_root_backup: /* do not make disk changes in broken FS or nologreplay is given */ if (btrfs_super_log_root(disk_super) != 0 && - !btrfs_test_opt(tree_root, NOLOGREPLAY)) { + !btrfs_test_opt(fs_info, NOLOGREPLAY)) { ret = btrfs_replay_log(fs_info, fs_devices); if (ret) { err = ret; @@ -3056,7 +3117,7 @@ retry_root_backup: } } - ret = btrfs_find_orphan_roots(tree_root); + ret = btrfs_find_orphan_roots(fs_info); if (ret) goto fail_qgroup; @@ -3089,14 +3150,34 @@ retry_root_backup: if (sb->s_flags & MS_RDONLY) return 0; - if (btrfs_test_opt(tree_root, FREE_SPACE_TREE) && + if (btrfs_test_opt(fs_info, CLEAR_CACHE) && + btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE)) { + clear_free_space_tree = 1; + } else if (btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE) && + !btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE_VALID)) { + btrfs_warn(fs_info, "free space tree is invalid"); + clear_free_space_tree = 1; + } + + if (clear_free_space_tree) { + btrfs_info(fs_info, "clearing free space tree"); + ret = btrfs_clear_free_space_tree(fs_info); + if (ret) { + btrfs_warn(fs_info, + "failed to clear free space tree: %d", ret); + close_ctree(fs_info); + return ret; + } + } + + if (btrfs_test_opt(fs_info, FREE_SPACE_TREE) && !btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE)) { btrfs_info(fs_info, "creating free space tree"); ret = btrfs_create_free_space_tree(fs_info); if (ret) { btrfs_warn(fs_info, "failed to create free space tree: %d", ret); - close_ctree(tree_root); + close_ctree(fs_info); return ret; } } @@ -3105,7 +3186,7 @@ retry_root_backup: if ((ret = btrfs_orphan_cleanup(fs_info->fs_root)) || (ret = btrfs_orphan_cleanup(fs_info->tree_root))) { up_read(&fs_info->cleanup_work_sem); - close_ctree(tree_root); + close_ctree(fs_info); return ret; } up_read(&fs_info->cleanup_work_sem); @@ -3113,41 +3194,29 @@ retry_root_backup: ret = btrfs_resume_balance_async(fs_info); if (ret) { btrfs_warn(fs_info, "failed to resume balance: %d", ret); - close_ctree(tree_root); + close_ctree(fs_info); return ret; } ret = btrfs_resume_dev_replace_async(fs_info); if (ret) { btrfs_warn(fs_info, "failed to resume device replace: %d", ret); - close_ctree(tree_root); + close_ctree(fs_info); return ret; } btrfs_qgroup_rescan_resume(fs_info); - if (btrfs_test_opt(tree_root, CLEAR_CACHE) && - btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE)) { - btrfs_info(fs_info, "clearing free space tree"); - ret = btrfs_clear_free_space_tree(fs_info); - if (ret) { - btrfs_warn(fs_info, - "failed to clear free space tree: %d", ret); - close_ctree(tree_root); - return ret; - } - } - if (!fs_info->uuid_root) { btrfs_info(fs_info, "creating UUID tree"); ret = btrfs_create_uuid_tree(fs_info); if (ret) { btrfs_warn(fs_info, "failed to create the UUID tree: %d", ret); - close_ctree(tree_root); + close_ctree(fs_info); return ret; } - } else if (btrfs_test_opt(tree_root, RESCAN_UUID_TREE) || + } else if (btrfs_test_opt(fs_info, RESCAN_UUID_TREE) || fs_info->generation != btrfs_super_uuid_tree_generation(disk_super)) { btrfs_info(fs_info, "checking UUID tree"); @@ -3155,14 +3224,13 @@ retry_root_backup: if (ret) { btrfs_warn(fs_info, "failed to check the UUID tree: %d", ret); - close_ctree(tree_root); + close_ctree(fs_info); return ret; } } else { - fs_info->update_uuid_tree_gen = 1; + set_bit(BTRFS_FS_UPDATE_UUID_TREE_GEN, &fs_info->flags); } - - fs_info->open = 1; + set_bit(BTRFS_FS_OPEN, &fs_info->flags); /* * backuproot only affect mount behavior, and if open_ctree succeeded, @@ -3176,7 +3244,7 @@ fail_qgroup: btrfs_free_qgroup_config(fs_info); fail_trans_kthread: kthread_stop(fs_info->transaction_kthread); - btrfs_cleanup_transaction(fs_info->tree_root); + btrfs_cleanup_transaction(fs_info); btrfs_free_fs_roots(fs_info); fail_cleaner: kthread_stop(fs_info->cleaner_kthread); @@ -3224,7 +3292,7 @@ fail: return err; recovery_tree_root: - if (!btrfs_test_opt(tree_root, USEBACKUPROOT)) + if (!btrfs_test_opt(fs_info, USEBACKUPROOT)) goto fail_tree_roots; free_root_pointers(fs_info, 0); @@ -3250,7 +3318,7 @@ static void btrfs_end_buffer_write_sync(struct buffer_head *bh, int uptodate) struct btrfs_device *device = (struct btrfs_device *) bh->b_private; - btrfs_warn_rl_in_rcu(device->dev_root->fs_info, + btrfs_warn_rl_in_rcu(device->fs_info, "lost page write due to IO error on %s", rcu_str_deref(device->name)); /* note, we don't set_buffer_write_io_error because we have @@ -3395,7 +3463,7 @@ static int write_dev_supers(struct btrfs_device *device, bh = __getblk(device->bdev, bytenr / 4096, BTRFS_SUPER_INFO_SIZE); if (!bh) { - btrfs_err(device->dev_root->fs_info, + btrfs_err(device->fs_info, "couldn't get super buffer head for bytenr %llu", bytenr); errors++; @@ -3418,9 +3486,9 @@ static int write_dev_supers(struct btrfs_device *device, * to go down lazy. */ if (i == 0) - ret = btrfsic_submit_bh(WRITE_FUA, bh); + ret = btrfsic_submit_bh(REQ_OP_WRITE, REQ_FUA, bh); else - ret = btrfsic_submit_bh(WRITE_SYNC, bh); + ret = btrfsic_submit_bh(REQ_OP_WRITE, REQ_SYNC, bh); if (ret) errors++; } @@ -3484,12 +3552,13 @@ static int write_dev_flush(struct btrfs_device *device, int wait) bio->bi_end_io = btrfs_end_empty_barrier; bio->bi_bdev = device->bdev; + bio->bi_opf = REQ_OP_WRITE | REQ_PREFLUSH; init_completion(&device->flush_wait); bio->bi_private = &device->flush_wait; device->flush_bio = bio; bio_get(bio); - btrfsic_submit_bio(WRITE_FLUSH, bio); + btrfsic_submit_bio(bio); return 0; } @@ -3566,7 +3635,7 @@ int btrfs_get_num_tolerated_disk_barrier_failures(u64 flags) } if (min_tolerated == INT_MAX) { - pr_warn("BTRFS: unknown raid flag: %llu\n", flags); + pr_warn("BTRFS: unknown raid flag: %llu", flags); min_tolerated = 0; } @@ -3627,7 +3696,7 @@ int btrfs_calc_num_tolerated_disk_barrier_failures( return num_tolerated_disk_barrier_failures; } -static int write_all_supers(struct btrfs_root *root, int max_mirrors) +static int write_all_supers(struct btrfs_fs_info *fs_info, int max_mirrors) { struct list_head *head; struct btrfs_device *dev; @@ -3639,23 +3708,23 @@ static int write_all_supers(struct btrfs_root *root, int max_mirrors) int total_errors = 0; u64 flags; - do_barriers = !btrfs_test_opt(root, NOBARRIER); - backup_super_roots(root->fs_info); + do_barriers = !btrfs_test_opt(fs_info, NOBARRIER); + backup_super_roots(fs_info); - sb = root->fs_info->super_for_commit; + sb = fs_info->super_for_commit; dev_item = &sb->dev_item; - mutex_lock(&root->fs_info->fs_devices->device_list_mutex); - head = &root->fs_info->fs_devices->devices; - max_errors = btrfs_super_num_devices(root->fs_info->super_copy) - 1; + mutex_lock(&fs_info->fs_devices->device_list_mutex); + head = &fs_info->fs_devices->devices; + max_errors = btrfs_super_num_devices(fs_info->super_copy) - 1; if (do_barriers) { - ret = barrier_all_devices(root->fs_info); + ret = barrier_all_devices(fs_info); if (ret) { mutex_unlock( - &root->fs_info->fs_devices->device_list_mutex); - btrfs_handle_fs_error(root->fs_info, ret, - "errors while submitting device barriers."); + &fs_info->fs_devices->device_list_mutex); + btrfs_handle_fs_error(fs_info, ret, + "errors while submitting device barriers."); return ret; } } @@ -3689,13 +3758,14 @@ static int write_all_supers(struct btrfs_root *root, int max_mirrors) total_errors++; } if (total_errors > max_errors) { - btrfs_err(root->fs_info, "%d errors while writing supers", - total_errors); - mutex_unlock(&root->fs_info->fs_devices->device_list_mutex); + btrfs_err(fs_info, "%d errors while writing supers", + total_errors); + mutex_unlock(&fs_info->fs_devices->device_list_mutex); /* FUA is masked off if unsupported and can't be the reason */ - btrfs_handle_fs_error(root->fs_info, -EIO, - "%d errors while writing supers", total_errors); + btrfs_handle_fs_error(fs_info, -EIO, + "%d errors while writing supers", + total_errors); return -EIO; } @@ -3710,19 +3780,20 @@ static int write_all_supers(struct btrfs_root *root, int max_mirrors) if (ret) total_errors++; } - mutex_unlock(&root->fs_info->fs_devices->device_list_mutex); + mutex_unlock(&fs_info->fs_devices->device_list_mutex); if (total_errors > max_errors) { - btrfs_handle_fs_error(root->fs_info, -EIO, - "%d errors while writing supers", total_errors); + btrfs_handle_fs_error(fs_info, -EIO, + "%d errors while writing supers", + total_errors); return -EIO; } return 0; } int write_ctree_super(struct btrfs_trans_handle *trans, - struct btrfs_root *root, int max_mirrors) + struct btrfs_fs_info *fs_info, int max_mirrors) { - return write_all_supers(root, max_mirrors); + return write_all_supers(fs_info, max_mirrors); } /* Drop a fs root from the radix tree and free it. */ @@ -3737,8 +3808,15 @@ void btrfs_drop_and_free_fs_root(struct btrfs_fs_info *fs_info, if (btrfs_root_refs(&root->root_item) == 0) synchronize_srcu(&fs_info->subvol_srcu); - if (test_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state)) + if (test_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state)) { btrfs_free_log(NULL, root); + if (root->reloc_root) { + free_extent_buffer(root->reloc_root->node); + free_extent_buffer(root->reloc_root->commit_root); + btrfs_put_fs_root(root->reloc_root); + root->reloc_root = NULL; + } + } if (root->free_ino_pinned) __btrfs_remove_free_space_cache(root->free_ino_pinned); @@ -3751,7 +3829,7 @@ static void free_fs_root(struct btrfs_root *root) { iput(root->ino_cache_inode); WARN_ON(!RB_EMPTY_ROOT(&root->inode_tree)); - btrfs_free_block_rsv(root, root->orphan_block_rsv); + btrfs_free_block_rsv(root->fs_info, root->orphan_block_rsv); root->orphan_block_rsv = NULL; if (root->anon_dev) free_anon_bdev(root->anon_dev); @@ -3821,35 +3899,35 @@ int btrfs_cleanup_fs_roots(struct btrfs_fs_info *fs_info) return err; } -int btrfs_commit_super(struct btrfs_root *root) +int btrfs_commit_super(struct btrfs_fs_info *fs_info) { + struct btrfs_root *root = fs_info->tree_root; struct btrfs_trans_handle *trans; - mutex_lock(&root->fs_info->cleaner_mutex); - btrfs_run_delayed_iputs(root); - mutex_unlock(&root->fs_info->cleaner_mutex); - wake_up_process(root->fs_info->cleaner_kthread); + mutex_lock(&fs_info->cleaner_mutex); + btrfs_run_delayed_iputs(fs_info); + mutex_unlock(&fs_info->cleaner_mutex); + wake_up_process(fs_info->cleaner_kthread); /* wait until ongoing cleanup work done */ - down_write(&root->fs_info->cleanup_work_sem); - up_write(&root->fs_info->cleanup_work_sem); + down_write(&fs_info->cleanup_work_sem); + up_write(&fs_info->cleanup_work_sem); trans = btrfs_join_transaction(root); if (IS_ERR(trans)) return PTR_ERR(trans); - return btrfs_commit_transaction(trans, root); + return btrfs_commit_transaction(trans); } -void close_ctree(struct btrfs_root *root) +void close_ctree(struct btrfs_fs_info *fs_info) { - struct btrfs_fs_info *fs_info = root->fs_info; + struct btrfs_root *root = fs_info->tree_root; int ret; - fs_info->closing = 1; - smp_mb(); + set_bit(BTRFS_FS_CLOSING_START, &fs_info->flags); /* wait for the qgroup rescan worker to stop */ - btrfs_qgroup_wait_for_completion(fs_info); + btrfs_qgroup_wait_for_completion(fs_info, false); /* wait for the uuid_scan task to finish */ down(&fs_info->uuid_tree_rescan_sem); @@ -3878,21 +3956,20 @@ void close_ctree(struct btrfs_root *root) * block groups queued for removal, the deletion will be * skipped when we quit the cleaner thread. */ - btrfs_delete_unused_bgs(root->fs_info); + btrfs_delete_unused_bgs(fs_info); - ret = btrfs_commit_super(root); + ret = btrfs_commit_super(fs_info); if (ret) btrfs_err(fs_info, "commit super ret %d", ret); } if (test_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state)) - btrfs_error_commit_super(root); + btrfs_error_commit_super(fs_info); kthread_stop(fs_info->transaction_kthread); kthread_stop(fs_info->cleaner_kthread); - fs_info->closing = 2; - smp_mb(); + set_bit(BTRFS_FS_CLOSING_DONE, &fs_info->flags); btrfs_free_qgroup_config(fs_info); @@ -3917,14 +3994,14 @@ void close_ctree(struct btrfs_root *root) invalidate_inode_pages2(fs_info->btree_inode->i_mapping); btrfs_stop_all_workers(fs_info); - fs_info->open = 0; + clear_bit(BTRFS_FS_OPEN, &fs_info->flags); free_root_pointers(fs_info, 1); iput(fs_info->btree_inode); #ifdef CONFIG_BTRFS_FS_CHECK_INTEGRITY - if (btrfs_test_opt(root, CHECK_INTEGRITY)) - btrfsic_unmount(root, fs_info->fs_devices); + if (btrfs_test_opt(fs_info, CHECK_INTEGRITY)) + btrfsic_unmount(fs_info->fs_devices); #endif btrfs_close_devices(fs_info->fs_devices); @@ -3941,7 +4018,7 @@ void close_ctree(struct btrfs_root *root) __btrfs_free_block_rsv(root->orphan_block_rsv); root->orphan_block_rsv = NULL; - lock_chunks(root); + mutex_lock(&fs_info->chunk_mutex); while (!list_empty(&fs_info->pinned_chunks)) { struct extent_map *em; @@ -3950,7 +4027,7 @@ void close_ctree(struct btrfs_root *root) list_del_init(&em->list); free_extent_map(em); } - unlock_chunks(root); + mutex_unlock(&fs_info->chunk_mutex); } int btrfs_buffer_uptodate(struct extent_buffer *buf, u64 parent_transid, @@ -3972,6 +4049,7 @@ int btrfs_buffer_uptodate(struct extent_buffer *buf, u64 parent_transid, void btrfs_mark_buffer_dirty(struct extent_buffer *buf) { + struct btrfs_fs_info *fs_info; struct btrfs_root *root; u64 transid = btrfs_header_generation(buf); int was_dirty; @@ -3986,25 +4064,25 @@ void btrfs_mark_buffer_dirty(struct extent_buffer *buf) return; #endif root = BTRFS_I(buf->pages[0]->mapping->host)->root; + fs_info = root->fs_info; btrfs_assert_tree_locked(buf); - if (transid != root->fs_info->generation) - WARN(1, KERN_CRIT "btrfs transid mismatch buffer %llu, " - "found %llu running %llu\n", - buf->start, transid, root->fs_info->generation); + if (transid != fs_info->generation) + WARN(1, KERN_CRIT "btrfs transid mismatch buffer %llu, found %llu running %llu\n", + buf->start, transid, fs_info->generation); was_dirty = set_extent_buffer_dirty(buf); if (!was_dirty) - __percpu_counter_add(&root->fs_info->dirty_metadata_bytes, + __percpu_counter_add(&fs_info->dirty_metadata_bytes, buf->len, - root->fs_info->dirty_metadata_batch); + fs_info->dirty_metadata_batch); #ifdef CONFIG_BTRFS_FS_CHECK_INTEGRITY if (btrfs_header_level(buf) == 0 && check_leaf(root, buf)) { - btrfs_print_leaf(root, buf); + btrfs_print_leaf(fs_info, buf); ASSERT(0); } #endif } -static void __btrfs_btree_balance_dirty(struct btrfs_root *root, +static void __btrfs_btree_balance_dirty(struct btrfs_fs_info *fs_info, int flush_delayed) { /* @@ -4017,30 +4095,31 @@ static void __btrfs_btree_balance_dirty(struct btrfs_root *root, return; if (flush_delayed) - btrfs_balance_delayed_items(root); + btrfs_balance_delayed_items(fs_info); - ret = percpu_counter_compare(&root->fs_info->dirty_metadata_bytes, + ret = percpu_counter_compare(&fs_info->dirty_metadata_bytes, BTRFS_DIRTY_METADATA_THRESH); if (ret > 0) { - balance_dirty_pages_ratelimited( - root->fs_info->btree_inode->i_mapping); + balance_dirty_pages_ratelimited(fs_info->btree_inode->i_mapping); } } -void btrfs_btree_balance_dirty(struct btrfs_root *root) +void btrfs_btree_balance_dirty(struct btrfs_fs_info *fs_info) { - __btrfs_btree_balance_dirty(root, 1); + __btrfs_btree_balance_dirty(fs_info, 1); } -void btrfs_btree_balance_dirty_nodelay(struct btrfs_root *root) +void btrfs_btree_balance_dirty_nodelay(struct btrfs_fs_info *fs_info) { - __btrfs_btree_balance_dirty(root, 0); + __btrfs_btree_balance_dirty(fs_info, 0); } int btrfs_read_buffer(struct extent_buffer *buf, u64 parent_transid) { struct btrfs_root *root = BTRFS_I(buf->pages[0]->mapping->host)->root; - return btree_read_extent_buffer_pages(root, buf, 0, parent_transid); + struct btrfs_fs_info *fs_info = root->fs_info; + + return btree_read_extent_buffer_pages(fs_info, buf, parent_transid); } static int btrfs_check_super_valid(struct btrfs_fs_info *fs_info, @@ -4052,24 +4131,24 @@ static int btrfs_check_super_valid(struct btrfs_fs_info *fs_info, int ret = 0; if (btrfs_super_magic(sb) != BTRFS_MAGIC) { - printk(KERN_ERR "BTRFS: no valid FS found\n"); + btrfs_err(fs_info, "no valid FS found"); ret = -EINVAL; } if (btrfs_super_flags(sb) & ~BTRFS_SUPER_FLAG_SUPP) - printk(KERN_WARNING "BTRFS: unrecognized super flag: %llu\n", + btrfs_warn(fs_info, "unrecognized super flag: %llu", btrfs_super_flags(sb) & ~BTRFS_SUPER_FLAG_SUPP); if (btrfs_super_root_level(sb) >= BTRFS_MAX_LEVEL) { - printk(KERN_ERR "BTRFS: tree_root level too big: %d >= %d\n", + btrfs_err(fs_info, "tree_root level too big: %d >= %d", btrfs_super_root_level(sb), BTRFS_MAX_LEVEL); ret = -EINVAL; } if (btrfs_super_chunk_root_level(sb) >= BTRFS_MAX_LEVEL) { - printk(KERN_ERR "BTRFS: chunk_root level too big: %d >= %d\n", + btrfs_err(fs_info, "chunk_root level too big: %d >= %d", btrfs_super_chunk_root_level(sb), BTRFS_MAX_LEVEL); ret = -EINVAL; } if (btrfs_super_log_root_level(sb) >= BTRFS_MAX_LEVEL) { - printk(KERN_ERR "BTRFS: log_root level too big: %d >= %d\n", + btrfs_err(fs_info, "log_root level too big: %d >= %d", btrfs_super_log_root_level(sb), BTRFS_MAX_LEVEL); ret = -EINVAL; } @@ -4080,47 +4159,48 @@ static int btrfs_check_super_valid(struct btrfs_fs_info *fs_info, */ if (!is_power_of_2(sectorsize) || sectorsize < 4096 || sectorsize > BTRFS_MAX_METADATA_BLOCKSIZE) { - printk(KERN_ERR "BTRFS: invalid sectorsize %llu\n", sectorsize); + btrfs_err(fs_info, "invalid sectorsize %llu", sectorsize); ret = -EINVAL; } /* Only PAGE SIZE is supported yet */ if (sectorsize != PAGE_SIZE) { - printk(KERN_ERR "BTRFS: sectorsize %llu not supported yet, only support %lu\n", - sectorsize, PAGE_SIZE); + btrfs_err(fs_info, + "sectorsize %llu not supported yet, only support %lu", + sectorsize, PAGE_SIZE); ret = -EINVAL; } if (!is_power_of_2(nodesize) || nodesize < sectorsize || nodesize > BTRFS_MAX_METADATA_BLOCKSIZE) { - printk(KERN_ERR "BTRFS: invalid nodesize %llu\n", nodesize); + btrfs_err(fs_info, "invalid nodesize %llu", nodesize); ret = -EINVAL; } if (nodesize != le32_to_cpu(sb->__unused_leafsize)) { - printk(KERN_ERR "BTRFS: invalid leafsize %u, should be %llu\n", - le32_to_cpu(sb->__unused_leafsize), - nodesize); + btrfs_err(fs_info, "invalid leafsize %u, should be %llu", + le32_to_cpu(sb->__unused_leafsize), nodesize); ret = -EINVAL; } /* Root alignment check */ if (!IS_ALIGNED(btrfs_super_root(sb), sectorsize)) { - printk(KERN_WARNING "BTRFS: tree_root block unaligned: %llu\n", - btrfs_super_root(sb)); + btrfs_warn(fs_info, "tree_root block unaligned: %llu", + btrfs_super_root(sb)); ret = -EINVAL; } if (!IS_ALIGNED(btrfs_super_chunk_root(sb), sectorsize)) { - printk(KERN_WARNING "BTRFS: chunk_root block unaligned: %llu\n", - btrfs_super_chunk_root(sb)); + btrfs_warn(fs_info, "chunk_root block unaligned: %llu", + btrfs_super_chunk_root(sb)); ret = -EINVAL; } if (!IS_ALIGNED(btrfs_super_log_root(sb), sectorsize)) { - printk(KERN_WARNING "BTRFS: log_root block unaligned: %llu\n", - btrfs_super_log_root(sb)); + btrfs_warn(fs_info, "log_root block unaligned: %llu", + btrfs_super_log_root(sb)); ret = -EINVAL; } if (memcmp(fs_info->fsid, sb->dev_item.fsid, BTRFS_UUID_SIZE) != 0) { - printk(KERN_ERR "BTRFS: dev_item UUID does not match fsid: %pU != %pU\n", - fs_info->fsid, sb->dev_item.fsid); + btrfs_err(fs_info, + "dev_item UUID does not match fsid: %pU != %pU", + fs_info->fsid, sb->dev_item.fsid); ret = -EINVAL; } @@ -4130,25 +4210,25 @@ static int btrfs_check_super_valid(struct btrfs_fs_info *fs_info, */ if (btrfs_super_bytes_used(sb) < 6 * btrfs_super_nodesize(sb)) { btrfs_err(fs_info, "bytes_used is too small %llu", - btrfs_super_bytes_used(sb)); + btrfs_super_bytes_used(sb)); ret = -EINVAL; } if (!is_power_of_2(btrfs_super_stripesize(sb))) { btrfs_err(fs_info, "invalid stripesize %u", - btrfs_super_stripesize(sb)); + btrfs_super_stripesize(sb)); ret = -EINVAL; } if (btrfs_super_num_devices(sb) > (1UL << 31)) - printk(KERN_WARNING "BTRFS: suspicious number of devices: %llu\n", - btrfs_super_num_devices(sb)); + btrfs_warn(fs_info, "suspicious number of devices: %llu", + btrfs_super_num_devices(sb)); if (btrfs_super_num_devices(sb) == 0) { - printk(KERN_ERR "BTRFS: number of devices is 0\n"); + btrfs_err(fs_info, "number of devices is 0"); ret = -EINVAL; } if (btrfs_super_bytenr(sb) != BTRFS_SUPER_INFO_OFFSET) { - printk(KERN_ERR "BTRFS: super offset mismatch %llu != %u\n", - btrfs_super_bytenr(sb), BTRFS_SUPER_INFO_OFFSET); + btrfs_err(fs_info, "super offset mismatch %llu != %u", + btrfs_super_bytenr(sb), BTRFS_SUPER_INFO_OFFSET); ret = -EINVAL; } @@ -4157,17 +4237,17 @@ static int btrfs_check_super_valid(struct btrfs_fs_info *fs_info, * and one chunk */ if (btrfs_super_sys_array_size(sb) > BTRFS_SYSTEM_CHUNK_ARRAY_SIZE) { - printk(KERN_ERR "BTRFS: system chunk array too big %u > %u\n", - btrfs_super_sys_array_size(sb), - BTRFS_SYSTEM_CHUNK_ARRAY_SIZE); + btrfs_err(fs_info, "system chunk array too big %u > %u", + btrfs_super_sys_array_size(sb), + BTRFS_SYSTEM_CHUNK_ARRAY_SIZE); ret = -EINVAL; } if (btrfs_super_sys_array_size(sb) < sizeof(struct btrfs_disk_key) + sizeof(struct btrfs_chunk)) { - printk(KERN_ERR "BTRFS: system chunk array too small %u < %zu\n", - btrfs_super_sys_array_size(sb), - sizeof(struct btrfs_disk_key) - + sizeof(struct btrfs_chunk)); + btrfs_err(fs_info, "system chunk array too small %u < %zu", + btrfs_super_sys_array_size(sb), + sizeof(struct btrfs_disk_key) + + sizeof(struct btrfs_chunk)); ret = -EINVAL; } @@ -4176,29 +4256,31 @@ static int btrfs_check_super_valid(struct btrfs_fs_info *fs_info, * but it's still possible that it's the one that's wrong. */ if (btrfs_super_generation(sb) < btrfs_super_chunk_root_generation(sb)) - printk(KERN_WARNING - "BTRFS: suspicious: generation < chunk_root_generation: %llu < %llu\n", - btrfs_super_generation(sb), btrfs_super_chunk_root_generation(sb)); + btrfs_warn(fs_info, + "suspicious: generation < chunk_root_generation: %llu < %llu", + btrfs_super_generation(sb), + btrfs_super_chunk_root_generation(sb)); if (btrfs_super_generation(sb) < btrfs_super_cache_generation(sb) && btrfs_super_cache_generation(sb) != (u64)-1) - printk(KERN_WARNING - "BTRFS: suspicious: generation < cache_generation: %llu < %llu\n", - btrfs_super_generation(sb), btrfs_super_cache_generation(sb)); + btrfs_warn(fs_info, + "suspicious: generation < cache_generation: %llu < %llu", + btrfs_super_generation(sb), + btrfs_super_cache_generation(sb)); return ret; } -static void btrfs_error_commit_super(struct btrfs_root *root) +static void btrfs_error_commit_super(struct btrfs_fs_info *fs_info) { - mutex_lock(&root->fs_info->cleaner_mutex); - btrfs_run_delayed_iputs(root); - mutex_unlock(&root->fs_info->cleaner_mutex); + mutex_lock(&fs_info->cleaner_mutex); + btrfs_run_delayed_iputs(fs_info); + mutex_unlock(&fs_info->cleaner_mutex); - down_write(&root->fs_info->cleanup_work_sem); - up_write(&root->fs_info->cleanup_work_sem); + down_write(&fs_info->cleanup_work_sem); + up_write(&fs_info->cleanup_work_sem); /* cleanup FS via transaction */ - btrfs_cleanup_transaction(root); + btrfs_cleanup_transaction(fs_info); } static void btrfs_destroy_ordered_extents(struct btrfs_root *root) @@ -4241,7 +4323,7 @@ static void btrfs_destroy_all_ordered_extents(struct btrfs_fs_info *fs_info) } static int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans, - struct btrfs_root *root) + struct btrfs_fs_info *fs_info) { struct rb_node *node; struct btrfs_delayed_ref_root *delayed_refs; @@ -4253,7 +4335,7 @@ static int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans, spin_lock(&delayed_refs->lock); if (atomic_read(&delayed_refs->num_entries) == 0) { spin_unlock(&delayed_refs->lock); - btrfs_info(root->fs_info, "delayed_refs has NO entry"); + btrfs_info(fs_info, "delayed_refs has NO entry"); return ret; } @@ -4279,6 +4361,8 @@ static int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans, list) { ref->in_tree = 0; list_del(&ref->list); + if (!list_empty(&ref->add_list)) + list_del(&ref->add_list); atomic_dec(&delayed_refs->num_entries); btrfs_put_delayed_ref(ref); } @@ -4296,7 +4380,7 @@ static int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans, mutex_unlock(&head->mutex); if (pin_bytes) - btrfs_pin_extent(root, head->node.bytenr, + btrfs_pin_extent(fs_info, head->node.bytenr, head->node.num_bytes, 1); btrfs_put_delayed_ref(&head->node); cond_resched(); @@ -4360,7 +4444,7 @@ static void btrfs_destroy_all_delalloc_inodes(struct btrfs_fs_info *fs_info) spin_unlock(&fs_info->delalloc_root_lock); } -static int btrfs_destroy_marked_extents(struct btrfs_root *root, +static int btrfs_destroy_marked_extents(struct btrfs_fs_info *fs_info, struct extent_io_tree *dirty_pages, int mark) { @@ -4377,8 +4461,8 @@ static int btrfs_destroy_marked_extents(struct btrfs_root *root, clear_extent_bits(dirty_pages, start, end, mark); while (start <= end) { - eb = btrfs_find_tree_block(root->fs_info, start); - start += root->nodesize; + eb = find_extent_buffer(fs_info, start); + start += fs_info->nodesize; if (!eb) continue; wait_on_extent_buffer_writeback(eb); @@ -4393,7 +4477,7 @@ static int btrfs_destroy_marked_extents(struct btrfs_root *root, return ret; } -static int btrfs_destroy_pinned_extent(struct btrfs_root *root, +static int btrfs_destroy_pinned_extent(struct btrfs_fs_info *fs_info, struct extent_io_tree *pinned_extents) { struct extent_io_tree *unpin; @@ -4411,15 +4495,15 @@ again: break; clear_extent_dirty(unpin, start, end); - btrfs_error_unpin_extent_range(root, start, end); + btrfs_error_unpin_extent_range(fs_info, start, end); cond_resched(); } if (loop) { - if (unpin == &root->fs_info->freed_extents[0]) - unpin = &root->fs_info->freed_extents[1]; + if (unpin == &fs_info->freed_extents[0]) + unpin = &fs_info->freed_extents[1]; else - unpin = &root->fs_info->freed_extents[0]; + unpin = &fs_info->freed_extents[0]; loop = false; goto again; } @@ -4427,24 +4511,93 @@ again: return 0; } +static void btrfs_cleanup_bg_io(struct btrfs_block_group_cache *cache) +{ + struct inode *inode; + + inode = cache->io_ctl.inode; + if (inode) { + invalidate_inode_pages2(inode->i_mapping); + BTRFS_I(inode)->generation = 0; + cache->io_ctl.inode = NULL; + iput(inode); + } + btrfs_put_block_group(cache); +} + +void btrfs_cleanup_dirty_bgs(struct btrfs_transaction *cur_trans, + struct btrfs_fs_info *fs_info) +{ + struct btrfs_block_group_cache *cache; + + spin_lock(&cur_trans->dirty_bgs_lock); + while (!list_empty(&cur_trans->dirty_bgs)) { + cache = list_first_entry(&cur_trans->dirty_bgs, + struct btrfs_block_group_cache, + dirty_list); + if (!cache) { + btrfs_err(fs_info, "orphan block group dirty_bgs list"); + spin_unlock(&cur_trans->dirty_bgs_lock); + return; + } + + if (!list_empty(&cache->io_list)) { + spin_unlock(&cur_trans->dirty_bgs_lock); + list_del_init(&cache->io_list); + btrfs_cleanup_bg_io(cache); + spin_lock(&cur_trans->dirty_bgs_lock); + } + + list_del_init(&cache->dirty_list); + spin_lock(&cache->lock); + cache->disk_cache_state = BTRFS_DC_ERROR; + spin_unlock(&cache->lock); + + spin_unlock(&cur_trans->dirty_bgs_lock); + btrfs_put_block_group(cache); + spin_lock(&cur_trans->dirty_bgs_lock); + } + spin_unlock(&cur_trans->dirty_bgs_lock); + + while (!list_empty(&cur_trans->io_bgs)) { + cache = list_first_entry(&cur_trans->io_bgs, + struct btrfs_block_group_cache, + io_list); + if (!cache) { + btrfs_err(fs_info, "orphan block group on io_bgs list"); + return; + } + + list_del_init(&cache->io_list); + spin_lock(&cache->lock); + cache->disk_cache_state = BTRFS_DC_ERROR; + spin_unlock(&cache->lock); + btrfs_cleanup_bg_io(cache); + } +} + void btrfs_cleanup_one_transaction(struct btrfs_transaction *cur_trans, - struct btrfs_root *root) + struct btrfs_fs_info *fs_info) { - btrfs_destroy_delayed_refs(cur_trans, root); + btrfs_cleanup_dirty_bgs(cur_trans, fs_info); + ASSERT(list_empty(&cur_trans->dirty_bgs)); + ASSERT(list_empty(&cur_trans->io_bgs)); + + btrfs_destroy_delayed_refs(cur_trans, fs_info); cur_trans->state = TRANS_STATE_COMMIT_START; - wake_up(&root->fs_info->transaction_blocked_wait); + wake_up(&fs_info->transaction_blocked_wait); cur_trans->state = TRANS_STATE_UNBLOCKED; - wake_up(&root->fs_info->transaction_wait); + wake_up(&fs_info->transaction_wait); - btrfs_destroy_delayed_inodes(root); - btrfs_assert_delayed_root_empty(root); + btrfs_destroy_delayed_inodes(fs_info); + btrfs_assert_delayed_root_empty(fs_info); - btrfs_destroy_marked_extents(root, &cur_trans->dirty_pages, + btrfs_destroy_marked_extents(fs_info, &cur_trans->dirty_pages, EXTENT_DIRTY); - btrfs_destroy_pinned_extent(root, - root->fs_info->pinned_extents); + btrfs_destroy_pinned_extent(fs_info, + fs_info->pinned_extents); cur_trans->state =TRANS_STATE_COMPLETED; wake_up(&cur_trans->commit_wait); @@ -4455,27 +4608,27 @@ void btrfs_cleanup_one_transaction(struct btrfs_transaction *cur_trans, */ } -static int btrfs_cleanup_transaction(struct btrfs_root *root) +static int btrfs_cleanup_transaction(struct btrfs_fs_info *fs_info) { struct btrfs_transaction *t; - mutex_lock(&root->fs_info->transaction_kthread_mutex); + mutex_lock(&fs_info->transaction_kthread_mutex); - spin_lock(&root->fs_info->trans_lock); - while (!list_empty(&root->fs_info->trans_list)) { - t = list_first_entry(&root->fs_info->trans_list, + spin_lock(&fs_info->trans_lock); + while (!list_empty(&fs_info->trans_list)) { + t = list_first_entry(&fs_info->trans_list, struct btrfs_transaction, list); if (t->state >= TRANS_STATE_COMMIT_START) { atomic_inc(&t->use_count); - spin_unlock(&root->fs_info->trans_lock); - btrfs_wait_for_commit(root, t->transid); + spin_unlock(&fs_info->trans_lock); + btrfs_wait_for_commit(fs_info, t->transid); btrfs_put_transaction(t); - spin_lock(&root->fs_info->trans_lock); + spin_lock(&fs_info->trans_lock); continue; } - if (t == root->fs_info->running_transaction) { + if (t == fs_info->running_transaction) { t->state = TRANS_STATE_COMMIT_DOING; - spin_unlock(&root->fs_info->trans_lock); + spin_unlock(&fs_info->trans_lock); /* * We wait for 0 num_writers since we don't hold a trans * handle open currently for this transaction. @@ -4483,27 +4636,27 @@ static int btrfs_cleanup_transaction(struct btrfs_root *root) wait_event(t->writer_wait, atomic_read(&t->num_writers) == 0); } else { - spin_unlock(&root->fs_info->trans_lock); + spin_unlock(&fs_info->trans_lock); } - btrfs_cleanup_one_transaction(t, root); + btrfs_cleanup_one_transaction(t, fs_info); - spin_lock(&root->fs_info->trans_lock); - if (t == root->fs_info->running_transaction) - root->fs_info->running_transaction = NULL; + spin_lock(&fs_info->trans_lock); + if (t == fs_info->running_transaction) + fs_info->running_transaction = NULL; list_del_init(&t->list); - spin_unlock(&root->fs_info->trans_lock); + spin_unlock(&fs_info->trans_lock); btrfs_put_transaction(t); - trace_btrfs_transaction_commit(root); - spin_lock(&root->fs_info->trans_lock); - } - spin_unlock(&root->fs_info->trans_lock); - btrfs_destroy_all_ordered_extents(root->fs_info); - btrfs_destroy_delayed_inodes(root); - btrfs_assert_delayed_root_empty(root); - btrfs_destroy_pinned_extent(root, root->fs_info->pinned_extents); - btrfs_destroy_all_delalloc_inodes(root->fs_info); - mutex_unlock(&root->fs_info->transaction_kthread_mutex); + trace_btrfs_transaction_commit(fs_info->tree_root); + spin_lock(&fs_info->trans_lock); + } + spin_unlock(&fs_info->trans_lock); + btrfs_destroy_all_ordered_extents(fs_info); + btrfs_destroy_delayed_inodes(fs_info); + btrfs_assert_delayed_root_empty(fs_info); + btrfs_destroy_pinned_extent(fs_info, fs_info->pinned_extents); + btrfs_destroy_all_delalloc_inodes(fs_info); + mutex_unlock(&fs_info->transaction_kthread_mutex); return 0; } |