diff options
Diffstat (limited to 'fs/btrfs/extent_io.c')
-rw-r--r-- | fs/btrfs/extent_io.c | 245 |
1 files changed, 108 insertions, 137 deletions
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index d1cd60140817..7a18b5762ac9 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -87,19 +87,9 @@ void btrfs_leak_debug_check(void) static inline void __btrfs_debug_check_extent_io_range(const char *caller, struct extent_io_tree *tree, u64 start, u64 end) { - struct inode *inode; - u64 isize; - - if (!tree->mapping) - return; - - inode = tree->mapping->host; - isize = i_size_read(inode); - if (end >= PAGE_SIZE && (end % 2) == 0 && end != isize - 1) { - btrfs_debug_rl(BTRFS_I(inode)->root->fs_info, - "%s: ino %llu isize %llu odd range [%llu,%llu]", - caller, btrfs_ino(BTRFS_I(inode)), isize, start, end); - } + if (tree->ops && tree->ops->check_extent_io_range) + tree->ops->check_extent_io_range(tree->private_data, caller, + start, end); } #else #define btrfs_leak_debug_add(new, head) do {} while (0) @@ -154,9 +144,9 @@ static noinline void flush_write_bio(void *data); static inline struct btrfs_fs_info * tree_fs_info(struct extent_io_tree *tree) { - if (!tree->mapping) - return NULL; - return btrfs_sb(tree->mapping->host->i_sb); + if (tree->ops) + return tree->ops->tree_fs_info(tree->private_data); + return NULL; } int __init extent_io_init(void) @@ -214,13 +204,13 @@ void extent_io_exit(void) } void extent_io_tree_init(struct extent_io_tree *tree, - struct address_space *mapping) + void *private_data) { tree->state = RB_ROOT; tree->ops = NULL; tree->dirty_bytes = 0; spin_lock_init(&tree->lock); - tree->mapping = mapping; + tree->private_data = private_data; } static struct extent_state *alloc_extent_state(gfp_t mask) @@ -370,8 +360,7 @@ static void merge_cb(struct extent_io_tree *tree, struct extent_state *new, struct extent_state *other) { if (tree->ops && tree->ops->merge_extent_hook) - tree->ops->merge_extent_hook(tree->mapping->host, new, - other); + tree->ops->merge_extent_hook(tree->private_data, new, other); } /* @@ -422,15 +411,14 @@ static void set_state_cb(struct extent_io_tree *tree, struct extent_state *state, unsigned *bits) { if (tree->ops && tree->ops->set_bit_hook) - tree->ops->set_bit_hook(tree->mapping->host, state, bits); + tree->ops->set_bit_hook(tree->private_data, state, bits); } static void clear_state_cb(struct extent_io_tree *tree, struct extent_state *state, unsigned *bits) { if (tree->ops && tree->ops->clear_bit_hook) - tree->ops->clear_bit_hook(BTRFS_I(tree->mapping->host), - state, bits); + tree->ops->clear_bit_hook(tree->private_data, state, bits); } static void set_state_bits(struct extent_io_tree *tree, @@ -479,7 +467,7 @@ static void split_cb(struct extent_io_tree *tree, struct extent_state *orig, u64 split) { if (tree->ops && tree->ops->split_extent_hook) - tree->ops->split_extent_hook(tree->mapping->host, orig, split); + tree->ops->split_extent_hook(tree->private_data, orig, split); } /* @@ -1403,17 +1391,7 @@ void extent_range_redirty_for_io(struct inode *inode, u64 start, u64 end) */ static void set_range_writeback(struct extent_io_tree *tree, u64 start, u64 end) { - unsigned long index = start >> PAGE_SHIFT; - unsigned long end_index = end >> PAGE_SHIFT; - struct page *page; - - while (index <= end_index) { - page = find_get_page(tree->mapping, index); - BUG_ON(!page); /* Pages should be in the extent_io_tree */ - set_page_writeback(page); - put_page(page); - index++; - } + tree->ops->set_range_writeback(tree->private_data, start, end); } /* find the first state struct with 'bits' set after 'start', and @@ -1962,11 +1940,12 @@ static void check_page_uptodate(struct extent_io_tree *tree, struct page *page) SetPageUptodate(page); } -int free_io_failure(struct btrfs_inode *inode, struct io_failure_record *rec) +int free_io_failure(struct extent_io_tree *failure_tree, + struct extent_io_tree *io_tree, + struct io_failure_record *rec) { int ret; int err = 0; - struct extent_io_tree *failure_tree = &inode->io_failure_tree; set_state_failrec(failure_tree, rec->start, NULL); ret = clear_extent_bits(failure_tree, rec->start, @@ -1975,7 +1954,7 @@ int free_io_failure(struct btrfs_inode *inode, struct io_failure_record *rec) if (ret) err = ret; - ret = clear_extent_bits(&inode->io_tree, rec->start, + ret = clear_extent_bits(io_tree, rec->start, rec->start + rec->len - 1, EXTENT_DAMAGED); if (ret && !err) @@ -1995,11 +1974,10 @@ int free_io_failure(struct btrfs_inode *inode, struct io_failure_record *rec) * currently, there can be no more than two copies of every data bit. thus, * exactly one rewrite is required. */ -int repair_io_failure(struct btrfs_inode *inode, u64 start, u64 length, - u64 logical, struct page *page, - unsigned int pg_offset, int mirror_num) +int repair_io_failure(struct btrfs_fs_info *fs_info, u64 ino, u64 start, + u64 length, u64 logical, struct page *page, + unsigned int pg_offset, int mirror_num) { - struct btrfs_fs_info *fs_info = inode->root->fs_info; struct bio *bio; struct btrfs_device *dev; u64 map_length = 0; @@ -2010,9 +1988,7 @@ int repair_io_failure(struct btrfs_inode *inode, u64 start, u64 length, ASSERT(!(fs_info->sb->s_flags & MS_RDONLY)); BUG_ON(!mirror_num); - bio = btrfs_io_bio_alloc(GFP_NOFS, 1); - if (!bio) - return -EIO; + bio = btrfs_io_bio_alloc(1); bio->bi_iter.bi_size = 0; map_length = length; @@ -2071,7 +2047,7 @@ int repair_io_failure(struct btrfs_inode *inode, u64 start, u64 length, btrfs_info_rl_in_rcu(fs_info, "read error corrected: ino %llu off %llu (dev %s sector %llu)", - btrfs_ino(inode), start, + ino, start, rcu_str_deref(dev->name), sector); btrfs_bio_counter_dec(fs_info); bio_put(bio); @@ -2091,8 +2067,7 @@ int repair_eb_io_failure(struct btrfs_fs_info *fs_info, for (i = 0; i < num_pages; i++) { struct page *p = eb->pages[i]; - ret = repair_io_failure(BTRFS_I(fs_info->btree_inode), start, - PAGE_SIZE, start, p, + ret = repair_io_failure(fs_info, 0, start, PAGE_SIZE, start, p, start - page_offset(p), mirror_num); if (ret) break; @@ -2106,24 +2081,24 @@ int repair_eb_io_failure(struct btrfs_fs_info *fs_info, * each time an IO finishes, we do a fast check in the IO failure tree * to see if we need to process or clean up an io_failure_record */ -int clean_io_failure(struct btrfs_inode *inode, u64 start, struct page *page, - unsigned int pg_offset) +int clean_io_failure(struct btrfs_fs_info *fs_info, + struct extent_io_tree *failure_tree, + struct extent_io_tree *io_tree, u64 start, + struct page *page, u64 ino, unsigned int pg_offset) { u64 private; struct io_failure_record *failrec; - struct btrfs_fs_info *fs_info = inode->root->fs_info; struct extent_state *state; int num_copies; int ret; private = 0; - ret = count_range_bits(&inode->io_failure_tree, &private, - (u64)-1, 1, EXTENT_DIRTY, 0); + ret = count_range_bits(failure_tree, &private, (u64)-1, 1, + EXTENT_DIRTY, 0); if (!ret) return 0; - ret = get_state_failrec(&inode->io_failure_tree, start, - &failrec); + ret = get_state_failrec(failure_tree, start, &failrec); if (ret) return 0; @@ -2139,25 +2114,25 @@ int clean_io_failure(struct btrfs_inode *inode, u64 start, struct page *page, if (fs_info->sb->s_flags & MS_RDONLY) goto out; - spin_lock(&inode->io_tree.lock); - state = find_first_extent_bit_state(&inode->io_tree, + spin_lock(&io_tree->lock); + state = find_first_extent_bit_state(io_tree, failrec->start, EXTENT_LOCKED); - spin_unlock(&inode->io_tree.lock); + spin_unlock(&io_tree->lock); if (state && state->start <= failrec->start && state->end >= failrec->start + failrec->len - 1) { num_copies = btrfs_num_copies(fs_info, failrec->logical, failrec->len); if (num_copies > 1) { - repair_io_failure(inode, start, failrec->len, - failrec->logical, page, - pg_offset, failrec->failed_mirror); + repair_io_failure(fs_info, ino, start, failrec->len, + failrec->logical, page, pg_offset, + failrec->failed_mirror); } } out: - free_io_failure(inode, failrec); + free_io_failure(failure_tree, io_tree, failrec); return 0; } @@ -2357,10 +2332,7 @@ struct bio *btrfs_create_repair_bio(struct inode *inode, struct bio *failed_bio, struct btrfs_io_bio *btrfs_failed_bio; struct btrfs_io_bio *btrfs_bio; - bio = btrfs_io_bio_alloc(GFP_NOFS, 1); - if (!bio) - return NULL; - + bio = btrfs_io_bio_alloc(1); bio->bi_end_io = endio_func; bio->bi_iter.bi_sector = failrec->logical >> 9; bio->bi_bdev = fs_info->fs_devices->latest_bdev; @@ -2398,6 +2370,7 @@ static int bio_readpage_error(struct bio *failed_bio, u64 phy_offset, struct io_failure_record *failrec; struct inode *inode = page->mapping->host; struct extent_io_tree *tree = &BTRFS_I(inode)->io_tree; + struct extent_io_tree *failure_tree = &BTRFS_I(inode)->io_failure_tree; struct bio *bio; int read_mode = 0; blk_status_t status; @@ -2411,7 +2384,7 @@ static int bio_readpage_error(struct bio *failed_bio, u64 phy_offset, ret = btrfs_check_repairable(inode, failed_bio, failrec, failed_mirror); if (!ret) { - free_io_failure(BTRFS_I(inode), failrec); + free_io_failure(failure_tree, tree, failrec); return -EIO; } @@ -2424,7 +2397,7 @@ static int bio_readpage_error(struct bio *failed_bio, u64 phy_offset, (int)phy_offset, failed_bio->bi_end_io, NULL); if (!bio) { - free_io_failure(BTRFS_I(inode), failrec); + free_io_failure(failure_tree, tree, failrec); return -EIO; } bio_set_op_attrs(bio, REQ_OP_READ, read_mode); @@ -2433,10 +2406,10 @@ static int bio_readpage_error(struct bio *failed_bio, u64 phy_offset, "Repair Read Error: submitting new read[%#x] to this_mirror=%d, in_validation=%d", read_mode, failrec->this_mirror, failrec->in_validation); - status = tree->ops->submit_bio_hook(inode, bio, failrec->this_mirror, + status = tree->ops->submit_bio_hook(tree->private_data, bio, failrec->this_mirror, failrec->bio_flags, 0); if (status) { - free_io_failure(BTRFS_I(inode), failrec); + free_io_failure(failure_tree, tree, failrec); bio_put(bio); ret = blk_status_to_errno(status); } @@ -2542,7 +2515,7 @@ static void end_bio_extent_readpage(struct bio *bio) struct bio_vec *bvec; int uptodate = !bio->bi_status; struct btrfs_io_bio *io_bio = btrfs_io_bio(bio); - struct extent_io_tree *tree; + struct extent_io_tree *tree, *failure_tree; u64 offset = 0; u64 start; u64 end; @@ -2563,6 +2536,7 @@ static void end_bio_extent_readpage(struct bio *bio) (u64)bio->bi_iter.bi_sector, bio->bi_status, io_bio->mirror_num); tree = &BTRFS_I(inode)->io_tree; + failure_tree = &BTRFS_I(inode)->io_failure_tree; /* We always issue full-page reads, but if some block * in a page fails to read, blk_update_request() will @@ -2592,8 +2566,10 @@ static void end_bio_extent_readpage(struct bio *bio) if (ret) uptodate = 0; else - clean_io_failure(BTRFS_I(inode), start, - page, 0); + clean_io_failure(BTRFS_I(inode)->root->fs_info, + failure_tree, tree, start, + page, + btrfs_ino(BTRFS_I(inode)), 0); } if (likely(uptodate)) @@ -2682,67 +2658,70 @@ readpage_ok: } /* - * this allocates from the btrfs_bioset. We're returning a bio right now - * but you can call btrfs_io_bio for the appropriate container_of magic + * Initialize the members up to but not including 'bio'. Use after allocating a + * new bio by bio_alloc_bioset as it does not initialize the bytes outside of + * 'bio' because use of __GFP_ZERO is not supported. */ -struct bio * -btrfs_bio_alloc(struct block_device *bdev, u64 first_sector, int nr_vecs, - gfp_t gfp_flags) +static inline void btrfs_io_bio_init(struct btrfs_io_bio *btrfs_bio) { - struct btrfs_io_bio *btrfs_bio; - struct bio *bio; - - bio = bio_alloc_bioset(gfp_flags, nr_vecs, btrfs_bioset); + memset(btrfs_bio, 0, offsetof(struct btrfs_io_bio, bio)); +} - if (bio == NULL && (current->flags & PF_MEMALLOC)) { - while (!bio && (nr_vecs /= 2)) { - bio = bio_alloc_bioset(gfp_flags, - nr_vecs, btrfs_bioset); - } - } +/* + * The following helpers allocate a bio. As it's backed by a bioset, it'll + * never fail. We're returning a bio right now but you can call btrfs_io_bio + * for the appropriate container_of magic + */ +struct bio *btrfs_bio_alloc(struct block_device *bdev, u64 first_byte) +{ + struct bio *bio; - if (bio) { - bio->bi_bdev = bdev; - bio->bi_iter.bi_sector = first_sector; - btrfs_bio = btrfs_io_bio(bio); - btrfs_bio->csum = NULL; - btrfs_bio->csum_allocated = NULL; - btrfs_bio->end_io = NULL; - } + bio = bio_alloc_bioset(GFP_NOFS, BIO_MAX_PAGES, btrfs_bioset); + bio->bi_bdev = bdev; + bio->bi_iter.bi_sector = first_byte >> 9; + btrfs_io_bio_init(btrfs_io_bio(bio)); return bio; } -struct bio *btrfs_bio_clone(struct bio *bio, gfp_t gfp_mask) +struct bio *btrfs_bio_clone(struct bio *bio) { struct btrfs_io_bio *btrfs_bio; struct bio *new; - new = bio_clone_bioset(bio, gfp_mask, btrfs_bioset); - if (new) { - btrfs_bio = btrfs_io_bio(new); - btrfs_bio->csum = NULL; - btrfs_bio->csum_allocated = NULL; - btrfs_bio->end_io = NULL; - } + /* Bio allocation backed by a bioset does not fail */ + new = bio_clone_fast(bio, GFP_NOFS, btrfs_bioset); + btrfs_bio = btrfs_io_bio(new); + btrfs_io_bio_init(btrfs_bio); + btrfs_bio->iter = bio->bi_iter; return new; } -/* this also allocates from the btrfs_bioset */ -struct bio *btrfs_io_bio_alloc(gfp_t gfp_mask, unsigned int nr_iovecs) +struct bio *btrfs_io_bio_alloc(unsigned int nr_iovecs) { - struct btrfs_io_bio *btrfs_bio; struct bio *bio; - bio = bio_alloc_bioset(gfp_mask, nr_iovecs, btrfs_bioset); - if (bio) { - btrfs_bio = btrfs_io_bio(bio); - btrfs_bio->csum = NULL; - btrfs_bio->csum_allocated = NULL; - btrfs_bio->end_io = NULL; - } + /* Bio allocation backed by a bioset does not fail */ + bio = bio_alloc_bioset(GFP_NOFS, nr_iovecs, btrfs_bioset); + btrfs_io_bio_init(btrfs_io_bio(bio)); return bio; } +struct bio *btrfs_bio_clone_partial(struct bio *orig, int offset, int size) +{ + struct bio *bio; + struct btrfs_io_bio *btrfs_bio; + + /* this will never fail when it's backed by a bioset */ + bio = bio_clone_fast(orig, GFP_NOFS, btrfs_bioset); + ASSERT(bio); + + btrfs_bio = btrfs_io_bio(bio); + btrfs_io_bio_init(btrfs_bio); + + bio_trim(bio, offset >> 9, size >> 9); + btrfs_bio->iter = bio->bi_iter; + return bio; +} static int __must_check submit_one_bio(struct bio *bio, int mirror_num, unsigned long bio_flags) @@ -2759,7 +2738,7 @@ static int __must_check submit_one_bio(struct bio *bio, int mirror_num, bio_get(bio); if (tree->ops) - ret = tree->ops->submit_bio_hook(page->mapping->host, bio, + ret = tree->ops->submit_bio_hook(tree->private_data, bio, mirror_num, bio_flags, start); else btrfsic_submit_bio(bio); @@ -2822,11 +2801,7 @@ static int submit_extent_page(int op, int op_flags, struct extent_io_tree *tree, } } - bio = btrfs_bio_alloc(bdev, sector, BIO_MAX_PAGES, - GFP_NOFS | __GFP_HIGH); - if (!bio) - return -ENOMEM; - + bio = btrfs_bio_alloc(bdev, sector << 9); bio_add_page(bio, page, page_size, offset); bio->bi_end_io = end_io_func; bio->bi_private = tree; @@ -3762,7 +3737,7 @@ static noinline_for_stack int write_one_eb(struct extent_buffer *eb, * header 0 1 2 .. N ... data_N .. data_2 data_1 data_0 */ start = btrfs_item_nr_offset(nritems); - end = btrfs_leaf_data(eb) + leaf_data_end(fs_info, eb); + end = BTRFS_LEAF_DATA_OFFSET + leaf_data_end(fs_info, eb); memzero_extent_buffer(eb, start, end - start); } @@ -4468,29 +4443,25 @@ try_submit_last: } /* - * Sanity check for fiemap cache + * Emit last fiemap cache * - * All fiemap cache should be submitted by emit_fiemap_extent() - * Iteration should be terminated either by last fiemap extent or - * fieinfo->fi_extents_max. - * So no cached fiemap should exist. + * The last fiemap cache may still be cached in the following case: + * 0 4k 8k + * |<- Fiemap range ->| + * |<------------ First extent ----------->| + * + * In this case, the first extent range will be cached but not emitted. + * So we must emit it before ending extent_fiemap(). */ -static int check_fiemap_cache(struct btrfs_fs_info *fs_info, - struct fiemap_extent_info *fieinfo, - struct fiemap_cache *cache) +static int emit_last_fiemap_cache(struct btrfs_fs_info *fs_info, + struct fiemap_extent_info *fieinfo, + struct fiemap_cache *cache) { int ret; if (!cache->cached) return 0; - /* Small and recoverbale problem, only to info developer */ -#ifdef CONFIG_BTRFS_DEBUG - WARN_ON(1); -#endif - btrfs_warn(fs_info, - "unhandled fiemap cache detected: offset=%llu phys=%llu len=%llu flags=0x%x", - cache->offset, cache->phys, cache->len, cache->flags); ret = fiemap_fill_next_extent(fieinfo, cache->offset, cache->phys, cache->len, cache->flags); cache->cached = false; @@ -4706,7 +4677,7 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, } out_free: if (!ret) - ret = check_fiemap_cache(root->fs_info, fieinfo, &cache); + ret = emit_last_fiemap_cache(root->fs_info, fieinfo, &cache); free_extent_map(em); out: btrfs_free_path(path); |