diff options
33 files changed, 782 insertions, 827 deletions
diff --git a/fs/btrfs/check-integrity.c b/fs/btrfs/check-integrity.c index 8e99251650b3..86f681fd200d 100644 --- a/fs/btrfs/check-integrity.c +++ b/fs/btrfs/check-integrity.c @@ -1539,7 +1539,7 @@ static int btrfsic_map_block(struct btrfsic_state *state, u64 bytenr, u32 len, struct btrfs_device *device; length = len; - ret = btrfs_map_block(state->root->fs_info, READ, + ret = btrfs_map_block(state->root->fs_info, BTRFS_MAP_READ, bytenr, &length, &multi, mirror_num); if (ret) { @@ -2819,10 +2819,11 @@ static void __btrfsic_submit_bio(struct bio *bio) * btrfsic_mount(), this might return NULL */ dev_state = btrfsic_dev_state_lookup(bio->bi_bdev); if (NULL != dev_state && - (bio_op(bio) == REQ_OP_WRITE) && NULL != bio->bi_io_vec) { + (bio_op(bio) == REQ_OP_WRITE) && bio_has_data(bio)) { unsigned int i; u64 dev_bytenr; u64 cur_bytenr; + struct bio_vec *bvec; int bio_is_patched; char **mapped_datav; @@ -2840,32 +2841,23 @@ static void __btrfsic_submit_bio(struct bio *bio) if (!mapped_datav) goto leave; cur_bytenr = dev_bytenr; - for (i = 0; i < bio->bi_vcnt; i++) { - BUG_ON(bio->bi_io_vec[i].bv_len != PAGE_SIZE); - mapped_datav[i] = kmap(bio->bi_io_vec[i].bv_page); - if (!mapped_datav[i]) { - while (i > 0) { - i--; - kunmap(bio->bi_io_vec[i].bv_page); - } - kfree(mapped_datav); - goto leave; - } + + bio_for_each_segment_all(bvec, bio, i) { + BUG_ON(bvec->bv_len != PAGE_SIZE); + mapped_datav[i] = kmap(bvec->bv_page); + if (dev_state->state->print_mask & BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH_VERBOSE) pr_info("#%u: bytenr=%llu, len=%u, offset=%u\n", - i, cur_bytenr, bio->bi_io_vec[i].bv_len, - bio->bi_io_vec[i].bv_offset); - cur_bytenr += bio->bi_io_vec[i].bv_len; + i, cur_bytenr, bvec->bv_len, bvec->bv_offset); + cur_bytenr += bvec->bv_len; } btrfsic_process_written_block(dev_state, dev_bytenr, mapped_datav, bio->bi_vcnt, bio, &bio_is_patched, NULL, bio->bi_opf); - while (i > 0) { - i--; - kunmap(bio->bi_io_vec[i].bv_page); - } + bio_for_each_segment_all(bvec, bio, i) + kunmap(bvec->bv_page); kfree(mapped_datav); } else if (NULL != dev_state && (bio->bi_opf & REQ_PREFLUSH)) { if (dev_state->state->print_mask & diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c index d4d8b7e36b2f..ae4c000cbffc 100644 --- a/fs/btrfs/compression.c +++ b/fs/btrfs/compression.c @@ -81,9 +81,9 @@ struct compressed_bio { u32 sums; }; -static int btrfs_decompress_biovec(int type, struct page **pages_in, - u64 disk_start, struct bio_vec *bvec, - int vcnt, size_t srclen); +static int btrfs_decompress_bio(int type, struct page **pages_in, + u64 disk_start, struct bio *orig_bio, + size_t srclen); static inline int compressed_bio_size(struct btrfs_root *root, unsigned long disk_size) @@ -120,7 +120,7 @@ static int check_compressed_csum(struct inode *inode, kaddr = kmap_atomic(page); csum = btrfs_csum_data(kaddr, csum, PAGE_SIZE); - btrfs_csum_final(csum, (char *)&csum); + btrfs_csum_final(csum, (u8 *)&csum); kunmap_atomic(kaddr); if (csum != *cb_sum) { @@ -175,11 +175,10 @@ static void end_compressed_bio_read(struct bio *bio) /* ok, we're the last bio for this extent, lets start * the decompression. */ - ret = btrfs_decompress_biovec(cb->compress_type, + ret = btrfs_decompress_bio(cb->compress_type, cb->compressed_pages, cb->start, - cb->orig_bio->bi_io_vec, - cb->orig_bio->bi_vcnt, + cb->orig_bio, cb->compressed_len); csum_failed: if (ret) @@ -446,6 +445,13 @@ int btrfs_submit_compressed_write(struct inode *inode, u64 start, return 0; } +static u64 bio_end_offset(struct bio *bio) +{ + struct bio_vec *last = &bio->bi_io_vec[bio->bi_vcnt - 1]; + + return page_offset(last->bv_page) + last->bv_len + last->bv_offset; +} + static noinline int add_ra_bio_pages(struct inode *inode, u64 compressed_end, struct compressed_bio *cb) @@ -464,8 +470,7 @@ static noinline int add_ra_bio_pages(struct inode *inode, u64 end; int misses = 0; - page = cb->orig_bio->bi_io_vec[cb->orig_bio->bi_vcnt - 1].bv_page; - last_offset = (page_offset(page) + PAGE_SIZE); + last_offset = bio_end_offset(cb->orig_bio); em_tree = &BTRFS_I(inode)->extent_tree; tree = &BTRFS_I(inode)->io_tree; @@ -563,7 +568,6 @@ next: * * bio->bi_iter.bi_sector points to the compressed extent on disk * bio->bi_io_vec points to all of the inode pages - * bio->bi_vcnt is a count of pages * * After the compressed pages are read, we copy the bytes into the * bio we were passed and then call the bio end_io calls @@ -575,7 +579,6 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio, struct extent_map_tree *em_tree; struct compressed_bio *cb; struct btrfs_root *root = BTRFS_I(inode)->root; - unsigned long uncompressed_len = bio->bi_vcnt * PAGE_SIZE; unsigned long compressed_len; unsigned long nr_pages; unsigned long pg_index; @@ -620,7 +623,7 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio, free_extent_map(em); em = NULL; - cb->len = uncompressed_len; + cb->len = bio->bi_iter.bi_size; cb->compressed_len = compressed_len; cb->compress_type = extent_compress_type(bio_flags); cb->orig_bio = bio; @@ -648,8 +651,7 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio, add_ra_bio_pages(inode, em_start + em_len, cb); /* include any pages we added in add_ra-bio_pages */ - uncompressed_len = bio->bi_vcnt * PAGE_SIZE; - cb->len = uncompressed_len; + cb->len = bio->bi_iter.bi_size; comp_bio = compressed_bio_alloc(bdev, cur_disk_byte, GFP_NOFS); if (!comp_bio) @@ -959,9 +961,7 @@ int btrfs_compress_pages(int type, struct address_space *mapping, * * disk_start is the starting logical offset of this array in the file * - * bvec is a bio_vec of pages from the file that we want to decompress into - * - * vcnt is the count of pages in the biovec + * orig_bio contains the pages from the file that we want to decompress into * * srclen is the number of bytes in pages_in * @@ -970,18 +970,18 @@ int btrfs_compress_pages(int type, struct address_space *mapping, * be contiguous. They all correspond to the range of bytes covered by * the compressed extent. */ -static int btrfs_decompress_biovec(int type, struct page **pages_in, - u64 disk_start, struct bio_vec *bvec, - int vcnt, size_t srclen) +static int btrfs_decompress_bio(int type, struct page **pages_in, + u64 disk_start, struct bio *orig_bio, + size_t srclen) { struct list_head *workspace; int ret; workspace = find_workspace(type); - ret = btrfs_compress_op[type-1]->decompress_biovec(workspace, pages_in, - disk_start, - bvec, vcnt, srclen); + ret = btrfs_compress_op[type-1]->decompress_bio(workspace, pages_in, + disk_start, orig_bio, + srclen); free_workspace(type, workspace); return ret; } @@ -1021,9 +1021,7 @@ void btrfs_exit_compress(void) */ int btrfs_decompress_buf2page(char *buf, unsigned long buf_start, unsigned long total_out, u64 disk_start, - struct bio_vec *bvec, int vcnt, - unsigned long *pg_index, - unsigned long *pg_offset) + struct bio *bio) { unsigned long buf_offset; unsigned long current_buf_start; @@ -1031,13 +1029,13 @@ int btrfs_decompress_buf2page(char *buf, unsigned long buf_start, unsigned long working_bytes = total_out - buf_start; unsigned long bytes; char *kaddr; - struct page *page_out = bvec[*pg_index].bv_page; + struct bio_vec bvec = bio_iter_iovec(bio, bio->bi_iter); /* * start byte is the first byte of the page we're currently * copying into relative to the start of the compressed data. */ - start_byte = page_offset(page_out) - disk_start; + start_byte = page_offset(bvec.bv_page) - disk_start; /* we haven't yet hit data corresponding to this page */ if (total_out <= start_byte) @@ -1057,80 +1055,46 @@ int btrfs_decompress_buf2page(char *buf, unsigned long buf_start, /* copy bytes from the working buffer into the pages */ while (working_bytes > 0) { - bytes = min(PAGE_SIZE - *pg_offset, - PAGE_SIZE - buf_offset); + bytes = min_t(unsigned long, bvec.bv_len, + PAGE_SIZE - buf_offset); bytes = min(bytes, working_bytes); - kaddr = kmap_atomic(page_out); - memcpy(kaddr + *pg_offset, buf + buf_offset, bytes); + + kaddr = kmap_atomic(bvec.bv_page); + memcpy(kaddr + bvec.bv_offset, buf + buf_offset, bytes); kunmap_atomic(kaddr); - flush_dcache_page(page_out); + flush_dcache_page(bvec.bv_page); - *pg_offset += bytes; buf_offset += bytes; working_bytes -= bytes; current_buf_start += bytes; /* check if we need to pick another page */ - if (*pg_offset == PAGE_SIZE) { - (*pg_index)++; - if (*pg_index >= vcnt) - return 0; + bio_advance(bio, bytes); + if (!bio->bi_iter.bi_size) + return 0; + bvec = bio_iter_iovec(bio, bio->bi_iter); - page_out = bvec[*pg_index].bv_page; - *pg_offset = 0; - start_byte = page_offset(page_out) - disk_start; + start_byte = page_offset(bvec.bv_page) - disk_start; - /* - * make sure our new page is covered by this - * working buffer - */ - if (total_out <= start_byte) - return 1; + /* + * make sure our new page is covered by this + * working buffer + */ + if (total_out <= start_byte) + return 1; - /* - * the next page in the biovec might not be adjacent - * to the last page, but it might still be found - * inside this working buffer. bump our offset pointer - */ - if (total_out > start_byte && - current_buf_start < start_byte) { - buf_offset = start_byte - buf_start; - working_bytes = total_out - start_byte; - current_buf_start = buf_start + buf_offset; - } + /* + * the next page in the biovec might not be adjacent + * to the last page, but it might still be found + * inside this working buffer. bump our offset pointer + */ + if (total_out > start_byte && + current_buf_start < start_byte) { + buf_offset = start_byte - buf_start; + working_bytes = total_out - start_byte; + current_buf_start = buf_start + buf_offset; } } return 1; } - -/* - * When uncompressing data, we need to make sure and zero any parts of - * the biovec that were not filled in by the decompression code. pg_index - * and pg_offset indicate the last page and the last offset of that page - * that have been filled in. This will zero everything remaining in the - * biovec. - */ -void btrfs_clear_biovec_end(struct bio_vec *bvec, int vcnt, - unsigned long pg_index, - unsigned long pg_offset) -{ - while (pg_index < vcnt) { - struct page *page = bvec[pg_index].bv_page; - unsigned long off = bvec[pg_index].bv_offset; - unsigned long len = bvec[pg_index].bv_len; - - if (pg_offset < off) - pg_offset = off; - if (pg_offset < off + len) { - unsigned long bytes = off + len - pg_offset; - char *kaddr; - - kaddr = kmap_atomic(page); - memset(kaddr + pg_offset, 0, bytes); - kunmap_atomic(kaddr); - } - pg_index++; - pg_offset = 0; - } -} diff --git a/fs/btrfs/compression.h b/fs/btrfs/compression.h index f49d8b8c0f00..09879579fbc8 100644 --- a/fs/btrfs/compression.h +++ b/fs/btrfs/compression.h @@ -34,9 +34,7 @@ int btrfs_decompress(int type, unsigned char *data_in, struct page *dest_page, unsigned long start_byte, size_t srclen, size_t destlen); int btrfs_decompress_buf2page(char *buf, unsigned long buf_start, unsigned long total_out, u64 disk_start, - struct bio_vec *bvec, int vcnt, - unsigned long *pg_index, - unsigned long *pg_offset); + struct bio *bio); int btrfs_submit_compressed_write(struct inode *inode, u64 start, unsigned long len, u64 disk_start, @@ -45,9 +43,6 @@ int btrfs_submit_compressed_write(struct inode *inode, u64 start, unsigned long nr_pages); int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio, int mirror_num, unsigned long bio_flags); -void btrfs_clear_biovec_end(struct bio_vec *bvec, int vcnt, - unsigned long pg_index, - unsigned long pg_offset); enum btrfs_compression_type { BTRFS_COMPRESS_NONE = 0, @@ -72,11 +67,10 @@ struct btrfs_compress_op { unsigned long *total_out, unsigned long max_out); - int (*decompress_biovec)(struct list_head *workspace, + int (*decompress_bio)(struct list_head *workspace, struct page **pages_in, u64 disk_start, - struct bio_vec *bvec, - int vcnt, + struct bio *orig_bio, size_t srclen); int (*decompress)(struct list_head *workspace, diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index f6ba165d3f81..25286a5912fc 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -260,7 +260,7 @@ int btrfs_copy_root(struct btrfs_trans_handle *trans, if (IS_ERR(cow)) return PTR_ERR(cow); - copy_extent_buffer(cow, buf, 0, 0, cow->len); + copy_extent_buffer_full(cow, buf); btrfs_set_header_bytenr(cow, cow->start); btrfs_set_header_generation(cow, trans->transid); btrfs_set_header_backref_rev(cow, BTRFS_MIXED_BACKREF_REV); @@ -271,8 +271,7 @@ int btrfs_copy_root(struct btrfs_trans_handle *trans, else btrfs_set_header_owner(cow, new_root_objectid); - write_extent_buffer(cow, root->fs_info->fsid, btrfs_header_fsid(), - BTRFS_FSID_SIZE); + write_extent_buffer_fsid(cow, root->fs_info->fsid); WARN_ON(btrfs_header_generation(buf) > trans->transid); if (new_root_objectid == BTRFS_TREE_RELOC_OBJECTID) @@ -1130,7 +1129,7 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans, /* cow is set to blocking by btrfs_init_new_buffer */ - copy_extent_buffer(cow, buf, 0, 0, cow->len); + copy_extent_buffer_full(cow, buf); btrfs_set_header_bytenr(cow, cow->start); btrfs_set_header_generation(cow, trans->transid); btrfs_set_header_backref_rev(cow, BTRFS_MIXED_BACKREF_REV); @@ -1141,8 +1140,7 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans, else btrfs_set_header_owner(cow, root->root_key.objectid); - write_extent_buffer(cow, root->fs_info->fsid, btrfs_header_fsid(), - BTRFS_FSID_SIZE); + write_extent_buffer_fsid(cow, root->fs_info->fsid); ret = update_ref_for_cow(trans, root, buf, cow, &last_ref); if (ret) { @@ -1670,7 +1668,7 @@ int btrfs_realloc_node(struct btrfs_trans_handle *trans, continue; } - cur = btrfs_find_tree_block(root->fs_info, blocknr); + cur = find_extent_buffer(root->fs_info, blocknr); if (cur) uptodate = btrfs_buffer_uptodate(cur, gen, 0); else @@ -2255,7 +2253,7 @@ static void reada_for_search(struct btrfs_root *root, search = btrfs_node_blockptr(node, slot); blocksize = root->nodesize; - eb = btrfs_find_tree_block(root->fs_info, search); + eb = find_extent_buffer(root->fs_info, search); if (eb) { free_extent_buffer(eb); return; @@ -2314,7 +2312,7 @@ static noinline void reada_for_balance(struct btrfs_root *root, if (slot > 0) { block1 = btrfs_node_blockptr(parent, slot - 1); gen = btrfs_node_ptr_generation(parent, slot - 1); - eb = btrfs_find_tree_block(root->fs_info, block1); + eb = find_extent_buffer(root->fs_info, block1); /* * if we get -eagain from btrfs_buffer_uptodate, we * don't want to return eagain here. That will loop @@ -2327,7 +2325,7 @@ static noinline void reada_for_balance(struct btrfs_root *root, if (slot + 1 < nritems) { block2 = btrfs_node_blockptr(parent, slot + 1); gen = btrfs_node_ptr_generation(parent, slot + 1); - eb = btrfs_find_tree_block(root->fs_info, block2); + eb = find_extent_buffer(root->fs_info, block2); if (eb && btrfs_buffer_uptodate(eb, gen, 1) != 0) block2 = 0; free_extent_buffer(eb); @@ -2445,7 +2443,7 @@ read_block_for_search(struct btrfs_trans_handle *trans, blocknr = btrfs_node_blockptr(b, slot); gen = btrfs_node_ptr_generation(b, slot); - tmp = btrfs_find_tree_block(root->fs_info, blocknr); + tmp = find_extent_buffer(root->fs_info, blocknr); if (tmp) { /* first we do an atomic uptodate check */ if (btrfs_buffer_uptodate(tmp, gen, 1) > 0) { @@ -3350,7 +3348,7 @@ static noinline int insert_new_root(struct btrfs_trans_handle *trans, root_add_used(root, root->nodesize); - memset_extent_buffer(c, 0, 0, sizeof(struct btrfs_header)); + memzero_extent_buffer(c, 0, sizeof(struct btrfs_header)); btrfs_set_header_nritems(c, 1); btrfs_set_header_level(c, level); btrfs_set_header_bytenr(c, c->start); @@ -3358,11 +3356,8 @@ static noinline int insert_new_root(struct btrfs_trans_handle *trans, btrfs_set_header_backref_rev(c, BTRFS_MIXED_BACKREF_REV); btrfs_set_header_owner(c, root->root_key.objectid); - write_extent_buffer(c, root->fs_info->fsid, btrfs_header_fsid(), - BTRFS_FSID_SIZE); - - write_extent_buffer(c, root->fs_info->chunk_tree_uuid, - btrfs_header_chunk_tree_uuid(c), BTRFS_UUID_SIZE); + write_extent_buffer_fsid(c, root->fs_info->fsid); + write_extent_buffer_chunk_tree_uuid(c, root->fs_info->chunk_tree_uuid); btrfs_set_node_key(c, &lower_key, 0); btrfs_set_node_blockptr(c, 0, lower->start); @@ -3489,17 +3484,15 @@ static noinline int split_node(struct btrfs_trans_handle *trans, root_add_used(root, root->nodesize); - memset_extent_buffer(split, 0, 0, sizeof(struct btrfs_header)); + memzero_extent_buffer(split, 0, sizeof(struct btrfs_header)); btrfs_set_header_level(split, btrfs_header_level(c)); btrfs_set_header_bytenr(split, split->start); btrfs_set_header_generation(split, trans->transid); btrfs_set_header_backref_rev(split, BTRFS_MIXED_BACKREF_REV); btrfs_set_header_owner(split, root->root_key.objectid); - write_extent_buffer(split, root->fs_info->fsid, - btrfs_header_fsid(), BTRFS_FSID_SIZE); - write_extent_buffer(split, root->fs_info->chunk_tree_uuid, - btrfs_header_chunk_tree_uuid(split), - BTRFS_UUID_SIZE); + write_extent_buffer_fsid(split, root->fs_info->fsid); + write_extent_buffer_chunk_tree_uuid(split, + root->fs_info->chunk_tree_uuid); ret = tree_mod_log_eb_copy(root->fs_info, split, c, 0, mid, c_nritems - mid); @@ -4277,18 +4270,14 @@ again: root_add_used(root, root->nodesize); - memset_extent_buffer(right, 0, 0, sizeof(struct btrfs_header)); + memzero_extent_buffer(right, 0, sizeof(struct btrfs_header)); btrfs_set_header_bytenr(right, right->start); btrfs_set_header_generation(right, trans->transid); btrfs_set_header_backref_rev(right, BTRFS_MIXED_BACKREF_REV); btrfs_set_header_owner(right, root->root_key.objectid); btrfs_set_header_level(right, 0); - write_extent_buffer(right, fs_info->fsid, - btrfs_header_fsid(), BTRFS_FSID_SIZE); - - write_extent_buffer(right, fs_info->chunk_tree_uuid, - btrfs_header_chunk_tree_uuid(right), - BTRFS_UUID_SIZE); + write_extent_buffer_fsid(right, fs_info->fsid); + write_extent_buffer_chunk_tree_uuid(right, fs_info->chunk_tree_uuid); if (split == 0) { if (mid <= slot) { diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 0b8ce2b9f7d0..1b25a460ecea 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -90,9 +90,6 @@ static const int btrfs_csum_sizes[] = { 4 }; /* four bytes for CRC32 */ #define BTRFS_EMPTY_DIR_SIZE 0 -/* specific to btrfs_map_block(), therefore not in include/linux/blk_types.h */ -#define REQ_GET_READ_MIRRORS (1 << 30) - /* ioprio of readahead is set to idle */ #define BTRFS_IOPRIO_READA (IOPRIO_PRIO_VALUE(IOPRIO_CLASS_IDLE, 0)) @@ -429,6 +426,10 @@ struct btrfs_space_info { struct list_head ro_bgs; struct list_head priority_tickets; struct list_head tickets; + /* + * tickets_id just indicates the next ticket will be handled, so note + * it's not stored per ticket. + */ u64 tickets_id; struct rw_semaphore groups_sem; @@ -798,7 +799,6 @@ struct btrfs_fs_info { spinlock_t super_lock; struct btrfs_super_block *super_copy; struct btrfs_super_block *super_for_commit; - struct block_device *__bdev; struct super_block *sb; struct inode *btree_inode; struct backing_dev_info bdi; @@ -2210,6 +2210,8 @@ btrfs_disk_balance_args_to_cpu(struct btrfs_balance_args *cpu, cpu->target = le64_to_cpu(disk->target); cpu->flags = le64_to_cpu(disk->flags); cpu->limit = le64_to_cpu(disk->limit); + cpu->stripes_min = le32_to_cpu(disk->stripes_min); + cpu->stripes_max = le32_to_cpu(disk->stripes_max); } static inline void @@ -2228,6 +2230,8 @@ btrfs_cpu_balance_args_to_disk(struct btrfs_disk_balance_args *disk, disk->target = cpu_to_le64(cpu->target); disk->flags = cpu_to_le64(cpu->flags); disk->limit = cpu_to_le64(cpu->limit); + disk->stripes_min = cpu_to_le32(cpu->stripes_min); + disk->stripes_max = cpu_to_le32(cpu->stripes_max); } /* struct btrfs_super_block */ @@ -3660,7 +3664,7 @@ struct reada_control *btrfs_reada_add(struct btrfs_root *root, int btrfs_reada_wait(void *handle); void btrfs_reada_detach(void *handle); int btree_readahead_hook(struct btrfs_fs_info *fs_info, - struct extent_buffer *eb, u64 start, int err); + struct extent_buffer *eb, int err); static inline int is_fstree(u64 rootid) { diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c index 0fcf5f25d524..d90d4446f9fe 100644 --- a/fs/btrfs/delayed-inode.c +++ b/fs/btrfs/delayed-inode.c @@ -1686,7 +1686,7 @@ int btrfs_should_delete_dir_index(struct list_head *del_list, * */ int btrfs_readdir_delayed_dir_index(struct dir_context *ctx, - struct list_head *ins_list, bool *emitted) + struct list_head *ins_list) { struct btrfs_dir_item *di; struct btrfs_delayed_item *curr, *next; @@ -1730,7 +1730,6 @@ int btrfs_readdir_delayed_dir_index(struct dir_context *ctx, if (over) return 1; - *emitted = true; } return 0; } diff --git a/fs/btrfs/delayed-inode.h b/fs/btrfs/delayed-inode.h index 2495b3d4075f..2c1cbe245104 100644 --- a/fs/btrfs/delayed-inode.h +++ b/fs/btrfs/delayed-inode.h @@ -146,7 +146,7 @@ void btrfs_readdir_put_delayed_items(struct inode *inode, int btrfs_should_delete_dir_index(struct list_head *del_list, u64 index); int btrfs_readdir_delayed_dir_index(struct dir_context *ctx, - struct list_head *ins_list, bool *emitted); + struct list_head *ins_list); /* for init */ int __init btrfs_delayed_inode_init(void); diff --git a/fs/btrfs/delayed-ref.c b/fs/btrfs/delayed-ref.c index 8d93854a4b4f..ef724a5fc30e 100644 --- a/fs/btrfs/delayed-ref.c +++ b/fs/btrfs/delayed-ref.c @@ -189,6 +189,8 @@ static inline void drop_delayed_ref(struct btrfs_trans_handle *trans, } else { assert_spin_locked(&head->lock); list_del(&ref->list); + if (!list_empty(&ref->add_list)) + list_del(&ref->add_list); } ref->in_tree = 0; btrfs_put_delayed_ref(ref); @@ -431,6 +433,15 @@ add_delayed_ref_tail_merge(struct btrfs_trans_handle *trans, exist->action = ref->action; mod = -exist->ref_mod; exist->ref_mod = ref->ref_mod; + if (ref->action == BTRFS_ADD_DELAYED_REF) + list_add_tail(&exist->add_list, + &href->ref_add_list); + else if (ref->action == BTRFS_DROP_DELAYED_REF) { + ASSERT(!list_empty(&exist->add_list)); + list_del(&exist->add_list); + } else { + ASSERT(0); + } } else mod = -ref->ref_mod; } @@ -444,6 +455,8 @@ add_delayed_ref_tail_merge(struct btrfs_trans_handle *trans, add_tail: list_add_tail(&ref->list, &href->ref_list); + if (ref->action == BTRFS_ADD_DELAYED_REF) + list_add_tail(&ref->add_list, &href->ref_add_list); atomic_inc(&root->num_entries); trans->delayed_ref_updates++; spin_unlock(&href->lock); @@ -590,6 +603,7 @@ add_delayed_ref_head(struct btrfs_fs_info *fs_info, head_ref->must_insert_reserved = must_insert_reserved; head_ref->is_data = is_data; INIT_LIST_HEAD(&head_ref->ref_list); + INIT_LIST_HEAD(&head_ref->ref_add_list); head_ref->processing = 0; head_ref->total_ref_mod = count_mod; head_ref->qgroup_reserved = 0; @@ -606,7 +620,7 @@ add_delayed_ref_head(struct btrfs_fs_info *fs_info, qrecord->num_bytes = num_bytes; qrecord->old_roots = NULL; - if(btrfs_qgroup_insert_dirty_extent_nolock(fs_info, + if(btrfs_qgroup_trace_extent_nolock(fs_info, delayed_refs, qrecord)) kfree(qrecord); } @@ -671,6 +685,8 @@ add_delayed_tree_ref(struct btrfs_fs_info *fs_info, ref->is_head = 0; ref->in_tree = 1; ref->seq = seq; + INIT_LIST_HEAD(&ref->list); + INIT_LIST_HEAD(&ref->add_list); full_ref = btrfs_delayed_node_to_tree_ref(ref); full_ref->parent = parent; @@ -726,6 +742,8 @@ add_delayed_data_ref(struct btrfs_fs_info *fs_info, ref->is_head = 0; ref->in_tree = 1; ref->seq = seq; + INIT_LIST_HEAD(&ref->list); + INIT_LIST_HEAD(&ref->add_list); full_ref = btrfs_delayed_node_to_data_ref(ref); full_ref->parent = parent; diff --git a/fs/btrfs/delayed-ref.h b/fs/btrfs/delayed-ref.h index 43f3629760e9..dba97842b47a 100644 --- a/fs/btrfs/delayed-ref.h +++ b/fs/btrfs/delayed-ref.h @@ -42,6 +42,12 @@ struct btrfs_delayed_ref_node { /*data/tree ref use list, stored in ref_head->ref_list. */ struct list_head list; + /* + * If action is BTRFS_ADD_DELAYED_REF, also link this node to + * ref_head->ref_add_list, then we do not need to iterate the + * whole ref_head->ref_list to find BTRFS_ADD_DELAYED_REF nodes. + */ + struct list_head add_list; /* the starting bytenr of the extent */ u64 bytenr; @@ -99,6 +105,8 @@ struct btrfs_delayed_ref_head { spinlock_t lock; struct list_head ref_list; + /* accumulate add BTRFS_ADD_DELAYED_REF nodes to this ref_add_list. */ + struct list_head ref_add_list; struct rb_node href_node; diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 3a57f99d96aa..811662cce977 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -271,7 +271,7 @@ u32 btrfs_csum_data(char *data, u32 seed, size_t len) return btrfs_crc32c(seed, data, len); } -void btrfs_csum_final(u32 crc, char *result) +void btrfs_csum_final(u32 crc, u8 *result) { put_unaligned_le32(~crc, result); } @@ -747,7 +747,7 @@ static int btree_readpage_end_io_hook(struct btrfs_io_bio *io_bio, err: if (reads_done && test_and_clear_bit(EXTENT_BUFFER_READAHEAD, &eb->bflags)) - btree_readahead_hook(fs_info, eb, eb->start, ret); + btree_readahead_hook(fs_info, eb, ret); if (ret) { /* @@ -772,7 +772,7 @@ static int btree_io_failed_hook(struct page *page, int failed_mirror) eb->read_mirror = failed_mirror; atomic_dec(&eb->io_pages); if (test_and_clear_bit(EXTENT_BUFFER_READAHEAD, &eb->bflags)) - btree_readahead_hook(eb->fs_info, eb, eb->start, -EIO); + btree_readahead_hook(eb->fs_info, eb, -EIO); return -EIO; /* we fixed nothing */ } @@ -1191,12 +1191,6 @@ int reada_tree_block_flagged(struct btrfs_root *root, u64 bytenr, return 0; } -struct extent_buffer *btrfs_find_tree_block(struct btrfs_fs_info *fs_info, - u64 bytenr) -{ - return find_extent_buffer(fs_info, bytenr); -} - struct extent_buffer *btrfs_find_create_tree_block(struct btrfs_root *root, u64 bytenr) { @@ -1418,18 +1412,15 @@ struct btrfs_root *btrfs_create_tree(struct btrfs_trans_handle *trans, goto fail; } - memset_extent_buffer(leaf, 0, 0, sizeof(struct btrfs_header)); + memzero_extent_buffer(leaf, 0, sizeof(struct btrfs_header)); btrfs_set_header_bytenr(leaf, leaf->start); btrfs_set_header_generation(leaf, trans->transid); btrfs_set_header_backref_rev(leaf, BTRFS_MIXED_BACKREF_REV); btrfs_set_header_owner(leaf, objectid); root->node = leaf; - write_extent_buffer(leaf, fs_info->fsid, btrfs_header_fsid(), - BTRFS_FSID_SIZE); - write_extent_buffer(leaf, fs_info->chunk_tree_uuid, - btrfs_header_chunk_tree_uuid(leaf), - BTRFS_UUID_SIZE); + write_extent_buffer_fsid(leaf, fs_info->fsid); + write_extent_buffer_chunk_tree_uuid(leaf, fs_info->chunk_tree_uuid); btrfs_mark_buffer_dirty(leaf); root->commit_root = btrfs_root_node(root); @@ -1505,15 +1496,14 @@ static struct btrfs_root *alloc_log_tree(struct btrfs_trans_handle *trans, return ERR_CAST(leaf); } - memset_extent_buffer(leaf, 0, 0, sizeof(struct btrfs_header)); + memzero_extent_buffer(leaf, 0, sizeof(struct btrfs_header)); btrfs_set_header_bytenr(leaf, leaf->start); btrfs_set_header_generation(leaf, trans->transid); btrfs_set_header_backref_rev(leaf, BTRFS_MIXED_BACKREF_REV); btrfs_set_header_owner(leaf, BTRFS_TREE_LOG_OBJECTID); root->node = leaf; - write_extent_buffer(root->node, root->fs_info->fsid, - btrfs_header_fsid(), BTRFS_FSID_SIZE); + write_extent_buffer_fsid(root->node, root->fs_info->fsid); btrfs_mark_buffer_dirty(root->node); btrfs_tree_unlock(root->node); return root; @@ -4354,6 +4344,8 @@ static int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans, list) { ref->in_tree = 0; list_del(&ref->list); + if (!list_empty(&ref->add_list)) + list_del(&ref->add_list); atomic_dec(&delayed_refs->num_entries); btrfs_put_delayed_ref(ref); } @@ -4452,7 +4444,7 @@ static int btrfs_destroy_marked_extents(struct btrfs_root *root, clear_extent_bits(dirty_pages, start, end, mark); while (start <= end) { - eb = btrfs_find_tree_block(root->fs_info, start); + eb = find_extent_buffer(root->fs_info, start); start += root->nodesize; if (!eb) continue; diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h index 1a3237e5700f..729540701458 100644 --- a/fs/btrfs/disk-io.h +++ b/fs/btrfs/disk-io.h @@ -63,8 +63,6 @@ struct buffer_head *btrfs_read_dev_super(struct block_device *bdev); int btrfs_read_dev_one_super(struct block_device *bdev, int copy_num, struct buffer_head **bh_ret); int btrfs_commit_super(struct btrfs_root *root); -struct extent_buffer *btrfs_find_tree_block(struct btrfs_fs_info *fs_info, - u64 bytenr); struct btrfs_root *btrfs_read_fs_root(struct btrfs_root *tree_root, struct btrfs_key *location); int btrfs_init_fs_root(struct btrfs_root *root); @@ -121,7 +119,7 @@ int btrfs_buffer_uptodate(struct extent_buffer *buf, u64 parent_transid, int atomic); int btrfs_read_buffer(struct extent_buffer *buf, u64 parent_transid); u32 btrfs_csum_data(char *data, u32 seed, size_t len); -void btrfs_csum_final(u32 crc, char *result); +void btrfs_csum_final(u32 crc, u8 *result); int btrfs_bio_wq_end_io(struct btrfs_fs_info *info, struct bio *bio, enum btrfs_wq_endio_type metadata); int btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info, struct inode *inode, diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 4607af38c72e..0d80136206b1 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -1114,7 +1114,7 @@ static int convert_extent_item_v0(struct btrfs_trans_handle *trans, BTRFS_BLOCK_FLAG_FULL_BACKREF); bi = (struct btrfs_tree_block_info *)(item + 1); /* FIXME: get first key of the block */ - memset_extent_buffer(leaf, 0, (unsigned long)bi, sizeof(*bi)); + memzero_extent_buffer(leaf, (unsigned long)bi, sizeof(*bi)); btrfs_set_tree_block_level(leaf, bi, (int)owner); } else { btrfs_set_extent_flags(leaf, item, BTRFS_EXTENT_FLAG_DATA); @@ -2036,7 +2036,7 @@ int btrfs_discard_extent(struct btrfs_root *root, u64 bytenr, */ btrfs_bio_counter_inc_blocked(root->fs_info); /* Tell the block device(s) that the sectors can be discarded */ - ret = btrfs_map_block(root->fs_info, REQ_OP_DISCARD, + ret = btrfs_map_block(root->fs_info, BTRFS_MAP_DISCARD, bytenr, &num_bytes, &bbio, 0); /* Error condition is -ENOMEM */ if (!ret) { @@ -2454,13 +2454,14 @@ select_delayed_ref(struct btrfs_delayed_ref_head *head) * the extent item from the extent tree, when there still are references * to add, which would fail because they would not find the extent item. */ - list_for_each_entry(ref, &head->ref_list, list) { - if (ref->action == BTRFS_ADD_DELAYED_REF) - return ref; - } + if (!list_empty(&head->ref_add_list)) + return list_first_entry(&head->ref_add_list, + struct btrfs_delayed_ref_node, add_list); - return list_entry(head->ref_list.next, struct btrfs_delayed_ref_node, - list); + ref = list_first_entry(&head->ref_list, struct btrfs_delayed_ref_node, + list); + ASSERT(list_empty(&ref->add_list)); + return ref; } /* @@ -2620,6 +2621,8 @@ static noinline int __btrfs_run_delayed_refs(struct btrfs_trans_handle *trans, actual_count++; ref->in_tree = 0; list_del(&ref->list); + if (!list_empty(&ref->add_list)) + list_del(&ref->add_list); } atomic_dec(&delayed_refs->num_entries); @@ -2826,7 +2829,7 @@ int btrfs_should_throttle_delayed_refs(struct btrfs_trans_handle *trans, smp_mb(); avg_runtime = fs_info->avg_delayed_ref_runtime; val = num_entries * avg_runtime; - if (num_entries * avg_runtime >= NSEC_PER_SEC) + if (val >= NSEC_PER_SEC) return 1; if (val >= NSEC_PER_SEC / 2) return 2; @@ -4322,6 +4325,13 @@ void btrfs_free_reserved_data_space_noquota(struct inode *inode, u64 start, */ void btrfs_free_reserved_data_space(struct inode *inode, u64 start, u64 len) { + struct btrfs_root *root = BTRFS_I(inode)->root; + + /* Make sure the range is aligned to sectorsize */ + len = round_up(start + len, root->sectorsize) - + round_down(start, root->sectorsize); + start = round_down(start, root->sectorsize); + btrfs_free_reserved_data_space_noquota(inode, start, len); btrfs_qgroup_free_data(inode, start, len); } @@ -6499,16 +6509,9 @@ void btrfs_wait_block_group_reservations(struct btrfs_block_group_cache *bg) * @num_bytes: The number of bytes in question * @delalloc: The blocks are allocated for the delalloc write * - * This is called by the allocator when it reserves space. Metadata - * reservations should be called with RESERVE_ALLOC so we do the proper - * ENOSPC accounting. For data we handle the reservation through clearing the - * delalloc bits in the io_tree. We have to do this since we could end up - * allocating less disk space for the amount of data we have reserved in the - * case of compression. - * - * If this is a reservation and the block group has become read only we cannot - * make the reservation and return -EAGAIN, otherwise this function always - * succeeds. + * This is called by the allocator when it reserves space. If this is a + * reservation and the block group has become read only we cannot make the + * reservation and return -EAGAIN, otherwise this function always succeeds. */ static int btrfs_add_reserved_bytes(struct btrfs_block_group_cache *cache, u64 ram_bytes, u64 num_bytes, int delalloc) @@ -8538,220 +8541,6 @@ reada: wc->reada_slot = slot; } -static int account_leaf_items(struct btrfs_trans_handle *trans, - struct btrfs_root *root, - struct extent_buffer *eb) -{ - int nr = btrfs_header_nritems(eb); - int i, extent_type, ret; - struct btrfs_key key; - struct btrfs_file_extent_item *fi; - u64 bytenr, num_bytes; - - /* We can be called directly from walk_up_proc() */ - if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &root->fs_info->flags)) - return 0; - - for (i = 0; i < nr; i++) { - btrfs_item_key_to_cpu(eb, &key, i); - - if (key.type != BTRFS_EXTENT_DATA_KEY) - continue; - - fi = btrfs_item_ptr(eb, i, struct btrfs_file_extent_item); - /* filter out non qgroup-accountable extents */ - extent_type = btrfs_file_extent_type(eb, fi); - - if (extent_type == BTRFS_FILE_EXTENT_INLINE) - continue; - - bytenr = btrfs_file_extent_disk_bytenr(eb, fi); - if (!bytenr) - continue; - - num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi); - - ret = btrfs_qgroup_insert_dirty_extent(trans, root->fs_info, - bytenr, num_bytes, GFP_NOFS); - if (ret) - return ret; - } - return 0; -} - -/* - * Walk up the tree from the bottom, freeing leaves and any interior - * nodes which have had all slots visited. If a node (leaf or - * interior) is freed, the node above it will have it's slot - * incremented. The root node will never be freed. - * - * At the end of this function, we should have a path which has all - * slots incremented to the next position for a search. If we need to - * read a new node it will be NULL and the node above it will have the - * correct slot selected for a later read. - * - * If we increment the root nodes slot counter past the number of - * elements, 1 is returned to signal completion of the search. - */ -static int adjust_slots_upwards(struct btrfs_root *root, - struct btrfs_path *path, int root_level) -{ - int level = 0; - int nr, slot; - struct extent_buffer *eb; - - if (root_level == 0) - return 1; - - while (level <= root_level) { - eb = path->nodes[level]; - nr = btrfs_header_nritems(eb); - path->slots[level]++; - slot = path->slots[level]; - if (slot >= nr || level == 0) { - /* - * Don't free the root - we will detect this - * condition after our loop and return a - * positive value for caller to stop walking the tree. - */ - if (level != root_level) { - btrfs_tree_unlock_rw(eb, path->locks[level]); - path->locks[level] = 0; - - free_extent_buffer(eb); - path->nodes[level] = NULL; - path->slots[level] = 0; - } - } else { - /* - * We have a valid slot to walk back down - * from. Stop here so caller can process these - * new nodes. - */ - break; - } - - level++; - } - - eb = path->nodes[root_level]; - if (path->slots[root_level] >= btrfs_header_nritems(eb)) - return 1; - - return 0; -} - -/* - * root_eb is the subtree root and is locked before this function is called. - */ -static int account_shared_subtree(struct btrfs_trans_handle *trans, - struct btrfs_root *root, - struct extent_buffer *root_eb, - u64 root_gen, - int root_level) -{ - int ret = 0; - int level; - struct extent_buffer *eb = root_eb; - struct btrfs_path *path = NULL; - - BUG_ON(root_level < 0 || root_level > BTRFS_MAX_LEVEL); - BUG_ON(root_eb == NULL); - - if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &root->fs_info->flags)) - return 0; - - if (!extent_buffer_uptodate(root_eb)) { - ret = btrfs_read_buffer(root_eb, root_gen); - if (ret) - goto out; - } - - if (root_level == 0) { - ret = account_leaf_items(trans, root, root_eb); - goto out; - } - - path = btrfs_alloc_path(); - if (!path) - return -ENOMEM; - - /* - * Walk down the tree. Missing extent blocks are filled in as - * we go. Metadata is accounted every time we read a new - * extent block. - * - * When we reach a leaf, we account for file extent items in it, - * walk back up the tree (adjusting slot pointers as we go) - * and restart the search process. - */ - extent_buffer_get(root_eb); /* For path */ - path->nodes[root_level] = root_eb; - path->slots[root_level] = 0; - path->locks[root_level] = 0; /* so release_path doesn't try to unlock */ -walk_down: - level = root_level; - while (level >= 0) { - if (path->nodes[level] == NULL) { - int parent_slot; - u64 child_gen; - u64 child_bytenr; - - /* We need to get child blockptr/gen from - * parent before we can read it. */ - eb = path->nodes[level + 1]; - parent_slot = path->slots[level + 1]; - child_bytenr = btrfs_node_blockptr(eb, parent_slot); - child_gen = btrfs_node_ptr_generation(eb, parent_slot); - - eb = read_tree_block(root, child_bytenr, child_gen); - if (IS_ERR(eb)) { - ret = PTR_ERR(eb); - goto out; - } else if (!extent_buffer_uptodate(eb)) { - free_extent_buffer(eb); - ret = -EIO; - goto out; - } - - path->nodes[level] = eb; - path->slots[level] = 0; - - btrfs_tree_read_lock(eb); - btrfs_set_lock_blocking_rw(eb, BTRFS_READ_LOCK); - path->locks[level] = BTRFS_READ_LOCK_BLOCKING; - - ret = btrfs_qgroup_insert_dirty_extent(trans, - root->fs_info, child_bytenr, - root->nodesize, GFP_NOFS); - if (ret) - goto out; - } - - if (level == 0) { - ret = account_leaf_items(trans, root, path->nodes[level]); - if (ret) - goto out; - - /* Nonzero return here means we completed our search */ - ret = adjust_slots_upwards(root, path, root_level); - if (ret) - break; - - /* Restart search with new slots */ - goto walk_down; - } - - level--; - } - - ret = 0; -out: - btrfs_free_path(path); - - return ret; -} - /* * helper to process tree block while walking down the tree. * @@ -8873,7 +8662,7 @@ static noinline int do_walk_down(struct btrfs_trans_handle *trans, bytenr = btrfs_node_blockptr(path->nodes[level], path->slots[level]); blocksize = root->nodesize; - next = btrfs_find_tree_block(root->fs_info, bytenr); + next = find_extent_buffer(root->fs_info, bytenr); if (!next) { next = btrfs_find_create_tree_block(root, bytenr); if (IS_ERR(next)) @@ -8980,8 +8769,8 @@ skip: } if (need_account) { - ret = account_shared_subtree(trans, root, next, - generation, level - 1); + ret = btrfs_qgroup_trace_subtree(trans, root, next, + generation, level - 1); if (ret) { btrfs_err_rl(root->fs_info, "Error %d accounting shared subtree. Quota is out of sync, rescan required.", @@ -9078,7 +8867,7 @@ static noinline int walk_up_proc(struct btrfs_trans_handle *trans, else ret = btrfs_dec_ref(trans, root, eb, 0); BUG_ON(ret); /* -ENOMEM */ - ret = account_leaf_items(trans, root, eb); + ret = btrfs_qgroup_trace_leaf_items(trans, root, eb); if (ret) { btrfs_err_rl(root->fs_info, "error %d accounting leaf items. Quota is out of sync, rescan required.", diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 8ed05d95584a..d24af9dc76c7 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -2029,7 +2029,7 @@ int repair_io_failure(struct inode *inode, u64 start, u64 length, u64 logical, * read repair operation. */ btrfs_bio_counter_inc_blocked(fs_info); - ret = btrfs_map_block(fs_info, WRITE, logical, + ret = btrfs_map_block(fs_info, BTRFS_MAP_WRITE, logical, &map_length, &bbio, mirror_num); if (ret) { btrfs_bio_counter_dec(fs_info); @@ -3743,7 +3743,7 @@ static noinline_for_stack int write_one_eb(struct extent_buffer *eb, if (btrfs_header_level(eb) > 0) { end = btrfs_node_key_ptr_offset(nritems); - memset_extent_buffer(eb, 0, end, eb->len - end); + memzero_extent_buffer(eb, end, eb->len - end); } else { /* * leaf: @@ -3752,7 +3752,7 @@ static noinline_for_stack int write_one_eb(struct extent_buffer *eb, start = btrfs_item_nr_offset(nritems); end = btrfs_leaf_data(eb) + leaf_data_end(fs_info->tree_root, eb); - memset_extent_buffer(eb, 0, start, end - start); + memzero_extent_buffer(eb, start, end - start); } for (i = 0; i < num_pages; i++) { @@ -4720,9 +4720,9 @@ struct extent_buffer *btrfs_clone_extent_buffer(struct extent_buffer *src) WARN_ON(PageDirty(p)); SetPageUptodate(p); new->pages[i] = p; + copy_page(page_address(p), page_address(src->pages[i])); } - copy_extent_buffer(new, src, 0, 0, src->len); set_bit(EXTENT_BUFFER_UPTODATE, &new->bflags); set_bit(EXTENT_BUFFER_DUMMY, &new->bflags); @@ -5465,6 +5465,27 @@ int memcmp_extent_buffer(struct extent_buffer *eb, const void *ptrv, return ret; } +void write_extent_buffer_chunk_tree_uuid(struct extent_buffer *eb, + const void *srcv) +{ + char *kaddr; + + WARN_ON(!PageUptodate(eb->pages[0])); + kaddr = page_address(eb->pages[0]); + memcpy(kaddr + offsetof(struct btrfs_header, chunk_tree_uuid), srcv, + BTRFS_FSID_SIZE); +} + +void write_extent_buffer_fsid(struct extent_buffer *eb, const void *srcv) +{ + char *kaddr; + + WARN_ON(!PageUptodate(eb->pages[0])); + kaddr = page_address(eb->pages[0]); + memcpy(kaddr + offsetof(struct btrfs_header, fsid), srcv, + BTRFS_FSID_SIZE); +} + void write_extent_buffer(struct extent_buffer *eb, const void *srcv, unsigned long start, unsigned long len) { @@ -5496,8 +5517,8 @@ void write_extent_buffer(struct extent_buffer *eb, const void *srcv, } } -void memset_extent_buffer(struct extent_buffer *eb, char c, - unsigned long start, unsigned long len) +void memzero_extent_buffer(struct extent_buffer *eb, unsigned long start, + unsigned long len) { size_t cur; size_t offset; @@ -5517,7 +5538,7 @@ void memset_extent_buffer(struct extent_buffer *eb, char c, cur = min(len, PAGE_SIZE - offset); kaddr = page_address(page); - memset(kaddr + offset, c, cur); + memset(kaddr + offset, 0, cur); len -= cur; offset = 0; @@ -5525,6 +5546,20 @@ void memset_extent_buffer(struct extent_buffer *eb, char c, } } +void copy_extent_buffer_full(struct extent_buffer *dst, + struct extent_buffer *src) +{ + int i; + unsigned num_pages; + + ASSERT(dst->len == src->len); + + num_pages = num_extent_pages(dst->start, dst->len); + for (i = 0; i < num_pages; i++) + copy_page(page_address(dst->pages[i]), + page_address(src->pages[i])); +} + void copy_extent_buffer(struct extent_buffer *dst, struct extent_buffer *src, unsigned long dst_offset, unsigned long src_offset, unsigned long len) diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h index ab31d145227e..ae64c1917d0a 100644 --- a/fs/btrfs/extent_io.h +++ b/fs/btrfs/extent_io.h @@ -405,8 +405,13 @@ void read_extent_buffer(struct extent_buffer *eb, void *dst, int read_extent_buffer_to_user(struct extent_buffer *eb, void __user *dst, unsigned long start, unsigned long len); +void write_extent_buffer_fsid(struct extent_buffer *eb, const void *src); +void write_extent_buffer_chunk_tree_uuid(struct extent_buffer *eb, + const void *src); void write_extent_buffer(struct extent_buffer *eb, const void *src, unsigned long start, unsigned long len); +void copy_extent_buffer_full(struct extent_buffer *dst, + struct extent_buffer *src); void copy_extent_buffer(struct extent_buffer *dst, struct extent_buffer *src, unsigned long dst_offset, unsigned long src_offset, unsigned long len); @@ -414,8 +419,8 @@ void memcpy_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset, unsigned long src_offset, unsigned long len); void memmove_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset, unsigned long src_offset, unsigned long len); -void memset_extent_buffer(struct extent_buffer *eb, char c, - unsigned long start, unsigned long len); +void memzero_extent_buffer(struct extent_buffer *eb, unsigned long start, + unsigned long len); int extent_buffer_test_bit(struct extent_buffer *eb, unsigned long start, unsigned long pos); void extent_buffer_bitmap_set(struct extent_buffer *eb, unsigned long start, diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c index d0d571c47d33..5e74178ba9d9 100644 --- a/fs/btrfs/file-item.c +++ b/fs/btrfs/file-item.c @@ -163,7 +163,7 @@ static int __btrfs_lookup_bio_sums(struct btrfs_root *root, struct inode *inode, struct bio *bio, u64 logical_offset, u32 *dst, int dio) { - struct bio_vec *bvec = bio->bi_io_vec; + struct bio_vec *bvec; struct btrfs_io_bio *btrfs_bio = btrfs_io_bio(bio); struct btrfs_csum_item *item = NULL; struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; @@ -176,8 +176,7 @@ static int __btrfs_lookup_bio_sums(struct btrfs_root *root, u64 page_bytes_left; u32 diff; int nblocks; - int bio_index = 0; - int count; + int count = 0, i; u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy); path = btrfs_alloc_path(); @@ -223,8 +222,11 @@ static int __btrfs_lookup_bio_sums(struct btrfs_root *root, if (dio) offset = logical_offset; - page_bytes_left = bvec->bv_len; - while (bio_index < bio->bi_vcnt) { + bio_for_each_segment_all(bvec, bio, i) { + page_bytes_left = bvec->bv_len; + if (count) + goto next; + if (!dio) offset = page_offset(bvec->bv_page) + bvec->bv_offset; count = btrfs_find_ordered_sum(inode, offset, disk_bytenr, @@ -285,29 +287,17 @@ static int __btrfs_lookup_bio_sums(struct btrfs_root *root, found: csum += count * csum_size; nblocks -= count; - +next: while (count--) { disk_bytenr += root->sectorsize; offset += root->sectorsize; page_bytes_left -= root->sectorsize; - if (!page_bytes_left) { - bio_index++; - /* - * make sure we're still inside the - * bio before we update page_bytes_left - */ - if (bio_index >= bio->bi_vcnt) { - WARN_ON_ONCE(count); - goto done; - } - bvec++; - page_bytes_left = bvec->bv_len; - } - + if (!page_bytes_left) + break; /* move to next bio */ } } -done: + WARN_ON_ONCE(count); btrfs_free_path(path); return 0; } @@ -447,13 +437,12 @@ int btrfs_csum_one_bio(struct btrfs_root *root, struct inode *inode, struct bio *bio, u64 file_start, int contig) { struct btrfs_ordered_sum *sums; - struct btrfs_ordered_extent *ordered; + struct btrfs_ordered_extent *ordered = NULL; char *data; - struct bio_vec *bvec = bio->bi_io_vec; - int bio_index = 0; + struct bio_vec *bvec; int index; int nr_sectors; - int i; + int i, j; unsigned long total_bytes = 0; unsigned long this_sum_bytes = 0; u64 offset; @@ -470,17 +459,20 @@ int btrfs_csum_one_bio(struct btrfs_root *root, struct inode *inode, if (contig) offset = file_start; else - offset = page_offset(bvec->bv_page) + bvec->bv_offset; + offset = 0; /* shut up gcc */ - ordered = btrfs_lookup_ordered_extent(inode, offset); - BUG_ON(!ordered); /* Logic error */ sums->bytenr = (u64)bio->bi_iter.bi_sector << 9; index = 0; - while (bio_index < bio->bi_vcnt) { + bio_for_each_segment_all(bvec, bio, j) { if (!contig) offset = page_offset(bvec->bv_page) + bvec->bv_offset; + if (!ordered) { + ordered = btrfs_lookup_ordered_extent(inode, offset); + BUG_ON(!ordered); /* Logic error */ + } + data = kmap_atomic(bvec->bv_page); nr_sectors = BTRFS_BYTES_TO_BLKS(root->fs_info, @@ -529,9 +521,6 @@ int btrfs_csum_one_bio(struct btrfs_root *root, struct inode *inode, } kunmap_atomic(data); - - bio_index++; - bvec++; } this_sum_bytes = 0; btrfs_add_ordered_sum(inode, ordered, sums); @@ -689,7 +678,7 @@ int btrfs_del_csums(struct btrfs_trans_handle *trans, item_offset = btrfs_item_ptr_offset(leaf, path->slots[0]); - memset_extent_buffer(leaf, 0, item_offset + offset, + memzero_extent_buffer(leaf, item_offset + offset, shift_len); key.offset = bytenr; diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 3a14c87d9c92..3c1f4be36f16 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -27,7 +27,6 @@ #include <linux/falloc.h> #include <linux/swap.h> #include <linux/writeback.h> -#include <linux/statfs.h> #include <linux/compat.h> #include <linux/slab.h> #include <linux/btrfs.h> @@ -706,6 +705,7 @@ int __btrfs_drop_extents(struct btrfs_trans_handle *trans, u64 num_bytes = 0; u64 extent_offset = 0; u64 extent_end = 0; + u64 last_end = start; int del_nr = 0; int del_slot = 0; int extent_type; @@ -797,8 +797,10 @@ next_slot: * extent item in the call to setup_items_for_insert() later * in this function. */ - if (extent_end == key.offset && extent_end >= search_start) + if (extent_end == key.offset && extent_end >= search_start) { + last_end = extent_end; goto delete_extent_item; + } if (extent_end <= search_start) { path->slots[0]++; @@ -861,6 +863,12 @@ next_slot: key.offset = start; } /* + * From here on out we will have actually dropped something, so + * last_end can be updated. + */ + last_end = extent_end; + + /* * | ---- range to drop ----- | * | -------- extent -------- | */ @@ -1010,7 +1018,7 @@ delete_extent_item: if (!replace_extent || !(*key_inserted)) btrfs_release_path(path); if (drop_end) - *drop_end = found ? min(end, extent_end) : end; + *drop_end = found ? min(end, last_end) : end; return ret; } @@ -2224,9 +2232,15 @@ static int fill_holes(struct btrfs_trans_handle *trans, struct inode *inode, key.offset = offset; ret = btrfs_search_slot(trans, root, &key, path, 0, 1); - if (ret < 0) + if (ret <= 0) { + /* + * We should have dropped this offset, so if we find it then + * something has gone horribly wrong. + */ + if (ret == 0) + ret = -EINVAL; return ret; - BUG_ON(!ret); + } leaf = path->nodes[0]; if (hole_mergeable(inode, leaf, path->slots[0]-1, offset, end)) { @@ -2525,10 +2539,17 @@ static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len) trans->block_rsv = &root->fs_info->trans_block_rsv; - if (cur_offset < ino_size) { + if (cur_offset < drop_end && cur_offset < ino_size) { ret = fill_holes(trans, inode, path, cur_offset, drop_end); if (ret) { + /* + * If we failed then we didn't insert our hole + * entries for the area we dropped, so now the + * fs is corrupted, so we must abort the + * transaction. + */ + btrfs_abort_transaction(trans, ret); err = ret; break; } @@ -2593,6 +2614,8 @@ static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len) if (cur_offset < ino_size && cur_offset < drop_end) { ret = fill_holes(trans, inode, path, cur_offset, drop_end); if (ret) { + /* Same comment as above. */ + btrfs_abort_transaction(trans, ret); err = ret; goto out_trans; } diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c index e4b48f377d3a..c698dccb3757 100644 --- a/fs/btrfs/free-space-cache.c +++ b/fs/btrfs/free-space-cache.c @@ -75,8 +75,6 @@ static struct inode *__lookup_free_space_inode(struct btrfs_root *root, btrfs_release_path(path); inode = btrfs_iget(root->fs_info->sb, &location, root, NULL); - if (!inode) - return ERR_PTR(-ENOENT); if (IS_ERR(inode)) return inode; if (is_bad_inode(inode)) { @@ -153,7 +151,7 @@ static int __create_free_space_inode(struct btrfs_root *root, inode_item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_inode_item); btrfs_item_key(leaf, &disk_key, path->slots[0]); - memset_extent_buffer(leaf, 0, (unsigned long)inode_item, + memzero_extent_buffer(leaf, (unsigned long)inode_item, sizeof(*inode_item)); btrfs_set_inode_generation(leaf, inode_item, trans->transid); btrfs_set_inode_size(leaf, inode_item, 0); @@ -181,7 +179,7 @@ static int __create_free_space_inode(struct btrfs_root *root, leaf = path->nodes[0]; header = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_free_space_header); - memset_extent_buffer(leaf, 0, (unsigned long)header, sizeof(*header)); + memzero_extent_buffer(leaf, (unsigned long)header, sizeof(*header)); btrfs_set_free_space_key(leaf, header, &disk_key); btrfs_mark_buffer_dirty(leaf); btrfs_release_path(path); @@ -476,7 +474,7 @@ static void io_ctl_set_crc(struct btrfs_io_ctl *io_ctl, int index) crc = btrfs_csum_data(io_ctl->orig + offset, crc, PAGE_SIZE - offset); - btrfs_csum_final(crc, (char *)&crc); + btrfs_csum_final(crc, (u8 *)&crc); io_ctl_unmap_page(io_ctl); tmp = page_address(io_ctl->pages[0]); tmp += index; @@ -504,7 +502,7 @@ static int io_ctl_check_crc(struct btrfs_io_ctl *io_ctl, int index) io_ctl_map_page(io_ctl, 0); crc = btrfs_csum_data(io_ctl->orig + offset, crc, PAGE_SIZE - offset); - btrfs_csum_final(crc, (char *)&crc); + btrfs_csum_final(crc, (u8 *)&crc); if (val != crc) { btrfs_err_rl(io_ctl->root->fs_info, "csum mismatch on free space cache"); diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 8e3a5a266917..c96d94ef846d 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -30,7 +30,6 @@ #include <linux/mpage.h> #include <linux/swap.h> #include <linux/writeback.h> -#include <linux/statfs.h> #include <linux/compat.h> #include <linux/bit_spinlock.h> #include <linux/xattr.h> @@ -1864,7 +1863,7 @@ int btrfs_merge_bio_hook(struct page *page, unsigned long offset, length = bio->bi_iter.bi_size; map_length = length; - ret = btrfs_map_block(root->fs_info, bio_op(bio), logical, + ret = btrfs_map_block(root->fs_info, btrfs_op(bio), logical, &map_length, NULL, 0); if (ret < 0) return ret; @@ -3103,7 +3102,7 @@ static int __readpage_endio_check(struct inode *inode, kaddr = kmap_atomic(page); csum = btrfs_csum_data(kaddr + pgoff, csum, len); - btrfs_csum_final(csum, (char *)&csum); + btrfs_csum_final(csum, (u8 *)&csum); if (csum != csum_expected) goto zeroit; @@ -5805,20 +5804,11 @@ static int btrfs_real_readdir(struct file *file, struct dir_context *ctx) int slot; unsigned char d_type; int over = 0; - u32 di_cur; - u32 di_total; - u32 di_len; - int key_type = BTRFS_DIR_INDEX_KEY; char tmp_name[32]; char *name_ptr; int name_len; - int is_curr = 0; /* ctx->pos points to the current index? */ - bool emitted; bool put = false; - - /* FIXME, use a real flag for deciding about the key type */ - if (root->fs_info->tree_root == root) - key_type = BTRFS_DIR_ITEM_KEY; + struct btrfs_key location; if (!dir_emit_dots(file, ctx)) return 0; @@ -5829,14 +5819,11 @@ static int btrfs_real_readdir(struct file *file, struct dir_context *ctx) path->reada = READA_FORWARD; - if (key_type == BTRFS_DIR_INDEX_KEY) { - INIT_LIST_HEAD(&ins_list); - INIT_LIST_HEAD(&del_list); - put = btrfs_readdir_get_delayed_items(inode, &ins_list, - &del_list); - } + INIT_LIST_HEAD(&ins_list); + INIT_LIST_HEAD(&del_list); + put = btrfs_readdir_get_delayed_items(inode, &ins_list, &del_list); - key.type = key_type; + key.type = BTRFS_DIR_INDEX_KEY; key.offset = ctx->pos; key.objectid = btrfs_ino(inode); @@ -5844,7 +5831,6 @@ static int btrfs_real_readdir(struct file *file, struct dir_context *ctx) if (ret < 0) goto err; - emitted = false; while (1) { leaf = path->nodes[0]; slot = path->slots[0]; @@ -5862,98 +5848,52 @@ static int btrfs_real_readdir(struct file *file, struct dir_context *ctx) if (found_key.objectid != key.objectid) break; - if (found_key.type != key_type) + if (found_key.type != BTRFS_DIR_INDEX_KEY) break; if (found_key.offset < ctx->pos) goto next; - if (key_type == BTRFS_DIR_INDEX_KEY && - btrfs_should_delete_dir_index(&del_list, - found_key.offset)) + if (btrfs_should_delete_dir_index(&del_list, found_key.offset)) goto next; ctx->pos = found_key.offset; - is_curr = 1; di = btrfs_item_ptr(leaf, slot, struct btrfs_dir_item); - di_cur = 0; - di_total = btrfs_item_size(leaf, item); - - while (di_cur < di_total) { - struct btrfs_key location; - - if (verify_dir_item(root, leaf, di)) - break; + if (verify_dir_item(root, leaf, di)) + goto next; - name_len = btrfs_dir_name_len(leaf, di); - if (name_len <= sizeof(tmp_name)) { - name_ptr = tmp_name; - } else { - name_ptr = kmalloc(name_len, GFP_KERNEL); - if (!name_ptr) { - ret = -ENOMEM; - goto err; - } + name_len = btrfs_dir_name_len(leaf, di); + if (name_len <= sizeof(tmp_name)) { + name_ptr = tmp_name; + } else { + name_ptr = kmalloc(name_len, GFP_KERNEL); + if (!name_ptr) { + ret = -ENOMEM; + goto err; } - read_extent_buffer(leaf, name_ptr, - (unsigned long)(di + 1), name_len); - - d_type = btrfs_filetype_table[btrfs_dir_type(leaf, di)]; - btrfs_dir_item_key_to_cpu(leaf, di, &location); + } + read_extent_buffer(leaf, name_ptr, (unsigned long)(di + 1), + name_len); + d_type = btrfs_filetype_table[btrfs_dir_type(leaf, di)]; + btrfs_dir_item_key_to_cpu(leaf, di, &location); - /* is this a reference to our own snapshot? If so - * skip it. - * - * In contrast to old kernels, we insert the snapshot's - * dir item and dir index after it has been created, so - * we won't find a reference to our own snapshot. We - * still keep the following code for backward - * compatibility. - */ - if (location.type == BTRFS_ROOT_ITEM_KEY && - location.objectid == root->root_key.objectid) { - over = 0; - goto skip; - } - over = !dir_emit(ctx, name_ptr, name_len, - location.objectid, d_type); + over = !dir_emit(ctx, name_ptr, name_len, location.objectid, + d_type); -skip: - if (name_ptr != tmp_name) - kfree(name_ptr); + if (name_ptr != tmp_name) + kfree(name_ptr); - if (over) - goto nopos; - emitted = true; - di_len = btrfs_dir_name_len(leaf, di) + - btrfs_dir_data_len(leaf, di) + sizeof(*di); - di_cur += di_len; - di = (struct btrfs_dir_item *)((char *)di + di_len); - } + if (over) + goto nopos; + ctx->pos++; next: path->slots[0]++; } - if (key_type == BTRFS_DIR_INDEX_KEY) { - if (is_curr) - ctx->pos++; - ret = btrfs_readdir_delayed_dir_index(ctx, &ins_list, &emitted); - if (ret) - goto nopos; - } - - /* - * If we haven't emitted any dir entry, we must not touch ctx->pos as - * it was was set to the termination value in previous call. We assume - * that "." and ".." were emitted if we reach this point and set the - * termination value as well for an empty directory. - */ - if (ctx->pos > 2 && !emitted) + ret = btrfs_readdir_delayed_dir_index(ctx, &ins_list); + if (ret) goto nopos; - /* Reached end of directory/root. Bump pos past the last item. */ - ctx->pos++; - /* * Stop new entries from being returned after we return the last * entry. @@ -5971,12 +5911,10 @@ next: * last entry requires it because doing so has broken 32bit apps * in the past. */ - if (key_type == BTRFS_DIR_INDEX_KEY) { - if (ctx->pos >= INT_MAX) - ctx->pos = LLONG_MAX; - else - ctx->pos = INT_MAX; - } + if (ctx->pos >= INT_MAX) + ctx->pos = LLONG_MAX; + else + ctx->pos = INT_MAX; nopos: ret = 0; err: @@ -6277,7 +6215,7 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans, inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0], struct btrfs_inode_item); - memset_extent_buffer(path->nodes[0], 0, (unsigned long)inode_item, + memzero_extent_buffer(path->nodes[0], (unsigned long)inode_item, sizeof(*inode_item)); fill_inode_item(trans, path->nodes[0], inode_item, inode); @@ -7049,11 +6987,11 @@ insert: * extent causing the -EEXIST. */ if (existing->start == em->start && - extent_map_end(existing) == extent_map_end(em) && + extent_map_end(existing) >= extent_map_end(em) && em->block_start == existing->block_start) { /* - * these two extents are the same, it happens - * with inlines especially + * The existing extent map already encompasses the + * entire extent map we tried to add. */ free_extent_map(em); em = existing; @@ -7783,10 +7721,12 @@ static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock, } /* - * this will cow the extent, reset the len in case we changed - * it above + * this will cow the extent, if em is within [start, len], then + * probably we've found a preallocated/existing extent, let's + * give it a chance to use preallocated space. */ - len = bh_result->b_size; + len = min_t(u64, bh_result->b_size, em->len - (start - em->start)); + len = ALIGN(len, root->sectorsize); free_extent_map(em); em = btrfs_new_extent_direct(inode, start, len); if (IS_ERR(em)) { @@ -8394,7 +8334,7 @@ static int btrfs_submit_direct_hook(struct btrfs_dio_private *dip, struct btrfs_root *root = BTRFS_I(inode)->root; struct bio *bio; struct bio *orig_bio = dip->orig_bio; - struct bio_vec *bvec = orig_bio->bi_io_vec; + struct bio_vec *bvec; u64 start_sector = orig_bio->bi_iter.bi_sector; u64 file_offset = dip->logical_offset; u64 submit_len = 0; @@ -8403,10 +8343,10 @@ static int btrfs_submit_direct_hook(struct btrfs_dio_private *dip, int async_submit = 0; int nr_sectors; int ret; - int i; + int i, j; map_length = orig_bio->bi_iter.bi_size; - ret = btrfs_map_block(root->fs_info, bio_op(orig_bio), + ret = btrfs_map_block(root->fs_info, btrfs_op(orig_bio), start_sector << 9, &map_length, NULL, 0); if (ret) return -EIO; @@ -8433,7 +8373,7 @@ static int btrfs_submit_direct_hook(struct btrfs_dio_private *dip, btrfs_io_bio(bio)->logical = file_offset; atomic_inc(&dip->pending_bios); - while (bvec <= (orig_bio->bi_io_vec + orig_bio->bi_vcnt - 1)) { + bio_for_each_segment_all(bvec, orig_bio, j) { nr_sectors = BTRFS_BYTES_TO_BLKS(root->fs_info, bvec->bv_len); i = 0; next_block: @@ -8472,7 +8412,7 @@ next_block: btrfs_io_bio(bio)->logical = file_offset; map_length = orig_bio->bi_iter.bi_size; - ret = btrfs_map_block(root->fs_info, bio_op(orig_bio), + ret = btrfs_map_block(root->fs_info, btrfs_op(orig_bio), start_sector << 9, &map_length, NULL, 0); if (ret) { @@ -8487,7 +8427,6 @@ next_block: i++; goto next_block; } - bvec++; } } diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 7acbd2cf6192..4a20f3e68cb4 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -33,7 +33,6 @@ #include <linux/namei.h> #include <linux/swap.h> #include <linux/writeback.h> -#include <linux/statfs.h> #include <linux/compat.h> #include <linux/bit_spinlock.h> #include <linux/security.h> @@ -502,17 +501,15 @@ static noinline int create_subvol(struct inode *dir, goto fail; } - memset_extent_buffer(leaf, 0, 0, sizeof(struct btrfs_header)); + memzero_extent_buffer(leaf, 0, sizeof(struct btrfs_header)); btrfs_set_header_bytenr(leaf, leaf->start); btrfs_set_header_generation(leaf, trans->transid); btrfs_set_header_backref_rev(leaf, BTRFS_MIXED_BACKREF_REV); btrfs_set_header_owner(leaf, objectid); - write_extent_buffer(leaf, root->fs_info->fsid, btrfs_header_fsid(), - BTRFS_FSID_SIZE); - write_extent_buffer(leaf, root->fs_info->chunk_tree_uuid, - btrfs_header_chunk_tree_uuid(leaf), - BTRFS_UUID_SIZE); + write_extent_buffer_fsid(leaf, root->fs_info->fsid); + write_extent_buffer_chunk_tree_uuid(leaf, + root->fs_info->chunk_tree_uuid); btrfs_mark_buffer_dirty(leaf); inode_item = &root_item->inode; @@ -4572,11 +4569,8 @@ static long btrfs_ioctl_logical_to_ino(struct btrfs_root *root, return -EPERM; loi = memdup_user(arg, sizeof(*loi)); - if (IS_ERR(loi)) { - ret = PTR_ERR(loi); - loi = NULL; - goto out; - } + if (IS_ERR(loi)) + return PTR_ERR(loi); path = btrfs_alloc_path(); if (!path) { @@ -5203,11 +5197,8 @@ static long btrfs_ioctl_set_received_subvol_32(struct file *file, int ret = 0; args32 = memdup_user(arg, sizeof(*args32)); - if (IS_ERR(args32)) { - ret = PTR_ERR(args32); - args32 = NULL; - goto out; - } + if (IS_ERR(args32)) + return PTR_ERR(args32); args64 = kmalloc(sizeof(*args64), GFP_KERNEL); if (!args64) { @@ -5255,11 +5246,8 @@ static long btrfs_ioctl_set_received_subvol(struct file *file, int ret = 0; sa = memdup_user(arg, sizeof(*sa)); - if (IS_ERR(sa)) { - ret = PTR_ERR(sa); - sa = NULL; - goto out; - } + if (IS_ERR(sa)) + return PTR_ERR(sa); ret = _btrfs_ioctl_set_received_subvol(file, sa); diff --git a/fs/btrfs/lzo.c b/fs/btrfs/lzo.c index 48655da0f4ca..45d26980caf9 100644 --- a/fs/btrfs/lzo.c +++ b/fs/btrfs/lzo.c @@ -254,25 +254,21 @@ out: return ret; } -static int lzo_decompress_biovec(struct list_head *ws, +static int lzo_decompress_bio(struct list_head *ws, struct page **pages_in, u64 disk_start, - struct bio_vec *bvec, - int vcnt, + struct bio *orig_bio, size_t srclen) { struct workspace *workspace = list_entry(ws, struct workspace, list); int ret = 0, ret2; char *data_in; unsigned long page_in_index = 0; - unsigned long page_out_index = 0; unsigned long total_pages_in = DIV_ROUND_UP(srclen, PAGE_SIZE); unsigned long buf_start; unsigned long buf_offset = 0; unsigned long bytes; unsigned long working_bytes; - unsigned long pg_offset; - size_t in_len; size_t out_len; unsigned long in_offset; @@ -292,7 +288,6 @@ static int lzo_decompress_biovec(struct list_head *ws, in_page_bytes_left = PAGE_SIZE - LZO_LEN; tot_out = 0; - pg_offset = 0; while (tot_in < tot_len) { in_len = read_compress_length(data_in + in_offset); @@ -365,16 +360,14 @@ cont: tot_out += out_len; ret2 = btrfs_decompress_buf2page(workspace->buf, buf_start, - tot_out, disk_start, - bvec, vcnt, - &page_out_index, &pg_offset); + tot_out, disk_start, orig_bio); if (ret2 == 0) break; } done: kunmap(pages_in[page_in_index]); if (!ret) - btrfs_clear_biovec_end(bvec, vcnt, page_out_index, pg_offset); + zero_fill_bio(orig_bio); return ret; } @@ -438,6 +431,6 @@ const struct btrfs_compress_op btrfs_lzo_compress = { .alloc_workspace = lzo_alloc_workspace, .free_workspace = lzo_free_workspace, .compress_pages = lzo_compress_pages, - .decompress_biovec = lzo_decompress_biovec, + .decompress_bio = lzo_decompress_bio, .decompress = lzo_decompress, }; diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c index 11f4fffe503e..605a3227980a 100644 --- a/fs/btrfs/qgroup.c +++ b/fs/btrfs/qgroup.c @@ -131,8 +131,15 @@ struct btrfs_qgroup_list { struct btrfs_qgroup *member; }; -#define ptr_to_u64(x) ((u64)(uintptr_t)x) -#define u64_to_ptr(x) ((struct btrfs_qgroup *)(uintptr_t)x) +static inline u64 qgroup_to_aux(struct btrfs_qgroup *qg) +{ + return (u64)(uintptr_t)qg; +} + +static inline struct btrfs_qgroup* unode_aux_to_qgroup(struct ulist_node *n) +{ + return (struct btrfs_qgroup *)(uintptr_t)n->aux; +} static int qgroup_rescan_init(struct btrfs_fs_info *fs_info, u64 progress_objectid, @@ -1066,7 +1073,7 @@ static int __qgroup_excl_accounting(struct btrfs_fs_info *fs_info, /* Get all of the parent groups that contain this qgroup */ list_for_each_entry(glist, &qgroup->groups, next_group) { ret = ulist_add(tmp, glist->group->qgroupid, - ptr_to_u64(glist->group), GFP_ATOMIC); + qgroup_to_aux(glist->group), GFP_ATOMIC); if (ret < 0) goto out; } @@ -1074,7 +1081,7 @@ static int __qgroup_excl_accounting(struct btrfs_fs_info *fs_info, /* Iterate all of the parents and adjust their reference counts */ ULIST_ITER_INIT(&uiter); while ((unode = ulist_next(tmp, &uiter))) { - qgroup = u64_to_ptr(unode->aux); + qgroup = unode_aux_to_qgroup(unode); qgroup->rfer += sign * num_bytes; qgroup->rfer_cmpr += sign * num_bytes; WARN_ON(sign < 0 && qgroup->excl < num_bytes); @@ -1087,7 +1094,7 @@ static int __qgroup_excl_accounting(struct btrfs_fs_info *fs_info, /* Add any parents of the parents */ list_for_each_entry(glist, &qgroup->groups, next_group) { ret = ulist_add(tmp, glist->group->qgroupid, - ptr_to_u64(glist->group), GFP_ATOMIC); + qgroup_to_aux(glist->group), GFP_ATOMIC); if (ret < 0) goto out; } @@ -1450,7 +1457,7 @@ int btrfs_qgroup_prepare_account_extents(struct btrfs_trans_handle *trans, return ret; } -int btrfs_qgroup_insert_dirty_extent_nolock(struct btrfs_fs_info *fs_info, +int btrfs_qgroup_trace_extent_nolock(struct btrfs_fs_info *fs_info, struct btrfs_delayed_ref_root *delayed_refs, struct btrfs_qgroup_extent_record *record) { @@ -1460,7 +1467,7 @@ int btrfs_qgroup_insert_dirty_extent_nolock(struct btrfs_fs_info *fs_info, u64 bytenr = record->bytenr; assert_spin_locked(&delayed_refs->lock); - trace_btrfs_qgroup_insert_dirty_extent(fs_info, record); + trace_btrfs_qgroup_trace_extent(fs_info, record); while (*p) { parent_node = *p; @@ -1479,7 +1486,7 @@ int btrfs_qgroup_insert_dirty_extent_nolock(struct btrfs_fs_info *fs_info, return 0; } -int btrfs_qgroup_insert_dirty_extent(struct btrfs_trans_handle *trans, +int btrfs_qgroup_trace_extent(struct btrfs_trans_handle *trans, struct btrfs_fs_info *fs_info, u64 bytenr, u64 num_bytes, gfp_t gfp_flag) { @@ -1502,7 +1509,7 @@ int btrfs_qgroup_insert_dirty_extent(struct btrfs_trans_handle *trans, record->old_roots = NULL; spin_lock(&delayed_refs->lock); - ret = btrfs_qgroup_insert_dirty_extent_nolock(fs_info, delayed_refs, + ret = btrfs_qgroup_trace_extent_nolock(fs_info, delayed_refs, record); spin_unlock(&delayed_refs->lock); if (ret > 0) @@ -1510,6 +1517,219 @@ int btrfs_qgroup_insert_dirty_extent(struct btrfs_trans_handle *trans, return 0; } +int btrfs_qgroup_trace_leaf_items(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct extent_buffer *eb) +{ + int nr = btrfs_header_nritems(eb); + int i, extent_type, ret; + struct btrfs_key key; + struct btrfs_file_extent_item *fi; + u64 bytenr, num_bytes; + + /* We can be called directly from walk_up_proc() */ + if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &root->fs_info->flags)) + return 0; + + for (i = 0; i < nr; i++) { + btrfs_item_key_to_cpu(eb, &key, i); + + if (key.type != BTRFS_EXTENT_DATA_KEY) + continue; + + fi = btrfs_item_ptr(eb, i, struct btrfs_file_extent_item); + /* filter out non qgroup-accountable extents */ + extent_type = btrfs_file_extent_type(eb, fi); + + if (extent_type == BTRFS_FILE_EXTENT_INLINE) + continue; + + bytenr = btrfs_file_extent_disk_bytenr(eb, fi); + if (!bytenr) + continue; + + num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi); + + ret = btrfs_qgroup_trace_extent(trans, root->fs_info, + bytenr, num_bytes, GFP_NOFS); + if (ret) + return ret; + } + return 0; +} + +/* + * Walk up the tree from the bottom, freeing leaves and any interior + * nodes which have had all slots visited. If a node (leaf or + * interior) is freed, the node above it will have it's slot + * incremented. The root node will never be freed. + * + * At the end of this function, we should have a path which has all + * slots incremented to the next position for a search. If we need to + * read a new node it will be NULL and the node above it will have the + * correct slot selected for a later read. + * + * If we increment the root nodes slot counter past the number of + * elements, 1 is returned to signal completion of the search. + */ +static int adjust_slots_upwards(struct btrfs_root *root, + struct btrfs_path *path, int root_level) +{ + int level = 0; + int nr, slot; + struct extent_buffer *eb; + + if (root_level == 0) + return 1; + + while (level <= root_level) { + eb = path->nodes[level]; + nr = btrfs_header_nritems(eb); + path->slots[level]++; + slot = path->slots[level]; + if (slot >= nr || level == 0) { + /* + * Don't free the root - we will detect this + * condition after our loop and return a + * positive value for caller to stop walking the tree. + */ + if (level != root_level) { + btrfs_tree_unlock_rw(eb, path->locks[level]); + path->locks[level] = 0; + + free_extent_buffer(eb); + path->nodes[level] = NULL; + path->slots[level] = 0; + } + } else { + /* + * We have a valid slot to walk back down + * from. Stop here so caller can process these + * new nodes. + */ + break; + } + + level++; + } + + eb = path->nodes[root_level]; + if (path->slots[root_level] >= btrfs_header_nritems(eb)) + return 1; + + return 0; +} + +int btrfs_qgroup_trace_subtree(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct extent_buffer *root_eb, + u64 root_gen, int root_level) +{ + int ret = 0; + int level; + struct extent_buffer *eb = root_eb; + struct btrfs_path *path = NULL; + + BUG_ON(root_level < 0 || root_level > BTRFS_MAX_LEVEL); + BUG_ON(root_eb == NULL); + + if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &root->fs_info->flags)) + return 0; + + if (!extent_buffer_uptodate(root_eb)) { + ret = btrfs_read_buffer(root_eb, root_gen); + if (ret) + goto out; + } + + if (root_level == 0) { + ret = btrfs_qgroup_trace_leaf_items(trans, root, root_eb); + goto out; + } + + path = btrfs_alloc_path(); + if (!path) + return -ENOMEM; + + /* + * Walk down the tree. Missing extent blocks are filled in as + * we go. Metadata is accounted every time we read a new + * extent block. + * + * When we reach a leaf, we account for file extent items in it, + * walk back up the tree (adjusting slot pointers as we go) + * and restart the search process. + */ + extent_buffer_get(root_eb); /* For path */ + path->nodes[root_level] = root_eb; + path->slots[root_level] = 0; + path->locks[root_level] = 0; /* so release_path doesn't try to unlock */ +walk_down: + level = root_level; + while (level >= 0) { + if (path->nodes[level] == NULL) { + int parent_slot; + u64 child_gen; + u64 child_bytenr; + + /* + * We need to get child blockptr/gen from parent before + * we can read it. + */ + eb = path->nodes[level + 1]; + parent_slot = path->slots[level + 1]; + child_bytenr = btrfs_node_blockptr(eb, parent_slot); + child_gen = btrfs_node_ptr_generation(eb, parent_slot); + + eb = read_tree_block(root, child_bytenr, child_gen); + if (IS_ERR(eb)) { + ret = PTR_ERR(eb); + goto out; + } else if (!extent_buffer_uptodate(eb)) { + free_extent_buffer(eb); + ret = -EIO; + goto out; + } + + path->nodes[level] = eb; + path->slots[level] = 0; + + btrfs_tree_read_lock(eb); + btrfs_set_lock_blocking_rw(eb, BTRFS_READ_LOCK); + path->locks[level] = BTRFS_READ_LOCK_BLOCKING; + + ret = btrfs_qgroup_trace_extent(trans, + root->fs_info, child_bytenr, + root->nodesize, GFP_NOFS); + if (ret) + goto out; + } + + if (level == 0) { + ret = btrfs_qgroup_trace_leaf_items(trans, root, + path->nodes[level]); + if (ret) + goto out; + + /* Nonzero return here means we completed our search */ + ret = adjust_slots_upwards(root, path, root_level); + if (ret) + break; + + /* Restart search with new slots */ + goto walk_down; + } + + level--; + } + + ret = 0; +out: + btrfs_free_path(path); + + return ret; +} + #define UPDATE_NEW 0 #define UPDATE_OLD 1 /* @@ -1535,30 +1755,30 @@ static int qgroup_update_refcnt(struct btrfs_fs_info *fs_info, continue; ulist_reinit(tmp); - ret = ulist_add(qgroups, qg->qgroupid, ptr_to_u64(qg), + ret = ulist_add(qgroups, qg->qgroupid, qgroup_to_aux(qg), GFP_ATOMIC); if (ret < 0) return ret; - ret = ulist_add(tmp, qg->qgroupid, ptr_to_u64(qg), GFP_ATOMIC); + ret = ulist_add(tmp, qg->qgroupid, qgroup_to_aux(qg), GFP_ATOMIC); if (ret < 0) return ret; ULIST_ITER_INIT(&tmp_uiter); while ((tmp_unode = ulist_next(tmp, &tmp_uiter))) { struct btrfs_qgroup_list *glist; - qg = u64_to_ptr(tmp_unode->aux); + qg = unode_aux_to_qgroup(tmp_unode); if (update_old) btrfs_qgroup_update_old_refcnt(qg, seq, 1); else btrfs_qgroup_update_new_refcnt(qg, seq, 1); list_for_each_entry(glist, &qg->groups, next_group) { ret = ulist_add(qgroups, glist->group->qgroupid, - ptr_to_u64(glist->group), + qgroup_to_aux(glist->group), GFP_ATOMIC); if (ret < 0) return ret; ret = ulist_add(tmp, glist->group->qgroupid, - ptr_to_u64(glist->group), + qgroup_to_aux(glist->group), GFP_ATOMIC); if (ret < 0) return ret; @@ -1619,7 +1839,7 @@ static int qgroup_update_counters(struct btrfs_fs_info *fs_info, while ((unode = ulist_next(qgroups, &uiter))) { bool dirty = false; - qg = u64_to_ptr(unode->aux); + qg = unode_aux_to_qgroup(unode); cur_old_count = btrfs_qgroup_get_old_refcnt(qg, seq); cur_new_count = btrfs_qgroup_get_new_refcnt(qg, seq); @@ -2125,7 +2345,7 @@ static int qgroup_reserve(struct btrfs_root *root, u64 num_bytes) struct btrfs_qgroup *qg; struct btrfs_qgroup_list *glist; - qg = u64_to_ptr(unode->aux); + qg = unode_aux_to_qgroup(unode); if ((qg->lim_flags & BTRFS_QGROUP_LIMIT_MAX_RFER) && qg->reserved + (s64)qg->rfer + num_bytes > @@ -2157,7 +2377,7 @@ static int qgroup_reserve(struct btrfs_root *root, u64 num_bytes) while ((unode = ulist_next(fs_info->qgroup_ulist, &uiter))) { struct btrfs_qgroup *qg; - qg = u64_to_ptr(unode->aux); + qg = unode_aux_to_qgroup(unode); qg->reserved += num_bytes; } @@ -2202,7 +2422,7 @@ void btrfs_qgroup_free_refroot(struct btrfs_fs_info *fs_info, struct btrfs_qgroup *qg; struct btrfs_qgroup_list *glist; - qg = u64_to_ptr(unode->aux); + qg = unode_aux_to_qgroup(unode); qg->reserved -= num_bytes; diff --git a/fs/btrfs/qgroup.h b/fs/btrfs/qgroup.h index 1bc64c864b62..99c879dbedc1 100644 --- a/fs/btrfs/qgroup.h +++ b/fs/btrfs/qgroup.h @@ -23,6 +23,34 @@ #include "delayed-ref.h" /* + * Btrfs qgroup overview + * + * Btrfs qgroup splits into 3 main part: + * 1) Reserve + * Reserve metadata/data space for incoming operations + * Affect how qgroup limit works + * + * 2) Trace + * Tell btrfs qgroup to trace dirty extents. + * + * Dirty extents including: + * - Newly allocated extents + * - Extents going to be deleted (in this trans) + * - Extents whose owner is going to be modified + * + * This is the main part affects whether qgroup numbers will stay + * consistent. + * Btrfs qgroup can trace clean extents and won't cause any problem, + * but it will consume extra CPU time, it should be avoided if possible. + * + * 3) Account + * Btrfs qgroup will updates its numbers, based on dirty extents traced + * in previous step. + * + * Normally at qgroup rescan and transaction commit time. + */ + +/* * Record a dirty extent, and info qgroup to update quota on it * TODO: Use kmem cache to alloc it. */ @@ -65,8 +93,8 @@ struct btrfs_delayed_extent_op; int btrfs_qgroup_prepare_account_extents(struct btrfs_trans_handle *trans, struct btrfs_fs_info *fs_info); /* - * Insert one dirty extent record into @delayed_refs, informing qgroup to - * account that extent at commit trans time. + * Inform qgroup to trace one dirty extent, its info is recorded in @record. + * So qgroup can account it at commit trans time. * * No lock version, caller must acquire delayed ref lock and allocate memory. * @@ -74,14 +102,15 @@ int btrfs_qgroup_prepare_account_extents(struct btrfs_trans_handle *trans, * Return >0 for existing record, caller can free @record safely. * Error is not possible */ -int btrfs_qgroup_insert_dirty_extent_nolock( +int btrfs_qgroup_trace_extent_nolock( struct btrfs_fs_info *fs_info, struct btrfs_delayed_ref_root *delayed_refs, struct btrfs_qgroup_extent_record *record); /* - * Insert one dirty extent record into @delayed_refs, informing qgroup to - * account that extent at commit trans time. + * Inform qgroup to trace one dirty extent, specified by @bytenr and + * @num_bytes. + * So qgroup can account it at commit trans time. * * Better encapsulated version. * @@ -89,10 +118,33 @@ int btrfs_qgroup_insert_dirty_extent_nolock( * Return <0 for error, like memory allocation failure or invalid parameter * (NULL trans) */ -int btrfs_qgroup_insert_dirty_extent(struct btrfs_trans_handle *trans, +int btrfs_qgroup_trace_extent(struct btrfs_trans_handle *trans, struct btrfs_fs_info *fs_info, u64 bytenr, u64 num_bytes, gfp_t gfp_flag); +/* + * Inform qgroup to trace all leaf items of data + * + * Return 0 for success + * Return <0 for error(ENOMEM) + */ +int btrfs_qgroup_trace_leaf_items(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct extent_buffer *eb); +/* + * Inform qgroup to trace a whole subtree, including all its child tree + * blocks and data. + * The root tree block is specified by @root_eb. + * + * Normally used by relocation(tree block swap) and subvolume deletion. + * + * Return 0 for success + * Return <0 for error(ENOMEM or tree search error) + */ +int btrfs_qgroup_trace_subtree(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct extent_buffer *root_eb, + u64 root_gen, int root_level); int btrfs_qgroup_account_extent(struct btrfs_trans_handle *trans, struct btrfs_fs_info *fs_info, diff --git a/fs/btrfs/raid56.c b/fs/btrfs/raid56.c index d016d4a79864..eece126d6973 100644 --- a/fs/btrfs/raid56.c +++ b/fs/btrfs/raid56.c @@ -1144,10 +1144,10 @@ static void validate_rbio_for_rmw(struct btrfs_raid_bio *rbio) static void index_rbio_pages(struct btrfs_raid_bio *rbio) { struct bio *bio; + struct bio_vec *bvec; u64 start; unsigned long stripe_offset; unsigned long page_index; - struct page *p; int i; spin_lock_irq(&rbio->bio_list_lock); @@ -1156,10 +1156,8 @@ static void index_rbio_pages(struct btrfs_raid_bio *rbio) stripe_offset = start - rbio->bbio->raid_map[0]; page_index = stripe_offset >> PAGE_SHIFT; - for (i = 0; i < bio->bi_vcnt; i++) { - p = bio->bi_io_vec[i].bv_page; - rbio->bio_pages[page_index + i] = p; - } + bio_for_each_segment_all(bvec, bio, i) + rbio->bio_pages[page_index + i] = bvec->bv_page; } spin_unlock_irq(&rbio->bio_list_lock); } @@ -1433,13 +1431,11 @@ static int fail_bio_stripe(struct btrfs_raid_bio *rbio, */ static void set_bio_pages_uptodate(struct bio *bio) { + struct bio_vec *bvec; int i; - struct page *p; - for (i = 0; i < bio->bi_vcnt; i++) { - p = bio->bi_io_vec[i].bv_page; - SetPageUptodate(p); - } + bio_for_each_segment_all(bvec, bio, i) + SetPageUptodate(bvec->bv_page); } /* diff --git a/fs/btrfs/reada.c b/fs/btrfs/reada.c index 75bab76739be..f0beb63a6d82 100644 --- a/fs/btrfs/reada.c +++ b/fs/btrfs/reada.c @@ -107,18 +107,14 @@ static int reada_add_block(struct reada_control *rc, u64 logical, /* in case of err, eb might be NULL */ static void __readahead_hook(struct btrfs_fs_info *fs_info, struct reada_extent *re, struct extent_buffer *eb, - u64 start, int err) + int err) { - int level = 0; int nritems; int i; u64 bytenr; u64 generation; struct list_head list; - if (eb) - level = btrfs_header_level(eb); - spin_lock(&re->lock); /* * just take the full list from the extent. afterwards we @@ -143,7 +139,7 @@ static void __readahead_hook(struct btrfs_fs_info *fs_info, * trigger more readahead depending from the content, e.g. * fetch the checksums for the extents in the leaf. */ - if (!level) + if (!btrfs_header_level(eb)) goto cleanup; nritems = btrfs_header_nritems(eb); @@ -213,12 +209,8 @@ cleanup: return; } -/* - * start is passed separately in case eb in NULL, which may be the case with - * failed I/O - */ int btree_readahead_hook(struct btrfs_fs_info *fs_info, - struct extent_buffer *eb, u64 start, int err) + struct extent_buffer *eb, int err) { int ret = 0; struct reada_extent *re; @@ -226,7 +218,7 @@ int btree_readahead_hook(struct btrfs_fs_info *fs_info, /* find extent */ spin_lock(&fs_info->reada_lock); re = radix_tree_lookup(&fs_info->reada_tree, - start >> PAGE_SHIFT); + eb->start >> PAGE_SHIFT); if (re) re->refcnt++; spin_unlock(&fs_info->reada_lock); @@ -235,7 +227,7 @@ int btree_readahead_hook(struct btrfs_fs_info *fs_info, goto start_machine; } - __readahead_hook(fs_info, re, eb, start, err); + __readahead_hook(fs_info, re, eb, err); reada_extent_put(fs_info, re); /* our ref */ start_machine: @@ -354,8 +346,8 @@ static struct reada_extent *reada_find_extent(struct btrfs_root *root, * map block */ length = blocksize; - ret = btrfs_map_block(fs_info, REQ_GET_READ_MIRRORS, logical, &length, - &bbio, 0); + ret = btrfs_map_block(fs_info, BTRFS_MAP_GET_READ_MIRRORS, logical, + &length, &bbio, 0); if (ret || !bbio || length < blocksize) goto error; @@ -401,7 +393,6 @@ static struct reada_extent *reada_find_extent(struct btrfs_root *root, ret = radix_tree_insert(&fs_info->reada_tree, index, re); if (ret == -EEXIST) { re_exist = radix_tree_lookup(&fs_info->reada_tree, index); - BUG_ON(!re_exist); re_exist->refcnt++; spin_unlock(&fs_info->reada_lock); btrfs_dev_replace_unlock(&fs_info->dev_replace, 0); @@ -448,7 +439,6 @@ static struct reada_extent *reada_find_extent(struct btrfs_root *root, /* ignore whether the entry was inserted */ radix_tree_delete(&dev->reada_extents, index); } - BUG_ON(fs_info == NULL); radix_tree_delete(&fs_info->reada_tree, index); spin_unlock(&fs_info->reada_lock); btrfs_dev_replace_unlock(&fs_info->dev_replace, 0); @@ -717,9 +707,9 @@ static int reada_start_machine_dev(struct btrfs_fs_info *fs_info, ret = reada_tree_block_flagged(fs_info->extent_root, logical, mirror_num, &eb); if (ret) - __readahead_hook(fs_info, re, NULL, logical, ret); + __readahead_hook(fs_info, re, NULL, ret); else if (eb) - __readahead_hook(fs_info, re, eb, eb->start, ret); + __readahead_hook(fs_info, re, eb, ret); if (eb) free_extent_buffer(eb); diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index c4af0cdb783d..3dc7232aa038 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c @@ -1901,6 +1901,29 @@ again: BUG_ON(ret); /* + * Info qgroup to trace both subtrees. + * + * We must trace both trees. + * 1) Tree reloc subtree + * If not traced, we will leak data numbers + * 2) Fs subtree + * If not traced, we will double count old data + * and tree block numbers, if current trans doesn't free + * data reloc tree inode. + */ + ret = btrfs_qgroup_trace_subtree(trans, src, parent, + btrfs_header_generation(parent), + btrfs_header_level(parent)); + if (ret < 0) + break; + ret = btrfs_qgroup_trace_subtree(trans, dest, + path->nodes[level], + btrfs_header_generation(path->nodes[level]), + btrfs_header_level(path->nodes[level])); + if (ret < 0) + break; + + /* * swap blocks in fs tree and reloc tree. */ btrfs_set_node_blockptr(parent, slot, new_bytenr); @@ -3949,90 +3972,6 @@ int prepare_to_relocate(struct reloc_control *rc) return 0; } -/* - * Qgroup fixer for data chunk relocation. - * The data relocation is done in the following steps - * 1) Copy data extents into data reloc tree - * 2) Create tree reloc tree(special snapshot) for related subvolumes - * 3) Modify file extents in tree reloc tree - * 4) Merge tree reloc tree with original fs tree, by swapping tree blocks - * - * The problem is, data and tree reloc tree are not accounted to qgroup, - * and 4) will only info qgroup to track tree blocks change, not file extents - * in the tree blocks. - * - * The good news is, related data extents are all in data reloc tree, so we - * only need to info qgroup to track all file extents in data reloc tree - * before commit trans. - */ -static int qgroup_fix_relocated_data_extents(struct btrfs_trans_handle *trans, - struct reloc_control *rc) -{ - struct btrfs_fs_info *fs_info = rc->extent_root->fs_info; - struct inode *inode = rc->data_inode; - struct btrfs_root *data_reloc_root = BTRFS_I(inode)->root; - struct btrfs_path *path; - struct btrfs_key key; - int ret = 0; - - if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags)) - return 0; - - /* - * Only for stage where we update data pointers the qgroup fix is - * valid. - * For MOVING_DATA stage, we will miss the timing of swapping tree - * blocks, and won't fix it. - */ - if (!(rc->stage == UPDATE_DATA_PTRS && rc->extents_found)) - return 0; - - path = btrfs_alloc_path(); - if (!path) - return -ENOMEM; - key.objectid = btrfs_ino(inode); - key.type = BTRFS_EXTENT_DATA_KEY; - key.offset = 0; - - ret = btrfs_search_slot(NULL, data_reloc_root, &key, path, 0, 0); - if (ret < 0) - goto out; - - lock_extent(&BTRFS_I(inode)->io_tree, 0, (u64)-1); - while (1) { - struct btrfs_file_extent_item *fi; - - btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]); - if (key.objectid > btrfs_ino(inode)) - break; - if (key.type != BTRFS_EXTENT_DATA_KEY) - goto next; - fi = btrfs_item_ptr(path->nodes[0], path->slots[0], - struct btrfs_file_extent_item); - if (btrfs_file_extent_type(path->nodes[0], fi) != - BTRFS_FILE_EXTENT_REG) - goto next; - ret = btrfs_qgroup_insert_dirty_extent(trans, fs_info, - btrfs_file_extent_disk_bytenr(path->nodes[0], fi), - btrfs_file_extent_disk_num_bytes(path->nodes[0], fi), - GFP_NOFS); - if (ret < 0) - break; -next: - ret = btrfs_next_item(data_reloc_root, path); - if (ret < 0) - break; - if (ret > 0) { - ret = 0; - break; - } - } - unlock_extent(&BTRFS_I(inode)->io_tree, 0 , (u64)-1); -out: - btrfs_free_path(path); - return ret; -} - static noinline_for_stack int relocate_block_group(struct reloc_control *rc) { struct rb_root blocks = RB_ROOT; @@ -4223,13 +4162,6 @@ restart: err = PTR_ERR(trans); goto out_free; } - ret = qgroup_fix_relocated_data_extents(trans, rc); - if (ret < 0) { - btrfs_abort_transaction(trans, ret); - if (!err) - err = ret; - goto out_free; - } btrfs_commit_transaction(trans, rc->extent_root); out_free: btrfs_free_block_rsv(rc->extent_root, rc->block_rsv); @@ -4255,7 +4187,7 @@ static int __insert_orphan_inode(struct btrfs_trans_handle *trans, leaf = path->nodes[0]; item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_inode_item); - memset_extent_buffer(leaf, 0, (unsigned long)item, sizeof(*item)); + memzero_extent_buffer(leaf, (unsigned long)item, sizeof(*item)); btrfs_set_inode_generation(leaf, item, 1); btrfs_set_inode_size(leaf, item, 0); btrfs_set_inode_mode(leaf, item, S_IFREG | 0600); @@ -4333,6 +4265,45 @@ static struct reloc_control *alloc_reloc_control(struct btrfs_fs_info *fs_info) } /* + * Print the block group being relocated + */ +static void describe_relocation(struct btrfs_fs_info *fs_info, + struct btrfs_block_group_cache *block_group) +{ + char buf[128]; /* prefixed by a '|' that'll be dropped */ + u64 flags = block_group->flags; + + /* Shouldn't happen */ + if (!flags) { + strcpy(buf, "|NONE"); + } else { + char *bp = buf; + +#define DESCRIBE_FLAG(f, d) \ + if (flags & BTRFS_BLOCK_GROUP_##f) { \ + bp += snprintf(bp, buf - bp + sizeof(buf), "|%s", d); \ + flags &= ~BTRFS_BLOCK_GROUP_##f; \ + } + DESCRIBE_FLAG(DATA, "data"); + DESCRIBE_FLAG(SYSTEM, "system"); + DESCRIBE_FLAG(METADATA, "metadata"); + DESCRIBE_FLAG(RAID0, "raid0"); + DESCRIBE_FLAG(RAID1, "raid1"); + DESCRIBE_FLAG(DUP, "dup"); + DESCRIBE_FLAG(RAID10, "raid10"); + DESCRIBE_FLAG(RAID5, "raid5"); + DESCRIBE_FLAG(RAID6, "raid6"); + if (flags) + snprintf(buf, buf - bp + sizeof(buf), "|0x%llx", flags); +#undef DESCRIBE_FLAG + } + + btrfs_info(fs_info, + "relocating block group %llu flags %s", + block_group->key.objectid, buf + 1); +} + +/* * function to relocate all extents in a block group. */ int btrfs_relocate_block_group(struct btrfs_root *extent_root, u64 group_start) @@ -4388,9 +4359,7 @@ int btrfs_relocate_block_group(struct btrfs_root *extent_root, u64 group_start) goto out; } - btrfs_info(extent_root->fs_info, - "relocating block group %llu flags %llu", - rc->block_group->key.objectid, rc->block_group->flags); + describe_relocation(extent_root->fs_info, rc->block_group); btrfs_wait_block_group_reservations(rc->block_group); btrfs_wait_nocow_writers(rc->block_group); @@ -4598,11 +4567,6 @@ int btrfs_recover_relocation(struct btrfs_root *root) err = PTR_ERR(trans); goto out_free; } - err = qgroup_fix_relocated_data_extents(trans, rc); - if (err < 0) { - btrfs_abort_transaction(trans, err); - goto out_free; - } err = btrfs_commit_transaction(trans, rc->extent_root); out_free: kfree(rc); diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c index fffb9ab8526e..589d79219c18 100644 --- a/fs/btrfs/scrub.c +++ b/fs/btrfs/scrub.c @@ -1334,8 +1334,8 @@ static int scrub_setup_recheck_block(struct scrub_block *original_sblock, * with a length of PAGE_SIZE, each returned stripe * represents one mirror */ - ret = btrfs_map_sblock(fs_info, REQ_GET_READ_MIRRORS, logical, - &mapped_length, &bbio, 0, 1); + ret = btrfs_map_sblock(fs_info, BTRFS_MAP_GET_READ_MIRRORS, + logical, &mapped_length, &bbio, 0, 1); if (ret || !bbio || mapped_length < sublen) { btrfs_put_bbio(bbio); return -EIO; @@ -2191,8 +2191,8 @@ static void scrub_missing_raid56_pages(struct scrub_block *sblock) int ret; int i; - ret = btrfs_map_sblock(fs_info, REQ_GET_READ_MIRRORS, logical, &length, - &bbio, 0, 1); + ret = btrfs_map_sblock(fs_info, BTRFS_MAP_GET_READ_MIRRORS, logical, + &length, &bbio, 0, 1); if (ret || !bbio || !bbio->raid_map) goto bbio_out; @@ -2778,7 +2778,7 @@ static void scrub_parity_check_and_repair(struct scrub_parity *sparity) goto out; length = sparity->logic_end - sparity->logic_start; - ret = btrfs_map_sblock(sctx->dev_root->fs_info, WRITE, + ret = btrfs_map_sblock(sctx->dev_root->fs_info, BTRFS_MAP_WRITE, sparity->logic_start, &length, &bbio, 0, 1); if (ret || !bbio || !bbio->raid_map) @@ -2988,8 +2988,9 @@ again: mapped_length = extent_len; bbio = NULL; - ret = btrfs_map_block(fs_info, READ, extent_logical, - &mapped_length, &bbio, 0); + ret = btrfs_map_block(fs_info, BTRFS_MAP_READ, + extent_logical, &mapped_length, &bbio, + 0); if (!ret) { if (!bbio || mapped_length < extent_len) ret = -EIO; @@ -4076,7 +4077,7 @@ static void scrub_remap_extent(struct btrfs_fs_info *fs_info, int ret; mapped_length = extent_len; - ret = btrfs_map_block(fs_info, READ, extent_logical, + ret = btrfs_map_block(fs_info, BTRFS_MAP_READ, extent_logical, &mapped_length, &bbio, 0); if (ret || !bbio || mapped_length < extent_len || !bbio->stripes[0].dev->bdev) { diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 74ed5aae6cea..adec3a0b01d5 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -2086,10 +2086,6 @@ static int btrfs_statfs(struct dentry *dentry, struct kstatfs *buf) u64 thresh = 0; int mixed = 0; - /* - * holding chunk_mutex to avoid allocating new chunks, holding - * device_list_mutex to avoid the device being removed - */ rcu_read_lock(); list_for_each_entry_rcu(found, head, list) { if (found->flags & BTRFS_BLOCK_GROUP_DATA) { diff --git a/fs/btrfs/tests/extent-io-tests.c b/fs/btrfs/tests/extent-io-tests.c index caad80bb9bd0..2c7a0a922510 100644 --- a/fs/btrfs/tests/extent-io-tests.c +++ b/fs/btrfs/tests/extent-io-tests.c @@ -306,7 +306,7 @@ static int __test_eb_bitmaps(unsigned long *bitmap, struct extent_buffer *eb, int ret; memset(bitmap, 0, len); - memset_extent_buffer(eb, 0, 0, len); + memzero_extent_buffer(eb, 0, len); if (memcmp_extent_buffer(eb, bitmap, 0, len) != 0) { test_msg("Bitmap was not zeroed\n"); return -EINVAL; diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index 3d33c4e41e5f..e0478f51cf16 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -689,7 +689,7 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans, * as the owner of the file extent changed from log tree * (doesn't affect qgroup) to fs/file tree(affects qgroup) */ - ret = btrfs_qgroup_insert_dirty_extent(trans, root->fs_info, + ret = btrfs_qgroup_trace_extent(trans, root->fs_info, btrfs_file_extent_disk_bytenr(eb, item), btrfs_file_extent_disk_num_bytes(eb, item), GFP_NOFS); diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 71a60cc01451..1886b94f13ac 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -1595,8 +1595,8 @@ static int btrfs_alloc_dev_extent(struct btrfs_trans_handle *trans, btrfs_set_dev_extent_chunk_objectid(leaf, extent, chunk_objectid); btrfs_set_dev_extent_chunk_offset(leaf, extent, chunk_offset); - write_extent_buffer(leaf, root->fs_info->chunk_tree_uuid, - btrfs_dev_extent_chunk_tree_uuid(extent), BTRFS_UUID_SIZE); + write_extent_buffer_chunk_tree_uuid(leaf, + root->fs_info->chunk_tree_uuid); btrfs_set_dev_extent_length(leaf, extent, num_bytes); btrfs_mark_buffer_dirty(leaf); @@ -3062,7 +3062,7 @@ static int insert_balance_item(struct btrfs_root *root, leaf = path->nodes[0]; item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_balance_item); - memset_extent_buffer(leaf, 0, (unsigned long)item, sizeof(*item)); + memzero_extent_buffer(leaf, (unsigned long)item, sizeof(*item)); btrfs_cpu_balance_args_to_disk(&disk_bargs, &bctl->data); btrfs_set_balance_data(leaf, item, &disk_bargs); @@ -5329,7 +5329,8 @@ void btrfs_put_bbio(struct btrfs_bio *bbio) kfree(bbio); } -static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int op, +static int __btrfs_map_block(struct btrfs_fs_info *fs_info, + enum btrfs_map_op op, u64 logical, u64 *length, struct btrfs_bio **bbio_ret, int mirror_num, int need_raid_map) @@ -5414,7 +5415,7 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int op, raid56_full_stripe_start *= full_stripe_len; } - if (op == REQ_OP_DISCARD) { + if (op == BTRFS_MAP_DISCARD) { /* we don't discard raid56 yet */ if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK) { ret = -EOPNOTSUPP; @@ -5427,7 +5428,7 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int op, For other RAID types and for RAID[56] reads, just allow a single stripe (on a single disk). */ if ((map->type & BTRFS_BLOCK_GROUP_RAID56_MASK) && - (op == REQ_OP_WRITE)) { + (op == BTRFS_MAP_WRITE)) { max_len = stripe_len * nr_data_stripes(map) - (offset - raid56_full_stripe_start); } else { @@ -5452,8 +5453,8 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int op, btrfs_dev_replace_set_lock_blocking(dev_replace); if (dev_replace_is_ongoing && mirror_num == map->num_stripes + 1 && - op != REQ_OP_WRITE && op != REQ_OP_DISCARD && - op != REQ_GET_READ_MIRRORS && dev_replace->tgtdev != NULL) { + op != BTRFS_MAP_WRITE && op != BTRFS_MAP_DISCARD && + op != BTRFS_MAP_GET_READ_MIRRORS && dev_replace->tgtdev != NULL) { /* * in dev-replace case, for repair case (that's the only * case where the mirror is selected explicitly when @@ -5474,7 +5475,7 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int op, int found = 0; u64 physical_of_found = 0; - ret = __btrfs_map_block(fs_info, REQ_GET_READ_MIRRORS, + ret = __btrfs_map_block(fs_info, BTRFS_MAP_GET_READ_MIRRORS, logical, &tmp_length, &tmp_bbio, 0, 0); if (ret) { WARN_ON(tmp_bbio != NULL); @@ -5484,7 +5485,7 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int op, tmp_num_stripes = tmp_bbio->num_stripes; if (mirror_num > tmp_num_stripes) { /* - * REQ_GET_READ_MIRRORS does not contain this + * BTRFS_MAP_GET_READ_MIRRORS does not contain this * mirror, that means that the requested area * is not left of the left cursor */ @@ -5540,17 +5541,17 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int op, (offset + *length); if (map->type & BTRFS_BLOCK_GROUP_RAID0) { - if (op == REQ_OP_DISCARD) + if (op == BTRFS_MAP_DISCARD) num_stripes = min_t(u64, map->num_stripes, stripe_nr_end - stripe_nr_orig); stripe_nr = div_u64_rem(stripe_nr, map->num_stripes, &stripe_index); - if (op != REQ_OP_WRITE && op != REQ_OP_DISCARD && - op != REQ_GET_READ_MIRRORS) + if (op != BTRFS_MAP_WRITE && op != BTRFS_MAP_DISCARD && + op != BTRFS_MAP_GET_READ_MIRRORS) mirror_num = 1; } else if (map->type & BTRFS_BLOCK_GROUP_RAID1) { - if (op == REQ_OP_WRITE || op == REQ_OP_DISCARD || - op == REQ_GET_READ_MIRRORS) + if (op == BTRFS_MAP_WRITE || op == BTRFS_MAP_DISCARD || + op == BTRFS_MAP_GET_READ_MIRRORS) num_stripes = map->num_stripes; else if (mirror_num) stripe_index = mirror_num - 1; @@ -5563,8 +5564,8 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int op, } } else if (map->type & BTRFS_BLOCK_GROUP_DUP) { - if (op == REQ_OP_WRITE || op == REQ_OP_DISCARD || - op == REQ_GET_READ_MIRRORS) { + if (op == BTRFS_MAP_WRITE || op == BTRFS_MAP_DISCARD || + op == BTRFS_MAP_GET_READ_MIRRORS) { num_stripes = map->num_stripes; } else if (mirror_num) { stripe_index = mirror_num - 1; @@ -5578,9 +5579,9 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int op, stripe_nr = div_u64_rem(stripe_nr, factor, &stripe_index); stripe_index *= map->sub_stripes; - if (op == REQ_OP_WRITE || op == REQ_GET_READ_MIRRORS) + if (op == BTRFS_MAP_WRITE || op == BTRFS_MAP_GET_READ_MIRRORS) num_stripes = map->sub_stripes; - else if (op == REQ_OP_DISCARD) + else if (op == BTRFS_MAP_DISCARD) num_stripes = min_t(u64, map->sub_stripes * (stripe_nr_end - stripe_nr_orig), map->num_stripes); @@ -5598,7 +5599,7 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int op, } else if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK) { if (need_raid_map && - (op == REQ_OP_WRITE || op == REQ_GET_READ_MIRRORS || + (op == BTRFS_MAP_WRITE || op == BTRFS_MAP_GET_READ_MIRRORS || mirror_num > 1)) { /* push stripe_nr back to the start of the full stripe */ stripe_nr = div_u64(raid56_full_stripe_start, @@ -5626,8 +5627,8 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int op, /* We distribute the parity blocks across stripes */ div_u64_rem(stripe_nr + stripe_index, map->num_stripes, &stripe_index); - if ((op != REQ_OP_WRITE && op != REQ_OP_DISCARD && - op != REQ_GET_READ_MIRRORS) && mirror_num <= 1) + if ((op != BTRFS_MAP_WRITE && op != BTRFS_MAP_DISCARD && + op != BTRFS_MAP_GET_READ_MIRRORS) && mirror_num <= 1) mirror_num = 1; } } else { @@ -5650,9 +5651,9 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int op, num_alloc_stripes = num_stripes; if (dev_replace_is_ongoing) { - if (op == REQ_OP_WRITE || op == REQ_OP_DISCARD) + if (op == BTRFS_MAP_WRITE || op == BTRFS_MAP_DISCARD) num_alloc_stripes <<= 1; - if (op == REQ_GET_READ_MIRRORS) + if (op == BTRFS_MAP_GET_READ_MIRRORS) num_alloc_stripes++; tgtdev_indexes = num_stripes; } @@ -5668,7 +5669,7 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int op, /* build raid_map */ if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK && need_raid_map && - ((op == REQ_OP_WRITE || op == REQ_GET_READ_MIRRORS) || + ((op == BTRFS_MAP_WRITE || op == BTRFS_MAP_GET_READ_MIRRORS) || mirror_num > 1)) { u64 tmp; unsigned rot; @@ -5693,7 +5694,7 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int op, RAID6_Q_STRIPE; } - if (op == REQ_OP_DISCARD) { + if (op == BTRFS_MAP_DISCARD) { u32 factor = 0; u32 sub_stripes = 0; u64 stripes_per_dev = 0; @@ -5773,7 +5774,7 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int op, } } - if (op == REQ_OP_WRITE || op == REQ_GET_READ_MIRRORS) + if (op == BTRFS_MAP_WRITE || op == BTRFS_MAP_GET_READ_MIRRORS) max_errors = btrfs_chunk_max_errors(map); if (bbio->raid_map) @@ -5781,7 +5782,7 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int op, tgtdev_indexes = 0; if (dev_replace_is_ongoing && - (op == REQ_OP_WRITE || op == REQ_OP_DISCARD) && + (op == BTRFS_MAP_WRITE || op == BTRFS_MAP_DISCARD) && dev_replace->tgtdev != NULL) { int index_where_to_add; u64 srcdev_devid = dev_replace->srcdev->devid; @@ -5816,7 +5817,8 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int op, } } num_stripes = index_where_to_add; - } else if (dev_replace_is_ongoing && (op == REQ_GET_READ_MIRRORS) && + } else if (dev_replace_is_ongoing && + op == BTRFS_MAP_GET_READ_MIRRORS && dev_replace->tgtdev != NULL) { u64 srcdev_devid = dev_replace->srcdev->devid; int index_srcdev = 0; @@ -5888,7 +5890,7 @@ out: return ret; } -int btrfs_map_block(struct btrfs_fs_info *fs_info, int op, +int btrfs_map_block(struct btrfs_fs_info *fs_info, enum btrfs_map_op op, u64 logical, u64 *length, struct btrfs_bio **bbio_ret, int mirror_num) { @@ -5897,7 +5899,7 @@ int btrfs_map_block(struct btrfs_fs_info *fs_info, int op, } /* For Scrub/replace */ -int btrfs_map_sblock(struct btrfs_fs_info *fs_info, int op, +int btrfs_map_sblock(struct btrfs_fs_info *fs_info, enum btrfs_map_op op, u64 logical, u64 *length, struct btrfs_bio **bbio_ret, int mirror_num, int need_raid_map) diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h index 09ed29c67848..9029a3134922 100644 --- a/fs/btrfs/volumes.h +++ b/fs/btrfs/volumes.h @@ -371,14 +371,35 @@ struct btrfs_balance_control { struct btrfs_balance_progress stat; }; +enum btrfs_map_op { + BTRFS_MAP_READ, + BTRFS_MAP_WRITE, + BTRFS_MAP_DISCARD, + BTRFS_MAP_GET_READ_MIRRORS, +}; + +static inline enum btrfs_map_op btrfs_op(struct bio *bio) +{ + switch (bio_op(bio)) { + case REQ_OP_DISCARD: + return BTRFS_MAP_DISCARD; + case REQ_OP_WRITE: + return BTRFS_MAP_WRITE; + default: + WARN_ON_ONCE(1); + case REQ_OP_READ: + return BTRFS_MAP_READ; + } +} + int btrfs_account_dev_extents_size(struct btrfs_device *device, u64 start, u64 end, u64 *length); void btrfs_get_bbio(struct btrfs_bio *bbio); void btrfs_put_bbio(struct btrfs_bio *bbio); -int btrfs_map_block(struct btrfs_fs_info *fs_info, int op, +int btrfs_map_block(struct btrfs_fs_info *fs_info, enum btrfs_map_op op, u64 logical, u64 *length, struct btrfs_bio **bbio_ret, int mirror_num); -int btrfs_map_sblock(struct btrfs_fs_info *fs_info, int op, +int btrfs_map_sblock(struct btrfs_fs_info *fs_info, enum btrfs_map_op op, u64 logical, u64 *length, struct btrfs_bio **bbio_ret, int mirror_num, int need_raid_map); diff --git a/fs/btrfs/zlib.c b/fs/btrfs/zlib.c index 441b81a3e545..da497f184ff4 100644 --- a/fs/btrfs/zlib.c +++ b/fs/btrfs/zlib.c @@ -210,10 +210,9 @@ out: return ret; } -static int zlib_decompress_biovec(struct list_head *ws, struct page **pages_in, +static int zlib_decompress_bio(struct list_head *ws, struct page **pages_in, u64 disk_start, - struct bio_vec *bvec, - int vcnt, + struct bio *orig_bio, size_t srclen) { struct workspace *workspace = list_entry(ws, struct workspace, list); @@ -222,10 +221,8 @@ static int zlib_decompress_biovec(struct list_head *ws, struct page **pages_in, char *data_in; size_t total_out = 0; unsigned long page_in_index = 0; - unsigned long page_out_index = 0; unsigned long total_pages_in = DIV_ROUND_UP(srclen, PAGE_SIZE); unsigned long buf_start; - unsigned long pg_offset; data_in = kmap(pages_in[page_in_index]); workspace->strm.next_in = data_in; @@ -235,7 +232,6 @@ static int zlib_decompress_biovec(struct list_head *ws, struct page **pages_in, workspace->strm.total_out = 0; workspace->strm.next_out = workspace->buf; workspace->strm.avail_out = PAGE_SIZE; - pg_offset = 0; /* If it's deflate, and it's got no preset dictionary, then we can tell zlib to skip the adler32 check. */ @@ -250,6 +246,7 @@ static int zlib_decompress_biovec(struct list_head *ws, struct page **pages_in, if (Z_OK != zlib_inflateInit2(&workspace->strm, wbits)) { pr_warn("BTRFS: inflateInit failed\n"); + kunmap(pages_in[page_in_index]); return -EIO; } while (workspace->strm.total_in < srclen) { @@ -266,8 +263,7 @@ static int zlib_decompress_biovec(struct list_head *ws, struct page **pages_in, ret2 = btrfs_decompress_buf2page(workspace->buf, buf_start, total_out, disk_start, - bvec, vcnt, - &page_out_index, &pg_offset); + orig_bio); if (ret2 == 0) { ret = 0; goto done; @@ -300,7 +296,7 @@ done: if (data_in) kunmap(pages_in[page_in_index]); if (!ret) - btrfs_clear_biovec_end(bvec, vcnt, page_out_index, pg_offset); + zero_fill_bio(orig_bio); return ret; } @@ -407,6 +403,6 @@ const struct btrfs_compress_op btrfs_zlib_compress = { .alloc_workspace = zlib_alloc_workspace, .free_workspace = zlib_free_workspace, .compress_pages = zlib_compress_pages, - .decompress_biovec = zlib_decompress_biovec, + .decompress_bio = zlib_decompress_bio, .decompress = zlib_decompress, }; diff --git a/include/trace/events/btrfs.h b/include/trace/events/btrfs.h index e030d6f6c19a..e61bbc3b82d5 100644 --- a/include/trace/events/btrfs.h +++ b/include/trace/events/btrfs.h @@ -1406,7 +1406,7 @@ DEFINE_EVENT(btrfs_qgroup_extent, btrfs_qgroup_account_extents, TP_ARGS(fs_info, rec) ); -DEFINE_EVENT(btrfs_qgroup_extent, btrfs_qgroup_insert_dirty_extent, +DEFINE_EVENT(btrfs_qgroup_extent, btrfs_qgroup_trace_extent, TP_PROTO(struct btrfs_fs_info *fs_info, struct btrfs_qgroup_extent_record *rec), |