diff options
Diffstat (limited to 'fs/btrfs')
-rw-r--r-- | fs/btrfs/Makefile | 10 | ||||
-rw-r--r-- | fs/btrfs/block-group.c | 33 | ||||
-rw-r--r-- | fs/btrfs/block-group.h | 9 | ||||
-rw-r--r-- | fs/btrfs/compression.c | 68 | ||||
-rw-r--r-- | fs/btrfs/ctree.c | 2 | ||||
-rw-r--r-- | fs/btrfs/ctree.h | 5 | ||||
-rw-r--r-- | fs/btrfs/delayed-inode.c | 5 | ||||
-rw-r--r-- | fs/btrfs/dev-replace.c | 3 | ||||
-rw-r--r-- | fs/btrfs/disk-io.c | 19 | ||||
-rw-r--r-- | fs/btrfs/extent-tree.c | 23 | ||||
-rw-r--r-- | fs/btrfs/extent_io.c | 54 | ||||
-rw-r--r-- | fs/btrfs/file.c | 5 | ||||
-rw-r--r-- | fs/btrfs/free-space-cache.c | 21 | ||||
-rw-r--r-- | fs/btrfs/inode.c | 101 | ||||
-rw-r--r-- | fs/btrfs/ioctl.c | 19 | ||||
-rw-r--r-- | fs/btrfs/lzo.c | 4 | ||||
-rw-r--r-- | fs/btrfs/qgroup.c | 20 | ||||
-rw-r--r-- | fs/btrfs/qgroup.h | 2 | ||||
-rw-r--r-- | fs/btrfs/raid56.c | 31 | ||||
-rw-r--r-- | fs/btrfs/reada.c | 35 | ||||
-rw-r--r-- | fs/btrfs/ref-verify.c | 4 | ||||
-rw-r--r-- | fs/btrfs/reflink.c | 24 | ||||
-rw-r--r-- | fs/btrfs/scrub.c | 11 | ||||
-rw-r--r-- | fs/btrfs/send.c | 7 | ||||
-rw-r--r-- | fs/btrfs/super.c | 4 | ||||
-rw-r--r-- | fs/btrfs/tree-checker.c | 16 | ||||
-rw-r--r-- | fs/btrfs/tree-log.c | 11 | ||||
-rw-r--r-- | fs/btrfs/volumes.c | 3 | ||||
-rw-r--r-- | fs/btrfs/xattr.c | 31 | ||||
-rw-r--r-- | fs/btrfs/zlib.c | 5 | ||||
-rw-r--r-- | fs/btrfs/zoned.c | 4 | ||||
-rw-r--r-- | fs/btrfs/zstd.c | 6 |
32 files changed, 426 insertions, 169 deletions
diff --git a/fs/btrfs/Makefile b/fs/btrfs/Makefile index b634c42115ea..b4fb997eda16 100644 --- a/fs/btrfs/Makefile +++ b/fs/btrfs/Makefile @@ -7,10 +7,12 @@ subdir-ccflags-y += -Wmissing-format-attribute subdir-ccflags-y += -Wmissing-prototypes subdir-ccflags-y += -Wold-style-definition subdir-ccflags-y += -Wmissing-include-dirs -subdir-ccflags-y += $(call cc-option, -Wunused-but-set-variable) -subdir-ccflags-y += $(call cc-option, -Wunused-const-variable) -subdir-ccflags-y += $(call cc-option, -Wpacked-not-aligned) -subdir-ccflags-y += $(call cc-option, -Wstringop-truncation) +condflags := \ + $(call cc-option, -Wunused-but-set-variable) \ + $(call cc-option, -Wunused-const-variable) \ + $(call cc-option, -Wpacked-not-aligned) \ + $(call cc-option, -Wstringop-truncation) +subdir-ccflags-y += $(condflags) # The following turn off the warnings enabled by -Wextra subdir-ccflags-y += -Wno-missing-field-initializers subdir-ccflags-y += -Wno-sign-compare diff --git a/fs/btrfs/block-group.c b/fs/btrfs/block-group.c index 5064be59dac5..744b99ddc28c 100644 --- a/fs/btrfs/block-group.c +++ b/fs/btrfs/block-group.c @@ -1162,6 +1162,11 @@ static int inc_block_group_ro(struct btrfs_block_group *cache, int force) spin_lock(&sinfo->lock); spin_lock(&cache->lock); + if (cache->swap_extents) { + ret = -ETXTBSY; + goto out; + } + if (cache->ro) { cache->ro++; ret = 0; @@ -2307,7 +2312,7 @@ again: } ret = inc_block_group_ro(cache, 0); - if (!do_chunk_alloc) + if (!do_chunk_alloc || ret == -ETXTBSY) goto unlock_out; if (!ret) goto out; @@ -2316,6 +2321,8 @@ again: if (ret < 0) goto out; ret = inc_block_group_ro(cache, 0); + if (ret == -ETXTBSY) + goto unlock_out; out: if (cache->flags & BTRFS_BLOCK_GROUP_SYSTEM) { alloc_flags = btrfs_get_alloc_profile(fs_info, cache->flags); @@ -3406,6 +3413,7 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info) ASSERT(list_empty(&block_group->io_list)); ASSERT(list_empty(&block_group->bg_list)); ASSERT(refcount_read(&block_group->refs) == 1); + ASSERT(block_group->swap_extents == 0); btrfs_put_block_group(block_group); spin_lock(&info->block_group_cache_lock); @@ -3472,3 +3480,26 @@ void btrfs_unfreeze_block_group(struct btrfs_block_group *block_group) __btrfs_remove_free_space_cache(block_group->free_space_ctl); } } + +bool btrfs_inc_block_group_swap_extents(struct btrfs_block_group *bg) +{ + bool ret = true; + + spin_lock(&bg->lock); + if (bg->ro) + ret = false; + else + bg->swap_extents++; + spin_unlock(&bg->lock); + + return ret; +} + +void btrfs_dec_block_group_swap_extents(struct btrfs_block_group *bg, int amount) +{ + spin_lock(&bg->lock); + ASSERT(!bg->ro); + ASSERT(bg->swap_extents >= amount); + bg->swap_extents -= amount; + spin_unlock(&bg->lock); +} diff --git a/fs/btrfs/block-group.h b/fs/btrfs/block-group.h index 29678426247d..3ecc3372a5ce 100644 --- a/fs/btrfs/block-group.h +++ b/fs/btrfs/block-group.h @@ -186,6 +186,12 @@ struct btrfs_block_group { /* Flag indicating this block group is placed on a sequential zone */ bool seq_zone; + /* + * Number of extents in this block group used for swap files. + * All accesses protected by the spinlock 'lock'. + */ + int swap_extents; + /* Record locked full stripes for RAID5/6 block group */ struct btrfs_full_stripe_locks_tree full_stripe_locks_root; @@ -312,4 +318,7 @@ static inline int btrfs_block_group_done(struct btrfs_block_group *cache) void btrfs_freeze_block_group(struct btrfs_block_group *cache); void btrfs_unfreeze_block_group(struct btrfs_block_group *cache); +bool btrfs_inc_block_group_swap_extents(struct btrfs_block_group *bg); +void btrfs_dec_block_group_swap_extents(struct btrfs_block_group *bg, int amount); + #endif /* BTRFS_BLOCK_GROUP_H */ diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c index 6d203acfdeb3..3f4c832abfed 100644 --- a/fs/btrfs/compression.c +++ b/fs/btrfs/compression.c @@ -141,6 +141,7 @@ static int check_compressed_csum(struct btrfs_inode *inode, struct bio *bio, struct btrfs_fs_info *fs_info = inode->root->fs_info; SHASH_DESC_ON_STACK(shash, fs_info->csum_shash); const u32 csum_size = fs_info->csum_size; + const u32 sectorsize = fs_info->sectorsize; struct page *page; unsigned long i; char *kaddr; @@ -154,22 +155,34 @@ static int check_compressed_csum(struct btrfs_inode *inode, struct bio *bio, shash->tfm = fs_info->csum_shash; for (i = 0; i < cb->nr_pages; i++) { + u32 pg_offset; + u32 bytes_left = PAGE_SIZE; page = cb->compressed_pages[i]; - kaddr = kmap_atomic(page); - crypto_shash_digest(shash, kaddr, PAGE_SIZE, csum); - kunmap_atomic(kaddr); - - if (memcmp(&csum, cb_sum, csum_size)) { - btrfs_print_data_csum_error(inode, disk_start, - csum, cb_sum, cb->mirror_num); - if (btrfs_io_bio(bio)->device) - btrfs_dev_stat_inc_and_print( - btrfs_io_bio(bio)->device, - BTRFS_DEV_STAT_CORRUPTION_ERRS); - return -EIO; + /* Determine the remaining bytes inside the page first */ + if (i == cb->nr_pages - 1) + bytes_left = cb->compressed_len - i * PAGE_SIZE; + + /* Hash through the page sector by sector */ + for (pg_offset = 0; pg_offset < bytes_left; + pg_offset += sectorsize) { + kaddr = kmap_atomic(page); + crypto_shash_digest(shash, kaddr + pg_offset, + sectorsize, csum); + kunmap_atomic(kaddr); + + if (memcmp(&csum, cb_sum, csum_size) != 0) { + btrfs_print_data_csum_error(inode, disk_start, + csum, cb_sum, cb->mirror_num); + if (btrfs_io_bio(bio)->device) + btrfs_dev_stat_inc_and_print( + btrfs_io_bio(bio)->device, + BTRFS_DEV_STAT_CORRUPTION_ERRS); + return -EIO; + } + cb_sum += csum_size; + disk_start += sectorsize; } - cb_sum += csum_size; } return 0; } @@ -640,7 +653,7 @@ blk_status_t btrfs_submit_compressed_read(struct inode *inode, struct bio *bio, read_lock(&em_tree->lock); em = lookup_extent_mapping(em_tree, page_offset(bio_first_page_all(bio)), - PAGE_SIZE); + fs_info->sectorsize); read_unlock(&em_tree->lock); if (!em) return BLK_STS_IOERR; @@ -698,19 +711,30 @@ blk_status_t btrfs_submit_compressed_read(struct inode *inode, struct bio *bio, refcount_set(&cb->pending_bios, 1); for (pg_index = 0; pg_index < nr_pages; pg_index++) { + u32 pg_len = PAGE_SIZE; int submit = 0; + /* + * To handle subpage case, we need to make sure the bio only + * covers the range we need. + * + * If we're at the last page, truncate the length to only cover + * the remaining part. + */ + if (pg_index == nr_pages - 1) + pg_len = min_t(u32, PAGE_SIZE, + compressed_len - pg_index * PAGE_SIZE); + page = cb->compressed_pages[pg_index]; page->mapping = inode->i_mapping; page->index = em_start >> PAGE_SHIFT; if (comp_bio->bi_iter.bi_size) - submit = btrfs_bio_fits_in_stripe(page, PAGE_SIZE, + submit = btrfs_bio_fits_in_stripe(page, pg_len, comp_bio, 0); page->mapping = NULL; - if (submit || bio_add_page(comp_bio, page, PAGE_SIZE, 0) < - PAGE_SIZE) { + if (submit || bio_add_page(comp_bio, page, pg_len, 0) < pg_len) { unsigned int nr_sectors; ret = btrfs_bio_wq_end_io(fs_info, comp_bio, @@ -743,9 +767,9 @@ blk_status_t btrfs_submit_compressed_read(struct inode *inode, struct bio *bio, comp_bio->bi_private = cb; comp_bio->bi_end_io = end_compressed_bio_read; - bio_add_page(comp_bio, page, PAGE_SIZE, 0); + bio_add_page(comp_bio, page, pg_len, 0); } - cur_disk_byte += PAGE_SIZE; + cur_disk_byte += pg_len; } ret = btrfs_bio_wq_end_io(fs_info, comp_bio, BTRFS_WQ_ENDIO_DATA); @@ -1237,7 +1261,6 @@ int btrfs_decompress_buf2page(const char *buf, unsigned long buf_start, unsigned long prev_start_byte; unsigned long working_bytes = total_out - buf_start; unsigned long bytes; - char *kaddr; struct bio_vec bvec = bio_iter_iovec(bio, bio->bi_iter); /* @@ -1268,9 +1291,8 @@ int btrfs_decompress_buf2page(const char *buf, unsigned long buf_start, PAGE_SIZE - (buf_offset % PAGE_SIZE)); bytes = min(bytes, working_bytes); - kaddr = kmap_atomic(bvec.bv_page); - memcpy(kaddr + bvec.bv_offset, buf + buf_offset, bytes); - kunmap_atomic(kaddr); + memcpy_to_page(bvec.bv_page, bvec.bv_offset, buf + buf_offset, + bytes); flush_dcache_page(bvec.bv_page); buf_offset += bytes; diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index d56730a67885..34b929bd5c1a 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -1365,7 +1365,9 @@ get_old_root(struct btrfs_root *root, u64 time_seq) "failed to read tree block %llu from get_old_root", logical); } else { + btrfs_tree_read_lock(old); eb = btrfs_clone_extent_buffer(old); + btrfs_tree_read_unlock(old); free_extent_buffer(old); } } else if (old_root) { diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index bd659354d043..9ae776ab3967 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -524,6 +524,11 @@ struct btrfs_swapfile_pin { * points to a struct btrfs_device. */ bool is_block_group; + /* + * Only used when 'is_block_group' is true and it is the number of + * extents used by a swapfile for this block group ('ptr' field). + */ + int bg_extent_count; }; bool btrfs_pinned_by_swapfile(struct btrfs_fs_info *fs_info, void *ptr); diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c index ec0b50b8c5d6..bf25401c9768 100644 --- a/fs/btrfs/delayed-inode.c +++ b/fs/btrfs/delayed-inode.c @@ -627,7 +627,8 @@ static int btrfs_delayed_inode_reserve_metadata( */ if (!src_rsv || (!trans->bytes_reserved && src_rsv->type != BTRFS_BLOCK_RSV_DELALLOC)) { - ret = btrfs_qgroup_reserve_meta_prealloc(root, num_bytes, true); + ret = btrfs_qgroup_reserve_meta(root, num_bytes, + BTRFS_QGROUP_RSV_META_PREALLOC, true); if (ret < 0) return ret; ret = btrfs_block_rsv_add(root, dst_rsv, num_bytes, @@ -649,7 +650,7 @@ static int btrfs_delayed_inode_reserve_metadata( btrfs_ino(inode), num_bytes, 1); } else { - btrfs_qgroup_free_meta_prealloc(root, fs_info->nodesize); + btrfs_qgroup_free_meta_prealloc(root, num_bytes); } return ret; } diff --git a/fs/btrfs/dev-replace.c b/fs/btrfs/dev-replace.c index 3a9c1e046ebe..d05f73530af7 100644 --- a/fs/btrfs/dev-replace.c +++ b/fs/btrfs/dev-replace.c @@ -81,6 +81,9 @@ int btrfs_init_dev_replace(struct btrfs_fs_info *fs_info) struct btrfs_dev_replace_item *ptr; u64 src_devid; + if (!dev_root) + return 0; + path = btrfs_alloc_path(); if (!path) { ret = -ENOMEM; diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 41b718cfea40..289f1f09481d 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -2387,8 +2387,9 @@ static int btrfs_read_roots(struct btrfs_fs_info *fs_info) } else { set_bit(BTRFS_ROOT_TRACK_DIRTY, &root->state); fs_info->dev_root = root; - btrfs_init_devices_late(fs_info); } + /* Initialize fs_info for all devices in any case */ + btrfs_init_devices_late(fs_info); /* If IGNOREDATACSUMS is set don't bother reading the csum root. */ if (!btrfs_test_opt(fs_info, IGNOREDATACSUMS)) { @@ -3009,6 +3010,21 @@ int btrfs_start_pre_rw_mount(struct btrfs_fs_info *fs_info) } } + /* + * btrfs_find_orphan_roots() is responsible for finding all the dead + * roots (with 0 refs), flag them with BTRFS_ROOT_DEAD_TREE and load + * them into the fs_info->fs_roots_radix tree. This must be done before + * calling btrfs_orphan_cleanup() on the tree root. If we don't do it + * first, then btrfs_orphan_cleanup() will delete a dead root's orphan + * item before the root's tree is deleted - this means that if we unmount + * or crash before the deletion completes, on the next mount we will not + * delete what remains of the tree because the orphan item does not + * exists anymore, which is what tells us we have a pending deletion. + */ + ret = btrfs_find_orphan_roots(fs_info); + if (ret) + goto out; + ret = btrfs_cleanup_fs_roots(fs_info); if (ret) goto out; @@ -3068,7 +3084,6 @@ int btrfs_start_pre_rw_mount(struct btrfs_fs_info *fs_info) } } - ret = btrfs_find_orphan_roots(fs_info); out: return ret; } diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 78ad31a59e59..36a3c973fda1 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -3323,6 +3323,7 @@ void btrfs_free_tree_block(struct btrfs_trans_handle *trans, if (last_ref && btrfs_header_generation(buf) == trans->transid) { struct btrfs_block_group *cache; + bool must_pin = false; if (root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID) { ret = check_ref_cleanup(trans, buf->start); @@ -3340,7 +3341,27 @@ void btrfs_free_tree_block(struct btrfs_trans_handle *trans, goto out; } - if (btrfs_is_zoned(fs_info)) { + /* + * If this is a leaf and there are tree mod log users, we may + * have recorded mod log operations that point to this leaf. + * So we must make sure no one reuses this leaf's extent before + * mod log operations are applied to a node, otherwise after + * rewinding a node using the mod log operations we get an + * inconsistent btree, as the leaf's extent may now be used as + * a node or leaf for another different btree. + * We are safe from races here because at this point no other + * node or root points to this extent buffer, so if after this + * check a new tree mod log user joins, it will not be able to + * find a node pointing to this leaf and record operations that + * point to this leaf. + */ + if (btrfs_header_level(buf) == 0) { + read_lock(&fs_info->tree_mod_log_lock); + must_pin = !list_empty(&fs_info->tree_mod_seq_list); + read_unlock(&fs_info->tree_mod_log_lock); + } + + if (must_pin || btrfs_is_zoned(fs_info)) { btrfs_redirty_list_add(trans->transaction, buf); pin_down_extent(trans, cache, buf->start, buf->len, 1); btrfs_put_block_group(cache); diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 4dfb3ead1175..910769d5fcdb 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -2886,6 +2886,35 @@ static void end_page_read(struct page *page, bool uptodate, u64 start, u32 len) } /* + * Find extent buffer for a givne bytenr. + * + * This is for end_bio_extent_readpage(), thus we can't do any unsafe locking + * in endio context. + */ +static struct extent_buffer *find_extent_buffer_readpage( + struct btrfs_fs_info *fs_info, struct page *page, u64 bytenr) +{ + struct extent_buffer *eb; + + /* + * For regular sectorsize, we can use page->private to grab extent + * buffer + */ + if (fs_info->sectorsize == PAGE_SIZE) { + ASSERT(PagePrivate(page) && page->private); + return (struct extent_buffer *)page->private; + } + + /* For subpage case, we need to lookup buffer radix tree */ + rcu_read_lock(); + eb = radix_tree_lookup(&fs_info->buffer_radix, + bytenr >> fs_info->sectorsize_bits); + rcu_read_unlock(); + ASSERT(eb); + return eb; +} + +/* * after a readpage IO is done, we need to: * clear the uptodate bits on error * set the uptodate bits if things worked @@ -2996,7 +3025,7 @@ static void end_bio_extent_readpage(struct bio *bio) } else { struct extent_buffer *eb; - eb = (struct extent_buffer *)page->private; + eb = find_extent_buffer_readpage(fs_info, page, start); set_bit(EXTENT_BUFFER_READ_ERR, &eb->bflags); eb->read_mirror = mirror; atomic_dec(&eb->io_pages); @@ -3008,12 +3037,23 @@ readpage_ok: if (likely(uptodate)) { loff_t i_size = i_size_read(inode); pgoff_t end_index = i_size >> PAGE_SHIFT; - unsigned off; - /* Zero out the end if this page straddles i_size */ - off = offset_in_page(i_size); - if (page->index == end_index && off) - zero_user_segment(page, off, PAGE_SIZE); + /* + * Zero out the remaining part if this range straddles + * i_size. + * + * Here we should only zero the range inside the bvec, + * not touch anything else. + * + * NOTE: i_size is exclusive while end is inclusive. + */ + if (page->index == end_index && i_size <= end) { + u32 zero_start = max(offset_in_page(i_size), + offset_in_page(start)); + + zero_user_segment(page, zero_start, + offset_in_page(end) + 1); + } } ASSERT(bio_offset + len > bio_offset); bio_offset += len; @@ -3048,7 +3088,7 @@ struct bio *btrfs_bio_alloc(u64 first_byte) { struct bio *bio; - bio = bio_alloc_bioset(GFP_NOFS, BIO_MAX_PAGES, &btrfs_bioset); + bio = bio_alloc_bioset(GFP_NOFS, BIO_MAX_VECS, &btrfs_bioset); bio->bi_iter.bi_sector = first_byte >> 9; btrfs_io_bio_init(btrfs_io_bio(bio)); return bio; diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index bf2c51a9607a..0e155f013839 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -3260,8 +3260,11 @@ reserve_space: goto out; ret = btrfs_qgroup_reserve_data(BTRFS_I(inode), &data_reserved, alloc_start, bytes_to_reserve); - if (ret) + if (ret) { + unlock_extent_cached(&BTRFS_I(inode)->io_tree, lockstart, + lockend, &cached_state); goto out; + } ret = btrfs_prealloc_file_range(inode, mode, alloc_start, alloc_end - alloc_start, i_blocksize(inode), diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c index 5400294bd271..9988decd5717 100644 --- a/fs/btrfs/free-space-cache.c +++ b/fs/btrfs/free-space-cache.c @@ -2555,7 +2555,12 @@ static int __btrfs_add_free_space_zoned(struct btrfs_block_group *block_group, to_unusable = size - to_free; ctl->free_space += to_free; - block_group->zone_unusable += to_unusable; + /* + * If the block group is read-only, we should account freed space into + * bytes_readonly. + */ + if (!block_group->ro) + block_group->zone_unusable += to_unusable; spin_unlock(&ctl->tree_lock); if (!used) { spin_lock(&block_group->lock); @@ -2801,8 +2806,10 @@ static void __btrfs_return_cluster_to_free_space( struct rb_node *node; spin_lock(&cluster->lock); - if (cluster->block_group != block_group) - goto out; + if (cluster->block_group != block_group) { + spin_unlock(&cluster->lock); + return; + } cluster->block_group = NULL; cluster->window_start = 0; @@ -2840,8 +2847,6 @@ static void __btrfs_return_cluster_to_free_space( entry->offset, &entry->offset_index, bitmap); } cluster->root = RB_ROOT; - -out: spin_unlock(&cluster->lock); btrfs_put_block_group(block_group); } @@ -3125,8 +3130,6 @@ u64 btrfs_alloc_from_cluster(struct btrfs_block_group *block_group, entry->bytes -= bytes; } - if (entry->bytes == 0) - rb_erase(&entry->offset_index, &cluster->root); break; } out: @@ -3143,7 +3146,10 @@ out: ctl->free_space -= bytes; if (!entry->bitmap && !btrfs_free_space_trimmed(entry)) ctl->discardable_bytes[BTRFS_STAT_CURR] -= bytes; + + spin_lock(&cluster->lock); if (entry->bytes == 0) { + rb_erase(&entry->offset_index, &cluster->root); ctl->free_extents--; if (entry->bitmap) { kmem_cache_free(btrfs_free_space_bitmap_cachep, @@ -3156,6 +3162,7 @@ out: kmem_cache_free(btrfs_free_space_cachep, entry); } + spin_unlock(&cluster->lock); spin_unlock(&ctl->tree_lock); return ret; diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 2e1c282c202d..a520775949a0 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -1674,9 +1674,6 @@ next_slot: */ btrfs_release_path(path); - /* If extent is RO, we must COW it */ - if (btrfs_extent_readonly(fs_info, disk_bytenr)) - goto out_check; ret = btrfs_cross_ref_exist(root, ino, found_key.offset - extent_offset, disk_bytenr, false); @@ -1723,6 +1720,7 @@ next_slot: WARN_ON_ONCE(freespace_inode); goto out_check; } + /* If the extent's block group is RO, we must COW */ if (!btrfs_inc_nocow_writers(fs_info, disk_bytenr)) goto out_check; nocow = true; @@ -3101,11 +3099,13 @@ void btrfs_writepage_endio_finish_ordered(struct page *page, u64 start, * @bio_offset: offset to the beginning of the bio (in bytes) * @page: page where is the data to be verified * @pgoff: offset inside the page + * @start: logical offset in the file * * The length of such check is always one sector size. */ static int check_data_csum(struct inode *inode, struct btrfs_io_bio *io_bio, - u32 bio_offset, struct page *page, u32 pgoff) + u32 bio_offset, struct page *page, u32 pgoff, + u64 start) { struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); SHASH_DESC_ON_STACK(shash, fs_info->csum_shash); @@ -3132,8 +3132,8 @@ static int check_data_csum(struct inode *inode, struct btrfs_io_bio *io_bio, kunmap_atomic(kaddr); return 0; zeroit: - btrfs_print_data_csum_error(BTRFS_I(inode), page_offset(page) + pgoff, - csum, csum_expected, io_bio->mirror_num); + btrfs_print_data_csum_error(BTRFS_I(inode), start, csum, csum_expected, + io_bio->mirror_num); if (io_bio->device) btrfs_dev_stat_inc_and_print(io_bio->device, BTRFS_DEV_STAT_CORRUPTION_ERRS); @@ -3186,7 +3186,8 @@ int btrfs_verify_data_csum(struct btrfs_io_bio *io_bio, u32 bio_offset, pg_off += sectorsize, bio_offset += sectorsize) { int ret; - ret = check_data_csum(inode, io_bio, bio_offset, page, pg_off); + ret = check_data_csum(inode, io_bio, bio_offset, page, pg_off, + page_offset(page) + pg_off); if (ret < 0) return -EIO; } @@ -6085,7 +6086,7 @@ static int btrfs_dirty_inode(struct inode *inode) return PTR_ERR(trans); ret = btrfs_update_inode(trans, root, BTRFS_I(inode)); - if (ret && ret == -ENOSPC) { + if (ret && (ret == -ENOSPC || ret == -EDQUOT)) { /* whoops, lets try again with the full transaction */ btrfs_end_transaction(trans); trans = btrfs_start_transaction(root, 1); @@ -7912,7 +7913,8 @@ static blk_status_t btrfs_check_read_dio_bio(struct inode *inode, ASSERT(pgoff < PAGE_SIZE); if (uptodate && (!csum || !check_data_csum(inode, io_bio, - bio_offset, bvec.bv_page, pgoff))) { + bio_offset, bvec.bv_page, + pgoff, start))) { clean_io_failure(fs_info, failure_tree, io_tree, start, bvec.bv_page, btrfs_ino(BTRFS_I(inode)), @@ -8171,10 +8173,6 @@ static blk_qc_t btrfs_submit_direct(struct inode *inode, struct iomap *iomap, bio->bi_end_io = btrfs_end_dio_bio; btrfs_io_bio(bio)->logical = file_offset; - WARN_ON_ONCE(write && btrfs_is_zoned(fs_info) && - fs_info->max_zone_append_size && - bio_op(bio) != REQ_OP_ZONE_APPEND); - if (bio_op(bio) == REQ_OP_ZONE_APPEND) { status = extract_ordered_extent(BTRFS_I(inode), bio, file_offset); @@ -9010,7 +9008,7 @@ int __init btrfs_init_cachep(void) btrfs_free_space_bitmap_cachep = kmem_cache_create("btrfs_free_space_bitmap", PAGE_SIZE, PAGE_SIZE, - SLAB_RED_ZONE, NULL); + SLAB_MEM_SPREAD, NULL); if (!btrfs_free_space_bitmap_cachep) goto fail; @@ -9879,6 +9877,7 @@ static struct btrfs_trans_handle *insert_prealloc_file_extent( struct btrfs_path *path; u64 start = ins->objectid; u64 len = ins->offset; + int qgroup_released; int ret; memset(&stack_fi, 0, sizeof(stack_fi)); @@ -9891,16 +9890,16 @@ static struct btrfs_trans_handle *insert_prealloc_file_extent( btrfs_set_stack_file_extent_compression(&stack_fi, BTRFS_COMPRESS_NONE); /* Encryption and other encoding is reserved and all 0 */ - ret = btrfs_qgroup_release_data(inode, file_offset, len); - if (ret < 0) - return ERR_PTR(ret); + qgroup_released = btrfs_qgroup_release_data(inode, file_offset, len); + if (qgroup_released < 0) + return ERR_PTR(qgroup_released); if (trans) { ret = insert_reserved_file_extent(trans, inode, file_offset, &stack_fi, - true, ret); + true, qgroup_released); if (ret) - return ERR_PTR(ret); + goto free_qgroup; return trans; } @@ -9911,21 +9910,35 @@ static struct btrfs_trans_handle *insert_prealloc_file_extent( extent_info.file_offset = file_offset; extent_info.extent_buf = (char *)&stack_fi; extent_info.is_new_extent = true; - extent_info.qgroup_reserved = ret; + extent_info.qgroup_reserved = qgroup_released; extent_info.insertions = 0; path = btrfs_alloc_path(); - if (!path) - return ERR_PTR(-ENOMEM); + if (!path) { + ret = -ENOMEM; + goto free_qgroup; + } ret = btrfs_replace_file_extents(&inode->vfs_inode, path, file_offset, file_offset + len - 1, &extent_info, &trans); btrfs_free_path(path); if (ret) - return ERR_PTR(ret); - + goto free_qgroup; return trans; + +free_qgroup: + /* + * We have released qgroup data range at the beginning of the function, + * and normally qgroup_released bytes will be freed when committing + * transaction. + * But if we error out early, we have to free what we have released + * or we leak qgroup data reservation. + */ + btrfs_qgroup_free_refroot(inode->root->fs_info, + inode->root->root_key.objectid, qgroup_released, + BTRFS_QGROUP_RSV_DATA); + return ERR_PTR(ret); } static int __btrfs_prealloc_file_range(struct inode *inode, int mode, @@ -10200,6 +10213,7 @@ static int btrfs_add_swapfile_pin(struct inode *inode, void *ptr, sp->ptr = ptr; sp->inode = inode; sp->is_block_group = is_block_group; + sp->bg_extent_count = 1; spin_lock(&fs_info->swapfile_pins_lock); p = &fs_info->swapfile_pins.rb_node; @@ -10213,6 +10227,8 @@ static int btrfs_add_swapfile_pin(struct inode *inode, void *ptr, (sp->ptr == entry->ptr && sp->inode > entry->inode)) { p = &(*p)->rb_right; } else { + if (is_block_group) + entry->bg_extent_count++; spin_unlock(&fs_info->swapfile_pins_lock); kfree(sp); return 1; @@ -10238,8 +10254,11 @@ static void btrfs_free_swapfile_pins(struct inode *inode) sp = rb_entry(node, struct btrfs_swapfile_pin, node); if (sp->inode == inode) { rb_erase(&sp->node, &fs_info->swapfile_pins); - if (sp->is_block_group) + if (sp->is_block_group) { + btrfs_dec_block_group_swap_extents(sp->ptr, + sp->bg_extent_count); btrfs_put_block_group(sp->ptr); + } kfree(sp); } node = next; @@ -10300,7 +10319,8 @@ static int btrfs_swap_activate(struct swap_info_struct *sis, struct file *file, sector_t *span) { struct inode *inode = file_inode(file); - struct btrfs_fs_info *fs_info = BTRFS_I(inode)->root->fs_info; + struct btrfs_root *root = BTRFS_I(inode)->root; + struct btrfs_fs_info *fs_info = root->fs_info; struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; struct extent_state *cached_state = NULL; struct extent_map *em = NULL; @@ -10351,13 +10371,27 @@ static int btrfs_swap_activate(struct swap_info_struct *sis, struct file *file, "cannot activate swapfile while exclusive operation is running"); return -EBUSY; } + + /* + * Prevent snapshot creation while we are activating the swap file. + * We do not want to race with snapshot creation. If snapshot creation + * already started before we bumped nr_swapfiles from 0 to 1 and + * completes before the first write into the swap file after it is + * activated, than that write would fallback to COW. + */ + if (!btrfs_drew_try_write_lock(&root->snapshot_lock)) { + btrfs_exclop_finish(fs_info); + btrfs_warn(fs_info, + "cannot activate swapfile because snapshot creation is in progress"); + return -EINVAL; + } /* * Snapshots can create extents which require COW even if NODATACOW is * set. We use this counter to prevent snapshots. We must increment it * before walking the extents because we don't want a concurrent * snapshot to run after we've already checked the extents. */ - atomic_inc(&BTRFS_I(inode)->root->nr_swapfiles); + atomic_inc(&root->nr_swapfiles); isize = ALIGN_DOWN(inode->i_size, fs_info->sectorsize); @@ -10454,6 +10488,17 @@ static int btrfs_swap_activate(struct swap_info_struct *sis, struct file *file, goto out; } + if (!btrfs_inc_block_group_swap_extents(bg)) { + btrfs_warn(fs_info, + "block group for swapfile at %llu is read-only%s", + bg->start, + atomic_read(&fs_info->scrubs_running) ? + " (scrub running)" : ""); + btrfs_put_block_group(bg); + ret = -EINVAL; + goto out; + } + ret = btrfs_add_swapfile_pin(inode, bg, true); if (ret) { btrfs_put_block_group(bg); @@ -10492,6 +10537,8 @@ out: if (ret) btrfs_swap_deactivate(file); + btrfs_drew_write_unlock(&root->snapshot_lock); + btrfs_exclop_finish(fs_info); if (ret) diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 072e77726e94..e8d53fea4c61 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -1936,7 +1936,10 @@ static noinline int btrfs_ioctl_snap_create_v2(struct file *file, if (vol_args->flags & BTRFS_SUBVOL_RDONLY) readonly = true; if (vol_args->flags & BTRFS_SUBVOL_QGROUP_INHERIT) { - if (vol_args->size > PAGE_SIZE) { + u64 nums; + + if (vol_args->size < sizeof(*inherit) || + vol_args->size > PAGE_SIZE) { ret = -EINVAL; goto free_args; } @@ -1945,6 +1948,20 @@ static noinline int btrfs_ioctl_snap_create_v2(struct file *file, ret = PTR_ERR(inherit); goto free_args; } + + if (inherit->num_qgroups > PAGE_SIZE || + inherit->num_ref_copies > PAGE_SIZE || + inherit->num_excl_copies > PAGE_SIZE) { + ret = -EINVAL; + goto free_inherit; + } + + nums = inherit->num_qgroups + 2 * inherit->num_ref_copies + + 2 * inherit->num_excl_copies; + if (vol_args->size != struct_size(inherit, qgroups, nums)) { + ret = -EINVAL; + goto free_inherit; + } } ret = __btrfs_ioctl_snap_create(file, vol_args->name, vol_args->fd, diff --git a/fs/btrfs/lzo.c b/fs/btrfs/lzo.c index aa9cd11f4b78..9084a950dc09 100644 --- a/fs/btrfs/lzo.c +++ b/fs/btrfs/lzo.c @@ -467,7 +467,7 @@ int lzo_decompress(struct list_head *ws, unsigned char *data_in, destlen = min_t(unsigned long, destlen, PAGE_SIZE); bytes = min_t(unsigned long, destlen, out_len - start_byte); - kaddr = kmap_atomic(dest_page); + kaddr = kmap_local_page(dest_page); memcpy(kaddr, workspace->buf + start_byte, bytes); /* @@ -477,7 +477,7 @@ int lzo_decompress(struct list_head *ws, unsigned char *data_in, */ if (bytes < destlen) memset(kaddr+bytes, 0, destlen-bytes); - kunmap_atomic(kaddr); + kunmap_local(kaddr); out: return ret; } diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c index 808370ada888..f0b9ef13153a 100644 --- a/fs/btrfs/qgroup.c +++ b/fs/btrfs/qgroup.c @@ -226,7 +226,6 @@ static void __del_qgroup_rb(struct btrfs_fs_info *fs_info, { struct btrfs_qgroup_list *list; - btrfs_sysfs_del_one_qgroup(fs_info, qgroup); list_del(&qgroup->dirty); while (!list_empty(&qgroup->groups)) { list = list_first_entry(&qgroup->groups, @@ -243,7 +242,6 @@ static void __del_qgroup_rb(struct btrfs_fs_info *fs_info, list_del(&list->next_member); kfree(list); } - kfree(qgroup); } /* must be called with qgroup_lock held */ @@ -569,6 +567,8 @@ void btrfs_free_qgroup_config(struct btrfs_fs_info *fs_info) qgroup = rb_entry(n, struct btrfs_qgroup, node); rb_erase(n, &fs_info->qgroup_tree); __del_qgroup_rb(fs_info, qgroup); + btrfs_sysfs_del_one_qgroup(fs_info, qgroup); + kfree(qgroup); } /* * We call btrfs_free_qgroup_config() when unmounting @@ -1578,6 +1578,14 @@ int btrfs_remove_qgroup(struct btrfs_trans_handle *trans, u64 qgroupid) spin_lock(&fs_info->qgroup_lock); del_qgroup_rb(fs_info, qgroupid); spin_unlock(&fs_info->qgroup_lock); + + /* + * Remove the qgroup from sysfs now without holding the qgroup_lock + * spinlock, since the sysfs_remove_group() function needs to take + * the mutex kernfs_mutex through kernfs_remove_by_name_ns(). + */ + btrfs_sysfs_del_one_qgroup(fs_info, qgroup); + kfree(qgroup); out: mutex_unlock(&fs_info->qgroup_ioctl_lock); return ret; @@ -3841,8 +3849,8 @@ static int sub_root_meta_rsv(struct btrfs_root *root, int num_bytes, return num_bytes; } -static int qgroup_reserve_meta(struct btrfs_root *root, int num_bytes, - enum btrfs_qgroup_rsv_type type, bool enforce) +int btrfs_qgroup_reserve_meta(struct btrfs_root *root, int num_bytes, + enum btrfs_qgroup_rsv_type type, bool enforce) { struct btrfs_fs_info *fs_info = root->fs_info; int ret; @@ -3873,14 +3881,14 @@ int __btrfs_qgroup_reserve_meta(struct btrfs_root *root, int num_bytes, { int ret; - ret = qgroup_reserve_meta(root, num_bytes, type, enforce); + ret = btrfs_qgroup_reserve_meta(root, num_bytes, type, enforce); if (ret <= 0 && ret != -EDQUOT) return ret; ret = try_flush_qgroup(root); if (ret < 0) return ret; - return qgroup_reserve_meta(root, num_bytes, type, enforce); + return btrfs_qgroup_reserve_meta(root, num_bytes, type, enforce); } void btrfs_qgroup_free_meta_all_pertrans(struct btrfs_root *root) diff --git a/fs/btrfs/qgroup.h b/fs/btrfs/qgroup.h index 50dea9a2d8fb..7283e4f549af 100644 --- a/fs/btrfs/qgroup.h +++ b/fs/btrfs/qgroup.h @@ -361,6 +361,8 @@ int btrfs_qgroup_release_data(struct btrfs_inode *inode, u64 start, u64 len); int btrfs_qgroup_free_data(struct btrfs_inode *inode, struct extent_changeset *reserved, u64 start, u64 len); +int btrfs_qgroup_reserve_meta(struct btrfs_root *root, int num_bytes, + enum btrfs_qgroup_rsv_type type, bool enforce); int __btrfs_qgroup_reserve_meta(struct btrfs_root *root, int num_bytes, enum btrfs_qgroup_rsv_type type, bool enforce); /* Reserve metadata space for pertrans and prealloc type */ diff --git a/fs/btrfs/raid56.c b/fs/btrfs/raid56.c index 8ec34ecb6d68..8c31357f08ed 100644 --- a/fs/btrfs/raid56.c +++ b/fs/btrfs/raid56.c @@ -249,8 +249,6 @@ int btrfs_alloc_stripe_hash_table(struct btrfs_fs_info *info) static void cache_rbio_pages(struct btrfs_raid_bio *rbio) { int i; - char *s; - char *d; int ret; ret = alloc_rbio_pages(rbio); @@ -261,13 +259,7 @@ static void cache_rbio_pages(struct btrfs_raid_bio *rbio) if (!rbio->bio_pages[i]) continue; - s = kmap(rbio->bio_pages[i]); - d = kmap(rbio->stripe_pages[i]); - - copy_page(d, s); - - kunmap(rbio->bio_pages[i]); - kunmap(rbio->stripe_pages[i]); + copy_highpage(rbio->stripe_pages[i], rbio->bio_pages[i]); SetPageUptodate(rbio->stripe_pages[i]); } set_bit(RBIO_CACHE_READY_BIT, &rbio->flags); @@ -2359,16 +2351,21 @@ static noinline void finish_parity_scrub(struct btrfs_raid_bio *rbio, SetPageUptodate(p_page); if (has_qstripe) { + /* RAID6, allocate and map temp space for the Q stripe */ q_page = alloc_page(GFP_NOFS | __GFP_HIGHMEM); if (!q_page) { __free_page(p_page); goto cleanup; } SetPageUptodate(q_page); + pointers[rbio->real_stripes - 1] = kmap(q_page); } atomic_set(&rbio->error, 0); + /* Map the parity stripe just once */ + pointers[nr_data] = kmap(p_page); + for_each_set_bit(pagenr, rbio->dbitmap, rbio->stripe_npages) { struct page *p; void *parity; @@ -2378,16 +2375,8 @@ static noinline void finish_parity_scrub(struct btrfs_raid_bio *rbio, pointers[stripe] = kmap(p); } - /* then add the parity stripe */ - pointers[stripe++] = kmap(p_page); - if (has_qstripe) { - /* - * raid6, add the qstripe and call the - * library function to fill in our p/q - */ - pointers[stripe++] = kmap(q_page); - + /* RAID6, call the library function to fill in our P/Q */ raid6_call.gen_syndrome(rbio->real_stripes, PAGE_SIZE, pointers); } else { @@ -2408,12 +2397,14 @@ static noinline void finish_parity_scrub(struct btrfs_raid_bio *rbio, for (stripe = 0; stripe < nr_data; stripe++) kunmap(page_in_rbio(rbio, stripe, pagenr, 0)); - kunmap(p_page); } + kunmap(p_page); __free_page(p_page); - if (q_page) + if (q_page) { + kunmap(q_page); __free_page(q_page); + } writeback: /* diff --git a/fs/btrfs/reada.c b/fs/btrfs/reada.c index 20fd4aa48a8c..06713a8fe26b 100644 --- a/fs/btrfs/reada.c +++ b/fs/btrfs/reada.c @@ -209,7 +209,7 @@ int btree_readahead_hook(struct extent_buffer *eb, int err) /* find extent */ spin_lock(&fs_info->reada_lock); re = radix_tree_lookup(&fs_info->reada_tree, - eb->start >> PAGE_SHIFT); + eb->start >> fs_info->sectorsize_bits); if (re) re->refcnt++; spin_unlock(&fs_info->reada_lock); @@ -240,7 +240,7 @@ static struct reada_zone *reada_find_zone(struct btrfs_device *dev, u64 logical, zone = NULL; spin_lock(&fs_info->reada_lock); ret = radix_tree_gang_lookup(&dev->reada_zones, (void **)&zone, - logical >> PAGE_SHIFT, 1); + logical >> fs_info->sectorsize_bits, 1); if (ret == 1 && logical >= zone->start && logical <= zone->end) { kref_get(&zone->refcnt); spin_unlock(&fs_info->reada_lock); @@ -283,13 +283,13 @@ static struct reada_zone *reada_find_zone(struct btrfs_device *dev, u64 logical, spin_lock(&fs_info->reada_lock); ret = radix_tree_insert(&dev->reada_zones, - (unsigned long)(zone->end >> PAGE_SHIFT), - zone); + (unsigned long)(zone->end >> fs_info->sectorsize_bits), + zone); if (ret == -EEXIST) { kfree(zone); ret = radix_tree_gang_lookup(&dev->reada_zones, (void **)&zone, - logical >> PAGE_SHIFT, 1); + logical >> fs_info->sectorsize_bits, 1); if (ret == 1 && logical >= zone->start && logical <= zone->end) kref_get(&zone->refcnt); else @@ -315,7 +315,7 @@ static struct reada_extent *reada_find_extent(struct btrfs_fs_info *fs_info, u64 length; int real_stripes; int nzones = 0; - unsigned long index = logical >> PAGE_SHIFT; + unsigned long index = logical >> fs_info->sectorsize_bits; int dev_replace_is_ongoing; int have_zone = 0; @@ -497,7 +497,7 @@ static void reada_extent_put(struct btrfs_fs_info *fs_info, struct reada_extent *re) { int i; - unsigned long index = re->logical >> PAGE_SHIFT; + unsigned long index = re->logical >> fs_info->sectorsize_bits; spin_lock(&fs_info->reada_lock); if (--re->refcnt) { @@ -538,11 +538,12 @@ static void reada_extent_put(struct btrfs_fs_info *fs_info, static void reada_zone_release(struct kref *kref) { struct reada_zone *zone = container_of(kref, struct reada_zone, refcnt); + struct btrfs_fs_info *fs_info = zone->device->fs_info; - lockdep_assert_held(&zone->device->fs_info->reada_lock); + lockdep_assert_held(&fs_info->reada_lock); radix_tree_delete(&zone->device->reada_zones, - zone->end >> PAGE_SHIFT); + zone->end >> fs_info->sectorsize_bits); kfree(zone); } @@ -593,7 +594,7 @@ static int reada_add_block(struct reada_control *rc, u64 logical, static void reada_peer_zones_set_lock(struct reada_zone *zone, int lock) { int i; - unsigned long index = zone->end >> PAGE_SHIFT; + unsigned long index = zone->end >> zone->device->fs_info->sectorsize_bits; for (i = 0; i < zone->ndevs; ++i) { struct reada_zone *peer; @@ -628,7 +629,7 @@ static int reada_pick_zone(struct btrfs_device *dev) (void **)&zone, index, 1); if (ret == 0) break; - index = (zone->end >> PAGE_SHIFT) + 1; + index = (zone->end >> dev->fs_info->sectorsize_bits) + 1; if (zone->locked) { if (zone->elems > top_locked_elems) { top_locked_elems = zone->elems; @@ -709,7 +710,7 @@ static int reada_start_machine_dev(struct btrfs_device *dev) * plugging to speed things up */ ret = radix_tree_gang_lookup(&dev->reada_extents, (void **)&re, - dev->reada_next >> PAGE_SHIFT, 1); + dev->reada_next >> fs_info->sectorsize_bits, 1); if (ret == 0 || re->logical > dev->reada_curr_zone->end) { ret = reada_pick_zone(dev); if (!ret) { @@ -718,7 +719,7 @@ static int reada_start_machine_dev(struct btrfs_device *dev) } re = NULL; ret = radix_tree_gang_lookup(&dev->reada_extents, (void **)&re, - dev->reada_next >> PAGE_SHIFT, 1); + dev->reada_next >> fs_info->sectorsize_bits, 1); } if (ret == 0) { spin_unlock(&fs_info->reada_lock); @@ -885,7 +886,7 @@ static void dump_devs(struct btrfs_fs_info *fs_info, int all) pr_cont(" curr off %llu", device->reada_next - zone->start); pr_cont("\n"); - index = (zone->end >> PAGE_SHIFT) + 1; + index = (zone->end >> fs_info->sectorsize_bits) + 1; } cnt = 0; index = 0; @@ -910,7 +911,7 @@ static void dump_devs(struct btrfs_fs_info *fs_info, int all) } } pr_cont("\n"); - index = (re->logical >> PAGE_SHIFT) + 1; + index = (re->logical >> fs_info->sectorsize_bits) + 1; if (++cnt > 15) break; } @@ -926,7 +927,7 @@ static void dump_devs(struct btrfs_fs_info *fs_info, int all) if (ret == 0) break; if (!re->scheduled) { - index = (re->logical >> PAGE_SHIFT) + 1; + index = (re->logical >> fs_info->sectorsize_bits) + 1; continue; } pr_debug("re: logical %llu size %u list empty %d scheduled %d", @@ -942,7 +943,7 @@ static void dump_devs(struct btrfs_fs_info *fs_info, int all) } } pr_cont("\n"); - index = (re->logical >> PAGE_SHIFT) + 1; + index = (re->logical >> fs_info->sectorsize_bits) + 1; } spin_unlock(&fs_info->reada_lock); } diff --git a/fs/btrfs/ref-verify.c b/fs/btrfs/ref-verify.c index 2b490becbe67..8e026de74c44 100644 --- a/fs/btrfs/ref-verify.c +++ b/fs/btrfs/ref-verify.c @@ -218,11 +218,11 @@ static void __print_stack_trace(struct btrfs_fs_info *fs_info, stack_trace_print(ra->trace, ra->trace_len, 2); } #else -static void inline __save_stack_trace(struct ref_action *ra) +static inline void __save_stack_trace(struct ref_action *ra) { } -static void inline __print_stack_trace(struct btrfs_fs_info *fs_info, +static inline void __print_stack_trace(struct btrfs_fs_info *fs_info, struct ref_action *ra) { btrfs_err(fs_info, " ref-verify: no stacktrace support"); diff --git a/fs/btrfs/reflink.c b/fs/btrfs/reflink.c index b24396cf2f99..762881b777b3 100644 --- a/fs/btrfs/reflink.c +++ b/fs/btrfs/reflink.c @@ -106,12 +106,8 @@ static int copy_inline_to_page(struct btrfs_inode *inode, set_bit(BTRFS_INODE_NO_DELALLOC_FLUSH, &inode->runtime_flags); if (comp_type == BTRFS_COMPRESS_NONE) { - char *map; - - map = kmap(page); - memcpy(map, data_start, datal); + memcpy_to_page(page, 0, data_start, datal); flush_dcache_page(page); - kunmap(page); } else { ret = btrfs_decompress(comp_type, data_start, page, 0, inline_size, datal); @@ -553,6 +549,24 @@ process_slot: */ btrfs_release_path(path); + /* + * When using NO_HOLES and we are cloning a range that covers + * only a hole (no extents) into a range beyond the current + * i_size, punching a hole in the target range will not create + * an extent map defining a hole, because the range starts at or + * beyond current i_size. If the file previously had an i_size + * greater than the new i_size set by this clone operation, we + * need to make sure the next fsync is a full fsync, so that it + * detects and logs a hole covering a range from the current + * i_size to the new i_size. If the clone range covers extents, + * besides a hole, then we know the full sync flag was already + * set by previous calls to btrfs_replace_file_extents() that + * replaced file extent items. + */ + if (last_dest_end >= i_size_read(inode)) + set_bit(BTRFS_INODE_NEEDS_FULL_SYNC, + &BTRFS_I(inode)->runtime_flags); + ret = btrfs_replace_file_extents(inode, path, last_dest_end, destoff + len - 1, NULL, &trans); if (ret) diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c index 582df11d298a..3d9088eab2fc 100644 --- a/fs/btrfs/scrub.c +++ b/fs/btrfs/scrub.c @@ -1428,7 +1428,7 @@ static void scrub_recheck_block_on_raid56(struct btrfs_fs_info *fs_info, if (!first_page->dev->bdev) goto out; - bio = btrfs_io_bio_alloc(BIO_MAX_PAGES); + bio = btrfs_io_bio_alloc(BIO_MAX_VECS); bio_set_dev(bio, first_page->dev->bdev); for (page_num = 0; page_num < sblock->page_count; page_num++) { @@ -3767,6 +3767,13 @@ int scrub_enumerate_chunks(struct scrub_ctx *sctx, * commit_transactions. */ ro_set = 0; + } else if (ret == -ETXTBSY) { + btrfs_warn(fs_info, + "skipping scrub of block group %llu due to active swapfile", + cache->start); + scrub_pause_off(fs_info); + ret = 0; + goto skip_unfreeze; } else { btrfs_warn(fs_info, "failed setting block group ro: %d", ret); @@ -3862,7 +3869,7 @@ done: } else { spin_unlock(&cache->lock); } - +skip_unfreeze: btrfs_unfreeze_block_group(cache); btrfs_put_block_group(cache); if (ret) diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c index f87878274e9f..8f323859156b 100644 --- a/fs/btrfs/send.c +++ b/fs/btrfs/send.c @@ -4932,7 +4932,6 @@ static int put_file_data(struct send_ctx *sctx, u64 offset, u32 len) struct btrfs_fs_info *fs_info = root->fs_info; struct inode *inode; struct page *page; - char *addr; pgoff_t index = offset >> PAGE_SHIFT; pgoff_t last_index; unsigned pg_offset = offset_in_page(offset); @@ -4985,10 +4984,8 @@ static int put_file_data(struct send_ctx *sctx, u64 offset, u32 len) } } - addr = kmap(page); - memcpy(sctx->send_buf + sctx->send_size, addr + pg_offset, - cur_len); - kunmap(page); + memcpy_from_page(sctx->send_buf + sctx->send_size, page, + pg_offset, cur_len); unlock_page(page); put_page(page); index++; diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index f8435641b912..f7a4ad86adee 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -1918,8 +1918,8 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data) btrfs_resize_thread_pool(fs_info, fs_info->thread_pool_size, old_thread_pool_size); - if (btrfs_test_opt(fs_info, FREE_SPACE_TREE) != - btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE) && + if ((bool)btrfs_test_opt(fs_info, FREE_SPACE_TREE) != + (bool)btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE) && (!sb_rdonly(sb) || (*flags & SB_RDONLY))) { btrfs_warn(fs_info, "remount supports changing free space tree only from ro to rw"); diff --git a/fs/btrfs/tree-checker.c b/fs/btrfs/tree-checker.c index 582061c7b547..f4ade821307d 100644 --- a/fs/btrfs/tree-checker.c +++ b/fs/btrfs/tree-checker.c @@ -1453,22 +1453,14 @@ static int check_extent_data_ref(struct extent_buffer *leaf, return -EUCLEAN; } for (; ptr < end; ptr += sizeof(*dref)) { - u64 root_objectid; - u64 owner; u64 offset; - u64 hash; + /* + * We cannot check the extent_data_ref hash due to possible + * overflow from the leaf due to hash collisions. + */ dref = (struct btrfs_extent_data_ref *)ptr; - root_objectid = btrfs_extent_data_ref_root(leaf, dref); - owner = btrfs_extent_data_ref_objectid(leaf, dref); offset = btrfs_extent_data_ref_offset(leaf, dref); - hash = hash_extent_data_ref(root_objectid, owner, offset); - if (unlikely(hash != key->offset)) { - extent_err(leaf, slot, - "invalid extent data ref hash, item has 0x%016llx key has 0x%016llx", - hash, key->offset); - return -EUCLEAN; - } if (unlikely(!IS_ALIGNED(offset, leaf->fs_info->sectorsize))) { extent_err(leaf, slot, "invalid extent data backref offset, have %llu expect aligned to %u", diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index d90695c1ab6c..92a368627791 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -3169,23 +3169,20 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, mutex_lock(&log_root_tree->log_mutex); - index2 = log_root_tree->log_transid % 2; - list_add_tail(&root_log_ctx.list, &log_root_tree->log_ctxs[index2]); - root_log_ctx.log_transid = log_root_tree->log_transid; - if (btrfs_is_zoned(fs_info)) { - mutex_lock(&fs_info->tree_root->log_mutex); if (!log_root_tree->node) { ret = btrfs_alloc_log_tree_node(trans, log_root_tree); if (ret) { - mutex_unlock(&fs_info->tree_log_mutex); mutex_unlock(&log_root_tree->log_mutex); goto out; } } - mutex_unlock(&fs_info->tree_root->log_mutex); } + index2 = log_root_tree->log_transid % 2; + list_add_tail(&root_log_ctx.list, &log_root_tree->log_ctxs[index2]); + root_log_ctx.log_transid = log_root_tree->log_transid; + /* * Now we are safe to update the log_root_tree because we're under the * log_mutex, and we're a current writer so we're holding the commit diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index bc3b33efddc5..1c6810bbaf8b 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -7448,6 +7448,9 @@ static int btrfs_device_init_dev_stats(struct btrfs_device *device, int item_size; int i, ret, slot; + if (!device->fs_info->dev_root) + return 0; + key.objectid = BTRFS_DEV_STATS_OBJECTID; key.type = BTRFS_PERSISTENT_ITEM_KEY; key.offset = device->devid; diff --git a/fs/btrfs/xattr.c b/fs/btrfs/xattr.c index b025102e435f..8a4514283a4b 100644 --- a/fs/btrfs/xattr.c +++ b/fs/btrfs/xattr.c @@ -229,11 +229,33 @@ int btrfs_setxattr_trans(struct inode *inode, const char *name, { struct btrfs_root *root = BTRFS_I(inode)->root; struct btrfs_trans_handle *trans; + const bool start_trans = (current->journal_info == NULL); int ret; - trans = btrfs_start_transaction(root, 2); - if (IS_ERR(trans)) - return PTR_ERR(trans); + if (start_trans) { + /* + * 1 unit for inserting/updating/deleting the xattr + * 1 unit for the inode item update + */ + trans = btrfs_start_transaction(root, 2); + if (IS_ERR(trans)) + return PTR_ERR(trans); + } else { + /* + * This can happen when smack is enabled and a directory is being + * created. It happens through d_instantiate_new(), which calls + * smack_d_instantiate(), which in turn calls __vfs_setxattr() to + * set the transmute xattr (XATTR_NAME_SMACKTRANSMUTE) on the + * inode. We have already reserved space for the xattr and inode + * update at btrfs_mkdir(), so just use the transaction handle. + * We don't join or start a transaction, as that will reset the + * block_rsv of the handle and trigger a warning for the start + * case. + */ + ASSERT(strncmp(name, XATTR_SECURITY_PREFIX, + XATTR_SECURITY_PREFIX_LEN) == 0); + trans = current->journal_info; + } ret = btrfs_setxattr(trans, inode, name, value, size, flags); if (ret) @@ -244,7 +266,8 @@ int btrfs_setxattr_trans(struct inode *inode, const char *name, ret = btrfs_update_inode(trans, root, BTRFS_I(inode)); BUG_ON(ret); out: - btrfs_end_transaction(trans); + if (start_trans) + btrfs_end_transaction(trans); return ret; } diff --git a/fs/btrfs/zlib.c b/fs/btrfs/zlib.c index 05615a1099db..d524acf7b3e5 100644 --- a/fs/btrfs/zlib.c +++ b/fs/btrfs/zlib.c @@ -432,9 +432,8 @@ int zlib_decompress(struct list_head *ws, unsigned char *data_in, PAGE_SIZE - (buf_offset % PAGE_SIZE)); bytes = min(bytes, bytes_left); - kaddr = kmap_atomic(dest_page); - memcpy(kaddr + pg_offset, workspace->buf + buf_offset, bytes); - kunmap_atomic(kaddr); + memcpy_to_page(dest_page, pg_offset, + workspace->buf + buf_offset, bytes); pg_offset += bytes; bytes_left -= bytes; diff --git a/fs/btrfs/zoned.c b/fs/btrfs/zoned.c index d0eb0c8d6269..1f972b75a9ab 100644 --- a/fs/btrfs/zoned.c +++ b/fs/btrfs/zoned.c @@ -269,7 +269,7 @@ int btrfs_get_dev_zone_info(struct btrfs_device *device) sector_t sector = 0; struct blk_zone *zones = NULL; unsigned int i, nreported = 0, nr_zones; - unsigned int zone_sectors; + sector_t zone_sectors; char *model, *emulated; int ret; @@ -658,7 +658,7 @@ int btrfs_sb_log_location_bdev(struct block_device *bdev, int mirror, int rw, u64 *bytenr_ret) { struct blk_zone zones[BTRFS_NR_SB_LOG_ZONES]; - unsigned int zone_sectors; + sector_t zone_sectors; u32 sb_zone; int ret; u8 zone_sectors_shift; diff --git a/fs/btrfs/zstd.c b/fs/btrfs/zstd.c index 9a4871636c6c..8e9626d63976 100644 --- a/fs/btrfs/zstd.c +++ b/fs/btrfs/zstd.c @@ -688,10 +688,8 @@ int zstd_decompress(struct list_head *ws, unsigned char *data_in, bytes = min_t(unsigned long, destlen - pg_offset, workspace->out_buf.size - buf_offset); - kaddr = kmap_atomic(dest_page); - memcpy(kaddr + pg_offset, workspace->out_buf.dst + buf_offset, - bytes); - kunmap_atomic(kaddr); + memcpy_to_page(dest_page, pg_offset, + workspace->out_buf.dst + buf_offset, bytes); pg_offset += bytes; } |