From 4adaa611020fa6ac65b0ac8db78276af4ec04e63 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Tue, 26 Mar 2013 13:07:00 -0400 Subject: Btrfs: fix race between mmap writes and compression Btrfs uses page_mkwrite to ensure stable pages during crc calculations and mmap workloads. We call clear_page_dirty_for_io before we do any crcs, and this forces any application with the file mapped to wait for the crc to finish before it is allowed to change the file. With compression on, the clear_page_dirty_for_io step is happening after we've compressed the pages. This means the applications might be changing the pages while we are compressing them, and some of those modifications might not hit the disk. This commit adds the clear_page_dirty_for_io before compression starts and makes sure to redirty the page if we have to fallback to uncompressed IO as well. Signed-off-by: Chris Mason Reported-by: Alexandre Oliva cc: stable@vger.kernel.org --- fs/btrfs/inode.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'fs/btrfs/inode.c') diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 1f26888825e2..6a6e13c53086 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -353,6 +353,7 @@ static noinline int compress_file_range(struct inode *inode, int i; int will_compress; int compress_type = root->fs_info->compress_type; + int redirty = 0; /* if this is a small write inside eof, kick off a defrag */ if ((end - start + 1) < 16 * 1024 && @@ -415,6 +416,17 @@ again: if (BTRFS_I(inode)->force_compress) compress_type = BTRFS_I(inode)->force_compress; + /* + * we need to call clear_page_dirty_for_io on each + * page in the range. Otherwise applications with the file + * mmap'd can wander in and change the page contents while + * we are compressing them. + * + * If the compression fails for any reason, we set the pages + * dirty again later on. + */ + extent_range_clear_dirty_for_io(inode, start, end); + redirty = 1; ret = btrfs_compress_pages(compress_type, inode->i_mapping, start, total_compressed, pages, @@ -554,6 +566,8 @@ cleanup_and_bail_uncompressed: __set_page_dirty_nobuffers(locked_page); /* unlocked later on in the async handlers */ } + if (redirty) + extent_range_redirty_for_io(inode, start, end); add_async_extent(async_cow, start, end - start + 1, 0, NULL, 0, BTRFS_COMPRESS_NONE); *num_added += 1; -- cgit v1.2.3 From 6e137ed3f30574f314733d4b7a86ea6523232b14 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Tue, 26 Mar 2013 15:26:55 -0400 Subject: Btrfs: fix space accounting for unlink and rename We are way over-reserving for unlink and rename. Rename is just some random huge number and unlink accounts for tree log operations that don't actually happen during unlink, not to mention the tree log doesn't take from the trans block rsv anyway so it's completely useless. Thanks, Signed-off-by: Josef Bacik --- fs/btrfs/inode.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'fs/btrfs/inode.c') diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 6a6e13c53086..8cab424c75f8 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -3693,11 +3693,9 @@ static struct btrfs_trans_handle *__unlink_start_trans(struct inode *dir, * 1 for the dir item * 1 for the dir index * 1 for the inode ref - * 1 for the inode ref in the tree log - * 2 for the dir entries in the log * 1 for the inode */ - trans = btrfs_start_transaction(root, 8); + trans = btrfs_start_transaction(root, 5); if (!IS_ERR(trans) || PTR_ERR(trans) != -ENOSPC) return trans; @@ -8141,7 +8139,7 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry, * inodes. So 5 * 2 is 10, plus 1 for the new link, so 11 total items * should cover the worst case number of items we'll modify. */ - trans = btrfs_start_transaction(root, 20); + trans = btrfs_start_transaction(root, 11); if (IS_ERR(trans)) { ret = PTR_ERR(trans); goto out_notrans; -- cgit v1.2.3 From 39847c4d3d91f487f9ab3d083ee5d0f8419f105c Mon Sep 17 00:00:00 2001 From: Miao Xie Date: Thu, 28 Mar 2013 08:08:20 +0000 Subject: Btrfs: fix wrong reservation of csums We reserve the space for csums only when we write data into a file, in the other cases, such as tree log, log replay, we don't do reservation, so we can use the reservation of the transaction handle just for the former. And for the latter, we should use the tree's own reservation. But the function - btrfs_csum_file_blocks() didn't differentiate between these two types of the cases, fix it. Signed-off-by: Miao Xie Signed-off-by: Josef Bacik --- fs/btrfs/file-item.c | 2 -- fs/btrfs/inode.c | 2 ++ 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'fs/btrfs/inode.c') diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c index ec160202be3e..b7e529d2860f 100644 --- a/fs/btrfs/file-item.c +++ b/fs/btrfs/file-item.c @@ -728,7 +728,6 @@ int btrfs_csum_file_blocks(struct btrfs_trans_handle *trans, return -ENOMEM; sector_sum = sums->sums; - trans->adding_csums = 1; again: next_offset = (u64)-1; found_next = 0; @@ -899,7 +898,6 @@ next_sector: goto again; } out: - trans->adding_csums = 0; btrfs_free_path(path); return ret; diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 8cab424c75f8..b88381582dab 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -1757,8 +1757,10 @@ static noinline int add_pending_csums(struct btrfs_trans_handle *trans, struct btrfs_ordered_sum *sum; list_for_each_entry(sum, list, list) { + trans->adding_csums = 1; btrfs_csum_file_blocks(trans, BTRFS_I(inode)->root->fs_info->csum_root, sum); + trans->adding_csums = 0; } return 0; } -- cgit v1.2.3