summaryrefslogtreecommitdiffstats
path: root/fs/btrfs/file.c
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2014-12-12 20:15:23 +0100
committerLinus Torvalds <torvalds@linux-foundation.org>2014-12-12 20:15:23 +0100
commitbdeb03cada1c305346505c48e5b1dab37e9acc4e (patch)
treeecbfda926e8b5b621f37150d509f176886ac0d82 /fs/btrfs/file.c
parentMerge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/jik... (diff)
parentMerge branch 'raid56-scrub-replace' of git://github.com/miaoxie/linux-btrfs i... (diff)
downloadlinux-bdeb03cada1c305346505c48e5b1dab37e9acc4e.tar.xz
linux-bdeb03cada1c305346505c48e5b1dab37e9acc4e.zip
Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux-btrfs
Pull btrfs update from Chris Mason: "From a feature point of view, most of the code here comes from Miao Xie and others at Fujitsu to implement scrubbing and replacing devices on raid56. This has been in development for a while, and it's a big improvement. Filipe and Josef have a great assortment of fixes, many of which solve problems corruptions either after a crash or in error conditions. I still have a round two from Filipe for next week that solves corruptions with discard and block group removal" * 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux-btrfs: (62 commits) Btrfs: make get_caching_control unconditionally return the ctl Btrfs: fix unprotected deletion from pending_chunks list Btrfs: fix fs mapping extent map leak Btrfs: fix memory leak after block remove + trimming Btrfs: make btrfs_abort_transaction consider existence of new block groups Btrfs: fix race between writing free space cache and trimming Btrfs: fix race between fs trimming and block group remove/allocation Btrfs, replace: enable dev-replace for raid56 Btrfs: fix freeing used extents after removing empty block group Btrfs: fix crash caused by block group removal Btrfs: fix invalid block group rbtree access after bg is removed Btrfs, raid56: fix use-after-free problem in the final device replace procedure on raid56 Btrfs, replace: write raid56 parity into the replace target device Btrfs, replace: write dirty pages into the replace target device Btrfs, raid56: support parity scrub on raid56 Btrfs, raid56: use a variant to record the operation type Btrfs, scrub: repair the common data on RAID5/6 if it is corrupted Btrfs, raid56: don't change bbio and raid_map Btrfs: remove unnecessary code of stripe_index assignment in __btrfs_map_block Btrfs: remove noused bbio_ret in __btrfs_map_block in condition ...
Diffstat (limited to 'fs/btrfs/file.c')
-rw-r--r--fs/btrfs/file.c51
1 files changed, 41 insertions, 10 deletions
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index a18ceabd99a8..e4090259569b 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -1428,7 +1428,7 @@ static noinline int check_can_nocow(struct inode *inode, loff_t pos,
u64 num_bytes;
int ret;
- ret = btrfs_start_nocow_write(root);
+ ret = btrfs_start_write_no_snapshoting(root);
if (!ret)
return -ENOSPC;
@@ -1451,7 +1451,7 @@ static noinline int check_can_nocow(struct inode *inode, loff_t pos,
ret = can_nocow_extent(inode, lockstart, &num_bytes, NULL, NULL, NULL);
if (ret <= 0) {
ret = 0;
- btrfs_end_nocow_write(root);
+ btrfs_end_write_no_snapshoting(root);
} else {
*write_bytes = min_t(size_t, *write_bytes ,
num_bytes - pos + lockstart);
@@ -1543,7 +1543,7 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file,
btrfs_free_reserved_data_space(inode,
reserve_bytes);
else
- btrfs_end_nocow_write(root);
+ btrfs_end_write_no_snapshoting(root);
break;
}
@@ -1632,7 +1632,7 @@ again:
release_bytes = 0;
if (only_release_metadata)
- btrfs_end_nocow_write(root);
+ btrfs_end_write_no_snapshoting(root);
if (only_release_metadata && copied > 0) {
u64 lockstart = round_down(pos, root->sectorsize);
@@ -1661,7 +1661,7 @@ again:
if (release_bytes) {
if (only_release_metadata) {
- btrfs_end_nocow_write(root);
+ btrfs_end_write_no_snapshoting(root);
btrfs_delalloc_release_metadata(inode, release_bytes);
} else {
btrfs_delalloc_release_space(inode, release_bytes);
@@ -1676,6 +1676,7 @@ static ssize_t __btrfs_direct_write(struct kiocb *iocb,
loff_t pos)
{
struct file *file = iocb->ki_filp;
+ struct inode *inode = file_inode(file);
ssize_t written;
ssize_t written_buffered;
loff_t endbyte;
@@ -1692,8 +1693,15 @@ static ssize_t __btrfs_direct_write(struct kiocb *iocb,
err = written_buffered;
goto out;
}
+ /*
+ * Ensure all data is persisted. We want the next direct IO read to be
+ * able to read what was just written.
+ */
endbyte = pos + written_buffered - 1;
- err = filemap_write_and_wait_range(file->f_mapping, pos, endbyte);
+ err = btrfs_fdatawrite_range(inode, pos, endbyte);
+ if (err)
+ goto out;
+ err = filemap_fdatawait_range(inode->i_mapping, pos, endbyte);
if (err)
goto out;
written += written_buffered;
@@ -1854,10 +1862,7 @@ static int start_ordered_ops(struct inode *inode, loff_t start, loff_t end)
int ret;
atomic_inc(&BTRFS_I(inode)->sync_writers);
- ret = filemap_fdatawrite_range(inode->i_mapping, start, end);
- if (!ret && test_bit(BTRFS_INODE_HAS_ASYNC_EXTENT,
- &BTRFS_I(inode)->runtime_flags))
- ret = filemap_fdatawrite_range(inode->i_mapping, start, end);
+ ret = btrfs_fdatawrite_range(inode, start, end);
atomic_dec(&BTRFS_I(inode)->sync_writers);
return ret;
@@ -2810,3 +2815,29 @@ int btrfs_auto_defrag_init(void)
return 0;
}
+
+int btrfs_fdatawrite_range(struct inode *inode, loff_t start, loff_t end)
+{
+ int ret;
+
+ /*
+ * So with compression we will find and lock a dirty page and clear the
+ * first one as dirty, setup an async extent, and immediately return
+ * with the entire range locked but with nobody actually marked with
+ * writeback. So we can't just filemap_write_and_wait_range() and
+ * expect it to work since it will just kick off a thread to do the
+ * actual work. So we need to call filemap_fdatawrite_range _again_
+ * since it will wait on the page lock, which won't be unlocked until
+ * after the pages have been marked as writeback and so we're good to go
+ * from there. We have to do this otherwise we'll miss the ordered
+ * extents and that results in badness. Please Josef, do not think you
+ * know better and pull this out at some point in the future, it is
+ * right and you are wrong.
+ */
+ ret = filemap_fdatawrite_range(inode->i_mapping, start, end);
+ if (!ret && test_bit(BTRFS_INODE_HAS_ASYNC_EXTENT,
+ &BTRFS_I(inode)->runtime_flags))
+ ret = filemap_fdatawrite_range(inode->i_mapping, start, end);
+
+ return ret;
+}