summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorDave Chinner <dchinner@redhat.com>2010-11-30 05:16:02 +0100
committerAlex Elder <aelder@sgi.com>2010-12-01 14:40:20 +0100
commit309c848002052edbec650075a1eb098b17c17f35 (patch)
tree7e3e38c9ebcfa539716298c0f8a0000b45cffd8e
parentxfs: push stale, pinned buffers on trylock failures (diff)
downloadlinux-309c848002052edbec650075a1eb098b17c17f35.tar.xz
linux-309c848002052edbec650075a1eb098b17c17f35.zip
xfs: delayed alloc blocks beyond EOF are valid after writeback
There is an assumption in parts of XFS that flushing a dirty file will make all the delayed allocation blocks disappear from an inode. That is, that after calling xfs_flush_pages(), ip->i_delayed_blks will be zero. This is an invalid assumption, as we may have speculative preallocation beyond EOF, and those blocks are recorded in ip->i_delayed_blks. A flush of the dirty pages of an inode will not change the state of these blocks beyond EOF, so a non-zero delalloc block count after a flush is valid. The bmap code has an invalid ASSERT() that needs to be removed, and the swapext code has a bug in that while it swaps the data forks around, it fails to swap the i_delayed_blks counter associated with the fork and hence can get the block accounting wrong. Signed-off-by: Dave Chinner <dchinner@redhat.com> Reviewed-by: Christoph Hellwig <hch@lst.de>
-rw-r--r--fs/xfs/xfs_bmap.c9
-rw-r--r--fs/xfs/xfs_dfrag.c13
2 files changed, 20 insertions, 2 deletions
diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c
index 08b179fa9e8f..4111cd3966c7 100644
--- a/fs/xfs/xfs_bmap.c
+++ b/fs/xfs/xfs_bmap.c
@@ -5471,8 +5471,13 @@ xfs_getbmap(
if (error)
goto out_unlock_iolock;
}
-
- ASSERT(ip->i_delayed_blks == 0);
+ /*
+ * even after flushing the inode, there can still be delalloc
+ * blocks on the inode beyond EOF due to speculative
+ * preallocation. These are not removed until the release
+ * function is called or the inode is inactivated. Hence we
+ * cannot assert here that ip->i_delayed_blks == 0.
+ */
}
lock = xfs_ilock_map_shared(ip);
diff --git a/fs/xfs/xfs_dfrag.c b/fs/xfs/xfs_dfrag.c
index 3b9582c60a22..e60490bc00a6 100644
--- a/fs/xfs/xfs_dfrag.c
+++ b/fs/xfs/xfs_dfrag.c
@@ -377,6 +377,19 @@ xfs_swap_extents(
ip->i_d.di_format = tip->i_d.di_format;
tip->i_d.di_format = tmp;
+ /*
+ * The extents in the source inode could still contain speculative
+ * preallocation beyond EOF (e.g. the file is open but not modified
+ * while defrag is in progress). In that case, we need to copy over the
+ * number of delalloc blocks the data fork in the source inode is
+ * tracking beyond EOF so that when the fork is truncated away when the
+ * temporary inode is unlinked we don't underrun the i_delayed_blks
+ * counter on that inode.
+ */
+ ASSERT(tip->i_delayed_blks == 0);
+ tip->i_delayed_blks = ip->i_delayed_blks;
+ ip->i_delayed_blks = 0;
+
ilf_fields = XFS_ILOG_CORE;
switch(ip->i_d.di_format) {