summaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
authorDarrick J. Wong <darrick.wong@oracle.com>2019-08-15 02:38:09 +0200
committerDarrick J. Wong <darrick.wong@oracle.com>2019-08-19 03:53:25 +0200
commit5d888b481e6abc726b36c86f0bf13af1583bb336 (patch)
tree902c57ac0fe14c9693ab36bd943de335eb23fc6d /fs
parentvfs: fix page locking deadlocks when deduping files (diff)
downloadlinux-5d888b481e6abc726b36c86f0bf13af1583bb336.tar.xz
linux-5d888b481e6abc726b36c86f0bf13af1583bb336.zip
xfs: fix reflink source file racing with directio writes
While trawling through the dedupe file comparison code trying to fix page deadlocking problems, Dave Chinner noticed that the reflink code only takes shared IOLOCK/MMAPLOCKs on the source file. Because page_mkwrite and directio writes do not take the EXCL versions of those locks, this means that reflink can race with writer processes. For pure remapping this can lead to undefined behavior and file corruption; for dedupe this means that we cannot be sure that the contents are identical when we decide to go ahead with the remapping. Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com> Reviewed-by: Christoph Hellwig <hch@lst.de>
Diffstat (limited to 'fs')
-rw-r--r--fs/xfs/xfs_reflink.c63
1 files changed, 37 insertions, 26 deletions
diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c
index c4ec7afd1170..edbe37b7f636 100644
--- a/fs/xfs/xfs_reflink.c
+++ b/fs/xfs/xfs_reflink.c
@@ -1190,11 +1190,11 @@ xfs_reflink_remap_blocks(
}
/*
- * Grab the exclusive iolock for a data copy from src to dest, making
- * sure to abide vfs locking order (lowest pointer value goes first) and
- * breaking the pnfs layout leases on dest before proceeding. The loop
- * is needed because we cannot call the blocking break_layout() with the
- * src iolock held, and therefore have to back out both locks.
+ * Grab the exclusive iolock for a data copy from src to dest, making sure to
+ * abide vfs locking order (lowest pointer value goes first) and breaking the
+ * layout leases before proceeding. The loop is needed because we cannot call
+ * the blocking break_layout() with the iolocks held, and therefore have to
+ * back out both locks.
*/
static int
xfs_iolock_two_inodes_and_break_layout(
@@ -1203,33 +1203,44 @@ xfs_iolock_two_inodes_and_break_layout(
{
int error;
-retry:
- if (src < dest) {
- inode_lock_shared(src);
- inode_lock_nested(dest, I_MUTEX_NONDIR2);
- } else {
- /* src >= dest */
- inode_lock(dest);
- }
+ if (src > dest)
+ swap(src, dest);
- error = break_layout(dest, false);
- if (error == -EWOULDBLOCK) {
- inode_unlock(dest);
- if (src < dest)
- inode_unlock_shared(src);
+retry:
+ /* Wait to break both inodes' layouts before we start locking. */
+ error = break_layout(src, true);
+ if (error)
+ return error;
+ if (src != dest) {
error = break_layout(dest, true);
if (error)
return error;
- goto retry;
}
+
+ /* Lock one inode and make sure nobody got in and leased it. */
+ inode_lock(src);
+ error = break_layout(src, false);
if (error) {
+ inode_unlock(src);
+ if (error == -EWOULDBLOCK)
+ goto retry;
+ return error;
+ }
+
+ if (src == dest)
+ return 0;
+
+ /* Lock the other inode and make sure nobody got in and leased it. */
+ inode_lock_nested(dest, I_MUTEX_NONDIR2);
+ error = break_layout(dest, false);
+ if (error) {
+ inode_unlock(src);
inode_unlock(dest);
- if (src < dest)
- inode_unlock_shared(src);
+ if (error == -EWOULDBLOCK)
+ goto retry;
return error;
}
- if (src > dest)
- inode_lock_shared_nested(src, I_MUTEX_NONDIR2);
+
return 0;
}
@@ -1247,10 +1258,10 @@ xfs_reflink_remap_unlock(
xfs_iunlock(dest, XFS_MMAPLOCK_EXCL);
if (!same_inode)
- xfs_iunlock(src, XFS_MMAPLOCK_SHARED);
+ xfs_iunlock(src, XFS_MMAPLOCK_EXCL);
inode_unlock(inode_out);
if (!same_inode)
- inode_unlock_shared(inode_in);
+ inode_unlock(inode_in);
}
/*
@@ -1325,7 +1336,7 @@ xfs_reflink_remap_prep(
if (same_inode)
xfs_ilock(src, XFS_MMAPLOCK_EXCL);
else
- xfs_lock_two_inodes(src, XFS_MMAPLOCK_SHARED, dest,
+ xfs_lock_two_inodes(src, XFS_MMAPLOCK_EXCL, dest,
XFS_MMAPLOCK_EXCL);
/* Check file eligibility and prepare for block sharing. */