summaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
authorDarrick J. Wong <djwong@kernel.org>2024-10-03 17:09:48 +0200
committerChristian Brauner <brauner@kernel.org>2024-10-07 13:51:47 +0200
commit50793801fc7f6d08def48754fb0f0706b0cfc394 (patch)
tree7383541e667b3aa308eb87d2ead61b419bb992a8 /fs
parentfsdax: remove zeroing code from dax_unshare_iter (diff)
downloadlinux-50793801fc7f6d08def48754fb0f0706b0cfc394.tar.xz
linux-50793801fc7f6d08def48754fb0f0706b0cfc394.zip
fsdax: dax_unshare_iter needs to copy entire blocks
The code that copies data from srcmap to iomap in dax_unshare_iter is very very broken, which bfoster's recent fsx changes have exposed. If the pos and len passed to dax_file_unshare are not aligned to an fsblock boundary, the iter pos and length in the _iter function will reflect this unalignment. dax_iomap_direct_access always returns a pointer to the start of the kmapped fsdax page, even if its pos argument is in the middle of that page. This is catastrophic for data integrity when iter->pos is not aligned to a page, because daddr/saddr do not point to the same byte in the file as iter->pos. Hence we corrupt user data by copying it to the wrong place. If iter->pos + iomap_length() in the _iter function not aligned to a page, then we fail to copy a full block, and only partially populate the destination block. This is catastrophic for data confidentiality because we expose stale pmem contents. Fix both of these issues by aligning copy_pos/copy_len to a page boundary (remember, this is fsdax so 1 fsblock == 1 base page) so that we always copy full blocks. We're not done yet -- there's no call to invalidate_inode_pages2_range, so programs that have the file range mmap'd will continue accessing the old memory mapping after the file metadata updates have completed. Be careful with the return value -- if the unshare succeeds, we still need to return the number of bytes that the iomap iter thinks we're operating on. Cc: ruansy.fnst@fujitsu.com Fixes: d984648e428b ("fsdax,xfs: port unshare to fsdax") Signed-off-by: Darrick J. Wong <djwong@kernel.org> Link: https://lore.kernel.org/r/172796813328.1131942.16777025316348797355.stgit@frogsfrogsfrogs Reviewed-by: Christoph Hellwig <hch@lst.de> Signed-off-by: Christian Brauner <brauner@kernel.org>
Diffstat (limited to 'fs')
-rw-r--r--fs/dax.c34
1 files changed, 27 insertions, 7 deletions
diff --git a/fs/dax.c b/fs/dax.c
index 9fbbdaa784b4..21b47402b3dc 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -1262,26 +1262,46 @@ static s64 dax_unshare_iter(struct iomap_iter *iter)
{
struct iomap *iomap = &iter->iomap;
const struct iomap *srcmap = iomap_iter_srcmap(iter);
- loff_t pos = iter->pos;
- loff_t length = iomap_length(iter);
+ loff_t copy_pos = iter->pos;
+ u64 copy_len = iomap_length(iter);
+ u32 mod;
int id = 0;
s64 ret = 0;
void *daddr = NULL, *saddr = NULL;
if (!iomap_want_unshare_iter(iter))
- return length;
+ return iomap_length(iter);
+
+ /*
+ * Extend the file range to be aligned to fsblock/pagesize, because
+ * we need to copy entire blocks, not just the byte range specified.
+ * Invalidate the mapping because we're about to CoW.
+ */
+ mod = offset_in_page(copy_pos);
+ if (mod) {
+ copy_len += mod;
+ copy_pos -= mod;
+ }
+
+ mod = offset_in_page(copy_pos + copy_len);
+ if (mod)
+ copy_len += PAGE_SIZE - mod;
+
+ invalidate_inode_pages2_range(iter->inode->i_mapping,
+ copy_pos >> PAGE_SHIFT,
+ (copy_pos + copy_len - 1) >> PAGE_SHIFT);
id = dax_read_lock();
- ret = dax_iomap_direct_access(iomap, pos, length, &daddr, NULL);
+ ret = dax_iomap_direct_access(iomap, copy_pos, copy_len, &daddr, NULL);
if (ret < 0)
goto out_unlock;
- ret = dax_iomap_direct_access(srcmap, pos, length, &saddr, NULL);
+ ret = dax_iomap_direct_access(srcmap, copy_pos, copy_len, &saddr, NULL);
if (ret < 0)
goto out_unlock;
- if (copy_mc_to_kernel(daddr, saddr, length) == 0)
- ret = length;
+ if (copy_mc_to_kernel(daddr, saddr, copy_len) == 0)
+ ret = iomap_length(iter);
else
ret = -EIO;