From 06f3ef6e1705612b88aa0b6991e2ac3b8ed3f8ec Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Mon, 12 Jun 2023 18:09:04 -0700 Subject: xfs: don't deplete the reserve pool when trying to shrink the fs Every now and then, xfs/168 fails with this logged in dmesg: Reserve blocks depleted! Consider increasing reserve pool size. EXPERIMENTAL online shrink feature in use. Use at your own risk! Per-AG reservation for AG 1 failed. Filesystem may run out of space. Per-AG reservation for AG 1 failed. Filesystem may run out of space. Error -28 reserving per-AG metadata reserve pool. Corruption of in-memory data (0x8) detected at xfs_ag_shrink_space+0x23c/0x3b0 [xfs] (fs/xfs/libxfs/xfs_ag.c:1007). Shutting down filesystem. It's silly to deplete the reserved blocks pool just to shrink the filesystem, particularly since the fs goes down after that. Fixes: fb2fc1720185 ("xfs: support shrinking unused space in the last AG") Signed-off-by: Darrick J. Wong --- fs/xfs/xfs_fsops.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) (limited to 'fs/xfs') diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c index 13851c0d640b..5f00527b8065 100644 --- a/fs/xfs/xfs_fsops.c +++ b/fs/xfs/xfs_fsops.c @@ -140,9 +140,13 @@ xfs_growfs_data_private( return -EINVAL; } - error = xfs_trans_alloc(mp, &M_RES(mp)->tr_growdata, - (delta > 0 ? XFS_GROWFS_SPACE_RES(mp) : -delta), 0, - XFS_TRANS_RESERVE, &tp); + if (delta > 0) + error = xfs_trans_alloc(mp, &M_RES(mp)->tr_growdata, + XFS_GROWFS_SPACE_RES(mp), 0, XFS_TRANS_RESERVE, + &tp); + else + error = xfs_trans_alloc(mp, &M_RES(mp)->tr_growdata, -delta, 0, + 0, &tp); if (error) return error; -- cgit v1.2.3 From 61d7e8274cd84f574e686b24048ebf29bac861cc Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Mon, 12 Jun 2023 18:09:04 -0700 Subject: xfs: drop EXPERIMENTAL tag for large extent counts This feature has been baking in upstream for ~10mo with no bug reports. It seems to work fine here, let's get rid of the scary warnings? Signed-off-by: Darrick J. Wong Reviewed-by: Dave Chinner --- fs/xfs/xfs_super.c | 4 ---- 1 file changed, 4 deletions(-) (limited to 'fs/xfs') diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c index 4120bd1cba90..08106cbbad85 100644 --- a/fs/xfs/xfs_super.c +++ b/fs/xfs/xfs_super.c @@ -1687,10 +1687,6 @@ xfs_fs_fill_super( goto out_filestream_unmount; } - if (xfs_has_large_extent_counts(mp)) - xfs_warn(mp, - "EXPERIMENTAL Large extent counts feature in use. Use at your own risk!"); - error = xfs_mountfs(mp); if (error) goto out_filestream_unmount; -- cgit v1.2.3 From b294349993716ca67c8bd2183c7d43c28df481fc Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 12 Jun 2023 18:09:23 -0700 Subject: xfs: set FMODE_CAN_ODIRECT instead of a dummy direct_IO method Since commit a2ad63daa88b ("VFS: add FMODE_CAN_ODIRECT file flag") file systems can just set the FMODE_CAN_ODIRECT flag at open time instead of wiring up a dummy direct_IO method to indicate support for direct I/O. Do that for xfs so that noop_direct_IO can eventually be removed. Signed-off-by: Christoph Hellwig Reviewed-by: Dave Chinner Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong --- fs/xfs/xfs_aops.c | 2 -- fs/xfs/xfs_file.c | 2 +- 2 files changed, 1 insertion(+), 3 deletions(-) (limited to 'fs/xfs') diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c index 2ef78aa1d3f6..451942fb38ec 100644 --- a/fs/xfs/xfs_aops.c +++ b/fs/xfs/xfs_aops.c @@ -582,7 +582,6 @@ const struct address_space_operations xfs_address_space_operations = { .release_folio = iomap_release_folio, .invalidate_folio = iomap_invalidate_folio, .bmap = xfs_vm_bmap, - .direct_IO = noop_direct_IO, .migrate_folio = filemap_migrate_folio, .is_partially_uptodate = iomap_is_partially_uptodate, .error_remove_page = generic_error_remove_page, @@ -591,7 +590,6 @@ const struct address_space_operations xfs_address_space_operations = { const struct address_space_operations xfs_dax_aops = { .writepages = xfs_dax_writepages, - .direct_IO = noop_direct_IO, .dirty_folio = noop_dirty_folio, .swap_activate = xfs_iomap_swapfile_activate, }; diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c index aede746541f8..605587fefbd3 100644 --- a/fs/xfs/xfs_file.c +++ b/fs/xfs/xfs_file.c @@ -1172,7 +1172,7 @@ xfs_file_open( if (xfs_is_shutdown(XFS_M(inode->i_sb))) return -EIO; file->f_mode |= FMODE_NOWAIT | FMODE_BUF_RASYNC | FMODE_BUF_WASYNC | - FMODE_DIO_PARALLEL_WRITE; + FMODE_DIO_PARALLEL_WRITE | FMODE_CAN_ODIRECT; return generic_file_open(inode, file); } -- cgit v1.2.3 From c3b880acadc95d6e019eae5d669e072afda24f1b Mon Sep 17 00:00:00 2001 From: Long Li Date: Tue, 13 Jun 2023 08:49:20 -0700 Subject: xfs: fix ag count overflow during growfs I found a corruption during growfs: XFS (loop0): Internal error agbno >= mp->m_sb.sb_agblocks at line 3661 of file fs/xfs/libxfs/xfs_alloc.c. Caller __xfs_free_extent+0x28e/0x3c0 CPU: 0 PID: 573 Comm: xfs_growfs Not tainted 6.3.0-rc7-next-20230420-00001-gda8c95746257 Call Trace: dump_stack_lvl+0x50/0x70 xfs_corruption_error+0x134/0x150 __xfs_free_extent+0x2c1/0x3c0 xfs_ag_extend_space+0x291/0x3e0 xfs_growfs_data+0xd72/0xe90 xfs_file_ioctl+0x5f9/0x14a0 __x64_sys_ioctl+0x13e/0x1c0 do_syscall_64+0x39/0x80 entry_SYSCALL_64_after_hwframe+0x63/0xcd XFS (loop0): Corruption detected. Unmount and run xfs_repair XFS (loop0): Internal error xfs_trans_cancel at line 1097 of file fs/xfs/xfs_trans.c. Caller xfs_growfs_data+0x691/0xe90 CPU: 0 PID: 573 Comm: xfs_growfs Not tainted 6.3.0-rc7-next-20230420-00001-gda8c95746257 Call Trace: dump_stack_lvl+0x50/0x70 xfs_error_report+0x93/0xc0 xfs_trans_cancel+0x2c0/0x350 xfs_growfs_data+0x691/0xe90 xfs_file_ioctl+0x5f9/0x14a0 __x64_sys_ioctl+0x13e/0x1c0 do_syscall_64+0x39/0x80 entry_SYSCALL_64_after_hwframe+0x63/0xcd RIP: 0033:0x7f2d86706577 The bug can be reproduced with the following sequence: # truncate -s 1073741824 xfs_test.img # mkfs.xfs -f -b size=1024 -d agcount=4 xfs_test.img # truncate -s 2305843009213693952 xfs_test.img # mount -o loop xfs_test.img /mnt/test # xfs_growfs -D 1125899907891200 /mnt/test The root cause is that during growfs, user space passed in a large value of newblcoks to xfs_growfs_data_private(), due to current sb_agblocks is too small, new AG count will exceed UINT_MAX. Because of AG number type is unsigned int and it would overflow, that caused nagcount much smaller than the actual value. During AG extent space, delta blocks in xfs_resizefs_init_new_ags() will much larger than the actual value due to incorrect nagcount, even exceed UINT_MAX. This will cause corruption and be detected in __xfs_free_extent. Fix it by growing the filesystem to up to the maximally allowed AGs and not return EINVAL when new AG count overflow. Signed-off-by: Long Li Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong --- fs/xfs/libxfs/xfs_fs.h | 2 ++ fs/xfs/xfs_fsops.c | 13 +++++++++---- 2 files changed, 11 insertions(+), 4 deletions(-) (limited to 'fs/xfs') diff --git a/fs/xfs/libxfs/xfs_fs.h b/fs/xfs/libxfs/xfs_fs.h index 1cfd5bc6520a..9c60ebb328b4 100644 --- a/fs/xfs/libxfs/xfs_fs.h +++ b/fs/xfs/libxfs/xfs_fs.h @@ -257,6 +257,8 @@ typedef struct xfs_fsop_resblks { #define XFS_MAX_AG_BLOCKS (XFS_MAX_AG_BYTES / XFS_MIN_BLOCKSIZE) #define XFS_MAX_CRC_AG_BLOCKS (XFS_MAX_AG_BYTES / XFS_MIN_CRC_BLOCKSIZE) +#define XFS_MAX_AGNUMBER ((xfs_agnumber_t)(NULLAGNUMBER - 1)) + /* keep the maximum size under 2^31 by a small amount */ #define XFS_MAX_LOG_BYTES \ ((2 * 1024 * 1024 * 1024ULL) - XFS_MIN_LOG_BYTES) diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c index 5f00527b8065..839ca9a8d064 100644 --- a/fs/xfs/xfs_fsops.c +++ b/fs/xfs/xfs_fsops.c @@ -115,11 +115,16 @@ xfs_growfs_data_private( nb_div = nb; nb_mod = do_div(nb_div, mp->m_sb.sb_agblocks); - nagcount = nb_div + (nb_mod != 0); - if (nb_mod && nb_mod < XFS_MIN_AG_BLOCKS) { - nagcount--; - nb = (xfs_rfsblock_t)nagcount * mp->m_sb.sb_agblocks; + if (nb_mod && nb_mod >= XFS_MIN_AG_BLOCKS) + nb_div++; + else if (nb_mod) + nb = nb_div * mp->m_sb.sb_agblocks; + + if (nb_div > XFS_MAX_AGNUMBER + 1) { + nb_div = XFS_MAX_AGNUMBER + 1; + nb = nb_div * mp->m_sb.sb_agblocks; } + nagcount = nb_div; delta = nb - mp->m_sb.sb_dblocks; /* * Reject filesystems with a single AG because they are not -- cgit v1.2.3