diff options
author | Darrick J. Wong <darrick.wong@oracle.com> | 2019-12-11 22:19:07 +0100 |
---|---|---|
committer | Darrick J. Wong <darrick.wong@oracle.com> | 2019-12-17 20:19:28 +0100 |
commit | b1de6fc7520fe12949c070af0e8c0e4044cd3420 (patch) | |
tree | cc0344349ff164c80715efecd102d281bc255b91 | |
parent | xfs: stabilize insert range start boundary to avoid COW writeback race (diff) | |
download | linux-b1de6fc7520fe12949c070af0e8c0e4044cd3420.tar.xz linux-b1de6fc7520fe12949c070af0e8c0e4044cd3420.zip |
xfs: fix log reservation overflows when allocating large rt extents
Omar Sandoval reported that a 4G fallocate on the realtime device causes
filesystem shutdowns due to a log reservation overflow that happens when
we log the rtbitmap updates. Factor rtbitmap/rtsummary updates into the
the tr_write and tr_itruncate log reservation calculation.
"The following reproducer results in a transaction log overrun warning
for me:
mkfs.xfs -f -r rtdev=/dev/vdc -d rtinherit=1 -m reflink=0 /dev/vdb
mount -o rtdev=/dev/vdc /dev/vdb /mnt
fallocate -l 4G /mnt/foo
Reported-by: Omar Sandoval <osandov@osandov.com>
Tested-by: Omar Sandoval <osandov@osandov.com>
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
Reviewed-by: Brian Foster <bfoster@redhat.com>
-rw-r--r-- | fs/xfs/libxfs/xfs_trans_resv.c | 96 |
1 files changed, 77 insertions, 19 deletions
diff --git a/fs/xfs/libxfs/xfs_trans_resv.c b/fs/xfs/libxfs/xfs_trans_resv.c index c55cd9a3dec9..824073a839ac 100644 --- a/fs/xfs/libxfs/xfs_trans_resv.c +++ b/fs/xfs/libxfs/xfs_trans_resv.c @@ -197,6 +197,24 @@ xfs_calc_inode_chunk_res( } /* + * Per-extent log reservation for the btree changes involved in freeing or + * allocating a realtime extent. We have to be able to log as many rtbitmap + * blocks as needed to mark inuse MAXEXTLEN blocks' worth of realtime extents, + * as well as the realtime summary block. + */ +unsigned int +xfs_rtalloc_log_count( + struct xfs_mount *mp, + unsigned int num_ops) +{ + unsigned int blksz = XFS_FSB_TO_B(mp, 1); + unsigned int rtbmp_bytes; + + rtbmp_bytes = (MAXEXTLEN / mp->m_sb.sb_rextsize) / NBBY; + return (howmany(rtbmp_bytes, blksz) + 1) * num_ops; +} + +/* * Various log reservation values. * * These are based on the size of the file system block because that is what @@ -218,13 +236,21 @@ xfs_calc_inode_chunk_res( /* * In a write transaction we can allocate a maximum of 2 - * extents. This gives: + * extents. This gives (t1): * the inode getting the new extents: inode size * the inode's bmap btree: max depth * block size * the agfs of the ags from which the extents are allocated: 2 * sector * the superblock free block counter: sector size * the allocation btrees: 2 exts * 2 trees * (2 * max depth - 1) * block size - * And the bmap_finish transaction can free bmap blocks in a join: + * Or, if we're writing to a realtime file (t2): + * the inode getting the new extents: inode size + * the inode's bmap btree: max depth * block size + * the agfs of the ags from which the extents are allocated: 2 * sector + * the superblock free block counter: sector size + * the realtime bitmap: ((MAXEXTLEN / rtextsize) / NBBY) bytes + * the realtime summary: 1 block + * the allocation btrees: 2 trees * (2 * max depth - 1) * block size + * And the bmap_finish transaction can free bmap blocks in a join (t3): * the agfs of the ags containing the blocks: 2 * sector size * the agfls of the ags containing the blocks: 2 * sector size * the super block free block counter: sector size @@ -234,40 +260,72 @@ STATIC uint xfs_calc_write_reservation( struct xfs_mount *mp) { - return XFS_DQUOT_LOGRES(mp) + - max((xfs_calc_inode_res(mp, 1) + + unsigned int t1, t2, t3; + unsigned int blksz = XFS_FSB_TO_B(mp, 1); + + t1 = xfs_calc_inode_res(mp, 1) + + xfs_calc_buf_res(XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK), blksz) + + xfs_calc_buf_res(3, mp->m_sb.sb_sectsize) + + xfs_calc_buf_res(xfs_allocfree_log_count(mp, 2), blksz); + + if (xfs_sb_version_hasrealtime(&mp->m_sb)) { + t2 = xfs_calc_inode_res(mp, 1) + xfs_calc_buf_res(XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK), - XFS_FSB_TO_B(mp, 1)) + + blksz) + xfs_calc_buf_res(3, mp->m_sb.sb_sectsize) + - xfs_calc_buf_res(xfs_allocfree_log_count(mp, 2), - XFS_FSB_TO_B(mp, 1))), - (xfs_calc_buf_res(5, mp->m_sb.sb_sectsize) + - xfs_calc_buf_res(xfs_allocfree_log_count(mp, 2), - XFS_FSB_TO_B(mp, 1)))); + xfs_calc_buf_res(xfs_rtalloc_log_count(mp, 1), blksz) + + xfs_calc_buf_res(xfs_allocfree_log_count(mp, 1), blksz); + } else { + t2 = 0; + } + + t3 = xfs_calc_buf_res(5, mp->m_sb.sb_sectsize) + + xfs_calc_buf_res(xfs_allocfree_log_count(mp, 2), blksz); + + return XFS_DQUOT_LOGRES(mp) + max3(t1, t2, t3); } /* - * In truncating a file we free up to two extents at once. We can modify: + * In truncating a file we free up to two extents at once. We can modify (t1): * the inode being truncated: inode size * the inode's bmap btree: (max depth + 1) * block size - * And the bmap_finish transaction can free the blocks and bmap blocks: + * And the bmap_finish transaction can free the blocks and bmap blocks (t2): * the agf for each of the ags: 4 * sector size * the agfl for each of the ags: 4 * sector size * the super block to reflect the freed blocks: sector size * worst case split in allocation btrees per extent assuming 4 extents: * 4 exts * 2 trees * (2 * max depth - 1) * block size + * Or, if it's a realtime file (t3): + * the agf for each of the ags: 2 * sector size + * the agfl for each of the ags: 2 * sector size + * the super block to reflect the freed blocks: sector size + * the realtime bitmap: 2 exts * ((MAXEXTLEN / rtextsize) / NBBY) bytes + * the realtime summary: 2 exts * 1 block + * worst case split in allocation btrees per extent assuming 2 extents: + * 2 exts * 2 trees * (2 * max depth - 1) * block size */ STATIC uint xfs_calc_itruncate_reservation( struct xfs_mount *mp) { - return XFS_DQUOT_LOGRES(mp) + - max((xfs_calc_inode_res(mp, 1) + - xfs_calc_buf_res(XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK) + 1, - XFS_FSB_TO_B(mp, 1))), - (xfs_calc_buf_res(9, mp->m_sb.sb_sectsize) + - xfs_calc_buf_res(xfs_allocfree_log_count(mp, 4), - XFS_FSB_TO_B(mp, 1)))); + unsigned int t1, t2, t3; + unsigned int blksz = XFS_FSB_TO_B(mp, 1); + + t1 = xfs_calc_inode_res(mp, 1) + + xfs_calc_buf_res(XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK) + 1, blksz); + + t2 = xfs_calc_buf_res(9, mp->m_sb.sb_sectsize) + + xfs_calc_buf_res(xfs_allocfree_log_count(mp, 4), blksz); + + if (xfs_sb_version_hasrealtime(&mp->m_sb)) { + t3 = xfs_calc_buf_res(5, mp->m_sb.sb_sectsize) + + xfs_calc_buf_res(xfs_rtalloc_log_count(mp, 2), blksz) + + xfs_calc_buf_res(xfs_allocfree_log_count(mp, 2), blksz); + } else { + t3 = 0; + } + + return XFS_DQUOT_LOGRES(mp) + max3(t1, t2, t3); } /* |