diff options
Diffstat (limited to 'fs/xfs/xfs_reflink.c')
-rw-r--r-- | fs/xfs/xfs_reflink.c | 355 |
1 files changed, 163 insertions, 192 deletions
diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c index 107bf2a2f344..aac83f9d6107 100644 --- a/fs/xfs/xfs_reflink.c +++ b/fs/xfs/xfs_reflink.c @@ -179,7 +179,7 @@ xfs_reflink_trim_around_shared( int error = 0; /* Holes, unwritten, and delalloc extents cannot be shared */ - if (!xfs_is_cow_inode(ip) || !xfs_bmap_is_real_extent(irec)) { + if (!xfs_is_cow_inode(ip) || !xfs_bmap_is_written_extent(irec)) { *shared = false; return 0; } @@ -655,7 +655,7 @@ xfs_reflink_end_cow_extent( * preallocations can leak into the range we are called upon, and we * need to skip them. */ - if (!xfs_bmap_is_real_extent(&got)) { + if (!xfs_bmap_is_written_extent(&got)) { *end_fsb = del.br_startoff; goto out_cancel; } @@ -984,40 +984,28 @@ xfs_reflink_ag_has_free_space( } /* - * Unmap a range of blocks from a file, then map other blocks into the hole. - * The range to unmap is (destoff : destoff + srcioff + irec->br_blockcount). - * The extent irec is mapped into dest at irec->br_startoff. + * Remap the given extent into the file. The dmap blockcount will be set to + * the number of blocks that were actually remapped. */ STATIC int xfs_reflink_remap_extent( struct xfs_inode *ip, - struct xfs_bmbt_irec *irec, - xfs_fileoff_t destoff, + struct xfs_bmbt_irec *dmap, xfs_off_t new_isize) { + struct xfs_bmbt_irec smap; struct xfs_mount *mp = ip->i_mount; - bool real_extent = xfs_bmap_is_real_extent(irec); struct xfs_trans *tp; - unsigned int resblks; - struct xfs_bmbt_irec uirec; - xfs_filblks_t rlen; - xfs_filblks_t unmap_len; xfs_off_t newlen; + int64_t qres, qdelta; + unsigned int resblks; + bool smap_real; + bool dmap_written = xfs_bmap_is_written_extent(dmap); + int nimaps; int error; - unmap_len = irec->br_startoff + irec->br_blockcount - destoff; - trace_xfs_reflink_punch_range(ip, destoff, unmap_len); - - /* No reflinking if we're low on space */ - if (real_extent) { - error = xfs_reflink_ag_has_free_space(mp, - XFS_FSB_TO_AGNO(mp, irec->br_startblock)); - if (error) - goto out; - } - /* Start a rolling transaction to switch the mappings */ - resblks = XFS_EXTENTADD_SPACE_RES(ip->i_mount, XFS_DATA_FORK); + resblks = XFS_EXTENTADD_SPACE_RES(mp, XFS_DATA_FORK); error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, resblks, 0, 0, &tp); if (error) goto out; @@ -1025,87 +1013,147 @@ xfs_reflink_remap_extent( xfs_ilock(ip, XFS_ILOCK_EXCL); xfs_trans_ijoin(tp, ip, 0); - /* If we're not just clearing space, then do we have enough quota? */ - if (real_extent) { - error = xfs_trans_reserve_quota_nblks(tp, ip, - irec->br_blockcount, 0, XFS_QMOPT_RES_REGBLKS); + /* + * Read what's currently mapped in the destination file into smap. + * If smap isn't a hole, we will have to remove it before we can add + * dmap to the destination file. + */ + nimaps = 1; + error = xfs_bmapi_read(ip, dmap->br_startoff, dmap->br_blockcount, + &smap, &nimaps, 0); + if (error) + goto out_cancel; + ASSERT(nimaps == 1 && smap.br_startoff == dmap->br_startoff); + smap_real = xfs_bmap_is_real_extent(&smap); + + /* + * We can only remap as many blocks as the smaller of the two extent + * maps, because we can only remap one extent at a time. + */ + dmap->br_blockcount = min(dmap->br_blockcount, smap.br_blockcount); + ASSERT(dmap->br_blockcount == smap.br_blockcount); + + trace_xfs_reflink_remap_extent_dest(ip, &smap); + + /* + * Two extents mapped to the same physical block must not have + * different states; that's filesystem corruption. Move on to the next + * extent if they're both holes or both the same physical extent. + */ + if (dmap->br_startblock == smap.br_startblock) { + if (dmap->br_state != smap.br_state) + error = -EFSCORRUPTED; + goto out_cancel; + } + + /* If both extents are unwritten, leave them alone. */ + if (dmap->br_state == XFS_EXT_UNWRITTEN && + smap.br_state == XFS_EXT_UNWRITTEN) + goto out_cancel; + + /* No reflinking if the AG of the dest mapping is low on space. */ + if (dmap_written) { + error = xfs_reflink_ag_has_free_space(mp, + XFS_FSB_TO_AGNO(mp, dmap->br_startblock)); if (error) goto out_cancel; } - trace_xfs_reflink_remap(ip, irec->br_startoff, - irec->br_blockcount, irec->br_startblock); - - /* Unmap the old blocks in the data fork. */ - rlen = unmap_len; - while (rlen) { - ASSERT(tp->t_firstblock == NULLFSBLOCK); - error = __xfs_bunmapi(tp, ip, destoff, &rlen, 0, 1); + /* + * Compute quota reservation if we think the quota block counter for + * this file could increase. + * + * Adding a written extent to the extent map can cause a bmbt split, + * and removing a mapped extent from the extent can cause a bmbt split. + * The two operations cannot both cause a split since they operate on + * the same index in the bmap btree, so we only need a reservation for + * one bmbt split if either thing is happening. + * + * If we are mapping a written extent into the file, we need to have + * enough quota block count reservation to handle the blocks in that + * extent. We log only the delta to the quota block counts, so if the + * extent we're unmapping also has blocks allocated to it, we don't + * need a quota reservation for the extent itself. + * + * Note that if we're replacing a delalloc reservation with a written + * extent, we have to take the full quota reservation because removing + * the delalloc reservation gives the block count back to the quota + * count. This is suboptimal, but the VFS flushed the dest range + * before we started. That should have removed all the delalloc + * reservations, but we code defensively. + */ + qres = qdelta = 0; + if (smap_real || dmap_written) + qres = XFS_EXTENTADD_SPACE_RES(mp, XFS_DATA_FORK); + if (!smap_real && dmap_written) + qres += dmap->br_blockcount; + if (qres > 0) { + error = xfs_trans_reserve_quota_nblks(tp, ip, qres, 0, + XFS_QMOPT_RES_REGBLKS); if (error) goto out_cancel; + } + if (smap_real) { /* - * Trim the extent to whatever got unmapped. - * Remember, bunmapi works backwards. + * If the extent we're unmapping is backed by storage (written + * or not), unmap the extent and drop its refcount. */ - uirec.br_startblock = irec->br_startblock + rlen; - uirec.br_startoff = irec->br_startoff + rlen; - uirec.br_blockcount = unmap_len - rlen; - uirec.br_state = irec->br_state; - unmap_len = rlen; - - /* If this isn't a real mapping, we're done. */ - if (!real_extent || uirec.br_blockcount == 0) - goto next_extent; + xfs_bmap_unmap_extent(tp, ip, &smap); + xfs_refcount_decrease_extent(tp, &smap); + qdelta -= smap.br_blockcount; + } else if (smap.br_startblock == DELAYSTARTBLOCK) { + xfs_filblks_t len = smap.br_blockcount; - trace_xfs_reflink_remap(ip, uirec.br_startoff, - uirec.br_blockcount, uirec.br_startblock); - - /* Update the refcount tree */ - xfs_refcount_increase_extent(tp, &uirec); - - /* Map the new blocks into the data fork. */ - xfs_bmap_map_extent(tp, ip, &uirec); + /* + * If the extent we're unmapping is a delalloc reservation, + * we can use the regular bunmapi function to release the + * incore state. Dropping the delalloc reservation takes care + * of the quota reservation for us. + */ + error = __xfs_bunmapi(NULL, ip, smap.br_startoff, &len, 0, 1); + if (error) + goto out_cancel; + ASSERT(len == 0); + } - /* Update quota accounting. */ - xfs_trans_mod_dquot_byino(tp, ip, XFS_TRANS_DQ_BCOUNT, - uirec.br_blockcount); + /* + * If the extent we're sharing is backed by written storage, increase + * its refcount and map it into the file. + */ + if (dmap_written) { + xfs_refcount_increase_extent(tp, dmap); + xfs_bmap_map_extent(tp, ip, dmap); + qdelta += dmap->br_blockcount; + } - /* Update dest isize if needed. */ - newlen = XFS_FSB_TO_B(mp, - uirec.br_startoff + uirec.br_blockcount); - newlen = min_t(xfs_off_t, newlen, new_isize); - if (newlen > i_size_read(VFS_I(ip))) { - trace_xfs_reflink_update_inode_size(ip, newlen); - i_size_write(VFS_I(ip), newlen); - ip->i_d.di_size = newlen; - xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); - } + xfs_trans_mod_dquot_byino(tp, ip, XFS_TRANS_DQ_BCOUNT, qdelta); -next_extent: - /* Process all the deferred stuff. */ - error = xfs_defer_finish(&tp); - if (error) - goto out_cancel; + /* Update dest isize if needed. */ + newlen = XFS_FSB_TO_B(mp, dmap->br_startoff + dmap->br_blockcount); + newlen = min_t(xfs_off_t, newlen, new_isize); + if (newlen > i_size_read(VFS_I(ip))) { + trace_xfs_reflink_update_inode_size(ip, newlen); + i_size_write(VFS_I(ip), newlen); + ip->i_d.di_size = newlen; + xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); } + /* Commit everything and unlock. */ error = xfs_trans_commit(tp); - xfs_iunlock(ip, XFS_ILOCK_EXCL); - if (error) - goto out; - return 0; + goto out_unlock; out_cancel: xfs_trans_cancel(tp); +out_unlock: xfs_iunlock(ip, XFS_ILOCK_EXCL); out: - trace_xfs_reflink_remap_extent_error(ip, error, _RET_IP_); + if (error) + trace_xfs_reflink_remap_extent_error(ip, error, _RET_IP_); return error; } -/* - * Iteratively remap one file's extents (and holes) to another's. - */ +/* Remap a range of one file to the other. */ int xfs_reflink_remap_blocks( struct xfs_inode *src, @@ -1116,25 +1164,22 @@ xfs_reflink_remap_blocks( loff_t *remapped) { struct xfs_bmbt_irec imap; - xfs_fileoff_t srcoff; - xfs_fileoff_t destoff; + struct xfs_mount *mp = src->i_mount; + xfs_fileoff_t srcoff = XFS_B_TO_FSBT(mp, pos_in); + xfs_fileoff_t destoff = XFS_B_TO_FSBT(mp, pos_out); xfs_filblks_t len; - xfs_filblks_t range_len; xfs_filblks_t remapped_len = 0; xfs_off_t new_isize = pos_out + remap_len; int nimaps; int error = 0; - destoff = XFS_B_TO_FSBT(src->i_mount, pos_out); - srcoff = XFS_B_TO_FSBT(src->i_mount, pos_in); - len = XFS_B_TO_FSB(src->i_mount, remap_len); + len = min_t(xfs_filblks_t, XFS_B_TO_FSB(mp, remap_len), + XFS_MAX_FILEOFF); - /* drange = (destoff, destoff + len); srange = (srcoff, srcoff + len) */ - while (len) { - uint lock_mode; + trace_xfs_reflink_remap_blocks(src, srcoff, len, dest, destoff); - trace_xfs_reflink_remap_blocks_loop(src, srcoff, len, - dest, destoff); + while (len > 0) { + unsigned int lock_mode; /* Read extent from the source file */ nimaps = 1; @@ -1143,18 +1188,25 @@ xfs_reflink_remap_blocks( xfs_iunlock(src, lock_mode); if (error) break; - ASSERT(nimaps == 1); - - trace_xfs_reflink_remap_imap(src, srcoff, len, XFS_DATA_FORK, - &imap); + /* + * The caller supposedly flushed all dirty pages in the source + * file range, which means that writeback should have allocated + * or deleted all delalloc reservations in that range. If we + * find one, that's a good sign that something is seriously + * wrong here. + */ + ASSERT(nimaps == 1 && imap.br_startoff == srcoff); + if (imap.br_startblock == DELAYSTARTBLOCK) { + ASSERT(imap.br_startblock != DELAYSTARTBLOCK); + error = -EFSCORRUPTED; + break; + } - /* Translate imap into the destination file. */ - range_len = imap.br_startoff + imap.br_blockcount - srcoff; - imap.br_startoff += destoff - srcoff; + trace_xfs_reflink_remap_extent_src(src, &imap); - /* Clear dest from destoff to the end of imap and map it in. */ - error = xfs_reflink_remap_extent(dest, &imap, destoff, - new_isize); + /* Remap into the destination file at the given offset. */ + imap.br_startoff = destoff; + error = xfs_reflink_remap_extent(dest, &imap, new_isize); if (error) break; @@ -1164,10 +1216,10 @@ xfs_reflink_remap_blocks( } /* Advance drange/srange */ - srcoff += range_len; - destoff += range_len; - len -= range_len; - remapped_len += range_len; + srcoff += imap.br_blockcount; + destoff += imap.br_blockcount; + len -= imap.br_blockcount; + remapped_len += imap.br_blockcount; } if (error) @@ -1178,81 +1230,6 @@ xfs_reflink_remap_blocks( } /* - * Grab the exclusive iolock for a data copy from src to dest, making sure to - * abide vfs locking order (lowest pointer value goes first) and breaking the - * layout leases before proceeding. The loop is needed because we cannot call - * the blocking break_layout() with the iolocks held, and therefore have to - * back out both locks. - */ -static int -xfs_iolock_two_inodes_and_break_layout( - struct inode *src, - struct inode *dest) -{ - int error; - - if (src > dest) - swap(src, dest); - -retry: - /* Wait to break both inodes' layouts before we start locking. */ - error = break_layout(src, true); - if (error) - return error; - if (src != dest) { - error = break_layout(dest, true); - if (error) - return error; - } - - /* Lock one inode and make sure nobody got in and leased it. */ - inode_lock(src); - error = break_layout(src, false); - if (error) { - inode_unlock(src); - if (error == -EWOULDBLOCK) - goto retry; - return error; - } - - if (src == dest) - return 0; - - /* Lock the other inode and make sure nobody got in and leased it. */ - inode_lock_nested(dest, I_MUTEX_NONDIR2); - error = break_layout(dest, false); - if (error) { - inode_unlock(src); - inode_unlock(dest); - if (error == -EWOULDBLOCK) - goto retry; - return error; - } - - return 0; -} - -/* Unlock both inodes after they've been prepped for a range clone. */ -void -xfs_reflink_remap_unlock( - struct file *file_in, - struct file *file_out) -{ - struct inode *inode_in = file_inode(file_in); - struct xfs_inode *src = XFS_I(inode_in); - struct inode *inode_out = file_inode(file_out); - struct xfs_inode *dest = XFS_I(inode_out); - bool same_inode = (inode_in == inode_out); - - xfs_iunlock(dest, XFS_MMAPLOCK_EXCL); - if (!same_inode) - xfs_iunlock(src, XFS_MMAPLOCK_EXCL); - inode_unlock(inode_out); - if (!same_inode) - inode_unlock(inode_in); -} - -/* * If we're reflinking to a point past the destination file's EOF, we must * zero any speculative post-EOF preallocations that sit between the old EOF * and the destination file offset. @@ -1314,18 +1291,12 @@ xfs_reflink_remap_prep( struct xfs_inode *src = XFS_I(inode_in); struct inode *inode_out = file_inode(file_out); struct xfs_inode *dest = XFS_I(inode_out); - bool same_inode = (inode_in == inode_out); - ssize_t ret; + int ret; /* Lock both files against IO */ - ret = xfs_iolock_two_inodes_and_break_layout(inode_in, inode_out); + ret = xfs_ilock2_io_mmap(src, dest); if (ret) return ret; - if (same_inode) - xfs_ilock(src, XFS_MMAPLOCK_EXCL); - else - xfs_lock_two_inodes(src, XFS_MMAPLOCK_EXCL, dest, - XFS_MMAPLOCK_EXCL); /* Check file eligibility and prepare for block sharing. */ ret = -EINVAL; @@ -1339,7 +1310,7 @@ xfs_reflink_remap_prep( ret = generic_remap_file_range_prep(file_in, pos_in, file_out, pos_out, len, remap_flags); - if (ret < 0 || *len == 0) + if (ret || *len == 0) goto out_unlock; /* Attach dquots to dest inode before changing block map */ @@ -1374,9 +1345,9 @@ xfs_reflink_remap_prep( if (ret) goto out_unlock; - return 1; + return 0; out_unlock: - xfs_reflink_remap_unlock(file_in, file_out); + xfs_iunlock2_io_mmap(src, dest); return ret; } |