diff options
Diffstat (limited to 'fs/xfs/xfs_file.c')
-rw-r--r-- | fs/xfs/xfs_file.c | 70 |
1 files changed, 45 insertions, 25 deletions
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c index 3c0749ab9e40..cc3cfb12df53 100644 --- a/fs/xfs/xfs_file.c +++ b/fs/xfs/xfs_file.c @@ -119,8 +119,8 @@ xfs_dir_fsync( return xfs_log_force_inode(ip); } -static xfs_lsn_t -xfs_fsync_lsn( +static xfs_csn_t +xfs_fsync_seq( struct xfs_inode *ip, bool datasync) { @@ -128,7 +128,7 @@ xfs_fsync_lsn( return 0; if (datasync && !(ip->i_itemp->ili_fsync_fields & ~XFS_ILOG_TIMESTAMP)) return 0; - return ip->i_itemp->ili_last_lsn; + return ip->i_itemp->ili_commit_seq; } /* @@ -151,12 +151,12 @@ xfs_fsync_flush_log( int *log_flushed) { int error = 0; - xfs_lsn_t lsn; + xfs_csn_t seq; xfs_ilock(ip, XFS_ILOCK_SHARED); - lsn = xfs_fsync_lsn(ip, datasync); - if (lsn) { - error = xfs_log_force_lsn(ip->i_mount, lsn, XFS_LOG_SYNC, + seq = xfs_fsync_seq(ip, datasync); + if (seq) { + error = xfs_log_force_seq(ip->i_mount, seq, XFS_LOG_SYNC, log_flushed); spin_lock(&ip->i_itemp->ili_lock); @@ -197,9 +197,9 @@ xfs_file_fsync( * inode size in case of an extending write. */ if (XFS_IS_REALTIME_INODE(ip)) - xfs_blkdev_issue_flush(mp->m_rtdev_targp); + blkdev_issue_flush(mp->m_rtdev_targp->bt_bdev); else if (mp->m_logdev_targp != mp->m_ddev_targp) - xfs_blkdev_issue_flush(mp->m_ddev_targp); + blkdev_issue_flush(mp->m_ddev_targp->bt_bdev); /* * Any inode that has dirty modifications in the log is pinned. The @@ -219,7 +219,7 @@ xfs_file_fsync( */ if (!log_flushed && !XFS_IS_REALTIME_INODE(ip) && mp->m_logdev_targp == mp->m_ddev_targp) - xfs_blkdev_issue_flush(mp->m_ddev_targp); + blkdev_issue_flush(mp->m_ddev_targp->bt_bdev); return error; } @@ -384,21 +384,30 @@ restart: } goto restart; } + /* * If the offset is beyond the size of the file, we need to zero any * blocks that fall between the existing EOF and the start of this - * write. If zeroing is needed and we are currently holding the - * iolock shared, we need to update it to exclusive which implies - * having to redo all checks before. + * write. If zeroing is needed and we are currently holding the iolock + * shared, we need to update it to exclusive which implies having to + * redo all checks before. + * + * We need to serialise against EOF updates that occur in IO completions + * here. We want to make sure that nobody is changing the size while we + * do this check until we have placed an IO barrier (i.e. hold the + * XFS_IOLOCK_EXCL) that prevents new IO from being dispatched. The + * spinlock effectively forms a memory barrier once we have the + * XFS_IOLOCK_EXCL so we are guaranteed to see the latest EOF value and + * hence be able to correctly determine if we need to run zeroing. * - * We need to serialise against EOF updates that occur in IO - * completions here. We want to make sure that nobody is changing the - * size while we do this check until we have placed an IO barrier (i.e. - * hold the XFS_IOLOCK_EXCL) that prevents new IO from being dispatched. - * The spinlock effectively forms a memory barrier once we have the - * XFS_IOLOCK_EXCL so we are guaranteed to see the latest EOF value - * and hence be able to correctly determine if we need to run zeroing. + * We can do an unlocked check here safely as IO completion can only + * extend EOF. Truncate is locked out at this point, so the EOF can + * not move backwards, only forwards. Hence we only need to take the + * slow path and spin locks when we are at or beyond the current EOF. */ + if (iocb->ki_pos <= i_size_read(inode)) + goto out; + spin_lock(&ip->i_flags_lock); isize = i_size_read(inode); if (iocb->ki_pos > isize) { @@ -426,7 +435,7 @@ restart: drained_dio = true; goto restart; } - + trace_xfs_zero_eof(ip, isize, iocb->ki_pos - isize); error = iomap_zero_range(inode, isize, iocb->ki_pos - isize, NULL, &xfs_buffered_write_iomap_ops); @@ -435,6 +444,7 @@ restart: } else spin_unlock(&ip->i_flags_lock); +out: return file_modified(file); } @@ -500,7 +510,17 @@ xfs_dio_write_end_io( * other IO completions here to update the EOF. Failing to serialise * here can result in EOF moving backwards and Bad Things Happen when * that occurs. + * + * As IO completion only ever extends EOF, we can do an unlocked check + * here to avoid taking the spinlock. If we land within the current EOF, + * then we do not need to do an extending update at all, and we don't + * need to take the lock to check this. If we race with an update moving + * EOF, then we'll either still be beyond EOF and need to take the lock, + * or we'll be within EOF and we don't need to take it at all. */ + if (offset + size <= i_size_read(inode)) + goto out; + spin_lock(&ip->i_flags_lock); if (offset + size > i_size_read(inode)) { i_size_write(inode, offset + size); @@ -749,18 +769,18 @@ write_retry: */ if (ret == -EDQUOT && !cleared_space) { xfs_iunlock(ip, iolock); - xfs_blockgc_free_quota(ip, XFS_EOF_FLAGS_SYNC); + xfs_blockgc_free_quota(ip, XFS_ICWALK_FLAG_SYNC); cleared_space = true; goto write_retry; } else if (ret == -ENOSPC && !cleared_space) { - struct xfs_eofblocks eofb = {0}; + struct xfs_icwalk icw = {0}; cleared_space = true; xfs_flush_inodes(ip->i_mount); xfs_iunlock(ip, iolock); - eofb.eof_flags = XFS_EOF_FLAGS_SYNC; - xfs_blockgc_free_space(ip->i_mount, &eofb); + icw.icw_flags = XFS_ICWALK_FLAG_SYNC; + xfs_blockgc_free_space(ip->i_mount, &icw); goto write_retry; } |