diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2020-10-14 23:06:06 +0200 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2020-10-14 23:06:06 +0200 |
commit | 2fc61f25fb296827387a5f01129dbc00cbe3ca58 (patch) | |
tree | 5659c402b3a5f357158a7d8e4f284db15ee69e7f /fs/xfs/xfs_buf.c | |
parent | Merge tag 'iomap-5.10-merge-4' of git://git.kernel.org/pub/scm/fs/xfs/xfs-linux (diff) | |
parent | xfs: ensure that fpunch, fcollapse, and finsert operations are aligned to rt ... (diff) | |
download | linux-2fc61f25fb296827387a5f01129dbc00cbe3ca58.tar.xz linux-2fc61f25fb296827387a5f01129dbc00cbe3ca58.zip |
Merge tag 'xfs-5.10-merge-2' of git://git.kernel.org/pub/scm/fs/xfs/xfs-linux
Pull xfs updates from Darrick Wong:
"The biggest changes are two new features for the ondisk metadata: one
to record the sizes of the inode btrees in the AG to increase
redundancy checks and to improve mount times; and a second new feature
to support timestamps until the year 2486.
We also fixed a problem where reflinking into a file that requires
synchronous writes wouldn't actually flush the updates to disk; clean
up a fair amount of cruft; and started fixing some bugs in the
realtime volume code.
Summary:
- Clean up the buffer ioend calling path so that the retry strategy
isn't quite so scattered everywhere.
- Clean up m_sb_bp handling.
- New feature: storing inode btree counts in the AGI to speed up
certain mount time per-AG block reservation operatoins and add a
little more metadata redundancy.
- New feature: Widen inode timestamps and quota grace expiration
timestamps to support dates through the year 2486.
- Get rid of more of our custom buffer allocation API wrappers.
- Use a proper VLA for shortform xattr structure namevals.
- Force the log after reflinking or deduping into a file that is
opened with O_SYNC or O_DSYNC.
- Fix some math errors in the realtime allocator"
* tag 'xfs-5.10-merge-2' of git://git.kernel.org/pub/scm/fs/xfs/xfs-linux: (42 commits)
xfs: ensure that fpunch, fcollapse, and finsert operations are aligned to rt extent size
xfs: make sure the rt allocator doesn't run off the end
xfs: Remove unneeded semicolon
xfs: force the log after remapping a synchronous-writes file
xfs: Convert xfs_attr_sf macros to inline functions
xfs: Use variable-size array for nameval in xfs_attr_sf_entry
xfs: Remove typedef xfs_attr_shortform_t
xfs: remove typedef xfs_attr_sf_entry_t
xfs: Remove kmem_zalloc_large()
xfs: enable big timestamps
xfs: trace timestamp limits
xfs: widen ondisk quota expiration timestamps to handle y2038+
xfs: widen ondisk inode timestamps to deal with y2038+
xfs: redefine xfs_ictimestamp_t
xfs: redefine xfs_timestamp_t
xfs: move xfs_log_dinode_to_disk to the log recovery code
xfs: refactor quota timestamp coding
xfs: refactor default quota grace period setting code
xfs: refactor quota expiration timer modification
xfs: explicitly define inode timestamp range
...
Diffstat (limited to 'fs/xfs/xfs_buf.c')
-rw-r--r-- | fs/xfs/xfs_buf.c | 208 |
1 files changed, 175 insertions, 33 deletions
diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c index d4cdcb6fb2fe..4e4cf91f4f9f 100644 --- a/fs/xfs/xfs_buf.c +++ b/fs/xfs/xfs_buf.c @@ -52,6 +52,15 @@ static kmem_zone_t *xfs_buf_zone; * b_lock (trylock due to inversion) */ +static int __xfs_buf_submit(struct xfs_buf *bp, bool wait); + +static inline int +xfs_buf_submit( + struct xfs_buf *bp) +{ + return __xfs_buf_submit(bp, !(bp->b_flags & XBF_ASYNC)); +} + static inline int xfs_buf_is_vmapped( struct xfs_buf *bp) @@ -751,7 +760,7 @@ found: return 0; } -STATIC int +int _xfs_buf_read( xfs_buf_t *bp, xfs_buf_flags_t flags) @@ -759,7 +768,7 @@ _xfs_buf_read( ASSERT(!(flags & XBF_WRITE)); ASSERT(bp->b_maps[0].bm_bn != XFS_BUF_DADDR_NULL); - bp->b_flags &= ~(XBF_WRITE | XBF_ASYNC | XBF_READ_AHEAD); + bp->b_flags &= ~(XBF_WRITE | XBF_ASYNC | XBF_READ_AHEAD | XBF_DONE); bp->b_flags |= flags & (XBF_READ | XBF_ASYNC | XBF_READ_AHEAD); return xfs_buf_submit(bp); @@ -1170,20 +1179,145 @@ xfs_buf_wait_unpin( set_current_state(TASK_RUNNING); } +static void +xfs_buf_ioerror_alert_ratelimited( + struct xfs_buf *bp) +{ + static unsigned long lasttime; + static struct xfs_buftarg *lasttarg; + + if (bp->b_target != lasttarg || + time_after(jiffies, (lasttime + 5*HZ))) { + lasttime = jiffies; + xfs_buf_ioerror_alert(bp, __this_address); + } + lasttarg = bp->b_target; +} + /* - * Buffer Utility Routines + * Account for this latest trip around the retry handler, and decide if + * we've failed enough times to constitute a permanent failure. */ +static bool +xfs_buf_ioerror_permanent( + struct xfs_buf *bp, + struct xfs_error_cfg *cfg) +{ + struct xfs_mount *mp = bp->b_mount; -void + if (cfg->max_retries != XFS_ERR_RETRY_FOREVER && + ++bp->b_retries > cfg->max_retries) + return true; + if (cfg->retry_timeout != XFS_ERR_RETRY_FOREVER && + time_after(jiffies, cfg->retry_timeout + bp->b_first_retry_time)) + return true; + + /* At unmount we may treat errors differently */ + if ((mp->m_flags & XFS_MOUNT_UNMOUNTING) && mp->m_fail_unmount) + return true; + + return false; +} + +/* + * On a sync write or shutdown we just want to stale the buffer and let the + * caller handle the error in bp->b_error appropriately. + * + * If the write was asynchronous then no one will be looking for the error. If + * this is the first failure of this type, clear the error state and write the + * buffer out again. This means we always retry an async write failure at least + * once, but we also need to set the buffer up to behave correctly now for + * repeated failures. + * + * If we get repeated async write failures, then we take action according to the + * error configuration we have been set up to use. + * + * Returns true if this function took care of error handling and the caller must + * not touch the buffer again. Return false if the caller should proceed with + * normal I/O completion handling. + */ +static bool +xfs_buf_ioend_handle_error( + struct xfs_buf *bp) +{ + struct xfs_mount *mp = bp->b_mount; + struct xfs_error_cfg *cfg; + + /* + * If we've already decided to shutdown the filesystem because of I/O + * errors, there's no point in giving this a retry. + */ + if (XFS_FORCED_SHUTDOWN(mp)) + goto out_stale; + + xfs_buf_ioerror_alert_ratelimited(bp); + + /* + * We're not going to bother about retrying this during recovery. + * One strike! + */ + if (bp->b_flags & _XBF_LOGRECOVERY) { + xfs_force_shutdown(mp, SHUTDOWN_META_IO_ERROR); + return false; + } + + /* + * Synchronous writes will have callers process the error. + */ + if (!(bp->b_flags & XBF_ASYNC)) + goto out_stale; + + trace_xfs_buf_iodone_async(bp, _RET_IP_); + + cfg = xfs_error_get_cfg(mp, XFS_ERR_METADATA, bp->b_error); + if (bp->b_last_error != bp->b_error || + !(bp->b_flags & (XBF_STALE | XBF_WRITE_FAIL))) { + bp->b_last_error = bp->b_error; + if (cfg->retry_timeout != XFS_ERR_RETRY_FOREVER && + !bp->b_first_retry_time) + bp->b_first_retry_time = jiffies; + goto resubmit; + } + + /* + * Permanent error - we need to trigger a shutdown if we haven't already + * to indicate that inconsistency will result from this action. + */ + if (xfs_buf_ioerror_permanent(bp, cfg)) { + xfs_force_shutdown(mp, SHUTDOWN_META_IO_ERROR); + goto out_stale; + } + + /* Still considered a transient error. Caller will schedule retries. */ + if (bp->b_flags & _XBF_INODES) + xfs_buf_inode_io_fail(bp); + else if (bp->b_flags & _XBF_DQUOTS) + xfs_buf_dquot_io_fail(bp); + else + ASSERT(list_empty(&bp->b_li_list)); + xfs_buf_ioerror(bp, 0); + xfs_buf_relse(bp); + return true; + +resubmit: + xfs_buf_ioerror(bp, 0); + bp->b_flags |= (XBF_DONE | XBF_WRITE_FAIL); + xfs_buf_submit(bp); + return true; +out_stale: + xfs_buf_stale(bp); + bp->b_flags |= XBF_DONE; + bp->b_flags &= ~XBF_WRITE; + trace_xfs_buf_error_relse(bp, _RET_IP_); + return false; +} + +static void xfs_buf_ioend( struct xfs_buf *bp) { - bool read = bp->b_flags & XBF_READ; - trace_xfs_buf_iodone(bp, _RET_IP_); - bp->b_flags &= ~(XBF_READ | XBF_WRITE | XBF_READ_AHEAD); - /* * Pull in IO completion errors now. We are guaranteed to be running * single threaded, so we don't need the lock to read b_io_error. @@ -1191,39 +1325,47 @@ xfs_buf_ioend( if (!bp->b_error && bp->b_io_error) xfs_buf_ioerror(bp, bp->b_io_error); - if (read) { + if (bp->b_flags & XBF_READ) { if (!bp->b_error && bp->b_ops) bp->b_ops->verify_read(bp); if (!bp->b_error) bp->b_flags |= XBF_DONE; - xfs_buf_ioend_finish(bp); - return; - } + } else { + if (!bp->b_error) { + bp->b_flags &= ~XBF_WRITE_FAIL; + bp->b_flags |= XBF_DONE; + } - if (!bp->b_error) { - bp->b_flags &= ~XBF_WRITE_FAIL; - bp->b_flags |= XBF_DONE; - } + if (unlikely(bp->b_error) && xfs_buf_ioend_handle_error(bp)) + return; - /* - * If this is a log recovery buffer, we aren't doing transactional IO - * yet so we need to let it handle IO completions. - */ - if (bp->b_flags & _XBF_LOGRECOVERY) { - xlog_recover_iodone(bp); - return; - } + /* clear the retry state */ + bp->b_last_error = 0; + bp->b_retries = 0; + bp->b_first_retry_time = 0; - if (bp->b_flags & _XBF_INODES) { - xfs_buf_inode_iodone(bp); - return; - } + /* + * Note that for things like remote attribute buffers, there may + * not be a buffer log item here, so processing the buffer log + * item must remain optional. + */ + if (bp->b_log_item) + xfs_buf_item_done(bp); + + if (bp->b_flags & _XBF_INODES) + xfs_buf_inode_iodone(bp); + else if (bp->b_flags & _XBF_DQUOTS) + xfs_buf_dquot_iodone(bp); - if (bp->b_flags & _XBF_DQUOTS) { - xfs_buf_dquot_iodone(bp); - return; } - xfs_buf_iodone(bp); + + bp->b_flags &= ~(XBF_READ | XBF_WRITE | XBF_READ_AHEAD | + _XBF_LOGRECOVERY); + + if (bp->b_flags & XBF_ASYNC) + xfs_buf_relse(bp); + else + complete(&bp->b_iowait); } static void @@ -1506,7 +1648,7 @@ xfs_buf_iowait( * safe to reference the buffer after a call to this function unless the caller * holds an additional reference itself. */ -int +static int __xfs_buf_submit( struct xfs_buf *bp, bool wait) |