summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorChandan Babu R <chandan.babu@oracle.com>2023-08-18 10:09:32 +0200
committerChandan Babu R <chandan.babu@oracle.com>2023-08-18 10:09:32 +0200
commit220c8d57f55fcd73191ed06710fcf9bfc801ff8e (patch)
treeed235c4af2df4a2677ca5629a1d1a2fc012e9868
parentMerge tag 'repair-agfl-fixes-6.6_2023-08-10' of https://git.kernel.org/pub/sc... (diff)
parentxfs: don't check reflink iflag state when checking cow fork (diff)
downloadlinux-220c8d57f55fcd73191ed06710fcf9bfc801ff8e.tar.xz
linux-220c8d57f55fcd73191ed06710fcf9bfc801ff8e.zip
Merge tag 'scrub-bmap-fixes-6.6_2023-08-10' of https://git.kernel.org/pub/scm/linux/kernel/git/djwong/xfs-linux into xfs-6.6-mergeA
xfs: fixes for the block mapping checker This series amends the file extent map checking code so that nonexistent cow/attr forks get the ENOENT return they're supposed to; and fixes some incorrect logic about the presence of a cow fork vs. reflink iflag. Signed-off-by: Darrick J. Wong <djwong@kernel.org> Signed-off-by: Chandan Babu R <chandan.babu@oracle.com> * tag 'scrub-bmap-fixes-6.6_2023-08-10' of https://git.kernel.org/pub/scm/linux/kernel/git/djwong/xfs-linux: xfs: don't check reflink iflag state when checking cow fork xfs: simplify returns in xchk_bmap xfs: rewrite xchk_inode_is_allocated to work properly xfs: hide xfs_inode_is_allocated in scrub common code
-rw-r--r--fs/xfs/scrub/bmap.c33
-rw-r--r--fs/xfs/scrub/common.c152
-rw-r--r--fs/xfs/scrub/common.h3
-rw-r--r--fs/xfs/scrub/ialloc.c3
-rw-r--r--fs/xfs/scrub/trace.h22
-rw-r--r--fs/xfs/xfs_icache.c38
-rw-r--r--fs/xfs/xfs_icache.h4
7 files changed, 193 insertions, 62 deletions
diff --git a/fs/xfs/scrub/bmap.c b/fs/xfs/scrub/bmap.c
index 20ab5d4e92ff..75588915572e 100644
--- a/fs/xfs/scrub/bmap.c
+++ b/fs/xfs/scrub/bmap.c
@@ -841,7 +841,7 @@ xchk_bmap(
/* Non-existent forks can be ignored. */
if (!ifp)
- goto out;
+ return -ENOENT;
info.is_rt = whichfork == XFS_DATA_FORK && XFS_IS_REALTIME_INODE(ip);
info.whichfork = whichfork;
@@ -850,10 +850,10 @@ xchk_bmap(
switch (whichfork) {
case XFS_COW_FORK:
- /* No CoW forks on non-reflink inodes/filesystems. */
- if (!xfs_is_reflink_inode(ip)) {
+ /* No CoW forks on non-reflink filesystems. */
+ if (!xfs_has_reflink(mp)) {
xchk_ino_set_corrupt(sc, sc->ip->i_ino);
- goto out;
+ return 0;
}
break;
case XFS_ATTR_FORK:
@@ -873,31 +873,31 @@ xchk_bmap(
/* No mappings to check. */
if (whichfork == XFS_COW_FORK)
xchk_fblock_set_corrupt(sc, whichfork, 0);
- goto out;
+ return 0;
case XFS_DINODE_FMT_EXTENTS:
break;
case XFS_DINODE_FMT_BTREE:
if (whichfork == XFS_COW_FORK) {
xchk_fblock_set_corrupt(sc, whichfork, 0);
- goto out;
+ return 0;
}
error = xchk_bmap_btree(sc, whichfork, &info);
if (error)
- goto out;
+ return error;
break;
default:
xchk_fblock_set_corrupt(sc, whichfork, 0);
- goto out;
+ return 0;
}
if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
- goto out;
+ return 0;
/* Find the offset of the last extent in the mapping. */
error = xfs_bmap_last_offset(ip, &endoff, whichfork);
if (!xchk_fblock_process_error(sc, whichfork, 0, &error))
- goto out;
+ return error;
/*
* Scrub extent records. We use a special iterator function here that
@@ -910,12 +910,12 @@ xchk_bmap(
while (xchk_bmap_iext_iter(&info, &irec)) {
if (xchk_should_terminate(sc, &error) ||
(sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT))
- goto out;
+ return 0;
if (irec.br_startoff >= endoff) {
xchk_fblock_set_corrupt(sc, whichfork,
irec.br_startoff);
- goto out;
+ return 0;
}
if (isnullstartblock(irec.br_startblock))
@@ -928,10 +928,10 @@ xchk_bmap(
if (xchk_bmap_want_check_rmaps(&info)) {
error = xchk_bmap_check_rmaps(sc, whichfork);
if (!xchk_fblock_xref_process_error(sc, whichfork, 0, &error))
- goto out;
+ return error;
}
-out:
- return error;
+
+ return 0;
}
/* Scrub an inode's data fork. */
@@ -955,8 +955,5 @@ int
xchk_bmap_cow(
struct xfs_scrub *sc)
{
- if (!xfs_is_reflink_inode(sc->ip))
- return -ENOENT;
-
return xchk_bmap(sc, XFS_COW_FORK);
}
diff --git a/fs/xfs/scrub/common.c b/fs/xfs/scrub/common.c
index 82d2a85d6461..de24532fe083 100644
--- a/fs/xfs/scrub/common.c
+++ b/fs/xfs/scrub/common.c
@@ -1230,3 +1230,155 @@ xchk_fsgates_enable(
sc->flags |= scrub_fsgates;
}
+
+/*
+ * Decide if this is this a cached inode that's also allocated. The caller
+ * must hold a reference to an AG and the AGI buffer lock to prevent inodes
+ * from being allocated or freed.
+ *
+ * Look up an inode by number in the given file system. If the inode number
+ * is invalid, return -EINVAL. If the inode is not in cache, return -ENODATA.
+ * If the inode is being reclaimed, return -ENODATA because we know the inode
+ * cache cannot be updating the ondisk metadata.
+ *
+ * Otherwise, the incore inode is the one we want, and it is either live,
+ * somewhere in the inactivation machinery, or reclaimable. The inode is
+ * allocated if i_mode is nonzero. In all three cases, the cached inode will
+ * be more up to date than the ondisk inode buffer, so we must use the incore
+ * i_mode.
+ */
+int
+xchk_inode_is_allocated(
+ struct xfs_scrub *sc,
+ xfs_agino_t agino,
+ bool *inuse)
+{
+ struct xfs_mount *mp = sc->mp;
+ struct xfs_perag *pag = sc->sa.pag;
+ xfs_ino_t ino;
+ struct xfs_inode *ip;
+ int error;
+
+ /* caller must hold perag reference */
+ if (pag == NULL) {
+ ASSERT(pag != NULL);
+ return -EINVAL;
+ }
+
+ /* caller must have AGI buffer */
+ if (sc->sa.agi_bp == NULL) {
+ ASSERT(sc->sa.agi_bp != NULL);
+ return -EINVAL;
+ }
+
+ /* reject inode numbers outside existing AGs */
+ ino = XFS_AGINO_TO_INO(sc->mp, pag->pag_agno, agino);
+ if (!xfs_verify_ino(mp, ino))
+ return -EINVAL;
+
+ error = -ENODATA;
+ rcu_read_lock();
+ ip = radix_tree_lookup(&pag->pag_ici_root, agino);
+ if (!ip) {
+ /* cache miss */
+ goto out_rcu;
+ }
+
+ /*
+ * If the inode number doesn't match, the incore inode got reused
+ * during an RCU grace period and the radix tree hasn't been updated.
+ * This isn't the inode we want.
+ */
+ spin_lock(&ip->i_flags_lock);
+ if (ip->i_ino != ino)
+ goto out_skip;
+
+ trace_xchk_inode_is_allocated(ip);
+
+ /*
+ * We have an incore inode that matches the inode we want, and the
+ * caller holds the perag structure and the AGI buffer. Let's check
+ * our assumptions below:
+ */
+
+#ifdef DEBUG
+ /*
+ * (1) If the incore inode is live (i.e. referenced from the dcache),
+ * it will not be INEW, nor will it be in the inactivation or reclaim
+ * machinery. The ondisk inode had better be allocated. This is the
+ * most trivial case.
+ */
+ if (!(ip->i_flags & (XFS_NEED_INACTIVE | XFS_INEW | XFS_IRECLAIMABLE |
+ XFS_INACTIVATING))) {
+ /* live inode */
+ ASSERT(VFS_I(ip)->i_mode != 0);
+ }
+
+ /*
+ * If the incore inode is INEW, there are several possibilities:
+ *
+ * (2) For a file that is being created, note that we allocate the
+ * ondisk inode before allocating, initializing, and adding the incore
+ * inode to the radix tree.
+ *
+ * (3) If the incore inode is being recycled, the inode has to be
+ * allocated because we don't allow freed inodes to be recycled.
+ * Recycling doesn't touch i_mode.
+ */
+ if (ip->i_flags & XFS_INEW) {
+ /* created on disk already or recycling */
+ ASSERT(VFS_I(ip)->i_mode != 0);
+ }
+
+ /*
+ * (4) If the inode is queued for inactivation (NEED_INACTIVE) but
+ * inactivation has not started (!INACTIVATING), it is still allocated.
+ */
+ if ((ip->i_flags & XFS_NEED_INACTIVE) &&
+ !(ip->i_flags & XFS_INACTIVATING)) {
+ /* definitely before difree */
+ ASSERT(VFS_I(ip)->i_mode != 0);
+ }
+#endif
+
+ /*
+ * If the incore inode is undergoing inactivation (INACTIVATING), there
+ * are two possibilities:
+ *
+ * (5) It is before the point where it would get freed ondisk, in which
+ * case i_mode is still nonzero.
+ *
+ * (6) It has already been freed, in which case i_mode is zero.
+ *
+ * We don't take the ILOCK here, but difree and dialloc update the AGI,
+ * and we've taken the AGI buffer lock, which prevents that from
+ * happening.
+ */
+
+ /*
+ * (7) Inodes undergoing inactivation (INACTIVATING) or queued for
+ * reclaim (IRECLAIMABLE) could be allocated or free. i_mode still
+ * reflects the ondisk state.
+ */
+
+ /*
+ * (8) If the inode is in IFLUSHING, it's safe to query i_mode because
+ * the flush code uses i_mode to format the ondisk inode.
+ */
+
+ /*
+ * (9) If the inode is in IRECLAIM and was reachable via the radix
+ * tree, it still has the same i_mode as it did before it entered
+ * reclaim. The inode object is still alive because we hold the RCU
+ * read lock.
+ */
+
+ *inuse = VFS_I(ip)->i_mode != 0;
+ error = 0;
+
+out_skip:
+ spin_unlock(&ip->i_flags_lock);
+out_rcu:
+ rcu_read_unlock();
+ return error;
+}
diff --git a/fs/xfs/scrub/common.h b/fs/xfs/scrub/common.h
index 4f7cb410904d..cabdc0e16838 100644
--- a/fs/xfs/scrub/common.h
+++ b/fs/xfs/scrub/common.h
@@ -203,4 +203,7 @@ static inline bool xchk_need_intent_drain(struct xfs_scrub *sc)
void xchk_fsgates_enable(struct xfs_scrub *sc, unsigned int scrub_fshooks);
+int xchk_inode_is_allocated(struct xfs_scrub *sc, xfs_agino_t agino,
+ bool *inuse);
+
#endif /* __XFS_SCRUB_COMMON_H__ */
diff --git a/fs/xfs/scrub/ialloc.c b/fs/xfs/scrub/ialloc.c
index 575f22a02ebe..fb7bbf47ae5d 100644
--- a/fs/xfs/scrub/ialloc.c
+++ b/fs/xfs/scrub/ialloc.c
@@ -328,8 +328,7 @@ xchk_iallocbt_check_cluster_ifree(
goto out;
}
- error = xfs_icache_inode_is_allocated(mp, bs->cur->bc_tp, fsino,
- &ino_inuse);
+ error = xchk_inode_is_allocated(bs->sc, agino, &ino_inuse);
if (error == -ENODATA) {
/* Not cached, just read the disk buffer */
freemask_ok = irec_free ^ !!(dip->di_mode);
diff --git a/fs/xfs/scrub/trace.h b/fs/xfs/scrub/trace.h
index fca99e831466..0464e489b381 100644
--- a/fs/xfs/scrub/trace.h
+++ b/fs/xfs/scrub/trace.h
@@ -640,6 +640,28 @@ TRACE_EVENT(xchk_iallocbt_check_cluster,
__entry->cluster_ino)
)
+TRACE_EVENT(xchk_inode_is_allocated,
+ TP_PROTO(struct xfs_inode *ip),
+ TP_ARGS(ip),
+ TP_STRUCT__entry(
+ __field(dev_t, dev)
+ __field(xfs_ino_t, ino)
+ __field(unsigned long, iflags)
+ __field(umode_t, mode)
+ ),
+ TP_fast_assign(
+ __entry->dev = VFS_I(ip)->i_sb->s_dev;
+ __entry->ino = ip->i_ino;
+ __entry->iflags = ip->i_flags;
+ __entry->mode = VFS_I(ip)->i_mode;
+ ),
+ TP_printk("dev %d:%d ino 0x%llx iflags 0x%lx mode 0x%x",
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ __entry->ino,
+ __entry->iflags,
+ __entry->mode)
+);
+
TRACE_EVENT(xchk_fscounters_calc,
TP_PROTO(struct xfs_mount *mp, uint64_t icount, uint64_t ifree,
uint64_t fdblocks, uint64_t delalloc),
diff --git a/fs/xfs/xfs_icache.c b/fs/xfs/xfs_icache.c
index 453890942d9f..e541f5c0bc25 100644
--- a/fs/xfs/xfs_icache.c
+++ b/fs/xfs/xfs_icache.c
@@ -803,44 +803,6 @@ out_error_or_again:
}
/*
- * "Is this a cached inode that's also allocated?"
- *
- * Look up an inode by number in the given file system. If the inode is
- * in cache and isn't in purgatory, return 1 if the inode is allocated
- * and 0 if it is not. For all other cases (not in cache, being torn
- * down, etc.), return a negative error code.
- *
- * The caller has to prevent inode allocation and freeing activity,
- * presumably by locking the AGI buffer. This is to ensure that an
- * inode cannot transition from allocated to freed until the caller is
- * ready to allow that. If the inode is in an intermediate state (new,
- * reclaimable, or being reclaimed), -EAGAIN will be returned; if the
- * inode is not in the cache, -ENOENT will be returned. The caller must
- * deal with these scenarios appropriately.
- *
- * This is a specialized use case for the online scrubber; if you're
- * reading this, you probably want xfs_iget.
- */
-int
-xfs_icache_inode_is_allocated(
- struct xfs_mount *mp,
- struct xfs_trans *tp,
- xfs_ino_t ino,
- bool *inuse)
-{
- struct xfs_inode *ip;
- int error;
-
- error = xfs_iget(mp, tp, ino, XFS_IGET_INCORE, 0, &ip);
- if (error)
- return error;
-
- *inuse = !!(VFS_I(ip)->i_mode);
- xfs_irele(ip);
- return 0;
-}
-
-/*
* Grab the inode for reclaim exclusively.
*
* We have found this inode via a lookup under RCU, so the inode may have
diff --git a/fs/xfs/xfs_icache.h b/fs/xfs/xfs_icache.h
index 1dcdcb23796e..2fa6f2e09d07 100644
--- a/fs/xfs/xfs_icache.h
+++ b/fs/xfs/xfs_icache.h
@@ -71,10 +71,6 @@ void xfs_inode_set_cowblocks_tag(struct xfs_inode *ip);
void xfs_inode_clear_cowblocks_tag(struct xfs_inode *ip);
void xfs_blockgc_worker(struct work_struct *work);
-
-int xfs_icache_inode_is_allocated(struct xfs_mount *mp, struct xfs_trans *tp,
- xfs_ino_t ino, bool *inuse);
-
void xfs_blockgc_stop(struct xfs_mount *mp);
void xfs_blockgc_start(struct xfs_mount *mp);