diff options
author | Chandan Babu R <chandan.babu@oracle.com> | 2023-08-18 10:09:32 +0200 |
---|---|---|
committer | Chandan Babu R <chandan.babu@oracle.com> | 2023-08-18 10:09:32 +0200 |
commit | 220c8d57f55fcd73191ed06710fcf9bfc801ff8e (patch) | |
tree | ed235c4af2df4a2677ca5629a1d1a2fc012e9868 | |
parent | Merge tag 'repair-agfl-fixes-6.6_2023-08-10' of https://git.kernel.org/pub/sc... (diff) | |
parent | xfs: don't check reflink iflag state when checking cow fork (diff) | |
download | linux-220c8d57f55fcd73191ed06710fcf9bfc801ff8e.tar.xz linux-220c8d57f55fcd73191ed06710fcf9bfc801ff8e.zip |
Merge tag 'scrub-bmap-fixes-6.6_2023-08-10' of https://git.kernel.org/pub/scm/linux/kernel/git/djwong/xfs-linux into xfs-6.6-mergeA
xfs: fixes for the block mapping checker
This series amends the file extent map checking code so that nonexistent
cow/attr forks get the ENOENT return they're supposed to; and fixes some
incorrect logic about the presence of a cow fork vs. reflink iflag.
Signed-off-by: Darrick J. Wong <djwong@kernel.org>
Signed-off-by: Chandan Babu R <chandan.babu@oracle.com>
* tag 'scrub-bmap-fixes-6.6_2023-08-10' of https://git.kernel.org/pub/scm/linux/kernel/git/djwong/xfs-linux:
xfs: don't check reflink iflag state when checking cow fork
xfs: simplify returns in xchk_bmap
xfs: rewrite xchk_inode_is_allocated to work properly
xfs: hide xfs_inode_is_allocated in scrub common code
-rw-r--r-- | fs/xfs/scrub/bmap.c | 33 | ||||
-rw-r--r-- | fs/xfs/scrub/common.c | 152 | ||||
-rw-r--r-- | fs/xfs/scrub/common.h | 3 | ||||
-rw-r--r-- | fs/xfs/scrub/ialloc.c | 3 | ||||
-rw-r--r-- | fs/xfs/scrub/trace.h | 22 | ||||
-rw-r--r-- | fs/xfs/xfs_icache.c | 38 | ||||
-rw-r--r-- | fs/xfs/xfs_icache.h | 4 |
7 files changed, 193 insertions, 62 deletions
diff --git a/fs/xfs/scrub/bmap.c b/fs/xfs/scrub/bmap.c index 20ab5d4e92ff..75588915572e 100644 --- a/fs/xfs/scrub/bmap.c +++ b/fs/xfs/scrub/bmap.c @@ -841,7 +841,7 @@ xchk_bmap( /* Non-existent forks can be ignored. */ if (!ifp) - goto out; + return -ENOENT; info.is_rt = whichfork == XFS_DATA_FORK && XFS_IS_REALTIME_INODE(ip); info.whichfork = whichfork; @@ -850,10 +850,10 @@ xchk_bmap( switch (whichfork) { case XFS_COW_FORK: - /* No CoW forks on non-reflink inodes/filesystems. */ - if (!xfs_is_reflink_inode(ip)) { + /* No CoW forks on non-reflink filesystems. */ + if (!xfs_has_reflink(mp)) { xchk_ino_set_corrupt(sc, sc->ip->i_ino); - goto out; + return 0; } break; case XFS_ATTR_FORK: @@ -873,31 +873,31 @@ xchk_bmap( /* No mappings to check. */ if (whichfork == XFS_COW_FORK) xchk_fblock_set_corrupt(sc, whichfork, 0); - goto out; + return 0; case XFS_DINODE_FMT_EXTENTS: break; case XFS_DINODE_FMT_BTREE: if (whichfork == XFS_COW_FORK) { xchk_fblock_set_corrupt(sc, whichfork, 0); - goto out; + return 0; } error = xchk_bmap_btree(sc, whichfork, &info); if (error) - goto out; + return error; break; default: xchk_fblock_set_corrupt(sc, whichfork, 0); - goto out; + return 0; } if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT) - goto out; + return 0; /* Find the offset of the last extent in the mapping. */ error = xfs_bmap_last_offset(ip, &endoff, whichfork); if (!xchk_fblock_process_error(sc, whichfork, 0, &error)) - goto out; + return error; /* * Scrub extent records. We use a special iterator function here that @@ -910,12 +910,12 @@ xchk_bmap( while (xchk_bmap_iext_iter(&info, &irec)) { if (xchk_should_terminate(sc, &error) || (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)) - goto out; + return 0; if (irec.br_startoff >= endoff) { xchk_fblock_set_corrupt(sc, whichfork, irec.br_startoff); - goto out; + return 0; } if (isnullstartblock(irec.br_startblock)) @@ -928,10 +928,10 @@ xchk_bmap( if (xchk_bmap_want_check_rmaps(&info)) { error = xchk_bmap_check_rmaps(sc, whichfork); if (!xchk_fblock_xref_process_error(sc, whichfork, 0, &error)) - goto out; + return error; } -out: - return error; + + return 0; } /* Scrub an inode's data fork. */ @@ -955,8 +955,5 @@ int xchk_bmap_cow( struct xfs_scrub *sc) { - if (!xfs_is_reflink_inode(sc->ip)) - return -ENOENT; - return xchk_bmap(sc, XFS_COW_FORK); } diff --git a/fs/xfs/scrub/common.c b/fs/xfs/scrub/common.c index 82d2a85d6461..de24532fe083 100644 --- a/fs/xfs/scrub/common.c +++ b/fs/xfs/scrub/common.c @@ -1230,3 +1230,155 @@ xchk_fsgates_enable( sc->flags |= scrub_fsgates; } + +/* + * Decide if this is this a cached inode that's also allocated. The caller + * must hold a reference to an AG and the AGI buffer lock to prevent inodes + * from being allocated or freed. + * + * Look up an inode by number in the given file system. If the inode number + * is invalid, return -EINVAL. If the inode is not in cache, return -ENODATA. + * If the inode is being reclaimed, return -ENODATA because we know the inode + * cache cannot be updating the ondisk metadata. + * + * Otherwise, the incore inode is the one we want, and it is either live, + * somewhere in the inactivation machinery, or reclaimable. The inode is + * allocated if i_mode is nonzero. In all three cases, the cached inode will + * be more up to date than the ondisk inode buffer, so we must use the incore + * i_mode. + */ +int +xchk_inode_is_allocated( + struct xfs_scrub *sc, + xfs_agino_t agino, + bool *inuse) +{ + struct xfs_mount *mp = sc->mp; + struct xfs_perag *pag = sc->sa.pag; + xfs_ino_t ino; + struct xfs_inode *ip; + int error; + + /* caller must hold perag reference */ + if (pag == NULL) { + ASSERT(pag != NULL); + return -EINVAL; + } + + /* caller must have AGI buffer */ + if (sc->sa.agi_bp == NULL) { + ASSERT(sc->sa.agi_bp != NULL); + return -EINVAL; + } + + /* reject inode numbers outside existing AGs */ + ino = XFS_AGINO_TO_INO(sc->mp, pag->pag_agno, agino); + if (!xfs_verify_ino(mp, ino)) + return -EINVAL; + + error = -ENODATA; + rcu_read_lock(); + ip = radix_tree_lookup(&pag->pag_ici_root, agino); + if (!ip) { + /* cache miss */ + goto out_rcu; + } + + /* + * If the inode number doesn't match, the incore inode got reused + * during an RCU grace period and the radix tree hasn't been updated. + * This isn't the inode we want. + */ + spin_lock(&ip->i_flags_lock); + if (ip->i_ino != ino) + goto out_skip; + + trace_xchk_inode_is_allocated(ip); + + /* + * We have an incore inode that matches the inode we want, and the + * caller holds the perag structure and the AGI buffer. Let's check + * our assumptions below: + */ + +#ifdef DEBUG + /* + * (1) If the incore inode is live (i.e. referenced from the dcache), + * it will not be INEW, nor will it be in the inactivation or reclaim + * machinery. The ondisk inode had better be allocated. This is the + * most trivial case. + */ + if (!(ip->i_flags & (XFS_NEED_INACTIVE | XFS_INEW | XFS_IRECLAIMABLE | + XFS_INACTIVATING))) { + /* live inode */ + ASSERT(VFS_I(ip)->i_mode != 0); + } + + /* + * If the incore inode is INEW, there are several possibilities: + * + * (2) For a file that is being created, note that we allocate the + * ondisk inode before allocating, initializing, and adding the incore + * inode to the radix tree. + * + * (3) If the incore inode is being recycled, the inode has to be + * allocated because we don't allow freed inodes to be recycled. + * Recycling doesn't touch i_mode. + */ + if (ip->i_flags & XFS_INEW) { + /* created on disk already or recycling */ + ASSERT(VFS_I(ip)->i_mode != 0); + } + + /* + * (4) If the inode is queued for inactivation (NEED_INACTIVE) but + * inactivation has not started (!INACTIVATING), it is still allocated. + */ + if ((ip->i_flags & XFS_NEED_INACTIVE) && + !(ip->i_flags & XFS_INACTIVATING)) { + /* definitely before difree */ + ASSERT(VFS_I(ip)->i_mode != 0); + } +#endif + + /* + * If the incore inode is undergoing inactivation (INACTIVATING), there + * are two possibilities: + * + * (5) It is before the point where it would get freed ondisk, in which + * case i_mode is still nonzero. + * + * (6) It has already been freed, in which case i_mode is zero. + * + * We don't take the ILOCK here, but difree and dialloc update the AGI, + * and we've taken the AGI buffer lock, which prevents that from + * happening. + */ + + /* + * (7) Inodes undergoing inactivation (INACTIVATING) or queued for + * reclaim (IRECLAIMABLE) could be allocated or free. i_mode still + * reflects the ondisk state. + */ + + /* + * (8) If the inode is in IFLUSHING, it's safe to query i_mode because + * the flush code uses i_mode to format the ondisk inode. + */ + + /* + * (9) If the inode is in IRECLAIM and was reachable via the radix + * tree, it still has the same i_mode as it did before it entered + * reclaim. The inode object is still alive because we hold the RCU + * read lock. + */ + + *inuse = VFS_I(ip)->i_mode != 0; + error = 0; + +out_skip: + spin_unlock(&ip->i_flags_lock); +out_rcu: + rcu_read_unlock(); + return error; +} diff --git a/fs/xfs/scrub/common.h b/fs/xfs/scrub/common.h index 4f7cb410904d..cabdc0e16838 100644 --- a/fs/xfs/scrub/common.h +++ b/fs/xfs/scrub/common.h @@ -203,4 +203,7 @@ static inline bool xchk_need_intent_drain(struct xfs_scrub *sc) void xchk_fsgates_enable(struct xfs_scrub *sc, unsigned int scrub_fshooks); +int xchk_inode_is_allocated(struct xfs_scrub *sc, xfs_agino_t agino, + bool *inuse); + #endif /* __XFS_SCRUB_COMMON_H__ */ diff --git a/fs/xfs/scrub/ialloc.c b/fs/xfs/scrub/ialloc.c index 575f22a02ebe..fb7bbf47ae5d 100644 --- a/fs/xfs/scrub/ialloc.c +++ b/fs/xfs/scrub/ialloc.c @@ -328,8 +328,7 @@ xchk_iallocbt_check_cluster_ifree( goto out; } - error = xfs_icache_inode_is_allocated(mp, bs->cur->bc_tp, fsino, - &ino_inuse); + error = xchk_inode_is_allocated(bs->sc, agino, &ino_inuse); if (error == -ENODATA) { /* Not cached, just read the disk buffer */ freemask_ok = irec_free ^ !!(dip->di_mode); diff --git a/fs/xfs/scrub/trace.h b/fs/xfs/scrub/trace.h index fca99e831466..0464e489b381 100644 --- a/fs/xfs/scrub/trace.h +++ b/fs/xfs/scrub/trace.h @@ -640,6 +640,28 @@ TRACE_EVENT(xchk_iallocbt_check_cluster, __entry->cluster_ino) ) +TRACE_EVENT(xchk_inode_is_allocated, + TP_PROTO(struct xfs_inode *ip), + TP_ARGS(ip), + TP_STRUCT__entry( + __field(dev_t, dev) + __field(xfs_ino_t, ino) + __field(unsigned long, iflags) + __field(umode_t, mode) + ), + TP_fast_assign( + __entry->dev = VFS_I(ip)->i_sb->s_dev; + __entry->ino = ip->i_ino; + __entry->iflags = ip->i_flags; + __entry->mode = VFS_I(ip)->i_mode; + ), + TP_printk("dev %d:%d ino 0x%llx iflags 0x%lx mode 0x%x", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->ino, + __entry->iflags, + __entry->mode) +); + TRACE_EVENT(xchk_fscounters_calc, TP_PROTO(struct xfs_mount *mp, uint64_t icount, uint64_t ifree, uint64_t fdblocks, uint64_t delalloc), diff --git a/fs/xfs/xfs_icache.c b/fs/xfs/xfs_icache.c index 453890942d9f..e541f5c0bc25 100644 --- a/fs/xfs/xfs_icache.c +++ b/fs/xfs/xfs_icache.c @@ -803,44 +803,6 @@ out_error_or_again: } /* - * "Is this a cached inode that's also allocated?" - * - * Look up an inode by number in the given file system. If the inode is - * in cache and isn't in purgatory, return 1 if the inode is allocated - * and 0 if it is not. For all other cases (not in cache, being torn - * down, etc.), return a negative error code. - * - * The caller has to prevent inode allocation and freeing activity, - * presumably by locking the AGI buffer. This is to ensure that an - * inode cannot transition from allocated to freed until the caller is - * ready to allow that. If the inode is in an intermediate state (new, - * reclaimable, or being reclaimed), -EAGAIN will be returned; if the - * inode is not in the cache, -ENOENT will be returned. The caller must - * deal with these scenarios appropriately. - * - * This is a specialized use case for the online scrubber; if you're - * reading this, you probably want xfs_iget. - */ -int -xfs_icache_inode_is_allocated( - struct xfs_mount *mp, - struct xfs_trans *tp, - xfs_ino_t ino, - bool *inuse) -{ - struct xfs_inode *ip; - int error; - - error = xfs_iget(mp, tp, ino, XFS_IGET_INCORE, 0, &ip); - if (error) - return error; - - *inuse = !!(VFS_I(ip)->i_mode); - xfs_irele(ip); - return 0; -} - -/* * Grab the inode for reclaim exclusively. * * We have found this inode via a lookup under RCU, so the inode may have diff --git a/fs/xfs/xfs_icache.h b/fs/xfs/xfs_icache.h index 1dcdcb23796e..2fa6f2e09d07 100644 --- a/fs/xfs/xfs_icache.h +++ b/fs/xfs/xfs_icache.h @@ -71,10 +71,6 @@ void xfs_inode_set_cowblocks_tag(struct xfs_inode *ip); void xfs_inode_clear_cowblocks_tag(struct xfs_inode *ip); void xfs_blockgc_worker(struct work_struct *work); - -int xfs_icache_inode_is_allocated(struct xfs_mount *mp, struct xfs_trans *tp, - xfs_ino_t ino, bool *inuse); - void xfs_blockgc_stop(struct xfs_mount *mp); void xfs_blockgc_start(struct xfs_mount *mp); |