From a0158315964136d1a43c1a726ac4381ae3a76153 Mon Sep 17 00:00:00 2001 From: Richard Wareing Date: Mon, 8 Jan 2018 10:41:33 -0800 Subject: xfs: Show realtime device stats on statfs calls if realtime flags set - Reports realtime device free blocks in statfs calls if (realtime) inheritance bit is set on the inode of directory, or realtime flag in the case of files. This is a bit more intuitive, especially for use-cases which are using a much larger device for the realtime device. - Add XFS_IS_REALTIME_MOUNT option to gate based on the existence of a realtime device on the mount, similar to the XFS_IS_REALTIME_INODE option. Reviewed-by: Christoph Hellwig Reviewed-by: Dave Chinner Signed-off-by: Richard Wareing Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong --- fs/xfs/xfs_linux.h | 2 ++ fs/xfs/xfs_super.c | 8 ++++++++ 2 files changed, 10 insertions(+) diff --git a/fs/xfs/xfs_linux.h b/fs/xfs/xfs_linux.h index 99562ec0de56..74d3576c8043 100644 --- a/fs/xfs/xfs_linux.h +++ b/fs/xfs/xfs_linux.h @@ -285,8 +285,10 @@ static inline uint64_t howmany_64(uint64_t x, uint32_t y) #define XFS_IS_REALTIME_INODE(ip) \ (((ip)->i_d.di_flags & XFS_DIFLAG_REALTIME) && \ (ip)->i_mount->m_rtdev_targp) +#define XFS_IS_REALTIME_MOUNT(mp) ((mp)->m_rtdev_targp ? 
1 : 0) #else #define XFS_IS_REALTIME_INODE(ip) (0) +#define XFS_IS_REALTIME_MOUNT(mp) (0) #endif #endif /* __XFS_LINUX__ */ diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c index 1dacccc367f8..a66335599c7d 100644 --- a/fs/xfs/xfs_super.c +++ b/fs/xfs/xfs_super.c @@ -1153,6 +1153,14 @@ xfs_fs_statfs( ((mp->m_qflags & (XFS_PQUOTA_ACCT|XFS_PQUOTA_ENFD))) == (XFS_PQUOTA_ACCT|XFS_PQUOTA_ENFD)) xfs_qm_statvfs(ip, statp); + + if (XFS_IS_REALTIME_MOUNT(mp) && + (ip->i_d.di_flags & (XFS_DIFLAG_RTINHERIT | XFS_DIFLAG_REALTIME))) { + statp->f_blocks = sbp->sb_rblocks; + statp->f_bavail = statp->f_bfree = + sbp->sb_frextents * sbp->sb_rextsize; + } + return 0; } -- cgit v1.2.3 From bfb3e9b9262d4b3e23c02d23db8e3c6bf8ea024b Mon Sep 17 00:00:00 2001 From: Eric Sandeen Date: Mon, 8 Jan 2018 10:41:33 -0800 Subject: xfs: explicitly initialize meta_scrub_ops array by type An implicit mapping to type by order of initialization seems error-prone, and doesn't lend itself to cscope-ing. Also add sanity checks about size of array vs. max types, and a defensive check that ->scrub exists before using it. Signed-off-by: Eric Sandeen Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong --- fs/xfs/scrub/scrub.c | 53 +++++++++++++++++++++++++++------------------------- 1 file changed, 28 insertions(+), 25 deletions(-) diff --git a/fs/xfs/scrub/scrub.c b/fs/xfs/scrub/scrub.c index ab3aef2ae823..53cd9df20ae6 100644 --- a/fs/xfs/scrub/scrub.c +++ b/fs/xfs/scrub/scrub.c @@ -167,104 +167,104 @@ xfs_scrub_teardown( /* Scrubbing dispatch. 
*/ static const struct xfs_scrub_meta_ops meta_scrub_ops[] = { - { /* ioctl presence test */ + [XFS_SCRUB_TYPE_PROBE] = { /* ioctl presence test */ .setup = xfs_scrub_setup_fs, .scrub = xfs_scrub_probe, }, - { /* superblock */ + [XFS_SCRUB_TYPE_SB] = { /* superblock */ .setup = xfs_scrub_setup_ag_header, .scrub = xfs_scrub_superblock, }, - { /* agf */ + [XFS_SCRUB_TYPE_AGF] = { /* agf */ .setup = xfs_scrub_setup_ag_header, .scrub = xfs_scrub_agf, }, - { /* agfl */ + [XFS_SCRUB_TYPE_AGFL]= { /* agfl */ .setup = xfs_scrub_setup_ag_header, .scrub = xfs_scrub_agfl, }, - { /* agi */ + [XFS_SCRUB_TYPE_AGI] = { /* agi */ .setup = xfs_scrub_setup_ag_header, .scrub = xfs_scrub_agi, }, - { /* bnobt */ + [XFS_SCRUB_TYPE_BNOBT] = { /* bnobt */ .setup = xfs_scrub_setup_ag_allocbt, .scrub = xfs_scrub_bnobt, }, - { /* cntbt */ + [XFS_SCRUB_TYPE_CNTBT] = { /* cntbt */ .setup = xfs_scrub_setup_ag_allocbt, .scrub = xfs_scrub_cntbt, }, - { /* inobt */ + [XFS_SCRUB_TYPE_INOBT] = { /* inobt */ .setup = xfs_scrub_setup_ag_iallocbt, .scrub = xfs_scrub_inobt, }, - { /* finobt */ + [XFS_SCRUB_TYPE_FINOBT] = { /* finobt */ .setup = xfs_scrub_setup_ag_iallocbt, .scrub = xfs_scrub_finobt, .has = xfs_sb_version_hasfinobt, }, - { /* rmapbt */ + [XFS_SCRUB_TYPE_RMAPBT] = { /* rmapbt */ .setup = xfs_scrub_setup_ag_rmapbt, .scrub = xfs_scrub_rmapbt, .has = xfs_sb_version_hasrmapbt, }, - { /* refcountbt */ + [XFS_SCRUB_TYPE_REFCNTBT] = { /* refcountbt */ .setup = xfs_scrub_setup_ag_refcountbt, .scrub = xfs_scrub_refcountbt, .has = xfs_sb_version_hasreflink, }, - { /* inode record */ + [XFS_SCRUB_TYPE_INODE] = { /* inode record */ .setup = xfs_scrub_setup_inode, .scrub = xfs_scrub_inode, }, - { /* inode data fork */ + [XFS_SCRUB_TYPE_BMBTD] = { /* inode data fork */ .setup = xfs_scrub_setup_inode_bmap, .scrub = xfs_scrub_bmap_data, }, - { /* inode attr fork */ + [XFS_SCRUB_TYPE_BMBTA] = { /* inode attr fork */ .setup = xfs_scrub_setup_inode_bmap, .scrub = xfs_scrub_bmap_attr, }, - { /* inode CoW 
fork */ + [XFS_SCRUB_TYPE_BMBTC] = { /* inode CoW fork */ .setup = xfs_scrub_setup_inode_bmap, .scrub = xfs_scrub_bmap_cow, }, - { /* directory */ + [XFS_SCRUB_TYPE_DIR] = { /* directory */ .setup = xfs_scrub_setup_directory, .scrub = xfs_scrub_directory, }, - { /* extended attributes */ + [XFS_SCRUB_TYPE_XATTR] = { /* extended attributes */ .setup = xfs_scrub_setup_xattr, .scrub = xfs_scrub_xattr, }, - { /* symbolic link */ + [XFS_SCRUB_TYPE_SYMLINK] = { /* symbolic link */ .setup = xfs_scrub_setup_symlink, .scrub = xfs_scrub_symlink, }, - { /* parent pointers */ + [XFS_SCRUB_TYPE_PARENT] = { /* parent pointers */ .setup = xfs_scrub_setup_parent, .scrub = xfs_scrub_parent, }, - { /* realtime bitmap */ + [XFS_SCRUB_TYPE_RTBITMAP] = { /* realtime bitmap */ .setup = xfs_scrub_setup_rt, .scrub = xfs_scrub_rtbitmap, .has = xfs_sb_version_hasrealtime, }, - { /* realtime summary */ + [XFS_SCRUB_TYPE_RTSUM] = { /* realtime summary */ .setup = xfs_scrub_setup_rt, .scrub = xfs_scrub_rtsummary, .has = xfs_sb_version_hasrealtime, }, - { /* user quota */ + [XFS_SCRUB_TYPE_UQUOTA] = { /* user quota */ .setup = xfs_scrub_setup_quota, .scrub = xfs_scrub_quota, }, - { /* group quota */ + [XFS_SCRUB_TYPE_GQUOTA] = { /* group quota */ .setup = xfs_scrub_setup_quota, .scrub = xfs_scrub_quota, }, - { /* project quota */ + [XFS_SCRUB_TYPE_PQUOTA] = { /* project quota */ .setup = xfs_scrub_setup_quota, .scrub = xfs_scrub_quota, }, @@ -296,6 +296,9 @@ xfs_scrub_metadata( bool try_harder = false; int error = 0; + BUILD_BUG_ON(sizeof(meta_scrub_ops) != + (sizeof(struct xfs_scrub_meta_ops) * XFS_SCRUB_TYPE_NR)); + trace_xfs_scrub_start(ip, sm, error); /* Forbidden if we are shut down or mounted norecovery. 
*/ @@ -319,7 +322,7 @@ xfs_scrub_metadata( if (sm->sm_type >= XFS_SCRUB_TYPE_NR) goto out; ops = &meta_scrub_ops[sm->sm_type]; - if (ops->scrub == NULL) + if (ops->setup == NULL || ops->scrub == NULL) goto out; /* -- cgit v1.2.3 From 0a085ddf0e69f832a1968f24e0d62ad9124f631b Mon Sep 17 00:00:00 2001 From: Eric Sandeen Date: Mon, 8 Jan 2018 10:41:34 -0800 Subject: xfs: factor out scrub input checking Do this before adding more core checks. Signed-off-by: Eric Sandeen Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong --- fs/xfs/scrub/scrub.c | 75 ++++++++++++++++++++++++++++++---------------------- 1 file changed, 44 insertions(+), 31 deletions(-) diff --git a/fs/xfs/scrub/scrub.c b/fs/xfs/scrub/scrub.c index 53cd9df20ae6..b0667420fcd5 100644 --- a/fs/xfs/scrub/scrub.c +++ b/fs/xfs/scrub/scrub.c @@ -284,47 +284,34 @@ xfs_scrub_experimental_warning( "EXPERIMENTAL online scrub feature in use. Use at your own risk!"); } -/* Dispatch metadata scrubbing. */ -int -xfs_scrub_metadata( - struct xfs_inode *ip, +static int +xfs_scrub_validate_inputs( + struct xfs_mount *mp, struct xfs_scrub_metadata *sm) { - struct xfs_scrub_context sc; - struct xfs_mount *mp = ip->i_mount; + int error; const struct xfs_scrub_meta_ops *ops; - bool try_harder = false; - int error = 0; - - BUILD_BUG_ON(sizeof(meta_scrub_ops) != - (sizeof(struct xfs_scrub_meta_ops) * XFS_SCRUB_TYPE_NR)); - - trace_xfs_scrub_start(ip, sm, error); - - /* Forbidden if we are shut down or mounted norecovery. */ - error = -ESHUTDOWN; - if (XFS_FORCED_SHUTDOWN(mp)) - goto out; - error = -ENOTRECOVERABLE; - if (mp->m_flags & XFS_MOUNT_NORECOVERY) - goto out; - /* Check our inputs. */ error = -EINVAL; + /* Check our inputs. */ sm->sm_flags &= ~XFS_SCRUB_FLAGS_OUT; if (sm->sm_flags & ~XFS_SCRUB_FLAGS_IN) goto out; if (memchr_inv(sm->sm_reserved, 0, sizeof(sm->sm_reserved))) goto out; - /* Do we know about this type of metadata? */ error = -ENOENT; + /* Do we know about this type of metadata? 
*/ if (sm->sm_type >= XFS_SCRUB_TYPE_NR) goto out; ops = &meta_scrub_ops[sm->sm_type]; if (ops->setup == NULL || ops->scrub == NULL) goto out; + /* Does this fs even support this type of metadata? */ + if (ops->has && !ops->has(&mp->m_sb)) + goto out; + error = -EOPNOTSUPP; /* * We won't scrub any filesystem that doesn't have the ability * to record unwritten extents. The option was made default in @@ -334,20 +321,46 @@ xfs_scrub_metadata( * We also don't support v1-v3 filesystems, which aren't * mountable. */ - error = -EOPNOTSUPP; if (!xfs_sb_version_hasextflgbit(&mp->m_sb)) goto out; - /* Does this fs even support this type of metadata? */ - error = -ENOENT; - if (ops->has && !ops->has(&mp->m_sb)) - goto out; - /* We don't know how to repair anything yet. */ - error = -EOPNOTSUPP; if (sm->sm_flags & XFS_SCRUB_IFLAG_REPAIR) goto out; + error = 0; +out: + return error; +} + +/* Dispatch metadata scrubbing. */ +int +xfs_scrub_metadata( + struct xfs_inode *ip, + struct xfs_scrub_metadata *sm) +{ + struct xfs_scrub_context sc; + struct xfs_mount *mp = ip->i_mount; + bool try_harder = false; + int error = 0; + + BUILD_BUG_ON(sizeof(meta_scrub_ops) != + (sizeof(struct xfs_scrub_meta_ops) * XFS_SCRUB_TYPE_NR)); + + trace_xfs_scrub_start(ip, sm, error); + + /* Forbidden if we are shut down or mounted norecovery. 
*/ + error = -ESHUTDOWN; + if (XFS_FORCED_SHUTDOWN(mp)) + goto out; + error = -ENOTRECOVERABLE; + if (mp->m_flags & XFS_MOUNT_NORECOVERY) + goto out; + + error = xfs_scrub_validate_inputs(mp, sm); + if (error) + goto out; + xfs_scrub_experimental_warning(mp); retry_op: @@ -355,7 +368,7 @@ retry_op: memset(&sc, 0, sizeof(sc)); sc.mp = ip->i_mount; sc.sm = sm; - sc.ops = ops; + sc.ops = &meta_scrub_ops[sm->sm_type]; sc.try_harder = try_harder; sc.sa.agno = NULLAGNUMBER; error = sc.ops->setup(&sc, ip); -- cgit v1.2.3 From 8e63083762d451d449fd9d280fd85c402a83d0f9 Mon Sep 17 00:00:00 2001 From: Eric Sandeen Date: Mon, 8 Jan 2018 10:41:34 -0800 Subject: xfs: move all scrub input checking to xfs_scrub_validate There were ad-hoc checks for some scrub types but not others; mark each scrub type with ... its type, and use that to validate the allowed and/or required input fields. Moving these checks out of xfs_scrub_setup_ag_header makes it a thin wrapper, so unwrap it in the process. Signed-off-by: Eric Sandeen [darrick: add xfs_ prefix to enum, check scrub args after checking type] Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong --- fs/xfs/scrub/agheader.c | 17 ------------- fs/xfs/scrub/common.c | 10 +------- fs/xfs/scrub/common.h | 2 -- fs/xfs/scrub/quota.c | 7 ----- fs/xfs/scrub/rtbitmap.c | 12 ++------- fs/xfs/scrub/scrub.c | 68 ++++++++++++++++++++++++++++++++++++++++--------- fs/xfs/scrub/scrub.h | 11 ++++++++ 7 files changed, 70 insertions(+), 57 deletions(-) diff --git a/fs/xfs/scrub/agheader.c b/fs/xfs/scrub/agheader.c index 2a9b4f9e93c6..b599358c3796 100644 --- a/fs/xfs/scrub/agheader.c +++ b/fs/xfs/scrub/agheader.c @@ -37,23 +37,6 @@ #include "scrub/common.h" #include "scrub/trace.h" -/* - * Set up scrub to check all the static metadata in each AG. - * This means the SB, AGF, AGI, and AGFL headers. 
- */ -int -xfs_scrub_setup_ag_header( - struct xfs_scrub_context *sc, - struct xfs_inode *ip) -{ - struct xfs_mount *mp = sc->mp; - - if (sc->sm->sm_agno >= mp->m_sb.sb_agcount || - sc->sm->sm_ino || sc->sm->sm_gen) - return -EINVAL; - return xfs_scrub_setup_fs(sc, ip); -} - /* Walk all the blocks in the AGFL. */ int xfs_scrub_walk_agfl( diff --git a/fs/xfs/scrub/common.c b/fs/xfs/scrub/common.c index ac95fe911d96..98452ad58cff 100644 --- a/fs/xfs/scrub/common.c +++ b/fs/xfs/scrub/common.c @@ -472,7 +472,7 @@ xfs_scrub_setup_ag_btree( return error; } - error = xfs_scrub_setup_ag_header(sc, ip); + error = xfs_scrub_setup_fs(sc, ip); if (error) return error; @@ -507,14 +507,6 @@ xfs_scrub_get_inode( struct xfs_inode *ip = NULL; int error; - /* - * If userspace passed us an AG number or a generation number - * without an inode number, they haven't got a clue so bail out - * immediately. - */ - if (sc->sm->sm_agno || (sc->sm->sm_gen && !sc->sm->sm_ino)) - return -EINVAL; - /* We want to scan the inode we already had opened. */ if (sc->sm->sm_ino == 0 || sc->sm->sm_ino == ip_in->i_ino) { sc->ip = ip_in; diff --git a/fs/xfs/scrub/common.h b/fs/xfs/scrub/common.h index 5c043855570e..fe12053aa0e7 100644 --- a/fs/xfs/scrub/common.h +++ b/fs/xfs/scrub/common.h @@ -78,8 +78,6 @@ int xfs_scrub_checkpoint_log(struct xfs_mount *mp); /* Setup functions */ int xfs_scrub_setup_fs(struct xfs_scrub_context *sc, struct xfs_inode *ip); -int xfs_scrub_setup_ag_header(struct xfs_scrub_context *sc, - struct xfs_inode *ip); int xfs_scrub_setup_ag_allocbt(struct xfs_scrub_context *sc, struct xfs_inode *ip); int xfs_scrub_setup_ag_iallocbt(struct xfs_scrub_context *sc, diff --git a/fs/xfs/scrub/quota.c b/fs/xfs/scrub/quota.c index 3d9037eceaf1..51daa4ae2627 100644 --- a/fs/xfs/scrub/quota.c +++ b/fs/xfs/scrub/quota.c @@ -67,13 +67,6 @@ xfs_scrub_setup_quota( { uint dqtype; - /* - * If userspace gave us an AG number or inode data, they don't - * know what they're doing. Get out. 
- */ - if (sc->sm->sm_agno || sc->sm->sm_ino || sc->sm->sm_gen) - return -EINVAL; - dqtype = xfs_scrub_quota_to_dqtype(sc); if (dqtype == 0) return -EINVAL; diff --git a/fs/xfs/scrub/rtbitmap.c b/fs/xfs/scrub/rtbitmap.c index c6fedb698008..6860d5d92515 100644 --- a/fs/xfs/scrub/rtbitmap.c +++ b/fs/xfs/scrub/rtbitmap.c @@ -43,22 +43,14 @@ xfs_scrub_setup_rt( struct xfs_scrub_context *sc, struct xfs_inode *ip) { - struct xfs_mount *mp = sc->mp; - int error = 0; - - /* - * If userspace gave us an AG number or inode data, they don't - * know what they're doing. Get out. - */ - if (sc->sm->sm_agno || sc->sm->sm_ino || sc->sm->sm_gen) - return -EINVAL; + int error; error = xfs_scrub_setup_fs(sc, ip); if (error) return error; sc->ilock_flags = XFS_ILOCK_EXCL | XFS_ILOCK_RTBITMAP; - sc->ip = mp->m_rbmip; + sc->ip = sc->mp->m_rbmip; xfs_ilock(sc->ip, sc->ilock_flags); return 0; diff --git a/fs/xfs/scrub/scrub.c b/fs/xfs/scrub/scrub.c index b0667420fcd5..cd4607782a19 100644 --- a/fs/xfs/scrub/scrub.c +++ b/fs/xfs/scrub/scrub.c @@ -128,8 +128,6 @@ xfs_scrub_probe( { int error = 0; - if (sc->sm->sm_ino || sc->sm->sm_agno) - return -EINVAL; if (xfs_scrub_should_terminate(sc, &error)) return error; @@ -168,105 +166,129 @@ xfs_scrub_teardown( static const struct xfs_scrub_meta_ops meta_scrub_ops[] = { [XFS_SCRUB_TYPE_PROBE] = { /* ioctl presence test */ + .type = ST_NONE, .setup = xfs_scrub_setup_fs, .scrub = xfs_scrub_probe, }, [XFS_SCRUB_TYPE_SB] = { /* superblock */ - .setup = xfs_scrub_setup_ag_header, + .type = ST_PERAG, + .setup = xfs_scrub_setup_fs, .scrub = xfs_scrub_superblock, }, [XFS_SCRUB_TYPE_AGF] = { /* agf */ - .setup = xfs_scrub_setup_ag_header, + .type = ST_PERAG, + .setup = xfs_scrub_setup_fs, .scrub = xfs_scrub_agf, }, [XFS_SCRUB_TYPE_AGFL]= { /* agfl */ - .setup = xfs_scrub_setup_ag_header, + .type = ST_PERAG, + .setup = xfs_scrub_setup_fs, .scrub = xfs_scrub_agfl, }, [XFS_SCRUB_TYPE_AGI] = { /* agi */ - .setup = xfs_scrub_setup_ag_header, + .type = ST_PERAG, 
+ .setup = xfs_scrub_setup_fs, .scrub = xfs_scrub_agi, }, [XFS_SCRUB_TYPE_BNOBT] = { /* bnobt */ + .type = ST_PERAG, .setup = xfs_scrub_setup_ag_allocbt, .scrub = xfs_scrub_bnobt, }, [XFS_SCRUB_TYPE_CNTBT] = { /* cntbt */ + .type = ST_PERAG, .setup = xfs_scrub_setup_ag_allocbt, .scrub = xfs_scrub_cntbt, }, [XFS_SCRUB_TYPE_INOBT] = { /* inobt */ + .type = ST_PERAG, .setup = xfs_scrub_setup_ag_iallocbt, .scrub = xfs_scrub_inobt, }, [XFS_SCRUB_TYPE_FINOBT] = { /* finobt */ + .type = ST_PERAG, .setup = xfs_scrub_setup_ag_iallocbt, .scrub = xfs_scrub_finobt, .has = xfs_sb_version_hasfinobt, }, [XFS_SCRUB_TYPE_RMAPBT] = { /* rmapbt */ + .type = ST_PERAG, .setup = xfs_scrub_setup_ag_rmapbt, .scrub = xfs_scrub_rmapbt, .has = xfs_sb_version_hasrmapbt, }, [XFS_SCRUB_TYPE_REFCNTBT] = { /* refcountbt */ + .type = ST_PERAG, .setup = xfs_scrub_setup_ag_refcountbt, .scrub = xfs_scrub_refcountbt, .has = xfs_sb_version_hasreflink, }, [XFS_SCRUB_TYPE_INODE] = { /* inode record */ + .type = ST_INODE, .setup = xfs_scrub_setup_inode, .scrub = xfs_scrub_inode, }, [XFS_SCRUB_TYPE_BMBTD] = { /* inode data fork */ + .type = ST_INODE, .setup = xfs_scrub_setup_inode_bmap, .scrub = xfs_scrub_bmap_data, }, [XFS_SCRUB_TYPE_BMBTA] = { /* inode attr fork */ + .type = ST_INODE, .setup = xfs_scrub_setup_inode_bmap, .scrub = xfs_scrub_bmap_attr, }, [XFS_SCRUB_TYPE_BMBTC] = { /* inode CoW fork */ + .type = ST_INODE, .setup = xfs_scrub_setup_inode_bmap, .scrub = xfs_scrub_bmap_cow, }, [XFS_SCRUB_TYPE_DIR] = { /* directory */ + .type = ST_INODE, .setup = xfs_scrub_setup_directory, .scrub = xfs_scrub_directory, }, [XFS_SCRUB_TYPE_XATTR] = { /* extended attributes */ + .type = ST_INODE, .setup = xfs_scrub_setup_xattr, .scrub = xfs_scrub_xattr, }, [XFS_SCRUB_TYPE_SYMLINK] = { /* symbolic link */ + .type = ST_INODE, .setup = xfs_scrub_setup_symlink, .scrub = xfs_scrub_symlink, }, [XFS_SCRUB_TYPE_PARENT] = { /* parent pointers */ + .type = ST_INODE, .setup = xfs_scrub_setup_parent, .scrub = 
xfs_scrub_parent, }, [XFS_SCRUB_TYPE_RTBITMAP] = { /* realtime bitmap */ + .type = ST_FS, .setup = xfs_scrub_setup_rt, .scrub = xfs_scrub_rtbitmap, .has = xfs_sb_version_hasrealtime, }, [XFS_SCRUB_TYPE_RTSUM] = { /* realtime summary */ + .type = ST_FS, .setup = xfs_scrub_setup_rt, .scrub = xfs_scrub_rtsummary, .has = xfs_sb_version_hasrealtime, }, [XFS_SCRUB_TYPE_UQUOTA] = { /* user quota */ - .setup = xfs_scrub_setup_quota, - .scrub = xfs_scrub_quota, + .type = ST_FS, + .setup = xfs_scrub_setup_quota, + .scrub = xfs_scrub_quota, }, [XFS_SCRUB_TYPE_GQUOTA] = { /* group quota */ - .setup = xfs_scrub_setup_quota, - .scrub = xfs_scrub_quota, + .type = ST_FS, + .setup = xfs_scrub_setup_quota, + .scrub = xfs_scrub_quota, }, [XFS_SCRUB_TYPE_PQUOTA] = { /* project quota */ - .setup = xfs_scrub_setup_quota, - .scrub = xfs_scrub_quota, + .type = ST_FS, + .setup = xfs_scrub_setup_quota, + .scrub = xfs_scrub_quota, }, }; @@ -297,6 +319,7 @@ xfs_scrub_validate_inputs( sm->sm_flags &= ~XFS_SCRUB_FLAGS_OUT; if (sm->sm_flags & ~XFS_SCRUB_FLAGS_IN) goto out; + /* sm_reserved[] must be zero */ if (memchr_inv(sm->sm_reserved, 0, sizeof(sm->sm_reserved))) goto out; @@ -311,6 +334,27 @@ xfs_scrub_validate_inputs( if (ops->has && !ops->has(&mp->m_sb)) goto out; + error = -EINVAL; + /* restricting fields must be appropriate for type */ + switch (ops->type) { + case ST_NONE: + case ST_FS: + if (sm->sm_ino || sm->sm_gen || sm->sm_agno) + goto out; + break; + case ST_PERAG: + if (sm->sm_ino || sm->sm_gen || + sm->sm_agno >= mp->m_sb.sb_agcount) + goto out; + break; + case ST_INODE: + if (sm->sm_agno || (sm->sm_gen && !sm->sm_ino)) + goto out; + break; + default: + goto out; + } + error = -EOPNOTSUPP; /* * We won't scrub any filesystem that doesn't have the ability diff --git a/fs/xfs/scrub/scrub.h b/fs/xfs/scrub/scrub.h index e9ec041cf713..2a7961405f02 100644 --- a/fs/xfs/scrub/scrub.h +++ b/fs/xfs/scrub/scrub.h @@ -22,6 +22,14 @@ struct xfs_scrub_context; +/* Type info and names for the 
scrub types. */ +enum xfs_scrub_type { + ST_NONE = 1, /* disabled */ + ST_PERAG, /* per-AG metadata */ + ST_FS, /* per-FS metadata */ + ST_INODE, /* per-inode metadata */ +}; + struct xfs_scrub_meta_ops { /* Acquire whatever resources are needed for the operation. */ int (*setup)(struct xfs_scrub_context *, @@ -32,6 +40,9 @@ struct xfs_scrub_meta_ops { /* Decide if we even have this piece of metadata. */ bool (*has)(struct xfs_sb *); + + /* type describing required/allowed inputs */ + enum xfs_scrub_type type; }; /* Buffer pointers and btree cursors for an entire AG. */ -- cgit v1.2.3 From 29c1c123a3f84e31005b86eb852d935daacbacdf Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Mon, 8 Jan 2018 10:41:35 -0800 Subject: xfs: scrub inode nsec fields Check that the nanosecond fields in each timestamp aren't larger than a billion. Signed-off-by: Darrick J. Wong Reviewed-by: Dave Chinner --- fs/xfs/scrub/inode.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/fs/xfs/scrub/inode.c b/fs/xfs/scrub/inode.c index f120fb20452f..2be4b259477a 100644 --- a/fs/xfs/scrub/inode.c +++ b/fs/xfs/scrub/inode.c @@ -392,6 +392,14 @@ xfs_scrub_dinode( break; } + /* di_[amc]time.nsec */ + if (be32_to_cpu(dip->di_atime.t_nsec) >= NSEC_PER_SEC) + xfs_scrub_ino_set_corrupt(sc, ino, bp); + if (be32_to_cpu(dip->di_mtime.t_nsec) >= NSEC_PER_SEC) + xfs_scrub_ino_set_corrupt(sc, ino, bp); + if (be32_to_cpu(dip->di_ctime.t_nsec) >= NSEC_PER_SEC) + xfs_scrub_ino_set_corrupt(sc, ino, bp); + /* * di_size. 
xfs_dinode_verify checks for things that screw up * the VFS such as the upper bit being set and zero-length @@ -495,6 +503,8 @@ xfs_scrub_dinode( } if (dip->di_version >= 3) { + if (be32_to_cpu(dip->di_crtime.t_nsec) >= NSEC_PER_SEC) + xfs_scrub_ino_set_corrupt(sc, ino, bp); xfs_scrub_inode_flags2(sc, bp, dip, ino, mode, flags, flags2); xfs_scrub_inode_cowextsize(sc, bp, dip, ino, mode, flags, flags2); -- cgit v1.2.3 From 2c8f6265397642bb150d933fa46b1f7c294f4ffe Mon Sep 17 00:00:00 2001 From: Brian Foster Date: Mon, 8 Jan 2018 10:41:35 -0800 Subject: xfs: print transaction log reservation on overrun The transaction dump code displays the content and reservation consumption of a particular transaction in the event of an overrun. It currently displays the reservation associated with the transaction ticket, but not the original reservation attached to the transaction. The latter value reflects the original transaction reservation calculation before additional reservation overhead is assigned, such as for the CIL context header and potential split region headers. Update xlog_print_trans() to also print the original transaction reservation in the event of overrun. This provides a reference point to identify how much reservation overhead was added to a particular ticket by xfs_log_calc_unit_res(). Signed-off-by: Brian Foster Reviewed-by: Dave Chinner Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. 
Wong --- fs/xfs/xfs_log.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c index a503af96d780..047df85528b0 100644 --- a/fs/xfs/xfs_log.c +++ b/fs/xfs/xfs_log.c @@ -2117,7 +2117,9 @@ xlog_print_trans( /* dump core transaction and ticket info */ xfs_warn(mp, "transaction summary:"); - xfs_warn(mp, " flags = 0x%x", tp->t_flags); + xfs_warn(mp, " log res = %d", tp->t_log_res); + xfs_warn(mp, " log count = %d", tp->t_log_count); + xfs_warn(mp, " flags = 0x%x", tp->t_flags); xlog_print_tic_res(mp, tp->t_ticket); -- cgit v1.2.3 From a6f485908d5210a5662f7a031bd1deeb3867e466 Mon Sep 17 00:00:00 2001 From: Brian Foster Date: Mon, 8 Jan 2018 10:41:36 -0800 Subject: xfs: include inobt buffers in ifree tx log reservation The tr_ifree transaction handles inode unlinks and inode chunk frees. The current transaction calculation does not accurately reflect worst case changes to the inode btree, however. The inobt portion of the current transaction reservation only covers modification of a single inobt buffer (for the particular inode record). This is a historical artifact from the days before XFS supported full inode chunk removal. When support for inode chunk removal was added in commit 254f6311ed1b ("Implement deletion of inode clusters in XFS."), the additional log reservation required for chunk removal was not added correctly. The new reservation only considered the header overhead of associated buffers rather than the full contents of the btrees and AGF and AGFL buffers affected by the transaction. The reservation for the free space btrees was subsequently fixed up in commit 5fe6abb82f76 ("Add space for inode and allocation btrees to ITRUNCATE log reservation"), but the res. for full inobt joins has never been added. 
Further review of the ifree reservation uncovered a couple more problems: - The undocumented +2 blocks are intended for the AGF and AGFL, but are also not sized correctly and should be logged as full sectors (not FSBs). - The additional single block header is undocumented and serves no apparent purpose. Update xfs_calc_ifree_reservation() to include a full inobt join in the reservation calculation. Refactor the undocumented blocks appropriately and fix up the comments to reflect the current calculation. Signed-off-by: Brian Foster Reviewed-by: Dave Chinner Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong --- fs/xfs/libxfs/xfs_trans_resv.c | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/fs/xfs/libxfs/xfs_trans_resv.c b/fs/xfs/libxfs/xfs_trans_resv.c index 6bd916bd35e2..838566b85622 100644 --- a/fs/xfs/libxfs/xfs_trans_resv.c +++ b/fs/xfs/libxfs/xfs_trans_resv.c @@ -490,10 +490,9 @@ xfs_calc_symlink_reservation( /* * In freeing an inode we can modify: * the inode being freed: inode size - * the super block free inode counter: sector size - * the agi hash list and counters: sector size - * the inode btree entry: block size - * the on disk inode before ours in the agi hash list: inode cluster size + * the super block free inode counter, AGF and AGFL: sector size + * the on disk inode (agi unlinked list removal) + * the inode chunk is marked stale (headers only) * the inode btree: max depth * blocksize * the allocation btrees: 2 trees * (max depth - 1) * block size * the finobt (record insertion, removal or modification) @@ -504,12 +503,10 @@ xfs_calc_ifree_reservation( { return XFS_DQUOT_LOGRES(mp) + xfs_calc_inode_res(mp, 1) + - xfs_calc_buf_res(1, mp->m_sb.sb_sectsize) + - xfs_calc_buf_res(1, XFS_FSB_TO_B(mp, 1)) + + xfs_calc_buf_res(3, mp->m_sb.sb_sectsize) + xfs_calc_iunlink_remove_reservation(mp) + - xfs_calc_buf_res(1, 0) + - xfs_calc_buf_res(2 + mp->m_ialloc_blks + - mp->m_in_maxlevels, 0) + + 
xfs_calc_buf_res(mp->m_ialloc_blks, 0) + + xfs_calc_buf_res(mp->m_in_maxlevels, XFS_FSB_TO_B(mp, 1)) + xfs_calc_buf_res(xfs_allocfree_log_count(mp, 1), XFS_FSB_TO_B(mp, 1)) + xfs_calc_finobt_res(mp, 0, 1); -- cgit v1.2.3 From e8341d9f6348640dff01d8c4a33695dc82bab5a3 Mon Sep 17 00:00:00 2001 From: Brian Foster Date: Mon, 8 Jan 2018 10:41:36 -0800 Subject: xfs: fix up agi unlinked list reservations The current AGI unlinked list addition and removal reservations do not reflect the worst case log usage. An unlinked list removal can log up to two on-disk inode clusters but only includes reservation for one. An unlinked list addition logs the on-disk cluster but includes reservation for an in-core inode. Update the AGI unlinked list reservation helpers to calculate the correct worst case reservation for the associated operations. Signed-off-by: Brian Foster Reviewed-by: Dave Chinner Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong --- fs/xfs/libxfs/xfs_trans_resv.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/fs/xfs/libxfs/xfs_trans_resv.c b/fs/xfs/libxfs/xfs_trans_resv.c index 838566b85622..173b1bc13ffe 100644 --- a/fs/xfs/libxfs/xfs_trans_resv.c +++ b/fs/xfs/libxfs/xfs_trans_resv.c @@ -282,13 +282,14 @@ xfs_calc_rename_reservation( * For removing an inode from unlinked list at first, we can modify: * the agi hash list and counters: sector size * the on disk inode before ours in the agi hash list: inode cluster size + * the on disk inode in the agi hash list: inode cluster size */ STATIC uint xfs_calc_iunlink_remove_reservation( struct xfs_mount *mp) { return xfs_calc_buf_res(1, mp->m_sb.sb_sectsize) + - max_t(uint, XFS_FSB_TO_B(mp, 1), mp->m_inode_cluster_size); + 2 * max_t(uint, XFS_FSB_TO_B(mp, 1), mp->m_inode_cluster_size); } /* @@ -320,13 +321,13 @@ xfs_calc_link_reservation( /* * For adding an inode to unlinked list we can modify: * the agi hash list: sector size - * the unlinked inode: inode size + * the on disk inode: inode 
cluster size */ STATIC uint xfs_calc_iunlink_add_reservation(xfs_mount_t *mp) { return xfs_calc_buf_res(1, mp->m_sb.sb_sectsize) + - xfs_calc_inode_res(mp, 1); + max_t(uint, XFS_FSB_TO_B(mp, 1), mp->m_inode_cluster_size); } /* -- cgit v1.2.3 From a606ebdb859e78beb757dfefa08001df366e2ef5 Mon Sep 17 00:00:00 2001 From: Brian Foster Date: Mon, 8 Jan 2018 10:41:37 -0800 Subject: xfs: truncate transaction does not modify the inobt The truncate transaction does not ever modify the inode btree, but includes an associated log reservation. Update xfs_calc_itruncate_reservation() to remove the reservation associated with inobt updates. Signed-off-by: Brian Foster Reviewed-by: Dave Chinner Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong --- fs/xfs/libxfs/xfs_trans_resv.c | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/fs/xfs/libxfs/xfs_trans_resv.c b/fs/xfs/libxfs/xfs_trans_resv.c index 173b1bc13ffe..037a1295d289 100644 --- a/fs/xfs/libxfs/xfs_trans_resv.c +++ b/fs/xfs/libxfs/xfs_trans_resv.c @@ -232,8 +232,6 @@ xfs_calc_write_reservation( * the super block to reflect the freed blocks: sector size * worst case split in allocation btrees per extent assuming 4 extents: * 4 exts * 2 trees * (2 * max depth - 1) * block size - * the inode btree: max depth * blocksize - * the allocation btrees: 2 trees * (max depth - 1) * block size */ STATIC uint xfs_calc_itruncate_reservation( @@ -245,12 +243,7 @@ xfs_calc_itruncate_reservation( XFS_FSB_TO_B(mp, 1))), (xfs_calc_buf_res(9, mp->m_sb.sb_sectsize) + xfs_calc_buf_res(xfs_allocfree_log_count(mp, 4), - XFS_FSB_TO_B(mp, 1)) + - xfs_calc_buf_res(5, 0) + - xfs_calc_buf_res(xfs_allocfree_log_count(mp, 1), - XFS_FSB_TO_B(mp, 1)) + - xfs_calc_buf_res(2 + mp->m_ialloc_blks + - mp->m_in_maxlevels, 0))); + XFS_FSB_TO_B(mp, 1)))); } /* -- cgit v1.2.3 From f03c78f39710995d2766236f229295d91b8de9dd Mon Sep 17 00:00:00 2001 From: Brian Foster Date: Mon, 8 Jan 2018 10:41:37 -0800 Subject: xfs: include an allocfree 
res for inobt modifications Analysis of recent reports of log reservation overruns and code inspection has uncovered that the reservations associated with inode operations may not cover the worst case scenarios. In particular, many cases only include one allocfree res. for a particular operation even though said operations may also entail AGFL fixups and inode btree block allocations in addition to the actual inode chunk allocation. This can easily turn into two or three block allocations (or frees) per operation. In theory, the only way to define the worst case reservation is to include an allocfree res for each individual allocation in a transaction. Since that is impractical (we can perform multiple agfl fixups per tx and not every allocation results in a full tree operation), we need to find a reasonable compromise that addresses the deficiency in practice without blowing out the size of the transactions. Since the inode btrees are not filled by the AGFL, record insertion and removal can directly result in block allocations and frees depending on the shape of the tree. These allocations and frees occur in the same transaction context as the inobt update itself, but are separate from the allocation/free that might be required for an inode chunk. Therefore, it makes sense to assume that an [f]inobt insert/remove can directly result in one or more block allocations on behalf of the tree. Refactor the inode transaction reservations to include one allocfree res. per inode btree modification to cover allocations required by the tree itself. This separates the reservation required to allocate the inode chunk from the reservation required for inobt record insertion/removal. Apply the same logic to the finobt. This results in killing off the finobt modify condition because we no longer assume that the broader transaction reservation will cover finobt block allocations and finobt shape changes can occur in either of the inobt allocation or modify situations. 
Suggested-by: Dave Chinner Signed-off-by: Brian Foster Reviewed-by: Dave Chinner Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong --- fs/xfs/libxfs/xfs_trans_resv.c | 84 +++++++++++++++++++++--------------------- 1 file changed, 43 insertions(+), 41 deletions(-) diff --git a/fs/xfs/libxfs/xfs_trans_resv.c b/fs/xfs/libxfs/xfs_trans_resv.c index 037a1295d289..19f3a226a357 100644 --- a/fs/xfs/libxfs/xfs_trans_resv.c +++ b/fs/xfs/libxfs/xfs_trans_resv.c @@ -132,44 +132,43 @@ xfs_calc_inode_res( } /* - * The free inode btree is a conditional feature and the log reservation - * requirements differ slightly from that of the traditional inode allocation - * btree. The finobt tracks records for inode chunks with at least one free - * inode. A record can be removed from the tree for an inode allocation - * or free and thus the finobt reservation is unconditional across: + * Inode btree record insertion/removal modifies the inode btree and free space + * btrees (since the inobt does not use the agfl). This requires the following + * reservation: * - * - inode allocation - * - inode free - * - inode chunk allocation + * the inode btree: max depth * blocksize + * the allocation btrees: 2 trees * (max depth - 1) * block size * - * The 'modify' param indicates to include the record modification scenario. The - * 'alloc' param indicates to include the reservation for free space btree - * modifications on behalf of finobt modifications. This is required only for - * transactions that do not already account for free space btree modifications. + * The caller must account for SB and AG header modifications, etc. + */ +STATIC uint +xfs_calc_inobt_res( + struct xfs_mount *mp) +{ + return xfs_calc_buf_res(mp->m_in_maxlevels, XFS_FSB_TO_B(mp, 1)) + + xfs_calc_buf_res(xfs_allocfree_log_count(mp, 1), + XFS_FSB_TO_B(mp, 1)); +} + +/* + * The free inode btree is a conditional feature. 
The behavior differs slightly + * from that of the traditional inode btree in that the finobt tracks records + * for inode chunks with at least one free inode. A record can be removed from + * the tree during individual inode allocation. Therefore the finobt + * reservation is unconditional for both the inode chunk allocation and + * individual inode allocation (modify) cases. * - * the free inode btree: max depth * block size - * the allocation btrees: 2 trees * (max depth - 1) * block size - * the free inode btree entry: block size + * Behavior aside, the reservation for finobt modification is equivalent to the + * traditional inobt: cover a full finobt shape change plus block allocation. */ STATIC uint xfs_calc_finobt_res( - struct xfs_mount *mp, - int alloc, - int modify) + struct xfs_mount *mp) { - uint res; - if (!xfs_sb_version_hasfinobt(&mp->m_sb)) return 0; - res = xfs_calc_buf_res(mp->m_in_maxlevels, XFS_FSB_TO_B(mp, 1)); - if (alloc) - res += xfs_calc_buf_res(xfs_allocfree_log_count(mp, 1), - XFS_FSB_TO_B(mp, 1)); - if (modify) - res += (uint)XFS_FSB_TO_B(mp, 1); - - return res; + return xfs_calc_inobt_res(mp); } /* @@ -373,7 +372,7 @@ xfs_calc_create_resv_modify( xfs_calc_buf_res(1, mp->m_sb.sb_sectsize) + (uint)XFS_FSB_TO_B(mp, 1) + xfs_calc_buf_res(XFS_DIROP_LOG_COUNT(mp), XFS_FSB_TO_B(mp, 1)) + - xfs_calc_finobt_res(mp, 1, 1); + xfs_calc_finobt_res(mp); } /* @@ -381,8 +380,8 @@ xfs_calc_create_resv_modify( * the agi and agf of the ag getting the new inodes: 2 * sectorsize * the superblock for the nlink flag: sector size * the inode blocks allocated: mp->m_ialloc_blks * blocksize - * the inode btree: max depth * blocksize * the allocation btrees: 2 trees * (max depth - 1) * block size + * the inode btree (record insertion) */ STATIC uint xfs_calc_create_resv_alloc( @@ -391,9 +390,9 @@ xfs_calc_create_resv_alloc( return xfs_calc_buf_res(2, mp->m_sb.sb_sectsize) + mp->m_sb.sb_sectsize + xfs_calc_buf_res(mp->m_ialloc_blks, XFS_FSB_TO_B(mp, 1)) + - 
xfs_calc_buf_res(mp->m_in_maxlevels, XFS_FSB_TO_B(mp, 1)) + xfs_calc_buf_res(xfs_allocfree_log_count(mp, 1), - XFS_FSB_TO_B(mp, 1)); + XFS_FSB_TO_B(mp, 1)) + + xfs_calc_inobt_res(mp); } STATIC uint @@ -409,8 +408,8 @@ __xfs_calc_create_reservation( * For icreate we can allocate some inodes giving: * the agi and agf of the ag getting the new inodes: 2 * sectorsize * the superblock for the nlink flag: sector size - * the inode btree: max depth * blocksize * the allocation btrees: 2 trees * (max depth - 1) * block size + * the inobt (record insertion) * the finobt (record insertion) */ STATIC uint @@ -419,10 +418,10 @@ xfs_calc_icreate_resv_alloc( { return xfs_calc_buf_res(2, mp->m_sb.sb_sectsize) + mp->m_sb.sb_sectsize + - xfs_calc_buf_res(mp->m_in_maxlevels, XFS_FSB_TO_B(mp, 1)) + xfs_calc_buf_res(xfs_allocfree_log_count(mp, 1), XFS_FSB_TO_B(mp, 1)) + - xfs_calc_finobt_res(mp, 0, 0); + xfs_calc_inobt_res(mp) + + xfs_calc_finobt_res(mp); } STATIC uint @@ -487,9 +486,14 @@ xfs_calc_symlink_reservation( * the super block free inode counter, AGF and AGFL: sector size * the on disk inode (agi unlinked list removal) * the inode chunk is marked stale (headers only) - * the inode btree: max depth * blocksize - * the allocation btrees: 2 trees * (max depth - 1) * block size + * the inode btree * the finobt (record insertion, removal or modification) + * + * Note that the allocfree res. for the inode chunk itself is not included + * because the extent free occurs after a transaction roll. We could take the + * maximum of the pre/post roll operations, but the pre-roll reservation already + * includes at least one allocfree res. for the inobt and is thus guaranteed to + * be larger. 
*/ STATIC uint xfs_calc_ifree_reservation( @@ -500,10 +504,8 @@ xfs_calc_ifree_reservation( xfs_calc_buf_res(3, mp->m_sb.sb_sectsize) + xfs_calc_iunlink_remove_reservation(mp) + xfs_calc_buf_res(mp->m_ialloc_blks, 0) + - xfs_calc_buf_res(mp->m_in_maxlevels, XFS_FSB_TO_B(mp, 1)) + - xfs_calc_buf_res(xfs_allocfree_log_count(mp, 1), - XFS_FSB_TO_B(mp, 1)) + - xfs_calc_finobt_res(mp, 0, 1); + xfs_calc_inobt_res(mp) + + xfs_calc_finobt_res(mp); } /* -- cgit v1.2.3 From 57af33e451b73f56feb428f5856cdf6e4e0c60cd Mon Sep 17 00:00:00 2001 From: Brian Foster Date: Mon, 8 Jan 2018 10:41:38 -0800 Subject: xfs: refactor inode chunk alloc/free tx reservation The reservation for the various forms of inode allocation is scattered across several different functions. This includes two variants of chunk allocation (v5 icreate transactions vs. older create transactions) and the inode free transaction. To clean up some of this code and clarify the purpose of specific allocfree reservations, continue the pattern of defining helper functions for smaller operational units of broader transactions. Refactor the reservation into an inode chunk alloc/free helper that considers the various conditions based on filesystem format. An inode chunk free involves an extent free and buffer invalidations. The latter requires reservation for log headers only. An inode chunk allocation modifies the free space btrees and logs the chunk on v4 supers. v5 supers initialize the inode chunk using ordered buffers and so do not log the chunk. As a side effect of this refactoring, add one more allocfree res to the ifree transaction. Technically this does not serve a specific purpose because inode chunks are freed via deferred operations and thus occur after a transaction roll. tr_ifree has a bit of a history of tx overruns caused by too many agfl fixups during sustained file deletion workloads, so add this extra reservation as a form of padding nonetheless. 
Signed-off-by: Brian Foster Reviewed-by: Dave Chinner Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong --- fs/xfs/libxfs/xfs_trans_resv.c | 64 ++++++++++++++++++++++++++++++++---------- 1 file changed, 49 insertions(+), 15 deletions(-) diff --git a/fs/xfs/libxfs/xfs_trans_resv.c b/fs/xfs/libxfs/xfs_trans_resv.c index 19f3a226a357..75259a1346eb 100644 --- a/fs/xfs/libxfs/xfs_trans_resv.c +++ b/fs/xfs/libxfs/xfs_trans_resv.c @@ -34,6 +34,9 @@ #include "xfs_trans_space.h" #include "xfs_trace.h" +#define _ALLOC true +#define _FREE false + /* * A buffer has a format structure overhead in the log in addition * to the data, so we need to take this into account when reserving @@ -171,6 +174,41 @@ xfs_calc_finobt_res( return xfs_calc_inobt_res(mp); } +/* + * Calculate the reservation required to allocate or free an inode chunk. This + * includes: + * + * the allocation btrees: 2 trees * (max depth - 1) * block size + * the inode chunk: m_ialloc_blks * N + * + * The size N of the inode chunk reservation depends on whether it is for + * allocation or free and which type of create transaction is in use. An inode + * chunk free always invalidates the buffers and only requires reservation for + * headers (N == 0). An inode chunk allocation requires a chunk sized + * reservation on v4 and older superblocks to initialize the chunk. No chunk + * reservation is required for allocation on v5 supers, which use ordered + * buffers to initialize. + */ +STATIC uint +xfs_calc_inode_chunk_res( + struct xfs_mount *mp, + bool alloc) +{ + uint res, size = 0; + + res = xfs_calc_buf_res(xfs_allocfree_log_count(mp, 1), + XFS_FSB_TO_B(mp, 1)); + if (alloc) { + /* icreate tx uses ordered buffers */ + if (xfs_sb_version_hascrc(&mp->m_sb)) + return res; + size = XFS_FSB_TO_B(mp, 1); + } + + res += xfs_calc_buf_res(mp->m_ialloc_blks, size); + return res; +} + /* * Various log reservation values. 
* @@ -379,8 +417,7 @@ xfs_calc_create_resv_modify( * For create we can allocate some inodes giving: * the agi and agf of the ag getting the new inodes: 2 * sectorsize * the superblock for the nlink flag: sector size - * the inode blocks allocated: mp->m_ialloc_blks * blocksize - * the allocation btrees: 2 trees * (max depth - 1) * block size + * the inode chunk (allocation/init) * the inode btree (record insertion) */ STATIC uint @@ -389,9 +426,7 @@ xfs_calc_create_resv_alloc( { return xfs_calc_buf_res(2, mp->m_sb.sb_sectsize) + mp->m_sb.sb_sectsize + - xfs_calc_buf_res(mp->m_ialloc_blks, XFS_FSB_TO_B(mp, 1)) + - xfs_calc_buf_res(xfs_allocfree_log_count(mp, 1), - XFS_FSB_TO_B(mp, 1)) + + xfs_calc_inode_chunk_res(mp, _ALLOC) + xfs_calc_inobt_res(mp); } @@ -408,7 +443,7 @@ __xfs_calc_create_reservation( * For icreate we can allocate some inodes giving: * the agi and agf of the ag getting the new inodes: 2 * sectorsize * the superblock for the nlink flag: sector size - * the allocation btrees: 2 trees * (max depth - 1) * block size + * the inode chunk (allocation, no init) * the inobt (record insertion) * the finobt (record insertion) */ @@ -418,8 +453,7 @@ xfs_calc_icreate_resv_alloc( { return xfs_calc_buf_res(2, mp->m_sb.sb_sectsize) + mp->m_sb.sb_sectsize + - xfs_calc_buf_res(xfs_allocfree_log_count(mp, 1), - XFS_FSB_TO_B(mp, 1)) + + xfs_calc_inode_chunk_res(mp, _ALLOC) + xfs_calc_inobt_res(mp) + xfs_calc_finobt_res(mp); } @@ -485,15 +519,15 @@ xfs_calc_symlink_reservation( * the inode being freed: inode size * the super block free inode counter, AGF and AGFL: sector size * the on disk inode (agi unlinked list removal) - * the inode chunk is marked stale (headers only) + * the inode chunk (invalidated, headers only) * the inode btree * the finobt (record insertion, removal or modification) * - * Note that the allocfree res. for the inode chunk itself is not included - * because the extent free occurs after a transaction roll. 
We could take the - * maximum of the pre/post roll operations, but the pre-roll reservation already - * includes at least one allocfree res. for the inobt and is thus guaranteed to - * be larger. + * Note that the inode chunk res. includes an allocfree res. for freeing of the + * inode chunk. This is technically extraneous because the inode chunk free is + * deferred (it occurs after a transaction roll). Include the extra reservation + * anyways since we've had reports of ifree transaction overruns due to too many + * agfl fixups during inode chunk frees. */ STATIC uint xfs_calc_ifree_reservation( @@ -503,7 +537,7 @@ xfs_calc_ifree_reservation( xfs_calc_inode_res(mp, 1) + xfs_calc_buf_res(3, mp->m_sb.sb_sectsize) + xfs_calc_iunlink_remove_reservation(mp) + - xfs_calc_buf_res(mp->m_ialloc_blks, 0) + + xfs_calc_inode_chunk_res(mp, _FREE) + xfs_calc_inobt_res(mp) + xfs_calc_finobt_res(mp); } -- cgit v1.2.3 From c017cb5ddfd6326032570d5eba83308c8a9c13a9 Mon Sep 17 00:00:00 2001 From: Brian Foster Date: Mon, 8 Jan 2018 10:41:38 -0800 Subject: xfs: eliminate duplicate icreate tx reservation functions The create transaction reservation calculation has two different branches of code depending on whether the filesystem is a v5 format fs or older. Each branch considers the max reservation between the allocation case (new chunk allocation + record insert) and the modify case (chunk exists, record modification) of inode allocation. The modify case is the same for both superblock versions with the exception of the finobt. The finobt helper checks the feature bit, however, and so the modify case already shares the same code. Now that inode chunk allocation has been refactored into a helper that checks the superblock version to calculate the appropriate reservation for the create transaction, the only remaining difference between the create and icreate branches is the call to the finobt helper. As noted above, the finobt helper is a no-op when the feature is not enabled. 
Therefore, these branches are effectively duplicate and can be condensed. Remove the xfs_calc_create_*() branch of functions and update the various callers to use the xfs_calc_icreate_*() variant. The latter creates the same reservation size for v4 create transactions as the removed branch. As such, this patch does not result in transaction reservation changes. Signed-off-by: Brian Foster Reviewed-by: Dave Chinner Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong --- fs/xfs/libxfs/xfs_trans_resv.c | 52 +++++------------------------------------- 1 file changed, 6 insertions(+), 46 deletions(-) diff --git a/fs/xfs/libxfs/xfs_trans_resv.c b/fs/xfs/libxfs/xfs_trans_resv.c index 75259a1346eb..5f17641f040f 100644 --- a/fs/xfs/libxfs/xfs_trans_resv.c +++ b/fs/xfs/libxfs/xfs_trans_resv.c @@ -413,39 +413,13 @@ xfs_calc_create_resv_modify( xfs_calc_finobt_res(mp); } -/* - * For create we can allocate some inodes giving: - * the agi and agf of the ag getting the new inodes: 2 * sectorsize - * the superblock for the nlink flag: sector size - * the inode chunk (allocation/init) - * the inode btree (record insertion) - */ -STATIC uint -xfs_calc_create_resv_alloc( - struct xfs_mount *mp) -{ - return xfs_calc_buf_res(2, mp->m_sb.sb_sectsize) + - mp->m_sb.sb_sectsize + - xfs_calc_inode_chunk_res(mp, _ALLOC) + - xfs_calc_inobt_res(mp); -} - -STATIC uint -__xfs_calc_create_reservation( - struct xfs_mount *mp) -{ - return XFS_DQUOT_LOGRES(mp) + - MAX(xfs_calc_create_resv_alloc(mp), - xfs_calc_create_resv_modify(mp)); -} - /* * For icreate we can allocate some inodes giving: * the agi and agf of the ag getting the new inodes: 2 * sectorsize * the superblock for the nlink flag: sector size - * the inode chunk (allocation, no init) + * the inode chunk (allocation, optional init) * the inobt (record insertion) - * the finobt (record insertion) + * the finobt (optional, record insertion) */ STATIC uint xfs_calc_icreate_resv_alloc( @@ -466,27 +440,13 @@ 
xfs_calc_icreate_reservation(xfs_mount_t *mp) xfs_calc_create_resv_modify(mp)); } -STATIC uint -xfs_calc_create_reservation( - struct xfs_mount *mp) -{ - if (xfs_sb_version_hascrc(&mp->m_sb)) - return xfs_calc_icreate_reservation(mp); - return __xfs_calc_create_reservation(mp); - -} - STATIC uint xfs_calc_create_tmpfile_reservation( struct xfs_mount *mp) { uint res = XFS_DQUOT_LOGRES(mp); - if (xfs_sb_version_hascrc(&mp->m_sb)) - res += xfs_calc_icreate_resv_alloc(mp); - else - res += xfs_calc_create_resv_alloc(mp); - + res += xfs_calc_icreate_resv_alloc(mp); return res + xfs_calc_iunlink_add_reservation(mp); } @@ -497,7 +457,7 @@ STATIC uint xfs_calc_mkdir_reservation( struct xfs_mount *mp) { - return xfs_calc_create_reservation(mp); + return xfs_calc_icreate_reservation(mp); } @@ -510,7 +470,7 @@ STATIC uint xfs_calc_symlink_reservation( struct xfs_mount *mp) { - return xfs_calc_create_reservation(mp) + + return xfs_calc_icreate_reservation(mp) + xfs_calc_buf_res(1, XFS_SYMLINK_MAXLEN); } @@ -869,7 +829,7 @@ xfs_trans_resv_calc( resp->tr_symlink.tr_logcount = XFS_SYMLINK_LOG_COUNT; resp->tr_symlink.tr_logflags |= XFS_TRANS_PERM_LOG_RES; - resp->tr_create.tr_logres = xfs_calc_create_reservation(mp); + resp->tr_create.tr_logres = xfs_calc_icreate_reservation(mp); resp->tr_create.tr_logcount = XFS_CREATE_LOG_COUNT; resp->tr_create.tr_logflags |= XFS_TRANS_PERM_LOG_RES; -- cgit v1.2.3 From 5a9d929d6e13278df62bd9e3d3ceae8c87ad1eea Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Mon, 8 Jan 2018 10:41:39 -0800 Subject: iomap: report collisions between directio and buffered writes to userspace If two programs simultaneously try to write to the same part of a file via direct IO and buffered IO, there's a chance that the post-diowrite pagecache invalidation will fail on the dirty page. When this happens, the dio write succeeded, which means that the page cache is no longer coherent with the disk! 
Programs are not supposed to mix IO types and this is a clear case of data corruption, so store an EIO which will be reflected to userspace during the next fsync. Replace the WARN_ON with a ratelimited pr_crit so that the developers have /some/ kind of breadcrumb to track down the offending program(s) and file(s) involved. Signed-off-by: Darrick J. Wong Reviewed-by: Liu Bo --- fs/direct-io.c | 24 +++++++++++++++++++++++- fs/iomap.c | 12 ++++++++++-- include/linux/fs.h | 1 + 3 files changed, 34 insertions(+), 3 deletions(-) diff --git a/fs/direct-io.c b/fs/direct-io.c index 3aafb3343a65..a0ca9e48e993 100644 --- a/fs/direct-io.c +++ b/fs/direct-io.c @@ -219,6 +219,27 @@ static inline struct page *dio_get_page(struct dio *dio, return dio->pages[sdio->head]; } +/* + * Warn about a page cache invalidation failure during a direct io write. + */ +void dio_warn_stale_pagecache(struct file *filp) +{ + static DEFINE_RATELIMIT_STATE(_rs, 86400 * HZ, DEFAULT_RATELIMIT_BURST); + char pathname[128]; + struct inode *inode = file_inode(filp); + char *path; + + errseq_set(&inode->i_mapping->wb_err, -EIO); + if (__ratelimit(&_rs)) { + path = file_path(filp, pathname, sizeof(pathname)); + if (IS_ERR(path)) + path = "(unknown)"; + pr_crit("Page cache invalidation failure on direct I/O. 
Possible data corruption due to collision with buffered I/O!\n"); + pr_crit("File: %s PID: %d Comm: %.20s\n", path, current->pid, + current->comm); + } +} + /** * dio_complete() - called when all DIO BIO I/O has been completed * @offset: the byte offset in the file of the completed operation @@ -290,7 +311,8 @@ static ssize_t dio_complete(struct dio *dio, ssize_t ret, unsigned int flags) err = invalidate_inode_pages2_range(dio->inode->i_mapping, offset >> PAGE_SHIFT, (offset + ret - 1) >> PAGE_SHIFT); - WARN_ON_ONCE(err); + if (err) + dio_warn_stale_pagecache(dio->iocb->ki_filp); } if (!(dio->flags & DIO_SKIP_DIO_COUNT)) diff --git a/fs/iomap.c b/fs/iomap.c index 47d29ccffaef..e5de7725f18a 100644 --- a/fs/iomap.c +++ b/fs/iomap.c @@ -753,7 +753,8 @@ static ssize_t iomap_dio_complete(struct iomap_dio *dio) err = invalidate_inode_pages2_range(inode->i_mapping, offset >> PAGE_SHIFT, (offset + dio->size - 1) >> PAGE_SHIFT); - WARN_ON_ONCE(err); + if (err) + dio_warn_stale_pagecache(iocb->ki_filp); } inode_dio_end(file_inode(iocb->ki_filp)); @@ -1018,9 +1019,16 @@ iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter, if (ret) goto out_free_dio; + /* + * Try to invalidate cache pages for the range we're direct + * writing. If this invalidation fails, tough, the write will + * still work, but racing two incompatible write paths is a + * pretty crazy thing to do, so we don't support it 100%. 
+ */ ret = invalidate_inode_pages2_range(mapping, start >> PAGE_SHIFT, end >> PAGE_SHIFT); - WARN_ON_ONCE(ret); + if (ret) + dio_warn_stale_pagecache(iocb->ki_filp); ret = 0; if (iov_iter_rw(iter) == WRITE && !is_sync_kiocb(iocb) && diff --git a/include/linux/fs.h b/include/linux/fs.h index 511fbaabf624..7f8d96d68f34 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2992,6 +2992,7 @@ enum { }; void dio_end_io(struct bio *bio); +void dio_warn_stale_pagecache(struct file *filp); ssize_t __blockdev_direct_IO(struct kiocb *iocb, struct inode *inode, struct block_device *bdev, struct iov_iter *iter, -- cgit v1.2.3 From 5a0f433745ead12f485da2213fa0c473ed613a45 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Mon, 8 Jan 2018 10:49:02 -0800 Subject: xfs: ignore agfl read errors when not scrubbing agfl In xfs_scrub_ag_read_headers, if we're not scrubbing the AGFL but hit a read error reading the AGFL, we should reset the error code so that it doesn't propagate up into the caller. Signed-off-by: Darrick J. Wong Reviewed-by: Dave Chinner --- fs/xfs/scrub/common.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/xfs/scrub/common.c b/fs/xfs/scrub/common.c index 98452ad58cff..6ec4e1013ac0 100644 --- a/fs/xfs/scrub/common.c +++ b/fs/xfs/scrub/common.c @@ -302,7 +302,7 @@ xfs_scrub_ag_read_headers( error = xfs_alloc_read_agfl(mp, sc->tp, agno, agfl); if (error && want_ag_read_header_failure(sc, XFS_SCRUB_TYPE_AGFL)) goto out; - + error = 0; out: return error; } -- cgit v1.2.3 From e5b37faa932d79f0c277badd0566317718648ffc Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Mon, 8 Jan 2018 10:49:02 -0800 Subject: xfs: catch a few more error codes when scrubbing secondary sb The superblock validation routines return a variety of error codes to reject a mount request. For scrub we can assume that the mount succeeded, so if we see these things appear when scrubbing secondary sb X, we can treat them all like corruption. Signed-off-by: Darrick J. 
Wong Reviewed-by: Dave Chinner --- fs/xfs/scrub/agheader.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/fs/xfs/scrub/agheader.c b/fs/xfs/scrub/agheader.c index b599358c3796..97beb4773298 100644 --- a/fs/xfs/scrub/agheader.c +++ b/fs/xfs/scrub/agheader.c @@ -126,6 +126,22 @@ xfs_scrub_superblock( error = xfs_trans_read_buf(mp, sc->tp, mp->m_ddev_targp, XFS_AGB_TO_DADDR(mp, agno, XFS_SB_BLOCK(mp)), XFS_FSS_TO_BB(mp, 1), 0, &bp, &xfs_sb_buf_ops); + /* + * The superblock verifier can return several different error codes + * if it thinks the superblock doesn't look right. For a mount these + * would all get bounced back to userspace, but if we're here then the + * fs mounted successfully, which means that this secondary superblock + * is simply incorrect. Treat all these codes the same way we treat + * any corruption. + */ + switch (error) { + case -EINVAL: /* also -EWRONGFS */ + case -ENOSYS: + case -EFBIG: + error = -EFSCORRUPTED; + default: + break; + } if (!xfs_scrub_process_error(sc, agno, XFS_SB_BLOCK(mp), &error)) return error; -- cgit v1.2.3 From 2b9e9b5771fdcff3d307faaade23bd4e965374da Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Mon, 8 Jan 2018 10:49:03 -0800 Subject: xfs: xfs_scrub_bmap should use for_each_xfs_iext Refactor xfs_scrub_bmap to use for_each_xfs_iext now that it exists. Signed-off-by: Darrick J. Wong Reviewed-by: Dave Chinner --- fs/xfs/scrub/bmap.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/fs/xfs/scrub/bmap.c b/fs/xfs/scrub/bmap.c index 42fec0bcd9e1..0261e1133901 100644 --- a/fs/xfs/scrub/bmap.c +++ b/fs/xfs/scrub/bmap.c @@ -235,7 +235,6 @@ xfs_scrub_bmap( struct xfs_ifork *ifp; xfs_fileoff_t endoff; struct xfs_iext_cursor icur; - bool found; int error = 0; ifp = XFS_IFORK_PTR(ip, whichfork); @@ -314,9 +313,7 @@ xfs_scrub_bmap( /* Scrub extent records. 
*/ info.lastoff = 0; ifp = XFS_IFORK_PTR(ip, whichfork); - for (found = xfs_iext_lookup_extent(ip, ifp, 0, &icur, &irec); - found != 0; - found = xfs_iext_next_extent(ifp, &icur, &irec)) { + for_each_xfs_iext(ifp, &icur, &irec) { if (xfs_scrub_should_terminate(sc, &error)) break; if (isnullstartblock(irec.br_startblock)) -- cgit v1.2.3 From 1ad1205e716fa2a88ca960601cc807cd38853c01 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Mon, 8 Jan 2018 10:49:03 -0800 Subject: xfs: always grab transaction when scrubbing inode Always allocate a transaction for inode scrubbing, even if the _iget fails. This is something that is nice to have now for consistency with the other scrubbers but will become critical when we get to online repair where we'll actually use the transaction + raw buffer read to fix the verifier errors. Signed-off-by: Darrick J. Wong Reviewed-by: Dave Chinner --- fs/xfs/scrub/inode.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/xfs/scrub/inode.c b/fs/xfs/scrub/inode.c index 2be4b259477a..61cd1123bc4a 100644 --- a/fs/xfs/scrub/inode.c +++ b/fs/xfs/scrub/inode.c @@ -64,7 +64,7 @@ xfs_scrub_setup_inode( break; case -EFSCORRUPTED: case -EFSBADCRC: - return 0; + return xfs_scrub_trans_alloc(sc->sm, mp, &sc->tp); default: return error; } -- cgit v1.2.3 From d658e72b4a09c0eb65c13fc5c15bc6e6d93ed8bc Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Mon, 8 Jan 2018 10:49:04 -0800 Subject: xfs: distinguish between corrupt inode and invalid inum in xfs_scrub_get_inode In xfs_scrub_get_inode, we don't do a good enough job distinguishing EINVAL returns from xfs_iget w/ IGET_UNTRUSTED -- this can happen if the passed in inode number is invalid (past eofs, inobt says it isn't an inode) or if the inum is actually valid but the inode buffer fails verifier. In the first case we still want to return ENOENT, but in the second case we want to capture the corruption error. Therefore, if xfs_iget returns EINVAL, try the raw imap lookup. 
If that succeeds, we conclude it's a corruption error, otherwise we just bounce out to userspace. Signed-off-by: Darrick J. Wong Reviewed-by: Dave Chinner --- fs/xfs/scrub/common.c | 32 ++++++++++++++++++++++++++++---- 1 file changed, 28 insertions(+), 4 deletions(-) diff --git a/fs/xfs/scrub/common.c b/fs/xfs/scrub/common.c index 6ec4e1013ac0..d5c37d8d2fe6 100644 --- a/fs/xfs/scrub/common.c +++ b/fs/xfs/scrub/common.c @@ -503,6 +503,7 @@ xfs_scrub_get_inode( struct xfs_scrub_context *sc, struct xfs_inode *ip_in) { + struct xfs_imap imap; struct xfs_mount *mp = sc->mp; struct xfs_inode *ip = NULL; int error; @@ -518,10 +519,33 @@ xfs_scrub_get_inode( return -ENOENT; error = xfs_iget(mp, NULL, sc->sm->sm_ino, XFS_IGET_UNTRUSTED | XFS_IGET_DONTCACHE, 0, &ip); - if (error == -ENOENT || error == -EINVAL) { - /* inode doesn't exist... */ - return -ENOENT; - } else if (error) { + switch (error) { + case -ENOENT: + /* Inode doesn't exist, just bail out. */ + return error; + case 0: + /* Got an inode, continue. */ + break; + case -EINVAL: + /* + * -EINVAL with IGET_UNTRUSTED could mean one of several + * things: userspace gave us an inode number that doesn't + * correspond to fs space, or doesn't have an inobt entry; + * or it could simply mean that the inode buffer failed the + * read verifiers. + * + * Try just the inode mapping lookup -- if it succeeds, then + * the inode buffer verifier failed and something needs fixing. + * Otherwise, we really couldn't find it so tell userspace + * that it no longer exists. + */ + error = xfs_imap(sc->mp, sc->tp, sc->sm->sm_ino, &imap, + XFS_IGET_UNTRUSTED | XFS_IGET_DONTCACHE); + if (error) + return -ENOENT; + error = -EFSCORRUPTED; + /* fall through */ + default: trace_xfs_scrub_op_error(sc, XFS_INO_TO_AGNO(mp, sc->sm->sm_ino), XFS_INO_TO_AGBNO(mp, sc->sm->sm_ino), -- cgit v1.2.3 From 59f6fec3bdb2aafc84d39f34000819d232182d71 Mon Sep 17 00:00:00 2001 From: "Darrick J. 
Wong" Date: Mon, 8 Jan 2018 10:51:00 -0800 Subject: xfs: remove XFS_FSB_SANITY_CHECK We already have a function to verify fsb pointers, so get rid of the last users of the (less robust) macro. Signed-off-by: Darrick J. Wong Reviewed-by: Dave Chinner --- fs/xfs/libxfs/xfs_bmap.c | 4 ++-- fs/xfs/libxfs/xfs_bmap_btree.c | 4 ++-- fs/xfs/libxfs/xfs_btree.c | 2 +- fs/xfs/libxfs/xfs_btree.h | 4 ---- 4 files changed, 5 insertions(+), 9 deletions(-) diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c index 1bddbba6b80c..140744700b07 100644 --- a/fs/xfs/libxfs/xfs_bmap.c +++ b/fs/xfs/libxfs/xfs_bmap.c @@ -400,7 +400,7 @@ xfs_bmap_check_leaf_extents( pp = XFS_BMBT_PTR_ADDR(mp, block, 1, mp->m_bmap_dmxr[1]); bno = be64_to_cpu(*pp); XFS_WANT_CORRUPTED_GOTO(mp, - XFS_FSB_SANITY_CHECK(mp, bno), error0); + xfs_verify_fsbno(mp, bno), error0); if (bp_release) { bp_release = 0; xfs_trans_brelse(NULL, bp); @@ -1220,7 +1220,7 @@ xfs_iread_extents( pp = XFS_BMBT_PTR_ADDR(mp, block, 1, mp->m_bmap_dmxr[1]); bno = be64_to_cpu(*pp); XFS_WANT_CORRUPTED_GOTO(mp, - XFS_FSB_SANITY_CHECK(mp, bno), out_brelse); + xfs_verify_fsbno(mp, bno), out_brelse); xfs_trans_brelse(tp, bp); } diff --git a/fs/xfs/libxfs/xfs_bmap_btree.c b/fs/xfs/libxfs/xfs_bmap_btree.c index c10aecaaae44..00472e184e7c 100644 --- a/fs/xfs/libxfs/xfs_bmap_btree.c +++ b/fs/xfs/libxfs/xfs_bmap_btree.c @@ -470,11 +470,11 @@ xfs_bmbt_verify( /* sibling pointer verification */ if (!block->bb_u.l.bb_leftsib || (block->bb_u.l.bb_leftsib != cpu_to_be64(NULLFSBLOCK) && - !XFS_FSB_SANITY_CHECK(mp, be64_to_cpu(block->bb_u.l.bb_leftsib)))) + !xfs_verify_fsbno(mp, be64_to_cpu(block->bb_u.l.bb_leftsib)))) return false; if (!block->bb_u.l.bb_rightsib || (block->bb_u.l.bb_rightsib != cpu_to_be64(NULLFSBLOCK) && - !XFS_FSB_SANITY_CHECK(mp, be64_to_cpu(block->bb_u.l.bb_rightsib)))) + !xfs_verify_fsbno(mp, be64_to_cpu(block->bb_u.l.bb_rightsib)))) return false; return true; diff --git a/fs/xfs/libxfs/xfs_btree.c 
b/fs/xfs/libxfs/xfs_btree.c index 5f33adf8eecb..54c0a927a0fa 100644 --- a/fs/xfs/libxfs/xfs_btree.c +++ b/fs/xfs/libxfs/xfs_btree.c @@ -853,7 +853,7 @@ xfs_btree_read_bufl( xfs_daddr_t d; /* real disk block address */ int error; - if (!XFS_FSB_SANITY_CHECK(mp, fsbno)) + if (!xfs_verify_fsbno(mp, fsbno)) return -EFSCORRUPTED; d = XFS_FSB_TO_DADDR(mp, fsbno); error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, d, diff --git a/fs/xfs/libxfs/xfs_btree.h b/fs/xfs/libxfs/xfs_btree.h index b57501c6f71d..b495381b629f 100644 --- a/fs/xfs/libxfs/xfs_btree.h +++ b/fs/xfs/libxfs/xfs_btree.h @@ -473,10 +473,6 @@ static inline int xfs_btree_get_level(struct xfs_btree_block *block) #define XFS_FILBLKS_MIN(a,b) min_t(xfs_filblks_t, (a), (b)) #define XFS_FILBLKS_MAX(a,b) max_t(xfs_filblks_t, (a), (b)) -#define XFS_FSB_SANITY_CHECK(mp,fsb) \ - (fsb && XFS_FSB_TO_AGNO(mp, fsb) < mp->m_sb.sb_agcount && \ - XFS_FSB_TO_AGBNO(mp, fsb) < mp->m_sb.sb_agblocks) - /* * Trace hooks. Currently not implemented as they need to be ported * over to the generic tracing functionality, which is some effort. -- cgit v1.2.3 From 8368a6019d5bbb8b56c140029dcf5ea570b638f1 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Mon, 8 Jan 2018 10:51:00 -0800 Subject: xfs: refactor long-format btree header verification routines Create two helper functions to verify the headers of a long format btree block. We'll use this later for the realtime rmapbt. Signed-off-by: Darrick J. 
Wong Reviewed-by: Dave Chinner --- fs/xfs/libxfs/xfs_bmap_btree.c | 22 ++------------------- fs/xfs/libxfs/xfs_btree.c | 45 ++++++++++++++++++++++++++++++++++++++++++ fs/xfs/libxfs/xfs_btree.h | 3 +++ 3 files changed, 50 insertions(+), 20 deletions(-) diff --git a/fs/xfs/libxfs/xfs_bmap_btree.c b/fs/xfs/libxfs/xfs_bmap_btree.c index 00472e184e7c..862be9c5eb08 100644 --- a/fs/xfs/libxfs/xfs_bmap_btree.c +++ b/fs/xfs/libxfs/xfs_bmap_btree.c @@ -435,17 +435,11 @@ xfs_bmbt_verify( switch (block->bb_magic) { case cpu_to_be32(XFS_BMAP_CRC_MAGIC): - if (!xfs_sb_version_hascrc(&mp->m_sb)) - return false; - if (!uuid_equal(&block->bb_u.l.bb_uuid, &mp->m_sb.sb_meta_uuid)) - return false; - if (be64_to_cpu(block->bb_u.l.bb_blkno) != bp->b_bn) - return false; /* * XXX: need a better way of verifying the owner here. Right now * just make sure there has been one set. */ - if (be64_to_cpu(block->bb_u.l.bb_owner) == 0) + if (!xfs_btree_lblock_v5hdr_verify(bp, XFS_RMAP_OWN_UNKNOWN)) return false; /* fall through */ case cpu_to_be32(XFS_BMAP_MAGIC): @@ -464,20 +458,8 @@ xfs_bmbt_verify( level = be16_to_cpu(block->bb_level); if (level > max(mp->m_bm_maxlevels[0], mp->m_bm_maxlevels[1])) return false; - if (be16_to_cpu(block->bb_numrecs) > mp->m_bmap_dmxr[level != 0]) - return false; - - /* sibling pointer verification */ - if (!block->bb_u.l.bb_leftsib || - (block->bb_u.l.bb_leftsib != cpu_to_be64(NULLFSBLOCK) && - !xfs_verify_fsbno(mp, be64_to_cpu(block->bb_u.l.bb_leftsib)))) - return false; - if (!block->bb_u.l.bb_rightsib || - (block->bb_u.l.bb_rightsib != cpu_to_be64(NULLFSBLOCK) && - !xfs_verify_fsbno(mp, be64_to_cpu(block->bb_u.l.bb_rightsib)))) - return false; - return true; + return xfs_btree_lblock_verify(bp, mp->m_bmap_dmxr[level != 0]); } static void diff --git a/fs/xfs/libxfs/xfs_btree.c b/fs/xfs/libxfs/xfs_btree.c index 54c0a927a0fa..95d4c355c466 100644 --- a/fs/xfs/libxfs/xfs_btree.c +++ b/fs/xfs/libxfs/xfs_btree.c @@ -4529,6 +4529,51 @@ xfs_btree_change_owner( &bbcoi); 
} +/* Verify the v5 fields of a long-format btree block. */ +bool +xfs_btree_lblock_v5hdr_verify( + struct xfs_buf *bp, + uint64_t owner) +{ + struct xfs_mount *mp = bp->b_target->bt_mount; + struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp); + + if (!xfs_sb_version_hascrc(&mp->m_sb)) + return false; + if (!uuid_equal(&block->bb_u.l.bb_uuid, &mp->m_sb.sb_meta_uuid)) + return false; + if (block->bb_u.l.bb_blkno != cpu_to_be64(bp->b_bn)) + return false; + if (owner != XFS_RMAP_OWN_UNKNOWN && + be64_to_cpu(block->bb_u.l.bb_owner) != owner) + return false; + return true; +} + +/* Verify a long-format btree block. */ +bool +xfs_btree_lblock_verify( + struct xfs_buf *bp, + unsigned int max_recs) +{ + struct xfs_mount *mp = bp->b_target->bt_mount; + struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp); + + /* numrecs verification */ + if (be16_to_cpu(block->bb_numrecs) > max_recs) + return false; + + /* sibling pointer verification */ + if (block->bb_u.l.bb_leftsib != cpu_to_be64(NULLFSBLOCK) && + !xfs_verify_fsbno(mp, be64_to_cpu(block->bb_u.l.bb_leftsib))) + return false; + if (block->bb_u.l.bb_rightsib != cpu_to_be64(NULLFSBLOCK) && + !xfs_verify_fsbno(mp, be64_to_cpu(block->bb_u.l.bb_rightsib))) + return false; + + return true; +} + /** * xfs_btree_sblock_v5hdr_verify() -- verify the v5 fields of a short-format * btree block diff --git a/fs/xfs/libxfs/xfs_btree.h b/fs/xfs/libxfs/xfs_btree.h index b495381b629f..5f86ee14cc66 100644 --- a/fs/xfs/libxfs/xfs_btree.h +++ b/fs/xfs/libxfs/xfs_btree.h @@ -494,6 +494,9 @@ static inline int xfs_btree_get_level(struct xfs_btree_block *block) bool xfs_btree_sblock_v5hdr_verify(struct xfs_buf *bp); bool xfs_btree_sblock_verify(struct xfs_buf *bp, unsigned int max_recs); +bool xfs_btree_lblock_v5hdr_verify(struct xfs_buf *bp, uint64_t owner); +bool xfs_btree_lblock_verify(struct xfs_buf *bp, unsigned int max_recs); + uint xfs_btree_compute_maxlevels(struct xfs_mount *mp, uint *limits, unsigned long len); xfs_extlen_t 
xfs_btree_calc_size(struct xfs_mount *mp, uint *limits, -- cgit v1.2.3 From e1e55aaf1cc646b736439cbd5af229759029ae34 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Mon, 8 Jan 2018 10:51:01 -0800 Subject: xfs: refactor short form btree pointer verification Now that we have xfs_verify_agbno, use it to verify short form btree pointers instead of open-coding them. Signed-off-by: Darrick J. Wong Reviewed-by: Dave Chinner --- fs/xfs/libxfs/xfs_btree.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/fs/xfs/libxfs/xfs_btree.c b/fs/xfs/libxfs/xfs_btree.c index 95d4c355c466..33908a62aa39 100644 --- a/fs/xfs/libxfs/xfs_btree.c +++ b/fs/xfs/libxfs/xfs_btree.c @@ -4614,19 +4614,19 @@ xfs_btree_sblock_verify( { struct xfs_mount *mp = bp->b_target->bt_mount; struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp); + xfs_agblock_t agno; /* numrecs verification */ if (be16_to_cpu(block->bb_numrecs) > max_recs) return false; /* sibling pointer verification */ - if (!block->bb_u.s.bb_leftsib || - (be32_to_cpu(block->bb_u.s.bb_leftsib) >= mp->m_sb.sb_agblocks && - block->bb_u.s.bb_leftsib != cpu_to_be32(NULLAGBLOCK))) + agno = xfs_daddr_to_agno(mp, XFS_BUF_ADDR(bp)); + if (block->bb_u.s.bb_leftsib != cpu_to_be32(NULLAGBLOCK) && + !xfs_verify_agbno(mp, agno, be32_to_cpu(block->bb_u.s.bb_leftsib))) return false; - if (!block->bb_u.s.bb_rightsib || - (be32_to_cpu(block->bb_u.s.bb_rightsib) >= mp->m_sb.sb_agblocks && - block->bb_u.s.bb_rightsib != cpu_to_be32(NULLAGBLOCK))) + if (block->bb_u.s.bb_rightsib != cpu_to_be32(NULLAGBLOCK) && + !xfs_verify_agbno(mp, agno, be32_to_cpu(block->bb_u.s.bb_rightsib))) return false; return true; -- cgit v1.2.3 From 9101d3707b9acae8bbb0d82d47e99cf5c60b3ee5 Mon Sep 17 00:00:00 2001 From: "Darrick J. 
Wong" Date: Mon, 8 Jan 2018 10:51:01 -0800 Subject: xfs: remove XFS_WANT_CORRUPTED_RETURN from dir3 data verifiers Since __xfs_dir3_data_check verifies on-disk metadata, we can't have it noisily blowing asserts and hanging the system on corrupt data coming in off the disk. Instead, have it return a boolean like all the other checker functions, and only have it noisily fail if we fail in debug mode. Signed-off-by: Darrick J. Wong Reviewed-by: Dave Chinner --- fs/xfs/libxfs/xfs_dir2_block.c | 4 +- fs/xfs/libxfs/xfs_dir2_data.c | 100 +++++++++++++++++++++-------------------- fs/xfs/libxfs/xfs_dir2_priv.h | 10 ++++- 3 files changed, 61 insertions(+), 53 deletions(-) diff --git a/fs/xfs/libxfs/xfs_dir2_block.c b/fs/xfs/libxfs/xfs_dir2_block.c index 43c902f7a68d..e4272494b17f 100644 --- a/fs/xfs/libxfs/xfs_dir2_block.c +++ b/fs/xfs/libxfs/xfs_dir2_block.c @@ -78,9 +78,7 @@ xfs_dir3_block_verify( if (hdr3->magic != cpu_to_be32(XFS_DIR2_BLOCK_MAGIC)) return false; } - if (__xfs_dir3_data_check(NULL, bp)) - return false; - return true; + return __xfs_dir3_data_check(NULL, bp); } static void diff --git a/fs/xfs/libxfs/xfs_dir2_data.c b/fs/xfs/libxfs/xfs_dir2_data.c index 8727a43115ef..af15f7052f3a 100644 --- a/fs/xfs/libxfs/xfs_dir2_data.c +++ b/fs/xfs/libxfs/xfs_dir2_data.c @@ -36,9 +36,9 @@ /* * Check the consistency of the data block. * The input can also be a block-format directory. - * Return 0 is the buffer is good, otherwise an error. + * Return true if the buffer is good. */ -int +bool __xfs_dir3_data_check( struct xfs_inode *dp, /* incore inode pointer */ struct xfs_buf *bp) /* data block's buffer */ @@ -90,16 +90,16 @@ __xfs_dir3_data_check( * so just ensure that the count falls somewhere inside the * block right now. 
*/ - XFS_WANT_CORRUPTED_RETURN(mp, be32_to_cpu(btp->count) < - ((char *)btp - p) / sizeof(struct xfs_dir2_leaf_entry)); + if (be32_to_cpu(btp->count) >= + ((char *)btp - p) / sizeof(struct xfs_dir2_leaf_entry)) + return false; break; case cpu_to_be32(XFS_DIR3_DATA_MAGIC): case cpu_to_be32(XFS_DIR2_DATA_MAGIC): endp = (char *)hdr + geo->blksize; break; default: - XFS_ERROR_REPORT("Bad Magic", XFS_ERRLEVEL_LOW, mp); - return -EFSCORRUPTED; + return false; } /* @@ -108,22 +108,25 @@ __xfs_dir3_data_check( bf = ops->data_bestfree_p(hdr); count = lastfree = freeseen = 0; if (!bf[0].length) { - XFS_WANT_CORRUPTED_RETURN(mp, !bf[0].offset); + if (bf[0].offset) + return false; freeseen |= 1 << 0; } if (!bf[1].length) { - XFS_WANT_CORRUPTED_RETURN(mp, !bf[1].offset); + if (bf[1].offset) + return false; freeseen |= 1 << 1; } if (!bf[2].length) { - XFS_WANT_CORRUPTED_RETURN(mp, !bf[2].offset); + if (bf[2].offset) + return false; freeseen |= 1 << 2; } - XFS_WANT_CORRUPTED_RETURN(mp, be16_to_cpu(bf[0].length) >= - be16_to_cpu(bf[1].length)); - XFS_WANT_CORRUPTED_RETURN(mp, be16_to_cpu(bf[1].length) >= - be16_to_cpu(bf[2].length)); + if (be16_to_cpu(bf[0].length) < be16_to_cpu(bf[1].length)) + return false; + if (be16_to_cpu(bf[1].length) < be16_to_cpu(bf[2].length)) + return false; /* * Loop over the data/unused entries. */ @@ -135,22 +138,23 @@ __xfs_dir3_data_check( * doesn't need to be there. 
*/ if (be16_to_cpu(dup->freetag) == XFS_DIR2_DATA_FREE_TAG) { - XFS_WANT_CORRUPTED_RETURN(mp, lastfree == 0); - XFS_WANT_CORRUPTED_RETURN(mp, endp >= - p + be16_to_cpu(dup->length)); - XFS_WANT_CORRUPTED_RETURN(mp, - be16_to_cpu(*xfs_dir2_data_unused_tag_p(dup)) == - (char *)dup - (char *)hdr); + if (lastfree != 0) + return false; + if (endp < p + be16_to_cpu(dup->length)) + return false; + if (be16_to_cpu(*xfs_dir2_data_unused_tag_p(dup)) != + (char *)dup - (char *)hdr) + return false; dfp = xfs_dir2_data_freefind(hdr, bf, dup); if (dfp) { i = (int)(dfp - bf); - XFS_WANT_CORRUPTED_RETURN(mp, - (freeseen & (1 << i)) == 0); + if ((freeseen & (1 << i)) != 0) + return false; freeseen |= 1 << i; } else { - XFS_WANT_CORRUPTED_RETURN(mp, - be16_to_cpu(dup->length) <= - be16_to_cpu(bf[2].length)); + if (be16_to_cpu(dup->length) > + be16_to_cpu(bf[2].length)) + return false; } p += be16_to_cpu(dup->length); lastfree = 1; @@ -163,16 +167,17 @@ __xfs_dir3_data_check( * The linear search is crude but this is DEBUG code. 
*/ dep = (xfs_dir2_data_entry_t *)p; - XFS_WANT_CORRUPTED_RETURN(mp, dep->namelen != 0); - XFS_WANT_CORRUPTED_RETURN(mp, - !xfs_dir_ino_validate(mp, be64_to_cpu(dep->inumber))); - XFS_WANT_CORRUPTED_RETURN(mp, endp >= - p + ops->data_entsize(dep->namelen)); - XFS_WANT_CORRUPTED_RETURN(mp, - be16_to_cpu(*ops->data_entry_tag_p(dep)) == - (char *)dep - (char *)hdr); - XFS_WANT_CORRUPTED_RETURN(mp, - ops->data_get_ftype(dep) < XFS_DIR3_FT_MAX); + if (dep->namelen == 0) + return false; + if (xfs_dir_ino_validate(mp, be64_to_cpu(dep->inumber))) + return false; + if (endp < p + ops->data_entsize(dep->namelen)) + return false; + if (be16_to_cpu(*ops->data_entry_tag_p(dep)) != + (char *)dep - (char *)hdr) + return false; + if (ops->data_get_ftype(dep) >= XFS_DIR3_FT_MAX) + return false; count++; lastfree = 0; if (hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC) || @@ -188,31 +193,32 @@ __xfs_dir3_data_check( be32_to_cpu(lep[i].hashval) == hash) break; } - XFS_WANT_CORRUPTED_RETURN(mp, - i < be32_to_cpu(btp->count)); + if (i >= be32_to_cpu(btp->count)) + return false; } p += ops->data_entsize(dep->namelen); } /* * Need to have seen all the entries and all the bestfree slots. 
*/ - XFS_WANT_CORRUPTED_RETURN(mp, freeseen == 7); + if (freeseen != 7) + return false; if (hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC) || hdr->magic == cpu_to_be32(XFS_DIR3_BLOCK_MAGIC)) { for (i = stale = 0; i < be32_to_cpu(btp->count); i++) { if (lep[i].address == cpu_to_be32(XFS_DIR2_NULL_DATAPTR)) stale++; - if (i > 0) - XFS_WANT_CORRUPTED_RETURN(mp, - be32_to_cpu(lep[i].hashval) >= - be32_to_cpu(lep[i - 1].hashval)); + if (i > 0 && be32_to_cpu(lep[i].hashval) < + be32_to_cpu(lep[i - 1].hashval)) + return false; } - XFS_WANT_CORRUPTED_RETURN(mp, count == - be32_to_cpu(btp->count) - be32_to_cpu(btp->stale)); - XFS_WANT_CORRUPTED_RETURN(mp, stale == be32_to_cpu(btp->stale)); + if (count != be32_to_cpu(btp->count) - be32_to_cpu(btp->stale)) + return false; + if (stale != be32_to_cpu(btp->stale)) + return false; } - return 0; + return true; } static bool @@ -235,9 +241,7 @@ xfs_dir3_data_verify( if (hdr3->magic != cpu_to_be32(XFS_DIR2_DATA_MAGIC)) return false; } - if (__xfs_dir3_data_check(NULL, bp)) - return false; - return true; + return __xfs_dir3_data_check(NULL, bp); } /* diff --git a/fs/xfs/libxfs/xfs_dir2_priv.h b/fs/xfs/libxfs/xfs_dir2_priv.h index 4badd26c47e6..45c68d04219d 100644 --- a/fs/xfs/libxfs/xfs_dir2_priv.h +++ b/fs/xfs/libxfs/xfs_dir2_priv.h @@ -39,12 +39,18 @@ extern int xfs_dir2_leaf_to_block(struct xfs_da_args *args, /* xfs_dir2_data.c */ #ifdef DEBUG -#define xfs_dir3_data_check(dp,bp) __xfs_dir3_data_check(dp, bp); +#define xfs_dir3_data_check(dp, bp) \ +do { \ + if (!__xfs_dir3_data_check((dp), (bp))) { \ + XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, \ + (bp)->b_target->bt_mount, (bp)->b_addr); \ + } \ +} while (0) #else #define xfs_dir3_data_check(dp,bp) #endif -extern int __xfs_dir3_data_check(struct xfs_inode *dp, struct xfs_buf *bp); +extern bool __xfs_dir3_data_check(struct xfs_inode *dp, struct xfs_buf *bp); extern int xfs_dir3_data_read(struct xfs_trans *tp, struct xfs_inode *dp, xfs_dablk_t bno, xfs_daddr_t mapped_bno, 
struct xfs_buf **bpp); extern int xfs_dir3_data_readahead(struct xfs_inode *dp, xfs_dablk_t bno, -- cgit v1.2.3 From 31ca03c92c329525ee3a97d99c47f1ebbaed5d63 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Mon, 8 Jan 2018 10:51:02 -0800 Subject: xfs: refactor xfs_verifier_error and xfs_buf_ioerror Since all verification errors also mark the buffer as having an error, we can combine these two calls. Later we'll add a xfs_failaddr_t parameter to promote the idea of reporting corruption errors and the address of the failing check to enable better debugging reports. Signed-off-by: Darrick J. Wong Reviewed-by: Dave Chinner --- fs/xfs/libxfs/xfs_alloc.c | 20 ++++++-------------- fs/xfs/libxfs/xfs_alloc_btree.c | 11 ++++------- fs/xfs/libxfs/xfs_attr_leaf.c | 10 +++------- fs/xfs/libxfs/xfs_attr_remote.c | 24 +++++++++++------------- fs/xfs/libxfs/xfs_bmap_btree.c | 11 ++++------- fs/xfs/libxfs/xfs_da_btree.c | 16 +++++----------- fs/xfs/libxfs/xfs_dir2_block.c | 10 +++------- fs/xfs/libxfs/xfs_dir2_data.c | 15 +++++---------- fs/xfs/libxfs/xfs_dir2_leaf.c | 10 +++------- fs/xfs/libxfs/xfs_dir2_node.c | 13 ++++--------- fs/xfs/libxfs/xfs_dquot_buf.c | 10 +++------- fs/xfs/libxfs/xfs_ialloc.c | 10 +++------- fs/xfs/libxfs/xfs_ialloc_btree.c | 11 ++++------- fs/xfs/libxfs/xfs_inode_buf.c | 3 +-- fs/xfs/libxfs/xfs_refcount_btree.c | 11 ++++------- fs/xfs/libxfs/xfs_rmap_btree.c | 11 ++++------- fs/xfs/libxfs/xfs_sb.c | 10 ++++------ fs/xfs/libxfs/xfs_symlink_remote.c | 10 +++------- fs/xfs/xfs_buf.c | 7 ++++--- fs/xfs/xfs_buf.h | 4 +++- fs/xfs/xfs_error.c | 5 ++++- fs/xfs/xfs_error.h | 2 +- fs/xfs/xfs_trace.h | 6 +++--- 23 files changed, 89 insertions(+), 151 deletions(-) diff --git a/fs/xfs/libxfs/xfs_alloc.c b/fs/xfs/libxfs/xfs_alloc.c index 83ed7715f856..055ffa8b7a52 100644 --- a/fs/xfs/libxfs/xfs_alloc.c +++ b/fs/xfs/libxfs/xfs_alloc.c @@ -567,12 +567,9 @@ xfs_agfl_read_verify( return; if (!xfs_buf_verify_cksum(bp, XFS_AGFL_CRC_OFF)) - xfs_buf_ioerror(bp, 
-EFSBADCRC); + xfs_verifier_error(bp, -EFSBADCRC); else if (!xfs_agfl_verify(bp)) - xfs_buf_ioerror(bp, -EFSCORRUPTED); - - if (bp->b_error) - xfs_verifier_error(bp); + xfs_verifier_error(bp, -EFSCORRUPTED); } static void @@ -587,8 +584,7 @@ xfs_agfl_write_verify( return; if (!xfs_agfl_verify(bp)) { - xfs_buf_ioerror(bp, -EFSCORRUPTED); - xfs_verifier_error(bp); + xfs_verifier_error(bp, -EFSCORRUPTED); return; } @@ -2461,13 +2457,10 @@ xfs_agf_read_verify( if (xfs_sb_version_hascrc(&mp->m_sb) && !xfs_buf_verify_cksum(bp, XFS_AGF_CRC_OFF)) - xfs_buf_ioerror(bp, -EFSBADCRC); + xfs_verifier_error(bp, -EFSBADCRC); else if (XFS_TEST_ERROR(!xfs_agf_verify(mp, bp), mp, XFS_ERRTAG_ALLOC_READ_AGF)) - xfs_buf_ioerror(bp, -EFSCORRUPTED); - - if (bp->b_error) - xfs_verifier_error(bp); + xfs_verifier_error(bp, -EFSCORRUPTED); } static void @@ -2478,8 +2471,7 @@ xfs_agf_write_verify( struct xfs_buf_log_item *bip = bp->b_fspriv; if (!xfs_agf_verify(mp, bp)) { - xfs_buf_ioerror(bp, -EFSCORRUPTED); - xfs_verifier_error(bp); + xfs_verifier_error(bp, -EFSCORRUPTED); return; } diff --git a/fs/xfs/libxfs/xfs_alloc_btree.c b/fs/xfs/libxfs/xfs_alloc_btree.c index cfde0a0f9706..752d6aa10e54 100644 --- a/fs/xfs/libxfs/xfs_alloc_btree.c +++ b/fs/xfs/libxfs/xfs_alloc_btree.c @@ -364,14 +364,12 @@ xfs_allocbt_read_verify( struct xfs_buf *bp) { if (!xfs_btree_sblock_verify_crc(bp)) - xfs_buf_ioerror(bp, -EFSBADCRC); + xfs_verifier_error(bp, -EFSBADCRC); else if (!xfs_allocbt_verify(bp)) - xfs_buf_ioerror(bp, -EFSCORRUPTED); + xfs_verifier_error(bp, -EFSCORRUPTED); - if (bp->b_error) { + if (bp->b_error) trace_xfs_btree_corrupt(bp, _RET_IP_); - xfs_verifier_error(bp); - } } static void @@ -380,8 +378,7 @@ xfs_allocbt_write_verify( { if (!xfs_allocbt_verify(bp)) { trace_xfs_btree_corrupt(bp, _RET_IP_); - xfs_buf_ioerror(bp, -EFSCORRUPTED); - xfs_verifier_error(bp); + xfs_verifier_error(bp, -EFSCORRUPTED); return; } xfs_btree_sblock_calc_crc(bp); diff --git a/fs/xfs/libxfs/xfs_attr_leaf.c 
b/fs/xfs/libxfs/xfs_attr_leaf.c index 601eaa36f1ad..6a9805ad95cb 100644 --- a/fs/xfs/libxfs/xfs_attr_leaf.c +++ b/fs/xfs/libxfs/xfs_attr_leaf.c @@ -297,8 +297,7 @@ xfs_attr3_leaf_write_verify( struct xfs_attr3_leaf_hdr *hdr3 = bp->b_addr; if (!xfs_attr3_leaf_verify(bp)) { - xfs_buf_ioerror(bp, -EFSCORRUPTED); - xfs_verifier_error(bp); + xfs_verifier_error(bp, -EFSCORRUPTED); return; } @@ -325,12 +324,9 @@ xfs_attr3_leaf_read_verify( if (xfs_sb_version_hascrc(&mp->m_sb) && !xfs_buf_verify_cksum(bp, XFS_ATTR3_LEAF_CRC_OFF)) - xfs_buf_ioerror(bp, -EFSBADCRC); + xfs_verifier_error(bp, -EFSBADCRC); else if (!xfs_attr3_leaf_verify(bp)) - xfs_buf_ioerror(bp, -EFSCORRUPTED); - - if (bp->b_error) - xfs_verifier_error(bp); + xfs_verifier_error(bp, -EFSCORRUPTED); } const struct xfs_buf_ops xfs_attr3_leaf_buf_ops = { diff --git a/fs/xfs/libxfs/xfs_attr_remote.c b/fs/xfs/libxfs/xfs_attr_remote.c index d56caf037ca0..231b569bb230 100644 --- a/fs/xfs/libxfs/xfs_attr_remote.c +++ b/fs/xfs/libxfs/xfs_attr_remote.c @@ -137,22 +137,20 @@ xfs_attr3_rmt_read_verify( while (len > 0) { if (!xfs_verify_cksum(ptr, blksize, XFS_ATTR3_RMT_CRC_OFF)) { - xfs_buf_ioerror(bp, -EFSBADCRC); - break; + xfs_verifier_error(bp, -EFSBADCRC); + return; } if (!xfs_attr3_rmt_verify(mp, ptr, blksize, bno)) { - xfs_buf_ioerror(bp, -EFSCORRUPTED); - break; + xfs_verifier_error(bp, -EFSCORRUPTED); + return; } len -= blksize; ptr += blksize; bno += BTOBB(blksize); } - if (bp->b_error) - xfs_verifier_error(bp); - else - ASSERT(len == 0); + if (len != 0) + xfs_verifier_error(bp, -EFSCORRUPTED); } static void @@ -178,8 +176,7 @@ xfs_attr3_rmt_write_verify( struct xfs_attr3_rmt_hdr *rmt = (struct xfs_attr3_rmt_hdr *)ptr; if (!xfs_attr3_rmt_verify(mp, ptr, blksize, bno)) { - xfs_buf_ioerror(bp, -EFSCORRUPTED); - xfs_verifier_error(bp); + xfs_verifier_error(bp, -EFSCORRUPTED); return; } @@ -188,8 +185,7 @@ xfs_attr3_rmt_write_verify( * xfs_attr3_rmt_hdr_set() for the explanation. 
*/ if (rmt->rm_lsn != cpu_to_be64(NULLCOMMITLSN)) { - xfs_buf_ioerror(bp, -EFSCORRUPTED); - xfs_verifier_error(bp); + xfs_verifier_error(bp, -EFSCORRUPTED); return; } xfs_update_cksum(ptr, blksize, XFS_ATTR3_RMT_CRC_OFF); @@ -198,7 +194,9 @@ xfs_attr3_rmt_write_verify( ptr += blksize; bno += BTOBB(blksize); } - ASSERT(len == 0); + + if (len != 0) + xfs_verifier_error(bp, -EFSCORRUPTED); } const struct xfs_buf_ops xfs_attr3_rmt_buf_ops = { diff --git a/fs/xfs/libxfs/xfs_bmap_btree.c b/fs/xfs/libxfs/xfs_bmap_btree.c index 862be9c5eb08..b6d61c7161d1 100644 --- a/fs/xfs/libxfs/xfs_bmap_btree.c +++ b/fs/xfs/libxfs/xfs_bmap_btree.c @@ -467,14 +467,12 @@ xfs_bmbt_read_verify( struct xfs_buf *bp) { if (!xfs_btree_lblock_verify_crc(bp)) - xfs_buf_ioerror(bp, -EFSBADCRC); + xfs_verifier_error(bp, -EFSBADCRC); else if (!xfs_bmbt_verify(bp)) - xfs_buf_ioerror(bp, -EFSCORRUPTED); + xfs_verifier_error(bp, -EFSCORRUPTED); - if (bp->b_error) { + if (bp->b_error) trace_xfs_btree_corrupt(bp, _RET_IP_); - xfs_verifier_error(bp); - } } static void @@ -483,8 +481,7 @@ xfs_bmbt_write_verify( { if (!xfs_bmbt_verify(bp)) { trace_xfs_btree_corrupt(bp, _RET_IP_); - xfs_buf_ioerror(bp, -EFSCORRUPTED); - xfs_verifier_error(bp); + xfs_verifier_error(bp, -EFSCORRUPTED); return; } xfs_btree_lblock_calc_crc(bp); diff --git a/fs/xfs/libxfs/xfs_da_btree.c b/fs/xfs/libxfs/xfs_da_btree.c index 651611530d2f..27b5ed375e61 100644 --- a/fs/xfs/libxfs/xfs_da_btree.c +++ b/fs/xfs/libxfs/xfs_da_btree.c @@ -186,8 +186,7 @@ xfs_da3_node_write_verify( struct xfs_da3_node_hdr *hdr3 = bp->b_addr; if (!xfs_da3_node_verify(bp)) { - xfs_buf_ioerror(bp, -EFSCORRUPTED); - xfs_verifier_error(bp); + xfs_verifier_error(bp, -EFSCORRUPTED); return; } @@ -215,15 +214,13 @@ xfs_da3_node_read_verify( switch (be16_to_cpu(info->magic)) { case XFS_DA3_NODE_MAGIC: if (!xfs_buf_verify_cksum(bp, XFS_DA3_NODE_CRC_OFF)) { - xfs_buf_ioerror(bp, -EFSBADCRC); + xfs_verifier_error(bp, -EFSBADCRC); break; } /* fall through */ case 
XFS_DA_NODE_MAGIC: - if (!xfs_da3_node_verify(bp)) { - xfs_buf_ioerror(bp, -EFSCORRUPTED); - break; - } + if (!xfs_da3_node_verify(bp)) + xfs_verifier_error(bp, -EFSCORRUPTED); return; case XFS_ATTR_LEAF_MAGIC: case XFS_ATTR3_LEAF_MAGIC: @@ -236,12 +233,9 @@ xfs_da3_node_read_verify( bp->b_ops->verify_read(bp); return; default: - xfs_buf_ioerror(bp, -EFSCORRUPTED); + xfs_verifier_error(bp, -EFSCORRUPTED); break; } - - /* corrupt block */ - xfs_verifier_error(bp); } const struct xfs_buf_ops xfs_da3_node_buf_ops = { diff --git a/fs/xfs/libxfs/xfs_dir2_block.c b/fs/xfs/libxfs/xfs_dir2_block.c index e4272494b17f..a5d95c9ddd36 100644 --- a/fs/xfs/libxfs/xfs_dir2_block.c +++ b/fs/xfs/libxfs/xfs_dir2_block.c @@ -89,12 +89,9 @@ xfs_dir3_block_read_verify( if (xfs_sb_version_hascrc(&mp->m_sb) && !xfs_buf_verify_cksum(bp, XFS_DIR3_DATA_CRC_OFF)) - xfs_buf_ioerror(bp, -EFSBADCRC); + xfs_verifier_error(bp, -EFSBADCRC); else if (!xfs_dir3_block_verify(bp)) - xfs_buf_ioerror(bp, -EFSCORRUPTED); - - if (bp->b_error) - xfs_verifier_error(bp); + xfs_verifier_error(bp, -EFSCORRUPTED); } static void @@ -106,8 +103,7 @@ xfs_dir3_block_write_verify( struct xfs_dir3_blk_hdr *hdr3 = bp->b_addr; if (!xfs_dir3_block_verify(bp)) { - xfs_buf_ioerror(bp, -EFSCORRUPTED); - xfs_verifier_error(bp); + xfs_verifier_error(bp, -EFSCORRUPTED); return; } diff --git a/fs/xfs/libxfs/xfs_dir2_data.c b/fs/xfs/libxfs/xfs_dir2_data.c index af15f7052f3a..e1546fb33393 100644 --- a/fs/xfs/libxfs/xfs_dir2_data.c +++ b/fs/xfs/libxfs/xfs_dir2_data.c @@ -267,8 +267,7 @@ xfs_dir3_data_reada_verify( bp->b_ops->verify_read(bp); return; default: - xfs_buf_ioerror(bp, -EFSCORRUPTED); - xfs_verifier_error(bp); + xfs_verifier_error(bp, -EFSCORRUPTED); break; } } @@ -280,13 +279,10 @@ xfs_dir3_data_read_verify( struct xfs_mount *mp = bp->b_target->bt_mount; if (xfs_sb_version_hascrc(&mp->m_sb) && - !xfs_buf_verify_cksum(bp, XFS_DIR3_DATA_CRC_OFF)) - xfs_buf_ioerror(bp, -EFSBADCRC); + !xfs_buf_verify_cksum(bp, 
XFS_DIR3_DATA_CRC_OFF)) + xfs_verifier_error(bp, -EFSBADCRC); else if (!xfs_dir3_data_verify(bp)) - xfs_buf_ioerror(bp, -EFSCORRUPTED); - - if (bp->b_error) - xfs_verifier_error(bp); + xfs_verifier_error(bp, -EFSCORRUPTED); } static void @@ -298,8 +294,7 @@ xfs_dir3_data_write_verify( struct xfs_dir3_blk_hdr *hdr3 = bp->b_addr; if (!xfs_dir3_data_verify(bp)) { - xfs_buf_ioerror(bp, -EFSCORRUPTED); - xfs_verifier_error(bp); + xfs_verifier_error(bp, -EFSCORRUPTED); return; } diff --git a/fs/xfs/libxfs/xfs_dir2_leaf.c b/fs/xfs/libxfs/xfs_dir2_leaf.c index 27297a689d9c..071f879cc114 100644 --- a/fs/xfs/libxfs/xfs_dir2_leaf.c +++ b/fs/xfs/libxfs/xfs_dir2_leaf.c @@ -184,12 +184,9 @@ __read_verify( if (xfs_sb_version_hascrc(&mp->m_sb) && !xfs_buf_verify_cksum(bp, XFS_DIR3_LEAF_CRC_OFF)) - xfs_buf_ioerror(bp, -EFSBADCRC); + xfs_verifier_error(bp, -EFSBADCRC); else if (!xfs_dir3_leaf_verify(bp, magic)) - xfs_buf_ioerror(bp, -EFSCORRUPTED); - - if (bp->b_error) - xfs_verifier_error(bp); + xfs_verifier_error(bp, -EFSCORRUPTED); } static void @@ -202,8 +199,7 @@ __write_verify( struct xfs_dir3_leaf_hdr *hdr3 = bp->b_addr; if (!xfs_dir3_leaf_verify(bp, magic)) { - xfs_buf_ioerror(bp, -EFSCORRUPTED); - xfs_verifier_error(bp); + xfs_verifier_error(bp, -EFSCORRUPTED); return; } diff --git a/fs/xfs/libxfs/xfs_dir2_node.c b/fs/xfs/libxfs/xfs_dir2_node.c index 682e2bf370c7..888301d22c45 100644 --- a/fs/xfs/libxfs/xfs_dir2_node.c +++ b/fs/xfs/libxfs/xfs_dir2_node.c @@ -118,12 +118,9 @@ xfs_dir3_free_read_verify( if (xfs_sb_version_hascrc(&mp->m_sb) && !xfs_buf_verify_cksum(bp, XFS_DIR3_FREE_CRC_OFF)) - xfs_buf_ioerror(bp, -EFSBADCRC); + xfs_verifier_error(bp, -EFSBADCRC); else if (!xfs_dir3_free_verify(bp)) - xfs_buf_ioerror(bp, -EFSCORRUPTED); - - if (bp->b_error) - xfs_verifier_error(bp); + xfs_verifier_error(bp, -EFSCORRUPTED); } static void @@ -135,8 +132,7 @@ xfs_dir3_free_write_verify( struct xfs_dir3_blk_hdr *hdr3 = bp->b_addr; if (!xfs_dir3_free_verify(bp)) { - 
xfs_buf_ioerror(bp, -EFSCORRUPTED); - xfs_verifier_error(bp); + xfs_verifier_error(bp, -EFSCORRUPTED); return; } @@ -209,8 +205,7 @@ __xfs_dir3_free_read( /* Check things that we can't do in the verifier. */ if (!xfs_dir3_free_header_check(dp, fbno, *bpp)) { - xfs_buf_ioerror(*bpp, -EFSCORRUPTED); - xfs_verifier_error(*bpp); + xfs_verifier_error(*bpp, -EFSCORRUPTED); xfs_trans_brelse(tp, *bpp); return -EFSCORRUPTED; } diff --git a/fs/xfs/libxfs/xfs_dquot_buf.c b/fs/xfs/libxfs/xfs_dquot_buf.c index 747085b4ef44..a3f1eccc614b 100644 --- a/fs/xfs/libxfs/xfs_dquot_buf.c +++ b/fs/xfs/libxfs/xfs_dquot_buf.c @@ -249,12 +249,9 @@ xfs_dquot_buf_read_verify( struct xfs_mount *mp = bp->b_target->bt_mount; if (!xfs_dquot_buf_verify_crc(mp, bp)) - xfs_buf_ioerror(bp, -EFSBADCRC); + xfs_verifier_error(bp, -EFSBADCRC); else if (!xfs_dquot_buf_verify(mp, bp, XFS_QMOPT_DOWARN)) - xfs_buf_ioerror(bp, -EFSCORRUPTED); - - if (bp->b_error) - xfs_verifier_error(bp); + xfs_verifier_error(bp, -EFSCORRUPTED); } /* @@ -288,8 +285,7 @@ xfs_dquot_buf_write_verify( struct xfs_mount *mp = bp->b_target->bt_mount; if (!xfs_dquot_buf_verify(mp, bp, XFS_QMOPT_DOWARN)) { - xfs_buf_ioerror(bp, -EFSCORRUPTED); - xfs_verifier_error(bp); + xfs_verifier_error(bp, -EFSCORRUPTED); return; } } diff --git a/fs/xfs/libxfs/xfs_ialloc.c b/fs/xfs/libxfs/xfs_ialloc.c index 3b57ef0f2f76..3a41e852206e 100644 --- a/fs/xfs/libxfs/xfs_ialloc.c +++ b/fs/xfs/libxfs/xfs_ialloc.c @@ -2544,13 +2544,10 @@ xfs_agi_read_verify( if (xfs_sb_version_hascrc(&mp->m_sb) && !xfs_buf_verify_cksum(bp, XFS_AGI_CRC_OFF)) - xfs_buf_ioerror(bp, -EFSBADCRC); + xfs_verifier_error(bp, -EFSBADCRC); else if (XFS_TEST_ERROR(!xfs_agi_verify(bp), mp, XFS_ERRTAG_IALLOC_READ_AGI)) - xfs_buf_ioerror(bp, -EFSCORRUPTED); - - if (bp->b_error) - xfs_verifier_error(bp); + xfs_verifier_error(bp, -EFSCORRUPTED); } static void @@ -2561,8 +2558,7 @@ xfs_agi_write_verify( struct xfs_buf_log_item *bip = bp->b_fspriv; if (!xfs_agi_verify(bp)) { - 
xfs_buf_ioerror(bp, -EFSCORRUPTED); - xfs_verifier_error(bp); + xfs_verifier_error(bp, -EFSCORRUPTED); return; } diff --git a/fs/xfs/libxfs/xfs_ialloc_btree.c b/fs/xfs/libxfs/xfs_ialloc_btree.c index 317caba9faa6..d86bdce6652a 100644 --- a/fs/xfs/libxfs/xfs_ialloc_btree.c +++ b/fs/xfs/libxfs/xfs_ialloc_btree.c @@ -294,14 +294,12 @@ xfs_inobt_read_verify( struct xfs_buf *bp) { if (!xfs_btree_sblock_verify_crc(bp)) - xfs_buf_ioerror(bp, -EFSBADCRC); + xfs_verifier_error(bp, -EFSBADCRC); else if (!xfs_inobt_verify(bp)) - xfs_buf_ioerror(bp, -EFSCORRUPTED); + xfs_verifier_error(bp, -EFSCORRUPTED); - if (bp->b_error) { + if (bp->b_error) trace_xfs_btree_corrupt(bp, _RET_IP_); - xfs_verifier_error(bp); - } } static void @@ -310,8 +308,7 @@ xfs_inobt_write_verify( { if (!xfs_inobt_verify(bp)) { trace_xfs_btree_corrupt(bp, _RET_IP_); - xfs_buf_ioerror(bp, -EFSCORRUPTED); - xfs_verifier_error(bp); + xfs_verifier_error(bp, -EFSCORRUPTED); return; } xfs_btree_sblock_calc_crc(bp); diff --git a/fs/xfs/libxfs/xfs_inode_buf.c b/fs/xfs/libxfs/xfs_inode_buf.c index 6b7989038d75..b625f65bc412 100644 --- a/fs/xfs/libxfs/xfs_inode_buf.c +++ b/fs/xfs/libxfs/xfs_inode_buf.c @@ -113,8 +113,7 @@ xfs_inode_buf_verify( return; } - xfs_buf_ioerror(bp, -EFSCORRUPTED); - xfs_verifier_error(bp); + xfs_verifier_error(bp, -EFSCORRUPTED); #ifdef DEBUG xfs_alert(mp, "bad inode magic/vsn daddr %lld #%d (magic=%x)", diff --git a/fs/xfs/libxfs/xfs_refcount_btree.c b/fs/xfs/libxfs/xfs_refcount_btree.c index 3c59dd3d58d7..ce940ebe230c 100644 --- a/fs/xfs/libxfs/xfs_refcount_btree.c +++ b/fs/xfs/libxfs/xfs_refcount_btree.c @@ -255,14 +255,12 @@ xfs_refcountbt_read_verify( struct xfs_buf *bp) { if (!xfs_btree_sblock_verify_crc(bp)) - xfs_buf_ioerror(bp, -EFSBADCRC); + xfs_verifier_error(bp, -EFSBADCRC); else if (!xfs_refcountbt_verify(bp)) - xfs_buf_ioerror(bp, -EFSCORRUPTED); + xfs_verifier_error(bp, -EFSCORRUPTED); - if (bp->b_error) { + if (bp->b_error) trace_xfs_btree_corrupt(bp, _RET_IP_); - 
xfs_verifier_error(bp); - } } STATIC void @@ -271,8 +269,7 @@ xfs_refcountbt_write_verify( { if (!xfs_refcountbt_verify(bp)) { trace_xfs_btree_corrupt(bp, _RET_IP_); - xfs_buf_ioerror(bp, -EFSCORRUPTED); - xfs_verifier_error(bp); + xfs_verifier_error(bp, -EFSCORRUPTED); return; } xfs_btree_sblock_calc_crc(bp); diff --git a/fs/xfs/libxfs/xfs_rmap_btree.c b/fs/xfs/libxfs/xfs_rmap_btree.c index 9d9c9192584c..6325908463c2 100644 --- a/fs/xfs/libxfs/xfs_rmap_btree.c +++ b/fs/xfs/libxfs/xfs_rmap_btree.c @@ -347,14 +347,12 @@ xfs_rmapbt_read_verify( struct xfs_buf *bp) { if (!xfs_btree_sblock_verify_crc(bp)) - xfs_buf_ioerror(bp, -EFSBADCRC); + xfs_verifier_error(bp, -EFSBADCRC); else if (!xfs_rmapbt_verify(bp)) - xfs_buf_ioerror(bp, -EFSCORRUPTED); + xfs_verifier_error(bp, -EFSCORRUPTED); - if (bp->b_error) { + if (bp->b_error) trace_xfs_btree_corrupt(bp, _RET_IP_); - xfs_verifier_error(bp); - } } static void @@ -363,8 +361,7 @@ xfs_rmapbt_write_verify( { if (!xfs_rmapbt_verify(bp)) { trace_xfs_btree_corrupt(bp, _RET_IP_); - xfs_buf_ioerror(bp, -EFSCORRUPTED); - xfs_verifier_error(bp); + xfs_verifier_error(bp, -EFSCORRUPTED); return; } xfs_btree_sblock_calc_crc(bp); diff --git a/fs/xfs/libxfs/xfs_sb.c b/fs/xfs/libxfs/xfs_sb.c index 9b5aae2bcc0b..96a82672989a 100644 --- a/fs/xfs/libxfs/xfs_sb.c +++ b/fs/xfs/libxfs/xfs_sb.c @@ -640,11 +640,10 @@ xfs_sb_read_verify( error = xfs_sb_verify(bp, true); out_error: - if (error) { + if (error == -EFSCORRUPTED || error == -EFSBADCRC) + xfs_verifier_error(bp, error); + else if (error) xfs_buf_ioerror(bp, error); - if (error == -EFSCORRUPTED || error == -EFSBADCRC) - xfs_verifier_error(bp); - } } /* @@ -678,8 +677,7 @@ xfs_sb_write_verify( error = xfs_sb_verify(bp, false); if (error) { - xfs_buf_ioerror(bp, error); - xfs_verifier_error(bp); + xfs_verifier_error(bp, error); return; } diff --git a/fs/xfs/libxfs/xfs_symlink_remote.c b/fs/xfs/libxfs/xfs_symlink_remote.c index c484877129a0..58c21a6e0eff 100644 --- 
a/fs/xfs/libxfs/xfs_symlink_remote.c +++ b/fs/xfs/libxfs/xfs_symlink_remote.c @@ -135,12 +135,9 @@ xfs_symlink_read_verify( return; if (!xfs_buf_verify_cksum(bp, XFS_SYMLINK_CRC_OFF)) - xfs_buf_ioerror(bp, -EFSBADCRC); + xfs_verifier_error(bp, -EFSBADCRC); else if (!xfs_symlink_verify(bp)) - xfs_buf_ioerror(bp, -EFSCORRUPTED); - - if (bp->b_error) - xfs_verifier_error(bp); + xfs_verifier_error(bp, -EFSCORRUPTED); } static void @@ -155,8 +152,7 @@ xfs_symlink_write_verify( return; if (!xfs_symlink_verify(bp)) { - xfs_buf_ioerror(bp, -EFSCORRUPTED); - xfs_verifier_error(bp); + xfs_verifier_error(bp, -EFSCORRUPTED); return; } diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c index 4c6e86d861fd..45987a278930 100644 --- a/fs/xfs/xfs_buf.c +++ b/fs/xfs/xfs_buf.c @@ -1180,13 +1180,14 @@ xfs_buf_ioend_async( } void -xfs_buf_ioerror( +__xfs_buf_ioerror( xfs_buf_t *bp, - int error) + int error, + xfs_failaddr_t failaddr) { ASSERT(error <= 0 && error >= -1000); bp->b_error = error; - trace_xfs_buf_ioerror(bp, error, _RET_IP_); + trace_xfs_buf_ioerror(bp, error, failaddr); } void diff --git a/fs/xfs/xfs_buf.h b/fs/xfs/xfs_buf.h index f873bb786824..6f907a365f85 100644 --- a/fs/xfs/xfs_buf.h +++ b/fs/xfs/xfs_buf.h @@ -315,7 +315,9 @@ extern void xfs_buf_unlock(xfs_buf_t *); /* Buffer Read and Write Routines */ extern int xfs_bwrite(struct xfs_buf *bp); extern void xfs_buf_ioend(struct xfs_buf *bp); -extern void xfs_buf_ioerror(xfs_buf_t *, int); +extern void __xfs_buf_ioerror(struct xfs_buf *bp, int error, + xfs_failaddr_t failaddr); +#define xfs_buf_ioerror(bp, err) __xfs_buf_ioerror((bp), (err), __this_address) extern void xfs_buf_ioerror_alert(struct xfs_buf *, const char *func); extern void xfs_buf_submit(struct xfs_buf *bp); extern int xfs_buf_submit_wait(struct xfs_buf *bp); diff --git a/fs/xfs/xfs_error.c b/fs/xfs/xfs_error.c index 4c9f35d983b2..c0a98c42033d 100644 --- a/fs/xfs/xfs_error.c +++ b/fs/xfs/xfs_error.c @@ -347,10 +347,13 @@ xfs_corruption_error( */ void 
xfs_verifier_error( - struct xfs_buf *bp) + struct xfs_buf *bp, + int error) { struct xfs_mount *mp = bp->b_target->bt_mount; + __xfs_buf_ioerror(bp, error, __return_address); + xfs_alert(mp, "Metadata %s detected at %pS, %s block 0x%llx", bp->b_error == -EFSBADCRC ? "CRC error" : "corruption", __return_address, bp->b_ops->name, bp->b_bn); diff --git a/fs/xfs/xfs_error.h b/fs/xfs/xfs_error.h index ea816c1bf8db..598756281d8d 100644 --- a/fs/xfs/xfs_error.h +++ b/fs/xfs/xfs_error.h @@ -25,7 +25,7 @@ extern void xfs_error_report(const char *tag, int level, struct xfs_mount *mp, extern void xfs_corruption_error(const char *tag, int level, struct xfs_mount *mp, void *p, const char *filename, int linenum, void *ra); -extern void xfs_verifier_error(struct xfs_buf *bp); +extern void xfs_verifier_error(struct xfs_buf *bp, int error); #define XFS_ERROR_REPORT(e, lvl, mp) \ xfs_error_report(e, lvl, mp, __FILE__, __LINE__, __return_address) diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h index d718a10c2271..9235b2c29695 100644 --- a/fs/xfs/xfs_trace.h +++ b/fs/xfs/xfs_trace.h @@ -390,7 +390,7 @@ DEFINE_BUF_FLAGS_EVENT(xfs_buf_get); DEFINE_BUF_FLAGS_EVENT(xfs_buf_read); TRACE_EVENT(xfs_buf_ioerror, - TP_PROTO(struct xfs_buf *bp, int error, unsigned long caller_ip), + TP_PROTO(struct xfs_buf *bp, int error, xfs_failaddr_t caller_ip), TP_ARGS(bp, error, caller_ip), TP_STRUCT__entry( __field(dev_t, dev) @@ -401,7 +401,7 @@ TRACE_EVENT(xfs_buf_ioerror, __field(int, pincount) __field(unsigned, lockval) __field(int, error) - __field(unsigned long, caller_ip) + __field(xfs_failaddr_t, caller_ip) ), TP_fast_assign( __entry->dev = bp->b_target->bt_dev; @@ -415,7 +415,7 @@ TRACE_EVENT(xfs_buf_ioerror, __entry->caller_ip = caller_ip; ), TP_printk("dev %d:%d bno 0x%llx len 0x%zx hold %d pincount %d " - "lock %d error %d flags %s caller %ps", + "lock %d error %d flags %s caller %pS", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long long)__entry->bno, __entry->buffer_length, -- 
cgit v1.2.3 From a6a781a58befcbd467ce843af4eaca3906aa1f08 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Mon, 8 Jan 2018 10:51:03 -0800 Subject: xfs: have buffer verifier functions report failing address Modify each function that checks the contents of a metadata buffer to return the instruction address of the failing test so that we can report more precise failure errors to the log. Signed-off-by: Darrick J. Wong Reviewed-by: Dave Chinner --- fs/xfs/libxfs/xfs_alloc.c | 43 ++++++++++---------- fs/xfs/libxfs/xfs_alloc_btree.c | 27 +++++++------ fs/xfs/libxfs/xfs_attr_leaf.c | 20 ++++----- fs/xfs/libxfs/xfs_attr_remote.c | 36 ++++++++--------- fs/xfs/libxfs/xfs_bmap_btree.c | 16 ++++---- fs/xfs/libxfs/xfs_btree.c | 46 ++++++++++----------- fs/xfs/libxfs/xfs_btree.h | 11 +++-- fs/xfs/libxfs/xfs_da_btree.c | 26 ++++++------ fs/xfs/libxfs/xfs_dir2_block.c | 16 ++++---- fs/xfs/libxfs/xfs_dir2_data.c | 83 +++++++++++++++++++++++--------------- fs/xfs/libxfs/xfs_dir2_leaf.c | 55 ++++++++++++++----------- fs/xfs/libxfs/xfs_dir2_node.c | 61 ++++++++++++++++------------ fs/xfs/libxfs/xfs_dir2_priv.h | 16 +++----- fs/xfs/libxfs/xfs_ialloc.c | 22 +++++----- fs/xfs/libxfs/xfs_ialloc_btree.c | 16 ++++---- fs/xfs/libxfs/xfs_inode_buf.c | 36 +++++++++-------- fs/xfs/libxfs/xfs_inode_buf.h | 4 +- fs/xfs/libxfs/xfs_refcount_btree.c | 20 ++++----- fs/xfs/libxfs/xfs_rmap_btree.c | 20 ++++----- fs/xfs/libxfs/xfs_symlink_remote.c | 22 +++++----- fs/xfs/scrub/inode.c | 2 +- 21 files changed, 323 insertions(+), 275 deletions(-) diff --git a/fs/xfs/libxfs/xfs_alloc.c b/fs/xfs/libxfs/xfs_alloc.c index 055ffa8b7a52..4d7ef74f5783 100644 --- a/fs/xfs/libxfs/xfs_alloc.c +++ b/fs/xfs/libxfs/xfs_alloc.c @@ -520,7 +520,7 @@ xfs_alloc_fixup_trees( return 0; } -static bool +static xfs_failaddr_t xfs_agfl_verify( struct xfs_buf *bp) { @@ -529,9 +529,9 @@ xfs_agfl_verify( int i; if (!uuid_equal(&agfl->agfl_uuid, &mp->m_sb.sb_meta_uuid)) - return false; + return __this_address; if 
(be32_to_cpu(agfl->agfl_magicnum) != XFS_AGFL_MAGIC) - return false; + return __this_address; /* * during growfs operations, the perag is not fully initialised, * so we can't use it for any useful checking. growfs ensures we can't @@ -539,16 +539,17 @@ xfs_agfl_verify( * so we can detect and avoid this problem. */ if (bp->b_pag && be32_to_cpu(agfl->agfl_seqno) != bp->b_pag->pag_agno) - return false; + return __this_address; for (i = 0; i < XFS_AGFL_SIZE(mp); i++) { if (be32_to_cpu(agfl->agfl_bno[i]) != NULLAGBLOCK && be32_to_cpu(agfl->agfl_bno[i]) >= mp->m_sb.sb_agblocks) - return false; + return __this_address; } - return xfs_log_check_lsn(mp, - be64_to_cpu(XFS_BUF_TO_AGFL(bp)->agfl_lsn)); + if (!xfs_log_check_lsn(mp, be64_to_cpu(XFS_BUF_TO_AGFL(bp)->agfl_lsn))) + return __this_address; + return NULL; } static void @@ -568,7 +569,7 @@ xfs_agfl_read_verify( if (!xfs_buf_verify_cksum(bp, XFS_AGFL_CRC_OFF)) xfs_verifier_error(bp, -EFSBADCRC); - else if (!xfs_agfl_verify(bp)) + else if (xfs_agfl_verify(bp)) xfs_verifier_error(bp, -EFSCORRUPTED); } @@ -583,7 +584,7 @@ xfs_agfl_write_verify( if (!xfs_sb_version_hascrc(&mp->m_sb)) return; - if (!xfs_agfl_verify(bp)) { + if (xfs_agfl_verify(bp)) { xfs_verifier_error(bp, -EFSCORRUPTED); return; } @@ -2393,7 +2394,7 @@ xfs_alloc_put_freelist( return 0; } -static bool +static xfs_failaddr_t xfs_agf_verify( struct xfs_mount *mp, struct xfs_buf *bp) @@ -2402,10 +2403,10 @@ xfs_agf_verify( if (xfs_sb_version_hascrc(&mp->m_sb)) { if (!uuid_equal(&agf->agf_uuid, &mp->m_sb.sb_meta_uuid)) - return false; + return __this_address; if (!xfs_log_check_lsn(mp, be64_to_cpu(XFS_BUF_TO_AGF(bp)->agf_lsn))) - return false; + return __this_address; } if (!(agf->agf_magicnum == cpu_to_be32(XFS_AGF_MAGIC) && @@ -2414,18 +2415,18 @@ xfs_agf_verify( be32_to_cpu(agf->agf_flfirst) < XFS_AGFL_SIZE(mp) && be32_to_cpu(agf->agf_fllast) < XFS_AGFL_SIZE(mp) && be32_to_cpu(agf->agf_flcount) <= XFS_AGFL_SIZE(mp))) - return false; + return __this_address; 
if (be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNO]) < 1 || be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNT]) < 1 || be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNO]) > XFS_BTREE_MAXLEVELS || be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNT]) > XFS_BTREE_MAXLEVELS) - return false; + return __this_address; if (xfs_sb_version_hasrmapbt(&mp->m_sb) && (be32_to_cpu(agf->agf_levels[XFS_BTNUM_RMAP]) < 1 || be32_to_cpu(agf->agf_levels[XFS_BTNUM_RMAP]) > XFS_BTREE_MAXLEVELS)) - return false; + return __this_address; /* * during growfs operations, the perag is not fully initialised, @@ -2434,18 +2435,18 @@ xfs_agf_verify( * so we can detect and avoid this problem. */ if (bp->b_pag && be32_to_cpu(agf->agf_seqno) != bp->b_pag->pag_agno) - return false; + return __this_address; if (xfs_sb_version_haslazysbcount(&mp->m_sb) && be32_to_cpu(agf->agf_btreeblks) > be32_to_cpu(agf->agf_length)) - return false; + return __this_address; if (xfs_sb_version_hasreflink(&mp->m_sb) && (be32_to_cpu(agf->agf_refcount_level) < 1 || be32_to_cpu(agf->agf_refcount_level) > XFS_BTREE_MAXLEVELS)) - return false; + return __this_address; - return true;; + return NULL; } @@ -2458,7 +2459,7 @@ xfs_agf_read_verify( if (xfs_sb_version_hascrc(&mp->m_sb) && !xfs_buf_verify_cksum(bp, XFS_AGF_CRC_OFF)) xfs_verifier_error(bp, -EFSBADCRC); - else if (XFS_TEST_ERROR(!xfs_agf_verify(mp, bp), mp, + else if (XFS_TEST_ERROR(xfs_agf_verify(mp, bp), mp, XFS_ERRTAG_ALLOC_READ_AGF)) xfs_verifier_error(bp, -EFSCORRUPTED); } @@ -2470,7 +2471,7 @@ xfs_agf_write_verify( struct xfs_mount *mp = bp->b_target->bt_mount; struct xfs_buf_log_item *bip = bp->b_fspriv; - if (!xfs_agf_verify(mp, bp)) { + if (xfs_agf_verify(mp, bp)) { xfs_verifier_error(bp, -EFSCORRUPTED); return; } diff --git a/fs/xfs/libxfs/xfs_alloc_btree.c b/fs/xfs/libxfs/xfs_alloc_btree.c index 752d6aa10e54..4383c05df8ca 100644 --- a/fs/xfs/libxfs/xfs_alloc_btree.c +++ b/fs/xfs/libxfs/xfs_alloc_btree.c @@ -307,13 +307,14 @@ xfs_cntbt_diff_two_keys( be32_to_cpu(k2->alloc.ar_startblock); 
} -static bool +static xfs_failaddr_t xfs_allocbt_verify( struct xfs_buf *bp) { struct xfs_mount *mp = bp->b_target->bt_mount; struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp); struct xfs_perag *pag = bp->b_pag; + xfs_failaddr_t fa; unsigned int level; /* @@ -331,29 +332,31 @@ xfs_allocbt_verify( level = be16_to_cpu(block->bb_level); switch (block->bb_magic) { case cpu_to_be32(XFS_ABTB_CRC_MAGIC): - if (!xfs_btree_sblock_v5hdr_verify(bp)) - return false; + fa = xfs_btree_sblock_v5hdr_verify(bp); + if (fa) + return fa; /* fall through */ case cpu_to_be32(XFS_ABTB_MAGIC): if (pag && pag->pagf_init) { if (level >= pag->pagf_levels[XFS_BTNUM_BNOi]) - return false; + return __this_address; } else if (level >= mp->m_ag_maxlevels) - return false; + return __this_address; break; case cpu_to_be32(XFS_ABTC_CRC_MAGIC): - if (!xfs_btree_sblock_v5hdr_verify(bp)) - return false; + fa = xfs_btree_sblock_v5hdr_verify(bp); + if (fa) + return fa; /* fall through */ case cpu_to_be32(XFS_ABTC_MAGIC): if (pag && pag->pagf_init) { if (level >= pag->pagf_levels[XFS_BTNUM_CNTi]) - return false; + return __this_address; } else if (level >= mp->m_ag_maxlevels) - return false; + return __this_address; break; default: - return false; + return __this_address; } return xfs_btree_sblock_verify(bp, mp->m_alloc_mxr[level != 0]); @@ -365,7 +368,7 @@ xfs_allocbt_read_verify( { if (!xfs_btree_sblock_verify_crc(bp)) xfs_verifier_error(bp, -EFSBADCRC); - else if (!xfs_allocbt_verify(bp)) + else if (xfs_allocbt_verify(bp)) xfs_verifier_error(bp, -EFSCORRUPTED); if (bp->b_error) @@ -376,7 +379,7 @@ static void xfs_allocbt_write_verify( struct xfs_buf *bp) { - if (!xfs_allocbt_verify(bp)) { + if (xfs_allocbt_verify(bp)) { trace_xfs_btree_corrupt(bp, _RET_IP_); xfs_verifier_error(bp, -EFSCORRUPTED); return; diff --git a/fs/xfs/libxfs/xfs_attr_leaf.c b/fs/xfs/libxfs/xfs_attr_leaf.c index 6a9805ad95cb..f6281e100469 100644 --- a/fs/xfs/libxfs/xfs_attr_leaf.c +++ b/fs/xfs/libxfs/xfs_attr_leaf.c @@ -247,7 
+247,7 @@ xfs_attr3_leaf_hdr_to_disk( } } -static bool +static xfs_failaddr_t xfs_attr3_leaf_verify( struct xfs_buf *bp) { @@ -262,17 +262,17 @@ xfs_attr3_leaf_verify( struct xfs_da3_node_hdr *hdr3 = bp->b_addr; if (ichdr.magic != XFS_ATTR3_LEAF_MAGIC) - return false; + return __this_address; if (!uuid_equal(&hdr3->info.uuid, &mp->m_sb.sb_meta_uuid)) - return false; + return __this_address; if (be64_to_cpu(hdr3->info.blkno) != bp->b_bn) - return false; + return __this_address; if (!xfs_log_check_lsn(mp, be64_to_cpu(hdr3->info.lsn))) - return false; + return __this_address; } else { if (ichdr.magic != XFS_ATTR_LEAF_MAGIC) - return false; + return __this_address; } /* * In recovery there is a transient state where count == 0 is valid @@ -280,12 +280,12 @@ xfs_attr3_leaf_verify( * if the attr didn't fit in shortform. */ if (pag && pag->pagf_init && ichdr.count == 0) - return false; + return __this_address; /* XXX: need to range check rest of attr header values */ /* XXX: hash order check? */ - return true; + return NULL; } static void @@ -296,7 +296,7 @@ xfs_attr3_leaf_write_verify( struct xfs_buf_log_item *bip = bp->b_fspriv; struct xfs_attr3_leaf_hdr *hdr3 = bp->b_addr; - if (!xfs_attr3_leaf_verify(bp)) { + if (xfs_attr3_leaf_verify(bp)) { xfs_verifier_error(bp, -EFSCORRUPTED); return; } @@ -325,7 +325,7 @@ xfs_attr3_leaf_read_verify( if (xfs_sb_version_hascrc(&mp->m_sb) && !xfs_buf_verify_cksum(bp, XFS_ATTR3_LEAF_CRC_OFF)) xfs_verifier_error(bp, -EFSBADCRC); - else if (!xfs_attr3_leaf_verify(bp)) + else if (xfs_attr3_leaf_verify(bp)) xfs_verifier_error(bp, -EFSCORRUPTED); } diff --git a/fs/xfs/libxfs/xfs_attr_remote.c b/fs/xfs/libxfs/xfs_attr_remote.c index 231b569bb230..06858db972ed 100644 --- a/fs/xfs/libxfs/xfs_attr_remote.c +++ b/fs/xfs/libxfs/xfs_attr_remote.c @@ -65,7 +65,7 @@ xfs_attr3_rmt_blocks( * does CRC, location and bounds checking, the unpacking function checks the * attribute parameters and owner. 
*/ -static bool +static xfs_failaddr_t xfs_attr3_rmt_hdr_ok( void *ptr, xfs_ino_t ino, @@ -76,19 +76,19 @@ xfs_attr3_rmt_hdr_ok( struct xfs_attr3_rmt_hdr *rmt = ptr; if (bno != be64_to_cpu(rmt->rm_blkno)) - return false; + return __this_address; if (offset != be32_to_cpu(rmt->rm_offset)) - return false; + return __this_address; if (size != be32_to_cpu(rmt->rm_bytes)) - return false; + return __this_address; if (ino != be64_to_cpu(rmt->rm_owner)) - return false; + return __this_address; /* ok */ - return true; + return NULL; } -static bool +static xfs_failaddr_t xfs_attr3_rmt_verify( struct xfs_mount *mp, void *ptr, @@ -98,22 +98,22 @@ xfs_attr3_rmt_verify( struct xfs_attr3_rmt_hdr *rmt = ptr; if (!xfs_sb_version_hascrc(&mp->m_sb)) - return false; + return __this_address; if (rmt->rm_magic != cpu_to_be32(XFS_ATTR3_RMT_MAGIC)) - return false; + return __this_address; if (!uuid_equal(&rmt->rm_uuid, &mp->m_sb.sb_meta_uuid)) - return false; + return __this_address; if (be64_to_cpu(rmt->rm_blkno) != bno) - return false; + return __this_address; if (be32_to_cpu(rmt->rm_bytes) > fsbsize - sizeof(*rmt)) - return false; + return __this_address; if (be32_to_cpu(rmt->rm_offset) + be32_to_cpu(rmt->rm_bytes) > XFS_XATTR_SIZE_MAX) - return false; + return __this_address; if (rmt->rm_owner == 0) - return false; + return __this_address; - return true; + return NULL; } static void @@ -140,7 +140,7 @@ xfs_attr3_rmt_read_verify( xfs_verifier_error(bp, -EFSBADCRC); return; } - if (!xfs_attr3_rmt_verify(mp, ptr, blksize, bno)) { + if (xfs_attr3_rmt_verify(mp, ptr, blksize, bno)) { xfs_verifier_error(bp, -EFSCORRUPTED); return; } @@ -175,7 +175,7 @@ xfs_attr3_rmt_write_verify( while (len > 0) { struct xfs_attr3_rmt_hdr *rmt = (struct xfs_attr3_rmt_hdr *)ptr; - if (!xfs_attr3_rmt_verify(mp, ptr, blksize, bno)) { + if (xfs_attr3_rmt_verify(mp, ptr, blksize, bno)) { xfs_verifier_error(bp, -EFSCORRUPTED); return; } @@ -267,7 +267,7 @@ xfs_attr_rmtval_copyout( byte_cnt = min(*valuelen, 
byte_cnt); if (xfs_sb_version_hascrc(&mp->m_sb)) { - if (!xfs_attr3_rmt_hdr_ok(src, ino, *offset, + if (xfs_attr3_rmt_hdr_ok(src, ino, *offset, byte_cnt, bno)) { xfs_alert(mp, "remote attribute header mismatch bno/off/len/owner (0x%llx/0x%x/Ox%x/0x%llx)", diff --git a/fs/xfs/libxfs/xfs_bmap_btree.c b/fs/xfs/libxfs/xfs_bmap_btree.c index b6d61c7161d1..0fdae57e7944 100644 --- a/fs/xfs/libxfs/xfs_bmap_btree.c +++ b/fs/xfs/libxfs/xfs_bmap_btree.c @@ -425,12 +425,13 @@ xfs_bmbt_diff_two_keys( be64_to_cpu(k2->bmbt.br_startoff); } -static bool +static xfs_failaddr_t xfs_bmbt_verify( struct xfs_buf *bp) { struct xfs_mount *mp = bp->b_target->bt_mount; struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp); + xfs_failaddr_t fa; unsigned int level; switch (block->bb_magic) { @@ -439,13 +440,14 @@ xfs_bmbt_verify( * XXX: need a better way of verifying the owner here. Right now * just make sure there has been one set. */ - if (!xfs_btree_lblock_v5hdr_verify(bp, XFS_RMAP_OWN_UNKNOWN)) - return false; + fa = xfs_btree_lblock_v5hdr_verify(bp, XFS_RMAP_OWN_UNKNOWN); + if (fa) + return fa; /* fall through */ case cpu_to_be32(XFS_BMAP_MAGIC): break; default: - return false; + return __this_address; } /* @@ -457,7 +459,7 @@ xfs_bmbt_verify( */ level = be16_to_cpu(block->bb_level); if (level > max(mp->m_bm_maxlevels[0], mp->m_bm_maxlevels[1])) - return false; + return __this_address; return xfs_btree_lblock_verify(bp, mp->m_bmap_dmxr[level != 0]); } @@ -468,7 +470,7 @@ xfs_bmbt_read_verify( { if (!xfs_btree_lblock_verify_crc(bp)) xfs_verifier_error(bp, -EFSBADCRC); - else if (!xfs_bmbt_verify(bp)) + else if (xfs_bmbt_verify(bp)) xfs_verifier_error(bp, -EFSCORRUPTED); if (bp->b_error) @@ -479,7 +481,7 @@ static void xfs_bmbt_write_verify( struct xfs_buf *bp) { - if (!xfs_bmbt_verify(bp)) { + if (xfs_bmbt_verify(bp)) { trace_xfs_btree_corrupt(bp, _RET_IP_); xfs_verifier_error(bp, -EFSCORRUPTED); return; diff --git a/fs/xfs/libxfs/xfs_btree.c b/fs/xfs/libxfs/xfs_btree.c index 
33908a62aa39..2b2be1d6c00d 100644 --- a/fs/xfs/libxfs/xfs_btree.c +++ b/fs/xfs/libxfs/xfs_btree.c @@ -329,7 +329,7 @@ xfs_btree_sblock_verify_crc( if (xfs_sb_version_hascrc(&mp->m_sb)) { if (!xfs_log_check_lsn(mp, be64_to_cpu(block->bb_u.s.bb_lsn))) - return false; + return __this_address; return xfs_buf_verify_cksum(bp, XFS_BTREE_SBLOCK_CRC_OFF); } @@ -4530,7 +4530,7 @@ xfs_btree_change_owner( } /* Verify the v5 fields of a long-format btree block. */ -bool +xfs_failaddr_t xfs_btree_lblock_v5hdr_verify( struct xfs_buf *bp, uint64_t owner) @@ -4539,19 +4539,19 @@ xfs_btree_lblock_v5hdr_verify( struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp); if (!xfs_sb_version_hascrc(&mp->m_sb)) - return false; + return __this_address; if (!uuid_equal(&block->bb_u.l.bb_uuid, &mp->m_sb.sb_meta_uuid)) - return false; + return __this_address; if (block->bb_u.l.bb_blkno != cpu_to_be64(bp->b_bn)) - return false; + return __this_address; if (owner != XFS_RMAP_OWN_UNKNOWN && be64_to_cpu(block->bb_u.l.bb_owner) != owner) - return false; - return true; + return __this_address; + return NULL; } /* Verify a long-format btree block. 
*/ -bool +xfs_failaddr_t xfs_btree_lblock_verify( struct xfs_buf *bp, unsigned int max_recs) @@ -4561,17 +4561,17 @@ xfs_btree_lblock_verify( /* numrecs verification */ if (be16_to_cpu(block->bb_numrecs) > max_recs) - return false; + return __this_address; /* sibling pointer verification */ if (block->bb_u.l.bb_leftsib != cpu_to_be64(NULLFSBLOCK) && !xfs_verify_fsbno(mp, be64_to_cpu(block->bb_u.l.bb_leftsib))) - return false; + return __this_address; if (block->bb_u.l.bb_rightsib != cpu_to_be64(NULLFSBLOCK) && !xfs_verify_fsbno(mp, be64_to_cpu(block->bb_u.l.bb_rightsib))) - return false; + return __this_address; - return true; + return NULL; } /** @@ -4582,7 +4582,7 @@ xfs_btree_lblock_verify( * @max_recs: pointer to the m_*_mxr max records field in the xfs mount * @pag_max_level: pointer to the per-ag max level field */ -bool +xfs_failaddr_t xfs_btree_sblock_v5hdr_verify( struct xfs_buf *bp) { @@ -4591,14 +4591,14 @@ xfs_btree_sblock_v5hdr_verify( struct xfs_perag *pag = bp->b_pag; if (!xfs_sb_version_hascrc(&mp->m_sb)) - return false; + return __this_address; if (!uuid_equal(&block->bb_u.s.bb_uuid, &mp->m_sb.sb_meta_uuid)) - return false; + return __this_address; if (block->bb_u.s.bb_blkno != cpu_to_be64(bp->b_bn)) - return false; + return __this_address; if (pag && be32_to_cpu(block->bb_u.s.bb_owner) != pag->pag_agno) - return false; - return true; + return __this_address; + return NULL; } /** @@ -4607,7 +4607,7 @@ xfs_btree_sblock_v5hdr_verify( * @bp: buffer containing the btree block * @max_recs: maximum records allowed in this btree node */ -bool +xfs_failaddr_t xfs_btree_sblock_verify( struct xfs_buf *bp, unsigned int max_recs) @@ -4618,18 +4618,18 @@ xfs_btree_sblock_verify( /* numrecs verification */ if (be16_to_cpu(block->bb_numrecs) > max_recs) - return false; + return __this_address; /* sibling pointer verification */ agno = xfs_daddr_to_agno(mp, XFS_BUF_ADDR(bp)); if (block->bb_u.s.bb_leftsib != cpu_to_be32(NULLAGBLOCK) && !xfs_verify_agbno(mp, agno, 
be32_to_cpu(block->bb_u.s.bb_leftsib))) - return false; + return __this_address; if (block->bb_u.s.bb_rightsib != cpu_to_be32(NULLAGBLOCK) && !xfs_verify_agbno(mp, agno, be32_to_cpu(block->bb_u.s.bb_rightsib))) - return false; + return __this_address; - return true; + return NULL; } /* diff --git a/fs/xfs/libxfs/xfs_btree.h b/fs/xfs/libxfs/xfs_btree.h index 5f86ee14cc66..2f13b8676f41 100644 --- a/fs/xfs/libxfs/xfs_btree.h +++ b/fs/xfs/libxfs/xfs_btree.h @@ -492,10 +492,13 @@ static inline int xfs_btree_get_level(struct xfs_btree_block *block) #define XFS_BTREE_TRACE_ARGR(c, r) #define XFS_BTREE_TRACE_CURSOR(c, t) -bool xfs_btree_sblock_v5hdr_verify(struct xfs_buf *bp); -bool xfs_btree_sblock_verify(struct xfs_buf *bp, unsigned int max_recs); -bool xfs_btree_lblock_v5hdr_verify(struct xfs_buf *bp, uint64_t owner); -bool xfs_btree_lblock_verify(struct xfs_buf *bp, unsigned int max_recs); +xfs_failaddr_t xfs_btree_sblock_v5hdr_verify(struct xfs_buf *bp); +xfs_failaddr_t xfs_btree_sblock_verify(struct xfs_buf *bp, + unsigned int max_recs); +xfs_failaddr_t xfs_btree_lblock_v5hdr_verify(struct xfs_buf *bp, + uint64_t owner); +xfs_failaddr_t xfs_btree_lblock_verify(struct xfs_buf *bp, + unsigned int max_recs); uint xfs_btree_compute_maxlevels(struct xfs_mount *mp, uint *limits, unsigned long len); diff --git a/fs/xfs/libxfs/xfs_da_btree.c b/fs/xfs/libxfs/xfs_da_btree.c index 27b5ed375e61..60a2572ba4e3 100644 --- a/fs/xfs/libxfs/xfs_da_btree.c +++ b/fs/xfs/libxfs/xfs_da_btree.c @@ -128,7 +128,7 @@ xfs_da_state_free(xfs_da_state_t *state) kmem_zone_free(xfs_da_state_zone, state); } -static bool +static xfs_failaddr_t xfs_da3_node_verify( struct xfs_buf *bp) { @@ -145,24 +145,24 @@ xfs_da3_node_verify( struct xfs_da3_node_hdr *hdr3 = bp->b_addr; if (ichdr.magic != XFS_DA3_NODE_MAGIC) - return false; + return __this_address; if (!uuid_equal(&hdr3->info.uuid, &mp->m_sb.sb_meta_uuid)) - return false; + return __this_address; if (be64_to_cpu(hdr3->info.blkno) != bp->b_bn) - 
return false; + return __this_address; if (!xfs_log_check_lsn(mp, be64_to_cpu(hdr3->info.lsn))) - return false; + return __this_address; } else { if (ichdr.magic != XFS_DA_NODE_MAGIC) - return false; + return __this_address; } if (ichdr.level == 0) - return false; + return __this_address; if (ichdr.level > XFS_DA_NODE_MAXDEPTH) - return false; + return __this_address; if (ichdr.count == 0) - return false; + return __this_address; /* * we don't know if the node is for and attribute or directory tree, @@ -170,11 +170,11 @@ xfs_da3_node_verify( */ if (ichdr.count > mp->m_dir_geo->node_ents && ichdr.count > mp->m_attr_geo->node_ents) - return false; + return __this_address; /* XXX: hash order check? */ - return true; + return NULL; } static void @@ -185,7 +185,7 @@ xfs_da3_node_write_verify( struct xfs_buf_log_item *bip = bp->b_fspriv; struct xfs_da3_node_hdr *hdr3 = bp->b_addr; - if (!xfs_da3_node_verify(bp)) { + if (xfs_da3_node_verify(bp)) { xfs_verifier_error(bp, -EFSCORRUPTED); return; } @@ -219,7 +219,7 @@ xfs_da3_node_read_verify( } /* fall through */ case XFS_DA_NODE_MAGIC: - if (!xfs_da3_node_verify(bp)) + if (xfs_da3_node_verify(bp)) xfs_verifier_error(bp, -EFSCORRUPTED); return; case XFS_ATTR_LEAF_MAGIC: diff --git a/fs/xfs/libxfs/xfs_dir2_block.c b/fs/xfs/libxfs/xfs_dir2_block.c index a5d95c9ddd36..3b728794659c 100644 --- a/fs/xfs/libxfs/xfs_dir2_block.c +++ b/fs/xfs/libxfs/xfs_dir2_block.c @@ -58,7 +58,7 @@ xfs_dir_startup(void) xfs_dir_hash_dotdot = xfs_da_hashname((unsigned char *)"..", 2); } -static bool +static xfs_failaddr_t xfs_dir3_block_verify( struct xfs_buf *bp) { @@ -67,16 +67,16 @@ xfs_dir3_block_verify( if (xfs_sb_version_hascrc(&mp->m_sb)) { if (hdr3->magic != cpu_to_be32(XFS_DIR3_BLOCK_MAGIC)) - return false; + return __this_address; if (!uuid_equal(&hdr3->uuid, &mp->m_sb.sb_meta_uuid)) - return false; + return __this_address; if (be64_to_cpu(hdr3->blkno) != bp->b_bn) - return false; + return __this_address; if (!xfs_log_check_lsn(mp, 
be64_to_cpu(hdr3->lsn))) - return false; + return __this_address; } else { if (hdr3->magic != cpu_to_be32(XFS_DIR2_BLOCK_MAGIC)) - return false; + return __this_address; } return __xfs_dir3_data_check(NULL, bp); } @@ -90,7 +90,7 @@ xfs_dir3_block_read_verify( if (xfs_sb_version_hascrc(&mp->m_sb) && !xfs_buf_verify_cksum(bp, XFS_DIR3_DATA_CRC_OFF)) xfs_verifier_error(bp, -EFSBADCRC); - else if (!xfs_dir3_block_verify(bp)) + else if (xfs_dir3_block_verify(bp)) xfs_verifier_error(bp, -EFSCORRUPTED); } @@ -102,7 +102,7 @@ xfs_dir3_block_write_verify( struct xfs_buf_log_item *bip = bp->b_fspriv; struct xfs_dir3_blk_hdr *hdr3 = bp->b_addr; - if (!xfs_dir3_block_verify(bp)) { + if (xfs_dir3_block_verify(bp)) { xfs_verifier_error(bp, -EFSCORRUPTED); return; } diff --git a/fs/xfs/libxfs/xfs_dir2_data.c b/fs/xfs/libxfs/xfs_dir2_data.c index e1546fb33393..3eda2474ad0f 100644 --- a/fs/xfs/libxfs/xfs_dir2_data.c +++ b/fs/xfs/libxfs/xfs_dir2_data.c @@ -36,9 +36,9 @@ /* * Check the consistency of the data block. * The input can also be a block-format directory. - * Return true if the buffer is good. + * Return NULL if the buffer is good, otherwise the address of the error. 
*/ -bool +xfs_failaddr_t __xfs_dir3_data_check( struct xfs_inode *dp, /* incore inode pointer */ struct xfs_buf *bp) /* data block's buffer */ @@ -92,14 +92,14 @@ __xfs_dir3_data_check( */ if (be32_to_cpu(btp->count) >= ((char *)btp - p) / sizeof(struct xfs_dir2_leaf_entry)) - return false; + return __this_address; break; case cpu_to_be32(XFS_DIR3_DATA_MAGIC): case cpu_to_be32(XFS_DIR2_DATA_MAGIC): endp = (char *)hdr + geo->blksize; break; default: - return false; + return __this_address; } /* @@ -109,24 +109,24 @@ __xfs_dir3_data_check( count = lastfree = freeseen = 0; if (!bf[0].length) { if (bf[0].offset) - return false; + return __this_address; freeseen |= 1 << 0; } if (!bf[1].length) { if (bf[1].offset) - return false; + return __this_address; freeseen |= 1 << 1; } if (!bf[2].length) { if (bf[2].offset) - return false; + return __this_address; freeseen |= 1 << 2; } if (be16_to_cpu(bf[0].length) < be16_to_cpu(bf[1].length)) - return false; + return __this_address; if (be16_to_cpu(bf[1].length) < be16_to_cpu(bf[2].length)) - return false; + return __this_address; /* * Loop over the data/unused entries. 
*/ @@ -139,22 +139,22 @@ __xfs_dir3_data_check( */ if (be16_to_cpu(dup->freetag) == XFS_DIR2_DATA_FREE_TAG) { if (lastfree != 0) - return false; + return __this_address; if (endp < p + be16_to_cpu(dup->length)) - return false; + return __this_address; if (be16_to_cpu(*xfs_dir2_data_unused_tag_p(dup)) != (char *)dup - (char *)hdr) - return false; + return __this_address; dfp = xfs_dir2_data_freefind(hdr, bf, dup); if (dfp) { i = (int)(dfp - bf); if ((freeseen & (1 << i)) != 0) - return false; + return __this_address; freeseen |= 1 << i; } else { if (be16_to_cpu(dup->length) > be16_to_cpu(bf[2].length)) - return false; + return __this_address; } p += be16_to_cpu(dup->length); lastfree = 1; @@ -168,16 +168,16 @@ __xfs_dir3_data_check( */ dep = (xfs_dir2_data_entry_t *)p; if (dep->namelen == 0) - return false; + return __this_address; if (xfs_dir_ino_validate(mp, be64_to_cpu(dep->inumber))) - return false; + return __this_address; if (endp < p + ops->data_entsize(dep->namelen)) - return false; + return __this_address; if (be16_to_cpu(*ops->data_entry_tag_p(dep)) != (char *)dep - (char *)hdr) - return false; + return __this_address; if (ops->data_get_ftype(dep) >= XFS_DIR3_FT_MAX) - return false; + return __this_address; count++; lastfree = 0; if (hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC) || @@ -194,7 +194,7 @@ __xfs_dir3_data_check( break; } if (i >= be32_to_cpu(btp->count)) - return false; + return __this_address; } p += ops->data_entsize(dep->namelen); } @@ -202,7 +202,7 @@ __xfs_dir3_data_check( * Need to have seen all the entries and all the bestfree slots. 
*/ if (freeseen != 7) - return false; + return __this_address; if (hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC) || hdr->magic == cpu_to_be32(XFS_DIR3_BLOCK_MAGIC)) { for (i = stale = 0; i < be32_to_cpu(btp->count); i++) { @@ -211,17 +211,34 @@ __xfs_dir3_data_check( stale++; if (i > 0 && be32_to_cpu(lep[i].hashval) < be32_to_cpu(lep[i - 1].hashval)) - return false; + return __this_address; } if (count != be32_to_cpu(btp->count) - be32_to_cpu(btp->stale)) - return false; + return __this_address; if (stale != be32_to_cpu(btp->stale)) - return false; + return __this_address; } - return true; + return NULL; } -static bool +#ifdef DEBUG +void +xfs_dir3_data_check( + struct xfs_inode *dp, + struct xfs_buf *bp) +{ + xfs_failaddr_t fa; + + fa = __xfs_dir3_data_check(dp, bp); + if (!fa) + return; + xfs_corruption_error(__func__, XFS_ERRLEVEL_LOW, dp->i_mount, + bp->b_addr, __FILE__, __LINE__, fa); + ASSERT(0); +} +#endif + +static xfs_failaddr_t xfs_dir3_data_verify( struct xfs_buf *bp) { @@ -230,16 +247,16 @@ xfs_dir3_data_verify( if (xfs_sb_version_hascrc(&mp->m_sb)) { if (hdr3->magic != cpu_to_be32(XFS_DIR3_DATA_MAGIC)) - return false; + return __this_address; if (!uuid_equal(&hdr3->uuid, &mp->m_sb.sb_meta_uuid)) - return false; + return __this_address; if (be64_to_cpu(hdr3->blkno) != bp->b_bn) - return false; + return __this_address; if (!xfs_log_check_lsn(mp, be64_to_cpu(hdr3->lsn))) - return false; + return __this_address; } else { if (hdr3->magic != cpu_to_be32(XFS_DIR2_DATA_MAGIC)) - return false; + return __this_address; } return __xfs_dir3_data_check(NULL, bp); } @@ -281,7 +298,7 @@ xfs_dir3_data_read_verify( if (xfs_sb_version_hascrc(&mp->m_sb) && !xfs_buf_verify_cksum(bp, XFS_DIR3_DATA_CRC_OFF)) xfs_verifier_error(bp, -EFSBADCRC); - else if (!xfs_dir3_data_verify(bp)) + else if (xfs_dir3_data_verify(bp)) xfs_verifier_error(bp, -EFSCORRUPTED); } @@ -293,7 +310,7 @@ xfs_dir3_data_write_verify( struct xfs_buf_log_item *bip = bp->b_fspriv; struct xfs_dir3_blk_hdr 
*hdr3 = bp->b_addr; - if (!xfs_dir3_data_verify(bp)) { + if (xfs_dir3_data_verify(bp)) { xfs_verifier_error(bp, -EFSCORRUPTED); return; } diff --git a/fs/xfs/libxfs/xfs_dir2_leaf.c b/fs/xfs/libxfs/xfs_dir2_leaf.c index 071f879cc114..a8694860566b 100644 --- a/fs/xfs/libxfs/xfs_dir2_leaf.c +++ b/fs/xfs/libxfs/xfs_dir2_leaf.c @@ -50,13 +50,7 @@ static void xfs_dir3_leaf_log_tail(struct xfs_da_args *args, * Pop an assert if something is wrong. */ #ifdef DEBUG -#define xfs_dir3_leaf_check(dp, bp) \ -do { \ - if (!xfs_dir3_leaf1_check((dp), (bp))) \ - ASSERT(0); \ -} while (0); - -STATIC bool +static xfs_failaddr_t xfs_dir3_leaf1_check( struct xfs_inode *dp, struct xfs_buf *bp) @@ -69,17 +63,32 @@ xfs_dir3_leaf1_check( if (leafhdr.magic == XFS_DIR3_LEAF1_MAGIC) { struct xfs_dir3_leaf_hdr *leaf3 = bp->b_addr; if (be64_to_cpu(leaf3->info.blkno) != bp->b_bn) - return false; + return __this_address; } else if (leafhdr.magic != XFS_DIR2_LEAF1_MAGIC) - return false; + return __this_address; return xfs_dir3_leaf_check_int(dp->i_mount, dp, &leafhdr, leaf); } + +static inline void +xfs_dir3_leaf_check( + struct xfs_inode *dp, + struct xfs_buf *bp) +{ + xfs_failaddr_t fa; + + fa = xfs_dir3_leaf1_check(dp, bp); + if (!fa) + return; + xfs_corruption_error(__func__, XFS_ERRLEVEL_LOW, dp->i_mount, + bp->b_addr, __FILE__, __LINE__, fa); + ASSERT(0); +} #else #define xfs_dir3_leaf_check(dp, bp) #endif -bool +xfs_failaddr_t xfs_dir3_leaf_check_int( struct xfs_mount *mp, struct xfs_inode *dp, @@ -114,27 +123,27 @@ xfs_dir3_leaf_check_int( * We can deduce a value for that from di_size. */ if (hdr->count > ops->leaf_max_ents(geo)) - return false; + return __this_address; /* Leaves and bests don't overlap in leaf format. */ if ((hdr->magic == XFS_DIR2_LEAF1_MAGIC || hdr->magic == XFS_DIR3_LEAF1_MAGIC) && (char *)&ents[hdr->count] > (char *)xfs_dir2_leaf_bests_p(ltp)) - return false; + return __this_address; /* Check hash value order, count stale entries. 
*/ for (i = stale = 0; i < hdr->count; i++) { if (i + 1 < hdr->count) { if (be32_to_cpu(ents[i].hashval) > be32_to_cpu(ents[i + 1].hashval)) - return false; + return __this_address; } if (ents[i].address == cpu_to_be32(XFS_DIR2_NULL_DATAPTR)) stale++; } if (hdr->stale != stale) - return false; - return true; + return __this_address; + return NULL; } /* @@ -142,7 +151,7 @@ xfs_dir3_leaf_check_int( * kernels we don't get assertion failures in xfs_dir3_leaf_hdr_from_disk() due * to incorrect magic numbers. */ -static bool +static xfs_failaddr_t xfs_dir3_leaf_verify( struct xfs_buf *bp, uint16_t magic) @@ -160,16 +169,16 @@ xfs_dir3_leaf_verify( : XFS_DIR3_LEAFN_MAGIC; if (leaf3->info.hdr.magic != cpu_to_be16(magic3)) - return false; + return __this_address; if (!uuid_equal(&leaf3->info.uuid, &mp->m_sb.sb_meta_uuid)) - return false; + return __this_address; if (be64_to_cpu(leaf3->info.blkno) != bp->b_bn) - return false; + return __this_address; if (!xfs_log_check_lsn(mp, be64_to_cpu(leaf3->info.lsn))) - return false; + return __this_address; } else { if (leaf->hdr.info.magic != cpu_to_be16(magic)) - return false; + return __this_address; } return xfs_dir3_leaf_check_int(mp, NULL, NULL, leaf); @@ -185,7 +194,7 @@ __read_verify( if (xfs_sb_version_hascrc(&mp->m_sb) && !xfs_buf_verify_cksum(bp, XFS_DIR3_LEAF_CRC_OFF)) xfs_verifier_error(bp, -EFSBADCRC); - else if (!xfs_dir3_leaf_verify(bp, magic)) + else if (xfs_dir3_leaf_verify(bp, magic)) xfs_verifier_error(bp, -EFSCORRUPTED); } @@ -198,7 +207,7 @@ __write_verify( struct xfs_buf_log_item *bip = bp->b_fspriv; struct xfs_dir3_leaf_hdr *hdr3 = bp->b_addr; - if (!xfs_dir3_leaf_verify(bp, magic)) { + if (xfs_dir3_leaf_verify(bp, magic)) { xfs_verifier_error(bp, -EFSCORRUPTED); return; } diff --git a/fs/xfs/libxfs/xfs_dir2_node.c b/fs/xfs/libxfs/xfs_dir2_node.c index 888301d22c45..051a21f3ad06 100644 --- a/fs/xfs/libxfs/xfs_dir2_node.c +++ b/fs/xfs/libxfs/xfs_dir2_node.c @@ -53,13 +53,7 @@ static int 
xfs_dir2_node_addname_int(xfs_da_args_t *args, * Check internal consistency of a leafn block. */ #ifdef DEBUG -#define xfs_dir3_leaf_check(dp, bp) \ -do { \ - if (!xfs_dir3_leafn_check((dp), (bp))) \ - ASSERT(0); \ -} while (0); - -static bool +static xfs_failaddr_t xfs_dir3_leafn_check( struct xfs_inode *dp, struct xfs_buf *bp) @@ -72,17 +66,32 @@ xfs_dir3_leafn_check( if (leafhdr.magic == XFS_DIR3_LEAFN_MAGIC) { struct xfs_dir3_leaf_hdr *leaf3 = bp->b_addr; if (be64_to_cpu(leaf3->info.blkno) != bp->b_bn) - return false; + return __this_address; } else if (leafhdr.magic != XFS_DIR2_LEAFN_MAGIC) - return false; + return __this_address; return xfs_dir3_leaf_check_int(dp->i_mount, dp, &leafhdr, leaf); } + +static inline void +xfs_dir3_leaf_check( + struct xfs_inode *dp, + struct xfs_buf *bp) +{ + xfs_failaddr_t fa; + + fa = xfs_dir3_leafn_check(dp, bp); + if (!fa) + return; + xfs_corruption_error(__func__, XFS_ERRLEVEL_LOW, dp->i_mount, + bp->b_addr, __FILE__, __LINE__, fa); + ASSERT(0); +} #else #define xfs_dir3_leaf_check(dp, bp) #endif -static bool +static xfs_failaddr_t xfs_dir3_free_verify( struct xfs_buf *bp) { @@ -93,21 +102,21 @@ xfs_dir3_free_verify( struct xfs_dir3_blk_hdr *hdr3 = bp->b_addr; if (hdr3->magic != cpu_to_be32(XFS_DIR3_FREE_MAGIC)) - return false; + return __this_address; if (!uuid_equal(&hdr3->uuid, &mp->m_sb.sb_meta_uuid)) - return false; + return __this_address; if (be64_to_cpu(hdr3->blkno) != bp->b_bn) - return false; + return __this_address; if (!xfs_log_check_lsn(mp, be64_to_cpu(hdr3->lsn))) - return false; + return __this_address; } else { if (hdr->magic != cpu_to_be32(XFS_DIR2_FREE_MAGIC)) - return false; + return __this_address; } /* XXX: should bounds check the xfs_dir3_icfree_hdr here */ - return true; + return NULL; } static void @@ -119,7 +128,7 @@ xfs_dir3_free_read_verify( if (xfs_sb_version_hascrc(&mp->m_sb) && !xfs_buf_verify_cksum(bp, XFS_DIR3_FREE_CRC_OFF)) xfs_verifier_error(bp, -EFSBADCRC); - else if 
(!xfs_dir3_free_verify(bp)) + else if (xfs_dir3_free_verify(bp)) xfs_verifier_error(bp, -EFSCORRUPTED); } @@ -131,7 +140,7 @@ xfs_dir3_free_write_verify( struct xfs_buf_log_item *bip = bp->b_fspriv; struct xfs_dir3_blk_hdr *hdr3 = bp->b_addr; - if (!xfs_dir3_free_verify(bp)) { + if (xfs_dir3_free_verify(bp)) { xfs_verifier_error(bp, -EFSCORRUPTED); return; } @@ -170,22 +179,22 @@ xfs_dir3_free_header_check( struct xfs_dir3_free_hdr *hdr3 = bp->b_addr; if (be32_to_cpu(hdr3->firstdb) != firstdb) - return false; + return __this_address; if (be32_to_cpu(hdr3->nvalid) > maxbests) - return false; + return __this_address; if (be32_to_cpu(hdr3->nvalid) < be32_to_cpu(hdr3->nused)) - return false; + return __this_address; } else { struct xfs_dir2_free_hdr *hdr = bp->b_addr; if (be32_to_cpu(hdr->firstdb) != firstdb) - return false; + return __this_address; if (be32_to_cpu(hdr->nvalid) > maxbests) - return false; + return __this_address; if (be32_to_cpu(hdr->nvalid) < be32_to_cpu(hdr->nused)) - return false; + return __this_address; } - return true; + return NULL; } static int @@ -204,7 +213,7 @@ __xfs_dir3_free_read( return err; /* Check things that we can't do in the verifier. 
*/ - if (!xfs_dir3_free_header_check(dp, fbno, *bpp)) { + if (xfs_dir3_free_header_check(dp, fbno, *bpp)) { xfs_verifier_error(*bpp, -EFSCORRUPTED); xfs_trans_brelse(tp, *bpp); return -EFSCORRUPTED; diff --git a/fs/xfs/libxfs/xfs_dir2_priv.h b/fs/xfs/libxfs/xfs_dir2_priv.h index 45c68d04219d..a15ad7cd8057 100644 --- a/fs/xfs/libxfs/xfs_dir2_priv.h +++ b/fs/xfs/libxfs/xfs_dir2_priv.h @@ -39,18 +39,13 @@ extern int xfs_dir2_leaf_to_block(struct xfs_da_args *args, /* xfs_dir2_data.c */ #ifdef DEBUG -#define xfs_dir3_data_check(dp, bp) \ -do { \ - if (!__xfs_dir3_data_check((dp), (bp))) { \ - XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, \ - (bp)->b_target->bt_mount, (bp)->b_addr); \ - } \ -} while (0) +extern void xfs_dir3_data_check(struct xfs_inode *dp, struct xfs_buf *bp); #else #define xfs_dir3_data_check(dp,bp) #endif -extern bool __xfs_dir3_data_check(struct xfs_inode *dp, struct xfs_buf *bp); +extern xfs_failaddr_t __xfs_dir3_data_check(struct xfs_inode *dp, + struct xfs_buf *bp); extern int xfs_dir3_data_read(struct xfs_trans *tp, struct xfs_inode *dp, xfs_dablk_t bno, xfs_daddr_t mapped_bno, struct xfs_buf **bpp); extern int xfs_dir3_data_readahead(struct xfs_inode *dp, xfs_dablk_t bno, @@ -95,8 +90,9 @@ xfs_dir3_leaf_find_entry(struct xfs_dir3_icleaf_hdr *leafhdr, int lowstale, int highstale, int *lfloglow, int *lfloghigh); extern int xfs_dir2_node_to_leaf(struct xfs_da_state *state); -extern bool xfs_dir3_leaf_check_int(struct xfs_mount *mp, struct xfs_inode *dp, - struct xfs_dir3_icleaf_hdr *hdr, struct xfs_dir2_leaf *leaf); +extern xfs_failaddr_t xfs_dir3_leaf_check_int(struct xfs_mount *mp, + struct xfs_inode *dp, struct xfs_dir3_icleaf_hdr *hdr, + struct xfs_dir2_leaf *leaf); /* xfs_dir2_node.c */ extern int xfs_dir2_leaf_to_node(struct xfs_da_args *args, diff --git a/fs/xfs/libxfs/xfs_ialloc.c b/fs/xfs/libxfs/xfs_ialloc.c index 3a41e852206e..f53f9bc486e8 100644 --- a/fs/xfs/libxfs/xfs_ialloc.c +++ b/fs/xfs/libxfs/xfs_ialloc.c @@ -2491,7 +2491,7 @@ 
xfs_check_agi_unlinked( #define xfs_check_agi_unlinked(agi) #endif -static bool +static xfs_failaddr_t xfs_agi_verify( struct xfs_buf *bp) { @@ -2500,28 +2500,28 @@ xfs_agi_verify( if (xfs_sb_version_hascrc(&mp->m_sb)) { if (!uuid_equal(&agi->agi_uuid, &mp->m_sb.sb_meta_uuid)) - return false; + return __this_address; if (!xfs_log_check_lsn(mp, be64_to_cpu(XFS_BUF_TO_AGI(bp)->agi_lsn))) - return false; + return __this_address; } /* * Validate the magic number of the agi block. */ if (agi->agi_magicnum != cpu_to_be32(XFS_AGI_MAGIC)) - return false; + return __this_address; if (!XFS_AGI_GOOD_VERSION(be32_to_cpu(agi->agi_versionnum))) - return false; + return __this_address; if (be32_to_cpu(agi->agi_level) < 1 || be32_to_cpu(agi->agi_level) > XFS_BTREE_MAXLEVELS) - return false; + return __this_address; if (xfs_sb_version_hasfinobt(&mp->m_sb) && (be32_to_cpu(agi->agi_free_level) < 1 || be32_to_cpu(agi->agi_free_level) > XFS_BTREE_MAXLEVELS)) - return false; + return __this_address; /* * during growfs operations, the perag is not fully initialised, @@ -2530,10 +2530,10 @@ xfs_agi_verify( * so we can detect and avoid this problem. 
*/ if (bp->b_pag && be32_to_cpu(agi->agi_seqno) != bp->b_pag->pag_agno) - return false; + return __this_address; xfs_check_agi_unlinked(agi); - return true; + return NULL; } static void @@ -2545,7 +2545,7 @@ xfs_agi_read_verify( if (xfs_sb_version_hascrc(&mp->m_sb) && !xfs_buf_verify_cksum(bp, XFS_AGI_CRC_OFF)) xfs_verifier_error(bp, -EFSBADCRC); - else if (XFS_TEST_ERROR(!xfs_agi_verify(bp), mp, + else if (XFS_TEST_ERROR(xfs_agi_verify(bp), mp, XFS_ERRTAG_IALLOC_READ_AGI)) xfs_verifier_error(bp, -EFSCORRUPTED); } @@ -2557,7 +2557,7 @@ xfs_agi_write_verify( struct xfs_mount *mp = bp->b_target->bt_mount; struct xfs_buf_log_item *bip = bp->b_fspriv; - if (!xfs_agi_verify(bp)) { + if (xfs_agi_verify(bp)) { xfs_verifier_error(bp, -EFSCORRUPTED); return; } diff --git a/fs/xfs/libxfs/xfs_ialloc_btree.c b/fs/xfs/libxfs/xfs_ialloc_btree.c index d86bdce6652a..8fe126bcc5bd 100644 --- a/fs/xfs/libxfs/xfs_ialloc_btree.c +++ b/fs/xfs/libxfs/xfs_ialloc_btree.c @@ -250,12 +250,13 @@ xfs_inobt_diff_two_keys( be32_to_cpu(k2->inobt.ir_startino); } -static int +static xfs_failaddr_t xfs_inobt_verify( struct xfs_buf *bp) { struct xfs_mount *mp = bp->b_target->bt_mount; struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp); + xfs_failaddr_t fa; unsigned int level; /* @@ -271,20 +272,21 @@ xfs_inobt_verify( switch (block->bb_magic) { case cpu_to_be32(XFS_IBT_CRC_MAGIC): case cpu_to_be32(XFS_FIBT_CRC_MAGIC): - if (!xfs_btree_sblock_v5hdr_verify(bp)) - return false; + fa = xfs_btree_sblock_v5hdr_verify(bp); + if (fa) + return fa; /* fall through */ case cpu_to_be32(XFS_IBT_MAGIC): case cpu_to_be32(XFS_FIBT_MAGIC): break; default: - return 0; + return NULL; } /* level verification */ level = be16_to_cpu(block->bb_level); if (level >= mp->m_in_maxlevels) - return false; + return __this_address; return xfs_btree_sblock_verify(bp, mp->m_inobt_mxr[level != 0]); } @@ -295,7 +297,7 @@ xfs_inobt_read_verify( { if (!xfs_btree_sblock_verify_crc(bp)) xfs_verifier_error(bp, -EFSBADCRC); - else if 
(!xfs_inobt_verify(bp)) + else if (xfs_inobt_verify(bp)) xfs_verifier_error(bp, -EFSCORRUPTED); if (bp->b_error) @@ -306,7 +308,7 @@ static void xfs_inobt_write_verify( struct xfs_buf *bp) { - if (!xfs_inobt_verify(bp)) { + if (xfs_inobt_verify(bp)) { trace_xfs_btree_corrupt(bp, _RET_IP_); xfs_verifier_error(bp, -EFSCORRUPTED); return; diff --git a/fs/xfs/libxfs/xfs_inode_buf.c b/fs/xfs/libxfs/xfs_inode_buf.c index b625f65bc412..607dcca30668 100644 --- a/fs/xfs/libxfs/xfs_inode_buf.c +++ b/fs/xfs/libxfs/xfs_inode_buf.c @@ -380,7 +380,7 @@ xfs_log_dinode_to_disk( } } -bool +xfs_failaddr_t xfs_dinode_verify( struct xfs_mount *mp, xfs_ino_t ino, @@ -391,33 +391,33 @@ xfs_dinode_verify( uint64_t flags2; if (dip->di_magic != cpu_to_be16(XFS_DINODE_MAGIC)) - return false; + return __this_address; /* don't allow invalid i_size */ if (be64_to_cpu(dip->di_size) & (1ULL << 63)) - return false; + return __this_address; mode = be16_to_cpu(dip->di_mode); if (mode && xfs_mode_to_ftype(mode) == XFS_DIR3_FT_UNKNOWN) - return false; + return __this_address; /* No zero-length symlinks/dirs. 
*/ if ((S_ISLNK(mode) || S_ISDIR(mode)) && dip->di_size == 0) - return false; + return __this_address; /* only version 3 or greater inodes are extensively verified here */ if (dip->di_version < 3) - return true; + return NULL; if (!xfs_sb_version_hascrc(&mp->m_sb)) - return false; + return __this_address; if (!xfs_verify_cksum((char *)dip, mp->m_sb.sb_inodesize, XFS_DINODE_CRC_OFF)) - return false; + return __this_address; if (be64_to_cpu(dip->di_ino) != ino) - return false; + return __this_address; if (!uuid_equal(&dip->di_uuid, &mp->m_sb.sb_meta_uuid)) - return false; + return __this_address; flags = be16_to_cpu(dip->di_flags); flags2 = be64_to_cpu(dip->di_flags2); @@ -425,17 +425,17 @@ xfs_dinode_verify( /* don't allow reflink/cowextsize if we don't have reflink */ if ((flags2 & (XFS_DIFLAG2_REFLINK | XFS_DIFLAG2_COWEXTSIZE)) && !xfs_sb_version_hasreflink(&mp->m_sb)) - return false; + return __this_address; /* don't let reflink and realtime mix */ if ((flags2 & XFS_DIFLAG2_REFLINK) && (flags & XFS_DIFLAG_REALTIME)) - return false; + return __this_address; /* don't let reflink and dax mix */ if ((flags2 & XFS_DIFLAG2_REFLINK) && (flags2 & XFS_DIFLAG2_DAX)) - return false; + return __this_address; - return true; + return NULL; } void @@ -475,6 +475,7 @@ xfs_iread( { xfs_buf_t *bp; xfs_dinode_t *dip; + xfs_failaddr_t fa; int error; /* @@ -506,9 +507,10 @@ xfs_iread( return error; /* even unallocated inodes are verified */ - if (!xfs_dinode_verify(mp, ip->i_ino, dip)) { - xfs_alert(mp, "%s: validation failed for inode %lld", - __func__, ip->i_ino); + fa = xfs_dinode_verify(mp, ip->i_ino, dip); + if (fa) { + xfs_alert(mp, "%s: validation failed for inode %lld at %pS", + __func__, ip->i_ino, fa); XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, dip); error = -EFSCORRUPTED; diff --git a/fs/xfs/libxfs/xfs_inode_buf.h b/fs/xfs/libxfs/xfs_inode_buf.h index a9c97a356c30..8a5e1da52d74 100644 --- a/fs/xfs/libxfs/xfs_inode_buf.h +++ b/fs/xfs/libxfs/xfs_inode_buf.h @@ 
-82,7 +82,7 @@ void xfs_inobp_check(struct xfs_mount *, struct xfs_buf *); #define xfs_inobp_check(mp, bp) #endif /* DEBUG */ -bool xfs_dinode_verify(struct xfs_mount *mp, xfs_ino_t ino, - struct xfs_dinode *dip); +xfs_failaddr_t xfs_dinode_verify(struct xfs_mount *mp, xfs_ino_t ino, + struct xfs_dinode *dip); #endif /* __XFS_INODE_BUF_H__ */ diff --git a/fs/xfs/libxfs/xfs_refcount_btree.c b/fs/xfs/libxfs/xfs_refcount_btree.c index ce940ebe230c..aa090510e8e0 100644 --- a/fs/xfs/libxfs/xfs_refcount_btree.c +++ b/fs/xfs/libxfs/xfs_refcount_btree.c @@ -223,29 +223,31 @@ xfs_refcountbt_diff_two_keys( be32_to_cpu(k2->refc.rc_startblock); } -STATIC bool +STATIC xfs_failaddr_t xfs_refcountbt_verify( struct xfs_buf *bp) { struct xfs_mount *mp = bp->b_target->bt_mount; struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp); struct xfs_perag *pag = bp->b_pag; + xfs_failaddr_t fa; unsigned int level; if (block->bb_magic != cpu_to_be32(XFS_REFC_CRC_MAGIC)) - return false; + return __this_address; if (!xfs_sb_version_hasreflink(&mp->m_sb)) - return false; - if (!xfs_btree_sblock_v5hdr_verify(bp)) - return false; + return __this_address; + fa = xfs_btree_sblock_v5hdr_verify(bp); + if (fa) + return fa; level = be16_to_cpu(block->bb_level); if (pag && pag->pagf_init) { if (level >= pag->pagf_refcount_level) - return false; + return __this_address; } else if (level >= mp->m_refc_maxlevels) - return false; + return __this_address; return xfs_btree_sblock_verify(bp, mp->m_refc_mxr[level != 0]); } @@ -256,7 +258,7 @@ xfs_refcountbt_read_verify( { if (!xfs_btree_sblock_verify_crc(bp)) xfs_verifier_error(bp, -EFSBADCRC); - else if (!xfs_refcountbt_verify(bp)) + else if (xfs_refcountbt_verify(bp)) xfs_verifier_error(bp, -EFSCORRUPTED); if (bp->b_error) @@ -267,7 +269,7 @@ STATIC void xfs_refcountbt_write_verify( struct xfs_buf *bp) { - if (!xfs_refcountbt_verify(bp)) { + if (xfs_refcountbt_verify(bp)) { trace_xfs_btree_corrupt(bp, _RET_IP_); xfs_verifier_error(bp, -EFSCORRUPTED); return; 
diff --git a/fs/xfs/libxfs/xfs_rmap_btree.c b/fs/xfs/libxfs/xfs_rmap_btree.c index 6325908463c2..333fd5c08f9f 100644 --- a/fs/xfs/libxfs/xfs_rmap_btree.c +++ b/fs/xfs/libxfs/xfs_rmap_btree.c @@ -303,13 +303,14 @@ xfs_rmapbt_diff_two_keys( return 0; } -static bool +static xfs_failaddr_t xfs_rmapbt_verify( struct xfs_buf *bp) { struct xfs_mount *mp = bp->b_target->bt_mount; struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp); struct xfs_perag *pag = bp->b_pag; + xfs_failaddr_t fa; unsigned int level; /* @@ -325,19 +326,20 @@ xfs_rmapbt_verify( * in this case. */ if (block->bb_magic != cpu_to_be32(XFS_RMAP_CRC_MAGIC)) - return false; + return __this_address; if (!xfs_sb_version_hasrmapbt(&mp->m_sb)) - return false; - if (!xfs_btree_sblock_v5hdr_verify(bp)) - return false; + return __this_address; + fa = xfs_btree_sblock_v5hdr_verify(bp); + if (fa) + return fa; level = be16_to_cpu(block->bb_level); if (pag && pag->pagf_init) { if (level >= pag->pagf_levels[XFS_BTNUM_RMAPi]) - return false; + return __this_address; } else if (level >= mp->m_rmap_maxlevels) - return false; + return __this_address; return xfs_btree_sblock_verify(bp, mp->m_rmap_mxr[level != 0]); } @@ -348,7 +350,7 @@ xfs_rmapbt_read_verify( { if (!xfs_btree_sblock_verify_crc(bp)) xfs_verifier_error(bp, -EFSBADCRC); - else if (!xfs_rmapbt_verify(bp)) + else if (xfs_rmapbt_verify(bp)) xfs_verifier_error(bp, -EFSCORRUPTED); if (bp->b_error) @@ -359,7 +361,7 @@ static void xfs_rmapbt_write_verify( struct xfs_buf *bp) { - if (!xfs_rmapbt_verify(bp)) { + if (xfs_rmapbt_verify(bp)) { trace_xfs_btree_corrupt(bp, _RET_IP_); xfs_verifier_error(bp, -EFSCORRUPTED); return; diff --git a/fs/xfs/libxfs/xfs_symlink_remote.c b/fs/xfs/libxfs/xfs_symlink_remote.c index 58c21a6e0eff..64db737c49d5 100644 --- a/fs/xfs/libxfs/xfs_symlink_remote.c +++ b/fs/xfs/libxfs/xfs_symlink_remote.c @@ -98,7 +98,7 @@ xfs_symlink_hdr_ok( return true; } -static bool +static xfs_failaddr_t xfs_symlink_verify( struct xfs_buf *bp) { @@ -106,22 
+106,22 @@ xfs_symlink_verify( struct xfs_dsymlink_hdr *dsl = bp->b_addr; if (!xfs_sb_version_hascrc(&mp->m_sb)) - return false; + return __this_address; if (dsl->sl_magic != cpu_to_be32(XFS_SYMLINK_MAGIC)) - return false; + return __this_address; if (!uuid_equal(&dsl->sl_uuid, &mp->m_sb.sb_meta_uuid)) - return false; + return __this_address; if (bp->b_bn != be64_to_cpu(dsl->sl_blkno)) - return false; + return __this_address; if (be32_to_cpu(dsl->sl_offset) + be32_to_cpu(dsl->sl_bytes) >= XFS_SYMLINK_MAXLEN) - return false; + return __this_address; if (dsl->sl_owner == 0) - return false; + return __this_address; if (!xfs_log_check_lsn(mp, be64_to_cpu(dsl->sl_lsn))) - return false; + return __this_address; - return true; + return NULL; } static void @@ -136,7 +136,7 @@ xfs_symlink_read_verify( if (!xfs_buf_verify_cksum(bp, XFS_SYMLINK_CRC_OFF)) xfs_verifier_error(bp, -EFSBADCRC); - else if (!xfs_symlink_verify(bp)) + else if (xfs_symlink_verify(bp)) xfs_verifier_error(bp, -EFSCORRUPTED); } @@ -151,7 +151,7 @@ xfs_symlink_write_verify( if (!xfs_sb_version_hascrc(&mp->m_sb)) return; - if (!xfs_symlink_verify(bp)) { + if (xfs_symlink_verify(bp)) { xfs_verifier_error(bp, -EFSCORRUPTED); return; } diff --git a/fs/xfs/scrub/inode.c b/fs/xfs/scrub/inode.c index 61cd1123bc4a..59a4fce91a2b 100644 --- a/fs/xfs/scrub/inode.c +++ b/fs/xfs/scrub/inode.c @@ -556,7 +556,7 @@ xfs_scrub_inode_map_raw( */ bp->b_ops = &xfs_inode_buf_ops; dip = xfs_buf_offset(bp, imap.im_boffset); - if (!xfs_dinode_verify(mp, ino, dip) || + if (xfs_dinode_verify(mp, ino, dip) != NULL || !xfs_dinode_good_version(mp, dip->di_version)) { xfs_scrub_ino_set_corrupt(sc, ino, bp); goto out_buf; -- cgit v1.2.3 From bc1a09b8e334bf5fca1d6727aec538dcff957961 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Mon, 8 Jan 2018 10:51:03 -0800 Subject: xfs: refactor verifier callers to print address of failing check Refactor the callers of verifiers to print the instruction address of a failing check. 
Signed-off-by: Darrick J. Wong Reviewed-by: Dave Chinner --- fs/xfs/libxfs/xfs_alloc.c | 33 ++++++++++++++++++++++----------- fs/xfs/libxfs/xfs_alloc_btree.c | 18 +++++++++++++----- fs/xfs/libxfs/xfs_attr_leaf.c | 16 +++++++++++----- fs/xfs/libxfs/xfs_attr_remote.c | 22 +++++++++++++--------- fs/xfs/libxfs/xfs_bmap_btree.c | 18 +++++++++++++----- fs/xfs/libxfs/xfs_da_btree.c | 17 +++++++++++------ fs/xfs/libxfs/xfs_dir2_block.c | 16 +++++++++++----- fs/xfs/libxfs/xfs_dir2_data.c | 18 ++++++++++++------ fs/xfs/libxfs/xfs_dir2_leaf.c | 16 +++++++++++----- fs/xfs/libxfs/xfs_dir2_node.c | 24 ++++++++++++++++-------- fs/xfs/libxfs/xfs_dquot_buf.c | 6 +++--- fs/xfs/libxfs/xfs_ialloc.c | 17 +++++++++++------ fs/xfs/libxfs/xfs_ialloc_btree.c | 18 +++++++++++++----- fs/xfs/libxfs/xfs_inode_buf.c | 2 +- fs/xfs/libxfs/xfs_refcount_btree.c | 18 +++++++++++++----- fs/xfs/libxfs/xfs_rmap_btree.c | 18 +++++++++++++----- fs/xfs/libxfs/xfs_sb.c | 4 ++-- fs/xfs/libxfs/xfs_symlink_remote.c | 16 +++++++++++----- fs/xfs/xfs_error.c | 11 +++++++---- fs/xfs/xfs_error.h | 3 ++- 20 files changed, 209 insertions(+), 102 deletions(-) diff --git a/fs/xfs/libxfs/xfs_alloc.c b/fs/xfs/libxfs/xfs_alloc.c index 4d7ef74f5783..6bace8cb47da 100644 --- a/fs/xfs/libxfs/xfs_alloc.c +++ b/fs/xfs/libxfs/xfs_alloc.c @@ -557,6 +557,7 @@ xfs_agfl_read_verify( struct xfs_buf *bp) { struct xfs_mount *mp = bp->b_target->bt_mount; + xfs_failaddr_t fa; /* * There is no verification of non-crc AGFLs because mkfs does not @@ -568,9 +569,12 @@ xfs_agfl_read_verify( return; if (!xfs_buf_verify_cksum(bp, XFS_AGFL_CRC_OFF)) - xfs_verifier_error(bp, -EFSBADCRC); - else if (xfs_agfl_verify(bp)) - xfs_verifier_error(bp, -EFSCORRUPTED); + xfs_verifier_error(bp, -EFSBADCRC, __this_address); + else { + fa = xfs_agfl_verify(bp); + if (fa) + xfs_verifier_error(bp, -EFSCORRUPTED, fa); + } } static void @@ -579,13 +583,15 @@ xfs_agfl_write_verify( { struct xfs_mount *mp = bp->b_target->bt_mount; struct xfs_buf_log_item *bip = 
bp->b_fspriv; + xfs_failaddr_t fa; /* no verification of non-crc AGFLs */ if (!xfs_sb_version_hascrc(&mp->m_sb)) return; - if (xfs_agfl_verify(bp)) { - xfs_verifier_error(bp, -EFSCORRUPTED); + fa = xfs_agfl_verify(bp); + if (fa) { + xfs_verifier_error(bp, -EFSCORRUPTED, fa); return; } @@ -2455,13 +2461,16 @@ xfs_agf_read_verify( struct xfs_buf *bp) { struct xfs_mount *mp = bp->b_target->bt_mount; + xfs_failaddr_t fa; if (xfs_sb_version_hascrc(&mp->m_sb) && !xfs_buf_verify_cksum(bp, XFS_AGF_CRC_OFF)) - xfs_verifier_error(bp, -EFSBADCRC); - else if (XFS_TEST_ERROR(xfs_agf_verify(mp, bp), mp, - XFS_ERRTAG_ALLOC_READ_AGF)) - xfs_verifier_error(bp, -EFSCORRUPTED); + xfs_verifier_error(bp, -EFSBADCRC, __this_address); + else { + fa = xfs_agf_verify(mp, bp); + if (XFS_TEST_ERROR(fa, mp, XFS_ERRTAG_ALLOC_READ_AGF)) + xfs_verifier_error(bp, -EFSCORRUPTED, fa); + } } static void @@ -2470,9 +2479,11 @@ xfs_agf_write_verify( { struct xfs_mount *mp = bp->b_target->bt_mount; struct xfs_buf_log_item *bip = bp->b_fspriv; + xfs_failaddr_t fa; - if (xfs_agf_verify(mp, bp)) { - xfs_verifier_error(bp, -EFSCORRUPTED); + fa = xfs_agf_verify(mp, bp); + if (fa) { + xfs_verifier_error(bp, -EFSCORRUPTED, fa); return; } diff --git a/fs/xfs/libxfs/xfs_alloc_btree.c b/fs/xfs/libxfs/xfs_alloc_btree.c index 4383c05df8ca..060d6fa83a7f 100644 --- a/fs/xfs/libxfs/xfs_alloc_btree.c +++ b/fs/xfs/libxfs/xfs_alloc_btree.c @@ -366,10 +366,15 @@ static void xfs_allocbt_read_verify( struct xfs_buf *bp) { + xfs_failaddr_t fa; + if (!xfs_btree_sblock_verify_crc(bp)) - xfs_verifier_error(bp, -EFSBADCRC); - else if (xfs_allocbt_verify(bp)) - xfs_verifier_error(bp, -EFSCORRUPTED); + xfs_verifier_error(bp, -EFSBADCRC, __this_address); + else { + fa = xfs_allocbt_verify(bp); + if (fa) + xfs_verifier_error(bp, -EFSCORRUPTED, fa); + } if (bp->b_error) trace_xfs_btree_corrupt(bp, _RET_IP_); @@ -379,9 +384,12 @@ static void xfs_allocbt_write_verify( struct xfs_buf *bp) { - if (xfs_allocbt_verify(bp)) { + 
xfs_failaddr_t fa; + + fa = xfs_allocbt_verify(bp); + if (fa) { trace_xfs_btree_corrupt(bp, _RET_IP_); - xfs_verifier_error(bp, -EFSCORRUPTED); + xfs_verifier_error(bp, -EFSCORRUPTED, fa); return; } xfs_btree_sblock_calc_crc(bp); diff --git a/fs/xfs/libxfs/xfs_attr_leaf.c b/fs/xfs/libxfs/xfs_attr_leaf.c index f6281e100469..68c66fa378fd 100644 --- a/fs/xfs/libxfs/xfs_attr_leaf.c +++ b/fs/xfs/libxfs/xfs_attr_leaf.c @@ -295,9 +295,11 @@ xfs_attr3_leaf_write_verify( struct xfs_mount *mp = bp->b_target->bt_mount; struct xfs_buf_log_item *bip = bp->b_fspriv; struct xfs_attr3_leaf_hdr *hdr3 = bp->b_addr; + xfs_failaddr_t fa; - if (xfs_attr3_leaf_verify(bp)) { - xfs_verifier_error(bp, -EFSCORRUPTED); + fa = xfs_attr3_leaf_verify(bp); + if (fa) { + xfs_verifier_error(bp, -EFSCORRUPTED, fa); return; } @@ -321,12 +323,16 @@ xfs_attr3_leaf_read_verify( struct xfs_buf *bp) { struct xfs_mount *mp = bp->b_target->bt_mount; + xfs_failaddr_t fa; if (xfs_sb_version_hascrc(&mp->m_sb) && !xfs_buf_verify_cksum(bp, XFS_ATTR3_LEAF_CRC_OFF)) - xfs_verifier_error(bp, -EFSBADCRC); - else if (xfs_attr3_leaf_verify(bp)) - xfs_verifier_error(bp, -EFSCORRUPTED); + xfs_verifier_error(bp, -EFSBADCRC, __this_address); + else { + fa = xfs_attr3_leaf_verify(bp); + if (fa) + xfs_verifier_error(bp, -EFSCORRUPTED, fa); + } } const struct xfs_buf_ops xfs_attr3_leaf_buf_ops = { diff --git a/fs/xfs/libxfs/xfs_attr_remote.c b/fs/xfs/libxfs/xfs_attr_remote.c index 06858db972ed..55514b343216 100644 --- a/fs/xfs/libxfs/xfs_attr_remote.c +++ b/fs/xfs/libxfs/xfs_attr_remote.c @@ -122,6 +122,7 @@ xfs_attr3_rmt_read_verify( { struct xfs_mount *mp = bp->b_target->bt_mount; char *ptr; + xfs_failaddr_t fa; int len; xfs_daddr_t bno; int blksize = mp->m_attr_geo->blksize; @@ -137,12 +138,13 @@ xfs_attr3_rmt_read_verify( while (len > 0) { if (!xfs_verify_cksum(ptr, blksize, XFS_ATTR3_RMT_CRC_OFF)) { - xfs_verifier_error(bp, -EFSBADCRC); + xfs_verifier_error(bp, -EFSBADCRC, __this_address); return; } - if 
(xfs_attr3_rmt_verify(mp, ptr, blksize, bno)) { - xfs_verifier_error(bp, -EFSCORRUPTED); - return; + fa = xfs_attr3_rmt_verify(mp, ptr, blksize, bno); + if (fa) { + xfs_verifier_error(bp, -EFSCORRUPTED, fa); + break; } len -= blksize; ptr += blksize; @@ -150,7 +152,7 @@ xfs_attr3_rmt_read_verify( } if (len != 0) - xfs_verifier_error(bp, -EFSCORRUPTED); + xfs_verifier_error(bp, -EFSCORRUPTED, __this_address); } static void @@ -158,6 +160,7 @@ xfs_attr3_rmt_write_verify( struct xfs_buf *bp) { struct xfs_mount *mp = bp->b_target->bt_mount; + xfs_failaddr_t fa; int blksize = mp->m_attr_geo->blksize; char *ptr; int len; @@ -175,8 +178,9 @@ xfs_attr3_rmt_write_verify( while (len > 0) { struct xfs_attr3_rmt_hdr *rmt = (struct xfs_attr3_rmt_hdr *)ptr; - if (xfs_attr3_rmt_verify(mp, ptr, blksize, bno)) { - xfs_verifier_error(bp, -EFSCORRUPTED); + fa = xfs_attr3_rmt_verify(mp, ptr, blksize, bno); + if (fa) { + xfs_verifier_error(bp, -EFSCORRUPTED, fa); return; } @@ -185,7 +189,7 @@ xfs_attr3_rmt_write_verify( * xfs_attr3_rmt_hdr_set() for the explanation. 
*/ if (rmt->rm_lsn != cpu_to_be64(NULLCOMMITLSN)) { - xfs_verifier_error(bp, -EFSCORRUPTED); + xfs_verifier_error(bp, -EFSCORRUPTED, __this_address); return; } xfs_update_cksum(ptr, blksize, XFS_ATTR3_RMT_CRC_OFF); @@ -196,7 +200,7 @@ xfs_attr3_rmt_write_verify( } if (len != 0) - xfs_verifier_error(bp, -EFSCORRUPTED); + xfs_verifier_error(bp, -EFSCORRUPTED, __this_address); } const struct xfs_buf_ops xfs_attr3_rmt_buf_ops = { diff --git a/fs/xfs/libxfs/xfs_bmap_btree.c b/fs/xfs/libxfs/xfs_bmap_btree.c index 0fdae57e7944..64ae0eea9812 100644 --- a/fs/xfs/libxfs/xfs_bmap_btree.c +++ b/fs/xfs/libxfs/xfs_bmap_btree.c @@ -468,10 +468,15 @@ static void xfs_bmbt_read_verify( struct xfs_buf *bp) { + xfs_failaddr_t fa; + if (!xfs_btree_lblock_verify_crc(bp)) - xfs_verifier_error(bp, -EFSBADCRC); - else if (xfs_bmbt_verify(bp)) - xfs_verifier_error(bp, -EFSCORRUPTED); + xfs_verifier_error(bp, -EFSBADCRC, __this_address); + else { + fa = xfs_bmbt_verify(bp); + if (fa) + xfs_verifier_error(bp, -EFSCORRUPTED, fa); + } if (bp->b_error) trace_xfs_btree_corrupt(bp, _RET_IP_); @@ -481,9 +486,12 @@ static void xfs_bmbt_write_verify( struct xfs_buf *bp) { - if (xfs_bmbt_verify(bp)) { + xfs_failaddr_t fa; + + fa = xfs_bmbt_verify(bp); + if (fa) { trace_xfs_btree_corrupt(bp, _RET_IP_); - xfs_verifier_error(bp, -EFSCORRUPTED); + xfs_verifier_error(bp, -EFSCORRUPTED, fa); return; } xfs_btree_lblock_calc_crc(bp); diff --git a/fs/xfs/libxfs/xfs_da_btree.c b/fs/xfs/libxfs/xfs_da_btree.c index 60a2572ba4e3..0ae39617a6c1 100644 --- a/fs/xfs/libxfs/xfs_da_btree.c +++ b/fs/xfs/libxfs/xfs_da_btree.c @@ -184,9 +184,11 @@ xfs_da3_node_write_verify( struct xfs_mount *mp = bp->b_target->bt_mount; struct xfs_buf_log_item *bip = bp->b_fspriv; struct xfs_da3_node_hdr *hdr3 = bp->b_addr; + xfs_failaddr_t fa; - if (xfs_da3_node_verify(bp)) { - xfs_verifier_error(bp, -EFSCORRUPTED); + fa = xfs_da3_node_verify(bp); + if (fa) { + xfs_verifier_error(bp, -EFSCORRUPTED, fa); return; } @@ -210,17 +212,20 @@ 
xfs_da3_node_read_verify( struct xfs_buf *bp) { struct xfs_da_blkinfo *info = bp->b_addr; + xfs_failaddr_t fa; switch (be16_to_cpu(info->magic)) { case XFS_DA3_NODE_MAGIC: if (!xfs_buf_verify_cksum(bp, XFS_DA3_NODE_CRC_OFF)) { - xfs_verifier_error(bp, -EFSBADCRC); + xfs_verifier_error(bp, -EFSBADCRC, + __this_address); break; } /* fall through */ case XFS_DA_NODE_MAGIC: - if (xfs_da3_node_verify(bp)) - xfs_verifier_error(bp, -EFSCORRUPTED); + fa = xfs_da3_node_verify(bp); + if (fa) + xfs_verifier_error(bp, -EFSCORRUPTED, fa); return; case XFS_ATTR_LEAF_MAGIC: case XFS_ATTR3_LEAF_MAGIC: @@ -233,7 +238,7 @@ xfs_da3_node_read_verify( bp->b_ops->verify_read(bp); return; default: - xfs_verifier_error(bp, -EFSCORRUPTED); + xfs_verifier_error(bp, -EFSCORRUPTED, __this_address); break; } } diff --git a/fs/xfs/libxfs/xfs_dir2_block.c b/fs/xfs/libxfs/xfs_dir2_block.c index 3b728794659c..cfd2777bf918 100644 --- a/fs/xfs/libxfs/xfs_dir2_block.c +++ b/fs/xfs/libxfs/xfs_dir2_block.c @@ -86,12 +86,16 @@ xfs_dir3_block_read_verify( struct xfs_buf *bp) { struct xfs_mount *mp = bp->b_target->bt_mount; + xfs_failaddr_t fa; if (xfs_sb_version_hascrc(&mp->m_sb) && !xfs_buf_verify_cksum(bp, XFS_DIR3_DATA_CRC_OFF)) - xfs_verifier_error(bp, -EFSBADCRC); - else if (xfs_dir3_block_verify(bp)) - xfs_verifier_error(bp, -EFSCORRUPTED); + xfs_verifier_error(bp, -EFSBADCRC, __this_address); + else { + fa = xfs_dir3_block_verify(bp); + if (fa) + xfs_verifier_error(bp, -EFSCORRUPTED, fa); + } } static void @@ -101,9 +105,11 @@ xfs_dir3_block_write_verify( struct xfs_mount *mp = bp->b_target->bt_mount; struct xfs_buf_log_item *bip = bp->b_fspriv; struct xfs_dir3_blk_hdr *hdr3 = bp->b_addr; + xfs_failaddr_t fa; - if (xfs_dir3_block_verify(bp)) { - xfs_verifier_error(bp, -EFSCORRUPTED); + fa = xfs_dir3_block_verify(bp); + if (fa) { + xfs_verifier_error(bp, -EFSCORRUPTED, fa); return; } diff --git a/fs/xfs/libxfs/xfs_dir2_data.c b/fs/xfs/libxfs/xfs_dir2_data.c index 3eda2474ad0f..2fa7c34023fb 100644 
--- a/fs/xfs/libxfs/xfs_dir2_data.c +++ b/fs/xfs/libxfs/xfs_dir2_data.c @@ -284,7 +284,7 @@ xfs_dir3_data_reada_verify( bp->b_ops->verify_read(bp); return; default: - xfs_verifier_error(bp, -EFSCORRUPTED); + xfs_verifier_error(bp, -EFSCORRUPTED, __this_address); break; } } @@ -294,12 +294,16 @@ xfs_dir3_data_read_verify( struct xfs_buf *bp) { struct xfs_mount *mp = bp->b_target->bt_mount; + xfs_failaddr_t fa; if (xfs_sb_version_hascrc(&mp->m_sb) && !xfs_buf_verify_cksum(bp, XFS_DIR3_DATA_CRC_OFF)) - xfs_verifier_error(bp, -EFSBADCRC); - else if (xfs_dir3_data_verify(bp)) - xfs_verifier_error(bp, -EFSCORRUPTED); + xfs_verifier_error(bp, -EFSBADCRC, __this_address); + else { + fa = xfs_dir3_data_verify(bp); + if (fa) + xfs_verifier_error(bp, -EFSCORRUPTED, fa); + } } static void @@ -309,9 +313,11 @@ xfs_dir3_data_write_verify( struct xfs_mount *mp = bp->b_target->bt_mount; struct xfs_buf_log_item *bip = bp->b_fspriv; struct xfs_dir3_blk_hdr *hdr3 = bp->b_addr; + xfs_failaddr_t fa; - if (xfs_dir3_data_verify(bp)) { - xfs_verifier_error(bp, -EFSCORRUPTED); + fa = xfs_dir3_data_verify(bp); + if (fa) { + xfs_verifier_error(bp, -EFSCORRUPTED, fa); return; } diff --git a/fs/xfs/libxfs/xfs_dir2_leaf.c b/fs/xfs/libxfs/xfs_dir2_leaf.c index a8694860566b..a03d67995811 100644 --- a/fs/xfs/libxfs/xfs_dir2_leaf.c +++ b/fs/xfs/libxfs/xfs_dir2_leaf.c @@ -190,12 +190,16 @@ __read_verify( uint16_t magic) { struct xfs_mount *mp = bp->b_target->bt_mount; + xfs_failaddr_t fa; if (xfs_sb_version_hascrc(&mp->m_sb) && !xfs_buf_verify_cksum(bp, XFS_DIR3_LEAF_CRC_OFF)) - xfs_verifier_error(bp, -EFSBADCRC); - else if (xfs_dir3_leaf_verify(bp, magic)) - xfs_verifier_error(bp, -EFSCORRUPTED); + xfs_verifier_error(bp, -EFSBADCRC, __this_address); + else { + fa = xfs_dir3_leaf_verify(bp, magic); + if (fa) + xfs_verifier_error(bp, -EFSCORRUPTED, fa); + } } static void @@ -206,9 +210,11 @@ __write_verify( struct xfs_mount *mp = bp->b_target->bt_mount; struct xfs_buf_log_item *bip = bp->b_fspriv; 
struct xfs_dir3_leaf_hdr *hdr3 = bp->b_addr; + xfs_failaddr_t fa; - if (xfs_dir3_leaf_verify(bp, magic)) { - xfs_verifier_error(bp, -EFSCORRUPTED); + fa = xfs_dir3_leaf_verify(bp, magic); + if (fa) { + xfs_verifier_error(bp, -EFSCORRUPTED, fa); return; } diff --git a/fs/xfs/libxfs/xfs_dir2_node.c b/fs/xfs/libxfs/xfs_dir2_node.c index 051a21f3ad06..3bdbe1897212 100644 --- a/fs/xfs/libxfs/xfs_dir2_node.c +++ b/fs/xfs/libxfs/xfs_dir2_node.c @@ -124,12 +124,16 @@ xfs_dir3_free_read_verify( struct xfs_buf *bp) { struct xfs_mount *mp = bp->b_target->bt_mount; + xfs_failaddr_t fa; if (xfs_sb_version_hascrc(&mp->m_sb) && !xfs_buf_verify_cksum(bp, XFS_DIR3_FREE_CRC_OFF)) - xfs_verifier_error(bp, -EFSBADCRC); - else if (xfs_dir3_free_verify(bp)) - xfs_verifier_error(bp, -EFSCORRUPTED); + xfs_verifier_error(bp, -EFSBADCRC, __this_address); + else { + fa = xfs_dir3_free_verify(bp); + if (fa) + xfs_verifier_error(bp, -EFSCORRUPTED, fa); + } } static void @@ -139,9 +143,11 @@ xfs_dir3_free_write_verify( struct xfs_mount *mp = bp->b_target->bt_mount; struct xfs_buf_log_item *bip = bp->b_fspriv; struct xfs_dir3_blk_hdr *hdr3 = bp->b_addr; + xfs_failaddr_t fa; - if (xfs_dir3_free_verify(bp)) { - xfs_verifier_error(bp, -EFSCORRUPTED); + fa = xfs_dir3_free_verify(bp); + if (fa) { + xfs_verifier_error(bp, -EFSCORRUPTED, fa); return; } @@ -161,7 +167,7 @@ const struct xfs_buf_ops xfs_dir3_free_buf_ops = { }; /* Everything ok in the free block header? */ -static bool +static xfs_failaddr_t xfs_dir3_free_header_check( struct xfs_inode *dp, xfs_dablk_t fbno, @@ -205,6 +211,7 @@ __xfs_dir3_free_read( xfs_daddr_t mappedbno, struct xfs_buf **bpp) { + xfs_failaddr_t fa; int err; err = xfs_da_read_buf(tp, dp, fbno, mappedbno, bpp, @@ -213,8 +220,9 @@ __xfs_dir3_free_read( return err; /* Check things that we can't do in the verifier. 
*/ - if (xfs_dir3_free_header_check(dp, fbno, *bpp)) { - xfs_verifier_error(*bpp, -EFSCORRUPTED); + fa = xfs_dir3_free_header_check(dp, fbno, *bpp); + if (fa) { + xfs_verifier_error(*bpp, -EFSCORRUPTED, fa); xfs_trans_brelse(tp, *bpp); return -EFSCORRUPTED; } diff --git a/fs/xfs/libxfs/xfs_dquot_buf.c b/fs/xfs/libxfs/xfs_dquot_buf.c index a3f1eccc614b..5e022c1a52c4 100644 --- a/fs/xfs/libxfs/xfs_dquot_buf.c +++ b/fs/xfs/libxfs/xfs_dquot_buf.c @@ -249,9 +249,9 @@ xfs_dquot_buf_read_verify( struct xfs_mount *mp = bp->b_target->bt_mount; if (!xfs_dquot_buf_verify_crc(mp, bp)) - xfs_verifier_error(bp, -EFSBADCRC); + xfs_verifier_error(bp, -EFSBADCRC, __this_address); else if (!xfs_dquot_buf_verify(mp, bp, XFS_QMOPT_DOWARN)) - xfs_verifier_error(bp, -EFSCORRUPTED); + xfs_verifier_error(bp, -EFSCORRUPTED, __this_address); } /* @@ -285,7 +285,7 @@ xfs_dquot_buf_write_verify( struct xfs_mount *mp = bp->b_target->bt_mount; if (!xfs_dquot_buf_verify(mp, bp, XFS_QMOPT_DOWARN)) { - xfs_verifier_error(bp, -EFSCORRUPTED); + xfs_verifier_error(bp, -EFSCORRUPTED, __this_address); return; } } diff --git a/fs/xfs/libxfs/xfs_ialloc.c b/fs/xfs/libxfs/xfs_ialloc.c index f53f9bc486e8..344b6a3525e1 100644 --- a/fs/xfs/libxfs/xfs_ialloc.c +++ b/fs/xfs/libxfs/xfs_ialloc.c @@ -2541,13 +2541,16 @@ xfs_agi_read_verify( struct xfs_buf *bp) { struct xfs_mount *mp = bp->b_target->bt_mount; + xfs_failaddr_t fa; if (xfs_sb_version_hascrc(&mp->m_sb) && !xfs_buf_verify_cksum(bp, XFS_AGI_CRC_OFF)) - xfs_verifier_error(bp, -EFSBADCRC); - else if (XFS_TEST_ERROR(xfs_agi_verify(bp), mp, - XFS_ERRTAG_IALLOC_READ_AGI)) - xfs_verifier_error(bp, -EFSCORRUPTED); + xfs_verifier_error(bp, -EFSBADCRC, __this_address); + else { + fa = xfs_agi_verify(bp); + if (XFS_TEST_ERROR(fa, mp, XFS_ERRTAG_IALLOC_READ_AGI)) + xfs_verifier_error(bp, -EFSCORRUPTED, fa); + } } static void @@ -2556,9 +2559,11 @@ xfs_agi_write_verify( { struct xfs_mount *mp = bp->b_target->bt_mount; struct xfs_buf_log_item *bip = bp->b_fspriv; + 
xfs_failaddr_t fa; - if (xfs_agi_verify(bp)) { - xfs_verifier_error(bp, -EFSCORRUPTED); + fa = xfs_agi_verify(bp); + if (fa) { + xfs_verifier_error(bp, -EFSCORRUPTED, fa); return; } diff --git a/fs/xfs/libxfs/xfs_ialloc_btree.c b/fs/xfs/libxfs/xfs_ialloc_btree.c index 8fe126bcc5bd..9c691ad00220 100644 --- a/fs/xfs/libxfs/xfs_ialloc_btree.c +++ b/fs/xfs/libxfs/xfs_ialloc_btree.c @@ -295,10 +295,15 @@ static void xfs_inobt_read_verify( struct xfs_buf *bp) { + xfs_failaddr_t fa; + if (!xfs_btree_sblock_verify_crc(bp)) - xfs_verifier_error(bp, -EFSBADCRC); - else if (xfs_inobt_verify(bp)) - xfs_verifier_error(bp, -EFSCORRUPTED); + xfs_verifier_error(bp, -EFSBADCRC, __this_address); + else { + fa = xfs_inobt_verify(bp); + if (fa) + xfs_verifier_error(bp, -EFSCORRUPTED, fa); + } if (bp->b_error) trace_xfs_btree_corrupt(bp, _RET_IP_); @@ -308,9 +313,12 @@ static void xfs_inobt_write_verify( struct xfs_buf *bp) { - if (xfs_inobt_verify(bp)) { + xfs_failaddr_t fa; + + fa = xfs_inobt_verify(bp); + if (fa) { trace_xfs_btree_corrupt(bp, _RET_IP_); - xfs_verifier_error(bp, -EFSCORRUPTED); + xfs_verifier_error(bp, -EFSCORRUPTED, fa); return; } xfs_btree_sblock_calc_crc(bp); diff --git a/fs/xfs/libxfs/xfs_inode_buf.c b/fs/xfs/libxfs/xfs_inode_buf.c index 607dcca30668..a43a3702af93 100644 --- a/fs/xfs/libxfs/xfs_inode_buf.c +++ b/fs/xfs/libxfs/xfs_inode_buf.c @@ -113,7 +113,7 @@ xfs_inode_buf_verify( return; } - xfs_verifier_error(bp, -EFSCORRUPTED); + xfs_verifier_error(bp, -EFSCORRUPTED, __this_address); #ifdef DEBUG xfs_alert(mp, "bad inode magic/vsn daddr %lld #%d (magic=%x)", diff --git a/fs/xfs/libxfs/xfs_refcount_btree.c b/fs/xfs/libxfs/xfs_refcount_btree.c index aa090510e8e0..715c272a609c 100644 --- a/fs/xfs/libxfs/xfs_refcount_btree.c +++ b/fs/xfs/libxfs/xfs_refcount_btree.c @@ -256,10 +256,15 @@ STATIC void xfs_refcountbt_read_verify( struct xfs_buf *bp) { + xfs_failaddr_t fa; + if (!xfs_btree_sblock_verify_crc(bp)) - xfs_verifier_error(bp, -EFSBADCRC); - else if 
(xfs_refcountbt_verify(bp)) - xfs_verifier_error(bp, -EFSCORRUPTED); + xfs_verifier_error(bp, -EFSBADCRC, __this_address); + else { + fa = xfs_refcountbt_verify(bp); + if (fa) + xfs_verifier_error(bp, -EFSCORRUPTED, fa); + } if (bp->b_error) trace_xfs_btree_corrupt(bp, _RET_IP_); @@ -269,9 +274,12 @@ STATIC void xfs_refcountbt_write_verify( struct xfs_buf *bp) { - if (xfs_refcountbt_verify(bp)) { + xfs_failaddr_t fa; + + fa = xfs_refcountbt_verify(bp); + if (fa) { trace_xfs_btree_corrupt(bp, _RET_IP_); - xfs_verifier_error(bp, -EFSCORRUPTED); + xfs_verifier_error(bp, -EFSCORRUPTED, fa); return; } xfs_btree_sblock_calc_crc(bp); diff --git a/fs/xfs/libxfs/xfs_rmap_btree.c b/fs/xfs/libxfs/xfs_rmap_btree.c index 333fd5c08f9f..d089a48ac530 100644 --- a/fs/xfs/libxfs/xfs_rmap_btree.c +++ b/fs/xfs/libxfs/xfs_rmap_btree.c @@ -348,10 +348,15 @@ static void xfs_rmapbt_read_verify( struct xfs_buf *bp) { + xfs_failaddr_t fa; + if (!xfs_btree_sblock_verify_crc(bp)) - xfs_verifier_error(bp, -EFSBADCRC); - else if (xfs_rmapbt_verify(bp)) - xfs_verifier_error(bp, -EFSCORRUPTED); + xfs_verifier_error(bp, -EFSBADCRC, __this_address); + else { + fa = xfs_rmapbt_verify(bp); + if (fa) + xfs_verifier_error(bp, -EFSCORRUPTED, fa); + } if (bp->b_error) trace_xfs_btree_corrupt(bp, _RET_IP_); @@ -361,9 +366,12 @@ static void xfs_rmapbt_write_verify( struct xfs_buf *bp) { - if (xfs_rmapbt_verify(bp)) { + xfs_failaddr_t fa; + + fa = xfs_rmapbt_verify(bp); + if (fa) { trace_xfs_btree_corrupt(bp, _RET_IP_); - xfs_verifier_error(bp, -EFSCORRUPTED); + xfs_verifier_error(bp, -EFSCORRUPTED, fa); return; } xfs_btree_sblock_calc_crc(bp); diff --git a/fs/xfs/libxfs/xfs_sb.c b/fs/xfs/libxfs/xfs_sb.c index 96a82672989a..63e0331b1d24 100644 --- a/fs/xfs/libxfs/xfs_sb.c +++ b/fs/xfs/libxfs/xfs_sb.c @@ -641,7 +641,7 @@ xfs_sb_read_verify( out_error: if (error == -EFSCORRUPTED || error == -EFSBADCRC) - xfs_verifier_error(bp, error); + xfs_verifier_error(bp, error, __this_address); else if (error) 
xfs_buf_ioerror(bp, error); } @@ -677,7 +677,7 @@ xfs_sb_write_verify( error = xfs_sb_verify(bp, false); if (error) { - xfs_verifier_error(bp, error); + xfs_verifier_error(bp, error, __this_address); return; } diff --git a/fs/xfs/libxfs/xfs_symlink_remote.c b/fs/xfs/libxfs/xfs_symlink_remote.c index 64db737c49d5..5497014f5293 100644 --- a/fs/xfs/libxfs/xfs_symlink_remote.c +++ b/fs/xfs/libxfs/xfs_symlink_remote.c @@ -129,15 +129,19 @@ xfs_symlink_read_verify( struct xfs_buf *bp) { struct xfs_mount *mp = bp->b_target->bt_mount; + xfs_failaddr_t fa; /* no verification of non-crc buffers */ if (!xfs_sb_version_hascrc(&mp->m_sb)) return; if (!xfs_buf_verify_cksum(bp, XFS_SYMLINK_CRC_OFF)) - xfs_verifier_error(bp, -EFSBADCRC); - else if (xfs_symlink_verify(bp)) - xfs_verifier_error(bp, -EFSCORRUPTED); + xfs_verifier_error(bp, -EFSBADCRC, __this_address); + else { + fa = xfs_symlink_verify(bp); + if (fa) + xfs_verifier_error(bp, -EFSCORRUPTED, fa); + } } static void @@ -146,13 +150,15 @@ xfs_symlink_write_verify( { struct xfs_mount *mp = bp->b_target->bt_mount; struct xfs_buf_log_item *bip = bp->b_fspriv; + xfs_failaddr_t fa; /* no verification of non-crc buffers */ if (!xfs_sb_version_hascrc(&mp->m_sb)) return; - if (xfs_symlink_verify(bp)) { - xfs_verifier_error(bp, -EFSCORRUPTED); + fa = xfs_symlink_verify(bp); + if (fa) { + xfs_verifier_error(bp, -EFSCORRUPTED, fa); return; } diff --git a/fs/xfs/xfs_error.c b/fs/xfs/xfs_error.c index c0a98c42033d..21db07cc7a11 100644 --- a/fs/xfs/xfs_error.c +++ b/fs/xfs/xfs_error.c @@ -348,15 +348,18 @@ xfs_corruption_error( void xfs_verifier_error( struct xfs_buf *bp, - int error) + int error, + xfs_failaddr_t failaddr) { - struct xfs_mount *mp = bp->b_target->bt_mount; + struct xfs_mount *mp = bp->b_target->bt_mount; + xfs_failaddr_t fa; - __xfs_buf_ioerror(bp, error, __return_address); + fa = failaddr ? 
failaddr : __return_address; + __xfs_buf_ioerror(bp, error, fa); xfs_alert(mp, "Metadata %s detected at %pS, %s block 0x%llx", bp->b_error == -EFSBADCRC ? "CRC error" : "corruption", - __return_address, bp->b_ops->name, bp->b_bn); + fa, bp->b_ops->name, bp->b_bn); xfs_alert(mp, "Unmount and run xfs_repair"); diff --git a/fs/xfs/xfs_error.h b/fs/xfs/xfs_error.h index 598756281d8d..11f80e072403 100644 --- a/fs/xfs/xfs_error.h +++ b/fs/xfs/xfs_error.h @@ -25,7 +25,8 @@ extern void xfs_error_report(const char *tag, int level, struct xfs_mount *mp, extern void xfs_corruption_error(const char *tag, int level, struct xfs_mount *mp, void *p, const char *filename, int linenum, void *ra); -extern void xfs_verifier_error(struct xfs_buf *bp, int error); +extern void xfs_verifier_error(struct xfs_buf *bp, int error, + xfs_failaddr_t failaddr); #define XFS_ERROR_REPORT(e, lvl, mp) \ xfs_error_report(e, lvl, mp, __FILE__, __LINE__, __return_address) -- cgit v1.2.3 From 50aa90ef03007beca2c9108993f5b4f2bb4f0a66 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Mon, 8 Jan 2018 10:51:04 -0800 Subject: xfs: verify dinode header first Move the v3 inode integrity information (crc, owner, metauuid) before we look at anything else in the inode so that we don't waste time on a torn write or a totally garbled block. This makes xfs_dinode_verify more consistent with the other verifiers. Signed-off-by: Darrick J. 
Wong Reviewed-by: Dave Chinner --- fs/xfs/libxfs/xfs_inode_buf.c | 23 +++++++++++++---------- 1 file changed, 13 insertions(+), 10 deletions(-) diff --git a/fs/xfs/libxfs/xfs_inode_buf.c b/fs/xfs/libxfs/xfs_inode_buf.c index a43a3702af93..a1ba112567b0 100644 --- a/fs/xfs/libxfs/xfs_inode_buf.c +++ b/fs/xfs/libxfs/xfs_inode_buf.c @@ -393,6 +393,19 @@ xfs_dinode_verify( if (dip->di_magic != cpu_to_be16(XFS_DINODE_MAGIC)) return __this_address; + /* Verify v3 integrity information first */ + if (dip->di_version >= 3) { + if (!xfs_sb_version_hascrc(&mp->m_sb)) + return __this_address; + if (!xfs_verify_cksum((char *)dip, mp->m_sb.sb_inodesize, + XFS_DINODE_CRC_OFF)) + return __this_address; + if (be64_to_cpu(dip->di_ino) != ino) + return __this_address; + if (!uuid_equal(&dip->di_uuid, &mp->m_sb.sb_meta_uuid)) + return __this_address; + } + /* don't allow invalid i_size */ if (be64_to_cpu(dip->di_size) & (1ULL << 63)) return __this_address; @@ -409,16 +422,6 @@ xfs_dinode_verify( if (dip->di_version < 3) return NULL; - if (!xfs_sb_version_hascrc(&mp->m_sb)) - return __this_address; - if (!xfs_verify_cksum((char *)dip, mp->m_sb.sb_inodesize, - XFS_DINODE_CRC_OFF)) - return __this_address; - if (be64_to_cpu(dip->di_ino) != ino) - return __this_address; - if (!uuid_equal(&dip->di_uuid, &mp->m_sb.sb_meta_uuid)) - return __this_address; - flags = be16_to_cpu(dip->di_flags); flags2 = be64_to_cpu(dip->di_flags2); -- cgit v1.2.3 From 71493b839e294065ba63bd6f8d07263f3afee8c6 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Mon, 8 Jan 2018 10:51:04 -0800 Subject: xfs: move inode fork verifiers to xfs_dinode_verify Consolidate the fork size and format verifiers to xfs_dinode_verify so that we can reject bad inodes earlier and in a single place. Signed-off-by: Darrick J. 
Wong Reviewed-by: Dave Chinner --- fs/xfs/libxfs/xfs_inode_buf.c | 72 +++++++++++++++++++++++++++++++++-- fs/xfs/libxfs/xfs_inode_fork.c | 86 ------------------------------------------ 2 files changed, 69 insertions(+), 89 deletions(-) diff --git a/fs/xfs/libxfs/xfs_inode_buf.c b/fs/xfs/libxfs/xfs_inode_buf.c index a1ba112567b0..4035b5d5f6fd 100644 --- a/fs/xfs/libxfs/xfs_inode_buf.c +++ b/fs/xfs/libxfs/xfs_inode_buf.c @@ -389,6 +389,7 @@ xfs_dinode_verify( uint16_t mode; uint16_t flags; uint64_t flags2; + uint64_t di_size; if (dip->di_magic != cpu_to_be16(XFS_DINODE_MAGIC)) return __this_address; @@ -407,7 +408,8 @@ xfs_dinode_verify( } /* don't allow invalid i_size */ - if (be64_to_cpu(dip->di_size) & (1ULL << 63)) + di_size = be64_to_cpu(dip->di_size); + if (di_size & (1ULL << 63)) return __this_address; mode = be16_to_cpu(dip->di_mode); @@ -415,14 +417,74 @@ xfs_dinode_verify( return __this_address; /* No zero-length symlinks/dirs. */ - if ((S_ISLNK(mode) || S_ISDIR(mode)) && dip->di_size == 0) + if ((S_ISLNK(mode) || S_ISDIR(mode)) && di_size == 0) return __this_address; + /* Fork checks carried over from xfs_iformat_fork */ + if (mode && + be32_to_cpu(dip->di_nextents) + be16_to_cpu(dip->di_anextents) > + be64_to_cpu(dip->di_nblocks)) + return __this_address; + + if (mode && XFS_DFORK_BOFF(dip) > mp->m_sb.sb_inodesize) + return __this_address; + + flags = be16_to_cpu(dip->di_flags); + + if (mode && (flags & XFS_DIFLAG_REALTIME) && !mp->m_rtdev_targp) + return __this_address; + + /* Do we have appropriate data fork formats for the mode? 
*/ + switch (mode & S_IFMT) { + case S_IFIFO: + case S_IFCHR: + case S_IFBLK: + case S_IFSOCK: + if (dip->di_format != XFS_DINODE_FMT_DEV) + return __this_address; + break; + case S_IFREG: + case S_IFLNK: + case S_IFDIR: + switch (dip->di_format) { + case XFS_DINODE_FMT_LOCAL: + /* + * no local regular files yet + */ + if (S_ISREG(mode)) + return __this_address; + if (di_size > XFS_DFORK_DSIZE(dip, mp)) + return __this_address; + /* fall through */ + case XFS_DINODE_FMT_EXTENTS: + case XFS_DINODE_FMT_BTREE: + break; + default: + return __this_address; + } + break; + case 0: + /* Uninitialized inode ok. */ + break; + default: + return __this_address; + } + + if (XFS_DFORK_Q(dip)) { + switch (dip->di_aformat) { + case XFS_DINODE_FMT_LOCAL: + case XFS_DINODE_FMT_EXTENTS: + case XFS_DINODE_FMT_BTREE: + break; + default: + return __this_address; + } + } + /* only version 3 or greater inodes are extensively verified here */ if (dip->di_version < 3) return NULL; - flags = be16_to_cpu(dip->di_flags); flags2 = be64_to_cpu(dip->di_flags2); /* don't allow reflink/cowextsize if we don't have reflink */ @@ -430,6 +492,10 @@ xfs_dinode_verify( !xfs_sb_version_hasreflink(&mp->m_sb)) return __this_address; + /* only regular files get reflink */ + if ((flags2 & XFS_DIFLAG2_REFLINK) && (mode & S_IFMT) != S_IFREG) + return __this_address; + /* don't let reflink and realtime mix */ if ((flags2 & XFS_DIFLAG2_REFLINK) && (flags & XFS_DIFLAG_REALTIME)) return __this_address; diff --git a/fs/xfs/libxfs/xfs_inode_fork.c b/fs/xfs/libxfs/xfs_inode_fork.c index c79a1616b79d..fd88cbe8c264 100644 --- a/fs/xfs/libxfs/xfs_inode_fork.c +++ b/fs/xfs/libxfs/xfs_inode_fork.c @@ -62,69 +62,11 @@ xfs_iformat_fork( int error = 0; xfs_fsize_t di_size; - if (unlikely(be32_to_cpu(dip->di_nextents) + - be16_to_cpu(dip->di_anextents) > - be64_to_cpu(dip->di_nblocks))) { - xfs_warn(ip->i_mount, - "corrupt dinode %Lu, extent total = %d, nblocks = %Lu.", - (unsigned long long)ip->i_ino, - 
(int)(be32_to_cpu(dip->di_nextents) + - be16_to_cpu(dip->di_anextents)), - (unsigned long long) - be64_to_cpu(dip->di_nblocks)); - XFS_CORRUPTION_ERROR("xfs_iformat(1)", XFS_ERRLEVEL_LOW, - ip->i_mount, dip); - return -EFSCORRUPTED; - } - - if (unlikely(dip->di_forkoff > ip->i_mount->m_sb.sb_inodesize)) { - xfs_warn(ip->i_mount, "corrupt dinode %Lu, forkoff = 0x%x.", - (unsigned long long)ip->i_ino, - dip->di_forkoff); - XFS_CORRUPTION_ERROR("xfs_iformat(2)", XFS_ERRLEVEL_LOW, - ip->i_mount, dip); - return -EFSCORRUPTED; - } - - if (unlikely((ip->i_d.di_flags & XFS_DIFLAG_REALTIME) && - !ip->i_mount->m_rtdev_targp)) { - xfs_warn(ip->i_mount, - "corrupt dinode %Lu, has realtime flag set.", - ip->i_ino); - XFS_CORRUPTION_ERROR("xfs_iformat(realtime)", - XFS_ERRLEVEL_LOW, ip->i_mount, dip); - return -EFSCORRUPTED; - } - - if (unlikely(xfs_is_reflink_inode(ip) && !S_ISREG(inode->i_mode))) { - xfs_warn(ip->i_mount, - "corrupt dinode %llu, wrong file type for reflink.", - ip->i_ino); - XFS_CORRUPTION_ERROR("xfs_iformat(reflink)", - XFS_ERRLEVEL_LOW, ip->i_mount, dip); - return -EFSCORRUPTED; - } - - if (unlikely(xfs_is_reflink_inode(ip) && - (ip->i_d.di_flags & XFS_DIFLAG_REALTIME))) { - xfs_warn(ip->i_mount, - "corrupt dinode %llu, has reflink+realtime flag set.", - ip->i_ino); - XFS_CORRUPTION_ERROR("xfs_iformat(reflink)", - XFS_ERRLEVEL_LOW, ip->i_mount, dip); - return -EFSCORRUPTED; - } - switch (inode->i_mode & S_IFMT) { case S_IFIFO: case S_IFCHR: case S_IFBLK: case S_IFSOCK: - if (unlikely(dip->di_format != XFS_DINODE_FMT_DEV)) { - XFS_CORRUPTION_ERROR("xfs_iformat(3)", XFS_ERRLEVEL_LOW, - ip->i_mount, dip); - return -EFSCORRUPTED; - } ip->i_d.di_size = 0; inode->i_rdev = xfs_to_linux_dev_t(xfs_dinode_get_rdev(dip)); break; @@ -134,32 +76,7 @@ xfs_iformat_fork( case S_IFDIR: switch (dip->di_format) { case XFS_DINODE_FMT_LOCAL: - /* - * no local regular files yet - */ - if (unlikely(S_ISREG(be16_to_cpu(dip->di_mode)))) { - xfs_warn(ip->i_mount, - "corrupt inode %Lu 
(local format for regular file).", - (unsigned long long) ip->i_ino); - XFS_CORRUPTION_ERROR("xfs_iformat(4)", - XFS_ERRLEVEL_LOW, - ip->i_mount, dip); - return -EFSCORRUPTED; - } - di_size = be64_to_cpu(dip->di_size); - if (unlikely(di_size < 0 || - di_size > XFS_DFORK_DSIZE(dip, ip->i_mount))) { - xfs_warn(ip->i_mount, - "corrupt inode %Lu (bad size %Ld for local inode).", - (unsigned long long) ip->i_ino, - (long long) di_size); - XFS_CORRUPTION_ERROR("xfs_iformat(5)", - XFS_ERRLEVEL_LOW, - ip->i_mount, dip); - return -EFSCORRUPTED; - } - size = (int)di_size; error = xfs_iformat_local(ip, dip, XFS_DATA_FORK, size); break; @@ -170,14 +87,11 @@ xfs_iformat_fork( error = xfs_iformat_btree(ip, dip, XFS_DATA_FORK); break; default: - XFS_ERROR_REPORT("xfs_iformat(6)", XFS_ERRLEVEL_LOW, - ip->i_mount); return -EFSCORRUPTED; } break; default: - XFS_ERROR_REPORT("xfs_iformat(7)", XFS_ERRLEVEL_LOW, ip->i_mount); return -EFSCORRUPTED; } if (error) -- cgit v1.2.3 From 1e1bbd8e7ee0624034e9bf1e91ac11a7aaa2f8a6 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Mon, 8 Jan 2018 10:51:05 -0800 Subject: xfs: create structure verifier function for shortform xattrs Create a function to perform structure verification for short form extended attributes. Signed-off-by: Darrick J. Wong Reviewed-by: Dave Chinner --- fs/xfs/libxfs/xfs_attr_leaf.c | 74 +++++++++++++++++++++++++++++++++++++++++++ fs/xfs/libxfs/xfs_attr_leaf.h | 1 + 2 files changed, 75 insertions(+) diff --git a/fs/xfs/libxfs/xfs_attr_leaf.c b/fs/xfs/libxfs/xfs_attr_leaf.c index 68c66fa378fd..ae3bccb4aa5a 100644 --- a/fs/xfs/libxfs/xfs_attr_leaf.c +++ b/fs/xfs/libxfs/xfs_attr_leaf.c @@ -872,6 +872,80 @@ xfs_attr_shortform_allfit( return xfs_attr_shortform_bytesfit(dp, bytes); } +/* Verify the consistency of an inline attribute fork. 
*/ +xfs_failaddr_t +xfs_attr_shortform_verify( + struct xfs_inode *ip) +{ + struct xfs_attr_shortform *sfp; + struct xfs_attr_sf_entry *sfep; + struct xfs_attr_sf_entry *next_sfep; + char *endp; + struct xfs_ifork *ifp; + int i; + int size; + + ASSERT(ip->i_d.di_aformat == XFS_DINODE_FMT_LOCAL); + ifp = XFS_IFORK_PTR(ip, XFS_ATTR_FORK); + sfp = (struct xfs_attr_shortform *)ifp->if_u1.if_data; + size = ifp->if_bytes; + + /* + * Give up if the attribute is way too short. + */ + if (size < sizeof(struct xfs_attr_sf_hdr)) + return __this_address; + + endp = (char *)sfp + size; + + /* Check all reported entries */ + sfep = &sfp->list[0]; + for (i = 0; i < sfp->hdr.count; i++) { + /* + * struct xfs_attr_sf_entry has a variable length. + * Check the fixed-offset parts of the structure are + * within the data buffer. + */ + if (((char *)sfep + sizeof(*sfep)) >= endp) + return __this_address; + + /* Don't allow names with known bad length. */ + if (sfep->namelen == 0) + return __this_address; + + /* + * Check that the variable-length part of the structure is + * within the data buffer. The next entry starts after the + * name component, so nextentry is an acceptable test. + */ + next_sfep = XFS_ATTR_SF_NEXTENTRY(sfep); + if ((char *)next_sfep > endp) + return __this_address; + + /* + * Check for unknown flags. Short form doesn't support + * the incomplete or local bits, so we can use the namespace + * mask here. + */ + if (sfep->flags & ~XFS_ATTR_NSP_ONDISK_MASK) + return __this_address; + + /* + * Check for invalid namespace combinations. We only allow + * one namespace flag per xattr, so we can just count the + * bits (i.e. hweight) here. 
+ */ + if (hweight8(sfep->flags & XFS_ATTR_NSP_ONDISK_MASK) > 1) + return __this_address; + + sfep = next_sfep; + } + if ((void *)sfep != (void *)endp) + return __this_address; + + return NULL; +} + /* * Convert a leaf attribute list to shortform attribute list */ diff --git a/fs/xfs/libxfs/xfs_attr_leaf.h b/fs/xfs/libxfs/xfs_attr_leaf.h index 894124efb421..4da08af5b134 100644 --- a/fs/xfs/libxfs/xfs_attr_leaf.h +++ b/fs/xfs/libxfs/xfs_attr_leaf.h @@ -53,6 +53,7 @@ int xfs_attr_shortform_to_leaf(struct xfs_da_args *args, int xfs_attr_shortform_remove(struct xfs_da_args *args); int xfs_attr_shortform_allfit(struct xfs_buf *bp, struct xfs_inode *dp); int xfs_attr_shortform_bytesfit(struct xfs_inode *dp, int bytes); +xfs_failaddr_t xfs_attr_shortform_verify(struct xfs_inode *ip); void xfs_attr_fork_remove(struct xfs_inode *ip, struct xfs_trans *tp); /* -- cgit v1.2.3 From 0795e004fd4f2723f3dbf09a195cd7ccf3c74c58 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Mon, 8 Jan 2018 10:51:05 -0800 Subject: xfs: create structure verifier function for short form symlinks Create a function to check the structure of short form symlink targets. Signed-off-by: Darrick J. 
Wong Reviewed-by: Dave Chinner --- fs/xfs/libxfs/xfs_shared.h | 1 + fs/xfs/libxfs/xfs_symlink_remote.c | 34 ++++++++++++++++++++++++++++++++++ 2 files changed, 35 insertions(+) diff --git a/fs/xfs/libxfs/xfs_shared.h b/fs/xfs/libxfs/xfs_shared.h index c6f4eb46fe26..67ccb1ab4d7e 100644 --- a/fs/xfs/libxfs/xfs_shared.h +++ b/fs/xfs/libxfs/xfs_shared.h @@ -143,5 +143,6 @@ bool xfs_symlink_hdr_ok(xfs_ino_t ino, uint32_t offset, uint32_t size, struct xfs_buf *bp); void xfs_symlink_local_to_remote(struct xfs_trans *tp, struct xfs_buf *bp, struct xfs_inode *ip, struct xfs_ifork *ifp); +xfs_failaddr_t xfs_symlink_shortform_verify(struct xfs_inode *ip); #endif /* __XFS_SHARED_H__ */ diff --git a/fs/xfs/libxfs/xfs_symlink_remote.c b/fs/xfs/libxfs/xfs_symlink_remote.c index 5497014f5293..adf2d7833abd 100644 --- a/fs/xfs/libxfs/xfs_symlink_remote.c +++ b/fs/xfs/libxfs/xfs_symlink_remote.c @@ -209,3 +209,37 @@ xfs_symlink_local_to_remote( xfs_trans_log_buf(tp, bp, 0, sizeof(struct xfs_dsymlink_hdr) + ifp->if_bytes - 1); } + +/* Verify the consistency of an inline symlink. */ +xfs_failaddr_t +xfs_symlink_shortform_verify( + struct xfs_inode *ip) +{ + char *sfp; + char *endp; + struct xfs_ifork *ifp; + int size; + + ASSERT(ip->i_d.di_format == XFS_DINODE_FMT_LOCAL); + ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK); + sfp = (char *)ifp->if_u1.if_data; + size = ifp->if_bytes; + endp = sfp + size; + + /* Zero length symlinks can exist while we're deleting a remote one. */ + if (size == 0) + return NULL; + + /* No negative sizes or overly long symlink targets. */ + if (size < 0 || size > XFS_SYMLINK_MAXLEN) + return __this_address; + + /* No NULLs in the target either. */ + if (memchr(sfp, 0, size - 1)) + return __this_address; + + /* We /did/ null-terminate the buffer, right? */ + if (*endp != 0) + return __this_address; + return NULL; +} -- cgit v1.2.3 From dc042c2d8ff629dd411e9a60bce9c379e2f8aaf8 Mon Sep 17 00:00:00 2001 From: "Darrick J. 
Wong" Date: Mon, 8 Jan 2018 10:51:06 -0800 Subject: xfs: refactor short form directory structure verifier function Change the short form directory structure verifier function to return the instruction pointer of a failing check or NULL if everything's ok. Signed-off-by: Darrick J. Wong Reviewed-by: Dave Chinner --- fs/xfs/libxfs/xfs_dir2_priv.h | 2 +- fs/xfs/libxfs/xfs_dir2_sf.c | 26 +++++++++++++------------- fs/xfs/libxfs/xfs_inode_fork.c | 5 ++--- 3 files changed, 16 insertions(+), 17 deletions(-) diff --git a/fs/xfs/libxfs/xfs_dir2_priv.h b/fs/xfs/libxfs/xfs_dir2_priv.h index a15ad7cd8057..753aeeeffc18 100644 --- a/fs/xfs/libxfs/xfs_dir2_priv.h +++ b/fs/xfs/libxfs/xfs_dir2_priv.h @@ -129,7 +129,7 @@ extern int xfs_dir2_sf_create(struct xfs_da_args *args, xfs_ino_t pino); extern int xfs_dir2_sf_lookup(struct xfs_da_args *args); extern int xfs_dir2_sf_removename(struct xfs_da_args *args); extern int xfs_dir2_sf_replace(struct xfs_da_args *args); -extern int xfs_dir2_sf_verify(struct xfs_inode *ip); +extern xfs_failaddr_t xfs_dir2_sf_verify(struct xfs_inode *ip); /* xfs_dir2_readdir.c */ extern int xfs_readdir(struct xfs_trans *tp, struct xfs_inode *dp, diff --git a/fs/xfs/libxfs/xfs_dir2_sf.c b/fs/xfs/libxfs/xfs_dir2_sf.c index be8b9755f66a..8500fa2a1321 100644 --- a/fs/xfs/libxfs/xfs_dir2_sf.c +++ b/fs/xfs/libxfs/xfs_dir2_sf.c @@ -630,7 +630,7 @@ xfs_dir2_sf_check( #endif /* DEBUG */ /* Verify the consistency of an inline directory. 
*/ -int +xfs_failaddr_t xfs_dir2_sf_verify( struct xfs_inode *ip) { @@ -665,7 +665,7 @@ xfs_dir2_sf_verify( */ if (size <= offsetof(struct xfs_dir2_sf_hdr, parent) || size < xfs_dir2_sf_hdr_size(sfp->i8count)) - return -EFSCORRUPTED; + return __this_address; endp = (char *)sfp + size; @@ -674,7 +674,7 @@ xfs_dir2_sf_verify( i8count = ino > XFS_DIR2_MAX_SHORT_INUM; error = xfs_dir_ino_validate(mp, ino); if (error) - return error; + return __this_address; offset = dops->data_first_offset; /* Check all reported entries */ @@ -686,11 +686,11 @@ xfs_dir2_sf_verify( * within the data buffer. */ if (((char *)sfep + sizeof(*sfep)) >= endp) - return -EFSCORRUPTED; + return __this_address; /* Don't allow names with known bad length. */ if (sfep->namelen == 0) - return -EFSCORRUPTED; + return __this_address; /* * Check that the variable-length part of the structure is @@ -699,23 +699,23 @@ xfs_dir2_sf_verify( */ next_sfep = dops->sf_nextentry(sfp, sfep); if (endp < (char *)next_sfep) - return -EFSCORRUPTED; + return __this_address; /* Check that the offsets always increase. */ if (xfs_dir2_sf_get_offset(sfep) < offset) - return -EFSCORRUPTED; + return __this_address; /* Check the inode number. */ ino = dops->sf_get_ino(sfp, sfep); i8count += ino > XFS_DIR2_MAX_SHORT_INUM; error = xfs_dir_ino_validate(mp, ino); if (error) - return error; + return __this_address; /* Check the file type. */ filetype = dops->sf_get_ftype(sfep); if (filetype >= XFS_DIR3_FT_MAX) - return -EFSCORRUPTED; + return __this_address; offset = xfs_dir2_sf_get_offset(sfep) + dops->data_entsize(sfep->namelen); @@ -723,16 +723,16 @@ xfs_dir2_sf_verify( sfep = next_sfep; } if (i8count != sfp->i8count) - return -EFSCORRUPTED; + return __this_address; if ((void *)sfep != (void *)endp) - return -EFSCORRUPTED; + return __this_address; /* Make sure this whole thing ought to be in local format. 
*/ if (offset + (sfp->count + 2) * (uint)sizeof(xfs_dir2_leaf_entry_t) + (uint)sizeof(xfs_dir2_block_tail_t) > mp->m_dir_geo->blksize) - return -EFSCORRUPTED; + return __this_address; - return 0; + return NULL; } /* diff --git a/fs/xfs/libxfs/xfs_inode_fork.c b/fs/xfs/libxfs/xfs_inode_fork.c index fd88cbe8c264..a92395a901d4 100644 --- a/fs/xfs/libxfs/xfs_inode_fork.c +++ b/fs/xfs/libxfs/xfs_inode_fork.c @@ -99,10 +99,9 @@ xfs_iformat_fork( /* Check inline dir contents. */ if (S_ISDIR(inode->i_mode) && dip->di_format == XFS_DINODE_FMT_LOCAL) { - error = xfs_dir2_sf_verify(ip); - if (error) { + if (xfs_dir2_sf_verify(ip)) { xfs_idestroy_fork(ip, XFS_DATA_FORK); - return error; + return -EFSCORRUPTED; } } -- cgit v1.2.3 From 9cfb9b47479e237d217dbcfafe034cbf98f45909 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Mon, 8 Jan 2018 10:51:06 -0800 Subject: xfs: provide a centralized method for verifying inline fork data Replace the current haphazard dir2 shortform verifier callsites with a centralized verifier function that can be called either with the default verifier functions or with a custom set. This helps us strengthen integrity checking while providing us with flexibility for repair tools. xfs_repair wants this to be able to supply its own verifier functions when trying to fix possibly corrupt metadata. Signed-off-by: Darrick J. 
Wong Reviewed-by: Dave Chinner --- fs/xfs/libxfs/xfs_inode_fork.c | 64 +++++++++++++++++++++++++++++------------- fs/xfs/libxfs/xfs_inode_fork.h | 14 +++++++++ fs/xfs/xfs_icache.c | 5 ++++ fs/xfs/xfs_inode.c | 34 +++++++++++++++++++--- fs/xfs/xfs_inode.h | 2 ++ fs/xfs/xfs_log_recover.c | 4 +++ 6 files changed, 99 insertions(+), 24 deletions(-) diff --git a/fs/xfs/libxfs/xfs_inode_fork.c b/fs/xfs/libxfs/xfs_inode_fork.c index a92395a901d4..c1c1a86e7f47 100644 --- a/fs/xfs/libxfs/xfs_inode_fork.c +++ b/fs/xfs/libxfs/xfs_inode_fork.c @@ -35,6 +35,8 @@ #include "xfs_da_format.h" #include "xfs_da_btree.h" #include "xfs_dir2_priv.h" +#include "xfs_attr_leaf.h" +#include "xfs_shared.h" kmem_zone_t *xfs_ifork_zone; @@ -97,14 +99,6 @@ xfs_iformat_fork( if (error) return error; - /* Check inline dir contents. */ - if (S_ISDIR(inode->i_mode) && dip->di_format == XFS_DINODE_FMT_LOCAL) { - if (xfs_dir2_sf_verify(ip)) { - xfs_idestroy_fork(ip, XFS_DATA_FORK); - return -EFSCORRUPTED; - } - } - if (xfs_is_reflink_inode(ip)) { ASSERT(ip->i_cowfp == NULL); xfs_ifork_init_cow(ip); @@ -121,18 +115,6 @@ xfs_iformat_fork( atp = (xfs_attr_shortform_t *)XFS_DFORK_APTR(dip); size = be16_to_cpu(atp->hdr.totsize); - if (unlikely(size < sizeof(struct xfs_attr_sf_hdr))) { - xfs_warn(ip->i_mount, - "corrupt inode %Lu (bad attr fork size %Ld).", - (unsigned long long) ip->i_ino, - (long long) size); - XFS_CORRUPTION_ERROR("xfs_iformat(8)", - XFS_ERRLEVEL_LOW, - ip->i_mount, dip); - error = -EFSCORRUPTED; - break; - } - error = xfs_iformat_local(ip, dip, XFS_ATTR_FORK, size); break; case XFS_DINODE_FMT_EXTENTS: @@ -740,3 +722,45 @@ xfs_ifork_init_cow( ip->i_cformat = XFS_DINODE_FMT_EXTENTS; ip->i_cnextents = 0; } + +/* Default fork content verifiers. */ +struct xfs_ifork_ops xfs_default_ifork_ops = { + .verify_attr = xfs_attr_shortform_verify, + .verify_dir = xfs_dir2_sf_verify, + .verify_symlink = xfs_symlink_shortform_verify, +}; + +/* Verify the inline contents of the data fork of an inode. 
*/ +xfs_failaddr_t +xfs_ifork_verify_data( + struct xfs_inode *ip, + struct xfs_ifork_ops *ops) +{ + /* Non-local data fork, we're done. */ + if (ip->i_d.di_format != XFS_DINODE_FMT_LOCAL) + return NULL; + + /* Check the inline data fork if there is one. */ + switch (VFS_I(ip)->i_mode & S_IFMT) { + case S_IFDIR: + return ops->verify_dir(ip); + case S_IFLNK: + return ops->verify_symlink(ip); + default: + return NULL; + } +} + +/* Verify the inline contents of the attr fork of an inode. */ +xfs_failaddr_t +xfs_ifork_verify_attr( + struct xfs_inode *ip, + struct xfs_ifork_ops *ops) +{ + /* There has to be an attr fork allocated if aformat is local. */ + if (ip->i_d.di_aformat != XFS_DINODE_FMT_LOCAL) + return NULL; + if (!XFS_IFORK_PTR(ip, XFS_ATTR_FORK)) + return __this_address; + return ops->verify_attr(ip); +} diff --git a/fs/xfs/libxfs/xfs_inode_fork.h b/fs/xfs/libxfs/xfs_inode_fork.h index b9f0098e33b8..dd8aba0dd119 100644 --- a/fs/xfs/libxfs/xfs_inode_fork.h +++ b/fs/xfs/libxfs/xfs_inode_fork.h @@ -186,4 +186,18 @@ extern struct kmem_zone *xfs_ifork_zone; extern void xfs_ifork_init_cow(struct xfs_inode *ip); +typedef xfs_failaddr_t (*xfs_ifork_verifier_t)(struct xfs_inode *); + +struct xfs_ifork_ops { + xfs_ifork_verifier_t verify_symlink; + xfs_ifork_verifier_t verify_dir; + xfs_ifork_verifier_t verify_attr; +}; +extern struct xfs_ifork_ops xfs_default_ifork_ops; + +xfs_failaddr_t xfs_ifork_verify_data(struct xfs_inode *ip, + struct xfs_ifork_ops *ops); +xfs_failaddr_t xfs_ifork_verify_attr(struct xfs_inode *ip, + struct xfs_ifork_ops *ops); + #endif /* __XFS_INODE_FORK_H__ */ diff --git a/fs/xfs/xfs_icache.c b/fs/xfs/xfs_icache.c index 3861d61fb265..c9c7c02bc2bb 100644 --- a/fs/xfs/xfs_icache.c +++ b/fs/xfs/xfs_icache.c @@ -473,6 +473,11 @@ xfs_iget_cache_miss( if (error) goto out_destroy; + if (!xfs_inode_verify_forks(ip)) { + error = -EFSCORRUPTED; + goto out_destroy; + } + trace_xfs_iget_miss(ip); if ((VFS_I(ip)->i_mode == 0) && !(flags & XFS_IGET_CREATE)) 
{ diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index 6f95bdb408ce..663b546f2bcd 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -3479,6 +3479,34 @@ abort_out: return error; } +/* + * If there are inline format data / attr forks attached to this inode, + * make sure they're not corrupt. + */ +bool +xfs_inode_verify_forks( + struct xfs_inode *ip) +{ + xfs_failaddr_t fa; + + fa = xfs_ifork_verify_data(ip, &xfs_default_ifork_ops); + if (fa) { + xfs_alert(ip->i_mount, + "%s: bad inode %llu inline data fork at %pF", + __func__, ip->i_ino, fa); + return false; + } + + fa = xfs_ifork_verify_attr(ip, &xfs_default_ifork_ops); + if (fa) { + xfs_alert(ip->i_mount, + "%s: bad inode %llu inline attr fork at %pF", + __func__, ip->i_ino, fa); + return false; + } + return true; +} + STATIC int xfs_iflush_int( struct xfs_inode *ip, @@ -3557,10 +3585,8 @@ xfs_iflush_int( if (ip->i_d.di_version < 3) ip->i_d.di_flushiter++; - /* Check the inline directory data. */ - if (S_ISDIR(VFS_I(ip)->i_mode) && - ip->i_d.di_format == XFS_DINODE_FMT_LOCAL && - xfs_dir2_sf_verify(ip)) + /* Check the inline fork data before we write out. */ + if (!xfs_inode_verify_forks(ip)) goto corrupt_out; /* diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h index d383e392ec9d..386b0bb3c92a 100644 --- a/fs/xfs/xfs_inode.h +++ b/fs/xfs/xfs_inode.h @@ -491,4 +491,6 @@ extern struct kmem_zone *xfs_inode_zone; /* The default CoW extent size hint. 
*/ #define XFS_DEFAULT_COWEXTSZ_HINT 32 +bool xfs_inode_verify_forks(struct xfs_inode *ip); + #endif /* __XFS_INODE_H__ */ diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c index 28d1abfe835e..04f5b3081417 100644 --- a/fs/xfs/xfs_log_recover.c +++ b/fs/xfs/xfs_log_recover.c @@ -2957,6 +2957,10 @@ xfs_recover_inode_owner_change( if (error) goto out_free_ip; + if (!xfs_inode_verify_forks(ip)) { + error = -EFSCORRUPTED; + goto out_free_ip; + } if (in_f->ilf_fields & XFS_ILOG_DOWNER) { ASSERT(in_f->ilf_fields & XFS_ILOG_DBROOT); -- cgit v1.2.3 From 8ba92d43d499f4920af983a7c16e02304dd36932 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Mon, 8 Jan 2018 10:51:07 -0800 Subject: xfs: fail out of xfs_attr3_leaf_lookup_int if it looks corrupt If the xattr leaf block looks corrupt, return -EFSCORRUPTED to userspace instead of ASSERTing on debug kernels or running off the end of the buffer on regular kernels. Signed-off-by: Darrick J. Wong Reviewed-by: Dave Chinner --- fs/xfs/libxfs/xfs_attr_leaf.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/fs/xfs/libxfs/xfs_attr_leaf.c b/fs/xfs/libxfs/xfs_attr_leaf.c index ae3bccb4aa5a..7168827ca758 100644 --- a/fs/xfs/libxfs/xfs_attr_leaf.c +++ b/fs/xfs/libxfs/xfs_attr_leaf.c @@ -2249,7 +2249,8 @@ xfs_attr3_leaf_lookup_int( leaf = bp->b_addr; xfs_attr3_leaf_hdr_from_disk(args->geo, &ichdr, leaf); entries = xfs_attr3_leaf_entryp(leaf); - ASSERT(ichdr.count < args->geo->blksize / 8); + if (ichdr.count >= args->geo->blksize / 8) + return -EFSCORRUPTED; /* * Binary search. 
(note: small blocks will skip this loop) @@ -2265,8 +2266,10 @@ xfs_attr3_leaf_lookup_int( else break; } - ASSERT(probe >= 0 && (!ichdr.count || probe < ichdr.count)); - ASSERT(span <= 4 || be32_to_cpu(entry->hashval) == hashval); + if (!(probe >= 0 && (!ichdr.count || probe < ichdr.count))) + return -EFSCORRUPTED; + if (!(span <= 4 || be32_to_cpu(entry->hashval) == hashval)) + return -EFSCORRUPTED; /* * Since we may have duplicate hashval's, find the first matching -- cgit v1.2.3 From b55725974c9d3a5afcdf83daff6fba7d3f91ffca Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Mon, 8 Jan 2018 10:51:08 -0800 Subject: xfs: create a new buf_ops pointer to verify structure metadata Expose all metadata structure buffer verifier functions via buf_ops. These will be used by the online scrub mechanism to look for problems with buffers that are already sitting around in memory. Signed-off-by: Darrick J. Wong Reviewed-by: Dave Chinner --- fs/xfs/libxfs/xfs_alloc.c | 23 +++++++++++---- fs/xfs/libxfs/xfs_alloc_btree.c | 1 + fs/xfs/libxfs/xfs_attr_leaf.c | 1 + fs/xfs/libxfs/xfs_attr_remote.c | 58 ++++++++++++++++++++++++++++---------- fs/xfs/libxfs/xfs_bmap_btree.c | 1 + fs/xfs/libxfs/xfs_da_btree.c | 25 ++++++++++++++++ fs/xfs/libxfs/xfs_dir2_block.c | 1 + fs/xfs/libxfs/xfs_dir2_data.c | 1 + fs/xfs/libxfs/xfs_dir2_leaf.c | 16 +++++++++++ fs/xfs/libxfs/xfs_dir2_node.c | 1 + fs/xfs/libxfs/xfs_dquot_buf.c | 12 ++++++++ fs/xfs/libxfs/xfs_ialloc.c | 1 + fs/xfs/libxfs/xfs_ialloc_btree.c | 1 + fs/xfs/libxfs/xfs_refcount_btree.c | 1 + fs/xfs/libxfs/xfs_rmap_btree.c | 1 + fs/xfs/libxfs/xfs_symlink_remote.c | 1 + fs/xfs/xfs_buf.h | 1 + 17 files changed, 125 insertions(+), 21 deletions(-) diff --git a/fs/xfs/libxfs/xfs_alloc.c b/fs/xfs/libxfs/xfs_alloc.c index 6bace8cb47da..50ba989481cc 100644 --- a/fs/xfs/libxfs/xfs_alloc.c +++ b/fs/xfs/libxfs/xfs_alloc.c @@ -528,6 +528,15 @@ xfs_agfl_verify( struct xfs_agfl *agfl = XFS_BUF_TO_AGFL(bp); int i; + /* + * There is no verification of 
non-crc AGFLs because mkfs does not + * initialise the AGFL to zero or NULL. Hence the only valid part of the + * AGFL is what the AGF says is active. We can't get to the AGF, so we + * can't verify just those entries are valid. + */ + if (!xfs_sb_version_hascrc(&mp->m_sb)) + return NULL; + if (!uuid_equal(&agfl->agfl_uuid, &mp->m_sb.sb_meta_uuid)) return __this_address; if (be32_to_cpu(agfl->agfl_magicnum) != XFS_AGFL_MAGIC) @@ -605,6 +614,7 @@ const struct xfs_buf_ops xfs_agfl_buf_ops = { .name = "xfs_agfl", .verify_read = xfs_agfl_read_verify, .verify_write = xfs_agfl_write_verify, + .verify_struct = xfs_agfl_verify, }; /* @@ -2402,10 +2412,10 @@ xfs_alloc_put_freelist( static xfs_failaddr_t xfs_agf_verify( - struct xfs_mount *mp, - struct xfs_buf *bp) - { - struct xfs_agf *agf = XFS_BUF_TO_AGF(bp); + struct xfs_buf *bp) +{ + struct xfs_mount *mp = bp->b_target->bt_mount; + struct xfs_agf *agf = XFS_BUF_TO_AGF(bp); if (xfs_sb_version_hascrc(&mp->m_sb)) { if (!uuid_equal(&agf->agf_uuid, &mp->m_sb.sb_meta_uuid)) @@ -2467,7 +2477,7 @@ xfs_agf_read_verify( !xfs_buf_verify_cksum(bp, XFS_AGF_CRC_OFF)) xfs_verifier_error(bp, -EFSBADCRC, __this_address); else { - fa = xfs_agf_verify(mp, bp); + fa = xfs_agf_verify(bp); if (XFS_TEST_ERROR(fa, mp, XFS_ERRTAG_ALLOC_READ_AGF)) xfs_verifier_error(bp, -EFSCORRUPTED, fa); } @@ -2481,7 +2491,7 @@ xfs_agf_write_verify( struct xfs_buf_log_item *bip = bp->b_fspriv; xfs_failaddr_t fa; - fa = xfs_agf_verify(mp, bp); + fa = xfs_agf_verify(bp); if (fa) { xfs_verifier_error(bp, -EFSCORRUPTED, fa); return; @@ -2500,6 +2510,7 @@ const struct xfs_buf_ops xfs_agf_buf_ops = { .name = "xfs_agf", .verify_read = xfs_agf_read_verify, .verify_write = xfs_agf_write_verify, + .verify_struct = xfs_agf_verify, }; /* diff --git a/fs/xfs/libxfs/xfs_alloc_btree.c b/fs/xfs/libxfs/xfs_alloc_btree.c index 060d6fa83a7f..6840b588187e 100644 --- a/fs/xfs/libxfs/xfs_alloc_btree.c +++ b/fs/xfs/libxfs/xfs_alloc_btree.c @@ -400,6 +400,7 @@ const struct 
xfs_buf_ops xfs_allocbt_buf_ops = { .name = "xfs_allocbt", .verify_read = xfs_allocbt_read_verify, .verify_write = xfs_allocbt_write_verify, + .verify_struct = xfs_allocbt_verify, }; diff --git a/fs/xfs/libxfs/xfs_attr_leaf.c b/fs/xfs/libxfs/xfs_attr_leaf.c index 7168827ca758..6fddce7bbd54 100644 --- a/fs/xfs/libxfs/xfs_attr_leaf.c +++ b/fs/xfs/libxfs/xfs_attr_leaf.c @@ -339,6 +339,7 @@ const struct xfs_buf_ops xfs_attr3_leaf_buf_ops = { .name = "xfs_attr3_leaf", .verify_read = xfs_attr3_leaf_read_verify, .verify_write = xfs_attr3_leaf_write_verify, + .verify_struct = xfs_attr3_leaf_verify, }; int diff --git a/fs/xfs/libxfs/xfs_attr_remote.c b/fs/xfs/libxfs/xfs_attr_remote.c index 55514b343216..21be186067a2 100644 --- a/fs/xfs/libxfs/xfs_attr_remote.c +++ b/fs/xfs/libxfs/xfs_attr_remote.c @@ -116,20 +116,21 @@ xfs_attr3_rmt_verify( return NULL; } -static void -xfs_attr3_rmt_read_verify( - struct xfs_buf *bp) +static int +__xfs_attr3_rmt_read_verify( + struct xfs_buf *bp, + bool check_crc, + xfs_failaddr_t *failaddr) { struct xfs_mount *mp = bp->b_target->bt_mount; char *ptr; - xfs_failaddr_t fa; int len; xfs_daddr_t bno; int blksize = mp->m_attr_geo->blksize; /* no verification of non-crc buffers */ if (!xfs_sb_version_hascrc(&mp->m_sb)) - return; + return 0; ptr = bp->b_addr; bno = bp->b_bn; @@ -137,22 +138,48 @@ xfs_attr3_rmt_read_verify( ASSERT(len >= blksize); while (len > 0) { - if (!xfs_verify_cksum(ptr, blksize, XFS_ATTR3_RMT_CRC_OFF)) { - xfs_verifier_error(bp, -EFSBADCRC, __this_address); - return; - } - fa = xfs_attr3_rmt_verify(mp, ptr, blksize, bno); - if (fa) { - xfs_verifier_error(bp, -EFSCORRUPTED, fa); - break; + if (check_crc && + !xfs_verify_cksum(ptr, blksize, XFS_ATTR3_RMT_CRC_OFF)) { + *failaddr = __this_address; + return -EFSBADCRC; } + *failaddr = xfs_attr3_rmt_verify(mp, ptr, blksize, bno); + if (*failaddr) + return -EFSCORRUPTED; len -= blksize; ptr += blksize; bno += BTOBB(blksize); } - if (len != 0) - xfs_verifier_error(bp, -EFSCORRUPTED, 
__this_address); + if (len != 0) { + *failaddr = __this_address; + return -EFSCORRUPTED; + } + + return 0; +} + +static void +xfs_attr3_rmt_read_verify( + struct xfs_buf *bp) +{ + xfs_failaddr_t fa; + int error; + + error = __xfs_attr3_rmt_read_verify(bp, true, &fa); + if (error) + xfs_verifier_error(bp, error, fa); +} + +static xfs_failaddr_t +xfs_attr3_rmt_verify_struct( + struct xfs_buf *bp) +{ + xfs_failaddr_t fa; + int error; + + error = __xfs_attr3_rmt_read_verify(bp, false, &fa); + return error ? fa : NULL; } static void @@ -207,6 +234,7 @@ const struct xfs_buf_ops xfs_attr3_rmt_buf_ops = { .name = "xfs_attr3_rmt", .verify_read = xfs_attr3_rmt_read_verify, .verify_write = xfs_attr3_rmt_write_verify, + .verify_struct = xfs_attr3_rmt_verify_struct, }; STATIC int diff --git a/fs/xfs/libxfs/xfs_bmap_btree.c b/fs/xfs/libxfs/xfs_bmap_btree.c index 64ae0eea9812..9faf479aba49 100644 --- a/fs/xfs/libxfs/xfs_bmap_btree.c +++ b/fs/xfs/libxfs/xfs_bmap_btree.c @@ -501,6 +501,7 @@ const struct xfs_buf_ops xfs_bmbt_buf_ops = { .name = "xfs_bmbt", .verify_read = xfs_bmbt_read_verify, .verify_write = xfs_bmbt_write_verify, + .verify_struct = xfs_bmbt_verify, }; diff --git a/fs/xfs/libxfs/xfs_da_btree.c b/fs/xfs/libxfs/xfs_da_btree.c index 0ae39617a6c1..cf07585b9d83 100644 --- a/fs/xfs/libxfs/xfs_da_btree.c +++ b/fs/xfs/libxfs/xfs_da_btree.c @@ -243,10 +243,35 @@ xfs_da3_node_read_verify( } } +/* Verify the structure of a da3 block. 
*/ +static xfs_failaddr_t +xfs_da3_node_verify_struct( + struct xfs_buf *bp) +{ + struct xfs_da_blkinfo *info = bp->b_addr; + + switch (be16_to_cpu(info->magic)) { + case XFS_DA3_NODE_MAGIC: + case XFS_DA_NODE_MAGIC: + return xfs_da3_node_verify(bp); + case XFS_ATTR_LEAF_MAGIC: + case XFS_ATTR3_LEAF_MAGIC: + bp->b_ops = &xfs_attr3_leaf_buf_ops; + return bp->b_ops->verify_struct(bp); + case XFS_DIR2_LEAFN_MAGIC: + case XFS_DIR3_LEAFN_MAGIC: + bp->b_ops = &xfs_dir3_leafn_buf_ops; + return bp->b_ops->verify_struct(bp); + default: + return __this_address; + } +} + const struct xfs_buf_ops xfs_da3_node_buf_ops = { .name = "xfs_da3_node", .verify_read = xfs_da3_node_read_verify, .verify_write = xfs_da3_node_write_verify, + .verify_struct = xfs_da3_node_verify_struct, }; int diff --git a/fs/xfs/libxfs/xfs_dir2_block.c b/fs/xfs/libxfs/xfs_dir2_block.c index cfd2777bf918..fe951fa1a583 100644 --- a/fs/xfs/libxfs/xfs_dir2_block.c +++ b/fs/xfs/libxfs/xfs_dir2_block.c @@ -126,6 +126,7 @@ const struct xfs_buf_ops xfs_dir3_block_buf_ops = { .name = "xfs_dir3_block", .verify_read = xfs_dir3_block_read_verify, .verify_write = xfs_dir3_block_write_verify, + .verify_struct = xfs_dir3_block_verify, }; int diff --git a/fs/xfs/libxfs/xfs_dir2_data.c b/fs/xfs/libxfs/xfs_dir2_data.c index 2fa7c34023fb..32378122cd1f 100644 --- a/fs/xfs/libxfs/xfs_dir2_data.c +++ b/fs/xfs/libxfs/xfs_dir2_data.c @@ -334,6 +334,7 @@ const struct xfs_buf_ops xfs_dir3_data_buf_ops = { .name = "xfs_dir3_data", .verify_read = xfs_dir3_data_read_verify, .verify_write = xfs_dir3_data_write_verify, + .verify_struct = xfs_dir3_data_verify, }; static const struct xfs_buf_ops xfs_dir3_data_reada_buf_ops = { diff --git a/fs/xfs/libxfs/xfs_dir2_leaf.c b/fs/xfs/libxfs/xfs_dir2_leaf.c index a03d67995811..a7ad649398c7 100644 --- a/fs/xfs/libxfs/xfs_dir2_leaf.c +++ b/fs/xfs/libxfs/xfs_dir2_leaf.c @@ -227,6 +227,13 @@ __write_verify( xfs_buf_update_cksum(bp, XFS_DIR3_LEAF_CRC_OFF); } +static xfs_failaddr_t 
+xfs_dir3_leaf1_verify( + struct xfs_buf *bp) +{ + return xfs_dir3_leaf_verify(bp, XFS_DIR2_LEAF1_MAGIC); +} + static void xfs_dir3_leaf1_read_verify( struct xfs_buf *bp) @@ -241,6 +248,13 @@ xfs_dir3_leaf1_write_verify( __write_verify(bp, XFS_DIR2_LEAF1_MAGIC); } +static xfs_failaddr_t +xfs_dir3_leafn_verify( + struct xfs_buf *bp) +{ + return xfs_dir3_leaf_verify(bp, XFS_DIR2_LEAFN_MAGIC); +} + static void xfs_dir3_leafn_read_verify( struct xfs_buf *bp) @@ -259,12 +273,14 @@ const struct xfs_buf_ops xfs_dir3_leaf1_buf_ops = { .name = "xfs_dir3_leaf1", .verify_read = xfs_dir3_leaf1_read_verify, .verify_write = xfs_dir3_leaf1_write_verify, + .verify_struct = xfs_dir3_leaf1_verify, }; const struct xfs_buf_ops xfs_dir3_leafn_buf_ops = { .name = "xfs_dir3_leafn", .verify_read = xfs_dir3_leafn_read_verify, .verify_write = xfs_dir3_leafn_write_verify, + .verify_struct = xfs_dir3_leafn_verify, }; int diff --git a/fs/xfs/libxfs/xfs_dir2_node.c b/fs/xfs/libxfs/xfs_dir2_node.c index 3bdbe1897212..915c4fe5e4c3 100644 --- a/fs/xfs/libxfs/xfs_dir2_node.c +++ b/fs/xfs/libxfs/xfs_dir2_node.c @@ -164,6 +164,7 @@ const struct xfs_buf_ops xfs_dir3_free_buf_ops = { .name = "xfs_dir3_free", .verify_read = xfs_dir3_free_read_verify, .verify_write = xfs_dir3_free_write_verify, + .verify_struct = xfs_dir3_free_verify, }; /* Everything ok in the free block header? 
*/ diff --git a/fs/xfs/libxfs/xfs_dquot_buf.c b/fs/xfs/libxfs/xfs_dquot_buf.c index 5e022c1a52c4..f8b62fca9f6a 100644 --- a/fs/xfs/libxfs/xfs_dquot_buf.c +++ b/fs/xfs/libxfs/xfs_dquot_buf.c @@ -242,6 +242,17 @@ xfs_dquot_buf_verify( return true; } +static xfs_failaddr_t +xfs_dquot_buf_verify_struct( + struct xfs_buf *bp) +{ + struct xfs_mount *mp = bp->b_target->bt_mount; + + if (!xfs_dquot_buf_verify(mp, bp, 0)) + return __this_address; + return NULL; +} + static void xfs_dquot_buf_read_verify( struct xfs_buf *bp) @@ -294,6 +305,7 @@ const struct xfs_buf_ops xfs_dquot_buf_ops = { .name = "xfs_dquot", .verify_read = xfs_dquot_buf_read_verify, .verify_write = xfs_dquot_buf_write_verify, + .verify_struct = xfs_dquot_buf_verify_struct, }; const struct xfs_buf_ops xfs_dquot_buf_ra_ops = { diff --git a/fs/xfs/libxfs/xfs_ialloc.c b/fs/xfs/libxfs/xfs_ialloc.c index 344b6a3525e1..c01ed9cfc5ae 100644 --- a/fs/xfs/libxfs/xfs_ialloc.c +++ b/fs/xfs/libxfs/xfs_ialloc.c @@ -2579,6 +2579,7 @@ const struct xfs_buf_ops xfs_agi_buf_ops = { .name = "xfs_agi", .verify_read = xfs_agi_read_verify, .verify_write = xfs_agi_write_verify, + .verify_struct = xfs_agi_verify, }; /* diff --git a/fs/xfs/libxfs/xfs_ialloc_btree.c b/fs/xfs/libxfs/xfs_ialloc_btree.c index 9c691ad00220..47f44d624cb1 100644 --- a/fs/xfs/libxfs/xfs_ialloc_btree.c +++ b/fs/xfs/libxfs/xfs_ialloc_btree.c @@ -329,6 +329,7 @@ const struct xfs_buf_ops xfs_inobt_buf_ops = { .name = "xfs_inobt", .verify_read = xfs_inobt_read_verify, .verify_write = xfs_inobt_write_verify, + .verify_struct = xfs_inobt_verify, }; STATIC int diff --git a/fs/xfs/libxfs/xfs_refcount_btree.c b/fs/xfs/libxfs/xfs_refcount_btree.c index 715c272a609c..8479769e470d 100644 --- a/fs/xfs/libxfs/xfs_refcount_btree.c +++ b/fs/xfs/libxfs/xfs_refcount_btree.c @@ -290,6 +290,7 @@ const struct xfs_buf_ops xfs_refcountbt_buf_ops = { .name = "xfs_refcountbt", .verify_read = xfs_refcountbt_read_verify, .verify_write = xfs_refcountbt_write_verify, + .verify_struct = 
xfs_refcountbt_verify, }; STATIC int diff --git a/fs/xfs/libxfs/xfs_rmap_btree.c b/fs/xfs/libxfs/xfs_rmap_btree.c index d089a48ac530..e829c3e489ea 100644 --- a/fs/xfs/libxfs/xfs_rmap_btree.c +++ b/fs/xfs/libxfs/xfs_rmap_btree.c @@ -382,6 +382,7 @@ const struct xfs_buf_ops xfs_rmapbt_buf_ops = { .name = "xfs_rmapbt", .verify_read = xfs_rmapbt_read_verify, .verify_write = xfs_rmapbt_write_verify, + .verify_struct = xfs_rmapbt_verify, }; STATIC int diff --git a/fs/xfs/libxfs/xfs_symlink_remote.c b/fs/xfs/libxfs/xfs_symlink_remote.c index adf2d7833abd..091e3cf0868f 100644 --- a/fs/xfs/libxfs/xfs_symlink_remote.c +++ b/fs/xfs/libxfs/xfs_symlink_remote.c @@ -173,6 +173,7 @@ const struct xfs_buf_ops xfs_symlink_buf_ops = { .name = "xfs_symlink", .verify_read = xfs_symlink_read_verify, .verify_write = xfs_symlink_write_verify, + .verify_struct = xfs_symlink_verify, }; void diff --git a/fs/xfs/xfs_buf.h b/fs/xfs/xfs_buf.h index 6f907a365f85..5b5b4861c729 100644 --- a/fs/xfs/xfs_buf.h +++ b/fs/xfs/xfs_buf.h @@ -140,6 +140,7 @@ struct xfs_buf_ops { char *name; void (*verify_read)(struct xfs_buf *); void (*verify_write)(struct xfs_buf *); + xfs_failaddr_t (*verify_struct)(struct xfs_buf *bp); }; typedef struct xfs_buf { -- cgit v1.2.3 From eeea79802871fef82a8ca6ab1220515855e5cdcc Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Mon, 8 Jan 2018 10:51:24 -0800 Subject: xfs: separate dquot repair into a separate function Move the dquot repair code into a separate function and remove XFS_QMOPT_DQREPAIR in favor of calling the helper directly. Remove other dead code because quotacheck is the only caller of DQREPAIR. Signed-off-by: Darrick J. 
Wong Reviewed-by: Dave Chinner --- fs/xfs/libxfs/xfs_dquot_buf.c | 22 +++++++++++------ fs/xfs/libxfs/xfs_quota_defs.h | 3 ++- fs/xfs/xfs_dquot.c | 54 ------------------------------------------ fs/xfs/xfs_qm.c | 7 ++++-- 4 files changed, 22 insertions(+), 64 deletions(-) diff --git a/fs/xfs/libxfs/xfs_dquot_buf.c b/fs/xfs/libxfs/xfs_dquot_buf.c index f8b62fca9f6a..6b15c5005266 100644 --- a/fs/xfs/libxfs/xfs_dquot_buf.c +++ b/fs/xfs/libxfs/xfs_dquot_buf.c @@ -51,7 +51,6 @@ xfs_dqcheck( uint flags, const char *str) { - xfs_dqblk_t *d = (xfs_dqblk_t *)ddq; int errs = 0; /* @@ -139,17 +138,26 @@ xfs_dqcheck( } } - if (!errs || !(flags & XFS_QMOPT_DQREPAIR)) - return errs; + return errs; +} + +/* + * Do some primitive error checking on ondisk dquot data structures. + */ +int +xfs_dquot_repair( + struct xfs_mount *mp, + struct xfs_disk_dquot *ddq, + xfs_dqid_t id, + uint type) +{ + struct xfs_dqblk *d = (struct xfs_dqblk *)ddq; - if (flags & XFS_QMOPT_DOWARN) - xfs_notice(mp, "Re-initializing dquot ID 0x%x", id); /* * Typically, a repair is only requested by quotacheck. 
*/ ASSERT(id != -1); - ASSERT(flags & XFS_QMOPT_DQREPAIR); memset(d, 0, sizeof(xfs_dqblk_t)); d->dd_diskdq.d_magic = cpu_to_be16(XFS_DQUOT_MAGIC); @@ -163,7 +171,7 @@ xfs_dqcheck( XFS_DQUOT_CRC_OFF); } - return errs; + return 0; } STATIC bool diff --git a/fs/xfs/libxfs/xfs_quota_defs.h b/fs/xfs/libxfs/xfs_quota_defs.h index d69c772271cb..7187ec93fc76 100644 --- a/fs/xfs/libxfs/xfs_quota_defs.h +++ b/fs/xfs/libxfs/xfs_quota_defs.h @@ -113,7 +113,6 @@ typedef uint16_t xfs_qwarncnt_t; #define XFS_QMOPT_FORCE_RES 0x0000010 /* ignore quota limits */ #define XFS_QMOPT_SBVERSION 0x0000040 /* change superblock version num */ #define XFS_QMOPT_DOWARN 0x0000400 /* increase warning cnt if needed */ -#define XFS_QMOPT_DQREPAIR 0x0001000 /* repair dquot if damaged */ #define XFS_QMOPT_GQUOTA 0x0002000 /* group dquot requested */ #define XFS_QMOPT_ENOSPC 0x0004000 /* enospc instead of edquot (prj) */ #define XFS_QMOPT_DQNEXT 0x0008000 /* return next dquot >= this ID */ @@ -156,5 +155,7 @@ typedef uint16_t xfs_qwarncnt_t; extern int xfs_dqcheck(struct xfs_mount *mp, xfs_disk_dquot_t *ddq, xfs_dqid_t id, uint type, uint flags, const char *str); extern int xfs_calc_dquots_per_chunk(unsigned int nbblks); +extern int xfs_dquot_repair(struct xfs_mount *mp, struct xfs_disk_dquot *ddq, + xfs_dqid_t id, uint type); #endif /* __XFS_QUOTA_H__ */ diff --git a/fs/xfs/xfs_dquot.c b/fs/xfs/xfs_dquot.c index f248708c10ff..0d8c52b499f0 100644 --- a/fs/xfs/xfs_dquot.c +++ b/fs/xfs/xfs_dquot.c @@ -399,52 +399,6 @@ error0: return error; } -STATIC int -xfs_qm_dqrepair( - struct xfs_mount *mp, - struct xfs_trans *tp, - struct xfs_dquot *dqp, - xfs_dqid_t firstid, - struct xfs_buf **bpp) -{ - int error; - struct xfs_disk_dquot *ddq; - struct xfs_dqblk *d; - int i; - - /* - * Read the buffer without verification so we get the corrupted - * buffer returned to us. make sure we verify it on write, though. 
- */ - error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, dqp->q_blkno, - mp->m_quotainfo->qi_dqchunklen, - 0, bpp, NULL); - - if (error) { - ASSERT(*bpp == NULL); - return error; - } - (*bpp)->b_ops = &xfs_dquot_buf_ops; - - ASSERT(xfs_buf_islocked(*bpp)); - d = (struct xfs_dqblk *)(*bpp)->b_addr; - - /* Do the actual repair of dquots in this buffer */ - for (i = 0; i < mp->m_quotainfo->qi_dqperchunk; i++) { - ddq = &d[i].dd_diskdq; - error = xfs_dqcheck(mp, ddq, firstid + i, - dqp->dq_flags & XFS_DQ_ALLTYPES, - XFS_QMOPT_DQREPAIR, "xfs_qm_dqrepair"); - if (error) { - /* repair failed, we're screwed */ - xfs_trans_brelse(tp, *bpp); - return -EIO; - } - } - - return 0; -} - /* * Maps a dquot to the buffer containing its on-disk version. * This returns a ptr to the buffer containing the on-disk dquot @@ -526,14 +480,6 @@ xfs_qm_dqtobp( dqp->q_blkno, mp->m_quotainfo->qi_dqchunklen, 0, &bp, &xfs_dquot_buf_ops); - - if (error == -EFSCORRUPTED && (flags & XFS_QMOPT_DQREPAIR)) { - xfs_dqid_t firstid = (xfs_dqid_t)map.br_startoff * - mp->m_quotainfo->qi_dqperchunk; - ASSERT(bp == NULL); - error = xfs_qm_dqrepair(mp, tp, dqp, firstid, &bp); - } - if (error) { ASSERT(bp == NULL); return error; diff --git a/fs/xfs/xfs_qm.c b/fs/xfs/xfs_qm.c index b897b11afb2c..65d34cc35b92 100644 --- a/fs/xfs/xfs_qm.c +++ b/fs/xfs/xfs_qm.c @@ -843,6 +843,7 @@ xfs_qm_reset_dqcounts( { struct xfs_dqblk *dqb; int j; + int error; trace_xfs_reset_dqcounts(bp, _RET_IP_); @@ -866,8 +867,10 @@ xfs_qm_reset_dqcounts( * output any warnings because it's perfectly possible to * find uninitialised dquot blks. See comment in xfs_dqcheck. 
*/ - xfs_dqcheck(mp, ddq, id+j, type, XFS_QMOPT_DQREPAIR, - "xfs_quotacheck"); + error = xfs_dqcheck(mp, ddq, id+j, type, 0, "xfs_quotacheck"); + if (error) + xfs_dquot_repair(mp, ddq, id + j, type); + /* * Reset type in case we are reusing group quota file for * project quotas or vice versa -- cgit v1.2.3 From eebf3cab9c5eac7fdb54fb9e9fb38c06f46f17f3 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Mon, 8 Jan 2018 10:51:25 -0800 Subject: xfs: standardize quota verification function outputs Rename xfs_dqcheck to xfs_dquot_verify and make it return an xfs_failaddr_t like every other structure verifier function. This enables us to check on-disk quotas in the same way that we check everything else. Callers are now responsible for logging errors, as XFS_QMOPT_DOWARN goes away. Signed-off-by: Darrick J. Wong Reviewed-by: Dave Chinner --- fs/xfs/libxfs/xfs_dquot_buf.c | 142 +++++++++++++++-------------------------- fs/xfs/libxfs/xfs_quota_defs.h | 6 +- fs/xfs/xfs_dquot.c | 8 ++- fs/xfs/xfs_log_recover.c | 24 ++++--- fs/xfs/xfs_qm.c | 28 ++++---- 5 files changed, 86 insertions(+), 122 deletions(-) diff --git a/fs/xfs/libxfs/xfs_dquot_buf.c b/fs/xfs/libxfs/xfs_dquot_buf.c index 6b15c5005266..8b7a6c3cb599 100644 --- a/fs/xfs/libxfs/xfs_dquot_buf.c +++ b/fs/xfs/libxfs/xfs_dquot_buf.c @@ -42,17 +42,14 @@ xfs_calc_dquots_per_chunk( /* * Do some primitive error checking on ondisk dquot data structures. */ -int -xfs_dqcheck( +xfs_failaddr_t +xfs_dquot_verify( struct xfs_mount *mp, xfs_disk_dquot_t *ddq, xfs_dqid_t id, uint type, /* used only when IO_dorepair is true */ - uint flags, - const char *str) + uint flags) { - int errs = 0; - /* * We can encounter an uninitialized dquot buffer for 2 reasons: * 1. If we crash while deleting the quotainode(s), and those blks got @@ -68,77 +65,38 @@ xfs_dqcheck( * This is all fine; things are still consistent, and we haven't lost * any quota information. Just don't complain about bad dquot blks. 
*/ - if (ddq->d_magic != cpu_to_be16(XFS_DQUOT_MAGIC)) { - if (flags & XFS_QMOPT_DOWARN) - xfs_alert(mp, - "%s : XFS dquot ID 0x%x, magic 0x%x != 0x%x", - str, id, be16_to_cpu(ddq->d_magic), XFS_DQUOT_MAGIC); - errs++; - } - if (ddq->d_version != XFS_DQUOT_VERSION) { - if (flags & XFS_QMOPT_DOWARN) - xfs_alert(mp, - "%s : XFS dquot ID 0x%x, version 0x%x != 0x%x", - str, id, ddq->d_version, XFS_DQUOT_VERSION); - errs++; - } + if (ddq->d_magic != cpu_to_be16(XFS_DQUOT_MAGIC)) + return __this_address; + if (ddq->d_version != XFS_DQUOT_VERSION) + return __this_address; if (ddq->d_flags != XFS_DQ_USER && ddq->d_flags != XFS_DQ_PROJ && - ddq->d_flags != XFS_DQ_GROUP) { - if (flags & XFS_QMOPT_DOWARN) - xfs_alert(mp, - "%s : XFS dquot ID 0x%x, unknown flags 0x%x", - str, id, ddq->d_flags); - errs++; - } + ddq->d_flags != XFS_DQ_GROUP) + return __this_address; - if (id != -1 && id != be32_to_cpu(ddq->d_id)) { - if (flags & XFS_QMOPT_DOWARN) - xfs_alert(mp, - "%s : ondisk-dquot 0x%p, ID mismatch: " - "0x%x expected, found id 0x%x", - str, ddq, id, be32_to_cpu(ddq->d_id)); - errs++; - } + if (id != -1 && id != be32_to_cpu(ddq->d_id)) + return __this_address; - if (!errs && ddq->d_id) { - if (ddq->d_blk_softlimit && - be64_to_cpu(ddq->d_bcount) > - be64_to_cpu(ddq->d_blk_softlimit)) { - if (!ddq->d_btimer) { - if (flags & XFS_QMOPT_DOWARN) - xfs_alert(mp, - "%s : Dquot ID 0x%x (0x%p) BLK TIMER NOT STARTED", - str, (int)be32_to_cpu(ddq->d_id), ddq); - errs++; - } - } - if (ddq->d_ino_softlimit && - be64_to_cpu(ddq->d_icount) > - be64_to_cpu(ddq->d_ino_softlimit)) { - if (!ddq->d_itimer) { - if (flags & XFS_QMOPT_DOWARN) - xfs_alert(mp, - "%s : Dquot ID 0x%x (0x%p) INODE TIMER NOT STARTED", - str, (int)be32_to_cpu(ddq->d_id), ddq); - errs++; - } - } - if (ddq->d_rtb_softlimit && - be64_to_cpu(ddq->d_rtbcount) > - be64_to_cpu(ddq->d_rtb_softlimit)) { - if (!ddq->d_rtbtimer) { - if (flags & XFS_QMOPT_DOWARN) - xfs_alert(mp, - "%s : Dquot ID 0x%x (0x%p) RTBLK TIMER NOT STARTED", - 
str, (int)be32_to_cpu(ddq->d_id), ddq); - errs++; - } - } - } + if (!ddq->d_id) + return NULL; - return errs; + if (ddq->d_blk_softlimit && + be64_to_cpu(ddq->d_bcount) > be64_to_cpu(ddq->d_blk_softlimit) && + !ddq->d_btimer) + return __this_address; + + if (ddq->d_ino_softlimit && + be64_to_cpu(ddq->d_icount) > be64_to_cpu(ddq->d_ino_softlimit) && + !ddq->d_itimer) + return __this_address; + + if (ddq->d_rtb_softlimit && + be64_to_cpu(ddq->d_rtbcount) > be64_to_cpu(ddq->d_rtb_softlimit) && + !ddq->d_rtbtimer) + return __this_address; + + return NULL; } /* @@ -206,13 +164,13 @@ xfs_dquot_buf_verify_crc( return true; } -STATIC bool +STATIC xfs_failaddr_t xfs_dquot_buf_verify( struct xfs_mount *mp, - struct xfs_buf *bp, - int warn) + struct xfs_buf *bp) { struct xfs_dqblk *d = (struct xfs_dqblk *)bp->b_addr; + xfs_failaddr_t fa; xfs_dqid_t id = 0; int ndquots; int i; @@ -236,41 +194,43 @@ xfs_dquot_buf_verify( */ for (i = 0; i < ndquots; i++) { struct xfs_disk_dquot *ddq; - int error; ddq = &d[i].dd_diskdq; if (i == 0) id = be32_to_cpu(ddq->d_id); - error = xfs_dqcheck(mp, ddq, id + i, 0, warn, __func__); - if (error) - return false; + fa = xfs_dquot_verify(mp, ddq, id + i, 0, 0); + if (fa) + return fa; } - return true; + + return NULL; } static xfs_failaddr_t xfs_dquot_buf_verify_struct( - struct xfs_buf *bp) + struct xfs_buf *bp) { struct xfs_mount *mp = bp->b_target->bt_mount; - if (!xfs_dquot_buf_verify(mp, bp, 0)) - return __this_address; - return NULL; + return xfs_dquot_buf_verify(mp, bp); } static void xfs_dquot_buf_read_verify( - struct xfs_buf *bp) + struct xfs_buf *bp) { struct xfs_mount *mp = bp->b_target->bt_mount; + xfs_failaddr_t fa; if (!xfs_dquot_buf_verify_crc(mp, bp)) xfs_verifier_error(bp, -EFSBADCRC, __this_address); - else if (!xfs_dquot_buf_verify(mp, bp, XFS_QMOPT_DOWARN)) - xfs_verifier_error(bp, -EFSCORRUPTED, __this_address); + else { + fa = xfs_dquot_buf_verify(mp, bp); + if (fa) + xfs_verifier_error(bp, -EFSCORRUPTED, __this_address); + } 
} /* @@ -286,7 +246,7 @@ xfs_dquot_buf_readahead_verify( struct xfs_mount *mp = bp->b_target->bt_mount; if (!xfs_dquot_buf_verify_crc(mp, bp) || - !xfs_dquot_buf_verify(mp, bp, 0)) { + xfs_dquot_buf_verify(mp, bp) != NULL) { xfs_buf_ioerror(bp, -EIO); bp->b_flags &= ~XBF_DONE; } @@ -299,14 +259,14 @@ xfs_dquot_buf_readahead_verify( */ static void xfs_dquot_buf_write_verify( - struct xfs_buf *bp) + struct xfs_buf *bp) { struct xfs_mount *mp = bp->b_target->bt_mount; + xfs_failaddr_t fa; - if (!xfs_dquot_buf_verify(mp, bp, XFS_QMOPT_DOWARN)) { + fa = xfs_dquot_buf_verify(mp, bp); + if (fa) xfs_verifier_error(bp, -EFSCORRUPTED, __this_address); - return; - } } const struct xfs_buf_ops xfs_dquot_buf_ops = { diff --git a/fs/xfs/libxfs/xfs_quota_defs.h b/fs/xfs/libxfs/xfs_quota_defs.h index 7187ec93fc76..bb1b13a9b5f4 100644 --- a/fs/xfs/libxfs/xfs_quota_defs.h +++ b/fs/xfs/libxfs/xfs_quota_defs.h @@ -112,7 +112,6 @@ typedef uint16_t xfs_qwarncnt_t; #define XFS_QMOPT_PQUOTA 0x0000008 /* project dquot requested */ #define XFS_QMOPT_FORCE_RES 0x0000010 /* ignore quota limits */ #define XFS_QMOPT_SBVERSION 0x0000040 /* change superblock version num */ -#define XFS_QMOPT_DOWARN 0x0000400 /* increase warning cnt if needed */ #define XFS_QMOPT_GQUOTA 0x0002000 /* group dquot requested */ #define XFS_QMOPT_ENOSPC 0x0004000 /* enospc instead of edquot (prj) */ #define XFS_QMOPT_DQNEXT 0x0008000 /* return next dquot >= this ID */ @@ -152,8 +151,9 @@ typedef uint16_t xfs_qwarncnt_t; (XFS_QMOPT_UQUOTA | XFS_QMOPT_PQUOTA | XFS_QMOPT_GQUOTA) #define XFS_QMOPT_RESBLK_MASK (XFS_QMOPT_RES_REGBLKS | XFS_QMOPT_RES_RTBLKS) -extern int xfs_dqcheck(struct xfs_mount *mp, xfs_disk_dquot_t *ddq, - xfs_dqid_t id, uint type, uint flags, const char *str); +extern xfs_failaddr_t xfs_dquot_verify(struct xfs_mount *mp, + struct xfs_disk_dquot *ddq, xfs_dqid_t id, uint type, + uint flags); extern int xfs_calc_dquots_per_chunk(unsigned int nbblks); extern int xfs_dquot_repair(struct xfs_mount *mp, 
struct xfs_disk_dquot *ddq, xfs_dqid_t id, uint type); diff --git a/fs/xfs/xfs_dquot.c b/fs/xfs/xfs_dquot.c index 0d8c52b499f0..43572f8a1b8e 100644 --- a/fs/xfs/xfs_dquot.c +++ b/fs/xfs/xfs_dquot.c @@ -956,6 +956,7 @@ xfs_qm_dqflush( struct xfs_mount *mp = dqp->q_mount; struct xfs_buf *bp; struct xfs_disk_dquot *ddqp; + xfs_failaddr_t fa; int error; ASSERT(XFS_DQ_IS_LOCKED(dqp)); @@ -1002,9 +1003,10 @@ xfs_qm_dqflush( /* * A simple sanity check in case we got a corrupted dquot.. */ - error = xfs_dqcheck(mp, &dqp->q_core, be32_to_cpu(ddqp->d_id), 0, - XFS_QMOPT_DOWARN, "dqflush (incore copy)"); - if (error) { + fa = xfs_dquot_verify(mp, &dqp->q_core, be32_to_cpu(ddqp->d_id), 0, 0); + if (fa) { + xfs_alert(mp, "corrupt dquot ID 0x%x in memory at %pS", + be32_to_cpu(ddqp->d_id), fa); xfs_buf_relse(bp); xfs_dqfunlock(dqp); xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE); diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c index 04f5b3081417..7864a298f7eb 100644 --- a/fs/xfs/xfs_log_recover.c +++ b/fs/xfs/xfs_log_recover.c @@ -2652,7 +2652,7 @@ xlog_recover_do_reg_buffer( int i; int bit; int nbits; - int error; + xfs_failaddr_t fa; trace_xfs_log_recover_buf_reg_buf(mp->m_log, buf_f); @@ -2687,7 +2687,7 @@ xlog_recover_do_reg_buffer( * the first dquot in the buffer should do. XXXThis is * probably a good thing to do for other buf types also. 
*/ - error = 0; + fa = NULL; if (buf_f->blf_flags & (XFS_BLF_UDQUOT_BUF|XFS_BLF_PDQUOT_BUF|XFS_BLF_GDQUOT_BUF)) { if (item->ri_buf[i].i_addr == NULL) { @@ -2701,11 +2701,14 @@ xlog_recover_do_reg_buffer( item->ri_buf[i].i_len, __func__); goto next; } - error = xfs_dqcheck(mp, item->ri_buf[i].i_addr, - -1, 0, XFS_QMOPT_DOWARN, - "dquot_buf_recover"); - if (error) + fa = xfs_dquot_verify(mp, item->ri_buf[i].i_addr, + -1, 0, 0); + if (fa) { + xfs_alert(mp, + "dquot corrupt at %pS trying to replay into block 0x%llx", + fa, bp->b_bn); goto next; + } } memcpy(xfs_buf_offset(bp, @@ -3307,6 +3310,7 @@ xlog_recover_dquot_pass2( xfs_mount_t *mp = log->l_mp; xfs_buf_t *bp; struct xfs_disk_dquot *ddq, *recddq; + xfs_failaddr_t fa; int error; xfs_dq_logformat_t *dq_f; uint type; @@ -3349,10 +3353,12 @@ xlog_recover_dquot_pass2( */ dq_f = item->ri_buf[0].i_addr; ASSERT(dq_f); - error = xfs_dqcheck(mp, recddq, dq_f->qlf_id, 0, XFS_QMOPT_DOWARN, - "xlog_recover_dquot_pass2 (log copy)"); - if (error) + fa = xfs_dquot_verify(mp, recddq, dq_f->qlf_id, 0, 0); + if (fa) { + xfs_alert(mp, "corrupt dquot ID 0x%x in log at %pS", + dq_f->qlf_id, fa); return -EIO; + } ASSERT(dq_f->qlf_len == 1); /* diff --git a/fs/xfs/xfs_qm.c b/fs/xfs/xfs_qm.c index 65d34cc35b92..6b9f44df7918 100644 --- a/fs/xfs/xfs_qm.c +++ b/fs/xfs/xfs_qm.c @@ -291,8 +291,7 @@ xfs_qm_dqattach_one( * exist on disk and we didn't ask it to allocate; ESRCH if quotas got * turned off suddenly. */ - error = xfs_qm_dqget(ip->i_mount, ip, id, type, - doalloc | XFS_QMOPT_DOWARN, &dqp); + error = xfs_qm_dqget(ip->i_mount, ip, id, type, doalloc, &dqp); if (error) return error; @@ -574,7 +573,7 @@ xfs_qm_set_defquota( struct xfs_def_quota *defq; int error; - error = xfs_qm_dqread(mp, 0, type, XFS_QMOPT_DOWARN, &dqp); + error = xfs_qm_dqread(mp, 0, type, 0, &dqp); if (!error) { xfs_disk_dquot_t *ddqp = &dqp->q_core; @@ -652,7 +651,7 @@ xfs_qm_init_quotainfo( XFS_IS_UQUOTA_RUNNING(mp) ? XFS_DQ_USER : (XFS_IS_GQUOTA_RUNNING(mp) ? 
XFS_DQ_GROUP : XFS_DQ_PROJ), - XFS_QMOPT_DOWARN, &dqp); + 0, &dqp); if (!error) { xfs_disk_dquot_t *ddqp = &dqp->q_core; @@ -843,7 +842,7 @@ xfs_qm_reset_dqcounts( { struct xfs_dqblk *dqb; int j; - int error; + xfs_failaddr_t fa; trace_xfs_reset_dqcounts(bp, _RET_IP_); @@ -865,10 +864,11 @@ xfs_qm_reset_dqcounts( /* * Do a sanity check, and if needed, repair the dqblk. Don't * output any warnings because it's perfectly possible to - * find uninitialised dquot blks. See comment in xfs_dqcheck. + * find uninitialised dquot blks. See comment in + * xfs_dquot_verify. */ - error = xfs_dqcheck(mp, ddq, id+j, type, 0, "xfs_quotacheck"); - if (error) + fa = xfs_dquot_verify(mp, ddq, id + j, type, 0); + if (fa) xfs_dquot_repair(mp, ddq, id + j, type); /* @@ -1077,8 +1077,7 @@ xfs_qm_quotacheck_dqadjust( struct xfs_dquot *dqp; int error; - error = xfs_qm_dqget(mp, ip, id, type, - XFS_QMOPT_DQALLOC | XFS_QMOPT_DOWARN, &dqp); + error = xfs_qm_dqget(mp, ip, id, type, XFS_QMOPT_DQALLOC, &dqp); if (error) { /* * Shouldn't be able to turn off quotas here. @@ -1699,8 +1698,7 @@ xfs_qm_vop_dqalloc( xfs_iunlock(ip, lockflags); error = xfs_qm_dqget(mp, NULL, uid, XFS_DQ_USER, - XFS_QMOPT_DQALLOC | - XFS_QMOPT_DOWARN, + XFS_QMOPT_DQALLOC, &uq); if (error) { ASSERT(error != -ENOENT); @@ -1726,8 +1724,7 @@ xfs_qm_vop_dqalloc( xfs_iunlock(ip, lockflags); error = xfs_qm_dqget(mp, NULL, gid, XFS_DQ_GROUP, - XFS_QMOPT_DQALLOC | - XFS_QMOPT_DOWARN, + XFS_QMOPT_DQALLOC, &gq); if (error) { ASSERT(error != -ENOENT); @@ -1746,8 +1743,7 @@ xfs_qm_vop_dqalloc( xfs_iunlock(ip, lockflags); error = xfs_qm_dqget(mp, NULL, (xfs_dqid_t)prid, XFS_DQ_PROJ, - XFS_QMOPT_DQALLOC | - XFS_QMOPT_DOWARN, + XFS_QMOPT_DQALLOC, &pq); if (error) { ASSERT(error != -ENOENT); -- cgit v1.2.3 From d9418ed08ae1fa025c96498d95ba30cbb934e119 Mon Sep 17 00:00:00 2001 From: "Darrick J. 
Wong" Date: Mon, 8 Jan 2018 10:51:25 -0800 Subject: xfs: teach error reporting functions to take xfs_failaddr_t Convert the two other error reporting functions to take xfs_failaddr_t when the caller wishes to capture a code pointer instead of the classic void * pointer. Signed-off-by: Darrick J. Wong Reviewed-by: Dave Chinner --- fs/xfs/xfs_error.c | 8 ++++---- fs/xfs/xfs_error.h | 5 +++-- 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/fs/xfs/xfs_error.c b/fs/xfs/xfs_error.c index 21db07cc7a11..be1211a851dc 100644 --- a/fs/xfs/xfs_error.c +++ b/fs/xfs/xfs_error.c @@ -314,12 +314,12 @@ xfs_error_report( struct xfs_mount *mp, const char *filename, int linenum, - void *ra) + xfs_failaddr_t failaddr) { if (level <= xfs_error_level) { xfs_alert_tag(mp, XFS_PTAG_ERROR_REPORT, "Internal error %s at line %d of file %s. Caller %pS", - tag, linenum, filename, ra); + tag, linenum, filename, failaddr); xfs_stack_trace(); } @@ -333,11 +333,11 @@ xfs_corruption_error( void *p, const char *filename, int linenum, - void *ra) + xfs_failaddr_t failaddr) { if (level <= xfs_error_level) xfs_hex_dump(p, 64); - xfs_error_report(tag, level, mp, filename, linenum, ra); + xfs_error_report(tag, level, mp, filename, linenum, failaddr); xfs_alert(mp, "Corruption detected. 
Unmount and run xfs_repair"); } diff --git a/fs/xfs/xfs_error.h b/fs/xfs/xfs_error.h index 11f80e072403..f086040266ac 100644 --- a/fs/xfs/xfs_error.h +++ b/fs/xfs/xfs_error.h @@ -21,10 +21,11 @@ struct xfs_mount; extern void xfs_error_report(const char *tag, int level, struct xfs_mount *mp, - const char *filename, int linenum, void *ra); + const char *filename, int linenum, + xfs_failaddr_t failaddr); extern void xfs_corruption_error(const char *tag, int level, struct xfs_mount *mp, void *p, const char *filename, - int linenum, void *ra); + int linenum, xfs_failaddr_t failaddr); extern void xfs_verifier_error(struct xfs_buf *bp, int error, xfs_failaddr_t failaddr); -- cgit v1.2.3 From 9c712a1346b214a92f4a0e5bb4de4c075de65c32 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Mon, 8 Jan 2018 10:51:26 -0800 Subject: xfs: dump the first 128 bytes of any corrupt buffer Increase the corrupt buffer dump to the first 128 bytes since v5 filesystems have larger block headers than before. Signed-off-by: Darrick J. Wong Reviewed-by: Dave Chinner --- fs/xfs/xfs_buf.c | 3 ++- fs/xfs/xfs_error.c | 7 ++++--- fs/xfs/xfs_error.h | 3 +++ 3 files changed, 9 insertions(+), 4 deletions(-) diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c index 45987a278930..1981ef77040d 100644 --- a/fs/xfs/xfs_buf.c +++ b/fs/xfs/xfs_buf.c @@ -1381,7 +1381,8 @@ _xfs_buf_ioapply( xfs_warn(mp, "%s: no ops on block 0x%llx/0x%x", __func__, bp->b_bn, bp->b_length); - xfs_hex_dump(bp->b_addr, 64); + xfs_hex_dump(bp->b_addr, + XFS_CORRUPTION_DUMP_LEN); dump_stack(); } } diff --git a/fs/xfs/xfs_error.c b/fs/xfs/xfs_error.c index be1211a851dc..980d5f0660b5 100644 --- a/fs/xfs/xfs_error.c +++ b/fs/xfs/xfs_error.c @@ -336,7 +336,7 @@ xfs_corruption_error( xfs_failaddr_t failaddr) { if (level <= xfs_error_level) - xfs_hex_dump(p, 64); + xfs_hex_dump(p, XFS_CORRUPTION_DUMP_LEN); xfs_error_report(tag, level, mp, filename, linenum, failaddr); xfs_alert(mp, "Corruption detected. 
Unmount and run xfs_repair"); } @@ -364,8 +364,9 @@ xfs_verifier_error( xfs_alert(mp, "Unmount and run xfs_repair"); if (xfs_error_level >= XFS_ERRLEVEL_LOW) { - xfs_alert(mp, "First 64 bytes of corrupted metadata buffer:"); - xfs_hex_dump(xfs_buf_offset(bp, 0), 64); + xfs_alert(mp, "First %d bytes of corrupted metadata buffer:", + XFS_CORRUPTION_DUMP_LEN); + xfs_hex_dump(xfs_buf_offset(bp, 0), XFS_CORRUPTION_DUMP_LEN); } if (xfs_error_level >= XFS_ERRLEVEL_HIGH) diff --git a/fs/xfs/xfs_error.h b/fs/xfs/xfs_error.h index f086040266ac..a3ba05bd983d 100644 --- a/fs/xfs/xfs_error.h +++ b/fs/xfs/xfs_error.h @@ -39,6 +39,9 @@ extern void xfs_verifier_error(struct xfs_buf *bp, int error, #define XFS_ERRLEVEL_LOW 1 #define XFS_ERRLEVEL_HIGH 5 +/* Dump 128 bytes of any corrupt buffer */ +#define XFS_CORRUPTION_DUMP_LEN (128) + /* * Macros to set EFSCORRUPTED & return/branch. */ -- cgit v1.2.3 From b872af2c8700e9d64af8e13811b7679ede26ca00 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Mon, 8 Jan 2018 10:51:26 -0800 Subject: xfs: trace log reservations at mount time At each mount, emit the transaction reservation type information via tracepoints. This makes it easier to compare the log reservation info calculated by the kernel and xfsprogs so that we can more easily diagnose minimum log size failures on freshly formatted filesystems. Signed-off-by: Darrick J. Wong Reviewed-by: Brian Foster --- fs/xfs/libxfs/xfs_log_rlimit.c | 2 +- fs/xfs/libxfs/xfs_shared.h | 3 +++ fs/xfs/xfs_trace.h | 26 ++++++++++++++++++++++++++ fs/xfs/xfs_trans.c | 22 ++++++++++++++++++++++ 4 files changed, 52 insertions(+), 1 deletion(-) diff --git a/fs/xfs/libxfs/xfs_log_rlimit.c b/fs/xfs/libxfs/xfs_log_rlimit.c index c10597973333..cc4cbe290939 100644 --- a/fs/xfs/libxfs/xfs_log_rlimit.c +++ b/fs/xfs/libxfs/xfs_log_rlimit.c @@ -55,7 +55,7 @@ xfs_log_calc_max_attrsetm_res( * the maximum one in terms of the pre-calculated values which were done * at mount time. 
*/ -STATIC void +void xfs_log_get_max_trans_res( struct xfs_mount *mp, struct xfs_trans_res *max_resp) diff --git a/fs/xfs/libxfs/xfs_shared.h b/fs/xfs/libxfs/xfs_shared.h index 67ccb1ab4d7e..d0b84da0cb1e 100644 --- a/fs/xfs/libxfs/xfs_shared.h +++ b/fs/xfs/libxfs/xfs_shared.h @@ -76,6 +76,9 @@ struct xfs_log_item_desc { int xfs_log_calc_unit_res(struct xfs_mount *mp, int unit_bytes); int xfs_log_calc_minimum_size(struct xfs_mount *); +struct xfs_trans_res; +void xfs_log_get_max_trans_res(struct xfs_mount *mp, + struct xfs_trans_res *max_resp); /* * Values for t_flags. diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h index 9235b2c29695..b6251f8d66a0 100644 --- a/fs/xfs/xfs_trace.h +++ b/fs/xfs/xfs_trace.h @@ -3313,6 +3313,32 @@ DEFINE_GETFSMAP_EVENT(xfs_getfsmap_low_key); DEFINE_GETFSMAP_EVENT(xfs_getfsmap_high_key); DEFINE_GETFSMAP_EVENT(xfs_getfsmap_mapping); +TRACE_EVENT(xfs_trans_resv_calc, + TP_PROTO(struct xfs_mount *mp, unsigned int type, + struct xfs_trans_res *res), + TP_ARGS(mp, type, res), + TP_STRUCT__entry( + __field(dev_t, dev) + __field(int, type) + __field(uint, logres) + __field(int, logcount) + __field(int, logflags) + ), + TP_fast_assign( + __entry->dev = mp->m_super->s_dev; + __entry->type = type; + __entry->logres = res->tr_logres; + __entry->logcount = res->tr_logcount; + __entry->logflags = res->tr_logflags; + ), + TP_printk("dev %d:%d type %d logres %u logcount %d flags 0x%x", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->type, + __entry->logres, + __entry->logcount, + __entry->logflags) +); + #endif /* _TRACE_XFS_H */ #undef TRACE_INCLUDE_PATH diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c index a87f657f59c9..86f92df32c42 100644 --- a/fs/xfs/xfs_trans.c +++ b/fs/xfs/xfs_trans.c @@ -35,6 +35,27 @@ kmem_zone_t *xfs_trans_zone; kmem_zone_t *xfs_log_item_desc_zone; +#if defined(CONFIG_TRACEPOINTS) +static void +xfs_trans_trace_reservations( + struct xfs_mount *mp) +{ + struct xfs_trans_res resv; + struct xfs_trans_res *res; + 
struct xfs_trans_res *end_res; + int i; + + res = (struct xfs_trans_res *)M_RES(mp); + end_res = (struct xfs_trans_res *)(M_RES(mp) + 1); + for (i = 0; res < end_res; i++, res++) + trace_xfs_trans_resv_calc(mp, i, res); + xfs_log_get_max_trans_res(mp, &resv); + trace_xfs_trans_resv_calc(mp, -1, &resv); +} +#else +# define xfs_trans_trace_reservations(mp) +#endif + /* * Initialize the precomputed transaction reservation values * in the mount structure. @@ -44,6 +65,7 @@ xfs_trans_init( struct xfs_mount *mp) { xfs_trans_resv_calc(mp, M_RES(mp)); + xfs_trans_trace_reservations(mp); } /* -- cgit v1.2.3 From c368ebcd4cc3bbc08602adce083ad3cc76a15258 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Mon, 8 Jan 2018 10:51:27 -0800 Subject: xfs: hoist xfs_fs_geometry to libxfs Move xfs_fs_geometry to libxfs so that we can clean up the fs geometry reporting in xfsprogs. Signed-off-by: Darrick J. Wong Reviewed-by: Brian Foster --- fs/xfs/libxfs/xfs_sb.c | 79 ++++++++++++++++++++++++++++++++++++++++++++++++++ fs/xfs/libxfs/xfs_sb.h | 3 ++ fs/xfs/xfs_fsops.c | 77 ------------------------------------------------ fs/xfs/xfs_fsops.h | 1 - fs/xfs/xfs_ioctl.c | 1 + fs/xfs/xfs_ioctl32.c | 1 + 6 files changed, 84 insertions(+), 78 deletions(-) diff --git a/fs/xfs/libxfs/xfs_sb.c b/fs/xfs/libxfs/xfs_sb.c index 63e0331b1d24..139517ae7aa8 100644 --- a/fs/xfs/libxfs/xfs_sb.c +++ b/fs/xfs/libxfs/xfs_sb.c @@ -40,6 +40,8 @@ #include "xfs_rmap_btree.h" #include "xfs_bmap.h" #include "xfs_refcount_btree.h" +#include "xfs_da_format.h" +#include "xfs_da_btree.h" /* * Physical superblock buffer manipulations. Shared with libxfs in userspace. 
@@ -874,3 +876,80 @@ xfs_sync_sb( xfs_trans_set_sync(tp); return xfs_trans_commit(tp); } + +int +xfs_fs_geometry( + xfs_mount_t *mp, + xfs_fsop_geom_t *geo, + int new_version) +{ + + memset(geo, 0, sizeof(*geo)); + + geo->blocksize = mp->m_sb.sb_blocksize; + geo->rtextsize = mp->m_sb.sb_rextsize; + geo->agblocks = mp->m_sb.sb_agblocks; + geo->agcount = mp->m_sb.sb_agcount; + geo->logblocks = mp->m_sb.sb_logblocks; + geo->sectsize = mp->m_sb.sb_sectsize; + geo->inodesize = mp->m_sb.sb_inodesize; + geo->imaxpct = mp->m_sb.sb_imax_pct; + geo->datablocks = mp->m_sb.sb_dblocks; + geo->rtblocks = mp->m_sb.sb_rblocks; + geo->rtextents = mp->m_sb.sb_rextents; + geo->logstart = mp->m_sb.sb_logstart; + ASSERT(sizeof(geo->uuid) == sizeof(mp->m_sb.sb_uuid)); + memcpy(geo->uuid, &mp->m_sb.sb_uuid, sizeof(mp->m_sb.sb_uuid)); + if (new_version >= 2) { + geo->sunit = mp->m_sb.sb_unit; + geo->swidth = mp->m_sb.sb_width; + } + if (new_version >= 3) { + geo->version = XFS_FSOP_GEOM_VERSION; + geo->flags = XFS_FSOP_GEOM_FLAGS_NLINK | + XFS_FSOP_GEOM_FLAGS_DIRV2 | + (xfs_sb_version_hasattr(&mp->m_sb) ? + XFS_FSOP_GEOM_FLAGS_ATTR : 0) | + (xfs_sb_version_hasquota(&mp->m_sb) ? + XFS_FSOP_GEOM_FLAGS_QUOTA : 0) | + (xfs_sb_version_hasalign(&mp->m_sb) ? + XFS_FSOP_GEOM_FLAGS_IALIGN : 0) | + (xfs_sb_version_hasdalign(&mp->m_sb) ? + XFS_FSOP_GEOM_FLAGS_DALIGN : 0) | + (xfs_sb_version_hasextflgbit(&mp->m_sb) ? + XFS_FSOP_GEOM_FLAGS_EXTFLG : 0) | + (xfs_sb_version_hassector(&mp->m_sb) ? + XFS_FSOP_GEOM_FLAGS_SECTOR : 0) | + (xfs_sb_version_hasasciici(&mp->m_sb) ? + XFS_FSOP_GEOM_FLAGS_DIRV2CI : 0) | + (xfs_sb_version_haslazysbcount(&mp->m_sb) ? + XFS_FSOP_GEOM_FLAGS_LAZYSB : 0) | + (xfs_sb_version_hasattr2(&mp->m_sb) ? + XFS_FSOP_GEOM_FLAGS_ATTR2 : 0) | + (xfs_sb_version_hasprojid32bit(&mp->m_sb) ? + XFS_FSOP_GEOM_FLAGS_PROJID32 : 0) | + (xfs_sb_version_hascrc(&mp->m_sb) ? + XFS_FSOP_GEOM_FLAGS_V5SB : 0) | + (xfs_sb_version_hasftype(&mp->m_sb) ? 
+ XFS_FSOP_GEOM_FLAGS_FTYPE : 0) | + (xfs_sb_version_hasfinobt(&mp->m_sb) ? + XFS_FSOP_GEOM_FLAGS_FINOBT : 0) | + (xfs_sb_version_hassparseinodes(&mp->m_sb) ? + XFS_FSOP_GEOM_FLAGS_SPINODES : 0) | + (xfs_sb_version_hasrmapbt(&mp->m_sb) ? + XFS_FSOP_GEOM_FLAGS_RMAPBT : 0) | + (xfs_sb_version_hasreflink(&mp->m_sb) ? + XFS_FSOP_GEOM_FLAGS_REFLINK : 0); + geo->logsectsize = xfs_sb_version_hassector(&mp->m_sb) ? + mp->m_sb.sb_logsectsize : BBSIZE; + geo->rtsectsize = mp->m_sb.sb_blocksize; + geo->dirblocksize = mp->m_dir_geo->blksize; + } + if (new_version >= 4) { + geo->flags |= + (xfs_sb_version_haslogv2(&mp->m_sb) ? + XFS_FSOP_GEOM_FLAGS_LOGV2 : 0); + geo->logsunit = mp->m_sb.sb_logsunit; + } + return 0; +} diff --git a/fs/xfs/libxfs/xfs_sb.h b/fs/xfs/libxfs/xfs_sb.h index 961e6475a309..a16632c2a332 100644 --- a/fs/xfs/libxfs/xfs_sb.h +++ b/fs/xfs/libxfs/xfs_sb.h @@ -34,4 +34,7 @@ extern void xfs_sb_from_disk(struct xfs_sb *to, struct xfs_dsb *from); extern void xfs_sb_to_disk(struct xfs_dsb *to, struct xfs_sb *from); extern void xfs_sb_quota_from_disk(struct xfs_sb *sbp); +extern int xfs_fs_geometry(struct xfs_mount *mp, struct xfs_fsop_geom *geo, + int nversion); + #endif /* __XFS_SB_H__ */ diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c index 60a2e128cb6a..84d73835c614 100644 --- a/fs/xfs/xfs_fsops.c +++ b/fs/xfs/xfs_fsops.c @@ -49,83 +49,6 @@ * File system operations */ -int -xfs_fs_geometry( - xfs_mount_t *mp, - xfs_fsop_geom_t *geo, - int new_version) -{ - - memset(geo, 0, sizeof(*geo)); - - geo->blocksize = mp->m_sb.sb_blocksize; - geo->rtextsize = mp->m_sb.sb_rextsize; - geo->agblocks = mp->m_sb.sb_agblocks; - geo->agcount = mp->m_sb.sb_agcount; - geo->logblocks = mp->m_sb.sb_logblocks; - geo->sectsize = mp->m_sb.sb_sectsize; - geo->inodesize = mp->m_sb.sb_inodesize; - geo->imaxpct = mp->m_sb.sb_imax_pct; - geo->datablocks = mp->m_sb.sb_dblocks; - geo->rtblocks = mp->m_sb.sb_rblocks; - geo->rtextents = mp->m_sb.sb_rextents; - geo->logstart = 
mp->m_sb.sb_logstart; - ASSERT(sizeof(geo->uuid)==sizeof(mp->m_sb.sb_uuid)); - memcpy(geo->uuid, &mp->m_sb.sb_uuid, sizeof(mp->m_sb.sb_uuid)); - if (new_version >= 2) { - geo->sunit = mp->m_sb.sb_unit; - geo->swidth = mp->m_sb.sb_width; - } - if (new_version >= 3) { - geo->version = XFS_FSOP_GEOM_VERSION; - geo->flags = XFS_FSOP_GEOM_FLAGS_NLINK | - XFS_FSOP_GEOM_FLAGS_DIRV2 | - (xfs_sb_version_hasattr(&mp->m_sb) ? - XFS_FSOP_GEOM_FLAGS_ATTR : 0) | - (xfs_sb_version_hasquota(&mp->m_sb) ? - XFS_FSOP_GEOM_FLAGS_QUOTA : 0) | - (xfs_sb_version_hasalign(&mp->m_sb) ? - XFS_FSOP_GEOM_FLAGS_IALIGN : 0) | - (xfs_sb_version_hasdalign(&mp->m_sb) ? - XFS_FSOP_GEOM_FLAGS_DALIGN : 0) | - (xfs_sb_version_hasextflgbit(&mp->m_sb) ? - XFS_FSOP_GEOM_FLAGS_EXTFLG : 0) | - (xfs_sb_version_hassector(&mp->m_sb) ? - XFS_FSOP_GEOM_FLAGS_SECTOR : 0) | - (xfs_sb_version_hasasciici(&mp->m_sb) ? - XFS_FSOP_GEOM_FLAGS_DIRV2CI : 0) | - (xfs_sb_version_haslazysbcount(&mp->m_sb) ? - XFS_FSOP_GEOM_FLAGS_LAZYSB : 0) | - (xfs_sb_version_hasattr2(&mp->m_sb) ? - XFS_FSOP_GEOM_FLAGS_ATTR2 : 0) | - (xfs_sb_version_hasprojid32bit(&mp->m_sb) ? - XFS_FSOP_GEOM_FLAGS_PROJID32 : 0) | - (xfs_sb_version_hascrc(&mp->m_sb) ? - XFS_FSOP_GEOM_FLAGS_V5SB : 0) | - (xfs_sb_version_hasftype(&mp->m_sb) ? - XFS_FSOP_GEOM_FLAGS_FTYPE : 0) | - (xfs_sb_version_hasfinobt(&mp->m_sb) ? - XFS_FSOP_GEOM_FLAGS_FINOBT : 0) | - (xfs_sb_version_hassparseinodes(&mp->m_sb) ? - XFS_FSOP_GEOM_FLAGS_SPINODES : 0) | - (xfs_sb_version_hasrmapbt(&mp->m_sb) ? - XFS_FSOP_GEOM_FLAGS_RMAPBT : 0) | - (xfs_sb_version_hasreflink(&mp->m_sb) ? - XFS_FSOP_GEOM_FLAGS_REFLINK : 0); - geo->logsectsize = xfs_sb_version_hassector(&mp->m_sb) ? - mp->m_sb.sb_logsectsize : BBSIZE; - geo->rtsectsize = mp->m_sb.sb_blocksize; - geo->dirblocksize = mp->m_dir_geo->blksize; - } - if (new_version >= 4) { - geo->flags |= - (xfs_sb_version_haslogv2(&mp->m_sb) ? 
- XFS_FSOP_GEOM_FLAGS_LOGV2 : 0); - geo->logsunit = mp->m_sb.sb_logsunit; - } - return 0; -} - static struct xfs_buf * xfs_growfs_get_hdr_buf( struct xfs_mount *mp, diff --git a/fs/xfs/xfs_fsops.h b/fs/xfs/xfs_fsops.h index 2954c13a3acd..20484ed5e919 100644 --- a/fs/xfs/xfs_fsops.h +++ b/fs/xfs/xfs_fsops.h @@ -18,7 +18,6 @@ #ifndef __XFS_FSOPS_H__ #define __XFS_FSOPS_H__ -extern int xfs_fs_geometry(xfs_mount_t *mp, xfs_fsop_geom_t *geo, int nversion); extern int xfs_growfs_data(xfs_mount_t *mp, xfs_growfs_data_t *in); extern int xfs_growfs_log(xfs_mount_t *mp, xfs_growfs_log_t *in); extern int xfs_fs_counts(xfs_mount_t *mp, xfs_fsop_counts_t *cnt); diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c index 20dc65fef6a4..3015e178d028 100644 --- a/fs/xfs/xfs_ioctl.c +++ b/fs/xfs/xfs_ioctl.c @@ -45,6 +45,7 @@ #include #include "xfs_fsmap.h" #include "scrub/xfs_scrub.h" +#include "xfs_sb.h" #include #include diff --git a/fs/xfs/xfs_ioctl32.c b/fs/xfs/xfs_ioctl32.c index 35c79e246fde..66cc3cd70268 100644 --- a/fs/xfs/xfs_ioctl32.c +++ b/fs/xfs/xfs_ioctl32.c @@ -37,6 +37,7 @@ #include "xfs_ioctl.h" #include "xfs_ioctl32.h" #include "xfs_trace.h" +#include "xfs_sb.h" #define _NATIVE_IOC(cmd, type) \ _IOC(_IOC_DIR(cmd), _IOC_TYPE(cmd), _IOC_NR(cmd), sizeof(type)) -- cgit v1.2.3 From ac503a4cc9e8ab574032e3e217ffb555f5bf2341 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Mon, 8 Jan 2018 10:51:27 -0800 Subject: xfs: refactor the geometry structure filling function Refactor the geometry structure filling function to use the superblock to fill the fields. While we're at it, make the function less indenty and use some whitespace to make the function easier to read. Signed-off-by: Darrick J. 
Wong Reviewed-by: Brian Foster --- fs/xfs/libxfs/xfs_da_format.h | 6 ++ fs/xfs/libxfs/xfs_dir2.c | 5 +- fs/xfs/libxfs/xfs_sb.c | 148 ++++++++++++++++++++++-------------------- fs/xfs/libxfs/xfs_sb.h | 5 +- fs/xfs/xfs_ioctl.c | 4 +- fs/xfs/xfs_ioctl32.c | 2 +- 6 files changed, 92 insertions(+), 78 deletions(-) diff --git a/fs/xfs/libxfs/xfs_da_format.h b/fs/xfs/libxfs/xfs_da_format.h index 3771edcb301d..7e77299b7789 100644 --- a/fs/xfs/libxfs/xfs_da_format.h +++ b/fs/xfs/libxfs/xfs_da_format.h @@ -875,4 +875,10 @@ struct xfs_attr3_rmt_hdr { ((bufsize) - (xfs_sb_version_hascrc(&(mp)->m_sb) ? \ sizeof(struct xfs_attr3_rmt_hdr) : 0)) +/* Number of bytes in a directory block. */ +static inline unsigned int xfs_dir2_dirblock_bytes(struct xfs_sb *sbp) +{ + return 1 << (sbp->sb_blocklog + sbp->sb_dirblklog); +} + #endif /* __XFS_DA_FORMAT_H__ */ diff --git a/fs/xfs/libxfs/xfs_dir2.c b/fs/xfs/libxfs/xfs_dir2.c index e10778c102ea..92f94e190f04 100644 --- a/fs/xfs/libxfs/xfs_dir2.c +++ b/fs/xfs/libxfs/xfs_dir2.c @@ -119,8 +119,7 @@ xfs_da_mount( ASSERT(mp->m_sb.sb_versionnum & XFS_SB_VERSION_DIRV2BIT); - ASSERT((1 << (mp->m_sb.sb_blocklog + mp->m_sb.sb_dirblklog)) <= - XFS_MAX_BLOCKSIZE); + ASSERT(xfs_dir2_dirblock_bytes(&mp->m_sb) <= XFS_MAX_BLOCKSIZE); mp->m_dir_inode_ops = xfs_dir_get_ops(mp, NULL); mp->m_nondir_inode_ops = xfs_nondir_get_ops(mp, NULL); @@ -140,7 +139,7 @@ xfs_da_mount( dageo = mp->m_dir_geo; dageo->blklog = mp->m_sb.sb_blocklog + mp->m_sb.sb_dirblklog; dageo->fsblog = mp->m_sb.sb_blocklog; - dageo->blksize = 1 << dageo->blklog; + dageo->blksize = xfs_dir2_dirblock_bytes(&mp->m_sb); dageo->fsbcount = 1 << mp->m_sb.sb_dirblklog; /* diff --git a/fs/xfs/libxfs/xfs_sb.c b/fs/xfs/libxfs/xfs_sb.c index 139517ae7aa8..35b005d66977 100644 --- a/fs/xfs/libxfs/xfs_sb.c +++ b/fs/xfs/libxfs/xfs_sb.c @@ -879,77 +879,85 @@ xfs_sync_sb( int xfs_fs_geometry( - xfs_mount_t *mp, - xfs_fsop_geom_t *geo, - int new_version) + struct xfs_sb *sbp, + struct xfs_fsop_geom *geo, + 
int struct_version) { + memset(geo, 0, sizeof(struct xfs_fsop_geom)); + + geo->blocksize = sbp->sb_blocksize; + geo->rtextsize = sbp->sb_rextsize; + geo->agblocks = sbp->sb_agblocks; + geo->agcount = sbp->sb_agcount; + geo->logblocks = sbp->sb_logblocks; + geo->sectsize = sbp->sb_sectsize; + geo->inodesize = sbp->sb_inodesize; + geo->imaxpct = sbp->sb_imax_pct; + geo->datablocks = sbp->sb_dblocks; + geo->rtblocks = sbp->sb_rblocks; + geo->rtextents = sbp->sb_rextents; + geo->logstart = sbp->sb_logstart; + BUILD_BUG_ON(sizeof(geo->uuid) != sizeof(sbp->sb_uuid)); + memcpy(geo->uuid, &sbp->sb_uuid, sizeof(sbp->sb_uuid)); + + if (struct_version < 2) + return 0; + + geo->sunit = sbp->sb_unit; + geo->swidth = sbp->sb_width; + + if (struct_version < 3) + return 0; + + geo->version = XFS_FSOP_GEOM_VERSION; + geo->flags = XFS_FSOP_GEOM_FLAGS_NLINK | + XFS_FSOP_GEOM_FLAGS_DIRV2; + if (xfs_sb_version_hasattr(sbp)) + geo->flags |= XFS_FSOP_GEOM_FLAGS_ATTR; + if (xfs_sb_version_hasquota(sbp)) + geo->flags |= XFS_FSOP_GEOM_FLAGS_QUOTA; + if (xfs_sb_version_hasalign(sbp)) + geo->flags |= XFS_FSOP_GEOM_FLAGS_IALIGN; + if (xfs_sb_version_hasdalign(sbp)) + geo->flags |= XFS_FSOP_GEOM_FLAGS_DALIGN; + if (xfs_sb_version_hasextflgbit(sbp)) + geo->flags |= XFS_FSOP_GEOM_FLAGS_EXTFLG; + if (xfs_sb_version_hassector(sbp)) + geo->flags |= XFS_FSOP_GEOM_FLAGS_SECTOR; + if (xfs_sb_version_hasasciici(sbp)) + geo->flags |= XFS_FSOP_GEOM_FLAGS_DIRV2CI; + if (xfs_sb_version_haslazysbcount(sbp)) + geo->flags |= XFS_FSOP_GEOM_FLAGS_LAZYSB; + if (xfs_sb_version_hasattr2(sbp)) + geo->flags |= XFS_FSOP_GEOM_FLAGS_ATTR2; + if (xfs_sb_version_hasprojid32bit(sbp)) + geo->flags |= XFS_FSOP_GEOM_FLAGS_PROJID32; + if (xfs_sb_version_hascrc(sbp)) + geo->flags |= XFS_FSOP_GEOM_FLAGS_V5SB; + if (xfs_sb_version_hasftype(sbp)) + geo->flags |= XFS_FSOP_GEOM_FLAGS_FTYPE; + if (xfs_sb_version_hasfinobt(sbp)) + geo->flags |= XFS_FSOP_GEOM_FLAGS_FINOBT; + if (xfs_sb_version_hassparseinodes(sbp)) + geo->flags |= 
XFS_FSOP_GEOM_FLAGS_SPINODES; + if (xfs_sb_version_hasrmapbt(sbp)) + geo->flags |= XFS_FSOP_GEOM_FLAGS_RMAPBT; + if (xfs_sb_version_hasreflink(sbp)) + geo->flags |= XFS_FSOP_GEOM_FLAGS_REFLINK; + if (xfs_sb_version_hassector(sbp)) + geo->logsectsize = sbp->sb_logsectsize; + else + geo->logsectsize = BBSIZE; + geo->rtsectsize = sbp->sb_blocksize; + geo->dirblocksize = xfs_dir2_dirblock_bytes(sbp); + + if (struct_version < 4) + return 0; + + if (xfs_sb_version_haslogv2(sbp)) + geo->flags |= XFS_FSOP_GEOM_FLAGS_LOGV2; + + geo->logsunit = sbp->sb_logsunit; - memset(geo, 0, sizeof(*geo)); - - geo->blocksize = mp->m_sb.sb_blocksize; - geo->rtextsize = mp->m_sb.sb_rextsize; - geo->agblocks = mp->m_sb.sb_agblocks; - geo->agcount = mp->m_sb.sb_agcount; - geo->logblocks = mp->m_sb.sb_logblocks; - geo->sectsize = mp->m_sb.sb_sectsize; - geo->inodesize = mp->m_sb.sb_inodesize; - geo->imaxpct = mp->m_sb.sb_imax_pct; - geo->datablocks = mp->m_sb.sb_dblocks; - geo->rtblocks = mp->m_sb.sb_rblocks; - geo->rtextents = mp->m_sb.sb_rextents; - geo->logstart = mp->m_sb.sb_logstart; - ASSERT(sizeof(geo->uuid) == sizeof(mp->m_sb.sb_uuid)); - memcpy(geo->uuid, &mp->m_sb.sb_uuid, sizeof(mp->m_sb.sb_uuid)); - if (new_version >= 2) { - geo->sunit = mp->m_sb.sb_unit; - geo->swidth = mp->m_sb.sb_width; - } - if (new_version >= 3) { - geo->version = XFS_FSOP_GEOM_VERSION; - geo->flags = XFS_FSOP_GEOM_FLAGS_NLINK | - XFS_FSOP_GEOM_FLAGS_DIRV2 | - (xfs_sb_version_hasattr(&mp->m_sb) ? - XFS_FSOP_GEOM_FLAGS_ATTR : 0) | - (xfs_sb_version_hasquota(&mp->m_sb) ? - XFS_FSOP_GEOM_FLAGS_QUOTA : 0) | - (xfs_sb_version_hasalign(&mp->m_sb) ? - XFS_FSOP_GEOM_FLAGS_IALIGN : 0) | - (xfs_sb_version_hasdalign(&mp->m_sb) ? - XFS_FSOP_GEOM_FLAGS_DALIGN : 0) | - (xfs_sb_version_hasextflgbit(&mp->m_sb) ? - XFS_FSOP_GEOM_FLAGS_EXTFLG : 0) | - (xfs_sb_version_hassector(&mp->m_sb) ? - XFS_FSOP_GEOM_FLAGS_SECTOR : 0) | - (xfs_sb_version_hasasciici(&mp->m_sb) ? 
- XFS_FSOP_GEOM_FLAGS_DIRV2CI : 0) | - (xfs_sb_version_haslazysbcount(&mp->m_sb) ? - XFS_FSOP_GEOM_FLAGS_LAZYSB : 0) | - (xfs_sb_version_hasattr2(&mp->m_sb) ? - XFS_FSOP_GEOM_FLAGS_ATTR2 : 0) | - (xfs_sb_version_hasprojid32bit(&mp->m_sb) ? - XFS_FSOP_GEOM_FLAGS_PROJID32 : 0) | - (xfs_sb_version_hascrc(&mp->m_sb) ? - XFS_FSOP_GEOM_FLAGS_V5SB : 0) | - (xfs_sb_version_hasftype(&mp->m_sb) ? - XFS_FSOP_GEOM_FLAGS_FTYPE : 0) | - (xfs_sb_version_hasfinobt(&mp->m_sb) ? - XFS_FSOP_GEOM_FLAGS_FINOBT : 0) | - (xfs_sb_version_hassparseinodes(&mp->m_sb) ? - XFS_FSOP_GEOM_FLAGS_SPINODES : 0) | - (xfs_sb_version_hasrmapbt(&mp->m_sb) ? - XFS_FSOP_GEOM_FLAGS_RMAPBT : 0) | - (xfs_sb_version_hasreflink(&mp->m_sb) ? - XFS_FSOP_GEOM_FLAGS_REFLINK : 0); - geo->logsectsize = xfs_sb_version_hassector(&mp->m_sb) ? - mp->m_sb.sb_logsectsize : BBSIZE; - geo->rtsectsize = mp->m_sb.sb_blocksize; - geo->dirblocksize = mp->m_dir_geo->blksize; - } - if (new_version >= 4) { - geo->flags |= - (xfs_sb_version_haslogv2(&mp->m_sb) ? 
- XFS_FSOP_GEOM_FLAGS_LOGV2 : 0); - geo->logsunit = mp->m_sb.sb_logsunit; - } return 0; } diff --git a/fs/xfs/libxfs/xfs_sb.h b/fs/xfs/libxfs/xfs_sb.h index a16632c2a332..63dcd2a1a657 100644 --- a/fs/xfs/libxfs/xfs_sb.h +++ b/fs/xfs/libxfs/xfs_sb.h @@ -34,7 +34,8 @@ extern void xfs_sb_from_disk(struct xfs_sb *to, struct xfs_dsb *from); extern void xfs_sb_to_disk(struct xfs_dsb *to, struct xfs_sb *from); extern void xfs_sb_quota_from_disk(struct xfs_sb *sbp); -extern int xfs_fs_geometry(struct xfs_mount *mp, struct xfs_fsop_geom *geo, - int nversion); +#define XFS_FS_GEOM_MAX_STRUCT_VER (4) +extern int xfs_fs_geometry(struct xfs_sb *sbp, struct xfs_fsop_geom *geo, + int struct_version); #endif /* __XFS_SB_H__ */ diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c index 3015e178d028..89fb1eb80aae 100644 --- a/fs/xfs/xfs_ioctl.c +++ b/fs/xfs/xfs_ioctl.c @@ -810,7 +810,7 @@ xfs_ioc_fsgeometry_v1( xfs_fsop_geom_t fsgeo; int error; - error = xfs_fs_geometry(mp, &fsgeo, 3); + error = xfs_fs_geometry(&mp->m_sb, &fsgeo, 3); if (error) return error; @@ -832,7 +832,7 @@ xfs_ioc_fsgeometry( xfs_fsop_geom_t fsgeo; int error; - error = xfs_fs_geometry(mp, &fsgeo, 4); + error = xfs_fs_geometry(&mp->m_sb, &fsgeo, 4); if (error) return error; diff --git a/fs/xfs/xfs_ioctl32.c b/fs/xfs/xfs_ioctl32.c index 66cc3cd70268..10fbde359649 100644 --- a/fs/xfs/xfs_ioctl32.c +++ b/fs/xfs/xfs_ioctl32.c @@ -67,7 +67,7 @@ xfs_compat_ioc_fsgeometry_v1( xfs_fsop_geom_t fsgeo; int error; - error = xfs_fs_geometry(mp, &fsgeo, 3); + error = xfs_fs_geometry(&mp->m_sb, &fsgeo, 3); if (error) return error; /* The 32-bit variant simply has some padding at the end */ -- cgit v1.2.3 From 46c59736d8090e602f960aeaf1c6b8292151bf38 Mon Sep 17 00:00:00 2001 From: "Darrick J. 
Wong" Date: Tue, 9 Jan 2018 11:11:42 -0800 Subject: xfs: harden directory integrity checks some more If a malicious filesystem image contains a block+ format directory wherein the directory inode's core.mode is set such that S_ISDIR(core.mode) == 0, and if there are subdirectories of the corrupted directory, an attempt to traverse up the directory tree will crash the kernel in __xfs_dir3_data_check. Running the online scrub's parent checks will tend to do this. The crash occurs because the directory inode's d_ops get set to xfs_dir[23]_nondir_ops (it's not a directory) but the parent pointer scrubber's indiscriminate call to xfs_readdir proceeds past the ASSERT if we have non fatal asserts configured. Fix the null pointer dereference crash in __xfs_dir3_data_check by looking for S_ISDIR or wrong d_ops; and teach the parent scrubber to bail out if it is fed a non-directory "parent". Signed-off-by: Darrick J. Wong Reviewed-by: Brian Foster --- fs/xfs/libxfs/xfs_dir2_data.c | 8 ++++++++ fs/xfs/scrub/parent.c | 2 +- 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/fs/xfs/libxfs/xfs_dir2_data.c b/fs/xfs/libxfs/xfs_dir2_data.c index 32378122cd1f..853d9abdd545 100644 --- a/fs/xfs/libxfs/xfs_dir2_data.c +++ b/fs/xfs/libxfs/xfs_dir2_data.c @@ -73,6 +73,14 @@ __xfs_dir3_data_check( */ ops = xfs_dir_get_ops(mp, dp); + /* + * If this isn't a directory, or we don't get handed the dir ops, + * something is seriously wrong. Bail out. 
+ */ + if ((dp && !S_ISDIR(VFS_I(dp)->i_mode)) || + ops != xfs_dir_get_ops(mp, NULL)) + return __this_address; + hdr = bp->b_addr; p = (char *)ops->data_entry_p(hdr); diff --git a/fs/xfs/scrub/parent.c b/fs/xfs/scrub/parent.c index 63a25334fc83..b4d2f8406d22 100644 --- a/fs/xfs/scrub/parent.c +++ b/fs/xfs/scrub/parent.c @@ -171,7 +171,7 @@ xfs_scrub_parent_validate( error = xfs_iget(mp, sc->tp, dnum, 0, 0, &dp); if (!xfs_scrub_fblock_process_error(sc, XFS_DATA_FORK, 0, &error)) goto out; - if (dp == sc->ip) { + if (dp == sc->ip || !S_ISDIR(VFS_I(dp)->i_mode)) { xfs_scrub_fblock_set_corrupt(sc, XFS_DATA_FORK, 0); goto out_rele; } -- cgit v1.2.3 From c219b01579b204be007d26b6f484a7a26d620799 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Mon, 8 Jan 2018 11:39:18 -0800 Subject: xfs: clarify units in the failed metadata io message If a metadata IO error happens, we report the location of the failed IO request in units of daddrs. However, the printk message misleads people into thinking that the units are fs blocks, so fix the reported units. Signed-off-by: Darrick J. Wong Reviewed-by: Dave Chinner --- fs/xfs/xfs_buf.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c index 1981ef77040d..0820c1ccf97c 100644 --- a/fs/xfs/xfs_buf.c +++ b/fs/xfs/xfs_buf.c @@ -585,7 +585,7 @@ _xfs_buf_find( * returning a specific error on buffer lookup failures. 
*/ xfs_alert(btp->bt_mount, - "%s: Block out of range: block 0x%llx, EOFS 0x%llx ", + "%s: daddr 0x%llx out of range, EOFS 0x%llx", __func__, cmap.bm_bn, eofs); WARN_ON(1); return NULL; @@ -1196,8 +1196,9 @@ xfs_buf_ioerror_alert( const char *func) { xfs_alert(bp->b_target->bt_mount, -"metadata I/O error: block 0x%llx (\"%s\") error %d numblks %d", - (uint64_t)XFS_BUF_ADDR(bp), func, -bp->b_error, bp->b_length); +"metadata I/O error in \"%s\" at daddr 0x%llx len %d error %d", + func, (uint64_t)XFS_BUF_ADDR(bp), bp->b_length, + -bp->b_error); } int @@ -1379,7 +1380,7 @@ _xfs_buf_ioapply( */ if (xfs_sb_version_hascrc(&mp->m_sb)) { xfs_warn(mp, - "%s: no ops on block 0x%llx/0x%x", + "%s: no buf ops on daddr 0x%llx len %d", __func__, bp->b_bn, bp->b_length); xfs_hex_dump(bp->b_addr, XFS_CORRUPTION_DUMP_LEN); @@ -1673,7 +1674,7 @@ xfs_wait_buftarg( list_del_init(&bp->b_lru); if (bp->b_flags & XBF_WRITE_FAIL) { xfs_alert(btp->bt_mount, -"Corruption Alert: Buffer at block 0x%llx had permanent write failures!", +"Corruption Alert: Buffer at daddr 0x%llx had permanent write failures!", (long long)bp->b_bn); xfs_alert(btp->bt_mount, "Please run xfs_repair to determine the extent of the problem."); -- cgit v1.2.3 From 3d170aa24283568b1ed92a09daa0e05a8788c6a4 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Tue, 9 Jan 2018 11:43:36 -0800 Subject: xfs: change 0x%p -> %p in print messages Since %p prepends "0x" to the outputted string, we can drop the prefix. Signed-off-by: Darrick J. 
Wong Reviewed-by: Dave Chinner --- fs/xfs/libxfs/xfs_dir2_node.c | 2 +- fs/xfs/xfs_fsops.c | 2 +- fs/xfs/xfs_inode.c | 10 +++++----- fs/xfs/xfs_log.c | 4 ++-- fs/xfs/xfs_log_recover.c | 24 ++++++++++++------------ fs/xfs/xfs_trace.h | 14 +++++++------- 6 files changed, 28 insertions(+), 28 deletions(-) diff --git a/fs/xfs/libxfs/xfs_dir2_node.c b/fs/xfs/libxfs/xfs_dir2_node.c index 915c4fe5e4c3..e900dbcb0de9 100644 --- a/fs/xfs/libxfs/xfs_dir2_node.c +++ b/fs/xfs/libxfs/xfs_dir2_node.c @@ -1919,7 +1919,7 @@ xfs_dir2_node_addname_int( (unsigned long long)ifbno, lastfbno); if (fblk) { xfs_alert(mp, - " fblk 0x%p blkno %llu index %d magic 0x%x", + " fblk %p blkno %llu index %d magic 0x%x", fblk, (unsigned long long)fblk->blkno, fblk->index, diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c index 84d73835c614..cc86b2b34243 100644 --- a/fs/xfs/xfs_fsops.c +++ b/fs/xfs/xfs_fsops.c @@ -878,7 +878,7 @@ xfs_do_force_shutdown( if (!(flags & SHUTDOWN_FORCE_UMOUNT)) { xfs_notice(mp, - "%s(0x%x) called from line %d of file %s. Return address = 0x%p", + "%s(0x%x) called from line %d of file %s. 
Return address = %p", __func__, flags, lnnum, fname, __return_address); } /* diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index 663b546f2bcd..e93fb885bbc5 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -3529,7 +3529,7 @@ xfs_iflush_int( if (XFS_TEST_ERROR(dip->di_magic != cpu_to_be16(XFS_DINODE_MAGIC), mp, XFS_ERRTAG_IFLUSH_1)) { xfs_alert_tag(mp, XFS_PTAG_IFLUSH, - "%s: Bad inode %Lu magic number 0x%x, ptr 0x%p", + "%s: Bad inode %Lu magic number 0x%x, ptr %p", __func__, ip->i_ino, be16_to_cpu(dip->di_magic), dip); goto corrupt_out; } @@ -3539,7 +3539,7 @@ xfs_iflush_int( (ip->i_d.di_format != XFS_DINODE_FMT_BTREE), mp, XFS_ERRTAG_IFLUSH_3)) { xfs_alert_tag(mp, XFS_PTAG_IFLUSH, - "%s: Bad regular inode %Lu, ptr 0x%p", + "%s: Bad regular inode %Lu, ptr %p", __func__, ip->i_ino, ip); goto corrupt_out; } @@ -3550,7 +3550,7 @@ xfs_iflush_int( (ip->i_d.di_format != XFS_DINODE_FMT_LOCAL), mp, XFS_ERRTAG_IFLUSH_4)) { xfs_alert_tag(mp, XFS_PTAG_IFLUSH, - "%s: Bad directory inode %Lu, ptr 0x%p", + "%s: Bad directory inode %Lu, ptr %p", __func__, ip->i_ino, ip); goto corrupt_out; } @@ -3559,7 +3559,7 @@ xfs_iflush_int( ip->i_d.di_nblocks, mp, XFS_ERRTAG_IFLUSH_5)) { xfs_alert_tag(mp, XFS_PTAG_IFLUSH, "%s: detected corrupt incore inode %Lu, " - "total extents = %d, nblocks = %Ld, ptr 0x%p", + "total extents = %d, nblocks = %Ld, ptr %p", __func__, ip->i_ino, ip->i_d.di_nextents + ip->i_d.di_anextents, ip->i_d.di_nblocks, ip); @@ -3568,7 +3568,7 @@ xfs_iflush_int( if (XFS_TEST_ERROR(ip->i_d.di_forkoff > mp->m_sb.sb_inodesize, mp, XFS_ERRTAG_IFLUSH_6)) { xfs_alert_tag(mp, XFS_PTAG_IFLUSH, - "%s: bad inode %Lu, forkoff 0x%x, ptr 0x%p", + "%s: bad inode %Lu, forkoff 0x%x, ptr %p", __func__, ip->i_ino, ip->i_d.di_forkoff, ip); goto corrupt_out; } diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c index 047df85528b0..922e5a9764ca 100644 --- a/fs/xfs/xfs_log.c +++ b/fs/xfs/xfs_log.c @@ -2244,7 +2244,7 @@ xlog_write_setup_ophdr( break; default: xfs_warn(log->l_mp, 
- "Bad XFS transaction clientid 0x%x in ticket 0x%p", + "Bad XFS transaction clientid 0x%x in ticket %p", ophdr->oh_clientid, ticket); return NULL; } @@ -3926,7 +3926,7 @@ xlog_verify_iclog( } if (clientid != XFS_TRANSACTION && clientid != XFS_LOG) xfs_warn(log->l_mp, - "%s: invalid clientid %d op 0x%p offset 0x%lx", + "%s: invalid clientid %d op %p offset 0x%lx", __func__, clientid, ophead, (unsigned long)field_offset); diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c index 7864a298f7eb..205bace41832 100644 --- a/fs/xfs/xfs_log_recover.c +++ b/fs/xfs/xfs_log_recover.c @@ -2218,7 +2218,7 @@ xlog_recover_do_inode_buffer( next_unlinked_offset - reg_buf_offset; if (unlikely(*logged_nextp == 0)) { xfs_alert(mp, - "Bad inode buffer log record (ptr = 0x%p, bp = 0x%p). " + "Bad inode buffer log record (ptr = %p, bp = %p). " "Trying to replay bad (0) inode di_next_unlinked field.", item, bp); XFS_ERROR_REPORT("xlog_recover_do_inode_buf", @@ -3049,7 +3049,7 @@ xlog_recover_inode_pass2( */ if (unlikely(dip->di_magic != cpu_to_be16(XFS_DINODE_MAGIC))) { xfs_alert(mp, - "%s: Bad inode magic number, dip = 0x%p, dino bp = 0x%p, ino = %Ld", + "%s: Bad inode magic number, dip = %p, dino bp = %p, ino = %Ld", __func__, dip, bp, in_f->ilf_ino); XFS_ERROR_REPORT("xlog_recover_inode_pass2(1)", XFS_ERRLEVEL_LOW, mp); @@ -3059,7 +3059,7 @@ xlog_recover_inode_pass2( ldip = item->ri_buf[1].i_addr; if (unlikely(ldip->di_magic != XFS_DINODE_MAGIC)) { xfs_alert(mp, - "%s: Bad inode log record, rec ptr 0x%p, ino %Ld", + "%s: Bad inode log record, rec ptr %p, ino %Ld", __func__, item, in_f->ilf_ino); XFS_ERROR_REPORT("xlog_recover_inode_pass2(2)", XFS_ERRLEVEL_LOW, mp); @@ -3117,8 +3117,8 @@ xlog_recover_inode_pass2( XFS_CORRUPTION_ERROR("xlog_recover_inode_pass2(3)", XFS_ERRLEVEL_LOW, mp, ldip); xfs_alert(mp, - "%s: Bad regular inode log record, rec ptr 0x%p, " - "ino ptr = 0x%p, ino bp = 0x%p, ino %Ld", + "%s: Bad regular inode log record, rec ptr %p, " + "ino ptr = %p, ino bp 
= %p, ino %Ld", __func__, item, dip, bp, in_f->ilf_ino); error = -EFSCORRUPTED; goto out_release; @@ -3130,8 +3130,8 @@ xlog_recover_inode_pass2( XFS_CORRUPTION_ERROR("xlog_recover_inode_pass2(4)", XFS_ERRLEVEL_LOW, mp, ldip); xfs_alert(mp, - "%s: Bad dir inode log record, rec ptr 0x%p, " - "ino ptr = 0x%p, ino bp = 0x%p, ino %Ld", + "%s: Bad dir inode log record, rec ptr %p, " + "ino ptr = %p, ino bp = %p, ino %Ld", __func__, item, dip, bp, in_f->ilf_ino); error = -EFSCORRUPTED; goto out_release; @@ -3141,8 +3141,8 @@ xlog_recover_inode_pass2( XFS_CORRUPTION_ERROR("xlog_recover_inode_pass2(5)", XFS_ERRLEVEL_LOW, mp, ldip); xfs_alert(mp, - "%s: Bad inode log record, rec ptr 0x%p, dino ptr 0x%p, " - "dino bp 0x%p, ino %Ld, total extents = %d, nblocks = %Ld", + "%s: Bad inode log record, rec ptr %p, dino ptr %p, " + "dino bp %p, ino %Ld, total extents = %d, nblocks = %Ld", __func__, item, dip, bp, in_f->ilf_ino, ldip->di_nextents + ldip->di_anextents, ldip->di_nblocks); @@ -3153,8 +3153,8 @@ xlog_recover_inode_pass2( XFS_CORRUPTION_ERROR("xlog_recover_inode_pass2(6)", XFS_ERRLEVEL_LOW, mp, ldip); xfs_alert(mp, - "%s: Bad inode log record, rec ptr 0x%p, dino ptr 0x%p, " - "dino bp 0x%p, ino %Ld, forkoff 0x%x", __func__, + "%s: Bad inode log record, rec ptr %p, dino ptr %p, " + "dino bp %p, ino %Ld, forkoff 0x%x", __func__, item, dip, bp, in_f->ilf_ino, ldip->di_forkoff); error = -EFSCORRUPTED; goto out_release; @@ -3164,7 +3164,7 @@ xlog_recover_inode_pass2( XFS_CORRUPTION_ERROR("xlog_recover_inode_pass2(7)", XFS_ERRLEVEL_LOW, mp, ldip); xfs_alert(mp, - "%s: Bad inode log record length %d, rec ptr 0x%p", + "%s: Bad inode log record length %d, rec ptr %p", __func__, item->ri_buf[1].i_len, item); error = -EFSCORRUPTED; goto out_release; diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h index b6251f8d66a0..560545f1a0d7 100644 --- a/fs/xfs/xfs_trace.h +++ b/fs/xfs/xfs_trace.h @@ -72,7 +72,7 @@ DECLARE_EVENT_CLASS(xfs_attr_list_class, __entry->flags = ctx->flags; ), 
TP_printk("dev %d:%d ino 0x%llx cursor h/b/o 0x%x/0x%x/%u dupcnt %u " - "alist 0x%p size %u count %u firstu %u flags %d %s", + "alist %p size %u count %u firstu %u flags %d %s", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->ino, __entry->hashval, @@ -200,7 +200,7 @@ TRACE_EVENT(xfs_attr_list_node_descend, __entry->bt_before = be32_to_cpu(btree->before); ), TP_printk("dev %d:%d ino 0x%llx cursor h/b/o 0x%x/0x%x/%u dupcnt %u " - "alist 0x%p size %u count %u firstu %u flags %d %s " + "alist %p size %u count %u firstu %u flags %d %s " "node hashval %u, node before %u", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->ino, @@ -251,7 +251,7 @@ DECLARE_EVENT_CLASS(xfs_bmap_class, __entry->bmap_state = state; __entry->caller_ip = caller_ip; ), - TP_printk("dev %d:%d ino 0x%llx state %s cur 0x%p/%d " + TP_printk("dev %d:%d ino 0x%llx state %s cur %p/%d " "offset %lld block %lld count %lld flag %d caller %ps", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->ino, @@ -460,7 +460,7 @@ DECLARE_EVENT_CLASS(xfs_buf_item_class, ), TP_printk("dev %d:%d bno 0x%llx len 0x%zx hold %d pincount %d " "lock %d flags %s recur %d refcount %d bliflags %s " - "lidesc 0x%p liflags %s", + "lidesc %p liflags %s", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long long)__entry->buf_bno, __entry->buf_len, @@ -1028,7 +1028,7 @@ DECLARE_EVENT_CLASS(xfs_log_item_class, __entry->flags = lip->li_flags; __entry->lsn = lip->li_lsn; ), - TP_printk("dev %d:%d lip 0x%p lsn %d/%d type %s flags %s", + TP_printk("dev %d:%d lip %p lsn %d/%d type %s flags %s", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->lip, CYCLE_LSN(__entry->lsn), BLOCK_LSN(__entry->lsn), @@ -1082,7 +1082,7 @@ DECLARE_EVENT_CLASS(xfs_ail_class, __entry->old_lsn = old_lsn; __entry->new_lsn = new_lsn; ), - TP_printk("dev %d:%d lip 0x%p old lsn %d/%d new lsn %d/%d type %s flags %s", + TP_printk("dev %d:%d lip %p old lsn %d/%d new lsn %d/%d type %s flags %s", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->lip, 
CYCLE_LSN(__entry->old_lsn), BLOCK_LSN(__entry->old_lsn), @@ -2014,7 +2014,7 @@ DECLARE_EVENT_CLASS(xfs_log_recover_item_class, __entry->count = item->ri_cnt; __entry->total = item->ri_total; ), - TP_printk("dev %d:%d tid 0x%x lsn 0x%llx, pass %d, item 0x%p, " + TP_printk("dev %d:%d tid 0x%x lsn 0x%llx, pass %d, item %p, " "item type %s item region count/total %d/%d", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->tid, -- cgit v1.2.3 From aff68a5502d24be416e256478448e228f1a88aaf Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Tue, 9 Jan 2018 11:46:05 -0800 Subject: xfs: use %pS printk format for direct instruction addresses Use the %pS instead of the %pF printk format specifier for printing symbols from direct addresses. This is needed for the ia64, ppc64 and parisc64 architectures. While we're at it, be consistent with the capitalization of the 'S'. Signed-off-by: Darrick J. Wong Reviewed-by: Dave Chinner --- fs/xfs/scrub/trace.h | 20 ++++++++++---------- fs/xfs/xfs_inode.c | 4 ++-- fs/xfs/xfs_trace.h | 22 +++++++++++----------- 3 files changed, 23 insertions(+), 23 deletions(-) diff --git a/fs/xfs/scrub/trace.h b/fs/xfs/scrub/trace.h index c4ebfb5c1ee8..ffa4a70ebdcc 100644 --- a/fs/xfs/scrub/trace.h +++ b/fs/xfs/scrub/trace.h @@ -90,7 +90,7 @@ TRACE_EVENT(xfs_scrub_op_error, __entry->error = error; __entry->ret_ip = ret_ip; ), - TP_printk("dev %d:%d type %u agno %u agbno %u error %d ret_ip %pF", + TP_printk("dev %d:%d type %u agno %u agbno %u error %d ret_ip %pS", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->type, __entry->agno, @@ -121,7 +121,7 @@ TRACE_EVENT(xfs_scrub_file_op_error, __entry->error = error; __entry->ret_ip = ret_ip; ), - TP_printk("dev %d:%d ino %llu fork %d type %u offset %llu error %d ret_ip %pF", + TP_printk("dev %d:%d ino %llu fork %d type %u offset %llu error %d ret_ip %pS", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->ino, __entry->whichfork, @@ -156,7 +156,7 @@ DECLARE_EVENT_CLASS(xfs_scrub_block_error_class, 
__entry->bno = bno; __entry->ret_ip = ret_ip; ), - TP_printk("dev %d:%d type %u agno %u agbno %u ret_ip %pF", + TP_printk("dev %d:%d type %u agno %u agbno %u ret_ip %pS", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->type, __entry->agno, @@ -207,7 +207,7 @@ DECLARE_EVENT_CLASS(xfs_scrub_ino_error_class, __entry->bno = bno; __entry->ret_ip = ret_ip; ), - TP_printk("dev %d:%d ino %llu type %u agno %u agbno %u ret_ip %pF", + TP_printk("dev %d:%d ino %llu type %u agno %u agbno %u ret_ip %pS", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->ino, __entry->type, @@ -246,7 +246,7 @@ DECLARE_EVENT_CLASS(xfs_scrub_fblock_error_class, __entry->offset = offset; __entry->ret_ip = ret_ip; ), - TP_printk("dev %d:%d ino %llu fork %d type %u offset %llu ret_ip %pF", + TP_printk("dev %d:%d ino %llu fork %d type %u offset %llu ret_ip %pS", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->ino, __entry->whichfork, @@ -277,7 +277,7 @@ TRACE_EVENT(xfs_scrub_incomplete, __entry->type = sc->sm->sm_type; __entry->ret_ip = ret_ip; ), - TP_printk("dev %d:%d type %u ret_ip %pF", + TP_printk("dev %d:%d type %u ret_ip %pS", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->type, __entry->ret_ip) @@ -311,7 +311,7 @@ TRACE_EVENT(xfs_scrub_btree_op_error, __entry->error = error; __entry->ret_ip = ret_ip; ), - TP_printk("dev %d:%d type %u btnum %d level %d ptr %d agno %u agbno %u error %d ret_ip %pF", + TP_printk("dev %d:%d type %u btnum %d level %d ptr %d agno %u agbno %u error %d ret_ip %pS", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->type, __entry->btnum, @@ -354,7 +354,7 @@ TRACE_EVENT(xfs_scrub_ifork_btree_op_error, __entry->error = error; __entry->ret_ip = ret_ip; ), - TP_printk("dev %d:%d ino %llu fork %d type %u btnum %d level %d ptr %d agno %u agbno %u error %d ret_ip %pF", + TP_printk("dev %d:%d ino %llu fork %d type %u btnum %d level %d ptr %d agno %u agbno %u error %d ret_ip %pS", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->ino, __entry->whichfork, @@ -393,7 
+393,7 @@ TRACE_EVENT(xfs_scrub_btree_error, __entry->ptr = cur->bc_ptrs[level]; __entry->ret_ip = ret_ip; ), - TP_printk("dev %d:%d type %u btnum %d level %d ptr %d agno %u agbno %u ret_ip %pF", + TP_printk("dev %d:%d type %u btnum %d level %d ptr %d agno %u agbno %u ret_ip %pS", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->type, __entry->btnum, @@ -433,7 +433,7 @@ TRACE_EVENT(xfs_scrub_ifork_btree_error, __entry->ptr = cur->bc_ptrs[level]; __entry->ret_ip = ret_ip; ), - TP_printk("dev %d:%d ino %llu fork %d type %u btnum %d level %d ptr %d agno %u agbno %u ret_ip %pF", + TP_printk("dev %d:%d ino %llu fork %d type %u btnum %d level %d ptr %d agno %u agbno %u ret_ip %pS", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->ino, __entry->whichfork, diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index e93fb885bbc5..29c47da383e0 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -3492,7 +3492,7 @@ xfs_inode_verify_forks( fa = xfs_ifork_verify_data(ip, &xfs_default_ifork_ops); if (fa) { xfs_alert(ip->i_mount, - "%s: bad inode %llu inline data fork at %pF", + "%s: bad inode %llu inline data fork at %pS", __func__, ip->i_ino, fa); return false; } @@ -3500,7 +3500,7 @@ xfs_inode_verify_forks( fa = xfs_ifork_verify_attr(ip, &xfs_default_ifork_ops); if (fa) { xfs_alert(ip->i_mount, - "%s: bad inode %llu inline attr fork at %pF", + "%s: bad inode %llu inline attr fork at %pS", __func__, ip->i_ino, fa); return false; } diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h index 560545f1a0d7..945de08af7ba 100644 --- a/fs/xfs/xfs_trace.h +++ b/fs/xfs/xfs_trace.h @@ -119,7 +119,7 @@ DECLARE_EVENT_CLASS(xfs_perag_class, __entry->refcount = refcount; __entry->caller_ip = caller_ip; ), - TP_printk("dev %d:%d agno %u refcount %d caller %ps", + TP_printk("dev %d:%d agno %u refcount %d caller %pS", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->agno, __entry->refcount, @@ -252,7 +252,7 @@ DECLARE_EVENT_CLASS(xfs_bmap_class, __entry->caller_ip = caller_ip; ), 
TP_printk("dev %d:%d ino 0x%llx state %s cur %p/%d " - "offset %lld block %lld count %lld flag %d caller %ps", + "offset %lld block %lld count %lld flag %d caller %pS", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->ino, __print_flags(__entry->bmap_state, "|", XFS_BMAP_EXT_FLAGS), @@ -301,7 +301,7 @@ DECLARE_EVENT_CLASS(xfs_buf_class, __entry->caller_ip = caller_ip; ), TP_printk("dev %d:%d bno 0x%llx nblks 0x%x hold %d pincount %d " - "lock %d flags %s caller %ps", + "lock %d flags %s caller %pS", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long long)__entry->bno, __entry->nblks, @@ -370,7 +370,7 @@ DECLARE_EVENT_CLASS(xfs_buf_flags_class, __entry->caller_ip = caller_ip; ), TP_printk("dev %d:%d bno 0x%llx len 0x%zx hold %d pincount %d " - "lock %d flags %s caller %ps", + "lock %d flags %s caller %pS", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long long)__entry->bno, __entry->buffer_length, @@ -579,7 +579,7 @@ DECLARE_EVENT_CLASS(xfs_lock_class, __entry->lock_flags = lock_flags; __entry->caller_ip = caller_ip; ), - TP_printk("dev %d:%d ino 0x%llx flags %s caller %ps", + TP_printk("dev %d:%d ino 0x%llx flags %s caller %pS", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->ino, __print_flags(__entry->lock_flags, "|", XFS_LOCK_FLAGS), @@ -697,7 +697,7 @@ DECLARE_EVENT_CLASS(xfs_iref_class, __entry->pincount = atomic_read(&ip->i_pincount); __entry->caller_ip = caller_ip; ), - TP_printk("dev %d:%d ino 0x%llx count %d pincount %d caller %ps", + TP_printk("dev %d:%d ino 0x%llx count %d pincount %d caller %pS", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->ino, __entry->count, @@ -1049,7 +1049,7 @@ TRACE_EVENT(xfs_log_force, __entry->lsn = lsn; __entry->caller_ip = caller_ip; ), - TP_printk("dev %d:%d lsn 0x%llx caller %ps", + TP_printk("dev %d:%d lsn 0x%llx caller %pS", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->lsn, (void *)__entry->caller_ip) ) @@ -1403,7 +1403,7 @@ TRACE_EVENT(xfs_bunmap, __entry->flags = flags; ), TP_printk("dev 
%d:%d ino 0x%llx size 0x%llx bno 0x%llx len 0x%llx" - "flags %s caller %ps", + "flags %s caller %pS", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->ino, __entry->size, @@ -1517,7 +1517,7 @@ TRACE_EVENT(xfs_agf, ), TP_printk("dev %d:%d agno %u flags %s length %u roots b %u c %u " "levels b %u c %u flfirst %u fllast %u flcount %u " - "freeblks %u longest %u caller %ps", + "freeblks %u longest %u caller %pS", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->agno, __print_flags(__entry->flags, "|", XFS_AGF_FLAGS), @@ -2486,7 +2486,7 @@ DECLARE_EVENT_CLASS(xfs_ag_error_class, __entry->error = error; __entry->caller_ip = caller_ip; ), - TP_printk("dev %d:%d agno %u error %d caller %ps", + TP_printk("dev %d:%d agno %u error %d caller %pS", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->agno, __entry->error, @@ -2977,7 +2977,7 @@ DECLARE_EVENT_CLASS(xfs_inode_error_class, __entry->error = error; __entry->caller_ip = caller_ip; ), - TP_printk("dev %d:%d ino %llx error %d caller %ps", + TP_printk("dev %d:%d ino %llx error %d caller %pS", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->ino, __entry->error, -- cgit v1.2.3 From c96900435fa9fdfd9702a60cd765bd85e380303e Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Tue, 9 Jan 2018 12:02:55 -0800 Subject: xfs: use %px for data pointers when debugging Starting with commit 57e734423ad ("vsprintf: refactor %pK code out of pointer"), the behavior of the raw '%p' printk format specifier was changed to print a 32-bit hash of the pointer value to avoid leaking kernel pointers into dmesg. For most situations that's good. This is /undesirable/ behavior when we're trying to debug XFS, however, so define a PTR_FMT that prints the actual pointer when we're in debug mode. Note that %p for tracepoints still prints the raw pointer, so in the long run we could consider rewriting some of these messages as tracepoints. Signed-off-by: Darrick J. 
Wong Reviewed-by: Dave Chinner --- fs/xfs/libxfs/xfs_dir2_node.c | 2 +- fs/xfs/xfs_aops.c | 2 +- fs/xfs/xfs_dquot_item.c | 2 +- fs/xfs/xfs_fsops.c | 2 +- fs/xfs/xfs_inode.c | 10 +++++----- fs/xfs/xfs_linux.h | 12 ++++++++++++ fs/xfs/xfs_log.c | 4 ++-- fs/xfs/xfs_log_recover.c | 24 ++++++++++++------------ fs/xfs/xfs_qm.c | 4 ++-- 9 files changed, 37 insertions(+), 25 deletions(-) diff --git a/fs/xfs/libxfs/xfs_dir2_node.c b/fs/xfs/libxfs/xfs_dir2_node.c index e900dbcb0de9..bb893ae02696 100644 --- a/fs/xfs/libxfs/xfs_dir2_node.c +++ b/fs/xfs/libxfs/xfs_dir2_node.c @@ -1919,7 +1919,7 @@ xfs_dir2_node_addname_int( (unsigned long long)ifbno, lastfbno); if (fblk) { xfs_alert(mp, - " fblk %p blkno %llu index %d magic 0x%x", + " fblk "PTR_FMT" blkno %llu index %d magic 0x%x", fblk, (unsigned long long)fblk->blkno, fblk->index, diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c index 4fc526a27a94..2e094c76bd45 100644 --- a/fs/xfs/xfs_aops.c +++ b/fs/xfs/xfs_aops.c @@ -791,7 +791,7 @@ xfs_aops_discard_page( goto out_invalidate; xfs_alert(ip->i_mount, - "page discard on page %p, inode 0x%llx, offset %llu.", + "page discard on page "PTR_FMT", inode 0x%llx, offset %llu.", page, ip->i_ino, offset); xfs_ilock(ip, XFS_ILOCK_EXCL); diff --git a/fs/xfs/xfs_dquot_item.c b/fs/xfs/xfs_dquot_item.c index 664dea105e76..e564f11d83f3 100644 --- a/fs/xfs/xfs_dquot_item.c +++ b/fs/xfs/xfs_dquot_item.c @@ -212,7 +212,7 @@ xfs_qm_dquot_logitem_push( error = xfs_qm_dqflush(dqp, &bp); if (error) { - xfs_warn(dqp->q_mount, "%s: push error %d on dqp %p", + xfs_warn(dqp->q_mount, "%s: push error %d on dqp "PTR_FMT, __func__, error, dqp); } else { if (!xfs_buf_delwri_queue(bp, buffer_list)) diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c index cc86b2b34243..8b4545623e25 100644 --- a/fs/xfs/xfs_fsops.c +++ b/fs/xfs/xfs_fsops.c @@ -878,7 +878,7 @@ xfs_do_force_shutdown( if (!(flags & SHUTDOWN_FORCE_UMOUNT)) { xfs_notice(mp, - "%s(0x%x) called from line %d of file %s. 
Return address = %p", + "%s(0x%x) called from line %d of file %s. Return address = "PTR_FMT, __func__, flags, lnnum, fname, __return_address); } /* diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index 29c47da383e0..c9e40d4fc939 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -3529,7 +3529,7 @@ xfs_iflush_int( if (XFS_TEST_ERROR(dip->di_magic != cpu_to_be16(XFS_DINODE_MAGIC), mp, XFS_ERRTAG_IFLUSH_1)) { xfs_alert_tag(mp, XFS_PTAG_IFLUSH, - "%s: Bad inode %Lu magic number 0x%x, ptr %p", + "%s: Bad inode %Lu magic number 0x%x, ptr "PTR_FMT, __func__, ip->i_ino, be16_to_cpu(dip->di_magic), dip); goto corrupt_out; } @@ -3539,7 +3539,7 @@ xfs_iflush_int( (ip->i_d.di_format != XFS_DINODE_FMT_BTREE), mp, XFS_ERRTAG_IFLUSH_3)) { xfs_alert_tag(mp, XFS_PTAG_IFLUSH, - "%s: Bad regular inode %Lu, ptr %p", + "%s: Bad regular inode %Lu, ptr "PTR_FMT, __func__, ip->i_ino, ip); goto corrupt_out; } @@ -3550,7 +3550,7 @@ xfs_iflush_int( (ip->i_d.di_format != XFS_DINODE_FMT_LOCAL), mp, XFS_ERRTAG_IFLUSH_4)) { xfs_alert_tag(mp, XFS_PTAG_IFLUSH, - "%s: Bad directory inode %Lu, ptr %p", + "%s: Bad directory inode %Lu, ptr "PTR_FMT, __func__, ip->i_ino, ip); goto corrupt_out; } @@ -3559,7 +3559,7 @@ xfs_iflush_int( ip->i_d.di_nblocks, mp, XFS_ERRTAG_IFLUSH_5)) { xfs_alert_tag(mp, XFS_PTAG_IFLUSH, "%s: detected corrupt incore inode %Lu, " - "total extents = %d, nblocks = %Ld, ptr %p", + "total extents = %d, nblocks = %Ld, ptr "PTR_FMT, __func__, ip->i_ino, ip->i_d.di_nextents + ip->i_d.di_anextents, ip->i_d.di_nblocks, ip); @@ -3568,7 +3568,7 @@ xfs_iflush_int( if (XFS_TEST_ERROR(ip->i_d.di_forkoff > mp->m_sb.sb_inodesize, mp, XFS_ERRTAG_IFLUSH_6)) { xfs_alert_tag(mp, XFS_PTAG_IFLUSH, - "%s: bad inode %Lu, forkoff 0x%x, ptr %p", + "%s: bad inode %Lu, forkoff 0x%x, ptr "PTR_FMT, __func__, ip->i_ino, ip->i_d.di_forkoff, ip); goto corrupt_out; } diff --git a/fs/xfs/xfs_linux.h b/fs/xfs/xfs_linux.h index 74d3576c8043..bee51a14a906 100644 --- a/fs/xfs/xfs_linux.h +++ 
b/fs/xfs/xfs_linux.h @@ -291,4 +291,16 @@ static inline uint64_t howmany_64(uint64_t x, uint32_t y) #define XFS_IS_REALTIME_MOUNT(mp) (0) #endif +/* + * Starting in Linux 4.15, the %p (raw pointer value) printk modifier + * prints a hashed version of the pointer to avoid leaking kernel + * pointers into dmesg. If we're trying to debug the kernel we want the + * raw values, so override this behavior as best we can. + */ +#ifdef DEBUG +# define PTR_FMT "%px" +#else +# define PTR_FMT "%p" +#endif + #endif /* __XFS_LINUX__ */ diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c index 922e5a9764ca..c1f266c34af7 100644 --- a/fs/xfs/xfs_log.c +++ b/fs/xfs/xfs_log.c @@ -2244,7 +2244,7 @@ xlog_write_setup_ophdr( break; default: xfs_warn(log->l_mp, - "Bad XFS transaction clientid 0x%x in ticket %p", + "Bad XFS transaction clientid 0x%x in ticket "PTR_FMT, ophdr->oh_clientid, ticket); return NULL; } @@ -3926,7 +3926,7 @@ xlog_verify_iclog( } if (clientid != XFS_TRANSACTION && clientid != XFS_LOG) xfs_warn(log->l_mp, - "%s: invalid clientid %d op %p offset 0x%lx", + "%s: invalid clientid %d op "PTR_FMT" offset 0x%lx", __func__, clientid, ophead, (unsigned long)field_offset); diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c index 205bace41832..d864380b6575 100644 --- a/fs/xfs/xfs_log_recover.c +++ b/fs/xfs/xfs_log_recover.c @@ -2218,7 +2218,7 @@ xlog_recover_do_inode_buffer( next_unlinked_offset - reg_buf_offset; if (unlikely(*logged_nextp == 0)) { xfs_alert(mp, - "Bad inode buffer log record (ptr = %p, bp = %p). " + "Bad inode buffer log record (ptr = "PTR_FMT", bp = "PTR_FMT"). 
" "Trying to replay bad (0) inode di_next_unlinked field.", item, bp); XFS_ERROR_REPORT("xlog_recover_do_inode_buf", @@ -3049,7 +3049,7 @@ xlog_recover_inode_pass2( */ if (unlikely(dip->di_magic != cpu_to_be16(XFS_DINODE_MAGIC))) { xfs_alert(mp, - "%s: Bad inode magic number, dip = %p, dino bp = %p, ino = %Ld", + "%s: Bad inode magic number, dip = "PTR_FMT", dino bp = "PTR_FMT", ino = %Ld", __func__, dip, bp, in_f->ilf_ino); XFS_ERROR_REPORT("xlog_recover_inode_pass2(1)", XFS_ERRLEVEL_LOW, mp); @@ -3059,7 +3059,7 @@ xlog_recover_inode_pass2( ldip = item->ri_buf[1].i_addr; if (unlikely(ldip->di_magic != XFS_DINODE_MAGIC)) { xfs_alert(mp, - "%s: Bad inode log record, rec ptr %p, ino %Ld", + "%s: Bad inode log record, rec ptr "PTR_FMT", ino %Ld", __func__, item, in_f->ilf_ino); XFS_ERROR_REPORT("xlog_recover_inode_pass2(2)", XFS_ERRLEVEL_LOW, mp); @@ -3117,8 +3117,8 @@ xlog_recover_inode_pass2( XFS_CORRUPTION_ERROR("xlog_recover_inode_pass2(3)", XFS_ERRLEVEL_LOW, mp, ldip); xfs_alert(mp, - "%s: Bad regular inode log record, rec ptr %p, " - "ino ptr = %p, ino bp = %p, ino %Ld", + "%s: Bad regular inode log record, rec ptr "PTR_FMT", " + "ino ptr = "PTR_FMT", ino bp = "PTR_FMT", ino %Ld", __func__, item, dip, bp, in_f->ilf_ino); error = -EFSCORRUPTED; goto out_release; @@ -3130,8 +3130,8 @@ xlog_recover_inode_pass2( XFS_CORRUPTION_ERROR("xlog_recover_inode_pass2(4)", XFS_ERRLEVEL_LOW, mp, ldip); xfs_alert(mp, - "%s: Bad dir inode log record, rec ptr %p, " - "ino ptr = %p, ino bp = %p, ino %Ld", + "%s: Bad dir inode log record, rec ptr "PTR_FMT", " + "ino ptr = "PTR_FMT", ino bp = "PTR_FMT", ino %Ld", __func__, item, dip, bp, in_f->ilf_ino); error = -EFSCORRUPTED; goto out_release; @@ -3141,8 +3141,8 @@ xlog_recover_inode_pass2( XFS_CORRUPTION_ERROR("xlog_recover_inode_pass2(5)", XFS_ERRLEVEL_LOW, mp, ldip); xfs_alert(mp, - "%s: Bad inode log record, rec ptr %p, dino ptr %p, " - "dino bp %p, ino %Ld, total extents = %d, nblocks = %Ld", + "%s: Bad inode log record, rec 
ptr "PTR_FMT", dino ptr "PTR_FMT", " + "dino bp "PTR_FMT", ino %Ld, total extents = %d, nblocks = %Ld", __func__, item, dip, bp, in_f->ilf_ino, ldip->di_nextents + ldip->di_anextents, ldip->di_nblocks); @@ -3153,8 +3153,8 @@ xlog_recover_inode_pass2( XFS_CORRUPTION_ERROR("xlog_recover_inode_pass2(6)", XFS_ERRLEVEL_LOW, mp, ldip); xfs_alert(mp, - "%s: Bad inode log record, rec ptr %p, dino ptr %p, " - "dino bp %p, ino %Ld, forkoff 0x%x", __func__, + "%s: Bad inode log record, rec ptr "PTR_FMT", dino ptr "PTR_FMT", " + "dino bp "PTR_FMT", ino %Ld, forkoff 0x%x", __func__, item, dip, bp, in_f->ilf_ino, ldip->di_forkoff); error = -EFSCORRUPTED; goto out_release; @@ -3164,7 +3164,7 @@ xlog_recover_inode_pass2( XFS_CORRUPTION_ERROR("xlog_recover_inode_pass2(7)", XFS_ERRLEVEL_LOW, mp, ldip); xfs_alert(mp, - "%s: Bad inode log record length %d, rec ptr %p", + "%s: Bad inode log record length %d, rec ptr "PTR_FMT, __func__, item->ri_buf[1].i_len, item); error = -EFSCORRUPTED; goto out_release; diff --git a/fs/xfs/xfs_qm.c b/fs/xfs/xfs_qm.c index 6b9f44df7918..5b848f4b637f 100644 --- a/fs/xfs/xfs_qm.c +++ b/fs/xfs/xfs_qm.c @@ -162,7 +162,7 @@ xfs_qm_dqpurge( */ error = xfs_qm_dqflush(dqp, &bp); if (error) { - xfs_warn(mp, "%s: dquot %p flush failed", + xfs_warn(mp, "%s: dquot "PTR_FMT" flush failed", __func__, dqp); } else { error = xfs_bwrite(bp); @@ -480,7 +480,7 @@ xfs_qm_dquot_isolate( error = xfs_qm_dqflush(dqp, &bp); if (error) { - xfs_warn(dqp->q_mount, "%s: dquot %p flush failed", + xfs_warn(dqp->q_mount, "%s: dquot "PTR_FMT" flush failed", __func__, dqp); goto out_unlock_dirty; } -- cgit v1.2.3 From 1da061899321c15500caec735f8998f4c1e1b48f Mon Sep 17 00:00:00 2001 From: Xiongwei Song Date: Thu, 11 Jan 2018 09:45:51 -0800 Subject: xfs: destroy mutex pag_ici_reclaim_lock before free The mutex pag_ici_reclaim_lock of xfs_perag_t structure is initialized in xfs_initialize_perag. 
If errors happen in xfs_initialize_perag, or when freeing resources in xfs_free_perag, we need to destroy the mutex before freeing the perag. Signed-off-by: Xiongwei Song Reviewed-by: Brian Foster Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong --- fs/xfs/xfs_mount.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c index c879b517cc94..98fd41cbb9e1 100644 --- a/fs/xfs/xfs_mount.c +++ b/fs/xfs/xfs_mount.c @@ -162,6 +162,7 @@ xfs_free_perag( ASSERT(pag); ASSERT(atomic_read(&pag->pag_ref) == 0); xfs_buf_hash_destroy(pag); + mutex_destroy(&pag->pag_ici_reclaim_lock); call_rcu(&pag->rcu_head, __xfs_free_perag); } } @@ -248,6 +249,7 @@ xfs_initialize_perag( out_hash_destroy: xfs_buf_hash_destroy(pag); out_free_pag: + mutex_destroy(&pag->pag_ici_reclaim_lock); kmem_free(pag); out_unwind_new_pags: /* unwind any prior newly initialized pags */ @@ -256,6 +258,7 @@ out_unwind_new_pags: if (!pag) break; xfs_buf_hash_destroy(pag); + mutex_destroy(&pag->pag_ici_reclaim_lock); kmem_free(pag); } return error; -- cgit v1.2.3 From a8789a5ae28f69d7f3791a0e74f8c44222f3108b Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Fri, 12 Jan 2018 08:47:50 -0800 Subject: xfs: fix check on struct_version for versions 4 or greater It appears that the check for versions 4 or more is incorrect and is off-by-one. Fix this. Detected by CoverityScan, CID#1463775 ("Logically dead code") Fixes: ac503a4cc9e8 ("xfs: refactor the geometry structure filling function") Signed-off-by: Colin Ian King Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. 
Wong --- fs/xfs/libxfs/xfs_sb.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/xfs/libxfs/xfs_sb.c b/fs/xfs/libxfs/xfs_sb.c index 35b005d66977..869a2f3f0375 100644 --- a/fs/xfs/libxfs/xfs_sb.c +++ b/fs/xfs/libxfs/xfs_sb.c @@ -951,7 +951,7 @@ xfs_fs_geometry( geo->rtsectsize = sbp->sb_blocksize; geo->dirblocksize = xfs_dir2_dirblock_bytes(sbp); - if (struct_version < 3) + if (struct_version < 4) return 0; if (xfs_sb_version_haslogv2(sbp)) -- cgit v1.2.3 From ad90bb585c45917b6c1bb01c812fba337e689362 Mon Sep 17 00:00:00 2001 From: Brian Foster Date: Fri, 12 Jan 2018 14:07:21 -0800 Subject: xfs: account finobt blocks properly in perag reservation XFS started using the perag metadata reservation pool for free inode btree blocks in commit 76d771b4cbe33 ("xfs: use per-AG reservations for the finobt"). To handle backwards compatibility, finobt blocks are accounted against the pool so long as the full reservation is available at mount time. Otherwise the ->m_inotbt_nores flag is set and the filesystem falls back to the traditional per-transaction finobt reservation. This commit has two problems: - finobt blocks are always accounted against the metadata reservation on allocation, regardless of ->m_inotbt_nores state - finobt blocks are never returned to the reservation pool on free The first problem affects reflink+finobt filesystems where the full finobt reservation is not available at mount time. finobt blocks are essentially stolen from the reflink reservation, putting refcountbt management at risk of allocation failure. The second problem is an unconditional leak of metadata reservation whenever finobt is enabled. Update the finobt block allocation callouts to consider ->m_inotbt_nores and account blocks appropriately. Blocks should be consistently accounted against the metadata pool when ->m_inotbt_nores is false and otherwise tagged as RESV_NONE. Signed-off-by: Brian Foster Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. 
Wong --- fs/xfs/libxfs/xfs_ialloc_btree.c | 29 +++++++++++++++++++++++++---- 1 file changed, 25 insertions(+), 4 deletions(-) diff --git a/fs/xfs/libxfs/xfs_ialloc_btree.c b/fs/xfs/libxfs/xfs_ialloc_btree.c index 47f44d624cb1..af197a5f3a82 100644 --- a/fs/xfs/libxfs/xfs_ialloc_btree.c +++ b/fs/xfs/libxfs/xfs_ialloc_btree.c @@ -141,21 +141,42 @@ xfs_finobt_alloc_block( union xfs_btree_ptr *new, int *stat) { + if (cur->bc_mp->m_inotbt_nores) + return xfs_inobt_alloc_block(cur, start, new, stat); return __xfs_inobt_alloc_block(cur, start, new, stat, XFS_AG_RESV_METADATA); } STATIC int -xfs_inobt_free_block( +__xfs_inobt_free_block( struct xfs_btree_cur *cur, - struct xfs_buf *bp) + struct xfs_buf *bp, + enum xfs_ag_resv_type resv) { struct xfs_owner_info oinfo; xfs_rmap_ag_owner(&oinfo, XFS_RMAP_OWN_INOBT); return xfs_free_extent(cur->bc_tp, XFS_DADDR_TO_FSB(cur->bc_mp, XFS_BUF_ADDR(bp)), 1, - &oinfo, XFS_AG_RESV_NONE); + &oinfo, resv); +} + +STATIC int +xfs_inobt_free_block( + struct xfs_btree_cur *cur, + struct xfs_buf *bp) +{ + return __xfs_inobt_free_block(cur, bp, XFS_AG_RESV_NONE); +} + +STATIC int +xfs_finobt_free_block( + struct xfs_btree_cur *cur, + struct xfs_buf *bp) +{ + if (cur->bc_mp->m_inotbt_nores) + return xfs_inobt_free_block(cur, bp); + return __xfs_inobt_free_block(cur, bp, XFS_AG_RESV_METADATA); } STATIC int @@ -380,7 +401,7 @@ static const struct xfs_btree_ops xfs_finobt_ops = { .dup_cursor = xfs_inobt_dup_cursor, .set_root = xfs_finobt_set_root, .alloc_block = xfs_finobt_alloc_block, - .free_block = xfs_inobt_free_block, + .free_block = xfs_finobt_free_block, .get_minrecs = xfs_inobt_get_minrecs, .get_maxrecs = xfs_inobt_get_maxrecs, .init_key_from_rec = xfs_inobt_init_key_from_rec, -- cgit v1.2.3 From c468562879a766de2c2fbedd41b653a7bf4c157d Mon Sep 17 00:00:00 2001 From: Brian Foster Date: Tue, 16 Jan 2018 14:53:28 -0800 Subject: xfs: cancel tx on xfs_defer_finish() error during xattr set/remove Chris Dunlop reports a problem where an xattr 
operation fails, reports the following error to syslog and hangs during unmount: ================================================ [ BUG: lock held when returning to user space! ] ... ------------------------------------------------ is leaving the kernel with locks still held! 1 lock held by : #0: (sb_internal){......}, at: [] xfs_trans_alloc+0xe3/0x130 [xfs] The failure/shutdown occurs during deferred ops processing which leads to an error return from xfs_defer_finish() via xfs_attr_leaf_addname(). While the root cause of the failure is unknown corruption, the cause of the subsequent BUG above and unmount hang is failure to cancel the transaction before returning to userspace. The transaction is not cancelled because the out_defer_cancel error handling paths in the xfs_attr_[leaf|node]_[add|remove]name() functions clear args.trans without releasing the transaction. The callers therefore lose the reference to the transaction and fail to cancel it. Since xfs_attr_[set|remove]() always cancel args.trans when != NULL and xfs_defer_finish()->...->xfs_trans_roll() should always return with a valid transaction, update the leaf/node xattr functions to not reset args.trans in the error path responsible for cancelling deferred ops. Reported-by: Chris Dunlop Signed-off-by: Brian Foster Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. 
Wong --- fs/xfs/libxfs/xfs_attr.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/fs/xfs/libxfs/xfs_attr.c b/fs/xfs/libxfs/xfs_attr.c index a76914db72ef..ce4a34a2751d 100644 --- a/fs/xfs/libxfs/xfs_attr.c +++ b/fs/xfs/libxfs/xfs_attr.c @@ -717,7 +717,6 @@ xfs_attr_leaf_addname(xfs_da_args_t *args) return error; out_defer_cancel: xfs_defer_cancel(args->dfops); - args->trans = NULL; return error; } @@ -770,7 +769,6 @@ xfs_attr_leaf_removename(xfs_da_args_t *args) return 0; out_defer_cancel: xfs_defer_cancel(args->dfops); - args->trans = NULL; return error; } @@ -1045,7 +1043,6 @@ out: return retval; out_defer_cancel: xfs_defer_cancel(args->dfops); - args->trans = NULL; goto out; } @@ -1186,7 +1183,6 @@ out: return error; out_defer_cancel: xfs_defer_cancel(args->dfops); - args->trans = NULL; goto out; } -- cgit v1.2.3 From ce1d802e6a889b8ee53b3444c6d7e8cfecadac50 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Tue, 16 Jan 2018 18:52:12 -0800 Subject: xfs: add scrub cross-referencing helpers for the free space btrees Add a couple of functions to the free space btrees that will be used to cross-reference metadata against the bnobt/cntbt, and a generic btree function that provides the real implementation. Signed-off-by: Darrick J. Wong Reviewed-by: Dave Chinner --- fs/xfs/libxfs/xfs_alloc.c | 21 ++++++++++++++++++++- fs/xfs/libxfs/xfs_alloc.h | 10 ++++++++++ fs/xfs/libxfs/xfs_btree.c | 30 ++++++++++++++++++++++++++++++ fs/xfs/libxfs/xfs_btree.h | 2 ++ 4 files changed, 62 insertions(+), 1 deletion(-) diff --git a/fs/xfs/libxfs/xfs_alloc.c b/fs/xfs/libxfs/xfs_alloc.c index 50ba989481cc..6883a7668de6 100644 --- a/fs/xfs/libxfs/xfs_alloc.c +++ b/fs/xfs/libxfs/xfs_alloc.c @@ -167,7 +167,7 @@ xfs_alloc_lookup_ge( * Lookup the first record less than or equal to [bno, len] * in the btree given by cur. 
*/ -static int /* error */ +int /* error */ xfs_alloc_lookup_le( struct xfs_btree_cur *cur, /* btree cursor */ xfs_agblock_t bno, /* starting block of extent */ @@ -2996,3 +2996,22 @@ xfs_verify_fsbno( return false; return xfs_verify_agbno(mp, agno, XFS_FSB_TO_AGBNO(mp, fsbno)); } + +/* Is there a record covering a given extent? */ +int +xfs_alloc_has_record( + struct xfs_btree_cur *cur, + xfs_agblock_t bno, + xfs_extlen_t len, + bool *exists) +{ + union xfs_btree_irec low; + union xfs_btree_irec high; + + memset(&low, 0, sizeof(low)); + low.a.ar_startblock = bno; + memset(&high, 0xFF, sizeof(high)); + high.a.ar_startblock = bno + len - 1; + + return xfs_btree_has_record(cur, &low, &high, exists); +} diff --git a/fs/xfs/libxfs/xfs_alloc.h b/fs/xfs/libxfs/xfs_alloc.h index 7ba2d129d504..65a0cafe06e4 100644 --- a/fs/xfs/libxfs/xfs_alloc.h +++ b/fs/xfs/libxfs/xfs_alloc.h @@ -197,6 +197,13 @@ xfs_free_extent( struct xfs_owner_info *oinfo, /* extent owner */ enum xfs_ag_resv_type type); /* block reservation type */ +int /* error */ +xfs_alloc_lookup_le( + struct xfs_btree_cur *cur, /* btree cursor */ + xfs_agblock_t bno, /* starting block of extent */ + xfs_extlen_t len, /* length of extent */ + int *stat); /* success/failure */ + int /* error */ xfs_alloc_lookup_ge( struct xfs_btree_cur *cur, /* btree cursor */ @@ -237,4 +244,7 @@ bool xfs_verify_agbno(struct xfs_mount *mp, xfs_agnumber_t agno, xfs_agblock_t agbno); bool xfs_verify_fsbno(struct xfs_mount *mp, xfs_fsblock_t fsbno); +int xfs_alloc_has_record(struct xfs_btree_cur *cur, xfs_agblock_t bno, + xfs_extlen_t len, bool *exist); + #endif /* __XFS_ALLOC_H__ */ diff --git a/fs/xfs/libxfs/xfs_btree.c b/fs/xfs/libxfs/xfs_btree.c index 2b2be1d6c00d..567cff5ed511 100644 --- a/fs/xfs/libxfs/xfs_btree.c +++ b/fs/xfs/libxfs/xfs_btree.c @@ -4998,3 +4998,33 @@ xfs_btree_diff_two_ptrs( return (int64_t)be64_to_cpu(a->l) - be64_to_cpu(b->l); return (int64_t)be32_to_cpu(a->s) - be32_to_cpu(b->s); } + +/* If there's an extent, 
we're done. */ +STATIC int +xfs_btree_has_record_helper( + struct xfs_btree_cur *cur, + union xfs_btree_rec *rec, + void *priv) +{ + return XFS_BTREE_QUERY_RANGE_ABORT; +} + +/* Is there a record covering a given range of keys? */ +int +xfs_btree_has_record( + struct xfs_btree_cur *cur, + union xfs_btree_irec *low, + union xfs_btree_irec *high, + bool *exists) +{ + int error; + + error = xfs_btree_query_range(cur, low, high, + &xfs_btree_has_record_helper, NULL); + if (error == XFS_BTREE_QUERY_RANGE_ABORT) { + *exists = true; + return 0; + } + *exists = false; + return error; +} diff --git a/fs/xfs/libxfs/xfs_btree.h b/fs/xfs/libxfs/xfs_btree.h index 2f13b8676f41..50440b5618e8 100644 --- a/fs/xfs/libxfs/xfs_btree.h +++ b/fs/xfs/libxfs/xfs_btree.h @@ -547,5 +547,7 @@ void xfs_btree_get_keys(struct xfs_btree_cur *cur, struct xfs_btree_block *block, union xfs_btree_key *key); union xfs_btree_key *xfs_btree_high_key_from_key(struct xfs_btree_cur *cur, union xfs_btree_key *key); +int xfs_btree_has_record(struct xfs_btree_cur *cur, union xfs_btree_irec *low, + union xfs_btree_irec *high, bool *exists); #endif /* __XFS_BTREE_H__ */ -- cgit v1.2.3 From 2e001266b67c865ad904e1889658282d0773b207 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Tue, 16 Jan 2018 18:52:12 -0800 Subject: xfs: add scrub cross-referencing helpers for the inode btrees Add a couple of functions to the inode btrees that will be used to cross-reference metadata against the inobt. Signed-off-by: Darrick J. 
Wong Reviewed-by: Dave Chinner --- fs/xfs/libxfs/xfs_ialloc.c | 99 ++++++++++++++++++++++++++++++++++++++++++++++ fs/xfs/libxfs/xfs_ialloc.h | 6 +++ 2 files changed, 105 insertions(+) diff --git a/fs/xfs/libxfs/xfs_ialloc.c b/fs/xfs/libxfs/xfs_ialloc.c index c01ed9cfc5ae..3625d1da7462 100644 --- a/fs/xfs/libxfs/xfs_ialloc.c +++ b/fs/xfs/libxfs/xfs_ialloc.c @@ -2753,3 +2753,102 @@ xfs_verify_dir_ino( return false; return xfs_verify_ino(mp, ino); } + +/* Is there an inode record covering a given range of inode numbers? */ +int +xfs_ialloc_has_inode_record( + struct xfs_btree_cur *cur, + xfs_agino_t low, + xfs_agino_t high, + bool *exists) +{ + struct xfs_inobt_rec_incore irec; + xfs_agino_t agino; + uint16_t holemask; + int has_record; + int i; + int error; + + *exists = false; + error = xfs_inobt_lookup(cur, low, XFS_LOOKUP_LE, &has_record); + while (error == 0 && has_record) { + error = xfs_inobt_get_rec(cur, &irec, &has_record); + if (error || irec.ir_startino > high) + break; + + agino = irec.ir_startino; + holemask = irec.ir_holemask; + for (i = 0; i < XFS_INOBT_HOLEMASK_BITS; holemask >>= 1, + i++, agino += XFS_INODES_PER_HOLEMASK_BIT) { + if (holemask & 1) + continue; + if (agino + XFS_INODES_PER_HOLEMASK_BIT > low && + agino <= high) { + *exists = true; + return 0; + } + } + + error = xfs_btree_increment(cur, 0, &has_record); + } + return error; +} + +/* Is there an inode record covering a given extent? */ +int +xfs_ialloc_has_inodes_at_extent( + struct xfs_btree_cur *cur, + xfs_agblock_t bno, + xfs_extlen_t len, + bool *exists) +{ + xfs_agino_t low; + xfs_agino_t high; + + low = XFS_OFFBNO_TO_AGINO(cur->bc_mp, bno, 0); + high = XFS_OFFBNO_TO_AGINO(cur->bc_mp, bno + len, 0) - 1; + + return xfs_ialloc_has_inode_record(cur, low, high, exists); +} + +struct xfs_ialloc_count_inodes { + xfs_agino_t count; + xfs_agino_t freecount; +}; + +/* Record inode counts across all inobt records. 
*/ +STATIC int +xfs_ialloc_count_inodes_rec( + struct xfs_btree_cur *cur, + union xfs_btree_rec *rec, + void *priv) +{ + struct xfs_inobt_rec_incore irec; + struct xfs_ialloc_count_inodes *ci = priv; + + xfs_inobt_btrec_to_irec(cur->bc_mp, rec, &irec); + ci->count += irec.ir_count; + ci->freecount += irec.ir_freecount; + + return 0; +} + +/* Count allocated and free inodes under an inobt. */ +int +xfs_ialloc_count_inodes( + struct xfs_btree_cur *cur, + xfs_agino_t *count, + xfs_agino_t *freecount) +{ + struct xfs_ialloc_count_inodes ci = {0}; + int error; + + ASSERT(cur->bc_btnum == XFS_BTNUM_INO); + error = xfs_btree_query_all(cur, xfs_ialloc_count_inodes_rec, &ci); + if (error) + return error; + + *count = ci.count; + *freecount = ci.freecount; + return 0; +} diff --git a/fs/xfs/libxfs/xfs_ialloc.h b/fs/xfs/libxfs/xfs_ialloc.h index 66a8de0b1caa..c5402bb4ce0c 100644 --- a/fs/xfs/libxfs/xfs_ialloc.h +++ b/fs/xfs/libxfs/xfs_ialloc.h @@ -170,6 +170,12 @@ int xfs_read_agi(struct xfs_mount *mp, struct xfs_trans *tp, union xfs_btree_rec; void xfs_inobt_btrec_to_irec(struct xfs_mount *mp, union xfs_btree_rec *rec, struct xfs_inobt_rec_incore *irec); +int xfs_ialloc_has_inodes_at_extent(struct xfs_btree_cur *cur, + xfs_agblock_t bno, xfs_extlen_t len, bool *exists); +int xfs_ialloc_has_inode_record(struct xfs_btree_cur *cur, xfs_agino_t low, + xfs_agino_t high, bool *exists); +int xfs_ialloc_count_inodes(struct xfs_btree_cur *cur, xfs_agino_t *count, + xfs_agino_t *freecount); int xfs_ialloc_cluster_alignment(struct xfs_mount *mp); void xfs_ialloc_agino_range(struct xfs_mount *mp, xfs_agnumber_t agno, -- cgit v1.2.3 From ed7c52d4bf92ac1f05b8c251a44a8bf4688f8786 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Tue, 16 Jan 2018 18:52:13 -0800 Subject: xfs: add scrub cross-referencing helpers for the rmap btrees Add a couple of functions to the rmap btrees that will be used to cross-reference metadata against the rmapbt. Signed-off-by: Darrick J. 
Wong Reviewed-by: Dave Chinner --- fs/xfs/libxfs/xfs_rmap.c | 67 ++++++++++++++++++++++++++++++++++++++++++++++++ fs/xfs/libxfs/xfs_rmap.h | 5 ++++ 2 files changed, 72 insertions(+) diff --git a/fs/xfs/libxfs/xfs_rmap.c b/fs/xfs/libxfs/xfs_rmap.c index 50db920ceeeb..79822cf6ebe3 100644 --- a/fs/xfs/libxfs/xfs_rmap.c +++ b/fs/xfs/libxfs/xfs_rmap.c @@ -2387,3 +2387,70 @@ xfs_rmap_compare( else return 0; } + +/* Is there a record covering a given extent? */ +int +xfs_rmap_has_record( + struct xfs_btree_cur *cur, + xfs_agblock_t bno, + xfs_extlen_t len, + bool *exists) +{ + union xfs_btree_irec low; + union xfs_btree_irec high; + + memset(&low, 0, sizeof(low)); + low.r.rm_startblock = bno; + memset(&high, 0xFF, sizeof(high)); + high.r.rm_startblock = bno + len - 1; + + return xfs_btree_has_record(cur, &low, &high, exists); +} + +/* + * Is there a record for this owner completely covering a given physical + * extent? If so, *has_rmap will be set to true. If there is no record + * or the record only covers part of the range, we set *has_rmap to false. + * This function doesn't perform range lookups or offset checks, so it is + * not suitable for checking data fork blocks. 
+ */ +int +xfs_rmap_record_exists( + struct xfs_btree_cur *cur, + xfs_agblock_t bno, + xfs_extlen_t len, + struct xfs_owner_info *oinfo, + bool *has_rmap) +{ + uint64_t owner; + uint64_t offset; + unsigned int flags; + int has_record; + struct xfs_rmap_irec irec; + int error; + + xfs_owner_info_unpack(oinfo, &owner, &offset, &flags); + ASSERT(XFS_RMAP_NON_INODE_OWNER(owner) || + (flags & XFS_RMAP_BMBT_BLOCK)); + + error = xfs_rmap_lookup_le(cur, bno, len, owner, offset, flags, + &has_record); + if (error) + return error; + if (!has_record) { + *has_rmap = false; + return 0; + } + + error = xfs_rmap_get_rec(cur, &irec, &has_record); + if (error) + return error; + if (!has_record) { + *has_rmap = false; + return 0; + } + + *has_rmap = (irec.rm_owner == owner && irec.rm_startblock <= bno && + irec.rm_startblock + irec.rm_blockcount >= bno + len); + return 0; +} diff --git a/fs/xfs/libxfs/xfs_rmap.h b/fs/xfs/libxfs/xfs_rmap.h index 0fcd5b1ba729..380e53be98d5 100644 --- a/fs/xfs/libxfs/xfs_rmap.h +++ b/fs/xfs/libxfs/xfs_rmap.h @@ -233,5 +233,10 @@ int xfs_rmap_compare(const struct xfs_rmap_irec *a, union xfs_btree_rec; int xfs_rmap_btrec_to_irec(union xfs_btree_rec *rec, struct xfs_rmap_irec *irec); +int xfs_rmap_has_record(struct xfs_btree_cur *cur, xfs_agblock_t bno, + xfs_extlen_t len, bool *exists); +int xfs_rmap_record_exists(struct xfs_btree_cur *cur, xfs_agblock_t bno, + xfs_extlen_t len, struct xfs_owner_info *oinfo, + bool *has_rmap); #endif /* __XFS_RMAP_H__ */ -- cgit v1.2.3 From 49db55eca5665e32c9d3e67a7d5694bcc6c274de Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Tue, 16 Jan 2018 18:52:14 -0800 Subject: xfs: add scrub cross-referencing helpers for the refcount btrees Add a couple of functions to the refcount btrees that will be used to cross-reference metadata against the refcountbt. Signed-off-by: Darrick J. 
Wong Reviewed-by: Dave Chinner --- fs/xfs/libxfs/xfs_refcount.c | 19 +++++++++++++++++++ fs/xfs/libxfs/xfs_refcount.h | 3 +++ 2 files changed, 22 insertions(+) diff --git a/fs/xfs/libxfs/xfs_refcount.c b/fs/xfs/libxfs/xfs_refcount.c index c40d26763075..bee68c23d612 100644 --- a/fs/xfs/libxfs/xfs_refcount.c +++ b/fs/xfs/libxfs/xfs_refcount.c @@ -1696,3 +1696,22 @@ out_cursor: xfs_trans_brelse(tp, agbp); goto out_trans; } + +/* Is there a record covering a given extent? */ +int +xfs_refcount_has_record( + struct xfs_btree_cur *cur, + xfs_agblock_t bno, + xfs_extlen_t len, + bool *exists) +{ + union xfs_btree_irec low; + union xfs_btree_irec high; + + memset(&low, 0, sizeof(low)); + low.rc.rc_startblock = bno; + memset(&high, 0xFF, sizeof(high)); + high.rc.rc_startblock = bno + len - 1; + + return xfs_btree_has_record(cur, &low, &high, exists); +} diff --git a/fs/xfs/libxfs/xfs_refcount.h b/fs/xfs/libxfs/xfs_refcount.h index eafb9d1f3b37..2a731ac68fe4 100644 --- a/fs/xfs/libxfs/xfs_refcount.h +++ b/fs/xfs/libxfs/xfs_refcount.h @@ -83,4 +83,7 @@ static inline xfs_fileoff_t xfs_refcount_max_unmap(int log_res) return (log_res * 3 / 4) / XFS_REFCOUNT_ITEM_OVERHEAD; } +extern int xfs_refcount_has_record(struct xfs_btree_cur *cur, + xfs_agblock_t bno, xfs_extlen_t len, bool *exists); + #endif /* __XFS_REFCOUNT_H__ */ -- cgit v1.2.3 From 64b12563b2b5abf78ebd9d4b3c2e4062d9aedc61 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Tue, 16 Jan 2018 18:52:14 -0800 Subject: xfs: set up scrub cross-referencing helpers Create some helper functions that we'll use later to deal with problems we might encounter while cross referencing metadata with other metadata. Signed-off-by: Darrick J. 
Wong Reviewed-by: Dave Chinner --- fs/xfs/scrub/btree.c | 70 +++++++++++++++++++++----- fs/xfs/scrub/btree.h | 9 ++++ fs/xfs/scrub/common.c | 135 ++++++++++++++++++++++++++++++++++++++++++++++---- fs/xfs/scrub/common.h | 16 ++++++ fs/xfs/scrub/scrub.c | 10 ++++ fs/xfs/scrub/trace.h | 22 ++++++++ 6 files changed, 240 insertions(+), 22 deletions(-) diff --git a/fs/xfs/scrub/btree.c b/fs/xfs/scrub/btree.c index df0766132ace..4751ed03b9fb 100644 --- a/fs/xfs/scrub/btree.c +++ b/fs/xfs/scrub/btree.c @@ -42,12 +42,14 @@ * Check for btree operation errors. See the section about handling * operational errors in common.c. */ -bool -xfs_scrub_btree_process_error( +static bool +__xfs_scrub_btree_process_error( struct xfs_scrub_context *sc, struct xfs_btree_cur *cur, int level, - int *error) + int *error, + __u32 errflag, + void *ret_ip) { if (*error == 0) return true; @@ -60,36 +62,80 @@ xfs_scrub_btree_process_error( case -EFSBADCRC: case -EFSCORRUPTED: /* Note the badness but don't abort. */ - sc->sm->sm_flags |= XFS_SCRUB_OFLAG_CORRUPT; + sc->sm->sm_flags |= errflag; *error = 0; /* fall through */ default: if (cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) trace_xfs_scrub_ifork_btree_op_error(sc, cur, level, - *error, __return_address); + *error, ret_ip); else trace_xfs_scrub_btree_op_error(sc, cur, level, - *error, __return_address); + *error, ret_ip); break; } return false; } +bool +xfs_scrub_btree_process_error( + struct xfs_scrub_context *sc, + struct xfs_btree_cur *cur, + int level, + int *error) +{ + return __xfs_scrub_btree_process_error(sc, cur, level, error, + XFS_SCRUB_OFLAG_CORRUPT, __return_address); +} + +bool +xfs_scrub_btree_xref_process_error( + struct xfs_scrub_context *sc, + struct xfs_btree_cur *cur, + int level, + int *error) +{ + return __xfs_scrub_btree_process_error(sc, cur, level, error, + XFS_SCRUB_OFLAG_XFAIL, __return_address); +} + /* Record btree block corruption. 
*/ -void -xfs_scrub_btree_set_corrupt( +static void +__xfs_scrub_btree_set_corrupt( struct xfs_scrub_context *sc, struct xfs_btree_cur *cur, - int level) + int level, + __u32 errflag, + void *ret_ip) { - sc->sm->sm_flags |= XFS_SCRUB_OFLAG_CORRUPT; + sc->sm->sm_flags |= errflag; if (cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) trace_xfs_scrub_ifork_btree_error(sc, cur, level, - __return_address); + ret_ip); else trace_xfs_scrub_btree_error(sc, cur, level, - __return_address); + ret_ip); +} + +void +xfs_scrub_btree_set_corrupt( + struct xfs_scrub_context *sc, + struct xfs_btree_cur *cur, + int level) +{ + __xfs_scrub_btree_set_corrupt(sc, cur, level, XFS_SCRUB_OFLAG_CORRUPT, + __return_address); +} + +void +xfs_scrub_btree_xref_set_corrupt( + struct xfs_scrub_context *sc, + struct xfs_btree_cur *cur, + int level) +{ + __xfs_scrub_btree_set_corrupt(sc, cur, level, XFS_SCRUB_OFLAG_XCORRUPT, + __return_address); } /* diff --git a/fs/xfs/scrub/btree.h b/fs/xfs/scrub/btree.h index 4de825a626d1..e2b868ede70b 100644 --- a/fs/xfs/scrub/btree.h +++ b/fs/xfs/scrub/btree.h @@ -26,10 +26,19 @@ bool xfs_scrub_btree_process_error(struct xfs_scrub_context *sc, struct xfs_btree_cur *cur, int level, int *error); +/* Check for btree xref operation errors. */ +bool xfs_scrub_btree_xref_process_error(struct xfs_scrub_context *sc, + struct xfs_btree_cur *cur, int level, + int *error); + /* Check for btree corruption. */ void xfs_scrub_btree_set_corrupt(struct xfs_scrub_context *sc, struct xfs_btree_cur *cur, int level); +/* Check for btree xref discrepancies. */ +void xfs_scrub_btree_xref_set_corrupt(struct xfs_scrub_context *sc, + struct xfs_btree_cur *cur, int level); + struct xfs_scrub_btree; typedef int (*xfs_scrub_btree_rec_fn)( struct xfs_scrub_btree *bs, diff --git a/fs/xfs/scrub/common.c b/fs/xfs/scrub/common.c index d5c37d8d2fe6..68fea09cd673 100644 --- a/fs/xfs/scrub/common.c +++ b/fs/xfs/scrub/common.c @@ -78,12 +78,14 @@ */ /* Check for operational errors. 
*/ -bool -xfs_scrub_process_error( +static bool +__xfs_scrub_process_error( struct xfs_scrub_context *sc, xfs_agnumber_t agno, xfs_agblock_t bno, - int *error) + int *error, + __u32 errflag, + void *ret_ip) { switch (*error) { case 0: @@ -95,24 +97,48 @@ xfs_scrub_process_error( case -EFSBADCRC: case -EFSCORRUPTED: /* Note the badness but don't abort. */ - sc->sm->sm_flags |= XFS_SCRUB_OFLAG_CORRUPT; + sc->sm->sm_flags |= errflag; *error = 0; /* fall through */ default: trace_xfs_scrub_op_error(sc, agno, bno, *error, - __return_address); + ret_ip); break; } return false; } -/* Check for operational errors for a file offset. */ bool -xfs_scrub_fblock_process_error( +xfs_scrub_process_error( + struct xfs_scrub_context *sc, + xfs_agnumber_t agno, + xfs_agblock_t bno, + int *error) +{ + return __xfs_scrub_process_error(sc, agno, bno, error, + XFS_SCRUB_OFLAG_CORRUPT, __return_address); +} + +bool +xfs_scrub_xref_process_error( + struct xfs_scrub_context *sc, + xfs_agnumber_t agno, + xfs_agblock_t bno, + int *error) +{ + return __xfs_scrub_process_error(sc, agno, bno, error, + XFS_SCRUB_OFLAG_XFAIL, __return_address); +} + +/* Check for operational errors for a file offset. */ +static bool +__xfs_scrub_fblock_process_error( struct xfs_scrub_context *sc, int whichfork, xfs_fileoff_t offset, - int *error) + int *error, + __u32 errflag, + void *ret_ip) { switch (*error) { case 0: @@ -124,17 +150,39 @@ xfs_scrub_fblock_process_error( case -EFSBADCRC: case -EFSCORRUPTED: /* Note the badness but don't abort. 
*/ - sc->sm->sm_flags |= XFS_SCRUB_OFLAG_CORRUPT; + sc->sm->sm_flags |= errflag; *error = 0; /* fall through */ default: trace_xfs_scrub_file_op_error(sc, whichfork, offset, *error, - __return_address); + ret_ip); break; } return false; } +bool +xfs_scrub_fblock_process_error( + struct xfs_scrub_context *sc, + int whichfork, + xfs_fileoff_t offset, + int *error) +{ + return __xfs_scrub_fblock_process_error(sc, whichfork, offset, error, + XFS_SCRUB_OFLAG_CORRUPT, __return_address); +} + +bool +xfs_scrub_fblock_xref_process_error( + struct xfs_scrub_context *sc, + int whichfork, + xfs_fileoff_t offset, + int *error) +{ + return __xfs_scrub_fblock_process_error(sc, whichfork, offset, error, + XFS_SCRUB_OFLAG_XFAIL, __return_address); +} + /* * Handling scrub corruption/optimization/warning checks. * @@ -183,6 +231,16 @@ xfs_scrub_block_set_corrupt( trace_xfs_scrub_block_error(sc, bp->b_bn, __return_address); } +/* Record a corruption while cross-referencing. */ +void +xfs_scrub_block_xref_set_corrupt( + struct xfs_scrub_context *sc, + struct xfs_buf *bp) +{ + sc->sm->sm_flags |= XFS_SCRUB_OFLAG_XCORRUPT; + trace_xfs_scrub_block_error(sc, bp->b_bn, __return_address); +} + /* * Record a corrupt inode. The trace data will include the block given * by bp if bp is given; otherwise it will use the block location of the @@ -198,6 +256,17 @@ xfs_scrub_ino_set_corrupt( trace_xfs_scrub_ino_error(sc, ino, bp ? bp->b_bn : 0, __return_address); } +/* Record a corruption while cross-referencing with an inode. */ +void +xfs_scrub_ino_xref_set_corrupt( + struct xfs_scrub_context *sc, + xfs_ino_t ino, + struct xfs_buf *bp) +{ + sc->sm->sm_flags |= XFS_SCRUB_OFLAG_XCORRUPT; + trace_xfs_scrub_ino_error(sc, ino, bp ? bp->b_bn : 0, __return_address); +} + /* Record corruption in a block indexed by a file fork. 
*/ void xfs_scrub_fblock_set_corrupt( @@ -209,6 +278,17 @@ xfs_scrub_fblock_set_corrupt( trace_xfs_scrub_fblock_error(sc, whichfork, offset, __return_address); } +/* Record a corruption while cross-referencing a fork block. */ +void +xfs_scrub_fblock_xref_set_corrupt( + struct xfs_scrub_context *sc, + int whichfork, + xfs_fileoff_t offset) +{ + sc->sm->sm_flags |= XFS_SCRUB_OFLAG_XCORRUPT; + trace_xfs_scrub_fblock_error(sc, whichfork, offset, __return_address); +} + /* * Warn about inodes that need administrative review but is not * incorrect. @@ -588,3 +668,38 @@ out: /* scrub teardown will unlock and release the inode for us */ return error; } + +/* + * Predicate that decides if we need to evaluate the cross-reference check. + * If there was an error accessing the cross-reference btree, just delete + * the cursor and skip the check. + */ +bool +xfs_scrub_should_check_xref( + struct xfs_scrub_context *sc, + int *error, + struct xfs_btree_cur **curpp) +{ + if (*error == 0) + return true; + + if (curpp) { + /* If we've already given up on xref, just bail out. */ + if (!*curpp) + return false; + + /* xref error, delete cursor and bail out. */ + xfs_btree_del_cursor(*curpp, XFS_BTREE_ERROR); + *curpp = NULL; + } + + sc->sm->sm_flags |= XFS_SCRUB_OFLAG_XFAIL; + trace_xfs_scrub_xref_error(sc, *error, __return_address); + + /* + * Errors encountered during cross-referencing with another + * data structure should not cause this scrubber to abort. 
+ */ + *error = 0; + return false; +} diff --git a/fs/xfs/scrub/common.h b/fs/xfs/scrub/common.h index fe12053aa0e7..84c302f1d634 100644 --- a/fs/xfs/scrub/common.h +++ b/fs/xfs/scrub/common.h @@ -56,6 +56,11 @@ bool xfs_scrub_process_error(struct xfs_scrub_context *sc, xfs_agnumber_t agno, bool xfs_scrub_fblock_process_error(struct xfs_scrub_context *sc, int whichfork, xfs_fileoff_t offset, int *error); +bool xfs_scrub_xref_process_error(struct xfs_scrub_context *sc, + xfs_agnumber_t agno, xfs_agblock_t bno, int *error); +bool xfs_scrub_fblock_xref_process_error(struct xfs_scrub_context *sc, + int whichfork, xfs_fileoff_t offset, int *error); + void xfs_scrub_block_set_preen(struct xfs_scrub_context *sc, struct xfs_buf *bp); void xfs_scrub_ino_set_preen(struct xfs_scrub_context *sc, xfs_ino_t ino, @@ -68,6 +73,13 @@ void xfs_scrub_ino_set_corrupt(struct xfs_scrub_context *sc, xfs_ino_t ino, void xfs_scrub_fblock_set_corrupt(struct xfs_scrub_context *sc, int whichfork, xfs_fileoff_t offset); +void xfs_scrub_block_xref_set_corrupt(struct xfs_scrub_context *sc, + struct xfs_buf *bp); +void xfs_scrub_ino_xref_set_corrupt(struct xfs_scrub_context *sc, xfs_ino_t ino, + struct xfs_buf *bp); +void xfs_scrub_fblock_xref_set_corrupt(struct xfs_scrub_context *sc, + int whichfork, xfs_fileoff_t offset); + void xfs_scrub_ino_set_warning(struct xfs_scrub_context *sc, xfs_ino_t ino, struct xfs_buf *bp); void xfs_scrub_fblock_set_warning(struct xfs_scrub_context *sc, int whichfork, @@ -76,6 +88,10 @@ void xfs_scrub_fblock_set_warning(struct xfs_scrub_context *sc, int whichfork, void xfs_scrub_set_incomplete(struct xfs_scrub_context *sc); int xfs_scrub_checkpoint_log(struct xfs_mount *mp); +/* Are we set up for a cross-referencing check? 
*/ +bool xfs_scrub_should_check_xref(struct xfs_scrub_context *sc, int *error, + struct xfs_btree_cur **curpp); + /* Setup functions */ int xfs_scrub_setup_fs(struct xfs_scrub_context *sc, struct xfs_inode *ip); int xfs_scrub_setup_ag_allocbt(struct xfs_scrub_context *sc, diff --git a/fs/xfs/scrub/scrub.c b/fs/xfs/scrub/scrub.c index cd4607782a19..0ed2a123cbb8 100644 --- a/fs/xfs/scrub/scrub.c +++ b/fs/xfs/scrub/scrub.c @@ -110,6 +110,16 @@ * structure itself is corrupt, the CORRUPT flag will be set. If * the metadata is correct but otherwise suboptimal, the PREEN flag * will be set. + * + * We perform secondary validation of filesystem metadata by + * cross-referencing every record with all other available metadata. + * For example, for block mapping extents, we verify that there are no + * records in the free space and inode btrees corresponding to that + * space extent and that there is a corresponding entry in the reverse + * mapping btree. Inconsistent metadata is noted by setting the + * XCORRUPT flag; btree query function errors are noted by setting the + * XFAIL flag and deleting the cursor to prevent further attempts to + * cross-reference with a defective btree. 
*/ /* diff --git a/fs/xfs/scrub/trace.h b/fs/xfs/scrub/trace.h index ffa4a70ebdcc..a0a6d3cd131a 100644 --- a/fs/xfs/scrub/trace.h +++ b/fs/xfs/scrub/trace.h @@ -491,6 +491,28 @@ DEFINE_EVENT(xfs_scrub_sbtree_class, name, \ DEFINE_SCRUB_SBTREE_EVENT(xfs_scrub_btree_rec); DEFINE_SCRUB_SBTREE_EVENT(xfs_scrub_btree_key); +TRACE_EVENT(xfs_scrub_xref_error, + TP_PROTO(struct xfs_scrub_context *sc, int error, void *ret_ip), + TP_ARGS(sc, error, ret_ip), + TP_STRUCT__entry( + __field(dev_t, dev) + __field(int, type) + __field(int, error) + __field(void *, ret_ip) + ), + TP_fast_assign( + __entry->dev = sc->mp->m_super->s_dev; + __entry->type = sc->sm->sm_type; + __entry->error = error; + __entry->ret_ip = ret_ip; + ), + TP_printk("dev %d:%d type %u xref error %d ret_ip %pF", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->type, + __entry->error, + __entry->ret_ip) +); + #endif /* _TRACE_XFS_SCRUB_TRACE_H */ #undef TRACE_INCLUDE_PATH -- cgit v1.2.3 From 9a7e26956661049103493c229eda20d344d492ae Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Tue, 16 Jan 2018 18:52:44 -0800 Subject: xfs: fix a few erroneous process_error calls in the scrubbers There are a few places where we make a libxfs api call on behalf of some object other than the one we're scrubbing but inadvertently call the regular process_error function. When this happens we mark the object corrupt even though it was corruption in /some other/ object that actually produced the -EFSCORRUPTED code. The correct output flag for these situations is SCRUB_OFLAG_XFAIL, not SCRUB_OFLAG_CORRUPT, so fix this now that we also have a helper to set these. Signed-off-by: Darrick J. 
Wong Reviewed-by: Dave Chinner --- fs/xfs/scrub/dir.c | 2 +- fs/xfs/scrub/inode.c | 2 +- fs/xfs/scrub/parent.c | 6 +++--- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/fs/xfs/scrub/dir.c b/fs/xfs/scrub/dir.c index 69e1efdd4019..e75826bb6516 100644 --- a/fs/xfs/scrub/dir.c +++ b/fs/xfs/scrub/dir.c @@ -92,7 +92,7 @@ xfs_scrub_dir_check_ftype( * inodes can trigger immediate inactive cleanup of the inode. */ error = xfs_iget(mp, sdc->sc->tp, inum, 0, 0, &ip); - if (!xfs_scrub_fblock_process_error(sdc->sc, XFS_DATA_FORK, offset, + if (!xfs_scrub_fblock_xref_process_error(sdc->sc, XFS_DATA_FORK, offset, &error)) goto out; diff --git a/fs/xfs/scrub/inode.c b/fs/xfs/scrub/inode.c index 59a4fce91a2b..1c5b64667705 100644 --- a/fs/xfs/scrub/inode.c +++ b/fs/xfs/scrub/inode.c @@ -619,7 +619,7 @@ xfs_scrub_inode( if (xfs_is_reflink_inode(sc->ip)) { error = xfs_reflink_inode_has_shared_extents(sc->tp, sc->ip, &has_shared); - if (!xfs_scrub_process_error(sc, XFS_INO_TO_AGNO(mp, ino), + if (!xfs_scrub_xref_process_error(sc, XFS_INO_TO_AGNO(mp, ino), XFS_INO_TO_AGBNO(mp, ino), &error)) goto out; if (!has_shared) diff --git a/fs/xfs/scrub/parent.c b/fs/xfs/scrub/parent.c index b4d2f8406d22..0d3851410c74 100644 --- a/fs/xfs/scrub/parent.c +++ b/fs/xfs/scrub/parent.c @@ -169,7 +169,7 @@ xfs_scrub_parent_validate( * immediate inactive cleanup of the inode. 
*/ error = xfs_iget(mp, sc->tp, dnum, 0, 0, &dp); - if (!xfs_scrub_fblock_process_error(sc, XFS_DATA_FORK, 0, &error)) + if (!xfs_scrub_fblock_xref_process_error(sc, XFS_DATA_FORK, 0, &error)) goto out; if (dp == sc->ip || !S_ISDIR(VFS_I(dp)->i_mode)) { xfs_scrub_fblock_set_corrupt(sc, XFS_DATA_FORK, 0); @@ -185,7 +185,7 @@ xfs_scrub_parent_validate( */ if (xfs_ilock_nowait(dp, XFS_IOLOCK_SHARED)) { error = xfs_scrub_parent_count_parent_dentries(sc, dp, &nlink); - if (!xfs_scrub_fblock_process_error(sc, XFS_DATA_FORK, 0, + if (!xfs_scrub_fblock_xref_process_error(sc, XFS_DATA_FORK, 0, &error)) goto out_unlock; if (nlink != expected_nlink) @@ -205,7 +205,7 @@ xfs_scrub_parent_validate( /* Go looking for our dentry. */ error = xfs_scrub_parent_count_parent_dentries(sc, dp, &nlink); - if (!xfs_scrub_fblock_process_error(sc, XFS_DATA_FORK, 0, &error)) + if (!xfs_scrub_fblock_xref_process_error(sc, XFS_DATA_FORK, 0, &error)) goto out_unlock; /* Drop the parent lock, relock this inode. */ -- cgit v1.2.3 From 858333dcf021a46baef6505beac329c495fbfcf3 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Tue, 16 Jan 2018 18:53:05 -0800 Subject: xfs: check btree block ownership with bnobt/rmapbt when scrubbing btree When scanning a metadata btree block, cross-reference the block location with the free space btree and the reverse mapping btree to ensure that the rmapbt knows about the block and the bnobt does not. Add a mechanism to defer checks when we happen to be scanning the bnobt/rmapbt itself because it's less efficient to repeatedly clone and destroy the cursor. This patch provides the framework to make btree block owner checks happen; the actual meat will be added in subsequent patches. Signed-off-by: Darrick J. 
Wong Reviewed-by: Dave Chinner --- fs/xfs/scrub/btree.c | 93 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 93 insertions(+) diff --git a/fs/xfs/scrub/btree.c b/fs/xfs/scrub/btree.c index 4751ed03b9fb..e671d694908b 100644 --- a/fs/xfs/scrub/btree.c +++ b/fs/xfs/scrub/btree.c @@ -361,6 +361,80 @@ out: return error; } +struct check_owner { + struct list_head list; + xfs_daddr_t daddr; + int level; +}; + +/* + * Make sure this btree block isn't in the free list and that there's + * an rmap record for it. + */ +STATIC int +xfs_scrub_btree_check_block_owner( + struct xfs_scrub_btree *bs, + int level, + xfs_daddr_t daddr) +{ + xfs_agnumber_t agno; + bool init_sa; + int error = 0; + + if (!bs->cur) + return 0; + + agno = xfs_daddr_to_agno(bs->cur->bc_mp, daddr); + + init_sa = bs->cur->bc_flags & XFS_BTREE_LONG_PTRS; + if (init_sa) { + error = xfs_scrub_ag_init(bs->sc, agno, &bs->sc->sa); + if (!xfs_scrub_btree_xref_process_error(bs->sc, bs->cur, + level, &error)) + return error; + } + + if (init_sa) + xfs_scrub_ag_free(bs->sc, &bs->sc->sa); + + return error; +} + +/* Check the owner of a btree block. */ +STATIC int +xfs_scrub_btree_check_owner( + struct xfs_scrub_btree *bs, + int level, + struct xfs_buf *bp) +{ + struct xfs_btree_cur *cur = bs->cur; + struct check_owner *co; + + if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) && bp == NULL) + return 0; + + /* + * We want to cross-reference each btree block with the bnobt + * and the rmapbt. We cannot cross-reference the bnobt or + * rmapbt while scanning the bnobt or rmapbt, respectively, + * because we cannot alter the cursor and we'd prefer not to + * duplicate cursors. Therefore, save the buffer daddr for + * later scanning. 
+ */ + if (cur->bc_btnum == XFS_BTNUM_BNO || cur->bc_btnum == XFS_BTNUM_RMAP) { + co = kmem_alloc(sizeof(struct check_owner), + KM_MAYFAIL | KM_NOFS); + if (!co) + return -ENOMEM; + co->level = level; + co->daddr = XFS_BUF_ADDR(bp); + list_add_tail(&co->list, &bs->to_check); + return 0; + } + + return xfs_scrub_btree_check_block_owner(bs, level, XFS_BUF_ADDR(bp)); +} + /* * Grab and scrub a btree block given a btree pointer. Returns block * and buffer pointers (if applicable) if they're ok to use. @@ -396,6 +470,14 @@ xfs_scrub_btree_get_block( return 0; } + /* + * Check the block's owner; this function absorbs error codes + * for us. + */ + error = xfs_scrub_btree_check_owner(bs, level, *pbp); + if (error) + return error; + /* * Check the block's siblings; this function absorbs error codes * for us. @@ -467,6 +549,8 @@ xfs_scrub_btree( struct xfs_btree_block *block; int level; struct xfs_buf *bp; + struct check_owner *co; + struct check_owner *n; int i; int error = 0; @@ -558,5 +642,14 @@ xfs_scrub_btree( } out: + /* Process deferred owner checks on btree blocks. */ + list_for_each_entry_safe(co, n, &bs.to_check, list) { + if (!error && bs.cur) + error = xfs_scrub_btree_check_block_owner(&bs, + co->level, co->daddr); + list_del(&co->list); + kmem_free(co); + } + return error; } -- cgit v1.2.3 From 166d76410d7ac08ba2fd90f33ebb545e21fd6b3a Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Tue, 16 Jan 2018 18:53:05 -0800 Subject: xfs: introduce scrubber cross-referencing stubs Create some stubs that will be used to cross-reference metadata records. The actual cross-referencing will be filled in by subsequent patches. Signed-off-by: Darrick J. 
Wong Reviewed-by: Dave Chinner --- fs/xfs/scrub/agheader.c | 63 ++++++++++++++++++++++++++++++++++++++++++++++++- fs/xfs/scrub/alloc.c | 13 ++++++++++ fs/xfs/scrub/bmap.c | 29 +++++++++++++++++++++++ fs/xfs/scrub/ialloc.c | 15 ++++++++++++ fs/xfs/scrub/inode.c | 12 ++++++++++ fs/xfs/scrub/refcount.c | 14 +++++++++++ fs/xfs/scrub/rmap.c | 12 ++++++++++ 7 files changed, 157 insertions(+), 1 deletion(-) diff --git a/fs/xfs/scrub/agheader.c b/fs/xfs/scrub/agheader.c index 97beb4773298..1477aadbfe27 100644 --- a/fs/xfs/scrub/agheader.c +++ b/fs/xfs/scrub/agheader.c @@ -37,7 +37,10 @@ #include "scrub/common.h" #include "scrub/trace.h" -/* Walk all the blocks in the AGFL. */ +/* + * Walk all the blocks in the AGFL. The fn function can return any negative + * error code or XFS_BTREE_QUERY_RANGE_ABORT. + */ int xfs_scrub_walk_agfl( struct xfs_scrub_context *sc, @@ -98,6 +101,16 @@ xfs_scrub_walk_agfl( /* Superblock */ +/* Cross-reference with the other btrees. */ +STATIC void +xfs_scrub_superblock_xref( + struct xfs_scrub_context *sc, + struct xfs_buf *bp) +{ + if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT) + return; +} + /* * Scrub the filesystem superblock. * @@ -386,11 +399,22 @@ xfs_scrub_superblock( BBTOB(bp->b_length) - sizeof(struct xfs_dsb))) xfs_scrub_block_set_corrupt(sc, bp); + xfs_scrub_superblock_xref(sc, bp); + return error; } /* AGF */ +/* Cross-reference with the other btrees. */ +STATIC void +xfs_scrub_agf_xref( + struct xfs_scrub_context *sc) +{ + if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT) + return; +} + /* Scrub the AGF. */ int xfs_scrub_agf( @@ -469,6 +493,7 @@ xfs_scrub_agf( if (agfl_count != 0 && fl_count != agfl_count) xfs_scrub_block_set_corrupt(sc, sc->sa.agf_bp); + xfs_scrub_agf_xref(sc); out: return error; } @@ -481,6 +506,16 @@ struct xfs_scrub_agfl_info { xfs_agblock_t *entries; }; +/* Cross-reference with the other btrees. 
*/ +STATIC void +xfs_scrub_agfl_block_xref( + struct xfs_scrub_context *sc, + xfs_agblock_t agbno) +{ + if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT) + return; +} + /* Scrub an AGFL block. */ STATIC int xfs_scrub_agfl_block( @@ -498,6 +533,8 @@ xfs_scrub_agfl_block( else xfs_scrub_block_set_corrupt(sc, sc->sa.agfl_bp); + xfs_scrub_agfl_block_xref(sc, agbno); + return 0; } @@ -512,6 +549,15 @@ xfs_scrub_agblock_cmp( return (int)*a - (int)*b; } +/* Cross-reference with the other btrees. */ +STATIC void +xfs_scrub_agfl_xref( + struct xfs_scrub_context *sc) +{ + if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT) + return; +} + /* Scrub the AGFL. */ int xfs_scrub_agfl( @@ -532,6 +578,11 @@ xfs_scrub_agfl( if (!sc->sa.agf_bp) return -EFSCORRUPTED; + xfs_scrub_agfl_xref(sc); + + if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT) + goto out; + /* Allocate buffer to ensure uniqueness of AGFL entries. */ agf = XFS_BUF_TO_AGF(sc->sa.agf_bp); agflcount = be32_to_cpu(agf->agf_flcount); @@ -574,6 +625,15 @@ out: /* AGI */ +/* Cross-reference with the other btrees. */ +STATIC void +xfs_scrub_agi_xref( + struct xfs_scrub_context *sc) +{ + if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT) + return; +} + /* Scrub the AGI. */ int xfs_scrub_agi( @@ -652,6 +712,7 @@ xfs_scrub_agi( if (agi->agi_pad32 != cpu_to_be32(0)) xfs_scrub_block_set_corrupt(sc, sc->sa.agi_bp); + xfs_scrub_agi_xref(sc); out: return error; } diff --git a/fs/xfs/scrub/alloc.c b/fs/xfs/scrub/alloc.c index 059663e13414..03ed403ff0d3 100644 --- a/fs/xfs/scrub/alloc.c +++ b/fs/xfs/scrub/alloc.c @@ -50,6 +50,17 @@ xfs_scrub_setup_ag_allocbt( /* Free space btree scrubber. */ +/* Cross-reference with the other btrees. */ +STATIC void +xfs_scrub_allocbt_xref( + struct xfs_scrub_context *sc, + xfs_agblock_t agbno, + xfs_extlen_t len) +{ + if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT) + return; +} + /* Scrub a bnobt/cntbt record. 
*/ STATIC int xfs_scrub_allocbt_rec( @@ -70,6 +81,8 @@ xfs_scrub_allocbt_rec( !xfs_verify_agbno(mp, agno, bno + len - 1)) xfs_scrub_btree_set_corrupt(bs->sc, bs->cur, 0); + xfs_scrub_allocbt_xref(bs->sc, bno, len); + return error; } diff --git a/fs/xfs/scrub/bmap.c b/fs/xfs/scrub/bmap.c index 0261e1133901..b6931928e727 100644 --- a/fs/xfs/scrub/bmap.c +++ b/fs/xfs/scrub/bmap.c @@ -99,6 +99,30 @@ struct xfs_scrub_bmap_info { int whichfork; }; +/* Cross-reference a single rtdev extent record. */ +STATIC void +xfs_scrub_bmap_rt_extent_xref( + struct xfs_scrub_bmap_info *info, + struct xfs_inode *ip, + struct xfs_btree_cur *cur, + struct xfs_bmbt_irec *irec) +{ + if (info->sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT) + return; +} + +/* Cross-reference a single datadev extent record. */ +STATIC void +xfs_scrub_bmap_extent_xref( + struct xfs_scrub_bmap_info *info, + struct xfs_inode *ip, + struct xfs_btree_cur *cur, + struct xfs_bmbt_irec *irec) +{ + if (info->sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT) + return; +} + /* Scrub a single extent record. */ STATIC int xfs_scrub_bmap_extent( @@ -158,6 +182,11 @@ xfs_scrub_bmap_extent( xfs_scrub_fblock_set_corrupt(info->sc, info->whichfork, irec->br_startoff); + if (info->is_rt) + xfs_scrub_bmap_rt_extent_xref(info, ip, cur, irec); + else + xfs_scrub_bmap_extent_xref(info, ip, cur, irec); + info->lastoff = irec->br_startoff + irec->br_blockcount; return error; } diff --git a/fs/xfs/scrub/ialloc.c b/fs/xfs/scrub/ialloc.c index 496d6f2fbb9e..9294148267bc 100644 --- a/fs/xfs/scrub/ialloc.c +++ b/fs/xfs/scrub/ialloc.c @@ -58,6 +58,19 @@ xfs_scrub_setup_ag_iallocbt( /* Inode btree scrubber. */ +/* Cross-reference with the other btrees. */ +STATIC void +xfs_scrub_iallocbt_chunk_xref( + struct xfs_scrub_context *sc, + struct xfs_inobt_rec_incore *irec, + xfs_agino_t agino, + xfs_agblock_t agbno, + xfs_extlen_t len) +{ + if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT) + return; +} + /* Is this chunk worth checking? 
*/ STATIC bool xfs_scrub_iallocbt_chunk( @@ -76,6 +89,8 @@ xfs_scrub_iallocbt_chunk( !xfs_verify_agbno(mp, agno, bno + len - 1)) xfs_scrub_btree_set_corrupt(bs->sc, bs->cur, 0); + xfs_scrub_iallocbt_chunk_xref(bs->sc, irec, agino, bno, len); + return true; } diff --git a/fs/xfs/scrub/inode.c b/fs/xfs/scrub/inode.c index 1c5b64667705..63525791b3ce 100644 --- a/fs/xfs/scrub/inode.c +++ b/fs/xfs/scrub/inode.c @@ -577,6 +577,17 @@ out_buf: return error; } +/* Cross-reference with the other btrees. */ +STATIC void +xfs_scrub_inode_xref( + struct xfs_scrub_context *sc, + xfs_ino_t ino, + struct xfs_dinode *dip) +{ + if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT) + return; +} + /* Scrub an inode. */ int xfs_scrub_inode( @@ -626,6 +637,7 @@ xfs_scrub_inode( xfs_scrub_ino_set_preen(sc, ino, bp); } + xfs_scrub_inode_xref(sc, ino, dip); out: if (bp) xfs_trans_brelse(sc->tp, bp); diff --git a/fs/xfs/scrub/refcount.c b/fs/xfs/scrub/refcount.c index 2f88a8d44bd0..4c550b3bfbe6 100644 --- a/fs/xfs/scrub/refcount.c +++ b/fs/xfs/scrub/refcount.c @@ -50,6 +50,18 @@ xfs_scrub_setup_ag_refcountbt( /* Reference count btree scrubber. */ +/* Cross-reference with the other btrees. */ +STATIC void +xfs_scrub_refcountbt_xref( + struct xfs_scrub_context *sc, + xfs_agblock_t agbno, + xfs_extlen_t len, + xfs_nlink_t refcount) +{ + if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT) + return; +} + /* Scrub a refcountbt record. */ STATIC int xfs_scrub_refcountbt_rec( @@ -83,6 +95,8 @@ xfs_scrub_refcountbt_rec( if (refcount == 0) xfs_scrub_btree_set_corrupt(bs->sc, bs->cur, 0); + xfs_scrub_refcountbt_xref(bs->sc, bno, len, refcount); + return error; } diff --git a/fs/xfs/scrub/rmap.c b/fs/xfs/scrub/rmap.c index 97846c424690..865594895920 100644 --- a/fs/xfs/scrub/rmap.c +++ b/fs/xfs/scrub/rmap.c @@ -51,6 +51,16 @@ xfs_scrub_setup_ag_rmapbt( /* Reverse-mapping scrubber. */ +/* Cross-reference with the other btrees. 
*/ +STATIC void +xfs_scrub_rmapbt_xref( + struct xfs_scrub_context *sc, + struct xfs_rmap_irec *irec) +{ + if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT) + return; +} + /* Scrub an rmapbt record. */ STATIC int xfs_scrub_rmapbt_rec( @@ -121,6 +131,8 @@ xfs_scrub_rmapbt_rec( irec.rm_owner > XFS_RMAP_OWN_FS) xfs_scrub_btree_set_corrupt(bs->sc, bs->cur, 0); } + + xfs_scrub_rmapbt_xref(bs->sc, &irec); out: return error; } -- cgit v1.2.3 From 52dc4b44af74196ded6413304542ead0257b5cda Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Tue, 16 Jan 2018 18:53:06 -0800 Subject: xfs: cross-reference with the bnobt When we're scrubbing various btrees, cross-reference the records with the bnobt to ensure that we don't also think the space is free. Signed-off-by: Darrick J. Wong Reviewed-by: Dave Chinner --- fs/xfs/scrub/agheader.c | 96 +++++++++++++++++++++++++++++++++++++++++++++++++ fs/xfs/scrub/alloc.c | 20 +++++++++++ fs/xfs/scrub/bmap.c | 19 ++++++++++ fs/xfs/scrub/btree.c | 13 +++++++ fs/xfs/scrub/ialloc.c | 2 ++ fs/xfs/scrub/inode.c | 15 ++++++++ fs/xfs/scrub/refcount.c | 2 ++ fs/xfs/scrub/rmap.c | 5 +++ fs/xfs/scrub/scrub.h | 4 +++ 9 files changed, 176 insertions(+) diff --git a/fs/xfs/scrub/agheader.c b/fs/xfs/scrub/agheader.c index 1477aadbfe27..713b4e0cd907 100644 --- a/fs/xfs/scrub/agheader.c +++ b/fs/xfs/scrub/agheader.c @@ -107,8 +107,23 @@ xfs_scrub_superblock_xref( struct xfs_scrub_context *sc, struct xfs_buf *bp) { + struct xfs_mount *mp = sc->mp; + xfs_agnumber_t agno = sc->sm->sm_agno; + xfs_agblock_t agbno; + int error; + if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT) return; + + agbno = XFS_SB_BLOCK(mp); + + error = xfs_scrub_ag_init(sc, agno, &sc->sa); + if (!xfs_scrub_xref_process_error(sc, agno, agbno, &error)) + return; + + xfs_scrub_xref_is_used_space(sc, agbno, 1); + + /* scrub teardown will take care of sc->sa for us */ } /* @@ -406,13 +421,61 @@ xfs_scrub_superblock( /* AGF */ +/* Tally freespace record lengths. 
*/ +STATIC int +xfs_scrub_agf_record_bno_lengths( + struct xfs_btree_cur *cur, + struct xfs_alloc_rec_incore *rec, + void *priv) +{ + xfs_extlen_t *blocks = priv; + + (*blocks) += rec->ar_blockcount; + return 0; +} + +/* Check agf_freeblks */ +static inline void +xfs_scrub_agf_xref_freeblks( + struct xfs_scrub_context *sc) +{ + struct xfs_agf *agf = XFS_BUF_TO_AGF(sc->sa.agf_bp); + xfs_extlen_t blocks = 0; + int error; + + if (!sc->sa.bno_cur) + return; + + error = xfs_alloc_query_all(sc->sa.bno_cur, + xfs_scrub_agf_record_bno_lengths, &blocks); + if (!xfs_scrub_should_check_xref(sc, &error, &sc->sa.bno_cur)) + return; + if (blocks != be32_to_cpu(agf->agf_freeblks)) + xfs_scrub_block_xref_set_corrupt(sc, sc->sa.agf_bp); +} + /* Cross-reference with the other btrees. */ STATIC void xfs_scrub_agf_xref( struct xfs_scrub_context *sc) { + struct xfs_mount *mp = sc->mp; + xfs_agblock_t agbno; + int error; + if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT) return; + + agbno = XFS_AGF_BLOCK(mp); + + error = xfs_scrub_ag_btcur_init(sc, &sc->sa); + if (error) + return; + + xfs_scrub_xref_is_used_space(sc, agbno, 1); + xfs_scrub_agf_xref_freeblks(sc); + + /* scrub teardown will take care of sc->sa for us */ } /* Scrub the AGF. */ @@ -514,6 +577,8 @@ xfs_scrub_agfl_block_xref( { if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT) return; + + xfs_scrub_xref_is_used_space(sc, agbno, 1); } /* Scrub an AGFL block. */ @@ -554,8 +619,25 @@ STATIC void xfs_scrub_agfl_xref( struct xfs_scrub_context *sc) { + struct xfs_mount *mp = sc->mp; + xfs_agblock_t agbno; + int error; + if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT) return; + + agbno = XFS_AGFL_BLOCK(mp); + + error = xfs_scrub_ag_btcur_init(sc, &sc->sa); + if (error) + return; + + xfs_scrub_xref_is_used_space(sc, agbno, 1); + + /* + * Scrub teardown will take care of sc->sa for us. Leave sc->sa + * active so that the agfl block xref can use it too. + */ } /* Scrub the AGFL. 
*/ @@ -630,8 +712,22 @@ STATIC void xfs_scrub_agi_xref( struct xfs_scrub_context *sc) { + struct xfs_mount *mp = sc->mp; + xfs_agblock_t agbno; + int error; + if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT) return; + + agbno = XFS_AGI_BLOCK(mp); + + error = xfs_scrub_ag_btcur_init(sc, &sc->sa); + if (error) + return; + + xfs_scrub_xref_is_used_space(sc, agbno, 1); + + /* scrub teardown will take care of sc->sa for us */ } /* Scrub the AGI. */ diff --git a/fs/xfs/scrub/alloc.c b/fs/xfs/scrub/alloc.c index 03ed403ff0d3..9b45585c0992 100644 --- a/fs/xfs/scrub/alloc.c +++ b/fs/xfs/scrub/alloc.c @@ -113,3 +113,23 @@ xfs_scrub_cntbt( { return xfs_scrub_allocbt(sc, XFS_BTNUM_CNT); } + +/* xref check that the extent is not free */ +void +xfs_scrub_xref_is_used_space( + struct xfs_scrub_context *sc, + xfs_agblock_t agbno, + xfs_extlen_t len) +{ + bool is_freesp; + int error; + + if (!sc->sa.bno_cur) + return; + + error = xfs_alloc_has_record(sc->sa.bno_cur, agbno, len, &is_freesp); + if (!xfs_scrub_should_check_xref(sc, &error, &sc->sa.bno_cur)) + return; + if (is_freesp) + xfs_scrub_btree_xref_set_corrupt(sc, sc->sa.bno_cur, 0); +} diff --git a/fs/xfs/scrub/bmap.c b/fs/xfs/scrub/bmap.c index b6931928e727..7e8e239c2516 100644 --- a/fs/xfs/scrub/bmap.c +++ b/fs/xfs/scrub/bmap.c @@ -119,8 +119,27 @@ xfs_scrub_bmap_extent_xref( struct xfs_btree_cur *cur, struct xfs_bmbt_irec *irec) { + struct xfs_mount *mp = info->sc->mp; + xfs_agnumber_t agno; + xfs_agblock_t agbno; + xfs_extlen_t len; + int error; + if (info->sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT) return; + + agno = XFS_FSB_TO_AGNO(mp, irec->br_startblock); + agbno = XFS_FSB_TO_AGBNO(mp, irec->br_startblock); + len = irec->br_blockcount; + + error = xfs_scrub_ag_init(info->sc, agno, &info->sc->sa); + if (!xfs_scrub_fblock_process_error(info->sc, info->whichfork, + irec->br_startoff, &error)) + return; + + xfs_scrub_xref_is_used_space(info->sc, agbno, len); + + xfs_scrub_ag_free(info->sc, &info->sc->sa); } /* Scrub a 
single extent record. */ diff --git a/fs/xfs/scrub/btree.c b/fs/xfs/scrub/btree.c index e671d694908b..222e0312bd8c 100644 --- a/fs/xfs/scrub/btree.c +++ b/fs/xfs/scrub/btree.c @@ -378,13 +378,17 @@ xfs_scrub_btree_check_block_owner( xfs_daddr_t daddr) { xfs_agnumber_t agno; + xfs_agblock_t agbno; + xfs_btnum_t btnum; bool init_sa; int error = 0; if (!bs->cur) return 0; + btnum = bs->cur->bc_btnum; agno = xfs_daddr_to_agno(bs->cur->bc_mp, daddr); + agbno = xfs_daddr_to_agbno(bs->cur->bc_mp, daddr); init_sa = bs->cur->bc_flags & XFS_BTREE_LONG_PTRS; if (init_sa) { @@ -394,6 +398,15 @@ xfs_scrub_btree_check_block_owner( return error; } + xfs_scrub_xref_is_used_space(bs->sc, agbno, 1); + /* + * The bnobt scrubber aliases bs->cur to bs->sc->sa.bno_cur, so we + * have to nullify it (to shut down further block owner checks) if + * self-xref encounters problems. + */ + if (!bs->sc->sa.bno_cur && btnum == XFS_BTNUM_BNO) + bs->cur = NULL; + if (init_sa) xfs_scrub_ag_free(bs->sc, &bs->sc->sa); diff --git a/fs/xfs/scrub/ialloc.c b/fs/xfs/scrub/ialloc.c index 9294148267bc..45268941785a 100644 --- a/fs/xfs/scrub/ialloc.c +++ b/fs/xfs/scrub/ialloc.c @@ -69,6 +69,8 @@ xfs_scrub_iallocbt_chunk_xref( { if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT) return; + + xfs_scrub_xref_is_used_space(sc, agbno, len); } /* Is this chunk worth checking? 
*/ diff --git a/fs/xfs/scrub/inode.c b/fs/xfs/scrub/inode.c index 63525791b3ce..153d4eb91b93 100644 --- a/fs/xfs/scrub/inode.c +++ b/fs/xfs/scrub/inode.c @@ -584,8 +584,23 @@ xfs_scrub_inode_xref( xfs_ino_t ino, struct xfs_dinode *dip) { + xfs_agnumber_t agno; + xfs_agblock_t agbno; + int error; + if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT) return; + + agno = XFS_INO_TO_AGNO(sc->mp, ino); + agbno = XFS_INO_TO_AGBNO(sc->mp, ino); + + error = xfs_scrub_ag_init(sc, agno, &sc->sa); + if (!xfs_scrub_xref_process_error(sc, agno, agbno, &error)) + return; + + xfs_scrub_xref_is_used_space(sc, agbno, 1); + + xfs_scrub_ag_free(sc, &sc->sa); } /* Scrub an inode. */ diff --git a/fs/xfs/scrub/refcount.c b/fs/xfs/scrub/refcount.c index 4c550b3bfbe6..09a04ae0895e 100644 --- a/fs/xfs/scrub/refcount.c +++ b/fs/xfs/scrub/refcount.c @@ -60,6 +60,8 @@ xfs_scrub_refcountbt_xref( { if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT) return; + + xfs_scrub_xref_is_used_space(sc, agbno, len); } /* Scrub a refcountbt record. */ diff --git a/fs/xfs/scrub/rmap.c b/fs/xfs/scrub/rmap.c index 865594895920..54b0eac22707 100644 --- a/fs/xfs/scrub/rmap.c +++ b/fs/xfs/scrub/rmap.c @@ -57,8 +57,13 @@ xfs_scrub_rmapbt_xref( struct xfs_scrub_context *sc, struct xfs_rmap_irec *irec) { + xfs_agblock_t agbno = irec->rm_startblock; + xfs_extlen_t len = irec->rm_blockcount; + if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT) return; + + xfs_scrub_xref_is_used_space(sc, agbno, len); } /* Scrub an rmapbt record. 
*/ diff --git a/fs/xfs/scrub/scrub.h b/fs/xfs/scrub/scrub.h index 2a7961405f02..cbc636326171 100644 --- a/fs/xfs/scrub/scrub.h +++ b/fs/xfs/scrub/scrub.h @@ -123,4 +123,8 @@ xfs_scrub_quota(struct xfs_scrub_context *sc) } #endif +/* cross-referencing helpers */ +void xfs_scrub_xref_is_used_space(struct xfs_scrub_context *sc, + xfs_agblock_t agbno, xfs_extlen_t len); + #endif /* __XFS_SCRUB_SCRUB_H__ */ -- cgit v1.2.3 From e1134b12fd79a768ef386e0c42a6f028953f58eb Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Tue, 16 Jan 2018 18:53:07 -0800 Subject: xfs: cross-reference bnobt records with cntbt Scrub should make sure that each bnobt record has a corresponding cntbt record. Signed-off-by: Darrick J. Wong Reviewed-by: Dave Chinner --- fs/xfs/scrub/agheader.c | 33 +++++++++++++++++++++++++++++++++ fs/xfs/scrub/alloc.c | 45 +++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 78 insertions(+) diff --git a/fs/xfs/scrub/agheader.c b/fs/xfs/scrub/agheader.c index 713b4e0cd907..1aba7c01cdd7 100644 --- a/fs/xfs/scrub/agheader.c +++ b/fs/xfs/scrub/agheader.c @@ -454,6 +454,38 @@ xfs_scrub_agf_xref_freeblks( xfs_scrub_block_xref_set_corrupt(sc, sc->sa.agf_bp); } +/* Cross reference the AGF with the cntbt (freespace by length btree) */ +static inline void +xfs_scrub_agf_xref_cntbt( + struct xfs_scrub_context *sc) +{ + struct xfs_agf *agf = XFS_BUF_TO_AGF(sc->sa.agf_bp); + xfs_agblock_t agbno; + xfs_extlen_t blocks; + int have; + int error; + + if (!sc->sa.cnt_cur) + return; + + /* Any freespace at all? 
*/ + error = xfs_alloc_lookup_le(sc->sa.cnt_cur, 0, -1U, &have); + if (!xfs_scrub_should_check_xref(sc, &error, &sc->sa.cnt_cur)) + return; + if (!have) { + if (agf->agf_freeblks != cpu_to_be32(0)) + xfs_scrub_block_xref_set_corrupt(sc, sc->sa.agf_bp); + return; + } + + /* Check agf_longest */ + error = xfs_alloc_get_rec(sc->sa.cnt_cur, &agbno, &blocks, &have); + if (!xfs_scrub_should_check_xref(sc, &error, &sc->sa.cnt_cur)) + return; + if (!have || blocks != be32_to_cpu(agf->agf_longest)) + xfs_scrub_block_xref_set_corrupt(sc, sc->sa.agf_bp); +} + /* Cross-reference with the other btrees. */ STATIC void xfs_scrub_agf_xref( @@ -474,6 +506,7 @@ xfs_scrub_agf_xref( xfs_scrub_xref_is_used_space(sc, agbno, 1); xfs_scrub_agf_xref_freeblks(sc); + xfs_scrub_agf_xref_cntbt(sc); /* scrub teardown will take care of sc->sa for us */ } diff --git a/fs/xfs/scrub/alloc.c b/fs/xfs/scrub/alloc.c index 9b45585c0992..836b3c31c951 100644 --- a/fs/xfs/scrub/alloc.c +++ b/fs/xfs/scrub/alloc.c @@ -31,6 +31,7 @@ #include "xfs_sb.h" #include "xfs_alloc.h" #include "xfs_rmap.h" +#include "xfs_alloc.h" #include "scrub/xfs_scrub.h" #include "scrub/scrub.h" #include "scrub/common.h" @@ -49,6 +50,48 @@ xfs_scrub_setup_ag_allocbt( } /* Free space btree scrubber. */ +/* + * Ensure there's a corresponding cntbt/bnobt record matching this + * bnobt/cntbt record, respectively.
+ */ +STATIC void +xfs_scrub_allocbt_xref_other( + struct xfs_scrub_context *sc, + xfs_agblock_t agbno, + xfs_extlen_t len) +{ + struct xfs_btree_cur **pcur; + xfs_agblock_t fbno; + xfs_extlen_t flen; + int has_otherrec; + int error; + + if (sc->sm->sm_type == XFS_SCRUB_TYPE_BNOBT) + pcur = &sc->sa.cnt_cur; + else + pcur = &sc->sa.bno_cur; + if (!*pcur) + return; + + error = xfs_alloc_lookup_le(*pcur, agbno, len, &has_otherrec); + if (!xfs_scrub_should_check_xref(sc, &error, pcur)) + return; + if (!has_otherrec) { + xfs_scrub_btree_xref_set_corrupt(sc, *pcur, 0); + return; + } + + error = xfs_alloc_get_rec(*pcur, &fbno, &flen, &has_otherrec); + if (!xfs_scrub_should_check_xref(sc, &error, pcur)) + return; + if (!has_otherrec) { + xfs_scrub_btree_xref_set_corrupt(sc, *pcur, 0); + return; + } + + if (fbno != agbno || flen != len) + xfs_scrub_btree_xref_set_corrupt(sc, *pcur, 0); +} /* Cross-reference with the other btrees. */ STATIC void @@ -59,6 +102,8 @@ xfs_scrub_allocbt_xref( { if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT) return; + + xfs_scrub_allocbt_xref_other(sc, agbno, len); } /* Scrub a bnobt/cntbt record. */ -- cgit v1.2.3 From 2e6f27561b798710fd7c89118d8b489231408a80 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Tue, 16 Jan 2018 18:53:07 -0800 Subject: xfs: cross-reference inode btrees during scrub Cross-reference the inode btrees with the other metadata when we scrub the filesystem. Signed-off-by: Darrick J. 
Wong Reviewed-by: Dave Chinner --- fs/xfs/scrub/agheader.c | 27 ++++++++++++++++++ fs/xfs/scrub/alloc.c | 1 + fs/xfs/scrub/bmap.c | 1 + fs/xfs/scrub/ialloc.c | 73 +++++++++++++++++++++++++++++++++++++++++++++++++ fs/xfs/scrub/inode.c | 49 +++++++++++++++++++++++++++++++++ fs/xfs/scrub/refcount.c | 1 + fs/xfs/scrub/rmap.c | 4 +++ fs/xfs/scrub/scrub.h | 4 +++ 8 files changed, 160 insertions(+) diff --git a/fs/xfs/scrub/agheader.c b/fs/xfs/scrub/agheader.c index 1aba7c01cdd7..13ec76b783ad 100644 --- a/fs/xfs/scrub/agheader.c +++ b/fs/xfs/scrub/agheader.c @@ -122,6 +122,7 @@ xfs_scrub_superblock_xref( return; xfs_scrub_xref_is_used_space(sc, agbno, 1); + xfs_scrub_xref_is_not_inode_chunk(sc, agbno, 1); /* scrub teardown will take care of sc->sa for us */ } @@ -507,6 +508,7 @@ xfs_scrub_agf_xref( xfs_scrub_xref_is_used_space(sc, agbno, 1); xfs_scrub_agf_xref_freeblks(sc); xfs_scrub_agf_xref_cntbt(sc); + xfs_scrub_xref_is_not_inode_chunk(sc, agbno, 1); /* scrub teardown will take care of sc->sa for us */ } @@ -612,6 +614,7 @@ xfs_scrub_agfl_block_xref( return; xfs_scrub_xref_is_used_space(sc, agbno, 1); + xfs_scrub_xref_is_not_inode_chunk(sc, agbno, 1); } /* Scrub an AGFL block. */ @@ -666,6 +669,7 @@ xfs_scrub_agfl_xref( return; xfs_scrub_xref_is_used_space(sc, agbno, 1); + xfs_scrub_xref_is_not_inode_chunk(sc, agbno, 1); /* * Scrub teardown will take care of sc->sa for us. 
Leave sc->sa @@ -740,6 +744,27 @@ out: /* AGI */ +/* Check agi_count/agi_freecount */ +static inline void +xfs_scrub_agi_xref_icounts( + struct xfs_scrub_context *sc) +{ + struct xfs_agi *agi = XFS_BUF_TO_AGI(sc->sa.agi_bp); + xfs_agino_t icount; + xfs_agino_t freecount; + int error; + + if (!sc->sa.ino_cur) + return; + + error = xfs_ialloc_count_inodes(sc->sa.ino_cur, &icount, &freecount); + if (!xfs_scrub_should_check_xref(sc, &error, &sc->sa.ino_cur)) + return; + if (be32_to_cpu(agi->agi_count) != icount || + be32_to_cpu(agi->agi_freecount) != freecount) + xfs_scrub_block_xref_set_corrupt(sc, sc->sa.agi_bp); +} + /* Cross-reference with the other btrees. */ STATIC void xfs_scrub_agi_xref( @@ -759,6 +784,8 @@ xfs_scrub_agi_xref( return; xfs_scrub_xref_is_used_space(sc, agbno, 1); + xfs_scrub_xref_is_not_inode_chunk(sc, agbno, 1); + xfs_scrub_agi_xref_icounts(sc); /* scrub teardown will take care of sc->sa for us */ } diff --git a/fs/xfs/scrub/alloc.c b/fs/xfs/scrub/alloc.c index 836b3c31c951..0031014fbf9c 100644 --- a/fs/xfs/scrub/alloc.c +++ b/fs/xfs/scrub/alloc.c @@ -104,6 +104,7 @@ xfs_scrub_allocbt_xref( return; xfs_scrub_allocbt_xref_other(sc, agbno, len); + xfs_scrub_xref_is_not_inode_chunk(sc, agbno, len); } /* Scrub a bnobt/cntbt record. */ diff --git a/fs/xfs/scrub/bmap.c b/fs/xfs/scrub/bmap.c index 7e8e239c2516..6f1d145d5fe7 100644 --- a/fs/xfs/scrub/bmap.c +++ b/fs/xfs/scrub/bmap.c @@ -138,6 +138,7 @@ xfs_scrub_bmap_extent_xref( return; xfs_scrub_xref_is_used_space(info->sc, agbno, len); + xfs_scrub_xref_is_not_inode_chunk(info->sc, agbno, len); xfs_scrub_ag_free(info->sc, &info->sc->sa); } diff --git a/fs/xfs/scrub/ialloc.c b/fs/xfs/scrub/ialloc.c index 45268941785a..bd7ba1642189 100644 --- a/fs/xfs/scrub/ialloc.c +++ b/fs/xfs/scrub/ialloc.c @@ -58,6 +58,35 @@ xfs_scrub_setup_ag_iallocbt( /* Inode btree scrubber. */ +/* + * If we're checking the finobt, cross-reference with the inobt. 
+ * Otherwise we're checking the inobt; if there is an finobt, make sure + * we have a record or not depending on freecount. + */ +static inline void +xfs_scrub_iallocbt_chunk_xref_other( + struct xfs_scrub_context *sc, + struct xfs_inobt_rec_incore *irec, + xfs_agino_t agino) +{ + struct xfs_btree_cur **pcur; + bool has_irec; + int error; + + if (sc->sm->sm_type == XFS_SCRUB_TYPE_FINOBT) + pcur = &sc->sa.ino_cur; + else + pcur = &sc->sa.fino_cur; + if (!(*pcur)) + return; + error = xfs_ialloc_has_inode_record(*pcur, agino, agino, &has_irec); + if (!xfs_scrub_should_check_xref(sc, &error, pcur)) + return; + if (((irec->ir_freecount > 0 && !has_irec) || + (irec->ir_freecount == 0 && has_irec))) + xfs_scrub_btree_xref_set_corrupt(sc, *pcur, 0); +} + /* Cross-reference with the other btrees. */ STATIC void xfs_scrub_iallocbt_chunk_xref( @@ -71,6 +100,7 @@ xfs_scrub_iallocbt_chunk_xref( return; xfs_scrub_xref_is_used_space(sc, agbno, len); + xfs_scrub_iallocbt_chunk_xref_other(sc, irec, agino); } /* Is this chunk worth checking? */ @@ -352,3 +382,46 @@ xfs_scrub_finobt( { return xfs_scrub_iallocbt(sc, XFS_BTNUM_FINO); } + +/* See if an inode btree has (or doesn't have) an inode chunk record. 
*/ +static inline void +xfs_scrub_xref_inode_check( + struct xfs_scrub_context *sc, + xfs_agblock_t agbno, + xfs_extlen_t len, + struct xfs_btree_cur **icur, + bool should_have_inodes) +{ + bool has_inodes; + int error; + + if (!(*icur)) + return; + + error = xfs_ialloc_has_inodes_at_extent(*icur, agbno, len, &has_inodes); + if (!xfs_scrub_should_check_xref(sc, &error, icur)) + return; + if (has_inodes != should_have_inodes) + xfs_scrub_btree_xref_set_corrupt(sc, *icur, 0); +} + +/* xref check that the extent is not covered by inodes */ +void +xfs_scrub_xref_is_not_inode_chunk( + struct xfs_scrub_context *sc, + xfs_agblock_t agbno, + xfs_extlen_t len) +{ + xfs_scrub_xref_inode_check(sc, agbno, len, &sc->sa.ino_cur, false); + xfs_scrub_xref_inode_check(sc, agbno, len, &sc->sa.fino_cur, false); +} + +/* xref check that the extent is covered by inodes */ +void +xfs_scrub_xref_is_inode_chunk( + struct xfs_scrub_context *sc, + xfs_agblock_t agbno, + xfs_extlen_t len) +{ + xfs_scrub_xref_inode_check(sc, agbno, len, &sc->sa.ino_cur, true); +} diff --git a/fs/xfs/scrub/inode.c b/fs/xfs/scrub/inode.c index 153d4eb91b93..5eac188d18ea 100644 --- a/fs/xfs/scrub/inode.c +++ b/fs/xfs/scrub/inode.c @@ -39,6 +39,7 @@ #include "scrub/xfs_scrub.h" #include "scrub/scrub.h" #include "scrub/common.h" +#include "scrub/btree.h" #include "scrub/trace.h" /* @@ -577,6 +578,53 @@ out_buf: return error; } +/* + * Make sure the finobt doesn't think this inode is free. + * We don't have to check the inobt ourselves because we got the inode via + * IGET_UNTRUSTED, which checks the inobt for us. + */ +static void +xfs_scrub_inode_xref_finobt( + struct xfs_scrub_context *sc, + xfs_ino_t ino) +{ + struct xfs_inobt_rec_incore rec; + xfs_agino_t agino; + int has_record; + int error; + + if (!sc->sa.fino_cur) + return; + + agino = XFS_INO_TO_AGINO(sc->mp, ino); + + /* + * Try to get the finobt record. If we can't get it, then we're + * in good shape. 
+ */ + error = xfs_inobt_lookup(sc->sa.fino_cur, agino, XFS_LOOKUP_LE, + &has_record); + if (!xfs_scrub_should_check_xref(sc, &error, &sc->sa.fino_cur) || + !has_record) + return; + + error = xfs_inobt_get_rec(sc->sa.fino_cur, &rec, &has_record); + if (!xfs_scrub_should_check_xref(sc, &error, &sc->sa.fino_cur) || + !has_record) + return; + + /* + * Otherwise, make sure this record either doesn't cover this inode, + * or that it does but it's marked present. + */ + if (rec.ir_startino > agino || + rec.ir_startino + XFS_INODES_PER_CHUNK <= agino) + return; + + if (rec.ir_free & XFS_INOBT_MASK(agino - rec.ir_startino)) + xfs_scrub_btree_xref_set_corrupt(sc, sc->sa.fino_cur, 0); +} + /* Cross-reference with the other btrees. */ STATIC void xfs_scrub_inode_xref( @@ -599,6 +647,7 @@ xfs_scrub_inode_xref( return; xfs_scrub_xref_is_used_space(sc, agbno, 1); + xfs_scrub_inode_xref_finobt(sc, ino); xfs_scrub_ag_free(sc, &sc->sa); } diff --git a/fs/xfs/scrub/refcount.c b/fs/xfs/scrub/refcount.c index 09a04ae0895e..af54590f331b 100644 --- a/fs/xfs/scrub/refcount.c +++ b/fs/xfs/scrub/refcount.c @@ -62,6 +62,7 @@ xfs_scrub_refcountbt_xref( return; xfs_scrub_xref_is_used_space(sc, agbno, len); + xfs_scrub_xref_is_not_inode_chunk(sc, agbno, len); } /* Scrub a refcountbt record. */ diff --git a/fs/xfs/scrub/rmap.c b/fs/xfs/scrub/rmap.c index 54b0eac22707..6e937ef14218 100644 --- a/fs/xfs/scrub/rmap.c +++ b/fs/xfs/scrub/rmap.c @@ -64,6 +64,10 @@ xfs_scrub_rmapbt_xref( return; xfs_scrub_xref_is_used_space(sc, agbno, len); + if (irec->rm_owner == XFS_RMAP_OWN_INODES) + xfs_scrub_xref_is_inode_chunk(sc, agbno, len); + else + xfs_scrub_xref_is_not_inode_chunk(sc, agbno, len); } /* Scrub an rmapbt record. 
*/ diff --git a/fs/xfs/scrub/scrub.h b/fs/xfs/scrub/scrub.h index cbc636326171..9b0033baa90b 100644 --- a/fs/xfs/scrub/scrub.h +++ b/fs/xfs/scrub/scrub.h @@ -126,5 +126,9 @@ xfs_scrub_quota(struct xfs_scrub_context *sc) /* cross-referencing helpers */ void xfs_scrub_xref_is_used_space(struct xfs_scrub_context *sc, xfs_agblock_t agbno, xfs_extlen_t len); +void xfs_scrub_xref_is_not_inode_chunk(struct xfs_scrub_context *sc, + xfs_agblock_t agbno, xfs_extlen_t len); +void xfs_scrub_xref_is_inode_chunk(struct xfs_scrub_context *sc, + xfs_agblock_t agbno, xfs_extlen_t len); #endif /* __XFS_SCRUB_SCRUB_H__ */ -- cgit v1.2.3 From d852657ccfc0e45570989a5f142e11c950d9a793 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Tue, 16 Jan 2018 18:53:08 -0800 Subject: xfs: cross-reference reverse-mapping btree When scrubbing various btrees, we should cross-reference the records with the reverse mapping btree and ensure that traversing the btree finds the same number of blocks that the rmapbt thinks are owned by that btree. Signed-off-by: Darrick J. 
Wong Reviewed-by: Dave Chinner --- fs/xfs/scrub/agheader.c | 68 +++++++++++++++++++++++- fs/xfs/scrub/alloc.c | 1 + fs/xfs/scrub/bmap.c | 134 ++++++++++++++++++++++++++++++++++++++++++++++++ fs/xfs/scrub/btree.c | 4 ++ fs/xfs/scrub/common.c | 53 +++++++++++++++++++ fs/xfs/scrub/common.h | 4 ++ fs/xfs/scrub/ialloc.c | 103 ++++++++++++++++++++++++++++++++++++- fs/xfs/scrub/inode.c | 4 ++ fs/xfs/scrub/rmap.c | 65 +++++++++++++++++++++++ fs/xfs/scrub/scrub.h | 8 +++ 10 files changed, 440 insertions(+), 4 deletions(-) diff --git a/fs/xfs/scrub/agheader.c b/fs/xfs/scrub/agheader.c index 13ec76b783ad..1d109d5744aa 100644 --- a/fs/xfs/scrub/agheader.c +++ b/fs/xfs/scrub/agheader.c @@ -32,6 +32,7 @@ #include "xfs_inode.h" #include "xfs_alloc.h" #include "xfs_ialloc.h" +#include "xfs_rmap.h" #include "scrub/xfs_scrub.h" #include "scrub/scrub.h" #include "scrub/common.h" @@ -107,6 +108,7 @@ xfs_scrub_superblock_xref( struct xfs_scrub_context *sc, struct xfs_buf *bp) { + struct xfs_owner_info oinfo; struct xfs_mount *mp = sc->mp; xfs_agnumber_t agno = sc->sm->sm_agno; xfs_agblock_t agbno; @@ -123,6 +125,8 @@ xfs_scrub_superblock_xref( xfs_scrub_xref_is_used_space(sc, agbno, 1); xfs_scrub_xref_is_not_inode_chunk(sc, agbno, 1); + xfs_rmap_ag_owner(&oinfo, XFS_RMAP_OWN_FS); + xfs_scrub_xref_is_owned_by(sc, agbno, 1, &oinfo); /* scrub teardown will take care of sc->sa for us */ } @@ -487,11 +491,58 @@ xfs_scrub_agf_xref_cntbt( xfs_scrub_block_xref_set_corrupt(sc, sc->sa.agf_bp); } +/* Check the btree block counts in the AGF against the btrees. 
*/ +STATIC void +xfs_scrub_agf_xref_btreeblks( + struct xfs_scrub_context *sc) +{ + struct xfs_agf *agf = XFS_BUF_TO_AGF(sc->sa.agf_bp); + struct xfs_mount *mp = sc->mp; + xfs_agblock_t blocks; + xfs_agblock_t btreeblks; + int error; + + /* Check agf_rmap_blocks; set up for agf_btreeblks check */ + if (sc->sa.rmap_cur) { + error = xfs_btree_count_blocks(sc->sa.rmap_cur, &blocks); + if (!xfs_scrub_should_check_xref(sc, &error, &sc->sa.rmap_cur)) + return; + btreeblks = blocks - 1; + if (blocks != be32_to_cpu(agf->agf_rmap_blocks)) + xfs_scrub_block_xref_set_corrupt(sc, sc->sa.agf_bp); + } else { + btreeblks = 0; + } + + /* + * No rmap cursor; we can't xref if we have the rmapbt feature. + * We also can't do it if we're missing the free space btree cursors. + */ + if ((xfs_sb_version_hasrmapbt(&mp->m_sb) && !sc->sa.rmap_cur) || + !sc->sa.bno_cur || !sc->sa.cnt_cur) + return; + + /* Check agf_btreeblks */ + error = xfs_btree_count_blocks(sc->sa.bno_cur, &blocks); + if (!xfs_scrub_should_check_xref(sc, &error, &sc->sa.bno_cur)) + return; + btreeblks += blocks - 1; + + error = xfs_btree_count_blocks(sc->sa.cnt_cur, &blocks); + if (!xfs_scrub_should_check_xref(sc, &error, &sc->sa.cnt_cur)) + return; + btreeblks += blocks - 1; + + if (btreeblks != be32_to_cpu(agf->agf_btreeblks)) + xfs_scrub_block_xref_set_corrupt(sc, sc->sa.agf_bp); +} + /* Cross-reference with the other btrees. 
*/ STATIC void xfs_scrub_agf_xref( struct xfs_scrub_context *sc) { + struct xfs_owner_info oinfo; struct xfs_mount *mp = sc->mp; xfs_agblock_t agbno; int error; @@ -509,6 +560,9 @@ xfs_scrub_agf_xref( xfs_scrub_agf_xref_freeblks(sc); xfs_scrub_agf_xref_cntbt(sc); xfs_scrub_xref_is_not_inode_chunk(sc, agbno, 1); + xfs_rmap_ag_owner(&oinfo, XFS_RMAP_OWN_FS); + xfs_scrub_xref_is_owned_by(sc, agbno, 1, &oinfo); + xfs_scrub_agf_xref_btreeblks(sc); /* scrub teardown will take care of sc->sa for us */ } @@ -599,6 +653,7 @@ out: /* AGFL */ struct xfs_scrub_agfl_info { + struct xfs_owner_info oinfo; unsigned int sz_entries; unsigned int nr_entries; xfs_agblock_t *entries; @@ -608,13 +663,15 @@ struct xfs_scrub_agfl_info { STATIC void xfs_scrub_agfl_block_xref( struct xfs_scrub_context *sc, - xfs_agblock_t agbno) + xfs_agblock_t agbno, + struct xfs_owner_info *oinfo) { if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT) return; xfs_scrub_xref_is_used_space(sc, agbno, 1); xfs_scrub_xref_is_not_inode_chunk(sc, agbno, 1); + xfs_scrub_xref_is_owned_by(sc, agbno, 1, oinfo); } /* Scrub an AGFL block. */ @@ -634,7 +691,7 @@ xfs_scrub_agfl_block( else xfs_scrub_block_set_corrupt(sc, sc->sa.agfl_bp); - xfs_scrub_agfl_block_xref(sc, agbno); + xfs_scrub_agfl_block_xref(sc, agbno, priv); return 0; } @@ -655,6 +712,7 @@ STATIC void xfs_scrub_agfl_xref( struct xfs_scrub_context *sc) { + struct xfs_owner_info oinfo; struct xfs_mount *mp = sc->mp; xfs_agblock_t agbno; int error; @@ -670,6 +728,8 @@ xfs_scrub_agfl_xref( xfs_scrub_xref_is_used_space(sc, agbno, 1); xfs_scrub_xref_is_not_inode_chunk(sc, agbno, 1); + xfs_rmap_ag_owner(&oinfo, XFS_RMAP_OWN_FS); + xfs_scrub_xref_is_owned_by(sc, agbno, 1, &oinfo); /* * Scrub teardown will take care of sc->sa for us. Leave sc->sa @@ -717,6 +777,7 @@ xfs_scrub_agfl( } /* Check the blocks in the AGFL. 
*/ + xfs_rmap_ag_owner(&sai.oinfo, XFS_RMAP_OWN_AG); error = xfs_scrub_walk_agfl(sc, xfs_scrub_agfl_block, &sai); if (error) goto out_free; @@ -770,6 +831,7 @@ STATIC void xfs_scrub_agi_xref( struct xfs_scrub_context *sc) { + struct xfs_owner_info oinfo; struct xfs_mount *mp = sc->mp; xfs_agblock_t agbno; int error; @@ -786,6 +848,8 @@ xfs_scrub_agi_xref( xfs_scrub_xref_is_used_space(sc, agbno, 1); xfs_scrub_xref_is_not_inode_chunk(sc, agbno, 1); xfs_scrub_agi_xref_icounts(sc); + xfs_rmap_ag_owner(&oinfo, XFS_RMAP_OWN_FS); + xfs_scrub_xref_is_owned_by(sc, agbno, 1, &oinfo); /* scrub teardown will take care of sc->sa for us */ } diff --git a/fs/xfs/scrub/alloc.c b/fs/xfs/scrub/alloc.c index 0031014fbf9c..3faa4371079e 100644 --- a/fs/xfs/scrub/alloc.c +++ b/fs/xfs/scrub/alloc.c @@ -105,6 +105,7 @@ xfs_scrub_allocbt_xref( xfs_scrub_allocbt_xref_other(sc, agbno, len); xfs_scrub_xref_is_not_inode_chunk(sc, agbno, len); + xfs_scrub_xref_has_no_owner(sc, agbno, len); } /* Scrub a bnobt/cntbt record. */ diff --git a/fs/xfs/scrub/bmap.c b/fs/xfs/scrub/bmap.c index 6f1d145d5fe7..933e0b8be34f 100644 --- a/fs/xfs/scrub/bmap.c +++ b/fs/xfs/scrub/bmap.c @@ -99,6 +99,139 @@ struct xfs_scrub_bmap_info { int whichfork; }; +/* Look for a corresponding rmap for this irec. */ +static inline bool +xfs_scrub_bmap_get_rmap( + struct xfs_scrub_bmap_info *info, + struct xfs_bmbt_irec *irec, + xfs_agblock_t agbno, + uint64_t owner, + struct xfs_rmap_irec *rmap) +{ + xfs_fileoff_t offset; + unsigned int rflags = 0; + int has_rmap; + int error; + + if (info->whichfork == XFS_ATTR_FORK) + rflags |= XFS_RMAP_ATTR_FORK; + + /* + * CoW staging extents are owned (on disk) by the refcountbt, so + * their rmaps do not have offsets. 
+ */ + if (info->whichfork == XFS_COW_FORK) + offset = 0; + else + offset = irec->br_startoff; + + /* + * If the caller thinks this could be a shared bmbt extent (IOWs, + * any data fork extent of a reflink inode) then we have to use the + * range rmap lookup to make sure we get the correct owner/offset. + */ + if (info->is_shared) { + error = xfs_rmap_lookup_le_range(info->sc->sa.rmap_cur, agbno, + owner, offset, rflags, rmap, &has_rmap); + if (!xfs_scrub_should_check_xref(info->sc, &error, + &info->sc->sa.rmap_cur)) + return false; + goto out; + } + + /* + * Otherwise, use the (faster) regular lookup. + */ + error = xfs_rmap_lookup_le(info->sc->sa.rmap_cur, agbno, 0, owner, + offset, rflags, &has_rmap); + if (!xfs_scrub_should_check_xref(info->sc, &error, + &info->sc->sa.rmap_cur)) + return false; + if (!has_rmap) + goto out; + + error = xfs_rmap_get_rec(info->sc->sa.rmap_cur, rmap, &has_rmap); + if (!xfs_scrub_should_check_xref(info->sc, &error, + &info->sc->sa.rmap_cur)) + return false; + +out: + if (!has_rmap) + xfs_scrub_fblock_xref_set_corrupt(info->sc, info->whichfork, + irec->br_startoff); + return has_rmap; +} + +/* Make sure that we have rmapbt records for this extent. */ +STATIC void +xfs_scrub_bmap_xref_rmap( + struct xfs_scrub_bmap_info *info, + struct xfs_bmbt_irec *irec, + xfs_agblock_t agbno) +{ + struct xfs_rmap_irec rmap; + unsigned long long rmap_end; + uint64_t owner; + + if (!info->sc->sa.rmap_cur) + return; + + if (info->whichfork == XFS_COW_FORK) + owner = XFS_RMAP_OWN_COW; + else + owner = info->sc->ip->i_ino; + + /* Find the rmap record for this irec. */ + if (!xfs_scrub_bmap_get_rmap(info, irec, agbno, owner, &rmap)) + return; + + /* Check the rmap. */ + rmap_end = (unsigned long long)rmap.rm_startblock + rmap.rm_blockcount; + if (rmap.rm_startblock > agbno || + agbno + irec->br_blockcount > rmap_end) + xfs_scrub_fblock_xref_set_corrupt(info->sc, info->whichfork, + irec->br_startoff); + + /* + * Check the logical offsets if applicable. 
CoW staging extents + * don't track logical offsets since the mappings only exist in + * memory. + */ + if (info->whichfork != XFS_COW_FORK) { + rmap_end = (unsigned long long)rmap.rm_offset + + rmap.rm_blockcount; + if (rmap.rm_offset > irec->br_startoff || + irec->br_startoff + irec->br_blockcount > rmap_end) + xfs_scrub_fblock_xref_set_corrupt(info->sc, + info->whichfork, irec->br_startoff); + } + + if (rmap.rm_owner != owner) + xfs_scrub_fblock_xref_set_corrupt(info->sc, info->whichfork, + irec->br_startoff); + + /* + * Check for discrepancies between the unwritten flag in the irec and + * the rmap. Note that the (in-memory) CoW fork distinguishes between + * unwritten and written extents, but we don't track that in the rmap + * records because the blocks are owned (on-disk) by the refcountbt, + * which doesn't track unwritten state. + */ + if (owner != XFS_RMAP_OWN_COW && + irec->br_state == XFS_EXT_UNWRITTEN && + !(rmap.rm_flags & XFS_RMAP_UNWRITTEN)) + xfs_scrub_fblock_xref_set_corrupt(info->sc, info->whichfork, + irec->br_startoff); + + if (info->whichfork == XFS_ATTR_FORK && + !(rmap.rm_flags & XFS_RMAP_ATTR_FORK)) + xfs_scrub_fblock_xref_set_corrupt(info->sc, info->whichfork, + irec->br_startoff); + if (rmap.rm_flags & XFS_RMAP_BMBT_BLOCK) + xfs_scrub_fblock_xref_set_corrupt(info->sc, info->whichfork, + irec->br_startoff); +} + /* Cross-reference a single rtdev extent record. 
*/ STATIC void xfs_scrub_bmap_rt_extent_xref( @@ -139,6 +272,7 @@ xfs_scrub_bmap_extent_xref( xfs_scrub_xref_is_used_space(info->sc, agbno, len); xfs_scrub_xref_is_not_inode_chunk(info->sc, agbno, len); + xfs_scrub_bmap_xref_rmap(info, irec, agbno); xfs_scrub_ag_free(info->sc, &info->sc->sa); } diff --git a/fs/xfs/scrub/btree.c b/fs/xfs/scrub/btree.c index 222e0312bd8c..0589d4efbf6b 100644 --- a/fs/xfs/scrub/btree.c +++ b/fs/xfs/scrub/btree.c @@ -407,6 +407,10 @@ xfs_scrub_btree_check_block_owner( if (!bs->sc->sa.bno_cur && btnum == XFS_BTNUM_BNO) bs->cur = NULL; + xfs_scrub_xref_is_owned_by(bs->sc, agbno, 1, bs->oinfo); + if (!bs->sc->sa.rmap_cur && btnum == XFS_BTNUM_RMAP) + bs->cur = NULL; + if (init_sa) xfs_scrub_ag_free(bs->sc, &bs->sc->sa); diff --git a/fs/xfs/scrub/common.c b/fs/xfs/scrub/common.c index 68fea09cd673..f5df8f2859d7 100644 --- a/fs/xfs/scrub/common.c +++ b/fs/xfs/scrub/common.c @@ -324,6 +324,59 @@ xfs_scrub_set_incomplete( trace_xfs_scrub_incomplete(sc, __return_address); } +/* + * rmap scrubbing -- compute the number of blocks with a given owner, + * at least according to the reverse mapping data. + */ + +struct xfs_scrub_rmap_ownedby_info { + struct xfs_owner_info *oinfo; + xfs_filblks_t *blocks; +}; + +STATIC int +xfs_scrub_count_rmap_ownedby_irec( + struct xfs_btree_cur *cur, + struct xfs_rmap_irec *rec, + void *priv) +{ + struct xfs_scrub_rmap_ownedby_info *sroi = priv; + bool irec_attr; + bool oinfo_attr; + + irec_attr = rec->rm_flags & XFS_RMAP_ATTR_FORK; + oinfo_attr = sroi->oinfo->oi_flags & XFS_OWNER_INFO_ATTR_FORK; + + if (rec->rm_owner != sroi->oinfo->oi_owner) + return 0; + + if (XFS_RMAP_NON_INODE_OWNER(rec->rm_owner) || irec_attr == oinfo_attr) + (*sroi->blocks) += rec->rm_blockcount; + + return 0; +} + +/* + * Calculate the number of blocks the rmap thinks are owned by something. + * The caller should pass us an rmapbt cursor. 
+ */ +int +xfs_scrub_count_rmap_ownedby_ag( + struct xfs_scrub_context *sc, + struct xfs_btree_cur *cur, + struct xfs_owner_info *oinfo, + xfs_filblks_t *blocks) +{ + struct xfs_scrub_rmap_ownedby_info sroi; + + sroi.oinfo = oinfo; + *blocks = 0; + sroi.blocks = blocks; + + return xfs_rmap_query_all(cur, xfs_scrub_count_rmap_ownedby_irec, + &sroi); +} + /* * AG scrubbing * diff --git a/fs/xfs/scrub/common.h b/fs/xfs/scrub/common.h index 84c302f1d634..bf88a677f6e7 100644 --- a/fs/xfs/scrub/common.h +++ b/fs/xfs/scrub/common.h @@ -148,6 +148,10 @@ int xfs_scrub_walk_agfl(struct xfs_scrub_context *sc, int (*fn)(struct xfs_scrub_context *, xfs_agblock_t bno, void *), void *priv); +int xfs_scrub_count_rmap_ownedby_ag(struct xfs_scrub_context *sc, + struct xfs_btree_cur *cur, + struct xfs_owner_info *oinfo, + xfs_filblks_t *blocks); int xfs_scrub_setup_ag_btree(struct xfs_scrub_context *sc, struct xfs_inode *ip, bool force_log); diff --git a/fs/xfs/scrub/ialloc.c b/fs/xfs/scrub/ialloc.c index bd7ba1642189..1a16f7867e31 100644 --- a/fs/xfs/scrub/ialloc.c +++ b/fs/xfs/scrub/ialloc.c @@ -96,11 +96,15 @@ xfs_scrub_iallocbt_chunk_xref( xfs_agblock_t agbno, xfs_extlen_t len) { + struct xfs_owner_info oinfo; + if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT) return; xfs_scrub_xref_is_used_space(sc, agbno, len); xfs_scrub_iallocbt_chunk_xref_other(sc, irec, agino); + xfs_rmap_ag_owner(&oinfo, XFS_RMAP_OWN_INODES); + xfs_scrub_xref_is_owned_by(sc, agbno, len, &oinfo); } /* Is this chunk worth checking? */ @@ -237,8 +241,14 @@ xfs_scrub_iallocbt_check_freemask( } /* If any part of this is a hole, skip it. */ - if (ir_holemask) + if (ir_holemask) { + xfs_scrub_xref_is_not_owned_by(bs->sc, agbno, + blks_per_cluster, &oinfo); continue; + } + + xfs_scrub_xref_is_owned_by(bs->sc, agbno, blks_per_cluster, + &oinfo); /* Grab the inode cluster buffer. 
*/ imap.im_blkno = XFS_AGB_TO_DADDR(mp, bs->cur->bc_private.a.agno, @@ -274,6 +284,7 @@ xfs_scrub_iallocbt_rec( union xfs_btree_rec *rec) { struct xfs_mount *mp = bs->cur->bc_mp; + xfs_filblks_t *inode_blocks = bs->private; struct xfs_inobt_rec_incore irec; uint64_t holes; xfs_agnumber_t agno = bs->cur->bc_private.a.agno; @@ -311,6 +322,9 @@ xfs_scrub_iallocbt_rec( (agbno & (xfs_icluster_size_fsb(mp) - 1))) xfs_scrub_btree_set_corrupt(bs->sc, bs->cur, 0); + *inode_blocks += XFS_B_TO_FSB(mp, + irec.ir_count * mp->m_sb.sb_inodesize); + /* Handle non-sparse inodes */ if (!xfs_inobt_issparse(irec.ir_holemask)) { len = XFS_B_TO_FSB(mp, @@ -355,6 +369,72 @@ out: return error; } +/* + * Make sure the inode btrees are as large as the rmap thinks they are. + * Don't bother if we're missing btree cursors, as we're already corrupt. + */ +STATIC void +xfs_scrub_iallocbt_xref_rmap_btreeblks( + struct xfs_scrub_context *sc, + int which) +{ + struct xfs_owner_info oinfo; + xfs_filblks_t blocks; + xfs_extlen_t inobt_blocks = 0; + xfs_extlen_t finobt_blocks = 0; + int error; + + if (!sc->sa.ino_cur || !sc->sa.rmap_cur || + (xfs_sb_version_hasfinobt(&sc->mp->m_sb) && !sc->sa.fino_cur)) + return; + + /* Check that we saw as many inobt blocks as the rmap says. 
*/ + error = xfs_btree_count_blocks(sc->sa.ino_cur, &inobt_blocks); + if (!xfs_scrub_should_check_xref(sc, &error, &sc->sa.ino_cur)) + return; + + if (sc->sa.fino_cur) { + error = xfs_btree_count_blocks(sc->sa.fino_cur, &finobt_blocks); + if (!xfs_scrub_should_check_xref(sc, &error, &sc->sa.fino_cur)) + return; + } + + xfs_rmap_ag_owner(&oinfo, XFS_RMAP_OWN_INOBT); + error = xfs_scrub_count_rmap_ownedby_ag(sc, sc->sa.rmap_cur, &oinfo, + &blocks); + if (!xfs_scrub_should_check_xref(sc, &error, &sc->sa.rmap_cur)) + return; + if (blocks != inobt_blocks + finobt_blocks) + xfs_scrub_btree_set_corrupt(sc, sc->sa.ino_cur, 0); +} + +/* + * Make sure that the inobt records point to the same number of blocks as + * the rmap says are owned by inodes. + */ +STATIC void +xfs_scrub_iallocbt_xref_rmap_inodes( + struct xfs_scrub_context *sc, + int which, + xfs_filblks_t inode_blocks) +{ + struct xfs_owner_info oinfo; + xfs_filblks_t blocks; + int error; + + if (!sc->sa.rmap_cur) + return; + + /* Check that we saw as many inode blocks as the rmap knows about. */ + xfs_rmap_ag_owner(&oinfo, XFS_RMAP_OWN_INODES); + error = xfs_scrub_count_rmap_ownedby_ag(sc, sc->sa.rmap_cur, &oinfo, + &blocks); + if (!xfs_scrub_should_check_xref(sc, &error, &sc->sa.rmap_cur)) + return; + if (blocks != inode_blocks) + xfs_scrub_btree_set_corrupt(sc, sc->sa.ino_cur, 0); +} + /* Scrub the inode btrees for some AG. */ STATIC int xfs_scrub_iallocbt( @@ -363,10 +443,29 @@ xfs_scrub_iallocbt( { struct xfs_btree_cur *cur; struct xfs_owner_info oinfo; + xfs_filblks_t inode_blocks = 0; + int error; xfs_rmap_ag_owner(&oinfo, XFS_RMAP_OWN_INOBT); cur = which == XFS_BTNUM_INO ? 
sc->sa.ino_cur : sc->sa.fino_cur; - return xfs_scrub_btree(sc, cur, xfs_scrub_iallocbt_rec, &oinfo, NULL); + error = xfs_scrub_btree(sc, cur, xfs_scrub_iallocbt_rec, &oinfo, + &inode_blocks); + if (error) + return error; + + xfs_scrub_iallocbt_xref_rmap_btreeblks(sc, which); + + /* + * If we're scrubbing the inode btree, inode_blocks is the number of + * blocks pointed to by all the inode chunk records. Therefore, we + * should compare to the number of inode chunk blocks that the rmap + * knows about. We can't do this for the finobt since it only points + * to inode chunks with free inodes. + */ + if (which == XFS_BTNUM_INO) + xfs_scrub_iallocbt_xref_rmap_inodes(sc, which, inode_blocks); + + return error; } int diff --git a/fs/xfs/scrub/inode.c b/fs/xfs/scrub/inode.c index 5eac188d18ea..53fcd215c0f5 100644 --- a/fs/xfs/scrub/inode.c +++ b/fs/xfs/scrub/inode.c @@ -36,6 +36,7 @@ #include "xfs_ialloc.h" #include "xfs_da_format.h" #include "xfs_reflink.h" +#include "xfs_rmap.h" #include "scrub/xfs_scrub.h" #include "scrub/scrub.h" #include "scrub/common.h" @@ -632,6 +633,7 @@ xfs_scrub_inode_xref( xfs_ino_t ino, struct xfs_dinode *dip) { + struct xfs_owner_info oinfo; xfs_agnumber_t agno; xfs_agblock_t agbno; int error; @@ -648,6 +650,8 @@ xfs_scrub_inode_xref( xfs_scrub_xref_is_used_space(sc, agbno, 1); xfs_scrub_inode_xref_finobt(sc, ino); + xfs_rmap_ag_owner(&oinfo, XFS_RMAP_OWN_INODES); + xfs_scrub_xref_is_owned_by(sc, agbno, 1, &oinfo); xfs_scrub_ag_free(sc, &sc->sa); } diff --git a/fs/xfs/scrub/rmap.c b/fs/xfs/scrub/rmap.c index 6e937ef14218..3ee50610c48a 100644 --- a/fs/xfs/scrub/rmap.c +++ b/fs/xfs/scrub/rmap.c @@ -157,3 +157,68 @@ xfs_scrub_rmapbt( return xfs_scrub_btree(sc, sc->sa.rmap_cur, xfs_scrub_rmapbt_rec, &oinfo, NULL); } + +/* xref check that the extent is owned by a given owner */ +static inline void +xfs_scrub_xref_check_owner( + struct xfs_scrub_context *sc, + xfs_agblock_t bno, + xfs_extlen_t len, + struct xfs_owner_info *oinfo, + bool 
should_have_rmap) +{ + bool has_rmap; + int error; + + if (!sc->sa.rmap_cur) + return; + + error = xfs_rmap_record_exists(sc->sa.rmap_cur, bno, len, oinfo, + &has_rmap); + if (!xfs_scrub_should_check_xref(sc, &error, &sc->sa.rmap_cur)) + return; + if (has_rmap != should_have_rmap) + xfs_scrub_btree_xref_set_corrupt(sc, sc->sa.rmap_cur, 0); +} + +/* xref check that the extent is owned by a given owner */ +void +xfs_scrub_xref_is_owned_by( + struct xfs_scrub_context *sc, + xfs_agblock_t bno, + xfs_extlen_t len, + struct xfs_owner_info *oinfo) +{ + xfs_scrub_xref_check_owner(sc, bno, len, oinfo, true); +} + +/* xref check that the extent is not owned by a given owner */ +void +xfs_scrub_xref_is_not_owned_by( + struct xfs_scrub_context *sc, + xfs_agblock_t bno, + xfs_extlen_t len, + struct xfs_owner_info *oinfo) +{ + xfs_scrub_xref_check_owner(sc, bno, len, oinfo, false); +} + +/* xref check that the extent has no reverse mapping at all */ +void +xfs_scrub_xref_has_no_owner( + struct xfs_scrub_context *sc, + xfs_agblock_t bno, + xfs_extlen_t len) +{ + bool has_rmap; + int error; + + if (!sc->sa.rmap_cur) + return; + + error = xfs_rmap_has_record(sc->sa.rmap_cur, bno, len, &has_rmap); + if (!xfs_scrub_should_check_xref(sc, &error, &sc->sa.rmap_cur)) + return; + if (has_rmap) + xfs_scrub_btree_xref_set_corrupt(sc, sc->sa.rmap_cur, 0); +} diff --git a/fs/xfs/scrub/scrub.h b/fs/xfs/scrub/scrub.h index 9b0033baa90b..8fcf491c8288 100644 --- a/fs/xfs/scrub/scrub.h +++ b/fs/xfs/scrub/scrub.h @@ -130,5 +130,13 @@ void xfs_scrub_xref_is_not_inode_chunk(struct xfs_scrub_context *sc, xfs_agblock_t agbno, xfs_extlen_t len); void xfs_scrub_xref_is_inode_chunk(struct xfs_scrub_context *sc, xfs_agblock_t agbno, xfs_extlen_t len); +void xfs_scrub_xref_is_owned_by(struct xfs_scrub_context *sc, + xfs_agblock_t agbno, xfs_extlen_t len, + struct xfs_owner_info *oinfo); +void xfs_scrub_xref_is_not_owned_by(struct xfs_scrub_context *sc, + xfs_agblock_t agbno, xfs_extlen_t len, + struct 
xfs_owner_info *oinfo); +void xfs_scrub_xref_has_no_owner(struct xfs_scrub_context *sc, + xfs_agblock_t agbno, xfs_extlen_t len); #endif /* __XFS_SCRUB_SCRUB_H__ */ -- cgit v1.2.3 From dbde19da96370670cfc4379ab2bfa8db6be42c2b Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Tue, 16 Jan 2018 18:53:08 -0800 Subject: xfs: cross-reference the rmapbt data with the refcountbt Cross reference the refcount data with the rmap data to check that the number of rmaps for a given block match the refcount of that block, and that CoW blocks (which are owned entirely by the refcountbt) are tracked as well. Signed-off-by: Darrick J. Wong Reviewed-by: Dave Chinner --- fs/xfs/scrub/refcount.c | 336 +++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 334 insertions(+), 2 deletions(-) diff --git a/fs/xfs/scrub/refcount.c b/fs/xfs/scrub/refcount.c index af54590f331b..0c02f2fba394 100644 --- a/fs/xfs/scrub/refcount.c +++ b/fs/xfs/scrub/refcount.c @@ -50,6 +50,291 @@ xfs_scrub_setup_ag_refcountbt( /* Reference count btree scrubber. */ +/* + * Confirming Reference Counts via Reverse Mappings + * + * We want to count the reverse mappings overlapping a refcount record + * (bno, len, refcount), allowing for the possibility that some of the + * overlap may come from smaller adjoining reverse mappings, while some + * comes from single extents which overlap the range entirely. The + * outer loop is as follows: + * + * 1. For all reverse mappings overlapping the refcount extent, + * a. If a given rmap completely overlaps, mark it as seen. + * b. Otherwise, record the fragment (in agbno order) for later + * processing. + * + * Once we've seen all the rmaps, we know that for all blocks in the + * refcount record we want to find $refcount owners and we've already + * visited $seen extents that overlap all the blocks. Therefore, we + * need to find ($refcount - $seen) owners for every block in the + * extent; call that quantity $target_nr. Proceed as follows: + * + * 2. 
Pull the first $target_nr fragments from the list; all of them + * should start at or before the start of the extent. + * Call this subset of fragments the working set. + * 3. Until there are no more unprocessed fragments, + * a. Find the shortest fragments in the set and remove them. + * b. Note the block number of the end of these fragments. + * c. Pull the same number of fragments from the list. All of these + * fragments should start at the block number recorded in the + * previous step. + * d. Put those fragments in the set. + * 4. Check that there are $target_nr fragments remaining in the list, + * and that they all end at or beyond the end of the refcount extent. + * + * If the refcount is correct, all the check conditions in the algorithm + * should always hold true. If not, the refcount is incorrect. + */ +struct xfs_scrub_refcnt_frag { + struct list_head list; + struct xfs_rmap_irec rm; +}; + +struct xfs_scrub_refcnt_check { + struct xfs_scrub_context *sc; + struct list_head fragments; + + /* refcount extent we're examining */ + xfs_agblock_t bno; + xfs_extlen_t len; + xfs_nlink_t refcount; + + /* number of owners seen */ + xfs_nlink_t seen; +}; + +/* + * Decide if the given rmap is large enough that we can redeem it + * towards refcount verification now, or if it's a fragment, in + * which case we'll hang onto it in the hopes that we'll later + * discover that we've collected exactly the correct number of + * fragments as the refcountbt says we should have. 
+ */ +STATIC int +xfs_scrub_refcountbt_rmap_check( + struct xfs_btree_cur *cur, + struct xfs_rmap_irec *rec, + void *priv) +{ + struct xfs_scrub_refcnt_check *refchk = priv; + struct xfs_scrub_refcnt_frag *frag; + xfs_agblock_t rm_last; + xfs_agblock_t rc_last; + int error = 0; + + if (xfs_scrub_should_terminate(refchk->sc, &error)) + return error; + + rm_last = rec->rm_startblock + rec->rm_blockcount - 1; + rc_last = refchk->bno + refchk->len - 1; + + /* Confirm that a single-owner refc extent is a CoW stage. */ + if (refchk->refcount == 1 && rec->rm_owner != XFS_RMAP_OWN_COW) { + xfs_scrub_btree_xref_set_corrupt(refchk->sc, cur, 0); + return 0; + } + + if (rec->rm_startblock <= refchk->bno && rm_last >= rc_last) { + /* + * The rmap overlaps the refcount record, so we can confirm + * one refcount owner seen. + */ + refchk->seen++; + } else { + /* + * This rmap covers only part of the refcount record, so + * save the fragment for later processing. If the rmapbt + * is healthy each rmap_irec we see will be in agbno order + * so we don't need insertion sort here. + */ + frag = kmem_alloc(sizeof(struct xfs_scrub_refcnt_frag), + KM_MAYFAIL | KM_NOFS); + if (!frag) + return -ENOMEM; + memcpy(&frag->rm, rec, sizeof(frag->rm)); + list_add_tail(&frag->list, &refchk->fragments); + } + + return 0; +} + +/* + * Given a bunch of rmap fragments, iterate through them, keeping + * a running tally of the refcount. If this ever deviates from + * what we expect (which is the refcountbt's refcount minus the + * number of extents that totally covered the refcountbt extent), + * we have a refcountbt error. 
+ */ +STATIC void +xfs_scrub_refcountbt_process_rmap_fragments( + struct xfs_scrub_refcnt_check *refchk) +{ + struct list_head worklist; + struct xfs_scrub_refcnt_frag *frag; + struct xfs_scrub_refcnt_frag *n; + xfs_agblock_t bno; + xfs_agblock_t rbno; + xfs_agblock_t next_rbno; + xfs_nlink_t nr; + xfs_nlink_t target_nr; + + target_nr = refchk->refcount - refchk->seen; + if (target_nr == 0) + return; + + /* + * There are (refchk->refcount - refchk->seen) + * references we haven't found yet. Pull that many off the + * fragment list and figure out where the smallest rmap ends + * (and therefore the next rmap should start). All the rmaps + * we pull off should start at or before the beginning of the + * refcount record's range. + */ + INIT_LIST_HEAD(&worklist); + rbno = NULLAGBLOCK; + nr = 1; + + /* Make sure the fragments actually /are/ in agbno order. */ + bno = 0; + list_for_each_entry(frag, &refchk->fragments, list) { + if (frag->rm.rm_startblock < bno) + goto done; + bno = frag->rm.rm_startblock; + } + + /* + * Find all the rmaps that start at or before the refc extent, + * and put them on the worklist. + */ + list_for_each_entry_safe(frag, n, &refchk->fragments, list) { + if (frag->rm.rm_startblock > refchk->bno) + goto done; + bno = frag->rm.rm_startblock + frag->rm.rm_blockcount; + if (bno < rbno) + rbno = bno; + list_move_tail(&frag->list, &worklist); + if (nr == target_nr) + break; + nr++; + } + + /* + * We should have found exactly $target_nr rmap fragments starting + * at or before the refcount extent. + */ + if (nr != target_nr) + goto done; + + while (!list_empty(&refchk->fragments)) { + /* Discard any fragments ending at rbno from the worklist.
*/ + nr = 0; + next_rbno = NULLAGBLOCK; + list_for_each_entry_safe(frag, n, &worklist, list) { + bno = frag->rm.rm_startblock + frag->rm.rm_blockcount; + if (bno != rbno) { + if (bno < next_rbno) + next_rbno = bno; + continue; + } + list_del(&frag->list); + kmem_free(frag); + nr++; + } + + /* Try to add nr rmaps starting at rbno to the worklist. */ + list_for_each_entry_safe(frag, n, &refchk->fragments, list) { + bno = frag->rm.rm_startblock + frag->rm.rm_blockcount; + if (frag->rm.rm_startblock != rbno) + goto done; + list_move_tail(&frag->list, &worklist); + if (next_rbno > bno) + next_rbno = bno; + nr--; + if (nr == 0) + break; + } + + /* + * If we get here and nr > 0, this means that we added fewer + * items to the worklist than we discarded because the fragment + * list ran out of items. Therefore, we cannot maintain the + * required refcount. Something is wrong, so we're done. + */ + if (nr) + goto done; + + rbno = next_rbno; + } + + /* + * Make sure the last extent we processed ends at or beyond + * the end of the refcount extent. + */ + if (rbno < refchk->bno + refchk->len) + goto done; + + /* Actually record us having seen the remaining refcount. */ + refchk->seen = refchk->refcount; +done: + /* Delete fragments and work list. */ + list_for_each_entry_safe(frag, n, &worklist, list) { + list_del(&frag->list); + kmem_free(frag); + } + list_for_each_entry_safe(frag, n, &refchk->fragments, list) { + list_del(&frag->list); + kmem_free(frag); + } +} + +/* Use the rmap entries covering this extent to verify the refcount. 
*/ +STATIC void +xfs_scrub_refcountbt_xref_rmap( + struct xfs_scrub_context *sc, + xfs_agblock_t bno, + xfs_extlen_t len, + xfs_nlink_t refcount) +{ + struct xfs_scrub_refcnt_check refchk = { + .sc = sc, + .bno = bno, + .len = len, + .refcount = refcount, + .seen = 0, + }; + struct xfs_rmap_irec low; + struct xfs_rmap_irec high; + struct xfs_scrub_refcnt_frag *frag; + struct xfs_scrub_refcnt_frag *n; + int error; + + if (!sc->sa.rmap_cur) + return; + + /* Cross-reference with the rmapbt to confirm the refcount. */ + memset(&low, 0, sizeof(low)); + low.rm_startblock = bno; + memset(&high, 0xFF, sizeof(high)); + high.rm_startblock = bno + len - 1; + + INIT_LIST_HEAD(&refchk.fragments); + error = xfs_rmap_query_range(sc->sa.rmap_cur, &low, &high, + &xfs_scrub_refcountbt_rmap_check, &refchk); + if (!xfs_scrub_should_check_xref(sc, &error, &sc->sa.rmap_cur)) + goto out_free; + + xfs_scrub_refcountbt_process_rmap_fragments(&refchk); + if (refcount != refchk.seen) + xfs_scrub_btree_xref_set_corrupt(sc, sc->sa.rmap_cur, 0); + +out_free: + list_for_each_entry_safe(frag, n, &refchk.fragments, list) { + list_del(&frag->list); + kmem_free(frag); + } +} + /* Cross-reference with the other btrees. */ STATIC void xfs_scrub_refcountbt_xref( @@ -63,6 +348,7 @@ xfs_scrub_refcountbt_xref( xfs_scrub_xref_is_used_space(sc, agbno, len); xfs_scrub_xref_is_not_inode_chunk(sc, agbno, len); + xfs_scrub_refcountbt_xref_rmap(sc, agbno, len, refcount); } /* Scrub a refcountbt record. */ @@ -72,6 +358,7 @@ xfs_scrub_refcountbt_rec( union xfs_btree_rec *rec) { struct xfs_mount *mp = bs->cur->bc_mp; + xfs_agblock_t *cow_blocks = bs->private; xfs_agnumber_t agno = bs->cur->bc_private.a.agno; xfs_agblock_t bno; xfs_extlen_t len; @@ -87,6 +374,8 @@ xfs_scrub_refcountbt_rec( has_cowflag = (bno & XFS_REFC_COW_START); if ((refcount == 1 && !has_cowflag) || (refcount != 1 && has_cowflag)) xfs_scrub_btree_set_corrupt(bs->sc, bs->cur, 0); + if (has_cowflag) + (*cow_blocks) += len; /* Check the extent. 
*/ bno &= ~XFS_REFC_COW_START; @@ -103,14 +392,57 @@ xfs_scrub_refcountbt_rec( return error; } +/* Make sure we have as many refc blocks as the rmap says. */ +STATIC void +xfs_scrub_refcount_xref_rmap( + struct xfs_scrub_context *sc, + struct xfs_owner_info *oinfo, + xfs_filblks_t cow_blocks) +{ + xfs_extlen_t refcbt_blocks = 0; + xfs_filblks_t blocks; + int error; + + if (!sc->sa.rmap_cur) + return; + + /* Check that we saw as many refcbt blocks as the rmap knows about. */ + error = xfs_btree_count_blocks(sc->sa.refc_cur, &refcbt_blocks); + if (!xfs_scrub_btree_process_error(sc, sc->sa.refc_cur, 0, &error)) + return; + error = xfs_scrub_count_rmap_ownedby_ag(sc, sc->sa.rmap_cur, oinfo, + &blocks); + if (!xfs_scrub_should_check_xref(sc, &error, &sc->sa.rmap_cur)) + return; + if (blocks != refcbt_blocks) + xfs_scrub_btree_xref_set_corrupt(sc, sc->sa.rmap_cur, 0); + + /* Check that we saw as many cow blocks as the rmap knows about. */ + xfs_rmap_ag_owner(oinfo, XFS_RMAP_OWN_COW); + error = xfs_scrub_count_rmap_ownedby_ag(sc, sc->sa.rmap_cur, oinfo, + &blocks); + if (!xfs_scrub_should_check_xref(sc, &error, &sc->sa.rmap_cur)) + return; + if (blocks != cow_blocks) + xfs_scrub_btree_xref_set_corrupt(sc, sc->sa.rmap_cur, 0); +} + /* Scrub the refcount btree for some AG. */ int xfs_scrub_refcountbt( struct xfs_scrub_context *sc) { struct xfs_owner_info oinfo; + xfs_agblock_t cow_blocks = 0; + int error; xfs_rmap_ag_owner(&oinfo, XFS_RMAP_OWN_REFC); - return xfs_scrub_btree(sc, sc->sa.refc_cur, xfs_scrub_refcountbt_rec, - &oinfo, NULL); + error = xfs_scrub_btree(sc, sc->sa.refc_cur, xfs_scrub_refcountbt_rec, + &oinfo, &cow_blocks); + if (error) + return error; + + xfs_scrub_refcount_xref_rmap(sc, &oinfo, cow_blocks); + + return 0; } -- cgit v1.2.3 From f6d5fc21fdc7137848a469e344f78fcc8b5c10ab Mon Sep 17 00:00:00 2001 From: "Darrick J. 
Wong" Date: Tue, 16 Jan 2018 18:53:09 -0800 Subject: xfs: cross-reference refcount btree during scrub During metadata btree scrub, we should cross-reference with the reference counts. Signed-off-by: Darrick J. Wong Reviewed-by: Dave Chinner --- fs/xfs/scrub/agheader.c | 25 ++++++++++++++++++ fs/xfs/scrub/alloc.c | 1 + fs/xfs/scrub/bmap.c | 15 +++++++++++ fs/xfs/scrub/ialloc.c | 1 + fs/xfs/scrub/inode.c | 50 +++++++++++++++++++++++++----------- fs/xfs/scrub/refcount.c | 67 +++++++++++++++++++++++++++++++++++++++++++++++++ fs/xfs/scrub/rmap.c | 37 +++++++++++++++++++++++++++ fs/xfs/scrub/scrub.h | 4 +++ 8 files changed, 186 insertions(+), 14 deletions(-) diff --git a/fs/xfs/scrub/agheader.c b/fs/xfs/scrub/agheader.c index 1d109d5744aa..20a3bebdee06 100644 --- a/fs/xfs/scrub/agheader.c +++ b/fs/xfs/scrub/agheader.c @@ -127,6 +127,7 @@ xfs_scrub_superblock_xref( xfs_scrub_xref_is_not_inode_chunk(sc, agbno, 1); xfs_rmap_ag_owner(&oinfo, XFS_RMAP_OWN_FS); xfs_scrub_xref_is_owned_by(sc, agbno, 1, &oinfo); + xfs_scrub_xref_is_not_shared(sc, agbno, 1); /* scrub teardown will take care of sc->sa for us */ } @@ -537,6 +538,25 @@ xfs_scrub_agf_xref_btreeblks( xfs_scrub_block_xref_set_corrupt(sc, sc->sa.agf_bp); } +/* Check agf_refcount_blocks against tree size */ +static inline void +xfs_scrub_agf_xref_refcblks( + struct xfs_scrub_context *sc) +{ + struct xfs_agf *agf = XFS_BUF_TO_AGF(sc->sa.agf_bp); + xfs_agblock_t blocks; + int error; + + if (!sc->sa.refc_cur) + return; + + error = xfs_btree_count_blocks(sc->sa.refc_cur, &blocks); + if (!xfs_scrub_should_check_xref(sc, &error, &sc->sa.refc_cur)) + return; + if (blocks != be32_to_cpu(agf->agf_refcount_blocks)) + xfs_scrub_block_xref_set_corrupt(sc, sc->sa.agf_bp); +} + /* Cross-reference with the other btrees. 
*/ STATIC void xfs_scrub_agf_xref( @@ -563,6 +583,8 @@ xfs_scrub_agf_xref( xfs_rmap_ag_owner(&oinfo, XFS_RMAP_OWN_FS); xfs_scrub_xref_is_owned_by(sc, agbno, 1, &oinfo); xfs_scrub_agf_xref_btreeblks(sc); + xfs_scrub_xref_is_not_shared(sc, agbno, 1); + xfs_scrub_agf_xref_refcblks(sc); /* scrub teardown will take care of sc->sa for us */ } @@ -672,6 +694,7 @@ xfs_scrub_agfl_block_xref( xfs_scrub_xref_is_used_space(sc, agbno, 1); xfs_scrub_xref_is_not_inode_chunk(sc, agbno, 1); xfs_scrub_xref_is_owned_by(sc, agbno, 1, oinfo); + xfs_scrub_xref_is_not_shared(sc, agbno, 1); } /* Scrub an AGFL block. */ @@ -730,6 +753,7 @@ xfs_scrub_agfl_xref( xfs_scrub_xref_is_not_inode_chunk(sc, agbno, 1); xfs_rmap_ag_owner(&oinfo, XFS_RMAP_OWN_FS); xfs_scrub_xref_is_owned_by(sc, agbno, 1, &oinfo); + xfs_scrub_xref_is_not_shared(sc, agbno, 1); /* * Scrub teardown will take care of sc->sa for us. Leave sc->sa @@ -850,6 +874,7 @@ xfs_scrub_agi_xref( xfs_scrub_agi_xref_icounts(sc); xfs_rmap_ag_owner(&oinfo, XFS_RMAP_OWN_FS); xfs_scrub_xref_is_owned_by(sc, agbno, 1, &oinfo); + xfs_scrub_xref_is_not_shared(sc, agbno, 1); /* scrub teardown will take care of sc->sa for us */ } diff --git a/fs/xfs/scrub/alloc.c b/fs/xfs/scrub/alloc.c index 3faa4371079e..517c079d3f68 100644 --- a/fs/xfs/scrub/alloc.c +++ b/fs/xfs/scrub/alloc.c @@ -106,6 +106,7 @@ xfs_scrub_allocbt_xref( xfs_scrub_allocbt_xref_other(sc, agbno, len); xfs_scrub_xref_is_not_inode_chunk(sc, agbno, len); xfs_scrub_xref_has_no_owner(sc, agbno, len); + xfs_scrub_xref_is_not_shared(sc, agbno, len); } /* Scrub a bnobt/cntbt record. 
*/ diff --git a/fs/xfs/scrub/bmap.c b/fs/xfs/scrub/bmap.c index 933e0b8be34f..7b2cf8fd1ce0 100644 --- a/fs/xfs/scrub/bmap.c +++ b/fs/xfs/scrub/bmap.c @@ -37,6 +37,7 @@ #include "xfs_bmap_util.h" #include "xfs_bmap_btree.h" #include "xfs_rmap.h" +#include "xfs_refcount.h" #include "scrub/xfs_scrub.h" #include "scrub/scrub.h" #include "scrub/common.h" @@ -273,6 +274,20 @@ xfs_scrub_bmap_extent_xref( xfs_scrub_xref_is_used_space(info->sc, agbno, len); xfs_scrub_xref_is_not_inode_chunk(info->sc, agbno, len); xfs_scrub_bmap_xref_rmap(info, irec, agbno); + switch (info->whichfork) { + case XFS_DATA_FORK: + if (xfs_is_reflink_inode(info->sc->ip)) + break; + /* fall through */ + case XFS_ATTR_FORK: + xfs_scrub_xref_is_not_shared(info->sc, agbno, + irec->br_blockcount); + break; + case XFS_COW_FORK: + xfs_scrub_xref_is_cow_staging(info->sc, agbno, + irec->br_blockcount); + break; + } xfs_scrub_ag_free(info->sc, &info->sc->sa); } diff --git a/fs/xfs/scrub/ialloc.c b/fs/xfs/scrub/ialloc.c index 1a16f7867e31..21c850abbafd 100644 --- a/fs/xfs/scrub/ialloc.c +++ b/fs/xfs/scrub/ialloc.c @@ -105,6 +105,7 @@ xfs_scrub_iallocbt_chunk_xref( xfs_scrub_iallocbt_chunk_xref_other(sc, irec, agino); xfs_rmap_ag_owner(&oinfo, XFS_RMAP_OWN_INODES); xfs_scrub_xref_is_owned_by(sc, agbno, len, &oinfo); + xfs_scrub_xref_is_not_shared(sc, agbno, len); } /* Is this chunk worth checking? */ diff --git a/fs/xfs/scrub/inode.c b/fs/xfs/scrub/inode.c index 53fcd215c0f5..92752eef014c 100644 --- a/fs/xfs/scrub/inode.c +++ b/fs/xfs/scrub/inode.c @@ -652,22 +652,50 @@ xfs_scrub_inode_xref( xfs_scrub_inode_xref_finobt(sc, ino); xfs_rmap_ag_owner(&oinfo, XFS_RMAP_OWN_INODES); xfs_scrub_xref_is_owned_by(sc, agbno, 1, &oinfo); + xfs_scrub_xref_is_not_shared(sc, agbno, 1); xfs_scrub_ag_free(sc, &sc->sa); } +/* + * If the reflink iflag disagrees with a scan for shared data fork extents, + * either flag an error (shared extents w/ no flag) or a preen (flag set w/o + * any shared extents). 
We already checked for reflink iflag set on a non + * reflink filesystem. + */ +static void +xfs_scrub_inode_check_reflink_iflag( + struct xfs_scrub_context *sc, + xfs_ino_t ino, + struct xfs_buf *bp) +{ + struct xfs_mount *mp = sc->mp; + bool has_shared; + int error; + + if (!xfs_sb_version_hasreflink(&mp->m_sb)) + return; + + error = xfs_reflink_inode_has_shared_extents(sc->tp, sc->ip, + &has_shared); + if (!xfs_scrub_xref_process_error(sc, XFS_INO_TO_AGNO(mp, ino), + XFS_INO_TO_AGBNO(mp, ino), &error)) + return; + if (xfs_is_reflink_inode(sc->ip) && !has_shared) + xfs_scrub_ino_set_preen(sc, ino, bp); + else if (!xfs_is_reflink_inode(sc->ip) && has_shared) + xfs_scrub_ino_set_corrupt(sc, ino, bp); +} + /* Scrub an inode. */ int xfs_scrub_inode( struct xfs_scrub_context *sc) { struct xfs_dinode di; - struct xfs_mount *mp = sc->mp; struct xfs_buf *bp = NULL; struct xfs_dinode *dip; xfs_ino_t ino; - - bool has_shared; int error = 0; /* Did we get the in-core inode, or are we doing this manually? */ @@ -692,18 +720,12 @@ xfs_scrub_inode( goto out; /* - * Does this inode have the reflink flag set but no shared extents? - * Set the preening flag if this is the case. + * Look for discrepancies between file's data blocks and the reflink + * iflag. We already checked the iflag against the file mode when + * we scrubbed the dinode. 
*/ - if (xfs_is_reflink_inode(sc->ip)) { - error = xfs_reflink_inode_has_shared_extents(sc->tp, sc->ip, - &has_shared); - if (!xfs_scrub_xref_process_error(sc, XFS_INO_TO_AGNO(mp, ino), - XFS_INO_TO_AGBNO(mp, ino), &error)) - goto out; - if (!has_shared) - xfs_scrub_ino_set_preen(sc, ino, bp); - } + if (S_ISREG(VFS_I(sc->ip)->i_mode)) + xfs_scrub_inode_check_reflink_iflag(sc, ino, bp); xfs_scrub_inode_xref(sc, ino, dip); out: diff --git a/fs/xfs/scrub/refcount.c b/fs/xfs/scrub/refcount.c index 0c02f2fba394..400f1561cd3d 100644 --- a/fs/xfs/scrub/refcount.c +++ b/fs/xfs/scrub/refcount.c @@ -31,6 +31,7 @@ #include "xfs_sb.h" #include "xfs_alloc.h" #include "xfs_rmap.h" +#include "xfs_refcount.h" #include "scrub/xfs_scrub.h" #include "scrub/scrub.h" #include "scrub/common.h" @@ -446,3 +447,69 @@ xfs_scrub_refcountbt( return 0; } + +/* xref check that a cow staging extent is marked in the refcountbt. */ +void +xfs_scrub_xref_is_cow_staging( + struct xfs_scrub_context *sc, + xfs_agblock_t agbno, + xfs_extlen_t len) +{ + struct xfs_refcount_irec rc; + bool has_cowflag; + int has_refcount; + int error; + + if (!sc->sa.refc_cur) + return; + + /* Find the CoW staging extent. */ + error = xfs_refcount_lookup_le(sc->sa.refc_cur, + agbno + XFS_REFC_COW_START, &has_refcount); + if (!xfs_scrub_should_check_xref(sc, &error, &sc->sa.refc_cur)) + return; + if (!has_refcount) { + xfs_scrub_btree_xref_set_corrupt(sc, sc->sa.refc_cur, 0); + return; + } + + error = xfs_refcount_get_rec(sc->sa.refc_cur, &rc, &has_refcount); + if (!xfs_scrub_should_check_xref(sc, &error, &sc->sa.refc_cur)) + return; + if (!has_refcount) { + xfs_scrub_btree_xref_set_corrupt(sc, sc->sa.refc_cur, 0); + return; + } + + /* CoW flag must be set, refcount must be 1. 
*/ + has_cowflag = (rc.rc_startblock & XFS_REFC_COW_START); + if (!has_cowflag || rc.rc_refcount != 1) + xfs_scrub_btree_xref_set_corrupt(sc, sc->sa.refc_cur, 0); + + /* Must be at least as long as what was passed in */ + if (rc.rc_blockcount < len) + xfs_scrub_btree_xref_set_corrupt(sc, sc->sa.refc_cur, 0); +} + +/* + * xref check that the extent is not shared. Only file data blocks + * can have multiple owners. + */ +void +xfs_scrub_xref_is_not_shared( + struct xfs_scrub_context *sc, + xfs_agblock_t agbno, + xfs_extlen_t len) +{ + bool shared; + int error; + + if (!sc->sa.refc_cur) + return; + + error = xfs_refcount_has_record(sc->sa.refc_cur, agbno, len, &shared); + if (!xfs_scrub_should_check_xref(sc, &error, &sc->sa.refc_cur)) + return; + if (shared) + xfs_scrub_btree_xref_set_corrupt(sc, sc->sa.refc_cur, 0); +} diff --git a/fs/xfs/scrub/rmap.c b/fs/xfs/scrub/rmap.c index 3ee50610c48a..8f2a7c3ff455 100644 --- a/fs/xfs/scrub/rmap.c +++ b/fs/xfs/scrub/rmap.c @@ -32,6 +32,7 @@ #include "xfs_alloc.h" #include "xfs_ialloc.h" #include "xfs_rmap.h" +#include "xfs_refcount.h" #include "scrub/xfs_scrub.h" #include "scrub/scrub.h" #include "scrub/common.h" @@ -51,6 +52,37 @@ xfs_scrub_setup_ag_rmapbt( /* Reverse-mapping scrubber. */ +/* Cross-reference a rmap against the refcount btree. */ +STATIC void +xfs_scrub_rmapbt_xref_refc( + struct xfs_scrub_context *sc, + struct xfs_rmap_irec *irec) +{ + xfs_agblock_t fbno; + xfs_extlen_t flen; + bool non_inode; + bool is_bmbt; + bool is_attr; + bool is_unwritten; + int error; + + if (!sc->sa.refc_cur) + return; + + non_inode = XFS_RMAP_NON_INODE_OWNER(irec->rm_owner); + is_bmbt = irec->rm_flags & XFS_RMAP_BMBT_BLOCK; + is_attr = irec->rm_flags & XFS_RMAP_ATTR_FORK; + is_unwritten = irec->rm_flags & XFS_RMAP_UNWRITTEN; + + /* If this is shared, must be a data fork extent. 
*/ + error = xfs_refcount_find_shared(sc->sa.refc_cur, irec->rm_startblock, + irec->rm_blockcount, &fbno, &flen, false); + if (!xfs_scrub_should_check_xref(sc, &error, &sc->sa.refc_cur)) + return; + if (flen != 0 && (non_inode || is_attr || is_bmbt || is_unwritten)) + xfs_scrub_btree_xref_set_corrupt(sc, sc->sa.refc_cur, 0); +} + /* Cross-reference with the other btrees. */ STATIC void xfs_scrub_rmapbt_xref( @@ -68,6 +100,11 @@ xfs_scrub_rmapbt_xref( xfs_scrub_xref_is_inode_chunk(sc, agbno, len); else xfs_scrub_xref_is_not_inode_chunk(sc, agbno, len); + if (irec->rm_owner == XFS_RMAP_OWN_COW) + xfs_scrub_xref_is_cow_staging(sc, irec->rm_startblock, + irec->rm_blockcount); + else + xfs_scrub_rmapbt_xref_refc(sc, irec); } /* Scrub an rmapbt record. */ diff --git a/fs/xfs/scrub/scrub.h b/fs/xfs/scrub/scrub.h index 8fcf491c8288..c8f8b42cbdb3 100644 --- a/fs/xfs/scrub/scrub.h +++ b/fs/xfs/scrub/scrub.h @@ -138,5 +138,9 @@ void xfs_scrub_xref_is_not_owned_by(struct xfs_scrub_context *sc, struct xfs_owner_info *oinfo); void xfs_scrub_xref_has_no_owner(struct xfs_scrub_context *sc, xfs_agblock_t agbno, xfs_extlen_t len); +void xfs_scrub_xref_is_cow_staging(struct xfs_scrub_context *sc, + xfs_agblock_t bno, xfs_extlen_t len); +void xfs_scrub_xref_is_not_shared(struct xfs_scrub_context *sc, + xfs_agblock_t bno, xfs_extlen_t len); #endif /* __XFS_SCRUB_SCRUB_H__ */ -- cgit v1.2.3 From 46d9bfb5e706493777b9dfed666cd8967f69e6fd Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Tue, 16 Jan 2018 18:53:10 -0800 Subject: xfs: cross-reference the realtime bitmap While we're scrubbing various btrees, cross-reference the records with the other metadata. Signed-off-by: Darrick J. 
Wong Reviewed-by: Dave Chinner --- fs/xfs/libxfs/xfs_rtbitmap.c | 21 +++++++++++++++++++++ fs/xfs/scrub/bmap.c | 3 +++ fs/xfs/scrub/rtbitmap.c | 23 +++++++++++++++++++++++ fs/xfs/scrub/scrub.h | 6 ++++++ fs/xfs/xfs_rtalloc.h | 4 ++++ 5 files changed, 57 insertions(+) diff --git a/fs/xfs/libxfs/xfs_rtbitmap.c b/fs/xfs/libxfs/xfs_rtbitmap.c index 3fb29a5ea915..106be2d0bb88 100644 --- a/fs/xfs/libxfs/xfs_rtbitmap.c +++ b/fs/xfs/libxfs/xfs_rtbitmap.c @@ -1097,3 +1097,24 @@ xfs_verify_rtbno( { return rtbno < mp->m_sb.sb_rblocks; } + +/* Is the given extent all free? */ +int +xfs_rtalloc_extent_is_free( + struct xfs_mount *mp, + struct xfs_trans *tp, + xfs_rtblock_t start, + xfs_extlen_t len, + bool *is_free) +{ + xfs_rtblock_t end; + int matches; + int error; + + error = xfs_rtcheck_range(mp, tp, start, len, 1, &end, &matches); + if (error) + return error; + + *is_free = matches; + return 0; +} diff --git a/fs/xfs/scrub/bmap.c b/fs/xfs/scrub/bmap.c index 7b2cf8fd1ce0..2bb3e0c4b4ef 100644 --- a/fs/xfs/scrub/bmap.c +++ b/fs/xfs/scrub/bmap.c @@ -243,6 +243,9 @@ xfs_scrub_bmap_rt_extent_xref( { if (info->sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT) return; + + xfs_scrub_xref_is_used_rt_space(info->sc, irec->br_startblock, + irec->br_blockcount); } /* Cross-reference a single datadev extent record. 
*/ diff --git a/fs/xfs/scrub/rtbitmap.c b/fs/xfs/scrub/rtbitmap.c index 6860d5d92515..26390991369a 100644 --- a/fs/xfs/scrub/rtbitmap.c +++ b/fs/xfs/scrub/rtbitmap.c @@ -98,3 +98,26 @@ xfs_scrub_rtsummary( /* XXX: implement this some day */ return -ENOENT; } + + +/* xref check that the extent is not free in the rtbitmap */ +void +xfs_scrub_xref_is_used_rt_space( + struct xfs_scrub_context *sc, + xfs_rtblock_t fsbno, + xfs_extlen_t len) +{ + bool is_free; + int error; + + xfs_ilock(sc->mp->m_rbmip, XFS_ILOCK_SHARED | XFS_ILOCK_RTBITMAP); + error = xfs_rtalloc_extent_is_free(sc->mp, sc->tp, fsbno, len, + &is_free); + if (!xfs_scrub_should_check_xref(sc, &error, NULL)) + goto out_unlock; + if (is_free) + xfs_scrub_ino_xref_set_corrupt(sc, sc->mp->m_rbmip->i_ino, + NULL); +out_unlock: + xfs_iunlock(sc->mp->m_rbmip, XFS_ILOCK_SHARED | XFS_ILOCK_RTBITMAP); +} diff --git a/fs/xfs/scrub/scrub.h b/fs/xfs/scrub/scrub.h index c8f8b42cbdb3..0d92af86f67a 100644 --- a/fs/xfs/scrub/scrub.h +++ b/fs/xfs/scrub/scrub.h @@ -142,5 +142,11 @@ void xfs_scrub_xref_is_cow_staging(struct xfs_scrub_context *sc, xfs_agblock_t bno, xfs_extlen_t len); void xfs_scrub_xref_is_not_shared(struct xfs_scrub_context *sc, xfs_agblock_t bno, xfs_extlen_t len); +#ifdef CONFIG_XFS_RT +void xfs_scrub_xref_is_used_rt_space(struct xfs_scrub_context *sc, + xfs_rtblock_t rtbno, xfs_extlen_t len); +#else +# define xfs_scrub_xref_is_used_rt_space(sc, rtbno, len) do { } while (0) +#endif #endif /* __XFS_SCRUB_SCRUB_H__ */ diff --git a/fs/xfs/xfs_rtalloc.h b/fs/xfs/xfs_rtalloc.h index 3f30f846d7f2..dfee3c991155 100644 --- a/fs/xfs/xfs_rtalloc.h +++ b/fs/xfs/xfs_rtalloc.h @@ -139,6 +139,9 @@ int xfs_rtalloc_query_all(struct xfs_trans *tp, xfs_rtalloc_query_range_fn fn, void *priv); bool xfs_verify_rtbno(struct xfs_mount *mp, xfs_rtblock_t rtbno); +int xfs_rtalloc_extent_is_free(struct xfs_mount *mp, struct xfs_trans *tp, + xfs_rtblock_t start, xfs_extlen_t len, + bool *is_free); #else # define 
xfs_rtallocate_extent(t,b,min,max,l,f,p,rb) (ENOSYS) # define xfs_rtfree_extent(t,b,l) (ENOSYS) @@ -148,6 +151,7 @@ bool xfs_verify_rtbno(struct xfs_mount *mp, xfs_rtblock_t rtbno); # define xfs_rtalloc_query_all(t,f,p) (ENOSYS) # define xfs_rtbuf_get(m,t,b,i,p) (ENOSYS) # define xfs_verify_rtbno(m, r) (false) +# define xfs_rtalloc_extent_is_free(m,t,s,l,i) (ENOSYS) static inline int /* error */ xfs_rtmount_init( xfs_mount_t *mp) /* file system mount structure */ -- cgit v1.2.3 From 561f648ab2bdbb43f2ecc5074854c11537f2aa6c Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Tue, 16 Jan 2018 18:53:10 -0800 Subject: xfs: cross-reference the block mappings when possible Use an inode's block mappings to cross-reference inode block counters. Signed-off-by: Darrick J. Wong Reviewed-by: Dave Chinner --- fs/xfs/scrub/inode.c | 34 ++++++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) diff --git a/fs/xfs/scrub/inode.c b/fs/xfs/scrub/inode.c index 92752eef014c..21297bef8df1 100644 --- a/fs/xfs/scrub/inode.c +++ b/fs/xfs/scrub/inode.c @@ -37,6 +37,8 @@ #include "xfs_da_format.h" #include "xfs_reflink.h" #include "xfs_rmap.h" +#include "xfs_bmap.h" +#include "xfs_bmap_util.h" #include "scrub/xfs_scrub.h" #include "scrub/scrub.h" #include "scrub/common.h" @@ -626,6 +628,37 @@ xfs_scrub_inode_xref_finobt( xfs_scrub_btree_xref_set_corrupt(sc, sc->sa.fino_cur, 0); } +/* Cross reference the inode fields with the forks. */ +STATIC void +xfs_scrub_inode_xref_bmap( + struct xfs_scrub_context *sc, + struct xfs_dinode *dip) +{ + xfs_extnum_t nextents; + xfs_filblks_t count; + xfs_filblks_t acount; + int error; + + /* Walk all the extents to check nextents/naextents/nblocks. 
*/ + error = xfs_bmap_count_blocks(sc->tp, sc->ip, XFS_DATA_FORK, + &nextents, &count); + if (!xfs_scrub_should_check_xref(sc, &error, NULL)) + return; + if (nextents < be32_to_cpu(dip->di_nextents)) + xfs_scrub_ino_xref_set_corrupt(sc, sc->ip->i_ino, NULL); + + error = xfs_bmap_count_blocks(sc->tp, sc->ip, XFS_ATTR_FORK, + &nextents, &acount); + if (!xfs_scrub_should_check_xref(sc, &error, NULL)) + return; + if (nextents != be16_to_cpu(dip->di_anextents)) + xfs_scrub_ino_xref_set_corrupt(sc, sc->ip->i_ino, NULL); + + /* Check nblocks against the inode. */ + if (count + acount != be64_to_cpu(dip->di_nblocks)) + xfs_scrub_ino_xref_set_corrupt(sc, sc->ip->i_ino, NULL); +} + /* Cross-reference with the other btrees. */ STATIC void xfs_scrub_inode_xref( @@ -653,6 +686,7 @@ xfs_scrub_inode_xref( xfs_rmap_ag_owner(&oinfo, XFS_RMAP_OWN_INODES); xfs_scrub_xref_is_owned_by(sc, agbno, 1, &oinfo); xfs_scrub_xref_is_not_shared(sc, agbno, 1); + xfs_scrub_inode_xref_bmap(sc, dip); xfs_scrub_ag_free(sc, &sc->sa); } -- cgit v1.2.3 From cf1b0b8b1a43102cdc0189d76d1c05915c4e16a6 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Tue, 16 Jan 2018 18:53:11 -0800 Subject: xfs: scrub in-core metadata Whenever we load a buffer, explicitly re-call the structure verifier to ensure that memory isn't corrupting things. Signed-off-by: Darrick J. 
Wong Reviewed-by: Dave Chinner --- fs/xfs/scrub/agheader.c | 3 +++ fs/xfs/scrub/btree.c | 4 ++++ fs/xfs/scrub/common.c | 23 +++++++++++++++++++++++ fs/xfs/scrub/common.h | 1 + fs/xfs/scrub/dabtree.c | 22 ++++++++++++++++++++++ fs/xfs/scrub/dir.c | 4 ++++ 6 files changed, 57 insertions(+) diff --git a/fs/xfs/scrub/agheader.c b/fs/xfs/scrub/agheader.c index 20a3bebdee06..fd975524f460 100644 --- a/fs/xfs/scrub/agheader.c +++ b/fs/xfs/scrub/agheader.c @@ -611,6 +611,7 @@ xfs_scrub_agf( &sc->sa.agf_bp, &sc->sa.agfl_bp); if (!xfs_scrub_process_error(sc, agno, XFS_AGF_BLOCK(sc->mp), &error)) goto out; + xfs_scrub_buffer_recheck(sc, sc->sa.agf_bp); agf = XFS_BUF_TO_AGF(sc->sa.agf_bp); @@ -780,6 +781,7 @@ xfs_scrub_agfl( goto out; if (!sc->sa.agf_bp) return -EFSCORRUPTED; + xfs_scrub_buffer_recheck(sc, sc->sa.agfl_bp); xfs_scrub_agfl_xref(sc); @@ -902,6 +904,7 @@ xfs_scrub_agi( &sc->sa.agf_bp, &sc->sa.agfl_bp); if (!xfs_scrub_process_error(sc, agno, XFS_AGI_BLOCK(sc->mp), &error)) goto out; + xfs_scrub_buffer_recheck(sc, sc->sa.agi_bp); agi = XFS_BUF_TO_AGI(sc->sa.agi_bp); diff --git a/fs/xfs/scrub/btree.c b/fs/xfs/scrub/btree.c index 0589d4efbf6b..54218168c8f9 100644 --- a/fs/xfs/scrub/btree.c +++ b/fs/xfs/scrub/btree.c @@ -314,6 +314,8 @@ xfs_scrub_btree_block_check_sibling( pp = xfs_btree_ptr_addr(ncur, ncur->bc_ptrs[level + 1], pblock); if (!xfs_scrub_btree_ptr_ok(bs, level + 1, pp)) goto out; + if (pbp) + xfs_scrub_buffer_recheck(bs->sc, pbp); if (xfs_btree_diff_two_ptrs(cur, pp, sibling)) xfs_scrub_btree_set_corrupt(bs->sc, cur, level); @@ -486,6 +488,8 @@ xfs_scrub_btree_get_block( xfs_scrub_btree_set_corrupt(bs->sc, bs->cur, level); return 0; } + if (*pbp) + xfs_scrub_buffer_recheck(bs->sc, *pbp); /* * Check the block's owner; this function absorbs error codes diff --git a/fs/xfs/scrub/common.c b/fs/xfs/scrub/common.c index f5df8f2859d7..8033ab9d8f47 100644 --- a/fs/xfs/scrub/common.c +++ b/fs/xfs/scrub/common.c @@ -756,3 +756,26 @@ xfs_scrub_should_check_xref( 
*error = 0; return false; } + +/* Run the structure verifiers on in-memory buffers to detect bad memory. */ +void +xfs_scrub_buffer_recheck( + struct xfs_scrub_context *sc, + struct xfs_buf *bp) +{ + xfs_failaddr_t fa; + + if (bp->b_ops == NULL) { + xfs_scrub_block_set_corrupt(sc, bp); + return; + } + if (bp->b_ops->verify_struct == NULL) { + xfs_scrub_set_incomplete(sc); + return; + } + fa = bp->b_ops->verify_struct(bp); + if (!fa) + return; + sc->sm->sm_flags |= XFS_SCRUB_OFLAG_CORRUPT; + trace_xfs_scrub_block_error(sc, bp->b_bn, fa); +} diff --git a/fs/xfs/scrub/common.h b/fs/xfs/scrub/common.h index bf88a677f6e7..ddb65d22c76a 100644 --- a/fs/xfs/scrub/common.h +++ b/fs/xfs/scrub/common.h @@ -158,5 +158,6 @@ int xfs_scrub_setup_ag_btree(struct xfs_scrub_context *sc, int xfs_scrub_get_inode(struct xfs_scrub_context *sc, struct xfs_inode *ip_in); int xfs_scrub_setup_inode_contents(struct xfs_scrub_context *sc, struct xfs_inode *ip, unsigned int resblks); +void xfs_scrub_buffer_recheck(struct xfs_scrub_context *sc, struct xfs_buf *bp); #endif /* __XFS_SCRUB_COMMON_H__ */ diff --git a/fs/xfs/scrub/dabtree.c b/fs/xfs/scrub/dabtree.c index d94edd93cba8..bffdb7dc09bf 100644 --- a/fs/xfs/scrub/dabtree.c +++ b/fs/xfs/scrub/dabtree.c @@ -233,11 +233,28 @@ xfs_scrub_da_btree_write_verify( return; } } +static void * +xfs_scrub_da_btree_verify( + struct xfs_buf *bp) +{ + struct xfs_da_blkinfo *info = bp->b_addr; + + switch (be16_to_cpu(info->magic)) { + case XFS_DIR2_LEAF1_MAGIC: + case XFS_DIR3_LEAF1_MAGIC: + bp->b_ops = &xfs_dir3_leaf1_buf_ops; + return bp->b_ops->verify_struct(bp); + default: + bp->b_ops = &xfs_da3_node_buf_ops; + return bp->b_ops->verify_struct(bp); + } +} static const struct xfs_buf_ops xfs_scrub_da_btree_buf_ops = { .name = "xfs_scrub_da_btree", .verify_read = xfs_scrub_da_btree_read_verify, .verify_write = xfs_scrub_da_btree_write_verify, + .verify_struct = xfs_scrub_da_btree_verify, }; /* Check a block's sibling. 
*/ @@ -276,6 +293,9 @@ xfs_scrub_da_btree_block_check_sibling( xfs_scrub_da_set_corrupt(ds, level); return error; } + if (ds->state->altpath.blk[level].bp) + xfs_scrub_buffer_recheck(ds->sc, + ds->state->altpath.blk[level].bp); /* Compare upper level pointer to sibling pointer. */ if (ds->state->altpath.blk[level].blkno != sibling) @@ -358,6 +378,8 @@ xfs_scrub_da_btree_block( &xfs_scrub_da_btree_buf_ops); if (!xfs_scrub_da_process_error(ds, level, &error)) goto out_nobuf; + if (blk->bp) + xfs_scrub_buffer_recheck(ds->sc, blk->bp); /* * We didn't find a dir btree root block, which means that diff --git a/fs/xfs/scrub/dir.c b/fs/xfs/scrub/dir.c index e75826bb6516..f5a0d179eac0 100644 --- a/fs/xfs/scrub/dir.c +++ b/fs/xfs/scrub/dir.c @@ -237,6 +237,7 @@ xfs_scrub_dir_rec( xfs_scrub_fblock_set_corrupt(ds->sc, XFS_DATA_FORK, rec_bno); goto out; } + xfs_scrub_buffer_recheck(ds->sc, bp); /* Retrieve the entry, sanity check it, and compare hashes. */ dent = (struct xfs_dir2_data_entry *)(((char *)bp->b_addr) + off); @@ -324,6 +325,7 @@ xfs_scrub_directory_data_bestfree( } if (!xfs_scrub_fblock_process_error(sc, XFS_DATA_FORK, lblk, &error)) goto out; + xfs_scrub_buffer_recheck(sc, bp); /* XXX: Check xfs_dir3_data_hdr.pad is zero once we start setting it. */ @@ -474,6 +476,7 @@ xfs_scrub_directory_leaf1_bestfree( error = xfs_dir3_leaf_read(sc->tp, sc->ip, lblk, -1, &bp); if (!xfs_scrub_fblock_process_error(sc, XFS_DATA_FORK, lblk, &error)) goto out; + xfs_scrub_buffer_recheck(sc, bp); leaf = bp->b_addr; d_ops->leaf_hdr_from_disk(&leafhdr, leaf); @@ -559,6 +562,7 @@ xfs_scrub_directory_free_bestfree( error = xfs_dir2_free_read(sc->tp, sc->ip, lblk, &bp); if (!xfs_scrub_fblock_process_error(sc, XFS_DATA_FORK, lblk, &error)) goto out; + xfs_scrub_buffer_recheck(sc, bp); if (xfs_sb_version_hascrc(&sc->mp->m_sb)) { struct xfs_dir3_free_hdr *hdr3 = bp->b_addr; -- cgit v1.2.3 From 638a7174894c8f2195430990b614615ef16e3912 Mon Sep 17 00:00:00 2001 From: "Darrick J. 
Wong" Date: Tue, 16 Jan 2018 18:53:57 -0800 Subject: xfs: don't iunlock unlocked inodes Don't iunlock an unlocked inode, which can happen if the parent pointer scrubber bails out with sc->ip unlocked while trying to grab the parent directory inode. Signed-off-by: Darrick J. Wong Reviewed-by: Brian Foster --- fs/xfs/scrub/scrub.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/xfs/scrub/scrub.c b/fs/xfs/scrub/scrub.c index 0ed2a123cbb8..26c75967a072 100644 --- a/fs/xfs/scrub/scrub.c +++ b/fs/xfs/scrub/scrub.c @@ -159,7 +159,8 @@ xfs_scrub_teardown( sc->tp = NULL; } if (sc->ip) { - xfs_iunlock(sc->ip, sc->ilock_flags); + if (sc->ilock_flags) + xfs_iunlock(sc->ip, sc->ilock_flags); if (sc->ip != ip_in && !xfs_internal_inum(sc->mp, sc->ip->i_ino)) iput(VFS_I(sc->ip)); -- cgit v1.2.3 From ce92d29ddf9908d397895c46b7c78e9db8df414d Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Tue, 16 Jan 2018 18:54:12 -0800 Subject: xfs: directory scrubber must walk through data block to offset In xfs_scrub_dir_rec, we must walk through the directory block entries to arrive at the offset given by the hash structure. If we blindly trust the hash address, we can end up midway into a directory entry and stray outside the block. Found by lastbit fuzzing lents[3].address in xfs/390 with KASAN enabled. Signed-off-by: Darrick J. 
Wong Reviewed-by: Dave Chinner --- fs/xfs/libxfs/xfs_dir2.h | 2 ++ fs/xfs/libxfs/xfs_dir2_data.c | 43 ++++++++++++++++++++++++------------------- fs/xfs/libxfs/xfs_dir2_sf.c | 4 +--- fs/xfs/scrub/dir.c | 38 ++++++++++++++++++++++++++++++-------- fs/xfs/xfs_dir2_readdir.c | 4 +--- 5 files changed, 58 insertions(+), 33 deletions(-) diff --git a/fs/xfs/libxfs/xfs_dir2.h b/fs/xfs/libxfs/xfs_dir2.h index 1a8f2cf977ca..388d67c5c903 100644 --- a/fs/xfs/libxfs/xfs_dir2.h +++ b/fs/xfs/libxfs/xfs_dir2.h @@ -340,5 +340,7 @@ xfs_dir2_leaf_tail_p(struct xfs_da_geometry *geo, struct xfs_dir2_leaf *lp) #define XFS_READDIR_BUFSIZE (32768) unsigned char xfs_dir3_get_dtype(struct xfs_mount *mp, uint8_t filetype); +void *xfs_dir3_data_endp(struct xfs_da_geometry *geo, + struct xfs_dir2_data_hdr *hdr); #endif /* __XFS_DIR2_H__ */ diff --git a/fs/xfs/libxfs/xfs_dir2_data.c b/fs/xfs/libxfs/xfs_dir2_data.c index 853d9abdd545..a1e30c751c00 100644 --- a/fs/xfs/libxfs/xfs_dir2_data.c +++ b/fs/xfs/libxfs/xfs_dir2_data.c @@ -89,7 +89,6 @@ __xfs_dir3_data_check( case cpu_to_be32(XFS_DIR2_BLOCK_MAGIC): btp = xfs_dir2_block_tail_p(geo, hdr); lep = xfs_dir2_block_leaf_p(btp); - endp = (char *)lep; /* * The number of leaf entries is limited by the size of the @@ -104,11 +103,13 @@ __xfs_dir3_data_check( break; case cpu_to_be32(XFS_DIR3_DATA_MAGIC): case cpu_to_be32(XFS_DIR2_DATA_MAGIC): - endp = (char *)hdr + geo->blksize; break; default: return __this_address; } + endp = xfs_dir3_data_endp(geo, hdr); + if (!endp) + return __this_address; /* * Account for zero bestfree entries. @@ -546,7 +547,6 @@ xfs_dir2_data_freescan_int( struct xfs_dir2_data_hdr *hdr, int *loghead) { - xfs_dir2_block_tail_t *btp; /* block tail */ xfs_dir2_data_entry_t *dep; /* active data entry */ xfs_dir2_data_unused_t *dup; /* unused data entry */ struct xfs_dir2_data_free *bf; @@ -568,12 +568,7 @@ xfs_dir2_data_freescan_int( * Set up pointers. 
*/ p = (char *)ops->data_entry_p(hdr); - if (hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC) || - hdr->magic == cpu_to_be32(XFS_DIR3_BLOCK_MAGIC)) { - btp = xfs_dir2_block_tail_p(geo, hdr); - endp = (char *)xfs_dir2_block_leaf_p(btp); - } else - endp = (char *)hdr + geo->blksize; + endp = xfs_dir3_data_endp(geo, hdr); /* * Loop over the block's entries. */ @@ -786,17 +781,9 @@ xfs_dir2_data_make_free( /* * Figure out where the end of the data area is. */ - if (hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC) || - hdr->magic == cpu_to_be32(XFS_DIR3_DATA_MAGIC)) - endptr = (char *)hdr + args->geo->blksize; - else { - xfs_dir2_block_tail_t *btp; /* block tail */ + endptr = xfs_dir3_data_endp(args->geo, hdr); + ASSERT(endptr != NULL); - ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC) || - hdr->magic == cpu_to_be32(XFS_DIR3_BLOCK_MAGIC)); - btp = xfs_dir2_block_tail_p(args->geo, hdr); - endptr = (char *)xfs_dir2_block_leaf_p(btp); - } /* * If this isn't the start of the block, then back up to * the previous entry and see if it's free. @@ -1098,3 +1085,21 @@ xfs_dir2_data_use_free( } *needscanp = needscan; } + +/* Find the end of the entry data in a data/block format dir block. 
*/ +void * +xfs_dir3_data_endp( + struct xfs_da_geometry *geo, + struct xfs_dir2_data_hdr *hdr) +{ + switch (hdr->magic) { + case cpu_to_be32(XFS_DIR3_BLOCK_MAGIC): + case cpu_to_be32(XFS_DIR2_BLOCK_MAGIC): + return xfs_dir2_block_leaf_p(xfs_dir2_block_tail_p(geo, hdr)); + case cpu_to_be32(XFS_DIR3_DATA_MAGIC): + case cpu_to_be32(XFS_DIR2_DATA_MAGIC): + return (char *)hdr + geo->blksize; + default: + return NULL; + } +} diff --git a/fs/xfs/libxfs/xfs_dir2_sf.c b/fs/xfs/libxfs/xfs_dir2_sf.c index 8500fa2a1321..0c75a7f00883 100644 --- a/fs/xfs/libxfs/xfs_dir2_sf.c +++ b/fs/xfs/libxfs/xfs_dir2_sf.c @@ -156,7 +156,6 @@ xfs_dir2_block_to_sf( xfs_dir2_sf_hdr_t *sfhp) /* shortform directory hdr */ { xfs_dir2_data_hdr_t *hdr; /* block header */ - xfs_dir2_block_tail_t *btp; /* block tail pointer */ xfs_dir2_data_entry_t *dep; /* data entry pointer */ xfs_inode_t *dp; /* incore directory inode */ xfs_dir2_data_unused_t *dup; /* unused data pointer */ @@ -192,9 +191,8 @@ xfs_dir2_block_to_sf( /* * Set up to loop over the block's entries. */ - btp = xfs_dir2_block_tail_p(args->geo, hdr); ptr = (char *)dp->d_ops->data_entry_p(hdr); - endptr = (char *)xfs_dir2_block_leaf_p(btp); + endptr = xfs_dir3_data_endp(args->geo, hdr); sfep = xfs_dir2_sf_firstentry(sfp); /* * Loop over the active and unused entries. diff --git a/fs/xfs/scrub/dir.c b/fs/xfs/scrub/dir.c index f5a0d179eac0..50b6a26b0299 100644 --- a/fs/xfs/scrub/dir.c +++ b/fs/xfs/scrub/dir.c @@ -200,6 +200,7 @@ xfs_scrub_dir_rec( struct xfs_inode *dp = ds->dargs.dp; struct xfs_dir2_data_entry *dent; struct xfs_buf *bp; + char *p, *endp; xfs_ino_t ino; xfs_dablk_t rec_bno; xfs_dir2_db_t db; @@ -239,8 +240,35 @@ xfs_scrub_dir_rec( } xfs_scrub_buffer_recheck(ds->sc, bp); - /* Retrieve the entry, sanity check it, and compare hashes. */ dent = (struct xfs_dir2_data_entry *)(((char *)bp->b_addr) + off); + + /* Make sure we got a real directory entry. 
*/ + p = (char *)mp->m_dir_inode_ops->data_entry_p(bp->b_addr); + endp = xfs_dir3_data_endp(mp->m_dir_geo, bp->b_addr); + if (!endp) { + xfs_scrub_fblock_set_corrupt(ds->sc, XFS_DATA_FORK, rec_bno); + goto out_relse; + } + while (p < endp) { + struct xfs_dir2_data_entry *dep; + struct xfs_dir2_data_unused *dup; + + dup = (struct xfs_dir2_data_unused *)p; + if (be16_to_cpu(dup->freetag) == XFS_DIR2_DATA_FREE_TAG) { + p += be16_to_cpu(dup->length); + continue; + } + dep = (struct xfs_dir2_data_entry *)p; + if (dep == dent) + break; + p += mp->m_dir_inode_ops->data_entsize(dep->namelen); + } + if (p >= endp) { + xfs_scrub_fblock_set_corrupt(ds->sc, XFS_DATA_FORK, rec_bno); + goto out_relse; + } + + /* Retrieve the entry, sanity check it, and compare hashes. */ ino = be64_to_cpu(dent->inumber); hash = be32_to_cpu(ent->hashval); tag = be16_to_cpup(dp->d_ops->data_entry_tag_p(dent)); @@ -363,13 +391,7 @@ xfs_scrub_directory_data_bestfree( /* Make sure the bestfrees are actually the best free spaces. */ ptr = (char *)d_ops->data_entry_p(bp->b_addr); - if (is_block) { - struct xfs_dir2_block_tail *btp; - - btp = xfs_dir2_block_tail_p(mp->m_dir_geo, bp->b_addr); - endptr = (char *)xfs_dir2_block_leaf_p(btp); - } else - endptr = (char *)bp->b_addr + BBTOB(bp->b_length); + endptr = xfs_dir3_data_endp(mp->m_dir_geo, bp->b_addr); /* Iterate the entries, stopping when we hit or go past the end. 
*/ while (ptr < endptr) { diff --git a/fs/xfs/xfs_dir2_readdir.c b/fs/xfs/xfs_dir2_readdir.c index 0c58918bc0ad..b6ae3597bfb0 100644 --- a/fs/xfs/xfs_dir2_readdir.c +++ b/fs/xfs/xfs_dir2_readdir.c @@ -152,7 +152,6 @@ xfs_dir2_block_getdents( struct xfs_inode *dp = args->dp; /* incore directory inode */ xfs_dir2_data_hdr_t *hdr; /* block header */ struct xfs_buf *bp; /* buffer for block */ - xfs_dir2_block_tail_t *btp; /* block tail */ xfs_dir2_data_entry_t *dep; /* block data entry */ xfs_dir2_data_unused_t *dup; /* block unused entry */ char *endptr; /* end of the data entries */ @@ -185,9 +184,8 @@ xfs_dir2_block_getdents( /* * Set up values for the loop. */ - btp = xfs_dir2_block_tail_p(geo, hdr); ptr = (char *)dp->d_ops->data_entry_p(hdr); - endptr = (char *)xfs_dir2_block_leaf_p(btp); + endptr = xfs_dir3_data_endp(geo, hdr); /* * Loop over the data portion of the block. -- cgit v1.2.3 From 79a69bf8dc240ebeb105226a8a8540df136bf987 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Tue, 16 Jan 2018 18:54:12 -0800 Subject: xfs: attr leaf verifier needs to check for obviously bad count In the attribute leaf verifier, we can check for obviously bad values of firstused and count so that later attempts at lasthash don't run off the end of the memory buffer. Found by ones fuzzing hdr.count in xfs/400 with KASAN. Signed-off-by: Darrick J. 
Wong Reviewed-by: Brian Foster --- fs/xfs/libxfs/xfs_attr_leaf.c | 26 +++++++++++++++++++++----- 1 file changed, 21 insertions(+), 5 deletions(-) diff --git a/fs/xfs/libxfs/xfs_attr_leaf.c b/fs/xfs/libxfs/xfs_attr_leaf.c index 6fddce7bbd54..efe5f8acbd45 100644 --- a/fs/xfs/libxfs/xfs_attr_leaf.c +++ b/fs/xfs/libxfs/xfs_attr_leaf.c @@ -249,12 +249,13 @@ xfs_attr3_leaf_hdr_to_disk( static xfs_failaddr_t xfs_attr3_leaf_verify( - struct xfs_buf *bp) + struct xfs_buf *bp) { - struct xfs_mount *mp = bp->b_target->bt_mount; - struct xfs_attr_leafblock *leaf = bp->b_addr; - struct xfs_perag *pag = bp->b_pag; - struct xfs_attr3_icleaf_hdr ichdr; + struct xfs_attr3_icleaf_hdr ichdr; + struct xfs_mount *mp = bp->b_target->bt_mount; + struct xfs_attr_leafblock *leaf = bp->b_addr; + struct xfs_perag *pag = bp->b_pag; + struct xfs_attr_leaf_entry *entries; xfs_attr3_leaf_hdr_from_disk(mp->m_attr_geo, &ichdr, leaf); @@ -282,6 +283,21 @@ xfs_attr3_leaf_verify( if (pag && pag->pagf_init && ichdr.count == 0) return __this_address; + /* + * firstused is the block offset of the first name info structure. + * Make sure it doesn't go off the block or crash into the header. + */ + if (ichdr.firstused > mp->m_attr_geo->blksize) + return __this_address; + if (ichdr.firstused < xfs_attr3_leaf_hdr_size(leaf)) + return __this_address; + + /* Make sure the entries array doesn't crash into the name info. */ + entries = xfs_attr3_leaf_entryp(bp->b_addr); + if ((char *)&entries[ichdr.count] > + (char *)bp->b_addr + ichdr.firstused) + return __this_address; + /* XXX: need to range check rest of attr header values */ /* XXX: hash order check? */ -- cgit v1.2.3 From 55e45429ce3e4ac9dd2bf4937b1a499a69ccc4ca Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Tue, 16 Jan 2018 18:54:13 -0800 Subject: xfs: btree format ifork loader should check for zero numrecs A btree format inode fork with zero records makes no sense, so reject it if we see it, or else we can miscalculate memory allocations. 
Found by zeroes fuzzing {a,u3}.bmbt.numrecs in xfs/{374,378,412} with KASAN. Signed-off-by: Darrick J. Wong Reviewed-by: Brian Foster --- fs/xfs/libxfs/xfs_inode_fork.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/xfs/libxfs/xfs_inode_fork.c b/fs/xfs/libxfs/xfs_inode_fork.c index c1c1a86e7f47..866d2861c625 100644 --- a/fs/xfs/libxfs/xfs_inode_fork.c +++ b/fs/xfs/libxfs/xfs_inode_fork.c @@ -298,6 +298,7 @@ xfs_iformat_btree( */ if (unlikely(XFS_IFORK_NEXTENTS(ip, whichfork) <= XFS_IFORK_MAXEXT(ip, whichfork) || + nrecs == 0 || XFS_BMDR_SPACE_CALC(nrecs) > XFS_DFORK_SIZE(dip, mp, whichfork) || XFS_IFORK_NEXTENTS(ip, whichfork) > ip->i_d.di_nblocks) || -- cgit v1.2.3 From a5f460b168820335706c0d8cd8c8bc3657f1dd5e Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Tue, 16 Jan 2018 18:54:13 -0800 Subject: xfs: check that br_blockcount doesn't overflow xfs_bmbt_irec.br_blockcount is declared as xfs_filblks_t, which is an unsigned 64-bit integer. Though the bmbt helpers will never set a value larger than 2^21 (since the underlying on-disk extent record has a length field that is only 21 bits wide), we should be a little defensive about checking that a bmbt record doesn't exceed what we're expecting or overflow into the next AG. Signed-off-by: Darrick J. Wong Reviewed-by: Dave Chinner --- fs/xfs/scrub/bmap.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/fs/xfs/scrub/bmap.c b/fs/xfs/scrub/bmap.c index 2bb3e0c4b4ef..d00282130492 100644 --- a/fs/xfs/scrub/bmap.c +++ b/fs/xfs/scrub/bmap.c @@ -305,6 +305,7 @@ xfs_scrub_bmap_extent( { struct xfs_mount *mp = info->sc->mp; struct xfs_buf *bp = NULL; + xfs_filblks_t end; int error = 0; if (cur) @@ -332,19 +333,23 @@ xfs_scrub_bmap_extent( irec->br_startoff); /* Make sure the extent points to a valid place. 
*/ + if (irec->br_blockcount > MAXEXTLEN) + xfs_scrub_fblock_set_corrupt(info->sc, info->whichfork, + irec->br_startoff); if (irec->br_startblock + irec->br_blockcount <= irec->br_startblock) xfs_scrub_fblock_set_corrupt(info->sc, info->whichfork, irec->br_startoff); + end = irec->br_startblock + irec->br_blockcount - 1; if (info->is_rt && (!xfs_verify_rtbno(mp, irec->br_startblock) || - !xfs_verify_rtbno(mp, irec->br_startblock + - irec->br_blockcount - 1))) + !xfs_verify_rtbno(mp, end))) xfs_scrub_fblock_set_corrupt(info->sc, info->whichfork, irec->br_startoff); if (!info->is_rt && (!xfs_verify_fsbno(mp, irec->br_startblock) || - !xfs_verify_fsbno(mp, irec->br_startblock + - irec->br_blockcount - 1))) + !xfs_verify_fsbno(mp, end) || + XFS_FSB_TO_AGNO(mp, irec->br_startblock) != + XFS_FSB_TO_AGNO(mp, end))) xfs_scrub_fblock_set_corrupt(info->sc, info->whichfork, irec->br_startoff); -- cgit v1.2.3 From be78ff0e72778eb4df4aac66edb9e97462bfe00d Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Tue, 16 Jan 2018 19:03:59 -0800 Subject: xfs: recheck reflink / dirty page status before freeing CoW reservations Eryu Guan reported seeing occasional hangs when running generic/269 with a new fsstress that supports clonerange/deduperange. The cause of this hang is an infinite loop when we convert the CoW fork extents from unwritten to real just prior to writing the pages out; the infinite loop happens because there's nothing in the CoW fork to convert, and so it spins forever. The fundamental issue here is that when we go to perform these CoW fork conversions, we're supposed to have an extent waiting for us, but the low space CoW reaper has snuck in and blown them away! There are four conditions that can dissuade the reaper from touching our file -- no reflink iflag; dirty page cache; writeback in progress; or directio in progress. 
We check the four conditions prior to taking the locks, but we neglect to recheck them once we have the locks, which is how we end up whacking the writeback that's in progress. Therefore, refactor the four checks into a helper function and call it once again once we have the locks to make sure we really want to reap the inode. While we're at it, add an ASSERT for this weird condition so that we'll fail noisily if we ever screw this up again. Reported-by: Eryu Guan Signed-off-by: Darrick J. Wong Tested-by: Eryu Guan Reviewed-by: Brian Foster --- fs/xfs/libxfs/xfs_bmap.c | 10 +++++++- fs/xfs/xfs_icache.c | 63 ++++++++++++++++++++++++++++++++---------------- 2 files changed, 51 insertions(+), 22 deletions(-) diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c index 140744700b07..6e6f3cb88cc2 100644 --- a/fs/xfs/libxfs/xfs_bmap.c +++ b/fs/xfs/libxfs/xfs_bmap.c @@ -4304,8 +4304,16 @@ xfs_bmapi_write( while (bno < end && n < *nmap) { bool need_alloc = false, wasdelay = false; - /* in hole or beyoned EOF? */ + /* in hole or beyond EOF? */ if (eof || bma.got.br_startoff > bno) { + /* + * CoW fork conversions should /never/ hit EOF or + * holes. There should always be something for us + * to work on. + */ + ASSERT(!((flags & XFS_BMAPI_CONVERT) && + (flags & XFS_BMAPI_COWFORK))); + if (flags & XFS_BMAPI_DELALLOC) { /* * For the COW fork we can reasonably get a diff --git a/fs/xfs/xfs_icache.c b/fs/xfs/xfs_icache.c index c9c7c02bc2bb..2da7a2ee34bc 100644 --- a/fs/xfs/xfs_icache.c +++ b/fs/xfs/xfs_icache.c @@ -1655,28 +1655,15 @@ xfs_inode_clear_eofblocks_tag( } /* - * Automatic CoW Reservation Freeing - * - * These functions automatically garbage collect leftover CoW reservations - * that were made on behalf of a cowextsize hint when we start to run out - * of quota or when the reservations sit around for too long. If the file - * has dirty pages or is undergoing writeback, its CoW reservations will - * be retained. 
- * - * The actual garbage collection piggybacks off the same code that runs - * the speculative EOF preallocation garbage collector. + * Set ourselves up to free CoW blocks from this file. If it's already clean + * then we can bail out quickly, but otherwise we must back off if the file + * is undergoing some kind of write. */ -STATIC int -xfs_inode_free_cowblocks( +static bool +xfs_prep_free_cowblocks( struct xfs_inode *ip, - int flags, - void *args) + struct xfs_ifork *ifp) { - int ret; - struct xfs_eofblocks *eofb = args; - int match; - struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, XFS_COW_FORK); - /* * Just clear the tag if we have an empty cow fork or none at all. It's * possible the inode was fully unshared since it was originally tagged. @@ -1684,7 +1671,7 @@ xfs_inode_free_cowblocks( if (!xfs_is_reflink_inode(ip) || !ifp->if_bytes) { trace_xfs_inode_free_cowblocks_invalid(ip); xfs_inode_clear_cowblocks_tag(ip); - return 0; + return false; } /* @@ -1695,6 +1682,35 @@ xfs_inode_free_cowblocks( mapping_tagged(VFS_I(ip)->i_mapping, PAGECACHE_TAG_DIRTY) || mapping_tagged(VFS_I(ip)->i_mapping, PAGECACHE_TAG_WRITEBACK) || atomic_read(&VFS_I(ip)->i_dio_count)) + return false; + + return true; +} + +/* + * Automatic CoW Reservation Freeing + * + * These functions automatically garbage collect leftover CoW reservations + * that were made on behalf of a cowextsize hint when we start to run out + * of quota or when the reservations sit around for too long. If the file + * has dirty pages or is undergoing writeback, its CoW reservations will + * be retained. + * + * The actual garbage collection piggybacks off the same code that runs + * the speculative EOF preallocation garbage collector. 
+ */ +STATIC int +xfs_inode_free_cowblocks( + struct xfs_inode *ip, + int flags, + void *args) +{ + struct xfs_eofblocks *eofb = args; + struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, XFS_COW_FORK); + int match; + int ret = 0; + + if (!xfs_prep_free_cowblocks(ip, ifp)) return 0; if (eofb) { @@ -1715,7 +1731,12 @@ xfs_inode_free_cowblocks( xfs_ilock(ip, XFS_IOLOCK_EXCL); xfs_ilock(ip, XFS_MMAPLOCK_EXCL); - ret = xfs_reflink_cancel_cow_range(ip, 0, NULLFILEOFF, false); + /* + * Check again, nobody else should be able to dirty blocks or change + * the reflink iflag now that we have the first two locks held. + */ + if (xfs_prep_free_cowblocks(ip, ifp)) + ret = xfs_reflink_cancel_cow_range(ip, 0, NULLFILEOFF, false); xfs_iunlock(ip, XFS_MMAPLOCK_EXCL); xfs_iunlock(ip, XFS_IOLOCK_EXCL); -- cgit v1.2.3 From 4bb73d014785cc55225686f9f46e7192fb59d26b Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Tue, 16 Jan 2018 19:04:09 -0800 Subject: xfs: check sb_agblocks and sb_agblklog when validating superblock Currently, we don't check sb_agblocks or sb_agblklog when we validate the superblock, which means that we can fuzz garbage values into those values and the mount succeeds. This leads to all sorts of UBSAN warnings in xfs/350 since we can then coerce other parts of xfs into shifting by ridiculously large values. Once we've validated agblocks, make sure the agcount makes sense. Signed-off-by: Darrick J. Wong Reviewed-by: Brian Foster --- fs/xfs/libxfs/xfs_fs.h | 7 +++++++ fs/xfs/libxfs/xfs_sb.c | 14 ++++++++++++++ 2 files changed, 21 insertions(+) diff --git a/fs/xfs/libxfs/xfs_fs.h b/fs/xfs/libxfs/xfs_fs.h index b90924104596..faf1a4edd618 100644 --- a/fs/xfs/libxfs/xfs_fs.h +++ b/fs/xfs/libxfs/xfs_fs.h @@ -233,6 +233,13 @@ typedef struct xfs_fsop_resblks { #define XFS_MAX_LOG_BLOCKS (1024 * 1024ULL) #define XFS_MIN_LOG_BYTES (10 * 1024 * 1024ULL) +/* + * Limits on sb_agblocks/sb_agblklog -- mkfs won't format AGs smaller than + * 16MB or larger than 1TB. 
+ */ +#define XFS_MIN_AG_BYTES (1ULL << 24) /* 16 MB */ +#define XFS_MAX_AG_BYTES (1ULL << 40) /* 1 TB */ + /* keep the maximum size under 2^31 by a small amount */ #define XFS_MAX_LOG_BYTES \ ((2 * 1024 * 1024 * 1024ULL) - XFS_MIN_LOG_BYTES) diff --git a/fs/xfs/libxfs/xfs_sb.c b/fs/xfs/libxfs/xfs_sb.c index 869a2f3f0375..e0c826403c6a 100644 --- a/fs/xfs/libxfs/xfs_sb.c +++ b/fs/xfs/libxfs/xfs_sb.c @@ -118,6 +118,9 @@ xfs_mount_validate_sb( bool check_inprogress, bool check_version) { + u32 agcount = 0; + u32 rem; + if (sbp->sb_magicnum != XFS_SB_MAGIC) { xfs_warn(mp, "bad magic number"); return -EWRONGFS; @@ -228,6 +231,13 @@ xfs_mount_validate_sb( return -EINVAL; } + /* Compute agcount for this number of dblocks and agblocks */ + if (sbp->sb_agblocks) { + agcount = div_u64_rem(sbp->sb_dblocks, sbp->sb_agblocks, &rem); + if (rem) + agcount++; + } + /* * More sanity checking. Most of these were stolen directly from * xfs_repair. @@ -252,6 +262,10 @@ xfs_mount_validate_sb( sbp->sb_inodesize != (1 << sbp->sb_inodelog) || sbp->sb_logsunit > XLOG_MAX_RECORD_BSIZE || sbp->sb_inopblock != howmany(sbp->sb_blocksize,sbp->sb_inodesize) || + XFS_FSB_TO_B(mp, sbp->sb_agblocks) < XFS_MIN_AG_BYTES || + XFS_FSB_TO_B(mp, sbp->sb_agblocks) > XFS_MAX_AG_BYTES || + sbp->sb_agblklog != xfs_highbit32(sbp->sb_agblocks - 1) + 1 || + agcount == 0 || agcount != sbp->sb_agcount || (sbp->sb_blocklog - sbp->sb_inodelog != sbp->sb_inopblog) || (sbp->sb_rextsize * sbp->sb_blocksize > XFS_MAX_RTEXTSIZE) || (sbp->sb_rextsize * sbp->sb_blocksize < XFS_MIN_RTEXTSIZE) || -- cgit v1.2.3 From 75d4a13b1f6163340e1695bc487ff7fcdc6bc965 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Tue, 16 Jan 2018 19:04:27 -0800 Subject: xfs: fix non-debug build compiler warnings Fix compiler warning on non-debug build Signed-off-by: Darrick J. 
Wong Reviewed-by: Dave Chinner --- fs/xfs/xfs_dquot_item.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/fs/xfs/xfs_dquot_item.c b/fs/xfs/xfs_dquot_item.c index e564f11d83f3..51ee848a550e 100644 --- a/fs/xfs/xfs_dquot_item.c +++ b/fs/xfs/xfs_dquot_item.c @@ -150,10 +150,7 @@ xfs_dquot_item_error( struct xfs_log_item *lip, struct xfs_buf *bp) { - struct xfs_dquot *dqp; - - dqp = DQUOT_ITEM(lip)->qli_dquot; - ASSERT(!completion_done(&dqp->q_flush)); + ASSERT(!completion_done(&DQUOT_ITEM(lip)->qli_dquot->q_flush)); xfs_set_li_failed(lip, bp); } -- cgit v1.2.3 From 70a20655339ab90866300e174a47631df49a018a Mon Sep 17 00:00:00 2001 From: Carlos Maiolino Date: Wed, 24 Jan 2018 13:38:48 -0800 Subject: Get rid of xfs_buf_log_item_t typedef Take advantage of the rework on xfs_buf log items list, to get rid of this typedef for xfs_buf_log_item. This patch also fixes some indentation alignment issues found along the way. Signed-off-by: Carlos Maiolino Reviewed-by: Bill O'Donnell Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J.
Wong --- fs/xfs/xfs_buf_item.c | 40 ++++++++++++++++---------------- fs/xfs/xfs_buf_item.h | 6 ++--- fs/xfs/xfs_trans_buf.c | 62 +++++++++++++++++++++++++++----------------------- 3 files changed, 56 insertions(+), 52 deletions(-) diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c index e0a0af0946f2..8afcfa3ed976 100644 --- a/fs/xfs/xfs_buf_item.c +++ b/fs/xfs/xfs_buf_item.c @@ -61,14 +61,14 @@ xfs_buf_log_format_size( */ STATIC void xfs_buf_item_size_segment( - struct xfs_buf_log_item *bip, - struct xfs_buf_log_format *blfp, - int *nvecs, - int *nbytes) + struct xfs_buf_log_item *bip, + struct xfs_buf_log_format *blfp, + int *nvecs, + int *nbytes) { - struct xfs_buf *bp = bip->bli_buf; - int next_bit; - int last_bit; + struct xfs_buf *bp = bip->bli_buf; + int next_bit; + int last_bit; last_bit = xfs_next_bit(blfp->blf_data_map, blfp->blf_map_size, 0); if (last_bit == -1) @@ -218,12 +218,12 @@ xfs_buf_item_format_segment( uint offset, struct xfs_buf_log_format *blfp) { - struct xfs_buf *bp = bip->bli_buf; - uint base_size; - int first_bit; - int last_bit; - int next_bit; - uint nbits; + struct xfs_buf *bp = bip->bli_buf; + uint base_size; + int first_bit; + int last_bit; + int next_bit; + uint nbits; /* copy the flags across from the base format item */ blfp->blf_flags = bip->__bli_format.blf_flags; @@ -406,10 +406,10 @@ xfs_buf_item_unpin( int remove) { struct xfs_buf_log_item *bip = BUF_ITEM(lip); - xfs_buf_t *bp = bip->bli_buf; - struct xfs_ail *ailp = lip->li_ailp; - int stale = bip->bli_flags & XFS_BLI_STALE; - int freed; + xfs_buf_t *bp = bip->bli_buf; + struct xfs_ail *ailp = lip->li_ailp; + int stale = bip->bli_flags & XFS_BLI_STALE; + int freed; ASSERT(bp->b_fspriv == bip); ASSERT(atomic_read(&bip->bli_refcount) > 0); @@ -880,7 +880,7 @@ xfs_buf_item_log_segment( */ void xfs_buf_item_log( - xfs_buf_log_item_t *bip, + struct xfs_buf_log_item *bip, uint first, uint last) { @@ -943,7 +943,7 @@ xfs_buf_item_dirty_format( STATIC void xfs_buf_item_free( 
- xfs_buf_log_item_t *bip) + struct xfs_buf_log_item *bip) { xfs_buf_item_free_format(bip); kmem_free(bip->bli_item.li_lv_shadow); @@ -961,7 +961,7 @@ void xfs_buf_item_relse( xfs_buf_t *bp) { - xfs_buf_log_item_t *bip = bp->b_fspriv; + struct xfs_buf_log_item *bip = bp->b_fspriv; trace_xfs_buf_item_relse(bp, _RET_IP_); ASSERT(!(bip->bli_item.li_flags & XFS_LI_IN_AIL)); diff --git a/fs/xfs/xfs_buf_item.h b/fs/xfs/xfs_buf_item.h index 9690ce62c9a7..0febfbbf6ba9 100644 --- a/fs/xfs/xfs_buf_item.h +++ b/fs/xfs/xfs_buf_item.h @@ -50,7 +50,7 @@ struct xfs_buf_log_item; * needed to log buffers. It tracks how many times the lock has been * locked, and which 128 byte chunks of the buffer are dirty. */ -typedef struct xfs_buf_log_item { +struct xfs_buf_log_item { xfs_log_item_t bli_item; /* common item structure */ struct xfs_buf *bli_buf; /* real buffer pointer */ unsigned int bli_flags; /* misc flags */ @@ -59,11 +59,11 @@ typedef struct xfs_buf_log_item { int bli_format_count; /* count of headers */ struct xfs_buf_log_format *bli_formats; /* array of in-log header ptrs */ struct xfs_buf_log_format __bli_format; /* embedded in-log header */ -} xfs_buf_log_item_t; +}; int xfs_buf_item_init(struct xfs_buf *, struct xfs_mount *); void xfs_buf_item_relse(struct xfs_buf *); -void xfs_buf_item_log(xfs_buf_log_item_t *, uint, uint); +void xfs_buf_item_log(struct xfs_buf_log_item *, uint, uint); bool xfs_buf_item_dirty_format(struct xfs_buf_log_item *); void xfs_buf_attach_iodone(struct xfs_buf *, void(*)(struct xfs_buf *, xfs_log_item_t *), diff --git a/fs/xfs/xfs_trans_buf.c b/fs/xfs/xfs_trans_buf.c index 3ba7a96a8abd..74563cd2970c 100644 --- a/fs/xfs/xfs_trans_buf.c +++ b/fs/xfs/xfs_trans_buf.c @@ -139,7 +139,7 @@ xfs_trans_get_buf_map( xfs_buf_flags_t flags) { xfs_buf_t *bp; - xfs_buf_log_item_t *bip; + struct xfs_buf_log_item *bip; if (!tp) return xfs_buf_get_map(target, map, nmaps, flags); @@ -188,12 +188,13 @@ xfs_trans_get_buf_map( * mount structure. 
*/ xfs_buf_t * -xfs_trans_getsb(xfs_trans_t *tp, - struct xfs_mount *mp, - int flags) +xfs_trans_getsb( + xfs_trans_t *tp, + struct xfs_mount *mp, + int flags) { xfs_buf_t *bp; - xfs_buf_log_item_t *bip; + struct xfs_buf_log_item *bip; /* * Default to just trying to lock the superblock buffer @@ -352,10 +353,11 @@ xfs_trans_read_buf_map( * brelse() call. */ void -xfs_trans_brelse(xfs_trans_t *tp, - xfs_buf_t *bp) +xfs_trans_brelse( + xfs_trans_t *tp, + xfs_buf_t *bp) { - xfs_buf_log_item_t *bip; + struct xfs_buf_log_item *bip; int freed; /* @@ -456,10 +458,11 @@ xfs_trans_brelse(xfs_trans_t *tp, */ /* ARGSUSED */ void -xfs_trans_bhold(xfs_trans_t *tp, - xfs_buf_t *bp) +xfs_trans_bhold( + xfs_trans_t *tp, + xfs_buf_t *bp) { - xfs_buf_log_item_t *bip = bp->b_fspriv; + struct xfs_buf_log_item *bip = bp->b_fspriv; ASSERT(bp->b_transp == tp); ASSERT(bip != NULL); @@ -476,10 +479,11 @@ xfs_trans_bhold(xfs_trans_t *tp, * for this transaction. */ void -xfs_trans_bhold_release(xfs_trans_t *tp, - xfs_buf_t *bp) +xfs_trans_bhold_release( + xfs_trans_t *tp, + xfs_buf_t *bp) { - xfs_buf_log_item_t *bip = bp->b_fspriv; + struct xfs_buf_log_item *bip = bp->b_fspriv; ASSERT(bp->b_transp == tp); ASSERT(bip != NULL); @@ -600,10 +604,10 @@ xfs_trans_log_buf( */ void xfs_trans_binval( - xfs_trans_t *tp, - xfs_buf_t *bp) + xfs_trans_t *tp, + xfs_buf_t *bp) { - xfs_buf_log_item_t *bip = bp->b_fspriv; + struct xfs_buf_log_item *bip = bp->b_fspriv; int i; ASSERT(bp->b_transp == tp); @@ -655,10 +659,10 @@ xfs_trans_binval( */ void xfs_trans_inode_buf( - xfs_trans_t *tp, - xfs_buf_t *bp) + xfs_trans_t *tp, + xfs_buf_t *bp) { - xfs_buf_log_item_t *bip = bp->b_fspriv; + struct xfs_buf_log_item *bip = bp->b_fspriv; ASSERT(bp->b_transp == tp); ASSERT(bip != NULL); @@ -679,10 +683,10 @@ xfs_trans_inode_buf( */ void xfs_trans_stale_inode_buf( - xfs_trans_t *tp, - xfs_buf_t *bp) + xfs_trans_t *tp, + xfs_buf_t *bp) { - xfs_buf_log_item_t *bip = bp->b_fspriv; + struct xfs_buf_log_item *bip = 
bp->b_fspriv; ASSERT(bp->b_transp == tp); ASSERT(bip != NULL); @@ -704,10 +708,10 @@ xfs_trans_stale_inode_buf( /* ARGSUSED */ void xfs_trans_inode_alloc_buf( - xfs_trans_t *tp, - xfs_buf_t *bp) + xfs_trans_t *tp, + xfs_buf_t *bp) { - xfs_buf_log_item_t *bip = bp->b_fspriv; + struct xfs_buf_log_item *bip = bp->b_fspriv; ASSERT(bp->b_transp == tp); ASSERT(bip != NULL); @@ -797,9 +801,9 @@ xfs_trans_buf_copy_type( /* ARGSUSED */ void xfs_trans_dquot_buf( - xfs_trans_t *tp, - xfs_buf_t *bp, - uint type) + xfs_trans_t *tp, + xfs_buf_t *bp, + uint type) { struct xfs_buf_log_item *bip = bp->b_fspriv; -- cgit v1.2.3 From fb1755a645972ed096047583600838f6cf414e2b Mon Sep 17 00:00:00 2001 From: Carlos Maiolino Date: Wed, 24 Jan 2018 13:38:48 -0800 Subject: Split buffer's b_fspriv field By splitting the b_fspriv field into two different fields (b_log_item and b_li_list), it's possible to get rid of an old ABI workaround by using the new b_log_item field to store the xfs_buf_log_item separately from the log items attached to the buffer, which will be linked in the new b_li_list field. This way, there is no more need to reorder the log items list to place the buf_log_item at the beginning of the list, slightly simplifying the logic to handle buffer IO. This also opens the possibility to change buffer's log items list into a proper list_head. b_log_item field is still defined as a void *, because it is still used by the log buffers to store xlog_in_core structures, and there is no need to add an extra field on xfs_buf just for xlog_in_core. Signed-off-by: Carlos Maiolino Reviewed-by: Bill O'Donnell Reviewed-by: Darrick J. Wong [darrick: minor style changes] Signed-off-by: Darrick J.
Wong --- fs/xfs/libxfs/xfs_alloc.c | 8 ++-- fs/xfs/libxfs/xfs_attr_leaf.c | 2 +- fs/xfs/libxfs/xfs_btree.c | 4 +- fs/xfs/libxfs/xfs_da_btree.c | 2 +- fs/xfs/libxfs/xfs_dir2_block.c | 2 +- fs/xfs/libxfs/xfs_dir2_data.c | 2 +- fs/xfs/libxfs/xfs_dir2_leaf.c | 2 +- fs/xfs/libxfs/xfs_dir2_node.c | 2 +- fs/xfs/libxfs/xfs_ialloc.c | 4 +- fs/xfs/libxfs/xfs_sb.c | 2 +- fs/xfs/libxfs/xfs_symlink_remote.c | 2 +- fs/xfs/xfs_buf.h | 3 +- fs/xfs/xfs_buf_item.c | 85 +++++++++++++++++++++++--------------- fs/xfs/xfs_inode.c | 4 +- fs/xfs/xfs_inode_item.c | 4 +- fs/xfs/xfs_log.c | 8 ++-- fs/xfs/xfs_log_recover.c | 6 +-- fs/xfs/xfs_trans_buf.c | 48 ++++++++++----------- 18 files changed, 104 insertions(+), 86 deletions(-) diff --git a/fs/xfs/libxfs/xfs_alloc.c b/fs/xfs/libxfs/xfs_alloc.c index 6883a7668de6..c02781a4c091 100644 --- a/fs/xfs/libxfs/xfs_alloc.c +++ b/fs/xfs/libxfs/xfs_alloc.c @@ -590,8 +590,8 @@ static void xfs_agfl_write_verify( struct xfs_buf *bp) { - struct xfs_mount *mp = bp->b_target->bt_mount; - struct xfs_buf_log_item *bip = bp->b_fspriv; + struct xfs_mount *mp = bp->b_target->bt_mount; + struct xfs_buf_log_item *bip = bp->b_log_item; xfs_failaddr_t fa; /* no verification of non-crc AGFLs */ @@ -2487,8 +2487,8 @@ static void xfs_agf_write_verify( struct xfs_buf *bp) { - struct xfs_mount *mp = bp->b_target->bt_mount; - struct xfs_buf_log_item *bip = bp->b_fspriv; + struct xfs_mount *mp = bp->b_target->bt_mount; + struct xfs_buf_log_item *bip = bp->b_log_item; xfs_failaddr_t fa; fa = xfs_agf_verify(bp); diff --git a/fs/xfs/libxfs/xfs_attr_leaf.c b/fs/xfs/libxfs/xfs_attr_leaf.c index efe5f8acbd45..2135b8e67dcc 100644 --- a/fs/xfs/libxfs/xfs_attr_leaf.c +++ b/fs/xfs/libxfs/xfs_attr_leaf.c @@ -309,7 +309,7 @@ xfs_attr3_leaf_write_verify( struct xfs_buf *bp) { struct xfs_mount *mp = bp->b_target->bt_mount; - struct xfs_buf_log_item *bip = bp->b_fspriv; + struct xfs_buf_log_item *bip = bp->b_log_item; struct xfs_attr3_leaf_hdr *hdr3 = bp->b_addr; xfs_failaddr_t fa; 
diff --git a/fs/xfs/libxfs/xfs_btree.c b/fs/xfs/libxfs/xfs_btree.c index 567cff5ed511..79ee4a1951d1 100644 --- a/fs/xfs/libxfs/xfs_btree.c +++ b/fs/xfs/libxfs/xfs_btree.c @@ -273,7 +273,7 @@ xfs_btree_lblock_calc_crc( struct xfs_buf *bp) { struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp); - struct xfs_buf_log_item *bip = bp->b_fspriv; + struct xfs_buf_log_item *bip = bp->b_log_item; if (!xfs_sb_version_hascrc(&bp->b_target->bt_mount->m_sb)) return; @@ -311,7 +311,7 @@ xfs_btree_sblock_calc_crc( struct xfs_buf *bp) { struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp); - struct xfs_buf_log_item *bip = bp->b_fspriv; + struct xfs_buf_log_item *bip = bp->b_log_item; if (!xfs_sb_version_hascrc(&bp->b_target->bt_mount->m_sb)) return; diff --git a/fs/xfs/libxfs/xfs_da_btree.c b/fs/xfs/libxfs/xfs_da_btree.c index cf07585b9d83..ea187b4a7991 100644 --- a/fs/xfs/libxfs/xfs_da_btree.c +++ b/fs/xfs/libxfs/xfs_da_btree.c @@ -182,7 +182,7 @@ xfs_da3_node_write_verify( struct xfs_buf *bp) { struct xfs_mount *mp = bp->b_target->bt_mount; - struct xfs_buf_log_item *bip = bp->b_fspriv; + struct xfs_buf_log_item *bip = bp->b_log_item; struct xfs_da3_node_hdr *hdr3 = bp->b_addr; xfs_failaddr_t fa; diff --git a/fs/xfs/libxfs/xfs_dir2_block.c b/fs/xfs/libxfs/xfs_dir2_block.c index fe951fa1a583..2da86a394bcf 100644 --- a/fs/xfs/libxfs/xfs_dir2_block.c +++ b/fs/xfs/libxfs/xfs_dir2_block.c @@ -103,7 +103,7 @@ xfs_dir3_block_write_verify( struct xfs_buf *bp) { struct xfs_mount *mp = bp->b_target->bt_mount; - struct xfs_buf_log_item *bip = bp->b_fspriv; + struct xfs_buf_log_item *bip = bp->b_log_item; struct xfs_dir3_blk_hdr *hdr3 = bp->b_addr; xfs_failaddr_t fa; diff --git a/fs/xfs/libxfs/xfs_dir2_data.c b/fs/xfs/libxfs/xfs_dir2_data.c index a1e30c751c00..920279485275 100644 --- a/fs/xfs/libxfs/xfs_dir2_data.c +++ b/fs/xfs/libxfs/xfs_dir2_data.c @@ -320,7 +320,7 @@ xfs_dir3_data_write_verify( struct xfs_buf *bp) { struct xfs_mount *mp = bp->b_target->bt_mount; - struct xfs_buf_log_item 
*bip = bp->b_fspriv; + struct xfs_buf_log_item *bip = bp->b_log_item; struct xfs_dir3_blk_hdr *hdr3 = bp->b_addr; xfs_failaddr_t fa; diff --git a/fs/xfs/libxfs/xfs_dir2_leaf.c b/fs/xfs/libxfs/xfs_dir2_leaf.c index a7ad649398c7..d7e630f41f9c 100644 --- a/fs/xfs/libxfs/xfs_dir2_leaf.c +++ b/fs/xfs/libxfs/xfs_dir2_leaf.c @@ -208,7 +208,7 @@ __write_verify( uint16_t magic) { struct xfs_mount *mp = bp->b_target->bt_mount; - struct xfs_buf_log_item *bip = bp->b_fspriv; + struct xfs_buf_log_item *bip = bp->b_log_item; struct xfs_dir3_leaf_hdr *hdr3 = bp->b_addr; xfs_failaddr_t fa; diff --git a/fs/xfs/libxfs/xfs_dir2_node.c b/fs/xfs/libxfs/xfs_dir2_node.c index bb893ae02696..239d97a64296 100644 --- a/fs/xfs/libxfs/xfs_dir2_node.c +++ b/fs/xfs/libxfs/xfs_dir2_node.c @@ -141,7 +141,7 @@ xfs_dir3_free_write_verify( struct xfs_buf *bp) { struct xfs_mount *mp = bp->b_target->bt_mount; - struct xfs_buf_log_item *bip = bp->b_fspriv; + struct xfs_buf_log_item *bip = bp->b_log_item; struct xfs_dir3_blk_hdr *hdr3 = bp->b_addr; xfs_failaddr_t fa; diff --git a/fs/xfs/libxfs/xfs_ialloc.c b/fs/xfs/libxfs/xfs_ialloc.c index 3625d1da7462..0e2cf5f0be1f 100644 --- a/fs/xfs/libxfs/xfs_ialloc.c +++ b/fs/xfs/libxfs/xfs_ialloc.c @@ -2557,8 +2557,8 @@ static void xfs_agi_write_verify( struct xfs_buf *bp) { - struct xfs_mount *mp = bp->b_target->bt_mount; - struct xfs_buf_log_item *bip = bp->b_fspriv; + struct xfs_mount *mp = bp->b_target->bt_mount; + struct xfs_buf_log_item *bip = bp->b_log_item; xfs_failaddr_t fa; fa = xfs_agi_verify(bp); diff --git a/fs/xfs/libxfs/xfs_sb.c b/fs/xfs/libxfs/xfs_sb.c index e0c826403c6a..46af6aa60a8e 100644 --- a/fs/xfs/libxfs/xfs_sb.c +++ b/fs/xfs/libxfs/xfs_sb.c @@ -688,7 +688,7 @@ xfs_sb_write_verify( struct xfs_buf *bp) { struct xfs_mount *mp = bp->b_target->bt_mount; - struct xfs_buf_log_item *bip = bp->b_fspriv; + struct xfs_buf_log_item *bip = bp->b_log_item; int error; error = xfs_sb_verify(bp, false); diff --git a/fs/xfs/libxfs/xfs_symlink_remote.c 
b/fs/xfs/libxfs/xfs_symlink_remote.c index 091e3cf0868f..5ef5f354587e 100644 --- a/fs/xfs/libxfs/xfs_symlink_remote.c +++ b/fs/xfs/libxfs/xfs_symlink_remote.c @@ -149,7 +149,7 @@ xfs_symlink_write_verify( struct xfs_buf *bp) { struct xfs_mount *mp = bp->b_target->bt_mount; - struct xfs_buf_log_item *bip = bp->b_fspriv; + struct xfs_buf_log_item *bip = bp->b_log_item; xfs_failaddr_t fa; /* no verification of non-crc buffers */ diff --git a/fs/xfs/xfs_buf.h b/fs/xfs/xfs_buf.h index 5b5b4861c729..6fcba7536d7e 100644 --- a/fs/xfs/xfs_buf.h +++ b/fs/xfs/xfs_buf.h @@ -176,7 +176,8 @@ typedef struct xfs_buf { struct workqueue_struct *b_ioend_wq; /* I/O completion wq */ xfs_buf_iodone_t b_iodone; /* I/O completion function */ struct completion b_iowait; /* queue for I/O waiters */ - void *b_fspriv; + void *b_log_item; + struct xfs_log_item *b_li_list; struct xfs_trans *b_transp; struct page **b_pages; /* array of page pointers */ struct page *b_page_array[XB_PAGES]; /* inline pages */ diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c index 8afcfa3ed976..8354fab9796e 100644 --- a/fs/xfs/xfs_buf_item.c +++ b/fs/xfs/xfs_buf_item.c @@ -411,7 +411,7 @@ xfs_buf_item_unpin( int stale = bip->bli_flags & XFS_BLI_STALE; int freed; - ASSERT(bp->b_fspriv == bip); + ASSERT(bp->b_log_item == bip); ASSERT(atomic_read(&bip->bli_refcount) > 0); trace_xfs_buf_item_unpin(bip); @@ -456,13 +456,14 @@ xfs_buf_item_unpin( */ if (bip->bli_flags & XFS_BLI_STALE_INODE) { xfs_buf_do_callbacks(bp); - bp->b_fspriv = NULL; + bp->b_log_item = NULL; + bp->b_li_list = NULL; bp->b_iodone = NULL; } else { spin_lock(&ailp->xa_lock); xfs_trans_ail_delete(ailp, lip, SHUTDOWN_LOG_IO_ERROR); xfs_buf_item_relse(bp); - ASSERT(bp->b_fspriv == NULL); + ASSERT(bp->b_log_item == NULL); } xfs_buf_relse(bp); } else if (freed && remove) { @@ -722,18 +723,15 @@ xfs_buf_item_free_format( /* * Allocate a new buf log item to go with the given buffer. 
- * Set the buffer's b_fsprivate field to point to the new - * buf log item. If there are other item's attached to the - * buffer (see xfs_buf_attach_iodone() below), then put the - * buf log item at the front. + * Set the buffer's b_log_item field to point to the new + * buf log item. */ int xfs_buf_item_init( struct xfs_buf *bp, struct xfs_mount *mp) { - struct xfs_log_item *lip = bp->b_fspriv; - struct xfs_buf_log_item *bip; + struct xfs_buf_log_item *bip = bp->b_log_item; int chunks; int map_size; int error; @@ -741,13 +739,14 @@ xfs_buf_item_init( /* * Check to see if there is already a buf log item for - * this buffer. If there is, it is guaranteed to be - * the first. If we do already have one, there is + * this buffer. If we do already have one, there is * nothing to do here so return. */ ASSERT(bp->b_target->bt_mount == mp); - if (lip != NULL && lip->li_type == XFS_LI_BUF) + if (bip != NULL) { + ASSERT(bip->bli_item.li_type == XFS_LI_BUF); return 0; + } bip = kmem_zone_zalloc(xfs_buf_item_zone, KM_SLEEP); xfs_log_item_init(mp, &bip->bli_item, XFS_LI_BUF, &xfs_buf_item_ops); @@ -781,13 +780,7 @@ xfs_buf_item_init( bip->bli_formats[i].blf_map_size = map_size; } - /* - * Put the buf item into the list of items attached to the - * buffer at the front. - */ - if (bp->b_fspriv) - bip->bli_item.li_bio_list = bp->b_fspriv; - bp->b_fspriv = bip; + bp->b_log_item = bip; xfs_buf_hold(bp); return 0; } @@ -961,13 +954,14 @@ void xfs_buf_item_relse( xfs_buf_t *bp) { - struct xfs_buf_log_item *bip = bp->b_fspriv; + struct xfs_buf_log_item *bip = bp->b_log_item; + struct xfs_log_item *lip = bp->b_li_list; trace_xfs_buf_item_relse(bp, _RET_IP_); ASSERT(!(bip->bli_item.li_flags & XFS_LI_IN_AIL)); - bp->b_fspriv = bip->bli_item.li_bio_list; - if (bp->b_fspriv == NULL) + bp->b_log_item = NULL; + if (lip == NULL) bp->b_iodone = NULL; xfs_buf_rele(bp); @@ -980,9 +974,7 @@ xfs_buf_item_relse( * to be called when the buffer's I/O completes. 
If it is not set * already, set the buffer's b_iodone() routine to be * xfs_buf_iodone_callbacks() and link the log item into the list of - * items rooted at b_fsprivate. Items are always added as the second - * entry in the list if there is a first, because the buf item code - * assumes that the buf log item is first. + * items rooted at b_li_list. */ void xfs_buf_attach_iodone( @@ -995,12 +987,12 @@ xfs_buf_attach_iodone( ASSERT(xfs_buf_islocked(bp)); lip->li_cb = cb; - head_lip = bp->b_fspriv; + head_lip = bp->b_li_list; if (head_lip) { lip->li_bio_list = head_lip->li_bio_list; head_lip->li_bio_list = lip; } else { - bp->b_fspriv = lip; + bp->b_li_list = lip; } ASSERT(bp->b_iodone == NULL || @@ -1024,10 +1016,17 @@ STATIC void xfs_buf_do_callbacks( struct xfs_buf *bp) { + struct xfs_buf_log_item *blip = bp->b_log_item; struct xfs_log_item *lip; - while ((lip = bp->b_fspriv) != NULL) { - bp->b_fspriv = lip->li_bio_list; + /* If there is a buf_log_item attached, run its callback */ + if (blip) { + lip = &blip->bli_item; + lip->li_cb(bp, lip); + } + + while ((lip = bp->b_li_list) != NULL) { + bp->b_li_list = lip->li_bio_list; ASSERT(lip->li_cb != NULL); /* * Clear the next pointer so we don't have any @@ -1052,10 +1051,19 @@ STATIC void xfs_buf_do_callbacks_fail( struct xfs_buf *bp) { + struct xfs_log_item *lip = bp->b_li_list; struct xfs_log_item *next; - struct xfs_log_item *lip = bp->b_fspriv; - struct xfs_ail *ailp = lip->li_ailp; + struct xfs_ail *ailp; + /* + * Buffer log item errors are handled directly by xfs_buf_item_push() + * and xfs_buf_iodone_callback_error, and they have no IO error + * callbacks. Check only for items in b_li_list. 
+ */ + if (lip == NULL) + return; + + ailp = lip->li_ailp; spin_lock(&ailp->xa_lock); for (; lip; lip = next) { next = lip->li_bio_list; @@ -1069,12 +1077,20 @@ static bool xfs_buf_iodone_callback_error( struct xfs_buf *bp) { - struct xfs_log_item *lip = bp->b_fspriv; - struct xfs_mount *mp = lip->li_mountp; + struct xfs_buf_log_item *bip = bp->b_log_item; + struct xfs_log_item *lip = bp->b_li_list; + struct xfs_mount *mp; static ulong lasttime; static xfs_buftarg_t *lasttarg; struct xfs_error_cfg *cfg; + /* + * The failed buffer might not have a buf_log_item attached or the + * log_item list might be empty. Get the mp from the available + * xfs_log_item + */ + mp = bip ? bip->bli_item.li_mountp : lip->li_mountp; + /* * If we've already decided to shutdown the filesystem because of * I/O errors, there's no point in giving this a retry. @@ -1183,7 +1199,8 @@ xfs_buf_iodone_callbacks( bp->b_first_retry_time = 0; xfs_buf_do_callbacks(bp); - bp->b_fspriv = NULL; + bp->b_log_item = NULL; + bp->b_li_list = NULL; bp->b_iodone = NULL; xfs_buf_ioend(bp); } diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index c9e40d4fc939..8a3ff6343d91 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -2272,7 +2272,7 @@ xfs_ifree_cluster( * stale first, we will not attempt to lock them in the loop * below as the XFS_ISTALE flag will be set. */ - lip = bp->b_fspriv; + lip = bp->b_li_list; while (lip) { if (lip->li_type == XFS_LI_INODE) { iip = (xfs_inode_log_item_t *)lip; @@ -3649,7 +3649,7 @@ xfs_iflush_int( /* generate the checksum. 
*/ xfs_dinode_calc_crc(mp, dip); - ASSERT(bp->b_fspriv != NULL); + ASSERT(bp->b_li_list != NULL); ASSERT(bp->b_iodone != NULL); return 0; diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c index 6ee5c3bf19ad..993736032b4b 100644 --- a/fs/xfs/xfs_inode_item.c +++ b/fs/xfs/xfs_inode_item.c @@ -722,7 +722,7 @@ xfs_iflush_done( * Scan the buffer IO completions for other inodes being completed and * attach them to the current inode log item. */ - blip = bp->b_fspriv; + blip = bp->b_li_list; prev = NULL; while (blip != NULL) { if (blip->li_cb != xfs_iflush_done) { @@ -734,7 +734,7 @@ xfs_iflush_done( /* remove from list */ next = blip->li_bio_list; if (!prev) { - bp->b_fspriv = next; + bp->b_li_list = next; } else { prev->li_bio_list = next; } diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c index c1f266c34af7..20483b654ef1 100644 --- a/fs/xfs/xfs_log.c +++ b/fs/xfs/xfs_log.c @@ -1242,7 +1242,7 @@ xlog_space_left( static void xlog_iodone(xfs_buf_t *bp) { - struct xlog_in_core *iclog = bp->b_fspriv; + struct xlog_in_core *iclog = bp->b_log_item; struct xlog *l = iclog->ic_log; int aborted = 0; @@ -1773,7 +1773,7 @@ STATIC int xlog_bdstrat( struct xfs_buf *bp) { - struct xlog_in_core *iclog = bp->b_fspriv; + struct xlog_in_core *iclog = bp->b_log_item; xfs_buf_lock(bp); if (iclog->ic_state & XLOG_STATE_IOERROR) { @@ -1919,7 +1919,7 @@ xlog_sync( } bp->b_io_length = BTOBB(count); - bp->b_fspriv = iclog; + bp->b_log_item = iclog; bp->b_flags &= ~XBF_FLUSH; bp->b_flags |= (XBF_ASYNC | XBF_SYNCIO | XBF_WRITE | XBF_FUA); @@ -1958,7 +1958,7 @@ xlog_sync( XFS_BUF_SET_ADDR(bp, 0); /* logical 0 */ xfs_buf_associate_memory(bp, (char *)&iclog->ic_header + count, split); - bp->b_fspriv = iclog; + bp->b_log_item = iclog; bp->b_flags &= ~XBF_FLUSH; bp->b_flags |= (XBF_ASYNC | XBF_SYNCIO | XBF_WRITE | XBF_FUA); diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c index d864380b6575..00240c9ee72e 100644 --- a/fs/xfs/xfs_log_recover.c +++ b/fs/xfs/xfs_log_recover.c 
@@ -400,9 +400,9 @@ xlog_recover_iodone( * On v5 supers, a bli could be attached to update the metadata LSN. * Clean it up. */ - if (bp->b_fspriv) + if (bp->b_log_item) xfs_buf_item_relse(bp); - ASSERT(bp->b_fspriv == NULL); + ASSERT(bp->b_log_item == NULL); bp->b_iodone = NULL; xfs_buf_ioend(bp); @@ -2630,7 +2630,7 @@ xlog_recover_validate_buf_type( ASSERT(!bp->b_iodone || bp->b_iodone == xlog_recover_iodone); bp->b_iodone = xlog_recover_iodone; xfs_buf_item_init(bp, mp); - bip = bp->b_fspriv; + bip = bp->b_log_item; bip->bli_item.li_lsn = current_lsn; } } diff --git a/fs/xfs/xfs_trans_buf.c b/fs/xfs/xfs_trans_buf.c index 74563cd2970c..653ce379d36b 100644 --- a/fs/xfs/xfs_trans_buf.c +++ b/fs/xfs/xfs_trans_buf.c @@ -82,12 +82,12 @@ _xfs_trans_bjoin( ASSERT(bp->b_transp == NULL); /* - * The xfs_buf_log_item pointer is stored in b_fsprivate. If + * The xfs_buf_log_item pointer is stored in b_log_item. If * it doesn't have one yet, then allocate one and initialize it. * The checks to see if one is there are in xfs_buf_item_init(). 
*/ xfs_buf_item_init(bp, tp->t_mountp); - bip = bp->b_fspriv; + bip = bp->b_log_item; ASSERT(!(bip->bli_flags & XFS_BLI_STALE)); ASSERT(!(bip->__bli_format.blf_flags & XFS_BLF_CANCEL)); ASSERT(!(bip->bli_flags & XFS_BLI_LOGGED)); @@ -118,7 +118,7 @@ xfs_trans_bjoin( struct xfs_buf *bp) { _xfs_trans_bjoin(tp, bp, 0); - trace_xfs_trans_bjoin(bp->b_fspriv); + trace_xfs_trans_bjoin(bp->b_log_item); } /* @@ -159,7 +159,7 @@ xfs_trans_get_buf_map( } ASSERT(bp->b_transp == tp); - bip = bp->b_fspriv; + bip = bp->b_log_item; ASSERT(bip != NULL); ASSERT(atomic_read(&bip->bli_refcount) > 0); bip->bli_recur++; @@ -175,7 +175,7 @@ xfs_trans_get_buf_map( ASSERT(!bp->b_error); _xfs_trans_bjoin(tp, bp, 1); - trace_xfs_trans_get_buf(bp->b_fspriv); + trace_xfs_trans_get_buf(bp->b_log_item); return bp; } @@ -211,7 +211,7 @@ xfs_trans_getsb( */ bp = mp->m_sb_bp; if (bp->b_transp == tp) { - bip = bp->b_fspriv; + bip = bp->b_log_item; ASSERT(bip != NULL); ASSERT(atomic_read(&bip->bli_refcount) > 0); bip->bli_recur++; @@ -224,7 +224,7 @@ xfs_trans_getsb( return NULL; _xfs_trans_bjoin(tp, bp, 1); - trace_xfs_trans_getsb(bp->b_fspriv); + trace_xfs_trans_getsb(bp->b_log_item); return bp; } @@ -267,7 +267,7 @@ xfs_trans_read_buf_map( if (bp) { ASSERT(xfs_buf_islocked(bp)); ASSERT(bp->b_transp == tp); - ASSERT(bp->b_fspriv != NULL); + ASSERT(bp->b_log_item != NULL); ASSERT(!bp->b_error); ASSERT(bp->b_flags & XBF_DONE); @@ -280,7 +280,7 @@ xfs_trans_read_buf_map( return -EIO; } - bip = bp->b_fspriv; + bip = bp->b_log_item; bip->bli_recur++; ASSERT(atomic_read(&bip->bli_refcount) > 0); @@ -330,7 +330,7 @@ xfs_trans_read_buf_map( if (tp) { _xfs_trans_bjoin(tp, bp, 1); - trace_xfs_trans_read_buf(bp->b_fspriv); + trace_xfs_trans_read_buf(bp->b_log_item); } *bpp = bp; return 0; @@ -370,7 +370,7 @@ xfs_trans_brelse( } ASSERT(bp->b_transp == tp); - bip = bp->b_fspriv; + bip = bp->b_log_item; ASSERT(bip->bli_item.li_type == XFS_LI_BUF); ASSERT(!(bip->bli_flags & XFS_BLI_STALE)); 
ASSERT(!(bip->__bli_format.blf_flags & XFS_BLF_CANCEL)); @@ -462,7 +462,7 @@ xfs_trans_bhold( xfs_trans_t *tp, xfs_buf_t *bp) { - struct xfs_buf_log_item *bip = bp->b_fspriv; + struct xfs_buf_log_item *bip = bp->b_log_item; ASSERT(bp->b_transp == tp); ASSERT(bip != NULL); @@ -483,7 +483,7 @@ xfs_trans_bhold_release( xfs_trans_t *tp, xfs_buf_t *bp) { - struct xfs_buf_log_item *bip = bp->b_fspriv; + struct xfs_buf_log_item *bip = bp->b_log_item; ASSERT(bp->b_transp == tp); ASSERT(bip != NULL); @@ -504,7 +504,7 @@ xfs_trans_dirty_buf( struct xfs_trans *tp, struct xfs_buf *bp) { - struct xfs_buf_log_item *bip = bp->b_fspriv; + struct xfs_buf_log_item *bip = bp->b_log_item; ASSERT(bp->b_transp == tp); ASSERT(bip != NULL); @@ -561,7 +561,7 @@ xfs_trans_log_buf( uint first, uint last) { - struct xfs_buf_log_item *bip = bp->b_fspriv; + struct xfs_buf_log_item *bip = bp->b_log_item; ASSERT(first <= last && last < BBTOB(bp->b_length)); ASSERT(!(bip->bli_flags & XFS_BLI_ORDERED)); @@ -607,7 +607,7 @@ xfs_trans_binval( xfs_trans_t *tp, xfs_buf_t *bp) { - struct xfs_buf_log_item *bip = bp->b_fspriv; + struct xfs_buf_log_item *bip = bp->b_log_item; int i; ASSERT(bp->b_transp == tp); @@ -662,7 +662,7 @@ xfs_trans_inode_buf( xfs_trans_t *tp, xfs_buf_t *bp) { - struct xfs_buf_log_item *bip = bp->b_fspriv; + struct xfs_buf_log_item *bip = bp->b_log_item; ASSERT(bp->b_transp == tp); ASSERT(bip != NULL); @@ -686,7 +686,7 @@ xfs_trans_stale_inode_buf( xfs_trans_t *tp, xfs_buf_t *bp) { - struct xfs_buf_log_item *bip = bp->b_fspriv; + struct xfs_buf_log_item *bip = bp->b_log_item; ASSERT(bp->b_transp == tp); ASSERT(bip != NULL); @@ -711,7 +711,7 @@ xfs_trans_inode_alloc_buf( xfs_trans_t *tp, xfs_buf_t *bp) { - struct xfs_buf_log_item *bip = bp->b_fspriv; + struct xfs_buf_log_item *bip = bp->b_log_item; ASSERT(bp->b_transp == tp); ASSERT(bip != NULL); @@ -733,7 +733,7 @@ xfs_trans_ordered_buf( struct xfs_trans *tp, struct xfs_buf *bp) { - struct xfs_buf_log_item *bip = bp->b_fspriv; + 
struct xfs_buf_log_item *bip = bp->b_log_item; ASSERT(bp->b_transp == tp); ASSERT(bip != NULL); @@ -763,7 +763,7 @@ xfs_trans_buf_set_type( struct xfs_buf *bp, enum xfs_blft type) { - struct xfs_buf_log_item *bip = bp->b_fspriv; + struct xfs_buf_log_item *bip = bp->b_log_item; if (!tp) return; @@ -780,8 +780,8 @@ xfs_trans_buf_copy_type( struct xfs_buf *dst_bp, struct xfs_buf *src_bp) { - struct xfs_buf_log_item *sbip = src_bp->b_fspriv; - struct xfs_buf_log_item *dbip = dst_bp->b_fspriv; + struct xfs_buf_log_item *sbip = src_bp->b_log_item; + struct xfs_buf_log_item *dbip = dst_bp->b_log_item; enum xfs_blft type; type = xfs_blft_from_flags(&sbip->__bli_format); @@ -805,7 +805,7 @@ xfs_trans_dquot_buf( xfs_buf_t *bp, uint type) { - struct xfs_buf_log_item *bip = bp->b_fspriv; + struct xfs_buf_log_item *bip = bp->b_log_item; ASSERT(type == XFS_BLF_UDQUOT_BUF || type == XFS_BLF_PDQUOT_BUF || -- cgit v1.2.3 From 643c8c05e75d978c55ceb584f21a16de5431c17d Mon Sep 17 00:00:00 2001 From: Carlos Maiolino Date: Wed, 24 Jan 2018 13:38:49 -0800 Subject: Use list_head infra-structure for buffer's log items list Now that buffer's b_fspriv has been split, just replace the current singly linked list of xfs_log_items, by the list_head infrastructure. Also, remove the xfs_log_item argument from xfs_buf_resubmit_failed_buffers(), there is no need for this argument, once the log items can be walked through the list_head in the buffer. Signed-off-by: Carlos Maiolino Reviewed-by: Bill O'Donnell Reviewed-by: Darrick J. Wong [darrick: minor style cleanups] Signed-off-by: Darrick J. 
Wong --- fs/xfs/xfs_buf.c | 1 + fs/xfs/xfs_buf.h | 2 +- fs/xfs/xfs_buf_item.c | 57 +++++++++++++++++++++---------------------------- fs/xfs/xfs_buf_item.h | 1 - fs/xfs/xfs_dquot_item.c | 2 +- fs/xfs/xfs_inode.c | 8 +++---- fs/xfs/xfs_inode_item.c | 41 +++++++++++------------------------ fs/xfs/xfs_log.c | 1 + fs/xfs/xfs_trans.h | 2 +- 9 files changed, 44 insertions(+), 71 deletions(-) diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c index 0820c1ccf97c..d1da2ee9e6db 100644 --- a/fs/xfs/xfs_buf.c +++ b/fs/xfs/xfs_buf.c @@ -236,6 +236,7 @@ _xfs_buf_alloc( init_completion(&bp->b_iowait); INIT_LIST_HEAD(&bp->b_lru); INIT_LIST_HEAD(&bp->b_list); + INIT_LIST_HEAD(&bp->b_li_list); sema_init(&bp->b_sema, 0); /* held, no waiters */ spin_lock_init(&bp->b_lock); XB_SET_OWNER(bp); diff --git a/fs/xfs/xfs_buf.h b/fs/xfs/xfs_buf.h index 6fcba7536d7e..2f4c91452861 100644 --- a/fs/xfs/xfs_buf.h +++ b/fs/xfs/xfs_buf.h @@ -177,7 +177,7 @@ typedef struct xfs_buf { xfs_buf_iodone_t b_iodone; /* I/O completion function */ struct completion b_iowait; /* queue for I/O waiters */ void *b_log_item; - struct xfs_log_item *b_li_list; + struct list_head b_li_list; /* Log items list head */ struct xfs_trans *b_transp; struct page **b_pages; /* array of page pointers */ struct page *b_page_array[XB_PAGES]; /* inline pages */ diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c index 8354fab9796e..270ddb4d2313 100644 --- a/fs/xfs/xfs_buf_item.c +++ b/fs/xfs/xfs_buf_item.c @@ -457,7 +457,7 @@ xfs_buf_item_unpin( if (bip->bli_flags & XFS_BLI_STALE_INODE) { xfs_buf_do_callbacks(bp); bp->b_log_item = NULL; - bp->b_li_list = NULL; + list_del_init(&bp->b_li_list); bp->b_iodone = NULL; } else { spin_lock(&ailp->xa_lock); @@ -955,13 +955,12 @@ xfs_buf_item_relse( xfs_buf_t *bp) { struct xfs_buf_log_item *bip = bp->b_log_item; - struct xfs_log_item *lip = bp->b_li_list; trace_xfs_buf_item_relse(bp, _RET_IP_); ASSERT(!(bip->bli_item.li_flags & XFS_LI_IN_AIL)); bp->b_log_item = NULL; - if (lip == 
NULL) + if (list_empty(&bp->b_li_list)) bp->b_iodone = NULL; xfs_buf_rele(bp); @@ -982,18 +981,10 @@ xfs_buf_attach_iodone( void (*cb)(xfs_buf_t *, xfs_log_item_t *), xfs_log_item_t *lip) { - xfs_log_item_t *head_lip; - ASSERT(xfs_buf_islocked(bp)); lip->li_cb = cb; - head_lip = bp->b_li_list; - if (head_lip) { - lip->li_bio_list = head_lip->li_bio_list; - head_lip->li_bio_list = lip; - } else { - bp->b_li_list = lip; - } + list_add_tail(&lip->li_bio_list, &bp->b_li_list); ASSERT(bp->b_iodone == NULL || bp->b_iodone == xfs_buf_iodone_callbacks); @@ -1003,12 +994,12 @@ xfs_buf_attach_iodone( /* * We can have many callbacks on a buffer. Running the callbacks individually * can cause a lot of contention on the AIL lock, so we allow for a single - * callback to be able to scan the remaining lip->li_bio_list for other items - * of the same type and callback to be processed in the first call. + * callback to be able to scan the remaining items in bp->b_li_list for other + * items of the same type and callback to be processed in the first call. * * As a result, the loop walking the callback list below will also modify the * list. it removes the first item from the list and then runs the callback. - * The loop then restarts from the new head of the list. This allows the + * The loop then restarts from the new first item in the list. This allows the * callback to scan and modify the list attached to the buffer and we don't * have to care about maintaining a next item pointer. */ @@ -1025,16 +1016,17 @@ xfs_buf_do_callbacks( lip->li_cb(bp, lip); } - while ((lip = bp->b_li_list) != NULL) { - bp->b_li_list = lip->li_bio_list; - ASSERT(lip->li_cb != NULL); + while (!list_empty(&bp->b_li_list)) { + lip = list_first_entry(&bp->b_li_list, struct xfs_log_item, + li_bio_list); + /* - * Clear the next pointer so we don't have any + * Remove the item from the list, so we don't have any * confusion if the item is added to another buf.
* Don't touch the log item after calling its * callback, because it could have freed itself. */ - lip->li_bio_list = NULL; + list_del_init(&lip->li_bio_list); lip->li_cb(bp, lip); } } @@ -1051,8 +1043,7 @@ STATIC void xfs_buf_do_callbacks_fail( struct xfs_buf *bp) { - struct xfs_log_item *lip = bp->b_li_list; - struct xfs_log_item *next; + struct xfs_log_item *lip; struct xfs_ail *ailp; /* @@ -1060,13 +1051,14 @@ xfs_buf_do_callbacks_fail( * and xfs_buf_iodone_callback_error, and they have no IO error * callbacks. Check only for items in b_li_list. */ - if (lip == NULL) + if (list_empty(&bp->b_li_list)) return; + lip = list_first_entry(&bp->b_li_list, struct xfs_log_item, + li_bio_list); ailp = lip->li_ailp; spin_lock(&ailp->xa_lock); - for (; lip; lip = next) { - next = lip->li_bio_list; + list_for_each_entry(lip, &bp->b_li_list, li_bio_list) { if (lip->li_ops->iop_error) lip->li_ops->iop_error(lip, bp); } @@ -1078,7 +1070,7 @@ xfs_buf_iodone_callback_error( struct xfs_buf *bp) { struct xfs_buf_log_item *bip = bp->b_log_item; - struct xfs_log_item *lip = bp->b_li_list; + struct xfs_log_item *lip; struct xfs_mount *mp; static ulong lasttime; static xfs_buftarg_t *lasttarg; @@ -1089,7 +1081,9 @@ xfs_buf_iodone_callback_error( * log_item list might be empty. Get the mp from the available * xfs_log_item */ - mp = bip ? bip->bli_item.li_mountp : lip->li_mountp; + lip = list_first_entry_or_null(&bp->b_li_list, struct xfs_log_item, + li_bio_list); + mp = lip ? 
lip->li_mountp : bip->bli_item.li_mountp; /* * If we've already decided to shutdown the filesystem because of @@ -1200,7 +1194,7 @@ xfs_buf_iodone_callbacks( xfs_buf_do_callbacks(bp); bp->b_log_item = NULL; - bp->b_li_list = NULL; + list_del_init(&bp->b_li_list); bp->b_iodone = NULL; xfs_buf_ioend(bp); } @@ -1245,10 +1239,9 @@ xfs_buf_iodone( bool xfs_buf_resubmit_failed_buffers( struct xfs_buf *bp, - struct xfs_log_item *lip, struct list_head *buffer_list) { - struct xfs_log_item *next; + struct xfs_log_item *lip; /* * Clear XFS_LI_FAILED flag from all items before resubmit @@ -1256,10 +1249,8 @@ xfs_buf_resubmit_failed_buffers( * XFS_LI_FAILED set/clear is protected by xa_lock, caller this * function already have it acquired */ - for (; lip; lip = next) { - next = lip->li_bio_list; + list_for_each_entry(lip, &bp->b_li_list, li_bio_list) xfs_clear_li_failed(lip); - } /* Add this buffer back to the delayed write list */ return xfs_buf_delwri_queue(bp, buffer_list); diff --git a/fs/xfs/xfs_buf_item.h b/fs/xfs/xfs_buf_item.h index 0febfbbf6ba9..643f53dcfe51 100644 --- a/fs/xfs/xfs_buf_item.h +++ b/fs/xfs/xfs_buf_item.h @@ -71,7 +71,6 @@ void xfs_buf_attach_iodone(struct xfs_buf *, void xfs_buf_iodone_callbacks(struct xfs_buf *); void xfs_buf_iodone(struct xfs_buf *, struct xfs_log_item *); bool xfs_buf_resubmit_failed_buffers(struct xfs_buf *, - struct xfs_log_item *, struct list_head *); extern kmem_zone_t *xfs_buf_item_zone; diff --git a/fs/xfs/xfs_dquot_item.c b/fs/xfs/xfs_dquot_item.c index 51ee848a550e..96eaa6933709 100644 --- a/fs/xfs/xfs_dquot_item.c +++ b/fs/xfs/xfs_dquot_item.c @@ -176,7 +176,7 @@ xfs_qm_dquot_logitem_push( if (!xfs_buf_trylock(bp)) return XFS_ITEM_LOCKED; - if (!xfs_buf_resubmit_failed_buffers(bp, lip, buffer_list)) + if (!xfs_buf_resubmit_failed_buffers(bp, buffer_list)) rval = XFS_ITEM_FLUSHING; xfs_buf_unlock(bp); diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index 8a3ff6343d91..c66effc8e7dd 100644 --- a/fs/xfs/xfs_inode.c +++ 
b/fs/xfs/xfs_inode.c @@ -2214,7 +2214,7 @@ xfs_ifree_cluster( xfs_buf_t *bp; xfs_inode_t *ip; xfs_inode_log_item_t *iip; - xfs_log_item_t *lip; + struct xfs_log_item *lip; struct xfs_perag *pag; xfs_ino_t inum; @@ -2272,8 +2272,7 @@ xfs_ifree_cluster( * stale first, we will not attempt to lock them in the loop * below as the XFS_ISTALE flag will be set. */ - lip = bp->b_li_list; - while (lip) { + list_for_each_entry(lip, &bp->b_li_list, li_bio_list) { if (lip->li_type == XFS_LI_INODE) { iip = (xfs_inode_log_item_t *)lip; ASSERT(iip->ili_logged == 1); @@ -2283,7 +2282,6 @@ xfs_ifree_cluster( &iip->ili_item.li_lsn); xfs_iflags_set(iip->ili_inode, XFS_ISTALE); } - lip = lip->li_bio_list; } @@ -3649,7 +3647,7 @@ xfs_iflush_int( /* generate the checksum. */ xfs_dinode_calc_crc(mp, dip); - ASSERT(bp->b_li_list != NULL); + ASSERT(!list_empty(&bp->b_li_list)); ASSERT(bp->b_iodone != NULL); return 0; diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c index 993736032b4b..ddfc2c80af5e 100644 --- a/fs/xfs/xfs_inode_item.c +++ b/fs/xfs/xfs_inode_item.c @@ -521,7 +521,7 @@ xfs_inode_item_push( if (!xfs_buf_trylock(bp)) return XFS_ITEM_LOCKED; - if (!xfs_buf_resubmit_failed_buffers(bp, lip, buffer_list)) + if (!xfs_buf_resubmit_failed_buffers(bp, buffer_list)) rval = XFS_ITEM_FLUSHING; xfs_buf_unlock(bp); @@ -712,37 +712,23 @@ xfs_iflush_done( struct xfs_log_item *lip) { struct xfs_inode_log_item *iip; - struct xfs_log_item *blip; - struct xfs_log_item *next; - struct xfs_log_item *prev; + struct xfs_log_item *blip, *n; struct xfs_ail *ailp = lip->li_ailp; int need_ail = 0; + LIST_HEAD(tmp); /* * Scan the buffer IO completions for other inodes being completed and * attach them to the current inode log item. 
*/ - blip = bp->b_li_list; - prev = NULL; - while (blip != NULL) { - if (blip->li_cb != xfs_iflush_done) { - prev = blip; - blip = blip->li_bio_list; - continue; - } - /* remove from list */ - next = blip->li_bio_list; - if (!prev) { - bp->b_li_list = next; - } else { - prev->li_bio_list = next; - } + list_add_tail(&lip->li_bio_list, &tmp); - /* add to current list */ - blip->li_bio_list = lip->li_bio_list; - lip->li_bio_list = blip; + list_for_each_entry_safe(blip, n, &bp->b_li_list, li_bio_list) { + if (lip->li_cb != xfs_iflush_done) + continue; + list_move_tail(&blip->li_bio_list, &tmp); /* * while we have the item, do the unlocked check for needing * the AIL lock. @@ -751,8 +737,6 @@ xfs_iflush_done( if ((iip->ili_logged && blip->li_lsn == iip->ili_flush_lsn) || (blip->li_flags & XFS_LI_FAILED)) need_ail++; - - blip = next; } /* make sure we capture the state of the initial inode. */ @@ -775,7 +759,7 @@ xfs_iflush_done( /* this is an opencoded batch version of xfs_trans_ail_delete */ spin_lock(&ailp->xa_lock); - for (blip = lip; blip; blip = blip->li_bio_list) { + list_for_each_entry(blip, &tmp, li_bio_list) { if (INODE_ITEM(blip)->ili_logged && blip->li_lsn == INODE_ITEM(blip)->ili_flush_lsn) mlip_changed |= xfs_ail_delete_one(ailp, blip); @@ -801,15 +785,14 @@ xfs_iflush_done( * ili_last_fields bits now that we know that the data corresponding to * them is safely on disk. 
*/ - for (blip = lip; blip; blip = next) { - next = blip->li_bio_list; - blip->li_bio_list = NULL; - + list_for_each_entry_safe(blip, n, &tmp, li_bio_list) { + list_del_init(&blip->li_bio_list); iip = INODE_ITEM(blip); iip->ili_logged = 0; iip->ili_last_fields = 0; xfs_ifunlock(iip->ili_inode); } + list_del(&tmp); } /* diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c index 20483b654ef1..3e5ba1ecc080 100644 --- a/fs/xfs/xfs_log.c +++ b/fs/xfs/xfs_log.c @@ -1047,6 +1047,7 @@ xfs_log_item_init( INIT_LIST_HEAD(&item->li_ail); INIT_LIST_HEAD(&item->li_cil); + INIT_LIST_HEAD(&item->li_bio_list); } /* diff --git a/fs/xfs/xfs_trans.h b/fs/xfs/xfs_trans.h index 815b53d20e26..9d542dfe0052 100644 --- a/fs/xfs/xfs_trans.h +++ b/fs/xfs/xfs_trans.h @@ -50,7 +50,7 @@ typedef struct xfs_log_item { uint li_type; /* item type */ uint li_flags; /* misc flags */ struct xfs_buf *li_buf; /* real buffer pointer */ - struct xfs_log_item *li_bio_list; /* buffer item list */ + struct list_head li_bio_list; /* buffer item list */ void (*li_cb)(struct xfs_buf *, struct xfs_log_item *); /* buffer item iodone */ -- cgit v1.2.3 From 6ca30729c206d62d88730a904af7d543a56273d8 Mon Sep 17 00:00:00 2001 From: Shan Hai Date: Tue, 23 Jan 2018 13:56:11 -0800 Subject: xfs: bmap code cleanup Remove the extent size hint and realtime inode relevant code from the xfs_bmapi_reserve_delalloc since it is not called on the inode with extent size hint set or on a realtime inode. Signed-off-by: Shan Hai Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. 
Wong --- fs/xfs/libxfs/xfs_bmap.c | 32 ++++++++------------------------ 1 file changed, 8 insertions(+), 24 deletions(-) diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c index 6e6f3cb88cc2..0c9c9cdd532a 100644 --- a/fs/xfs/libxfs/xfs_bmap.c +++ b/fs/xfs/libxfs/xfs_bmap.c @@ -3876,8 +3876,6 @@ xfs_bmapi_reserve_delalloc( struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, whichfork); xfs_extlen_t alen; xfs_extlen_t indlen; - char rt = XFS_IS_REALTIME_INODE(ip); - xfs_extlen_t extsz; int error; xfs_fileoff_t aoff = off; @@ -3892,31 +3890,25 @@ xfs_bmapi_reserve_delalloc( prealloc = alen - len; /* Figure out the extent size, adjust alen */ - if (whichfork == XFS_COW_FORK) - extsz = xfs_get_cowextsz_hint(ip); - else - extsz = xfs_get_extsz_hint(ip); - if (extsz) { + if (whichfork == XFS_COW_FORK) { struct xfs_bmbt_irec prev; + xfs_extlen_t extsz = xfs_get_cowextsz_hint(ip); if (!xfs_iext_peek_prev_extent(ifp, icur, &prev)) prev.br_startoff = NULLFILEOFF; - error = xfs_bmap_extsize_align(mp, got, &prev, extsz, rt, eof, + error = xfs_bmap_extsize_align(mp, got, &prev, extsz, 0, eof, 1, 0, &aoff, &alen); ASSERT(!error); } - if (rt) - extsz = alen / mp->m_sb.sb_rextsize; - /* * Make a transaction-less quota reservation for delayed allocation * blocks. This number gets adjusted later. We return if we haven't * allocated blocks already inside this loop. */ error = xfs_trans_reserve_quota_nblks(NULL, ip, (long)alen, 0, - rt ? 
XFS_QMOPT_RES_RTBLKS : XFS_QMOPT_RES_REGBLKS); + XFS_QMOPT_RES_REGBLKS); if (error) return error; @@ -3927,12 +3919,7 @@ xfs_bmapi_reserve_delalloc( indlen = (xfs_extlen_t)xfs_bmap_worst_indlen(ip, alen); ASSERT(indlen > 0); - if (rt) { - error = xfs_mod_frextents(mp, -((int64_t)extsz)); - } else { - error = xfs_mod_fdblocks(mp, -((int64_t)alen), false); - } - + error = xfs_mod_fdblocks(mp, -((int64_t)alen), false); if (error) goto out_unreserve_quota; @@ -3963,14 +3950,11 @@ xfs_bmapi_reserve_delalloc( return 0; out_unreserve_blocks: - if (rt) - xfs_mod_frextents(mp, extsz); - else - xfs_mod_fdblocks(mp, alen, false); + xfs_mod_fdblocks(mp, alen, false); out_unreserve_quota: if (XFS_IS_QUOTA_ON(mp)) - xfs_trans_unreserve_quota_nblks(NULL, ip, (long)alen, 0, rt ? - XFS_QMOPT_RES_RTBLKS : XFS_QMOPT_RES_REGBLKS); + xfs_trans_unreserve_quota_nblks(NULL, ip, (long)alen, 0, + XFS_QMOPT_RES_REGBLKS); return error; } -- cgit v1.2.3 From 09ac862397041fc484cd7294b15d41073aa78864 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Fri, 19 Jan 2018 08:56:04 -0800 Subject: xfs: call xfs_qm_dqattach before performing reflink operations Ensure that we've attached all the necessary dquots before performing reflink operations so that quota accounting is accurate. Signed-off-by: Darrick J. Wong Reviewed-by: Brian Foster Reviewed-by: Christoph Hellwig --- fs/xfs/xfs_reflink.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c index 47aea2e82c26..bcc2ad4f0899 100644 --- a/fs/xfs/xfs_reflink.c +++ b/fs/xfs/xfs_reflink.c @@ -1295,6 +1295,11 @@ xfs_reflink_remap_range( if (ret <= 0) goto out_unlock; + /* Attach dquots to dest inode before changing block map */ + ret = xfs_qm_dqattach(dest, 0); + if (ret) + goto out_unlock; + trace_xfs_reflink_remap_range(src, pos_in, len, dest, pos_out); /* -- cgit v1.2.3 From beaae8cd588ec7b4e58f4bc32f603be15fb11766 Mon Sep 17 00:00:00 2001 From: "Darrick J. 
Wong" Date: Mon, 22 Jan 2018 19:19:26 -0800 Subject: xfs: always zero di_flags2 when we free the inode Always zero the di_flags2 field when we free the inode so that we never end up with an on-disk record for an unallocated inode that also has the reflink iflag set. This is in keeping with the general principle that only files can have the reflink iflag set, even though we'll zero out di_flags2 if we ever reallocate the inode. Signed-off-by: Darrick J. Wong Reviewed-by: Brian Foster Reviewed-by: Christoph Hellwig --- fs/xfs/xfs_inode.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index c66effc8e7dd..4ea6476bcbd7 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -2449,6 +2449,7 @@ xfs_ifree( VFS_I(ip)->i_mode = 0; /* mark incore inode as free */ ip->i_d.di_flags = 0; + ip->i_d.di_flags2 = 0; ip->i_d.di_dmevmask = 0; ip->i_d.di_forkoff = 0; /* mark the attr fork not in use */ ip->i_d.di_format = XFS_DINODE_FMT_EXTENTS; -- cgit v1.2.3 From 67a3f6d01495bbf520186aa3ecd013ba02b81462 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Mon, 22 Jan 2018 16:46:42 -0800 Subject: xfs: make tracepoint inode number format consistent Fix all the inode number formats to be consistently (0x%llx) in all trace point definitions. Signed-off-by: Darrick J. 
Wong Reviewed-by: Brian Foster Reviewed-by: Christoph Hellwig --- fs/xfs/scrub/trace.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/fs/xfs/scrub/trace.h b/fs/xfs/scrub/trace.h index a0a6d3cd131a..4dc896852bf0 100644 --- a/fs/xfs/scrub/trace.h +++ b/fs/xfs/scrub/trace.h @@ -50,7 +50,7 @@ DECLARE_EVENT_CLASS(xfs_scrub_class, __entry->flags = sm->sm_flags; __entry->error = error; ), - TP_printk("dev %d:%d ino %llu type %u agno %u inum %llu gen %u flags 0x%x error %d", + TP_printk("dev %d:%d ino 0x%llx type %u agno %u inum %llu gen %u flags 0x%x error %d", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->ino, __entry->type, @@ -121,7 +121,7 @@ TRACE_EVENT(xfs_scrub_file_op_error, __entry->error = error; __entry->ret_ip = ret_ip; ), - TP_printk("dev %d:%d ino %llu fork %d type %u offset %llu error %d ret_ip %pS", + TP_printk("dev %d:%d ino 0x%llx fork %d type %u offset %llu error %d ret_ip %pS", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->ino, __entry->whichfork, @@ -207,7 +207,7 @@ DECLARE_EVENT_CLASS(xfs_scrub_ino_error_class, __entry->bno = bno; __entry->ret_ip = ret_ip; ), - TP_printk("dev %d:%d ino %llu type %u agno %u agbno %u ret_ip %pS", + TP_printk("dev %d:%d ino 0x%llx type %u agno %u agbno %u ret_ip %pS", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->ino, __entry->type, @@ -246,7 +246,7 @@ DECLARE_EVENT_CLASS(xfs_scrub_fblock_error_class, __entry->offset = offset; __entry->ret_ip = ret_ip; ), - TP_printk("dev %d:%d ino %llu fork %d type %u offset %llu ret_ip %pS", + TP_printk("dev %d:%d ino 0x%llx fork %d type %u offset %llu ret_ip %pS", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->ino, __entry->whichfork, @@ -354,7 +354,7 @@ TRACE_EVENT(xfs_scrub_ifork_btree_op_error, __entry->error = error; __entry->ret_ip = ret_ip; ), - TP_printk("dev %d:%d ino %llu fork %d type %u btnum %d level %d ptr %d agno %u agbno %u error %d ret_ip %pS", + TP_printk("dev %d:%d ino 0x%llx fork %d type %u btnum %d level %d ptr %d 
agno %u agbno %u error %d ret_ip %pS", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->ino, __entry->whichfork, @@ -433,7 +433,7 @@ TRACE_EVENT(xfs_scrub_ifork_btree_error, __entry->ptr = cur->bc_ptrs[level]; __entry->ret_ip = ret_ip; ), - TP_printk("dev %d:%d ino %llu fork %d type %u btnum %d level %d ptr %d agno %u agbno %u ret_ip %pS", + TP_printk("dev %d:%d ino 0x%llx fork %d type %u btnum %d level %d ptr %d agno %u agbno %u ret_ip %pS", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->ino, __entry->whichfork, -- cgit v1.2.3 From 22431bf3dfbf44d7356933776eb486a6a01dea6f Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Mon, 22 Jan 2018 18:09:48 -0800 Subject: xfs: refactor inode verifier corruption error printing Refactor inode verifier error reporting into a non-libxfs function so that we aren't encoding the message format in libxfs. This also changes the kernel dmesg output to resemble buffer verifier errors more closely. Signed-off-by: Darrick J. Wong Reviewed-by: Brian Foster Reviewed-by: Christoph Hellwig --- fs/xfs/libxfs/xfs_inode_buf.c | 6 ++---- fs/xfs/xfs_error.c | 37 +++++++++++++++++++++++++++++++++++++ fs/xfs/xfs_error.h | 3 +++ fs/xfs/xfs_inode.c | 14 ++++++++------ 4 files changed, 50 insertions(+), 10 deletions(-) diff --git a/fs/xfs/libxfs/xfs_inode_buf.c b/fs/xfs/libxfs/xfs_inode_buf.c index 4035b5d5f6fd..d7e7e58f0ee2 100644 --- a/fs/xfs/libxfs/xfs_inode_buf.c +++ b/fs/xfs/libxfs/xfs_inode_buf.c @@ -578,10 +578,8 @@ xfs_iread( /* even unallocated inodes are verified */ fa = xfs_dinode_verify(mp, ip->i_ino, dip); if (fa) { - xfs_alert(mp, "%s: validation failed for inode %lld at %pS", - __func__, ip->i_ino, fa); - - XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, dip); + xfs_inode_verifier_error(ip, -EFSCORRUPTED, "dinode", dip, + sizeof(*dip), fa); error = -EFSCORRUPTED; goto out_brelse; } diff --git a/fs/xfs/xfs_error.c b/fs/xfs/xfs_error.c index 980d5f0660b5..ccf520f0b00d 100644 --- a/fs/xfs/xfs_error.c +++ 
b/fs/xfs/xfs_error.c @@ -24,6 +24,7 @@ #include "xfs_errortag.h" #include "xfs_error.h" #include "xfs_sysfs.h" +#include "xfs_inode.h" #ifdef DEBUG @@ -372,3 +373,39 @@ xfs_verifier_error( if (xfs_error_level >= XFS_ERRLEVEL_HIGH) xfs_stack_trace(); } + +/* + * Warnings for inode corruption problems. Don't bother with the stack + * trace unless the error level is turned up high. + */ +void +xfs_inode_verifier_error( + struct xfs_inode *ip, + int error, + const char *name, + void *buf, + size_t bufsz, + xfs_failaddr_t failaddr) +{ + struct xfs_mount *mp = ip->i_mount; + xfs_failaddr_t fa; + int sz; + + fa = failaddr ? failaddr : __return_address; + + xfs_alert(mp, "Metadata %s detected at %pS, inode 0x%llx %s", + error == -EFSBADCRC ? "CRC error" : "corruption", + fa, ip->i_ino, name); + + xfs_alert(mp, "Unmount and run xfs_repair"); + + if (buf && xfs_error_level >= XFS_ERRLEVEL_LOW) { + sz = min_t(size_t, XFS_CORRUPTION_DUMP_LEN, bufsz); + xfs_alert(mp, "First %d bytes of corrupted metadata buffer:", + sz); + xfs_hex_dump(buf, sz); + } + + if (xfs_error_level >= XFS_ERRLEVEL_HIGH) + xfs_stack_trace(); +} diff --git a/fs/xfs/xfs_error.h b/fs/xfs/xfs_error.h index a3ba05bd983d..7e728c5a46b8 100644 --- a/fs/xfs/xfs_error.h +++ b/fs/xfs/xfs_error.h @@ -28,6 +28,9 @@ extern void xfs_corruption_error(const char *tag, int level, int linenum, xfs_failaddr_t failaddr); extern void xfs_verifier_error(struct xfs_buf *bp, int error, xfs_failaddr_t failaddr); +extern void xfs_inode_verifier_error(struct xfs_inode *ip, int error, + const char *name, void *buf, size_t bufsz, + xfs_failaddr_t failaddr); #define XFS_ERROR_REPORT(e, lvl, mp) \ xfs_error_report(e, lvl, mp, __FILE__, __LINE__, __return_address) diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index 4ea6476bcbd7..5366fb619db6 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -3486,21 +3486,23 @@ bool xfs_inode_verify_forks( struct xfs_inode *ip) { + struct xfs_ifork *ifp; xfs_failaddr_t fa; fa = 
xfs_ifork_verify_data(ip, &xfs_default_ifork_ops); if (fa) { - xfs_alert(ip->i_mount, - "%s: bad inode %llu inline data fork at %pS", - __func__, ip->i_ino, fa); + ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK); + xfs_inode_verifier_error(ip, -EFSCORRUPTED, "data fork", + ifp->if_u1.if_data, ifp->if_bytes, fa); return false; } fa = xfs_ifork_verify_attr(ip, &xfs_default_ifork_ops); if (fa) { - xfs_alert(ip->i_mount, - "%s: bad inode %llu inline attr fork at %pS", - __func__, ip->i_ino, fa); + ifp = XFS_IFORK_PTR(ip, XFS_ATTR_FORK); + xfs_inode_verifier_error(ip, -EFSCORRUPTED, "attr fork", + ifp ? ifp->if_u1.if_data : NULL, + ifp ? ifp->if_bytes : 0, fa); return false; } return true; -- cgit v1.2.3 From 751f3767c245f9adf4f0a4f8f04aae9ae1d675a0 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Thu, 25 Jan 2018 13:58:13 -0800 Subject: xfs: refactor accounting updates out of xfs_bmap_btalloc Move all the inode and quota accounting updates out of xfs_bmap_btalloc in preparation for fixing some quota accounting problems with copy on write. Signed-off-by: Darrick J. Wong Reviewed-by: Christoph Hellwig Reviewed-by: Brian Foster --- fs/xfs/libxfs/xfs_bmap.c | 30 +++++++++++++++++------------- 1 file changed, 17 insertions(+), 13 deletions(-) diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c index 0c9c9cdd532a..4582f5547ae3 100644 --- a/fs/xfs/libxfs/xfs_bmap.c +++ b/fs/xfs/libxfs/xfs_bmap.c @@ -3337,6 +3337,22 @@ xfs_bmap_btalloc_filestreams( return 0; } +/* Update all inode and quota accounting for the allocation we just did. */ +static void +xfs_bmap_btalloc_accounting( + struct xfs_bmalloca *ap, + struct xfs_alloc_arg *args) +{ + if (!(ap->flags & XFS_BMAPI_COWFORK)) + ap->ip->i_d.di_nblocks += args->len; + xfs_trans_log_inode(ap->tp, ap->ip, XFS_ILOG_CORE); + if (ap->wasdel) + ap->ip->i_delayed_blks -= args->len; + xfs_trans_mod_dquot_byino(ap->tp, ap->ip, + ap->wasdel ? 
XFS_TRANS_DQ_DELBCOUNT : XFS_TRANS_DQ_BCOUNT, + args->len); +} + STATIC int xfs_bmap_btalloc( struct xfs_bmalloca *ap) /* bmap alloc argument struct */ @@ -3571,19 +3587,7 @@ xfs_bmap_btalloc( *ap->firstblock = args.fsbno; ASSERT(nullfb || fb_agno <= args.agno); ap->length = args.len; - if (!(ap->flags & XFS_BMAPI_COWFORK)) - ap->ip->i_d.di_nblocks += args.len; - xfs_trans_log_inode(ap->tp, ap->ip, XFS_ILOG_CORE); - if (ap->wasdel) - ap->ip->i_delayed_blks -= args.len; - /* - * Adjust the disk quota also. This was reserved - * earlier. - */ - xfs_trans_mod_dquot_byino(ap->tp, ap->ip, - ap->wasdel ? XFS_TRANS_DQ_DELBCOUNT : - XFS_TRANS_DQ_BCOUNT, - (long) args.len); + xfs_bmap_btalloc_accounting(ap, &args); } else { ap->blkno = NULLFSBLOCK; ap->length = 0; -- cgit v1.2.3 From acd1d71598f7654b6d7718bcbe979992295c672a Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Fri, 26 Jan 2018 11:24:40 -0800 Subject: xfs: preserve i_rdev when recycling a reclaimable inode Commit 66f364649d870 ("xfs: remove if_rdev") moved storing of rdev value for special inodes to VFS inodes, but forgot to preserve the value of i_rdev when recycling a reclaimable xfs_inode. This was detected by xfstest overlay/017 with index=on mount option and xfs base fs. The test does a lookup of overlay chardev and blockdev right after drop caches. Overlayfs inodes hold a reference on underlying xfs inodes when mount option index=on is configured. If drop caches reclaim xfs inodes, before it reclaims overlayfs inodes, that can sometimes leave a reclaimable xfs inode and that test hits that case quite often. When that happens, the xfs inode cache remains broken (zero i_rdev) until the next cycle mount or drop caches. Fixes: 66f364649d870 ("xfs: remove if_rdev") Signed-off-by: Amir Goldstein Reviewed-by: Christoph Hellwig Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J.
Wong --- fs/xfs/xfs_icache.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/xfs/xfs_icache.c b/fs/xfs/xfs_icache.c index 2da7a2ee34bc..73bbeac739ed 100644 --- a/fs/xfs/xfs_icache.c +++ b/fs/xfs/xfs_icache.c @@ -295,6 +295,7 @@ xfs_reinit_inode( uint32_t generation = inode->i_generation; uint64_t version = inode->i_version; umode_t mode = inode->i_mode; + dev_t dev = inode->i_rdev; error = inode_init_always(mp->m_super, inode); @@ -302,6 +303,7 @@ xfs_reinit_inode( inode->i_generation = generation; inode->i_version = version; inode->i_mode = mode; + inode->i_rdev = dev; return error; } -- cgit v1.2.3 From 70c57dcd606f218b507372a05e633b23351258f0 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Wed, 24 Jan 2018 20:48:53 -0800 Subject: xfs: skip CoW writes past EOF when writeback races with truncate Every so often we blow the ASSERT(type != XFS_IO_COW) in xfs_map_blocks when running fsstress, as we do in generic/269. The cause of this is writeback racing with truncate -- writeback doesn't take the iolock, so truncate can sneak in to decrease i_size and truncate page cache while writeback is gathering buffer heads to schedule writeout. If we hit this race on a block that has a CoW mapping, we'll get a valid imap from the CoW fork but the reduced i_size trims the mapping to zero length (which makes it invalid), so we call xfs_map_blocks to try again. This doesn't do much anyway, since any mapping we get out of that will also be invalid, so we might as well skip the assert and just stop. Signed-off-by: Darrick J. 
Wong Reviewed-by: Christoph Hellwig --- fs/xfs/xfs_aops.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c index 2e094c76bd45..9c6a830da0ee 100644 --- a/fs/xfs/xfs_aops.c +++ b/fs/xfs/xfs_aops.c @@ -390,6 +390,19 @@ xfs_map_blocks( if (XFS_FORCED_SHUTDOWN(mp)) return -EIO; + /* + * Truncate can race with writeback since writeback doesn't take the + * iolock and truncate decreases the file size before it starts + * truncating the pages between new_size and old_size. Therefore, we + * can end up in the situation where writeback gets a CoW fork mapping + * but the truncate makes the mapping invalid and we end up in here + * trying to get a new mapping. Bail out here so that we simply never + * get a valid mapping and so we drop the write altogether. The page + * truncation will kill the contents anyway. + */ + if (type == XFS_IO_COW && offset > i_size_read(inode)) + return 0; + ASSERT(type != XFS_IO_COW); if (type == XFS_IO_UNWRITTEN) bmapi_flags |= XFS_BMAPI_IGSTATE; -- cgit v1.2.3 From c47b74fb2dba46642fc9c2581a28893b42a42815 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Tue, 23 Jan 2018 11:17:47 -0800 Subject: xfs: don't clobber inobt/finobt cursors when xref with rmap Even if we can't use the inobt/finobt cursors to count the number of inode btree blocks, we are never allowed to clobber the cursor of the btree being checked, so don't do this. Found by fuzzing level = ones in xfs/364. Signed-off-by: Darrick J. Wong Reviewed-by: Christoph Hellwig --- fs/xfs/scrub/ialloc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/xfs/scrub/ialloc.c b/fs/xfs/scrub/ialloc.c index 21c850abbafd..63ab3f98430d 100644 --- a/fs/xfs/scrub/ialloc.c +++ b/fs/xfs/scrub/ialloc.c @@ -391,12 +391,12 @@ xfs_scrub_iallocbt_xref_rmap_btreeblks( /* Check that we saw as many inobt blocks as the rmap says. 
*/ error = xfs_btree_count_blocks(sc->sa.ino_cur, &inobt_blocks); - if (!xfs_scrub_should_check_xref(sc, &error, &sc->sa.ino_cur)) + if (!xfs_scrub_process_error(sc, 0, 0, &error)) return; if (sc->sa.fino_cur) { error = xfs_btree_count_blocks(sc->sa.fino_cur, &finobt_blocks); - if (!xfs_scrub_should_check_xref(sc, &error, &sc->sa.fino_cur)) + if (!xfs_scrub_process_error(sc, 0, 0, &error)) return; } -- cgit v1.2.3 From 1364b1d4b5df31a05b6a3e6fdeb5371dbd4bd8ac Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Thu, 18 Jan 2018 13:55:20 -0800 Subject: xfs: reflink should break pnfs leases before sharing blocks Before we share blocks between files, we need to break the pnfs leases on the layout before we start slicing and dicing the block map. The structure of this function sets us up for the lock contention reduction in the next patch. Signed-off-by: Darrick J. Wong Reviewed-by: Brian Foster Reviewed-by: Christoph Hellwig --- fs/xfs/xfs_reflink.c | 48 +++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 47 insertions(+), 1 deletion(-) diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c index bcc2ad4f0899..bac464f0bc59 100644 --- a/fs/xfs/xfs_reflink.c +++ b/fs/xfs/xfs_reflink.c @@ -1244,6 +1244,50 @@ err: return error; } +/* + * Grab the exclusive iolock for a data copy from src to dest, making + * sure to abide vfs locking order (lowest pointer value goes first) and + * breaking the pnfs layout leases on dest before proceeding. The loop + * is needed because we cannot call the blocking break_layout() with the + * src iolock held, and therefore have to back out both locks. 
+ */ +static int +xfs_iolock_two_inodes_and_break_layout( + struct inode *src, + struct inode *dest) +{ + int error; + +retry: + if (src < dest) { + inode_lock(src); + inode_lock_nested(dest, I_MUTEX_NONDIR2); + } else { + /* src >= dest */ + inode_lock(dest); + } + + error = break_layout(dest, false); + if (error == -EWOULDBLOCK) { + inode_unlock(dest); + if (src < dest) + inode_unlock(src); + error = break_layout(dest, true); + if (error) + return error; + goto retry; + } + if (error) { + inode_unlock(dest); + if (src < dest) + inode_unlock(src); + return error; + } + if (src > dest) + inode_lock_nested(src, I_MUTEX_NONDIR2); + return 0; +} + /* * Link a range of blocks from one file to another. */ @@ -1274,7 +1318,9 @@ xfs_reflink_remap_range( return -EIO; /* Lock both files against IO */ - lock_two_nondirectories(inode_in, inode_out); + ret = xfs_iolock_two_inodes_and_break_layout(inode_in, inode_out); + if (ret) + return ret; if (same_inode) xfs_ilock(src, XFS_MMAPLOCK_EXCL); else -- cgit v1.2.3 From 7c2d238ac6c435c07780a54719760da2beb46a43 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Fri, 26 Jan 2018 15:27:33 -0800 Subject: xfs: allow xfs_lock_two_inodes to take different EXCL/SHARED modes Refactor xfs_lock_two_inodes to take separate locking modes for each inode. Specifically, this enables us to take a SHARED lock on one inode and an EXCL lock on the other. The lock class (MMAPLOCK/ILOCK) must be the same for each inode. Signed-off-by: Darrick J. 
Wong Reviewed-by: Christoph Hellwig --- fs/xfs/xfs_bmap_util.c | 4 ++-- fs/xfs/xfs_inode.c | 49 ++++++++++++++++++++++++++++++++----------------- fs/xfs/xfs_inode.h | 3 ++- fs/xfs/xfs_reflink.c | 5 +++-- 4 files changed, 39 insertions(+), 22 deletions(-) diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c index 6d37ab43195f..c83f549dc17b 100644 --- a/fs/xfs/xfs_bmap_util.c +++ b/fs/xfs/xfs_bmap_util.c @@ -1872,7 +1872,7 @@ xfs_swap_extents( */ lock_two_nondirectories(VFS_I(ip), VFS_I(tip)); lock_flags = XFS_MMAPLOCK_EXCL; - xfs_lock_two_inodes(ip, tip, XFS_MMAPLOCK_EXCL); + xfs_lock_two_inodes(ip, XFS_MMAPLOCK_EXCL, tip, XFS_MMAPLOCK_EXCL); /* Verify that both files have the same format */ if ((VFS_I(ip)->i_mode & S_IFMT) != (VFS_I(tip)->i_mode & S_IFMT)) { @@ -1919,7 +1919,7 @@ xfs_swap_extents( * Lock and join the inodes to the tansaction so that transaction commit * or cancel will unlock the inodes from this point onwards. */ - xfs_lock_two_inodes(ip, tip, XFS_ILOCK_EXCL); + xfs_lock_two_inodes(ip, XFS_ILOCK_EXCL, tip, XFS_ILOCK_EXCL); lock_flags |= XFS_ILOCK_EXCL; xfs_trans_ijoin(tp, ip, 0); xfs_trans_ijoin(tp, tip, 0); diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index 5366fb619db6..e7f6d5291a7a 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -546,23 +546,36 @@ again: /* * xfs_lock_two_inodes() can only be used to lock one type of lock at a time - - * the iolock, the mmaplock or the ilock, but not more than one at a time. If we - * lock more than one at a time, lockdep will report false positives saying we - * have violated locking orders. + * the mmaplock or the ilock, but not more than one type at a time. If we lock + * more than one at a time, lockdep will report false positives saying we have + * violated locking orders. The iolock must be double-locked separately since + * we use i_rwsem for that. We now support taking one lock EXCL and the other + * SHARED. 
*/ void xfs_lock_two_inodes( - xfs_inode_t *ip0, - xfs_inode_t *ip1, - uint lock_mode) + struct xfs_inode *ip0, + uint ip0_mode, + struct xfs_inode *ip1, + uint ip1_mode) { - xfs_inode_t *temp; + struct xfs_inode *temp; + uint mode_temp; int attempts = 0; xfs_log_item_t *lp; - ASSERT(!(lock_mode & (XFS_IOLOCK_SHARED|XFS_IOLOCK_EXCL))); - if (lock_mode & (XFS_MMAPLOCK_SHARED|XFS_MMAPLOCK_EXCL)) - ASSERT(!(lock_mode & (XFS_ILOCK_SHARED|XFS_ILOCK_EXCL))); + ASSERT(hweight32(ip0_mode) == 1); + ASSERT(hweight32(ip1_mode) == 1); + ASSERT(!(ip0_mode & (XFS_IOLOCK_SHARED|XFS_IOLOCK_EXCL))); + ASSERT(!(ip1_mode & (XFS_IOLOCK_SHARED|XFS_IOLOCK_EXCL))); + ASSERT(!(ip0_mode & (XFS_MMAPLOCK_SHARED|XFS_MMAPLOCK_EXCL)) || + !(ip0_mode & (XFS_ILOCK_SHARED|XFS_ILOCK_EXCL))); + ASSERT(!(ip1_mode & (XFS_MMAPLOCK_SHARED|XFS_MMAPLOCK_EXCL)) || + !(ip1_mode & (XFS_ILOCK_SHARED|XFS_ILOCK_EXCL))); + ASSERT(!(ip1_mode & (XFS_MMAPLOCK_SHARED|XFS_MMAPLOCK_EXCL)) || + !(ip0_mode & (XFS_ILOCK_SHARED|XFS_ILOCK_EXCL))); + ASSERT(!(ip0_mode & (XFS_MMAPLOCK_SHARED|XFS_MMAPLOCK_EXCL)) || + !(ip1_mode & (XFS_ILOCK_SHARED|XFS_ILOCK_EXCL))); ASSERT(ip0->i_ino != ip1->i_ino); @@ -570,10 +583,13 @@ xfs_lock_two_inodes( temp = ip0; ip0 = ip1; ip1 = temp; + mode_temp = ip0_mode; + ip0_mode = ip1_mode; + ip1_mode = mode_temp; } again: - xfs_ilock(ip0, xfs_lock_inumorder(lock_mode, 0)); + xfs_ilock(ip0, xfs_lock_inumorder(ip0_mode, 0)); /* * If the first lock we have locked is in the AIL, we must TRY to get @@ -582,18 +598,17 @@ xfs_lock_two_inodes( */ lp = (xfs_log_item_t *)ip0->i_itemp; if (lp && (lp->li_flags & XFS_LI_IN_AIL)) { - if (!xfs_ilock_nowait(ip1, xfs_lock_inumorder(lock_mode, 1))) { - xfs_iunlock(ip0, lock_mode); + if (!xfs_ilock_nowait(ip1, xfs_lock_inumorder(ip1_mode, 1))) { + xfs_iunlock(ip0, ip0_mode); if ((++attempts % 5) == 0) delay(1); /* Don't just spin the CPU */ goto again; } } else { - xfs_ilock(ip1, xfs_lock_inumorder(lock_mode, 1)); + xfs_ilock(ip1, xfs_lock_inumorder(ip1_mode, 
1)); } } - void __xfs_iflock( struct xfs_inode *ip) @@ -1421,7 +1436,7 @@ xfs_link( if (error) goto std_return; - xfs_lock_two_inodes(sip, tdp, XFS_ILOCK_EXCL); + xfs_lock_two_inodes(sip, XFS_ILOCK_EXCL, tdp, XFS_ILOCK_EXCL); xfs_trans_ijoin(tp, sip, XFS_ILOCK_EXCL); xfs_trans_ijoin(tp, tdp, XFS_ILOCK_EXCL); @@ -2585,7 +2600,7 @@ xfs_remove( goto std_return; } - xfs_lock_two_inodes(dp, ip, XFS_ILOCK_EXCL); + xfs_lock_two_inodes(dp, XFS_ILOCK_EXCL, ip, XFS_ILOCK_EXCL); xfs_trans_ijoin(tp, dp, XFS_ILOCK_EXCL); xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h index 386b0bb3c92a..3e8dc990d41c 100644 --- a/fs/xfs/xfs_inode.h +++ b/fs/xfs/xfs_inode.h @@ -423,7 +423,8 @@ void xfs_iunpin_wait(xfs_inode_t *); #define xfs_ipincount(ip) ((unsigned int) atomic_read(&ip->i_pincount)) int xfs_iflush(struct xfs_inode *, struct xfs_buf **); -void xfs_lock_two_inodes(xfs_inode_t *, xfs_inode_t *, uint); +void xfs_lock_two_inodes(struct xfs_inode *ip0, uint ip0_mode, + struct xfs_inode *ip1, uint ip1_mode); xfs_extlen_t xfs_get_extsz_hint(struct xfs_inode *ip); xfs_extlen_t xfs_get_cowextsz_hint(struct xfs_inode *ip); diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c index bac464f0bc59..bcc58c24287c 100644 --- a/fs/xfs/xfs_reflink.c +++ b/fs/xfs/xfs_reflink.c @@ -944,7 +944,7 @@ xfs_reflink_set_inode_flag( if (src->i_ino == dest->i_ino) xfs_ilock(src, XFS_ILOCK_EXCL); else - xfs_lock_two_inodes(src, dest, XFS_ILOCK_EXCL); + xfs_lock_two_inodes(src, XFS_ILOCK_EXCL, dest, XFS_ILOCK_EXCL); if (!xfs_is_reflink_inode(src)) { trace_xfs_reflink_set_inode_flag(src); @@ -1324,7 +1324,8 @@ xfs_reflink_remap_range( if (same_inode) xfs_ilock(src, XFS_MMAPLOCK_EXCL); else - xfs_lock_two_inodes(src, dest, XFS_MMAPLOCK_EXCL); + xfs_lock_two_inodes(src, XFS_MMAPLOCK_EXCL, dest, + XFS_MMAPLOCK_EXCL); /* Check file eligibility and prepare for block sharing. 
*/ ret = -EINVAL; -- cgit v1.2.3 From 01c2e13dcae9757ea987af8933f9fcc6e33f2d7c Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Thu, 18 Jan 2018 14:07:53 -0800 Subject: xfs: only grab shared inode locks for source file during reflink Reflink and dedupe operations remap blocks from a source file into a destination file. The destination file needs exclusive locks on all levels because we're updating its block map, but the source file isn't undergoing any block map changes so we can use a shared lock. Signed-off-by: Darrick J. Wong Reviewed-by: Christoph Hellwig --- fs/xfs/xfs_reflink.c | 25 +++++++++++++++---------- include/linux/fs.h | 5 +++++ 2 files changed, 20 insertions(+), 10 deletions(-) diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c index bcc58c24287c..85a119e1463b 100644 --- a/fs/xfs/xfs_reflink.c +++ b/fs/xfs/xfs_reflink.c @@ -1202,13 +1202,16 @@ xfs_reflink_remap_blocks( /* drange = (destoff, destoff + len); srange = (srcoff, srcoff + len) */ while (len) { + uint lock_mode; + trace_xfs_reflink_remap_blocks_loop(src, srcoff, len, dest, destoff); + /* Read extent from the source file */ nimaps = 1; - xfs_ilock(src, XFS_ILOCK_EXCL); + lock_mode = xfs_ilock_data_map_shared(src); error = xfs_bmapi_read(src, srcoff, len, &imap, &nimaps, 0); - xfs_iunlock(src, XFS_ILOCK_EXCL); + xfs_iunlock(src, lock_mode); if (error) goto err; ASSERT(nimaps == 1); @@ -1260,7 +1263,7 @@ xfs_iolock_two_inodes_and_break_layout( retry: if (src < dest) { - inode_lock(src); + inode_lock_shared(src); inode_lock_nested(dest, I_MUTEX_NONDIR2); } else { /* src >= dest */ @@ -1271,7 +1274,7 @@ retry: if (error == -EWOULDBLOCK) { inode_unlock(dest); if (src < dest) - inode_unlock(src); + inode_unlock_shared(src); error = break_layout(dest, true); if (error) return error; @@ -1280,11 +1283,11 @@ retry: if (error) { inode_unlock(dest); if (src < dest) - inode_unlock(src); + inode_unlock_shared(src); return error; } if (src > dest) - inode_lock_nested(src, I_MUTEX_NONDIR2); + 
inode_lock_shared_nested(src, I_MUTEX_NONDIR2); return 0; } @@ -1324,7 +1327,7 @@ xfs_reflink_remap_range( if (same_inode) xfs_ilock(src, XFS_MMAPLOCK_EXCL); else - xfs_lock_two_inodes(src, XFS_MMAPLOCK_EXCL, dest, + xfs_lock_two_inodes(src, XFS_MMAPLOCK_SHARED, dest, XFS_MMAPLOCK_EXCL); /* Check file eligibility and prepare for block sharing. */ @@ -1393,10 +1396,12 @@ xfs_reflink_remap_range( is_dedupe); out_unlock: - xfs_iunlock(src, XFS_MMAPLOCK_EXCL); + xfs_iunlock(dest, XFS_MMAPLOCK_EXCL); + if (!same_inode) + xfs_iunlock(src, XFS_MMAPLOCK_SHARED); + inode_unlock(inode_out); if (!same_inode) - xfs_iunlock(dest, XFS_MMAPLOCK_EXCL); - unlock_two_nondirectories(inode_in, inode_out); + inode_unlock_shared(inode_in); if (ret) trace_xfs_reflink_remap_range_error(dest, ret, _RET_IP_); return ret; diff --git a/include/linux/fs.h b/include/linux/fs.h index 7f8d96d68f34..5cbeab8a63ca 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -748,6 +748,11 @@ static inline void inode_lock_nested(struct inode *inode, unsigned subclass) down_write_nested(&inode->i_rwsem, subclass); } +static inline void inode_lock_shared_nested(struct inode *inode, unsigned subclass) +{ + down_read_nested(&inode->i_rwsem, subclass); +} + void lock_two_nondirectories(struct inode *, struct inode*); void unlock_two_nondirectories(struct inode *, struct inode*); -- cgit v1.2.3 From 4b4c1326fd7c7210d23d9dd3bfc51f2b6477bb9e Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Fri, 19 Jan 2018 09:05:48 -0800 Subject: xfs: treat CoW fork operations as delalloc for quota accounting Since the CoW fork only exists in memory, it is incorrect to update the on-disk quota block counts when we modify the CoW fork. Unlike the data fork, even real extents in the CoW fork are only delalloc-style reservations (on-disk they're owned by the refcountbt) so they must not be tracked in the on disk quota info. Ensure the i_delayed_blks accounting reflects this too. Signed-off-by: Darrick J. 
Wong Reviewed-by: Christoph Hellwig --- fs/xfs/libxfs/xfs_bmap.c | 32 ++++++++++++++++++++++++++++++-- fs/xfs/xfs_reflink.c | 15 +++++++++++---- 2 files changed, 41 insertions(+), 6 deletions(-) diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c index 4582f5547ae3..cad21fd0c45d 100644 --- a/fs/xfs/libxfs/xfs_bmap.c +++ b/fs/xfs/libxfs/xfs_bmap.c @@ -3343,8 +3343,35 @@ xfs_bmap_btalloc_accounting( struct xfs_bmalloca *ap, struct xfs_alloc_arg *args) { - if (!(ap->flags & XFS_BMAPI_COWFORK)) - ap->ip->i_d.di_nblocks += args->len; + if (ap->flags & XFS_BMAPI_COWFORK) { + /* + * COW fork blocks are in-core only and thus are treated as + * in-core quota reservation (like delalloc blocks) even when + * converted to real blocks. The quota reservation is not + * accounted to disk until blocks are remapped to the data + * fork. So if these blocks were previously delalloc, we + * already have quota reservation and there's nothing to do + * yet. + */ + if (ap->wasdel) + return; + + /* + * Otherwise, we've allocated blocks in a hole. The transaction + * has acquired in-core quota reservation for this extent. + * Rather than account these as real blocks, however, we reduce + * the transaction quota reservation based on the allocation. + * This essentially transfers the transaction quota reservation + * to that of a delalloc extent. 
+ */ + ap->ip->i_delayed_blks += args->len; + xfs_trans_mod_dquot_byino(ap->tp, ap->ip, XFS_TRANS_DQ_RES_BLKS, + -(long)args->len); + return; + } + + /* data/attr fork only */ + ap->ip->i_d.di_nblocks += args->len; xfs_trans_log_inode(ap->tp, ap->ip, XFS_ILOG_CORE); if (ap->wasdel) ap->ip->i_delayed_blks -= args->len; @@ -4820,6 +4847,7 @@ xfs_bmap_del_extent_cow( xfs_iext_insert(ip, icur, &new, state); break; } + ip->i_delayed_blks -= del->br_blockcount; } /* diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c index 85a119e1463b..c4f0aff86f96 100644 --- a/fs/xfs/xfs_reflink.c +++ b/fs/xfs/xfs_reflink.c @@ -599,10 +599,6 @@ xfs_reflink_cancel_cow_blocks( del.br_startblock, del.br_blockcount, NULL); - /* Update quota accounting */ - xfs_trans_mod_dquot_byino(*tpp, ip, XFS_TRANS_DQ_BCOUNT, - -(long)del.br_blockcount); - /* Roll the transaction */ xfs_defer_ijoin(&dfops, ip); error = xfs_defer_finish(tpp, &dfops); @@ -613,6 +609,13 @@ xfs_reflink_cancel_cow_blocks( /* Remove the mapping from the CoW fork. */ xfs_bmap_del_extent_cow(ip, &icur, &got, &del); + + /* Remove the quota reservation */ + error = xfs_trans_reserve_quota_nblks(NULL, ip, + -(long)del.br_blockcount, 0, + XFS_QMOPT_RES_REGBLKS); + if (error) + break; } else { /* Didn't do anything, push cursor back. */ xfs_iext_prev(ifp, &icur); @@ -795,6 +798,10 @@ xfs_reflink_end_cow( if (error) goto out_defer; + /* Charge this new data fork mapping to the on-disk quota. */ + xfs_trans_mod_dquot_byino(tp, ip, XFS_TRANS_DQ_DELBCOUNT, + (long)del.br_blockcount); + /* Remove the mapping from the CoW fork. */ xfs_bmap_del_extent_cow(ip, &icur, &got, &del); -- cgit v1.2.3 From 0c6dda7a1cbd587e48bcef1999875e29549c2b41 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Fri, 26 Jan 2018 11:11:20 -0800 Subject: iomap: warn on zero-length mappings Don't let the iomap callback get away with feeding us a garbage zero length mapping -- there was a bug in xfs that resulted in those leaking out to hilarious effect. 
Signed-off-by: Darrick J. Wong Reviewed-by: Christoph Hellwig --- fs/iomap.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/iomap.c b/fs/iomap.c index e5de7725f18a..afd163586aa0 100644 --- a/fs/iomap.c +++ b/fs/iomap.c @@ -65,6 +65,8 @@ iomap_apply(struct inode *inode, loff_t pos, loff_t length, unsigned flags, return ret; if (WARN_ON(iomap.offset > pos)) return -EIO; + if (WARN_ON(iomap.length == 0)) + return -EIO; /* * Cut down the length to the one actually provided by the filesystem, -- cgit v1.2.3 From 9f37bd11b442dc7c79d8979ecf627c059bc6bfe7 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Fri, 26 Jan 2018 11:37:44 -0800 Subject: xfs: check reflink allocation mappings There's a really bad bug in xfs_reflink_allocate_cow -- bmapi_write can return a zero error code but no mappings. This happens if there's an extent size hint (which causes allocation requests to be rounded to extsz granularity internally), but there wasn't a big enough chunk of free space to start filling at the extsz granularity and fill even one block of the range that we actually requested. In any case, if we got no mappings we can't possibly do anything useful with the contents of imap, so we must bail out with ENOSPC here. Signed-off-by: Darrick J. Wong Reviewed-by: Christoph Hellwig --- fs/xfs/xfs_reflink.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c index c4f0aff86f96..270246943a06 100644 --- a/fs/xfs/xfs_reflink.c +++ b/fs/xfs/xfs_reflink.c @@ -464,6 +464,13 @@ retry: error = xfs_trans_commit(tp); if (error) return error; + + /* + * Allocation succeeded but the requested range was not even partially + * satisfied? Bail out! + */ + if (nimaps == 0) + return -ENOSPC; convert: return xfs_reflink_convert_cow_extent(ip, imap, offset_fsb, count_fsb, &dfops); -- cgit v1.2.3 From 6d8a45ce29c7d67cc4fc3016dc2a07660c62482a Mon Sep 17 00:00:00 2001 From: "Darrick J.
Wong" Date: Fri, 19 Jan 2018 17:47:36 -0800 Subject: xfs: don't screw up direct writes when freesp is fragmented xfs_bmap_btalloc is given a range of file offset blocks that must be allocated to some data/attr/cow fork. If the fork has an extent size hint associated with it, the request will be enlarged on both ends to try to satisfy the alignment hint. If free space is fragmented, sometimes we can allocate some blocks but not enough to fulfill any of the requested range. Since bmapi_allocate always trims the new extent mapping to match the originally requested range, this results in bmapi_write returning zero and no mapping. The consequences of this vary -- buffered writes will simply re-call bmapi_write until it can satisfy at least one block from the original request. Direct IO overwrites notice nmaps == 0 and return -ENOSPC through the dio mechanism out to userspace with the weird result that writes fail even when we have enough space because the ENOSPC return overrides any partial write status. For direct CoW writes the situation was disastrous because nobody notices us returning an invalid zero-length wrong-offset mapping to iomap and the write goes off into space. Therefore, if free space is so fragmented that we managed to allocate some space but not enough to map into even a single block of the original allocation request range, we should break the alignment hint in order to guarantee at least some forward progress for the direct write. If we return a short allocation to iomap_apply it'll call back about the remaining blocks. Signed-off-by: Darrick J.
Wong Reviewed-by: Christoph Hellwig --- fs/xfs/libxfs/xfs_bmap.c | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c index cad21fd0c45d..daae00ed30c5 100644 --- a/fs/xfs/libxfs/xfs_bmap.c +++ b/fs/xfs/libxfs/xfs_bmap.c @@ -3390,6 +3390,8 @@ xfs_bmap_btalloc( xfs_agnumber_t fb_agno; /* ag number of ap->firstblock */ xfs_agnumber_t ag; xfs_alloc_arg_t args; + xfs_fileoff_t orig_offset; + xfs_extlen_t orig_length; xfs_extlen_t blen; xfs_extlen_t nextminlen = 0; int nullfb; /* true if ap->firstblock isn't set */ @@ -3399,6 +3401,8 @@ xfs_bmap_btalloc( int stripe_align; ASSERT(ap->length); + orig_offset = ap->offset; + orig_length = ap->length; mp = ap->ip->i_mount; @@ -3614,6 +3618,22 @@ xfs_bmap_btalloc( *ap->firstblock = args.fsbno; ASSERT(nullfb || fb_agno <= args.agno); ap->length = args.len; + /* + * If the extent size hint is active, we tried to round the + * caller's allocation request offset down to extsz and the + * length up to another extsz boundary. If we found a free + * extent we mapped it in starting at this new offset. If the + * newly mapped space isn't long enough to cover any of the + * range of offsets that was originally requested, move the + * mapping up so that we can fill as much of the caller's + * original request as possible. Free space is apparently + * very fragmented so we're unlikely to be able to satisfy the + * hints anyway. 
+ */ + if (ap->length <= orig_length) + ap->offset = orig_offset; + else if (ap->offset + ap->length < orig_offset + orig_length) + ap->offset = orig_offset + orig_length - ap->length; xfs_bmap_btalloc_accounting(ap, &args); } else { ap->blkno = NULLFSBLOCK; -- cgit v1.2.3 From 1e369b0e199bbfbab5218e1c1443d839700d8884 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 8 Jan 2018 13:30:08 -0800 Subject: xfs: remove experimental tag for reflinks But reject reflink + DAX file systems for now until the code to support reflinks on DAX is actually implemented. Signed-off-by: Christoph Hellwig Reviewed-by: Darrick J. Wong [darrick: port to 4.16] Signed-off-by: Darrick J. Wong --- fs/xfs/xfs_super.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c index a66335599c7d..f3e0001f9992 100644 --- a/fs/xfs/xfs_super.c +++ b/fs/xfs/xfs_super.c @@ -1668,7 +1668,7 @@ xfs_fs_fill_super( } if (xfs_sb_version_hasreflink(&mp->m_sb)) xfs_alert(mp, - "DAX and reflink have not been tested together!"); + "DAX and reflink cannot be used together!"); } if (mp->m_flags & XFS_MOUNT_DISCARD) { @@ -1692,10 +1692,6 @@ xfs_fs_fill_super( "EXPERIMENTAL reverse mapping btree feature enabled. Use at your own risk!"); } - if (xfs_sb_version_hasreflink(&mp->m_sb)) - xfs_alert(mp, - "EXPERIMENTAL reflink feature enabled. Use at your own risk!"); - error = xfs_mountfs(mp); if (error) goto out_filestream_unmount; -- cgit v1.2.3