Diffstat (limited to 'fs/xfs')
151 files changed, 4151 insertions, 3279 deletions
diff --git a/fs/xfs/kmem.c b/fs/xfs/kmem.c
index e986b95d94c9..6f49bf39183c 100644
--- a/fs/xfs/kmem.c
+++ b/fs/xfs/kmem.c
@@ -29,67 +29,3 @@ kmem_alloc(size_t size, xfs_km_flags_t flags)
 		congestion_wait(BLK_RW_ASYNC, HZ/50);
 	} while (1);
 }
-
-
-/*
- * __vmalloc() will allocate data pages and auxiliary structures (e.g.
- * pagetables) with GFP_KERNEL, yet we may be under GFP_NOFS context here. Hence
- * we need to tell memory reclaim that we are in such a context via
- * PF_MEMALLOC_NOFS to prevent memory reclaim re-entering the filesystem here
- * and potentially deadlocking.
- */
-static void *
-__kmem_vmalloc(size_t size, xfs_km_flags_t flags)
-{
-	unsigned nofs_flag = 0;
-	void	*ptr;
-	gfp_t	lflags = kmem_flags_convert(flags);
-
-	if (flags & KM_NOFS)
-		nofs_flag = memalloc_nofs_save();
-
-	ptr = __vmalloc(size, lflags);
-
-	if (flags & KM_NOFS)
-		memalloc_nofs_restore(nofs_flag);
-
-	return ptr;
-}
-
-/*
- * Same as kmem_alloc_large, except we guarantee the buffer returned is aligned
- * to the @align_mask. We only guarantee alignment up to page size, we'll clamp
- * alignment at page size if it is larger. vmalloc always returns a PAGE_SIZE
- * aligned region.
- */
-void *
-kmem_alloc_io(size_t size, int align_mask, xfs_km_flags_t flags)
-{
-	void	*ptr;
-
-	trace_kmem_alloc_io(size, flags, _RET_IP_);
-
-	if (WARN_ON_ONCE(align_mask >= PAGE_SIZE))
-		align_mask = PAGE_SIZE - 1;
-
-	ptr = kmem_alloc(size, flags | KM_MAYFAIL);
-	if (ptr) {
-		if (!((uintptr_t)ptr & align_mask))
-			return ptr;
-		kfree(ptr);
-	}
-	return __kmem_vmalloc(size, flags);
-}
-
-void *
-kmem_alloc_large(size_t size, xfs_km_flags_t flags)
-{
-	void	*ptr;
-
-	trace_kmem_alloc_large(size, flags, _RET_IP_);
-
-	ptr = kmem_alloc(size, flags | KM_MAYFAIL);
-	if (ptr)
-		return ptr;
-	return __kmem_vmalloc(size, flags);
-}
diff --git a/fs/xfs/kmem.h b/fs/xfs/kmem.h
index 38007117697e..54da6d717a06 100644
--- a/fs/xfs/kmem.h
+++ b/fs/xfs/kmem.h
@@ -57,8 +57,6 @@ kmem_flags_convert(xfs_km_flags_t flags)
 }
 
 extern void *kmem_alloc(size_t, xfs_km_flags_t);
-extern void *kmem_alloc_io(size_t size, int align_mask, xfs_km_flags_t flags);
-extern void *kmem_alloc_large(size_t size, xfs_km_flags_t);
 
 static inline void  kmem_free(const void *ptr)
 {
 	kvfree(ptr);
diff --git a/fs/xfs/libxfs/xfs_ag.c b/fs/xfs/libxfs/xfs_ag.c
index ee9ec0c50bec..005abfd9fd34 100644
--- a/fs/xfs/libxfs/xfs_ag.c
+++ b/fs/xfs/libxfs/xfs_ag.c
@@ -313,7 +313,6 @@ xfs_get_aghdr_buf(
 	if (error)
 		return error;
 
-	bp->b_bn = blkno;
 	bp->b_maps[0].bm_bn = blkno;
 	bp->b_ops = ops;
 
@@ -469,7 +468,7 @@ xfs_rmaproot_init(
 	rrec->rm_offset = 0;
 
 	/* account for refc btree root */
-	if (xfs_sb_version_hasreflink(&mp->m_sb)) {
+	if (xfs_has_reflink(mp)) {
 		rrec = XFS_RMAP_REC_ADDR(block, 5);
 		rrec->rm_startblock = cpu_to_be32(xfs_refc_block(mp));
 		rrec->rm_blockcount = cpu_to_be32(1);
@@ -528,7 +527,7 @@ xfs_agfblock_init(
 	agf->agf_roots[XFS_BTNUM_CNTi] = cpu_to_be32(XFS_CNT_BLOCK(mp));
 	agf->agf_levels[XFS_BTNUM_BNOi] = cpu_to_be32(1);
 	agf->agf_levels[XFS_BTNUM_CNTi] = cpu_to_be32(1);
-	if (xfs_sb_version_hasrmapbt(&mp->m_sb)) {
+	if (xfs_has_rmapbt(mp)) {
 		agf->agf_roots[XFS_BTNUM_RMAPi] =
 					cpu_to_be32(XFS_RMAP_BLOCK(mp));
 		agf->agf_levels[XFS_BTNUM_RMAPi] = cpu_to_be32(1);
@@ -541,9 +540,9 @@ xfs_agfblock_init(
 	tmpsize = id->agsize - mp->m_ag_prealloc_blocks;
 	agf->agf_freeblks = cpu_to_be32(tmpsize);
 	agf->agf_longest = cpu_to_be32(tmpsize);
-	if (xfs_sb_version_hascrc(&mp->m_sb))
+	if (xfs_has_crc(mp))
 		uuid_copy(&agf->agf_uuid, &mp->m_sb.sb_meta_uuid);
-	if (xfs_sb_version_hasreflink(&mp->m_sb)) {
+	if (xfs_has_reflink(mp)) {
 		agf->agf_refcount_root = cpu_to_be32(
 				xfs_refc_block(mp));
 		agf->agf_refcount_level = cpu_to_be32(1);
@@ -569,7 +568,7 @@ xfs_agflblock_init(
 	__be32			*agfl_bno;
 	int			bucket;
 
-	if (xfs_sb_version_hascrc(&mp->m_sb)) {
+	if (xfs_has_crc(mp)) {
 		agfl->agfl_magicnum = cpu_to_be32(XFS_AGFL_MAGIC);
 		agfl->agfl_seqno = cpu_to_be32(id->agno);
 		uuid_copy(&agfl->agfl_uuid, &mp->m_sb.sb_meta_uuid);
@@ -599,17 +598,17 @@ xfs_agiblock_init(
 	agi->agi_freecount = 0;
 	agi->agi_newino = cpu_to_be32(NULLAGINO);
 	agi->agi_dirino = cpu_to_be32(NULLAGINO);
-	if (xfs_sb_version_hascrc(&mp->m_sb))
+	if (xfs_has_crc(mp))
 		uuid_copy(&agi->agi_uuid, &mp->m_sb.sb_meta_uuid);
-	if (xfs_sb_version_hasfinobt(&mp->m_sb)) {
+	if (xfs_has_finobt(mp)) {
 		agi->agi_free_root = cpu_to_be32(XFS_FIBT_BLOCK(mp));
 		agi->agi_free_level = cpu_to_be32(1);
 	}
 	for (bucket = 0; bucket < XFS_AGI_UNLINKED_BUCKETS; bucket++)
 		agi->agi_unlinked[bucket] = cpu_to_be32(NULLAGINO);
-	if (xfs_sb_version_hasinobtcounts(&mp->m_sb)) {
+	if (xfs_has_inobtcounts(mp)) {
 		agi->agi_iblocks = cpu_to_be32(1);
-		if (xfs_sb_version_hasfinobt(&mp->m_sb))
+		if (xfs_has_finobt(mp))
 			agi->agi_fblocks = cpu_to_be32(1);
 	}
 }
@@ -719,14 +718,14 @@ xfs_ag_init_headers(
 		.ops = &xfs_finobt_buf_ops,
 		.work = &xfs_btroot_init,
 		.type = XFS_BTNUM_FINO,
-		.need_init = xfs_sb_version_hasfinobt(&mp->m_sb)
+		.need_init = xfs_has_finobt(mp)
 	},
 	{ /* RMAP root block */
 		.daddr = XFS_AGB_TO_DADDR(mp, id->agno, XFS_RMAP_BLOCK(mp)),
 		.numblks = BTOBB(mp->m_sb.sb_blocksize),
 		.ops = &xfs_rmapbt_buf_ops,
 		.work = &xfs_rmaproot_init,
-		.need_init = xfs_sb_version_hasrmapbt(&mp->m_sb)
+		.need_init = xfs_has_rmapbt(mp)
 	},
 	{ /* REFC root block */
 		.daddr = XFS_AGB_TO_DADDR(mp, id->agno, xfs_refc_block(mp)),
@@ -734,7 +733,7 @@ xfs_ag_init_headers(
 		.ops = &xfs_refcountbt_buf_ops,
 		.work = &xfs_btroot_init,
 		.type = XFS_BTNUM_REFC,
-		.need_init = xfs_sb_version_hasreflink(&mp->m_sb)
+		.need_init = xfs_has_reflink(mp)
 	},
 	{ /* NULL terminating block */
 		.daddr = XFS_BUF_DADDR_NULL,
diff --git a/fs/xfs/libxfs/xfs_alloc.c b/fs/xfs/libxfs/xfs_alloc.c
index 6929157d8d6e..95157f5a5a6c 100644
--- a/fs/xfs/libxfs/xfs_alloc.c
+++ b/fs/xfs/libxfs/xfs_alloc.c
@@ -51,7 +51,7 @@ xfs_agfl_size(
 {
 	unsigned int		size = mp->m_sb.sb_sectsize;
 
-	if (xfs_sb_version_hascrc(&mp->m_sb))
+	if (xfs_has_crc(mp))
 		size -= sizeof(struct xfs_agfl);
 
 	return size / sizeof(xfs_agblock_t);
@@ -61,9 +61,9 @@ unsigned int
 xfs_refc_block(
 	struct xfs_mount	*mp)
 {
-	if (xfs_sb_version_hasrmapbt(&mp->m_sb))
+	if (xfs_has_rmapbt(mp))
 		return XFS_RMAP_BLOCK(mp) + 1;
-	if (xfs_sb_version_hasfinobt(&mp->m_sb))
+	if (xfs_has_finobt(mp))
 		return XFS_FIBT_BLOCK(mp) + 1;
 	return XFS_IBT_BLOCK(mp) + 1;
 }
@@ -72,11 +72,11 @@ xfs_extlen_t
 xfs_prealloc_blocks(
 	struct xfs_mount	*mp)
 {
-	if (xfs_sb_version_hasreflink(&mp->m_sb))
+	if (xfs_has_reflink(mp))
 		return xfs_refc_block(mp) + 1;
-	if (xfs_sb_version_hasrmapbt(&mp->m_sb))
+	if (xfs_has_rmapbt(mp))
 		return XFS_RMAP_BLOCK(mp) + 1;
-	if (xfs_sb_version_hasfinobt(&mp->m_sb))
+	if (xfs_has_finobt(mp))
 		return XFS_FIBT_BLOCK(mp) + 1;
 	return XFS_IBT_BLOCK(mp) + 1;
 }
@@ -126,11 +126,11 @@ xfs_alloc_ag_max_usable(
 	blocks = XFS_BB_TO_FSB(mp, XFS_FSS_TO_BB(mp, 4)); /* ag headers */
 	blocks += XFS_ALLOC_AGFL_RESERVE;
 	blocks += 3;			/* AGF, AGI btree root blocks */
-	if (xfs_sb_version_hasfinobt(&mp->m_sb))
+	if (xfs_has_finobt(mp))
 		blocks++;		/* finobt root block */
-	if (xfs_sb_version_hasrmapbt(&mp->m_sb))
+	if (xfs_has_rmapbt(mp))
 		blocks++;		/* rmap root block */
-	if (xfs_sb_version_hasreflink(&mp->m_sb))
+	if (xfs_has_reflink(mp))
 		blocks++;		/* refcount root block */
 
 	return mp->m_sb.sb_agblocks - blocks;
@@ -598,7 +598,7 @@ xfs_agfl_verify(
	 * AGFL is what the AGF says is active. We can't get to the AGF, so we
	 * can't verify just those entries are valid.
	 */
-	if (!xfs_sb_version_hascrc(&mp->m_sb))
+	if (!xfs_has_crc(mp))
 		return NULL;
 
 	if (!xfs_verify_magic(bp, agfl->agfl_magicnum))
@@ -638,7 +638,7 @@ xfs_agfl_read_verify(
	 * AGFL is what the AGF says is active. We can't get to the AGF, so we
	 * can't verify just those entries are valid.
	 */
-	if (!xfs_sb_version_hascrc(&mp->m_sb))
+	if (!xfs_has_crc(mp))
 		return;
 
 	if (!xfs_buf_verify_cksum(bp, XFS_AGFL_CRC_OFF))
@@ -659,7 +659,7 @@ xfs_agfl_write_verify(
 	xfs_failaddr_t		fa;
 
 	/* no verification of non-crc AGFLs */
-	if (!xfs_sb_version_hascrc(&mp->m_sb))
+	if (!xfs_has_crc(mp))
 		return;
 
 	fa = xfs_agfl_verify(bp);
@@ -2264,7 +2264,7 @@ xfs_alloc_min_freelist(
 	min_free += min_t(unsigned int, levels[XFS_BTNUM_CNTi] + 1,
				       mp->m_ag_maxlevels);
 	/* space needed reverse mapping used space btree */
-	if (xfs_sb_version_hasrmapbt(&mp->m_sb))
+	if (xfs_has_rmapbt(mp))
 		min_free += min_t(unsigned int, levels[XFS_BTNUM_RMAPi] + 1,
						mp->m_rmap_maxlevels);
@@ -2373,7 +2373,7 @@ xfs_agfl_needs_reset(
 	int			active;
 
 	/* no agfl header on v4 supers */
-	if (!xfs_sb_version_hascrc(&mp->m_sb))
+	if (!xfs_has_crc(mp))
 		return false;
 
 	/*
@@ -2877,7 +2877,7 @@ xfs_agf_verify(
 	struct xfs_mount	*mp = bp->b_mount;
 	struct xfs_agf		*agf = bp->b_addr;
 
-	if (xfs_sb_version_hascrc(&mp->m_sb)) {
+	if (xfs_has_crc(mp)) {
 		if (!uuid_equal(&agf->agf_uuid, &mp->m_sb.sb_meta_uuid))
 			return __this_address;
 		if (!xfs_log_check_lsn(mp, be64_to_cpu(agf->agf_lsn)))
@@ -2907,12 +2907,12 @@ xfs_agf_verify(
 	    be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNT]) > mp->m_ag_maxlevels)
 		return __this_address;
 
-	if (xfs_sb_version_hasrmapbt(&mp->m_sb) &&
+	if (xfs_has_rmapbt(mp) &&
 	    (be32_to_cpu(agf->agf_levels[XFS_BTNUM_RMAP]) < 1 ||
 	     be32_to_cpu(agf->agf_levels[XFS_BTNUM_RMAP]) > mp->m_rmap_maxlevels))
 		return __this_address;
 
-	if (xfs_sb_version_hasrmapbt(&mp->m_sb) &&
+	if (xfs_has_rmapbt(mp) &&
 	    be32_to_cpu(agf->agf_rmap_blocks) > be32_to_cpu(agf->agf_length))
 		return __this_address;
 
@@ -2925,16 +2925,16 @@ xfs_agf_verify(
 	if (bp->b_pag && be32_to_cpu(agf->agf_seqno) != bp->b_pag->pag_agno)
 		return __this_address;
 
-	if (xfs_sb_version_haslazysbcount(&mp->m_sb) &&
+	if (xfs_has_lazysbcount(mp) &&
 	    be32_to_cpu(agf->agf_btreeblks) > be32_to_cpu(agf->agf_length))
 		return __this_address;
 
-	if (xfs_sb_version_hasreflink(&mp->m_sb) &&
+	if (xfs_has_reflink(mp) &&
 	    be32_to_cpu(agf->agf_refcount_blocks) >
 	    be32_to_cpu(agf->agf_length))
 		return __this_address;
 
-	if (xfs_sb_version_hasreflink(&mp->m_sb) &&
+	if (xfs_has_reflink(mp) &&
 	    (be32_to_cpu(agf->agf_refcount_level) < 1 ||
 	     be32_to_cpu(agf->agf_refcount_level) > mp->m_refc_maxlevels))
 		return __this_address;
@@ -2950,7 +2950,7 @@ xfs_agf_read_verify(
 	struct xfs_mount	*mp = bp->b_mount;
 	xfs_failaddr_t		fa;
 
-	if (xfs_sb_version_hascrc(&mp->m_sb) &&
+	if (xfs_has_crc(mp) &&
 	    !xfs_buf_verify_cksum(bp, XFS_AGF_CRC_OFF))
 		xfs_verifier_error(bp, -EFSBADCRC, __this_address);
 	else {
@@ -2975,7 +2975,7 @@ xfs_agf_write_verify(
 		return;
 	}
 
-	if (!xfs_sb_version_hascrc(&mp->m_sb))
+	if (!xfs_has_crc(mp))
 		return;
 
 	if (bip)
@@ -3073,13 +3073,13 @@ xfs_alloc_read_agf(
		 * counter only tracks non-root blocks.
		 */
		allocbt_blks = pag->pagf_btreeblks;
-		if (xfs_sb_version_hasrmapbt(&mp->m_sb))
+		if (xfs_has_rmapbt(mp))
			allocbt_blks -= be32_to_cpu(agf->agf_rmap_blocks) - 1;
		if (allocbt_blks > 0)
			atomic64_add(allocbt_blks, &mp->m_allocbt_blks);
	}
 #ifdef DEBUG
-	else if (!XFS_FORCED_SHUTDOWN(mp)) {
+	else if (!xfs_is_shutdown(mp)) {
		ASSERT(pag->pagf_freeblks == be32_to_cpu(agf->agf_freeblks));
		ASSERT(pag->pagf_btreeblks == be32_to_cpu(agf->agf_btreeblks));
		ASSERT(pag->pagf_flcount == be32_to_cpu(agf->agf_flcount));
@@ -3166,7 +3166,7 @@ xfs_alloc_vextent(
		 * the first a.g. fails.
		 */
		if ((args->datatype & XFS_ALLOC_INITIAL_USER_DATA) &&
-		    (mp->m_flags & XFS_MOUNT_32BITINODES)) {
+		    xfs_is_inode32(mp)) {
			args->fsbno = XFS_AGB_TO_FSB(mp,
					((mp->m_agfrotor / rotorstep) %
					mp->m_sb.sb_agcount), 0);
@@ -3392,7 +3392,7 @@ struct xfs_alloc_query_range_info {
 STATIC int
 xfs_alloc_query_range_helper(
	struct xfs_btree_cur		*cur,
-	union xfs_btree_rec		*rec,
+	const union xfs_btree_rec	*rec,
	void				*priv)
 {
	struct xfs_alloc_query_range_info	*query = priv;
@@ -3407,8 +3407,8 @@ xfs_alloc_query_range_helper(
 int
 xfs_alloc_query_range(
	struct xfs_btree_cur			*cur,
-	struct xfs_alloc_rec_incore		*low_rec,
-	struct xfs_alloc_rec_incore		*high_rec,
+	const struct xfs_alloc_rec_incore	*low_rec,
+	const struct xfs_alloc_rec_incore	*high_rec,
	xfs_alloc_query_range_fn		fn,
	void					*priv)
 {
diff --git a/fs/xfs/libxfs/xfs_alloc.h b/fs/xfs/libxfs/xfs_alloc.h
index e30900b6f8ba..df4aefaf0046 100644
--- a/fs/xfs/libxfs/xfs_alloc.h
+++ b/fs/xfs/libxfs/xfs_alloc.h
@@ -220,13 +220,13 @@ int xfs_free_extent_fix_freelist(struct xfs_trans *tp, struct xfs_perag *pag,
 xfs_extlen_t xfs_prealloc_blocks(struct xfs_mount *mp);
 
 typedef int (*xfs_alloc_query_range_fn)(
-	struct xfs_btree_cur		*cur,
-	struct xfs_alloc_rec_incore	*rec,
-	void				*priv);
+	struct xfs_btree_cur			*cur,
+	const struct xfs_alloc_rec_incore	*rec,
+	void					*priv);
 
 int xfs_alloc_query_range(struct xfs_btree_cur *cur,
-		struct xfs_alloc_rec_incore *low_rec,
-		struct xfs_alloc_rec_incore *high_rec,
+		const struct xfs_alloc_rec_incore *low_rec,
+		const struct xfs_alloc_rec_incore *high_rec,
		xfs_alloc_query_range_fn fn, void *priv);
 int xfs_alloc_query_all(struct xfs_btree_cur *cur, xfs_alloc_query_range_fn fn,
		void *priv);
@@ -243,7 +243,7 @@ static inline __be32 *
 xfs_buf_to_agfl_bno(
	struct xfs_buf		*bp)
 {
-	if (xfs_sb_version_hascrc(&bp->b_mount->m_sb))
+	if (xfs_has_crc(bp->b_mount))
		return bp->b_addr + sizeof(struct xfs_agfl);
	return bp->b_addr;
 }
diff --git a/fs/xfs/libxfs/xfs_alloc_btree.c b/fs/xfs/libxfs/xfs_alloc_btree.c
index 6b363f78cfa2..6746fd735550 100644
--- a/fs/xfs/libxfs/xfs_alloc_btree.c
+++ b/fs/xfs/libxfs/xfs_alloc_btree.c
@@ -31,9 +31,9 @@ xfs_allocbt_dup_cursor(
 
 STATIC void
 xfs_allocbt_set_root(
-	struct xfs_btree_cur	*cur,
-	union xfs_btree_ptr	*ptr,
-	int			inc)
+	struct xfs_btree_cur		*cur,
+	const union xfs_btree_ptr	*ptr,
+	int				inc)
 {
	struct xfs_buf		*agbp = cur->bc_ag.agbp;
	struct xfs_agf		*agf = agbp->b_addr;
@@ -50,10 +50,10 @@ xfs_allocbt_set_root(
 
 STATIC int
 xfs_allocbt_alloc_block(
-	struct xfs_btree_cur	*cur,
-	union xfs_btree_ptr	*start,
-	union xfs_btree_ptr	*new,
-	int			*stat)
+	struct xfs_btree_cur		*cur,
+	const union xfs_btree_ptr	*start,
+	union xfs_btree_ptr		*new,
+	int				*stat)
 {
	int			error;
	xfs_agblock_t		bno;
@@ -87,7 +87,7 @@ xfs_allocbt_free_block(
	xfs_agblock_t		bno;
	int			error;
 
-	bno = xfs_daddr_to_agbno(cur->bc_mp, XFS_BUF_ADDR(bp));
+	bno = xfs_daddr_to_agbno(cur->bc_mp, xfs_buf_daddr(bp));
	error = xfs_alloc_put_freelist(cur->bc_tp, agbp, NULL, bno, 1);
	if (error)
		return error;
@@ -103,11 +103,11 @@ xfs_allocbt_free_block(
  */
 STATIC void
 xfs_allocbt_update_lastrec(
-	struct xfs_btree_cur	*cur,
-	struct xfs_btree_block	*block,
-	union xfs_btree_rec	*rec,
-	int			ptr,
-	int			reason)
+	struct xfs_btree_cur		*cur,
+	const struct xfs_btree_block	*block,
+	const union xfs_btree_rec	*rec,
+	int				ptr,
+	int				reason)
 {
	struct xfs_agf		*agf = cur->bc_ag.agbp->b_addr;
	struct xfs_perag	*pag;
@@ -177,8 +177,8 @@ xfs_allocbt_get_maxrecs(
 
 STATIC void
 xfs_allocbt_init_key_from_rec(
-	union xfs_btree_key	*key,
-	union xfs_btree_rec	*rec)
+	union xfs_btree_key		*key,
+	const union xfs_btree_rec	*rec)
 {
	key->alloc.ar_startblock = rec->alloc.ar_startblock;
	key->alloc.ar_blockcount = rec->alloc.ar_blockcount;
@@ -186,10 +186,10 @@ xfs_allocbt_init_key_from_rec(
 
 STATIC void
 xfs_bnobt_init_high_key_from_rec(
-	union xfs_btree_key	*key,
-	union xfs_btree_rec	*rec)
+	union xfs_btree_key		*key,
+	const union xfs_btree_rec	*rec)
 {
-	__u32			x;
+	__u32				x;
 
	x = be32_to_cpu(rec->alloc.ar_startblock);
	x += be32_to_cpu(rec->alloc.ar_blockcount) - 1;
@@ -199,8 +199,8 @@ xfs_bnobt_init_high_key_from_rec(
 
 STATIC void
 xfs_cntbt_init_high_key_from_rec(
-	union xfs_btree_key	*key,
-	union xfs_btree_rec	*rec)
+	union xfs_btree_key		*key,
+	const union xfs_btree_rec	*rec)
 {
	key->alloc.ar_blockcount = rec->alloc.ar_blockcount;
	key->alloc.ar_startblock = 0;
@@ -229,23 +229,23 @@ xfs_allocbt_init_ptr_from_cur(
 
 STATIC int64_t
 xfs_bnobt_key_diff(
-	struct xfs_btree_cur	*cur,
-	union xfs_btree_key	*key)
+	struct xfs_btree_cur		*cur,
+	const union xfs_btree_key	*key)
 {
-	xfs_alloc_rec_incore_t	*rec = &cur->bc_rec.a;
-	xfs_alloc_key_t		*kp = &key->alloc;
+	struct xfs_alloc_rec_incore	*rec = &cur->bc_rec.a;
+	const struct xfs_alloc_rec	*kp = &key->alloc;
 
	return (int64_t)be32_to_cpu(kp->ar_startblock) - rec->ar_startblock;
 }
 
 STATIC int64_t
 xfs_cntbt_key_diff(
-	struct xfs_btree_cur	*cur,
-	union xfs_btree_key	*key)
+	struct xfs_btree_cur		*cur,
+	const union xfs_btree_key	*key)
 {
-	xfs_alloc_rec_incore_t	*rec = &cur->bc_rec.a;
-	xfs_alloc_key_t		*kp = &key->alloc;
-	int64_t			diff;
+	struct xfs_alloc_rec_incore	*rec = &cur->bc_rec.a;
+	const struct xfs_alloc_rec	*kp = &key->alloc;
+	int64_t				diff;
 
	diff = (int64_t)be32_to_cpu(kp->ar_blockcount) - rec->ar_blockcount;
	if (diff)
@@ -256,9 +256,9 @@ xfs_cntbt_key_diff(
 
 STATIC int64_t
 xfs_bnobt_diff_two_keys(
-	struct xfs_btree_cur	*cur,
-	union xfs_btree_key	*k1,
-	union xfs_btree_key	*k2)
+	struct xfs_btree_cur		*cur,
+	const union xfs_btree_key	*k1,
+	const union xfs_btree_key	*k2)
 {
	return (int64_t)be32_to_cpu(k1->alloc.ar_startblock) -
			be32_to_cpu(k2->alloc.ar_startblock);
@@ -266,11 +266,11 @@ xfs_bnobt_diff_two_keys(
 
 STATIC int64_t
 xfs_cntbt_diff_two_keys(
-	struct xfs_btree_cur	*cur,
-	union xfs_btree_key	*k1,
-	union xfs_btree_key	*k2)
+	struct xfs_btree_cur		*cur,
+	const union xfs_btree_key	*k1,
+	const union xfs_btree_key	*k2)
 {
-	int64_t			diff;
+	int64_t				diff;
 
	diff = be32_to_cpu(k1->alloc.ar_blockcount) -
		be32_to_cpu(k2->alloc.ar_blockcount);
@@ -295,7 +295,7 @@ xfs_allocbt_verify(
	if (!xfs_verify_magic(bp, block->bb_magic))
		return __this_address;
 
-	if (xfs_sb_version_hascrc(&mp->m_sb)) {
+	if (xfs_has_crc(mp)) {
		fa = xfs_btree_sblock_v5hdr_verify(bp);
		if (fa)
			return fa;
@@ -376,9 +376,9 @@ const struct xfs_buf_ops xfs_cntbt_buf_ops = {
 
 STATIC int
 xfs_bnobt_keys_inorder(
-	struct xfs_btree_cur	*cur,
-	union xfs_btree_key	*k1,
-	union xfs_btree_key	*k2)
+	struct xfs_btree_cur		*cur,
+	const union xfs_btree_key	*k1,
+	const union xfs_btree_key	*k2)
 {
	return be32_to_cpu(k1->alloc.ar_startblock) <
	       be32_to_cpu(k2->alloc.ar_startblock);
@@ -386,9 +386,9 @@ xfs_bnobt_keys_inorder(
 
 STATIC int
 xfs_bnobt_recs_inorder(
-	struct xfs_btree_cur	*cur,
-	union xfs_btree_rec	*r1,
-	union xfs_btree_rec	*r2)
+	struct xfs_btree_cur		*cur,
+	const union xfs_btree_rec	*r1,
+	const union xfs_btree_rec	*r2)
 {
	return be32_to_cpu(r1->alloc.ar_startblock) +
		be32_to_cpu(r1->alloc.ar_blockcount) <=
@@ -397,9 +397,9 @@ xfs_bnobt_recs_inorder(
 
 STATIC int
 xfs_cntbt_keys_inorder(
-	struct xfs_btree_cur	*cur,
-	union xfs_btree_key	*k1,
-	union xfs_btree_key	*k2)
+	struct xfs_btree_cur		*cur,
+	const union xfs_btree_key	*k1,
+	const union xfs_btree_key	*k2)
 {
	return be32_to_cpu(k1->alloc.ar_blockcount) <
		be32_to_cpu(k2->alloc.ar_blockcount) ||
@@ -410,9 +410,9 @@ xfs_cntbt_keys_inorder(
 
 STATIC int
 xfs_cntbt_recs_inorder(
-	struct xfs_btree_cur	*cur,
-	union xfs_btree_rec	*r1,
-	union xfs_btree_rec	*r2)
+	struct xfs_btree_cur		*cur,
+	const union xfs_btree_rec	*r1,
+	const union xfs_btree_rec	*r2)
 {
	return be32_to_cpu(r1->alloc.ar_blockcount) <
		be32_to_cpu(r2->alloc.ar_blockcount) ||
@@ -498,7 +498,7 @@ xfs_allocbt_init_common(
		atomic_inc(&pag->pag_ref);
	cur->bc_ag.pag = pag;
 
-	if (xfs_sb_version_hascrc(&mp->m_sb))
+	if (xfs_has_crc(mp))
		cur->bc_flags |= XFS_BTREE_CRC_BLOCKS;
 
	return cur;
diff --git a/fs/xfs/libxfs/xfs_alloc_btree.h b/fs/xfs/libxfs/xfs_alloc_btree.h
index 9eb4c667a6b8..2f6b816aaf9f 100644
--- a/fs/xfs/libxfs/xfs_alloc_btree.h
+++ b/fs/xfs/libxfs/xfs_alloc_btree.h
@@ -20,7 +20,7 @@ struct xbtree_afakeroot;
  * Btree block header size depends on a superblock flag.
  */
 #define XFS_ALLOC_BLOCK_LEN(mp) \
-	(xfs_sb_version_hascrc(&((mp)->m_sb)) ? \
+	(xfs_has_crc(((mp))) ? \
		XFS_BTREE_SBLOCK_CRC_LEN : XFS_BTREE_SBLOCK_LEN)
 
 /*
diff --git a/fs/xfs/libxfs/xfs_attr.c b/fs/xfs/libxfs/xfs_attr.c
index 191d51725988..fbc9d816882c 100644
--- a/fs/xfs/libxfs/xfs_attr.c
+++ b/fs/xfs/libxfs/xfs_attr.c
@@ -146,7 +146,7 @@ xfs_attr_get(
 
	XFS_STATS_INC(args->dp->i_mount, xs_attr_get);
 
-	if (XFS_FORCED_SHUTDOWN(args->dp->i_mount))
+	if (xfs_is_shutdown(args->dp->i_mount))
		return -EIO;
 
	args->geo = args->dp->i_mount->m_attr_geo;
@@ -224,7 +224,7 @@ xfs_attr_try_sf_addname(
	if (!error && !(args->op_flags & XFS_DA_OP_NOTIME))
		xfs_trans_ichgtime(args->trans, dp, XFS_ICHGTIME_CHG);
 
-	if (dp->i_mount->m_flags & XFS_MOUNT_WSYNC)
+	if (xfs_has_wsync(dp->i_mount))
		xfs_trans_set_sync(args->trans);
 
	return error;
@@ -335,6 +335,7 @@ xfs_attr_sf_addname(
	 * the attr fork to leaf format and will restart with the leaf
	 * add.
	 */
+	trace_xfs_attr_sf_addname_return(XFS_DAS_UNINIT, args->dp);
	dac->flags |= XFS_DAC_DEFER_FINISH;
	return -EAGAIN;
 }
@@ -394,6 +395,8 @@ xfs_attr_set_iter(
			 * handling code below
			 */
			dac->flags |= XFS_DAC_DEFER_FINISH;
+			trace_xfs_attr_set_iter_return(
+					dac->dela_state, args->dp);
			return -EAGAIN;
		} else if (error) {
			return error;
@@ -411,6 +414,7 @@ xfs_attr_set_iter(
			dac->dela_state = XFS_DAS_FOUND_NBLK;
		}
+		trace_xfs_attr_set_iter_return(dac->dela_state, args->dp);
		return -EAGAIN;
	case XFS_DAS_FOUND_LBLK:
		/*
@@ -438,6 +442,8 @@ xfs_attr_set_iter(
			error = xfs_attr_rmtval_set_blk(dac);
			if (error)
				return error;
+			trace_xfs_attr_set_iter_return(dac->dela_state,
						       args->dp);
			return -EAGAIN;
		}
 
@@ -472,6 +478,7 @@ xfs_attr_set_iter(
		 * series.
		 */
		dac->dela_state = XFS_DAS_FLIP_LFLAG;
+		trace_xfs_attr_set_iter_return(dac->dela_state, args->dp);
		return -EAGAIN;
	case XFS_DAS_FLIP_LFLAG:
		/*
@@ -488,11 +495,15 @@ xfs_attr_set_iter(
		/* Set state in case xfs_attr_rmtval_remove returns -EAGAIN */
		dac->dela_state = XFS_DAS_RM_LBLK;
		if (args->rmtblkno) {
-			error = __xfs_attr_rmtval_remove(dac);
+			error = xfs_attr_rmtval_remove(dac);
+			if (error == -EAGAIN)
+				trace_xfs_attr_set_iter_return(
+						dac->dela_state, args->dp);
			if (error)
				return error;
 
			dac->dela_state = XFS_DAS_RD_LEAF;
+			trace_xfs_attr_set_iter_return(dac->dela_state, args->dp);
			return -EAGAIN;
		}
 
@@ -542,6 +553,8 @@ xfs_attr_set_iter(
			error = xfs_attr_rmtval_set_blk(dac);
			if (error)
				return error;
+			trace_xfs_attr_set_iter_return(
+					dac->dela_state, args->dp);
			return -EAGAIN;
		}
 
@@ -577,6 +590,7 @@ xfs_attr_set_iter(
		 * series
		 */
		dac->dela_state = XFS_DAS_FLIP_NFLAG;
+		trace_xfs_attr_set_iter_return(dac->dela_state, args->dp);
		return -EAGAIN;
 
	case XFS_DAS_FLIP_NFLAG:
@@ -595,11 +609,16 @@ xfs_attr_set_iter(
		/* Set state in case xfs_attr_rmtval_remove returns -EAGAIN */
		dac->dela_state = XFS_DAS_RM_NBLK;
		if (args->rmtblkno) {
-			error = __xfs_attr_rmtval_remove(dac);
+			error = xfs_attr_rmtval_remove(dac);
+			if (error == -EAGAIN)
+				trace_xfs_attr_set_iter_return(
+						dac->dela_state, args->dp);
+
			if (error)
				return error;
 
			dac->dela_state = XFS_DAS_CLR_FLAG;
+			trace_xfs_attr_set_iter_return(dac->dela_state, args->dp);
			return -EAGAIN;
		}
 
@@ -623,8 +642,8 @@ out:
 /*
  * Return EEXIST if attr is found, or ENOATTR if not
  */
-int
-xfs_has_attr(
+static int
+xfs_attr_lookup(
	struct xfs_da_args	*args)
 {
	struct xfs_inode	*dp = args->dp;
@@ -691,7 +710,7 @@ xfs_attr_set(
	int			rmt_blks = 0;
	unsigned int		total;
 
-	if (XFS_FORCED_SHUTDOWN(dp->i_mount))
+	if (xfs_is_shutdown(dp->i_mount))
		return -EIO;
 
	error = xfs_qm_dqattach(dp);
@@ -761,8 +780,8 @@ xfs_attr_set(
		goto out_trans_cancel;
	}
 
+	error = xfs_attr_lookup(args);
	if (args->value) {
-		error = xfs_has_attr(args);
		if (error == -EEXIST && (args->attr_flags & XATTR_CREATE))
			goto out_trans_cancel;
		if (error == -ENOATTR && (args->attr_flags & XATTR_REPLACE))
@@ -777,7 +796,6 @@ xfs_attr_set(
		if (!args->trans)
			goto out_unlock;
	} else {
-		error = xfs_has_attr(args);
		if (error != -EEXIST)
			goto out_trans_cancel;
 
@@ -790,7 +808,7 @@ xfs_attr_set(
	 * If this is a synchronous mount, make sure that the
	 * transaction goes to disk before returning to the user.
	 */
-	if (mp->m_flags & XFS_MOUNT_WSYNC)
+	if (xfs_has_wsync(mp))
		xfs_trans_set_sync(args->trans);
 
	if (!(args->op_flags & XFS_DA_OP_NOTIME))
@@ -1176,6 +1194,8 @@ xfs_attr_node_addname(
		 * this point.
		 */
		dac->flags |= XFS_DAC_DEFER_FINISH;
+		trace_xfs_attr_node_addname_return(
+				dac->dela_state, args->dp);
		return -EAGAIN;
	}
 
@@ -1421,11 +1441,14 @@ xfs_attr_remove_iter(
		 * May return -EAGAIN. Roll and repeat until all remote
		 * blocks are removed.
		 */
-		error = __xfs_attr_rmtval_remove(dac);
-		if (error == -EAGAIN)
+		error = xfs_attr_rmtval_remove(dac);
+		if (error == -EAGAIN) {
+			trace_xfs_attr_remove_iter_return(
+					dac->dela_state, args->dp);
			return error;
-		else if (error)
+		} else if (error) {
			goto out;
+		}
 
		/*
		 * Refill the state structure with buffers (the prior
@@ -1438,6 +1461,7 @@ xfs_attr_remove_iter(
			goto out;
		dac->dela_state = XFS_DAS_RM_NAME;
		dac->flags |= XFS_DAC_DEFER_FINISH;
+		trace_xfs_attr_remove_iter_return(dac->dela_state, args->dp);
		return -EAGAIN;
	}
 
@@ -1466,6 +1490,8 @@ xfs_attr_remove_iter(
			dac->flags |= XFS_DAC_DEFER_FINISH;
			dac->dela_state = XFS_DAS_RM_SHRINK;
+			trace_xfs_attr_remove_iter_return(
+					dac->dela_state, args->dp);
			return -EAGAIN;
		}
 
@@ -1514,7 +1540,7 @@ xfs_attr_fillstate(xfs_da_state_t *state)
	ASSERT((path->active >= 0) && (path->active < XFS_DA_NODE_MAXDEPTH));
	for (blk = path->blk, level = 0; level < path->active; blk++, level++) {
		if (blk->bp) {
-			blk->disk_blkno = XFS_BUF_ADDR(blk->bp);
+			blk->disk_blkno = xfs_buf_daddr(blk->bp);
			blk->bp = NULL;
		} else {
			blk->disk_blkno = 0;
@@ -1529,7 +1555,7 @@ xfs_attr_fillstate(xfs_da_state_t *state)
	ASSERT((path->active >= 0) && (path->active < XFS_DA_NODE_MAXDEPTH));
	for (blk = path->blk, level = 0; level < path->active; blk++, level++) {
		if (blk->bp) {
-			blk->disk_blkno = XFS_BUF_ADDR(blk->bp);
+			blk->disk_blkno = xfs_buf_daddr(blk->bp);
			blk->bp = NULL;
		} else {
			blk->disk_blkno = 0;
diff --git a/fs/xfs/libxfs/xfs_attr.h b/fs/xfs/libxfs/xfs_attr.h
index 8de5d1d2733e..5e71f719bdd5 100644
--- a/fs/xfs/libxfs/xfs_attr.h
+++ b/fs/xfs/libxfs/xfs_attr.h
@@ -490,7 +490,6 @@ int xfs_attr_get_ilocked(struct xfs_da_args *args);
 int xfs_attr_get(struct xfs_da_args *args);
 int xfs_attr_set(struct xfs_da_args *args);
 int xfs_attr_set_args(struct xfs_da_args *args);
-int xfs_has_attr(struct xfs_da_args *args);
 int xfs_attr_remove_args(struct xfs_da_args *args);
 int xfs_attr_remove_iter(struct xfs_delattr_context *dac);
 bool xfs_attr_namecheck(const void *name, size_t length);
diff --git a/fs/xfs/libxfs/xfs_attr_leaf.c b/fs/xfs/libxfs/xfs_attr_leaf.c
index b910bd209949..e1d11e314228 100644
--- a/fs/xfs/libxfs/xfs_attr_leaf.c
+++ b/fs/xfs/libxfs/xfs_attr_leaf.c
@@ -384,7 +384,7 @@ xfs_attr3_leaf_write_verify(
		return;
	}
 
-	if (!xfs_sb_version_hascrc(&mp->m_sb))
+	if (!xfs_has_crc(mp))
		return;
 
	if (bip)
@@ -406,7 +406,7 @@ xfs_attr3_leaf_read_verify(
	struct xfs_mount	*mp = bp->b_mount;
	xfs_failaddr_t		fa;
 
-	if (xfs_sb_version_hascrc(&mp->m_sb) &&
+	if (xfs_has_crc(mp) &&
	     !xfs_buf_verify_cksum(bp, XFS_ATTR3_LEAF_CRC_OFF))
		xfs_verifier_error(bp, -EFSBADCRC, __this_address);
	else {
@@ -489,7 +489,7 @@ xfs_attr_copy_value(
	}
 
	if (!args->value) {
-		args->value = kmem_alloc_large(valuelen, KM_NOLOCKDEP);
+		args->value = kvmalloc(valuelen, GFP_KERNEL | __GFP_NOLOCKDEP);
		if (!args->value)
			return -ENOMEM;
	}
@@ -568,7 +568,7 @@ xfs_attr_shortform_bytesfit(
	 * literal area, but for the old format we are done if there is no
	 * space in the fixed attribute fork.
	 */
-	if (!(mp->m_flags & XFS_MOUNT_ATTR2))
+	if (!xfs_has_attr2(mp))
		return 0;
 
	dsize = dp->i_df.if_bytes;
@@ -576,7 +576,7 @@ xfs_attr_shortform_bytesfit(
	switch (dp->i_df.if_format) {
	case XFS_DINODE_FMT_EXTENTS:
		/*
-		 * If there is no attr fork and the data fork is extents, 
+		 * If there is no attr fork and the data fork is extents,
		 * determine if creating the default attr fork will result
		 * in the extents form migrating to btree. If so, the
		 * minimum offset only needs to be the space required for
@@ -621,21 +621,27 @@ xfs_attr_shortform_bytesfit(
 }
 
 /*
- * Switch on the ATTR2 superblock bit (implies also FEATURES2)
+ * Switch on the ATTR2 superblock bit (implies also FEATURES2) unless:
+ * - noattr2 mount option is set,
+ * - on-disk version bit says it is already set, or
+ * - the attr2 mount option is not set to enable automatic upgrade from attr1.
  */
 STATIC void
-xfs_sbversion_add_attr2(xfs_mount_t *mp, xfs_trans_t *tp)
+xfs_sbversion_add_attr2(
+	struct xfs_mount	*mp,
+	struct xfs_trans	*tp)
 {
-	if ((mp->m_flags & XFS_MOUNT_ATTR2) &&
-	    !(xfs_sb_version_hasattr2(&mp->m_sb))) {
-		spin_lock(&mp->m_sb_lock);
-		if (!xfs_sb_version_hasattr2(&mp->m_sb)) {
-			xfs_sb_version_addattr2(&mp->m_sb);
-			spin_unlock(&mp->m_sb_lock);
-			xfs_log_sb(tp);
-		} else
-			spin_unlock(&mp->m_sb_lock);
-	}
+	if (xfs_has_noattr2(mp))
+		return;
+	if (mp->m_sb.sb_features2 & XFS_SB_VERSION2_ATTR2BIT)
+		return;
+	if (!xfs_has_attr2(mp))
+		return;
+
+	spin_lock(&mp->m_sb_lock);
+	xfs_add_attr2(mp);
+	spin_unlock(&mp->m_sb_lock);
+	xfs_log_sb(tp);
 }
 
 /*
@@ -810,8 +816,7 @@ xfs_attr_sf_removename(
	 * Fix up the start offset of the attribute fork
	 */
	totsize -= size;
-	if (totsize == sizeof(xfs_attr_sf_hdr_t) &&
-	    (mp->m_flags & XFS_MOUNT_ATTR2) &&
+	if (totsize == sizeof(xfs_attr_sf_hdr_t) && xfs_has_attr2(mp) &&
	    (dp->i_df.if_format != XFS_DINODE_FMT_BTREE) &&
	    !(args->op_flags & XFS_DA_OP_ADDNAME)) {
		xfs_attr_fork_remove(dp, args->trans);
@@ -821,7 +826,7 @@ xfs_attr_sf_removename(
		ASSERT(dp->i_forkoff);
		ASSERT(totsize > sizeof(xfs_attr_sf_hdr_t) ||
				(args->op_flags & XFS_DA_OP_ADDNAME) ||
-				!(mp->m_flags & XFS_MOUNT_ATTR2) ||
+				!xfs_has_attr2(mp) ||
				dp->i_df.if_format == XFS_DINODE_FMT_BTREE);
		xfs_trans_log_inode(args->trans, dp,
					XFS_ILOG_CORE | XFS_ILOG_ADATA);
@@ -997,7 +1002,7 @@ xfs_attr_shortform_allfit(
			bytes += xfs_attr_sf_entsize_byname(name_loc->namelen,
					be16_to_cpu(name_loc->valuelen));
	}
-	if ((dp->i_mount->m_flags & XFS_MOUNT_ATTR2) &&
+	if (xfs_has_attr2(dp->i_mount) &&
	    (dp->i_df.if_format != XFS_DINODE_FMT_BTREE) &&
	    (bytes == sizeof(struct xfs_attr_sf_hdr)))
		return -1;
@@ -1122,7 +1127,7 @@ xfs_attr3_leaf_to_shortform(
		goto out;
 
	if (forkoff == -1) {
-		ASSERT(dp->i_mount->m_flags & XFS_MOUNT_ATTR2);
+		ASSERT(xfs_has_attr2(dp->i_mount));
		ASSERT(dp->i_df.if_format != XFS_DINODE_FMT_BTREE);
		xfs_attr_fork_remove(dp, args->trans);
		goto out;
@@ -1199,9 +1204,9 @@ xfs_attr3_leaf_to_node(
	xfs_trans_buf_set_type(args->trans, bp2, XFS_BLFT_ATTR_LEAF_BUF);
	bp2->b_ops = bp1->b_ops;
	memcpy(bp2->b_addr, bp1->b_addr, args->geo->blksize);
-	if (xfs_sb_version_hascrc(&mp->m_sb)) {
+	if (xfs_has_crc(mp)) {
		struct xfs_da3_blkinfo *hdr3 = bp2->b_addr;
-		hdr3->blkno = cpu_to_be64(bp2->b_bn);
+		hdr3->blkno = cpu_to_be64(xfs_buf_daddr(bp2));
	}
	xfs_trans_log_buf(args->trans, bp2, 0, args->geo->blksize - 1);
 
@@ -1264,12 +1269,12 @@ xfs_attr3_leaf_create(
	memset(&ichdr, 0, sizeof(ichdr));
	ichdr.firstused = args->geo->blksize;
 
-	if (xfs_sb_version_hascrc(&mp->m_sb)) {
+	if (xfs_has_crc(mp)) {
		struct xfs_da3_blkinfo *hdr3 = bp->b_addr;
 
		ichdr.magic = XFS_ATTR3_LEAF_MAGIC;
 
-		hdr3->blkno = cpu_to_be64(bp->b_bn);
+		hdr3->blkno = cpu_to_be64(xfs_buf_daddr(bp));
		hdr3->owner = cpu_to_be64(dp->i_ino);
		uuid_copy(&hdr3->uuid, &mp->m_sb.sb_meta_uuid);
 
diff --git a/fs/xfs/libxfs/xfs_attr_remote.c b/fs/xfs/libxfs/xfs_attr_remote.c
index 0c8bee3abc3b..83b95be9ded8 100644
--- a/fs/xfs/libxfs/xfs_attr_remote.c
+++ b/fs/xfs/libxfs/xfs_attr_remote.c
@@ -51,7 +51,7 @@
 xfs_attr3_rmt_blocks(
	struct xfs_mount	*mp,
	int			attrlen)
 {
-	if (xfs_sb_version_hascrc(&mp->m_sb)) {
+	if (xfs_has_crc(mp)) {
		int buflen = XFS_ATTR3_RMT_BUF_SPACE(mp, mp->m_sb.sb_blocksize);
		return (attrlen + buflen - 1) / buflen;
	}
@@ -126,11 +126,11 @@ __xfs_attr3_rmt_read_verify(
	int		blksize = mp->m_attr_geo->blksize;
 
	/* no verification of non-crc buffers */
-	if (!xfs_sb_version_hascrc(&mp->m_sb))
+	if (!xfs_has_crc(mp))
		return 0;
 
	ptr = bp->b_addr;
-	bno = bp->b_bn;
+	bno = xfs_buf_daddr(bp);
	len = BBTOB(bp->b_length);
	ASSERT(len >= blksize);
@@ -191,11 +191,11 @@ xfs_attr3_rmt_write_verify(
	xfs_daddr_t	bno;
 
	/* no verification of non-crc buffers */
-	if (!xfs_sb_version_hascrc(&mp->m_sb))
+	if (!xfs_has_crc(mp))
		return;
 
	ptr = bp->b_addr;
-	bno = bp->b_bn;
+	bno = xfs_buf_daddr(bp);
	len = BBTOB(bp->b_length);
	ASSERT(len >= blksize);
@@ -246,7 +246,7 @@ xfs_attr3_rmt_hdr_set(
 {
	struct xfs_attr3_rmt_hdr *rmt = ptr;
 
-	if (!xfs_sb_version_hascrc(&mp->m_sb))
+	if (!xfs_has_crc(mp))
		return 0;
 
	rmt->rm_magic = cpu_to_be32(XFS_ATTR3_RMT_MAGIC);
@@ -284,7 +284,7 @@ xfs_attr_rmtval_copyout(
	uint8_t		**dst)
 {
	char		*src = bp->b_addr;
-	xfs_daddr_t	bno = bp->b_bn;
+	xfs_daddr_t	bno = xfs_buf_daddr(bp);
	int		len = BBTOB(bp->b_length);
	int		blksize = mp->m_attr_geo->blksize;
@@ -296,7 +296,7 @@ xfs_attr_rmtval_copyout(
 
		byte_cnt = min(*valuelen, byte_cnt);
 
-		if (xfs_sb_version_hascrc(&mp->m_sb)) {
+		if (xfs_has_crc(mp)) {
			if (xfs_attr3_rmt_hdr_ok(src, ino, *offset,
						  byte_cnt, bno)) {
				xfs_alert(mp,
@@ -332,7 +332,7 @@ xfs_attr_rmtval_copyin(
	uint8_t		**src)
 {
	char		*dst = bp->b_addr;
-	xfs_daddr_t	bno = bp->b_bn;
+	xfs_daddr_t	bno = xfs_buf_daddr(bp);
	int		len = BBTOB(bp->b_length);
	int		blksize = mp->m_attr_geo->blksize;
@@ -672,7 +672,7 @@ xfs_attr_rmtval_invalidate(
  * routine until it returns something other than -EAGAIN.
  */
 int
-__xfs_attr_rmtval_remove(
+xfs_attr_rmtval_remove(
	struct xfs_delattr_context	*dac)
 {
	struct xfs_da_args		*args = dac->da_args;
@@ -696,6 +696,7 @@ __xfs_attr_rmtval_remove(
	 */
	if (!done) {
		dac->flags |= XFS_DAC_DEFER_FINISH;
+		trace_xfs_attr_rmtval_remove_return(dac->dela_state, args->dp);
		return -EAGAIN;
	}
 
diff --git a/fs/xfs/libxfs/xfs_attr_remote.h b/fs/xfs/libxfs/xfs_attr_remote.h
index 61b85b918db8..d72eff30ca18 100644
--- a/fs/xfs/libxfs/xfs_attr_remote.h
+++ b/fs/xfs/libxfs/xfs_attr_remote.h
@@ -12,7 +12,7 @@ int xfs_attr_rmtval_get(struct xfs_da_args *args);
 int xfs_attr_rmtval_stale(struct xfs_inode *ip, struct xfs_bmbt_irec *map,
		xfs_buf_flags_t incore_flags);
 int xfs_attr_rmtval_invalidate(struct xfs_da_args *args);
-int __xfs_attr_rmtval_remove(struct xfs_delattr_context *dac);
+int xfs_attr_rmtval_remove(struct xfs_delattr_context *dac);
 int xfs_attr_rmt_find_hole(struct xfs_da_args *args);
 int xfs_attr_rmtval_set_value(struct xfs_da_args *args);
 int xfs_attr_rmtval_set_blk(struct xfs_delattr_context *dac);
diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c
index 948092babb6a..b48230f1a361 100644
--- a/fs/xfs/libxfs/xfs_bmap.c
+++ b/fs/xfs/libxfs/xfs_bmap.c
@@ -242,7 +242,7 @@ xfs_bmap_get_bp(
	for (i = 0; i < XFS_BTREE_MAXLEVELS; i++) {
		if (!cur->bc_bufs[i])
			break;
-		if (XFS_BUF_ADDR(cur->bc_bufs[i]) == bno)
+		if (xfs_buf_daddr(cur->bc_bufs[i]) == bno)
			return cur->bc_bufs[i];
	}
 
@@ -251,7 +251,7 @@ xfs_bmap_get_bp(
		struct xfs_buf_log_item	*bip = (struct xfs_buf_log_item *)lip;
 
		if (bip->bli_item.li_type == XFS_LI_BUF &&
-		    XFS_BUF_ADDR(bip->bli_buf) == bno)
+		    xfs_buf_daddr(bip->bli_buf) == bno)
			return bip->bli_buf;
	}
 
@@ -739,7 +739,7 @@ xfs_bmap_extents_to_btree(
	 */
	abp->b_ops = &xfs_bmbt_buf_ops;
	ablock = XFS_BUF_TO_BLOCK(abp);
-	xfs_btree_init_block_int(mp, ablock, abp->b_bn,
+	xfs_btree_init_block_int(mp, ablock, xfs_buf_daddr(abp),
				 XFS_BTNUM_BMAP, 0, 0, ip->i_ino,
				 XFS_BTREE_LONG_PTRS);
 
@@ -1047,7 +1047,7 @@ xfs_bmap_set_attrforkoff(
		ip->i_forkoff = xfs_attr_shortform_bytesfit(ip, size);
		if (!ip->i_forkoff)
			ip->i_forkoff = default_size;
-		else if ((ip->i_mount->m_flags & XFS_MOUNT_ATTR2) && version)
+		else if (xfs_has_attr2(ip->i_mount) && version)
			*version = 2;
		break;
	default:
@@ -1115,17 +1115,17 @@ xfs_bmap_add_attrfork(
	xfs_trans_log_inode(tp, ip, logflags);
	if (error)
		goto trans_cancel;
-	if (!xfs_sb_version_hasattr(&mp->m_sb) ||
-	    (!xfs_sb_version_hasattr2(&mp->m_sb) && version == 2)) {
+	if (!xfs_has_attr(mp) ||
+	    (!xfs_has_attr2(mp) && version == 2)) {
		bool log_sb = false;
 
		spin_lock(&mp->m_sb_lock);
-		if (!xfs_sb_version_hasattr(&mp->m_sb)) {
-			xfs_sb_version_addattr(&mp->m_sb);
+		if (!xfs_has_attr(mp)) {
+			xfs_add_attr(mp);
			log_sb = true;
		}
-		if (!xfs_sb_version_hasattr2(&mp->m_sb) && version == 2) {
-			xfs_sb_version_addattr2(&mp->m_sb);
+		if (!xfs_has_attr2(mp) && version == 2) {
+			xfs_add_attr2(mp);
			log_sb = true;
		}
		spin_unlock(&mp->m_sb_lock);
@@ -3422,7 +3422,7 @@ xfs_bmap_compute_alignments(
	int			stripe_align = 0;
 
	/* stripe alignment for allocation is determined by mount parameters */
-	if (mp->m_swidth && (mp->m_flags & XFS_MOUNT_SWALLOC))
+	if (mp->m_swidth && xfs_has_swalloc(mp))
		stripe_align = mp->m_swidth;
	else if (mp->m_dalign)
		stripe_align = mp->m_dalign;
@@ -3938,7 +3938,7 @@ xfs_bmapi_read(
	    XFS_TEST_ERROR(false, mp, XFS_ERRTAG_BMAPIFORMAT))
		return -EFSCORRUPTED;
 
-	if (XFS_FORCED_SHUTDOWN(mp))
+	if (xfs_is_shutdown(mp))
		return -EIO;
 
	XFS_STATS_INC(mp, xs_blk_mapr);
@@ -4420,7 +4420,7 @@ xfs_bmapi_write(
		return -EFSCORRUPTED;
	}
 
-	if (XFS_FORCED_SHUTDOWN(mp))
+	if (xfs_is_shutdown(mp))
		return -EIO;
 
	XFS_STATS_INC(mp, xs_blk_mapw);
@@ -4703,7 +4703,7 @@ xfs_bmapi_remap(
		return -EFSCORRUPTED;
	}
 
-	if (XFS_FORCED_SHUTDOWN(mp))
+	if (xfs_is_shutdown(mp))
		return -EIO;
 
	error = xfs_iread_extents(tp, ip, whichfork);
@@ -5361,7 +5361,7 @@ __xfs_bunmapi(
	ifp = XFS_IFORK_PTR(ip, whichfork);
	if (XFS_IS_CORRUPT(mp, !xfs_ifork_has_extents(ifp)))
		return -EFSCORRUPTED;
-	if (XFS_FORCED_SHUTDOWN(mp))
+	if (xfs_is_shutdown(mp))
		return -EIO;
 
	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
@@ -5852,7 +5852,7 @@ xfs_bmap_collapse_extents(
		return -EFSCORRUPTED;
	}
 
-	if (XFS_FORCED_SHUTDOWN(mp))
+	if (xfs_is_shutdown(mp))
		return -EIO;
 
	ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL));
@@ -5930,7 +5930,7 @@ xfs_bmap_can_insert_extents(
 
	ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL));
 
-	if (XFS_FORCED_SHUTDOWN(ip->i_mount))
+	if (xfs_is_shutdown(ip->i_mount))
		return -EIO;
 
	xfs_ilock(ip, XFS_ILOCK_EXCL);
@@ -5967,7 +5967,7 @@ xfs_bmap_insert_extents(
		return -EFSCORRUPTED;
	}
 
-	if (XFS_FORCED_SHUTDOWN(mp))
+	if (xfs_is_shutdown(mp))
		return -EIO;
 
	ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL));
@@ -6070,7 +6070,7 @@ xfs_bmap_split_extent(
		return -EFSCORRUPTED;
	}
 
-	if (XFS_FORCED_SHUTDOWN(mp))
+	if (xfs_is_shutdown(mp))
		return -EIO;
 
	/* Read in all the extents */
diff --git a/fs/xfs/libxfs/xfs_bmap_btree.c b/fs/xfs/libxfs/xfs_bmap_btree.c
index 1ceba020940e..72444b8b38a6 100644
--- a/fs/xfs/libxfs/xfs_bmap_btree.c
+++ b/fs/xfs/libxfs/xfs_bmap_btree.c
@@ -58,7 +58,7 @@ xfs_bmdr_to_bmbt(
 
 void
 xfs_bmbt_disk_get_all(
-	struct xfs_bmbt_rec	*rec,
+	const struct xfs_bmbt_rec *rec,
	struct xfs_bmbt_irec	*irec)
 {
	uint64_t		l0 = get_unaligned_be64(&rec->l0);
@@ -78,7 +78,7 @@ xfs_bmbt_disk_get_all(
  */
 xfs_filblks_t
 xfs_bmbt_disk_get_blockcount(
-	xfs_bmbt_rec_t	*r)
+	const struct xfs_bmbt_rec	*r)
 {
	return (xfs_filblks_t)(be64_to_cpu(r->l1) & xfs_mask64lo(21));
 }
@@ -88,7 +88,7 @@ xfs_bmbt_disk_get_blockcount(
  */
 xfs_fileoff_t
 xfs_bmbt_disk_get_startoff(
-	xfs_bmbt_rec_t	*r)
+	const struct xfs_bmbt_rec	*r)
 {
	return ((xfs_fileoff_t)be64_to_cpu(r->l0) &
		 xfs_mask64lo(64 - BMBT_EXNTFLAG_BITLEN)) >> 9;
@@ -136,7 +136,7 @@ xfs_bmbt_to_bmdr(
	xfs_bmbt_key_t		*tkp;
	__be64			*tpp;
 
-	if (xfs_sb_version_hascrc(&mp->m_sb)) {
+	if (xfs_has_crc(mp)) {
		ASSERT(rblock->bb_magic == cpu_to_be32(XFS_BMAP_CRC_MAGIC));
		ASSERT(uuid_equal(&rblock->bb_u.l.bb_uuid,
		       &mp->m_sb.sb_meta_uuid));
@@ -193,10 +193,10 @@ xfs_bmbt_update_cursor(
 
 STATIC int
 xfs_bmbt_alloc_block(
-	struct xfs_btree_cur	*cur,
-	union xfs_btree_ptr	*start,
-	union xfs_btree_ptr	*new,
-	int			*stat)
+	struct xfs_btree_cur		*cur,
+	const union xfs_btree_ptr	*start,
+	union xfs_btree_ptr		*new,
+	int				*stat)
 {
	xfs_alloc_arg_t		args;	/* block allocation args */
	int			error;	/* error return value */
@@ -282,7 +282,7 @@ xfs_bmbt_free_block(
	struct xfs_mount	*mp = cur->bc_mp;
	struct xfs_inode	*ip = cur->bc_ino.ip;
	struct xfs_trans	*tp = cur->bc_tp;
-	xfs_fsblock_t		fsbno = XFS_DADDR_TO_FSB(mp, XFS_BUF_ADDR(bp));
+	xfs_fsblock_t		fsbno = XFS_DADDR_TO_FSB(mp, xfs_buf_daddr(bp));
	struct xfs_owner_info	oinfo;
 
	xfs_rmap_ino_bmbt_owner(&oinfo, ip->i_ino, cur->bc_ino.whichfork);
@@ -352,8 +352,8 @@ xfs_bmbt_get_dmaxrecs(
 
 STATIC void
 xfs_bmbt_init_key_from_rec(
-	union xfs_btree_key	*key,
-	union xfs_btree_rec	*rec)
+	union xfs_btree_key		*key,
+	const union xfs_btree_rec	*rec)
 {
	key->bmbt.br_startoff =
		cpu_to_be64(xfs_bmbt_disk_get_startoff(&rec->bmbt));
@@ -361,8 +361,8 @@ xfs_bmbt_init_key_from_rec(
 
 STATIC void
 xfs_bmbt_init_high_key_from_rec(
-	union xfs_btree_key	*key,
-	union xfs_btree_rec	*rec)
+	union xfs_btree_key		*key,
+	const union xfs_btree_rec	*rec)
 {
	key->bmbt.br_startoff = cpu_to_be64(
		xfs_bmbt_disk_get_startoff(&rec->bmbt) +
@@ -387,8 +387,8 @@ xfs_bmbt_init_ptr_from_cur(
 
 STATIC int64_t
 xfs_bmbt_key_diff(
-	struct xfs_btree_cur	*cur,
-	union xfs_btree_key	*key)
+	struct xfs_btree_cur		*cur,
+	const union xfs_btree_key	*key)
 {
	return (int64_t)be64_to_cpu(key->bmbt.br_startoff) -
				      cur->bc_rec.b.br_startoff;
@@ -396,12 +396,12 @@ xfs_bmbt_key_diff(
 
 STATIC int64_t
 xfs_bmbt_diff_two_keys(
-	struct xfs_btree_cur	*cur,
-	union xfs_btree_key	*k1,
-	union xfs_btree_key	*k2)
+	struct xfs_btree_cur		*cur,
+	const union xfs_btree_key	*k1,
+	const union xfs_btree_key	*k2)
 {
-	uint64_t		a = be64_to_cpu(k1->bmbt.br_startoff);
-	uint64_t		b = be64_to_cpu(k2->bmbt.br_startoff);
+	uint64_t			a = be64_to_cpu(k1->bmbt.br_startoff);
+	uint64_t			b = be64_to_cpu(k2->bmbt.br_startoff);
 
	/*
	 * Note: This routine previously casted a and b to int64 and subtracted
@@ -428,7 +428,7 @@ xfs_bmbt_verify(
	if (!xfs_verify_magic(bp, block->bb_magic))
		return __this_address;
 
-	if (xfs_sb_version_hascrc(&mp->m_sb)) {
+	if (xfs_has_crc(mp)) {
		/*
		 * XXX: need a better way of verifying the owner here. Right now
		 * just make sure there has been one set.
@@ -497,9 +497,9 @@ const struct xfs_buf_ops xfs_bmbt_buf_ops = {
 
 STATIC int
 xfs_bmbt_keys_inorder(
-	struct xfs_btree_cur	*cur,
-	union xfs_btree_key	*k1,
-	union xfs_btree_key	*k2)
+	struct xfs_btree_cur		*cur,
+	const union xfs_btree_key	*k1,
+	const union xfs_btree_key	*k2)
 {
	return be64_to_cpu(k1->bmbt.br_startoff) <
		be64_to_cpu(k2->bmbt.br_startoff);
@@ -507,9 +507,9 @@ xfs_bmbt_keys_inorder(
 
 STATIC int
 xfs_bmbt_recs_inorder(
-	struct xfs_btree_cur	*cur,
-	union xfs_btree_rec	*r1,
-	union xfs_btree_rec	*r2)
+	struct xfs_btree_cur		*cur,
+	const union xfs_btree_rec	*r1,
+	const union xfs_btree_rec	*r2)
 {
	return xfs_bmbt_disk_get_startoff(&r1->bmbt) +
		xfs_bmbt_disk_get_blockcount(&r1->bmbt) <=
@@ -563,7 +563,7 @@ xfs_bmbt_init_cursor(
	cur->bc_ops = &xfs_bmbt_ops;
	cur->bc_flags = XFS_BTREE_LONG_PTRS | XFS_BTREE_ROOT_IN_INODE;
-	if (xfs_sb_version_hascrc(&mp->m_sb))
+	if (xfs_has_crc(mp))
		cur->bc_flags |= XFS_BTREE_CRC_BLOCKS;
 
	cur->bc_ino.forksize = XFS_IFORK_SIZE(ip, whichfork);
diff --git a/fs/xfs/libxfs/xfs_bmap_btree.h b/fs/xfs/libxfs/xfs_bmap_btree.h
index 72bf74c79fb9..729e3bc569be 100644
--- a/fs/xfs/libxfs/xfs_bmap_btree.h
+++ b/fs/xfs/libxfs/xfs_bmap_btree.h
@@ -16,7 +16,7 @@ struct xfs_trans;
  * Btree block header size depends on a superblock flag.
  */
 #define XFS_BMBT_BLOCK_LEN(mp) \
-	(xfs_sb_version_hascrc(&((mp)->m_sb)) ? \
+	(xfs_has_crc(((mp))) ? \
		XFS_BTREE_LBLOCK_CRC_LEN : XFS_BTREE_LBLOCK_LEN)
 
 #define XFS_BMBT_REC_ADDR(mp, block, index) \
@@ -88,9 +88,10 @@ extern void xfs_bmdr_to_bmbt(struct xfs_inode *, xfs_bmdr_block_t *, int,
			struct xfs_btree_block *, int);
 
 void xfs_bmbt_disk_set_all(struct xfs_bmbt_rec *r, struct xfs_bmbt_irec *s);
-extern xfs_filblks_t xfs_bmbt_disk_get_blockcount(xfs_bmbt_rec_t *r);
-extern xfs_fileoff_t xfs_bmbt_disk_get_startoff(xfs_bmbt_rec_t *r);
-extern void xfs_bmbt_disk_get_all(xfs_bmbt_rec_t *r, xfs_bmbt_irec_t *s);
+extern xfs_filblks_t xfs_bmbt_disk_get_blockcount(const struct xfs_bmbt_rec *r);
+extern xfs_fileoff_t xfs_bmbt_disk_get_startoff(const struct xfs_bmbt_rec *r);
+void xfs_bmbt_disk_get_all(const struct xfs_bmbt_rec *r,
+		struct xfs_bmbt_irec *s);
 
 extern void xfs_bmbt_to_bmdr(struct xfs_mount *, struct xfs_btree_block *, int,
			xfs_bmdr_block_t *, int);
diff --git a/fs/xfs/libxfs/xfs_btree.c b/fs/xfs/libxfs/xfs_btree.c
index be74a6b53689..298395481713 100644
--- a/fs/xfs/libxfs/xfs_btree.c
+++ b/fs/xfs/libxfs/xfs_btree.c
@@ -64,13 +64,13 @@ __xfs_btree_check_lblock(
 {
	struct xfs_mount	*mp = cur->bc_mp;
	xfs_btnum_t		btnum = cur->bc_btnum;
-	int			crc = xfs_sb_version_hascrc(&mp->m_sb);
+	int			crc = xfs_has_crc(mp);
 
	if (crc) {
		if (!uuid_equal(&block->bb_u.l.bb_uuid, &mp->m_sb.sb_meta_uuid))
			return __this_address;
		if (block->bb_u.l.bb_blkno !=
-		    cpu_to_be64(bp ? bp->b_bn : XFS_BUF_DADDR_NULL))
+		    cpu_to_be64(bp ? xfs_buf_daddr(bp) : XFS_BUF_DADDR_NULL))
			return __this_address;
		if (block->bb_u.l.bb_pad != cpu_to_be32(0))
			return __this_address;
@@ -129,13 +129,13 @@ __xfs_btree_check_sblock(
 {
	struct xfs_mount	*mp = cur->bc_mp;
	xfs_btnum_t		btnum = cur->bc_btnum;
-	int			crc = xfs_sb_version_hascrc(&mp->m_sb);
+	int			crc = xfs_has_crc(mp);
 
	if (crc) {
		if (!uuid_equal(&block->bb_u.s.bb_uuid, &mp->m_sb.sb_meta_uuid))
			return __this_address;
		if (block->bb_u.s.bb_blkno !=
-		    cpu_to_be64(bp ? bp->b_bn : XFS_BUF_DADDR_NULL))
+		    cpu_to_be64(bp ? xfs_buf_daddr(bp) : XFS_BUF_DADDR_NULL))
			return __this_address;
	}
 
@@ -225,10 +225,10 @@ xfs_btree_check_sptr(
  */
 static int
 xfs_btree_check_ptr(
-	struct xfs_btree_cur	*cur,
-	union xfs_btree_ptr	*ptr,
-	int			index,
-	int			level)
+	struct xfs_btree_cur		*cur,
+	const union xfs_btree_ptr	*ptr,
+	int				index,
+	int				level)
 {
	if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
		if (xfs_btree_check_lptr(cur, be64_to_cpu((&ptr->l)[index]),
@@ -273,7 +273,7 @@ xfs_btree_lblock_calc_crc(
	struct xfs_btree_block	*block = XFS_BUF_TO_BLOCK(bp);
	struct xfs_buf_log_item	*bip = bp->b_log_item;
 
-	if (!xfs_sb_version_hascrc(&bp->b_mount->m_sb))
+	if (!xfs_has_crc(bp->b_mount))
		return;
	if (bip)
		block->bb_u.l.bb_lsn = cpu_to_be64(bip->bli_item.li_lsn);
@@ -287,7 +287,7 @@ xfs_btree_lblock_verify_crc(
	struct xfs_btree_block	*block = XFS_BUF_TO_BLOCK(bp);
	struct xfs_mount	*mp = bp->b_mount;
 
-	if (xfs_sb_version_hascrc(&mp->m_sb)) {
+	if (xfs_has_crc(mp)) {
		if (!xfs_log_check_lsn(mp, be64_to_cpu(block->bb_u.l.bb_lsn)))
			return false;
		return xfs_buf_verify_cksum(bp, XFS_BTREE_LBLOCK_CRC_OFF);
@@ -311,7 +311,7 @@ xfs_btree_sblock_calc_crc(
	struct xfs_btree_block	*block = XFS_BUF_TO_BLOCK(bp);
	struct xfs_buf_log_item	*bip = bp->b_log_item;
 
-	if (!xfs_sb_version_hascrc(&bp->b_mount->m_sb))
+	if (!xfs_has_crc(bp->b_mount))
		return;
	if (bip)
		block->bb_u.s.bb_lsn = cpu_to_be64(bip->bli_item.li_lsn);
@@ -325,7 +325,7 @@ xfs_btree_sblock_verify_crc(
	struct xfs_btree_block	*block = XFS_BUF_TO_BLOCK(bp);
	struct xfs_mount	*mp = bp->b_mount;
 
-	if (xfs_sb_version_hascrc(&mp->m_sb)) {
+	if (xfs_has_crc(mp)) {
		if (!xfs_log_check_lsn(mp, be64_to_cpu(block->bb_u.s.bb_lsn)))
			return false;
		return xfs_buf_verify_cksum(bp, XFS_BTREE_SBLOCK_CRC_OFF);
@@ -374,7 +374,7 @@ xfs_btree_del_cursor(
	}
 
	ASSERT(cur->bc_btnum != XFS_BTNUM_BMAP || cur->bc_ino.allocated == 0 ||
-	       XFS_FORCED_SHUTDOWN(cur->bc_mp));
+	       xfs_is_shutdown(cur->bc_mp));
	if (unlikely(cur->bc_flags & XFS_BTREE_STAGING))
		kmem_free(cur->bc_ops);
	if (!(cur->bc_flags & XFS_BTREE_LONG_PTRS) && cur->bc_ag.pag)
@@ -420,7 +420,7 @@ xfs_btree_dup_cursor(
		bp = cur->bc_bufs[i];
		if (bp) {
			error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp,
-						   XFS_BUF_ADDR(bp), mp->m_bsize,
+						   xfs_buf_daddr(bp), mp->m_bsize,
						   0, &bp,
						   cur->bc_ops->buf_ops);
			if (error) {
@@ -935,9 +935,9 @@ xfs_btree_readahead(
 
 STATIC int
 xfs_btree_ptr_to_daddr(
-	struct xfs_btree_cur	*cur,
-	union xfs_btree_ptr	*ptr,
-	xfs_daddr_t		*daddr)
+	struct xfs_btree_cur		*cur,
+	const union xfs_btree_ptr	*ptr,
+	xfs_daddr_t			*daddr)
 {
	xfs_fsblock_t		fsbno;
	xfs_agblock_t		agbno;
@@ -1012,8 +1012,8 @@ xfs_btree_setbuf(
 
 bool
 xfs_btree_ptr_is_null(
-	struct xfs_btree_cur	*cur,
-	union xfs_btree_ptr	*ptr)
+	struct xfs_btree_cur		*cur,
+	const union xfs_btree_ptr	*ptr)
 {
	if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
		return ptr->l == cpu_to_be64(NULLFSBLOCK);
@@ -1059,10 +1059,10 @@ xfs_btree_get_sibling(
 
 void
 xfs_btree_set_sibling(
-	struct xfs_btree_cur	*cur,
-	struct xfs_btree_block	*block,
-	union xfs_btree_ptr	*ptr,
-	int			lr)
+	struct xfs_btree_cur		*cur,
+	struct xfs_btree_block		*block,
+	const union xfs_btree_ptr	*ptr,
+	int				lr)
 {
	ASSERT(lr == XFS_BB_LEFTSIB || lr == XFS_BB_RIGHTSIB);
 
@@ -1090,7 +1090,7 @@ xfs_btree_init_block_int(
	__u64			owner,
	unsigned int		flags)
 {
-	int			crc = xfs_sb_version_hascrc(&mp->m_sb);
+	int			crc = xfs_has_crc(mp);
	__u32			magic = xfs_btree_magic(crc, btnum);
 
	buf->bb_magic = cpu_to_be32(magic);
@@ -1131,7 +1131,7 @@ xfs_btree_init_block(
	__u16		numrecs,
	__u64		owner)
 {
-	xfs_btree_init_block_int(mp, XFS_BUF_TO_BLOCK(bp), bp->b_bn,
+	xfs_btree_init_block_int(mp, XFS_BUF_TO_BLOCK(bp), xfs_buf_daddr(bp),
				 btnum, level, numrecs, owner, 0);
 }
 
@@ -1155,9 +1155,9 @@ xfs_btree_init_block_cur(
	else
		owner = cur->bc_ag.pag->pag_agno;
 
-	xfs_btree_init_block_int(cur->bc_mp, XFS_BUF_TO_BLOCK(bp), bp->b_bn,
-				 cur->bc_btnum, level, numrecs,
-				 owner, cur->bc_flags);
+	xfs_btree_init_block_int(cur->bc_mp, XFS_BUF_TO_BLOCK(bp),
				 xfs_buf_daddr(bp), cur->bc_btnum, level,
				 numrecs, owner, cur->bc_flags);
 }
 
 /*
@@ -1192,10 +1192,10 @@ xfs_btree_buf_to_ptr(
 {
	if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
		ptr->l = cpu_to_be64(XFS_DADDR_TO_FSB(cur->bc_mp,
-					XFS_BUF_ADDR(bp)));
+					xfs_buf_daddr(bp)));
	else {
		ptr->s = cpu_to_be32(xfs_daddr_to_agbno(cur->bc_mp,
-					XFS_BUF_ADDR(bp)));
+					xfs_buf_daddr(bp)));
	}
 }
 
@@ -1229,10 +1229,10 @@ xfs_btree_set_refs(
 
 int
 xfs_btree_get_buf_block(
-	struct xfs_btree_cur	*cur,
-	union xfs_btree_ptr	*ptr,
-	struct xfs_btree_block	**block,
-	struct xfs_buf		**bpp)
+	struct xfs_btree_cur		*cur,
+	const union xfs_btree_ptr	*ptr,
+	struct xfs_btree_block		**block,
+	struct xfs_buf			**bpp)
 {
	struct xfs_mount	*mp = cur->bc_mp;
	xfs_daddr_t		d;
@@ -1257,11 +1257,11 @@ xfs_btree_get_buf_block(
  */
 STATIC int
 xfs_btree_read_buf_block(
-	struct xfs_btree_cur	*cur,
-	union xfs_btree_ptr	*ptr,
-	int			flags,
-	struct xfs_btree_block	**block,
-	struct xfs_buf		**bpp)
+	struct xfs_btree_cur		*cur,
+	const union xfs_btree_ptr	*ptr,
+	int				flags,
+	struct xfs_btree_block		**block,
+	struct xfs_buf			**bpp)
 {
	struct xfs_mount	*mp = cur->bc_mp;
	xfs_daddr_t		d;
@@ -1289,10 +1289,10 @@ xfs_btree_read_buf_block(
  */
 void
 xfs_btree_copy_keys(
-	struct xfs_btree_cur	*cur,
-	union xfs_btree_key	*dst_key,
-	union xfs_btree_key	*src_key,
-	int			numkeys)
+	struct xfs_btree_cur		*cur,
+	union xfs_btree_key		*dst_key,
+	const union xfs_btree_key	*src_key,
+	int				numkeys)
 {
	ASSERT(numkeys >= 0);
	memcpy(dst_key, src_key, numkeys * cur->bc_ops->key_len);
@@ -1713,10 +1713,10 @@ error0:
 
 int
 xfs_btree_lookup_get_block(
-	struct xfs_btree_cur	*cur,	/* btree cursor */
-	int			level,	/* level in the btree */
-	union xfs_btree_ptr	*pp,	/* ptr to btree block */
-	struct xfs_btree_block	**blkp) /* return btree block */
+	struct xfs_btree_cur		*cur,	/* btree cursor */
+	int				level,	/* level in the btree */
+	const union xfs_btree_ptr	*pp,	/* ptr to btree block */
+	struct xfs_btree_block		**blkp) /* return btree block */
 {
	struct xfs_buf		*bp;	/* buffer pointer for btree block */
	xfs_daddr_t		daddr;
@@ -1739,7 +1739,7 @@ xfs_btree_lookup_get_block(
	error = xfs_btree_ptr_to_daddr(cur, pp, &daddr);
	if (error)
		return error;
-	if (bp && XFS_BUF_ADDR(bp) == daddr) {
+	if (bp && xfs_buf_daddr(bp) == daddr) {
		*blkp = XFS_BUF_TO_BLOCK(bp);
		return 0;
	}
@@ -1749,7 +1749,7 @@ xfs_btree_lookup_get_block(
		return error;
 
	/* Check the inode owner since the verifiers don't.
 */
-	if (xfs_sb_version_hascrc(&cur->bc_mp->m_sb) &&
+	if (xfs_has_crc(cur->bc_mp) &&
	    !(cur->bc_ino.flags & XFS_BTCUR_BMBT_INVALID_OWNER) &&
	    (cur->bc_flags & XFS_BTREE_LONG_PTRS) &&
	    be64_to_cpu((*blkp)->bb_u.l.bb_owner) !=
@@ -2923,10 +2923,11 @@ xfs_btree_new_iroot(
	 */
	memcpy(cblock, block, xfs_btree_block_len(cur));
	if (cur->bc_flags & XFS_BTREE_CRC_BLOCKS) {
+		__be64 bno = cpu_to_be64(xfs_buf_daddr(cbp));
		if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
-			cblock->bb_u.l.bb_blkno = cpu_to_be64(cbp->b_bn);
+			cblock->bb_u.l.bb_blkno = bno;
		else
-			cblock->bb_u.s.bb_blkno = cpu_to_be64(cbp->b_bn);
+			cblock->bb_u.s.bb_blkno = bno;
	}
 
	be16_add_cpu(&block->bb_level, 1);
@@ -3225,7 +3226,7 @@ xfs_btree_insrec(
 
	/* Get pointers to the btree buffer and block. */
	block = xfs_btree_get_block(cur, level, &bp);
-	old_bn = bp ? bp->b_bn : XFS_BUF_DADDR_NULL;
+	old_bn = bp ? xfs_buf_daddr(bp) : XFS_BUF_DADDR_NULL;
	numrecs = xfs_btree_get_numrecs(block);
 
 #ifdef DEBUG
@@ -3341,7 +3342,7 @@ xfs_btree_insrec(
	 * some records into the new tree block), so use the regular key
	 * update mechanism.
	 */
-	if (bp && bp->b_bn != old_bn) {
+	if (bp && xfs_buf_daddr(bp) != old_bn) {
		xfs_btree_get_keys(cur, block, lkey);
	} else if (xfs_btree_needs_key_update(cur, optr)) {
		error = xfs_btree_update_keys(cur, level);
@@ -4418,11 +4419,11 @@ xfs_btree_lblock_v5hdr_verify(
	struct xfs_mount	*mp = bp->b_mount;
	struct xfs_btree_block	*block = XFS_BUF_TO_BLOCK(bp);
 
-	if (!xfs_sb_version_hascrc(&mp->m_sb))
+	if (!xfs_has_crc(mp))
		return __this_address;
	if (!uuid_equal(&block->bb_u.l.bb_uuid, &mp->m_sb.sb_meta_uuid))
		return __this_address;
-	if (block->bb_u.l.bb_blkno != cpu_to_be64(bp->b_bn))
+	if (block->bb_u.l.bb_blkno != cpu_to_be64(xfs_buf_daddr(bp)))
		return __this_address;
	if (owner != XFS_RMAP_OWN_UNKNOWN &&
	    be64_to_cpu(block->bb_u.l.bb_owner) != owner)
@@ -4468,11 +4469,11 @@ xfs_btree_sblock_v5hdr_verify(
	struct xfs_btree_block	*block = XFS_BUF_TO_BLOCK(bp);
	struct xfs_perag	*pag = bp->b_pag;
 
-	if (!xfs_sb_version_hascrc(&mp->m_sb))
+	if (!xfs_has_crc(mp))
		return __this_address;
	if (!uuid_equal(&block->bb_u.s.bb_uuid, &mp->m_sb.sb_meta_uuid))
		return __this_address;
-	if (block->bb_u.s.bb_blkno != cpu_to_be64(bp->b_bn))
+	if (block->bb_u.s.bb_blkno != cpu_to_be64(xfs_buf_daddr(bp)))
		return __this_address;
	if (pag && be32_to_cpu(block->bb_u.s.bb_owner) != pag->pag_agno)
		return __this_address;
@@ -4499,7 +4500,7 @@ xfs_btree_sblock_verify(
		return __this_address;
 
	/* sibling pointer verification */
-	agno = xfs_daddr_to_agno(mp, XFS_BUF_ADDR(bp));
+	agno = xfs_daddr_to_agno(mp, xfs_buf_daddr(bp));
	if (block->bb_u.s.bb_leftsib != cpu_to_be32(NULLAGBLOCK) &&
	    !xfs_verify_agbno(mp, agno, be32_to_cpu(block->bb_u.s.bb_leftsib)))
		return __this_address;
@@ -4536,8 +4537,8 @@ xfs_btree_compute_maxlevels(
 STATIC int
 xfs_btree_simple_query_range(
	struct xfs_btree_cur		*cur,
-	union xfs_btree_key		*low_key,
-	union xfs_btree_key		*high_key,
+	const union xfs_btree_key	*low_key,
+	const union xfs_btree_key	*high_key,
	xfs_btree_query_range_fn	fn,
	void				*priv)
 {
@@ -4627,8 +4628,8 @@ out:
 STATIC int
 xfs_btree_overlapped_query_range(
	struct xfs_btree_cur		*cur,
-	union xfs_btree_key		*low_key,
-	union xfs_btree_key		*high_key,
+	const union xfs_btree_key	*low_key,
+	const union xfs_btree_key	*high_key,
	xfs_btree_query_range_fn	fn,
	void				*priv)
 {
@@ -4769,8 +4770,8 @@ out:
 int
 xfs_btree_query_range(
	struct xfs_btree_cur		*cur,
-	union xfs_btree_irec		*low_rec,
-	union xfs_btree_irec		*high_rec,
+	const union xfs_btree_irec	*low_rec,
+	const union xfs_btree_irec	*high_rec,
	xfs_btree_query_range_fn	fn,
	void				*priv)
 {
@@ -4877,7 +4878,7 @@ xfs_btree_diff_two_ptrs(
 STATIC int
 xfs_btree_has_record_helper(
	struct xfs_btree_cur		*cur,
-	union xfs_btree_rec		*rec,
+	const union xfs_btree_rec	*rec,
	void				*priv)
 {
	return -ECANCELED;
@@ -4886,12 +4887,12 @@ xfs_btree_has_record_helper(
 /* Is there a record covering a given range of keys? */
 int
 xfs_btree_has_record(
-	struct xfs_btree_cur	*cur,
-	union xfs_btree_irec	*low,
-	union xfs_btree_irec	*high,
-	bool			*exists)
+	struct xfs_btree_cur		*cur,
+	const union xfs_btree_irec	*low,
+	const union xfs_btree_irec	*high,
+	bool				*exists)
 {
-	int			error;
+	int				error;
 
	error = xfs_btree_query_range(cur, low, high,
			&xfs_btree_has_record_helper, NULL);
diff --git a/fs/xfs/libxfs/xfs_btree.h b/fs/xfs/libxfs/xfs_btree.h
index 4dbdc659c396..4eaf8517f850 100644
--- a/fs/xfs/libxfs/xfs_btree.h
+++ b/fs/xfs/libxfs/xfs_btree.h
@@ -106,19 +106,19 @@ struct xfs_btree_ops {
 
	/* update btree root pointer */
	void	(*set_root)(struct xfs_btree_cur *cur,
-			    union xfs_btree_ptr *nptr, int level_change);
+			    const union xfs_btree_ptr *nptr, int level_change);
 
	/* block allocation / freeing */
	int	(*alloc_block)(struct xfs_btree_cur *cur,
-			       union xfs_btree_ptr *start_bno,
+			       const union xfs_btree_ptr *start_bno,
			       union xfs_btree_ptr *new_bno,
			       int *stat);
	int	(*free_block)(struct xfs_btree_cur *cur, struct xfs_buf *bp);
 
	/* update last record information */
	void	(*update_lastrec)(struct xfs_btree_cur *cur,
-				  struct xfs_btree_block *block,
-				  union xfs_btree_rec *rec,
+				  const struct xfs_btree_block *block,
+				  const union xfs_btree_rec *rec,
				  int ptr, int reason);
 
	/* records in block/level */
@@ -130,37 +130,37 @@ struct xfs_btree_ops {
 
	/* init values of btree structures */
	void	(*init_key_from_rec)(union xfs_btree_key *key,
-				     union xfs_btree_rec *rec);
+				     const union xfs_btree_rec *rec);
	void	(*init_rec_from_cur)(struct xfs_btree_cur *cur,
				     union xfs_btree_rec *rec);
	void	(*init_ptr_from_cur)(struct xfs_btree_cur *cur,
				     union xfs_btree_ptr *ptr);
	void	(*init_high_key_from_rec)(union xfs_btree_key *key,
-					  union xfs_btree_rec *rec);
+					  const union xfs_btree_rec *rec);
 
	/* difference between key value and cursor value */
	int64_t	(*key_diff)(struct xfs_btree_cur *cur,
-			    union xfs_btree_key *key);
+			    const union xfs_btree_key *key);
 
	/*
	 * Difference between key2 and key1 -- positive if key1 > key2,
	 * negative if key1 < key2, and zero if equal.
	 */
	int64_t	(*diff_two_keys)(struct xfs_btree_cur *cur,
-				 union xfs_btree_key *key1,
-				 union xfs_btree_key *key2);
+				 const union xfs_btree_key *key1,
+				 const union xfs_btree_key *key2);
 
	const struct xfs_buf_ops	*buf_ops;
 
	/* check that k1 is lower than k2 */
	int	(*keys_inorder)(struct xfs_btree_cur *cur,
-				union xfs_btree_key *k1,
-				union xfs_btree_key *k2);
+				const union xfs_btree_key *k1,
+				const union xfs_btree_key *k2);
 
	/* check that r1 is lower than r2 */
	int	(*recs_inorder)(struct xfs_btree_cur *cur,
-				union xfs_btree_rec *r1,
-				union xfs_btree_rec *r2);
+				const union xfs_btree_rec *r1,
+				const union xfs_btree_rec *r2);
 };
 
 /*
@@ -423,7 +423,7 @@ void xfs_btree_log_recs(struct xfs_btree_cur *, struct xfs_buf *, int, int);
 /*
  * Helpers.
  */
-static inline int xfs_btree_get_numrecs(struct xfs_btree_block *block)
+static inline int xfs_btree_get_numrecs(const struct xfs_btree_block *block)
 {
	return be16_to_cpu(block->bb_numrecs);
 }
@@ -434,7 +434,7 @@ static inline void xfs_btree_set_numrecs(struct xfs_btree_block *block,
	block->bb_numrecs = cpu_to_be16(numrecs);
 }
 
-static inline int xfs_btree_get_level(struct xfs_btree_block *block)
+static inline int xfs_btree_get_level(const struct xfs_btree_block *block)
 {
	return be16_to_cpu(block->bb_level);
 }
@@ -471,10 +471,11 @@ unsigned long long xfs_btree_calc_size(uint *limits, unsigned long long len);
  * code on its own.
  */
 typedef int (*xfs_btree_query_range_fn)(struct xfs_btree_cur *cur,
-		union xfs_btree_rec *rec, void *priv);
+		const union xfs_btree_rec *rec, void *priv);
 
 int xfs_btree_query_range(struct xfs_btree_cur *cur,
-		union xfs_btree_irec *low_rec, union xfs_btree_irec *high_rec,
+		const union xfs_btree_irec *low_rec,
+		const union xfs_btree_irec *high_rec,
		xfs_btree_query_range_fn fn, void *priv);
 int xfs_btree_query_all(struct xfs_btree_cur *cur, xfs_btree_query_range_fn fn,
		void *priv);
@@ -502,10 +503,11 @@ union xfs_btree_key *xfs_btree_high_key_addr(struct xfs_btree_cur *cur, int n,
 union xfs_btree_ptr *xfs_btree_ptr_addr(struct xfs_btree_cur *cur, int n,
		struct xfs_btree_block *block);
 int xfs_btree_lookup_get_block(struct xfs_btree_cur *cur, int level,
-		union xfs_btree_ptr *pp, struct xfs_btree_block **blkp);
+		const union xfs_btree_ptr *pp, struct xfs_btree_block **blkp);
 struct xfs_btree_block *xfs_btree_get_block(struct xfs_btree_cur *cur,
		int level, struct xfs_buf **bpp);
-bool xfs_btree_ptr_is_null(struct xfs_btree_cur *cur, union xfs_btree_ptr *ptr);
+bool xfs_btree_ptr_is_null(struct xfs_btree_cur *cur,
+		const union xfs_btree_ptr *ptr);
 int64_t xfs_btree_diff_two_ptrs(struct xfs_btree_cur *cur,
		const union xfs_btree_ptr *a,
		const union xfs_btree_ptr *b);
@@ -516,8 +518,9 @@ void xfs_btree_get_keys(struct xfs_btree_cur *cur,
		struct xfs_btree_block *block, union xfs_btree_key *key);
 union xfs_btree_key *xfs_btree_high_key_from_key(struct xfs_btree_cur *cur,
		union xfs_btree_key *key);
-int xfs_btree_has_record(struct xfs_btree_cur *cur, union xfs_btree_irec *low,
-		union xfs_btree_irec *high, bool *exists);
+int xfs_btree_has_record(struct xfs_btree_cur *cur,
+		const union xfs_btree_irec *low,
+		const union xfs_btree_irec *high, bool *exists);
 bool xfs_btree_has_more_records(struct xfs_btree_cur *cur);
 struct xfs_ifork *xfs_btree_ifork_ptr(struct xfs_btree_cur *cur);
 
@@ -540,10 +543,11 @@ xfs_btree_islastblock(
 
 void xfs_btree_set_ptr_null(struct xfs_btree_cur *cur,
		union xfs_btree_ptr *ptr);
-int xfs_btree_get_buf_block(struct xfs_btree_cur *cur, union xfs_btree_ptr *ptr,
-		struct xfs_btree_block **block, struct xfs_buf **bpp);
+int xfs_btree_get_buf_block(struct xfs_btree_cur *cur,
+		const union xfs_btree_ptr *ptr, struct xfs_btree_block **block,
+		struct xfs_buf **bpp);
 void xfs_btree_set_sibling(struct xfs_btree_cur *cur,
-		struct xfs_btree_block *block, union xfs_btree_ptr *ptr,
+		struct xfs_btree_block *block, const union xfs_btree_ptr *ptr,
		int lr);
 void xfs_btree_init_block_cur(struct xfs_btree_cur *cur, struct xfs_buf *bp,
		int level, int numrecs);
@@ -551,7 +555,7 @@ void xfs_btree_copy_ptrs(struct xfs_btree_cur *cur,
		union xfs_btree_ptr *dst_ptr, const union xfs_btree_ptr *src_ptr,
		int numptrs);
 void xfs_btree_copy_keys(struct xfs_btree_cur *cur,
-		union xfs_btree_key *dst_key, union xfs_btree_key *src_key,
-		int numkeys);
+		union xfs_btree_key *dst_key,
+		const union xfs_btree_key *src_key, int numkeys);
 
 #endif	/* __XFS_BTREE_H__ */
diff --git a/fs/xfs/libxfs/xfs_btree_staging.c b/fs/xfs/libxfs/xfs_btree_staging.c
index aa8dc9521c39..ac9e80152b5c 100644
--- a/fs/xfs/libxfs/xfs_btree_staging.c
+++ b/fs/xfs/libxfs/xfs_btree_staging.c
@@ -59,10 +59,10 @@ xfs_btree_fakeroot_dup_cursor(
  */
 STATIC int
 xfs_btree_fakeroot_alloc_block(
-	struct xfs_btree_cur	*cur,
-	union xfs_btree_ptr	*start_bno,
-	union xfs_btree_ptr	*new_bno,
-	int			*stat)
+	struct xfs_btree_cur		*cur,
+	const union xfs_btree_ptr	*start_bno,
+	union xfs_btree_ptr		*new_bno,
+	int				*stat)
 {
	ASSERT(0);
	return -EFSCORRUPTED;
@@ -112,9 +112,9 @@ xfs_btree_fakeroot_init_ptr_from_cur(
 /* Update the btree root information for a per-AG fake root. */
 STATIC void
 xfs_btree_afakeroot_set_root(
-	struct xfs_btree_cur	*cur,
-	union xfs_btree_ptr	*ptr,
-	int			inc)
+	struct xfs_btree_cur		*cur,
+	const union xfs_btree_ptr	*ptr,
+	int				inc)
 {
	struct xbtree_afakeroot	*afake = cur->bc_ag.afake;
 
diff --git a/fs/xfs/libxfs/xfs_da_btree.c b/fs/xfs/libxfs/xfs_da_btree.c
index 747ec77912c3..c062e2c85178 100644
--- a/fs/xfs/libxfs/xfs_da_btree.c
+++ b/fs/xfs/libxfs/xfs_da_btree.c
@@ -129,7 +129,7 @@ xfs_da3_node_hdr_from_disk(
	struct xfs_da3_icnode_hdr	*to,
	struct xfs_da_intnode		*from)
 {
-	if (xfs_sb_version_hascrc(&mp->m_sb)) {
+	if (xfs_has_crc(mp)) {
		struct xfs_da3_intnode	*from3 = (struct xfs_da3_intnode *)from;
 
		to->forw = be32_to_cpu(from3->hdr.info.hdr.forw);
@@ -156,7 +156,7 @@ xfs_da3_node_hdr_to_disk(
	struct xfs_da_intnode		*to,
	struct xfs_da3_icnode_hdr	*from)
 {
-	if (xfs_sb_version_hascrc(&mp->m_sb)) {
+	if (xfs_has_crc(mp)) {
		struct xfs_da3_intnode	*to3 = (struct xfs_da3_intnode *)to;
 
		ASSERT(from->magic == XFS_DA3_NODE_MAGIC);
@@ -191,10 +191,10 @@ xfs_da3_blkinfo_verify(
	if (!xfs_verify_magic16(bp, hdr->magic))
		return __this_address;
 
-	if (xfs_sb_version_hascrc(&mp->m_sb)) {
+	if (xfs_has_crc(mp)) {
		if (!uuid_equal(&hdr3->uuid, &mp->m_sb.sb_meta_uuid))
			return __this_address;
-		if (be64_to_cpu(hdr3->blkno) != bp->b_bn)
+		if (be64_to_cpu(hdr3->blkno) != xfs_buf_daddr(bp))
			return __this_address;
		if (!xfs_log_check_lsn(mp, be64_to_cpu(hdr3->lsn)))
			return __this_address;
@@ -253,7 +253,7 @@ xfs_da3_node_write_verify(
		return;
	}
 
-	if (!xfs_sb_version_hascrc(&mp->m_sb))
+	if (!xfs_has_crc(mp))
		return;
 
	if (bip)
@@ -442,12 +442,12 @@ xfs_da3_node_create(
	xfs_trans_buf_set_type(tp, bp, XFS_BLFT_DA_NODE_BUF);
	node = bp->b_addr;
 
-	if (xfs_sb_version_hascrc(&mp->m_sb)) {
+	if (xfs_has_crc(mp)) {
		struct xfs_da3_node_hdr *hdr3 = bp->b_addr;
 
		memset(hdr3, 0, sizeof(struct xfs_da3_node_hdr));
		ichdr.magic = XFS_DA3_NODE_MAGIC;
-		hdr3->info.blkno = cpu_to_be64(bp->b_bn);
+		hdr3->info.blkno = cpu_to_be64(xfs_buf_daddr(bp));
		hdr3->info.owner = cpu_to_be64(args->dp->i_ino);
		uuid_copy(&hdr3->info.uuid, &mp->m_sb.sb_meta_uuid);
	} else {
@@ -711,7 +711,7 @@ xfs_da3_root_split(
	    oldroot->hdr.info.magic == cpu_to_be16(XFS_DIR3_LEAFN_MAGIC)) {
		struct xfs_da3_intnode *node3 = (struct xfs_da3_intnode *)node;
 
-		node3->hdr.info.blkno = cpu_to_be64(bp->b_bn);
+		node3->hdr.info.blkno = cpu_to_be64(xfs_buf_daddr(bp));
	}
	xfs_trans_log_buf(tp, bp, 0, size - 1);
@@ -1219,7 +1219,7 @@ xfs_da3_root_join(
	xfs_trans_buf_copy_type(root_blk->bp, bp);
	if (oldroothdr.magic == XFS_DA3_NODE_MAGIC) {
		struct xfs_da3_blkinfo *da3 = root_blk->bp->b_addr;
-		da3->blkno = cpu_to_be64(root_blk->bp->b_bn);
+		da3->blkno = cpu_to_be64(xfs_buf_daddr(root_blk->bp));
	}
	xfs_trans_log_buf(args->trans, root_blk->bp, 0,
			args->geo->blksize - 1);
--git a/fs/xfs/libxfs/xfs_da_format.h b/fs/xfs/libxfs/xfs_da_format.h index b876b44c0204..5a49caa5c9df 100644 --- a/fs/xfs/libxfs/xfs_da_format.h +++ b/fs/xfs/libxfs/xfs_da_format.h @@ -789,7 +789,7 @@ struct xfs_attr3_rmt_hdr { #define XFS_ATTR3_RMT_CRC_OFF offsetof(struct xfs_attr3_rmt_hdr, rm_crc) #define XFS_ATTR3_RMT_BUF_SPACE(mp, bufsize) \ - ((bufsize) - (xfs_sb_version_hascrc(&(mp)->m_sb) ? \ + ((bufsize) - (xfs_has_crc((mp)) ? \ sizeof(struct xfs_attr3_rmt_hdr) : 0)) /* Number of bytes in a directory block. */ diff --git a/fs/xfs/libxfs/xfs_dir2.c b/fs/xfs/libxfs/xfs_dir2.c index 050bdcc4fe73..50546eadaae2 100644 --- a/fs/xfs/libxfs/xfs_dir2.c +++ b/fs/xfs/libxfs/xfs_dir2.c @@ -115,7 +115,7 @@ xfs_da_mount( dageo->fsblog = mp->m_sb.sb_blocklog; dageo->blksize = xfs_dir2_dirblock_bytes(&mp->m_sb); dageo->fsbcount = 1 << mp->m_sb.sb_dirblklog; - if (xfs_sb_version_hascrc(&mp->m_sb)) { + if (xfs_has_crc(mp)) { dageo->node_hdr_size = sizeof(struct xfs_da3_node_hdr); dageo->leaf_hdr_size = sizeof(struct xfs_dir3_leaf_hdr); dageo->free_hdr_size = sizeof(struct xfs_dir3_free_hdr); @@ -730,7 +730,7 @@ xfs_dir2_hashname( struct xfs_mount *mp, struct xfs_name *name) { - if (unlikely(xfs_sb_version_hasasciici(&mp->m_sb))) + if (unlikely(xfs_has_asciici(mp))) return xfs_ascii_ci_hashname(name); return xfs_da_hashname(name->name, name->len); } @@ -741,7 +741,7 @@ xfs_dir2_compname( const unsigned char *name, int len) { - if (unlikely(xfs_sb_version_hasasciici(&args->dp->i_mount->m_sb))) + if (unlikely(xfs_has_asciici(args->dp->i_mount))) return xfs_ascii_ci_compname(args, name, len); return xfs_da_compname(args, name, len); } diff --git a/fs/xfs/libxfs/xfs_dir2_block.c b/fs/xfs/libxfs/xfs_dir2_block.c index 75e1421f69c4..df0869bba275 100644 --- a/fs/xfs/libxfs/xfs_dir2_block.c +++ b/fs/xfs/libxfs/xfs_dir2_block.c @@ -53,10 +53,10 @@ xfs_dir3_block_verify( if (!xfs_verify_magic(bp, hdr3->magic)) return __this_address; - if (xfs_sb_version_hascrc(&mp->m_sb)) { + if (xfs_has_crc(mp)) { if (!uuid_equal(&hdr3->uuid, &mp->m_sb.sb_meta_uuid)) return __this_address; - if (be64_to_cpu(hdr3->blkno) != bp->b_bn) + if (be64_to_cpu(hdr3->blkno) != xfs_buf_daddr(bp)) return __this_address; if (!xfs_log_check_lsn(mp, be64_to_cpu(hdr3->lsn))) return __this_address; @@ -71,7 +71,7 @@ xfs_dir3_block_read_verify( struct xfs_mount *mp = bp->b_mount; xfs_failaddr_t fa; - if (xfs_sb_version_hascrc(&mp->m_sb) && + if (xfs_has_crc(mp) && !xfs_buf_verify_cksum(bp, XFS_DIR3_DATA_CRC_OFF)) xfs_verifier_error(bp, -EFSBADCRC, __this_address); else { @@ -96,7 +96,7 @@ xfs_dir3_block_write_verify( return; } - if (!xfs_sb_version_hascrc(&mp->m_sb)) + if (!xfs_has_crc(mp)) return; if (bip) @@ -121,7 +121,7 @@ xfs_dir3_block_header_check( { struct xfs_mount *mp = dp->i_mount; - if (xfs_sb_version_hascrc(&mp->m_sb)) { + if (xfs_has_crc(mp)) { struct xfs_dir3_blk_hdr *hdr3 = bp->b_addr; if (be64_to_cpu(hdr3->owner) != dp->i_ino) @@ -171,10 +171,10 @@ xfs_dir3_block_init( bp->b_ops = &xfs_dir3_block_buf_ops; xfs_trans_buf_set_type(tp, bp, XFS_BLFT_DIR_BLOCK_BUF); - if (xfs_sb_version_hascrc(&mp->m_sb)) { + if (xfs_has_crc(mp)) { memset(hdr3, 0, sizeof(*hdr3)); hdr3->magic = cpu_to_be32(XFS_DIR3_BLOCK_MAGIC); - hdr3->blkno = cpu_to_be64(bp->b_bn); + hdr3->blkno = cpu_to_be64(xfs_buf_daddr(bp)); hdr3->owner = cpu_to_be64(dp->i_ino); uuid_copy(&hdr3->uuid, &mp->m_sb.sb_meta_uuid); return; diff --git a/fs/xfs/libxfs/xfs_dir2_data.c b/fs/xfs/libxfs/xfs_dir2_data.c index e67fa086f2c1..dbcf58979a59 100644 --- 
a/fs/xfs/libxfs/xfs_dir2_data.c +++ b/fs/xfs/libxfs/xfs_dir2_data.c @@ -29,7 +29,7 @@ xfs_dir2_data_bestfree_p( struct xfs_mount *mp, struct xfs_dir2_data_hdr *hdr) { - if (xfs_sb_version_hascrc(&mp->m_sb)) + if (xfs_has_crc(mp)) return ((struct xfs_dir3_data_hdr *)hdr)->best_free; return hdr->bestfree; } @@ -51,7 +51,7 @@ xfs_dir2_data_get_ftype( struct xfs_mount *mp, struct xfs_dir2_data_entry *dep) { - if (xfs_sb_version_hasftype(&mp->m_sb)) { + if (xfs_has_ftype(mp)) { uint8_t ftype = dep->name[dep->namelen]; if (likely(ftype < XFS_DIR3_FT_MAX)) @@ -70,7 +70,7 @@ xfs_dir2_data_put_ftype( ASSERT(ftype < XFS_DIR3_FT_MAX); ASSERT(dep->namelen != 0); - if (xfs_sb_version_hasftype(&mp->m_sb)) + if (xfs_has_ftype(mp)) dep->name[dep->namelen] = ftype; } @@ -297,10 +297,10 @@ xfs_dir3_data_verify( if (!xfs_verify_magic(bp, hdr3->magic)) return __this_address; - if (xfs_sb_version_hascrc(&mp->m_sb)) { + if (xfs_has_crc(mp)) { if (!uuid_equal(&hdr3->uuid, &mp->m_sb.sb_meta_uuid)) return __this_address; - if (be64_to_cpu(hdr3->blkno) != bp->b_bn) + if (be64_to_cpu(hdr3->blkno) != xfs_buf_daddr(bp)) return __this_address; if (!xfs_log_check_lsn(mp, be64_to_cpu(hdr3->lsn))) return __this_address; @@ -343,7 +343,7 @@ xfs_dir3_data_read_verify( struct xfs_mount *mp = bp->b_mount; xfs_failaddr_t fa; - if (xfs_sb_version_hascrc(&mp->m_sb) && + if (xfs_has_crc(mp) && !xfs_buf_verify_cksum(bp, XFS_DIR3_DATA_CRC_OFF)) xfs_verifier_error(bp, -EFSBADCRC, __this_address); else { @@ -368,7 +368,7 @@ xfs_dir3_data_write_verify( return; } - if (!xfs_sb_version_hascrc(&mp->m_sb)) + if (!xfs_has_crc(mp)) return; if (bip) @@ -401,7 +401,7 @@ xfs_dir3_data_header_check( { struct xfs_mount *mp = dp->i_mount; - if (xfs_sb_version_hascrc(&mp->m_sb)) { + if (xfs_has_crc(mp)) { struct xfs_dir3_data_hdr *hdr3 = bp->b_addr; if (be64_to_cpu(hdr3->hdr.owner) != dp->i_ino) @@ -717,12 +717,12 @@ xfs_dir3_data_init( * Initialize the header. 
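Every bp->b_bn conversion in these hunks funnels through the new xfs_buf_daddr() accessor. A minimal sketch of that helper, assuming it simply reports the block number from the first buffer map (the authoritative definition lives in fs/xfs/xfs_buf.h, outside this diff):

static inline xfs_daddr_t
xfs_buf_daddr(struct xfs_buf *bp)
{
	/* the disk address now lives only in the buffer map */
	return bp->b_maps[0].bm_bn;
}

Routing every reader through one accessor is what lets the series drop the duplicated b_bn field from struct xfs_buf.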
*/ hdr = bp->b_addr; - if (xfs_sb_version_hascrc(&mp->m_sb)) { + if (xfs_has_crc(mp)) { struct xfs_dir3_blk_hdr *hdr3 = bp->b_addr; memset(hdr3, 0, sizeof(*hdr3)); hdr3->magic = cpu_to_be32(XFS_DIR3_DATA_MAGIC); - hdr3->blkno = cpu_to_be64(bp->b_bn); + hdr3->blkno = cpu_to_be64(xfs_buf_daddr(bp)); hdr3->owner = cpu_to_be64(dp->i_ino); uuid_copy(&hdr3->uuid, &mp->m_sb.sb_meta_uuid); diff --git a/fs/xfs/libxfs/xfs_dir2_leaf.c b/fs/xfs/libxfs/xfs_dir2_leaf.c index 5369d8bb2593..d9b66306a9a7 100644 --- a/fs/xfs/libxfs/xfs_dir2_leaf.c +++ b/fs/xfs/libxfs/xfs_dir2_leaf.c @@ -37,7 +37,7 @@ xfs_dir2_leaf_hdr_from_disk( struct xfs_dir3_icleaf_hdr *to, struct xfs_dir2_leaf *from) { - if (xfs_sb_version_hascrc(&mp->m_sb)) { + if (xfs_has_crc(mp)) { struct xfs_dir3_leaf *from3 = (struct xfs_dir3_leaf *)from; to->forw = be32_to_cpu(from3->hdr.info.hdr.forw); @@ -68,7 +68,7 @@ xfs_dir2_leaf_hdr_to_disk( struct xfs_dir2_leaf *to, struct xfs_dir3_icleaf_hdr *from) { - if (xfs_sb_version_hascrc(&mp->m_sb)) { + if (xfs_has_crc(mp)) { struct xfs_dir3_leaf *to3 = (struct xfs_dir3_leaf *)to; ASSERT(from->magic == XFS_DIR3_LEAF1_MAGIC || @@ -108,7 +108,7 @@ xfs_dir3_leaf1_check( if (leafhdr.magic == XFS_DIR3_LEAF1_MAGIC) { struct xfs_dir3_leaf_hdr *leaf3 = bp->b_addr; - if (be64_to_cpu(leaf3->info.blkno) != bp->b_bn) + if (be64_to_cpu(leaf3->info.blkno) != xfs_buf_daddr(bp)) return __this_address; } else if (leafhdr.magic != XFS_DIR2_LEAF1_MAGIC) return __this_address; @@ -209,7 +209,7 @@ xfs_dir3_leaf_read_verify( struct xfs_mount *mp = bp->b_mount; xfs_failaddr_t fa; - if (xfs_sb_version_hascrc(&mp->m_sb) && + if (xfs_has_crc(mp) && !xfs_buf_verify_cksum(bp, XFS_DIR3_LEAF_CRC_OFF)) xfs_verifier_error(bp, -EFSBADCRC, __this_address); else { @@ -234,7 +234,7 @@ xfs_dir3_leaf_write_verify( return; } - if (!xfs_sb_version_hascrc(&mp->m_sb)) + if (!xfs_has_crc(mp)) return; if (bip) @@ -308,7 +308,7 @@ xfs_dir3_leaf_init( ASSERT(type == XFS_DIR2_LEAF1_MAGIC || type == XFS_DIR2_LEAFN_MAGIC); - if (xfs_sb_version_hascrc(&mp->m_sb)) { + if (xfs_has_crc(mp)) { struct xfs_dir3_leaf_hdr *leaf3 = bp->b_addr; memset(leaf3, 0, sizeof(*leaf3)); @@ -316,7 +316,7 @@ xfs_dir3_leaf_init( leaf3->info.hdr.magic = (type == XFS_DIR2_LEAF1_MAGIC) ? 
cpu_to_be16(XFS_DIR3_LEAF1_MAGIC) : cpu_to_be16(XFS_DIR3_LEAFN_MAGIC); - leaf3->info.blkno = cpu_to_be64(bp->b_bn); + leaf3->info.blkno = cpu_to_be64(xfs_buf_daddr(bp)); leaf3->info.owner = cpu_to_be64(owner); uuid_copy(&leaf3->info.uuid, &mp->m_sb.sb_meta_uuid); } else { diff --git a/fs/xfs/libxfs/xfs_dir2_node.c b/fs/xfs/libxfs/xfs_dir2_node.c index d0520afb913a..7a03aeb9f4c9 100644 --- a/fs/xfs/libxfs/xfs_dir2_node.c +++ b/fs/xfs/libxfs/xfs_dir2_node.c @@ -68,7 +68,7 @@ xfs_dir3_leafn_check( if (leafhdr.magic == XFS_DIR3_LEAFN_MAGIC) { struct xfs_dir3_leaf_hdr *leaf3 = bp->b_addr; - if (be64_to_cpu(leaf3->info.blkno) != bp->b_bn) + if (be64_to_cpu(leaf3->info.blkno) != xfs_buf_daddr(bp)) return __this_address; } else if (leafhdr.magic != XFS_DIR2_LEAFN_MAGIC) return __this_address; @@ -105,12 +105,12 @@ xfs_dir3_free_verify( if (!xfs_verify_magic(bp, hdr->magic)) return __this_address; - if (xfs_sb_version_hascrc(&mp->m_sb)) { + if (xfs_has_crc(mp)) { struct xfs_dir3_blk_hdr *hdr3 = bp->b_addr; if (!uuid_equal(&hdr3->uuid, &mp->m_sb.sb_meta_uuid)) return __this_address; - if (be64_to_cpu(hdr3->blkno) != bp->b_bn) + if (be64_to_cpu(hdr3->blkno) != xfs_buf_daddr(bp)) return __this_address; if (!xfs_log_check_lsn(mp, be64_to_cpu(hdr3->lsn))) return __this_address; @@ -128,7 +128,7 @@ xfs_dir3_free_read_verify( struct xfs_mount *mp = bp->b_mount; xfs_failaddr_t fa; - if (xfs_sb_version_hascrc(&mp->m_sb) && + if (xfs_has_crc(mp) && !xfs_buf_verify_cksum(bp, XFS_DIR3_FREE_CRC_OFF)) xfs_verifier_error(bp, -EFSBADCRC, __this_address); else { @@ -153,7 +153,7 @@ xfs_dir3_free_write_verify( return; } - if (!xfs_sb_version_hascrc(&mp->m_sb)) + if (!xfs_has_crc(mp)) return; if (bip) @@ -185,7 +185,7 @@ xfs_dir3_free_header_check( firstdb = (xfs_dir2_da_to_db(mp->m_dir_geo, fbno) - xfs_dir2_byte_to_db(mp->m_dir_geo, XFS_DIR2_FREE_OFFSET)) * maxbests; - if (xfs_sb_version_hascrc(&mp->m_sb)) { + if (xfs_has_crc(mp)) { struct xfs_dir3_free_hdr *hdr3 = bp->b_addr; if (be32_to_cpu(hdr3->firstdb) != firstdb) @@ -247,7 +247,7 @@ xfs_dir2_free_hdr_from_disk( struct xfs_dir3_icfree_hdr *to, struct xfs_dir2_free *from) { - if (xfs_sb_version_hascrc(&mp->m_sb)) { + if (xfs_has_crc(mp)) { struct xfs_dir3_free *from3 = (struct xfs_dir3_free *)from; to->magic = be32_to_cpu(from3->hdr.hdr.magic); @@ -274,7 +274,7 @@ xfs_dir2_free_hdr_to_disk( struct xfs_dir2_free *to, struct xfs_dir3_icfree_hdr *from) { - if (xfs_sb_version_hascrc(&mp->m_sb)) { + if (xfs_has_crc(mp)) { struct xfs_dir3_free *to3 = (struct xfs_dir3_free *)to; ASSERT(from->magic == XFS_DIR3_FREE_MAGIC); @@ -341,12 +341,12 @@ xfs_dir3_free_get_buf( memset(bp->b_addr, 0, sizeof(struct xfs_dir3_free_hdr)); memset(&hdr, 0, sizeof(hdr)); - if (xfs_sb_version_hascrc(&mp->m_sb)) { + if (xfs_has_crc(mp)) { struct xfs_dir3_free_hdr *hdr3 = bp->b_addr; hdr.magic = XFS_DIR3_FREE_MAGIC; - hdr3->hdr.blkno = cpu_to_be64(bp->b_bn); + hdr3->hdr.blkno = cpu_to_be64(xfs_buf_daddr(bp)); hdr3->hdr.owner = cpu_to_be64(dp->i_ino); uuid_copy(&hdr3->hdr.uuid, &mp->m_sb.sb_meta_uuid); } else diff --git a/fs/xfs/libxfs/xfs_dir2_priv.h b/fs/xfs/libxfs/xfs_dir2_priv.h index 94943ce49cab..711709a2aa53 100644 --- a/fs/xfs/libxfs/xfs_dir2_priv.h +++ b/fs/xfs/libxfs/xfs_dir2_priv.h @@ -196,7 +196,7 @@ xfs_dir2_data_entsize( len = offsetof(struct xfs_dir2_data_entry, name[0]) + namelen + sizeof(xfs_dir2_data_off_t) /* tag */; - if (xfs_sb_version_hasftype(&mp->m_sb)) + if (xfs_has_ftype(mp)) len += sizeof(uint8_t); return round_up(len, XFS_DIR2_DATA_ALIGN); } diff --git 
a/fs/xfs/libxfs/xfs_dir2_sf.c b/fs/xfs/libxfs/xfs_dir2_sf.c index 46d18bf9d5e1..5a97a87eaa20 100644 --- a/fs/xfs/libxfs/xfs_dir2_sf.c +++ b/fs/xfs/libxfs/xfs_dir2_sf.c @@ -48,7 +48,7 @@ xfs_dir2_sf_entsize( count += sizeof(struct xfs_dir2_sf_entry); /* namelen + offset */ count += hdr->i8count ? XFS_INO64_SIZE : XFS_INO32_SIZE; /* ino # */ - if (xfs_sb_version_hasftype(&mp->m_sb)) + if (xfs_has_ftype(mp)) count += sizeof(uint8_t); return count; } @@ -76,7 +76,7 @@ xfs_dir2_sf_get_ino( { uint8_t *from = sfep->name + sfep->namelen; - if (xfs_sb_version_hasftype(&mp->m_sb)) + if (xfs_has_ftype(mp)) from++; if (!hdr->i8count) @@ -95,7 +95,7 @@ xfs_dir2_sf_put_ino( ASSERT(ino <= XFS_MAXINUMBER); - if (xfs_sb_version_hasftype(&mp->m_sb)) + if (xfs_has_ftype(mp)) to++; if (hdr->i8count) @@ -135,7 +135,7 @@ xfs_dir2_sf_get_ftype( struct xfs_mount *mp, struct xfs_dir2_sf_entry *sfep) { - if (xfs_sb_version_hasftype(&mp->m_sb)) { + if (xfs_has_ftype(mp)) { uint8_t ftype = sfep->name[sfep->namelen]; if (ftype < XFS_DIR3_FT_MAX) @@ -153,7 +153,7 @@ xfs_dir2_sf_put_ftype( { ASSERT(ftype < XFS_DIR3_FT_MAX); - if (xfs_sb_version_hasftype(&mp->m_sb)) + if (xfs_has_ftype(mp)) sfep->name[sfep->namelen] = ftype; } @@ -192,7 +192,7 @@ xfs_dir2_block_sfsize( * if there is a filetype field, add the extra byte to the namelen * for each entry that we see. */ - has_ftype = xfs_sb_version_hasftype(&mp->m_sb) ? 1 : 0; + has_ftype = xfs_has_ftype(mp) ? 1 : 0; count = i8count = namelen = 0; btp = xfs_dir2_block_tail_p(geo, hdr); diff --git a/fs/xfs/libxfs/xfs_dquot_buf.c b/fs/xfs/libxfs/xfs_dquot_buf.c index 6766417d5ba4..deeb74becabc 100644 --- a/fs/xfs/libxfs/xfs_dquot_buf.c +++ b/fs/xfs/libxfs/xfs_dquot_buf.c @@ -70,7 +70,7 @@ xfs_dquot_verify( return __this_address; if ((ddq->d_type & XFS_DQTYPE_BIGTIME) && - !xfs_sb_version_hasbigtime(&mp->m_sb)) + !xfs_has_bigtime(mp)) return __this_address; if ((ddq->d_type & XFS_DQTYPE_BIGTIME) && !ddq->d_id) @@ -106,7 +106,7 @@ xfs_dqblk_verify( struct xfs_dqblk *dqb, xfs_dqid_t id) /* used only during quotacheck */ { - if (xfs_sb_version_hascrc(&mp->m_sb) && + if (xfs_has_crc(mp) && !uuid_equal(&dqb->dd_uuid, &mp->m_sb.sb_meta_uuid)) return __this_address; @@ -134,7 +134,7 @@ xfs_dqblk_repair( dqb->dd_diskdq.d_type = type; dqb->dd_diskdq.d_id = cpu_to_be32(id); - if (xfs_sb_version_hascrc(&mp->m_sb)) { + if (xfs_has_crc(mp)) { uuid_copy(&dqb->dd_uuid, &mp->m_sb.sb_meta_uuid); xfs_update_cksum((char *)dqb, sizeof(struct xfs_dqblk), XFS_DQUOT_CRC_OFF); @@ -151,7 +151,7 @@ xfs_dquot_buf_verify_crc( int ndquots; int i; - if (!xfs_sb_version_hascrc(&mp->m_sb)) + if (!xfs_has_crc(mp)) return true; /* diff --git a/fs/xfs/libxfs/xfs_format.h b/fs/xfs/libxfs/xfs_format.h index 76e2461b9e66..2d7057b7984b 100644 --- a/fs/xfs/libxfs/xfs_format.h +++ b/fs/xfs/libxfs/xfs_format.h @@ -9,7 +9,7 @@ /* * XFS On Disk Format Definitions * - * This header file defines all the on-disk format definitions for + * This header file defines all the on-disk format definitions for * general XFS objects. Directory and attribute related objects are defined in * xfs_da_format.h, which log and log item formats are defined in * xfs_log_format.h. Everything else goes here. @@ -265,7 +265,6 @@ typedef struct xfs_dsb { /* must be padded to 64 bit alignment */ } xfs_dsb_t; - /* * Misc. Flags - warning - these will be cleared by xfs_repair unless * a feature bit is set when the flag is used. 
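The xfs_sb_version_has*() call sites converted throughout this series become mount-based predicates; the hunks that follow delete the old superblock helpers. A plausible sketch of the replacement pattern, assuming feature state is cached in a per-mount bitmask when the superblock is parsed (the real XFS_FEAT_* values and predicate definitions live in fs/xfs/xfs_mount.h, which is not part of this diff):

/* illustrative bit value only */
#define XFS_FEAT_CRC		(1ULL << 12)

static inline bool
xfs_has_crc(struct xfs_mount *mp)
{
	/* feature bits are computed once at mount time */
	return mp->m_features & XFS_FEAT_CRC;
}

Testing a cached mask avoids re-deriving feature state from sb_versionnum and sb_features2 on every call.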
@@ -280,37 +279,9 @@ typedef struct xfs_dsb { #define XFS_SB_VERSION_NUM(sbp) ((sbp)->sb_versionnum & XFS_SB_VERSION_NUMBITS) -/* - * The first XFS version we support is a v4 superblock with V2 directories. - */ -static inline bool xfs_sb_good_v4_features(struct xfs_sb *sbp) -{ - if (!(sbp->sb_versionnum & XFS_SB_VERSION_DIRV2BIT)) - return false; - if (!(sbp->sb_versionnum & XFS_SB_VERSION_EXTFLGBIT)) - return false; - - /* check for unknown features in the fs */ - if ((sbp->sb_versionnum & ~XFS_SB_VERSION_OKBITS) || - ((sbp->sb_versionnum & XFS_SB_VERSION_MOREBITSBIT) && - (sbp->sb_features2 & ~XFS_SB_VERSION2_OKBITS))) - return false; - - return true; -} - -static inline bool xfs_sb_good_version(struct xfs_sb *sbp) +static inline bool xfs_sb_is_v5(struct xfs_sb *sbp) { - if (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5) - return true; - if (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_4) - return xfs_sb_good_v4_features(sbp); - return false; -} - -static inline bool xfs_sb_version_hasrealtime(struct xfs_sb *sbp) -{ - return sbp->sb_rblocks > 0; + return XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5; } /* @@ -322,9 +293,10 @@ static inline bool xfs_sb_has_mismatched_features2(struct xfs_sb *sbp) return sbp->sb_bad_features2 != sbp->sb_features2; } -static inline bool xfs_sb_version_hasattr(struct xfs_sb *sbp) +static inline bool xfs_sb_version_hasmorebits(struct xfs_sb *sbp) { - return (sbp->sb_versionnum & XFS_SB_VERSION_ATTRBIT); + return xfs_sb_is_v5(sbp) || + (sbp->sb_versionnum & XFS_SB_VERSION_MOREBITSBIT); } static inline void xfs_sb_version_addattr(struct xfs_sb *sbp) @@ -332,87 +304,18 @@ static inline void xfs_sb_version_addattr(struct xfs_sb *sbp) sbp->sb_versionnum |= XFS_SB_VERSION_ATTRBIT; } -static inline bool xfs_sb_version_hasquota(struct xfs_sb *sbp) -{ - return (sbp->sb_versionnum & XFS_SB_VERSION_QUOTABIT); -} - static inline void xfs_sb_version_addquota(struct xfs_sb *sbp) { sbp->sb_versionnum |= XFS_SB_VERSION_QUOTABIT; } -static inline bool xfs_sb_version_hasalign(struct xfs_sb *sbp) -{ - return (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5 || - (sbp->sb_versionnum & XFS_SB_VERSION_ALIGNBIT)); -} - -static inline bool xfs_sb_version_hasdalign(struct xfs_sb *sbp) -{ - return (sbp->sb_versionnum & XFS_SB_VERSION_DALIGNBIT); -} - -static inline bool xfs_sb_version_haslogv2(struct xfs_sb *sbp) -{ - return XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5 || - (sbp->sb_versionnum & XFS_SB_VERSION_LOGV2BIT); -} - -static inline bool xfs_sb_version_hassector(struct xfs_sb *sbp) -{ - return (sbp->sb_versionnum & XFS_SB_VERSION_SECTORBIT); -} - -static inline bool xfs_sb_version_hasasciici(struct xfs_sb *sbp) -{ - return (sbp->sb_versionnum & XFS_SB_VERSION_BORGBIT); -} - -static inline bool xfs_sb_version_hasmorebits(struct xfs_sb *sbp) -{ - return XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5 || - (sbp->sb_versionnum & XFS_SB_VERSION_MOREBITSBIT); -} - -/* - * sb_features2 bit version macros. 
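The helpers deleted below all reduce to the same shape: a v5 superblock implies the feature, and otherwise sb_features2 is consulted behind the MOREBITS gate. Under that reading, mount-time code would fold each on-disk bit into the incore mask roughly as follows (xfs_sb_is_v5() and xfs_sb_version_hasmorebits() are the helpers retained in the hunk above; the translation function and feature bit are hypothetical, following the sketch earlier):

static inline void
xfs_sb_attr2_to_feat(
	struct xfs_mount	*mp,
	struct xfs_sb		*sbp)
{
	if (xfs_sb_is_v5(sbp) ||
	    (xfs_sb_version_hasmorebits(sbp) &&
	     (sbp->sb_features2 & XFS_SB_VERSION2_ATTR2BIT)))
		mp->m_features |= XFS_FEAT_ATTR2;	/* hypothetical bit */
}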
- */ -static inline bool xfs_sb_version_haslazysbcount(struct xfs_sb *sbp) -{ - return (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5) || - (xfs_sb_version_hasmorebits(sbp) && - (sbp->sb_features2 & XFS_SB_VERSION2_LAZYSBCOUNTBIT)); -} - -static inline bool xfs_sb_version_hasattr2(struct xfs_sb *sbp) -{ - return (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5) || - (xfs_sb_version_hasmorebits(sbp) && - (sbp->sb_features2 & XFS_SB_VERSION2_ATTR2BIT)); -} - static inline void xfs_sb_version_addattr2(struct xfs_sb *sbp) { sbp->sb_versionnum |= XFS_SB_VERSION_MOREBITSBIT; sbp->sb_features2 |= XFS_SB_VERSION2_ATTR2BIT; } -static inline void xfs_sb_version_removeattr2(struct xfs_sb *sbp) -{ - sbp->sb_features2 &= ~XFS_SB_VERSION2_ATTR2BIT; - if (!sbp->sb_features2) - sbp->sb_versionnum &= ~XFS_SB_VERSION_MOREBITSBIT; -} - -static inline bool xfs_sb_version_hasprojid32bit(struct xfs_sb *sbp) -{ - return (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5) || - (xfs_sb_version_hasmorebits(sbp) && - (sbp->sb_features2 & XFS_SB_VERSION2_PROJID32BIT)); -} - -static inline void xfs_sb_version_addprojid32bit(struct xfs_sb *sbp) +static inline void xfs_sb_version_addprojid32(struct xfs_sb *sbp) { sbp->sb_versionnum |= XFS_SB_VERSION_MOREBITSBIT; sbp->sb_features2 |= XFS_SB_VERSION2_PROJID32BIT; @@ -495,106 +398,21 @@ xfs_sb_has_incompat_log_feature( return (sbp->sb_features_log_incompat & feature) != 0; } -/* - * V5 superblock specific feature checks - */ -static inline bool xfs_sb_version_hascrc(struct xfs_sb *sbp) -{ - return XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5; -} - -/* - * v5 file systems support V3 inodes only, earlier file systems support - * v2 and v1 inodes. - */ -static inline bool xfs_sb_version_has_v3inode(struct xfs_sb *sbp) -{ - return XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5; -} - -static inline bool xfs_dinode_good_version(struct xfs_sb *sbp, - uint8_t version) -{ - if (xfs_sb_version_has_v3inode(sbp)) - return version == 3; - return version == 1 || version == 2; -} - -static inline bool xfs_sb_version_has_pquotino(struct xfs_sb *sbp) -{ - return XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5; -} - -static inline int xfs_sb_version_hasftype(struct xfs_sb *sbp) -{ - return (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5 && - xfs_sb_has_incompat_feature(sbp, XFS_SB_FEAT_INCOMPAT_FTYPE)) || - (xfs_sb_version_hasmorebits(sbp) && - (sbp->sb_features2 & XFS_SB_VERSION2_FTYPE)); -} - -static inline bool xfs_sb_version_hasfinobt(xfs_sb_t *sbp) -{ - return (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5) && - (sbp->sb_features_ro_compat & XFS_SB_FEAT_RO_COMPAT_FINOBT); -} - -static inline bool xfs_sb_version_hassparseinodes(struct xfs_sb *sbp) -{ - return XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5 && - xfs_sb_has_incompat_feature(sbp, XFS_SB_FEAT_INCOMPAT_SPINODES); -} - -/* - * XFS_SB_FEAT_INCOMPAT_META_UUID indicates that the metadata UUID - * is stored separately from the user-visible UUID; this allows the - * user-visible UUID to be changed on V5 filesystems which have a - * filesystem UUID stamped into every piece of metadata. 
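The META_UUID comment just above is the rationale for every v5 verifier in this diff comparing buffer UUIDs against sb_meta_uuid rather than sb_uuid; the check itself is the one-liner already visible in those verifiers:

	if (!uuid_equal(&hdr3->uuid, &mp->m_sb.sb_meta_uuid))
		return __this_address;

Keeping the metadata UUID separate is what allows the user-visible UUID to change without rewriting every metadata block.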
- */ -static inline bool xfs_sb_version_hasmetauuid(struct xfs_sb *sbp) -{ - return (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5) && - (sbp->sb_features_incompat & XFS_SB_FEAT_INCOMPAT_META_UUID); -} - -static inline bool xfs_sb_version_hasrmapbt(struct xfs_sb *sbp) -{ - return (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5) && - (sbp->sb_features_ro_compat & XFS_SB_FEAT_RO_COMPAT_RMAPBT); -} - -static inline bool xfs_sb_version_hasreflink(struct xfs_sb *sbp) -{ - return XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5 && - (sbp->sb_features_ro_compat & XFS_SB_FEAT_RO_COMPAT_REFLINK); -} - -static inline bool xfs_sb_version_hasbigtime(struct xfs_sb *sbp) -{ - return XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5 && - (sbp->sb_features_incompat & XFS_SB_FEAT_INCOMPAT_BIGTIME); -} - -/* - * Inode btree block counter. We record the number of inobt and finobt blocks - * in the AGI header so that we can skip the finobt walk at mount time when - * setting up per-AG reservations. - */ -static inline bool xfs_sb_version_hasinobtcounts(struct xfs_sb *sbp) +static inline void +xfs_sb_remove_incompat_log_features( + struct xfs_sb *sbp) { - return XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5 && - (sbp->sb_features_ro_compat & XFS_SB_FEAT_RO_COMPAT_INOBTCNT); + sbp->sb_features_log_incompat &= ~XFS_SB_FEAT_INCOMPAT_LOG_ALL; } -static inline bool xfs_sb_version_needsrepair(struct xfs_sb *sbp) +static inline void +xfs_sb_add_incompat_log_features( + struct xfs_sb *sbp, + unsigned int features) { - return XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5 && - (sbp->sb_features_incompat & XFS_SB_FEAT_INCOMPAT_NEEDSREPAIR); + sbp->sb_features_log_incompat |= features; } -/* - * end of superblock version macros - */ static inline bool xfs_is_quota_inode(struct xfs_sb *sbp, xfs_ino_t ino) @@ -1062,12 +880,12 @@ enum xfs_dinode_fmt { /* * Inode size for given fs. */ -#define XFS_DINODE_SIZE(sbp) \ - (xfs_sb_version_has_v3inode(sbp) ? \ +#define XFS_DINODE_SIZE(mp) \ + (xfs_has_v3inodes(mp) ? \ sizeof(struct xfs_dinode) : \ offsetof(struct xfs_dinode, di_crc)) #define XFS_LITINO(mp) \ - ((mp)->m_sb.sb_inodesize - XFS_DINODE_SIZE(&(mp)->m_sb)) + ((mp)->m_sb.sb_inodesize - XFS_DINODE_SIZE(mp)) /* * Inode data & attribute fork sizes, per inode. @@ -1454,7 +1272,7 @@ struct xfs_dsymlink_hdr { #define XFS_SYMLINK_MAPS 3 #define XFS_SYMLINK_BUF_SPACE(mp, bufsize) \ - ((bufsize) - (xfs_sb_version_hascrc(&(mp)->m_sb) ? \ + ((bufsize) - (xfs_has_crc((mp)) ? \ sizeof(struct xfs_dsymlink_hdr) : 0)) @@ -1686,7 +1504,7 @@ struct xfs_rmap_key { typedef __be32 xfs_rmap_ptr_t; #define XFS_RMAP_BLOCK(mp) \ - (xfs_sb_version_hasfinobt(&((mp)->m_sb)) ? \ + (xfs_has_finobt(((mp))) ? \ XFS_FIBT_BLOCK(mp) + 1 : \ XFS_IBT_BLOCK(mp) + 1) @@ -1918,7 +1736,7 @@ struct xfs_acl { * limited only by the maximum size of the xattr that stores the information. */ #define XFS_ACL_MAX_ENTRIES(mp) \ - (xfs_sb_version_hascrc(&mp->m_sb) \ + (xfs_has_crc(mp) \ ? 
(XFS_XATTR_SIZE_MAX - sizeof(struct xfs_acl)) / \ sizeof(struct xfs_acl_entry) \ : 25) diff --git a/fs/xfs/libxfs/xfs_ialloc.c b/fs/xfs/libxfs/xfs_ialloc.c index aaf8805a82df..994ad783d407 100644 --- a/fs/xfs/libxfs/xfs_ialloc.c +++ b/fs/xfs/libxfs/xfs_ialloc.c @@ -58,7 +58,7 @@ xfs_inobt_update( union xfs_btree_rec rec; rec.inobt.ir_startino = cpu_to_be32(irec->ir_startino); - if (xfs_sb_version_hassparseinodes(&cur->bc_mp->m_sb)) { + if (xfs_has_sparseinodes(cur->bc_mp)) { rec.inobt.ir_u.sp.ir_holemask = cpu_to_be16(irec->ir_holemask); rec.inobt.ir_u.sp.ir_count = irec->ir_count; rec.inobt.ir_u.sp.ir_freecount = irec->ir_freecount; @@ -74,11 +74,11 @@ xfs_inobt_update( void xfs_inobt_btrec_to_irec( struct xfs_mount *mp, - union xfs_btree_rec *rec, + const union xfs_btree_rec *rec, struct xfs_inobt_rec_incore *irec) { irec->ir_startino = be32_to_cpu(rec->inobt.ir_startino); - if (xfs_sb_version_hassparseinodes(&mp->m_sb)) { + if (xfs_has_sparseinodes(mp)) { irec->ir_holemask = be16_to_cpu(rec->inobt.ir_u.sp.ir_holemask); irec->ir_count = rec->inobt.ir_u.sp.ir_count; irec->ir_freecount = rec->inobt.ir_u.sp.ir_freecount; @@ -241,7 +241,7 @@ xfs_check_agi_freecount( } } while (i == 1); - if (!XFS_FORCED_SHUTDOWN(cur->bc_mp)) + if (!xfs_is_shutdown(cur->bc_mp)) ASSERT(freecount == cur->bc_ag.pag->pagi_freecount); } return 0; @@ -302,7 +302,7 @@ xfs_ialloc_inode_init( * That means for v3 inode we log the entire buffer rather than just the * inode cores. */ - if (xfs_sb_version_has_v3inode(&mp->m_sb)) { + if (xfs_has_v3inodes(mp)) { version = 3; ino = XFS_AGINO_TO_INO(mp, agno, XFS_AGB_TO_AGINO(mp, agbno)); @@ -337,7 +337,6 @@ xfs_ialloc_inode_init( xfs_buf_zero(fbuf, 0, BBTOB(fbuf->b_length)); for (i = 0; i < M_IGEO(mp)->inodes_per_cluster; i++) { int ioffset = i << mp->m_sb.sb_inodelog; - uint isize = XFS_DINODE_SIZE(&mp->m_sb); free = xfs_make_iptr(mp, fbuf, i); free->di_magic = cpu_to_be16(XFS_DINODE_MAGIC); @@ -354,7 +353,7 @@ xfs_ialloc_inode_init( } else if (tp) { /* just log the inode core */ xfs_trans_log_buf(tp, fbuf, ioffset, - ioffset + isize - 1); + ioffset + XFS_DINODE_SIZE(mp) - 1); } } @@ -635,7 +634,7 @@ xfs_ialloc_ag_alloc( #ifdef DEBUG /* randomly do sparse inode allocations */ - if (xfs_sb_version_hassparseinodes(&tp->t_mountp->m_sb) && + if (xfs_has_sparseinodes(tp->t_mountp) && igeo->ialloc_min_blks < igeo->ialloc_blks) do_sparse = prandom_u32() & 1; #endif @@ -712,7 +711,7 @@ xfs_ialloc_ag_alloc( */ isaligned = 0; if (igeo->ialloc_align) { - ASSERT(!(args.mp->m_flags & XFS_MOUNT_NOALIGN)); + ASSERT(!xfs_has_noalign(args.mp)); args.alignment = args.mp->m_dalign; isaligned = 1; } else @@ -754,7 +753,7 @@ xfs_ialloc_ag_alloc( * Finally, try a sparse allocation if the filesystem supports it and * the sparse allocation length is smaller than a full chunk. */ - if (xfs_sb_version_hassparseinodes(&args.mp->m_sb) && + if (xfs_has_sparseinodes(args.mp) && igeo->ialloc_min_blks < igeo->ialloc_blks && args.fsbno == NULLFSBLOCK) { sparse_alloc: @@ -856,7 +855,7 @@ sparse_alloc: * from the previous call. Set merge false to replace any * existing record with this one. 
*/ - if (xfs_sb_version_hasfinobt(&args.mp->m_sb)) { + if (xfs_has_finobt(args.mp)) { error = xfs_inobt_insert_sprec(args.mp, tp, agbp, pag, XFS_BTNUM_FINO, &rec, false); if (error) @@ -869,7 +868,7 @@ sparse_alloc: if (error) return error; - if (xfs_sb_version_hasfinobt(&args.mp->m_sb)) { + if (xfs_has_finobt(args.mp)) { error = xfs_inobt_insert(args.mp, tp, agbp, pag, newino, newlen, XFS_BTNUM_FINO); if (error) @@ -1448,7 +1447,7 @@ xfs_dialloc_ag( int offset; int i; - if (!xfs_sb_version_hasfinobt(&mp->m_sb)) + if (!xfs_has_finobt(mp)) return xfs_dialloc_ag_inobt(tp, agbp, pag, parent, inop); /* @@ -1784,7 +1783,7 @@ xfs_dialloc( break; } - if (XFS_FORCED_SHUTDOWN(mp)) { + if (xfs_is_shutdown(mp)) { error = -EFSCORRUPTED; break; } @@ -1953,8 +1952,7 @@ xfs_difree_inobt( * remove the chunk if the block size is large enough for multiple inode * chunks (that might not be free). */ - if (!(mp->m_flags & XFS_MOUNT_IKEEP) && - rec.ir_free == XFS_INOBT_ALL_FREE && + if (!xfs_has_ikeep(mp) && rec.ir_free == XFS_INOBT_ALL_FREE && mp->m_sb.sb_inopblock <= XFS_INODES_PER_CHUNK) { struct xfs_perag *pag = agbp->b_pag; @@ -1994,7 +1992,7 @@ xfs_difree_inobt( goto error0; } - /* + /* * Change the inode free counts and log the ag/sb changes. */ be32_add_cpu(&agi->agi_freecount, 1); @@ -2098,9 +2096,8 @@ xfs_difree_finobt( * enough for multiple chunks. Leave the finobt record to remain in sync * with the inobt. */ - if (rec.ir_free == XFS_INOBT_ALL_FREE && - mp->m_sb.sb_inopblock <= XFS_INODES_PER_CHUNK && - !(mp->m_flags & XFS_MOUNT_IKEEP)) { + if (!xfs_has_ikeep(mp) && rec.ir_free == XFS_INOBT_ALL_FREE && + mp->m_sb.sb_inopblock <= XFS_INODES_PER_CHUNK) { error = xfs_btree_delete(cur, &i); if (error) goto error; @@ -2189,7 +2186,7 @@ xfs_difree( /* * Fix up the free inode btree. */ - if (xfs_sb_version_hasfinobt(&mp->m_sb)) { + if (xfs_has_finobt(mp)) { error = xfs_difree_finobt(mp, tp, agbp, pag, agino, &rec); if (error) goto error0; @@ -2478,7 +2475,7 @@ xfs_agi_verify( struct xfs_agi *agi = bp->b_addr; int i; - if (xfs_sb_version_hascrc(&mp->m_sb)) { + if (xfs_has_crc(mp)) { if (!uuid_equal(&agi->agi_uuid, &mp->m_sb.sb_meta_uuid)) return __this_address; if (!xfs_log_check_lsn(mp, be64_to_cpu(agi->agi_lsn))) @@ -2497,7 +2494,7 @@ xfs_agi_verify( be32_to_cpu(agi->agi_level) > M_IGEO(mp)->inobt_maxlevels) return __this_address; - if (xfs_sb_version_hasfinobt(&mp->m_sb) && + if (xfs_has_finobt(mp) && (be32_to_cpu(agi->agi_free_level) < 1 || be32_to_cpu(agi->agi_free_level) > M_IGEO(mp)->inobt_maxlevels)) return __this_address; @@ -2528,7 +2525,7 @@ xfs_agi_read_verify( struct xfs_mount *mp = bp->b_mount; xfs_failaddr_t fa; - if (xfs_sb_version_hascrc(&mp->m_sb) && + if (xfs_has_crc(mp) && !xfs_buf_verify_cksum(bp, XFS_AGI_CRC_OFF)) xfs_verifier_error(bp, -EFSBADCRC, __this_address); else { @@ -2553,7 +2550,7 @@ xfs_agi_write_verify( return; } - if (!xfs_sb_version_hascrc(&mp->m_sb)) + if (!xfs_has_crc(mp)) return; if (bip) @@ -2626,7 +2623,7 @@ xfs_ialloc_read_agi( * we are in the middle of a forced shutdown. 
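XFS_FORCED_SHUTDOWN(mp) gets the same treatment as the feature macros, becoming the xfs_is_shutdown(mp) predicate used in these hunks. A minimal sketch, under the assumption that shutdown state moved into a per-mount operating-state bitmap queried with atomic bit ops (the field and flag names here are illustrative):

static inline bool
xfs_is_shutdown(struct xfs_mount *mp)
{
	/* set once by the shutdown path, checked lock-free everywhere else */
	return test_bit(XFS_OPSTATE_SHUTDOWN, &mp->m_opstate);
}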
*/ ASSERT(pag->pagi_freecount == be32_to_cpu(agi->agi_freecount) || - XFS_FORCED_SHUTDOWN(mp)); + xfs_is_shutdown(mp)); return 0; } @@ -2716,7 +2713,7 @@ struct xfs_ialloc_count_inodes { STATIC int xfs_ialloc_count_inodes_rec( struct xfs_btree_cur *cur, - union xfs_btree_rec *rec, + const union xfs_btree_rec *rec, void *priv) { struct xfs_inobt_rec_incore irec; @@ -2773,7 +2770,7 @@ xfs_ialloc_setup_geometry( uint inodes; igeo->new_diflags2 = 0; - if (xfs_sb_version_hasbigtime(&mp->m_sb)) + if (xfs_has_bigtime(mp)) igeo->new_diflags2 |= XFS_DIFLAG2_BIGTIME; /* Compute inode btree geometry. */ @@ -2828,7 +2825,7 @@ xfs_ialloc_setup_geometry( * cannot change the behavior. */ igeo->inode_cluster_size_raw = XFS_INODE_BIG_CLUSTER_SIZE; - if (xfs_sb_version_has_v3inode(&mp->m_sb)) { + if (xfs_has_v3inodes(mp)) { int new_size = igeo->inode_cluster_size_raw; new_size *= mp->m_sb.sb_inodesize / XFS_DINODE_MIN_SIZE; @@ -2846,7 +2843,7 @@ xfs_ialloc_setup_geometry( igeo->inodes_per_cluster = XFS_FSB_TO_INO(mp, igeo->blocks_per_cluster); /* Calculate inode cluster alignment. */ - if (xfs_sb_version_hasalign(&mp->m_sb) && + if (xfs_has_align(mp) && mp->m_sb.sb_inoalignmt >= igeo->blocks_per_cluster) igeo->cluster_align = mp->m_sb.sb_inoalignmt; else @@ -2894,15 +2891,15 @@ xfs_ialloc_calc_rootino( first_bno += xfs_alloc_min_freelist(mp, NULL); /* ...the free inode btree root... */ - if (xfs_sb_version_hasfinobt(&mp->m_sb)) + if (xfs_has_finobt(mp)) first_bno++; /* ...the reverse mapping btree root... */ - if (xfs_sb_version_hasrmapbt(&mp->m_sb)) + if (xfs_has_rmapbt(mp)) first_bno++; /* ...the reference count btree... */ - if (xfs_sb_version_hasreflink(&mp->m_sb)) + if (xfs_has_reflink(mp)) first_bno++; /* @@ -2920,9 +2917,9 @@ xfs_ialloc_calc_rootino( * Now round first_bno up to whatever allocation alignment is given * by the filesystem or was passed in. 
*/ - if (xfs_sb_version_hasdalign(&mp->m_sb) && igeo->ialloc_align > 0) + if (xfs_has_dalign(mp) && igeo->ialloc_align > 0) first_bno = roundup(first_bno, sunit); - else if (xfs_sb_version_hasalign(&mp->m_sb) && + else if (xfs_has_align(mp) && mp->m_sb.sb_inoalignmt > 1) first_bno = roundup(first_bno, mp->m_sb.sb_inoalignmt); @@ -2953,7 +2950,7 @@ xfs_ialloc_check_shrink( int has; int error; - if (!xfs_sb_version_hassparseinodes(&mp->m_sb)) + if (!xfs_has_sparseinodes(mp)) return 0; pag = xfs_perag_get(mp, agno); diff --git a/fs/xfs/libxfs/xfs_ialloc.h b/fs/xfs/libxfs/xfs_ialloc.h index 9a2112b4ad5e..8b5c2b709022 100644 --- a/fs/xfs/libxfs/xfs_ialloc.h +++ b/fs/xfs/libxfs/xfs_ialloc.h @@ -106,7 +106,8 @@ int xfs_read_agi(struct xfs_mount *mp, struct xfs_trans *tp, xfs_agnumber_t agno, struct xfs_buf **bpp); union xfs_btree_rec; -void xfs_inobt_btrec_to_irec(struct xfs_mount *mp, union xfs_btree_rec *rec, +void xfs_inobt_btrec_to_irec(struct xfs_mount *mp, + const union xfs_btree_rec *rec, struct xfs_inobt_rec_incore *irec); int xfs_ialloc_has_inodes_at_extent(struct xfs_btree_cur *cur, xfs_agblock_t bno, xfs_extlen_t len, bool *exists); diff --git a/fs/xfs/libxfs/xfs_ialloc_btree.c b/fs/xfs/libxfs/xfs_ialloc_btree.c index 823a038939f8..27190840c5d8 100644 --- a/fs/xfs/libxfs/xfs_ialloc_btree.c +++ b/fs/xfs/libxfs/xfs_ialloc_btree.c @@ -40,9 +40,9 @@ xfs_inobt_dup_cursor( STATIC void xfs_inobt_set_root( - struct xfs_btree_cur *cur, - union xfs_btree_ptr *nptr, - int inc) /* level change */ + struct xfs_btree_cur *cur, + const union xfs_btree_ptr *nptr, + int inc) /* level change */ { struct xfs_buf *agbp = cur->bc_ag.agbp; struct xfs_agi *agi = agbp->b_addr; @@ -54,9 +54,9 @@ xfs_inobt_set_root( STATIC void xfs_finobt_set_root( - struct xfs_btree_cur *cur, - union xfs_btree_ptr *nptr, - int inc) /* level change */ + struct xfs_btree_cur *cur, + const union xfs_btree_ptr *nptr, + int inc) /* level change */ { struct xfs_buf *agbp = cur->bc_ag.agbp; struct xfs_agi *agi = agbp->b_addr; @@ -76,7 +76,7 @@ xfs_inobt_mod_blockcount( struct xfs_buf *agbp = cur->bc_ag.agbp; struct xfs_agi *agi = agbp->b_addr; - if (!xfs_sb_version_hasinobtcounts(&cur->bc_mp->m_sb)) + if (!xfs_has_inobtcounts(cur->bc_mp)) return; if (cur->bc_btnum == XFS_BTNUM_FINO) @@ -88,11 +88,11 @@ xfs_inobt_mod_blockcount( STATIC int __xfs_inobt_alloc_block( - struct xfs_btree_cur *cur, - union xfs_btree_ptr *start, - union xfs_btree_ptr *new, - int *stat, - enum xfs_ag_resv_type resv) + struct xfs_btree_cur *cur, + const union xfs_btree_ptr *start, + union xfs_btree_ptr *new, + int *stat, + enum xfs_ag_resv_type resv) { xfs_alloc_arg_t args; /* block allocation args */ int error; /* error return value */ @@ -127,20 +127,20 @@ __xfs_inobt_alloc_block( STATIC int xfs_inobt_alloc_block( - struct xfs_btree_cur *cur, - union xfs_btree_ptr *start, - union xfs_btree_ptr *new, - int *stat) + struct xfs_btree_cur *cur, + const union xfs_btree_ptr *start, + union xfs_btree_ptr *new, + int *stat) { return __xfs_inobt_alloc_block(cur, start, new, stat, XFS_AG_RESV_NONE); } STATIC int xfs_finobt_alloc_block( - struct xfs_btree_cur *cur, - union xfs_btree_ptr *start, - union xfs_btree_ptr *new, - int *stat) + struct xfs_btree_cur *cur, + const union xfs_btree_ptr *start, + union xfs_btree_ptr *new, + int *stat) { if (cur->bc_mp->m_finobt_nores) return xfs_inobt_alloc_block(cur, start, new, stat); @@ -156,7 +156,7 @@ __xfs_inobt_free_block( { xfs_inobt_mod_blockcount(cur, -1); return xfs_free_extent(cur->bc_tp, - XFS_DADDR_TO_FSB(cur->bc_mp, 
XFS_BUF_ADDR(bp)), 1, + XFS_DADDR_TO_FSB(cur->bc_mp, xfs_buf_daddr(bp)), 1, &XFS_RMAP_OINFO_INOBT, resv); } @@ -188,18 +188,18 @@ xfs_inobt_get_maxrecs( STATIC void xfs_inobt_init_key_from_rec( - union xfs_btree_key *key, - union xfs_btree_rec *rec) + union xfs_btree_key *key, + const union xfs_btree_rec *rec) { key->inobt.ir_startino = rec->inobt.ir_startino; } STATIC void xfs_inobt_init_high_key_from_rec( - union xfs_btree_key *key, - union xfs_btree_rec *rec) + union xfs_btree_key *key, + const union xfs_btree_rec *rec) { - __u32 x; + __u32 x; x = be32_to_cpu(rec->inobt.ir_startino); x += XFS_INODES_PER_CHUNK - 1; @@ -212,7 +212,7 @@ xfs_inobt_init_rec_from_cur( union xfs_btree_rec *rec) { rec->inobt.ir_startino = cpu_to_be32(cur->bc_rec.i.ir_startino); - if (xfs_sb_version_hassparseinodes(&cur->bc_mp->m_sb)) { + if (xfs_has_sparseinodes(cur->bc_mp)) { rec->inobt.ir_u.sp.ir_holemask = cpu_to_be16(cur->bc_rec.i.ir_holemask); rec->inobt.ir_u.sp.ir_count = cur->bc_rec.i.ir_count; @@ -253,8 +253,8 @@ xfs_finobt_init_ptr_from_cur( STATIC int64_t xfs_inobt_key_diff( - struct xfs_btree_cur *cur, - union xfs_btree_key *key) + struct xfs_btree_cur *cur, + const union xfs_btree_key *key) { return (int64_t)be32_to_cpu(key->inobt.ir_startino) - cur->bc_rec.i.ir_startino; @@ -262,9 +262,9 @@ xfs_inobt_key_diff( STATIC int64_t xfs_inobt_diff_two_keys( - struct xfs_btree_cur *cur, - union xfs_btree_key *k1, - union xfs_btree_key *k2) + struct xfs_btree_cur *cur, + const union xfs_btree_key *k1, + const union xfs_btree_key *k2) { return (int64_t)be32_to_cpu(k1->inobt.ir_startino) - be32_to_cpu(k2->inobt.ir_startino); @@ -292,7 +292,7 @@ xfs_inobt_verify( * but beware of the landmine (i.e. need to check pag->pagi_init) if we * ever do. */ - if (xfs_sb_version_hascrc(&mp->m_sb)) { + if (xfs_has_crc(mp)) { fa = xfs_btree_sblock_v5hdr_verify(bp); if (fa) return fa; @@ -360,9 +360,9 @@ const struct xfs_buf_ops xfs_finobt_buf_ops = { STATIC int xfs_inobt_keys_inorder( - struct xfs_btree_cur *cur, - union xfs_btree_key *k1, - union xfs_btree_key *k2) + struct xfs_btree_cur *cur, + const union xfs_btree_key *k1, + const union xfs_btree_key *k2) { return be32_to_cpu(k1->inobt.ir_startino) < be32_to_cpu(k2->inobt.ir_startino); @@ -370,9 +370,9 @@ xfs_inobt_keys_inorder( STATIC int xfs_inobt_recs_inorder( - struct xfs_btree_cur *cur, - union xfs_btree_rec *r1, - union xfs_btree_rec *r2) + struct xfs_btree_cur *cur, + const union xfs_btree_rec *r1, + const union xfs_btree_rec *r2) { return be32_to_cpu(r1->inobt.ir_startino) + XFS_INODES_PER_CHUNK <= be32_to_cpu(r2->inobt.ir_startino); @@ -446,7 +446,7 @@ xfs_inobt_init_common( cur->bc_blocklog = mp->m_sb.sb_blocklog; - if (xfs_sb_version_hascrc(&mp->m_sb)) + if (xfs_has_crc(mp)) cur->bc_flags |= XFS_BTREE_CRC_BLOCKS; /* take a reference for the cursor */ @@ -511,7 +511,7 @@ xfs_inobt_commit_staged_btree( fields = XFS_AGI_ROOT | XFS_AGI_LEVEL; agi->agi_root = cpu_to_be32(afake->af_root); agi->agi_level = cpu_to_be32(afake->af_levels); - if (xfs_sb_version_hasinobtcounts(&cur->bc_mp->m_sb)) { + if (xfs_has_inobtcounts(cur->bc_mp)) { agi->agi_iblocks = cpu_to_be32(afake->af_blocks); fields |= XFS_AGI_IBLOCKS; } @@ -521,7 +521,7 @@ xfs_inobt_commit_staged_btree( fields = XFS_AGI_FREE_ROOT | XFS_AGI_FREE_LEVEL; agi->agi_free_root = cpu_to_be32(afake->af_root); agi->agi_free_level = cpu_to_be32(afake->af_levels); - if (xfs_sb_version_hasinobtcounts(&cur->bc_mp->m_sb)) { + if (xfs_has_inobtcounts(cur->bc_mp)) { agi->agi_fblocks = cpu_to_be32(afake->af_blocks); fields |= 
XFS_AGI_IBLOCKS; } @@ -737,10 +737,10 @@ xfs_finobt_calc_reserves( xfs_extlen_t tree_len = 0; int error; - if (!xfs_sb_version_hasfinobt(&mp->m_sb)) + if (!xfs_has_finobt(mp)) return 0; - if (xfs_sb_version_hasinobtcounts(&mp->m_sb)) + if (xfs_has_inobtcounts(mp)) error = xfs_finobt_read_blocks(mp, tp, pag, &tree_len); else error = xfs_inobt_count_blocks(mp, tp, pag, XFS_BTNUM_FINO, diff --git a/fs/xfs/libxfs/xfs_ialloc_btree.h b/fs/xfs/libxfs/xfs_ialloc_btree.h index e530c82b2217..8a322d402e61 100644 --- a/fs/xfs/libxfs/xfs_ialloc_btree.h +++ b/fs/xfs/libxfs/xfs_ialloc_btree.h @@ -19,7 +19,7 @@ struct xfs_perag; * Btree block header size depends on a superblock flag. */ #define XFS_INOBT_BLOCK_LEN(mp) \ - (xfs_sb_version_hascrc(&((mp)->m_sb)) ? \ + (xfs_has_crc(((mp))) ? \ XFS_BTREE_SBLOCK_CRC_LEN : XFS_BTREE_SBLOCK_LEN) /* diff --git a/fs/xfs/libxfs/xfs_inode_buf.c b/fs/xfs/libxfs/xfs_inode_buf.c index 84ea2e0af9f0..3932b4ebf903 100644 --- a/fs/xfs/libxfs/xfs_inode_buf.c +++ b/fs/xfs/libxfs/xfs_inode_buf.c @@ -48,7 +48,7 @@ xfs_inode_buf_verify( /* * Validate the magic number and version of every inode in the buffer */ - agno = xfs_daddr_to_agno(mp, XFS_BUF_ADDR(bp)); + agno = xfs_daddr_to_agno(mp, xfs_buf_daddr(bp)); ni = XFS_BB_TO_FSB(mp, bp->b_length) * mp->m_sb.sb_inopblock; for (i = 0; i < ni; i++) { int di_ok; @@ -58,7 +58,7 @@ xfs_inode_buf_verify( dip = xfs_buf_offset(bp, (i << mp->m_sb.sb_inodelog)); unlinked_ino = be32_to_cpu(dip->di_next_unlinked); di_ok = xfs_verify_magic16(bp, dip->di_magic) && - xfs_dinode_good_version(&mp->m_sb, dip->di_version) && + xfs_dinode_good_version(mp, dip->di_version) && xfs_verify_agino_or_null(mp, agno, unlinked_ino); if (unlikely(XFS_TEST_ERROR(!di_ok, mp, XFS_ERRTAG_ITOBP_INOTOBP))) { @@ -71,7 +71,7 @@ xfs_inode_buf_verify( #ifdef DEBUG xfs_alert(mp, "bad inode magic/vsn daddr %lld #%d (magic=%x)", - (unsigned long long)bp->b_bn, i, + (unsigned long long)xfs_buf_daddr(bp), i, be16_to_cpu(dip->di_magic)); #endif xfs_buf_verifier_error(bp, -EFSCORRUPTED, @@ -192,7 +192,7 @@ xfs_inode_from_disk( * inode. If the inode is unused, mode is zero and we shouldn't mess * with the uninitialized part of it. 
*/ - if (!xfs_sb_version_has_v3inode(&ip->i_mount->m_sb)) + if (!xfs_has_v3inodes(ip->i_mount)) ip->i_flushiter = be16_to_cpu(from->di_flushiter); inode->i_generation = be32_to_cpu(from->di_gen); inode->i_mode = be16_to_cpu(from->di_mode); @@ -235,7 +235,7 @@ xfs_inode_from_disk( if (from->di_dmevmask || from->di_dmstate) xfs_iflags_set(ip, XFS_IPRESERVE_DM_FIELDS); - if (xfs_sb_version_has_v3inode(&ip->i_mount->m_sb)) { + if (xfs_has_v3inodes(ip->i_mount)) { inode_set_iversion_queried(inode, be64_to_cpu(from->di_changecount)); ip->i_crtime = xfs_inode_from_disk_ts(from, from->di_crtime); @@ -313,7 +313,7 @@ xfs_inode_to_disk( to->di_aformat = xfs_ifork_format(ip->i_afp); to->di_flags = cpu_to_be16(ip->i_diflags); - if (xfs_sb_version_has_v3inode(&ip->i_mount->m_sb)) { + if (xfs_has_v3inodes(ip->i_mount)) { to->di_version = 3; to->di_changecount = cpu_to_be64(inode_peek_iversion(inode)); to->di_crtime = xfs_inode_to_disk_ts(ip, ip->i_crtime); @@ -413,7 +413,7 @@ xfs_dinode_verify( /* Verify v3 integrity information first */ if (dip->di_version >= 3) { - if (!xfs_sb_version_has_v3inode(&mp->m_sb)) + if (!xfs_has_v3inodes(mp)) return __this_address; if (!xfs_verify_cksum((char *)dip, mp->m_sb.sb_inodesize, XFS_DINODE_CRC_OFF)) @@ -515,7 +515,7 @@ xfs_dinode_verify( /* don't allow reflink/cowextsize if we don't have reflink */ if ((flags2 & (XFS_DIFLAG2_REFLINK | XFS_DIFLAG2_COWEXTSIZE)) && - !xfs_sb_version_hasreflink(&mp->m_sb)) + !xfs_has_reflink(mp)) return __this_address; /* only regular files get reflink */ @@ -534,7 +534,7 @@ xfs_dinode_verify( /* bigtime iflag can only happen on bigtime filesystems */ if (xfs_dinode_has_bigtime(dip) && - !xfs_sb_version_hasbigtime(&mp->m_sb)) + !xfs_has_bigtime(mp)) return __this_address; return NULL; @@ -550,7 +550,7 @@ xfs_dinode_calc_crc( if (dip->di_version < 3) return; - ASSERT(xfs_sb_version_hascrc(&mp->m_sb)); + ASSERT(xfs_has_crc(mp)); crc = xfs_start_cksum_update((char *)dip, mp->m_sb.sb_inodesize, XFS_DINODE_CRC_OFF); dip->di_crc = xfs_end_cksum(crc); @@ -677,7 +677,7 @@ xfs_inode_validate_cowextsize( hint_flag = (flags2 & XFS_DIFLAG2_COWEXTSIZE); cowextsize_bytes = XFS_FSB_TO_B(mp, cowextsize); - if (hint_flag && !xfs_sb_version_hasreflink(&mp->m_sb)) + if (hint_flag && !xfs_has_reflink(mp)) return __this_address; if (hint_flag && !(S_ISDIR(mode) || S_ISREG(mode))) diff --git a/fs/xfs/libxfs/xfs_inode_buf.h b/fs/xfs/libxfs/xfs_inode_buf.h index 7f865bb4df84..585ed5a110af 100644 --- a/fs/xfs/libxfs/xfs_inode_buf.h +++ b/fs/xfs/libxfs/xfs_inode_buf.h @@ -21,7 +21,7 @@ struct xfs_imap { int xfs_imap_to_bp(struct xfs_mount *mp, struct xfs_trans *tp, struct xfs_imap *imap, struct xfs_buf **bpp); -void xfs_dinode_calc_crc(struct xfs_mount *, struct xfs_dinode *); +void xfs_dinode_calc_crc(struct xfs_mount *mp, struct xfs_dinode *dip); void xfs_inode_to_disk(struct xfs_inode *ip, struct xfs_dinode *to, xfs_lsn_t lsn); int xfs_inode_from_disk(struct xfs_inode *ip, struct xfs_dinode *from); @@ -42,4 +42,13 @@ static inline uint64_t xfs_inode_encode_bigtime(struct timespec64 tv) struct timespec64 xfs_inode_from_disk_ts(struct xfs_dinode *dip, const xfs_timestamp_t ts); +static inline bool +xfs_dinode_good_version(struct xfs_mount *mp, uint8_t version) +{ + if (xfs_has_v3inodes(mp)) + return version == 3; + return version == 1 || version == 2; +} + + #endif /* __XFS_INODE_BUF_H__ */ diff --git a/fs/xfs/libxfs/xfs_log_format.h b/fs/xfs/libxfs/xfs_log_format.h index 2c5bcbc19264..b322db523d65 100644 --- a/fs/xfs/libxfs/xfs_log_format.h +++ 
b/fs/xfs/libxfs/xfs_log_format.h @@ -41,10 +41,10 @@ typedef uint32_t xlog_tid_t; #define XFS_MIN_LOG_FACTOR 3 #define XLOG_REC_SHIFT(log) \ - BTOBB(1 << (xfs_sb_version_haslogv2(&log->l_mp->m_sb) ? \ + BTOBB(1 << (xfs_has_logv2(log->l_mp) ? \ XLOG_MAX_RECORD_BSHIFT : XLOG_BIG_RECORD_BSHIFT)) #define XLOG_TOTAL_REC_SHIFT(log) \ - BTOBB(XLOG_MAX_ICLOGS << (xfs_sb_version_haslogv2(&log->l_mp->m_sb) ? \ + BTOBB(XLOG_MAX_ICLOGS << (xfs_has_logv2(log->l_mp) ? \ XLOG_MAX_RECORD_BSHIFT : XLOG_BIG_RECORD_BSHIFT)) /* get lsn fields */ @@ -434,7 +434,7 @@ struct xfs_log_dinode { }; #define xfs_log_dinode_size(mp) \ - (xfs_sb_version_has_v3inode(&(mp)->m_sb) ? \ + (xfs_has_v3inodes((mp)) ? \ sizeof(struct xfs_log_dinode) : \ offsetof(struct xfs_log_dinode, di_next_unlinked)) diff --git a/fs/xfs/libxfs/xfs_log_recover.h b/fs/xfs/libxfs/xfs_log_recover.h index 3cca2bfe714c..ff69a0000817 100644 --- a/fs/xfs/libxfs/xfs_log_recover.h +++ b/fs/xfs/libxfs/xfs_log_recover.h @@ -122,6 +122,8 @@ void xlog_buf_readahead(struct xlog *log, xfs_daddr_t blkno, uint len, const struct xfs_buf_ops *ops); bool xlog_is_buffer_cancelled(struct xlog *log, xfs_daddr_t blkno, uint len); +int xlog_recover_iget(struct xfs_mount *mp, xfs_ino_t ino, + struct xfs_inode **ipp); void xlog_recover_release_intent(struct xlog *log, unsigned short intent_type, uint64_t intent_id); diff --git a/fs/xfs/libxfs/xfs_log_rlimit.c b/fs/xfs/libxfs/xfs_log_rlimit.c index 7f55eb3f3653..67798ff5e14e 100644 --- a/fs/xfs/libxfs/xfs_log_rlimit.c +++ b/fs/xfs/libxfs/xfs_log_rlimit.c @@ -92,7 +92,7 @@ xfs_log_calc_minimum_size( if (tres.tr_logcount > 1) max_logres *= tres.tr_logcount; - if (xfs_sb_version_haslogv2(&mp->m_sb) && mp->m_sb.sb_logsunit > 1) + if (xfs_has_logv2(mp) && mp->m_sb.sb_logsunit > 1) lsunit = BTOBB(mp->m_sb.sb_logsunit); /* diff --git a/fs/xfs/libxfs/xfs_quota_defs.h b/fs/xfs/libxfs/xfs_quota_defs.h index 0f0af4e35032..a02c5062f9b2 100644 --- a/fs/xfs/libxfs/xfs_quota_defs.h +++ b/fs/xfs/libxfs/xfs_quota_defs.h @@ -60,37 +60,15 @@ typedef uint8_t xfs_dqtype_t; #define XFS_DQUOT_LOGRES(mp) \ ((sizeof(struct xfs_dq_logformat) + sizeof(struct xfs_disk_dquot)) * 6) -#define XFS_IS_QUOTA_RUNNING(mp) ((mp)->m_qflags & XFS_ALL_QUOTA_ACCT) -#define XFS_IS_UQUOTA_RUNNING(mp) ((mp)->m_qflags & XFS_UQUOTA_ACCT) -#define XFS_IS_PQUOTA_RUNNING(mp) ((mp)->m_qflags & XFS_PQUOTA_ACCT) -#define XFS_IS_GQUOTA_RUNNING(mp) ((mp)->m_qflags & XFS_GQUOTA_ACCT) +#define XFS_IS_QUOTA_ON(mp) ((mp)->m_qflags & XFS_ALL_QUOTA_ACCT) +#define XFS_IS_UQUOTA_ON(mp) ((mp)->m_qflags & XFS_UQUOTA_ACCT) +#define XFS_IS_PQUOTA_ON(mp) ((mp)->m_qflags & XFS_PQUOTA_ACCT) +#define XFS_IS_GQUOTA_ON(mp) ((mp)->m_qflags & XFS_GQUOTA_ACCT) #define XFS_IS_UQUOTA_ENFORCED(mp) ((mp)->m_qflags & XFS_UQUOTA_ENFD) #define XFS_IS_GQUOTA_ENFORCED(mp) ((mp)->m_qflags & XFS_GQUOTA_ENFD) #define XFS_IS_PQUOTA_ENFORCED(mp) ((mp)->m_qflags & XFS_PQUOTA_ENFD) /* - * Incore only flags for quotaoff - these bits get cleared when quota(s) - * are in the process of getting turned off. These flags are in m_qflags but - * never in sb_qflags. - */ -#define XFS_UQUOTA_ACTIVE 0x1000 /* uquotas are being turned off */ -#define XFS_GQUOTA_ACTIVE 0x2000 /* gquotas are being turned off */ -#define XFS_PQUOTA_ACTIVE 0x4000 /* pquotas are being turned off */ -#define XFS_ALL_QUOTA_ACTIVE \ - (XFS_UQUOTA_ACTIVE | XFS_GQUOTA_ACTIVE | XFS_PQUOTA_ACTIVE) - -/* - * Checking XFS_IS_*QUOTA_ON() while holding any inode lock guarantees - * quota will be not be switched off as long as that inode lock is held. 
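With the ACTIVE flags gone, XFS_IS_*QUOTA_ON() no longer means "not currently being turned off"; as the new definitions above show, it now tests the accounting flags in m_qflags directly. A runtime guard therefore reduces to a plain flag check, for example:

	/* illustrative guard: nothing to account when no quota type is enabled */
	if (!XFS_IS_QUOTA_ON(mp))
		return 0;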
- */ -#define XFS_IS_QUOTA_ON(mp) ((mp)->m_qflags & (XFS_UQUOTA_ACTIVE | \ - XFS_GQUOTA_ACTIVE | \ - XFS_PQUOTA_ACTIVE)) -#define XFS_IS_UQUOTA_ON(mp) ((mp)->m_qflags & XFS_UQUOTA_ACTIVE) -#define XFS_IS_GQUOTA_ON(mp) ((mp)->m_qflags & XFS_GQUOTA_ACTIVE) -#define XFS_IS_PQUOTA_ON(mp) ((mp)->m_qflags & XFS_PQUOTA_ACTIVE) - -/* * Flags to tell various functions what to do. Not all of these are meaningful * to a single function. None of these XFS_QMOPT_* flags are meant to have * persistent values (ie. their values can and will change between versions) diff --git a/fs/xfs/libxfs/xfs_refcount.c b/fs/xfs/libxfs/xfs_refcount.c index 860a0c9801ba..e5d767a7fc5d 100644 --- a/fs/xfs/libxfs/xfs_refcount.c +++ b/fs/xfs/libxfs/xfs_refcount.c @@ -91,7 +91,7 @@ xfs_refcount_lookup_eq( /* Convert on-disk record to in-core format. */ void xfs_refcount_btrec_to_irec( - union xfs_btree_rec *rec, + const union xfs_btree_rec *rec, struct xfs_refcount_irec *irec) { irec->rc_startblock = be32_to_cpu(rec->refc.rc_startblock); @@ -1253,7 +1253,7 @@ xfs_refcount_increase_extent( struct xfs_trans *tp, struct xfs_bmbt_irec *PREV) { - if (!xfs_sb_version_hasreflink(&tp->t_mountp->m_sb)) + if (!xfs_has_reflink(tp->t_mountp)) return; __xfs_refcount_add(tp, XFS_REFCOUNT_INCREASE, PREV->br_startblock, @@ -1268,7 +1268,7 @@ xfs_refcount_decrease_extent( struct xfs_trans *tp, struct xfs_bmbt_irec *PREV) { - if (!xfs_sb_version_hasreflink(&tp->t_mountp->m_sb)) + if (!xfs_has_reflink(tp->t_mountp)) return; __xfs_refcount_add(tp, XFS_REFCOUNT_DECREASE, PREV->br_startblock, @@ -1617,7 +1617,7 @@ xfs_refcount_alloc_cow_extent( { struct xfs_mount *mp = tp->t_mountp; - if (!xfs_sb_version_hasreflink(&mp->m_sb)) + if (!xfs_has_reflink(mp)) return; __xfs_refcount_add(tp, XFS_REFCOUNT_ALLOC_COW, fsb, len); @@ -1636,7 +1636,7 @@ xfs_refcount_free_cow_extent( { struct xfs_mount *mp = tp->t_mountp; - if (!xfs_sb_version_hasreflink(&mp->m_sb)) + if (!xfs_has_reflink(mp)) return; /* Remove rmap entry */ @@ -1654,7 +1654,7 @@ struct xfs_refcount_recovery { STATIC int xfs_refcount_recover_extent( struct xfs_btree_cur *cur, - union xfs_btree_rec *rec, + const union xfs_btree_rec *rec, void *priv) { struct list_head *debris = priv; diff --git a/fs/xfs/libxfs/xfs_refcount.h b/fs/xfs/libxfs/xfs_refcount.h index 9f6e9aae4da0..02cb3aa405be 100644 --- a/fs/xfs/libxfs/xfs_refcount.h +++ b/fs/xfs/libxfs/xfs_refcount.h @@ -78,7 +78,7 @@ static inline xfs_fileoff_t xfs_refcount_max_unmap(int log_res) extern int xfs_refcount_has_record(struct xfs_btree_cur *cur, xfs_agblock_t bno, xfs_extlen_t len, bool *exists); union xfs_btree_rec; -extern void xfs_refcount_btrec_to_irec(union xfs_btree_rec *rec, +extern void xfs_refcount_btrec_to_irec(const union xfs_btree_rec *rec, struct xfs_refcount_irec *irec); extern int xfs_refcount_insert(struct xfs_btree_cur *cur, struct xfs_refcount_irec *irec, int *stat); diff --git a/fs/xfs/libxfs/xfs_refcount_btree.c b/fs/xfs/libxfs/xfs_refcount_btree.c index 92d336c17e83..1ef9b99962ab 100644 --- a/fs/xfs/libxfs/xfs_refcount_btree.c +++ b/fs/xfs/libxfs/xfs_refcount_btree.c @@ -31,9 +31,9 @@ xfs_refcountbt_dup_cursor( STATIC void xfs_refcountbt_set_root( - struct xfs_btree_cur *cur, - union xfs_btree_ptr *ptr, - int inc) + struct xfs_btree_cur *cur, + const union xfs_btree_ptr *ptr, + int inc) { struct xfs_buf *agbp = cur->bc_ag.agbp; struct xfs_agf *agf = agbp->b_addr; @@ -51,10 +51,10 @@ xfs_refcountbt_set_root( STATIC int xfs_refcountbt_alloc_block( - struct xfs_btree_cur *cur, - union xfs_btree_ptr *start, - union 
xfs_btree_ptr *new, - int *stat) + struct xfs_btree_cur *cur, + const union xfs_btree_ptr *start, + union xfs_btree_ptr *new, + int *stat) { struct xfs_buf *agbp = cur->bc_ag.agbp; struct xfs_agf *agf = agbp->b_addr; @@ -102,7 +102,7 @@ xfs_refcountbt_free_block( struct xfs_mount *mp = cur->bc_mp; struct xfs_buf *agbp = cur->bc_ag.agbp; struct xfs_agf *agf = agbp->b_addr; - xfs_fsblock_t fsbno = XFS_DADDR_TO_FSB(mp, XFS_BUF_ADDR(bp)); + xfs_fsblock_t fsbno = XFS_DADDR_TO_FSB(mp, xfs_buf_daddr(bp)); int error; trace_xfs_refcountbt_free_block(cur->bc_mp, cur->bc_ag.pag->pag_agno, @@ -135,18 +135,18 @@ xfs_refcountbt_get_maxrecs( STATIC void xfs_refcountbt_init_key_from_rec( - union xfs_btree_key *key, - union xfs_btree_rec *rec) + union xfs_btree_key *key, + const union xfs_btree_rec *rec) { key->refc.rc_startblock = rec->refc.rc_startblock; } STATIC void xfs_refcountbt_init_high_key_from_rec( - union xfs_btree_key *key, - union xfs_btree_rec *rec) + union xfs_btree_key *key, + const union xfs_btree_rec *rec) { - __u32 x; + __u32 x; x = be32_to_cpu(rec->refc.rc_startblock); x += be32_to_cpu(rec->refc.rc_blockcount) - 1; @@ -177,20 +177,20 @@ xfs_refcountbt_init_ptr_from_cur( STATIC int64_t xfs_refcountbt_key_diff( - struct xfs_btree_cur *cur, - union xfs_btree_key *key) + struct xfs_btree_cur *cur, + const union xfs_btree_key *key) { struct xfs_refcount_irec *rec = &cur->bc_rec.rc; - struct xfs_refcount_key *kp = &key->refc; + const struct xfs_refcount_key *kp = &key->refc; return (int64_t)be32_to_cpu(kp->rc_startblock) - rec->rc_startblock; } STATIC int64_t xfs_refcountbt_diff_two_keys( - struct xfs_btree_cur *cur, - union xfs_btree_key *k1, - union xfs_btree_key *k2) + struct xfs_btree_cur *cur, + const union xfs_btree_key *k1, + const union xfs_btree_key *k2) { return (int64_t)be32_to_cpu(k1->refc.rc_startblock) - be32_to_cpu(k2->refc.rc_startblock); @@ -209,7 +209,7 @@ xfs_refcountbt_verify( if (!xfs_verify_magic(bp, block->bb_magic)) return __this_address; - if (!xfs_sb_version_hasreflink(&mp->m_sb)) + if (!xfs_has_reflink(mp)) return __this_address; fa = xfs_btree_sblock_v5hdr_verify(bp); if (fa) @@ -269,9 +269,9 @@ const struct xfs_buf_ops xfs_refcountbt_buf_ops = { STATIC int xfs_refcountbt_keys_inorder( - struct xfs_btree_cur *cur, - union xfs_btree_key *k1, - union xfs_btree_key *k2) + struct xfs_btree_cur *cur, + const union xfs_btree_key *k1, + const union xfs_btree_key *k2) { return be32_to_cpu(k1->refc.rc_startblock) < be32_to_cpu(k2->refc.rc_startblock); @@ -279,9 +279,9 @@ xfs_refcountbt_keys_inorder( STATIC int xfs_refcountbt_recs_inorder( - struct xfs_btree_cur *cur, - union xfs_btree_rec *r1, - union xfs_btree_rec *r2) + struct xfs_btree_cur *cur, + const union xfs_btree_rec *r1, + const union xfs_btree_rec *r2) { return be32_to_cpu(r1->refc.rc_startblock) + be32_to_cpu(r1->refc.rc_blockcount) <= @@ -462,7 +462,7 @@ xfs_refcountbt_calc_reserves( xfs_extlen_t tree_len; int error; - if (!xfs_sb_version_hasreflink(&mp->m_sb)) + if (!xfs_has_reflink(mp)) return 0; error = xfs_alloc_read_agf(mp, tp, pag->pag_agno, 0, &agbp); diff --git a/fs/xfs/libxfs/xfs_rmap.c b/fs/xfs/libxfs/xfs_rmap.c index d1dfad0204e3..f45929b1b94a 100644 --- a/fs/xfs/libxfs/xfs_rmap.c +++ b/fs/xfs/libxfs/xfs_rmap.c @@ -179,8 +179,8 @@ done: /* Convert an internal btree record to an rmap record. 
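A recurring change in these btree hunks is const-qualifying the on-disk record, key, and pointer arguments of the callbacks, so the compiler enforces that walkers only read them. The conversion helpers end up with this shape (a sketch mirroring the xfs_refcount_btrec_to_irec hunk above):

/* The on-disk record is read-only input; only the in-core irec is written. */
void example_btrec_to_irec(
	const union xfs_btree_rec	*rec,
	struct xfs_refcount_irec	*irec)
{
	irec->rc_startblock = be32_to_cpu(rec->refc.rc_startblock);
	irec->rc_blockcount = be32_to_cpu(rec->refc.rc_blockcount);
	irec->rc_refcount = be32_to_cpu(rec->refc.rc_refcount);
}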
*/ int xfs_rmap_btrec_to_irec( - union xfs_btree_rec *rec, - struct xfs_rmap_irec *irec) + const union xfs_btree_rec *rec, + struct xfs_rmap_irec *irec) { irec->rm_startblock = be32_to_cpu(rec->rmap.rm_startblock); irec->rm_blockcount = be32_to_cpu(rec->rmap.rm_blockcount); @@ -255,9 +255,9 @@ struct xfs_find_left_neighbor_info { /* For each rmap given, figure out if it matches the key we want. */ STATIC int xfs_rmap_find_left_neighbor_helper( - struct xfs_btree_cur *cur, - struct xfs_rmap_irec *rec, - void *priv) + struct xfs_btree_cur *cur, + const struct xfs_rmap_irec *rec, + void *priv) { struct xfs_find_left_neighbor_info *info = priv; @@ -331,9 +331,9 @@ xfs_rmap_find_left_neighbor( /* For each rmap given, figure out if it matches the key we want. */ STATIC int xfs_rmap_lookup_le_range_helper( - struct xfs_btree_cur *cur, - struct xfs_rmap_irec *rec, - void *priv) + struct xfs_btree_cur *cur, + const struct xfs_rmap_irec *rec, + void *priv) { struct xfs_find_left_neighbor_info *info = priv; @@ -705,7 +705,7 @@ xfs_rmap_free( struct xfs_btree_cur *cur; int error; - if (!xfs_sb_version_hasrmapbt(&mp->m_sb)) + if (!xfs_has_rmapbt(mp)) return 0; cur = xfs_rmapbt_init_cursor(mp, tp, agbp, pag); @@ -959,7 +959,7 @@ xfs_rmap_alloc( struct xfs_btree_cur *cur; int error; - if (!xfs_sb_version_hasrmapbt(&mp->m_sb)) + if (!xfs_has_rmapbt(mp)) return 0; cur = xfs_rmapbt_init_cursor(mp, tp, agbp, pag); @@ -2278,9 +2278,9 @@ struct xfs_rmap_query_range_info { /* Format btree record and pass to our callback. */ STATIC int xfs_rmap_query_range_helper( - struct xfs_btree_cur *cur, - union xfs_btree_rec *rec, - void *priv) + struct xfs_btree_cur *cur, + const union xfs_btree_rec *rec, + void *priv) { struct xfs_rmap_query_range_info *query = priv; struct xfs_rmap_irec irec; @@ -2296,8 +2296,8 @@ xfs_rmap_query_range_helper( int xfs_rmap_query_range( struct xfs_btree_cur *cur, - struct xfs_rmap_irec *low_rec, - struct xfs_rmap_irec *high_rec, + const struct xfs_rmap_irec *low_rec, + const struct xfs_rmap_irec *high_rec, xfs_rmap_query_range_fn fn, void *priv) { @@ -2459,7 +2459,7 @@ xfs_rmap_update_is_needed( struct xfs_mount *mp, int whichfork) { - return xfs_sb_version_hasrmapbt(&mp->m_sb) && whichfork != XFS_COW_FORK; + return xfs_has_rmapbt(mp) && whichfork != XFS_COW_FORK; } /* @@ -2707,7 +2707,7 @@ struct xfs_rmap_key_state { STATIC int xfs_rmap_has_other_keys_helper( struct xfs_btree_cur *cur, - struct xfs_rmap_irec *rec, + const struct xfs_rmap_irec *rec, void *priv) { struct xfs_rmap_key_state *rks = priv; diff --git a/fs/xfs/libxfs/xfs_rmap.h b/fs/xfs/libxfs/xfs_rmap.h index f2423cf7f1e2..fd67904ed446 100644 --- a/fs/xfs/libxfs/xfs_rmap.h +++ b/fs/xfs/libxfs/xfs_rmap.h @@ -134,12 +134,13 @@ int xfs_rmap_get_rec(struct xfs_btree_cur *cur, struct xfs_rmap_irec *irec, int *stat); typedef int (*xfs_rmap_query_range_fn)( - struct xfs_btree_cur *cur, - struct xfs_rmap_irec *rec, - void *priv); + struct xfs_btree_cur *cur, + const struct xfs_rmap_irec *rec, + void *priv); int xfs_rmap_query_range(struct xfs_btree_cur *cur, - struct xfs_rmap_irec *low_rec, struct xfs_rmap_irec *high_rec, + const struct xfs_rmap_irec *low_rec, + const struct xfs_rmap_irec *high_rec, xfs_rmap_query_range_fn fn, void *priv); int xfs_rmap_query_all(struct xfs_btree_cur *cur, xfs_rmap_query_range_fn fn, void *priv); @@ -192,7 +193,7 @@ int xfs_rmap_lookup_le_range(struct xfs_btree_cur *cur, xfs_agblock_t bno, int xfs_rmap_compare(const struct xfs_rmap_irec *a, const struct xfs_rmap_irec *b); union xfs_btree_rec; -int 
xfs_rmap_btrec_to_irec(union xfs_btree_rec *rec, +int xfs_rmap_btrec_to_irec(const union xfs_btree_rec *rec, struct xfs_rmap_irec *irec); int xfs_rmap_has_record(struct xfs_btree_cur *cur, xfs_agblock_t bno, xfs_extlen_t len, bool *exists); diff --git a/fs/xfs/libxfs/xfs_rmap_btree.c b/fs/xfs/libxfs/xfs_rmap_btree.c index f29bc71b9950..b7dbbfb3aeed 100644 --- a/fs/xfs/libxfs/xfs_rmap_btree.c +++ b/fs/xfs/libxfs/xfs_rmap_btree.c @@ -57,9 +57,9 @@ xfs_rmapbt_dup_cursor( STATIC void xfs_rmapbt_set_root( - struct xfs_btree_cur *cur, - union xfs_btree_ptr *ptr, - int inc) + struct xfs_btree_cur *cur, + const union xfs_btree_ptr *ptr, + int inc) { struct xfs_buf *agbp = cur->bc_ag.agbp; struct xfs_agf *agf = agbp->b_addr; @@ -76,10 +76,10 @@ xfs_rmapbt_set_root( STATIC int xfs_rmapbt_alloc_block( - struct xfs_btree_cur *cur, - union xfs_btree_ptr *start, - union xfs_btree_ptr *new, - int *stat) + struct xfs_btree_cur *cur, + const union xfs_btree_ptr *start, + union xfs_btree_ptr *new, + int *stat) { struct xfs_buf *agbp = cur->bc_ag.agbp; struct xfs_agf *agf = agbp->b_addr; @@ -122,7 +122,7 @@ xfs_rmapbt_free_block( xfs_agblock_t bno; int error; - bno = xfs_daddr_to_agbno(cur->bc_mp, XFS_BUF_ADDR(bp)); + bno = xfs_daddr_to_agbno(cur->bc_mp, xfs_buf_daddr(bp)); trace_xfs_rmapbt_free_block(cur->bc_mp, pag->pag_agno, bno, 1); be32_add_cpu(&agf->agf_rmap_blocks, -1); @@ -156,8 +156,8 @@ xfs_rmapbt_get_maxrecs( STATIC void xfs_rmapbt_init_key_from_rec( - union xfs_btree_key *key, - union xfs_btree_rec *rec) + union xfs_btree_key *key, + const union xfs_btree_rec *rec) { key->rmap.rm_startblock = rec->rmap.rm_startblock; key->rmap.rm_owner = rec->rmap.rm_owner; @@ -173,11 +173,11 @@ xfs_rmapbt_init_key_from_rec( */ STATIC void xfs_rmapbt_init_high_key_from_rec( - union xfs_btree_key *key, - union xfs_btree_rec *rec) + union xfs_btree_key *key, + const union xfs_btree_rec *rec) { - uint64_t off; - int adj; + uint64_t off; + int adj; adj = be32_to_cpu(rec->rmap.rm_blockcount) - 1; @@ -219,13 +219,13 @@ xfs_rmapbt_init_ptr_from_cur( STATIC int64_t xfs_rmapbt_key_diff( - struct xfs_btree_cur *cur, - union xfs_btree_key *key) + struct xfs_btree_cur *cur, + const union xfs_btree_key *key) { - struct xfs_rmap_irec *rec = &cur->bc_rec.r; - struct xfs_rmap_key *kp = &key->rmap; - __u64 x, y; - int64_t d; + struct xfs_rmap_irec *rec = &cur->bc_rec.r; + const struct xfs_rmap_key *kp = &key->rmap; + __u64 x, y; + int64_t d; d = (int64_t)be32_to_cpu(kp->rm_startblock) - rec->rm_startblock; if (d) @@ -249,14 +249,14 @@ xfs_rmapbt_key_diff( STATIC int64_t xfs_rmapbt_diff_two_keys( - struct xfs_btree_cur *cur, - union xfs_btree_key *k1, - union xfs_btree_key *k2) + struct xfs_btree_cur *cur, + const union xfs_btree_key *k1, + const union xfs_btree_key *k2) { - struct xfs_rmap_key *kp1 = &k1->rmap; - struct xfs_rmap_key *kp2 = &k2->rmap; - int64_t d; - __u64 x, y; + const struct xfs_rmap_key *kp1 = &k1->rmap; + const struct xfs_rmap_key *kp2 = &k2->rmap; + int64_t d; + __u64 x, y; d = (int64_t)be32_to_cpu(kp1->rm_startblock) - be32_to_cpu(kp2->rm_startblock); @@ -304,7 +304,7 @@ xfs_rmapbt_verify( if (!xfs_verify_magic(bp, block->bb_magic)) return __this_address; - if (!xfs_sb_version_hasrmapbt(&mp->m_sb)) + if (!xfs_has_rmapbt(mp)) return __this_address; fa = xfs_btree_sblock_v5hdr_verify(bp); if (fa) @@ -364,9 +364,9 @@ const struct xfs_buf_ops xfs_rmapbt_buf_ops = { STATIC int xfs_rmapbt_keys_inorder( - struct xfs_btree_cur *cur, - union xfs_btree_key *k1, - union xfs_btree_key *k2) + struct xfs_btree_cur *cur, + 
const union xfs_btree_key *k1, + const union xfs_btree_key *k2) { uint32_t x; uint32_t y; @@ -394,9 +394,9 @@ xfs_rmapbt_keys_inorder( STATIC int xfs_rmapbt_recs_inorder( - struct xfs_btree_cur *cur, - union xfs_btree_rec *r1, - union xfs_btree_rec *r2) + struct xfs_btree_cur *cur, + const union xfs_btree_rec *r1, + const union xfs_btree_rec *r2) { uint32_t x; uint32_t y; @@ -558,7 +558,7 @@ xfs_rmapbt_compute_maxlevels( * disallow reflinking when less than 10% of the per-AG metadata * block reservation since the fallback is a regular file copy. */ - if (xfs_sb_version_hasreflink(&mp->m_sb)) + if (xfs_has_reflink(mp)) mp->m_rmap_maxlevels = XFS_BTREE_MAXLEVELS; else mp->m_rmap_maxlevels = xfs_btree_compute_maxlevels( @@ -606,7 +606,7 @@ xfs_rmapbt_calc_reserves( xfs_extlen_t tree_len; int error; - if (!xfs_sb_version_hasrmapbt(&mp->m_sb)) + if (!xfs_has_rmapbt(mp)) return 0; error = xfs_alloc_read_agf(mp, tp, pag->pag_agno, 0, &agbp); diff --git a/fs/xfs/libxfs/xfs_rmap_btree.h b/fs/xfs/libxfs/xfs_rmap_btree.h index 88d8d18788a2..f2eee6572af4 100644 --- a/fs/xfs/libxfs/xfs_rmap_btree.h +++ b/fs/xfs/libxfs/xfs_rmap_btree.h @@ -59,4 +59,4 @@ extern xfs_extlen_t xfs_rmapbt_max_size(struct xfs_mount *mp, extern int xfs_rmapbt_calc_reserves(struct xfs_mount *mp, struct xfs_trans *tp, struct xfs_perag *pag, xfs_extlen_t *ask, xfs_extlen_t *used); -#endif /* __XFS_RMAP_BTREE_H__ */ +#endif /* __XFS_RMAP_BTREE_H__ */ diff --git a/fs/xfs/libxfs/xfs_rtbitmap.c b/fs/xfs/libxfs/xfs_rtbitmap.c index 483375c6a735..5740ba664867 100644 --- a/fs/xfs/libxfs/xfs_rtbitmap.c +++ b/fs/xfs/libxfs/xfs_rtbitmap.c @@ -1009,8 +1009,8 @@ xfs_rtfree_extent( int xfs_rtalloc_query_range( struct xfs_trans *tp, - struct xfs_rtalloc_rec *low_rec, - struct xfs_rtalloc_rec *high_rec, + const struct xfs_rtalloc_rec *low_rec, + const struct xfs_rtalloc_rec *high_rec, xfs_rtalloc_query_range_fn fn, void *priv) { @@ -1018,6 +1018,7 @@ xfs_rtalloc_query_range( struct xfs_mount *mp = tp->t_mountp; xfs_rtblock_t rtstart; xfs_rtblock_t rtend; + xfs_rtblock_t high_key; int is_free; int error = 0; @@ -1026,12 +1027,12 @@ xfs_rtalloc_query_range( if (low_rec->ar_startext >= mp->m_sb.sb_rextents || low_rec->ar_startext == high_rec->ar_startext) return 0; - high_rec->ar_startext = min(high_rec->ar_startext, - mp->m_sb.sb_rextents - 1); + + high_key = min(high_rec->ar_startext, mp->m_sb.sb_rextents - 1); /* Iterate the bitmap, looking for discrepancies. */ rtstart = low_rec->ar_startext; - while (rtstart <= high_rec->ar_startext) { + while (rtstart <= high_key) { /* Is the first block free? */ error = xfs_rtcheck_range(mp, tp, rtstart, 1, 1, &rtend, &is_free); @@ -1039,8 +1040,7 @@ xfs_rtalloc_query_range( break; /* How long does the extent go for? */ - error = xfs_rtfind_forw(mp, tp, rtstart, - high_rec->ar_startext, &rtend); + error = xfs_rtfind_forw(mp, tp, rtstart, high_key, &rtend); if (error) break; diff --git a/fs/xfs/libxfs/xfs_sb.c b/fs/xfs/libxfs/xfs_sb.c index 04f5386446db..e58349be78bd 100644 --- a/fs/xfs/libxfs/xfs_sb.c +++ b/fs/xfs/libxfs/xfs_sb.c @@ -30,13 +30,110 @@ * Physical superblock buffer manipulations. Shared with libxfs in userspace. */ +/* + * We support all XFS versions newer than a v4 superblock with V2 directories. 
+ */ +bool +xfs_sb_good_version( + struct xfs_sb *sbp) +{ + /* all v5 filesystems are supported */ + if (xfs_sb_is_v5(sbp)) + return true; + + /* versions prior to v4 are not supported */ + if (XFS_SB_VERSION_NUM(sbp) < XFS_SB_VERSION_4) + return false; + + /* V4 filesystems need v2 directories and unwritten extents */ + if (!(sbp->sb_versionnum & XFS_SB_VERSION_DIRV2BIT)) + return false; + if (!(sbp->sb_versionnum & XFS_SB_VERSION_EXTFLGBIT)) + return false; + + /* And must not have any unknown v4 feature bits set */ + if ((sbp->sb_versionnum & ~XFS_SB_VERSION_OKBITS) || + ((sbp->sb_versionnum & XFS_SB_VERSION_MOREBITSBIT) && + (sbp->sb_features2 & ~XFS_SB_VERSION2_OKBITS))) + return false; + + /* It's a supported v4 filesystem */ + return true; +} + +uint64_t +xfs_sb_version_to_features( + struct xfs_sb *sbp) +{ + uint64_t features = 0; + + /* optional V4 features */ + if (sbp->sb_rblocks > 0) + features |= XFS_FEAT_REALTIME; + if (sbp->sb_versionnum & XFS_SB_VERSION_ATTRBIT) + features |= XFS_FEAT_ATTR; + if (sbp->sb_versionnum & XFS_SB_VERSION_QUOTABIT) + features |= XFS_FEAT_QUOTA; + if (sbp->sb_versionnum & XFS_SB_VERSION_ALIGNBIT) + features |= XFS_FEAT_ALIGN; + if (sbp->sb_versionnum & XFS_SB_VERSION_LOGV2BIT) + features |= XFS_FEAT_LOGV2; + if (sbp->sb_versionnum & XFS_SB_VERSION_DALIGNBIT) + features |= XFS_FEAT_DALIGN; + if (sbp->sb_versionnum & XFS_SB_VERSION_EXTFLGBIT) + features |= XFS_FEAT_EXTFLG; + if (sbp->sb_versionnum & XFS_SB_VERSION_SECTORBIT) + features |= XFS_FEAT_SECTOR; + if (sbp->sb_versionnum & XFS_SB_VERSION_BORGBIT) + features |= XFS_FEAT_ASCIICI; + if (sbp->sb_versionnum & XFS_SB_VERSION_MOREBITSBIT) { + if (sbp->sb_features2 & XFS_SB_VERSION2_LAZYSBCOUNTBIT) + features |= XFS_FEAT_LAZYSBCOUNT; + if (sbp->sb_features2 & XFS_SB_VERSION2_ATTR2BIT) + features |= XFS_FEAT_ATTR2; + if (sbp->sb_features2 & XFS_SB_VERSION2_PROJID32BIT) + features |= XFS_FEAT_PROJID32; + if (sbp->sb_features2 & XFS_SB_VERSION2_FTYPE) + features |= XFS_FEAT_FTYPE; + } + + if (!xfs_sb_is_v5(sbp)) + return features; + + /* Always on V5 features */ + features |= XFS_FEAT_ALIGN | XFS_FEAT_LOGV2 | XFS_FEAT_EXTFLG | + XFS_FEAT_LAZYSBCOUNT | XFS_FEAT_ATTR2 | XFS_FEAT_PROJID32 | + XFS_FEAT_V3INODES | XFS_FEAT_CRC | XFS_FEAT_PQUOTINO; + + /* Optional V5 features */ + if (sbp->sb_features_ro_compat & XFS_SB_FEAT_RO_COMPAT_FINOBT) + features |= XFS_FEAT_FINOBT; + if (sbp->sb_features_ro_compat & XFS_SB_FEAT_RO_COMPAT_RMAPBT) + features |= XFS_FEAT_RMAPBT; + if (sbp->sb_features_ro_compat & XFS_SB_FEAT_RO_COMPAT_REFLINK) + features |= XFS_FEAT_REFLINK; + if (sbp->sb_features_ro_compat & XFS_SB_FEAT_RO_COMPAT_INOBTCNT) + features |= XFS_FEAT_INOBTCNT; + if (sbp->sb_features_incompat & XFS_SB_FEAT_INCOMPAT_FTYPE) + features |= XFS_FEAT_FTYPE; + if (sbp->sb_features_incompat & XFS_SB_FEAT_INCOMPAT_SPINODES) + features |= XFS_FEAT_SPINODES; + if (sbp->sb_features_incompat & XFS_SB_FEAT_INCOMPAT_META_UUID) + features |= XFS_FEAT_META_UUID; + if (sbp->sb_features_incompat & XFS_SB_FEAT_INCOMPAT_BIGTIME) + features |= XFS_FEAT_BIGTIME; + if (sbp->sb_features_incompat & XFS_SB_FEAT_INCOMPAT_NEEDSREPAIR) + features |= XFS_FEAT_NEEDSREPAIR; + return features; +} + /* Check all the superblock fields we care about when reading one in. 
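The new xfs_sb_version_to_features() runs once per mount and condenses the scattered sb_versionnum/sb_features* decoding into a single feature mask; the xfs_has_*() helpers used throughout the rest of this series then test that cached mask. A sketch of the predicate shape, assuming the mask computed above is stored in an m_features field of the mount (the storage location is not shown in this hunk):

/* Assumed: mp->m_features holds the xfs_sb_version_to_features() result. */
static inline bool example_has_reflink(struct xfs_mount *mp)
{
	return (mp->m_features & XFS_FEAT_REFLINK) != 0;
}

This turns every feature check from a multi-field superblock decode into one mask test, which is what makes the mechanical xfs_sb_version_has*() conversions elsewhere in this diff worthwhile.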
*/ STATIC int xfs_validate_sb_read( struct xfs_mount *mp, struct xfs_sb *sbp) { - if (XFS_SB_VERSION_NUM(sbp) != XFS_SB_VERSION_5) + if (!xfs_sb_is_v5(sbp)) return 0; /* @@ -56,7 +153,7 @@ xfs_validate_sb_read( "Superblock has unknown read-only compatible features (0x%x) enabled.", (sbp->sb_features_ro_compat & XFS_SB_FEAT_RO_COMPAT_UNKNOWN)); - if (!(mp->m_flags & XFS_MOUNT_RDONLY)) { + if (!xfs_is_readonly(mp)) { xfs_warn(mp, "Attempted to mount read-only compatible filesystem read-write."); xfs_warn(mp, @@ -95,7 +192,7 @@ xfs_validate_sb_write( * secondary superblocks, so allow this usage to continue because * we never read counters from such superblocks. */ - if (XFS_BUF_ADDR(bp) == XFS_SB_DADDR && !sbp->sb_inprogress && + if (xfs_buf_daddr(bp) == XFS_SB_DADDR && !sbp->sb_inprogress && (sbp->sb_fdblocks > sbp->sb_dblocks || !xfs_verify_icount(mp, sbp->sb_icount) || sbp->sb_ifree > sbp->sb_icount)) { @@ -103,7 +200,7 @@ xfs_validate_sb_write( return -EFSCORRUPTED; } - if (XFS_SB_VERSION_NUM(sbp) != XFS_SB_VERSION_5) + if (!xfs_sb_is_v5(sbp)) return 0; /* @@ -162,6 +259,7 @@ xfs_validate_sb_common( struct xfs_dsb *dsb = bp->b_addr; uint32_t agcount = 0; uint32_t rem; + bool has_dalign; if (!xfs_verify_magic(bp, dsb->sb_magicnum)) { xfs_warn(mp, "bad magic number"); @@ -173,12 +271,41 @@ xfs_validate_sb_common( return -EWRONGFS; } - if (xfs_sb_version_has_pquotino(sbp)) { + /* + * Validate feature flags and state + */ + if (xfs_sb_is_v5(sbp)) { + if (sbp->sb_blocksize < XFS_MIN_CRC_BLOCKSIZE) { + xfs_notice(mp, +"Block size (%u bytes) too small for Version 5 superblock (minimum %d bytes)", + sbp->sb_blocksize, XFS_MIN_CRC_BLOCKSIZE); + return -EFSCORRUPTED; + } + + /* V5 has a separate project quota inode */ if (sbp->sb_qflags & (XFS_OQUOTA_ENFD | XFS_OQUOTA_CHKD)) { xfs_notice(mp, "Version 5 of Super block has XFS_OQUOTA bits."); return -EFSCORRUPTED; } + + /* + * Full inode chunks must be aligned to inode chunk size when + * sparse inodes are enabled to support the sparse chunk + * allocation algorithm and prevent overlapping inode records. + */ + if (sbp->sb_features_incompat & XFS_SB_FEAT_INCOMPAT_SPINODES) { + uint32_t align; + + align = XFS_INODES_PER_CHUNK * sbp->sb_inodesize + >> sbp->sb_blocklog; + if (sbp->sb_inoalignmt != align) { + xfs_warn(mp, +"Inode block alignment (%u) must match chunk size (%u) for sparse inodes.", + sbp->sb_inoalignmt, align); + return -EINVAL; + } + } } else if (sbp->sb_qflags & (XFS_PQUOTA_ENFD | XFS_GQUOTA_ENFD | XFS_PQUOTA_CHKD | XFS_GQUOTA_CHKD)) { xfs_notice(mp, @@ -186,24 +313,6 @@ xfs_validate_sb_common( return -EFSCORRUPTED; } - /* - * Full inode chunks must be aligned to inode chunk size when - * sparse inodes are enabled to support the sparse chunk - * allocation algorithm and prevent overlapping inode records. - */ - if (xfs_sb_version_hassparseinodes(sbp)) { - uint32_t align; - - align = XFS_INODES_PER_CHUNK * sbp->sb_inodesize - >> sbp->sb_blocklog; - if (sbp->sb_inoalignmt != align) { - xfs_warn(mp, -"Inode block alignment (%u) must match chunk size (%u) for sparse inodes.", - sbp->sb_inoalignmt, align); - return -EINVAL; - } - } - if (unlikely( sbp->sb_logstart == 0 && mp->m_logdev_targp == mp->m_ddev_targp)) { xfs_warn(mp, @@ -303,7 +412,8 @@ xfs_validate_sb_common( * Either (sb_unit and !hasdalign) or (!sb_unit and hasdalign) * would imply the image is corrupted. 
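Spelled out, the XOR test in the next hunk flags corruption exactly when the DALIGN feature bit and the recorded stripe unit disagree:

/* has_dalign  sb_unit   verdict
 *  set        nonzero   valid
 *  set        zero      corrupt (feature claimed, no unit recorded)
 *  clear      nonzero   corrupt (unit recorded, feature not claimed)
 *  clear      zero      valid
 */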
*/ - if (!!sbp->sb_unit ^ xfs_sb_version_hasdalign(sbp)) { + has_dalign = sbp->sb_versionnum & XFS_SB_VERSION_DALIGNBIT; + if (!!sbp->sb_unit ^ has_dalign) { xfs_notice(mp, "SB stripe alignment sanity check failed"); return -EFSCORRUPTED; } @@ -312,12 +422,6 @@ xfs_validate_sb_common( XFS_FSB_TO_B(mp, sbp->sb_width), 0, false)) return -EFSCORRUPTED; - if (xfs_sb_version_hascrc(&mp->m_sb) && - sbp->sb_blocksize < XFS_MIN_CRC_BLOCKSIZE) { - xfs_notice(mp, "v5 SB sanity check failed"); - return -EFSCORRUPTED; - } - /* * Currently only very few inode sizes are supported. */ @@ -361,7 +465,7 @@ xfs_sb_quota_from_disk(struct xfs_sb *sbp) * We need to do these manipilations only if we are working * with an older version of on-disk superblock. */ - if (xfs_sb_version_has_pquotino(sbp)) + if (xfs_sb_is_v5(sbp)) return; if (sbp->sb_qflags & XFS_OQUOTA_ENFD) @@ -454,7 +558,8 @@ __xfs_sb_from_disk( * sb_meta_uuid is only on disk if it differs from sb_uuid and the * feature flag is set; if not set we keep it only in memory. */ - if (xfs_sb_version_hasmetauuid(to)) + if (xfs_sb_is_v5(to) && + (to->sb_features_incompat & XFS_SB_FEAT_INCOMPAT_META_UUID)) uuid_copy(&to->sb_meta_uuid, &from->sb_meta_uuid); else uuid_copy(&to->sb_meta_uuid, &from->sb_uuid); @@ -479,7 +584,12 @@ xfs_sb_quota_to_disk( uint16_t qflags = from->sb_qflags; to->sb_uquotino = cpu_to_be64(from->sb_uquotino); - if (xfs_sb_version_has_pquotino(from)) { + + /* + * The in-memory superblock quota state matches the v5 on-disk format so + * just write them out and return + */ + if (xfs_sb_is_v5(from)) { to->sb_qflags = cpu_to_be16(from->sb_qflags); to->sb_gquotino = cpu_to_be64(from->sb_gquotino); to->sb_pquotino = cpu_to_be64(from->sb_pquotino); @@ -487,9 +597,9 @@ xfs_sb_quota_to_disk( } /* - * The in-core version of sb_qflags do not have XFS_OQUOTA_* - * flags, whereas the on-disk version does. So, convert incore - * XFS_{PG}QUOTA_* flags to on-disk XFS_OQUOTA_* flags. + * For older superblocks (v4), the in-core version of sb_qflags do not + * have XFS_OQUOTA_* flags, whereas the on-disk version does. So, + * convert incore XFS_{PG}QUOTA_* flags to on-disk XFS_OQUOTA_* flags. 
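For the v4 leg this comment introduces, the incore group/project enforcement and check flags have to be folded back into the shared on-disk OQUOTA pair. A condensed sketch of that conversion; the helper name is hypothetical, the flag names are the existing ones:

static uint16_t example_qflags_to_disk_v4(uint16_t qflags)
{
	/* Drop the incore-only group/project bits... */
	uint16_t out = qflags & ~(XFS_PQUOTA_ENFD | XFS_PQUOTA_CHKD |
				  XFS_GQUOTA_ENFD | XFS_GQUOTA_CHKD);

	/* ...and re-encode them as the combined on-disk OQUOTA bits. */
	if (qflags & (XFS_GQUOTA_ENFD | XFS_PQUOTA_ENFD))
		out |= XFS_OQUOTA_ENFD;
	if (qflags & (XFS_GQUOTA_CHKD | XFS_PQUOTA_CHKD))
		out |= XFS_OQUOTA_CHKD;
	return out;
}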
*/ qflags &= ~(XFS_PQUOTA_ENFD | XFS_PQUOTA_CHKD | XFS_GQUOTA_ENFD | XFS_GQUOTA_CHKD); @@ -589,19 +699,20 @@ xfs_sb_to_disk( to->sb_features2 = cpu_to_be32(from->sb_features2); to->sb_bad_features2 = cpu_to_be32(from->sb_bad_features2); - if (xfs_sb_version_hascrc(from)) { - to->sb_features_compat = cpu_to_be32(from->sb_features_compat); - to->sb_features_ro_compat = - cpu_to_be32(from->sb_features_ro_compat); - to->sb_features_incompat = - cpu_to_be32(from->sb_features_incompat); - to->sb_features_log_incompat = - cpu_to_be32(from->sb_features_log_incompat); - to->sb_spino_align = cpu_to_be32(from->sb_spino_align); - to->sb_lsn = cpu_to_be64(from->sb_lsn); - if (xfs_sb_version_hasmetauuid(from)) - uuid_copy(&to->sb_meta_uuid, &from->sb_meta_uuid); - } + if (!xfs_sb_is_v5(from)) + return; + + to->sb_features_compat = cpu_to_be32(from->sb_features_compat); + to->sb_features_ro_compat = + cpu_to_be32(from->sb_features_ro_compat); + to->sb_features_incompat = + cpu_to_be32(from->sb_features_incompat); + to->sb_features_log_incompat = + cpu_to_be32(from->sb_features_log_incompat); + to->sb_spino_align = cpu_to_be32(from->sb_spino_align); + to->sb_lsn = cpu_to_be64(from->sb_lsn); + if (from->sb_features_incompat & XFS_SB_FEAT_INCOMPAT_META_UUID) + uuid_copy(&to->sb_meta_uuid, &from->sb_meta_uuid); } /* @@ -636,8 +747,8 @@ xfs_sb_read_verify( if (!xfs_buf_verify_cksum(bp, XFS_SB_CRC_OFF)) { /* Only fail bad secondaries on a known V5 filesystem */ - if (bp->b_bn == XFS_SB_DADDR || - xfs_sb_version_hascrc(&mp->m_sb)) { + if (xfs_buf_daddr(bp) == XFS_SB_DADDR || + xfs_has_crc(mp)) { error = -EFSBADCRC; goto out_error; } @@ -704,7 +815,7 @@ xfs_sb_write_verify( if (error) goto out_error; - if (!xfs_sb_version_hascrc(&mp->m_sb)) + if (!xfs_sb_is_v5(&sb)) return; if (bip) @@ -801,7 +912,7 @@ xfs_log_sb( * unclean shutdown, this will be corrected by log recovery rebuilding * the counters from the AGF block counts. 
*/ - if (xfs_sb_version_haslazysbcount(&mp->m_sb)) { + if (xfs_has_lazysbcount(mp)) { mp->m_sb.sb_icount = percpu_counter_sum(&mp->m_icount); mp->m_sb.sb_ifree = percpu_counter_sum(&mp->m_ifree); mp->m_sb.sb_fdblocks = percpu_counter_sum(&mp->m_fdblocks); @@ -950,10 +1061,12 @@ out: void xfs_fs_geometry( - struct xfs_sb *sbp, + struct xfs_mount *mp, struct xfs_fsop_geom *geo, int struct_version) { + struct xfs_sb *sbp = &mp->m_sb; + memset(geo, 0, sizeof(struct xfs_fsop_geom)); geo->blocksize = sbp->sb_blocksize; @@ -984,51 +1097,51 @@ xfs_fs_geometry( geo->flags = XFS_FSOP_GEOM_FLAGS_NLINK | XFS_FSOP_GEOM_FLAGS_DIRV2 | XFS_FSOP_GEOM_FLAGS_EXTFLG; - if (xfs_sb_version_hasattr(sbp)) + if (xfs_has_attr(mp)) geo->flags |= XFS_FSOP_GEOM_FLAGS_ATTR; - if (xfs_sb_version_hasquota(sbp)) + if (xfs_has_quota(mp)) geo->flags |= XFS_FSOP_GEOM_FLAGS_QUOTA; - if (xfs_sb_version_hasalign(sbp)) + if (xfs_has_align(mp)) geo->flags |= XFS_FSOP_GEOM_FLAGS_IALIGN; - if (xfs_sb_version_hasdalign(sbp)) + if (xfs_has_dalign(mp)) geo->flags |= XFS_FSOP_GEOM_FLAGS_DALIGN; - if (xfs_sb_version_hassector(sbp)) - geo->flags |= XFS_FSOP_GEOM_FLAGS_SECTOR; - if (xfs_sb_version_hasasciici(sbp)) + if (xfs_has_asciici(mp)) geo->flags |= XFS_FSOP_GEOM_FLAGS_DIRV2CI; - if (xfs_sb_version_haslazysbcount(sbp)) + if (xfs_has_lazysbcount(mp)) geo->flags |= XFS_FSOP_GEOM_FLAGS_LAZYSB; - if (xfs_sb_version_hasattr2(sbp)) + if (xfs_has_attr2(mp)) geo->flags |= XFS_FSOP_GEOM_FLAGS_ATTR2; - if (xfs_sb_version_hasprojid32bit(sbp)) + if (xfs_has_projid32(mp)) geo->flags |= XFS_FSOP_GEOM_FLAGS_PROJID32; - if (xfs_sb_version_hascrc(sbp)) + if (xfs_has_crc(mp)) geo->flags |= XFS_FSOP_GEOM_FLAGS_V5SB; - if (xfs_sb_version_hasftype(sbp)) + if (xfs_has_ftype(mp)) geo->flags |= XFS_FSOP_GEOM_FLAGS_FTYPE; - if (xfs_sb_version_hasfinobt(sbp)) + if (xfs_has_finobt(mp)) geo->flags |= XFS_FSOP_GEOM_FLAGS_FINOBT; - if (xfs_sb_version_hassparseinodes(sbp)) + if (xfs_has_sparseinodes(mp)) geo->flags |= XFS_FSOP_GEOM_FLAGS_SPINODES; - if (xfs_sb_version_hasrmapbt(sbp)) + if (xfs_has_rmapbt(mp)) geo->flags |= XFS_FSOP_GEOM_FLAGS_RMAPBT; - if (xfs_sb_version_hasreflink(sbp)) + if (xfs_has_reflink(mp)) geo->flags |= XFS_FSOP_GEOM_FLAGS_REFLINK; - if (xfs_sb_version_hasbigtime(sbp)) + if (xfs_has_bigtime(mp)) geo->flags |= XFS_FSOP_GEOM_FLAGS_BIGTIME; - if (xfs_sb_version_hasinobtcounts(sbp)) + if (xfs_has_inobtcounts(mp)) geo->flags |= XFS_FSOP_GEOM_FLAGS_INOBTCNT; - if (xfs_sb_version_hassector(sbp)) + if (xfs_has_sector(mp)) { + geo->flags |= XFS_FSOP_GEOM_FLAGS_SECTOR; geo->logsectsize = sbp->sb_logsectsize; - else + } else { geo->logsectsize = BBSIZE; + } geo->rtsectsize = sbp->sb_blocksize; geo->dirblocksize = xfs_dir2_dirblock_bytes(sbp); if (struct_version < 4) return; - if (xfs_sb_version_haslogv2(sbp)) + if (xfs_has_logv2(mp)) geo->flags |= XFS_FSOP_GEOM_FLAGS_LOGV2; geo->logsunit = sbp->sb_logsunit; diff --git a/fs/xfs/libxfs/xfs_sb.h b/fs/xfs/libxfs/xfs_sb.h index 0c1602d9b53d..a5e14740ec9a 100644 --- a/fs/xfs/libxfs/xfs_sb.h +++ b/fs/xfs/libxfs/xfs_sb.h @@ -20,11 +20,13 @@ extern void xfs_sb_mount_common(struct xfs_mount *mp, struct xfs_sb *sbp); extern void xfs_sb_from_disk(struct xfs_sb *to, struct xfs_dsb *from); extern void xfs_sb_to_disk(struct xfs_dsb *to, struct xfs_sb *from); extern void xfs_sb_quota_from_disk(struct xfs_sb *sbp); +extern bool xfs_sb_good_version(struct xfs_sb *sbp); +extern uint64_t xfs_sb_version_to_features(struct xfs_sb *sbp); extern int xfs_update_secondary_sbs(struct xfs_mount *mp); #define 
XFS_FS_GEOM_MAX_STRUCT_VER (4) -extern void xfs_fs_geometry(struct xfs_sb *sbp, struct xfs_fsop_geom *geo, +extern void xfs_fs_geometry(struct xfs_mount *mp, struct xfs_fsop_geom *geo, int struct_version); extern int xfs_sb_read_secondary(struct xfs_mount *mp, struct xfs_trans *tp, xfs_agnumber_t agno, diff --git a/fs/xfs/libxfs/xfs_symlink_remote.c b/fs/xfs/libxfs/xfs_symlink_remote.c index 594bc447a7dd..f0b38f4aba80 100644 --- a/fs/xfs/libxfs/xfs_symlink_remote.c +++ b/fs/xfs/libxfs/xfs_symlink_remote.c @@ -42,7 +42,7 @@ xfs_symlink_hdr_set( { struct xfs_dsymlink_hdr *dsl = bp->b_addr; - if (!xfs_sb_version_hascrc(&mp->m_sb)) + if (!xfs_has_crc(mp)) return 0; memset(dsl, 0, sizeof(struct xfs_dsymlink_hdr)); @@ -51,7 +51,7 @@ xfs_symlink_hdr_set( dsl->sl_bytes = cpu_to_be32(size); uuid_copy(&dsl->sl_uuid, &mp->m_sb.sb_meta_uuid); dsl->sl_owner = cpu_to_be64(ino); - dsl->sl_blkno = cpu_to_be64(bp->b_bn); + dsl->sl_blkno = cpu_to_be64(xfs_buf_daddr(bp)); bp->b_ops = &xfs_symlink_buf_ops; return sizeof(struct xfs_dsymlink_hdr); @@ -89,13 +89,13 @@ xfs_symlink_verify( struct xfs_mount *mp = bp->b_mount; struct xfs_dsymlink_hdr *dsl = bp->b_addr; - if (!xfs_sb_version_hascrc(&mp->m_sb)) + if (!xfs_has_crc(mp)) return __this_address; if (!xfs_verify_magic(bp, dsl->sl_magic)) return __this_address; if (!uuid_equal(&dsl->sl_uuid, &mp->m_sb.sb_meta_uuid)) return __this_address; - if (bp->b_bn != be64_to_cpu(dsl->sl_blkno)) + if (xfs_buf_daddr(bp) != be64_to_cpu(dsl->sl_blkno)) return __this_address; if (be32_to_cpu(dsl->sl_offset) + be32_to_cpu(dsl->sl_bytes) >= XFS_SYMLINK_MAXLEN) @@ -116,7 +116,7 @@ xfs_symlink_read_verify( xfs_failaddr_t fa; /* no verification of non-crc buffers */ - if (!xfs_sb_version_hascrc(&mp->m_sb)) + if (!xfs_has_crc(mp)) return; if (!xfs_buf_verify_cksum(bp, XFS_SYMLINK_CRC_OFF)) @@ -137,7 +137,7 @@ xfs_symlink_write_verify( xfs_failaddr_t fa; /* no verification of non-crc buffers */ - if (!xfs_sb_version_hascrc(&mp->m_sb)) + if (!xfs_has_crc(mp)) return; fa = xfs_symlink_verify(bp); @@ -173,7 +173,7 @@ xfs_symlink_local_to_remote( xfs_trans_buf_set_type(tp, bp, XFS_BLFT_SYMLINK_BUF); - if (!xfs_sb_version_hascrc(&mp->m_sb)) { + if (!xfs_has_crc(mp)) { bp->b_ops = NULL; memcpy(bp->b_addr, ifp->if_u1.if_data, ifp->if_bytes); xfs_trans_log_buf(tp, bp, 0, ifp->if_bytes - 1); diff --git a/fs/xfs/libxfs/xfs_trans_inode.c b/fs/xfs/libxfs/xfs_trans_inode.c index 16f723ebe8dd..8b5547073379 100644 --- a/fs/xfs/libxfs/xfs_trans_inode.c +++ b/fs/xfs/libxfs/xfs_trans_inode.c @@ -136,7 +136,7 @@ xfs_trans_log_inode( * to upgrade this inode to bigtime format, do so now. 
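The xfs_trans_log_inode() hunk below performs a lazy format upgrade: any transaction that already logs the inode core or its timestamps on a bigtime filesystem promotes the inode to the bigtime timestamp format at no extra cost. The gate it relies on is a simple flag test; a sketch of the assumed helper:

/* Assumed shape of the existing helper used in the hunk below. */
static inline bool example_inode_has_bigtime(const struct xfs_inode *ip)
{
	return (ip->i_diflags2 & XFS_DIFLAG2_BIGTIME) != 0;
}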
*/ if ((flags & (XFS_ILOG_CORE | XFS_ILOG_TIMESTAMP)) && - xfs_sb_version_hasbigtime(&ip->i_mount->m_sb) && + xfs_has_bigtime(ip->i_mount) && !xfs_inode_has_bigtime(ip)) { ip->i_diflags2 |= XFS_DIFLAG2_BIGTIME; flags |= XFS_ILOG_CORE; diff --git a/fs/xfs/libxfs/xfs_trans_resv.c b/fs/xfs/libxfs/xfs_trans_resv.c index d1a0848cb52e..5e300daa2559 100644 --- a/fs/xfs/libxfs/xfs_trans_resv.c +++ b/fs/xfs/libxfs/xfs_trans_resv.c @@ -71,9 +71,9 @@ xfs_allocfree_log_count( uint blocks; blocks = num_ops * 2 * (2 * mp->m_ag_maxlevels - 1); - if (xfs_sb_version_hasrmapbt(&mp->m_sb)) + if (xfs_has_rmapbt(mp)) blocks += num_ops * (2 * mp->m_rmap_maxlevels - 1); - if (xfs_sb_version_hasreflink(&mp->m_sb)) + if (xfs_has_reflink(mp)) blocks += num_ops * (2 * mp->m_refc_maxlevels - 1); return blocks; @@ -155,7 +155,7 @@ STATIC uint xfs_calc_finobt_res( struct xfs_mount *mp) { - if (!xfs_sb_version_hasfinobt(&mp->m_sb)) + if (!xfs_has_finobt(mp)) return 0; return xfs_calc_inobt_res(mp); @@ -187,7 +187,7 @@ xfs_calc_inode_chunk_res( XFS_FSB_TO_B(mp, 1)); if (alloc) { /* icreate tx uses ordered buffers */ - if (xfs_sb_version_has_v3inode(&mp->m_sb)) + if (xfs_has_v3inodes(mp)) return res; size = XFS_FSB_TO_B(mp, 1); } @@ -268,7 +268,7 @@ xfs_calc_write_reservation( xfs_calc_buf_res(3, mp->m_sb.sb_sectsize) + xfs_calc_buf_res(xfs_allocfree_log_count(mp, 2), blksz); - if (xfs_sb_version_hasrealtime(&mp->m_sb)) { + if (xfs_has_realtime(mp)) { t2 = xfs_calc_inode_res(mp, 1) + xfs_calc_buf_res(XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK), blksz) + @@ -317,7 +317,7 @@ xfs_calc_itruncate_reservation( t2 = xfs_calc_buf_res(9, mp->m_sb.sb_sectsize) + xfs_calc_buf_res(xfs_allocfree_log_count(mp, 4), blksz); - if (xfs_sb_version_hasrealtime(&mp->m_sb)) { + if (xfs_has_realtime(mp)) { t3 = xfs_calc_buf_res(5, mp->m_sb.sb_sectsize) + xfs_calc_buf_res(xfs_rtalloc_log_count(mp, 2), blksz) + xfs_calc_buf_res(xfs_allocfree_log_count(mp, 2), blksz); @@ -799,29 +799,6 @@ xfs_calc_qm_dqalloc_reservation( } /* - * Turning off quotas. - * the quota off logitems: sizeof(struct xfs_qoff_logitem) * 2 - * the superblock for the quota flags: sector size - */ -STATIC uint -xfs_calc_qm_quotaoff_reservation( - struct xfs_mount *mp) -{ - return sizeof(struct xfs_qoff_logitem) * 2 + - xfs_calc_buf_res(1, mp->m_sb.sb_sectsize); -} - -/* - * End of turning off quotas. - * the quota off logitems: sizeof(struct xfs_qoff_logitem) * 2 - */ -STATIC uint -xfs_calc_qm_quotaoff_end_reservation(void) -{ - return sizeof(struct xfs_qoff_logitem) * 2; -} - -/* * Syncing the incore super block changes to disk. * the super block to reflect the changes: sector size */ @@ -842,14 +819,14 @@ xfs_trans_resv_calc( * require a permanent reservation on space. 
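As the xfs_log_rlimit.c hunk near the top of this patch shows, a permanent reservation's worst-case log footprint is the per-roll reservation multiplied by the log count, which is why the reflink cases below raise tr_logcount rather than inflating tr_logres itself. The sizing rule in isolation:

/* Worst-case log footprint of a permanent transaction reservation. */
static unsigned int example_worst_case_logres(unsigned int logres,
					      int logcount)
{
	return logcount > 1 ? logres * logcount : logres;
}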
*/ resp->tr_write.tr_logres = xfs_calc_write_reservation(mp); - if (xfs_sb_version_hasreflink(&mp->m_sb)) + if (xfs_has_reflink(mp)) resp->tr_write.tr_logcount = XFS_WRITE_LOG_COUNT_REFLINK; else resp->tr_write.tr_logcount = XFS_WRITE_LOG_COUNT; resp->tr_write.tr_logflags |= XFS_TRANS_PERM_LOG_RES; resp->tr_itruncate.tr_logres = xfs_calc_itruncate_reservation(mp); - if (xfs_sb_version_hasreflink(&mp->m_sb)) + if (xfs_has_reflink(mp)) resp->tr_itruncate.tr_logcount = XFS_ITRUNCATE_LOG_COUNT_REFLINK; else @@ -910,7 +887,7 @@ xfs_trans_resv_calc( resp->tr_growrtalloc.tr_logflags |= XFS_TRANS_PERM_LOG_RES; resp->tr_qm_dqalloc.tr_logres = xfs_calc_qm_dqalloc_reservation(mp); - if (xfs_sb_version_hasreflink(&mp->m_sb)) + if (xfs_has_reflink(mp)) resp->tr_qm_dqalloc.tr_logcount = XFS_WRITE_LOG_COUNT_REFLINK; else resp->tr_qm_dqalloc.tr_logcount = XFS_WRITE_LOG_COUNT; @@ -923,13 +900,6 @@ xfs_trans_resv_calc( resp->tr_qm_setqlim.tr_logres = xfs_calc_qm_setqlim_reservation(); resp->tr_qm_setqlim.tr_logcount = XFS_DEFAULT_LOG_COUNT; - resp->tr_qm_quotaoff.tr_logres = xfs_calc_qm_quotaoff_reservation(mp); - resp->tr_qm_quotaoff.tr_logcount = XFS_DEFAULT_LOG_COUNT; - - resp->tr_qm_equotaoff.tr_logres = - xfs_calc_qm_quotaoff_end_reservation(); - resp->tr_qm_equotaoff.tr_logcount = XFS_DEFAULT_LOG_COUNT; - resp->tr_sb.tr_logres = xfs_calc_sb_reservation(mp); resp->tr_sb.tr_logcount = XFS_DEFAULT_LOG_COUNT; diff --git a/fs/xfs/libxfs/xfs_trans_resv.h b/fs/xfs/libxfs/xfs_trans_resv.h index 7241ab28cf84..fc4e9b369a3a 100644 --- a/fs/xfs/libxfs/xfs_trans_resv.h +++ b/fs/xfs/libxfs/xfs_trans_resv.h @@ -46,8 +46,6 @@ struct xfs_trans_resv { struct xfs_trans_res tr_growrtfree; /* grow realtime freeing */ struct xfs_trans_res tr_qm_setqlim; /* adjust quota limits */ struct xfs_trans_res tr_qm_dqalloc; /* allocate quota on disk */ - struct xfs_trans_res tr_qm_quotaoff; /* turn quota off */ - struct xfs_trans_res tr_qm_equotaoff;/* end of turn quota off */ struct xfs_trans_res tr_sb; /* modify superblock */ struct xfs_trans_res tr_fsyncts; /* update timestamps on fsync */ }; diff --git a/fs/xfs/libxfs/xfs_trans_space.h b/fs/xfs/libxfs/xfs_trans_space.h index 7ad3659c5d2a..50332be34388 100644 --- a/fs/xfs/libxfs/xfs_trans_space.h +++ b/fs/xfs/libxfs/xfs_trans_space.h @@ -57,8 +57,7 @@ XFS_DAREMOVE_SPACE_RES(mp, XFS_DATA_FORK) #define XFS_IALLOC_SPACE_RES(mp) \ (M_IGEO(mp)->ialloc_blks + \ - ((xfs_sb_version_hasfinobt(&mp->m_sb) ? 2 : 1) * \ - M_IGEO(mp)->inobt_maxlevels)) + ((xfs_has_finobt(mp) ? 2 : 1) * M_IGEO(mp)->inobt_maxlevels)) /* * Space reservation values for various transactions. @@ -94,8 +93,7 @@ #define XFS_SYMLINK_SPACE_RES(mp,nl,b) \ (XFS_IALLOC_SPACE_RES(mp) + XFS_DIRENTER_SPACE_RES(mp,nl) + (b)) #define XFS_IFREE_SPACE_RES(mp) \ - (xfs_sb_version_hasfinobt(&mp->m_sb) ? \ - M_IGEO(mp)->inobt_maxlevels : 0) + (xfs_has_finobt(mp) ? 
M_IGEO(mp)->inobt_maxlevels : 0) #endif /* __XFS_TRANS_SPACE_H__ */ diff --git a/fs/xfs/libxfs/xfs_types.c b/fs/xfs/libxfs/xfs_types.c index e8f4abee7892..e810d23f2d97 100644 --- a/fs/xfs/libxfs/xfs_types.c +++ b/fs/xfs/libxfs/xfs_types.c @@ -169,7 +169,7 @@ xfs_internal_inum( xfs_ino_t ino) { return ino == mp->m_sb.sb_rbmino || ino == mp->m_sb.sb_rsumino || - (xfs_sb_version_hasquota(&mp->m_sb) && + (xfs_has_quota(mp) && xfs_is_quota_inode(&mp->m_sb, ino)); } diff --git a/fs/xfs/libxfs/xfs_types.h b/fs/xfs/libxfs/xfs_types.h index 0870ef6f933d..b6da06b40989 100644 --- a/fs/xfs/libxfs/xfs_types.h +++ b/fs/xfs/libxfs/xfs_types.h @@ -87,6 +87,11 @@ typedef void * xfs_failaddr_t; #define XFS_ATTR_FORK 1 #define XFS_COW_FORK 2 +#define XFS_WHICHFORK_STRINGS \ + { XFS_DATA_FORK, "data" }, \ + { XFS_ATTR_FORK, "attr" }, \ + { XFS_COW_FORK, "cow" } + /* * Min numbers of data/attr fork btree root pointers. */ diff --git a/fs/xfs/scrub/agheader.c b/fs/xfs/scrub/agheader.c index be1a7e1e65f7..ae3c9f6e2c69 100644 --- a/fs/xfs/scrub/agheader.c +++ b/fs/xfs/scrub/agheader.c @@ -36,7 +36,7 @@ xchk_superblock_xref( agbno = XFS_SB_BLOCK(mp); - error = xchk_ag_init(sc, agno, &sc->sa); + error = xchk_ag_init_existing(sc, agno, &sc->sa); if (!xchk_xref_process_error(sc, agno, agbno, &error)) return; @@ -63,6 +63,7 @@ xchk_superblock( struct xfs_mount *mp = sc->mp; struct xfs_buf *bp; struct xfs_dsb *sb; + struct xfs_perag *pag; xfs_agnumber_t agno; uint32_t v2_ok; __be32 features_mask; @@ -73,6 +74,15 @@ xchk_superblock( if (agno == 0) return 0; + /* + * Grab an active reference to the perag structure. If we can't get + * it, we're racing with something that's tearing down the AG, so + * signal that the AG no longer exists. + */ + pag = xfs_perag_get(mp, agno); + if (!pag) + return -ENOENT; + error = xfs_sb_read_secondary(mp, sc->tp, agno, &bp); /* * The superblock verifier can return several different error codes @@ -92,7 +102,7 @@ xchk_superblock( break; } if (!xchk_process_error(sc, agno, XFS_SB_BLOCK(mp), &error)) - return error; + goto out_pag; sb = bp->b_addr; @@ -248,7 +258,7 @@ xchk_superblock( xchk_block_set_corrupt(sc, bp); } else { v2_ok = XFS_SB_VERSION2_OKBITS; - if (XFS_SB_VERSION_NUM(&mp->m_sb) >= XFS_SB_VERSION_5) + if (xfs_sb_is_v5(&mp->m_sb)) v2_ok |= XFS_SB_VERSION2_CRCBIT; if (!!(sb->sb_features2 & cpu_to_be32(~v2_ok))) @@ -273,7 +283,7 @@ xchk_superblock( (cpu_to_be32(mp->m_sb.sb_features2) & features_mask)) xchk_block_set_corrupt(sc, bp); - if (!xfs_sb_version_hascrc(&mp->m_sb)) { + if (!xfs_has_crc(mp)) { /* all v5 fields must be zero */ if (memchr_inv(&sb->sb_features_compat, 0, sizeof(struct xfs_dsb) - @@ -324,7 +334,7 @@ xchk_superblock( /* Don't care about sb_lsn */ } - if (xfs_sb_version_hasmetauuid(&mp->m_sb)) { + if (xfs_has_metauuid(mp)) { /* The metadata UUID must be the same for all supers */ if (!uuid_equal(&sb->sb_meta_uuid, &mp->m_sb.sb_meta_uuid)) xchk_block_set_corrupt(sc, bp); @@ -336,7 +346,8 @@ xchk_superblock( xchk_block_set_corrupt(sc, bp); xchk_superblock_xref(sc, bp); - +out_pag: + xfs_perag_put(pag); return error; } @@ -346,7 +357,7 @@ xchk_superblock( STATIC int xchk_agf_record_bno_lengths( struct xfs_btree_cur *cur, - struct xfs_alloc_rec_incore *rec, + const struct xfs_alloc_rec_incore *rec, void *priv) { xfs_extlen_t *blocks = priv; @@ -419,7 +430,7 @@ xchk_agf_xref_btreeblks( int error; /* agf_btreeblks didn't exist before lazysbcount */ - if (!xfs_sb_version_haslazysbcount(&sc->mp->m_sb)) + if (!xfs_has_lazysbcount(sc->mp)) return; /* Check agf_rmap_blocks; 
set up for agf_btreeblks check */ @@ -438,7 +449,7 @@ xchk_agf_xref_btreeblks( * No rmap cursor; we can't xref if we have the rmapbt feature. * We also can't do it if we're missing the free space btree cursors. */ - if ((xfs_sb_version_hasrmapbt(&mp->m_sb) && !sc->sa.rmap_cur) || + if ((xfs_has_rmapbt(mp) && !sc->sa.rmap_cur) || !sc->sa.bno_cur || !sc->sa.cnt_cur) return; @@ -527,6 +538,7 @@ xchk_agf( xchk_buffer_recheck(sc, sc->sa.agf_bp); agf = sc->sa.agf_bp->b_addr; + pag = sc->sa.pag; /* Check the AG length */ eoag = be32_to_cpu(agf->agf_length); @@ -550,7 +562,7 @@ xchk_agf( if (level <= 0 || level > XFS_BTREE_MAXLEVELS) xchk_block_set_corrupt(sc, sc->sa.agf_bp); - if (xfs_sb_version_hasrmapbt(&mp->m_sb)) { + if (xfs_has_rmapbt(mp)) { agbno = be32_to_cpu(agf->agf_roots[XFS_BTNUM_RMAP]); if (!xfs_verify_agbno(mp, agno, agbno)) xchk_block_set_corrupt(sc, sc->sa.agf_bp); @@ -560,7 +572,7 @@ xchk_agf( xchk_block_set_corrupt(sc, sc->sa.agf_bp); } - if (xfs_sb_version_hasreflink(&mp->m_sb)) { + if (xfs_has_reflink(mp)) { agbno = be32_to_cpu(agf->agf_refcount_root); if (!xfs_verify_agbno(mp, agno, agbno)) xchk_block_set_corrupt(sc, sc->sa.agf_bp); @@ -582,15 +594,13 @@ xchk_agf( xchk_block_set_corrupt(sc, sc->sa.agf_bp); /* Do the incore counters match? */ - pag = xfs_perag_get(mp, agno); if (pag->pagf_freeblks != be32_to_cpu(agf->agf_freeblks)) xchk_block_set_corrupt(sc, sc->sa.agf_bp); if (pag->pagf_flcount != be32_to_cpu(agf->agf_flcount)) xchk_block_set_corrupt(sc, sc->sa.agf_bp); - if (xfs_sb_version_haslazysbcount(&sc->mp->m_sb) && + if (xfs_has_lazysbcount(sc->mp) && pag->pagf_btreeblks != be32_to_cpu(agf->agf_btreeblks)) xchk_block_set_corrupt(sc, sc->sa.agf_bp); - xfs_perag_put(pag); xchk_agf_xref(sc); out: @@ -630,7 +640,7 @@ xchk_agfl_block( { struct xchk_agfl_info *sai = priv; struct xfs_scrub *sc = sai->sc; - xfs_agnumber_t agno = sc->sa.agno; + xfs_agnumber_t agno = sc->sa.pag->pag_agno; if (xfs_verify_agbno(mp, agno, agbno) && sai->nr_entries < sai->sz_entries) @@ -787,7 +797,7 @@ xchk_agi_xref_fiblocks( xfs_agblock_t blocks; int error = 0; - if (!xfs_sb_version_hasinobtcounts(&sc->mp->m_sb)) + if (!xfs_has_inobtcounts(sc->mp)) return; if (sc->sa.ino_cur) { @@ -857,6 +867,7 @@ xchk_agi( xchk_buffer_recheck(sc, sc->sa.agi_bp); agi = sc->sa.agi_bp->b_addr; + pag = sc->sa.pag; /* Check the AG length */ eoag = be32_to_cpu(agi->agi_length); @@ -872,7 +883,7 @@ xchk_agi( if (level <= 0 || level > XFS_BTREE_MAXLEVELS) xchk_block_set_corrupt(sc, sc->sa.agi_bp); - if (xfs_sb_version_hasfinobt(&mp->m_sb)) { + if (xfs_has_finobt(mp)) { agbno = be32_to_cpu(agi->agi_free_root); if (!xfs_verify_agbno(mp, agno, agbno)) xchk_block_set_corrupt(sc, sc->sa.agi_bp); @@ -909,12 +920,10 @@ xchk_agi( xchk_block_set_corrupt(sc, sc->sa.agi_bp); /* Do the incore counters match? 
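The scrub hunks here stop taking a transient perag reference around each incore-counter comparison; the reference acquired during AG setup (sc->sa.pag) is reused and released in teardown instead. The new shape, in isolation:

/* Reuse the perag reference held by the scrub context rather than a
 * local xfs_perag_get()/xfs_perag_put() pair around the check. */
pag = sc->sa.pag;
if (pag->pagi_count != be32_to_cpu(agi->agi_count))
	xchk_block_set_corrupt(sc, sc->sa.agi_bp);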
*/ - pag = xfs_perag_get(mp, agno); if (pag->pagi_count != be32_to_cpu(agi->agi_count)) xchk_block_set_corrupt(sc, sc->sa.agi_bp); if (pag->pagi_freecount != be32_to_cpu(agi->agi_freecount)) xchk_block_set_corrupt(sc, sc->sa.agi_bp); - xfs_perag_put(pag); xchk_agi_xref(sc); out: diff --git a/fs/xfs/scrub/agheader_repair.c b/fs/xfs/scrub/agheader_repair.c index e95f8c98f0f7..0f8deee66f15 100644 --- a/fs/xfs/scrub/agheader_repair.c +++ b/fs/xfs/scrub/agheader_repair.c @@ -70,7 +70,7 @@ struct xrep_agf_allocbt { STATIC int xrep_agf_walk_allocbt( struct xfs_btree_cur *cur, - struct xfs_alloc_rec_incore *rec, + const struct xfs_alloc_rec_incore *rec, void *priv) { struct xrep_agf_allocbt *raa = priv; @@ -94,7 +94,7 @@ xrep_agf_check_agfl_block( { struct xfs_scrub *sc = priv; - if (!xfs_verify_agbno(mp, sc->sa.agno, agbno)) + if (!xfs_verify_agbno(mp, sc->sa.pag->pag_agno, agbno)) return -EFSCORRUPTED; return 0; } @@ -164,7 +164,7 @@ xrep_agf_find_btrees( return -EFSCORRUPTED; /* We must find the refcountbt root if that feature is enabled. */ - if (xfs_sb_version_hasreflink(&sc->mp->m_sb) && + if (xfs_has_reflink(sc->mp) && !xrep_check_btree_root(sc, &fab[XREP_AGF_REFCOUNTBT])) return -EFSCORRUPTED; @@ -188,12 +188,13 @@ xrep_agf_init_header( memset(agf, 0, BBTOB(agf_bp->b_length)); agf->agf_magicnum = cpu_to_be32(XFS_AGF_MAGIC); agf->agf_versionnum = cpu_to_be32(XFS_AGF_VERSION); - agf->agf_seqno = cpu_to_be32(sc->sa.agno); - agf->agf_length = cpu_to_be32(xfs_ag_block_count(mp, sc->sa.agno)); + agf->agf_seqno = cpu_to_be32(sc->sa.pag->pag_agno); + agf->agf_length = cpu_to_be32(xfs_ag_block_count(mp, + sc->sa.pag->pag_agno)); agf->agf_flfirst = old_agf->agf_flfirst; agf->agf_fllast = old_agf->agf_fllast; agf->agf_flcount = old_agf->agf_flcount; - if (xfs_sb_version_hascrc(&mp->m_sb)) + if (xfs_has_crc(mp)) uuid_copy(&agf->agf_uuid, &mp->m_sb.sb_meta_uuid); /* Mark the incore AGF data stale until we're done fixing things. */ @@ -223,7 +224,7 @@ xrep_agf_set_roots( agf->agf_levels[XFS_BTNUM_RMAPi] = cpu_to_be32(fab[XREP_AGF_RMAPBT].height); - if (xfs_sb_version_hasreflink(&sc->mp->m_sb)) { + if (xfs_has_reflink(sc->mp)) { agf->agf_refcount_root = cpu_to_be32(fab[XREP_AGF_REFCOUNTBT].root); agf->agf_refcount_level = @@ -280,7 +281,7 @@ xrep_agf_calc_from_btrees( agf->agf_btreeblks = cpu_to_be32(btreeblks); /* Update the AGF counters from the refcountbt. */ - if (xfs_sb_version_hasreflink(&mp->m_sb)) { + if (xfs_has_reflink(mp)) { cur = xfs_refcountbt_init_cursor(mp, sc->tp, agf_bp, sc->sa.pag); error = xfs_btree_count_blocks(cur, &blocks); @@ -363,16 +364,16 @@ xrep_agf( int error; /* We require the rmapbt to rebuild anything. */ - if (!xfs_sb_version_hasrmapbt(&mp->m_sb)) + if (!xfs_has_rmapbt(mp)) return -EOPNOTSUPP; - xchk_perag_get(sc->mp, &sc->sa); /* * Make sure we have the AGF buffer, as scrub might have decided it * was corrupt after xfs_alloc_read_agf failed with -EFSCORRUPTED. */ error = xfs_trans_read_buf(mp, sc->tp, mp->m_ddev_targp, - XFS_AG_DADDR(mp, sc->sa.agno, XFS_AGF_DADDR(mp)), + XFS_AG_DADDR(mp, sc->sa.pag->pag_agno, + XFS_AGF_DADDR(mp)), XFS_FSS_TO_BB(mp, 1), 0, &agf_bp, NULL); if (error) return error; @@ -388,7 +389,7 @@ xrep_agf( * btrees rooted in the AGF. If the AGFL contents are obviously bad * then we'll bail out. 
*/ - error = xfs_alloc_read_agfl(mp, sc->tp, sc->sa.agno, &agfl_bp); + error = xfs_alloc_read_agfl(mp, sc->tp, sc->sa.pag->pag_agno, &agfl_bp); if (error) return error; @@ -442,7 +443,7 @@ struct xrep_agfl { STATIC int xrep_agfl_walk_rmap( struct xfs_btree_cur *cur, - struct xfs_rmap_irec *rec, + const struct xfs_rmap_irec *rec, void *priv) { struct xrep_agfl *ra = priv; @@ -586,7 +587,7 @@ xrep_agfl_init_header( agfl = XFS_BUF_TO_AGFL(agfl_bp); memset(agfl, 0xFF, BBTOB(agfl_bp->b_length)); agfl->agfl_magicnum = cpu_to_be32(XFS_AGFL_MAGIC); - agfl->agfl_seqno = cpu_to_be32(sc->sa.agno); + agfl->agfl_seqno = cpu_to_be32(sc->sa.pag->pag_agno); uuid_copy(&agfl->agfl_uuid, &mp->m_sb.sb_meta_uuid); /* @@ -599,7 +600,8 @@ xrep_agfl_init_header( for_each_xbitmap_extent(br, n, agfl_extents) { agbno = XFS_FSB_TO_AGBNO(mp, br->start); - trace_xrep_agfl_insert(mp, sc->sa.agno, agbno, br->len); + trace_xrep_agfl_insert(mp, sc->sa.pag->pag_agno, agbno, + br->len); while (br->len > 0 && fl_off < flcount) { agfl_bno[fl_off] = cpu_to_be32(agbno); @@ -638,10 +640,9 @@ xrep_agfl( int error; /* We require the rmapbt to rebuild anything. */ - if (!xfs_sb_version_hasrmapbt(&mp->m_sb)) + if (!xfs_has_rmapbt(mp)) return -EOPNOTSUPP; - xchk_perag_get(sc->mp, &sc->sa); xbitmap_init(&agfl_extents); /* @@ -649,7 +650,8 @@ xrep_agfl( * nothing wrong with the AGF, but all the AG header repair functions * have this chicken-and-egg problem. */ - error = xfs_alloc_read_agf(mp, sc->tp, sc->sa.agno, 0, &agf_bp); + error = xfs_alloc_read_agf(mp, sc->tp, sc->sa.pag->pag_agno, 0, + &agf_bp); if (error) return error; @@ -658,7 +660,8 @@ xrep_agfl( * was corrupt after xfs_alloc_read_agfl failed with -EFSCORRUPTED. */ error = xfs_trans_read_buf(mp, sc->tp, mp->m_ddev_targp, - XFS_AG_DADDR(mp, sc->sa.agno, XFS_AGFL_DADDR(mp)), + XFS_AG_DADDR(mp, sc->sa.pag->pag_agno, + XFS_AGFL_DADDR(mp)), XFS_FSS_TO_BB(mp, 1), 0, &agfl_bp, NULL); if (error) return error; @@ -723,7 +726,8 @@ xrep_agi_find_btrees( int error; /* Read the AGF. */ - error = xfs_alloc_read_agf(mp, sc->tp, sc->sa.agno, 0, &agf_bp); + error = xfs_alloc_read_agf(mp, sc->tp, sc->sa.pag->pag_agno, 0, + &agf_bp); if (error) return error; @@ -737,7 +741,7 @@ xrep_agi_find_btrees( return -EFSCORRUPTED; /* We must find the finobt root if that feature is enabled. */ - if (xfs_sb_version_hasfinobt(&mp->m_sb) && + if (xfs_has_finobt(mp) && !xrep_check_btree_root(sc, &fab[XREP_AGI_FINOBT])) return -EFSCORRUPTED; @@ -761,11 +765,12 @@ xrep_agi_init_header( memset(agi, 0, BBTOB(agi_bp->b_length)); agi->agi_magicnum = cpu_to_be32(XFS_AGI_MAGIC); agi->agi_versionnum = cpu_to_be32(XFS_AGI_VERSION); - agi->agi_seqno = cpu_to_be32(sc->sa.agno); - agi->agi_length = cpu_to_be32(xfs_ag_block_count(mp, sc->sa.agno)); + agi->agi_seqno = cpu_to_be32(sc->sa.pag->pag_agno); + agi->agi_length = cpu_to_be32(xfs_ag_block_count(mp, + sc->sa.pag->pag_agno)); agi->agi_newino = cpu_to_be32(NULLAGINO); agi->agi_dirino = cpu_to_be32(NULLAGINO); - if (xfs_sb_version_hascrc(&mp->m_sb)) + if (xfs_has_crc(mp)) uuid_copy(&agi->agi_uuid, &mp->m_sb.sb_meta_uuid); /* We don't know how to fix the unlinked list yet. 
*/ @@ -787,7 +792,7 @@ xrep_agi_set_roots( agi->agi_root = cpu_to_be32(fab[XREP_AGI_INOBT].root); agi->agi_level = cpu_to_be32(fab[XREP_AGI_INOBT].height); - if (xfs_sb_version_hasfinobt(&sc->mp->m_sb)) { + if (xfs_has_finobt(sc->mp)) { agi->agi_free_root = cpu_to_be32(fab[XREP_AGI_FINOBT].root); agi->agi_free_level = cpu_to_be32(fab[XREP_AGI_FINOBT].height); } @@ -811,7 +816,7 @@ xrep_agi_calc_from_btrees( error = xfs_ialloc_count_inodes(cur, &count, &freecount); if (error) goto err; - if (xfs_sb_version_hasinobtcounts(&mp->m_sb)) { + if (xfs_has_inobtcounts(mp)) { xfs_agblock_t blocks; error = xfs_btree_count_blocks(cur, &blocks); @@ -824,8 +829,7 @@ xrep_agi_calc_from_btrees( agi->agi_count = cpu_to_be32(count); agi->agi_freecount = cpu_to_be32(freecount); - if (xfs_sb_version_hasfinobt(&mp->m_sb) && - xfs_sb_version_hasinobtcounts(&mp->m_sb)) { + if (xfs_has_finobt(mp) && xfs_has_inobtcounts(mp)) { xfs_agblock_t blocks; cur = xfs_inobt_init_cursor(mp, sc->tp, agi_bp, @@ -893,16 +897,16 @@ xrep_agi( int error; /* We require the rmapbt to rebuild anything. */ - if (!xfs_sb_version_hasrmapbt(&mp->m_sb)) + if (!xfs_has_rmapbt(mp)) return -EOPNOTSUPP; - xchk_perag_get(sc->mp, &sc->sa); /* * Make sure we have the AGI buffer, as scrub might have decided it * was corrupt after xfs_ialloc_read_agi failed with -EFSCORRUPTED. */ error = xfs_trans_read_buf(mp, sc->tp, mp->m_ddev_targp, - XFS_AG_DADDR(mp, sc->sa.agno, XFS_AGI_DADDR(mp)), + XFS_AG_DADDR(mp, sc->sa.pag->pag_agno, + XFS_AGI_DADDR(mp)), XFS_FSS_TO_BB(mp, 1), 0, &agi_bp, NULL); if (error) return error; diff --git a/fs/xfs/scrub/alloc.c b/fs/xfs/scrub/alloc.c index d5741980094a..87518e1292f8 100644 --- a/fs/xfs/scrub/alloc.c +++ b/fs/xfs/scrub/alloc.c @@ -91,7 +91,7 @@ xchk_allocbt_xref( STATIC int xchk_allocbt_rec( struct xchk_btree *bs, - union xfs_btree_rec *rec) + const union xfs_btree_rec *rec) { struct xfs_mount *mp = bs->cur->bc_mp; xfs_agnumber_t agno = bs->cur->bc_ag.pag->pag_agno; diff --git a/fs/xfs/scrub/attr.c b/fs/xfs/scrub/attr.c index 552af0cf8482..b6f0c9f3f124 100644 --- a/fs/xfs/scrub/attr.c +++ b/fs/xfs/scrub/attr.c @@ -25,11 +25,11 @@ * reallocating the buffer if necessary. Buffer contents are not preserved * across a reallocation. */ -int +static int xchk_setup_xattr_buf( struct xfs_scrub *sc, size_t value_size, - xfs_km_flags_t flags) + gfp_t flags) { size_t sz; struct xchk_xattr_buf *ab = sc->buf; @@ -57,7 +57,7 @@ xchk_setup_xattr_buf( * Don't zero the buffer upon allocation to avoid runtime overhead. * All users must be careful never to read uninitialized contents. */ - ab = kmem_alloc_large(sizeof(*ab) + sz, flags); + ab = kvmalloc(sizeof(*ab) + sz, flags); if (!ab) return -ENOMEM; @@ -79,7 +79,7 @@ xchk_setup_xattr( * without the inode lock held, which means we can sleep. */ if (sc->flags & XCHK_TRY_HARDER) { - error = xchk_setup_xattr_buf(sc, XATTR_SIZE_MAX, 0); + error = xchk_setup_xattr_buf(sc, XATTR_SIZE_MAX, GFP_KERNEL); if (error) return error; } @@ -138,7 +138,8 @@ xchk_xattr_listent( * doesn't work, we overload the seen_enough variable to convey * the error message back to the main scrub function. */ - error = xchk_setup_xattr_buf(sx->sc, valuelen, KM_MAYFAIL); + error = xchk_setup_xattr_buf(sx->sc, valuelen, + GFP_KERNEL | __GFP_RETRY_MAYFAIL); if (error == -ENOMEM) error = -EDEADLOCK; if (error) { @@ -323,7 +324,8 @@ xchk_xattr_block( return 0; /* Allocate memory for block usage checking. 
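The xattr scrub hunks below drop the XFS-private kmem_alloc_large()/KM_* flags in favour of plain kvmalloc() and GFP flags. A sketch of the replacement pattern:

/* kvmalloc() tries kmalloc first and falls back to vmalloc;
 * __GFP_RETRY_MAYFAIL keeps the old KM_MAYFAIL semantics of returning
 * NULL under memory pressure instead of invoking the OOM killer. */
void *buf = kvmalloc(size, GFP_KERNEL | __GFP_RETRY_MAYFAIL);
if (!buf)
	return -ENOMEM;
/* ... use buf ... */
kvfree(buf);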
*/ - error = xchk_setup_xattr_buf(ds->sc, 0, KM_MAYFAIL); + error = xchk_setup_xattr_buf(ds->sc, 0, + GFP_KERNEL | __GFP_RETRY_MAYFAIL); if (error == -ENOMEM) return -EDEADLOCK; if (error) @@ -334,7 +336,7 @@ xchk_xattr_block( bitmap_zero(usedmap, mp->m_attr_geo->blksize); /* Check all the padding. */ - if (xfs_sb_version_hascrc(&ds->sc->mp->m_sb)) { + if (xfs_has_crc(ds->sc->mp)) { struct xfs_attr3_leafblock *leaf = bp->b_addr; if (leaf->hdr.pad1 != 0 || leaf->hdr.pad2 != 0 || diff --git a/fs/xfs/scrub/attr.h b/fs/xfs/scrub/attr.h index 13a1d2e8424d..1719e1c4da59 100644 --- a/fs/xfs/scrub/attr.h +++ b/fs/xfs/scrub/attr.h @@ -65,7 +65,4 @@ xchk_xattr_dstmap( BITS_TO_LONGS(sc->mp->m_attr_geo->blksize); } -int xchk_setup_xattr_buf(struct xfs_scrub *sc, size_t value_size, - xfs_km_flags_t flags); - #endif /* __XFS_SCRUB_ATTR_H__ */ diff --git a/fs/xfs/scrub/bitmap.c b/fs/xfs/scrub/bitmap.c index 813b5f219113..d6d24c866bc4 100644 --- a/fs/xfs/scrub/bitmap.c +++ b/fs/xfs/scrub/bitmap.c @@ -260,7 +260,7 @@ xbitmap_set_btcur_path( xfs_btree_get_block(cur, i, &bp); if (!bp) continue; - fsb = XFS_DADDR_TO_FSB(cur->bc_mp, bp->b_bn); + fsb = XFS_DADDR_TO_FSB(cur->bc_mp, xfs_buf_daddr(bp)); error = xbitmap_set(bitmap, fsb, 1); if (error) return error; @@ -284,7 +284,7 @@ xbitmap_collect_btblock( if (!bp) return 0; - fsbno = XFS_DADDR_TO_FSB(cur->bc_mp, bp->b_bn); + fsbno = XFS_DADDR_TO_FSB(cur->bc_mp, xfs_buf_daddr(bp)); return xbitmap_set(bitmap, fsbno, 1); } diff --git a/fs/xfs/scrub/bmap.c b/fs/xfs/scrub/bmap.c index 1d146c9d9de1..017da9ceaee9 100644 --- a/fs/xfs/scrub/bmap.c +++ b/fs/xfs/scrub/bmap.c @@ -260,10 +260,10 @@ xchk_bmap_iextent_xref( agbno = XFS_FSB_TO_AGBNO(mp, irec->br_startblock); len = irec->br_blockcount; - error = xchk_ag_init(info->sc, agno, &info->sc->sa); + error = xchk_ag_init_existing(info->sc, agno, &info->sc->sa); if (!xchk_fblock_process_error(info->sc, info->whichfork, irec->br_startoff, &error)) - return; + goto out_free; xchk_xref_is_used_space(info->sc, agbno, len); xchk_xref_is_not_inode_chunk(info->sc, agbno, len); @@ -283,6 +283,7 @@ xchk_bmap_iextent_xref( break; } +out_free: xchk_ag_free(info->sc, &info->sc->sa); } @@ -383,7 +384,7 @@ xchk_bmap_iextent( STATIC int xchk_bmapbt_rec( struct xchk_btree *bs, - union xfs_btree_rec *rec) + const union xfs_btree_rec *rec) { struct xfs_bmbt_irec irec; struct xfs_bmbt_irec iext_irec; @@ -400,7 +401,7 @@ xchk_bmapbt_rec( * Check the owners of the btree blocks up to the level below * the root since the verifiers don't do that. */ - if (xfs_sb_version_hascrc(&bs->cur->bc_mp->m_sb) && + if (xfs_has_crc(bs->cur->bc_mp) && bs->cur->bc_ptrs[0] == 1) { for (i = 0; i < bs->cur->bc_nlevels - 1; i++) { block = xfs_btree_get_block(bs->cur, i, &bp); @@ -473,10 +474,11 @@ struct xchk_bmap_check_rmap_info { STATIC int xchk_bmap_check_rmap( struct xfs_btree_cur *cur, - struct xfs_rmap_irec *rec, + const struct xfs_rmap_irec *rec, void *priv) { struct xfs_bmbt_irec irec; + struct xfs_rmap_irec check_rec; struct xchk_bmap_check_rmap_info *sbcri = priv; struct xfs_ifork *ifp; struct xfs_scrub *sc = sbcri->sc; @@ -510,28 +512,30 @@ xchk_bmap_check_rmap( * length, so we have to loop through the bmbt to make sure that the * entire rmap is covered by bmbt records. 
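Because rmap query callbacks now receive a const record, the checker in the following hunk can no longer advance the record in place while walking the bmbt; it consumes a mutable stack copy instead. The pattern in isolation:

struct xfs_rmap_irec check_rec = *rec;	/* rec is const; the copy is mutable */

/* Walk forward through the mapping without touching *rec. */
check_rec.rm_startblock += irec.br_blockcount;
check_rec.rm_offset += irec.br_blockcount;
check_rec.rm_blockcount -= irec.br_blockcount;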
*/ + check_rec = *rec; while (have_map) { - if (irec.br_startoff != rec->rm_offset) + if (irec.br_startoff != check_rec.rm_offset) xchk_fblock_set_corrupt(sc, sbcri->whichfork, - rec->rm_offset); + check_rec.rm_offset); if (irec.br_startblock != XFS_AGB_TO_FSB(sc->mp, - cur->bc_ag.pag->pag_agno, rec->rm_startblock)) + cur->bc_ag.pag->pag_agno, + check_rec.rm_startblock)) xchk_fblock_set_corrupt(sc, sbcri->whichfork, - rec->rm_offset); - if (irec.br_blockcount > rec->rm_blockcount) + check_rec.rm_offset); + if (irec.br_blockcount > check_rec.rm_blockcount) xchk_fblock_set_corrupt(sc, sbcri->whichfork, - rec->rm_offset); + check_rec.rm_offset); if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT) break; - rec->rm_startblock += irec.br_blockcount; - rec->rm_offset += irec.br_blockcount; - rec->rm_blockcount -= irec.br_blockcount; - if (rec->rm_blockcount == 0) + check_rec.rm_startblock += irec.br_blockcount; + check_rec.rm_offset += irec.br_blockcount; + check_rec.rm_blockcount -= irec.br_blockcount; + if (check_rec.rm_blockcount == 0) break; have_map = xfs_iext_next_extent(ifp, &sbcri->icur, &irec); if (!have_map) xchk_fblock_set_corrupt(sc, sbcri->whichfork, - rec->rm_offset); + check_rec.rm_offset); } out: @@ -581,7 +585,7 @@ xchk_bmap_check_rmaps( bool zero_size; int error; - if (!xfs_sb_version_hasrmapbt(&sc->mp->m_sb) || + if (!xfs_has_rmapbt(sc->mp) || whichfork == XFS_COW_FORK || (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)) return 0; @@ -659,8 +663,7 @@ xchk_bmap( } break; case XFS_ATTR_FORK: - if (!xfs_sb_version_hasattr(&mp->m_sb) && - !xfs_sb_version_hasattr2(&mp->m_sb)) + if (!xfs_has_attr(mp) && !xfs_has_attr2(mp)) xchk_ino_set_corrupt(sc, sc->ip->i_ino); break; default: diff --git a/fs/xfs/scrub/btree.c b/fs/xfs/scrub/btree.c index bd1172358964..eccb855dc904 100644 --- a/fs/xfs/scrub/btree.c +++ b/fs/xfs/scrub/btree.c @@ -374,10 +374,10 @@ xchk_btree_check_block_owner( init_sa = bs->cur->bc_flags & XFS_BTREE_LONG_PTRS; if (init_sa) { - error = xchk_ag_init(bs->sc, agno, &bs->sc->sa); + error = xchk_ag_init_existing(bs->sc, agno, &bs->sc->sa); if (!xchk_btree_xref_process_error(bs->sc, bs->cur, level, &error)) - return error; + goto out_free; } xchk_xref_is_used_space(bs->sc, agbno, 1); @@ -393,6 +393,7 @@ xchk_btree_check_block_owner( if (!bs->sc->sa.rmap_cur && btnum == XFS_BTNUM_RMAP) bs->cur = NULL; +out_free: if (init_sa) xchk_ag_free(bs->sc, &bs->sc->sa); @@ -435,12 +436,12 @@ xchk_btree_check_owner( if (!co) return -ENOMEM; co->level = level; - co->daddr = XFS_BUF_ADDR(bp); + co->daddr = xfs_buf_daddr(bp); list_add_tail(&co->list, &bs->to_check); return 0; } - return xchk_btree_check_block_owner(bs, level, XFS_BUF_ADDR(bp)); + return xchk_btree_check_block_owner(bs, level, xfs_buf_daddr(bp)); } /* Decide if we want to check minrecs of a btree block in the inode root. 
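Every bp->b_bn and XFS_BUF_ADDR(bp) use in this series becomes an xfs_buf_daddr(bp) call. Given that the b_bn field is deleted near the top of this diff while b_maps[0].bm_bn keeps being assigned, the accessor presumably amounts to the following (an assumption, as its definition is not in this listing):

/* Assumed: returns the mapped disk address that b_bn used to cache. */
static inline xfs_daddr_t example_buf_daddr(struct xfs_buf *bp)
{
	return bp->b_maps[0].bm_bn;
}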
*/ diff --git a/fs/xfs/scrub/btree.h b/fs/xfs/scrub/btree.h index 5572e475f8ed..b7d2fc01fbf9 100644 --- a/fs/xfs/scrub/btree.h +++ b/fs/xfs/scrub/btree.h @@ -26,8 +26,8 @@ void xchk_btree_xref_set_corrupt(struct xfs_scrub *sc, struct xchk_btree; typedef int (*xchk_btree_rec_fn)( - struct xchk_btree *bs, - union xfs_btree_rec *rec); + struct xchk_btree *bs, + const union xfs_btree_rec *rec); struct xchk_btree { /* caller-provided scrub state */ diff --git a/fs/xfs/scrub/common.c b/fs/xfs/scrub/common.c index 8558ca05e11d..bf1f3607d0b6 100644 --- a/fs/xfs/scrub/common.c +++ b/fs/xfs/scrub/common.c @@ -186,7 +186,7 @@ xchk_block_set_preen( struct xfs_buf *bp) { sc->sm->sm_flags |= XFS_SCRUB_OFLAG_PREEN; - trace_xchk_block_preen(sc, bp->b_bn, __return_address); + trace_xchk_block_preen(sc, xfs_buf_daddr(bp), __return_address); } /* @@ -219,7 +219,7 @@ xchk_block_set_corrupt( struct xfs_buf *bp) { sc->sm->sm_flags |= XFS_SCRUB_OFLAG_CORRUPT; - trace_xchk_block_error(sc, bp->b_bn, __return_address); + trace_xchk_block_error(sc, xfs_buf_daddr(bp), __return_address); } /* Record a corruption while cross-referencing. */ @@ -229,7 +229,7 @@ xchk_block_xref_set_corrupt( struct xfs_buf *bp) { sc->sm->sm_flags |= XFS_SCRUB_OFLAG_XCORRUPT; - trace_xchk_block_error(sc, bp->b_bn, __return_address); + trace_xchk_block_error(sc, xfs_buf_daddr(bp), __return_address); } /* @@ -324,7 +324,7 @@ struct xchk_rmap_ownedby_info { STATIC int xchk_count_rmap_ownedby_irec( struct xfs_btree_cur *cur, - struct xfs_rmap_irec *rec, + const struct xfs_rmap_irec *rec, void *priv) { struct xchk_rmap_ownedby_info *sroi = priv; @@ -394,11 +394,11 @@ want_ag_read_header_failure( } /* - * Grab all the headers for an AG. + * Grab the perag structure and all the headers for an AG. * - * The headers should be released by xchk_ag_free, but as a fail - * safe we attach all the buffers we grab to the scrub transaction so - * they'll all be freed when we cancel it. + * The headers should be released by xchk_ag_free, but as a fail safe we attach + * all the buffers we grab to the scrub transaction so they'll all be freed + * when we cancel it. Returns ENOENT if we can't grab the perag structure. */ int xchk_ag_read_headers( @@ -409,22 +409,24 @@ xchk_ag_read_headers( struct xfs_mount *mp = sc->mp; int error; - sa->agno = agno; + ASSERT(!sa->pag); + sa->pag = xfs_perag_get(mp, agno); + if (!sa->pag) + return -ENOENT; error = xfs_ialloc_read_agi(mp, sc->tp, agno, &sa->agi_bp); if (error && want_ag_read_header_failure(sc, XFS_SCRUB_TYPE_AGI)) - goto out; + return error; error = xfs_alloc_read_agf(mp, sc->tp, agno, 0, &sa->agf_bp); if (error && want_ag_read_header_failure(sc, XFS_SCRUB_TYPE_AGF)) - goto out; + return error; error = xfs_alloc_read_agfl(mp, sc->tp, agno, &sa->agfl_bp); if (error && want_ag_read_header_failure(sc, XFS_SCRUB_TYPE_AGFL)) - goto out; - error = 0; -out: - return error; + return error; + + return 0; } /* Release all the AG btree cursors. */ @@ -461,7 +463,6 @@ xchk_ag_btcur_init( { struct xfs_mount *mp = sc->mp; - xchk_perag_get(sc->mp, sa); if (sa->agf_bp && xchk_ag_btree_healthy_enough(sc, sa->pag, XFS_BTNUM_BNO)) { /* Set up a bnobt cursor for cross-referencing. */ @@ -484,21 +485,21 @@ xchk_ag_btcur_init( } /* Set up a finobt cursor for cross-referencing. 
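* The xfs_has_finobt() test below is one of the mount-based feature
* predicates that replace the xfs_sb_version_has*() superblock checks
* throughout this series. A minimal sketch of the assumed shape of
* these helpers, testing a feature bit cached in the xfs_mount (the
* XFS_FEAT_FINOBT name and m_features field are assumptions here):
*
*	static inline bool xfs_has_finobt(struct xfs_mount *mp)
*	{
*		return mp->m_features & XFS_FEAT_FINOBT;
*	}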
*/ - if (sa->agi_bp && xfs_sb_version_hasfinobt(&mp->m_sb) && + if (sa->agi_bp && xfs_has_finobt(mp) && xchk_ag_btree_healthy_enough(sc, sa->pag, XFS_BTNUM_FINO)) { sa->fino_cur = xfs_inobt_init_cursor(mp, sc->tp, sa->agi_bp, sa->pag, XFS_BTNUM_FINO); } /* Set up a rmapbt cursor for cross-referencing. */ - if (sa->agf_bp && xfs_sb_version_hasrmapbt(&mp->m_sb) && + if (sa->agf_bp && xfs_has_rmapbt(mp) && xchk_ag_btree_healthy_enough(sc, sa->pag, XFS_BTNUM_RMAP)) { sa->rmap_cur = xfs_rmapbt_init_cursor(mp, sc->tp, sa->agf_bp, sa->pag); } /* Set up a refcountbt cursor for cross-referencing. */ - if (sa->agf_bp && xfs_sb_version_hasreflink(&mp->m_sb) && + if (sa->agf_bp && xfs_has_reflink(mp) && xchk_ag_btree_healthy_enough(sc, sa->pag, XFS_BTNUM_REFC)) { sa->refc_cur = xfs_refcountbt_init_cursor(mp, sc->tp, sa->agf_bp, sa->pag); @@ -528,15 +529,14 @@ xchk_ag_free( xfs_perag_put(sa->pag); sa->pag = NULL; } - sa->agno = NULLAGNUMBER; } /* - * For scrub, grab the AGI and the AGF headers, in that order. Locking - * order requires us to get the AGI before the AGF. We use the - * transaction to avoid deadlocking on crosslinked metadata buffers; - * either the caller passes one in (bmap scrub) or we have to create a - * transaction ourselves. + * For scrub, grab the perag structure, the AGI, and the AGF headers, in that + * order. Locking order requires us to get the AGI before the AGF. We use the + * transaction to avoid deadlocking on crosslinked metadata buffers; either the + * caller passes one in (bmap scrub) or we have to create a transaction + * ourselves. Returns ENOENT if the perag struct cannot be grabbed. */ int xchk_ag_init( @@ -554,19 +554,6 @@ xchk_ag_init( return 0; } -/* - * Grab the per-ag structure if we haven't already gotten it. Teardown of the - * xchk_ag will release it for us. - */ -void -xchk_perag_get( - struct xfs_mount *mp, - struct xchk_ag *sa) -{ - if (!sa->pag) - sa->pag = xfs_perag_get(mp, sa->agno); -} - /* Per-scrubber setup functions */ /* @@ -797,7 +784,7 @@ xchk_buffer_recheck( if (!fa) return; sc->sm->sm_flags |= XFS_SCRUB_OFLAG_CORRUPT; - trace_xchk_block_error(sc, bp->b_bn, fa); + trace_xchk_block_error(sc, xfs_buf_daddr(bp), fa); } /* @@ -842,7 +829,7 @@ xchk_metadata_inode_forks( return error; /* Look for incorrect shared blocks. */ - if (xfs_sb_version_hasreflink(&sc->mp->m_sb)) { + if (xfs_has_reflink(sc->mp)) { error = xfs_reflink_inode_has_shared_extents(sc->tp, sc->ip, &shared); if (!xchk_fblock_process_error(sc, XFS_DATA_FORK, 0, @@ -884,6 +871,7 @@ xchk_stop_reaping( { sc->flags |= XCHK_REAPING_DISABLED; xfs_blockgc_stop(sc->mp); + xfs_inodegc_stop(sc->mp); } /* Restart background reaping of resources. */ @@ -891,6 +879,13 @@ void xchk_start_reaping( struct xfs_scrub *sc) { - xfs_blockgc_start(sc->mp); + /* + * Readonly filesystems do not perform inactivation or speculative + * preallocation, so there's no need to restart the workers. 
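+ * The pairing with xchk_stop_reaping() above, which quiesces both
+ * workers unconditionally, is roughly:
+ *
+ *	xchk_stop_reaping(sc);		- blockgc and inodegc paused
+ *	... scrub or repair runs ...
+ *	xchk_start_reaping(sc);		- restarted unless readonly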
+ */ + if (!xfs_is_readonly(sc->mp)) { + xfs_inodegc_start(sc->mp); + xfs_blockgc_start(sc->mp); + } sc->flags &= ~XCHK_REAPING_DISABLED; } diff --git a/fs/xfs/scrub/common.h b/fs/xfs/scrub/common.h index 0410faf7d735..454145db10e7 100644 --- a/fs/xfs/scrub/common.h +++ b/fs/xfs/scrub/common.h @@ -107,7 +107,23 @@ int xchk_setup_fscounters(struct xfs_scrub *sc); void xchk_ag_free(struct xfs_scrub *sc, struct xchk_ag *sa); int xchk_ag_init(struct xfs_scrub *sc, xfs_agnumber_t agno, struct xchk_ag *sa); -void xchk_perag_get(struct xfs_mount *mp, struct xchk_ag *sa); + +/* + * Grab all AG resources, treating the inability to grab the perag structure as + * a fs corruption. This is intended for callers checking an ondisk reference + * to a given AG, which means that the AG must still exist. + */ +static inline int +xchk_ag_init_existing( + struct xfs_scrub *sc, + xfs_agnumber_t agno, + struct xchk_ag *sa) +{ + int error = xchk_ag_init(sc, agno, sa); + + return error == -ENOENT ? -EFSCORRUPTED : error; +} + int xchk_ag_read_headers(struct xfs_scrub *sc, xfs_agnumber_t agno, struct xchk_ag *sa); void xchk_ag_btcur_free(struct xchk_ag *sa); diff --git a/fs/xfs/scrub/dabtree.c b/fs/xfs/scrub/dabtree.c index 9f0dbb47c82c..8a52514bc1ff 100644 --- a/fs/xfs/scrub/dabtree.c +++ b/fs/xfs/scrub/dabtree.c @@ -367,11 +367,11 @@ xchk_da_btree_block( pmaxrecs = &ds->maxrecs[level]; /* We only started zeroing the header on v5 filesystems. */ - if (xfs_sb_version_hascrc(&ds->sc->mp->m_sb) && hdr3->hdr.pad) + if (xfs_has_crc(ds->sc->mp) && hdr3->hdr.pad) xchk_da_set_corrupt(ds, level); /* Check the owner. */ - if (xfs_sb_version_hascrc(&ip->i_mount->m_sb)) { + if (xfs_has_crc(ip->i_mount)) { owner = be64_to_cpu(hdr3->owner); if (owner != ip->i_ino) xchk_da_set_corrupt(ds, level); diff --git a/fs/xfs/scrub/dir.c b/fs/xfs/scrub/dir.c index 28dda391d5df..200a63f58fe7 100644 --- a/fs/xfs/scrub/dir.c +++ b/fs/xfs/scrub/dir.c @@ -51,7 +51,7 @@ xchk_dir_check_ftype( int ino_dtype; int error = 0; - if (!xfs_sb_version_hasftype(&mp->m_sb)) { + if (!xfs_has_ftype(mp)) { if (dtype != DT_UNKNOWN && dtype != DT_DIR) xchk_fblock_set_corrupt(sdc->sc, XFS_DATA_FORK, offset); @@ -140,7 +140,7 @@ xchk_dir_actor( if (!strncmp(".", name, namelen)) { /* If this is "." then check that the inum matches the dir. */ - if (xfs_sb_version_hasftype(&mp->m_sb) && type != DT_DIR) + if (xfs_has_ftype(mp) && type != DT_DIR) xchk_fblock_set_corrupt(sdc->sc, XFS_DATA_FORK, offset); checked_ftype = true; @@ -152,7 +152,7 @@ xchk_dir_actor( * If this is ".." in the root inode, check that the inum * matches this dir. 
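* On filesystems with the ftype feature, each directory entry records
* the child's file type, so both "." and ".." must come back from
* getdents as DT_DIR; anything else is a corruption. That is the check
* applied below:
*
*	if (xfs_has_ftype(mp) && type != DT_DIR)
*		xchk_fblock_set_corrupt(sdc->sc, XFS_DATA_FORK, offset);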
*/ - if (xfs_sb_version_hasftype(&mp->m_sb) && type != DT_DIR) + if (xfs_has_ftype(mp) && type != DT_DIR) xchk_fblock_set_corrupt(sdc->sc, XFS_DATA_FORK, offset); checked_ftype = true; @@ -526,7 +526,7 @@ xchk_directory_leaf1_bestfree( bestcount = be32_to_cpu(ltp->bestcount); bestp = xfs_dir2_leaf_bests_p(ltp); - if (xfs_sb_version_hascrc(&sc->mp->m_sb)) { + if (xfs_has_crc(sc->mp)) { struct xfs_dir3_leaf_hdr *hdr3 = bp->b_addr; if (hdr3->pad != cpu_to_be32(0)) @@ -623,7 +623,7 @@ xchk_directory_free_bestfree( return error; xchk_buffer_recheck(sc, bp); - if (xfs_sb_version_hascrc(&sc->mp->m_sb)) { + if (xfs_has_crc(sc->mp)) { struct xfs_dir3_free_hdr *hdr3 = bp->b_addr; if (hdr3->pad != cpu_to_be32(0)) diff --git a/fs/xfs/scrub/fscounters.c b/fs/xfs/scrub/fscounters.c index fd7941e04ae1..48a6cbdf95d0 100644 --- a/fs/xfs/scrub/fscounters.c +++ b/fs/xfs/scrub/fscounters.c @@ -148,9 +148,9 @@ xchk_fscount_btreeblks( xfs_extlen_t blocks; int error; - error = xchk_ag_init(sc, agno, &sc->sa); + error = xchk_ag_init_existing(sc, agno, &sc->sa); if (error) - return error; + goto out_free; error = xfs_btree_count_blocks(sc->sa.bno_cur, &blocks); if (error) @@ -207,7 +207,7 @@ retry: /* Add up the free/freelist/bnobt/cntbt blocks */ fsc->fdblocks += pag->pagf_freeblks; fsc->fdblocks += pag->pagf_flcount; - if (xfs_sb_version_haslazysbcount(&sc->mp->m_sb)) { + if (xfs_has_lazysbcount(sc->mp)) { fsc->fdblocks += pag->pagf_btreeblks; } else { error = xchk_fscount_btreeblks(sc, fsc, agno); diff --git a/fs/xfs/scrub/ialloc.c b/fs/xfs/scrub/ialloc.c index 30e568596b79..00848ee542fb 100644 --- a/fs/xfs/scrub/ialloc.c +++ b/fs/xfs/scrub/ialloc.c @@ -418,7 +418,7 @@ xchk_iallocbt_rec_alignment( STATIC int xchk_iallocbt_rec( struct xchk_btree *bs, - union xfs_btree_rec *rec) + const union xfs_btree_rec *rec) { struct xfs_mount *mp = bs->cur->bc_mp; struct xchk_iallocbt *iabt = bs->private; @@ -517,7 +517,7 @@ xchk_iallocbt_xref_rmap_btreeblks( int error; if (!sc->sa.ino_cur || !sc->sa.rmap_cur || - (xfs_sb_version_hasfinobt(&sc->mp->m_sb) && !sc->sa.fino_cur) || + (xfs_has_finobt(sc->mp) && !sc->sa.fino_cur) || xchk_skip_xref(sc->sm)) return; diff --git a/fs/xfs/scrub/inode.c b/fs/xfs/scrub/inode.c index 76fbc7ca4cec..2405b09d03d0 100644 --- a/fs/xfs/scrub/inode.c +++ b/fs/xfs/scrub/inode.c @@ -181,7 +181,7 @@ xchk_inode_flags2( /* reflink flag requires reflink feature */ if ((flags2 & XFS_DIFLAG2_REFLINK) && - !xfs_sb_version_hasreflink(&mp->m_sb)) + !xfs_has_reflink(mp)) goto bad; /* cowextsize flag is checked w.r.t. 
mode separately */ @@ -199,8 +199,7 @@ xchk_inode_flags2( goto bad; /* no bigtime iflag without the bigtime feature */ - if (xfs_dinode_has_bigtime(dip) && - !xfs_sb_version_hasbigtime(&mp->m_sb)) + if (xfs_dinode_has_bigtime(dip) && !xfs_has_bigtime(mp)) goto bad; return; @@ -278,7 +277,7 @@ xchk_dinode( xchk_ino_set_corrupt(sc, ino); if (dip->di_projid_hi != 0 && - !xfs_sb_version_hasprojid32bit(&mp->m_sb)) + !xfs_has_projid32(mp)) xchk_ino_set_corrupt(sc, ino); break; default: @@ -532,9 +531,9 @@ xchk_inode_xref( agno = XFS_INO_TO_AGNO(sc->mp, ino); agbno = XFS_INO_TO_AGBNO(sc->mp, ino); - error = xchk_ag_init(sc, agno, &sc->sa); + error = xchk_ag_init_existing(sc, agno, &sc->sa); if (!xchk_xref_process_error(sc, agno, agbno, &error)) - return; + goto out_free; xchk_xref_is_used_space(sc, agbno, 1); xchk_inode_xref_finobt(sc, ino); @@ -542,6 +541,7 @@ xchk_inode_xref( xchk_xref_is_not_shared(sc, agbno, 1); xchk_inode_xref_bmap(sc, dip); +out_free: xchk_ag_free(sc, &sc->sa); } @@ -560,7 +560,7 @@ xchk_inode_check_reflink_iflag( bool has_shared; int error; - if (!xfs_sb_version_hasreflink(&mp->m_sb)) + if (!xfs_has_reflink(mp)) return; error = xfs_reflink_inode_has_shared_extents(sc->tp, sc->ip, diff --git a/fs/xfs/scrub/quota.c b/fs/xfs/scrub/quota.c index acbb9839d42f..d6c1b00a4fc8 100644 --- a/fs/xfs/scrub/quota.c +++ b/fs/xfs/scrub/quota.c @@ -42,7 +42,7 @@ xchk_setup_quota( xfs_dqtype_t dqtype; int error; - if (!XFS_IS_QUOTA_RUNNING(sc->mp) || !XFS_IS_QUOTA_ON(sc->mp)) + if (!XFS_IS_QUOTA_ON(sc->mp)) return -ENOENT; dqtype = xchk_quota_to_dqtype(sc); @@ -127,7 +127,7 @@ xchk_quota_item( * a reflink filesystem we're allowed to exceed physical space * if there are no quota limits. */ - if (xfs_sb_version_hasreflink(&mp->m_sb)) { + if (xfs_has_reflink(mp)) { if (mp->m_sb.sb_dblocks < dq->q_blk.count) xchk_fblock_set_warning(sc, XFS_DATA_FORK, offset); diff --git a/fs/xfs/scrub/refcount.c b/fs/xfs/scrub/refcount.c index 7014b7408bad..2744eecdbaf0 100644 --- a/fs/xfs/scrub/refcount.c +++ b/fs/xfs/scrub/refcount.c @@ -91,7 +91,7 @@ struct xchk_refcnt_check { STATIC int xchk_refcountbt_rmap_check( struct xfs_btree_cur *cur, - struct xfs_rmap_irec *rec, + const struct xfs_rmap_irec *rec, void *priv) { struct xchk_refcnt_check *refchk = priv; @@ -330,7 +330,7 @@ xchk_refcountbt_xref( STATIC int xchk_refcountbt_rec( struct xchk_btree *bs, - union xfs_btree_rec *rec) + const union xfs_btree_rec *rec) { struct xfs_mount *mp = bs->cur->bc_mp; xfs_agblock_t *cow_blocks = bs->private; diff --git a/fs/xfs/scrub/repair.c b/fs/xfs/scrub/repair.c index ebb0e245aa72..8f3cba14ada3 100644 --- a/fs/xfs/scrub/repair.c +++ b/fs/xfs/scrub/repair.c @@ -248,19 +248,19 @@ xrep_calc_ag_resblks( * bnobt/cntbt or inobt/finobt as pairs. */ bnobt_sz = 2 * xfs_allocbt_calc_size(mp, freelen); - if (xfs_sb_version_hassparseinodes(&mp->m_sb)) + if (xfs_has_sparseinodes(mp)) inobt_sz = xfs_iallocbt_calc_size(mp, icount / XFS_INODES_PER_HOLEMASK_BIT); else inobt_sz = xfs_iallocbt_calc_size(mp, icount / XFS_INODES_PER_CHUNK); - if (xfs_sb_version_hasfinobt(&mp->m_sb)) + if (xfs_has_finobt(mp)) inobt_sz *= 2; - if (xfs_sb_version_hasreflink(&mp->m_sb)) + if (xfs_has_reflink(mp)) refcbt_sz = xfs_refcountbt_calc_size(mp, usedlen); else refcbt_sz = 0; - if (xfs_sb_version_hasrmapbt(&mp->m_sb)) { + if (xfs_has_rmapbt(mp)) { /* * Guess how many blocks we need to rebuild the rmapbt. 
* For non-reflink filesystems we can't have more records than @@ -269,7 +269,7 @@ xrep_calc_ag_resblks( * many rmaps there could be in the AG, so we start off with * what we hope is an generous over-estimation. */ - if (xfs_sb_version_hasreflink(&mp->m_sb)) + if (xfs_has_reflink(mp)) rmapbt_sz = xfs_rmapbt_calc_size(mp, (unsigned long long)aglen * 2); else @@ -306,9 +306,9 @@ xrep_alloc_ag_block( return -ENOSPC; xfs_extent_busy_reuse(sc->mp, sc->sa.pag, bno, 1, false); - *fsbno = XFS_AGB_TO_FSB(sc->mp, sc->sa.agno, bno); + *fsbno = XFS_AGB_TO_FSB(sc->mp, sc->sa.pag->pag_agno, bno); if (resv == XFS_AG_RESV_RMAPBT) - xfs_ag_resv_rmapbt_alloc(sc->mp, sc->sa.agno); + xfs_ag_resv_rmapbt_alloc(sc->mp, sc->sa.pag->pag_agno); return 0; default: break; @@ -317,7 +317,7 @@ xrep_alloc_ag_block( args.tp = sc->tp; args.mp = sc->mp; args.oinfo = *oinfo; - args.fsbno = XFS_AGB_TO_FSB(args.mp, sc->sa.agno, 0); + args.fsbno = XFS_AGB_TO_FSB(args.mp, sc->sa.pag->pag_agno, 0); args.minlen = 1; args.maxlen = 1; args.prod = 1; @@ -352,14 +352,14 @@ xrep_init_btblock( trace_xrep_init_btblock(mp, XFS_FSB_TO_AGNO(mp, fsb), XFS_FSB_TO_AGBNO(mp, fsb), btnum); - ASSERT(XFS_FSB_TO_AGNO(mp, fsb) == sc->sa.agno); + ASSERT(XFS_FSB_TO_AGNO(mp, fsb) == sc->sa.pag->pag_agno); error = xfs_trans_get_buf(tp, mp->m_ddev_targp, XFS_FSB_TO_DADDR(mp, fsb), XFS_FSB_TO_BB(mp, 1), 0, &bp); if (error) return error; xfs_buf_zero(bp, 0, BBTOB(bp->b_length)); - xfs_btree_init_block(mp, bp, btnum, 0, 0, sc->sa.agno); + xfs_btree_init_block(mp, bp, btnum, 0, 0, sc->sa.pag->pag_agno); xfs_trans_buf_set_type(tp, bp, XFS_BLFT_BTREE_BUF); xfs_trans_log_buf(tp, bp, 0, BBTOB(bp->b_length) - 1); bp->b_ops = ops; @@ -481,7 +481,7 @@ xrep_fix_freelist( args.mp = sc->mp; args.tp = sc->tp; - args.agno = sc->sa.agno; + args.agno = sc->sa.pag->pag_agno; args.alignment = 1; args.pag = sc->sa.pag; @@ -611,11 +611,11 @@ xrep_reap_extents( xfs_fsblock_t fsbno; int error = 0; - ASSERT(xfs_sb_version_hasrmapbt(&sc->mp->m_sb)); + ASSERT(xfs_has_rmapbt(sc->mp)); for_each_xbitmap_block(fsbno, bmr, n, bitmap) { ASSERT(sc->ip != NULL || - XFS_FSB_TO_AGNO(sc->mp, fsbno) == sc->sa.agno); + XFS_FSB_TO_AGNO(sc->mp, fsbno) == sc->sa.pag->pag_agno); trace_xrep_dispose_btree_extent(sc->mp, XFS_FSB_TO_AGNO(sc->mp, fsbno), XFS_FSB_TO_AGBNO(sc->mp, fsbno), 1); @@ -690,7 +690,7 @@ xrep_findroot_block( int block_level; int error = 0; - daddr = XFS_AGB_TO_DADDR(mp, ri->sc->sa.agno, agbno); + daddr = XFS_AGB_TO_DADDR(mp, ri->sc->sa.pag->pag_agno, agbno); /* * Blocks in the AGFL have stale contents that might just happen to @@ -819,7 +819,7 @@ xrep_findroot_block( else fab->root = NULLAGBLOCK; - trace_xrep_findroot_block(mp, ri->sc->sa.agno, agbno, + trace_xrep_findroot_block(mp, ri->sc->sa.pag->pag_agno, agbno, be32_to_cpu(btblock->bb_magic), fab->height - 1); out: xfs_trans_brelse(ri->sc->tp, bp); @@ -833,7 +833,7 @@ out: STATIC int xrep_findroot_rmap( struct xfs_btree_cur *cur, - struct xfs_rmap_irec *rec, + const struct xfs_rmap_irec *rec, void *priv) { struct xrep_findroot *ri = priv; diff --git a/fs/xfs/scrub/rmap.c b/fs/xfs/scrub/rmap.c index fc306573f0ac..8dae0345c7df 100644 --- a/fs/xfs/scrub/rmap.c +++ b/fs/xfs/scrub/rmap.c @@ -88,7 +88,7 @@ xchk_rmapbt_xref( STATIC int xchk_rmapbt_rec( struct xchk_btree *bs, - union xfs_btree_rec *rec) + const union xfs_btree_rec *rec) { struct xfs_mount *mp = bs->cur->bc_mp; struct xfs_rmap_irec irec; diff --git a/fs/xfs/scrub/rtbitmap.c b/fs/xfs/scrub/rtbitmap.c index 37c0e2266c85..8fa012057405 100644 --- a/fs/xfs/scrub/rtbitmap.c 
+++ b/fs/xfs/scrub/rtbitmap.c @@ -41,7 +41,7 @@ xchk_setup_rt( STATIC int xchk_rtbitmap_rec( struct xfs_trans *tp, - struct xfs_rtalloc_rec *rec, + const struct xfs_rtalloc_rec *rec, void *priv) { struct xfs_scrub *sc = priv; diff --git a/fs/xfs/scrub/scrub.c b/fs/xfs/scrub/scrub.c index 0e542636227c..51e4c61916d2 100644 --- a/fs/xfs/scrub/scrub.c +++ b/fs/xfs/scrub/scrub.c @@ -239,21 +239,21 @@ static const struct xchk_meta_ops meta_scrub_ops[] = { .type = ST_PERAG, .setup = xchk_setup_ag_iallocbt, .scrub = xchk_finobt, - .has = xfs_sb_version_hasfinobt, + .has = xfs_has_finobt, .repair = xrep_notsupported, }, [XFS_SCRUB_TYPE_RMAPBT] = { /* rmapbt */ .type = ST_PERAG, .setup = xchk_setup_ag_rmapbt, .scrub = xchk_rmapbt, - .has = xfs_sb_version_hasrmapbt, + .has = xfs_has_rmapbt, .repair = xrep_notsupported, }, [XFS_SCRUB_TYPE_REFCNTBT] = { /* refcountbt */ .type = ST_PERAG, .setup = xchk_setup_ag_refcountbt, .scrub = xchk_refcountbt, - .has = xfs_sb_version_hasreflink, + .has = xfs_has_reflink, .repair = xrep_notsupported, }, [XFS_SCRUB_TYPE_INODE] = { /* inode record */ @@ -308,14 +308,14 @@ static const struct xchk_meta_ops meta_scrub_ops[] = { .type = ST_FS, .setup = xchk_setup_rt, .scrub = xchk_rtbitmap, - .has = xfs_sb_version_hasrealtime, + .has = xfs_has_realtime, .repair = xrep_notsupported, }, [XFS_SCRUB_TYPE_RTSUM] = { /* realtime summary */ .type = ST_FS, .setup = xchk_setup_rt, .scrub = xchk_rtsummary, - .has = xfs_sb_version_hasrealtime, + .has = xfs_has_realtime, .repair = xrep_notsupported, }, [XFS_SCRUB_TYPE_UQUOTA] = { /* user quota */ @@ -383,7 +383,7 @@ xchk_validate_inputs( if (ops->setup == NULL || ops->scrub == NULL) goto out; /* Does this fs even support this type of metadata? */ - if (ops->has && !ops->has(&mp->m_sb)) + if (ops->has && !ops->has(mp)) goto out; error = -EINVAL; @@ -415,11 +415,11 @@ xchk_validate_inputs( */ if (sm->sm_flags & XFS_SCRUB_IFLAG_REPAIR) { error = -EOPNOTSUPP; - if (!xfs_sb_version_hascrc(&mp->m_sb)) + if (!xfs_has_crc(mp)) goto out; error = -EROFS; - if (mp->m_flags & XFS_MOUNT_RDONLY) + if (xfs_is_readonly(mp)) goto out; } @@ -464,9 +464,6 @@ xfs_scrub_metadata( struct xfs_scrub sc = { .file = file, .sm = sm, - .sa = { - .agno = NULLAGNUMBER, - }, }; struct xfs_mount *mp = XFS_I(file_inode(file))->i_mount; int error = 0; @@ -480,10 +477,10 @@ xfs_scrub_metadata( /* Forbidden if we are shut down or mounted norecovery. */ error = -ESHUTDOWN; - if (XFS_FORCED_SHUTDOWN(mp)) + if (xfs_is_shutdown(mp)) goto out; error = -ENOTRECOVERABLE; - if (mp->m_flags & XFS_MOUNT_NORECOVERY) + if (xfs_has_norecovery(mp)) goto out; error = xchk_validate_inputs(mp, sm); diff --git a/fs/xfs/scrub/scrub.h b/fs/xfs/scrub/scrub.h index 08a483cb46e2..80e5026bba44 100644 --- a/fs/xfs/scrub/scrub.h +++ b/fs/xfs/scrub/scrub.h @@ -27,7 +27,7 @@ struct xchk_meta_ops { int (*repair)(struct xfs_scrub *); /* Decide if we even have this piece of metadata. */ - bool (*has)(struct xfs_sb *); + bool (*has)(struct xfs_mount *); /* type describing required/allowed inputs */ enum xchk_type type; @@ -35,7 +35,6 @@ struct xchk_meta_ops { /* Buffer pointers and btree cursors for an entire AG. 
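* The cached AG number is gone from this structure; with the perag
* reference now held for the lifetime of the scrub, callers derive it
* on demand, as the repair code above does:
*
*	xfs_agnumber_t agno = sc->sa.pag->pag_agno;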
*/ struct xchk_ag { - xfs_agnumber_t agno; struct xfs_perag *pag; /* AG btree roots */ diff --git a/fs/xfs/scrub/trace.c b/fs/xfs/scrub/trace.c index 03882a605a3c..c0ef53fe6611 100644 --- a/fs/xfs/scrub/trace.c +++ b/fs/xfs/scrub/trace.c @@ -22,11 +22,11 @@ xchk_btree_cur_fsbno( int level) { if (level < cur->bc_nlevels && cur->bc_bufs[level]) - return XFS_DADDR_TO_FSB(cur->bc_mp, cur->bc_bufs[level]->b_bn); - else if (level == cur->bc_nlevels - 1 && - cur->bc_flags & XFS_BTREE_LONG_PTRS) + return XFS_DADDR_TO_FSB(cur->bc_mp, + xfs_buf_daddr(cur->bc_bufs[level])); + if (level == cur->bc_nlevels - 1 && cur->bc_flags & XFS_BTREE_LONG_PTRS) return XFS_INO_TO_FSB(cur->bc_mp, cur->bc_ino.ip->i_ino); - else if (!(cur->bc_flags & XFS_BTREE_LONG_PTRS)) + if (!(cur->bc_flags & XFS_BTREE_LONG_PTRS)) return XFS_AGB_TO_FSB(cur->bc_mp, cur->bc_ag.pag->pag_agno, 0); return NULLFSBLOCK; } diff --git a/fs/xfs/scrub/trace.h b/fs/xfs/scrub/trace.h index e46f5cef90da..a7bbb84f91a7 100644 --- a/fs/xfs/scrub/trace.h +++ b/fs/xfs/scrub/trace.h @@ -2,6 +2,10 @@ /* * Copyright (C) 2017 Oracle. All Rights Reserved. * Author: Darrick J. Wong <darrick.wong@oracle.com> + * + * NOTE: none of these tracepoints shall be considered a stable kernel ABI + * as they can change at any time. See xfs_trace.h for documentation of + * specific units found in tracepoint output. */ #undef TRACE_SYSTEM #define TRACE_SYSTEM xfs_scrub @@ -79,6 +83,16 @@ TRACE_DEFINE_ENUM(XFS_SCRUB_TYPE_FSCOUNTERS); { XFS_SCRUB_TYPE_PQUOTA, "prjquota" }, \ { XFS_SCRUB_TYPE_FSCOUNTERS, "fscounters" } +#define XFS_SCRUB_FLAG_STRINGS \ + { XFS_SCRUB_IFLAG_REPAIR, "repair" }, \ + { XFS_SCRUB_OFLAG_CORRUPT, "corrupt" }, \ + { XFS_SCRUB_OFLAG_PREEN, "preen" }, \ + { XFS_SCRUB_OFLAG_XFAIL, "xfail" }, \ + { XFS_SCRUB_OFLAG_XCORRUPT, "xcorrupt" }, \ + { XFS_SCRUB_OFLAG_INCOMPLETE, "incomplete" }, \ + { XFS_SCRUB_OFLAG_WARNING, "warning" }, \ + { XFS_SCRUB_OFLAG_NO_REPAIR_NEEDED, "norepair" } + DECLARE_EVENT_CLASS(xchk_class, TP_PROTO(struct xfs_inode *ip, struct xfs_scrub_metadata *sm, int error), @@ -103,14 +117,14 @@ DECLARE_EVENT_CLASS(xchk_class, __entry->flags = sm->sm_flags; __entry->error = error; ), - TP_printk("dev %d:%d ino 0x%llx type %s agno %u inum %llu gen %u flags 0x%x error %d", + TP_printk("dev %d:%d ino 0x%llx type %s agno 0x%x inum 0x%llx gen 0x%x flags (%s) error %d", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->ino, __print_symbolic(__entry->type, XFS_SCRUB_TYPE_STRINGS), __entry->agno, __entry->inum, __entry->gen, - __entry->flags, + __print_flags(__entry->flags, "|", XFS_SCRUB_FLAG_STRINGS), __entry->error) ) #define DEFINE_SCRUB_EVENT(name) \ @@ -145,7 +159,7 @@ TRACE_EVENT(xchk_op_error, __entry->error = error; __entry->ret_ip = ret_ip; ), - TP_printk("dev %d:%d type %s agno %u agbno %u error %d ret_ip %pS", + TP_printk("dev %d:%d type %s agno 0x%x agbno 0x%x error %d ret_ip %pS", MAJOR(__entry->dev), MINOR(__entry->dev), __print_symbolic(__entry->type, XFS_SCRUB_TYPE_STRINGS), __entry->agno, @@ -176,10 +190,10 @@ TRACE_EVENT(xchk_file_op_error, __entry->error = error; __entry->ret_ip = ret_ip; ), - TP_printk("dev %d:%d ino 0x%llx fork %d type %s offset %llu error %d ret_ip %pS", + TP_printk("dev %d:%d ino 0x%llx fork %s type %s fileoff 0x%llx error %d ret_ip %pS", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->ino, - __entry->whichfork, + __print_symbolic(__entry->whichfork, XFS_WHICHFORK_STRINGS), __print_symbolic(__entry->type, XFS_SCRUB_TYPE_STRINGS), __entry->offset, __entry->error, @@ -193,29 +207,21 @@ 
DECLARE_EVENT_CLASS(xchk_block_error_class, __field(dev_t, dev) __field(unsigned int, type) __field(xfs_agnumber_t, agno) - __field(xfs_agblock_t, bno) + __field(xfs_agblock_t, agbno) __field(void *, ret_ip) ), TP_fast_assign( - xfs_fsblock_t fsbno; - xfs_agnumber_t agno; - xfs_agblock_t bno; - - fsbno = XFS_DADDR_TO_FSB(sc->mp, daddr); - agno = XFS_FSB_TO_AGNO(sc->mp, fsbno); - bno = XFS_FSB_TO_AGBNO(sc->mp, fsbno); - __entry->dev = sc->mp->m_super->s_dev; __entry->type = sc->sm->sm_type; - __entry->agno = agno; - __entry->bno = bno; + __entry->agno = xfs_daddr_to_agno(sc->mp, daddr); + __entry->agbno = xfs_daddr_to_agbno(sc->mp, daddr); __entry->ret_ip = ret_ip; ), - TP_printk("dev %d:%d type %s agno %u agbno %u ret_ip %pS", + TP_printk("dev %d:%d type %s agno 0x%x agbno 0x%x ret_ip %pS", MAJOR(__entry->dev), MINOR(__entry->dev), __print_symbolic(__entry->type, XFS_SCRUB_TYPE_STRINGS), __entry->agno, - __entry->bno, + __entry->agbno, __entry->ret_ip) ) @@ -281,10 +287,10 @@ DECLARE_EVENT_CLASS(xchk_fblock_error_class, __entry->offset = offset; __entry->ret_ip = ret_ip; ), - TP_printk("dev %d:%d ino 0x%llx fork %d type %s offset %llu ret_ip %pS", + TP_printk("dev %d:%d ino 0x%llx fork %s type %s fileoff 0x%llx ret_ip %pS", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->ino, - __entry->whichfork, + __print_symbolic(__entry->whichfork, XFS_WHICHFORK_STRINGS), __print_symbolic(__entry->type, XFS_SCRUB_TYPE_STRINGS), __entry->offset, __entry->ret_ip) @@ -346,7 +352,7 @@ TRACE_EVENT(xchk_btree_op_error, __entry->error = error; __entry->ret_ip = ret_ip; ), - TP_printk("dev %d:%d type %s btree %s level %d ptr %d agno %u agbno %u error %d ret_ip %pS", + TP_printk("dev %d:%d type %s btree %s level %d ptr %d agno 0x%x agbno 0x%x error %d ret_ip %pS", MAJOR(__entry->dev), MINOR(__entry->dev), __print_symbolic(__entry->type, XFS_SCRUB_TYPE_STRINGS), __print_symbolic(__entry->btnum, XFS_BTNUM_STRINGS), @@ -389,10 +395,10 @@ TRACE_EVENT(xchk_ifork_btree_op_error, __entry->error = error; __entry->ret_ip = ret_ip; ), - TP_printk("dev %d:%d ino 0x%llx fork %d type %s btree %s level %d ptr %d agno %u agbno %u error %d ret_ip %pS", + TP_printk("dev %d:%d ino 0x%llx fork %s type %s btree %s level %d ptr %d agno 0x%x agbno 0x%x error %d ret_ip %pS", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->ino, - __entry->whichfork, + __print_symbolic(__entry->whichfork, XFS_WHICHFORK_STRINGS), __print_symbolic(__entry->type, XFS_SCRUB_TYPE_STRINGS), __print_symbolic(__entry->btnum, XFS_BTNUM_STRINGS), __entry->level, @@ -428,7 +434,7 @@ TRACE_EVENT(xchk_btree_error, __entry->ptr = cur->bc_ptrs[level]; __entry->ret_ip = ret_ip; ), - TP_printk("dev %d:%d type %s btree %s level %d ptr %d agno %u agbno %u ret_ip %pS", + TP_printk("dev %d:%d type %s btree %s level %d ptr %d agno 0x%x agbno 0x%x ret_ip %pS", MAJOR(__entry->dev), MINOR(__entry->dev), __print_symbolic(__entry->type, XFS_SCRUB_TYPE_STRINGS), __print_symbolic(__entry->btnum, XFS_BTNUM_STRINGS), @@ -468,10 +474,10 @@ TRACE_EVENT(xchk_ifork_btree_error, __entry->ptr = cur->bc_ptrs[level]; __entry->ret_ip = ret_ip; ), - TP_printk("dev %d:%d ino 0x%llx fork %d type %s btree %s level %d ptr %d agno %u agbno %u ret_ip %pS", + TP_printk("dev %d:%d ino 0x%llx fork %s type %s btree %s level %d ptr %d agno 0x%x agbno 0x%x ret_ip %pS", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->ino, - __entry->whichfork, + __print_symbolic(__entry->whichfork, XFS_WHICHFORK_STRINGS), __print_symbolic(__entry->type, XFS_SCRUB_TYPE_STRINGS), __print_symbolic(__entry->btnum, 
XFS_BTNUM_STRINGS), __entry->level, @@ -507,7 +513,7 @@ DECLARE_EVENT_CLASS(xchk_sbtree_class, __entry->nlevels = cur->bc_nlevels; __entry->ptr = cur->bc_ptrs[level]; ), - TP_printk("dev %d:%d type %s btree %s agno %u agbno %u level %d nlevels %d ptr %d", + TP_printk("dev %d:%d type %s btree %s agno 0x%x agbno 0x%x level %d nlevels %d ptr %d", MAJOR(__entry->dev), MINOR(__entry->dev), __print_symbolic(__entry->type, XFS_SCRUB_TYPE_STRINGS), __print_symbolic(__entry->btnum, XFS_BTNUM_STRINGS), @@ -580,7 +586,7 @@ TRACE_EVENT(xchk_iallocbt_check_cluster, __entry->holemask = holemask; __entry->cluster_ino = cluster_ino; ), - TP_printk("dev %d:%d agno %d startino %u daddr 0x%llx len %d chunkino %u nr_inodes %u cluster_mask 0x%x holemask 0x%x cluster_ino %u", + TP_printk("dev %d:%d agno 0x%x startino 0x%x daddr 0x%llx bbcount 0x%x chunkino 0x%x nr_inodes %u cluster_mask 0x%x holemask 0x%x cluster_ino 0x%x", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->agno, __entry->startino, @@ -670,7 +676,7 @@ DECLARE_EVENT_CLASS(xrep_extent_class, __entry->agbno = agbno; __entry->len = len; ), - TP_printk("dev %d:%d agno %u agbno %u len %u", + TP_printk("dev %d:%d agno 0x%x agbno 0x%x fsbcount 0x%x", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->agno, __entry->agbno, @@ -707,7 +713,7 @@ DECLARE_EVENT_CLASS(xrep_rmap_class, __entry->offset = offset; __entry->flags = flags; ), - TP_printk("dev %d:%d agno %u agbno %u len %u owner %lld offset %llu flags 0x%x", + TP_printk("dev %d:%d agno 0x%x agbno 0x%x fsbcount 0x%x owner 0x%llx fileoff 0x%llx flags 0x%x", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->agno, __entry->agbno, @@ -745,7 +751,7 @@ TRACE_EVENT(xrep_refcount_extent_fn, __entry->blockcount = irec->rc_blockcount; __entry->refcount = irec->rc_refcount; ), - TP_printk("dev %d:%d agno %u agbno %u len %u refcount %u", + TP_printk("dev %d:%d agno 0x%x agbno 0x%x fsbcount 0x%x refcount %u", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->agno, __entry->startblock, @@ -769,7 +775,7 @@ TRACE_EVENT(xrep_init_btblock, __entry->agbno = agbno; __entry->btnum = btnum; ), - TP_printk("dev %d:%d agno %u agbno %u btree %s", + TP_printk("dev %d:%d agno 0x%x agbno 0x%x btree %s", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->agno, __entry->agbno, @@ -793,7 +799,7 @@ TRACE_EVENT(xrep_findroot_block, __entry->magic = magic; __entry->level = level; ), - TP_printk("dev %d:%d agno %u agbno %u magic 0x%x level %u", + TP_printk("dev %d:%d agno 0x%x agbno 0x%x magic 0x%x level %u", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->agno, __entry->agbno, @@ -821,7 +827,7 @@ TRACE_EVENT(xrep_calc_ag_resblks, __entry->freelen = freelen; __entry->usedlen = usedlen; ), - TP_printk("dev %d:%d agno %d icount %u aglen %u freelen %u usedlen %u", + TP_printk("dev %d:%d agno 0x%x icount %u aglen %u freelen %u usedlen %u", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->agno, __entry->icount, @@ -850,7 +856,7 @@ TRACE_EVENT(xrep_calc_ag_resblks_btsize, __entry->rmapbt_sz = rmapbt_sz; __entry->refcbt_sz = refcbt_sz; ), - TP_printk("dev %d:%d agno %d bno %u ino %u rmap %u refcount %u", + TP_printk("dev %d:%d agno 0x%x bnobt %u inobt %u rmapbt %u refcountbt %u", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->agno, __entry->bnobt_sz, @@ -894,7 +900,7 @@ TRACE_EVENT(xrep_ialloc_insert, __entry->freecount = freecount; __entry->freemask = freemask; ), - TP_printk("dev %d:%d agno %d startino %u holemask 0x%x count %u freecount %u freemask 0x%llx", + TP_printk("dev %d:%d agno 0x%x startino 0x%x holemask 0x%x count %u 
freecount %u freemask 0x%llx", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->agno, __entry->startino, diff --git a/fs/xfs/xfs_acl.c b/fs/xfs/xfs_acl.c index d02bef24b32b..5c52ee869272 100644 --- a/fs/xfs/xfs_acl.c +++ b/fs/xfs/xfs_acl.c @@ -125,7 +125,7 @@ xfs_acl_to_disk(struct xfs_acl *aclp, const struct posix_acl *acl) } struct posix_acl * -xfs_get_acl(struct inode *inode, int type) +xfs_get_acl(struct inode *inode, int type, bool rcu) { struct xfs_inode *ip = XFS_I(inode); struct xfs_mount *mp = ip->i_mount; @@ -137,6 +137,9 @@ xfs_get_acl(struct inode *inode, int type) }; int error; + if (rcu) + return ERR_PTR(-ECHILD); + trace_xfs_get_acl(ip); switch (type) { @@ -232,7 +235,7 @@ xfs_acl_set_mode( inode->i_ctime = current_time(inode); xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); - if (mp->m_flags & XFS_MOUNT_WSYNC) + if (xfs_has_wsync(mp)) xfs_trans_set_sync(tp); return xfs_trans_commit(tp); } diff --git a/fs/xfs/xfs_acl.h b/fs/xfs/xfs_acl.h index 7bdb3a4ed798..bb6abdcb265d 100644 --- a/fs/xfs/xfs_acl.h +++ b/fs/xfs/xfs_acl.h @@ -10,13 +10,13 @@ struct inode; struct posix_acl; #ifdef CONFIG_XFS_POSIX_ACL -extern struct posix_acl *xfs_get_acl(struct inode *inode, int type); +extern struct posix_acl *xfs_get_acl(struct inode *inode, int type, bool rcu); extern int xfs_set_acl(struct user_namespace *mnt_userns, struct inode *inode, struct posix_acl *acl, int type); extern int __xfs_set_acl(struct inode *inode, struct posix_acl *acl, int type); void xfs_forget_acl(struct inode *inode, const char *name); #else -static inline struct posix_acl *xfs_get_acl(struct inode *inode, int type) +static inline struct posix_acl *xfs_get_acl(struct inode *inode, int type, bool rcu) { return NULL; } diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c index cb4e0fcf4c76..34fc6148032a 100644 --- a/fs/xfs/xfs_aops.c +++ b/fs/xfs/xfs_aops.c @@ -97,7 +97,7 @@ xfs_end_ioend( /* * Just clean up the in-memory structures if the fs has been shut down. 
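* xfs_is_shutdown() below replaces the old XFS_FORCED_SHUTDOWN() macro.
* A minimal sketch of the assumed shape of the helper, testing a bit in
* the mount's operational state word (the XFS_OPSTATE_SHUTDOWN name and
* m_opstate field are assumptions here):
*
*	static inline bool xfs_is_shutdown(struct xfs_mount *mp)
*	{
*		return test_bit(XFS_OPSTATE_SHUTDOWN, &mp->m_opstate);
*	}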
*/ - if (XFS_FORCED_SHUTDOWN(ip->i_mount)) { + if (xfs_is_shutdown(ip->i_mount)) { error = -EIO; goto done; } @@ -260,7 +260,7 @@ xfs_map_blocks( int retries = 0; int error = 0; - if (XFS_FORCED_SHUTDOWN(mp)) + if (xfs_is_shutdown(mp)) return -EIO; /* @@ -440,7 +440,7 @@ xfs_discard_page( xfs_fileoff_t pageoff_fsb = XFS_B_TO_FSBT(mp, pageoff); int error; - if (XFS_FORCED_SHUTDOWN(mp)) + if (xfs_is_shutdown(mp)) goto out_invalidate; xfs_alert_ratelimited(mp, @@ -449,7 +449,7 @@ xfs_discard_page( error = xfs_bmap_punch_delalloc_range(ip, start_fsb, i_blocks_per_page(inode, page) - pageoff_fsb); - if (error && !XFS_FORCED_SHUTDOWN(mp)) + if (error && !xfs_is_shutdown(mp)) xfs_alert(mp, "page discard unable to remove delalloc mapping."); out_invalidate: iomap_invalidatepage(page, pageoff, PAGE_SIZE - pageoff); @@ -462,22 +462,6 @@ static const struct iomap_writeback_ops xfs_writeback_ops = { }; STATIC int -xfs_vm_writepage( - struct page *page, - struct writeback_control *wbc) -{ - struct xfs_writepage_ctx wpc = { }; - - if (WARN_ON_ONCE(current->journal_info)) { - redirty_page_for_writepage(wbc, page); - unlock_page(page); - return 0; - } - - return iomap_writepage(page, wbc, &wpc.ctx, &xfs_writeback_ops); -} - -STATIC int xfs_vm_writepages( struct address_space *mapping, struct writeback_control *wbc) @@ -559,7 +543,6 @@ xfs_iomap_swapfile_activate( const struct address_space_operations xfs_address_space_operations = { .readpage = xfs_vm_readpage, .readahead = xfs_vm_readahead, - .writepage = xfs_vm_writepage, .writepages = xfs_vm_writepages, .set_page_dirty = __set_page_dirty_nobuffers, .releasepage = iomap_releasepage, diff --git a/fs/xfs/xfs_attr_inactive.c b/fs/xfs/xfs_attr_inactive.c index aaa7e66c42d7..2b5da6218977 100644 --- a/fs/xfs/xfs_attr_inactive.c +++ b/fs/xfs/xfs_attr_inactive.c @@ -151,7 +151,7 @@ xfs_attr3_node_inactive( } xfs_da3_node_hdr_from_disk(dp->i_mount, &ichdr, bp->b_addr); - parent_blkno = bp->b_bn; + parent_blkno = xfs_buf_daddr(bp); if (!ichdr.count) { xfs_trans_brelse(*trans, bp); return 0; @@ -177,7 +177,7 @@ xfs_attr3_node_inactive( return error; /* save for re-read later */ - child_blkno = XFS_BUF_ADDR(child_bp); + child_blkno = xfs_buf_daddr(child_bp); /* * Invalidate the subtree, however we have to. @@ -271,7 +271,7 @@ xfs_attr3_root_inactive( error = xfs_da3_node_read(*trans, dp, 0, &bp, XFS_ATTR_FORK); if (error) return error; - blkno = bp->b_bn; + blkno = xfs_buf_daddr(bp); /* * Invalidate the tree, even if the "tree" is only a single leaf block. diff --git a/fs/xfs/xfs_attr_list.c b/fs/xfs/xfs_attr_list.c index 25dcc98d50e6..2d1e5134cebe 100644 --- a/fs/xfs/xfs_attr_list.c +++ b/fs/xfs/xfs_attr_list.c @@ -529,7 +529,7 @@ xfs_attr_list( XFS_STATS_INC(dp->i_mount, xs_attr_list); - if (XFS_FORCED_SHUTDOWN(dp->i_mount)) + if (xfs_is_shutdown(dp->i_mount)) return -EIO; lock_mode = xfs_ilock_attr_map_shared(dp); diff --git a/fs/xfs/xfs_bmap_item.c b/fs/xfs/xfs_bmap_item.c index e3a691937e92..03159970133f 100644 --- a/fs/xfs/xfs_bmap_item.c +++ b/fs/xfs/xfs_bmap_item.c @@ -24,7 +24,6 @@ #include "xfs_error.h" #include "xfs_log_priv.h" #include "xfs_log_recover.h" -#include "xfs_quota.h" kmem_zone_t *xfs_bui_zone; kmem_zone_t *xfs_bud_zone; @@ -487,18 +486,10 @@ xfs_bui_item_recover( XFS_ATTR_FORK : XFS_DATA_FORK; bui_type = bmap->me_flags & XFS_BMAP_EXTENT_TYPE_MASK; - /* Grab the inode. 
*/ - error = xfs_iget(mp, NULL, bmap->me_owner, 0, 0, &ip); + error = xlog_recover_iget(mp, bmap->me_owner, &ip); if (error) return error; - error = xfs_qm_dqattach(ip); - if (error) - goto err_rele; - - if (VFS_I(ip)->i_nlink == 0) - xfs_iflags_set(ip, XFS_IRECOVERY); - /* Allocate transaction and do the work. */ error = xfs_trans_alloc(mp, &M_RES(mp)->tr_itruncate, XFS_EXTENTADD_SPACE_RES(mp, XFS_DATA_FORK), 0, 0, &tp); @@ -522,6 +513,9 @@ xfs_bui_item_recover( error = xfs_trans_log_finish_bmap_update(tp, budp, bui_type, ip, whichfork, bmap->me_startoff, bmap->me_startblock, &count, state); + if (error == -EFSCORRUPTED) + XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bmap, + sizeof(*bmap)); if (error) goto err_cancel; diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c index 213a97a921bb..73a36b7be3bd 100644 --- a/fs/xfs/xfs_bmap_util.c +++ b/fs/xfs/xfs_bmap_util.c @@ -731,7 +731,7 @@ xfs_free_eofblocks( error = xfs_trans_alloc(mp, &M_RES(mp)->tr_itruncate, 0, 0, 0, &tp); if (error) { - ASSERT(XFS_FORCED_SHUTDOWN(mp)); + ASSERT(xfs_is_shutdown(mp)); return error; } @@ -789,7 +789,7 @@ xfs_alloc_file_space( trace_xfs_alloc_file_space(ip); - if (XFS_FORCED_SHUTDOWN(mp)) + if (xfs_is_shutdown(mp)) return -EIO; error = xfs_qm_dqattach(ip); @@ -1282,7 +1282,7 @@ xfs_swap_extents_check_format( * If we have to use the (expensive) rmap swap method, we can * handle any number of extents and any format. */ - if (xfs_sb_version_hasrmapbt(&ip->i_mount->m_sb)) + if (xfs_has_rmapbt(ip->i_mount)) return 0; /* @@ -1516,7 +1516,7 @@ xfs_swap_extent_forks( * event of a crash. Set the owner change log flags now and leave the * bmbt scan as the last step. */ - if (xfs_sb_version_has_v3inode(&ip->i_mount->m_sb)) { + if (xfs_has_v3inodes(ip->i_mount)) { if (ip->i_df.if_format == XFS_DINODE_FMT_BTREE) (*target_log_flags) |= XFS_ILOG_DOWNER; if (tip->i_df.if_format == XFS_DINODE_FMT_BTREE) @@ -1553,7 +1553,7 @@ xfs_swap_extent_forks( (*src_log_flags) |= XFS_ILOG_DEXT; break; case XFS_DINODE_FMT_BTREE: - ASSERT(!xfs_sb_version_has_v3inode(&ip->i_mount->m_sb) || + ASSERT(!xfs_has_v3inodes(ip->i_mount) || (*src_log_flags & XFS_ILOG_DOWNER)); (*src_log_flags) |= XFS_ILOG_DBROOT; break; @@ -1565,7 +1565,7 @@ xfs_swap_extent_forks( break; case XFS_DINODE_FMT_BTREE: (*target_log_flags) |= XFS_ILOG_DBROOT; - ASSERT(!xfs_sb_version_has_v3inode(&ip->i_mount->m_sb) || + ASSERT(!xfs_has_v3inodes(ip->i_mount) || (*target_log_flags & XFS_ILOG_DOWNER)); break; } @@ -1626,7 +1626,6 @@ xfs_swap_extents( struct xfs_bstat *sbp = &sxp->sx_stat; int src_log_flags, target_log_flags; int error = 0; - int lock_flags; uint64_t f; int resblks = 0; unsigned int flags = 0; @@ -1638,8 +1637,8 @@ xfs_swap_extents( * do the rest of the checks. */ lock_two_nondirectories(VFS_I(ip), VFS_I(tip)); - lock_flags = XFS_MMAPLOCK_EXCL; - xfs_lock_two_inodes(ip, XFS_MMAPLOCK_EXCL, tip, XFS_MMAPLOCK_EXCL); + filemap_invalidate_lock_two(VFS_I(ip)->i_mapping, + VFS_I(tip)->i_mapping); /* Verify that both files have the same format */ if ((VFS_I(ip)->i_mode & S_IFMT) != (VFS_I(tip)->i_mode & S_IFMT)) { @@ -1679,7 +1678,7 @@ xfs_swap_extents( * a block reservation because it's really just a remap operation * performed with log redo items! */ - if (xfs_sb_version_hasrmapbt(&mp->m_sb)) { + if (xfs_has_rmapbt(mp)) { int w = XFS_DATA_FORK; uint32_t ipnext = ip->i_df.if_nextents; uint32_t tipnext = tip->i_df.if_nextents; @@ -1711,7 +1710,6 @@ xfs_swap_extents( * or cancel will unlock the inodes from this point onwards. 
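* The mmap exclusion for the swap is now taken through the VFS
* invalidate_lock rather than XFS_MMAPLOCK_EXCL, so teardown drops the
* ILOCKs before the mapping locks. A minimal sketch of the pairing
* used in this function:
*
*	filemap_invalidate_lock_two(VFS_I(ip)->i_mapping,
*			VFS_I(tip)->i_mapping);
*	...
*	xfs_iunlock(ip, XFS_ILOCK_EXCL);
*	xfs_iunlock(tip, XFS_ILOCK_EXCL);
*	filemap_invalidate_unlock_two(VFS_I(ip)->i_mapping,
*			VFS_I(tip)->i_mapping);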
*/ xfs_lock_two_inodes(ip, XFS_ILOCK_EXCL, tip, XFS_ILOCK_EXCL); - lock_flags |= XFS_ILOCK_EXCL; xfs_trans_ijoin(tp, ip, 0); xfs_trans_ijoin(tp, tip, 0); @@ -1761,7 +1759,7 @@ xfs_swap_extents( src_log_flags = XFS_ILOG_CORE; target_log_flags = XFS_ILOG_CORE; - if (xfs_sb_version_hasrmapbt(&mp->m_sb)) + if (xfs_has_rmapbt(mp)) error = xfs_swap_extent_rmap(&tp, ip, tip); else error = xfs_swap_extent_forks(tp, ip, tip, &src_log_flags, @@ -1780,7 +1778,7 @@ xfs_swap_extents( } /* Swap the cow forks. */ - if (xfs_sb_version_hasreflink(&mp->m_sb)) { + if (xfs_has_reflink(mp)) { ASSERT(!ip->i_cowfp || ip->i_cowfp->if_format == XFS_DINODE_FMT_EXTENTS); ASSERT(!tip->i_cowfp || @@ -1822,7 +1820,7 @@ xfs_swap_extents( * If this is a synchronous mount, make sure that the * transaction goes to disk before returning to the user. */ - if (mp->m_flags & XFS_MOUNT_WSYNC) + if (xfs_has_wsync(mp)) xfs_trans_set_sync(tp); error = xfs_trans_commit(tp); @@ -1830,13 +1828,16 @@ xfs_swap_extents( trace_xfs_swap_extent_after(ip, 0); trace_xfs_swap_extent_after(tip, 1); +out_unlock_ilock: + xfs_iunlock(ip, XFS_ILOCK_EXCL); + xfs_iunlock(tip, XFS_ILOCK_EXCL); out_unlock: - xfs_iunlock(ip, lock_flags); - xfs_iunlock(tip, lock_flags); + filemap_invalidate_unlock_two(VFS_I(ip)->i_mapping, + VFS_I(tip)->i_mapping); unlock_two_nondirectories(VFS_I(ip), VFS_I(tip)); return error; out_trans_cancel: xfs_trans_cancel(tp); - goto out_unlock; + goto out_unlock_ilock; } diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c index 8ff42b3585e0..5fa6cd947dd4 100644 --- a/fs/xfs/xfs_buf.c +++ b/fs/xfs/xfs_buf.c @@ -251,7 +251,7 @@ _xfs_buf_alloc( return error; } - bp->b_bn = map[0].bm_bn; + bp->b_rhash_key = map[0].bm_bn; bp->b_length = 0; for (i = 0; i < nmaps; i++) { bp->b_maps[i].bm_bn = map[i].bm_bn; @@ -315,7 +315,6 @@ xfs_buf_alloc_kmem( struct xfs_buf *bp, xfs_buf_flags_t flags) { - int align_mask = xfs_buftarg_dma_alignment(bp->b_target); xfs_km_flags_t kmflag_mask = KM_NOFS; size_t size = BBTOB(bp->b_length); @@ -323,7 +322,7 @@ xfs_buf_alloc_kmem( if (!(flags & XBF_READ)) kmflag_mask |= KM_ZERO; - bp->b_addr = kmem_alloc_io(size, align_mask, kmflag_mask); + bp->b_addr = kmem_alloc(size, kmflag_mask); if (!bp->b_addr) return -ENOMEM; @@ -460,7 +459,7 @@ _xfs_buf_obj_cmp( */ BUILD_BUG_ON(offsetof(struct xfs_buf_map, bm_bn) != 0); - if (bp->b_bn != map->bm_bn) + if (bp->b_rhash_key != map->bm_bn) return 1; if (unlikely(bp->b_length != map->bm_len)) { @@ -482,7 +481,7 @@ static const struct rhashtable_params xfs_buf_hash_params = { .min_size = 32, /* empty AGs have minimal footprint */ .nelem_hint = 16, .key_len = sizeof(xfs_daddr_t), - .key_offset = offsetof(struct xfs_buf, b_bn), + .key_offset = offsetof(struct xfs_buf, b_rhash_key), .head_offset = offsetof(struct xfs_buf, b_rhash_head), .automatic_shrinking = true, .obj_cmpfn = _xfs_buf_obj_cmp, @@ -814,7 +813,7 @@ xfs_buf_read_map( * buffer. */ if (error) { - if (!XFS_FORCED_SHUTDOWN(target->bt_mount)) + if (!xfs_is_shutdown(target->bt_mount)) xfs_buf_ioerror_alert(bp, fa); bp->b_flags &= ~XBF_DONE; @@ -844,7 +843,7 @@ xfs_buf_readahead_map( { struct xfs_buf *bp; - if (bdi_read_congested(target->bt_bdev->bd_bdi)) + if (bdi_read_congested(target->bt_bdev->bd_disk->bdi)) return; xfs_buf_read_map(target, map, nmaps, @@ -854,7 +853,9 @@ xfs_buf_readahead_map( /* * Read an uncached buffer from disk. Allocates and returns a locked - * buffer containing the disk contents or nothing. + * buffer containing the disk contents or nothing. 
Uncached buffers always have + * a cache index of XFS_BUF_DADDR_NULL so we can easily determine if the buffer + * is cached or uncached during fault diagnosis. */ int xfs_buf_read_uncached( @@ -876,7 +877,7 @@ xfs_buf_read_uncached( /* set up the buffer for a read IO */ ASSERT(bp->b_map_count == 1); - bp->b_bn = XFS_BUF_DADDR_NULL; /* always null for uncached buffers */ + bp->b_rhash_key = XFS_BUF_DADDR_NULL; bp->b_maps[0].bm_bn = daddr; bp->b_flags |= XBF_READ; bp->b_ops = ops; @@ -1145,7 +1146,7 @@ xfs_buf_ioerror_permanent( return true; /* At unmount we may treat errors differently */ - if ((mp->m_flags & XFS_MOUNT_UNMOUNTING) && mp->m_fail_unmount) + if (xfs_is_unmounting(mp) && mp->m_fail_unmount) return true; return false; @@ -1179,7 +1180,7 @@ xfs_buf_ioend_handle_error( * If we've already decided to shutdown the filesystem because of I/O * errors, there's no point in giving this a retry. */ - if (XFS_FORCED_SHUTDOWN(mp)) + if (xfs_is_shutdown(mp)) goto out_stale; xfs_buf_ioerror_alert_ratelimited(bp); @@ -1336,7 +1337,7 @@ xfs_buf_ioerror_alert( { xfs_buf_alert_ratelimited(bp, "XFS: metadata IO error", "metadata I/O error in \"%pS\" at daddr 0x%llx len %d error %d", - func, (uint64_t)XFS_BUF_ADDR(bp), + func, (uint64_t)xfs_buf_daddr(bp), bp->b_length, -bp->b_error); } @@ -1514,17 +1515,18 @@ _xfs_buf_ioapply( SHUTDOWN_CORRUPT_INCORE); return; } - } else if (bp->b_bn != XFS_BUF_DADDR_NULL) { + } else if (bp->b_rhash_key != XFS_BUF_DADDR_NULL) { struct xfs_mount *mp = bp->b_mount; /* * non-crc filesystems don't attach verifiers during * log recovery, so don't warn for such filesystems. */ - if (xfs_sb_version_hascrc(&mp->m_sb)) { + if (xfs_has_crc(mp)) { xfs_warn(mp, "%s: no buf ops on daddr 0x%llx len %d", - __func__, bp->b_bn, bp->b_length); + __func__, xfs_buf_daddr(bp), + bp->b_length); xfs_hex_dump(bp->b_addr, XFS_CORRUPTION_DUMP_LEN); dump_stack(); @@ -1592,7 +1594,7 @@ __xfs_buf_submit( ASSERT(!(bp->b_flags & _XBF_DELWRI_Q)); /* on shutdown we stale and complete the buffer immediately */ - if (XFS_FORCED_SHUTDOWN(bp->b_mount)) { + if (xfs_is_shutdown(bp->b_mount)) { xfs_buf_ioend_fail(bp); return -EIO; } @@ -1794,7 +1796,7 @@ xfs_buftarg_drain( xfs_buf_alert_ratelimited(bp, "XFS: Corruption Alert", "Corruption Alert: Buffer at daddr 0x%llx had permanent write failures!", - (long long)bp->b_bn); + (long long)xfs_buf_daddr(bp)); } xfs_buf_rele(bp); } @@ -1809,7 +1811,7 @@ xfs_buftarg_drain( * down the fs. */ if (write_fail) { - ASSERT(XFS_FORCED_SHUTDOWN(btp->bt_mount)); + ASSERT(xfs_is_shutdown(btp->bt_mount)); xfs_alert(btp->bt_mount, "Please run xfs_repair to determine the extent of the problem."); } @@ -2302,7 +2304,7 @@ xfs_verify_magic( struct xfs_mount *mp = bp->b_mount; int idx; - idx = xfs_sb_version_hascrc(&mp->m_sb); + idx = xfs_has_crc(mp); if (WARN_ON(!bp->b_ops || !bp->b_ops->magic[idx])) return false; return dmagic == bp->b_ops->magic[idx]; @@ -2320,7 +2322,7 @@ xfs_verify_magic16( struct xfs_mount *mp = bp->b_mount; int idx; - idx = xfs_sb_version_hascrc(&mp->m_sb); + idx = xfs_has_crc(mp); if (WARN_ON(!bp->b_ops || !bp->b_ops->magic16[idx])) return false; return dmagic == bp->b_ops->magic16[idx]; diff --git a/fs/xfs/xfs_buf.h b/fs/xfs/xfs_buf.h index 464dc548fa23..6b0200b8007d 100644 --- a/fs/xfs/xfs_buf.h +++ b/fs/xfs/xfs_buf.h @@ -133,7 +133,8 @@ struct xfs_buf { * fast-path on locking. 
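* b_rhash_key is only the cache index now; the buffer's disk address
* lives in the I/O map, and callers read it through the helper added
* further down in this header:
*
*	static inline xfs_daddr_t xfs_buf_daddr(struct xfs_buf *bp)
*	{
*		return bp->b_maps[0].bm_bn;
*	}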
*/ struct rhash_head b_rhash_head; /* pag buffer hash node */ - xfs_daddr_t b_bn; /* block number of buffer */ + + xfs_daddr_t b_rhash_key; /* buffer cache index */ int b_length; /* size of buffer in BBs */ atomic_t b_hold; /* reference count */ atomic_t b_lru_ref; /* lru reclaim ref count */ @@ -296,18 +297,10 @@ extern int xfs_buf_delwri_pushbuf(struct xfs_buf *, struct list_head *); extern int xfs_buf_init(void); extern void xfs_buf_terminate(void); -/* - * These macros use the IO block map rather than b_bn. b_bn is now really - * just for the buffer cache index for cached buffers. As IO does not use b_bn - * anymore, uncached buffers do not use b_bn at all and hence must modify the IO - * map directly. Uncached buffers are not allowed to be discontiguous, so this - * is safe to do. - * - * In future, uncached buffers will pass the block number directly to the io - * request function and hence these macros will go away at that point. - */ -#define XFS_BUF_ADDR(bp) ((bp)->b_maps[0].bm_bn) -#define XFS_BUF_SET_ADDR(bp, bno) ((bp)->b_maps[0].bm_bn = (xfs_daddr_t)(bno)) +static inline xfs_daddr_t xfs_buf_daddr(struct xfs_buf *bp) +{ + return bp->b_maps[0].bm_bn; +} void xfs_buf_set_ref(struct xfs_buf *bp, int lru_ref); @@ -355,12 +348,6 @@ extern int xfs_setsize_buftarg(struct xfs_buftarg *, unsigned int); #define xfs_getsize_buftarg(buftarg) block_size((buftarg)->bt_bdev) #define xfs_readonly_buftarg(buftarg) bdev_read_only((buftarg)->bt_bdev) -static inline int -xfs_buftarg_dma_alignment(struct xfs_buftarg *bt) -{ - return queue_dma_alignment(bt->bt_bdev->bd_disk->queue); -} - int xfs_buf_reverify(struct xfs_buf *bp, const struct xfs_buf_ops *ops); bool xfs_verify_magic(struct xfs_buf *bp, __be32 dmagic); bool xfs_verify_magic16(struct xfs_buf *bp, __be16 dmagic); diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c index 2828ce45b701..b1ab100c09e1 100644 --- a/fs/xfs/xfs_buf_item.c +++ b/fs/xfs/xfs_buf_item.c @@ -428,7 +428,7 @@ xfs_buf_item_format( * occurs during recovery. */ if (bip->bli_flags & XFS_BLI_INODE_BUF) { - if (xfs_sb_version_has_v3inode(&lip->li_mountp->m_sb) || + if (xfs_has_v3inodes(lip->li_mountp) || !((bip->bli_flags & XFS_BLI_INODE_ALLOC_BUF) && xfs_log_item_in_current_chkpt(lip))) bip->__bli_format.blf_flags |= XFS_BLF_INODE_BUF; @@ -581,7 +581,7 @@ xfs_buf_item_push( if (bp->b_flags & XBF_WRITE_FAIL) { xfs_buf_alert_ratelimited(bp, "XFS: Failing async write", "Failing async write on buffer block 0x%llx. Retrying async write.", - (long long)bp->b_bn); + (long long)xfs_buf_daddr(bp)); } if (!xfs_buf_delwri_queue(bp, buffer_list)) @@ -616,7 +616,7 @@ xfs_buf_item_put( * that case, the bli is freed on buffer writeback completion. */ aborted = test_bit(XFS_LI_ABORTED, &lip->li_flags) || - XFS_FORCED_SHUTDOWN(lip->li_mountp); + xfs_is_shutdown(lip->li_mountp); dirty = bip->bli_flags & XFS_BLI_DIRTY; if (dirty && !aborted) return false; diff --git a/fs/xfs/xfs_buf_item_recover.c b/fs/xfs/xfs_buf_item_recover.c index 4775485b4062..a476c7ef5d53 100644 --- a/fs/xfs/xfs_buf_item_recover.c +++ b/fs/xfs/xfs_buf_item_recover.c @@ -219,7 +219,7 @@ xlog_recover_validate_buf_type( * inconsistent state resulting in verification failures. 
Hence for now * just avoid the verification stage for non-crc filesystems */ - if (!xfs_sb_version_hascrc(&mp->m_sb)) + if (!xfs_has_crc(mp)) return; magic32 = be32_to_cpu(*(__be32 *)bp->b_addr); @@ -497,7 +497,7 @@ xlog_recover_do_reg_buffer( if (fa) { xfs_alert(mp, "dquot corrupt at %pS trying to replay into block 0x%llx", - fa, bp->b_bn); + fa, xfs_buf_daddr(bp)); goto next; } } @@ -597,7 +597,7 @@ xlog_recover_do_inode_buffer( * Post recovery validation only works properly on CRC enabled * filesystems. */ - if (xfs_sb_version_hascrc(&mp->m_sb)) + if (xfs_has_crc(mp)) bp->b_ops = &xfs_inode_buf_ops; inodes_per_buf = BBTOB(bp->b_length) >> mp->m_sb.sb_inodelog; @@ -710,7 +710,7 @@ xlog_recover_get_buf_lsn( uint16_t blft; /* v4 filesystems always recover immediately */ - if (!xfs_sb_version_hascrc(&mp->m_sb)) + if (!xfs_has_crc(mp)) goto recover_immediately; /* @@ -787,7 +787,7 @@ xlog_recover_get_buf_lsn( * the relevant UUID in the superblock. */ lsn = be64_to_cpu(((struct xfs_dsb *)blk)->sb_lsn); - if (xfs_sb_version_hasmetauuid(&mp->m_sb)) + if (xfs_has_metauuid(mp)) uuid = &((struct xfs_dsb *)blk)->sb_meta_uuid; else uuid = &((struct xfs_dsb *)blk)->sb_uuid; diff --git a/fs/xfs/xfs_dir2_readdir.c b/fs/xfs/xfs_dir2_readdir.c index da1cc683560c..8310005af00f 100644 --- a/fs/xfs/xfs_dir2_readdir.c +++ b/fs/xfs/xfs_dir2_readdir.c @@ -32,7 +32,7 @@ xfs_dir3_get_dtype( struct xfs_mount *mp, uint8_t filetype) { - if (!xfs_sb_version_hasftype(&mp->m_sb)) + if (!xfs_has_ftype(mp)) return DT_UNKNOWN; if (filetype >= XFS_DIR3_FT_MAX) @@ -512,7 +512,7 @@ xfs_readdir( trace_xfs_readdir(dp); - if (XFS_FORCED_SHUTDOWN(dp->i_mount)) + if (xfs_is_shutdown(dp->i_mount)) return -EIO; ASSERT(S_ISDIR(VFS_I(dp)->i_mode)); diff --git a/fs/xfs/xfs_discard.c b/fs/xfs/xfs_discard.c index 736df5660f1f..0191de8ce9ce 100644 --- a/fs/xfs/xfs_discard.c +++ b/fs/xfs/xfs_discard.c @@ -169,7 +169,7 @@ xfs_ioc_trim( * We haven't recovered the log, so we cannot use our bnobt-guided * storage zapping commands. */ - if (mp->m_flags & XFS_MOUNT_NORECOVERY) + if (xfs_has_norecovery(mp)) return -EROFS; if (copy_from_user(&range, urange, sizeof(range))) diff --git a/fs/xfs/xfs_dquot.c b/fs/xfs/xfs_dquot.c index ecd5059d6928..c15d61d47a06 100644 --- a/fs/xfs/xfs_dquot.c +++ b/fs/xfs/xfs_dquot.c @@ -223,9 +223,9 @@ xfs_qm_init_dquot_blk( d->dd_diskdq.d_version = XFS_DQUOT_VERSION; d->dd_diskdq.d_id = cpu_to_be32(curid); d->dd_diskdq.d_type = type; - if (curid > 0 && xfs_sb_version_hasbigtime(&mp->m_sb)) + if (curid > 0 && xfs_has_bigtime(mp)) d->dd_diskdq.d_type |= XFS_DQTYPE_BIGTIME; - if (xfs_sb_version_hascrc(&mp->m_sb)) { + if (xfs_has_crc(mp)) { uuid_copy(&d->dd_uuid, &mp->m_sb.sb_meta_uuid); xfs_update_cksum((char *)d, sizeof(struct xfs_dqblk), XFS_DQUOT_CRC_OFF); @@ -526,7 +526,7 @@ xfs_dquot_check_type( * expect an exact match for user dquots and for non-root group and * project dquots. 
*/ - if (xfs_sb_version_hascrc(&dqp->q_mount->m_sb) || + if (xfs_has_crc(dqp->q_mount) || dqp_type == XFS_DQTYPE_USER || dqp->q_id != 0) return ddqp_type == dqp_type; @@ -847,9 +847,6 @@ xfs_qm_dqget_checks( struct xfs_mount *mp, xfs_dqtype_t type) { - if (WARN_ON_ONCE(!XFS_IS_QUOTA_RUNNING(mp))) - return -ESRCH; - switch (type) { case XFS_DQTYPE_USER: if (!XFS_IS_UQUOTA_ON(mp)) @@ -1222,7 +1219,7 @@ xfs_qm_dqflush_check( /* bigtime flag should never be set on root dquots */ if (dqp->q_type & XFS_DQTYPE_BIGTIME) { - if (!xfs_sb_version_hasbigtime(&dqp->q_mount->m_sb)) + if (!xfs_has_bigtime(dqp->q_mount)) return __this_address; if (dqp->q_id == 0) return __this_address; @@ -1301,7 +1298,7 @@ xfs_qm_dqflush( * buffer always has a valid CRC. This ensures there is no possibility * of a dquot without an up-to-date CRC getting to disk. */ - if (xfs_sb_version_hascrc(&mp->m_sb)) { + if (xfs_has_crc(mp)) { dqblk->dd_lsn = cpu_to_be64(dqp->q_logitem.qli_item.li_lsn); xfs_update_cksum((char *)dqblk, sizeof(struct xfs_dqblk), XFS_DQUOT_CRC_OFF); diff --git a/fs/xfs/xfs_dquot.h b/fs/xfs/xfs_dquot.h index f642884a6834..6b5e3cf40c8b 100644 --- a/fs/xfs/xfs_dquot.h +++ b/fs/xfs/xfs_dquot.h @@ -54,6 +54,16 @@ struct xfs_dquot_res { xfs_qwarncnt_t warnings; }; +static inline bool +xfs_dquot_res_over_limits( + const struct xfs_dquot_res *qres) +{ + if ((qres->softlimit && qres->softlimit < qres->reserved) || + (qres->hardlimit && qres->hardlimit < qres->reserved)) + return true; + return false; +} + /* * The incore dquot structure */ diff --git a/fs/xfs/xfs_dquot_item.c b/fs/xfs/xfs_dquot_item.c index 8ed47b739b6c..6a1aae799cf1 100644 --- a/fs/xfs/xfs_dquot_item.c +++ b/fs/xfs/xfs_dquot_item.c @@ -218,137 +218,3 @@ xfs_qm_dquot_logitem_init( &xfs_dquot_item_ops); lp->qli_dquot = dqp; } - -/*------------------ QUOTAOFF LOG ITEMS -------------------*/ - -static inline struct xfs_qoff_logitem *QOFF_ITEM(struct xfs_log_item *lip) -{ - return container_of(lip, struct xfs_qoff_logitem, qql_item); -} - - -/* - * This returns the number of iovecs needed to log the given quotaoff item. - * We only need 1 iovec for an quotaoff item. It just logs the - * quotaoff_log_format structure. - */ -STATIC void -xfs_qm_qoff_logitem_size( - struct xfs_log_item *lip, - int *nvecs, - int *nbytes) -{ - *nvecs += 1; - *nbytes += sizeof(struct xfs_qoff_logitem); -} - -STATIC void -xfs_qm_qoff_logitem_format( - struct xfs_log_item *lip, - struct xfs_log_vec *lv) -{ - struct xfs_qoff_logitem *qflip = QOFF_ITEM(lip); - struct xfs_log_iovec *vecp = NULL; - struct xfs_qoff_logformat *qlf; - - qlf = xlog_prepare_iovec(lv, &vecp, XLOG_REG_TYPE_QUOTAOFF); - qlf->qf_type = XFS_LI_QUOTAOFF; - qlf->qf_size = 1; - qlf->qf_flags = qflip->qql_flags; - xlog_finish_iovec(lv, vecp, sizeof(struct xfs_qoff_logitem)); -} - -/* - * There isn't much you can do to push a quotaoff item. It is simply - * stuck waiting for the log to be flushed to disk. 
- */ -STATIC uint -xfs_qm_qoff_logitem_push( - struct xfs_log_item *lip, - struct list_head *buffer_list) -{ - return XFS_ITEM_LOCKED; -} - -STATIC xfs_lsn_t -xfs_qm_qoffend_logitem_committed( - struct xfs_log_item *lip, - xfs_lsn_t lsn) -{ - struct xfs_qoff_logitem *qfe = QOFF_ITEM(lip); - struct xfs_qoff_logitem *qfs = qfe->qql_start_lip; - - xfs_qm_qoff_logitem_relse(qfs); - - kmem_free(lip->li_lv_shadow); - kmem_free(qfe); - return (xfs_lsn_t)-1; -} - -STATIC void -xfs_qm_qoff_logitem_release( - struct xfs_log_item *lip) -{ - struct xfs_qoff_logitem *qoff = QOFF_ITEM(lip); - - if (test_bit(XFS_LI_ABORTED, &lip->li_flags)) { - if (qoff->qql_start_lip) - xfs_qm_qoff_logitem_relse(qoff->qql_start_lip); - xfs_qm_qoff_logitem_relse(qoff); - } -} - -static const struct xfs_item_ops xfs_qm_qoffend_logitem_ops = { - .iop_size = xfs_qm_qoff_logitem_size, - .iop_format = xfs_qm_qoff_logitem_format, - .iop_committed = xfs_qm_qoffend_logitem_committed, - .iop_push = xfs_qm_qoff_logitem_push, - .iop_release = xfs_qm_qoff_logitem_release, -}; - -static const struct xfs_item_ops xfs_qm_qoff_logitem_ops = { - .iop_size = xfs_qm_qoff_logitem_size, - .iop_format = xfs_qm_qoff_logitem_format, - .iop_push = xfs_qm_qoff_logitem_push, - .iop_release = xfs_qm_qoff_logitem_release, -}; - -/* - * Delete the quotaoff intent from the AIL and free it. On success, - * this should only be called for the start item. It can be used for - * either on shutdown or abort. - */ -void -xfs_qm_qoff_logitem_relse( - struct xfs_qoff_logitem *qoff) -{ - struct xfs_log_item *lip = &qoff->qql_item; - - ASSERT(test_bit(XFS_LI_IN_AIL, &lip->li_flags) || - test_bit(XFS_LI_ABORTED, &lip->li_flags) || - XFS_FORCED_SHUTDOWN(lip->li_mountp)); - xfs_trans_ail_delete(lip, 0); - kmem_free(lip->li_lv_shadow); - kmem_free(qoff); -} - -/* - * Allocate and initialize an quotaoff item of the correct quota type(s). - */ -struct xfs_qoff_logitem * -xfs_qm_qoff_logitem_init( - struct xfs_mount *mp, - struct xfs_qoff_logitem *start, - uint flags) -{ - struct xfs_qoff_logitem *qf; - - qf = kmem_zalloc(sizeof(struct xfs_qoff_logitem), 0); - - xfs_log_item_init(mp, &qf->qql_item, XFS_LI_QUOTAOFF, start ? 
- &xfs_qm_qoffend_logitem_ops : &xfs_qm_qoff_logitem_ops); - qf->qql_item.li_mountp = mp; - qf->qql_start_lip = start; - qf->qql_flags = flags; - return qf; -} diff --git a/fs/xfs/xfs_dquot_item.h b/fs/xfs/xfs_dquot_item.h index 2b86a43d7ce2..794710c24474 100644 --- a/fs/xfs/xfs_dquot_item.h +++ b/fs/xfs/xfs_dquot_item.h @@ -9,7 +9,6 @@ struct xfs_dquot; struct xfs_trans; struct xfs_mount; -struct xfs_qoff_logitem; struct xfs_dq_logitem { struct xfs_log_item qli_item; /* common portion */ @@ -17,22 +16,6 @@ struct xfs_dq_logitem { xfs_lsn_t qli_flush_lsn; /* lsn at last flush */ }; -struct xfs_qoff_logitem { - struct xfs_log_item qql_item; /* common portion */ - struct xfs_qoff_logitem *qql_start_lip; /* qoff-start logitem, if any */ - unsigned int qql_flags; -}; - - void xfs_qm_dquot_logitem_init(struct xfs_dquot *dqp); -struct xfs_qoff_logitem *xfs_qm_qoff_logitem_init(struct xfs_mount *mp, - struct xfs_qoff_logitem *start, - uint flags); -void xfs_qm_qoff_logitem_relse(struct xfs_qoff_logitem *); -struct xfs_qoff_logitem *xfs_trans_get_qoff_item(struct xfs_trans *tp, - struct xfs_qoff_logitem *startqoff, - uint flags); -void xfs_trans_log_quotaoff_item(struct xfs_trans *tp, - struct xfs_qoff_logitem *qlp); #endif /* __XFS_DQUOT_ITEM_H__ */ diff --git a/fs/xfs/xfs_dquot_item_recover.c b/fs/xfs/xfs_dquot_item_recover.c index 5875c7e1bd28..8966ba842395 100644 --- a/fs/xfs/xfs_dquot_item_recover.c +++ b/fs/xfs/xfs_dquot_item_recover.c @@ -136,7 +136,7 @@ xlog_recover_dquot_commit_pass2( * If the dquot has an LSN in it, recover the dquot only if it's less * than the lsn of the transaction we are replaying. */ - if (xfs_sb_version_hascrc(&mp->m_sb)) { + if (xfs_has_crc(mp)) { struct xfs_dqblk *dqb = (struct xfs_dqblk *)ddq; xfs_lsn_t lsn = be64_to_cpu(dqb->dd_lsn); @@ -146,7 +146,7 @@ xlog_recover_dquot_commit_pass2( } memcpy(ddq, recddq, item->ri_buf[1].i_len); - if (xfs_sb_version_hascrc(&mp->m_sb)) { + if (xfs_has_crc(mp)) { xfs_update_cksum((char *)ddq, sizeof(struct xfs_dqblk), XFS_DQUOT_CRC_OFF); } diff --git a/fs/xfs/xfs_error.c b/fs/xfs/xfs_error.c index ce3bc1b291a1..81c445e9489b 100644 --- a/fs/xfs/xfs_error.c +++ b/fs/xfs/xfs_error.c @@ -371,7 +371,7 @@ xfs_buf_corruption_error( xfs_alert_tag(mp, XFS_PTAG_VERIFIER_ERROR, "Metadata corruption detected at %pS, %s block 0x%llx", - fa, bp->b_ops->name, bp->b_bn); + fa, bp->b_ops->name, xfs_buf_daddr(bp)); xfs_alert(mp, "Unmount and run xfs_repair"); @@ -402,7 +402,7 @@ xfs_buf_verifier_error( xfs_alert_tag(mp, XFS_PTAG_VERIFIER_ERROR, "Metadata %s detected at %pS, %s block 0x%llx %s", bp->b_error == -EFSBADCRC ? 
"CRC error" : "corruption", - fa, bp->b_ops->name, bp->b_bn, name); + fa, bp->b_ops->name, xfs_buf_daddr(bp), name); xfs_alert(mp, "Unmount and run xfs_repair"); diff --git a/fs/xfs/xfs_error.h b/fs/xfs/xfs_error.h index 1717b7508356..5735d5ea87ee 100644 --- a/fs/xfs/xfs_error.h +++ b/fs/xfs/xfs_error.h @@ -75,4 +75,16 @@ extern int xfs_errortag_clearall(struct xfs_mount *mp); #define XFS_PTAG_FSBLOCK_ZERO 0x00000080 #define XFS_PTAG_VERIFIER_ERROR 0x00000100 +#define XFS_PTAG_STRINGS \ + { XFS_NO_PTAG, "none" }, \ + { XFS_PTAG_IFLUSH, "iflush" }, \ + { XFS_PTAG_LOGRES, "logres" }, \ + { XFS_PTAG_AILDELETE, "aildelete" }, \ + { XFS_PTAG_ERROR_REPORT , "error_report" }, \ + { XFS_PTAG_SHUTDOWN_CORRUPT, "corrupt" }, \ + { XFS_PTAG_SHUTDOWN_IOERROR, "ioerror" }, \ + { XFS_PTAG_SHUTDOWN_LOGERROR, "logerror" }, \ + { XFS_PTAG_FSBLOCK_ZERO, "fsb_zero" }, \ + { XFS_PTAG_VERIFIER_ERROR, "verifier" } + #endif /* __XFS_ERROR_H__ */ diff --git a/fs/xfs/xfs_export.c b/fs/xfs/xfs_export.c index 1da59bdff245..1064c2342876 100644 --- a/fs/xfs/xfs_export.c +++ b/fs/xfs/xfs_export.c @@ -44,6 +44,7 @@ xfs_fs_encode_fh( int *max_len, struct inode *parent) { + struct xfs_mount *mp = XFS_M(inode->i_sb); struct fid *fid = (struct fid *)fh; struct xfs_fid64 *fid64 = (struct xfs_fid64 *)fh; int fileid_type; @@ -63,8 +64,7 @@ xfs_fs_encode_fh( * large enough filesystem may contain them, thus the slightly * confusing looking conditional below. */ - if (!(XFS_M(inode->i_sb)->m_flags & XFS_MOUNT_SMALL_INUMS) || - (XFS_M(inode->i_sb)->m_flags & XFS_MOUNT_32BITINODES)) + if (!xfs_has_small_inums(mp) || xfs_is_inode32(mp)) fileid_type |= XFS_FILEID_TYPE_64FLAG; /* diff --git a/fs/xfs/xfs_extfree_item.c b/fs/xfs/xfs_extfree_item.c index 2424230ca2c3..3f8a0713573a 100644 --- a/fs/xfs/xfs_extfree_item.c +++ b/fs/xfs/xfs_extfree_item.c @@ -629,6 +629,9 @@ xfs_efi_item_recover( error = xfs_trans_free_extent(tp, efdp, extp->ext_start, extp->ext_len, &XFS_RMAP_OINFO_ANY_OWNER, false); + if (error == -EFSCORRUPTED) + XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, + extp, sizeof(*extp)); if (error) goto abort_error; diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c index cc3cfb12df53..7aa943edfc02 100644 --- a/fs/xfs/xfs_file.c +++ b/fs/xfs/xfs_file.c @@ -185,7 +185,7 @@ xfs_file_fsync( if (error) return error; - if (XFS_FORCED_SHUTDOWN(mp)) + if (xfs_is_shutdown(mp)) return -EIO; xfs_iflags_clear(ip, XFS_ITRUNCATED); @@ -318,7 +318,7 @@ xfs_file_read_iter( XFS_STATS_INC(mp, xs_read_calls); - if (XFS_FORCED_SHUTDOWN(mp)) + if (xfs_is_shutdown(mp)) return -EIO; if (IS_DAX(inode)) @@ -462,7 +462,7 @@ xfs_dio_write_end_io( trace_xfs_end_io_direct_write(ip, offset, size); - if (XFS_FORCED_SHUTDOWN(ip->i_mount)) + if (xfs_is_shutdown(ip->i_mount)) return -EIO; if (error) @@ -814,7 +814,7 @@ xfs_file_write_iter( if (ocount == 0) return 0; - if (XFS_FORCED_SHUTDOWN(ip->i_mount)) + if (xfs_is_shutdown(ip->i_mount)) return -EIO; if (IS_DAX(inode)) @@ -1122,7 +1122,7 @@ static inline bool xfs_file_sync_writes(struct file *filp) { struct xfs_inode *ip = XFS_I(file_inode(filp)); - if (ip->i_mount->m_flags & XFS_MOUNT_WSYNC) + if (xfs_has_wsync(ip->i_mount)) return true; if (filp->f_flags & (__O_SYNC | O_DSYNC)) return true; @@ -1153,10 +1153,10 @@ xfs_file_remap_range( if (remap_flags & ~(REMAP_FILE_DEDUP | REMAP_FILE_ADVISORY)) return -EINVAL; - if (!xfs_sb_version_hasreflink(&mp->m_sb)) + if (!xfs_has_reflink(mp)) return -EOPNOTSUPP; - if (XFS_FORCED_SHUTDOWN(mp)) + if (xfs_is_shutdown(mp)) return -EIO; /* Prepare and then clone 
file data. */ @@ -1205,7 +1205,7 @@ xfs_file_open( { if (!(file->f_flags & O_LARGEFILE) && i_size_read(inode) > MAX_NON_LFS) return -EFBIG; - if (XFS_FORCED_SHUTDOWN(XFS_M(inode->i_sb))) + if (xfs_is_shutdown(XFS_M(inode->i_sb))) return -EIO; file->f_mode |= FMODE_NOWAIT | FMODE_BUF_RASYNC; return 0; @@ -1277,7 +1277,7 @@ xfs_file_llseek( { struct inode *inode = file->f_mapping->host; - if (XFS_FORCED_SHUTDOWN(XFS_I(inode)->i_mount)) + if (xfs_is_shutdown(XFS_I(inode)->i_mount)) return -EIO; switch (whence) { @@ -1302,7 +1302,7 @@ xfs_file_llseek( * * mmap_lock (MM) * sb_start_pagefault(vfs, freeze) - * i_mmaplock (XFS - truncate serialisation) + * invalidate_lock (vfs/XFS_MMAPLOCK - truncate serialisation) * page_lock (MM) * i_lock (XFS - extent map serialisation) */ @@ -1323,24 +1323,27 @@ __xfs_filemap_fault( file_update_time(vmf->vma->vm_file); } - xfs_ilock(XFS_I(inode), XFS_MMAPLOCK_SHARED); if (IS_DAX(inode)) { pfn_t pfn; + xfs_ilock(XFS_I(inode), XFS_MMAPLOCK_SHARED); ret = dax_iomap_fault(vmf, pe_size, &pfn, NULL, (write_fault && !vmf->cow_page) ? &xfs_direct_write_iomap_ops : &xfs_read_iomap_ops); if (ret & VM_FAULT_NEEDDSYNC) ret = dax_finish_sync_fault(vmf, pe_size, pfn); + xfs_iunlock(XFS_I(inode), XFS_MMAPLOCK_SHARED); } else { - if (write_fault) + if (write_fault) { + xfs_ilock(XFS_I(inode), XFS_MMAPLOCK_SHARED); ret = iomap_page_mkwrite(vmf, &xfs_buffered_write_iomap_ops); - else + xfs_iunlock(XFS_I(inode), XFS_MMAPLOCK_SHARED); + } else { ret = filemap_fault(vmf); + } } - xfs_iunlock(XFS_I(inode), XFS_MMAPLOCK_SHARED); if (write_fault) sb_end_pagefault(inode->i_sb); diff --git a/fs/xfs/xfs_filestream.c b/fs/xfs/xfs_filestream.c index eed6ca5f8f91..6a3ce0f6dc9e 100644 --- a/fs/xfs/xfs_filestream.c +++ b/fs/xfs/xfs_filestream.c @@ -295,7 +295,7 @@ xfs_filestream_lookup_ag( * Set the starting AG using the rotor for inode32, otherwise * use the directory inode's AG. */ - if (mp->m_flags & XFS_MOUNT_32BITINODES) { + if (xfs_is_inode32(mp)) { xfs_agnumber_t rotorstep = xfs_rotorstep; startag = (mp->m_agfrotor / rotorstep) % mp->m_sb.sb_agcount; mp->m_agfrotor = (mp->m_agfrotor + 1) % diff --git a/fs/xfs/xfs_filestream.h b/fs/xfs/xfs_filestream.h index 3af963743e4d..403226ebb80b 100644 --- a/fs/xfs/xfs_filestream.h +++ b/fs/xfs/xfs_filestream.h @@ -21,7 +21,7 @@ static inline int xfs_inode_is_filestream( struct xfs_inode *ip) { - return (ip->i_mount->m_flags & XFS_MOUNT_FILESTREAMS) || + return xfs_has_filestreams(ip->i_mount) || (ip->i_diflags & XFS_DIFLAG_FILESTREAM); } diff --git a/fs/xfs/xfs_fsmap.c b/fs/xfs/xfs_fsmap.c index 7d0b09c1366e..48287caad28b 100644 --- a/fs/xfs/xfs_fsmap.c +++ b/fs/xfs/xfs_fsmap.c @@ -61,7 +61,7 @@ xfs_fsmap_to_internal( static int xfs_fsmap_owner_to_rmap( struct xfs_rmap_irec *dest, - struct xfs_fsmap *src) + const struct xfs_fsmap *src) { if (!(src->fmr_flags & FMR_OF_SPECIAL_OWNER)) { dest->rm_owner = src->fmr_owner; @@ -111,8 +111,8 @@ xfs_fsmap_owner_to_rmap( /* Convert an rmapbt owner into an fsmap owner. 
*/ static int xfs_fsmap_owner_from_rmap( - struct xfs_fsmap *dest, - struct xfs_rmap_irec *src) + struct xfs_fsmap *dest, + const struct xfs_rmap_irec *src) { dest->fmr_flags = 0; if (!XFS_RMAP_NON_INODE_OWNER(src->rm_owner)) { @@ -171,7 +171,7 @@ struct xfs_getfsmap_info { struct xfs_getfsmap_dev { u32 dev; int (*fn)(struct xfs_trans *tp, - struct xfs_fsmap *keys, + const struct xfs_fsmap *keys, struct xfs_getfsmap_info *info); }; @@ -192,7 +192,7 @@ STATIC int xfs_getfsmap_is_shared( struct xfs_trans *tp, struct xfs_getfsmap_info *info, - struct xfs_rmap_irec *rec, + const struct xfs_rmap_irec *rec, bool *stat) { struct xfs_mount *mp = tp->t_mountp; @@ -202,7 +202,7 @@ xfs_getfsmap_is_shared( int error; *stat = false; - if (!xfs_sb_version_hasreflink(&mp->m_sb)) + if (!xfs_has_reflink(mp)) return 0; /* rt files will have no perag structure */ if (!info->pag) @@ -245,7 +245,7 @@ STATIC int xfs_getfsmap_helper( struct xfs_trans *tp, struct xfs_getfsmap_info *info, - struct xfs_rmap_irec *rec, + const struct xfs_rmap_irec *rec, xfs_daddr_t rec_daddr) { struct xfs_fsmap fmr; @@ -347,7 +347,7 @@ out: STATIC int xfs_getfsmap_datadev_helper( struct xfs_btree_cur *cur, - struct xfs_rmap_irec *rec, + const struct xfs_rmap_irec *rec, void *priv) { struct xfs_mount *mp = cur->bc_mp; @@ -365,7 +365,7 @@ xfs_getfsmap_datadev_helper( STATIC int xfs_getfsmap_datadev_bnobt_helper( struct xfs_btree_cur *cur, - struct xfs_alloc_rec_incore *rec, + const struct xfs_alloc_rec_incore *rec, void *priv) { struct xfs_mount *mp = cur->bc_mp; @@ -389,7 +389,7 @@ xfs_getfsmap_datadev_bnobt_helper( static void xfs_getfsmap_set_irec_flags( struct xfs_rmap_irec *irec, - struct xfs_fsmap *fmr) + const struct xfs_fsmap *fmr) { irec->rm_flags = 0; if (fmr->fmr_flags & FMR_OF_ATTR_FORK) @@ -404,7 +404,7 @@ xfs_getfsmap_set_irec_flags( STATIC int xfs_getfsmap_logdev( struct xfs_trans *tp, - struct xfs_fsmap *keys, + const struct xfs_fsmap *keys, struct xfs_getfsmap_info *info) { struct xfs_mount *mp = tp->t_mountp; @@ -451,7 +451,7 @@ xfs_getfsmap_logdev( STATIC int xfs_getfsmap_rtdev_rtbitmap_helper( struct xfs_trans *tp, - struct xfs_rtalloc_rec *rec, + const struct xfs_rtalloc_rec *rec, void *priv) { struct xfs_mount *mp = tp->t_mountp; @@ -473,7 +473,7 @@ xfs_getfsmap_rtdev_rtbitmap_helper( STATIC int __xfs_getfsmap_rtdev( struct xfs_trans *tp, - struct xfs_fsmap *keys, + const struct xfs_fsmap *keys, int (*query_fn)(struct xfs_trans *, struct xfs_getfsmap_info *), struct xfs_getfsmap_info *info) @@ -481,16 +481,14 @@ __xfs_getfsmap_rtdev( struct xfs_mount *mp = tp->t_mountp; xfs_fsblock_t start_fsb; xfs_fsblock_t end_fsb; - xfs_daddr_t eofs; + uint64_t eofs; int error = 0; eofs = XFS_FSB_TO_BB(mp, mp->m_sb.sb_rblocks); if (keys[0].fmr_physical >= eofs) return 0; - if (keys[1].fmr_physical >= eofs) - keys[1].fmr_physical = eofs - 1; start_fsb = XFS_BB_TO_FSBT(mp, keys[0].fmr_physical); - end_fsb = XFS_BB_TO_FSB(mp, keys[1].fmr_physical); + end_fsb = XFS_BB_TO_FSB(mp, min(eofs - 1, keys[1].fmr_physical)); /* Set up search keys */ info->low.rm_startblock = start_fsb; @@ -523,27 +521,37 @@ xfs_getfsmap_rtdev_rtbitmap_query( { struct xfs_rtalloc_rec alow = { 0 }; struct xfs_rtalloc_rec ahigh = { 0 }; + struct xfs_mount *mp = tp->t_mountp; int error; - xfs_ilock(tp->t_mountp->m_rbmip, XFS_ILOCK_SHARED); + xfs_ilock(mp->m_rbmip, XFS_ILOCK_SHARED); + /* + * Set up query parameters to return free rtextents covering the range + * we want. 
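A note on the eofs changes in __xfs_getfsmap_rtdev() above: with "keys" now const, the end-of-device clamp moves into the local end_fsb calculation instead of being written back into the caller's high key, and eofs is declared uint64_t so the kernel's type-checked min() can compare it with the __u64 fmr_physical. An illustrative sketch:

/* Hypothetical helper showing the clamp: for a 1000-sector device
 * (eofs == 1000) and a caller high key of 4096, the effective query
 * end is min(1000 - 1, 4096) == 999. */
static inline uint64_t
example_clamp_high_key(uint64_t eofs, uint64_t fmr_physical)
{
	return min(eofs - 1, fmr_physical);
}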
+ */ alow.ar_startext = info->low.rm_startblock; ahigh.ar_startext = info->high.rm_startblock; - do_div(alow.ar_startext, tp->t_mountp->m_sb.sb_rextsize); - if (do_div(ahigh.ar_startext, tp->t_mountp->m_sb.sb_rextsize)) + do_div(alow.ar_startext, mp->m_sb.sb_rextsize); + if (do_div(ahigh.ar_startext, mp->m_sb.sb_rextsize)) ahigh.ar_startext++; error = xfs_rtalloc_query_range(tp, &alow, &ahigh, xfs_getfsmap_rtdev_rtbitmap_helper, info); if (error) goto err; - /* Report any gaps at the end of the rtbitmap */ + /* + * Report any gaps at the end of the rtbitmap by simulating a null + * rmap starting at the block after the end of the query range. + */ info->last = true; + ahigh.ar_startext = min(mp->m_sb.sb_rextents, ahigh.ar_startext); + error = xfs_getfsmap_rtdev_rtbitmap_helper(tp, &ahigh, info); if (error) goto err; err: - xfs_iunlock(tp->t_mountp->m_rbmip, XFS_ILOCK_SHARED); + xfs_iunlock(mp->m_rbmip, XFS_ILOCK_SHARED); return error; } @@ -551,7 +559,7 @@ err: STATIC int xfs_getfsmap_rtdev_rtbitmap( struct xfs_trans *tp, - struct xfs_fsmap *keys, + const struct xfs_fsmap *keys, struct xfs_getfsmap_info *info) { info->missing_owner = XFS_FMR_OWN_UNKNOWN; @@ -564,7 +572,7 @@ xfs_getfsmap_rtdev_rtbitmap( STATIC int __xfs_getfsmap_datadev( struct xfs_trans *tp, - struct xfs_fsmap *keys, + const struct xfs_fsmap *keys, struct xfs_getfsmap_info *info, int (*query_fn)(struct xfs_trans *, struct xfs_getfsmap_info *, @@ -579,16 +587,14 @@ __xfs_getfsmap_datadev( xfs_fsblock_t end_fsb; xfs_agnumber_t start_ag; xfs_agnumber_t end_ag; - xfs_daddr_t eofs; + uint64_t eofs; int error = 0; eofs = XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks); if (keys[0].fmr_physical >= eofs) return 0; - if (keys[1].fmr_physical >= eofs) - keys[1].fmr_physical = eofs - 1; start_fsb = XFS_DADDR_TO_FSB(mp, keys[0].fmr_physical); - end_fsb = XFS_DADDR_TO_FSB(mp, keys[1].fmr_physical); + end_fsb = XFS_DADDR_TO_FSB(mp, min(eofs - 1, keys[1].fmr_physical)); /* * Convert the fsmap low/high keys to AG based keys. Initialize @@ -716,7 +722,7 @@ xfs_getfsmap_datadev_rmapbt_query( STATIC int xfs_getfsmap_datadev_rmapbt( struct xfs_trans *tp, - struct xfs_fsmap *keys, + const struct xfs_fsmap *keys, struct xfs_getfsmap_info *info) { info->missing_owner = XFS_FMR_OWN_FREE; @@ -751,7 +757,7 @@ xfs_getfsmap_datadev_bnobt_query( STATIC int xfs_getfsmap_datadev_bnobt( struct xfs_trans *tp, - struct xfs_fsmap *keys, + const struct xfs_fsmap *keys, struct xfs_getfsmap_info *info) { struct xfs_alloc_rec_incore akeys[2]; @@ -859,7 +865,7 @@ xfs_getfsmap( return -EINVAL; use_rmap = capable(CAP_SYS_ADMIN) && - xfs_sb_version_hasrmapbt(&mp->m_sb); + xfs_has_rmapbt(mp); head->fmh_entries = 0; /* Set up our device handlers. */ diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c index 6ed29b158312..33e26690a8c4 100644 --- a/fs/xfs/xfs_fsops.c +++ b/fs/xfs/xfs_fsops.c @@ -19,6 +19,7 @@ #include "xfs_log.h" #include "xfs_ag.h" #include "xfs_ag_resv.h" +#include "xfs_trace.h" /* * Write new AG headers to disk. Non-transactional, but need to be @@ -177,7 +178,7 @@ xfs_growfs_data_private( * particularly important for shrink because the write verifier * will fail if sb_fdblocks is ever larger than sb_dblocks. */ - if (xfs_sb_version_haslazysbcount(&mp->m_sb)) + if (xfs_has_lazysbcount(mp)) xfs_log_sb(tp); xfs_trans_set_sync(tp); @@ -511,6 +512,11 @@ xfs_fs_goingdown( * consistent. We don't do an unmount here; just shutdown the shop, make sure * that absolutely nothing persistent happens to this filesystem after this * point. 
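The rtextent conversion in the rtbitmap query setup above relies on do_div() rounding: the low key rounds down so the extent containing the start is included, while the high key rounds up whenever there is a remainder so a partially covered extent is not lost. A worked sketch with illustrative values:

/* do_div(n, base) divides n in place and returns the remainder. */
static void example_rextent_rounding(void)
{
	uint64_t lo = 40, hi = 40;	/* rtblock numbers, illustrative */
	uint32_t rextsize = 16, rem;	/* blocks per rt extent */

	do_div(lo, rextsize);		/* lo == 2: round down to cover start */
	rem = do_div(hi, rextsize);	/* hi == 2, rem == 8 */
	if (rem)
		hi++;			/* hi == 3: round up to cover end */
}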
+ * + * The shutdown state change is atomic, resulting in the first and only the + * first shutdown call processing the shutdown. This means we only shutdown the + * log once as it requires, and we don't spam the logs when multiple concurrent + * shutdowns race to set the shutdown flags. */ void xfs_do_force_shutdown( @@ -519,48 +525,37 @@ xfs_do_force_shutdown( char *fname, int lnnum) { - bool logerror = flags & SHUTDOWN_LOG_IO_ERROR; - - /* - * No need to duplicate efforts. - */ - if (XFS_FORCED_SHUTDOWN(mp) && !logerror) - return; - - /* - * This flags XFS_MOUNT_FS_SHUTDOWN, makes sure that we don't - * queue up anybody new on the log reservations, and wakes up - * everybody who's sleeping on log reservations to tell them - * the bad news. - */ - if (xfs_log_force_umount(mp, logerror)) - return; + int tag; + const char *why; - if (flags & SHUTDOWN_FORCE_UMOUNT) { - xfs_alert(mp, -"User initiated shutdown (0x%x) received. Shutting down filesystem", - flags); + if (test_and_set_bit(XFS_OPSTATE_SHUTDOWN, &mp->m_opstate)) return; - } - - if (flags & SHUTDOWN_CORRUPT_INCORE) { - xfs_alert_tag(mp, XFS_PTAG_SHUTDOWN_CORRUPT, -"Corruption of in-memory data (0x%x) detected at %pS (%s:%d). Shutting down filesystem", - flags, __return_address, fname, lnnum); - if (XFS_ERRLEVEL_HIGH <= xfs_error_level) - xfs_stack_trace(); - } else if (logerror) { - xfs_alert_tag(mp, XFS_PTAG_SHUTDOWN_LOGERROR, -"Log I/O error (0x%x) detected at %pS (%s:%d). Shutting down filesystem", - flags, __return_address, fname, lnnum); + if (mp->m_sb_bp) + mp->m_sb_bp->b_flags |= XBF_DONE; + + if (flags & SHUTDOWN_FORCE_UMOUNT) + xfs_alert(mp, "User initiated shutdown received."); + + if (xlog_force_shutdown(mp->m_log, flags)) { + tag = XFS_PTAG_SHUTDOWN_LOGERROR; + why = "Log I/O Error"; + } else if (flags & SHUTDOWN_CORRUPT_INCORE) { + tag = XFS_PTAG_SHUTDOWN_CORRUPT; + why = "Corruption of in-memory data"; } else { - xfs_alert_tag(mp, XFS_PTAG_SHUTDOWN_IOERROR, -"I/O error (0x%x) detected at %pS (%s:%d). Shutting down filesystem", - flags, __return_address, fname, lnnum); + tag = XFS_PTAG_SHUTDOWN_IOERROR; + why = "Metadata I/O Error"; } + trace_xfs_force_shutdown(mp, tag, flags, fname, lnnum); + + xfs_alert_tag(mp, tag, +"%s (0x%x) detected at %pS (%s:%d). Shutting down filesystem.", + why, flags, __return_address, fname, lnnum); xfs_alert(mp, "Please unmount the filesystem and rectify the problem(s)"); + if (xfs_error_level >= XFS_ERRLEVEL_HIGH) + xfs_stack_trace(); } /* diff --git a/fs/xfs/xfs_health.c b/fs/xfs/xfs_health.c index eb10eacabc8f..72a075bb2c10 100644 --- a/fs/xfs/xfs_health.c +++ b/fs/xfs/xfs_health.c @@ -30,7 +30,7 @@ xfs_health_unmount( unsigned int checked = 0; bool warn = false; - if (XFS_FORCED_SHUTDOWN(mp)) + if (xfs_is_shutdown(mp)) return; /* Measure AG corruption levels. */ diff --git a/fs/xfs/xfs_icache.c b/fs/xfs/xfs_icache.c index 6007683482c6..f2210d927481 100644 --- a/fs/xfs/xfs_icache.c +++ b/fs/xfs/xfs_icache.c @@ -38,23 +38,11 @@ * radix tree tags when convenient. Avoid existing XFS_IWALK namespace. */ enum xfs_icwalk_goal { - /* Goals that are not related to tags; these must be < 0. */ - XFS_ICWALK_DQRELE = -1, - /* Goals directly associated with tagged inodes. */ XFS_ICWALK_BLOCKGC = XFS_ICI_BLOCKGC_TAG, XFS_ICWALK_RECLAIM = XFS_ICI_RECLAIM_TAG, }; -#define XFS_ICWALK_NULL_TAG (-1U) - -/* Compute the inode radix tree tag for this goal. */ -static inline unsigned int -xfs_icwalk_tag(enum xfs_icwalk_goal goal) -{ - return goal < 0 ? 
XFS_ICWALK_NULL_TAG : goal; -} - static int xfs_icwalk(struct xfs_mount *mp, enum xfs_icwalk_goal goal, struct xfs_icwalk *icw); static int xfs_icwalk_ag(struct xfs_perag *pag,
@@ -64,9 +52,6 @@ static int xfs_icwalk_ag(struct xfs_perag *pag, * Private inode cache walk flags for struct xfs_icwalk. Must not * coincide with XFS_ICWALK_FLAGS_VALID. */ -#define XFS_ICWALK_FLAG_DROP_UDQUOT (1U << 31) -#define XFS_ICWALK_FLAG_DROP_GDQUOT (1U << 30) -#define XFS_ICWALK_FLAG_DROP_PDQUOT (1U << 29) /* Stop scanning after icw_scan_limit inodes. */ #define XFS_ICWALK_FLAG_SCAN_LIMIT (1U << 28)
@@ -74,10 +59,7 @@ static int xfs_icwalk_ag(struct xfs_perag *pag, #define XFS_ICWALK_FLAG_RECLAIM_SICK (1U << 27) #define XFS_ICWALK_FLAG_UNION (1U << 26) /* union filter algorithm */ -#define XFS_ICWALK_PRIVATE_FLAGS (XFS_ICWALK_FLAG_DROP_UDQUOT | \ - XFS_ICWALK_FLAG_DROP_GDQUOT | \ - XFS_ICWALK_FLAG_DROP_PDQUOT | \ - XFS_ICWALK_FLAG_SCAN_LIMIT | \ +#define XFS_ICWALK_PRIVATE_FLAGS (XFS_ICWALK_FLAG_SCAN_LIMIT | \ XFS_ICWALK_FLAG_RECLAIM_SICK | \ XFS_ICWALK_FLAG_UNION)
@@ -102,8 +84,9 @@ xfs_inode_alloc( return NULL; } - /* VFS doesn't initialise i_mode! */ + /* VFS doesn't initialise i_mode or i_state! */ VFS_I(ip)->i_mode = 0; + VFS_I(ip)->i_state = 0; XFS_STATS_INC(mp, vn_active); ASSERT(atomic_read(&ip->i_pincount) == 0);
@@ -220,9 +203,14 @@ static inline void xfs_blockgc_queue( struct xfs_perag *pag) { + struct xfs_mount *mp = pag->pag_mount; + + if (!xfs_is_blockgc_enabled(mp)) + return; + rcu_read_lock(); if (radix_tree_tagged(&pag->pag_ici_root, XFS_ICI_BLOCKGC_TAG)) - queue_delayed_work(pag->pag_mount->m_gc_workqueue, + queue_delayed_work(pag->pag_mount->m_blockgc_wq, &pag->pag_blockgc_work, msecs_to_jiffies(xfs_blockgc_secs * 1000)); rcu_read_unlock();
@@ -301,31 +289,6 @@ xfs_perag_clear_inode_tag( trace_xfs_perag_clear_inode_tag(mp, pag->pag_agno, tag, _RET_IP_); } -/* - * We set the inode flag atomically with the radix tree tag. - * Once we get tag lookups on the radix tree, this inode flag - * can go away. - */ -void -xfs_inode_mark_reclaimable( - struct xfs_inode *ip) -{ - struct xfs_mount *mp = ip->i_mount; - struct xfs_perag *pag; - - pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ip->i_ino)); - spin_lock(&pag->pag_ici_lock); - spin_lock(&ip->i_flags_lock); - - xfs_perag_set_inode_tag(pag, XFS_INO_TO_AGINO(mp, ip->i_ino), - XFS_ICI_RECLAIM_TAG); - __xfs_iflags_set(ip, XFS_IRECLAIMABLE); - - spin_unlock(&ip->i_flags_lock); - spin_unlock(&pag->pag_ici_lock); - xfs_perag_put(pag); -} - static inline void xfs_inew_wait( struct xfs_inode *ip)
@@ -484,6 +447,21 @@ xfs_iget_check_free_state( return 0; } +/* Make all pending inactivation work start immediately. */ +static void +xfs_inodegc_queue_all( + struct xfs_mount *mp) +{ + struct xfs_inodegc *gc; + int cpu; + + for_each_online_cpu(cpu) { + gc = per_cpu_ptr(mp->m_inodegc, cpu); + if (!llist_empty(&gc->list)) + queue_work_on(cpu, mp->m_inodegc_wq, &gc->work); + } +} + /* * Check the validity of the inode we just found in the cache */
@@ -516,13 +494,30 @@ xfs_iget_cache_hit( * reclaimable state, wait for the initialisation to complete * before continuing. * + * If we're racing with the inactivation worker we also want to wait. + * If we're creating a new file, it's possible that the worker + * previously marked the inode as free on disk but hasn't finished + * updating the incore state yet. The AGI buffer will be dirty and + * locked to the icreate transaction, so a synchronous push of the + * inodegc workers would result in deadlock.
For a regular iget, the + * worker is running already, so we might as well wait. + * * XXX(hch): eventually we should do something equivalent to * wait_on_inode to wait for these flags to be cleared * instead of polling for it. */ - if (ip->i_flags & (XFS_INEW | XFS_IRECLAIM)) + if (ip->i_flags & (XFS_INEW | XFS_IRECLAIM | XFS_INACTIVATING)) goto out_skip; + if (ip->i_flags & XFS_NEED_INACTIVE) { + /* Unlinked inodes cannot be re-grabbed. */ + if (VFS_I(ip)->i_nlink == 0) { + error = -ENOENT; + goto out_error; + } + goto out_inodegc_flush; + } + /* * Check the inode free state is valid. This also detects lookup * racing with unlinks. @@ -570,6 +565,17 @@ out_error: spin_unlock(&ip->i_flags_lock); rcu_read_unlock(); return error; + +out_inodegc_flush: + spin_unlock(&ip->i_flags_lock); + rcu_read_unlock(); + /* + * Do not wait for the workers, because the caller could hold an AGI + * buffer lock. We're just going to sleep in a loop anyway. + */ + if (xfs_is_inodegc_enabled(mp)) + xfs_inodegc_queue_all(mp); + return -EAGAIN; } static int @@ -597,7 +603,7 @@ xfs_iget_cache_miss( /* * For version 5 superblocks, if we are initialising a new inode and we - * are not utilising the XFS_MOUNT_IKEEP inode cluster mode, we can + * are not utilising the XFS_FEAT_IKEEP inode cluster mode, we can * simply build the new inode core with a random generation number. * * For version 4 (and older) superblocks, log recovery is dependent on @@ -605,8 +611,8 @@ xfs_iget_cache_miss( * value and hence we must also read the inode off disk even when * initializing new inodes. */ - if (xfs_sb_version_has_v3inode(&mp->m_sb) && - (flags & XFS_IGET_CREATE) && !(mp->m_flags & XFS_MOUNT_IKEEP)) { + if (xfs_has_v3inodes(mp) && + (flags & XFS_IGET_CREATE) && !xfs_has_ikeep(mp)) { VFS_I(ip)->i_generation = prandom_u32(); } else { struct xfs_buf *bp; @@ -817,97 +823,6 @@ xfs_icache_inode_is_allocated( return 0; } -#ifdef CONFIG_XFS_QUOTA -/* Decide if we want to grab this inode to drop its dquots. */ -static bool -xfs_dqrele_igrab( - struct xfs_inode *ip) -{ - bool ret = false; - - ASSERT(rcu_read_lock_held()); - - /* Check for stale RCU freed inode */ - spin_lock(&ip->i_flags_lock); - if (!ip->i_ino) - goto out_unlock; - - /* - * Skip inodes that are anywhere in the reclaim machinery because we - * drop dquots before tagging an inode for reclamation. - */ - if (ip->i_flags & (XFS_IRECLAIM | XFS_IRECLAIMABLE)) - goto out_unlock; - - /* - * The inode looks alive; try to grab a VFS reference so that it won't - * get destroyed. If we got the reference, return true to say that - * we grabbed the inode. - * - * If we can't get the reference, then we know the inode had its VFS - * state torn down and hasn't yet entered the reclaim machinery. Since - * we also know that dquots are detached from an inode before it enters - * reclaim, we can skip the inode. - */ - ret = igrab(VFS_I(ip)) != NULL; - -out_unlock: - spin_unlock(&ip->i_flags_lock); - return ret; -} - -/* Drop this inode's dquots. 
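The out_inodegc_flush path above returns -EAGAIN after kicking the per-cpu workers; the lookup side is expected to back off and retry rather than sleep on the workqueue while possibly holding an AGI buffer lock. A sketch of that caller pattern (xfs_iget already retries -EAGAIN in this style; the wrapper below is illustrative):

static int
example_iget_retry(
	struct xfs_mount	*mp,
	xfs_ino_t		ino,
	struct xfs_inode	**ipp)
{
	int			error;

	do {
		error = xfs_iget(mp, NULL, ino, 0, 0, ipp);
		if (error == -EAGAIN)
			delay(1);	/* inodegc was queued; back off briefly */
	} while (error == -EAGAIN);

	return error;
}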
*/ -static void -xfs_dqrele_inode( - struct xfs_inode *ip, - struct xfs_icwalk *icw) -{ - if (xfs_iflags_test(ip, XFS_INEW)) - xfs_inew_wait(ip); - - xfs_ilock(ip, XFS_ILOCK_EXCL); - if (icw->icw_flags & XFS_ICWALK_FLAG_DROP_UDQUOT) { - xfs_qm_dqrele(ip->i_udquot); - ip->i_udquot = NULL; - } - if (icw->icw_flags & XFS_ICWALK_FLAG_DROP_GDQUOT) { - xfs_qm_dqrele(ip->i_gdquot); - ip->i_gdquot = NULL; - } - if (icw->icw_flags & XFS_ICWALK_FLAG_DROP_PDQUOT) { - xfs_qm_dqrele(ip->i_pdquot); - ip->i_pdquot = NULL; - } - xfs_iunlock(ip, XFS_ILOCK_EXCL); - xfs_irele(ip); -} - -/* - * Detach all dquots from incore inodes if we can. The caller must already - * have dropped the relevant XFS_[UGP]QUOTA_ACTIVE flags so that dquots will - * not get reattached. - */ -int -xfs_dqrele_all_inodes( - struct xfs_mount *mp, - unsigned int qflags) -{ - struct xfs_icwalk icw = { .icw_flags = 0 }; - - if (qflags & XFS_UQUOTA_ACCT) - icw.icw_flags |= XFS_ICWALK_FLAG_DROP_UDQUOT; - if (qflags & XFS_GQUOTA_ACCT) - icw.icw_flags |= XFS_ICWALK_FLAG_DROP_GDQUOT; - if (qflags & XFS_PQUOTA_ACCT) - icw.icw_flags |= XFS_ICWALK_FLAG_DROP_PDQUOT; - - return xfs_icwalk(mp, XFS_ICWALK_DQRELE, &icw); -} -#else -# define xfs_dqrele_igrab(ip) (false) -# define xfs_dqrele_inode(ip, priv) ((void)0) -#endif /* CONFIG_XFS_QUOTA */ - /* * Grab the inode for reclaim exclusively. *
@@ -976,7 +891,7 @@ xfs_reclaim_inode( if (xfs_iflags_test_and_set(ip, XFS_IFLUSHING)) goto out_iunlock; - if (XFS_FORCED_SHUTDOWN(ip->i_mount)) { + if (xfs_is_shutdown(ip->i_mount)) { xfs_iunpin_wait(ip); xfs_iflush_abort(ip); goto reclaim;
@@ -988,6 +903,7 @@ xfs_reclaim_inode( xfs_iflags_clear(ip, XFS_IFLUSHING); reclaim: + trace_xfs_inode_reclaiming(ip); /* * Because we use RCU freeing we need to ensure the inode always appears
@@ -1052,9 +968,8 @@ static inline bool xfs_want_reclaim_sick( struct xfs_mount *mp) { - return (mp->m_flags & XFS_MOUNT_UNMOUNTING) || - (mp->m_flags & XFS_MOUNT_NORECOVERY) || - XFS_FORCED_SHUTDOWN(mp); + return xfs_is_unmounting(mp) || xfs_has_norecovery(mp) || + xfs_is_shutdown(mp); } void
@@ -1447,8 +1362,12 @@ xfs_blockgc_stop( struct xfs_perag *pag; xfs_agnumber_t agno; - for_each_perag_tag(mp, agno, pag, XFS_ICI_BLOCKGC_TAG) + if (!xfs_clear_blockgc_enabled(mp)) + return; + + for_each_perag(mp, agno, pag) cancel_delayed_work_sync(&pag->pag_blockgc_work); + trace_xfs_blockgc_stop(mp, __return_address); } /* Enable post-EOF and CoW block auto-reclamation. */
@@ -1459,12 +1378,18 @@ xfs_blockgc_start( struct xfs_perag *pag; xfs_agnumber_t agno; + if (xfs_set_blockgc_enabled(mp)) + return; + + trace_xfs_blockgc_start(mp, __return_address); for_each_perag_tag(mp, agno, pag, XFS_ICI_BLOCKGC_TAG) xfs_blockgc_queue(pag); } /* Don't try to run block gc on an inode that's in any of these states. */ #define XFS_BLOCKGC_NOGRAB_IFLAGS (XFS_INEW | \ + XFS_NEED_INACTIVE | \ + XFS_INACTIVATING | \ XFS_IRECLAIMABLE | \ XFS_IRECLAIM) /*
@@ -1490,7 +1415,7 @@ xfs_blockgc_igrab( spin_unlock(&ip->i_flags_lock); /* nothing to sync during shutdown */ - if (XFS_FORCED_SHUTDOWN(ip->i_mount)) + if (xfs_is_shutdown(ip->i_mount)) return false; /* If we can't grab the inode, it must be on its way to reclaim.
*/ @@ -1536,27 +1461,62 @@ xfs_blockgc_worker( struct xfs_mount *mp = pag->pag_mount; int error; - if (!sb_start_write_trylock(mp->m_super)) - return; + trace_xfs_blockgc_worker(mp, __return_address); + error = xfs_icwalk_ag(pag, XFS_ICWALK_BLOCKGC, NULL); if (error) xfs_info(mp, "AG %u preallocation gc worker failed, err=%d", pag->pag_agno, error); - sb_end_write(mp->m_super); xfs_blockgc_queue(pag); } /* - * Try to free space in the filesystem by purging eofblocks and cowblocks. + * Try to free space in the filesystem by purging inactive inodes, eofblocks + * and cowblocks. */ int xfs_blockgc_free_space( struct xfs_mount *mp, struct xfs_icwalk *icw) { + int error; + trace_xfs_blockgc_free_space(mp, icw, _RET_IP_); - return xfs_icwalk(mp, XFS_ICWALK_BLOCKGC, icw); + error = xfs_icwalk(mp, XFS_ICWALK_BLOCKGC, icw); + if (error) + return error; + + xfs_inodegc_flush(mp); + return 0; +} + +/* + * Reclaim all the free space that we can by scheduling the background blockgc + * and inodegc workers immediately and waiting for them all to clear. + */ +void +xfs_blockgc_flush_all( + struct xfs_mount *mp) +{ + struct xfs_perag *pag; + xfs_agnumber_t agno; + + trace_xfs_blockgc_flush_all(mp, __return_address); + + /* + * For each blockgc worker, move its queue time up to now. If it + * wasn't queued, it will not be requeued. Then flush whatever's + * left. + */ + for_each_perag_tag(mp, agno, pag, XFS_ICI_BLOCKGC_TAG) + mod_delayed_work(pag->pag_mount->m_blockgc_wq, + &pag->pag_blockgc_work, 0); + + for_each_perag_tag(mp, agno, pag, XFS_ICI_BLOCKGC_TAG) + flush_delayed_work(&pag->pag_blockgc_work); + + xfs_inodegc_flush(mp); } /* @@ -1647,8 +1607,6 @@ xfs_icwalk_igrab( struct xfs_icwalk *icw) { switch (goal) { - case XFS_ICWALK_DQRELE: - return xfs_dqrele_igrab(ip); case XFS_ICWALK_BLOCKGC: return xfs_blockgc_igrab(ip); case XFS_ICWALK_RECLAIM: @@ -1672,9 +1630,6 @@ xfs_icwalk_process_inode( int error = 0; switch (goal) { - case XFS_ICWALK_DQRELE: - xfs_dqrele_inode(ip, icw); - break; case XFS_ICWALK_BLOCKGC: error = xfs_blockgc_scan_inode(ip, icw); break; @@ -1712,22 +1667,14 @@ restart: nr_found = 0; do { struct xfs_inode *batch[XFS_LOOKUP_BATCH]; - unsigned int tag = xfs_icwalk_tag(goal); int error = 0; int i; rcu_read_lock(); - if (tag == XFS_ICWALK_NULL_TAG) - nr_found = radix_tree_gang_lookup(&pag->pag_ici_root, - (void **)batch, first_index, - XFS_LOOKUP_BATCH); - else - nr_found = radix_tree_gang_lookup_tag( - &pag->pag_ici_root, - (void **) batch, first_index, - XFS_LOOKUP_BATCH, tag); - + nr_found = radix_tree_gang_lookup_tag(&pag->pag_ici_root, + (void **) batch, first_index, + XFS_LOOKUP_BATCH, goal); if (!nr_found) { done = true; rcu_read_unlock(); @@ -1805,20 +1752,6 @@ restart: return last_error; } -/* Fetch the next (possibly tagged) per-AG structure. */ -static inline struct xfs_perag * -xfs_icwalk_get_perag( - struct xfs_mount *mp, - xfs_agnumber_t agno, - enum xfs_icwalk_goal goal) -{ - unsigned int tag = xfs_icwalk_tag(goal); - - if (tag == XFS_ICWALK_NULL_TAG) - return xfs_perag_get(mp, agno); - return xfs_perag_get_tag(mp, agno, tag); -} - /* Walk all incore inodes to achieve a given goal. 
*/ static int xfs_icwalk( @@ -1829,18 +1762,465 @@ xfs_icwalk( struct xfs_perag *pag; int error = 0; int last_error = 0; - xfs_agnumber_t agno = 0; + xfs_agnumber_t agno; - while ((pag = xfs_icwalk_get_perag(mp, agno, goal))) { - agno = pag->pag_agno + 1; + for_each_perag_tag(mp, agno, pag, goal) { error = xfs_icwalk_ag(pag, goal, icw); - xfs_perag_put(pag); if (error) { last_error = error; - if (error == -EFSCORRUPTED) + if (error == -EFSCORRUPTED) { + xfs_perag_put(pag); break; + } } } return last_error; BUILD_BUG_ON(XFS_ICWALK_PRIVATE_FLAGS & XFS_ICWALK_FLAGS_VALID); } + +#ifdef DEBUG +static void +xfs_check_delalloc( + struct xfs_inode *ip, + int whichfork) +{ + struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, whichfork); + struct xfs_bmbt_irec got; + struct xfs_iext_cursor icur; + + if (!ifp || !xfs_iext_lookup_extent(ip, ifp, 0, &icur, &got)) + return; + do { + if (isnullstartblock(got.br_startblock)) { + xfs_warn(ip->i_mount, + "ino %llx %s fork has delalloc extent at [0x%llx:0x%llx]", + ip->i_ino, + whichfork == XFS_DATA_FORK ? "data" : "cow", + got.br_startoff, got.br_blockcount); + } + } while (xfs_iext_next_extent(ifp, &icur, &got)); +} +#else +#define xfs_check_delalloc(ip, whichfork) do { } while (0) +#endif + +/* Schedule the inode for reclaim. */ +static void +xfs_inodegc_set_reclaimable( + struct xfs_inode *ip) +{ + struct xfs_mount *mp = ip->i_mount; + struct xfs_perag *pag; + + if (!xfs_is_shutdown(mp) && ip->i_delayed_blks) { + xfs_check_delalloc(ip, XFS_DATA_FORK); + xfs_check_delalloc(ip, XFS_COW_FORK); + ASSERT(0); + } + + pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ip->i_ino)); + spin_lock(&pag->pag_ici_lock); + spin_lock(&ip->i_flags_lock); + + trace_xfs_inode_set_reclaimable(ip); + ip->i_flags &= ~(XFS_NEED_INACTIVE | XFS_INACTIVATING); + ip->i_flags |= XFS_IRECLAIMABLE; + xfs_perag_set_inode_tag(pag, XFS_INO_TO_AGINO(mp, ip->i_ino), + XFS_ICI_RECLAIM_TAG); + + spin_unlock(&ip->i_flags_lock); + spin_unlock(&pag->pag_ici_lock); + xfs_perag_put(pag); +} + +/* + * Free all speculative preallocations and possibly even the inode itself. + * This is the last chance to make changes to an otherwise unreferenced file + * before incore reclamation happens. + */ +static void +xfs_inodegc_inactivate( + struct xfs_inode *ip) +{ + trace_xfs_inode_inactivating(ip); + xfs_inactive(ip); + xfs_inodegc_set_reclaimable(ip); +} + +void +xfs_inodegc_worker( + struct work_struct *work) +{ + struct xfs_inodegc *gc = container_of(work, struct xfs_inodegc, + work); + struct llist_node *node = llist_del_all(&gc->list); + struct xfs_inode *ip, *n; + + WRITE_ONCE(gc->items, 0); + + if (!node) + return; + + ip = llist_entry(node, struct xfs_inode, i_gclist); + trace_xfs_inodegc_worker(ip->i_mount, READ_ONCE(gc->shrinker_hits)); + + WRITE_ONCE(gc->shrinker_hits, 0); + llist_for_each_entry_safe(ip, n, node, i_gclist) { + xfs_iflags_set(ip, XFS_INACTIVATING); + xfs_inodegc_inactivate(ip); + } +} + +/* + * Force all currently queued inode inactivation work to run immediately, and + * wait for the work to finish. Two pass - queue all the work first pass, wait + * for it in a second pass. 
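The inodegc worker above drains its queue with llist_del_all() and never takes a lock; producers (xfs_inodegc_queue(), further down) push with llist_add(). A minimal standalone sketch of this lock-free multi-producer/single-consumer pattern:

#include <linux/llist.h>

struct gc_item {
	struct llist_node	node;
};

static LLIST_HEAD(gc_queue);

/* Producer: atomic push, usable from any context. */
static void gc_produce(struct gc_item *it)
{
	llist_add(&it->node, &gc_queue);
}

/* Consumer: atomically detach the whole list, then walk it privately. */
static void gc_consume(void)
{
	struct llist_node *first = llist_del_all(&gc_queue);
	struct gc_item *it, *next;

	llist_for_each_entry_safe(it, next, first, node) {
		/* process and free 'it' here; 'next' is already saved */
	}
}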
+ */ +void +xfs_inodegc_flush( + struct xfs_mount *mp) +{ + struct xfs_inodegc *gc; + int cpu; + + if (!xfs_is_inodegc_enabled(mp)) + return; + + trace_xfs_inodegc_flush(mp, __return_address); + + xfs_inodegc_queue_all(mp); + + for_each_online_cpu(cpu) { + gc = per_cpu_ptr(mp->m_inodegc, cpu); + flush_work(&gc->work); + } +} + +/* + * Flush all the pending work and then disable the inode inactivation background + * workers and wait for them to stop. + */ +void +xfs_inodegc_stop( + struct xfs_mount *mp) +{ + struct xfs_inodegc *gc; + int cpu; + + if (!xfs_clear_inodegc_enabled(mp)) + return; + + xfs_inodegc_queue_all(mp); + + for_each_online_cpu(cpu) { + gc = per_cpu_ptr(mp->m_inodegc, cpu); + cancel_work_sync(&gc->work); + } + trace_xfs_inodegc_stop(mp, __return_address); +} + +/* + * Enable the inode inactivation background workers and schedule deferred inode + * inactivation work if there is any. + */ +void +xfs_inodegc_start( + struct xfs_mount *mp) +{ + if (xfs_set_inodegc_enabled(mp)) + return; + + trace_xfs_inodegc_start(mp, __return_address); + xfs_inodegc_queue_all(mp); +} + +#ifdef CONFIG_XFS_RT +static inline bool +xfs_inodegc_want_queue_rt_file( + struct xfs_inode *ip) +{ + struct xfs_mount *mp = ip->i_mount; + uint64_t freertx; + + if (!XFS_IS_REALTIME_INODE(ip)) + return false; + + freertx = READ_ONCE(mp->m_sb.sb_frextents); + return freertx < mp->m_low_rtexts[XFS_LOWSP_5_PCNT]; +} +#else +# define xfs_inodegc_want_queue_rt_file(ip) (false) +#endif /* CONFIG_XFS_RT */ + +/* + * Schedule the inactivation worker when: + * + * - We've accumulated more than one inode cluster buffer's worth of inodes. + * - There is less than 5% free space left. + * - Any of the quotas for this inode are near an enforcement limit. + */ +static inline bool +xfs_inodegc_want_queue_work( + struct xfs_inode *ip, + unsigned int items) +{ + struct xfs_mount *mp = ip->i_mount; + + if (items > mp->m_ino_geo.inodes_per_cluster) + return true; + + if (__percpu_counter_compare(&mp->m_fdblocks, + mp->m_low_space[XFS_LOWSP_5_PCNT], + XFS_FDBLOCKS_BATCH) < 0) + return true; + + if (xfs_inodegc_want_queue_rt_file(ip)) + return true; + + if (xfs_inode_near_dquot_enforcement(ip, XFS_DQTYPE_USER)) + return true; + + if (xfs_inode_near_dquot_enforcement(ip, XFS_DQTYPE_GROUP)) + return true; + + if (xfs_inode_near_dquot_enforcement(ip, XFS_DQTYPE_PROJ)) + return true; + + return false; +} + +/* + * Upper bound on the number of inodes in each AG that can be queued for + * inactivation at any given time, to avoid monopolizing the workqueue. + */ +#define XFS_INODEGC_MAX_BACKLOG (4 * XFS_INODES_PER_CHUNK) + +/* + * Make the frontend wait for inactivations when: + * + * - Memory shrinkers queued the inactivation worker and it hasn't finished. + * - The queue depth exceeds the maximum allowable percpu backlog. + * + * Note: If the current thread is running a transaction, we don't ever want to + * wait for other transactions because that could introduce a deadlock. + */ +static inline bool +xfs_inodegc_want_flush_work( + struct xfs_inode *ip, + unsigned int items, + unsigned int shrinker_hits) +{ + if (current->journal_info) + return false; + + if (shrinker_hits > 0) + return true; + + if (items > XFS_INODEGC_MAX_BACKLOG) + return true; + + return false; +} + +/* + * Queue a background inactivation worker if there are inodes that need to be + * inactivated and higher level xfs code hasn't disabled the background + * workers. 
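For a feel of the queueing and throttling thresholds above, worked numbers under common assumptions (512-byte inodes with 16K inode cluster buffers, and mainline XFS_INODES_PER_CHUNK == 64; both values are configuration-dependent):

/*
 * inodes_per_cluster      == 16384 / 512 == 32
 *   -> kick the worker once a CPU has batched more than ~32 inodes
 * XFS_INODEGC_MAX_BACKLOG == 4 * 64      == 256
 *   -> throttle the frontend once a CPU queue exceeds 256 inodes
 */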
+ */ +static void +xfs_inodegc_queue( + struct xfs_inode *ip) +{ + struct xfs_mount *mp = ip->i_mount; + struct xfs_inodegc *gc; + int items; + unsigned int shrinker_hits; + + trace_xfs_inode_set_need_inactive(ip); + spin_lock(&ip->i_flags_lock); + ip->i_flags |= XFS_NEED_INACTIVE; + spin_unlock(&ip->i_flags_lock); + + gc = get_cpu_ptr(mp->m_inodegc); + llist_add(&ip->i_gclist, &gc->list); + items = READ_ONCE(gc->items); + WRITE_ONCE(gc->items, items + 1); + shrinker_hits = READ_ONCE(gc->shrinker_hits); + put_cpu_ptr(gc); + + if (!xfs_is_inodegc_enabled(mp)) + return; + + if (xfs_inodegc_want_queue_work(ip, items)) { + trace_xfs_inodegc_queue(mp, __return_address); + queue_work(mp->m_inodegc_wq, &gc->work); + } + + if (xfs_inodegc_want_flush_work(ip, items, shrinker_hits)) { + trace_xfs_inodegc_throttle(mp, __return_address); + flush_work(&gc->work); + } +} +
+/* + * Fold the dead CPU inodegc queue into the current CPU's queue. + */ +void +xfs_inodegc_cpu_dead( + struct xfs_mount *mp, + unsigned int dead_cpu) +{ + struct xfs_inodegc *dead_gc, *gc; + struct llist_node *first, *last; + unsigned int count = 0; + + dead_gc = per_cpu_ptr(mp->m_inodegc, dead_cpu); + cancel_work_sync(&dead_gc->work); + + if (llist_empty(&dead_gc->list)) + return; + + first = dead_gc->list.first; + last = first; + while (last->next) { + last = last->next; + count++; + } + dead_gc->list.first = NULL; + dead_gc->items = 0; + + /* Add pending work to current CPU */ + gc = get_cpu_ptr(mp->m_inodegc); + llist_add_batch(first, last, &gc->list); + count += READ_ONCE(gc->items); + WRITE_ONCE(gc->items, count); + put_cpu_ptr(gc); + + if (xfs_is_inodegc_enabled(mp)) { + trace_xfs_inodegc_queue(mp, __return_address); + queue_work(mp->m_inodegc_wq, &gc->work); + } +} +
+/* + * We set the inode flag atomically with the radix tree tag. Once we get tag + * lookups on the radix tree, this inode flag can go away. + * + * We always use background reclaim here because even if the inode is clean, it + * still may be under IO and hence we have to wait for IO completion to occur + * before we can reclaim the inode. The background reclaim path handles this + * more efficiently than we can here, so simply let background reclaim tear down + * all inodes. + */ +void +xfs_inode_mark_reclaimable( + struct xfs_inode *ip) +{ + struct xfs_mount *mp = ip->i_mount; + bool need_inactive; + + XFS_STATS_INC(mp, vn_reclaim); + + /* + * We should never get here with any of the reclaim flags already set. + */ + ASSERT_ALWAYS(!xfs_iflags_test(ip, XFS_ALL_IRECLAIM_FLAGS)); + + need_inactive = xfs_inode_needs_inactive(ip); + if (need_inactive) { + xfs_inodegc_queue(ip); + return; + } + + /* Going straight to reclaim, so drop the dquots. */ + xfs_qm_dqdetach(ip); + xfs_inodegc_set_reclaimable(ip); +} +
+/* + * Register a phony shrinker so that we can run background inodegc sooner when + * there's memory pressure. Inactivation does not itself free any memory but + * it does make inodes reclaimable, which eventually frees memory. + * + * The count function, seek value, and batch value are crafted to trigger the + * scan function during the second round of scanning. Hopefully this means + * that we reclaimed enough memory that initiating metadata transactions won't + * make things worse.
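Worked numbers for the shrinker defines that follow, assuming mainline DEF_PRIORITY == 12 and vmscan's rule that a shrinker with seeks == 0 contributes freeable / 2 to the scan target per invocation:

/*
 * XFS_INODEGC_SHRINKER_COUNT == 1 << 12      == 4096
 * XFS_INODEGC_SHRINKER_BATCH == 4096 / 2 + 1 == 2049
 *
 * Round 1: scan target = 4096 / 2 = 2048 < 2049 (the batch size), so no
 *          scan happens; the 2048 carries over as deferred work.
 * Round 2: 2048 + 2048 = 4096 >= 2049, so ->scan_objects() finally runs
 *          and kicks the inodegc workers -- the "second round" above.
 */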
+ */ +#define XFS_INODEGC_SHRINKER_COUNT (1UL << DEF_PRIORITY) +#define XFS_INODEGC_SHRINKER_BATCH ((XFS_INODEGC_SHRINKER_COUNT / 2) + 1) + +static unsigned long +xfs_inodegc_shrinker_count( + struct shrinker *shrink, + struct shrink_control *sc) +{ + struct xfs_mount *mp = container_of(shrink, struct xfs_mount, + m_inodegc_shrinker); + struct xfs_inodegc *gc; + int cpu; + + if (!xfs_is_inodegc_enabled(mp)) + return 0; + + for_each_online_cpu(cpu) { + gc = per_cpu_ptr(mp->m_inodegc, cpu); + if (!llist_empty(&gc->list)) + return XFS_INODEGC_SHRINKER_COUNT; + } + + return 0; +} + +static unsigned long +xfs_inodegc_shrinker_scan( + struct shrinker *shrink, + struct shrink_control *sc) +{ + struct xfs_mount *mp = container_of(shrink, struct xfs_mount, + m_inodegc_shrinker); + struct xfs_inodegc *gc; + int cpu; + bool no_items = true; + + if (!xfs_is_inodegc_enabled(mp)) + return SHRINK_STOP; + + trace_xfs_inodegc_shrinker_scan(mp, sc, __return_address); + + for_each_online_cpu(cpu) { + gc = per_cpu_ptr(mp->m_inodegc, cpu); + if (!llist_empty(&gc->list)) { + unsigned int h = READ_ONCE(gc->shrinker_hits); + + WRITE_ONCE(gc->shrinker_hits, h + 1); + queue_work_on(cpu, mp->m_inodegc_wq, &gc->work); + no_items = false; + } + } + + /* + * If there are no inodes to inactivate, we don't want the shrinker + * to think there's deferred work to call us back about. + */ + if (no_items) + return LONG_MAX; + + return SHRINK_STOP; +} + +/* Register a shrinker so we can accelerate inodegc and throttle queuing. */ +int +xfs_inodegc_register_shrinker( + struct xfs_mount *mp) +{ + struct shrinker *shrink = &mp->m_inodegc_shrinker; + + shrink->count_objects = xfs_inodegc_shrinker_count; + shrink->scan_objects = xfs_inodegc_shrinker_scan; + shrink->seeks = 0; + shrink->flags = SHRINKER_NONSLAB; + shrink->batch = XFS_INODEGC_SHRINKER_BATCH; + + return register_shrinker(shrink); +} diff --git a/fs/xfs/xfs_icache.h b/fs/xfs/xfs_icache.h index c751cc32dc46..2e4cfddf8b8e 100644 --- a/fs/xfs/xfs_icache.h +++ b/fs/xfs/xfs_icache.h @@ -59,6 +59,7 @@ int xfs_blockgc_free_dquots(struct xfs_mount *mp, struct xfs_dquot *udqp, unsigned int iwalk_flags); int xfs_blockgc_free_quota(struct xfs_inode *ip, unsigned int iwalk_flags); int xfs_blockgc_free_space(struct xfs_mount *mp, struct xfs_icwalk *icm); +void xfs_blockgc_flush_all(struct xfs_mount *mp); void xfs_inode_set_eofblocks_tag(struct xfs_inode *ip); void xfs_inode_clear_eofblocks_tag(struct xfs_inode *ip); @@ -68,16 +69,17 @@ void xfs_inode_clear_cowblocks_tag(struct xfs_inode *ip); void xfs_blockgc_worker(struct work_struct *work); -#ifdef CONFIG_XFS_QUOTA -int xfs_dqrele_all_inodes(struct xfs_mount *mp, unsigned int qflags); -#else -# define xfs_dqrele_all_inodes(mp, qflags) (0) -#endif - int xfs_icache_inode_is_allocated(struct xfs_mount *mp, struct xfs_trans *tp, xfs_ino_t ino, bool *inuse); void xfs_blockgc_stop(struct xfs_mount *mp); void xfs_blockgc_start(struct xfs_mount *mp); +void xfs_inodegc_worker(struct work_struct *work); +void xfs_inodegc_flush(struct xfs_mount *mp); +void xfs_inodegc_stop(struct xfs_mount *mp); +void xfs_inodegc_start(struct xfs_mount *mp); +void xfs_inodegc_cpu_dead(struct xfs_mount *mp, unsigned int cpu); +int xfs_inodegc_register_shrinker(struct xfs_mount *mp); + #endif diff --git a/fs/xfs/xfs_icreate_item.c b/fs/xfs/xfs_icreate_item.c index 9b3994b9c716..017904a34c02 100644 --- a/fs/xfs/xfs_icreate_item.c +++ b/fs/xfs/xfs_icreate_item.c @@ -201,7 +201,7 @@ xlog_recover_icreate_commit_pass2( if (length != igeo->ialloc_blks && length != 
igeo->ialloc_min_blks) { xfs_warn(log->l_mp, - "%s: unsupported chunk length", __FUNCTION__); + "%s: unsupported chunk length", __func__); return -EINVAL; }
@@ -209,7 +209,7 @@ xlog_recover_icreate_commit_pass2( if ((count >> mp->m_sb.sb_inopblog) != length) { xfs_warn(log->l_mp, "%s: inconsistent inode count and chunk length", - __FUNCTION__); + __func__); return -EINVAL; }
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index 990b72ae3635..a4f6f034fb81 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c
@@ -132,7 +132,7 @@ xfs_ilock_attr_map_shared( /* * In addition to i_rwsem in the VFS inode, the xfs inode contains 2 - * multi-reader locks: i_mmap_lock and the i_lock. This routine allows + * multi-reader locks: invalidate_lock and the i_lock. This routine allows * various combinations of the locks to be obtained. * * The 3 locks should always be ordered so that the IO lock is obtained first, *
@@ -140,23 +140,23 @@ xfs_ilock_attr_map_shared( * * Basic locking order: * - * i_rwsem -> i_mmap_lock -> page_lock -> i_ilock + * i_rwsem -> invalidate_lock -> page_lock -> i_ilock * * mmap_lock locking order: * * i_rwsem -> page lock -> mmap_lock - * mmap_lock -> i_mmap_lock -> page_lock + * mmap_lock -> invalidate_lock -> page_lock * * The difference in mmap_lock locking order means that we cannot hold the - * i_mmap_lock over syscall based read(2)/write(2) based IO. These IO paths can - * fault in pages during copy in/out (for buffered IO) or require the mmap_lock - * in get_user_pages() to map the user pages into the kernel address space for - * direct IO. Similarly the i_rwsem cannot be taken inside a page fault because - * page faults already hold the mmap_lock. + * invalidate_lock over syscall based read(2)/write(2) based IO. These IO paths + * can fault in pages during copy in/out (for buffered IO) or require the + * mmap_lock in get_user_pages() to map the user pages into the kernel address + * space for direct IO. Similarly the i_rwsem cannot be taken inside a page + * fault because page faults already hold the mmap_lock. * * Hence to serialise fully against both syscall and mmap based IO, we need to - * take both the i_rwsem and the i_mmap_lock. These locks should *only* be both - * taken in places where we need to invalidate the page cache in a race + * take both the i_rwsem and the invalidate_lock. These locks should *only* be + * both taken in places where we need to invalidate the page cache in a race * free manner (e.g. truncate, hole punch and other extent manipulation * functions).
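A sketch of the full sequence the comment above prescribes for extent-manipulating paths (illustrative fragment only; transaction setup and error handling omitted):

static void
example_punch_range(
	struct xfs_inode	*ip,
	xfs_off_t		start,
	xfs_off_t		end)
{
	xfs_ilock(ip, XFS_IOLOCK_EXCL);		/* i_rwsem: stop new syscall IO */
	xfs_ilock(ip, XFS_MMAPLOCK_EXCL);	/* invalidate_lock: stop faults */

	truncate_pagecache_range(VFS_I(ip), start, end);

	xfs_ilock(ip, XFS_ILOCK_EXCL);		/* i_lock: extent map updates */
	/* ... manipulate extents here ... */
	xfs_iunlock(ip, XFS_ILOCK_EXCL);
	xfs_iunlock(ip, XFS_MMAPLOCK_EXCL);
	xfs_iunlock(ip, XFS_IOLOCK_EXCL);
}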
*/ @@ -188,10 +188,13 @@ xfs_ilock( XFS_IOLOCK_DEP(lock_flags)); } - if (lock_flags & XFS_MMAPLOCK_EXCL) - mrupdate_nested(&ip->i_mmaplock, XFS_MMAPLOCK_DEP(lock_flags)); - else if (lock_flags & XFS_MMAPLOCK_SHARED) - mraccess_nested(&ip->i_mmaplock, XFS_MMAPLOCK_DEP(lock_flags)); + if (lock_flags & XFS_MMAPLOCK_EXCL) { + down_write_nested(&VFS_I(ip)->i_mapping->invalidate_lock, + XFS_MMAPLOCK_DEP(lock_flags)); + } else if (lock_flags & XFS_MMAPLOCK_SHARED) { + down_read_nested(&VFS_I(ip)->i_mapping->invalidate_lock, + XFS_MMAPLOCK_DEP(lock_flags)); + } if (lock_flags & XFS_ILOCK_EXCL) mrupdate_nested(&ip->i_lock, XFS_ILOCK_DEP(lock_flags)); @@ -240,10 +243,10 @@ xfs_ilock_nowait( } if (lock_flags & XFS_MMAPLOCK_EXCL) { - if (!mrtryupdate(&ip->i_mmaplock)) + if (!down_write_trylock(&VFS_I(ip)->i_mapping->invalidate_lock)) goto out_undo_iolock; } else if (lock_flags & XFS_MMAPLOCK_SHARED) { - if (!mrtryaccess(&ip->i_mmaplock)) + if (!down_read_trylock(&VFS_I(ip)->i_mapping->invalidate_lock)) goto out_undo_iolock; } @@ -258,9 +261,9 @@ xfs_ilock_nowait( out_undo_mmaplock: if (lock_flags & XFS_MMAPLOCK_EXCL) - mrunlock_excl(&ip->i_mmaplock); + up_write(&VFS_I(ip)->i_mapping->invalidate_lock); else if (lock_flags & XFS_MMAPLOCK_SHARED) - mrunlock_shared(&ip->i_mmaplock); + up_read(&VFS_I(ip)->i_mapping->invalidate_lock); out_undo_iolock: if (lock_flags & XFS_IOLOCK_EXCL) up_write(&VFS_I(ip)->i_rwsem); @@ -307,9 +310,9 @@ xfs_iunlock( up_read(&VFS_I(ip)->i_rwsem); if (lock_flags & XFS_MMAPLOCK_EXCL) - mrunlock_excl(&ip->i_mmaplock); + up_write(&VFS_I(ip)->i_mapping->invalidate_lock); else if (lock_flags & XFS_MMAPLOCK_SHARED) - mrunlock_shared(&ip->i_mmaplock); + up_read(&VFS_I(ip)->i_mapping->invalidate_lock); if (lock_flags & XFS_ILOCK_EXCL) mrunlock_excl(&ip->i_lock); @@ -335,7 +338,7 @@ xfs_ilock_demote( if (lock_flags & XFS_ILOCK_EXCL) mrdemote(&ip->i_lock); if (lock_flags & XFS_MMAPLOCK_EXCL) - mrdemote(&ip->i_mmaplock); + downgrade_write(&VFS_I(ip)->i_mapping->invalidate_lock); if (lock_flags & XFS_IOLOCK_EXCL) downgrade_write(&VFS_I(ip)->i_rwsem); @@ -343,9 +346,29 @@ xfs_ilock_demote( } #if defined(DEBUG) || defined(XFS_WARN) -int +static inline bool +__xfs_rwsem_islocked( + struct rw_semaphore *rwsem, + bool shared) +{ + if (!debug_locks) + return rwsem_is_locked(rwsem); + + if (!shared) + return lockdep_is_held_type(rwsem, 0); + + /* + * We are checking that the lock is held at least in shared + * mode but don't care that it might be held exclusively + * (i.e. shared | excl). Hence we check if the lock is held + * in any mode rather than an explicit shared mode. 
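For reference, the lockdep query semantics that __xfs_rwsem_islocked() relies on: type 0 asks "held for write", type -1 asks "held in any mode", which is why the shared case queries -1. An illustrative fragment (meaningful only with CONFIG_LOCKDEP enabled):

static void example_held_type(struct rw_semaphore *sem)
{
	down_read(sem);
	WARN_ON(!lockdep_is_held_type(sem, -1));	/* held, any mode */
	WARN_ON(lockdep_is_held_type(sem, 0));		/* not held for write */
	up_read(sem);
}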
+ */ + return lockdep_is_held_type(rwsem, -1); +} + +bool xfs_isilocked( - xfs_inode_t *ip, + struct xfs_inode *ip, uint lock_flags) { if (lock_flags & (XFS_ILOCK_EXCL|XFS_ILOCK_SHARED)) {
@@ -355,20 +378,17 @@ xfs_isilocked( } if (lock_flags & (XFS_MMAPLOCK_EXCL|XFS_MMAPLOCK_SHARED)) { - if (!(lock_flags & XFS_MMAPLOCK_SHARED)) - return !!ip->i_mmaplock.mr_writer; - return rwsem_is_locked(&ip->i_mmaplock.mr_lock); + return __xfs_rwsem_islocked(&VFS_I(ip)->i_mapping->invalidate_lock, + (lock_flags & XFS_MMAPLOCK_SHARED)); } - if (lock_flags & (XFS_IOLOCK_EXCL|XFS_IOLOCK_SHARED)) { - if (!(lock_flags & XFS_IOLOCK_SHARED)) - return !debug_locks || - lockdep_is_held_type(&VFS_I(ip)->i_rwsem, 0); - return rwsem_is_locked(&VFS_I(ip)->i_rwsem); + if (lock_flags & (XFS_IOLOCK_EXCL | XFS_IOLOCK_SHARED)) { + return __xfs_rwsem_islocked(&VFS_I(ip)->i_rwsem, + (lock_flags & XFS_IOLOCK_SHARED)); } ASSERT(0); - return 0; + return false; } #endif
@@ -532,12 +552,10 @@ again: } /* - * xfs_lock_two_inodes() can only be used to lock one type of lock at a time - - * the mmaplock or the ilock, but not more than one type at a time. If we lock - * more than one at a time, lockdep will report false positives saying we have - * violated locking orders. The iolock must be double-locked separately since - * we use i_rwsem for that. We now support taking one lock EXCL and the other - * SHARED. + * xfs_lock_two_inodes() can only be used to lock ilock. The iolock and + * mmaplock must be double-locked separately since we use i_rwsem and + * invalidate_lock for that. We now support taking one lock EXCL and the + * other SHARED. */ void xfs_lock_two_inodes(
@@ -555,15 +573,8 @@ xfs_lock_two_inodes( ASSERT(hweight32(ip1_mode) == 1); ASSERT(!(ip0_mode & (XFS_IOLOCK_SHARED|XFS_IOLOCK_EXCL))); ASSERT(!(ip1_mode & (XFS_IOLOCK_SHARED|XFS_IOLOCK_EXCL))); - ASSERT(!(ip0_mode & (XFS_MMAPLOCK_SHARED|XFS_MMAPLOCK_EXCL)) || - !(ip0_mode & (XFS_ILOCK_SHARED|XFS_ILOCK_EXCL))); - ASSERT(!(ip1_mode & (XFS_MMAPLOCK_SHARED|XFS_MMAPLOCK_EXCL)) || - !(ip1_mode & (XFS_ILOCK_SHARED|XFS_ILOCK_EXCL))); - ASSERT(!(ip1_mode & (XFS_MMAPLOCK_SHARED|XFS_MMAPLOCK_EXCL)) || - !(ip0_mode & (XFS_ILOCK_SHARED|XFS_ILOCK_EXCL))); - ASSERT(!(ip0_mode & (XFS_MMAPLOCK_SHARED|XFS_MMAPLOCK_EXCL)) || - !(ip1_mode & (XFS_ILOCK_SHARED|XFS_ILOCK_EXCL))); - + ASSERT(!(ip0_mode & (XFS_MMAPLOCK_SHARED|XFS_MMAPLOCK_EXCL))); + ASSERT(!(ip1_mode & (XFS_MMAPLOCK_SHARED|XFS_MMAPLOCK_EXCL))); ASSERT(ip0->i_ino != ip1->i_ino); if (ip0->i_ino > ip1->i_ino) {
@@ -663,7 +674,7 @@ xfs_lookup( trace_xfs_lookup(dp, name); - if (XFS_FORCED_SHUTDOWN(dp->i_mount)) + if (xfs_is_shutdown(dp->i_mount)) return -EIO; error = xfs_dir_lookup(NULL, dp, name, &inum, ci_name);
@@ -705,7 +716,7 @@ xfs_inode_inherit_flags( di_flags |= XFS_DIFLAG_PROJINHERIT; } else if (S_ISREG(mode)) { if ((pip->i_diflags & XFS_DIFLAG_RTINHERIT) && - xfs_sb_version_hasrealtime(&ip->i_mount->m_sb)) + xfs_has_realtime(ip->i_mount)) di_flags |= XFS_DIFLAG_REALTIME; if (pip->i_diflags & XFS_DIFLAG_EXTSZINHERIT) { di_flags |= XFS_DIFLAG_EXTSIZE;
@@ -826,8 +837,7 @@ xfs_init_new_inode( inode->i_rdev = rdev; ip->i_projid = prid; - if (dir && !(dir->i_mode & S_ISGID) && - (mp->m_flags & XFS_MOUNT_GRPID)) { + if (dir && !(dir->i_mode & S_ISGID) && xfs_has_grpid(mp)) { inode_fsuid_set(inode, mnt_userns); inode->i_gid = dir->i_gid; inode->i_mode = mode;
@@ -857,7 +867,7 @@ xfs_init_new_inode( ip->i_extsize = 0; ip->i_diflags = 0; - if (xfs_sb_version_has_v3inode(&mp->m_sb)) { + if (xfs_has_v3inodes(mp)) { inode_set_iversion(inode, 1);
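xfs_lock_two_inodes() above depends on a stable ordering rule: always lock the lower-numbered inode first, so concurrent double-locks agree on an order and cannot deadlock ABBA-style. A reduced sketch (the real code also applies lockdep nesting subclasses through its lock-mode helpers):

static void
example_lock_two(
	struct xfs_inode	*a,
	struct xfs_inode	*b)
{
	struct xfs_inode	*tmp;

	if (a->i_ino > b->i_ino) {	/* canonical order: lower ino first */
		tmp = a;
		a = b;
		b = tmp;
	}
	xfs_ilock(a, XFS_ILOCK_EXCL);
	xfs_ilock(b, XFS_ILOCK_EXCL);	/* real code adds a nesting subclass */
}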
ip->i_cowextsize = 0; ip->i_crtime = tv; @@ -897,7 +907,7 @@ xfs_init_new_inode( * this saves us from needing to run a separate transaction to set the * fork offset in the immediate future. */ - if (init_xattrs && xfs_sb_version_hasattr(&mp->m_sb)) { + if (init_xattrs && xfs_has_attr(mp)) { ip->i_forkoff = xfs_default_attroffset(ip) >> 3; ip->i_afp = xfs_ifork_alloc(XFS_DINODE_FMT_EXTENTS, 0); } @@ -976,7 +986,7 @@ xfs_create( trace_xfs_create(dp, name); - if (XFS_FORCED_SHUTDOWN(mp)) + if (xfs_is_shutdown(mp)) return -EIO; prid = xfs_get_initial_prid(dp); @@ -1068,7 +1078,7 @@ xfs_create( * create transaction goes to disk before returning to * the user. */ - if (mp->m_flags & (XFS_MOUNT_WSYNC|XFS_MOUNT_DIRSYNC)) + if (xfs_has_wsync(mp) || xfs_has_dirsync(mp)) xfs_trans_set_sync(tp); /* @@ -1130,7 +1140,7 @@ xfs_create_tmpfile( uint resblks; xfs_ino_t ino; - if (XFS_FORCED_SHUTDOWN(mp)) + if (xfs_is_shutdown(mp)) return -EIO; prid = xfs_get_initial_prid(dp); @@ -1160,7 +1170,7 @@ xfs_create_tmpfile( if (error) goto out_trans_cancel; - if (mp->m_flags & XFS_MOUNT_WSYNC) + if (xfs_has_wsync(mp)) xfs_trans_set_sync(tp); /* @@ -1220,7 +1230,7 @@ xfs_link( ASSERT(!S_ISDIR(VFS_I(sip)->i_mode)); - if (XFS_FORCED_SHUTDOWN(mp)) + if (xfs_is_shutdown(mp)) return -EIO; error = xfs_qm_dqattach(sip); @@ -1294,7 +1304,7 @@ xfs_link( * link transaction goes to disk before returning to * the user. */ - if (mp->m_flags & (XFS_MOUNT_WSYNC|XFS_MOUNT_DIRSYNC)) + if (xfs_has_wsync(mp) || xfs_has_dirsync(mp)) xfs_trans_set_sync(tp); return xfs_trans_commit(tp); @@ -1435,10 +1445,10 @@ xfs_release( return 0; /* If this is a read-only mount, don't do this (would generate I/O) */ - if (mp->m_flags & XFS_MOUNT_RDONLY) + if (xfs_is_readonly(mp)) return 0; - if (!XFS_FORCED_SHUTDOWN(mp)) { + if (!xfs_is_shutdown(mp)) { int truncated; /* @@ -1521,7 +1531,7 @@ xfs_inactive_truncate( error = xfs_trans_alloc(mp, &M_RES(mp)->tr_itruncate, 0, 0, 0, &tp); if (error) { - ASSERT(XFS_FORCED_SHUTDOWN(mp)); + ASSERT(xfs_is_shutdown(mp)); return error; } xfs_ilock(ip, XFS_ILOCK_EXCL); @@ -1592,7 +1602,7 @@ xfs_inactive_ifree( "Failed to remove inode(s) from unlinked list. " "Please free space, unmount and run xfs_repair."); } else { - ASSERT(XFS_FORCED_SHUTDOWN(mp)); + ASSERT(xfs_is_shutdown(mp)); } return error; } @@ -1628,7 +1638,7 @@ xfs_inactive_ifree( * might do that, we need to make sure. Otherwise the * inode might be lost for a long time or forever. */ - if (!XFS_FORCED_SHUTDOWN(mp)) { + if (!xfs_is_shutdown(mp)) { xfs_notice(mp, "%s: xfs_ifree returned error %d", __func__, error); xfs_force_shutdown(mp, SHUTDOWN_META_IO_ERROR); @@ -1655,6 +1665,59 @@ xfs_inactive_ifree( } /* + * Returns true if we need to update the on-disk metadata before we can free + * the memory used by this inode. Updates include freeing post-eof + * preallocations; freeing COW staging extents; and marking the inode free in + * the inobt if it is on the unlinked list. + */ +bool +xfs_inode_needs_inactive( + struct xfs_inode *ip) +{ + struct xfs_mount *mp = ip->i_mount; + struct xfs_ifork *cow_ifp = XFS_IFORK_PTR(ip, XFS_COW_FORK); + + /* + * If the inode is already free, then there can be nothing + * to clean up here. + */ + if (VFS_I(ip)->i_mode == 0) + return false; + + /* If this is a read-only mount, don't do this (would generate I/O) */ + if (xfs_is_readonly(mp)) + return false; + + /* If the log isn't running, push inodes straight to reclaim. 
*/ + if (xfs_is_shutdown(mp) || xfs_has_norecovery(mp)) + return false; + + /* Metadata inodes require explicit resource cleanup. */ + if (xfs_is_metadata_inode(ip)) + return false; + + /* Want to clean out the cow blocks if there are any. */ + if (cow_ifp && cow_ifp->if_bytes > 0) + return true; + + /* Unlinked files must be freed. */ + if (VFS_I(ip)->i_nlink == 0) + return true; + + /* + * This file isn't being freed, so check if there are post-eof blocks + * to free. @force is true because we are evicting an inode from the + * cache. Post-eof blocks must be freed, lest we end up with broken + * free space accounting. + * + * Note: don't bother with iolock here since lockdep complains about + * acquiring it in reclaim context. We have the only reference to the + * inode at this point anyways. + */ + return xfs_can_free_eofblocks(ip, true); +} + +/* * xfs_inactive * * This is called when the vnode reference count for the vnode @@ -1683,7 +1746,7 @@ xfs_inactive( ASSERT(!xfs_iflags_test(ip, XFS_IRECOVERY)); /* If this is a read-only mount, don't do this (would generate I/O) */ - if (mp->m_flags & XFS_MOUNT_RDONLY) + if (xfs_is_readonly(mp)) goto out; /* Metadata inodes require explicit resource cleanup. */ @@ -1958,7 +2021,7 @@ xfs_iunlink_destroy( rhashtable_free_and_destroy(&pag->pagi_unlinked_hash, xfs_iunlink_free_item, &freed_anything); - ASSERT(freed_anything == false || XFS_FORCED_SHUTDOWN(pag->pag_mount)); + ASSERT(freed_anything == false || xfs_is_shutdown(pag->pag_mount)); } /* @@ -2703,7 +2766,7 @@ xfs_remove( trace_xfs_remove(dp, name); - if (XFS_FORCED_SHUTDOWN(mp)) + if (xfs_is_shutdown(mp)) return -EIO; error = xfs_qm_dqattach(dp); @@ -2802,7 +2865,7 @@ xfs_remove( * remove transaction goes to disk before returning to * the user. */ - if (mp->m_flags & (XFS_MOUNT_WSYNC|XFS_MOUNT_DIRSYNC)) + if (xfs_has_wsync(mp) || xfs_has_dirsync(mp)) xfs_trans_set_sync(tp); error = xfs_trans_commit(tp); @@ -2879,7 +2942,7 @@ xfs_finish_rename( * If this is a synchronous mount, make sure that the rename transaction * goes to disk before returning to the user. */ - if (tp->t_mountp->m_flags & (XFS_MOUNT_WSYNC|XFS_MOUNT_DIRSYNC)) + if (xfs_has_wsync(tp->t_mountp) || xfs_has_dirsync(tp->t_mountp)) xfs_trans_set_sync(tp); return xfs_trans_commit(tp); @@ -3462,7 +3525,7 @@ xfs_iflush( * happen but we need to still do it to ensure backwards compatibility * with old kernels that predate logging all inode changes. */ - if (!xfs_sb_version_has_v3inode(&mp->m_sb)) + if (!xfs_has_v3inodes(mp)) ip->i_flushiter++; /* @@ -3484,7 +3547,7 @@ xfs_iflush( xfs_inode_to_disk(ip, dip, iip->ili_item.li_lsn); /* Wrap, we never let the log put out DI_MAX_FLUSH */ - if (!xfs_sb_version_has_v3inode(&mp->m_sb)) { + if (!xfs_has_v3inodes(mp)) { if (ip->i_flushiter == DI_MAX_FLUSH) ip->i_flushiter = 0; } @@ -3603,7 +3666,7 @@ xfs_iflush_cluster( * AIL, leaving a dirty/unpinned inode attached to the buffer * that otherwise looks like it should be flushed. 
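xfs_inode_needs_inactive(), added above, is the gate for deferred inactivation: inode eviction asks it whether the inode can be handed to reclaim as-is or must first be queued so a background worker can run xfs_inactive(). A hypothetical caller sketch; the queueing helper name is an assumption borrowed from the inodegc part of this series:

static void
xfs_evict_inode_sketch(
	struct xfs_inode	*ip)
{
	if (xfs_inode_needs_inactive(ip)) {
		/* On-disk updates pending: let the worker inactivate it. */
		xfs_iflags_set(ip, XFS_NEED_INACTIVE);
		xfs_inodegc_queue(ip);		/* assumed helper */
		return;
	}

	/* Nothing to write back; expose the inode to reclaim directly. */
	xfs_iflags_set(ip, XFS_IRECLAIMABLE);
}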
*/ - if (XFS_FORCED_SHUTDOWN(mp)) { + if (xfs_is_shutdown(mp)) { xfs_iunpin_wait(ip); xfs_iflush_abort(ip); xfs_iunlock(ip, XFS_ILOCK_SHARED); @@ -3741,11 +3804,8 @@ xfs_ilock2_io_mmap( ret = xfs_iolock_two_inodes_and_break_layout(VFS_I(ip1), VFS_I(ip2)); if (ret) return ret; - if (ip1 == ip2) - xfs_ilock(ip1, XFS_MMAPLOCK_EXCL); - else - xfs_lock_two_inodes(ip1, XFS_MMAPLOCK_EXCL, - ip2, XFS_MMAPLOCK_EXCL); + filemap_invalidate_lock_two(VFS_I(ip1)->i_mapping, + VFS_I(ip2)->i_mapping); return 0; } @@ -3755,12 +3815,9 @@ xfs_iunlock2_io_mmap( struct xfs_inode *ip1, struct xfs_inode *ip2) { - bool same_inode = (ip1 == ip2); - - xfs_iunlock(ip2, XFS_MMAPLOCK_EXCL); - if (!same_inode) - xfs_iunlock(ip1, XFS_MMAPLOCK_EXCL); + filemap_invalidate_unlock_two(VFS_I(ip1)->i_mapping, + VFS_I(ip2)->i_mapping); inode_unlock(VFS_I(ip2)); - if (!same_inode) + if (ip1 != ip2) inode_unlock(VFS_I(ip1)); } diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h index 4b6703dbffb8..b21b177832d1 100644 --- a/fs/xfs/xfs_inode.h +++ b/fs/xfs/xfs_inode.h @@ -40,8 +40,8 @@ typedef struct xfs_inode { /* Transaction and locking information. */ struct xfs_inode_log_item *i_itemp; /* logging information */ mrlock_t i_lock; /* inode lock */ - mrlock_t i_mmaplock; /* inode mmap IO lock */ atomic_t i_pincount; /* inode pin count */ + struct llist_node i_gclist; /* deferred inactivation list */ /* * Bitsets of inode metadata that have been checked and/or are sick. @@ -240,6 +240,7 @@ static inline bool xfs_inode_has_bigtime(struct xfs_inode *ip) #define __XFS_IPINNED_BIT 8 /* wakeup key for zero pin count */ #define XFS_IPINNED (1 << __XFS_IPINNED_BIT) #define XFS_IEOFBLOCKS (1 << 9) /* has the preallocblocks tag set */ +#define XFS_NEED_INACTIVE (1 << 10) /* see XFS_INACTIVATING below */ /* * If this unlinked inode is in the middle of recovery, don't let drop_inode * truncate and free the inode. This can happen if we iget the inode during @@ -249,13 +250,29 @@ static inline bool xfs_inode_has_bigtime(struct xfs_inode *ip) #define XFS_ICOWBLOCKS (1 << 12)/* has the cowblocks tag set */ /* + * If we need to update on-disk metadata before this IRECLAIMABLE inode can be + * freed, then NEED_INACTIVE will be set. Once we start the updates, the + * INACTIVATING bit will be set to keep iget away from this inode. After the + * inactivation completes, both flags will be cleared and the inode is a + * plain old IRECLAIMABLE inode. + */ +#define XFS_INACTIVATING (1 << 13) + +/* All inode state flags related to inode reclaim. */ +#define XFS_ALL_IRECLAIM_FLAGS (XFS_IRECLAIMABLE | \ + XFS_IRECLAIM | \ + XFS_NEED_INACTIVE | \ + XFS_INACTIVATING) + +/* * Per-lifetime flags need to be reset when re-using a reclaimable inode during * inode lookup. This prevents unintended behaviour on the new inode from * ocurring. */ #define XFS_IRECLAIM_RESET_FLAGS \ (XFS_IRECLAIMABLE | XFS_IRECLAIM | \ - XFS_IDIRTY_RELEASE | XFS_ITRUNCATED) + XFS_IDIRTY_RELEASE | XFS_ITRUNCATED | XFS_NEED_INACTIVE | \ + XFS_INACTIVATING) /* * Flags for inode locking. @@ -382,8 +399,7 @@ enum layout_break_reason { * new subdirectory gets S_ISGID bit from parent. 
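The flag additions above encode a small per-inode state machine; a compact restatement of the lifecycle the new comments describe (illustrative only):

/*
 * evicted, needs work:  i_flags = XFS_IRECLAIMABLE | XFS_NEED_INACTIVE
 * worker picks it up:   i_flags = XFS_IRECLAIMABLE | XFS_INACTIVATING
 * inactivation done:    i_flags = XFS_IRECLAIMABLE (plain reclaim candidate)
 *
 * Lookup must treat NEED_INACTIVE and INACTIVATING as hands-off, and a
 * recycled inode sheds the whole set via XFS_IRECLAIM_RESET_FLAGS.
 */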
*/ #define XFS_INHERIT_GID(pip) \ - (((pip)->i_mount->m_flags & XFS_MOUNT_GRPID) || \ - (VFS_I(pip)->i_mode & S_ISGID)) + (xfs_has_grpid((pip)->i_mount) || (VFS_I(pip)->i_mode & S_ISGID)) int xfs_release(struct xfs_inode *ip); void xfs_inactive(struct xfs_inode *ip); @@ -410,7 +426,7 @@ void xfs_ilock(xfs_inode_t *, uint); int xfs_ilock_nowait(xfs_inode_t *, uint); void xfs_iunlock(xfs_inode_t *, uint); void xfs_ilock_demote(xfs_inode_t *, uint); -int xfs_isilocked(xfs_inode_t *, uint); +bool xfs_isilocked(struct xfs_inode *, uint); uint xfs_ilock_data_map_shared(struct xfs_inode *); uint xfs_ilock_attr_map_shared(struct xfs_inode *); @@ -493,6 +509,8 @@ extern struct kmem_zone *xfs_inode_zone; /* The default CoW extent size hint. */ #define XFS_DEFAULT_COWEXTSZ_HINT 32 +bool xfs_inode_needs_inactive(struct xfs_inode *ip); + int xfs_iunlink_init(struct xfs_perag *pag); void xfs_iunlink_destroy(struct xfs_perag *pag); diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c index 35de30849fcc..0659d19c211e 100644 --- a/fs/xfs/xfs_inode_item.c +++ b/fs/xfs/xfs_inode_item.c @@ -396,7 +396,7 @@ xfs_inode_to_log_dinode( /* log a dummy value to ensure log structure is fully initialised */ to->di_next_unlinked = NULLAGINO; - if (xfs_sb_version_has_v3inode(&ip->i_mount->m_sb)) { + if (xfs_has_v3inodes(ip->i_mount)) { to->di_version = 3; to->di_changecount = inode_peek_iversion(inode); to->di_crtime = xfs_inode_to_log_dinode_ts(ip, ip->i_crtime); diff --git a/fs/xfs/xfs_inode_item_recover.c b/fs/xfs/xfs_inode_item_recover.c index e0072a6cd2d3..239dd2e3384e 100644 --- a/fs/xfs/xfs_inode_item_recover.c +++ b/fs/xfs/xfs_inode_item_recover.c @@ -295,7 +295,7 @@ xlog_recover_inode_commit_pass2( * superblock flag to determine whether we need to look at di_flushiter * to skip replay when the on disk inode is newer than the log one */ - if (!xfs_sb_version_has_v3inode(&mp->m_sb) && + if (!xfs_has_v3inodes(mp) && ldip->di_flushiter < be16_to_cpu(dip->di_flushiter)) { /* * Deal with the wrap case, DI_MAX_FLUSH is less diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c index 16039ea10ac9..0c795dc093ef 100644 --- a/fs/xfs/xfs_ioctl.c +++ b/fs/xfs/xfs_ioctl.c @@ -756,7 +756,7 @@ xfs_ioc_fsbulkstat( if (!capable(CAP_SYS_ADMIN)) return -EPERM; - if (XFS_FORCED_SHUTDOWN(mp)) + if (xfs_is_shutdown(mp)) return -EIO; if (copy_from_user(&bulkreq, arg, sizeof(struct xfs_fsop_bulkreq))) @@ -927,7 +927,7 @@ xfs_ioc_bulkstat( if (!capable(CAP_SYS_ADMIN)) return -EPERM; - if (XFS_FORCED_SHUTDOWN(mp)) + if (xfs_is_shutdown(mp)) return -EIO; if (copy_from_user(&hdr, &arg->hdr, sizeof(hdr))) @@ -977,7 +977,7 @@ xfs_ioc_inumbers( if (!capable(CAP_SYS_ADMIN)) return -EPERM; - if (XFS_FORCED_SHUTDOWN(mp)) + if (xfs_is_shutdown(mp)) return -EIO; if (copy_from_user(&hdr, &arg->hdr, sizeof(hdr))) @@ -1010,7 +1010,7 @@ xfs_ioc_fsgeometry( struct xfs_fsop_geom fsgeo; size_t len; - xfs_fs_geometry(&mp->m_sb, &fsgeo, struct_version); + xfs_fs_geometry(mp, &fsgeo, struct_version); if (struct_version <= 3) len = sizeof(struct xfs_fsop_geom_v1); @@ -1213,7 +1213,7 @@ xfs_ioctl_setattr_xflags( /* diflags2 only valid for v3 inodes. 
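One small interface change rides along in the ioctl hunks above: xfs_fs_geometry() now takes the xfs_mount rather than the raw superblock, since geometry reporting now draws on mount-level feature state. The implied prototype; the void return type is inferred from the unchecked call sites in both the old and new lines:

void xfs_fs_geometry(struct xfs_mount *mp, struct xfs_fsop_geom *geo,
		int struct_version);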
*/ i_flags2 = xfs_flags2diflags2(ip, fa->fsx_xflags); - if (i_flags2 && !xfs_sb_version_has_v3inode(&mp->m_sb)) + if (i_flags2 && !xfs_has_v3inodes(mp)) return -EINVAL; ip->i_diflags = xfs_flags2diflags(ip, fa->fsx_xflags); @@ -1237,8 +1237,7 @@ xfs_ioctl_setattr_prepare_dax( if (S_ISDIR(inode->i_mode)) return; - if ((mp->m_flags & XFS_MOUNT_DAX_ALWAYS) || - (mp->m_flags & XFS_MOUNT_DAX_NEVER)) + if (xfs_has_dax_always(mp) || xfs_has_dax_never(mp)) return; if (((fa->fsx_xflags & FS_XFLAG_DAX) && @@ -1263,10 +1262,10 @@ xfs_ioctl_setattr_get_trans( struct xfs_trans *tp; int error = -EROFS; - if (mp->m_flags & XFS_MOUNT_RDONLY) + if (xfs_is_readonly(mp)) goto out_error; error = -EIO; - if (XFS_FORCED_SHUTDOWN(mp)) + if (xfs_is_shutdown(mp)) goto out_error; error = xfs_trans_alloc_ichange(ip, NULL, NULL, pdqp, @@ -1274,7 +1273,7 @@ xfs_ioctl_setattr_get_trans( if (error) goto out_error; - if (mp->m_flags & XFS_MOUNT_WSYNC) + if (xfs_has_wsync(mp)) xfs_trans_set_sync(tp); return tp; @@ -1362,9 +1361,9 @@ xfs_ioctl_setattr_check_projid( if (!fa->fsx_valid) return 0; - /* Disallow 32bit project ids if projid32bit feature is not enabled. */ + /* Disallow 32bit project ids if 32bit IDs are not enabled. */ if (fa->fsx_projid > (uint16_t)-1 && - !xfs_sb_version_hasprojid32bit(&ip->i_mount->m_sb)) + !xfs_has_projid32(ip->i_mount)) return -EINVAL; return 0; } @@ -1450,7 +1449,7 @@ xfs_fileattr_set( /* Change the ownerships and register project quota modifications */ if (ip->i_projid != fa->fsx_projid) { - if (XFS_IS_QUOTA_RUNNING(mp) && XFS_IS_PQUOTA_ON(mp)) { + if (XFS_IS_PQUOTA_ON(mp)) { olddquot = xfs_qm_vop_chown(tp, ip, &ip->i_pdquot, pdqp); } @@ -1467,7 +1466,7 @@ xfs_fileattr_set( else ip->i_extsize = 0; - if (xfs_sb_version_has_v3inode(&mp->m_sb)) { + if (xfs_has_v3inodes(mp)) { if (ip->i_diflags2 & XFS_DIFLAG2_COWEXTSIZE) ip->i_cowextsize = XFS_B_TO_FSB(mp, fa->fsx_cowextsize); else @@ -1792,7 +1791,7 @@ xfs_ioc_swapext( goto out_put_tmp_file; } - if (XFS_FORCED_SHUTDOWN(ip->i_mount)) { + if (xfs_is_shutdown(ip->i_mount)) { error = -EIO; goto out_put_tmp_file; } @@ -2081,7 +2080,7 @@ xfs_file_ioctl( if (!capable(CAP_SYS_ADMIN)) return -EPERM; - if (mp->m_flags & XFS_MOUNT_RDONLY) + if (xfs_is_readonly(mp)) return -EROFS; if (copy_from_user(&inout, arg, sizeof(inout))) @@ -2198,7 +2197,7 @@ xfs_file_ioctl( if (!capable(CAP_SYS_ADMIN)) return -EPERM; - if (mp->m_flags & XFS_MOUNT_RDONLY) + if (xfs_is_readonly(mp)) return -EROFS; if (copy_from_user(&eofb, arg, sizeof(eofb))) diff --git a/fs/xfs/xfs_ioctl32.c b/fs/xfs/xfs_ioctl32.c index e6506773ba55..8783af203cfc 100644 --- a/fs/xfs/xfs_ioctl32.c +++ b/fs/xfs/xfs_ioctl32.c @@ -50,7 +50,7 @@ xfs_compat_ioc_fsgeometry_v1( { struct xfs_fsop_geom fsgeo; - xfs_fs_geometry(&mp->m_sb, &fsgeo, 3); + xfs_fs_geometry(mp, &fsgeo, 3); /* The 32-bit variant simply has some padding at the end */ if (copy_to_user(arg32, &fsgeo, sizeof(struct compat_xfs_fsop_geom_v1))) return -EFAULT; @@ -254,7 +254,7 @@ xfs_compat_ioc_fsbulkstat( if (!capable(CAP_SYS_ADMIN)) return -EPERM; - if (XFS_FORCED_SHUTDOWN(mp)) + if (xfs_is_shutdown(mp)) return -EIO; if (get_user(addr, &p32->lastip)) diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c index d8cd2583dedb..093758440ad5 100644 --- a/fs/xfs/xfs_iomap.c +++ b/fs/xfs/xfs_iomap.c @@ -132,7 +132,7 @@ xfs_eof_alignment( * If mounted with the "-o swalloc" option the alignment is * increased from the strip unit size to the stripe width. 
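The found_cow rework above reads better straight-line. Condensing the post-patch logic, with the guarding test assumed from the unchanged context: if the data fork extent covers the start of the write, hand back the COW staging extent flagged IOMAP_F_SHARED together with the data fork as srcmap; otherwise the write lands purely in the COW fork, so trim the staging extent at the next data extent and return it unshared, with no srcmap at all.

found_cow:
	if (imap.br_startoff <= offset_fsb) {	/* assumed guard */
		error = xfs_bmbt_to_iomap(ip, srcmap, &imap, 0);
		if (error)
			return error;
		return xfs_bmbt_to_iomap(ip, iomap, &cmap, IOMAP_F_SHARED);
	}

	xfs_trim_extent(&cmap, offset_fsb, imap.br_startoff - offset_fsb);
	return xfs_bmbt_to_iomap(ip, iomap, &cmap, 0);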
*/ - if (mp->m_swidth && (mp->m_flags & XFS_MOUNT_SWALLOC)) + if (mp->m_swidth && xfs_has_swalloc(mp)) align = mp->m_swidth; else if (mp->m_dalign) align = mp->m_dalign; @@ -734,7 +734,7 @@ xfs_direct_write_iomap_begin( ASSERT(flags & (IOMAP_WRITE | IOMAP_ZERO)); - if (XFS_FORCED_SHUTDOWN(mp)) + if (xfs_is_shutdown(mp)) return -EIO; /* @@ -874,7 +874,7 @@ xfs_buffered_write_iomap_begin( int allocfork = XFS_DATA_FORK; int error = 0; - if (XFS_FORCED_SHUTDOWN(mp)) + if (xfs_is_shutdown(mp)) return -EIO; /* we can't use delayed allocations when using extent size hints */ @@ -994,7 +994,7 @@ xfs_buffered_write_iomap_begin( * Determine the initial size of the preallocation. * We clean up any extra preallocation when the file is closed. */ - if (mp->m_flags & XFS_MOUNT_ALLOCSIZE) + if (xfs_has_allocsize(mp)) prealloc_blocks = mp->m_allocsize_blocks; else prealloc_blocks = xfs_iomap_prealloc_size(ip, allocfork, @@ -1064,11 +1064,11 @@ found_cow: error = xfs_bmbt_to_iomap(ip, srcmap, &imap, 0); if (error) return error; - } else { - xfs_trim_extent(&cmap, offset_fsb, - imap.br_startoff - offset_fsb); + return xfs_bmbt_to_iomap(ip, iomap, &cmap, IOMAP_F_SHARED); } - return xfs_bmbt_to_iomap(ip, iomap, &cmap, IOMAP_F_SHARED); + + xfs_trim_extent(&cmap, offset_fsb, imap.br_startoff - offset_fsb); + return xfs_bmbt_to_iomap(ip, iomap, &cmap, 0); out_unlock: xfs_iunlock(ip, XFS_ILOCK_EXCL); @@ -1127,7 +1127,7 @@ xfs_buffered_write_iomap_end( error = xfs_bmap_punch_delalloc_range(ip, start_fsb, end_fsb - start_fsb); - if (error && !XFS_FORCED_SHUTDOWN(mp)) { + if (error && !xfs_is_shutdown(mp)) { xfs_alert(mp, "%s: unable to clean up ino %lld", __func__, ip->i_ino); return error; @@ -1162,7 +1162,7 @@ xfs_read_iomap_begin( ASSERT(!(flags & (IOMAP_WRITE | IOMAP_ZERO))); - if (XFS_FORCED_SHUTDOWN(mp)) + if (xfs_is_shutdown(mp)) return -EIO; error = xfs_ilock_for_iomap(ip, flags, &lockmode); @@ -1203,7 +1203,7 @@ xfs_seek_iomap_begin( int error = 0; unsigned lockmode; - if (XFS_FORCED_SHUTDOWN(mp)) + if (xfs_is_shutdown(mp)) return -EIO; lockmode = xfs_ilock_data_map_shared(ip); @@ -1285,7 +1285,7 @@ xfs_xattr_iomap_begin( int nimaps = 1, error = 0; unsigned lockmode; - if (XFS_FORCED_SHUTDOWN(mp)) + if (xfs_is_shutdown(mp)) return -EIO; lockmode = xfs_ilock_attr_map_shared(ip); diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c index 93c082db04b7..a607d6aca5c4 100644 --- a/fs/xfs/xfs_iops.c +++ b/fs/xfs/xfs_iops.c @@ -393,7 +393,7 @@ xfs_vn_unlink( * but still hashed. This is incompatible with case-insensitive * mode, so invalidate (unhash) the dentry in CI-mode. */ - if (xfs_sb_version_hasasciici(&XFS_M(dir->i_sb)->m_sb)) + if (xfs_has_asciici(XFS_M(dir->i_sb))) d_invalidate(dentry); return 0; } @@ -558,10 +558,10 @@ xfs_stat_blksize( * default buffered I/O size, return that, otherwise return the compat * default. 
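For reference, the resulting st_blksize policy in xfs_stat_blksize() reads as below once the conversions above are applied; the PAGE_SIZE fallback is assumed from the mainline function, and its realtime special-casing is elided:

	if (xfs_has_large_iosize(mp)) {
		if (mp->m_swidth)
			return XFS_FSB_TO_B(mp, mp->m_swidth);	/* stripe width */
		if (xfs_has_allocsize(mp))
			return 1U << mp->m_allocsize_log;	/* -o allocsize=N */
	}

	return PAGE_SIZE;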
*/ - if (mp->m_flags & XFS_MOUNT_LARGEIO) { + if (xfs_has_large_iosize(mp)) { if (mp->m_swidth) return XFS_FSB_TO_B(mp, mp->m_swidth); - if (mp->m_flags & XFS_MOUNT_ALLOCSIZE) + if (xfs_has_allocsize(mp)) return 1U << mp->m_allocsize_log; } @@ -582,7 +582,7 @@ xfs_vn_getattr( trace_xfs_getattr(ip); - if (XFS_FORCED_SHUTDOWN(mp)) + if (xfs_is_shutdown(mp)) return -EIO; stat->size = XFS_ISIZE(ip); @@ -597,7 +597,7 @@ xfs_vn_getattr( stat->ctime = inode->i_ctime; stat->blocks = XFS_FSB_TO_BB(mp, ip->i_nblocks + ip->i_delayed_blks); - if (xfs_sb_version_has_v3inode(&mp->m_sb)) { + if (xfs_has_v3inodes(mp)) { if (request_mask & STATX_BTIME) { stat->result_mask |= STATX_BTIME; stat->btime = ip->i_crtime; @@ -673,10 +673,10 @@ xfs_vn_change_ok( { struct xfs_mount *mp = XFS_I(d_inode(dentry))->i_mount; - if (mp->m_flags & XFS_MOUNT_RDONLY) + if (xfs_is_readonly(mp)) return -EROFS; - if (XFS_FORCED_SHUTDOWN(mp)) + if (xfs_is_shutdown(mp)) return -EIO; return setattr_prepare(mnt_userns, dentry, iattr); @@ -778,7 +778,7 @@ xfs_setattr_nonsize( * in the transaction. */ if (!uid_eq(iuid, uid)) { - if (XFS_IS_QUOTA_RUNNING(mp) && XFS_IS_UQUOTA_ON(mp)) { + if (XFS_IS_UQUOTA_ON(mp)) { ASSERT(mask & ATTR_UID); ASSERT(udqp); olddquot1 = xfs_qm_vop_chown(tp, ip, @@ -787,8 +787,8 @@ xfs_setattr_nonsize( inode->i_uid = uid; } if (!gid_eq(igid, gid)) { - if (XFS_IS_QUOTA_RUNNING(mp) && XFS_IS_GQUOTA_ON(mp)) { - ASSERT(xfs_sb_version_has_pquotino(&mp->m_sb) || + if (XFS_IS_GQUOTA_ON(mp)) { + ASSERT(xfs_has_pquotino(mp) || !XFS_IS_PQUOTA_ON(mp)); ASSERT(mask & ATTR_GID); ASSERT(gdqp); @@ -808,7 +808,7 @@ xfs_setattr_nonsize( XFS_STATS_INC(mp, xs_ig_attrchg); - if (mp->m_flags & XFS_MOUNT_WSYNC) + if (xfs_has_wsync(mp)) xfs_trans_set_sync(tp); error = xfs_trans_commit(tp); @@ -1037,7 +1037,7 @@ xfs_setattr_size( XFS_STATS_INC(mp, xs_ig_attrchg); - if (mp->m_flags & XFS_MOUNT_WSYNC) + if (xfs_has_wsync(mp)) xfs_trans_set_sync(tp); error = xfs_trans_commit(tp); @@ -1287,11 +1287,11 @@ xfs_inode_should_enable_dax( { if (!IS_ENABLED(CONFIG_FS_DAX)) return false; - if (ip->i_mount->m_flags & XFS_MOUNT_DAX_NEVER) + if (xfs_has_dax_never(ip->i_mount)) return false; if (!xfs_inode_supports_dax(ip)) return false; - if (ip->i_mount->m_flags & XFS_MOUNT_DAX_ALWAYS) + if (xfs_has_dax_always(ip->i_mount)) return true; if (ip->i_diflags2 & XFS_DIFLAG2_DAX) return true; @@ -1344,7 +1344,7 @@ xfs_setup_inode( gfp_t gfp_mask; inode->i_ino = ip->i_ino; - inode->i_state = I_NEW; + inode->i_state |= I_NEW; inode_sb_list_add(inode); /* make the inode look hashed for the writeback code */ @@ -1401,7 +1401,7 @@ xfs_setup_iops( inode->i_mapping->a_ops = &xfs_address_space_operations; break; case S_IFDIR: - if (xfs_sb_version_hasasciici(&XFS_M(inode->i_sb)->m_sb)) + if (xfs_has_asciici(XFS_M(inode->i_sb))) inode->i_op = &xfs_dir_ci_inode_operations; else inode->i_op = &xfs_dir_inode_operations; diff --git a/fs/xfs/xfs_itable.c b/fs/xfs/xfs_itable.c index f331975a16de..c08c79d9e311 100644 --- a/fs/xfs/xfs_itable.c +++ b/fs/xfs/xfs_itable.c @@ -19,6 +19,7 @@ #include "xfs_error.h" #include "xfs_icache.h" #include "xfs_health.h" +#include "xfs_trans.h" /* * Bulk Stat @@ -107,7 +108,7 @@ xfs_bulkstat_one_int( buf->bs_forkoff = XFS_IFORK_BOFF(ip); buf->bs_version = XFS_BULKSTAT_VERSION_V5; - if (xfs_sb_version_has_v3inode(&mp->m_sb)) { + if (xfs_has_v3inodes(mp)) { buf->bs_btime = ip->i_crtime.tv_sec; buf->bs_btime_nsec = ip->i_crtime.tv_nsec; if (ip->i_diflags2 & XFS_DIFLAG2_COWEXTSIZE) @@ -163,6 +164,7 @@ xfs_bulkstat_one( .formatter = 
formatter, .breq = breq, }; + struct xfs_trans *tp; int error; if (breq->mnt_userns != &init_user_ns) { @@ -178,9 +180,18 @@ xfs_bulkstat_one( if (!bc.buf) return -ENOMEM; - error = xfs_bulkstat_one_int(breq->mp, breq->mnt_userns, NULL, - breq->startino, &bc); + /* + * Grab an empty transaction so that we can use its recursive buffer + * locking abilities to detect cycles in the inobt without deadlocking. + */ + error = xfs_trans_alloc_empty(breq->mp, &tp); + if (error) + goto out; + error = xfs_bulkstat_one_int(breq->mp, breq->mnt_userns, tp, + breq->startino, &bc); + xfs_trans_cancel(tp); +out: kmem_free(bc.buf); /* @@ -244,6 +255,7 @@ xfs_bulkstat( .formatter = formatter, .breq = breq, }; + struct xfs_trans *tp; int error; if (breq->mnt_userns != &init_user_ns) { @@ -259,9 +271,18 @@ xfs_bulkstat( if (!bc.buf) return -ENOMEM; - error = xfs_iwalk(breq->mp, NULL, breq->startino, breq->flags, - xfs_bulkstat_iwalk, breq->icount, &bc); + /* + * Grab an empty transaction so that we can use its recursive buffer + * locking abilities to detect cycles in the inobt without deadlocking. + */ + error = xfs_trans_alloc_empty(breq->mp, &tp); + if (error) + goto out; + error = xfs_iwalk(breq->mp, tp, breq->startino, breq->flags, + xfs_bulkstat_iwalk, breq->icount, &bc); + xfs_trans_cancel(tp); +out: kmem_free(bc.buf); /* @@ -374,13 +395,24 @@ xfs_inumbers( .formatter = formatter, .breq = breq, }; + struct xfs_trans *tp; int error = 0; if (xfs_bulkstat_already_done(breq->mp, breq->startino)) return 0; - error = xfs_inobt_walk(breq->mp, NULL, breq->startino, breq->flags, + /* + * Grab an empty transaction so that we can use its recursive buffer + * locking abilities to detect cycles in the inobt without deadlocking. + */ + error = xfs_trans_alloc_empty(breq->mp, &tp); + if (error) + goto out; + + error = xfs_inobt_walk(breq->mp, tp, breq->startino, breq->flags, xfs_inumbers_walk, breq->icount, &ic); + xfs_trans_cancel(tp); +out: /* * We found some inode groups, so clear the error status and return diff --git a/fs/xfs/xfs_iwalk.c b/fs/xfs/xfs_iwalk.c index 917d51eefee3..7558486f4937 100644 --- a/fs/xfs/xfs_iwalk.c +++ b/fs/xfs/xfs_iwalk.c @@ -83,6 +83,9 @@ struct xfs_iwalk_ag { /* Skip empty inobt records? */ unsigned int skip_empty:1; + + /* Drop the (hopefully empty) transaction when calling iwalk_fn. */ + unsigned int drop_trans:1; }; /* @@ -352,7 +355,6 @@ xfs_iwalk_run_callbacks( int *has_more) { struct xfs_mount *mp = iwag->mp; - struct xfs_trans *tp = iwag->tp; struct xfs_inobt_rec_incore *irec; xfs_agino_t next_agino; int error; @@ -362,10 +364,15 @@ xfs_iwalk_run_callbacks( ASSERT(iwag->nr_recs > 0); /* Delete cursor but remember the last record we cached... */ - xfs_iwalk_del_inobt(tp, curpp, agi_bpp, 0); + xfs_iwalk_del_inobt(iwag->tp, curpp, agi_bpp, 0); irec = &iwag->recs[iwag->nr_recs - 1]; ASSERT(next_agino >= irec->ir_startino + XFS_INODES_PER_CHUNK); + if (iwag->drop_trans) { + xfs_trans_cancel(iwag->tp); + iwag->tp = NULL; + } + error = xfs_iwalk_ag_recs(iwag); if (error) return error; @@ -376,8 +383,15 @@ xfs_iwalk_run_callbacks( if (!has_more) return 0; + if (iwag->drop_trans) { + error = xfs_trans_alloc_empty(mp, &iwag->tp); + if (error) + return error; + } + /* ...and recreate the cursor just past where we left off. 
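bulkstat, inumbers, and (below) the iwalk workers all now wrap their inobt walks in the same empty-transaction idiom. Isolated, the pattern looks like this; xfs_trans_alloc_empty() takes no log reservation, so the transaction exists purely for its recursive buffer locking and cancelling it is free:

	struct xfs_trans	*tp;
	int			error;

	error = xfs_trans_alloc_empty(mp, &tp);
	if (error)
		return error;

	/* e.g. xfs_iwalk() or xfs_inobt_walk(); placeholder arguments shown */
	error = xfs_iwalk(mp, tp, startino, flags, walk_fn, icount, data);

	/* Nothing was dirtied, so cancel is the correct way to release it. */
	xfs_trans_cancel(tp);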
*/ - error = xfs_inobt_cur(mp, tp, iwag->pag, XFS_BTNUM_INO, curpp, agi_bpp); + error = xfs_inobt_cur(mp, iwag->tp, iwag->pag, XFS_BTNUM_INO, curpp, + agi_bpp); if (error) return error; @@ -390,7 +404,6 @@ xfs_iwalk_ag( struct xfs_iwalk_ag *iwag) { struct xfs_mount *mp = iwag->mp; - struct xfs_trans *tp = iwag->tp; struct xfs_perag *pag = iwag->pag; struct xfs_buf *agi_bp = NULL; struct xfs_btree_cur *cur = NULL; @@ -469,7 +482,7 @@ xfs_iwalk_ag( error = xfs_iwalk_run_callbacks(iwag, &cur, &agi_bp, &has_more); out: - xfs_iwalk_del_inobt(tp, &cur, &agi_bp, error); + xfs_iwalk_del_inobt(iwag->tp, &cur, &agi_bp, error); return error; } @@ -599,8 +612,18 @@ xfs_iwalk_ag_work( error = xfs_iwalk_alloc(iwag); if (error) goto out; + /* + * Grab an empty transaction so that we can use its recursive buffer + * locking abilities to detect cycles in the inobt without deadlocking. + */ + error = xfs_trans_alloc_empty(mp, &iwag->tp); + if (error) + goto out; + iwag->drop_trans = 1; error = xfs_iwalk_ag(iwag); + if (iwag->tp) + xfs_trans_cancel(iwag->tp); xfs_iwalk_free(iwag); out: xfs_perag_put(iwag->pag); diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c index 60ac5fd63f1e..f6cd2d4aa770 100644 --- a/fs/xfs/xfs_log.c +++ b/fs/xfs/xfs_log.c @@ -41,6 +41,8 @@ xlog_dealloc_log( /* local state machine functions */ STATIC void xlog_state_done_syncing( struct xlog_in_core *iclog); +STATIC void xlog_state_do_callback( + struct xlog *log); STATIC int xlog_state_get_iclog_space( struct xlog *log, @@ -50,11 +52,6 @@ xlog_state_get_iclog_space( int *continued_write, int *logoffsetp); STATIC void -xlog_state_switch_iclogs( - struct xlog *log, - struct xlog_in_core *iclog, - int eventual_size); -STATIC void xlog_grant_push_ail( struct xlog *log, int need_bytes); @@ -246,7 +243,7 @@ xlog_grant_head_wait( list_add_tail(&tic->t_queue, &head->waiters); do { - if (XLOG_FORCED_SHUTDOWN(log)) + if (xlog_is_shutdown(log)) goto shutdown; xlog_grant_push_ail(log, need_bytes); @@ -260,7 +257,7 @@ xlog_grant_head_wait( trace_xfs_log_grant_wake(log, tic); spin_lock(&head->lock); - if (XLOG_FORCED_SHUTDOWN(log)) + if (xlog_is_shutdown(log)) goto shutdown; } while (xlog_space_left(log, &head->grant) < need_bytes); @@ -298,7 +295,7 @@ xlog_grant_head_check( int free_bytes; int error = 0; - ASSERT(!(log->l_flags & XLOG_ACTIVE_RECOVERY)); + ASSERT(!xlog_in_recovery(log)); /* * If there are other waiters on the queue then give them a chance at @@ -359,13 +356,13 @@ xfs_log_writable( * mounts allow internal writes for log recovery and unmount purposes, * so don't restrict that case. */ - if (mp->m_flags & XFS_MOUNT_NORECOVERY) + if (xfs_has_norecovery(mp)) return false; if (xfs_readonly_buftarg(mp->m_ddev_targp)) return false; if (xfs_readonly_buftarg(mp->m_log->l_targ)) return false; - if (XFS_FORCED_SHUTDOWN(mp)) + if (xlog_is_shutdown(mp->m_log)) return false; return true; } @@ -382,7 +379,7 @@ xfs_log_regrant( int need_bytes; int error = 0; - if (XLOG_FORCED_SHUTDOWN(log)) + if (xlog_is_shutdown(log)) return -EIO; XFS_STATS_INC(mp, xs_try_logspace); @@ -450,7 +447,7 @@ xfs_log_reserve( ASSERT(client == XFS_TRANSACTION || client == XFS_LOG); - if (XLOG_FORCED_SHUTDOWN(log)) + if (xlog_is_shutdown(log)) return -EIO; XFS_STATS_INC(mp, xs_try_logspace); @@ -487,6 +484,42 @@ out_error: } /* + * Run all the pending iclog callbacks and wake log force waiters and iclog + * space waiters so they can process the newly set shutdown state. 
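The log-side hunks mirror the mount conversion: the l_flags word and XLOG_FORCED_SHUTDOWN() give way to predicates over an atomic l_opstate bitmap. A sketch with bit and helper names taken from the mainline xfs_log_priv.h, assumed here:

#define XLOG_ACTIVE_RECOVERY	0	/* in the middle of recovery */
#define XLOG_RECOVERY_NEEDED	1	/* log was recovered */
#define XLOG_IO_ERROR		2	/* log is being shut down */
#define XLOG_TAIL_WARN		3	/* log tail verify warning issued */

static inline bool
xlog_in_recovery(struct xlog *log)
{
	return test_bit(XLOG_ACTIVE_RECOVERY, &log->l_opstate);
}

static inline bool
xlog_is_shutdown(struct xlog *log)
{
	return test_bit(XLOG_IO_ERROR, &log->l_opstate);
}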
We really + * don't care what order we process callbacks here because the log is shut down + * and so state cannot change on disk anymore. + * + * We avoid processing actively referenced iclogs so that we don't run callbacks + * while the iclog owner might still be preparing the iclog for IO submssion. + * These will be caught by xlog_state_iclog_release() and call this function + * again to process any callbacks that may have been added to that iclog. + */ +static void +xlog_state_shutdown_callbacks( + struct xlog *log) +{ + struct xlog_in_core *iclog; + LIST_HEAD(cb_list); + + spin_lock(&log->l_icloglock); + iclog = log->l_iclog; + do { + if (atomic_read(&iclog->ic_refcnt)) { + /* Reference holder will re-run iclog callbacks. */ + continue; + } + list_splice_init(&iclog->ic_callbacks, &cb_list); + wake_up_all(&iclog->ic_write_wait); + wake_up_all(&iclog->ic_force_wait); + } while ((iclog = iclog->ic_next) != log->l_iclog); + + wake_up_all(&log->l_flush_wait); + spin_unlock(&log->l_icloglock); + + xlog_cil_process_committed(&cb_list); +} + +/* * Flush iclog to disk if this is the last reference to the given iclog and the * it is in the WANT_SYNC state. * @@ -520,12 +553,11 @@ xlog_state_release_iclog( xfs_lsn_t old_tail_lsn) { xfs_lsn_t tail_lsn; + bool last_ref; + lockdep_assert_held(&log->l_icloglock); trace_xlog_iclog_release(iclog, _RET_IP_); - if (iclog->ic_state == XLOG_STATE_IOERROR) - return -EIO; - /* * Grabbing the current log tail needs to be atomic w.r.t. the writing * of the tail LSN into the iclog so we guarantee that the log tail does @@ -543,7 +575,23 @@ xlog_state_release_iclog( iclog->ic_header.h_tail_lsn = cpu_to_be64(tail_lsn); } - if (!atomic_dec_and_test(&iclog->ic_refcnt)) + last_ref = atomic_dec_and_test(&iclog->ic_refcnt); + + if (xlog_is_shutdown(log)) { + /* + * If there are no more references to this iclog, process the + * pending iclog callbacks that were waiting on the release of + * this iclog. + */ + if (last_ref) { + spin_unlock(&log->l_icloglock); + xlog_state_shutdown_callbacks(log); + spin_lock(&log->l_icloglock); + } + return -EIO; + } + + if (!last_ref) return 0; if (iclog->ic_state != XLOG_STATE_WANT_SYNC) { @@ -580,25 +628,27 @@ xfs_log_mount( xfs_daddr_t blk_offset, int num_bblks) { - bool fatal = xfs_sb_version_hascrc(&mp->m_sb); + struct xlog *log; + bool fatal = xfs_has_crc(mp); int error = 0; int min_logfsbs; - if (!(mp->m_flags & XFS_MOUNT_NORECOVERY)) { + if (!xfs_has_norecovery(mp)) { xfs_notice(mp, "Mounting V%d Filesystem", XFS_SB_VERSION_NUM(&mp->m_sb)); } else { xfs_notice(mp, "Mounting V%d filesystem in no-recovery mode. Filesystem will be inconsistent.", XFS_SB_VERSION_NUM(&mp->m_sb)); - ASSERT(mp->m_flags & XFS_MOUNT_RDONLY); + ASSERT(xfs_is_readonly(mp)); } - mp->m_log = xlog_alloc_log(mp, log_target, blk_offset, num_bblks); - if (IS_ERR(mp->m_log)) { - error = PTR_ERR(mp->m_log); + log = xlog_alloc_log(mp, log_target, blk_offset, num_bblks); + if (IS_ERR(log)) { + error = PTR_ERR(log); goto out; } + mp->m_log = log; /* * Validate the given log space and drop a critical message via syslog @@ -663,51 +713,51 @@ xfs_log_mount( xfs_warn(mp, "AIL initialisation failed: error %d", error); goto out_free_log; } - mp->m_log->l_ailp = mp->m_ail; + log->l_ailp = mp->m_ail; /* * skip log recovery on a norecovery mount. pretend it all * just worked. 
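The release path above boils down to an ownership contract for shutdown: whoever drops the final reference to an iclog after the log has been shut down must run the callbacks parked on it. Condensed, with the icloglock drop/retake elided (see the hunk for the full locking dance):

	last_ref = atomic_dec_and_test(&iclog->ic_refcnt);
	if (xlog_is_shutdown(log)) {
		if (last_ref)
			xlog_state_shutdown_callbacks(log);
		return -EIO;
	}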
*/ - if (!(mp->m_flags & XFS_MOUNT_NORECOVERY)) { - int readonly = (mp->m_flags & XFS_MOUNT_RDONLY); - - if (readonly) - mp->m_flags &= ~XFS_MOUNT_RDONLY; - - error = xlog_recover(mp->m_log); - + if (!xfs_has_norecovery(mp)) { + /* + * log recovery ignores readonly state and so we need to clear + * mount-based read only state so it can write to disk. + */ + bool readonly = test_and_clear_bit(XFS_OPSTATE_READONLY, + &mp->m_opstate); + error = xlog_recover(log); if (readonly) - mp->m_flags |= XFS_MOUNT_RDONLY; + set_bit(XFS_OPSTATE_READONLY, &mp->m_opstate); if (error) { xfs_warn(mp, "log mount/recovery failed: error %d", error); - xlog_recover_cancel(mp->m_log); + xlog_recover_cancel(log); goto out_destroy_ail; } } - error = xfs_sysfs_init(&mp->m_log->l_kobj, &xfs_log_ktype, &mp->m_kobj, + error = xfs_sysfs_init(&log->l_kobj, &xfs_log_ktype, &mp->m_kobj, "log"); if (error) goto out_destroy_ail; /* Normal transactions can now occur */ - mp->m_log->l_flags &= ~XLOG_ACTIVE_RECOVERY; + clear_bit(XLOG_ACTIVE_RECOVERY, &log->l_opstate); /* * Now the log has been fully initialised and we know were our * space grant counters are, we can initialise the permanent ticket * needed for delayed logging to work. */ - xlog_cil_init_post_recovery(mp->m_log); + xlog_cil_init_post_recovery(log); return 0; out_destroy_ail: xfs_trans_ail_destroy(mp); out_free_log: - xlog_dealloc_log(mp->m_log); + xlog_dealloc_log(log); out: return error; } @@ -726,19 +776,22 @@ int xfs_log_mount_finish( struct xfs_mount *mp) { - int error = 0; - bool readonly = (mp->m_flags & XFS_MOUNT_RDONLY); - bool recovered = mp->m_log->l_flags & XLOG_RECOVERY_NEEDED; + struct xlog *log = mp->m_log; + bool readonly; + int error = 0; - if (mp->m_flags & XFS_MOUNT_NORECOVERY) { - ASSERT(mp->m_flags & XFS_MOUNT_RDONLY); + if (xfs_has_norecovery(mp)) { + ASSERT(xfs_is_readonly(mp)); return 0; - } else if (readonly) { - /* Allow unlinked processing to proceed */ - mp->m_flags &= ~XFS_MOUNT_RDONLY; } /* + * log recovery ignores readonly state and so we need to clear + * mount-based read only state so it can write to disk. + */ + readonly = test_and_clear_bit(XFS_OPSTATE_READONLY, &mp->m_opstate); + + /* * During the second phase of log recovery, we need iget and * iput to behave like they do for an active filesystem. * xfs_fs_drop_inode needs to be able to prevent the deletion @@ -759,7 +812,8 @@ xfs_log_mount_finish( * mount failure occurs. */ mp->m_super->s_flags |= SB_ACTIVE; - error = xlog_recover_finish(mp->m_log); + if (xlog_recovery_needed(log)) + error = xlog_recover_finish(log); if (!error) xfs_log_work_queue(mp); mp->m_super->s_flags &= ~SB_ACTIVE; @@ -774,17 +828,24 @@ xfs_log_mount_finish( * Don't push in the error case because the AIL may have pending intents * that aren't removed until recovery is cancelled. */ - if (!error && recovered) { - xfs_log_force(mp, XFS_LOG_SYNC); - xfs_ail_push_all_sync(mp->m_ail); + if (xlog_recovery_needed(log)) { + if (!error) { + xfs_log_force(mp, XFS_LOG_SYNC); + xfs_ail_push_all_sync(mp->m_ail); + } + xfs_notice(mp, "Ending recovery (logdev: %s)", + mp->m_logname ? mp->m_logname : "internal"); + } else { + xfs_info(mp, "Ending clean mount"); } xfs_buftarg_drain(mp->m_ddev_targp); + clear_bit(XLOG_RECOVERY_NEEDED, &log->l_opstate); if (readonly) - mp->m_flags |= XFS_MOUNT_RDONLY; + set_bit(XFS_OPSTATE_READONLY, &mp->m_opstate); /* Make sure the log is dead if we're returning failure. 
*/ - ASSERT(!error || (mp->m_log->l_flags & XLOG_IO_ERROR)); + ASSERT(!error || xlog_is_shutdown(log)); return error; } @@ -830,7 +891,7 @@ xlog_wait_on_iclog( struct xlog *log = iclog->ic_log; trace_xlog_iclog_wait_on(iclog, _RET_IP_); - if (!XLOG_FORCED_SHUTDOWN(log) && + if (!xlog_is_shutdown(log) && iclog->ic_state != XLOG_STATE_ACTIVE && iclog->ic_state != XLOG_STATE_DIRTY) { XFS_STATS_INC(log->l_mp, xs_log_force_sleep); @@ -839,7 +900,7 @@ xlog_wait_on_iclog( spin_unlock(&log->l_icloglock); } - if (XLOG_FORCED_SHUTDOWN(log)) + if (xlog_is_shutdown(log)) return -EIO; return 0; } @@ -870,7 +931,7 @@ xlog_write_unmount_record( /* account for space used by record data */ ticket->t_curr_res -= sizeof(ulf); - return xlog_write(log, &vec, ticket, NULL, NULL, XLOG_UNMOUNT_TRANS); + return xlog_write(log, NULL, &vec, ticket, XLOG_UNMOUNT_TRANS); } /* @@ -893,7 +954,7 @@ xlog_unmount_write( error = xlog_write_unmount_record(log, tic); /* * At this point, we're umounting anyway, so there's no point in - * transitioning log state to IOERROR. Just continue... + * transitioning log state to shutdown. Just continue... */ out_err: if (error) @@ -940,7 +1001,7 @@ xfs_log_unmount_write( xfs_log_force(mp, XFS_LOG_SYNC); - if (XLOG_FORCED_SHUTDOWN(log)) + if (xlog_is_shutdown(log)) return; /* @@ -972,6 +1033,20 @@ int xfs_log_quiesce( struct xfs_mount *mp) { + /* + * Clear log incompat features since we're quiescing the log. Report + * failures, though it's not fatal to have a higher log feature + * protection level than the log contents actually require. + */ + if (xfs_clear_incompat_log_features(mp)) { + int error; + + error = xfs_sync_sb(mp, false); + if (error) + xfs_warn(mp, + "Failed to clear log incompat features on quiesce"); + } + cancel_delayed_work_sync(&mp->m_log->l_work); xfs_log_force(mp, XFS_LOG_SYNC); @@ -1049,11 +1124,11 @@ xfs_log_space_wake( struct xlog *log = mp->m_log; int free_bytes; - if (XLOG_FORCED_SHUTDOWN(log)) + if (xlog_is_shutdown(log)) return; if (!list_empty_careful(&log->l_write_head.waiters)) { - ASSERT(!(log->l_flags & XLOG_ACTIVE_RECOVERY)); + ASSERT(!xlog_in_recovery(log)); spin_lock(&log->l_write_head.lock); free_bytes = xlog_space_left(log, &log->l_write_head.grant); @@ -1062,7 +1137,7 @@ xfs_log_space_wake( } if (!list_empty_careful(&log->l_reserve_head.waiters)) { - ASSERT(!(log->l_flags & XLOG_ACTIVE_RECOVERY)); + ASSERT(!xlog_in_recovery(log)); spin_lock(&log->l_reserve_head.lock); free_bytes = xlog_space_left(log, &log->l_reserve_head.grant); @@ -1140,7 +1215,7 @@ xfs_log_cover( ASSERT((xlog_cil_empty(mp->m_log) && xlog_iclogs_empty(mp->m_log) && !xfs_ail_min_lsn(mp->m_log->l_ailp)) || - XFS_FORCED_SHUTDOWN(mp)); + xlog_is_shutdown(mp->m_log)); if (!xfs_log_writable(mp)) return 0; @@ -1157,7 +1232,7 @@ xfs_log_cover( * handles this for us. */ need_covered = xfs_log_need_covered(mp); - if (!need_covered && !xfs_sb_version_haslazysbcount(&mp->m_sb)) + if (!need_covered && !xfs_has_lazysbcount(mp)) return 0; /* @@ -1230,16 +1305,18 @@ xlog_assign_tail_lsn( * wrap the tail, we should blow up. Rather than catch this case here, * we depend on other ASSERTions in other parts of the code. XXXmiken * - * This code also handles the case where the reservation head is behind - * the tail. The details of this case are described below, but the end - * result is that we return the size of the log as the amount of space left. + * If reservation head is behind the tail, we have a problem. Warn about it, + * but then treat it as if the log is empty. 
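The l_incompat_users rwsem introduced by this series gives log-incompat feature flags a small reader/writer protocol: code emitting log items that depend on an incompat flag holds it shared, while covering and quiesce may clear flags only after taking it exclusively. A usage sketch built from the use/drop helpers defined at the end of this log patch and the trylock in xlog_clear_incompat() below:

	/* Reader side: keep the incompat bit alive while relying on it. */
	xlog_use_incompat_feat(log);
	/* ... commit transactions that need the incompat log format ... */
	xlog_drop_incompat_feat(log);

	/* Covering side: clear only if nobody is using the feature right now. */
	if (down_write_trylock(&log->l_incompat_users)) {
		xfs_clear_incompat_log_features(mp);
		up_write(&log->l_incompat_users);
	}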
+ * + * If the log is shut down, the head and tail may be invalid or out of whack, so + * shortcut invalidity asserts in this case so that we don't trigger them + * falsely. */ STATIC int xlog_space_left( struct xlog *log, atomic64_t *head) { - int free_bytes; int tail_bytes; int tail_cycle; int head_cycle; @@ -1249,29 +1326,30 @@ xlog_space_left( xlog_crack_atomic_lsn(&log->l_tail_lsn, &tail_cycle, &tail_bytes); tail_bytes = BBTOB(tail_bytes); if (tail_cycle == head_cycle && head_bytes >= tail_bytes) - free_bytes = log->l_logsize - (head_bytes - tail_bytes); - else if (tail_cycle + 1 < head_cycle) + return log->l_logsize - (head_bytes - tail_bytes); + if (tail_cycle + 1 < head_cycle) return 0; - else if (tail_cycle < head_cycle) { + + /* Ignore potential inconsistency when shutdown. */ + if (xlog_is_shutdown(log)) + return log->l_logsize; + + if (tail_cycle < head_cycle) { ASSERT(tail_cycle == (head_cycle - 1)); - free_bytes = tail_bytes - head_bytes; - } else { - /* - * The reservation head is behind the tail. - * In this case we just want to return the size of the - * log as the amount of space left. - */ - xfs_alert(log->l_mp, "xlog_space_left: head behind tail"); - xfs_alert(log->l_mp, - " tail_cycle = %d, tail_bytes = %d", - tail_cycle, tail_bytes); - xfs_alert(log->l_mp, - " GH cycle = %d, GH bytes = %d", - head_cycle, head_bytes); - ASSERT(0); - free_bytes = log->l_logsize; + return tail_bytes - head_bytes; } - return free_bytes; + + /* + * The reservation head is behind the tail. In this case we just want to + * return the size of the log as the amount of space left. + */ + xfs_alert(log->l_mp, "xlog_space_left: head behind tail"); + xfs_alert(log->l_mp, " tail_cycle = %d, tail_bytes = %d", + tail_cycle, tail_bytes); + xfs_alert(log->l_mp, " GH cycle = %d, GH bytes = %d", + head_cycle, head_bytes); + ASSERT(0); + return log->l_logsize; } @@ -1349,6 +1427,32 @@ xfs_log_work_queue( } /* + * Clear the log incompat flags if we have the opportunity. + * + * This only happens if we're about to log the second dummy transaction as part + * of covering the log and we can get the log incompat feature usage lock. + */ +static inline void +xlog_clear_incompat( + struct xlog *log) +{ + struct xfs_mount *mp = log->l_mp; + + if (!xfs_sb_has_incompat_log_feature(&mp->m_sb, + XFS_SB_FEAT_INCOMPAT_LOG_ALL)) + return; + + if (log->l_covered_state != XLOG_STATE_COVER_DONE2) + return; + + if (!down_write_trylock(&log->l_incompat_users)) + return; + + xfs_clear_incompat_log_features(mp); + up_write(&log->l_incompat_users); +} + +/* * Every sync period we need to unpin all items in the AIL and push them to * disk. If there is nothing dirty, then we might need to cover the log to * indicate that the filesystem is idle. @@ -1374,6 +1478,7 @@ xfs_log_worker( * synchronously log the superblock instead to ensure the * superblock is immediately unpinned and can be written back. 
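A worked example of the grant arithmetic above, using a 64MB log; a cycle counts complete passes around the circular log, so the head may legitimately be at most one cycle ahead of the tail:

/*
 * head = (cycle 5, 10MB), tail = (cycle 5, 2MB):
 *	same cycle, head ahead	-> 64MB - (10MB - 2MB) = 56MB free
 * head = (cycle 6, 2MB),  tail = (cycle 5, 10MB):
 *	head wrapped once	-> 10MB - 2MB = 8MB free
 * head = (cycle 7, 0MB),  tail = (cycle 5, 10MB):
 *	over a full lap behind	-> 0 bytes free
 */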
*/ + xlog_clear_incompat(log); xfs_sync_sb(mp, true); } else xfs_log_force(mp, 0); @@ -1417,7 +1522,7 @@ xlog_alloc_log( log->l_logBBstart = blk_offset; log->l_logBBsize = num_bblks; log->l_covered_state = XLOG_STATE_COVER_IDLE; - log->l_flags |= XLOG_ACTIVE_RECOVERY; + set_bit(XLOG_ACTIVE_RECOVERY, &log->l_opstate); INIT_DELAYED_WORK(&log->l_work, xfs_log_worker); log->l_prev_block = -1; @@ -1426,7 +1531,7 @@ xlog_alloc_log( xlog_assign_atomic_lsn(&log->l_last_sync_lsn, 1, 0); log->l_curr_cycle = 1; /* 0 is bad since this is initial value */ - if (xfs_sb_version_haslogv2(&mp->m_sb) && mp->m_sb.sb_logsunit > 1) + if (xfs_has_logv2(mp) && mp->m_sb.sb_logsunit > 1) log->l_iclog_roundoff = mp->m_sb.sb_logsunit; else log->l_iclog_roundoff = BBSIZE; @@ -1435,7 +1540,7 @@ xlog_alloc_log( xlog_grant_head_init(&log->l_write_head); error = -EFSCORRUPTED; - if (xfs_sb_version_hassector(&mp->m_sb)) { + if (xfs_has_sector(mp)) { log2_size = mp->m_sb.sb_logsectlog; if (log2_size < BBSHIFT) { xfs_warn(mp, "Log sector size too small (0x%x < 0x%x)", @@ -1452,7 +1557,7 @@ xlog_alloc_log( /* for larger sector sizes, must have v2 or external log */ if (log2_size && log->l_logBBstart > 0 && - !xfs_sb_version_haslogv2(&mp->m_sb)) { + !xfs_has_logv2(mp)) { xfs_warn(mp, "log sector size (0x%x) invalid for configuration.", log2_size); @@ -1461,6 +1566,8 @@ xlog_alloc_log( } log->l_sectBBsize = 1 << log2_size; + init_rwsem(&log->l_incompat_users); + xlog_get_iclog_buffer_size(mp, log); spin_lock_init(&log->l_icloglock); @@ -1476,7 +1583,6 @@ xlog_alloc_log( */ ASSERT(log->l_iclog_size >= 4096); for (i = 0; i < log->l_iclog_bufs; i++) { - int align_mask = xfs_buftarg_dma_alignment(mp->m_logdev_targp); size_t bvec_size = howmany(log->l_iclog_size, PAGE_SIZE) * sizeof(struct bio_vec); @@ -1488,8 +1594,8 @@ xlog_alloc_log( iclog->ic_prev = prev_iclog; prev_iclog = iclog; - iclog->ic_data = kmem_alloc_io(log->l_iclog_size, align_mask, - KM_MAYFAIL | KM_ZERO); + iclog->ic_data = kvzalloc(log->l_iclog_size, + GFP_KERNEL | __GFP_RETRY_MAYFAIL); if (!iclog->ic_data) goto out_free_iclog; #ifdef DEBUG @@ -1499,7 +1605,7 @@ xlog_alloc_log( memset(head, 0, sizeof(xlog_rec_header_t)); head->h_magicno = cpu_to_be32(XLOG_HEADER_MAGIC_NUM); head->h_version = cpu_to_be32( - xfs_sb_version_haslogv2(&log->l_mp->m_sb) ? 2 : 1); + xfs_has_logv2(log->l_mp) ? 2 : 1); head->h_size = cpu_to_be32(log->l_iclog_size); /* new fields */ head->h_fmt = cpu_to_be32(XLOG_FMT); @@ -1551,37 +1657,6 @@ out: } /* xlog_alloc_log */ /* - * Write out the commit record of a transaction associated with the given - * ticket to close off a running log write. Return the lsn of the commit record. - */ -int -xlog_commit_record( - struct xlog *log, - struct xlog_ticket *ticket, - struct xlog_in_core **iclog, - xfs_lsn_t *lsn) -{ - struct xfs_log_iovec reg = { - .i_addr = NULL, - .i_len = 0, - .i_type = XLOG_REG_TYPE_COMMIT, - }; - struct xfs_log_vec vec = { - .lv_niovecs = 1, - .lv_iovecp = ®, - }; - int error; - - if (XLOG_FORCED_SHUTDOWN(log)) - return -EIO; - - error = xlog_write(log, &vec, ticket, lsn, iclog, XLOG_COMMIT_TRANS); - if (error) - xfs_force_shutdown(log->l_mp, SHUTDOWN_LOG_IO_ERROR); - return error; -} - -/* * Compute the LSN that we'd need to push the log tail towards in order to have * (a) enough on-disk log space to log the number of bytes specified, (b) at * least 25% of the log space free, and (c) at least 256 blocks free. 
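Dropping kmem_alloc_io() for the iclog data buffers works because log bios add the buffer to the bio a page at a time, so neither physical contiguity nor device DMA alignment matters; __GFP_RETRY_MAYFAIL makes the kmalloc attempt try hard before kvzalloc() falls back to vmalloc. For reference, a sketch of the page-by-page mapping helper this relies on; the exact body is an assumption based on xfs_log.c of this era:

static void
xlog_map_iclog_data(
	struct bio	*bio,
	void		*data,
	size_t		count)
{
	do {
		struct page	*page = kmem_to_page(data);
		unsigned int	off = offset_in_page(data);
		size_t		len = min_t(size_t, count, PAGE_SIZE - off);

		WARN_ON_ONCE(bio_add_page(bio, page, len, off) != len);
		data += len;
		count -= len;
	} while (count);
}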
If the @@ -1653,7 +1728,7 @@ xlog_grant_push_ail( xfs_lsn_t threshold_lsn; threshold_lsn = xlog_grant_push_threshold(log, need_bytes); - if (threshold_lsn == NULLCOMMITLSN || XLOG_FORCED_SHUTDOWN(log)) + if (threshold_lsn == NULLCOMMITLSN || xlog_is_shutdown(log)) return; /* @@ -1689,7 +1764,7 @@ xlog_pack_data( dp += BBSIZE; } - if (xfs_sb_version_haslogv2(&log->l_mp->m_sb)) { + if (xfs_has_logv2(log->l_mp)) { xlog_in_core_2_t *xhdr = iclog->ic_data; for ( ; i < BTOBB(size); i++) { @@ -1726,7 +1801,7 @@ xlog_cksum( offsetof(struct xlog_rec_header, h_crc)); /* ... then for additional cycle data for v2 logs ... */ - if (xfs_sb_version_haslogv2(&log->l_mp->m_sb)) { + if (xfs_has_logv2(log->l_mp)) { union xlog_in_core2 *xhdr = (union xlog_in_core2 *)rhead; int i; int xheads; @@ -1795,7 +1870,7 @@ xlog_write_iclog( * across the log IO to archieve that. */ down(&iclog->ic_sema); - if (unlikely(iclog->ic_state == XLOG_STATE_IOERROR)) { + if (xlog_is_shutdown(log)) { /* * It would seem logical to return EIO here, but we rely on * the log state machine to propagate I/O errors instead of @@ -1953,7 +2028,7 @@ xlog_sync( /* real byte length */ size = iclog->ic_offset; - if (xfs_sb_version_haslogv2(&log->l_mp->m_sb)) + if (xfs_has_logv2(log->l_mp)) size += roundoff; iclog->ic_header.h_len = cpu_to_be32(size); @@ -2303,8 +2378,7 @@ xlog_write_copy_finish( int *data_cnt, int *partial_copy, int *partial_copy_len, - int log_offset, - struct xlog_in_core **commit_iclog) + int log_offset) { int error; @@ -2323,27 +2397,20 @@ xlog_write_copy_finish( *partial_copy = 0; *partial_copy_len = 0; - if (iclog->ic_size - log_offset <= sizeof(xlog_op_header_t)) { - /* no more space in this iclog - push it. */ - spin_lock(&log->l_icloglock); - xlog_state_finish_copy(log, iclog, *record_cnt, *data_cnt); - *record_cnt = 0; - *data_cnt = 0; - - if (iclog->ic_state == XLOG_STATE_ACTIVE) - xlog_state_switch_iclogs(log, iclog, 0); - else - ASSERT(iclog->ic_state == XLOG_STATE_WANT_SYNC || - iclog->ic_state == XLOG_STATE_IOERROR); - if (!commit_iclog) - goto release_iclog; - spin_unlock(&log->l_icloglock); - ASSERT(flags & XLOG_COMMIT_TRANS); - *commit_iclog = iclog; - } + if (iclog->ic_size - log_offset > sizeof(xlog_op_header_t)) + return 0; - return 0; + /* no more space in this iclog - push it. */ + spin_lock(&log->l_icloglock); + xlog_state_finish_copy(log, iclog, *record_cnt, *data_cnt); + *record_cnt = 0; + *data_cnt = 0; + if (iclog->ic_state == XLOG_STATE_ACTIVE) + xlog_state_switch_iclogs(log, iclog, 0); + else + ASSERT(iclog->ic_state == XLOG_STATE_WANT_SYNC || + xlog_is_shutdown(log)); release_iclog: error = xlog_state_release_iclog(log, iclog, 0); spin_unlock(&log->l_icloglock); @@ -2393,10 +2460,9 @@ release_iclog: int xlog_write( struct xlog *log, + struct xfs_cil_ctx *ctx, struct xfs_log_vec *log_vector, struct xlog_ticket *ticket, - xfs_lsn_t *start_lsn, - struct xlog_in_core **commit_iclog, uint optype) { struct xlog_in_core *iclog = NULL; @@ -2426,8 +2492,6 @@ xlog_write( } len = xlog_write_calc_vec_length(ticket, log_vector, optype); - if (start_lsn) - *start_lsn = 0; while (lv && (!lv->lv_niovecs || index < lv->lv_niovecs)) { void *ptr; int log_offset; @@ -2440,9 +2504,15 @@ xlog_write( ASSERT(log_offset <= iclog->ic_size - 1); ptr = iclog->ic_datap + log_offset; - /* Start_lsn is the first lsn written to. 
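xlog_write() now threads the CIL context down instead of returning start_lsn and commit_iclog through out parameters; the hunk that follows records the first iclog written on that context so the CIL can enforce commit-record ordering itself. The new prototype, as changed above; non-CIL callers such as the unmount record simply pass a NULL ctx:

int xlog_write(struct xlog *log, struct xfs_cil_ctx *ctx,
		struct xfs_log_vec *log_vector, struct xlog_ticket *ticket,
		uint optype);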
*/ - if (start_lsn && !*start_lsn) - *start_lsn = be64_to_cpu(iclog->ic_header.h_lsn); + /* + * If we have a context pointer, pass it the first iclog we are + * writing to so it can record state needed for iclog write + * ordering. + */ + if (ctx) { + xlog_cil_set_ctx_write_state(ctx, iclog); + ctx = NULL; + } /* * This loop writes out as many regions as can fit in the amount @@ -2521,8 +2591,7 @@ xlog_write( &record_cnt, &data_cnt, &partial_copy, &partial_copy_len, - log_offset, - commit_iclog); + log_offset); if (error) return error; @@ -2560,12 +2629,7 @@ next_lv: spin_lock(&log->l_icloglock); xlog_state_finish_copy(log, iclog, record_cnt, data_cnt); - if (commit_iclog) { - ASSERT(optype & XLOG_COMMIT_TRANS); - *commit_iclog = iclog; - } else { - error = xlog_state_release_iclog(log, iclog, 0); - } + error = xlog_state_release_iclog(log, iclog, 0); spin_unlock(&log->l_icloglock); return error; @@ -2751,8 +2815,7 @@ xlog_state_set_callback( static bool xlog_state_iodone_process_iclog( struct xlog *log, - struct xlog_in_core *iclog, - bool *ioerror) + struct xlog_in_core *iclog) { xfs_lsn_t lowest_lsn; xfs_lsn_t header_lsn; @@ -2764,15 +2827,6 @@ xlog_state_iodone_process_iclog( * Skip all iclogs in the ACTIVE & DIRTY states: */ return false; - case XLOG_STATE_IOERROR: - /* - * Between marking a filesystem SHUTDOWN and stopping the log, - * we do flush all iclogs to disk (if there wasn't a log I/O - * error). So, we do want things to go smoothly in case of just - * a SHUTDOWN w/o a LOG_IO_ERROR. - */ - *ioerror = true; - return false; case XLOG_STATE_DONE_SYNC: /* * Now that we have an iclog that is in the DONE_SYNC state, do @@ -2796,72 +2850,75 @@ xlog_state_iodone_process_iclog( } } -STATIC void -xlog_state_do_callback( +/* + * Loop over all the iclogs, running attached callbacks on them. Return true if + * we ran any callbacks, indicating that we dropped the icloglock. We don't need + * to handle transient shutdown state here at all because + * xlog_state_shutdown_callbacks() will be run to do the necessary shutdown + * cleanup of the callbacks. + */ +static bool +xlog_state_do_iclog_callbacks( struct xlog *log) + __releases(&log->l_icloglock) + __acquires(&log->l_icloglock) { - struct xlog_in_core *iclog; - struct xlog_in_core *first_iclog; - bool cycled_icloglock; - bool ioerror; - int flushcnt = 0; - int repeats = 0; + struct xlog_in_core *first_iclog = log->l_iclog; + struct xlog_in_core *iclog = first_iclog; + bool ran_callback = false; - spin_lock(&log->l_icloglock); do { - /* - * Scan all iclogs starting with the one pointed to by the - * log. Reset this starting point each time the log is - * unlocked (during callbacks). - * - * Keep looping through iclogs until one full pass is made - * without running any callbacks. 
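The __releases/__acquires annotations below document that the callback iterator may drop and retake l_icloglock internally but returns with it held, which is what lets completions run without nesting inside the spinlock. The shape in isolation, as a minimal illustrative helper:

static bool
run_parked_callbacks(struct xlog *log, struct list_head *cb_list)
	__releases(&log->l_icloglock)
	__acquires(&log->l_icloglock)
{
	if (list_empty(cb_list))
		return false;

	spin_unlock(&log->l_icloglock);
	xlog_cil_process_committed(cb_list);	/* may sleep */
	spin_lock(&log->l_icloglock);
	return true;
}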
- */ - first_iclog = log->l_iclog; - iclog = log->l_iclog; - cycled_icloglock = false; - ioerror = false; - repeats++; + LIST_HEAD(cb_list); - do { - LIST_HEAD(cb_list); + if (xlog_state_iodone_process_iclog(log, iclog)) + break; + if (iclog->ic_state != XLOG_STATE_CALLBACK) { + iclog = iclog->ic_next; + continue; + } + list_splice_init(&iclog->ic_callbacks, &cb_list); + spin_unlock(&log->l_icloglock); - if (xlog_state_iodone_process_iclog(log, iclog, - &ioerror)) - break; + trace_xlog_iclog_callbacks_start(iclog, _RET_IP_); + xlog_cil_process_committed(&cb_list); + trace_xlog_iclog_callbacks_done(iclog, _RET_IP_); + ran_callback = true; - if (iclog->ic_state != XLOG_STATE_CALLBACK && - iclog->ic_state != XLOG_STATE_IOERROR) { - iclog = iclog->ic_next; - continue; - } - list_splice_init(&iclog->ic_callbacks, &cb_list); - spin_unlock(&log->l_icloglock); + spin_lock(&log->l_icloglock); + xlog_state_clean_iclog(log, iclog); + iclog = iclog->ic_next; + } while (iclog != first_iclog); + + return ran_callback; +} - trace_xlog_iclog_callbacks_start(iclog, _RET_IP_); - xlog_cil_process_committed(&cb_list); - trace_xlog_iclog_callbacks_done(iclog, _RET_IP_); - cycled_icloglock = true; - spin_lock(&log->l_icloglock); - if (XLOG_FORCED_SHUTDOWN(log)) - wake_up_all(&iclog->ic_force_wait); - else - xlog_state_clean_iclog(log, iclog); - iclog = iclog->ic_next; - } while (first_iclog != iclog); +/* + * Loop running iclog completion callbacks until there are no more iclogs in a + * state that can run callbacks. + */ +STATIC void +xlog_state_do_callback( + struct xlog *log) +{ + int flushcnt = 0; + int repeats = 0; + + spin_lock(&log->l_icloglock); + while (xlog_state_do_iclog_callbacks(log)) { + if (xlog_is_shutdown(log)) + break; - if (repeats > 5000) { + if (++repeats > 5000) { flushcnt += repeats; repeats = 0; xfs_warn(log->l_mp, "%s: possible infinite loop (%d iterations)", __func__, flushcnt); } - } while (!ioerror && cycled_icloglock); + } - if (log->l_iclog->ic_state == XLOG_STATE_ACTIVE || - log->l_iclog->ic_state == XLOG_STATE_IOERROR) + if (log->l_iclog->ic_state == XLOG_STATE_ACTIVE) wake_up_all(&log->l_flush_wait); spin_unlock(&log->l_icloglock); @@ -2871,13 +2928,6 @@ xlog_state_do_callback( /* * Finish transitioning this iclog to the dirty state. * - * Make sure that we completely execute this routine only when this is - * the last call to the iclog. There is a good chance that iclog flushes, - * when we reach the end of the physical log, get turned into 2 separate - * calls to bwrite. Hence, one iclog flush could generate two calls to this - * routine. By using the reference count bwritecnt, we guarantee that only - * the second completion goes through. - * * Callbacks could take time, so they are done outside the scope of the * global state machine log lock. */ @@ -2896,7 +2946,7 @@ xlog_state_done_syncing( * split log writes, on the second, we shut down the file system and * no iclogs should ever be attempted to be written to disk again. */ - if (!XLOG_FORCED_SHUTDOWN(log)) { + if (!xlog_is_shutdown(log)) { ASSERT(iclog->ic_state == XLOG_STATE_SYNCING); iclog->ic_state = XLOG_STATE_DONE_SYNC; } @@ -2944,7 +2994,7 @@ xlog_state_get_iclog_space( restart: spin_lock(&log->l_icloglock); - if (XLOG_FORCED_SHUTDOWN(log)) { + if (xlog_is_shutdown(log)) { spin_unlock(&log->l_icloglock); return -EIO; } @@ -3122,7 +3172,7 @@ xfs_log_ticket_ungrant( * This routine will mark the current iclog in the ring as WANT_SYNC and move * the current iclog pointer to the next iclog in the ring. 
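Both force paths now test xlog_is_shutdown() before dereferencing l_iclog, since iclog state is not trustworthy once the log is shut down. For reference, the two public entry points as used across this series; the name of the final out parameter is an assumption:

	/* Flush everything pinned in memory and wait for stable storage: */
	xfs_log_force(mp, XFS_LOG_SYNC);

	/* Flush up to and including a specific CIL checkpoint sequence: */
	error = xfs_log_force_seq(mp, seq, XFS_LOG_SYNC, &log_flushed);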
*/ -STATIC void +void xlog_state_switch_iclogs( struct xlog *log, struct xlog_in_core *iclog, @@ -3237,10 +3287,10 @@ xfs_log_force( xlog_cil_force(log); spin_lock(&log->l_icloglock); - iclog = log->l_iclog; - if (iclog->ic_state == XLOG_STATE_IOERROR) + if (xlog_is_shutdown(log)) goto out_error; + iclog = log->l_iclog; trace_xlog_iclog_force(iclog, _RET_IP_); if (iclog->ic_state == XLOG_STATE_DIRTY || @@ -3294,6 +3344,20 @@ out_error: return -EIO; } +/* + * Force the log to a specific LSN. + * + * If an iclog with that lsn can be found: + * If it is in the DIRTY state, just return. + * If it is in the ACTIVE state, move the in-core log into the WANT_SYNC + * state and go to sleep or return. + * If it is in any other state, go to sleep or return. + * + * Synchronous forces are implemented with a wait queue. All callers trying + * to force a given lsn to disk must wait on the queue attached to the + * specific in-core log. When given in-core log finally completes its write + * to disk, that thread will wake up all threads waiting on the queue. + */ static int xlog_force_lsn( struct xlog *log, @@ -3306,10 +3370,10 @@ xlog_force_lsn( bool completed; spin_lock(&log->l_icloglock); - iclog = log->l_iclog; - if (iclog->ic_state == XLOG_STATE_IOERROR) + if (xlog_is_shutdown(log)) goto out_error; + iclog = log->l_iclog; while (be64_to_cpu(iclog->ic_header.h_lsn) != lsn) { trace_xlog_iclog_force_lsn(iclog, _RET_IP_); iclog = iclog->ic_next; @@ -3379,18 +3443,13 @@ out_error: } /* - * Force the in-core log to disk for a specific LSN. - * - * Find in-core log with lsn. - * If it is in the DIRTY state, just return. - * If it is in the ACTIVE state, move the in-core log into the WANT_SYNC - * state and go to sleep or return. - * If it is in any other state, go to sleep or return. + * Force the log to a specific checkpoint sequence. * - * Synchronous forces are implemented with a wait queue. All callers trying - * to force a given lsn to disk must wait on the queue attached to the - * specific in-core log. When given in-core log finally completes its write - * to disk, that thread will wake up all threads waiting on the queue. + * First force the CIL so that all the required changes have been flushed to the + * iclogs. If the CIL force completed it will return a commit LSN that indicates + * the iclog that needs to be flushed to stable storage. If the caller needs + * a synchronous log force, we will wait on the iclog with the LSN returned by + * xlog_cil_force_seq() to be completed. */ int xfs_log_force_seq( @@ -3619,17 +3678,15 @@ xlog_verify_grant_tail( xlog_crack_atomic_lsn(&log->l_tail_lsn, &tail_cycle, &tail_blocks); if (tail_cycle != cycle) { if (cycle - 1 != tail_cycle && - !(log->l_flags & XLOG_TAIL_WARN)) { + !test_and_set_bit(XLOG_TAIL_WARN, &log->l_opstate)) { xfs_alert_tag(log->l_mp, XFS_PTAG_LOGRES, "%s: cycle - 1 != tail_cycle", __func__); - log->l_flags |= XLOG_TAIL_WARN; } if (space > BBTOB(tail_blocks) && - !(log->l_flags & XLOG_TAIL_WARN)) { + !test_and_set_bit(XLOG_TAIL_WARN, &log->l_opstate)) { xfs_alert_tag(log->l_mp, XFS_PTAG_LOGRES, "%s: space > BBTOB(tail_blocks)", __func__); - log->l_flags |= XLOG_TAIL_WARN; } } } @@ -3765,105 +3822,66 @@ xlog_verify_iclog( #endif /* - * Mark all iclogs IOERROR. l_icloglock is held by the caller. - */ -STATIC int -xlog_state_ioerror( - struct xlog *log) -{ - xlog_in_core_t *iclog, *ic; - - iclog = log->l_iclog; - if (iclog->ic_state != XLOG_STATE_IOERROR) { - /* - * Mark all the incore logs IOERROR. - * From now on, no log flushes will result. 
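The rewrite below splits shutdown in two: the mount-level code atomically marks the filesystem down, then asks the log to tear itself down via xlog_force_shutdown(). A hypothetical sketch of the high-level caller under that split; apart from xlog_force_shutdown() itself, the names here are assumptions:

void
xfs_do_force_shutdown_sketch(
	struct xfs_mount	*mp,
	int			flags)
{
	/* First shutdown caller wins; later callers are no-ops. */
	if (test_and_set_bit(XFS_OPSTATE_SHUTDOWN, &mp->m_opstate))
		return;
	if (mp->m_sb_bp)
		mp->m_sb_bp->b_flags |= XBF_DONE;

	if (xlog_force_shutdown(mp->m_log, flags))
		xfs_alert(mp, "shut down due to log I/O error");
}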
- */ - ic = iclog; - do { - ic->ic_state = XLOG_STATE_IOERROR; - ic = ic->ic_next; - } while (ic != iclog); - return 0; - } - /* - * Return non-zero, if state transition has already happened. - */ - return 1; -} - -/* - * This is called from xfs_force_shutdown, when we're forcibly - * shutting down the filesystem, typically because of an IO error. + * Perform a forced shutdown on the log. This should be called once and once + * only by the high level filesystem shutdown code to shut the log subsystem + * down cleanly. + * * Our main objectives here are to make sure that: - * a. if !logerror, flush the logs to disk. Anything modified - * after this is ignored. - * b. the filesystem gets marked 'SHUTDOWN' for all interested - * parties to find out, 'atomically'. - * c. those who're sleeping on log reservations, pinned objects and - * other resources get woken up, and be told the bad news. - * d. nothing new gets queued up after (b) and (c) are done. + * a. if the shutdown was not due to a log IO error, flush the logs to + * disk. Anything modified after this is ignored. + * b. the log gets atomically marked 'XLOG_IO_ERROR' for all interested + * parties to find out. Nothing new gets queued after this is done. + * c. Tasks sleeping on log reservations, pinned objects and + * other resources get woken up. * - * Note: for the !logerror case we need to flush the regions held in memory out - * to disk first. This needs to be done before the log is marked as shutdown, - * otherwise the iclog writes will fail. + * Return true if the shutdown cause was a log IO error and we actually shut the + * log down. */ -int -xfs_log_force_umount( - struct xfs_mount *mp, - int logerror) +bool +xlog_force_shutdown( + struct xlog *log, + int shutdown_flags) { - struct xlog *log; - int retval; - - log = mp->m_log; + bool log_error = (shutdown_flags & SHUTDOWN_LOG_IO_ERROR); /* - * If this happens during log recovery, don't worry about - * locking; the log isn't open for business yet. + * If this happens during log recovery then we aren't using the runtime + * log mechanisms yet so there's nothing to shut down. */ - if (!log || - log->l_flags & XLOG_ACTIVE_RECOVERY) { - mp->m_flags |= XFS_MOUNT_FS_SHUTDOWN; - if (mp->m_sb_bp) - mp->m_sb_bp->b_flags |= XBF_DONE; - return 0; - } + if (!log || xlog_in_recovery(log)) + return false; - /* - * Somebody could've already done the hard work for us. - * No need to get locks for this. - */ - if (logerror && log->l_iclog->ic_state == XLOG_STATE_IOERROR) { - ASSERT(XLOG_FORCED_SHUTDOWN(log)); - return 1; - } + ASSERT(!xlog_is_shutdown(log)); /* * Flush all the completed transactions to disk before marking the log - * being shut down. We need to do it in this order to ensure that - * completed operations are safely on disk before we shut down, and that - * we don't have to issue any buffer IO after the shutdown flags are set - * to guarantee this. + * being shut down. We need to do this first as shutting down the log + * before the force will prevent the log force from flushing the iclogs + * to disk. + * + * Re-entry due to a log IO error shutdown during the log force is + * prevented by the atomicity of higher level shutdown code. */ - if (!logerror) - xfs_log_force(mp, XFS_LOG_SYNC); + if (!log_error) + xfs_log_force(log->l_mp, XFS_LOG_SYNC); /* - * mark the filesystem and the as in a shutdown state and wake - * everybody up to tell them the bad news. + * Atomically set the shutdown state. 
If the shutdown state is already + set, then someone else is performing the shutdown and so we are done + here. This should never happen because we should only ever get called + once by the first shutdown caller. + * + * Many of the log state machine transitions assume that shutdown state + * cannot change once they hold the log->l_icloglock. Hence we need to + * hold that lock here, even though we use the atomic test_and_set_bit() + * operation to set the shutdown state. */ spin_lock(&log->l_icloglock); - mp->m_flags |= XFS_MOUNT_FS_SHUTDOWN; - if (mp->m_sb_bp) - mp->m_sb_bp->b_flags |= XBF_DONE; - - /* - * Mark the log and the iclogs with IO error flags to prevent any - * further log IO from being issued or completed. - */ - log->l_flags |= XLOG_IO_ERROR; - retval = xlog_state_ioerror(log); + if (test_and_set_bit(XLOG_IO_ERROR, &log->l_opstate)) { + spin_unlock(&log->l_icloglock); + ASSERT(0); + return false; + } spin_unlock(&log->l_icloglock); /* @@ -3883,12 +3901,12 @@ xfs_log_force_umount( * avoid races. */ spin_lock(&log->l_cilp->xc_push_lock); + wake_up_all(&log->l_cilp->xc_start_wait); wake_up_all(&log->l_cilp->xc_commit_wait); spin_unlock(&log->l_cilp->xc_push_lock); - xlog_state_do_callback(log); + xlog_state_shutdown_callbacks(log); - /* return non-zero if log IOERROR transition had already happened */ - return retval; + return log_error; } STATIC int @@ -3926,7 +3944,7 @@ xfs_log_check_lsn( * resets the in-core LSN. We can't validate in this mode, but * modifications are not allowed anyways so just return true. */ - if (mp->m_flags & XFS_MOUNT_NORECOVERY) + if (xfs_has_norecovery(mp)) return true; /* @@ -3952,11 +3970,22 @@ xfs_log_check_lsn( return valid; } -bool -xfs_log_in_recovery( - struct xfs_mount *mp) +/* + * Notify the log that we're about to start using a feature that is protected + * by a log incompat feature flag. This will prevent log covering from + * clearing those flags. + */ +void +xlog_use_incompat_feat( + struct xlog *log) { - struct xlog *log = mp->m_log; + down_read(&log->l_incompat_users); +} - return log->l_flags & XLOG_ACTIVE_RECOVERY; +/* Notify the log that we've finished using log incompat features. 
*/ +void +xlog_drop_incompat_feat( + struct xlog *log) +{ + up_read(&log->l_incompat_users); } diff --git a/fs/xfs/xfs_log.h b/fs/xfs/xfs_log.h index 813b972e9788..dc1b77b92fc1 100644 --- a/fs/xfs/xfs_log.h +++ b/fs/xfs/xfs_log.h @@ -104,6 +104,7 @@ struct xlog_ticket; struct xfs_log_item; struct xfs_item_ops; struct xfs_trans; +struct xlog; int xfs_log_force(struct xfs_mount *mp, uint flags); int xfs_log_force_seq(struct xfs_mount *mp, xfs_csn_t seq, uint flags, @@ -125,7 +126,6 @@ int xfs_log_reserve(struct xfs_mount *mp, bool permanent); int xfs_log_regrant(struct xfs_mount *mp, struct xlog_ticket *tic); void xfs_log_unmount(struct xfs_mount *mp); -int xfs_log_force_umount(struct xfs_mount *mp, int logerror); bool xfs_log_writable(struct xfs_mount *mp); struct xlog_ticket *xfs_log_ticket_get(struct xlog_ticket *ticket); @@ -138,8 +138,11 @@ void xfs_log_work_queue(struct xfs_mount *mp); int xfs_log_quiesce(struct xfs_mount *mp); void xfs_log_clean(struct xfs_mount *mp); bool xfs_log_check_lsn(struct xfs_mount *, xfs_lsn_t); -bool xfs_log_in_recovery(struct xfs_mount *); xfs_lsn_t xlog_grant_push_threshold(struct xlog *log, int need_bytes); +bool xlog_force_shutdown(struct xlog *log, int shutdown_flags); + +void xlog_use_incompat_feat(struct xlog *log); +void xlog_drop_incompat_feat(struct xlog *log); #endif /* __XFS_LOG_H__ */ diff --git a/fs/xfs/xfs_log_cil.c b/fs/xfs/xfs_log_cil.c index 4c44bc3786c0..6c93c8ada6f3 100644 --- a/fs/xfs/xfs_log_cil.c +++ b/fs/xfs/xfs_log_cil.c @@ -48,6 +48,34 @@ xlog_cil_ticket_alloc( } /* + * Unavoidable forward declaration - xlog_cil_push_work() calls + * xlog_cil_ctx_alloc() itself. + */ +static void xlog_cil_push_work(struct work_struct *work); + +static struct xfs_cil_ctx * +xlog_cil_ctx_alloc(void) +{ + struct xfs_cil_ctx *ctx; + + ctx = kmem_zalloc(sizeof(*ctx), KM_NOFS); + INIT_LIST_HEAD(&ctx->committing); + INIT_LIST_HEAD(&ctx->busy_extents); + INIT_WORK(&ctx->push_work, xlog_cil_push_work); + return ctx; +} + +static void +xlog_cil_ctx_switch( + struct xfs_cil *cil, + struct xfs_cil_ctx *ctx) +{ + ctx->sequence = ++cil->xc_current_sequence; + ctx->cil = cil; + cil->xc_ctx = ctx; +} + +/* * After the first stage of log recovery is done, we know where the head and * tail of the log are. We need this log initialisation done before we can * initialise the first CIL checkpoint context. @@ -185,7 +213,15 @@ xlog_cil_alloc_shadow_bufs( */ kmem_free(lip->li_lv_shadow); - lv = kmem_alloc_large(buf_size, KM_NOFS); + /* + * We are in transaction context, which means this + * allocation will pick up GFP_NOFS from the + * memalloc_nofs_save/restore context the transaction + * holds. This means we can use GFP_KERNEL here so the + * generic kvmalloc() code will run vmalloc on + * contiguous page allocation failure as we require. + */ + lv = kvmalloc(buf_size, GFP_KERNEL); memset(lv, 0, xlog_cil_iovec_space(niovecs)); lv->lv_item = lip; @@ -535,7 +571,7 @@ xlog_discard_busy_extents( struct blk_plug plug; int error = 0; - ASSERT(mp->m_flags & XFS_MOUNT_DISCARD); + ASSERT(xfs_has_discard(mp)); blk_start_plug(&plug); list_for_each_entry(busyp, list, list) { @@ -576,7 +612,7 @@ xlog_cil_committed( struct xfs_cil_ctx *ctx) { struct xfs_mount *mp = ctx->cil->xc_log->l_mp; - bool abort = XLOG_FORCED_SHUTDOWN(ctx->cil->xc_log); + bool abort = xlog_is_shutdown(ctx->cil->xc_log); /* * If the I/O failed, we're aborting the commit and already shutdown. 
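/*
 * Aside: the shadow buffer allocation in xlog_cil_alloc_shadow_bufs() above
 * relies on the transaction already running under memalloc_nofs_save(), so a
 * plain GFP_KERNEL kvmalloc() is implicitly NOFS yet still falls back to
 * vmalloc() on contiguous allocation failure. A standalone sketch of that
 * scoping pattern, with hypothetical demo_* names:
 */
#include <linux/mm.h>
#include <linux/sched/mm.h>

static void *demo_alloc_fs_context(size_t size)
{
	unsigned int nofs_flag;
	void *ptr;

	/*
	 * Everything allocated inside this scope, including vmalloc page
	 * tables, behaves as if GFP_NOFS had been passed explicitly.
	 */
	nofs_flag = memalloc_nofs_save();
	ptr = kvmalloc(size, GFP_KERNEL);
	memalloc_nofs_restore(nofs_flag);
	return ptr;			/* release with kvfree() */
}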
@@ -587,6 +623,7 @@ xlog_cil_committed( */ if (abort) { spin_lock(&ctx->cil->xc_push_lock); + wake_up_all(&ctx->cil->xc_start_wait); wake_up_all(&ctx->cil->xc_commit_wait); spin_unlock(&ctx->cil->xc_push_lock); } @@ -596,7 +633,7 @@ xlog_cil_committed( xfs_extent_busy_sort(&ctx->busy_extents); xfs_extent_busy_clear(mp, &ctx->busy_extents, - (mp->m_flags & XFS_MOUNT_DISCARD) && !abort); + xfs_has_discard(mp) && !abort); spin_lock(&ctx->cil->xc_push_lock); list_del(&ctx->committing); @@ -624,6 +661,180 @@ xlog_cil_process_committed( } /* +* Record the LSN of the iclog we were just granted space to start writing into. +* If the context doesn't have a start_lsn recorded, then this iclog will +* contain the start record for the checkpoint. Otherwise this write contains +* the commit record for the checkpoint. +*/ +void +xlog_cil_set_ctx_write_state( + struct xfs_cil_ctx *ctx, + struct xlog_in_core *iclog) +{ + struct xfs_cil *cil = ctx->cil; + xfs_lsn_t lsn = be64_to_cpu(iclog->ic_header.h_lsn); + + ASSERT(!ctx->commit_lsn); + if (!ctx->start_lsn) { + spin_lock(&cil->xc_push_lock); + /* + * The LSN we need to pass to the log items on transaction + * commit is the LSN reported by the first log vector write, not + * the commit lsn. If we use the commit record lsn then we can + * move the tail beyond the grant write head. + */ + ctx->start_lsn = lsn; + wake_up_all(&cil->xc_start_wait); + spin_unlock(&cil->xc_push_lock); + return; + } + + /* + * Take a reference to the iclog for the context so that we still hold + * it when xlog_write is done and has released it. This means the + * context controls when the iclog is released for IO. + */ + atomic_inc(&iclog->ic_refcnt); + + /* + * xlog_state_get_iclog_space() guarantees there is enough space in the + * iclog for an entire commit record, so we can attach the context + * callbacks now. This needs to be done before we make the commit_lsn + * visible to waiters so that checkpoints with commit records in the + * same iclog order their IO completion callbacks in the same order that + * the commit records appear in the iclog. + */ + spin_lock(&cil->xc_log->l_icloglock); + list_add_tail(&ctx->iclog_entry, &iclog->ic_callbacks); + spin_unlock(&cil->xc_log->l_icloglock); + + /* + * Now we can record the commit LSN and wake anyone waiting for this + * sequence to have the ordered commit record assigned to a physical + * location in the log. + */ + spin_lock(&cil->xc_push_lock); + ctx->commit_iclog = iclog; + ctx->commit_lsn = lsn; + wake_up_all(&cil->xc_commit_wait); + spin_unlock(&cil->xc_push_lock); +} + + +/* + * Ensure that the order of log writes follows checkpoint sequence order. This + * relies on the context LSN being zero until the log write has guaranteed the + * LSN that the log write will start at via xlog_state_get_iclog_space(). + */ +enum _record_type { + _START_RECORD, + _COMMIT_RECORD, +}; + +static int +xlog_cil_order_write( + struct xfs_cil *cil, + xfs_csn_t sequence, + enum _record_type record) +{ + struct xfs_cil_ctx *ctx; + +restart: + spin_lock(&cil->xc_push_lock); + list_for_each_entry(ctx, &cil->xc_committing, committing) { + /* + * Avoid getting stuck in this loop because we were woken by the + * shutdown, but then went back to sleep once already in the + * shutdown state. + */ + if (xlog_is_shutdown(cil->xc_log)) { + spin_unlock(&cil->xc_push_lock); + return -EIO; + } + + /* + * Higher sequences will wait for this one so skip them. + * Don't wait for our own sequence, either. 
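/*
 * For illustration: the publish-once-and-wake scheme used by
 * xlog_cil_set_ctx_write_state() above. A zero-initialised field is assigned
 * at most once under a spinlock, then wake_up_all() lets sleepers re-check
 * it. demo_* names are hypothetical; initialise with spin_lock_init() and
 * init_waitqueue_head().
 */
#include <linux/spinlock.h>
#include <linux/types.h>
#include <linux/wait.h>

struct demo_ctx {
	u64			start_lsn;	/* 0 until published */
	spinlock_t		lock;
	wait_queue_head_t	wait;
};

static void demo_publish_start_lsn(struct demo_ctx *ctx, u64 lsn)
{
	spin_lock(&ctx->lock);
	if (!ctx->start_lsn) {
		ctx->start_lsn = lsn;		/* first writer wins */
		wake_up_all(&ctx->wait);
	}
	spin_unlock(&ctx->lock);
}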
+ */ + if (ctx->sequence >= sequence) + continue; + + /* Wait until the LSN for the record has been recorded. */ + switch (record) { + case _START_RECORD: + if (!ctx->start_lsn) { + xlog_wait(&cil->xc_start_wait, &cil->xc_push_lock); + goto restart; + } + break; + case _COMMIT_RECORD: + if (!ctx->commit_lsn) { + xlog_wait(&cil->xc_commit_wait, &cil->xc_push_lock); + goto restart; + } + break; + } + } + spin_unlock(&cil->xc_push_lock); + return 0; +} + +/* + * Write out the log vector change now attached to the CIL context. This will + * write a start record that needs to be strictly ordered in ascending CIL + * sequence order so that log recovery will always use in-order start LSNs when + * replaying checkpoints. + */ +static int +xlog_cil_write_chain( + struct xfs_cil_ctx *ctx, + struct xfs_log_vec *chain) +{ + struct xlog *log = ctx->cil->xc_log; + int error; + + error = xlog_cil_order_write(ctx->cil, ctx->sequence, _START_RECORD); + if (error) + return error; + return xlog_write(log, ctx, chain, ctx->ticket, XLOG_START_TRANS); +} + +/* + * Write out the commit record of a checkpoint transaction to close off a + * running log write. These commit records are strictly ordered in ascending CIL + * sequence order so that log recovery will always replay the checkpoints in the + * correct order. + */ +static int +xlog_cil_write_commit_record( + struct xfs_cil_ctx *ctx) +{ + struct xlog *log = ctx->cil->xc_log; + struct xfs_log_iovec reg = { + .i_addr = NULL, + .i_len = 0, + .i_type = XLOG_REG_TYPE_COMMIT, + }; + struct xfs_log_vec vec = { + .lv_niovecs = 1, + .lv_iovecp = ®, + }; + int error; + + if (xlog_is_shutdown(log)) + return -EIO; + + error = xlog_cil_order_write(ctx->cil, ctx->sequence, _COMMIT_RECORD); + if (error) + return error; + + error = xlog_write(log, ctx, &vec, ctx->ticket, XLOG_COMMIT_TRANS); + if (error) + xfs_force_shutdown(log->l_mp, SHUTDOWN_LOG_IO_ERROR); + return error; +} + +/* * Push the Committed Item List to the log. * * If the current sequence is the same as xc_push_seq we need to do a flush. If @@ -641,13 +852,12 @@ static void xlog_cil_push_work( struct work_struct *work) { - struct xfs_cil *cil = - container_of(work, struct xfs_cil, xc_push_work); + struct xfs_cil_ctx *ctx = + container_of(work, struct xfs_cil_ctx, push_work); + struct xfs_cil *cil = ctx->cil; struct xlog *log = cil->xc_log; struct xfs_log_vec *lv; - struct xfs_cil_ctx *ctx; struct xfs_cil_ctx *new_ctx; - struct xlog_in_core *commit_iclog; struct xlog_ticket *tic; int num_iovecs; int error = 0; @@ -655,20 +865,21 @@ xlog_cil_push_work( struct xfs_log_iovec lhdr; struct xfs_log_vec lvhdr = { NULL }; xfs_lsn_t preflush_tail_lsn; - xfs_lsn_t commit_lsn; xfs_csn_t push_seq; struct bio bio; DECLARE_COMPLETION_ONSTACK(bdev_flush); + bool push_commit_stable; - new_ctx = kmem_zalloc(sizeof(*new_ctx), KM_NOFS); + new_ctx = xlog_cil_ctx_alloc(); new_ctx->ticket = xlog_cil_ticket_alloc(log); down_write(&cil->xc_ctx_lock); - ctx = cil->xc_ctx; spin_lock(&cil->xc_push_lock); push_seq = cil->xc_push_seq; ASSERT(push_seq <= ctx->sequence); + push_commit_stable = cil->xc_push_commit_stable; + cil->xc_push_commit_stable = false; /* * As we are about to switch to a new, empty CIL context, we no longer @@ -694,7 +905,7 @@ xlog_cil_push_work( /* check for a previously pushed sequence */ - if (push_seq < cil->xc_ctx->sequence) { + if (push_seq < ctx->sequence) { spin_unlock(&cil->xc_push_lock); goto out_skip; } @@ -767,19 +978,7 @@ xlog_cil_push_work( } /* - * initialise the new context and attach it to the CIL. 
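/*
 * Aside: the restart loop in xlog_cil_order_write() above follows a common
 * sleep-then-rescan shape: find a blocking entry, sleep until its LSN is
 * published, then rescan from the top because the list can change while
 * sleeping. A simplified sketch with hypothetical demo_* names; publishers
 * are assumed to set ctx->start_lsn under demo_lock and then call
 * wake_up_all(&demo_wait):
 */
#include <linux/list.h>
#include <linux/sched.h>
#include <linux/spinlock.h>
#include <linux/types.h>
#include <linux/wait.h>

struct demo_ctx {
	struct list_head	entry;
	u64			seq;
	u64			start_lsn;	/* 0 = not yet published */
};

static LIST_HEAD(demo_committing);
static DEFINE_SPINLOCK(demo_lock);
static DECLARE_WAIT_QUEUE_HEAD(demo_wait);

/* Wait until every context older than @seq has published its start LSN. */
static void demo_order_write(u64 seq)
{
	struct demo_ctx *ctx;
	DEFINE_WAIT(w);

restart:
	spin_lock(&demo_lock);
	list_for_each_entry(ctx, &demo_committing, entry) {
		if (ctx->seq >= seq)
			continue;	/* only older sequences block us */
		if (!ctx->start_lsn) {
			/* Queue ourselves before dropping the lock. */
			prepare_to_wait(&demo_wait, &w, TASK_UNINTERRUPTIBLE);
			spin_unlock(&demo_lock);
			schedule();
			finish_wait(&demo_wait, &w);
			goto restart;	/* the list may have changed */
		}
	}
	spin_unlock(&demo_lock);
}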
Then attach - the current context to the CIL committing list so it can be found - during log forces to extract the commit lsn of the sequence that - needs to be forced. - */ - INIT_LIST_HEAD(&new_ctx->committing); - INIT_LIST_HEAD(&new_ctx->busy_extents); - new_ctx->sequence = ctx->sequence + 1; - new_ctx->cil = cil; - cil->xc_ctx = new_ctx; - - /* - * The switch is now done, so we can drop the context lock and move out + * Switch the contexts so we can drop the context lock and move out * of a shared context. We can't just go straight to the commit record, * though - we need to synchronise with previous and future commits so * that the commit records are correctly ordered in the log to ensure @@ -804,7 +1003,7 @@ xlog_cil_push_work( * dereferencing a freed context pointer. */ spin_lock(&cil->xc_push_lock); - cil->xc_current_sequence = new_ctx->sequence; + xlog_cil_ctx_switch(cil, new_ctx); spin_unlock(&cil->xc_push_lock); up_write(&cil->xc_ctx_lock); @@ -837,78 +1036,17 @@ xlog_cil_push_work( */ wait_for_completion(&bdev_flush); - error = xlog_write(log, &lvhdr, tic, &ctx->start_lsn, NULL, - XLOG_START_TRANS); + error = xlog_cil_write_chain(ctx, &lvhdr); if (error) goto out_abort_free_ticket; - /* - * now that we've written the checkpoint into the log, strictly - * order the commit records so replay will get them in the right order. - */ -restart: - spin_lock(&cil->xc_push_lock); - list_for_each_entry(new_ctx, &cil->xc_committing, committing) { - /* - * Avoid getting stuck in this loop because we were woken by the - * shutdown, but then went back to sleep once already in the - * shutdown state. - */ - if (XLOG_FORCED_SHUTDOWN(log)) { - spin_unlock(&cil->xc_push_lock); - goto out_abort_free_ticket; - } - - /* - * Higher sequences will wait for this one so skip them. - * Don't wait for our own sequence, either. - */ - if (new_ctx->sequence >= ctx->sequence) - continue; - if (!new_ctx->commit_lsn) { - /* - * It is still being pushed! Wait for the push to - * complete, then start again from the beginning. - */ - xlog_wait(&cil->xc_commit_wait, &cil->xc_push_lock); - goto restart; - } - } - spin_unlock(&cil->xc_push_lock); - - error = xlog_commit_record(log, tic, &commit_iclog, &commit_lsn); + error = xlog_cil_write_commit_record(ctx); if (error) goto out_abort_free_ticket; xfs_log_ticket_ungrant(log, tic); /* - * Once we attach the ctx to the iclog, a shutdown can process the - * iclog, run the callbacks and free the ctx. The only thing preventing - * this potential UAF situation here is that we are holding the - * icloglock. Hence we cannot access the ctx once we have attached the - * callbacks and dropped the icloglock. - */ - spin_lock(&log->l_icloglock); - if (commit_iclog->ic_state == XLOG_STATE_IOERROR) { - spin_unlock(&log->l_icloglock); - goto out_abort; - } - ASSERT_ALWAYS(commit_iclog->ic_state == XLOG_STATE_ACTIVE || - commit_iclog->ic_state == XLOG_STATE_WANT_SYNC); - list_add_tail(&ctx->iclog_entry, &commit_iclog->ic_callbacks); - - /* - * now the checkpoint commit is complete and we've attached the - * callbacks to the iclog we can assign the commit LSN to the context - * and wake up anyone who is waiting for the commit to complete. - */ - spin_lock(&cil->xc_push_lock); - ctx->commit_lsn = commit_lsn; - wake_up_all(&cil->xc_commit_wait); - spin_unlock(&cil->xc_push_lock); - - /* * If the checkpoint spans multiple iclogs, wait for all previous iclogs * to complete before we submit the commit_iclog. 
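/*
 * Aside: with the rework above, push work is queued on a work_struct
 * embedded in each checkpoint context rather than a single one in the CIL,
 * and the worker recovers its context with container_of(), as
 * xlog_cil_push_work() now does. Minimal shape of that pattern (demo_*
 * names hypothetical):
 */
#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/types.h>
#include <linux/workqueue.h>

struct demo_ctx {
	u64			seq;
	struct work_struct	work;
};

static void demo_push_work(struct work_struct *work)
{
	/* Each queued item carries its own context. */
	struct demo_ctx *ctx = container_of(work, struct demo_ctx, work);

	kfree(ctx);		/* the worker consumes the context */
}

static struct demo_ctx *demo_ctx_alloc(u64 seq)
{
	struct demo_ctx *ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);

	if (!ctx)
		return NULL;
	ctx->seq = seq;
	INIT_WORK(&ctx->work, demo_push_work);
	return ctx;	/* queue_work() later hands it to demo_push_work() */
}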
We can't use state * checks for this - ACTIVE can be either a past completed iclog or a @@ -919,21 +1057,19 @@ restart: * wakeup until this commit_iclog is written to disk. Hence we use the * iclog header lsn and compare it to the commit lsn to determine if we * need to wait on iclogs or not. - * - * NOTE: It is not safe to reference the ctx after this check as we drop - * the icloglock if we have to wait for completion of other iclogs. */ - if (ctx->start_lsn != commit_lsn) { + spin_lock(&log->l_icloglock); + if (ctx->start_lsn != ctx->commit_lsn) { xfs_lsn_t plsn; - plsn = be64_to_cpu(commit_iclog->ic_prev->ic_header.h_lsn); - if (plsn && XFS_LSN_CMP(plsn, commit_lsn) < 0) { + plsn = be64_to_cpu(ctx->commit_iclog->ic_prev->ic_header.h_lsn); + if (plsn && XFS_LSN_CMP(plsn, ctx->commit_lsn) < 0) { /* * Waiting on ic_force_wait orders the completion of * iclogs older than ic_prev. Hence we only need to wait * on the most recent older iclog here. */ - xlog_wait_on_iclog(commit_iclog->ic_prev); + xlog_wait_on_iclog(ctx->commit_iclog->ic_prev); spin_lock(&log->l_icloglock); } @@ -941,16 +1077,27 @@ restart: * We need to issue a pre-flush so that the ordering for this * checkpoint is correctly preserved down to stable storage. */ - commit_iclog->ic_flags |= XLOG_ICL_NEED_FLUSH; + ctx->commit_iclog->ic_flags |= XLOG_ICL_NEED_FLUSH; } /* * The commit iclog must be written to stable storage to guarantee * journal IO vs metadata writeback IO is correctly ordered on stable * storage. + * + * If the push caller needs the commit to be immediately stable and the + * commit_iclog is not yet marked as XLOG_STATE_WANT_SYNC to indicate it + * will be written when released, switch its state to WANT_SYNC right + * now. */ - commit_iclog->ic_flags |= XLOG_ICL_NEED_FUA; - xlog_state_release_iclog(log, commit_iclog, preflush_tail_lsn); + ctx->commit_iclog->ic_flags |= XLOG_ICL_NEED_FUA; + if (push_commit_stable && + ctx->commit_iclog->ic_state == XLOG_STATE_ACTIVE) + xlog_state_switch_iclogs(log, ctx->commit_iclog, 0); + xlog_state_release_iclog(log, ctx->commit_iclog, preflush_tail_lsn); + + /* Not safe to reference ctx now! */ + spin_unlock(&log->l_icloglock); return; @@ -962,9 +1109,15 @@ out_skip: out_abort_free_ticket: xfs_log_ticket_ungrant(log, tic); -out_abort: - ASSERT(XLOG_FORCED_SHUTDOWN(log)); - xlog_cil_committed(ctx); + ASSERT(xlog_is_shutdown(log)); + if (!ctx->commit_iclog) { + xlog_cil_committed(ctx); + return; + } + spin_lock(&log->l_icloglock); + xlog_state_release_iclog(log, ctx->commit_iclog, 0); + /* Not safe to reference ctx now! */ + spin_unlock(&log->l_icloglock); } /* @@ -998,7 +1151,7 @@ xlog_cil_push_background( spin_lock(&cil->xc_push_lock); if (cil->xc_push_seq < cil->xc_current_sequence) { cil->xc_push_seq = cil->xc_current_sequence; - queue_work(log->l_mp->m_cil_workqueue, &cil->xc_push_work); + queue_work(cil->xc_push_wq, &cil->xc_ctx->push_work); } /* @@ -1034,13 +1187,26 @@ xlog_cil_push_background( /* * xlog_cil_push_now() is used to trigger an immediate CIL push to the sequence * number that is passed. When it returns, the work will be queued for - * @push_seq, but it won't be completed. The caller is expected to do any - * waiting for push_seq to complete if it is required. + * @push_seq, but it won't be completed. + * + * If the caller is performing a synchronous force, we will flush the workqueue + * to get previously queued work moving to minimise the wait time it will + * undergo waiting for all outstanding pushes to complete. 
The caller is + * expected to do the required waiting for push_seq to complete. + * + * If the caller is performing an async push, we need to ensure that the + * checkpoint is fully flushed out of the iclogs when we finish the push. If we + * don't do this, then the commit record may remain sitting in memory in an + * ACTIVE iclog. This then requires another full log force to push to disk, + * which defeats the purpose of having an async, non-blocking CIL force + * mechanism. Hence in this case we need to pass a flag to the push work to + * indicate it needs to flush the commit record itself. */ static void xlog_cil_push_now( struct xlog *log, - xfs_lsn_t push_seq) + xfs_lsn_t push_seq, + bool async) { struct xfs_cil *cil = log->l_cilp; @@ -1050,7 +1216,8 @@ xlog_cil_push_now( ASSERT(push_seq && push_seq <= cil->xc_current_sequence); /* start on any pending background push to minimise wait time on it */ - flush_work(&cil->xc_push_work); + if (!async) + flush_workqueue(cil->xc_push_wq); /* * If the CIL is empty or we've already pushed the sequence then @@ -1063,7 +1230,8 @@ xlog_cil_push_now( } cil->xc_push_seq = push_seq; - queue_work(log->l_mp->m_cil_workqueue, &cil->xc_push_work); + cil->xc_push_commit_stable = async; + queue_work(cil->xc_push_wq, &cil->xc_ctx->push_work); spin_unlock(&cil->xc_push_lock); } @@ -1116,7 +1284,7 @@ xlog_cil_commit( xlog_cil_insert_items(log, tp); - if (regrant && !XLOG_FORCED_SHUTDOWN(log)) + if (regrant && !xlog_is_shutdown(log)) xfs_log_ticket_regrant(log, tp->t_ticket); else xfs_log_ticket_ungrant(log, tp->t_ticket); @@ -1148,11 +1316,26 @@ xlog_cil_commit( } /* + * Flush the CIL to stable storage but don't wait for it to complete. This + * requires the CIL push to ensure the commit record for the push hits the disk, + * but otherwise is no different to a push done from a log force. + */ +void +xlog_cil_flush( + struct xlog *log) +{ + xfs_csn_t seq = log->l_cilp->xc_current_sequence; + + trace_xfs_log_force(log->l_mp, seq, _RET_IP_); + xlog_cil_push_now(log, seq, true); +} + +/* * Conditionally push the CIL based on the sequence passed in. * - * We only need to push if we haven't already pushed the sequence - * number given. Hence the only time we will trigger a push here is - * if the push sequence is the same as the current context. + * We only need to push if we haven't already pushed the sequence number given. + * Hence the only time we will trigger a push here is if the push sequence is + * the same as the current context. * * We return the current commit lsn to allow the callers to determine if an * iclog flush is necessary following this call. @@ -1168,13 +1351,17 @@ xlog_cil_force_seq( ASSERT(sequence <= cil->xc_current_sequence); + if (!sequence) + sequence = cil->xc_current_sequence; + trace_xfs_log_force(log->l_mp, sequence, _RET_IP_); + /* * check to see if we need to force out the current context. * xlog_cil_push() handles racing pushes for the same sequence, * so no need to deal with it here. */ restart: - xlog_cil_push_now(log, sequence); + xlog_cil_push_now(log, sequence, false); /* * See if we can find a previous sequence still committing. @@ -1189,7 +1376,7 @@ restart: * shutdown, but then went back to sleep once already in the * shutdown state. */ - if (XLOG_FORCED_SHUTDOWN(log)) + if (xlog_is_shutdown(log)) goto out_shutdown; if (ctx->sequence > sequence) continue; @@ -1198,6 +1385,7 @@ restart: * It is still being pushed! Wait for the push to * complete, then start again from the beginning. 
*/ + XFS_STATS_INC(log->l_mp, xs_log_force_sleep); xlog_wait(&cil->xc_commit_wait, &cil->xc_push_lock); goto restart; } @@ -1282,32 +1470,35 @@ xlog_cil_init( cil = kmem_zalloc(sizeof(*cil), KM_MAYFAIL); if (!cil) return -ENOMEM; + /* + * Limit the CIL pipeline depth to 4 concurrent works to bound the + * concurrency the log spinlocks will be exposed to. + */ + cil->xc_push_wq = alloc_workqueue("xfs-cil/%s", + XFS_WQFLAGS(WQ_FREEZABLE | WQ_MEM_RECLAIM | WQ_UNBOUND), + 4, log->l_mp->m_super->s_id); + if (!cil->xc_push_wq) + goto out_destroy_cil; - ctx = kmem_zalloc(sizeof(*ctx), KM_MAYFAIL); - if (!ctx) { - kmem_free(cil); - return -ENOMEM; - } - - INIT_WORK(&cil->xc_push_work, xlog_cil_push_work); INIT_LIST_HEAD(&cil->xc_cil); INIT_LIST_HEAD(&cil->xc_committing); spin_lock_init(&cil->xc_cil_lock); spin_lock_init(&cil->xc_push_lock); init_waitqueue_head(&cil->xc_push_wait); init_rwsem(&cil->xc_ctx_lock); + init_waitqueue_head(&cil->xc_start_wait); init_waitqueue_head(&cil->xc_commit_wait); - - INIT_LIST_HEAD(&ctx->committing); - INIT_LIST_HEAD(&ctx->busy_extents); - ctx->sequence = 1; - ctx->cil = cil; - cil->xc_ctx = ctx; - cil->xc_current_sequence = ctx->sequence; - cil->xc_log = log; log->l_cilp = cil; + + ctx = xlog_cil_ctx_alloc(); + xlog_cil_ctx_switch(cil, ctx); + return 0; + +out_destroy_cil: + kmem_free(cil); + return -ENOMEM; } void @@ -1321,6 +1512,7 @@ xlog_cil_destroy( } ASSERT(list_empty(&log->l_cilp->xc_cil)); + destroy_workqueue(log->l_cilp->xc_push_wq); kmem_free(log->l_cilp); } diff --git a/fs/xfs/xfs_log_priv.h b/fs/xfs/xfs_log_priv.h index f3e79a45d60a..844fbeec3545 100644 --- a/fs/xfs/xfs_log_priv.h +++ b/fs/xfs/xfs_log_priv.h @@ -12,15 +12,6 @@ struct xlog_ticket; struct xfs_mount; /* - * Flags for log structure - */ -#define XLOG_ACTIVE_RECOVERY 0x2 /* in the middle of recovery */ -#define XLOG_RECOVERY_NEEDED 0x4 /* log was recovered */ -#define XLOG_IO_ERROR 0x8 /* log hit an I/O error, and being - shutdown */ -#define XLOG_TAIL_WARN 0x10 /* log tail verify warning issued */ - -/* * get client id from packed copy. 
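/*
 * Aside: xlog_cil_init() above bounds CIL pipelining by creating an unbound
 * workqueue with max_active = 4, so at most four checkpoint pushes run
 * concurrently regardless of CPU count. The same bounding in isolation, with
 * hypothetical demo names:
 */
#include <linux/errno.h>
#include <linux/workqueue.h>

static struct workqueue_struct *demo_push_wq;

static int demo_push_wq_init(const char *fsname)
{
	/*
	 * WQ_UNBOUND: work is not pinned to the queueing CPU.
	 * max_active = 4: caps concurrent work items, bounding the
	 * contention they can generate on shared spinlocks.
	 */
	demo_push_wq = alloc_workqueue("demo-push/%s",
			WQ_FREEZABLE | WQ_MEM_RECLAIM | WQ_UNBOUND, 4, fsname);
	return demo_push_wq ? 0 : -ENOMEM;
}

static void demo_push_wq_destroy(void)
{
	destroy_workqueue(demo_push_wq);
}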
* * this hack is here because the xlog_pack code copies four bytes @@ -47,7 +38,6 @@ enum xlog_iclog_state { XLOG_STATE_DONE_SYNC, /* Done syncing to disk */ XLOG_STATE_CALLBACK, /* Callback functions now */ XLOG_STATE_DIRTY, /* Dirty IC log, not ready for ACTIVE status */ - XLOG_STATE_IOERROR, /* IO error happened in sync'ing log */ }; #define XLOG_STATE_STRINGS \ @@ -56,8 +46,7 @@ enum xlog_iclog_state { { XLOG_STATE_SYNCING, "XLOG_STATE_SYNCING" }, \ { XLOG_STATE_DONE_SYNC, "XLOG_STATE_DONE_SYNC" }, \ { XLOG_STATE_CALLBACK, "XLOG_STATE_CALLBACK" }, \ - { XLOG_STATE_DIRTY, "XLOG_STATE_DIRTY" }, \ - { XLOG_STATE_IOERROR, "XLOG_STATE_IOERROR" } + { XLOG_STATE_DIRTY, "XLOG_STATE_DIRTY" } /* * In core log flags @@ -251,6 +240,7 @@ struct xfs_cil_ctx { xfs_csn_t sequence; /* chkpt sequence # */ xfs_lsn_t start_lsn; /* first LSN of chkpt commit */ xfs_lsn_t commit_lsn; /* chkpt commit record lsn */ + struct xlog_in_core *commit_iclog; struct xlog_ticket *ticket; /* chkpt ticket */ int nvecs; /* number of regions */ int space_used; /* aggregate size of regions */ @@ -259,6 +249,7 @@ struct xfs_cil_ctx { struct list_head iclog_entry; struct list_head committing; /* ctx committing list */ struct work_struct discard_endio_work; + struct work_struct push_work; }; /* @@ -281,16 +272,18 @@ struct xfs_cil { struct xlog *xc_log; struct list_head xc_cil; spinlock_t xc_cil_lock; + struct workqueue_struct *xc_push_wq; struct rw_semaphore xc_ctx_lock ____cacheline_aligned_in_smp; struct xfs_cil_ctx *xc_ctx; spinlock_t xc_push_lock ____cacheline_aligned_in_smp; xfs_csn_t xc_push_seq; + bool xc_push_commit_stable; struct list_head xc_committing; wait_queue_head_t xc_commit_wait; + wait_queue_head_t xc_start_wait; xfs_csn_t xc_current_sequence; - struct work_struct xc_push_work; wait_queue_head_t xc_push_wait; /* background push throttle */ } ____cacheline_aligned_in_smp; @@ -407,7 +400,7 @@ struct xlog { struct xfs_buftarg *l_targ; /* buftarg of log */ struct workqueue_struct *l_ioend_workqueue; /* for I/O completions */ struct delayed_work l_work; /* background flush work */ - uint l_flags; + long l_opstate; /* operational state */ uint l_quotaoffs_flag; /* XFS_DQ_*, for QUOTAOFFs */ struct list_head *l_buf_cancel_table; int l_iclog_hsize; /* size of iclog header */ @@ -456,13 +449,40 @@ struct xlog { xfs_lsn_t l_recovery_lsn; uint32_t l_iclog_roundoff;/* padding roundoff */ + + /* Users of log incompat features should take a read lock. 
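/*
 * For illustration: the reader/writer discipline implied by the
 * l_incompat_users comment above. Feature users take the rwsem shared via
 * xlog_use_incompat_feat(); a clearing path (hypothetical in this sketch)
 * would need it exclusive, so it can only proceed once no user remains.
 * demo_* names are illustrative only:
 */
#include <linux/rwsem.h>
#include <linux/types.h>

static DECLARE_RWSEM(demo_incompat_users);

static void demo_use_feature(void)
{
	down_read(&demo_incompat_users);	/* many users may share */
}

static void demo_drop_feature(void)
{
	up_read(&demo_incompat_users);
}

static bool demo_try_clear_feature(void)
{
	if (!down_write_trylock(&demo_incompat_users))
		return false;		/* a feature user is still active */
	/* ... safe to clear the incompat flag here ... */
	up_write(&demo_incompat_users);
	return true;
}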
*/ + struct rw_semaphore l_incompat_users; }; #define XLOG_BUF_CANCEL_BUCKET(log, blkno) \ ((log)->l_buf_cancel_table + ((uint64_t)blkno % XLOG_BC_TABLE_SIZE)) -#define XLOG_FORCED_SHUTDOWN(log) \ - (unlikely((log)->l_flags & XLOG_IO_ERROR)) +/* + * Bits for operational state + */ +#define XLOG_ACTIVE_RECOVERY 0 /* in the middle of recovery */ +#define XLOG_RECOVERY_NEEDED 1 /* log was recovered */ +#define XLOG_IO_ERROR 2 /* log hit an I/O error, and being + shutdown */ +#define XLOG_TAIL_WARN 3 /* log tail verify warning issued */ + +static inline bool +xlog_recovery_needed(struct xlog *log) +{ + return test_bit(XLOG_RECOVERY_NEEDED, &log->l_opstate); +} + +static inline bool +xlog_in_recovery(struct xlog *log) +{ + return test_bit(XLOG_ACTIVE_RECOVERY, &log->l_opstate); +} + +static inline bool +xlog_is_shutdown(struct xlog *log) +{ + return test_bit(XLOG_IO_ERROR, &log->l_opstate); +} /* common routines */ extern int @@ -496,14 +516,14 @@ xlog_write_adv_cnt(void **ptr, int *len, int *off, size_t bytes) void xlog_print_tic_res(struct xfs_mount *mp, struct xlog_ticket *ticket); void xlog_print_trans(struct xfs_trans *); -int xlog_write(struct xlog *log, struct xfs_log_vec *log_vector, - struct xlog_ticket *tic, xfs_lsn_t *start_lsn, - struct xlog_in_core **commit_iclog, uint optype); -int xlog_commit_record(struct xlog *log, struct xlog_ticket *ticket, - struct xlog_in_core **iclog, xfs_lsn_t *lsn); +int xlog_write(struct xlog *log, struct xfs_cil_ctx *ctx, + struct xfs_log_vec *log_vector, struct xlog_ticket *tic, + uint optype); void xfs_log_ticket_ungrant(struct xlog *log, struct xlog_ticket *ticket); void xfs_log_ticket_regrant(struct xlog *log, struct xlog_ticket *ticket); +void xlog_state_switch_iclogs(struct xlog *log, struct xlog_in_core *iclog, + int eventual_size); int xlog_state_release_iclog(struct xlog *log, struct xlog_in_core *iclog, xfs_lsn_t log_tail_lsn); @@ -571,10 +591,14 @@ void xlog_cil_destroy(struct xlog *log); bool xlog_cil_empty(struct xlog *log); void xlog_cil_commit(struct xlog *log, struct xfs_trans *tp, xfs_csn_t *commit_seq, bool regrant); +void xlog_cil_set_ctx_write_state(struct xfs_cil_ctx *ctx, + struct xlog_in_core *iclog); + /* * CIL force routines */ +void xlog_cil_flush(struct xlog *log); xfs_lsn_t xlog_cil_force_seq(struct xlog *log, xfs_csn_t sequence); static inline void diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c index 1721fce2ec94..10562ecbd9ea 100644 --- a/fs/xfs/xfs_log_recover.c +++ b/fs/xfs/xfs_log_recover.c @@ -26,6 +26,8 @@ #include "xfs_error.h" #include "xfs_buf_item.h" #include "xfs_ag.h" +#include "xfs_quota.h" + #define BLK_AVG(blk1, blk2) ((blk1+blk2) >> 1) @@ -79,8 +81,6 @@ xlog_alloc_buffer( struct xlog *log, int nbblks) { - int align_mask = xfs_buftarg_dma_alignment(log->l_targ); - /* * Pass log block 0 since we don't have an addr yet, buffer will be * verified on read. @@ -108,7 +108,7 @@ xlog_alloc_buffer( if (nbblks > 1 && log->l_sectBBsize > 1) nbblks += log->l_sectBBsize; nbblks = round_up(nbblks, log->l_sectBBsize); - return kmem_alloc_io(BBTOB(nbblks), align_mask, KM_MAYFAIL | KM_ZERO); + return kvzalloc(BBTOB(nbblks), GFP_KERNEL | __GFP_RETRY_MAYFAIL); } /* @@ -146,7 +146,7 @@ xlog_do_io( error = xfs_rw_bdev(log->l_targ->bt_bdev, log->l_logBBstart + blk_no, BBTOB(nbblks), data, op); - if (error && !XFS_FORCED_SHUTDOWN(log->l_mp)) { + if (error && !xlog_is_shutdown(log)) { xfs_alert(log->l_mp, "log recovery %s I/O error at daddr 0x%llx len %d error %d", op == REQ_OP_WRITE ? 
"write" : "read", @@ -375,7 +375,7 @@ out: static inline int xlog_logrec_hblks(struct xlog *log, struct xlog_rec_header *rh) { - if (xfs_sb_version_haslogv2(&log->l_mp->m_sb)) { + if (xfs_has_logv2(log->l_mp)) { int h_size = be32_to_cpu(rh->h_size); if ((be32_to_cpu(rh->h_version) & XLOG_VERSION_2) && @@ -1347,7 +1347,7 @@ xlog_find_tail( * headers if we have a filesystem using non-persistent counters. */ if (clean) - log->l_mp->m_flags |= XFS_MOUNT_WAS_CLEAN; + set_bit(XFS_OPSTATE_CLEAN, &log->l_mp->m_opstate); /* * Make sure that there are no blocks in front of the head @@ -1504,7 +1504,7 @@ xlog_add_record( recp->h_magicno = cpu_to_be32(XLOG_HEADER_MAGIC_NUM); recp->h_cycle = cpu_to_be32(cycle); recp->h_version = cpu_to_be32( - xfs_sb_version_haslogv2(&log->l_mp->m_sb) ? 2 : 1); + xfs_has_logv2(log->l_mp) ? 2 : 1); recp->h_lsn = cpu_to_be64(xlog_assign_lsn(cycle, block)); recp->h_tail_lsn = cpu_to_be64(xlog_assign_lsn(tail_cycle, tail_block)); recp->h_fmt = cpu_to_be32(XLOG_FMT); @@ -1756,6 +1756,30 @@ xlog_recover_release_intent( spin_unlock(&ailp->ail_lock); } +int +xlog_recover_iget( + struct xfs_mount *mp, + xfs_ino_t ino, + struct xfs_inode **ipp) +{ + int error; + + error = xfs_iget(mp, NULL, ino, 0, 0, ipp); + if (error) + return error; + + error = xfs_qm_dqattach(*ipp); + if (error) { + xfs_irele(*ipp); + return error; + } + + if (VFS_I(*ipp)->i_nlink == 0) + xfs_iflags_set(*ipp, XFS_IRECOVERY); + + return 0; +} + /****************************************************************************** * * Log recover routines @@ -2062,7 +2086,9 @@ xlog_recover_add_to_cont_trans( old_ptr = item->ri_buf[item->ri_cnt-1].i_addr; old_len = item->ri_buf[item->ri_cnt-1].i_len; - ptr = krealloc(old_ptr, len + old_len, GFP_KERNEL | __GFP_NOFAIL); + ptr = kvrealloc(old_ptr, old_len, len + old_len, GFP_KERNEL); + if (!ptr) + return -ENOMEM; memcpy(&ptr[old_len], dp, len); item->ri_buf[item->ri_cnt-1].i_len += len; item->ri_buf[item->ri_cnt-1].i_addr = ptr; @@ -2786,6 +2812,13 @@ xlog_recover_process_iunlinks( } xfs_buf_rele(agibp); } + + /* + * Flush the pending unlinked inodes to ensure that the inactivations + * are fully completed on disk and the incore inodes can be reclaimed + * before we signal that recovery is complete. + */ + xfs_inodegc_flush(mp); } STATIC void @@ -2802,7 +2835,7 @@ xlog_unpack_data( dp += BBSIZE; } - if (xfs_sb_version_haslogv2(&log->l_mp->m_sb)) { + if (xfs_has_logv2(log->l_mp)) { xlog_in_core_2_t *xhdr = (xlog_in_core_2_t *)rhead; for ( ; i < BTOBB(be32_to_cpu(rhead->h_len)); i++) { j = i / (XLOG_HEADER_CYCLE_SIZE / BBSIZE); @@ -2850,7 +2883,7 @@ xlog_recover_process( * the kernel from one that does not add CRCs by default. */ if (crc != old_crc) { - if (old_crc || xfs_sb_version_hascrc(&log->l_mp->m_sb)) { + if (old_crc || xfs_has_crc(log->l_mp)) { xfs_alert(log->l_mp, "log record CRC mismatch: found 0x%x, expected 0x%x.", le32_to_cpu(old_crc), @@ -2862,7 +2895,7 @@ xlog_recover_process( * If the filesystem is CRC enabled, this mismatch becomes a * fatal log corruption failure. */ - if (xfs_sb_version_hascrc(&log->l_mp->m_sb)) { + if (xfs_has_crc(log->l_mp)) { XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, log->l_mp); return -EFSCORRUPTED; } @@ -2948,7 +2981,7 @@ xlog_do_recovery_pass( * Read the header of the tail block and get the iclog buffer size from * h_size. Use this to tell how many sectors make up the log header. 
*/ - if (xfs_sb_version_haslogv2(&log->l_mp->m_sb)) { + if (xfs_has_logv2(log->l_mp)) { /* * When using variable length iclogs, read first sector of * iclog header and extract the header size from it. Get a @@ -3280,10 +3313,7 @@ xlog_do_recover( if (error) return error; - /* - * If IO errors happened during recovery, bail out. - */ - if (XFS_FORCED_SHUTDOWN(mp)) + if (xlog_is_shutdown(log)) return -EIO; /* @@ -3305,7 +3335,7 @@ xlog_do_recover( xfs_buf_hold(bp); error = _xfs_buf_read(bp, XBF_READ); if (error) { - if (!XFS_FORCED_SHUTDOWN(mp)) { + if (!xlog_is_shutdown(log)) { xfs_buf_ioerror_alert(bp, __this_address); ASSERT(0); } @@ -3318,6 +3348,7 @@ xlog_do_recover( xfs_buf_relse(bp); /* re-initialise in-core superblock and geometry structures */ + mp->m_features |= xfs_sb_version_to_features(sbp); xfs_reinit_percpu_counters(mp); error = xfs_initialize_perag(mp, sbp->sb_agcount, &mp->m_maxagi); if (error) { @@ -3329,7 +3360,7 @@ xlog_do_recover( xlog_recover_check_summary(log); /* Normal transactions can now occur */ - log->l_flags &= ~XLOG_ACTIVE_RECOVERY; + clear_bit(XLOG_ACTIVE_RECOVERY, &log->l_opstate); return 0; } @@ -3355,7 +3386,7 @@ xlog_recover( * could not be verified. Check the superblock LSN against the current * LSN now that it's known. */ - if (xfs_sb_version_hascrc(&log->l_mp->m_sb) && + if (xfs_has_crc(log->l_mp) && !xfs_log_check_lsn(log->l_mp, log->l_mp->m_sb.sb_lsn)) return -EINVAL; @@ -3382,7 +3413,7 @@ xlog_recover( * (e.g. unsupported transactions, then simply reject the * attempt at recovery before touching anything. */ - if (XFS_SB_VERSION_NUM(&log->l_mp->m_sb) == XFS_SB_VERSION_5 && + if (xfs_sb_is_v5(&log->l_mp->m_sb) && xfs_sb_has_incompat_log_feature(&log->l_mp->m_sb, XFS_SB_FEAT_INCOMPAT_LOG_UNKNOWN)) { xfs_warn(log->l_mp, @@ -3413,68 +3444,64 @@ xlog_recover( : "internal"); error = xlog_do_recover(log, head_blk, tail_blk); - log->l_flags |= XLOG_RECOVERY_NEEDED; + set_bit(XLOG_RECOVERY_NEEDED, &log->l_opstate); } return error; } /* - * In the first part of recovery we replay inodes and buffers and build - * up the list of extent free items which need to be processed. Here - * we process the extent free items and clean up the on disk unlinked - * inode lists. This is separated from the first part of recovery so - * that the root and real-time bitmap inodes can be read in from disk in - * between the two stages. This is necessary so that we can free space - * in the real-time portion of the file system. + * In the first part of recovery we replay inodes and buffers and build up the + * list of intents which need to be processed. Here we process the intents and + * clean up the on disk unlinked inode lists. This is separated from the first + * part of recovery so that the root and real-time bitmap inodes can be read in + * from disk in between the two stages. This is necessary so that we can free + * space in the real-time portion of the file system. */ int xlog_recover_finish( struct xlog *log) { - /* - * Now we're ready to do the transactions needed for the - * rest of recovery. Start with completing all the extent - * free intent records and then process the unlinked inode - * lists. At this point, we essentially run in normal mode - * except that we're still performing recovery actions - * rather than accepting new requests. - */ - if (log->l_flags & XLOG_RECOVERY_NEEDED) { - int error; - error = xlog_recover_process_intents(log); - if (error) { - /* - * Cancel all the unprocessed intent items now so that - * we don't leave them pinned in the AIL. 
This can - * cause the AIL to livelock on the pinned item if - * anyone tries to push the AIL (inode reclaim does - * this) before we get around to xfs_log_mount_cancel. - */ - xlog_recover_cancel_intents(log); - xfs_force_shutdown(log->l_mp, SHUTDOWN_LOG_IO_ERROR); - xfs_alert(log->l_mp, "Failed to recover intents"); - return error; - } + int error; + error = xlog_recover_process_intents(log); + if (error) { /* - * Sync the log to get all the intents out of the AIL. - * This isn't absolutely necessary, but it helps in - * case the unlink transactions would have problems - * pushing the intents out of the way. + * Cancel all the unprocessed intent items now so that we don't + * leave them pinned in the AIL. This can cause the AIL to + * livelock on the pinned item if anyone tries to push the AIL + * (inode reclaim does this) before we get around to + * xfs_log_mount_cancel. */ - xfs_log_force(log->l_mp, XFS_LOG_SYNC); - - xlog_recover_process_iunlinks(log); + xlog_recover_cancel_intents(log); + xfs_alert(log->l_mp, "Failed to recover intents"); + xfs_force_shutdown(log->l_mp, SHUTDOWN_LOG_IO_ERROR); + return error; + } - xlog_recover_check_summary(log); + /* + * Sync the log to get all the intents out of the AIL. This isn't + * absolutely necessary, but it helps in case the unlink transactions + * would have problems pushing the intents out of the way. + */ + xfs_log_force(log->l_mp, XFS_LOG_SYNC); - xfs_notice(log->l_mp, "Ending recovery (logdev: %s)", - log->l_mp->m_logname ? log->l_mp->m_logname - : "internal"); - log->l_flags &= ~XLOG_RECOVERY_NEEDED; - } else { - xfs_info(log->l_mp, "Ending clean mount"); + /* + * Now that we've recovered the log and all the intents, we can clear + * the log incompat feature bits in the superblock because there's no + * longer anything to protect. We rely on the AIL push to write out the + * updated superblock after everything else. 
+ */ + if (xfs_clear_incompat_log_features(log->l_mp)) { + error = xfs_sync_sb(log->l_mp, false); + if (error < 0) { + xfs_alert(log->l_mp, + "Failed to clear log incompat features on recovery"); + return error; + } } + + xlog_recover_process_iunlinks(log); + xlog_recover_check_summary(log); return 0; } @@ -3482,7 +3509,7 @@ void xlog_recover_cancel( struct xlog *log) { - if (log->l_flags & XLOG_RECOVERY_NEEDED) + if (xlog_recovery_needed(log)) xlog_recover_cancel_intents(log); } diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c index d0755494597f..06dac09eddbd 100644 --- a/fs/xfs/xfs_mount.c +++ b/fs/xfs/xfs_mount.c @@ -62,7 +62,7 @@ xfs_uuid_mount( /* Publish UUID in struct super_block */ uuid_copy(&mp->m_super->s_uuid, uuid); - if (mp->m_flags & XFS_MOUNT_NOUUID) + if (xfs_has_nouuid(mp)) return 0; if (uuid_is_null(uuid)) { @@ -104,7 +104,7 @@ xfs_uuid_unmount( uuid_t *uuid = &mp->m_sb.sb_uuid; int i; - if (mp->m_flags & XFS_MOUNT_NOUUID) + if (xfs_has_nouuid(mp)) return; mutex_lock(&xfs_uuid_table_mutex); @@ -225,6 +225,7 @@ reread: goto reread; } + mp->m_features |= xfs_sb_version_to_features(sbp); xfs_reinit_percpu_counters(mp); /* no need to be quiet anymore, so reset the buf ops */ @@ -318,7 +319,7 @@ xfs_validate_new_dalign( } } - if (!xfs_sb_version_hasdalign(&mp->m_sb)) { + if (!xfs_has_dalign(mp)) { xfs_warn(mp, "cannot change alignment: superblock does not support data alignment"); return -EINVAL; @@ -349,8 +350,7 @@ xfs_update_alignment( sbp->sb_unit = mp->m_dalign; sbp->sb_width = mp->m_swidth; mp->m_update_sb = true; - } else if ((mp->m_flags & XFS_MOUNT_NOALIGN) != XFS_MOUNT_NOALIGN && - xfs_sb_version_hasdalign(&mp->m_sb)) { + } else if (!xfs_has_noalign(mp) && xfs_has_dalign(mp)) { mp->m_dalign = sbp->sb_unit; mp->m_swidth = sbp->sb_width; } @@ -365,13 +365,16 @@ void xfs_set_low_space_thresholds( struct xfs_mount *mp) { - int i; + uint64_t dblocks = mp->m_sb.sb_dblocks; + uint64_t rtexts = mp->m_sb.sb_rextents; + int i; - for (i = 0; i < XFS_LOWSP_MAX; i++) { - uint64_t space = mp->m_sb.sb_dblocks; + do_div(dblocks, 100); + do_div(rtexts, 100); - do_div(space, 100); - mp->m_low_space[i] = space * (i + 1); + for (i = 0; i < XFS_LOWSP_MAX; i++) { + mp->m_low_space[i] = dblocks * (i + 1); + mp->m_low_rtexts[i] = rtexts * (i + 1); } } @@ -485,7 +488,7 @@ xfs_check_summary_counts( * counters. If any of them are obviously incorrect, we can recompute * them from the AGF headers in the next step. */ - if (XFS_LAST_UNMOUNT_WAS_CLEAN(mp) && + if (xfs_is_clean(mp) && (mp->m_sb.sb_fdblocks > mp->m_sb.sb_dblocks || !xfs_verify_icount(mp, mp->m_sb.sb_icount) || mp->m_sb.sb_ifree > mp->m_sb.sb_icount)) @@ -502,8 +505,7 @@ xfs_check_summary_counts( * superblock to be correct and we don't need to do anything here. * Otherwise, recalculate the summary counters. */ - if ((!xfs_sb_version_haslazysbcount(&mp->m_sb) || - XFS_LAST_UNMOUNT_WAS_CLEAN(mp)) && + if ((!xfs_has_lazysbcount(mp) || xfs_is_clean(mp)) && !xfs_fs_has_sickness(mp, XFS_SICK_FS_COUNTERS)) return 0; @@ -514,7 +516,8 @@ xfs_check_summary_counts( * Flush and reclaim dirty inodes in preparation for unmount. Inodes and * internal inode structures can be sitting in the CIL and AIL at this point, * so we need to unpin them, write them back and/or reclaim them before unmount - * can proceed. + * can proceed. In other words, callers are required to have inactivated all + * inodes. * * An inode cluster that has been freed can have its buffer still pinned in * memory because the transaction is still sitting in a iclog. 
The stale inodes @@ -543,9 +546,10 @@ xfs_unmount_flush_inodes( xfs_extent_busy_wait_all(mp); flush_workqueue(xfs_discard_wq); - mp->m_flags |= XFS_MOUNT_UNMOUNTING; + set_bit(XFS_OPSTATE_UNMOUNTING, &mp->m_opstate); xfs_ail_push_all_sync(mp->m_ail); + xfs_inodegc_stop(mp); cancel_delayed_work_sync(&mp->m_reclaim_work); xfs_reclaim_inodes(mp); xfs_health_unmount(mp); @@ -607,29 +611,13 @@ xfs_mountfs( xfs_warn(mp, "correcting sb_features alignment problem"); sbp->sb_features2 |= sbp->sb_bad_features2; mp->m_update_sb = true; - - /* - * Re-check for ATTR2 in case it was found in bad_features2 - * slot. - */ - if (xfs_sb_version_hasattr2(&mp->m_sb) && - !(mp->m_flags & XFS_MOUNT_NOATTR2)) - mp->m_flags |= XFS_MOUNT_ATTR2; } - if (xfs_sb_version_hasattr2(&mp->m_sb) && - (mp->m_flags & XFS_MOUNT_NOATTR2)) { - xfs_sb_version_removeattr2(&mp->m_sb); - mp->m_update_sb = true; - - /* update sb_versionnum for the clearing of the morebits */ - if (!sbp->sb_features2) - mp->m_update_sb = true; - } /* always use v2 inodes by default now */ if (!(mp->m_sb.sb_versionnum & XFS_SB_VERSION_NLINKBIT)) { mp->m_sb.sb_versionnum |= XFS_SB_VERSION_NLINKBIT; + mp->m_features |= XFS_FEAT_NLINK; mp->m_update_sb = true; } @@ -702,7 +690,7 @@ xfs_mountfs( * cluster size. Full inode chunk alignment must match the chunk size, * but that is checked on sb read verification... */ - if (xfs_sb_version_hassparseinodes(&mp->m_sb) && + if (xfs_has_sparseinodes(mp) && mp->m_sb.sb_spino_align != XFS_B_TO_FSBT(mp, igeo->inode_cluster_size_raw)) { xfs_warn(mp, @@ -764,6 +752,10 @@ xfs_mountfs( goto out_free_perag; } + error = xfs_inodegc_register_shrinker(mp); + if (error) + goto out_fail_wait; + /* * Log's mount-time initialization. The first part of recovery can place * some items on the AIL, to be handled when recovery is finished or @@ -774,7 +766,7 @@ xfs_mountfs( XFS_FSB_TO_BB(mp, sbp->sb_logblocks)); if (error) { xfs_warn(mp, "log mount failed"); - goto out_fail_wait; + goto out_inodegc_shrinker; } /* Make sure the summary counts are ok. */ @@ -782,6 +774,23 @@ xfs_mountfs( if (error) goto out_log_dealloc; + /* Enable background inode inactivation workers. */ + xfs_inodegc_start(mp); + xfs_blockgc_start(mp); + + /* + * Now that we've recovered any pending superblock feature bit + * additions, we can finish setting up the attr2 behaviour for the + * mount. The noattr2 option overrides the superblock flag, so only + * check the superblock feature flag if the mount option is not set. + */ + if (xfs_has_noattr2(mp)) { + mp->m_features &= ~XFS_FEAT_ATTR2; + } else if (!xfs_has_attr2(mp) && + (mp->m_sb.sb_features2 & XFS_SB_VERSION2_ATTR2BIT)) { + mp->m_features |= XFS_FEAT_ATTR2; + } + /* * Get and sanity-check the root inode. * Save the pointer to it in the mount structure. @@ -825,7 +834,7 @@ xfs_mountfs( * the next remount into writeable mode. Otherwise we would never * perform the update e.g. for the root filesystem. 
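/*
 * Aside: xfs_set_low_space_thresholds() above divides each size once with
 * do_div() - which divides a u64 in place and is safe on 32-bit - and then
 * scales the 1% result to fill the threshold array. A standalone sketch with
 * hypothetical demo_* names:
 */
#include <asm/div64.h>
#include <linux/types.h>

#define DEMO_LOWSP_MAX	5

static u64 demo_low_space[DEMO_LOWSP_MAX];

static void demo_set_thresholds(u64 blocks)
{
	int i;

	do_div(blocks, 100);		/* blocks is now 1% of the total */
	for (i = 0; i < DEMO_LOWSP_MAX; i++)
		demo_low_space[i] = blocks * (i + 1);	/* 1% .. 5% */
}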
*/ - if (mp->m_update_sb && !(mp->m_flags & XFS_MOUNT_RDONLY)) { + if (mp->m_update_sb && !xfs_is_readonly(mp)) { error = xfs_sync_sb(mp, false); if (error) { xfs_warn(mp, "failed to write sb changes"); @@ -836,13 +845,11 @@ xfs_mountfs( /* * Initialise the XFS quota management subsystem for this mount */ - if (XFS_IS_QUOTA_RUNNING(mp)) { + if (XFS_IS_QUOTA_ON(mp)) { error = xfs_qm_newmount(mp, "amount, "aflags); if (error) goto out_rtunmount; } else { - ASSERT(!XFS_IS_QUOTA_ON(mp)); - /* * If a file system had quotas running earlier, but decided to * mount without -o uquota/pquota/gquota options, revoke the @@ -884,10 +891,8 @@ xfs_mountfs( * We use the same quiesce mechanism as the rw->ro remount, as they are * semantically identical operations. */ - if ((mp->m_flags & (XFS_MOUNT_RDONLY|XFS_MOUNT_NORECOVERY)) == - XFS_MOUNT_RDONLY) { + if (xfs_is_readonly(mp) && !xfs_has_norecovery(mp)) xfs_log_clean(mp); - } /* * Complete the quota initialisation, post-log-replay component. @@ -910,7 +915,7 @@ xfs_mountfs( * This may drive us straight to ENOSPC on mount, but that implies * we were already there on the last unmount. Warn if this occurs. */ - if (!(mp->m_flags & XFS_MOUNT_RDONLY)) { + if (!xfs_is_readonly(mp)) { resblks = xfs_default_resblks(mp); error = xfs_reserve_blocks(mp, &resblks, NULL); if (error) @@ -944,6 +949,15 @@ xfs_mountfs( xfs_irele(rip); /* Clean out dquots that might be in memory after quotacheck. */ xfs_qm_unmount(mp); + + /* + * Inactivate all inodes that might still be in memory after a log + * intent recovery failure so that reclaim can free them. Metadata + * inodes and the root directory shouldn't need inactivation, but the + * mount failed for some reason, so pull down all the state and flee. + */ + xfs_inodegc_flush(mp); + /* * Flush all inode reclamation work and flush the log. * We have to do this /after/ rtunmount and qm_unmount because those @@ -958,6 +972,8 @@ xfs_mountfs( xfs_unmount_flush_inodes(mp); out_log_dealloc: xfs_log_mount_cancel(mp); + out_inodegc_shrinker: + unregister_shrinker(&mp->m_inodegc_shrinker); out_fail_wait: if (mp->m_logdev_targp && mp->m_logdev_targp != mp->m_ddev_targp) xfs_buftarg_drain(mp->m_logdev_targp); @@ -991,6 +1007,16 @@ xfs_unmountfs( uint64_t resblks; int error; + /* + * Perform all on-disk metadata updates required to inactivate inodes + * that the VFS evicted earlier in the unmount process. Freeing inodes + * and discarding CoW fork preallocations can cause shape changes to + * the free inode and refcount btrees, respectively, so we must finish + * this before we discard the metadata space reservations. Metadata + * inodes and the root directory do not require inactivation. + */ + xfs_inodegc_flush(mp); + xfs_blockgc_stop(mp); xfs_fs_unreserve_ag_blocks(mp); xfs_qm_unmount_quotas(mp); @@ -1028,6 +1054,7 @@ xfs_unmountfs( #if defined(DEBUG) xfs_errortag_clearall(mp); #endif + unregister_shrinker(&mp->m_inodegc_shrinker); xfs_free_perag(mp); xfs_errortag_del(mp); @@ -1049,20 +1076,12 @@ xfs_fs_writable( { ASSERT(level > SB_UNFROZEN); if ((mp->m_super->s_writers.frozen >= level) || - XFS_FORCED_SHUTDOWN(mp) || (mp->m_flags & XFS_MOUNT_RDONLY)) + xfs_is_shutdown(mp) || xfs_is_readonly(mp)) return false; return true; } -/* - * Deltas for the block count can vary from 1 to very large, but lock contention - * only occurs on frequent small block count updates such as in the delayed - * allocation path for buffered writes (page a time updates). 
Hence we set - * a large batch count (1024) to minimise global counter updates except when - * we get near to ENOSPC and we have to be very accurate with our updates. - */ -#define XFS_FDBLOCKS_BATCH 1024 int xfs_mod_fdblocks( struct xfs_mount *mp, @@ -1210,13 +1229,123 @@ void xfs_force_summary_recalc( struct xfs_mount *mp) { - if (!xfs_sb_version_haslazysbcount(&mp->m_sb)) + if (!xfs_has_lazysbcount(mp)) return; xfs_fs_mark_sick(mp, XFS_SICK_FS_COUNTERS); } /* + * Enable a log incompat feature flag in the primary superblock. The caller + * cannot have any other transactions in progress. + */ +int +xfs_add_incompat_log_feature( + struct xfs_mount *mp, + uint32_t feature) +{ + struct xfs_dsb *dsb; + int error; + + ASSERT(hweight32(feature) == 1); + ASSERT(!(feature & XFS_SB_FEAT_INCOMPAT_LOG_UNKNOWN)); + + /* + * Force the log to disk and kick the background AIL thread to reduce + * the chances that the bwrite will stall waiting for the AIL to unpin + * the primary superblock buffer. This isn't a data integrity + * operation, so we don't need a synchronous push. + */ + error = xfs_log_force(mp, XFS_LOG_SYNC); + if (error) + return error; + xfs_ail_push_all(mp->m_ail); + + /* + * Lock the primary superblock buffer to serialize all callers that + * are trying to set feature bits. + */ + xfs_buf_lock(mp->m_sb_bp); + xfs_buf_hold(mp->m_sb_bp); + + if (xfs_is_shutdown(mp)) { + error = -EIO; + goto rele; + } + + if (xfs_sb_has_incompat_log_feature(&mp->m_sb, feature)) + goto rele; + + /* + * Write the primary superblock to disk immediately, because we need + * the log_incompat bit to be set in the primary super now to protect + * the log items that we're going to commit later. + */ + dsb = mp->m_sb_bp->b_addr; + xfs_sb_to_disk(dsb, &mp->m_sb); + dsb->sb_features_log_incompat |= cpu_to_be32(feature); + error = xfs_bwrite(mp->m_sb_bp); + if (error) + goto shutdown; + + /* + * Add the feature bits to the incore superblock before we unlock the + * buffer. + */ + xfs_sb_add_incompat_log_features(&mp->m_sb, feature); + xfs_buf_relse(mp->m_sb_bp); + + /* Log the superblock to disk. */ + return xfs_sync_sb(mp, false); +shutdown: + xfs_force_shutdown(mp, SHUTDOWN_META_IO_ERROR); +rele: + xfs_buf_relse(mp->m_sb_bp); + return error; +} + +/* + * Clear all the log incompat flags from the superblock. + * + * The caller cannot be in a transaction, must ensure that the log does not + * contain any log items protected by any log incompat bit, and must ensure + * that there are no other threads that depend on the state of the log incompat + * feature flags in the primary super. + * + * Returns true if the superblock is dirty. + */ +bool +xfs_clear_incompat_log_features( + struct xfs_mount *mp) +{ + bool ret = false; + + if (!xfs_has_crc(mp) || + !xfs_sb_has_incompat_log_feature(&mp->m_sb, + XFS_SB_FEAT_INCOMPAT_LOG_ALL) || + xfs_is_shutdown(mp)) + return false; + + /* + * Update the incore superblock. We synchronize on the primary super + * buffer lock to be consistent with the add function, though at least + * in theory this shouldn't be necessary. + */ + xfs_buf_lock(mp->m_sb_bp); + xfs_buf_hold(mp->m_sb_bp); + + if (xfs_sb_has_incompat_log_feature(&mp->m_sb, + XFS_SB_FEAT_INCOMPAT_LOG_ALL)) { + xfs_info(mp, "Clearing log incompat feature flags."); + xfs_sb_remove_incompat_log_features(&mp->m_sb); + ret = true; + } + + xfs_buf_relse(mp->m_sb_bp); + return ret; +} + +/* * Update the in-core delayed block counter. 
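/*
 * Aside: the XFS_FDBLOCKS_BATCH comment removed above describes a standard
 * percpu_counter trade-off: a large batch keeps small deltas CPU-local and
 * off the counter spinlock, while a batch of 1 keeps the global value exact
 * when precision matters (near ENOSPC). A sketch with hypothetical demo
 * names; initialise with percpu_counter_init(..., 0, GFP_KERNEL):
 */
#include <linux/percpu_counter.h>
#include <linux/types.h>

#define DEMO_BATCH	1024

static struct percpu_counter demo_fdblocks;

static void demo_mod_fdblocks(s64 delta, bool near_enospc)
{
	/* Fold into the global count rarely, unless accuracy is needed. */
	percpu_counter_add_batch(&demo_fdblocks, delta,
			near_enospc ? 1 : DEMO_BATCH);
}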
* * We prefer to update the counter without having to take a spinlock for every diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h index c78b63fe779a..e091f3b3fa15 100644 --- a/fs/xfs/xfs_mount.h +++ b/fs/xfs/xfs_mount.h @@ -57,6 +57,18 @@ struct xfs_error_cfg { }; /* + * Per-cpu deferred inode inactivation GC lists. + */ +struct xfs_inodegc { + struct llist_head list; + struct work_struct work; + + /* approximate count of inodes in the list */ + unsigned int items; + unsigned int shrinker_hits; +}; + +/* * The struct xfsmount layout is optimised to separate read-mostly variables * from variables that are frequently modified. We put the read-mostly variables * first, then place all the other variables at the end. @@ -82,6 +94,9 @@ typedef struct xfs_mount { xfs_buftarg_t *m_ddev_targp; /* saves taking the address */ xfs_buftarg_t *m_logdev_targp;/* ptr to log device */ xfs_buftarg_t *m_rtdev_targp; /* ptr to rt device */ + struct list_head m_mount_list; /* global mount list */ + void __percpu *m_inodegc; /* percpu inodegc structures */ + /* * Optional cache of rt summary level per bitmap block with the * invariant that m_rsum_cache[bbno] <= the minimum i for which @@ -92,10 +107,10 @@ typedef struct xfs_mount { struct xfs_mru_cache *m_filestream; /* per-mount filestream data */ struct workqueue_struct *m_buf_workqueue; struct workqueue_struct *m_unwritten_workqueue; - struct workqueue_struct *m_cil_workqueue; struct workqueue_struct *m_reclaim_workqueue; - struct workqueue_struct *m_gc_workqueue; struct workqueue_struct *m_sync_workqueue; + struct workqueue_struct *m_blockgc_wq; + struct workqueue_struct *m_inodegc_wq; int m_bsize; /* fs logical block size */ uint8_t m_blkbit_log; /* blocklog + NBBY */ @@ -131,11 +146,13 @@ typedef struct xfs_mount { uint m_rsumsize; /* size of rt summary, bytes */ int m_fixedfsid[2]; /* unchanged for life of FS */ uint m_qflags; /* quota status flags */ - uint64_t m_flags; /* global mount flags */ - int64_t m_low_space[XFS_LOWSP_MAX]; + uint64_t m_features; /* active filesystem features */ + uint64_t m_low_space[XFS_LOWSP_MAX]; + uint64_t m_low_rtexts[XFS_LOWSP_MAX]; struct xfs_ino_geometry m_ino_geo; /* inode geometry */ struct xfs_trans_resv m_resv; /* precomputed res values */ /* low free space thresholds */ + unsigned long m_opstate; /* dynamic state flags */ bool m_always_cow; bool m_fail_unmount; bool m_finobt_nores; /* no per-AG finobt resv. */ @@ -193,6 +210,8 @@ typedef struct xfs_mount { xfs_agnumber_t m_agirotor; /* last ag dir inode alloced */ spinlock_t m_agirotor_lock;/* .. and lock protecting it */ + /* Memory shrinker to throttle and reprioritize inodegc */ + struct shrinker m_inodegc_shrinker; /* * Workqueue item so that we can coalesce multiple inode flush attempts * into a single flush. @@ -225,38 +244,178 @@ typedef struct xfs_mount { #define M_IGEO(mp) (&(mp)->m_ino_geo) /* - * Flags for m_flags. + * Flags for m_features. + * + * These are all the active features in the filesystem, regardless of how + * they are configured. 
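/*
 * For illustration: struct xfs_inodegc above pairs a lock-free llist with a
 * work_struct, one instance per CPU. A sketch of that produce/drain shape
 * (demo_* names hypothetical; per-cpu allocation via alloc_percpu() and
 * per-cpu INIT_WORK() omitted for brevity):
 */
#include <linux/llist.h>
#include <linux/percpu.h>
#include <linux/slab.h>
#include <linux/workqueue.h>

struct demo_gc {
	struct llist_head	list;
	struct work_struct	work;
	unsigned int		items;	/* approximate, like the original */
};

struct demo_item {
	struct llist_node	node;
};

static struct demo_gc __percpu *demo_gc_pcpu;

static void demo_gc_worker(struct work_struct *work)
{
	struct demo_gc *gc = container_of(work, struct demo_gc, work);
	struct llist_node *head = llist_del_all(&gc->list);
	struct demo_item *item, *next;

	/* Drain everything queued on this CPU in one grab. */
	llist_for_each_entry_safe(item, next, head, node)
		kfree(item);
}

static void demo_gc_queue(struct demo_item *item)
{
	struct demo_gc *gc = get_cpu_ptr(demo_gc_pcpu);

	llist_add(&item->node, &gc->list);
	gc->items++;
	queue_work(system_unbound_wq, &gc->work);
	put_cpu_ptr(demo_gc_pcpu);
}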
*/ -#define XFS_MOUNT_WSYNC (1ULL << 0) /* for nfs - all metadata ops - must be synchronous except - for space allocations */ -#define XFS_MOUNT_UNMOUNTING (1ULL << 1) /* filesystem is unmounting */ -#define XFS_MOUNT_WAS_CLEAN (1ULL << 3) -#define XFS_MOUNT_FS_SHUTDOWN (1ULL << 4) /* atomic stop of all filesystem - operations, typically for - disk errors in metadata */ -#define XFS_MOUNT_DISCARD (1ULL << 5) /* discard unused blocks */ -#define XFS_MOUNT_NOALIGN (1ULL << 7) /* turn off stripe alignment - allocations */ -#define XFS_MOUNT_ATTR2 (1ULL << 8) /* allow use of attr2 format */ -#define XFS_MOUNT_GRPID (1ULL << 9) /* group-ID assigned from directory */ -#define XFS_MOUNT_NORECOVERY (1ULL << 10) /* no recovery - dirty fs */ -#define XFS_MOUNT_ALLOCSIZE (1ULL << 12) /* specified allocation size */ -#define XFS_MOUNT_SMALL_INUMS (1ULL << 14) /* user wants 32bit inodes */ -#define XFS_MOUNT_32BITINODES (1ULL << 15) /* inode32 allocator active */ -#define XFS_MOUNT_NOUUID (1ULL << 16) /* ignore uuid during mount */ -#define XFS_MOUNT_IKEEP (1ULL << 18) /* keep empty inode clusters*/ -#define XFS_MOUNT_SWALLOC (1ULL << 19) /* turn on stripe width - * allocation */ -#define XFS_MOUNT_RDONLY (1ULL << 20) /* read-only fs */ -#define XFS_MOUNT_DIRSYNC (1ULL << 21) /* synchronous directory ops */ -#define XFS_MOUNT_LARGEIO (1ULL << 22) /* report large preferred +#define XFS_FEAT_ATTR (1ULL << 0) /* xattrs present in fs */ +#define XFS_FEAT_NLINK (1ULL << 1) /* 32 bit link counts */ +#define XFS_FEAT_QUOTA (1ULL << 2) /* quota active */ +#define XFS_FEAT_ALIGN (1ULL << 3) /* inode alignment */ +#define XFS_FEAT_DALIGN (1ULL << 4) /* data alignment */ +#define XFS_FEAT_LOGV2 (1ULL << 5) /* version 2 logs */ +#define XFS_FEAT_SECTOR (1ULL << 6) /* sector size > 512 bytes */ +#define XFS_FEAT_EXTFLG (1ULL << 7) /* unwritten extents */ +#define XFS_FEAT_ASCIICI (1ULL << 8) /* ASCII only case-insens. 
*/ +#define XFS_FEAT_LAZYSBCOUNT (1ULL << 9) /* Superblk counters */ +#define XFS_FEAT_ATTR2 (1ULL << 10) /* dynamic attr fork */ +#define XFS_FEAT_PARENT (1ULL << 11) /* parent pointers */ +#define XFS_FEAT_PROJID32 (1ULL << 12) /* 32 bit project id */ +#define XFS_FEAT_CRC (1ULL << 13) /* metadata CRCs */ +#define XFS_FEAT_V3INODES (1ULL << 14) /* Version 3 inodes */ +#define XFS_FEAT_PQUOTINO (1ULL << 15) /* non-shared proj/grp quotas */ +#define XFS_FEAT_FTYPE (1ULL << 16) /* inode type in dir */ +#define XFS_FEAT_FINOBT (1ULL << 17) /* free inode btree */ +#define XFS_FEAT_RMAPBT (1ULL << 18) /* reverse map btree */ +#define XFS_FEAT_REFLINK (1ULL << 19) /* reflinked files */ +#define XFS_FEAT_SPINODES (1ULL << 20) /* sparse inode chunks */ +#define XFS_FEAT_META_UUID (1ULL << 21) /* metadata UUID */ +#define XFS_FEAT_REALTIME (1ULL << 22) /* realtime device present */ +#define XFS_FEAT_INOBTCNT (1ULL << 23) /* inobt block counts */ +#define XFS_FEAT_BIGTIME (1ULL << 24) /* large timestamps */ +#define XFS_FEAT_NEEDSREPAIR (1ULL << 25) /* needs xfs_repair */ + +/* Mount features */ +#define XFS_FEAT_NOATTR2 (1ULL << 48) /* disable attr2 creation */ +#define XFS_FEAT_NOALIGN (1ULL << 49) /* ignore alignment */ +#define XFS_FEAT_ALLOCSIZE (1ULL << 50) /* user specified allocation size */ +#define XFS_FEAT_LARGE_IOSIZE (1ULL << 51) /* report large preferred * I/O size in stat() */ -#define XFS_MOUNT_FILESTREAMS (1ULL << 24) /* enable the filestreams - allocator */ -#define XFS_MOUNT_NOATTR2 (1ULL << 25) /* disable use of attr2 format */ -#define XFS_MOUNT_DAX_ALWAYS (1ULL << 26) -#define XFS_MOUNT_DAX_NEVER (1ULL << 27) +#define XFS_FEAT_WSYNC (1ULL << 52) /* synchronous metadata ops */ +#define XFS_FEAT_DIRSYNC (1ULL << 53) /* synchronous directory ops */ +#define XFS_FEAT_DISCARD (1ULL << 54) /* discard unused blocks */ +#define XFS_FEAT_GRPID (1ULL << 55) /* group-ID assigned from directory */ +#define XFS_FEAT_SMALL_INUMS (1ULL << 56) /* user wants 32bit inodes */ +#define XFS_FEAT_IKEEP (1ULL << 57) /* keep empty inode clusters*/ +#define XFS_FEAT_SWALLOC (1ULL << 58) /* stripe width allocation */ +#define XFS_FEAT_FILESTREAMS (1ULL << 59) /* use filestreams allocator */ +#define XFS_FEAT_DAX_ALWAYS (1ULL << 60) /* DAX always enabled */ +#define XFS_FEAT_DAX_NEVER (1ULL << 61) /* DAX never enabled */ +#define XFS_FEAT_NORECOVERY (1ULL << 62) /* no recovery - dirty fs */ +#define XFS_FEAT_NOUUID (1ULL << 63) /* ignore uuid during mount */ + +#define __XFS_HAS_FEAT(name, NAME) \ +static inline bool xfs_has_ ## name (struct xfs_mount *mp) \ +{ \ + return mp->m_features & XFS_FEAT_ ## NAME; \ +} + +/* Some features can be added dynamically so they need a set wrapper, too. 
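+ * For example, __XFS_ADD_FEAT(attr2, ATTR2) below expands mechanically to
+ * the following pair of helpers:
+ *
+ *	static inline bool xfs_has_attr2(struct xfs_mount *mp)
+ *	{
+ *		return mp->m_features & XFS_FEAT_ATTR2;
+ *	}
+ *	static inline void xfs_add_attr2(struct xfs_mount *mp)
+ *	{
+ *		mp->m_features |= XFS_FEAT_ATTR2;
+ *		xfs_sb_version_addattr2(&mp->m_sb);
+ *	}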
*/ +#define __XFS_ADD_FEAT(name, NAME) \ + __XFS_HAS_FEAT(name, NAME); \ +static inline void xfs_add_ ## name (struct xfs_mount *mp) \ +{ \ + mp->m_features |= XFS_FEAT_ ## NAME; \ + xfs_sb_version_add ## name(&mp->m_sb); \ +} + +/* Superblock features */ +__XFS_ADD_FEAT(attr, ATTR) +__XFS_HAS_FEAT(nlink, NLINK) +__XFS_ADD_FEAT(quota, QUOTA) +__XFS_HAS_FEAT(align, ALIGN) +__XFS_HAS_FEAT(dalign, DALIGN) +__XFS_HAS_FEAT(logv2, LOGV2) +__XFS_HAS_FEAT(sector, SECTOR) +__XFS_HAS_FEAT(extflg, EXTFLG) +__XFS_HAS_FEAT(asciici, ASCIICI) +__XFS_HAS_FEAT(lazysbcount, LAZYSBCOUNT) +__XFS_ADD_FEAT(attr2, ATTR2) +__XFS_HAS_FEAT(parent, PARENT) +__XFS_ADD_FEAT(projid32, PROJID32) +__XFS_HAS_FEAT(crc, CRC) +__XFS_HAS_FEAT(v3inodes, V3INODES) +__XFS_HAS_FEAT(pquotino, PQUOTINO) +__XFS_HAS_FEAT(ftype, FTYPE) +__XFS_HAS_FEAT(finobt, FINOBT) +__XFS_HAS_FEAT(rmapbt, RMAPBT) +__XFS_HAS_FEAT(reflink, REFLINK) +__XFS_HAS_FEAT(sparseinodes, SPINODES) +__XFS_HAS_FEAT(metauuid, META_UUID) +__XFS_HAS_FEAT(realtime, REALTIME) +__XFS_HAS_FEAT(inobtcounts, INOBTCNT) +__XFS_HAS_FEAT(bigtime, BIGTIME) +__XFS_HAS_FEAT(needsrepair, NEEDSREPAIR) + +/* + * Mount features + * + * These do not change dynamically - features that can come and go, such as 32 + * bit inodes and read-only state, are kept as operational state rather than + * features. + */ +__XFS_HAS_FEAT(noattr2, NOATTR2) +__XFS_HAS_FEAT(noalign, NOALIGN) +__XFS_HAS_FEAT(allocsize, ALLOCSIZE) +__XFS_HAS_FEAT(large_iosize, LARGE_IOSIZE) +__XFS_HAS_FEAT(wsync, WSYNC) +__XFS_HAS_FEAT(dirsync, DIRSYNC) +__XFS_HAS_FEAT(discard, DISCARD) +__XFS_HAS_FEAT(grpid, GRPID) +__XFS_HAS_FEAT(small_inums, SMALL_INUMS) +__XFS_HAS_FEAT(ikeep, IKEEP) +__XFS_HAS_FEAT(swalloc, SWALLOC) +__XFS_HAS_FEAT(filestreams, FILESTREAMS) +__XFS_HAS_FEAT(dax_always, DAX_ALWAYS) +__XFS_HAS_FEAT(dax_never, DAX_NEVER) +__XFS_HAS_FEAT(norecovery, NORECOVERY) +__XFS_HAS_FEAT(nouuid, NOUUID) + +/* + * Operational mount state flags + * + * Use these with atomic bit ops only! + */ +#define XFS_OPSTATE_UNMOUNTING 0 /* filesystem is unmounting */ +#define XFS_OPSTATE_CLEAN 1 /* mount was clean */ +#define XFS_OPSTATE_SHUTDOWN 2 /* stop all fs operations */ +#define XFS_OPSTATE_INODE32 3 /* inode32 allocator active */ +#define XFS_OPSTATE_READONLY 4 /* read-only fs */ + +/* + * If set, inactivation worker threads will be scheduled to process queued + * inodegc work. If not, queued inodes remain in memory waiting to be + * processed. + */ +#define XFS_OPSTATE_INODEGC_ENABLED 5 +/* + * If set, background speculative prealloc gc worker threads will be scheduled + * to process queued blockgc work. If not, inodes retain their preallocations + * until explicitly deleted. 
+ */ +#define XFS_OPSTATE_BLOCKGC_ENABLED 6 + +#define __XFS_IS_OPSTATE(name, NAME) \ +static inline bool xfs_is_ ## name (struct xfs_mount *mp) \ +{ \ + return test_bit(XFS_OPSTATE_ ## NAME, &mp->m_opstate); \ +} \ +static inline bool xfs_clear_ ## name (struct xfs_mount *mp) \ +{ \ + return test_and_clear_bit(XFS_OPSTATE_ ## NAME, &mp->m_opstate); \ +} \ +static inline bool xfs_set_ ## name (struct xfs_mount *mp) \ +{ \ + return test_and_set_bit(XFS_OPSTATE_ ## NAME, &mp->m_opstate); \ +} + +__XFS_IS_OPSTATE(unmounting, UNMOUNTING) +__XFS_IS_OPSTATE(clean, CLEAN) +__XFS_IS_OPSTATE(shutdown, SHUTDOWN) +__XFS_IS_OPSTATE(inode32, INODE32) +__XFS_IS_OPSTATE(readonly, READONLY) +__XFS_IS_OPSTATE(inodegc_enabled, INODEGC_ENABLED) +__XFS_IS_OPSTATE(blockgc_enabled, BLOCKGC_ENABLED) + +#define XFS_OPSTATE_STRINGS \ + { (1UL << XFS_OPSTATE_UNMOUNTING), "unmounting" }, \ + { (1UL << XFS_OPSTATE_CLEAN), "clean" }, \ + { (1UL << XFS_OPSTATE_SHUTDOWN), "shutdown" }, \ + { (1UL << XFS_OPSTATE_INODE32), "inode32" }, \ + { (1UL << XFS_OPSTATE_READONLY), "read_only" }, \ + { (1UL << XFS_OPSTATE_INODEGC_ENABLED), "inodegc" }, \ + { (1UL << XFS_OPSTATE_BLOCKGC_ENABLED), "blockgc" } /* * Max and min values for mount-option defined I/O @@ -265,9 +424,7 @@ typedef struct xfs_mount { #define XFS_MAX_IO_LOG 30 /* 1G */ #define XFS_MIN_IO_LOG PAGE_SHIFT -#define XFS_LAST_UNMOUNT_WAS_CLEAN(mp) \ - ((mp)->m_flags & XFS_MOUNT_WAS_CLEAN) -#define XFS_FORCED_SHUTDOWN(mp) ((mp)->m_flags & XFS_MOUNT_FS_SHUTDOWN) +#define xfs_is_shutdown(mp) xfs_is_shutdown(mp) void xfs_do_force_shutdown(struct xfs_mount *mp, int flags, char *fname, int lnnum); #define xfs_force_shutdown(m,f) \ @@ -278,6 +435,12 @@ void xfs_do_force_shutdown(struct xfs_mount *mp, int flags, char *fname, #define SHUTDOWN_FORCE_UMOUNT 0x0004 /* shutdown from a forced unmount */ #define SHUTDOWN_CORRUPT_INCORE 0x0008 /* corrupt in-memory data structures */ +#define XFS_SHUTDOWN_STRINGS \ + { SHUTDOWN_META_IO_ERROR, "metadata_io" }, \ + { SHUTDOWN_LOG_IO_ERROR, "log_io" }, \ + { SHUTDOWN_FORCE_UMOUNT, "force_umount" }, \ + { SHUTDOWN_CORRUPT_INCORE, "corruption" } + /* * Flags for xfs_mountfs */ @@ -306,6 +469,15 @@ extern uint64_t xfs_default_resblks(xfs_mount_t *mp); extern int xfs_mountfs(xfs_mount_t *mp); extern void xfs_unmountfs(xfs_mount_t *); +/* + * Deltas for the block count can vary from 1 to very large, but lock contention + * only occurs on frequent small block count updates such as in the delayed + * allocation path for buffered writes (page at a time updates). Hence we set + * a large batch count (1024) to minimise global counter updates except when + * we get near to ENOSPC and we have to be very accurate with our updates. 
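+ *
+ * A sketch of the intended use (the batched percpu counter API;
+ * presumably the call site is xfs_mod_fdblocks()):
+ *
+ *	percpu_counter_add_batch(&mp->m_fdblocks, delta, XFS_FDBLOCKS_BATCH);
+ *
+ * which only folds a CPU's local delta into the global count once it
+ * exceeds the batch, keeping the hot small-delta path lock free.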
+ */ +#define XFS_FDBLOCKS_BATCH 1024 + extern int xfs_mod_fdblocks(struct xfs_mount *mp, int64_t delta, bool reserved); extern int xfs_mod_frextents(struct xfs_mount *mp, int64_t delta); @@ -325,6 +497,8 @@ int xfs_zero_extent(struct xfs_inode *ip, xfs_fsblock_t start_fsb, struct xfs_error_cfg * xfs_error_get_cfg(struct xfs_mount *mp, int error_class, int error); void xfs_force_summary_recalc(struct xfs_mount *mp); +int xfs_add_incompat_log_feature(struct xfs_mount *mp, uint32_t feature); +bool xfs_clear_incompat_log_features(struct xfs_mount *mp); void xfs_mod_delalloc(struct xfs_mount *mp, int64_t delta); #endif /* __XFS_MOUNT_H__ */ diff --git a/fs/xfs/xfs_pnfs.c b/fs/xfs/xfs_pnfs.c index 956cca24e67f..5e1d29d8b2e7 100644 --- a/fs/xfs/xfs_pnfs.c +++ b/fs/xfs/xfs_pnfs.c @@ -92,7 +92,7 @@ xfs_fs_map_blocks( uint lock_flags; int error = 0; - if (XFS_FORCED_SHUTDOWN(mp)) + if (xfs_is_shutdown(mp)) return -EIO; /* diff --git a/fs/xfs/xfs_qm.c b/fs/xfs/xfs_qm.c index fe341f3fd419..5608066d6e53 100644 --- a/fs/xfs/xfs_qm.c +++ b/fs/xfs/xfs_qm.c @@ -157,7 +157,7 @@ xfs_qm_dqpurge( } ASSERT(atomic_read(&dqp->q_pincount) == 0); - ASSERT(XFS_FORCED_SHUTDOWN(mp) || + ASSERT(xfs_is_shutdown(mp) || !test_bit(XFS_LI_IN_AIL, &dqp->q_logitem.qli_item.li_flags)); xfs_dqfunlock(dqp); @@ -185,17 +185,13 @@ out_unlock: /* * Purge the dquot cache. */ -void +static void xfs_qm_dqpurge_all( - struct xfs_mount *mp, - uint flags) + struct xfs_mount *mp) { - if (flags & XFS_QMOPT_UQUOTA) - xfs_qm_dquot_walk(mp, XFS_DQTYPE_USER, xfs_qm_dqpurge, NULL); - if (flags & XFS_QMOPT_GQUOTA) - xfs_qm_dquot_walk(mp, XFS_DQTYPE_GROUP, xfs_qm_dqpurge, NULL); - if (flags & XFS_QMOPT_PQUOTA) - xfs_qm_dquot_walk(mp, XFS_DQTYPE_PROJ, xfs_qm_dqpurge, NULL); + xfs_qm_dquot_walk(mp, XFS_DQTYPE_USER, xfs_qm_dqpurge, NULL); + xfs_qm_dquot_walk(mp, XFS_DQTYPE_GROUP, xfs_qm_dqpurge, NULL); + xfs_qm_dquot_walk(mp, XFS_DQTYPE_PROJ, xfs_qm_dqpurge, NULL); } /* @@ -206,7 +202,7 @@ xfs_qm_unmount( struct xfs_mount *mp) { if (mp->m_quotainfo) { - xfs_qm_dqpurge_all(mp, XFS_QMOPT_QUOTALL); + xfs_qm_dqpurge_all(mp); xfs_qm_destroy_quotainfo(mp); } } @@ -299,8 +295,6 @@ xfs_qm_need_dqattach( { struct xfs_mount *mp = ip->i_mount; - if (!XFS_IS_QUOTA_RUNNING(mp)) - return false; if (!XFS_IS_QUOTA_ON(mp)) return false; if (!XFS_NOT_DQATTACHED(mp, ip)) @@ -635,7 +629,7 @@ xfs_qm_init_quotainfo( struct xfs_quotainfo *qinf; int error; - ASSERT(XFS_IS_QUOTA_RUNNING(mp)); + ASSERT(XFS_IS_QUOTA_ON(mp)); qinf = mp->m_quotainfo = kmem_zalloc(sizeof(struct xfs_quotainfo), 0); @@ -662,7 +656,7 @@ xfs_qm_init_quotainfo( /* Precalc some constants */ qinf->qi_dqchunklen = XFS_FSB_TO_BB(mp, XFS_DQUOT_CLUSTER_SIZE_FSB); qinf->qi_dqperchunk = xfs_calc_dquots_per_chunk(qinf->qi_dqchunklen); - if (xfs_sb_version_hasbigtime(&mp->m_sb)) { + if (xfs_has_bigtime(mp)) { qinf->qi_expiry_min = xfs_dq_bigtime_to_unix(XFS_DQ_BIGTIME_EXPIRY_MIN); qinf->qi_expiry_max = @@ -680,11 +674,11 @@ xfs_qm_init_quotainfo( xfs_qm_init_timelimits(mp, XFS_DQTYPE_GROUP); xfs_qm_init_timelimits(mp, XFS_DQTYPE_PROJ); - if (XFS_IS_UQUOTA_RUNNING(mp)) + if (XFS_IS_UQUOTA_ON(mp)) xfs_qm_set_defquota(mp, XFS_DQTYPE_USER, qinf); - if (XFS_IS_GQUOTA_RUNNING(mp)) + if (XFS_IS_GQUOTA_ON(mp)) xfs_qm_set_defquota(mp, XFS_DQTYPE_GROUP, qinf); - if (XFS_IS_PQUOTA_RUNNING(mp)) + if (XFS_IS_PQUOTA_ON(mp)) xfs_qm_set_defquota(mp, XFS_DQTYPE_PROJ, qinf); qinf->qi_shrinker.count_objects = xfs_qm_shrink_count; @@ -755,7 +749,7 @@ xfs_qm_qino_alloc( * with PQUOTA, just use sb_gquotino for sb_pquotino and * 
vice-versa. */ - if (!xfs_sb_version_has_pquotino(&mp->m_sb) && + if (!xfs_has_pquotino(mp) && (flags & (XFS_QMOPT_PQUOTA|XFS_QMOPT_GQUOTA))) { xfs_ino_t ino = NULLFSINO; @@ -808,9 +802,9 @@ xfs_qm_qino_alloc( */ spin_lock(&mp->m_sb_lock); if (flags & XFS_QMOPT_SBVERSION) { - ASSERT(!xfs_sb_version_hasquota(&mp->m_sb)); + ASSERT(!xfs_has_quota(mp)); - xfs_sb_version_addquota(&mp->m_sb); + xfs_add_quota(mp); mp->m_sb.sb_uquotino = NULLFSINO; mp->m_sb.sb_gquotino = NULLFSINO; mp->m_sb.sb_pquotino = NULLFSINO; @@ -829,7 +823,7 @@ xfs_qm_qino_alloc( error = xfs_trans_commit(tp); if (error) { - ASSERT(XFS_FORCED_SHUTDOWN(mp)); + ASSERT(xfs_is_shutdown(mp)); xfs_alert(mp, "%s failed (error %d)!", __func__, error); } if (need_alloc) @@ -896,11 +890,11 @@ xfs_qm_reset_dqcounts( ddq->d_bwarns = 0; ddq->d_iwarns = 0; ddq->d_rtbwarns = 0; - if (xfs_sb_version_hasbigtime(&mp->m_sb)) + if (xfs_has_bigtime(mp)) ddq->d_type |= XFS_DQTYPE_BIGTIME; } - if (xfs_sb_version_hascrc(&mp->m_sb)) { + if (xfs_has_crc(mp)) { xfs_update_cksum((char *)&dqb[j], sizeof(struct xfs_dqblk), XFS_DQUOT_CRC_OFF); @@ -1147,7 +1141,7 @@ xfs_qm_dqusage_adjust( xfs_filblks_t rtblks = 0; /* total rt blks */ int error; - ASSERT(XFS_IS_QUOTA_RUNNING(mp)); + ASSERT(XFS_IS_QUOTA_ON(mp)); /* * rootino must have its resources accounted for, not so with the quota @@ -1288,7 +1282,7 @@ xfs_qm_quotacheck( flags = 0; ASSERT(uip || gip || pip); - ASSERT(XFS_IS_QUOTA_RUNNING(mp)); + ASSERT(XFS_IS_QUOTA_ON(mp)); xfs_notice(mp, "Quotacheck needed: Please wait."); @@ -1359,7 +1353,7 @@ xfs_qm_quotacheck( * at this point (because we intentionally didn't in dqget_noattach). */ if (error) { - xfs_qm_dqpurge_all(mp, XFS_QMOPT_QUOTALL); + xfs_qm_dqpurge_all(mp); goto error_return; } @@ -1418,7 +1412,7 @@ xfs_qm_mount_quotas( goto write_changes; } - ASSERT(XFS_IS_QUOTA_RUNNING(mp)); + ASSERT(XFS_IS_QUOTA_ON(mp)); /* * Allocate the quotainfo structure inside the mount struct, and @@ -1473,7 +1467,7 @@ xfs_qm_mount_quotas( * the incore structures are convinced that quotas are * off, but the on disk superblock doesn't know that ! */ - ASSERT(!(XFS_IS_QUOTA_RUNNING(mp))); + ASSERT(!(XFS_IS_QUOTA_ON(mp))); xfs_alert(mp, "%s: Superblock update failed!", __func__); } @@ -1504,7 +1498,7 @@ xfs_qm_init_quotainos( /* * Get the uquota and gquota inodes */ - if (xfs_sb_version_hasquota(&mp->m_sb)) { + if (xfs_has_quota(mp)) { if (XFS_IS_UQUOTA_ON(mp) && mp->m_sb.sb_uquotino != NULLFSINO) { ASSERT(mp->m_sb.sb_uquotino > 0); @@ -1645,7 +1639,7 @@ xfs_qm_vop_dqalloc( int error; uint lockflags; - if (!XFS_IS_QUOTA_RUNNING(mp) || !XFS_IS_QUOTA_ON(mp)) + if (!XFS_IS_QUOTA_ON(mp)) return 0; lockflags = XFS_ILOCK_EXCL; @@ -1776,7 +1770,7 @@ xfs_qm_vop_chown( ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); - ASSERT(XFS_IS_QUOTA_RUNNING(ip->i_mount)); + ASSERT(XFS_IS_QUOTA_ON(ip->i_mount)); /* old dquot */ prevdq = *IO_olddq; @@ -1829,7 +1823,7 @@ xfs_qm_vop_rename_dqattach( struct xfs_mount *mp = i_tab[0]->i_mount; int i; - if (!XFS_IS_QUOTA_RUNNING(mp) || !XFS_IS_QUOTA_ON(mp)) + if (!XFS_IS_QUOTA_ON(mp)) return 0; for (i = 0; (i < 4 && i_tab[i]); i++) { @@ -1860,7 +1854,7 @@ xfs_qm_vop_create_dqattach( { struct xfs_mount *mp = tp->t_mountp; - if (!XFS_IS_QUOTA_RUNNING(mp) || !XFS_IS_QUOTA_ON(mp)) + if (!XFS_IS_QUOTA_ON(mp)) return; ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); @@ -1888,3 +1882,37 @@ xfs_qm_vop_create_dqattach( } } +/* Decide if this inode's dquot is near an enforcement boundary. 
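+ *
+ * Worked example with illustrative numbers (not taken from the code):
+ * let q_prealloc_lo_wmark = 900, q_prealloc_hi_wmark = 1000 and
+ * q_low_space[XFS_QLOWSP_5_PCNT] = 50 blocks. Then:
+ *
+ *	q_blk.reserved =  400 -> below the lo watermark, returns false
+ *	q_blk.reserved = 1000 -> at the hi watermark, returns true
+ *	q_blk.reserved =  960 -> freesp = 40 < 50, returns true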
*/ +bool +xfs_inode_near_dquot_enforcement( + struct xfs_inode *ip, + xfs_dqtype_t type) +{ + struct xfs_dquot *dqp; + int64_t freesp; + + /* We only care for quotas that are enabled and enforced. */ + dqp = xfs_inode_dquot(ip, type); + if (!dqp || !xfs_dquot_is_enforced(dqp)) + return false; + + if (xfs_dquot_res_over_limits(&dqp->q_ino) || + xfs_dquot_res_over_limits(&dqp->q_rtb)) + return true; + + /* For space on the data device, check the various thresholds. */ + if (!dqp->q_prealloc_hi_wmark) + return false; + + if (dqp->q_blk.reserved < dqp->q_prealloc_lo_wmark) + return false; + + if (dqp->q_blk.reserved >= dqp->q_prealloc_hi_wmark) + return true; + + freesp = dqp->q_prealloc_hi_wmark - dqp->q_blk.reserved; + if (freesp < dqp->q_low_space[XFS_QLOWSP_5_PCNT]) + return true; + + return false; +} diff --git a/fs/xfs/xfs_qm.h b/fs/xfs/xfs_qm.h index ebbb484c49dc..442a0f97a9d4 100644 --- a/fs/xfs/xfs_qm.h +++ b/fs/xfs/xfs_qm.h @@ -140,9 +140,6 @@ struct xfs_dquot_acct { extern void xfs_qm_destroy_quotainfo(struct xfs_mount *); -/* dquot stuff */ -extern void xfs_qm_dqpurge_all(struct xfs_mount *, uint); - /* quota ops */ extern int xfs_qm_scall_trunc_qfiles(struct xfs_mount *, uint); extern int xfs_qm_scall_getquota(struct xfs_mount *mp, diff --git a/fs/xfs/xfs_qm_bhv.c b/fs/xfs/xfs_qm_bhv.c index df00dfbf5c9d..b77673dd0558 100644 --- a/fs/xfs/xfs_qm_bhv.c +++ b/fs/xfs/xfs_qm_bhv.c @@ -75,7 +75,7 @@ xfs_qm_newmount( uint quotaondisk; uint uquotaondisk = 0, gquotaondisk = 0, pquotaondisk = 0; - quotaondisk = xfs_sb_version_hasquota(&mp->m_sb) && + quotaondisk = xfs_has_quota(mp) && (mp->m_sb.sb_qflags & XFS_ALL_QUOTA_ACCT); if (quotaondisk) { diff --git a/fs/xfs/xfs_qm_syscalls.c b/fs/xfs/xfs_qm_syscalls.c index 13a56e1ea15c..47fe60e1a887 100644 --- a/fs/xfs/xfs_qm_syscalls.c +++ b/fs/xfs/xfs_qm_syscalls.c @@ -19,91 +19,11 @@ #include "xfs_qm.h" #include "xfs_icache.h" -STATIC int -xfs_qm_log_quotaoff( - struct xfs_mount *mp, - struct xfs_qoff_logitem **qoffstartp, - uint flags) -{ - struct xfs_trans *tp; - int error; - struct xfs_qoff_logitem *qoffi; - - error = xfs_trans_alloc(mp, &M_RES(mp)->tr_qm_quotaoff, 0, 0, 0, &tp); - if (error) - goto out; - - qoffi = xfs_trans_get_qoff_item(tp, NULL, flags & XFS_ALL_QUOTA_ACCT); - xfs_trans_log_quotaoff_item(tp, qoffi); - - spin_lock(&mp->m_sb_lock); - mp->m_sb.sb_qflags = (mp->m_qflags & ~(flags)) & XFS_MOUNT_QUOTA_ALL; - spin_unlock(&mp->m_sb_lock); - - xfs_log_sb(tp); - - /* - * We have to make sure that the transaction is secure on disk before we - * return and actually stop quota accounting. So, make it synchronous. - * We don't care about quotoff's performance. - */ - xfs_trans_set_sync(tp); - error = xfs_trans_commit(tp); - if (error) - goto out; - - *qoffstartp = qoffi; -out: - return error; -} - -STATIC int -xfs_qm_log_quotaoff_end( - struct xfs_mount *mp, - struct xfs_qoff_logitem **startqoff, - uint flags) -{ - struct xfs_trans *tp; - int error; - struct xfs_qoff_logitem *qoffi; - - error = xfs_trans_alloc(mp, &M_RES(mp)->tr_qm_equotaoff, 0, 0, 0, &tp); - if (error) - return error; - - qoffi = xfs_trans_get_qoff_item(tp, *startqoff, - flags & XFS_ALL_QUOTA_ACCT); - xfs_trans_log_quotaoff_item(tp, qoffi); - *startqoff = NULL; - - /* - * We have to make sure that the transaction is secure on disk before we - * return and actually stop quota accounting. So, make it synchronous. - * We don't care about quotoff's performance. 
- */ - xfs_trans_set_sync(tp); - return xfs_trans_commit(tp); -} - -/* - * Turn off quota accounting and/or enforcement for all udquots and/or - * gdquots. Called only at unmount time. - * - * This assumes that there are no dquots of this file system cached - * incore, and modifies the ondisk dquot directly. Therefore, for example, - * it is an error to call this twice, without purging the cache. - */ int xfs_qm_scall_quotaoff( xfs_mount_t *mp, uint flags) { - struct xfs_quotainfo *q = mp->m_quotainfo; - uint dqtype; - int error; - uint inactivate_flags; - struct xfs_qoff_logitem *qoffstart = NULL; - /* * No file system can have quotas enabled on disk but not in core. * Note that quota utilities (like quotaoff) _expect_ @@ -111,160 +31,23 @@ xfs_qm_scall_quotaoff( */ if ((mp->m_qflags & flags) == 0) return -EEXIST; - error = 0; - - flags &= (XFS_ALL_QUOTA_ACCT | XFS_ALL_QUOTA_ENFD); - - /* - * We don't want to deal with two quotaoffs messing up each other, - * so we're going to serialize it. quotaoff isn't exactly a performance - * critical thing. - * If quotaoff, then we must be dealing with the root filesystem. - */ - ASSERT(q); - mutex_lock(&q->qi_quotaofflock); /* - * If we're just turning off quota enforcement, change mp and go. + * We do not support actually turning off quota accounting any more. + * Just log a warning and ignore the accounting related flags. */ - if ((flags & XFS_ALL_QUOTA_ACCT) == 0) { - mp->m_qflags &= ~(flags); + if (flags & XFS_ALL_QUOTA_ACCT) + xfs_info(mp, "disabling of quota accounting not supported."); - spin_lock(&mp->m_sb_lock); - mp->m_sb.sb_qflags = mp->m_qflags; - spin_unlock(&mp->m_sb_lock); - mutex_unlock(&q->qi_quotaofflock); - - /* XXX what to do if error ? Revert back to old vals incore ? */ - return xfs_sync_sb(mp, false); - } - - dqtype = 0; - inactivate_flags = 0; - /* - * If accounting is off, we must turn enforcement off, clear the - * quota 'CHKD' certificate to make it known that we have to - * do a quotacheck the next time this quota is turned on. - */ - if (flags & XFS_UQUOTA_ACCT) { - dqtype |= XFS_QMOPT_UQUOTA; - flags |= (XFS_UQUOTA_CHKD | XFS_UQUOTA_ENFD); - inactivate_flags |= XFS_UQUOTA_ACTIVE; - } - if (flags & XFS_GQUOTA_ACCT) { - dqtype |= XFS_QMOPT_GQUOTA; - flags |= (XFS_GQUOTA_CHKD | XFS_GQUOTA_ENFD); - inactivate_flags |= XFS_GQUOTA_ACTIVE; - } - if (flags & XFS_PQUOTA_ACCT) { - dqtype |= XFS_QMOPT_PQUOTA; - flags |= (XFS_PQUOTA_CHKD | XFS_PQUOTA_ENFD); - inactivate_flags |= XFS_PQUOTA_ACTIVE; - } - - /* - * Nothing to do? Don't complain. This happens when we're just - * turning off quota enforcement. - */ - if ((mp->m_qflags & flags) == 0) - goto out_unlock; - - /* - * Write the LI_QUOTAOFF log record, and do SB changes atomically, - * and synchronously. If we fail to write, we should abort the - * operation as it cannot be recovered safely if we crash. - */ - error = xfs_qm_log_quotaoff(mp, &qoffstart, flags); - if (error) - goto out_unlock; - - /* - * Next we clear the XFS_MOUNT_*DQ_ACTIVE bit(s) in the mount struct - * to take care of the race between dqget and quotaoff. We don't take - * any special locks to reset these bits. All processes need to check - * these bits *after* taking inode lock(s) to see if the particular - * quota type is in the process of being turned off. If *ACTIVE, it is - * guaranteed that all dquot structures and all quotainode ptrs will all - * stay valid as long as that inode is kept locked. - * - * There is no turning back after this. 
- */ - mp->m_qflags &= ~inactivate_flags; - - /* - * Give back all the dquot reference(s) held by inodes. - * Here we go thru every single incore inode in this file system, and - * do a dqrele on the i_udquot/i_gdquot that it may have. - * Essentially, as long as somebody has an inode locked, this guarantees - * that quotas will not be turned off. This is handy because in a - * transaction once we lock the inode(s) and check for quotaon, we can - * depend on the quota inodes (and other things) being valid as long as - * we keep the lock(s). - */ - error = xfs_dqrele_all_inodes(mp, flags); - ASSERT(!error); - - /* - * Next we make the changes in the quota flag in the mount struct. - * This isn't protected by a particular lock directly, because we - * don't want to take a mrlock every time we depend on quotas being on. - */ - mp->m_qflags &= ~flags; - - /* - * Go through all the dquots of this file system and purge them, - * according to what was turned off. - */ - xfs_qm_dqpurge_all(mp, dqtype); - - /* - * Transactions that had started before ACTIVE state bit was cleared - * could have logged many dquots, so they'd have higher LSNs than - * the first QUOTAOFF log record does. If we happen to crash when - * the tail of the log has gone past the QUOTAOFF record, but - * before the last dquot modification, those dquots __will__ - * recover, and that's not good. - * - * So, we have QUOTAOFF start and end logitems; the start - * logitem won't get overwritten until the end logitem appears... - */ - error = xfs_qm_log_quotaoff_end(mp, &qoffstart, flags); - if (error) { - /* We're screwed now. Shutdown is the only option. */ - xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE); - goto out_unlock; - } - - /* - * If all quotas are completely turned off, close shop. - */ - if (mp->m_qflags == 0) { - mutex_unlock(&q->qi_quotaofflock); - xfs_qm_destroy_quotainfo(mp); - return 0; - } - - /* - * Release our quotainode references if we don't need them anymore. - */ - if ((dqtype & XFS_QMOPT_UQUOTA) && q->qi_uquotaip) { - xfs_irele(q->qi_uquotaip); - q->qi_uquotaip = NULL; - } - if ((dqtype & XFS_QMOPT_GQUOTA) && q->qi_gquotaip) { - xfs_irele(q->qi_gquotaip); - q->qi_gquotaip = NULL; - } - if ((dqtype & XFS_QMOPT_PQUOTA) && q->qi_pquotaip) { - xfs_irele(q->qi_pquotaip); - q->qi_pquotaip = NULL; - } + mutex_lock(&mp->m_quotainfo->qi_quotaofflock); + mp->m_qflags &= ~(flags & XFS_ALL_QUOTA_ENFD); + spin_lock(&mp->m_sb_lock); + mp->m_sb.sb_qflags = mp->m_qflags; + spin_unlock(&mp->m_sb_lock); + mutex_unlock(&mp->m_quotainfo->qi_quotaofflock); -out_unlock: - if (error && qoffstart) - xfs_qm_qoff_logitem_relse(qoffstart); - mutex_unlock(&q->qi_quotaofflock); - return error; + /* XXX what to do if error ? Revert back to old vals incore ? */ + return xfs_sync_sb(mp, false); } STATIC int @@ -322,7 +105,7 @@ xfs_qm_scall_trunc_qfiles( { int error = -EINVAL; - if (!xfs_sb_version_hasquota(&mp->m_sb) || flags == 0 || + if (!xfs_has_quota(mp) || flags == 0 || (flags & ~XFS_QMOPT_QUOTALL)) { xfs_debug(mp, "%s: flags=%x m_qflags=%x", __func__, flags, mp->m_qflags); @@ -421,7 +204,7 @@ xfs_qm_scall_quotaon( (mp->m_qflags & XFS_GQUOTA_ACCT))) return 0; - if (! XFS_IS_QUOTA_RUNNING(mp)) + if (!XFS_IS_QUOTA_ON(mp)) return -ESRCH; /* @@ -698,6 +481,10 @@ xfs_qm_scall_getquota( struct xfs_dquot *dqp; int error; + /* Flush inodegc work at the start of a quota reporting scan. */ + if (id == 0) + xfs_inodegc_flush(mp); + /* * Try to get the dquot. We don't want it allocated on disk, so don't * set doalloc. 
If it doesn't exist, we'll get ENOENT back. @@ -736,6 +523,10 @@ xfs_qm_scall_getquota_next( struct xfs_dquot *dqp; int error; + /* Flush inodegc work at the start of a quota reporting scan. */ + if (*id == 0) + xfs_inodegc_flush(mp); + error = xfs_qm_dqget_next(mp, *id, type, &dqp); if (error) return error; diff --git a/fs/xfs/xfs_quota.h b/fs/xfs/xfs_quota.h index d00d01302545..dcc785fdd345 100644 --- a/fs/xfs/xfs_quota.h +++ b/fs/xfs/xfs_quota.h @@ -113,6 +113,7 @@ xfs_quota_reserve_blkres(struct xfs_inode *ip, int64_t blocks) { return xfs_trans_reserve_quota_nblks(NULL, ip, blocks, 0, false); } +bool xfs_inode_near_dquot_enforcement(struct xfs_inode *ip, xfs_dqtype_t type); #else static inline int xfs_qm_vop_dqalloc(struct xfs_inode *ip, kuid_t kuid, kgid_t kgid, @@ -168,6 +169,7 @@ xfs_trans_reserve_quota_icreate(struct xfs_trans *tp, struct xfs_dquot *udqp, #define xfs_qm_mount_quotas(mp) #define xfs_qm_unmount(mp) #define xfs_qm_unmount_quotas(mp) +#define xfs_inode_near_dquot_enforcement(ip, type) (false) #endif /* CONFIG_XFS_QUOTA */ static inline int diff --git a/fs/xfs/xfs_quotaops.c b/fs/xfs/xfs_quotaops.c index 88d70c236a54..07989bd67728 100644 --- a/fs/xfs/xfs_quotaops.c +++ b/fs/xfs/xfs_quotaops.c @@ -60,18 +60,18 @@ xfs_fs_get_quota_state( struct xfs_quotainfo *q = mp->m_quotainfo; memset(state, 0, sizeof(*state)); - if (!XFS_IS_QUOTA_RUNNING(mp)) + if (!XFS_IS_QUOTA_ON(mp)) return 0; state->s_incoredqs = q->qi_dquots; - if (XFS_IS_UQUOTA_RUNNING(mp)) + if (XFS_IS_UQUOTA_ON(mp)) state->s_state[USRQUOTA].flags |= QCI_ACCT_ENABLED; if (XFS_IS_UQUOTA_ENFORCED(mp)) state->s_state[USRQUOTA].flags |= QCI_LIMITS_ENFORCED; - if (XFS_IS_GQUOTA_RUNNING(mp)) + if (XFS_IS_GQUOTA_ON(mp)) state->s_state[GRPQUOTA].flags |= QCI_ACCT_ENABLED; if (XFS_IS_GQUOTA_ENFORCED(mp)) state->s_state[GRPQUOTA].flags |= QCI_LIMITS_ENFORCED; - if (XFS_IS_PQUOTA_RUNNING(mp)) + if (XFS_IS_PQUOTA_ON(mp)) state->s_state[PRJQUOTA].flags |= QCI_ACCT_ENABLED; if (XFS_IS_PQUOTA_ENFORCED(mp)) state->s_state[PRJQUOTA].flags |= QCI_LIMITS_ENFORCED; @@ -114,10 +114,8 @@ xfs_fs_set_info( if (sb_rdonly(sb)) return -EROFS; - if (!XFS_IS_QUOTA_RUNNING(mp)) - return -ENOSYS; if (!XFS_IS_QUOTA_ON(mp)) - return -ESRCH; + return -ENOSYS; if (info->i_fieldmask & ~XFS_QC_SETINFO_MASK) return -EINVAL; if ((info->i_fieldmask & XFS_QC_SETINFO_MASK) == 0) @@ -164,7 +162,7 @@ xfs_quota_enable( if (sb_rdonly(sb)) return -EROFS; - if (!XFS_IS_QUOTA_RUNNING(mp)) + if (!XFS_IS_QUOTA_ON(mp)) return -ENOSYS; return xfs_qm_scall_quotaon(mp, xfs_quota_flags(uflags)); @@ -179,10 +177,8 @@ xfs_quota_disable( if (sb_rdonly(sb)) return -EROFS; - if (!XFS_IS_QUOTA_RUNNING(mp)) - return -ENOSYS; if (!XFS_IS_QUOTA_ON(mp)) - return -EINVAL; + return -ENOSYS; return xfs_qm_scall_quotaoff(mp, xfs_quota_flags(uflags)); } @@ -223,10 +219,8 @@ xfs_fs_get_dqblk( struct xfs_mount *mp = XFS_M(sb); xfs_dqid_t id; - if (!XFS_IS_QUOTA_RUNNING(mp)) - return -ENOSYS; if (!XFS_IS_QUOTA_ON(mp)) - return -ESRCH; + return -ENOSYS; id = from_kqid(&init_user_ns, qid); return xfs_qm_scall_getquota(mp, id, xfs_quota_type(qid.type), qdq); @@ -243,10 +237,8 @@ xfs_fs_get_nextdqblk( struct xfs_mount *mp = XFS_M(sb); xfs_dqid_t id; - if (!XFS_IS_QUOTA_RUNNING(mp)) - return -ENOSYS; if (!XFS_IS_QUOTA_ON(mp)) - return -ESRCH; + return -ENOSYS; id = from_kqid(&init_user_ns, *qid); ret = xfs_qm_scall_getquota_next(mp, &id, xfs_quota_type(qid->type), @@ -269,10 +261,8 @@ xfs_fs_set_dqblk( if (sb_rdonly(sb)) return -EROFS; - if (!XFS_IS_QUOTA_RUNNING(mp)) - return -ENOSYS; if 
(!XFS_IS_QUOTA_ON(mp)) - return -ESRCH; + return -ENOSYS; return xfs_qm_scall_setqlim(mp, from_kqid(&init_user_ns, qid), xfs_quota_type(qid.type), qdq); diff --git a/fs/xfs/xfs_refcount_item.c b/fs/xfs/xfs_refcount_item.c index 746f4eda724c..46904b793bd4 100644 --- a/fs/xfs/xfs_refcount_item.c +++ b/fs/xfs/xfs_refcount_item.c @@ -423,7 +423,7 @@ xfs_cui_validate_phys( struct xfs_mount *mp, struct xfs_phys_extent *refc) { - if (!xfs_sb_version_hasreflink(&mp->m_sb)) + if (!xfs_has_reflink(mp)) return false; if (refc->pe_flags & ~XFS_REFCOUNT_EXTENT_FLAGS) @@ -522,6 +522,9 @@ xfs_cui_item_recover( error = xfs_trans_log_finish_refcount_update(tp, cudp, type, refc->pe_startblock, refc->pe_len, &new_fsb, &new_len, &rcur); + if (error == -EFSCORRUPTED) + XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, + refc, sizeof(*refc)); if (error) goto abort_error; diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c index c256104772cb..76355f293488 100644 --- a/fs/xfs/xfs_reflink.c +++ b/fs/xfs/xfs_reflink.c @@ -759,7 +759,7 @@ xfs_reflink_recover_cow( xfs_agnumber_t agno; int error = 0; - if (!xfs_sb_version_hasreflink(&mp->m_sb)) + if (!xfs_has_reflink(mp)) return 0; for_each_perag(mp, agno, pag) { @@ -967,7 +967,7 @@ xfs_reflink_ag_has_free_space( struct xfs_perag *pag; int error = 0; - if (!xfs_sb_version_hasrmapbt(&mp->m_sb)) + if (!xfs_has_rmapbt(mp)) return 0; pag = xfs_perag_get(mp, agno); diff --git a/fs/xfs/xfs_reflink.h b/fs/xfs/xfs_reflink.h index 487b00434b96..bea65f2fe657 100644 --- a/fs/xfs/xfs_reflink.h +++ b/fs/xfs/xfs_reflink.h @@ -8,8 +8,7 @@ static inline bool xfs_is_always_cow_inode(struct xfs_inode *ip) { - return ip->i_mount->m_always_cow && - xfs_sb_version_hasreflink(&ip->i_mount->m_sb); + return ip->i_mount->m_always_cow && xfs_has_reflink(ip->i_mount); } static inline bool xfs_is_cow_inode(struct xfs_inode *ip) diff --git a/fs/xfs/xfs_rmap_item.c b/fs/xfs/xfs_rmap_item.c index dc4f0c9f0897..5f0695980467 100644 --- a/fs/xfs/xfs_rmap_item.c +++ b/fs/xfs/xfs_rmap_item.c @@ -466,7 +466,7 @@ xfs_rui_validate_map( struct xfs_mount *mp, struct xfs_map_extent *rmap) { - if (!xfs_sb_version_hasrmapbt(&mp->m_sb)) + if (!xfs_has_rmapbt(mp)) return false; if (rmap->me_flags & ~XFS_RMAP_EXTENT_FLAGS) @@ -578,6 +578,9 @@ xfs_rui_item_recover( rmap->me_owner, whichfork, rmap->me_startoff, rmap->me_startblock, rmap->me_len, state, &rcur); + if (error == -EFSCORRUPTED) + XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, + rmap, sizeof(*rmap)); if (error) goto abort_error; diff --git a/fs/xfs/xfs_rtalloc.c b/fs/xfs/xfs_rtalloc.c index 699066fb9052..b8c79ee791af 100644 --- a/fs/xfs/xfs_rtalloc.c +++ b/fs/xfs/xfs_rtalloc.c @@ -951,8 +951,7 @@ xfs_growfs_rt( return -EINVAL; /* Unsupported realtime features. */ - if (xfs_sb_version_hasrmapbt(&mp->m_sb) || - xfs_sb_version_hasreflink(&mp->m_sb)) + if (xfs_has_rmapbt(mp) || xfs_has_reflink(mp)) return -EOPNOTSUPP; nrblocks = in->newblocks; @@ -1131,6 +1130,9 @@ error_cancel: error = xfs_trans_commit(tp); if (error) break; + + /* Ensure the mount RT feature flag is now set. 
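+	 * growfs may be adding the first realtime extents to a filesystem
+	 * whose superblock had no rt blocks at mount time, in which case
+	 * the feature bit was never set (an inference from this hunk; the
+	 * patch does not state it explicitly).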
*/ + mp->m_features |= XFS_FEAT_REALTIME; } if (error) goto out_free; diff --git a/fs/xfs/xfs_rtalloc.h b/fs/xfs/xfs_rtalloc.h index ed885620589c..91b00289509b 100644 --- a/fs/xfs/xfs_rtalloc.h +++ b/fs/xfs/xfs_rtalloc.h @@ -22,9 +22,9 @@ struct xfs_rtalloc_rec { }; typedef int (*xfs_rtalloc_query_range_fn)( - struct xfs_trans *tp, - struct xfs_rtalloc_rec *rec, - void *priv); + struct xfs_trans *tp, + const struct xfs_rtalloc_rec *rec, + void *priv); #ifdef CONFIG_XFS_RT /* @@ -124,10 +124,9 @@ int xfs_rtfree_range(struct xfs_mount *mp, struct xfs_trans *tp, xfs_rtblock_t start, xfs_extlen_t len, struct xfs_buf **rbpp, xfs_fsblock_t *rsb); int xfs_rtalloc_query_range(struct xfs_trans *tp, - struct xfs_rtalloc_rec *low_rec, - struct xfs_rtalloc_rec *high_rec, - xfs_rtalloc_query_range_fn fn, - void *priv); + const struct xfs_rtalloc_rec *low_rec, + const struct xfs_rtalloc_rec *high_rec, + xfs_rtalloc_query_range_fn fn, void *priv); int xfs_rtalloc_query_all(struct xfs_trans *tp, xfs_rtalloc_query_range_fn fn, void *priv); diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c index 2c9e26a44546..9a86d3ec2cb6 100644 --- a/fs/xfs/xfs_super.c +++ b/fs/xfs/xfs_super.c @@ -49,6 +49,28 @@ static struct kset *xfs_kset; /* top-level xfs sysfs dir */ static struct xfs_kobj xfs_dbg_kobj; /* global debug sysfs attrs */ #endif +#ifdef CONFIG_HOTPLUG_CPU +static LIST_HEAD(xfs_mount_list); +static DEFINE_SPINLOCK(xfs_mount_list_lock); + +static inline void xfs_mount_list_add(struct xfs_mount *mp) +{ + spin_lock(&xfs_mount_list_lock); + list_add(&mp->m_mount_list, &xfs_mount_list); + spin_unlock(&xfs_mount_list_lock); +} + +static inline void xfs_mount_list_del(struct xfs_mount *mp) +{ + spin_lock(&xfs_mount_list_lock); + list_del(&mp->m_mount_list); + spin_unlock(&xfs_mount_list_lock); +} +#else /* !CONFIG_HOTPLUG_CPU */ +static inline void xfs_mount_list_add(struct xfs_mount *mp) {} +static inline void xfs_mount_list_del(struct xfs_mount *mp) {} +#endif + enum xfs_dax_mode { XFS_DAX_INODE = 0, XFS_DAX_ALWAYS = 1, @@ -62,15 +84,15 @@ xfs_mount_set_dax_mode( { switch (mode) { case XFS_DAX_INODE: - mp->m_flags &= ~(XFS_MOUNT_DAX_ALWAYS | XFS_MOUNT_DAX_NEVER); + mp->m_features &= ~(XFS_FEAT_DAX_ALWAYS | XFS_FEAT_DAX_NEVER); break; case XFS_DAX_ALWAYS: - mp->m_flags |= XFS_MOUNT_DAX_ALWAYS; - mp->m_flags &= ~XFS_MOUNT_DAX_NEVER; + mp->m_features |= XFS_FEAT_DAX_ALWAYS; + mp->m_features &= ~XFS_FEAT_DAX_NEVER; break; case XFS_DAX_NEVER: - mp->m_flags |= XFS_MOUNT_DAX_NEVER; - mp->m_flags &= ~XFS_MOUNT_DAX_ALWAYS; + mp->m_features |= XFS_FEAT_DAX_NEVER; + mp->m_features &= ~XFS_FEAT_DAX_ALWAYS; break; } } @@ -154,33 +176,32 @@ xfs_fs_show_options( { static struct proc_xfs_info xfs_info_set[] = { /* the few simple ones we can get from the mount struct */ - { XFS_MOUNT_IKEEP, ",ikeep" }, - { XFS_MOUNT_WSYNC, ",wsync" }, - { XFS_MOUNT_NOALIGN, ",noalign" }, - { XFS_MOUNT_SWALLOC, ",swalloc" }, - { XFS_MOUNT_NOUUID, ",nouuid" }, - { XFS_MOUNT_NORECOVERY, ",norecovery" }, - { XFS_MOUNT_ATTR2, ",attr2" }, - { XFS_MOUNT_FILESTREAMS, ",filestreams" }, - { XFS_MOUNT_GRPID, ",grpid" }, - { XFS_MOUNT_DISCARD, ",discard" }, - { XFS_MOUNT_LARGEIO, ",largeio" }, - { XFS_MOUNT_DAX_ALWAYS, ",dax=always" }, - { XFS_MOUNT_DAX_NEVER, ",dax=never" }, + { XFS_FEAT_IKEEP, ",ikeep" }, + { XFS_FEAT_WSYNC, ",wsync" }, + { XFS_FEAT_NOALIGN, ",noalign" }, + { XFS_FEAT_SWALLOC, ",swalloc" }, + { XFS_FEAT_NOUUID, ",nouuid" }, + { XFS_FEAT_NORECOVERY, ",norecovery" }, + { XFS_FEAT_ATTR2, ",attr2" }, + { XFS_FEAT_FILESTREAMS, ",filestreams" 
}, + { XFS_FEAT_GRPID, ",grpid" }, + { XFS_FEAT_DISCARD, ",discard" }, + { XFS_FEAT_LARGE_IOSIZE, ",largeio" }, + { XFS_FEAT_DAX_ALWAYS, ",dax=always" }, + { XFS_FEAT_DAX_NEVER, ",dax=never" }, { 0, NULL } }; struct xfs_mount *mp = XFS_M(root->d_sb); struct proc_xfs_info *xfs_infop; for (xfs_infop = xfs_info_set; xfs_infop->flag; xfs_infop++) { - if (mp->m_flags & xfs_infop->flag) + if (mp->m_features & xfs_infop->flag) seq_puts(m, xfs_infop->str); } - seq_printf(m, ",inode%d", - (mp->m_flags & XFS_MOUNT_SMALL_INUMS) ? 32 : 64); + seq_printf(m, ",inode%d", xfs_has_small_inums(mp) ? 32 : 64); - if (mp->m_flags & XFS_MOUNT_ALLOCSIZE) + if (xfs_has_allocsize(mp)) seq_printf(m, ",allocsize=%dk", (1 << mp->m_allocsize_log) >> 10); @@ -201,25 +222,20 @@ xfs_fs_show_options( seq_printf(m, ",swidth=%d", (int)XFS_FSB_TO_BB(mp, mp->m_swidth)); - if (mp->m_qflags & XFS_UQUOTA_ACCT) { - if (mp->m_qflags & XFS_UQUOTA_ENFD) - seq_puts(m, ",usrquota"); - else - seq_puts(m, ",uqnoenforce"); - } + if (mp->m_qflags & XFS_UQUOTA_ENFD) + seq_puts(m, ",usrquota"); + else if (mp->m_qflags & XFS_UQUOTA_ACCT) + seq_puts(m, ",uqnoenforce"); - if (mp->m_qflags & XFS_PQUOTA_ACCT) { - if (mp->m_qflags & XFS_PQUOTA_ENFD) - seq_puts(m, ",prjquota"); - else - seq_puts(m, ",pqnoenforce"); - } - if (mp->m_qflags & XFS_GQUOTA_ACCT) { - if (mp->m_qflags & XFS_GQUOTA_ENFD) - seq_puts(m, ",grpquota"); - else - seq_puts(m, ",gqnoenforce"); - } + if (mp->m_qflags & XFS_PQUOTA_ENFD) + seq_puts(m, ",prjquota"); + else if (mp->m_qflags & XFS_PQUOTA_ACCT) + seq_puts(m, ",pqnoenforce"); + + if (mp->m_qflags & XFS_GQUOTA_ENFD) + seq_puts(m, ",grpquota"); + else if (mp->m_qflags & XFS_GQUOTA_ACCT) + seq_puts(m, ",gqnoenforce"); if (!(mp->m_qflags & XFS_ALL_QUOTA_ACCT)) seq_puts(m, ",noquota"); @@ -230,11 +246,11 @@ xfs_fs_show_options( /* * Set parameters for inode allocation heuristics, taking into account * filesystem size and inode32/inode64 mount options; i.e. specifically - * whether or not XFS_MOUNT_SMALL_INUMS is set. + * whether or not XFS_FEAT_SMALL_INUMS is set. * * Inode allocation patterns are altered only if inode32 is requested - * (XFS_MOUNT_SMALL_INUMS), and the filesystem is sufficiently large. - * If altered, XFS_MOUNT_32BITINODES is set as well. + * (XFS_FEAT_SMALL_INUMS), and the filesystem is sufficiently large. + * If altered, XFS_OPSTATE_INODE32 is set as well. * * An agcount independent of that in the mount structure is provided * because in the growfs case, mp->m_sb.sb_agcount is not yet updated @@ -276,13 +292,13 @@ xfs_set_inode_alloc( /* * If user asked for no more than 32-bit inodes, and the fs is - * sufficiently large, set XFS_MOUNT_32BITINODES if we must alter + * sufficiently large, set XFS_OPSTATE_INODE32 if we must alter * the allocator to accommodate the request. */ - if ((mp->m_flags & XFS_MOUNT_SMALL_INUMS) && ino > XFS_MAXINUMBER_32) - mp->m_flags |= XFS_MOUNT_32BITINODES; + if (xfs_has_small_inums(mp) && ino > XFS_MAXINUMBER_32) + set_bit(XFS_OPSTATE_INODE32, &mp->m_opstate); else - mp->m_flags &= ~XFS_MOUNT_32BITINODES; + clear_bit(XFS_OPSTATE_INODE32, &mp->m_opstate); for (index = 0; index < agcount; index++) { struct xfs_perag *pag; @@ -291,7 +307,7 @@ xfs_set_inode_alloc( pag = xfs_perag_get(mp, index); - if (mp->m_flags & XFS_MOUNT_32BITINODES) { + if (xfs_is_inode32(mp)) { if (ino > XFS_MAXINUMBER_32) { pag->pagi_inodeok = 0; pag->pagf_metadata = 0; @@ -311,7 +327,7 @@ xfs_set_inode_alloc( xfs_perag_put(pag); } - return (mp->m_flags & XFS_MOUNT_32BITINODES) ? 
maxagi : agcount; + return xfs_is_inode32(mp) ? maxagi : agcount; } STATIC int @@ -468,7 +484,7 @@ xfs_setup_devices( if (mp->m_logdev_targp && mp->m_logdev_targp != mp->m_ddev_targp) { unsigned int log_sector_size = BBSIZE; - if (xfs_sb_version_hassector(&mp->m_sb)) + if (xfs_has_sector(mp)) log_sector_size = mp->m_sb.sb_logsectsize; error = xfs_setsize_buftarg(mp->m_logdev_targp, log_sector_size); @@ -501,37 +517,37 @@ xfs_init_mount_workqueues( if (!mp->m_unwritten_workqueue) goto out_destroy_buf; - mp->m_cil_workqueue = alloc_workqueue("xfs-cil/%s", - XFS_WQFLAGS(WQ_FREEZABLE | WQ_MEM_RECLAIM | WQ_UNBOUND), - 0, mp->m_super->s_id); - if (!mp->m_cil_workqueue) - goto out_destroy_unwritten; - mp->m_reclaim_workqueue = alloc_workqueue("xfs-reclaim/%s", XFS_WQFLAGS(WQ_FREEZABLE | WQ_MEM_RECLAIM), 0, mp->m_super->s_id); if (!mp->m_reclaim_workqueue) - goto out_destroy_cil; + goto out_destroy_unwritten; - mp->m_gc_workqueue = alloc_workqueue("xfs-gc/%s", - WQ_SYSFS | WQ_UNBOUND | WQ_FREEZABLE | WQ_MEM_RECLAIM, + mp->m_blockgc_wq = alloc_workqueue("xfs-blockgc/%s", + XFS_WQFLAGS(WQ_UNBOUND | WQ_FREEZABLE | WQ_MEM_RECLAIM), 0, mp->m_super->s_id); - if (!mp->m_gc_workqueue) + if (!mp->m_blockgc_wq) goto out_destroy_reclaim; + mp->m_inodegc_wq = alloc_workqueue("xfs-inodegc/%s", + XFS_WQFLAGS(WQ_FREEZABLE | WQ_MEM_RECLAIM), + 1, mp->m_super->s_id); + if (!mp->m_inodegc_wq) + goto out_destroy_blockgc; + mp->m_sync_workqueue = alloc_workqueue("xfs-sync/%s", XFS_WQFLAGS(WQ_FREEZABLE), 0, mp->m_super->s_id); if (!mp->m_sync_workqueue) - goto out_destroy_eofb; + goto out_destroy_inodegc; return 0; -out_destroy_eofb: - destroy_workqueue(mp->m_gc_workqueue); +out_destroy_inodegc: + destroy_workqueue(mp->m_inodegc_wq); +out_destroy_blockgc: + destroy_workqueue(mp->m_blockgc_wq); out_destroy_reclaim: destroy_workqueue(mp->m_reclaim_workqueue); -out_destroy_cil: - destroy_workqueue(mp->m_cil_workqueue); out_destroy_unwritten: destroy_workqueue(mp->m_unwritten_workqueue); out_destroy_buf: @@ -545,9 +561,9 @@ xfs_destroy_mount_workqueues( struct xfs_mount *mp) { destroy_workqueue(mp->m_sync_workqueue); - destroy_workqueue(mp->m_gc_workqueue); + destroy_workqueue(mp->m_blockgc_wq); + destroy_workqueue(mp->m_inodegc_wq); destroy_workqueue(mp->m_reclaim_workqueue); - destroy_workqueue(mp->m_cil_workqueue); destroy_workqueue(mp->m_unwritten_workqueue); destroy_workqueue(mp->m_buf_workqueue); } @@ -596,32 +612,6 @@ xfs_fs_alloc_inode( return NULL; } -#ifdef DEBUG -static void -xfs_check_delalloc( - struct xfs_inode *ip, - int whichfork) -{ - struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, whichfork); - struct xfs_bmbt_irec got; - struct xfs_iext_cursor icur; - - if (!ifp || !xfs_iext_lookup_extent(ip, ifp, 0, &icur, &got)) - return; - do { - if (isnullstartblock(got.br_startblock)) { - xfs_warn(ip->i_mount, - "ino %llx %s fork has delalloc extent at [0x%llx:0x%llx]", - ip->i_ino, - whichfork == XFS_DATA_FORK ? "data" : "cow", - got.br_startoff, got.br_blockcount); - } - } while (xfs_iext_next_extent(ifp, &icur, &got)); -} -#else -#define xfs_check_delalloc(ip, whichfork) do { } while (0) -#endif - /* * Now that the generic code is guaranteed not to be accessing * the linux inode, we can inactivate and reclaim the inode. 
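/*
 * A minimal sketch of how the per-cpu struct xfs_inodegc introduced in
 * xfs_mount.h above is meant to be fed. The helper name and the
 * ip->i_gclist llist_node are illustrative assumptions, not part of this
 * diff: an inode due for inactivation is pushed onto the local CPU's
 * lockless list and the per-cpu work item is queued on m_inodegc_wq to
 * drain it in the background.
 */
static void
example_inodegc_queue(
	struct xfs_mount	*mp,
	struct xfs_inode	*ip)
{
	struct xfs_inodegc	*gc = get_cpu_ptr(mp->m_inodegc);

	llist_add(&ip->i_gclist, &gc->list);	/* assumed llist_node member */
	gc->items++;				/* approximate, per the struct comment */
	queue_work(mp->m_inodegc_wq, &gc->work);
	put_cpu_ptr(mp->m_inodegc);
}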
@@ -637,30 +627,6 @@ xfs_fs_destroy_inode( ASSERT(!rwsem_is_locked(&inode->i_rwsem)); XFS_STATS_INC(ip->i_mount, vn_rele); XFS_STATS_INC(ip->i_mount, vn_remove); - - xfs_inactive(ip); - - if (!XFS_FORCED_SHUTDOWN(ip->i_mount) && ip->i_delayed_blks) { - xfs_check_delalloc(ip, XFS_DATA_FORK); - xfs_check_delalloc(ip, XFS_COW_FORK); - ASSERT(0); - } - - XFS_STATS_INC(ip->i_mount, vn_reclaim); - - /* - * We should never get here with one of the reclaim flags already set. - */ - ASSERT_ALWAYS(!xfs_iflags_test(ip, XFS_IRECLAIMABLE)); - ASSERT_ALWAYS(!xfs_iflags_test(ip, XFS_IRECLAIM)); - - /* - * We always use background reclaim here because even if the inode is - * clean, it still may be under IO and hence we have wait for IO - * completion to occur before we can reclaim the inode. The background - * reclaim path handles this more efficiently than we can here, so - * simply let background reclaim tear down all inodes. - */ xfs_inode_mark_reclaimable(ip); } @@ -709,8 +675,6 @@ xfs_fs_inode_init_once( atomic_set(&ip->i_pincount, 0); spin_lock_init(&ip->i_flags_lock); - mrlock_init(&ip->i_mmaplock, MRLOCK_ALLOW_EQUAL_PRI|MRLOCK_BARRIER, - "xfsino", ip->i_ino); mrlock_init(&ip->i_lock, MRLOCK_ALLOW_EQUAL_PRI|MRLOCK_BARRIER, "xfsino", ip->i_ino); } @@ -734,7 +698,7 @@ xfs_fs_drop_inode( * that. See the comment for this inode flag. */ if (ip->i_flags & XFS_IRECOVERY) { - ASSERT(ip->i_mount->m_log->l_flags & XLOG_RECOVERY_NEEDED); + ASSERT(xlog_recovery_needed(ip->i_mount->m_log)); return 0; } @@ -757,6 +721,8 @@ xfs_fs_sync_fs( { struct xfs_mount *mp = XFS_M(sb); + trace_xfs_fs_sync_fs(mp, __return_address); + /* * Doing anything during the async pass would be counterproductive. */ @@ -773,6 +739,25 @@ xfs_fs_sync_fs( flush_delayed_work(&mp->m_log->l_work); } + /* + * If we are called with page faults frozen out, it means we are about + * to freeze the transaction subsystem. Take the opportunity to shut + * down inodegc because once SB_FREEZE_FS is set it's too late to + * prevent inactivation races with freeze. The fs doesn't get called + * again by the freezing process until after SB_FREEZE_FS has been set, + * so it's now or never. Same logic applies to speculative allocation + * garbage collection. + * + * We don't care if this is a normal syncfs call that does this or + * freeze that does this - we can run this multiple times without issue + * and we won't race with a restart because a restart can only occur + * when the state is either SB_FREEZE_FS or SB_FREEZE_COMPLETE. + */ + if (sb->s_writers.frozen == SB_FREEZE_PAGEFAULT) { + xfs_inodegc_stop(mp); + xfs_blockgc_stop(mp); + } + return 0; } @@ -791,6 +776,9 @@ xfs_fs_statfs( xfs_extlen_t lsize; int64_t ffree; + /* Wait for whatever inactivations are in progress. */ + xfs_inodegc_flush(mp); + statp->f_type = XFS_SUPER_MAGIC; statp->f_namelen = MAXNAMELEN - 1; @@ -886,10 +874,22 @@ xfs_fs_freeze( * set a GFP_NOFS context here to avoid recursion deadlocks. */ flags = memalloc_nofs_save(); - xfs_blockgc_stop(mp); xfs_save_resvblks(mp); ret = xfs_log_quiesce(mp); memalloc_nofs_restore(flags); + + /* + * For read-write filesystems, we need to restart the inodegc on error + * because we stopped it at SB_FREEZE_PAGEFAULT level and a thaw is not + * going to be run to restart it now. We are at SB_FREEZE_FS level + * here, so we can restart safely without racing with a stop in + * xfs_fs_sync_fs(). 
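+ *
+ * Lifecycle summary, per the hunks in this file: syncfs at
+ * SB_FREEZE_PAGEFAULT stops inodegc and blockgc; xfs_fs_freeze()
+ * restarts them only if quiescing failed; xfs_fs_unfreeze() restarts
+ * them on thaw, and only for read-write mounts.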
+ */ + if (ret && !xfs_is_readonly(mp)) { + xfs_blockgc_start(mp); + xfs_inodegc_start(mp); + } + return ret; } @@ -901,7 +901,18 @@ xfs_fs_unfreeze( xfs_restore_resvblks(mp); xfs_log_work_queue(mp); - xfs_blockgc_start(mp); + + /* + * Don't reactivate the inodegc worker on a readonly filesystem because + * inodes are sent directly to reclaim. Don't reactivate the blockgc + * worker because there are no speculative preallocations on a readonly + * filesystem. + */ + if (!xfs_is_readonly(mp)) { + xfs_blockgc_start(mp); + xfs_inodegc_start(mp); + } + return 0; } @@ -913,10 +924,8 @@ STATIC int xfs_finish_flags( struct xfs_mount *mp) { - int ronly = (mp->m_flags & XFS_MOUNT_RDONLY); - /* Fail a mount where the logbuf is smaller than the log stripe */ - if (xfs_sb_version_haslogv2(&mp->m_sb)) { + if (xfs_has_logv2(mp)) { if (mp->m_logbsize <= 0 && mp->m_sb.sb_logsunit > XLOG_BIG_RECORD_BSIZE) { mp->m_logbsize = mp->m_sb.sb_logsunit; @@ -938,33 +947,24 @@ xfs_finish_flags( /* * V5 filesystems always use attr2 format for attributes. */ - if (xfs_sb_version_hascrc(&mp->m_sb) && - (mp->m_flags & XFS_MOUNT_NOATTR2)) { + if (xfs_has_crc(mp) && xfs_has_noattr2(mp)) { xfs_warn(mp, "Cannot mount a V5 filesystem as noattr2. " "attr2 is always enabled for V5 filesystems."); return -EINVAL; } /* - * mkfs'ed attr2 will turn on attr2 mount unless explicitly - * told by noattr2 to turn it off - */ - if (xfs_sb_version_hasattr2(&mp->m_sb) && - !(mp->m_flags & XFS_MOUNT_NOATTR2)) - mp->m_flags |= XFS_MOUNT_ATTR2; - - /* * prohibit r/w mounts of read-only filesystems */ - if ((mp->m_sb.sb_flags & XFS_SBF_READONLY) && !ronly) { + if ((mp->m_sb.sb_flags & XFS_SBF_READONLY) && !xfs_is_readonly(mp)) { xfs_warn(mp, "cannot mount a read-only filesystem as read-write"); return -EROFS; } - if ((mp->m_qflags & (XFS_GQUOTA_ACCT | XFS_GQUOTA_ACTIVE)) && - (mp->m_qflags & (XFS_PQUOTA_ACCT | XFS_PQUOTA_ACTIVE)) && - !xfs_sb_version_has_pquotino(&mp->m_sb)) { + if ((mp->m_qflags & XFS_GQUOTA_ACCT) && + (mp->m_qflags & XFS_PQUOTA_ACCT) && + !xfs_has_pquotino(mp)) { xfs_warn(mp, "Super block does not support project and group quota together"); return -EINVAL; @@ -1022,11 +1022,40 @@ xfs_destroy_percpu_counters( percpu_counter_destroy(&mp->m_icount); percpu_counter_destroy(&mp->m_ifree); percpu_counter_destroy(&mp->m_fdblocks); - ASSERT(XFS_FORCED_SHUTDOWN(mp) || + ASSERT(xfs_is_shutdown(mp) || percpu_counter_sum(&mp->m_delalloc_blks) == 0); percpu_counter_destroy(&mp->m_delalloc_blks); } +static int +xfs_inodegc_init_percpu( + struct xfs_mount *mp) +{ + struct xfs_inodegc *gc; + int cpu; + + mp->m_inodegc = alloc_percpu(struct xfs_inodegc); + if (!mp->m_inodegc) + return -ENOMEM; + + for_each_possible_cpu(cpu) { + gc = per_cpu_ptr(mp->m_inodegc, cpu); + init_llist_head(&gc->list); + gc->items = 0; + INIT_WORK(&gc->work, xfs_inodegc_worker); + } + return 0; +} + +static void +xfs_inodegc_free_percpu( + struct xfs_mount *mp) +{ + if (!mp->m_inodegc) + return; + free_percpu(mp->m_inodegc); +} + static void xfs_fs_put_super( struct super_block *sb) @@ -1043,6 +1072,8 @@ xfs_fs_put_super( xfs_freesb(mp); free_percpu(mp->m_stats.xs_stats); + xfs_mount_list_del(mp); + xfs_inodegc_free_percpu(mp); xfs_destroy_percpu_counters(mp); xfs_destroy_mount_workqueues(mp); xfs_close_devices(mp); @@ -1131,7 +1162,7 @@ xfs_fs_warn_deprecated( * already had the flag set */ if ((fc->purpose & FS_CONTEXT_FOR_RECONFIGURE) && - !!(XFS_M(fc->root->d_sb)->m_flags & flag) == value) + !!(XFS_M(fc->root->d_sb)->m_features & flag) == value) return; 
xfs_warn(fc->s_fs_info, "%s mount option is deprecated.", param->key); } @@ -1179,27 +1210,27 @@ xfs_fs_parse_param( if (suffix_kstrtoint(param->string, 10, &size)) return -EINVAL; parsing_mp->m_allocsize_log = ffs(size) - 1; - parsing_mp->m_flags |= XFS_MOUNT_ALLOCSIZE; + parsing_mp->m_features |= XFS_FEAT_ALLOCSIZE; return 0; case Opt_grpid: case Opt_bsdgroups: - parsing_mp->m_flags |= XFS_MOUNT_GRPID; + parsing_mp->m_features |= XFS_FEAT_GRPID; return 0; case Opt_nogrpid: case Opt_sysvgroups: - parsing_mp->m_flags &= ~XFS_MOUNT_GRPID; + parsing_mp->m_features &= ~XFS_FEAT_GRPID; return 0; case Opt_wsync: - parsing_mp->m_flags |= XFS_MOUNT_WSYNC; + parsing_mp->m_features |= XFS_FEAT_WSYNC; return 0; case Opt_norecovery: - parsing_mp->m_flags |= XFS_MOUNT_NORECOVERY; + parsing_mp->m_features |= XFS_FEAT_NORECOVERY; return 0; case Opt_noalign: - parsing_mp->m_flags |= XFS_MOUNT_NOALIGN; + parsing_mp->m_features |= XFS_FEAT_NOALIGN; return 0; case Opt_swalloc: - parsing_mp->m_flags |= XFS_MOUNT_SWALLOC; + parsing_mp->m_features |= XFS_FEAT_SWALLOC; return 0; case Opt_sunit: parsing_mp->m_dalign = result.uint_32; @@ -1208,62 +1239,58 @@ xfs_fs_parse_param( parsing_mp->m_swidth = result.uint_32; return 0; case Opt_inode32: - parsing_mp->m_flags |= XFS_MOUNT_SMALL_INUMS; + parsing_mp->m_features |= XFS_FEAT_SMALL_INUMS; return 0; case Opt_inode64: - parsing_mp->m_flags &= ~XFS_MOUNT_SMALL_INUMS; + parsing_mp->m_features &= ~XFS_FEAT_SMALL_INUMS; return 0; case Opt_nouuid: - parsing_mp->m_flags |= XFS_MOUNT_NOUUID; + parsing_mp->m_features |= XFS_FEAT_NOUUID; return 0; case Opt_largeio: - parsing_mp->m_flags |= XFS_MOUNT_LARGEIO; + parsing_mp->m_features |= XFS_FEAT_LARGE_IOSIZE; return 0; case Opt_nolargeio: - parsing_mp->m_flags &= ~XFS_MOUNT_LARGEIO; + parsing_mp->m_features &= ~XFS_FEAT_LARGE_IOSIZE; return 0; case Opt_filestreams: - parsing_mp->m_flags |= XFS_MOUNT_FILESTREAMS; + parsing_mp->m_features |= XFS_FEAT_FILESTREAMS; return 0; case Opt_noquota: parsing_mp->m_qflags &= ~XFS_ALL_QUOTA_ACCT; parsing_mp->m_qflags &= ~XFS_ALL_QUOTA_ENFD; - parsing_mp->m_qflags &= ~XFS_ALL_QUOTA_ACTIVE; return 0; case Opt_quota: case Opt_uquota: case Opt_usrquota: - parsing_mp->m_qflags |= (XFS_UQUOTA_ACCT | XFS_UQUOTA_ACTIVE | - XFS_UQUOTA_ENFD); + parsing_mp->m_qflags |= (XFS_UQUOTA_ACCT | XFS_UQUOTA_ENFD); return 0; case Opt_qnoenforce: case Opt_uqnoenforce: - parsing_mp->m_qflags |= (XFS_UQUOTA_ACCT | XFS_UQUOTA_ACTIVE); + parsing_mp->m_qflags |= XFS_UQUOTA_ACCT; parsing_mp->m_qflags &= ~XFS_UQUOTA_ENFD; return 0; case Opt_pquota: case Opt_prjquota: - parsing_mp->m_qflags |= (XFS_PQUOTA_ACCT | XFS_PQUOTA_ACTIVE | - XFS_PQUOTA_ENFD); + parsing_mp->m_qflags |= (XFS_PQUOTA_ACCT | XFS_PQUOTA_ENFD); return 0; case Opt_pqnoenforce: - parsing_mp->m_qflags |= (XFS_PQUOTA_ACCT | XFS_PQUOTA_ACTIVE); + parsing_mp->m_qflags |= XFS_PQUOTA_ACCT; parsing_mp->m_qflags &= ~XFS_PQUOTA_ENFD; return 0; case Opt_gquota: case Opt_grpquota: - parsing_mp->m_qflags |= (XFS_GQUOTA_ACCT | XFS_GQUOTA_ACTIVE | - XFS_GQUOTA_ENFD); + parsing_mp->m_qflags |= (XFS_GQUOTA_ACCT | XFS_GQUOTA_ENFD); return 0; case Opt_gqnoenforce: - parsing_mp->m_qflags |= (XFS_GQUOTA_ACCT | XFS_GQUOTA_ACTIVE); + parsing_mp->m_qflags |= XFS_GQUOTA_ACCT; parsing_mp->m_qflags &= ~XFS_GQUOTA_ENFD; return 0; case Opt_discard: - parsing_mp->m_flags |= XFS_MOUNT_DISCARD; + parsing_mp->m_features |= XFS_FEAT_DISCARD; return 0; case Opt_nodiscard: - parsing_mp->m_flags &= ~XFS_MOUNT_DISCARD; + parsing_mp->m_features &= ~XFS_FEAT_DISCARD; return 0; #ifdef 
CONFIG_FS_DAX case Opt_dax: @@ -1275,21 +1302,20 @@ xfs_fs_parse_param( #endif /* Following mount options will be removed in September 2025 */ case Opt_ikeep: - xfs_fs_warn_deprecated(fc, param, XFS_MOUNT_IKEEP, true); - parsing_mp->m_flags |= XFS_MOUNT_IKEEP; + xfs_fs_warn_deprecated(fc, param, XFS_FEAT_IKEEP, true); + parsing_mp->m_features |= XFS_FEAT_IKEEP; return 0; case Opt_noikeep: - xfs_fs_warn_deprecated(fc, param, XFS_MOUNT_IKEEP, false); - parsing_mp->m_flags &= ~XFS_MOUNT_IKEEP; + xfs_fs_warn_deprecated(fc, param, XFS_FEAT_IKEEP, false); + parsing_mp->m_features &= ~XFS_FEAT_IKEEP; return 0; case Opt_attr2: - xfs_fs_warn_deprecated(fc, param, XFS_MOUNT_ATTR2, true); - parsing_mp->m_flags |= XFS_MOUNT_ATTR2; + xfs_fs_warn_deprecated(fc, param, XFS_FEAT_ATTR2, true); + parsing_mp->m_features |= XFS_FEAT_ATTR2; return 0; case Opt_noattr2: - xfs_fs_warn_deprecated(fc, param, XFS_MOUNT_NOATTR2, true); - parsing_mp->m_flags &= ~XFS_MOUNT_ATTR2; - parsing_mp->m_flags |= XFS_MOUNT_NOATTR2; + xfs_fs_warn_deprecated(fc, param, XFS_FEAT_NOATTR2, true); + parsing_mp->m_features |= XFS_FEAT_NOATTR2; return 0; default: xfs_warn(parsing_mp, "unknown mount option [%s].", param->key); @@ -1303,17 +1329,23 @@ static int xfs_fs_validate_params( struct xfs_mount *mp) { + /* No recovery flag requires a read-only mount */ + if (xfs_has_norecovery(mp) && !xfs_is_readonly(mp)) { + xfs_warn(mp, "no-recovery mounts must be read-only."); + return -EINVAL; + } + /* - * no recovery flag requires a read-only mount + * We have not read the superblock at this point, so only the attr2 + * mount option can set the attr2 feature by this stage. */ - if ((mp->m_flags & XFS_MOUNT_NORECOVERY) && - !(mp->m_flags & XFS_MOUNT_RDONLY)) { - xfs_warn(mp, "no-recovery mounts must be read-only."); + if (xfs_has_attr2(mp) && xfs_has_noattr2(mp)) { + xfs_warn(mp, "attr2 and noattr2 cannot both be specified."); return -EINVAL; } - if ((mp->m_flags & XFS_MOUNT_NOALIGN) && - (mp->m_dalign || mp->m_swidth)) { + + if (xfs_has_noalign(mp) && (mp->m_dalign || mp->m_swidth)) { xfs_warn(mp, "sunit and swidth options incompatible with the noalign option"); return -EINVAL; @@ -1357,7 +1389,7 @@ xfs_fs_validate_params( return -EINVAL; } - if ((mp->m_flags & XFS_MOUNT_ALLOCSIZE) && + if (xfs_has_allocsize(mp) && (mp->m_allocsize_log > XFS_MAX_IO_LOG || mp->m_allocsize_log < XFS_MIN_IO_LOG)) { xfs_warn(mp, "invalid log iosize: %d [not %d-%d]", @@ -1418,11 +1450,22 @@ xfs_fs_fill_super( if (error) goto out_destroy_workqueues; + error = xfs_inodegc_init_percpu(mp); + if (error) + goto out_destroy_counters; + + /* + * All percpu data structures requiring cleanup when a cpu goes offline + * must be allocated before adding this @mp to the cpu-dead handler's + * mount list. + */ + xfs_mount_list_add(mp); + /* Allocate stats memory before we do operations that might use it */ mp->m_stats.xs_stats = alloc_percpu(struct xfsstats); if (!mp->m_stats.xs_stats) { error = -ENOMEM; - goto out_destroy_counters; + goto out_destroy_inodegc; } error = xfs_readsb(mp, flags); @@ -1438,7 +1481,7 @@ xfs_fs_fill_super( goto out_free_sb; /* V4 support is undergoing deprecation. */ - if (!xfs_sb_version_hascrc(&mp->m_sb)) { + if (!xfs_has_crc(mp)) { #ifdef CONFIG_XFS_SUPPORT_V4 xfs_warn_once(mp, "Deprecated V4 format (crc=0) will not be supported after September 2030."); @@ -1451,7 +1494,7 @@ xfs_fs_fill_super( } /* Filesystem claims it needs repair, so refuse the mount. 
*/ - if (xfs_sb_version_needsrepair(&mp->m_sb)) { + if (xfs_has_needsrepair(mp)) { xfs_warn(mp, "Filesystem needs repair. Please run xfs_repair."); error = -EFSCORRUPTED; goto out_free_sb; @@ -1523,7 +1566,7 @@ xfs_fs_fill_super( sb->s_maxbytes = MAX_LFS_FILESIZE; sb->s_max_links = XFS_MAXLINK; sb->s_time_gran = 1; - if (xfs_sb_version_hasbigtime(&mp->m_sb)) { + if (xfs_has_bigtime(mp)) { sb->s_time_min = xfs_bigtime_to_unix(XFS_BIGTIME_TIME_MIN); sb->s_time_max = xfs_bigtime_to_unix(XFS_BIGTIME_TIME_MAX); } else { @@ -1536,14 +1579,10 @@ xfs_fs_fill_super( set_posix_acl_flag(sb); /* version 5 superblocks support inode version counters. */ - if (XFS_SB_VERSION_NUM(&mp->m_sb) == XFS_SB_VERSION_5) + if (xfs_has_crc(mp)) sb->s_flags |= SB_I_VERSION; - if (xfs_sb_version_hasbigtime(&mp->m_sb)) - xfs_warn(mp, - "EXPERIMENTAL big timestamp feature in use. Use at your own risk!"); - - if (mp->m_flags & XFS_MOUNT_DAX_ALWAYS) { + if (xfs_has_dax_always(mp)) { bool rtdev_is_dax = false, datadev_is_dax; xfs_warn(mp, @@ -1559,7 +1598,7 @@ xfs_fs_fill_super( "DAX unsupported by block device. Turning off DAX."); xfs_mount_set_dax_mode(mp, XFS_DAX_NEVER); } - if (xfs_sb_version_hasreflink(&mp->m_sb)) { + if (xfs_has_reflink(mp)) { xfs_alert(mp, "DAX and reflink cannot be used together!"); error = -EINVAL; @@ -1567,17 +1606,17 @@ xfs_fs_fill_super( } } - if (mp->m_flags & XFS_MOUNT_DISCARD) { + if (xfs_has_discard(mp)) { struct request_queue *q = bdev_get_queue(sb->s_bdev); if (!blk_queue_discard(q)) { xfs_warn(mp, "mounting with \"discard\" option, but " "the device does not support discard"); - mp->m_flags &= ~XFS_MOUNT_DISCARD; + mp->m_features &= ~XFS_FEAT_DISCARD; } } - if (xfs_sb_version_hasreflink(&mp->m_sb)) { + if (xfs_has_reflink(mp)) { if (mp->m_sb.sb_rblocks) { xfs_alert(mp, "reflink not compatible with realtime device!"); @@ -1591,17 +1630,13 @@ xfs_fs_fill_super( } } - if (xfs_sb_version_hasrmapbt(&mp->m_sb) && mp->m_sb.sb_rblocks) { + if (xfs_has_rmapbt(mp) && mp->m_sb.sb_rblocks) { xfs_alert(mp, "reverse mapping btree not compatible with realtime device!"); error = -EINVAL; goto out_filestream_unmount; } - if (xfs_sb_version_hasinobtcounts(&mp->m_sb)) - xfs_warn(mp, - "EXPERIMENTAL inode btree counters feature in use. Use at your own risk!"); - error = xfs_mountfs(mp); if (error) goto out_filestream_unmount; @@ -1625,6 +1660,9 @@ xfs_fs_fill_super( xfs_freesb(mp); out_free_stats: free_percpu(mp->m_stats.xs_stats); + out_destroy_inodegc: + xfs_mount_list_del(mp); + xfs_inodegc_free_percpu(mp); out_destroy_counters: xfs_destroy_percpu_counters(mp); out_destroy_workqueues: @@ -1656,13 +1694,13 @@ xfs_remount_rw( struct xfs_sb *sbp = &mp->m_sb; int error; - if (mp->m_flags & XFS_MOUNT_NORECOVERY) { + if (xfs_has_norecovery(mp)) { xfs_warn(mp, "ro->rw transition prohibited on norecovery mount"); return -EINVAL; } - if (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5 && + if (xfs_sb_is_v5(sbp) && xfs_sb_has_ro_compat_feature(sbp, XFS_SB_FEAT_RO_COMPAT_UNKNOWN)) { xfs_warn(mp, "ro->rw transition prohibited on unknown (0x%x) ro-compat filesystem", @@ -1671,7 +1709,7 @@ xfs_remount_rw( return -EINVAL; } - mp->m_flags &= ~XFS_MOUNT_RDONLY; + clear_bit(XFS_OPSTATE_READONLY, &mp->m_opstate); /* * If this is the first remount to writeable state we might have some @@ -1708,6 +1746,9 @@ xfs_remount_rw( if (error && error != -ENOSPC) return error; + /* Re-enable the background inode inactivation worker. 
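+	 * This is the counterpart of the xfs_inodegc_stop() call in
+	 * xfs_remount_ro() below: on a readonly filesystem inodes go
+	 * straight to reclaim, so the worker stays idle until remount-rw.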
*/ + xfs_inodegc_start(mp); + return 0; } @@ -1730,6 +1771,15 @@ xfs_remount_ro( return error; } + /* + * Stop the inodegc background worker. xfs_fs_reconfigure already + * flushed all pending inodegc work when it sync'd the filesystem. + * The VFS holds s_umount, so we know that inodes cannot enter + * xfs_fs_destroy_inode during a remount operation. In readonly mode + * we send inodes straight to reclaim, so no inodes will be queued. + */ + xfs_inodegc_stop(mp); + /* Free the per-AG metadata reservation pool. */ error = xfs_fs_unreserve_ag_blocks(mp); if (error) { @@ -1747,7 +1797,7 @@ xfs_remount_ro( xfs_save_resvblks(mp); xfs_log_clean(mp); - mp->m_flags |= XFS_MOUNT_RDONLY; + set_bit(XFS_OPSTATE_READONLY, &mp->m_opstate); return 0; } @@ -1770,12 +1820,11 @@ xfs_fs_reconfigure( { struct xfs_mount *mp = XFS_M(fc->root->d_sb); struct xfs_mount *new_mp = fc->s_fs_info; - xfs_sb_t *sbp = &mp->m_sb; int flags = fc->sb_flags; int error; /* version 5 superblocks always support version counters. */ - if (XFS_SB_VERSION_NUM(&mp->m_sb) == XFS_SB_VERSION_5) + if (xfs_has_crc(mp)) fc->sb_flags |= SB_I_VERSION; error = xfs_fs_validate_params(new_mp); @@ -1785,28 +1834,26 @@ xfs_fs_reconfigure( sync_filesystem(mp->m_super); /* inode32 -> inode64 */ - if ((mp->m_flags & XFS_MOUNT_SMALL_INUMS) && - !(new_mp->m_flags & XFS_MOUNT_SMALL_INUMS)) { - mp->m_flags &= ~XFS_MOUNT_SMALL_INUMS; - mp->m_maxagi = xfs_set_inode_alloc(mp, sbp->sb_agcount); + if (xfs_has_small_inums(mp) && !xfs_has_small_inums(new_mp)) { + mp->m_features &= ~XFS_FEAT_SMALL_INUMS; + mp->m_maxagi = xfs_set_inode_alloc(mp, mp->m_sb.sb_agcount); } /* inode64 -> inode32 */ - if (!(mp->m_flags & XFS_MOUNT_SMALL_INUMS) && - (new_mp->m_flags & XFS_MOUNT_SMALL_INUMS)) { - mp->m_flags |= XFS_MOUNT_SMALL_INUMS; - mp->m_maxagi = xfs_set_inode_alloc(mp, sbp->sb_agcount); + if (!xfs_has_small_inums(mp) && xfs_has_small_inums(new_mp)) { + mp->m_features |= XFS_FEAT_SMALL_INUMS; + mp->m_maxagi = xfs_set_inode_alloc(mp, mp->m_sb.sb_agcount); } /* ro -> rw */ - if ((mp->m_flags & XFS_MOUNT_RDONLY) && !(flags & SB_RDONLY)) { + if (xfs_is_readonly(mp) && !(flags & SB_RDONLY)) { error = xfs_remount_rw(mp); if (error) return error; } /* rw -> ro */ - if (!(mp->m_flags & XFS_MOUNT_RDONLY) && (flags & SB_RDONLY)) { + if (!xfs_is_readonly(mp) && (flags & SB_RDONLY)) { error = xfs_remount_ro(mp); if (error) return error; @@ -1873,11 +1920,11 @@ static int xfs_init_fs_context( * Copy binary VFS mount flags we are interested in. 
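
/*
 * Illustrative sketch, not part of the patch: xfs_inodegc_start() and
 * xfs_inodegc_stop() bracket the background inode inactivation workers
 * across the remount transitions above, but their bodies are not in this
 * hunk. A plausible shape for the start side; the helper names
 * xfs_set_inodegc_enabled() and xfs_inodegc_queue_all() are assumptions:
 */
void xfs_inodegc_start(struct xfs_mount *mp)
{
	/* nothing to do if gc was already enabled */
	if (xfs_set_inodegc_enabled(mp))
		return;

	trace_xfs_inodegc_start(mp, __return_address);
	/* kick workers for inodes queued while gc was disabled */
	xfs_inodegc_queue_all(mp);
}
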
*/ if (fc->sb_flags & SB_RDONLY) - mp->m_flags |= XFS_MOUNT_RDONLY; + set_bit(XFS_OPSTATE_READONLY, &mp->m_opstate); if (fc->sb_flags & SB_DIRSYNC) - mp->m_flags |= XFS_MOUNT_DIRSYNC; + mp->m_features |= XFS_FEAT_DIRSYNC; if (fc->sb_flags & SB_SYNCHRONOUS) - mp->m_flags |= XFS_MOUNT_WSYNC; + mp->m_features |= XFS_FEAT_WSYNC; fc->s_fs_info = mp; fc->ops = &xfs_context_ops; @@ -2120,6 +2167,48 @@ xfs_destroy_workqueues(void) destroy_workqueue(xfs_alloc_wq); } +#ifdef CONFIG_HOTPLUG_CPU +static int +xfs_cpu_dead( + unsigned int cpu) +{ + struct xfs_mount *mp, *n; + + spin_lock(&xfs_mount_list_lock); + list_for_each_entry_safe(mp, n, &xfs_mount_list, m_mount_list) { + spin_unlock(&xfs_mount_list_lock); + xfs_inodegc_cpu_dead(mp, cpu); + spin_lock(&xfs_mount_list_lock); + } + spin_unlock(&xfs_mount_list_lock); + return 0; +} + +static int __init +xfs_cpu_hotplug_init(void) +{ + int error; + + error = cpuhp_setup_state_nocalls(CPUHP_XFS_DEAD, "xfs:dead", NULL, + xfs_cpu_dead); + if (error < 0) + xfs_alert(NULL, +"Failed to initialise CPU hotplug, error %d. XFS is non-functional.", + error); + return error; +} + +static void +xfs_cpu_hotplug_destroy(void) +{ + cpuhp_remove_state_nocalls(CPUHP_XFS_DEAD); +} + +#else /* !CONFIG_HOTPLUG_CPU */ +static inline int xfs_cpu_hotplug_init(void) { return 0; } +static inline void xfs_cpu_hotplug_destroy(void) {} +#endif + STATIC int __init init_xfs_fs(void) { @@ -2132,10 +2221,14 @@ init_xfs_fs(void) xfs_dir_startup(); - error = xfs_init_zones(); + error = xfs_cpu_hotplug_init(); if (error) goto out; + error = xfs_init_zones(); + if (error) + goto out_destroy_hp; + error = xfs_init_workqueues(); if (error) goto out_destroy_zones; @@ -2215,6 +2308,8 @@ init_xfs_fs(void) xfs_destroy_workqueues(); out_destroy_zones: xfs_destroy_zones(); + out_destroy_hp: + xfs_cpu_hotplug_destroy(); out: return error; } @@ -2237,6 +2332,7 @@ exit_xfs_fs(void) xfs_destroy_workqueues(); xfs_destroy_zones(); xfs_uuid_table_free(); + xfs_cpu_hotplug_destroy(); } module_init(init_xfs_fs); diff --git a/fs/xfs/xfs_symlink.c b/fs/xfs/xfs_symlink.c index 1525636f4065..fc2c6a404647 100644 --- a/fs/xfs/xfs_symlink.c +++ b/fs/xfs/xfs_symlink.c @@ -63,7 +63,7 @@ xfs_readlink_bmap_ilocked( byte_cnt = pathlen; cur_chunk = bp->b_addr; - if (xfs_sb_version_hascrc(&mp->m_sb)) { + if (xfs_has_crc(mp)) { if (!xfs_symlink_hdr_ok(ip->i_ino, offset, byte_cnt, bp)) { error = -EFSCORRUPTED; @@ -107,7 +107,7 @@ xfs_readlink( ASSERT(ip->i_df.if_format != XFS_DINODE_FMT_LOCAL); - if (XFS_FORCED_SHUTDOWN(mp)) + if (xfs_is_shutdown(mp)) return -EIO; xfs_ilock(ip, XFS_ILOCK_SHARED); @@ -168,7 +168,7 @@ xfs_symlink( trace_xfs_symlink(dp, link_name); - if (XFS_FORCED_SHUTDOWN(mp)) + if (xfs_is_shutdown(mp)) return -EIO; /* @@ -321,9 +321,8 @@ xfs_symlink( * symlink transaction goes to disk before returning to * the user. 
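
/*
 * Illustrative sketch, not part of the patch: xfs_cpu_dead() above drops
 * xfs_mount_list_lock around each xfs_inodegc_cpu_dead() call because the
 * per-mount handler may block, which is not allowed under a spinlock.
 * The list helpers used by xfs_fs_fill_super() are presumably as simple
 * as this:
 */
static LIST_HEAD(xfs_mount_list);
static DEFINE_SPINLOCK(xfs_mount_list_lock);

static void xfs_mount_list_add(struct xfs_mount *mp)
{
	spin_lock(&xfs_mount_list_lock);
	list_add(&mp->m_mount_list, &xfs_mount_list);
	spin_unlock(&xfs_mount_list_lock);
}

static void xfs_mount_list_del(struct xfs_mount *mp)
{
	spin_lock(&xfs_mount_list_lock);
	list_del(&mp->m_mount_list);
	spin_unlock(&xfs_mount_list_lock);
}
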
*/ - if (mp->m_flags & (XFS_MOUNT_WSYNC|XFS_MOUNT_DIRSYNC)) { + if (xfs_has_wsync(mp) || xfs_has_dirsync(mp)) xfs_trans_set_sync(tp); - } error = xfs_trans_commit(tp); if (error) @@ -445,7 +444,7 @@ xfs_inactive_symlink_rmt( xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); error = xfs_trans_commit(tp); if (error) { - ASSERT(XFS_FORCED_SHUTDOWN(mp)); + ASSERT(xfs_is_shutdown(mp)); goto error_unlock; } @@ -478,7 +477,7 @@ xfs_inactive_symlink( trace_xfs_inactive_symlink(ip); - if (XFS_FORCED_SHUTDOWN(mp)) + if (xfs_is_shutdown(mp)) return -EIO; xfs_ilock(ip, XFS_ILOCK_EXCL); diff --git a/fs/xfs/xfs_sysfs.c b/fs/xfs/xfs_sysfs.c index f1bc88f4367c..18dc5eca6c04 100644 --- a/fs/xfs/xfs_sysfs.c +++ b/fs/xfs/xfs_sysfs.c @@ -10,6 +10,7 @@ #include "xfs_log_format.h" #include "xfs_trans_resv.h" #include "xfs_sysfs.h" +#include "xfs_log.h" #include "xfs_log_priv.h" #include "xfs_mount.h" diff --git a/fs/xfs/xfs_trace.c b/fs/xfs/xfs_trace.c index 7e01e00550ac..d269ef57ff01 100644 --- a/fs/xfs/xfs_trace.c +++ b/fs/xfs/xfs_trace.c @@ -20,6 +20,7 @@ #include "xfs_bmap.h" #include "xfs_attr.h" #include "xfs_trans.h" +#include "xfs_log.h" #include "xfs_log_priv.h" #include "xfs_buf_item.h" #include "xfs_quota.h" @@ -32,6 +33,7 @@ #include "xfs_icache.h" #include "xfs_ag.h" #include "xfs_ag_resv.h" +#include "xfs_error.h" /* * We include this last to have the helpers above available for the trace diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h index 19260291ff8b..1033a95fbf8e 100644 --- a/fs/xfs/xfs_trace.h +++ b/fs/xfs/xfs_trace.h @@ -2,6 +2,41 @@ /* * Copyright (c) 2009, Christoph Hellwig * All Rights Reserved. + * + * NOTE: none of these tracepoints shall be considered a stable kernel ABI + * as they can change at any time. + * + * Current conventions for printing numbers measuring specific units: + * + * agno: allocation group number + * + * agino: per-AG inode number + * ino: filesystem inode number + * + * agbno: per-AG block number in fs blocks + * startblock: physical block number for file mappings. This is either a + * segmented fsblock for data device mappings, or a rfsblock + * for realtime device mappings + * fsbcount: number of blocks in an extent, in fs blocks + * + * daddr: physical block number in 512b blocks + * bbcount: number of blocks in a physical extent, in 512b blocks + * + * owner: reverse-mapping owner, usually inodes + * + * fileoff: file offset, in fs blocks + * pos: file offset, in bytes + * bytecount: number of bytes + * + * disize: ondisk file size, in bytes + * isize: incore file size, in bytes + * + * forkoff: inode fork offset, in bytes + * + * ireccount: number of inode records + * + * Numbers describing space allocations (blocks, extents, inodes) should be + * formatted in hexadecimal. 
*/ #undef TRACE_SYSTEM #define TRACE_SYSTEM xfs @@ -139,7 +174,7 @@ DECLARE_EVENT_CLASS(xfs_perag_class, __entry->refcount = refcount; __entry->caller_ip = caller_ip; ), - TP_printk("dev %d:%d agno %u refcount %d caller %pS", + TP_printk("dev %d:%d agno 0x%x refcount %d caller %pS", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->agno, __entry->refcount, @@ -157,6 +192,84 @@ DEFINE_PERAG_REF_EVENT(xfs_perag_put); DEFINE_PERAG_REF_EVENT(xfs_perag_set_inode_tag); DEFINE_PERAG_REF_EVENT(xfs_perag_clear_inode_tag); +TRACE_EVENT(xfs_inodegc_worker, + TP_PROTO(struct xfs_mount *mp, unsigned int shrinker_hits), + TP_ARGS(mp, shrinker_hits), + TP_STRUCT__entry( + __field(dev_t, dev) + __field(unsigned int, shrinker_hits) + ), + TP_fast_assign( + __entry->dev = mp->m_super->s_dev; + __entry->shrinker_hits = shrinker_hits; + ), + TP_printk("dev %d:%d shrinker_hits %u", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->shrinker_hits) +); + +DECLARE_EVENT_CLASS(xfs_fs_class, + TP_PROTO(struct xfs_mount *mp, void *caller_ip), + TP_ARGS(mp, caller_ip), + TP_STRUCT__entry( + __field(dev_t, dev) + __field(unsigned long long, mflags) + __field(unsigned long, opstate) + __field(unsigned long, sbflags) + __field(void *, caller_ip) + ), + TP_fast_assign( + if (mp) { + __entry->dev = mp->m_super->s_dev; + __entry->mflags = mp->m_features; + __entry->opstate = mp->m_opstate; + __entry->sbflags = mp->m_super->s_flags; + } + __entry->caller_ip = caller_ip; + ), + TP_printk("dev %d:%d m_features 0x%llx opstate (%s) s_flags 0x%lx caller %pS", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->mflags, + __print_flags(__entry->opstate, "|", XFS_OPSTATE_STRINGS), + __entry->sbflags, + __entry->caller_ip) +); + +#define DEFINE_FS_EVENT(name) \ +DEFINE_EVENT(xfs_fs_class, name, \ + TP_PROTO(struct xfs_mount *mp, void *caller_ip), \ + TP_ARGS(mp, caller_ip)) +DEFINE_FS_EVENT(xfs_inodegc_flush); +DEFINE_FS_EVENT(xfs_inodegc_start); +DEFINE_FS_EVENT(xfs_inodegc_stop); +DEFINE_FS_EVENT(xfs_inodegc_queue); +DEFINE_FS_EVENT(xfs_inodegc_throttle); +DEFINE_FS_EVENT(xfs_fs_sync_fs); +DEFINE_FS_EVENT(xfs_blockgc_start); +DEFINE_FS_EVENT(xfs_blockgc_stop); +DEFINE_FS_EVENT(xfs_blockgc_worker); +DEFINE_FS_EVENT(xfs_blockgc_flush_all); + +TRACE_EVENT(xfs_inodegc_shrinker_scan, + TP_PROTO(struct xfs_mount *mp, struct shrink_control *sc, + void *caller_ip), + TP_ARGS(mp, sc, caller_ip), + TP_STRUCT__entry( + __field(dev_t, dev) + __field(unsigned long, nr_to_scan) + __field(void *, caller_ip) + ), + TP_fast_assign( + __entry->dev = mp->m_super->s_dev; + __entry->nr_to_scan = sc->nr_to_scan; + __entry->caller_ip = caller_ip; + ), + TP_printk("dev %d:%d nr_to_scan %lu caller %pS", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->nr_to_scan, + __entry->caller_ip) +); + DECLARE_EVENT_CLASS(xfs_ag_class, TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno), TP_ARGS(mp, agno), @@ -168,7 +281,7 @@ DECLARE_EVENT_CLASS(xfs_ag_class, __entry->dev = mp->m_super->s_dev; __entry->agno = agno; ), - TP_printk("dev %d:%d agno %u", + TP_printk("dev %d:%d agno 0x%x", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->agno) ); @@ -268,7 +381,7 @@ DECLARE_EVENT_CLASS(xfs_bmap_class, __entry->caller_ip = caller_ip; ), TP_printk("dev %d:%d ino 0x%llx state %s cur %p/%d " - "offset %lld block %lld count %lld flag %d caller %pS", + "fileoff 0x%llx startblock 0x%llx fsbcount 0x%llx flag %d caller %pS", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->ino, __print_flags(__entry->bmap_state, "|", XFS_BMAP_EXT_FLAGS), @@ -308,10 +421,7 @@ 
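
/*
 * Illustrative sketch, not part of the patch: xfs_fs_class above decodes
 * mp->m_opstate with __print_flags() and XFS_OPSTATE_STRINGS. That table
 * is defined outside this diff; it presumably pairs each opstate bit with
 * a label, along these lines (entries and spellings are assumptions):
 */
#define XFS_OPSTATE_STRINGS \
	{ (1UL << XFS_OPSTATE_READONLY),	"read_only" }, \
	{ (1UL << XFS_OPSTATE_SHUTDOWN),	"shutdown" }
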
DECLARE_EVENT_CLASS(xfs_buf_class, ), TP_fast_assign( __entry->dev = bp->b_target->bt_dev; - if (bp->b_bn == XFS_BUF_DADDR_NULL) - __entry->bno = bp->b_maps[0].bm_bn; - else - __entry->bno = bp->b_bn; + __entry->bno = xfs_buf_daddr(bp); __entry->nblks = bp->b_length; __entry->hold = atomic_read(&bp->b_hold); __entry->pincount = atomic_read(&bp->b_pin_count); @@ -319,7 +429,7 @@ DECLARE_EVENT_CLASS(xfs_buf_class, __entry->flags = bp->b_flags; __entry->caller_ip = caller_ip; ), - TP_printk("dev %d:%d bno 0x%llx nblks 0x%x hold %d pincount %d " + TP_printk("dev %d:%d daddr 0x%llx bbcount 0x%x hold %d pincount %d " "lock %d flags %s caller %pS", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long long)__entry->bno, @@ -370,7 +480,7 @@ DECLARE_EVENT_CLASS(xfs_buf_flags_class, TP_STRUCT__entry( __field(dev_t, dev) __field(xfs_daddr_t, bno) - __field(size_t, buffer_length) + __field(unsigned int, length) __field(int, hold) __field(int, pincount) __field(unsigned, lockval) @@ -379,19 +489,19 @@ DECLARE_EVENT_CLASS(xfs_buf_flags_class, ), TP_fast_assign( __entry->dev = bp->b_target->bt_dev; - __entry->bno = bp->b_bn; - __entry->buffer_length = BBTOB(bp->b_length); + __entry->bno = xfs_buf_daddr(bp); + __entry->length = bp->b_length; __entry->flags = flags; __entry->hold = atomic_read(&bp->b_hold); __entry->pincount = atomic_read(&bp->b_pin_count); __entry->lockval = bp->b_sema.count; __entry->caller_ip = caller_ip; ), - TP_printk("dev %d:%d bno 0x%llx len 0x%zx hold %d pincount %d " + TP_printk("dev %d:%d daddr 0x%llx bbcount 0x%x hold %d pincount %d " "lock %d flags %s caller %pS", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long long)__entry->bno, - __entry->buffer_length, + __entry->length, __entry->hold, __entry->pincount, __entry->lockval, @@ -413,7 +523,7 @@ TRACE_EVENT(xfs_buf_ioerror, TP_STRUCT__entry( __field(dev_t, dev) __field(xfs_daddr_t, bno) - __field(size_t, buffer_length) + __field(unsigned int, length) __field(unsigned, flags) __field(int, hold) __field(int, pincount) @@ -423,8 +533,8 @@ TRACE_EVENT(xfs_buf_ioerror, ), TP_fast_assign( __entry->dev = bp->b_target->bt_dev; - __entry->bno = bp->b_bn; - __entry->buffer_length = BBTOB(bp->b_length); + __entry->bno = xfs_buf_daddr(bp); + __entry->length = bp->b_length; __entry->hold = atomic_read(&bp->b_hold); __entry->pincount = atomic_read(&bp->b_pin_count); __entry->lockval = bp->b_sema.count; @@ -432,11 +542,11 @@ TRACE_EVENT(xfs_buf_ioerror, __entry->flags = bp->b_flags; __entry->caller_ip = caller_ip; ), - TP_printk("dev %d:%d bno 0x%llx len 0x%zx hold %d pincount %d " + TP_printk("dev %d:%d daddr 0x%llx bbcount 0x%x hold %d pincount %d " "lock %d error %d flags %s caller %pS", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long long)__entry->bno, - __entry->buffer_length, + __entry->length, __entry->hold, __entry->pincount, __entry->lockval, @@ -451,7 +561,7 @@ DECLARE_EVENT_CLASS(xfs_buf_item_class, TP_STRUCT__entry( __field(dev_t, dev) __field(xfs_daddr_t, buf_bno) - __field(size_t, buf_len) + __field(unsigned int, buf_len) __field(int, buf_hold) __field(int, buf_pincount) __field(int, buf_lockval) @@ -466,15 +576,15 @@ DECLARE_EVENT_CLASS(xfs_buf_item_class, __entry->bli_flags = bip->bli_flags; __entry->bli_recur = bip->bli_recur; __entry->bli_refcount = atomic_read(&bip->bli_refcount); - __entry->buf_bno = bip->bli_buf->b_bn; - __entry->buf_len = BBTOB(bip->bli_buf->b_length); + __entry->buf_bno = xfs_buf_daddr(bip->bli_buf); + __entry->buf_len = bip->bli_buf->b_length; __entry->buf_flags = 
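
/*
 * Illustrative sketch, not part of the patch: xfs_buf_class above is one
 * of many call sites converted from reading bp->b_bn (with the
 * XFS_BUF_DADDR_NULL special case) to the new xfs_buf_daddr() accessor.
 * With the duplicated b_bn field gone, the disk address lives only in the
 * first buffer map, so the accessor is presumably the trivial inline:
 */
static inline xfs_daddr_t xfs_buf_daddr(struct xfs_buf *bp)
{
	return bp->b_maps[0].bm_bn;
}
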
bip->bli_buf->b_flags; __entry->buf_hold = atomic_read(&bip->bli_buf->b_hold); __entry->buf_pincount = atomic_read(&bip->bli_buf->b_pin_count); __entry->buf_lockval = bip->bli_buf->b_sema.count; __entry->li_flags = bip->bli_item.li_flags; ), - TP_printk("dev %d:%d bno 0x%llx len 0x%zx hold %d pincount %d " + TP_printk("dev %d:%d daddr 0x%llx bbcount 0x%x hold %d pincount %d " "lock %d flags %s recur %d refcount %d bliflags %s " "liflags %s", MAJOR(__entry->dev), MINOR(__entry->dev), @@ -534,7 +644,7 @@ DECLARE_EVENT_CLASS(xfs_filestream_class, __entry->agno = agno; __entry->streams = xfs_filestream_peek_ag(mp, agno); ), - TP_printk("dev %d:%d ino 0x%llx agno %u streams %d", + TP_printk("dev %d:%d ino 0x%llx agno 0x%x streams %d", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->ino, __entry->agno, @@ -568,7 +678,7 @@ TRACE_EVENT(xfs_filestream_pick, __entry->free = free; __entry->nscan = nscan; ), - TP_printk("dev %d:%d ino 0x%llx agno %u streams %d free %d nscan %d", + TP_printk("dev %d:%d ino 0x%llx agno 0x%x streams %d free %d nscan %d", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->ino, __entry->agno, @@ -616,14 +726,17 @@ DECLARE_EVENT_CLASS(xfs_inode_class, TP_STRUCT__entry( __field(dev_t, dev) __field(xfs_ino_t, ino) + __field(unsigned long, iflags) ), TP_fast_assign( __entry->dev = VFS_I(ip)->i_sb->s_dev; __entry->ino = ip->i_ino; + __entry->iflags = ip->i_flags; ), - TP_printk("dev %d:%d ino 0x%llx", + TP_printk("dev %d:%d ino 0x%llx iflags 0x%lx", MAJOR(__entry->dev), MINOR(__entry->dev), - __entry->ino) + __entry->ino, + __entry->iflags) ) #define DEFINE_INODE_EVENT(name) \ @@ -667,6 +780,10 @@ DEFINE_INODE_EVENT(xfs_inode_free_eofblocks_invalid); DEFINE_INODE_EVENT(xfs_inode_set_cowblocks_tag); DEFINE_INODE_EVENT(xfs_inode_clear_cowblocks_tag); DEFINE_INODE_EVENT(xfs_inode_free_cowblocks_invalid); +DEFINE_INODE_EVENT(xfs_inode_set_reclaimable); +DEFINE_INODE_EVENT(xfs_inode_reclaiming); +DEFINE_INODE_EVENT(xfs_inode_set_need_inactive); +DEFINE_INODE_EVENT(xfs_inode_inactivating); /* * ftrace's __print_symbolic requires that all enum values be wrapped in the @@ -773,9 +890,12 @@ TRACE_EVENT(xfs_irec_merge_pre, __entry->nagino = nagino; __entry->nholemask = holemask; ), - TP_printk("dev %d:%d agno %d inobt (%u:0x%x) new (%u:0x%x)", - MAJOR(__entry->dev), MINOR(__entry->dev), __entry->agno, - __entry->agino, __entry->holemask, __entry->nagino, + TP_printk("dev %d:%d agno 0x%x agino 0x%x holemask 0x%x new_agino 0x%x new_holemask 0x%x", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->agno, + __entry->agino, + __entry->holemask, + __entry->nagino, __entry->nholemask) ) @@ -795,8 +915,11 @@ TRACE_EVENT(xfs_irec_merge_post, __entry->agino = agino; __entry->holemask = holemask; ), - TP_printk("dev %d:%d agno %d inobt (%u:0x%x)", MAJOR(__entry->dev), - MINOR(__entry->dev), __entry->agno, __entry->agino, + TP_printk("dev %d:%d agno 0x%x agino 0x%x holemask 0x%x", + MAJOR(__entry->dev), + MINOR(__entry->dev), + __entry->agno, + __entry->agino, __entry->holemask) ) @@ -1301,7 +1424,7 @@ DECLARE_EVENT_CLASS(xfs_file_class, __entry->offset = iocb->ki_pos; __entry->count = iov_iter_count(iter); ), - TP_printk("dev %d:%d ino 0x%llx size 0x%llx offset 0x%llx count 0x%zx", + TP_printk("dev %d:%d ino 0x%llx disize 0x%llx pos 0x%llx bytecount 0x%zx", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->ino, __entry->size, @@ -1348,14 +1471,14 @@ DECLARE_EVENT_CLASS(xfs_imap_class, __entry->startblock = irec ? irec->br_startblock : 0; __entry->blockcount = irec ? 
irec->br_blockcount : 0; ), - TP_printk("dev %d:%d ino 0x%llx size 0x%llx offset 0x%llx count %zd " - "fork %s startoff 0x%llx startblock %lld blockcount 0x%llx", + TP_printk("dev %d:%d ino 0x%llx disize 0x%llx pos 0x%llx bytecount 0x%zx " + "fork %s startoff 0x%llx startblock 0x%llx fsbcount 0x%llx", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->ino, __entry->size, __entry->offset, __entry->count, - __entry->whichfork == XFS_COW_FORK ? "cow" : "data", + __print_symbolic(__entry->whichfork, XFS_WHICHFORK_STRINGS), __entry->startoff, (int64_t)__entry->startblock, __entry->blockcount) @@ -1391,7 +1514,7 @@ DECLARE_EVENT_CLASS(xfs_simple_io_class, __entry->count = count; ), TP_printk("dev %d:%d ino 0x%llx isize 0x%llx disize 0x%llx " - "offset 0x%llx count %zd", + "pos 0x%llx bytecount 0x%zx", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->ino, __entry->isize, @@ -1427,7 +1550,7 @@ DECLARE_EVENT_CLASS(xfs_itrunc_class, __entry->size = ip->i_disk_size; __entry->new_size = new_size; ), - TP_printk("dev %d:%d ino 0x%llx size 0x%llx new_size 0x%llx", + TP_printk("dev %d:%d ino 0x%llx disize 0x%llx new_size 0x%llx", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->ino, __entry->size, @@ -1458,7 +1581,7 @@ TRACE_EVENT(xfs_pagecache_inval, __entry->start = start; __entry->finish = finish; ), - TP_printk("dev %d:%d ino 0x%llx size 0x%llx start 0x%llx finish 0x%llx", + TP_printk("dev %d:%d ino 0x%llx disize 0x%llx start 0x%llx finish 0x%llx", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->ino, __entry->size, @@ -1467,14 +1590,14 @@ TRACE_EVENT(xfs_pagecache_inval, ); TRACE_EVENT(xfs_bunmap, - TP_PROTO(struct xfs_inode *ip, xfs_fileoff_t bno, xfs_filblks_t len, + TP_PROTO(struct xfs_inode *ip, xfs_fileoff_t fileoff, xfs_filblks_t len, int flags, unsigned long caller_ip), - TP_ARGS(ip, bno, len, flags, caller_ip), + TP_ARGS(ip, fileoff, len, flags, caller_ip), TP_STRUCT__entry( __field(dev_t, dev) __field(xfs_ino_t, ino) __field(xfs_fsize_t, size) - __field(xfs_fileoff_t, bno) + __field(xfs_fileoff_t, fileoff) __field(xfs_filblks_t, len) __field(unsigned long, caller_ip) __field(int, flags) @@ -1483,17 +1606,17 @@ TRACE_EVENT(xfs_bunmap, __entry->dev = VFS_I(ip)->i_sb->s_dev; __entry->ino = ip->i_ino; __entry->size = ip->i_disk_size; - __entry->bno = bno; + __entry->fileoff = fileoff; __entry->len = len; __entry->caller_ip = caller_ip; __entry->flags = flags; ), - TP_printk("dev %d:%d ino 0x%llx size 0x%llx bno 0x%llx len 0x%llx" + TP_printk("dev %d:%d ino 0x%llx disize 0x%llx fileoff 0x%llx fsbcount 0x%llx" "flags %s caller %pS", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->ino, __entry->size, - __entry->bno, + __entry->fileoff, __entry->len, __print_flags(__entry->flags, "|", XFS_BMAPI_FLAGS), (void *)__entry->caller_ip) @@ -1516,7 +1639,7 @@ DECLARE_EVENT_CLASS(xfs_extent_busy_class, __entry->agbno = agbno; __entry->len = len; ), - TP_printk("dev %d:%d agno %u agbno %u len %u", + TP_printk("dev %d:%d agno 0x%x agbno 0x%x fsbcount 0x%x", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->agno, __entry->agbno, @@ -1554,7 +1677,7 @@ TRACE_EVENT(xfs_extent_busy_trim, __entry->tbno = tbno; __entry->tlen = tlen; ), - TP_printk("dev %d:%d agno %u agbno %u len %u tbno %u tlen %u", + TP_printk("dev %d:%d agno 0x%x agbno 0x%x fsbcount 0x%x found_agbno 0x%x found_fsbcount 0x%x", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->agno, __entry->agbno, @@ -1601,7 +1724,7 @@ DECLARE_EVENT_CLASS(xfs_agf_class, __entry->longest = be32_to_cpu(agf->agf_longest); __entry->caller_ip = caller_ip; ), 
- TP_printk("dev %d:%d agno %u flags %s length %u roots b %u c %u " + TP_printk("dev %d:%d agno 0x%x flags %s length %u roots b %u c %u " "levels b %u c %u flfirst %u fllast %u flcount %u " "freeblks %u longest %u caller %pS", MAJOR(__entry->dev), MINOR(__entry->dev), @@ -1650,7 +1773,7 @@ TRACE_EVENT(xfs_free_extent, __entry->haveleft = haveleft; __entry->haveright = haveright; ), - TP_printk("dev %d:%d agno %u agbno %u len %u resv %d %s", + TP_printk("dev %d:%d agno 0x%x agbno 0x%x fsbcount 0x%x resv %d %s", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->agno, __entry->agbno, @@ -1707,7 +1830,7 @@ DECLARE_EVENT_CLASS(xfs_alloc_class, __entry->datatype = args->datatype; __entry->firstblock = args->tp->t_firstblock; ), - TP_printk("dev %d:%d agno %u agbno %u minlen %u maxlen %u mod %u " + TP_printk("dev %d:%d agno 0x%x agbno 0x%x minlen %u maxlen %u mod %u " "prod %u minleft %u total %u alignment %u minalignslop %u " "len %u type %s otype %s wasdel %d wasfromfl %d resv %d " "datatype 0x%x firstblock 0x%llx", @@ -1785,7 +1908,7 @@ TRACE_EVENT(xfs_alloc_cur_check, __entry->diff = diff; __entry->new = new; ), - TP_printk("dev %d:%d btree %s bno 0x%x len 0x%x diff 0x%x new %d", + TP_printk("dev %d:%d btree %s agbno 0x%x fsbcount 0x%x diff 0x%x new %d", MAJOR(__entry->dev), MINOR(__entry->dev), __print_symbolic(__entry->btnum, XFS_BTNUM_STRINGS), __entry->bno, __entry->len, __entry->diff, __entry->new) @@ -2060,7 +2183,7 @@ DECLARE_EVENT_CLASS(xfs_swap_extent_class, __entry->fork_off = XFS_IFORK_BOFF(ip); ), TP_printk("dev %d:%d ino 0x%llx (%s), %s format, num_extents %d, " - "broot size %d, fork offset %d", + "broot size %d, forkoff 0x%x", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->ino, __print_symbolic(__entry->which, XFS_SWAPEXT_INODES), @@ -2186,7 +2309,7 @@ DECLARE_EVENT_CLASS(xfs_log_recover_buf_item_class, __entry->size = buf_f->blf_size; __entry->map_size = buf_f->blf_map_size; ), - TP_printk("dev %d:%d blkno 0x%llx, len %u, flags 0x%x, size %d, " + TP_printk("dev %d:%d daddr 0x%llx, bbcount 0x%x, flags 0x%x, size %d, " "map_size %d", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->blkno, @@ -2237,7 +2360,7 @@ DECLARE_EVENT_CLASS(xfs_log_recover_ino_item_class, __entry->boffset = in_f->ilf_boffset; ), TP_printk("dev %d:%d ino 0x%llx, size %u, fields 0x%x, asize %d, " - "dsize %d, blkno 0x%llx, len %d, boffset %d", + "dsize %d, daddr 0x%llx, bbcount 0x%x, boffset %d", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->ino, __entry->size, @@ -2278,10 +2401,14 @@ DECLARE_EVENT_CLASS(xfs_log_recover_icreate_item_class, __entry->length = be32_to_cpu(in_f->icl_length); __entry->gen = be32_to_cpu(in_f->icl_gen); ), - TP_printk("dev %d:%d agno %u agbno %u count %u isize %u length %u " - "gen %u", MAJOR(__entry->dev), MINOR(__entry->dev), - __entry->agno, __entry->agbno, __entry->count, __entry->isize, - __entry->length, __entry->gen) + TP_printk("dev %d:%d agno 0x%x agbno 0x%x fsbcount 0x%x ireccount %u isize %u gen 0x%x", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->agno, + __entry->agbno, + __entry->length, + __entry->count, + __entry->isize, + __entry->gen) ) #define DEFINE_LOG_RECOVER_ICREATE_ITEM(name) \ DEFINE_EVENT(xfs_log_recover_icreate_item_class, name, \ @@ -2307,7 +2434,7 @@ DECLARE_EVENT_CLASS(xfs_discard_class, __entry->agbno = agbno; __entry->len = len; ), - TP_printk("dev %d:%d agno %u agbno %u len %u", + TP_printk("dev %d:%d agno 0x%x agbno 0x%x fsbcount 0x%x", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->agno, __entry->agbno, @@ -2350,7 +2477,7 
@@ DECLARE_EVENT_CLASS(xfs_btree_cur_class, __entry->level = level; __entry->nlevels = cur->bc_nlevels; __entry->ptr = cur->bc_ptrs[level]; - __entry->daddr = bp ? bp->b_bn : -1; + __entry->daddr = bp ? xfs_buf_daddr(bp) : -1; ), TP_printk("dev %d:%d btree %s level %d/%d ptr %d daddr 0x%llx", MAJOR(__entry->dev), MINOR(__entry->dev), @@ -2466,7 +2593,7 @@ DECLARE_EVENT_CLASS(xfs_phys_extent_deferred_class, __entry->agbno = agbno; __entry->len = len; ), - TP_printk("dev %d:%d op %d agno %u agbno %u len %u", + TP_printk("dev %d:%d op %d agno 0x%x agbno 0x%x fsbcount 0x%x", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->type, __entry->agno, @@ -2513,13 +2640,13 @@ DECLARE_EVENT_CLASS(xfs_map_extent_deferred_class, __entry->l_state = state; __entry->op = op; ), - TP_printk("dev %d:%d op %d agno %u agbno %u owner %lld %s offset %llu len %llu state %d", + TP_printk("dev %d:%d op %d agno 0x%x agbno 0x%x owner 0x%llx %s fileoff 0x%llx fsbcount 0x%llx state %d", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->op, __entry->agno, __entry->agbno, __entry->ino, - __entry->whichfork == XFS_ATTR_FORK ? "attr" : "data", + __print_symbolic(__entry->whichfork, XFS_WHICHFORK_STRINGS), __entry->l_loff, __entry->l_len, __entry->l_state) @@ -2583,7 +2710,7 @@ DECLARE_EVENT_CLASS(xfs_rmap_class, if (unwritten) __entry->flags |= XFS_RMAP_UNWRITTEN; ), - TP_printk("dev %d:%d agno %u agbno %u len %u owner %lld offset %llu flags 0x%lx", + TP_printk("dev %d:%d agno 0x%x agbno 0x%x fsbcount 0x%x owner 0x%llx fileoff 0x%llx flags 0x%lx", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->agno, __entry->agbno, @@ -2616,7 +2743,7 @@ DECLARE_EVENT_CLASS(xfs_ag_error_class, __entry->error = error; __entry->caller_ip = caller_ip; ), - TP_printk("dev %d:%d agno %u error %d caller %pS", + TP_printk("dev %d:%d agno 0x%x error %d caller %pS", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->agno, __entry->error, @@ -2663,7 +2790,7 @@ DECLARE_EVENT_CLASS(xfs_rmapbt_class, __entry->offset = offset; __entry->flags = flags; ), - TP_printk("dev %d:%d agno %u agbno %u len %u owner %lld offset %llu flags 0x%x", + TP_printk("dev %d:%d agno 0x%x agbno 0x%x fsbcount 0x%x owner 0x%llx fileoff 0x%llx flags 0x%x", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->agno, __entry->agbno, @@ -2732,7 +2859,7 @@ DECLARE_EVENT_CLASS(xfs_ag_resv_class, __entry->asked = r ? 
r->ar_asked : 0; __entry->len = len; ), - TP_printk("dev %d:%d agno %u resv %d freeblks %u flcount %u " + TP_printk("dev %d:%d agno 0x%x resv %d freeblks %u flcount %u " "resv %u ask %u len %u", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->agno, @@ -2785,7 +2912,7 @@ DECLARE_EVENT_CLASS(xfs_ag_btree_lookup_class, __entry->agbno = agbno; __entry->dir = dir; ), - TP_printk("dev %d:%d agno %u agbno %u cmp %s(%d)", + TP_printk("dev %d:%d agno 0x%x agbno 0x%x cmp %s(%d)", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->agno, __entry->agbno, @@ -2818,7 +2945,7 @@ DECLARE_EVENT_CLASS(xfs_refcount_extent_class, __entry->blockcount = irec->rc_blockcount; __entry->refcount = irec->rc_refcount; ), - TP_printk("dev %d:%d agno %u agbno %u len %u refcount %u", + TP_printk("dev %d:%d agno 0x%x agbno 0x%x fsbcount 0x%x refcount %u", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->agno, __entry->startblock, @@ -2853,7 +2980,7 @@ DECLARE_EVENT_CLASS(xfs_refcount_extent_at_class, __entry->refcount = irec->rc_refcount; __entry->agbno = agbno; ), - TP_printk("dev %d:%d agno %u agbno %u len %u refcount %u @ agbno %u", + TP_printk("dev %d:%d agno 0x%x agbno 0x%x fsbcount 0x%x refcount %u @ agbno 0x%x", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->agno, __entry->startblock, @@ -2893,8 +3020,8 @@ DECLARE_EVENT_CLASS(xfs_refcount_double_extent_class, __entry->i2_blockcount = i2->rc_blockcount; __entry->i2_refcount = i2->rc_refcount; ), - TP_printk("dev %d:%d agno %u agbno %u len %u refcount %u -- " - "agbno %u len %u refcount %u", + TP_printk("dev %d:%d agno 0x%x agbno 0x%x fsbcount 0x%x refcount %u -- " + "agbno 0x%x fsbcount 0x%x refcount %u", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->agno, __entry->i1_startblock, @@ -2939,8 +3066,8 @@ DECLARE_EVENT_CLASS(xfs_refcount_double_extent_at_class, __entry->i2_refcount = i2->rc_refcount; __entry->agbno = agbno; ), - TP_printk("dev %d:%d agno %u agbno %u len %u refcount %u -- " - "agbno %u len %u refcount %u @ agbno %u", + TP_printk("dev %d:%d agno 0x%x agbno 0x%x fsbcount 0x%x refcount %u -- " + "agbno 0x%x fsbcount 0x%x refcount %u @ agbno 0x%x", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->agno, __entry->i1_startblock, @@ -2991,9 +3118,9 @@ DECLARE_EVENT_CLASS(xfs_refcount_triple_extent_class, __entry->i3_blockcount = i3->rc_blockcount; __entry->i3_refcount = i3->rc_refcount; ), - TP_printk("dev %d:%d agno %u agbno %u len %u refcount %u -- " - "agbno %u len %u refcount %u -- " - "agbno %u len %u refcount %u", + TP_printk("dev %d:%d agno 0x%x agbno 0x%x fsbcount 0x%x refcount %u -- " + "agbno 0x%x fsbcount 0x%x refcount %u -- " + "agbno 0x%x fsbcount 0x%x refcount %u", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->agno, __entry->i1_startblock, @@ -3080,7 +3207,7 @@ TRACE_EVENT(xfs_refcount_finish_one_leftover, __entry->new_agbno = new_agbno; __entry->new_len = new_len; ), - TP_printk("dev %d:%d type %d agno %u agbno %u len %u new_agbno %u new_len %u", + TP_printk("dev %d:%d type %d agno 0x%x agbno 0x%x fsbcount 0x%x new_agbno 0x%x new_fsbcount 0x%x", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->type, __entry->agno, @@ -3106,7 +3233,7 @@ DECLARE_EVENT_CLASS(xfs_inode_error_class, __entry->error = error; __entry->caller_ip = caller_ip; ), - TP_printk("dev %d:%d ino %llx error %d caller %pS", + TP_printk("dev %d:%d ino 0x%llx error %d caller %pS", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->ino, __entry->error, @@ -3132,7 +3259,7 @@ DECLARE_EVENT_CLASS(xfs_double_io_class, __field(loff_t, src_isize) __field(loff_t, 
src_disize) __field(loff_t, src_offset) - __field(size_t, len) + __field(long long, len) __field(xfs_ino_t, dest_ino) __field(loff_t, dest_isize) __field(loff_t, dest_disize) @@ -3150,9 +3277,9 @@ DECLARE_EVENT_CLASS(xfs_double_io_class, __entry->dest_disize = dest->i_disk_size; __entry->dest_offset = doffset; ), - TP_printk("dev %d:%d count %zd " - "ino 0x%llx isize 0x%llx disize 0x%llx offset 0x%llx -> " - "ino 0x%llx isize 0x%llx disize 0x%llx offset 0x%llx", + TP_printk("dev %d:%d bytecount 0x%llx " + "ino 0x%llx isize 0x%llx disize 0x%llx pos 0x%llx -> " + "ino 0x%llx isize 0x%llx disize 0x%llx pos 0x%llx", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->len, __entry->src_ino, @@ -3191,7 +3318,7 @@ DECLARE_EVENT_CLASS(xfs_inode_irec_class, __entry->pblk = irec->br_startblock; __entry->state = irec->br_state; ), - TP_printk("dev %d:%d ino 0x%llx lblk 0x%llx len 0x%x pblk %llu st %d", + TP_printk("dev %d:%d ino 0x%llx fileoff 0x%llx fsbcount 0x%x startblock 0x%llx st %d", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->ino, __entry->lblk, @@ -3231,9 +3358,8 @@ TRACE_EVENT(xfs_reflink_remap_blocks, __entry->dest_ino = dest->i_ino; __entry->dest_lblk = doffset; ), - TP_printk("dev %d:%d len 0x%llx " - "ino 0x%llx offset 0x%llx blocks -> " - "ino 0x%llx offset 0x%llx blocks", + TP_printk("dev %d:%d fsbcount 0x%llx " + "ino 0x%llx fileoff 0x%llx -> ino 0x%llx fileoff 0x%llx", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->len, __entry->src_ino, @@ -3272,9 +3398,7 @@ TRACE_EVENT(xfs_ioctl_clone, __entry->dest_ino = dest->i_ino; __entry->dest_isize = i_size_read(dest); ), - TP_printk("dev %d:%d " - "ino 0x%lx isize 0x%llx -> " - "ino 0x%lx isize 0x%llx", + TP_printk("dev %d:%d ino 0x%lx isize 0x%llx -> ino 0x%lx isize 0x%llx", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->src_ino, __entry->src_isize, @@ -3310,7 +3434,7 @@ DEFINE_INODE_ERROR_EVENT(xfs_swap_extent_rmap_error); /* fsmap traces */ DECLARE_EVENT_CLASS(xfs_fsmap_class, TP_PROTO(struct xfs_mount *mp, u32 keydev, xfs_agnumber_t agno, - struct xfs_rmap_irec *rmap), + const struct xfs_rmap_irec *rmap), TP_ARGS(mp, keydev, agno, rmap), TP_STRUCT__entry( __field(dev_t, dev) @@ -3332,7 +3456,7 @@ DECLARE_EVENT_CLASS(xfs_fsmap_class, __entry->offset = rmap->rm_offset; __entry->flags = rmap->rm_flags; ), - TP_printk("dev %d:%d keydev %d:%d agno %u bno %llu len %llu owner %lld offset %llu flags 0x%x", + TP_printk("dev %d:%d keydev %d:%d agno 0x%x startblock 0x%llx fsbcount 0x%llx owner 0x%llx fileoff 0x%llx flags 0x%x", MAJOR(__entry->dev), MINOR(__entry->dev), MAJOR(__entry->keydev), MINOR(__entry->keydev), __entry->agno, @@ -3345,7 +3469,7 @@ DECLARE_EVENT_CLASS(xfs_fsmap_class, #define DEFINE_FSMAP_EVENT(name) \ DEFINE_EVENT(xfs_fsmap_class, name, \ TP_PROTO(struct xfs_mount *mp, u32 keydev, xfs_agnumber_t agno, \ - struct xfs_rmap_irec *rmap), \ + const struct xfs_rmap_irec *rmap), \ TP_ARGS(mp, keydev, agno, rmap)) DEFINE_FSMAP_EVENT(xfs_fsmap_low_key); DEFINE_FSMAP_EVENT(xfs_fsmap_high_key); @@ -3372,7 +3496,7 @@ DECLARE_EVENT_CLASS(xfs_getfsmap_class, __entry->offset = fsmap->fmr_offset; __entry->flags = fsmap->fmr_flags; ), - TP_printk("dev %d:%d keydev %d:%d block %llu len %llu owner %lld offset %llu flags 0x%llx", + TP_printk("dev %d:%d keydev %d:%d daddr 0x%llx bbcount 0x%llx owner 0x%llx fileoff_daddr 0x%llx flags 0x%llx", MAJOR(__entry->dev), MINOR(__entry->dev), MAJOR(__entry->keydev), MINOR(__entry->keydev), __entry->block, @@ -3471,7 +3595,7 @@ TRACE_EVENT(xfs_iunlink_update_bucket, __entry->old_ptr = old_ptr; 
__entry->new_ptr = new_ptr; ), - TP_printk("dev %d:%d agno %u bucket %u old 0x%x new 0x%x", + TP_printk("dev %d:%d agno 0x%x bucket %u old 0x%x new 0x%x", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->agno, __entry->bucket, @@ -3497,7 +3621,7 @@ TRACE_EVENT(xfs_iunlink_update_dinode, __entry->old_ptr = old_ptr; __entry->new_ptr = new_ptr; ), - TP_printk("dev %d:%d agno %u agino 0x%x old 0x%x new 0x%x", + TP_printk("dev %d:%d agno 0x%x agino 0x%x old 0x%x new 0x%x", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->agno, __entry->agino, @@ -3518,7 +3642,7 @@ DECLARE_EVENT_CLASS(xfs_ag_inode_class, __entry->agno = XFS_INO_TO_AGNO(ip->i_mount, ip->i_ino); __entry->agino = XFS_INO_TO_AGINO(ip->i_mount, ip->i_ino); ), - TP_printk("dev %d:%d agno %u agino %u", + TP_printk("dev %d:%d agno 0x%x agino 0x%x", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->agno, __entry->agino) ) @@ -3570,7 +3694,7 @@ DECLARE_EVENT_CLASS(xfs_ag_corrupt_class, __entry->agno = agno; __entry->flags = flags; ), - TP_printk("dev %d:%d agno %u flags 0x%x", + TP_printk("dev %d:%d agno 0x%x flags 0x%x", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->agno, __entry->flags) ); @@ -3621,7 +3745,7 @@ TRACE_EVENT(xfs_iwalk_ag, __entry->agno = agno; __entry->startino = startino; ), - TP_printk("dev %d:%d agno %d startino %u", + TP_printk("dev %d:%d agno 0x%x startino 0x%x", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->agno, __entry->startino) ) @@ -3642,7 +3766,7 @@ TRACE_EVENT(xfs_iwalk_ag_rec, __entry->startino = irec->ir_startino; __entry->freemask = irec->ir_free; ), - TP_printk("dev %d:%d agno %d startino %u freemask 0x%llx", + TP_printk("dev %d:%d agno 0x%x startino 0x%x freemask 0x%llx", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->agno, __entry->startino, __entry->freemask) ) @@ -3689,8 +3813,6 @@ DEFINE_EVENT(xfs_kmem_class, name, \ TP_PROTO(ssize_t size, int flags, unsigned long caller_ip), \ TP_ARGS(size, flags, caller_ip)) DEFINE_KMEM_EVENT(kmem_alloc); -DEFINE_KMEM_EVENT(kmem_alloc_io); -DEFINE_KMEM_EVENT(kmem_alloc_large); TRACE_EVENT(xfs_check_new_dalign, TP_PROTO(struct xfs_mount *mp, int new_dalign, xfs_ino_t calc_rootino), @@ -3707,7 +3829,7 @@ TRACE_EVENT(xfs_check_new_dalign, __entry->sb_rootino = mp->m_sb.sb_rootino; __entry->calc_rootino = calc_rootino; ), - TP_printk("dev %d:%d new_dalign %d sb_rootino %llu calc_rootino %llu", + TP_printk("dev %d:%d new_dalign %d sb_rootino 0x%llx calc_rootino 0x%llx", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->new_dalign, __entry->sb_rootino, __entry->calc_rootino) @@ -3732,7 +3854,7 @@ TRACE_EVENT(xfs_btree_commit_afakeroot, __entry->levels = cur->bc_ag.afake->af_levels; __entry->blocks = cur->bc_ag.afake->af_blocks; ), - TP_printk("dev %d:%d btree %s ag %u levels %u blocks %u root %u", + TP_printk("dev %d:%d btree %s agno 0x%x levels %u blocks %u root %u", MAJOR(__entry->dev), MINOR(__entry->dev), __print_symbolic(__entry->btnum, XFS_BTNUM_STRINGS), __entry->agno, @@ -3764,12 +3886,12 @@ TRACE_EVENT(xfs_btree_commit_ifakeroot, __entry->blocks = cur->bc_ino.ifake->if_blocks; __entry->whichfork = cur->bc_ino.whichfork; ), - TP_printk("dev %d:%d btree %s ag %u agino %u whichfork %s levels %u blocks %u", + TP_printk("dev %d:%d btree %s agno 0x%x agino 0x%x whichfork %s levels %u blocks %u", MAJOR(__entry->dev), MINOR(__entry->dev), __print_symbolic(__entry->btnum, XFS_BTNUM_STRINGS), __entry->agno, __entry->agino, - __entry->whichfork == XFS_ATTR_FORK ? 
"attr" : "data", + __print_symbolic(__entry->whichfork, XFS_WHICHFORK_STRINGS), __entry->levels, __entry->blocks) ) @@ -3847,7 +3969,7 @@ TRACE_EVENT(xfs_btree_bload_block, } __entry->nr_records = nr_records; ), - TP_printk("dev %d:%d btree %s level %u block %llu/%llu fsb (%u/%u) recs %u", + TP_printk("dev %d:%d btree %s level %u block %llu/%llu agno 0x%x agbno 0x%x recs %u", MAJOR(__entry->dev), MINOR(__entry->dev), __print_symbolic(__entry->btnum, XFS_BTNUM_STRINGS), __entry->level, @@ -3934,7 +4056,6 @@ TRACE_DEFINE_ENUM(XLOG_STATE_SYNCING); TRACE_DEFINE_ENUM(XLOG_STATE_DONE_SYNC); TRACE_DEFINE_ENUM(XLOG_STATE_CALLBACK); TRACE_DEFINE_ENUM(XLOG_STATE_DIRTY); -TRACE_DEFINE_ENUM(XLOG_STATE_IOERROR); DECLARE_EVENT_CLASS(xlog_iclog_class, TP_PROTO(struct xlog_in_core *iclog, unsigned long caller_ip), @@ -3990,6 +4111,57 @@ DEFINE_ICLOG_EVENT(xlog_iclog_want_sync); DEFINE_ICLOG_EVENT(xlog_iclog_wait_on); DEFINE_ICLOG_EVENT(xlog_iclog_write); +DECLARE_EVENT_CLASS(xfs_das_state_class, + TP_PROTO(int das, struct xfs_inode *ip), + TP_ARGS(das, ip), + TP_STRUCT__entry( + __field(int, das) + __field(xfs_ino_t, ino) + ), + TP_fast_assign( + __entry->das = das; + __entry->ino = ip->i_ino; + ), + TP_printk("state change %d ino 0x%llx", + __entry->das, __entry->ino) +) + +#define DEFINE_DAS_STATE_EVENT(name) \ +DEFINE_EVENT(xfs_das_state_class, name, \ + TP_PROTO(int das, struct xfs_inode *ip), \ + TP_ARGS(das, ip)) +DEFINE_DAS_STATE_EVENT(xfs_attr_sf_addname_return); +DEFINE_DAS_STATE_EVENT(xfs_attr_set_iter_return); +DEFINE_DAS_STATE_EVENT(xfs_attr_node_addname_return); +DEFINE_DAS_STATE_EVENT(xfs_attr_remove_iter_return); +DEFINE_DAS_STATE_EVENT(xfs_attr_rmtval_remove_return); + +TRACE_EVENT(xfs_force_shutdown, + TP_PROTO(struct xfs_mount *mp, int ptag, int flags, const char *fname, + int line_num), + TP_ARGS(mp, ptag, flags, fname, line_num), + TP_STRUCT__entry( + __field(dev_t, dev) + __field(int, ptag) + __field(int, flags) + __string(fname, fname) + __field(int, line_num) + ), + TP_fast_assign( + __entry->dev = mp->m_super->s_dev; + __entry->ptag = ptag; + __entry->flags = flags; + __assign_str(fname, fname); + __entry->line_num = line_num; + ), + TP_printk("dev %d:%d tag %s flags %s file %s line_num %d", + MAJOR(__entry->dev), MINOR(__entry->dev), + __print_flags(__entry->ptag, "|", XFS_PTAG_STRINGS), + __print_flags(__entry->flags, "|", XFS_SHUTDOWN_STRINGS), + __get_str(fname), + __entry->line_num) +); + #endif /* _TRACE_XFS_H */ #undef TRACE_INCLUDE_PATH diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c index 87bffd12c20c..67dec11e34c7 100644 --- a/fs/xfs/xfs_trans.c +++ b/fs/xfs/xfs_trans.c @@ -9,7 +9,6 @@ #include "xfs_shared.h" #include "xfs_format.h" #include "xfs_log_format.h" -#include "xfs_log_priv.h" #include "xfs_trans_resv.h" #include "xfs_mount.h" #include "xfs_extent_busy.h" @@ -17,6 +16,7 @@ #include "xfs_trans.h" #include "xfs_trans_priv.h" #include "xfs_log.h" +#include "xfs_log_priv.h" #include "xfs_trace.h" #include "xfs_error.h" #include "xfs_defer.h" @@ -275,7 +275,7 @@ retry: WARN_ON(resp->tr_logres > 0 && mp->m_super->s_writers.frozen == SB_FREEZE_COMPLETE); ASSERT(!(flags & XFS_TRANS_RES_FDBLKS) || - xfs_sb_version_haslazysbcount(&mp->m_sb)); + xfs_has_lazysbcount(mp)); tp->t_magic = XFS_TRANS_HEADER_MAGIC; tp->t_flags = flags; @@ -295,10 +295,7 @@ retry: * Do not perform a synchronous scan because callers can hold * other locks. 
*/ - error = xfs_blockgc_free_space(mp, NULL); - if (error) - return error; - + xfs_blockgc_flush_all(mp); want_retry = false; goto retry; } @@ -367,12 +364,12 @@ xfs_trans_mod_sb( switch (field) { case XFS_TRANS_SB_ICOUNT: tp->t_icount_delta += delta; - if (xfs_sb_version_haslazysbcount(&mp->m_sb)) + if (xfs_has_lazysbcount(mp)) flags &= ~XFS_TRANS_SB_DIRTY; break; case XFS_TRANS_SB_IFREE: tp->t_ifree_delta += delta; - if (xfs_sb_version_haslazysbcount(&mp->m_sb)) + if (xfs_has_lazysbcount(mp)) flags &= ~XFS_TRANS_SB_DIRTY; break; case XFS_TRANS_SB_FDBLOCKS: @@ -401,7 +398,7 @@ xfs_trans_mod_sb( delta -= blkres_delta; } tp->t_fdblocks_delta += delta; - if (xfs_sb_version_haslazysbcount(&mp->m_sb)) + if (xfs_has_lazysbcount(mp)) flags &= ~XFS_TRANS_SB_DIRTY; break; case XFS_TRANS_SB_RES_FDBLOCKS: @@ -411,7 +408,7 @@ xfs_trans_mod_sb( * be applied to the on-disk superblock. */ tp->t_res_fdblocks_delta += delta; - if (xfs_sb_version_haslazysbcount(&mp->m_sb)) + if (xfs_has_lazysbcount(mp)) flags &= ~XFS_TRANS_SB_DIRTY; break; case XFS_TRANS_SB_FREXTENTS: @@ -490,7 +487,7 @@ xfs_trans_apply_sb_deltas( /* * Only update the superblock counters if we are logging them */ - if (!xfs_sb_version_haslazysbcount(&(tp->t_mountp->m_sb))) { + if (!xfs_has_lazysbcount((tp->t_mountp))) { if (tp->t_icount_delta) be64_add_cpu(&sbp->sb_icount, tp->t_icount_delta); if (tp->t_ifree_delta) @@ -588,7 +585,7 @@ xfs_trans_unreserve_and_mod_sb( if (tp->t_blk_res > 0) blkdelta = tp->t_blk_res; if ((tp->t_fdblocks_delta != 0) && - (xfs_sb_version_haslazysbcount(&mp->m_sb) || + (xfs_has_lazysbcount(mp) || (tp->t_flags & XFS_TRANS_SB_DIRTY))) blkdelta += tp->t_fdblocks_delta; @@ -598,7 +595,7 @@ xfs_trans_unreserve_and_mod_sb( (tp->t_flags & XFS_TRANS_SB_DIRTY)) rtxdelta += tp->t_frextents_delta; - if (xfs_sb_version_haslazysbcount(&mp->m_sb) || + if (xfs_has_lazysbcount(mp) || (tp->t_flags & XFS_TRANS_SB_DIRTY)) { idelta = tp->t_icount_delta; ifreedelta = tp->t_ifree_delta; @@ -778,7 +775,7 @@ xfs_trans_committed_bulk( * object into the AIL as we are in a shutdown situation. */ if (aborted) { - ASSERT(XFS_FORCED_SHUTDOWN(ailp->ail_mount)); + ASSERT(xfs_is_shutdown(ailp->ail_mount)); if (lip->li_ops->iop_unpin) lip->li_ops->iop_unpin(lip, 1); continue; @@ -867,7 +864,7 @@ __xfs_trans_commit( if (!(tp->t_flags & XFS_TRANS_DIRTY)) goto out_unreserve; - if (XFS_FORCED_SHUTDOWN(mp)) { + if (xfs_is_shutdown(mp)) { error = -EIO; goto out_unreserve; } @@ -908,7 +905,7 @@ out_unreserve: */ xfs_trans_unreserve_and_mod_dquots(tp); if (tp->t_ticket) { - if (regrant && !XLOG_FORCED_SHUTDOWN(mp->m_log)) + if (regrant && !xlog_is_shutdown(mp->m_log)) xfs_log_ticket_regrant(mp->m_log, tp->t_ticket); else xfs_log_ticket_ungrant(mp->m_log, tp->t_ticket); @@ -953,12 +950,12 @@ xfs_trans_cancel( * filesystem. This happens in paths where we detect * corruption and decide to give up. 
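
/*
 * Illustrative sketch, not part of the patch: the ENOSPC retry path in
 * xfs_trans_alloc() above now calls xfs_blockgc_flush_all() instead of
 * the error-returning xfs_blockgc_free_space(mp, NULL). Matching the
 * DEFINE_FS_EVENT(xfs_blockgc_flush_all) tracepoint added earlier, the
 * helper plausibly pushes every queued per-AG blockgc worker and then
 * waits for completion; m_blockgc_wq and pag_blockgc_work are assumed
 * names:
 */
void xfs_blockgc_flush_all(struct xfs_mount *mp)
{
	struct xfs_perag	*pag;
	xfs_agnumber_t		agno;

	trace_xfs_blockgc_flush_all(mp, __return_address);

	/* push all queued blockgc workers to run immediately... */
	for_each_perag_tag(pag, agno, mp, XFS_ICI_BLOCKGC_TAG)
		mod_delayed_work(mp->m_blockgc_wq, &pag->pag_blockgc_work, 0);

	/* ...then wait so any freed space is visible to the retry */
	for_each_perag_tag(pag, agno, mp, XFS_ICI_BLOCKGC_TAG)
		flush_delayed_work(&pag->pag_blockgc_work);
}
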
*/ - if (dirty && !XFS_FORCED_SHUTDOWN(mp)) { + if (dirty && !xfs_is_shutdown(mp)) { XFS_ERROR_REPORT("xfs_trans_cancel", XFS_ERRLEVEL_LOW, mp); xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE); } #ifdef DEBUG - if (!dirty && !XFS_FORCED_SHUTDOWN(mp)) { + if (!dirty && !xfs_is_shutdown(mp)) { struct xfs_log_item *lip; list_for_each_entry(lip, &tp->t_items, li_trans) diff --git a/fs/xfs/xfs_trans_ail.c b/fs/xfs/xfs_trans_ail.c index dbb69b4bf3ed..2a8c8dc54c95 100644 --- a/fs/xfs/xfs_trans_ail.c +++ b/fs/xfs/xfs_trans_ail.c @@ -17,6 +17,7 @@ #include "xfs_errortag.h" #include "xfs_error.h" #include "xfs_log.h" +#include "xfs_log_priv.h" #ifdef DEBUG /* @@ -429,8 +430,12 @@ xfsaild_push( /* * If we encountered pinned items or did not finish writing out all - * buffers the last time we ran, force the log first and wait for it - * before pushing again. + * buffers the last time we ran, force a background CIL push to get the + * items unpinned in the near future. We do not wait on the CIL push as + * that could stall us for seconds if there is enough background IO + * load. Stalling for that long when the tail of the log is pinned and + * needs flushing will hard stop the transaction subsystem when log + * space runs out. */ if (ailp->ail_log_flush && ailp->ail_last_pushed_lsn == 0 && (!list_empty_careful(&ailp->ail_buf_list) || @@ -438,7 +443,7 @@ xfsaild_push( ailp->ail_log_flush = 0; XFS_STATS_INC(mp, xs_push_ail_flush); - xfs_log_force(mp, XFS_LOG_SYNC); + xlog_cil_flush(mp->m_log); } spin_lock(&ailp->ail_lock); @@ -615,7 +620,7 @@ xfsaild( * opportunity to release such buffers from the queue. */ ASSERT(list_empty(&ailp->ail_buf_list) || - XFS_FORCED_SHUTDOWN(ailp->ail_mount)); + xfs_is_shutdown(ailp->ail_mount)); xfs_buf_delwri_cancel(&ailp->ail_buf_list); break; } @@ -678,7 +683,7 @@ xfs_ail_push( struct xfs_log_item *lip; lip = xfs_ail_min(ailp); - if (!lip || XFS_FORCED_SHUTDOWN(ailp->ail_mount) || + if (!lip || xfs_is_shutdown(ailp->ail_mount) || XFS_LSN_CMP(threshold_lsn, ailp->ail_target) <= 0) return; @@ -743,7 +748,7 @@ xfs_ail_update_finish( return; } - if (!XFS_FORCED_SHUTDOWN(mp)) + if (!xfs_is_shutdown(mp)) xlog_assign_tail_lsn_locked(mp); if (list_empty(&ailp->ail_head)) @@ -863,7 +868,7 @@ xfs_trans_ail_delete( spin_lock(&ailp->ail_lock); if (!test_bit(XFS_LI_IN_AIL, &lip->li_flags)) { spin_unlock(&ailp->ail_lock); - if (shutdown_type && !XFS_FORCED_SHUTDOWN(mp)) { + if (shutdown_type && !xfs_is_shutdown(mp)) { xfs_alert_tag(mp, XFS_PTAG_AILDELETE, "%s: attempting to delete a log item that is not in the AIL", __func__); diff --git a/fs/xfs/xfs_trans_buf.c b/fs/xfs/xfs_trans_buf.c index d11d032da0b4..6549e50d852c 100644 --- a/fs/xfs/xfs_trans_buf.c +++ b/fs/xfs/xfs_trans_buf.c @@ -38,7 +38,7 @@ xfs_trans_buf_item_match( blip = (struct xfs_buf_log_item *)lip; if (blip->bli_item.li_type == XFS_LI_BUF && blip->bli_buf->b_target == target && - XFS_BUF_ADDR(blip->bli_buf) == map[0].bm_bn && + xfs_buf_daddr(blip->bli_buf) == map[0].bm_bn && blip->bli_buf->b_length == len) { ASSERT(blip->bli_buf->b_map_count == nmaps); return blip->bli_buf; @@ -138,7 +138,7 @@ xfs_trans_get_buf_map( bp = xfs_trans_buf_item_match(tp, target, map, nmaps); if (bp != NULL) { ASSERT(xfs_buf_islocked(bp)); - if (XFS_FORCED_SHUTDOWN(tp->t_mountp)) { + if (xfs_is_shutdown(tp->t_mountp)) { xfs_buf_stale(bp); bp->b_flags |= XBF_DONE; } @@ -244,7 +244,7 @@ xfs_trans_read_buf_map( * We never locked this buf ourselves, so we shouldn't * brelse it either. Just get out. 
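
/*
 * Illustrative sketch, not part of the patch: the old XFS_FORCED_SHUTDOWN()
 * and XLOG_FORCED_SHUTDOWN() macros give way to xfs_is_shutdown(mp) and
 * xlog_is_shutdown(log), both plain bit tests. The log-side predicate
 * presumably mirrors the mount-side one (field and bit names here are
 * assumptions):
 */
static inline bool xlog_is_shutdown(struct xlog *log)
{
	return test_bit(XLOG_IO_ERROR, &log->l_opstate);
}
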
*/ - if (XFS_FORCED_SHUTDOWN(mp)) { + if (xfs_is_shutdown(mp)) { trace_xfs_trans_read_buf_shut(bp, _RET_IP_); return -EIO; } @@ -300,7 +300,7 @@ xfs_trans_read_buf_map( return error; } - if (XFS_FORCED_SHUTDOWN(mp)) { + if (xfs_is_shutdown(mp)) { xfs_buf_relse(bp); trace_xfs_trans_read_buf_shut(bp, _RET_IP_); return -EIO; } diff --git a/fs/xfs/xfs_trans_dquot.c b/fs/xfs/xfs_trans_dquot.c index 48e09ea30ee5..3872ce671411 100644 --- a/fs/xfs/xfs_trans_dquot.c +++ b/fs/xfs/xfs_trans_dquot.c @@ -58,7 +58,7 @@ xfs_trans_log_dquot( /* Upgrade the dquot to bigtime format if possible. */ if (dqp->q_id != 0 && - xfs_sb_version_hasbigtime(&tp->t_mountp->m_sb) && + xfs_has_bigtime(tp->t_mountp) && !(dqp->q_type & XFS_DQTYPE_BIGTIME)) dqp->q_type |= XFS_DQTYPE_BIGTIME; @@ -132,8 +132,7 @@ xfs_trans_mod_dquot_byino( { xfs_mount_t *mp = tp->t_mountp; - if (!XFS_IS_QUOTA_RUNNING(mp) || - !XFS_IS_QUOTA_ON(mp) || + if (!XFS_IS_QUOTA_ON(mp) || xfs_is_quota_inode(&mp->m_sb, ip->i_ino)) return; @@ -192,7 +191,7 @@ xfs_trans_mod_dquot( struct xfs_dqtrx *qtrx; ASSERT(tp); - ASSERT(XFS_IS_QUOTA_RUNNING(tp->t_mountp)); + ASSERT(XFS_IS_QUOTA_ON(tp->t_mountp)); qtrx = NULL; if (!delta) @@ -738,7 +737,7 @@ xfs_trans_reserve_quota_bydquots( { int error; - if (!XFS_IS_QUOTA_RUNNING(mp) || !XFS_IS_QUOTA_ON(mp)) + if (!XFS_IS_QUOTA_ON(mp)) return 0; ASSERT(flags & XFS_QMOPT_RESBLK_MASK); @@ -795,7 +794,7 @@ xfs_trans_reserve_quota_nblks( unsigned int qflags = 0; int error; - if (!XFS_IS_QUOTA_RUNNING(mp) || !XFS_IS_QUOTA_ON(mp)) + if (!XFS_IS_QUOTA_ON(mp)) return 0; ASSERT(!xfs_is_quota_inode(&mp->m_sb, ip->i_ino)); @@ -836,51 +835,13 @@ xfs_trans_reserve_quota_icreate( { struct xfs_mount *mp = tp->t_mountp; - if (!XFS_IS_QUOTA_RUNNING(mp) || !XFS_IS_QUOTA_ON(mp)) + if (!XFS_IS_QUOTA_ON(mp)) return 0; return xfs_trans_reserve_quota_bydquots(tp, mp, udqp, gdqp, pdqp, dblocks, 1, XFS_QMOPT_RES_REGBLKS); } -/* - * This routine is called to allocate a quotaoff log item. - */ -struct xfs_qoff_logitem * -xfs_trans_get_qoff_item( - struct xfs_trans *tp, - struct xfs_qoff_logitem *startqoff, - uint flags) -{ - struct xfs_qoff_logitem *q; - - ASSERT(tp != NULL); - - q = xfs_qm_qoff_logitem_init(tp->t_mountp, startqoff, flags); - ASSERT(q != NULL); - - /* - * Get a log_item_desc to point at the new item. - */ - xfs_trans_add_item(tp, &q->qql_item); - return q; -} - - -/* - * This is called to mark the quotaoff logitem as needing - * to be logged when the transaction is committed. The logitem must - * already be associated with the given transaction. - */ -void -xfs_trans_log_quotaoff_item( - struct xfs_trans *tp, - struct xfs_qoff_logitem *qlp) -{ - tp->t_flags |= XFS_TRANS_DIRTY; - set_bit(XFS_LI_DIRTY, &qlp->qql_item.li_flags); -} - STATIC void xfs_trans_alloc_dqinfo( xfs_trans_t *tp)
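
/*
 * Illustrative sketch, not part of the patch: xfs_trans_dquot.c above
 * drops every XFS_IS_QUOTA_RUNNING(mp) test and removes the quotaoff log
 * item helpers, leaving XFS_IS_QUOTA_ON(mp) as the single gate. With
 * quotaoff no longer able to disable accounting at runtime, "running" and
 * "on" mean the same thing, and the surviving check is presumably just a
 * qflags test:
 */
#define XFS_IS_QUOTA_ON(mp)	((mp)->m_qflags & XFS_ALL_QUOTA_ACCT)
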