diff options
Diffstat (limited to 'fs/xfs/libxfs')
27 files changed, 1159 insertions, 408 deletions
diff --git a/fs/xfs/libxfs/xfs_ag.c b/fs/xfs/libxfs/xfs_ag.c index 86696a1c6891..1b078bbbf225 100644 --- a/fs/xfs/libxfs/xfs_ag.c +++ b/fs/xfs/libxfs/xfs_ag.c @@ -81,6 +81,19 @@ xfs_perag_get_tag( return pag; } +/* Get a passive reference to the given perag. */ +struct xfs_perag * +xfs_perag_hold( + struct xfs_perag *pag) +{ + ASSERT(atomic_read(&pag->pag_ref) > 0 || + atomic_read(&pag->pag_active_ref) > 0); + + trace_xfs_perag_hold(pag, _RET_IP_); + atomic_inc(&pag->pag_ref); + return pag; +} + void xfs_perag_put( struct xfs_perag *pag) @@ -247,6 +260,7 @@ xfs_free_perag( spin_unlock(&mp->m_perag_lock); ASSERT(pag); XFS_IS_CORRUPT(pag->pag_mount, atomic_read(&pag->pag_ref) != 0); + xfs_defer_drain_free(&pag->pag_intents_drain); cancel_delayed_work_sync(&pag->pag_blockgc_work); xfs_buf_hash_destroy(pag); @@ -372,6 +386,7 @@ xfs_initialize_perag( spin_lock_init(&pag->pag_state_lock); INIT_DELAYED_WORK(&pag->pag_blockgc_work, xfs_blockgc_worker); INIT_RADIX_TREE(&pag->pag_ici_root, GFP_ATOMIC); + xfs_defer_drain_init(&pag->pag_intents_drain); init_waitqueue_head(&pag->pagb_wait); init_waitqueue_head(&pag->pag_active_wq); pag->pagb_count = 0; @@ -408,6 +423,7 @@ xfs_initialize_perag( return 0; out_remove_pag: + xfs_defer_drain_free(&pag->pag_intents_drain); radix_tree_delete(&mp->m_perag_tree, index); out_free_pag: kmem_free(pag); @@ -418,6 +434,7 @@ out_unwind_new_pags: if (!pag) break; xfs_buf_hash_destroy(pag); + xfs_defer_drain_free(&pag->pag_intents_drain); kmem_free(pag); } return error; @@ -1043,10 +1060,8 @@ xfs_ag_extend_space( if (error) return error; - error = xfs_free_extent(tp, XFS_AGB_TO_FSB(pag->pag_mount, pag->pag_agno, - be32_to_cpu(agf->agf_length) - len), - len, &XFS_RMAP_OINFO_SKIP_UPDATE, - XFS_AG_RESV_NONE); + error = xfs_free_extent(tp, pag, be32_to_cpu(agf->agf_length) - len, + len, &XFS_RMAP_OINFO_SKIP_UPDATE, XFS_AG_RESV_NONE); if (error) return error; diff --git a/fs/xfs/libxfs/xfs_ag.h b/fs/xfs/libxfs/xfs_ag.h index 5e18536dfdce..2e0aef87d633 100644 --- a/fs/xfs/libxfs/xfs_ag.h +++ b/fs/xfs/libxfs/xfs_ag.h @@ -101,6 +101,14 @@ struct xfs_perag { /* background prealloc block trimming */ struct delayed_work pag_blockgc_work; + /* + * We use xfs_drain to track the number of deferred log intent items + * that have been queued (but not yet processed) so that waiters (e.g. + * scrub) will not lock resources when other threads are in the middle + * of processing a chain of intent items only to find momentary + * inconsistencies. + */ + struct xfs_defer_drain pag_intents_drain; #endif /* __KERNEL__ */ }; @@ -134,6 +142,7 @@ void xfs_free_perag(struct xfs_mount *mp); struct xfs_perag *xfs_perag_get(struct xfs_mount *mp, xfs_agnumber_t agno); struct xfs_perag *xfs_perag_get_tag(struct xfs_mount *mp, xfs_agnumber_t agno, unsigned int tag); +struct xfs_perag *xfs_perag_hold(struct xfs_perag *pag); void xfs_perag_put(struct xfs_perag *pag); /* Active AG references */ diff --git a/fs/xfs/libxfs/xfs_alloc.c b/fs/xfs/libxfs/xfs_alloc.c index 203f16c48c19..fdfa08cbf4db 100644 --- a/fs/xfs/libxfs/xfs_alloc.c +++ b/fs/xfs/libxfs/xfs_alloc.c @@ -233,6 +233,52 @@ xfs_alloc_update( return xfs_btree_update(cur, &rec); } +/* Convert the ondisk btree record to its incore representation. */ +void +xfs_alloc_btrec_to_irec( + const union xfs_btree_rec *rec, + struct xfs_alloc_rec_incore *irec) +{ + irec->ar_startblock = be32_to_cpu(rec->alloc.ar_startblock); + irec->ar_blockcount = be32_to_cpu(rec->alloc.ar_blockcount); +} + +/* Simple checks for free space records. */ +xfs_failaddr_t +xfs_alloc_check_irec( + struct xfs_btree_cur *cur, + const struct xfs_alloc_rec_incore *irec) +{ + struct xfs_perag *pag = cur->bc_ag.pag; + + if (irec->ar_blockcount == 0) + return __this_address; + + /* check for valid extent range, including overflow */ + if (!xfs_verify_agbext(pag, irec->ar_startblock, irec->ar_blockcount)) + return __this_address; + + return NULL; +} + +static inline int +xfs_alloc_complain_bad_rec( + struct xfs_btree_cur *cur, + xfs_failaddr_t fa, + const struct xfs_alloc_rec_incore *irec) +{ + struct xfs_mount *mp = cur->bc_mp; + + xfs_warn(mp, + "%s Freespace BTree record corruption in AG %d detected at %pS!", + cur->bc_btnum == XFS_BTNUM_BNO ? "Block" : "Size", + cur->bc_ag.pag->pag_agno, fa); + xfs_warn(mp, + "start block 0x%x block count 0x%x", irec->ar_startblock, + irec->ar_blockcount); + return -EFSCORRUPTED; +} + /* * Get the data from the pointed-to record. */ @@ -243,35 +289,23 @@ xfs_alloc_get_rec( xfs_extlen_t *len, /* output: length of extent */ int *stat) /* output: success/failure */ { - struct xfs_mount *mp = cur->bc_mp; - struct xfs_perag *pag = cur->bc_ag.pag; + struct xfs_alloc_rec_incore irec; union xfs_btree_rec *rec; + xfs_failaddr_t fa; int error; error = xfs_btree_get_rec(cur, &rec, stat); if (error || !(*stat)) return error; - *bno = be32_to_cpu(rec->alloc.ar_startblock); - *len = be32_to_cpu(rec->alloc.ar_blockcount); - - if (*len == 0) - goto out_bad_rec; - - /* check for valid extent range, including overflow */ - if (!xfs_verify_agbext(pag, *bno, *len)) - goto out_bad_rec; + xfs_alloc_btrec_to_irec(rec, &irec); + fa = xfs_alloc_check_irec(cur, &irec); + if (fa) + return xfs_alloc_complain_bad_rec(cur, fa, &irec); + *bno = irec.ar_startblock; + *len = irec.ar_blockcount; return 0; - -out_bad_rec: - xfs_warn(mp, - "%s Freespace BTree record corruption in AG %d detected!", - cur->bc_btnum == XFS_BTNUM_BNO ? "Block" : "Size", - pag->pag_agno); - xfs_warn(mp, - "start block 0x%x block count 0x%x", *bno, *len); - return -EFSCORRUPTED; } /* @@ -2405,6 +2439,7 @@ xfs_defer_agfl_block( trace_xfs_agfl_free_defer(mp, agno, 0, agbno, 1); + xfs_extent_free_get_group(mp, xefi); xfs_defer_add(tp, XFS_DEFER_OPS_TYPE_AGFL_FREE, &xefi->xefi_list); } @@ -2421,8 +2456,8 @@ __xfs_free_extent_later( bool skip_discard) { struct xfs_extent_free_item *xefi; -#ifdef DEBUG struct xfs_mount *mp = tp->t_mountp; +#ifdef DEBUG xfs_agnumber_t agno; xfs_agblock_t agbno; @@ -2456,9 +2491,11 @@ __xfs_free_extent_later( } else { xefi->xefi_owner = XFS_RMAP_OWN_NULL; } - trace_xfs_bmap_free_defer(tp->t_mountp, + trace_xfs_bmap_free_defer(mp, XFS_FSB_TO_AGNO(tp->t_mountp, bno), 0, XFS_FSB_TO_AGBNO(tp->t_mountp, bno), len); + + xfs_extent_free_get_group(mp, xefi); xfs_defer_add(tp, XFS_DEFER_OPS_TYPE_FREE, &xefi->xefi_list); } @@ -3596,7 +3633,8 @@ xfs_free_extent_fix_freelist( int __xfs_free_extent( struct xfs_trans *tp, - xfs_fsblock_t bno, + struct xfs_perag *pag, + xfs_agblock_t agbno, xfs_extlen_t len, const struct xfs_owner_info *oinfo, enum xfs_ag_resv_type type, @@ -3604,12 +3642,9 @@ __xfs_free_extent( { struct xfs_mount *mp = tp->t_mountp; struct xfs_buf *agbp; - xfs_agnumber_t agno = XFS_FSB_TO_AGNO(mp, bno); - xfs_agblock_t agbno = XFS_FSB_TO_AGBNO(mp, bno); struct xfs_agf *agf; int error; unsigned int busy_flags = 0; - struct xfs_perag *pag; ASSERT(len != 0); ASSERT(type != XFS_AG_RESV_AGFL); @@ -3618,10 +3653,9 @@ __xfs_free_extent( XFS_ERRTAG_FREE_EXTENT)) return -EIO; - pag = xfs_perag_get(mp, agno); error = xfs_free_extent_fix_freelist(tp, pag, &agbp); if (error) - goto err; + return error; agf = agbp->b_addr; if (XFS_IS_CORRUPT(mp, agbno >= mp->m_sb.sb_agblocks)) { @@ -3635,20 +3669,18 @@ __xfs_free_extent( goto err_release; } - error = xfs_free_ag_extent(tp, agbp, agno, agbno, len, oinfo, type); + error = xfs_free_ag_extent(tp, agbp, pag->pag_agno, agbno, len, oinfo, + type); if (error) goto err_release; if (skip_discard) busy_flags |= XFS_EXTENT_BUSY_SKIP_DISCARD; xfs_extent_busy_insert(tp, pag, agbno, len, busy_flags); - xfs_perag_put(pag); return 0; err_release: xfs_trans_brelse(tp, agbp); -err: - xfs_perag_put(pag); return error; } @@ -3666,9 +3698,13 @@ xfs_alloc_query_range_helper( { struct xfs_alloc_query_range_info *query = priv; struct xfs_alloc_rec_incore irec; + xfs_failaddr_t fa; + + xfs_alloc_btrec_to_irec(rec, &irec); + fa = xfs_alloc_check_irec(cur, &irec); + if (fa) + return xfs_alloc_complain_bad_rec(cur, fa, &irec); - irec.ar_startblock = be32_to_cpu(rec->alloc.ar_startblock); - irec.ar_blockcount = be32_to_cpu(rec->alloc.ar_blockcount); return query->fn(cur, &irec, query->priv); } @@ -3709,13 +3745,16 @@ xfs_alloc_query_all( return xfs_btree_query_all(cur, xfs_alloc_query_range_helper, &query); } -/* Is there a record covering a given extent? */ +/* + * Scan part of the keyspace of the free space and tell us if the area has no + * records, is fully mapped by records, or is partially filled. + */ int -xfs_alloc_has_record( +xfs_alloc_has_records( struct xfs_btree_cur *cur, xfs_agblock_t bno, xfs_extlen_t len, - bool *exists) + enum xbtree_recpacking *outcome) { union xfs_btree_irec low; union xfs_btree_irec high; @@ -3725,7 +3764,7 @@ xfs_alloc_has_record( memset(&high, 0xFF, sizeof(high)); high.a.ar_startblock = bno + len - 1; - return xfs_btree_has_record(cur, &low, &high, exists); + return xfs_btree_has_records(cur, &low, &high, NULL, outcome); } /* diff --git a/fs/xfs/libxfs/xfs_alloc.h b/fs/xfs/libxfs/xfs_alloc.h index 2b246d74c189..5dbb25546d0b 100644 --- a/fs/xfs/libxfs/xfs_alloc.h +++ b/fs/xfs/libxfs/xfs_alloc.h @@ -141,7 +141,8 @@ int xfs_alloc_vextent_first_ag(struct xfs_alloc_arg *args, int /* error */ __xfs_free_extent( struct xfs_trans *tp, /* transaction pointer */ - xfs_fsblock_t bno, /* starting block number of extent */ + struct xfs_perag *pag, + xfs_agblock_t agbno, xfs_extlen_t len, /* length of extent */ const struct xfs_owner_info *oinfo, /* extent owner */ enum xfs_ag_resv_type type, /* block reservation type */ @@ -150,12 +151,13 @@ __xfs_free_extent( static inline int xfs_free_extent( struct xfs_trans *tp, - xfs_fsblock_t bno, + struct xfs_perag *pag, + xfs_agblock_t agbno, xfs_extlen_t len, const struct xfs_owner_info *oinfo, enum xfs_ag_resv_type type) { - return __xfs_free_extent(tp, bno, len, oinfo, type, false); + return __xfs_free_extent(tp, pag, agbno, len, oinfo, type, false); } int /* error */ @@ -179,6 +181,12 @@ xfs_alloc_get_rec( xfs_extlen_t *len, /* output: length of extent */ int *stat); /* output: success/failure */ +union xfs_btree_rec; +void xfs_alloc_btrec_to_irec(const union xfs_btree_rec *rec, + struct xfs_alloc_rec_incore *irec); +xfs_failaddr_t xfs_alloc_check_irec(struct xfs_btree_cur *cur, + const struct xfs_alloc_rec_incore *irec); + int xfs_read_agf(struct xfs_perag *pag, struct xfs_trans *tp, int flags, struct xfs_buf **agfbpp); int xfs_alloc_read_agf(struct xfs_perag *pag, struct xfs_trans *tp, int flags, @@ -205,8 +213,8 @@ int xfs_alloc_query_range(struct xfs_btree_cur *cur, int xfs_alloc_query_all(struct xfs_btree_cur *cur, xfs_alloc_query_range_fn fn, void *priv); -int xfs_alloc_has_record(struct xfs_btree_cur *cur, xfs_agblock_t bno, - xfs_extlen_t len, bool *exist); +int xfs_alloc_has_records(struct xfs_btree_cur *cur, xfs_agblock_t bno, + xfs_extlen_t len, enum xbtree_recpacking *outcome); typedef int (*xfs_agfl_walk_fn)(struct xfs_mount *mp, xfs_agblock_t bno, void *priv); @@ -235,9 +243,13 @@ struct xfs_extent_free_item { uint64_t xefi_owner; xfs_fsblock_t xefi_startblock;/* starting fs block number */ xfs_extlen_t xefi_blockcount;/* number of blocks in extent */ + struct xfs_perag *xefi_pag; unsigned int xefi_flags; }; +void xfs_extent_free_get_group(struct xfs_mount *mp, + struct xfs_extent_free_item *xefi); + #define XFS_EFI_SKIP_DISCARD (1U << 0) /* don't issue discard */ #define XFS_EFI_ATTR_FORK (1U << 1) /* freeing attr fork block */ #define XFS_EFI_BMBT_BLOCK (1U << 2) /* freeing bmap btree block */ diff --git a/fs/xfs/libxfs/xfs_alloc_btree.c b/fs/xfs/libxfs/xfs_alloc_btree.c index 0f29c7b1b39f..c65228efed4a 100644 --- a/fs/xfs/libxfs/xfs_alloc_btree.c +++ b/fs/xfs/libxfs/xfs_alloc_btree.c @@ -260,20 +260,27 @@ STATIC int64_t xfs_bnobt_diff_two_keys( struct xfs_btree_cur *cur, const union xfs_btree_key *k1, - const union xfs_btree_key *k2) + const union xfs_btree_key *k2, + const union xfs_btree_key *mask) { + ASSERT(!mask || mask->alloc.ar_startblock); + return (int64_t)be32_to_cpu(k1->alloc.ar_startblock) - - be32_to_cpu(k2->alloc.ar_startblock); + be32_to_cpu(k2->alloc.ar_startblock); } STATIC int64_t xfs_cntbt_diff_two_keys( struct xfs_btree_cur *cur, const union xfs_btree_key *k1, - const union xfs_btree_key *k2) + const union xfs_btree_key *k2, + const union xfs_btree_key *mask) { int64_t diff; + ASSERT(!mask || (mask->alloc.ar_blockcount && + mask->alloc.ar_startblock)); + diff = be32_to_cpu(k1->alloc.ar_blockcount) - be32_to_cpu(k2->alloc.ar_blockcount); if (diff) @@ -423,6 +430,19 @@ xfs_cntbt_recs_inorder( be32_to_cpu(r2->alloc.ar_startblock)); } +STATIC enum xbtree_key_contig +xfs_allocbt_keys_contiguous( + struct xfs_btree_cur *cur, + const union xfs_btree_key *key1, + const union xfs_btree_key *key2, + const union xfs_btree_key *mask) +{ + ASSERT(!mask || mask->alloc.ar_startblock); + + return xbtree_key_contig(be32_to_cpu(key1->alloc.ar_startblock), + be32_to_cpu(key2->alloc.ar_startblock)); +} + static const struct xfs_btree_ops xfs_bnobt_ops = { .rec_len = sizeof(xfs_alloc_rec_t), .key_len = sizeof(xfs_alloc_key_t), @@ -443,6 +463,7 @@ static const struct xfs_btree_ops xfs_bnobt_ops = { .diff_two_keys = xfs_bnobt_diff_two_keys, .keys_inorder = xfs_bnobt_keys_inorder, .recs_inorder = xfs_bnobt_recs_inorder, + .keys_contiguous = xfs_allocbt_keys_contiguous, }; static const struct xfs_btree_ops xfs_cntbt_ops = { @@ -465,6 +486,7 @@ static const struct xfs_btree_ops xfs_cntbt_ops = { .diff_two_keys = xfs_cntbt_diff_two_keys, .keys_inorder = xfs_cntbt_keys_inorder, .recs_inorder = xfs_cntbt_recs_inorder, + .keys_contiguous = NULL, /* not needed right now */ }; /* Allocate most of a new allocation btree cursor. */ @@ -492,9 +514,7 @@ xfs_allocbt_init_common( cur->bc_statoff = XFS_STATS_CALC_INDEX(xs_abtb_2); } - /* take a reference for the cursor */ - atomic_inc(&pag->pag_ref); - cur->bc_ag.pag = pag; + cur->bc_ag.pag = xfs_perag_hold(pag); if (xfs_has_crc(mp)) cur->bc_flags |= XFS_BTREE_CRC_BLOCKS; diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c index 34de6e6898c4..b512de0540d5 100644 --- a/fs/xfs/libxfs/xfs_bmap.c +++ b/fs/xfs/libxfs/xfs_bmap.c @@ -1083,6 +1083,34 @@ struct xfs_iread_state { xfs_extnum_t loaded; }; +int +xfs_bmap_complain_bad_rec( + struct xfs_inode *ip, + int whichfork, + xfs_failaddr_t fa, + const struct xfs_bmbt_irec *irec) +{ + struct xfs_mount *mp = ip->i_mount; + const char *forkname; + + switch (whichfork) { + case XFS_DATA_FORK: forkname = "data"; break; + case XFS_ATTR_FORK: forkname = "attr"; break; + case XFS_COW_FORK: forkname = "CoW"; break; + default: forkname = "???"; break; + } + + xfs_warn(mp, + "Bmap BTree record corruption in inode 0x%llx %s fork detected at %pS!", + ip->i_ino, forkname, fa); + xfs_warn(mp, + "Offset 0x%llx, start block 0x%llx, block count 0x%llx state 0x%x", + irec->br_startoff, irec->br_startblock, irec->br_blockcount, + irec->br_state); + + return -EFSCORRUPTED; +} + /* Stuff every bmbt record from this block into the incore extent map. */ static int xfs_iread_bmbt_block( @@ -1125,7 +1153,8 @@ xfs_iread_bmbt_block( xfs_inode_verifier_error(ip, -EFSCORRUPTED, "xfs_iread_extents(2)", frp, sizeof(*frp), fa); - return -EFSCORRUPTED; + return xfs_bmap_complain_bad_rec(ip, whichfork, fa, + &new); } xfs_iext_insert(ip, &ir->icur, &new, xfs_bmap_fork_to_state(whichfork)); @@ -1171,6 +1200,12 @@ xfs_iread_extents( goto out; } ASSERT(ir.loaded == xfs_iext_count(ifp)); + /* + * Use release semantics so that we can use acquire semantics in + * xfs_need_iread_extents and be guaranteed to see a valid mapping tree + * after that load. + */ + smp_store_release(&ifp->if_needextents, 0); return 0; out: xfs_iext_destroy(ifp); @@ -3505,7 +3540,6 @@ xfs_bmap_btalloc_at_eof( * original non-aligned state so the caller can proceed on allocation * failure as if this function was never called. */ - args->fsbno = ap->blkno; args->alignment = 1; return 0; } @@ -6075,6 +6109,7 @@ __xfs_bmap_add( bi->bi_whichfork = whichfork; bi->bi_bmap = *bmap; + xfs_bmap_update_get_group(tp->t_mountp, bi); xfs_defer_add(tp, XFS_DEFER_OPS_TYPE_BMAP, &bi->bi_list); return 0; } diff --git a/fs/xfs/libxfs/xfs_bmap.h b/fs/xfs/libxfs/xfs_bmap.h index dd08361ca5a6..e33470e39728 100644 --- a/fs/xfs/libxfs/xfs_bmap.h +++ b/fs/xfs/libxfs/xfs_bmap.h @@ -145,7 +145,7 @@ static inline int xfs_bmapi_whichfork(uint32_t bmapi_flags) { BMAP_COWFORK, "COW" } /* Return true if the extent is an allocated extent, written or not. */ -static inline bool xfs_bmap_is_real_extent(struct xfs_bmbt_irec *irec) +static inline bool xfs_bmap_is_real_extent(const struct xfs_bmbt_irec *irec) { return irec->br_startblock != HOLESTARTBLOCK && irec->br_startblock != DELAYSTARTBLOCK && @@ -238,9 +238,13 @@ struct xfs_bmap_intent { enum xfs_bmap_intent_type bi_type; int bi_whichfork; struct xfs_inode *bi_owner; + struct xfs_perag *bi_pag; struct xfs_bmbt_irec bi_bmap; }; +void xfs_bmap_update_get_group(struct xfs_mount *mp, + struct xfs_bmap_intent *bi); + int xfs_bmap_finish_one(struct xfs_trans *tp, struct xfs_bmap_intent *bi); void xfs_bmap_map_extent(struct xfs_trans *tp, struct xfs_inode *ip, struct xfs_bmbt_irec *imap); @@ -261,6 +265,8 @@ static inline uint32_t xfs_bmap_fork_to_state(int whichfork) xfs_failaddr_t xfs_bmap_validate_extent(struct xfs_inode *ip, int whichfork, struct xfs_bmbt_irec *irec); +int xfs_bmap_complain_bad_rec(struct xfs_inode *ip, int whichfork, + xfs_failaddr_t fa, const struct xfs_bmbt_irec *irec); int xfs_bmapi_remap(struct xfs_trans *tp, struct xfs_inode *ip, xfs_fileoff_t bno, xfs_filblks_t len, xfs_fsblock_t startblock, diff --git a/fs/xfs/libxfs/xfs_bmap_btree.c b/fs/xfs/libxfs/xfs_bmap_btree.c index b8ad95050c9b..1b40e5f8b1ec 100644 --- a/fs/xfs/libxfs/xfs_bmap_btree.c +++ b/fs/xfs/libxfs/xfs_bmap_btree.c @@ -382,11 +382,14 @@ STATIC int64_t xfs_bmbt_diff_two_keys( struct xfs_btree_cur *cur, const union xfs_btree_key *k1, - const union xfs_btree_key *k2) + const union xfs_btree_key *k2, + const union xfs_btree_key *mask) { uint64_t a = be64_to_cpu(k1->bmbt.br_startoff); uint64_t b = be64_to_cpu(k2->bmbt.br_startoff); + ASSERT(!mask || mask->bmbt.br_startoff); + /* * Note: This routine previously casted a and b to int64 and subtracted * them to generate a result. This lead to problems if b was the @@ -500,6 +503,19 @@ xfs_bmbt_recs_inorder( xfs_bmbt_disk_get_startoff(&r2->bmbt); } +STATIC enum xbtree_key_contig +xfs_bmbt_keys_contiguous( + struct xfs_btree_cur *cur, + const union xfs_btree_key *key1, + const union xfs_btree_key *key2, + const union xfs_btree_key *mask) +{ + ASSERT(!mask || mask->bmbt.br_startoff); + + return xbtree_key_contig(be64_to_cpu(key1->bmbt.br_startoff), + be64_to_cpu(key2->bmbt.br_startoff)); +} + static const struct xfs_btree_ops xfs_bmbt_ops = { .rec_len = sizeof(xfs_bmbt_rec_t), .key_len = sizeof(xfs_bmbt_key_t), @@ -520,6 +536,7 @@ static const struct xfs_btree_ops xfs_bmbt_ops = { .buf_ops = &xfs_bmbt_buf_ops, .keys_inorder = xfs_bmbt_keys_inorder, .recs_inorder = xfs_bmbt_recs_inorder, + .keys_contiguous = xfs_bmbt_keys_contiguous, }; /* diff --git a/fs/xfs/libxfs/xfs_btree.c b/fs/xfs/libxfs/xfs_btree.c index c4649cc624e1..6a6503ab0cd7 100644 --- a/fs/xfs/libxfs/xfs_btree.c +++ b/fs/xfs/libxfs/xfs_btree.c @@ -2067,8 +2067,7 @@ xfs_btree_get_leaf_keys( for (n = 2; n <= xfs_btree_get_numrecs(block); n++) { rec = xfs_btree_rec_addr(cur, n, block); cur->bc_ops->init_high_key_from_rec(&hkey, rec); - if (cur->bc_ops->diff_two_keys(cur, &hkey, &max_hkey) - > 0) + if (xfs_btree_keycmp_gt(cur, &hkey, &max_hkey)) max_hkey = hkey; } @@ -2096,7 +2095,7 @@ xfs_btree_get_node_keys( max_hkey = xfs_btree_high_key_addr(cur, 1, block); for (n = 2; n <= xfs_btree_get_numrecs(block); n++) { hkey = xfs_btree_high_key_addr(cur, n, block); - if (cur->bc_ops->diff_two_keys(cur, hkey, max_hkey) > 0) + if (xfs_btree_keycmp_gt(cur, hkey, max_hkey)) max_hkey = hkey; } @@ -2183,8 +2182,8 @@ __xfs_btree_updkeys( nlkey = xfs_btree_key_addr(cur, ptr, block); nhkey = xfs_btree_high_key_addr(cur, ptr, block); if (!force_all && - !(cur->bc_ops->diff_two_keys(cur, nlkey, lkey) != 0 || - cur->bc_ops->diff_two_keys(cur, nhkey, hkey) != 0)) + xfs_btree_keycmp_eq(cur, nlkey, lkey) && + xfs_btree_keycmp_eq(cur, nhkey, hkey)) break; xfs_btree_copy_keys(cur, nlkey, lkey, 1); xfs_btree_log_keys(cur, bp, ptr, ptr); @@ -4716,7 +4715,6 @@ xfs_btree_simple_query_range( { union xfs_btree_rec *recp; union xfs_btree_key rec_key; - int64_t diff; int stat; bool firstrec = true; int error; @@ -4746,20 +4744,17 @@ xfs_btree_simple_query_range( if (error || !stat) break; - /* Skip if high_key(rec) < low_key. */ + /* Skip if low_key > high_key(rec). */ if (firstrec) { cur->bc_ops->init_high_key_from_rec(&rec_key, recp); firstrec = false; - diff = cur->bc_ops->diff_two_keys(cur, low_key, - &rec_key); - if (diff > 0) + if (xfs_btree_keycmp_gt(cur, low_key, &rec_key)) goto advloop; } - /* Stop if high_key < low_key(rec). */ + /* Stop if low_key(rec) > high_key. */ cur->bc_ops->init_key_from_rec(&rec_key, recp); - diff = cur->bc_ops->diff_two_keys(cur, &rec_key, high_key); - if (diff > 0) + if (xfs_btree_keycmp_gt(cur, &rec_key, high_key)) break; /* Callback */ @@ -4813,8 +4808,6 @@ xfs_btree_overlapped_query_range( union xfs_btree_key *hkp; union xfs_btree_rec *recp; struct xfs_btree_block *block; - int64_t ldiff; - int64_t hdiff; int level; struct xfs_buf *bp; int i; @@ -4854,25 +4847,23 @@ pop_up: block); cur->bc_ops->init_high_key_from_rec(&rec_hkey, recp); - ldiff = cur->bc_ops->diff_two_keys(cur, &rec_hkey, - low_key); - cur->bc_ops->init_key_from_rec(&rec_key, recp); - hdiff = cur->bc_ops->diff_two_keys(cur, high_key, - &rec_key); /* + * If (query's high key < record's low key), then there + * are no more interesting records in this block. Pop + * up to the leaf level to find more record blocks. + * * If (record's high key >= query's low key) and * (query's high key >= record's low key), then * this record overlaps the query range; callback. */ - if (ldiff >= 0 && hdiff >= 0) { + if (xfs_btree_keycmp_lt(cur, high_key, &rec_key)) + goto pop_up; + if (xfs_btree_keycmp_ge(cur, &rec_hkey, low_key)) { error = fn(cur, recp, priv); if (error) break; - } else if (hdiff < 0) { - /* Record is larger than high key; pop. */ - goto pop_up; } cur->bc_levels[level].ptr++; continue; @@ -4884,15 +4875,18 @@ pop_up: block); pp = xfs_btree_ptr_addr(cur, cur->bc_levels[level].ptr, block); - ldiff = cur->bc_ops->diff_two_keys(cur, hkp, low_key); - hdiff = cur->bc_ops->diff_two_keys(cur, high_key, lkp); - /* + * If (query's high key < pointer's low key), then there are no + * more interesting keys in this block. Pop up one leaf level + * to continue looking for records. + * * If (pointer's high key >= query's low key) and * (query's high key >= pointer's low key), then * this record overlaps the query range; follow pointer. */ - if (ldiff >= 0 && hdiff >= 0) { + if (xfs_btree_keycmp_lt(cur, high_key, lkp)) + goto pop_up; + if (xfs_btree_keycmp_ge(cur, hkp, low_key)) { level--; error = xfs_btree_lookup_get_block(cur, level, pp, &block); @@ -4907,9 +4901,6 @@ pop_up: #endif cur->bc_levels[level].ptr = 1; continue; - } else if (hdiff < 0) { - /* The low key is larger than the upper range; pop. */ - goto pop_up; } cur->bc_levels[level].ptr++; } @@ -4937,6 +4928,19 @@ out: return error; } +static inline void +xfs_btree_key_from_irec( + struct xfs_btree_cur *cur, + union xfs_btree_key *key, + const union xfs_btree_irec *irec) +{ + union xfs_btree_rec rec; + + cur->bc_rec = *irec; + cur->bc_ops->init_rec_from_cur(cur, &rec); + cur->bc_ops->init_key_from_rec(key, &rec); +} + /* * Query a btree for all records overlapping a given interval of keys. The * supplied function will be called with each record found; return one of the @@ -4951,21 +4955,15 @@ xfs_btree_query_range( xfs_btree_query_range_fn fn, void *priv) { - union xfs_btree_rec rec; union xfs_btree_key low_key; union xfs_btree_key high_key; /* Find the keys of both ends of the interval. */ - cur->bc_rec = *high_rec; - cur->bc_ops->init_rec_from_cur(cur, &rec); - cur->bc_ops->init_key_from_rec(&high_key, &rec); + xfs_btree_key_from_irec(cur, &high_key, high_rec); + xfs_btree_key_from_irec(cur, &low_key, low_rec); - cur->bc_rec = *low_rec; - cur->bc_ops->init_rec_from_cur(cur, &rec); - cur->bc_ops->init_key_from_rec(&low_key, &rec); - - /* Enforce low key < high key. */ - if (cur->bc_ops->diff_two_keys(cur, &low_key, &high_key) > 0) + /* Enforce low key <= high key. */ + if (!xfs_btree_keycmp_le(cur, &low_key, &high_key)) return -EINVAL; if (!(cur->bc_flags & XFS_BTREE_OVERLAPPING)) @@ -5027,34 +5025,132 @@ xfs_btree_diff_two_ptrs( return (int64_t)be32_to_cpu(a->s) - be32_to_cpu(b->s); } -/* If there's an extent, we're done. */ +struct xfs_btree_has_records { + /* Keys for the start and end of the range we want to know about. */ + union xfs_btree_key start_key; + union xfs_btree_key end_key; + + /* Mask for key comparisons, if desired. */ + const union xfs_btree_key *key_mask; + + /* Highest record key we've seen so far. */ + union xfs_btree_key high_key; + + enum xbtree_recpacking outcome; +}; + STATIC int -xfs_btree_has_record_helper( +xfs_btree_has_records_helper( struct xfs_btree_cur *cur, const union xfs_btree_rec *rec, void *priv) { - return -ECANCELED; + union xfs_btree_key rec_key; + union xfs_btree_key rec_high_key; + struct xfs_btree_has_records *info = priv; + enum xbtree_key_contig key_contig; + + cur->bc_ops->init_key_from_rec(&rec_key, rec); + + if (info->outcome == XBTREE_RECPACKING_EMPTY) { + info->outcome = XBTREE_RECPACKING_SPARSE; + + /* + * If the first record we find does not overlap the start key, + * then there is a hole at the start of the search range. + * Classify this as sparse and stop immediately. + */ + if (xfs_btree_masked_keycmp_lt(cur, &info->start_key, &rec_key, + info->key_mask)) + return -ECANCELED; + } else { + /* + * If a subsequent record does not overlap with the any record + * we've seen so far, there is a hole in the middle of the + * search range. Classify this as sparse and stop. + * If the keys overlap and this btree does not allow overlap, + * signal corruption. + */ + key_contig = cur->bc_ops->keys_contiguous(cur, &info->high_key, + &rec_key, info->key_mask); + if (key_contig == XBTREE_KEY_OVERLAP && + !(cur->bc_flags & XFS_BTREE_OVERLAPPING)) + return -EFSCORRUPTED; + if (key_contig == XBTREE_KEY_GAP) + return -ECANCELED; + } + + /* + * If high_key(rec) is larger than any other high key we've seen, + * remember it for later. + */ + cur->bc_ops->init_high_key_from_rec(&rec_high_key, rec); + if (xfs_btree_masked_keycmp_gt(cur, &rec_high_key, &info->high_key, + info->key_mask)) + info->high_key = rec_high_key; /* struct copy */ + + return 0; } -/* Is there a record covering a given range of keys? */ +/* + * Scan part of the keyspace of a btree and tell us if that keyspace does not + * map to any records; is fully mapped to records; or is partially mapped to + * records. This is the btree record equivalent to determining if a file is + * sparse. + * + * For most btree types, the record scan should use all available btree key + * fields to compare the keys encountered. These callers should pass NULL for + * @mask. However, some callers (e.g. scanning physical space in the rmapbt) + * want to ignore some part of the btree record keyspace when performing the + * comparison. These callers should pass in a union xfs_btree_key object with + * the fields that *should* be a part of the comparison set to any nonzero + * value, and the rest zeroed. + */ int -xfs_btree_has_record( +xfs_btree_has_records( struct xfs_btree_cur *cur, const union xfs_btree_irec *low, const union xfs_btree_irec *high, - bool *exists) + const union xfs_btree_key *mask, + enum xbtree_recpacking *outcome) { + struct xfs_btree_has_records info = { + .outcome = XBTREE_RECPACKING_EMPTY, + .key_mask = mask, + }; int error; - error = xfs_btree_query_range(cur, low, high, - &xfs_btree_has_record_helper, NULL); - if (error == -ECANCELED) { - *exists = true; - return 0; + /* Not all btrees support this operation. */ + if (!cur->bc_ops->keys_contiguous) { + ASSERT(0); + return -EOPNOTSUPP; } - *exists = false; - return error; + + xfs_btree_key_from_irec(cur, &info.start_key, low); + xfs_btree_key_from_irec(cur, &info.end_key, high); + + error = xfs_btree_query_range(cur, low, high, + xfs_btree_has_records_helper, &info); + if (error == -ECANCELED) + goto out; + if (error) + return error; + + if (info.outcome == XBTREE_RECPACKING_EMPTY) + goto out; + + /* + * If the largest high_key(rec) we saw during the walk is greater than + * the end of the search range, classify this as full. Otherwise, + * there is a hole at the end of the search range. + */ + if (xfs_btree_masked_keycmp_ge(cur, &info.high_key, &info.end_key, + mask)) + info.outcome = XBTREE_RECPACKING_FULL; + +out: + *outcome = info.outcome; + return 0; } /* Are there more records in this btree? */ diff --git a/fs/xfs/libxfs/xfs_btree.h b/fs/xfs/libxfs/xfs_btree.h index 29c4b4ccb909..a2aa36b23e25 100644 --- a/fs/xfs/libxfs/xfs_btree.h +++ b/fs/xfs/libxfs/xfs_btree.h @@ -90,6 +90,27 @@ uint32_t xfs_btree_magic(int crc, xfs_btnum_t btnum); #define XFS_BTREE_STATS_ADD(cur, stat, val) \ XFS_STATS_ADD_OFF((cur)->bc_mp, (cur)->bc_statoff + __XBTS_ ## stat, val) +enum xbtree_key_contig { + XBTREE_KEY_GAP = 0, + XBTREE_KEY_CONTIGUOUS, + XBTREE_KEY_OVERLAP, +}; + +/* + * Decide if these two numeric btree key fields are contiguous, overlapping, + * or if there's a gap between them. @x should be the field from the high + * key and @y should be the field from the low key. + */ +static inline enum xbtree_key_contig xbtree_key_contig(uint64_t x, uint64_t y) +{ + x++; + if (x < y) + return XBTREE_KEY_GAP; + if (x == y) + return XBTREE_KEY_CONTIGUOUS; + return XBTREE_KEY_OVERLAP; +} + struct xfs_btree_ops { /* size of the key and record structures */ size_t key_len; @@ -140,11 +161,14 @@ struct xfs_btree_ops { /* * Difference between key2 and key1 -- positive if key1 > key2, - * negative if key1 < key2, and zero if equal. + * negative if key1 < key2, and zero if equal. If the @mask parameter + * is non NULL, each key field to be used in the comparison must + * contain a nonzero value. */ int64_t (*diff_two_keys)(struct xfs_btree_cur *cur, const union xfs_btree_key *key1, - const union xfs_btree_key *key2); + const union xfs_btree_key *key2, + const union xfs_btree_key *mask); const struct xfs_buf_ops *buf_ops; @@ -157,6 +181,22 @@ struct xfs_btree_ops { int (*recs_inorder)(struct xfs_btree_cur *cur, const union xfs_btree_rec *r1, const union xfs_btree_rec *r2); + + /* + * Are these two btree keys immediately adjacent? + * + * Given two btree keys @key1 and @key2, decide if it is impossible for + * there to be a third btree key K satisfying the relationship + * @key1 < K < @key2. To determine if two btree records are + * immediately adjacent, @key1 should be the high key of the first + * record and @key2 should be the low key of the second record. + * If the @mask parameter is non NULL, each key field to be used in the + * comparison must contain a nonzero value. + */ + enum xbtree_key_contig (*keys_contiguous)(struct xfs_btree_cur *cur, + const union xfs_btree_key *key1, + const union xfs_btree_key *key2, + const union xfs_btree_key *mask); }; /* @@ -540,12 +580,105 @@ void xfs_btree_get_keys(struct xfs_btree_cur *cur, struct xfs_btree_block *block, union xfs_btree_key *key); union xfs_btree_key *xfs_btree_high_key_from_key(struct xfs_btree_cur *cur, union xfs_btree_key *key); -int xfs_btree_has_record(struct xfs_btree_cur *cur, +typedef bool (*xfs_btree_key_gap_fn)(struct xfs_btree_cur *cur, + const union xfs_btree_key *key1, + const union xfs_btree_key *key2); + +int xfs_btree_has_records(struct xfs_btree_cur *cur, const union xfs_btree_irec *low, - const union xfs_btree_irec *high, bool *exists); + const union xfs_btree_irec *high, + const union xfs_btree_key *mask, + enum xbtree_recpacking *outcome); + bool xfs_btree_has_more_records(struct xfs_btree_cur *cur); struct xfs_ifork *xfs_btree_ifork_ptr(struct xfs_btree_cur *cur); +/* Key comparison helpers */ +static inline bool +xfs_btree_keycmp_lt( + struct xfs_btree_cur *cur, + const union xfs_btree_key *key1, + const union xfs_btree_key *key2) +{ + return cur->bc_ops->diff_two_keys(cur, key1, key2, NULL) < 0; +} + +static inline bool +xfs_btree_keycmp_gt( + struct xfs_btree_cur *cur, + const union xfs_btree_key *key1, + const union xfs_btree_key *key2) +{ + return cur->bc_ops->diff_two_keys(cur, key1, key2, NULL) > 0; +} + +static inline bool +xfs_btree_keycmp_eq( + struct xfs_btree_cur *cur, + const union xfs_btree_key *key1, + const union xfs_btree_key *key2) +{ + return cur->bc_ops->diff_two_keys(cur, key1, key2, NULL) == 0; +} + +static inline bool +xfs_btree_keycmp_le( + struct xfs_btree_cur *cur, + const union xfs_btree_key *key1, + const union xfs_btree_key *key2) +{ + return !xfs_btree_keycmp_gt(cur, key1, key2); +} + +static inline bool +xfs_btree_keycmp_ge( + struct xfs_btree_cur *cur, + const union xfs_btree_key *key1, + const union xfs_btree_key *key2) +{ + return !xfs_btree_keycmp_lt(cur, key1, key2); +} + +static inline bool +xfs_btree_keycmp_ne( + struct xfs_btree_cur *cur, + const union xfs_btree_key *key1, + const union xfs_btree_key *key2) +{ + return !xfs_btree_keycmp_eq(cur, key1, key2); +} + +/* Masked key comparison helpers */ +static inline bool +xfs_btree_masked_keycmp_lt( + struct xfs_btree_cur *cur, + const union xfs_btree_key *key1, + const union xfs_btree_key *key2, + const union xfs_btree_key *mask) +{ + return cur->bc_ops->diff_two_keys(cur, key1, key2, mask) < 0; +} + +static inline bool +xfs_btree_masked_keycmp_gt( + struct xfs_btree_cur *cur, + const union xfs_btree_key *key1, + const union xfs_btree_key *key2, + const union xfs_btree_key *mask) +{ + return cur->bc_ops->diff_two_keys(cur, key1, key2, mask) > 0; +} + +static inline bool +xfs_btree_masked_keycmp_ge( + struct xfs_btree_cur *cur, + const union xfs_btree_key *key1, + const union xfs_btree_key *key2, + const union xfs_btree_key *mask) +{ + return !xfs_btree_masked_keycmp_lt(cur, key1, key2, mask); +} + /* Does this cursor point to the last block in the given level? */ static inline bool xfs_btree_islastblock( diff --git a/fs/xfs/libxfs/xfs_defer.c b/fs/xfs/libxfs/xfs_defer.c index 5a321b783398..bcfb6a4203cd 100644 --- a/fs/xfs/libxfs/xfs_defer.c +++ b/fs/xfs/libxfs/xfs_defer.c @@ -397,6 +397,7 @@ xfs_defer_cancel_list( list_for_each_safe(pwi, n, &dfp->dfp_work) { list_del(pwi); dfp->dfp_count--; + trace_xfs_defer_cancel_item(mp, dfp, pwi); ops->cancel_item(pwi); } ASSERT(dfp->dfp_count == 0); @@ -476,6 +477,7 @@ xfs_defer_finish_one( list_for_each_safe(li, n, &dfp->dfp_work) { list_del(li); dfp->dfp_count--; + trace_xfs_defer_finish_item(tp->t_mountp, dfp, li); error = ops->finish_item(tp, dfp->dfp_done, li, &state); if (error == -EAGAIN) { int ret; @@ -623,7 +625,7 @@ xfs_defer_add( struct list_head *li) { struct xfs_defer_pending *dfp = NULL; - const struct xfs_defer_op_type *ops; + const struct xfs_defer_op_type *ops = defer_op_types[type]; ASSERT(tp->t_flags & XFS_TRANS_PERM_LOG_RES); BUILD_BUG_ON(ARRAY_SIZE(defer_op_types) != XFS_DEFER_OPS_TYPE_MAX); @@ -636,7 +638,6 @@ xfs_defer_add( if (!list_empty(&tp->t_dfops)) { dfp = list_last_entry(&tp->t_dfops, struct xfs_defer_pending, dfp_list); - ops = defer_op_types[dfp->dfp_type]; if (dfp->dfp_type != type || (ops->max_items && dfp->dfp_count >= ops->max_items)) dfp = NULL; @@ -653,6 +654,7 @@ xfs_defer_add( } list_add_tail(li, &dfp->dfp_work); + trace_xfs_defer_add_item(tp->t_mountp, dfp, li); dfp->dfp_count++; } diff --git a/fs/xfs/libxfs/xfs_dir2.c b/fs/xfs/libxfs/xfs_dir2.c index 92bac3373f1f..f5462fd582d5 100644 --- a/fs/xfs/libxfs/xfs_dir2.c +++ b/fs/xfs/libxfs/xfs_dir2.c @@ -64,7 +64,7 @@ xfs_ascii_ci_hashname( int i; for (i = 0, hash = 0; i < name->len; i++) - hash = tolower(name->name[i]) ^ rol32(hash, 7); + hash = xfs_ascii_ci_xfrm(name->name[i]) ^ rol32(hash, 7); return hash; } @@ -85,7 +85,8 @@ xfs_ascii_ci_compname( for (i = 0; i < len; i++) { if (args->name[i] == name[i]) continue; - if (tolower(args->name[i]) != tolower(name[i])) + if (xfs_ascii_ci_xfrm(args->name[i]) != + xfs_ascii_ci_xfrm(name[i])) return XFS_CMP_DIFFERENT; result = XFS_CMP_CASE; } diff --git a/fs/xfs/libxfs/xfs_dir2.h b/fs/xfs/libxfs/xfs_dir2.h index dd39f17dd9a9..19af22a16c41 100644 --- a/fs/xfs/libxfs/xfs_dir2.h +++ b/fs/xfs/libxfs/xfs_dir2.h @@ -248,4 +248,35 @@ unsigned int xfs_dir3_data_end_offset(struct xfs_da_geometry *geo, struct xfs_dir2_data_hdr *hdr); bool xfs_dir2_namecheck(const void *name, size_t length); +/* + * The "ascii-ci" feature was created to speed up case-insensitive lookups for + * a Samba product. Because of the inherent problems with CI and UTF-8 + * encoding, etc, it was decided that Samba would be configured to export + * latin1/iso 8859-1 encodings as that covered >90% of the target markets for + * the product. Hence the "ascii-ci" casefolding code could be encoded into + * the XFS directory operations and remove all the overhead of casefolding from + * Samba. + * + * To provide consistent hashing behavior between the userspace and kernel, + * these functions prepare names for hashing by transforming specific bytes + * to other bytes. Robustness with other encodings is not guaranteed. + */ +static inline bool xfs_ascii_ci_need_xfrm(unsigned char c) +{ + if (c >= 0x41 && c <= 0x5a) /* A-Z */ + return true; + if (c >= 0xc0 && c <= 0xd6) /* latin A-O with accents */ + return true; + if (c >= 0xd8 && c <= 0xde) /* latin O-Y with accents */ + return true; + return false; +} + +static inline unsigned char xfs_ascii_ci_xfrm(unsigned char c) +{ + if (xfs_ascii_ci_need_xfrm(c)) + c -= 'A' - 'a'; + return c; +} + #endif /* __XFS_DIR2_H__ */ diff --git a/fs/xfs/libxfs/xfs_ialloc.c b/fs/xfs/libxfs/xfs_ialloc.c index 7ee292aecbeb..a16d5de16933 100644 --- a/fs/xfs/libxfs/xfs_ialloc.c +++ b/fs/xfs/libxfs/xfs_ialloc.c @@ -95,33 +95,25 @@ xfs_inobt_btrec_to_irec( irec->ir_free = be64_to_cpu(rec->inobt.ir_free); } -/* - * Get the data from the pointed-to record. - */ -int -xfs_inobt_get_rec( - struct xfs_btree_cur *cur, - struct xfs_inobt_rec_incore *irec, - int *stat) +/* Simple checks for inode records. */ +xfs_failaddr_t +xfs_inobt_check_irec( + struct xfs_btree_cur *cur, + const struct xfs_inobt_rec_incore *irec) { - struct xfs_mount *mp = cur->bc_mp; - union xfs_btree_rec *rec; - int error; uint64_t realfree; - error = xfs_btree_get_rec(cur, &rec, stat); - if (error || *stat == 0) - return error; - - xfs_inobt_btrec_to_irec(mp, rec, irec); - + /* Record has to be properly aligned within the AG. */ if (!xfs_verify_agino(cur->bc_ag.pag, irec->ir_startino)) - goto out_bad_rec; + return __this_address; + if (!xfs_verify_agino(cur->bc_ag.pag, + irec->ir_startino + XFS_INODES_PER_CHUNK - 1)) + return __this_address; if (irec->ir_count < XFS_INODES_PER_HOLEMASK_BIT || irec->ir_count > XFS_INODES_PER_CHUNK) - goto out_bad_rec; + return __this_address; if (irec->ir_freecount > XFS_INODES_PER_CHUNK) - goto out_bad_rec; + return __this_address; /* if there are no holes, return the first available offset */ if (!xfs_inobt_issparse(irec->ir_holemask)) @@ -129,15 +121,23 @@ xfs_inobt_get_rec( else realfree = irec->ir_free & xfs_inobt_irec_to_allocmask(irec); if (hweight64(realfree) != irec->ir_freecount) - goto out_bad_rec; + return __this_address; - return 0; + return NULL; +} + +static inline int +xfs_inobt_complain_bad_rec( + struct xfs_btree_cur *cur, + xfs_failaddr_t fa, + const struct xfs_inobt_rec_incore *irec) +{ + struct xfs_mount *mp = cur->bc_mp; -out_bad_rec: xfs_warn(mp, - "%s Inode BTree record corruption in AG %d detected!", + "%s Inode BTree record corruption in AG %d detected at %pS!", cur->bc_btnum == XFS_BTNUM_INO ? "Used" : "Free", - cur->bc_ag.pag->pag_agno); + cur->bc_ag.pag->pag_agno, fa); xfs_warn(mp, "start inode 0x%x, count 0x%x, free 0x%x freemask 0x%llx, holemask 0x%x", irec->ir_startino, irec->ir_count, irec->ir_freecount, @@ -146,6 +146,32 @@ out_bad_rec: } /* + * Get the data from the pointed-to record. + */ +int +xfs_inobt_get_rec( + struct xfs_btree_cur *cur, + struct xfs_inobt_rec_incore *irec, + int *stat) +{ + struct xfs_mount *mp = cur->bc_mp; + union xfs_btree_rec *rec; + xfs_failaddr_t fa; + int error; + + error = xfs_btree_get_rec(cur, &rec, stat); + if (error || *stat == 0) + return error; + + xfs_inobt_btrec_to_irec(mp, rec, irec); + fa = xfs_inobt_check_irec(cur, irec); + if (fa) + return xfs_inobt_complain_bad_rec(cur, fa, irec); + + return 0; +} + +/* * Insert a single inobt record. Cursor must already point to desired location. */ int @@ -1952,8 +1978,6 @@ xfs_difree_inobt( */ if (!xfs_has_ikeep(mp) && rec.ir_free == XFS_INOBT_ALL_FREE && mp->m_sb.sb_inopblock <= XFS_INODES_PER_CHUNK) { - struct xfs_perag *pag = agbp->b_pag; - xic->deleted = true; xic->first_ino = XFS_AGINO_TO_INO(mp, pag->pag_agno, rec.ir_startino); @@ -2617,44 +2641,50 @@ xfs_ialloc_read_agi( return 0; } -/* Is there an inode record covering a given range of inode numbers? */ -int -xfs_ialloc_has_inode_record( - struct xfs_btree_cur *cur, - xfs_agino_t low, - xfs_agino_t high, - bool *exists) +/* How many inodes are backed by inode clusters ondisk? */ +STATIC int +xfs_ialloc_count_ondisk( + struct xfs_btree_cur *cur, + xfs_agino_t low, + xfs_agino_t high, + unsigned int *allocated) { struct xfs_inobt_rec_incore irec; - xfs_agino_t agino; - uint16_t holemask; - int has_record; - int i; - int error; + unsigned int ret = 0; + int has_record; + int error; - *exists = false; error = xfs_inobt_lookup(cur, low, XFS_LOOKUP_LE, &has_record); - while (error == 0 && has_record) { + if (error) + return error; + + while (has_record) { + unsigned int i, hole_idx; + error = xfs_inobt_get_rec(cur, &irec, &has_record); - if (error || irec.ir_startino > high) + if (error) + return error; + if (irec.ir_startino > high) break; - agino = irec.ir_startino; - holemask = irec.ir_holemask; - for (i = 0; i < XFS_INOBT_HOLEMASK_BITS; holemask >>= 1, - i++, agino += XFS_INODES_PER_HOLEMASK_BIT) { - if (holemask & 1) + for (i = 0; i < XFS_INODES_PER_CHUNK; i++) { + if (irec.ir_startino + i < low) continue; - if (agino + XFS_INODES_PER_HOLEMASK_BIT > low && - agino <= high) { - *exists = true; - return 0; - } + if (irec.ir_startino + i > high) + break; + + hole_idx = i / XFS_INODES_PER_HOLEMASK_BIT; + if (!(irec.ir_holemask & (1U << hole_idx))) + ret++; } error = xfs_btree_increment(cur, 0, &has_record); + if (error) + return error; } - return error; + + *allocated = ret; + return 0; } /* Is there an inode record covering a given extent? */ @@ -2663,15 +2693,27 @@ xfs_ialloc_has_inodes_at_extent( struct xfs_btree_cur *cur, xfs_agblock_t bno, xfs_extlen_t len, - bool *exists) + enum xbtree_recpacking *outcome) { - xfs_agino_t low; - xfs_agino_t high; + xfs_agino_t agino; + xfs_agino_t last_agino; + unsigned int allocated; + int error; + + agino = XFS_AGB_TO_AGINO(cur->bc_mp, bno); + last_agino = XFS_AGB_TO_AGINO(cur->bc_mp, bno + len) - 1; - low = XFS_AGB_TO_AGINO(cur->bc_mp, bno); - high = XFS_AGB_TO_AGINO(cur->bc_mp, bno + len) - 1; + error = xfs_ialloc_count_ondisk(cur, agino, last_agino, &allocated); + if (error) + return error; - return xfs_ialloc_has_inode_record(cur, low, high, exists); + if (allocated == 0) + *outcome = XBTREE_RECPACKING_EMPTY; + else if (allocated == last_agino - agino + 1) + *outcome = XBTREE_RECPACKING_FULL; + else + *outcome = XBTREE_RECPACKING_SPARSE; + return 0; } struct xfs_ialloc_count_inodes { @@ -2688,8 +2730,13 @@ xfs_ialloc_count_inodes_rec( { struct xfs_inobt_rec_incore irec; struct xfs_ialloc_count_inodes *ci = priv; + xfs_failaddr_t fa; xfs_inobt_btrec_to_irec(cur->bc_mp, rec, &irec); + fa = xfs_inobt_check_irec(cur, &irec); + if (fa) + return xfs_inobt_complain_bad_rec(cur, fa, &irec); + ci->count += irec.ir_count; ci->freecount += irec.ir_freecount; diff --git a/fs/xfs/libxfs/xfs_ialloc.h b/fs/xfs/libxfs/xfs_ialloc.h index ab8c30b4ec22..fe824bb04a09 100644 --- a/fs/xfs/libxfs/xfs_ialloc.h +++ b/fs/xfs/libxfs/xfs_ialloc.h @@ -93,10 +93,11 @@ union xfs_btree_rec; void xfs_inobt_btrec_to_irec(struct xfs_mount *mp, const union xfs_btree_rec *rec, struct xfs_inobt_rec_incore *irec); +xfs_failaddr_t xfs_inobt_check_irec(struct xfs_btree_cur *cur, + const struct xfs_inobt_rec_incore *irec); int xfs_ialloc_has_inodes_at_extent(struct xfs_btree_cur *cur, - xfs_agblock_t bno, xfs_extlen_t len, bool *exists); -int xfs_ialloc_has_inode_record(struct xfs_btree_cur *cur, xfs_agino_t low, - xfs_agino_t high, bool *exists); + xfs_agblock_t bno, xfs_extlen_t len, + enum xbtree_recpacking *outcome); int xfs_ialloc_count_inodes(struct xfs_btree_cur *cur, xfs_agino_t *count, xfs_agino_t *freecount); int xfs_inobt_insert_rec(struct xfs_btree_cur *cur, uint16_t holemask, diff --git a/fs/xfs/libxfs/xfs_ialloc_btree.c b/fs/xfs/libxfs/xfs_ialloc_btree.c index 9b28211d5a4c..5a945ae21b5d 100644 --- a/fs/xfs/libxfs/xfs_ialloc_btree.c +++ b/fs/xfs/libxfs/xfs_ialloc_btree.c @@ -156,9 +156,12 @@ __xfs_inobt_free_block( struct xfs_buf *bp, enum xfs_ag_resv_type resv) { + xfs_fsblock_t fsbno; + xfs_inobt_mod_blockcount(cur, -1); - return xfs_free_extent(cur->bc_tp, - XFS_DADDR_TO_FSB(cur->bc_mp, xfs_buf_daddr(bp)), 1, + fsbno = XFS_DADDR_TO_FSB(cur->bc_mp, xfs_buf_daddr(bp)); + return xfs_free_extent(cur->bc_tp, cur->bc_ag.pag, + XFS_FSB_TO_AGBNO(cur->bc_mp, fsbno), 1, &XFS_RMAP_OINFO_INOBT, resv); } @@ -266,10 +269,13 @@ STATIC int64_t xfs_inobt_diff_two_keys( struct xfs_btree_cur *cur, const union xfs_btree_key *k1, - const union xfs_btree_key *k2) + const union xfs_btree_key *k2, + const union xfs_btree_key *mask) { + ASSERT(!mask || mask->inobt.ir_startino); + return (int64_t)be32_to_cpu(k1->inobt.ir_startino) - - be32_to_cpu(k2->inobt.ir_startino); + be32_to_cpu(k2->inobt.ir_startino); } static xfs_failaddr_t @@ -380,6 +386,19 @@ xfs_inobt_recs_inorder( be32_to_cpu(r2->inobt.ir_startino); } +STATIC enum xbtree_key_contig +xfs_inobt_keys_contiguous( + struct xfs_btree_cur *cur, + const union xfs_btree_key *key1, + const union xfs_btree_key *key2, + const union xfs_btree_key *mask) +{ + ASSERT(!mask || mask->inobt.ir_startino); + + return xbtree_key_contig(be32_to_cpu(key1->inobt.ir_startino), + be32_to_cpu(key2->inobt.ir_startino)); +} + static const struct xfs_btree_ops xfs_inobt_ops = { .rec_len = sizeof(xfs_inobt_rec_t), .key_len = sizeof(xfs_inobt_key_t), @@ -399,6 +418,7 @@ static const struct xfs_btree_ops xfs_inobt_ops = { .diff_two_keys = xfs_inobt_diff_two_keys, .keys_inorder = xfs_inobt_keys_inorder, .recs_inorder = xfs_inobt_recs_inorder, + .keys_contiguous = xfs_inobt_keys_contiguous, }; static const struct xfs_btree_ops xfs_finobt_ops = { @@ -420,6 +440,7 @@ static const struct xfs_btree_ops xfs_finobt_ops = { .diff_two_keys = xfs_inobt_diff_two_keys, .keys_inorder = xfs_inobt_keys_inorder, .recs_inorder = xfs_inobt_recs_inorder, + .keys_contiguous = xfs_inobt_keys_contiguous, }; /* @@ -447,9 +468,7 @@ xfs_inobt_init_common( if (xfs_has_crc(mp)) cur->bc_flags |= XFS_BTREE_CRC_BLOCKS; - /* take a reference for the cursor */ - atomic_inc(&pag->pag_ref); - cur->bc_ag.pag = pag; + cur->bc_ag.pag = xfs_perag_hold(pag); return cur; } @@ -607,7 +626,7 @@ xfs_iallocbt_maxlevels_ondisk(void) */ uint64_t xfs_inobt_irec_to_allocmask( - struct xfs_inobt_rec_incore *rec) + const struct xfs_inobt_rec_incore *rec) { uint64_t bitmap = 0; uint64_t inodespbit; diff --git a/fs/xfs/libxfs/xfs_ialloc_btree.h b/fs/xfs/libxfs/xfs_ialloc_btree.h index e859a6e05230..3262c3fe5ebe 100644 --- a/fs/xfs/libxfs/xfs_ialloc_btree.h +++ b/fs/xfs/libxfs/xfs_ialloc_btree.h @@ -53,7 +53,7 @@ struct xfs_btree_cur *xfs_inobt_stage_cursor(struct xfs_perag *pag, extern int xfs_inobt_maxrecs(struct xfs_mount *, int, int); /* ir_holemask to inode allocation bitmap conversion */ -uint64_t xfs_inobt_irec_to_allocmask(struct xfs_inobt_rec_incore *); +uint64_t xfs_inobt_irec_to_allocmask(const struct xfs_inobt_rec_incore *irec); #if defined(DEBUG) || defined(XFS_WARN) int xfs_inobt_rec_check_count(struct xfs_mount *, diff --git a/fs/xfs/libxfs/xfs_inode_fork.c b/fs/xfs/libxfs/xfs_inode_fork.c index 6b21760184d9..5a2e7ddfa76d 100644 --- a/fs/xfs/libxfs/xfs_inode_fork.c +++ b/fs/xfs/libxfs/xfs_inode_fork.c @@ -140,7 +140,8 @@ xfs_iformat_extents( xfs_inode_verifier_error(ip, -EFSCORRUPTED, "xfs_iformat_extents(2)", dp, sizeof(*dp), fa); - return -EFSCORRUPTED; + return xfs_bmap_complain_bad_rec(ip, whichfork, + fa, &new); } xfs_iext_insert(ip, &icur, &new, state); @@ -226,10 +227,15 @@ xfs_iformat_data_fork( /* * Initialize the extent count early, as the per-format routines may - * depend on it. + * depend on it. Use release semantics to set needextents /after/ we + * set the format. This ensures that we can use acquire semantics on + * needextents in xfs_need_iread_extents() and be guaranteed to see a + * valid format value after that load. */ ip->i_df.if_format = dip->di_format; ip->i_df.if_nextents = xfs_dfork_data_extents(dip); + smp_store_release(&ip->i_df.if_needextents, + ip->i_df.if_format == XFS_DINODE_FMT_BTREE ? 1 : 0); switch (inode->i_mode & S_IFMT) { case S_IFIFO: @@ -282,8 +288,17 @@ xfs_ifork_init_attr( enum xfs_dinode_fmt format, xfs_extnum_t nextents) { + /* + * Initialize the extent count early, as the per-format routines may + * depend on it. Use release semantics to set needextents /after/ we + * set the format. This ensures that we can use acquire semantics on + * needextents in xfs_need_iread_extents() and be guaranteed to see a + * valid format value after that load. + */ ip->i_af.if_format = format; ip->i_af.if_nextents = nextents; + smp_store_release(&ip->i_af.if_needextents, + ip->i_af.if_format == XFS_DINODE_FMT_BTREE ? 1 : 0); } void diff --git a/fs/xfs/libxfs/xfs_inode_fork.h b/fs/xfs/libxfs/xfs_inode_fork.h index d3943d6ad0b9..96d307784c85 100644 --- a/fs/xfs/libxfs/xfs_inode_fork.h +++ b/fs/xfs/libxfs/xfs_inode_fork.h @@ -24,6 +24,7 @@ struct xfs_ifork { xfs_extnum_t if_nextents; /* # of extents in this fork */ short if_broot_bytes; /* bytes allocated for root */ int8_t if_format; /* format of this fork */ + uint8_t if_needextents; /* extents have not been read */ }; /* @@ -260,9 +261,10 @@ int xfs_iext_count_upgrade(struct xfs_trans *tp, struct xfs_inode *ip, uint nr_to_add); /* returns true if the fork has extents but they are not read in yet. */ -static inline bool xfs_need_iread_extents(struct xfs_ifork *ifp) +static inline bool xfs_need_iread_extents(const struct xfs_ifork *ifp) { - return ifp->if_format == XFS_DINODE_FMT_BTREE && ifp->if_height == 0; + /* see xfs_iformat_{data,attr}_fork() for needextents semantics */ + return smp_load_acquire(&ifp->if_needextents) != 0; } #endif /* __XFS_INODE_FORK_H__ */ diff --git a/fs/xfs/libxfs/xfs_refcount.c b/fs/xfs/libxfs/xfs_refcount.c index bcf46aa0d08b..c1c65774dcc2 100644 --- a/fs/xfs/libxfs/xfs_refcount.c +++ b/fs/xfs/libxfs/xfs_refcount.c @@ -120,45 +120,41 @@ xfs_refcount_btrec_to_irec( irec->rc_refcount = be32_to_cpu(rec->refc.rc_refcount); } -/* - * Get the data from the pointed-to record. - */ -int -xfs_refcount_get_rec( +/* Simple checks for refcount records. */ +xfs_failaddr_t +xfs_refcount_check_irec( struct xfs_btree_cur *cur, - struct xfs_refcount_irec *irec, - int *stat) + const struct xfs_refcount_irec *irec) { - struct xfs_mount *mp = cur->bc_mp; struct xfs_perag *pag = cur->bc_ag.pag; - union xfs_btree_rec *rec; - int error; - - error = xfs_btree_get_rec(cur, &rec, stat); - if (error || !*stat) - return error; - xfs_refcount_btrec_to_irec(rec, irec); if (irec->rc_blockcount == 0 || irec->rc_blockcount > MAXREFCEXTLEN) - goto out_bad_rec; + return __this_address; if (!xfs_refcount_check_domain(irec)) - goto out_bad_rec; + return __this_address; /* check for valid extent range, including overflow */ if (!xfs_verify_agbext(pag, irec->rc_startblock, irec->rc_blockcount)) - goto out_bad_rec; + return __this_address; if (irec->rc_refcount == 0 || irec->rc_refcount > MAXREFCOUNT) - goto out_bad_rec; + return __this_address; - trace_xfs_refcount_get(cur->bc_mp, pag->pag_agno, irec); - return 0; + return NULL; +} + +static inline int +xfs_refcount_complain_bad_rec( + struct xfs_btree_cur *cur, + xfs_failaddr_t fa, + const struct xfs_refcount_irec *irec) +{ + struct xfs_mount *mp = cur->bc_mp; -out_bad_rec: xfs_warn(mp, - "Refcount BTree record corruption in AG %d detected!", - pag->pag_agno); + "Refcount BTree record corruption in AG %d detected at %pS!", + cur->bc_ag.pag->pag_agno, fa); xfs_warn(mp, "Start block 0x%x, block count 0x%x, references 0x%x", irec->rc_startblock, irec->rc_blockcount, irec->rc_refcount); @@ -166,6 +162,32 @@ out_bad_rec: } /* + * Get the data from the pointed-to record. + */ +int +xfs_refcount_get_rec( + struct xfs_btree_cur *cur, + struct xfs_refcount_irec *irec, + int *stat) +{ + union xfs_btree_rec *rec; + xfs_failaddr_t fa; + int error; + + error = xfs_btree_get_rec(cur, &rec, stat); + if (error || !*stat) + return error; + + xfs_refcount_btrec_to_irec(rec, irec); + fa = xfs_refcount_check_irec(cur, irec); + if (fa) + return xfs_refcount_complain_bad_rec(cur, fa, irec); + + trace_xfs_refcount_get(cur->bc_mp, cur->bc_ag.pag->pag_agno, irec); + return 0; +} + +/* * Update the record referred to by cur to the value given * by [bno, len, refcount]. * This either works (return 0) or gets an EFSCORRUPTED error. @@ -1332,26 +1354,22 @@ xfs_refcount_finish_one( xfs_agblock_t bno; unsigned long nr_ops = 0; int shape_changes = 0; - struct xfs_perag *pag; - pag = xfs_perag_get(mp, XFS_FSB_TO_AGNO(mp, ri->ri_startblock)); bno = XFS_FSB_TO_AGBNO(mp, ri->ri_startblock); trace_xfs_refcount_deferred(mp, XFS_FSB_TO_AGNO(mp, ri->ri_startblock), ri->ri_type, XFS_FSB_TO_AGBNO(mp, ri->ri_startblock), ri->ri_blockcount); - if (XFS_TEST_ERROR(false, mp, XFS_ERRTAG_REFCOUNT_FINISH_ONE)) { - error = -EIO; - goto out_drop; - } + if (XFS_TEST_ERROR(false, mp, XFS_ERRTAG_REFCOUNT_FINISH_ONE)) + return -EIO; /* * If we haven't gotten a cursor or the cursor AG doesn't match * the startblock, get one now. */ rcur = *pcur; - if (rcur != NULL && rcur->bc_ag.pag != pag) { + if (rcur != NULL && rcur->bc_ag.pag != ri->ri_pag) { nr_ops = rcur->bc_ag.refc.nr_ops; shape_changes = rcur->bc_ag.refc.shape_changes; xfs_refcount_finish_one_cleanup(tp, rcur, 0); @@ -1359,12 +1377,12 @@ xfs_refcount_finish_one( *pcur = NULL; } if (rcur == NULL) { - error = xfs_alloc_read_agf(pag, tp, XFS_ALLOC_FLAG_FREEING, - &agbp); + error = xfs_alloc_read_agf(ri->ri_pag, tp, + XFS_ALLOC_FLAG_FREEING, &agbp); if (error) - goto out_drop; + return error; - rcur = xfs_refcountbt_init_cursor(mp, tp, agbp, pag); + rcur = xfs_refcountbt_init_cursor(mp, tp, agbp, ri->ri_pag); rcur->bc_ag.refc.nr_ops = nr_ops; rcur->bc_ag.refc.shape_changes = shape_changes; } @@ -1375,7 +1393,7 @@ xfs_refcount_finish_one( error = xfs_refcount_adjust(rcur, &bno, &ri->ri_blockcount, XFS_REFCOUNT_ADJUST_INCREASE); if (error) - goto out_drop; + return error; if (ri->ri_blockcount > 0) error = xfs_refcount_continue_op(rcur, ri, bno); break; @@ -1383,31 +1401,29 @@ xfs_refcount_finish_one( error = xfs_refcount_adjust(rcur, &bno, &ri->ri_blockcount, XFS_REFCOUNT_ADJUST_DECREASE); if (error) - goto out_drop; + return error; if (ri->ri_blockcount > 0) error = xfs_refcount_continue_op(rcur, ri, bno); break; case XFS_REFCOUNT_ALLOC_COW: error = __xfs_refcount_cow_alloc(rcur, bno, ri->ri_blockcount); if (error) - goto out_drop; + return error; ri->ri_blockcount = 0; break; case XFS_REFCOUNT_FREE_COW: error = __xfs_refcount_cow_free(rcur, bno, ri->ri_blockcount); if (error) - goto out_drop; + return error; ri->ri_blockcount = 0; break; default: ASSERT(0); - error = -EFSCORRUPTED; + return -EFSCORRUPTED; } if (!error && ri->ri_blockcount > 0) - trace_xfs_refcount_finish_one_leftover(mp, pag->pag_agno, + trace_xfs_refcount_finish_one_leftover(mp, ri->ri_pag->pag_agno, ri->ri_type, bno, ri->ri_blockcount); -out_drop: - xfs_perag_put(pag); return error; } @@ -1435,6 +1451,7 @@ __xfs_refcount_add( ri->ri_startblock = startblock; ri->ri_blockcount = blockcount; + xfs_refcount_update_get_group(tp->t_mountp, ri); xfs_defer_add(tp, XFS_DEFER_OPS_TYPE_REFCOUNT, &ri->ri_list); } @@ -1876,7 +1893,8 @@ xfs_refcount_recover_extent( INIT_LIST_HEAD(&rr->rr_list); xfs_refcount_btrec_to_irec(rec, &rr->rr_rrec); - if (XFS_IS_CORRUPT(cur->bc_mp, + if (xfs_refcount_check_irec(cur, &rr->rr_rrec) != NULL || + XFS_IS_CORRUPT(cur->bc_mp, rr->rr_rrec.rc_domain != XFS_REFC_DOMAIN_COW)) { kfree(rr); return -EFSCORRUPTED; @@ -1980,14 +1998,17 @@ out_free: return error; } -/* Is there a record covering a given extent? */ +/* + * Scan part of the keyspace of the refcount records and tell us if the area + * has no records, is fully mapped by records, or is partially filled. + */ int -xfs_refcount_has_record( +xfs_refcount_has_records( struct xfs_btree_cur *cur, enum xfs_refc_domain domain, xfs_agblock_t bno, xfs_extlen_t len, - bool *exists) + enum xbtree_recpacking *outcome) { union xfs_btree_irec low; union xfs_btree_irec high; @@ -1998,7 +2019,7 @@ xfs_refcount_has_record( high.rc.rc_startblock = bno + len - 1; low.rc.rc_domain = high.rc.rc_domain = domain; - return xfs_btree_has_record(cur, &low, &high, exists); + return xfs_btree_has_records(cur, &low, &high, NULL, outcome); } int __init diff --git a/fs/xfs/libxfs/xfs_refcount.h b/fs/xfs/libxfs/xfs_refcount.h index c633477ce3ce..783cd89ca195 100644 --- a/fs/xfs/libxfs/xfs_refcount.h +++ b/fs/xfs/libxfs/xfs_refcount.h @@ -50,6 +50,7 @@ enum xfs_refcount_intent_type { struct xfs_refcount_intent { struct list_head ri_list; + struct xfs_perag *ri_pag; enum xfs_refcount_intent_type ri_type; xfs_extlen_t ri_blockcount; xfs_fsblock_t ri_startblock; @@ -67,6 +68,9 @@ xfs_refcount_check_domain( return true; } +void xfs_refcount_update_get_group(struct xfs_mount *mp, + struct xfs_refcount_intent *ri); + void xfs_refcount_increase_extent(struct xfs_trans *tp, struct xfs_bmbt_irec *irec); void xfs_refcount_decrease_extent(struct xfs_trans *tp, @@ -107,12 +111,14 @@ extern int xfs_refcount_recover_cow_leftovers(struct xfs_mount *mp, */ #define XFS_REFCOUNT_ITEM_OVERHEAD 32 -extern int xfs_refcount_has_record(struct xfs_btree_cur *cur, +extern int xfs_refcount_has_records(struct xfs_btree_cur *cur, enum xfs_refc_domain domain, xfs_agblock_t bno, - xfs_extlen_t len, bool *exists); + xfs_extlen_t len, enum xbtree_recpacking *outcome); union xfs_btree_rec; extern void xfs_refcount_btrec_to_irec(const union xfs_btree_rec *rec, struct xfs_refcount_irec *irec); +xfs_failaddr_t xfs_refcount_check_irec(struct xfs_btree_cur *cur, + const struct xfs_refcount_irec *irec); extern int xfs_refcount_insert(struct xfs_btree_cur *cur, struct xfs_refcount_irec *irec, int *stat); diff --git a/fs/xfs/libxfs/xfs_refcount_btree.c b/fs/xfs/libxfs/xfs_refcount_btree.c index f3b860970b26..d4afc5f4e6a5 100644 --- a/fs/xfs/libxfs/xfs_refcount_btree.c +++ b/fs/xfs/libxfs/xfs_refcount_btree.c @@ -112,8 +112,9 @@ xfs_refcountbt_free_block( XFS_FSB_TO_AGBNO(cur->bc_mp, fsbno), 1); be32_add_cpu(&agf->agf_refcount_blocks, -1); xfs_alloc_log_agf(cur->bc_tp, agbp, XFS_AGF_REFCOUNT_BLOCKS); - error = xfs_free_extent(cur->bc_tp, fsbno, 1, &XFS_RMAP_OINFO_REFC, - XFS_AG_RESV_METADATA); + error = xfs_free_extent(cur->bc_tp, cur->bc_ag.pag, + XFS_FSB_TO_AGBNO(cur->bc_mp, fsbno), 1, + &XFS_RMAP_OINFO_REFC, XFS_AG_RESV_METADATA); if (error) return error; @@ -201,10 +202,13 @@ STATIC int64_t xfs_refcountbt_diff_two_keys( struct xfs_btree_cur *cur, const union xfs_btree_key *k1, - const union xfs_btree_key *k2) + const union xfs_btree_key *k2, + const union xfs_btree_key *mask) { + ASSERT(!mask || mask->refc.rc_startblock); + return (int64_t)be32_to_cpu(k1->refc.rc_startblock) - - be32_to_cpu(k2->refc.rc_startblock); + be32_to_cpu(k2->refc.rc_startblock); } STATIC xfs_failaddr_t @@ -299,6 +303,19 @@ xfs_refcountbt_recs_inorder( be32_to_cpu(r2->refc.rc_startblock); } +STATIC enum xbtree_key_contig +xfs_refcountbt_keys_contiguous( + struct xfs_btree_cur *cur, + const union xfs_btree_key *key1, + const union xfs_btree_key *key2, + const union xfs_btree_key *mask) +{ + ASSERT(!mask || mask->refc.rc_startblock); + + return xbtree_key_contig(be32_to_cpu(key1->refc.rc_startblock), + be32_to_cpu(key2->refc.rc_startblock)); +} + static const struct xfs_btree_ops xfs_refcountbt_ops = { .rec_len = sizeof(struct xfs_refcount_rec), .key_len = sizeof(struct xfs_refcount_key), @@ -318,6 +335,7 @@ static const struct xfs_btree_ops xfs_refcountbt_ops = { .diff_two_keys = xfs_refcountbt_diff_two_keys, .keys_inorder = xfs_refcountbt_keys_inorder, .recs_inorder = xfs_refcountbt_recs_inorder, + .keys_contiguous = xfs_refcountbt_keys_contiguous, }; /* @@ -339,10 +357,7 @@ xfs_refcountbt_init_common( cur->bc_flags |= XFS_BTREE_CRC_BLOCKS; - /* take a reference for the cursor */ - atomic_inc(&pag->pag_ref); - cur->bc_ag.pag = pag; - + cur->bc_ag.pag = xfs_perag_hold(pag); cur->bc_ag.refc.nr_ops = 0; cur->bc_ag.refc.shape_changes = 0; cur->bc_ops = &xfs_refcountbt_ops; diff --git a/fs/xfs/libxfs/xfs_rmap.c b/fs/xfs/libxfs/xfs_rmap.c index df720041cd3d..f4dc23b3b837 100644 --- a/fs/xfs/libxfs/xfs_rmap.c +++ b/fs/xfs/libxfs/xfs_rmap.c @@ -193,7 +193,7 @@ done: } /* Convert an internal btree record to an rmap record. */ -int +xfs_failaddr_t xfs_rmap_btrec_to_irec( const union xfs_btree_rec *rec, struct xfs_rmap_irec *irec) @@ -205,51 +205,74 @@ xfs_rmap_btrec_to_irec( irec); } -/* - * Get the data from the pointed-to record. - */ -int -xfs_rmap_get_rec( - struct xfs_btree_cur *cur, - struct xfs_rmap_irec *irec, - int *stat) +/* Simple checks for rmap records. */ +xfs_failaddr_t +xfs_rmap_check_irec( + struct xfs_btree_cur *cur, + const struct xfs_rmap_irec *irec) { - struct xfs_mount *mp = cur->bc_mp; - struct xfs_perag *pag = cur->bc_ag.pag; - union xfs_btree_rec *rec; - int error; - - error = xfs_btree_get_rec(cur, &rec, stat); - if (error || !*stat) - return error; - - if (xfs_rmap_btrec_to_irec(rec, irec)) - goto out_bad_rec; + struct xfs_mount *mp = cur->bc_mp; + bool is_inode; + bool is_unwritten; + bool is_bmbt; + bool is_attr; if (irec->rm_blockcount == 0) - goto out_bad_rec; + return __this_address; if (irec->rm_startblock <= XFS_AGFL_BLOCK(mp)) { if (irec->rm_owner != XFS_RMAP_OWN_FS) - goto out_bad_rec; + return __this_address; if (irec->rm_blockcount != XFS_AGFL_BLOCK(mp) + 1) - goto out_bad_rec; + return __this_address; } else { /* check for valid extent range, including overflow */ - if (!xfs_verify_agbext(pag, irec->rm_startblock, - irec->rm_blockcount)) - goto out_bad_rec; + if (!xfs_verify_agbext(cur->bc_ag.pag, irec->rm_startblock, + irec->rm_blockcount)) + return __this_address; } if (!(xfs_verify_ino(mp, irec->rm_owner) || (irec->rm_owner <= XFS_RMAP_OWN_FS && irec->rm_owner >= XFS_RMAP_OWN_MIN))) - goto out_bad_rec; + return __this_address; + + /* Check flags. */ + is_inode = !XFS_RMAP_NON_INODE_OWNER(irec->rm_owner); + is_bmbt = irec->rm_flags & XFS_RMAP_BMBT_BLOCK; + is_attr = irec->rm_flags & XFS_RMAP_ATTR_FORK; + is_unwritten = irec->rm_flags & XFS_RMAP_UNWRITTEN; + + if (is_bmbt && irec->rm_offset != 0) + return __this_address; + + if (!is_inode && irec->rm_offset != 0) + return __this_address; + + if (is_unwritten && (is_bmbt || !is_inode || is_attr)) + return __this_address; + + if (!is_inode && (is_bmbt || is_unwritten || is_attr)) + return __this_address; + + /* Check for a valid fork offset, if applicable. */ + if (is_inode && !is_bmbt && + !xfs_verify_fileext(mp, irec->rm_offset, irec->rm_blockcount)) + return __this_address; + + return NULL; +} + +static inline int +xfs_rmap_complain_bad_rec( + struct xfs_btree_cur *cur, + xfs_failaddr_t fa, + const struct xfs_rmap_irec *irec) +{ + struct xfs_mount *mp = cur->bc_mp; - return 0; -out_bad_rec: xfs_warn(mp, - "Reverse Mapping BTree record corruption in AG %d detected!", - pag->pag_agno); + "Reverse Mapping BTree record corruption in AG %d detected at %pS!", + cur->bc_ag.pag->pag_agno, fa); xfs_warn(mp, "Owner 0x%llx, flags 0x%x, start block 0x%x block count 0x%x", irec->rm_owner, irec->rm_flags, irec->rm_startblock, @@ -257,6 +280,32 @@ out_bad_rec: return -EFSCORRUPTED; } +/* + * Get the data from the pointed-to record. + */ +int +xfs_rmap_get_rec( + struct xfs_btree_cur *cur, + struct xfs_rmap_irec *irec, + int *stat) +{ + union xfs_btree_rec *rec; + xfs_failaddr_t fa; + int error; + + error = xfs_btree_get_rec(cur, &rec, stat); + if (error || !*stat) + return error; + + fa = xfs_rmap_btrec_to_irec(rec, irec); + if (!fa) + fa = xfs_rmap_check_irec(cur, irec); + if (fa) + return xfs_rmap_complain_bad_rec(cur, fa, irec); + + return 0; +} + struct xfs_find_left_neighbor_info { struct xfs_rmap_irec high; struct xfs_rmap_irec *irec; @@ -2320,11 +2369,14 @@ xfs_rmap_query_range_helper( { struct xfs_rmap_query_range_info *query = priv; struct xfs_rmap_irec irec; - int error; + xfs_failaddr_t fa; + + fa = xfs_rmap_btrec_to_irec(rec, &irec); + if (!fa) + fa = xfs_rmap_check_irec(cur, &irec); + if (fa) + return xfs_rmap_complain_bad_rec(cur, fa, &irec); - error = xfs_rmap_btrec_to_irec(rec, &irec); - if (error) - return error; return query->fn(cur, &irec, query->priv); } @@ -2394,7 +2446,6 @@ xfs_rmap_finish_one( struct xfs_btree_cur **pcur) { struct xfs_mount *mp = tp->t_mountp; - struct xfs_perag *pag; struct xfs_btree_cur *rcur; struct xfs_buf *agbp = NULL; int error = 0; @@ -2402,26 +2453,22 @@ xfs_rmap_finish_one( xfs_agblock_t bno; bool unwritten; - pag = xfs_perag_get(mp, XFS_FSB_TO_AGNO(mp, ri->ri_bmap.br_startblock)); bno = XFS_FSB_TO_AGBNO(mp, ri->ri_bmap.br_startblock); - trace_xfs_rmap_deferred(mp, pag->pag_agno, ri->ri_type, bno, + trace_xfs_rmap_deferred(mp, ri->ri_pag->pag_agno, ri->ri_type, bno, ri->ri_owner, ri->ri_whichfork, ri->ri_bmap.br_startoff, ri->ri_bmap.br_blockcount, ri->ri_bmap.br_state); - if (XFS_TEST_ERROR(false, mp, XFS_ERRTAG_RMAP_FINISH_ONE)) { - error = -EIO; - goto out_drop; - } - + if (XFS_TEST_ERROR(false, mp, XFS_ERRTAG_RMAP_FINISH_ONE)) + return -EIO; /* * If we haven't gotten a cursor or the cursor AG doesn't match * the startblock, get one now. */ rcur = *pcur; - if (rcur != NULL && rcur->bc_ag.pag != pag) { + if (rcur != NULL && rcur->bc_ag.pag != ri->ri_pag) { xfs_rmap_finish_one_cleanup(tp, rcur, 0); rcur = NULL; *pcur = NULL; @@ -2432,15 +2479,13 @@ xfs_rmap_finish_one( * rmapbt, because a shape change could cause us to * allocate blocks. */ - error = xfs_free_extent_fix_freelist(tp, pag, &agbp); + error = xfs_free_extent_fix_freelist(tp, ri->ri_pag, &agbp); if (error) - goto out_drop; - if (XFS_IS_CORRUPT(tp->t_mountp, !agbp)) { - error = -EFSCORRUPTED; - goto out_drop; - } + return error; + if (XFS_IS_CORRUPT(tp->t_mountp, !agbp)) + return -EFSCORRUPTED; - rcur = xfs_rmapbt_init_cursor(mp, tp, agbp, pag); + rcur = xfs_rmapbt_init_cursor(mp, tp, agbp, ri->ri_pag); } *pcur = rcur; @@ -2480,8 +2525,7 @@ xfs_rmap_finish_one( ASSERT(0); error = -EFSCORRUPTED; } -out_drop: - xfs_perag_put(pag); + return error; } @@ -2526,6 +2570,7 @@ __xfs_rmap_add( ri->ri_whichfork = whichfork; ri->ri_bmap = *bmap; + xfs_rmap_update_get_group(tp->t_mountp, ri); xfs_defer_add(tp, XFS_DEFER_OPS_TYPE_RMAP, &ri->ri_list); } @@ -2664,14 +2709,21 @@ xfs_rmap_compare( return 0; } -/* Is there a record covering a given extent? */ +/* + * Scan the physical storage part of the keyspace of the reverse mapping index + * and tell us if the area has no records, is fully mapped by records, or is + * partially filled. + */ int -xfs_rmap_has_record( +xfs_rmap_has_records( struct xfs_btree_cur *cur, xfs_agblock_t bno, xfs_extlen_t len, - bool *exists) + enum xbtree_recpacking *outcome) { + union xfs_btree_key mask = { + .rmap.rm_startblock = cpu_to_be32(-1U), + }; union xfs_btree_irec low; union xfs_btree_irec high; @@ -2680,68 +2732,144 @@ xfs_rmap_has_record( memset(&high, 0xFF, sizeof(high)); high.r.rm_startblock = bno + len - 1; - return xfs_btree_has_record(cur, &low, &high, exists); + return xfs_btree_has_records(cur, &low, &high, &mask, outcome); } -/* - * Is there a record for this owner completely covering a given physical - * extent? If so, *has_rmap will be set to true. If there is no record - * or the record only covers part of the range, we set *has_rmap to false. - * This function doesn't perform range lookups or offset checks, so it is - * not suitable for checking data fork blocks. - */ -int -xfs_rmap_record_exists( - struct xfs_btree_cur *cur, +struct xfs_rmap_ownercount { + /* Owner that we're looking for. */ + struct xfs_rmap_irec good; + + /* rmap search keys */ + struct xfs_rmap_irec low; + struct xfs_rmap_irec high; + + struct xfs_rmap_matches *results; + + /* Stop early if we find a nonmatch? */ + bool stop_on_nonmatch; +}; + +/* Does this rmap represent space that can have multiple owners? */ +static inline bool +xfs_rmap_shareable( + struct xfs_mount *mp, + const struct xfs_rmap_irec *rmap) +{ + if (!xfs_has_reflink(mp)) + return false; + if (XFS_RMAP_NON_INODE_OWNER(rmap->rm_owner)) + return false; + if (rmap->rm_flags & (XFS_RMAP_ATTR_FORK | + XFS_RMAP_BMBT_BLOCK)) + return false; + return true; +} + +static inline void +xfs_rmap_ownercount_init( + struct xfs_rmap_ownercount *roc, xfs_agblock_t bno, xfs_extlen_t len, const struct xfs_owner_info *oinfo, - bool *has_rmap) + struct xfs_rmap_matches *results) { - uint64_t owner; - uint64_t offset; - unsigned int flags; - int has_record; - struct xfs_rmap_irec irec; - int error; + memset(roc, 0, sizeof(*roc)); + roc->results = results; + + roc->low.rm_startblock = bno; + memset(&roc->high, 0xFF, sizeof(roc->high)); + roc->high.rm_startblock = bno + len - 1; + + memset(results, 0, sizeof(*results)); + roc->good.rm_startblock = bno; + roc->good.rm_blockcount = len; + roc->good.rm_owner = oinfo->oi_owner; + roc->good.rm_offset = oinfo->oi_offset; + if (oinfo->oi_flags & XFS_OWNER_INFO_ATTR_FORK) + roc->good.rm_flags |= XFS_RMAP_ATTR_FORK; + if (oinfo->oi_flags & XFS_OWNER_INFO_BMBT_BLOCK) + roc->good.rm_flags |= XFS_RMAP_BMBT_BLOCK; +} - xfs_owner_info_unpack(oinfo, &owner, &offset, &flags); - ASSERT(XFS_RMAP_NON_INODE_OWNER(owner) || - (flags & XFS_RMAP_BMBT_BLOCK)); +/* Figure out if this is a match for the owner. */ +STATIC int +xfs_rmap_count_owners_helper( + struct xfs_btree_cur *cur, + const struct xfs_rmap_irec *rec, + void *priv) +{ + struct xfs_rmap_ownercount *roc = priv; + struct xfs_rmap_irec check = *rec; + unsigned int keyflags; + bool filedata; + int64_t delta; + + filedata = !XFS_RMAP_NON_INODE_OWNER(check.rm_owner) && + !(check.rm_flags & XFS_RMAP_BMBT_BLOCK); + + /* Trim the part of check that comes before the comparison range. */ + delta = (int64_t)roc->good.rm_startblock - check.rm_startblock; + if (delta > 0) { + check.rm_startblock += delta; + check.rm_blockcount -= delta; + if (filedata) + check.rm_offset += delta; + } - error = xfs_rmap_lookup_le(cur, bno, owner, offset, flags, &irec, - &has_record); - if (error) - return error; - if (!has_record) { - *has_rmap = false; - return 0; + /* Trim the part of check that comes after the comparison range. */ + delta = (check.rm_startblock + check.rm_blockcount) - + (roc->good.rm_startblock + roc->good.rm_blockcount); + if (delta > 0) + check.rm_blockcount -= delta; + + /* Don't care about unwritten status for establishing ownership. */ + keyflags = check.rm_flags & (XFS_RMAP_ATTR_FORK | XFS_RMAP_BMBT_BLOCK); + + if (check.rm_startblock == roc->good.rm_startblock && + check.rm_blockcount == roc->good.rm_blockcount && + check.rm_owner == roc->good.rm_owner && + check.rm_offset == roc->good.rm_offset && + keyflags == roc->good.rm_flags) { + roc->results->matches++; + } else { + roc->results->non_owner_matches++; + if (xfs_rmap_shareable(cur->bc_mp, &roc->good) ^ + xfs_rmap_shareable(cur->bc_mp, &check)) + roc->results->bad_non_owner_matches++; } - *has_rmap = (irec.rm_owner == owner && irec.rm_startblock <= bno && - irec.rm_startblock + irec.rm_blockcount >= bno + len); + if (roc->results->non_owner_matches && roc->stop_on_nonmatch) + return -ECANCELED; + return 0; } -struct xfs_rmap_key_state { - uint64_t owner; - uint64_t offset; - unsigned int flags; -}; - -/* For each rmap given, figure out if it doesn't match the key we want. */ -STATIC int -xfs_rmap_has_other_keys_helper( +/* Count the number of owners and non-owners of this range of blocks. */ +int +xfs_rmap_count_owners( struct xfs_btree_cur *cur, - const struct xfs_rmap_irec *rec, - void *priv) + xfs_agblock_t bno, + xfs_extlen_t len, + const struct xfs_owner_info *oinfo, + struct xfs_rmap_matches *results) { - struct xfs_rmap_key_state *rks = priv; + struct xfs_rmap_ownercount roc; + int error; - if (rks->owner == rec->rm_owner && rks->offset == rec->rm_offset && - ((rks->flags & rec->rm_flags) & XFS_RMAP_KEY_FLAGS) == rks->flags) - return 0; - return -ECANCELED; + xfs_rmap_ownercount_init(&roc, bno, len, oinfo, results); + error = xfs_rmap_query_range(cur, &roc.low, &roc.high, + xfs_rmap_count_owners_helper, &roc); + if (error) + return error; + + /* + * There can't be any non-owner rmaps that conflict with the given + * owner if we didn't find any rmaps matching the owner. + */ + if (!results->matches) + results->bad_non_owner_matches = 0; + + return 0; } /* @@ -2754,28 +2882,26 @@ xfs_rmap_has_other_keys( xfs_agblock_t bno, xfs_extlen_t len, const struct xfs_owner_info *oinfo, - bool *has_rmap) + bool *has_other) { - struct xfs_rmap_irec low = {0}; - struct xfs_rmap_irec high; - struct xfs_rmap_key_state rks; + struct xfs_rmap_matches res; + struct xfs_rmap_ownercount roc; int error; - xfs_owner_info_unpack(oinfo, &rks.owner, &rks.offset, &rks.flags); - *has_rmap = false; - - low.rm_startblock = bno; - memset(&high, 0xFF, sizeof(high)); - high.rm_startblock = bno + len - 1; + xfs_rmap_ownercount_init(&roc, bno, len, oinfo, &res); + roc.stop_on_nonmatch = true; - error = xfs_rmap_query_range(cur, &low, &high, - xfs_rmap_has_other_keys_helper, &rks); + error = xfs_rmap_query_range(cur, &roc.low, &roc.high, + xfs_rmap_count_owners_helper, &roc); if (error == -ECANCELED) { - *has_rmap = true; + *has_other = true; return 0; } + if (error) + return error; - return error; + *has_other = false; + return 0; } const struct xfs_owner_info XFS_RMAP_OINFO_SKIP_UPDATE = { diff --git a/fs/xfs/libxfs/xfs_rmap.h b/fs/xfs/libxfs/xfs_rmap.h index 2dac88cea28d..3c98d9d50afb 100644 --- a/fs/xfs/libxfs/xfs_rmap.h +++ b/fs/xfs/libxfs/xfs_rmap.h @@ -62,13 +62,14 @@ xfs_rmap_irec_offset_pack( return x; } -static inline int +static inline xfs_failaddr_t xfs_rmap_irec_offset_unpack( __u64 offset, struct xfs_rmap_irec *irec) { if (offset & ~(XFS_RMAP_OFF_MASK | XFS_RMAP_OFF_FLAGS)) - return -EFSCORRUPTED; + return __this_address; + irec->rm_offset = XFS_RMAP_OFF(offset); irec->rm_flags = 0; if (offset & XFS_RMAP_OFF_ATTR_FORK) @@ -77,7 +78,7 @@ xfs_rmap_irec_offset_unpack( irec->rm_flags |= XFS_RMAP_BMBT_BLOCK; if (offset & XFS_RMAP_OFF_UNWRITTEN) irec->rm_flags |= XFS_RMAP_UNWRITTEN; - return 0; + return NULL; } static inline void @@ -162,8 +163,12 @@ struct xfs_rmap_intent { int ri_whichfork; uint64_t ri_owner; struct xfs_bmbt_irec ri_bmap; + struct xfs_perag *ri_pag; }; +void xfs_rmap_update_get_group(struct xfs_mount *mp, + struct xfs_rmap_intent *ri); + /* functions for updating the rmapbt based on bmbt map/unmap operations */ void xfs_rmap_map_extent(struct xfs_trans *tp, struct xfs_inode *ip, int whichfork, struct xfs_bmbt_irec *imap); @@ -188,16 +193,31 @@ int xfs_rmap_lookup_le_range(struct xfs_btree_cur *cur, xfs_agblock_t bno, int xfs_rmap_compare(const struct xfs_rmap_irec *a, const struct xfs_rmap_irec *b); union xfs_btree_rec; -int xfs_rmap_btrec_to_irec(const union xfs_btree_rec *rec, +xfs_failaddr_t xfs_rmap_btrec_to_irec(const union xfs_btree_rec *rec, struct xfs_rmap_irec *irec); -int xfs_rmap_has_record(struct xfs_btree_cur *cur, xfs_agblock_t bno, - xfs_extlen_t len, bool *exists); -int xfs_rmap_record_exists(struct xfs_btree_cur *cur, xfs_agblock_t bno, +xfs_failaddr_t xfs_rmap_check_irec(struct xfs_btree_cur *cur, + const struct xfs_rmap_irec *irec); + +int xfs_rmap_has_records(struct xfs_btree_cur *cur, xfs_agblock_t bno, + xfs_extlen_t len, enum xbtree_recpacking *outcome); + +struct xfs_rmap_matches { + /* Number of owner matches. */ + unsigned long long matches; + + /* Number of non-owner matches. */ + unsigned long long non_owner_matches; + + /* Number of non-owner matches that conflict with the owner matches. */ + unsigned long long bad_non_owner_matches; +}; + +int xfs_rmap_count_owners(struct xfs_btree_cur *cur, xfs_agblock_t bno, xfs_extlen_t len, const struct xfs_owner_info *oinfo, - bool *has_rmap); + struct xfs_rmap_matches *rmatch); int xfs_rmap_has_other_keys(struct xfs_btree_cur *cur, xfs_agblock_t bno, xfs_extlen_t len, const struct xfs_owner_info *oinfo, - bool *has_rmap); + bool *has_other); int xfs_rmap_map_raw(struct xfs_btree_cur *cur, struct xfs_rmap_irec *rmap); extern const struct xfs_owner_info XFS_RMAP_OINFO_SKIP_UPDATE; diff --git a/fs/xfs/libxfs/xfs_rmap_btree.c b/fs/xfs/libxfs/xfs_rmap_btree.c index d3285684bb5e..6c81b20e97d2 100644 --- a/fs/xfs/libxfs/xfs_rmap_btree.c +++ b/fs/xfs/libxfs/xfs_rmap_btree.c @@ -156,6 +156,16 @@ xfs_rmapbt_get_maxrecs( return cur->bc_mp->m_rmap_mxr[level != 0]; } +/* + * Convert the ondisk record's offset field into the ondisk key's offset field. + * Fork and bmbt are significant parts of the rmap record key, but written + * status is merely a record attribute. + */ +static inline __be64 ondisk_rec_offset_to_key(const union xfs_btree_rec *rec) +{ + return rec->rmap.rm_offset & ~cpu_to_be64(XFS_RMAP_OFF_UNWRITTEN); +} + STATIC void xfs_rmapbt_init_key_from_rec( union xfs_btree_key *key, @@ -163,7 +173,7 @@ xfs_rmapbt_init_key_from_rec( { key->rmap.rm_startblock = rec->rmap.rm_startblock; key->rmap.rm_owner = rec->rmap.rm_owner; - key->rmap.rm_offset = rec->rmap.rm_offset; + key->rmap.rm_offset = ondisk_rec_offset_to_key(rec); } /* @@ -186,7 +196,7 @@ xfs_rmapbt_init_high_key_from_rec( key->rmap.rm_startblock = rec->rmap.rm_startblock; be32_add_cpu(&key->rmap.rm_startblock, adj); key->rmap.rm_owner = rec->rmap.rm_owner; - key->rmap.rm_offset = rec->rmap.rm_offset; + key->rmap.rm_offset = ondisk_rec_offset_to_key(rec); if (XFS_RMAP_NON_INODE_OWNER(be64_to_cpu(rec->rmap.rm_owner)) || XFS_RMAP_IS_BMBT_BLOCK(be64_to_cpu(rec->rmap.rm_offset))) return; @@ -219,6 +229,16 @@ xfs_rmapbt_init_ptr_from_cur( ptr->s = agf->agf_roots[cur->bc_btnum]; } +/* + * Mask the appropriate parts of the ondisk key field for a key comparison. + * Fork and bmbt are significant parts of the rmap record key, but written + * status is merely a record attribute. + */ +static inline uint64_t offset_keymask(uint64_t offset) +{ + return offset & ~XFS_RMAP_OFF_UNWRITTEN; +} + STATIC int64_t xfs_rmapbt_key_diff( struct xfs_btree_cur *cur, @@ -240,8 +260,8 @@ xfs_rmapbt_key_diff( else if (y > x) return -1; - x = XFS_RMAP_OFF(be64_to_cpu(kp->rm_offset)); - y = rec->rm_offset; + x = offset_keymask(be64_to_cpu(kp->rm_offset)); + y = offset_keymask(xfs_rmap_irec_offset_pack(rec)); if (x > y) return 1; else if (y > x) @@ -253,31 +273,43 @@ STATIC int64_t xfs_rmapbt_diff_two_keys( struct xfs_btree_cur *cur, const union xfs_btree_key *k1, - const union xfs_btree_key *k2) + const union xfs_btree_key *k2, + const union xfs_btree_key *mask) { const struct xfs_rmap_key *kp1 = &k1->rmap; const struct xfs_rmap_key *kp2 = &k2->rmap; int64_t d; __u64 x, y; + /* Doesn't make sense to mask off the physical space part */ + ASSERT(!mask || mask->rmap.rm_startblock); + d = (int64_t)be32_to_cpu(kp1->rm_startblock) - - be32_to_cpu(kp2->rm_startblock); + be32_to_cpu(kp2->rm_startblock); if (d) return d; - x = be64_to_cpu(kp1->rm_owner); - y = be64_to_cpu(kp2->rm_owner); - if (x > y) - return 1; - else if (y > x) - return -1; + if (!mask || mask->rmap.rm_owner) { + x = be64_to_cpu(kp1->rm_owner); + y = be64_to_cpu(kp2->rm_owner); + if (x > y) + return 1; + else if (y > x) + return -1; + } + + if (!mask || mask->rmap.rm_offset) { + /* Doesn't make sense to allow offset but not owner */ + ASSERT(!mask || mask->rmap.rm_owner); + + x = offset_keymask(be64_to_cpu(kp1->rm_offset)); + y = offset_keymask(be64_to_cpu(kp2->rm_offset)); + if (x > y) + return 1; + else if (y > x) + return -1; + } - x = XFS_RMAP_OFF(be64_to_cpu(kp1->rm_offset)); - y = XFS_RMAP_OFF(be64_to_cpu(kp2->rm_offset)); - if (x > y) - return 1; - else if (y > x) - return -1; return 0; } @@ -387,8 +419,8 @@ xfs_rmapbt_keys_inorder( return 1; else if (a > b) return 0; - a = XFS_RMAP_OFF(be64_to_cpu(k1->rmap.rm_offset)); - b = XFS_RMAP_OFF(be64_to_cpu(k2->rmap.rm_offset)); + a = offset_keymask(be64_to_cpu(k1->rmap.rm_offset)); + b = offset_keymask(be64_to_cpu(k2->rmap.rm_offset)); if (a <= b) return 1; return 0; @@ -417,13 +449,33 @@ xfs_rmapbt_recs_inorder( return 1; else if (a > b) return 0; - a = XFS_RMAP_OFF(be64_to_cpu(r1->rmap.rm_offset)); - b = XFS_RMAP_OFF(be64_to_cpu(r2->rmap.rm_offset)); + a = offset_keymask(be64_to_cpu(r1->rmap.rm_offset)); + b = offset_keymask(be64_to_cpu(r2->rmap.rm_offset)); if (a <= b) return 1; return 0; } +STATIC enum xbtree_key_contig +xfs_rmapbt_keys_contiguous( + struct xfs_btree_cur *cur, + const union xfs_btree_key *key1, + const union xfs_btree_key *key2, + const union xfs_btree_key *mask) +{ + ASSERT(!mask || mask->rmap.rm_startblock); + + /* + * We only support checking contiguity of the physical space component. + * If any callers ever need more specificity than that, they'll have to + * implement it here. + */ + ASSERT(!mask || (!mask->rmap.rm_owner && !mask->rmap.rm_offset)); + + return xbtree_key_contig(be32_to_cpu(key1->rmap.rm_startblock), + be32_to_cpu(key2->rmap.rm_startblock)); +} + static const struct xfs_btree_ops xfs_rmapbt_ops = { .rec_len = sizeof(struct xfs_rmap_rec), .key_len = 2 * sizeof(struct xfs_rmap_key), @@ -443,6 +495,7 @@ static const struct xfs_btree_ops xfs_rmapbt_ops = { .diff_two_keys = xfs_rmapbt_diff_two_keys, .keys_inorder = xfs_rmapbt_keys_inorder, .recs_inorder = xfs_rmapbt_recs_inorder, + .keys_contiguous = xfs_rmapbt_keys_contiguous, }; static struct xfs_btree_cur * @@ -460,10 +513,7 @@ xfs_rmapbt_init_common( cur->bc_statoff = XFS_STATS_CALC_INDEX(xs_rmap_2); cur->bc_ops = &xfs_rmapbt_ops; - /* take a reference for the cursor */ - atomic_inc(&pag->pag_ref); - cur->bc_ag.pag = pag; - + cur->bc_ag.pag = xfs_perag_hold(pag); return cur; } diff --git a/fs/xfs/libxfs/xfs_sb.c b/fs/xfs/libxfs/xfs_sb.c index 99cc03a298e2..ba0f17bc1dc0 100644 --- a/fs/xfs/libxfs/xfs_sb.c +++ b/fs/xfs/libxfs/xfs_sb.c @@ -72,7 +72,8 @@ xfs_sb_validate_v5_features( } /* - * We support all XFS versions newer than a v4 superblock with V2 directories. + * We current support XFS v5 formats with known features and v4 superblocks with + * at least V2 directories. */ bool xfs_sb_good_version( @@ -86,16 +87,16 @@ xfs_sb_good_version( if (xfs_sb_is_v5(sbp)) return xfs_sb_validate_v5_features(sbp); + /* versions prior to v4 are not supported */ + if (XFS_SB_VERSION_NUM(sbp) != XFS_SB_VERSION_4) + return false; + /* We must not have any unknown v4 feature bits set */ if ((sbp->sb_versionnum & ~XFS_SB_VERSION_OKBITS) || ((sbp->sb_versionnum & XFS_SB_VERSION_MOREBITSBIT) && (sbp->sb_features2 & ~XFS_SB_VERSION2_OKBITS))) return false; - /* versions prior to v4 are not supported */ - if (XFS_SB_VERSION_NUM(sbp) < XFS_SB_VERSION_4) - return false; - /* V4 filesystems need v2 directories and unwritten extents */ if (!(sbp->sb_versionnum & XFS_SB_VERSION_DIRV2BIT)) return false; diff --git a/fs/xfs/libxfs/xfs_types.h b/fs/xfs/libxfs/xfs_types.h index 5ebdda7e1078..851220021484 100644 --- a/fs/xfs/libxfs/xfs_types.h +++ b/fs/xfs/libxfs/xfs_types.h @@ -204,6 +204,18 @@ enum xfs_ag_resv_type { XFS_AG_RESV_RMAPBT, }; +/* Results of scanning a btree keyspace to check occupancy. */ +enum xbtree_recpacking { + /* None of the keyspace maps to records. */ + XBTREE_RECPACKING_EMPTY = 0, + + /* Some, but not all, of the keyspace maps to records. */ + XBTREE_RECPACKING_SPARSE, + + /* The entire keyspace maps to records. */ + XBTREE_RECPACKING_FULL, +}; + /* * Type verifier functions */ |