summary refs log tree commit diff stats
path: root/fs/xfs/libxfs
diff options
context:
space:
mode:
Diffstat (limited to 'fs/xfs/libxfs')
-rw-r--r-- fs/xfs/libxfs/xfs_ag.c 23
-rw-r--r-- fs/xfs/libxfs/xfs_ag.h 9
-rw-r--r-- fs/xfs/libxfs/xfs_alloc.c 115
-rw-r--r-- fs/xfs/libxfs/xfs_alloc.h 22
-rw-r--r-- fs/xfs/libxfs/xfs_alloc_btree.c 32
-rw-r--r-- fs/xfs/libxfs/xfs_bmap.c 39
-rw-r--r-- fs/xfs/libxfs/xfs_bmap.h 8
-rw-r--r-- fs/xfs/libxfs/xfs_bmap_btree.c 19
-rw-r--r-- fs/xfs/libxfs/xfs_btree.c 204
-rw-r--r-- fs/xfs/libxfs/xfs_btree.h 141
-rw-r--r-- fs/xfs/libxfs/xfs_defer.c 6
-rw-r--r-- fs/xfs/libxfs/xfs_dir2.c 5
-rw-r--r-- fs/xfs/libxfs/xfs_dir2.h 31
-rw-r--r-- fs/xfs/libxfs/xfs_ialloc.c 165
-rw-r--r-- fs/xfs/libxfs/xfs_ialloc.h 7
-rw-r--r-- fs/xfs/libxfs/xfs_ialloc_btree.c 35
-rw-r--r-- fs/xfs/libxfs/xfs_ialloc_btree.h 2
-rw-r--r-- fs/xfs/libxfs/xfs_inode_fork.c 19
-rw-r--r-- fs/xfs/libxfs/xfs_inode_fork.h 6
-rw-r--r-- fs/xfs/libxfs/xfs_refcount.c 117
-rw-r--r-- fs/xfs/libxfs/xfs_refcount.h 10
-rw-r--r-- fs/xfs/libxfs/xfs_refcount_btree.c 31
-rw-r--r-- fs/xfs/libxfs/xfs_rmap.c 358
-rw-r--r-- fs/xfs/libxfs/xfs_rmap.h 38
-rw-r--r-- fs/xfs/libxfs/xfs_rmap_btree.c 102
-rw-r--r-- fs/xfs/libxfs/xfs_sb.c 11
-rw-r--r-- fs/xfs/libxfs/xfs_types.h 12
27 files changed, 1159 insertions, 408 deletions
diff --git a/fs/xfs/libxfs/xfs_ag.c b/fs/xfs/libxfs/xfs_ag.c
index 86696a1c6891..1b078bbbf225 100644
--- a/fs/xfs/libxfs/xfs_ag.c
+++ b/fs/xfs/libxfs/xfs_ag.c
@@ -81,6 +81,19 @@ xfs_perag_get_tag(
return pag;
}
+/* Get a passive reference to the given perag. */
+struct xfs_perag *
+xfs_perag_hold(
+ struct xfs_perag *pag)
+{
+ ASSERT(atomic_read(&pag->pag_ref) > 0 ||
+ atomic_read(&pag->pag_active_ref) > 0);
+
+ trace_xfs_perag_hold(pag, _RET_IP_);
+ atomic_inc(&pag->pag_ref);
+ return pag;
+}
+
void
xfs_perag_put(
struct xfs_perag *pag)
@@ -247,6 +260,7 @@ xfs_free_perag(
spin_unlock(&mp->m_perag_lock);
ASSERT(pag);
XFS_IS_CORRUPT(pag->pag_mount, atomic_read(&pag->pag_ref) != 0);
+ xfs_defer_drain_free(&pag->pag_intents_drain);
cancel_delayed_work_sync(&pag->pag_blockgc_work);
xfs_buf_hash_destroy(pag);
@@ -372,6 +386,7 @@ xfs_initialize_perag(
spin_lock_init(&pag->pag_state_lock);
INIT_DELAYED_WORK(&pag->pag_blockgc_work, xfs_blockgc_worker);
INIT_RADIX_TREE(&pag->pag_ici_root, GFP_ATOMIC);
+ xfs_defer_drain_init(&pag->pag_intents_drain);
init_waitqueue_head(&pag->pagb_wait);
init_waitqueue_head(&pag->pag_active_wq);
pag->pagb_count = 0;
@@ -408,6 +423,7 @@ xfs_initialize_perag(
return 0;
out_remove_pag:
+ xfs_defer_drain_free(&pag->pag_intents_drain);
radix_tree_delete(&mp->m_perag_tree, index);
out_free_pag:
kmem_free(pag);
@@ -418,6 +434,7 @@ out_unwind_new_pags:
if (!pag)
break;
xfs_buf_hash_destroy(pag);
+ xfs_defer_drain_free(&pag->pag_intents_drain);
kmem_free(pag);
}
return error;
@@ -1043,10 +1060,8 @@ xfs_ag_extend_space(
if (error)
return error;
- error = xfs_free_extent(tp, XFS_AGB_TO_FSB(pag->pag_mount, pag->pag_agno,
- be32_to_cpu(agf->agf_length) - len),
- len, &XFS_RMAP_OINFO_SKIP_UPDATE,
- XFS_AG_RESV_NONE);
+ error = xfs_free_extent(tp, pag, be32_to_cpu(agf->agf_length) - len,
+ len, &XFS_RMAP_OINFO_SKIP_UPDATE, XFS_AG_RESV_NONE);
if (error)
return error;
diff --git a/fs/xfs/libxfs/xfs_ag.h b/fs/xfs/libxfs/xfs_ag.h
index 5e18536dfdce..2e0aef87d633 100644
--- a/fs/xfs/libxfs/xfs_ag.h
+++ b/fs/xfs/libxfs/xfs_ag.h
@@ -101,6 +101,14 @@ struct xfs_perag {
/* background prealloc block trimming */
struct delayed_work pag_blockgc_work;
+ /*
+ * We use xfs_drain to track the number of deferred log intent items
+ * that have been queued (but not yet processed) so that waiters (e.g.
+ * scrub) will not lock resources when other threads are in the middle
+ * of processing a chain of intent items only to find momentary
+ * inconsistencies.
+ */
+ struct xfs_defer_drain pag_intents_drain;
#endif /* __KERNEL__ */
};
@@ -134,6 +142,7 @@ void xfs_free_perag(struct xfs_mount *mp);
struct xfs_perag *xfs_perag_get(struct xfs_mount *mp, xfs_agnumber_t agno);
struct xfs_perag *xfs_perag_get_tag(struct xfs_mount *mp, xfs_agnumber_t agno,
unsigned int tag);
+struct xfs_perag *xfs_perag_hold(struct xfs_perag *pag);
void xfs_perag_put(struct xfs_perag *pag);
/* Active AG references */
diff --git a/fs/xfs/libxfs/xfs_alloc.c b/fs/xfs/libxfs/xfs_alloc.c
index 203f16c48c19..fdfa08cbf4db 100644
--- a/fs/xfs/libxfs/xfs_alloc.c
+++ b/fs/xfs/libxfs/xfs_alloc.c
@@ -233,6 +233,52 @@ xfs_alloc_update(
return xfs_btree_update(cur, &rec);
}
+/* Convert the ondisk btree record to its incore representation. */
+void
+xfs_alloc_btrec_to_irec(
+ const union xfs_btree_rec *rec,
+ struct xfs_alloc_rec_incore *irec)
+{
+ irec->ar_startblock = be32_to_cpu(rec->alloc.ar_startblock);
+ irec->ar_blockcount = be32_to_cpu(rec->alloc.ar_blockcount);
+}
+
+/* Simple checks for free space records. */
+xfs_failaddr_t
+xfs_alloc_check_irec(
+ struct xfs_btree_cur *cur,
+ const struct xfs_alloc_rec_incore *irec)
+{
+ struct xfs_perag *pag = cur->bc_ag.pag;
+
+ if (irec->ar_blockcount == 0)
+ return __this_address;
+
+ /* check for valid extent range, including overflow */
+ if (!xfs_verify_agbext(pag, irec->ar_startblock, irec->ar_blockcount))
+ return __this_address;
+
+ return NULL;
+}
+
+static inline int
+xfs_alloc_complain_bad_rec(
+ struct xfs_btree_cur *cur,
+ xfs_failaddr_t fa,
+ const struct xfs_alloc_rec_incore *irec)
+{
+ struct xfs_mount *mp = cur->bc_mp;
+
+ xfs_warn(mp,
+ "%s Freespace BTree record corruption in AG %d detected at %pS!",
+ cur->bc_btnum == XFS_BTNUM_BNO ? "Block" : "Size",
+ cur->bc_ag.pag->pag_agno, fa);
+ xfs_warn(mp,
+ "start block 0x%x block count 0x%x", irec->ar_startblock,
+ irec->ar_blockcount);
+ return -EFSCORRUPTED;
+}
+
/*
* Get the data from the pointed-to record.
*/
@@ -243,35 +289,23 @@ xfs_alloc_get_rec(
xfs_extlen_t *len, /* output: length of extent */
int *stat) /* output: success/failure */
{
- struct xfs_mount *mp = cur->bc_mp;
- struct xfs_perag *pag = cur->bc_ag.pag;
+ struct xfs_alloc_rec_incore irec;
union xfs_btree_rec *rec;
+ xfs_failaddr_t fa;
int error;
error = xfs_btree_get_rec(cur, &rec, stat);
if (error || !(*stat))
return error;
- *bno = be32_to_cpu(rec->alloc.ar_startblock);
- *len = be32_to_cpu(rec->alloc.ar_blockcount);
-
- if (*len == 0)
- goto out_bad_rec;
-
- /* check for valid extent range, including overflow */
- if (!xfs_verify_agbext(pag, *bno, *len))
- goto out_bad_rec;
+ xfs_alloc_btrec_to_irec(rec, &irec);
+ fa = xfs_alloc_check_irec(cur, &irec);
+ if (fa)
+ return xfs_alloc_complain_bad_rec(cur, fa, &irec);
+ *bno = irec.ar_startblock;
+ *len = irec.ar_blockcount;
return 0;
-
-out_bad_rec:
- xfs_warn(mp,
- "%s Freespace BTree record corruption in AG %d detected!",
- cur->bc_btnum == XFS_BTNUM_BNO ? "Block" : "Size",
- pag->pag_agno);
- xfs_warn(mp,
- "start block 0x%x block count 0x%x", *bno, *len);
- return -EFSCORRUPTED;
}
/*
@@ -2405,6 +2439,7 @@ xfs_defer_agfl_block(
trace_xfs_agfl_free_defer(mp, agno, 0, agbno, 1);
+ xfs_extent_free_get_group(mp, xefi);
xfs_defer_add(tp, XFS_DEFER_OPS_TYPE_AGFL_FREE, &xefi->xefi_list);
}
@@ -2421,8 +2456,8 @@ __xfs_free_extent_later(
bool skip_discard)
{
struct xfs_extent_free_item *xefi;
-#ifdef DEBUG
struct xfs_mount *mp = tp->t_mountp;
+#ifdef DEBUG
xfs_agnumber_t agno;
xfs_agblock_t agbno;
@@ -2456,9 +2491,11 @@ __xfs_free_extent_later(
} else {
xefi->xefi_owner = XFS_RMAP_OWN_NULL;
}
- trace_xfs_bmap_free_defer(tp->t_mountp,
+ trace_xfs_bmap_free_defer(mp,
XFS_FSB_TO_AGNO(tp->t_mountp, bno), 0,
XFS_FSB_TO_AGBNO(tp->t_mountp, bno), len);
+
+ xfs_extent_free_get_group(mp, xefi);
xfs_defer_add(tp, XFS_DEFER_OPS_TYPE_FREE, &xefi->xefi_list);
}
@@ -3596,7 +3633,8 @@ xfs_free_extent_fix_freelist(
int
__xfs_free_extent(
struct xfs_trans *tp,
- xfs_fsblock_t bno,
+ struct xfs_perag *pag,
+ xfs_agblock_t agbno,
xfs_extlen_t len,
const struct xfs_owner_info *oinfo,
enum xfs_ag_resv_type type,
@@ -3604,12 +3642,9 @@ __xfs_free_extent(
{
struct xfs_mount *mp = tp->t_mountp;
struct xfs_buf *agbp;
- xfs_agnumber_t agno = XFS_FSB_TO_AGNO(mp, bno);
- xfs_agblock_t agbno = XFS_FSB_TO_AGBNO(mp, bno);
struct xfs_agf *agf;
int error;
unsigned int busy_flags = 0;
- struct xfs_perag *pag;
ASSERT(len != 0);
ASSERT(type != XFS_AG_RESV_AGFL);
@@ -3618,10 +3653,9 @@ __xfs_free_extent(
XFS_ERRTAG_FREE_EXTENT))
return -EIO;
- pag = xfs_perag_get(mp, agno);
error = xfs_free_extent_fix_freelist(tp, pag, &agbp);
if (error)
- goto err;
+ return error;
agf = agbp->b_addr;
if (XFS_IS_CORRUPT(mp, agbno >= mp->m_sb.sb_agblocks)) {
@@ -3635,20 +3669,18 @@ __xfs_free_extent(
goto err_release;
}
- error = xfs_free_ag_extent(tp, agbp, agno, agbno, len, oinfo, type);
+ error = xfs_free_ag_extent(tp, agbp, pag->pag_agno, agbno, len, oinfo,
+ type);
if (error)
goto err_release;
if (skip_discard)
busy_flags |= XFS_EXTENT_BUSY_SKIP_DISCARD;
xfs_extent_busy_insert(tp, pag, agbno, len, busy_flags);
- xfs_perag_put(pag);
return 0;
err_release:
xfs_trans_brelse(tp, agbp);
-err:
- xfs_perag_put(pag);
return error;
}
@@ -3666,9 +3698,13 @@ xfs_alloc_query_range_helper(
{
struct xfs_alloc_query_range_info *query = priv;
struct xfs_alloc_rec_incore irec;
+ xfs_failaddr_t fa;
+
+ xfs_alloc_btrec_to_irec(rec, &irec);
+ fa = xfs_alloc_check_irec(cur, &irec);
+ if (fa)
+ return xfs_alloc_complain_bad_rec(cur, fa, &irec);
- irec.ar_startblock = be32_to_cpu(rec->alloc.ar_startblock);
- irec.ar_blockcount = be32_to_cpu(rec->alloc.ar_blockcount);
return query->fn(cur, &irec, query->priv);
}
@@ -3709,13 +3745,16 @@ xfs_alloc_query_all(
return xfs_btree_query_all(cur, xfs_alloc_query_range_helper, &query);
}
-/* Is there a record covering a given extent? */
+/*
+ * Scan part of the keyspace of the free space and tell us if the area has no
+ * records, is fully mapped by records, or is partially filled.
+ */
int
-xfs_alloc_has_record(
+xfs_alloc_has_records(
struct xfs_btree_cur *cur,
xfs_agblock_t bno,
xfs_extlen_t len,
- bool *exists)
+ enum xbtree_recpacking *outcome)
{
union xfs_btree_irec low;
union xfs_btree_irec high;
@@ -3725,7 +3764,7 @@ xfs_alloc_has_record(
memset(&high, 0xFF, sizeof(high));
high.a.ar_startblock = bno + len - 1;
- return xfs_btree_has_record(cur, &low, &high, exists);
+ return xfs_btree_has_records(cur, &low, &high, NULL, outcome);
}
/*
diff --git a/fs/xfs/libxfs/xfs_alloc.h b/fs/xfs/libxfs/xfs_alloc.h
index 2b246d74c189..5dbb25546d0b 100644
--- a/fs/xfs/libxfs/xfs_alloc.h
+++ b/fs/xfs/libxfs/xfs_alloc.h
@@ -141,7 +141,8 @@ int xfs_alloc_vextent_first_ag(struct xfs_alloc_arg *args,
int /* error */
__xfs_free_extent(
struct xfs_trans *tp, /* transaction pointer */
- xfs_fsblock_t bno, /* starting block number of extent */
+ struct xfs_perag *pag,
+ xfs_agblock_t agbno,
xfs_extlen_t len, /* length of extent */
const struct xfs_owner_info *oinfo, /* extent owner */
enum xfs_ag_resv_type type, /* block reservation type */
@@ -150,12 +151,13 @@ __xfs_free_extent(
static inline int
xfs_free_extent(
struct xfs_trans *tp,
- xfs_fsblock_t bno,
+ struct xfs_perag *pag,
+ xfs_agblock_t agbno,
xfs_extlen_t len,
const struct xfs_owner_info *oinfo,
enum xfs_ag_resv_type type)
{
- return __xfs_free_extent(tp, bno, len, oinfo, type, false);
+ return __xfs_free_extent(tp, pag, agbno, len, oinfo, type, false);
}
int /* error */
@@ -179,6 +181,12 @@ xfs_alloc_get_rec(
xfs_extlen_t *len, /* output: length of extent */
int *stat); /* output: success/failure */
+union xfs_btree_rec;
+void xfs_alloc_btrec_to_irec(const union xfs_btree_rec *rec,
+ struct xfs_alloc_rec_incore *irec);
+xfs_failaddr_t xfs_alloc_check_irec(struct xfs_btree_cur *cur,
+ const struct xfs_alloc_rec_incore *irec);
+
int xfs_read_agf(struct xfs_perag *pag, struct xfs_trans *tp, int flags,
struct xfs_buf **agfbpp);
int xfs_alloc_read_agf(struct xfs_perag *pag, struct xfs_trans *tp, int flags,
@@ -205,8 +213,8 @@ int xfs_alloc_query_range(struct xfs_btree_cur *cur,
int xfs_alloc_query_all(struct xfs_btree_cur *cur, xfs_alloc_query_range_fn fn,
void *priv);
-int xfs_alloc_has_record(struct xfs_btree_cur *cur, xfs_agblock_t bno,
- xfs_extlen_t len, bool *exist);
+int xfs_alloc_has_records(struct xfs_btree_cur *cur, xfs_agblock_t bno,
+ xfs_extlen_t len, enum xbtree_recpacking *outcome);
typedef int (*xfs_agfl_walk_fn)(struct xfs_mount *mp, xfs_agblock_t bno,
void *priv);
@@ -235,9 +243,13 @@ struct xfs_extent_free_item {
uint64_t xefi_owner;
xfs_fsblock_t xefi_startblock;/* starting fs block number */
xfs_extlen_t xefi_blockcount;/* number of blocks in extent */
+ struct xfs_perag *xefi_pag;
unsigned int xefi_flags;
};
+void xfs_extent_free_get_group(struct xfs_mount *mp,
+ struct xfs_extent_free_item *xefi);
+
#define XFS_EFI_SKIP_DISCARD (1U << 0) /* don't issue discard */
#define XFS_EFI_ATTR_FORK (1U << 1) /* freeing attr fork block */
#define XFS_EFI_BMBT_BLOCK (1U << 2) /* freeing bmap btree block */
diff --git a/fs/xfs/libxfs/xfs_alloc_btree.c b/fs/xfs/libxfs/xfs_alloc_btree.c
index 0f29c7b1b39f..c65228efed4a 100644
--- a/fs/xfs/libxfs/xfs_alloc_btree.c
+++ b/fs/xfs/libxfs/xfs_alloc_btree.c
@@ -260,20 +260,27 @@ STATIC int64_t
xfs_bnobt_diff_two_keys(
struct xfs_btree_cur *cur,
const union xfs_btree_key *k1,
- const union xfs_btree_key *k2)
+ const union xfs_btree_key *k2,
+ const union xfs_btree_key *mask)
{
+ ASSERT(!mask || mask->alloc.ar_startblock);
+
return (int64_t)be32_to_cpu(k1->alloc.ar_startblock) -
- be32_to_cpu(k2->alloc.ar_startblock);
+ be32_to_cpu(k2->alloc.ar_startblock);
}
STATIC int64_t
xfs_cntbt_diff_two_keys(
struct xfs_btree_cur *cur,
const union xfs_btree_key *k1,
- const union xfs_btree_key *k2)
+ const union xfs_btree_key *k2,
+ const union xfs_btree_key *mask)
{
int64_t diff;
+ ASSERT(!mask || (mask->alloc.ar_blockcount &&
+ mask->alloc.ar_startblock));
+
diff = be32_to_cpu(k1->alloc.ar_blockcount) -
be32_to_cpu(k2->alloc.ar_blockcount);
if (diff)
@@ -423,6 +430,19 @@ xfs_cntbt_recs_inorder(
be32_to_cpu(r2->alloc.ar_startblock));
}
+STATIC enum xbtree_key_contig
+xfs_allocbt_keys_contiguous(
+ struct xfs_btree_cur *cur,
+ const union xfs_btree_key *key1,
+ const union xfs_btree_key *key2,
+ const union xfs_btree_key *mask)
+{
+ ASSERT(!mask || mask->alloc.ar_startblock);
+
+ return xbtree_key_contig(be32_to_cpu(key1->alloc.ar_startblock),
+ be32_to_cpu(key2->alloc.ar_startblock));
+}
+
static const struct xfs_btree_ops xfs_bnobt_ops = {
.rec_len = sizeof(xfs_alloc_rec_t),
.key_len = sizeof(xfs_alloc_key_t),
@@ -443,6 +463,7 @@ static const struct xfs_btree_ops xfs_bnobt_ops = {
.diff_two_keys = xfs_bnobt_diff_two_keys,
.keys_inorder = xfs_bnobt_keys_inorder,
.recs_inorder = xfs_bnobt_recs_inorder,
+ .keys_contiguous = xfs_allocbt_keys_contiguous,
};
static const struct xfs_btree_ops xfs_cntbt_ops = {
@@ -465,6 +486,7 @@ static const struct xfs_btree_ops xfs_cntbt_ops = {
.diff_two_keys = xfs_cntbt_diff_two_keys,
.keys_inorder = xfs_cntbt_keys_inorder,
.recs_inorder = xfs_cntbt_recs_inorder,
+ .keys_contiguous = NULL, /* not needed right now */
};
/* Allocate most of a new allocation btree cursor. */
@@ -492,9 +514,7 @@ xfs_allocbt_init_common(
cur->bc_statoff = XFS_STATS_CALC_INDEX(xs_abtb_2);
}
- /* take a reference for the cursor */
- atomic_inc(&pag->pag_ref);
- cur->bc_ag.pag = pag;
+ cur->bc_ag.pag = xfs_perag_hold(pag);
if (xfs_has_crc(mp))
cur->bc_flags |= XFS_BTREE_CRC_BLOCKS;
diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c
index 34de6e6898c4..b512de0540d5 100644
--- a/fs/xfs/libxfs/xfs_bmap.c
+++ b/fs/xfs/libxfs/xfs_bmap.c
@@ -1083,6 +1083,34 @@ struct xfs_iread_state {
xfs_extnum_t loaded;
};
+int
+xfs_bmap_complain_bad_rec(
+ struct xfs_inode *ip,
+ int whichfork,
+ xfs_failaddr_t fa,
+ const struct xfs_bmbt_irec *irec)
+{
+ struct xfs_mount *mp = ip->i_mount;
+ const char *forkname;
+
+ switch (whichfork) {
+ case XFS_DATA_FORK: forkname = "data"; break;
+ case XFS_ATTR_FORK: forkname = "attr"; break;
+ case XFS_COW_FORK: forkname = "CoW"; break;
+ default: forkname = "???"; break;
+ }
+
+ xfs_warn(mp,
+ "Bmap BTree record corruption in inode 0x%llx %s fork detected at %pS!",
+ ip->i_ino, forkname, fa);
+ xfs_warn(mp,
+ "Offset 0x%llx, start block 0x%llx, block count 0x%llx state 0x%x",
+ irec->br_startoff, irec->br_startblock, irec->br_blockcount,
+ irec->br_state);
+
+ return -EFSCORRUPTED;
+}
+
/* Stuff every bmbt record from this block into the incore extent map. */
static int
xfs_iread_bmbt_block(
@@ -1125,7 +1153,8 @@ xfs_iread_bmbt_block(
xfs_inode_verifier_error(ip, -EFSCORRUPTED,
"xfs_iread_extents(2)", frp,
sizeof(*frp), fa);
- return -EFSCORRUPTED;
+ return xfs_bmap_complain_bad_rec(ip, whichfork, fa,
+ &new);
}
xfs_iext_insert(ip, &ir->icur, &new,
xfs_bmap_fork_to_state(whichfork));
@@ -1171,6 +1200,12 @@ xfs_iread_extents(
goto out;
}
ASSERT(ir.loaded == xfs_iext_count(ifp));
+ /*
+ * Use release semantics so that we can use acquire semantics in
+ * xfs_need_iread_extents and be guaranteed to see a valid mapping tree
+ * after that load.
+ */
+ smp_store_release(&ifp->if_needextents, 0);
return 0;
out:
xfs_iext_destroy(ifp);
@@ -3505,7 +3540,6 @@ xfs_bmap_btalloc_at_eof(
* original non-aligned state so the caller can proceed on allocation
* failure as if this function was never called.
*/
- args->fsbno = ap->blkno;
args->alignment = 1;
return 0;
}
@@ -6075,6 +6109,7 @@ __xfs_bmap_add(
bi->bi_whichfork = whichfork;
bi->bi_bmap = *bmap;
+ xfs_bmap_update_get_group(tp->t_mountp, bi);
xfs_defer_add(tp, XFS_DEFER_OPS_TYPE_BMAP, &bi->bi_list);
return 0;
}
diff --git a/fs/xfs/libxfs/xfs_bmap.h b/fs/xfs/libxfs/xfs_bmap.h
index dd08361ca5a6..e33470e39728 100644
--- a/fs/xfs/libxfs/xfs_bmap.h
+++ b/fs/xfs/libxfs/xfs_bmap.h
@@ -145,7 +145,7 @@ static inline int xfs_bmapi_whichfork(uint32_t bmapi_flags)
{ BMAP_COWFORK, "COW" }
/* Return true if the extent is an allocated extent, written or not. */
-static inline bool xfs_bmap_is_real_extent(struct xfs_bmbt_irec *irec)
+static inline bool xfs_bmap_is_real_extent(const struct xfs_bmbt_irec *irec)
{
return irec->br_startblock != HOLESTARTBLOCK &&
irec->br_startblock != DELAYSTARTBLOCK &&
@@ -238,9 +238,13 @@ struct xfs_bmap_intent {
enum xfs_bmap_intent_type bi_type;
int bi_whichfork;
struct xfs_inode *bi_owner;
+ struct xfs_perag *bi_pag;
struct xfs_bmbt_irec bi_bmap;
};
+void xfs_bmap_update_get_group(struct xfs_mount *mp,
+ struct xfs_bmap_intent *bi);
+
int xfs_bmap_finish_one(struct xfs_trans *tp, struct xfs_bmap_intent *bi);
void xfs_bmap_map_extent(struct xfs_trans *tp, struct xfs_inode *ip,
struct xfs_bmbt_irec *imap);
@@ -261,6 +265,8 @@ static inline uint32_t xfs_bmap_fork_to_state(int whichfork)
xfs_failaddr_t xfs_bmap_validate_extent(struct xfs_inode *ip, int whichfork,
struct xfs_bmbt_irec *irec);
+int xfs_bmap_complain_bad_rec(struct xfs_inode *ip, int whichfork,
+ xfs_failaddr_t fa, const struct xfs_bmbt_irec *irec);
int xfs_bmapi_remap(struct xfs_trans *tp, struct xfs_inode *ip,
xfs_fileoff_t bno, xfs_filblks_t len, xfs_fsblock_t startblock,
diff --git a/fs/xfs/libxfs/xfs_bmap_btree.c b/fs/xfs/libxfs/xfs_bmap_btree.c
index b8ad95050c9b..1b40e5f8b1ec 100644
--- a/fs/xfs/libxfs/xfs_bmap_btree.c
+++ b/fs/xfs/libxfs/xfs_bmap_btree.c
@@ -382,11 +382,14 @@ STATIC int64_t
xfs_bmbt_diff_two_keys(
struct xfs_btree_cur *cur,
const union xfs_btree_key *k1,
- const union xfs_btree_key *k2)
+ const union xfs_btree_key *k2,
+ const union xfs_btree_key *mask)
{
uint64_t a = be64_to_cpu(k1->bmbt.br_startoff);
uint64_t b = be64_to_cpu(k2->bmbt.br_startoff);
+ ASSERT(!mask || mask->bmbt.br_startoff);
+
/*
* Note: This routine previously casted a and b to int64 and subtracted
* them to generate a result. This lead to problems if b was the
@@ -500,6 +503,19 @@ xfs_bmbt_recs_inorder(
xfs_bmbt_disk_get_startoff(&r2->bmbt);
}
+STATIC enum xbtree_key_contig
+xfs_bmbt_keys_contiguous(
+ struct xfs_btree_cur *cur,
+ const union xfs_btree_key *key1,
+ const union xfs_btree_key *key2,
+ const union xfs_btree_key *mask)
+{
+ ASSERT(!mask || mask->bmbt.br_startoff);
+
+ return xbtree_key_contig(be64_to_cpu(key1->bmbt.br_startoff),
+ be64_to_cpu(key2->bmbt.br_startoff));
+}
+
static const struct xfs_btree_ops xfs_bmbt_ops = {
.rec_len = sizeof(xfs_bmbt_rec_t),
.key_len = sizeof(xfs_bmbt_key_t),
@@ -520,6 +536,7 @@ static const struct xfs_btree_ops xfs_bmbt_ops = {
.buf_ops = &xfs_bmbt_buf_ops,
.keys_inorder = xfs_bmbt_keys_inorder,
.recs_inorder = xfs_bmbt_recs_inorder,
+ .keys_contiguous = xfs_bmbt_keys_contiguous,
};
/*
diff --git a/fs/xfs/libxfs/xfs_btree.c b/fs/xfs/libxfs/xfs_btree.c
index c4649cc624e1..6a6503ab0cd7 100644
--- a/fs/xfs/libxfs/xfs_btree.c
+++ b/fs/xfs/libxfs/xfs_btree.c
@@ -2067,8 +2067,7 @@ xfs_btree_get_leaf_keys(
for (n = 2; n <= xfs_btree_get_numrecs(block); n++) {
rec = xfs_btree_rec_addr(cur, n, block);
cur->bc_ops->init_high_key_from_rec(&hkey, rec);
- if (cur->bc_ops->diff_two_keys(cur, &hkey, &max_hkey)
- > 0)
+ if (xfs_btree_keycmp_gt(cur, &hkey, &max_hkey))
max_hkey = hkey;
}
@@ -2096,7 +2095,7 @@ xfs_btree_get_node_keys(
max_hkey = xfs_btree_high_key_addr(cur, 1, block);
for (n = 2; n <= xfs_btree_get_numrecs(block); n++) {
hkey = xfs_btree_high_key_addr(cur, n, block);
- if (cur->bc_ops->diff_two_keys(cur, hkey, max_hkey) > 0)
+ if (xfs_btree_keycmp_gt(cur, hkey, max_hkey))
max_hkey = hkey;
}
@@ -2183,8 +2182,8 @@ __xfs_btree_updkeys(
nlkey = xfs_btree_key_addr(cur, ptr, block);
nhkey = xfs_btree_high_key_addr(cur, ptr, block);
if (!force_all &&
- !(cur->bc_ops->diff_two_keys(cur, nlkey, lkey) != 0 ||
- cur->bc_ops->diff_two_keys(cur, nhkey, hkey) != 0))
+ xfs_btree_keycmp_eq(cur, nlkey, lkey) &&
+ xfs_btree_keycmp_eq(cur, nhkey, hkey))
break;
xfs_btree_copy_keys(cur, nlkey, lkey, 1);
xfs_btree_log_keys(cur, bp, ptr, ptr);
@@ -4716,7 +4715,6 @@ xfs_btree_simple_query_range(
{
union xfs_btree_rec *recp;
union xfs_btree_key rec_key;
- int64_t diff;
int stat;
bool firstrec = true;
int error;
@@ -4746,20 +4744,17 @@ xfs_btree_simple_query_range(
if (error || !stat)
break;
- /* Skip if high_key(rec) < low_key. */
+ /* Skip if low_key > high_key(rec). */
if (firstrec) {
cur->bc_ops->init_high_key_from_rec(&rec_key, recp);
firstrec = false;
- diff = cur->bc_ops->diff_two_keys(cur, low_key,
- &rec_key);
- if (diff > 0)
+ if (xfs_btree_keycmp_gt(cur, low_key, &rec_key))
goto advloop;
}
- /* Stop if high_key < low_key(rec). */
+ /* Stop if low_key(rec) > high_key. */
cur->bc_ops->init_key_from_rec(&rec_key, recp);
- diff = cur->bc_ops->diff_two_keys(cur, &rec_key, high_key);
- if (diff > 0)
+ if (xfs_btree_keycmp_gt(cur, &rec_key, high_key))
break;
/* Callback */
@@ -4813,8 +4808,6 @@ xfs_btree_overlapped_query_range(
union xfs_btree_key *hkp;
union xfs_btree_rec *recp;
struct xfs_btree_block *block;
- int64_t ldiff;
- int64_t hdiff;
int level;
struct xfs_buf *bp;
int i;
@@ -4854,25 +4847,23 @@ pop_up:
block);
cur->bc_ops->init_high_key_from_rec(&rec_hkey, recp);
- ldiff = cur->bc_ops->diff_two_keys(cur, &rec_hkey,
- low_key);
-
cur->bc_ops->init_key_from_rec(&rec_key, recp);
- hdiff = cur->bc_ops->diff_two_keys(cur, high_key,
- &rec_key);
/*
+ * If (query's high key < record's low key), then there
+ * are no more interesting records in this block. Pop
+ * up to the leaf level to find more record blocks.
+ *
* If (record's high key >= query's low key) and
* (query's high key >= record's low key), then
* this record overlaps the query range; callback.
*/
- if (ldiff >= 0 && hdiff >= 0) {
+ if (xfs_btree_keycmp_lt(cur, high_key, &rec_key))
+ goto pop_up;
+ if (xfs_btree_keycmp_ge(cur, &rec_hkey, low_key)) {
error = fn(cur, recp, priv);
if (error)
break;
- } else if (hdiff < 0) {
- /* Record is larger than high key; pop. */
- goto pop_up;
}
cur->bc_levels[level].ptr++;
continue;
@@ -4884,15 +4875,18 @@ pop_up:
block);
pp = xfs_btree_ptr_addr(cur, cur->bc_levels[level].ptr, block);
- ldiff = cur->bc_ops->diff_two_keys(cur, hkp, low_key);
- hdiff = cur->bc_ops->diff_two_keys(cur, high_key, lkp);
-
/*
+ * If (query's high key < pointer's low key), then there are no
+ * more interesting keys in this block. Pop up one leaf level
+ * to continue looking for records.
+ *
* If (pointer's high key >= query's low key) and
* (query's high key >= pointer's low key), then
* this record overlaps the query range; follow pointer.
*/
- if (ldiff >= 0 && hdiff >= 0) {
+ if (xfs_btree_keycmp_lt(cur, high_key, lkp))
+ goto pop_up;
+ if (xfs_btree_keycmp_ge(cur, hkp, low_key)) {
level--;
error = xfs_btree_lookup_get_block(cur, level, pp,
&block);
@@ -4907,9 +4901,6 @@ pop_up:
#endif
cur->bc_levels[level].ptr = 1;
continue;
- } else if (hdiff < 0) {
- /* The low key is larger than the upper range; pop. */
- goto pop_up;
}
cur->bc_levels[level].ptr++;
}
@@ -4937,6 +4928,19 @@ out:
return error;
}
+static inline void
+xfs_btree_key_from_irec(
+ struct xfs_btree_cur *cur,
+ union xfs_btree_key *key,
+ const union xfs_btree_irec *irec)
+{
+ union xfs_btree_rec rec;
+
+ cur->bc_rec = *irec;
+ cur->bc_ops->init_rec_from_cur(cur, &rec);
+ cur->bc_ops->init_key_from_rec(key, &rec);
+}
+
/*
* Query a btree for all records overlapping a given interval of keys. The
* supplied function will be called with each record found; return one of the
@@ -4951,21 +4955,15 @@ xfs_btree_query_range(
xfs_btree_query_range_fn fn,
void *priv)
{
- union xfs_btree_rec rec;
union xfs_btree_key low_key;
union xfs_btree_key high_key;
/* Find the keys of both ends of the interval. */
- cur->bc_rec = *high_rec;
- cur->bc_ops->init_rec_from_cur(cur, &rec);
- cur->bc_ops->init_key_from_rec(&high_key, &rec);
+ xfs_btree_key_from_irec(cur, &high_key, high_rec);
+ xfs_btree_key_from_irec(cur, &low_key, low_rec);
- cur->bc_rec = *low_rec;
- cur->bc_ops->init_rec_from_cur(cur, &rec);
- cur->bc_ops->init_key_from_rec(&low_key, &rec);
-
- /* Enforce low key < high key. */
- if (cur->bc_ops->diff_two_keys(cur, &low_key, &high_key) > 0)
+ /* Enforce low key <= high key. */
+ if (!xfs_btree_keycmp_le(cur, &low_key, &high_key))
return -EINVAL;
if (!(cur->bc_flags & XFS_BTREE_OVERLAPPING))
@@ -5027,34 +5025,132 @@ xfs_btree_diff_two_ptrs(
return (int64_t)be32_to_cpu(a->s) - be32_to_cpu(b->s);
}
-/* If there's an extent, we're done. */
+struct xfs_btree_has_records {
+ /* Keys for the start and end of the range we want to know about. */
+ union xfs_btree_key start_key;
+ union xfs_btree_key end_key;
+
+ /* Mask for key comparisons, if desired. */
+ const union xfs_btree_key *key_mask;
+
+ /* Highest record key we've seen so far. */
+ union xfs_btree_key high_key;
+
+ enum xbtree_recpacking outcome;
+};
+
STATIC int
-xfs_btree_has_record_helper(
+xfs_btree_has_records_helper(
struct xfs_btree_cur *cur,
const union xfs_btree_rec *rec,
void *priv)
{
- return -ECANCELED;
+ union xfs_btree_key rec_key;
+ union xfs_btree_key rec_high_key;
+ struct xfs_btree_has_records *info = priv;
+ enum xbtree_key_contig key_contig;
+
+ cur->bc_ops->init_key_from_rec(&rec_key, rec);
+
+ if (info->outcome == XBTREE_RECPACKING_EMPTY) {
+ info->outcome = XBTREE_RECPACKING_SPARSE;
+
+ /*
+ * If the first record we find does not overlap the start key,
+ * then there is a hole at the start of the search range.
+ * Classify this as sparse and stop immediately.
+ */
+ if (xfs_btree_masked_keycmp_lt(cur, &info->start_key, &rec_key,
+ info->key_mask))
+ return -ECANCELED;
+ } else {
+ /*
+ * If a subsequent record does not overlap with any record
+ * we've seen so far, there is a hole in the middle of the
+ * search range. Classify this as sparse and stop.
+ * If the keys overlap and this btree does not allow overlap,
+ * signal corruption.
+ */
+ key_contig = cur->bc_ops->keys_contiguous(cur, &info->high_key,
+ &rec_key, info->key_mask);
+ if (key_contig == XBTREE_KEY_OVERLAP &&
+ !(cur->bc_flags & XFS_BTREE_OVERLAPPING))
+ return -EFSCORRUPTED;
+ if (key_contig == XBTREE_KEY_GAP)
+ return -ECANCELED;
+ }
+
+ /*
+ * If high_key(rec) is larger than any other high key we've seen,
+ * remember it for later.
+ */
+ cur->bc_ops->init_high_key_from_rec(&rec_high_key, rec);
+ if (xfs_btree_masked_keycmp_gt(cur, &rec_high_key, &info->high_key,
+ info->key_mask))
+ info->high_key = rec_high_key; /* struct copy */
+
+ return 0;
}
-/* Is there a record covering a given range of keys? */
+/*
+ * Scan part of the keyspace of a btree and tell us if that keyspace does not
+ * map to any records; is fully mapped to records; or is partially mapped to
+ * records. This is the btree record equivalent to determining if a file is
+ * sparse.
+ *
+ * For most btree types, the record scan should use all available btree key
+ * fields to compare the keys encountered. These callers should pass NULL for
+ * @mask. However, some callers (e.g. scanning physical space in the rmapbt)
+ * want to ignore some part of the btree record keyspace when performing the
+ * comparison. These callers should pass in a union xfs_btree_key object with
+ * the fields that *should* be a part of the comparison set to any nonzero
+ * value, and the rest zeroed.
+ */
int
-xfs_btree_has_record(
+xfs_btree_has_records(
struct xfs_btree_cur *cur,
const union xfs_btree_irec *low,
const union xfs_btree_irec *high,
- bool *exists)
+ const union xfs_btree_key *mask,
+ enum xbtree_recpacking *outcome)
{
+ struct xfs_btree_has_records info = {
+ .outcome = XBTREE_RECPACKING_EMPTY,
+ .key_mask = mask,
+ };
int error;
- error = xfs_btree_query_range(cur, low, high,
- &xfs_btree_has_record_helper, NULL);
- if (error == -ECANCELED) {
- *exists = true;
- return 0;
+ /* Not all btrees support this operation. */
+ if (!cur->bc_ops->keys_contiguous) {
+ ASSERT(0);
+ return -EOPNOTSUPP;
}
- *exists = false;
- return error;
+
+ xfs_btree_key_from_irec(cur, &info.start_key, low);
+ xfs_btree_key_from_irec(cur, &info.end_key, high);
+
+ error = xfs_btree_query_range(cur, low, high,
+ xfs_btree_has_records_helper, &info);
+ if (error == -ECANCELED)
+ goto out;
+ if (error)
+ return error;
+
+ if (info.outcome == XBTREE_RECPACKING_EMPTY)
+ goto out;
+
+ /*
+ * If the largest high_key(rec) we saw during the walk is at or beyond
+ * the end of the search range, classify this as full. Otherwise,
+ * there is a hole at the end of the search range.
+ */
+ if (xfs_btree_masked_keycmp_ge(cur, &info.high_key, &info.end_key,
+ mask))
+ info.outcome = XBTREE_RECPACKING_FULL;
+
+out:
+ *outcome = info.outcome;
+ return 0;
}
/* Are there more records in this btree? */
diff --git a/fs/xfs/libxfs/xfs_btree.h b/fs/xfs/libxfs/xfs_btree.h
index 29c4b4ccb909..a2aa36b23e25 100644
--- a/fs/xfs/libxfs/xfs_btree.h
+++ b/fs/xfs/libxfs/xfs_btree.h
@@ -90,6 +90,27 @@ uint32_t xfs_btree_magic(int crc, xfs_btnum_t btnum);
#define XFS_BTREE_STATS_ADD(cur, stat, val) \
XFS_STATS_ADD_OFF((cur)->bc_mp, (cur)->bc_statoff + __XBTS_ ## stat, val)
+enum xbtree_key_contig {
+ XBTREE_KEY_GAP = 0,
+ XBTREE_KEY_CONTIGUOUS,
+ XBTREE_KEY_OVERLAP,
+};
+
+/*
+ * Decide if these two numeric btree key fields are contiguous, overlapping,
+ * or if there's a gap between them. @x should be the field from the high
+ * key and @y should be the field from the low key.
+ */
+static inline enum xbtree_key_contig xbtree_key_contig(uint64_t x, uint64_t y)
+{
+ x++;
+ if (x < y)
+ return XBTREE_KEY_GAP;
+ if (x == y)
+ return XBTREE_KEY_CONTIGUOUS;
+ return XBTREE_KEY_OVERLAP;
+}
+
struct xfs_btree_ops {
/* size of the key and record structures */
size_t key_len;
@@ -140,11 +161,14 @@ struct xfs_btree_ops {
/*
* Difference between key2 and key1 -- positive if key1 > key2,
- * negative if key1 < key2, and zero if equal.
+ * negative if key1 < key2, and zero if equal. If the @mask parameter
+ * is non-NULL, each @mask field to be used in the comparison must
+ * contain a nonzero value.
*/
int64_t (*diff_two_keys)(struct xfs_btree_cur *cur,
const union xfs_btree_key *key1,
- const union xfs_btree_key *key2);
+ const union xfs_btree_key *key2,
+ const union xfs_btree_key *mask);
const struct xfs_buf_ops *buf_ops;
@@ -157,6 +181,22 @@ struct xfs_btree_ops {
int (*recs_inorder)(struct xfs_btree_cur *cur,
const union xfs_btree_rec *r1,
const union xfs_btree_rec *r2);
+
+ /*
+ * Are these two btree keys immediately adjacent?
+ *
+ * Given two btree keys @key1 and @key2, decide if it is impossible for
+ * there to be a third btree key K satisfying the relationship
+ * @key1 < K < @key2. To determine if two btree records are
+ * immediately adjacent, @key1 should be the high key of the first
+ * record and @key2 should be the low key of the second record.
+ * If the @mask parameter is non-NULL, each key field to be used in the
+ * comparison must contain a nonzero value in @mask.
+ */
+ enum xbtree_key_contig (*keys_contiguous)(struct xfs_btree_cur *cur,
+ const union xfs_btree_key *key1,
+ const union xfs_btree_key *key2,
+ const union xfs_btree_key *mask);
};
/*
@@ -540,12 +580,105 @@ void xfs_btree_get_keys(struct xfs_btree_cur *cur,
struct xfs_btree_block *block, union xfs_btree_key *key);
union xfs_btree_key *xfs_btree_high_key_from_key(struct xfs_btree_cur *cur,
union xfs_btree_key *key);
-int xfs_btree_has_record(struct xfs_btree_cur *cur,
+typedef bool (*xfs_btree_key_gap_fn)(struct xfs_btree_cur *cur,
+ const union xfs_btree_key *key1,
+ const union xfs_btree_key *key2);
+
+int xfs_btree_has_records(struct xfs_btree_cur *cur,
const union xfs_btree_irec *low,
- const union xfs_btree_irec *high, bool *exists);
+ const union xfs_btree_irec *high,
+ const union xfs_btree_key *mask,
+ enum xbtree_recpacking *outcome);
+
bool xfs_btree_has_more_records(struct xfs_btree_cur *cur);
struct xfs_ifork *xfs_btree_ifork_ptr(struct xfs_btree_cur *cur);
+/* Key comparison helpers */
+static inline bool /* true iff ->diff_two_keys says key1 < key2; no field mask */
+xfs_btree_keycmp_lt(
+ struct xfs_btree_cur *cur,
+ const union xfs_btree_key *key1,
+ const union xfs_btree_key *key2)
+{
+ return cur->bc_ops->diff_two_keys(cur, key1, key2, NULL) < 0;
+}
+
+static inline bool /* true iff ->diff_two_keys says key1 > key2; no field mask */
+xfs_btree_keycmp_gt(
+ struct xfs_btree_cur *cur,
+ const union xfs_btree_key *key1,
+ const union xfs_btree_key *key2)
+{
+ return cur->bc_ops->diff_two_keys(cur, key1, key2, NULL) > 0;
+}
+
+static inline bool /* true iff ->diff_two_keys reports the keys as equal; no field mask */
+xfs_btree_keycmp_eq(
+ struct xfs_btree_cur *cur,
+ const union xfs_btree_key *key1,
+ const union xfs_btree_key *key2)
+{
+ return cur->bc_ops->diff_two_keys(cur, key1, key2, NULL) == 0;
+}
+
+static inline bool /* key1 <= key2, expressed as "not greater than" */
+xfs_btree_keycmp_le(
+ struct xfs_btree_cur *cur,
+ const union xfs_btree_key *key1,
+ const union xfs_btree_key *key2)
+{
+ return !xfs_btree_keycmp_gt(cur, key1, key2);
+}
+
+static inline bool /* key1 >= key2, expressed as "not less than" */
+xfs_btree_keycmp_ge(
+ struct xfs_btree_cur *cur,
+ const union xfs_btree_key *key1,
+ const union xfs_btree_key *key2)
+{
+ return !xfs_btree_keycmp_lt(cur, key1, key2);
+}
+
+static inline bool /* key1 != key2, expressed as "not equal" */
+xfs_btree_keycmp_ne(
+ struct xfs_btree_cur *cur,
+ const union xfs_btree_key *key1,
+ const union xfs_btree_key *key2)
+{
+ return !xfs_btree_keycmp_eq(cur, key1, key2);
+}
+
+/* Masked key comparison helpers */
+static inline bool /* true iff key1 < key2, with @mask handed to ->diff_two_keys */
+xfs_btree_masked_keycmp_lt(
+ struct xfs_btree_cur *cur,
+ const union xfs_btree_key *key1,
+ const union xfs_btree_key *key2,
+ const union xfs_btree_key *mask)
+{
+ return cur->bc_ops->diff_two_keys(cur, key1, key2, mask) < 0;
+}
+
+static inline bool /* true iff key1 > key2, with @mask handed to ->diff_two_keys */
+xfs_btree_masked_keycmp_gt(
+ struct xfs_btree_cur *cur,
+ const union xfs_btree_key *key1,
+ const union xfs_btree_key *key2,
+ const union xfs_btree_key *mask)
+{
+ return cur->bc_ops->diff_two_keys(cur, key1, key2, mask) > 0;
+}
+
+static inline bool /* masked key1 >= key2, expressed as "not less than" */
+xfs_btree_masked_keycmp_ge(
+ struct xfs_btree_cur *cur,
+ const union xfs_btree_key *key1,
+ const union xfs_btree_key *key2,
+ const union xfs_btree_key *mask)
+{
+ return !xfs_btree_masked_keycmp_lt(cur, key1, key2, mask);
+}
+
/* Does this cursor point to the last block in the given level? */
static inline bool
xfs_btree_islastblock(
diff --git a/fs/xfs/libxfs/xfs_defer.c b/fs/xfs/libxfs/xfs_defer.c
index 5a321b783398..bcfb6a4203cd 100644
--- a/fs/xfs/libxfs/xfs_defer.c
+++ b/fs/xfs/libxfs/xfs_defer.c
@@ -397,6 +397,7 @@ xfs_defer_cancel_list(
list_for_each_safe(pwi, n, &dfp->dfp_work) {
list_del(pwi);
dfp->dfp_count--;
+ trace_xfs_defer_cancel_item(mp, dfp, pwi);
ops->cancel_item(pwi);
}
ASSERT(dfp->dfp_count == 0);
@@ -476,6 +477,7 @@ xfs_defer_finish_one(
list_for_each_safe(li, n, &dfp->dfp_work) {
list_del(li);
dfp->dfp_count--;
+ trace_xfs_defer_finish_item(tp->t_mountp, dfp, li);
error = ops->finish_item(tp, dfp->dfp_done, li, &state);
if (error == -EAGAIN) {
int ret;
@@ -623,7 +625,7 @@ xfs_defer_add(
struct list_head *li)
{
struct xfs_defer_pending *dfp = NULL;
- const struct xfs_defer_op_type *ops;
+ const struct xfs_defer_op_type *ops = defer_op_types[type];
ASSERT(tp->t_flags & XFS_TRANS_PERM_LOG_RES);
BUILD_BUG_ON(ARRAY_SIZE(defer_op_types) != XFS_DEFER_OPS_TYPE_MAX);
@@ -636,7 +638,6 @@ xfs_defer_add(
if (!list_empty(&tp->t_dfops)) {
dfp = list_last_entry(&tp->t_dfops,
struct xfs_defer_pending, dfp_list);
- ops = defer_op_types[dfp->dfp_type];
if (dfp->dfp_type != type ||
(ops->max_items && dfp->dfp_count >= ops->max_items))
dfp = NULL;
@@ -653,6 +654,7 @@ xfs_defer_add(
}
list_add_tail(li, &dfp->dfp_work);
+ trace_xfs_defer_add_item(tp->t_mountp, dfp, li);
dfp->dfp_count++;
}
diff --git a/fs/xfs/libxfs/xfs_dir2.c b/fs/xfs/libxfs/xfs_dir2.c
index 92bac3373f1f..f5462fd582d5 100644
--- a/fs/xfs/libxfs/xfs_dir2.c
+++ b/fs/xfs/libxfs/xfs_dir2.c
@@ -64,7 +64,7 @@ xfs_ascii_ci_hashname(
int i;
for (i = 0, hash = 0; i < name->len; i++)
- hash = tolower(name->name[i]) ^ rol32(hash, 7);
+ hash = xfs_ascii_ci_xfrm(name->name[i]) ^ rol32(hash, 7);
return hash;
}
@@ -85,7 +85,8 @@ xfs_ascii_ci_compname(
for (i = 0; i < len; i++) {
if (args->name[i] == name[i])
continue;
- if (tolower(args->name[i]) != tolower(name[i]))
+ if (xfs_ascii_ci_xfrm(args->name[i]) !=
+ xfs_ascii_ci_xfrm(name[i]))
return XFS_CMP_DIFFERENT;
result = XFS_CMP_CASE;
}
diff --git a/fs/xfs/libxfs/xfs_dir2.h b/fs/xfs/libxfs/xfs_dir2.h
index dd39f17dd9a9..19af22a16c41 100644
--- a/fs/xfs/libxfs/xfs_dir2.h
+++ b/fs/xfs/libxfs/xfs_dir2.h
@@ -248,4 +248,35 @@ unsigned int xfs_dir3_data_end_offset(struct xfs_da_geometry *geo,
struct xfs_dir2_data_hdr *hdr);
bool xfs_dir2_namecheck(const void *name, size_t length);
+/*
+ * The "ascii-ci" feature was created to speed up case-insensitive lookups for
+ * a Samba product. Because of the inherent problems with CI and UTF-8
+ * encoding, etc, it was decided that Samba would be configured to export
+ * latin1/iso 8859-1 encodings as that covered >90% of the target markets for
+ * the product. Hence the "ascii-ci" casefolding code could be encoded into
+ * the XFS directory operations and remove all the overhead of casefolding from
+ * Samba.
+ *
+ * To provide consistent hashing behavior between the userspace and kernel,
+ * these functions prepare names for hashing by transforming specific bytes
+ * to other bytes. Robustness with other encodings is not guaranteed.
+ */
+static inline bool xfs_ascii_ci_need_xfrm(unsigned char c)
+{ /* is byte c inside one of the three ranges that get transformed? */
+ if (c >= 0x41 && c <= 0x5a) /* A-Z */
+ return true;
+ if (c >= 0xc0 && c <= 0xd6) /* latin A-O with accents */
+ return true;
+ if (c >= 0xd8 && c <= 0xde) /* latin O-Y with accents */
+ return true;
+ return false; /* everything else, including 0xd7 and 0xdf, is left alone */
+}
+
+static inline unsigned char xfs_ascii_ci_xfrm(unsigned char c)
+{ /* shift c by the 'a' - 'A' distance when xfs_ascii_ci_need_xfrm() accepts it */
+ if (xfs_ascii_ci_need_xfrm(c))
+ c -= 'A' - 'a'; /* 'A' - 'a' is negative, so this moves c upward */
+ return c;
+}
+
#endif /* __XFS_DIR2_H__ */
diff --git a/fs/xfs/libxfs/xfs_ialloc.c b/fs/xfs/libxfs/xfs_ialloc.c
index 7ee292aecbeb..a16d5de16933 100644
--- a/fs/xfs/libxfs/xfs_ialloc.c
+++ b/fs/xfs/libxfs/xfs_ialloc.c
@@ -95,33 +95,25 @@ xfs_inobt_btrec_to_irec(
irec->ir_free = be64_to_cpu(rec->inobt.ir_free);
}
-/*
- * Get the data from the pointed-to record.
- */
-int
-xfs_inobt_get_rec(
- struct xfs_btree_cur *cur,
- struct xfs_inobt_rec_incore *irec,
- int *stat)
+/* Simple checks for inode records. */
+xfs_failaddr_t
+xfs_inobt_check_irec(
+ struct xfs_btree_cur *cur,
+ const struct xfs_inobt_rec_incore *irec)
{
- struct xfs_mount *mp = cur->bc_mp;
- union xfs_btree_rec *rec;
- int error;
uint64_t realfree;
- error = xfs_btree_get_rec(cur, &rec, stat);
- if (error || *stat == 0)
- return error;
-
- xfs_inobt_btrec_to_irec(mp, rec, irec);
-
+ /* Record has to be properly aligned within the AG. */
if (!xfs_verify_agino(cur->bc_ag.pag, irec->ir_startino))
- goto out_bad_rec;
+ return __this_address;
+ if (!xfs_verify_agino(cur->bc_ag.pag,
+ irec->ir_startino + XFS_INODES_PER_CHUNK - 1))
+ return __this_address;
if (irec->ir_count < XFS_INODES_PER_HOLEMASK_BIT ||
irec->ir_count > XFS_INODES_PER_CHUNK)
- goto out_bad_rec;
+ return __this_address;
if (irec->ir_freecount > XFS_INODES_PER_CHUNK)
- goto out_bad_rec;
+ return __this_address;
/* if there are no holes, return the first available offset */
if (!xfs_inobt_issparse(irec->ir_holemask))
@@ -129,15 +121,23 @@ xfs_inobt_get_rec(
else
realfree = irec->ir_free & xfs_inobt_irec_to_allocmask(irec);
if (hweight64(realfree) != irec->ir_freecount)
- goto out_bad_rec;
+ return __this_address;
- return 0;
+ return NULL;
+}
+
+static inline int
+xfs_inobt_complain_bad_rec(
+ struct xfs_btree_cur *cur,
+ xfs_failaddr_t fa,
+ const struct xfs_inobt_rec_incore *irec)
+{
+ struct xfs_mount *mp = cur->bc_mp;
-out_bad_rec:
xfs_warn(mp,
- "%s Inode BTree record corruption in AG %d detected!",
+ "%s Inode BTree record corruption in AG %d detected at %pS!",
cur->bc_btnum == XFS_BTNUM_INO ? "Used" : "Free",
- cur->bc_ag.pag->pag_agno);
+ cur->bc_ag.pag->pag_agno, fa);
xfs_warn(mp,
"start inode 0x%x, count 0x%x, free 0x%x freemask 0x%llx, holemask 0x%x",
irec->ir_startino, irec->ir_count, irec->ir_freecount,
@@ -146,6 +146,32 @@ out_bad_rec:
}
/*
+ * Get the data from the pointed-to record.
+ *
+ * The record under @cur is decoded into @irec and run through
+ * xfs_inobt_check_irec(). A record that fails the checks is reported via
+ * xfs_inobt_complain_bad_rec() and that helper's return value is handed
+ * back to the caller. Otherwise this returns 0, or the error from
+ * xfs_btree_get_rec(). @stat is filled in by xfs_btree_get_rec(); when it
+ * comes back zero, @irec is left untouched.
+ */
+int
+xfs_inobt_get_rec(
+ struct xfs_btree_cur *cur,
+ struct xfs_inobt_rec_incore *irec,
+ int *stat)
+{
+ struct xfs_mount *mp = cur->bc_mp;
+ union xfs_btree_rec *rec;
+ xfs_failaddr_t fa;
+ int error;
+
+ error = xfs_btree_get_rec(cur, &rec, stat);
+ if (error || *stat == 0) /* failure, or (presumably) no record at the cursor */
+ return error;
+
+ xfs_inobt_btrec_to_irec(mp, rec, irec);
+ fa = xfs_inobt_check_irec(cur, irec); /* NULL means every check passed */
+ if (fa)
+ return xfs_inobt_complain_bad_rec(cur, fa, irec);
+
+ return 0;
+}
+
+/*
* Insert a single inobt record. Cursor must already point to desired location.
*/
int
@@ -1952,8 +1978,6 @@ xfs_difree_inobt(
*/
if (!xfs_has_ikeep(mp) && rec.ir_free == XFS_INOBT_ALL_FREE &&
mp->m_sb.sb_inopblock <= XFS_INODES_PER_CHUNK) {
- struct xfs_perag *pag = agbp->b_pag;
-
xic->deleted = true;
xic->first_ino = XFS_AGINO_TO_INO(mp, pag->pag_agno,
rec.ir_startino);
@@ -2617,44 +2641,50 @@ xfs_ialloc_read_agi(
return 0;
}
-/* Is there an inode record covering a given range of inode numbers? */
-int
-xfs_ialloc_has_inode_record(
- struct xfs_btree_cur *cur,
- xfs_agino_t low,
- xfs_agino_t high,
- bool *exists)
+/* How many inodes are backed by inode clusters ondisk? */
+STATIC int
+xfs_ialloc_count_ondisk(
+ struct xfs_btree_cur *cur,
+ xfs_agino_t low,
+ xfs_agino_t high,
+ unsigned int *allocated)
{
struct xfs_inobt_rec_incore irec;
- xfs_agino_t agino;
- uint16_t holemask;
- int has_record;
- int i;
- int error;
+ unsigned int ret = 0;
+ int has_record;
+ int error;
- *exists = false;
error = xfs_inobt_lookup(cur, low, XFS_LOOKUP_LE, &has_record);
- while (error == 0 && has_record) {
+ if (error)
+ return error;
+
+ while (has_record) {
+ unsigned int i, hole_idx;
+
error = xfs_inobt_get_rec(cur, &irec, &has_record);
- if (error || irec.ir_startino > high)
+ if (error)
+ return error;
+ if (irec.ir_startino > high)
break;
- agino = irec.ir_startino;
- holemask = irec.ir_holemask;
- for (i = 0; i < XFS_INOBT_HOLEMASK_BITS; holemask >>= 1,
- i++, agino += XFS_INODES_PER_HOLEMASK_BIT) {
- if (holemask & 1)
+ for (i = 0; i < XFS_INODES_PER_CHUNK; i++) {
+ if (irec.ir_startino + i < low)
continue;
- if (agino + XFS_INODES_PER_HOLEMASK_BIT > low &&
- agino <= high) {
- *exists = true;
- return 0;
- }
+ if (irec.ir_startino + i > high)
+ break;
+
+ hole_idx = i / XFS_INODES_PER_HOLEMASK_BIT;
+ if (!(irec.ir_holemask & (1U << hole_idx)))
+ ret++;
}
error = xfs_btree_increment(cur, 0, &has_record);
+ if (error)
+ return error;
}
- return error;
+
+ *allocated = ret;
+ return 0;
}
/* Is there an inode record covering a given extent? */
@@ -2663,15 +2693,27 @@ xfs_ialloc_has_inodes_at_extent(
struct xfs_btree_cur *cur,
xfs_agblock_t bno,
xfs_extlen_t len,
- bool *exists)
+ enum xbtree_recpacking *outcome)
{
- xfs_agino_t low;
- xfs_agino_t high;
+ xfs_agino_t agino;
+ xfs_agino_t last_agino;
+ unsigned int allocated;
+ int error;
+
+ agino = XFS_AGB_TO_AGINO(cur->bc_mp, bno);
+ last_agino = XFS_AGB_TO_AGINO(cur->bc_mp, bno + len) - 1;
- low = XFS_AGB_TO_AGINO(cur->bc_mp, bno);
- high = XFS_AGB_TO_AGINO(cur->bc_mp, bno + len) - 1;
+ error = xfs_ialloc_count_ondisk(cur, agino, last_agino, &allocated);
+ if (error)
+ return error;
- return xfs_ialloc_has_inode_record(cur, low, high, exists);
+ if (allocated == 0)
+ *outcome = XBTREE_RECPACKING_EMPTY;
+ else if (allocated == last_agino - agino + 1)
+ *outcome = XBTREE_RECPACKING_FULL;
+ else
+ *outcome = XBTREE_RECPACKING_SPARSE;
+ return 0;
}
struct xfs_ialloc_count_inodes {
@@ -2688,8 +2730,13 @@ xfs_ialloc_count_inodes_rec(
{
struct xfs_inobt_rec_incore irec;
struct xfs_ialloc_count_inodes *ci = priv;
+ xfs_failaddr_t fa;
xfs_inobt_btrec_to_irec(cur->bc_mp, rec, &irec);
+ fa = xfs_inobt_check_irec(cur, &irec);
+ if (fa)
+ return xfs_inobt_complain_bad_rec(cur, fa, &irec);
+
ci->count += irec.ir_count;
ci->freecount += irec.ir_freecount;
diff --git a/fs/xfs/libxfs/xfs_ialloc.h b/fs/xfs/libxfs/xfs_ialloc.h
index ab8c30b4ec22..fe824bb04a09 100644
--- a/fs/xfs/libxfs/xfs_ialloc.h
+++ b/fs/xfs/libxfs/xfs_ialloc.h
@@ -93,10 +93,11 @@ union xfs_btree_rec;
void xfs_inobt_btrec_to_irec(struct xfs_mount *mp,
const union xfs_btree_rec *rec,
struct xfs_inobt_rec_incore *irec);
+xfs_failaddr_t xfs_inobt_check_irec(struct xfs_btree_cur *cur,
+ const struct xfs_inobt_rec_incore *irec);
int xfs_ialloc_has_inodes_at_extent(struct xfs_btree_cur *cur,
- xfs_agblock_t bno, xfs_extlen_t len, bool *exists);
-int xfs_ialloc_has_inode_record(struct xfs_btree_cur *cur, xfs_agino_t low,
- xfs_agino_t high, bool *exists);
+ xfs_agblock_t bno, xfs_extlen_t len,
+ enum xbtree_recpacking *outcome);
int xfs_ialloc_count_inodes(struct xfs_btree_cur *cur, xfs_agino_t *count,
xfs_agino_t *freecount);
int xfs_inobt_insert_rec(struct xfs_btree_cur *cur, uint16_t holemask,
diff --git a/fs/xfs/libxfs/xfs_ialloc_btree.c b/fs/xfs/libxfs/xfs_ialloc_btree.c
index 9b28211d5a4c..5a945ae21b5d 100644
--- a/fs/xfs/libxfs/xfs_ialloc_btree.c
+++ b/fs/xfs/libxfs/xfs_ialloc_btree.c
@@ -156,9 +156,12 @@ __xfs_inobt_free_block(
struct xfs_buf *bp,
enum xfs_ag_resv_type resv)
{
+ xfs_fsblock_t fsbno;
+
xfs_inobt_mod_blockcount(cur, -1);
- return xfs_free_extent(cur->bc_tp,
- XFS_DADDR_TO_FSB(cur->bc_mp, xfs_buf_daddr(bp)), 1,
+ fsbno = XFS_DADDR_TO_FSB(cur->bc_mp, xfs_buf_daddr(bp));
+ return xfs_free_extent(cur->bc_tp, cur->bc_ag.pag,
+ XFS_FSB_TO_AGBNO(cur->bc_mp, fsbno), 1,
&XFS_RMAP_OINFO_INOBT, resv);
}
@@ -266,10 +269,13 @@ STATIC int64_t
xfs_inobt_diff_two_keys(
struct xfs_btree_cur *cur,
const union xfs_btree_key *k1,
- const union xfs_btree_key *k2)
+ const union xfs_btree_key *k2,
+ const union xfs_btree_key *mask)
{
+ ASSERT(!mask || mask->inobt.ir_startino);
+
return (int64_t)be32_to_cpu(k1->inobt.ir_startino) -
- be32_to_cpu(k2->inobt.ir_startino);
+ be32_to_cpu(k2->inobt.ir_startino);
}
static xfs_failaddr_t
@@ -380,6 +386,19 @@ xfs_inobt_recs_inorder(
be32_to_cpu(r2->inobt.ir_startino);
}
+STATIC enum xbtree_key_contig /* classify key1/key2 by their ir_startino fields */
+xfs_inobt_keys_contiguous(
+ struct xfs_btree_cur *cur,
+ const union xfs_btree_key *key1,
+ const union xfs_btree_key *key2,
+ const union xfs_btree_key *mask)
+{
+ ASSERT(!mask || mask->inobt.ir_startino); /* a supplied mask must select ir_startino */
+
+ return xbtree_key_contig(be32_to_cpu(key1->inobt.ir_startino),
+ be32_to_cpu(key2->inobt.ir_startino));
+}
+
static const struct xfs_btree_ops xfs_inobt_ops = {
.rec_len = sizeof(xfs_inobt_rec_t),
.key_len = sizeof(xfs_inobt_key_t),
@@ -399,6 +418,7 @@ static const struct xfs_btree_ops xfs_inobt_ops = {
.diff_two_keys = xfs_inobt_diff_two_keys,
.keys_inorder = xfs_inobt_keys_inorder,
.recs_inorder = xfs_inobt_recs_inorder,
+ .keys_contiguous = xfs_inobt_keys_contiguous,
};
static const struct xfs_btree_ops xfs_finobt_ops = {
@@ -420,6 +440,7 @@ static const struct xfs_btree_ops xfs_finobt_ops = {
.diff_two_keys = xfs_inobt_diff_two_keys,
.keys_inorder = xfs_inobt_keys_inorder,
.recs_inorder = xfs_inobt_recs_inorder,
+ .keys_contiguous = xfs_inobt_keys_contiguous,
};
/*
@@ -447,9 +468,7 @@ xfs_inobt_init_common(
if (xfs_has_crc(mp))
cur->bc_flags |= XFS_BTREE_CRC_BLOCKS;
- /* take a reference for the cursor */
- atomic_inc(&pag->pag_ref);
- cur->bc_ag.pag = pag;
+ cur->bc_ag.pag = xfs_perag_hold(pag);
return cur;
}
@@ -607,7 +626,7 @@ xfs_iallocbt_maxlevels_ondisk(void)
*/
uint64_t
xfs_inobt_irec_to_allocmask(
- struct xfs_inobt_rec_incore *rec)
+ const struct xfs_inobt_rec_incore *rec)
{
uint64_t bitmap = 0;
uint64_t inodespbit;
diff --git a/fs/xfs/libxfs/xfs_ialloc_btree.h b/fs/xfs/libxfs/xfs_ialloc_btree.h
index e859a6e05230..3262c3fe5ebe 100644
--- a/fs/xfs/libxfs/xfs_ialloc_btree.h
+++ b/fs/xfs/libxfs/xfs_ialloc_btree.h
@@ -53,7 +53,7 @@ struct xfs_btree_cur *xfs_inobt_stage_cursor(struct xfs_perag *pag,
extern int xfs_inobt_maxrecs(struct xfs_mount *, int, int);
/* ir_holemask to inode allocation bitmap conversion */
-uint64_t xfs_inobt_irec_to_allocmask(struct xfs_inobt_rec_incore *);
+uint64_t xfs_inobt_irec_to_allocmask(const struct xfs_inobt_rec_incore *irec);
#if defined(DEBUG) || defined(XFS_WARN)
int xfs_inobt_rec_check_count(struct xfs_mount *,
diff --git a/fs/xfs/libxfs/xfs_inode_fork.c b/fs/xfs/libxfs/xfs_inode_fork.c
index 6b21760184d9..5a2e7ddfa76d 100644
--- a/fs/xfs/libxfs/xfs_inode_fork.c
+++ b/fs/xfs/libxfs/xfs_inode_fork.c
@@ -140,7 +140,8 @@ xfs_iformat_extents(
xfs_inode_verifier_error(ip, -EFSCORRUPTED,
"xfs_iformat_extents(2)",
dp, sizeof(*dp), fa);
- return -EFSCORRUPTED;
+ return xfs_bmap_complain_bad_rec(ip, whichfork,
+ fa, &new);
}
xfs_iext_insert(ip, &icur, &new, state);
@@ -226,10 +227,15 @@ xfs_iformat_data_fork(
/*
* Initialize the extent count early, as the per-format routines may
- * depend on it.
+ * depend on it. Use release semantics to set needextents /after/ we
+ * set the format. This ensures that we can use acquire semantics on
+ * needextents in xfs_need_iread_extents() and be guaranteed to see a
+ * valid format value after that load.
*/
ip->i_df.if_format = dip->di_format;
ip->i_df.if_nextents = xfs_dfork_data_extents(dip);
+ smp_store_release(&ip->i_df.if_needextents,
+ ip->i_df.if_format == XFS_DINODE_FMT_BTREE ? 1 : 0);
switch (inode->i_mode & S_IFMT) {
case S_IFIFO:
@@ -282,8 +288,17 @@ xfs_ifork_init_attr(
enum xfs_dinode_fmt format,
xfs_extnum_t nextents)
{
+ /*
+ * Initialize the extent count early, as the per-format routines may
+ * depend on it. Use release semantics to set needextents /after/ we
+ * set the format. This ensures that we can use acquire semantics on
+ * needextents in xfs_need_iread_extents() and be guaranteed to see a
+ * valid format value after that load.
+ */
ip->i_af.if_format = format;
ip->i_af.if_nextents = nextents;
+ smp_store_release(&ip->i_af.if_needextents,
+ ip->i_af.if_format == XFS_DINODE_FMT_BTREE ? 1 : 0);
}
void
diff --git a/fs/xfs/libxfs/xfs_inode_fork.h b/fs/xfs/libxfs/xfs_inode_fork.h
index d3943d6ad0b9..96d307784c85 100644
--- a/fs/xfs/libxfs/xfs_inode_fork.h
+++ b/fs/xfs/libxfs/xfs_inode_fork.h
@@ -24,6 +24,7 @@ struct xfs_ifork {
xfs_extnum_t if_nextents; /* # of extents in this fork */
short if_broot_bytes; /* bytes allocated for root */
int8_t if_format; /* format of this fork */
+ uint8_t if_needextents; /* extents have not been read */
};
/*
@@ -260,9 +261,10 @@ int xfs_iext_count_upgrade(struct xfs_trans *tp, struct xfs_inode *ip,
uint nr_to_add);
/* returns true if the fork has extents but they are not read in yet. */
-static inline bool xfs_need_iread_extents(struct xfs_ifork *ifp)
+static inline bool xfs_need_iread_extents(const struct xfs_ifork *ifp)
{
- return ifp->if_format == XFS_DINODE_FMT_BTREE && ifp->if_height == 0;
+ /* see xfs_iformat_{data,attr}_fork() for needextents semantics */
+ return smp_load_acquire(&ifp->if_needextents) != 0;
}
#endif /* __XFS_INODE_FORK_H__ */
diff --git a/fs/xfs/libxfs/xfs_refcount.c b/fs/xfs/libxfs/xfs_refcount.c
index bcf46aa0d08b..c1c65774dcc2 100644
--- a/fs/xfs/libxfs/xfs_refcount.c
+++ b/fs/xfs/libxfs/xfs_refcount.c
@@ -120,45 +120,41 @@ xfs_refcount_btrec_to_irec(
irec->rc_refcount = be32_to_cpu(rec->refc.rc_refcount);
}
-/*
- * Get the data from the pointed-to record.
- */
-int
-xfs_refcount_get_rec(
+/* Simple checks for refcount records. */
+xfs_failaddr_t
+xfs_refcount_check_irec(
struct xfs_btree_cur *cur,
- struct xfs_refcount_irec *irec,
- int *stat)
+ const struct xfs_refcount_irec *irec)
{
- struct xfs_mount *mp = cur->bc_mp;
struct xfs_perag *pag = cur->bc_ag.pag;
- union xfs_btree_rec *rec;
- int error;
-
- error = xfs_btree_get_rec(cur, &rec, stat);
- if (error || !*stat)
- return error;
- xfs_refcount_btrec_to_irec(rec, irec);
if (irec->rc_blockcount == 0 || irec->rc_blockcount > MAXREFCEXTLEN)
- goto out_bad_rec;
+ return __this_address;
if (!xfs_refcount_check_domain(irec))
- goto out_bad_rec;
+ return __this_address;
/* check for valid extent range, including overflow */
if (!xfs_verify_agbext(pag, irec->rc_startblock, irec->rc_blockcount))
- goto out_bad_rec;
+ return __this_address;
if (irec->rc_refcount == 0 || irec->rc_refcount > MAXREFCOUNT)
- goto out_bad_rec;
+ return __this_address;
- trace_xfs_refcount_get(cur->bc_mp, pag->pag_agno, irec);
- return 0;
+ return NULL;
+}
+
+static inline int
+xfs_refcount_complain_bad_rec(
+ struct xfs_btree_cur *cur,
+ xfs_failaddr_t fa,
+ const struct xfs_refcount_irec *irec)
+{
+ struct xfs_mount *mp = cur->bc_mp;
-out_bad_rec:
xfs_warn(mp,
- "Refcount BTree record corruption in AG %d detected!",
- pag->pag_agno);
+ "Refcount BTree record corruption in AG %d detected at %pS!",
+ cur->bc_ag.pag->pag_agno, fa);
xfs_warn(mp,
"Start block 0x%x, block count 0x%x, references 0x%x",
irec->rc_startblock, irec->rc_blockcount, irec->rc_refcount);
@@ -166,6 +162,32 @@ out_bad_rec:
}
/*
+ * Get the data from the pointed-to record.
+ *
+ * The record under @cur is decoded into @irec and run through
+ * xfs_refcount_check_irec(). A record that fails the checks is reported
+ * via xfs_refcount_complain_bad_rec() and that helper's return value is
+ * handed back to the caller. A record that passes is traced and 0 is
+ * returned. Errors from xfs_btree_get_rec() are returned as-is; when it
+ * sets @stat to zero, @irec is left untouched.
+ */
+int
+xfs_refcount_get_rec(
+ struct xfs_btree_cur *cur,
+ struct xfs_refcount_irec *irec,
+ int *stat)
+{
+ union xfs_btree_rec *rec;
+ xfs_failaddr_t fa;
+ int error;
+
+ error = xfs_btree_get_rec(cur, &rec, stat);
+ if (error || !*stat) /* failure, or (presumably) no record at the cursor */
+ return error;
+
+ xfs_refcount_btrec_to_irec(rec, irec);
+ fa = xfs_refcount_check_irec(cur, irec); /* NULL means every check passed */
+ if (fa)
+ return xfs_refcount_complain_bad_rec(cur, fa, irec);
+
+ trace_xfs_refcount_get(cur->bc_mp, cur->bc_ag.pag->pag_agno, irec);
+ return 0;
+}
+
+/*
* Update the record referred to by cur to the value given
* by [bno, len, refcount].
* This either works (return 0) or gets an EFSCORRUPTED error.
@@ -1332,26 +1354,22 @@ xfs_refcount_finish_one(
xfs_agblock_t bno;
unsigned long nr_ops = 0;
int shape_changes = 0;
- struct xfs_perag *pag;
- pag = xfs_perag_get(mp, XFS_FSB_TO_AGNO(mp, ri->ri_startblock));
bno = XFS_FSB_TO_AGBNO(mp, ri->ri_startblock);
trace_xfs_refcount_deferred(mp, XFS_FSB_TO_AGNO(mp, ri->ri_startblock),
ri->ri_type, XFS_FSB_TO_AGBNO(mp, ri->ri_startblock),
ri->ri_blockcount);
- if (XFS_TEST_ERROR(false, mp, XFS_ERRTAG_REFCOUNT_FINISH_ONE)) {
- error = -EIO;
- goto out_drop;
- }
+ if (XFS_TEST_ERROR(false, mp, XFS_ERRTAG_REFCOUNT_FINISH_ONE))
+ return -EIO;
/*
* If we haven't gotten a cursor or the cursor AG doesn't match
* the startblock, get one now.
*/
rcur = *pcur;
- if (rcur != NULL && rcur->bc_ag.pag != pag) {
+ if (rcur != NULL && rcur->bc_ag.pag != ri->ri_pag) {
nr_ops = rcur->bc_ag.refc.nr_ops;
shape_changes = rcur->bc_ag.refc.shape_changes;
xfs_refcount_finish_one_cleanup(tp, rcur, 0);
@@ -1359,12 +1377,12 @@ xfs_refcount_finish_one(
*pcur = NULL;
}
if (rcur == NULL) {
- error = xfs_alloc_read_agf(pag, tp, XFS_ALLOC_FLAG_FREEING,
- &agbp);
+ error = xfs_alloc_read_agf(ri->ri_pag, tp,
+ XFS_ALLOC_FLAG_FREEING, &agbp);
if (error)
- goto out_drop;
+ return error;
- rcur = xfs_refcountbt_init_cursor(mp, tp, agbp, pag);
+ rcur = xfs_refcountbt_init_cursor(mp, tp, agbp, ri->ri_pag);
rcur->bc_ag.refc.nr_ops = nr_ops;
rcur->bc_ag.refc.shape_changes = shape_changes;
}
@@ -1375,7 +1393,7 @@ xfs_refcount_finish_one(
error = xfs_refcount_adjust(rcur, &bno, &ri->ri_blockcount,
XFS_REFCOUNT_ADJUST_INCREASE);
if (error)
- goto out_drop;
+ return error;
if (ri->ri_blockcount > 0)
error = xfs_refcount_continue_op(rcur, ri, bno);
break;
@@ -1383,31 +1401,29 @@ xfs_refcount_finish_one(
error = xfs_refcount_adjust(rcur, &bno, &ri->ri_blockcount,
XFS_REFCOUNT_ADJUST_DECREASE);
if (error)
- goto out_drop;
+ return error;
if (ri->ri_blockcount > 0)
error = xfs_refcount_continue_op(rcur, ri, bno);
break;
case XFS_REFCOUNT_ALLOC_COW:
error = __xfs_refcount_cow_alloc(rcur, bno, ri->ri_blockcount);
if (error)
- goto out_drop;
+ return error;
ri->ri_blockcount = 0;
break;
case XFS_REFCOUNT_FREE_COW:
error = __xfs_refcount_cow_free(rcur, bno, ri->ri_blockcount);
if (error)
- goto out_drop;
+ return error;
ri->ri_blockcount = 0;
break;
default:
ASSERT(0);
- error = -EFSCORRUPTED;
+ return -EFSCORRUPTED;
}
if (!error && ri->ri_blockcount > 0)
- trace_xfs_refcount_finish_one_leftover(mp, pag->pag_agno,
+ trace_xfs_refcount_finish_one_leftover(mp, ri->ri_pag->pag_agno,
ri->ri_type, bno, ri->ri_blockcount);
-out_drop:
- xfs_perag_put(pag);
return error;
}
@@ -1435,6 +1451,7 @@ __xfs_refcount_add(
ri->ri_startblock = startblock;
ri->ri_blockcount = blockcount;
+ xfs_refcount_update_get_group(tp->t_mountp, ri);
xfs_defer_add(tp, XFS_DEFER_OPS_TYPE_REFCOUNT, &ri->ri_list);
}
@@ -1876,7 +1893,8 @@ xfs_refcount_recover_extent(
INIT_LIST_HEAD(&rr->rr_list);
xfs_refcount_btrec_to_irec(rec, &rr->rr_rrec);
- if (XFS_IS_CORRUPT(cur->bc_mp,
+ if (xfs_refcount_check_irec(cur, &rr->rr_rrec) != NULL ||
+ XFS_IS_CORRUPT(cur->bc_mp,
rr->rr_rrec.rc_domain != XFS_REFC_DOMAIN_COW)) {
kfree(rr);
return -EFSCORRUPTED;
@@ -1980,14 +1998,17 @@ out_free:
return error;
}
-/* Is there a record covering a given extent? */
+/*
+ * Scan part of the keyspace of the refcount records and tell us if the area
+ * has no records, is fully mapped by records, or is partially filled.
+ */
int
-xfs_refcount_has_record(
+xfs_refcount_has_records(
struct xfs_btree_cur *cur,
enum xfs_refc_domain domain,
xfs_agblock_t bno,
xfs_extlen_t len,
- bool *exists)
+ enum xbtree_recpacking *outcome)
{
union xfs_btree_irec low;
union xfs_btree_irec high;
@@ -1998,7 +2019,7 @@ xfs_refcount_has_record(
high.rc.rc_startblock = bno + len - 1;
low.rc.rc_domain = high.rc.rc_domain = domain;
- return xfs_btree_has_record(cur, &low, &high, exists);
+ return xfs_btree_has_records(cur, &low, &high, NULL, outcome);
}
int __init
diff --git a/fs/xfs/libxfs/xfs_refcount.h b/fs/xfs/libxfs/xfs_refcount.h
index c633477ce3ce..783cd89ca195 100644
--- a/fs/xfs/libxfs/xfs_refcount.h
+++ b/fs/xfs/libxfs/xfs_refcount.h
@@ -50,6 +50,7 @@ enum xfs_refcount_intent_type {
struct xfs_refcount_intent {
struct list_head ri_list;
+ struct xfs_perag *ri_pag;
enum xfs_refcount_intent_type ri_type;
xfs_extlen_t ri_blockcount;
xfs_fsblock_t ri_startblock;
@@ -67,6 +68,9 @@ xfs_refcount_check_domain(
return true;
}
+void xfs_refcount_update_get_group(struct xfs_mount *mp,
+ struct xfs_refcount_intent *ri);
+
void xfs_refcount_increase_extent(struct xfs_trans *tp,
struct xfs_bmbt_irec *irec);
void xfs_refcount_decrease_extent(struct xfs_trans *tp,
@@ -107,12 +111,14 @@ extern int xfs_refcount_recover_cow_leftovers(struct xfs_mount *mp,
*/
#define XFS_REFCOUNT_ITEM_OVERHEAD 32
-extern int xfs_refcount_has_record(struct xfs_btree_cur *cur,
+extern int xfs_refcount_has_records(struct xfs_btree_cur *cur,
enum xfs_refc_domain domain, xfs_agblock_t bno,
- xfs_extlen_t len, bool *exists);
+ xfs_extlen_t len, enum xbtree_recpacking *outcome);
union xfs_btree_rec;
extern void xfs_refcount_btrec_to_irec(const union xfs_btree_rec *rec,
struct xfs_refcount_irec *irec);
+xfs_failaddr_t xfs_refcount_check_irec(struct xfs_btree_cur *cur,
+ const struct xfs_refcount_irec *irec);
extern int xfs_refcount_insert(struct xfs_btree_cur *cur,
struct xfs_refcount_irec *irec, int *stat);
diff --git a/fs/xfs/libxfs/xfs_refcount_btree.c b/fs/xfs/libxfs/xfs_refcount_btree.c
index f3b860970b26..d4afc5f4e6a5 100644
--- a/fs/xfs/libxfs/xfs_refcount_btree.c
+++ b/fs/xfs/libxfs/xfs_refcount_btree.c
@@ -112,8 +112,9 @@ xfs_refcountbt_free_block(
XFS_FSB_TO_AGBNO(cur->bc_mp, fsbno), 1);
be32_add_cpu(&agf->agf_refcount_blocks, -1);
xfs_alloc_log_agf(cur->bc_tp, agbp, XFS_AGF_REFCOUNT_BLOCKS);
- error = xfs_free_extent(cur->bc_tp, fsbno, 1, &XFS_RMAP_OINFO_REFC,
- XFS_AG_RESV_METADATA);
+ error = xfs_free_extent(cur->bc_tp, cur->bc_ag.pag,
+ XFS_FSB_TO_AGBNO(cur->bc_mp, fsbno), 1,
+ &XFS_RMAP_OINFO_REFC, XFS_AG_RESV_METADATA);
if (error)
return error;
@@ -201,10 +202,13 @@ STATIC int64_t
xfs_refcountbt_diff_two_keys(
struct xfs_btree_cur *cur,
const union xfs_btree_key *k1,
- const union xfs_btree_key *k2)
+ const union xfs_btree_key *k2,
+ const union xfs_btree_key *mask)
{
+ ASSERT(!mask || mask->refc.rc_startblock);
+
return (int64_t)be32_to_cpu(k1->refc.rc_startblock) -
- be32_to_cpu(k2->refc.rc_startblock);
+ be32_to_cpu(k2->refc.rc_startblock);
}
STATIC xfs_failaddr_t
@@ -299,6 +303,19 @@ xfs_refcountbt_recs_inorder(
be32_to_cpu(r2->refc.rc_startblock);
}
+STATIC enum xbtree_key_contig /* classify key1/key2 by their rc_startblock fields */
+xfs_refcountbt_keys_contiguous(
+ struct xfs_btree_cur *cur,
+ const union xfs_btree_key *key1,
+ const union xfs_btree_key *key2,
+ const union xfs_btree_key *mask)
+{
+ ASSERT(!mask || mask->refc.rc_startblock); /* a supplied mask must select rc_startblock */
+
+ return xbtree_key_contig(be32_to_cpu(key1->refc.rc_startblock),
+ be32_to_cpu(key2->refc.rc_startblock));
+}
+
static const struct xfs_btree_ops xfs_refcountbt_ops = {
.rec_len = sizeof(struct xfs_refcount_rec),
.key_len = sizeof(struct xfs_refcount_key),
@@ -318,6 +335,7 @@ static const struct xfs_btree_ops xfs_refcountbt_ops = {
.diff_two_keys = xfs_refcountbt_diff_two_keys,
.keys_inorder = xfs_refcountbt_keys_inorder,
.recs_inorder = xfs_refcountbt_recs_inorder,
+ .keys_contiguous = xfs_refcountbt_keys_contiguous,
};
/*
@@ -339,10 +357,7 @@ xfs_refcountbt_init_common(
cur->bc_flags |= XFS_BTREE_CRC_BLOCKS;
- /* take a reference for the cursor */
- atomic_inc(&pag->pag_ref);
- cur->bc_ag.pag = pag;
-
+ cur->bc_ag.pag = xfs_perag_hold(pag);
cur->bc_ag.refc.nr_ops = 0;
cur->bc_ag.refc.shape_changes = 0;
cur->bc_ops = &xfs_refcountbt_ops;
diff --git a/fs/xfs/libxfs/xfs_rmap.c b/fs/xfs/libxfs/xfs_rmap.c
index df720041cd3d..f4dc23b3b837 100644
--- a/fs/xfs/libxfs/xfs_rmap.c
+++ b/fs/xfs/libxfs/xfs_rmap.c
@@ -193,7 +193,7 @@ done:
}
/* Convert an internal btree record to an rmap record. */
-int
+xfs_failaddr_t
xfs_rmap_btrec_to_irec(
const union xfs_btree_rec *rec,
struct xfs_rmap_irec *irec)
@@ -205,51 +205,74 @@ xfs_rmap_btrec_to_irec(
irec);
}
-/*
- * Get the data from the pointed-to record.
- */
-int
-xfs_rmap_get_rec(
- struct xfs_btree_cur *cur,
- struct xfs_rmap_irec *irec,
- int *stat)
+/* Simple checks for rmap records. */
+xfs_failaddr_t
+xfs_rmap_check_irec(
+ struct xfs_btree_cur *cur,
+ const struct xfs_rmap_irec *irec)
{
- struct xfs_mount *mp = cur->bc_mp;
- struct xfs_perag *pag = cur->bc_ag.pag;
- union xfs_btree_rec *rec;
- int error;
-
- error = xfs_btree_get_rec(cur, &rec, stat);
- if (error || !*stat)
- return error;
-
- if (xfs_rmap_btrec_to_irec(rec, irec))
- goto out_bad_rec;
+ struct xfs_mount *mp = cur->bc_mp;
+ bool is_inode;
+ bool is_unwritten;
+ bool is_bmbt;
+ bool is_attr;
if (irec->rm_blockcount == 0)
- goto out_bad_rec;
+ return __this_address;
if (irec->rm_startblock <= XFS_AGFL_BLOCK(mp)) {
if (irec->rm_owner != XFS_RMAP_OWN_FS)
- goto out_bad_rec;
+ return __this_address;
if (irec->rm_blockcount != XFS_AGFL_BLOCK(mp) + 1)
- goto out_bad_rec;
+ return __this_address;
} else {
/* check for valid extent range, including overflow */
- if (!xfs_verify_agbext(pag, irec->rm_startblock,
- irec->rm_blockcount))
- goto out_bad_rec;
+ if (!xfs_verify_agbext(cur->bc_ag.pag, irec->rm_startblock,
+ irec->rm_blockcount))
+ return __this_address;
}
if (!(xfs_verify_ino(mp, irec->rm_owner) ||
(irec->rm_owner <= XFS_RMAP_OWN_FS &&
irec->rm_owner >= XFS_RMAP_OWN_MIN)))
- goto out_bad_rec;
+ return __this_address;
+
+ /* Check flags. */
+ is_inode = !XFS_RMAP_NON_INODE_OWNER(irec->rm_owner);
+ is_bmbt = irec->rm_flags & XFS_RMAP_BMBT_BLOCK;
+ is_attr = irec->rm_flags & XFS_RMAP_ATTR_FORK;
+ is_unwritten = irec->rm_flags & XFS_RMAP_UNWRITTEN;
+
+ if (is_bmbt && irec->rm_offset != 0)
+ return __this_address;
+
+ if (!is_inode && irec->rm_offset != 0)
+ return __this_address;
+
+ if (is_unwritten && (is_bmbt || !is_inode || is_attr))
+ return __this_address;
+
+ if (!is_inode && (is_bmbt || is_unwritten || is_attr))
+ return __this_address;
+
+ /* Check for a valid fork offset, if applicable. */
+ if (is_inode && !is_bmbt &&
+ !xfs_verify_fileext(mp, irec->rm_offset, irec->rm_blockcount))
+ return __this_address;
+
+ return NULL;
+}
+
+static inline int
+xfs_rmap_complain_bad_rec(
+ struct xfs_btree_cur *cur,
+ xfs_failaddr_t fa,
+ const struct xfs_rmap_irec *irec)
+{
+ struct xfs_mount *mp = cur->bc_mp;
- return 0;
-out_bad_rec:
xfs_warn(mp,
- "Reverse Mapping BTree record corruption in AG %d detected!",
- pag->pag_agno);
+ "Reverse Mapping BTree record corruption in AG %d detected at %pS!",
+ cur->bc_ag.pag->pag_agno, fa);
xfs_warn(mp,
"Owner 0x%llx, flags 0x%x, start block 0x%x block count 0x%x",
irec->rm_owner, irec->rm_flags, irec->rm_startblock,
@@ -257,6 +280,32 @@ out_bad_rec:
return -EFSCORRUPTED;
}
+/*
+ * Get the data from the pointed-to record.
+ */
+int
+xfs_rmap_get_rec(
+ struct xfs_btree_cur *cur,
+ struct xfs_rmap_irec *irec,
+ int *stat)
+{
+ union xfs_btree_rec *rec;
+ xfs_failaddr_t fa;
+ int error;
+
+ error = xfs_btree_get_rec(cur, &rec, stat);
+ if (error || !*stat)
+ return error;
+
+ fa = xfs_rmap_btrec_to_irec(rec, irec);
+ if (!fa)
+ fa = xfs_rmap_check_irec(cur, irec);
+ if (fa)
+ return xfs_rmap_complain_bad_rec(cur, fa, irec);
+
+ return 0;
+}
+
struct xfs_find_left_neighbor_info {
struct xfs_rmap_irec high;
struct xfs_rmap_irec *irec;
@@ -2320,11 +2369,14 @@ xfs_rmap_query_range_helper(
{
struct xfs_rmap_query_range_info *query = priv;
struct xfs_rmap_irec irec;
- int error;
+ xfs_failaddr_t fa;
+
+ fa = xfs_rmap_btrec_to_irec(rec, &irec);
+ if (!fa)
+ fa = xfs_rmap_check_irec(cur, &irec);
+ if (fa)
+ return xfs_rmap_complain_bad_rec(cur, fa, &irec);
- error = xfs_rmap_btrec_to_irec(rec, &irec);
- if (error)
- return error;
return query->fn(cur, &irec, query->priv);
}
@@ -2394,7 +2446,6 @@ xfs_rmap_finish_one(
struct xfs_btree_cur **pcur)
{
struct xfs_mount *mp = tp->t_mountp;
- struct xfs_perag *pag;
struct xfs_btree_cur *rcur;
struct xfs_buf *agbp = NULL;
int error = 0;
@@ -2402,26 +2453,22 @@ xfs_rmap_finish_one(
xfs_agblock_t bno;
bool unwritten;
- pag = xfs_perag_get(mp, XFS_FSB_TO_AGNO(mp, ri->ri_bmap.br_startblock));
bno = XFS_FSB_TO_AGBNO(mp, ri->ri_bmap.br_startblock);
- trace_xfs_rmap_deferred(mp, pag->pag_agno, ri->ri_type, bno,
+ trace_xfs_rmap_deferred(mp, ri->ri_pag->pag_agno, ri->ri_type, bno,
ri->ri_owner, ri->ri_whichfork,
ri->ri_bmap.br_startoff, ri->ri_bmap.br_blockcount,
ri->ri_bmap.br_state);
- if (XFS_TEST_ERROR(false, mp, XFS_ERRTAG_RMAP_FINISH_ONE)) {
- error = -EIO;
- goto out_drop;
- }
-
+ if (XFS_TEST_ERROR(false, mp, XFS_ERRTAG_RMAP_FINISH_ONE))
+ return -EIO;
/*
* If we haven't gotten a cursor or the cursor AG doesn't match
* the startblock, get one now.
*/
rcur = *pcur;
- if (rcur != NULL && rcur->bc_ag.pag != pag) {
+ if (rcur != NULL && rcur->bc_ag.pag != ri->ri_pag) {
xfs_rmap_finish_one_cleanup(tp, rcur, 0);
rcur = NULL;
*pcur = NULL;
@@ -2432,15 +2479,13 @@ xfs_rmap_finish_one(
* rmapbt, because a shape change could cause us to
* allocate blocks.
*/
- error = xfs_free_extent_fix_freelist(tp, pag, &agbp);
+ error = xfs_free_extent_fix_freelist(tp, ri->ri_pag, &agbp);
if (error)
- goto out_drop;
- if (XFS_IS_CORRUPT(tp->t_mountp, !agbp)) {
- error = -EFSCORRUPTED;
- goto out_drop;
- }
+ return error;
+ if (XFS_IS_CORRUPT(tp->t_mountp, !agbp))
+ return -EFSCORRUPTED;
- rcur = xfs_rmapbt_init_cursor(mp, tp, agbp, pag);
+ rcur = xfs_rmapbt_init_cursor(mp, tp, agbp, ri->ri_pag);
}
*pcur = rcur;
@@ -2480,8 +2525,7 @@ xfs_rmap_finish_one(
ASSERT(0);
error = -EFSCORRUPTED;
}
-out_drop:
- xfs_perag_put(pag);
+
return error;
}
@@ -2526,6 +2570,7 @@ __xfs_rmap_add(
ri->ri_whichfork = whichfork;
ri->ri_bmap = *bmap;
+ xfs_rmap_update_get_group(tp->t_mountp, ri);
xfs_defer_add(tp, XFS_DEFER_OPS_TYPE_RMAP, &ri->ri_list);
}
@@ -2664,14 +2709,21 @@ xfs_rmap_compare(
return 0;
}
-/* Is there a record covering a given extent? */
+/*
+ * Scan the physical storage part of the keyspace of the reverse mapping index
+ * and tell us if the area has no records, is fully mapped by records, or is
+ * partially filled.
+ */
int
-xfs_rmap_has_record(
+xfs_rmap_has_records(
struct xfs_btree_cur *cur,
xfs_agblock_t bno,
xfs_extlen_t len,
- bool *exists)
+ enum xbtree_recpacking *outcome)
{
+ union xfs_btree_key mask = {
+ .rmap.rm_startblock = cpu_to_be32(-1U),
+ };
union xfs_btree_irec low;
union xfs_btree_irec high;
@@ -2680,68 +2732,144 @@ xfs_rmap_has_record(
memset(&high, 0xFF, sizeof(high));
high.r.rm_startblock = bno + len - 1;
- return xfs_btree_has_record(cur, &low, &high, exists);
+ return xfs_btree_has_records(cur, &low, &high, &mask, outcome);
}
-/*
- * Is there a record for this owner completely covering a given physical
- * extent? If so, *has_rmap will be set to true. If there is no record
- * or the record only covers part of the range, we set *has_rmap to false.
- * This function doesn't perform range lookups or offset checks, so it is
- * not suitable for checking data fork blocks.
- */
-int
-xfs_rmap_record_exists(
- struct xfs_btree_cur *cur,
+struct xfs_rmap_ownercount {
+ /* Owner that we're looking for. */
+ struct xfs_rmap_irec good;
+
+ /* rmap search keys */
+ struct xfs_rmap_irec low;
+ struct xfs_rmap_irec high;
+
+ struct xfs_rmap_matches *results;
+
+ /* Stop early if we find a nonmatch? */
+ bool stop_on_nonmatch;
+};
+
+/* Does this rmap represent space that can have multiple owners? */
+static inline bool
+xfs_rmap_shareable(
+ struct xfs_mount *mp,
+ const struct xfs_rmap_irec *rmap)
+{
+ if (!xfs_has_reflink(mp))
+ return false;
+ if (XFS_RMAP_NON_INODE_OWNER(rmap->rm_owner))
+ return false;
+ if (rmap->rm_flags & (XFS_RMAP_ATTR_FORK |
+ XFS_RMAP_BMBT_BLOCK))
+ return false;
+ return true;
+}
+
+static inline void
+xfs_rmap_ownercount_init(
+ struct xfs_rmap_ownercount *roc,
xfs_agblock_t bno,
xfs_extlen_t len,
const struct xfs_owner_info *oinfo,
- bool *has_rmap)
+ struct xfs_rmap_matches *results)
{
- uint64_t owner;
- uint64_t offset;
- unsigned int flags;
- int has_record;
- struct xfs_rmap_irec irec;
- int error;
+ memset(roc, 0, sizeof(*roc));
+ roc->results = results;
+
+ roc->low.rm_startblock = bno;
+ memset(&roc->high, 0xFF, sizeof(roc->high));
+ roc->high.rm_startblock = bno + len - 1;
+
+ memset(results, 0, sizeof(*results));
+ roc->good.rm_startblock = bno;
+ roc->good.rm_blockcount = len;
+ roc->good.rm_owner = oinfo->oi_owner;
+ roc->good.rm_offset = oinfo->oi_offset;
+ if (oinfo->oi_flags & XFS_OWNER_INFO_ATTR_FORK)
+ roc->good.rm_flags |= XFS_RMAP_ATTR_FORK;
+ if (oinfo->oi_flags & XFS_OWNER_INFO_BMBT_BLOCK)
+ roc->good.rm_flags |= XFS_RMAP_BMBT_BLOCK;
+}
- xfs_owner_info_unpack(oinfo, &owner, &offset, &flags);
- ASSERT(XFS_RMAP_NON_INODE_OWNER(owner) ||
- (flags & XFS_RMAP_BMBT_BLOCK));
+/* Figure out if this is a match for the owner. */
+STATIC int
+xfs_rmap_count_owners_helper(
+ struct xfs_btree_cur *cur,
+ const struct xfs_rmap_irec *rec,
+ void *priv)
+{
+ struct xfs_rmap_ownercount *roc = priv;
+ struct xfs_rmap_irec check = *rec;
+ unsigned int keyflags;
+ bool filedata;
+ int64_t delta;
+
+ filedata = !XFS_RMAP_NON_INODE_OWNER(check.rm_owner) &&
+ !(check.rm_flags & XFS_RMAP_BMBT_BLOCK);
+
+ /* Trim the part of check that comes before the comparison range. */
+ delta = (int64_t)roc->good.rm_startblock - check.rm_startblock;
+ if (delta > 0) {
+ check.rm_startblock += delta;
+ check.rm_blockcount -= delta;
+ if (filedata)
+ check.rm_offset += delta;
+ }
- error = xfs_rmap_lookup_le(cur, bno, owner, offset, flags, &irec,
- &has_record);
- if (error)
- return error;
- if (!has_record) {
- *has_rmap = false;
- return 0;
+ /* Trim the part of check that comes after the comparison range. */
+ delta = (check.rm_startblock + check.rm_blockcount) -
+ (roc->good.rm_startblock + roc->good.rm_blockcount);
+ if (delta > 0)
+ check.rm_blockcount -= delta;
+
+ /* Don't care about unwritten status for establishing ownership. */
+ keyflags = check.rm_flags & (XFS_RMAP_ATTR_FORK | XFS_RMAP_BMBT_BLOCK);
+
+ if (check.rm_startblock == roc->good.rm_startblock &&
+ check.rm_blockcount == roc->good.rm_blockcount &&
+ check.rm_owner == roc->good.rm_owner &&
+ check.rm_offset == roc->good.rm_offset &&
+ keyflags == roc->good.rm_flags) {
+ roc->results->matches++;
+ } else {
+ roc->results->non_owner_matches++;
+ if (xfs_rmap_shareable(cur->bc_mp, &roc->good) ^
+ xfs_rmap_shareable(cur->bc_mp, &check))
+ roc->results->bad_non_owner_matches++;
}
- *has_rmap = (irec.rm_owner == owner && irec.rm_startblock <= bno &&
- irec.rm_startblock + irec.rm_blockcount >= bno + len);
+ if (roc->results->non_owner_matches && roc->stop_on_nonmatch)
+ return -ECANCELED;
+
return 0;
}
-struct xfs_rmap_key_state {
- uint64_t owner;
- uint64_t offset;
- unsigned int flags;
-};
-
-/* For each rmap given, figure out if it doesn't match the key we want. */
-STATIC int
-xfs_rmap_has_other_keys_helper(
+/* Count the number of owners and non-owners of this range of blocks. */
+int
+xfs_rmap_count_owners(
struct xfs_btree_cur *cur,
- const struct xfs_rmap_irec *rec,
- void *priv)
+ xfs_agblock_t bno,
+ xfs_extlen_t len,
+ const struct xfs_owner_info *oinfo,
+ struct xfs_rmap_matches *results)
{
- struct xfs_rmap_key_state *rks = priv;
+ struct xfs_rmap_ownercount roc;
+ int error;
- if (rks->owner == rec->rm_owner && rks->offset == rec->rm_offset &&
- ((rks->flags & rec->rm_flags) & XFS_RMAP_KEY_FLAGS) == rks->flags)
- return 0;
- return -ECANCELED;
+ xfs_rmap_ownercount_init(&roc, bno, len, oinfo, results);
+ error = xfs_rmap_query_range(cur, &roc.low, &roc.high,
+ xfs_rmap_count_owners_helper, &roc);
+ if (error)
+ return error;
+
+ /*
+ * There can't be any non-owner rmaps that conflict with the given
+ * owner if we didn't find any rmaps matching the owner.
+ */
+ if (!results->matches)
+ results->bad_non_owner_matches = 0;
+
+ return 0;
}
/*
@@ -2754,28 +2882,26 @@ xfs_rmap_has_other_keys(
xfs_agblock_t bno,
xfs_extlen_t len,
const struct xfs_owner_info *oinfo,
- bool *has_rmap)
+ bool *has_other)
{
- struct xfs_rmap_irec low = {0};
- struct xfs_rmap_irec high;
- struct xfs_rmap_key_state rks;
+ struct xfs_rmap_matches res;
+ struct xfs_rmap_ownercount roc;
int error;
- xfs_owner_info_unpack(oinfo, &rks.owner, &rks.offset, &rks.flags);
- *has_rmap = false;
-
- low.rm_startblock = bno;
- memset(&high, 0xFF, sizeof(high));
- high.rm_startblock = bno + len - 1;
+ xfs_rmap_ownercount_init(&roc, bno, len, oinfo, &res);
+ roc.stop_on_nonmatch = true;
- error = xfs_rmap_query_range(cur, &low, &high,
- xfs_rmap_has_other_keys_helper, &rks);
+ error = xfs_rmap_query_range(cur, &roc.low, &roc.high,
+ xfs_rmap_count_owners_helper, &roc);
if (error == -ECANCELED) {
- *has_rmap = true;
+ *has_other = true;
return 0;
}
+ if (error)
+ return error;
- return error;
+ *has_other = false;
+ return 0;
}
const struct xfs_owner_info XFS_RMAP_OINFO_SKIP_UPDATE = {
diff --git a/fs/xfs/libxfs/xfs_rmap.h b/fs/xfs/libxfs/xfs_rmap.h
index 2dac88cea28d..3c98d9d50afb 100644
--- a/fs/xfs/libxfs/xfs_rmap.h
+++ b/fs/xfs/libxfs/xfs_rmap.h
@@ -62,13 +62,14 @@ xfs_rmap_irec_offset_pack(
return x;
}
-static inline int
+static inline xfs_failaddr_t
xfs_rmap_irec_offset_unpack(
__u64 offset,
struct xfs_rmap_irec *irec)
{
if (offset & ~(XFS_RMAP_OFF_MASK | XFS_RMAP_OFF_FLAGS))
- return -EFSCORRUPTED;
+ return __this_address;
+
irec->rm_offset = XFS_RMAP_OFF(offset);
irec->rm_flags = 0;
if (offset & XFS_RMAP_OFF_ATTR_FORK)
@@ -77,7 +78,7 @@ xfs_rmap_irec_offset_unpack(
irec->rm_flags |= XFS_RMAP_BMBT_BLOCK;
if (offset & XFS_RMAP_OFF_UNWRITTEN)
irec->rm_flags |= XFS_RMAP_UNWRITTEN;
- return 0;
+ return NULL;
}
static inline void
@@ -162,8 +163,12 @@ struct xfs_rmap_intent {
int ri_whichfork;
uint64_t ri_owner;
struct xfs_bmbt_irec ri_bmap;
+ struct xfs_perag *ri_pag;
};
+void xfs_rmap_update_get_group(struct xfs_mount *mp,
+ struct xfs_rmap_intent *ri);
+
/* functions for updating the rmapbt based on bmbt map/unmap operations */
void xfs_rmap_map_extent(struct xfs_trans *tp, struct xfs_inode *ip,
int whichfork, struct xfs_bmbt_irec *imap);
@@ -188,16 +193,31 @@ int xfs_rmap_lookup_le_range(struct xfs_btree_cur *cur, xfs_agblock_t bno,
int xfs_rmap_compare(const struct xfs_rmap_irec *a,
const struct xfs_rmap_irec *b);
union xfs_btree_rec;
-int xfs_rmap_btrec_to_irec(const union xfs_btree_rec *rec,
+xfs_failaddr_t xfs_rmap_btrec_to_irec(const union xfs_btree_rec *rec,
struct xfs_rmap_irec *irec);
-int xfs_rmap_has_record(struct xfs_btree_cur *cur, xfs_agblock_t bno,
- xfs_extlen_t len, bool *exists);
-int xfs_rmap_record_exists(struct xfs_btree_cur *cur, xfs_agblock_t bno,
+xfs_failaddr_t xfs_rmap_check_irec(struct xfs_btree_cur *cur,
+ const struct xfs_rmap_irec *irec);
+
+int xfs_rmap_has_records(struct xfs_btree_cur *cur, xfs_agblock_t bno,
+ xfs_extlen_t len, enum xbtree_recpacking *outcome);
+
+struct xfs_rmap_matches {
+ /* Number of owner matches. */
+ unsigned long long matches;
+
+ /* Number of non-owner matches. */
+ unsigned long long non_owner_matches;
+
+ /* Number of non-owner matches that conflict with the owner matches. */
+ unsigned long long bad_non_owner_matches;
+};
+
+int xfs_rmap_count_owners(struct xfs_btree_cur *cur, xfs_agblock_t bno,
xfs_extlen_t len, const struct xfs_owner_info *oinfo,
- bool *has_rmap);
+ struct xfs_rmap_matches *rmatch);
int xfs_rmap_has_other_keys(struct xfs_btree_cur *cur, xfs_agblock_t bno,
xfs_extlen_t len, const struct xfs_owner_info *oinfo,
- bool *has_rmap);
+ bool *has_other);
int xfs_rmap_map_raw(struct xfs_btree_cur *cur, struct xfs_rmap_irec *rmap);
extern const struct xfs_owner_info XFS_RMAP_OINFO_SKIP_UPDATE;
diff --git a/fs/xfs/libxfs/xfs_rmap_btree.c b/fs/xfs/libxfs/xfs_rmap_btree.c
index d3285684bb5e..6c81b20e97d2 100644
--- a/fs/xfs/libxfs/xfs_rmap_btree.c
+++ b/fs/xfs/libxfs/xfs_rmap_btree.c
@@ -156,6 +156,16 @@ xfs_rmapbt_get_maxrecs(
return cur->bc_mp->m_rmap_mxr[level != 0];
}
+/*
+ * Convert the ondisk record's offset field into the ondisk key's offset field.
+ * Fork and bmbt are significant parts of the rmap record key, but written
+ * status is merely a record attribute.
+ */
+static inline __be64 ondisk_rec_offset_to_key(const union xfs_btree_rec *rec)
+{
+ return rec->rmap.rm_offset & ~cpu_to_be64(XFS_RMAP_OFF_UNWRITTEN);
+}
+
STATIC void
xfs_rmapbt_init_key_from_rec(
union xfs_btree_key *key,
@@ -163,7 +173,7 @@ xfs_rmapbt_init_key_from_rec(
{
key->rmap.rm_startblock = rec->rmap.rm_startblock;
key->rmap.rm_owner = rec->rmap.rm_owner;
- key->rmap.rm_offset = rec->rmap.rm_offset;
+ key->rmap.rm_offset = ondisk_rec_offset_to_key(rec);
}
/*
@@ -186,7 +196,7 @@ xfs_rmapbt_init_high_key_from_rec(
key->rmap.rm_startblock = rec->rmap.rm_startblock;
be32_add_cpu(&key->rmap.rm_startblock, adj);
key->rmap.rm_owner = rec->rmap.rm_owner;
- key->rmap.rm_offset = rec->rmap.rm_offset;
+ key->rmap.rm_offset = ondisk_rec_offset_to_key(rec);
if (XFS_RMAP_NON_INODE_OWNER(be64_to_cpu(rec->rmap.rm_owner)) ||
XFS_RMAP_IS_BMBT_BLOCK(be64_to_cpu(rec->rmap.rm_offset)))
return;
@@ -219,6 +229,16 @@ xfs_rmapbt_init_ptr_from_cur(
ptr->s = agf->agf_roots[cur->bc_btnum];
}
+/*
+ * Mask the appropriate parts of the ondisk key field for a key comparison.
+ * Fork and bmbt are significant parts of the rmap record key, but written
+ * status is merely a record attribute.
+ */
+static inline uint64_t offset_keymask(uint64_t offset)
+{
+ return offset & ~XFS_RMAP_OFF_UNWRITTEN;
+}
+
STATIC int64_t
xfs_rmapbt_key_diff(
struct xfs_btree_cur *cur,
@@ -240,8 +260,8 @@ xfs_rmapbt_key_diff(
else if (y > x)
return -1;
- x = XFS_RMAP_OFF(be64_to_cpu(kp->rm_offset));
- y = rec->rm_offset;
+ x = offset_keymask(be64_to_cpu(kp->rm_offset));
+ y = offset_keymask(xfs_rmap_irec_offset_pack(rec));
if (x > y)
return 1;
else if (y > x)
@@ -253,31 +273,43 @@ STATIC int64_t
xfs_rmapbt_diff_two_keys(
struct xfs_btree_cur *cur,
const union xfs_btree_key *k1,
- const union xfs_btree_key *k2)
+ const union xfs_btree_key *k2,
+ const union xfs_btree_key *mask)
{
const struct xfs_rmap_key *kp1 = &k1->rmap;
const struct xfs_rmap_key *kp2 = &k2->rmap;
int64_t d;
__u64 x, y;
+ /* Doesn't make sense to mask off the physical space part */
+ ASSERT(!mask || mask->rmap.rm_startblock);
+
d = (int64_t)be32_to_cpu(kp1->rm_startblock) -
- be32_to_cpu(kp2->rm_startblock);
+ be32_to_cpu(kp2->rm_startblock);
if (d)
return d;
- x = be64_to_cpu(kp1->rm_owner);
- y = be64_to_cpu(kp2->rm_owner);
- if (x > y)
- return 1;
- else if (y > x)
- return -1;
+ if (!mask || mask->rmap.rm_owner) {
+ x = be64_to_cpu(kp1->rm_owner);
+ y = be64_to_cpu(kp2->rm_owner);
+ if (x > y)
+ return 1;
+ else if (y > x)
+ return -1;
+ }
+
+ if (!mask || mask->rmap.rm_offset) {
+ /* Doesn't make sense to allow offset but not owner */
+ ASSERT(!mask || mask->rmap.rm_owner);
+
+ x = offset_keymask(be64_to_cpu(kp1->rm_offset));
+ y = offset_keymask(be64_to_cpu(kp2->rm_offset));
+ if (x > y)
+ return 1;
+ else if (y > x)
+ return -1;
+ }
- x = XFS_RMAP_OFF(be64_to_cpu(kp1->rm_offset));
- y = XFS_RMAP_OFF(be64_to_cpu(kp2->rm_offset));
- if (x > y)
- return 1;
- else if (y > x)
- return -1;
return 0;
}
@@ -387,8 +419,8 @@ xfs_rmapbt_keys_inorder(
return 1;
else if (a > b)
return 0;
- a = XFS_RMAP_OFF(be64_to_cpu(k1->rmap.rm_offset));
- b = XFS_RMAP_OFF(be64_to_cpu(k2->rmap.rm_offset));
+ a = offset_keymask(be64_to_cpu(k1->rmap.rm_offset));
+ b = offset_keymask(be64_to_cpu(k2->rmap.rm_offset));
if (a <= b)
return 1;
return 0;
@@ -417,13 +449,33 @@ xfs_rmapbt_recs_inorder(
return 1;
else if (a > b)
return 0;
- a = XFS_RMAP_OFF(be64_to_cpu(r1->rmap.rm_offset));
- b = XFS_RMAP_OFF(be64_to_cpu(r2->rmap.rm_offset));
+ a = offset_keymask(be64_to_cpu(r1->rmap.rm_offset));
+ b = offset_keymask(be64_to_cpu(r2->rmap.rm_offset));
if (a <= b)
return 1;
return 0;
}
+STATIC enum xbtree_key_contig
+xfs_rmapbt_keys_contiguous(
+ struct xfs_btree_cur *cur,
+ const union xfs_btree_key *key1,
+ const union xfs_btree_key *key2,
+ const union xfs_btree_key *mask)
+{
+ ASSERT(!mask || mask->rmap.rm_startblock);
+
+ /*
+ * We only support checking contiguity of the physical space component.
+ * If any callers ever need more specificity than that, they'll have to
+ * implement it here.
+ */
+ ASSERT(!mask || (!mask->rmap.rm_owner && !mask->rmap.rm_offset));
+
+ return xbtree_key_contig(be32_to_cpu(key1->rmap.rm_startblock),
+ be32_to_cpu(key2->rmap.rm_startblock));
+}
+
static const struct xfs_btree_ops xfs_rmapbt_ops = {
.rec_len = sizeof(struct xfs_rmap_rec),
.key_len = 2 * sizeof(struct xfs_rmap_key),
@@ -443,6 +495,7 @@ static const struct xfs_btree_ops xfs_rmapbt_ops = {
.diff_two_keys = xfs_rmapbt_diff_two_keys,
.keys_inorder = xfs_rmapbt_keys_inorder,
.recs_inorder = xfs_rmapbt_recs_inorder,
+ .keys_contiguous = xfs_rmapbt_keys_contiguous,
};
static struct xfs_btree_cur *
@@ -460,10 +513,7 @@ xfs_rmapbt_init_common(
cur->bc_statoff = XFS_STATS_CALC_INDEX(xs_rmap_2);
cur->bc_ops = &xfs_rmapbt_ops;
- /* take a reference for the cursor */
- atomic_inc(&pag->pag_ref);
- cur->bc_ag.pag = pag;
-
+ cur->bc_ag.pag = xfs_perag_hold(pag);
return cur;
}
diff --git a/fs/xfs/libxfs/xfs_sb.c b/fs/xfs/libxfs/xfs_sb.c
index 99cc03a298e2..ba0f17bc1dc0 100644
--- a/fs/xfs/libxfs/xfs_sb.c
+++ b/fs/xfs/libxfs/xfs_sb.c
@@ -72,7 +72,8 @@ xfs_sb_validate_v5_features(
}
/*
- * We support all XFS versions newer than a v4 superblock with V2 directories.
+ * We currently support XFS v5 formats with known features and v4 superblocks
+ * with at least V2 directories.
*/
bool
xfs_sb_good_version(
@@ -86,16 +87,16 @@ xfs_sb_good_version(
if (xfs_sb_is_v5(sbp))
return xfs_sb_validate_v5_features(sbp);
+ /* versions prior to v4 are not supported */
+ if (XFS_SB_VERSION_NUM(sbp) != XFS_SB_VERSION_4)
+ return false;
+
/* We must not have any unknown v4 feature bits set */
if ((sbp->sb_versionnum & ~XFS_SB_VERSION_OKBITS) ||
((sbp->sb_versionnum & XFS_SB_VERSION_MOREBITSBIT) &&
(sbp->sb_features2 & ~XFS_SB_VERSION2_OKBITS)))
return false;
- /* versions prior to v4 are not supported */
- if (XFS_SB_VERSION_NUM(sbp) < XFS_SB_VERSION_4)
- return false;
-
/* V4 filesystems need v2 directories and unwritten extents */
if (!(sbp->sb_versionnum & XFS_SB_VERSION_DIRV2BIT))
return false;
diff --git a/fs/xfs/libxfs/xfs_types.h b/fs/xfs/libxfs/xfs_types.h
index 5ebdda7e1078..851220021484 100644
--- a/fs/xfs/libxfs/xfs_types.h
+++ b/fs/xfs/libxfs/xfs_types.h
@@ -204,6 +204,18 @@ enum xfs_ag_resv_type {
XFS_AG_RESV_RMAPBT,
};
+/* Results of scanning a btree keyspace to check occupancy. */
+enum xbtree_recpacking {
+ /* None of the keyspace maps to records. */
+ XBTREE_RECPACKING_EMPTY = 0,
+
+ /* Some, but not all, of the keyspace maps to records. */
+ XBTREE_RECPACKING_SPARSE,
+
+ /* The entire keyspace maps to records. */
+ XBTREE_RECPACKING_FULL,
+};
+
/*
* Type verifier functions
*/