summaryrefslogtreecommitdiffstats
path: root/fs/xfs/libxfs/xfs_refcount_btree.c
diff options
context:
space:
mode:
authorDarrick J. Wong <djwong@kernel.org>2022-10-10 18:06:24 +0200
committerDarrick J. Wong <djwong@kernel.org>2022-10-31 16:58:21 +0100
commit9a50ee4f8db6e4dd0d8d757b7adaf0591776860a (patch)
tree136d1cac30e3b07333be46da4b777edaf300389a /fs/xfs/libxfs/xfs_refcount_btree.c
parentxfs: refactor refcount record usage in xchk_refcountbt_rec (diff)
downloadlinux-9a50ee4f8db6e4dd0d8d757b7adaf0591776860a.tar.xz
linux-9a50ee4f8db6e4dd0d8d757b7adaf0591776860a.zip
xfs: track cow/shared record domains explicitly in xfs_refcount_irec
Just prior to committing the reflink code into upstream, the xfs maintainer at the time requested that I find a way to shard the refcount records into two domains -- one for records tracking shared extents, and a second for tracking CoW staging extents. The idea here was to minimize mount time CoW reclamation by pushing all the CoW records to the right edge of the keyspace, and it was accomplished by setting the upper bit in rc_startblock. We don't allow AGs to have more than 2^31 blocks, so the bit was free. Unfortunately, this was a very late addition to the codebase, so most of the refcount record processing code still treats rc_startblock as a u32 and pays no attention to whether or not the upper bit (the cow flag) is set. This is a weakness is theoretically exploitable, since we're not fully validating the incoming metadata records. Fuzzing demonstrates practical exploits of this weakness. If the cow flag of a node block key record is corrupted, a lookup operation can go to the wrong record block and start returning records from the wrong cow/shared domain. This causes the math to go all wrong (since cow domain is still implicit in the upper bit of rc_startblock) and we can crash the kernel by tricking xfs into jumping into a nonexistent AG and tripping over xfs_perag_get(mp, <nonexistent AG>) returning NULL. To fix this, start tracking the domain as an explicit part of struct xfs_refcount_irec, adjust all refcount functions to check the domain of a returned record, and alter the function definitions to accept them where necessary. Found by fuzzing keys[2].cowflag = add in xfs/464. Signed-off-by: Darrick J. Wong <djwong@kernel.org> Reviewed-by: Dave Chinner <dchinner@redhat.com>
Diffstat (limited to 'fs/xfs/libxfs/xfs_refcount_btree.c')
-rw-r--r--fs/xfs/libxfs/xfs_refcount_btree.c15
1 files changed, 12 insertions, 3 deletions
diff --git a/fs/xfs/libxfs/xfs_refcount_btree.c b/fs/xfs/libxfs/xfs_refcount_btree.c
index 316c1ec0c3c2..e1f789866683 100644
--- a/fs/xfs/libxfs/xfs_refcount_btree.c
+++ b/fs/xfs/libxfs/xfs_refcount_btree.c
@@ -13,6 +13,7 @@
#include "xfs_btree.h"
#include "xfs_btree_staging.h"
#include "xfs_refcount_btree.h"
+#include "xfs_refcount.h"
#include "xfs_alloc.h"
#include "xfs_error.h"
#include "xfs_trace.h"
@@ -160,7 +161,12 @@ xfs_refcountbt_init_rec_from_cur(
struct xfs_btree_cur *cur,
union xfs_btree_rec *rec)
{
- rec->refc.rc_startblock = cpu_to_be32(cur->bc_rec.rc.rc_startblock);
+ const struct xfs_refcount_irec *irec = &cur->bc_rec.rc;
+ uint32_t start;
+
+ start = xfs_refcount_encode_startblock(irec->rc_startblock,
+ irec->rc_domain);
+ rec->refc.rc_startblock = cpu_to_be32(start);
rec->refc.rc_blockcount = cpu_to_be32(cur->bc_rec.rc.rc_blockcount);
rec->refc.rc_refcount = cpu_to_be32(cur->bc_rec.rc.rc_refcount);
}
@@ -182,10 +188,13 @@ xfs_refcountbt_key_diff(
struct xfs_btree_cur *cur,
const union xfs_btree_key *key)
{
- struct xfs_refcount_irec *rec = &cur->bc_rec.rc;
const struct xfs_refcount_key *kp = &key->refc;
+ const struct xfs_refcount_irec *irec = &cur->bc_rec.rc;
+ uint32_t start;
- return (int64_t)be32_to_cpu(kp->rc_startblock) - rec->rc_startblock;
+ start = xfs_refcount_encode_startblock(irec->rc_startblock,
+ irec->rc_domain);
+ return (int64_t)be32_to_cpu(kp->rc_startblock) - start;
}
STATIC int64_t