summaryrefslogtreecommitdiffstats
path: root/fs/xfs/xfs_btree.c
diff options
context:
space:
mode:
authorChristoph Hellwig <hch@lst.de>2013-04-21 21:53:46 +0200
committerBen Myers <bpm@sgi.com>2013-04-21 21:53:46 +0200
commitee1a47ab0e77600fcbdf1c87d461bd8f3f63150d (patch)
tree6340d9f4b8b53c0d18045da1372599645375efce /fs/xfs/xfs_btree.c
parentxfs: increase hexdump output in xfs_corruption_error (diff)
downloadlinux-ee1a47ab0e77600fcbdf1c87d461bd8f3f63150d.tar.xz
linux-ee1a47ab0e77600fcbdf1c87d461bd8f3f63150d.zip
xfs: add support for large btree blocks
Add support for larger btree blocks that contains a CRC32C checksum, a filesystem uuid and block number for detecting filesystem consistency and out of place writes. [dchinner@redhat.com] Also include an owner field to allow reverse mappings to be implemented for improved repairability and a LSN field to so that log recovery can easily determine the last modification that made it to disk for each buffer. [dchinner@redhat.com] Add buffer log format flags to indicate the type of buffer to recovery so that we don't have to do blind magic number tests to determine what the buffer is. [dchinner@redhat.com] Modified to fit into the verifier structure. Signed-off-by: Christoph Hellwig <hch@lst.de> Signed-off-by: Dave Chinner <dchinner@redhat.com> Reviewed-by: Ben Myers <bpm@sgi.com> Signed-off-by: Ben Myers <bpm@sgi.com>
Diffstat (limited to 'fs/xfs/xfs_btree.c')
-rw-r--r--fs/xfs/xfs_btree.c256
1 files changed, 211 insertions, 45 deletions
diff --git a/fs/xfs/xfs_btree.c b/fs/xfs/xfs_btree.c
index db010408d701..ec77036f13b5 100644
--- a/fs/xfs/xfs_btree.c
+++ b/fs/xfs/xfs_btree.c
@@ -30,9 +30,11 @@
#include "xfs_dinode.h"
#include "xfs_inode.h"
#include "xfs_inode_item.h"
+#include "xfs_buf_item.h"
#include "xfs_btree.h"
#include "xfs_error.h"
#include "xfs_trace.h"
+#include "xfs_cksum.h"
/*
* Cursor allocation zone.
@@ -42,9 +44,13 @@ kmem_zone_t *xfs_btree_cur_zone;
/*
* Btree magic numbers.
*/
-const __uint32_t xfs_magics[XFS_BTNUM_MAX] = {
- XFS_ABTB_MAGIC, XFS_ABTC_MAGIC, XFS_BMAP_MAGIC, XFS_IBT_MAGIC
+static const __uint32_t xfs_magics[2][XFS_BTNUM_MAX] = {
+ { XFS_ABTB_MAGIC, XFS_ABTC_MAGIC, XFS_BMAP_MAGIC, XFS_IBT_MAGIC },
+ { XFS_ABTB_CRC_MAGIC, XFS_ABTC_CRC_MAGIC,
+ XFS_BMAP_CRC_MAGIC, XFS_IBT_CRC_MAGIC }
};
+#define xfs_btree_magic(cur) \
+ xfs_magics[!!((cur)->bc_flags & XFS_BTREE_CRC_BLOCKS)][cur->bc_btnum]
STATIC int /* error (0 or EFSCORRUPTED) */
@@ -54,30 +60,38 @@ xfs_btree_check_lblock(
int level, /* level of the btree block */
struct xfs_buf *bp) /* buffer for block, if any */
{
- int lblock_ok; /* block passes checks */
+ int lblock_ok = 1; /* block passes checks */
struct xfs_mount *mp; /* file system mount point */
mp = cur->bc_mp;
- lblock_ok =
- be32_to_cpu(block->bb_magic) == xfs_magics[cur->bc_btnum] &&
+
+ if (xfs_sb_version_hascrc(&mp->m_sb)) {
+ lblock_ok = lblock_ok &&
+ uuid_equal(&block->bb_u.l.bb_uuid, &mp->m_sb.sb_uuid) &&
+ block->bb_u.l.bb_blkno == cpu_to_be64(
+ bp ? bp->b_bn : XFS_BUF_DADDR_NULL);
+ }
+
+ lblock_ok = lblock_ok &&
+ be32_to_cpu(block->bb_magic) == xfs_btree_magic(cur) &&
be16_to_cpu(block->bb_level) == level &&
be16_to_cpu(block->bb_numrecs) <=
cur->bc_ops->get_maxrecs(cur, level) &&
block->bb_u.l.bb_leftsib &&
(block->bb_u.l.bb_leftsib == cpu_to_be64(NULLDFSBNO) ||
XFS_FSB_SANITY_CHECK(mp,
- be64_to_cpu(block->bb_u.l.bb_leftsib))) &&
+ be64_to_cpu(block->bb_u.l.bb_leftsib))) &&
block->bb_u.l.bb_rightsib &&
(block->bb_u.l.bb_rightsib == cpu_to_be64(NULLDFSBNO) ||
XFS_FSB_SANITY_CHECK(mp,
- be64_to_cpu(block->bb_u.l.bb_rightsib)));
+ be64_to_cpu(block->bb_u.l.bb_rightsib)));
+
if (unlikely(XFS_TEST_ERROR(!lblock_ok, mp,
XFS_ERRTAG_BTREE_CHECK_LBLOCK,
XFS_RANDOM_BTREE_CHECK_LBLOCK))) {
if (bp)
trace_xfs_btree_corrupt(bp, _RET_IP_);
- XFS_ERROR_REPORT("xfs_btree_check_lblock", XFS_ERRLEVEL_LOW,
- mp);
+ XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, mp);
return XFS_ERROR(EFSCORRUPTED);
}
return 0;
@@ -90,16 +104,26 @@ xfs_btree_check_sblock(
int level, /* level of the btree block */
struct xfs_buf *bp) /* buffer containing block */
{
+ struct xfs_mount *mp; /* file system mount point */
struct xfs_buf *agbp; /* buffer for ag. freespace struct */
struct xfs_agf *agf; /* ag. freespace structure */
xfs_agblock_t agflen; /* native ag. freespace length */
- int sblock_ok; /* block passes checks */
+ int sblock_ok = 1; /* block passes checks */
+ mp = cur->bc_mp;
agbp = cur->bc_private.a.agbp;
agf = XFS_BUF_TO_AGF(agbp);
agflen = be32_to_cpu(agf->agf_length);
- sblock_ok =
- be32_to_cpu(block->bb_magic) == xfs_magics[cur->bc_btnum] &&
+
+ if (xfs_sb_version_hascrc(&mp->m_sb)) {
+ sblock_ok = sblock_ok &&
+ uuid_equal(&block->bb_u.s.bb_uuid, &mp->m_sb.sb_uuid) &&
+ block->bb_u.s.bb_blkno == cpu_to_be64(
+ bp ? bp->b_bn : XFS_BUF_DADDR_NULL);
+ }
+
+ sblock_ok = sblock_ok &&
+ be32_to_cpu(block->bb_magic) == xfs_btree_magic(cur) &&
be16_to_cpu(block->bb_level) == level &&
be16_to_cpu(block->bb_numrecs) <=
cur->bc_ops->get_maxrecs(cur, level) &&
@@ -109,13 +133,13 @@ xfs_btree_check_sblock(
(block->bb_u.s.bb_rightsib == cpu_to_be32(NULLAGBLOCK) ||
be32_to_cpu(block->bb_u.s.bb_rightsib) < agflen) &&
block->bb_u.s.bb_rightsib;
- if (unlikely(XFS_TEST_ERROR(!sblock_ok, cur->bc_mp,
+
+ if (unlikely(XFS_TEST_ERROR(!sblock_ok, mp,
XFS_ERRTAG_BTREE_CHECK_SBLOCK,
XFS_RANDOM_BTREE_CHECK_SBLOCK))) {
if (bp)
trace_xfs_btree_corrupt(bp, _RET_IP_);
- XFS_CORRUPTION_ERROR("xfs_btree_check_sblock",
- XFS_ERRLEVEL_LOW, cur->bc_mp, block);
+ XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, mp);
return XFS_ERROR(EFSCORRUPTED);
}
return 0;
@@ -194,6 +218,72 @@ xfs_btree_check_ptr(
#endif
/*
+ * Calculate CRC on the whole btree block and stuff it into the
+ * long-form btree header.
+ *
+ * Prior to calculting the CRC, pull the LSN out of the buffer log item and put
+ * it into the buffer so recovery knows what the last modifcation was that made
+ * it to disk.
+ */
+void
+xfs_btree_lblock_calc_crc(
+ struct xfs_buf *bp)
+{
+ struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp);
+ struct xfs_buf_log_item *bip = bp->b_fspriv;
+
+ if (!xfs_sb_version_hascrc(&bp->b_target->bt_mount->m_sb))
+ return;
+ if (bip)
+ block->bb_u.l.bb_lsn = cpu_to_be64(bip->bli_item.li_lsn);
+ xfs_update_cksum(bp->b_addr, BBTOB(bp->b_length),
+ XFS_BTREE_LBLOCK_CRC_OFF);
+}
+
+bool
+xfs_btree_lblock_verify_crc(
+ struct xfs_buf *bp)
+{
+ if (xfs_sb_version_hascrc(&bp->b_target->bt_mount->m_sb))
+ return xfs_verify_cksum(bp->b_addr, BBTOB(bp->b_length),
+ XFS_BTREE_LBLOCK_CRC_OFF);
+ return true;
+}
+
+/*
+ * Calculate CRC on the whole btree block and stuff it into the
+ * short-form btree header.
+ *
+ * Prior to calculting the CRC, pull the LSN out of the buffer log item and put
+ * it into the buffer so recovery knows what the last modifcation was that made
+ * it to disk.
+ */
+void
+xfs_btree_sblock_calc_crc(
+ struct xfs_buf *bp)
+{
+ struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp);
+ struct xfs_buf_log_item *bip = bp->b_fspriv;
+
+ if (!xfs_sb_version_hascrc(&bp->b_target->bt_mount->m_sb))
+ return;
+ if (bip)
+ block->bb_u.s.bb_lsn = cpu_to_be64(bip->bli_item.li_lsn);
+ xfs_update_cksum(bp->b_addr, BBTOB(bp->b_length),
+ XFS_BTREE_SBLOCK_CRC_OFF);
+}
+
+bool
+xfs_btree_sblock_verify_crc(
+ struct xfs_buf *bp)
+{
+ if (xfs_sb_version_hascrc(&bp->b_target->bt_mount->m_sb))
+ return xfs_verify_cksum(bp->b_addr, BBTOB(bp->b_length),
+ XFS_BTREE_SBLOCK_CRC_OFF);
+ return true;
+}
+
+/*
* Delete the btree cursor.
*/
void
@@ -277,10 +367,8 @@ xfs_btree_dup_cursor(
*ncur = NULL;
return error;
}
- new->bc_bufs[i] = bp;
- ASSERT(!xfs_buf_geterror(bp));
- } else
- new->bc_bufs[i] = NULL;
+ }
+ new->bc_bufs[i] = bp;
}
*ncur = new;
return 0;
@@ -321,9 +409,14 @@ xfs_btree_dup_cursor(
*/
static inline size_t xfs_btree_block_len(struct xfs_btree_cur *cur)
{
- return (cur->bc_flags & XFS_BTREE_LONG_PTRS) ?
- XFS_BTREE_LBLOCK_LEN :
- XFS_BTREE_SBLOCK_LEN;
+ if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
+ if (cur->bc_flags & XFS_BTREE_CRC_BLOCKS)
+ return XFS_BTREE_LBLOCK_CRC_LEN;
+ return XFS_BTREE_LBLOCK_LEN;
+ }
+ if (cur->bc_flags & XFS_BTREE_CRC_BLOCKS)
+ return XFS_BTREE_SBLOCK_CRC_LEN;
+ return XFS_BTREE_SBLOCK_LEN;
}
/*
@@ -863,43 +956,85 @@ xfs_btree_set_sibling(
}
void
+xfs_btree_init_block_int(
+ struct xfs_mount *mp,
+ struct xfs_btree_block *buf,
+ xfs_daddr_t blkno,
+ __u32 magic,
+ __u16 level,
+ __u16 numrecs,
+ __u64 owner,
+ unsigned int flags)
+{
+ buf->bb_magic = cpu_to_be32(magic);
+ buf->bb_level = cpu_to_be16(level);
+ buf->bb_numrecs = cpu_to_be16(numrecs);
+
+ if (flags & XFS_BTREE_LONG_PTRS) {
+ buf->bb_u.l.bb_leftsib = cpu_to_be64(NULLDFSBNO);
+ buf->bb_u.l.bb_rightsib = cpu_to_be64(NULLDFSBNO);
+ if (flags & XFS_BTREE_CRC_BLOCKS) {
+ buf->bb_u.l.bb_blkno = cpu_to_be64(blkno);
+ buf->bb_u.l.bb_owner = cpu_to_be64(owner);
+ uuid_copy(&buf->bb_u.l.bb_uuid, &mp->m_sb.sb_uuid);
+ buf->bb_u.l.bb_pad = 0;
+ }
+ } else {
+ /* owner is a 32 bit value on short blocks */
+ __u32 __owner = (__u32)owner;
+
+ buf->bb_u.s.bb_leftsib = cpu_to_be32(NULLAGBLOCK);
+ buf->bb_u.s.bb_rightsib = cpu_to_be32(NULLAGBLOCK);
+ if (flags & XFS_BTREE_CRC_BLOCKS) {
+ buf->bb_u.s.bb_blkno = cpu_to_be64(blkno);
+ buf->bb_u.s.bb_owner = cpu_to_be32(__owner);
+ uuid_copy(&buf->bb_u.s.bb_uuid, &mp->m_sb.sb_uuid);
+ }
+ }
+}
+
+void
xfs_btree_init_block(
struct xfs_mount *mp,
struct xfs_buf *bp,
__u32 magic,
__u16 level,
__u16 numrecs,
+ __u64 owner,
unsigned int flags)
{
- struct xfs_btree_block *new = XFS_BUF_TO_BLOCK(bp);
-
- new->bb_magic = cpu_to_be32(magic);
- new->bb_level = cpu_to_be16(level);
- new->bb_numrecs = cpu_to_be16(numrecs);
-
- if (flags & XFS_BTREE_LONG_PTRS) {
- new->bb_u.l.bb_leftsib = cpu_to_be64(NULLDFSBNO);
- new->bb_u.l.bb_rightsib = cpu_to_be64(NULLDFSBNO);
- } else {
- new->bb_u.s.bb_leftsib = cpu_to_be32(NULLAGBLOCK);
- new->bb_u.s.bb_rightsib = cpu_to_be32(NULLAGBLOCK);
- }
+ xfs_btree_init_block_int(mp, XFS_BUF_TO_BLOCK(bp), bp->b_bn,
+ magic, level, numrecs, owner, flags);
}
STATIC void
xfs_btree_init_block_cur(
struct xfs_btree_cur *cur,
+ struct xfs_buf *bp,
int level,
- int numrecs,
- struct xfs_buf *bp)
+ int numrecs)
{
- xfs_btree_init_block(cur->bc_mp, bp, xfs_magics[cur->bc_btnum],
- level, numrecs, cur->bc_flags);
+ __u64 owner;
+
+ /*
+ * we can pull the owner from the cursor right now as the different
+ * owners align directly with the pointer size of the btree. This may
+ * change in future, but is safe for current users of the generic btree
+ * code.
+ */
+ if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
+ owner = cur->bc_private.b.ip->i_ino;
+ else
+ owner = cur->bc_private.a.agno;
+
+ xfs_btree_init_block_int(cur->bc_mp, XFS_BUF_TO_BLOCK(bp), bp->b_bn,
+ xfs_btree_magic(cur), level, numrecs,
+ owner, cur->bc_flags);
}
/*
* Return true if ptr is the last record in the btree and
- * we need to track updateѕ to this record. The decision
+ * we need to track updates to this record. The decision
* will be further refined in the update_lastrec method.
*/
STATIC int
@@ -1147,6 +1282,7 @@ xfs_btree_log_keys(
XFS_BTREE_TRACE_ARGBII(cur, bp, first, last);
if (bp) {
+ xfs_trans_buf_set_type(cur->bc_tp, bp, XFS_BLF_BTREE_BUF);
xfs_trans_log_buf(cur->bc_tp, bp,
xfs_btree_key_offset(cur, first),
xfs_btree_key_offset(cur, last + 1) - 1);
@@ -1171,6 +1307,7 @@ xfs_btree_log_recs(
XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
XFS_BTREE_TRACE_ARGBII(cur, bp, first, last);
+ xfs_trans_buf_set_type(cur->bc_tp, bp, XFS_BLF_BTREE_BUF);
xfs_trans_log_buf(cur->bc_tp, bp,
xfs_btree_rec_offset(cur, first),
xfs_btree_rec_offset(cur, last + 1) - 1);
@@ -1195,6 +1332,7 @@ xfs_btree_log_ptrs(
struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp);
int level = xfs_btree_get_level(block);
+ xfs_trans_buf_set_type(cur->bc_tp, bp, XFS_BLF_BTREE_BUF);
xfs_trans_log_buf(cur->bc_tp, bp,
xfs_btree_ptr_offset(cur, first, level),
xfs_btree_ptr_offset(cur, last + 1, level) - 1);
@@ -1223,7 +1361,12 @@ xfs_btree_log_block(
offsetof(struct xfs_btree_block, bb_numrecs),
offsetof(struct xfs_btree_block, bb_u.s.bb_leftsib),
offsetof(struct xfs_btree_block, bb_u.s.bb_rightsib),
- XFS_BTREE_SBLOCK_LEN
+ offsetof(struct xfs_btree_block, bb_u.s.bb_blkno),
+ offsetof(struct xfs_btree_block, bb_u.s.bb_lsn),
+ offsetof(struct xfs_btree_block, bb_u.s.bb_uuid),
+ offsetof(struct xfs_btree_block, bb_u.s.bb_owner),
+ offsetof(struct xfs_btree_block, bb_u.s.bb_crc),
+ XFS_BTREE_SBLOCK_CRC_LEN
};
static const short loffsets[] = { /* table of offsets (long) */
offsetof(struct xfs_btree_block, bb_magic),
@@ -1231,17 +1374,40 @@ xfs_btree_log_block(
offsetof(struct xfs_btree_block, bb_numrecs),
offsetof(struct xfs_btree_block, bb_u.l.bb_leftsib),
offsetof(struct xfs_btree_block, bb_u.l.bb_rightsib),
- XFS_BTREE_LBLOCK_LEN
+ offsetof(struct xfs_btree_block, bb_u.l.bb_blkno),
+ offsetof(struct xfs_btree_block, bb_u.l.bb_lsn),
+ offsetof(struct xfs_btree_block, bb_u.l.bb_uuid),
+ offsetof(struct xfs_btree_block, bb_u.l.bb_owner),
+ offsetof(struct xfs_btree_block, bb_u.l.bb_crc),
+ offsetof(struct xfs_btree_block, bb_u.l.bb_pad),
+ XFS_BTREE_LBLOCK_CRC_LEN
};
XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
XFS_BTREE_TRACE_ARGBI(cur, bp, fields);
if (bp) {
+ int nbits;
+
+ if (cur->bc_flags & XFS_BTREE_CRC_BLOCKS) {
+ /*
+ * We don't log the CRC when updating a btree
+ * block but instead recreate it during log
+ * recovery. As the log buffers have checksums
+ * of their own this is safe and avoids logging a crc
+ * update in a lot of places.
+ */
+ if (fields == XFS_BB_ALL_BITS)
+ fields = XFS_BB_ALL_BITS_CRC;
+ nbits = XFS_BB_NUM_BITS_CRC;
+ } else {
+ nbits = XFS_BB_NUM_BITS;
+ }
xfs_btree_offsets(fields,
(cur->bc_flags & XFS_BTREE_LONG_PTRS) ?
loffsets : soffsets,
- XFS_BB_NUM_BITS, &first, &last);
+ nbits, &first, &last);
+ xfs_trans_buf_set_type(cur->bc_tp, bp, XFS_BLF_BTREE_BUF);
xfs_trans_log_buf(cur->bc_tp, bp, first, last);
} else {
xfs_trans_log_inode(cur->bc_tp, cur->bc_private.b.ip,
@@ -2204,7 +2370,7 @@ xfs_btree_split(
goto error0;
/* Fill in the btree header for the new right block. */
- xfs_btree_init_block_cur(cur, xfs_btree_get_level(left), 0, rbp);
+ xfs_btree_init_block_cur(cur, rbp, xfs_btree_get_level(left), 0);
/*
* Split the entries between the old and the new block evenly.
@@ -2513,7 +2679,7 @@ xfs_btree_new_root(
nptr = 2;
}
/* Fill in the new block's btree header and log it. */
- xfs_btree_init_block_cur(cur, cur->bc_nlevels, 2, nbp);
+ xfs_btree_init_block_cur(cur, nbp, cur->bc_nlevels, 2);
xfs_btree_log_block(cur, nbp, XFS_BB_ALL_BITS);
ASSERT(!xfs_btree_ptr_is_null(cur, &lptr) &&
!xfs_btree_ptr_is_null(cur, &rptr));