diff options
Diffstat (limited to 'fs/xfs/libxfs')
37 files changed, 2117 insertions, 1305 deletions
diff --git a/fs/xfs/libxfs/xfs_ag.c b/fs/xfs/libxfs/xfs_ag.c index c68a36688474..778ec52cce70 100644 --- a/fs/xfs/libxfs/xfs_ag.c +++ b/fs/xfs/libxfs/xfs_ag.c @@ -27,6 +27,276 @@ #include "xfs_defer.h" #include "xfs_log_format.h" #include "xfs_trans.h" +#include "xfs_trace.h" +#include "xfs_inode.h" +#include "xfs_icache.h" + + +/* + * Passive reference counting access wrappers to the perag structures. If the + * per-ag structure is to be freed, the freeing code is responsible for cleaning + * up objects with passive references before freeing the structure. This is + * things like cached buffers. + */ +struct xfs_perag * +xfs_perag_get( + struct xfs_mount *mp, + xfs_agnumber_t agno) +{ + struct xfs_perag *pag; + int ref = 0; + + rcu_read_lock(); + pag = radix_tree_lookup(&mp->m_perag_tree, agno); + if (pag) { + ASSERT(atomic_read(&pag->pag_ref) >= 0); + ref = atomic_inc_return(&pag->pag_ref); + } + rcu_read_unlock(); + trace_xfs_perag_get(mp, agno, ref, _RET_IP_); + return pag; +} + +/* + * search from @first to find the next perag with the given tag set. + */ +struct xfs_perag * +xfs_perag_get_tag( + struct xfs_mount *mp, + xfs_agnumber_t first, + unsigned int tag) +{ + struct xfs_perag *pag; + int found; + int ref; + + rcu_read_lock(); + found = radix_tree_gang_lookup_tag(&mp->m_perag_tree, + (void **)&pag, first, 1, tag); + if (found <= 0) { + rcu_read_unlock(); + return NULL; + } + ref = atomic_inc_return(&pag->pag_ref); + rcu_read_unlock(); + trace_xfs_perag_get_tag(mp, pag->pag_agno, ref, _RET_IP_); + return pag; +} + +void +xfs_perag_put( + struct xfs_perag *pag) +{ + int ref; + + ASSERT(atomic_read(&pag->pag_ref) > 0); + ref = atomic_dec_return(&pag->pag_ref); + trace_xfs_perag_put(pag->pag_mount, pag->pag_agno, ref, _RET_IP_); +} + +/* + * xfs_initialize_perag_data + * + * Read in each per-ag structure so we can count up the number of + * allocated inodes, free inodes and used filesystem blocks as this + * information is no longer persistent in the superblock. Once we have + * this information, write it into the in-core superblock structure. + */ +int +xfs_initialize_perag_data( + struct xfs_mount *mp, + xfs_agnumber_t agcount) +{ + xfs_agnumber_t index; + struct xfs_perag *pag; + struct xfs_sb *sbp = &mp->m_sb; + uint64_t ifree = 0; + uint64_t ialloc = 0; + uint64_t bfree = 0; + uint64_t bfreelst = 0; + uint64_t btree = 0; + uint64_t fdblocks; + int error = 0; + + for (index = 0; index < agcount; index++) { + /* + * read the agf, then the agi. This gets us + * all the information we need and populates the + * per-ag structures for us. + */ + error = xfs_alloc_pagf_init(mp, NULL, index, 0); + if (error) + return error; + + error = xfs_ialloc_pagi_init(mp, NULL, index); + if (error) + return error; + pag = xfs_perag_get(mp, index); + ifree += pag->pagi_freecount; + ialloc += pag->pagi_count; + bfree += pag->pagf_freeblks; + bfreelst += pag->pagf_flcount; + btree += pag->pagf_btreeblks; + xfs_perag_put(pag); + } + fdblocks = bfree + bfreelst + btree; + + /* + * If the new summary counts are obviously incorrect, fail the + * mount operation because that implies the AGFs are also corrupt. + * Clear FS_COUNTERS so that we don't unmount with a dirty log, which + * will prevent xfs_repair from fixing anything. + */ + if (fdblocks > sbp->sb_dblocks || ifree > ialloc) { + xfs_alert(mp, "AGF corruption. Please run xfs_repair."); + error = -EFSCORRUPTED; + goto out; + } + + /* Overwrite incore superblock counters with just-read data */ + spin_lock(&mp->m_sb_lock); + sbp->sb_ifree = ifree; + sbp->sb_icount = ialloc; + sbp->sb_fdblocks = fdblocks; + spin_unlock(&mp->m_sb_lock); + + xfs_reinit_percpu_counters(mp); +out: + xfs_fs_mark_healthy(mp, XFS_SICK_FS_COUNTERS); + return error; +} + +STATIC void +__xfs_free_perag( + struct rcu_head *head) +{ + struct xfs_perag *pag = container_of(head, struct xfs_perag, rcu_head); + + ASSERT(!delayed_work_pending(&pag->pag_blockgc_work)); + ASSERT(atomic_read(&pag->pag_ref) == 0); + kmem_free(pag); +} + +/* + * Free up the per-ag resources associated with the mount structure. + */ +void +xfs_free_perag( + struct xfs_mount *mp) +{ + struct xfs_perag *pag; + xfs_agnumber_t agno; + + for (agno = 0; agno < mp->m_sb.sb_agcount; agno++) { + spin_lock(&mp->m_perag_lock); + pag = radix_tree_delete(&mp->m_perag_tree, agno); + spin_unlock(&mp->m_perag_lock); + ASSERT(pag); + ASSERT(atomic_read(&pag->pag_ref) == 0); + + cancel_delayed_work_sync(&pag->pag_blockgc_work); + xfs_iunlink_destroy(pag); + xfs_buf_hash_destroy(pag); + + call_rcu(&pag->rcu_head, __xfs_free_perag); + } +} + +int +xfs_initialize_perag( + struct xfs_mount *mp, + xfs_agnumber_t agcount, + xfs_agnumber_t *maxagi) +{ + struct xfs_perag *pag; + xfs_agnumber_t index; + xfs_agnumber_t first_initialised = NULLAGNUMBER; + int error; + + /* + * Walk the current per-ag tree so we don't try to initialise AGs + * that already exist (growfs case). Allocate and insert all the + * AGs we don't find ready for initialisation. + */ + for (index = 0; index < agcount; index++) { + pag = xfs_perag_get(mp, index); + if (pag) { + xfs_perag_put(pag); + continue; + } + + pag = kmem_zalloc(sizeof(*pag), KM_MAYFAIL); + if (!pag) { + error = -ENOMEM; + goto out_unwind_new_pags; + } + pag->pag_agno = index; + pag->pag_mount = mp; + + error = radix_tree_preload(GFP_NOFS); + if (error) + goto out_free_pag; + + spin_lock(&mp->m_perag_lock); + if (radix_tree_insert(&mp->m_perag_tree, index, pag)) { + WARN_ON_ONCE(1); + spin_unlock(&mp->m_perag_lock); + radix_tree_preload_end(); + error = -EEXIST; + goto out_free_pag; + } + spin_unlock(&mp->m_perag_lock); + radix_tree_preload_end(); + + /* Place kernel structure only init below this point. */ + spin_lock_init(&pag->pag_ici_lock); + spin_lock_init(&pag->pagb_lock); + spin_lock_init(&pag->pag_state_lock); + INIT_DELAYED_WORK(&pag->pag_blockgc_work, xfs_blockgc_worker); + INIT_RADIX_TREE(&pag->pag_ici_root, GFP_ATOMIC); + init_waitqueue_head(&pag->pagb_wait); + pag->pagb_count = 0; + pag->pagb_tree = RB_ROOT; + + error = xfs_buf_hash_init(pag); + if (error) + goto out_remove_pag; + + error = xfs_iunlink_init(pag); + if (error) + goto out_hash_destroy; + + /* first new pag is fully initialized */ + if (first_initialised == NULLAGNUMBER) + first_initialised = index; + } + + index = xfs_set_inode_alloc(mp, agcount); + + if (maxagi) + *maxagi = index; + + mp->m_ag_prealloc_blocks = xfs_prealloc_blocks(mp); + return 0; + +out_hash_destroy: + xfs_buf_hash_destroy(pag); +out_remove_pag: + radix_tree_delete(&mp->m_perag_tree, index); +out_free_pag: + kmem_free(pag); +out_unwind_new_pags: + /* unwind any prior newly initialized pags */ + for (index = first_initialised; index < agcount; index++) { + pag = radix_tree_delete(&mp->m_perag_tree, index); + if (!pag) + break; + xfs_buf_hash_destroy(pag); + xfs_iunlink_destroy(pag); + kmem_free(pag); + } + return error; +} static int xfs_get_aghdr_buf( @@ -43,7 +313,6 @@ xfs_get_aghdr_buf( if (error) return error; - xfs_buf_zero(bp, 0, BBTOB(bp->b_length)); bp->b_bn = blkno; bp->b_maps[0].bm_bn = blkno; bp->b_ops = ops; @@ -510,6 +779,7 @@ xfs_ag_shrink_space( struct xfs_buf *agibp, *agfbp; struct xfs_agi *agi; struct xfs_agf *agf; + xfs_agblock_t aglen; int error, err2; ASSERT(agno == mp->m_sb.sb_agcount - 1); @@ -524,14 +794,14 @@ xfs_ag_shrink_space( return error; agf = agfbp->b_addr; + aglen = be32_to_cpu(agi->agi_length); /* some extra paranoid checks before we shrink the ag */ if (XFS_IS_CORRUPT(mp, agf->agf_length != agi->agi_length)) return -EFSCORRUPTED; - if (delta >= agi->agi_length) + if (delta >= aglen) return -EINVAL; - args.fsbno = XFS_AGB_TO_FSB(mp, agno, - be32_to_cpu(agi->agi_length) - delta); + args.fsbno = XFS_AGB_TO_FSB(mp, agno, aglen - delta); /* * Disable perag reservations so it doesn't cause the allocation request @@ -646,7 +916,7 @@ xfs_ag_extend_space( * XFS_RMAP_OINFO_SKIP_UPDATE is used here to tell the rmap btree that * this doesn't actually exist in the rmap btree. */ - error = xfs_rmap_free(tp, bp, id->agno, + error = xfs_rmap_free(tp, bp, bp->b_pag, be32_to_cpu(agf->agf_length) - len, len, &XFS_RMAP_OINFO_SKIP_UPDATE); if (error) diff --git a/fs/xfs/libxfs/xfs_ag.h b/fs/xfs/libxfs/xfs_ag.h index 4535de1d88ea..4c6f9045baca 100644 --- a/fs/xfs/libxfs/xfs_ag.h +++ b/fs/xfs/libxfs/xfs_ag.h @@ -9,6 +9,142 @@ struct xfs_mount; struct xfs_trans; +struct xfs_perag; + +/* + * Per-ag infrastructure + */ + +/* per-AG block reservation data structures*/ +struct xfs_ag_resv { + /* number of blocks originally reserved here */ + xfs_extlen_t ar_orig_reserved; + /* number of blocks reserved here */ + xfs_extlen_t ar_reserved; + /* number of blocks originally asked for */ + xfs_extlen_t ar_asked; +}; + +/* + * Per-ag incore structure, copies of information in agf and agi, to improve the + * performance of allocation group selection. + */ +struct xfs_perag { + struct xfs_mount *pag_mount; /* owner filesystem */ + xfs_agnumber_t pag_agno; /* AG this structure belongs to */ + atomic_t pag_ref; /* perag reference count */ + char pagf_init; /* this agf's entry is initialized */ + char pagi_init; /* this agi's entry is initialized */ + char pagf_metadata; /* the agf is preferred to be metadata */ + char pagi_inodeok; /* The agi is ok for inodes */ + uint8_t pagf_levels[XFS_BTNUM_AGF]; + /* # of levels in bno & cnt btree */ + bool pagf_agflreset; /* agfl requires reset before use */ + uint32_t pagf_flcount; /* count of blocks in freelist */ + xfs_extlen_t pagf_freeblks; /* total free blocks */ + xfs_extlen_t pagf_longest; /* longest free space */ + uint32_t pagf_btreeblks; /* # of blocks held in AGF btrees */ + xfs_agino_t pagi_freecount; /* number of free inodes */ + xfs_agino_t pagi_count; /* number of allocated inodes */ + + /* + * Inode allocation search lookup optimisation. + * If the pagino matches, the search for new inodes + * doesn't need to search the near ones again straight away + */ + xfs_agino_t pagl_pagino; + xfs_agino_t pagl_leftrec; + xfs_agino_t pagl_rightrec; + + int pagb_count; /* pagb slots in use */ + uint8_t pagf_refcount_level; /* recount btree height */ + + /* Blocks reserved for all kinds of metadata. */ + struct xfs_ag_resv pag_meta_resv; + /* Blocks reserved for the reverse mapping btree. */ + struct xfs_ag_resv pag_rmapbt_resv; + + /* -- kernel only structures below this line -- */ + + /* + * Bitsets of per-ag metadata that have been checked and/or are sick. + * Callers should hold pag_state_lock before accessing this field. + */ + uint16_t pag_checked; + uint16_t pag_sick; + spinlock_t pag_state_lock; + + spinlock_t pagb_lock; /* lock for pagb_tree */ + struct rb_root pagb_tree; /* ordered tree of busy extents */ + unsigned int pagb_gen; /* generation count for pagb_tree */ + wait_queue_head_t pagb_wait; /* woken when pagb_gen changes */ + + atomic_t pagf_fstrms; /* # of filestreams active in this AG */ + + spinlock_t pag_ici_lock; /* incore inode cache lock */ + struct radix_tree_root pag_ici_root; /* incore inode cache root */ + int pag_ici_reclaimable; /* reclaimable inodes */ + unsigned long pag_ici_reclaim_cursor; /* reclaim restart point */ + + /* buffer cache index */ + spinlock_t pag_buf_lock; /* lock for pag_buf_hash */ + struct rhashtable pag_buf_hash; + + /* for rcu-safe freeing */ + struct rcu_head rcu_head; + + /* background prealloc block trimming */ + struct delayed_work pag_blockgc_work; + + /* + * Unlinked inode information. This incore information reflects + * data stored in the AGI, so callers must hold the AGI buffer lock + * or have some other means to control concurrency. + */ + struct rhashtable pagi_unlinked_hash; +}; + +int xfs_initialize_perag(struct xfs_mount *mp, xfs_agnumber_t agcount, + xfs_agnumber_t *maxagi); +int xfs_initialize_perag_data(struct xfs_mount *mp, xfs_agnumber_t agno); +void xfs_free_perag(struct xfs_mount *mp); + +struct xfs_perag *xfs_perag_get(struct xfs_mount *mp, xfs_agnumber_t agno); +struct xfs_perag *xfs_perag_get_tag(struct xfs_mount *mp, xfs_agnumber_t agno, + unsigned int tag); +void xfs_perag_put(struct xfs_perag *pag); + +/* + * Perag iteration APIs + * + * XXX: for_each_perag_range() usage really needs an iterator to clean up when + * we terminate at end_agno because we may have taken a reference to the perag + * beyond end_agno. Right now callers have to be careful to catch and clean that + * up themselves. This is not necessary for the callers of for_each_perag() and + * for_each_perag_from() because they terminate at sb_agcount where there are + * no perag structures in tree beyond end_agno. + */ +#define for_each_perag_range(mp, next_agno, end_agno, pag) \ + for ((pag) = xfs_perag_get((mp), (next_agno)); \ + (pag) != NULL && (next_agno) <= (end_agno); \ + (next_agno) = (pag)->pag_agno + 1, \ + xfs_perag_put(pag), \ + (pag) = xfs_perag_get((mp), (next_agno))) + +#define for_each_perag_from(mp, next_agno, pag) \ + for_each_perag_range((mp), (next_agno), (mp)->m_sb.sb_agcount, (pag)) + + +#define for_each_perag(mp, agno, pag) \ + (agno) = 0; \ + for_each_perag_from((mp), (agno), (pag)) + +#define for_each_perag_tag(mp, agno, pag, tag) \ + for ((agno) = 0, (pag) = xfs_perag_get_tag((mp), 0, (tag)); \ + (pag) != NULL; \ + (agno) = (pag)->pag_agno + 1, \ + xfs_perag_put(pag), \ + (pag) = xfs_perag_get_tag((mp), (agno), (tag))) struct aghdr_init_data { /* per ag data */ diff --git a/fs/xfs/libxfs/xfs_ag_resv.c b/fs/xfs/libxfs/xfs_ag_resv.c index e0d5cdc57cc2..2aa2b3484c28 100644 --- a/fs/xfs/libxfs/xfs_ag_resv.c +++ b/fs/xfs/libxfs/xfs_ag_resv.c @@ -19,7 +19,7 @@ #include "xfs_btree.h" #include "xfs_refcount_btree.h" #include "xfs_ialloc_btree.h" -#include "xfs_sb.h" +#include "xfs_ag.h" #include "xfs_ag_resv.h" /* @@ -250,7 +250,6 @@ xfs_ag_resv_init( struct xfs_trans *tp) { struct xfs_mount *mp = pag->pag_mount; - xfs_agnumber_t agno = pag->pag_agno; xfs_extlen_t ask; xfs_extlen_t used; int error = 0, error2; @@ -260,11 +259,11 @@ xfs_ag_resv_init( if (pag->pag_meta_resv.ar_asked == 0) { ask = used = 0; - error = xfs_refcountbt_calc_reserves(mp, tp, agno, &ask, &used); + error = xfs_refcountbt_calc_reserves(mp, tp, pag, &ask, &used); if (error) goto out; - error = xfs_finobt_calc_reserves(mp, tp, agno, &ask, &used); + error = xfs_finobt_calc_reserves(mp, tp, pag, &ask, &used); if (error) goto out; @@ -282,7 +281,7 @@ xfs_ag_resv_init( mp->m_finobt_nores = true; - error = xfs_refcountbt_calc_reserves(mp, tp, agno, &ask, + error = xfs_refcountbt_calc_reserves(mp, tp, pag, &ask, &used); if (error) goto out; @@ -300,7 +299,7 @@ xfs_ag_resv_init( if (pag->pag_rmapbt_resv.ar_asked == 0) { ask = used = 0; - error = xfs_rmapbt_calc_reserves(mp, tp, agno, &ask, &used); + error = xfs_rmapbt_calc_reserves(mp, tp, pag, &ask, &used); if (error) goto out; diff --git a/fs/xfs/libxfs/xfs_ag_resv.h b/fs/xfs/libxfs/xfs_ag_resv.h index 8a8eb4bc48bb..b74b210008ea 100644 --- a/fs/xfs/libxfs/xfs_ag_resv.h +++ b/fs/xfs/libxfs/xfs_ag_resv.h @@ -18,6 +18,21 @@ void xfs_ag_resv_alloc_extent(struct xfs_perag *pag, enum xfs_ag_resv_type type, void xfs_ag_resv_free_extent(struct xfs_perag *pag, enum xfs_ag_resv_type type, struct xfs_trans *tp, xfs_extlen_t len); +static inline struct xfs_ag_resv * +xfs_perag_resv( + struct xfs_perag *pag, + enum xfs_ag_resv_type type) +{ + switch (type) { + case XFS_AG_RESV_METADATA: + return &pag->pag_meta_resv; + case XFS_AG_RESV_RMAPBT: + return &pag->pag_rmapbt_resv; + default: + return NULL; + } +} + /* * RMAPBT reservation accounting wrappers. Since rmapbt blocks are sourced from * the AGFL, they are allocated one at a time and the reservation updates don't diff --git a/fs/xfs/libxfs/xfs_alloc.c b/fs/xfs/libxfs/xfs_alloc.c index af3d5f9271f6..6929157d8d6e 100644 --- a/fs/xfs/libxfs/xfs_alloc.c +++ b/fs/xfs/libxfs/xfs_alloc.c @@ -10,7 +10,6 @@ #include "xfs_shared.h" #include "xfs_trans_resv.h" #include "xfs_bit.h" -#include "xfs_sb.h" #include "xfs_mount.h" #include "xfs_defer.h" #include "xfs_btree.h" @@ -24,6 +23,7 @@ #include "xfs_trans.h" #include "xfs_buf_item.h" #include "xfs_log.h" +#include "xfs_ag.h" #include "xfs_ag_resv.h" #include "xfs_bmap.h" @@ -230,7 +230,7 @@ xfs_alloc_get_rec( int *stat) /* output: success/failure */ { struct xfs_mount *mp = cur->bc_mp; - xfs_agnumber_t agno = cur->bc_ag.agno; + xfs_agnumber_t agno = cur->bc_ag.pag->pag_agno; union xfs_btree_rec *rec; int error; @@ -776,7 +776,7 @@ xfs_alloc_cur_setup( */ if (!acur->cnt) acur->cnt = xfs_allocbt_init_cursor(args->mp, args->tp, - args->agbp, args->agno, XFS_BTNUM_CNT); + args->agbp, args->pag, XFS_BTNUM_CNT); error = xfs_alloc_lookup_ge(acur->cnt, 0, args->maxlen, &i); if (error) return error; @@ -786,10 +786,10 @@ xfs_alloc_cur_setup( */ if (!acur->bnolt) acur->bnolt = xfs_allocbt_init_cursor(args->mp, args->tp, - args->agbp, args->agno, XFS_BTNUM_BNO); + args->agbp, args->pag, XFS_BTNUM_BNO); if (!acur->bnogt) acur->bnogt = xfs_allocbt_init_cursor(args->mp, args->tp, - args->agbp, args->agno, XFS_BTNUM_BNO); + args->agbp, args->pag, XFS_BTNUM_BNO); return i == 1 ? 0 : -ENOSPC; } @@ -1063,7 +1063,7 @@ xfs_alloc_ag_vextent_small( if (fbno == NULLAGBLOCK) goto out; - xfs_extent_busy_reuse(args->mp, args->agno, fbno, 1, + xfs_extent_busy_reuse(args->mp, args->pag, fbno, 1, (args->datatype & XFS_ALLOC_NOBUSY)); if (args->datatype & XFS_ALLOC_USERDATA) { @@ -1089,7 +1089,7 @@ xfs_alloc_ag_vextent_small( * If we're feeding an AGFL block to something that doesn't live in the * free space, we need to clear out the OWN_AG rmap. */ - error = xfs_rmap_free(args->tp, args->agbp, args->agno, fbno, 1, + error = xfs_rmap_free(args->tp, args->agbp, args->pag, fbno, 1, &XFS_RMAP_OINFO_AG); if (error) goto error; @@ -1166,7 +1166,7 @@ xfs_alloc_ag_vextent( /* if not file data, insert new block into the reverse map btree */ if (!xfs_rmap_should_skip_owner_update(&args->oinfo)) { - error = xfs_rmap_alloc(args->tp, args->agbp, args->agno, + error = xfs_rmap_alloc(args->tp, args->agbp, args->pag, args->agbno, args->len, &args->oinfo); if (error) return error; @@ -1178,7 +1178,7 @@ xfs_alloc_ag_vextent( if (error) return error; - ASSERT(!xfs_extent_busy_search(args->mp, args->agno, + ASSERT(!xfs_extent_busy_search(args->mp, args->pag, args->agbno, args->len)); } @@ -1217,7 +1217,7 @@ xfs_alloc_ag_vextent_exact( * Allocate/initialize a cursor for the by-number freespace btree. */ bno_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp, - args->agno, XFS_BTNUM_BNO); + args->pag, XFS_BTNUM_BNO); /* * Lookup bno and minlen in the btree (minlen is irrelevant, really). @@ -1277,7 +1277,7 @@ xfs_alloc_ag_vextent_exact( * Allocate/initialize a cursor for the by-size btree. */ cnt_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp, - args->agno, XFS_BTNUM_CNT); + args->pag, XFS_BTNUM_CNT); ASSERT(args->agbno + args->len <= be32_to_cpu(agf->agf_length)); error = xfs_alloc_fixup_trees(cnt_cur, bno_cur, fbno, flen, args->agbno, args->len, XFSA_FIXUP_BNO_OK); @@ -1674,9 +1674,8 @@ restart: * Allocate and initialize a cursor for the by-size btree. */ cnt_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp, - args->agno, XFS_BTNUM_CNT); + args->pag, XFS_BTNUM_CNT); bno_cur = NULL; - busy = false; /* * Look for an entry >= maxlen+alignment-1 blocks. @@ -1837,7 +1836,7 @@ restart: * Allocate and initialize a cursor for the by-block tree. */ bno_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp, - args->agno, XFS_BTNUM_BNO); + args->pag, XFS_BTNUM_BNO); if ((error = xfs_alloc_fixup_trees(cnt_cur, bno_cur, fbno, flen, rbno, rlen, XFSA_FIXUP_CNT_OK))) goto error0; @@ -1896,12 +1895,13 @@ xfs_free_ag_extent( int haveright; /* have a right neighbor */ int i; int error; + struct xfs_perag *pag = agbp->b_pag; bno_cur = cnt_cur = NULL; mp = tp->t_mountp; if (!xfs_rmap_should_skip_owner_update(oinfo)) { - error = xfs_rmap_free(tp, agbp, agno, bno, len, oinfo); + error = xfs_rmap_free(tp, agbp, pag, bno, len, oinfo); if (error) goto error0; } @@ -1909,7 +1909,7 @@ xfs_free_ag_extent( /* * Allocate and initialize a cursor for the by-block btree. */ - bno_cur = xfs_allocbt_init_cursor(mp, tp, agbp, agno, XFS_BTNUM_BNO); + bno_cur = xfs_allocbt_init_cursor(mp, tp, agbp, pag, XFS_BTNUM_BNO); /* * Look for a neighboring block on the left (lower block numbers) * that is contiguous with this space. @@ -1979,7 +1979,7 @@ xfs_free_ag_extent( /* * Now allocate and initialize a cursor for the by-size tree. */ - cnt_cur = xfs_allocbt_init_cursor(mp, tp, agbp, agno, XFS_BTNUM_CNT); + cnt_cur = xfs_allocbt_init_cursor(mp, tp, agbp, pag, XFS_BTNUM_CNT); /* * Have both left and right contiguous neighbors. * Merge all three into a single free block. @@ -2490,7 +2490,7 @@ xfs_exact_minlen_extent_available( int error = 0; cnt_cur = xfs_allocbt_init_cursor(args->mp, args->tp, agbp, - args->agno, XFS_BTNUM_CNT); + args->pag, XFS_BTNUM_CNT); error = xfs_alloc_lookup_ge(cnt_cur, 0, args->minlen, stat); if (error) goto out; @@ -2693,21 +2693,21 @@ out_no_agbp: * Get a block from the freelist. * Returns with the buffer for the block gotten. */ -int /* error */ +int xfs_alloc_get_freelist( - xfs_trans_t *tp, /* transaction pointer */ - struct xfs_buf *agbp, /* buffer containing the agf structure */ - xfs_agblock_t *bnop, /* block address retrieved from freelist */ - int btreeblk) /* destination is a AGF btree */ + struct xfs_trans *tp, + struct xfs_buf *agbp, + xfs_agblock_t *bnop, + int btreeblk) { - struct xfs_agf *agf = agbp->b_addr; - struct xfs_buf *agflbp;/* buffer for a.g. freelist structure */ - xfs_agblock_t bno; /* block number returned */ - __be32 *agfl_bno; - int error; - int logflags; - xfs_mount_t *mp = tp->t_mountp; - xfs_perag_t *pag; /* per allocation group data */ + struct xfs_agf *agf = agbp->b_addr; + struct xfs_buf *agflbp; + xfs_agblock_t bno; + __be32 *agfl_bno; + int error; + int logflags; + struct xfs_mount *mp = tp->t_mountp; + struct xfs_perag *pag; /* * Freelist is empty, give up. @@ -2817,20 +2817,20 @@ xfs_alloc_pagf_init( /* * Put the block on the freelist for the allocation group. */ -int /* error */ +int xfs_alloc_put_freelist( - xfs_trans_t *tp, /* transaction pointer */ - struct xfs_buf *agbp, /* buffer for a.g. freelist header */ - struct xfs_buf *agflbp,/* buffer for a.g. free block array */ - xfs_agblock_t bno, /* block being freed */ - int btreeblk) /* block came from a AGF btree */ + struct xfs_trans *tp, + struct xfs_buf *agbp, + struct xfs_buf *agflbp, + xfs_agblock_t bno, + int btreeblk) { struct xfs_mount *mp = tp->t_mountp; struct xfs_agf *agf = agbp->b_addr; - __be32 *blockp;/* pointer to array entry */ + struct xfs_perag *pag; + __be32 *blockp; int error; int logflags; - xfs_perag_t *pag; /* per allocation group data */ __be32 *agfl_bno; int startoff; @@ -3292,7 +3292,7 @@ error0: int xfs_free_extent_fix_freelist( struct xfs_trans *tp, - xfs_agnumber_t agno, + struct xfs_perag *pag, struct xfs_buf **agbp) { struct xfs_alloc_arg args; @@ -3301,7 +3301,8 @@ xfs_free_extent_fix_freelist( memset(&args, 0, sizeof(struct xfs_alloc_arg)); args.tp = tp; args.mp = tp->t_mountp; - args.agno = agno; + args.agno = pag->pag_agno; + args.pag = pag; /* * validate that the block number is legal - the enables us to detect @@ -3310,17 +3311,12 @@ xfs_free_extent_fix_freelist( if (args.agno >= args.mp->m_sb.sb_agcount) return -EFSCORRUPTED; - args.pag = xfs_perag_get(args.mp, args.agno); - ASSERT(args.pag); - error = xfs_alloc_fix_freelist(&args, XFS_ALLOC_FLAG_FREEING); if (error) - goto out; + return error; *agbp = args.agbp; -out: - xfs_perag_put(args.pag); - return error; + return 0; } /* @@ -3344,6 +3340,7 @@ __xfs_free_extent( struct xfs_agf *agf; int error; unsigned int busy_flags = 0; + struct xfs_perag *pag; ASSERT(len != 0); ASSERT(type != XFS_AG_RESV_AGFL); @@ -3352,33 +3349,37 @@ __xfs_free_extent( XFS_ERRTAG_FREE_EXTENT)) return -EIO; - error = xfs_free_extent_fix_freelist(tp, agno, &agbp); + pag = xfs_perag_get(mp, agno); + error = xfs_free_extent_fix_freelist(tp, pag, &agbp); if (error) - return error; + goto err; agf = agbp->b_addr; if (XFS_IS_CORRUPT(mp, agbno >= mp->m_sb.sb_agblocks)) { error = -EFSCORRUPTED; - goto err; + goto err_release; } /* validate the extent size is legal now we have the agf locked */ if (XFS_IS_CORRUPT(mp, agbno + len > be32_to_cpu(agf->agf_length))) { error = -EFSCORRUPTED; - goto err; + goto err_release; } error = xfs_free_ag_extent(tp, agbp, agno, agbno, len, oinfo, type); if (error) - goto err; + goto err_release; if (skip_discard) busy_flags |= XFS_EXTENT_BUSY_SKIP_DISCARD; - xfs_extent_busy_insert(tp, agno, agbno, len, busy_flags); + xfs_extent_busy_insert(tp, pag, agbno, len, busy_flags); + xfs_perag_put(pag); return 0; -err: +err_release: xfs_trans_brelse(tp, agbp); +err: + xfs_perag_put(pag); return error; } diff --git a/fs/xfs/libxfs/xfs_alloc.h b/fs/xfs/libxfs/xfs_alloc.h index a4427c5775c2..e30900b6f8ba 100644 --- a/fs/xfs/libxfs/xfs_alloc.h +++ b/fs/xfs/libxfs/xfs_alloc.h @@ -214,7 +214,7 @@ int xfs_alloc_read_agfl(struct xfs_mount *mp, struct xfs_trans *tp, int xfs_free_agfl_block(struct xfs_trans *, xfs_agnumber_t, xfs_agblock_t, struct xfs_buf *, struct xfs_owner_info *); int xfs_alloc_fix_freelist(struct xfs_alloc_arg *args, int flags); -int xfs_free_extent_fix_freelist(struct xfs_trans *tp, xfs_agnumber_t agno, +int xfs_free_extent_fix_freelist(struct xfs_trans *tp, struct xfs_perag *pag, struct xfs_buf **agbp); xfs_extlen_t xfs_prealloc_blocks(struct xfs_mount *mp); diff --git a/fs/xfs/libxfs/xfs_alloc_btree.c b/fs/xfs/libxfs/xfs_alloc_btree.c index a43e4c50e69b..6b363f78cfa2 100644 --- a/fs/xfs/libxfs/xfs_alloc_btree.c +++ b/fs/xfs/libxfs/xfs_alloc_btree.c @@ -9,7 +9,6 @@ #include "xfs_format.h" #include "xfs_log_format.h" #include "xfs_trans_resv.h" -#include "xfs_sb.h" #include "xfs_mount.h" #include "xfs_btree.h" #include "xfs_btree_staging.h" @@ -19,6 +18,7 @@ #include "xfs_error.h" #include "xfs_trace.h" #include "xfs_trans.h" +#include "xfs_ag.h" STATIC struct xfs_btree_cur * @@ -26,8 +26,7 @@ xfs_allocbt_dup_cursor( struct xfs_btree_cur *cur) { return xfs_allocbt_init_cursor(cur->bc_mp, cur->bc_tp, - cur->bc_ag.agbp, cur->bc_ag.agno, - cur->bc_btnum); + cur->bc_ag.agbp, cur->bc_ag.pag, cur->bc_btnum); } STATIC void @@ -39,13 +38,12 @@ xfs_allocbt_set_root( struct xfs_buf *agbp = cur->bc_ag.agbp; struct xfs_agf *agf = agbp->b_addr; int btnum = cur->bc_btnum; - struct xfs_perag *pag = agbp->b_pag; ASSERT(ptr->s != 0); agf->agf_roots[btnum] = ptr->s; be32_add_cpu(&agf->agf_levels[btnum], inc); - pag->pagf_levels[btnum] += inc; + cur->bc_ag.pag->pagf_levels[btnum] += inc; xfs_alloc_log_agf(cur->bc_tp, agbp, XFS_AGF_ROOTS | XFS_AGF_LEVELS); } @@ -72,7 +70,7 @@ xfs_allocbt_alloc_block( } atomic64_inc(&cur->bc_mp->m_allocbt_blks); - xfs_extent_busy_reuse(cur->bc_mp, cur->bc_ag.agno, bno, 1, false); + xfs_extent_busy_reuse(cur->bc_mp, cur->bc_ag.agbp->b_pag, bno, 1, false); new->s = cpu_to_be32(bno); @@ -86,7 +84,6 @@ xfs_allocbt_free_block( struct xfs_buf *bp) { struct xfs_buf *agbp = cur->bc_ag.agbp; - struct xfs_agf *agf = agbp->b_addr; xfs_agblock_t bno; int error; @@ -96,7 +93,7 @@ xfs_allocbt_free_block( return error; atomic64_dec(&cur->bc_mp->m_allocbt_blks); - xfs_extent_busy_insert(cur->bc_tp, be32_to_cpu(agf->agf_seqno), bno, 1, + xfs_extent_busy_insert(cur->bc_tp, agbp->b_pag, bno, 1, XFS_EXTENT_BUSY_SKIP_DISCARD); return 0; } @@ -225,7 +222,7 @@ xfs_allocbt_init_ptr_from_cur( { struct xfs_agf *agf = cur->bc_ag.agbp->b_addr; - ASSERT(cur->bc_ag.agno == be32_to_cpu(agf->agf_seqno)); + ASSERT(cur->bc_ag.pag->pag_agno == be32_to_cpu(agf->agf_seqno)); ptr->s = agf->agf_roots[cur->bc_btnum]; } @@ -473,7 +470,7 @@ STATIC struct xfs_btree_cur * xfs_allocbt_init_common( struct xfs_mount *mp, struct xfs_trans *tp, - xfs_agnumber_t agno, + struct xfs_perag *pag, xfs_btnum_t btnum) { struct xfs_btree_cur *cur; @@ -486,6 +483,7 @@ xfs_allocbt_init_common( cur->bc_mp = mp; cur->bc_btnum = btnum; cur->bc_blocklog = mp->m_sb.sb_blocklog; + cur->bc_ag.abt.active = false; if (btnum == XFS_BTNUM_CNT) { cur->bc_ops = &xfs_cntbt_ops; @@ -496,8 +494,9 @@ xfs_allocbt_init_common( cur->bc_statoff = XFS_STATS_CALC_INDEX(xs_abtb_2); } - cur->bc_ag.agno = agno; - cur->bc_ag.abt.active = false; + /* take a reference for the cursor */ + atomic_inc(&pag->pag_ref); + cur->bc_ag.pag = pag; if (xfs_sb_version_hascrc(&mp->m_sb)) cur->bc_flags |= XFS_BTREE_CRC_BLOCKS; @@ -513,13 +512,13 @@ xfs_allocbt_init_cursor( struct xfs_mount *mp, /* file system mount point */ struct xfs_trans *tp, /* transaction pointer */ struct xfs_buf *agbp, /* buffer for agf structure */ - xfs_agnumber_t agno, /* allocation group number */ + struct xfs_perag *pag, xfs_btnum_t btnum) /* btree identifier */ { struct xfs_agf *agf = agbp->b_addr; struct xfs_btree_cur *cur; - cur = xfs_allocbt_init_common(mp, tp, agno, btnum); + cur = xfs_allocbt_init_common(mp, tp, pag, btnum); if (btnum == XFS_BTNUM_CNT) cur->bc_nlevels = be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNT]); else @@ -535,12 +534,12 @@ struct xfs_btree_cur * xfs_allocbt_stage_cursor( struct xfs_mount *mp, struct xbtree_afakeroot *afake, - xfs_agnumber_t agno, + struct xfs_perag *pag, xfs_btnum_t btnum) { struct xfs_btree_cur *cur; - cur = xfs_allocbt_init_common(mp, NULL, agno, btnum); + cur = xfs_allocbt_init_common(mp, NULL, pag, btnum); xfs_btree_stage_afakeroot(cur, afake); return cur; } diff --git a/fs/xfs/libxfs/xfs_alloc_btree.h b/fs/xfs/libxfs/xfs_alloc_btree.h index a5b998e950fe..9eb4c667a6b8 100644 --- a/fs/xfs/libxfs/xfs_alloc_btree.h +++ b/fs/xfs/libxfs/xfs_alloc_btree.h @@ -13,6 +13,7 @@ struct xfs_buf; struct xfs_btree_cur; struct xfs_mount; +struct xfs_perag; struct xbtree_afakeroot; /* @@ -46,11 +47,11 @@ struct xbtree_afakeroot; (maxrecs) * sizeof(xfs_alloc_key_t) + \ ((index) - 1) * sizeof(xfs_alloc_ptr_t))) -extern struct xfs_btree_cur *xfs_allocbt_init_cursor(struct xfs_mount *, - struct xfs_trans *, struct xfs_buf *, - xfs_agnumber_t, xfs_btnum_t); +extern struct xfs_btree_cur *xfs_allocbt_init_cursor(struct xfs_mount *mp, + struct xfs_trans *tp, struct xfs_buf *bp, + struct xfs_perag *pag, xfs_btnum_t btnum); struct xfs_btree_cur *xfs_allocbt_stage_cursor(struct xfs_mount *mp, - struct xbtree_afakeroot *afake, xfs_agnumber_t agno, + struct xbtree_afakeroot *afake, struct xfs_perag *pag, xfs_btnum_t btnum); extern int xfs_allocbt_maxrecs(struct xfs_mount *, int, int); extern xfs_extlen_t xfs_allocbt_calc_size(struct xfs_mount *mp, diff --git a/fs/xfs/libxfs/xfs_attr.c b/fs/xfs/libxfs/xfs_attr.c index 96146f425e50..d9d7d5137b73 100644 --- a/fs/xfs/libxfs/xfs_attr.c +++ b/fs/xfs/libxfs/xfs_attr.c @@ -44,20 +44,27 @@ STATIC int xfs_attr_shortform_addname(xfs_da_args_t *args); * Internal routines when attribute list is one block. */ STATIC int xfs_attr_leaf_get(xfs_da_args_t *args); -STATIC int xfs_attr_leaf_addname(xfs_da_args_t *args); STATIC int xfs_attr_leaf_removename(xfs_da_args_t *args); STATIC int xfs_attr_leaf_hasname(struct xfs_da_args *args, struct xfs_buf **bp); +STATIC int xfs_attr_leaf_try_add(struct xfs_da_args *args, struct xfs_buf *bp); /* * Internal routines when attribute list is more than one block. */ STATIC int xfs_attr_node_get(xfs_da_args_t *args); -STATIC int xfs_attr_node_addname(xfs_da_args_t *args); -STATIC int xfs_attr_node_removename(xfs_da_args_t *args); +STATIC void xfs_attr_restore_rmt_blk(struct xfs_da_args *args); +STATIC int xfs_attr_node_addname(struct xfs_delattr_context *dac); +STATIC int xfs_attr_node_addname_find_attr(struct xfs_delattr_context *dac); +STATIC int xfs_attr_node_addname_clear_incomplete( + struct xfs_delattr_context *dac); STATIC int xfs_attr_node_hasname(xfs_da_args_t *args, struct xfs_da_state **state); STATIC int xfs_attr_fillstate(xfs_da_state_t *state); STATIC int xfs_attr_refillstate(xfs_da_state_t *state); +STATIC int xfs_attr_set_iter(struct xfs_delattr_context *dac, + struct xfs_buf **leaf_bp); +STATIC int xfs_attr_node_removename(struct xfs_da_args *args, + struct xfs_da_state *state); int xfs_inode_hasattr( @@ -237,27 +244,77 @@ xfs_attr_is_shortform( } /* - * Attempts to set an attr in shortform, or converts short form to leaf form if - * there is not enough room. If the attr is set, the transaction is committed - * and set to NULL. + * Checks to see if a delayed attribute transaction should be rolled. If so, + * transaction is finished or rolled as needed. */ STATIC int -xfs_attr_set_shortform( - struct xfs_da_args *args, - struct xfs_buf **leaf_bp) +xfs_attr_trans_roll( + struct xfs_delattr_context *dac) { - struct xfs_inode *dp = args->dp; - int error, error2 = 0; + struct xfs_da_args *args = dac->da_args; + int error; + + if (dac->flags & XFS_DAC_DEFER_FINISH) { + /* + * The caller wants us to finish all the deferred ops so that we + * avoid pinning the log tail with a large number of deferred + * ops. + */ + dac->flags &= ~XFS_DAC_DEFER_FINISH; + error = xfs_defer_finish(&args->trans); + } else + error = xfs_trans_roll_inode(&args->trans, args->dp); + + return error; +} + +/* + * Set the attribute specified in @args. + */ +int +xfs_attr_set_args( + struct xfs_da_args *args) +{ + struct xfs_buf *leaf_bp = NULL; + int error = 0; + struct xfs_delattr_context dac = { + .da_args = args, + }; + + do { + error = xfs_attr_set_iter(&dac, &leaf_bp); + if (error != -EAGAIN) + break; + + error = xfs_attr_trans_roll(&dac); + if (error) { + if (leaf_bp) + xfs_trans_brelse(args->trans, leaf_bp); + return error; + } + } while (true); + + return error; +} + +STATIC int +xfs_attr_sf_addname( + struct xfs_delattr_context *dac, + struct xfs_buf **leaf_bp) +{ + struct xfs_da_args *args = dac->da_args; + struct xfs_inode *dp = args->dp; + int error = 0; /* * Try to add the attr to the attribute list in the inode. */ error = xfs_attr_try_sf_addname(dp, args); - if (error != -ENOSPC) { - error2 = xfs_trans_commit(args->trans); - args->trans = NULL; - return error ? error : error2; - } + + /* Should only be 0, -EEXIST or -ENOSPC */ + if (error != -ENOSPC) + return error; + /* * It won't fit in the shortform, transform to a leaf block. GROT: * another possible req'mt for a double-split btree op. @@ -269,85 +326,300 @@ xfs_attr_set_shortform( /* * Prevent the leaf buffer from being unlocked so that a concurrent AIL * push cannot grab the half-baked leaf buffer and run into problems - * with the write verifier. Once we're done rolling the transaction we - * can release the hold and add the attr to the leaf. + * with the write verifier. */ xfs_trans_bhold(args->trans, *leaf_bp); - error = xfs_defer_finish(&args->trans); - xfs_trans_bhold_release(args->trans, *leaf_bp); - if (error) { - xfs_trans_brelse(args->trans, *leaf_bp); - return error; - } - return 0; + /* + * We're still in XFS_DAS_UNINIT state here. We've converted + * the attr fork to leaf format and will restart with the leaf + * add. + */ + dac->flags |= XFS_DAC_DEFER_FINISH; + return -EAGAIN; } /* * Set the attribute specified in @args. + * This routine is meant to function as a delayed operation, and may return + * -EAGAIN when the transaction needs to be rolled. Calling functions will need + * to handle this, and recall the function until a successful error code is + * returned. */ int -xfs_attr_set_args( - struct xfs_da_args *args) +xfs_attr_set_iter( + struct xfs_delattr_context *dac, + struct xfs_buf **leaf_bp) { - struct xfs_inode *dp = args->dp; - struct xfs_buf *leaf_bp = NULL; - int error = 0; + struct xfs_da_args *args = dac->da_args; + struct xfs_inode *dp = args->dp; + struct xfs_buf *bp = NULL; + int forkoff, error = 0; + + /* State machine switch */ + switch (dac->dela_state) { + case XFS_DAS_UNINIT: + /* + * If the fork is shortform, attempt to add the attr. If there + * is no space, this converts to leaf format and returns + * -EAGAIN with the leaf buffer held across the roll. The caller + * will deal with a transaction roll error, but otherwise + * release the hold once we return with a clean transaction. + */ + if (xfs_attr_is_shortform(dp)) + return xfs_attr_sf_addname(dac, leaf_bp); + if (*leaf_bp != NULL) { + xfs_trans_bhold_release(args->trans, *leaf_bp); + *leaf_bp = NULL; + } - /* - * If the attribute list is already in leaf format, jump straight to - * leaf handling. Otherwise, try to add the attribute to the shortform - * list; if there's no room then convert the list to leaf format and try - * again. - */ - if (xfs_attr_is_shortform(dp)) { + if (xfs_attr_is_leaf(dp)) { + error = xfs_attr_leaf_try_add(args, *leaf_bp); + if (error == -ENOSPC) { + error = xfs_attr3_leaf_to_node(args); + if (error) + return error; + + /* + * Finish any deferred work items and roll the + * transaction once more. The goal here is to + * call node_addname with the inode and + * transaction in the same state (inode locked + * and joined, transaction clean) no matter how + * we got to this step. + * + * At this point, we are still in + * XFS_DAS_UNINIT, but when we come back, we'll + * be a node, so we'll fall down into the node + * handling code below + */ + dac->flags |= XFS_DAC_DEFER_FINISH; + return -EAGAIN; + } else if (error) { + return error; + } + + dac->dela_state = XFS_DAS_FOUND_LBLK; + } else { + error = xfs_attr_node_addname_find_attr(dac); + if (error) + return error; + + error = xfs_attr_node_addname(dac); + if (error) + return error; + + dac->dela_state = XFS_DAS_FOUND_NBLK; + } + return -EAGAIN; + case XFS_DAS_FOUND_LBLK: + /* + * If there was an out-of-line value, allocate the blocks we + * identified for its storage and copy the value. This is done + * after we create the attribute so that we don't overflow the + * maximum size of a transaction and/or hit a deadlock. + */ + + /* Open coded xfs_attr_rmtval_set without trans handling */ + if ((dac->flags & XFS_DAC_LEAF_ADDNAME_INIT) == 0) { + dac->flags |= XFS_DAC_LEAF_ADDNAME_INIT; + if (args->rmtblkno > 0) { + error = xfs_attr_rmtval_find_space(dac); + if (error) + return error; + } + } /* - * If the attr was successfully set in shortform, the - * transaction is committed and set to NULL. Otherwise, is it - * converted from shortform to leaf, and the transaction is - * retained. + * Repeat allocating remote blocks for the attr value until + * blkcnt drops to zero. */ - error = xfs_attr_set_shortform(args, &leaf_bp); - if (error || !args->trans) + if (dac->blkcnt > 0) { + error = xfs_attr_rmtval_set_blk(dac); + if (error) + return error; + return -EAGAIN; + } + + error = xfs_attr_rmtval_set_value(args); + if (error) return error; - } - if (xfs_attr_is_leaf(dp)) { - error = xfs_attr_leaf_addname(args); - if (error != -ENOSPC) + /* + * If this is not a rename, clear the incomplete flag and we're + * done. + */ + if (!(args->op_flags & XFS_DA_OP_RENAME)) { + if (args->rmtblkno > 0) + error = xfs_attr3_leaf_clearflag(args); return error; + } /* - * Promote the attribute list to the Btree format. + * If this is an atomic rename operation, we must "flip" the + * incomplete flags on the "new" and "old" attribute/value pairs + * so that one disappears and one appears atomically. Then we + * must remove the "old" attribute/value pair. + * + * In a separate transaction, set the incomplete flag on the + * "old" attr and clear the incomplete flag on the "new" attr. */ - error = xfs_attr3_leaf_to_node(args); + error = xfs_attr3_leaf_flipflags(args); if (error) return error; + /* + * Commit the flag value change and start the next trans in + * series. + */ + dac->dela_state = XFS_DAS_FLIP_LFLAG; + return -EAGAIN; + case XFS_DAS_FLIP_LFLAG: + /* + * Dismantle the "old" attribute/value pair by removing a + * "remote" value (if it exists). + */ + xfs_attr_restore_rmt_blk(args); + error = xfs_attr_rmtval_invalidate(args); + if (error) + return error; + + /* fallthrough */ + case XFS_DAS_RM_LBLK: + /* Set state in case xfs_attr_rmtval_remove returns -EAGAIN */ + dac->dela_state = XFS_DAS_RM_LBLK; + if (args->rmtblkno) { + error = __xfs_attr_rmtval_remove(dac); + if (error) + return error; + + dac->dela_state = XFS_DAS_RD_LEAF; + return -EAGAIN; + } + /* fallthrough */ + case XFS_DAS_RD_LEAF: /* - * Finish any deferred work items and roll the transaction once - * more. The goal here is to call node_addname with the inode - * and transaction in the same state (inode locked and joined, - * transaction clean) no matter how we got to this step. + * This is the last step for leaf format. Read the block with + * the old attr, remove the old attr, check for shortform + * conversion and return. */ - error = xfs_defer_finish(&args->trans); + error = xfs_attr3_leaf_read(args->trans, args->dp, args->blkno, + &bp); if (error) return error; + xfs_attr3_leaf_remove(bp, args); + + forkoff = xfs_attr_shortform_allfit(bp, dp); + if (forkoff) + error = xfs_attr3_leaf_to_shortform(bp, args, forkoff); + /* bp is gone due to xfs_da_shrink_inode */ + + return error; + + case XFS_DAS_FOUND_NBLK: + /* + * Find space for remote blocks and fall into the allocation + * state. + */ + if (args->rmtblkno > 0) { + error = xfs_attr_rmtval_find_space(dac); + if (error) + return error; + } + + /* fallthrough */ + case XFS_DAS_ALLOC_NODE: + /* + * If there was an out-of-line value, allocate the blocks we + * identified for its storage and copy the value. This is done + * after we create the attribute so that we don't overflow the + * maximum size of a transaction and/or hit a deadlock. + */ + dac->dela_state = XFS_DAS_ALLOC_NODE; + if (args->rmtblkno > 0) { + if (dac->blkcnt > 0) { + error = xfs_attr_rmtval_set_blk(dac); + if (error) + return error; + return -EAGAIN; + } + + error = xfs_attr_rmtval_set_value(args); + if (error) + return error; + } + /* - * Commit the current trans (including the inode) and - * start a new one. + * If this was not a rename, clear the incomplete flag and we're + * done. */ - error = xfs_trans_roll_inode(&args->trans, dp); + if (!(args->op_flags & XFS_DA_OP_RENAME)) { + if (args->rmtblkno > 0) + error = xfs_attr3_leaf_clearflag(args); + goto out; + } + + /* + * If this is an atomic rename operation, we must "flip" the + * incomplete flags on the "new" and "old" attribute/value pairs + * so that one disappears and one appears atomically. Then we + * must remove the "old" attribute/value pair. + * + * In a separate transaction, set the incomplete flag on the + * "old" attr and clear the incomplete flag on the "new" attr. + */ + error = xfs_attr3_leaf_flipflags(args); + if (error) + goto out; + /* + * Commit the flag value change and start the next trans in + * series + */ + dac->dela_state = XFS_DAS_FLIP_NFLAG; + return -EAGAIN; + + case XFS_DAS_FLIP_NFLAG: + /* + * Dismantle the "old" attribute/value pair by removing a + * "remote" value (if it exists). + */ + xfs_attr_restore_rmt_blk(args); + + error = xfs_attr_rmtval_invalidate(args); if (error) return error; - } - error = xfs_attr_node_addname(args); + /* fallthrough */ + case XFS_DAS_RM_NBLK: + /* Set state in case xfs_attr_rmtval_remove returns -EAGAIN */ + dac->dela_state = XFS_DAS_RM_NBLK; + if (args->rmtblkno) { + error = __xfs_attr_rmtval_remove(dac); + if (error) + return error; + + dac->dela_state = XFS_DAS_CLR_FLAG; + return -EAGAIN; + } + + /* fallthrough */ + case XFS_DAS_CLR_FLAG: + /* + * The last state for node format. Look up the old attr and + * remove it. + */ + error = xfs_attr_node_addname_clear_incomplete(dac); + break; + default: + ASSERT(0); + break; + } +out: return error; } + /* * Return EEXIST if attr is found, or ENOATTR if not */ @@ -382,16 +654,25 @@ xfs_has_attr( */ int xfs_attr_remove_args( - struct xfs_da_args *args) + struct xfs_da_args *args) { - if (!xfs_inode_hasattr(args->dp)) - return -ENOATTR; + int error; + struct xfs_delattr_context dac = { + .da_args = args, + }; - if (args->dp->i_afp->if_format == XFS_DINODE_FMT_LOCAL) - return xfs_attr_shortform_remove(args); - if (xfs_attr_is_leaf(args->dp)) - return xfs_attr_leaf_removename(args); - return xfs_attr_node_removename(args); + do { + error = xfs_attr_remove_iter(&dac); + if (error != -EAGAIN) + break; + + error = xfs_attr_trans_roll(&dac); + if (error) + return error; + + } while (true); + + return error; } /* @@ -559,7 +840,7 @@ xfs_attr_shortform_addname(xfs_da_args_t *args) if (retval == -EEXIST) { if (args->attr_flags & XATTR_CREATE) return retval; - retval = xfs_attr_shortform_remove(args); + retval = xfs_attr_sf_removename(args); if (retval) return retval; /* @@ -670,115 +951,6 @@ out_brelse: return retval; } - -/* - * Add a name to the leaf attribute list structure - * - * This leaf block cannot have a "remote" value, we only call this routine - * if bmap_one_block() says there is only one block (ie: no remote blks). - */ -STATIC int -xfs_attr_leaf_addname( - struct xfs_da_args *args) -{ - int error, forkoff; - struct xfs_buf *bp = NULL; - struct xfs_inode *dp = args->dp; - - trace_xfs_attr_leaf_addname(args); - - error = xfs_attr_leaf_try_add(args, bp); - if (error) - return error; - - /* - * Commit the transaction that added the attr name so that - * later routines can manage their own transactions. - */ - error = xfs_trans_roll_inode(&args->trans, dp); - if (error) - return error; - - /* - * If there was an out-of-line value, allocate the blocks we - * identified for its storage and copy the value. This is done - * after we create the attribute so that we don't overflow the - * maximum size of a transaction and/or hit a deadlock. - */ - if (args->rmtblkno > 0) { - error = xfs_attr_rmtval_set(args); - if (error) - return error; - } - - if (!(args->op_flags & XFS_DA_OP_RENAME)) { - /* - * Added a "remote" value, just clear the incomplete flag. - */ - if (args->rmtblkno > 0) - error = xfs_attr3_leaf_clearflag(args); - - return error; - } - - /* - * If this is an atomic rename operation, we must "flip" the incomplete - * flags on the "new" and "old" attribute/value pairs so that one - * disappears and one appears atomically. Then we must remove the "old" - * attribute/value pair. - * - * In a separate transaction, set the incomplete flag on the "old" attr - * and clear the incomplete flag on the "new" attr. - */ - - error = xfs_attr3_leaf_flipflags(args); - if (error) - return error; - /* - * Commit the flag value change and start the next trans in series. - */ - error = xfs_trans_roll_inode(&args->trans, args->dp); - if (error) - return error; - - /* - * Dismantle the "old" attribute/value pair by removing a "remote" value - * (if it exists). - */ - xfs_attr_restore_rmt_blk(args); - - if (args->rmtblkno) { - error = xfs_attr_rmtval_invalidate(args); - if (error) - return error; - - error = xfs_attr_rmtval_remove(args); - if (error) - return error; - } - - /* - * Read in the block containing the "old" attr, then remove the "old" - * attr from that block (neat, huh!) - */ - error = xfs_attr3_leaf_read(args->trans, args->dp, args->blkno, - &bp); - if (error) - return error; - - xfs_attr3_leaf_remove(bp, args); - - /* - * If the result is small enough, shrink it all into the inode. - */ - forkoff = xfs_attr_shortform_allfit(bp, dp); - if (forkoff) - error = xfs_attr3_leaf_to_shortform(bp, args, forkoff); - /* bp is gone due to xfs_da_shrink_inode */ - - return error; -} - /* * Return EEXIST if attr is found, or ENOATTR if not */ @@ -909,48 +1081,26 @@ xfs_attr_node_hasname( * External routines when attribute list size > geo->blksize *========================================================================*/ -/* - * Add a name to a Btree-format attribute list. - * - * This will involve walking down the Btree, and may involve splitting - * leaf nodes and even splitting intermediate nodes up to and including - * the root node (a special case of an intermediate node). - * - * "Remote" attribute values confuse the issue and atomic rename operations - * add a whole extra layer of confusion on top of that. - */ STATIC int -xfs_attr_node_addname( - struct xfs_da_args *args) +xfs_attr_node_addname_find_attr( + struct xfs_delattr_context *dac) { - struct xfs_da_state *state; - struct xfs_da_state_blk *blk; - struct xfs_inode *dp; - int retval, error; - - trace_xfs_attr_node_addname(args); + struct xfs_da_args *args = dac->da_args; + int retval; /* - * Fill in bucket of arguments/results/context to carry around. - */ - dp = args->dp; -restart: - /* * Search to see if name already exists, and get back a pointer * to where it should go. */ - error = 0; - retval = xfs_attr_node_hasname(args, &state); + retval = xfs_attr_node_hasname(args, &dac->da_state); if (retval != -ENOATTR && retval != -EEXIST) - goto out; + return retval; - blk = &state->path.blk[ state->path.active-1 ]; - ASSERT(blk->magic == XFS_ATTR_LEAF_MAGIC); if (retval == -ENOATTR && (args->attr_flags & XATTR_REPLACE)) - goto out; + goto error; if (retval == -EEXIST) { if (args->attr_flags & XATTR_CREATE) - goto out; + goto error; trace_xfs_attr_node_replace(args); @@ -968,8 +1118,44 @@ restart: args->rmtvaluelen = 0; } - retval = xfs_attr3_leaf_add(blk->bp, state->args); - if (retval == -ENOSPC) { + return 0; +error: + if (dac->da_state) + xfs_da_state_free(dac->da_state); + return retval; +} + +/* + * Add a name to a Btree-format attribute list. + * + * This will involve walking down the Btree, and may involve splitting + * leaf nodes and even splitting intermediate nodes up to and including + * the root node (a special case of an intermediate node). + * + * "Remote" attribute values confuse the issue and atomic rename operations + * add a whole extra layer of confusion on top of that. + * + * This routine is meant to function as a delayed operation, and may return + * -EAGAIN when the transaction needs to be rolled. Calling functions will need + * to handle this, and recall the function until a successful error code is + *returned. + */ +STATIC int +xfs_attr_node_addname( + struct xfs_delattr_context *dac) +{ + struct xfs_da_args *args = dac->da_args; + struct xfs_da_state *state = dac->da_state; + struct xfs_da_state_blk *blk; + int error; + + trace_xfs_attr_node_addname(args); + + blk = &state->path.blk[state->path.active-1]; + ASSERT(blk->magic == XFS_ATTR_LEAF_MAGIC); + + error = xfs_attr3_leaf_add(blk->bp, state->args); + if (error == -ENOSPC) { if (state->path.active == 1) { /* * Its really a single leaf node, but it had @@ -981,19 +1167,16 @@ restart: error = xfs_attr3_leaf_to_node(args); if (error) goto out; - error = xfs_defer_finish(&args->trans); - if (error) - goto out; /* - * Commit the node conversion and start the next - * trans in the chain. + * Now that we have converted the leaf to a node, we can + * roll the transaction, and try xfs_attr3_leaf_add + * again on re-entry. No need to set dela_state to do + * this. dela_state is still unset by this function at + * this point. */ - error = xfs_trans_roll_inode(&args->trans, dp); - if (error) - goto out; - - goto restart; + dac->flags |= XFS_DAC_DEFER_FINISH; + return -EAGAIN; } /* @@ -1005,9 +1188,7 @@ restart: error = xfs_da3_split(state); if (error) goto out; - error = xfs_defer_finish(&args->trans); - if (error) - goto out; + dac->flags |= XFS_DAC_DEFER_FINISH; } else { /* * Addition succeeded, update Btree hashvals. @@ -1015,77 +1196,21 @@ restart: xfs_da3_fixhashpath(state, &state->path); } - /* - * Kill the state structure, we're done with it and need to - * allow the buffers to come back later. - */ - xfs_da_state_free(state); - state = NULL; - - /* - * Commit the leaf addition or btree split and start the next - * trans in the chain. - */ - error = xfs_trans_roll_inode(&args->trans, dp); - if (error) - goto out; - - /* - * If there was an out-of-line value, allocate the blocks we - * identified for its storage and copy the value. This is done - * after we create the attribute so that we don't overflow the - * maximum size of a transaction and/or hit a deadlock. - */ - if (args->rmtblkno > 0) { - error = xfs_attr_rmtval_set(args); - if (error) - return error; - } - - if (!(args->op_flags & XFS_DA_OP_RENAME)) { - /* - * Added a "remote" value, just clear the incomplete flag. - */ - if (args->rmtblkno > 0) - error = xfs_attr3_leaf_clearflag(args); - retval = error; - goto out; - } - - /* - * If this is an atomic rename operation, we must "flip" the incomplete - * flags on the "new" and "old" attribute/value pairs so that one - * disappears and one appears atomically. Then we must remove the "old" - * attribute/value pair. - * - * In a separate transaction, set the incomplete flag on the "old" attr - * and clear the incomplete flag on the "new" attr. - */ - error = xfs_attr3_leaf_flipflags(args); - if (error) - goto out; - /* - * Commit the flag value change and start the next trans in series - */ - error = xfs_trans_roll_inode(&args->trans, args->dp); - if (error) - goto out; - - /* - * Dismantle the "old" attribute/value pair by removing a "remote" value - * (if it exists). - */ - xfs_attr_restore_rmt_blk(args); +out: + if (state) + xfs_da_state_free(state); + return error; +} - if (args->rmtblkno) { - error = xfs_attr_rmtval_invalidate(args); - if (error) - return error; - error = xfs_attr_rmtval_remove(args); - if (error) - return error; - } +STATIC int +xfs_attr_node_addname_clear_incomplete( + struct xfs_delattr_context *dac) +{ + struct xfs_da_args *args = dac->da_args; + struct xfs_da_state *state = NULL; + int retval = 0; + int error = 0; /* * Re-find the "old" attribute entry after any split ops. The INCOMPLETE @@ -1098,13 +1223,7 @@ restart: if (error) goto out; - /* - * Remove the name and update the hashvals in the tree. - */ - blk = &state->path.blk[state->path.active-1]; - ASSERT(blk->magic == XFS_ATTR_LEAF_MAGIC); - error = xfs_attr3_leaf_remove(blk->bp, args); - xfs_da3_fixhashpath(state, &state->path); + error = xfs_attr_node_removename(args, state); /* * Check to see if the tree needs to be collapsed. @@ -1190,14 +1309,16 @@ xfs_attr_leaf_mark_incomplete( */ STATIC int xfs_attr_node_removename_setup( - struct xfs_da_args *args, - struct xfs_da_state **state) + struct xfs_delattr_context *dac) { - int error; + struct xfs_da_args *args = dac->da_args; + struct xfs_da_state **state = &dac->da_state; + int error; error = xfs_attr_node_hasname(args, state); if (error != -EEXIST) return error; + error = 0; ASSERT((*state)->path.blk[(*state)->path.active - 1].bp != NULL); ASSERT((*state)->path.blk[(*state)->path.active - 1].magic == @@ -1206,97 +1327,164 @@ int xfs_attr_node_removename_setup( if (args->rmtblkno > 0) { error = xfs_attr_leaf_mark_incomplete(args, *state); if (error) - return error; + goto out; - return xfs_attr_rmtval_invalidate(args); + error = xfs_attr_rmtval_invalidate(args); } +out: + if (error) + xfs_da_state_free(*state); - return 0; + return error; } STATIC int -xfs_attr_node_remove_rmt( +xfs_attr_node_removename( struct xfs_da_args *args, struct xfs_da_state *state) { - int error = 0; - - error = xfs_attr_rmtval_remove(args); - if (error) - return error; + struct xfs_da_state_blk *blk; + int retval; /* - * Refill the state structure with buffers, the prior calls released our - * buffers. + * Remove the name and update the hashvals in the tree. */ - return xfs_attr_refillstate(state); + blk = &state->path.blk[state->path.active-1]; + ASSERT(blk->magic == XFS_ATTR_LEAF_MAGIC); + retval = xfs_attr3_leaf_remove(blk->bp, args); + xfs_da3_fixhashpath(state, &state->path); + + return retval; } /* - * Remove a name from a B-tree attribute list. + * Remove the attribute specified in @args. * * This will involve walking down the Btree, and may involve joining * leaf nodes and even joining intermediate nodes up to and including * the root node (a special case of an intermediate node). + * + * This routine is meant to function as either an in-line or delayed operation, + * and may return -EAGAIN when the transaction needs to be rolled. Calling + * functions will need to handle this, and call the function until a + * successful error code is returned. */ -STATIC int -xfs_attr_node_removename( - struct xfs_da_args *args) +int +xfs_attr_remove_iter( + struct xfs_delattr_context *dac) { - struct xfs_da_state *state; - struct xfs_da_state_blk *blk; - int retval, error; - struct xfs_inode *dp = args->dp; + struct xfs_da_args *args = dac->da_args; + struct xfs_da_state *state = dac->da_state; + int retval, error = 0; + struct xfs_inode *dp = args->dp; trace_xfs_attr_node_removename(args); - error = xfs_attr_node_removename_setup(args, &state); - if (error) - goto out; + switch (dac->dela_state) { + case XFS_DAS_UNINIT: + if (!xfs_inode_hasattr(dp)) + return -ENOATTR; - /* - * If there is an out-of-line value, de-allocate the blocks. - * This is done before we remove the attribute so that we don't - * overflow the maximum size of a transaction and/or hit a deadlock. - */ - if (args->rmtblkno > 0) { - error = xfs_attr_node_remove_rmt(args, state); - if (error) - goto out; - } + /* + * Shortform or leaf formats don't require transaction rolls and + * thus state transitions. Call the right helper and return. + */ + if (dp->i_afp->if_format == XFS_DINODE_FMT_LOCAL) + return xfs_attr_sf_removename(args); - /* - * Remove the name and update the hashvals in the tree. - */ - blk = &state->path.blk[ state->path.active-1 ]; - ASSERT(blk->magic == XFS_ATTR_LEAF_MAGIC); - retval = xfs_attr3_leaf_remove(blk->bp, args); - xfs_da3_fixhashpath(state, &state->path); + if (xfs_attr_is_leaf(dp)) + return xfs_attr_leaf_removename(args); - /* - * Check to see if the tree needs to be collapsed. - */ - if (retval && (state->path.active > 1)) { - error = xfs_da3_join(state); - if (error) - goto out; - error = xfs_defer_finish(&args->trans); - if (error) - goto out; /* - * Commit the Btree join operation and start a new trans. + * Node format may require transaction rolls. Set up the + * state context and fall into the state machine. */ - error = xfs_trans_roll_inode(&args->trans, dp); - if (error) - goto out; - } + if (!dac->da_state) { + error = xfs_attr_node_removename_setup(dac); + if (error) + return error; + state = dac->da_state; + } - /* - * If the result is small enough, push it all into the inode. - */ - if (xfs_attr_is_leaf(dp)) - error = xfs_attr_node_shrink(args, state); + /* fallthrough */ + case XFS_DAS_RMTBLK: + dac->dela_state = XFS_DAS_RMTBLK; + /* + * If there is an out-of-line value, de-allocate the blocks. + * This is done before we remove the attribute so that we don't + * overflow the maximum size of a transaction and/or hit a + * deadlock. + */ + if (args->rmtblkno > 0) { + /* + * May return -EAGAIN. Roll and repeat until all remote + * blocks are removed. + */ + error = __xfs_attr_rmtval_remove(dac); + if (error == -EAGAIN) + return error; + else if (error) + goto out; + + /* + * Refill the state structure with buffers (the prior + * calls released our buffers) and close out this + * transaction before proceeding. + */ + ASSERT(args->rmtblkno == 0); + error = xfs_attr_refillstate(state); + if (error) + goto out; + dac->dela_state = XFS_DAS_RM_NAME; + dac->flags |= XFS_DAC_DEFER_FINISH; + return -EAGAIN; + } + + /* fallthrough */ + case XFS_DAS_RM_NAME: + /* + * If we came here fresh from a transaction roll, reattach all + * the buffers to the current transaction. + */ + if (dac->dela_state == XFS_DAS_RM_NAME) { + error = xfs_attr_refillstate(state); + if (error) + goto out; + } + + retval = xfs_attr_node_removename(args, state); + + /* + * Check to see if the tree needs to be collapsed. If so, roll + * the transacton and fall into the shrink state. + */ + if (retval && (state->path.active > 1)) { + error = xfs_da3_join(state); + if (error) + goto out; + + dac->flags |= XFS_DAC_DEFER_FINISH; + dac->dela_state = XFS_DAS_RM_SHRINK; + return -EAGAIN; + } + + /* fallthrough */ + case XFS_DAS_RM_SHRINK: + /* + * If the result is small enough, push it all into the inode. + * This is our final state so it's safe to return a dirty + * transaction. + */ + if (xfs_attr_is_leaf(dp)) + error = xfs_attr_node_shrink(args, state); + ASSERT(error != -EAGAIN); + break; + default: + ASSERT(0); + error = -EINVAL; + goto out; + } out: if (state) xfs_da_state_free(state); diff --git a/fs/xfs/libxfs/xfs_attr.h b/fs/xfs/libxfs/xfs_attr.h index 2b1f61987a9d..8de5d1d2733e 100644 --- a/fs/xfs/libxfs/xfs_attr.h +++ b/fs/xfs/libxfs/xfs_attr.h @@ -74,6 +74,406 @@ struct xfs_attr_list_context { }; +/* + * ======================================================================== + * Structure used to pass context around among the delayed routines. + * ======================================================================== + */ + +/* + * Below is a state machine diagram for attr remove operations. The XFS_DAS_* + * states indicate places where the function would return -EAGAIN, and then + * immediately resume from after being called by the calling function. States + * marked as a "subroutine state" indicate that they belong to a subroutine, and + * so the calling function needs to pass them back to that subroutine to allow + * it to finish where it left off. But they otherwise do not have a role in the + * calling function other than just passing through. + * + * xfs_attr_remove_iter() + * │ + * v + * have attr to remove? ──n──> done + * │ + * y + * │ + * v + * are we short form? ──y──> xfs_attr_shortform_remove ──> done + * │ + * n + * │ + * V + * are we leaf form? ──y──> xfs_attr_leaf_removename ──> done + * │ + * n + * │ + * V + * ┌── need to setup state? + * │ │ + * n y + * │ │ + * │ v + * │ find attr and get state + * │ attr has remote blks? ──n─┐ + * │ │ v + * │ │ find and invalidate + * │ y the remote blocks. + * │ │ mark attr incomplete + * │ ├────────────────┘ + * └──────────┤ + * │ + * v + * Have remote blks to remove? ───y─────┐ + * │ ^ remove the blks + * │ │ │ + * │ │ v + * │ XFS_DAS_RMTBLK <─n── done? + * │ re-enter with │ + * │ one less blk to y + * │ remove │ + * │ V + * │ refill the state + * n │ + * │ v + * │ XFS_DAS_RM_NAME + * │ │ + * ├─────────────────────────┘ + * │ + * v + * remove leaf and + * update hash with + * xfs_attr_node_remove_cleanup + * │ + * v + * need to + * shrink tree? ─n─┐ + * │ │ + * y │ + * │ │ + * v │ + * join leaf │ + * │ │ + * v │ + * XFS_DAS_RM_SHRINK │ + * │ │ + * v │ + * do the shrink │ + * │ │ + * v │ + * free state <──┘ + * │ + * v + * done + * + * + * Below is a state machine diagram for attr set operations. + * + * It seems the challenge with understanding this system comes from trying to + * absorb the state machine all at once, when really one should only be looking + * at it with in the context of a single function. Once a state sensitive + * function is called, the idea is that it "takes ownership" of the + * state machine. It isn't concerned with the states that may have belonged to + * it's calling parent. Only the states relevant to itself or any other + * subroutines there in. Once a calling function hands off the state machine to + * a subroutine, it needs to respect the simple rule that it doesn't "own" the + * state machine anymore, and it's the responsibility of that calling function + * to propagate the -EAGAIN back up the call stack. Upon reentry, it is + * committed to re-calling that subroutine until it returns something other than + * -EAGAIN. Once that subroutine signals completion (by returning anything other + * than -EAGAIN), the calling function can resume using the state machine. + * + * xfs_attr_set_iter() + * │ + * v + * ┌─y─ has an attr fork? + * │ | + * │ n + * │ | + * │ V + * │ add a fork + * │ │ + * └──────────┤ + * │ + * V + * ┌─── is shortform? + * │ │ + * │ y + * │ │ + * │ V + * │ xfs_attr_set_fmt + * │ | + * │ V + * │ xfs_attr_try_sf_addname + * │ │ + * │ V + * │ had enough ──y──> done + * │ space? + * n │ + * │ n + * │ │ + * │ V + * │ transform to leaf + * │ │ + * │ V + * │ hold the leaf buffer + * │ │ + * │ V + * │ return -EAGAIN + * │ Re-enter in + * │ leaf form + * │ + * └─> release leaf buffer + * if needed + * │ + * V + * ┌───n── fork has + * │ only 1 blk? + * │ │ + * │ y + * │ │ + * │ v + * │ xfs_attr_leaf_try_add() + * │ │ + * │ v + * │ had enough ──────────────y─────────────┐ + * │ space? │ + * │ │ │ + * │ n │ + * │ │ │ + * │ v │ + * │ return -EAGAIN │ + * │ re-enter in │ + * │ node form │ + * │ │ │ + * └──────────┤ │ + * │ │ + * V │ + * xfs_attr_node_addname_find_attr │ + * determines if this │ + * is create or rename │ + * find space to store attr │ + * │ │ + * v │ + * xfs_attr_node_addname │ + * │ │ + * v │ + * fits in a node leaf? ────n─────┐ │ + * │ ^ v │ + * │ │ single leaf node? │ + * │ │ │ │ │ + * y │ y n │ + * │ │ │ │ │ + * v │ v v │ + * update │ grow the leaf split if │ + * hashvals └── return -EAGAIN needed │ + * │ retry leaf add │ │ + * │ on reentry │ │ + * ├────────────────────────────┘ │ + * │ │ + * v │ + * need to alloc │ + * ┌─y── or flip flag? │ + * │ │ │ + * │ n │ + * │ │ │ + * │ v │ + * │ done │ + * │ │ + * │ │ + * │ XFS_DAS_FOUND_LBLK <────────────────┘ + * │ │ + * │ V + * │ xfs_attr_leaf_addname() + * │ │ + * │ v + * │ ┌──first time through? + * │ │ │ + * │ │ y + * │ │ │ + * │ n v + * │ │ if we have rmt blks + * │ │ find space for them + * │ │ │ + * │ └──────────┤ + * │ │ + * │ v + * │ still have + * │ ┌─n─ blks to alloc? <──┐ + * │ │ │ │ + * │ │ y │ + * │ │ │ │ + * │ │ v │ + * │ │ alloc one blk │ + * │ │ return -EAGAIN ──┘ + * │ │ re-enter with one + * │ │ less blk to alloc + * │ │ + * │ │ + * │ └───> set the rmt + * │ value + * │ │ + * │ v + * │ was this + * │ a rename? ──n─┐ + * │ │ │ + * │ y │ + * │ │ │ + * │ v │ + * │ flip incomplete │ + * │ flag │ + * │ │ │ + * │ v │ + * │ XFS_DAS_FLIP_LFLAG │ + * │ │ │ + * │ v │ + * │ need to remove │ + * │ old bks? ──n──┤ + * │ │ │ + * │ y │ + * │ │ │ + * │ V │ + * │ remove │ + * │ ┌───> old blks │ + * │ │ │ │ + * │ XFS_DAS_RM_LBLK │ │ + * │ ^ │ │ + * │ │ v │ + * │ └──y── more to │ + * │ remove? │ + * │ │ │ + * │ n │ + * │ │ │ + * │ v │ + * │ XFS_DAS_RD_LEAF │ + * │ │ │ + * │ v │ + * │ remove leaf │ + * │ │ │ + * │ v │ + * │ shrink to sf │ + * │ if needed │ + * │ │ │ + * │ v │ + * │ done <──────┘ + * │ + * └──────> XFS_DAS_FOUND_NBLK + * │ + * v + * ┌─────n── need to + * │ alloc blks? + * │ │ + * │ y + * │ │ + * │ v + * │ find space + * │ │ + * │ v + * │ ┌─>XFS_DAS_ALLOC_NODE + * │ │ │ + * │ │ v + * │ │ alloc blk + * │ │ │ + * │ │ v + * │ └──y── need to alloc + * │ more blocks? + * │ │ + * │ n + * │ │ + * │ v + * │ set the rmt value + * │ │ + * │ v + * │ was this + * └────────> a rename? ──n─┐ + * │ │ + * y │ + * │ │ + * v │ + * flip incomplete │ + * flag │ + * │ │ + * v │ + * XFS_DAS_FLIP_NFLAG │ + * │ │ + * v │ + * need to │ + * remove blks? ─n──┤ + * │ │ + * y │ + * │ │ + * v │ + * remove │ + * ┌────────> old blks │ + * │ │ │ + * XFS_DAS_RM_NBLK │ │ + * ^ │ │ + * │ v │ + * └──────y── more to │ + * remove │ + * │ │ + * n │ + * │ │ + * v │ + * XFS_DAS_CLR_FLAG │ + * │ │ + * v │ + * clear flags │ + * │ │ + * ├──────────┘ + * │ + * v + * done + */ + +/* + * Enum values for xfs_delattr_context.da_state + * + * These values are used by delayed attribute operations to keep track of where + * they were before they returned -EAGAIN. A return code of -EAGAIN signals the + * calling function to roll the transaction, and then call the subroutine to + * finish the operation. The enum is then used by the subroutine to jump back + * to where it was and resume executing where it left off. + */ +enum xfs_delattr_state { + XFS_DAS_UNINIT = 0, /* No state has been set yet */ + XFS_DAS_RMTBLK, /* Removing remote blks */ + XFS_DAS_RM_NAME, /* Remove attr name */ + XFS_DAS_RM_SHRINK, /* We are shrinking the tree */ + XFS_DAS_FOUND_LBLK, /* We found leaf blk for attr */ + XFS_DAS_FOUND_NBLK, /* We found node blk for attr */ + XFS_DAS_FLIP_LFLAG, /* Flipped leaf INCOMPLETE attr flag */ + XFS_DAS_RM_LBLK, /* A rename is removing leaf blocks */ + XFS_DAS_RD_LEAF, /* Read in the new leaf */ + XFS_DAS_ALLOC_NODE, /* We are allocating node blocks */ + XFS_DAS_FLIP_NFLAG, /* Flipped node INCOMPLETE attr flag */ + XFS_DAS_RM_NBLK, /* A rename is removing node blocks */ + XFS_DAS_CLR_FLAG, /* Clear incomplete flag */ +}; + +/* + * Defines for xfs_delattr_context.flags + */ +#define XFS_DAC_DEFER_FINISH 0x01 /* finish the transaction */ +#define XFS_DAC_LEAF_ADDNAME_INIT 0x02 /* xfs_attr_leaf_addname init*/ + +/* + * Context used for keeping track of delayed attribute operations + */ +struct xfs_delattr_context { + struct xfs_da_args *da_args; + + /* Used in xfs_attr_rmtval_set_blk to roll through allocating blocks */ + struct xfs_bmbt_irec map; + xfs_dablk_t lblkno; + int blkcnt; + + /* Used in xfs_attr_node_removename to roll through removing blocks */ + struct xfs_da_state *da_state; + + /* Used to keep track of current state of delayed operation */ + unsigned int flags; + enum xfs_delattr_state dela_state; +}; + /*======================================================================== * Function prototypes for the kernel. *========================================================================*/ @@ -92,6 +492,9 @@ int xfs_attr_set(struct xfs_da_args *args); int xfs_attr_set_args(struct xfs_da_args *args); int xfs_has_attr(struct xfs_da_args *args); int xfs_attr_remove_args(struct xfs_da_args *args); +int xfs_attr_remove_iter(struct xfs_delattr_context *dac); bool xfs_attr_namecheck(const void *name, size_t length); +void xfs_delattr_context_init(struct xfs_delattr_context *dac, + struct xfs_da_args *args); #endif /* __XFS_ATTR_H__ */ diff --git a/fs/xfs/libxfs/xfs_attr_leaf.c b/fs/xfs/libxfs/xfs_attr_leaf.c index 556184b63061..b910bd209949 100644 --- a/fs/xfs/libxfs/xfs_attr_leaf.c +++ b/fs/xfs/libxfs/xfs_attr_leaf.c @@ -19,14 +19,15 @@ #include "xfs_bmap_btree.h" #include "xfs_bmap.h" #include "xfs_attr_sf.h" -#include "xfs_attr_remote.h" #include "xfs_attr.h" +#include "xfs_attr_remote.h" #include "xfs_attr_leaf.h" #include "xfs_error.h" #include "xfs_trace.h" #include "xfs_buf_item.h" #include "xfs_dir2.h" #include "xfs_log.h" +#include "xfs_ag.h" /* @@ -773,7 +774,7 @@ xfs_attr_fork_remove( * Remove an attribute from the shortform attribute list structure. */ int -xfs_attr_shortform_remove( +xfs_attr_sf_removename( struct xfs_da_args *args) { struct xfs_attr_shortform *sf; diff --git a/fs/xfs/libxfs/xfs_attr_leaf.h b/fs/xfs/libxfs/xfs_attr_leaf.h index 9b1c59f40a26..efa757f1e912 100644 --- a/fs/xfs/libxfs/xfs_attr_leaf.h +++ b/fs/xfs/libxfs/xfs_attr_leaf.h @@ -51,7 +51,7 @@ int xfs_attr_shortform_lookup(struct xfs_da_args *args); int xfs_attr_shortform_getvalue(struct xfs_da_args *args); int xfs_attr_shortform_to_leaf(struct xfs_da_args *args, struct xfs_buf **leaf_bp); -int xfs_attr_shortform_remove(struct xfs_da_args *args); +int xfs_attr_sf_removename(struct xfs_da_args *args); int xfs_attr_sf_findname(struct xfs_da_args *args, struct xfs_attr_sf_entry **sfep, unsigned int *basep); diff --git a/fs/xfs/libxfs/xfs_attr_remote.c b/fs/xfs/libxfs/xfs_attr_remote.c index 48d8e9caf86f..0c8bee3abc3b 100644 --- a/fs/xfs/libxfs/xfs_attr_remote.c +++ b/fs/xfs/libxfs/xfs_attr_remote.c @@ -439,9 +439,9 @@ xfs_attr_rmtval_get( /* * Find a "hole" in the attribute address space large enough for us to drop the - * new attribute's value into + * new attributes value into */ -STATIC int +int xfs_attr_rmt_find_hole( struct xfs_da_args *args) { @@ -468,7 +468,7 @@ xfs_attr_rmt_find_hole( return 0; } -STATIC int +int xfs_attr_rmtval_set_value( struct xfs_da_args *args) { @@ -562,69 +562,66 @@ xfs_attr_rmtval_stale( } /* - * Write the value associated with an attribute into the out-of-line buffer - * that we have defined for it. + * Find a hole for the attr and store it in the delayed attr context. This + * initializes the context to roll through allocating an attr extent for a + * delayed attr operation */ int -xfs_attr_rmtval_set( - struct xfs_da_args *args) +xfs_attr_rmtval_find_space( + struct xfs_delattr_context *dac) { - struct xfs_inode *dp = args->dp; - struct xfs_bmbt_irec map; - xfs_dablk_t lblkno; - int blkcnt; - int nmap; - int error; + struct xfs_da_args *args = dac->da_args; + struct xfs_bmbt_irec *map = &dac->map; + int error; - trace_xfs_attr_rmtval_set(args); + dac->lblkno = 0; + dac->blkcnt = 0; + args->rmtblkcnt = 0; + args->rmtblkno = 0; + memset(map, 0, sizeof(struct xfs_bmbt_irec)); error = xfs_attr_rmt_find_hole(args); if (error) return error; - blkcnt = args->rmtblkcnt; - lblkno = (xfs_dablk_t)args->rmtblkno; - /* - * Roll through the "value", allocating blocks on disk as required. - */ - while (blkcnt > 0) { - /* - * Allocate a single extent, up to the size of the value. - * - * Note that we have to consider this a data allocation as we - * write the remote attribute without logging the contents. - * Hence we must ensure that we aren't using blocks that are on - * the busy list so that we don't overwrite blocks which have - * recently been freed but their transactions are not yet - * committed to disk. If we overwrite the contents of a busy - * extent and then crash then the block may not contain the - * correct metadata after log recovery occurs. - */ - nmap = 1; - error = xfs_bmapi_write(args->trans, dp, (xfs_fileoff_t)lblkno, - blkcnt, XFS_BMAPI_ATTRFORK, args->total, &map, - &nmap); - if (error) - return error; - error = xfs_defer_finish(&args->trans); - if (error) - return error; + dac->blkcnt = args->rmtblkcnt; + dac->lblkno = args->rmtblkno; - ASSERT(nmap == 1); - ASSERT((map.br_startblock != DELAYSTARTBLOCK) && - (map.br_startblock != HOLESTARTBLOCK)); - lblkno += map.br_blockcount; - blkcnt -= map.br_blockcount; + return 0; +} - /* - * Start the next trans in the chain. - */ - error = xfs_trans_roll_inode(&args->trans, dp); - if (error) - return error; - } +/* + * Write one block of the value associated with an attribute into the + * out-of-line buffer that we have defined for it. This is similar to a subset + * of xfs_attr_rmtval_set, but records the current block to the delayed attr + * context, and leaves transaction handling to the caller. + */ +int +xfs_attr_rmtval_set_blk( + struct xfs_delattr_context *dac) +{ + struct xfs_da_args *args = dac->da_args; + struct xfs_inode *dp = args->dp; + struct xfs_bmbt_irec *map = &dac->map; + int nmap; + int error; + + nmap = 1; + error = xfs_bmapi_write(args->trans, dp, (xfs_fileoff_t)dac->lblkno, + dac->blkcnt, XFS_BMAPI_ATTRFORK, args->total, + map, &nmap); + if (error) + return error; + + ASSERT(nmap == 1); + ASSERT((map->br_startblock != DELAYSTARTBLOCK) && + (map->br_startblock != HOLESTARTBLOCK)); - return xfs_attr_rmtval_set_value(args); + /* roll attribute extent map forwards */ + dac->lblkno += map->br_blockcount; + dac->blkcnt -= map->br_blockcount; + + return 0; } /* @@ -669,47 +666,17 @@ xfs_attr_rmtval_invalidate( } /* - * Remove the value associated with an attribute by deleting the - * out-of-line buffer that it is stored on. - */ -int -xfs_attr_rmtval_remove( - struct xfs_da_args *args) -{ - int error; - int retval; - - trace_xfs_attr_rmtval_remove(args); - - /* - * Keep de-allocating extents until the remote-value region is gone. - */ - do { - retval = __xfs_attr_rmtval_remove(args); - if (retval && retval != -EAGAIN) - return retval; - - /* - * Close out trans and start the next one in the chain. - */ - error = xfs_trans_roll_inode(&args->trans, args->dp); - if (error) - return error; - } while (retval == -EAGAIN); - - return 0; -} - -/* * Remove the value associated with an attribute by deleting the out-of-line - * buffer that it is stored on. Returns EAGAIN for the caller to refresh the - * transaction and re-call the function + * buffer that it is stored on. Returns -EAGAIN for the caller to refresh the + * transaction and re-call the function. Callers should keep calling this + * routine until it returns something other than -EAGAIN. */ int __xfs_attr_rmtval_remove( - struct xfs_da_args *args) + struct xfs_delattr_context *dac) { - int error, done; + struct xfs_da_args *args = dac->da_args; + int error, done; /* * Unmap value blocks for this attr. @@ -719,12 +686,20 @@ __xfs_attr_rmtval_remove( if (error) return error; - error = xfs_defer_finish(&args->trans); - if (error) - return error; - - if (!done) + /* + * We don't need an explicit state here to pick up where we left off. We + * can figure it out using the !done return code. The actual value of + * attr->xattri_dela_state may be some value reminiscent of the calling + * function, but it's value is irrelevant with in the context of this + * function. Once we are done here, the next state is set as needed by + * the parent + */ + if (!done) { + dac->flags |= XFS_DAC_DEFER_FINISH; return -EAGAIN; + } - return error; + args->rmtblkno = 0; + args->rmtblkcnt = 0; + return 0; } diff --git a/fs/xfs/libxfs/xfs_attr_remote.h b/fs/xfs/libxfs/xfs_attr_remote.h index 9eee615da156..61b85b918db8 100644 --- a/fs/xfs/libxfs/xfs_attr_remote.h +++ b/fs/xfs/libxfs/xfs_attr_remote.h @@ -9,10 +9,12 @@ int xfs_attr3_rmt_blocks(struct xfs_mount *mp, int attrlen); int xfs_attr_rmtval_get(struct xfs_da_args *args); -int xfs_attr_rmtval_set(struct xfs_da_args *args); -int xfs_attr_rmtval_remove(struct xfs_da_args *args); int xfs_attr_rmtval_stale(struct xfs_inode *ip, struct xfs_bmbt_irec *map, xfs_buf_flags_t incore_flags); int xfs_attr_rmtval_invalidate(struct xfs_da_args *args); -int __xfs_attr_rmtval_remove(struct xfs_da_args *args); +int __xfs_attr_rmtval_remove(struct xfs_delattr_context *dac); +int xfs_attr_rmt_find_hole(struct xfs_da_args *args); +int xfs_attr_rmtval_set_value(struct xfs_da_args *args); +int xfs_attr_rmtval_set_blk(struct xfs_delattr_context *dac); +int xfs_attr_rmtval_find_space(struct xfs_delattr_context *dac); #endif /* __XFS_ATTR_REMOTE_H__ */ diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c index a3e0e6f672d6..948092babb6a 100644 --- a/fs/xfs/libxfs/xfs_bmap.c +++ b/fs/xfs/libxfs/xfs_bmap.c @@ -31,6 +31,7 @@ #include "xfs_attr_leaf.h" #include "xfs_filestream.h" #include "xfs_rmap.h" +#include "xfs_ag.h" #include "xfs_ag_resv.h" #include "xfs_refcount.h" #include "xfs_icache.h" @@ -1028,7 +1029,7 @@ xfs_bmap_add_attrfork_local( /* * Set an inode attr fork offset based on the format of the data fork. */ -int +static int xfs_bmap_set_attrforkoff( struct xfs_inode *ip, int size, diff --git a/fs/xfs/libxfs/xfs_bmap.h b/fs/xfs/libxfs/xfs_bmap.h index f9a390ecfb1d..67641f669918 100644 --- a/fs/xfs/libxfs/xfs_bmap.h +++ b/fs/xfs/libxfs/xfs_bmap.h @@ -187,7 +187,6 @@ void xfs_trim_extent(struct xfs_bmbt_irec *irec, xfs_fileoff_t bno, xfs_filblks_t len); unsigned int xfs_bmap_compute_attr_offset(struct xfs_mount *mp); int xfs_bmap_add_attrfork(struct xfs_inode *ip, int size, int rsvd); -int xfs_bmap_set_attrforkoff(struct xfs_inode *ip, int size, int *version); void xfs_bmap_local_to_extents_empty(struct xfs_trans *tp, struct xfs_inode *ip, int whichfork); void __xfs_bmap_add_free(struct xfs_trans *tp, xfs_fsblock_t bno, diff --git a/fs/xfs/libxfs/xfs_btree.c b/fs/xfs/libxfs/xfs_btree.c index 5b6fcb9b44e2..be74a6b53689 100644 --- a/fs/xfs/libxfs/xfs_btree.c +++ b/fs/xfs/libxfs/xfs_btree.c @@ -21,6 +21,7 @@ #include "xfs_alloc.h" #include "xfs_log.h" #include "xfs_btree_staging.h" +#include "xfs_ag.h" /* * Cursor allocation zone. @@ -215,7 +216,7 @@ xfs_btree_check_sptr( { if (level <= 0) return false; - return xfs_verify_agbno(cur->bc_mp, cur->bc_ag.agno, agbno); + return xfs_verify_agbno(cur->bc_mp, cur->bc_ag.pag->pag_agno, agbno); } /* @@ -244,7 +245,7 @@ xfs_btree_check_ptr( return 0; xfs_err(cur->bc_mp, "AG %u: Corrupt btree %d pointer at level %d index %d.", - cur->bc_ag.agno, cur->bc_btnum, + cur->bc_ag.pag->pag_agno, cur->bc_btnum, level, index); } @@ -376,6 +377,8 @@ xfs_btree_del_cursor( XFS_FORCED_SHUTDOWN(cur->bc_mp)); if (unlikely(cur->bc_flags & XFS_BTREE_STAGING)) kmem_free(cur->bc_ops); + if (!(cur->bc_flags & XFS_BTREE_LONG_PTRS) && cur->bc_ag.pag) + xfs_perag_put(cur->bc_ag.pag); kmem_cache_free(xfs_btree_cur_zone, cur); } @@ -885,13 +888,13 @@ xfs_btree_readahead_sblock( if ((lr & XFS_BTCUR_LEFTRA) && left != NULLAGBLOCK) { - xfs_btree_reada_bufs(cur->bc_mp, cur->bc_ag.agno, + xfs_btree_reada_bufs(cur->bc_mp, cur->bc_ag.pag->pag_agno, left, 1, cur->bc_ops->buf_ops); rval++; } if ((lr & XFS_BTCUR_RIGHTRA) && right != NULLAGBLOCK) { - xfs_btree_reada_bufs(cur->bc_mp, cur->bc_ag.agno, + xfs_btree_reada_bufs(cur->bc_mp, cur->bc_ag.pag->pag_agno, right, 1, cur->bc_ops->buf_ops); rval++; } @@ -949,7 +952,7 @@ xfs_btree_ptr_to_daddr( *daddr = XFS_FSB_TO_DADDR(cur->bc_mp, fsbno); } else { agbno = be32_to_cpu(ptr->s); - *daddr = XFS_AGB_TO_DADDR(cur->bc_mp, cur->bc_ag.agno, + *daddr = XFS_AGB_TO_DADDR(cur->bc_mp, cur->bc_ag.pag->pag_agno, agbno); } @@ -1150,7 +1153,7 @@ xfs_btree_init_block_cur( if (cur->bc_flags & XFS_BTREE_LONG_PTRS) owner = cur->bc_ino.ip->i_ino; else - owner = cur->bc_ag.agno; + owner = cur->bc_ag.pag->pag_agno; xfs_btree_init_block_int(cur->bc_mp, XFS_BUF_TO_BLOCK(bp), bp->b_bn, cur->bc_btnum, level, numrecs, diff --git a/fs/xfs/libxfs/xfs_btree.h b/fs/xfs/libxfs/xfs_btree.h index 10e50cbacacf..4dbdc659c396 100644 --- a/fs/xfs/libxfs/xfs_btree.h +++ b/fs/xfs/libxfs/xfs_btree.h @@ -11,6 +11,7 @@ struct xfs_inode; struct xfs_mount; struct xfs_trans; struct xfs_ifork; +struct xfs_perag; extern kmem_zone_t *xfs_btree_cur_zone; @@ -180,11 +181,11 @@ union xfs_btree_irec { /* Per-AG btree information. */ struct xfs_btree_cur_ag { + struct xfs_perag *pag; union { struct xfs_buf *agbp; struct xbtree_afakeroot *afake; /* for staging cursor */ }; - xfs_agnumber_t agno; union { struct { unsigned long nr_ops; /* # record updates */ @@ -231,6 +232,13 @@ typedef struct xfs_btree_cur uint8_t bc_blocklog; /* log2(blocksize) of btree blocks */ xfs_btnum_t bc_btnum; /* identifies which btree type */ int bc_statoff; /* offset of btre stats array */ + + /* + * Short btree pointers need an agno to be able to turn the pointers + * into physical addresses for IO, so the btree cursor switches between + * bc_ino and bc_ag based on whether XFS_BTREE_LONG_PTRS is set for the + * cursor. + */ union { struct xfs_btree_cur_ag bc_ag; struct xfs_btree_cur_ino bc_ino; diff --git a/fs/xfs/libxfs/xfs_ialloc.c b/fs/xfs/libxfs/xfs_ialloc.c index eefdb518fe64..57d9cb632983 100644 --- a/fs/xfs/libxfs/xfs_ialloc.c +++ b/fs/xfs/libxfs/xfs_ialloc.c @@ -10,7 +10,6 @@ #include "xfs_log_format.h" #include "xfs_trans_resv.h" #include "xfs_bit.h" -#include "xfs_sb.h" #include "xfs_mount.h" #include "xfs_inode.h" #include "xfs_btree.h" @@ -27,6 +26,7 @@ #include "xfs_trace.h" #include "xfs_log.h" #include "xfs_rmap.h" +#include "xfs_ag.h" /* * Lookup a record by ino in the btree given by cur. @@ -105,7 +105,7 @@ xfs_inobt_get_rec( int *stat) { struct xfs_mount *mp = cur->bc_mp; - xfs_agnumber_t agno = cur->bc_ag.agno; + xfs_agnumber_t agno = cur->bc_ag.pag->pag_agno; union xfs_btree_rec *rec; int error; uint64_t realfree; @@ -172,18 +172,17 @@ xfs_inobt_insert( struct xfs_mount *mp, struct xfs_trans *tp, struct xfs_buf *agbp, + struct xfs_perag *pag, xfs_agino_t newino, xfs_agino_t newlen, xfs_btnum_t btnum) { struct xfs_btree_cur *cur; - struct xfs_agi *agi = agbp->b_addr; - xfs_agnumber_t agno = be32_to_cpu(agi->agi_seqno); xfs_agino_t thisino; int i; int error; - cur = xfs_inobt_init_cursor(mp, tp, agbp, agno, btnum); + cur = xfs_inobt_init_cursor(mp, tp, agbp, pag, btnum); for (thisino = newino; thisino < newino + newlen; @@ -215,10 +214,9 @@ xfs_inobt_insert( * Verify that the number of free inodes in the AGI is correct. */ #ifdef DEBUG -STATIC int +static int xfs_check_agi_freecount( - struct xfs_btree_cur *cur, - struct xfs_agi *agi) + struct xfs_btree_cur *cur) { if (cur->bc_nlevels == 1) { xfs_inobt_rec_incore_t rec; @@ -244,12 +242,12 @@ xfs_check_agi_freecount( } while (i == 1); if (!XFS_FORCED_SHUTDOWN(cur->bc_mp)) - ASSERT(freecount == be32_to_cpu(agi->agi_freecount)); + ASSERT(freecount == cur->bc_ag.pag->pagi_freecount); } return 0; } #else -#define xfs_check_agi_freecount(cur, agi) 0 +#define xfs_check_agi_freecount(cur) 0 #endif /* @@ -520,18 +518,17 @@ xfs_inobt_insert_sprec( struct xfs_mount *mp, struct xfs_trans *tp, struct xfs_buf *agbp, + struct xfs_perag *pag, int btnum, struct xfs_inobt_rec_incore *nrec, /* in/out: new/merged rec. */ bool merge) /* merge or replace */ { struct xfs_btree_cur *cur; - struct xfs_agi *agi = agbp->b_addr; - xfs_agnumber_t agno = be32_to_cpu(agi->agi_seqno); int error; int i; struct xfs_inobt_rec_incore rec; - cur = xfs_inobt_init_cursor(mp, tp, agbp, agno, btnum); + cur = xfs_inobt_init_cursor(mp, tp, agbp, pag, btnum); /* the new record is pre-aligned so we know where to look */ error = xfs_inobt_lookup(cur, nrec->ir_startino, XFS_LOOKUP_EQ, &i); @@ -578,14 +575,14 @@ xfs_inobt_insert_sprec( goto error; } - trace_xfs_irec_merge_pre(mp, agno, rec.ir_startino, + trace_xfs_irec_merge_pre(mp, pag->pag_agno, rec.ir_startino, rec.ir_holemask, nrec->ir_startino, nrec->ir_holemask); /* merge to nrec to output the updated record */ __xfs_inobt_rec_merge(nrec, &rec); - trace_xfs_irec_merge_post(mp, agno, nrec->ir_startino, + trace_xfs_irec_merge_post(mp, pag->pag_agno, nrec->ir_startino, nrec->ir_holemask); error = xfs_inobt_rec_check_count(mp, nrec); @@ -606,28 +603,28 @@ error: } /* - * Allocate new inodes in the allocation group specified by agbp. - * Returns 0 if inodes were allocated in this AG; 1 if there was no space - * in this AG; or the usual negative error code. + * Allocate new inodes in the allocation group specified by agbp. Returns 0 if + * inodes were allocated in this AG; -EAGAIN if there was no space in this AG so + * the caller knows it can try another AG, a hard -ENOSPC when over the maximum + * inode count threshold, or the usual negative error code for other errors. */ STATIC int xfs_ialloc_ag_alloc( struct xfs_trans *tp, - struct xfs_buf *agbp) + struct xfs_buf *agbp, + struct xfs_perag *pag) { struct xfs_agi *agi; struct xfs_alloc_arg args; - xfs_agnumber_t agno; int error; xfs_agino_t newino; /* new first inode's number */ xfs_agino_t newlen; /* new number of inodes */ int isaligned = 0; /* inode allocation at stripe */ /* unit boundary */ /* init. to full chunk */ - uint16_t allocmask = (uint16_t) -1; struct xfs_inobt_rec_incore rec; - struct xfs_perag *pag; struct xfs_ino_geometry *igeo = M_IGEO(tp->t_mountp); + uint16_t allocmask = (uint16_t) -1; int do_sparse = 0; memset(&args, 0, sizeof(args)); @@ -660,14 +657,13 @@ xfs_ialloc_ag_alloc( */ agi = agbp->b_addr; newino = be32_to_cpu(agi->agi_newino); - agno = be32_to_cpu(agi->agi_seqno); args.agbno = XFS_AGINO_TO_AGBNO(args.mp, newino) + igeo->ialloc_blks; if (do_sparse) goto sparse_alloc; if (likely(newino != NULLAGINO && (args.agbno < be32_to_cpu(agi->agi_length)))) { - args.fsbno = XFS_AGB_TO_FSB(args.mp, agno, args.agbno); + args.fsbno = XFS_AGB_TO_FSB(args.mp, pag->pag_agno, args.agbno); args.type = XFS_ALLOCTYPE_THIS_BNO; args.prod = 1; @@ -727,7 +723,7 @@ xfs_ialloc_ag_alloc( * For now, just allocate blocks up front. */ args.agbno = be32_to_cpu(agi->agi_root); - args.fsbno = XFS_AGB_TO_FSB(args.mp, agno, args.agbno); + args.fsbno = XFS_AGB_TO_FSB(args.mp, pag->pag_agno, args.agbno); /* * Allocate a fixed-size extent of inodes. */ @@ -748,7 +744,7 @@ xfs_ialloc_ag_alloc( if (isaligned && args.fsbno == NULLFSBLOCK) { args.type = XFS_ALLOCTYPE_NEAR_BNO; args.agbno = be32_to_cpu(agi->agi_root); - args.fsbno = XFS_AGB_TO_FSB(args.mp, agno, args.agbno); + args.fsbno = XFS_AGB_TO_FSB(args.mp, pag->pag_agno, args.agbno); args.alignment = igeo->cluster_align; if ((error = xfs_alloc_vextent(&args))) return error; @@ -764,7 +760,7 @@ xfs_ialloc_ag_alloc( sparse_alloc: args.type = XFS_ALLOCTYPE_NEAR_BNO; args.agbno = be32_to_cpu(agi->agi_root); - args.fsbno = XFS_AGB_TO_FSB(args.mp, agno, args.agbno); + args.fsbno = XFS_AGB_TO_FSB(args.mp, pag->pag_agno, args.agbno); args.alignment = args.mp->m_sb.sb_spino_align; args.prod = 1; @@ -796,7 +792,7 @@ sparse_alloc: } if (args.fsbno == NULLFSBLOCK) - return 1; + return -EAGAIN; ASSERT(args.len == args.minlen); @@ -809,7 +805,7 @@ sparse_alloc: * rather than a linear progression to prevent the next generation * number from being easily guessable. */ - error = xfs_ialloc_inode_init(args.mp, tp, NULL, newlen, agno, + error = xfs_ialloc_inode_init(args.mp, tp, NULL, newlen, pag->pag_agno, args.agbno, args.len, prandom_u32()); if (error) @@ -836,12 +832,12 @@ sparse_alloc: * if necessary. If a merge does occur, rec is updated to the * merged record. */ - error = xfs_inobt_insert_sprec(args.mp, tp, agbp, XFS_BTNUM_INO, - &rec, true); + error = xfs_inobt_insert_sprec(args.mp, tp, agbp, pag, + XFS_BTNUM_INO, &rec, true); if (error == -EFSCORRUPTED) { xfs_alert(args.mp, "invalid sparse inode record: ino 0x%llx holemask 0x%x count %u", - XFS_AGINO_TO_INO(args.mp, agno, + XFS_AGINO_TO_INO(args.mp, pag->pag_agno, rec.ir_startino), rec.ir_holemask, rec.ir_count); xfs_force_shutdown(args.mp, SHUTDOWN_CORRUPT_INCORE); @@ -861,21 +857,20 @@ sparse_alloc: * existing record with this one. */ if (xfs_sb_version_hasfinobt(&args.mp->m_sb)) { - error = xfs_inobt_insert_sprec(args.mp, tp, agbp, - XFS_BTNUM_FINO, &rec, - false); + error = xfs_inobt_insert_sprec(args.mp, tp, agbp, pag, + XFS_BTNUM_FINO, &rec, false); if (error) return error; } } else { /* full chunk - insert new records to both btrees */ - error = xfs_inobt_insert(args.mp, tp, agbp, newino, newlen, + error = xfs_inobt_insert(args.mp, tp, agbp, pag, newino, newlen, XFS_BTNUM_INO); if (error) return error; if (xfs_sb_version_hasfinobt(&args.mp->m_sb)) { - error = xfs_inobt_insert(args.mp, tp, agbp, newino, + error = xfs_inobt_insert(args.mp, tp, agbp, pag, newino, newlen, XFS_BTNUM_FINO); if (error) return error; @@ -887,7 +882,6 @@ sparse_alloc: */ be32_add_cpu(&agi->agi_count, newlen); be32_add_cpu(&agi->agi_freecount, newlen); - pag = agbp->b_pag; pag->pagi_freecount += newlen; pag->pagi_count += newlen; agi->agi_newino = cpu_to_be32(newino); @@ -905,139 +899,6 @@ sparse_alloc: return 0; } -STATIC xfs_agnumber_t -xfs_ialloc_next_ag( - xfs_mount_t *mp) -{ - xfs_agnumber_t agno; - - spin_lock(&mp->m_agirotor_lock); - agno = mp->m_agirotor; - if (++mp->m_agirotor >= mp->m_maxagi) - mp->m_agirotor = 0; - spin_unlock(&mp->m_agirotor_lock); - - return agno; -} - -/* - * Select an allocation group to look for a free inode in, based on the parent - * inode and the mode. Return the allocation group buffer. - */ -STATIC xfs_agnumber_t -xfs_ialloc_ag_select( - xfs_trans_t *tp, /* transaction pointer */ - xfs_ino_t parent, /* parent directory inode number */ - umode_t mode) /* bits set to indicate file type */ -{ - xfs_agnumber_t agcount; /* number of ag's in the filesystem */ - xfs_agnumber_t agno; /* current ag number */ - int flags; /* alloc buffer locking flags */ - xfs_extlen_t ineed; /* blocks needed for inode allocation */ - xfs_extlen_t longest = 0; /* longest extent available */ - xfs_mount_t *mp; /* mount point structure */ - int needspace; /* file mode implies space allocated */ - xfs_perag_t *pag; /* per allocation group data */ - xfs_agnumber_t pagno; /* parent (starting) ag number */ - int error; - - /* - * Files of these types need at least one block if length > 0 - * (and they won't fit in the inode, but that's hard to figure out). - */ - needspace = S_ISDIR(mode) || S_ISREG(mode) || S_ISLNK(mode); - mp = tp->t_mountp; - agcount = mp->m_maxagi; - if (S_ISDIR(mode)) - pagno = xfs_ialloc_next_ag(mp); - else { - pagno = XFS_INO_TO_AGNO(mp, parent); - if (pagno >= agcount) - pagno = 0; - } - - ASSERT(pagno < agcount); - - /* - * Loop through allocation groups, looking for one with a little - * free space in it. Note we don't look for free inodes, exactly. - * Instead, we include whether there is a need to allocate inodes - * to mean that blocks must be allocated for them, - * if none are currently free. - */ - agno = pagno; - flags = XFS_ALLOC_FLAG_TRYLOCK; - for (;;) { - pag = xfs_perag_get(mp, agno); - if (!pag->pagi_inodeok) { - xfs_ialloc_next_ag(mp); - goto nextag; - } - - if (!pag->pagi_init) { - error = xfs_ialloc_pagi_init(mp, tp, agno); - if (error) - goto nextag; - } - - if (pag->pagi_freecount) { - xfs_perag_put(pag); - return agno; - } - - if (!pag->pagf_init) { - error = xfs_alloc_pagf_init(mp, tp, agno, flags); - if (error) - goto nextag; - } - - /* - * Check that there is enough free space for the file plus a - * chunk of inodes if we need to allocate some. If this is the - * first pass across the AGs, take into account the potential - * space needed for alignment of inode chunks when checking the - * longest contiguous free space in the AG - this prevents us - * from getting ENOSPC because we have free space larger than - * ialloc_blks but alignment constraints prevent us from using - * it. - * - * If we can't find an AG with space for full alignment slack to - * be taken into account, we must be near ENOSPC in all AGs. - * Hence we don't include alignment for the second pass and so - * if we fail allocation due to alignment issues then it is most - * likely a real ENOSPC condition. - */ - ineed = M_IGEO(mp)->ialloc_min_blks; - if (flags && ineed > 1) - ineed += M_IGEO(mp)->cluster_align; - longest = pag->pagf_longest; - if (!longest) - longest = pag->pagf_flcount > 0; - - if (pag->pagf_freeblks >= needspace + ineed && - longest >= ineed) { - xfs_perag_put(pag); - return agno; - } -nextag: - xfs_perag_put(pag); - /* - * No point in iterating over the rest, if we're shutting - * down. - */ - if (XFS_FORCED_SHUTDOWN(mp)) - return NULLAGNUMBER; - agno++; - if (agno >= agcount) - agno = 0; - if (agno == pagno) { - if (flags == 0) - return NULLAGNUMBER; - flags = 0; - } - } -} - /* * Try to retrieve the next record to the left/right from the current one. */ @@ -1123,15 +984,14 @@ STATIC int xfs_dialloc_ag_inobt( struct xfs_trans *tp, struct xfs_buf *agbp, + struct xfs_perag *pag, xfs_ino_t parent, xfs_ino_t *inop) { struct xfs_mount *mp = tp->t_mountp; struct xfs_agi *agi = agbp->b_addr; - xfs_agnumber_t agno = be32_to_cpu(agi->agi_seqno); xfs_agnumber_t pagno = XFS_INO_TO_AGNO(mp, parent); xfs_agino_t pagino = XFS_INO_TO_AGINO(mp, parent); - struct xfs_perag *pag = agbp->b_pag; struct xfs_btree_cur *cur, *tcur; struct xfs_inobt_rec_incore rec, trec; xfs_ino_t ino; @@ -1145,7 +1005,7 @@ xfs_dialloc_ag_inobt( ASSERT(pag->pagi_freecount > 0); restart_pagno: - cur = xfs_inobt_init_cursor(mp, tp, agbp, agno, XFS_BTNUM_INO); + cur = xfs_inobt_init_cursor(mp, tp, agbp, pag, XFS_BTNUM_INO); /* * If pagino is 0 (this is the root inode allocation) use newino. * This must work because we've just allocated some. @@ -1153,14 +1013,14 @@ xfs_dialloc_ag_inobt( if (!pagino) pagino = be32_to_cpu(agi->agi_newino); - error = xfs_check_agi_freecount(cur, agi); + error = xfs_check_agi_freecount(cur); if (error) goto error0; /* * If in the same AG as the parent, try to get near the parent. */ - if (pagno == agno) { + if (pagno == pag->pag_agno) { int doneleft; /* done, to the left */ int doneright; /* done, to the right */ @@ -1363,7 +1223,7 @@ alloc_inode: ASSERT(offset < XFS_INODES_PER_CHUNK); ASSERT((XFS_AGINO_TO_OFFSET(mp, rec.ir_startino) % XFS_INODES_PER_CHUNK) == 0); - ino = XFS_AGINO_TO_INO(mp, agno, rec.ir_startino + offset); + ino = XFS_AGINO_TO_INO(mp, pag->pag_agno, rec.ir_startino + offset); rec.ir_free &= ~XFS_INOBT_MASK(offset); rec.ir_freecount--; error = xfs_inobt_update(cur, &rec); @@ -1373,7 +1233,7 @@ alloc_inode: xfs_ialloc_log_agi(tp, agbp, XFS_AGI_FREECOUNT); pag->pagi_freecount--; - error = xfs_check_agi_freecount(cur, agi); + error = xfs_check_agi_freecount(cur); if (error) goto error0; @@ -1568,16 +1428,16 @@ xfs_dialloc_ag_update_inobt( * The caller selected an AG for us, and made sure that free inodes are * available. */ -int +static int xfs_dialloc_ag( struct xfs_trans *tp, struct xfs_buf *agbp, + struct xfs_perag *pag, xfs_ino_t parent, xfs_ino_t *inop) { struct xfs_mount *mp = tp->t_mountp; struct xfs_agi *agi = agbp->b_addr; - xfs_agnumber_t agno = be32_to_cpu(agi->agi_seqno); xfs_agnumber_t pagno = XFS_INO_TO_AGNO(mp, parent); xfs_agino_t pagino = XFS_INO_TO_AGINO(mp, parent); struct xfs_btree_cur *cur; /* finobt cursor */ @@ -1589,7 +1449,7 @@ xfs_dialloc_ag( int i; if (!xfs_sb_version_hasfinobt(&mp->m_sb)) - return xfs_dialloc_ag_inobt(tp, agbp, parent, inop); + return xfs_dialloc_ag_inobt(tp, agbp, pag, parent, inop); /* * If pagino is 0 (this is the root inode allocation) use newino. @@ -1598,9 +1458,9 @@ xfs_dialloc_ag( if (!pagino) pagino = be32_to_cpu(agi->agi_newino); - cur = xfs_inobt_init_cursor(mp, tp, agbp, agno, XFS_BTNUM_FINO); + cur = xfs_inobt_init_cursor(mp, tp, agbp, pag, XFS_BTNUM_FINO); - error = xfs_check_agi_freecount(cur, agi); + error = xfs_check_agi_freecount(cur); if (error) goto error_cur; @@ -1609,7 +1469,7 @@ xfs_dialloc_ag( * parent. If so, find the closest available inode to the parent. If * not, consider the agi hint or find the first free inode in the AG. */ - if (agno == pagno) + if (pag->pag_agno == pagno) error = xfs_dialloc_ag_finobt_near(pagino, &cur, &rec); else error = xfs_dialloc_ag_finobt_newino(agi, cur, &rec); @@ -1621,7 +1481,7 @@ xfs_dialloc_ag( ASSERT(offset < XFS_INODES_PER_CHUNK); ASSERT((XFS_AGINO_TO_OFFSET(mp, rec.ir_startino) % XFS_INODES_PER_CHUNK) == 0); - ino = XFS_AGINO_TO_INO(mp, agno, rec.ir_startino + offset); + ino = XFS_AGINO_TO_INO(mp, pag->pag_agno, rec.ir_startino + offset); /* * Modify or remove the finobt record. @@ -1641,9 +1501,9 @@ xfs_dialloc_ag( * the original freecount. If all is well, make the equivalent update to * the inobt using the finobt record and offset information. */ - icur = xfs_inobt_init_cursor(mp, tp, agbp, agno, XFS_BTNUM_INO); + icur = xfs_inobt_init_cursor(mp, tp, agbp, pag, XFS_BTNUM_INO); - error = xfs_check_agi_freecount(icur, agi); + error = xfs_check_agi_freecount(icur); if (error) goto error_icur; @@ -1657,14 +1517,14 @@ xfs_dialloc_ag( */ be32_add_cpu(&agi->agi_freecount, -1); xfs_ialloc_log_agi(tp, agbp, XFS_AGI_FREECOUNT); - agbp->b_pag->pagi_freecount--; + pag->pagi_freecount--; xfs_trans_mod_sb(tp, XFS_TRANS_SB_IFREE, -1); - error = xfs_check_agi_freecount(icur, agi); + error = xfs_check_agi_freecount(icur); if (error) goto error_icur; - error = xfs_check_agi_freecount(cur, agi); + error = xfs_check_agi_freecount(cur); if (error) goto error_icur; @@ -1708,54 +1568,195 @@ xfs_dialloc_roll( /* Re-attach the quota info that we detached from prev trx. */ tp->t_dqinfo = dqinfo; + /* + * Join the buffer even on commit error so that the buffer is released + * when the caller cancels the transaction and doesn't have to handle + * this error case specially. + */ + xfs_trans_bjoin(tp, agibp); *tpp = tp; + return error; +} + +static xfs_agnumber_t +xfs_ialloc_next_ag( + xfs_mount_t *mp) +{ + xfs_agnumber_t agno; + + spin_lock(&mp->m_agirotor_lock); + agno = mp->m_agirotor; + if (++mp->m_agirotor >= mp->m_maxagi) + mp->m_agirotor = 0; + spin_unlock(&mp->m_agirotor_lock); + + return agno; +} + +static bool +xfs_dialloc_good_ag( + struct xfs_trans *tp, + struct xfs_perag *pag, + umode_t mode, + int flags, + bool ok_alloc) +{ + struct xfs_mount *mp = tp->t_mountp; + xfs_extlen_t ineed; + xfs_extlen_t longest = 0; + int needspace; + int error; + + if (!pag->pagi_inodeok) + return false; + + if (!pag->pagi_init) { + error = xfs_ialloc_pagi_init(mp, tp, pag->pag_agno); + if (error) + return false; + } + + if (pag->pagi_freecount) + return true; + if (!ok_alloc) + return false; + + if (!pag->pagf_init) { + error = xfs_alloc_pagf_init(mp, tp, pag->pag_agno, flags); + if (error) + return false; + } + + /* + * Check that there is enough free space for the file plus a chunk of + * inodes if we need to allocate some. If this is the first pass across + * the AGs, take into account the potential space needed for alignment + * of inode chunks when checking the longest contiguous free space in + * the AG - this prevents us from getting ENOSPC because we have free + * space larger than ialloc_blks but alignment constraints prevent us + * from using it. + * + * If we can't find an AG with space for full alignment slack to be + * taken into account, we must be near ENOSPC in all AGs. Hence we + * don't include alignment for the second pass and so if we fail + * allocation due to alignment issues then it is most likely a real + * ENOSPC condition. + * + * XXX(dgc): this calculation is now bogus thanks to the per-ag + * reservations that xfs_alloc_fix_freelist() now does via + * xfs_alloc_space_available(). When the AG fills up, pagf_freeblks will + * be more than large enough for the check below to succeed, but + * xfs_alloc_space_available() will fail because of the non-zero + * metadata reservation and hence we won't actually be able to allocate + * more inodes in this AG. We do soooo much unnecessary work near ENOSPC + * because of this. + */ + ineed = M_IGEO(mp)->ialloc_min_blks; + if (flags && ineed > 1) + ineed += M_IGEO(mp)->cluster_align; + longest = pag->pagf_longest; + if (!longest) + longest = pag->pagf_flcount > 0; + needspace = S_ISDIR(mode) || S_ISREG(mode) || S_ISLNK(mode); + + if (pag->pagf_freeblks < needspace + ineed || longest < ineed) + return false; + return true; +} + +static int +xfs_dialloc_try_ag( + struct xfs_trans **tpp, + struct xfs_perag *pag, + xfs_ino_t parent, + xfs_ino_t *new_ino, + bool ok_alloc) +{ + struct xfs_buf *agbp; + xfs_ino_t ino; + int error; + + /* + * Then read in the AGI buffer and recheck with the AGI buffer + * lock held. + */ + error = xfs_ialloc_read_agi(pag->pag_mount, *tpp, pag->pag_agno, &agbp); if (error) return error; - xfs_trans_bjoin(tp, agibp); - return 0; + + if (!pag->pagi_freecount) { + if (!ok_alloc) { + error = -EAGAIN; + goto out_release; + } + + error = xfs_ialloc_ag_alloc(*tpp, agbp, pag); + if (error < 0) + goto out_release; + + /* + * We successfully allocated space for an inode cluster in this + * AG. Roll the transaction so that we can allocate one of the + * new inodes. + */ + ASSERT(pag->pagi_freecount > 0); + error = xfs_dialloc_roll(tpp, agbp); + if (error) + goto out_release; + } + + /* Allocate an inode in the found AG */ + error = xfs_dialloc_ag(*tpp, agbp, pag, parent, &ino); + if (!error) + *new_ino = ino; + return error; + +out_release: + xfs_trans_brelse(*tpp, agbp); + return error; } /* - * Select and prepare an AG for inode allocation. + * Allocate an on-disk inode. * * Mode is used to tell whether the new inode is a directory and hence where to - * locate it. - * - * This function will ensure that the selected AG has free inodes available to - * allocate from. The selected AGI will be returned locked to the caller, and it - * will allocate more free inodes if required. If no free inodes are found or - * can be allocated, no AGI will be returned. + * locate it. The on-disk inode that is allocated will be returned in @new_ino + * on success, otherwise an error will be set to indicate the failure (e.g. + * -ENOSPC). */ int -xfs_dialloc_select_ag( +xfs_dialloc( struct xfs_trans **tpp, xfs_ino_t parent, umode_t mode, - struct xfs_buf **IO_agbp) + xfs_ino_t *new_ino) { struct xfs_mount *mp = (*tpp)->t_mountp; - struct xfs_buf *agbp; xfs_agnumber_t agno; - int error; - bool noroom = false; + int error = 0; xfs_agnumber_t start_agno; struct xfs_perag *pag; struct xfs_ino_geometry *igeo = M_IGEO(mp); - bool okalloc = true; - - *IO_agbp = NULL; + bool ok_alloc = true; + int flags; + xfs_ino_t ino; /* - * We do not have an agbp, so select an initial allocation - * group for inode allocation. + * Directories, symlinks, and regular files frequently allocate at least + * one block, so factor that potential expansion when we examine whether + * an AG has enough space for file creation. */ - start_agno = xfs_ialloc_ag_select(*tpp, parent, mode); - if (start_agno == NULLAGNUMBER) - return 0; + if (S_ISDIR(mode)) + start_agno = xfs_ialloc_next_ag(mp); + else { + start_agno = XFS_INO_TO_AGNO(mp, parent); + if (start_agno >= mp->m_maxagi) + start_agno = 0; + } /* * If we have already hit the ceiling of inode blocks then clear - * okalloc so we scan all available agi structures for a free + * ok_alloc so we scan all available agi structures for a free * inode. * * Read rough value of mp->m_icount by percpu_counter_read_positive, @@ -1764,8 +1765,7 @@ xfs_dialloc_select_ag( if (igeo->maxicount && percpu_counter_read_positive(&mp->m_icount) + igeo->ialloc_inos > igeo->maxicount) { - noroom = true; - okalloc = false; + ok_alloc = false; } /* @@ -1774,82 +1774,36 @@ xfs_dialloc_select_ag( * allocation groups upward, wrapping at the end. */ agno = start_agno; + flags = XFS_ALLOC_FLAG_TRYLOCK; for (;;) { pag = xfs_perag_get(mp, agno); - if (!pag->pagi_inodeok) { - xfs_ialloc_next_ag(mp); - goto nextag; - } - - if (!pag->pagi_init) { - error = xfs_ialloc_pagi_init(mp, *tpp, agno); - if (error) + if (xfs_dialloc_good_ag(*tpp, pag, mode, flags, ok_alloc)) { + error = xfs_dialloc_try_ag(tpp, pag, parent, + &ino, ok_alloc); + if (error != -EAGAIN) break; } - /* - * Do a first racy fast path check if this AG is usable. - */ - if (!pag->pagi_freecount && !okalloc) - goto nextag; - - /* - * Then read in the AGI buffer and recheck with the AGI buffer - * lock held. - */ - error = xfs_ialloc_read_agi(mp, *tpp, agno, &agbp); - if (error) - break; - - if (pag->pagi_freecount) { - xfs_perag_put(pag); - goto found_ag; - } - - if (!okalloc) - goto nextag_relse_buffer; - - error = xfs_ialloc_ag_alloc(*tpp, agbp); - if (error < 0) { - xfs_trans_brelse(*tpp, agbp); - - if (error == -ENOSPC) - error = 0; + if (XFS_FORCED_SHUTDOWN(mp)) { + error = -EFSCORRUPTED; break; } - - if (error == 0) { - /* - * We successfully allocated space for an inode cluster - * in this AG. Roll the transaction so that we can - * allocate one of the new inodes. - */ - ASSERT(pag->pagi_freecount > 0); - xfs_perag_put(pag); - - error = xfs_dialloc_roll(tpp, agbp); - if (error) { - xfs_buf_relse(agbp); - return error; + if (++agno == mp->m_maxagi) + agno = 0; + if (agno == start_agno) { + if (!flags) { + error = -ENOSPC; + break; } - goto found_ag; + flags = 0; } - -nextag_relse_buffer: - xfs_trans_brelse(*tpp, agbp); -nextag: xfs_perag_put(pag); - if (++agno == mp->m_sb.sb_agcount) - agno = 0; - if (agno == start_agno) - return noroom ? -ENOSPC : 0; } + if (!error) + *new_ino = ino; xfs_perag_put(pag); return error; -found_ag: - *IO_agbp = agbp; - return 0; } /* @@ -1935,12 +1889,12 @@ xfs_difree_inobt( struct xfs_mount *mp, struct xfs_trans *tp, struct xfs_buf *agbp, + struct xfs_perag *pag, xfs_agino_t agino, struct xfs_icluster *xic, struct xfs_inobt_rec_incore *orec) { struct xfs_agi *agi = agbp->b_addr; - xfs_agnumber_t agno = be32_to_cpu(agi->agi_seqno); struct xfs_btree_cur *cur; struct xfs_inobt_rec_incore rec; int ilen; @@ -1954,9 +1908,9 @@ xfs_difree_inobt( /* * Initialize the cursor. */ - cur = xfs_inobt_init_cursor(mp, tp, agbp, agno, XFS_BTNUM_INO); + cur = xfs_inobt_init_cursor(mp, tp, agbp, pag, XFS_BTNUM_INO); - error = xfs_check_agi_freecount(cur, agi); + error = xfs_check_agi_freecount(cur); if (error) goto error0; @@ -2005,7 +1959,8 @@ xfs_difree_inobt( struct xfs_perag *pag = agbp->b_pag; xic->deleted = true; - xic->first_ino = XFS_AGINO_TO_INO(mp, agno, rec.ir_startino); + xic->first_ino = XFS_AGINO_TO_INO(mp, pag->pag_agno, + rec.ir_startino); xic->alloc = xfs_inobt_irec_to_allocmask(&rec); /* @@ -2028,7 +1983,7 @@ xfs_difree_inobt( goto error0; } - xfs_difree_inode_chunk(tp, agno, &rec); + xfs_difree_inode_chunk(tp, pag->pag_agno, &rec); } else { xic->deleted = false; @@ -2044,11 +1999,11 @@ xfs_difree_inobt( */ be32_add_cpu(&agi->agi_freecount, 1); xfs_ialloc_log_agi(tp, agbp, XFS_AGI_FREECOUNT); - agbp->b_pag->pagi_freecount++; + pag->pagi_freecount++; xfs_trans_mod_sb(tp, XFS_TRANS_SB_IFREE, 1); } - error = xfs_check_agi_freecount(cur, agi); + error = xfs_check_agi_freecount(cur); if (error) goto error0; @@ -2069,18 +2024,17 @@ xfs_difree_finobt( struct xfs_mount *mp, struct xfs_trans *tp, struct xfs_buf *agbp, + struct xfs_perag *pag, xfs_agino_t agino, struct xfs_inobt_rec_incore *ibtrec) /* inobt record */ { - struct xfs_agi *agi = agbp->b_addr; - xfs_agnumber_t agno = be32_to_cpu(agi->agi_seqno); struct xfs_btree_cur *cur; struct xfs_inobt_rec_incore rec; int offset = agino - ibtrec->ir_startino; int error; int i; - cur = xfs_inobt_init_cursor(mp, tp, agbp, agno, XFS_BTNUM_FINO); + cur = xfs_inobt_init_cursor(mp, tp, agbp, pag, XFS_BTNUM_FINO); error = xfs_inobt_lookup(cur, ibtrec->ir_startino, XFS_LOOKUP_EQ, &i); if (error) @@ -2158,7 +2112,7 @@ xfs_difree_finobt( } out: - error = xfs_check_agi_freecount(cur, agi); + error = xfs_check_agi_freecount(cur); if (error) goto error; @@ -2178,36 +2132,33 @@ error: */ int xfs_difree( - struct xfs_trans *tp, /* transaction pointer */ - xfs_ino_t inode, /* inode to be freed */ - struct xfs_icluster *xic) /* cluster info if deleted */ + struct xfs_trans *tp, + struct xfs_perag *pag, + xfs_ino_t inode, + struct xfs_icluster *xic) { /* REFERENCED */ xfs_agblock_t agbno; /* block number containing inode */ struct xfs_buf *agbp; /* buffer for allocation group header */ xfs_agino_t agino; /* allocation group inode number */ - xfs_agnumber_t agno; /* allocation group number */ int error; /* error return value */ - struct xfs_mount *mp; /* mount structure for filesystem */ + struct xfs_mount *mp = tp->t_mountp; struct xfs_inobt_rec_incore rec;/* btree record */ - mp = tp->t_mountp; - /* * Break up inode number into its components. */ - agno = XFS_INO_TO_AGNO(mp, inode); - if (agno >= mp->m_sb.sb_agcount) { - xfs_warn(mp, "%s: agno >= mp->m_sb.sb_agcount (%d >= %d).", - __func__, agno, mp->m_sb.sb_agcount); + if (pag->pag_agno != XFS_INO_TO_AGNO(mp, inode)) { + xfs_warn(mp, "%s: agno != pag->pag_agno (%d != %d).", + __func__, XFS_INO_TO_AGNO(mp, inode), pag->pag_agno); ASSERT(0); return -EINVAL; } agino = XFS_INO_TO_AGINO(mp, inode); - if (inode != XFS_AGINO_TO_INO(mp, agno, agino)) { + if (inode != XFS_AGINO_TO_INO(mp, pag->pag_agno, agino)) { xfs_warn(mp, "%s: inode != XFS_AGINO_TO_INO() (%llu != %llu).", __func__, (unsigned long long)inode, - (unsigned long long)XFS_AGINO_TO_INO(mp, agno, agino)); + (unsigned long long)XFS_AGINO_TO_INO(mp, pag->pag_agno, agino)); ASSERT(0); return -EINVAL; } @@ -2221,7 +2172,7 @@ xfs_difree( /* * Get the allocation group header. */ - error = xfs_ialloc_read_agi(mp, tp, agno, &agbp); + error = xfs_ialloc_read_agi(mp, tp, pag->pag_agno, &agbp); if (error) { xfs_warn(mp, "%s: xfs_ialloc_read_agi() returned error %d.", __func__, error); @@ -2231,7 +2182,7 @@ xfs_difree( /* * Fix up the inode allocation btree. */ - error = xfs_difree_inobt(mp, tp, agbp, agino, xic, &rec); + error = xfs_difree_inobt(mp, tp, agbp, pag, agino, xic, &rec); if (error) goto error0; @@ -2239,7 +2190,7 @@ xfs_difree( * Fix up the free inode btree. */ if (xfs_sb_version_hasfinobt(&mp->m_sb)) { - error = xfs_difree_finobt(mp, tp, agbp, agino, &rec); + error = xfs_difree_finobt(mp, tp, agbp, pag, agino, &rec); if (error) goto error0; } @@ -2254,7 +2205,7 @@ STATIC int xfs_imap_lookup( struct xfs_mount *mp, struct xfs_trans *tp, - xfs_agnumber_t agno, + struct xfs_perag *pag, xfs_agino_t agino, xfs_agblock_t agbno, xfs_agblock_t *chunk_agbno, @@ -2267,11 +2218,11 @@ xfs_imap_lookup( int error; int i; - error = xfs_ialloc_read_agi(mp, tp, agno, &agbp); + error = xfs_ialloc_read_agi(mp, tp, pag->pag_agno, &agbp); if (error) { xfs_alert(mp, "%s: xfs_ialloc_read_agi() returned error %d, agno %d", - __func__, error, agno); + __func__, error, pag->pag_agno); return error; } @@ -2281,7 +2232,7 @@ xfs_imap_lookup( * we have a record, we need to ensure it contains the inode number * we are looking up. */ - cur = xfs_inobt_init_cursor(mp, tp, agbp, agno, XFS_BTNUM_INO); + cur = xfs_inobt_init_cursor(mp, tp, agbp, pag, XFS_BTNUM_INO); error = xfs_inobt_lookup(cur, agino, XFS_LOOKUP_LE, &i); if (!error) { if (i) @@ -2315,42 +2266,44 @@ xfs_imap_lookup( */ int xfs_imap( - xfs_mount_t *mp, /* file system mount structure */ - xfs_trans_t *tp, /* transaction pointer */ - xfs_ino_t ino, /* inode to locate */ - struct xfs_imap *imap, /* location map structure */ - uint flags) /* flags for inode btree lookup */ + struct xfs_mount *mp, /* file system mount structure */ + struct xfs_trans *tp, /* transaction pointer */ + xfs_ino_t ino, /* inode to locate */ + struct xfs_imap *imap, /* location map structure */ + uint flags) /* flags for inode btree lookup */ { - xfs_agblock_t agbno; /* block number of inode in the alloc group */ - xfs_agino_t agino; /* inode number within alloc group */ - xfs_agnumber_t agno; /* allocation group number */ - xfs_agblock_t chunk_agbno; /* first block in inode chunk */ - xfs_agblock_t cluster_agbno; /* first block in inode cluster */ - int error; /* error code */ - int offset; /* index of inode in its buffer */ - xfs_agblock_t offset_agbno; /* blks from chunk start to inode */ + xfs_agblock_t agbno; /* block number of inode in the alloc group */ + xfs_agino_t agino; /* inode number within alloc group */ + xfs_agblock_t chunk_agbno; /* first block in inode chunk */ + xfs_agblock_t cluster_agbno; /* first block in inode cluster */ + int error; /* error code */ + int offset; /* index of inode in its buffer */ + xfs_agblock_t offset_agbno; /* blks from chunk start to inode */ + struct xfs_perag *pag; ASSERT(ino != NULLFSINO); /* * Split up the inode number into its parts. */ - agno = XFS_INO_TO_AGNO(mp, ino); + pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ino)); agino = XFS_INO_TO_AGINO(mp, ino); agbno = XFS_AGINO_TO_AGBNO(mp, agino); - if (agno >= mp->m_sb.sb_agcount || agbno >= mp->m_sb.sb_agblocks || - ino != XFS_AGINO_TO_INO(mp, agno, agino)) { + if (!pag || agbno >= mp->m_sb.sb_agblocks || + ino != XFS_AGINO_TO_INO(mp, pag->pag_agno, agino)) { + error = -EINVAL; #ifdef DEBUG /* * Don't output diagnostic information for untrusted inodes * as they can be invalid without implying corruption. */ if (flags & XFS_IGET_UNTRUSTED) - return -EINVAL; - if (agno >= mp->m_sb.sb_agcount) { + goto out_drop; + if (!pag) { xfs_alert(mp, "%s: agno (%d) >= mp->m_sb.sb_agcount (%d)", - __func__, agno, mp->m_sb.sb_agcount); + __func__, XFS_INO_TO_AGNO(mp, ino), + mp->m_sb.sb_agcount); } if (agbno >= mp->m_sb.sb_agblocks) { xfs_alert(mp, @@ -2358,15 +2311,15 @@ xfs_imap( __func__, (unsigned long long)agbno, (unsigned long)mp->m_sb.sb_agblocks); } - if (ino != XFS_AGINO_TO_INO(mp, agno, agino)) { + if (pag && ino != XFS_AGINO_TO_INO(mp, pag->pag_agno, agino)) { xfs_alert(mp, "%s: ino (0x%llx) != XFS_AGINO_TO_INO() (0x%llx)", __func__, ino, - XFS_AGINO_TO_INO(mp, agno, agino)); + XFS_AGINO_TO_INO(mp, pag->pag_agno, agino)); } xfs_stack_trace(); #endif /* DEBUG */ - return -EINVAL; + goto out_drop; } /* @@ -2377,10 +2330,10 @@ xfs_imap( * in all cases where an untrusted inode number is passed. */ if (flags & XFS_IGET_UNTRUSTED) { - error = xfs_imap_lookup(mp, tp, agno, agino, agbno, + error = xfs_imap_lookup(mp, tp, pag, agino, agbno, &chunk_agbno, &offset_agbno, flags); if (error) - return error; + goto out_drop; goto out_map; } @@ -2392,11 +2345,12 @@ xfs_imap( offset = XFS_INO_TO_OFFSET(mp, ino); ASSERT(offset < mp->m_sb.sb_inopblock); - imap->im_blkno = XFS_AGB_TO_DADDR(mp, agno, agbno); + imap->im_blkno = XFS_AGB_TO_DADDR(mp, pag->pag_agno, agbno); imap->im_len = XFS_FSB_TO_BB(mp, 1); imap->im_boffset = (unsigned short)(offset << mp->m_sb.sb_inodelog); - return 0; + error = 0; + goto out_drop; } /* @@ -2408,10 +2362,10 @@ xfs_imap( offset_agbno = agbno & M_IGEO(mp)->inoalign_mask; chunk_agbno = agbno - offset_agbno; } else { - error = xfs_imap_lookup(mp, tp, agno, agino, agbno, + error = xfs_imap_lookup(mp, tp, pag, agino, agbno, &chunk_agbno, &offset_agbno, flags); if (error) - return error; + goto out_drop; } out_map: @@ -2422,7 +2376,7 @@ out_map: offset = ((agbno - cluster_agbno) * mp->m_sb.sb_inopblock) + XFS_INO_TO_OFFSET(mp, ino); - imap->im_blkno = XFS_AGB_TO_DADDR(mp, agno, cluster_agbno); + imap->im_blkno = XFS_AGB_TO_DADDR(mp, pag->pag_agno, cluster_agbno); imap->im_len = XFS_FSB_TO_BB(mp, M_IGEO(mp)->blocks_per_cluster); imap->im_boffset = (unsigned short)(offset << mp->m_sb.sb_inodelog); @@ -2439,9 +2393,14 @@ out_map: __func__, (unsigned long long) imap->im_blkno, (unsigned long long) imap->im_len, XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks)); - return -EINVAL; + error = -EINVAL; + goto out_drop; } - return 0; + error = 0; +out_drop: + if (pag) + xfs_perag_put(pag); + return error; } /* diff --git a/fs/xfs/libxfs/xfs_ialloc.h b/fs/xfs/libxfs/xfs_ialloc.h index 3511086a7ae1..9df7c80408ff 100644 --- a/fs/xfs/libxfs/xfs_ialloc.h +++ b/fs/xfs/libxfs/xfs_ialloc.h @@ -33,42 +33,14 @@ xfs_make_iptr(struct xfs_mount *mp, struct xfs_buf *b, int o) } /* - * Allocate an inode on disk. - * Mode is used to tell whether the new inode will need space, and whether - * it is a directory. - * - * There are two phases to inode allocation: selecting an AG and ensuring - * that it contains free inodes, followed by allocating one of the free - * inodes. xfs_dialloc_select_ag() does the former and returns a locked AGI - * to the caller, ensuring that followup call to xfs_dialloc_ag() will - * have free inodes to allocate from. xfs_dialloc_ag() will return the inode - * number of the free inode we allocated. + * Allocate an inode on disk. Mode is used to tell whether the new inode will + * need space, and whether it is a directory. */ -int /* error */ -xfs_dialloc_select_ag( - struct xfs_trans **tpp, /* double pointer of transaction */ - xfs_ino_t parent, /* parent inode (directory) */ - umode_t mode, /* mode bits for new inode */ - struct xfs_buf **IO_agbp); - -int -xfs_dialloc_ag( - struct xfs_trans *tp, - struct xfs_buf *agbp, - xfs_ino_t parent, - xfs_ino_t *inop); +int xfs_dialloc(struct xfs_trans **tpp, xfs_ino_t parent, umode_t mode, + xfs_ino_t *new_ino); -/* - * Free disk inode. Carefully avoids touching the incore inode, all - * manipulations incore are the caller's responsibility. - * The on-disk inode is not changed by this operation, only the - * btree (free inode mask) is changed. - */ -int /* error */ -xfs_difree( - struct xfs_trans *tp, /* transaction pointer */ - xfs_ino_t inode, /* inode to be freed */ - struct xfs_icluster *ifree); /* cluster info if deleted */ +int xfs_difree(struct xfs_trans *tp, struct xfs_perag *pag, + xfs_ino_t ino, struct xfs_icluster *ifree); /* * Return the location of the inode in imap, for mapping it into a buffer. diff --git a/fs/xfs/libxfs/xfs_ialloc_btree.c b/fs/xfs/libxfs/xfs_ialloc_btree.c index 4c5831646bd9..823a038939f8 100644 --- a/fs/xfs/libxfs/xfs_ialloc_btree.c +++ b/fs/xfs/libxfs/xfs_ialloc_btree.c @@ -20,6 +20,7 @@ #include "xfs_trace.h" #include "xfs_trans.h" #include "xfs_rmap.h" +#include "xfs_ag.h" STATIC int xfs_inobt_get_minrecs( @@ -34,8 +35,7 @@ xfs_inobt_dup_cursor( struct xfs_btree_cur *cur) { return xfs_inobt_init_cursor(cur->bc_mp, cur->bc_tp, - cur->bc_ag.agbp, cur->bc_ag.agno, - cur->bc_btnum); + cur->bc_ag.agbp, cur->bc_ag.pag, cur->bc_btnum); } STATIC void @@ -102,7 +102,7 @@ __xfs_inobt_alloc_block( args.tp = cur->bc_tp; args.mp = cur->bc_mp; args.oinfo = XFS_RMAP_OINFO_INOBT; - args.fsbno = XFS_AGB_TO_FSB(args.mp, cur->bc_ag.agno, sbno); + args.fsbno = XFS_AGB_TO_FSB(args.mp, cur->bc_ag.pag->pag_agno, sbno); args.minlen = 1; args.maxlen = 1; args.prod = 1; @@ -235,7 +235,7 @@ xfs_inobt_init_ptr_from_cur( { struct xfs_agi *agi = cur->bc_ag.agbp->b_addr; - ASSERT(cur->bc_ag.agno == be32_to_cpu(agi->agi_seqno)); + ASSERT(cur->bc_ag.pag->pag_agno == be32_to_cpu(agi->agi_seqno)); ptr->s = agi->agi_root; } @@ -247,7 +247,7 @@ xfs_finobt_init_ptr_from_cur( { struct xfs_agi *agi = cur->bc_ag.agbp->b_addr; - ASSERT(cur->bc_ag.agno == be32_to_cpu(agi->agi_seqno)); + ASSERT(cur->bc_ag.pag->pag_agno == be32_to_cpu(agi->agi_seqno)); ptr->s = agi->agi_free_root; } @@ -427,7 +427,7 @@ static struct xfs_btree_cur * xfs_inobt_init_common( struct xfs_mount *mp, /* file system mount point */ struct xfs_trans *tp, /* transaction pointer */ - xfs_agnumber_t agno, /* allocation group number */ + struct xfs_perag *pag, xfs_btnum_t btnum) /* ialloc or free ino btree */ { struct xfs_btree_cur *cur; @@ -449,7 +449,9 @@ xfs_inobt_init_common( if (xfs_sb_version_hascrc(&mp->m_sb)) cur->bc_flags |= XFS_BTREE_CRC_BLOCKS; - cur->bc_ag.agno = agno; + /* take a reference for the cursor */ + atomic_inc(&pag->pag_ref); + cur->bc_ag.pag = pag; return cur; } @@ -459,13 +461,13 @@ xfs_inobt_init_cursor( struct xfs_mount *mp, struct xfs_trans *tp, struct xfs_buf *agbp, - xfs_agnumber_t agno, + struct xfs_perag *pag, xfs_btnum_t btnum) { struct xfs_btree_cur *cur; struct xfs_agi *agi = agbp->b_addr; - cur = xfs_inobt_init_common(mp, tp, agno, btnum); + cur = xfs_inobt_init_common(mp, tp, pag, btnum); if (btnum == XFS_BTNUM_INO) cur->bc_nlevels = be32_to_cpu(agi->agi_level); else @@ -479,12 +481,12 @@ struct xfs_btree_cur * xfs_inobt_stage_cursor( struct xfs_mount *mp, struct xbtree_afakeroot *afake, - xfs_agnumber_t agno, + struct xfs_perag *pag, xfs_btnum_t btnum) { struct xfs_btree_cur *cur; - cur = xfs_inobt_init_common(mp, NULL, agno, btnum); + cur = xfs_inobt_init_common(mp, NULL, pag, btnum); xfs_btree_stage_afakeroot(cur, afake); return cur; } @@ -656,7 +658,7 @@ int xfs_inobt_cur( struct xfs_mount *mp, struct xfs_trans *tp, - xfs_agnumber_t agno, + struct xfs_perag *pag, xfs_btnum_t which, struct xfs_btree_cur **curpp, struct xfs_buf **agi_bpp) @@ -667,11 +669,11 @@ xfs_inobt_cur( ASSERT(*agi_bpp == NULL); ASSERT(*curpp == NULL); - error = xfs_ialloc_read_agi(mp, tp, agno, agi_bpp); + error = xfs_ialloc_read_agi(mp, tp, pag->pag_agno, agi_bpp); if (error) return error; - cur = xfs_inobt_init_cursor(mp, tp, *agi_bpp, agno, which); + cur = xfs_inobt_init_cursor(mp, tp, *agi_bpp, pag, which); *curpp = cur; return 0; } @@ -680,7 +682,7 @@ static int xfs_inobt_count_blocks( struct xfs_mount *mp, struct xfs_trans *tp, - xfs_agnumber_t agno, + struct xfs_perag *pag, xfs_btnum_t btnum, xfs_extlen_t *tree_blocks) { @@ -688,7 +690,7 @@ xfs_inobt_count_blocks( struct xfs_btree_cur *cur = NULL; int error; - error = xfs_inobt_cur(mp, tp, agno, btnum, &cur, &agbp); + error = xfs_inobt_cur(mp, tp, pag, btnum, &cur, &agbp); if (error) return error; @@ -704,14 +706,14 @@ static int xfs_finobt_read_blocks( struct xfs_mount *mp, struct xfs_trans *tp, - xfs_agnumber_t agno, + struct xfs_perag *pag, xfs_extlen_t *tree_blocks) { struct xfs_buf *agbp; struct xfs_agi *agi; int error; - error = xfs_ialloc_read_agi(mp, tp, agno, &agbp); + error = xfs_ialloc_read_agi(mp, tp, pag->pag_agno, &agbp); if (error) return error; @@ -728,7 +730,7 @@ int xfs_finobt_calc_reserves( struct xfs_mount *mp, struct xfs_trans *tp, - xfs_agnumber_t agno, + struct xfs_perag *pag, xfs_extlen_t *ask, xfs_extlen_t *used) { @@ -739,14 +741,14 @@ xfs_finobt_calc_reserves( return 0; if (xfs_sb_version_hasinobtcounts(&mp->m_sb)) - error = xfs_finobt_read_blocks(mp, tp, agno, &tree_len); + error = xfs_finobt_read_blocks(mp, tp, pag, &tree_len); else - error = xfs_inobt_count_blocks(mp, tp, agno, XFS_BTNUM_FINO, + error = xfs_inobt_count_blocks(mp, tp, pag, XFS_BTNUM_FINO, &tree_len); if (error) return error; - *ask += xfs_inobt_max_size(mp, agno); + *ask += xfs_inobt_max_size(mp, pag->pag_agno); *used += tree_len; return 0; } diff --git a/fs/xfs/libxfs/xfs_ialloc_btree.h b/fs/xfs/libxfs/xfs_ialloc_btree.h index 35bbd978c272..e530c82b2217 100644 --- a/fs/xfs/libxfs/xfs_ialloc_btree.h +++ b/fs/xfs/libxfs/xfs_ialloc_btree.h @@ -13,6 +13,7 @@ struct xfs_buf; struct xfs_btree_cur; struct xfs_mount; +struct xfs_perag; /* * Btree block header size depends on a superblock flag. @@ -45,11 +46,11 @@ struct xfs_mount; (maxrecs) * sizeof(xfs_inobt_key_t) + \ ((index) - 1) * sizeof(xfs_inobt_ptr_t))) -extern struct xfs_btree_cur *xfs_inobt_init_cursor(struct xfs_mount *, - struct xfs_trans *, struct xfs_buf *, xfs_agnumber_t, - xfs_btnum_t); +extern struct xfs_btree_cur *xfs_inobt_init_cursor(struct xfs_mount *mp, + struct xfs_trans *tp, struct xfs_buf *agbp, + struct xfs_perag *pag, xfs_btnum_t btnum); struct xfs_btree_cur *xfs_inobt_stage_cursor(struct xfs_mount *mp, - struct xbtree_afakeroot *afake, xfs_agnumber_t agno, + struct xbtree_afakeroot *afake, struct xfs_perag *pag, xfs_btnum_t btnum); extern int xfs_inobt_maxrecs(struct xfs_mount *, int, int); @@ -64,11 +65,11 @@ int xfs_inobt_rec_check_count(struct xfs_mount *, #endif /* DEBUG */ int xfs_finobt_calc_reserves(struct xfs_mount *mp, struct xfs_trans *tp, - xfs_agnumber_t agno, xfs_extlen_t *ask, xfs_extlen_t *used); + struct xfs_perag *pag, xfs_extlen_t *ask, xfs_extlen_t *used); extern xfs_extlen_t xfs_iallocbt_calc_size(struct xfs_mount *mp, unsigned long long len); int xfs_inobt_cur(struct xfs_mount *mp, struct xfs_trans *tp, - xfs_agnumber_t agno, xfs_btnum_t btnum, + struct xfs_perag *pag, xfs_btnum_t btnum, struct xfs_btree_cur **curpp, struct xfs_buf **agi_bpp); void xfs_inobt_commit_staged_btree(struct xfs_btree_cur *cur, diff --git a/fs/xfs/libxfs/xfs_inode_buf.c b/fs/xfs/libxfs/xfs_inode_buf.c index f3254a4f4cb4..04ce361688f7 100644 --- a/fs/xfs/libxfs/xfs_inode_buf.c +++ b/fs/xfs/libxfs/xfs_inode_buf.c @@ -612,7 +612,7 @@ xfs_inode_validate_extsize( */ if (rt_flag) - blocksize_bytes = mp->m_sb.sb_rextsize << mp->m_sb.sb_blocklog; + blocksize_bytes = XFS_FSB_TO_B(mp, mp->m_sb.sb_rextsize); else blocksize_bytes = mp->m_sb.sb_blocksize; diff --git a/fs/xfs/libxfs/xfs_log_format.h b/fs/xfs/libxfs/xfs_log_format.h index 3e15ea29fb8d..d548ea4b6aab 100644 --- a/fs/xfs/libxfs/xfs_log_format.h +++ b/fs/xfs/libxfs/xfs_log_format.h @@ -34,9 +34,6 @@ typedef uint32_t xlog_tid_t; #define XLOG_MIN_RECORD_BSHIFT 14 /* 16384 == 1 << 14 */ #define XLOG_BIG_RECORD_BSHIFT 15 /* 32k == 1 << 15 */ #define XLOG_MAX_RECORD_BSHIFT 18 /* 256k == 1 << 18 */ -#define XLOG_BTOLSUNIT(log, b) (((b)+(log)->l_mp->m_sb.sb_logsunit-1) / \ - (log)->l_mp->m_sb.sb_logsunit) -#define XLOG_LSUNITTOB(log, su) ((su) * (log)->l_mp->m_sb.sb_logsunit) #define XLOG_HEADER_SIZE 512 diff --git a/fs/xfs/libxfs/xfs_refcount.c b/fs/xfs/libxfs/xfs_refcount.c index 2037b9f23069..860a0c9801ba 100644 --- a/fs/xfs/libxfs/xfs_refcount.c +++ b/fs/xfs/libxfs/xfs_refcount.c @@ -22,6 +22,7 @@ #include "xfs_bit.h" #include "xfs_refcount.h" #include "xfs_rmap.h" +#include "xfs_ag.h" /* Allowable refcount adjustment amounts. */ enum xfs_refc_adjust_op { @@ -46,7 +47,7 @@ xfs_refcount_lookup_le( xfs_agblock_t bno, int *stat) { - trace_xfs_refcount_lookup(cur->bc_mp, cur->bc_ag.agno, bno, + trace_xfs_refcount_lookup(cur->bc_mp, cur->bc_ag.pag->pag_agno, bno, XFS_LOOKUP_LE); cur->bc_rec.rc.rc_startblock = bno; cur->bc_rec.rc.rc_blockcount = 0; @@ -63,7 +64,7 @@ xfs_refcount_lookup_ge( xfs_agblock_t bno, int *stat) { - trace_xfs_refcount_lookup(cur->bc_mp, cur->bc_ag.agno, bno, + trace_xfs_refcount_lookup(cur->bc_mp, cur->bc_ag.pag->pag_agno, bno, XFS_LOOKUP_GE); cur->bc_rec.rc.rc_startblock = bno; cur->bc_rec.rc.rc_blockcount = 0; @@ -80,7 +81,7 @@ xfs_refcount_lookup_eq( xfs_agblock_t bno, int *stat) { - trace_xfs_refcount_lookup(cur->bc_mp, cur->bc_ag.agno, bno, + trace_xfs_refcount_lookup(cur->bc_mp, cur->bc_ag.pag->pag_agno, bno, XFS_LOOKUP_LE); cur->bc_rec.rc.rc_startblock = bno; cur->bc_rec.rc.rc_blockcount = 0; @@ -108,7 +109,7 @@ xfs_refcount_get_rec( int *stat) { struct xfs_mount *mp = cur->bc_mp; - xfs_agnumber_t agno = cur->bc_ag.agno; + xfs_agnumber_t agno = cur->bc_ag.pag->pag_agno; union xfs_btree_rec *rec; int error; xfs_agblock_t realstart; @@ -119,7 +120,7 @@ xfs_refcount_get_rec( xfs_refcount_btrec_to_irec(rec, irec); - agno = cur->bc_ag.agno; + agno = cur->bc_ag.pag->pag_agno; if (irec->rc_blockcount == 0 || irec->rc_blockcount > MAXREFCEXTLEN) goto out_bad_rec; @@ -144,7 +145,7 @@ xfs_refcount_get_rec( if (irec->rc_refcount == 0 || irec->rc_refcount > MAXREFCOUNT) goto out_bad_rec; - trace_xfs_refcount_get(cur->bc_mp, cur->bc_ag.agno, irec); + trace_xfs_refcount_get(cur->bc_mp, cur->bc_ag.pag->pag_agno, irec); return 0; out_bad_rec: @@ -169,14 +170,14 @@ xfs_refcount_update( union xfs_btree_rec rec; int error; - trace_xfs_refcount_update(cur->bc_mp, cur->bc_ag.agno, irec); + trace_xfs_refcount_update(cur->bc_mp, cur->bc_ag.pag->pag_agno, irec); rec.refc.rc_startblock = cpu_to_be32(irec->rc_startblock); rec.refc.rc_blockcount = cpu_to_be32(irec->rc_blockcount); rec.refc.rc_refcount = cpu_to_be32(irec->rc_refcount); error = xfs_btree_update(cur, &rec); if (error) trace_xfs_refcount_update_error(cur->bc_mp, - cur->bc_ag.agno, error, _RET_IP_); + cur->bc_ag.pag->pag_agno, error, _RET_IP_); return error; } @@ -193,7 +194,7 @@ xfs_refcount_insert( { int error; - trace_xfs_refcount_insert(cur->bc_mp, cur->bc_ag.agno, irec); + trace_xfs_refcount_insert(cur->bc_mp, cur->bc_ag.pag->pag_agno, irec); cur->bc_rec.rc.rc_startblock = irec->rc_startblock; cur->bc_rec.rc.rc_blockcount = irec->rc_blockcount; cur->bc_rec.rc.rc_refcount = irec->rc_refcount; @@ -208,7 +209,7 @@ xfs_refcount_insert( out_error: if (error) trace_xfs_refcount_insert_error(cur->bc_mp, - cur->bc_ag.agno, error, _RET_IP_); + cur->bc_ag.pag->pag_agno, error, _RET_IP_); return error; } @@ -234,7 +235,7 @@ xfs_refcount_delete( error = -EFSCORRUPTED; goto out_error; } - trace_xfs_refcount_delete(cur->bc_mp, cur->bc_ag.agno, &irec); + trace_xfs_refcount_delete(cur->bc_mp, cur->bc_ag.pag->pag_agno, &irec); error = xfs_btree_delete(cur, i); if (XFS_IS_CORRUPT(cur->bc_mp, *i != 1)) { error = -EFSCORRUPTED; @@ -246,7 +247,7 @@ xfs_refcount_delete( out_error: if (error) trace_xfs_refcount_delete_error(cur->bc_mp, - cur->bc_ag.agno, error, _RET_IP_); + cur->bc_ag.pag->pag_agno, error, _RET_IP_); return error; } @@ -366,7 +367,7 @@ xfs_refcount_split_extent( return 0; *shape_changed = true; - trace_xfs_refcount_split_extent(cur->bc_mp, cur->bc_ag.agno, + trace_xfs_refcount_split_extent(cur->bc_mp, cur->bc_ag.pag->pag_agno, &rcext, agbno); /* Establish the right extent. */ @@ -391,7 +392,7 @@ xfs_refcount_split_extent( out_error: trace_xfs_refcount_split_extent_error(cur->bc_mp, - cur->bc_ag.agno, error, _RET_IP_); + cur->bc_ag.pag->pag_agno, error, _RET_IP_); return error; } @@ -411,7 +412,7 @@ xfs_refcount_merge_center_extents( int found_rec; trace_xfs_refcount_merge_center_extents(cur->bc_mp, - cur->bc_ag.agno, left, center, right); + cur->bc_ag.pag->pag_agno, left, center, right); /* * Make sure the center and right extents are not in the btree. @@ -468,7 +469,7 @@ xfs_refcount_merge_center_extents( out_error: trace_xfs_refcount_merge_center_extents_error(cur->bc_mp, - cur->bc_ag.agno, error, _RET_IP_); + cur->bc_ag.pag->pag_agno, error, _RET_IP_); return error; } @@ -487,7 +488,7 @@ xfs_refcount_merge_left_extent( int found_rec; trace_xfs_refcount_merge_left_extent(cur->bc_mp, - cur->bc_ag.agno, left, cleft); + cur->bc_ag.pag->pag_agno, left, cleft); /* If the extent at agbno (cleft) wasn't synthesized, remove it. */ if (cleft->rc_refcount > 1) { @@ -530,7 +531,7 @@ xfs_refcount_merge_left_extent( out_error: trace_xfs_refcount_merge_left_extent_error(cur->bc_mp, - cur->bc_ag.agno, error, _RET_IP_); + cur->bc_ag.pag->pag_agno, error, _RET_IP_); return error; } @@ -548,7 +549,7 @@ xfs_refcount_merge_right_extent( int found_rec; trace_xfs_refcount_merge_right_extent(cur->bc_mp, - cur->bc_ag.agno, cright, right); + cur->bc_ag.pag->pag_agno, cright, right); /* * If the extent ending at agbno+aglen (cright) wasn't synthesized, @@ -594,7 +595,7 @@ xfs_refcount_merge_right_extent( out_error: trace_xfs_refcount_merge_right_extent_error(cur->bc_mp, - cur->bc_ag.agno, error, _RET_IP_); + cur->bc_ag.pag->pag_agno, error, _RET_IP_); return error; } @@ -679,13 +680,13 @@ xfs_refcount_find_left_extents( cleft->rc_blockcount = aglen; cleft->rc_refcount = 1; } - trace_xfs_refcount_find_left_extent(cur->bc_mp, cur->bc_ag.agno, + trace_xfs_refcount_find_left_extent(cur->bc_mp, cur->bc_ag.pag->pag_agno, left, cleft, agbno); return error; out_error: trace_xfs_refcount_find_left_extent_error(cur->bc_mp, - cur->bc_ag.agno, error, _RET_IP_); + cur->bc_ag.pag->pag_agno, error, _RET_IP_); return error; } @@ -768,13 +769,13 @@ xfs_refcount_find_right_extents( cright->rc_blockcount = aglen; cright->rc_refcount = 1; } - trace_xfs_refcount_find_right_extent(cur->bc_mp, cur->bc_ag.agno, + trace_xfs_refcount_find_right_extent(cur->bc_mp, cur->bc_ag.pag->pag_agno, cright, right, agbno + aglen); return error; out_error: trace_xfs_refcount_find_right_extent_error(cur->bc_mp, - cur->bc_ag.agno, error, _RET_IP_); + cur->bc_ag.pag->pag_agno, error, _RET_IP_); return error; } @@ -952,7 +953,7 @@ xfs_refcount_adjust_extents( ext.rc_startblock - *agbno); tmp.rc_refcount = 1 + adj; trace_xfs_refcount_modify_extent(cur->bc_mp, - cur->bc_ag.agno, &tmp); + cur->bc_ag.pag->pag_agno, &tmp); /* * Either cover the hole (increment) or @@ -971,7 +972,7 @@ xfs_refcount_adjust_extents( cur->bc_ag.refc.nr_ops++; } else { fsbno = XFS_AGB_TO_FSB(cur->bc_mp, - cur->bc_ag.agno, + cur->bc_ag.pag->pag_agno, tmp.rc_startblock); xfs_bmap_add_free(cur->bc_tp, fsbno, tmp.rc_blockcount, oinfo); @@ -998,7 +999,7 @@ xfs_refcount_adjust_extents( goto skip; ext.rc_refcount += adj; trace_xfs_refcount_modify_extent(cur->bc_mp, - cur->bc_ag.agno, &ext); + cur->bc_ag.pag->pag_agno, &ext); if (ext.rc_refcount > 1) { error = xfs_refcount_update(cur, &ext); if (error) @@ -1016,7 +1017,7 @@ xfs_refcount_adjust_extents( goto advloop; } else { fsbno = XFS_AGB_TO_FSB(cur->bc_mp, - cur->bc_ag.agno, + cur->bc_ag.pag->pag_agno, ext.rc_startblock); xfs_bmap_add_free(cur->bc_tp, fsbno, ext.rc_blockcount, oinfo); @@ -1035,7 +1036,7 @@ advloop: return error; out_error: trace_xfs_refcount_modify_extent_error(cur->bc_mp, - cur->bc_ag.agno, error, _RET_IP_); + cur->bc_ag.pag->pag_agno, error, _RET_IP_); return error; } @@ -1057,10 +1058,10 @@ xfs_refcount_adjust( *new_agbno = agbno; *new_aglen = aglen; if (adj == XFS_REFCOUNT_ADJUST_INCREASE) - trace_xfs_refcount_increase(cur->bc_mp, cur->bc_ag.agno, + trace_xfs_refcount_increase(cur->bc_mp, cur->bc_ag.pag->pag_agno, agbno, aglen); else - trace_xfs_refcount_decrease(cur->bc_mp, cur->bc_ag.agno, + trace_xfs_refcount_decrease(cur->bc_mp, cur->bc_ag.pag->pag_agno, agbno, aglen); /* @@ -1099,7 +1100,7 @@ xfs_refcount_adjust( return 0; out_error: - trace_xfs_refcount_adjust_error(cur->bc_mp, cur->bc_ag.agno, + trace_xfs_refcount_adjust_error(cur->bc_mp, cur->bc_ag.pag->pag_agno, error, _RET_IP_); return error; } @@ -1142,30 +1143,30 @@ xfs_refcount_finish_one( struct xfs_btree_cur *rcur; struct xfs_buf *agbp = NULL; int error = 0; - xfs_agnumber_t agno; xfs_agblock_t bno; xfs_agblock_t new_agbno; unsigned long nr_ops = 0; int shape_changes = 0; + struct xfs_perag *pag; - agno = XFS_FSB_TO_AGNO(mp, startblock); - ASSERT(agno != NULLAGNUMBER); + pag = xfs_perag_get(mp, XFS_FSB_TO_AGNO(mp, startblock)); bno = XFS_FSB_TO_AGBNO(mp, startblock); trace_xfs_refcount_deferred(mp, XFS_FSB_TO_AGNO(mp, startblock), type, XFS_FSB_TO_AGBNO(mp, startblock), blockcount); - if (XFS_TEST_ERROR(false, mp, - XFS_ERRTAG_REFCOUNT_FINISH_ONE)) - return -EIO; + if (XFS_TEST_ERROR(false, mp, XFS_ERRTAG_REFCOUNT_FINISH_ONE)) { + error = -EIO; + goto out_drop; + } /* * If we haven't gotten a cursor or the cursor AG doesn't match * the startblock, get one now. */ rcur = *pcur; - if (rcur != NULL && rcur->bc_ag.agno != agno) { + if (rcur != NULL && rcur->bc_ag.pag != pag) { nr_ops = rcur->bc_ag.refc.nr_ops; shape_changes = rcur->bc_ag.refc.shape_changes; xfs_refcount_finish_one_cleanup(tp, rcur, 0); @@ -1173,12 +1174,12 @@ xfs_refcount_finish_one( *pcur = NULL; } if (rcur == NULL) { - error = xfs_alloc_read_agf(tp->t_mountp, tp, agno, + error = xfs_alloc_read_agf(tp->t_mountp, tp, pag->pag_agno, XFS_ALLOC_FLAG_FREEING, &agbp); if (error) - return error; + goto out_drop; - rcur = xfs_refcountbt_init_cursor(mp, tp, agbp, agno); + rcur = xfs_refcountbt_init_cursor(mp, tp, agbp, pag); rcur->bc_ag.refc.nr_ops = nr_ops; rcur->bc_ag.refc.shape_changes = shape_changes; } @@ -1188,12 +1189,12 @@ xfs_refcount_finish_one( case XFS_REFCOUNT_INCREASE: error = xfs_refcount_adjust(rcur, bno, blockcount, &new_agbno, new_len, XFS_REFCOUNT_ADJUST_INCREASE, NULL); - *new_fsb = XFS_AGB_TO_FSB(mp, agno, new_agbno); + *new_fsb = XFS_AGB_TO_FSB(mp, pag->pag_agno, new_agbno); break; case XFS_REFCOUNT_DECREASE: error = xfs_refcount_adjust(rcur, bno, blockcount, &new_agbno, new_len, XFS_REFCOUNT_ADJUST_DECREASE, NULL); - *new_fsb = XFS_AGB_TO_FSB(mp, agno, new_agbno); + *new_fsb = XFS_AGB_TO_FSB(mp, pag->pag_agno, new_agbno); break; case XFS_REFCOUNT_ALLOC_COW: *new_fsb = startblock + blockcount; @@ -1210,8 +1211,10 @@ xfs_refcount_finish_one( error = -EFSCORRUPTED; } if (!error && *new_len > 0) - trace_xfs_refcount_finish_one_leftover(mp, agno, type, + trace_xfs_refcount_finish_one_leftover(mp, pag->pag_agno, type, bno, blockcount, new_agbno, *new_len); +out_drop: + xfs_perag_put(pag); return error; } @@ -1294,7 +1297,7 @@ xfs_refcount_find_shared( int have; int error; - trace_xfs_refcount_find_shared(cur->bc_mp, cur->bc_ag.agno, + trace_xfs_refcount_find_shared(cur->bc_mp, cur->bc_ag.pag->pag_agno, agbno, aglen); /* By default, skip the whole range */ @@ -1374,12 +1377,12 @@ xfs_refcount_find_shared( done: trace_xfs_refcount_find_shared_result(cur->bc_mp, - cur->bc_ag.agno, *fbno, *flen); + cur->bc_ag.pag->pag_agno, *fbno, *flen); out_error: if (error) trace_xfs_refcount_find_shared_error(cur->bc_mp, - cur->bc_ag.agno, error, _RET_IP_); + cur->bc_ag.pag->pag_agno, error, _RET_IP_); return error; } @@ -1476,7 +1479,7 @@ xfs_refcount_adjust_cow_extents( tmp.rc_blockcount = aglen; tmp.rc_refcount = 1; trace_xfs_refcount_modify_extent(cur->bc_mp, - cur->bc_ag.agno, &tmp); + cur->bc_ag.pag->pag_agno, &tmp); error = xfs_refcount_insert(cur, &tmp, &found_tmp); @@ -1504,7 +1507,7 @@ xfs_refcount_adjust_cow_extents( ext.rc_refcount = 0; trace_xfs_refcount_modify_extent(cur->bc_mp, - cur->bc_ag.agno, &ext); + cur->bc_ag.pag->pag_agno, &ext); error = xfs_refcount_delete(cur, &found_rec); if (error) goto out_error; @@ -1520,7 +1523,7 @@ xfs_refcount_adjust_cow_extents( return error; out_error: trace_xfs_refcount_modify_extent_error(cur->bc_mp, - cur->bc_ag.agno, error, _RET_IP_); + cur->bc_ag.pag->pag_agno, error, _RET_IP_); return error; } @@ -1566,7 +1569,7 @@ xfs_refcount_adjust_cow( return 0; out_error: - trace_xfs_refcount_adjust_cow_error(cur->bc_mp, cur->bc_ag.agno, + trace_xfs_refcount_adjust_cow_error(cur->bc_mp, cur->bc_ag.pag->pag_agno, error, _RET_IP_); return error; } @@ -1580,7 +1583,7 @@ __xfs_refcount_cow_alloc( xfs_agblock_t agbno, xfs_extlen_t aglen) { - trace_xfs_refcount_cow_increase(rcur->bc_mp, rcur->bc_ag.agno, + trace_xfs_refcount_cow_increase(rcur->bc_mp, rcur->bc_ag.pag->pag_agno, agbno, aglen); /* Add refcount btree reservation */ @@ -1597,7 +1600,7 @@ __xfs_refcount_cow_free( xfs_agblock_t agbno, xfs_extlen_t aglen) { - trace_xfs_refcount_cow_decrease(rcur->bc_mp, rcur->bc_ag.agno, + trace_xfs_refcount_cow_decrease(rcur->bc_mp, rcur->bc_ag.pag->pag_agno, agbno, aglen); /* Remove refcount btree reservation */ @@ -1672,7 +1675,7 @@ xfs_refcount_recover_extent( int xfs_refcount_recover_cow_leftovers( struct xfs_mount *mp, - xfs_agnumber_t agno) + struct xfs_perag *pag) { struct xfs_trans *tp; struct xfs_btree_cur *cur; @@ -1704,10 +1707,10 @@ xfs_refcount_recover_cow_leftovers( if (error) return error; - error = xfs_alloc_read_agf(mp, tp, agno, 0, &agbp); + error = xfs_alloc_read_agf(mp, tp, pag->pag_agno, 0, &agbp); if (error) goto out_trans; - cur = xfs_refcountbt_init_cursor(mp, tp, agbp, agno); + cur = xfs_refcountbt_init_cursor(mp, tp, agbp, pag); /* Find all the leftover CoW staging extents. */ memset(&low, 0, sizeof(low)); @@ -1729,11 +1732,12 @@ xfs_refcount_recover_cow_leftovers( if (error) goto out_free; - trace_xfs_refcount_recover_extent(mp, agno, &rr->rr_rrec); + trace_xfs_refcount_recover_extent(mp, pag->pag_agno, + &rr->rr_rrec); /* Free the orphan record */ agbno = rr->rr_rrec.rc_startblock - XFS_REFC_COW_START; - fsb = XFS_AGB_TO_FSB(mp, agno, agbno); + fsb = XFS_AGB_TO_FSB(mp, pag->pag_agno, agbno); xfs_refcount_free_cow_extent(tp, fsb, rr->rr_rrec.rc_blockcount); diff --git a/fs/xfs/libxfs/xfs_refcount.h b/fs/xfs/libxfs/xfs_refcount.h index 209795539c8d..9f6e9aae4da0 100644 --- a/fs/xfs/libxfs/xfs_refcount.h +++ b/fs/xfs/libxfs/xfs_refcount.h @@ -6,6 +6,13 @@ #ifndef __XFS_REFCOUNT_H__ #define __XFS_REFCOUNT_H__ +struct xfs_trans; +struct xfs_mount; +struct xfs_perag; +struct xfs_btree_cur; +struct xfs_bmbt_irec; +struct xfs_refcount_irec; + extern int xfs_refcount_lookup_le(struct xfs_btree_cur *cur, xfs_agblock_t bno, int *stat); extern int xfs_refcount_lookup_ge(struct xfs_btree_cur *cur, @@ -50,7 +57,7 @@ void xfs_refcount_alloc_cow_extent(struct xfs_trans *tp, xfs_fsblock_t fsb, void xfs_refcount_free_cow_extent(struct xfs_trans *tp, xfs_fsblock_t fsb, xfs_extlen_t len); extern int xfs_refcount_recover_cow_leftovers(struct xfs_mount *mp, - xfs_agnumber_t agno); + struct xfs_perag *pag); /* * While we're adjusting the refcounts records of an extent, we have diff --git a/fs/xfs/libxfs/xfs_refcount_btree.c b/fs/xfs/libxfs/xfs_refcount_btree.c index a6ac60ae9421..92d336c17e83 100644 --- a/fs/xfs/libxfs/xfs_refcount_btree.c +++ b/fs/xfs/libxfs/xfs_refcount_btree.c @@ -9,7 +9,6 @@ #include "xfs_format.h" #include "xfs_log_format.h" #include "xfs_trans_resv.h" -#include "xfs_sb.h" #include "xfs_mount.h" #include "xfs_btree.h" #include "xfs_btree_staging.h" @@ -20,13 +19,14 @@ #include "xfs_trans.h" #include "xfs_bit.h" #include "xfs_rmap.h" +#include "xfs_ag.h" static struct xfs_btree_cur * xfs_refcountbt_dup_cursor( struct xfs_btree_cur *cur) { return xfs_refcountbt_init_cursor(cur->bc_mp, cur->bc_tp, - cur->bc_ag.agbp, cur->bc_ag.agno); + cur->bc_ag.agbp, cur->bc_ag.pag); } STATIC void @@ -65,7 +65,7 @@ xfs_refcountbt_alloc_block( args.tp = cur->bc_tp; args.mp = cur->bc_mp; args.type = XFS_ALLOCTYPE_NEAR_BNO; - args.fsbno = XFS_AGB_TO_FSB(cur->bc_mp, cur->bc_ag.agno, + args.fsbno = XFS_AGB_TO_FSB(cur->bc_mp, cur->bc_ag.pag->pag_agno, xfs_refc_block(args.mp)); args.oinfo = XFS_RMAP_OINFO_REFC; args.minlen = args.maxlen = args.prod = 1; @@ -74,13 +74,13 @@ xfs_refcountbt_alloc_block( error = xfs_alloc_vextent(&args); if (error) goto out_error; - trace_xfs_refcountbt_alloc_block(cur->bc_mp, cur->bc_ag.agno, + trace_xfs_refcountbt_alloc_block(cur->bc_mp, cur->bc_ag.pag->pag_agno, args.agbno, 1); if (args.fsbno == NULLFSBLOCK) { *stat = 0; return 0; } - ASSERT(args.agno == cur->bc_ag.agno); + ASSERT(args.agno == cur->bc_ag.pag->pag_agno); ASSERT(args.len == 1); new->s = cpu_to_be32(args.agbno); @@ -105,7 +105,7 @@ xfs_refcountbt_free_block( xfs_fsblock_t fsbno = XFS_DADDR_TO_FSB(mp, XFS_BUF_ADDR(bp)); int error; - trace_xfs_refcountbt_free_block(cur->bc_mp, cur->bc_ag.agno, + trace_xfs_refcountbt_free_block(cur->bc_mp, cur->bc_ag.pag->pag_agno, XFS_FSB_TO_AGBNO(cur->bc_mp, fsbno), 1); be32_add_cpu(&agf->agf_refcount_blocks, -1); xfs_alloc_log_agf(cur->bc_tp, agbp, XFS_AGF_REFCOUNT_BLOCKS); @@ -170,7 +170,7 @@ xfs_refcountbt_init_ptr_from_cur( { struct xfs_agf *agf = cur->bc_ag.agbp->b_addr; - ASSERT(cur->bc_ag.agno == be32_to_cpu(agf->agf_seqno)); + ASSERT(cur->bc_ag.pag->pag_agno == be32_to_cpu(agf->agf_seqno)); ptr->s = agf->agf_refcount_root; } @@ -316,12 +316,11 @@ static struct xfs_btree_cur * xfs_refcountbt_init_common( struct xfs_mount *mp, struct xfs_trans *tp, - xfs_agnumber_t agno) + struct xfs_perag *pag) { struct xfs_btree_cur *cur; - ASSERT(agno != NULLAGNUMBER); - ASSERT(agno < mp->m_sb.sb_agcount); + ASSERT(pag->pag_agno < mp->m_sb.sb_agcount); cur = kmem_cache_zalloc(xfs_btree_cur_zone, GFP_NOFS | __GFP_NOFAIL); cur->bc_tp = tp; @@ -330,9 +329,12 @@ xfs_refcountbt_init_common( cur->bc_blocklog = mp->m_sb.sb_blocklog; cur->bc_statoff = XFS_STATS_CALC_INDEX(xs_refcbt_2); - cur->bc_ag.agno = agno; cur->bc_flags |= XFS_BTREE_CRC_BLOCKS; + /* take a reference for the cursor */ + atomic_inc(&pag->pag_ref); + cur->bc_ag.pag = pag; + cur->bc_ag.refc.nr_ops = 0; cur->bc_ag.refc.shape_changes = 0; cur->bc_ops = &xfs_refcountbt_ops; @@ -345,12 +347,12 @@ xfs_refcountbt_init_cursor( struct xfs_mount *mp, struct xfs_trans *tp, struct xfs_buf *agbp, - xfs_agnumber_t agno) + struct xfs_perag *pag) { struct xfs_agf *agf = agbp->b_addr; struct xfs_btree_cur *cur; - cur = xfs_refcountbt_init_common(mp, tp, agno); + cur = xfs_refcountbt_init_common(mp, tp, pag); cur->bc_nlevels = be32_to_cpu(agf->agf_refcount_level); cur->bc_ag.agbp = agbp; return cur; @@ -361,11 +363,11 @@ struct xfs_btree_cur * xfs_refcountbt_stage_cursor( struct xfs_mount *mp, struct xbtree_afakeroot *afake, - xfs_agnumber_t agno) + struct xfs_perag *pag) { struct xfs_btree_cur *cur; - cur = xfs_refcountbt_init_common(mp, NULL, agno); + cur = xfs_refcountbt_init_common(mp, NULL, pag); xfs_btree_stage_afakeroot(cur, afake); return cur; } @@ -450,7 +452,7 @@ int xfs_refcountbt_calc_reserves( struct xfs_mount *mp, struct xfs_trans *tp, - xfs_agnumber_t agno, + struct xfs_perag *pag, xfs_extlen_t *ask, xfs_extlen_t *used) { @@ -463,8 +465,7 @@ xfs_refcountbt_calc_reserves( if (!xfs_sb_version_hasreflink(&mp->m_sb)) return 0; - - error = xfs_alloc_read_agf(mp, tp, agno, 0, &agbp); + error = xfs_alloc_read_agf(mp, tp, pag->pag_agno, 0, &agbp); if (error) return error; @@ -479,7 +480,7 @@ xfs_refcountbt_calc_reserves( * expansion. We therefore can pretend the space isn't there. */ if (mp->m_sb.sb_logstart && - XFS_FSB_TO_AGNO(mp, mp->m_sb.sb_logstart) == agno) + XFS_FSB_TO_AGNO(mp, mp->m_sb.sb_logstart) == pag->pag_agno) agblocks -= mp->m_sb.sb_logblocks; *ask += xfs_refcountbt_max_size(mp, agblocks); diff --git a/fs/xfs/libxfs/xfs_refcount_btree.h b/fs/xfs/libxfs/xfs_refcount_btree.h index 69dc515db671..bd9ed9e1e41f 100644 --- a/fs/xfs/libxfs/xfs_refcount_btree.h +++ b/fs/xfs/libxfs/xfs_refcount_btree.h @@ -13,6 +13,7 @@ struct xfs_buf; struct xfs_btree_cur; struct xfs_mount; +struct xfs_perag; struct xbtree_afakeroot; /* @@ -46,9 +47,9 @@ struct xbtree_afakeroot; extern struct xfs_btree_cur *xfs_refcountbt_init_cursor(struct xfs_mount *mp, struct xfs_trans *tp, struct xfs_buf *agbp, - xfs_agnumber_t agno); + struct xfs_perag *pag); struct xfs_btree_cur *xfs_refcountbt_stage_cursor(struct xfs_mount *mp, - struct xbtree_afakeroot *afake, xfs_agnumber_t agno); + struct xbtree_afakeroot *afake, struct xfs_perag *pag); extern int xfs_refcountbt_maxrecs(int blocklen, bool leaf); extern void xfs_refcountbt_compute_maxlevels(struct xfs_mount *mp); @@ -58,7 +59,7 @@ extern xfs_extlen_t xfs_refcountbt_max_size(struct xfs_mount *mp, xfs_agblock_t agblocks); extern int xfs_refcountbt_calc_reserves(struct xfs_mount *mp, - struct xfs_trans *tp, xfs_agnumber_t agno, xfs_extlen_t *ask, + struct xfs_trans *tp, struct xfs_perag *pag, xfs_extlen_t *ask, xfs_extlen_t *used); void xfs_refcountbt_commit_staged_btree(struct xfs_btree_cur *cur, diff --git a/fs/xfs/libxfs/xfs_rmap.c b/fs/xfs/libxfs/xfs_rmap.c index 10e0cf9949a2..d1dfad0204e3 100644 --- a/fs/xfs/libxfs/xfs_rmap.c +++ b/fs/xfs/libxfs/xfs_rmap.c @@ -11,6 +11,7 @@ #include "xfs_trans_resv.h" #include "xfs_bit.h" #include "xfs_mount.h" +#include "xfs_sb.h" #include "xfs_defer.h" #include "xfs_btree.h" #include "xfs_trans.h" @@ -21,6 +22,7 @@ #include "xfs_errortag.h" #include "xfs_error.h" #include "xfs_inode.h" +#include "xfs_ag.h" /* * Lookup the first record less than or equal to [bno, len, owner, offset] @@ -79,7 +81,7 @@ xfs_rmap_update( union xfs_btree_rec rec; int error; - trace_xfs_rmap_update(cur->bc_mp, cur->bc_ag.agno, + trace_xfs_rmap_update(cur->bc_mp, cur->bc_ag.pag->pag_agno, irec->rm_startblock, irec->rm_blockcount, irec->rm_owner, irec->rm_offset, irec->rm_flags); @@ -91,7 +93,7 @@ xfs_rmap_update( error = xfs_btree_update(cur, &rec); if (error) trace_xfs_rmap_update_error(cur->bc_mp, - cur->bc_ag.agno, error, _RET_IP_); + cur->bc_ag.pag->pag_agno, error, _RET_IP_); return error; } @@ -107,7 +109,7 @@ xfs_rmap_insert( int i; int error; - trace_xfs_rmap_insert(rcur->bc_mp, rcur->bc_ag.agno, agbno, + trace_xfs_rmap_insert(rcur->bc_mp, rcur->bc_ag.pag->pag_agno, agbno, len, owner, offset, flags); error = xfs_rmap_lookup_eq(rcur, agbno, len, owner, offset, flags, &i); @@ -133,7 +135,7 @@ xfs_rmap_insert( done: if (error) trace_xfs_rmap_insert_error(rcur->bc_mp, - rcur->bc_ag.agno, error, _RET_IP_); + rcur->bc_ag.pag->pag_agno, error, _RET_IP_); return error; } @@ -149,7 +151,7 @@ xfs_rmap_delete( int i; int error; - trace_xfs_rmap_delete(rcur->bc_mp, rcur->bc_ag.agno, agbno, + trace_xfs_rmap_delete(rcur->bc_mp, rcur->bc_ag.pag->pag_agno, agbno, len, owner, offset, flags); error = xfs_rmap_lookup_eq(rcur, agbno, len, owner, offset, flags, &i); @@ -170,7 +172,7 @@ xfs_rmap_delete( done: if (error) trace_xfs_rmap_delete_error(rcur->bc_mp, - rcur->bc_ag.agno, error, _RET_IP_); + rcur->bc_ag.pag->pag_agno, error, _RET_IP_); return error; } @@ -197,7 +199,7 @@ xfs_rmap_get_rec( int *stat) { struct xfs_mount *mp = cur->bc_mp; - xfs_agnumber_t agno = cur->bc_ag.agno; + xfs_agnumber_t agno = cur->bc_ag.pag->pag_agno; union xfs_btree_rec *rec; int error; @@ -260,7 +262,7 @@ xfs_rmap_find_left_neighbor_helper( struct xfs_find_left_neighbor_info *info = priv; trace_xfs_rmap_find_left_neighbor_candidate(cur->bc_mp, - cur->bc_ag.agno, rec->rm_startblock, + cur->bc_ag.pag->pag_agno, rec->rm_startblock, rec->rm_blockcount, rec->rm_owner, rec->rm_offset, rec->rm_flags); @@ -312,7 +314,7 @@ xfs_rmap_find_left_neighbor( info.stat = stat; trace_xfs_rmap_find_left_neighbor_query(cur->bc_mp, - cur->bc_ag.agno, bno, 0, owner, offset, flags); + cur->bc_ag.pag->pag_agno, bno, 0, owner, offset, flags); error = xfs_rmap_query_range(cur, &info.high, &info.high, xfs_rmap_find_left_neighbor_helper, &info); @@ -320,7 +322,7 @@ xfs_rmap_find_left_neighbor( error = 0; if (*stat) trace_xfs_rmap_find_left_neighbor_result(cur->bc_mp, - cur->bc_ag.agno, irec->rm_startblock, + cur->bc_ag.pag->pag_agno, irec->rm_startblock, irec->rm_blockcount, irec->rm_owner, irec->rm_offset, irec->rm_flags); return error; @@ -336,7 +338,7 @@ xfs_rmap_lookup_le_range_helper( struct xfs_find_left_neighbor_info *info = priv; trace_xfs_rmap_lookup_le_range_candidate(cur->bc_mp, - cur->bc_ag.agno, rec->rm_startblock, + cur->bc_ag.pag->pag_agno, rec->rm_startblock, rec->rm_blockcount, rec->rm_owner, rec->rm_offset, rec->rm_flags); @@ -385,14 +387,14 @@ xfs_rmap_lookup_le_range( info.stat = stat; trace_xfs_rmap_lookup_le_range(cur->bc_mp, - cur->bc_ag.agno, bno, 0, owner, offset, flags); + cur->bc_ag.pag->pag_agno, bno, 0, owner, offset, flags); error = xfs_rmap_query_range(cur, &info.high, &info.high, xfs_rmap_lookup_le_range_helper, &info); if (error == -ECANCELED) error = 0; if (*stat) trace_xfs_rmap_lookup_le_range_result(cur->bc_mp, - cur->bc_ag.agno, irec->rm_startblock, + cur->bc_ag.pag->pag_agno, irec->rm_startblock, irec->rm_blockcount, irec->rm_owner, irec->rm_offset, irec->rm_flags); return error; @@ -498,7 +500,7 @@ xfs_rmap_unmap( (flags & XFS_RMAP_BMBT_BLOCK); if (unwritten) flags |= XFS_RMAP_UNWRITTEN; - trace_xfs_rmap_unmap(mp, cur->bc_ag.agno, bno, len, + trace_xfs_rmap_unmap(mp, cur->bc_ag.pag->pag_agno, bno, len, unwritten, oinfo); /* @@ -522,7 +524,7 @@ xfs_rmap_unmap( goto out_error; } trace_xfs_rmap_lookup_le_range_result(cur->bc_mp, - cur->bc_ag.agno, ltrec.rm_startblock, + cur->bc_ag.pag->pag_agno, ltrec.rm_startblock, ltrec.rm_blockcount, ltrec.rm_owner, ltrec.rm_offset, ltrec.rm_flags); ltoff = ltrec.rm_offset; @@ -588,7 +590,7 @@ xfs_rmap_unmap( if (ltrec.rm_startblock == bno && ltrec.rm_blockcount == len) { /* exact match, simply remove the record from rmap tree */ - trace_xfs_rmap_delete(mp, cur->bc_ag.agno, + trace_xfs_rmap_delete(mp, cur->bc_ag.pag->pag_agno, ltrec.rm_startblock, ltrec.rm_blockcount, ltrec.rm_owner, ltrec.rm_offset, ltrec.rm_flags); @@ -666,7 +668,7 @@ xfs_rmap_unmap( else cur->bc_rec.r.rm_offset = offset + len; cur->bc_rec.r.rm_flags = flags; - trace_xfs_rmap_insert(mp, cur->bc_ag.agno, + trace_xfs_rmap_insert(mp, cur->bc_ag.pag->pag_agno, cur->bc_rec.r.rm_startblock, cur->bc_rec.r.rm_blockcount, cur->bc_rec.r.rm_owner, @@ -678,11 +680,11 @@ xfs_rmap_unmap( } out_done: - trace_xfs_rmap_unmap_done(mp, cur->bc_ag.agno, bno, len, + trace_xfs_rmap_unmap_done(mp, cur->bc_ag.pag->pag_agno, bno, len, unwritten, oinfo); out_error: if (error) - trace_xfs_rmap_unmap_error(mp, cur->bc_ag.agno, + trace_xfs_rmap_unmap_error(mp, cur->bc_ag.pag->pag_agno, error, _RET_IP_); return error; } @@ -694,7 +696,7 @@ int xfs_rmap_free( struct xfs_trans *tp, struct xfs_buf *agbp, - xfs_agnumber_t agno, + struct xfs_perag *pag, xfs_agblock_t bno, xfs_extlen_t len, const struct xfs_owner_info *oinfo) @@ -706,7 +708,7 @@ xfs_rmap_free( if (!xfs_sb_version_hasrmapbt(&mp->m_sb)) return 0; - cur = xfs_rmapbt_init_cursor(mp, tp, agbp, agno); + cur = xfs_rmapbt_init_cursor(mp, tp, agbp, pag); error = xfs_rmap_unmap(cur, bno, len, false, oinfo); @@ -773,7 +775,7 @@ xfs_rmap_map( (flags & XFS_RMAP_BMBT_BLOCK); if (unwritten) flags |= XFS_RMAP_UNWRITTEN; - trace_xfs_rmap_map(mp, cur->bc_ag.agno, bno, len, + trace_xfs_rmap_map(mp, cur->bc_ag.pag->pag_agno, bno, len, unwritten, oinfo); ASSERT(!xfs_rmap_should_skip_owner_update(oinfo)); @@ -795,7 +797,7 @@ xfs_rmap_map( goto out_error; } trace_xfs_rmap_lookup_le_range_result(cur->bc_mp, - cur->bc_ag.agno, ltrec.rm_startblock, + cur->bc_ag.pag->pag_agno, ltrec.rm_startblock, ltrec.rm_blockcount, ltrec.rm_owner, ltrec.rm_offset, ltrec.rm_flags); @@ -831,7 +833,7 @@ xfs_rmap_map( goto out_error; } trace_xfs_rmap_find_right_neighbor_result(cur->bc_mp, - cur->bc_ag.agno, gtrec.rm_startblock, + cur->bc_ag.pag->pag_agno, gtrec.rm_startblock, gtrec.rm_blockcount, gtrec.rm_owner, gtrec.rm_offset, gtrec.rm_flags); if (!xfs_rmap_is_mergeable(>rec, owner, flags)) @@ -870,7 +872,7 @@ xfs_rmap_map( * result: |rrrrrrrrrrrrrrrrrrrrrrrrrrrrr| */ ltrec.rm_blockcount += gtrec.rm_blockcount; - trace_xfs_rmap_delete(mp, cur->bc_ag.agno, + trace_xfs_rmap_delete(mp, cur->bc_ag.pag->pag_agno, gtrec.rm_startblock, gtrec.rm_blockcount, gtrec.rm_owner, @@ -921,7 +923,7 @@ xfs_rmap_map( cur->bc_rec.r.rm_owner = owner; cur->bc_rec.r.rm_offset = offset; cur->bc_rec.r.rm_flags = flags; - trace_xfs_rmap_insert(mp, cur->bc_ag.agno, bno, len, + trace_xfs_rmap_insert(mp, cur->bc_ag.pag->pag_agno, bno, len, owner, offset, flags); error = xfs_btree_insert(cur, &i); if (error) @@ -932,11 +934,11 @@ xfs_rmap_map( } } - trace_xfs_rmap_map_done(mp, cur->bc_ag.agno, bno, len, + trace_xfs_rmap_map_done(mp, cur->bc_ag.pag->pag_agno, bno, len, unwritten, oinfo); out_error: if (error) - trace_xfs_rmap_map_error(mp, cur->bc_ag.agno, + trace_xfs_rmap_map_error(mp, cur->bc_ag.pag->pag_agno, error, _RET_IP_); return error; } @@ -948,7 +950,7 @@ int xfs_rmap_alloc( struct xfs_trans *tp, struct xfs_buf *agbp, - xfs_agnumber_t agno, + struct xfs_perag *pag, xfs_agblock_t bno, xfs_extlen_t len, const struct xfs_owner_info *oinfo) @@ -960,7 +962,7 @@ xfs_rmap_alloc( if (!xfs_sb_version_hasrmapbt(&mp->m_sb)) return 0; - cur = xfs_rmapbt_init_cursor(mp, tp, agbp, agno); + cur = xfs_rmapbt_init_cursor(mp, tp, agbp, pag); error = xfs_rmap_map(cur, bno, len, false, oinfo); xfs_btree_del_cursor(cur, error); @@ -1010,7 +1012,7 @@ xfs_rmap_convert( (flags & (XFS_RMAP_ATTR_FORK | XFS_RMAP_BMBT_BLOCK)))); oldext = unwritten ? XFS_RMAP_UNWRITTEN : 0; new_endoff = offset + len; - trace_xfs_rmap_convert(mp, cur->bc_ag.agno, bno, len, + trace_xfs_rmap_convert(mp, cur->bc_ag.pag->pag_agno, bno, len, unwritten, oinfo); /* @@ -1034,7 +1036,7 @@ xfs_rmap_convert( goto done; } trace_xfs_rmap_lookup_le_range_result(cur->bc_mp, - cur->bc_ag.agno, PREV.rm_startblock, + cur->bc_ag.pag->pag_agno, PREV.rm_startblock, PREV.rm_blockcount, PREV.rm_owner, PREV.rm_offset, PREV.rm_flags); @@ -1076,7 +1078,7 @@ xfs_rmap_convert( goto done; } trace_xfs_rmap_find_left_neighbor_result(cur->bc_mp, - cur->bc_ag.agno, LEFT.rm_startblock, + cur->bc_ag.pag->pag_agno, LEFT.rm_startblock, LEFT.rm_blockcount, LEFT.rm_owner, LEFT.rm_offset, LEFT.rm_flags); if (LEFT.rm_startblock + LEFT.rm_blockcount == bno && @@ -1114,7 +1116,7 @@ xfs_rmap_convert( goto done; } trace_xfs_rmap_find_right_neighbor_result(cur->bc_mp, - cur->bc_ag.agno, RIGHT.rm_startblock, + cur->bc_ag.pag->pag_agno, RIGHT.rm_startblock, RIGHT.rm_blockcount, RIGHT.rm_owner, RIGHT.rm_offset, RIGHT.rm_flags); if (bno + len == RIGHT.rm_startblock && @@ -1132,7 +1134,7 @@ xfs_rmap_convert( RIGHT.rm_blockcount > XFS_RMAP_LEN_MAX) state &= ~RMAP_RIGHT_CONTIG; - trace_xfs_rmap_convert_state(mp, cur->bc_ag.agno, state, + trace_xfs_rmap_convert_state(mp, cur->bc_ag.pag->pag_agno, state, _RET_IP_); /* reset the cursor back to PREV */ @@ -1162,7 +1164,7 @@ xfs_rmap_convert( error = -EFSCORRUPTED; goto done; } - trace_xfs_rmap_delete(mp, cur->bc_ag.agno, + trace_xfs_rmap_delete(mp, cur->bc_ag.pag->pag_agno, RIGHT.rm_startblock, RIGHT.rm_blockcount, RIGHT.rm_owner, RIGHT.rm_offset, RIGHT.rm_flags); @@ -1180,7 +1182,7 @@ xfs_rmap_convert( error = -EFSCORRUPTED; goto done; } - trace_xfs_rmap_delete(mp, cur->bc_ag.agno, + trace_xfs_rmap_delete(mp, cur->bc_ag.pag->pag_agno, PREV.rm_startblock, PREV.rm_blockcount, PREV.rm_owner, PREV.rm_offset, PREV.rm_flags); @@ -1210,7 +1212,7 @@ xfs_rmap_convert( * Setting all of a previous oldext extent to newext. * The left neighbor is contiguous, the right is not. */ - trace_xfs_rmap_delete(mp, cur->bc_ag.agno, + trace_xfs_rmap_delete(mp, cur->bc_ag.pag->pag_agno, PREV.rm_startblock, PREV.rm_blockcount, PREV.rm_owner, PREV.rm_offset, PREV.rm_flags); @@ -1247,7 +1249,7 @@ xfs_rmap_convert( error = -EFSCORRUPTED; goto done; } - trace_xfs_rmap_delete(mp, cur->bc_ag.agno, + trace_xfs_rmap_delete(mp, cur->bc_ag.pag->pag_agno, RIGHT.rm_startblock, RIGHT.rm_blockcount, RIGHT.rm_owner, RIGHT.rm_offset, RIGHT.rm_flags); @@ -1326,7 +1328,7 @@ xfs_rmap_convert( NEW.rm_blockcount = len; NEW.rm_flags = newext; cur->bc_rec.r = NEW; - trace_xfs_rmap_insert(mp, cur->bc_ag.agno, bno, + trace_xfs_rmap_insert(mp, cur->bc_ag.pag->pag_agno, bno, len, owner, offset, newext); error = xfs_btree_insert(cur, &i); if (error) @@ -1383,7 +1385,7 @@ xfs_rmap_convert( NEW.rm_blockcount = len; NEW.rm_flags = newext; cur->bc_rec.r = NEW; - trace_xfs_rmap_insert(mp, cur->bc_ag.agno, bno, + trace_xfs_rmap_insert(mp, cur->bc_ag.pag->pag_agno, bno, len, owner, offset, newext); error = xfs_btree_insert(cur, &i); if (error) @@ -1414,7 +1416,7 @@ xfs_rmap_convert( NEW = PREV; NEW.rm_blockcount = offset - PREV.rm_offset; cur->bc_rec.r = NEW; - trace_xfs_rmap_insert(mp, cur->bc_ag.agno, + trace_xfs_rmap_insert(mp, cur->bc_ag.pag->pag_agno, NEW.rm_startblock, NEW.rm_blockcount, NEW.rm_owner, NEW.rm_offset, NEW.rm_flags); @@ -1441,7 +1443,7 @@ xfs_rmap_convert( /* new middle extent - newext */ cur->bc_rec.r.rm_flags &= ~XFS_RMAP_UNWRITTEN; cur->bc_rec.r.rm_flags |= newext; - trace_xfs_rmap_insert(mp, cur->bc_ag.agno, bno, len, + trace_xfs_rmap_insert(mp, cur->bc_ag.pag->pag_agno, bno, len, owner, offset, newext); error = xfs_btree_insert(cur, &i); if (error) @@ -1465,12 +1467,12 @@ xfs_rmap_convert( ASSERT(0); } - trace_xfs_rmap_convert_done(mp, cur->bc_ag.agno, bno, len, + trace_xfs_rmap_convert_done(mp, cur->bc_ag.pag->pag_agno, bno, len, unwritten, oinfo); done: if (error) trace_xfs_rmap_convert_error(cur->bc_mp, - cur->bc_ag.agno, error, _RET_IP_); + cur->bc_ag.pag->pag_agno, error, _RET_IP_); return error; } @@ -1506,7 +1508,7 @@ xfs_rmap_convert_shared( (flags & (XFS_RMAP_ATTR_FORK | XFS_RMAP_BMBT_BLOCK)))); oldext = unwritten ? XFS_RMAP_UNWRITTEN : 0; new_endoff = offset + len; - trace_xfs_rmap_convert(mp, cur->bc_ag.agno, bno, len, + trace_xfs_rmap_convert(mp, cur->bc_ag.pag->pag_agno, bno, len, unwritten, oinfo); /* @@ -1573,7 +1575,7 @@ xfs_rmap_convert_shared( goto done; } trace_xfs_rmap_find_right_neighbor_result(cur->bc_mp, - cur->bc_ag.agno, RIGHT.rm_startblock, + cur->bc_ag.pag->pag_agno, RIGHT.rm_startblock, RIGHT.rm_blockcount, RIGHT.rm_owner, RIGHT.rm_offset, RIGHT.rm_flags); if (xfs_rmap_is_mergeable(&RIGHT, owner, newext)) @@ -1589,7 +1591,7 @@ xfs_rmap_convert_shared( RIGHT.rm_blockcount > XFS_RMAP_LEN_MAX) state &= ~RMAP_RIGHT_CONTIG; - trace_xfs_rmap_convert_state(mp, cur->bc_ag.agno, state, + trace_xfs_rmap_convert_state(mp, cur->bc_ag.pag->pag_agno, state, _RET_IP_); /* * Switch out based on the FILLING and CONTIG state bits. @@ -1880,12 +1882,12 @@ xfs_rmap_convert_shared( ASSERT(0); } - trace_xfs_rmap_convert_done(mp, cur->bc_ag.agno, bno, len, + trace_xfs_rmap_convert_done(mp, cur->bc_ag.pag->pag_agno, bno, len, unwritten, oinfo); done: if (error) trace_xfs_rmap_convert_error(cur->bc_mp, - cur->bc_ag.agno, error, _RET_IP_); + cur->bc_ag.pag->pag_agno, error, _RET_IP_); return error; } @@ -1923,7 +1925,7 @@ xfs_rmap_unmap_shared( xfs_owner_info_unpack(oinfo, &owner, &offset, &flags); if (unwritten) flags |= XFS_RMAP_UNWRITTEN; - trace_xfs_rmap_unmap(mp, cur->bc_ag.agno, bno, len, + trace_xfs_rmap_unmap(mp, cur->bc_ag.pag->pag_agno, bno, len, unwritten, oinfo); /* @@ -2072,12 +2074,12 @@ xfs_rmap_unmap_shared( goto out_error; } - trace_xfs_rmap_unmap_done(mp, cur->bc_ag.agno, bno, len, + trace_xfs_rmap_unmap_done(mp, cur->bc_ag.pag->pag_agno, bno, len, unwritten, oinfo); out_error: if (error) trace_xfs_rmap_unmap_error(cur->bc_mp, - cur->bc_ag.agno, error, _RET_IP_); + cur->bc_ag.pag->pag_agno, error, _RET_IP_); return error; } @@ -2112,7 +2114,7 @@ xfs_rmap_map_shared( xfs_owner_info_unpack(oinfo, &owner, &offset, &flags); if (unwritten) flags |= XFS_RMAP_UNWRITTEN; - trace_xfs_rmap_map(mp, cur->bc_ag.agno, bno, len, + trace_xfs_rmap_map(mp, cur->bc_ag.pag->pag_agno, bno, len, unwritten, oinfo); /* Is there a left record that abuts our range? */ @@ -2138,7 +2140,7 @@ xfs_rmap_map_shared( goto out_error; } trace_xfs_rmap_find_right_neighbor_result(cur->bc_mp, - cur->bc_ag.agno, gtrec.rm_startblock, + cur->bc_ag.pag->pag_agno, gtrec.rm_startblock, gtrec.rm_blockcount, gtrec.rm_owner, gtrec.rm_offset, gtrec.rm_flags); @@ -2231,12 +2233,12 @@ xfs_rmap_map_shared( goto out_error; } - trace_xfs_rmap_map_done(mp, cur->bc_ag.agno, bno, len, + trace_xfs_rmap_map_done(mp, cur->bc_ag.pag->pag_agno, bno, len, unwritten, oinfo); out_error: if (error) trace_xfs_rmap_map_error(cur->bc_mp, - cur->bc_ag.agno, error, _RET_IP_); + cur->bc_ag.pag->pag_agno, error, _RET_IP_); return error; } @@ -2362,31 +2364,32 @@ xfs_rmap_finish_one( struct xfs_btree_cur **pcur) { struct xfs_mount *mp = tp->t_mountp; + struct xfs_perag *pag; struct xfs_btree_cur *rcur; struct xfs_buf *agbp = NULL; int error = 0; - xfs_agnumber_t agno; struct xfs_owner_info oinfo; xfs_agblock_t bno; bool unwritten; - agno = XFS_FSB_TO_AGNO(mp, startblock); - ASSERT(agno != NULLAGNUMBER); + pag = xfs_perag_get(mp, XFS_FSB_TO_AGNO(mp, startblock)); bno = XFS_FSB_TO_AGBNO(mp, startblock); - trace_xfs_rmap_deferred(mp, agno, type, bno, owner, whichfork, + trace_xfs_rmap_deferred(mp, pag->pag_agno, type, bno, owner, whichfork, startoff, blockcount, state); - if (XFS_TEST_ERROR(false, mp, - XFS_ERRTAG_RMAP_FINISH_ONE)) - return -EIO; + if (XFS_TEST_ERROR(false, mp, XFS_ERRTAG_RMAP_FINISH_ONE)) { + error = -EIO; + goto out_drop; + } + /* * If we haven't gotten a cursor or the cursor AG doesn't match * the startblock, get one now. */ rcur = *pcur; - if (rcur != NULL && rcur->bc_ag.agno != agno) { + if (rcur != NULL && rcur->bc_ag.pag != pag) { xfs_rmap_finish_one_cleanup(tp, rcur, 0); rcur = NULL; *pcur = NULL; @@ -2397,13 +2400,15 @@ xfs_rmap_finish_one( * rmapbt, because a shape change could cause us to * allocate blocks. */ - error = xfs_free_extent_fix_freelist(tp, agno, &agbp); + error = xfs_free_extent_fix_freelist(tp, pag, &agbp); if (error) - return error; - if (XFS_IS_CORRUPT(tp->t_mountp, !agbp)) - return -EFSCORRUPTED; + goto out_drop; + if (XFS_IS_CORRUPT(tp->t_mountp, !agbp)) { + error = -EFSCORRUPTED; + goto out_drop; + } - rcur = xfs_rmapbt_init_cursor(mp, tp, agbp, agno); + rcur = xfs_rmapbt_init_cursor(mp, tp, agbp, pag); } *pcur = rcur; @@ -2441,6 +2446,8 @@ xfs_rmap_finish_one( ASSERT(0); error = -EFSCORRUPTED; } +out_drop: + xfs_perag_put(pag); return error; } diff --git a/fs/xfs/libxfs/xfs_rmap.h b/fs/xfs/libxfs/xfs_rmap.h index abe633403fd1..f2423cf7f1e2 100644 --- a/fs/xfs/libxfs/xfs_rmap.h +++ b/fs/xfs/libxfs/xfs_rmap.h @@ -6,6 +6,8 @@ #ifndef __XFS_RMAP_H__ #define __XFS_RMAP_H__ +struct xfs_perag; + static inline void xfs_rmap_ino_bmbt_owner( struct xfs_owner_info *oi, @@ -113,10 +115,10 @@ xfs_owner_info_pack( } int xfs_rmap_alloc(struct xfs_trans *tp, struct xfs_buf *agbp, - xfs_agnumber_t agno, xfs_agblock_t bno, xfs_extlen_t len, + struct xfs_perag *pag, xfs_agblock_t bno, xfs_extlen_t len, const struct xfs_owner_info *oinfo); int xfs_rmap_free(struct xfs_trans *tp, struct xfs_buf *agbp, - xfs_agnumber_t agno, xfs_agblock_t bno, xfs_extlen_t len, + struct xfs_perag *pag, xfs_agblock_t bno, xfs_extlen_t len, const struct xfs_owner_info *oinfo); int xfs_rmap_lookup_le(struct xfs_btree_cur *cur, xfs_agblock_t bno, diff --git a/fs/xfs/libxfs/xfs_rmap_btree.c b/fs/xfs/libxfs/xfs_rmap_btree.c index 9f5bcbd834c3..f29bc71b9950 100644 --- a/fs/xfs/libxfs/xfs_rmap_btree.c +++ b/fs/xfs/libxfs/xfs_rmap_btree.c @@ -9,7 +9,6 @@ #include "xfs_format.h" #include "xfs_log_format.h" #include "xfs_trans_resv.h" -#include "xfs_sb.h" #include "xfs_mount.h" #include "xfs_trans.h" #include "xfs_alloc.h" @@ -20,6 +19,7 @@ #include "xfs_trace.h" #include "xfs_error.h" #include "xfs_extent_busy.h" +#include "xfs_ag.h" #include "xfs_ag_resv.h" /* @@ -52,7 +52,7 @@ xfs_rmapbt_dup_cursor( struct xfs_btree_cur *cur) { return xfs_rmapbt_init_cursor(cur->bc_mp, cur->bc_tp, - cur->bc_ag.agbp, cur->bc_ag.agno); + cur->bc_ag.agbp, cur->bc_ag.pag); } STATIC void @@ -64,13 +64,12 @@ xfs_rmapbt_set_root( struct xfs_buf *agbp = cur->bc_ag.agbp; struct xfs_agf *agf = agbp->b_addr; int btnum = cur->bc_btnum; - struct xfs_perag *pag = agbp->b_pag; ASSERT(ptr->s != 0); agf->agf_roots[btnum] = ptr->s; be32_add_cpu(&agf->agf_levels[btnum], inc); - pag->pagf_levels[btnum] += inc; + cur->bc_ag.pag->pagf_levels[btnum] += inc; xfs_alloc_log_agf(cur->bc_tp, agbp, XFS_AGF_ROOTS | XFS_AGF_LEVELS); } @@ -84,6 +83,7 @@ xfs_rmapbt_alloc_block( { struct xfs_buf *agbp = cur->bc_ag.agbp; struct xfs_agf *agf = agbp->b_addr; + struct xfs_perag *pag = cur->bc_ag.pag; int error; xfs_agblock_t bno; @@ -93,21 +93,19 @@ xfs_rmapbt_alloc_block( if (error) return error; - trace_xfs_rmapbt_alloc_block(cur->bc_mp, cur->bc_ag.agno, - bno, 1); + trace_xfs_rmapbt_alloc_block(cur->bc_mp, pag->pag_agno, bno, 1); if (bno == NULLAGBLOCK) { *stat = 0; return 0; } - xfs_extent_busy_reuse(cur->bc_mp, cur->bc_ag.agno, bno, 1, - false); + xfs_extent_busy_reuse(cur->bc_mp, pag, bno, 1, false); new->s = cpu_to_be32(bno); be32_add_cpu(&agf->agf_rmap_blocks, 1); xfs_alloc_log_agf(cur->bc_tp, agbp, XFS_AGF_RMAP_BLOCKS); - xfs_ag_resv_rmapbt_alloc(cur->bc_mp, cur->bc_ag.agno); + xfs_ag_resv_rmapbt_alloc(cur->bc_mp, pag->pag_agno); *stat = 1; return 0; @@ -120,12 +118,12 @@ xfs_rmapbt_free_block( { struct xfs_buf *agbp = cur->bc_ag.agbp; struct xfs_agf *agf = agbp->b_addr; - struct xfs_perag *pag; + struct xfs_perag *pag = cur->bc_ag.pag; xfs_agblock_t bno; int error; bno = xfs_daddr_to_agbno(cur->bc_mp, XFS_BUF_ADDR(bp)); - trace_xfs_rmapbt_free_block(cur->bc_mp, cur->bc_ag.agno, + trace_xfs_rmapbt_free_block(cur->bc_mp, pag->pag_agno, bno, 1); be32_add_cpu(&agf->agf_rmap_blocks, -1); xfs_alloc_log_agf(cur->bc_tp, agbp, XFS_AGF_RMAP_BLOCKS); @@ -133,10 +131,9 @@ xfs_rmapbt_free_block( if (error) return error; - xfs_extent_busy_insert(cur->bc_tp, be32_to_cpu(agf->agf_seqno), bno, 1, + xfs_extent_busy_insert(cur->bc_tp, pag, bno, 1, XFS_EXTENT_BUSY_SKIP_DISCARD); - pag = cur->bc_ag.agbp->b_pag; xfs_ag_resv_free_extent(pag, XFS_AG_RESV_RMAPBT, NULL, 1); return 0; } @@ -215,7 +212,7 @@ xfs_rmapbt_init_ptr_from_cur( { struct xfs_agf *agf = cur->bc_ag.agbp->b_addr; - ASSERT(cur->bc_ag.agno == be32_to_cpu(agf->agf_seqno)); + ASSERT(cur->bc_ag.pag->pag_agno == be32_to_cpu(agf->agf_seqno)); ptr->s = agf->agf_roots[cur->bc_btnum]; } @@ -450,7 +447,7 @@ static struct xfs_btree_cur * xfs_rmapbt_init_common( struct xfs_mount *mp, struct xfs_trans *tp, - xfs_agnumber_t agno) + struct xfs_perag *pag) { struct xfs_btree_cur *cur; @@ -462,9 +459,12 @@ xfs_rmapbt_init_common( cur->bc_flags = XFS_BTREE_CRC_BLOCKS | XFS_BTREE_OVERLAPPING; cur->bc_blocklog = mp->m_sb.sb_blocklog; cur->bc_statoff = XFS_STATS_CALC_INDEX(xs_rmap_2); - cur->bc_ag.agno = agno; cur->bc_ops = &xfs_rmapbt_ops; + /* take a reference for the cursor */ + atomic_inc(&pag->pag_ref); + cur->bc_ag.pag = pag; + return cur; } @@ -474,12 +474,12 @@ xfs_rmapbt_init_cursor( struct xfs_mount *mp, struct xfs_trans *tp, struct xfs_buf *agbp, - xfs_agnumber_t agno) + struct xfs_perag *pag) { struct xfs_agf *agf = agbp->b_addr; struct xfs_btree_cur *cur; - cur = xfs_rmapbt_init_common(mp, tp, agno); + cur = xfs_rmapbt_init_common(mp, tp, pag); cur->bc_nlevels = be32_to_cpu(agf->agf_levels[XFS_BTNUM_RMAP]); cur->bc_ag.agbp = agbp; return cur; @@ -490,11 +490,11 @@ struct xfs_btree_cur * xfs_rmapbt_stage_cursor( struct xfs_mount *mp, struct xbtree_afakeroot *afake, - xfs_agnumber_t agno) + struct xfs_perag *pag) { struct xfs_btree_cur *cur; - cur = xfs_rmapbt_init_common(mp, NULL, agno); + cur = xfs_rmapbt_init_common(mp, NULL, pag); xfs_btree_stage_afakeroot(cur, afake); return cur; } @@ -596,7 +596,7 @@ int xfs_rmapbt_calc_reserves( struct xfs_mount *mp, struct xfs_trans *tp, - xfs_agnumber_t agno, + struct xfs_perag *pag, xfs_extlen_t *ask, xfs_extlen_t *used) { @@ -609,7 +609,7 @@ xfs_rmapbt_calc_reserves( if (!xfs_sb_version_hasrmapbt(&mp->m_sb)) return 0; - error = xfs_alloc_read_agf(mp, tp, agno, 0, &agbp); + error = xfs_alloc_read_agf(mp, tp, pag->pag_agno, 0, &agbp); if (error) return error; @@ -624,7 +624,7 @@ xfs_rmapbt_calc_reserves( * expansion. We therefore can pretend the space isn't there. */ if (mp->m_sb.sb_logstart && - XFS_FSB_TO_AGNO(mp, mp->m_sb.sb_logstart) == agno) + XFS_FSB_TO_AGNO(mp, mp->m_sb.sb_logstart) == pag->pag_agno) agblocks -= mp->m_sb.sb_logblocks; /* Reserve 1% of the AG or enough for 1 block per record. */ diff --git a/fs/xfs/libxfs/xfs_rmap_btree.h b/fs/xfs/libxfs/xfs_rmap_btree.h index 115c3455a734..88d8d18788a2 100644 --- a/fs/xfs/libxfs/xfs_rmap_btree.h +++ b/fs/xfs/libxfs/xfs_rmap_btree.h @@ -43,9 +43,9 @@ struct xbtree_afakeroot; struct xfs_btree_cur *xfs_rmapbt_init_cursor(struct xfs_mount *mp, struct xfs_trans *tp, struct xfs_buf *bp, - xfs_agnumber_t agno); + struct xfs_perag *pag); struct xfs_btree_cur *xfs_rmapbt_stage_cursor(struct xfs_mount *mp, - struct xbtree_afakeroot *afake, xfs_agnumber_t agno); + struct xbtree_afakeroot *afake, struct xfs_perag *pag); void xfs_rmapbt_commit_staged_btree(struct xfs_btree_cur *cur, struct xfs_trans *tp, struct xfs_buf *agbp); int xfs_rmapbt_maxrecs(int blocklen, int leaf); @@ -57,6 +57,6 @@ extern xfs_extlen_t xfs_rmapbt_max_size(struct xfs_mount *mp, xfs_agblock_t agblocks); extern int xfs_rmapbt_calc_reserves(struct xfs_mount *mp, struct xfs_trans *tp, - xfs_agnumber_t agno, xfs_extlen_t *ask, xfs_extlen_t *used); + struct xfs_perag *pag, xfs_extlen_t *ask, xfs_extlen_t *used); #endif /* __XFS_RMAP_BTREE_H__ */ diff --git a/fs/xfs/libxfs/xfs_sb.c b/fs/xfs/libxfs/xfs_sb.c index dfbbcbd448c1..04f5386446db 100644 --- a/fs/xfs/libxfs/xfs_sb.c +++ b/fs/xfs/libxfs/xfs_sb.c @@ -15,7 +15,6 @@ #include "xfs_ialloc.h" #include "xfs_alloc.h" #include "xfs_error.h" -#include "xfs_trace.h" #include "xfs_trans.h" #include "xfs_buf_item.h" #include "xfs_bmap_btree.h" @@ -25,72 +24,12 @@ #include "xfs_refcount_btree.h" #include "xfs_da_format.h" #include "xfs_health.h" +#include "xfs_ag.h" /* * Physical superblock buffer manipulations. Shared with libxfs in userspace. */ -/* - * Reference counting access wrappers to the perag structures. - * Because we never free per-ag structures, the only thing we - * have to protect against changes is the tree structure itself. - */ -struct xfs_perag * -xfs_perag_get( - struct xfs_mount *mp, - xfs_agnumber_t agno) -{ - struct xfs_perag *pag; - int ref = 0; - - rcu_read_lock(); - pag = radix_tree_lookup(&mp->m_perag_tree, agno); - if (pag) { - ASSERT(atomic_read(&pag->pag_ref) >= 0); - ref = atomic_inc_return(&pag->pag_ref); - } - rcu_read_unlock(); - trace_xfs_perag_get(mp, agno, ref, _RET_IP_); - return pag; -} - -/* - * search from @first to find the next perag with the given tag set. - */ -struct xfs_perag * -xfs_perag_get_tag( - struct xfs_mount *mp, - xfs_agnumber_t first, - int tag) -{ - struct xfs_perag *pag; - int found; - int ref; - - rcu_read_lock(); - found = radix_tree_gang_lookup_tag(&mp->m_perag_tree, - (void **)&pag, first, 1, tag); - if (found <= 0) { - rcu_read_unlock(); - return NULL; - } - ref = atomic_inc_return(&pag->pag_ref); - rcu_read_unlock(); - trace_xfs_perag_get_tag(mp, pag->pag_agno, ref, _RET_IP_); - return pag; -} - -void -xfs_perag_put( - struct xfs_perag *pag) -{ - int ref; - - ASSERT(atomic_read(&pag->pag_ref) > 0); - ref = atomic_dec_return(&pag->pag_ref); - trace_xfs_perag_put(pag->pag_mount, pag->pag_agno, ref, _RET_IP_); -} - /* Check all the superblock fields we care about when reading one in. */ STATIC int xfs_validate_sb_read( @@ -842,78 +781,6 @@ xfs_sb_mount_common( } /* - * xfs_initialize_perag_data - * - * Read in each per-ag structure so we can count up the number of - * allocated inodes, free inodes and used filesystem blocks as this - * information is no longer persistent in the superblock. Once we have - * this information, write it into the in-core superblock structure. - */ -int -xfs_initialize_perag_data( - struct xfs_mount *mp, - xfs_agnumber_t agcount) -{ - xfs_agnumber_t index; - xfs_perag_t *pag; - xfs_sb_t *sbp = &mp->m_sb; - uint64_t ifree = 0; - uint64_t ialloc = 0; - uint64_t bfree = 0; - uint64_t bfreelst = 0; - uint64_t btree = 0; - uint64_t fdblocks; - int error = 0; - - for (index = 0; index < agcount; index++) { - /* - * read the agf, then the agi. This gets us - * all the information we need and populates the - * per-ag structures for us. - */ - error = xfs_alloc_pagf_init(mp, NULL, index, 0); - if (error) - return error; - - error = xfs_ialloc_pagi_init(mp, NULL, index); - if (error) - return error; - pag = xfs_perag_get(mp, index); - ifree += pag->pagi_freecount; - ialloc += pag->pagi_count; - bfree += pag->pagf_freeblks; - bfreelst += pag->pagf_flcount; - btree += pag->pagf_btreeblks; - xfs_perag_put(pag); - } - fdblocks = bfree + bfreelst + btree; - - /* - * If the new summary counts are obviously incorrect, fail the - * mount operation because that implies the AGFs are also corrupt. - * Clear FS_COUNTERS so that we don't unmount with a dirty log, which - * will prevent xfs_repair from fixing anything. - */ - if (fdblocks > sbp->sb_dblocks || ifree > ialloc) { - xfs_alert(mp, "AGF corruption. Please run xfs_repair."); - error = -EFSCORRUPTED; - goto out; - } - - /* Overwrite incore superblock counters with just-read data */ - spin_lock(&mp->m_sb_lock); - sbp->sb_ifree = ifree; - sbp->sb_icount = ialloc; - sbp->sb_fdblocks = fdblocks; - spin_unlock(&mp->m_sb_lock); - - xfs_reinit_percpu_counters(mp); -out: - xfs_fs_mark_healthy(mp, XFS_SICK_FS_COUNTERS); - return error; -} - -/* * xfs_log_sb() can be used to copy arbitrary changes to the in-core superblock * into the superblock buffer to be logged. It does not provide the higher * level of locking that is needed to protect the in-core superblock from @@ -989,17 +856,18 @@ int xfs_update_secondary_sbs( struct xfs_mount *mp) { - xfs_agnumber_t agno; + struct xfs_perag *pag; + xfs_agnumber_t agno = 1; int saved_error = 0; int error = 0; LIST_HEAD (buffer_list); /* update secondary superblocks. */ - for (agno = 1; agno < mp->m_sb.sb_agcount; agno++) { + for_each_perag_from(mp, agno, pag) { struct xfs_buf *bp; error = xfs_buf_get(mp->m_ddev_targp, - XFS_AG_DADDR(mp, agno, XFS_SB_DADDR), + XFS_AG_DADDR(mp, pag->pag_agno, XFS_SB_DADDR), XFS_FSS_TO_BB(mp, 1), &bp); /* * If we get an error reading or writing alternate superblocks, @@ -1011,7 +879,7 @@ xfs_update_secondary_sbs( if (error) { xfs_warn(mp, "error allocating secondary superblock for ag %d", - agno); + pag->pag_agno); if (!saved_error) saved_error = error; continue; @@ -1032,7 +900,7 @@ xfs_update_secondary_sbs( if (error) { xfs_warn(mp, "write error %d updating a secondary superblock near ag %d", - error, agno); + error, pag->pag_agno); if (!saved_error) saved_error = error; continue; diff --git a/fs/xfs/libxfs/xfs_sb.h b/fs/xfs/libxfs/xfs_sb.h index f79f9dc632b6..0c1602d9b53d 100644 --- a/fs/xfs/libxfs/xfs_sb.h +++ b/fs/xfs/libxfs/xfs_sb.h @@ -13,15 +13,6 @@ struct xfs_trans; struct xfs_fsop_geom; struct xfs_perag; -/* - * perag get/put wrappers for ref counting - */ -extern struct xfs_perag *xfs_perag_get(struct xfs_mount *, xfs_agnumber_t); -extern struct xfs_perag *xfs_perag_get_tag(struct xfs_mount *, xfs_agnumber_t, - int tag); -extern void xfs_perag_put(struct xfs_perag *pag); -extern int xfs_initialize_perag_data(struct xfs_mount *, xfs_agnumber_t); - extern void xfs_log_sb(struct xfs_trans *tp); extern int xfs_sync_sb(struct xfs_mount *mp, bool wait); extern int xfs_sync_sb_buf(struct xfs_mount *mp); diff --git a/fs/xfs/libxfs/xfs_shared.h b/fs/xfs/libxfs/xfs_shared.h index 782fdd08f759..25c4cab58851 100644 --- a/fs/xfs/libxfs/xfs_shared.h +++ b/fs/xfs/libxfs/xfs_shared.h @@ -22,30 +22,26 @@ struct xfs_inode; * Buffer verifier operations are widely used, including userspace tools */ extern const struct xfs_buf_ops xfs_agf_buf_ops; -extern const struct xfs_buf_ops xfs_agi_buf_ops; -extern const struct xfs_buf_ops xfs_agf_buf_ops; extern const struct xfs_buf_ops xfs_agfl_buf_ops; -extern const struct xfs_buf_ops xfs_bnobt_buf_ops; -extern const struct xfs_buf_ops xfs_cntbt_buf_ops; -extern const struct xfs_buf_ops xfs_rmapbt_buf_ops; -extern const struct xfs_buf_ops xfs_refcountbt_buf_ops; +extern const struct xfs_buf_ops xfs_agi_buf_ops; extern const struct xfs_buf_ops xfs_attr3_leaf_buf_ops; extern const struct xfs_buf_ops xfs_attr3_rmt_buf_ops; extern const struct xfs_buf_ops xfs_bmbt_buf_ops; +extern const struct xfs_buf_ops xfs_bnobt_buf_ops; +extern const struct xfs_buf_ops xfs_cntbt_buf_ops; extern const struct xfs_buf_ops xfs_da3_node_buf_ops; extern const struct xfs_buf_ops xfs_dquot_buf_ops; -extern const struct xfs_buf_ops xfs_symlink_buf_ops; -extern const struct xfs_buf_ops xfs_agi_buf_ops; -extern const struct xfs_buf_ops xfs_inobt_buf_ops; +extern const struct xfs_buf_ops xfs_dquot_buf_ra_ops; extern const struct xfs_buf_ops xfs_finobt_buf_ops; +extern const struct xfs_buf_ops xfs_inobt_buf_ops; extern const struct xfs_buf_ops xfs_inode_buf_ops; extern const struct xfs_buf_ops xfs_inode_buf_ra_ops; -extern const struct xfs_buf_ops xfs_dquot_buf_ops; -extern const struct xfs_buf_ops xfs_dquot_buf_ra_ops; +extern const struct xfs_buf_ops xfs_refcountbt_buf_ops; +extern const struct xfs_buf_ops xfs_rmapbt_buf_ops; +extern const struct xfs_buf_ops xfs_rtbuf_ops; extern const struct xfs_buf_ops xfs_sb_buf_ops; extern const struct xfs_buf_ops xfs_sb_quiet_buf_ops; extern const struct xfs_buf_ops xfs_symlink_buf_ops; -extern const struct xfs_buf_ops xfs_rtbuf_ops; /* log size calculation functions */ int xfs_log_calc_unit_res(struct xfs_mount *mp, int unit_bytes); diff --git a/fs/xfs/libxfs/xfs_types.c b/fs/xfs/libxfs/xfs_types.c index 04801362e1a7..e8f4abee7892 100644 --- a/fs/xfs/libxfs/xfs_types.c +++ b/fs/xfs/libxfs/xfs_types.c @@ -11,6 +11,7 @@ #include "xfs_trans_resv.h" #include "xfs_bit.h" #include "xfs_mount.h" +#include "xfs_ag.h" /* Find the size of the AG, in blocks. */ inline xfs_agblock_t @@ -222,12 +223,13 @@ xfs_icount_range( unsigned long long *max) { unsigned long long nr_inos = 0; + struct xfs_perag *pag; xfs_agnumber_t agno; /* root, rtbitmap, rtsum all live in the first chunk */ *min = XFS_INODES_PER_CHUNK; - for (agno = 0; agno < mp->m_sb.sb_agcount; agno++) { + for_each_perag(mp, agno, pag) { xfs_agino_t first, last; xfs_agino_range(mp, agno, &first, &last); diff --git a/fs/xfs/libxfs/xfs_types.h b/fs/xfs/libxfs/xfs_types.h index 064bd6e8c922..0870ef6f933d 100644 --- a/fs/xfs/libxfs/xfs_types.h +++ b/fs/xfs/libxfs/xfs_types.h @@ -21,6 +21,7 @@ typedef int32_t xfs_suminfo_t; /* type of bitmap summary info */ typedef uint32_t xfs_rtword_t; /* word type for bitmap manipulations */ typedef int64_t xfs_lsn_t; /* log sequence number */ +typedef int64_t xfs_csn_t; /* CIL sequence number */ typedef uint32_t xfs_dablk_t; /* dir/attr block number (in file) */ typedef uint32_t xfs_dahash_t; /* dir/attr hash value */ |