From 9f920f116426806bfa34c1422742e1bf7b7a2b4b Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 13 Mar 2012 08:52:35 +0000 Subject: xfs: use per-filesystem radix trees for dquot lookup Replace the global hash tables for looking up in-memory dquot structures with per-filesystem radix trees to allow scaling to a large number of in-memory dquot structures. Reviewed-by: Dave Chinner Signed-off-by: Christoph Hellwig Signed-off-by: Ben Myers --- fs/xfs/xfs_dquot.c | 188 ++++++++++++++--------------------------------------- 1 file changed, 48 insertions(+), 140 deletions(-) (limited to 'fs/xfs/xfs_dquot.c') diff --git a/fs/xfs/xfs_dquot.c b/fs/xfs/xfs_dquot.c index fec1a3d78e9f..49456e555cfa 100644 --- a/fs/xfs/xfs_dquot.c +++ b/fs/xfs/xfs_dquot.c @@ -43,7 +43,7 @@ * Lock order: * * ip->i_lock - * qh->qh_lock + * qi->qi_tree_lock * qi->qi_dqlist_lock * dquot->q_qlock (xfs_dqlock() and friends) * dquot->q_flush (xfs_dqflock() and friends) @@ -601,60 +601,6 @@ error0: return error; } -/* - * Lookup a dquot in the incore dquot hashtable. We keep two separate - * hashtables for user and group dquots; and, these are global tables - * inside the XQM, not per-filesystem tables. - * The hash chain must be locked by caller, and it is left locked - * on return. Returning dquot is locked. - */ -STATIC int -xfs_qm_dqlookup( - xfs_mount_t *mp, - xfs_dqid_t id, - xfs_dqhash_t *qh, - xfs_dquot_t **O_dqpp) -{ - xfs_dquot_t *dqp; - - ASSERT(mutex_is_locked(&qh->qh_lock)); - - /* - * Traverse the hashchain looking for a match - */ - list_for_each_entry(dqp, &qh->qh_list, q_hashlist) { - /* - * We already have the hashlock. We don't need the - * dqlock to look at the id field of the dquot, since the - * id can't be modified without the hashlock anyway. - */ - if (be32_to_cpu(dqp->q_core.d_id) != id || dqp->q_mount != mp) - continue; - - trace_xfs_dqlookup_found(dqp); - - xfs_dqlock(dqp); - if (dqp->dq_flags & XFS_DQ_FREEING) { - *O_dqpp = NULL; - xfs_dqunlock(dqp); - return -1; - } - - dqp->q_nrefs++; - - /* - * move the dquot to the front of the hashchain - */ - list_move(&dqp->q_hashlist, &qh->qh_list); - trace_xfs_dqlookup_done(dqp); - *O_dqpp = dqp; - return 0; - } - - *O_dqpp = NULL; - return 1; -} - /* * Given the file system, inode OR id, and type (UDQUOT/GDQUOT), return a * a locked dquot, doing an allocation (if requested) as needed. @@ -672,10 +618,10 @@ xfs_qm_dqget( uint flags, /* DQALLOC, DQSUSER, DQREPAIR, DOWARN */ xfs_dquot_t **O_dqpp) /* OUT : locked incore dquot */ { - xfs_dquot_t *dqp, *dqp1; - xfs_dqhash_t *h; - uint version; - int error; + struct xfs_quotainfo *qi = mp->m_quotainfo; + struct radix_tree_root *tree = XFS_DQUOT_TREE(qi, type); + struct xfs_dquot *dqp; + int error; ASSERT(XFS_IS_QUOTA_RUNNING(mp)); if ((! XFS_IS_UQUOTA_ON(mp) && type == XFS_DQ_USER) || @@ -683,7 +629,6 @@ xfs_qm_dqget( (! XFS_IS_GQUOTA_ON(mp) && type == XFS_DQ_GROUP)) { return (ESRCH); } - h = XFS_DQ_HASH(mp, id, type); #ifdef DEBUG if (xfs_do_dqerror) { @@ -704,34 +649,28 @@ xfs_qm_dqget( #endif restart: - mutex_lock(&h->qh_lock); + mutex_lock(&qi->qi_tree_lock); + dqp = radix_tree_lookup(tree, id); + if (dqp) { + xfs_dqlock(dqp); + if (dqp->dq_flags & XFS_DQ_FREEING) { + xfs_dqunlock(dqp); + mutex_unlock(&qi->qi_tree_lock); + trace_xfs_dqget_freeing(dqp); + delay(1); + goto restart; + } - /* - * Look in the cache (hashtable). - * The chain is kept locked during lookup. - */ - switch (xfs_qm_dqlookup(mp, id, h, O_dqpp)) { - case -1: - XFS_STATS_INC(xs_qm_dquot_dups); - mutex_unlock(&h->qh_lock); - delay(1); - goto restart; - case 0: + dqp->q_nrefs++; + mutex_unlock(&qi->qi_tree_lock); + + trace_xfs_dqget_hit(dqp); XFS_STATS_INC(xs_qm_dqcachehits); - /* - * The dquot was found, moved to the front of the chain, - * taken off the freelist if it was on it, and locked - * at this point. Just unlock the hashchain and return. - */ - ASSERT(*O_dqpp); - ASSERT(XFS_DQ_IS_LOCKED(*O_dqpp)); - mutex_unlock(&h->qh_lock); - trace_xfs_dqget_hit(*O_dqpp); - return 0; /* success */ - default: - XFS_STATS_INC(xs_qm_dqcachemisses); - break; + *O_dqpp = dqp; + return 0; } + mutex_unlock(&qi->qi_tree_lock); + XFS_STATS_INC(xs_qm_dqcachemisses); /* * Dquot cache miss. We don't want to keep the inode lock across @@ -742,12 +681,6 @@ restart: */ if (ip) xfs_iunlock(ip, XFS_ILOCK_EXCL); - /* - * Save the hashchain version stamp, and unlock the chain, so that - * we don't keep the lock across a disk read - */ - version = h->qh_version; - mutex_unlock(&h->qh_lock); error = xfs_qm_dqread(mp, id, type, flags, &dqp); @@ -757,15 +690,14 @@ restart: if (error) return error; - /* - * Dquot lock comes after hashlock in the lock ordering - */ if (ip) { /* * A dquot could be attached to this inode by now, since * we had dropped the ilock. */ if (xfs_this_quota_on(mp, type)) { + struct xfs_dquot *dqp1; + dqp1 = xfs_inode_dquot(ip, type); if (dqp1) { xfs_qm_dqdestroy(dqp); @@ -780,51 +712,27 @@ restart: } } - /* - * Hashlock comes after ilock in lock order - */ - mutex_lock(&h->qh_lock); - if (version != h->qh_version) { - xfs_dquot_t *tmpdqp; + mutex_lock(&qi->qi_tree_lock); + error = -radix_tree_insert(tree, id, dqp); + if (unlikely(error)) { + WARN_ON(error != EEXIST); + /* - * Now, see if somebody else put the dquot in the - * hashtable before us. This can happen because we didn't - * keep the hashchain lock. We don't have to worry about - * lock order between the two dquots here since dqp isn't - * on any findable lists yet. + * Duplicate found. Just throw away the new dquot and start + * over. */ - switch (xfs_qm_dqlookup(mp, id, h, &tmpdqp)) { - case 0: - case -1: - /* - * Duplicate found, either in cache or on its way out. - * Just throw away the new dquot and start over. - */ - if (tmpdqp) - xfs_qm_dqput(tmpdqp); - mutex_unlock(&h->qh_lock); - xfs_qm_dqdestroy(dqp); - XFS_STATS_INC(xs_qm_dquot_dups); - goto restart; - default: - break; - } + mutex_unlock(&qi->qi_tree_lock); + trace_xfs_dqget_dup(dqp); + xfs_qm_dqdestroy(dqp); + XFS_STATS_INC(xs_qm_dquot_dups); + goto restart; } - /* - * Put the dquot at the beginning of the hash-chain and mp's list - * LOCK ORDER: hashlock, freelistlock, mplistlock, udqlock, gdqlock .. - */ - ASSERT(mutex_is_locked(&h->qh_lock)); - dqp->q_hash = h; - list_add(&dqp->q_hashlist, &h->qh_list); - h->qh_version++; - /* * Attach this dquot to this filesystem's list of all dquots, * kept inside the mount structure in m_quotainfo field */ - mutex_lock(&mp->m_quotainfo->qi_dqlist_lock); + mutex_lock(&qi->qi_dqlist_lock); /* * We return a locked dquot to the caller, with a reference taken @@ -832,10 +740,11 @@ restart: xfs_dqlock(dqp); dqp->q_nrefs = 1; - list_add(&dqp->q_mplist, &mp->m_quotainfo->qi_dqlist); - mp->m_quotainfo->qi_dquots++; - mutex_unlock(&mp->m_quotainfo->qi_dqlist_lock); - mutex_unlock(&h->qh_lock); + list_add(&dqp->q_mplist, &qi->qi_dqlist); + qi->qi_dquots++; + mutex_unlock(&qi->qi_dqlist_lock); + mutex_unlock(&qi->qi_tree_lock); + dqret: ASSERT((ip == NULL) || xfs_isilocked(ip, XFS_ILOCK_EXCL)); trace_xfs_dqget_miss(dqp); @@ -1117,7 +1026,6 @@ xfs_qm_dqpurge( struct xfs_dquot *dqp) { struct xfs_mount *mp = dqp->q_mount; - struct xfs_dqhash *qh = dqp->q_hash; struct xfs_quotainfo *qi = mp->m_quotainfo; xfs_dqlock(dqp); @@ -1164,10 +1072,10 @@ xfs_qm_dqpurge( xfs_dqfunlock(dqp); xfs_dqunlock(dqp); - mutex_lock(&qh->qh_lock); - list_del_init(&dqp->q_hashlist); - qh->qh_version++; - mutex_unlock(&qh->qh_lock); + mutex_lock(&qi->qi_tree_lock); + radix_tree_delete(XFS_DQUOT_TREE(qi, dqp->q_core.d_flags), + be32_to_cpu(dqp->q_core.d_id)); + mutex_unlock(&qi->qi_tree_lock); mutex_lock(&qi->qi_dqlist_lock); list_del_init(&dqp->q_mplist); -- cgit v1.2.3