diff options
author | David Chinner <dgc@sgi.com> | 2007-08-28 06:00:13 +0200 |
---|---|---|
committer | Tim Shimmin <tes@chook.melbourne.sgi.com> | 2007-10-15 08:50:50 +0200 |
commit | da353b0d64e070ae7c5342a0d56ec20ae9ef5cfb (patch) | |
tree | 84454023d649df67cc6b125c73746ddb341ac34e /fs/xfs/xfs_mount.c | |
parent | [XFS] kill move.[ch] (diff) | |
download | linux-da353b0d64e070ae7c5342a0d56ec20ae9ef5cfb.tar.xz linux-da353b0d64e070ae7c5342a0d56ec20ae9ef5cfb.zip |
[XFS] Radix tree based inode caching
One of the perpetual scaling problems XFS has is indexing it's incore
inodes. We currently uses hashes and the default hash sizes chosen can
only ever be a tradeoff between memory consumption and the maximum
realistic size of the cache.
As a result, anyone who has millions of inodes cached on a filesystem
needs to tunes the size of the cache via the ihashsize mount option to
allow decent scalability with inode cache operations.
A further problem is the separate inode cluster hash, whose size is based
on the ihashsize but is smaller, and so under certain conditions (sparse
cluster cache population) this can become a limitation long before the
inode hash is causing issues.
The following patchset removes the inode hash and cluster hash and
replaces them with radix trees to avoid the scalability limitations of the
hashes. It also reduces the size of the inodes by 3 pointers....
SGI-PV: 969561
SGI-Modid: xfs-linux-melb:xfs-kern:29481a
Signed-off-by: David Chinner <dgc@sgi.com>
Signed-off-by: Christoph Hellwig <hch@infradead.org>
Signed-off-by: Tim Shimmin <tes@sgi.com>
Diffstat (limited to 'fs/xfs/xfs_mount.c')
-rw-r--r-- | fs/xfs/xfs_mount.c | 27 |
1 files changed, 13 insertions, 14 deletions
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c index f4daf1ec9931..71f25947251d 100644 --- a/fs/xfs/xfs_mount.c +++ b/fs/xfs/xfs_mount.c @@ -160,11 +160,6 @@ xfs_mount_free( xfs_mount_t *mp, int remove_bhv) { - if (mp->m_ihash) - xfs_ihash_free(mp); - if (mp->m_chash) - xfs_chash_free(mp); - if (mp->m_perag) { int agno; @@ -342,6 +337,17 @@ xfs_mount_validate_sb( return 0; } +STATIC void +xfs_initialize_perag_icache( + xfs_perag_t *pag) +{ + if (!pag->pag_ici_init) { + rwlock_init(&pag->pag_ici_lock); + INIT_RADIX_TREE(&pag->pag_ici_root, GFP_ATOMIC); + pag->pag_ici_init = 1; + } +} + xfs_agnumber_t xfs_initialize_perag( bhv_vfs_t *vfs, @@ -396,12 +402,14 @@ xfs_initialize_perag( pag->pagi_inodeok = 1; if (index < max_metadata) pag->pagf_metadata = 1; + xfs_initialize_perag_icache(pag); } } else { /* Setup default behavior for smaller filesystems */ for (index = 0; index < agcount; index++) { pag = &mp->m_perag[index]; pag->pagi_inodeok = 1; + xfs_initialize_perag_icache(pag); } } return index; @@ -1033,13 +1041,6 @@ xfs_mountfs( xfs_trans_init(mp); /* - * Allocate and initialize the inode hash table for this - * file system. - */ - xfs_ihash_init(mp); - xfs_chash_init(mp); - - /* * Allocate and initialize the per-ag data. */ init_rwsem(&mp->m_peraglock); @@ -1190,8 +1191,6 @@ xfs_mountfs( error3: xfs_log_unmount_dealloc(mp); error2: - xfs_ihash_free(mp); - xfs_chash_free(mp); for (agno = 0; agno < sbp->sb_agcount; agno++) if (mp->m_perag[agno].pagb_list) kmem_free(mp->m_perag[agno].pagb_list, |