diff options
Diffstat (limited to 'fs/ext4/mballoc.c')
-rw-r--r-- | fs/ext4/mballoc.c | 133 |
1 files changed, 131 insertions, 2 deletions
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index 6dc2c6c535ef..c350ae9cb0d9 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -922,7 +922,7 @@ static int ext4_mb_init_cache(struct page *page, char *incore, gfp_t gfp) bh[i] = NULL; continue; } - bh[i] = ext4_read_block_bitmap_nowait(sb, group); + bh[i] = ext4_read_block_bitmap_nowait(sb, group, false); if (IS_ERR(bh[i])) { err = PTR_ERR(bh[i]); bh[i] = NULL; @@ -2209,12 +2209,93 @@ out: return ret; } +/* + * Start prefetching @nr block bitmaps starting at @group. + * Return the next group which needs to be prefetched. + */ +static ext4_group_t +ext4_mb_prefetch(struct super_block *sb, ext4_group_t group, + unsigned int nr, int *cnt) +{ + ext4_group_t ngroups = ext4_get_groups_count(sb); + struct buffer_head *bh; + struct blk_plug plug; + + blk_start_plug(&plug); + while (nr-- > 0) { + struct ext4_group_desc *gdp = ext4_get_group_desc(sb, group, + NULL); + struct ext4_group_info *grp = ext4_get_group_info(sb, group); + + /* + * Prefetch block groups with free blocks; but don't + * bother if it is marked uninitialized on disk, since + * it won't require I/O to read. Also only try to + * prefetch once, so we avoid getblk() call, which can + * be expensive. + */ + if (!EXT4_MB_GRP_TEST_AND_SET_READ(grp) && + EXT4_MB_GRP_NEED_INIT(grp) && + ext4_free_group_clusters(sb, gdp) > 0 && + !(ext4_has_group_desc_csum(sb) && + (gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)))) { + bh = ext4_read_block_bitmap_nowait(sb, group, true); + if (bh && !IS_ERR(bh)) { + if (!buffer_uptodate(bh) && cnt) + (*cnt)++; + brelse(bh); + } + } + if (++group >= ngroups) + group = 0; + } + blk_finish_plug(&plug); + return group; +} + +/* + * Prefetching reads the block bitmap into the buffer cache; but we + * need to make sure that the buddy bitmap in the page cache has been + * initialized. Note that ext4_mb_init_group() will block if the I/O + * is not yet completed, or indeed if it was not initiated by + * ext4_mb_prefetch did not start the I/O. + * + * TODO: We should actually kick off the buddy bitmap setup in a work + * queue when the buffer I/O is completed, so that we don't block + * waiting for the block allocation bitmap read to finish when + * ext4_mb_prefetch_fini is called from ext4_mb_regular_allocator(). + */ +static void +ext4_mb_prefetch_fini(struct super_block *sb, ext4_group_t group, + unsigned int nr) +{ + while (nr-- > 0) { + struct ext4_group_desc *gdp = ext4_get_group_desc(sb, group, + NULL); + struct ext4_group_info *grp = ext4_get_group_info(sb, group); + + if (!group) + group = ext4_get_groups_count(sb); + group--; + grp = ext4_get_group_info(sb, group); + + if (EXT4_MB_GRP_NEED_INIT(grp) && + ext4_free_group_clusters(sb, gdp) > 0 && + !(ext4_has_group_desc_csum(sb) && + (gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)))) { + if (ext4_mb_init_group(sb, group, GFP_NOFS)) + break; + } + } +} + static noinline_for_stack int ext4_mb_regular_allocator(struct ext4_allocation_context *ac) { - ext4_group_t ngroups, group, i; + ext4_group_t prefetch_grp = 0, ngroups, group, i; int cr = -1; int err = 0, first_err = 0; + unsigned int nr = 0, prefetch_ios = 0; struct ext4_sb_info *sbi; struct super_block *sb; struct ext4_buddy e4b; @@ -2282,6 +2363,7 @@ repeat: * from the goal value specified */ group = ac->ac_g_ex.fe_group; + prefetch_grp = group; for (i = 0; i < ngroups; group++, i++) { int ret = 0; @@ -2293,6 +2375,29 @@ repeat: if (group >= ngroups) group = 0; + /* + * Batch reads of the block allocation bitmaps + * to get multiple READs in flight; limit + * prefetching at cr=0/1, otherwise mballoc can + * spend a lot of time loading imperfect groups + */ + if ((prefetch_grp == group) && + (cr > 1 || + prefetch_ios < sbi->s_mb_prefetch_limit)) { + unsigned int curr_ios = prefetch_ios; + + nr = sbi->s_mb_prefetch; + if (ext4_has_feature_flex_bg(sb)) { + nr = (group / sbi->s_mb_prefetch) * + sbi->s_mb_prefetch; + nr = nr + sbi->s_mb_prefetch - group; + } + prefetch_grp = ext4_mb_prefetch(sb, group, + nr, &prefetch_ios); + if (prefetch_ios == curr_ios) + nr = 0; + } + /* This now checks without needing the buddy page */ ret = ext4_mb_good_group_nolock(ac, group, cr); if (ret <= 0) { @@ -2367,6 +2472,10 @@ out: mb_debug(sb, "Best len %d, origin len %d, ac_status %u, ac_flags 0x%x, cr %d ret %d\n", ac->ac_b_ex.fe_len, ac->ac_o_ex.fe_len, ac->ac_status, ac->ac_flags, cr, err); + + if (nr) + ext4_mb_prefetch_fini(sb, prefetch_grp, nr); + return err; } @@ -2613,6 +2722,26 @@ static int ext4_mb_init_backend(struct super_block *sb) goto err_freebuddy; } + if (ext4_has_feature_flex_bg(sb)) { + /* a single flex group is supposed to be read by a single IO */ + sbi->s_mb_prefetch = 1 << sbi->s_es->s_log_groups_per_flex; + sbi->s_mb_prefetch *= 8; /* 8 prefetch IOs in flight at most */ + } else { + sbi->s_mb_prefetch = 32; + } + if (sbi->s_mb_prefetch > ext4_get_groups_count(sb)) + sbi->s_mb_prefetch = ext4_get_groups_count(sb); + /* now many real IOs to prefetch within a single allocation at cr=0 + * given cr=0 is an CPU-related optimization we shouldn't try to + * load too many groups, at some point we should start to use what + * we've got in memory. + * with an average random access time 5ms, it'd take a second to get + * 200 groups (* N with flex_bg), so let's make this limit 4 + */ + sbi->s_mb_prefetch_limit = sbi->s_mb_prefetch * 4; + if (sbi->s_mb_prefetch_limit > ext4_get_groups_count(sb)) + sbi->s_mb_prefetch_limit = ext4_get_groups_count(sb); + return 0; err_freebuddy: |