diff options
Diffstat (limited to 'fs/ext4/extents_status.c')
-rw-r--r-- | fs/ext4/extents_status.c | 207 |
1 files changed, 129 insertions, 78 deletions
diff --git a/fs/ext4/extents_status.c b/fs/ext4/extents_status.c index 595abb9e7d74..9b5b8951afb4 100644 --- a/fs/ext4/extents_status.c +++ b/fs/ext4/extents_status.c @@ -144,9 +144,11 @@ static struct kmem_cache *ext4_es_cachep; static struct kmem_cache *ext4_pending_cachep; -static int __es_insert_extent(struct inode *inode, struct extent_status *newes); +static int __es_insert_extent(struct inode *inode, struct extent_status *newes, + struct extent_status *prealloc); static int __es_remove_extent(struct inode *inode, ext4_lblk_t lblk, - ext4_lblk_t end, int *reserved); + ext4_lblk_t end, int *reserved, + struct extent_status *prealloc); static int es_reclaim_extents(struct ext4_inode_info *ei, int *nr_to_scan); static int __es_shrink(struct ext4_sb_info *sbi, int nr_to_scan, struct ext4_inode_info *locked_ei); @@ -446,22 +448,36 @@ static void ext4_es_list_del(struct inode *inode) spin_unlock(&sbi->s_es_lock); } -static struct extent_status * -ext4_es_alloc_extent(struct inode *inode, ext4_lblk_t lblk, ext4_lblk_t len, - ext4_fsblk_t pblk) +/* + * Returns true if we cannot fail to allocate memory for this extent_status + * entry and cannot reclaim it until its status changes. + */ +static inline bool ext4_es_must_keep(struct extent_status *es) +{ + /* fiemap, bigalloc, and seek_data/hole need to use it. */ + if (ext4_es_is_delayed(es)) + return true; + + return false; +} + +static inline struct extent_status *__es_alloc_extent(bool nofail) +{ + if (!nofail) + return kmem_cache_alloc(ext4_es_cachep, GFP_ATOMIC); + + return kmem_cache_zalloc(ext4_es_cachep, GFP_KERNEL | __GFP_NOFAIL); +} + +static void ext4_es_init_extent(struct inode *inode, struct extent_status *es, + ext4_lblk_t lblk, ext4_lblk_t len, ext4_fsblk_t pblk) { - struct extent_status *es; - es = kmem_cache_alloc(ext4_es_cachep, GFP_ATOMIC); - if (es == NULL) - return NULL; es->es_lblk = lblk; es->es_len = len; es->es_pblk = pblk; - /* - * We don't count delayed extent because we never try to reclaim them - */ - if (!ext4_es_is_delayed(es)) { + /* We never try to reclaim a must kept extent, so we don't count it. */ + if (!ext4_es_must_keep(es)) { if (!EXT4_I(inode)->i_es_shk_nr++) ext4_es_list_add(inode); percpu_counter_inc(&EXT4_SB(inode->i_sb)-> @@ -470,8 +486,11 @@ ext4_es_alloc_extent(struct inode *inode, ext4_lblk_t lblk, ext4_lblk_t len, EXT4_I(inode)->i_es_all_nr++; percpu_counter_inc(&EXT4_SB(inode->i_sb)->s_es_stats.es_stats_all_cnt); +} - return es; +static inline void __es_free_extent(struct extent_status *es) +{ + kmem_cache_free(ext4_es_cachep, es); } static void ext4_es_free_extent(struct inode *inode, struct extent_status *es) @@ -479,8 +498,8 @@ static void ext4_es_free_extent(struct inode *inode, struct extent_status *es) EXT4_I(inode)->i_es_all_nr--; percpu_counter_dec(&EXT4_SB(inode->i_sb)->s_es_stats.es_stats_all_cnt); - /* Decrease the shrink counter when this es is not delayed */ - if (!ext4_es_is_delayed(es)) { + /* Decrease the shrink counter when we can reclaim the extent. */ + if (!ext4_es_must_keep(es)) { BUG_ON(EXT4_I(inode)->i_es_shk_nr == 0); if (!--EXT4_I(inode)->i_es_shk_nr) ext4_es_list_del(inode); @@ -488,7 +507,7 @@ static void ext4_es_free_extent(struct inode *inode, struct extent_status *es) s_es_stats.es_stats_shk_cnt); } - kmem_cache_free(ext4_es_cachep, es); + __es_free_extent(es); } /* @@ -749,7 +768,8 @@ static inline void ext4_es_insert_extent_check(struct inode *inode, } #endif -static int __es_insert_extent(struct inode *inode, struct extent_status *newes) +static int __es_insert_extent(struct inode *inode, struct extent_status *newes, + struct extent_status *prealloc) { struct ext4_es_tree *tree = &EXT4_I(inode)->i_es_tree; struct rb_node **p = &tree->root.rb_node; @@ -789,10 +809,15 @@ static int __es_insert_extent(struct inode *inode, struct extent_status *newes) } } - es = ext4_es_alloc_extent(inode, newes->es_lblk, newes->es_len, - newes->es_pblk); + if (prealloc) + es = prealloc; + else + es = __es_alloc_extent(false); if (!es) return -ENOMEM; + ext4_es_init_extent(inode, es, newes->es_lblk, newes->es_len, + newes->es_pblk); + rb_link_node(&es->rb_node, parent, p); rb_insert_color(&es->rb_node, &tree->root); @@ -804,26 +829,27 @@ out: /* * ext4_es_insert_extent() adds information to an inode's extent * status tree. - * - * Return 0 on success, error code on failure. */ -int ext4_es_insert_extent(struct inode *inode, ext4_lblk_t lblk, - ext4_lblk_t len, ext4_fsblk_t pblk, - unsigned int status) +void ext4_es_insert_extent(struct inode *inode, ext4_lblk_t lblk, + ext4_lblk_t len, ext4_fsblk_t pblk, + unsigned int status) { struct extent_status newes; ext4_lblk_t end = lblk + len - 1; - int err = 0; + int err1 = 0; + int err2 = 0; struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); + struct extent_status *es1 = NULL; + struct extent_status *es2 = NULL; if (EXT4_SB(inode->i_sb)->s_mount_state & EXT4_FC_REPLAY) - return 0; + return; es_debug("add [%u/%u) %llu %x to extent status tree of inode %lu\n", lblk, len, pblk, status, inode->i_ino); if (!len) - return 0; + return; BUG_ON(end < lblk); @@ -842,29 +868,40 @@ int ext4_es_insert_extent(struct inode *inode, ext4_lblk_t lblk, ext4_es_insert_extent_check(inode, &newes); +retry: + if (err1 && !es1) + es1 = __es_alloc_extent(true); + if ((err1 || err2) && !es2) + es2 = __es_alloc_extent(true); write_lock(&EXT4_I(inode)->i_es_lock); - err = __es_remove_extent(inode, lblk, end, NULL); - if (err != 0) + + err1 = __es_remove_extent(inode, lblk, end, NULL, es1); + if (err1 != 0) + goto error; + + err2 = __es_insert_extent(inode, &newes, es2); + if (err2 == -ENOMEM && !ext4_es_must_keep(&newes)) + err2 = 0; + if (err2 != 0) goto error; -retry: - err = __es_insert_extent(inode, &newes); - if (err == -ENOMEM && __es_shrink(EXT4_SB(inode->i_sb), - 128, EXT4_I(inode))) - goto retry; - if (err == -ENOMEM && !ext4_es_is_delayed(&newes)) - err = 0; if (sbi->s_cluster_ratio > 1 && test_opt(inode->i_sb, DELALLOC) && (status & EXTENT_STATUS_WRITTEN || status & EXTENT_STATUS_UNWRITTEN)) __revise_pending(inode, lblk, len); + /* es is pre-allocated but not used, free it. */ + if (es1 && !es1->es_len) + __es_free_extent(es1); + if (es2 && !es2->es_len) + __es_free_extent(es2); error: write_unlock(&EXT4_I(inode)->i_es_lock); + if (err1 || err2) + goto retry; ext4_es_print_tree(inode); - - return err; + return; } /* @@ -897,7 +934,7 @@ void ext4_es_cache_extent(struct inode *inode, ext4_lblk_t lblk, es = __es_tree_search(&EXT4_I(inode)->i_es_tree.root, lblk); if (!es || es->es_lblk > end) - __es_insert_extent(inode, &newes); + __es_insert_extent(inode, &newes, NULL); write_unlock(&EXT4_I(inode)->i_es_lock); } @@ -1287,6 +1324,7 @@ static unsigned int get_rsvd(struct inode *inode, ext4_lblk_t end, * @lblk - first block in range * @end - last block in range * @reserved - number of cluster reservations released + * @prealloc - pre-allocated es to avoid memory allocation failures * * If @reserved is not NULL and delayed allocation is enabled, counts * block/cluster reservations freed by removing range and if bigalloc @@ -1294,7 +1332,8 @@ static unsigned int get_rsvd(struct inode *inode, ext4_lblk_t end, * error code on failure. */ static int __es_remove_extent(struct inode *inode, ext4_lblk_t lblk, - ext4_lblk_t end, int *reserved) + ext4_lblk_t end, int *reserved, + struct extent_status *prealloc) { struct ext4_es_tree *tree = &EXT4_I(inode)->i_es_tree; struct rb_node *node; @@ -1302,14 +1341,12 @@ static int __es_remove_extent(struct inode *inode, ext4_lblk_t lblk, struct extent_status orig_es; ext4_lblk_t len1, len2; ext4_fsblk_t block; - int err; + int err = 0; bool count_reserved = true; struct rsvd_count rc; if (reserved == NULL || !test_opt(inode->i_sb, DELALLOC)) count_reserved = false; -retry: - err = 0; es = __es_tree_search(&tree->root, lblk); if (!es) @@ -1343,14 +1380,13 @@ retry: orig_es.es_len - len2; ext4_es_store_pblock_status(&newes, block, ext4_es_status(&orig_es)); - err = __es_insert_extent(inode, &newes); + err = __es_insert_extent(inode, &newes, prealloc); if (err) { + if (!ext4_es_must_keep(&newes)) + return 0; + es->es_lblk = orig_es.es_lblk; es->es_len = orig_es.es_len; - if ((err == -ENOMEM) && - __es_shrink(EXT4_SB(inode->i_sb), - 128, EXT4_I(inode))) - goto retry; goto out; } } else { @@ -1422,39 +1458,48 @@ out: * @len - number of blocks to remove * * Reduces block/cluster reservation count and for bigalloc cancels pending - * reservations as needed. Returns 0 on success, error code on failure. + * reservations as needed. */ -int ext4_es_remove_extent(struct inode *inode, ext4_lblk_t lblk, - ext4_lblk_t len) +void ext4_es_remove_extent(struct inode *inode, ext4_lblk_t lblk, + ext4_lblk_t len) { ext4_lblk_t end; int err = 0; int reserved = 0; + struct extent_status *es = NULL; if (EXT4_SB(inode->i_sb)->s_mount_state & EXT4_FC_REPLAY) - return 0; + return; trace_ext4_es_remove_extent(inode, lblk, len); es_debug("remove [%u/%u) from extent status tree of inode %lu\n", lblk, len, inode->i_ino); if (!len) - return err; + return; end = lblk + len - 1; BUG_ON(end < lblk); +retry: + if (err && !es) + es = __es_alloc_extent(true); /* * ext4_clear_inode() depends on us taking i_es_lock unconditionally * so that we are sure __es_shrink() is done with the inode before it * is reclaimed. */ write_lock(&EXT4_I(inode)->i_es_lock); - err = __es_remove_extent(inode, lblk, end, &reserved); + err = __es_remove_extent(inode, lblk, end, &reserved, es); + if (es && !es->es_len) + __es_free_extent(es); write_unlock(&EXT4_I(inode)->i_es_lock); + if (err) + goto retry; + ext4_es_print_tree(inode); ext4_da_release_space(inode, reserved); - return err; + return; } static int __es_shrink(struct ext4_sb_info *sbi, int nr_to_scan, @@ -1702,11 +1747,8 @@ static int es_do_reclaim_extents(struct ext4_inode_info *ei, ext4_lblk_t end, (*nr_to_scan)--; node = rb_next(&es->rb_node); - /* - * We can't reclaim delayed extent from status tree because - * fiemap, bigallic, and seek_data/hole need to use it. - */ - if (ext4_es_is_delayed(es)) + + if (ext4_es_must_keep(es)) goto next; if (ext4_es_is_referenced(es)) { ext4_es_clear_referenced(es); @@ -1770,7 +1812,7 @@ void ext4_clear_inode_es(struct inode *inode) while (node) { es = rb_entry(node, struct extent_status, rb_node); node = rb_next(node); - if (!ext4_es_is_delayed(es)) { + if (!ext4_es_must_keep(es)) { rb_erase(&es->rb_node, &tree->root); ext4_es_free_extent(inode, es); } @@ -1972,17 +2014,18 @@ bool ext4_is_pending(struct inode *inode, ext4_lblk_t lblk) * @lblk - logical block to be added * @allocated - indicates whether a physical cluster has been allocated for * the logical cluster that contains the block - * - * Returns 0 on success, negative error code on failure. */ -int ext4_es_insert_delayed_block(struct inode *inode, ext4_lblk_t lblk, - bool allocated) +void ext4_es_insert_delayed_block(struct inode *inode, ext4_lblk_t lblk, + bool allocated) { struct extent_status newes; - int err = 0; + int err1 = 0; + int err2 = 0; + struct extent_status *es1 = NULL; + struct extent_status *es2 = NULL; if (EXT4_SB(inode->i_sb)->s_mount_state & EXT4_FC_REPLAY) - return 0; + return; es_debug("add [%u/1) delayed to extent status tree of inode %lu\n", lblk, inode->i_ino); @@ -1994,29 +2037,37 @@ int ext4_es_insert_delayed_block(struct inode *inode, ext4_lblk_t lblk, ext4_es_insert_extent_check(inode, &newes); +retry: + if (err1 && !es1) + es1 = __es_alloc_extent(true); + if ((err1 || err2) && !es2) + es2 = __es_alloc_extent(true); write_lock(&EXT4_I(inode)->i_es_lock); - err = __es_remove_extent(inode, lblk, lblk, NULL); - if (err != 0) + err1 = __es_remove_extent(inode, lblk, lblk, NULL, es1); + if (err1 != 0) goto error; -retry: - err = __es_insert_extent(inode, &newes); - if (err == -ENOMEM && __es_shrink(EXT4_SB(inode->i_sb), - 128, EXT4_I(inode))) - goto retry; - if (err != 0) + + err2 = __es_insert_extent(inode, &newes, es2); + if (err2 != 0) goto error; if (allocated) __insert_pending(inode, lblk); + /* es is pre-allocated but not used, free it. */ + if (es1 && !es1->es_len) + __es_free_extent(es1); + if (es2 && !es2->es_len) + __es_free_extent(es2); error: write_unlock(&EXT4_I(inode)->i_es_lock); + if (err1 || err2) + goto retry; ext4_es_print_tree(inode); ext4_print_pending_tree(inode); - - return err; + return; } /* |