diff options
Diffstat (limited to 'fs/btrfs/block-group.c')
-rw-r--r-- | fs/btrfs/block-group.c | 540 |
1 files changed, 296 insertions, 244 deletions
diff --git a/fs/btrfs/block-group.c b/fs/btrfs/block-group.c index 404e050ce8ee..613920c17ac1 100644 --- a/fs/btrfs/block-group.c +++ b/fs/btrfs/block-group.c @@ -7,7 +7,6 @@ #include "disk-io.h" #include "free-space-cache.h" #include "free-space-tree.h" -#include "disk-io.h" #include "volumes.h" #include "transaction.h" #include "ref-verify.h" @@ -66,11 +65,8 @@ static u64 btrfs_reduce_alloc_profile(struct btrfs_fs_info *fs_info, u64 flags) spin_lock(&fs_info->balance_lock); target = get_restripe_target(fs_info, flags); if (target) { - /* Pick target profile only if it's already available */ - if ((flags & target) & BTRFS_EXTENDED_PROFILE_MASK) { - spin_unlock(&fs_info->balance_lock); - return extended_to_chunk(target); - } + spin_unlock(&fs_info->balance_lock); + return extended_to_chunk(target); } spin_unlock(&fs_info->balance_lock); @@ -119,12 +115,12 @@ u64 btrfs_get_alloc_profile(struct btrfs_fs_info *fs_info, u64 orig_flags) void btrfs_get_block_group(struct btrfs_block_group *cache) { - atomic_inc(&cache->count); + refcount_inc(&cache->refs); } void btrfs_put_block_group(struct btrfs_block_group *cache) { - if (atomic_dec_and_test(&cache->count)) { + if (refcount_dec_and_test(&cache->refs)) { WARN_ON(cache->pinned > 0); WARN_ON(cache->reserved > 0); @@ -161,6 +157,8 @@ static int btrfs_add_block_group_cache(struct btrfs_fs_info *info, struct rb_node *parent = NULL; struct btrfs_block_group *cache; + ASSERT(block_group->length != 0); + spin_lock(&info->block_group_cache_lock); p = &info->block_group_cache_tree.rb_node; @@ -460,7 +458,7 @@ u64 add_new_free_space(struct btrfs_block_group *block_group, u64 start, u64 end int ret; while (start < end) { - ret = find_first_extent_bit(info->pinned_extents, start, + ret = find_first_extent_bit(&info->excluded_extents, start, &extent_start, &extent_end, EXTENT_DIRTY | EXTENT_UPTODATE, NULL); @@ -856,18 +854,41 @@ static void clear_incompat_bg_bits(struct btrfs_fs_info *fs_info, u64 flags) found_raid1c34 = true; up_read(&sinfo->groups_sem); } - if (found_raid56) + if (!found_raid56) btrfs_clear_fs_incompat(fs_info, RAID56); - if (found_raid1c34) + if (!found_raid1c34) btrfs_clear_fs_incompat(fs_info, RAID1C34); } } +static int remove_block_group_item(struct btrfs_trans_handle *trans, + struct btrfs_path *path, + struct btrfs_block_group *block_group) +{ + struct btrfs_fs_info *fs_info = trans->fs_info; + struct btrfs_root *root; + struct btrfs_key key; + int ret; + + root = fs_info->extent_root; + key.objectid = block_group->start; + key.type = BTRFS_BLOCK_GROUP_ITEM_KEY; + key.offset = block_group->length; + + ret = btrfs_search_slot(trans, root, &key, path, -1, 1); + if (ret > 0) + ret = -ENOENT; + if (ret < 0) + return ret; + + ret = btrfs_del_item(trans, root, path); + return ret; +} + int btrfs_remove_block_group(struct btrfs_trans_handle *trans, u64 group_start, struct extent_map *em) { struct btrfs_fs_info *fs_info = trans->fs_info; - struct btrfs_root *root = fs_info->extent_root; struct btrfs_path *path; struct btrfs_block_group *block_group; struct btrfs_free_cluster *cluster; @@ -992,6 +1013,9 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans, &fs_info->block_group_cache_tree); RB_CLEAR_NODE(&block_group->cache_node); + /* Once for the block groups rbtree */ + btrfs_put_block_group(block_group); + if (fs_info->first_logical_byte == block_group->start) fs_info->first_logical_byte = (u64)-1; spin_unlock(&fs_info->block_group_cache_lock); @@ -1065,26 +1089,43 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans, spin_unlock(&block_group->space_info->lock); - key.objectid = block_group->start; - key.type = BTRFS_BLOCK_GROUP_ITEM_KEY; - key.offset = block_group->length; + /* + * Remove the free space for the block group from the free space tree + * and the block group's item from the extent tree before marking the + * block group as removed. This is to prevent races with tasks that + * freeze and unfreeze a block group, this task and another task + * allocating a new block group - the unfreeze task ends up removing + * the block group's extent map before the task calling this function + * deletes the block group item from the extent tree, allowing for + * another task to attempt to create another block group with the same + * item key (and failing with -EEXIST and a transaction abort). + */ + ret = remove_block_group_free_space(trans, block_group); + if (ret) + goto out; + + ret = remove_block_group_item(trans, path, block_group); + if (ret < 0) + goto out; - mutex_lock(&fs_info->chunk_mutex); spin_lock(&block_group->lock); block_group->removed = 1; /* - * At this point trimming can't start on this block group, because we - * removed the block group from the tree fs_info->block_group_cache_tree - * so no one can't find it anymore and even if someone already got this - * block group before we removed it from the rbtree, they have already - * incremented block_group->trimming - if they didn't, they won't find - * any free space entries because we already removed them all when we - * called btrfs_remove_free_space_cache(). + * At this point trimming or scrub can't start on this block group, + * because we removed the block group from the rbtree + * fs_info->block_group_cache_tree so no one can't find it anymore and + * even if someone already got this block group before we removed it + * from the rbtree, they have already incremented block_group->frozen - + * if they didn't, for the trimming case they won't find any free space + * entries because we already removed them all when we called + * btrfs_remove_free_space_cache(). * * And we must not remove the extent map from the fs_info->mapping_tree * to prevent the same logical address range and physical device space - * ranges from being reused for a new block group. This is because our - * fs trim operation (btrfs_trim_fs() / btrfs_ioctl_fitrim()) is + * ranges from being reused for a new block group. This is needed to + * avoid races with trimming and scrub. + * + * An fs trim operation (btrfs_trim_fs() / btrfs_ioctl_fitrim()) is * completely transactionless, so while it is trimming a range the * currently running transaction might finish and a new one start, * allowing for new block groups to be created that can reuse the same @@ -1095,28 +1136,9 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans, * in place until the extents have been discarded completely when * the transaction commit has completed. */ - remove_em = (atomic_read(&block_group->trimming) == 0); + remove_em = (atomic_read(&block_group->frozen) == 0); spin_unlock(&block_group->lock); - mutex_unlock(&fs_info->chunk_mutex); - - ret = remove_block_group_free_space(trans, block_group); - if (ret) - goto out; - - btrfs_put_block_group(block_group); - btrfs_put_block_group(block_group); - - ret = btrfs_search_slot(trans, root, &key, path, -1, 1); - if (ret > 0) - ret = -EIO; - if (ret < 0) - goto out; - - ret = btrfs_del_item(trans, root, path); - if (ret) - goto out; - if (remove_em) { struct extent_map_tree *em_tree; @@ -1127,7 +1149,10 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans, /* once for the tree */ free_extent_map(em); } + out: + /* Once for the lookup reference */ + btrfs_put_block_group(block_group); if (remove_rsv) btrfs_delayed_refs_rsv_release(fs_info, 1); btrfs_free_path(path); @@ -1171,7 +1196,7 @@ struct btrfs_trans_handle *btrfs_start_trans_remove_block_group( free_extent_map(em); return btrfs_start_transaction_fallback_global_rsv(fs_info->extent_root, - num_items, 1); + num_items); } /* @@ -1248,6 +1273,51 @@ out: return ret; } +static bool clean_pinned_extents(struct btrfs_trans_handle *trans, + struct btrfs_block_group *bg) +{ + struct btrfs_fs_info *fs_info = bg->fs_info; + struct btrfs_transaction *prev_trans = NULL; + const u64 start = bg->start; + const u64 end = start + bg->length - 1; + int ret; + + spin_lock(&fs_info->trans_lock); + if (trans->transaction->list.prev != &fs_info->trans_list) { + prev_trans = list_last_entry(&trans->transaction->list, + struct btrfs_transaction, list); + refcount_inc(&prev_trans->use_count); + } + spin_unlock(&fs_info->trans_lock); + + /* + * Hold the unused_bg_unpin_mutex lock to avoid racing with + * btrfs_finish_extent_commit(). If we are at transaction N, another + * task might be running finish_extent_commit() for the previous + * transaction N - 1, and have seen a range belonging to the block + * group in pinned_extents before we were able to clear the whole block + * group range from pinned_extents. This means that task can lookup for + * the block group after we unpinned it from pinned_extents and removed + * it, leading to a BUG_ON() at unpin_extent_range(). + */ + mutex_lock(&fs_info->unused_bg_unpin_mutex); + if (prev_trans) { + ret = clear_extent_bits(&prev_trans->pinned_extents, start, end, + EXTENT_DIRTY); + if (ret) + goto out; + } + + ret = clear_extent_bits(&trans->transaction->pinned_extents, start, end, + EXTENT_DIRTY); +out: + mutex_unlock(&fs_info->unused_bg_unpin_mutex); + if (prev_trans) + btrfs_put_transaction(prev_trans); + + return ret == 0; +} + /* * Process the unused_bgs list and remove any that don't have any allocated * space inside of them. @@ -1265,7 +1335,6 @@ void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info) spin_lock(&fs_info->unused_bgs_lock); while (!list_empty(&fs_info->unused_bgs)) { - u64 start, end; int trimming; block_group = list_first_entry(&fs_info->unused_bgs, @@ -1344,35 +1413,10 @@ void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info) * We could have pending pinned extents for this block group, * just delete them, we don't care about them anymore. */ - start = block_group->start; - end = start + block_group->length - 1; - /* - * Hold the unused_bg_unpin_mutex lock to avoid racing with - * btrfs_finish_extent_commit(). If we are at transaction N, - * another task might be running finish_extent_commit() for the - * previous transaction N - 1, and have seen a range belonging - * to the block group in freed_extents[] before we were able to - * clear the whole block group range from freed_extents[]. This - * means that task can lookup for the block group after we - * unpinned it from freed_extents[] and removed it, leading to - * a BUG_ON() at btrfs_unpin_extent_range(). - */ - mutex_lock(&fs_info->unused_bg_unpin_mutex); - ret = clear_extent_bits(&fs_info->freed_extents[0], start, end, - EXTENT_DIRTY); - if (ret) { - mutex_unlock(&fs_info->unused_bg_unpin_mutex); + if (!clean_pinned_extents(trans, block_group)) { btrfs_dec_block_group_ro(block_group); goto end_trans; } - ret = clear_extent_bits(&fs_info->freed_extents[1], start, end, - EXTENT_DIRTY); - if (ret) { - mutex_unlock(&fs_info->unused_bg_unpin_mutex); - btrfs_dec_block_group_ro(block_group); - goto end_trans; - } - mutex_unlock(&fs_info->unused_bg_unpin_mutex); /* * At this point, the block_group is read only and should fail @@ -1421,7 +1465,7 @@ void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info) /* Implicit trim during transaction commit. */ if (trimming) - btrfs_get_block_group_trimming(block_group); + btrfs_freeze_block_group(block_group); /* * Btrfs_remove_chunk will abort the transaction if things go @@ -1431,7 +1475,7 @@ void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info) if (ret) { if (trimming) - btrfs_put_block_group_trimming(block_group); + btrfs_unfreeze_block_group(block_group); goto end_trans; } @@ -1482,21 +1526,70 @@ void btrfs_mark_bg_unused(struct btrfs_block_group *bg) spin_unlock(&fs_info->unused_bgs_lock); } +static int read_bg_from_eb(struct btrfs_fs_info *fs_info, struct btrfs_key *key, + struct btrfs_path *path) +{ + struct extent_map_tree *em_tree; + struct extent_map *em; + struct btrfs_block_group_item bg; + struct extent_buffer *leaf; + int slot; + u64 flags; + int ret = 0; + + slot = path->slots[0]; + leaf = path->nodes[0]; + + em_tree = &fs_info->mapping_tree; + read_lock(&em_tree->lock); + em = lookup_extent_mapping(em_tree, key->objectid, key->offset); + read_unlock(&em_tree->lock); + if (!em) { + btrfs_err(fs_info, + "logical %llu len %llu found bg but no related chunk", + key->objectid, key->offset); + return -ENOENT; + } + + if (em->start != key->objectid || em->len != key->offset) { + btrfs_err(fs_info, + "block group %llu len %llu mismatch with chunk %llu len %llu", + key->objectid, key->offset, em->start, em->len); + ret = -EUCLEAN; + goto out_free_em; + } + + read_extent_buffer(leaf, &bg, btrfs_item_ptr_offset(leaf, slot), + sizeof(bg)); + flags = btrfs_stack_block_group_flags(&bg) & + BTRFS_BLOCK_GROUP_TYPE_MASK; + + if (flags != (em->map_lookup->type & BTRFS_BLOCK_GROUP_TYPE_MASK)) { + btrfs_err(fs_info, +"block group %llu len %llu type flags 0x%llx mismatch with chunk type flags 0x%llx", + key->objectid, key->offset, flags, + (BTRFS_BLOCK_GROUP_TYPE_MASK & em->map_lookup->type)); + ret = -EUCLEAN; + } + +out_free_em: + free_extent_map(em); + return ret; +} + static int find_first_block_group(struct btrfs_fs_info *fs_info, struct btrfs_path *path, struct btrfs_key *key) { struct btrfs_root *root = fs_info->extent_root; - int ret = 0; + int ret; struct btrfs_key found_key; struct extent_buffer *leaf; - struct btrfs_block_group_item bg; - u64 flags; int slot; ret = btrfs_search_slot(NULL, root, key, path, 0, 0); if (ret < 0) - goto out; + return ret; while (1) { slot = path->slots[0]; @@ -1513,49 +1606,10 @@ static int find_first_block_group(struct btrfs_fs_info *fs_info, if (found_key.objectid >= key->objectid && found_key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) { - struct extent_map_tree *em_tree; - struct extent_map *em; - - em_tree = &root->fs_info->mapping_tree; - read_lock(&em_tree->lock); - em = lookup_extent_mapping(em_tree, found_key.objectid, - found_key.offset); - read_unlock(&em_tree->lock); - if (!em) { - btrfs_err(fs_info, - "logical %llu len %llu found bg but no related chunk", - found_key.objectid, found_key.offset); - ret = -ENOENT; - } else if (em->start != found_key.objectid || - em->len != found_key.offset) { - btrfs_err(fs_info, - "block group %llu len %llu mismatch with chunk %llu len %llu", - found_key.objectid, found_key.offset, - em->start, em->len); - ret = -EUCLEAN; - } else { - read_extent_buffer(leaf, &bg, - btrfs_item_ptr_offset(leaf, slot), - sizeof(bg)); - flags = btrfs_stack_block_group_flags(&bg) & - BTRFS_BLOCK_GROUP_TYPE_MASK; - - if (flags != (em->map_lookup->type & - BTRFS_BLOCK_GROUP_TYPE_MASK)) { - btrfs_err(fs_info, -"block group %llu len %llu type flags 0x%llx mismatch with chunk type flags 0x%llx", - found_key.objectid, - found_key.offset, flags, - (BTRFS_BLOCK_GROUP_TYPE_MASK & - em->map_lookup->type)); - ret = -EUCLEAN; - } else { - ret = 0; - } - } - free_extent_map(em); - goto out; + ret = read_bg_from_eb(fs_info, &found_key, path); + break; } + path->slots[0]++; } out: @@ -1607,19 +1661,12 @@ int btrfs_rmap_block(struct btrfs_fs_info *fs_info, u64 chunk_start, return -EIO; map = em->map_lookup; - data_stripe_length = em->len; + data_stripe_length = em->orig_block_len; io_stripe_size = map->stripe_len; - if (map->type & BTRFS_BLOCK_GROUP_RAID10) - data_stripe_length = div_u64(data_stripe_length, - map->num_stripes / map->sub_stripes); - else if (map->type & BTRFS_BLOCK_GROUP_RAID0) - data_stripe_length = div_u64(data_stripe_length, map->num_stripes); - else if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK) { - data_stripe_length = div_u64(data_stripe_length, - nr_data_stripes(map)); + /* For RAID5/6 adjust to a full IO stripe length */ + if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK) io_stripe_size = map->stripe_len * nr_data_stripes(map); - } buf = kcalloc(map->num_stripes, sizeof(u64), GFP_NOFS); if (!buf) { @@ -1698,25 +1745,12 @@ static int exclude_super_stripes(struct btrfs_block_group *cache) return ret; while (nr--) { - u64 start, len; - - if (logical[nr] > cache->start + cache->length) - continue; - - if (logical[nr] + stripe_len <= cache->start) - continue; - - start = logical[nr]; - if (start < cache->start) { - start = cache->start; - len = (logical[nr] + stripe_len) - start; - } else { - len = min_t(u64, stripe_len, - cache->start + cache->length - start); - } + u64 len = min_t(u64, stripe_len, + cache->start + cache->length - logical[nr]); cache->bytes_super += len; - ret = btrfs_add_excluded_extent(fs_info, start, len); + ret = btrfs_add_excluded_extent(fs_info, logical[nr], + len); if (ret) { kfree(logical); return ret; @@ -1745,7 +1779,7 @@ static void link_block_group(struct btrfs_block_group *cache) } static struct btrfs_block_group *btrfs_create_block_group_cache( - struct btrfs_fs_info *fs_info, u64 start, u64 size) + struct btrfs_fs_info *fs_info, u64 start) { struct btrfs_block_group *cache; @@ -1761,7 +1795,6 @@ static struct btrfs_block_group *btrfs_create_block_group_cache( } cache->start = start; - cache->length = size; cache->fs_info = fs_info; cache->full_stripe_len = btrfs_full_stripe_len(fs_info, start); @@ -1769,7 +1802,7 @@ static struct btrfs_block_group *btrfs_create_block_group_cache( cache->discard_index = BTRFS_DISCARD_INDEX_UNUSED; - atomic_set(&cache->count, 1); + refcount_set(&cache->refs, 1); spin_lock_init(&cache->lock); init_rwsem(&cache->data_rwsem); INIT_LIST_HEAD(&cache->list); @@ -1780,7 +1813,7 @@ static struct btrfs_block_group *btrfs_create_block_group_cache( INIT_LIST_HEAD(&cache->dirty_list); INIT_LIST_HEAD(&cache->io_list); btrfs_init_free_space_ctl(cache); - atomic_set(&cache->trimming, 0); + atomic_set(&cache->frozen, 0); mutex_init(&cache->free_space_lock); btrfs_init_full_stripe_locks_tree(&cache->full_stripe_locks_root); @@ -1841,25 +1874,44 @@ static int check_chunk_block_group_mappings(struct btrfs_fs_info *fs_info) return ret; } +static int read_block_group_item(struct btrfs_block_group *cache, + struct btrfs_path *path, + const struct btrfs_key *key) +{ + struct extent_buffer *leaf = path->nodes[0]; + struct btrfs_block_group_item bgi; + int slot = path->slots[0]; + + cache->length = key->offset; + + read_extent_buffer(leaf, &bgi, btrfs_item_ptr_offset(leaf, slot), + sizeof(bgi)); + cache->used = btrfs_stack_block_group_used(&bgi); + cache->flags = btrfs_stack_block_group_flags(&bgi); + + return 0; +} + static int read_one_block_group(struct btrfs_fs_info *info, struct btrfs_path *path, const struct btrfs_key *key, int need_clear) { - struct extent_buffer *leaf = path->nodes[0]; struct btrfs_block_group *cache; struct btrfs_space_info *space_info; - struct btrfs_block_group_item bgi; const bool mixed = btrfs_fs_incompat(info, MIXED_GROUPS); - int slot = path->slots[0]; int ret; ASSERT(key->type == BTRFS_BLOCK_GROUP_ITEM_KEY); - cache = btrfs_create_block_group_cache(info, key->objectid, key->offset); + cache = btrfs_create_block_group_cache(info, key->objectid); if (!cache) return -ENOMEM; + ret = read_block_group_item(cache, path, key); + if (ret < 0) + goto error; + if (need_clear) { /* * When we mount with old space cache, we need to @@ -1874,10 +1926,6 @@ static int read_one_block_group(struct btrfs_fs_info *info, if (btrfs_test_opt(info, SPACE_CACHE)) cache->disk_cache_state = BTRFS_DC_CLEAR; } - read_extent_buffer(leaf, &bgi, btrfs_item_ptr_offset(leaf, slot), - sizeof(bgi)); - cache->used = btrfs_stack_block_group_used(&bgi); - cache->flags = btrfs_stack_block_group_flags(&bgi); if (!mixed && ((cache->flags & BTRFS_BLOCK_GROUP_METADATA) && (cache->flags & BTRFS_BLOCK_GROUP_DATA))) { btrfs_err(info, @@ -1905,15 +1953,15 @@ static int read_one_block_group(struct btrfs_fs_info *info, * are empty, and we can just add all the space in and be done with it. * This saves us _a_lot_ of time, particularly in the full case. */ - if (key->offset == cache->used) { + if (cache->length == cache->used) { cache->last_byte_to_unpin = (u64)-1; cache->cached = BTRFS_CACHE_FINISHED; btrfs_free_excluded_extents(cache); } else if (cache->used == 0) { cache->last_byte_to_unpin = (u64)-1; cache->cached = BTRFS_CACHE_FINISHED; - add_new_free_space(cache, key->objectid, - key->objectid + key->offset); + add_new_free_space(cache, cache->start, + cache->start + cache->length); btrfs_free_excluded_extents(cache); } @@ -1923,7 +1971,7 @@ static int read_one_block_group(struct btrfs_fs_info *info, goto error; } trace_btrfs_add_block_group(info, cache, 0); - btrfs_update_space_info(info, cache->flags, key->offset, + btrfs_update_space_info(info, cache->flags, cache->length, cache->used, cache->bytes_super, &space_info); cache->space_info = space_info; @@ -1962,7 +2010,6 @@ int btrfs_read_block_groups(struct btrfs_fs_info *info) path = btrfs_alloc_path(); if (!path) return -ENOMEM; - path->reada = READA_FORWARD; cache_gen = btrfs_super_cache_generation(info->super_copy); if (btrfs_test_opt(info, SPACE_CACHE) && @@ -1987,6 +2034,7 @@ int btrfs_read_block_groups(struct btrfs_fs_info *info) btrfs_release_path(path); } + rcu_read_lock(); list_for_each_entry_rcu(space_info, &info->space_info, list) { if (!(btrfs_get_alloc_profile(info, space_info->flags) & (BTRFS_BLOCK_GROUP_RAID10 | @@ -2007,6 +2055,7 @@ int btrfs_read_block_groups(struct btrfs_fs_info *info) list) inc_block_group_ro(cache, 1); } + rcu_read_unlock(); btrfs_init_global_block_rsv(info); ret = check_chunk_block_group_mappings(info); @@ -2015,13 +2064,32 @@ error: return ret; } +static int insert_block_group_item(struct btrfs_trans_handle *trans, + struct btrfs_block_group *block_group) +{ + struct btrfs_fs_info *fs_info = trans->fs_info; + struct btrfs_block_group_item bgi; + struct btrfs_root *root; + struct btrfs_key key; + + spin_lock(&block_group->lock); + btrfs_set_stack_block_group_used(&bgi, block_group->used); + btrfs_set_stack_block_group_chunk_objectid(&bgi, + BTRFS_FIRST_CHUNK_TREE_OBJECTID); + btrfs_set_stack_block_group_flags(&bgi, block_group->flags); + key.objectid = block_group->start; + key.type = BTRFS_BLOCK_GROUP_ITEM_KEY; + key.offset = block_group->length; + spin_unlock(&block_group->lock); + + root = fs_info->extent_root; + return btrfs_insert_item(trans, root, &key, &bgi, sizeof(bgi)); +} + void btrfs_create_pending_block_groups(struct btrfs_trans_handle *trans) { struct btrfs_fs_info *fs_info = trans->fs_info; struct btrfs_block_group *block_group; - struct btrfs_root *extent_root = fs_info->extent_root; - struct btrfs_block_group_item item; - struct btrfs_key key; int ret = 0; if (!trans->can_flush_pending_bgs) @@ -2034,21 +2102,11 @@ void btrfs_create_pending_block_groups(struct btrfs_trans_handle *trans) if (ret) goto next; - spin_lock(&block_group->lock); - btrfs_set_stack_block_group_used(&item, block_group->used); - btrfs_set_stack_block_group_chunk_objectid(&item, - BTRFS_FIRST_CHUNK_TREE_OBJECTID); - btrfs_set_stack_block_group_flags(&item, block_group->flags); - key.objectid = block_group->start; - key.type = BTRFS_BLOCK_GROUP_ITEM_KEY; - key.offset = block_group->length; - spin_unlock(&block_group->lock); - - ret = btrfs_insert_item(trans, extent_root, &key, &item, - sizeof(item)); + ret = insert_block_group_item(trans, block_group); if (ret) btrfs_abort_transaction(trans, ret); - ret = btrfs_finish_chunk_alloc(trans, key.objectid, key.offset); + ret = btrfs_finish_chunk_alloc(trans, block_group->start, + block_group->length); if (ret) btrfs_abort_transaction(trans, ret); add_block_group_free_space(trans, block_group); @@ -2069,10 +2127,11 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans, u64 bytes_used, btrfs_set_log_full_commit(trans); - cache = btrfs_create_block_group_cache(fs_info, chunk_offset, size); + cache = btrfs_create_block_group_cache(fs_info, chunk_offset); if (!cache) return -ENOMEM; + cache->length = size; cache->used = bytes_used; cache->flags = type; cache->last_byte_to_unpin = (u64)-1; @@ -2132,54 +2191,6 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans, u64 bytes_used, return 0; } -static u64 update_block_group_flags(struct btrfs_fs_info *fs_info, u64 flags) -{ - u64 num_devices; - u64 stripped; - - /* - * if restripe for this chunk_type is on pick target profile and - * return, otherwise do the usual balance - */ - stripped = get_restripe_target(fs_info, flags); - if (stripped) - return extended_to_chunk(stripped); - - num_devices = fs_info->fs_devices->rw_devices; - - stripped = BTRFS_BLOCK_GROUP_RAID0 | BTRFS_BLOCK_GROUP_RAID56_MASK | - BTRFS_BLOCK_GROUP_RAID1_MASK | BTRFS_BLOCK_GROUP_RAID10; - - if (num_devices == 1) { - stripped |= BTRFS_BLOCK_GROUP_DUP; - stripped = flags & ~stripped; - - /* turn raid0 into single device chunks */ - if (flags & BTRFS_BLOCK_GROUP_RAID0) - return stripped; - - /* turn mirroring into duplication */ - if (flags & (BTRFS_BLOCK_GROUP_RAID1_MASK | - BTRFS_BLOCK_GROUP_RAID10)) - return stripped | BTRFS_BLOCK_GROUP_DUP; - } else { - /* they already had raid on here, just return */ - if (flags & stripped) - return flags; - - stripped |= BTRFS_BLOCK_GROUP_DUP; - stripped = flags & ~stripped; - - /* switch duplicated blocks with raid1 */ - if (flags & BTRFS_BLOCK_GROUP_DUP) - return stripped | BTRFS_BLOCK_GROUP_RAID1; - - /* this is drive concat, leave it alone */ - } - - return flags; -} - /* * Mark one block group RO, can be called several times for the same block * group. @@ -2225,7 +2236,7 @@ again: * If we are changing raid levels, try to allocate a * corresponding block group with the new raid level. */ - alloc_flags = update_block_group_flags(fs_info, cache->flags); + alloc_flags = btrfs_get_alloc_profile(fs_info, cache->flags); if (alloc_flags != cache->flags) { ret = btrfs_chunk_alloc(trans, alloc_flags, CHUNK_ALLOC_FORCE); @@ -2252,7 +2263,7 @@ again: ret = inc_block_group_ro(cache, 0); out: if (cache->flags & BTRFS_BLOCK_GROUP_SYSTEM) { - alloc_flags = update_block_group_flags(fs_info, cache->flags); + alloc_flags = btrfs_get_alloc_profile(fs_info, cache->flags); mutex_lock(&fs_info->chunk_mutex); check_system_chunk(trans, alloc_flags); mutex_unlock(&fs_info->chunk_mutex); @@ -2283,13 +2294,13 @@ void btrfs_dec_block_group_ro(struct btrfs_block_group *cache) spin_unlock(&sinfo->lock); } -static int write_one_cache_group(struct btrfs_trans_handle *trans, - struct btrfs_path *path, - struct btrfs_block_group *cache) +static int update_block_group_item(struct btrfs_trans_handle *trans, + struct btrfs_path *path, + struct btrfs_block_group *cache) { struct btrfs_fs_info *fs_info = trans->fs_info; int ret; - struct btrfs_root *extent_root = fs_info->extent_root; + struct btrfs_root *root = fs_info->extent_root; unsigned long bi; struct extent_buffer *leaf; struct btrfs_block_group_item bgi; @@ -2299,7 +2310,7 @@ static int write_one_cache_group(struct btrfs_trans_handle *trans, key.type = BTRFS_BLOCK_GROUP_ITEM_KEY; key.offset = cache->length; - ret = btrfs_search_slot(trans, extent_root, &key, path, 0, 1); + ret = btrfs_search_slot(trans, root, &key, path, 0, 1); if (ret) { if (ret > 0) ret = -ENOENT; @@ -2345,7 +2356,7 @@ static int cache_save_setup(struct btrfs_block_group *block_group, return 0; } - if (trans->aborted) + if (TRANS_ABORTED(trans)) return 0; again: inode = lookup_free_space_inode(block_group, path); @@ -2446,7 +2457,8 @@ again: num_pages *= 16; num_pages *= PAGE_SIZE; - ret = btrfs_check_data_free_space(inode, &data_reserved, 0, num_pages); + ret = btrfs_check_data_free_space(BTRFS_I(inode), &data_reserved, 0, + num_pages); if (ret) goto out_put; @@ -2611,7 +2623,7 @@ again: } } if (!ret) { - ret = write_one_cache_group(trans, path, cache); + ret = update_block_group_item(trans, path, cache); /* * Our block group might still be attached to the list * of new block groups in the transaction handle of some @@ -2760,7 +2772,7 @@ int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans) } } if (!ret) { - ret = write_one_cache_group(trans, path, cache); + ret = update_block_group_item(trans, path, cache); /* * One of the free space endio workers might have * created a new block group while updating a free space @@ -2777,7 +2789,7 @@ int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans) if (ret == -ENOENT) { wait_event(cur_trans->writer_wait, atomic_read(&cur_trans->num_writers) == 1); - ret = write_one_cache_group(trans, path, cache); + ret = update_block_group_item(trans, path, cache); } if (ret) btrfs_abort_transaction(trans, ret); @@ -2881,7 +2893,7 @@ int btrfs_update_block_group(struct btrfs_trans_handle *trans, &cache->space_info->total_bytes_pinned, num_bytes, BTRFS_TOTAL_BYTES_PINNED_BATCH); - set_extent_dirty(info->pinned_extents, + set_extent_dirty(&trans->transaction->pinned_extents, bytenr, bytenr + num_bytes - 1, GFP_NOFS | __GFP_NOFAIL); } @@ -3317,7 +3329,7 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info) ASSERT(list_empty(&block_group->dirty_list)); ASSERT(list_empty(&block_group->io_list)); ASSERT(list_empty(&block_group->bg_list)); - ASSERT(atomic_read(&block_group->count) == 1); + ASSERT(refcount_read(&block_group->refs) == 1); btrfs_put_block_group(block_group); spin_lock(&info->block_group_cache_lock); @@ -3347,8 +3359,48 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info) space_info->bytes_reserved > 0 || space_info->bytes_may_use > 0)) btrfs_dump_space_info(info, space_info, 0, 0); + WARN_ON(space_info->reclaim_size > 0); list_del(&space_info->list); btrfs_sysfs_remove_space_info(space_info); } return 0; } + +void btrfs_freeze_block_group(struct btrfs_block_group *cache) +{ + atomic_inc(&cache->frozen); +} + +void btrfs_unfreeze_block_group(struct btrfs_block_group *block_group) +{ + struct btrfs_fs_info *fs_info = block_group->fs_info; + struct extent_map_tree *em_tree; + struct extent_map *em; + bool cleanup; + + spin_lock(&block_group->lock); + cleanup = (atomic_dec_and_test(&block_group->frozen) && + block_group->removed); + spin_unlock(&block_group->lock); + + if (cleanup) { + em_tree = &fs_info->mapping_tree; + write_lock(&em_tree->lock); + em = lookup_extent_mapping(em_tree, block_group->start, + 1); + BUG_ON(!em); /* logic error, can't happen */ + remove_extent_mapping(em_tree, em); + write_unlock(&em_tree->lock); + + /* once for us and once for the tree */ + free_extent_map(em); + free_extent_map(em); + + /* + * We may have left one free space entry and other possible + * tasks trimming this block group have left 1 entry each one. + * Free them if any. + */ + __btrfs_remove_free_space_cache(block_group->free_space_ctl); + } +} |