diff options
Diffstat (limited to 'fs/btrfs/block-group.c')
-rw-r--r-- | fs/btrfs/block-group.c | 255 |
1 files changed, 100 insertions, 155 deletions
diff --git a/fs/btrfs/block-group.c b/fs/btrfs/block-group.c index 176e8a292fd1..613920c17ac1 100644 --- a/fs/btrfs/block-group.c +++ b/fs/btrfs/block-group.c @@ -65,11 +65,8 @@ static u64 btrfs_reduce_alloc_profile(struct btrfs_fs_info *fs_info, u64 flags) spin_lock(&fs_info->balance_lock); target = get_restripe_target(fs_info, flags); if (target) { - /* Pick target profile only if it's already available */ - if ((flags & target) & BTRFS_EXTENDED_PROFILE_MASK) { - spin_unlock(&fs_info->balance_lock); - return extended_to_chunk(target); - } + spin_unlock(&fs_info->balance_lock); + return extended_to_chunk(target); } spin_unlock(&fs_info->balance_lock); @@ -118,12 +115,12 @@ u64 btrfs_get_alloc_profile(struct btrfs_fs_info *fs_info, u64 orig_flags) void btrfs_get_block_group(struct btrfs_block_group *cache) { - atomic_inc(&cache->count); + refcount_inc(&cache->refs); } void btrfs_put_block_group(struct btrfs_block_group *cache) { - if (atomic_dec_and_test(&cache->count)) { + if (refcount_dec_and_test(&cache->refs)) { WARN_ON(cache->pinned > 0); WARN_ON(cache->reserved > 0); @@ -940,7 +937,7 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans, path = btrfs_alloc_path(); if (!path) { ret = -ENOMEM; - goto out_put_group; + goto out; } /* @@ -978,7 +975,7 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans, ret = btrfs_orphan_add(trans, BTRFS_I(inode)); if (ret) { btrfs_add_delayed_iput(inode); - goto out_put_group; + goto out; } clear_nlink(inode); /* One for the block groups ref */ @@ -1001,13 +998,13 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans, ret = btrfs_search_slot(trans, tree_root, &key, path, -1, 1); if (ret < 0) - goto out_put_group; + goto out; if (ret > 0) btrfs_release_path(path); if (ret == 0) { ret = btrfs_del_item(trans, tree_root, path); if (ret) - goto out_put_group; + goto out; btrfs_release_path(path); } @@ -1016,6 +1013,9 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans, &fs_info->block_group_cache_tree); RB_CLEAR_NODE(&block_group->cache_node); + /* Once for the block groups rbtree */ + btrfs_put_block_group(block_group); + if (fs_info->first_logical_byte == block_group->start) fs_info->first_logical_byte = (u64)-1; spin_unlock(&fs_info->block_group_cache_lock); @@ -1089,7 +1089,25 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans, spin_unlock(&block_group->space_info->lock); - mutex_lock(&fs_info->chunk_mutex); + /* + * Remove the free space for the block group from the free space tree + * and the block group's item from the extent tree before marking the + * block group as removed. This is to prevent races with tasks that + * freeze and unfreeze a block group, this task and another task + * allocating a new block group - the unfreeze task ends up removing + * the block group's extent map before the task calling this function + * deletes the block group item from the extent tree, allowing for + * another task to attempt to create another block group with the same + * item key (and failing with -EEXIST and a transaction abort). + */ + ret = remove_block_group_free_space(trans, block_group); + if (ret) + goto out; + + ret = remove_block_group_item(trans, path, block_group); + if (ret < 0) + goto out; + spin_lock(&block_group->lock); block_group->removed = 1; /* @@ -1121,19 +1139,6 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans, remove_em = (atomic_read(&block_group->frozen) == 0); spin_unlock(&block_group->lock); - mutex_unlock(&fs_info->chunk_mutex); - - ret = remove_block_group_free_space(trans, block_group); - if (ret) - goto out_put_group; - - /* Once for the block groups rbtree */ - btrfs_put_block_group(block_group); - - ret = remove_block_group_item(trans, path, block_group); - if (ret < 0) - goto out; - if (remove_em) { struct extent_map_tree *em_tree; @@ -1145,10 +1150,9 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans, free_extent_map(em); } -out_put_group: +out: /* Once for the lookup reference */ btrfs_put_block_group(block_group); -out: if (remove_rsv) btrfs_delayed_refs_rsv_release(fs_info, 1); btrfs_free_path(path); @@ -1522,21 +1526,70 @@ void btrfs_mark_bg_unused(struct btrfs_block_group *bg) spin_unlock(&fs_info->unused_bgs_lock); } +static int read_bg_from_eb(struct btrfs_fs_info *fs_info, struct btrfs_key *key, + struct btrfs_path *path) +{ + struct extent_map_tree *em_tree; + struct extent_map *em; + struct btrfs_block_group_item bg; + struct extent_buffer *leaf; + int slot; + u64 flags; + int ret = 0; + + slot = path->slots[0]; + leaf = path->nodes[0]; + + em_tree = &fs_info->mapping_tree; + read_lock(&em_tree->lock); + em = lookup_extent_mapping(em_tree, key->objectid, key->offset); + read_unlock(&em_tree->lock); + if (!em) { + btrfs_err(fs_info, + "logical %llu len %llu found bg but no related chunk", + key->objectid, key->offset); + return -ENOENT; + } + + if (em->start != key->objectid || em->len != key->offset) { + btrfs_err(fs_info, + "block group %llu len %llu mismatch with chunk %llu len %llu", + key->objectid, key->offset, em->start, em->len); + ret = -EUCLEAN; + goto out_free_em; + } + + read_extent_buffer(leaf, &bg, btrfs_item_ptr_offset(leaf, slot), + sizeof(bg)); + flags = btrfs_stack_block_group_flags(&bg) & + BTRFS_BLOCK_GROUP_TYPE_MASK; + + if (flags != (em->map_lookup->type & BTRFS_BLOCK_GROUP_TYPE_MASK)) { + btrfs_err(fs_info, +"block group %llu len %llu type flags 0x%llx mismatch with chunk type flags 0x%llx", + key->objectid, key->offset, flags, + (BTRFS_BLOCK_GROUP_TYPE_MASK & em->map_lookup->type)); + ret = -EUCLEAN; + } + +out_free_em: + free_extent_map(em); + return ret; +} + static int find_first_block_group(struct btrfs_fs_info *fs_info, struct btrfs_path *path, struct btrfs_key *key) { struct btrfs_root *root = fs_info->extent_root; - int ret = 0; + int ret; struct btrfs_key found_key; struct extent_buffer *leaf; - struct btrfs_block_group_item bg; - u64 flags; int slot; ret = btrfs_search_slot(NULL, root, key, path, 0, 0); if (ret < 0) - goto out; + return ret; while (1) { slot = path->slots[0]; @@ -1553,49 +1606,10 @@ static int find_first_block_group(struct btrfs_fs_info *fs_info, if (found_key.objectid >= key->objectid && found_key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) { - struct extent_map_tree *em_tree; - struct extent_map *em; - - em_tree = &root->fs_info->mapping_tree; - read_lock(&em_tree->lock); - em = lookup_extent_mapping(em_tree, found_key.objectid, - found_key.offset); - read_unlock(&em_tree->lock); - if (!em) { - btrfs_err(fs_info, - "logical %llu len %llu found bg but no related chunk", - found_key.objectid, found_key.offset); - ret = -ENOENT; - } else if (em->start != found_key.objectid || - em->len != found_key.offset) { - btrfs_err(fs_info, - "block group %llu len %llu mismatch with chunk %llu len %llu", - found_key.objectid, found_key.offset, - em->start, em->len); - ret = -EUCLEAN; - } else { - read_extent_buffer(leaf, &bg, - btrfs_item_ptr_offset(leaf, slot), - sizeof(bg)); - flags = btrfs_stack_block_group_flags(&bg) & - BTRFS_BLOCK_GROUP_TYPE_MASK; - - if (flags != (em->map_lookup->type & - BTRFS_BLOCK_GROUP_TYPE_MASK)) { - btrfs_err(fs_info, -"block group %llu len %llu type flags 0x%llx mismatch with chunk type flags 0x%llx", - found_key.objectid, - found_key.offset, flags, - (BTRFS_BLOCK_GROUP_TYPE_MASK & - em->map_lookup->type)); - ret = -EUCLEAN; - } else { - ret = 0; - } - } - free_extent_map(em); - goto out; + ret = read_bg_from_eb(fs_info, &found_key, path); + break; } + path->slots[0]++; } out: @@ -1647,19 +1661,12 @@ int btrfs_rmap_block(struct btrfs_fs_info *fs_info, u64 chunk_start, return -EIO; map = em->map_lookup; - data_stripe_length = em->len; + data_stripe_length = em->orig_block_len; io_stripe_size = map->stripe_len; - if (map->type & BTRFS_BLOCK_GROUP_RAID10) - data_stripe_length = div_u64(data_stripe_length, - map->num_stripes / map->sub_stripes); - else if (map->type & BTRFS_BLOCK_GROUP_RAID0) - data_stripe_length = div_u64(data_stripe_length, map->num_stripes); - else if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK) { - data_stripe_length = div_u64(data_stripe_length, - nr_data_stripes(map)); + /* For RAID5/6 adjust to a full IO stripe length */ + if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK) io_stripe_size = map->stripe_len * nr_data_stripes(map); - } buf = kcalloc(map->num_stripes, sizeof(u64), GFP_NOFS); if (!buf) { @@ -1738,25 +1745,12 @@ static int exclude_super_stripes(struct btrfs_block_group *cache) return ret; while (nr--) { - u64 start, len; - - if (logical[nr] > cache->start + cache->length) - continue; - - if (logical[nr] + stripe_len <= cache->start) - continue; - - start = logical[nr]; - if (start < cache->start) { - start = cache->start; - len = (logical[nr] + stripe_len) - start; - } else { - len = min_t(u64, stripe_len, - cache->start + cache->length - start); - } + u64 len = min_t(u64, stripe_len, + cache->start + cache->length - logical[nr]); cache->bytes_super += len; - ret = btrfs_add_excluded_extent(fs_info, start, len); + ret = btrfs_add_excluded_extent(fs_info, logical[nr], + len); if (ret) { kfree(logical); return ret; @@ -1808,7 +1802,7 @@ static struct btrfs_block_group *btrfs_create_block_group_cache( cache->discard_index = BTRFS_DISCARD_INDEX_UNUSED; - atomic_set(&cache->count, 1); + refcount_set(&cache->refs, 1); spin_lock_init(&cache->lock); init_rwsem(&cache->data_rwsem); INIT_LIST_HEAD(&cache->list); @@ -2197,54 +2191,6 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans, u64 bytes_used, return 0; } -static u64 update_block_group_flags(struct btrfs_fs_info *fs_info, u64 flags) -{ - u64 num_devices; - u64 stripped; - - /* - * if restripe for this chunk_type is on pick target profile and - * return, otherwise do the usual balance - */ - stripped = get_restripe_target(fs_info, flags); - if (stripped) - return extended_to_chunk(stripped); - - num_devices = fs_info->fs_devices->rw_devices; - - stripped = BTRFS_BLOCK_GROUP_RAID0 | BTRFS_BLOCK_GROUP_RAID56_MASK | - BTRFS_BLOCK_GROUP_RAID1_MASK | BTRFS_BLOCK_GROUP_RAID10; - - if (num_devices == 1) { - stripped |= BTRFS_BLOCK_GROUP_DUP; - stripped = flags & ~stripped; - - /* turn raid0 into single device chunks */ - if (flags & BTRFS_BLOCK_GROUP_RAID0) - return stripped; - - /* turn mirroring into duplication */ - if (flags & (BTRFS_BLOCK_GROUP_RAID1_MASK | - BTRFS_BLOCK_GROUP_RAID10)) - return stripped | BTRFS_BLOCK_GROUP_DUP; - } else { - /* they already had raid on here, just return */ - if (flags & stripped) - return flags; - - stripped |= BTRFS_BLOCK_GROUP_DUP; - stripped = flags & ~stripped; - - /* switch duplicated blocks with raid1 */ - if (flags & BTRFS_BLOCK_GROUP_DUP) - return stripped | BTRFS_BLOCK_GROUP_RAID1; - - /* this is drive concat, leave it alone */ - } - - return flags; -} - /* * Mark one block group RO, can be called several times for the same block * group. @@ -2290,7 +2236,7 @@ again: * If we are changing raid levels, try to allocate a * corresponding block group with the new raid level. */ - alloc_flags = update_block_group_flags(fs_info, cache->flags); + alloc_flags = btrfs_get_alloc_profile(fs_info, cache->flags); if (alloc_flags != cache->flags) { ret = btrfs_chunk_alloc(trans, alloc_flags, CHUNK_ALLOC_FORCE); @@ -2317,7 +2263,7 @@ again: ret = inc_block_group_ro(cache, 0); out: if (cache->flags & BTRFS_BLOCK_GROUP_SYSTEM) { - alloc_flags = update_block_group_flags(fs_info, cache->flags); + alloc_flags = btrfs_get_alloc_profile(fs_info, cache->flags); mutex_lock(&fs_info->chunk_mutex); check_system_chunk(trans, alloc_flags); mutex_unlock(&fs_info->chunk_mutex); @@ -2511,7 +2457,8 @@ again: num_pages *= 16; num_pages *= PAGE_SIZE; - ret = btrfs_check_data_free_space(inode, &data_reserved, 0, num_pages); + ret = btrfs_check_data_free_space(BTRFS_I(inode), &data_reserved, 0, + num_pages); if (ret) goto out_put; @@ -3382,7 +3329,7 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info) ASSERT(list_empty(&block_group->dirty_list)); ASSERT(list_empty(&block_group->io_list)); ASSERT(list_empty(&block_group->bg_list)); - ASSERT(atomic_read(&block_group->count) == 1); + ASSERT(refcount_read(&block_group->refs) == 1); btrfs_put_block_group(block_group); spin_lock(&info->block_group_cache_lock); @@ -3437,7 +3384,6 @@ void btrfs_unfreeze_block_group(struct btrfs_block_group *block_group) spin_unlock(&block_group->lock); if (cleanup) { - mutex_lock(&fs_info->chunk_mutex); em_tree = &fs_info->mapping_tree; write_lock(&em_tree->lock); em = lookup_extent_mapping(em_tree, block_group->start, @@ -3445,7 +3391,6 @@ void btrfs_unfreeze_block_group(struct btrfs_block_group *block_group) BUG_ON(!em); /* logic error, can't happen */ remove_extent_mapping(em_tree, em); write_unlock(&em_tree->lock); - mutex_unlock(&fs_info->chunk_mutex); /* once for us and once for the tree */ free_extent_map(em); |