summaryrefslogtreecommitdiffstats
path: root/fs/btrfs/block-group.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/btrfs/block-group.c')
-rw-r--r--fs/btrfs/block-group.c540
1 files changed, 296 insertions, 244 deletions
diff --git a/fs/btrfs/block-group.c b/fs/btrfs/block-group.c
index 404e050ce8ee..613920c17ac1 100644
--- a/fs/btrfs/block-group.c
+++ b/fs/btrfs/block-group.c
@@ -7,7 +7,6 @@
#include "disk-io.h"
#include "free-space-cache.h"
#include "free-space-tree.h"
-#include "disk-io.h"
#include "volumes.h"
#include "transaction.h"
#include "ref-verify.h"
@@ -66,11 +65,8 @@ static u64 btrfs_reduce_alloc_profile(struct btrfs_fs_info *fs_info, u64 flags)
spin_lock(&fs_info->balance_lock);
target = get_restripe_target(fs_info, flags);
if (target) {
- /* Pick target profile only if it's already available */
- if ((flags & target) & BTRFS_EXTENDED_PROFILE_MASK) {
- spin_unlock(&fs_info->balance_lock);
- return extended_to_chunk(target);
- }
+ spin_unlock(&fs_info->balance_lock);
+ return extended_to_chunk(target);
}
spin_unlock(&fs_info->balance_lock);
@@ -119,12 +115,12 @@ u64 btrfs_get_alloc_profile(struct btrfs_fs_info *fs_info, u64 orig_flags)
void btrfs_get_block_group(struct btrfs_block_group *cache)
{
- atomic_inc(&cache->count);
+ refcount_inc(&cache->refs);
}
void btrfs_put_block_group(struct btrfs_block_group *cache)
{
- if (atomic_dec_and_test(&cache->count)) {
+ if (refcount_dec_and_test(&cache->refs)) {
WARN_ON(cache->pinned > 0);
WARN_ON(cache->reserved > 0);
@@ -161,6 +157,8 @@ static int btrfs_add_block_group_cache(struct btrfs_fs_info *info,
struct rb_node *parent = NULL;
struct btrfs_block_group *cache;
+ ASSERT(block_group->length != 0);
+
spin_lock(&info->block_group_cache_lock);
p = &info->block_group_cache_tree.rb_node;
@@ -460,7 +458,7 @@ u64 add_new_free_space(struct btrfs_block_group *block_group, u64 start, u64 end
int ret;
while (start < end) {
- ret = find_first_extent_bit(info->pinned_extents, start,
+ ret = find_first_extent_bit(&info->excluded_extents, start,
&extent_start, &extent_end,
EXTENT_DIRTY | EXTENT_UPTODATE,
NULL);
@@ -856,18 +854,41 @@ static void clear_incompat_bg_bits(struct btrfs_fs_info *fs_info, u64 flags)
found_raid1c34 = true;
up_read(&sinfo->groups_sem);
}
- if (found_raid56)
+ if (!found_raid56)
btrfs_clear_fs_incompat(fs_info, RAID56);
- if (found_raid1c34)
+ if (!found_raid1c34)
btrfs_clear_fs_incompat(fs_info, RAID1C34);
}
}
+static int remove_block_group_item(struct btrfs_trans_handle *trans,
+ struct btrfs_path *path,
+ struct btrfs_block_group *block_group)
+{
+ struct btrfs_fs_info *fs_info = trans->fs_info;
+ struct btrfs_root *root;
+ struct btrfs_key key;
+ int ret;
+
+ root = fs_info->extent_root;
+ key.objectid = block_group->start;
+ key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
+ key.offset = block_group->length;
+
+ ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
+ if (ret > 0)
+ ret = -ENOENT;
+ if (ret < 0)
+ return ret;
+
+ ret = btrfs_del_item(trans, root, path);
+ return ret;
+}
+
int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
u64 group_start, struct extent_map *em)
{
struct btrfs_fs_info *fs_info = trans->fs_info;
- struct btrfs_root *root = fs_info->extent_root;
struct btrfs_path *path;
struct btrfs_block_group *block_group;
struct btrfs_free_cluster *cluster;
@@ -992,6 +1013,9 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
&fs_info->block_group_cache_tree);
RB_CLEAR_NODE(&block_group->cache_node);
+ /* Once for the block groups rbtree */
+ btrfs_put_block_group(block_group);
+
if (fs_info->first_logical_byte == block_group->start)
fs_info->first_logical_byte = (u64)-1;
spin_unlock(&fs_info->block_group_cache_lock);
@@ -1065,26 +1089,43 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
spin_unlock(&block_group->space_info->lock);
- key.objectid = block_group->start;
- key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
- key.offset = block_group->length;
+ /*
+ * Remove the free space for the block group from the free space tree
+ * and the block group's item from the extent tree before marking the
+ * block group as removed. This is to prevent races with tasks that
+ * freeze and unfreeze a block group, this task and another task
+ * allocating a new block group - the unfreeze task ends up removing
+ * the block group's extent map before the task calling this function
+ * deletes the block group item from the extent tree, allowing for
+ * another task to attempt to create another block group with the same
+ * item key (and failing with -EEXIST and a transaction abort).
+ */
+ ret = remove_block_group_free_space(trans, block_group);
+ if (ret)
+ goto out;
+
+ ret = remove_block_group_item(trans, path, block_group);
+ if (ret < 0)
+ goto out;
- mutex_lock(&fs_info->chunk_mutex);
spin_lock(&block_group->lock);
block_group->removed = 1;
/*
- * At this point trimming can't start on this block group, because we
- * removed the block group from the tree fs_info->block_group_cache_tree
- * so no one can't find it anymore and even if someone already got this
- * block group before we removed it from the rbtree, they have already
- * incremented block_group->trimming - if they didn't, they won't find
- * any free space entries because we already removed them all when we
- * called btrfs_remove_free_space_cache().
+ * At this point trimming or scrub can't start on this block group,
+ * because we removed the block group from the rbtree
+ * fs_info->block_group_cache_tree so no one can't find it anymore and
+ * even if someone already got this block group before we removed it
+ * from the rbtree, they have already incremented block_group->frozen -
+ * if they didn't, for the trimming case they won't find any free space
+ * entries because we already removed them all when we called
+ * btrfs_remove_free_space_cache().
*
* And we must not remove the extent map from the fs_info->mapping_tree
* to prevent the same logical address range and physical device space
- * ranges from being reused for a new block group. This is because our
- * fs trim operation (btrfs_trim_fs() / btrfs_ioctl_fitrim()) is
+ * ranges from being reused for a new block group. This is needed to
+ * avoid races with trimming and scrub.
+ *
+ * An fs trim operation (btrfs_trim_fs() / btrfs_ioctl_fitrim()) is
* completely transactionless, so while it is trimming a range the
* currently running transaction might finish and a new one start,
* allowing for new block groups to be created that can reuse the same
@@ -1095,28 +1136,9 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
* in place until the extents have been discarded completely when
* the transaction commit has completed.
*/
- remove_em = (atomic_read(&block_group->trimming) == 0);
+ remove_em = (atomic_read(&block_group->frozen) == 0);
spin_unlock(&block_group->lock);
- mutex_unlock(&fs_info->chunk_mutex);
-
- ret = remove_block_group_free_space(trans, block_group);
- if (ret)
- goto out;
-
- btrfs_put_block_group(block_group);
- btrfs_put_block_group(block_group);
-
- ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
- if (ret > 0)
- ret = -EIO;
- if (ret < 0)
- goto out;
-
- ret = btrfs_del_item(trans, root, path);
- if (ret)
- goto out;
-
if (remove_em) {
struct extent_map_tree *em_tree;
@@ -1127,7 +1149,10 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
/* once for the tree */
free_extent_map(em);
}
+
out:
+ /* Once for the lookup reference */
+ btrfs_put_block_group(block_group);
if (remove_rsv)
btrfs_delayed_refs_rsv_release(fs_info, 1);
btrfs_free_path(path);
@@ -1171,7 +1196,7 @@ struct btrfs_trans_handle *btrfs_start_trans_remove_block_group(
free_extent_map(em);
return btrfs_start_transaction_fallback_global_rsv(fs_info->extent_root,
- num_items, 1);
+ num_items);
}
/*
@@ -1248,6 +1273,51 @@ out:
return ret;
}
+static bool clean_pinned_extents(struct btrfs_trans_handle *trans,
+ struct btrfs_block_group *bg)
+{
+ struct btrfs_fs_info *fs_info = bg->fs_info;
+ struct btrfs_transaction *prev_trans = NULL;
+ const u64 start = bg->start;
+ const u64 end = start + bg->length - 1;
+ int ret;
+
+ spin_lock(&fs_info->trans_lock);
+ if (trans->transaction->list.prev != &fs_info->trans_list) {
+ prev_trans = list_last_entry(&trans->transaction->list,
+ struct btrfs_transaction, list);
+ refcount_inc(&prev_trans->use_count);
+ }
+ spin_unlock(&fs_info->trans_lock);
+
+ /*
+ * Hold the unused_bg_unpin_mutex lock to avoid racing with
+ * btrfs_finish_extent_commit(). If we are at transaction N, another
+ * task might be running finish_extent_commit() for the previous
+ * transaction N - 1, and have seen a range belonging to the block
+ * group in pinned_extents before we were able to clear the whole block
+ * group range from pinned_extents. This means that task can lookup for
+ * the block group after we unpinned it from pinned_extents and removed
+ * it, leading to a BUG_ON() at unpin_extent_range().
+ */
+ mutex_lock(&fs_info->unused_bg_unpin_mutex);
+ if (prev_trans) {
+ ret = clear_extent_bits(&prev_trans->pinned_extents, start, end,
+ EXTENT_DIRTY);
+ if (ret)
+ goto out;
+ }
+
+ ret = clear_extent_bits(&trans->transaction->pinned_extents, start, end,
+ EXTENT_DIRTY);
+out:
+ mutex_unlock(&fs_info->unused_bg_unpin_mutex);
+ if (prev_trans)
+ btrfs_put_transaction(prev_trans);
+
+ return ret == 0;
+}
+
/*
* Process the unused_bgs list and remove any that don't have any allocated
* space inside of them.
@@ -1265,7 +1335,6 @@ void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info)
spin_lock(&fs_info->unused_bgs_lock);
while (!list_empty(&fs_info->unused_bgs)) {
- u64 start, end;
int trimming;
block_group = list_first_entry(&fs_info->unused_bgs,
@@ -1344,35 +1413,10 @@ void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info)
* We could have pending pinned extents for this block group,
* just delete them, we don't care about them anymore.
*/
- start = block_group->start;
- end = start + block_group->length - 1;
- /*
- * Hold the unused_bg_unpin_mutex lock to avoid racing with
- * btrfs_finish_extent_commit(). If we are at transaction N,
- * another task might be running finish_extent_commit() for the
- * previous transaction N - 1, and have seen a range belonging
- * to the block group in freed_extents[] before we were able to
- * clear the whole block group range from freed_extents[]. This
- * means that task can lookup for the block group after we
- * unpinned it from freed_extents[] and removed it, leading to
- * a BUG_ON() at btrfs_unpin_extent_range().
- */
- mutex_lock(&fs_info->unused_bg_unpin_mutex);
- ret = clear_extent_bits(&fs_info->freed_extents[0], start, end,
- EXTENT_DIRTY);
- if (ret) {
- mutex_unlock(&fs_info->unused_bg_unpin_mutex);
+ if (!clean_pinned_extents(trans, block_group)) {
btrfs_dec_block_group_ro(block_group);
goto end_trans;
}
- ret = clear_extent_bits(&fs_info->freed_extents[1], start, end,
- EXTENT_DIRTY);
- if (ret) {
- mutex_unlock(&fs_info->unused_bg_unpin_mutex);
- btrfs_dec_block_group_ro(block_group);
- goto end_trans;
- }
- mutex_unlock(&fs_info->unused_bg_unpin_mutex);
/*
* At this point, the block_group is read only and should fail
@@ -1421,7 +1465,7 @@ void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info)
/* Implicit trim during transaction commit. */
if (trimming)
- btrfs_get_block_group_trimming(block_group);
+ btrfs_freeze_block_group(block_group);
/*
* Btrfs_remove_chunk will abort the transaction if things go
@@ -1431,7 +1475,7 @@ void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info)
if (ret) {
if (trimming)
- btrfs_put_block_group_trimming(block_group);
+ btrfs_unfreeze_block_group(block_group);
goto end_trans;
}
@@ -1482,21 +1526,70 @@ void btrfs_mark_bg_unused(struct btrfs_block_group *bg)
spin_unlock(&fs_info->unused_bgs_lock);
}
+static int read_bg_from_eb(struct btrfs_fs_info *fs_info, struct btrfs_key *key,
+ struct btrfs_path *path)
+{
+ struct extent_map_tree *em_tree;
+ struct extent_map *em;
+ struct btrfs_block_group_item bg;
+ struct extent_buffer *leaf;
+ int slot;
+ u64 flags;
+ int ret = 0;
+
+ slot = path->slots[0];
+ leaf = path->nodes[0];
+
+ em_tree = &fs_info->mapping_tree;
+ read_lock(&em_tree->lock);
+ em = lookup_extent_mapping(em_tree, key->objectid, key->offset);
+ read_unlock(&em_tree->lock);
+ if (!em) {
+ btrfs_err(fs_info,
+ "logical %llu len %llu found bg but no related chunk",
+ key->objectid, key->offset);
+ return -ENOENT;
+ }
+
+ if (em->start != key->objectid || em->len != key->offset) {
+ btrfs_err(fs_info,
+ "block group %llu len %llu mismatch with chunk %llu len %llu",
+ key->objectid, key->offset, em->start, em->len);
+ ret = -EUCLEAN;
+ goto out_free_em;
+ }
+
+ read_extent_buffer(leaf, &bg, btrfs_item_ptr_offset(leaf, slot),
+ sizeof(bg));
+ flags = btrfs_stack_block_group_flags(&bg) &
+ BTRFS_BLOCK_GROUP_TYPE_MASK;
+
+ if (flags != (em->map_lookup->type & BTRFS_BLOCK_GROUP_TYPE_MASK)) {
+ btrfs_err(fs_info,
+"block group %llu len %llu type flags 0x%llx mismatch with chunk type flags 0x%llx",
+ key->objectid, key->offset, flags,
+ (BTRFS_BLOCK_GROUP_TYPE_MASK & em->map_lookup->type));
+ ret = -EUCLEAN;
+ }
+
+out_free_em:
+ free_extent_map(em);
+ return ret;
+}
+
static int find_first_block_group(struct btrfs_fs_info *fs_info,
struct btrfs_path *path,
struct btrfs_key *key)
{
struct btrfs_root *root = fs_info->extent_root;
- int ret = 0;
+ int ret;
struct btrfs_key found_key;
struct extent_buffer *leaf;
- struct btrfs_block_group_item bg;
- u64 flags;
int slot;
ret = btrfs_search_slot(NULL, root, key, path, 0, 0);
if (ret < 0)
- goto out;
+ return ret;
while (1) {
slot = path->slots[0];
@@ -1513,49 +1606,10 @@ static int find_first_block_group(struct btrfs_fs_info *fs_info,
if (found_key.objectid >= key->objectid &&
found_key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
- struct extent_map_tree *em_tree;
- struct extent_map *em;
-
- em_tree = &root->fs_info->mapping_tree;
- read_lock(&em_tree->lock);
- em = lookup_extent_mapping(em_tree, found_key.objectid,
- found_key.offset);
- read_unlock(&em_tree->lock);
- if (!em) {
- btrfs_err(fs_info,
- "logical %llu len %llu found bg but no related chunk",
- found_key.objectid, found_key.offset);
- ret = -ENOENT;
- } else if (em->start != found_key.objectid ||
- em->len != found_key.offset) {
- btrfs_err(fs_info,
- "block group %llu len %llu mismatch with chunk %llu len %llu",
- found_key.objectid, found_key.offset,
- em->start, em->len);
- ret = -EUCLEAN;
- } else {
- read_extent_buffer(leaf, &bg,
- btrfs_item_ptr_offset(leaf, slot),
- sizeof(bg));
- flags = btrfs_stack_block_group_flags(&bg) &
- BTRFS_BLOCK_GROUP_TYPE_MASK;
-
- if (flags != (em->map_lookup->type &
- BTRFS_BLOCK_GROUP_TYPE_MASK)) {
- btrfs_err(fs_info,
-"block group %llu len %llu type flags 0x%llx mismatch with chunk type flags 0x%llx",
- found_key.objectid,
- found_key.offset, flags,
- (BTRFS_BLOCK_GROUP_TYPE_MASK &
- em->map_lookup->type));
- ret = -EUCLEAN;
- } else {
- ret = 0;
- }
- }
- free_extent_map(em);
- goto out;
+ ret = read_bg_from_eb(fs_info, &found_key, path);
+ break;
}
+
path->slots[0]++;
}
out:
@@ -1607,19 +1661,12 @@ int btrfs_rmap_block(struct btrfs_fs_info *fs_info, u64 chunk_start,
return -EIO;
map = em->map_lookup;
- data_stripe_length = em->len;
+ data_stripe_length = em->orig_block_len;
io_stripe_size = map->stripe_len;
- if (map->type & BTRFS_BLOCK_GROUP_RAID10)
- data_stripe_length = div_u64(data_stripe_length,
- map->num_stripes / map->sub_stripes);
- else if (map->type & BTRFS_BLOCK_GROUP_RAID0)
- data_stripe_length = div_u64(data_stripe_length, map->num_stripes);
- else if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK) {
- data_stripe_length = div_u64(data_stripe_length,
- nr_data_stripes(map));
+ /* For RAID5/6 adjust to a full IO stripe length */
+ if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK)
io_stripe_size = map->stripe_len * nr_data_stripes(map);
- }
buf = kcalloc(map->num_stripes, sizeof(u64), GFP_NOFS);
if (!buf) {
@@ -1698,25 +1745,12 @@ static int exclude_super_stripes(struct btrfs_block_group *cache)
return ret;
while (nr--) {
- u64 start, len;
-
- if (logical[nr] > cache->start + cache->length)
- continue;
-
- if (logical[nr] + stripe_len <= cache->start)
- continue;
-
- start = logical[nr];
- if (start < cache->start) {
- start = cache->start;
- len = (logical[nr] + stripe_len) - start;
- } else {
- len = min_t(u64, stripe_len,
- cache->start + cache->length - start);
- }
+ u64 len = min_t(u64, stripe_len,
+ cache->start + cache->length - logical[nr]);
cache->bytes_super += len;
- ret = btrfs_add_excluded_extent(fs_info, start, len);
+ ret = btrfs_add_excluded_extent(fs_info, logical[nr],
+ len);
if (ret) {
kfree(logical);
return ret;
@@ -1745,7 +1779,7 @@ static void link_block_group(struct btrfs_block_group *cache)
}
static struct btrfs_block_group *btrfs_create_block_group_cache(
- struct btrfs_fs_info *fs_info, u64 start, u64 size)
+ struct btrfs_fs_info *fs_info, u64 start)
{
struct btrfs_block_group *cache;
@@ -1761,7 +1795,6 @@ static struct btrfs_block_group *btrfs_create_block_group_cache(
}
cache->start = start;
- cache->length = size;
cache->fs_info = fs_info;
cache->full_stripe_len = btrfs_full_stripe_len(fs_info, start);
@@ -1769,7 +1802,7 @@ static struct btrfs_block_group *btrfs_create_block_group_cache(
cache->discard_index = BTRFS_DISCARD_INDEX_UNUSED;
- atomic_set(&cache->count, 1);
+ refcount_set(&cache->refs, 1);
spin_lock_init(&cache->lock);
init_rwsem(&cache->data_rwsem);
INIT_LIST_HEAD(&cache->list);
@@ -1780,7 +1813,7 @@ static struct btrfs_block_group *btrfs_create_block_group_cache(
INIT_LIST_HEAD(&cache->dirty_list);
INIT_LIST_HEAD(&cache->io_list);
btrfs_init_free_space_ctl(cache);
- atomic_set(&cache->trimming, 0);
+ atomic_set(&cache->frozen, 0);
mutex_init(&cache->free_space_lock);
btrfs_init_full_stripe_locks_tree(&cache->full_stripe_locks_root);
@@ -1841,25 +1874,44 @@ static int check_chunk_block_group_mappings(struct btrfs_fs_info *fs_info)
return ret;
}
+static int read_block_group_item(struct btrfs_block_group *cache,
+ struct btrfs_path *path,
+ const struct btrfs_key *key)
+{
+ struct extent_buffer *leaf = path->nodes[0];
+ struct btrfs_block_group_item bgi;
+ int slot = path->slots[0];
+
+ cache->length = key->offset;
+
+ read_extent_buffer(leaf, &bgi, btrfs_item_ptr_offset(leaf, slot),
+ sizeof(bgi));
+ cache->used = btrfs_stack_block_group_used(&bgi);
+ cache->flags = btrfs_stack_block_group_flags(&bgi);
+
+ return 0;
+}
+
static int read_one_block_group(struct btrfs_fs_info *info,
struct btrfs_path *path,
const struct btrfs_key *key,
int need_clear)
{
- struct extent_buffer *leaf = path->nodes[0];
struct btrfs_block_group *cache;
struct btrfs_space_info *space_info;
- struct btrfs_block_group_item bgi;
const bool mixed = btrfs_fs_incompat(info, MIXED_GROUPS);
- int slot = path->slots[0];
int ret;
ASSERT(key->type == BTRFS_BLOCK_GROUP_ITEM_KEY);
- cache = btrfs_create_block_group_cache(info, key->objectid, key->offset);
+ cache = btrfs_create_block_group_cache(info, key->objectid);
if (!cache)
return -ENOMEM;
+ ret = read_block_group_item(cache, path, key);
+ if (ret < 0)
+ goto error;
+
if (need_clear) {
/*
* When we mount with old space cache, we need to
@@ -1874,10 +1926,6 @@ static int read_one_block_group(struct btrfs_fs_info *info,
if (btrfs_test_opt(info, SPACE_CACHE))
cache->disk_cache_state = BTRFS_DC_CLEAR;
}
- read_extent_buffer(leaf, &bgi, btrfs_item_ptr_offset(leaf, slot),
- sizeof(bgi));
- cache->used = btrfs_stack_block_group_used(&bgi);
- cache->flags = btrfs_stack_block_group_flags(&bgi);
if (!mixed && ((cache->flags & BTRFS_BLOCK_GROUP_METADATA) &&
(cache->flags & BTRFS_BLOCK_GROUP_DATA))) {
btrfs_err(info,
@@ -1905,15 +1953,15 @@ static int read_one_block_group(struct btrfs_fs_info *info,
* are empty, and we can just add all the space in and be done with it.
* This saves us _a_lot_ of time, particularly in the full case.
*/
- if (key->offset == cache->used) {
+ if (cache->length == cache->used) {
cache->last_byte_to_unpin = (u64)-1;
cache->cached = BTRFS_CACHE_FINISHED;
btrfs_free_excluded_extents(cache);
} else if (cache->used == 0) {
cache->last_byte_to_unpin = (u64)-1;
cache->cached = BTRFS_CACHE_FINISHED;
- add_new_free_space(cache, key->objectid,
- key->objectid + key->offset);
+ add_new_free_space(cache, cache->start,
+ cache->start + cache->length);
btrfs_free_excluded_extents(cache);
}
@@ -1923,7 +1971,7 @@ static int read_one_block_group(struct btrfs_fs_info *info,
goto error;
}
trace_btrfs_add_block_group(info, cache, 0);
- btrfs_update_space_info(info, cache->flags, key->offset,
+ btrfs_update_space_info(info, cache->flags, cache->length,
cache->used, cache->bytes_super, &space_info);
cache->space_info = space_info;
@@ -1962,7 +2010,6 @@ int btrfs_read_block_groups(struct btrfs_fs_info *info)
path = btrfs_alloc_path();
if (!path)
return -ENOMEM;
- path->reada = READA_FORWARD;
cache_gen = btrfs_super_cache_generation(info->super_copy);
if (btrfs_test_opt(info, SPACE_CACHE) &&
@@ -1987,6 +2034,7 @@ int btrfs_read_block_groups(struct btrfs_fs_info *info)
btrfs_release_path(path);
}
+ rcu_read_lock();
list_for_each_entry_rcu(space_info, &info->space_info, list) {
if (!(btrfs_get_alloc_profile(info, space_info->flags) &
(BTRFS_BLOCK_GROUP_RAID10 |
@@ -2007,6 +2055,7 @@ int btrfs_read_block_groups(struct btrfs_fs_info *info)
list)
inc_block_group_ro(cache, 1);
}
+ rcu_read_unlock();
btrfs_init_global_block_rsv(info);
ret = check_chunk_block_group_mappings(info);
@@ -2015,13 +2064,32 @@ error:
return ret;
}
+static int insert_block_group_item(struct btrfs_trans_handle *trans,
+ struct btrfs_block_group *block_group)
+{
+ struct btrfs_fs_info *fs_info = trans->fs_info;
+ struct btrfs_block_group_item bgi;
+ struct btrfs_root *root;
+ struct btrfs_key key;
+
+ spin_lock(&block_group->lock);
+ btrfs_set_stack_block_group_used(&bgi, block_group->used);
+ btrfs_set_stack_block_group_chunk_objectid(&bgi,
+ BTRFS_FIRST_CHUNK_TREE_OBJECTID);
+ btrfs_set_stack_block_group_flags(&bgi, block_group->flags);
+ key.objectid = block_group->start;
+ key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
+ key.offset = block_group->length;
+ spin_unlock(&block_group->lock);
+
+ root = fs_info->extent_root;
+ return btrfs_insert_item(trans, root, &key, &bgi, sizeof(bgi));
+}
+
void btrfs_create_pending_block_groups(struct btrfs_trans_handle *trans)
{
struct btrfs_fs_info *fs_info = trans->fs_info;
struct btrfs_block_group *block_group;
- struct btrfs_root *extent_root = fs_info->extent_root;
- struct btrfs_block_group_item item;
- struct btrfs_key key;
int ret = 0;
if (!trans->can_flush_pending_bgs)
@@ -2034,21 +2102,11 @@ void btrfs_create_pending_block_groups(struct btrfs_trans_handle *trans)
if (ret)
goto next;
- spin_lock(&block_group->lock);
- btrfs_set_stack_block_group_used(&item, block_group->used);
- btrfs_set_stack_block_group_chunk_objectid(&item,
- BTRFS_FIRST_CHUNK_TREE_OBJECTID);
- btrfs_set_stack_block_group_flags(&item, block_group->flags);
- key.objectid = block_group->start;
- key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
- key.offset = block_group->length;
- spin_unlock(&block_group->lock);
-
- ret = btrfs_insert_item(trans, extent_root, &key, &item,
- sizeof(item));
+ ret = insert_block_group_item(trans, block_group);
if (ret)
btrfs_abort_transaction(trans, ret);
- ret = btrfs_finish_chunk_alloc(trans, key.objectid, key.offset);
+ ret = btrfs_finish_chunk_alloc(trans, block_group->start,
+ block_group->length);
if (ret)
btrfs_abort_transaction(trans, ret);
add_block_group_free_space(trans, block_group);
@@ -2069,10 +2127,11 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans, u64 bytes_used,
btrfs_set_log_full_commit(trans);
- cache = btrfs_create_block_group_cache(fs_info, chunk_offset, size);
+ cache = btrfs_create_block_group_cache(fs_info, chunk_offset);
if (!cache)
return -ENOMEM;
+ cache->length = size;
cache->used = bytes_used;
cache->flags = type;
cache->last_byte_to_unpin = (u64)-1;
@@ -2132,54 +2191,6 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans, u64 bytes_used,
return 0;
}
-static u64 update_block_group_flags(struct btrfs_fs_info *fs_info, u64 flags)
-{
- u64 num_devices;
- u64 stripped;
-
- /*
- * if restripe for this chunk_type is on pick target profile and
- * return, otherwise do the usual balance
- */
- stripped = get_restripe_target(fs_info, flags);
- if (stripped)
- return extended_to_chunk(stripped);
-
- num_devices = fs_info->fs_devices->rw_devices;
-
- stripped = BTRFS_BLOCK_GROUP_RAID0 | BTRFS_BLOCK_GROUP_RAID56_MASK |
- BTRFS_BLOCK_GROUP_RAID1_MASK | BTRFS_BLOCK_GROUP_RAID10;
-
- if (num_devices == 1) {
- stripped |= BTRFS_BLOCK_GROUP_DUP;
- stripped = flags & ~stripped;
-
- /* turn raid0 into single device chunks */
- if (flags & BTRFS_BLOCK_GROUP_RAID0)
- return stripped;
-
- /* turn mirroring into duplication */
- if (flags & (BTRFS_BLOCK_GROUP_RAID1_MASK |
- BTRFS_BLOCK_GROUP_RAID10))
- return stripped | BTRFS_BLOCK_GROUP_DUP;
- } else {
- /* they already had raid on here, just return */
- if (flags & stripped)
- return flags;
-
- stripped |= BTRFS_BLOCK_GROUP_DUP;
- stripped = flags & ~stripped;
-
- /* switch duplicated blocks with raid1 */
- if (flags & BTRFS_BLOCK_GROUP_DUP)
- return stripped | BTRFS_BLOCK_GROUP_RAID1;
-
- /* this is drive concat, leave it alone */
- }
-
- return flags;
-}
-
/*
* Mark one block group RO, can be called several times for the same block
* group.
@@ -2225,7 +2236,7 @@ again:
* If we are changing raid levels, try to allocate a
* corresponding block group with the new raid level.
*/
- alloc_flags = update_block_group_flags(fs_info, cache->flags);
+ alloc_flags = btrfs_get_alloc_profile(fs_info, cache->flags);
if (alloc_flags != cache->flags) {
ret = btrfs_chunk_alloc(trans, alloc_flags,
CHUNK_ALLOC_FORCE);
@@ -2252,7 +2263,7 @@ again:
ret = inc_block_group_ro(cache, 0);
out:
if (cache->flags & BTRFS_BLOCK_GROUP_SYSTEM) {
- alloc_flags = update_block_group_flags(fs_info, cache->flags);
+ alloc_flags = btrfs_get_alloc_profile(fs_info, cache->flags);
mutex_lock(&fs_info->chunk_mutex);
check_system_chunk(trans, alloc_flags);
mutex_unlock(&fs_info->chunk_mutex);
@@ -2283,13 +2294,13 @@ void btrfs_dec_block_group_ro(struct btrfs_block_group *cache)
spin_unlock(&sinfo->lock);
}
-static int write_one_cache_group(struct btrfs_trans_handle *trans,
- struct btrfs_path *path,
- struct btrfs_block_group *cache)
+static int update_block_group_item(struct btrfs_trans_handle *trans,
+ struct btrfs_path *path,
+ struct btrfs_block_group *cache)
{
struct btrfs_fs_info *fs_info = trans->fs_info;
int ret;
- struct btrfs_root *extent_root = fs_info->extent_root;
+ struct btrfs_root *root = fs_info->extent_root;
unsigned long bi;
struct extent_buffer *leaf;
struct btrfs_block_group_item bgi;
@@ -2299,7 +2310,7 @@ static int write_one_cache_group(struct btrfs_trans_handle *trans,
key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
key.offset = cache->length;
- ret = btrfs_search_slot(trans, extent_root, &key, path, 0, 1);
+ ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
if (ret) {
if (ret > 0)
ret = -ENOENT;
@@ -2345,7 +2356,7 @@ static int cache_save_setup(struct btrfs_block_group *block_group,
return 0;
}
- if (trans->aborted)
+ if (TRANS_ABORTED(trans))
return 0;
again:
inode = lookup_free_space_inode(block_group, path);
@@ -2446,7 +2457,8 @@ again:
num_pages *= 16;
num_pages *= PAGE_SIZE;
- ret = btrfs_check_data_free_space(inode, &data_reserved, 0, num_pages);
+ ret = btrfs_check_data_free_space(BTRFS_I(inode), &data_reserved, 0,
+ num_pages);
if (ret)
goto out_put;
@@ -2611,7 +2623,7 @@ again:
}
}
if (!ret) {
- ret = write_one_cache_group(trans, path, cache);
+ ret = update_block_group_item(trans, path, cache);
/*
* Our block group might still be attached to the list
* of new block groups in the transaction handle of some
@@ -2760,7 +2772,7 @@ int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans)
}
}
if (!ret) {
- ret = write_one_cache_group(trans, path, cache);
+ ret = update_block_group_item(trans, path, cache);
/*
* One of the free space endio workers might have
* created a new block group while updating a free space
@@ -2777,7 +2789,7 @@ int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans)
if (ret == -ENOENT) {
wait_event(cur_trans->writer_wait,
atomic_read(&cur_trans->num_writers) == 1);
- ret = write_one_cache_group(trans, path, cache);
+ ret = update_block_group_item(trans, path, cache);
}
if (ret)
btrfs_abort_transaction(trans, ret);
@@ -2881,7 +2893,7 @@ int btrfs_update_block_group(struct btrfs_trans_handle *trans,
&cache->space_info->total_bytes_pinned,
num_bytes,
BTRFS_TOTAL_BYTES_PINNED_BATCH);
- set_extent_dirty(info->pinned_extents,
+ set_extent_dirty(&trans->transaction->pinned_extents,
bytenr, bytenr + num_bytes - 1,
GFP_NOFS | __GFP_NOFAIL);
}
@@ -3317,7 +3329,7 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info)
ASSERT(list_empty(&block_group->dirty_list));
ASSERT(list_empty(&block_group->io_list));
ASSERT(list_empty(&block_group->bg_list));
- ASSERT(atomic_read(&block_group->count) == 1);
+ ASSERT(refcount_read(&block_group->refs) == 1);
btrfs_put_block_group(block_group);
spin_lock(&info->block_group_cache_lock);
@@ -3347,8 +3359,48 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info)
space_info->bytes_reserved > 0 ||
space_info->bytes_may_use > 0))
btrfs_dump_space_info(info, space_info, 0, 0);
+ WARN_ON(space_info->reclaim_size > 0);
list_del(&space_info->list);
btrfs_sysfs_remove_space_info(space_info);
}
return 0;
}
+
+void btrfs_freeze_block_group(struct btrfs_block_group *cache)
+{
+ atomic_inc(&cache->frozen);
+}
+
+void btrfs_unfreeze_block_group(struct btrfs_block_group *block_group)
+{
+ struct btrfs_fs_info *fs_info = block_group->fs_info;
+ struct extent_map_tree *em_tree;
+ struct extent_map *em;
+ bool cleanup;
+
+ spin_lock(&block_group->lock);
+ cleanup = (atomic_dec_and_test(&block_group->frozen) &&
+ block_group->removed);
+ spin_unlock(&block_group->lock);
+
+ if (cleanup) {
+ em_tree = &fs_info->mapping_tree;
+ write_lock(&em_tree->lock);
+ em = lookup_extent_mapping(em_tree, block_group->start,
+ 1);
+ BUG_ON(!em); /* logic error, can't happen */
+ remove_extent_mapping(em_tree, em);
+ write_unlock(&em_tree->lock);
+
+ /* once for us and once for the tree */
+ free_extent_map(em);
+ free_extent_map(em);
+
+ /*
+ * We may have left one free space entry and other possible
+ * tasks trimming this block group have left 1 entry each one.
+ * Free them if any.
+ */
+ __btrfs_remove_free_space_cache(block_group->free_space_ctl);
+ }
+}