summaryrefslogtreecommitdiffstats
path: root/fs/btrfs/block-group.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/btrfs/block-group.c')
-rw-r--r--fs/btrfs/block-group.c250
1 files changed, 167 insertions, 83 deletions
diff --git a/fs/btrfs/block-group.c b/fs/btrfs/block-group.c
index 786849fcc319..176e8a292fd1 100644
--- a/fs/btrfs/block-group.c
+++ b/fs/btrfs/block-group.c
@@ -7,7 +7,6 @@
#include "disk-io.h"
#include "free-space-cache.h"
#include "free-space-tree.h"
-#include "disk-io.h"
#include "volumes.h"
#include "transaction.h"
#include "ref-verify.h"
@@ -161,6 +160,8 @@ static int btrfs_add_block_group_cache(struct btrfs_fs_info *info,
struct rb_node *parent = NULL;
struct btrfs_block_group *cache;
+ ASSERT(block_group->length != 0);
+
spin_lock(&info->block_group_cache_lock);
p = &info->block_group_cache_tree.rb_node;
@@ -863,11 +864,34 @@ static void clear_incompat_bg_bits(struct btrfs_fs_info *fs_info, u64 flags)
}
}
+static int remove_block_group_item(struct btrfs_trans_handle *trans,
+ struct btrfs_path *path,
+ struct btrfs_block_group *block_group)
+{
+ struct btrfs_fs_info *fs_info = trans->fs_info;
+ struct btrfs_root *root;
+ struct btrfs_key key;
+ int ret;
+
+ root = fs_info->extent_root;
+ key.objectid = block_group->start;
+ key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
+ key.offset = block_group->length;
+
+ ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
+ if (ret > 0)
+ ret = -ENOENT;
+ if (ret < 0)
+ return ret;
+
+ ret = btrfs_del_item(trans, root, path);
+ return ret;
+}
+
int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
u64 group_start, struct extent_map *em)
{
struct btrfs_fs_info *fs_info = trans->fs_info;
- struct btrfs_root *root = fs_info->extent_root;
struct btrfs_path *path;
struct btrfs_block_group *block_group;
struct btrfs_free_cluster *cluster;
@@ -916,7 +940,7 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
path = btrfs_alloc_path();
if (!path) {
ret = -ENOMEM;
- goto out;
+ goto out_put_group;
}
/*
@@ -954,7 +978,7 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
ret = btrfs_orphan_add(trans, BTRFS_I(inode));
if (ret) {
btrfs_add_delayed_iput(inode);
- goto out;
+ goto out_put_group;
}
clear_nlink(inode);
/* One for the block groups ref */
@@ -977,13 +1001,13 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
ret = btrfs_search_slot(trans, tree_root, &key, path, -1, 1);
if (ret < 0)
- goto out;
+ goto out_put_group;
if (ret > 0)
btrfs_release_path(path);
if (ret == 0) {
ret = btrfs_del_item(trans, tree_root, path);
if (ret)
- goto out;
+ goto out_put_group;
btrfs_release_path(path);
}
@@ -1065,26 +1089,25 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
spin_unlock(&block_group->space_info->lock);
- key.objectid = block_group->start;
- key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
- key.offset = block_group->length;
-
mutex_lock(&fs_info->chunk_mutex);
spin_lock(&block_group->lock);
block_group->removed = 1;
/*
- * At this point trimming can't start on this block group, because we
- * removed the block group from the tree fs_info->block_group_cache_tree
- * so no one can't find it anymore and even if someone already got this
- * block group before we removed it from the rbtree, they have already
- * incremented block_group->trimming - if they didn't, they won't find
- * any free space entries because we already removed them all when we
- * called btrfs_remove_free_space_cache().
+ * At this point trimming or scrub can't start on this block group,
+ * because we removed the block group from the rbtree
+ * fs_info->block_group_cache_tree so no one can't find it anymore and
+ * even if someone already got this block group before we removed it
+ * from the rbtree, they have already incremented block_group->frozen -
+ * if they didn't, for the trimming case they won't find any free space
+ * entries because we already removed them all when we called
+ * btrfs_remove_free_space_cache().
*
* And we must not remove the extent map from the fs_info->mapping_tree
* to prevent the same logical address range and physical device space
- * ranges from being reused for a new block group. This is because our
- * fs trim operation (btrfs_trim_fs() / btrfs_ioctl_fitrim()) is
+ * ranges from being reused for a new block group. This is needed to
+ * avoid races with trimming and scrub.
+ *
+ * An fs trim operation (btrfs_trim_fs() / btrfs_ioctl_fitrim()) is
* completely transactionless, so while it is trimming a range the
* currently running transaction might finish and a new one start,
* allowing for new block groups to be created that can reuse the same
@@ -1095,28 +1118,22 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
* in place until the extents have been discarded completely when
* the transaction commit has completed.
*/
- remove_em = (atomic_read(&block_group->trimming) == 0);
+ remove_em = (atomic_read(&block_group->frozen) == 0);
spin_unlock(&block_group->lock);
mutex_unlock(&fs_info->chunk_mutex);
ret = remove_block_group_free_space(trans, block_group);
if (ret)
- goto out;
+ goto out_put_group;
- btrfs_put_block_group(block_group);
+ /* Once for the block groups rbtree */
btrfs_put_block_group(block_group);
- ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
- if (ret > 0)
- ret = -EIO;
+ ret = remove_block_group_item(trans, path, block_group);
if (ret < 0)
goto out;
- ret = btrfs_del_item(trans, root, path);
- if (ret)
- goto out;
-
if (remove_em) {
struct extent_map_tree *em_tree;
@@ -1127,6 +1144,10 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
/* once for the tree */
free_extent_map(em);
}
+
+out_put_group:
+ /* Once for the lookup reference */
+ btrfs_put_block_group(block_group);
out:
if (remove_rsv)
btrfs_delayed_refs_rsv_release(fs_info, 1);
@@ -1171,7 +1192,7 @@ struct btrfs_trans_handle *btrfs_start_trans_remove_block_group(
free_extent_map(em);
return btrfs_start_transaction_fallback_global_rsv(fs_info->extent_root,
- num_items, 1);
+ num_items);
}
/*
@@ -1280,21 +1301,17 @@ static bool clean_pinned_extents(struct btrfs_trans_handle *trans,
ret = clear_extent_bits(&prev_trans->pinned_extents, start, end,
EXTENT_DIRTY);
if (ret)
- goto err;
+ goto out;
}
ret = clear_extent_bits(&trans->transaction->pinned_extents, start, end,
EXTENT_DIRTY);
- if (ret)
- goto err;
+out:
mutex_unlock(&fs_info->unused_bg_unpin_mutex);
+ if (prev_trans)
+ btrfs_put_transaction(prev_trans);
- return true;
-
-err:
- mutex_unlock(&fs_info->unused_bg_unpin_mutex);
- btrfs_dec_block_group_ro(bg);
- return false;
+ return ret == 0;
}
/*
@@ -1392,8 +1409,10 @@ void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info)
* We could have pending pinned extents for this block group,
* just delete them, we don't care about them anymore.
*/
- if (!clean_pinned_extents(trans, block_group))
+ if (!clean_pinned_extents(trans, block_group)) {
+ btrfs_dec_block_group_ro(block_group);
goto end_trans;
+ }
/*
* At this point, the block_group is read only and should fail
@@ -1442,7 +1461,7 @@ void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info)
/* Implicit trim during transaction commit. */
if (trimming)
- btrfs_get_block_group_trimming(block_group);
+ btrfs_freeze_block_group(block_group);
/*
* Btrfs_remove_chunk will abort the transaction if things go
@@ -1452,7 +1471,7 @@ void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info)
if (ret) {
if (trimming)
- btrfs_put_block_group_trimming(block_group);
+ btrfs_unfreeze_block_group(block_group);
goto end_trans;
}
@@ -1766,7 +1785,7 @@ static void link_block_group(struct btrfs_block_group *cache)
}
static struct btrfs_block_group *btrfs_create_block_group_cache(
- struct btrfs_fs_info *fs_info, u64 start, u64 size)
+ struct btrfs_fs_info *fs_info, u64 start)
{
struct btrfs_block_group *cache;
@@ -1782,7 +1801,6 @@ static struct btrfs_block_group *btrfs_create_block_group_cache(
}
cache->start = start;
- cache->length = size;
cache->fs_info = fs_info;
cache->full_stripe_len = btrfs_full_stripe_len(fs_info, start);
@@ -1801,7 +1819,7 @@ static struct btrfs_block_group *btrfs_create_block_group_cache(
INIT_LIST_HEAD(&cache->dirty_list);
INIT_LIST_HEAD(&cache->io_list);
btrfs_init_free_space_ctl(cache);
- atomic_set(&cache->trimming, 0);
+ atomic_set(&cache->frozen, 0);
mutex_init(&cache->free_space_lock);
btrfs_init_full_stripe_locks_tree(&cache->full_stripe_locks_root);
@@ -1862,25 +1880,44 @@ static int check_chunk_block_group_mappings(struct btrfs_fs_info *fs_info)
return ret;
}
+static int read_block_group_item(struct btrfs_block_group *cache,
+ struct btrfs_path *path,
+ const struct btrfs_key *key)
+{
+ struct extent_buffer *leaf = path->nodes[0];
+ struct btrfs_block_group_item bgi;
+ int slot = path->slots[0];
+
+ cache->length = key->offset;
+
+ read_extent_buffer(leaf, &bgi, btrfs_item_ptr_offset(leaf, slot),
+ sizeof(bgi));
+ cache->used = btrfs_stack_block_group_used(&bgi);
+ cache->flags = btrfs_stack_block_group_flags(&bgi);
+
+ return 0;
+}
+
static int read_one_block_group(struct btrfs_fs_info *info,
struct btrfs_path *path,
const struct btrfs_key *key,
int need_clear)
{
- struct extent_buffer *leaf = path->nodes[0];
struct btrfs_block_group *cache;
struct btrfs_space_info *space_info;
- struct btrfs_block_group_item bgi;
const bool mixed = btrfs_fs_incompat(info, MIXED_GROUPS);
- int slot = path->slots[0];
int ret;
ASSERT(key->type == BTRFS_BLOCK_GROUP_ITEM_KEY);
- cache = btrfs_create_block_group_cache(info, key->objectid, key->offset);
+ cache = btrfs_create_block_group_cache(info, key->objectid);
if (!cache)
return -ENOMEM;
+ ret = read_block_group_item(cache, path, key);
+ if (ret < 0)
+ goto error;
+
if (need_clear) {
/*
* When we mount with old space cache, we need to
@@ -1895,10 +1932,6 @@ static int read_one_block_group(struct btrfs_fs_info *info,
if (btrfs_test_opt(info, SPACE_CACHE))
cache->disk_cache_state = BTRFS_DC_CLEAR;
}
- read_extent_buffer(leaf, &bgi, btrfs_item_ptr_offset(leaf, slot),
- sizeof(bgi));
- cache->used = btrfs_stack_block_group_used(&bgi);
- cache->flags = btrfs_stack_block_group_flags(&bgi);
if (!mixed && ((cache->flags & BTRFS_BLOCK_GROUP_METADATA) &&
(cache->flags & BTRFS_BLOCK_GROUP_DATA))) {
btrfs_err(info,
@@ -1926,15 +1959,15 @@ static int read_one_block_group(struct btrfs_fs_info *info,
* are empty, and we can just add all the space in and be done with it.
* This saves us _a_lot_ of time, particularly in the full case.
*/
- if (key->offset == cache->used) {
+ if (cache->length == cache->used) {
cache->last_byte_to_unpin = (u64)-1;
cache->cached = BTRFS_CACHE_FINISHED;
btrfs_free_excluded_extents(cache);
} else if (cache->used == 0) {
cache->last_byte_to_unpin = (u64)-1;
cache->cached = BTRFS_CACHE_FINISHED;
- add_new_free_space(cache, key->objectid,
- key->objectid + key->offset);
+ add_new_free_space(cache, cache->start,
+ cache->start + cache->length);
btrfs_free_excluded_extents(cache);
}
@@ -1944,7 +1977,7 @@ static int read_one_block_group(struct btrfs_fs_info *info,
goto error;
}
trace_btrfs_add_block_group(info, cache, 0);
- btrfs_update_space_info(info, cache->flags, key->offset,
+ btrfs_update_space_info(info, cache->flags, cache->length,
cache->used, cache->bytes_super, &space_info);
cache->space_info = space_info;
@@ -1983,7 +2016,6 @@ int btrfs_read_block_groups(struct btrfs_fs_info *info)
path = btrfs_alloc_path();
if (!path)
return -ENOMEM;
- path->reada = READA_FORWARD;
cache_gen = btrfs_super_cache_generation(info->super_copy);
if (btrfs_test_opt(info, SPACE_CACHE) &&
@@ -2038,13 +2070,32 @@ error:
return ret;
}
+static int insert_block_group_item(struct btrfs_trans_handle *trans,
+ struct btrfs_block_group *block_group)
+{
+ struct btrfs_fs_info *fs_info = trans->fs_info;
+ struct btrfs_block_group_item bgi;
+ struct btrfs_root *root;
+ struct btrfs_key key;
+
+ spin_lock(&block_group->lock);
+ btrfs_set_stack_block_group_used(&bgi, block_group->used);
+ btrfs_set_stack_block_group_chunk_objectid(&bgi,
+ BTRFS_FIRST_CHUNK_TREE_OBJECTID);
+ btrfs_set_stack_block_group_flags(&bgi, block_group->flags);
+ key.objectid = block_group->start;
+ key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
+ key.offset = block_group->length;
+ spin_unlock(&block_group->lock);
+
+ root = fs_info->extent_root;
+ return btrfs_insert_item(trans, root, &key, &bgi, sizeof(bgi));
+}
+
void btrfs_create_pending_block_groups(struct btrfs_trans_handle *trans)
{
struct btrfs_fs_info *fs_info = trans->fs_info;
struct btrfs_block_group *block_group;
- struct btrfs_root *extent_root = fs_info->extent_root;
- struct btrfs_block_group_item item;
- struct btrfs_key key;
int ret = 0;
if (!trans->can_flush_pending_bgs)
@@ -2057,21 +2108,11 @@ void btrfs_create_pending_block_groups(struct btrfs_trans_handle *trans)
if (ret)
goto next;
- spin_lock(&block_group->lock);
- btrfs_set_stack_block_group_used(&item, block_group->used);
- btrfs_set_stack_block_group_chunk_objectid(&item,
- BTRFS_FIRST_CHUNK_TREE_OBJECTID);
- btrfs_set_stack_block_group_flags(&item, block_group->flags);
- key.objectid = block_group->start;
- key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
- key.offset = block_group->length;
- spin_unlock(&block_group->lock);
-
- ret = btrfs_insert_item(trans, extent_root, &key, &item,
- sizeof(item));
+ ret = insert_block_group_item(trans, block_group);
if (ret)
btrfs_abort_transaction(trans, ret);
- ret = btrfs_finish_chunk_alloc(trans, key.objectid, key.offset);
+ ret = btrfs_finish_chunk_alloc(trans, block_group->start,
+ block_group->length);
if (ret)
btrfs_abort_transaction(trans, ret);
add_block_group_free_space(trans, block_group);
@@ -2092,10 +2133,11 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans, u64 bytes_used,
btrfs_set_log_full_commit(trans);
- cache = btrfs_create_block_group_cache(fs_info, chunk_offset, size);
+ cache = btrfs_create_block_group_cache(fs_info, chunk_offset);
if (!cache)
return -ENOMEM;
+ cache->length = size;
cache->used = bytes_used;
cache->flags = type;
cache->last_byte_to_unpin = (u64)-1;
@@ -2306,13 +2348,13 @@ void btrfs_dec_block_group_ro(struct btrfs_block_group *cache)
spin_unlock(&sinfo->lock);
}
-static int write_one_cache_group(struct btrfs_trans_handle *trans,
- struct btrfs_path *path,
- struct btrfs_block_group *cache)
+static int update_block_group_item(struct btrfs_trans_handle *trans,
+ struct btrfs_path *path,
+ struct btrfs_block_group *cache)
{
struct btrfs_fs_info *fs_info = trans->fs_info;
int ret;
- struct btrfs_root *extent_root = fs_info->extent_root;
+ struct btrfs_root *root = fs_info->extent_root;
unsigned long bi;
struct extent_buffer *leaf;
struct btrfs_block_group_item bgi;
@@ -2322,7 +2364,7 @@ static int write_one_cache_group(struct btrfs_trans_handle *trans,
key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
key.offset = cache->length;
- ret = btrfs_search_slot(trans, extent_root, &key, path, 0, 1);
+ ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
if (ret) {
if (ret > 0)
ret = -ENOENT;
@@ -2634,7 +2676,7 @@ again:
}
}
if (!ret) {
- ret = write_one_cache_group(trans, path, cache);
+ ret = update_block_group_item(trans, path, cache);
/*
* Our block group might still be attached to the list
* of new block groups in the transaction handle of some
@@ -2783,7 +2825,7 @@ int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans)
}
}
if (!ret) {
- ret = write_one_cache_group(trans, path, cache);
+ ret = update_block_group_item(trans, path, cache);
/*
* One of the free space endio workers might have
* created a new block group while updating a free space
@@ -2800,7 +2842,7 @@ int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans)
if (ret == -ENOENT) {
wait_event(cur_trans->writer_wait,
atomic_read(&cur_trans->num_writers) == 1);
- ret = write_one_cache_group(trans, path, cache);
+ ret = update_block_group_item(trans, path, cache);
}
if (ret)
btrfs_abort_transaction(trans, ret);
@@ -3370,8 +3412,50 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info)
space_info->bytes_reserved > 0 ||
space_info->bytes_may_use > 0))
btrfs_dump_space_info(info, space_info, 0, 0);
+ WARN_ON(space_info->reclaim_size > 0);
list_del(&space_info->list);
btrfs_sysfs_remove_space_info(space_info);
}
return 0;
}
+
+void btrfs_freeze_block_group(struct btrfs_block_group *cache)
+{
+ atomic_inc(&cache->frozen);
+}
+
+void btrfs_unfreeze_block_group(struct btrfs_block_group *block_group)
+{
+ struct btrfs_fs_info *fs_info = block_group->fs_info;
+ struct extent_map_tree *em_tree;
+ struct extent_map *em;
+ bool cleanup;
+
+ spin_lock(&block_group->lock);
+ cleanup = (atomic_dec_and_test(&block_group->frozen) &&
+ block_group->removed);
+ spin_unlock(&block_group->lock);
+
+ if (cleanup) {
+ mutex_lock(&fs_info->chunk_mutex);
+ em_tree = &fs_info->mapping_tree;
+ write_lock(&em_tree->lock);
+ em = lookup_extent_mapping(em_tree, block_group->start,
+ 1);
+ BUG_ON(!em); /* logic error, can't happen */
+ remove_extent_mapping(em_tree, em);
+ write_unlock(&em_tree->lock);
+ mutex_unlock(&fs_info->chunk_mutex);
+
+ /* once for us and once for the tree */
+ free_extent_map(em);
+ free_extent_map(em);
+
+ /*
+ * We may have left one free space entry and other possible
+ * tasks trimming this block group have left 1 entry each one.
+ * Free them if any.
+ */
+ __btrfs_remove_free_space_cache(block_group->free_space_ctl);
+ }
+}