summaryrefslogtreecommitdiffstats
path: root/fs/btrfs/transaction.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/btrfs/transaction.c')
-rw-r--r--fs/btrfs/transaction.c208
1 files changed, 101 insertions, 107 deletions
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index beb6c69cd1e5..20c6ac1a5de7 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -21,6 +21,7 @@
#include "dev-replace.h"
#include "qgroup.h"
#include "block-group.h"
+#include "space-info.h"
#define BTRFS_ROOT_TRANS_TAG 0
@@ -141,7 +142,7 @@ void btrfs_put_transaction(struct btrfs_transaction *transaction)
struct btrfs_block_group,
bg_list);
list_del_init(&cache->bg_list);
- btrfs_put_block_group_trimming(cache);
+ btrfs_unfreeze_block_group(cache);
btrfs_put_block_group(cache);
}
WARN_ON(!list_empty(&transaction->dev_update_list));
@@ -221,7 +222,7 @@ void btrfs_trans_release_chunk_metadata(struct btrfs_trans_handle *trans)
WARN_ON_ONCE(!list_empty(&trans->new_bgs));
btrfs_block_rsv_release(fs_info, &fs_info->chunk_block_rsv,
- trans->chunk_bytes_reserved);
+ trans->chunk_bytes_reserved, NULL);
trans->chunk_bytes_reserved = 0;
}
@@ -243,7 +244,7 @@ loop:
cur_trans = fs_info->running_transaction;
if (cur_trans) {
- if (cur_trans->aborted) {
+ if (TRANS_ABORTED(cur_trans)) {
spin_unlock(&fs_info->trans_lock);
return cur_trans->aborted;
}
@@ -336,6 +337,8 @@ loop:
list_add_tail(&cur_trans->list, &fs_info->trans_list);
extent_io_tree_init(fs_info, &cur_trans->dirty_pages,
IO_TREE_TRANS_DIRTY_PAGES, fs_info->btree_inode);
+ extent_io_tree_init(fs_info, &cur_trans->pinned_extents,
+ IO_TREE_FS_PINNED_EXTENTS, NULL);
fs_info->generation++;
cur_trans->transid = fs_info->generation;
fs_info->running_transaction = cur_trans;
@@ -346,10 +349,10 @@ loop:
}
/*
- * this does all the record keeping required to make sure that a reference
- * counted root is properly recorded in a given transaction. This is required
- * to make sure the old root from before we joined the transaction is deleted
- * when the transaction commits
+ * This does all the record keeping required to make sure that a shareable root
+ * is properly recorded in a given transaction. This is required to make sure
+ * the old root from before we joined the transaction is deleted when the
+ * transaction commits.
*/
static int record_root_in_trans(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
@@ -357,7 +360,7 @@ static int record_root_in_trans(struct btrfs_trans_handle *trans,
{
struct btrfs_fs_info *fs_info = root->fs_info;
- if ((test_bit(BTRFS_ROOT_REF_COWS, &root->state) &&
+ if ((test_bit(BTRFS_ROOT_SHAREABLE, &root->state) &&
root->last_trans < trans->transid) || force) {
WARN_ON(root == fs_info->extent_root);
WARN_ON(!force && root->commit_root != root->node);
@@ -436,7 +439,7 @@ int btrfs_record_root_in_trans(struct btrfs_trans_handle *trans,
{
struct btrfs_fs_info *fs_info = root->fs_info;
- if (!test_bit(BTRFS_ROOT_REF_COWS, &root->state))
+ if (!test_bit(BTRFS_ROOT_SHAREABLE, &root->state))
return 0;
/*
@@ -459,7 +462,7 @@ static inline int is_transaction_blocked(struct btrfs_transaction *trans)
{
return (trans->state >= TRANS_STATE_COMMIT_START &&
trans->state < TRANS_STATE_UNBLOCKED &&
- !trans->aborted);
+ !TRANS_ABORTED(trans));
}
/* wait for commit against the current transaction to become unblocked
@@ -478,7 +481,7 @@ static void wait_current_trans(struct btrfs_fs_info *fs_info)
wait_event(fs_info->transaction_wait,
cur_trans->state >= TRANS_STATE_UNBLOCKED ||
- cur_trans->aborted);
+ TRANS_ABORTED(cur_trans));
btrfs_put_transaction(cur_trans);
} else {
spin_unlock(&fs_info->trans_lock);
@@ -501,7 +504,7 @@ static inline bool need_reserve_reloc_root(struct btrfs_root *root)
struct btrfs_fs_info *fs_info = root->fs_info;
if (!fs_info->reloc_ctl ||
- !test_bit(BTRFS_ROOT_REF_COWS, &root->state) ||
+ !test_bit(BTRFS_ROOT_SHAREABLE, &root->state) ||
root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
root->reloc_root)
return false;
@@ -521,6 +524,7 @@ start_transaction(struct btrfs_root *root, unsigned int num_items,
u64 num_bytes = 0;
u64 qgroup_reserved = 0;
bool reloc_reserved = false;
+ bool do_chunk_alloc = false;
int ret;
/* Send isn't supposed to start transactions. */
@@ -561,7 +565,8 @@ start_transaction(struct btrfs_root *root, unsigned int num_items,
* refill that amount for whatever is missing in the reserve.
*/
num_bytes = btrfs_calc_insert_metadata_size(fs_info, num_items);
- if (delayed_refs_rsv->full == 0) {
+ if (flush == BTRFS_RESERVE_FLUSH_ALL &&
+ delayed_refs_rsv->full == 0) {
delayed_refs_bytes = num_bytes;
num_bytes <<= 1;
}
@@ -582,6 +587,9 @@ start_transaction(struct btrfs_root *root, unsigned int num_items,
delayed_refs_bytes);
num_bytes -= delayed_refs_bytes;
}
+
+ if (rsv->space_info->force_alloc)
+ do_chunk_alloc = true;
} else if (num_items == 0 && flush == BTRFS_RESERVE_FLUSH_ALL &&
!delayed_refs_rsv->full) {
/*
@@ -660,10 +668,32 @@ again:
}
got_it:
- btrfs_record_root_in_trans(h, root);
-
if (!current->journal_info)
current->journal_info = h;
+
+ /*
+ * If the space_info is marked ALLOC_FORCE then we'll get upgraded to
+ * ALLOC_FORCE the first run through, and then we won't allocate for
+ * anybody else who races in later. We don't care about the return
+ * value here.
+ */
+ if (do_chunk_alloc && num_bytes) {
+ u64 flags = h->block_rsv->space_info->flags;
+
+ btrfs_chunk_alloc(h, btrfs_get_alloc_profile(fs_info, flags),
+ CHUNK_ALLOC_NO_FORCE);
+ }
+
+ /*
+ * btrfs_record_root_in_trans() needs to alloc new extents, and may
+ * call btrfs_join_transaction() while we're also starting a
+ * transaction.
+ *
+ * Thus it need to be called after current->journal_info initialized,
+ * or we can deadlock.
+ */
+ btrfs_record_root_in_trans(h, root);
+
return h;
join_fail:
@@ -673,7 +703,7 @@ join_fail:
alloc_fail:
if (num_bytes)
btrfs_block_rsv_release(fs_info, &fs_info->trans_block_rsv,
- num_bytes);
+ num_bytes, NULL);
reserve_fail:
btrfs_qgroup_free_meta_pertrans(root, qgroup_reserved);
return ERR_PTR(ret);
@@ -688,43 +718,10 @@ struct btrfs_trans_handle *btrfs_start_transaction(struct btrfs_root *root,
struct btrfs_trans_handle *btrfs_start_transaction_fallback_global_rsv(
struct btrfs_root *root,
- unsigned int num_items,
- int min_factor)
+ unsigned int num_items)
{
- struct btrfs_fs_info *fs_info = root->fs_info;
- struct btrfs_trans_handle *trans;
- u64 num_bytes;
- int ret;
-
- /*
- * We have two callers: unlink and block group removal. The
- * former should succeed even if we will temporarily exceed
- * quota and the latter operates on the extent root so
- * qgroup enforcement is ignored anyway.
- */
- trans = start_transaction(root, num_items, TRANS_START,
- BTRFS_RESERVE_FLUSH_ALL, false);
- if (!IS_ERR(trans) || PTR_ERR(trans) != -ENOSPC)
- return trans;
-
- trans = btrfs_start_transaction(root, 0);
- if (IS_ERR(trans))
- return trans;
-
- num_bytes = btrfs_calc_insert_metadata_size(fs_info, num_items);
- ret = btrfs_cond_migrate_bytes(fs_info, &fs_info->trans_block_rsv,
- num_bytes, min_factor);
- if (ret) {
- btrfs_end_transaction(trans);
- return ERR_PTR(ret);
- }
-
- trans->block_rsv = &fs_info->trans_block_rsv;
- trans->bytes_reserved = num_bytes;
- trace_btrfs_space_reservation(fs_info, "transaction",
- trans->transid, num_bytes, 1);
-
- return trans;
+ return start_transaction(root, num_items, TRANS_START,
+ BTRFS_RESERVE_FLUSH_ALL_STEAL, false);
}
struct btrfs_trans_handle *btrfs_join_transaction(struct btrfs_root *root)
@@ -896,7 +893,7 @@ static void btrfs_trans_release_metadata(struct btrfs_trans_handle *trans)
trace_btrfs_space_reservation(fs_info, "transaction",
trans->transid, trans->bytes_reserved, 0);
btrfs_block_rsv_release(fs_info, trans->block_rsv,
- trans->bytes_reserved);
+ trans->bytes_reserved, NULL);
trans->bytes_reserved = 0;
}
@@ -937,10 +934,13 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans,
if (throttle)
btrfs_run_delayed_iputs(info);
- if (trans->aborted ||
+ if (TRANS_ABORTED(trans) ||
test_bit(BTRFS_FS_STATE_ERROR, &info->fs_state)) {
wake_up_process(info->transaction_kthread);
- err = -EIO;
+ if (TRANS_ABORTED(trans))
+ err = trans->aborted;
+ else
+ err = -EROFS;
}
kmem_cache_free(btrfs_trans_handle_cachep, trans);
@@ -1262,8 +1262,10 @@ void btrfs_add_dead_root(struct btrfs_root *root)
struct btrfs_fs_info *fs_info = root->fs_info;
spin_lock(&fs_info->trans_lock);
- if (list_empty(&root->root_list))
+ if (list_empty(&root->root_list)) {
+ btrfs_grab_root(root);
list_add_tail(&root->root_list, &fs_info->dead_roots);
+ }
spin_unlock(&fs_info->trans_lock);
}
@@ -1477,7 +1479,6 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
u64 index = 0;
u64 objectid;
u64 root_flags;
- uuid_le new_uuid;
ASSERT(pending->path);
path = pending->path;
@@ -1570,8 +1571,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
btrfs_set_root_generation_v2(new_root_item,
trans->transid);
- uuid_le_gen(&new_uuid);
- memcpy(new_root_item->uuid, new_uuid.b, BTRFS_UUID_SIZE);
+ generate_random_guid(new_root_item->uuid);
memcpy(new_root_item->parent_uuid, root->root_item.uuid,
BTRFS_UUID_SIZE);
if (!(root_flags & BTRFS_ROOT_SUBVOL_RDONLY)) {
@@ -1633,7 +1633,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
}
key.offset = (u64)-1;
- pending->snap = btrfs_read_fs_root_no_name(fs_info, &key);
+ pending->snap = btrfs_get_new_fs_root(fs_info, objectid, pending->anon_dev);
if (IS_ERR(pending->snap)) {
ret = PTR_ERR(pending->snap);
btrfs_abort_transaction(trans, ret);
@@ -1682,7 +1682,8 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
btrfs_abort_transaction(trans, ret);
goto fail;
}
- ret = btrfs_uuid_tree_add(trans, new_uuid.b, BTRFS_UUID_KEY_SUBVOL,
+ ret = btrfs_uuid_tree_add(trans, new_root_item->uuid,
+ BTRFS_UUID_KEY_SUBVOL,
objectid);
if (ret) {
btrfs_abort_transaction(trans, ret);
@@ -1794,7 +1795,8 @@ static void wait_current_trans_commit_start(struct btrfs_fs_info *fs_info,
struct btrfs_transaction *trans)
{
wait_event(fs_info->transaction_blocked_wait,
- trans->state >= TRANS_STATE_COMMIT_START || trans->aborted);
+ trans->state >= TRANS_STATE_COMMIT_START ||
+ TRANS_ABORTED(trans));
}
/*
@@ -1806,7 +1808,8 @@ static void wait_current_trans_commit_start_and_unblock(
struct btrfs_transaction *trans)
{
wait_event(fs_info->transaction_wait,
- trans->state >= TRANS_STATE_UNBLOCKED || trans->aborted);
+ trans->state >= TRANS_STATE_UNBLOCKED ||
+ TRANS_ABORTED(trans));
}
/*
@@ -2026,7 +2029,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans)
trans->dirty = true;
/* Stop the commit early if ->aborted is set */
- if (unlikely(READ_ONCE(cur_trans->aborted))) {
+ if (TRANS_ABORTED(cur_trans)) {
ret = cur_trans->aborted;
btrfs_end_transaction(trans);
return ret;
@@ -2100,7 +2103,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans)
wait_for_commit(cur_trans);
- if (unlikely(cur_trans->aborted))
+ if (TRANS_ABORTED(cur_trans))
ret = cur_trans->aborted;
btrfs_put_transaction(cur_trans);
@@ -2119,7 +2122,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans)
spin_unlock(&fs_info->trans_lock);
wait_for_commit(prev_trans);
- ret = prev_trans->aborted;
+ ret = READ_ONCE(prev_trans->aborted);
btrfs_put_transaction(prev_trans);
if (ret)
@@ -2173,8 +2176,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans)
wait_event(cur_trans->writer_wait,
atomic_read(&cur_trans->num_writers) == 1);
- /* ->aborted might be set after the previous check, so check it */
- if (unlikely(READ_ONCE(cur_trans->aborted))) {
+ if (TRANS_ABORTED(cur_trans)) {
ret = cur_trans->aborted;
goto scrub_continue;
}
@@ -2191,10 +2193,8 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans)
* core function of the snapshot creation.
*/
ret = create_pending_snapshots(trans);
- if (ret) {
- mutex_unlock(&fs_info->reloc_mutex);
- goto scrub_continue;
- }
+ if (ret)
+ goto unlock_reloc;
/*
* We insert the dir indexes of the snapshots and update the inode
@@ -2207,16 +2207,12 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans)
* the nodes and leaves.
*/
ret = btrfs_run_delayed_items(trans);
- if (ret) {
- mutex_unlock(&fs_info->reloc_mutex);
- goto scrub_continue;
- }
+ if (ret)
+ goto unlock_reloc;
ret = btrfs_run_delayed_refs(trans, (unsigned long)-1);
- if (ret) {
- mutex_unlock(&fs_info->reloc_mutex);
- goto scrub_continue;
- }
+ if (ret)
+ goto unlock_reloc;
/*
* make sure none of the code above managed to slip in a
@@ -2242,11 +2238,8 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans)
mutex_lock(&fs_info->tree_log_mutex);
ret = commit_fs_roots(trans);
- if (ret) {
- mutex_unlock(&fs_info->tree_log_mutex);
- mutex_unlock(&fs_info->reloc_mutex);
- goto scrub_continue;
- }
+ if (ret)
+ goto unlock_tree_log;
/*
* Since the transaction is done, we can apply the pending changes
@@ -2264,39 +2257,28 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans)
* new delayed refs. Must handle them or qgroup can be wrong.
*/
ret = btrfs_run_delayed_refs(trans, (unsigned long)-1);
- if (ret) {
- mutex_unlock(&fs_info->tree_log_mutex);
- mutex_unlock(&fs_info->reloc_mutex);
- goto scrub_continue;
- }
+ if (ret)
+ goto unlock_tree_log;
/*
* Since fs roots are all committed, we can get a quite accurate
* new_roots. So let's do quota accounting.
*/
ret = btrfs_qgroup_account_extents(trans);
- if (ret < 0) {
- mutex_unlock(&fs_info->tree_log_mutex);
- mutex_unlock(&fs_info->reloc_mutex);
- goto scrub_continue;
- }
+ if (ret < 0)
+ goto unlock_tree_log;
ret = commit_cowonly_roots(trans);
- if (ret) {
- mutex_unlock(&fs_info->tree_log_mutex);
- mutex_unlock(&fs_info->reloc_mutex);
- goto scrub_continue;
- }
+ if (ret)
+ goto unlock_tree_log;
/*
* The tasks which save the space cache and inode cache may also
* update ->aborted, check it.
*/
- if (unlikely(READ_ONCE(cur_trans->aborted))) {
+ if (TRANS_ABORTED(cur_trans)) {
ret = cur_trans->aborted;
- mutex_unlock(&fs_info->tree_log_mutex);
- mutex_unlock(&fs_info->reloc_mutex);
- goto scrub_continue;
+ goto unlock_tree_log;
}
btrfs_prepare_extent_commit(fs_info);
@@ -2343,6 +2325,10 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans)
if (ret) {
btrfs_handle_fs_error(fs_info, ret,
"Error while writing out transaction");
+ /*
+ * reloc_mutex has been unlocked, tree_log_mutex is still held
+ * but we can't jump to unlock_tree_log causing double unlock
+ */
mutex_unlock(&fs_info->tree_log_mutex);
goto scrub_continue;
}
@@ -2368,7 +2354,6 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans)
*/
cur_trans->state = TRANS_STATE_COMPLETED;
wake_up(&cur_trans->commit_wait);
- clear_bit(BTRFS_FS_NEED_ASYNC_COMMIT, &fs_info->flags);
spin_lock(&fs_info->trans_lock);
list_del_init(&cur_trans->list);
@@ -2391,6 +2376,10 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans)
return ret;
+unlock_tree_log:
+ mutex_unlock(&fs_info->tree_log_mutex);
+unlock_reloc:
+ mutex_unlock(&fs_info->reloc_mutex);
scrub_continue:
btrfs_scrub_continue(fs_info);
cleanup_transaction:
@@ -2434,13 +2423,18 @@ int btrfs_clean_one_deleted_snapshot(struct btrfs_root *root)
btrfs_debug(fs_info, "cleaner removing %llu", root->root_key.objectid);
btrfs_kill_all_delayed_nodes(root);
+ if (root->ino_cache_inode) {
+ iput(root->ino_cache_inode);
+ root->ino_cache_inode = NULL;
+ }
if (btrfs_header_backref_rev(root->node) <
BTRFS_MIXED_BACKREF_REV)
- ret = btrfs_drop_snapshot(root, NULL, 0, 0);
+ ret = btrfs_drop_snapshot(root, 0, 0);
else
- ret = btrfs_drop_snapshot(root, NULL, 1, 0);
+ ret = btrfs_drop_snapshot(root, 1, 0);
+ btrfs_put_root(root);
return (ret < 0) ? 0 : 1;
}