diff options
Diffstat (limited to 'fs/btrfs/transaction.c')
-rw-r--r-- | fs/btrfs/transaction.c | 208 |
1 files changed, 101 insertions, 107 deletions
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index beb6c69cd1e5..20c6ac1a5de7 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -21,6 +21,7 @@ #include "dev-replace.h" #include "qgroup.h" #include "block-group.h" +#include "space-info.h" #define BTRFS_ROOT_TRANS_TAG 0 @@ -141,7 +142,7 @@ void btrfs_put_transaction(struct btrfs_transaction *transaction) struct btrfs_block_group, bg_list); list_del_init(&cache->bg_list); - btrfs_put_block_group_trimming(cache); + btrfs_unfreeze_block_group(cache); btrfs_put_block_group(cache); } WARN_ON(!list_empty(&transaction->dev_update_list)); @@ -221,7 +222,7 @@ void btrfs_trans_release_chunk_metadata(struct btrfs_trans_handle *trans) WARN_ON_ONCE(!list_empty(&trans->new_bgs)); btrfs_block_rsv_release(fs_info, &fs_info->chunk_block_rsv, - trans->chunk_bytes_reserved); + trans->chunk_bytes_reserved, NULL); trans->chunk_bytes_reserved = 0; } @@ -243,7 +244,7 @@ loop: cur_trans = fs_info->running_transaction; if (cur_trans) { - if (cur_trans->aborted) { + if (TRANS_ABORTED(cur_trans)) { spin_unlock(&fs_info->trans_lock); return cur_trans->aborted; } @@ -336,6 +337,8 @@ loop: list_add_tail(&cur_trans->list, &fs_info->trans_list); extent_io_tree_init(fs_info, &cur_trans->dirty_pages, IO_TREE_TRANS_DIRTY_PAGES, fs_info->btree_inode); + extent_io_tree_init(fs_info, &cur_trans->pinned_extents, + IO_TREE_FS_PINNED_EXTENTS, NULL); fs_info->generation++; cur_trans->transid = fs_info->generation; fs_info->running_transaction = cur_trans; @@ -346,10 +349,10 @@ loop: } /* - * this does all the record keeping required to make sure that a reference - * counted root is properly recorded in a given transaction. This is required - * to make sure the old root from before we joined the transaction is deleted - * when the transaction commits + * This does all the record keeping required to make sure that a shareable root + * is properly recorded in a given transaction. This is required to make sure + * the old root from before we joined the transaction is deleted when the + * transaction commits. */ static int record_root_in_trans(struct btrfs_trans_handle *trans, struct btrfs_root *root, @@ -357,7 +360,7 @@ static int record_root_in_trans(struct btrfs_trans_handle *trans, { struct btrfs_fs_info *fs_info = root->fs_info; - if ((test_bit(BTRFS_ROOT_REF_COWS, &root->state) && + if ((test_bit(BTRFS_ROOT_SHAREABLE, &root->state) && root->last_trans < trans->transid) || force) { WARN_ON(root == fs_info->extent_root); WARN_ON(!force && root->commit_root != root->node); @@ -436,7 +439,7 @@ int btrfs_record_root_in_trans(struct btrfs_trans_handle *trans, { struct btrfs_fs_info *fs_info = root->fs_info; - if (!test_bit(BTRFS_ROOT_REF_COWS, &root->state)) + if (!test_bit(BTRFS_ROOT_SHAREABLE, &root->state)) return 0; /* @@ -459,7 +462,7 @@ static inline int is_transaction_blocked(struct btrfs_transaction *trans) { return (trans->state >= TRANS_STATE_COMMIT_START && trans->state < TRANS_STATE_UNBLOCKED && - !trans->aborted); + !TRANS_ABORTED(trans)); } /* wait for commit against the current transaction to become unblocked @@ -478,7 +481,7 @@ static void wait_current_trans(struct btrfs_fs_info *fs_info) wait_event(fs_info->transaction_wait, cur_trans->state >= TRANS_STATE_UNBLOCKED || - cur_trans->aborted); + TRANS_ABORTED(cur_trans)); btrfs_put_transaction(cur_trans); } else { spin_unlock(&fs_info->trans_lock); @@ -501,7 +504,7 @@ static inline bool need_reserve_reloc_root(struct btrfs_root *root) struct btrfs_fs_info *fs_info = root->fs_info; if (!fs_info->reloc_ctl || - !test_bit(BTRFS_ROOT_REF_COWS, &root->state) || + !test_bit(BTRFS_ROOT_SHAREABLE, &root->state) || root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID || root->reloc_root) return false; @@ -521,6 +524,7 @@ start_transaction(struct btrfs_root *root, unsigned int num_items, u64 num_bytes = 0; u64 qgroup_reserved = 0; bool reloc_reserved = false; + bool do_chunk_alloc = false; int ret; /* Send isn't supposed to start transactions. */ @@ -561,7 +565,8 @@ start_transaction(struct btrfs_root *root, unsigned int num_items, * refill that amount for whatever is missing in the reserve. */ num_bytes = btrfs_calc_insert_metadata_size(fs_info, num_items); - if (delayed_refs_rsv->full == 0) { + if (flush == BTRFS_RESERVE_FLUSH_ALL && + delayed_refs_rsv->full == 0) { delayed_refs_bytes = num_bytes; num_bytes <<= 1; } @@ -582,6 +587,9 @@ start_transaction(struct btrfs_root *root, unsigned int num_items, delayed_refs_bytes); num_bytes -= delayed_refs_bytes; } + + if (rsv->space_info->force_alloc) + do_chunk_alloc = true; } else if (num_items == 0 && flush == BTRFS_RESERVE_FLUSH_ALL && !delayed_refs_rsv->full) { /* @@ -660,10 +668,32 @@ again: } got_it: - btrfs_record_root_in_trans(h, root); - if (!current->journal_info) current->journal_info = h; + + /* + * If the space_info is marked ALLOC_FORCE then we'll get upgraded to + * ALLOC_FORCE the first run through, and then we won't allocate for + * anybody else who races in later. We don't care about the return + * value here. + */ + if (do_chunk_alloc && num_bytes) { + u64 flags = h->block_rsv->space_info->flags; + + btrfs_chunk_alloc(h, btrfs_get_alloc_profile(fs_info, flags), + CHUNK_ALLOC_NO_FORCE); + } + + /* + * btrfs_record_root_in_trans() needs to alloc new extents, and may + * call btrfs_join_transaction() while we're also starting a + * transaction. + * + * Thus it need to be called after current->journal_info initialized, + * or we can deadlock. + */ + btrfs_record_root_in_trans(h, root); + return h; join_fail: @@ -673,7 +703,7 @@ join_fail: alloc_fail: if (num_bytes) btrfs_block_rsv_release(fs_info, &fs_info->trans_block_rsv, - num_bytes); + num_bytes, NULL); reserve_fail: btrfs_qgroup_free_meta_pertrans(root, qgroup_reserved); return ERR_PTR(ret); @@ -688,43 +718,10 @@ struct btrfs_trans_handle *btrfs_start_transaction(struct btrfs_root *root, struct btrfs_trans_handle *btrfs_start_transaction_fallback_global_rsv( struct btrfs_root *root, - unsigned int num_items, - int min_factor) + unsigned int num_items) { - struct btrfs_fs_info *fs_info = root->fs_info; - struct btrfs_trans_handle *trans; - u64 num_bytes; - int ret; - - /* - * We have two callers: unlink and block group removal. The - * former should succeed even if we will temporarily exceed - * quota and the latter operates on the extent root so - * qgroup enforcement is ignored anyway. - */ - trans = start_transaction(root, num_items, TRANS_START, - BTRFS_RESERVE_FLUSH_ALL, false); - if (!IS_ERR(trans) || PTR_ERR(trans) != -ENOSPC) - return trans; - - trans = btrfs_start_transaction(root, 0); - if (IS_ERR(trans)) - return trans; - - num_bytes = btrfs_calc_insert_metadata_size(fs_info, num_items); - ret = btrfs_cond_migrate_bytes(fs_info, &fs_info->trans_block_rsv, - num_bytes, min_factor); - if (ret) { - btrfs_end_transaction(trans); - return ERR_PTR(ret); - } - - trans->block_rsv = &fs_info->trans_block_rsv; - trans->bytes_reserved = num_bytes; - trace_btrfs_space_reservation(fs_info, "transaction", - trans->transid, num_bytes, 1); - - return trans; + return start_transaction(root, num_items, TRANS_START, + BTRFS_RESERVE_FLUSH_ALL_STEAL, false); } struct btrfs_trans_handle *btrfs_join_transaction(struct btrfs_root *root) @@ -896,7 +893,7 @@ static void btrfs_trans_release_metadata(struct btrfs_trans_handle *trans) trace_btrfs_space_reservation(fs_info, "transaction", trans->transid, trans->bytes_reserved, 0); btrfs_block_rsv_release(fs_info, trans->block_rsv, - trans->bytes_reserved); + trans->bytes_reserved, NULL); trans->bytes_reserved = 0; } @@ -937,10 +934,13 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans, if (throttle) btrfs_run_delayed_iputs(info); - if (trans->aborted || + if (TRANS_ABORTED(trans) || test_bit(BTRFS_FS_STATE_ERROR, &info->fs_state)) { wake_up_process(info->transaction_kthread); - err = -EIO; + if (TRANS_ABORTED(trans)) + err = trans->aborted; + else + err = -EROFS; } kmem_cache_free(btrfs_trans_handle_cachep, trans); @@ -1262,8 +1262,10 @@ void btrfs_add_dead_root(struct btrfs_root *root) struct btrfs_fs_info *fs_info = root->fs_info; spin_lock(&fs_info->trans_lock); - if (list_empty(&root->root_list)) + if (list_empty(&root->root_list)) { + btrfs_grab_root(root); list_add_tail(&root->root_list, &fs_info->dead_roots); + } spin_unlock(&fs_info->trans_lock); } @@ -1477,7 +1479,6 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans, u64 index = 0; u64 objectid; u64 root_flags; - uuid_le new_uuid; ASSERT(pending->path); path = pending->path; @@ -1570,8 +1571,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans, btrfs_set_root_generation_v2(new_root_item, trans->transid); - uuid_le_gen(&new_uuid); - memcpy(new_root_item->uuid, new_uuid.b, BTRFS_UUID_SIZE); + generate_random_guid(new_root_item->uuid); memcpy(new_root_item->parent_uuid, root->root_item.uuid, BTRFS_UUID_SIZE); if (!(root_flags & BTRFS_ROOT_SUBVOL_RDONLY)) { @@ -1633,7 +1633,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans, } key.offset = (u64)-1; - pending->snap = btrfs_read_fs_root_no_name(fs_info, &key); + pending->snap = btrfs_get_new_fs_root(fs_info, objectid, pending->anon_dev); if (IS_ERR(pending->snap)) { ret = PTR_ERR(pending->snap); btrfs_abort_transaction(trans, ret); @@ -1682,7 +1682,8 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans, btrfs_abort_transaction(trans, ret); goto fail; } - ret = btrfs_uuid_tree_add(trans, new_uuid.b, BTRFS_UUID_KEY_SUBVOL, + ret = btrfs_uuid_tree_add(trans, new_root_item->uuid, + BTRFS_UUID_KEY_SUBVOL, objectid); if (ret) { btrfs_abort_transaction(trans, ret); @@ -1794,7 +1795,8 @@ static void wait_current_trans_commit_start(struct btrfs_fs_info *fs_info, struct btrfs_transaction *trans) { wait_event(fs_info->transaction_blocked_wait, - trans->state >= TRANS_STATE_COMMIT_START || trans->aborted); + trans->state >= TRANS_STATE_COMMIT_START || + TRANS_ABORTED(trans)); } /* @@ -1806,7 +1808,8 @@ static void wait_current_trans_commit_start_and_unblock( struct btrfs_transaction *trans) { wait_event(fs_info->transaction_wait, - trans->state >= TRANS_STATE_UNBLOCKED || trans->aborted); + trans->state >= TRANS_STATE_UNBLOCKED || + TRANS_ABORTED(trans)); } /* @@ -2026,7 +2029,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans) trans->dirty = true; /* Stop the commit early if ->aborted is set */ - if (unlikely(READ_ONCE(cur_trans->aborted))) { + if (TRANS_ABORTED(cur_trans)) { ret = cur_trans->aborted; btrfs_end_transaction(trans); return ret; @@ -2100,7 +2103,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans) wait_for_commit(cur_trans); - if (unlikely(cur_trans->aborted)) + if (TRANS_ABORTED(cur_trans)) ret = cur_trans->aborted; btrfs_put_transaction(cur_trans); @@ -2119,7 +2122,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans) spin_unlock(&fs_info->trans_lock); wait_for_commit(prev_trans); - ret = prev_trans->aborted; + ret = READ_ONCE(prev_trans->aborted); btrfs_put_transaction(prev_trans); if (ret) @@ -2173,8 +2176,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans) wait_event(cur_trans->writer_wait, atomic_read(&cur_trans->num_writers) == 1); - /* ->aborted might be set after the previous check, so check it */ - if (unlikely(READ_ONCE(cur_trans->aborted))) { + if (TRANS_ABORTED(cur_trans)) { ret = cur_trans->aborted; goto scrub_continue; } @@ -2191,10 +2193,8 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans) * core function of the snapshot creation. */ ret = create_pending_snapshots(trans); - if (ret) { - mutex_unlock(&fs_info->reloc_mutex); - goto scrub_continue; - } + if (ret) + goto unlock_reloc; /* * We insert the dir indexes of the snapshots and update the inode @@ -2207,16 +2207,12 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans) * the nodes and leaves. */ ret = btrfs_run_delayed_items(trans); - if (ret) { - mutex_unlock(&fs_info->reloc_mutex); - goto scrub_continue; - } + if (ret) + goto unlock_reloc; ret = btrfs_run_delayed_refs(trans, (unsigned long)-1); - if (ret) { - mutex_unlock(&fs_info->reloc_mutex); - goto scrub_continue; - } + if (ret) + goto unlock_reloc; /* * make sure none of the code above managed to slip in a @@ -2242,11 +2238,8 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans) mutex_lock(&fs_info->tree_log_mutex); ret = commit_fs_roots(trans); - if (ret) { - mutex_unlock(&fs_info->tree_log_mutex); - mutex_unlock(&fs_info->reloc_mutex); - goto scrub_continue; - } + if (ret) + goto unlock_tree_log; /* * Since the transaction is done, we can apply the pending changes @@ -2264,39 +2257,28 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans) * new delayed refs. Must handle them or qgroup can be wrong. */ ret = btrfs_run_delayed_refs(trans, (unsigned long)-1); - if (ret) { - mutex_unlock(&fs_info->tree_log_mutex); - mutex_unlock(&fs_info->reloc_mutex); - goto scrub_continue; - } + if (ret) + goto unlock_tree_log; /* * Since fs roots are all committed, we can get a quite accurate * new_roots. So let's do quota accounting. */ ret = btrfs_qgroup_account_extents(trans); - if (ret < 0) { - mutex_unlock(&fs_info->tree_log_mutex); - mutex_unlock(&fs_info->reloc_mutex); - goto scrub_continue; - } + if (ret < 0) + goto unlock_tree_log; ret = commit_cowonly_roots(trans); - if (ret) { - mutex_unlock(&fs_info->tree_log_mutex); - mutex_unlock(&fs_info->reloc_mutex); - goto scrub_continue; - } + if (ret) + goto unlock_tree_log; /* * The tasks which save the space cache and inode cache may also * update ->aborted, check it. */ - if (unlikely(READ_ONCE(cur_trans->aborted))) { + if (TRANS_ABORTED(cur_trans)) { ret = cur_trans->aborted; - mutex_unlock(&fs_info->tree_log_mutex); - mutex_unlock(&fs_info->reloc_mutex); - goto scrub_continue; + goto unlock_tree_log; } btrfs_prepare_extent_commit(fs_info); @@ -2343,6 +2325,10 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans) if (ret) { btrfs_handle_fs_error(fs_info, ret, "Error while writing out transaction"); + /* + * reloc_mutex has been unlocked, tree_log_mutex is still held + * but we can't jump to unlock_tree_log causing double unlock + */ mutex_unlock(&fs_info->tree_log_mutex); goto scrub_continue; } @@ -2368,7 +2354,6 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans) */ cur_trans->state = TRANS_STATE_COMPLETED; wake_up(&cur_trans->commit_wait); - clear_bit(BTRFS_FS_NEED_ASYNC_COMMIT, &fs_info->flags); spin_lock(&fs_info->trans_lock); list_del_init(&cur_trans->list); @@ -2391,6 +2376,10 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans) return ret; +unlock_tree_log: + mutex_unlock(&fs_info->tree_log_mutex); +unlock_reloc: + mutex_unlock(&fs_info->reloc_mutex); scrub_continue: btrfs_scrub_continue(fs_info); cleanup_transaction: @@ -2434,13 +2423,18 @@ int btrfs_clean_one_deleted_snapshot(struct btrfs_root *root) btrfs_debug(fs_info, "cleaner removing %llu", root->root_key.objectid); btrfs_kill_all_delayed_nodes(root); + if (root->ino_cache_inode) { + iput(root->ino_cache_inode); + root->ino_cache_inode = NULL; + } if (btrfs_header_backref_rev(root->node) < BTRFS_MIXED_BACKREF_REV) - ret = btrfs_drop_snapshot(root, NULL, 0, 0); + ret = btrfs_drop_snapshot(root, 0, 0); else - ret = btrfs_drop_snapshot(root, NULL, 1, 0); + ret = btrfs_drop_snapshot(root, 1, 0); + btrfs_put_root(root); return (ret < 0) ? 0 : 1; } |