summaryrefslogtreecommitdiffstats
path: root/fs/btrfs/tree-log.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/btrfs/tree-log.c')
-rw-r--r--fs/btrfs/tree-log.c702
1 files changed, 364 insertions, 338 deletions
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index 7dd7552f53a4..39da9db35278 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -18,6 +18,8 @@
#include "compression.h"
#include "qgroup.h"
#include "inode-map.h"
+#include "block-group.h"
+#include "space-info.h"
/* magic values for the inode_only field in btrfs_log_inode:
*
@@ -167,6 +169,7 @@ static int start_log_trans(struct btrfs_trans_handle *trans,
if (ret)
goto out;
+ set_bit(BTRFS_ROOT_HAS_LOG_TREE, &root->state);
clear_bit(BTRFS_ROOT_MULTI_LOG_TASKS, &root->state);
root->log_start_pid = current->pid;
}
@@ -193,6 +196,9 @@ static int join_running_log_trans(struct btrfs_root *root)
{
int ret = -ENOENT;
+ if (!test_bit(BTRFS_ROOT_HAS_LOG_TREE, &root->state))
+ return ret;
+
mutex_lock(&root->log_mutex);
if (root->log_root) {
ret = 0;
@@ -311,7 +317,7 @@ static int process_one_buffer(struct btrfs_root *log,
}
if (wc->pin)
- ret = btrfs_pin_extent_for_log_replay(fs_info, eb->start,
+ ret = btrfs_pin_extent_for_log_replay(wc->trans, eb->start,
eb->len);
if (!ret && btrfs_buffer_uptodate(eb, gen, 0)) {
@@ -503,13 +509,8 @@ insert:
*/
if (S_ISREG(btrfs_inode_mode(eb, src_item)) &&
S_ISREG(btrfs_inode_mode(dst_eb, dst_item)) &&
- ino_size != 0) {
- struct btrfs_map_token token;
-
- btrfs_init_map_token(&token, dst_eb);
- btrfs_set_token_inode_size(dst_eb, dst_item,
- ino_size, &token);
- }
+ ino_size != 0)
+ btrfs_set_inode_size(dst_eb, dst_item, ino_size);
goto no_copy;
}
@@ -553,13 +554,9 @@ no_copy:
static noinline struct inode *read_one_inode(struct btrfs_root *root,
u64 objectid)
{
- struct btrfs_key key;
struct inode *inode;
- key.objectid = objectid;
- key.type = BTRFS_INODE_ITEM_KEY;
- key.offset = 0;
- inode = btrfs_iget(root->fs_info->sb, &key, root);
+ inode = btrfs_iget(root->fs_info->sb, objectid, root);
if (IS_ERR(inode))
inode = NULL;
return inode;
@@ -830,6 +827,11 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans,
goto out;
}
+ ret = btrfs_inode_set_file_extent_range(BTRFS_I(inode), start,
+ extent_end - start);
+ if (ret)
+ goto out;
+
inode_add_bytes(inode, nbytes);
update_inode:
ret = btrfs_update_inode(trans, root, inode);
@@ -2659,18 +2661,39 @@ static int replay_one_buffer(struct btrfs_root *log, struct extent_buffer *eb,
return ret;
}
+/*
+ * Correctly adjust the reserved bytes occupied by a log tree extent buffer
+ */
+static void unaccount_log_buffer(struct btrfs_fs_info *fs_info, u64 start)
+{
+ struct btrfs_block_group *cache;
+
+ cache = btrfs_lookup_block_group(fs_info, start);
+ if (!cache) {
+ btrfs_err(fs_info, "unable to find block group for %llu", start);
+ return;
+ }
+
+ spin_lock(&cache->space_info->lock);
+ spin_lock(&cache->lock);
+ cache->reserved -= fs_info->nodesize;
+ cache->space_info->bytes_reserved -= fs_info->nodesize;
+ spin_unlock(&cache->lock);
+ spin_unlock(&cache->space_info->lock);
+
+ btrfs_put_block_group(cache);
+}
+
static noinline int walk_down_log_tree(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct btrfs_path *path, int *level,
struct walk_control *wc)
{
struct btrfs_fs_info *fs_info = root->fs_info;
- u64 root_owner;
u64 bytenr;
u64 ptr_gen;
struct extent_buffer *next;
struct extent_buffer *cur;
- struct extent_buffer *parent;
u32 blocksize;
int ret = 0;
@@ -2690,9 +2713,6 @@ static noinline int walk_down_log_tree(struct btrfs_trans_handle *trans,
btrfs_node_key_to_cpu(cur, &first_key, path->slots[*level]);
blocksize = fs_info->nodesize;
- parent = path->nodes[*level];
- root_owner = btrfs_header_owner(parent);
-
next = btrfs_find_create_tree_block(fs_info, bytenr);
if (IS_ERR(next))
return PTR_ERR(next);
@@ -2720,18 +2740,16 @@ static noinline int walk_down_log_tree(struct btrfs_trans_handle *trans,
btrfs_clean_tree_block(next);
btrfs_wait_tree_block_writeback(next);
btrfs_tree_unlock(next);
+ ret = btrfs_pin_reserved_extent(trans,
+ bytenr, blocksize);
+ if (ret) {
+ free_extent_buffer(next);
+ return ret;
+ }
} else {
if (test_and_clear_bit(EXTENT_BUFFER_DIRTY, &next->bflags))
clear_extent_buffer_dirty(next);
- }
-
- WARN_ON(root_owner !=
- BTRFS_TREE_LOG_OBJECTID);
- ret = btrfs_pin_reserved_extent(fs_info,
- bytenr, blocksize);
- if (ret) {
- free_extent_buffer(next);
- return ret;
+ unaccount_log_buffer(fs_info, bytenr);
}
}
free_extent_buffer(next);
@@ -2762,7 +2780,6 @@ static noinline int walk_up_log_tree(struct btrfs_trans_handle *trans,
struct walk_control *wc)
{
struct btrfs_fs_info *fs_info = root->fs_info;
- u64 root_owner;
int i;
int slot;
int ret;
@@ -2775,13 +2792,6 @@ static noinline int walk_up_log_tree(struct btrfs_trans_handle *trans,
WARN_ON(*level == 0);
return 0;
} else {
- struct extent_buffer *parent;
- if (path->nodes[*level] == root->node)
- parent = path->nodes[*level];
- else
- parent = path->nodes[*level + 1];
-
- root_owner = btrfs_header_owner(parent);
ret = wc->process_func(root, path->nodes[*level], wc,
btrfs_header_generation(path->nodes[*level]),
*level);
@@ -2799,17 +2809,18 @@ static noinline int walk_up_log_tree(struct btrfs_trans_handle *trans,
btrfs_clean_tree_block(next);
btrfs_wait_tree_block_writeback(next);
btrfs_tree_unlock(next);
+ ret = btrfs_pin_reserved_extent(trans,
+ path->nodes[*level]->start,
+ path->nodes[*level]->len);
+ if (ret)
+ return ret;
} else {
if (test_and_clear_bit(EXTENT_BUFFER_DIRTY, &next->bflags))
clear_extent_buffer_dirty(next);
- }
- WARN_ON(root_owner != BTRFS_TREE_LOG_OBJECTID);
- ret = btrfs_pin_reserved_extent(fs_info,
- path->nodes[*level]->start,
- path->nodes[*level]->len);
- if (ret)
- return ret;
+ unaccount_log_buffer(fs_info,
+ path->nodes[*level]->start);
+ }
}
free_extent_buffer(path->nodes[*level]);
path->nodes[*level] = NULL;
@@ -2880,15 +2891,15 @@ static int walk_log_tree(struct btrfs_trans_handle *trans,
btrfs_clean_tree_block(next);
btrfs_wait_tree_block_writeback(next);
btrfs_tree_unlock(next);
+ ret = btrfs_pin_reserved_extent(trans,
+ next->start, next->len);
+ if (ret)
+ goto out;
} else {
if (test_and_clear_bit(EXTENT_BUFFER_DIRTY, &next->bflags))
clear_extent_buffer_dirty(next);
+ unaccount_log_buffer(fs_info, next->start);
}
-
- ret = btrfs_pin_reserved_extent(fs_info, next->start,
- next->len);
- if (ret)
- goto out;
}
}
@@ -3105,29 +3116,17 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
btrfs_init_log_ctx(&root_log_ctx, NULL);
mutex_lock(&log_root_tree->log_mutex);
- atomic_inc(&log_root_tree->log_batch);
- atomic_inc(&log_root_tree->log_writers);
index2 = log_root_tree->log_transid % 2;
list_add_tail(&root_log_ctx.list, &log_root_tree->log_ctxs[index2]);
root_log_ctx.log_transid = log_root_tree->log_transid;
- mutex_unlock(&log_root_tree->log_mutex);
-
- mutex_lock(&log_root_tree->log_mutex);
-
/*
* Now we are safe to update the log_root_tree because we're under the
* log_mutex, and we're a current writer so we're holding the commit
* open until we drop the log_mutex.
*/
ret = update_log_root(trans, log, &new_root_item);
-
- if (atomic_dec_and_test(&log_root_tree->log_writers)) {
- /* atomic_dec_and_test implies a barrier */
- cond_wake_up_nomb(&log_root_tree->log_writer_wait);
- }
-
if (ret) {
if (!list_empty(&root_log_ctx.list))
list_del_init(&root_log_ctx.list);
@@ -3173,8 +3172,6 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
root_log_ctx.log_transid - 1);
}
- wait_for_writer(log_root_tree);
-
/*
* now that we've moved on to the tree of log tree roots,
* check the full commit flag again
@@ -3283,8 +3280,8 @@ static void free_log_tree(struct btrfs_trans_handle *trans,
clear_extent_bits(&log->dirty_log_pages, 0, (u64)-1,
EXTENT_DIRTY | EXTENT_NEW | EXTENT_NEED_WAIT);
- free_extent_buffer(log->node);
- kfree(log);
+ extent_io_tree_release(&log->log_csum_range);
+ btrfs_put_root(log);
}
/*
@@ -3296,6 +3293,7 @@ int btrfs_free_log(struct btrfs_trans_handle *trans, struct btrfs_root *root)
if (root->log_root) {
free_log_tree(trans, root->log_root);
root->log_root = NULL;
+ clear_bit(BTRFS_ROOT_HAS_LOG_TREE, &root->state);
}
return 0;
}
@@ -3451,11 +3449,13 @@ fail:
btrfs_free_path(path);
out_unlock:
mutex_unlock(&dir->log_mutex);
- if (ret == -ENOSPC) {
+ if (err == -ENOSPC) {
btrfs_set_log_full_commit(trans);
- ret = 0;
- } else if (ret < 0)
- btrfs_abort_transaction(trans, ret);
+ err = 0;
+ } else if (err < 0 && err != -ENOENT) {
+ /* ENOENT can be returned if the entry hasn't been fsynced yet */
+ btrfs_abort_transaction(trans, err);
+ }
btrfs_end_log_trans(root);
@@ -3801,8 +3801,7 @@ static int drop_objectid_items(struct btrfs_trans_handle *trans,
found_key.offset = 0;
found_key.type = 0;
- ret = btrfs_bin_search(path->nodes[0], &found_key, 0,
- &start_slot);
+ ret = btrfs_bin_search(path->nodes[0], &found_key, &start_slot);
if (ret < 0)
break;
@@ -3838,44 +3837,41 @@ static void fill_inode_item(struct btrfs_trans_handle *trans,
* just to say 'this inode exists' and a logging
* to say 'update this inode with these values'
*/
- btrfs_set_token_inode_generation(leaf, item, 0, &token);
- btrfs_set_token_inode_size(leaf, item, logged_isize, &token);
+ btrfs_set_token_inode_generation(&token, item, 0);
+ btrfs_set_token_inode_size(&token, item, logged_isize);
} else {
- btrfs_set_token_inode_generation(leaf, item,
- BTRFS_I(inode)->generation,
- &token);
- btrfs_set_token_inode_size(leaf, item, inode->i_size, &token);
- }
-
- btrfs_set_token_inode_uid(leaf, item, i_uid_read(inode), &token);
- btrfs_set_token_inode_gid(leaf, item, i_gid_read(inode), &token);
- btrfs_set_token_inode_mode(leaf, item, inode->i_mode, &token);
- btrfs_set_token_inode_nlink(leaf, item, inode->i_nlink, &token);
-
- btrfs_set_token_timespec_sec(leaf, &item->atime,
- inode->i_atime.tv_sec, &token);
- btrfs_set_token_timespec_nsec(leaf, &item->atime,
- inode->i_atime.tv_nsec, &token);
-
- btrfs_set_token_timespec_sec(leaf, &item->mtime,
- inode->i_mtime.tv_sec, &token);
- btrfs_set_token_timespec_nsec(leaf, &item->mtime,
- inode->i_mtime.tv_nsec, &token);
-
- btrfs_set_token_timespec_sec(leaf, &item->ctime,
- inode->i_ctime.tv_sec, &token);
- btrfs_set_token_timespec_nsec(leaf, &item->ctime,
- inode->i_ctime.tv_nsec, &token);
-
- btrfs_set_token_inode_nbytes(leaf, item, inode_get_bytes(inode),
- &token);
-
- btrfs_set_token_inode_sequence(leaf, item,
- inode_peek_iversion(inode), &token);
- btrfs_set_token_inode_transid(leaf, item, trans->transid, &token);
- btrfs_set_token_inode_rdev(leaf, item, inode->i_rdev, &token);
- btrfs_set_token_inode_flags(leaf, item, BTRFS_I(inode)->flags, &token);
- btrfs_set_token_inode_block_group(leaf, item, 0, &token);
+ btrfs_set_token_inode_generation(&token, item,
+ BTRFS_I(inode)->generation);
+ btrfs_set_token_inode_size(&token, item, inode->i_size);
+ }
+
+ btrfs_set_token_inode_uid(&token, item, i_uid_read(inode));
+ btrfs_set_token_inode_gid(&token, item, i_gid_read(inode));
+ btrfs_set_token_inode_mode(&token, item, inode->i_mode);
+ btrfs_set_token_inode_nlink(&token, item, inode->i_nlink);
+
+ btrfs_set_token_timespec_sec(&token, &item->atime,
+ inode->i_atime.tv_sec);
+ btrfs_set_token_timespec_nsec(&token, &item->atime,
+ inode->i_atime.tv_nsec);
+
+ btrfs_set_token_timespec_sec(&token, &item->mtime,
+ inode->i_mtime.tv_sec);
+ btrfs_set_token_timespec_nsec(&token, &item->mtime,
+ inode->i_mtime.tv_nsec);
+
+ btrfs_set_token_timespec_sec(&token, &item->ctime,
+ inode->i_ctime.tv_sec);
+ btrfs_set_token_timespec_nsec(&token, &item->ctime,
+ inode->i_ctime.tv_nsec);
+
+ btrfs_set_token_inode_nbytes(&token, item, inode_get_bytes(inode));
+
+ btrfs_set_token_inode_sequence(&token, item, inode_peek_iversion(inode));
+ btrfs_set_token_inode_transid(&token, item, trans->transid);
+ btrfs_set_token_inode_rdev(&token, item, inode->i_rdev);
+ btrfs_set_token_inode_flags(&token, item, BTRFS_I(inode)->flags);
+ btrfs_set_token_inode_block_group(&token, item, 0);
}
static int log_inode_item(struct btrfs_trans_handle *trans,
@@ -3898,12 +3894,33 @@ static int log_inode_item(struct btrfs_trans_handle *trans,
}
static int log_csums(struct btrfs_trans_handle *trans,
+ struct btrfs_inode *inode,
struct btrfs_root *log_root,
struct btrfs_ordered_sum *sums)
{
+ const u64 lock_end = sums->bytenr + sums->len - 1;
+ struct extent_state *cached_state = NULL;
int ret;
/*
+ * If this inode was not used for reflink operations in the current
+ * transaction with new extents, then do the fast path, no need to
+ * worry about logging checksum items with overlapping ranges.
+ */
+ if (inode->last_reflink_trans < trans->transid)
+ return btrfs_csum_file_blocks(trans, log_root, sums);
+
+ /*
+ * Serialize logging for checksums. This is to avoid racing with the
+ * same checksum being logged by another task that is logging another
+ * file which happens to refer to the same extent as well. Such races
+ * can leave checksum items in the log with overlapping ranges.
+ */
+ ret = lock_extent_bits(&log_root->log_csum_range, sums->bytenr,
+ lock_end, &cached_state);
+ if (ret)
+ return ret;
+ /*
* Due to extent cloning, we might have logged a csum item that covers a
* subrange of a cloned extent, and later we can end up logging a csum
* item for a larger subrange of the same extent or the entire range.
@@ -3913,10 +3930,13 @@ static int log_csums(struct btrfs_trans_handle *trans,
* trim and adjust) any existing csum items in the log for this range.
*/
ret = btrfs_del_csums(trans, log_root, sums->bytenr, sums->len);
- if (ret)
- return ret;
+ if (!ret)
+ ret = btrfs_csum_file_blocks(trans, log_root, sums);
- return btrfs_csum_file_blocks(trans, log_root, sums);
+ unlock_extent_cached(&log_root->log_csum_range, sums->bytenr, lock_end,
+ &cached_state);
+
+ return ret;
}
static noinline int copy_items(struct btrfs_trans_handle *trans,
@@ -4018,11 +4038,8 @@ static noinline int copy_items(struct btrfs_trans_handle *trans,
fs_info->csum_root,
ds + cs, ds + cs + cl - 1,
&ordered_sums, 0);
- if (ret) {
- btrfs_release_path(dst_path);
- kfree(ins_data);
- return ret;
- }
+ if (ret)
+ break;
}
}
}
@@ -4035,13 +4052,12 @@ static noinline int copy_items(struct btrfs_trans_handle *trans,
* we have to do this after the loop above to avoid changing the
* log tree while trying to change the log tree.
*/
- ret = 0;
while (!list_empty(&ordered_sums)) {
struct btrfs_ordered_sum *sums = list_entry(ordered_sums.next,
struct btrfs_ordered_sum,
list);
if (!ret)
- ret = log_csums(trans, log, sums);
+ ret = log_csums(trans, inode, log, sums);
list_del(&sums->list);
kfree(sums);
}
@@ -4100,7 +4116,7 @@ static int log_extent_csums(struct btrfs_trans_handle *trans,
struct btrfs_ordered_sum,
list);
if (!ret)
- ret = log_csums(trans, log_root, sums);
+ ret = log_csums(trans, inode, log_root, sums);
list_del(&sums->list);
kfree(sums);
}
@@ -4128,7 +4144,7 @@ static int log_one_extent(struct btrfs_trans_handle *trans,
if (ret)
return ret;
- ret = __btrfs_drop_extents(trans, log, &inode->vfs_inode, path, em->start,
+ ret = __btrfs_drop_extents(trans, log, inode, path, em->start,
em->start + em->len, NULL, 0, 1,
sizeof(*fi), &extent_inserted);
if (ret)
@@ -4149,43 +4165,35 @@ static int log_one_extent(struct btrfs_trans_handle *trans,
fi = btrfs_item_ptr(leaf, path->slots[0],
struct btrfs_file_extent_item);
- btrfs_set_token_file_extent_generation(leaf, fi, trans->transid,
- &token);
+ btrfs_set_token_file_extent_generation(&token, fi, trans->transid);
if (test_bit(EXTENT_FLAG_PREALLOC, &em->flags))
- btrfs_set_token_file_extent_type(leaf, fi,
- BTRFS_FILE_EXTENT_PREALLOC,
- &token);
+ btrfs_set_token_file_extent_type(&token, fi,
+ BTRFS_FILE_EXTENT_PREALLOC);
else
- btrfs_set_token_file_extent_type(leaf, fi,
- BTRFS_FILE_EXTENT_REG,
- &token);
+ btrfs_set_token_file_extent_type(&token, fi,
+ BTRFS_FILE_EXTENT_REG);
block_len = max(em->block_len, em->orig_block_len);
if (em->compress_type != BTRFS_COMPRESS_NONE) {
- btrfs_set_token_file_extent_disk_bytenr(leaf, fi,
- em->block_start,
- &token);
- btrfs_set_token_file_extent_disk_num_bytes(leaf, fi, block_len,
- &token);
+ btrfs_set_token_file_extent_disk_bytenr(&token, fi,
+ em->block_start);
+ btrfs_set_token_file_extent_disk_num_bytes(&token, fi, block_len);
} else if (em->block_start < EXTENT_MAP_LAST_BYTE) {
- btrfs_set_token_file_extent_disk_bytenr(leaf, fi,
+ btrfs_set_token_file_extent_disk_bytenr(&token, fi,
em->block_start -
- extent_offset, &token);
- btrfs_set_token_file_extent_disk_num_bytes(leaf, fi, block_len,
- &token);
+ extent_offset);
+ btrfs_set_token_file_extent_disk_num_bytes(&token, fi, block_len);
} else {
- btrfs_set_token_file_extent_disk_bytenr(leaf, fi, 0, &token);
- btrfs_set_token_file_extent_disk_num_bytes(leaf, fi, 0,
- &token);
- }
-
- btrfs_set_token_file_extent_offset(leaf, fi, extent_offset, &token);
- btrfs_set_token_file_extent_num_bytes(leaf, fi, em->len, &token);
- btrfs_set_token_file_extent_ram_bytes(leaf, fi, em->ram_bytes, &token);
- btrfs_set_token_file_extent_compression(leaf, fi, em->compress_type,
- &token);
- btrfs_set_token_file_extent_encryption(leaf, fi, 0, &token);
- btrfs_set_token_file_extent_other_encoding(leaf, fi, 0, &token);
+ btrfs_set_token_file_extent_disk_bytenr(&token, fi, 0);
+ btrfs_set_token_file_extent_disk_num_bytes(&token, fi, 0);
+ }
+
+ btrfs_set_token_file_extent_offset(&token, fi, extent_offset);
+ btrfs_set_token_file_extent_num_bytes(&token, fi, em->len);
+ btrfs_set_token_file_extent_ram_bytes(&token, fi, em->ram_bytes);
+ btrfs_set_token_file_extent_compression(&token, fi, em->compress_type);
+ btrfs_set_token_file_extent_encryption(&token, fi, 0);
+ btrfs_set_token_file_extent_other_encoding(&token, fi, 0);
btrfs_mark_buffer_dirty(leaf);
btrfs_release_path(path);
@@ -4211,6 +4219,9 @@ static int btrfs_log_prealloc_extents(struct btrfs_trans_handle *trans,
const u64 ino = btrfs_ino(inode);
struct btrfs_path *dst_path = NULL;
bool dropped_extents = false;
+ u64 truncate_offset = i_size;
+ struct extent_buffer *leaf;
+ int slot;
int ins_nr = 0;
int start_slot;
int ret;
@@ -4225,9 +4236,43 @@ static int btrfs_log_prealloc_extents(struct btrfs_trans_handle *trans,
if (ret < 0)
goto out;
+ /*
+ * We must check if there is a prealloc extent that starts before the
+ * i_size and crosses the i_size boundary. This is to ensure later we
+ * truncate down to the end of that extent and not to the i_size, as
+ * otherwise we end up losing part of the prealloc extent after a log
+ * replay and with an implicit hole if there is another prealloc extent
+ * that starts at an offset beyond i_size.
+ */
+ ret = btrfs_previous_item(root, path, ino, BTRFS_EXTENT_DATA_KEY);
+ if (ret < 0)
+ goto out;
+
+ if (ret == 0) {
+ struct btrfs_file_extent_item *ei;
+
+ leaf = path->nodes[0];
+ slot = path->slots[0];
+ ei = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
+
+ if (btrfs_file_extent_type(leaf, ei) ==
+ BTRFS_FILE_EXTENT_PREALLOC) {
+ u64 extent_end;
+
+ btrfs_item_key_to_cpu(leaf, &key, slot);
+ extent_end = key.offset +
+ btrfs_file_extent_num_bytes(leaf, ei);
+
+ if (extent_end > i_size)
+ truncate_offset = extent_end;
+ }
+ } else {
+ ret = 0;
+ }
+
while (true) {
- struct extent_buffer *leaf = path->nodes[0];
- int slot = path->slots[0];
+ leaf = path->nodes[0];
+ slot = path->slots[0];
if (slot >= btrfs_header_nritems(leaf)) {
if (ins_nr > 0) {
@@ -4265,7 +4310,7 @@ static int btrfs_log_prealloc_extents(struct btrfs_trans_handle *trans,
ret = btrfs_truncate_inode_items(trans,
root->log_root,
&inode->vfs_inode,
- i_size,
+ truncate_offset,
BTRFS_EXTENT_DATA_KEY);
} while (ret == -EAGAIN);
if (ret)
@@ -4284,12 +4329,9 @@ static int btrfs_log_prealloc_extents(struct btrfs_trans_handle *trans,
}
}
}
- if (ins_nr > 0) {
+ if (ins_nr > 0)
ret = copy_items(trans, inode, dst_path, path,
start_slot, ins_nr, 1, 0);
- if (ret > 0)
- ret = 0;
- }
out:
btrfs_release_path(path);
btrfs_free_path(dst_path);
@@ -4539,9 +4581,7 @@ static int btrfs_log_holes(struct btrfs_trans_handle *trans,
return ret;
while (true) {
- struct btrfs_file_extent_item *extent;
struct extent_buffer *leaf = path->nodes[0];
- u64 len;
if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
ret = btrfs_next_leaf(root, path);
@@ -4590,18 +4630,7 @@ static int btrfs_log_holes(struct btrfs_trans_handle *trans,
leaf = path->nodes[0];
}
- extent = btrfs_item_ptr(leaf, path->slots[0],
- struct btrfs_file_extent_item);
- if (btrfs_file_extent_type(leaf, extent) ==
- BTRFS_FILE_EXTENT_INLINE) {
- len = btrfs_file_extent_ram_bytes(leaf, extent);
- prev_extent_end = ALIGN(key.offset + len,
- fs_info->sectorsize);
- } else {
- len = btrfs_file_extent_num_bytes(leaf, extent);
- prev_extent_end = key.offset + len;
- }
-
+ prev_extent_end = btrfs_file_extent_end(path);
path->slots[0]++;
cond_resched();
}
@@ -4796,10 +4825,7 @@ static int log_conflicting_inodes(struct btrfs_trans_handle *trans,
btrfs_release_path(path);
- key.objectid = ino;
- key.type = BTRFS_INODE_ITEM_KEY;
- key.offset = 0;
- inode = btrfs_iget(fs_info->sb, &key, root);
+ inode = btrfs_iget(fs_info->sb, ino, root);
/*
* If the other inode that had a conflicting dir entry was
* deleted in the current transaction, we need to log its parent
@@ -4808,8 +4834,7 @@ static int log_conflicting_inodes(struct btrfs_trans_handle *trans,
if (IS_ERR(inode)) {
ret = PTR_ERR(inode);
if (ret == -ENOENT) {
- key.objectid = parent;
- inode = btrfs_iget(fs_info->sb, &key, root);
+ inode = btrfs_iget(fs_info->sb, parent, root);
if (IS_ERR(inode)) {
ret = PTR_ERR(inode);
} else {
@@ -4938,6 +4963,138 @@ static int log_conflicting_inodes(struct btrfs_trans_handle *trans,
return ret;
}
+static int copy_inode_items_to_log(struct btrfs_trans_handle *trans,
+ struct btrfs_inode *inode,
+ struct btrfs_key *min_key,
+ const struct btrfs_key *max_key,
+ struct btrfs_path *path,
+ struct btrfs_path *dst_path,
+ const u64 logged_isize,
+ const bool recursive_logging,
+ const int inode_only,
+ struct btrfs_log_ctx *ctx,
+ bool *need_log_inode_item)
+{
+ struct btrfs_root *root = inode->root;
+ int ins_start_slot = 0;
+ int ins_nr = 0;
+ int ret;
+
+ while (1) {
+ ret = btrfs_search_forward(root, min_key, path, trans->transid);
+ if (ret < 0)
+ return ret;
+ if (ret > 0) {
+ ret = 0;
+ break;
+ }
+again:
+ /* Note, ins_nr might be > 0 here, cleanup outside the loop */
+ if (min_key->objectid != max_key->objectid)
+ break;
+ if (min_key->type > max_key->type)
+ break;
+
+ if (min_key->type == BTRFS_INODE_ITEM_KEY)
+ *need_log_inode_item = false;
+
+ if ((min_key->type == BTRFS_INODE_REF_KEY ||
+ min_key->type == BTRFS_INODE_EXTREF_KEY) &&
+ inode->generation == trans->transid &&
+ !recursive_logging) {
+ u64 other_ino = 0;
+ u64 other_parent = 0;
+
+ ret = btrfs_check_ref_name_override(path->nodes[0],
+ path->slots[0], min_key, inode,
+ &other_ino, &other_parent);
+ if (ret < 0) {
+ return ret;
+ } else if (ret > 0 && ctx &&
+ other_ino != btrfs_ino(BTRFS_I(ctx->inode))) {
+ if (ins_nr > 0) {
+ ins_nr++;
+ } else {
+ ins_nr = 1;
+ ins_start_slot = path->slots[0];
+ }
+ ret = copy_items(trans, inode, dst_path, path,
+ ins_start_slot, ins_nr,
+ inode_only, logged_isize);
+ if (ret < 0)
+ return ret;
+ ins_nr = 0;
+
+ ret = log_conflicting_inodes(trans, root, path,
+ ctx, other_ino, other_parent);
+ if (ret)
+ return ret;
+ btrfs_release_path(path);
+ goto next_key;
+ }
+ }
+
+ /* Skip xattrs, we log them later with btrfs_log_all_xattrs() */
+ if (min_key->type == BTRFS_XATTR_ITEM_KEY) {
+ if (ins_nr == 0)
+ goto next_slot;
+ ret = copy_items(trans, inode, dst_path, path,
+ ins_start_slot,
+ ins_nr, inode_only, logged_isize);
+ if (ret < 0)
+ return ret;
+ ins_nr = 0;
+ goto next_slot;
+ }
+
+ if (ins_nr && ins_start_slot + ins_nr == path->slots[0]) {
+ ins_nr++;
+ goto next_slot;
+ } else if (!ins_nr) {
+ ins_start_slot = path->slots[0];
+ ins_nr = 1;
+ goto next_slot;
+ }
+
+ ret = copy_items(trans, inode, dst_path, path, ins_start_slot,
+ ins_nr, inode_only, logged_isize);
+ if (ret < 0)
+ return ret;
+ ins_nr = 1;
+ ins_start_slot = path->slots[0];
+next_slot:
+ path->slots[0]++;
+ if (path->slots[0] < btrfs_header_nritems(path->nodes[0])) {
+ btrfs_item_key_to_cpu(path->nodes[0], min_key,
+ path->slots[0]);
+ goto again;
+ }
+ if (ins_nr) {
+ ret = copy_items(trans, inode, dst_path, path,
+ ins_start_slot, ins_nr, inode_only,
+ logged_isize);
+ if (ret < 0)
+ return ret;
+ ins_nr = 0;
+ }
+ btrfs_release_path(path);
+next_key:
+ if (min_key->offset < (u64)-1) {
+ min_key->offset++;
+ } else if (min_key->type < max_key->type) {
+ min_key->type++;
+ min_key->offset = 0;
+ } else {
+ break;
+ }
+ }
+ if (ins_nr)
+ ret = copy_items(trans, inode, dst_path, path, ins_start_slot,
+ ins_nr, inode_only, logged_isize);
+
+ return ret;
+}
+
/* log a single inode in the tree log.
* At least one parent directory for this inode must exist in the tree
* or be logged already.
@@ -4959,17 +5116,13 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans,
const loff_t end,
struct btrfs_log_ctx *ctx)
{
- struct btrfs_fs_info *fs_info = root->fs_info;
struct btrfs_path *path;
struct btrfs_path *dst_path;
struct btrfs_key min_key;
struct btrfs_key max_key;
struct btrfs_root *log = root->log_root;
int err = 0;
- int ret;
- int nritems;
- int ins_start_slot = 0;
- int ins_nr;
+ int ret = 0;
bool fast_search = false;
u64 ino = btrfs_ino(inode);
struct extent_map_tree *em_tree = &inode->extent_tree;
@@ -5005,15 +5158,19 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans,
max_key.offset = (u64)-1;
/*
- * Only run delayed items if we are a dir or a new file.
- * Otherwise commit the delayed inode only, which is needed in
- * order for the log replay code to mark inodes for link count
- * fixup (create temporary BTRFS_TREE_LOG_FIXUP_OBJECTID items).
+ * Only run delayed items if we are a directory. We want to make sure
+ * all directory indexes hit the fs/subvolume tree so we can find them
+ * and figure out which index ranges have to be logged.
+ *
+ * Otherwise commit the delayed inode only if the full sync flag is set,
+ * as we want to make sure an up to date version is in the subvolume
+ * tree so copy_inode_items_to_log() / copy_items() can find it and copy
+ * it to the log tree. For a non full sync, we always log the inode item
+ * based on the in-memory struct btrfs_inode which is always up to date.
*/
- if (S_ISDIR(inode->vfs_inode.i_mode) ||
- inode->generation > fs_info->last_trans_committed)
+ if (S_ISDIR(inode->vfs_inode.i_mode))
ret = btrfs_commit_inode_delayed_items(trans, inode);
- else
+ else if (test_bit(BTRFS_INODE_NEEDS_FULL_SYNC, &inode->runtime_flags))
ret = btrfs_commit_inode_delayed_inode(inode);
if (ret) {
@@ -5100,139 +5257,12 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans,
goto out_unlock;
}
- while (1) {
- ins_nr = 0;
- ret = btrfs_search_forward(root, &min_key,
- path, trans->transid);
- if (ret < 0) {
- err = ret;
- goto out_unlock;
- }
- if (ret != 0)
- break;
-again:
- /* note, ins_nr might be > 0 here, cleanup outside the loop */
- if (min_key.objectid != ino)
- break;
- if (min_key.type > max_key.type)
- break;
-
- if (min_key.type == BTRFS_INODE_ITEM_KEY)
- need_log_inode_item = false;
-
- if ((min_key.type == BTRFS_INODE_REF_KEY ||
- min_key.type == BTRFS_INODE_EXTREF_KEY) &&
- inode->generation == trans->transid &&
- !recursive_logging) {
- u64 other_ino = 0;
- u64 other_parent = 0;
-
- ret = btrfs_check_ref_name_override(path->nodes[0],
- path->slots[0], &min_key, inode,
- &other_ino, &other_parent);
- if (ret < 0) {
- err = ret;
- goto out_unlock;
- } else if (ret > 0 && ctx &&
- other_ino != btrfs_ino(BTRFS_I(ctx->inode))) {
- if (ins_nr > 0) {
- ins_nr++;
- } else {
- ins_nr = 1;
- ins_start_slot = path->slots[0];
- }
- ret = copy_items(trans, inode, dst_path, path,
- ins_start_slot,
- ins_nr, inode_only,
- logged_isize);
- if (ret < 0) {
- err = ret;
- goto out_unlock;
- }
- ins_nr = 0;
-
- err = log_conflicting_inodes(trans, root, path,
- ctx, other_ino, other_parent);
- if (err)
- goto out_unlock;
- btrfs_release_path(path);
- goto next_key;
- }
- }
-
- /* Skip xattrs, we log them later with btrfs_log_all_xattrs() */
- if (min_key.type == BTRFS_XATTR_ITEM_KEY) {
- if (ins_nr == 0)
- goto next_slot;
- ret = copy_items(trans, inode, dst_path, path,
- ins_start_slot,
- ins_nr, inode_only, logged_isize);
- if (ret < 0) {
- err = ret;
- goto out_unlock;
- }
- ins_nr = 0;
- goto next_slot;
- }
-
- if (ins_nr && ins_start_slot + ins_nr == path->slots[0]) {
- ins_nr++;
- goto next_slot;
- } else if (!ins_nr) {
- ins_start_slot = path->slots[0];
- ins_nr = 1;
- goto next_slot;
- }
-
- ret = copy_items(trans, inode, dst_path, path,
- ins_start_slot, ins_nr, inode_only,
- logged_isize);
- if (ret < 0) {
- err = ret;
- goto out_unlock;
- }
- ins_nr = 1;
- ins_start_slot = path->slots[0];
-next_slot:
-
- nritems = btrfs_header_nritems(path->nodes[0]);
- path->slots[0]++;
- if (path->slots[0] < nritems) {
- btrfs_item_key_to_cpu(path->nodes[0], &min_key,
- path->slots[0]);
- goto again;
- }
- if (ins_nr) {
- ret = copy_items(trans, inode, dst_path, path,
- ins_start_slot,
- ins_nr, inode_only, logged_isize);
- if (ret < 0) {
- err = ret;
- goto out_unlock;
- }
- ins_nr = 0;
- }
- btrfs_release_path(path);
-next_key:
- if (min_key.offset < (u64)-1) {
- min_key.offset++;
- } else if (min_key.type < max_key.type) {
- min_key.type++;
- min_key.offset = 0;
- } else {
- break;
- }
- }
- if (ins_nr) {
- ret = copy_items(trans, inode, dst_path, path,
- ins_start_slot, ins_nr, inode_only,
- logged_isize);
- if (ret < 0) {
- err = ret;
- goto out_unlock;
- }
- ins_nr = 0;
- }
+ err = copy_inode_items_to_log(trans, inode, &min_key, &max_key,
+ path, dst_path, logged_isize,
+ recursive_logging, inode_only, ctx,
+ &need_log_inode_item);
+ if (err)
+ goto out_unlock;
btrfs_release_path(path);
btrfs_release_path(dst_path);
@@ -5546,7 +5576,7 @@ process_leaf:
continue;
btrfs_release_path(path);
- di_inode = btrfs_iget(fs_info->sb, &di_key, root);
+ di_inode = btrfs_iget(fs_info->sb, di_key.objectid, root);
if (IS_ERR(di_inode)) {
ret = PTR_ERR(di_inode);
goto next_dir_inode;
@@ -5672,7 +5702,8 @@ static int btrfs_log_all_parents(struct btrfs_trans_handle *trans,
cur_offset = item_size;
}
- dir_inode = btrfs_iget(fs_info->sb, &inode_key, root);
+ dir_inode = btrfs_iget(fs_info->sb, inode_key.objectid,
+ root);
/*
* If the parent inode was deleted, return an error to
* fallback to a transaction commit. This is to prevent
@@ -5739,14 +5770,17 @@ static int log_new_ancestors(struct btrfs_trans_handle *trans,
int slot = path->slots[0];
struct btrfs_key search_key;
struct inode *inode;
+ u64 ino;
int ret = 0;
btrfs_release_path(path);
+ ino = found_key.offset;
+
search_key.objectid = found_key.offset;
search_key.type = BTRFS_INODE_ITEM_KEY;
search_key.offset = 0;
- inode = btrfs_iget(fs_info->sb, &search_key, root);
+ inode = btrfs_iget(fs_info->sb, ino, root);
if (IS_ERR(inode))
return PTR_ERR(inode);
@@ -6091,7 +6125,6 @@ int btrfs_recover_log_trees(struct btrfs_root *log_root_tree)
struct btrfs_trans_handle *trans;
struct btrfs_key key;
struct btrfs_key found_key;
- struct btrfs_key tmp_key;
struct btrfs_root *log;
struct btrfs_fs_info *fs_info = log_root_tree->fs_info;
struct walk_control wc = {
@@ -6145,7 +6178,7 @@ again:
if (found_key.objectid != BTRFS_TREE_LOG_OBJECTID)
break;
- log = btrfs_read_fs_root(log_root_tree, &found_key);
+ log = btrfs_read_tree_root(log_root_tree, &found_key);
if (IS_ERR(log)) {
ret = PTR_ERR(log);
btrfs_handle_fs_error(fs_info, ret,
@@ -6153,11 +6186,8 @@ again:
goto error;
}
- tmp_key.objectid = found_key.offset;
- tmp_key.type = BTRFS_ROOT_ITEM_KEY;
- tmp_key.offset = (u64)-1;
-
- wc.replay_dest = btrfs_read_fs_root_no_name(fs_info, &tmp_key);
+ wc.replay_dest = btrfs_get_fs_root(fs_info, found_key.offset,
+ true);
if (IS_ERR(wc.replay_dest)) {
ret = PTR_ERR(wc.replay_dest);
@@ -6173,12 +6203,10 @@ again:
* each subsequent pass.
*/
if (ret == -ENOENT)
- ret = btrfs_pin_extent_for_log_replay(fs_info,
+ ret = btrfs_pin_extent_for_log_replay(trans,
log->node->start,
log->node->len);
- free_extent_buffer(log->node);
- free_extent_buffer(log->commit_root);
- kfree(log);
+ btrfs_put_root(log);
if (!ret)
goto next;
@@ -6214,9 +6242,8 @@ again:
}
wc.replay_dest->log_root = NULL;
- free_extent_buffer(log->node);
- free_extent_buffer(log->commit_root);
- kfree(log);
+ btrfs_put_root(wc.replay_dest);
+ btrfs_put_root(log);
if (ret)
goto error;
@@ -6247,10 +6274,9 @@ next:
if (ret)
return ret;
- free_extent_buffer(log_root_tree->node);
log_root_tree->log_root = NULL;
clear_bit(BTRFS_FS_LOG_RECOVERING, &fs_info->flags);
- kfree(log_root_tree);
+ btrfs_put_root(log_root_tree);
return 0;
error: