summary refs log tree commit diff stats
path: root/fs/btrfs/inode.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/btrfs/inode.c')
-rw-r--r-- fs/btrfs/inode.c 815
1 files changed, 420 insertions, 395 deletions
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 7e8d8169779d..8e23780acfae 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -45,7 +45,6 @@
#include "compression.h"
#include "locking.h"
#include "free-space-cache.h"
-#include "inode-map.h"
#include "props.h"
#include "qgroup.h"
#include "delalloc-space.h"
@@ -62,7 +61,6 @@ struct btrfs_dio_data {
loff_t length;
ssize_t submitted;
struct extent_changeset *data_reserved;
- bool sync;
};
static const struct inode_operations btrfs_dir_inode_operations;
@@ -96,6 +94,51 @@ static void __endio_write_update_ordered(struct btrfs_inode *inode,
const bool uptodate);
/*
+ * btrfs_inode_lock - lock inode i_rwsem based on arguments passed
+ *
+ * ilock_flags can have the following bits set:
+ * BTRFS_ILOCK_SHARED - acquire a shared lock (exclusive lock when unset)
+ * BTRFS_ILOCK_TRY - try once; if the lock is not acquired, return -EAGAIN
+ *
+ * Returns 0 with the lock held, or -EAGAIN if a try-lock attempt failed.
+ */
+int btrfs_inode_lock(struct inode *inode, unsigned int ilock_flags)
+{
+ if (ilock_flags & BTRFS_ILOCK_SHARED) {
+ if (ilock_flags & BTRFS_ILOCK_TRY) {
+ if (!inode_trylock_shared(inode))
+ return -EAGAIN; /* shared trylock failed, lock not held */
+ else
+ return 0;
+ }
+ inode_lock_shared(inode);
+ } else {
+ if (ilock_flags & BTRFS_ILOCK_TRY) {
+ if (!inode_trylock(inode))
+ return -EAGAIN; /* exclusive trylock failed, lock not held */
+ else
+ return 0;
+ }
+ inode_lock(inode);
+ }
+ return 0;
+}
+
+/*
+ * btrfs_inode_unlock - unlock inode i_rwsem
+ *
+ * ilock_flags should contain the same bits as were passed to
+ * btrfs_inode_lock(); BTRFS_ILOCK_SHARED selects shared vs. exclusive unlock.
+ */
+void btrfs_inode_unlock(struct inode *inode, unsigned int ilock_flags)
+{
+ if (ilock_flags & BTRFS_ILOCK_SHARED)
+ inode_unlock_shared(inode);
+ else
+ inode_unlock(inode);
+}
+
+/*
* Cleanup all submitted ordered extents in specified range to handle errors
* from the btrfs_run_delalloc_range() callback.
*
@@ -158,7 +201,7 @@ static int btrfs_init_inode_security(struct btrfs_trans_handle *trans,
* no overlapping inline items exist in the btree
*/
static int insert_inline_extent(struct btrfs_trans_handle *trans,
- struct btrfs_path *path, int extent_inserted,
+ struct btrfs_path *path, bool extent_inserted,
struct btrfs_root *root, struct inode *inode,
u64 start, size_t size, size_t compressed_size,
int compress_type,
@@ -179,8 +222,6 @@ static int insert_inline_extent(struct btrfs_trans_handle *trans,
if (compressed_size && compressed_pages)
cur_size = compressed_size;
- inode_add_bytes(inode, size);
-
if (!extent_inserted) {
struct btrfs_key key;
size_t datasize;
@@ -190,7 +231,6 @@ static int insert_inline_extent(struct btrfs_trans_handle *trans,
key.type = BTRFS_EXTENT_DATA_KEY;
datasize = btrfs_file_extent_calc_inline_size(cur_size);
- path->leave_spinning = 1;
ret = btrfs_insert_empty_item(trans, root, path, &key,
datasize);
if (ret)
@@ -256,8 +296,6 @@ static int insert_inline_extent(struct btrfs_trans_handle *trans,
* could end up racing with unlink.
*/
BTRFS_I(inode)->disk_i_size = inode->i_size;
- ret = btrfs_update_inode(trans, root, inode);
-
fail:
return ret;
}
@@ -273,6 +311,7 @@ static noinline int cow_file_range_inline(struct btrfs_inode *inode, u64 start,
int compress_type,
struct page **compressed_pages)
{
+ struct btrfs_drop_extents_args drop_args = { 0 };
struct btrfs_root *root = inode->root;
struct btrfs_fs_info *fs_info = root->fs_info;
struct btrfs_trans_handle *trans;
@@ -283,8 +322,6 @@ static noinline int cow_file_range_inline(struct btrfs_inode *inode, u64 start,
u64 data_len = inline_len;
int ret;
struct btrfs_path *path;
- int extent_inserted = 0;
- u32 extent_item_size;
if (compressed_size)
data_len = compressed_size;
@@ -310,16 +347,20 @@ static noinline int cow_file_range_inline(struct btrfs_inode *inode, u64 start,
}
trans->block_rsv = &inode->block_rsv;
+ drop_args.path = path;
+ drop_args.start = start;
+ drop_args.end = aligned_end;
+ drop_args.drop_cache = true;
+ drop_args.replace_extent = true;
+
if (compressed_size && compressed_pages)
- extent_item_size = btrfs_file_extent_calc_inline_size(
+ drop_args.extent_item_size = btrfs_file_extent_calc_inline_size(
compressed_size);
else
- extent_item_size = btrfs_file_extent_calc_inline_size(
+ drop_args.extent_item_size = btrfs_file_extent_calc_inline_size(
inline_len);
- ret = __btrfs_drop_extents(trans, root, inode, path, start, aligned_end,
- NULL, 1, 1, extent_item_size,
- &extent_inserted);
+ ret = btrfs_drop_extents(trans, root, inode, &drop_args);
if (ret) {
btrfs_abort_transaction(trans, ret);
goto out;
@@ -327,7 +368,7 @@ static noinline int cow_file_range_inline(struct btrfs_inode *inode, u64 start,
if (isize > actual_end)
inline_len = min_t(u64, isize, actual_end);
- ret = insert_inline_extent(trans, path, extent_inserted,
+ ret = insert_inline_extent(trans, path, drop_args.extent_inserted,
root, &inode->vfs_inode, start,
inline_len, compressed_size,
compress_type, compressed_pages);
@@ -339,8 +380,17 @@ static noinline int cow_file_range_inline(struct btrfs_inode *inode, u64 start,
goto out;
}
+ btrfs_update_inode_bytes(inode, inline_len, drop_args.bytes_found);
+ ret = btrfs_update_inode(trans, root, inode);
+ if (ret && ret != -ENOSPC) {
+ btrfs_abort_transaction(trans, ret);
+ goto out;
+ } else if (ret == -ENOSPC) {
+ ret = 1;
+ goto out;
+ }
+
set_bit(BTRFS_INODE_NEEDS_FULL_SYNC, &inode->runtime_flags);
- btrfs_drop_extent_cache(inode, start, aligned_end - 1, 0);
out:
/*
* Don't forget to free the reserved space, as for inlined extent
@@ -1598,6 +1648,15 @@ next_slot:
goto out_check;
if (extent_type == BTRFS_FILE_EXTENT_REG && !force)
goto out_check;
+
+ /*
+ * The following checks can be expensive, as they need to
+ * take other locks and do btree or rbtree searches, so
+ * release the path to avoid blocking other tasks for too
+ * long.
+ */
+ btrfs_release_path(path);
+
/* If extent is RO, we must COW it */
if (btrfs_extent_readonly(fs_info, disk_bytenr))
goto out_check;
@@ -1673,12 +1732,12 @@ out_check:
cur_offset = extent_end;
if (cur_offset > end)
break;
+ if (!path->nodes[0])
+ continue;
path->slots[0]++;
goto next_slot;
}
- btrfs_release_path(path);
-
/*
* COW range from cow_start to found_key.offset - 1. As the key
* will contain the beginning of the first extent that can be
@@ -2098,6 +2157,8 @@ void btrfs_clear_delalloc_extent(struct inode *vfs_inode,
spin_lock(&inode->lock);
ASSERT(inode->new_delalloc_bytes >= len);
inode->new_delalloc_bytes -= len;
+ if (*bits & EXTENT_ADD_INODE_BYTES)
+ inode_add_bytes(&inode->vfs_inode, len);
spin_unlock(&inode->lock);
}
}
@@ -2121,7 +2182,7 @@ int btrfs_bio_fits_in_stripe(struct page *page, size_t size, struct bio *bio,
{
struct inode *inode = page->mapping->host;
struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
- u64 logical = (u64)bio->bi_iter.bi_sector << 9;
+ u64 logical = bio->bi_iter.bi_sector << 9;
u64 length = 0;
u64 map_length;
int ret;
@@ -2150,11 +2211,9 @@ int btrfs_bio_fits_in_stripe(struct page *page, size_t size, struct bio *bio,
* At IO completion time the cums attached on the ordered extent record
* are inserted into the btree
*/
-static blk_status_t btrfs_submit_bio_start(void *private_data, struct bio *bio,
- u64 bio_offset)
+static blk_status_t btrfs_submit_bio_start(struct inode *inode, struct bio *bio,
+ u64 dio_file_offset)
{
- struct inode *inode = private_data;
-
return btrfs_csum_one_bio(BTRFS_I(inode), bio, 0, 0);
}
@@ -2187,7 +2246,8 @@ blk_status_t btrfs_submit_data_bio(struct inode *inode, struct bio *bio,
int skip_sum;
int async = !atomic_read(&BTRFS_I(inode)->sync_writers);
- skip_sum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM;
+ skip_sum = (BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM) ||
+ !fs_info->csum_root;
if (btrfs_is_free_space_inode(BTRFS_I(inode)))
metadata = BTRFS_WQ_ENDIO_FREE_SPACE;
@@ -2202,8 +2262,13 @@ blk_status_t btrfs_submit_data_bio(struct inode *inode, struct bio *bio,
mirror_num,
bio_flags);
goto out;
- } else if (!skip_sum) {
- ret = btrfs_lookup_bio_sums(inode, bio, (u64)-1, NULL);
+ } else {
+ /*
+ * Lookup bio sums does extra checks around whether we
+ * need to csum or not, which is why we ignore skip_sum
+ * here.
+ */
+ ret = btrfs_lookup_bio_sums(inode, bio, NULL);
if (ret)
goto out;
}
@@ -2213,8 +2278,8 @@ blk_status_t btrfs_submit_data_bio(struct inode *inode, struct bio *bio,
if (root->root_key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
goto mapit;
/* we're doing a write, do the async checksumming */
- ret = btrfs_wq_submit_bio(fs_info, bio, mirror_num, bio_flags,
- 0, inode, btrfs_submit_bio_start);
+ ret = btrfs_wq_submit_bio(inode, bio, mirror_num, bio_flags,
+ 0, btrfs_submit_bio_start);
goto out;
} else if (!skip_sum) {
ret = btrfs_csum_one_bio(BTRFS_I(inode), bio, 0, 0);
@@ -2282,8 +2347,8 @@ static int btrfs_find_new_delalloc_bytes(struct btrfs_inode *inode,
ret = set_extent_bit(&inode->io_tree, search_start,
search_start + em_len - 1,
- EXTENT_DELALLOC_NEW,
- NULL, cached_state, GFP_NOFS);
+ EXTENT_DELALLOC_NEW, 0, NULL, cached_state,
+ GFP_NOFS, NULL);
next:
search_start = extent_map_end(em);
free_extent_map(em);
@@ -2511,9 +2576,11 @@ int btrfs_writepage_cow_fixup(struct page *page, u64 start, u64 end)
static int insert_reserved_file_extent(struct btrfs_trans_handle *trans,
struct btrfs_inode *inode, u64 file_pos,
struct btrfs_file_extent_item *stack_fi,
+ const bool update_inode_bytes,
u64 qgroup_reserved)
{
struct btrfs_root *root = inode->root;
+ const u64 sectorsize = root->fs_info->sectorsize;
struct btrfs_path *path;
struct extent_buffer *leaf;
struct btrfs_key ins;
@@ -2521,7 +2588,7 @@ static int insert_reserved_file_extent(struct btrfs_trans_handle *trans,
u64 disk_bytenr = btrfs_stack_file_extent_disk_bytenr(stack_fi);
u64 num_bytes = btrfs_stack_file_extent_num_bytes(stack_fi);
u64 ram_bytes = btrfs_stack_file_extent_ram_bytes(stack_fi);
- int extent_inserted = 0;
+ struct btrfs_drop_extents_args drop_args = { 0 };
int ret;
path = btrfs_alloc_path();
@@ -2537,18 +2604,20 @@ static int insert_reserved_file_extent(struct btrfs_trans_handle *trans,
* the caller is expected to unpin it and allow it to be merged
* with the others.
*/
- ret = __btrfs_drop_extents(trans, root, inode, path, file_pos,
- file_pos + num_bytes, NULL, 0,
- 1, sizeof(*stack_fi), &extent_inserted);
+ drop_args.path = path;
+ drop_args.start = file_pos;
+ drop_args.end = file_pos + num_bytes;
+ drop_args.replace_extent = true;
+ drop_args.extent_item_size = sizeof(*stack_fi);
+ ret = btrfs_drop_extents(trans, root, inode, &drop_args);
if (ret)
goto out;
- if (!extent_inserted) {
+ if (!drop_args.extent_inserted) {
ins.objectid = btrfs_ino(inode);
ins.offset = file_pos;
ins.type = BTRFS_EXTENT_DATA_KEY;
- path->leave_spinning = 1;
ret = btrfs_insert_empty_item(trans, root, path, &ins,
sizeof(*stack_fi));
if (ret)
@@ -2563,7 +2632,24 @@ static int insert_reserved_file_extent(struct btrfs_trans_handle *trans,
btrfs_mark_buffer_dirty(leaf);
btrfs_release_path(path);
- inode_add_bytes(&inode->vfs_inode, num_bytes);
+ /*
+ * If we dropped an inline extent here, we know the range where it was
+ * located was not marked with the EXTENT_DELALLOC_NEW bit, so we update
+ * the number of bytes only for that range containing the inline extent.
+ * The remainder of the range will be processed when clearing the
+ * EXTENT_DELALLOC_NEW bit through the ordered extent completion.
+ */
+ if (file_pos == 0 && !IS_ALIGNED(drop_args.bytes_found, sectorsize)) {
+ u64 inline_size = round_down(drop_args.bytes_found, sectorsize);
+
+ inline_size = drop_args.bytes_found - inline_size;
+ btrfs_update_inode_bytes(inode, sectorsize, inline_size);
+ drop_args.bytes_found -= inline_size;
+ num_bytes -= sectorsize;
+ }
+
+ if (update_inode_bytes)
+ btrfs_update_inode_bytes(inode, num_bytes, drop_args.bytes_found);
ins.objectid = disk_bytenr;
ins.offset = disk_num_bytes;
@@ -2601,6 +2687,7 @@ static int insert_ordered_extent_file_extent(struct btrfs_trans_handle *trans,
{
struct btrfs_file_extent_item stack_fi;
u64 logical_len;
+ bool update_inode_bytes;
memset(&stack_fi, 0, sizeof(stack_fi));
btrfs_set_stack_file_extent_type(&stack_fi, BTRFS_FILE_EXTENT_REG);
@@ -2616,9 +2703,18 @@ static int insert_ordered_extent_file_extent(struct btrfs_trans_handle *trans,
btrfs_set_stack_file_extent_compression(&stack_fi, oe->compress_type);
/* Encryption and other encoding is reserved and all 0 */
+ /*
+ * For delalloc, when completing an ordered extent we update the inode's
+ * bytes when clearing the range in the inode's io tree, so pass false
+ * as the argument 'update_inode_bytes' to insert_reserved_file_extent(),
+ * except if the ordered extent was truncated.
+ */
+ update_inode_bytes = test_bit(BTRFS_ORDERED_DIRECT, &oe->flags) ||
+ test_bit(BTRFS_ORDERED_TRUNCATED, &oe->flags);
+
return insert_reserved_file_extent(trans, BTRFS_I(oe->inode),
oe->file_offset, &stack_fi,
- oe->qgroup_rsv);
+ update_inode_bytes, oe->qgroup_rsv);
}
/*
@@ -2628,11 +2724,11 @@ static int insert_ordered_extent_file_extent(struct btrfs_trans_handle *trans,
*/
static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent)
{
- struct inode *inode = ordered_extent->inode;
- struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
- struct btrfs_root *root = BTRFS_I(inode)->root;
+ struct btrfs_inode *inode = BTRFS_I(ordered_extent->inode);
+ struct btrfs_root *root = inode->root;
+ struct btrfs_fs_info *fs_info = root->fs_info;
struct btrfs_trans_handle *trans = NULL;
- struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
+ struct extent_io_tree *io_tree = &inode->io_tree;
struct extent_state *cached_state = NULL;
u64 start, end;
int compress_type = 0;
@@ -2640,10 +2736,8 @@ static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent)
u64 logical_len = ordered_extent->num_bytes;
bool freespace_inode;
bool truncated = false;
- bool range_locked = false;
- bool clear_new_delalloc_bytes = false;
bool clear_reserved_extent = true;
- unsigned int clear_bits;
+ unsigned int clear_bits = EXTENT_DEFRAG;
start = ordered_extent->file_offset;
end = start + ordered_extent->num_bytes - 1;
@@ -2651,16 +2745,16 @@ static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent)
if (!test_bit(BTRFS_ORDERED_NOCOW, &ordered_extent->flags) &&
!test_bit(BTRFS_ORDERED_PREALLOC, &ordered_extent->flags) &&
!test_bit(BTRFS_ORDERED_DIRECT, &ordered_extent->flags))
- clear_new_delalloc_bytes = true;
+ clear_bits |= EXTENT_DELALLOC_NEW;
- freespace_inode = btrfs_is_free_space_inode(BTRFS_I(inode));
+ freespace_inode = btrfs_is_free_space_inode(inode);
if (test_bit(BTRFS_ORDERED_IOERR, &ordered_extent->flags)) {
ret = -EIO;
goto out;
}
- btrfs_free_io_failure_record(BTRFS_I(inode), start, end);
+ btrfs_free_io_failure_record(inode, start, end);
if (test_bit(BTRFS_ORDERED_TRUNCATED, &ordered_extent->flags)) {
truncated = true;
@@ -2683,14 +2777,14 @@ static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent)
trans = NULL;
goto out;
}
- trans->block_rsv = &BTRFS_I(inode)->block_rsv;
+ trans->block_rsv = &inode->block_rsv;
ret = btrfs_update_inode_fallback(trans, root, inode);
if (ret) /* -ENOMEM or corruption */
btrfs_abort_transaction(trans, ret);
goto out;
}
- range_locked = true;
+ clear_bits |= EXTENT_LOCKED;
lock_extent_bits(io_tree, start, end, &cached_state);
if (freespace_inode)
@@ -2703,13 +2797,13 @@ static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent)
goto out;
}
- trans->block_rsv = &BTRFS_I(inode)->block_rsv;
+ trans->block_rsv = &inode->block_rsv;
if (test_bit(BTRFS_ORDERED_COMPRESSED, &ordered_extent->flags))
compress_type = ordered_extent->compress_type;
if (test_bit(BTRFS_ORDERED_PREALLOC, &ordered_extent->flags)) {
BUG_ON(compress_type);
- ret = btrfs_mark_extent_written(trans, BTRFS_I(inode),
+ ret = btrfs_mark_extent_written(trans, inode,
ordered_extent->file_offset,
ordered_extent->file_offset +
logical_len);
@@ -2723,8 +2817,7 @@ static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent)
ordered_extent->disk_num_bytes);
}
}
- unpin_extent_cache(&BTRFS_I(inode)->extent_tree,
- ordered_extent->file_offset,
+ unpin_extent_cache(&inode->extent_tree, ordered_extent->file_offset,
ordered_extent->num_bytes, trans->transid);
if (ret < 0) {
btrfs_abort_transaction(trans, ret);
@@ -2737,6 +2830,17 @@ static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent)
goto out;
}
+ /*
+ * If this is a new delalloc range, clear its new delalloc flag to
+ * update the inode's number of bytes. This needs to be done first
+ * before updating the inode item.
+ */
+ if ((clear_bits & EXTENT_DELALLOC_NEW) &&
+ !test_bit(BTRFS_ORDERED_TRUNCATED, &ordered_extent->flags))
+ clear_extent_bit(&inode->io_tree, start, end,
+ EXTENT_DELALLOC_NEW | EXTENT_ADD_INODE_BYTES,
+ 0, 0, &cached_state);
+
btrfs_inode_safe_disk_i_size_write(inode, 0);
ret = btrfs_update_inode_fallback(trans, root, inode);
if (ret) { /* -ENOMEM or corruption */
@@ -2745,12 +2849,7 @@ static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent)
}
ret = 0;
out:
- clear_bits = EXTENT_DEFRAG;
- if (range_locked)
- clear_bits |= EXTENT_LOCKED;
- if (clear_new_delalloc_bytes)
- clear_bits |= EXTENT_DELALLOC_NEW;
- clear_extent_bit(&BTRFS_I(inode)->io_tree, start, end, clear_bits,
+ clear_extent_bit(&inode->io_tree, start, end, clear_bits,
(clear_bits & EXTENT_LOCKED) ? 1 : 0, 0,
&cached_state);
@@ -2765,7 +2864,7 @@ out:
clear_extent_uptodate(io_tree, unwritten_start, end, NULL);
/* Drop the cache for the part of the extent we didn't write. */
- btrfs_drop_extent_cache(BTRFS_I(inode), unwritten_start, end, 0);
+ btrfs_drop_extent_cache(inode, unwritten_start, end, 0);
/*
* If the ordered extent had an IOERR or something else went
@@ -2800,7 +2899,7 @@ out:
* This needs to be done to make sure anybody waiting knows we are done
* updating everything for this ordered extent.
*/
- btrfs_remove_ordered_extent(BTRFS_I(inode), ordered_extent);
+ btrfs_remove_ordered_extent(inode, ordered_extent);
/* once for us */
btrfs_put_ordered_extent(ordered_extent);
@@ -2841,18 +2940,32 @@ void btrfs_writepage_endio_finish_ordered(struct page *page, u64 start,
btrfs_queue_work(wq, &ordered_extent->work);
}
+/*
+ * check_data_csum - verify checksum of one sector of uncompressed data
+ * @inode: inode
+ * @io_bio: btrfs_io_bio which contains the csum
+ * @bio_offset: offset to the beginning of the bio (in bytes)
+ * @page: page containing the data to be verified
+ * @pgoff: offset inside the page
+ *
+ * The length of such check is always one sector size.
+ */
static int check_data_csum(struct inode *inode, struct btrfs_io_bio *io_bio,
- int icsum, struct page *page, int pgoff, u64 start,
- size_t len)
+ u32 bio_offset, struct page *page, u32 pgoff)
{
struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
SHASH_DESC_ON_STACK(shash, fs_info->csum_shash);
char *kaddr;
- u16 csum_size = btrfs_super_csum_size(fs_info->super_copy);
+ u32 len = fs_info->sectorsize;
+ const u32 csum_size = fs_info->csum_size;
+ unsigned int offset_sectors;
u8 *csum_expected;
u8 csum[BTRFS_CSUM_SIZE];
- csum_expected = ((u8 *)io_bio->csum) + icsum * csum_size;
+ ASSERT(pgoff + len <= PAGE_SIZE);
+
+ offset_sectors = bio_offset >> fs_info->sectorsize_bits;
+ csum_expected = ((u8 *)io_bio->csum) + offset_sectors * csum_size;
kaddr = kmap_atomic(page);
shash->tfm = fs_info->csum_shash;
@@ -2865,8 +2978,8 @@ static int check_data_csum(struct inode *inode, struct btrfs_io_bio *io_bio,
kunmap_atomic(kaddr);
return 0;
zeroit:
- btrfs_print_data_csum_error(BTRFS_I(inode), start, csum, csum_expected,
- io_bio->mirror_num);
+ btrfs_print_data_csum_error(BTRFS_I(inode), page_offset(page) + pgoff,
+ csum, csum_expected, io_bio->mirror_num);
if (io_bio->device)
btrfs_dev_stat_inc_and_print(io_bio->device,
BTRFS_DEV_STAT_CORRUPTION_ERRS);
@@ -2877,17 +2990,23 @@ zeroit:
}
/*
- * when reads are done, we need to check csums to verify the data is correct
+ * When reads are done, we need to check csums to verify the data is correct.
* if there's a match, we allow the bio to finish. If not, the code in
* extent_io.c will try to find good copies for us.
+ *
+ * @bio_offset: offset to the beginning of the bio (in bytes)
+ * @start: file offset of the range start
+ * @end: file offset of the range end (inclusive)
+ * @mirror: mirror number
*/
-int btrfs_verify_data_csum(struct btrfs_io_bio *io_bio, u64 phy_offset,
+int btrfs_verify_data_csum(struct btrfs_io_bio *io_bio, u32 bio_offset,
struct page *page, u64 start, u64 end, int mirror)
{
- size_t offset = start - page_offset(page);
struct inode *inode = page->mapping->host;
struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
struct btrfs_root *root = BTRFS_I(inode)->root;
+ const u32 sectorsize = root->fs_info->sectorsize;
+ u32 pg_off;
if (PageChecked(page)) {
ClearPageChecked(page);
@@ -2897,15 +3016,27 @@ int btrfs_verify_data_csum(struct btrfs_io_bio *io_bio, u64 phy_offset,
if (BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM)
return 0;
+ if (!root->fs_info->csum_root)
+ return 0;
+
if (root->root_key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID &&
test_range_bit(io_tree, start, end, EXTENT_NODATASUM, 1, NULL)) {
clear_extent_bits(io_tree, start, end, EXTENT_NODATASUM);
return 0;
}
- phy_offset >>= inode->i_sb->s_blocksize_bits;
- return check_data_csum(inode, io_bio, phy_offset, page, offset, start,
- (size_t)(end - start + 1));
+ ASSERT(page_offset(page) <= start &&
+ end <= page_offset(page) + PAGE_SIZE - 1);
+ for (pg_off = offset_in_page(start);
+ pg_off < offset_in_page(end);
+ pg_off += sectorsize, bio_offset += sectorsize) {
+ int ret;
+
+ ret = check_data_csum(inode, io_bio, bio_offset, page, pg_off);
+ if (ret < 0)
+ return -EIO;
+ }
+ return 0;
}
/*
@@ -3515,7 +3646,8 @@ static void fill_inode_item(struct btrfs_trans_handle *trans,
* copy everything in the in-memory inode into the btree.
*/
static noinline int btrfs_update_inode_item(struct btrfs_trans_handle *trans,
- struct btrfs_root *root, struct inode *inode)
+ struct btrfs_root *root,
+ struct btrfs_inode *inode)
{
struct btrfs_inode_item *inode_item;
struct btrfs_path *path;
@@ -3526,9 +3658,7 @@ static noinline int btrfs_update_inode_item(struct btrfs_trans_handle *trans,
if (!path)
return -ENOMEM;
- path->leave_spinning = 1;
- ret = btrfs_lookup_inode(trans, root, path, &BTRFS_I(inode)->location,
- 1);
+ ret = btrfs_lookup_inode(trans, root, path, &inode->location, 1);
if (ret) {
if (ret > 0)
ret = -ENOENT;
@@ -3539,9 +3669,9 @@ static noinline int btrfs_update_inode_item(struct btrfs_trans_handle *trans,
inode_item = btrfs_item_ptr(leaf, path->slots[0],
struct btrfs_inode_item);
- fill_inode_item(trans, leaf, inode_item, inode);
+ fill_inode_item(trans, leaf, inode_item, &inode->vfs_inode);
btrfs_mark_buffer_dirty(leaf);
- btrfs_set_inode_last_trans(trans, BTRFS_I(inode));
+ btrfs_set_inode_last_trans(trans, inode);
ret = 0;
failed:
btrfs_free_path(path);
@@ -3552,7 +3682,8 @@ failed:
* copy everything in the in-memory inode into the btree.
*/
noinline int btrfs_update_inode(struct btrfs_trans_handle *trans,
- struct btrfs_root *root, struct inode *inode)
+ struct btrfs_root *root,
+ struct btrfs_inode *inode)
{
struct btrfs_fs_info *fs_info = root->fs_info;
int ret;
@@ -3564,23 +3695,22 @@ noinline int btrfs_update_inode(struct btrfs_trans_handle *trans,
* The data relocation inode should also be directly updated
* without delay
*/
- if (!btrfs_is_free_space_inode(BTRFS_I(inode))
+ if (!btrfs_is_free_space_inode(inode)
&& root->root_key.objectid != BTRFS_DATA_RELOC_TREE_OBJECTID
&& !test_bit(BTRFS_FS_LOG_RECOVERING, &fs_info->flags)) {
btrfs_update_root_times(trans, root);
ret = btrfs_delayed_update_inode(trans, root, inode);
if (!ret)
- btrfs_set_inode_last_trans(trans, BTRFS_I(inode));
+ btrfs_set_inode_last_trans(trans, inode);
return ret;
}
return btrfs_update_inode_item(trans, root, inode);
}
-noinline int btrfs_update_inode_fallback(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
- struct inode *inode)
+int btrfs_update_inode_fallback(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root, struct btrfs_inode *inode)
{
int ret;
@@ -3615,7 +3745,6 @@ static int __btrfs_unlink_inode(struct btrfs_trans_handle *trans,
goto out;
}
- path->leave_spinning = 1;
di = btrfs_lookup_dir_item(trans, root, path, dir_ino,
name, name_len, -1);
if (IS_ERR_OR_NULL(di)) {
@@ -3695,7 +3824,7 @@ err:
inode_inc_iversion(&dir->vfs_inode);
inode->vfs_inode.i_ctime = dir->vfs_inode.i_mtime =
dir->vfs_inode.i_ctime = current_time(&inode->vfs_inode);
- ret = btrfs_update_inode(trans, root, &dir->vfs_inode);
+ ret = btrfs_update_inode(trans, root, dir);
out:
return ret;
}
@@ -3709,7 +3838,7 @@ int btrfs_unlink_inode(struct btrfs_trans_handle *trans,
ret = __btrfs_unlink_inode(trans, root, dir, inode, name, name_len);
if (!ret) {
drop_nlink(&inode->vfs_inode);
- ret = btrfs_update_inode(trans, root, &inode->vfs_inode);
+ ret = btrfs_update_inode(trans, root, inode);
}
return ret;
}
@@ -3858,7 +3987,7 @@ static int btrfs_unlink_subvol(struct btrfs_trans_handle *trans,
btrfs_i_size_write(BTRFS_I(dir), dir->i_size - name_len * 2);
inode_inc_iversion(dir);
dir->i_mtime = dir->i_ctime = current_time(dir);
- ret = btrfs_update_inode_fallback(trans, root, dir);
+ ret = btrfs_update_inode_fallback(trans, root, BTRFS_I(dir));
if (ret)
btrfs_abort_transaction(trans, ret);
out:
@@ -3995,7 +4124,6 @@ int btrfs_delete_subvolume(struct inode *dir, struct dentry *dentry)
struct btrfs_block_rsv block_rsv;
u64 root_flags;
int ret;
- int err;
/*
* Don't allow to delete a subvolume with send in progress. This is
@@ -4017,8 +4145,8 @@ int btrfs_delete_subvolume(struct inode *dir, struct dentry *dentry)
down_write(&fs_info->subvol_sem);
- err = may_destroy_subvol(dest);
- if (err)
+ ret = may_destroy_subvol(dest);
+ if (ret)
goto out_up_write;
btrfs_init_block_rsv(&block_rsv, BTRFS_BLOCK_RSV_TEMP);
@@ -4027,13 +4155,13 @@ int btrfs_delete_subvolume(struct inode *dir, struct dentry *dentry)
* two for dir entries,
* two for root ref/backref.
*/
- err = btrfs_subvolume_reserve_metadata(root, &block_rsv, 5, true);
- if (err)
+ ret = btrfs_subvolume_reserve_metadata(root, &block_rsv, 5, true);
+ if (ret)
goto out_up_write;
trans = btrfs_start_transaction(root, 0);
if (IS_ERR(trans)) {
- err = PTR_ERR(trans);
+ ret = PTR_ERR(trans);
goto out_release;
}
trans->block_rsv = &block_rsv;
@@ -4043,7 +4171,6 @@ int btrfs_delete_subvolume(struct inode *dir, struct dentry *dentry)
ret = btrfs_unlink_subvol(trans, dir, dentry);
if (ret) {
- err = ret;
btrfs_abort_transaction(trans, ret);
goto out_end_trans;
}
@@ -4052,7 +4179,7 @@ int btrfs_delete_subvolume(struct inode *dir, struct dentry *dentry)
memset(&dest->root_item.drop_progress, 0,
sizeof(dest->root_item.drop_progress));
- dest->root_item.drop_level = 0;
+ btrfs_set_root_drop_level(&dest->root_item, 0);
btrfs_set_root_refs(&dest->root_item, 0);
if (!test_and_set_bit(BTRFS_ROOT_ORPHAN_ITEM_INSERTED, &dest->state)) {
@@ -4061,7 +4188,6 @@ int btrfs_delete_subvolume(struct inode *dir, struct dentry *dentry)
dest->root_key.objectid);
if (ret) {
btrfs_abort_transaction(trans, ret);
- err = ret;
goto out_end_trans;
}
}
@@ -4071,7 +4197,6 @@ int btrfs_delete_subvolume(struct inode *dir, struct dentry *dentry)
dest->root_key.objectid);
if (ret && ret != -ENOENT) {
btrfs_abort_transaction(trans, ret);
- err = ret;
goto out_end_trans;
}
if (!btrfs_is_empty_uuid(dest->root_item.received_uuid)) {
@@ -4081,7 +4206,6 @@ int btrfs_delete_subvolume(struct inode *dir, struct dentry *dentry)
dest->root_key.objectid);
if (ret && ret != -ENOENT) {
btrfs_abort_transaction(trans, ret);
- err = ret;
goto out_end_trans;
}
}
@@ -4092,14 +4216,12 @@ out_end_trans:
trans->block_rsv = NULL;
trans->bytes_reserved = 0;
ret = btrfs_end_transaction(trans);
- if (ret && !err)
- err = ret;
inode->i_flags |= S_DEAD;
out_release:
btrfs_subvolume_release_metadata(root, &block_rsv);
out_up_write:
up_write(&fs_info->subvol_sem);
- if (err) {
+ if (ret) {
spin_lock(&dest->root_item_lock);
root_flags = btrfs_root_flags(&dest->root_item);
btrfs_set_root_flags(&dest->root_item,
@@ -4109,15 +4231,9 @@ out_up_write:
d_invalidate(dentry);
btrfs_prune_dentries(dest);
ASSERT(dest->send_in_progress == 0);
-
- /* the last ref */
- if (dest->ino_cache_inode) {
- iput(dest->ino_cache_inode);
- dest->ino_cache_inode = NULL;
- }
}
- return err;
+ return ret;
}
static int btrfs_rmdir(struct inode *dir, struct dentry *dentry)
@@ -4194,7 +4310,7 @@ out:
*/
int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
- struct inode *inode,
+ struct btrfs_inode *inode,
u64 new_size, u32 min_type)
{
struct btrfs_fs_info *fs_info = root->fs_info;
@@ -4215,7 +4331,7 @@ int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans,
int pending_del_slot = 0;
int extent_type = -1;
int ret;
- u64 ino = btrfs_ino(BTRFS_I(inode));
+ u64 ino = btrfs_ino(inode);
u64 bytes_deleted = 0;
bool be_nice = false;
bool should_throttle = false;
@@ -4229,7 +4345,7 @@ int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans,
* off from time to time. This means all inodes in subvolume roots,
* reloc roots, and data reloc roots.
*/
- if (!btrfs_is_free_space_inode(BTRFS_I(inode)) &&
+ if (!btrfs_is_free_space_inode(inode) &&
test_bit(BTRFS_ROOT_SHAREABLE, &root->state))
be_nice = true;
@@ -4239,7 +4355,7 @@ int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans,
path->reada = READA_BACK;
if (root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID) {
- lock_extent_bits(&BTRFS_I(inode)->io_tree, lock_start, (u64)-1,
+ lock_extent_bits(&inode->io_tree, lock_start, (u64)-1,
&cached_state);
/*
@@ -4247,7 +4363,7 @@ int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans,
* new size is not block aligned since we will be keeping the
* last block of the extent just the way it is.
*/
- btrfs_drop_extent_cache(BTRFS_I(inode), ALIGN(new_size,
+ btrfs_drop_extent_cache(inode, ALIGN(new_size,
fs_info->sectorsize),
(u64)-1, 0);
}
@@ -4258,8 +4374,8 @@ int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans,
* it is used to drop the logged items. So we shouldn't kill the delayed
* items.
*/
- if (min_type == 0 && root == BTRFS_I(inode)->root)
- btrfs_kill_delayed_inode_items(BTRFS_I(inode));
+ if (min_type == 0 && root == inode->root)
+ btrfs_kill_delayed_inode_items(inode);
key.objectid = ino;
key.offset = (u64)-1;
@@ -4315,14 +4431,13 @@ search_again:
btrfs_file_extent_num_bytes(leaf, fi);
trace_btrfs_truncate_show_fi_regular(
- BTRFS_I(inode), leaf, fi,
- found_key.offset);
+ inode, leaf, fi, found_key.offset);
} else if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
item_end += btrfs_file_extent_ram_bytes(leaf,
fi);
trace_btrfs_truncate_show_fi_inline(
- BTRFS_I(inode), leaf, fi, path->slots[0],
+ inode, leaf, fi, path->slots[0],
found_key.offset);
}
item_end--;
@@ -4361,7 +4476,8 @@ search_again:
if (test_bit(BTRFS_ROOT_SHAREABLE,
&root->state) &&
extent_start != 0)
- inode_sub_bytes(inode, num_dec);
+ inode_sub_bytes(&inode->vfs_inode,
+ num_dec);
btrfs_mark_buffer_dirty(leaf);
} else {
extent_num_bytes =
@@ -4376,7 +4492,8 @@ search_again:
found_extent = 1;
if (test_bit(BTRFS_ROOT_SHAREABLE,
&root->state))
- inode_sub_bytes(inode, num_dec);
+ inode_sub_bytes(&inode->vfs_inode,
+ num_dec);
}
}
clear_len = num_dec;
@@ -4411,7 +4528,8 @@ search_again:
}
if (test_bit(BTRFS_ROOT_SHAREABLE, &root->state))
- inode_sub_bytes(inode, item_end + 1 - new_size);
+ inode_sub_bytes(&inode->vfs_inode,
+ item_end + 1 - new_size);
}
delete:
/*
@@ -4419,8 +4537,8 @@ delete:
* multiple fsyncs, and in this case we don't want to clear the
* file extent range because it's just the log.
*/
- if (root == BTRFS_I(inode)->root) {
- ret = btrfs_inode_clear_file_extent_range(BTRFS_I(inode),
+ if (root == inode->root) {
+ ret = btrfs_inode_clear_file_extent_range(inode,
clear_start, clear_len);
if (ret) {
btrfs_abort_transaction(trans, ret);
@@ -4529,8 +4647,8 @@ out:
if (!ret && last_size > new_size)
last_size = new_size;
btrfs_inode_safe_disk_i_size_write(inode, last_size);
- unlock_extent_cached(&BTRFS_I(inode)->io_tree, lock_start,
- (u64)-1, &cached_state);
+ unlock_extent_cached(&inode->io_tree, lock_start, (u64)-1,
+ &cached_state);
}
btrfs_free_path(path);
@@ -4548,12 +4666,12 @@ out:
* This will find the block for the "from" offset and cow the block and zero the
* part we want to zero. This is used with truncate and hole punching.
*/
-int btrfs_truncate_block(struct inode *inode, loff_t from, loff_t len,
- int front)
+int btrfs_truncate_block(struct btrfs_inode *inode, loff_t from, loff_t len,
+ int front)
{
- struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
- struct address_space *mapping = inode->i_mapping;
- struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
+ struct btrfs_fs_info *fs_info = inode->root->fs_info;
+ struct address_space *mapping = inode->vfs_inode.i_mapping;
+ struct extent_io_tree *io_tree = &inode->io_tree;
struct btrfs_ordered_extent *ordered;
struct extent_state *cached_state = NULL;
struct extent_changeset *data_reserved = NULL;
@@ -4576,30 +4694,29 @@ int btrfs_truncate_block(struct inode *inode, loff_t from, loff_t len,
block_start = round_down(from, blocksize);
block_end = block_start + blocksize - 1;
- ret = btrfs_check_data_free_space(BTRFS_I(inode), &data_reserved,
- block_start, blocksize);
+ ret = btrfs_check_data_free_space(inode, &data_reserved, block_start,
+ blocksize);
if (ret < 0) {
- if (btrfs_check_nocow_lock(BTRFS_I(inode), block_start,
- &write_bytes) > 0) {
+ if (btrfs_check_nocow_lock(inode, block_start, &write_bytes) > 0) {
/* For nocow case, no need to reserve data space */
only_release_metadata = true;
} else {
goto out;
}
}
- ret = btrfs_delalloc_reserve_metadata(BTRFS_I(inode), blocksize);
+ ret = btrfs_delalloc_reserve_metadata(inode, blocksize);
if (ret < 0) {
if (!only_release_metadata)
- btrfs_free_reserved_data_space(BTRFS_I(inode),
- data_reserved, block_start, blocksize);
+ btrfs_free_reserved_data_space(inode, data_reserved,
+ block_start, blocksize);
goto out;
}
again:
page = find_or_create_page(mapping, index, mask);
if (!page) {
- btrfs_delalloc_release_space(BTRFS_I(inode), data_reserved,
- block_start, blocksize, true);
- btrfs_delalloc_release_extents(BTRFS_I(inode), blocksize);
+ btrfs_delalloc_release_space(inode, data_reserved, block_start,
+ blocksize, true);
+ btrfs_delalloc_release_extents(inode, blocksize);
ret = -ENOMEM;
goto out;
}
@@ -4622,7 +4739,7 @@ again:
lock_extent_bits(io_tree, block_start, block_end, &cached_state);
set_page_extent_mapped(page);
- ordered = btrfs_lookup_ordered_extent(BTRFS_I(inode), block_start);
+ ordered = btrfs_lookup_ordered_extent(inode, block_start);
if (ordered) {
unlock_extent_cached(io_tree, block_start, block_end,
&cached_state);
@@ -4633,11 +4750,11 @@ again:
goto again;
}
- clear_extent_bit(&BTRFS_I(inode)->io_tree, block_start, block_end,
+ clear_extent_bit(&inode->io_tree, block_start, block_end,
EXTENT_DELALLOC | EXTENT_DO_ACCOUNTING | EXTENT_DEFRAG,
0, 0, &cached_state);
- ret = btrfs_set_extent_delalloc(BTRFS_I(inode), block_start, block_end, 0,
+ ret = btrfs_set_extent_delalloc(inode, block_start, block_end, 0,
&cached_state);
if (ret) {
unlock_extent_cached(io_tree, block_start, block_end,
@@ -4663,34 +4780,33 @@ again:
unlock_extent_cached(io_tree, block_start, block_end, &cached_state);
if (only_release_metadata)
- set_extent_bit(&BTRFS_I(inode)->io_tree, block_start,
- block_end, EXTENT_NORESERVE, NULL, NULL,
- GFP_NOFS);
+ set_extent_bit(&inode->io_tree, block_start, block_end,
+ EXTENT_NORESERVE, 0, NULL, NULL, GFP_NOFS, NULL);
out_unlock:
if (ret) {
if (only_release_metadata)
- btrfs_delalloc_release_metadata(BTRFS_I(inode),
- blocksize, true);
+ btrfs_delalloc_release_metadata(inode, blocksize, true);
else
- btrfs_delalloc_release_space(BTRFS_I(inode), data_reserved,
+ btrfs_delalloc_release_space(inode, data_reserved,
block_start, blocksize, true);
}
- btrfs_delalloc_release_extents(BTRFS_I(inode), blocksize);
+ btrfs_delalloc_release_extents(inode, blocksize);
unlock_page(page);
put_page(page);
out:
if (only_release_metadata)
- btrfs_check_nocow_unlock(BTRFS_I(inode));
+ btrfs_check_nocow_unlock(inode);
extent_changeset_free(data_reserved);
return ret;
}
-static int maybe_insert_hole(struct btrfs_root *root, struct inode *inode,
+static int maybe_insert_hole(struct btrfs_root *root, struct btrfs_inode *inode,
u64 offset, u64 len)
{
- struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
+ struct btrfs_fs_info *fs_info = root->fs_info;
struct btrfs_trans_handle *trans;
+ struct btrfs_drop_extents_args drop_args = { 0 };
int ret;
/*
@@ -4698,9 +4814,9 @@ static int maybe_insert_hole(struct btrfs_root *root, struct inode *inode,
* that any holes get logged if we fsync.
*/
if (btrfs_fs_incompat(fs_info, NO_HOLES)) {
- BTRFS_I(inode)->last_trans = fs_info->generation;
- BTRFS_I(inode)->last_sub_trans = root->log_transid;
- BTRFS_I(inode)->last_log_commit = root->last_log_commit;
+ inode->last_trans = fs_info->generation;
+ inode->last_sub_trans = root->log_transid;
+ inode->last_log_commit = root->last_log_commit;
return 0;
}
@@ -4713,19 +4829,25 @@ static int maybe_insert_hole(struct btrfs_root *root, struct inode *inode,
if (IS_ERR(trans))
return PTR_ERR(trans);
- ret = btrfs_drop_extents(trans, root, inode, offset, offset + len, 1);
+ drop_args.start = offset;
+ drop_args.end = offset + len;
+ drop_args.drop_cache = true;
+
+ ret = btrfs_drop_extents(trans, root, inode, &drop_args);
if (ret) {
btrfs_abort_transaction(trans, ret);
btrfs_end_transaction(trans);
return ret;
}
- ret = btrfs_insert_file_extent(trans, root, btrfs_ino(BTRFS_I(inode)),
+ ret = btrfs_insert_file_extent(trans, root, btrfs_ino(inode),
offset, 0, 0, len, 0, len, 0, 0, 0);
- if (ret)
+ if (ret) {
btrfs_abort_transaction(trans, ret);
- else
+ } else {
+ btrfs_update_inode_bytes(inode, 0, drop_args.bytes_found);
btrfs_update_inode(trans, root, inode);
+ }
btrfs_end_transaction(trans);
return ret;
}
@@ -4736,14 +4858,14 @@ static int maybe_insert_hole(struct btrfs_root *root, struct inode *inode,
* these file extents so that btrfs_get_extent will return a EXTENT_MAP_HOLE for
* the range between oldsize and size
*/
-int btrfs_cont_expand(struct inode *inode, loff_t oldsize, loff_t size)
+int btrfs_cont_expand(struct btrfs_inode *inode, loff_t oldsize, loff_t size)
{
- struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
- struct btrfs_root *root = BTRFS_I(inode)->root;
- struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
+ struct btrfs_root *root = inode->root;
+ struct btrfs_fs_info *fs_info = root->fs_info;
+ struct extent_io_tree *io_tree = &inode->io_tree;
struct extent_map *em = NULL;
struct extent_state *cached_state = NULL;
- struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
+ struct extent_map_tree *em_tree = &inode->extent_tree;
u64 hole_start = ALIGN(oldsize, fs_info->sectorsize);
u64 block_end = ALIGN(size, fs_info->sectorsize);
u64 last_byte;
@@ -4763,11 +4885,11 @@ int btrfs_cont_expand(struct inode *inode, loff_t oldsize, loff_t size)
if (size <= hole_start)
return 0;
- btrfs_lock_and_flush_ordered_range(BTRFS_I(inode), hole_start,
- block_end - 1, &cached_state);
+ btrfs_lock_and_flush_ordered_range(inode, hole_start, block_end - 1,
+ &cached_state);
cur_offset = hole_start;
while (1) {
- em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, cur_offset,
+ em = btrfs_get_extent(inode, NULL, 0, cur_offset,
block_end - cur_offset);
if (IS_ERR(em)) {
err = PTR_ERR(em);
@@ -4786,17 +4908,17 @@ int btrfs_cont_expand(struct inode *inode, loff_t oldsize, loff_t size)
if (err)
break;
- err = btrfs_inode_set_file_extent_range(BTRFS_I(inode),
+ err = btrfs_inode_set_file_extent_range(inode,
cur_offset, hole_size);
if (err)
break;
- btrfs_drop_extent_cache(BTRFS_I(inode), cur_offset,
+ btrfs_drop_extent_cache(inode, cur_offset,
cur_offset + hole_size - 1, 0);
hole_em = alloc_extent_map();
if (!hole_em) {
set_bit(BTRFS_INODE_NEEDS_FULL_SYNC,
- &BTRFS_I(inode)->runtime_flags);
+ &inode->runtime_flags);
goto next;
}
hole_em->start = cur_offset;
@@ -4816,14 +4938,13 @@ int btrfs_cont_expand(struct inode *inode, loff_t oldsize, loff_t size)
write_unlock(&em_tree->lock);
if (err != -EEXIST)
break;
- btrfs_drop_extent_cache(BTRFS_I(inode),
- cur_offset,
+ btrfs_drop_extent_cache(inode, cur_offset,
cur_offset +
hole_size - 1, 0);
}
free_extent_map(hole_em);
} else {
- err = btrfs_inode_set_file_extent_range(BTRFS_I(inode),
+ err = btrfs_inode_set_file_extent_range(inode,
cur_offset, hole_size);
if (err)
break;
@@ -4871,7 +4992,7 @@ static int btrfs_setsize(struct inode *inode, struct iattr *attr)
* this truncation.
*/
btrfs_drew_write_lock(&root->snapshot_lock);
- ret = btrfs_cont_expand(inode, oldsize, newsize);
+ ret = btrfs_cont_expand(BTRFS_I(inode), oldsize, newsize);
if (ret) {
btrfs_drew_write_unlock(&root->snapshot_lock);
return ret;
@@ -4884,9 +5005,9 @@ static int btrfs_setsize(struct inode *inode, struct iattr *attr)
}
i_size_write(inode, newsize);
- btrfs_inode_safe_disk_i_size_write(inode, 0);
+ btrfs_inode_safe_disk_i_size_write(BTRFS_I(inode), 0);
pagecache_isize_extended(inode, oldsize, newsize);
- ret = btrfs_update_inode(trans, root, inode);
+ ret = btrfs_update_inode(trans, root, BTRFS_I(inode));
btrfs_drew_write_unlock(&root->snapshot_lock);
btrfs_end_transaction(trans);
} else {
@@ -5157,7 +5278,8 @@ void btrfs_evict_inode(struct inode *inode)
trans->block_rsv = rsv;
- ret = btrfs_truncate_inode_items(trans, root, inode, 0, 0);
+ ret = btrfs_truncate_inode_items(trans, root, BTRFS_I(inode),
+ 0, 0);
trans->block_rsv = &fs_info->trans_block_rsv;
btrfs_end_transaction(trans);
btrfs_btree_balance_dirty(fs_info);
@@ -5184,10 +5306,6 @@ void btrfs_evict_inode(struct inode *inode)
btrfs_end_transaction(trans);
}
- if (!(root == fs_info->tree_root ||
- root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID))
- btrfs_return_ino(root, btrfs_ino(BTRFS_I(inode)));
-
free_rsv:
btrfs_free_block_rsv(fs_info, rsv);
no_delete:
@@ -5797,7 +5915,7 @@ static int btrfs_dirty_inode(struct inode *inode)
if (IS_ERR(trans))
return PTR_ERR(trans);
- ret = btrfs_update_inode(trans, root, inode);
+ ret = btrfs_update_inode(trans, root, BTRFS_I(inode));
if (ret && ret == -ENOSPC) {
/* whoops, lets try again with the full transaction */
btrfs_end_transaction(trans);
@@ -5805,7 +5923,7 @@ static int btrfs_dirty_inode(struct inode *inode)
if (IS_ERR(trans))
return PTR_ERR(trans);
- ret = btrfs_update_inode(trans, root, inode);
+ ret = btrfs_update_inode(trans, root, BTRFS_I(inode));
}
btrfs_end_transaction(trans);
if (BTRFS_I(inode)->delayed_node)
@@ -6068,7 +6186,6 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans,
goto fail;
}
- path->leave_spinning = 1;
ret = btrfs_insert_empty_items(trans, root, path, key, sizes, nitems);
if (ret != 0)
goto fail_unlock;
@@ -6194,7 +6311,7 @@ int btrfs_add_link(struct btrfs_trans_handle *trans,
parent_inode->vfs_inode.i_mtime = now;
parent_inode->vfs_inode.i_ctime = now;
}
- ret = btrfs_update_inode(trans, root, &parent_inode->vfs_inode);
+ ret = btrfs_update_inode(trans, root, parent_inode);
if (ret)
btrfs_abort_transaction(trans, ret);
return ret;
@@ -6254,7 +6371,7 @@ static int btrfs_mknod(struct inode *dir, struct dentry *dentry,
if (IS_ERR(trans))
return PTR_ERR(trans);
- err = btrfs_find_free_ino(root, &objectid);
+ err = btrfs_find_free_objectid(root, &objectid);
if (err)
goto out_unlock;
@@ -6285,7 +6402,7 @@ static int btrfs_mknod(struct inode *dir, struct dentry *dentry,
if (err)
goto out_unlock;
- btrfs_update_inode(trans, root, inode);
+ btrfs_update_inode(trans, root, BTRFS_I(inode));
d_instantiate_new(dentry, inode);
out_unlock:
@@ -6318,7 +6435,7 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry,
if (IS_ERR(trans))
return PTR_ERR(trans);
- err = btrfs_find_free_ino(root, &objectid);
+ err = btrfs_find_free_objectid(root, &objectid);
if (err)
goto out_unlock;
@@ -6344,7 +6461,7 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry,
if (err)
goto out_unlock;
- err = btrfs_update_inode(trans, root, inode);
+ err = btrfs_update_inode(trans, root, BTRFS_I(inode));
if (err)
goto out_unlock;
@@ -6416,7 +6533,7 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir,
} else {
struct dentry *parent = dentry->d_parent;
- err = btrfs_update_inode(trans, root, inode);
+ err = btrfs_update_inode(trans, root, BTRFS_I(inode));
if (err)
goto fail;
if (inode->i_nlink == 1) {
@@ -6462,7 +6579,7 @@ static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
if (IS_ERR(trans))
return PTR_ERR(trans);
- err = btrfs_find_free_ino(root, &objectid);
+ err = btrfs_find_free_objectid(root, &objectid);
if (err)
goto out_fail;
@@ -6484,7 +6601,7 @@ static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
goto out_fail;
btrfs_i_size_write(BTRFS_I(inode), 0);
- err = btrfs_update_inode(trans, root, inode);
+ err = btrfs_update_inode(trans, root, BTRFS_I(inode));
if (err)
goto out_fail;
@@ -6621,12 +6738,14 @@ struct extent_map *btrfs_get_extent(struct btrfs_inode *inode,
path->reada = READA_FORWARD;
/*
- * Unless we're going to uncompress the inline extent, no sleep would
- * happen.
+ * The same explanation in load_free_space_cache applies here as well,
+ * we only read when we're loading the free space cache, and at that
+ * point the commit_root has everything we need.
*/
- path->leave_spinning = 1;
-
- path->recurse = btrfs_is_free_space_inode(inode);
+ if (btrfs_is_free_space_inode(inode)) {
+ path->search_commit_root = 1;
+ path->skip_locking = 1;
+ }
ret = btrfs_lookup_file_extent(NULL, root, path, objectid, start, 0);
if (ret < 0) {
@@ -6728,7 +6847,6 @@ next:
em->orig_start = em->start;
ptr = btrfs_file_extent_inline_start(item) + extent_offset;
- btrfs_set_path_blocking(path);
if (!PageUptodate(page)) {
if (btrfs_file_extent_compression(leaf, item) !=
BTRFS_COMPRESS_NONE) {
@@ -7377,17 +7495,6 @@ static int btrfs_dio_iomap_begin(struct inode *inode, loff_t start,
int ret = 0;
u64 len = length;
bool unlock_extents = false;
- bool sync = (current->journal_info == BTRFS_DIO_SYNC_STUB);
-
- /*
- * We used current->journal_info here to see if we were sync, but
- * there's a lot of tests in the enospc machinery to not do flushing if
- * we have a journal_info set, so we need to clear this out and re-set
- * it in iomap_end.
- */
- ASSERT(current->journal_info == NULL ||
- current->journal_info == BTRFS_DIO_SYNC_STUB);
- current->journal_info = NULL;
if (!write)
len = min_t(u64, len, fs_info->sectorsize);
@@ -7413,7 +7520,6 @@ static int btrfs_dio_iomap_begin(struct inode *inode, loff_t start,
if (!dio_data)
return -ENOMEM;
- dio_data->sync = sync;
dio_data->length = length;
if (write) {
dio_data->reserve = round_up(length, fs_info->sectorsize);
@@ -7561,14 +7667,6 @@ static int btrfs_dio_iomap_end(struct inode *inode, loff_t pos, loff_t length,
extent_changeset_free(dio_data->data_reserved);
}
out:
- /*
- * We're all done, we can re-set the current->journal_info now safely
- * for our endio.
- */
- if (dio_data->sync) {
- ASSERT(current->journal_info == NULL);
- current->journal_info = BTRFS_DIO_SYNC_STUB;
- }
kfree(dio_data);
iomap->private = NULL;
@@ -7632,7 +7730,7 @@ static blk_status_t btrfs_check_read_dio_bio(struct inode *inode,
struct bio_vec bvec;
struct bvec_iter iter;
u64 start = io_bio->logical;
- int icsum = 0;
+ u32 bio_offset = 0;
blk_status_t err = BLK_STS_OK;
__bio_for_each_segment(bvec, &io_bio->bio, iter, io_bio->iter) {
@@ -7643,9 +7741,8 @@ static blk_status_t btrfs_check_read_dio_bio(struct inode *inode,
for (i = 0; i < nr_sectors; i++) {
ASSERT(pgoff < PAGE_SIZE);
if (uptodate &&
- (!csum || !check_data_csum(inode, io_bio, icsum,
- bvec.bv_page, pgoff,
- start, sectorsize))) {
+ (!csum || !check_data_csum(inode, io_bio,
+ bio_offset, bvec.bv_page, pgoff))) {
clean_io_failure(fs_info, failure_tree, io_tree,
start, bvec.bv_page,
btrfs_ino(BTRFS_I(inode)),
@@ -7653,6 +7750,7 @@ static blk_status_t btrfs_check_read_dio_bio(struct inode *inode,
} else {
blk_status_t status;
+ ASSERT((start - io_bio->logical) < UINT_MAX);
status = btrfs_submit_read_repair(inode,
&io_bio->bio,
start - io_bio->logical,
@@ -7665,7 +7763,8 @@ static blk_status_t btrfs_check_read_dio_bio(struct inode *inode,
err = status;
}
start += sectorsize;
- icsum++;
+ ASSERT(bio_offset + sectorsize > bio_offset);
+ bio_offset += sectorsize;
pgoff += sectorsize;
}
}
@@ -7715,12 +7814,11 @@ static void __endio_write_update_ordered(struct btrfs_inode *inode,
}
}
-static blk_status_t btrfs_submit_bio_start_direct_io(void *private_data,
- struct bio *bio, u64 offset)
+static blk_status_t btrfs_submit_bio_start_direct_io(struct inode *inode,
+ struct bio *bio,
+ u64 dio_file_offset)
{
- struct inode *inode = private_data;
-
- return btrfs_csum_one_bio(BTRFS_I(inode), bio, offset, 1);
+ return btrfs_csum_one_bio(BTRFS_I(inode), bio, dio_file_offset, 1);
}
static void btrfs_end_dio_bio(struct bio *bio)
@@ -7732,8 +7830,7 @@ static void btrfs_end_dio_bio(struct bio *bio)
btrfs_warn(BTRFS_I(dip->inode)->root->fs_info,
"direct IO failed ino %llu rw %d,%u sector %#Lx len %u err no %d",
btrfs_ino(BTRFS_I(dip->inode)), bio_op(bio),
- bio->bi_opf,
- (unsigned long long)bio->bi_iter.bi_sector,
+ bio->bi_opf, bio->bi_iter.bi_sector,
bio->bi_iter.bi_size, err);
if (bio_op(bio) == REQ_OP_READ) {
@@ -7770,8 +7867,7 @@ static inline blk_status_t btrfs_submit_dio_bio(struct bio *bio,
goto map;
if (write && async_submit) {
- ret = btrfs_wq_submit_bio(fs_info, bio, 0, 0,
- file_offset, inode,
+ ret = btrfs_wq_submit_bio(inode, bio, 0, 0, file_offset,
btrfs_submit_bio_start_direct_io);
goto err;
} else if (write) {
@@ -7786,8 +7882,8 @@ static inline blk_status_t btrfs_submit_dio_bio(struct bio *bio,
u64 csum_offset;
csum_offset = file_offset - dip->logical_offset;
- csum_offset >>= inode->i_sb->s_blocksize_bits;
- csum_offset *= btrfs_super_csum_size(fs_info->super_copy);
+ csum_offset >>= fs_info->sectorsize_bits;
+ csum_offset *= fs_info->csum_size;
btrfs_io_bio(bio)->csum = dip->csums + csum_offset;
}
map:
@@ -7812,11 +7908,10 @@ static struct btrfs_dio_private *btrfs_create_dio_private(struct bio *dio_bio,
dip_size = sizeof(*dip);
if (!write && csum) {
struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
- const u16 csum_size = btrfs_super_csum_size(fs_info->super_copy);
size_t nblocks;
- nblocks = dio_bio->bi_iter.bi_size >> inode->i_sb->s_blocksize_bits;
- dip_size += csum_size * nblocks;
+ nblocks = dio_bio->bi_iter.bi_size >> fs_info->sectorsize_bits;
+ dip_size += fs_info->csum_size * nblocks;
}
dip = kzalloc(dip_size, GFP_NOFS);
@@ -7826,7 +7921,7 @@ static struct btrfs_dio_private *btrfs_create_dio_private(struct bio *dio_bio,
dip->inode = inode;
dip->logical_offset = file_offset;
dip->bytes = dio_bio->bi_iter.bi_size;
- dip->disk_bytenr = (u64)dio_bio->bi_iter.bi_sector << 9;
+ dip->disk_bytenr = dio_bio->bi_iter.bi_sector << 9;
dip->dio_bio = dio_bio;
refcount_set(&dip->refs, 1);
return dip;
@@ -7836,7 +7931,6 @@ static blk_qc_t btrfs_submit_direct(struct inode *inode, struct iomap *iomap,
struct bio *dio_bio, loff_t file_offset)
{
const bool write = (bio_op(dio_bio) == REQ_OP_WRITE);
- const bool csum = !(BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM);
struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
const bool raid56 = (btrfs_data_alloc_profile(fs_info) &
BTRFS_BLOCK_GROUP_RAID56_MASK);
@@ -7863,13 +7957,14 @@ static blk_qc_t btrfs_submit_direct(struct inode *inode, struct iomap *iomap,
return BLK_QC_T_NONE;
}
- if (!write && csum) {
+ if (!write) {
/*
* Load the csums up front to reduce csum tree searches and
* contention when submitting bios.
+ *
+ * If we have csums disabled this will do nothing.
*/
- status = btrfs_lookup_bio_sums(inode, dio_bio, file_offset,
- dip->csums);
+ status = btrfs_lookup_bio_sums(inode, dio_bio, dip->csums);
if (status != BLK_STS_OK)
goto out_err;
}
@@ -7944,129 +8039,15 @@ out_err:
return BLK_QC_T_NONE;
}
-static ssize_t check_direct_IO(struct btrfs_fs_info *fs_info,
- const struct iov_iter *iter, loff_t offset)
-{
- int seg;
- int i;
- unsigned int blocksize_mask = fs_info->sectorsize - 1;
- ssize_t retval = -EINVAL;
-
- if (offset & blocksize_mask)
- goto out;
-
- if (iov_iter_alignment(iter) & blocksize_mask)
- goto out;
-
- /* If this is a write we don't need to check anymore */
- if (iov_iter_rw(iter) != READ || !iter_is_iovec(iter))
- return 0;
- /*
- * Check to make sure we don't have duplicate iov_base's in this
- * iovec, if so return EINVAL, otherwise we'll get csum errors
- * when reading back.
- */
- for (seg = 0; seg < iter->nr_segs; seg++) {
- for (i = seg + 1; i < iter->nr_segs; i++) {
- if (iter->iov[seg].iov_base == iter->iov[i].iov_base)
- goto out;
- }
- }
- retval = 0;
-out:
- return retval;
-}
-
-static inline int btrfs_maybe_fsync_end_io(struct kiocb *iocb, ssize_t size,
- int error, unsigned flags)
-{
- /*
- * Now if we're still in the context of our submitter we know we can't
- * safely run generic_write_sync(), so clear our flag here so that the
- * caller knows to follow up with a sync.
- */
- if (current->journal_info == BTRFS_DIO_SYNC_STUB) {
- current->journal_info = NULL;
- return error;
- }
-
- if (error)
- return error;
-
- if (size) {
- iocb->ki_flags |= IOCB_DSYNC;
- return generic_write_sync(iocb, size);
- }
-
- return 0;
-}
-
-static const struct iomap_ops btrfs_dio_iomap_ops = {
+const struct iomap_ops btrfs_dio_iomap_ops = {
.iomap_begin = btrfs_dio_iomap_begin,
.iomap_end = btrfs_dio_iomap_end,
};
-static const struct iomap_dio_ops btrfs_dio_ops = {
+const struct iomap_dio_ops btrfs_dio_ops = {
.submit_io = btrfs_submit_direct,
};
-static const struct iomap_dio_ops btrfs_sync_dops = {
- .submit_io = btrfs_submit_direct,
- .end_io = btrfs_maybe_fsync_end_io,
-};
-
-ssize_t btrfs_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
-{
- struct file *file = iocb->ki_filp;
- struct inode *inode = file->f_mapping->host;
- struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
- struct extent_changeset *data_reserved = NULL;
- loff_t offset = iocb->ki_pos;
- size_t count = 0;
- bool relock = false;
- ssize_t ret;
-
- if (check_direct_IO(fs_info, iter, offset))
- return 0;
-
- count = iov_iter_count(iter);
- if (iov_iter_rw(iter) == WRITE) {
- /*
- * If the write DIO is beyond the EOF, we need update
- * the isize, but it is protected by i_mutex. So we can
- * not unlock the i_mutex at this case.
- */
- if (offset + count <= inode->i_size) {
- inode_unlock(inode);
- relock = true;
- }
- down_read(&BTRFS_I(inode)->dio_sem);
- }
-
- /*
- * We have are actually a sync iocb, so we need our fancy endio to know
- * if we need to sync.
- */
- if (current->journal_info)
- ret = iomap_dio_rw(iocb, iter, &btrfs_dio_iomap_ops,
- &btrfs_sync_dops, is_sync_kiocb(iocb));
- else
- ret = iomap_dio_rw(iocb, iter, &btrfs_dio_iomap_ops,
- &btrfs_dio_ops, is_sync_kiocb(iocb));
-
- if (ret == -ENOTBLK)
- ret = 0;
-
- if (iov_iter_rw(iter) == WRITE)
- up_read(&BTRFS_I(inode)->dio_sem);
-
- if (relock)
- inode_lock(inode);
-
- extent_changeset_free(data_reserved);
- return ret;
-}
-
static int btrfs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
u64 start, u64 len)
{
@@ -8186,6 +8167,8 @@ static void btrfs_invalidatepage(struct page *page, unsigned int offset,
u64 start;
u64 end;
int inode_evicting = inode->vfs_inode.i_state & I_FREEING;
+ bool found_ordered = false;
+ bool completed_ordered = false;
/*
* we have the page locked, so new writeback can't start,
@@ -8207,15 +8190,17 @@ again:
start = page_start;
ordered = btrfs_lookup_ordered_range(inode, start, page_end - start + 1);
if (ordered) {
+ found_ordered = true;
end = min(page_end,
ordered->file_offset + ordered->num_bytes - 1);
/*
- * IO on this page will never be started, so we need
- * to account for any ordered extents now
+ * IO on this page will never be started, so we need to account
+ * for any ordered extents now. Don't clear EXTENT_DELALLOC_NEW
+ * here, must leave that up for the ordered extent completion.
*/
if (!inode_evicting)
clear_extent_bit(tree, start, end,
- EXTENT_DELALLOC | EXTENT_DELALLOC_NEW |
+ EXTENT_DELALLOC |
EXTENT_LOCKED | EXTENT_DO_ACCOUNTING |
EXTENT_DEFRAG, 1, 0, &cached_state);
/*
@@ -8237,8 +8222,10 @@ again:
if (btrfs_dec_test_ordered_pending(inode, &ordered,
start,
- end - start + 1, 1))
+ end - start + 1, 1)) {
btrfs_finish_ordered_io(ordered);
+ completed_ordered = true;
+ }
}
btrfs_put_ordered_extent(ordered);
if (!inode_evicting) {
@@ -8267,10 +8254,23 @@ again:
*/
btrfs_qgroup_free_data(inode, NULL, page_start, PAGE_SIZE);
if (!inode_evicting) {
+ bool delete = true;
+
+ /*
+ * If there's an ordered extent for this range and we have not
+ * finished it ourselves, we must leave EXTENT_DELALLOC_NEW set
+ * in the range for the ordered extent completion. We must also
+ * not delete the range, otherwise we would lose that bit (and
+ * any other bits set in the range). Make sure EXTENT_UPTODATE
+ * is cleared if we don't delete, otherwise it can lead to
+ * corruptions if the i_size is extended later.
+ */
+ if (found_ordered && !completed_ordered)
+ delete = false;
clear_extent_bit(tree, page_start, page_end, EXTENT_LOCKED |
- EXTENT_DELALLOC | EXTENT_DELALLOC_NEW |
- EXTENT_DO_ACCOUNTING | EXTENT_DEFRAG, 1, 1,
- &cached_state);
+ EXTENT_DELALLOC | EXTENT_UPTODATE |
+ EXTENT_DO_ACCOUNTING | EXTENT_DEFRAG, 1,
+ delete, &cached_state);
__btrfs_releasepage(page, GFP_NOFS);
}
@@ -8519,14 +8519,14 @@ static int btrfs_truncate(struct inode *inode, bool skip_writeback)
trans->block_rsv = rsv;
while (1) {
- ret = btrfs_truncate_inode_items(trans, root, inode,
+ ret = btrfs_truncate_inode_items(trans, root, BTRFS_I(inode),
inode->i_size,
BTRFS_EXTENT_DATA_KEY);
trans->block_rsv = &fs_info->trans_block_rsv;
if (ret != -ENOSPC && ret != -EAGAIN)
break;
- ret = btrfs_update_inode(trans, root, inode);
+ ret = btrfs_update_inode(trans, root, BTRFS_I(inode));
if (ret)
break;
@@ -8557,7 +8557,7 @@ static int btrfs_truncate(struct inode *inode, bool skip_writeback)
btrfs_end_transaction(trans);
btrfs_btree_balance_dirty(fs_info);
- ret = btrfs_truncate_block(inode, inode->i_size, 0, 0);
+ ret = btrfs_truncate_block(BTRFS_I(inode), inode->i_size, 0, 0);
if (ret)
goto out;
trans = btrfs_start_transaction(root, 1);
@@ -8565,14 +8565,14 @@ static int btrfs_truncate(struct inode *inode, bool skip_writeback)
ret = PTR_ERR(trans);
goto out;
}
- btrfs_inode_safe_disk_i_size_write(inode, 0);
+ btrfs_inode_safe_disk_i_size_write(BTRFS_I(inode), 0);
}
if (trans) {
int ret2;
trans->block_rsv = &fs_info->trans_block_rsv;
- ret2 = btrfs_update_inode(trans, root, inode);
+ ret2 = btrfs_update_inode(trans, root, BTRFS_I(inode));
if (ret2 && !ret)
ret = ret2;
@@ -8618,7 +8618,7 @@ int btrfs_create_subvol_root(struct btrfs_trans_handle *trans,
"error inheriting subvolume %llu properties: %d",
new_root->root_key.objectid, err);
- err = btrfs_update_inode(trans, new_root, inode);
+ err = btrfs_update_inode(trans, new_root, BTRFS_I(inode));
iput(inode);
return err;
@@ -8680,7 +8680,6 @@ struct inode *btrfs_alloc_inode(struct super_block *sb)
INIT_LIST_HEAD(&ei->delalloc_inodes);
INIT_LIST_HEAD(&ei->delayed_iput);
RB_CLEAR_NODE(&ei->rb_node);
- init_rwsem(&ei->dio_sem);
return inode;
}
@@ -8820,6 +8819,7 @@ static int btrfs_getattr(const struct path *path, struct kstat *stat,
u32 request_mask, unsigned int flags)
{
u64 delalloc_bytes;
+ u64 inode_bytes;
struct inode *inode = d_inode(path->dentry);
u32 blocksize = inode->i_sb->s_blocksize;
u32 bi_flags = BTRFS_I(inode)->flags;
@@ -8846,8 +8846,9 @@ static int btrfs_getattr(const struct path *path, struct kstat *stat,
spin_lock(&BTRFS_I(inode)->lock);
delalloc_bytes = BTRFS_I(inode)->new_delalloc_bytes;
+ inode_bytes = inode_get_bytes(inode);
spin_unlock(&BTRFS_I(inode)->lock);
- stat->blocks = (ALIGN(inode_get_bytes(inode), blocksize) +
+ stat->blocks = (ALIGN(inode_bytes, blocksize) +
ALIGN(delalloc_bytes, blocksize)) >> 9;
return 0;
}
@@ -8973,7 +8974,7 @@ static int btrfs_rename_exchange(struct inode *old_dir,
old_dentry->d_name.name,
old_dentry->d_name.len);
if (!ret)
- ret = btrfs_update_inode(trans, root, old_inode);
+ ret = btrfs_update_inode(trans, root, BTRFS_I(old_inode));
}
if (ret) {
btrfs_abort_transaction(trans, ret);
@@ -8989,7 +8990,7 @@ static int btrfs_rename_exchange(struct inode *old_dir,
new_dentry->d_name.name,
new_dentry->d_name.len);
if (!ret)
- ret = btrfs_update_inode(trans, dest, new_inode);
+ ret = btrfs_update_inode(trans, dest, BTRFS_I(new_inode));
}
if (ret) {
btrfs_abort_transaction(trans, ret);
@@ -9078,7 +9079,7 @@ static int btrfs_whiteout_for_rename(struct btrfs_trans_handle *trans,
u64 objectid;
u64 index;
- ret = btrfs_find_free_ino(root, &objectid);
+ ret = btrfs_find_free_objectid(root, &objectid);
if (ret)
return ret;
@@ -9109,7 +9110,7 @@ static int btrfs_whiteout_for_rename(struct btrfs_trans_handle *trans,
if (ret)
goto out;
- ret = btrfs_update_inode(trans, root, inode);
+ ret = btrfs_update_inode(trans, root, BTRFS_I(inode));
out:
unlock_new_inode(inode);
if (ret)
@@ -9243,7 +9244,7 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
old_dentry->d_name.name,
old_dentry->d_name.len);
if (!ret)
- ret = btrfs_update_inode(trans, root, old_inode);
+ ret = btrfs_update_inode(trans, root, BTRFS_I(old_inode));
}
if (ret) {
btrfs_abort_transaction(trans, ret);
@@ -9541,7 +9542,7 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry,
if (IS_ERR(trans))
return PTR_ERR(trans);
- err = btrfs_find_free_ino(root, &objectid);
+ err = btrfs_find_free_objectid(root, &objectid);
if (err)
goto out_unlock;
@@ -9603,7 +9604,7 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry,
inode_nohighmem(inode);
inode_set_bytes(inode, name_len);
btrfs_i_size_write(BTRFS_I(inode), name_len);
- err = btrfs_update_inode(trans, root, inode);
+ err = btrfs_update_inode(trans, root, BTRFS_I(inode));
/*
* Last step, add directory indexes for our symlink inode. This is the
* last step to avoid extra cleanup of these indexes if an error happens
@@ -9629,7 +9630,8 @@ out_unlock:
static struct btrfs_trans_handle *insert_prealloc_file_extent(
struct btrfs_trans_handle *trans_in,
- struct inode *inode, struct btrfs_key *ins,
+ struct btrfs_inode *inode,
+ struct btrfs_key *ins,
u64 file_offset)
{
struct btrfs_file_extent_item stack_fi;
@@ -9650,13 +9652,14 @@ static struct btrfs_trans_handle *insert_prealloc_file_extent(
btrfs_set_stack_file_extent_compression(&stack_fi, BTRFS_COMPRESS_NONE);
/* Encryption and other encoding is reserved and all 0 */
- ret = btrfs_qgroup_release_data(BTRFS_I(inode), file_offset, len);
+ ret = btrfs_qgroup_release_data(inode, file_offset, len);
if (ret < 0)
return ERR_PTR(ret);
if (trans) {
- ret = insert_reserved_file_extent(trans, BTRFS_I(inode),
- file_offset, &stack_fi, ret);
+ ret = insert_reserved_file_extent(trans, inode,
+ file_offset, &stack_fi,
+ true, ret);
if (ret)
return ERR_PTR(ret);
return trans;
@@ -9676,7 +9679,7 @@ static struct btrfs_trans_handle *insert_prealloc_file_extent(
if (!path)
return ERR_PTR(-ENOMEM);
- ret = btrfs_replace_file_extents(inode, path, file_offset,
+ ret = btrfs_replace_file_extents(&inode->vfs_inode, path, file_offset,
file_offset + len - 1, &extent_info,
&trans);
btrfs_free_path(path);
@@ -9732,7 +9735,8 @@ static int __btrfs_prealloc_file_range(struct inode *inode, int mode,
clear_offset += ins.offset;
last_alloc = ins.offset;
- trans = insert_prealloc_file_extent(trans, inode, &ins, cur_offset);
+ trans = insert_prealloc_file_extent(trans, BTRFS_I(inode),
+ &ins, cur_offset);
/*
* Now that we inserted the prealloc extent we can finally
* decrement the number of reservations in the block group.
@@ -9794,10 +9798,10 @@ next:
else
i_size = cur_offset;
i_size_write(inode, i_size);
- btrfs_inode_safe_disk_i_size_write(inode, 0);
+ btrfs_inode_safe_disk_i_size_write(BTRFS_I(inode), 0);
}
- ret = btrfs_update_inode(trans, root, inode);
+ ret = btrfs_update_inode(trans, root, BTRFS_I(inode));
if (ret) {
btrfs_abort_transaction(trans, ret);
@@ -9872,7 +9876,7 @@ static int btrfs_tmpfile(struct inode *dir, struct dentry *dentry, umode_t mode)
if (IS_ERR(trans))
return PTR_ERR(trans);
- ret = btrfs_find_free_ino(root, &objectid);
+ ret = btrfs_find_free_objectid(root, &objectid);
if (ret)
goto out;
@@ -9893,7 +9897,7 @@ static int btrfs_tmpfile(struct inode *dir, struct dentry *dentry, umode_t mode)
if (ret)
goto out;
- ret = btrfs_update_inode(trans, root, inode);
+ ret = btrfs_update_inode(trans, root, BTRFS_I(inode));
if (ret)
goto out;
ret = btrfs_orphan_add(trans, BTRFS_I(inode));
@@ -10272,6 +10276,27 @@ static int btrfs_swap_activate(struct swap_info_struct *sis, struct file *file,
}
#endif
+/*
+ * Update the number of bytes used in the VFS' inode. When we replace extents in
+ * a range (clone, dedupe, fallocate's zero range), we must update the number of
+ * bytes used by the inode in an atomic manner, so that concurrent stat(2) calls
+ * always get a correct value.
+ */
+void btrfs_update_inode_bytes(struct btrfs_inode *inode,
+ const u64 add_bytes,
+ const u64 del_bytes)
+{
+ if (add_bytes == del_bytes)
+ return;
+
+ spin_lock(&inode->lock);
+ if (del_bytes > 0)
+ inode_sub_bytes(&inode->vfs_inode, del_bytes);
+ if (add_bytes > 0)
+ inode_add_bytes(&inode->vfs_inode, add_bytes);
+ spin_unlock(&inode->lock);
+}
+
static const struct inode_operations btrfs_dir_inode_operations = {
.getattr = btrfs_getattr,
.lookup = btrfs_lookup,