diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2022-03-22 18:51:40 +0100 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2022-03-22 18:51:40 +0100 |
commit | 5191290407668028179f2544a11ae9b57f0bcf07 (patch) | |
tree | 1296aea3a45e412bf5155a9ddf8484097eb9538e /fs/btrfs/extent_io.c | |
parent | Merge tag 'ext4_for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/t... (diff) | |
parent | btrfs: zoned: put block group after final usage (diff) | |
download | linux-5191290407668028179f2544a11ae9b57f0bcf07.tar.xz linux-5191290407668028179f2544a11ae9b57f0bcf07.zip |
Merge tag 'for-5.18-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux
Pull btrfs updates from David Sterba:
"This contains feature updates, performance improvements, preparatory
and core work and some related VFS updates:
Features:
- encoded read/write ioctls, allows user space to read or write raw
data directly to extents (now compressed, encrypted in the future),
will be used by send/receive v2 where it saves processing time
- zoned mode now works with metadata DUP (the mkfs.btrfs default)
- error message header updates:
- print error state: transaction abort, other error, log tree
errors
- print transient filesystem state: remount, device replace,
ignored checksum verifications
- tree-checker: verify the transaction id of the to-be-written dirty
extent buffer
Performance improvements for fsync:
- directory logging speedups (up to -90% run time)
- avoid logging all directory changes during renames (up to -60% run
time)
- avoid inode logging during rename and link when possible (up to
-60% run time)
- prepare extents to be logged before locking a log tree path
(throughput +7%)
- stop copying old file extents when doing a full fsync()
- improved logging of old extents after truncate
Core, fixes:
- improved stale device identification by dev_t and not just path
(for devices that are behind other layers like device mapper)
- continued extent tree v2 preparatory work
- disable features that won't work yet
- add wrappers and abstractions for new tree roots
- improved error handling
- add super block write annotations around background block group
reclaim
- fix device scanning messages potentially accessing stale pointer
- cleanups and refactoring
VFS:
- allow reflinks/deduplication from two different mounts of the same
filesystem
- export and add helpers for read/write range verification, for the
encoded ioctls"
* tag 'for-5.18-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux: (98 commits)
btrfs: zoned: put block group after final usage
btrfs: don't access possibly stale fs_info data in device_list_add
btrfs: add lockdep_assert_held to need_preemptive_reclaim
btrfs: verify the tranisd of the to-be-written dirty extent buffer
btrfs: unify the error handling of btrfs_read_buffer()
btrfs: unify the error handling pattern for read_tree_block()
btrfs: factor out do_free_extent_accounting helper
btrfs: remove last_ref from the extent freeing code
btrfs: add a alloc_reserved_extent helper
btrfs: remove BUG_ON(ret) in alloc_reserved_tree_block
btrfs: add and use helper for unlinking inode during log replay
btrfs: extend locking to all space_info members accesses
btrfs: zoned: mark relocation as writing
fs: allow cross-vfsmount reflink/dedupe
btrfs: remove the cross file system checks from remap
btrfs: pass btrfs_fs_info to btrfs_recover_relocation
btrfs: pass btrfs_fs_info for deleting snapshots and cleaner
btrfs: add filesystems state details to error messages
btrfs: deal with unexpected extent type during reflinking
btrfs: fix unexpected error path when reflinking an inline extent
...
Diffstat (limited to 'fs/btrfs/extent_io.c')
-rw-r--r-- | fs/btrfs/extent_io.c | 45 |
1 files changed, 26 insertions, 19 deletions
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 5923eec8caa8..df7c81255964 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -2610,6 +2610,7 @@ static bool btrfs_check_repairable(struct inode *inode, * a good copy of the failed sector and if we succeed, we have setup * everything for repair_io_failure to do the rest for us. */ + ASSERT(failed_mirror); failrec->failed_mirror = failed_mirror; failrec->this_mirror++; if (failrec->this_mirror == failed_mirror) @@ -2639,7 +2640,6 @@ int btrfs_repair_one_sector(struct inode *inode, const int icsum = bio_offset >> fs_info->sectorsize_bits; struct bio *repair_bio; struct btrfs_bio *repair_bbio; - blk_status_t status; btrfs_debug(fs_info, "repair read error: read error at %llu", start); @@ -2678,13 +2678,13 @@ int btrfs_repair_one_sector(struct inode *inode, "repair read error: submitting new read to mirror %d", failrec->this_mirror); - status = submit_bio_hook(inode, repair_bio, failrec->this_mirror, - failrec->bio_flags); - if (status) { - free_io_failure(failure_tree, tree, failrec); - bio_put(repair_bio); - } - return blk_status_to_errno(status); + /* + * At this point we have a bio, so any errors from submit_bio_hook() + * will be handled by the endio on the repair_bio, so we can't return an + * error here. + */ + submit_bio_hook(inode, repair_bio, failrec->this_mirror, failrec->bio_flags); + return BLK_STS_OK; } static void end_page_read(struct page *page, bool uptodate, u64 start, u32 len) @@ -3068,6 +3068,14 @@ static void end_bio_extent_readpage(struct bio *bio) if (is_data_inode(inode)) { /* + * If we failed to submit the IO at all we'll have a + * mirror_num == 0, in which case we need to just mark + * the page with an error and unlock it and carry on. + */ + if (mirror == 0) + goto readpage_ok; + + /* * btrfs_submit_read_repair() will handle all the good * and bad sectors, we just continue to the next bvec. */ @@ -3534,7 +3542,7 @@ __get_extent_map(struct inode *inode, struct page *page, size_t pg_offset, } em = btrfs_get_extent(BTRFS_I(inode), page, pg_offset, start, len); - if (em_cached && !IS_ERR_OR_NULL(em)) { + if (em_cached && !IS_ERR(em)) { BUG_ON(*em_cached); refcount_inc(&em->refs); *em_cached = em; @@ -3563,7 +3571,6 @@ int btrfs_do_readpage(struct page *page, struct extent_map **em_cached, u64 cur_end; struct extent_map *em; int ret = 0; - int nr = 0; size_t pg_offset = 0; size_t iosize; size_t blocksize = inode->i_sb->s_blocksize; @@ -3608,9 +3615,10 @@ int btrfs_do_readpage(struct page *page, struct extent_map **em_cached, } em = __get_extent_map(inode, page, pg_offset, cur, end - cur + 1, em_cached); - if (IS_ERR_OR_NULL(em)) { + if (IS_ERR(em)) { unlock_extent(tree, cur, end); end_page_read(page, false, cur, end + 1 - cur); + ret = PTR_ERR(em); break; } extent_offset = cur - em->start; @@ -3721,9 +3729,7 @@ int btrfs_do_readpage(struct page *page, struct extent_map **em_cached, end_bio_extent_readpage, 0, this_bio_flag, force_bio_submit); - if (!ret) { - nr++; - } else { + if (ret) { unlock_extent(tree, cur, cur + iosize - 1); end_page_read(page, false, cur, iosize); goto out; @@ -3951,7 +3957,7 @@ static noinline_for_stack int __extent_writepage_io(struct btrfs_inode *inode, } em = btrfs_get_extent(inode, NULL, 0, cur, end - cur + 1); - if (IS_ERR_OR_NULL(em)) { + if (IS_ERR(em)) { btrfs_page_set_error(fs_info, page, cur, end - cur + 1); ret = PTR_ERR_OR_ZERO(em); break; @@ -4780,11 +4786,12 @@ static int submit_eb_page(struct page *page, struct writeback_control *wbc, return ret; } if (cache) { - /* Impiles write in zoned mode */ - btrfs_put_block_group(cache); - /* Mark the last eb in a block group */ + /* + * Implies write in zoned mode. Mark the last eb in a block group. + */ if (cache->seq_zone && eb->start + eb->len == cache->zone_capacity) set_bit(EXTENT_BUFFER_ZONE_FINISH, &eb->bflags); + btrfs_put_block_group(cache); } ret = write_one_eb(eb, wbc, epd); free_extent_buffer(eb); @@ -5390,7 +5397,7 @@ static struct extent_map *get_extent_skip_holes(struct btrfs_inode *inode, break; len = ALIGN(len, sectorsize); em = btrfs_get_extent_fiemap(inode, offset, len); - if (IS_ERR_OR_NULL(em)) + if (IS_ERR(em)) return em; /* if this isn't a hole return it */ |