diff options
author | Qu Wenruo <wqu@suse.com> | 2022-11-01 12:16:08 +0100 |
---|---|---|
committer | David Sterba <dsterba@suse.com> | 2022-12-05 18:00:49 +0100 |
commit | 5eb30ee26fa4dbd2d31f50ee3b4212933f86cb57 (patch) | |
tree | 240db57b5c1aa6288edf3bf9de1549cab95677e6 | |
parent | btrfs: raid56: extract rwm write bios assembly into a helper (diff) | |
download | linux-5eb30ee26fa4dbd2d31f50ee3b4212933f86cb57.tar.xz linux-5eb30ee26fa4dbd2d31f50ee3b4212933f86cb57.zip |
btrfs: raid56: introduce the main entrance for RMW path
The new entrance will be called rmw_rbio(), it will have a streamlined
workflow by using submit-and-wait method.
Thus there will be no weird jumps between tons of functions, thus way
more reader friendly, and will make later expansion easier, as it's now
a straight workflow, the timing is way more clear.
Unfortunately we can not yet migrate the RMW path to use this new
entrance as we still need extra work to address the plug and
unlock_stripe() function.
Signed-off-by: Qu Wenruo <wqu@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
-rw-r--r-- | fs/btrfs/raid56.c | 161 | ||||
-rw-r--r-- | fs/btrfs/raid56.h | 5 |
2 files changed, 166 insertions, 0 deletions
diff --git a/fs/btrfs/raid56.c b/fs/btrfs/raid56.c index 5e5bfc2c21fc..8fd633f01d9e 100644 --- a/fs/btrfs/raid56.c +++ b/fs/btrfs/raid56.c @@ -1253,6 +1253,14 @@ static int rmw_assemble_write_bios(struct btrfs_raid_bio *rbio, ASSERT(bitmap_weight(&rbio->dbitmap, rbio->stripe_nsectors)); /* + * Reset errors, as we may have errors inherited from from degraded + * write. + */ + atomic_set(&rbio->error, 0); + rbio->faila = -1; + rbio->failb = -1; + + /* * Start assembly. Make bios for everything from the higher layers (the * bio_list in our rbio) and our P/Q. Ignore everything else. */ @@ -1665,6 +1673,19 @@ cleanup: return ret; } +static int alloc_rbio_data_pages(struct btrfs_raid_bio *rbio) +{ + const int data_pages = rbio->nr_data * rbio->stripe_npages; + int ret; + + ret = btrfs_alloc_page_array(data_pages, rbio->stripe_pages); + if (ret < 0) + return ret; + + index_stripe_sectors(rbio); + return 0; +} + /* * the stripe must be locked by the caller. It will * unlock after all the writes are done @@ -2454,6 +2475,146 @@ void raid56_parity_recover(struct bio *bio, struct btrfs_io_context *bioc, start_async_work(rbio, recover_rbio_work); } +static int rmw_read_and_wait(struct btrfs_raid_bio *rbio) +{ + struct bio_list bio_list; + struct bio *bio; + int ret; + + bio_list_init(&bio_list); + atomic_set(&rbio->error, 0); + + ret = rmw_assemble_read_bios(rbio, &bio_list); + if (ret < 0) + goto out; + + submit_read_bios(rbio, &bio_list); + wait_event(rbio->io_wait, atomic_read(&rbio->stripes_pending) == 0); + return ret; +out: + while ((bio = bio_list_pop(&bio_list))) + bio_put(bio); + + return ret; +} + +static void raid_wait_write_end_io(struct bio *bio) +{ + struct btrfs_raid_bio *rbio = bio->bi_private; + blk_status_t err = bio->bi_status; + + if (err) + fail_bio_stripe(rbio, bio); + bio_put(bio); + if (atomic_dec_and_test(&rbio->stripes_pending)) + wake_up(&rbio->io_wait); +} + +static void submit_write_bios(struct btrfs_raid_bio *rbio, + struct bio_list *bio_list) +{ + struct bio *bio; + + atomic_set(&rbio->stripes_pending, bio_list_size(bio_list)); + while ((bio = bio_list_pop(bio_list))) { + bio->bi_end_io = raid_wait_write_end_io; + + if (trace_raid56_write_stripe_enabled()) { + struct raid56_bio_trace_info trace_info = { 0 }; + + bio_get_trace_info(rbio, bio, &trace_info); + trace_raid56_write_stripe(rbio, bio, &trace_info); + } + submit_bio(bio); + } +} + +int rmw_rbio(struct btrfs_raid_bio *rbio) +{ + struct bio_list bio_list; + int sectornr; + int ret = 0; + + /* + * Allocate the pages for parity first, as P/Q pages will always be + * needed for both full-stripe and sub-stripe writes. + */ + ret = alloc_rbio_parity_pages(rbio); + if (ret < 0) + return ret; + + /* Full stripe write, can write the full stripe right now. */ + if (rbio_is_full(rbio)) + goto write; + /* + * Now we're doing sub-stripe write, also need all data stripes to do + * the full RMW. + */ + ret = alloc_rbio_data_pages(rbio); + if (ret < 0) + return ret; + + atomic_set(&rbio->error, 0); + index_rbio_pages(rbio); + + ret = rmw_read_and_wait(rbio); + if (ret < 0) + return ret; + + /* Too many read errors, beyond our tolerance. */ + if (atomic_read(&rbio->error) > rbio->bioc->max_errors) + return ret; + + /* Have read failures but under tolerance, needs recovery. */ + if (rbio->faila >= 0 || rbio->failb >= 0) { + ret = recover_rbio(rbio); + if (ret < 0) + return ret; + } +write: + /* + * At this stage we're not allowed to add any new bios to the + * bio list any more, anyone else that wants to change this stripe + * needs to do their own rmw. + */ + spin_lock_irq(&rbio->bio_list_lock); + set_bit(RBIO_RMW_LOCKED_BIT, &rbio->flags); + spin_unlock_irq(&rbio->bio_list_lock); + + atomic_set(&rbio->error, 0); + + index_rbio_pages(rbio); + + /* + * We don't cache full rbios because we're assuming + * the higher layers are unlikely to use this area of + * the disk again soon. If they do use it again, + * hopefully they will send another full bio. + */ + if (!rbio_is_full(rbio)) + cache_rbio_pages(rbio); + else + clear_bit(RBIO_CACHE_READY_BIT, &rbio->flags); + + for (sectornr = 0; sectornr < rbio->stripe_nsectors; sectornr++) + generate_pq_vertical(rbio, sectornr); + + bio_list_init(&bio_list); + ret = rmw_assemble_write_bios(rbio, &bio_list); + if (ret < 0) + return ret; + + /* We should have at least one bio assembled. */ + ASSERT(bio_list_size(&bio_list)); + submit_write_bios(rbio, &bio_list); + wait_event(rbio->io_wait, atomic_read(&rbio->stripes_pending) == 0); + + /* We have more errors than our tolerance during the read. */ + if (atomic_read(&rbio->error) > rbio->bioc->max_errors) + ret = -EIO; + return ret; +} + static void rmw_work(struct work_struct *work) { struct btrfs_raid_bio *rbio; diff --git a/fs/btrfs/raid56.h b/fs/btrfs/raid56.h index 445e833fcfcf..0e77c77c5dba 100644 --- a/fs/btrfs/raid56.h +++ b/fs/btrfs/raid56.h @@ -185,4 +185,9 @@ void raid56_submit_missing_rbio(struct btrfs_raid_bio *rbio); int btrfs_alloc_stripe_hash_table(struct btrfs_fs_info *info); void btrfs_free_stripe_hash_table(struct btrfs_fs_info *info); +/* + * Placeholder definition to avoid warning, will be removed when + * the full write path is migrated. + */ +int rmw_rbio(struct btrfs_raid_bio *rbio); #endif |