diff options
author | NeilBrown <neilb@suse.com> | 2017-03-15 04:05:12 +0100 |
---|---|---|
committer | Shaohua Li <shli@fb.com> | 2017-03-23 03:15:57 +0100 |
commit | 16d997b78b157315f5c90fcbc2f9ce575cb3879f (patch) | |
tree | ae10253861afd7ce27aae967a4cf075f4b2621af /drivers/md | |
parent | md/raid5: use md_write_start to count stripes, not bios (diff) | |
download | linux-16d997b78b157315f5c90fcbc2f9ce575cb3879f.tar.xz linux-16d997b78b157315f5c90fcbc2f9ce575cb3879f.zip |
md/raid5: simplify delaying of writes while metadata is updated.
If a device fails during a write, we must ensure the failure is
recorded in the metadata before the completion of the write is
acknowledged.
Commit c3cce6cda162 ("md/raid5: ensure device failure recorded before
write request returns.") added code for this, but it was
unnecessarily complicated. We already had similar functionality for
handling updates to the bad-block-list, thanks to Commit de393cdea66c
("md: make it easier to wait for bad blocks to be acknowledged.")
So revert most of the former commit, and instead avoid collecting
completed writes if MD_SB_CHANGE_PENDING is set. raid5d() will then flush
the metadata and retry the stripe_head.
As this change can leave a stripe_head ready for handling immediately
after handle_active_stripes() returns, we change raid5_do_work() to
pause when MD_SB_CHANGE_PENDING is set, so that it doesn't spin.
We check MD_SB_CHANGE_PENDING *after* analyse_stripe() as it could be set
asynchronously. After analyse_stripe(), we have collected stable data
about the state of devices, which will be used to make decisions.
Signed-off-by: NeilBrown <neilb@suse.com>
Signed-off-by: Shaohua Li <shli@fb.com>
Diffstat (limited to 'drivers/md')
-rw-r--r-- | drivers/md/raid5.c | 31 | ||||
-rw-r--r-- | drivers/md/raid5.h | 3 |
2 files changed, 8 insertions, 26 deletions
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index a684003fc965..a2c9ddc35335 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -4691,7 +4691,8 @@ static void handle_stripe(struct stripe_head *sh) if (test_bit(STRIPE_LOG_TRAPPED, &sh->state)) goto finish; - if (s.handle_bad_blocks) { + if (s.handle_bad_blocks || + test_bit(MD_SB_CHANGE_PENDING, &conf->mddev->sb_flags)) { set_bit(STRIPE_HANDLE, &sh->state); goto finish; } @@ -5021,15 +5022,8 @@ finish: md_wakeup_thread(conf->mddev->thread); } - if (!bio_list_empty(&s.return_bi)) { - if (test_bit(MD_SB_CHANGE_PENDING, &conf->mddev->sb_flags)) { - spin_lock_irq(&conf->device_lock); - bio_list_merge(&conf->return_bi, &s.return_bi); - spin_unlock_irq(&conf->device_lock); - md_wakeup_thread(conf->mddev->thread); - } else - return_io(&s.return_bi); - } + if (!bio_list_empty(&s.return_bi)) + return_io(&s.return_bi); clear_bit_unlock(STRIPE_ACTIVE, &sh->state); } @@ -6226,6 +6220,7 @@ static void raid5_do_work(struct work_struct *work) struct r5worker *worker = container_of(work, struct r5worker, work); struct r5worker_group *group = worker->group; struct r5conf *conf = group->conf; + struct mddev *mddev = conf->mddev; int group_id = group - conf->worker_groups; int handled; struct blk_plug plug; @@ -6246,6 +6241,9 @@ static void raid5_do_work(struct work_struct *work) if (!batch_size && !released) break; handled += batch_size; + wait_event_lock_irq(mddev->sb_wait, + !test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags), + conf->device_lock); } pr_debug("%d stripes handled\n", handled); @@ -6273,18 +6271,6 @@ static void raid5d(struct md_thread *thread) md_check_recovery(mddev); - if (!bio_list_empty(&conf->return_bi) && - !test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags)) { - struct bio_list tmp = BIO_EMPTY_LIST; - spin_lock_irq(&conf->device_lock); - if (!test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags)) { - bio_list_merge(&tmp, &conf->return_bi); - bio_list_init(&conf->return_bi); - } - 
spin_unlock_irq(&conf->device_lock); - return_io(&tmp); - } - blk_start_plug(&plug); handled = 0; spin_lock_irq(&conf->device_lock); @@ -6936,7 +6922,6 @@ static struct r5conf *setup_conf(struct mddev *mddev) INIT_LIST_HEAD(&conf->hold_list); INIT_LIST_HEAD(&conf->delayed_list); INIT_LIST_HEAD(&conf->bitmap_list); - bio_list_init(&conf->return_bi); init_llist_head(&conf->released_stripes); atomic_set(&conf->active_stripes, 0); atomic_set(&conf->preread_active_stripes, 0); diff --git a/drivers/md/raid5.h b/drivers/md/raid5.h index ba5b7a3790af..13800dc9dd88 100644 --- a/drivers/md/raid5.h +++ b/drivers/md/raid5.h @@ -638,9 +638,6 @@ struct r5conf { int skip_copy; /* Don't copy data from bio to stripe cache */ struct list_head *last_hold; /* detect hold_list promotions */ - /* bios to have bi_end_io called after metadata is synced */ - struct bio_list return_bi; - atomic_t reshape_stripes; /* stripes with pending writes for reshape */ /* unfortunately we need two cache names as we temporarily have * two caches. |