diff options
author | Li Nan <linan122@huawei.com> | 2023-06-02 11:18:38 +0200 |
---|---|---|
committer | Song Liu <song@kernel.org> | 2023-06-14 00:25:43 +0200 |
commit | 8d355a46c1e0cea59be3ea8395409a5e6eeed946 (patch) | |
tree | 787ce85757eda10e9a33542fc655f10a0adde090 /drivers/md/raid10.c | |
parent | md/raid10: clean up md_add_new_disk() (diff) | |
download | linux-8d355a46c1e0cea59be3ea8395409a5e6eeed946.tar.xz linux-8d355a46c1e0cea59be3ea8395409a5e6eeed946.zip |
md/raid10: Do not add spare disk when recovery fails
In raid10_sync_request(), if data cannot be read from any disk for
recovery, it will go to 'giveup' and let 'chunks_skipped' + 1. After
multiple 'giveup', when 'chunks_skipped >= geo.raid_disks', it will
return 'max_sector', indicating that the recovery has been completed.
However, the recovery is just aborted and the data remains inconsistent.
Fix it by setting mirror->recovery_disabled, which will prevent the spare
disk from being added to this mirror. The same issue also exists during
resync, it will be fixed afterwards.
Signed-off-by: Li Nan <linan122@huawei.com>
Signed-off-by: Song Liu <song@kernel.org>
Link: https://lore.kernel.org/r/20230602091839.743798-2-linan666@huaweicloud.com
Diffstat (limited to 'drivers/md/raid10.c')
-rw-r--r-- | drivers/md/raid10.c | 20 |
1 files changed, 18 insertions, 2 deletions
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index ab0e2485b2b7..1b953e788ce1 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c @@ -3311,6 +3311,7 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr, int chunks_skipped = 0; sector_t chunk_mask = conf->geo.chunk_mask; int page_idx = 0; + int error_disk = -1; /* * Allow skipping a full rebuild for incremental assembly @@ -3394,8 +3395,21 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr, return reshape_request(mddev, sector_nr, skipped); if (chunks_skipped >= conf->geo.raid_disks) { - /* if there has been nothing to do on any drive, - * then there is nothing to do at all.. + pr_err("md/raid10:%s: %s fails\n", mdname(mddev), + test_bit(MD_RECOVERY_SYNC, &mddev->recovery) ? "resync" : "recovery"); + if (error_disk >= 0 && + !test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) { + /* + * recovery fails, set mirrors.recovery_disabled, + * device shouldn't be added to there. + */ + conf->mirrors[error_disk].recovery_disabled = + mddev->recovery_disabled; + return 0; + } + /* + * if there has been nothing to do on any drive, + * then there is nothing to do at all. */ *skipped = 1; return (max_sector - sector_nr) + sectors_skipped; @@ -3646,6 +3660,8 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr, mdname(mddev)); mirror->recovery_disabled = mddev->recovery_disabled; + } else { + error_disk = i; } put_buf(r10_bio); if (rb2) |