diff options
author | Yu Kuai <yukuai3@huawei.com> | 2024-08-03 11:11:37 +0200 |
---|---|---|
committer | Song Liu <song@kernel.org> | 2024-08-15 22:38:17 +0200 |
commit | c916ca35308d3187c9928664f9be249b22a3a701 (patch) | |
tree | b6555d198877e184e09b9d9b90ddaad51fc77639 /drivers/md/raid1.c | |
parent | s390/dasd: fix error recovery leading to data corruption on ESE devices (diff) | |
download | linux-c916ca35308d3187c9928664f9be249b22a3a701.tar.xz linux-c916ca35308d3187c9928664f9be249b22a3a701.zip |
md/raid1: Fix data corruption for degraded array with slow disk
read_balance() will avoid reading from slow disks as much as possible,
however, if valid data only lands in slow disks, and a new normal disk
is still in recovery, unrecovered data can be read:
raid1_read_request
read_balance
raid1_should_read_first
-> return false
choose_best_rdev
-> normal disk is not recovered, return -1
choose_bb_rdev
-> missing the checking of recovery, return the normal disk
-> read unrecovered data
Root cause is that the checking of recovery is missing in
choose_bb_rdev(). Hence add such checking to fix the problem.
Also fix similar problem in choose_slow_rdev().
Cc: stable@vger.kernel.org
Fixes: 9f3ced792203 ("md/raid1: factor out choose_bb_rdev() from read_balance()")
Fixes: dfa8ecd167c1 ("md/raid1: factor out choose_slow_rdev() from read_balance()")
Reported-and-tested-by: Mateusz Jończyk <mat.jonczyk@o2.pl>
Closes: https://lore.kernel.org/all/9952f532-2554-44bf-b906-4880b2e88e3a@o2.pl/
Signed-off-by: Yu Kuai <yukuai3@huawei.com>
Link: https://lore.kernel.org/r/20240803091137.3197008-1-yukuai1@huaweicloud.com
Signed-off-by: Song Liu <song@kernel.org>
Diffstat (limited to '')
-rw-r--r-- | drivers/md/raid1.c | 14 |
1 files changed, 10 insertions, 4 deletions
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index 7acfe7c9dc8d..761989d67906 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c @@ -617,6 +617,12 @@ static int choose_first_rdev(struct r1conf *conf, struct r1bio *r1_bio, return -1; } +static bool rdev_in_recovery(struct md_rdev *rdev, struct r1bio *r1_bio) +{ + return !test_bit(In_sync, &rdev->flags) && + rdev->recovery_offset < r1_bio->sector + r1_bio->sectors; +} + static int choose_bb_rdev(struct r1conf *conf, struct r1bio *r1_bio, int *max_sectors) { @@ -635,6 +641,7 @@ static int choose_bb_rdev(struct r1conf *conf, struct r1bio *r1_bio, rdev = conf->mirrors[disk].rdev; if (!rdev || test_bit(Faulty, &rdev->flags) || + rdev_in_recovery(rdev, r1_bio) || test_bit(WriteMostly, &rdev->flags)) continue; @@ -673,7 +680,8 @@ static int choose_slow_rdev(struct r1conf *conf, struct r1bio *r1_bio, rdev = conf->mirrors[disk].rdev; if (!rdev || test_bit(Faulty, &rdev->flags) || - !test_bit(WriteMostly, &rdev->flags)) + !test_bit(WriteMostly, &rdev->flags) || + rdev_in_recovery(rdev, r1_bio)) continue; /* there are no bad blocks, we can use this disk */ @@ -733,9 +741,7 @@ static bool rdev_readable(struct md_rdev *rdev, struct r1bio *r1_bio) if (!rdev || test_bit(Faulty, &rdev->flags)) return false; - /* still in recovery */ - if (!test_bit(In_sync, &rdev->flags) && - rdev->recovery_offset < r1_bio->sector + r1_bio->sectors) + if (rdev_in_recovery(rdev, r1_bio)) return false; /* don't read from slow disk unless have to */ |