diff options
author | NeilBrown <neilb@suse.com> | 2016-06-02 08:19:53 +0200 |
---|---|---|
committer | Shaohua Li <shli@fb.com> | 2016-06-13 20:54:21 +0200 |
commit | f5b67ae86ee317db20c0e10d54f16a0bbbd3207d (patch) | |
tree | 8db4539e9c9f05624f2b198ddd1ccce54b281ce6 /drivers/md/raid10.c | |
parent | md/multipath: add rcu protection to rdev access in multipath_status. (diff) | |
download | linux-f5b67ae86ee317db20c0e10d54f16a0bbbd3207d.tar.xz linux-f5b67ae86ee317db20c0e10d54f16a0bbbd3207d.zip |
md: be extra careful not to take a reference to a Faulty device.
It is important that we never increment rdev->nr_pending on a Faulty
device as ->hot_remove_disk() assumes that once the Faulty flag is visible
no code will take a new reference.
Some places take a new reference after only checking In_sync. This should
be safe as the two are changed together. However, to make the code more
obviously safe, add checks for 'Faulty' as well.
Note: the actual rule is:
Never increment nr_pending if Faulty is set and Blocked is clear,
never clear Faulty, and never set Blocked without holding a reference
through nr_pending.
fix build error (Shaohua)
Signed-off-by: NeilBrown <neilb@suse.com>
Signed-off-by: Shaohua Li <shli@fb.com>
Diffstat (limited to 'drivers/md/raid10.c')
-rw-r--r-- | drivers/md/raid10.c | 6 |
1 files changed, 6 insertions, 0 deletions
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index 526c1d82246e..34facda18e72 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c @@ -2287,6 +2287,7 @@ static void fix_read_error(struct r10conf *conf, struct mddev *mddev, struct r10 rdev = rcu_dereference(conf->mirrors[d].rdev); if (rdev && test_bit(In_sync, &rdev->flags) && + !test_bit(Faulty, &rdev->flags) && is_badblock(rdev, r10_bio->devs[sl].addr + sect, s, &first_bad, &bad_sectors) == 0) { atomic_inc(&rdev->nr_pending); @@ -2339,6 +2340,7 @@ static void fix_read_error(struct r10conf *conf, struct mddev *mddev, struct r10 d = r10_bio->devs[sl].devnum; rdev = rcu_dereference(conf->mirrors[d].rdev); if (!rdev || + test_bit(Faulty, &rdev->flags) || !test_bit(In_sync, &rdev->flags)) continue; @@ -2378,6 +2380,7 @@ static void fix_read_error(struct r10conf *conf, struct mddev *mddev, struct r10 d = r10_bio->devs[sl].devnum; rdev = rcu_dereference(conf->mirrors[d].rdev); if (!rdev || + test_bit(Faulty, &rdev->flags) || !test_bit(In_sync, &rdev->flags)) continue; @@ -2953,6 +2956,7 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr, mreplace = rcu_dereference(mirror->replacement); if ((mrdev == NULL || + test_bit(Faulty, &mrdev->flags) || test_bit(In_sync, &mrdev->flags)) && (mreplace == NULL || test_bit(Faulty, &mreplace->flags))) { @@ -2971,6 +2975,8 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr, rcu_read_unlock(); continue; } + if (mreplace && test_bit(Faulty, &mreplace->flags)) + mreplace = NULL; /* Unless we are doing a full sync, or a replacement * we only need to recover the block if it is set in * the bitmap |