summaryrefslogtreecommitdiffstats
path: root/drivers/md/raid10.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/md/raid10.c')
-rw-r--r--drivers/md/raid10.c301
1 files changed, 177 insertions, 124 deletions
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index c7de2a53e625..ed29fc899f06 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -707,7 +707,6 @@ static struct md_rdev *read_balance(struct r10conf *conf,
raid10_find_phys(conf, r10_bio);
rcu_read_lock();
-retry:
sectors = r10_bio->sectors;
best_slot = -1;
best_rdev = NULL;
@@ -804,13 +803,6 @@ retry:
if (slot >= 0) {
atomic_inc(&rdev->nr_pending);
- if (test_bit(Faulty, &rdev->flags)) {
- /* Cannot risk returning a device that failed
- * before we inc'ed nr_pending
- */
- rdev_dec_pending(rdev, conf->mddev);
- goto retry;
- }
r10_bio->read_slot = slot;
} else
rdev = NULL;
@@ -865,7 +857,7 @@ static void flush_pending_writes(struct r10conf *conf)
while (bio) { /* submit pending writes */
struct bio *next = bio->bi_next;
bio->bi_next = NULL;
- if (unlikely((bio->bi_rw & REQ_DISCARD) &&
+ if (unlikely((bio_op(bio) == REQ_OP_DISCARD) &&
!blk_queue_discard(bdev_get_queue(bio->bi_bdev))))
/* Just ignore it */
bio_endio(bio);
@@ -913,7 +905,7 @@ static void raise_barrier(struct r10conf *conf, int force)
/* Now wait for all pending IO to complete */
wait_event_lock_irq(conf->wait_barrier,
- !conf->nr_pending && conf->barrier < RESYNC_DEPTH,
+ !atomic_read(&conf->nr_pending) && conf->barrier < RESYNC_DEPTH,
conf->resync_lock);
spin_unlock_irq(&conf->resync_lock);
@@ -944,23 +936,23 @@ static void wait_barrier(struct r10conf *conf)
*/
wait_event_lock_irq(conf->wait_barrier,
!conf->barrier ||
- (conf->nr_pending &&
+ (atomic_read(&conf->nr_pending) &&
current->bio_list &&
!bio_list_empty(current->bio_list)),
conf->resync_lock);
conf->nr_waiting--;
+ if (!conf->nr_waiting)
+ wake_up(&conf->wait_barrier);
}
- conf->nr_pending++;
+ atomic_inc(&conf->nr_pending);
spin_unlock_irq(&conf->resync_lock);
}
static void allow_barrier(struct r10conf *conf)
{
- unsigned long flags;
- spin_lock_irqsave(&conf->resync_lock, flags);
- conf->nr_pending--;
- spin_unlock_irqrestore(&conf->resync_lock, flags);
- wake_up(&conf->wait_barrier);
+ if ((atomic_dec_and_test(&conf->nr_pending)) ||
+ (conf->array_freeze_pending))
+ wake_up(&conf->wait_barrier);
}
static void freeze_array(struct r10conf *conf, int extra)
@@ -978,13 +970,15 @@ static void freeze_array(struct r10conf *conf, int extra)
* we continue.
*/
spin_lock_irq(&conf->resync_lock);
+ conf->array_freeze_pending++;
conf->barrier++;
conf->nr_waiting++;
wait_event_lock_irq_cmd(conf->wait_barrier,
- conf->nr_pending == conf->nr_queued+extra,
+ atomic_read(&conf->nr_pending) == conf->nr_queued+extra,
conf->resync_lock,
flush_pending_writes(conf));
+ conf->array_freeze_pending--;
spin_unlock_irq(&conf->resync_lock);
}
@@ -1041,7 +1035,7 @@ static void raid10_unplug(struct blk_plug_cb *cb, bool from_schedule)
while (bio) { /* submit pending writes */
struct bio *next = bio->bi_next;
bio->bi_next = NULL;
- if (unlikely((bio->bi_rw & REQ_DISCARD) &&
+ if (unlikely((bio_op(bio) == REQ_OP_DISCARD) &&
!blk_queue_discard(bdev_get_queue(bio->bi_bdev))))
/* Just ignore it */
bio_endio(bio);
@@ -1058,12 +1052,10 @@ static void __make_request(struct mddev *mddev, struct bio *bio)
struct r10bio *r10_bio;
struct bio *read_bio;
int i;
+ const int op = bio_op(bio);
const int rw = bio_data_dir(bio);
const unsigned long do_sync = (bio->bi_rw & REQ_SYNC);
const unsigned long do_fua = (bio->bi_rw & REQ_FUA);
- const unsigned long do_discard = (bio->bi_rw
- & (REQ_DISCARD | REQ_SECURE));
- const unsigned long do_same = (bio->bi_rw & REQ_WRITE_SAME);
unsigned long flags;
struct md_rdev *blocked_rdev;
struct blk_plug_cb *cb;
@@ -1156,7 +1148,7 @@ read_again:
choose_data_offset(r10_bio, rdev);
read_bio->bi_bdev = rdev->bdev;
read_bio->bi_end_io = raid10_end_read_request;
- read_bio->bi_rw = READ | do_sync;
+ bio_set_op_attrs(read_bio, op, do_sync);
read_bio->bi_private = r10_bio;
if (max_sectors < r10_bio->sectors) {
@@ -1363,8 +1355,7 @@ retry_write:
rdev));
mbio->bi_bdev = rdev->bdev;
mbio->bi_end_io = raid10_end_write_request;
- mbio->bi_rw =
- WRITE | do_sync | do_fua | do_discard | do_same;
+ bio_set_op_attrs(mbio, op, do_sync | do_fua);
mbio->bi_private = r10_bio;
atomic_inc(&r10_bio->remaining);
@@ -1406,8 +1397,7 @@ retry_write:
r10_bio, rdev));
mbio->bi_bdev = rdev->bdev;
mbio->bi_end_io = raid10_end_write_request;
- mbio->bi_rw =
- WRITE | do_sync | do_fua | do_discard | do_same;
+ bio_set_op_attrs(mbio, op, do_sync | do_fua);
mbio->bi_private = r10_bio;
atomic_inc(&r10_bio->remaining);
@@ -1450,7 +1440,7 @@ static void raid10_make_request(struct mddev *mddev, struct bio *bio)
struct bio *split;
- if (unlikely(bio->bi_rw & REQ_FLUSH)) {
+ if (unlikely(bio->bi_rw & REQ_PREFLUSH)) {
md_flush_request(mddev, bio);
return;
}
@@ -1503,10 +1493,12 @@ static void raid10_status(struct seq_file *seq, struct mddev *mddev)
}
seq_printf(seq, " [%d/%d] [", conf->geo.raid_disks,
conf->geo.raid_disks - mddev->degraded);
- for (i = 0; i < conf->geo.raid_disks; i++)
- seq_printf(seq, "%s",
- conf->mirrors[i].rdev &&
- test_bit(In_sync, &conf->mirrors[i].rdev->flags) ? "U" : "_");
+ rcu_read_lock();
+ for (i = 0; i < conf->geo.raid_disks; i++) {
+ struct md_rdev *rdev = rcu_dereference(conf->mirrors[i].rdev);
+ seq_printf(seq, "%s", rdev && test_bit(In_sync, &rdev->flags) ? "U" : "_");
+ }
+ rcu_read_unlock();
seq_printf(seq, "]");
}
@@ -1604,7 +1596,7 @@ static void raid10_error(struct mddev *mddev, struct md_rdev *rdev)
static void print_conf(struct r10conf *conf)
{
int i;
- struct raid10_info *tmp;
+ struct md_rdev *rdev;
printk(KERN_DEBUG "RAID10 conf printout:\n");
if (!conf) {
@@ -1614,14 +1606,16 @@ static void print_conf(struct r10conf *conf)
printk(KERN_DEBUG " --- wd:%d rd:%d\n", conf->geo.raid_disks - conf->mddev->degraded,
conf->geo.raid_disks);
+ /* This is only called with ->reconfix_mutex held, so
+ * rcu protection of rdev is not needed */
for (i = 0; i < conf->geo.raid_disks; i++) {
char b[BDEVNAME_SIZE];
- tmp = conf->mirrors + i;
- if (tmp->rdev)
+ rdev = conf->mirrors[i].rdev;
+ if (rdev)
printk(KERN_DEBUG " disk %d, wo:%d, o:%d, dev:%s\n",
- i, !test_bit(In_sync, &tmp->rdev->flags),
- !test_bit(Faulty, &tmp->rdev->flags),
- bdevname(tmp->rdev->bdev,b));
+ i, !test_bit(In_sync, &rdev->flags),
+ !test_bit(Faulty, &rdev->flags),
+ bdevname(rdev->bdev,b));
}
}
@@ -1770,7 +1764,7 @@ static int raid10_remove_disk(struct mddev *mddev, struct md_rdev *rdev)
err = -EBUSY;
goto abort;
}
- /* Only remove faulty devices if recovery
+ /* Only remove non-faulty devices if recovery
* is not possible.
*/
if (!test_bit(Faulty, &rdev->flags) &&
@@ -1782,13 +1776,16 @@ static int raid10_remove_disk(struct mddev *mddev, struct md_rdev *rdev)
goto abort;
}
*rdevp = NULL;
- synchronize_rcu();
- if (atomic_read(&rdev->nr_pending)) {
- /* lost the race, try later */
- err = -EBUSY;
- *rdevp = rdev;
- goto abort;
- } else if (p->replacement) {
+ if (!test_bit(RemoveSynchronized, &rdev->flags)) {
+ synchronize_rcu();
+ if (atomic_read(&rdev->nr_pending)) {
+ /* lost the race, try later */
+ err = -EBUSY;
+ *rdevp = rdev;
+ goto abort;
+ }
+ }
+ if (p->replacement) {
/* We must have just cleared 'rdev' */
p->rdev = p->replacement;
clear_bit(Replacement, &p->replacement->flags);
@@ -1992,10 +1989,10 @@ static void sync_request_write(struct mddev *mddev, struct r10bio *r10_bio)
tbio->bi_vcnt = vcnt;
tbio->bi_iter.bi_size = fbio->bi_iter.bi_size;
- tbio->bi_rw = WRITE;
tbio->bi_private = r10_bio;
tbio->bi_iter.bi_sector = r10_bio->devs[i].addr;
tbio->bi_end_io = end_sync_write;
+ bio_set_op_attrs(tbio, REQ_OP_WRITE, 0);
bio_copy_data(tbio, fbio);
@@ -2078,7 +2075,7 @@ static void fix_recovery_read_error(struct r10bio *r10_bio)
addr,
s << 9,
bio->bi_io_vec[idx].bv_page,
- READ, false);
+ REQ_OP_READ, 0, false);
if (ok) {
rdev = conf->mirrors[dw].rdev;
addr = r10_bio->devs[1].addr + sect;
@@ -2086,7 +2083,7 @@ static void fix_recovery_read_error(struct r10bio *r10_bio)
addr,
s << 9,
bio->bi_io_vec[idx].bv_page,
- WRITE, false);
+ REQ_OP_WRITE, 0, false);
if (!ok) {
set_bit(WriteErrorSeen, &rdev->flags);
if (!test_and_set_bit(WantReplacement,
@@ -2175,21 +2172,20 @@ static void recovery_request_write(struct mddev *mddev, struct r10bio *r10_bio)
*/
static void check_decay_read_errors(struct mddev *mddev, struct md_rdev *rdev)
{
- struct timespec cur_time_mon;
+ long cur_time_mon;
unsigned long hours_since_last;
unsigned int read_errors = atomic_read(&rdev->read_errors);
- ktime_get_ts(&cur_time_mon);
+ cur_time_mon = ktime_get_seconds();
- if (rdev->last_read_error.tv_sec == 0 &&
- rdev->last_read_error.tv_nsec == 0) {
+ if (rdev->last_read_error == 0) {
/* first time we've seen a read error */
rdev->last_read_error = cur_time_mon;
return;
}
- hours_since_last = (cur_time_mon.tv_sec -
- rdev->last_read_error.tv_sec) / 3600;
+ hours_since_last = (long)(cur_time_mon -
+ rdev->last_read_error) / 3600;
rdev->last_read_error = cur_time_mon;
@@ -2213,7 +2209,7 @@ static int r10_sync_page_io(struct md_rdev *rdev, sector_t sector,
if (is_badblock(rdev, sector, sectors, &first_bad, &bad_sectors)
&& (rw == READ || test_bit(WriteErrorSeen, &rdev->flags)))
return -1;
- if (sync_page_io(rdev, sector, sectors << 9, page, rw, false))
+ if (sync_page_io(rdev, sector, sectors << 9, page, rw, 0, false))
/* success */
return 1;
if (rw == WRITE) {
@@ -2268,7 +2264,7 @@ static void fix_read_error(struct r10conf *conf, struct mddev *mddev, struct r10
printk(KERN_NOTICE
"md/raid10:%s: %s: Failing raid device\n",
mdname(mddev), b);
- md_error(mddev, conf->mirrors[d].rdev);
+ md_error(mddev, rdev);
r10_bio->devs[r10_bio->read_slot].bio = IO_BLOCKED;
return;
}
@@ -2291,6 +2287,7 @@ static void fix_read_error(struct r10conf *conf, struct mddev *mddev, struct r10
rdev = rcu_dereference(conf->mirrors[d].rdev);
if (rdev &&
test_bit(In_sync, &rdev->flags) &&
+ !test_bit(Faulty, &rdev->flags) &&
is_badblock(rdev, r10_bio->devs[sl].addr + sect, s,
&first_bad, &bad_sectors) == 0) {
atomic_inc(&rdev->nr_pending);
@@ -2299,7 +2296,8 @@ static void fix_read_error(struct r10conf *conf, struct mddev *mddev, struct r10
r10_bio->devs[sl].addr +
sect,
s<<9,
- conf->tmppage, READ, false);
+ conf->tmppage,
+ REQ_OP_READ, 0, false);
rdev_dec_pending(rdev, mddev);
rcu_read_lock();
if (success)
@@ -2343,6 +2341,7 @@ static void fix_read_error(struct r10conf *conf, struct mddev *mddev, struct r10
d = r10_bio->devs[sl].devnum;
rdev = rcu_dereference(conf->mirrors[d].rdev);
if (!rdev ||
+ test_bit(Faulty, &rdev->flags) ||
!test_bit(In_sync, &rdev->flags))
continue;
@@ -2382,6 +2381,7 @@ static void fix_read_error(struct r10conf *conf, struct mddev *mddev, struct r10
d = r10_bio->devs[sl].devnum;
rdev = rcu_dereference(conf->mirrors[d].rdev);
if (!rdev ||
+ test_bit(Faulty, &rdev->flags) ||
!test_bit(In_sync, &rdev->flags))
continue;
@@ -2474,7 +2474,9 @@ static int narrow_write_error(struct r10bio *r10_bio, int i)
choose_data_offset(r10_bio, rdev) +
(sector - r10_bio->sector));
wbio->bi_bdev = rdev->bdev;
- if (submit_bio_wait(WRITE, wbio) < 0)
+ bio_set_op_attrs(wbio, REQ_OP_WRITE, 0);
+
+ if (submit_bio_wait(wbio) < 0)
/* Failure! */
ok = rdev_set_badblocks(rdev, sector,
sectors, 0)
@@ -2548,7 +2550,7 @@ read_more:
bio->bi_iter.bi_sector = r10_bio->devs[slot].addr
+ choose_data_offset(r10_bio, rdev);
bio->bi_bdev = rdev->bdev;
- bio->bi_rw = READ | do_sync;
+ bio_set_op_attrs(bio, REQ_OP_READ, do_sync);
bio->bi_private = r10_bio;
bio->bi_end_io = raid10_end_read_request;
if (max_sectors < r10_bio->sectors) {
@@ -2877,11 +2879,14 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr,
/* Completed a full sync so the replacements
* are now fully recovered.
*/
- for (i = 0; i < conf->geo.raid_disks; i++)
- if (conf->mirrors[i].replacement)
- conf->mirrors[i].replacement
- ->recovery_offset
- = MaxSector;
+ rcu_read_lock();
+ for (i = 0; i < conf->geo.raid_disks; i++) {
+ struct md_rdev *rdev =
+ rcu_dereference(conf->mirrors[i].replacement);
+ if (rdev)
+ rdev->recovery_offset = MaxSector;
+ }
+ rcu_read_unlock();
}
conf->fullsync = 0;
}
@@ -2912,6 +2917,13 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr,
max_sector > (sector_nr | chunk_mask))
max_sector = (sector_nr | chunk_mask) + 1;
+ /*
+ * If there is non-resync activity waiting for a turn, then let it
+ * though before starting on this new sync request.
+ */
+ if (conf->nr_waiting)
+ schedule_timeout_uninterruptible(1);
+
/* Again, very different code for resync and recovery.
* Both must result in an r10bio with a list of bios that
* have bi_end_io, bi_sector, bi_bdev set,
@@ -2940,14 +2952,20 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr,
int must_sync;
int any_working;
struct raid10_info *mirror = &conf->mirrors[i];
+ struct md_rdev *mrdev, *mreplace;
- if ((mirror->rdev == NULL ||
- test_bit(In_sync, &mirror->rdev->flags))
- &&
- (mirror->replacement == NULL ||
- test_bit(Faulty,
- &mirror->replacement->flags)))
+ rcu_read_lock();
+ mrdev = rcu_dereference(mirror->rdev);
+ mreplace = rcu_dereference(mirror->replacement);
+
+ if ((mrdev == NULL ||
+ test_bit(Faulty, &mrdev->flags) ||
+ test_bit(In_sync, &mrdev->flags)) &&
+ (mreplace == NULL ||
+ test_bit(Faulty, &mreplace->flags))) {
+ rcu_read_unlock();
continue;
+ }
still_degraded = 0;
/* want to reconstruct this device */
@@ -2957,8 +2975,11 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr,
/* last stripe is not complete - don't
* try to recover this sector.
*/
+ rcu_read_unlock();
continue;
}
+ if (mreplace && test_bit(Faulty, &mreplace->flags))
+ mreplace = NULL;
/* Unless we are doing a full sync, or a replacement
* we only need to recover the block if it is set in
* the bitmap
@@ -2968,14 +2989,19 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr,
if (sync_blocks < max_sync)
max_sync = sync_blocks;
if (!must_sync &&
- mirror->replacement == NULL &&
+ mreplace == NULL &&
!conf->fullsync) {
/* yep, skip the sync_blocks here, but don't assume
* that there will never be anything to do here
*/
chunks_skipped = -1;
+ rcu_read_unlock();
continue;
}
+ atomic_inc(&mrdev->nr_pending);
+ if (mreplace)
+ atomic_inc(&mreplace->nr_pending);
+ rcu_read_unlock();
r10_bio = mempool_alloc(conf->r10buf_pool, GFP_NOIO);
r10_bio->state = 0;
@@ -2994,12 +3020,15 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr,
/* Need to check if the array will still be
* degraded
*/
- for (j = 0; j < conf->geo.raid_disks; j++)
- if (conf->mirrors[j].rdev == NULL ||
- test_bit(Faulty, &conf->mirrors[j].rdev->flags)) {
+ rcu_read_lock();
+ for (j = 0; j < conf->geo.raid_disks; j++) {
+ struct md_rdev *rdev = rcu_dereference(
+ conf->mirrors[j].rdev);
+ if (rdev == NULL || test_bit(Faulty, &rdev->flags)) {
still_degraded = 1;
break;
}
+ }
must_sync = bitmap_start_sync(mddev->bitmap, sect,
&sync_blocks, still_degraded);
@@ -3009,15 +3038,15 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr,
int k;
int d = r10_bio->devs[j].devnum;
sector_t from_addr, to_addr;
- struct md_rdev *rdev;
+ struct md_rdev *rdev =
+ rcu_dereference(conf->mirrors[d].rdev);
sector_t sector, first_bad;
int bad_sectors;
- if (!conf->mirrors[d].rdev ||
- !test_bit(In_sync, &conf->mirrors[d].rdev->flags))
+ if (!rdev ||
+ !test_bit(In_sync, &rdev->flags))
continue;
/* This is where we read from */
any_working = 1;
- rdev = conf->mirrors[d].rdev;
sector = r10_bio->devs[j].addr;
if (is_badblock(rdev, sector, max_sync,
@@ -3038,7 +3067,7 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr,
biolist = bio;
bio->bi_private = r10_bio;
bio->bi_end_io = end_sync_read;
- bio->bi_rw = READ;
+ bio_set_op_attrs(bio, REQ_OP_READ, 0);
from_addr = r10_bio->devs[j].addr;
bio->bi_iter.bi_sector = from_addr +
rdev->data_offset;
@@ -3056,18 +3085,17 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr,
r10_bio->devs[1].devnum = i;
r10_bio->devs[1].addr = to_addr;
- rdev = mirror->rdev;
- if (!test_bit(In_sync, &rdev->flags)) {
+ if (!test_bit(In_sync, &mrdev->flags)) {
bio = r10_bio->devs[1].bio;
bio_reset(bio);
bio->bi_next = biolist;
biolist = bio;
bio->bi_private = r10_bio;
bio->bi_end_io = end_sync_write;
- bio->bi_rw = WRITE;
+ bio_set_op_attrs(bio, REQ_OP_WRITE, 0);
bio->bi_iter.bi_sector = to_addr
- + rdev->data_offset;
- bio->bi_bdev = rdev->bdev;
+ + mrdev->data_offset;
+ bio->bi_bdev = mrdev->bdev;
atomic_inc(&r10_bio->remaining);
} else
r10_bio->devs[1].bio->bi_end_io = NULL;
@@ -3076,8 +3104,7 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr,
bio = r10_bio->devs[1].repl_bio;
if (bio)
bio->bi_end_io = NULL;
- rdev = mirror->replacement;
- /* Note: if rdev != NULL, then bio
+ /* Note: if mreplace != NULL, then bio
* cannot be NULL as r10buf_pool_alloc will
* have allocated it.
* So the second test here is pointless.
@@ -3085,21 +3112,22 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr,
* this comment keeps human reviewers
* happy.
*/
- if (rdev == NULL || bio == NULL ||
- test_bit(Faulty, &rdev->flags))
+ if (mreplace == NULL || bio == NULL ||
+ test_bit(Faulty, &mreplace->flags))
break;
bio_reset(bio);
bio->bi_next = biolist;
biolist = bio;
bio->bi_private = r10_bio;
bio->bi_end_io = end_sync_write;
- bio->bi_rw = WRITE;
+ bio_set_op_attrs(bio, REQ_OP_WRITE, 0);
bio->bi_iter.bi_sector = to_addr +
- rdev->data_offset;
- bio->bi_bdev = rdev->bdev;
+ mreplace->data_offset;
+ bio->bi_bdev = mreplace->bdev;
atomic_inc(&r10_bio->remaining);
break;
}
+ rcu_read_unlock();
if (j == conf->copies) {
/* Cannot recover, so abort the recovery or
* record a bad block */
@@ -3112,15 +3140,15 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr,
if (r10_bio->devs[k].devnum == i)
break;
if (!test_bit(In_sync,
- &mirror->rdev->flags)
+ &mrdev->flags)
&& !rdev_set_badblocks(
- mirror->rdev,
+ mrdev,
r10_bio->devs[k].addr,
max_sync, 0))
any_working = 0;
- if (mirror->replacement &&
+ if (mreplace &&
!rdev_set_badblocks(
- mirror->replacement,
+ mreplace,
r10_bio->devs[k].addr,
max_sync, 0))
any_working = 0;
@@ -3138,8 +3166,14 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr,
if (rb2)
atomic_dec(&rb2->remaining);
r10_bio = rb2;
+ rdev_dec_pending(mrdev, mddev);
+ if (mreplace)
+ rdev_dec_pending(mreplace, mddev);
break;
}
+ rdev_dec_pending(mrdev, mddev);
+ if (mreplace)
+ rdev_dec_pending(mreplace, mddev);
}
if (biolist == NULL) {
while (r10_bio) {
@@ -3184,6 +3218,7 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr,
int d = r10_bio->devs[i].devnum;
sector_t first_bad, sector;
int bad_sectors;
+ struct md_rdev *rdev;
if (r10_bio->devs[i].repl_bio)
r10_bio->devs[i].repl_bio->bi_end_io = NULL;
@@ -3191,12 +3226,14 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr,
bio = r10_bio->devs[i].bio;
bio_reset(bio);
bio->bi_error = -EIO;
- if (conf->mirrors[d].rdev == NULL ||
- test_bit(Faulty, &conf->mirrors[d].rdev->flags))
+ rcu_read_lock();
+ rdev = rcu_dereference(conf->mirrors[d].rdev);
+ if (rdev == NULL || test_bit(Faulty, &rdev->flags)) {
+ rcu_read_unlock();
continue;
+ }
sector = r10_bio->devs[i].addr;
- if (is_badblock(conf->mirrors[d].rdev,
- sector, max_sync,
+ if (is_badblock(rdev, sector, max_sync,
&first_bad, &bad_sectors)) {
if (first_bad > sector)
max_sync = first_bad - sector;
@@ -3204,25 +3241,28 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr,
bad_sectors -= (sector - first_bad);
if (max_sync > bad_sectors)
max_sync = bad_sectors;
+ rcu_read_unlock();
continue;
}
}
- atomic_inc(&conf->mirrors[d].rdev->nr_pending);
+ atomic_inc(&rdev->nr_pending);
atomic_inc(&r10_bio->remaining);
bio->bi_next = biolist;
biolist = bio;
bio->bi_private = r10_bio;
bio->bi_end_io = end_sync_read;
- bio->bi_rw = READ;
- bio->bi_iter.bi_sector = sector +
- conf->mirrors[d].rdev->data_offset;
- bio->bi_bdev = conf->mirrors[d].rdev->bdev;
+ bio_set_op_attrs(bio, REQ_OP_READ, 0);
+ bio->bi_iter.bi_sector = sector + rdev->data_offset;
+ bio->bi_bdev = rdev->bdev;
count++;
- if (conf->mirrors[d].replacement == NULL ||
- test_bit(Faulty,
- &conf->mirrors[d].replacement->flags))
+ rdev = rcu_dereference(conf->mirrors[d].replacement);
+ if (rdev == NULL || test_bit(Faulty, &rdev->flags)) {
+ rcu_read_unlock();
continue;
+ }
+ atomic_inc(&rdev->nr_pending);
+ rcu_read_unlock();
/* Need to set up for writing to the replacement */
bio = r10_bio->devs[i].repl_bio;
@@ -3230,15 +3270,13 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr,
bio->bi_error = -EIO;
sector = r10_bio->devs[i].addr;
- atomic_inc(&conf->mirrors[d].rdev->nr_pending);
bio->bi_next = biolist;
biolist = bio;
bio->bi_private = r10_bio;
bio->bi_end_io = end_sync_write;
- bio->bi_rw = WRITE;
- bio->bi_iter.bi_sector = sector +
- conf->mirrors[d].replacement->data_offset;
- bio->bi_bdev = conf->mirrors[d].replacement->bdev;
+ bio_set_op_attrs(bio, REQ_OP_WRITE, 0);
+ bio->bi_iter.bi_sector = sector + rdev->data_offset;
+ bio->bi_bdev = rdev->bdev;
count++;
}
@@ -3505,6 +3543,7 @@ static struct r10conf *setup_conf(struct mddev *mddev)
spin_lock_init(&conf->resync_lock);
init_waitqueue_head(&conf->wait_barrier);
+ atomic_set(&conf->nr_pending, 0);
conf->thread = md_register_thread(raid10d, mddev, "raid10");
if (!conf->thread)
@@ -4320,7 +4359,7 @@ read_more:
+ rdev->data_offset);
read_bio->bi_private = r10_bio;
read_bio->bi_end_io = end_sync_read;
- read_bio->bi_rw = READ;
+ bio_set_op_attrs(read_bio, REQ_OP_READ, 0);
read_bio->bi_flags &= (~0UL << BIO_RESET_BITS);
read_bio->bi_error = 0;
read_bio->bi_vcnt = 0;
@@ -4334,15 +4373,16 @@ read_more:
blist = read_bio;
read_bio->bi_next = NULL;
+ rcu_read_lock();
for (s = 0; s < conf->copies*2; s++) {
struct bio *b;
int d = r10_bio->devs[s/2].devnum;
struct md_rdev *rdev2;
if (s&1) {
- rdev2 = conf->mirrors[d].replacement;
+ rdev2 = rcu_dereference(conf->mirrors[d].replacement);
b = r10_bio->devs[s/2].repl_bio;
} else {
- rdev2 = conf->mirrors[d].rdev;
+ rdev2 = rcu_dereference(conf->mirrors[d].rdev);
b = r10_bio->devs[s/2].bio;
}
if (!rdev2 || test_bit(Faulty, &rdev2->flags))
@@ -4354,7 +4394,7 @@ read_more:
rdev2->new_data_offset;
b->bi_private = r10_bio;
b->bi_end_io = end_reshape_write;
- b->bi_rw = WRITE;
+ bio_set_op_attrs(b, REQ_OP_WRITE, 0);
b->bi_next = blist;
blist = b;
}
@@ -4387,6 +4427,7 @@ read_more:
nr_sectors += len >> 9;
}
bio_full:
+ rcu_read_unlock();
r10_bio->sectors = nr_sectors;
/* Now submit the read */
@@ -4438,16 +4479,20 @@ static void reshape_request_write(struct mddev *mddev, struct r10bio *r10_bio)
struct bio *b;
int d = r10_bio->devs[s/2].devnum;
struct md_rdev *rdev;
+ rcu_read_lock();
if (s&1) {
- rdev = conf->mirrors[d].replacement;
+ rdev = rcu_dereference(conf->mirrors[d].replacement);
b = r10_bio->devs[s/2].repl_bio;
} else {
- rdev = conf->mirrors[d].rdev;
+ rdev = rcu_dereference(conf->mirrors[d].rdev);
b = r10_bio->devs[s/2].bio;
}
- if (!rdev || test_bit(Faulty, &rdev->flags))
+ if (!rdev || test_bit(Faulty, &rdev->flags)) {
+ rcu_read_unlock();
continue;
+ }
atomic_inc(&rdev->nr_pending);
+ rcu_read_unlock();
md_sync_acct(b->bi_bdev, r10_bio->sectors);
atomic_inc(&r10_bio->remaining);
b->bi_next = NULL;
@@ -4508,9 +4553,10 @@ static int handle_reshape_read_error(struct mddev *mddev,
if (s > (PAGE_SIZE >> 9))
s = PAGE_SIZE >> 9;
+ rcu_read_lock();
while (!success) {
int d = r10b->devs[slot].devnum;
- struct md_rdev *rdev = conf->mirrors[d].rdev;
+ struct md_rdev *rdev = rcu_dereference(conf->mirrors[d].rdev);
sector_t addr;
if (rdev == NULL ||
test_bit(Faulty, &rdev->flags) ||
@@ -4518,11 +4564,15 @@ static int handle_reshape_read_error(struct mddev *mddev,
goto failed;
addr = r10b->devs[slot].addr + idx * PAGE_SIZE;
+ atomic_inc(&rdev->nr_pending);
+ rcu_read_unlock();
success = sync_page_io(rdev,
addr,
s << 9,
bvec[idx].bv_page,
- READ, false);
+ REQ_OP_READ, 0, false);
+ rdev_dec_pending(rdev, mddev);
+ rcu_read_lock();
if (success)
break;
failed:
@@ -4532,6 +4582,7 @@ static int handle_reshape_read_error(struct mddev *mddev,
if (slot == first_slot)
break;
}
+ rcu_read_unlock();
if (!success) {
/* couldn't read this block, must give up */
set_bit(MD_RECOVERY_INTR,
@@ -4601,16 +4652,18 @@ static void raid10_finish_reshape(struct mddev *mddev)
}
} else {
int d;
+ rcu_read_lock();
for (d = conf->geo.raid_disks ;
d < conf->geo.raid_disks - mddev->delta_disks;
d++) {
- struct md_rdev *rdev = conf->mirrors[d].rdev;
+ struct md_rdev *rdev = rcu_dereference(conf->mirrors[d].rdev);
if (rdev)
clear_bit(In_sync, &rdev->flags);
- rdev = conf->mirrors[d].replacement;
+ rdev = rcu_dereference(conf->mirrors[d].replacement);
if (rdev)
clear_bit(In_sync, &rdev->flags);
}
+ rcu_read_unlock();
}
mddev->layout = mddev->new_layout;
mddev->chunk_sectors = 1 << conf->geo.chunk_shift;