author      Jens Axboe <axboe@kernel.dk>    2022-09-23 22:59:01 +0200
committer   Jens Axboe <axboe@kernel.dk>    2022-09-23 22:59:01 +0200
commit      4324796ed0d140bac7a07fc2189bc3c3c3752978
tree        5b95f463a66f3d2c845eef166a09472e7adee3f7
parent      block/blk-rq-qos: delete useless enmu RQ_QOS_IOPRIO
parent      md: Fix spelling mistake in comments of r5l_log
Merge branch 'md-next' of https://git.kernel.org/pub/scm/linux/kernel/git/song/md into for-6.1/block
Pull MD updates and fixes from Song:
"1. Various raid5 fix and clean up, by Logan Gunthorpe and David Sloan.
2. Raid10 performance optimization, by Yu Kuai."
* 'md-next' of https://git.kernel.org/pub/scm/linux/kernel/git/song/md:
md: Fix spelling mistake in comments of r5l_log
md/raid5: Wait for MD_SB_CHANGE_PENDING in raid5d
md/raid10: convert resync_lock to use seqlock
md/raid10: fix improper BUG_ON() in raise_barrier()
md/raid10: prevent unnecessary calls to wake_up() in fast path
md/raid10: don't modify 'nr_waiting' in wait_barrier() for the case nowait
md/raid10: factor out code from wait_barrier() to stop_waiting_barrier()
md: Remove extra mddev_get() in md_seq_start()
md/raid5: Remove unnecessary bio_put() in raid5_read_one_chunk()
md/raid5: Ensure stripe_fill happens on non-read IO with journal
md/raid5: Don't read ->active_stripes if it's not needed
md/raid5: Cleanup prototype of raid5_get_active_stripe()
md/raid5: Drop extern on function declarations in raid5.h
md/raid5: Refactor raid5_get_active_stripe()
md: Replace snprintf with scnprintf
md/raid10: fix compile warning
md/raid5: Fix spelling mistakes in comments
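The centerpiece of the raid10 work above is converting resync_lock from a spinlock to a seqlock, so that wait_barrier() can take a lockless fast path whenever no resync barrier is raised. The sketch below is an illustrative condensation of the wait_barrier_nolock() fast path from the raid10.c diff that follows; the demo_* names are ours, not kernel API. A reader optimistically bumps nr_pending and keeps the increment only if no barrier update raced with it:

	#include <linux/seqlock.h>
	#include <linux/wait.h>

	struct demo_conf {
		seqlock_t resync_lock;		/* was spinlock_t before this series */
		atomic_t nr_pending;		/* in-flight normal IO */
		int barrier;			/* resync barrier depth */
		wait_queue_head_t wait_barrier;
	};

	/* Returns true if IO may proceed without taking resync_lock. */
	static bool demo_enter_io_fastpath(struct demo_conf *conf)
	{
		unsigned int seq = read_seqbegin(&conf->resync_lock);

		if (READ_ONCE(conf->barrier))
			return false;	/* barrier raised: take the slow path */

		atomic_inc(&conf->nr_pending);

		/* No barrier writer raced with us: the increment stands. */
		if (!read_seqretry(&conf->resync_lock, seq))
			return true;

		/* A barrier may have been raised concurrently: undo, fall back. */
		if (atomic_dec_and_test(&conf->nr_pending))
			wake_up(&conf->wait_barrier);
		return false;
	}

Because the barrier counter is now read outside any lock, the writers in the diff update it with WRITE_ONCE() under write_seqlock_irq(); read_seqretry() then guarantees the speculative nr_pending increment is kept only when no barrier operation overlapped it.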
-rw-r--r--	drivers/md/md.c	1
-rw-r--r--	drivers/md/raid0.c	2
-rw-r--r--	drivers/md/raid10.c	151
-rw-r--r--	drivers/md/raid10.h	2
-rw-r--r--	drivers/md/raid5-cache.c	11
-rw-r--r--	drivers/md/raid5.c	147
-rw-r--r--	drivers/md/raid5.h	32
7 files changed, 204 insertions(+), 142 deletions(-)
diff --git a/drivers/md/md.c b/drivers/md/md.c
index afaf36b2f6ab..9dc0175280b4 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -8154,7 +8154,6 @@ static void *md_seq_start(struct seq_file *seq, loff_t *pos)
 		list_for_each(tmp,&all_mddevs)
 			if (!l--) {
 				mddev = list_entry(tmp, struct mddev, all_mddevs);
-				mddev_get(mddev);
 				if (!mddev_get(mddev))
 					continue;
 				spin_unlock(&all_mddevs_lock);
diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c
index 78addfe4a0c9..857c49399c28 100644
--- a/drivers/md/raid0.c
+++ b/drivers/md/raid0.c
@@ -47,7 +47,7 @@ static void dump_zones(struct mddev *mddev)
 		int len = 0;
 
 		for (k = 0; k < conf->strip_zone[j].nb_dev; k++)
-			len += snprintf(line+len, 200-len, "%s%pg", k?"/":"",
+			len += scnprintf(line+len, 200-len, "%s%pg", k?"/":"",
 				conf->devlist[j * raid_disks + k]->bdev);
 		pr_debug("md: zone%d=[%s]\n", j, line);
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index 9117fcdee1be..58c711912875 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -79,6 +79,21 @@ static void end_reshape(struct r10conf *conf);
 
 #include "raid1-10.c"
 
+#define NULL_CMD
+#define cmd_before(conf, cmd) \
+	do { \
+		write_sequnlock_irq(&(conf)->resync_lock); \
+		cmd; \
+	} while (0)
+#define cmd_after(conf) write_seqlock_irq(&(conf)->resync_lock)
+
+#define wait_event_barrier_cmd(conf, cond, cmd) \
+	wait_event_cmd((conf)->wait_barrier, cond, cmd_before(conf, cmd), \
+		       cmd_after(conf))
+
+#define wait_event_barrier(conf, cond) \
+	wait_event_barrier_cmd(conf, cond, NULL_CMD)
+
 /*
  * for resync bio, r10bio pointer can be retrieved from the per-bio
  * 'struct resync_pages'.
@@ -274,6 +289,12 @@ static void put_buf(struct r10bio *r10_bio)
 	lower_barrier(conf);
 }
 
+static void wake_up_barrier(struct r10conf *conf)
+{
+	if (wq_has_sleeper(&conf->wait_barrier))
+		wake_up(&conf->wait_barrier);
+}
+
 static void reschedule_retry(struct r10bio *r10_bio)
 {
 	unsigned long flags;
@@ -930,78 +951,101 @@ static void flush_pending_writes(struct r10conf *conf)
 
 static void raise_barrier(struct r10conf *conf, int force)
 {
+	write_seqlock_irq(&conf->resync_lock);
 	BUG_ON(force && !conf->barrier);
-	spin_lock_irq(&conf->resync_lock);
 
 	/* Wait until no block IO is waiting (unless 'force') */
-	wait_event_lock_irq(conf->wait_barrier, force || !conf->nr_waiting,
-			    conf->resync_lock);
+	wait_event_barrier(conf, force || !conf->nr_waiting);
 
 	/* block any new IO from starting */
-	conf->barrier++;
+	WRITE_ONCE(conf->barrier, conf->barrier + 1);
 
 	/* Now wait for all pending IO to complete */
-	wait_event_lock_irq(conf->wait_barrier,
-			    !atomic_read(&conf->nr_pending) && conf->barrier < RESYNC_DEPTH,
-			    conf->resync_lock);
+	wait_event_barrier(conf, !atomic_read(&conf->nr_pending) &&
+				 conf->barrier < RESYNC_DEPTH);
 
-	spin_unlock_irq(&conf->resync_lock);
+	write_sequnlock_irq(&conf->resync_lock);
 }
 
 static void lower_barrier(struct r10conf *conf)
 {
 	unsigned long flags;
-	spin_lock_irqsave(&conf->resync_lock, flags);
-	conf->barrier--;
-	spin_unlock_irqrestore(&conf->resync_lock, flags);
+
+	write_seqlock_irqsave(&conf->resync_lock, flags);
+	WRITE_ONCE(conf->barrier, conf->barrier - 1);
+	write_sequnlock_irqrestore(&conf->resync_lock, flags);
 	wake_up(&conf->wait_barrier);
 }
 
+static bool stop_waiting_barrier(struct r10conf *conf)
+{
+	struct bio_list *bio_list = current->bio_list;
+
+	/* barrier is dropped */
+	if (!conf->barrier)
+		return true;
+
+	/*
+	 * If there are already pending requests (preventing the barrier from
+	 * rising completely), and the pre-process bio queue isn't empty, then
+	 * don't wait, as we need to empty that queue to get the nr_pending
+	 * count down.
+	 */
+	if (atomic_read(&conf->nr_pending) && bio_list &&
+	    (!bio_list_empty(&bio_list[0]) || !bio_list_empty(&bio_list[1])))
+		return true;
+
+	/* move on if recovery thread is blocked by us */
+	if (conf->mddev->thread->tsk == current &&
+	    test_bit(MD_RECOVERY_RUNNING, &conf->mddev->recovery) &&
+	    conf->nr_queued > 0)
+		return true;
+
+	return false;
+}
+
+static bool wait_barrier_nolock(struct r10conf *conf)
+{
+	unsigned int seq = read_seqbegin(&conf->resync_lock);
+
+	if (READ_ONCE(conf->barrier))
+		return false;
+
+	atomic_inc(&conf->nr_pending);
+	if (!read_seqretry(&conf->resync_lock, seq))
+		return true;
+
+	if (atomic_dec_and_test(&conf->nr_pending))
+		wake_up_barrier(conf);
+
+	return false;
+}
+
 static bool wait_barrier(struct r10conf *conf, bool nowait)
 {
 	bool ret = true;
 
-	spin_lock_irq(&conf->resync_lock);
+	if (wait_barrier_nolock(conf))
+		return true;
+
+	write_seqlock_irq(&conf->resync_lock);
 	if (conf->barrier) {
-		struct bio_list *bio_list = current->bio_list;
-		conf->nr_waiting++;
-		/* Wait for the barrier to drop.
-		 * However if there are already pending
-		 * requests (preventing the barrier from
-		 * rising completely), and the
-		 * pre-process bio queue isn't empty,
-		 * then don't wait, as we need to empty
-		 * that queue to get the nr_pending
-		 * count down.
-		 */
 		/* Return false when nowait flag is set */
 		if (nowait) {
 			ret = false;
 		} else {
+			conf->nr_waiting++;
 			raid10_log(conf->mddev, "wait barrier");
-			wait_event_lock_irq(conf->wait_barrier,
-					    !conf->barrier ||
-					    (atomic_read(&conf->nr_pending) &&
-					     bio_list &&
-					     (!bio_list_empty(&bio_list[0]) ||
-					      !bio_list_empty(&bio_list[1]))) ||
-					     /* move on if recovery thread is
-					      * blocked by us
-					      */
-					     (conf->mddev->thread->tsk == current &&
-					      test_bit(MD_RECOVERY_RUNNING,
						       &conf->mddev->recovery) &&
-					      conf->nr_queued > 0),
-					    conf->resync_lock);
+			wait_event_barrier(conf, stop_waiting_barrier(conf));
+			conf->nr_waiting--;
 		}
-		conf->nr_waiting--;
 		if (!conf->nr_waiting)
 			wake_up(&conf->wait_barrier);
 	}
 	/* Only increment nr_pending when we wait */
 	if (ret)
 		atomic_inc(&conf->nr_pending);
-	spin_unlock_irq(&conf->resync_lock);
+	write_sequnlock_irq(&conf->resync_lock);
 	return ret;
 }
 
@@ -1009,7 +1053,7 @@ static void allow_barrier(struct r10conf *conf)
 {
 	if ((atomic_dec_and_test(&conf->nr_pending)) ||
 			(conf->array_freeze_pending))
-		wake_up(&conf->wait_barrier);
+		wake_up_barrier(conf);
 }
 
 static void freeze_array(struct r10conf *conf, int extra)
@@ -1026,27 +1070,24 @@ static void freeze_array(struct r10conf *conf, int extra)
 	 * must match the number of pending IOs (nr_pending) before
 	 * we continue.
 	 */
-	spin_lock_irq(&conf->resync_lock);
+	write_seqlock_irq(&conf->resync_lock);
 	conf->array_freeze_pending++;
-	conf->barrier++;
+	WRITE_ONCE(conf->barrier, conf->barrier + 1);
 	conf->nr_waiting++;
-	wait_event_lock_irq_cmd(conf->wait_barrier,
-				atomic_read(&conf->nr_pending) == conf->nr_queued+extra,
-				conf->resync_lock,
-				flush_pending_writes(conf));
-
+	wait_event_barrier_cmd(conf, atomic_read(&conf->nr_pending) ==
+			conf->nr_queued + extra, flush_pending_writes(conf));
 	conf->array_freeze_pending--;
-	spin_unlock_irq(&conf->resync_lock);
+	write_sequnlock_irq(&conf->resync_lock);
 }
 
 static void unfreeze_array(struct r10conf *conf)
 {
 	/* reverse the effect of the freeze */
-	spin_lock_irq(&conf->resync_lock);
-	conf->barrier--;
+	write_seqlock_irq(&conf->resync_lock);
+	WRITE_ONCE(conf->barrier, conf->barrier - 1);
 	conf->nr_waiting--;
 	wake_up(&conf->wait_barrier);
-	spin_unlock_irq(&conf->resync_lock);
+	write_sequnlock_irq(&conf->resync_lock);
 }
 
 static sector_t choose_data_offset(struct r10bio *r10_bio,
@@ -1885,7 +1926,7 @@ static bool raid10_make_request(struct mddev *mddev, struct bio *bio)
 	__make_request(mddev, bio, sectors);
 
 	/* In case raid10d snuck in to freeze_array */
-	wake_up(&conf->wait_barrier);
+	wake_up_barrier(conf);
 	return true;
 }
 
@@ -1980,7 +2021,7 @@ static int enough(struct r10conf *conf, int ignore)
  * Otherwise, it must be degraded:
  * - recovery is interrupted.
  * - &mddev->degraded is bumped.
-
+ *
 * @rdev is marked as &Faulty excluding case when array is failed and
 * &mddev->fail_last_dev is off.
 */
@@ -4033,7 +4074,7 @@ static struct r10conf *setup_conf(struct mddev *mddev)
 	INIT_LIST_HEAD(&conf->retry_list);
 	INIT_LIST_HEAD(&conf->bio_end_io_list);
 
-	spin_lock_init(&conf->resync_lock);
+	seqlock_init(&conf->resync_lock);
 	init_waitqueue_head(&conf->wait_barrier);
 	atomic_set(&conf->nr_pending, 0);
 
@@ -4352,7 +4393,7 @@ static void *raid10_takeover_raid0(struct mddev *mddev, sector_t size, int devs)
 			rdev->new_raid_disk = rdev->raid_disk * 2;
 			rdev->sectors = size;
 		}
-		conf->barrier = 1;
+		WRITE_ONCE(conf->barrier, 1);
 	}
 
 	return conf;
diff --git a/drivers/md/raid10.h b/drivers/md/raid10.h
index 5c0804d8bb1f..8c072ce0bc54 100644
--- a/drivers/md/raid10.h
+++ b/drivers/md/raid10.h
@@ -76,7 +76,7 @@ struct r10conf {
 	/* queue pending writes and submit them on unplug */
 	struct bio_list		pending_bio_list;
 
-	spinlock_t		resync_lock;
+	seqlock_t		resync_lock;
 	atomic_t		nr_pending;
 	int			nr_waiting;
 	int			nr_queued;
diff --git a/drivers/md/raid5-cache.c b/drivers/md/raid5-cache.c
index f4e1cc1ece43..79c73330020b 100644
--- a/drivers/md/raid5-cache.c
+++ b/drivers/md/raid5-cache.c
@@ -125,7 +125,7 @@ struct r5l_log {
 					 * reclaimed. if it's 0, reclaim spaces
 					 * used by io_units which are in
 					 * IO_UNIT_STRIPE_END state (eg, reclaim
-					 * dones't wait for specific io_unit
+					 * doesn't wait for specific io_unit
 					 * switching to IO_UNIT_STRIPE_END
 					 * state) */
 	wait_queue_head_t iounit_wait;
@@ -1327,9 +1327,9 @@ static void r5l_write_super_and_discard_space(struct r5l_log *log,
 	 * superblock is updated to new log tail. Updating superblock (either
 	 * directly call md_update_sb() or depend on md thread) must hold
 	 * reconfig mutex. On the other hand, raid5_quiesce is called with
-	 * reconfig_mutex hold. The first step of raid5_quiesce() is waitting
-	 * for all IO finish, hence waitting for reclaim thread, while reclaim
-	 * thread is calling this function and waitting for reconfig mutex. So
+	 * reconfig_mutex hold. The first step of raid5_quiesce() is waiting
+	 * for all IO finish, hence waiting for reclaim thread, while reclaim
+	 * thread is calling this function and waiting for reconfig mutex. So
 	 * there is a deadlock. We workaround this issue with a trylock.
 	 * FIXME: we could miss discard if we can't take reconfig mutex
 	 */
@@ -1923,7 +1923,8 @@ r5c_recovery_alloc_stripe(
 {
 	struct stripe_head *sh;
 
-	sh = raid5_get_active_stripe(conf, stripe_sect, 0, noblock, 0);
+	sh = raid5_get_active_stripe(conf, NULL, stripe_sect,
+				     noblock ? R5_GAS_NOBLOCK : 0);
 	if (!sh)
 		return NULL;  /* no more stripe available */
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 31a0cbf63384..7b820b81d8c2 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -36,6 +36,7 @@
 */
 
 #include <linux/blkdev.h>
+#include <linux/delay.h>
 #include <linux/kthread.h>
 #include <linux/raid/pq.h>
 #include <linux/async_tx.h>
@@ -789,87 +790,80 @@ struct stripe_request_ctx {
 */
 static bool is_inactive_blocked(struct r5conf *conf, int hash)
 {
-	int active = atomic_read(&conf->active_stripes);
-
 	if (list_empty(conf->inactive_list + hash))
 		return false;
 
 	if (!test_bit(R5_INACTIVE_BLOCKED, &conf->cache_state))
 		return true;
 
-	return active < (conf->max_nr_stripes * 3 / 4);
+	return (atomic_read(&conf->active_stripes) <
+		(conf->max_nr_stripes * 3 / 4));
 }
 
-static struct stripe_head *__raid5_get_active_stripe(struct r5conf *conf,
+struct stripe_head *raid5_get_active_stripe(struct r5conf *conf,
 		struct stripe_request_ctx *ctx, sector_t sector,
-		bool previous, bool noblock, bool noquiesce)
+		unsigned int flags)
 {
 	struct stripe_head *sh;
 	int hash = stripe_hash_locks_hash(conf, sector);
+	int previous = !!(flags & R5_GAS_PREVIOUS);
 
 	pr_debug("get_stripe, sector %llu\n", (unsigned long long)sector);
 
 	spin_lock_irq(conf->hash_locks + hash);
 
-retry:
-	if (!noquiesce && conf->quiesce) {
-		/*
-		 * Must release the reference to batch_last before waiting,
-		 * on quiesce, otherwise the batch_last will hold a reference
-		 * to a stripe and raid5_quiesce() will deadlock waiting for
-		 * active_stripes to go to zero.
-		 */
-		if (ctx && ctx->batch_last) {
-			raid5_release_stripe(ctx->batch_last);
-			ctx->batch_last = NULL;
-		}
-
-		wait_event_lock_irq(conf->wait_for_quiescent, !conf->quiesce,
-				    *(conf->hash_locks + hash));
-	}
+	for (;;) {
+		if (!(flags & R5_GAS_NOQUIESCE) && conf->quiesce) {
+			/*
+			 * Must release the reference to batch_last before
+			 * waiting, on quiesce, otherwise the batch_last will
+			 * hold a reference to a stripe and raid5_quiesce()
+			 * will deadlock waiting for active_stripes to go to
+			 * zero.
+			 */
+			if (ctx && ctx->batch_last) {
+				raid5_release_stripe(ctx->batch_last);
+				ctx->batch_last = NULL;
+			}
 
-	sh = find_get_stripe(conf, sector, conf->generation - previous, hash);
-	if (sh)
-		goto out;
+			wait_event_lock_irq(conf->wait_for_quiescent,
+					    !conf->quiesce,
+					    *(conf->hash_locks + hash));
+		}
 
-	if (test_bit(R5_INACTIVE_BLOCKED, &conf->cache_state))
-		goto wait_for_stripe;
+		sh = find_get_stripe(conf, sector, conf->generation - previous,
+				     hash);
+		if (sh)
+			break;
 
-	sh = get_free_stripe(conf, hash);
-	if (sh) {
-		r5c_check_stripe_cache_usage(conf);
-		init_stripe(sh, sector, previous);
-		atomic_inc(&sh->count);
-		goto out;
-	}
+		if (!test_bit(R5_INACTIVE_BLOCKED, &conf->cache_state)) {
+			sh = get_free_stripe(conf, hash);
+			if (sh) {
+				r5c_check_stripe_cache_usage(conf);
+				init_stripe(sh, sector, previous);
+				atomic_inc(&sh->count);
+				break;
+			}
 
-	if (!test_bit(R5_DID_ALLOC, &conf->cache_state))
-		set_bit(R5_ALLOC_MORE, &conf->cache_state);
+			if (!test_bit(R5_DID_ALLOC, &conf->cache_state))
+				set_bit(R5_ALLOC_MORE, &conf->cache_state);
+		}
 
-wait_for_stripe:
-	if (noblock)
-		goto out;
+		if (flags & R5_GAS_NOBLOCK)
+			break;
 
-	set_bit(R5_INACTIVE_BLOCKED, &conf->cache_state);
-	r5l_wake_reclaim(conf->log, 0);
-	wait_event_lock_irq(conf->wait_for_stripe,
-			    is_inactive_blocked(conf, hash),
-			    *(conf->hash_locks + hash));
-	clear_bit(R5_INACTIVE_BLOCKED, &conf->cache_state);
-	goto retry;
+		set_bit(R5_INACTIVE_BLOCKED, &conf->cache_state);
+		r5l_wake_reclaim(conf->log, 0);
+		wait_event_lock_irq(conf->wait_for_stripe,
+				    is_inactive_blocked(conf, hash),
+				    *(conf->hash_locks + hash));
+		clear_bit(R5_INACTIVE_BLOCKED, &conf->cache_state);
+	}
 
-out:
 	spin_unlock_irq(conf->hash_locks + hash);
 	return sh;
 }
 
-struct stripe_head *raid5_get_active_stripe(struct r5conf *conf,
-	sector_t sector, bool previous, bool noblock, bool noquiesce)
-{
-	return __raid5_get_active_stripe(conf, NULL, sector, previous, noblock,
-					 noquiesce);
-}
-
 static bool is_full_stripe_write(struct stripe_head *sh)
 {
 	BUG_ON(sh->overwrite_disks > (sh->disks - sh->raid_conf->max_degraded));
@@ -4047,7 +4041,7 @@ static void handle_stripe_fill(struct stripe_head *sh,
 		 * back cache (prexor with orig_page, and then xor with
 		 * page) in the read path
 		 */
-		if (s->injournal && s->failed) {
+		if (s->to_read && s->injournal && s->failed) {
 			if (test_bit(STRIPE_R5C_CACHING, &sh->state))
 				r5c_make_stripe_write_out(sh);
 			goto out;
@@ -4636,7 +4630,8 @@ static void handle_stripe_expansion(struct r5conf *conf, struct stripe_head *sh)
 			sector_t bn = raid5_compute_blocknr(sh, i, 1);
 			sector_t s = raid5_compute_sector(conf, bn, 0,
							  &dd_idx, NULL);
-			sh2 = raid5_get_active_stripe(conf, s, 0, 1, 1);
+			sh2 = raid5_get_active_stripe(conf, NULL, s,
+				R5_GAS_NOBLOCK | R5_GAS_NOQUIESCE);
 			if (sh2 == NULL)
 				/* so far only the early blocks of this stripe
 				 * have been requested.  When later blocks
@@ -5273,7 +5268,9 @@ static void handle_stripe(struct stripe_head *sh)
 	/* Finish reconstruct operations initiated by the expansion process */
 	if (sh->reconstruct_state == reconstruct_state_result) {
 		struct stripe_head *sh_src
-			= raid5_get_active_stripe(conf, sh->sector, 1, 1, 1);
+			= raid5_get_active_stripe(conf, NULL, sh->sector,
					R5_GAS_PREVIOUS | R5_GAS_NOBLOCK |
					R5_GAS_NOQUIESCE);
 		if (sh_src && test_bit(STRIPE_EXPAND_SOURCE, &sh_src->state)) {
 			/* sh cannot be written until sh_src has been read.
 			 * so arrange for sh to be delayed a little
@@ -5542,7 +5539,6 @@ static int raid5_read_one_chunk(struct mddev *mddev, struct bio *raid_bio)
 
 	if (is_badblock(rdev, sector, bio_sectors(raid_bio), &first_bad,
			&bad_sectors)) {
-		bio_put(raid_bio);
 		rdev_dec_pending(rdev, mddev);
 		return 0;
 	}
@@ -5823,7 +5819,7 @@ static void make_discard_request(struct mddev *mddev, struct bio *bi)
 		DEFINE_WAIT(w);
 		int d;
 	again:
-		sh = raid5_get_active_stripe(conf, logical_sector, 0, 0, 0);
+		sh = raid5_get_active_stripe(conf, NULL, logical_sector, 0);
 		prepare_to_wait(&conf->wait_for_overlap, &w,
				TASK_UNINTERRUPTIBLE);
 		set_bit(R5_Overlap, &sh->dev[sh->pd_idx].flags);
@@ -5978,7 +5974,7 @@ static enum stripe_result make_stripe_request(struct mddev *mddev,
 	enum stripe_result ret;
 	struct stripe_head *sh;
 	sector_t new_sector;
-	int previous = 0;
+	int previous = 0, flags = 0;
 	int seq, dd_idx;
 
 	seq = read_seqcount_begin(&conf->gen_lock);
@@ -6012,8 +6008,11 @@ static enum stripe_result make_stripe_request(struct mddev *mddev,
 	pr_debug("raid456: %s, sector %llu logical %llu\n", __func__,
		 new_sector, logical_sector);
 
-	sh = __raid5_get_active_stripe(conf, ctx, new_sector, previous,
-				       (bi->bi_opf & REQ_RAHEAD), 0);
+	if (previous)
+		flags |= R5_GAS_PREVIOUS;
+	if (bi->bi_opf & REQ_RAHEAD)
+		flags |= R5_GAS_NOBLOCK;
+	sh = raid5_get_active_stripe(conf, ctx, new_sector, flags);
 	if (unlikely(!sh)) {
 		/* cannot get stripe, just give-up */
 		bi->bi_status = BLK_STS_IOERR;
@@ -6362,7 +6361,8 @@ static sector_t reshape_request(struct mddev *mddev, sector_t sector_nr, int *sk
 	for (i = 0; i < reshape_sectors; i += RAID5_STRIPE_SECTORS(conf)) {
 		int j;
 		int skipped_disk = 0;
-		sh = raid5_get_active_stripe(conf, stripe_addr+i, 0, 0, 1);
+		sh = raid5_get_active_stripe(conf, NULL, stripe_addr+i,
+					     R5_GAS_NOQUIESCE);
 		set_bit(STRIPE_EXPANDING, &sh->state);
 		atomic_inc(&conf->reshape_stripes);
 		/* If any of this stripe is beyond the end of the old
@@ -6411,7 +6411,8 @@ static sector_t reshape_request(struct mddev *mddev, sector_t sector_nr, int *sk
 	if (last_sector >= mddev->dev_sectors)
 		last_sector = mddev->dev_sectors - 1;
 	while (first_sector <= last_sector) {
-		sh = raid5_get_active_stripe(conf, first_sector, 1, 0, 1);
+		sh = raid5_get_active_stripe(conf, NULL, first_sector,
+				R5_GAS_PREVIOUS | R5_GAS_NOQUIESCE);
 		set_bit(STRIPE_EXPAND_SOURCE, &sh->state);
 		set_bit(STRIPE_HANDLE, &sh->state);
 		raid5_release_stripe(sh);
@@ -6531,9 +6532,10 @@ static inline sector_t raid5_sync_request(struct mddev *mddev, sector_t sector_n
 
 	md_bitmap_cond_end_sync(mddev->bitmap, sector_nr, false);
 
-	sh = raid5_get_active_stripe(conf, sector_nr, 0, 1, 0);
+	sh = raid5_get_active_stripe(conf, NULL, sector_nr,
+				     R5_GAS_NOBLOCK);
 	if (sh == NULL) {
-		sh = raid5_get_active_stripe(conf, sector_nr, 0, 0, 0);
+		sh = raid5_get_active_stripe(conf, NULL, sector_nr, 0);
 		/* make sure we don't swamp the stripe cache if someone else
 		 * is trying to get access
 		 */
@@ -6596,8 +6598,8 @@ static int retry_aligned_read(struct r5conf *conf, struct bio *raid_bio,
 			/* already done this stripe */
 			continue;
 
-		sh = raid5_get_active_stripe(conf, sector, 0, 1, 1);
-
+		sh = raid5_get_active_stripe(conf, NULL, sector,
+				R5_GAS_NOBLOCK | R5_GAS_NOQUIESCE);
 		if (!sh) {
 			/* failed to get a stripe - must wait */
 			conf->retry_read_aligned = raid_bio;
@@ -6781,7 +6783,18 @@ static void raid5d(struct md_thread *thread)
 			spin_unlock_irq(&conf->device_lock);
 			md_check_recovery(mddev);
 			spin_lock_irq(&conf->device_lock);
+
+			/*
+			 * Waiting on MD_SB_CHANGE_PENDING below may deadlock
+			 * seeing md_check_recovery() is needed to clear
+			 * the flag when using mdmon.
+			 */
+			continue;
 		}
+
+		wait_event_lock_irq(mddev->sb_wait,
+			!test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags),
+			conf->device_lock);
 	}
 	pr_debug("%d stripes handled\n", handled);
diff --git a/drivers/md/raid5.h b/drivers/md/raid5.h
index a5082bed83c8..e873938a6125 100644
--- a/drivers/md/raid5.h
+++ b/drivers/md/raid5.h
@@ -803,16 +803,24 @@ raid5_get_dev_page(struct stripe_head *sh, int disk_idx)
 }
 #endif
 
-extern void md_raid5_kick_device(struct r5conf *conf);
-extern int raid5_set_cache_size(struct mddev *mddev, int size);
-extern sector_t raid5_compute_blocknr(struct stripe_head *sh, int i, int previous);
-extern void raid5_release_stripe(struct stripe_head *sh);
-extern sector_t raid5_compute_sector(struct r5conf *conf, sector_t r_sector,
-				     int previous, int *dd_idx,
-				     struct stripe_head *sh);
-extern struct stripe_head *
-raid5_get_active_stripe(struct r5conf *conf, sector_t sector,
-			bool previous, bool noblock, bool noquiesce);
-extern int raid5_calc_degraded(struct r5conf *conf);
-extern int r5c_journal_mode_set(struct mddev *mddev, int journal_mode);
+void md_raid5_kick_device(struct r5conf *conf);
+int raid5_set_cache_size(struct mddev *mddev, int size);
+sector_t raid5_compute_blocknr(struct stripe_head *sh, int i, int previous);
+void raid5_release_stripe(struct stripe_head *sh);
+sector_t raid5_compute_sector(struct r5conf *conf, sector_t r_sector,
+			      int previous, int *dd_idx, struct stripe_head *sh);
+
+struct stripe_request_ctx;
+/* get stripe from previous generation (when reshaping) */
+#define R5_GAS_PREVIOUS		(1 << 0)
+/* do not block waiting for a free stripe */
+#define R5_GAS_NOBLOCK		(1 << 1)
+/* do not block waiting for quiesce to be released */
+#define R5_GAS_NOQUIESCE	(1 << 2)
+struct stripe_head *raid5_get_active_stripe(struct r5conf *conf,
+		struct stripe_request_ctx *ctx, sector_t sector,
+		unsigned int flags);
+
+int raid5_calc_degraded(struct r5conf *conf);
+int r5c_journal_mode_set(struct mddev *mddev, int journal_mode);
 #endif
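A usage note on the raid5_get_active_stripe() cleanup: the old prototype took three positional bools (previous, noblock, noquiesce) that were easy to misorder at call sites; the new one takes the batching context pointer plus a single flags word. For comparison, both versions of the same call in handle_stripe_expansion(), as they appear in the diff above:

	/* before: positional bool arguments, opaque at the call site */
	sh2 = raid5_get_active_stripe(conf, s, 0, 1, 1);

	/* after: one flags word that documents intent */
	sh2 = raid5_get_active_stripe(conf, NULL, s,
				      R5_GAS_NOBLOCK | R5_GAS_NOQUIESCE);

Folding __raid5_get_active_stripe() into the public function also makes every caller pass the stripe_request_ctx explicitly (NULL where batching does not apply), and new behaviours can be added as R5_GAS_* flags without another signature change.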