summaryrefslogtreecommitdiffstats
path: root/drivers/md/raid10.c
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2022-10-07 18:19:14 +0200
committerLinus Torvalds <torvalds@linux-foundation.org>2022-10-07 18:19:14 +0200
commit513389809e138ae903b6ef43c1d5d2ffaf4dca17 (patch)
treec71e478fab1568da4706868b14eb67a75c148a8b /drivers/md/raid10.c
parentMerge tag 'for-6.1/io_uring-2022-10-03' of git://git.kernel.dk/linux (diff)
parentsbitmap: fix lockup while swapping (diff)
downloadlinux-513389809e138ae903b6ef43c1d5d2ffaf4dca17.tar.xz
linux-513389809e138ae903b6ef43c1d5d2ffaf4dca17.zip
Merge tag 'for-6.1/block-2022-10-03' of git://git.kernel.dk/linux
Pull block updates from Jens Axboe: - NVMe pull requests via Christoph: - handle number of queue changes in the TCP and RDMA drivers (Daniel Wagner) - allow changing the number of queues in nvmet (Daniel Wagner) - also consider host_iface when checking ip options (Daniel Wagner) - don't map pages which can't come from HIGHMEM (Fabio M. De Francesco) - avoid unnecessary flush bios in nvmet (Guixin Liu) - shrink and better pack the nvme_iod structure (Keith Busch) - add comment for unaligned "fake" nqn (Linjun Bao) - print actual source IP address through sysfs "address" attr (Martin Belanger) - various cleanups (Jackie Liu, Wolfram Sang, Genjian Zhang) - handle effects after freeing the request (Keith Busch) - copy firmware_rev on each init (Keith Busch) - restrict management ioctls to admin (Keith Busch) - ensure subsystem reset is single threaded (Keith Busch) - report the actual number of tagset maps in nvme-pci (Keith Busch) - small fabrics authentication fixups (Christoph Hellwig) - add common code for tagset allocation and freeing (Christoph Hellwig) - stop using the request_queue in nvmet (Christoph Hellwig) - set min_align_mask before calculating max_hw_sectors (Rishabh Bhatnagar) - send a rediscover uevent when a persistent discovery controller reconnects (Sagi Grimberg) - misc nvmet-tcp fixes (Varun Prakash, zhenwei pi) - MD pull request via Song: - Various raid5 fix and clean up, by Logan Gunthorpe and David Sloan. - Raid10 performance optimization, by Yu Kuai. - sbitmap wakeup hang fixes (Hugh, Keith, Jan, Yu) - IO scheduler switching quisce fix (Keith) - s390/dasd block driver updates (Stefan) - support for recovery for the ublk driver (ZiyangZhang) - rnbd drivers fixes and updates (Guoqing, Santosh, ye, Christoph) - blk-mq and null_blk map fixes (Bart) - various bcache fixes (Coly, Jilin, Jules) - nbd signal hang fix (Shigeru) - block writeback throttling fix (Yu) - optimize the passthrough mapping handling (me) - prepare block cgroups to being gendisk based (Christoph) - get rid of an old PSI hack in the block layer, moving it to the callers instead where it belongs (Christoph) - blk-throttle fixes and cleanups (Yu) - misc fixes and cleanups (Liu Shixin, Liu Song, Miaohe, Pankaj, Ping-Xiang, Wolfram, Saurabh, Li Jinlin, Li Lei, Lin, Li zeming, Miaohe, Bart, Coly, Gaosheng * tag 'for-6.1/block-2022-10-03' of git://git.kernel.dk/linux: (162 commits) sbitmap: fix lockup while swapping block: add rationale for not using blk_mq_plug() when applicable block: adapt blk_mq_plug() to not plug for writes that require a zone lock s390/dasd: use blk_mq_alloc_disk blk-cgroup: don't update the blkg lookup hint in blkg_conf_prep nvmet: don't look at the request_queue in nvmet_bdev_set_limits nvmet: don't look at the request_queue in nvmet_bdev_zone_mgmt_emulate_all blk-mq: use quiesced elevator switch when reinitializing queues block: replace blk_queue_nowait with bdev_nowait nvme: remove nvme_ctrl_init_connect_q nvme-loop: use the tagset alloc/free helpers nvme-loop: store the generic nvme_ctrl in set->driver_data nvme-loop: initialize sqsize later nvme-fc: use the tagset alloc/free helpers nvme-fc: store the generic nvme_ctrl in set->driver_data nvme-fc: keep ctrl->sqsize in sync with opts->queue_size nvme-rdma: use the tagset alloc/free helpers nvme-rdma: store the generic nvme_ctrl in set->driver_data nvme-tcp: use the tagset alloc/free helpers nvme-tcp: store the generic nvme_ctrl in set->driver_data ...
Diffstat (limited to 'drivers/md/raid10.c')
-rw-r--r--drivers/md/raid10.c151
1 files changed, 96 insertions, 55 deletions
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index 64d6e4cd8a3a..3aa8b6e11d58 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -79,6 +79,21 @@ static void end_reshape(struct r10conf *conf);
#include "raid1-10.c"
+#define NULL_CMD
+#define cmd_before(conf, cmd) \
+ do { \
+ write_sequnlock_irq(&(conf)->resync_lock); \
+ cmd; \
+ } while (0)
+#define cmd_after(conf) write_seqlock_irq(&(conf)->resync_lock)
+
+#define wait_event_barrier_cmd(conf, cond, cmd) \
+ wait_event_cmd((conf)->wait_barrier, cond, cmd_before(conf, cmd), \
+ cmd_after(conf))
+
+#define wait_event_barrier(conf, cond) \
+ wait_event_barrier_cmd(conf, cond, NULL_CMD)
+
/*
* for resync bio, r10bio pointer can be retrieved from the per-bio
* 'struct resync_pages'.
@@ -274,6 +289,12 @@ static void put_buf(struct r10bio *r10_bio)
lower_barrier(conf);
}
+static void wake_up_barrier(struct r10conf *conf)
+{
+ if (wq_has_sleeper(&conf->wait_barrier))
+ wake_up(&conf->wait_barrier);
+}
+
static void reschedule_retry(struct r10bio *r10_bio)
{
unsigned long flags;
@@ -930,78 +951,101 @@ static void flush_pending_writes(struct r10conf *conf)
static void raise_barrier(struct r10conf *conf, int force)
{
+ write_seqlock_irq(&conf->resync_lock);
BUG_ON(force && !conf->barrier);
- spin_lock_irq(&conf->resync_lock);
/* Wait until no block IO is waiting (unless 'force') */
- wait_event_lock_irq(conf->wait_barrier, force || !conf->nr_waiting,
- conf->resync_lock);
+ wait_event_barrier(conf, force || !conf->nr_waiting);
/* block any new IO from starting */
- conf->barrier++;
+ WRITE_ONCE(conf->barrier, conf->barrier + 1);
/* Now wait for all pending IO to complete */
- wait_event_lock_irq(conf->wait_barrier,
- !atomic_read(&conf->nr_pending) && conf->barrier < RESYNC_DEPTH,
- conf->resync_lock);
+ wait_event_barrier(conf, !atomic_read(&conf->nr_pending) &&
+ conf->barrier < RESYNC_DEPTH);
- spin_unlock_irq(&conf->resync_lock);
+ write_sequnlock_irq(&conf->resync_lock);
}
static void lower_barrier(struct r10conf *conf)
{
unsigned long flags;
- spin_lock_irqsave(&conf->resync_lock, flags);
- conf->barrier--;
- spin_unlock_irqrestore(&conf->resync_lock, flags);
+
+ write_seqlock_irqsave(&conf->resync_lock, flags);
+ WRITE_ONCE(conf->barrier, conf->barrier - 1);
+ write_sequnlock_irqrestore(&conf->resync_lock, flags);
wake_up(&conf->wait_barrier);
}
+static bool stop_waiting_barrier(struct r10conf *conf)
+{
+ struct bio_list *bio_list = current->bio_list;
+
+ /* barrier is dropped */
+ if (!conf->barrier)
+ return true;
+
+ /*
+ * If there are already pending requests (preventing the barrier from
+ * rising completely), and the pre-process bio queue isn't empty, then
+ * don't wait, as we need to empty that queue to get the nr_pending
+ * count down.
+ */
+ if (atomic_read(&conf->nr_pending) && bio_list &&
+ (!bio_list_empty(&bio_list[0]) || !bio_list_empty(&bio_list[1])))
+ return true;
+
+ /* move on if recovery thread is blocked by us */
+ if (conf->mddev->thread->tsk == current &&
+ test_bit(MD_RECOVERY_RUNNING, &conf->mddev->recovery) &&
+ conf->nr_queued > 0)
+ return true;
+
+ return false;
+}
+
+static bool wait_barrier_nolock(struct r10conf *conf)
+{
+ unsigned int seq = read_seqbegin(&conf->resync_lock);
+
+ if (READ_ONCE(conf->barrier))
+ return false;
+
+ atomic_inc(&conf->nr_pending);
+ if (!read_seqretry(&conf->resync_lock, seq))
+ return true;
+
+ if (atomic_dec_and_test(&conf->nr_pending))
+ wake_up_barrier(conf);
+
+ return false;
+}
+
static bool wait_barrier(struct r10conf *conf, bool nowait)
{
bool ret = true;
- spin_lock_irq(&conf->resync_lock);
+ if (wait_barrier_nolock(conf))
+ return true;
+
+ write_seqlock_irq(&conf->resync_lock);
if (conf->barrier) {
- struct bio_list *bio_list = current->bio_list;
- conf->nr_waiting++;
- /* Wait for the barrier to drop.
- * However if there are already pending
- * requests (preventing the barrier from
- * rising completely), and the
- * pre-process bio queue isn't empty,
- * then don't wait, as we need to empty
- * that queue to get the nr_pending
- * count down.
- */
/* Return false when nowait flag is set */
if (nowait) {
ret = false;
} else {
+ conf->nr_waiting++;
raid10_log(conf->mddev, "wait barrier");
- wait_event_lock_irq(conf->wait_barrier,
- !conf->barrier ||
- (atomic_read(&conf->nr_pending) &&
- bio_list &&
- (!bio_list_empty(&bio_list[0]) ||
- !bio_list_empty(&bio_list[1]))) ||
- /* move on if recovery thread is
- * blocked by us
- */
- (conf->mddev->thread->tsk == current &&
- test_bit(MD_RECOVERY_RUNNING,
- &conf->mddev->recovery) &&
- conf->nr_queued > 0),
- conf->resync_lock);
+ wait_event_barrier(conf, stop_waiting_barrier(conf));
+ conf->nr_waiting--;
}
- conf->nr_waiting--;
if (!conf->nr_waiting)
wake_up(&conf->wait_barrier);
}
/* Only increment nr_pending when we wait */
if (ret)
atomic_inc(&conf->nr_pending);
- spin_unlock_irq(&conf->resync_lock);
+ write_sequnlock_irq(&conf->resync_lock);
return ret;
}
@@ -1009,7 +1053,7 @@ static void allow_barrier(struct r10conf *conf)
{
if ((atomic_dec_and_test(&conf->nr_pending)) ||
(conf->array_freeze_pending))
- wake_up(&conf->wait_barrier);
+ wake_up_barrier(conf);
}
static void freeze_array(struct r10conf *conf, int extra)
@@ -1026,27 +1070,24 @@ static void freeze_array(struct r10conf *conf, int extra)
* must match the number of pending IOs (nr_pending) before
* we continue.
*/
- spin_lock_irq(&conf->resync_lock);
+ write_seqlock_irq(&conf->resync_lock);
conf->array_freeze_pending++;
- conf->barrier++;
+ WRITE_ONCE(conf->barrier, conf->barrier + 1);
conf->nr_waiting++;
- wait_event_lock_irq_cmd(conf->wait_barrier,
- atomic_read(&conf->nr_pending) == conf->nr_queued+extra,
- conf->resync_lock,
- flush_pending_writes(conf));
-
+ wait_event_barrier_cmd(conf, atomic_read(&conf->nr_pending) ==
+ conf->nr_queued + extra, flush_pending_writes(conf));
conf->array_freeze_pending--;
- spin_unlock_irq(&conf->resync_lock);
+ write_sequnlock_irq(&conf->resync_lock);
}
static void unfreeze_array(struct r10conf *conf)
{
/* reverse the effect of the freeze */
- spin_lock_irq(&conf->resync_lock);
- conf->barrier--;
+ write_seqlock_irq(&conf->resync_lock);
+ WRITE_ONCE(conf->barrier, conf->barrier - 1);
conf->nr_waiting--;
wake_up(&conf->wait_barrier);
- spin_unlock_irq(&conf->resync_lock);
+ write_sequnlock_irq(&conf->resync_lock);
}
static sector_t choose_data_offset(struct r10bio *r10_bio,
@@ -1885,7 +1926,7 @@ static bool raid10_make_request(struct mddev *mddev, struct bio *bio)
__make_request(mddev, bio, sectors);
/* In case raid10d snuck in to freeze_array */
- wake_up(&conf->wait_barrier);
+ wake_up_barrier(conf);
return true;
}
@@ -1980,7 +2021,7 @@ static int enough(struct r10conf *conf, int ignore)
* Otherwise, it must be degraded:
* - recovery is interrupted.
* - &mddev->degraded is bumped.
-
+ *
* @rdev is marked as &Faulty excluding case when array is failed and
* &mddev->fail_last_dev is off.
*/
@@ -4032,7 +4073,7 @@ static struct r10conf *setup_conf(struct mddev *mddev)
INIT_LIST_HEAD(&conf->retry_list);
INIT_LIST_HEAD(&conf->bio_end_io_list);
- spin_lock_init(&conf->resync_lock);
+ seqlock_init(&conf->resync_lock);
init_waitqueue_head(&conf->wait_barrier);
atomic_set(&conf->nr_pending, 0);
@@ -4351,7 +4392,7 @@ static void *raid10_takeover_raid0(struct mddev *mddev, sector_t size, int devs)
rdev->new_raid_disk = rdev->raid_disk * 2;
rdev->sectors = size;
}
- conf->barrier = 1;
+ WRITE_ONCE(conf->barrier, 1);
}
return conf;