diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2024-09-16 13:33:06 +0200 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2024-09-16 13:33:06 +0200 |
commit | 26bb0d3f38a764b743a3ad5c8b6e5b5044d7ceb4 (patch) | |
tree | a08d01893b603d2f611a617f6055b54a835c03f0 /drivers/md/raid1.c | |
parent | Merge tag 'for-6.12/io_uring-20240913' of git://git.kernel.dk/linux (diff) | |
parent | Merge tag 'nvme-6.12-2024-09-13' of git://git.infradead.org/nvme into for-6.1... (diff) | |
download | linux-26bb0d3f38a764b743a3ad5c8b6e5b5044d7ceb4.tar.xz linux-26bb0d3f38a764b743a3ad5c8b6e5b5044d7ceb4.zip |
Merge tag 'for-6.12/block-20240913' of git://git.kernel.dk/linux
Pull block updates from Jens Axboe:
- MD changes via Song:
- md-bitmap refactoring (Yu Kuai)
- raid5 performance optimization (Artur Paszkiewicz)
- Other small fixes (Yu Kuai, Chen Ni)
- Add a sysfs entry 'new_level' (Xiao Ni)
- Improve information reported in /proc/mdstat (Mateusz Kusiak)
- NVMe changes via Keith:
- Asynchronous namespace scanning (Stuart)
- TCP TLS updates (Hannes)
- RDMA queue controller validation (Niklas)
- Align field names to the spec (Anuj)
- Metadata support validation (Puranjay)
- A syntax cleanup (Shen)
- Fix a Kconfig linking error (Arnd)
- New queue-depth quirk (Keith)
- Add missing unplug trace event (Keith)
- blk-iocost fixes (Colin, Konstantin)
- t10-pi modular removal and fixes (Alexey)
- Fix for potential BLKSECDISCARD overflow (Alexey)
- bio splitting cleanups and fixes (Christoph)
- Deal with folios rather than rather than pages, speeding up how the
block layer handles bigger IOs (Kundan)
- Use spinlocks rather than bit spinlocks in zram (Sebastian, Mike)
- Reduce zoned device overhead in ublk (Ming)
- Add and use sendpages_ok() for drbd and nvme-tcp (Ofir)
- Fix regression in partition error pointer checking (Riyan)
- Add support for write zeroes and rotational status in nbd (Wouter)
- Add Yu Kuai as new BFQ maintainer. The scheduler has been
unmaintained for quite a while.
- Various sets of fixes for BFQ (Yu Kuai)
- Misc fixes and cleanups (Alvaro, Christophe, Li, Md Haris, Mikhail,
Yang)
* tag 'for-6.12/block-20240913' of git://git.kernel.dk/linux: (120 commits)
nvme-pci: qdepth 1 quirk
block: fix potential invalid pointer dereference in blk_add_partition
blk_iocost: make read-only static array vrate_adj_pct const
block: unpin user pages belonging to a folio at once
mm: release number of pages of a folio
block: introduce folio awareness and add a bigger size from folio
block: Added folio-ized version of bio_add_hw_page()
block, bfq: factor out a helper to split bfqq in bfq_init_rq()
block, bfq: remove local variable 'bfqq_already_existing' in bfq_init_rq()
block, bfq: remove local variable 'split' in bfq_init_rq()
block, bfq: remove bfq_log_bfqg()
block, bfq: merge bfq_release_process_ref() into bfq_put_cooperator()
block, bfq: fix procress reference leakage for bfqq in merge chain
block, bfq: fix uaf for accessing waker_bfqq after splitting
blk-throttle: support prioritized processing of metadata
blk-throttle: remove last_low_overflow_time
drbd: Add NULL check for net_conf to prevent dereference in state validation
nvme-tcp: fix link failure for TCP auth
blk-mq: add missing unplug trace event
mtip32xx: Remove redundant null pointer checks in mtip_hw_debugfs_init()
...
Diffstat (limited to 'drivers/md/raid1.c')
-rw-r--r-- | drivers/md/raid1.c | 99 |
1 files changed, 45 insertions, 54 deletions
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index 761989d67906..6c9d24203f39 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c @@ -411,18 +411,20 @@ static void raid1_end_read_request(struct bio *bio) static void close_write(struct r1bio *r1_bio) { + struct mddev *mddev = r1_bio->mddev; + /* it really is the end of this request */ if (test_bit(R1BIO_BehindIO, &r1_bio->state)) { bio_free_pages(r1_bio->behind_master_bio); bio_put(r1_bio->behind_master_bio); r1_bio->behind_master_bio = NULL; } + /* clear the bitmap if all writes complete successfully */ - md_bitmap_endwrite(r1_bio->mddev->bitmap, r1_bio->sector, - r1_bio->sectors, - !test_bit(R1BIO_Degraded, &r1_bio->state), - test_bit(R1BIO_BehindIO, &r1_bio->state)); - md_write_end(r1_bio->mddev); + mddev->bitmap_ops->endwrite(mddev, r1_bio->sector, r1_bio->sectors, + !test_bit(R1BIO_Degraded, &r1_bio->state), + test_bit(R1BIO_BehindIO, &r1_bio->state)); + md_write_end(mddev); } static void r1_bio_write_done(struct r1bio *r1_bio) @@ -900,7 +902,7 @@ static void wake_up_barrier(struct r1conf *conf) static void flush_bio_list(struct r1conf *conf, struct bio *bio) { /* flush any pending bitmap writes to disk before proceeding w/ I/O */ - raid1_prepare_flush_writes(conf->mddev->bitmap); + raid1_prepare_flush_writes(conf->mddev); wake_up_barrier(conf); while (bio) { /* submit pending writes */ @@ -1317,13 +1319,11 @@ static void raid1_read_request(struct mddev *mddev, struct bio *bio, struct r1conf *conf = mddev->private; struct raid1_info *mirror; struct bio *read_bio; - struct bitmap *bitmap = mddev->bitmap; const enum req_op op = bio_op(bio); const blk_opf_t do_sync = bio->bi_opf & REQ_SYNC; int max_sectors; int rdisk; bool r1bio_existed = !!r1_bio; - char b[BDEVNAME_SIZE]; /* * If r1_bio is set, we are blocking the raid1d thread @@ -1332,16 +1332,6 @@ static void raid1_read_request(struct mddev *mddev, struct bio *bio, */ gfp_t gfp = r1_bio ? (GFP_NOIO | __GFP_HIGH) : GFP_NOIO; - if (r1bio_existed) { - /* Need to get the block device name carefully */ - struct md_rdev *rdev = conf->mirrors[r1_bio->read_disk].rdev; - - if (rdev) - snprintf(b, sizeof(b), "%pg", rdev->bdev); - else - strcpy(b, "???"); - } - /* * Still need barrier for READ in case that whole * array is frozen. @@ -1363,15 +1353,13 @@ static void raid1_read_request(struct mddev *mddev, struct bio *bio, * used and no empty request is available. */ rdisk = read_balance(conf, r1_bio, &max_sectors); - if (rdisk < 0) { /* couldn't find anywhere to read from */ - if (r1bio_existed) { - pr_crit_ratelimited("md/raid1:%s: %s: unrecoverable I/O read error for block %llu\n", + if (r1bio_existed) + pr_crit_ratelimited("md/raid1:%s: %pg: unrecoverable I/O read error for block %llu\n", mdname(mddev), - b, - (unsigned long long)r1_bio->sector); - } + conf->mirrors[r1_bio->read_disk].rdev->bdev, + r1_bio->sector); raid_end_bio_io(r1_bio); return; } @@ -1383,15 +1371,13 @@ static void raid1_read_request(struct mddev *mddev, struct bio *bio, (unsigned long long)r1_bio->sector, mirror->rdev->bdev); - if (test_bit(WriteMostly, &mirror->rdev->flags) && - bitmap) { + if (test_bit(WriteMostly, &mirror->rdev->flags)) { /* * Reading from a write-mostly device must take care not to * over-take any writes that are 'behind' */ mddev_add_trace_msg(mddev, "raid1 wait behind writes"); - wait_event(bitmap->behind_wait, - atomic_read(&bitmap->behind_writes) == 0); + mddev->bitmap_ops->wait_behind_writes(mddev); } if (max_sectors < bio_sectors(bio)) { @@ -1432,7 +1418,6 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio, struct r1conf *conf = mddev->private; struct r1bio *r1_bio; int i, disks; - struct bitmap *bitmap = mddev->bitmap; unsigned long flags; struct md_rdev *blocked_rdev; int first_clone; @@ -1585,7 +1570,7 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio, * at a time and thus needs a new bio that can fit the whole payload * this bio in page sized chunks. */ - if (write_behind && bitmap) + if (write_behind && mddev->bitmap) max_sectors = min_t(int, max_sectors, BIO_MAX_VECS * (PAGE_SIZE >> 9)); if (max_sectors < bio_sectors(bio)) { @@ -1612,19 +1597,23 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio, continue; if (first_clone) { + unsigned long max_write_behind = + mddev->bitmap_info.max_write_behind; + struct md_bitmap_stats stats; + int err; + /* do behind I/O ? * Not if there are too many, or cannot * allocate memory, or a reader on WriteMostly * is waiting for behind writes to flush */ - if (bitmap && write_behind && - (atomic_read(&bitmap->behind_writes) - < mddev->bitmap_info.max_write_behind) && - !waitqueue_active(&bitmap->behind_wait)) { + err = mddev->bitmap_ops->get_stats(mddev->bitmap, &stats); + if (!err && write_behind && !stats.behind_wait && + stats.behind_writes < max_write_behind) alloc_behind_master_bio(r1_bio, bio); - } - md_bitmap_startwrite(bitmap, r1_bio->sector, r1_bio->sectors, - test_bit(R1BIO_BehindIO, &r1_bio->state)); + mddev->bitmap_ops->startwrite( + mddev, r1_bio->sector, r1_bio->sectors, + test_bit(R1BIO_BehindIO, &r1_bio->state)); first_clone = 0; } @@ -2042,7 +2031,7 @@ static void abort_sync_write(struct mddev *mddev, struct r1bio *r1_bio) /* make sure these bits don't get cleared. */ do { - md_bitmap_end_sync(mddev->bitmap, s, &sync_blocks, 1); + mddev->bitmap_ops->end_sync(mddev, s, &sync_blocks); s += sync_blocks; sectors_to_go -= sync_blocks; } while (sectors_to_go > 0); @@ -2771,7 +2760,7 @@ static sector_t raid1_sync_request(struct mddev *mddev, sector_t sector_nr, int wonly = -1; int write_targets = 0, read_targets = 0; sector_t sync_blocks; - int still_degraded = 0; + bool still_degraded = false; int good_sectors = RESYNC_SECTORS; int min_bad = 0; /* number of sectors that are bad in all devices */ int idx = sector_to_idx(sector_nr); @@ -2788,12 +2777,12 @@ static sector_t raid1_sync_request(struct mddev *mddev, sector_t sector_nr, * We can find the current addess in mddev->curr_resync */ if (mddev->curr_resync < max_sector) /* aborted */ - md_bitmap_end_sync(mddev->bitmap, mddev->curr_resync, - &sync_blocks, 1); + mddev->bitmap_ops->end_sync(mddev, mddev->curr_resync, + &sync_blocks); else /* completed sync */ conf->fullsync = 0; - md_bitmap_close_sync(mddev->bitmap); + mddev->bitmap_ops->close_sync(mddev); close_sync(conf); if (mddev_is_clustered(mddev)) { @@ -2813,7 +2802,7 @@ static sector_t raid1_sync_request(struct mddev *mddev, sector_t sector_nr, /* before building a request, check if we can skip these blocks.. * This call the bitmap_start_sync doesn't actually record anything */ - if (!md_bitmap_start_sync(mddev->bitmap, sector_nr, &sync_blocks, 1) && + if (!mddev->bitmap_ops->start_sync(mddev, sector_nr, &sync_blocks, true) && !conf->fullsync && !test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery)) { /* We can skip this block, and probably several more */ *skipped = 1; @@ -2831,9 +2820,9 @@ static sector_t raid1_sync_request(struct mddev *mddev, sector_t sector_nr, * sector_nr + two times RESYNC_SECTORS */ - md_bitmap_cond_end_sync(mddev->bitmap, sector_nr, - mddev_is_clustered(mddev) && (sector_nr + 2 * RESYNC_SECTORS > conf->cluster_sync_high)); - + mddev->bitmap_ops->cond_end_sync(mddev, sector_nr, + mddev_is_clustered(mddev) && + (sector_nr + 2 * RESYNC_SECTORS > conf->cluster_sync_high)); if (raise_barrier(conf, sector_nr)) return 0; @@ -2864,7 +2853,7 @@ static sector_t raid1_sync_request(struct mddev *mddev, sector_t sector_nr, if (rdev == NULL || test_bit(Faulty, &rdev->flags)) { if (i < conf->raid_disks) - still_degraded = 1; + still_degraded = true; } else if (!test_bit(In_sync, &rdev->flags)) { bio->bi_opf = REQ_OP_WRITE; bio->bi_end_io = end_sync_write; @@ -2988,8 +2977,8 @@ static sector_t raid1_sync_request(struct mddev *mddev, sector_t sector_nr, if (len == 0) break; if (sync_blocks == 0) { - if (!md_bitmap_start_sync(mddev->bitmap, sector_nr, - &sync_blocks, still_degraded) && + if (!mddev->bitmap_ops->start_sync(mddev, sector_nr, + &sync_blocks, still_degraded) && !conf->fullsync && !test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery)) break; @@ -3313,14 +3302,16 @@ static int raid1_resize(struct mddev *mddev, sector_t sectors) * worth it. */ sector_t newsize = raid1_size(mddev, sectors, 0); + int ret; + if (mddev->external_size && mddev->array_sectors > newsize) return -EINVAL; - if (mddev->bitmap) { - int ret = md_bitmap_resize(mddev->bitmap, newsize, 0, 0); - if (ret) - return ret; - } + + ret = mddev->bitmap_ops->resize(mddev, newsize, 0, false); + if (ret) + return ret; + md_set_array_sectors(mddev, newsize); if (sectors > mddev->dev_sectors && mddev->recovery_cp > mddev->dev_sectors) { |