diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2021-07-01 03:19:39 +0200 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2021-07-01 03:19:39 +0200 |
commit | 2cfa582be80081fb8db02d4d9b44bff34b82ac54 (patch) | |
tree | 2faf8db8426b389ca8c9ed76065c688431bb7eb9 /block | |
parent | Merge tag 'net-next-5.14' of git://git.kernel.org/pub/scm/linux/kernel/git/ne... (diff) | |
parent | dm writecache: make writeback pause configurable (diff) | |
download | linux-2cfa582be80081fb8db02d4d9b44bff34b82ac54.tar.xz linux-2cfa582be80081fb8db02d4d9b44bff34b82ac54.zip |
Merge tag 'for-5.14/dm-changes' of git://git.kernel.org/pub/scm/linux/kernel/git/device-mapper/linux-dm
Pull device mapper updates from Mike Snitzer:
- Various DM persistent-data library improvements and fixes that
benefit both the DM thinp and cache targets.
- A few small DM kcopyd efficiency improvements.
- Significant zoned related block core, DM core and DM zoned target
changes that culminate with adding zoned append emulation (which is
required to properly fix DM crypt's zoned support).
- Various DM writecache target changes that improve efficiency. Adds an
optional "metadata_only" feature that only promotes bios flagged with
REQ_META. But the most significant improvement is writecache's
ability to pause writeback, for a confiurable time, if/when the
working set is larger than the cache (and the cache is full) -- this
ensures performance is no worse than the slower origin device.
* tag 'for-5.14/dm-changes' of git://git.kernel.org/pub/scm/linux/kernel/git/device-mapper/linux-dm: (35 commits)
dm writecache: make writeback pause configurable
dm writecache: pause writeback if cache full and origin being written directly
dm io tracker: factor out IO tracker
dm btree remove: assign new_root only when removal succeeds
dm zone: fix dm_revalidate_zones() memory allocation
dm ps io affinity: remove redundant continue statement
dm writecache: add optional "metadata_only" parameter
dm writecache: add "cleaner" and "max_age" to Documentation
dm writecache: write at least 4k when committing
dm writecache: flush origin device when writing and cache is full
dm writecache: have ssd writeback wait if the kcopyd workqueue is busy
dm writecache: use list_move instead of list_del/list_add in writecache_writeback()
dm writecache: commit just one block, not a full page
dm writecache: remove unused gfp_t argument from wc_add_block()
dm crypt: Fix zoned block device support
dm: introduce zone append emulation
dm: rearrange core declarations for extended use from dm-zone.c
block: introduce BIO_ZONE_WRITE_LOCKED bio flag
block: introduce bio zone helpers
block: improve handling of all zones reset operation
...
Diffstat (limited to 'block')
-rw-r--r-- | block/blk-zoned.c | 119 |
1 files changed, 92 insertions, 27 deletions
diff --git a/block/blk-zoned.c b/block/blk-zoned.c index 250cb76ee615..86fce751bb17 100644 --- a/block/blk-zoned.c +++ b/block/blk-zoned.c @@ -161,18 +161,89 @@ int blkdev_report_zones(struct block_device *bdev, sector_t sector, } EXPORT_SYMBOL_GPL(blkdev_report_zones); -static inline bool blkdev_allow_reset_all_zones(struct block_device *bdev, - sector_t sector, - sector_t nr_sectors) +static inline unsigned long *blk_alloc_zone_bitmap(int node, + unsigned int nr_zones) { - if (!blk_queue_zone_resetall(bdev_get_queue(bdev))) - return false; + return kcalloc_node(BITS_TO_LONGS(nr_zones), sizeof(unsigned long), + GFP_NOIO, node); +} +static int blk_zone_need_reset_cb(struct blk_zone *zone, unsigned int idx, + void *data) +{ /* - * REQ_OP_ZONE_RESET_ALL can be executed only if the number of sectors - * of the applicable zone range is the entire disk. + * For an all-zones reset, ignore conventional, empty, read-only + * and offline zones. */ - return !sector && nr_sectors == get_capacity(bdev->bd_disk); + switch (zone->cond) { + case BLK_ZONE_COND_NOT_WP: + case BLK_ZONE_COND_EMPTY: + case BLK_ZONE_COND_READONLY: + case BLK_ZONE_COND_OFFLINE: + return 0; + default: + set_bit(idx, (unsigned long *)data); + return 0; + } +} + +static int blkdev_zone_reset_all_emulated(struct block_device *bdev, + gfp_t gfp_mask) +{ + struct request_queue *q = bdev_get_queue(bdev); + sector_t capacity = get_capacity(bdev->bd_disk); + sector_t zone_sectors = blk_queue_zone_sectors(q); + unsigned long *need_reset; + struct bio *bio = NULL; + sector_t sector = 0; + int ret; + + need_reset = blk_alloc_zone_bitmap(q->node, q->nr_zones); + if (!need_reset) + return -ENOMEM; + + ret = bdev->bd_disk->fops->report_zones(bdev->bd_disk, 0, + q->nr_zones, blk_zone_need_reset_cb, + need_reset); + if (ret < 0) + goto out_free_need_reset; + + ret = 0; + while (sector < capacity) { + if (!test_bit(blk_queue_zone_no(q, sector), need_reset)) { + sector += zone_sectors; + continue; + } + + bio = blk_next_bio(bio, 0, gfp_mask); + bio_set_dev(bio, bdev); + bio->bi_opf = REQ_OP_ZONE_RESET | REQ_SYNC; + bio->bi_iter.bi_sector = sector; + sector += zone_sectors; + + /* This may take a while, so be nice to others */ + cond_resched(); + } + + if (bio) { + ret = submit_bio_wait(bio); + bio_put(bio); + } + +out_free_need_reset: + kfree(need_reset); + return ret; +} + +static int blkdev_zone_reset_all(struct block_device *bdev, gfp_t gfp_mask) +{ + struct bio bio; + + bio_init(&bio, NULL, 0); + bio_set_dev(&bio, bdev); + bio.bi_opf = REQ_OP_ZONE_RESET_ALL | REQ_SYNC; + + return submit_bio_wait(&bio); } /** @@ -200,7 +271,7 @@ int blkdev_zone_mgmt(struct block_device *bdev, enum req_opf op, sector_t capacity = get_capacity(bdev->bd_disk); sector_t end_sector = sector + nr_sectors; struct bio *bio = NULL; - int ret; + int ret = 0; if (!blk_queue_is_zoned(q)) return -EOPNOTSUPP; @@ -222,20 +293,21 @@ int blkdev_zone_mgmt(struct block_device *bdev, enum req_opf op, if ((nr_sectors & (zone_sectors - 1)) && end_sector != capacity) return -EINVAL; + /* + * In the case of a zone reset operation over all zones, + * REQ_OP_ZONE_RESET_ALL can be used with devices supporting this + * command. For other devices, we emulate this command behavior by + * identifying the zones needing a reset. + */ + if (op == REQ_OP_ZONE_RESET && sector == 0 && nr_sectors == capacity) { + if (!blk_queue_zone_resetall(q)) + return blkdev_zone_reset_all_emulated(bdev, gfp_mask); + return blkdev_zone_reset_all(bdev, gfp_mask); + } + while (sector < end_sector) { bio = blk_next_bio(bio, 0, gfp_mask); bio_set_dev(bio, bdev); - - /* - * Special case for the zone reset operation that reset all - * zones, this is useful for applications like mkfs. - */ - if (op == REQ_OP_ZONE_RESET && - blkdev_allow_reset_all_zones(bdev, sector, nr_sectors)) { - bio->bi_opf = REQ_OP_ZONE_RESET_ALL | REQ_SYNC; - break; - } - bio->bi_opf = op | REQ_SYNC; bio->bi_iter.bi_sector = sector; sector += zone_sectors; @@ -396,13 +468,6 @@ int blkdev_zone_mgmt_ioctl(struct block_device *bdev, fmode_t mode, return ret; } -static inline unsigned long *blk_alloc_zone_bitmap(int node, - unsigned int nr_zones) -{ - return kcalloc_node(BITS_TO_LONGS(nr_zones), sizeof(unsigned long), - GFP_NOIO, node); -} - void blk_queue_free_zone_bitmaps(struct request_queue *q) { kfree(q->conv_zones_bitmap); |