diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2020-10-13 22:04:41 +0200 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2020-10-13 22:04:41 +0200 |
commit | 7cd4ecd9177b94af783b8e21de7c65b41a871342 (patch) | |
tree | 3ca393f3eaeeaad56d4ab60f87e28d7197b0ba21 /drivers/block | |
parent | Merge tag 'libata-5.10-2020-10-12' of git://git.kernel.dk/linux-block (diff) | |
parent | Merge branch 'md-next' of https://git.kernel.org/pub/scm/linux/kernel/git/son... (diff) | |
download | linux-7cd4ecd9177b94af783b8e21de7c65b41a871342.tar.xz linux-7cd4ecd9177b94af783b8e21de7c65b41a871342.zip |
Merge tag 'drivers-5.10-2020-10-12' of git://git.kernel.dk/linux-block
Pull block driver updates from Jens Axboe:
"Here are the driver updates for 5.10.
A few SCSI updates in here too, in coordination with Martin as they
depend on core block changes for the shared tag bitmap.
This contains:
- NVMe pull requests via Christoph:
- fix keep alive timer modification (Amit Engel)
- order the PCI ID list more sensibly (Andy Shevchenko)
- cleanup the open by controller helper (Chaitanya Kulkarni)
- use an xarray for the CSE log lookup (Chaitanya Kulkarni)
- support ZNS in nvmet passthrough mode (Chaitanya Kulkarni)
- fix nvme_ns_report_zones (Christoph Hellwig)
- add a sanity check to nvmet-fc (James Smart)
- fix interrupt allocation when too many polled queues are
specified (Jeffle Xu)
- small nvmet-tcp optimization (Mark Wunderlich)
- fix a controller refcount leak on init failure (Chaitanya
Kulkarni)
- misc cleanups (Chaitanya Kulkarni)
- major refactoring of the scanning code (Christoph Hellwig)
- MD updates via Song:
- Bug fixes in bitmap code, from Zhao Heming
- Fix a work queue check, from Guoqing Jiang
- Fix raid5 oops with reshape, from Song Liu
- Clean up unused code, from Jason Yan
- Discard improvements, from Xiao Ni
- raid5/6 page offset support, from Yufen Yu
- Shared tag bitmap for SCSI/hisi_sas/null_blk (John, Kashyap,
Hannes)
- null_blk open/active zone limit support (Niklas)
- Set of bcache updates (Coly, Dongsheng, Qinglang)"
* tag 'drivers-5.10-2020-10-12' of git://git.kernel.dk/linux-block: (78 commits)
md/raid5: fix oops during stripe resizing
md/bitmap: fix memory leak of temporary bitmap
md: fix the checking of wrong work queue
md/bitmap: md_bitmap_get_counter returns wrong blocks
md/bitmap: md_bitmap_read_sb uses wrong bitmap blocks
md/raid0: remove unused function is_io_in_chunk_boundary()
nvme-core: remove extra condition for vwc
nvme-core: remove extra variable
nvme: remove nvme_identify_ns_list
nvme: refactor nvme_validate_ns
nvme: move nvme_validate_ns
nvme: query namespace identifiers before adding the namespace
nvme: revalidate zone bitmaps in nvme_update_ns_info
nvme: remove nvme_update_formats
nvme: update the known admin effects
nvme: set the queue limits in nvme_update_ns_info
nvme: remove the 0 lba_shift check in nvme_update_ns_info
nvme: clean up the check for too large logic block sizes
nvme: freeze the queue over ->lba_shift updates
nvme: factor out a nvme_configure_metadata helper
...
Diffstat (limited to 'drivers/block')
-rw-r--r-- | drivers/block/null_blk.h | 5 | ||||
-rw-r--r-- | drivers/block/null_blk_main.c | 22 | ||||
-rw-r--r-- | drivers/block/null_blk_zoned.c | 319 | ||||
-rw-r--r-- | drivers/block/rsxx/core.c | 2 |
4 files changed, 289 insertions, 59 deletions
diff --git a/drivers/block/null_blk.h b/drivers/block/null_blk.h index daed4a9c3436..d2e7db43a52a 100644 --- a/drivers/block/null_blk.h +++ b/drivers/block/null_blk.h @@ -42,6 +42,9 @@ struct nullb_device { struct badblocks badblocks; unsigned int nr_zones; + unsigned int nr_zones_imp_open; + unsigned int nr_zones_exp_open; + unsigned int nr_zones_closed; struct blk_zone *zones; sector_t zone_size_sects; @@ -51,6 +54,8 @@ struct nullb_device { unsigned long zone_size; /* zone size in MB if device is zoned */ unsigned long zone_capacity; /* zone capacity in MB if device is zoned */ unsigned int zone_nr_conv; /* number of conventional zones */ + unsigned int zone_max_open; /* max number of open zones */ + unsigned int zone_max_active; /* max number of active zones */ unsigned int submit_queues; /* number of submission queues */ unsigned int home_node; /* home node for the device */ unsigned int queue_mode; /* block interface */ diff --git a/drivers/block/null_blk_main.c b/drivers/block/null_blk_main.c index d74443a9c8fa..4685ea401d5b 100644 --- a/drivers/block/null_blk_main.c +++ b/drivers/block/null_blk_main.c @@ -164,6 +164,10 @@ static bool shared_tags; module_param(shared_tags, bool, 0444); MODULE_PARM_DESC(shared_tags, "Share tag set between devices for blk-mq"); +static bool g_shared_tag_bitmap; +module_param_named(shared_tag_bitmap, g_shared_tag_bitmap, bool, 0444); +MODULE_PARM_DESC(shared_tag_bitmap, "Use shared tag bitmap for all submission queues for blk-mq"); + static int g_irqmode = NULL_IRQ_SOFTIRQ; static int null_set_irqmode(const char *str, const struct kernel_param *kp) @@ -208,6 +212,14 @@ static unsigned int g_zone_nr_conv; module_param_named(zone_nr_conv, g_zone_nr_conv, uint, 0444); MODULE_PARM_DESC(zone_nr_conv, "Number of conventional zones when block device is zoned. Default: 0"); +static unsigned int g_zone_max_open; +module_param_named(zone_max_open, g_zone_max_open, uint, 0444); +MODULE_PARM_DESC(zone_max_open, "Maximum number of open zones when block device is zoned. Default: 0 (no limit)"); + +static unsigned int g_zone_max_active; +module_param_named(zone_max_active, g_zone_max_active, uint, 0444); +MODULE_PARM_DESC(zone_max_active, "Maximum number of active zones when block device is zoned. Default: 0 (no limit)"); + static struct nullb_device *null_alloc_dev(void); static void null_free_dev(struct nullb_device *dev); static void null_del_dev(struct nullb *nullb); @@ -347,6 +359,8 @@ NULLB_DEVICE_ATTR(zoned, bool, NULL); NULLB_DEVICE_ATTR(zone_size, ulong, NULL); NULLB_DEVICE_ATTR(zone_capacity, ulong, NULL); NULLB_DEVICE_ATTR(zone_nr_conv, uint, NULL); +NULLB_DEVICE_ATTR(zone_max_open, uint, NULL); +NULLB_DEVICE_ATTR(zone_max_active, uint, NULL); static ssize_t nullb_device_power_show(struct config_item *item, char *page) { @@ -464,6 +478,8 @@ static struct configfs_attribute *nullb_device_attrs[] = { &nullb_device_attr_zone_size, &nullb_device_attr_zone_capacity, &nullb_device_attr_zone_nr_conv, + &nullb_device_attr_zone_max_open, + &nullb_device_attr_zone_max_active, NULL, }; @@ -517,7 +533,7 @@ nullb_group_drop_item(struct config_group *group, struct config_item *item) static ssize_t memb_group_features_show(struct config_item *item, char *page) { return snprintf(page, PAGE_SIZE, - "memory_backed,discard,bandwidth,cache,badblocks,zoned,zone_size,zone_capacity,zone_nr_conv\n"); + "memory_backed,discard,bandwidth,cache,badblocks,zoned,zone_size,zone_capacity,zone_nr_conv,zone_max_open,zone_max_active\n"); } CONFIGFS_ATTR_RO(memb_group_, features); @@ -580,6 +596,8 @@ static struct nullb_device *null_alloc_dev(void) dev->zone_size = g_zone_size; dev->zone_capacity = g_zone_capacity; dev->zone_nr_conv = g_zone_nr_conv; + dev->zone_max_open = g_zone_max_open; + dev->zone_max_active = g_zone_max_active; return dev; } @@ -1692,6 +1710,8 @@ static int null_init_tag_set(struct nullb *nullb, struct blk_mq_tag_set *set) set->flags = BLK_MQ_F_SHOULD_MERGE; if (g_no_sched) set->flags |= BLK_MQ_F_NO_SCHED; + if (g_shared_tag_bitmap) + set->flags |= BLK_MQ_F_TAG_HCTX_SHARED; set->driver_data = NULL; if ((nullb && nullb->dev->blocking) || g_blocking) diff --git a/drivers/block/null_blk_zoned.c b/drivers/block/null_blk_zoned.c index 3d25c9ad2383..fa0cc70f05e6 100644 --- a/drivers/block/null_blk_zoned.c +++ b/drivers/block/null_blk_zoned.c @@ -51,6 +51,22 @@ int null_init_zoned_dev(struct nullb_device *dev, struct request_queue *q) dev->zone_nr_conv); } + /* Max active zones has to be < nbr of seq zones in order to be enforceable */ + if (dev->zone_max_active >= dev->nr_zones - dev->zone_nr_conv) { + dev->zone_max_active = 0; + pr_info("zone_max_active limit disabled, limit >= zone count\n"); + } + + /* Max open zones has to be <= max active zones */ + if (dev->zone_max_active && dev->zone_max_open > dev->zone_max_active) { + dev->zone_max_open = dev->zone_max_active; + pr_info("changed the maximum number of open zones to %u\n", + dev->nr_zones); + } else if (dev->zone_max_open >= dev->nr_zones - dev->zone_nr_conv) { + dev->zone_max_open = 0; + pr_info("zone_max_open limit disabled, limit >= zone count\n"); + } + for (i = 0; i < dev->zone_nr_conv; i++) { struct blk_zone *zone = &dev->zones[i]; @@ -99,6 +115,8 @@ int null_register_zoned_dev(struct nullb *nullb) } blk_queue_max_zone_append_sectors(q, dev->zone_size_sects); + blk_queue_max_open_zones(q, dev->zone_max_open); + blk_queue_max_active_zones(q, dev->zone_max_active); return 0; } @@ -159,6 +177,103 @@ size_t null_zone_valid_read_len(struct nullb *nullb, return (zone->wp - sector) << SECTOR_SHIFT; } +static blk_status_t null_close_zone(struct nullb_device *dev, struct blk_zone *zone) +{ + if (zone->type == BLK_ZONE_TYPE_CONVENTIONAL) + return BLK_STS_IOERR; + + switch (zone->cond) { + case BLK_ZONE_COND_CLOSED: + /* close operation on closed is not an error */ + return BLK_STS_OK; + case BLK_ZONE_COND_IMP_OPEN: + dev->nr_zones_imp_open--; + break; + case BLK_ZONE_COND_EXP_OPEN: + dev->nr_zones_exp_open--; + break; + case BLK_ZONE_COND_EMPTY: + case BLK_ZONE_COND_FULL: + default: + return BLK_STS_IOERR; + } + + if (zone->wp == zone->start) { + zone->cond = BLK_ZONE_COND_EMPTY; + } else { + zone->cond = BLK_ZONE_COND_CLOSED; + dev->nr_zones_closed++; + } + + return BLK_STS_OK; +} + +static void null_close_first_imp_zone(struct nullb_device *dev) +{ + unsigned int i; + + for (i = dev->zone_nr_conv; i < dev->nr_zones; i++) { + if (dev->zones[i].cond == BLK_ZONE_COND_IMP_OPEN) { + null_close_zone(dev, &dev->zones[i]); + return; + } + } +} + +static bool null_can_set_active(struct nullb_device *dev) +{ + if (!dev->zone_max_active) + return true; + + return dev->nr_zones_exp_open + dev->nr_zones_imp_open + + dev->nr_zones_closed < dev->zone_max_active; +} + +static bool null_can_open(struct nullb_device *dev) +{ + if (!dev->zone_max_open) + return true; + + if (dev->nr_zones_exp_open + dev->nr_zones_imp_open < dev->zone_max_open) + return true; + + if (dev->nr_zones_imp_open && null_can_set_active(dev)) { + null_close_first_imp_zone(dev); + return true; + } + + return false; +} + +/* + * This function matches the manage open zone resources function in the ZBC standard, + * with the addition of max active zones support (added in the ZNS standard). + * + * The function determines if a zone can transition to implicit open or explicit open, + * while maintaining the max open zone (and max active zone) limit(s). It may close an + * implicit open zone in order to make additional zone resources available. + * + * ZBC states that an implicit open zone shall be closed only if there is not + * room within the open limit. However, with the addition of an active limit, + * it is not certain that closing an implicit open zone will allow a new zone + * to be opened, since we might already be at the active limit capacity. + */ +static bool null_has_zone_resources(struct nullb_device *dev, struct blk_zone *zone) +{ + switch (zone->cond) { + case BLK_ZONE_COND_EMPTY: + if (!null_can_set_active(dev)) + return false; + fallthrough; + case BLK_ZONE_COND_CLOSED: + return null_can_open(dev); + default: + /* Should never be called for other states */ + WARN_ON(1); + return false; + } +} + static blk_status_t null_zone_write(struct nullb_cmd *cmd, sector_t sector, unsigned int nr_sectors, bool append) { @@ -177,43 +292,155 @@ static blk_status_t null_zone_write(struct nullb_cmd *cmd, sector_t sector, /* Cannot write to a full zone */ return BLK_STS_IOERR; case BLK_ZONE_COND_EMPTY: + case BLK_ZONE_COND_CLOSED: + if (!null_has_zone_resources(dev, zone)) + return BLK_STS_IOERR; + break; case BLK_ZONE_COND_IMP_OPEN: case BLK_ZONE_COND_EXP_OPEN: + break; + default: + /* Invalid zone condition */ + return BLK_STS_IOERR; + } + + /* + * Regular writes must be at the write pointer position. + * Zone append writes are automatically issued at the write + * pointer and the position returned using the request or BIO + * sector. + */ + if (append) { + sector = zone->wp; + if (cmd->bio) + cmd->bio->bi_iter.bi_sector = sector; + else + cmd->rq->__sector = sector; + } else if (sector != zone->wp) { + return BLK_STS_IOERR; + } + + if (zone->wp + nr_sectors > zone->start + zone->capacity) + return BLK_STS_IOERR; + + if (zone->cond == BLK_ZONE_COND_CLOSED) { + dev->nr_zones_closed--; + dev->nr_zones_imp_open++; + } else if (zone->cond == BLK_ZONE_COND_EMPTY) { + dev->nr_zones_imp_open++; + } + if (zone->cond != BLK_ZONE_COND_EXP_OPEN) + zone->cond = BLK_ZONE_COND_IMP_OPEN; + + ret = null_process_cmd(cmd, REQ_OP_WRITE, sector, nr_sectors); + if (ret != BLK_STS_OK) + return ret; + + zone->wp += nr_sectors; + if (zone->wp == zone->start + zone->capacity) { + if (zone->cond == BLK_ZONE_COND_EXP_OPEN) + dev->nr_zones_exp_open--; + else if (zone->cond == BLK_ZONE_COND_IMP_OPEN) + dev->nr_zones_imp_open--; + zone->cond = BLK_ZONE_COND_FULL; + } + return BLK_STS_OK; +} + +static blk_status_t null_open_zone(struct nullb_device *dev, struct blk_zone *zone) +{ + if (zone->type == BLK_ZONE_TYPE_CONVENTIONAL) + return BLK_STS_IOERR; + + switch (zone->cond) { + case BLK_ZONE_COND_EXP_OPEN: + /* open operation on exp open is not an error */ + return BLK_STS_OK; + case BLK_ZONE_COND_EMPTY: + if (!null_has_zone_resources(dev, zone)) + return BLK_STS_IOERR; + break; + case BLK_ZONE_COND_IMP_OPEN: + dev->nr_zones_imp_open--; + break; case BLK_ZONE_COND_CLOSED: - /* - * Regular writes must be at the write pointer position. - * Zone append writes are automatically issued at the write - * pointer and the position returned using the request or BIO - * sector. - */ - if (append) { - sector = zone->wp; - if (cmd->bio) - cmd->bio->bi_iter.bi_sector = sector; - else - cmd->rq->__sector = sector; - } else if (sector != zone->wp) { + if (!null_has_zone_resources(dev, zone)) return BLK_STS_IOERR; - } + dev->nr_zones_closed--; + break; + case BLK_ZONE_COND_FULL: + default: + return BLK_STS_IOERR; + } + + zone->cond = BLK_ZONE_COND_EXP_OPEN; + dev->nr_zones_exp_open++; - if (zone->wp + nr_sectors > zone->start + zone->capacity) + return BLK_STS_OK; +} + +static blk_status_t null_finish_zone(struct nullb_device *dev, struct blk_zone *zone) +{ + if (zone->type == BLK_ZONE_TYPE_CONVENTIONAL) + return BLK_STS_IOERR; + + switch (zone->cond) { + case BLK_ZONE_COND_FULL: + /* finish operation on full is not an error */ + return BLK_STS_OK; + case BLK_ZONE_COND_EMPTY: + if (!null_has_zone_resources(dev, zone)) return BLK_STS_IOERR; + break; + case BLK_ZONE_COND_IMP_OPEN: + dev->nr_zones_imp_open--; + break; + case BLK_ZONE_COND_EXP_OPEN: + dev->nr_zones_exp_open--; + break; + case BLK_ZONE_COND_CLOSED: + if (!null_has_zone_resources(dev, zone)) + return BLK_STS_IOERR; + dev->nr_zones_closed--; + break; + default: + return BLK_STS_IOERR; + } - if (zone->cond != BLK_ZONE_COND_EXP_OPEN) - zone->cond = BLK_ZONE_COND_IMP_OPEN; + zone->cond = BLK_ZONE_COND_FULL; + zone->wp = zone->start + zone->len; - ret = null_process_cmd(cmd, REQ_OP_WRITE, sector, nr_sectors); - if (ret != BLK_STS_OK) - return ret; + return BLK_STS_OK; +} + +static blk_status_t null_reset_zone(struct nullb_device *dev, struct blk_zone *zone) +{ + if (zone->type == BLK_ZONE_TYPE_CONVENTIONAL) + return BLK_STS_IOERR; - zone->wp += nr_sectors; - if (zone->wp == zone->start + zone->capacity) - zone->cond = BLK_ZONE_COND_FULL; + switch (zone->cond) { + case BLK_ZONE_COND_EMPTY: + /* reset operation on empty is not an error */ return BLK_STS_OK; + case BLK_ZONE_COND_IMP_OPEN: + dev->nr_zones_imp_open--; + break; + case BLK_ZONE_COND_EXP_OPEN: + dev->nr_zones_exp_open--; + break; + case BLK_ZONE_COND_CLOSED: + dev->nr_zones_closed--; + break; + case BLK_ZONE_COND_FULL: + break; default: - /* Invalid zone condition */ return BLK_STS_IOERR; } + + zone->cond = BLK_ZONE_COND_EMPTY; + zone->wp = zone->start; + + return BLK_STS_OK; } static blk_status_t null_zone_mgmt(struct nullb_cmd *cmd, enum req_opf op, @@ -222,56 +449,34 @@ static blk_status_t null_zone_mgmt(struct nullb_cmd *cmd, enum req_opf op, struct nullb_device *dev = cmd->nq->dev; unsigned int zone_no = null_zone_no(dev, sector); struct blk_zone *zone = &dev->zones[zone_no]; + blk_status_t ret = BLK_STS_OK; size_t i; switch (op) { case REQ_OP_ZONE_RESET_ALL: - for (i = 0; i < dev->nr_zones; i++) { - if (zone[i].type == BLK_ZONE_TYPE_CONVENTIONAL) - continue; - zone[i].cond = BLK_ZONE_COND_EMPTY; - zone[i].wp = zone[i].start; - } + for (i = dev->zone_nr_conv; i < dev->nr_zones; i++) + null_reset_zone(dev, &dev->zones[i]); break; case REQ_OP_ZONE_RESET: - if (zone->type == BLK_ZONE_TYPE_CONVENTIONAL) - return BLK_STS_IOERR; - - zone->cond = BLK_ZONE_COND_EMPTY; - zone->wp = zone->start; + ret = null_reset_zone(dev, zone); break; case REQ_OP_ZONE_OPEN: - if (zone->type == BLK_ZONE_TYPE_CONVENTIONAL) - return BLK_STS_IOERR; - if (zone->cond == BLK_ZONE_COND_FULL) - return BLK_STS_IOERR; - - zone->cond = BLK_ZONE_COND_EXP_OPEN; + ret = null_open_zone(dev, zone); break; case REQ_OP_ZONE_CLOSE: - if (zone->type == BLK_ZONE_TYPE_CONVENTIONAL) - return BLK_STS_IOERR; - if (zone->cond == BLK_ZONE_COND_FULL) - return BLK_STS_IOERR; - - if (zone->wp == zone->start) - zone->cond = BLK_ZONE_COND_EMPTY; - else - zone->cond = BLK_ZONE_COND_CLOSED; + ret = null_close_zone(dev, zone); break; case REQ_OP_ZONE_FINISH: - if (zone->type == BLK_ZONE_TYPE_CONVENTIONAL) - return BLK_STS_IOERR; - - zone->cond = BLK_ZONE_COND_FULL; - zone->wp = zone->start + zone->len; + ret = null_finish_zone(dev, zone); break; default: return BLK_STS_NOTSUPP; } - trace_nullb_zone_op(cmd, zone_no, zone->cond); - return BLK_STS_OK; + if (ret == BLK_STS_OK) + trace_nullb_zone_op(cmd, zone_no, zone->cond); + + return ret; } blk_status_t null_process_zoned_cmd(struct nullb_cmd *cmd, enum req_opf op, diff --git a/drivers/block/rsxx/core.c b/drivers/block/rsxx/core.c index 8799e3bab067..63f549889f87 100644 --- a/drivers/block/rsxx/core.c +++ b/drivers/block/rsxx/core.c @@ -439,7 +439,7 @@ static void card_state_change(struct rsxx_cardinfo *card, case CARD_STATE_FAULT: dev_crit(CARD_TO_DEV(card), "Hardware Fault reported!\n"); - /* Fall through. */ + fallthrough; /* Everything else, detach DMA interface if it's attached. */ case CARD_STATE_SHUTDOWN: |