diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2017-09-13 19:20:41 +0200 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2017-09-13 19:20:41 +0200 |
commit | 80a0d644d37296d7cac1a4956cb4e916770083e0 (patch) | |
tree | 9571808cfbe611496e7d7957504e78a61bc77cd4 | |
parent | Merge tag 'docs-4.14' of git://git.lwn.net/linux (diff) | |
parent | block: directly insert blk-mq request from blk_insert_cloned_request() (diff) | |
download | linux-80a0d644d37296d7cac1a4956cb4e916770083e0.tar.xz linux-80a0d644d37296d7cac1a4956cb4e916770083e0.zip |
Merge branch 'for-linus' of git://git.kernel.dk/linux-block
Pull block fixes from Jens Axboe:
"Small collection of fixes that would be nice to have in -rc1. This
contains:
- NVMe pull request form Christoph, mostly with fixes for nvme-pci,
host memory buffer in particular.
- Error handling fixup for cgwb_create(), in case allocation of 'wb'
fails. From Christophe Jaillet.
- Ensure that trace_block_getrq() gets the 'dev' in an appropriate
fashion, to avoid a potential NULL deref. From Greg Thelen.
- Regression fix for dm-mq with blk-mq, fixing a problem with
stacking IO schedulers. From me.
- string.h fixup, fixing an issue with memcpy_and_pad(). This
original change came in through an NVMe dependency, which is why
I'm including it here. From Martin Wilck.
- Fix potential int overflow in __blkdev_sectors_to_bio_pages(), from
Mikulas.
- MBR enable fix for sed-opal, from Scott"
* 'for-linus' of git://git.kernel.dk/linux-block:
block: directly insert blk-mq request from blk_insert_cloned_request()
mm/backing-dev.c: fix an error handling path in 'cgwb_create()'
string.h: un-fortify memcpy_and_pad
nvme-pci: implement the HMB entry number and size limitations
nvme-pci: propagate (some) errors from host memory buffer setup
nvme-pci: use appropriate initial chunk size for HMB allocation
nvme-pci: fix host memory buffer allocation fallback
nvme: fix lightnvm check
block: fix integer overflow in __blkdev_sectors_to_bio_pages()
block: sed-opal: Set MBRDone on S3 resume path if TPER is MBREnabled
block: tolerate tracing of NULL bio
-rw-r--r-- | block/blk-core.c | 7 | ||||
-rw-r--r-- | block/blk-lib.c | 4 | ||||
-rw-r--r-- | block/blk-mq.c | 16 | ||||
-rw-r--r-- | block/blk-mq.h | 1 | ||||
-rw-r--r-- | block/opal_proto.h | 1 | ||||
-rw-r--r-- | block/sed-opal.c | 32 | ||||
-rw-r--r-- | drivers/nvme/host/core.c | 11 | ||||
-rw-r--r-- | drivers/nvme/host/lightnvm.c | 26 | ||||
-rw-r--r-- | drivers/nvme/host/nvme.h | 13 | ||||
-rw-r--r-- | drivers/nvme/host/pci.c | 74 | ||||
-rw-r--r-- | include/linux/nvme.h | 4 | ||||
-rw-r--r-- | include/linux/string.h | 15 | ||||
-rw-r--r-- | include/trace/events/block.h | 5 | ||||
-rw-r--r-- | mm/backing-dev.c | 6 |
14 files changed, 134 insertions, 81 deletions
diff --git a/block/blk-core.c b/block/blk-core.c index d709c0e3a2ac..aebe676225e6 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -2342,7 +2342,12 @@ blk_status_t blk_insert_cloned_request(struct request_queue *q, struct request * if (q->mq_ops) { if (blk_queue_io_stat(q)) blk_account_io_start(rq, true); - blk_mq_sched_insert_request(rq, false, true, false, false); + /* + * Since we have a scheduler attached on the top device, + * bypass a potential scheduler on the bottom device for + * insert. + */ + blk_mq_request_bypass_insert(rq); return BLK_STS_OK; } diff --git a/block/blk-lib.c b/block/blk-lib.c index e01adb5145b3..62240f8832ca 100644 --- a/block/blk-lib.c +++ b/block/blk-lib.c @@ -269,9 +269,9 @@ static int __blkdev_issue_write_zeroes(struct block_device *bdev, */ static unsigned int __blkdev_sectors_to_bio_pages(sector_t nr_sects) { - sector_t bytes = (nr_sects << 9) + PAGE_SIZE - 1; + sector_t pages = DIV_ROUND_UP_SECTOR_T(nr_sects, PAGE_SIZE / 512); - return min(bytes >> PAGE_SHIFT, (sector_t)BIO_MAX_PAGES); + return min(pages, (sector_t)BIO_MAX_PAGES); } /** diff --git a/block/blk-mq.c b/block/blk-mq.c index 3f18cff80050..98a18609755e 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -1401,6 +1401,22 @@ void __blk_mq_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq, blk_mq_hctx_mark_pending(hctx, ctx); } +/* + * Should only be used carefully, when the caller knows we want to + * bypass a potential IO scheduler on the target device. + */ +void blk_mq_request_bypass_insert(struct request *rq) +{ + struct blk_mq_ctx *ctx = rq->mq_ctx; + struct blk_mq_hw_ctx *hctx = blk_mq_map_queue(rq->q, ctx->cpu); + + spin_lock(&hctx->lock); + list_add_tail(&rq->queuelist, &hctx->dispatch); + spin_unlock(&hctx->lock); + + blk_mq_run_hw_queue(hctx, false); +} + void blk_mq_insert_requests(struct blk_mq_hw_ctx *hctx, struct blk_mq_ctx *ctx, struct list_head *list) diff --git a/block/blk-mq.h b/block/blk-mq.h index 98252b79b80b..ef15b3414da5 100644 --- a/block/blk-mq.h +++ b/block/blk-mq.h @@ -54,6 +54,7 @@ int blk_mq_alloc_rqs(struct blk_mq_tag_set *set, struct blk_mq_tags *tags, */ void __blk_mq_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq, bool at_head); +void blk_mq_request_bypass_insert(struct request *rq); void blk_mq_insert_requests(struct blk_mq_hw_ctx *hctx, struct blk_mq_ctx *ctx, struct list_head *list); diff --git a/block/opal_proto.h b/block/opal_proto.h index f40c9acf8895..e20be8258854 100644 --- a/block/opal_proto.h +++ b/block/opal_proto.h @@ -46,6 +46,7 @@ enum opal_response_token { #define GENERIC_HOST_SESSION_NUM 0x41 #define TPER_SYNC_SUPPORTED 0x01 +#define MBR_ENABLED_MASK 0x10 #define TINY_ATOM_DATA_MASK 0x3F #define TINY_ATOM_SIGNED 0x40 diff --git a/block/sed-opal.c b/block/sed-opal.c index 9b30ae5ab843..9ed51d0c6b1d 100644 --- a/block/sed-opal.c +++ b/block/sed-opal.c @@ -80,6 +80,7 @@ struct parsed_resp { struct opal_dev { bool supported; + bool mbr_enabled; void *data; sec_send_recv *send_recv; @@ -283,6 +284,14 @@ static bool check_tper(const void *data) return true; } +static bool check_mbrenabled(const void *data) +{ + const struct d0_locking_features *lfeat = data; + u8 sup_feat = lfeat->supported_features; + + return !!(sup_feat & MBR_ENABLED_MASK); +} + static bool check_sum(const void *data) { const struct d0_single_user_mode *sum = data; @@ -417,6 +426,7 @@ static int opal_discovery0_end(struct opal_dev *dev) u32 hlen = be32_to_cpu(hdr->length); print_buffer(dev->resp, hlen); + dev->mbr_enabled = false; if (hlen > IO_BUFFER_LENGTH - sizeof(*hdr)) { pr_debug("Discovery length overflows buffer (%zu+%u)/%u\n", @@ -442,6 +452,8 @@ static int opal_discovery0_end(struct opal_dev *dev) check_geometry(dev, body); break; case FC_LOCKING: + dev->mbr_enabled = check_mbrenabled(body->features); + break; case FC_ENTERPRISE: case FC_DATASTORE: /* some ignored properties */ @@ -2190,6 +2202,21 @@ static int __opal_lock_unlock(struct opal_dev *dev, return next(dev); } +static int __opal_set_mbr_done(struct opal_dev *dev, struct opal_key *key) +{ + u8 mbr_done_tf = 1; + const struct opal_step mbrdone_step [] = { + { opal_discovery0, }, + { start_admin1LSP_opal_session, key }, + { set_mbr_done, &mbr_done_tf }, + { end_opal_session, }, + { NULL, } + }; + + dev->steps = mbrdone_step; + return next(dev); +} + static int opal_lock_unlock(struct opal_dev *dev, struct opal_lock_unlock *lk_unlk) { @@ -2345,6 +2372,11 @@ bool opal_unlock_from_suspend(struct opal_dev *dev) suspend->unlk.session.sum); was_failure = true; } + if (dev->mbr_enabled) { + ret = __opal_set_mbr_done(dev, &suspend->unlk.session.opal_key); + if (ret) + pr_debug("Failed to set MBR Done in S3 resume\n"); + } } mutex_unlock(&dev->dev_lock); return was_failure; diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 277a7a02cba5..acc816b67582 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -1897,6 +1897,8 @@ int nvme_init_identify(struct nvme_ctrl *ctrl) ctrl->cntlid = le16_to_cpu(id->cntlid); ctrl->hmpre = le32_to_cpu(id->hmpre); ctrl->hmmin = le32_to_cpu(id->hmmin); + ctrl->hmminds = le32_to_cpu(id->hmminds); + ctrl->hmmaxd = le16_to_cpu(id->hmmaxd); } kfree(id); @@ -2377,10 +2379,11 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid) nvme_report_ns_ids(ctrl, ns->ns_id, id, ns->eui, ns->nguid, &ns->uuid); - if (nvme_nvm_ns_supported(ns, id) && - nvme_nvm_register(ns, disk_name, node)) { - dev_warn(ctrl->device, "%s: LightNVM init failure\n", __func__); - goto out_free_id; + if ((ctrl->quirks & NVME_QUIRK_LIGHTNVM) && id->vs[0] == 0x1) { + if (nvme_nvm_register(ns, disk_name, node)) { + dev_warn(ctrl->device, "LightNVM init failure\n"); + goto out_free_id; + } } disk = alloc_disk_node(0, node); diff --git a/drivers/nvme/host/lightnvm.c b/drivers/nvme/host/lightnvm.c index c1a28569e843..1f79e3f141e6 100644 --- a/drivers/nvme/host/lightnvm.c +++ b/drivers/nvme/host/lightnvm.c @@ -955,29 +955,3 @@ void nvme_nvm_unregister_sysfs(struct nvme_ns *ns) sysfs_remove_group(&disk_to_dev(ns->disk)->kobj, &nvm_dev_attr_group); } - -/* move to shared place when used in multiple places. */ -#define PCI_VENDOR_ID_CNEX 0x1d1d -#define PCI_DEVICE_ID_CNEX_WL 0x2807 -#define PCI_DEVICE_ID_CNEX_QEMU 0x1f1f - -int nvme_nvm_ns_supported(struct nvme_ns *ns, struct nvme_id_ns *id) -{ - struct nvme_ctrl *ctrl = ns->ctrl; - /* XXX: this is poking into PCI structures from generic code! */ - struct pci_dev *pdev = to_pci_dev(ctrl->dev); - - /* QEMU NVMe simulator - PCI ID + Vendor specific bit */ - if (pdev->vendor == PCI_VENDOR_ID_CNEX && - pdev->device == PCI_DEVICE_ID_CNEX_QEMU && - id->vs[0] == 0x1) - return 1; - - /* CNEX Labs - PCI ID + Vendor specific bit */ - if (pdev->vendor == PCI_VENDOR_ID_CNEX && - pdev->device == PCI_DEVICE_ID_CNEX_WL && - id->vs[0] == 0x1) - return 1; - - return 0; -} diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index a19a587d60ed..d3f3c4447515 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -75,6 +75,11 @@ enum nvme_quirks { * The deepest sleep state should not be used. */ NVME_QUIRK_NO_DEEPEST_PS = (1 << 5), + + /* + * Supports the LighNVM command set if indicated in vs[1]. + */ + NVME_QUIRK_LIGHTNVM = (1 << 6), }; /* @@ -176,8 +181,11 @@ struct nvme_ctrl { u64 ps_max_latency_us; bool apst_enabled; + /* PCIe only: */ u32 hmpre; u32 hmmin; + u32 hmminds; + u16 hmmaxd; /* Fabrics only */ u16 sqsize; @@ -320,7 +328,6 @@ void nvme_stop_keep_alive(struct nvme_ctrl *ctrl); int nvme_reset_ctrl(struct nvme_ctrl *ctrl); #ifdef CONFIG_NVM -int nvme_nvm_ns_supported(struct nvme_ns *ns, struct nvme_id_ns *id); int nvme_nvm_register(struct nvme_ns *ns, char *disk_name, int node); void nvme_nvm_unregister(struct nvme_ns *ns); int nvme_nvm_register_sysfs(struct nvme_ns *ns); @@ -339,10 +346,6 @@ static inline int nvme_nvm_register_sysfs(struct nvme_ns *ns) return 0; } static inline void nvme_nvm_unregister_sysfs(struct nvme_ns *ns) {}; -static inline int nvme_nvm_ns_supported(struct nvme_ns *ns, struct nvme_id_ns *id) -{ - return 0; -} static inline int nvme_nvm_ioctl(struct nvme_ns *ns, unsigned int cmd, unsigned long arg) { diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 198245faba6b..4a2121335f48 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -1612,21 +1612,23 @@ static void nvme_free_host_mem(struct nvme_dev *dev) dev->host_mem_descs = NULL; } -static int nvme_alloc_host_mem(struct nvme_dev *dev, u64 min, u64 preferred) +static int __nvme_alloc_host_mem(struct nvme_dev *dev, u64 preferred, + u32 chunk_size) { struct nvme_host_mem_buf_desc *descs; - u32 chunk_size, max_entries, len; + u32 max_entries, len; dma_addr_t descs_dma; int i = 0; void **bufs; u64 size = 0, tmp; - /* start big and work our way down */ - chunk_size = min(preferred, (u64)PAGE_SIZE << MAX_ORDER); -retry: tmp = (preferred + chunk_size - 1); do_div(tmp, chunk_size); max_entries = tmp; + + if (dev->ctrl.hmmaxd && dev->ctrl.hmmaxd < max_entries) + max_entries = dev->ctrl.hmmaxd; + descs = dma_zalloc_coherent(dev->dev, max_entries * sizeof(*descs), &descs_dma, GFP_KERNEL); if (!descs) @@ -1650,15 +1652,9 @@ retry: i++; } - if (!size || (min && size < min)) { - dev_warn(dev->ctrl.device, - "failed to allocate host memory buffer.\n"); + if (!size) goto out_free_bufs; - } - dev_info(dev->ctrl.device, - "allocated %lld MiB host memory buffer.\n", - size >> ilog2(SZ_1M)); dev->nr_host_mem_descs = i; dev->host_mem_size = size; dev->host_mem_descs = descs; @@ -1679,21 +1675,35 @@ out_free_descs: dma_free_coherent(dev->dev, max_entries * sizeof(*descs), descs, descs_dma); out: - /* try a smaller chunk size if we failed early */ - if (chunk_size >= PAGE_SIZE * 2 && (i == 0 || size < min)) { - chunk_size /= 2; - goto retry; - } dev->host_mem_descs = NULL; return -ENOMEM; } -static void nvme_setup_host_mem(struct nvme_dev *dev) +static int nvme_alloc_host_mem(struct nvme_dev *dev, u64 min, u64 preferred) +{ + u32 chunk_size; + + /* start big and work our way down */ + for (chunk_size = min_t(u64, preferred, PAGE_SIZE * MAX_ORDER_NR_PAGES); + chunk_size >= max_t(u32, dev->ctrl.hmminds * 4096, PAGE_SIZE * 2); + chunk_size /= 2) { + if (!__nvme_alloc_host_mem(dev, preferred, chunk_size)) { + if (!min || dev->host_mem_size >= min) + return 0; + nvme_free_host_mem(dev); + } + } + + return -ENOMEM; +} + +static int nvme_setup_host_mem(struct nvme_dev *dev) { u64 max = (u64)max_host_mem_size_mb * SZ_1M; u64 preferred = (u64)dev->ctrl.hmpre * 4096; u64 min = (u64)dev->ctrl.hmmin * 4096; u32 enable_bits = NVME_HOST_MEM_ENABLE; + int ret = 0; preferred = min(preferred, max); if (min > max) { @@ -1701,7 +1711,7 @@ static void nvme_setup_host_mem(struct nvme_dev *dev) "min host memory (%lld MiB) above limit (%d MiB).\n", min >> ilog2(SZ_1M), max_host_mem_size_mb); nvme_free_host_mem(dev); - return; + return 0; } /* @@ -1715,12 +1725,21 @@ static void nvme_setup_host_mem(struct nvme_dev *dev) } if (!dev->host_mem_descs) { - if (nvme_alloc_host_mem(dev, min, preferred)) - return; + if (nvme_alloc_host_mem(dev, min, preferred)) { + dev_warn(dev->ctrl.device, + "failed to allocate host memory buffer.\n"); + return 0; /* controller must work without HMB */ + } + + dev_info(dev->ctrl.device, + "allocated %lld MiB host memory buffer.\n", + dev->host_mem_size >> ilog2(SZ_1M)); } - if (nvme_set_host_mem(dev, enable_bits)) + ret = nvme_set_host_mem(dev, enable_bits); + if (ret) nvme_free_host_mem(dev); + return ret; } static int nvme_setup_io_queues(struct nvme_dev *dev) @@ -2164,8 +2183,11 @@ static void nvme_reset_work(struct work_struct *work) "unable to allocate dma for dbbuf\n"); } - if (dev->ctrl.hmpre) - nvme_setup_host_mem(dev); + if (dev->ctrl.hmpre) { + result = nvme_setup_host_mem(dev); + if (result < 0) + goto out; + } result = nvme_setup_io_queues(dev); if (result) @@ -2497,6 +2519,10 @@ static const struct pci_device_id nvme_id_table[] = { .driver_data = NVME_QUIRK_DELAY_BEFORE_CHK_RDY, }, { PCI_DEVICE(0x144d, 0xa822), /* Samsung PM1725a */ .driver_data = NVME_QUIRK_DELAY_BEFORE_CHK_RDY, }, + { PCI_DEVICE(0x1d1d, 0x1f1f), /* LighNVM qemu device */ + .driver_data = NVME_QUIRK_LIGHTNVM, }, + { PCI_DEVICE(0x1d1d, 0x2807), /* CNEX WL */ + .driver_data = NVME_QUIRK_LIGHTNVM, }, { PCI_DEVICE_CLASS(PCI_CLASS_STORAGE_EXPRESS, 0xffffff) }, { PCI_DEVICE(PCI_VENDOR_ID_APPLE, 0x2001) }, { PCI_DEVICE(PCI_VENDOR_ID_APPLE, 0x2003) }, diff --git a/include/linux/nvme.h b/include/linux/nvme.h index 5144f9103723..87723c86f136 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -226,7 +226,9 @@ struct nvme_id_ctrl { __le16 mntmt; __le16 mxtmt; __le32 sanicap; - __u8 rsvd332[180]; + __le32 hmminds; + __le16 hmmaxd; + __u8 rsvd338[174]; __u8 sqes; __u8 cqes; __le16 maxcmd; diff --git a/include/linux/string.h b/include/linux/string.h index e1eeb0a8a969..54d21783e18d 100644 --- a/include/linux/string.h +++ b/include/linux/string.h @@ -434,20 +434,9 @@ __FORTIFY_INLINE char *strcpy(char *p, const char *q) * @count: The number of bytes to copy * @pad: Character to use for padding if space is left in destination. */ -__FORTIFY_INLINE void memcpy_and_pad(void *dest, size_t dest_len, - const void *src, size_t count, int pad) +static inline void memcpy_and_pad(void *dest, size_t dest_len, + const void *src, size_t count, int pad) { - size_t dest_size = __builtin_object_size(dest, 0); - size_t src_size = __builtin_object_size(src, 0); - - if (__builtin_constant_p(dest_len) && __builtin_constant_p(count)) { - if (dest_size < dest_len && dest_size < count) - __write_overflow(); - else if (src_size < dest_len && src_size < count) - __read_overflow3(); - } - if (dest_size < dest_len) - fortify_panic(__func__); if (dest_len > count) { memcpy(dest, src, count); memset(dest + count, pad, dest_len - count); diff --git a/include/trace/events/block.h b/include/trace/events/block.h index f815aaaef755..1fd7ff1a46f7 100644 --- a/include/trace/events/block.h +++ b/include/trace/events/block.h @@ -397,7 +397,6 @@ DECLARE_EVENT_CLASS(block_get_rq, TP_fast_assign( __entry->dev = bio ? bio_dev(bio) : 0; - __entry->dev = bio_dev(bio); __entry->sector = bio ? bio->bi_iter.bi_sector : 0; __entry->nr_sector = bio ? bio_sectors(bio) : 0; blk_fill_rwbs(__entry->rwbs, @@ -414,7 +413,7 @@ DECLARE_EVENT_CLASS(block_get_rq, /** * block_getrq - get a free request entry in queue for block IO operations * @q: queue for operations - * @bio: pending block IO operation + * @bio: pending block IO operation (can be %NULL) * @rw: low bit indicates a read (%0) or a write (%1) * * A request struct for queue @q has been allocated to handle the @@ -430,7 +429,7 @@ DEFINE_EVENT(block_get_rq, block_getrq, /** * block_sleeprq - waiting to get a free request entry in queue for block IO operation * @q: queue for operation - * @bio: pending block IO operation + * @bio: pending block IO operation (can be %NULL) * @rw: low bit indicates a read (%0) or a write (%1) * * In the case where a request struct cannot be provided for queue @q diff --git a/mm/backing-dev.c b/mm/backing-dev.c index f028a9a472fd..e19606bb41a0 100644 --- a/mm/backing-dev.c +++ b/mm/backing-dev.c @@ -569,8 +569,10 @@ static int cgwb_create(struct backing_dev_info *bdi, /* need to create a new one */ wb = kmalloc(sizeof(*wb), gfp); - if (!wb) - return -ENOMEM; + if (!wb) { + ret = -ENOMEM; + goto out_put; + } ret = wb_init(wb, bdi, blkcg_css->id, gfp); if (ret) |