summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2017-09-13 19:20:41 +0200
committerLinus Torvalds <torvalds@linux-foundation.org>2017-09-13 19:20:41 +0200
commit80a0d644d37296d7cac1a4956cb4e916770083e0 (patch)
tree9571808cfbe611496e7d7957504e78a61bc77cd4
parentMerge tag 'docs-4.14' of git://git.lwn.net/linux (diff)
parentblock: directly insert blk-mq request from blk_insert_cloned_request() (diff)
downloadlinux-80a0d644d37296d7cac1a4956cb4e916770083e0.tar.xz
linux-80a0d644d37296d7cac1a4956cb4e916770083e0.zip
Merge branch 'for-linus' of git://git.kernel.dk/linux-block
Pull block fixes from Jens Axboe: "Small collection of fixes that would be nice to have in -rc1. This contains: - NVMe pull request form Christoph, mostly with fixes for nvme-pci, host memory buffer in particular. - Error handling fixup for cgwb_create(), in case allocation of 'wb' fails. From Christophe Jaillet. - Ensure that trace_block_getrq() gets the 'dev' in an appropriate fashion, to avoid a potential NULL deref. From Greg Thelen. - Regression fix for dm-mq with blk-mq, fixing a problem with stacking IO schedulers. From me. - string.h fixup, fixing an issue with memcpy_and_pad(). This original change came in through an NVMe dependency, which is why I'm including it here. From Martin Wilck. - Fix potential int overflow in __blkdev_sectors_to_bio_pages(), from Mikulas. - MBR enable fix for sed-opal, from Scott" * 'for-linus' of git://git.kernel.dk/linux-block: block: directly insert blk-mq request from blk_insert_cloned_request() mm/backing-dev.c: fix an error handling path in 'cgwb_create()' string.h: un-fortify memcpy_and_pad nvme-pci: implement the HMB entry number and size limitations nvme-pci: propagate (some) errors from host memory buffer setup nvme-pci: use appropriate initial chunk size for HMB allocation nvme-pci: fix host memory buffer allocation fallback nvme: fix lightnvm check block: fix integer overflow in __blkdev_sectors_to_bio_pages() block: sed-opal: Set MBRDone on S3 resume path if TPER is MBREnabled block: tolerate tracing of NULL bio
-rw-r--r--block/blk-core.c7
-rw-r--r--block/blk-lib.c4
-rw-r--r--block/blk-mq.c16
-rw-r--r--block/blk-mq.h1
-rw-r--r--block/opal_proto.h1
-rw-r--r--block/sed-opal.c32
-rw-r--r--drivers/nvme/host/core.c11
-rw-r--r--drivers/nvme/host/lightnvm.c26
-rw-r--r--drivers/nvme/host/nvme.h13
-rw-r--r--drivers/nvme/host/pci.c74
-rw-r--r--include/linux/nvme.h4
-rw-r--r--include/linux/string.h15
-rw-r--r--include/trace/events/block.h5
-rw-r--r--mm/backing-dev.c6
14 files changed, 134 insertions, 81 deletions
diff --git a/block/blk-core.c b/block/blk-core.c
index d709c0e3a2ac..aebe676225e6 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -2342,7 +2342,12 @@ blk_status_t blk_insert_cloned_request(struct request_queue *q, struct request *
if (q->mq_ops) {
if (blk_queue_io_stat(q))
blk_account_io_start(rq, true);
- blk_mq_sched_insert_request(rq, false, true, false, false);
+ /*
+ * Since we have a scheduler attached on the top device,
+ * bypass a potential scheduler on the bottom device for
+ * insert.
+ */
+ blk_mq_request_bypass_insert(rq);
return BLK_STS_OK;
}
diff --git a/block/blk-lib.c b/block/blk-lib.c
index e01adb5145b3..62240f8832ca 100644
--- a/block/blk-lib.c
+++ b/block/blk-lib.c
@@ -269,9 +269,9 @@ static int __blkdev_issue_write_zeroes(struct block_device *bdev,
*/
static unsigned int __blkdev_sectors_to_bio_pages(sector_t nr_sects)
{
- sector_t bytes = (nr_sects << 9) + PAGE_SIZE - 1;
+ sector_t pages = DIV_ROUND_UP_SECTOR_T(nr_sects, PAGE_SIZE / 512);
- return min(bytes >> PAGE_SHIFT, (sector_t)BIO_MAX_PAGES);
+ return min(pages, (sector_t)BIO_MAX_PAGES);
}
/**
diff --git a/block/blk-mq.c b/block/blk-mq.c
index 3f18cff80050..98a18609755e 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -1401,6 +1401,22 @@ void __blk_mq_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq,
blk_mq_hctx_mark_pending(hctx, ctx);
}
+/*
+ * Should only be used carefully, when the caller knows we want to
+ * bypass a potential IO scheduler on the target device.
+ */
+void blk_mq_request_bypass_insert(struct request *rq)
+{
+ struct blk_mq_ctx *ctx = rq->mq_ctx;
+ struct blk_mq_hw_ctx *hctx = blk_mq_map_queue(rq->q, ctx->cpu);
+
+ spin_lock(&hctx->lock);
+ list_add_tail(&rq->queuelist, &hctx->dispatch);
+ spin_unlock(&hctx->lock);
+
+ blk_mq_run_hw_queue(hctx, false);
+}
+
void blk_mq_insert_requests(struct blk_mq_hw_ctx *hctx, struct blk_mq_ctx *ctx,
struct list_head *list)
diff --git a/block/blk-mq.h b/block/blk-mq.h
index 98252b79b80b..ef15b3414da5 100644
--- a/block/blk-mq.h
+++ b/block/blk-mq.h
@@ -54,6 +54,7 @@ int blk_mq_alloc_rqs(struct blk_mq_tag_set *set, struct blk_mq_tags *tags,
*/
void __blk_mq_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq,
bool at_head);
+void blk_mq_request_bypass_insert(struct request *rq);
void blk_mq_insert_requests(struct blk_mq_hw_ctx *hctx, struct blk_mq_ctx *ctx,
struct list_head *list);
diff --git a/block/opal_proto.h b/block/opal_proto.h
index f40c9acf8895..e20be8258854 100644
--- a/block/opal_proto.h
+++ b/block/opal_proto.h
@@ -46,6 +46,7 @@ enum opal_response_token {
#define GENERIC_HOST_SESSION_NUM 0x41
#define TPER_SYNC_SUPPORTED 0x01
+#define MBR_ENABLED_MASK 0x10
#define TINY_ATOM_DATA_MASK 0x3F
#define TINY_ATOM_SIGNED 0x40
diff --git a/block/sed-opal.c b/block/sed-opal.c
index 9b30ae5ab843..9ed51d0c6b1d 100644
--- a/block/sed-opal.c
+++ b/block/sed-opal.c
@@ -80,6 +80,7 @@ struct parsed_resp {
struct opal_dev {
bool supported;
+ bool mbr_enabled;
void *data;
sec_send_recv *send_recv;
@@ -283,6 +284,14 @@ static bool check_tper(const void *data)
return true;
}
+static bool check_mbrenabled(const void *data)
+{
+ const struct d0_locking_features *lfeat = data;
+ u8 sup_feat = lfeat->supported_features;
+
+ return !!(sup_feat & MBR_ENABLED_MASK);
+}
+
static bool check_sum(const void *data)
{
const struct d0_single_user_mode *sum = data;
@@ -417,6 +426,7 @@ static int opal_discovery0_end(struct opal_dev *dev)
u32 hlen = be32_to_cpu(hdr->length);
print_buffer(dev->resp, hlen);
+ dev->mbr_enabled = false;
if (hlen > IO_BUFFER_LENGTH - sizeof(*hdr)) {
pr_debug("Discovery length overflows buffer (%zu+%u)/%u\n",
@@ -442,6 +452,8 @@ static int opal_discovery0_end(struct opal_dev *dev)
check_geometry(dev, body);
break;
case FC_LOCKING:
+ dev->mbr_enabled = check_mbrenabled(body->features);
+ break;
case FC_ENTERPRISE:
case FC_DATASTORE:
/* some ignored properties */
@@ -2190,6 +2202,21 @@ static int __opal_lock_unlock(struct opal_dev *dev,
return next(dev);
}
+static int __opal_set_mbr_done(struct opal_dev *dev, struct opal_key *key)
+{
+ u8 mbr_done_tf = 1;
+ const struct opal_step mbrdone_step [] = {
+ { opal_discovery0, },
+ { start_admin1LSP_opal_session, key },
+ { set_mbr_done, &mbr_done_tf },
+ { end_opal_session, },
+ { NULL, }
+ };
+
+ dev->steps = mbrdone_step;
+ return next(dev);
+}
+
static int opal_lock_unlock(struct opal_dev *dev,
struct opal_lock_unlock *lk_unlk)
{
@@ -2345,6 +2372,11 @@ bool opal_unlock_from_suspend(struct opal_dev *dev)
suspend->unlk.session.sum);
was_failure = true;
}
+ if (dev->mbr_enabled) {
+ ret = __opal_set_mbr_done(dev, &suspend->unlk.session.opal_key);
+ if (ret)
+ pr_debug("Failed to set MBR Done in S3 resume\n");
+ }
}
mutex_unlock(&dev->dev_lock);
return was_failure;
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index 277a7a02cba5..acc816b67582 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -1897,6 +1897,8 @@ int nvme_init_identify(struct nvme_ctrl *ctrl)
ctrl->cntlid = le16_to_cpu(id->cntlid);
ctrl->hmpre = le32_to_cpu(id->hmpre);
ctrl->hmmin = le32_to_cpu(id->hmmin);
+ ctrl->hmminds = le32_to_cpu(id->hmminds);
+ ctrl->hmmaxd = le16_to_cpu(id->hmmaxd);
}
kfree(id);
@@ -2377,10 +2379,11 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid)
nvme_report_ns_ids(ctrl, ns->ns_id, id, ns->eui, ns->nguid, &ns->uuid);
- if (nvme_nvm_ns_supported(ns, id) &&
- nvme_nvm_register(ns, disk_name, node)) {
- dev_warn(ctrl->device, "%s: LightNVM init failure\n", __func__);
- goto out_free_id;
+ if ((ctrl->quirks & NVME_QUIRK_LIGHTNVM) && id->vs[0] == 0x1) {
+ if (nvme_nvm_register(ns, disk_name, node)) {
+ dev_warn(ctrl->device, "LightNVM init failure\n");
+ goto out_free_id;
+ }
}
disk = alloc_disk_node(0, node);
diff --git a/drivers/nvme/host/lightnvm.c b/drivers/nvme/host/lightnvm.c
index c1a28569e843..1f79e3f141e6 100644
--- a/drivers/nvme/host/lightnvm.c
+++ b/drivers/nvme/host/lightnvm.c
@@ -955,29 +955,3 @@ void nvme_nvm_unregister_sysfs(struct nvme_ns *ns)
sysfs_remove_group(&disk_to_dev(ns->disk)->kobj,
&nvm_dev_attr_group);
}
-
-/* move to shared place when used in multiple places. */
-#define PCI_VENDOR_ID_CNEX 0x1d1d
-#define PCI_DEVICE_ID_CNEX_WL 0x2807
-#define PCI_DEVICE_ID_CNEX_QEMU 0x1f1f
-
-int nvme_nvm_ns_supported(struct nvme_ns *ns, struct nvme_id_ns *id)
-{
- struct nvme_ctrl *ctrl = ns->ctrl;
- /* XXX: this is poking into PCI structures from generic code! */
- struct pci_dev *pdev = to_pci_dev(ctrl->dev);
-
- /* QEMU NVMe simulator - PCI ID + Vendor specific bit */
- if (pdev->vendor == PCI_VENDOR_ID_CNEX &&
- pdev->device == PCI_DEVICE_ID_CNEX_QEMU &&
- id->vs[0] == 0x1)
- return 1;
-
- /* CNEX Labs - PCI ID + Vendor specific bit */
- if (pdev->vendor == PCI_VENDOR_ID_CNEX &&
- pdev->device == PCI_DEVICE_ID_CNEX_WL &&
- id->vs[0] == 0x1)
- return 1;
-
- return 0;
-}
diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
index a19a587d60ed..d3f3c4447515 100644
--- a/drivers/nvme/host/nvme.h
+++ b/drivers/nvme/host/nvme.h
@@ -75,6 +75,11 @@ enum nvme_quirks {
* The deepest sleep state should not be used.
*/
NVME_QUIRK_NO_DEEPEST_PS = (1 << 5),
+
+ /*
+ * Supports the LighNVM command set if indicated in vs[1].
+ */
+ NVME_QUIRK_LIGHTNVM = (1 << 6),
};
/*
@@ -176,8 +181,11 @@ struct nvme_ctrl {
u64 ps_max_latency_us;
bool apst_enabled;
+ /* PCIe only: */
u32 hmpre;
u32 hmmin;
+ u32 hmminds;
+ u16 hmmaxd;
/* Fabrics only */
u16 sqsize;
@@ -320,7 +328,6 @@ void nvme_stop_keep_alive(struct nvme_ctrl *ctrl);
int nvme_reset_ctrl(struct nvme_ctrl *ctrl);
#ifdef CONFIG_NVM
-int nvme_nvm_ns_supported(struct nvme_ns *ns, struct nvme_id_ns *id);
int nvme_nvm_register(struct nvme_ns *ns, char *disk_name, int node);
void nvme_nvm_unregister(struct nvme_ns *ns);
int nvme_nvm_register_sysfs(struct nvme_ns *ns);
@@ -339,10 +346,6 @@ static inline int nvme_nvm_register_sysfs(struct nvme_ns *ns)
return 0;
}
static inline void nvme_nvm_unregister_sysfs(struct nvme_ns *ns) {};
-static inline int nvme_nvm_ns_supported(struct nvme_ns *ns, struct nvme_id_ns *id)
-{
- return 0;
-}
static inline int nvme_nvm_ioctl(struct nvme_ns *ns, unsigned int cmd,
unsigned long arg)
{
diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index 198245faba6b..4a2121335f48 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -1612,21 +1612,23 @@ static void nvme_free_host_mem(struct nvme_dev *dev)
dev->host_mem_descs = NULL;
}
-static int nvme_alloc_host_mem(struct nvme_dev *dev, u64 min, u64 preferred)
+static int __nvme_alloc_host_mem(struct nvme_dev *dev, u64 preferred,
+ u32 chunk_size)
{
struct nvme_host_mem_buf_desc *descs;
- u32 chunk_size, max_entries, len;
+ u32 max_entries, len;
dma_addr_t descs_dma;
int i = 0;
void **bufs;
u64 size = 0, tmp;
- /* start big and work our way down */
- chunk_size = min(preferred, (u64)PAGE_SIZE << MAX_ORDER);
-retry:
tmp = (preferred + chunk_size - 1);
do_div(tmp, chunk_size);
max_entries = tmp;
+
+ if (dev->ctrl.hmmaxd && dev->ctrl.hmmaxd < max_entries)
+ max_entries = dev->ctrl.hmmaxd;
+
descs = dma_zalloc_coherent(dev->dev, max_entries * sizeof(*descs),
&descs_dma, GFP_KERNEL);
if (!descs)
@@ -1650,15 +1652,9 @@ retry:
i++;
}
- if (!size || (min && size < min)) {
- dev_warn(dev->ctrl.device,
- "failed to allocate host memory buffer.\n");
+ if (!size)
goto out_free_bufs;
- }
- dev_info(dev->ctrl.device,
- "allocated %lld MiB host memory buffer.\n",
- size >> ilog2(SZ_1M));
dev->nr_host_mem_descs = i;
dev->host_mem_size = size;
dev->host_mem_descs = descs;
@@ -1679,21 +1675,35 @@ out_free_descs:
dma_free_coherent(dev->dev, max_entries * sizeof(*descs), descs,
descs_dma);
out:
- /* try a smaller chunk size if we failed early */
- if (chunk_size >= PAGE_SIZE * 2 && (i == 0 || size < min)) {
- chunk_size /= 2;
- goto retry;
- }
dev->host_mem_descs = NULL;
return -ENOMEM;
}
-static void nvme_setup_host_mem(struct nvme_dev *dev)
+static int nvme_alloc_host_mem(struct nvme_dev *dev, u64 min, u64 preferred)
+{
+ u32 chunk_size;
+
+ /* start big and work our way down */
+ for (chunk_size = min_t(u64, preferred, PAGE_SIZE * MAX_ORDER_NR_PAGES);
+ chunk_size >= max_t(u32, dev->ctrl.hmminds * 4096, PAGE_SIZE * 2);
+ chunk_size /= 2) {
+ if (!__nvme_alloc_host_mem(dev, preferred, chunk_size)) {
+ if (!min || dev->host_mem_size >= min)
+ return 0;
+ nvme_free_host_mem(dev);
+ }
+ }
+
+ return -ENOMEM;
+}
+
+static int nvme_setup_host_mem(struct nvme_dev *dev)
{
u64 max = (u64)max_host_mem_size_mb * SZ_1M;
u64 preferred = (u64)dev->ctrl.hmpre * 4096;
u64 min = (u64)dev->ctrl.hmmin * 4096;
u32 enable_bits = NVME_HOST_MEM_ENABLE;
+ int ret = 0;
preferred = min(preferred, max);
if (min > max) {
@@ -1701,7 +1711,7 @@ static void nvme_setup_host_mem(struct nvme_dev *dev)
"min host memory (%lld MiB) above limit (%d MiB).\n",
min >> ilog2(SZ_1M), max_host_mem_size_mb);
nvme_free_host_mem(dev);
- return;
+ return 0;
}
/*
@@ -1715,12 +1725,21 @@ static void nvme_setup_host_mem(struct nvme_dev *dev)
}
if (!dev->host_mem_descs) {
- if (nvme_alloc_host_mem(dev, min, preferred))
- return;
+ if (nvme_alloc_host_mem(dev, min, preferred)) {
+ dev_warn(dev->ctrl.device,
+ "failed to allocate host memory buffer.\n");
+ return 0; /* controller must work without HMB */
+ }
+
+ dev_info(dev->ctrl.device,
+ "allocated %lld MiB host memory buffer.\n",
+ dev->host_mem_size >> ilog2(SZ_1M));
}
- if (nvme_set_host_mem(dev, enable_bits))
+ ret = nvme_set_host_mem(dev, enable_bits);
+ if (ret)
nvme_free_host_mem(dev);
+ return ret;
}
static int nvme_setup_io_queues(struct nvme_dev *dev)
@@ -2164,8 +2183,11 @@ static void nvme_reset_work(struct work_struct *work)
"unable to allocate dma for dbbuf\n");
}
- if (dev->ctrl.hmpre)
- nvme_setup_host_mem(dev);
+ if (dev->ctrl.hmpre) {
+ result = nvme_setup_host_mem(dev);
+ if (result < 0)
+ goto out;
+ }
result = nvme_setup_io_queues(dev);
if (result)
@@ -2497,6 +2519,10 @@ static const struct pci_device_id nvme_id_table[] = {
.driver_data = NVME_QUIRK_DELAY_BEFORE_CHK_RDY, },
{ PCI_DEVICE(0x144d, 0xa822), /* Samsung PM1725a */
.driver_data = NVME_QUIRK_DELAY_BEFORE_CHK_RDY, },
+ { PCI_DEVICE(0x1d1d, 0x1f1f), /* LighNVM qemu device */
+ .driver_data = NVME_QUIRK_LIGHTNVM, },
+ { PCI_DEVICE(0x1d1d, 0x2807), /* CNEX WL */
+ .driver_data = NVME_QUIRK_LIGHTNVM, },
{ PCI_DEVICE_CLASS(PCI_CLASS_STORAGE_EXPRESS, 0xffffff) },
{ PCI_DEVICE(PCI_VENDOR_ID_APPLE, 0x2001) },
{ PCI_DEVICE(PCI_VENDOR_ID_APPLE, 0x2003) },
diff --git a/include/linux/nvme.h b/include/linux/nvme.h
index 5144f9103723..87723c86f136 100644
--- a/include/linux/nvme.h
+++ b/include/linux/nvme.h
@@ -226,7 +226,9 @@ struct nvme_id_ctrl {
__le16 mntmt;
__le16 mxtmt;
__le32 sanicap;
- __u8 rsvd332[180];
+ __le32 hmminds;
+ __le16 hmmaxd;
+ __u8 rsvd338[174];
__u8 sqes;
__u8 cqes;
__le16 maxcmd;
diff --git a/include/linux/string.h b/include/linux/string.h
index e1eeb0a8a969..54d21783e18d 100644
--- a/include/linux/string.h
+++ b/include/linux/string.h
@@ -434,20 +434,9 @@ __FORTIFY_INLINE char *strcpy(char *p, const char *q)
* @count: The number of bytes to copy
* @pad: Character to use for padding if space is left in destination.
*/
-__FORTIFY_INLINE void memcpy_and_pad(void *dest, size_t dest_len,
- const void *src, size_t count, int pad)
+static inline void memcpy_and_pad(void *dest, size_t dest_len,
+ const void *src, size_t count, int pad)
{
- size_t dest_size = __builtin_object_size(dest, 0);
- size_t src_size = __builtin_object_size(src, 0);
-
- if (__builtin_constant_p(dest_len) && __builtin_constant_p(count)) {
- if (dest_size < dest_len && dest_size < count)
- __write_overflow();
- else if (src_size < dest_len && src_size < count)
- __read_overflow3();
- }
- if (dest_size < dest_len)
- fortify_panic(__func__);
if (dest_len > count) {
memcpy(dest, src, count);
memset(dest + count, pad, dest_len - count);
diff --git a/include/trace/events/block.h b/include/trace/events/block.h
index f815aaaef755..1fd7ff1a46f7 100644
--- a/include/trace/events/block.h
+++ b/include/trace/events/block.h
@@ -397,7 +397,6 @@ DECLARE_EVENT_CLASS(block_get_rq,
TP_fast_assign(
__entry->dev = bio ? bio_dev(bio) : 0;
- __entry->dev = bio_dev(bio);
__entry->sector = bio ? bio->bi_iter.bi_sector : 0;
__entry->nr_sector = bio ? bio_sectors(bio) : 0;
blk_fill_rwbs(__entry->rwbs,
@@ -414,7 +413,7 @@ DECLARE_EVENT_CLASS(block_get_rq,
/**
* block_getrq - get a free request entry in queue for block IO operations
* @q: queue for operations
- * @bio: pending block IO operation
+ * @bio: pending block IO operation (can be %NULL)
* @rw: low bit indicates a read (%0) or a write (%1)
*
* A request struct for queue @q has been allocated to handle the
@@ -430,7 +429,7 @@ DEFINE_EVENT(block_get_rq, block_getrq,
/**
* block_sleeprq - waiting to get a free request entry in queue for block IO operation
* @q: queue for operation
- * @bio: pending block IO operation
+ * @bio: pending block IO operation (can be %NULL)
* @rw: low bit indicates a read (%0) or a write (%1)
*
* In the case where a request struct cannot be provided for queue @q
diff --git a/mm/backing-dev.c b/mm/backing-dev.c
index f028a9a472fd..e19606bb41a0 100644
--- a/mm/backing-dev.c
+++ b/mm/backing-dev.c
@@ -569,8 +569,10 @@ static int cgwb_create(struct backing_dev_info *bdi,
/* need to create a new one */
wb = kmalloc(sizeof(*wb), gfp);
- if (!wb)
- return -ENOMEM;
+ if (!wb) {
+ ret = -ENOMEM;
+ goto out_put;
+ }
ret = wb_init(wb, bdi, blkcg_css->id, gfp);
if (ret)