summaryrefslogtreecommitdiffstats
path: root/drivers/nvme/host
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/nvme/host')
-rw-r--r--drivers/nvme/host/auth.c2
-rw-r--r--drivers/nvme/host/core.c99
-rw-r--r--drivers/nvme/host/hwmon.c2
-rw-r--r--drivers/nvme/host/ioctl.c7
-rw-r--r--drivers/nvme/host/multipath.c40
-rw-r--r--drivers/nvme/host/nvme.h1
-rw-r--r--drivers/nvme/host/pci.c19
-rw-r--r--drivers/nvme/host/pr.c2
-rw-r--r--drivers/nvme/host/rdma.c2
-rw-r--r--drivers/nvme/host/tcp.c7
-rw-r--r--drivers/nvme/host/trace.c2
11 files changed, 124 insertions, 59 deletions
diff --git a/drivers/nvme/host/auth.c b/drivers/nvme/host/auth.c
index 371e14f0a203..5ea0e21709da 100644
--- a/drivers/nvme/host/auth.c
+++ b/drivers/nvme/host/auth.c
@@ -6,7 +6,7 @@
#include <linux/crc32.h>
#include <linux/base64.h>
#include <linux/prandom.h>
-#include <asm/unaligned.h>
+#include <linux/unaligned.h>
#include <crypto/hash.h>
#include <crypto/dh.h>
#include "nvme.h"
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index ba6508455e18..b149b638453f 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -22,7 +22,7 @@
#include <linux/nvme_ioctl.h>
#include <linux/pm_qos.h>
#include <linux/ratelimit.h>
-#include <asm/unaligned.h>
+#include <linux/unaligned.h>
#include "nvme.h"
#include "fabrics.h"
@@ -92,6 +92,17 @@ MODULE_PARM_DESC(apst_secondary_latency_tol_us,
"secondary APST latency tolerance in us");
/*
+ * Older kernels didn't enable protection information if it was at an offset.
+ * Newer kernels do, so it breaks reads on the upgrade if such formats were
+ * used in prior kernels since the metadata written did not contain a valid
+ * checksum.
+ */
+static bool disable_pi_offsets = false;
+module_param(disable_pi_offsets, bool, 0444);
+MODULE_PARM_DESC(disable_pi_offsets,
+ "disable protection information if it has an offset");
+
+/*
* nvme_wq - hosts nvme related works that are not reset or delete
* nvme_reset_wq - hosts nvme reset works
* nvme_delete_wq - hosts nvme delete works
@@ -1292,14 +1303,12 @@ static void nvme_queue_keep_alive_work(struct nvme_ctrl *ctrl)
queue_delayed_work(nvme_wq, &ctrl->ka_work, delay);
}
-static enum rq_end_io_ret nvme_keep_alive_end_io(struct request *rq,
- blk_status_t status)
+static void nvme_keep_alive_finish(struct request *rq,
+ blk_status_t status, struct nvme_ctrl *ctrl)
{
- struct nvme_ctrl *ctrl = rq->end_io_data;
- unsigned long flags;
- bool startka = false;
unsigned long rtt = jiffies - (rq->deadline - rq->timeout);
unsigned long delay = nvme_keep_alive_work_period(ctrl);
+ enum nvme_ctrl_state state = nvme_ctrl_state(ctrl);
/*
* Subtract off the keepalive RTT so nvme_keep_alive_work runs
@@ -1313,25 +1322,17 @@ static enum rq_end_io_ret nvme_keep_alive_end_io(struct request *rq,
delay = 0;
}
- blk_mq_free_request(rq);
-
if (status) {
dev_err(ctrl->device,
"failed nvme_keep_alive_end_io error=%d\n",
status);
- return RQ_END_IO_NONE;
+ return;
}
ctrl->ka_last_check_time = jiffies;
ctrl->comp_seen = false;
- spin_lock_irqsave(&ctrl->lock, flags);
- if (ctrl->state == NVME_CTRL_LIVE ||
- ctrl->state == NVME_CTRL_CONNECTING)
- startka = true;
- spin_unlock_irqrestore(&ctrl->lock, flags);
- if (startka)
+ if (state == NVME_CTRL_LIVE || state == NVME_CTRL_CONNECTING)
queue_delayed_work(nvme_wq, &ctrl->ka_work, delay);
- return RQ_END_IO_NONE;
}
static void nvme_keep_alive_work(struct work_struct *work)
@@ -1340,6 +1341,7 @@ static void nvme_keep_alive_work(struct work_struct *work)
struct nvme_ctrl, ka_work);
bool comp_seen = ctrl->comp_seen;
struct request *rq;
+ blk_status_t status;
ctrl->ka_last_check_time = jiffies;
@@ -1362,9 +1364,9 @@ static void nvme_keep_alive_work(struct work_struct *work)
nvme_init_request(rq, &ctrl->ka_cmd);
rq->timeout = ctrl->kato * HZ;
- rq->end_io = nvme_keep_alive_end_io;
- rq->end_io_data = ctrl;
- blk_execute_rq_nowait(rq, false);
+ status = blk_execute_rq(rq, false);
+ nvme_keep_alive_finish(rq, status, ctrl);
+ blk_mq_free_request(rq);
}
static void nvme_start_keep_alive(struct nvme_ctrl *ctrl)
@@ -1399,17 +1401,30 @@ static void nvme_update_keep_alive(struct nvme_ctrl *ctrl,
nvme_start_keep_alive(ctrl);
}
-/*
- * In NVMe 1.0 the CNS field was just a binary controller or namespace
- * flag, thus sending any new CNS opcodes has a big chance of not working.
- * Qemu unfortunately had that bug after reporting a 1.1 version compliance
- * (but not for any later version).
- */
-static bool nvme_ctrl_limited_cns(struct nvme_ctrl *ctrl)
+static bool nvme_id_cns_ok(struct nvme_ctrl *ctrl, u8 cns)
{
- if (ctrl->quirks & NVME_QUIRK_IDENTIFY_CNS)
- return ctrl->vs < NVME_VS(1, 2, 0);
- return ctrl->vs < NVME_VS(1, 1, 0);
+ /*
+ * The CNS field occupies a full byte starting with NVMe 1.2
+ */
+ if (ctrl->vs >= NVME_VS(1, 2, 0))
+ return true;
+
+ /*
+ * NVMe 1.1 expanded the CNS value to two bits, which means values
+ * larger than that could get truncated and treated as an incorrect
+ * value.
+ *
+ * Qemu implemented 1.0 behavior for controllers claiming 1.1
+ * compliance, so they need to be quirked here.
+ */
+ if (ctrl->vs >= NVME_VS(1, 1, 0) &&
+ !(ctrl->quirks & NVME_QUIRK_IDENTIFY_CNS))
+ return cns <= 3;
+
+ /*
+ * NVMe 1.0 used a single bit for the CNS value.
+ */
+ return cns <= 1;
}
static int nvme_identify_ctrl(struct nvme_ctrl *dev, struct nvme_id_ctrl **id)
@@ -1922,8 +1937,12 @@ static void nvme_configure_metadata(struct nvme_ctrl *ctrl,
if (head->pi_size && head->ms >= head->pi_size)
head->pi_type = id->dps & NVME_NS_DPS_PI_MASK;
- if (!(id->dps & NVME_NS_DPS_PI_FIRST))
- info->pi_offset = head->ms - head->pi_size;
+ if (!(id->dps & NVME_NS_DPS_PI_FIRST)) {
+ if (disable_pi_offsets)
+ head->pi_type = 0;
+ else
+ info->pi_offset = head->ms - head->pi_size;
+ }
if (ctrl->ops->flags & NVME_F_FABRICS) {
/*
@@ -2458,8 +2477,13 @@ int nvme_enable_ctrl(struct nvme_ctrl *ctrl)
else
ctrl->ctrl_config = NVME_CC_CSS_NVM;
- if (ctrl->cap & NVME_CAP_CRMS_CRWMS && ctrl->cap & NVME_CAP_CRMS_CRIMS)
- ctrl->ctrl_config |= NVME_CC_CRIME;
+ /*
+ * Setting CRIME results in CSTS.RDY before the media is ready. This
+ * makes it possible for media related commands to return the error
+ * NVME_SC_ADMIN_COMMAND_MEDIA_NOT_READY. Until the driver is
+ * restructured to handle retries, disable CC.CRIME.
+ */
+ ctrl->ctrl_config &= ~NVME_CC_CRIME;
ctrl->ctrl_config |= (NVME_CTRL_PAGE_SHIFT - 12) << NVME_CC_MPS_SHIFT;
ctrl->ctrl_config |= NVME_CC_AMS_RR | NVME_CC_SHN_NONE;
@@ -2489,10 +2513,7 @@ int nvme_enable_ctrl(struct nvme_ctrl *ctrl)
* devices are known to get this wrong. Use the larger of the
* two values.
*/
- if (ctrl->ctrl_config & NVME_CC_CRIME)
- ready_timeout = NVME_CRTO_CRIMT(crto);
- else
- ready_timeout = NVME_CRTO_CRWMT(crto);
+ ready_timeout = NVME_CRTO_CRWMT(crto);
if (ready_timeout < timeout)
dev_warn_once(ctrl->device, "bad crto:%x cap:%llx\n",
@@ -3111,7 +3132,7 @@ static int nvme_init_non_mdts_limits(struct nvme_ctrl *ctrl)
ctrl->max_zeroes_sectors = 0;
if (ctrl->subsys->subtype != NVME_NQN_NVME ||
- nvme_ctrl_limited_cns(ctrl) ||
+ !nvme_id_cns_ok(ctrl, NVME_ID_CNS_CS_CTRL) ||
test_bit(NVME_CTRL_SKIP_ID_CNS_CS, &ctrl->flags))
return 0;
@@ -4207,7 +4228,7 @@ static void nvme_scan_work(struct work_struct *work)
}
mutex_lock(&ctrl->scan_lock);
- if (nvme_ctrl_limited_cns(ctrl)) {
+ if (!nvme_id_cns_ok(ctrl, NVME_ID_CNS_NS_ACTIVE_LIST)) {
nvme_scan_ns_sequential(ctrl);
} else {
/*
diff --git a/drivers/nvme/host/hwmon.c b/drivers/nvme/host/hwmon.c
index 8df73a0b3980..89a1a1043d63 100644
--- a/drivers/nvme/host/hwmon.c
+++ b/drivers/nvme/host/hwmon.c
@@ -6,7 +6,7 @@
#include <linux/hwmon.h>
#include <linux/units.h>
-#include <asm/unaligned.h>
+#include <linux/unaligned.h>
#include "nvme.h"
diff --git a/drivers/nvme/host/ioctl.c b/drivers/nvme/host/ioctl.c
index b9b79ccfabf8..a96976b22fa7 100644
--- a/drivers/nvme/host/ioctl.c
+++ b/drivers/nvme/host/ioctl.c
@@ -421,10 +421,13 @@ static enum rq_end_io_ret nvme_uring_cmd_end_io(struct request *req,
struct io_uring_cmd *ioucmd = req->end_io_data;
struct nvme_uring_cmd_pdu *pdu = nvme_uring_cmd_pdu(ioucmd);
- if (nvme_req(req)->flags & NVME_REQ_CANCELLED)
+ if (nvme_req(req)->flags & NVME_REQ_CANCELLED) {
pdu->status = -EINTR;
- else
+ } else {
pdu->status = nvme_req(req)->status;
+ if (!pdu->status)
+ pdu->status = blk_status_to_errno(err);
+ }
pdu->result = le64_to_cpu(nvme_req(req)->result.u64);
/*
diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c
index 48e7a8906d01..6a15873055b9 100644
--- a/drivers/nvme/host/multipath.c
+++ b/drivers/nvme/host/multipath.c
@@ -431,7 +431,6 @@ static bool nvme_available_path(struct nvme_ns_head *head)
case NVME_CTRL_LIVE:
case NVME_CTRL_RESETTING:
case NVME_CTRL_CONNECTING:
- /* fallthru */
return true;
default:
break;
@@ -580,6 +579,20 @@ static int nvme_add_ns_head_cdev(struct nvme_ns_head *head)
return ret;
}
+static void nvme_partition_scan_work(struct work_struct *work)
+{
+ struct nvme_ns_head *head =
+ container_of(work, struct nvme_ns_head, partition_scan_work);
+
+ if (WARN_ON_ONCE(!test_and_clear_bit(GD_SUPPRESS_PART_SCAN,
+ &head->disk->state)))
+ return;
+
+ mutex_lock(&head->disk->open_mutex);
+ bdev_disk_changed(head->disk, false);
+ mutex_unlock(&head->disk->open_mutex);
+}
+
static void nvme_requeue_work(struct work_struct *work)
{
struct nvme_ns_head *head =
@@ -606,6 +619,7 @@ int nvme_mpath_alloc_disk(struct nvme_ctrl *ctrl, struct nvme_ns_head *head)
bio_list_init(&head->requeue_list);
spin_lock_init(&head->requeue_lock);
INIT_WORK(&head->requeue_work, nvme_requeue_work);
+ INIT_WORK(&head->partition_scan_work, nvme_partition_scan_work);
/*
* Add a multipath node if the subsystems supports multiple controllers.
@@ -629,6 +643,16 @@ int nvme_mpath_alloc_disk(struct nvme_ctrl *ctrl, struct nvme_ns_head *head)
return PTR_ERR(head->disk);
head->disk->fops = &nvme_ns_head_ops;
head->disk->private_data = head;
+
+ /*
+ * We need to suppress the partition scan from occuring within the
+ * controller's scan_work context. If a path error occurs here, the IO
+ * will wait until a path becomes available or all paths are torn down,
+ * but that action also occurs within scan_work, so it would deadlock.
+ * Defer the partion scan to a different context that does not block
+ * scan_work.
+ */
+ set_bit(GD_SUPPRESS_PART_SCAN, &head->disk->state);
sprintf(head->disk->disk_name, "nvme%dn%d",
ctrl->subsys->instance, head->instance);
return 0;
@@ -655,6 +679,7 @@ static void nvme_mpath_set_live(struct nvme_ns *ns)
return;
}
nvme_add_ns_head_cdev(head);
+ kblockd_schedule_work(&head->partition_scan_work);
}
mutex_lock(&head->lock);
@@ -974,14 +999,14 @@ void nvme_mpath_shutdown_disk(struct nvme_ns_head *head)
return;
if (test_and_clear_bit(NVME_NSHEAD_DISK_LIVE, &head->flags)) {
nvme_cdev_del(&head->cdev, &head->cdev_device);
+ /*
+ * requeue I/O after NVME_NSHEAD_DISK_LIVE has been cleared
+ * to allow multipath to fail all I/O.
+ */
+ synchronize_srcu(&head->srcu);
+ kblockd_schedule_work(&head->requeue_work);
del_gendisk(head->disk);
}
- /*
- * requeue I/O after NVME_NSHEAD_DISK_LIVE has been cleared
- * to allow multipath to fail all I/O.
- */
- synchronize_srcu(&head->srcu);
- kblockd_schedule_work(&head->requeue_work);
}
void nvme_mpath_remove_disk(struct nvme_ns_head *head)
@@ -991,6 +1016,7 @@ void nvme_mpath_remove_disk(struct nvme_ns_head *head)
/* make sure all pending bios are cleaned up */
kblockd_schedule_work(&head->requeue_work);
flush_work(&head->requeue_work);
+ flush_work(&head->partition_scan_work);
put_disk(head->disk);
}
diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
index 313a4f978a2c..093cb423f536 100644
--- a/drivers/nvme/host/nvme.h
+++ b/drivers/nvme/host/nvme.h
@@ -494,6 +494,7 @@ struct nvme_ns_head {
struct bio_list requeue_list;
spinlock_t requeue_lock;
struct work_struct requeue_work;
+ struct work_struct partition_scan_work;
struct mutex lock;
unsigned long flags;
#define NVME_NSHEAD_DISK_LIVE 0
diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index 7990c3f22ecf..4b9fda0b1d9a 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -2506,17 +2506,29 @@ static unsigned int nvme_pci_nr_maps(struct nvme_dev *dev)
return 1;
}
-static void nvme_pci_update_nr_queues(struct nvme_dev *dev)
+static bool nvme_pci_update_nr_queues(struct nvme_dev *dev)
{
if (!dev->ctrl.tagset) {
nvme_alloc_io_tag_set(&dev->ctrl, &dev->tagset, &nvme_mq_ops,
nvme_pci_nr_maps(dev), sizeof(struct nvme_iod));
- return;
+ return true;
+ }
+
+ /* Give up if we are racing with nvme_dev_disable() */
+ if (!mutex_trylock(&dev->shutdown_lock))
+ return false;
+
+ /* Check if nvme_dev_disable() has been executed already */
+ if (!dev->online_queues) {
+ mutex_unlock(&dev->shutdown_lock);
+ return false;
}
blk_mq_update_nr_hw_queues(&dev->tagset, dev->online_queues - 1);
/* free previously allocated queues that are no longer usable */
nvme_free_queues(dev, dev->online_queues);
+ mutex_unlock(&dev->shutdown_lock);
+ return true;
}
static int nvme_pci_enable(struct nvme_dev *dev)
@@ -2797,7 +2809,8 @@ static void nvme_reset_work(struct work_struct *work)
nvme_dbbuf_set(dev);
nvme_unquiesce_io_queues(&dev->ctrl);
nvme_wait_freeze(&dev->ctrl);
- nvme_pci_update_nr_queues(dev);
+ if (!nvme_pci_update_nr_queues(dev))
+ goto out;
nvme_unfreeze(&dev->ctrl);
} else {
dev_warn(dev->ctrl.device, "IO queues lost\n");
diff --git a/drivers/nvme/host/pr.c b/drivers/nvme/host/pr.c
index 7347ddf85f00..dc7922f22600 100644
--- a/drivers/nvme/host/pr.c
+++ b/drivers/nvme/host/pr.c
@@ -5,7 +5,7 @@
*/
#include <linux/blkdev.h>
#include <linux/pr.h>
-#include <asm/unaligned.h>
+#include <linux/unaligned.h>
#include "nvme.h"
diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c
index c8fd0e8f0237..24a2759798d0 100644
--- a/drivers/nvme/host/rdma.c
+++ b/drivers/nvme/host/rdma.c
@@ -18,7 +18,7 @@
#include <linux/mutex.h>
#include <linux/scatterlist.h>
#include <linux/nvme.h>
-#include <asm/unaligned.h>
+#include <linux/unaligned.h>
#include <rdma/ib_verbs.h>
#include <rdma/rdma_cm.h>
diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c
index 89c44413c593..3e416af2659f 100644
--- a/drivers/nvme/host/tcp.c
+++ b/drivers/nvme/host/tcp.c
@@ -2644,10 +2644,11 @@ static int nvme_tcp_get_address(struct nvme_ctrl *ctrl, char *buf, int size)
len = nvmf_get_address(ctrl, buf, size);
+ if (!test_bit(NVME_TCP_Q_LIVE, &queue->flags))
+ return len;
+
mutex_lock(&queue->queue_lock);
- if (!test_bit(NVME_TCP_Q_LIVE, &queue->flags))
- goto done;
ret = kernel_getsockname(queue->sock, (struct sockaddr *)&src_addr);
if (ret > 0) {
if (len > 0)
@@ -2655,7 +2656,7 @@ static int nvme_tcp_get_address(struct nvme_ctrl *ctrl, char *buf, int size)
len += scnprintf(buf + len, size - len, "%ssrc_addr=%pISc\n",
(len) ? "," : "", &src_addr);
}
-done:
+
mutex_unlock(&queue->queue_lock);
return len;
diff --git a/drivers/nvme/host/trace.c b/drivers/nvme/host/trace.c
index 0288315f0050..87c437fc070d 100644
--- a/drivers/nvme/host/trace.c
+++ b/drivers/nvme/host/trace.c
@@ -4,7 +4,7 @@
* Copyright (c) 2018 Johannes Thumshirn, SUSE Linux GmbH
*/
-#include <asm/unaligned.h>
+#include <linux/unaligned.h>
#include "trace.h"
static const char *nvme_trace_delete_sq(struct trace_seq *p, u8 *cdw10)