summaryrefslogtreecommitdiffstats
path: root/drivers/nvme
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/nvme')
-rw-r--r--drivers/nvme/host/core.c86
-rw-r--r--drivers/nvme/host/fc.c6
-rw-r--r--drivers/nvme/host/multipath.c69
-rw-r--r--drivers/nvme/host/nvme.h31
-rw-r--r--drivers/nvme/host/pci.c17
-rw-r--r--drivers/nvme/host/rdma.c2
-rw-r--r--drivers/nvme/host/tcp.c4
-rw-r--r--drivers/nvme/target/configfs.c1
-rw-r--r--drivers/nvme/target/core.c6
-rw-r--r--drivers/nvme/target/loop.c2
-rw-r--r--drivers/nvme/target/passthru.c25
11 files changed, 147 insertions, 102 deletions
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index 96ee5a476c4d..154942fc64eb 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -241,17 +241,6 @@ static blk_status_t nvme_error_status(u16 status)
}
}
-static inline bool nvme_req_needs_retry(struct request *req)
-{
- if (blk_noretry_request(req))
- return false;
- if (nvme_req(req)->status & NVME_SC_DNR)
- return false;
- if (nvme_req(req)->retries >= nvme_max_retries)
- return false;
- return true;
-}
-
static void nvme_retry_req(struct request *req)
{
struct nvme_ns *ns = req->q->queuedata;
@@ -268,34 +257,67 @@ static void nvme_retry_req(struct request *req)
blk_mq_delay_kick_requeue_list(req->q, delay);
}
-void nvme_complete_rq(struct request *req)
+enum nvme_disposition {
+ COMPLETE,
+ RETRY,
+ FAILOVER,
+};
+
+static inline enum nvme_disposition nvme_decide_disposition(struct request *req)
{
- blk_status_t status = nvme_error_status(nvme_req(req)->status);
+ if (likely(nvme_req(req)->status == 0))
+ return COMPLETE;
- trace_nvme_complete_rq(req);
+ if (blk_noretry_request(req) ||
+ (nvme_req(req)->status & NVME_SC_DNR) ||
+ nvme_req(req)->retries >= nvme_max_retries)
+ return COMPLETE;
- nvme_cleanup_cmd(req);
+ if (req->cmd_flags & REQ_NVME_MPATH) {
+ if (nvme_is_path_error(nvme_req(req)->status) ||
+ blk_queue_dying(req->q))
+ return FAILOVER;
+ } else {
+ if (blk_queue_dying(req->q))
+ return COMPLETE;
+ }
- if (nvme_req(req)->ctrl->kas)
- nvme_req(req)->ctrl->comp_seen = true;
+ return RETRY;
+}
- if (unlikely(status != BLK_STS_OK && nvme_req_needs_retry(req))) {
- if ((req->cmd_flags & REQ_NVME_MPATH) && nvme_failover_req(req))
- return;
+static inline void nvme_end_req(struct request *req)
+{
+ blk_status_t status = nvme_error_status(nvme_req(req)->status);
- if (!blk_queue_dying(req->q)) {
- nvme_retry_req(req);
- return;
- }
- } else if (IS_ENABLED(CONFIG_BLK_DEV_ZONED) &&
- req_op(req) == REQ_OP_ZONE_APPEND) {
+ if (IS_ENABLED(CONFIG_BLK_DEV_ZONED) &&
+ req_op(req) == REQ_OP_ZONE_APPEND)
req->__sector = nvme_lba_to_sect(req->q->queuedata,
le64_to_cpu(nvme_req(req)->result.u64));
- }
nvme_trace_bio_complete(req, status);
blk_mq_end_request(req, status);
}
+
+void nvme_complete_rq(struct request *req)
+{
+ trace_nvme_complete_rq(req);
+ nvme_cleanup_cmd(req);
+
+ if (nvme_req(req)->ctrl->kas)
+ nvme_req(req)->ctrl->comp_seen = true;
+
+ switch (nvme_decide_disposition(req)) {
+ case COMPLETE:
+ nvme_end_req(req);
+ return;
+ case RETRY:
+ nvme_retry_req(req);
+ return;
+ case FAILOVER:
+ nvme_failover_req(req);
+ return;
+ }
+}
EXPORT_SYMBOL_GPL(nvme_complete_rq);
bool nvme_cancel_request(struct request *req, void *data, bool reserved)
@@ -2075,7 +2097,7 @@ static int __nvme_revalidate_disk(struct gendisk *disk, struct nvme_id_ns *id)
}
}
- if (iob)
+ if (iob && !blk_queue_is_zoned(ns->queue))
blk_queue_chunk_sectors(ns->queue, rounddown_pow_of_two(iob));
nvme_update_disk_info(disk, ns, id);
#ifdef CONFIG_NVME_MULTIPATH
@@ -2965,14 +2987,14 @@ static struct nvme_cel *nvme_find_cel(struct nvme_ctrl *ctrl, u8 csi)
{
struct nvme_cel *cel, *ret = NULL;
- spin_lock(&ctrl->lock);
+ spin_lock_irq(&ctrl->lock);
list_for_each_entry(cel, &ctrl->cels, entry) {
if (cel->csi == csi) {
ret = cel;
break;
}
}
- spin_unlock(&ctrl->lock);
+ spin_unlock_irq(&ctrl->lock);
return ret;
}
@@ -2999,9 +3021,9 @@ static int nvme_get_effects_log(struct nvme_ctrl *ctrl, u8 csi,
cel->csi = csi;
- spin_lock(&ctrl->lock);
+ spin_lock_irq(&ctrl->lock);
list_add_tail(&cel->entry, &ctrl->cels);
- spin_unlock(&ctrl->lock);
+ spin_unlock_irq(&ctrl->lock);
out:
*log = &cel->log;
return 0;
diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c
index eae43bb444e0..a7f474ddfff7 100644
--- a/drivers/nvme/host/fc.c
+++ b/drivers/nvme/host/fc.c
@@ -2035,7 +2035,7 @@ done:
}
__nvme_fc_fcpop_chk_teardowns(ctrl, op, opstate);
- if (!nvme_end_request(rq, status, result))
+ if (!nvme_try_complete_req(rq, status, result))
nvme_fc_complete_rq(rq);
check_error:
@@ -2078,7 +2078,7 @@ __nvme_fc_init_request(struct nvme_fc_ctrl *ctrl,
if (fc_dma_mapping_error(ctrl->lport->dev, op->fcp_req.cmddma)) {
dev_err(ctrl->dev,
"FCP Op failed - cmdiu dma mapping failed.\n");
- ret = EFAULT;
+ ret = -EFAULT;
goto out_on_error;
}
@@ -2088,7 +2088,7 @@ __nvme_fc_init_request(struct nvme_fc_ctrl *ctrl,
if (fc_dma_mapping_error(ctrl->lport->dev, op->fcp_req.rspdma)) {
dev_err(ctrl->dev,
"FCP Op failed - rspiu dma mapping failed.\n");
- ret = EFAULT;
+ ret = -EFAULT;
}
atomic_set(&op->state, FCPOP_STATE_IDLE);
diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c
index 3ded54d2c9c6..d4ba736c6c89 100644
--- a/drivers/nvme/host/multipath.c
+++ b/drivers/nvme/host/multipath.c
@@ -65,51 +65,30 @@ void nvme_set_disk_name(char *disk_name, struct nvme_ns *ns,
}
}
-bool nvme_failover_req(struct request *req)
+void nvme_failover_req(struct request *req)
{
struct nvme_ns *ns = req->q->queuedata;
- u16 status = nvme_req(req)->status;
+ u16 status = nvme_req(req)->status & 0x7ff;
unsigned long flags;
- switch (status & 0x7ff) {
- case NVME_SC_ANA_TRANSITION:
- case NVME_SC_ANA_INACCESSIBLE:
- case NVME_SC_ANA_PERSISTENT_LOSS:
- /*
- * If we got back an ANA error we know the controller is alive,
- * but not ready to serve this namespaces. The spec suggests
- * we should update our general state here, but due to the fact
- * that the admin and I/O queues are not serialized that is
- * fundamentally racy. So instead just clear the current path,
- * mark the the path as pending and kick of a re-read of the ANA
- * log page ASAP.
- */
- nvme_mpath_clear_current_path(ns);
- if (ns->ctrl->ana_log_buf) {
- set_bit(NVME_NS_ANA_PENDING, &ns->flags);
- queue_work(nvme_wq, &ns->ctrl->ana_work);
- }
- break;
- case NVME_SC_HOST_PATH_ERROR:
- case NVME_SC_HOST_ABORTED_CMD:
- /*
- * Temporary transport disruption in talking to the controller.
- * Try to send on a new path.
- */
- nvme_mpath_clear_current_path(ns);
- break;
- default:
- /* This was a non-ANA error so follow the normal error path. */
- return false;
+ nvme_mpath_clear_current_path(ns);
+
+ /*
+ * If we got back an ANA error, we know the controller is alive but not
+ * ready to serve this namespace. Kick of a re-read of the ANA
+ * information page, and just try any other available path for now.
+ */
+ if (nvme_is_ana_error(status) && ns->ctrl->ana_log_buf) {
+ set_bit(NVME_NS_ANA_PENDING, &ns->flags);
+ queue_work(nvme_wq, &ns->ctrl->ana_work);
}
spin_lock_irqsave(&ns->head->requeue_lock, flags);
blk_steal_bios(&ns->head->requeue_list, req);
spin_unlock_irqrestore(&ns->head->requeue_lock, flags);
- blk_mq_end_request(req, 0);
+ blk_mq_end_request(req, 0);
kblockd_schedule_work(&ns->head->requeue_work);
- return true;
}
void nvme_kick_requeue_lists(struct nvme_ctrl *ctrl)
@@ -233,7 +212,7 @@ static struct nvme_ns *nvme_next_ns(struct nvme_ns_head *head,
static struct nvme_ns *nvme_round_robin_path(struct nvme_ns_head *head,
int node, struct nvme_ns *old)
{
- struct nvme_ns *ns, *found, *fallback = NULL;
+ struct nvme_ns *ns, *found = NULL;
if (list_is_singular(&head->list)) {
if (nvme_path_is_disabled(old))
@@ -252,18 +231,22 @@ static struct nvme_ns *nvme_round_robin_path(struct nvme_ns_head *head,
goto out;
}
if (ns->ana_state == NVME_ANA_NONOPTIMIZED)
- fallback = ns;
+ found = ns;
}
- /* No optimized path found, re-check the current path */
+ /*
+ * The loop above skips the current path for round-robin semantics.
+ * Fall back to the current path if either:
+ * - no other optimized path found and current is optimized,
+ * - no other usable path found and current is usable.
+ */
if (!nvme_path_is_disabled(old) &&
- old->ana_state == NVME_ANA_OPTIMIZED) {
- found = old;
- goto out;
- }
- if (!fallback)
+ (old->ana_state == NVME_ANA_OPTIMIZED ||
+ (!found && old->ana_state == NVME_ANA_NONOPTIMIZED)))
+ return old;
+
+ if (!found)
return NULL;
- found = fallback;
out:
rcu_assign_pointer(head->current_path[node], found);
return found;
diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
index ebb8c3ed3885..e9cf29449dd1 100644
--- a/drivers/nvme/host/nvme.h
+++ b/drivers/nvme/host/nvme.h
@@ -523,7 +523,31 @@ static inline u32 nvme_bytes_to_numd(size_t len)
return (len >> 2) - 1;
}
-static inline bool nvme_end_request(struct request *req, __le16 status,
+static inline bool nvme_is_ana_error(u16 status)
+{
+ switch (status & 0x7ff) {
+ case NVME_SC_ANA_TRANSITION:
+ case NVME_SC_ANA_INACCESSIBLE:
+ case NVME_SC_ANA_PERSISTENT_LOSS:
+ return true;
+ default:
+ return false;
+ }
+}
+
+static inline bool nvme_is_path_error(u16 status)
+{
+ /* check for a status code type of 'path related status' */
+ return (status & 0x700) == 0x300;
+}
+
+/*
+ * Fill in the status and result information from the CQE, and then figure out
+ * if blk-mq will need to use IPI magic to complete the request, and if yes do
+ * so. If not let the caller complete the request without an indirect function
+ * call.
+ */
+static inline bool nvme_try_complete_req(struct request *req, __le16 status,
union nvme_result result)
{
struct nvme_request *rq = nvme_req(req);
@@ -629,7 +653,7 @@ void nvme_mpath_wait_freeze(struct nvme_subsystem *subsys);
void nvme_mpath_start_freeze(struct nvme_subsystem *subsys);
void nvme_set_disk_name(char *disk_name, struct nvme_ns *ns,
struct nvme_ctrl *ctrl, int *flags);
-bool nvme_failover_req(struct request *req);
+void nvme_failover_req(struct request *req);
void nvme_kick_requeue_lists(struct nvme_ctrl *ctrl);
int nvme_mpath_alloc_disk(struct nvme_ctrl *ctrl,struct nvme_ns_head *head);
void nvme_mpath_add_disk(struct nvme_ns *ns, struct nvme_id_ns *id);
@@ -688,9 +712,8 @@ static inline void nvme_set_disk_name(char *disk_name, struct nvme_ns *ns,
sprintf(disk_name, "nvme%dn%d", ctrl->instance, ns->head->instance);
}
-static inline bool nvme_failover_req(struct request *req)
+static inline void nvme_failover_req(struct request *req)
{
- return false;
}
static inline void nvme_kick_requeue_lists(struct nvme_ctrl *ctrl)
{
diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index 24d174a0623b..9aa948782704 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -120,7 +120,7 @@ struct nvme_dev {
unsigned max_qid;
unsigned io_queues[HCTX_MAX_TYPES];
unsigned int num_vecs;
- u16 q_depth;
+ u32 q_depth;
int io_sqes;
u32 db_stride;
void __iomem *bar;
@@ -157,13 +157,13 @@ struct nvme_dev {
static int io_queue_depth_set(const char *val, const struct kernel_param *kp)
{
int ret;
- u16 n;
+ u32 n;
- ret = kstrtou16(val, 10, &n);
+ ret = kstrtou32(val, 10, &n);
if (ret != 0 || n < 2)
return -EINVAL;
- return param_set_ushort(val, kp);
+ return param_set_uint(val, kp);
}
static inline unsigned int sq_idx(unsigned int qid, u32 stride)
@@ -195,7 +195,7 @@ struct nvme_queue {
dma_addr_t sq_dma_addr;
dma_addr_t cq_dma_addr;
u32 __iomem *q_db;
- u16 q_depth;
+ u32 q_depth;
u16 cq_vector;
u16 sq_tail;
u16 cq_head;
@@ -961,7 +961,7 @@ static inline void nvme_handle_cqe(struct nvme_queue *nvmeq, u16 idx)
req = blk_mq_tag_to_rq(nvme_queue_tagset(nvmeq), cqe->command_id);
trace_nvme_sq(req, cqe->sq_head, nvmeq->sq_tail);
- if (!nvme_end_request(req, cqe->status, cqe->result))
+ if (!nvme_try_complete_req(req, cqe->status, cqe->result))
nvme_pci_complete_rq(req);
}
@@ -2320,7 +2320,7 @@ static int nvme_pci_enable(struct nvme_dev *dev)
dev->ctrl.cap = lo_hi_readq(dev->bar + NVME_REG_CAP);
- dev->q_depth = min_t(u16, NVME_CAP_MQES(dev->ctrl.cap) + 1,
+ dev->q_depth = min_t(u32, NVME_CAP_MQES(dev->ctrl.cap) + 1,
io_queue_depth);
dev->ctrl.sqsize = dev->q_depth - 1; /* 0's based queue depth */
dev->db_stride = 1 << NVME_CAP_STRIDE(dev->ctrl.cap);
@@ -2460,7 +2460,8 @@ static int nvme_disable_prepare_reset(struct nvme_dev *dev, bool shutdown)
static int nvme_setup_prp_pools(struct nvme_dev *dev)
{
dev->prp_page_pool = dma_pool_create("prp list page", dev->dev,
- PAGE_SIZE, PAGE_SIZE, 0);
+ NVME_CTRL_PAGE_SIZE,
+ NVME_CTRL_PAGE_SIZE, 0);
if (!dev->prp_page_pool)
return -ENOMEM;
diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c
index 610e0b6bb6e4..eb1d54af7c77 100644
--- a/drivers/nvme/host/rdma.c
+++ b/drivers/nvme/host/rdma.c
@@ -1189,7 +1189,7 @@ static void nvme_rdma_end_request(struct nvme_rdma_request *req)
if (!refcount_dec_and_test(&req->ref))
return;
- if (!nvme_end_request(rq, req->status, req->result))
+ if (!nvme_try_complete_req(rq, req->status, req->result))
nvme_rdma_complete_rq(rq);
}
diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c
index f0c4bb47d22d..a44d8ace3a81 100644
--- a/drivers/nvme/host/tcp.c
+++ b/drivers/nvme/host/tcp.c
@@ -481,7 +481,7 @@ static int nvme_tcp_process_nvme_cqe(struct nvme_tcp_queue *queue,
return -EINVAL;
}
- if (!nvme_end_request(rq, cqe->status, cqe->result))
+ if (!nvme_try_complete_req(rq, cqe->status, cqe->result))
nvme_complete_rq(rq);
queue->nr_cqe++;
@@ -672,7 +672,7 @@ static inline void nvme_tcp_end_request(struct request *rq, u16 status)
{
union nvme_result res = {};
- if (!nvme_end_request(rq, cpu_to_le16(status << 1), res))
+ if (!nvme_try_complete_req(rq, cpu_to_le16(status << 1), res))
nvme_complete_rq(rq);
}
diff --git a/drivers/nvme/target/configfs.c b/drivers/nvme/target/configfs.c
index 74b2b61c773b..37e1d7784e17 100644
--- a/drivers/nvme/target/configfs.c
+++ b/drivers/nvme/target/configfs.c
@@ -1136,6 +1136,7 @@ static ssize_t nvmet_subsys_attr_model_store(struct config_item *item,
up_write(&nvmet_config_sem);
kfree_rcu(new_model, rcuhead);
+ kfree(new_model_number);
return count;
}
diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c
index ca7a58dae275..b7b63330b5ef 100644
--- a/drivers/nvme/target/core.c
+++ b/drivers/nvme/target/core.c
@@ -397,6 +397,9 @@ static void nvmet_keep_alive_timer(struct work_struct *work)
static void nvmet_start_keep_alive_timer(struct nvmet_ctrl *ctrl)
{
+ if (unlikely(ctrl->kato == 0))
+ return;
+
pr_debug("ctrl %d start keep-alive timer for %d secs\n",
ctrl->cntlid, ctrl->kato);
@@ -406,6 +409,9 @@ static void nvmet_start_keep_alive_timer(struct nvmet_ctrl *ctrl)
static void nvmet_stop_keep_alive_timer(struct nvmet_ctrl *ctrl)
{
+ if (unlikely(ctrl->kato == 0))
+ return;
+
pr_debug("ctrl %d stop keep-alive\n", ctrl->cntlid);
cancel_delayed_work_sync(&ctrl->ka_work);
diff --git a/drivers/nvme/target/loop.c b/drivers/nvme/target/loop.c
index 4884ef1e46a2..0d6008cf66a2 100644
--- a/drivers/nvme/target/loop.c
+++ b/drivers/nvme/target/loop.c
@@ -115,7 +115,7 @@ static void nvme_loop_queue_response(struct nvmet_req *req)
return;
}
- if (!nvme_end_request(rq, cqe->status, cqe->result))
+ if (!nvme_try_complete_req(rq, cqe->status, cqe->result))
nvme_loop_complete_rq(rq);
}
}
diff --git a/drivers/nvme/target/passthru.c b/drivers/nvme/target/passthru.c
index 89d91dc999a6..8bd7f656e240 100644
--- a/drivers/nvme/target/passthru.c
+++ b/drivers/nvme/target/passthru.c
@@ -165,7 +165,7 @@ static void nvmet_passthru_execute_cmd_work(struct work_struct *w)
req->cqe->result = nvme_req(rq)->result;
nvmet_req_complete(req, status);
- blk_put_request(rq);
+ blk_mq_free_request(rq);
}
static void nvmet_passthru_req_done(struct request *rq,
@@ -175,7 +175,7 @@ static void nvmet_passthru_req_done(struct request *rq,
req->cqe->result = nvme_req(rq)->result;
nvmet_req_complete(req, nvme_req(rq)->status);
- blk_put_request(rq);
+ blk_mq_free_request(rq);
}
static int nvmet_passthru_map_sg(struct nvmet_req *req, struct request *rq)
@@ -230,7 +230,7 @@ static void nvmet_passthru_execute_cmd(struct nvmet_req *req)
if (unlikely(!ns)) {
pr_err("failed to get passthru ns nsid:%u\n", nsid);
status = NVME_SC_INVALID_NS | NVME_SC_DNR;
- goto fail_out;
+ goto out;
}
q = ns->queue;
@@ -238,16 +238,15 @@ static void nvmet_passthru_execute_cmd(struct nvmet_req *req)
rq = nvme_alloc_request(q, req->cmd, BLK_MQ_REQ_NOWAIT, NVME_QID_ANY);
if (IS_ERR(rq)) {
- rq = NULL;
status = NVME_SC_INTERNAL;
- goto fail_out;
+ goto out_put_ns;
}
if (req->sg_cnt) {
ret = nvmet_passthru_map_sg(req, rq);
if (unlikely(ret)) {
status = NVME_SC_INTERNAL;
- goto fail_out;
+ goto out_put_req;
}
}
@@ -274,11 +273,13 @@ static void nvmet_passthru_execute_cmd(struct nvmet_req *req)
return;
-fail_out:
+out_put_req:
+ blk_mq_free_request(rq);
+out_put_ns:
if (ns)
nvme_put_ns(ns);
+out:
nvmet_req_complete(req, status);
- blk_put_request(rq);
}
/*
@@ -326,6 +327,10 @@ static u16 nvmet_setup_passthru_command(struct nvmet_req *req)
u16 nvmet_parse_passthru_io_cmd(struct nvmet_req *req)
{
+ /* Reject any commands with non-sgl flags set (ie. fused commands) */
+ if (req->cmd->common.flags & ~NVME_CMD_SGL_ALL)
+ return NVME_SC_INVALID_FIELD;
+
switch (req->cmd->common.opcode) {
case nvme_cmd_resv_register:
case nvme_cmd_resv_report:
@@ -396,6 +401,10 @@ static u16 nvmet_passthru_get_set_features(struct nvmet_req *req)
u16 nvmet_parse_passthru_admin_cmd(struct nvmet_req *req)
{
+ /* Reject any commands with non-sgl flags set (ie. fused commands) */
+ if (req->cmd->common.flags & ~NVME_CMD_SGL_ALL)
+ return NVME_SC_INVALID_FIELD;
+
/*
* Passthru all vendor specific commands
*/