diff options
Diffstat (limited to 'drivers/nvme/host')
-rw-r--r-- | drivers/nvme/host/Makefile | 1 | ||||
-rw-r--r-- | drivers/nvme/host/core.c | 150 | ||||
-rw-r--r-- | drivers/nvme/host/fabrics.c | 83 | ||||
-rw-r--r-- | drivers/nvme/host/fabrics.h | 33 | ||||
-rw-r--r-- | drivers/nvme/host/fault_inject.c | 79 | ||||
-rw-r--r-- | drivers/nvme/host/fc.c | 48 | ||||
-rw-r--r-- | drivers/nvme/host/lightnvm.c | 757 | ||||
-rw-r--r-- | drivers/nvme/host/multipath.c | 8 | ||||
-rw-r--r-- | drivers/nvme/host/nvme.h | 37 | ||||
-rw-r--r-- | drivers/nvme/host/pci.c | 57 | ||||
-rw-r--r-- | drivers/nvme/host/rdma.c | 48 |
11 files changed, 962 insertions, 339 deletions
diff --git a/drivers/nvme/host/Makefile b/drivers/nvme/host/Makefile index 441e67e3a9d7..aea459c65ae1 100644 --- a/drivers/nvme/host/Makefile +++ b/drivers/nvme/host/Makefile @@ -12,6 +12,7 @@ nvme-core-y := core.o nvme-core-$(CONFIG_TRACING) += trace.o nvme-core-$(CONFIG_NVME_MULTIPATH) += multipath.o nvme-core-$(CONFIG_NVM) += lightnvm.o +nvme-core-$(CONFIG_FAULT_INJECTION_DEBUG_FS) += fault_inject.o nvme-y += pci.o diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 7aeca5db7916..9df4f71e58ca 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -100,11 +100,6 @@ static struct class *nvme_subsys_class; static void nvme_ns_remove(struct nvme_ns *ns); static int nvme_revalidate_disk(struct gendisk *disk); -static __le32 nvme_get_log_dw10(u8 lid, size_t size) -{ - return cpu_to_le32((((size / 4) - 1) << 16) | lid); -} - int nvme_reset_ctrl(struct nvme_ctrl *ctrl) { if (!nvme_change_ctrl_state(ctrl, NVME_CTRL_RESETTING)) @@ -135,6 +130,9 @@ static void nvme_delete_ctrl_work(struct work_struct *work) struct nvme_ctrl *ctrl = container_of(work, struct nvme_ctrl, delete_work); + dev_info(ctrl->device, + "Removing ctrl: NQN \"%s\"\n", ctrl->opts->subsysnqn); + flush_work(&ctrl->reset_work); nvme_stop_ctrl(ctrl); nvme_remove_namespaces(ctrl); @@ -378,6 +376,15 @@ static void nvme_put_ns(struct nvme_ns *ns) kref_put(&ns->kref, nvme_free_ns); } +static inline void nvme_clear_nvme_request(struct request *req) +{ + if (!(req->rq_flags & RQF_DONTPREP)) { + nvme_req(req)->retries = 0; + nvme_req(req)->flags = 0; + req->rq_flags |= RQF_DONTPREP; + } +} + struct request *nvme_alloc_request(struct request_queue *q, struct nvme_command *cmd, blk_mq_req_flags_t flags, int qid) { @@ -394,6 +401,7 @@ struct request *nvme_alloc_request(struct request_queue *q, return req; req->cmd_flags |= REQ_FAILFAST_DRIVER; + nvme_clear_nvme_request(req); nvme_req(req)->cmd = cmd; return req; @@ -610,11 +618,7 @@ blk_status_t nvme_setup_cmd(struct nvme_ns *ns, struct request *req, { blk_status_t ret = BLK_STS_OK; - if (!(req->rq_flags & RQF_DONTPREP)) { - nvme_req(req)->retries = 0; - nvme_req(req)->flags = 0; - req->rq_flags |= RQF_DONTPREP; - } + nvme_clear_nvme_request(req); switch (req_op(req)) { case REQ_OP_DRV_IN: @@ -744,6 +748,7 @@ static int nvme_submit_user_cmd(struct request_queue *q, return PTR_ERR(req); req->timeout = timeout ? timeout : ADMIN_TIMEOUT; + nvme_req(req)->flags |= NVME_REQ_USERCMD; if (ubuffer && bufflen) { ret = blk_rq_map_user(q, req, NULL, ubuffer, bufflen, @@ -828,7 +833,7 @@ static void nvme_keep_alive_work(struct work_struct *work) } } -void nvme_start_keep_alive(struct nvme_ctrl *ctrl) +static void nvme_start_keep_alive(struct nvme_ctrl *ctrl) { if (unlikely(ctrl->kato == 0)) return; @@ -838,7 +843,6 @@ void nvme_start_keep_alive(struct nvme_ctrl *ctrl) ctrl->ka_cmd.common.opcode = nvme_admin_keep_alive; schedule_delayed_work(&ctrl->ka_work, ctrl->kato * HZ); } -EXPORT_SYMBOL_GPL(nvme_start_keep_alive); void nvme_stop_keep_alive(struct nvme_ctrl *ctrl) { @@ -948,7 +952,8 @@ static int nvme_identify_ns_list(struct nvme_ctrl *dev, unsigned nsid, __le32 *n c.identify.opcode = nvme_admin_identify; c.identify.cns = NVME_ID_CNS_NS_ACTIVE_LIST; c.identify.nsid = cpu_to_le32(nsid); - return nvme_submit_sync_cmd(dev->admin_q, &c, ns_list, 0x1000); + return nvme_submit_sync_cmd(dev->admin_q, &c, ns_list, + NVME_IDENTIFY_DATA_SIZE); } static struct nvme_id_ns *nvme_identify_ns(struct nvme_ctrl *ctrl, @@ -1104,7 +1109,7 @@ static u32 nvme_passthru_start(struct nvme_ctrl *ctrl, struct nvme_ns *ns, } if (ctrl->effects) - effects = le32_to_cpu(ctrl->effects->iocs[opcode]); + effects = le32_to_cpu(ctrl->effects->acs[opcode]); else effects = nvme_known_admin_effects(opcode); @@ -1124,13 +1129,13 @@ static void nvme_update_formats(struct nvme_ctrl *ctrl) struct nvme_ns *ns, *next; LIST_HEAD(rm_list); - mutex_lock(&ctrl->namespaces_mutex); + down_write(&ctrl->namespaces_rwsem); list_for_each_entry(ns, &ctrl->namespaces, list) { if (ns->disk && nvme_revalidate_disk(ns->disk)) { list_move_tail(&ns->list, &rm_list); } } - mutex_unlock(&ctrl->namespaces_mutex); + up_write(&ctrl->namespaces_rwsem); list_for_each_entry_safe(ns, next, &rm_list, list) nvme_ns_remove(ns); @@ -1358,7 +1363,7 @@ static void nvme_config_discard(struct nvme_ctrl *ctrl, blk_queue_max_discard_sectors(queue, UINT_MAX); blk_queue_max_discard_segments(queue, NVME_DSM_MAX_RANGES); - queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, queue); + blk_queue_flag_set(QUEUE_FLAG_DISCARD, queue); if (ctrl->quirks & NVME_QUIRK_DEALLOCATE_ZEROES) blk_queue_max_write_zeroes_sectors(queue, UINT_MAX); @@ -1449,6 +1454,8 @@ static void __nvme_revalidate_disk(struct gendisk *disk, struct nvme_id_ns *id) if (ns->noiob) nvme_set_chunk_size(ns); nvme_update_disk_info(disk, ns, id); + if (ns->ndev) + nvme_nvm_update_nvm_info(ns); #ifdef CONFIG_NVME_MULTIPATH if (ns->head->disk) nvme_update_disk_info(ns->head->disk, ns, id); @@ -2217,18 +2224,35 @@ out_unlock: return ret; } -static int nvme_get_log(struct nvme_ctrl *ctrl, u8 log_page, void *log, - size_t size) +int nvme_get_log_ext(struct nvme_ctrl *ctrl, struct nvme_ns *ns, + u8 log_page, void *log, + size_t size, u64 offset) { struct nvme_command c = { }; + unsigned long dwlen = size / 4 - 1; + + c.get_log_page.opcode = nvme_admin_get_log_page; + + if (ns) + c.get_log_page.nsid = cpu_to_le32(ns->head->ns_id); + else + c.get_log_page.nsid = cpu_to_le32(NVME_NSID_ALL); - c.common.opcode = nvme_admin_get_log_page; - c.common.nsid = cpu_to_le32(NVME_NSID_ALL); - c.common.cdw10[0] = nvme_get_log_dw10(log_page, size); + c.get_log_page.lid = log_page; + c.get_log_page.numdl = cpu_to_le16(dwlen & ((1 << 16) - 1)); + c.get_log_page.numdu = cpu_to_le16(dwlen >> 16); + c.get_log_page.lpol = cpu_to_le32(lower_32_bits(offset)); + c.get_log_page.lpou = cpu_to_le32(upper_32_bits(offset)); return nvme_submit_sync_cmd(ctrl->admin_q, &c, log, size); } +static int nvme_get_log(struct nvme_ctrl *ctrl, u8 log_page, void *log, + size_t size) +{ + return nvme_get_log_ext(ctrl, NULL, log_page, log, size, 0); +} + static int nvme_get_effects_log(struct nvme_ctrl *ctrl) { int ret; @@ -2440,7 +2464,7 @@ static int nvme_dev_user_cmd(struct nvme_ctrl *ctrl, void __user *argp) struct nvme_ns *ns; int ret; - mutex_lock(&ctrl->namespaces_mutex); + down_read(&ctrl->namespaces_rwsem); if (list_empty(&ctrl->namespaces)) { ret = -ENOTTY; goto out_unlock; @@ -2457,14 +2481,14 @@ static int nvme_dev_user_cmd(struct nvme_ctrl *ctrl, void __user *argp) dev_warn(ctrl->device, "using deprecated NVME_IOCTL_IO_CMD ioctl on the char device!\n"); kref_get(&ns->kref); - mutex_unlock(&ctrl->namespaces_mutex); + up_read(&ctrl->namespaces_rwsem); ret = nvme_user_cmd(ctrl, ns, argp); nvme_put_ns(ns); return ret; out_unlock: - mutex_unlock(&ctrl->namespaces_mutex); + up_read(&ctrl->namespaces_rwsem); return ret; } @@ -2793,6 +2817,7 @@ static int __nvme_check_ids(struct nvme_subsystem *subsys, list_for_each_entry(h, &subsys->nsheads, entry) { if (nvme_ns_ids_valid(&new->ids) && + !list_empty(&h->list) && nvme_ns_ids_equal(&new->ids, &h->ids)) return -EINVAL; } @@ -2814,7 +2839,9 @@ static struct nvme_ns_head *nvme_alloc_ns_head(struct nvme_ctrl *ctrl, goto out_free_head; head->instance = ret; INIT_LIST_HEAD(&head->list); - init_srcu_struct(&head->srcu); + ret = init_srcu_struct(&head->srcu); + if (ret) + goto out_ida_remove; head->subsys = ctrl->subsys; head->ns_id = nsid; kref_init(&head->ref); @@ -2836,6 +2863,7 @@ static struct nvme_ns_head *nvme_alloc_ns_head(struct nvme_ctrl *ctrl, return head; out_cleanup_srcu: cleanup_srcu_struct(&head->srcu); +out_ida_remove: ida_simple_remove(&ctrl->subsys->ns_ida, head->instance); out_free_head: kfree(head); @@ -2893,7 +2921,7 @@ static struct nvme_ns *nvme_find_get_ns(struct nvme_ctrl *ctrl, unsigned nsid) { struct nvme_ns *ns, *ret = NULL; - mutex_lock(&ctrl->namespaces_mutex); + down_read(&ctrl->namespaces_rwsem); list_for_each_entry(ns, &ctrl->namespaces, list) { if (ns->head->ns_id == nsid) { if (!kref_get_unless_zero(&ns->kref)) @@ -2904,7 +2932,7 @@ static struct nvme_ns *nvme_find_get_ns(struct nvme_ctrl *ctrl, unsigned nsid) if (ns->head->ns_id > nsid) break; } - mutex_unlock(&ctrl->namespaces_mutex); + up_read(&ctrl->namespaces_rwsem); return ret; } @@ -2949,7 +2977,7 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid) ns->queue = blk_mq_init_queue(ctrl->tagset); if (IS_ERR(ns->queue)) goto out_free_ns; - queue_flag_set_unlocked(QUEUE_FLAG_NONROT, ns->queue); + blk_queue_flag_set(QUEUE_FLAG_NONROT, ns->queue); ns->queue->queuedata = ns; ns->ctrl = ctrl; @@ -3015,9 +3043,9 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid) __nvme_revalidate_disk(disk, id); - mutex_lock(&ctrl->namespaces_mutex); + down_write(&ctrl->namespaces_rwsem); list_add_tail(&ns->list, &ctrl->namespaces); - mutex_unlock(&ctrl->namespaces_mutex); + up_write(&ctrl->namespaces_rwsem); nvme_get_ctrl(ctrl); @@ -3033,6 +3061,7 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid) ns->disk->disk_name); nvme_mpath_add_disk(ns->head); + nvme_fault_inject_init(ns); return; out_unlink_ns: mutex_lock(&ctrl->subsys->lock); @@ -3051,6 +3080,7 @@ static void nvme_ns_remove(struct nvme_ns *ns) if (test_and_set_bit(NVME_NS_REMOVING, &ns->flags)) return; + nvme_fault_inject_fini(ns); if (ns->disk && ns->disk->flags & GENHD_FL_UP) { sysfs_remove_group(&disk_to_dev(ns->disk)->kobj, &nvme_ns_id_attr_group); @@ -3067,9 +3097,9 @@ static void nvme_ns_remove(struct nvme_ns *ns) list_del_rcu(&ns->siblings); mutex_unlock(&ns->ctrl->subsys->lock); - mutex_lock(&ns->ctrl->namespaces_mutex); + down_write(&ns->ctrl->namespaces_rwsem); list_del_init(&ns->list); - mutex_unlock(&ns->ctrl->namespaces_mutex); + up_write(&ns->ctrl->namespaces_rwsem); synchronize_srcu(&ns->head->srcu); nvme_mpath_check_last_path(ns); @@ -3093,11 +3123,18 @@ static void nvme_remove_invalid_namespaces(struct nvme_ctrl *ctrl, unsigned nsid) { struct nvme_ns *ns, *next; + LIST_HEAD(rm_list); + down_write(&ctrl->namespaces_rwsem); list_for_each_entry_safe(ns, next, &ctrl->namespaces, list) { if (ns->head->ns_id > nsid) - nvme_ns_remove(ns); + list_move_tail(&ns->list, &rm_list); } + up_write(&ctrl->namespaces_rwsem); + + list_for_each_entry_safe(ns, next, &rm_list, list) + nvme_ns_remove(ns); + } static int nvme_scan_ns_list(struct nvme_ctrl *ctrl, unsigned nn) @@ -3107,7 +3144,7 @@ static int nvme_scan_ns_list(struct nvme_ctrl *ctrl, unsigned nn) unsigned i, j, nsid, prev = 0, num_lists = DIV_ROUND_UP(nn, 1024); int ret = 0; - ns_list = kzalloc(0x1000, GFP_KERNEL); + ns_list = kzalloc(NVME_IDENTIFY_DATA_SIZE, GFP_KERNEL); if (!ns_list) return -ENOMEM; @@ -3173,9 +3210,9 @@ static void nvme_scan_work(struct work_struct *work) } nvme_scan_ns_sequential(ctrl, nn); done: - mutex_lock(&ctrl->namespaces_mutex); + down_write(&ctrl->namespaces_rwsem); list_sort(NULL, &ctrl->namespaces, ns_cmp); - mutex_unlock(&ctrl->namespaces_mutex); + up_write(&ctrl->namespaces_rwsem); kfree(id); } @@ -3197,6 +3234,7 @@ EXPORT_SYMBOL_GPL(nvme_queue_scan); void nvme_remove_namespaces(struct nvme_ctrl *ctrl) { struct nvme_ns *ns, *next; + LIST_HEAD(ns_list); /* * The dead states indicates the controller was not gracefully @@ -3207,7 +3245,11 @@ void nvme_remove_namespaces(struct nvme_ctrl *ctrl) if (ctrl->state == NVME_CTRL_DEAD) nvme_kill_queues(ctrl); - list_for_each_entry_safe(ns, next, &ctrl->namespaces, list) + down_write(&ctrl->namespaces_rwsem); + list_splice_init(&ctrl->namespaces, &ns_list); + up_write(&ctrl->namespaces_rwsem); + + list_for_each_entry_safe(ns, next, &ns_list, list) nvme_ns_remove(ns); } EXPORT_SYMBOL_GPL(nvme_remove_namespaces); @@ -3337,6 +3379,8 @@ void nvme_stop_ctrl(struct nvme_ctrl *ctrl) flush_work(&ctrl->async_event_work); flush_work(&ctrl->scan_work); cancel_work_sync(&ctrl->fw_act_work); + if (ctrl->ops->stop_ctrl) + ctrl->ops->stop_ctrl(ctrl); } EXPORT_SYMBOL_GPL(nvme_stop_ctrl); @@ -3394,7 +3438,7 @@ int nvme_init_ctrl(struct nvme_ctrl *ctrl, struct device *dev, ctrl->state = NVME_CTRL_NEW; spin_lock_init(&ctrl->lock); INIT_LIST_HEAD(&ctrl->namespaces); - mutex_init(&ctrl->namespaces_mutex); + init_rwsem(&ctrl->namespaces_rwsem); ctrl->dev = dev; ctrl->ops = ops; ctrl->quirks = quirks; @@ -3455,7 +3499,7 @@ void nvme_kill_queues(struct nvme_ctrl *ctrl) { struct nvme_ns *ns; - mutex_lock(&ctrl->namespaces_mutex); + down_read(&ctrl->namespaces_rwsem); /* Forcibly unquiesce queues to avoid blocking dispatch */ if (ctrl->admin_q) @@ -3474,7 +3518,7 @@ void nvme_kill_queues(struct nvme_ctrl *ctrl) /* Forcibly unquiesce queues to avoid blocking dispatch */ blk_mq_unquiesce_queue(ns->queue); } - mutex_unlock(&ctrl->namespaces_mutex); + up_read(&ctrl->namespaces_rwsem); } EXPORT_SYMBOL_GPL(nvme_kill_queues); @@ -3482,10 +3526,10 @@ void nvme_unfreeze(struct nvme_ctrl *ctrl) { struct nvme_ns *ns; - mutex_lock(&ctrl->namespaces_mutex); + down_read(&ctrl->namespaces_rwsem); list_for_each_entry(ns, &ctrl->namespaces, list) blk_mq_unfreeze_queue(ns->queue); - mutex_unlock(&ctrl->namespaces_mutex); + up_read(&ctrl->namespaces_rwsem); } EXPORT_SYMBOL_GPL(nvme_unfreeze); @@ -3493,13 +3537,13 @@ void nvme_wait_freeze_timeout(struct nvme_ctrl *ctrl, long timeout) { struct nvme_ns *ns; - mutex_lock(&ctrl->namespaces_mutex); + down_read(&ctrl->namespaces_rwsem); list_for_each_entry(ns, &ctrl->namespaces, list) { timeout = blk_mq_freeze_queue_wait_timeout(ns->queue, timeout); if (timeout <= 0) break; } - mutex_unlock(&ctrl->namespaces_mutex); + up_read(&ctrl->namespaces_rwsem); } EXPORT_SYMBOL_GPL(nvme_wait_freeze_timeout); @@ -3507,10 +3551,10 @@ void nvme_wait_freeze(struct nvme_ctrl *ctrl) { struct nvme_ns *ns; - mutex_lock(&ctrl->namespaces_mutex); + down_read(&ctrl->namespaces_rwsem); list_for_each_entry(ns, &ctrl->namespaces, list) blk_mq_freeze_queue_wait(ns->queue); - mutex_unlock(&ctrl->namespaces_mutex); + up_read(&ctrl->namespaces_rwsem); } EXPORT_SYMBOL_GPL(nvme_wait_freeze); @@ -3518,10 +3562,10 @@ void nvme_start_freeze(struct nvme_ctrl *ctrl) { struct nvme_ns *ns; - mutex_lock(&ctrl->namespaces_mutex); + down_read(&ctrl->namespaces_rwsem); list_for_each_entry(ns, &ctrl->namespaces, list) blk_freeze_queue_start(ns->queue); - mutex_unlock(&ctrl->namespaces_mutex); + up_read(&ctrl->namespaces_rwsem); } EXPORT_SYMBOL_GPL(nvme_start_freeze); @@ -3529,10 +3573,10 @@ void nvme_stop_queues(struct nvme_ctrl *ctrl) { struct nvme_ns *ns; - mutex_lock(&ctrl->namespaces_mutex); + down_read(&ctrl->namespaces_rwsem); list_for_each_entry(ns, &ctrl->namespaces, list) blk_mq_quiesce_queue(ns->queue); - mutex_unlock(&ctrl->namespaces_mutex); + up_read(&ctrl->namespaces_rwsem); } EXPORT_SYMBOL_GPL(nvme_stop_queues); @@ -3540,10 +3584,10 @@ void nvme_start_queues(struct nvme_ctrl *ctrl) { struct nvme_ns *ns; - mutex_lock(&ctrl->namespaces_mutex); + down_read(&ctrl->namespaces_rwsem); list_for_each_entry(ns, &ctrl->namespaces, list) blk_mq_unquiesce_queue(ns->queue); - mutex_unlock(&ctrl->namespaces_mutex); + up_read(&ctrl->namespaces_rwsem); } EXPORT_SYMBOL_GPL(nvme_start_queues); diff --git a/drivers/nvme/host/fabrics.c b/drivers/nvme/host/fabrics.c index 8f0f34d06d46..124c458806df 100644 --- a/drivers/nvme/host/fabrics.c +++ b/drivers/nvme/host/fabrics.c @@ -536,6 +536,85 @@ static struct nvmf_transport_ops *nvmf_lookup_transport( return NULL; } +blk_status_t nvmf_check_if_ready(struct nvme_ctrl *ctrl, struct request *rq, + bool queue_live, bool is_connected) +{ + struct nvme_command *cmd = nvme_req(rq)->cmd; + + if (likely(ctrl->state == NVME_CTRL_LIVE && is_connected)) + return BLK_STS_OK; + + switch (ctrl->state) { + case NVME_CTRL_DELETING: + goto reject_io; + + case NVME_CTRL_NEW: + case NVME_CTRL_CONNECTING: + if (!is_connected) + /* + * This is the case of starting a new + * association but connectivity was lost + * before it was fully created. We need to + * error the commands used to initialize the + * controller so the reconnect can go into a + * retry attempt. The commands should all be + * marked REQ_FAILFAST_DRIVER, which will hit + * the reject path below. Anything else will + * be queued while the state settles. + */ + goto reject_or_queue_io; + + if ((queue_live && + !(nvme_req(rq)->flags & NVME_REQ_USERCMD)) || + (!queue_live && blk_rq_is_passthrough(rq) && + cmd->common.opcode == nvme_fabrics_command && + cmd->fabrics.fctype == nvme_fabrics_type_connect)) + /* + * If queue is live, allow only commands that + * are internally generated pass through. These + * are commands on the admin queue to initialize + * the controller. This will reject any ioctl + * admin cmds received while initializing. + * + * If the queue is not live, allow only a + * connect command. This will reject any ioctl + * admin cmd as well as initialization commands + * if the controller reverted the queue to non-live. + */ + return BLK_STS_OK; + + /* + * fall-thru to the reject_or_queue_io clause + */ + break; + + /* these cases fall-thru + * case NVME_CTRL_LIVE: + * case NVME_CTRL_RESETTING: + */ + default: + break; + } + +reject_or_queue_io: + /* + * Any other new io is something we're not in a state to send + * to the device. Default action is to busy it and retry it + * after the controller state is recovered. However, anything + * marked for failfast or nvme multipath is immediately failed. + * Note: commands used to initialize the controller will be + * marked for failfast. + * Note: nvme cli/ioctl commands are marked for failfast. + */ + if (!blk_noretry_request(rq) && !(rq->cmd_flags & REQ_NVME_MPATH)) + return BLK_STS_RESOURCE; + +reject_io: + nvme_req(rq)->status = NVME_SC_ABORT_REQ; + return BLK_STS_IOERR; +} +EXPORT_SYMBOL_GPL(nvmf_check_if_ready); + static const match_table_t opt_tokens = { { NVMF_OPT_TRANSPORT, "transport=%s" }, { NVMF_OPT_TRADDR, "traddr=%s" }, @@ -608,8 +687,10 @@ static int nvmf_parse_options(struct nvmf_ctrl_options *opts, opts->discovery_nqn = !(strcmp(opts->subsysnqn, NVME_DISC_SUBSYS_NAME)); - if (opts->discovery_nqn) + if (opts->discovery_nqn) { + opts->kato = 0; opts->nr_io_queues = 0; + } break; case NVMF_OPT_TRADDR: p = match_strdup(args); diff --git a/drivers/nvme/host/fabrics.h b/drivers/nvme/host/fabrics.h index a3145d90c1d2..ef46c915b7b5 100644 --- a/drivers/nvme/host/fabrics.h +++ b/drivers/nvme/host/fabrics.h @@ -157,36 +157,7 @@ void nvmf_unregister_transport(struct nvmf_transport_ops *ops); void nvmf_free_options(struct nvmf_ctrl_options *opts); int nvmf_get_address(struct nvme_ctrl *ctrl, char *buf, int size); bool nvmf_should_reconnect(struct nvme_ctrl *ctrl); - -static inline blk_status_t nvmf_check_init_req(struct nvme_ctrl *ctrl, - struct request *rq) -{ - struct nvme_command *cmd = nvme_req(rq)->cmd; - - /* - * We cannot accept any other command until the connect command has - * completed, so only allow connect to pass. - */ - if (!blk_rq_is_passthrough(rq) || - cmd->common.opcode != nvme_fabrics_command || - cmd->fabrics.fctype != nvme_fabrics_type_connect) { - /* - * Connecting state means transport disruption or initial - * establishment, which can take a long time and even might - * fail permanently, fail fast to give upper layers a chance - * to failover. - * Deleting state means that the ctrl will never accept commands - * again, fail it permanently. - */ - if (ctrl->state == NVME_CTRL_CONNECTING || - ctrl->state == NVME_CTRL_DELETING) { - nvme_req(rq)->status = NVME_SC_ABORT_REQ; - return BLK_STS_IOERR; - } - return BLK_STS_RESOURCE; /* try again later */ - } - - return BLK_STS_OK; -} +blk_status_t nvmf_check_if_ready(struct nvme_ctrl *ctrl, + struct request *rq, bool queue_live, bool is_connected); #endif /* _NVME_FABRICS_H */ diff --git a/drivers/nvme/host/fault_inject.c b/drivers/nvme/host/fault_inject.c new file mode 100644 index 000000000000..02632266ac06 --- /dev/null +++ b/drivers/nvme/host/fault_inject.c @@ -0,0 +1,79 @@ +/* + * fault injection support for nvme. + * + * Copyright (c) 2018, Oracle and/or its affiliates + * + */ + +#include <linux/moduleparam.h> +#include "nvme.h" + +static DECLARE_FAULT_ATTR(fail_default_attr); +/* optional fault injection attributes boot time option: + * nvme_core.fail_request=<interval>,<probability>,<space>,<times> + */ +static char *fail_request; +module_param(fail_request, charp, 0000); + +void nvme_fault_inject_init(struct nvme_ns *ns) +{ + struct dentry *dir, *parent; + char *name = ns->disk->disk_name; + struct nvme_fault_inject *fault_inj = &ns->fault_inject; + struct fault_attr *attr = &fault_inj->attr; + + /* set default fault injection attribute */ + if (fail_request) + setup_fault_attr(&fail_default_attr, fail_request); + + /* create debugfs directory and attribute */ + parent = debugfs_create_dir(name, NULL); + if (!parent) { + pr_warn("%s: failed to create debugfs directory\n", name); + return; + } + + *attr = fail_default_attr; + dir = fault_create_debugfs_attr("fault_inject", parent, attr); + if (IS_ERR(dir)) { + pr_warn("%s: failed to create debugfs attr\n", name); + debugfs_remove_recursive(parent); + return; + } + ns->fault_inject.parent = parent; + + /* create debugfs for status code and dont_retry */ + fault_inj->status = NVME_SC_INVALID_OPCODE; + fault_inj->dont_retry = true; + debugfs_create_x16("status", 0600, dir, &fault_inj->status); + debugfs_create_bool("dont_retry", 0600, dir, &fault_inj->dont_retry); +} + +void nvme_fault_inject_fini(struct nvme_ns *ns) +{ + /* remove debugfs directories */ + debugfs_remove_recursive(ns->fault_inject.parent); +} + +void nvme_should_fail(struct request *req) +{ + struct gendisk *disk = req->rq_disk; + struct nvme_ns *ns = NULL; + u16 status; + + /* + * make sure this request is coming from a valid namespace + */ + if (!disk) + return; + + ns = disk->private_data; + if (ns && should_fail(&ns->fault_inject.attr, 1)) { + /* inject status code and DNR bit */ + status = ns->fault_inject.status; + if (ns->fault_inject.dont_retry) + status |= NVME_SC_DNR; + nvme_req(req)->status = status; + } +} +EXPORT_SYMBOL_GPL(nvme_should_fail); diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c index 1dc1387b7134..6cb26bcf6ec0 100644 --- a/drivers/nvme/host/fc.c +++ b/drivers/nvme/host/fc.c @@ -588,6 +588,8 @@ nvme_fc_attach_to_suspended_rport(struct nvme_fc_lport *lport, return ERR_PTR(-ESTALE); } + rport->remoteport.port_role = pinfo->port_role; + rport->remoteport.port_id = pinfo->port_id; rport->remoteport.port_state = FC_OBJSTATE_ONLINE; rport->dev_loss_end = 0; @@ -768,8 +770,7 @@ nvme_fc_ctrl_connectivity_loss(struct nvme_fc_ctrl *ctrl) */ if (nvme_reset_ctrl(&ctrl->ctrl)) { dev_warn(ctrl->ctrl.device, - "NVME-FC{%d}: Couldn't schedule reset. " - "Deleting controller.\n", + "NVME-FC{%d}: Couldn't schedule reset.\n", ctrl->cnum); nvme_delete_ctrl(&ctrl->ctrl); } @@ -836,8 +837,7 @@ nvme_fc_unregister_remoteport(struct nvme_fc_remote_port *portptr) /* if dev_loss_tmo==0, dev loss is immediate */ if (!portptr->dev_loss_tmo) { dev_warn(ctrl->ctrl.device, - "NVME-FC{%d}: controller connectivity lost. " - "Deleting controller.\n", + "NVME-FC{%d}: controller connectivity lost.\n", ctrl->cnum); nvme_delete_ctrl(&ctrl->ctrl); } else @@ -2076,20 +2076,10 @@ nvme_fc_timeout(struct request *rq, bool reserved) { struct nvme_fc_fcp_op *op = blk_mq_rq_to_pdu(rq); struct nvme_fc_ctrl *ctrl = op->ctrl; - int ret; - - if (ctrl->rport->remoteport.port_state != FC_OBJSTATE_ONLINE || - atomic_read(&op->state) == FCPOP_STATE_ABORTED) - return BLK_EH_RESET_TIMER; - - ret = __nvme_fc_abort_op(ctrl, op); - if (ret) - /* io wasn't active to abort */ - return BLK_EH_NOT_HANDLED; /* * we can't individually ABTS an io without affecting the queue, - * thus killing the queue, adn thus the association. + * thus killing the queue, and thus the association. * So resolve by performing a controller reset, which will stop * the host/io stack, terminate the association on the link, * and recreate an association on the link. @@ -2191,7 +2181,7 @@ nvme_fc_start_fcp_op(struct nvme_fc_ctrl *ctrl, struct nvme_fc_queue *queue, struct nvme_fc_cmd_iu *cmdiu = &op->cmd_iu; struct nvme_command *sqe = &cmdiu->sqe; u32 csn; - int ret; + int ret, opstate; /* * before attempting to send the io, check to see if we believe @@ -2269,6 +2259,9 @@ nvme_fc_start_fcp_op(struct nvme_fc_ctrl *ctrl, struct nvme_fc_queue *queue, queue->lldd_handle, &op->fcp_req); if (ret) { + opstate = atomic_xchg(&op->state, FCPOP_STATE_COMPLETE); + __nvme_fc_fcpop_chk_teardowns(ctrl, op, opstate); + if (!(op->flags & FCOP_FLAGS_AEN)) nvme_fc_unmap_data(ctrl, op->rq, op); @@ -2284,14 +2277,6 @@ nvme_fc_start_fcp_op(struct nvme_fc_ctrl *ctrl, struct nvme_fc_queue *queue, return BLK_STS_OK; } -static inline blk_status_t nvme_fc_is_ready(struct nvme_fc_queue *queue, - struct request *rq) -{ - if (unlikely(!test_bit(NVME_FC_Q_LIVE, &queue->flags))) - return nvmf_check_init_req(&queue->ctrl->ctrl, rq); - return BLK_STS_OK; -} - static blk_status_t nvme_fc_queue_rq(struct blk_mq_hw_ctx *hctx, const struct blk_mq_queue_data *bd) @@ -2307,7 +2292,9 @@ nvme_fc_queue_rq(struct blk_mq_hw_ctx *hctx, u32 data_len; blk_status_t ret; - ret = nvme_fc_is_ready(queue, rq); + ret = nvmf_check_if_ready(&queue->ctrl->ctrl, rq, + test_bit(NVME_FC_Q_LIVE, &queue->flags), + ctrl->rport->remoteport.port_state == FC_OBJSTATE_ONLINE); if (unlikely(ret)) return ret; @@ -2889,14 +2876,13 @@ nvme_fc_reconnect_or_delete(struct nvme_fc_ctrl *ctrl, int status) if (portptr->port_state == FC_OBJSTATE_ONLINE) dev_warn(ctrl->ctrl.device, "NVME-FC{%d}: Max reconnect attempts (%d) " - "reached. Removing controller\n", + "reached.\n", ctrl->cnum, ctrl->ctrl.nr_reconnects); else dev_warn(ctrl->ctrl.device, "NVME-FC{%d}: dev_loss_tmo (%d) expired " - "while waiting for remoteport connectivity. " - "Removing controller\n", ctrl->cnum, - portptr->dev_loss_tmo); + "while waiting for remoteport connectivity.\n", + ctrl->cnum, portptr->dev_loss_tmo); WARN_ON(nvme_delete_ctrl(&ctrl->ctrl)); } } @@ -3133,6 +3119,10 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts, } if (ret) { + nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_DELETING); + cancel_work_sync(&ctrl->ctrl.reset_work); + cancel_delayed_work_sync(&ctrl->connect_work); + /* couldn't schedule retry - fail out */ dev_err(ctrl->ctrl.device, "NVME-FC{%d}: Connect retry failed\n", ctrl->cnum); diff --git a/drivers/nvme/host/lightnvm.c b/drivers/nvme/host/lightnvm.c index 50ef71ee3d86..41279da799ed 100644 --- a/drivers/nvme/host/lightnvm.c +++ b/drivers/nvme/host/lightnvm.c @@ -35,6 +35,10 @@ enum nvme_nvm_admin_opcode { nvme_nvm_admin_set_bb_tbl = 0xf1, }; +enum nvme_nvm_log_page { + NVME_NVM_LOG_REPORT_CHUNK = 0xca, +}; + struct nvme_nvm_ph_rw { __u8 opcode; __u8 flags; @@ -51,6 +55,21 @@ struct nvme_nvm_ph_rw { __le64 resv; }; +struct nvme_nvm_erase_blk { + __u8 opcode; + __u8 flags; + __u16 command_id; + __le32 nsid; + __u64 rsvd[2]; + __le64 prp1; + __le64 prp2; + __le64 spba; + __le16 length; + __le16 control; + __le32 dsmgmt; + __le64 resv; +}; + struct nvme_nvm_identity { __u8 opcode; __u8 flags; @@ -59,8 +78,7 @@ struct nvme_nvm_identity { __u64 rsvd[2]; __le64 prp1; __le64 prp2; - __le32 chnl_off; - __u32 rsvd11[5]; + __u32 rsvd11[6]; }; struct nvme_nvm_getbbtbl { @@ -90,44 +108,18 @@ struct nvme_nvm_setbbtbl { __u32 rsvd4[3]; }; -struct nvme_nvm_erase_blk { - __u8 opcode; - __u8 flags; - __u16 command_id; - __le32 nsid; - __u64 rsvd[2]; - __le64 prp1; - __le64 prp2; - __le64 spba; - __le16 length; - __le16 control; - __le32 dsmgmt; - __le64 resv; -}; - struct nvme_nvm_command { union { struct nvme_common_command common; - struct nvme_nvm_identity identity; struct nvme_nvm_ph_rw ph_rw; + struct nvme_nvm_erase_blk erase; + struct nvme_nvm_identity identity; struct nvme_nvm_getbbtbl get_bb; struct nvme_nvm_setbbtbl set_bb; - struct nvme_nvm_erase_blk erase; }; }; -#define NVME_NVM_LP_MLC_PAIRS 886 -struct nvme_nvm_lp_mlc { - __le16 num_pairs; - __u8 pairs[NVME_NVM_LP_MLC_PAIRS]; -}; - -struct nvme_nvm_lp_tbl { - __u8 id[8]; - struct nvme_nvm_lp_mlc mlc; -}; - -struct nvme_nvm_id_group { +struct nvme_nvm_id12_grp { __u8 mtype; __u8 fmtype; __le16 res16; @@ -150,11 +142,10 @@ struct nvme_nvm_id_group { __le32 mpos; __le32 mccap; __le16 cpar; - __u8 reserved[10]; - struct nvme_nvm_lp_tbl lptbl; + __u8 reserved[906]; } __packed; -struct nvme_nvm_addr_format { +struct nvme_nvm_id12_addrf { __u8 ch_offset; __u8 ch_len; __u8 lun_offset; @@ -165,21 +156,22 @@ struct nvme_nvm_addr_format { __u8 blk_len; __u8 pg_offset; __u8 pg_len; - __u8 sect_offset; - __u8 sect_len; + __u8 sec_offset; + __u8 sec_len; __u8 res[4]; } __packed; -struct nvme_nvm_id { +struct nvme_nvm_id12 { __u8 ver_id; __u8 vmnt; __u8 cgrps; __u8 res; __le32 cap; __le32 dom; - struct nvme_nvm_addr_format ppaf; + struct nvme_nvm_id12_addrf ppaf; __u8 resv[228]; - struct nvme_nvm_id_group groups[4]; + struct nvme_nvm_id12_grp grp; + __u8 resv2[2880]; } __packed; struct nvme_nvm_bb_tbl { @@ -196,6 +188,68 @@ struct nvme_nvm_bb_tbl { __u8 blk[0]; }; +struct nvme_nvm_id20_addrf { + __u8 grp_len; + __u8 pu_len; + __u8 chk_len; + __u8 lba_len; + __u8 resv[4]; +}; + +struct nvme_nvm_id20 { + __u8 mjr; + __u8 mnr; + __u8 resv[6]; + + struct nvme_nvm_id20_addrf lbaf; + + __le32 mccap; + __u8 resv2[12]; + + __u8 wit; + __u8 resv3[31]; + + /* Geometry */ + __le16 num_grp; + __le16 num_pu; + __le32 num_chk; + __le32 clba; + __u8 resv4[52]; + + /* Write data requirements */ + __le32 ws_min; + __le32 ws_opt; + __le32 mw_cunits; + __le32 maxoc; + __le32 maxocpu; + __u8 resv5[44]; + + /* Performance related metrics */ + __le32 trdt; + __le32 trdm; + __le32 twrt; + __le32 twrm; + __le32 tcrst; + __le32 tcrsm; + __u8 resv6[40]; + + /* Reserved area */ + __u8 resv7[2816]; + + /* Vendor specific */ + __u8 vs[1024]; +}; + +struct nvme_nvm_chk_meta { + __u8 state; + __u8 type; + __u8 wi; + __u8 rsvd[5]; + __le64 slba; + __le64 cnlb; + __le64 wp; +}; + /* * Check we didn't inadvertently grow the command struct */ @@ -203,105 +257,238 @@ static inline void _nvme_nvm_check_size(void) { BUILD_BUG_ON(sizeof(struct nvme_nvm_identity) != 64); BUILD_BUG_ON(sizeof(struct nvme_nvm_ph_rw) != 64); + BUILD_BUG_ON(sizeof(struct nvme_nvm_erase_blk) != 64); BUILD_BUG_ON(sizeof(struct nvme_nvm_getbbtbl) != 64); BUILD_BUG_ON(sizeof(struct nvme_nvm_setbbtbl) != 64); - BUILD_BUG_ON(sizeof(struct nvme_nvm_erase_blk) != 64); - BUILD_BUG_ON(sizeof(struct nvme_nvm_id_group) != 960); - BUILD_BUG_ON(sizeof(struct nvme_nvm_addr_format) != 16); - BUILD_BUG_ON(sizeof(struct nvme_nvm_id) != NVME_IDENTIFY_DATA_SIZE); + BUILD_BUG_ON(sizeof(struct nvme_nvm_id12_grp) != 960); + BUILD_BUG_ON(sizeof(struct nvme_nvm_id12_addrf) != 16); + BUILD_BUG_ON(sizeof(struct nvme_nvm_id12) != NVME_IDENTIFY_DATA_SIZE); BUILD_BUG_ON(sizeof(struct nvme_nvm_bb_tbl) != 64); + BUILD_BUG_ON(sizeof(struct nvme_nvm_id20_addrf) != 8); + BUILD_BUG_ON(sizeof(struct nvme_nvm_id20) != NVME_IDENTIFY_DATA_SIZE); + BUILD_BUG_ON(sizeof(struct nvme_nvm_chk_meta) != 32); + BUILD_BUG_ON(sizeof(struct nvme_nvm_chk_meta) != + sizeof(struct nvm_chk_meta)); +} + +static void nvme_nvm_set_addr_12(struct nvm_addrf_12 *dst, + struct nvme_nvm_id12_addrf *src) +{ + dst->ch_len = src->ch_len; + dst->lun_len = src->lun_len; + dst->blk_len = src->blk_len; + dst->pg_len = src->pg_len; + dst->pln_len = src->pln_len; + dst->sec_len = src->sec_len; + + dst->ch_offset = src->ch_offset; + dst->lun_offset = src->lun_offset; + dst->blk_offset = src->blk_offset; + dst->pg_offset = src->pg_offset; + dst->pln_offset = src->pln_offset; + dst->sec_offset = src->sec_offset; + + dst->ch_mask = ((1ULL << dst->ch_len) - 1) << dst->ch_offset; + dst->lun_mask = ((1ULL << dst->lun_len) - 1) << dst->lun_offset; + dst->blk_mask = ((1ULL << dst->blk_len) - 1) << dst->blk_offset; + dst->pg_mask = ((1ULL << dst->pg_len) - 1) << dst->pg_offset; + dst->pln_mask = ((1ULL << dst->pln_len) - 1) << dst->pln_offset; + dst->sec_mask = ((1ULL << dst->sec_len) - 1) << dst->sec_offset; } -static int init_grps(struct nvm_id *nvm_id, struct nvme_nvm_id *nvme_nvm_id) +static int nvme_nvm_setup_12(struct nvme_nvm_id12 *id, + struct nvm_geo *geo) { - struct nvme_nvm_id_group *src; - struct nvm_id_group *grp; + struct nvme_nvm_id12_grp *src; int sec_per_pg, sec_per_pl, pg_per_blk; - if (nvme_nvm_id->cgrps != 1) + if (id->cgrps != 1) + return -EINVAL; + + src = &id->grp; + + if (src->mtype != 0) { + pr_err("nvm: memory type not supported\n"); return -EINVAL; + } + + /* 1.2 spec. only reports a single version id - unfold */ + geo->major_ver_id = id->ver_id; + geo->minor_ver_id = 2; - src = &nvme_nvm_id->groups[0]; - grp = &nvm_id->grp; + /* Set compacted version for upper layers */ + geo->version = NVM_OCSSD_SPEC_12; - grp->mtype = src->mtype; - grp->fmtype = src->fmtype; + geo->num_ch = src->num_ch; + geo->num_lun = src->num_lun; + geo->all_luns = geo->num_ch * geo->num_lun; - grp->num_ch = src->num_ch; - grp->num_lun = src->num_lun; + geo->num_chk = le16_to_cpu(src->num_chk); - grp->num_chk = le16_to_cpu(src->num_chk); - grp->csecs = le16_to_cpu(src->csecs); - grp->sos = le16_to_cpu(src->sos); + geo->csecs = le16_to_cpu(src->csecs); + geo->sos = le16_to_cpu(src->sos); pg_per_blk = le16_to_cpu(src->num_pg); - sec_per_pg = le16_to_cpu(src->fpg_sz) / grp->csecs; + sec_per_pg = le16_to_cpu(src->fpg_sz) / geo->csecs; sec_per_pl = sec_per_pg * src->num_pln; - grp->clba = sec_per_pl * pg_per_blk; - grp->ws_per_chk = pg_per_blk; - - grp->mpos = le32_to_cpu(src->mpos); - grp->cpar = le16_to_cpu(src->cpar); - grp->mccap = le32_to_cpu(src->mccap); - - grp->ws_opt = grp->ws_min = sec_per_pg; - grp->ws_seq = NVM_IO_SNGL_ACCESS; - - if (grp->mpos & 0x020202) { - grp->ws_seq = NVM_IO_DUAL_ACCESS; - grp->ws_opt <<= 1; - } else if (grp->mpos & 0x040404) { - grp->ws_seq = NVM_IO_QUAD_ACCESS; - grp->ws_opt <<= 2; - } + geo->clba = sec_per_pl * pg_per_blk; + + geo->all_chunks = geo->all_luns * geo->num_chk; + geo->total_secs = geo->clba * geo->all_chunks; + + geo->ws_min = sec_per_pg; + geo->ws_opt = sec_per_pg; + geo->mw_cunits = geo->ws_opt << 3; /* default to MLC safe values */ - grp->trdt = le32_to_cpu(src->trdt); - grp->trdm = le32_to_cpu(src->trdm); - grp->tprt = le32_to_cpu(src->tprt); - grp->tprm = le32_to_cpu(src->tprm); - grp->tbet = le32_to_cpu(src->tbet); - grp->tbem = le32_to_cpu(src->tbem); + /* Do not impose values for maximum number of open blocks as it is + * unspecified in 1.2. Users of 1.2 must be aware of this and eventually + * specify these values through a quirk if restrictions apply. + */ + geo->maxoc = geo->all_luns * geo->num_chk; + geo->maxocpu = geo->num_chk; + + geo->mccap = le32_to_cpu(src->mccap); + + geo->trdt = le32_to_cpu(src->trdt); + geo->trdm = le32_to_cpu(src->trdm); + geo->tprt = le32_to_cpu(src->tprt); + geo->tprm = le32_to_cpu(src->tprm); + geo->tbet = le32_to_cpu(src->tbet); + geo->tbem = le32_to_cpu(src->tbem); /* 1.2 compatibility */ - grp->num_pln = src->num_pln; - grp->num_pg = le16_to_cpu(src->num_pg); - grp->fpg_sz = le16_to_cpu(src->fpg_sz); + geo->vmnt = id->vmnt; + geo->cap = le32_to_cpu(id->cap); + geo->dom = le32_to_cpu(id->dom); + + geo->mtype = src->mtype; + geo->fmtype = src->fmtype; + + geo->cpar = le16_to_cpu(src->cpar); + geo->mpos = le32_to_cpu(src->mpos); + + geo->pln_mode = NVM_PLANE_SINGLE; + + if (geo->mpos & 0x020202) { + geo->pln_mode = NVM_PLANE_DOUBLE; + geo->ws_opt <<= 1; + } else if (geo->mpos & 0x040404) { + geo->pln_mode = NVM_PLANE_QUAD; + geo->ws_opt <<= 2; + } + + geo->num_pln = src->num_pln; + geo->num_pg = le16_to_cpu(src->num_pg); + geo->fpg_sz = le16_to_cpu(src->fpg_sz); + + nvme_nvm_set_addr_12((struct nvm_addrf_12 *)&geo->addrf, &id->ppaf); return 0; } -static int nvme_nvm_identity(struct nvm_dev *nvmdev, struct nvm_id *nvm_id) +static void nvme_nvm_set_addr_20(struct nvm_addrf *dst, + struct nvme_nvm_id20_addrf *src) +{ + dst->ch_len = src->grp_len; + dst->lun_len = src->pu_len; + dst->chk_len = src->chk_len; + dst->sec_len = src->lba_len; + + dst->sec_offset = 0; + dst->chk_offset = dst->sec_len; + dst->lun_offset = dst->chk_offset + dst->chk_len; + dst->ch_offset = dst->lun_offset + dst->lun_len; + + dst->ch_mask = ((1ULL << dst->ch_len) - 1) << dst->ch_offset; + dst->lun_mask = ((1ULL << dst->lun_len) - 1) << dst->lun_offset; + dst->chk_mask = ((1ULL << dst->chk_len) - 1) << dst->chk_offset; + dst->sec_mask = ((1ULL << dst->sec_len) - 1) << dst->sec_offset; +} + +static int nvme_nvm_setup_20(struct nvme_nvm_id20 *id, + struct nvm_geo *geo) +{ + geo->major_ver_id = id->mjr; + geo->minor_ver_id = id->mnr; + + /* Set compacted version for upper layers */ + geo->version = NVM_OCSSD_SPEC_20; + + if (!(geo->major_ver_id == 2 && geo->minor_ver_id == 0)) { + pr_err("nvm: OCSSD version not supported (v%d.%d)\n", + geo->major_ver_id, geo->minor_ver_id); + return -EINVAL; + } + + geo->num_ch = le16_to_cpu(id->num_grp); + geo->num_lun = le16_to_cpu(id->num_pu); + geo->all_luns = geo->num_ch * geo->num_lun; + + geo->num_chk = le32_to_cpu(id->num_chk); + geo->clba = le32_to_cpu(id->clba); + + geo->all_chunks = geo->all_luns * geo->num_chk; + geo->total_secs = geo->clba * geo->all_chunks; + + geo->ws_min = le32_to_cpu(id->ws_min); + geo->ws_opt = le32_to_cpu(id->ws_opt); + geo->mw_cunits = le32_to_cpu(id->mw_cunits); + geo->maxoc = le32_to_cpu(id->maxoc); + geo->maxocpu = le32_to_cpu(id->maxocpu); + + geo->trdt = le32_to_cpu(id->trdt); + geo->trdm = le32_to_cpu(id->trdm); + geo->tprt = le32_to_cpu(id->twrt); + geo->tprm = le32_to_cpu(id->twrm); + geo->tbet = le32_to_cpu(id->tcrst); + geo->tbem = le32_to_cpu(id->tcrsm); + + nvme_nvm_set_addr_20(&geo->addrf, &id->lbaf); + + return 0; +} + +static int nvme_nvm_identity(struct nvm_dev *nvmdev) { struct nvme_ns *ns = nvmdev->q->queuedata; - struct nvme_nvm_id *nvme_nvm_id; + struct nvme_nvm_id12 *id; struct nvme_nvm_command c = {}; int ret; c.identity.opcode = nvme_nvm_admin_identity; c.identity.nsid = cpu_to_le32(ns->head->ns_id); - c.identity.chnl_off = 0; - nvme_nvm_id = kmalloc(sizeof(struct nvme_nvm_id), GFP_KERNEL); - if (!nvme_nvm_id) + id = kmalloc(sizeof(struct nvme_nvm_id12), GFP_KERNEL); + if (!id) return -ENOMEM; ret = nvme_submit_sync_cmd(ns->ctrl->admin_q, (struct nvme_command *)&c, - nvme_nvm_id, sizeof(struct nvme_nvm_id)); + id, sizeof(struct nvme_nvm_id12)); if (ret) { ret = -EIO; goto out; } - nvm_id->ver_id = nvme_nvm_id->ver_id; - nvm_id->vmnt = nvme_nvm_id->vmnt; - nvm_id->cap = le32_to_cpu(nvme_nvm_id->cap); - nvm_id->dom = le32_to_cpu(nvme_nvm_id->dom); - memcpy(&nvm_id->ppaf, &nvme_nvm_id->ppaf, - sizeof(struct nvm_addr_format)); + /* + * The 1.2 and 2.0 specifications share the first byte in their geometry + * command to make it possible to know what version a device implements. + */ + switch (id->ver_id) { + case 1: + ret = nvme_nvm_setup_12(id, &nvmdev->geo); + break; + case 2: + ret = nvme_nvm_setup_20((struct nvme_nvm_id20 *)id, + &nvmdev->geo); + break; + default: + dev_err(ns->ctrl->device, "OCSSD revision not supported (%d)\n", + id->ver_id); + ret = -EINVAL; + } - ret = init_grps(nvm_id, nvme_nvm_id); out: - kfree(nvme_nvm_id); + kfree(id); return ret; } @@ -314,7 +501,7 @@ static int nvme_nvm_get_bb_tbl(struct nvm_dev *nvmdev, struct ppa_addr ppa, struct nvme_ctrl *ctrl = ns->ctrl; struct nvme_nvm_command c = {}; struct nvme_nvm_bb_tbl *bb_tbl; - int nr_blks = geo->nr_chks * geo->plane_mode; + int nr_blks = geo->num_chk * geo->num_pln; int tblsz = sizeof(struct nvme_nvm_bb_tbl) + nr_blks; int ret = 0; @@ -355,7 +542,7 @@ static int nvme_nvm_get_bb_tbl(struct nvm_dev *nvmdev, struct ppa_addr ppa, goto out; } - memcpy(blks, bb_tbl->blk, geo->nr_chks * geo->plane_mode); + memcpy(blks, bb_tbl->blk, geo->num_chk * geo->num_pln); out: kfree(bb_tbl); return ret; @@ -382,6 +569,61 @@ static int nvme_nvm_set_bb_tbl(struct nvm_dev *nvmdev, struct ppa_addr *ppas, return ret; } +/* + * Expect the lba in device format + */ +static int nvme_nvm_get_chk_meta(struct nvm_dev *ndev, + struct nvm_chk_meta *meta, + sector_t slba, int nchks) +{ + struct nvm_geo *geo = &ndev->geo; + struct nvme_ns *ns = ndev->q->queuedata; + struct nvme_ctrl *ctrl = ns->ctrl; + struct nvme_nvm_chk_meta *dev_meta = (struct nvme_nvm_chk_meta *)meta; + struct ppa_addr ppa; + size_t left = nchks * sizeof(struct nvme_nvm_chk_meta); + size_t log_pos, offset, len; + int ret, i; + + /* Normalize lba address space to obtain log offset */ + ppa.ppa = slba; + ppa = dev_to_generic_addr(ndev, ppa); + + log_pos = ppa.m.chk; + log_pos += ppa.m.pu * geo->num_chk; + log_pos += ppa.m.grp * geo->num_lun * geo->num_chk; + + offset = log_pos * sizeof(struct nvme_nvm_chk_meta); + + while (left) { + len = min_t(unsigned int, left, ctrl->max_hw_sectors << 9); + + ret = nvme_get_log_ext(ctrl, ns, NVME_NVM_LOG_REPORT_CHUNK, + dev_meta, len, offset); + if (ret) { + dev_err(ctrl->device, "Get REPORT CHUNK log error\n"); + break; + } + + for (i = 0; i < len; i += sizeof(struct nvme_nvm_chk_meta)) { + meta->state = dev_meta->state; + meta->type = dev_meta->type; + meta->wi = dev_meta->wi; + meta->slba = le64_to_cpu(dev_meta->slba); + meta->cnlb = le64_to_cpu(dev_meta->cnlb); + meta->wp = le64_to_cpu(dev_meta->wp); + + meta++; + dev_meta++; + } + + offset += len; + left -= len; + } + + return ret; +} + static inline void nvme_nvm_rqtocmd(struct nvm_rq *rqd, struct nvme_ns *ns, struct nvme_nvm_command *c) { @@ -513,6 +755,8 @@ static struct nvm_dev_ops nvme_nvm_dev_ops = { .get_bb_tbl = nvme_nvm_get_bb_tbl, .set_bb_tbl = nvme_nvm_set_bb_tbl, + .get_chk_meta = nvme_nvm_get_chk_meta, + .submit_io = nvme_nvm_submit_io, .submit_io_sync = nvme_nvm_submit_io_sync, @@ -520,8 +764,6 @@ static struct nvm_dev_ops nvme_nvm_dev_ops = { .destroy_dma_pool = nvme_nvm_destroy_dma_pool, .dev_dma_alloc = nvme_nvm_dev_dma_alloc, .dev_dma_free = nvme_nvm_dev_dma_free, - - .max_phys_sect = 64, }; static int nvme_nvm_submit_user_cmd(struct request_queue *q, @@ -722,6 +964,15 @@ int nvme_nvm_ioctl(struct nvme_ns *ns, unsigned int cmd, unsigned long arg) } } +void nvme_nvm_update_nvm_info(struct nvme_ns *ns) +{ + struct nvm_dev *ndev = ns->ndev; + struct nvm_geo *geo = &ndev->geo; + + geo->csecs = 1 << ns->lba_shift; + geo->sos = ns->ms; +} + int nvme_nvm_register(struct nvme_ns *ns, char *disk_name, int node) { struct request_queue *q = ns->queue; @@ -748,125 +999,205 @@ void nvme_nvm_unregister(struct nvme_ns *ns) } static ssize_t nvm_dev_attr_show(struct device *dev, - struct device_attribute *dattr, char *page) + struct device_attribute *dattr, char *page) { struct nvme_ns *ns = nvme_get_ns_from_dev(dev); struct nvm_dev *ndev = ns->ndev; - struct nvm_id *id; - struct nvm_id_group *grp; + struct nvm_geo *geo = &ndev->geo; struct attribute *attr; if (!ndev) return 0; - id = &ndev->identity; - grp = &id->grp; attr = &dattr->attr; if (strcmp(attr->name, "version") == 0) { - return scnprintf(page, PAGE_SIZE, "%u\n", id->ver_id); - } else if (strcmp(attr->name, "vendor_opcode") == 0) { - return scnprintf(page, PAGE_SIZE, "%u\n", id->vmnt); + if (geo->major_ver_id == 1) + return scnprintf(page, PAGE_SIZE, "%u\n", + geo->major_ver_id); + else + return scnprintf(page, PAGE_SIZE, "%u.%u\n", + geo->major_ver_id, + geo->minor_ver_id); } else if (strcmp(attr->name, "capabilities") == 0) { - return scnprintf(page, PAGE_SIZE, "%u\n", id->cap); + return scnprintf(page, PAGE_SIZE, "%u\n", geo->cap); + } else if (strcmp(attr->name, "read_typ") == 0) { + return scnprintf(page, PAGE_SIZE, "%u\n", geo->trdt); + } else if (strcmp(attr->name, "read_max") == 0) { + return scnprintf(page, PAGE_SIZE, "%u\n", geo->trdm); + } else { + return scnprintf(page, + PAGE_SIZE, + "Unhandled attr(%s) in `%s`\n", + attr->name, __func__); + } +} + +static ssize_t nvm_dev_attr_show_ppaf(struct nvm_addrf_12 *ppaf, char *page) +{ + return scnprintf(page, PAGE_SIZE, + "0x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x\n", + ppaf->ch_offset, ppaf->ch_len, + ppaf->lun_offset, ppaf->lun_len, + ppaf->pln_offset, ppaf->pln_len, + ppaf->blk_offset, ppaf->blk_len, + ppaf->pg_offset, ppaf->pg_len, + ppaf->sec_offset, ppaf->sec_len); +} + +static ssize_t nvm_dev_attr_show_12(struct device *dev, + struct device_attribute *dattr, char *page) +{ + struct nvme_ns *ns = nvme_get_ns_from_dev(dev); + struct nvm_dev *ndev = ns->ndev; + struct nvm_geo *geo = &ndev->geo; + struct attribute *attr; + + if (!ndev) + return 0; + + attr = &dattr->attr; + + if (strcmp(attr->name, "vendor_opcode") == 0) { + return scnprintf(page, PAGE_SIZE, "%u\n", geo->vmnt); } else if (strcmp(attr->name, "device_mode") == 0) { - return scnprintf(page, PAGE_SIZE, "%u\n", id->dom); + return scnprintf(page, PAGE_SIZE, "%u\n", geo->dom); /* kept for compatibility */ } else if (strcmp(attr->name, "media_manager") == 0) { return scnprintf(page, PAGE_SIZE, "%s\n", "gennvm"); } else if (strcmp(attr->name, "ppa_format") == 0) { - return scnprintf(page, PAGE_SIZE, - "0x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x\n", - id->ppaf.ch_offset, id->ppaf.ch_len, - id->ppaf.lun_offset, id->ppaf.lun_len, - id->ppaf.pln_offset, id->ppaf.pln_len, - id->ppaf.blk_offset, id->ppaf.blk_len, - id->ppaf.pg_offset, id->ppaf.pg_len, - id->ppaf.sect_offset, id->ppaf.sect_len); + return nvm_dev_attr_show_ppaf((void *)&geo->addrf, page); } else if (strcmp(attr->name, "media_type") == 0) { /* u8 */ - return scnprintf(page, PAGE_SIZE, "%u\n", grp->mtype); + return scnprintf(page, PAGE_SIZE, "%u\n", geo->mtype); } else if (strcmp(attr->name, "flash_media_type") == 0) { - return scnprintf(page, PAGE_SIZE, "%u\n", grp->fmtype); + return scnprintf(page, PAGE_SIZE, "%u\n", geo->fmtype); } else if (strcmp(attr->name, "num_channels") == 0) { - return scnprintf(page, PAGE_SIZE, "%u\n", grp->num_ch); + return scnprintf(page, PAGE_SIZE, "%u\n", geo->num_ch); } else if (strcmp(attr->name, "num_luns") == 0) { - return scnprintf(page, PAGE_SIZE, "%u\n", grp->num_lun); + return scnprintf(page, PAGE_SIZE, "%u\n", geo->num_lun); } else if (strcmp(attr->name, "num_planes") == 0) { - return scnprintf(page, PAGE_SIZE, "%u\n", grp->num_pln); + return scnprintf(page, PAGE_SIZE, "%u\n", geo->num_pln); } else if (strcmp(attr->name, "num_blocks") == 0) { /* u16 */ - return scnprintf(page, PAGE_SIZE, "%u\n", grp->num_chk); + return scnprintf(page, PAGE_SIZE, "%u\n", geo->num_chk); } else if (strcmp(attr->name, "num_pages") == 0) { - return scnprintf(page, PAGE_SIZE, "%u\n", grp->num_pg); + return scnprintf(page, PAGE_SIZE, "%u\n", geo->num_pg); } else if (strcmp(attr->name, "page_size") == 0) { - return scnprintf(page, PAGE_SIZE, "%u\n", grp->fpg_sz); + return scnprintf(page, PAGE_SIZE, "%u\n", geo->fpg_sz); } else if (strcmp(attr->name, "hw_sector_size") == 0) { - return scnprintf(page, PAGE_SIZE, "%u\n", grp->csecs); + return scnprintf(page, PAGE_SIZE, "%u\n", geo->csecs); } else if (strcmp(attr->name, "oob_sector_size") == 0) {/* u32 */ - return scnprintf(page, PAGE_SIZE, "%u\n", grp->sos); - } else if (strcmp(attr->name, "read_typ") == 0) { - return scnprintf(page, PAGE_SIZE, "%u\n", grp->trdt); - } else if (strcmp(attr->name, "read_max") == 0) { - return scnprintf(page, PAGE_SIZE, "%u\n", grp->trdm); + return scnprintf(page, PAGE_SIZE, "%u\n", geo->sos); } else if (strcmp(attr->name, "prog_typ") == 0) { - return scnprintf(page, PAGE_SIZE, "%u\n", grp->tprt); + return scnprintf(page, PAGE_SIZE, "%u\n", geo->tprt); } else if (strcmp(attr->name, "prog_max") == 0) { - return scnprintf(page, PAGE_SIZE, "%u\n", grp->tprm); + return scnprintf(page, PAGE_SIZE, "%u\n", geo->tprm); } else if (strcmp(attr->name, "erase_typ") == 0) { - return scnprintf(page, PAGE_SIZE, "%u\n", grp->tbet); + return scnprintf(page, PAGE_SIZE, "%u\n", geo->tbet); } else if (strcmp(attr->name, "erase_max") == 0) { - return scnprintf(page, PAGE_SIZE, "%u\n", grp->tbem); + return scnprintf(page, PAGE_SIZE, "%u\n", geo->tbem); } else if (strcmp(attr->name, "multiplane_modes") == 0) { - return scnprintf(page, PAGE_SIZE, "0x%08x\n", grp->mpos); + return scnprintf(page, PAGE_SIZE, "0x%08x\n", geo->mpos); } else if (strcmp(attr->name, "media_capabilities") == 0) { - return scnprintf(page, PAGE_SIZE, "0x%08x\n", grp->mccap); + return scnprintf(page, PAGE_SIZE, "0x%08x\n", geo->mccap); } else if (strcmp(attr->name, "max_phys_secs") == 0) { - return scnprintf(page, PAGE_SIZE, "%u\n", - ndev->ops->max_phys_sect); + return scnprintf(page, PAGE_SIZE, "%u\n", NVM_MAX_VLBA); } else { - return scnprintf(page, - PAGE_SIZE, - "Unhandled attr(%s) in `nvm_dev_attr_show`\n", - attr->name); + return scnprintf(page, PAGE_SIZE, + "Unhandled attr(%s) in `%s`\n", + attr->name, __func__); + } +} + +static ssize_t nvm_dev_attr_show_20(struct device *dev, + struct device_attribute *dattr, char *page) +{ + struct nvme_ns *ns = nvme_get_ns_from_dev(dev); + struct nvm_dev *ndev = ns->ndev; + struct nvm_geo *geo = &ndev->geo; + struct attribute *attr; + + if (!ndev) + return 0; + + attr = &dattr->attr; + + if (strcmp(attr->name, "groups") == 0) { + return scnprintf(page, PAGE_SIZE, "%u\n", geo->num_ch); + } else if (strcmp(attr->name, "punits") == 0) { + return scnprintf(page, PAGE_SIZE, "%u\n", geo->num_lun); + } else if (strcmp(attr->name, "chunks") == 0) { + return scnprintf(page, PAGE_SIZE, "%u\n", geo->num_chk); + } else if (strcmp(attr->name, "clba") == 0) { + return scnprintf(page, PAGE_SIZE, "%u\n", geo->clba); + } else if (strcmp(attr->name, "ws_min") == 0) { + return scnprintf(page, PAGE_SIZE, "%u\n", geo->ws_min); + } else if (strcmp(attr->name, "ws_opt") == 0) { + return scnprintf(page, PAGE_SIZE, "%u\n", geo->ws_opt); + } else if (strcmp(attr->name, "maxoc") == 0) { + return scnprintf(page, PAGE_SIZE, "%u\n", geo->maxoc); + } else if (strcmp(attr->name, "maxocpu") == 0) { + return scnprintf(page, PAGE_SIZE, "%u\n", geo->maxocpu); + } else if (strcmp(attr->name, "mw_cunits") == 0) { + return scnprintf(page, PAGE_SIZE, "%u\n", geo->mw_cunits); + } else if (strcmp(attr->name, "write_typ") == 0) { + return scnprintf(page, PAGE_SIZE, "%u\n", geo->tprt); + } else if (strcmp(attr->name, "write_max") == 0) { + return scnprintf(page, PAGE_SIZE, "%u\n", geo->tprm); + } else if (strcmp(attr->name, "reset_typ") == 0) { + return scnprintf(page, PAGE_SIZE, "%u\n", geo->tbet); + } else if (strcmp(attr->name, "reset_max") == 0) { + return scnprintf(page, PAGE_SIZE, "%u\n", geo->tbem); + } else { + return scnprintf(page, PAGE_SIZE, + "Unhandled attr(%s) in `%s`\n", + attr->name, __func__); } } -#define NVM_DEV_ATTR_RO(_name) \ +#define NVM_DEV_ATTR_RO(_name) \ DEVICE_ATTR(_name, S_IRUGO, nvm_dev_attr_show, NULL) +#define NVM_DEV_ATTR_12_RO(_name) \ + DEVICE_ATTR(_name, S_IRUGO, nvm_dev_attr_show_12, NULL) +#define NVM_DEV_ATTR_20_RO(_name) \ + DEVICE_ATTR(_name, S_IRUGO, nvm_dev_attr_show_20, NULL) +/* general attributes */ static NVM_DEV_ATTR_RO(version); -static NVM_DEV_ATTR_RO(vendor_opcode); static NVM_DEV_ATTR_RO(capabilities); -static NVM_DEV_ATTR_RO(device_mode); -static NVM_DEV_ATTR_RO(ppa_format); -static NVM_DEV_ATTR_RO(media_manager); - -static NVM_DEV_ATTR_RO(media_type); -static NVM_DEV_ATTR_RO(flash_media_type); -static NVM_DEV_ATTR_RO(num_channels); -static NVM_DEV_ATTR_RO(num_luns); -static NVM_DEV_ATTR_RO(num_planes); -static NVM_DEV_ATTR_RO(num_blocks); -static NVM_DEV_ATTR_RO(num_pages); -static NVM_DEV_ATTR_RO(page_size); -static NVM_DEV_ATTR_RO(hw_sector_size); -static NVM_DEV_ATTR_RO(oob_sector_size); + static NVM_DEV_ATTR_RO(read_typ); static NVM_DEV_ATTR_RO(read_max); -static NVM_DEV_ATTR_RO(prog_typ); -static NVM_DEV_ATTR_RO(prog_max); -static NVM_DEV_ATTR_RO(erase_typ); -static NVM_DEV_ATTR_RO(erase_max); -static NVM_DEV_ATTR_RO(multiplane_modes); -static NVM_DEV_ATTR_RO(media_capabilities); -static NVM_DEV_ATTR_RO(max_phys_secs); - -static struct attribute *nvm_dev_attrs[] = { + +/* 1.2 values */ +static NVM_DEV_ATTR_12_RO(vendor_opcode); +static NVM_DEV_ATTR_12_RO(device_mode); +static NVM_DEV_ATTR_12_RO(ppa_format); +static NVM_DEV_ATTR_12_RO(media_manager); +static NVM_DEV_ATTR_12_RO(media_type); +static NVM_DEV_ATTR_12_RO(flash_media_type); +static NVM_DEV_ATTR_12_RO(num_channels); +static NVM_DEV_ATTR_12_RO(num_luns); +static NVM_DEV_ATTR_12_RO(num_planes); +static NVM_DEV_ATTR_12_RO(num_blocks); +static NVM_DEV_ATTR_12_RO(num_pages); +static NVM_DEV_ATTR_12_RO(page_size); +static NVM_DEV_ATTR_12_RO(hw_sector_size); +static NVM_DEV_ATTR_12_RO(oob_sector_size); +static NVM_DEV_ATTR_12_RO(prog_typ); +static NVM_DEV_ATTR_12_RO(prog_max); +static NVM_DEV_ATTR_12_RO(erase_typ); +static NVM_DEV_ATTR_12_RO(erase_max); +static NVM_DEV_ATTR_12_RO(multiplane_modes); +static NVM_DEV_ATTR_12_RO(media_capabilities); +static NVM_DEV_ATTR_12_RO(max_phys_secs); + +static struct attribute *nvm_dev_attrs_12[] = { &dev_attr_version.attr, - &dev_attr_vendor_opcode.attr, &dev_attr_capabilities.attr, + + &dev_attr_vendor_opcode.attr, &dev_attr_device_mode.attr, &dev_attr_media_manager.attr, - &dev_attr_ppa_format.attr, &dev_attr_media_type.attr, &dev_attr_flash_media_type.attr, @@ -887,22 +1218,92 @@ static struct attribute *nvm_dev_attrs[] = { &dev_attr_multiplane_modes.attr, &dev_attr_media_capabilities.attr, &dev_attr_max_phys_secs.attr, + NULL, }; -static const struct attribute_group nvm_dev_attr_group = { +static const struct attribute_group nvm_dev_attr_group_12 = { .name = "lightnvm", - .attrs = nvm_dev_attrs, + .attrs = nvm_dev_attrs_12, +}; + +/* 2.0 values */ +static NVM_DEV_ATTR_20_RO(groups); +static NVM_DEV_ATTR_20_RO(punits); +static NVM_DEV_ATTR_20_RO(chunks); +static NVM_DEV_ATTR_20_RO(clba); +static NVM_DEV_ATTR_20_RO(ws_min); +static NVM_DEV_ATTR_20_RO(ws_opt); +static NVM_DEV_ATTR_20_RO(maxoc); +static NVM_DEV_ATTR_20_RO(maxocpu); +static NVM_DEV_ATTR_20_RO(mw_cunits); +static NVM_DEV_ATTR_20_RO(write_typ); +static NVM_DEV_ATTR_20_RO(write_max); +static NVM_DEV_ATTR_20_RO(reset_typ); +static NVM_DEV_ATTR_20_RO(reset_max); + +static struct attribute *nvm_dev_attrs_20[] = { + &dev_attr_version.attr, + &dev_attr_capabilities.attr, + + &dev_attr_groups.attr, + &dev_attr_punits.attr, + &dev_attr_chunks.attr, + &dev_attr_clba.attr, + &dev_attr_ws_min.attr, + &dev_attr_ws_opt.attr, + &dev_attr_maxoc.attr, + &dev_attr_maxocpu.attr, + &dev_attr_mw_cunits.attr, + + &dev_attr_read_typ.attr, + &dev_attr_read_max.attr, + &dev_attr_write_typ.attr, + &dev_attr_write_max.attr, + &dev_attr_reset_typ.attr, + &dev_attr_reset_max.attr, + + NULL, +}; + +static const struct attribute_group nvm_dev_attr_group_20 = { + .name = "lightnvm", + .attrs = nvm_dev_attrs_20, }; int nvme_nvm_register_sysfs(struct nvme_ns *ns) { - return sysfs_create_group(&disk_to_dev(ns->disk)->kobj, - &nvm_dev_attr_group); + struct nvm_dev *ndev = ns->ndev; + struct nvm_geo *geo = &ndev->geo; + + if (!ndev) + return -EINVAL; + + switch (geo->major_ver_id) { + case 1: + return sysfs_create_group(&disk_to_dev(ns->disk)->kobj, + &nvm_dev_attr_group_12); + case 2: + return sysfs_create_group(&disk_to_dev(ns->disk)->kobj, + &nvm_dev_attr_group_20); + } + + return -EINVAL; } void nvme_nvm_unregister_sysfs(struct nvme_ns *ns) { - sysfs_remove_group(&disk_to_dev(ns->disk)->kobj, - &nvm_dev_attr_group); + struct nvm_dev *ndev = ns->ndev; + struct nvm_geo *geo = &ndev->geo; + + switch (geo->major_ver_id) { + case 1: + sysfs_remove_group(&disk_to_dev(ns->disk)->kobj, + &nvm_dev_attr_group_12); + break; + case 2: + sysfs_remove_group(&disk_to_dev(ns->disk)->kobj, + &nvm_dev_attr_group_20); + break; + } } diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c index 060f69e03427..956e0b8e9c4d 100644 --- a/drivers/nvme/host/multipath.c +++ b/drivers/nvme/host/multipath.c @@ -44,12 +44,12 @@ void nvme_kick_requeue_lists(struct nvme_ctrl *ctrl) { struct nvme_ns *ns; - mutex_lock(&ctrl->namespaces_mutex); + down_read(&ctrl->namespaces_rwsem); list_for_each_entry(ns, &ctrl->namespaces, list) { if (ns->head->disk) kblockd_schedule_work(&ns->head->requeue_work); } - mutex_unlock(&ctrl->namespaces_mutex); + up_read(&ctrl->namespaces_rwsem); } static struct nvme_ns *__nvme_find_path(struct nvme_ns_head *head) @@ -162,13 +162,13 @@ int nvme_mpath_alloc_disk(struct nvme_ctrl *ctrl, struct nvme_ns_head *head) if (!(ctrl->subsys->cmic & (1 << 1)) || !multipath) return 0; - q = blk_alloc_queue_node(GFP_KERNEL, NUMA_NO_NODE); + q = blk_alloc_queue_node(GFP_KERNEL, NUMA_NO_NODE, NULL); if (!q) goto out; q->queuedata = head; blk_queue_make_request(q, nvme_ns_head_make_request); q->poll_fn = nvme_ns_head_poll; - queue_flag_set_unlocked(QUEUE_FLAG_NONROT, q); + blk_queue_flag_set(QUEUE_FLAG_NONROT, q); /* set to a default value for 512 until disk is validated */ blk_queue_logical_block_size(q, 512); diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index d733b14ede9d..061fecfd44f5 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -21,6 +21,7 @@ #include <linux/blk-mq.h> #include <linux/lightnvm.h> #include <linux/sed-opal.h> +#include <linux/fault-inject.h> extern unsigned int nvme_io_timeout; #define NVME_IO_TIMEOUT (nvme_io_timeout * HZ) @@ -104,6 +105,7 @@ struct nvme_request { enum { NVME_REQ_CANCELLED = (1 << 0), + NVME_REQ_USERCMD = (1 << 1), }; static inline struct nvme_request *nvme_req(struct request *req) @@ -140,7 +142,7 @@ struct nvme_ctrl { struct blk_mq_tag_set *tagset; struct blk_mq_tag_set *admin_tagset; struct list_head namespaces; - struct mutex namespaces_mutex; + struct rw_semaphore namespaces_rwsem; struct device ctrl_device; struct device *device; /* char device */ struct cdev cdev; @@ -261,6 +263,15 @@ struct nvme_ns_head { int instance; }; +#ifdef CONFIG_FAULT_INJECTION_DEBUG_FS +struct nvme_fault_inject { + struct fault_attr attr; + struct dentry *parent; + bool dont_retry; /* DNR, do not retry */ + u16 status; /* status code */ +}; +#endif + struct nvme_ns { struct list_head list; @@ -282,6 +293,11 @@ struct nvme_ns { #define NVME_NS_REMOVING 0 #define NVME_NS_DEAD 1 u16 noiob; + +#ifdef CONFIG_FAULT_INJECTION_DEBUG_FS + struct nvme_fault_inject fault_inject; +#endif + }; struct nvme_ctrl_ops { @@ -298,8 +314,19 @@ struct nvme_ctrl_ops { void (*delete_ctrl)(struct nvme_ctrl *ctrl); int (*get_address)(struct nvme_ctrl *ctrl, char *buf, int size); int (*reinit_request)(void *data, struct request *rq); + void (*stop_ctrl)(struct nvme_ctrl *ctrl); }; +#ifdef CONFIG_FAULT_INJECTION_DEBUG_FS +void nvme_fault_inject_init(struct nvme_ns *ns); +void nvme_fault_inject_fini(struct nvme_ns *ns); +void nvme_should_fail(struct request *req); +#else +static inline void nvme_fault_inject_init(struct nvme_ns *ns) {} +static inline void nvme_fault_inject_fini(struct nvme_ns *ns) {} +static inline void nvme_should_fail(struct request *req) {} +#endif + static inline bool nvme_ctrl_ready(struct nvme_ctrl *ctrl) { u32 val = 0; @@ -336,6 +363,8 @@ static inline void nvme_end_request(struct request *req, __le16 status, rq->status = le16_to_cpu(status) >> 1; rq->result = result; + /* inject error when permitted by fault injection framework */ + nvme_should_fail(req); blk_mq_complete_request(req); } @@ -394,13 +423,15 @@ int __nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd, unsigned timeout, int qid, int at_head, blk_mq_req_flags_t flags); int nvme_set_queue_count(struct nvme_ctrl *ctrl, int *count); -void nvme_start_keep_alive(struct nvme_ctrl *ctrl); void nvme_stop_keep_alive(struct nvme_ctrl *ctrl); int nvme_reset_ctrl(struct nvme_ctrl *ctrl); int nvme_reset_ctrl_sync(struct nvme_ctrl *ctrl); int nvme_delete_ctrl(struct nvme_ctrl *ctrl); int nvme_delete_ctrl_sync(struct nvme_ctrl *ctrl); +int nvme_get_log_ext(struct nvme_ctrl *ctrl, struct nvme_ns *ns, + u8 log_page, void *log, size_t size, u64 offset); + extern const struct attribute_group nvme_ns_id_attr_group; extern const struct block_device_operations nvme_ns_head_ops; @@ -461,12 +492,14 @@ static inline void nvme_mpath_check_last_path(struct nvme_ns *ns) #endif /* CONFIG_NVME_MULTIPATH */ #ifdef CONFIG_NVM +void nvme_nvm_update_nvm_info(struct nvme_ns *ns); int nvme_nvm_register(struct nvme_ns *ns, char *disk_name, int node); void nvme_nvm_unregister(struct nvme_ns *ns); int nvme_nvm_register_sysfs(struct nvme_ns *ns); void nvme_nvm_unregister_sysfs(struct nvme_ns *ns); int nvme_nvm_ioctl(struct nvme_ns *ns, unsigned int cmd, unsigned long arg); #else +static inline void nvme_nvm_update_nvm_info(struct nvme_ns *ns) {}; static inline int nvme_nvm_register(struct nvme_ns *ns, char *disk_name, int node) { diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index b6f43b738f03..fbc71fac6f1e 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -84,6 +84,7 @@ struct nvme_dev { struct dma_pool *prp_small_pool; unsigned online_queues; unsigned max_qid; + unsigned int num_vecs; int q_depth; u32 db_stride; void __iomem *bar; @@ -414,7 +415,8 @@ static int nvme_pci_map_queues(struct blk_mq_tag_set *set) { struct nvme_dev *dev = set->driver_data; - return blk_mq_pci_map_queues(set, to_pci_dev(dev->dev)); + return blk_mq_pci_map_queues(set, to_pci_dev(dev->dev), + dev->num_vecs > 1 ? 1 /* admin queue */ : 0); } /** @@ -1380,8 +1382,7 @@ static int nvme_alloc_sq_cmds(struct nvme_dev *dev, struct nvme_queue *nvmeq, return 0; } -static int nvme_alloc_queue(struct nvme_dev *dev, int qid, - int depth, int node) +static int nvme_alloc_queue(struct nvme_dev *dev, int qid, int depth) { struct nvme_queue *nvmeq = &dev->queues[qid]; @@ -1457,7 +1458,11 @@ static int nvme_create_queue(struct nvme_queue *nvmeq, int qid) nvmeq->sq_cmds_io = dev->cmb + offset; } - nvmeq->cq_vector = qid - 1; + /* + * A queue's vector matches the queue identifier unless the controller + * has only one vector available. + */ + nvmeq->cq_vector = dev->num_vecs == 1 ? 0 : qid; result = adapter_alloc_cq(dev, qid, nvmeq); if (result < 0) goto release_vector; @@ -1596,8 +1601,7 @@ static int nvme_pci_configure_admin_queue(struct nvme_dev *dev) if (result < 0) return result; - result = nvme_alloc_queue(dev, 0, NVME_AQ_DEPTH, - dev_to_node(dev->dev)); + result = nvme_alloc_queue(dev, 0, NVME_AQ_DEPTH); if (result) return result; @@ -1630,9 +1634,7 @@ static int nvme_create_io_queues(struct nvme_dev *dev) int ret = 0; for (i = dev->ctrl.queue_count; i <= dev->max_qid; i++) { - /* vector == qid - 1, match nvme_create_queue */ - if (nvme_alloc_queue(dev, i, dev->q_depth, - pci_irq_get_node(to_pci_dev(dev->dev), i - 1))) { + if (nvme_alloc_queue(dev, i, dev->q_depth)) { ret = -ENOMEM; break; } @@ -1914,6 +1916,10 @@ static int nvme_setup_io_queues(struct nvme_dev *dev) int result, nr_io_queues; unsigned long size; + struct irq_affinity affd = { + .pre_vectors = 1 + }; + nr_io_queues = num_possible_cpus(); result = nvme_set_queue_count(&dev->ctrl, &nr_io_queues); if (result < 0) @@ -1949,11 +1955,12 @@ static int nvme_setup_io_queues(struct nvme_dev *dev) * setting up the full range we need. */ pci_free_irq_vectors(pdev); - nr_io_queues = pci_alloc_irq_vectors(pdev, 1, nr_io_queues, - PCI_IRQ_ALL_TYPES | PCI_IRQ_AFFINITY); - if (nr_io_queues <= 0) + result = pci_alloc_irq_vectors_affinity(pdev, 1, nr_io_queues + 1, + PCI_IRQ_ALL_TYPES | PCI_IRQ_AFFINITY, &affd); + if (result <= 0) return -EIO; - dev->max_qid = nr_io_queues; + dev->num_vecs = result; + dev->max_qid = max(result - 1, 1); /* * Should investigate if there's a performance win from allocating @@ -2197,7 +2204,11 @@ static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown) if (!dead) { if (shutdown) nvme_wait_freeze_timeout(&dev->ctrl, NVME_IO_TIMEOUT); + } + nvme_stop_queues(&dev->ctrl); + + if (!dead && dev->ctrl.queue_count > 0) { /* * If the controller is still alive tell it to stop using the * host memory buffer. In theory the shutdown / reset should @@ -2206,11 +2217,6 @@ static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown) */ if (dev->host_mem_descs) nvme_set_host_mem(dev, 0); - - } - nvme_stop_queues(&dev->ctrl); - - if (!dead) { nvme_disable_io_queues(dev); nvme_disable_admin_queue(dev, shutdown); } @@ -2416,6 +2422,13 @@ static int nvme_pci_reg_read64(struct nvme_ctrl *ctrl, u32 off, u64 *val) return 0; } +static int nvme_pci_get_address(struct nvme_ctrl *ctrl, char *buf, int size) +{ + struct pci_dev *pdev = to_pci_dev(to_nvme_dev(ctrl)->dev); + + return snprintf(buf, size, "%s", dev_name(&pdev->dev)); +} + static const struct nvme_ctrl_ops nvme_pci_ctrl_ops = { .name = "pcie", .module = THIS_MODULE, @@ -2425,6 +2438,7 @@ static const struct nvme_ctrl_ops nvme_pci_ctrl_ops = { .reg_read64 = nvme_pci_reg_read64, .free_ctrl = nvme_pci_free_ctrl, .submit_async_event = nvme_pci_submit_async_event, + .get_address = nvme_pci_get_address, }; static int nvme_dev_map(struct nvme_dev *dev) @@ -2461,10 +2475,13 @@ static unsigned long check_vendor_combination_bug(struct pci_dev *pdev) } else if (pdev->vendor == 0x144d && pdev->device == 0xa804) { /* * Samsung SSD 960 EVO drops off the PCIe bus after system - * suspend on a Ryzen board, ASUS PRIME B350M-A. + * suspend on a Ryzen board, ASUS PRIME B350M-A, as well as + * within few minutes after bootup on a Coffee Lake board - + * ASUS PRIME Z370-A */ if (dmi_match(DMI_BOARD_VENDOR, "ASUSTeK COMPUTER INC.") && - dmi_match(DMI_BOARD_NAME, "PRIME B350M-A")) + (dmi_match(DMI_BOARD_NAME, "PRIME B350M-A") || + dmi_match(DMI_BOARD_NAME, "PRIME Z370-A"))) return NVME_QUIRK_NO_APST; } diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index 4d84a73ee12d..1eb4438a8763 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -867,6 +867,14 @@ out_free_io_queues: return ret; } +static void nvme_rdma_stop_ctrl(struct nvme_ctrl *nctrl) +{ + struct nvme_rdma_ctrl *ctrl = to_rdma_ctrl(nctrl); + + cancel_work_sync(&ctrl->err_work); + cancel_delayed_work_sync(&ctrl->reconnect_work); +} + static void nvme_rdma_free_ctrl(struct nvme_ctrl *nctrl) { struct nvme_rdma_ctrl *ctrl = to_rdma_ctrl(nctrl); @@ -899,7 +907,6 @@ static void nvme_rdma_reconnect_or_remove(struct nvme_rdma_ctrl *ctrl) queue_delayed_work(nvme_wq, &ctrl->reconnect_work, ctrl->ctrl.opts->reconnect_delay * HZ); } else { - dev_info(ctrl->ctrl.device, "Removing controller...\n"); nvme_delete_ctrl(&ctrl->ctrl); } } @@ -974,8 +981,8 @@ static void nvme_rdma_error_recovery_work(struct work_struct *work) nvme_start_queues(&ctrl->ctrl); if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_CONNECTING)) { - /* state change failure should never happen */ - WARN_ON_ONCE(1); + /* state change failure is ok if we're in DELETING state */ + WARN_ON_ONCE(ctrl->ctrl.state != NVME_CTRL_DELETING); return; } @@ -1594,17 +1601,6 @@ nvme_rdma_timeout(struct request *rq, bool reserved) return BLK_EH_HANDLED; } -/* - * We cannot accept any other command until the Connect command has completed. - */ -static inline blk_status_t -nvme_rdma_is_ready(struct nvme_rdma_queue *queue, struct request *rq) -{ - if (unlikely(!test_bit(NVME_RDMA_Q_LIVE, &queue->flags))) - return nvmf_check_init_req(&queue->ctrl->ctrl, rq); - return BLK_STS_OK; -} - static blk_status_t nvme_rdma_queue_rq(struct blk_mq_hw_ctx *hctx, const struct blk_mq_queue_data *bd) { @@ -1620,7 +1616,8 @@ static blk_status_t nvme_rdma_queue_rq(struct blk_mq_hw_ctx *hctx, WARN_ON_ONCE(rq->tag < 0); - ret = nvme_rdma_is_ready(queue, rq); + ret = nvmf_check_if_ready(&queue->ctrl->ctrl, rq, + test_bit(NVME_RDMA_Q_LIVE, &queue->flags), true); if (unlikely(ret)) return ret; @@ -1719,9 +1716,6 @@ static const struct blk_mq_ops nvme_rdma_admin_mq_ops = { static void nvme_rdma_shutdown_ctrl(struct nvme_rdma_ctrl *ctrl, bool shutdown) { - cancel_work_sync(&ctrl->err_work); - cancel_delayed_work_sync(&ctrl->reconnect_work); - if (ctrl->ctrl.queue_count > 1) { nvme_stop_queues(&ctrl->ctrl); blk_mq_tagset_busy_iter(&ctrl->tag_set, @@ -1799,6 +1793,7 @@ static const struct nvme_ctrl_ops nvme_rdma_ctrl_ops = { .submit_async_event = nvme_rdma_submit_async_event, .delete_ctrl = nvme_rdma_delete_ctrl, .get_address = nvmf_get_address, + .stop_ctrl = nvme_rdma_stop_ctrl, }; static inline bool @@ -2025,15 +2020,26 @@ static struct nvmf_transport_ops nvme_rdma_transport = { static void nvme_rdma_remove_one(struct ib_device *ib_device, void *client_data) { struct nvme_rdma_ctrl *ctrl; + struct nvme_rdma_device *ndev; + bool found = false; + + mutex_lock(&device_list_mutex); + list_for_each_entry(ndev, &device_list, entry) { + if (ndev->dev == ib_device) { + found = true; + break; + } + } + mutex_unlock(&device_list_mutex); + + if (!found) + return; /* Delete all controllers using this device */ mutex_lock(&nvme_rdma_ctrl_mutex); list_for_each_entry(ctrl, &nvme_rdma_ctrl_list, list) { if (ctrl->device->dev != ib_device) continue; - dev_info(ctrl->ctrl.device, - "Removing ctrl: NQN \"%s\", addr %pISp\n", - ctrl->ctrl.opts->subsysnqn, &ctrl->addr); nvme_delete_ctrl(&ctrl->ctrl); } mutex_unlock(&nvme_rdma_ctrl_mutex); |