diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2023-02-20 23:27:21 +0100 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2023-02-20 23:27:21 +0100 |
commit | 5b0ed5964928b0aaf0d644c17c886c7f5ea4bb3f (patch) | |
tree | 02df7848b8c28552039bf463e0034f5d5518b2a9 /drivers/nvme | |
parent | Merge tag 'for-6.3/dio-2023-02-16' of git://git.kernel.dk/linux (diff) | |
parent | brd: use radix_tree_maybe_preload instead of radix_tree_preload (diff) | |
download | linux-5b0ed5964928b0aaf0d644c17c886c7f5ea4bb3f.tar.xz linux-5b0ed5964928b0aaf0d644c17c886c7f5ea4bb3f.zip |
Merge tag 'for-6.3/block-2023-02-16' of git://git.kernel.dk/linux
Pull block updates from Jens Axboe:
- NVMe updates via Christoph:
- Small improvements to the logging functionality (Amit Engel)
- Authentication cleanups (Hannes Reinecke)
- Cleanup and optimize the DMA mapping code in the PCIe driver
(Keith Busch)
- Work around the command effects for Format NVM (Keith Busch)
- Misc cleanups (Keith Busch, Christoph Hellwig)
- Fix and cleanup freeing single sgl (Keith Busch)
- MD updates via Song:
- Fix a rare crash during the takeover process
- Don't update recovery_cp when curr_resync is ACTIVE
- Free writes_pending in md_stop
- Change active_io to percpu
- Updates to drbd, inching us closer to unifying the out-of-tree driver
with the in-tree one (Andreas, Christoph, Lars, Robert)
- BFQ update adding support for multi-actuator drives (Paolo, Federico,
Davide)
- Make brd compliant with REQ_NOWAIT (me)
- Fix for IOPOLL and queue entering, fixing stalled IO waiting on
timeouts (me)
- Fix for REQ_NOWAIT with multiple bios (me)
- Fix memory leak in blktrace cleanup (Greg)
- Clean up sbitmap and fix a potential hang (Kemeng)
- Clean up some bits in BFQ, and fix a bug in the request injection
(Kemeng)
- Clean up the request allocation and issue code, and fix some bugs
related to that (Kemeng)
- ublk updates and fixes:
- Add support for unprivileged ublk (Ming)
- Improve device deletion handling (Ming)
- Misc (Liu, Ziyang)
- s390 dasd fixes (Alexander, Qiheng)
- Improve utility of request caching and fixes (Anuj, Xiao)
- zoned cleanups (Pankaj)
- More constification for kobjs (Thomas)
- blk-iocost cleanups (Yu)
- Remove bio splitting from drivers that don't need it (Christoph)
- Switch blk-cgroups to use struct gendisk. Some of this is now
incomplete as select late reverts were done. (Christoph)
- Add bvec initialization helpers, and convert callers to use that
rather than open-coding it (Christoph)
- Misc fixes and cleanups (Jinke, Keith, Arnd, Bart, Li, Martin,
Matthew, Ulf, Zhong)
* tag 'for-6.3/block-2023-02-16' of git://git.kernel.dk/linux: (169 commits)
brd: use radix_tree_maybe_preload instead of radix_tree_preload
block: use proper return value from bio_failfast()
block: bio-integrity: Copy flags when bio_integrity_payload is cloned
block: Fix io statistics for cgroup in throttle path
brd: mark as nowait compatible
brd: check for REQ_NOWAIT and set correct page allocation mask
brd: return 0/-error from brd_insert_page()
block: sync mixed merged request's failfast with 1st bio's
Revert "blk-cgroup: pin the gendisk in struct blkcg_gq"
Revert "blk-cgroup: pass a gendisk to blkg_lookup"
Revert "blk-cgroup: delay blk-cgroup initialization until add_disk"
Revert "blk-cgroup: delay calling blkcg_exit_disk until disk_release"
Revert "blk-cgroup: move the cgroup information to struct gendisk"
nvme-pci: remove iod use_sgls
nvme-pci: fix freeing single sgl
block: ublk: check IO buffer based on flag need_get_data
s390/dasd: Fix potential memleak in dasd_eckd_init()
s390/dasd: sort out physical vs virtual pointers usage
block: Remove the ALLOC_CACHE_SLACK constant
block: make kobj_type structures constant
...
Diffstat (limited to 'drivers/nvme')
-rw-r--r-- | drivers/nvme/host/auth.c | 30 | ||||
-rw-r--r-- | drivers/nvme/host/constants.c | 16 | ||||
-rw-r--r-- | drivers/nvme/host/core.c | 123 | ||||
-rw-r--r-- | drivers/nvme/host/fabrics.c | 19 | ||||
-rw-r--r-- | drivers/nvme/host/ioctl.c | 9 | ||||
-rw-r--r-- | drivers/nvme/host/nvme.h | 16 | ||||
-rw-r--r-- | drivers/nvme/host/pci.c | 104 | ||||
-rw-r--r-- | drivers/nvme/host/tcp.c | 7 | ||||
-rw-r--r-- | drivers/nvme/target/admin-cmd.c | 2 | ||||
-rw-r--r-- | drivers/nvme/target/io-cmd-file.c | 10 | ||||
-rw-r--r-- | drivers/nvme/target/passthru.c | 5 | ||||
-rw-r--r-- | drivers/nvme/target/tcp.c | 5 | ||||
-rw-r--r-- | drivers/nvme/target/zns.c | 3 |
13 files changed, 175 insertions, 174 deletions
diff --git a/drivers/nvme/host/auth.c b/drivers/nvme/host/auth.c index bdb97496ba2d..901c59145811 100644 --- a/drivers/nvme/host/auth.c +++ b/drivers/nvme/host/auth.c @@ -160,7 +160,7 @@ static int nvme_auth_process_dhchap_challenge(struct nvme_ctrl *ctrl, if (size > CHAP_BUF_SIZE) { chap->status = NVME_AUTH_DHCHAP_FAILURE_INCORRECT_PAYLOAD; - return NVME_SC_INVALID_FIELD; + return -EINVAL; } hmac_name = nvme_auth_hmac_name(data->hashid); @@ -169,7 +169,7 @@ static int nvme_auth_process_dhchap_challenge(struct nvme_ctrl *ctrl, "qid %d: invalid HASH ID %d\n", chap->qid, data->hashid); chap->status = NVME_AUTH_DHCHAP_FAILURE_HASH_UNUSABLE; - return NVME_SC_INVALID_FIELD; + return -EPROTO; } if (chap->hash_id == data->hashid && chap->shash_tfm && @@ -195,7 +195,7 @@ static int nvme_auth_process_dhchap_challenge(struct nvme_ctrl *ctrl, chap->qid, hmac_name, PTR_ERR(chap->shash_tfm)); chap->shash_tfm = NULL; chap->status = NVME_AUTH_DHCHAP_FAILURE_FAILED; - return NVME_SC_AUTH_REQUIRED; + return -ENOMEM; } if (crypto_shash_digestsize(chap->shash_tfm) != data->hl) { @@ -205,7 +205,7 @@ static int nvme_auth_process_dhchap_challenge(struct nvme_ctrl *ctrl, crypto_free_shash(chap->shash_tfm); chap->shash_tfm = NULL; chap->status = NVME_AUTH_DHCHAP_FAILURE_HASH_UNUSABLE; - return NVME_SC_AUTH_REQUIRED; + return -EPROTO; } chap->hash_id = data->hashid; @@ -221,7 +221,7 @@ select_kpp: chap->qid, data->dhgid); chap->status = NVME_AUTH_DHCHAP_FAILURE_DHGROUP_UNUSABLE; /* Leave previous dh_tfm intact */ - return NVME_SC_AUTH_REQUIRED; + return -EPROTO; } if (chap->dhgroup_id == data->dhgid && @@ -244,7 +244,7 @@ select_kpp: "qid %d: empty DH value\n", chap->qid); chap->status = NVME_AUTH_DHCHAP_FAILURE_DHGROUP_UNUSABLE; - return NVME_SC_INVALID_FIELD; + return -EPROTO; } chap->dh_tfm = crypto_alloc_kpp(kpp_name, 0, 0); @@ -256,7 +256,7 @@ select_kpp: chap->qid, ret, gid_name); chap->status = NVME_AUTH_DHCHAP_FAILURE_DHGROUP_UNUSABLE; chap->dh_tfm = NULL; - return 
NVME_SC_AUTH_REQUIRED; + return -ret; } dev_dbg(ctrl->device, "qid %d: selected DH group %s\n", chap->qid, gid_name); @@ -265,7 +265,7 @@ select_kpp: "qid %d: invalid DH value for NULL DH\n", chap->qid); chap->status = NVME_AUTH_DHCHAP_FAILURE_INCORRECT_PAYLOAD; - return NVME_SC_INVALID_FIELD; + return -EPROTO; } chap->dhgroup_id = data->dhgid; @@ -276,7 +276,7 @@ skip_kpp: chap->ctrl_key = kmalloc(dhvlen, GFP_KERNEL); if (!chap->ctrl_key) { chap->status = NVME_AUTH_DHCHAP_FAILURE_FAILED; - return NVME_SC_AUTH_REQUIRED; + return -ENOMEM; } chap->ctrl_key_len = dhvlen; memcpy(chap->ctrl_key, data->cval + chap->hash_len, @@ -346,7 +346,7 @@ static int nvme_auth_process_dhchap_success1(struct nvme_ctrl *ctrl, if (size > CHAP_BUF_SIZE) { chap->status = NVME_AUTH_DHCHAP_FAILURE_INCORRECT_PAYLOAD; - return NVME_SC_INVALID_FIELD; + return -EINVAL; } if (data->hl != chap->hash_len) { @@ -354,7 +354,7 @@ static int nvme_auth_process_dhchap_success1(struct nvme_ctrl *ctrl, "qid %d: invalid hash length %u\n", chap->qid, data->hl); chap->status = NVME_AUTH_DHCHAP_FAILURE_HASH_UNUSABLE; - return NVME_SC_INVALID_FIELD; + return -EPROTO; } /* Just print out information for the admin queue */ @@ -378,7 +378,7 @@ static int nvme_auth_process_dhchap_success1(struct nvme_ctrl *ctrl, "qid %d: controller authentication failed\n", chap->qid); chap->status = NVME_AUTH_DHCHAP_FAILURE_FAILED; - return NVME_SC_AUTH_REQUIRED; + return -ECONNREFUSED; } /* Just print out information for the admin queue */ @@ -732,7 +732,7 @@ static void nvme_queue_auth_work(struct work_struct *work) NVME_AUTH_DHCHAP_MESSAGE_CHALLENGE); if (ret) { chap->status = ret; - chap->error = NVME_SC_AUTH_REQUIRED; + chap->error = -ECONNREFUSED; return; } @@ -800,7 +800,7 @@ static void nvme_queue_auth_work(struct work_struct *work) NVME_AUTH_DHCHAP_MESSAGE_SUCCESS1); if (ret) { chap->status = ret; - chap->error = NVME_SC_AUTH_REQUIRED; + chap->error = -ECONNREFUSED; return; } @@ -821,7 +821,7 @@ static void 
nvme_queue_auth_work(struct work_struct *work) ret = nvme_auth_process_dhchap_success1(ctrl, chap); if (ret) { /* Controller authentication failed */ - chap->error = NVME_SC_AUTH_REQUIRED; + chap->error = -ECONNREFUSED; goto fail2; } diff --git a/drivers/nvme/host/constants.c b/drivers/nvme/host/constants.c index e958d5015585..bc523ca02254 100644 --- a/drivers/nvme/host/constants.c +++ b/drivers/nvme/host/constants.c @@ -54,6 +54,14 @@ static const char * const nvme_admin_ops[] = { [nvme_admin_get_lba_status] = "Get LBA Status", }; +static const char * const nvme_fabrics_ops[] = { + [nvme_fabrics_type_property_set] = "Property Set", + [nvme_fabrics_type_property_get] = "Property Get", + [nvme_fabrics_type_connect] = "Connect", + [nvme_fabrics_type_auth_send] = "Authentication Send", + [nvme_fabrics_type_auth_receive] = "Authentication Receive", +}; + static const char * const nvme_statuses[] = { [NVME_SC_SUCCESS] = "Success", [NVME_SC_INVALID_OPCODE] = "Invalid Command Opcode", @@ -185,3 +193,11 @@ const unsigned char *nvme_get_admin_opcode_str(u8 opcode) return nvme_admin_ops[opcode]; return "Unknown"; } +EXPORT_SYMBOL_GPL(nvme_get_admin_opcode_str); + +const unsigned char *nvme_get_fabrics_opcode_str(u8 opcode) { + if (opcode < ARRAY_SIZE(nvme_fabrics_ops) && nvme_fabrics_ops[opcode]) + return nvme_fabrics_ops[opcode]; + return "Unknown"; +} +EXPORT_SYMBOL_GPL(nvme_get_fabrics_opcode_str); diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 8b6421141162..8698410aeb84 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -806,9 +806,7 @@ static blk_status_t nvme_setup_discard(struct nvme_ns *ns, struct request *req, cmnd->dsm.nr = cpu_to_le32(segments - 1); cmnd->dsm.attributes = cpu_to_le32(NVME_DSMGMT_AD); - req->special_vec.bv_page = virt_to_page(range); - req->special_vec.bv_offset = offset_in_page(range); - req->special_vec.bv_len = alloc_size; + bvec_set_virt(&req->special_vec, range, alloc_size); req->rq_flags |= 
RQF_SPECIAL_PAYLOAD; return BLK_STS_OK; @@ -1004,7 +1002,7 @@ EXPORT_SYMBOL_GPL(nvme_setup_cmd); * >0: nvme controller's cqe status response * <0: kernel error in lieu of controller response */ -static int nvme_execute_rq(struct request *rq, bool at_head) +int nvme_execute_rq(struct request *rq, bool at_head) { blk_status_t status; @@ -1015,6 +1013,7 @@ static int nvme_execute_rq(struct request *rq, bool at_head) return nvme_req(rq)->status; return blk_status_to_errno(status); } +EXPORT_SYMBOL_NS_GPL(nvme_execute_rq, NVME_TARGET_PASSTHRU); /* * Returns 0 on success. If the result is negative, it's a Linux error code; @@ -1060,41 +1059,12 @@ int nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd, } EXPORT_SYMBOL_GPL(nvme_submit_sync_cmd); -static u32 nvme_known_admin_effects(u8 opcode) -{ - switch (opcode) { - case nvme_admin_format_nvm: - return NVME_CMD_EFFECTS_LBCC | NVME_CMD_EFFECTS_NCC | - NVME_CMD_EFFECTS_CSE_MASK; - case nvme_admin_sanitize_nvm: - return NVME_CMD_EFFECTS_LBCC | NVME_CMD_EFFECTS_CSE_MASK; - default: - break; - } - return 0; -} - -static u32 nvme_known_nvm_effects(u8 opcode) -{ - switch (opcode) { - case nvme_cmd_write: - case nvme_cmd_write_zeroes: - case nvme_cmd_write_uncor: - return NVME_CMD_EFFECTS_LBCC; - default: - return 0; - } -} - u32 nvme_command_effects(struct nvme_ctrl *ctrl, struct nvme_ns *ns, u8 opcode) { u32 effects = 0; if (ns) { - if (ns->head->effects) - effects = le32_to_cpu(ns->head->effects->iocs[opcode]); - if (ns->head->ids.csi == NVME_CSI_NVM) - effects |= nvme_known_nvm_effects(opcode); + effects = le32_to_cpu(ns->head->effects->iocs[opcode]); if (effects & ~(NVME_CMD_EFFECTS_CSUPP | NVME_CMD_EFFECTS_LBCC)) dev_warn_once(ctrl->device, "IO command:%02x has unusual effects:%08x\n", @@ -1107,17 +1077,14 @@ u32 nvme_command_effects(struct nvme_ctrl *ctrl, struct nvme_ns *ns, u8 opcode) */ effects &= ~NVME_CMD_EFFECTS_CSE_MASK; } else { - if (ctrl->effects) - effects = 
le32_to_cpu(ctrl->effects->acs[opcode]); - effects |= nvme_known_admin_effects(opcode); + effects = le32_to_cpu(ctrl->effects->acs[opcode]); } return effects; } EXPORT_SYMBOL_NS_GPL(nvme_command_effects, NVME_TARGET_PASSTHRU); -static u32 nvme_passthru_start(struct nvme_ctrl *ctrl, struct nvme_ns *ns, - u8 opcode) +u32 nvme_passthru_start(struct nvme_ctrl *ctrl, struct nvme_ns *ns, u8 opcode) { u32 effects = nvme_command_effects(ctrl, ns, opcode); @@ -1135,6 +1102,7 @@ static u32 nvme_passthru_start(struct nvme_ctrl *ctrl, struct nvme_ns *ns, } return effects; } +EXPORT_SYMBOL_NS_GPL(nvme_passthru_start, NVME_TARGET_PASSTHRU); void nvme_passthru_end(struct nvme_ctrl *ctrl, u32 effects, struct nvme_command *cmd, int status) @@ -1176,17 +1144,6 @@ void nvme_passthru_end(struct nvme_ctrl *ctrl, u32 effects, } EXPORT_SYMBOL_NS_GPL(nvme_passthru_end, NVME_TARGET_PASSTHRU); -int nvme_execute_passthru_rq(struct request *rq, u32 *effects) -{ - struct nvme_command *cmd = nvme_req(rq)->cmd; - struct nvme_ctrl *ctrl = nvme_req(rq)->ctrl; - struct nvme_ns *ns = rq->q->queuedata; - - *effects = nvme_passthru_start(ctrl, ns, cmd->common.opcode); - return nvme_execute_rq(rq, false); -} -EXPORT_SYMBOL_NS_GPL(nvme_execute_passthru_rq, NVME_TARGET_PASSTHRU); - /* * Recommended frequency for KATO commands per NVMe 1.4 section 7.12.1: * @@ -3122,6 +3079,62 @@ free_data: return ret; } +static void nvme_init_known_nvm_effects(struct nvme_ctrl *ctrl) +{ + struct nvme_effects_log *log = ctrl->effects; + + log->acs[nvme_admin_format_nvm] |= cpu_to_le32(NVME_CMD_EFFECTS_LBCC | + NVME_CMD_EFFECTS_NCC | + NVME_CMD_EFFECTS_CSE_MASK); + log->acs[nvme_admin_sanitize_nvm] |= cpu_to_le32(NVME_CMD_EFFECTS_LBCC | + NVME_CMD_EFFECTS_CSE_MASK); + + /* + * The spec says the result of a security receive command depends on + * the previous security send command. As such, many vendors log this + * command as one to submitted only when no other commands to the same + * namespace are outstanding. 
The intention is to tell the host to + * prevent mixing security send and receive. + * + * This driver can only enforce such exclusive access against IO + * queues, though. We are not readily able to enforce such a rule for + * two commands to the admin queue, which is the only queue that + * matters for this command. + * + * Rather than blindly freezing the IO queues for this effect that + * doesn't even apply to IO, mask it off. + */ + log->acs[nvme_admin_security_recv] &= ~NVME_CMD_EFFECTS_CSE_MASK; + + log->iocs[nvme_cmd_write] |= cpu_to_le32(NVME_CMD_EFFECTS_LBCC); + log->iocs[nvme_cmd_write_zeroes] |= cpu_to_le32(NVME_CMD_EFFECTS_LBCC); + log->iocs[nvme_cmd_write_uncor] |= cpu_to_le32(NVME_CMD_EFFECTS_LBCC); +} + +static int nvme_init_effects(struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id) +{ + int ret = 0; + + if (ctrl->effects) + return 0; + + if (id->lpa & NVME_CTRL_LPA_CMD_EFFECTS_LOG) { + ret = nvme_get_effects_log(ctrl, NVME_CSI_NVM, &ctrl->effects); + if (ret < 0) + return ret; + } + + if (!ctrl->effects) { + ctrl->effects = kzalloc(sizeof(*ctrl->effects), GFP_KERNEL); + if (!ctrl->effects) + return -ENOMEM; + xa_store(&ctrl->cels, NVME_CSI_NVM, ctrl->effects, GFP_KERNEL); + } + + nvme_init_known_nvm_effects(ctrl); + return 0; +} + static int nvme_init_identify(struct nvme_ctrl *ctrl) { struct nvme_id_ctrl *id; @@ -3135,12 +3148,6 @@ static int nvme_init_identify(struct nvme_ctrl *ctrl) return -EIO; } - if (id->lpa & NVME_CTRL_LPA_CMD_EFFECTS_LOG) { - ret = nvme_get_effects_log(ctrl, NVME_CSI_NVM, &ctrl->effects); - if (ret < 0) - goto out_free; - } - if (!(ctrl->ops->flags & NVME_F_FABRICS)) ctrl->cntlid = le16_to_cpu(id->cntlid); @@ -3163,6 +3170,10 @@ static int nvme_init_identify(struct nvme_ctrl *ctrl) ret = nvme_init_subsystem(ctrl, id); if (ret) goto out_free; + + ret = nvme_init_effects(ctrl, id); + if (ret) + goto out_free; } memcpy(ctrl->subsys->firmware_rev, id->fr, sizeof(ctrl->subsys->firmware_rev)); diff --git 
a/drivers/nvme/host/fabrics.c b/drivers/nvme/host/fabrics.c index ce27276f552d..bbaa04a0c502 100644 --- a/drivers/nvme/host/fabrics.c +++ b/drivers/nvme/host/fabrics.c @@ -410,7 +410,14 @@ int nvmf_connect_admin_queue(struct nvme_ctrl *ctrl) result = le32_to_cpu(res.u32); ctrl->cntlid = result & 0xFFFF; - if ((result >> 16) & 0x3) { + if (result & (NVME_CONNECT_AUTHREQ_ATR | NVME_CONNECT_AUTHREQ_ASCR)) { + /* Secure concatenation is not implemented */ + if (result & NVME_CONNECT_AUTHREQ_ASCR) { + dev_warn(ctrl->device, + "qid 0: secure concatenation is not supported\n"); + ret = NVME_SC_AUTH_REQUIRED; + goto out_free_data; + } /* Authentication required */ ret = nvme_auth_negotiate(ctrl, 0); if (ret) { @@ -486,7 +493,14 @@ int nvmf_connect_io_queue(struct nvme_ctrl *ctrl, u16 qid) &cmd, data); } result = le32_to_cpu(res.u32); - if ((result >> 16) & 2) { + if (result & (NVME_CONNECT_AUTHREQ_ATR | NVME_CONNECT_AUTHREQ_ASCR)) { + /* Secure concatenation is not implemented */ + if (result & NVME_CONNECT_AUTHREQ_ASCR) { + dev_warn(ctrl->device, + "qid 0: secure concatenation is not supported\n"); + ret = NVME_SC_AUTH_REQUIRED; + goto out_free_data; + } /* Authentication required */ ret = nvme_auth_negotiate(ctrl, qid); if (ret) { @@ -500,6 +514,7 @@ int nvmf_connect_io_queue(struct nvme_ctrl *ctrl, u16 qid) "qid %u: authentication failed\n", qid); } } +out_free_data: kfree(data); return ret; } diff --git a/drivers/nvme/host/ioctl.c b/drivers/nvme/host/ioctl.c index 06f52db34be9..723e7d5b778f 100644 --- a/drivers/nvme/host/ioctl.c +++ b/drivers/nvme/host/ioctl.c @@ -219,6 +219,7 @@ static int nvme_submit_user_cmd(struct request_queue *q, void __user *meta_buffer, unsigned meta_len, u32 meta_seed, u64 *result, unsigned timeout, unsigned int flags) { + struct nvme_ns *ns = q->queuedata; struct nvme_ctrl *ctrl; struct request *req; void *meta = NULL; @@ -241,8 +242,8 @@ static int nvme_submit_user_cmd(struct request_queue *q, bio = req->bio; ctrl = nvme_req(req)->ctrl; - 
ret = nvme_execute_passthru_rq(req, &effects); - + effects = nvme_passthru_start(ctrl, ns, cmd->common.opcode); + ret = nvme_execute_rq(req, false); if (result) *result = le64_to_cpu(nvme_req(req)->result.u64); if (meta) @@ -554,7 +555,7 @@ static int nvme_uring_cmd_io(struct nvme_ctrl *ctrl, struct nvme_ns *ns, struct nvme_uring_data d; struct nvme_command c; struct request *req; - blk_opf_t rq_flags = 0; + blk_opf_t rq_flags = REQ_ALLOC_CACHE; blk_mq_req_flags_t blk_flags = 0; void *meta = NULL; int ret; @@ -590,7 +591,7 @@ static int nvme_uring_cmd_io(struct nvme_ctrl *ctrl, struct nvme_ns *ns, d.timeout_ms = READ_ONCE(cmd->timeout_ms); if (issue_flags & IO_URING_F_NONBLOCK) { - rq_flags = REQ_NOWAIT; + rq_flags |= REQ_NOWAIT; blk_flags = BLK_MQ_REQ_NOWAIT; } if (issue_flags & IO_URING_F_IOPOLL) diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index 424c8a467a0c..bf46f122e9e1 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -1070,7 +1070,8 @@ static inline void nvme_auth_free(struct nvme_ctrl *ctrl) {}; u32 nvme_command_effects(struct nvme_ctrl *ctrl, struct nvme_ns *ns, u8 opcode); -int nvme_execute_passthru_rq(struct request *rq, u32 *effects); +u32 nvme_passthru_start(struct nvme_ctrl *ctrl, struct nvme_ns *ns, u8 opcode); +int nvme_execute_rq(struct request *rq, bool at_head); void nvme_passthru_end(struct nvme_ctrl *ctrl, u32 effects, struct nvme_command *cmd, int status); struct nvme_ctrl *nvme_ctrl_from_file(struct file *file); @@ -1086,6 +1087,7 @@ static inline bool nvme_multi_css(struct nvme_ctrl *ctrl) const unsigned char *nvme_get_error_status_str(u16 status); const unsigned char *nvme_get_opcode_str(u8 opcode); const unsigned char *nvme_get_admin_opcode_str(u8 opcode); +const unsigned char *nvme_get_fabrics_opcode_str(u8 opcode); #else /* CONFIG_NVME_VERBOSE_ERRORS */ static inline const unsigned char *nvme_get_error_status_str(u16 status) { @@ -1099,6 +1101,18 @@ static inline const unsigned char 
*nvme_get_admin_opcode_str(u8 opcode) { return "Admin Cmd"; } + +static inline const unsigned char *nvme_get_fabrics_opcode_str(u8 opcode) +{ + return "Fabrics Cmd"; +} #endif /* CONFIG_NVME_VERBOSE_ERRORS */ +static inline const unsigned char *nvme_opcode_str(int qid, u8 opcode, u8 fctype) +{ + if (opcode == nvme_fabrics_command) + return nvme_get_fabrics_opcode_str(fctype); + return qid ? nvme_get_opcode_str(opcode) : + nvme_get_admin_opcode_str(opcode); +} #endif /* _NVME_H */ diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index c11e0cfeef0f..5b95c94ee40f 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -42,8 +42,9 @@ * These can be higher, but we need to ensure that any command doesn't * require an sg allocation that needs more than a page of data. */ -#define NVME_MAX_KB_SZ 4096 -#define NVME_MAX_SEGS 127 +#define NVME_MAX_KB_SZ 8192 +#define NVME_MAX_SEGS 128 +#define NVME_MAX_NR_ALLOCATIONS 5 static int use_threaded_interrupts; module_param(use_threaded_interrupts, int, 0444); @@ -216,6 +217,11 @@ struct nvme_queue { struct completion delete_done; }; +union nvme_descriptor { + struct nvme_sgl_desc *sg_list; + __le64 *prp_list; +}; + /* * The nvme_iod describes the data in an I/O. * @@ -225,7 +231,6 @@ struct nvme_queue { struct nvme_iod { struct nvme_request req; struct nvme_command cmd; - bool use_sgl; bool aborted; s8 nr_allocations; /* PRP list pool allocations. 0 means small pool in use */ @@ -233,6 +238,7 @@ struct nvme_iod { dma_addr_t first_dma; dma_addr_t meta_dma; struct sg_table sgt; + union nvme_descriptor list[NVME_MAX_NR_ALLOCATIONS]; }; static inline unsigned int nvme_dbbuf_size(struct nvme_dev *dev) @@ -387,16 +393,6 @@ static int nvme_pci_npages_prp(void) return DIV_ROUND_UP(8 * nprps, NVME_CTRL_PAGE_SIZE - 8); } -/* - * Calculates the number of pages needed for the SGL segments. For example a 4k - * page can accommodate 256 SGL descriptors. 
- */ -static int nvme_pci_npages_sgl(void) -{ - return DIV_ROUND_UP(NVME_MAX_SEGS * sizeof(struct nvme_sgl_desc), - NVME_CTRL_PAGE_SIZE); -} - static int nvme_admin_init_hctx(struct blk_mq_hw_ctx *hctx, void *data, unsigned int hctx_idx) { @@ -510,16 +506,10 @@ static void nvme_commit_rqs(struct blk_mq_hw_ctx *hctx) spin_unlock(&nvmeq->sq_lock); } -static void **nvme_pci_iod_list(struct request *req) -{ - struct nvme_iod *iod = blk_mq_rq_to_pdu(req); - return (void **)(iod->sgt.sgl + blk_rq_nr_phys_segments(req)); -} - -static inline bool nvme_pci_use_sgls(struct nvme_dev *dev, struct request *req) +static inline bool nvme_pci_use_sgls(struct nvme_dev *dev, struct request *req, + int nseg) { struct nvme_queue *nvmeq = req->mq_hctx->driver_data; - int nseg = blk_rq_nr_phys_segments(req); unsigned int avg_seg_size; avg_seg_size = DIV_ROUND_UP(blk_rq_payload_bytes(req), nseg); @@ -541,7 +531,7 @@ static void nvme_free_prps(struct nvme_dev *dev, struct request *req) int i; for (i = 0; i < iod->nr_allocations; i++) { - __le64 *prp_list = nvme_pci_iod_list(req)[i]; + __le64 *prp_list = iod->list[i].prp_list; dma_addr_t next_dma_addr = le64_to_cpu(prp_list[last_prp]); dma_pool_free(dev->prp_page_pool, prp_list, dma_addr); @@ -549,22 +539,6 @@ static void nvme_free_prps(struct nvme_dev *dev, struct request *req) } } -static void nvme_free_sgls(struct nvme_dev *dev, struct request *req) -{ - const int last_sg = SGES_PER_PAGE - 1; - struct nvme_iod *iod = blk_mq_rq_to_pdu(req); - dma_addr_t dma_addr = iod->first_dma; - int i; - - for (i = 0; i < iod->nr_allocations; i++) { - struct nvme_sgl_desc *sg_list = nvme_pci_iod_list(req)[i]; - dma_addr_t next_dma_addr = le64_to_cpu((sg_list[last_sg]).addr); - - dma_pool_free(dev->prp_page_pool, sg_list, dma_addr); - dma_addr = next_dma_addr; - } -} - static void nvme_unmap_data(struct nvme_dev *dev, struct request *req) { struct nvme_iod *iod = blk_mq_rq_to_pdu(req); @@ -580,10 +554,11 @@ static void nvme_unmap_data(struct nvme_dev 
*dev, struct request *req) dma_unmap_sgtable(dev->dev, &iod->sgt, rq_dma_dir(req), 0); if (iod->nr_allocations == 0) - dma_pool_free(dev->prp_small_pool, nvme_pci_iod_list(req)[0], + dma_pool_free(dev->prp_small_pool, iod->list[0].sg_list, + iod->first_dma); + else if (iod->nr_allocations == 1) + dma_pool_free(dev->prp_page_pool, iod->list[0].sg_list, iod->first_dma); - else if (iod->use_sgl) - nvme_free_sgls(dev, req); else nvme_free_prps(dev, req); mempool_free(iod->sgt.sgl, dev->iod_mempool); @@ -614,7 +589,6 @@ static blk_status_t nvme_pci_setup_prps(struct nvme_dev *dev, u64 dma_addr = sg_dma_address(sg); int offset = dma_addr & (NVME_CTRL_PAGE_SIZE - 1); __le64 *prp_list; - void **list = nvme_pci_iod_list(req); dma_addr_t prp_dma; int nprps, i; @@ -652,7 +626,7 @@ static blk_status_t nvme_pci_setup_prps(struct nvme_dev *dev, iod->nr_allocations = -1; return BLK_STS_RESOURCE; } - list[0] = prp_list; + iod->list[0].prp_list = prp_list; iod->first_dma = prp_dma; i = 0; for (;;) { @@ -661,7 +635,7 @@ static blk_status_t nvme_pci_setup_prps(struct nvme_dev *dev, prp_list = dma_pool_alloc(pool, GFP_ATOMIC, &prp_dma); if (!prp_list) goto free_prps; - list[iod->nr_allocations++] = prp_list; + iod->list[iod->nr_allocations++].prp_list = prp_list; prp_list[0] = old_prp_list[i - 1]; old_prp_list[i - 1] = cpu_to_le64(prp_dma); i = 1; @@ -706,13 +680,8 @@ static void nvme_pci_sgl_set_seg(struct nvme_sgl_desc *sge, dma_addr_t dma_addr, int entries) { sge->addr = cpu_to_le64(dma_addr); - if (entries < SGES_PER_PAGE) { - sge->length = cpu_to_le32(entries * sizeof(*sge)); - sge->type = NVME_SGL_FMT_LAST_SEG_DESC << 4; - } else { - sge->length = cpu_to_le32(NVME_CTRL_PAGE_SIZE); - sge->type = NVME_SGL_FMT_SEG_DESC << 4; - } + sge->length = cpu_to_le32(entries * sizeof(*sge)); + sge->type = NVME_SGL_FMT_LAST_SEG_DESC << 4; } static blk_status_t nvme_pci_setup_sgls(struct nvme_dev *dev, @@ -748,34 +717,16 @@ static blk_status_t nvme_pci_setup_sgls(struct nvme_dev *dev, return 
BLK_STS_RESOURCE; } - nvme_pci_iod_list(req)[0] = sg_list; + iod->list[0].sg_list = sg_list; iod->first_dma = sgl_dma; nvme_pci_sgl_set_seg(&cmd->dptr.sgl, sgl_dma, entries); - do { - if (i == SGES_PER_PAGE) { - struct nvme_sgl_desc *old_sg_desc = sg_list; - struct nvme_sgl_desc *link = &old_sg_desc[i - 1]; - - sg_list = dma_pool_alloc(pool, GFP_ATOMIC, &sgl_dma); - if (!sg_list) - goto free_sgls; - - i = 0; - nvme_pci_iod_list(req)[iod->nr_allocations++] = sg_list; - sg_list[i++] = *link; - nvme_pci_sgl_set_seg(link, sgl_dma, entries); - } - nvme_pci_sgl_set_data(&sg_list[i++], sg); sg = sg_next(sg); } while (--entries > 0); return BLK_STS_OK; -free_sgls: - nvme_free_sgls(dev, req); - return BLK_STS_RESOURCE; } static blk_status_t nvme_setup_prp_simple(struct nvme_dev *dev, @@ -857,8 +808,7 @@ static blk_status_t nvme_map_data(struct nvme_dev *dev, struct request *req, goto out_free_sg; } - iod->use_sgl = nvme_pci_use_sgls(dev, req); - if (iod->use_sgl) + if (nvme_pci_use_sgls(dev, req, iod->sgt.nents)) ret = nvme_pci_setup_sgls(dev, req, &cmnd->rw); else ret = nvme_pci_setup_prps(dev, req, &cmnd->rw); @@ -2706,11 +2656,8 @@ static void nvme_release_prp_pools(struct nvme_dev *dev) static int nvme_pci_alloc_iod_mempool(struct nvme_dev *dev) { - size_t npages = max(nvme_pci_npages_prp(), nvme_pci_npages_sgl()); - size_t alloc_size = sizeof(__le64 *) * npages + - sizeof(struct scatterlist) * NVME_MAX_SEGS; + size_t alloc_size = sizeof(struct scatterlist) * NVME_MAX_SEGS; - WARN_ON_ONCE(alloc_size > PAGE_SIZE); dev->iod_mempool = mempool_create_node(1, mempool_kmalloc, mempool_kfree, (void *)alloc_size, GFP_KERNEL, @@ -3538,8 +3485,9 @@ static int __init nvme_init(void) BUILD_BUG_ON(sizeof(struct nvme_create_sq) != 64); BUILD_BUG_ON(sizeof(struct nvme_delete_queue) != 64); BUILD_BUG_ON(IRQ_AFFINITY_MAX_SETS < 2); - BUILD_BUG_ON(DIV_ROUND_UP(nvme_pci_npages_prp(), NVME_CTRL_PAGE_SIZE) > - S8_MAX); + BUILD_BUG_ON(NVME_MAX_SEGS > SGES_PER_PAGE); + 
BUILD_BUG_ON(sizeof(struct scatterlist) * NVME_MAX_SEGS > PAGE_SIZE); + BUILD_BUG_ON(nvme_pci_npages_prp() > NVME_MAX_NR_ALLOCATIONS); return pci_register_driver(&nvme_driver); } diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c index 8cedc1ef496c..d6100a787d39 100644 --- a/drivers/nvme/host/tcp.c +++ b/drivers/nvme/host/tcp.c @@ -2282,10 +2282,13 @@ static enum blk_eh_timer_return nvme_tcp_timeout(struct request *rq) struct nvme_tcp_request *req = blk_mq_rq_to_pdu(rq); struct nvme_ctrl *ctrl = &req->queue->ctrl->ctrl; struct nvme_tcp_cmd_pdu *pdu = req->pdu; + u8 opc = pdu->cmd.common.opcode, fctype = pdu->cmd.fabrics.fctype; + int qid = nvme_tcp_queue_id(req->queue); dev_warn(ctrl->device, - "queue %d: timeout request %#x type %d\n", - nvme_tcp_queue_id(req->queue), rq->tag, pdu->hdr.type); + "queue %d: timeout cid %#x type %d opcode %#x (%s)\n", + nvme_tcp_queue_id(req->queue), nvme_cid(rq), pdu->hdr.type, + opc, nvme_opcode_str(qid, opc, fctype)); if (ctrl->state != NVME_CTRL_LIVE) { /* diff --git a/drivers/nvme/target/admin-cmd.c b/drivers/nvme/target/admin-cmd.c index 6a54ed6fb121..80099df37314 100644 --- a/drivers/nvme/target/admin-cmd.c +++ b/drivers/nvme/target/admin-cmd.c @@ -840,7 +840,7 @@ void nvmet_execute_set_features(struct nvmet_req *req) u16 nsqr; u16 ncqr; - if (!nvmet_check_transfer_len(req, 0)) + if (!nvmet_check_data_len_lte(req, 0)) return; switch (cdw10 & 0xff) { diff --git a/drivers/nvme/target/io-cmd-file.c b/drivers/nvme/target/io-cmd-file.c index 871c4f32f443..2d068439b129 100644 --- a/drivers/nvme/target/io-cmd-file.c +++ b/drivers/nvme/target/io-cmd-file.c @@ -73,13 +73,6 @@ err: return ret; } -static void nvmet_file_init_bvec(struct bio_vec *bv, struct scatterlist *sg) -{ - bv->bv_page = sg_page(sg); - bv->bv_offset = sg->offset; - bv->bv_len = sg->length; -} - static ssize_t nvmet_file_submit_bvec(struct nvmet_req *req, loff_t pos, unsigned long nr_segs, size_t count, int ki_flags) { @@ -146,7 +139,8 @@ static bool 
nvmet_file_execute_io(struct nvmet_req *req, int ki_flags) memset(&req->f.iocb, 0, sizeof(struct kiocb)); for_each_sg(req->sg, sg, req->sg_cnt, i) { - nvmet_file_init_bvec(&req->f.bvec[bv_cnt], sg); + bvec_set_page(&req->f.bvec[bv_cnt], sg_page(sg), sg->length, + sg->offset); len += req->f.bvec[bv_cnt].bv_len; total_len += req->f.bvec[bv_cnt].bv_len; bv_cnt++; diff --git a/drivers/nvme/target/passthru.c b/drivers/nvme/target/passthru.c index adc0958755d6..511c980d538d 100644 --- a/drivers/nvme/target/passthru.c +++ b/drivers/nvme/target/passthru.c @@ -216,11 +216,12 @@ static void nvmet_passthru_execute_cmd_work(struct work_struct *w) struct nvmet_req *req = container_of(w, struct nvmet_req, p.work); struct request *rq = req->p.rq; struct nvme_ctrl *ctrl = nvme_req(rq)->ctrl; + struct nvme_ns *ns = rq->q->queuedata; u32 effects; int status; - status = nvme_execute_passthru_rq(rq, &effects); - + effects = nvme_passthru_start(ctrl, ns, req->cmd->common.opcode); + status = nvme_execute_rq(rq, false); if (status == NVME_SC_SUCCESS && req->cmd->common.opcode == nvme_admin_identify) { switch (req->cmd->identify.cns) { diff --git a/drivers/nvme/target/tcp.c b/drivers/nvme/target/tcp.c index cc05c094de22..c5759eb503d0 100644 --- a/drivers/nvme/target/tcp.c +++ b/drivers/nvme/target/tcp.c @@ -321,9 +321,8 @@ static void nvmet_tcp_build_pdu_iovec(struct nvmet_tcp_cmd *cmd) while (length) { u32 iov_len = min_t(u32, length, sg->length - sg_offset); - iov->bv_page = sg_page(sg); - iov->bv_len = sg->length; - iov->bv_offset = sg->offset + sg_offset; + bvec_set_page(iov, sg_page(sg), sg->length, + sg->offset + sg_offset); length -= iov_len; sg = sg_next(sg); diff --git a/drivers/nvme/target/zns.c b/drivers/nvme/target/zns.c index 1254cf57e008..7e4292d88016 100644 --- a/drivers/nvme/target/zns.c +++ b/drivers/nvme/target/zns.c @@ -254,8 +254,7 @@ static unsigned long nvmet_req_nr_zones_from_slba(struct nvmet_req *req) { unsigned int sect = nvmet_lba_to_sect(req->ns, 
req->cmd->zmr.slba); - return bdev_nr_zones(req->ns->bdev) - - (sect >> ilog2(bdev_zone_sectors(req->ns->bdev))); + return bdev_nr_zones(req->ns->bdev) - bdev_zone_no(req->ns->bdev, sect); } static unsigned long get_nr_zones_from_buf(struct nvmet_req *req, u32 bufsize) |