summaryrefslogtreecommitdiffstats
path: root/drivers/nvme/target
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2018-08-14 19:23:25 +0200
committerLinus Torvalds <torvalds@linux-foundation.org>2018-08-14 19:23:25 +0200
commit73ba2fb33c492916853dfe63e3b3163da0be661d (patch)
treec2fda8ca1273744d2e884d24189a15ac1a7d63c2 /drivers/nvme/target
parentMerge branch 'l1tf-final' of git://git.kernel.org/pub/scm/linux/kernel/git/ti... (diff)
parentblkcg: Make blkg_root_lookup() work for queues in bypass mode (diff)
downloadlinux-73ba2fb33c492916853dfe63e3b3163da0be661d.tar.xz
linux-73ba2fb33c492916853dfe63e3b3163da0be661d.zip
Merge tag 'for-4.19/block-20180812' of git://git.kernel.dk/linux-block
Pull block updates from Jens Axboe: "First pull request for this merge window, there will also be a followup request with some stragglers. This pull request contains: - Fix for a thundering heard issue in the wbt block code (Anchal Agarwal) - A few NVMe pull requests: * Improved tracepoints (Keith) * Larger inline data support for RDMA (Steve Wise) * RDMA setup/teardown fixes (Sagi) * Effects log suppor for NVMe target (Chaitanya Kulkarni) * Buffered IO suppor for NVMe target (Chaitanya Kulkarni) * TP4004 (ANA) support (Christoph) * Various NVMe fixes - Block io-latency controller support. Much needed support for properly containing block devices. (Josef) - Series improving how we handle sense information on the stack (Kees) - Lightnvm fixes and updates/improvements (Mathias/Javier et al) - Zoned device support for null_blk (Matias) - AIX partition fixes (Mauricio Faria de Oliveira) - DIF checksum code made generic (Max Gurtovoy) - Add support for discard in iostats (Michael Callahan / Tejun) - Set of updates for BFQ (Paolo) - Removal of async write support for bsg (Christoph) - Bio page dirtying and clone fixups (Christoph) - Set of bcache fix/changes (via Coly) - Series improving blk-mq queue setup/teardown speed (Ming) - Series improving merging performance on blk-mq (Ming) - Lots of other fixes and cleanups from a slew of folks" * tag 'for-4.19/block-20180812' of git://git.kernel.dk/linux-block: (190 commits) blkcg: Make blkg_root_lookup() work for queues in bypass mode bcache: fix error setting writeback_rate through sysfs interface null_blk: add lock drop/acquire annotation Blk-throttle: reduce tail io latency when iops limit is enforced block: paride: pd: mark expected switch fall-throughs block: Ensure that a request queue is dissociated from the cgroup controller block: Introduce blk_exit_queue() blkcg: Introduce blkg_root_lookup() block: Remove two superfluous #include directives blk-mq: count the hctx as active before allocating tag block: bvec_nr_vecs() returns value for wrong slab bcache: trivial - remove tailing backslash in macro BTREE_FLAG bcache: make the pr_err statement used for ENOENT only in sysfs_attatch section bcache: set max writeback rate when I/O request is idle bcache: add code comments for bset.c bcache: fix mistaken comments in request.c bcache: fix mistaken code comments in bcache.h bcache: add a comment in super.c bcache: avoid unncessary cache prefetch bch_btree_node_get() bcache: display rate debug parameters to 0 when writeback is not running ...
Diffstat (limited to 'drivers/nvme/target')
-rw-r--r--drivers/nvme/target/admin-cmd.c221
-rw-r--r--drivers/nvme/target/configfs.c250
-rw-r--r--drivers/nvme/target/core.c104
-rw-r--r--drivers/nvme/target/discovery.c2
-rw-r--r--drivers/nvme/target/io-cmd-bdev.c7
-rw-r--r--drivers/nvme/target/io-cmd-file.c80
-rw-r--r--drivers/nvme/target/loop.c1
-rw-r--r--drivers/nvme/target/nvmet.h62
-rw-r--r--drivers/nvme/target/rdma.c197
9 files changed, 845 insertions, 79 deletions
diff --git a/drivers/nvme/target/admin-cmd.c b/drivers/nvme/target/admin-cmd.c
index 38803576d5e1..a21caea1e080 100644
--- a/drivers/nvme/target/admin-cmd.c
+++ b/drivers/nvme/target/admin-cmd.c
@@ -19,6 +19,19 @@
#include <asm/unaligned.h>
#include "nvmet.h"
+/*
+ * This helper allows us to clear the AEN based on the RAE bit,
+ * Please use this helper when processing the log pages which are
+ * associated with the AEN.
+ */
+static inline void nvmet_clear_aen(struct nvmet_req *req, u32 aen_bit)
+{
+ int rae = le32_to_cpu(req->cmd->common.cdw10[0]) & 1 << 15;
+
+ if (!rae)
+ clear_bit(aen_bit, &req->sq->ctrl->aen_masked);
+}
+
u32 nvmet_get_log_page_len(struct nvme_command *cmd)
{
u32 len = le16_to_cpu(cmd->get_log_page.numdu);
@@ -128,6 +141,36 @@ out:
nvmet_req_complete(req, status);
}
+static void nvmet_execute_get_log_cmd_effects_ns(struct nvmet_req *req)
+{
+ u16 status = NVME_SC_INTERNAL;
+ struct nvme_effects_log *log;
+
+ log = kzalloc(sizeof(*log), GFP_KERNEL);
+ if (!log)
+ goto out;
+
+ log->acs[nvme_admin_get_log_page] = cpu_to_le32(1 << 0);
+ log->acs[nvme_admin_identify] = cpu_to_le32(1 << 0);
+ log->acs[nvme_admin_abort_cmd] = cpu_to_le32(1 << 0);
+ log->acs[nvme_admin_set_features] = cpu_to_le32(1 << 0);
+ log->acs[nvme_admin_get_features] = cpu_to_le32(1 << 0);
+ log->acs[nvme_admin_async_event] = cpu_to_le32(1 << 0);
+ log->acs[nvme_admin_keep_alive] = cpu_to_le32(1 << 0);
+
+ log->iocs[nvme_cmd_read] = cpu_to_le32(1 << 0);
+ log->iocs[nvme_cmd_write] = cpu_to_le32(1 << 0);
+ log->iocs[nvme_cmd_flush] = cpu_to_le32(1 << 0);
+ log->iocs[nvme_cmd_dsm] = cpu_to_le32(1 << 0);
+ log->iocs[nvme_cmd_write_zeroes] = cpu_to_le32(1 << 0);
+
+ status = nvmet_copy_to_sgl(req, 0, log, sizeof(*log));
+
+ kfree(log);
+out:
+ nvmet_req_complete(req, status);
+}
+
static void nvmet_execute_get_log_changed_ns(struct nvmet_req *req)
{
struct nvmet_ctrl *ctrl = req->sq->ctrl;
@@ -146,12 +189,76 @@ static void nvmet_execute_get_log_changed_ns(struct nvmet_req *req)
if (!status)
status = nvmet_zero_sgl(req, len, req->data_len - len);
ctrl->nr_changed_ns = 0;
- clear_bit(NVME_AEN_CFG_NS_ATTR, &ctrl->aen_masked);
+ nvmet_clear_aen(req, NVME_AEN_CFG_NS_ATTR);
mutex_unlock(&ctrl->lock);
out:
nvmet_req_complete(req, status);
}
+static u32 nvmet_format_ana_group(struct nvmet_req *req, u32 grpid,
+ struct nvme_ana_group_desc *desc)
+{
+ struct nvmet_ctrl *ctrl = req->sq->ctrl;
+ struct nvmet_ns *ns;
+ u32 count = 0;
+
+ if (!(req->cmd->get_log_page.lsp & NVME_ANA_LOG_RGO)) {
+ rcu_read_lock();
+ list_for_each_entry_rcu(ns, &ctrl->subsys->namespaces, dev_link)
+ if (ns->anagrpid == grpid)
+ desc->nsids[count++] = cpu_to_le32(ns->nsid);
+ rcu_read_unlock();
+ }
+
+ desc->grpid = cpu_to_le32(grpid);
+ desc->nnsids = cpu_to_le32(count);
+ desc->chgcnt = cpu_to_le64(nvmet_ana_chgcnt);
+ desc->state = req->port->ana_state[grpid];
+ memset(desc->rsvd17, 0, sizeof(desc->rsvd17));
+ return sizeof(struct nvme_ana_group_desc) + count * sizeof(__le32);
+}
+
+static void nvmet_execute_get_log_page_ana(struct nvmet_req *req)
+{
+ struct nvme_ana_rsp_hdr hdr = { 0, };
+ struct nvme_ana_group_desc *desc;
+ size_t offset = sizeof(struct nvme_ana_rsp_hdr); /* start beyond hdr */
+ size_t len;
+ u32 grpid;
+ u16 ngrps = 0;
+ u16 status;
+
+ status = NVME_SC_INTERNAL;
+ desc = kmalloc(sizeof(struct nvme_ana_group_desc) +
+ NVMET_MAX_NAMESPACES * sizeof(__le32), GFP_KERNEL);
+ if (!desc)
+ goto out;
+
+ down_read(&nvmet_ana_sem);
+ for (grpid = 1; grpid <= NVMET_MAX_ANAGRPS; grpid++) {
+ if (!nvmet_ana_group_enabled[grpid])
+ continue;
+ len = nvmet_format_ana_group(req, grpid, desc);
+ status = nvmet_copy_to_sgl(req, offset, desc, len);
+ if (status)
+ break;
+ offset += len;
+ ngrps++;
+ }
+
+ hdr.chgcnt = cpu_to_le64(nvmet_ana_chgcnt);
+ hdr.ngrps = cpu_to_le16(ngrps);
+ nvmet_clear_aen(req, NVME_AEN_CFG_ANA_CHANGE);
+ up_read(&nvmet_ana_sem);
+
+ kfree(desc);
+
+ /* copy the header last once we know the number of groups */
+ status = nvmet_copy_to_sgl(req, 0, &hdr, sizeof(hdr));
+out:
+ nvmet_req_complete(req, status);
+}
+
static void nvmet_execute_identify_ctrl(struct nvmet_req *req)
{
struct nvmet_ctrl *ctrl = req->sq->ctrl;
@@ -183,8 +290,8 @@ static void nvmet_execute_identify_ctrl(struct nvmet_req *req)
* the safest is to leave it as zeroes.
*/
- /* we support multiple ports and multiples hosts: */
- id->cmic = (1 << 0) | (1 << 1);
+ /* we support multiple ports, multiples hosts and ANA: */
+ id->cmic = (1 << 0) | (1 << 1) | (1 << 3);
/* no limit on data transfer sizes for now */
id->mdts = 0;
@@ -208,7 +315,7 @@ static void nvmet_execute_identify_ctrl(struct nvmet_req *req)
/* first slot is read-only, only one slot supported */
id->frmw = (1 << 0) | (1 << 1);
- id->lpa = (1 << 0) | (1 << 2);
+ id->lpa = (1 << 0) | (1 << 1) | (1 << 2);
id->elpe = NVMET_ERROR_LOG_SLOTS - 1;
id->npss = 0;
@@ -222,6 +329,7 @@ static void nvmet_execute_identify_ctrl(struct nvmet_req *req)
id->maxcmd = cpu_to_le16(NVMET_MAX_CMD);
id->nn = cpu_to_le32(ctrl->subsys->max_nsid);
+ id->mnan = cpu_to_le32(NVMET_MAX_NAMESPACES);
id->oncs = cpu_to_le16(NVME_CTRL_ONCS_DSM |
NVME_CTRL_ONCS_WRITE_ZEROES);
@@ -238,19 +346,24 @@ static void nvmet_execute_identify_ctrl(struct nvmet_req *req)
id->sgls = cpu_to_le32(1 << 0); /* we always support SGLs */
if (ctrl->ops->has_keyed_sgls)
id->sgls |= cpu_to_le32(1 << 2);
- if (ctrl->ops->sqe_inline_size)
+ if (req->port->inline_data_size)
id->sgls |= cpu_to_le32(1 << 20);
strcpy(id->subnqn, ctrl->subsys->subsysnqn);
/* Max command capsule size is sqe + single page of in-capsule data */
id->ioccsz = cpu_to_le32((sizeof(struct nvme_command) +
- ctrl->ops->sqe_inline_size) / 16);
+ req->port->inline_data_size) / 16);
/* Max response capsule size is cqe */
id->iorcsz = cpu_to_le32(sizeof(struct nvme_completion) / 16);
id->msdbd = ctrl->ops->msdbd;
+ id->anacap = (1 << 0) | (1 << 1) | (1 << 2) | (1 << 3) | (1 << 4);
+ id->anatt = 10; /* random value */
+ id->anagrpmax = cpu_to_le32(NVMET_MAX_ANAGRPS);
+ id->nanagrpid = cpu_to_le32(NVMET_MAX_ANAGRPS);
+
/*
* Meh, we don't really support any power state. Fake up the same
* values that qemu does.
@@ -259,6 +372,8 @@ static void nvmet_execute_identify_ctrl(struct nvmet_req *req)
id->psd[0].entry_lat = cpu_to_le32(0x10);
id->psd[0].exit_lat = cpu_to_le32(0x4);
+ id->nwpc = 1 << 0; /* write protect and no write protect */
+
status = nvmet_copy_to_sgl(req, 0, id, sizeof(*id));
kfree(id);
@@ -292,8 +407,15 @@ static void nvmet_execute_identify_ns(struct nvmet_req *req)
* nuse = ncap = nsze isn't always true, but we have no way to find
* that out from the underlying device.
*/
- id->ncap = id->nuse = id->nsze =
- cpu_to_le64(ns->size >> ns->blksize_shift);
+ id->ncap = id->nsze = cpu_to_le64(ns->size >> ns->blksize_shift);
+ switch (req->port->ana_state[ns->anagrpid]) {
+ case NVME_ANA_INACCESSIBLE:
+ case NVME_ANA_PERSISTENT_LOSS:
+ break;
+ default:
+ id->nuse = id->nsze;
+ break;
+ }
/*
* We just provide a single LBA format that matches what the
@@ -307,11 +429,14 @@ static void nvmet_execute_identify_ns(struct nvmet_req *req)
* controllers, but also with any other user of the block device.
*/
id->nmic = (1 << 0);
+ id->anagrpid = cpu_to_le32(ns->anagrpid);
- memcpy(&id->nguid, &ns->nguid, sizeof(uuid_le));
+ memcpy(&id->nguid, &ns->nguid, sizeof(id->nguid));
id->lbaf[0].ds = ns->blksize_shift;
+ if (ns->readonly)
+ id->nsattr |= (1 << 0);
nvmet_put_namespace(ns);
done:
status = nvmet_copy_to_sgl(req, 0, id, sizeof(*id));
@@ -424,6 +549,52 @@ static void nvmet_execute_abort(struct nvmet_req *req)
nvmet_req_complete(req, 0);
}
+static u16 nvmet_write_protect_flush_sync(struct nvmet_req *req)
+{
+ u16 status;
+
+ if (req->ns->file)
+ status = nvmet_file_flush(req);
+ else
+ status = nvmet_bdev_flush(req);
+
+ if (status)
+ pr_err("write protect flush failed nsid: %u\n", req->ns->nsid);
+ return status;
+}
+
+static u16 nvmet_set_feat_write_protect(struct nvmet_req *req)
+{
+ u32 write_protect = le32_to_cpu(req->cmd->common.cdw10[1]);
+ struct nvmet_subsys *subsys = req->sq->ctrl->subsys;
+ u16 status = NVME_SC_FEATURE_NOT_CHANGEABLE;
+
+ req->ns = nvmet_find_namespace(req->sq->ctrl, req->cmd->rw.nsid);
+ if (unlikely(!req->ns))
+ return status;
+
+ mutex_lock(&subsys->lock);
+ switch (write_protect) {
+ case NVME_NS_WRITE_PROTECT:
+ req->ns->readonly = true;
+ status = nvmet_write_protect_flush_sync(req);
+ if (status)
+ req->ns->readonly = false;
+ break;
+ case NVME_NS_NO_WRITE_PROTECT:
+ req->ns->readonly = false;
+ status = 0;
+ break;
+ default:
+ break;
+ }
+
+ if (!status)
+ nvmet_ns_changed(subsys, req->ns->nsid);
+ mutex_unlock(&subsys->lock);
+ return status;
+}
+
static void nvmet_execute_set_features(struct nvmet_req *req)
{
struct nvmet_subsys *subsys = req->sq->ctrl->subsys;
@@ -454,6 +625,9 @@ static void nvmet_execute_set_features(struct nvmet_req *req)
case NVME_FEAT_HOST_ID:
status = NVME_SC_CMD_SEQ_ERROR | NVME_SC_DNR;
break;
+ case NVME_FEAT_WRITE_PROTECT:
+ status = nvmet_set_feat_write_protect(req);
+ break;
default:
status = NVME_SC_INVALID_FIELD | NVME_SC_DNR;
break;
@@ -462,6 +636,26 @@ static void nvmet_execute_set_features(struct nvmet_req *req)
nvmet_req_complete(req, status);
}
+static u16 nvmet_get_feat_write_protect(struct nvmet_req *req)
+{
+ struct nvmet_subsys *subsys = req->sq->ctrl->subsys;
+ u32 result;
+
+ req->ns = nvmet_find_namespace(req->sq->ctrl, req->cmd->common.nsid);
+ if (!req->ns)
+ return NVME_SC_INVALID_NS | NVME_SC_DNR;
+
+ mutex_lock(&subsys->lock);
+ if (req->ns->readonly == true)
+ result = NVME_NS_WRITE_PROTECT;
+ else
+ result = NVME_NS_NO_WRITE_PROTECT;
+ nvmet_set_result(req, result);
+ mutex_unlock(&subsys->lock);
+
+ return 0;
+}
+
static void nvmet_execute_get_features(struct nvmet_req *req)
{
struct nvmet_subsys *subsys = req->sq->ctrl->subsys;
@@ -513,6 +707,9 @@ static void nvmet_execute_get_features(struct nvmet_req *req)
status = nvmet_copy_to_sgl(req, 0, &req->sq->ctrl->hostid,
sizeof(req->sq->ctrl->hostid));
break;
+ case NVME_FEAT_WRITE_PROTECT:
+ status = nvmet_get_feat_write_protect(req);
+ break;
default:
status = NVME_SC_INVALID_FIELD | NVME_SC_DNR;
break;
@@ -586,6 +783,12 @@ u16 nvmet_parse_admin_cmd(struct nvmet_req *req)
case NVME_LOG_CHANGED_NS:
req->execute = nvmet_execute_get_log_changed_ns;
return 0;
+ case NVME_LOG_CMD_EFFECTS:
+ req->execute = nvmet_execute_get_log_cmd_effects_ns;
+ return 0;
+ case NVME_LOG_ANA:
+ req->execute = nvmet_execute_get_log_page_ana;
+ return 0;
}
break;
case nvme_admin_identify:
diff --git a/drivers/nvme/target/configfs.c b/drivers/nvme/target/configfs.c
index ebea1373d1b7..b37a8e3e3f80 100644
--- a/drivers/nvme/target/configfs.c
+++ b/drivers/nvme/target/configfs.c
@@ -218,6 +218,35 @@ static ssize_t nvmet_addr_trsvcid_store(struct config_item *item,
CONFIGFS_ATTR(nvmet_, addr_trsvcid);
+static ssize_t nvmet_param_inline_data_size_show(struct config_item *item,
+ char *page)
+{
+ struct nvmet_port *port = to_nvmet_port(item);
+
+ return snprintf(page, PAGE_SIZE, "%d\n", port->inline_data_size);
+}
+
+static ssize_t nvmet_param_inline_data_size_store(struct config_item *item,
+ const char *page, size_t count)
+{
+ struct nvmet_port *port = to_nvmet_port(item);
+ int ret;
+
+ if (port->enabled) {
+ pr_err("Cannot modify inline_data_size while port enabled\n");
+ pr_err("Disable the port before modifying\n");
+ return -EACCES;
+ }
+ ret = kstrtoint(page, 0, &port->inline_data_size);
+ if (ret) {
+ pr_err("Invalid value '%s' for inline_data_size\n", page);
+ return -EINVAL;
+ }
+ return count;
+}
+
+CONFIGFS_ATTR(nvmet_, param_inline_data_size);
+
static ssize_t nvmet_addr_trtype_show(struct config_item *item,
char *page)
{
@@ -387,6 +416,39 @@ out_unlock:
CONFIGFS_ATTR(nvmet_ns_, device_nguid);
+static ssize_t nvmet_ns_ana_grpid_show(struct config_item *item, char *page)
+{
+ return sprintf(page, "%u\n", to_nvmet_ns(item)->anagrpid);
+}
+
+static ssize_t nvmet_ns_ana_grpid_store(struct config_item *item,
+ const char *page, size_t count)
+{
+ struct nvmet_ns *ns = to_nvmet_ns(item);
+ u32 oldgrpid, newgrpid;
+ int ret;
+
+ ret = kstrtou32(page, 0, &newgrpid);
+ if (ret)
+ return ret;
+
+ if (newgrpid < 1 || newgrpid > NVMET_MAX_ANAGRPS)
+ return -EINVAL;
+
+ down_write(&nvmet_ana_sem);
+ oldgrpid = ns->anagrpid;
+ nvmet_ana_group_enabled[newgrpid]++;
+ ns->anagrpid = newgrpid;
+ nvmet_ana_group_enabled[oldgrpid]--;
+ nvmet_ana_chgcnt++;
+ up_write(&nvmet_ana_sem);
+
+ nvmet_send_ana_event(ns->subsys, NULL);
+ return count;
+}
+
+CONFIGFS_ATTR(nvmet_ns_, ana_grpid);
+
static ssize_t nvmet_ns_enable_show(struct config_item *item, char *page)
{
return sprintf(page, "%d\n", to_nvmet_ns(item)->enabled);
@@ -412,11 +474,41 @@ static ssize_t nvmet_ns_enable_store(struct config_item *item,
CONFIGFS_ATTR(nvmet_ns_, enable);
+static ssize_t nvmet_ns_buffered_io_show(struct config_item *item, char *page)
+{
+ return sprintf(page, "%d\n", to_nvmet_ns(item)->buffered_io);
+}
+
+static ssize_t nvmet_ns_buffered_io_store(struct config_item *item,
+ const char *page, size_t count)
+{
+ struct nvmet_ns *ns = to_nvmet_ns(item);
+ bool val;
+
+ if (strtobool(page, &val))
+ return -EINVAL;
+
+ mutex_lock(&ns->subsys->lock);
+ if (ns->enabled) {
+ pr_err("disable ns before setting buffered_io value.\n");
+ mutex_unlock(&ns->subsys->lock);
+ return -EINVAL;
+ }
+
+ ns->buffered_io = val;
+ mutex_unlock(&ns->subsys->lock);
+ return count;
+}
+
+CONFIGFS_ATTR(nvmet_ns_, buffered_io);
+
static struct configfs_attribute *nvmet_ns_attrs[] = {
&nvmet_ns_attr_device_path,
&nvmet_ns_attr_device_nguid,
&nvmet_ns_attr_device_uuid,
+ &nvmet_ns_attr_ana_grpid,
&nvmet_ns_attr_enable,
+ &nvmet_ns_attr_buffered_io,
NULL,
};
@@ -863,6 +955,134 @@ static const struct config_item_type nvmet_referrals_type = {
.ct_group_ops = &nvmet_referral_group_ops,
};
+static struct {
+ enum nvme_ana_state state;
+ const char *name;
+} nvmet_ana_state_names[] = {
+ { NVME_ANA_OPTIMIZED, "optimized" },
+ { NVME_ANA_NONOPTIMIZED, "non-optimized" },
+ { NVME_ANA_INACCESSIBLE, "inaccessible" },
+ { NVME_ANA_PERSISTENT_LOSS, "persistent-loss" },
+ { NVME_ANA_CHANGE, "change" },
+};
+
+static ssize_t nvmet_ana_group_ana_state_show(struct config_item *item,
+ char *page)
+{
+ struct nvmet_ana_group *grp = to_ana_group(item);
+ enum nvme_ana_state state = grp->port->ana_state[grp->grpid];
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(nvmet_ana_state_names); i++) {
+ if (state != nvmet_ana_state_names[i].state)
+ continue;
+ return sprintf(page, "%s\n", nvmet_ana_state_names[i].name);
+ }
+
+ return sprintf(page, "\n");
+}
+
+static ssize_t nvmet_ana_group_ana_state_store(struct config_item *item,
+ const char *page, size_t count)
+{
+ struct nvmet_ana_group *grp = to_ana_group(item);
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(nvmet_ana_state_names); i++) {
+ if (sysfs_streq(page, nvmet_ana_state_names[i].name))
+ goto found;
+ }
+
+ pr_err("Invalid value '%s' for ana_state\n", page);
+ return -EINVAL;
+
+found:
+ down_write(&nvmet_ana_sem);
+ grp->port->ana_state[grp->grpid] = nvmet_ana_state_names[i].state;
+ nvmet_ana_chgcnt++;
+ up_write(&nvmet_ana_sem);
+
+ nvmet_port_send_ana_event(grp->port);
+ return count;
+}
+
+CONFIGFS_ATTR(nvmet_ana_group_, ana_state);
+
+static struct configfs_attribute *nvmet_ana_group_attrs[] = {
+ &nvmet_ana_group_attr_ana_state,
+ NULL,
+};
+
+static void nvmet_ana_group_release(struct config_item *item)
+{
+ struct nvmet_ana_group *grp = to_ana_group(item);
+
+ if (grp == &grp->port->ana_default_group)
+ return;
+
+ down_write(&nvmet_ana_sem);
+ grp->port->ana_state[grp->grpid] = NVME_ANA_INACCESSIBLE;
+ nvmet_ana_group_enabled[grp->grpid]--;
+ up_write(&nvmet_ana_sem);
+
+ nvmet_port_send_ana_event(grp->port);
+ kfree(grp);
+}
+
+static struct configfs_item_operations nvmet_ana_group_item_ops = {
+ .release = nvmet_ana_group_release,
+};
+
+static const struct config_item_type nvmet_ana_group_type = {
+ .ct_item_ops = &nvmet_ana_group_item_ops,
+ .ct_attrs = nvmet_ana_group_attrs,
+ .ct_owner = THIS_MODULE,
+};
+
+static struct config_group *nvmet_ana_groups_make_group(
+ struct config_group *group, const char *name)
+{
+ struct nvmet_port *port = ana_groups_to_port(&group->cg_item);
+ struct nvmet_ana_group *grp;
+ u32 grpid;
+ int ret;
+
+ ret = kstrtou32(name, 0, &grpid);
+ if (ret)
+ goto out;
+
+ ret = -EINVAL;
+ if (grpid <= 1 || grpid > NVMET_MAX_ANAGRPS)
+ goto out;
+
+ ret = -ENOMEM;
+ grp = kzalloc(sizeof(*grp), GFP_KERNEL);
+ if (!grp)
+ goto out;
+ grp->port = port;
+ grp->grpid = grpid;
+
+ down_write(&nvmet_ana_sem);
+ nvmet_ana_group_enabled[grpid]++;
+ up_write(&nvmet_ana_sem);
+
+ nvmet_port_send_ana_event(grp->port);
+
+ config_group_init_type_name(&grp->group, name, &nvmet_ana_group_type);
+ return &grp->group;
+out:
+ return ERR_PTR(ret);
+}
+
+static struct configfs_group_operations nvmet_ana_groups_group_ops = {
+ .make_group = nvmet_ana_groups_make_group,
+};
+
+static const struct config_item_type nvmet_ana_groups_type = {
+ .ct_group_ops = &nvmet_ana_groups_group_ops,
+ .ct_owner = THIS_MODULE,
+};
+
/*
* Ports definitions.
*/
@@ -870,6 +1090,7 @@ static void nvmet_port_release(struct config_item *item)
{
struct nvmet_port *port = to_nvmet_port(item);
+ kfree(port->ana_state);
kfree(port);
}
@@ -879,6 +1100,7 @@ static struct configfs_attribute *nvmet_port_attrs[] = {
&nvmet_attr_addr_traddr,
&nvmet_attr_addr_trsvcid,
&nvmet_attr_addr_trtype,
+ &nvmet_attr_param_inline_data_size,
NULL,
};
@@ -897,6 +1119,7 @@ static struct config_group *nvmet_ports_make(struct config_group *group,
{
struct nvmet_port *port;
u16 portid;
+ u32 i;
if (kstrtou16(name, 0, &portid))
return ERR_PTR(-EINVAL);
@@ -905,9 +1128,24 @@ static struct config_group *nvmet_ports_make(struct config_group *group,
if (!port)
return ERR_PTR(-ENOMEM);
+ port->ana_state = kcalloc(NVMET_MAX_ANAGRPS + 1,
+ sizeof(*port->ana_state), GFP_KERNEL);
+ if (!port->ana_state) {
+ kfree(port);
+ return ERR_PTR(-ENOMEM);
+ }
+
+ for (i = 1; i <= NVMET_MAX_ANAGRPS; i++) {
+ if (i == NVMET_DEFAULT_ANA_GRPID)
+ port->ana_state[1] = NVME_ANA_OPTIMIZED;
+ else
+ port->ana_state[i] = NVME_ANA_INACCESSIBLE;
+ }
+
INIT_LIST_HEAD(&port->entry);
INIT_LIST_HEAD(&port->subsystems);
INIT_LIST_HEAD(&port->referrals);
+ port->inline_data_size = -1; /* < 0 == let the transport choose */
port->disc_addr.portid = cpu_to_le16(portid);
config_group_init_type_name(&port->group, name, &nvmet_port_type);
@@ -920,6 +1158,18 @@ static struct config_group *nvmet_ports_make(struct config_group *group,
"referrals", &nvmet_referrals_type);
configfs_add_default_group(&port->referrals_group, &port->group);
+ config_group_init_type_name(&port->ana_groups_group,
+ "ana_groups", &nvmet_ana_groups_type);
+ configfs_add_default_group(&port->ana_groups_group, &port->group);
+
+ port->ana_default_group.port = port;
+ port->ana_default_group.grpid = NVMET_DEFAULT_ANA_GRPID;
+ config_group_init_type_name(&port->ana_default_group.group,
+ __stringify(NVMET_DEFAULT_ANA_GRPID),
+ &nvmet_ana_group_type);
+ configfs_add_default_group(&port->ana_default_group.group,
+ &port->ana_groups_group);
+
return &port->group;
}
diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c
index 9838103f2d62..ebf3e7a6c49e 100644
--- a/drivers/nvme/target/core.c
+++ b/drivers/nvme/target/core.c
@@ -18,6 +18,7 @@
#include "nvmet.h"
+struct workqueue_struct *buffered_io_wq;
static const struct nvmet_fabrics_ops *nvmet_transports[NVMF_TRTYPE_MAX];
static DEFINE_IDA(cntlid_ida);
@@ -39,6 +40,10 @@ static DEFINE_IDA(cntlid_ida);
*/
DECLARE_RWSEM(nvmet_config_sem);
+u32 nvmet_ana_group_enabled[NVMET_MAX_ANAGRPS + 1];
+u64 nvmet_ana_chgcnt;
+DECLARE_RWSEM(nvmet_ana_sem);
+
static struct nvmet_subsys *nvmet_find_get_subsys(struct nvmet_port *port,
const char *subsysnqn);
@@ -175,7 +180,7 @@ out_unlock:
mutex_unlock(&ctrl->lock);
}
-static void nvmet_ns_changed(struct nvmet_subsys *subsys, u32 nsid)
+void nvmet_ns_changed(struct nvmet_subsys *subsys, u32 nsid)
{
struct nvmet_ctrl *ctrl;
@@ -189,6 +194,33 @@ static void nvmet_ns_changed(struct nvmet_subsys *subsys, u32 nsid)
}
}
+void nvmet_send_ana_event(struct nvmet_subsys *subsys,
+ struct nvmet_port *port)
+{
+ struct nvmet_ctrl *ctrl;
+
+ mutex_lock(&subsys->lock);
+ list_for_each_entry(ctrl, &subsys->ctrls, subsys_entry) {
+ if (port && ctrl->port != port)
+ continue;
+ if (nvmet_aen_disabled(ctrl, NVME_AEN_CFG_ANA_CHANGE))
+ continue;
+ nvmet_add_async_event(ctrl, NVME_AER_TYPE_NOTICE,
+ NVME_AER_NOTICE_ANA, NVME_LOG_ANA);
+ }
+ mutex_unlock(&subsys->lock);
+}
+
+void nvmet_port_send_ana_event(struct nvmet_port *port)
+{
+ struct nvmet_subsys_link *p;
+
+ down_read(&nvmet_config_sem);
+ list_for_each_entry(p, &port->subsystems, entry)
+ nvmet_send_ana_event(p->subsys, port);
+ up_read(&nvmet_config_sem);
+}
+
int nvmet_register_transport(const struct nvmet_fabrics_ops *ops)
{
int ret = 0;
@@ -241,6 +273,10 @@ int nvmet_enable_port(struct nvmet_port *port)
return ret;
}
+ /* If the transport didn't set inline_data_size, then disable it. */
+ if (port->inline_data_size < 0)
+ port->inline_data_size = 0;
+
port->enabled = true;
return 0;
}
@@ -332,9 +368,13 @@ static void nvmet_ns_dev_disable(struct nvmet_ns *ns)
int nvmet_ns_enable(struct nvmet_ns *ns)
{
struct nvmet_subsys *subsys = ns->subsys;
- int ret = 0;
+ int ret;
mutex_lock(&subsys->lock);
+ ret = -EMFILE;
+ if (subsys->nr_namespaces == NVMET_MAX_NAMESPACES)
+ goto out_unlock;
+ ret = 0;
if (ns->enabled)
goto out_unlock;
@@ -369,6 +409,7 @@ int nvmet_ns_enable(struct nvmet_ns *ns)
list_add_tail_rcu(&ns->dev_link, &old->dev_link);
}
+ subsys->nr_namespaces++;
nvmet_ns_changed(subsys, ns->nsid);
ns->enabled = true;
@@ -409,6 +450,7 @@ void nvmet_ns_disable(struct nvmet_ns *ns)
percpu_ref_exit(&ns->ref);
mutex_lock(&subsys->lock);
+ subsys->nr_namespaces--;
nvmet_ns_changed(subsys, ns->nsid);
nvmet_ns_dev_disable(ns);
out_unlock:
@@ -419,6 +461,10 @@ void nvmet_ns_free(struct nvmet_ns *ns)
{
nvmet_ns_disable(ns);
+ down_write(&nvmet_ana_sem);
+ nvmet_ana_group_enabled[ns->anagrpid]--;
+ up_write(&nvmet_ana_sem);
+
kfree(ns->device_path);
kfree(ns);
}
@@ -436,7 +482,14 @@ struct nvmet_ns *nvmet_ns_alloc(struct nvmet_subsys *subsys, u32 nsid)
ns->nsid = nsid;
ns->subsys = subsys;
+
+ down_write(&nvmet_ana_sem);
+ ns->anagrpid = NVMET_DEFAULT_ANA_GRPID;
+ nvmet_ana_group_enabled[ns->anagrpid]++;
+ up_write(&nvmet_ana_sem);
+
uuid_gen(&ns->uuid);
+ ns->buffered_io = false;
return ns;
}
@@ -542,6 +595,35 @@ int nvmet_sq_init(struct nvmet_sq *sq)
}
EXPORT_SYMBOL_GPL(nvmet_sq_init);
+static inline u16 nvmet_check_ana_state(struct nvmet_port *port,
+ struct nvmet_ns *ns)
+{
+ enum nvme_ana_state state = port->ana_state[ns->anagrpid];
+
+ if (unlikely(state == NVME_ANA_INACCESSIBLE))
+ return NVME_SC_ANA_INACCESSIBLE;
+ if (unlikely(state == NVME_ANA_PERSISTENT_LOSS))
+ return NVME_SC_ANA_PERSISTENT_LOSS;
+ if (unlikely(state == NVME_ANA_CHANGE))
+ return NVME_SC_ANA_TRANSITION;
+ return 0;
+}
+
+static inline u16 nvmet_io_cmd_check_access(struct nvmet_req *req)
+{
+ if (unlikely(req->ns->readonly)) {
+ switch (req->cmd->common.opcode) {
+ case nvme_cmd_read:
+ case nvme_cmd_flush:
+ break;
+ default:
+ return NVME_SC_NS_WRITE_PROTECTED;
+ }
+ }
+
+ return 0;
+}
+
static u16 nvmet_parse_io_cmd(struct nvmet_req *req)
{
struct nvme_command *cmd = req->cmd;
@@ -554,6 +636,12 @@ static u16 nvmet_parse_io_cmd(struct nvmet_req *req)
req->ns = nvmet_find_namespace(req->sq->ctrl, cmd->rw.nsid);
if (unlikely(!req->ns))
return NVME_SC_INVALID_NS | NVME_SC_DNR;
+ ret = nvmet_check_ana_state(req->port, req->ns);
+ if (unlikely(ret))
+ return ret;
+ ret = nvmet_io_cmd_check_access(req);
+ if (unlikely(ret))
+ return ret;
if (req->ns->file)
return nvmet_file_parse_io_cmd(req);
@@ -870,6 +958,8 @@ u16 nvmet_alloc_ctrl(const char *subsysnqn, const char *hostnqn,
nvmet_init_cap(ctrl);
+ ctrl->port = req->port;
+
INIT_WORK(&ctrl->async_event_work, nvmet_async_event_work);
INIT_LIST_HEAD(&ctrl->async_events);
@@ -1109,6 +1199,15 @@ static int __init nvmet_init(void)
{
int error;
+ nvmet_ana_group_enabled[NVMET_DEFAULT_ANA_GRPID] = 1;
+
+ buffered_io_wq = alloc_workqueue("nvmet-buffered-io-wq",
+ WQ_MEM_RECLAIM, 0);
+ if (!buffered_io_wq) {
+ error = -ENOMEM;
+ goto out;
+ }
+
error = nvmet_init_discovery();
if (error)
goto out;
@@ -1129,6 +1228,7 @@ static void __exit nvmet_exit(void)
nvmet_exit_configfs();
nvmet_exit_discovery();
ida_destroy(&cntlid_ida);
+ destroy_workqueue(buffered_io_wq);
BUILD_BUG_ON(sizeof(struct nvmf_disc_rsp_page_entry) != 1024);
BUILD_BUG_ON(sizeof(struct nvmf_disc_rsp_page_hdr) != 1024);
diff --git a/drivers/nvme/target/discovery.c b/drivers/nvme/target/discovery.c
index 08656b849bd6..eae29f493a07 100644
--- a/drivers/nvme/target/discovery.c
+++ b/drivers/nvme/target/discovery.c
@@ -171,7 +171,7 @@ static void nvmet_execute_identify_disc_ctrl(struct nvmet_req *req)
id->sgls = cpu_to_le32(1 << 0); /* we always support SGLs */
if (ctrl->ops->has_keyed_sgls)
id->sgls |= cpu_to_le32(1 << 2);
- if (ctrl->ops->sqe_inline_size)
+ if (req->port->inline_data_size)
id->sgls |= cpu_to_le32(1 << 20);
strcpy(id->subnqn, ctrl->subsys->subsysnqn);
diff --git a/drivers/nvme/target/io-cmd-bdev.c b/drivers/nvme/target/io-cmd-bdev.c
index e0b0f7df70c2..7bc9f6240432 100644
--- a/drivers/nvme/target/io-cmd-bdev.c
+++ b/drivers/nvme/target/io-cmd-bdev.c
@@ -124,6 +124,13 @@ static void nvmet_bdev_execute_flush(struct nvmet_req *req)
submit_bio(bio);
}
+u16 nvmet_bdev_flush(struct nvmet_req *req)
+{
+ if (blkdev_issue_flush(req->ns->bdev, GFP_KERNEL, NULL))
+ return NVME_SC_INTERNAL | NVME_SC_DNR;
+ return 0;
+}
+
static u16 nvmet_bdev_discard_range(struct nvmet_ns *ns,
struct nvme_dsm_range *range, struct bio **bio)
{
diff --git a/drivers/nvme/target/io-cmd-file.c b/drivers/nvme/target/io-cmd-file.c
index 8c42b3a8c420..81a9dc5290a8 100644
--- a/drivers/nvme/target/io-cmd-file.c
+++ b/drivers/nvme/target/io-cmd-file.c
@@ -16,6 +16,8 @@
void nvmet_file_ns_disable(struct nvmet_ns *ns)
{
if (ns->file) {
+ if (ns->buffered_io)
+ flush_workqueue(buffered_io_wq);
mempool_destroy(ns->bvec_pool);
ns->bvec_pool = NULL;
kmem_cache_destroy(ns->bvec_cache);
@@ -27,11 +29,14 @@ void nvmet_file_ns_disable(struct nvmet_ns *ns)
int nvmet_file_ns_enable(struct nvmet_ns *ns)
{
- int ret;
+ int flags = O_RDWR | O_LARGEFILE;
struct kstat stat;
+ int ret;
+
+ if (!ns->buffered_io)
+ flags |= O_DIRECT;
- ns->file = filp_open(ns->device_path,
- O_RDWR | O_LARGEFILE | O_DIRECT, 0);
+ ns->file = filp_open(ns->device_path, flags, 0);
if (IS_ERR(ns->file)) {
pr_err("failed to open file %s: (%ld)\n",
ns->device_path, PTR_ERR(ns->file));
@@ -100,7 +105,7 @@ static ssize_t nvmet_file_submit_bvec(struct nvmet_req *req, loff_t pos,
iocb->ki_pos = pos;
iocb->ki_filp = req->ns->file;
- iocb->ki_flags = IOCB_DIRECT | ki_flags;
+ iocb->ki_flags = ki_flags | iocb_flags(req->ns->file);
ret = call_iter(iocb, &iter);
@@ -140,6 +145,12 @@ static void nvmet_file_execute_rw(struct nvmet_req *req)
return;
}
+ pos = le64_to_cpu(req->cmd->rw.slba) << req->ns->blksize_shift;
+ if (unlikely(pos + req->data_len > req->ns->size)) {
+ nvmet_req_complete(req, NVME_SC_LBA_RANGE | NVME_SC_DNR);
+ return;
+ }
+
if (nr_bvec > NVMET_MAX_INLINE_BIOVEC)
req->f.bvec = kmalloc_array(nr_bvec, sizeof(struct bio_vec),
GFP_KERNEL);
@@ -155,8 +166,6 @@ static void nvmet_file_execute_rw(struct nvmet_req *req)
is_sync = true;
}
- pos = le64_to_cpu(req->cmd->rw.slba) << req->ns->blksize_shift;
-
memset(&req->f.iocb, 0, sizeof(struct kiocb));
for_each_sg_page(req->sg, &sg_pg_iter, req->sg_cnt, 0) {
nvmet_file_init_bvec(&req->f.bvec[bv_cnt], &sg_pg_iter);
@@ -189,14 +198,31 @@ out:
nvmet_file_submit_bvec(req, pos, bv_cnt, total_len);
}
-static void nvmet_file_flush_work(struct work_struct *w)
+static void nvmet_file_buffered_io_work(struct work_struct *w)
{
struct nvmet_req *req = container_of(w, struct nvmet_req, f.work);
- int ret;
- ret = vfs_fsync(req->ns->file, 1);
+ nvmet_file_execute_rw(req);
+}
- nvmet_req_complete(req, ret < 0 ? NVME_SC_INTERNAL | NVME_SC_DNR : 0);
+static void nvmet_file_execute_rw_buffered_io(struct nvmet_req *req)
+{
+ INIT_WORK(&req->f.work, nvmet_file_buffered_io_work);
+ queue_work(buffered_io_wq, &req->f.work);
+}
+
+u16 nvmet_file_flush(struct nvmet_req *req)
+{
+ if (vfs_fsync(req->ns->file, 1) < 0)
+ return NVME_SC_INTERNAL | NVME_SC_DNR;
+ return 0;
+}
+
+static void nvmet_file_flush_work(struct work_struct *w)
+{
+ struct nvmet_req *req = container_of(w, struct nvmet_req, f.work);
+
+ nvmet_req_complete(req, nvmet_file_flush(req));
}
static void nvmet_file_execute_flush(struct nvmet_req *req)
@@ -209,22 +235,30 @@ static void nvmet_file_execute_discard(struct nvmet_req *req)
{
int mode = FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE;
struct nvme_dsm_range range;
- loff_t offset;
- loff_t len;
- int i, ret;
+ loff_t offset, len;
+ u16 ret;
+ int i;
for (i = 0; i <= le32_to_cpu(req->cmd->dsm.nr); i++) {
- if (nvmet_copy_from_sgl(req, i * sizeof(range), &range,
- sizeof(range)))
+ ret = nvmet_copy_from_sgl(req, i * sizeof(range), &range,
+ sizeof(range));
+ if (ret)
break;
+
offset = le64_to_cpu(range.slba) << req->ns->blksize_shift;
len = le32_to_cpu(range.nlb) << req->ns->blksize_shift;
- ret = vfs_fallocate(req->ns->file, mode, offset, len);
- if (ret)
+ if (offset + len > req->ns->size) {
+ ret = NVME_SC_LBA_RANGE | NVME_SC_DNR;
break;
+ }
+
+ if (vfs_fallocate(req->ns->file, mode, offset, len)) {
+ ret = NVME_SC_INTERNAL | NVME_SC_DNR;
+ break;
+ }
}
- nvmet_req_complete(req, ret < 0 ? NVME_SC_INTERNAL | NVME_SC_DNR : 0);
+ nvmet_req_complete(req, ret);
}
static void nvmet_file_dsm_work(struct work_struct *w)
@@ -263,6 +297,11 @@ static void nvmet_file_write_zeroes_work(struct work_struct *w)
len = (((sector_t)le16_to_cpu(write_zeroes->length) + 1) <<
req->ns->blksize_shift);
+ if (unlikely(offset + len > req->ns->size)) {
+ nvmet_req_complete(req, NVME_SC_LBA_RANGE | NVME_SC_DNR);
+ return;
+ }
+
ret = vfs_fallocate(req->ns->file, mode, offset, len);
nvmet_req_complete(req, ret < 0 ? NVME_SC_INTERNAL | NVME_SC_DNR : 0);
}
@@ -280,7 +319,10 @@ u16 nvmet_file_parse_io_cmd(struct nvmet_req *req)
switch (cmd->common.opcode) {
case nvme_cmd_read:
case nvme_cmd_write:
- req->execute = nvmet_file_execute_rw;
+ if (req->ns->buffered_io)
+ req->execute = nvmet_file_execute_rw_buffered_io;
+ else
+ req->execute = nvmet_file_execute_rw;
req->data_len = nvmet_rw_len(req);
return 0;
case nvme_cmd_flush:
diff --git a/drivers/nvme/target/loop.c b/drivers/nvme/target/loop.c
index ae7586b8be07..9908082b32c4 100644
--- a/drivers/nvme/target/loop.c
+++ b/drivers/nvme/target/loop.c
@@ -227,6 +227,7 @@ static int nvme_loop_init_request(struct blk_mq_tag_set *set,
{
struct nvme_loop_ctrl *ctrl = set->driver_data;
+ nvme_req(req)->ctrl = &ctrl->ctrl;
return nvme_loop_init_iod(ctrl, blk_mq_rq_to_pdu(req),
(set == &ctrl->tag_set) ? hctx_idx + 1 : 0);
}
diff --git a/drivers/nvme/target/nvmet.h b/drivers/nvme/target/nvmet.h
index 480dfe10fad9..ec9af4ee03b6 100644
--- a/drivers/nvme/target/nvmet.h
+++ b/drivers/nvme/target/nvmet.h
@@ -30,12 +30,11 @@
#define NVMET_ASYNC_EVENTS 4
#define NVMET_ERROR_LOG_SLOTS 128
-
/*
* Supported optional AENs:
*/
#define NVMET_AEN_CFG_OPTIONAL \
- NVME_AEN_CFG_NS_ATTR
+ (NVME_AEN_CFG_NS_ATTR | NVME_AEN_CFG_ANA_CHANGE)
/*
* Plus mandatory SMART AENs (we'll never send them, but allow enabling them):
@@ -59,12 +58,15 @@ struct nvmet_ns {
struct percpu_ref ref;
struct block_device *bdev;
struct file *file;
+ bool readonly;
u32 nsid;
u32 blksize_shift;
loff_t size;
u8 nguid[16];
uuid_t uuid;
+ u32 anagrpid;
+ bool buffered_io;
bool enabled;
struct nvmet_subsys *subsys;
const char *device_path;
@@ -97,6 +99,18 @@ struct nvmet_sq {
struct completion confirm_done;
};
+struct nvmet_ana_group {
+ struct config_group group;
+ struct nvmet_port *port;
+ u32 grpid;
+};
+
+static inline struct nvmet_ana_group *to_ana_group(struct config_item *item)
+{
+ return container_of(to_config_group(item), struct nvmet_ana_group,
+ group);
+}
+
/**
* struct nvmet_port - Common structure to keep port
* information for the target.
@@ -114,8 +128,12 @@ struct nvmet_port {
struct list_head subsystems;
struct config_group referrals_group;
struct list_head referrals;
+ struct config_group ana_groups_group;
+ struct nvmet_ana_group ana_default_group;
+ enum nvme_ana_state *ana_state;
void *priv;
bool enabled;
+ int inline_data_size;
};
static inline struct nvmet_port *to_nvmet_port(struct config_item *item)
@@ -124,6 +142,13 @@ static inline struct nvmet_port *to_nvmet_port(struct config_item *item)
group);
}
+static inline struct nvmet_port *ana_groups_to_port(
+ struct config_item *item)
+{
+ return container_of(to_config_group(item), struct nvmet_port,
+ ana_groups_group);
+}
+
struct nvmet_ctrl {
struct nvmet_subsys *subsys;
struct nvmet_cq **cqs;
@@ -138,6 +163,8 @@ struct nvmet_ctrl {
u16 cntlid;
u32 kato;
+ struct nvmet_port *port;
+
u32 aen_enabled;
unsigned long aen_masked;
struct nvmet_req *async_event_cmds[NVMET_ASYNC_EVENTS];
@@ -166,6 +193,7 @@ struct nvmet_subsys {
struct kref ref;
struct list_head namespaces;
+ unsigned int nr_namespaces;
unsigned int max_nsid;
struct list_head ctrls;
@@ -225,7 +253,6 @@ struct nvmet_req;
struct nvmet_fabrics_ops {
struct module *owner;
unsigned int type;
- unsigned int sqe_inline_size;
unsigned int msdbd;
bool has_keyed_sgls : 1;
void (*queue_response)(struct nvmet_req *req);
@@ -269,6 +296,8 @@ struct nvmet_req {
const struct nvmet_fabrics_ops *ops;
};
+extern struct workqueue_struct *buffered_io_wq;
+
static inline void nvmet_set_status(struct nvmet_req *req, u16 status)
{
req->rsp->status = cpu_to_le16(status << 1);
@@ -337,6 +366,10 @@ void nvmet_ns_disable(struct nvmet_ns *ns);
struct nvmet_ns *nvmet_ns_alloc(struct nvmet_subsys *subsys, u32 nsid);
void nvmet_ns_free(struct nvmet_ns *ns);
+void nvmet_send_ana_event(struct nvmet_subsys *subsys,
+ struct nvmet_port *port);
+void nvmet_port_send_ana_event(struct nvmet_port *port);
+
int nvmet_register_transport(const struct nvmet_fabrics_ops *ops);
void nvmet_unregister_transport(const struct nvmet_fabrics_ops *ops);
@@ -357,6 +390,22 @@ u32 nvmet_get_log_page_len(struct nvme_command *cmd);
#define NVMET_QUEUE_SIZE 1024
#define NVMET_NR_QUEUES 128
#define NVMET_MAX_CMD NVMET_QUEUE_SIZE
+
+/*
+ * Nice round number that makes a list of nsids fit into a page.
+ * Should become tunable at some point in the future.
+ */
+#define NVMET_MAX_NAMESPACES 1024
+
+/*
+ * 0 is not a valid ANA group ID, so we start numbering at 1.
+ *
+ * ANA Group 1 exists without manual intervention, has namespaces assigned to it
+ * by default, and is available in an optimized state through all ports.
+ */
+#define NVMET_MAX_ANAGRPS 128
+#define NVMET_DEFAULT_ANA_GRPID 1
+
#define NVMET_KAS 10
#define NVMET_DISC_KATO 120
@@ -370,6 +419,10 @@ extern struct nvmet_subsys *nvmet_disc_subsys;
extern u64 nvmet_genctr;
extern struct rw_semaphore nvmet_config_sem;
+extern u32 nvmet_ana_group_enabled[NVMET_MAX_ANAGRPS + 1];
+extern u64 nvmet_ana_chgcnt;
+extern struct rw_semaphore nvmet_ana_sem;
+
bool nvmet_host_allowed(struct nvmet_req *req, struct nvmet_subsys *subsys,
const char *hostnqn);
@@ -377,6 +430,9 @@ int nvmet_bdev_ns_enable(struct nvmet_ns *ns);
int nvmet_file_ns_enable(struct nvmet_ns *ns);
void nvmet_bdev_ns_disable(struct nvmet_ns *ns);
void nvmet_file_ns_disable(struct nvmet_ns *ns);
+u16 nvmet_bdev_flush(struct nvmet_req *req);
+u16 nvmet_file_flush(struct nvmet_req *req);
+void nvmet_ns_changed(struct nvmet_subsys *subsys, u32 nsid);
static inline u32 nvmet_rw_len(struct nvmet_req *req)
{
diff --git a/drivers/nvme/target/rdma.c b/drivers/nvme/target/rdma.c
index 52e0c5d579a7..e7f43d1e1779 100644
--- a/drivers/nvme/target/rdma.c
+++ b/drivers/nvme/target/rdma.c
@@ -33,16 +33,17 @@
#include "nvmet.h"
/*
- * We allow up to a page of inline data to go with the SQE
+ * We allow at least 1 page, up to 4 SGEs, and up to 16KB of inline data
*/
-#define NVMET_RDMA_INLINE_DATA_SIZE PAGE_SIZE
+#define NVMET_RDMA_DEFAULT_INLINE_DATA_SIZE PAGE_SIZE
+#define NVMET_RDMA_MAX_INLINE_SGE 4
+#define NVMET_RDMA_MAX_INLINE_DATA_SIZE max_t(int, SZ_16K, PAGE_SIZE)
struct nvmet_rdma_cmd {
- struct ib_sge sge[2];
+ struct ib_sge sge[NVMET_RDMA_MAX_INLINE_SGE + 1];
struct ib_cqe cqe;
struct ib_recv_wr wr;
- struct scatterlist inline_sg;
- struct page *inline_page;
+ struct scatterlist inline_sg[NVMET_RDMA_MAX_INLINE_SGE];
struct nvme_command *nvme_cmd;
struct nvmet_rdma_queue *queue;
};
@@ -116,6 +117,8 @@ struct nvmet_rdma_device {
size_t srq_size;
struct kref ref;
struct list_head entry;
+ int inline_data_size;
+ int inline_page_count;
};
static bool nvmet_rdma_use_srq;
@@ -138,6 +141,11 @@ static void nvmet_rdma_queue_disconnect(struct nvmet_rdma_queue *queue);
static const struct nvmet_fabrics_ops nvmet_rdma_ops;
+static int num_pages(int len)
+{
+ return 1 + (((len - 1) & PAGE_MASK) >> PAGE_SHIFT);
+}
+
/* XXX: really should move to a generic header sooner or later.. */
static inline u32 get_unaligned_le24(const u8 *p)
{
@@ -184,6 +192,71 @@ nvmet_rdma_put_rsp(struct nvmet_rdma_rsp *rsp)
spin_unlock_irqrestore(&rsp->queue->rsps_lock, flags);
}
+static void nvmet_rdma_free_inline_pages(struct nvmet_rdma_device *ndev,
+ struct nvmet_rdma_cmd *c)
+{
+ struct scatterlist *sg;
+ struct ib_sge *sge;
+ int i;
+
+ if (!ndev->inline_data_size)
+ return;
+
+ sg = c->inline_sg;
+ sge = &c->sge[1];
+
+ for (i = 0; i < ndev->inline_page_count; i++, sg++, sge++) {
+ if (sge->length)
+ ib_dma_unmap_page(ndev->device, sge->addr,
+ sge->length, DMA_FROM_DEVICE);
+ if (sg_page(sg))
+ __free_page(sg_page(sg));
+ }
+}
+
+static int nvmet_rdma_alloc_inline_pages(struct nvmet_rdma_device *ndev,
+ struct nvmet_rdma_cmd *c)
+{
+ struct scatterlist *sg;
+ struct ib_sge *sge;
+ struct page *pg;
+ int len;
+ int i;
+
+ if (!ndev->inline_data_size)
+ return 0;
+
+ sg = c->inline_sg;
+ sg_init_table(sg, ndev->inline_page_count);
+ sge = &c->sge[1];
+ len = ndev->inline_data_size;
+
+ for (i = 0; i < ndev->inline_page_count; i++, sg++, sge++) {
+ pg = alloc_page(GFP_KERNEL);
+ if (!pg)
+ goto out_err;
+ sg_assign_page(sg, pg);
+ sge->addr = ib_dma_map_page(ndev->device,
+ pg, 0, PAGE_SIZE, DMA_FROM_DEVICE);
+ if (ib_dma_mapping_error(ndev->device, sge->addr))
+ goto out_err;
+ sge->length = min_t(int, len, PAGE_SIZE);
+ sge->lkey = ndev->pd->local_dma_lkey;
+ len -= sge->length;
+ }
+
+ return 0;
+out_err:
+ for (; i >= 0; i--, sg--, sge--) {
+ if (sge->length)
+ ib_dma_unmap_page(ndev->device, sge->addr,
+ sge->length, DMA_FROM_DEVICE);
+ if (sg_page(sg))
+ __free_page(sg_page(sg));
+ }
+ return -ENOMEM;
+}
+
static int nvmet_rdma_alloc_cmd(struct nvmet_rdma_device *ndev,
struct nvmet_rdma_cmd *c, bool admin)
{
@@ -200,33 +273,17 @@ static int nvmet_rdma_alloc_cmd(struct nvmet_rdma_device *ndev,
c->sge[0].length = sizeof(*c->nvme_cmd);
c->sge[0].lkey = ndev->pd->local_dma_lkey;
- if (!admin) {
- c->inline_page = alloc_pages(GFP_KERNEL,
- get_order(NVMET_RDMA_INLINE_DATA_SIZE));
- if (!c->inline_page)
- goto out_unmap_cmd;
- c->sge[1].addr = ib_dma_map_page(ndev->device,
- c->inline_page, 0, NVMET_RDMA_INLINE_DATA_SIZE,
- DMA_FROM_DEVICE);
- if (ib_dma_mapping_error(ndev->device, c->sge[1].addr))
- goto out_free_inline_page;
- c->sge[1].length = NVMET_RDMA_INLINE_DATA_SIZE;
- c->sge[1].lkey = ndev->pd->local_dma_lkey;
- }
+ if (!admin && nvmet_rdma_alloc_inline_pages(ndev, c))
+ goto out_unmap_cmd;
c->cqe.done = nvmet_rdma_recv_done;
c->wr.wr_cqe = &c->cqe;
c->wr.sg_list = c->sge;
- c->wr.num_sge = admin ? 1 : 2;
+ c->wr.num_sge = admin ? 1 : ndev->inline_page_count + 1;
return 0;
-out_free_inline_page:
- if (!admin) {
- __free_pages(c->inline_page,
- get_order(NVMET_RDMA_INLINE_DATA_SIZE));
- }
out_unmap_cmd:
ib_dma_unmap_single(ndev->device, c->sge[0].addr,
sizeof(*c->nvme_cmd), DMA_FROM_DEVICE);
@@ -240,12 +297,8 @@ out:
static void nvmet_rdma_free_cmd(struct nvmet_rdma_device *ndev,
struct nvmet_rdma_cmd *c, bool admin)
{
- if (!admin) {
- ib_dma_unmap_page(ndev->device, c->sge[1].addr,
- NVMET_RDMA_INLINE_DATA_SIZE, DMA_FROM_DEVICE);
- __free_pages(c->inline_page,
- get_order(NVMET_RDMA_INLINE_DATA_SIZE));
- }
+ if (!admin)
+ nvmet_rdma_free_inline_pages(ndev, c);
ib_dma_unmap_single(ndev->device, c->sge[0].addr,
sizeof(*c->nvme_cmd), DMA_FROM_DEVICE);
kfree(c->nvme_cmd);
@@ -383,14 +436,21 @@ static int nvmet_rdma_post_recv(struct nvmet_rdma_device *ndev,
struct nvmet_rdma_cmd *cmd)
{
struct ib_recv_wr *bad_wr;
+ int ret;
ib_dma_sync_single_for_device(ndev->device,
cmd->sge[0].addr, cmd->sge[0].length,
DMA_FROM_DEVICE);
if (ndev->srq)
- return ib_post_srq_recv(ndev->srq, &cmd->wr, &bad_wr);
- return ib_post_recv(cmd->queue->cm_id->qp, &cmd->wr, &bad_wr);
+ ret = ib_post_srq_recv(ndev->srq, &cmd->wr, &bad_wr);
+ else
+ ret = ib_post_recv(cmd->queue->cm_id->qp, &cmd->wr, &bad_wr);
+
+ if (unlikely(ret))
+ pr_err("post_recv cmd failed\n");
+
+ return ret;
}
static void nvmet_rdma_process_wr_wait_list(struct nvmet_rdma_queue *queue)
@@ -429,7 +489,7 @@ static void nvmet_rdma_release_rsp(struct nvmet_rdma_rsp *rsp)
rsp->req.sg_cnt, nvmet_data_dir(&rsp->req));
}
- if (rsp->req.sg != &rsp->cmd->inline_sg)
+ if (rsp->req.sg != rsp->cmd->inline_sg)
sgl_free(rsp->req.sg);
if (unlikely(!list_empty_careful(&queue->rsp_wr_wait_list)))
@@ -493,7 +553,7 @@ static void nvmet_rdma_queue_response(struct nvmet_req *req)
rsp->send_sge.addr, rsp->send_sge.length,
DMA_TO_DEVICE);
- if (ib_post_send(cm_id->qp, first_wr, &bad_wr)) {
+ if (unlikely(ib_post_send(cm_id->qp, first_wr, &bad_wr))) {
pr_err("sending cmd response failed\n");
nvmet_rdma_release_rsp(rsp);
}
@@ -529,10 +589,25 @@ static void nvmet_rdma_read_data_done(struct ib_cq *cq, struct ib_wc *wc)
static void nvmet_rdma_use_inline_sg(struct nvmet_rdma_rsp *rsp, u32 len,
u64 off)
{
- sg_init_table(&rsp->cmd->inline_sg, 1);
- sg_set_page(&rsp->cmd->inline_sg, rsp->cmd->inline_page, len, off);
- rsp->req.sg = &rsp->cmd->inline_sg;
- rsp->req.sg_cnt = 1;
+ int sg_count = num_pages(len);
+ struct scatterlist *sg;
+ int i;
+
+ sg = rsp->cmd->inline_sg;
+ for (i = 0; i < sg_count; i++, sg++) {
+ if (i < sg_count - 1)
+ sg_unmark_end(sg);
+ else
+ sg_mark_end(sg);
+ sg->offset = off;
+ sg->length = min_t(int, len, PAGE_SIZE - off);
+ len -= sg->length;
+ if (!i)
+ off = 0;
+ }
+
+ rsp->req.sg = rsp->cmd->inline_sg;
+ rsp->req.sg_cnt = sg_count;
}
static u16 nvmet_rdma_map_sgl_inline(struct nvmet_rdma_rsp *rsp)
@@ -544,7 +619,7 @@ static u16 nvmet_rdma_map_sgl_inline(struct nvmet_rdma_rsp *rsp)
if (!nvme_is_write(rsp->req.cmd))
return NVME_SC_INVALID_FIELD | NVME_SC_DNR;
- if (off + len > NVMET_RDMA_INLINE_DATA_SIZE) {
+ if (off + len > rsp->queue->dev->inline_data_size) {
pr_err("invalid inline data offset!\n");
return NVME_SC_SGL_INVALID_OFFSET | NVME_SC_DNR;
}
@@ -743,7 +818,7 @@ static int nvmet_rdma_init_srq(struct nvmet_rdma_device *ndev)
srq_size = 4095; /* XXX: tune */
srq_attr.attr.max_wr = srq_size;
- srq_attr.attr.max_sge = 2;
+ srq_attr.attr.max_sge = 1 + ndev->inline_page_count;
srq_attr.attr.srq_limit = 0;
srq_attr.srq_type = IB_SRQT_BASIC;
srq = ib_create_srq(ndev->pd, &srq_attr);
@@ -765,11 +840,16 @@ static int nvmet_rdma_init_srq(struct nvmet_rdma_device *ndev)
ndev->srq = srq;
ndev->srq_size = srq_size;
- for (i = 0; i < srq_size; i++)
- nvmet_rdma_post_recv(ndev, &ndev->srq_cmds[i]);
+ for (i = 0; i < srq_size; i++) {
+ ret = nvmet_rdma_post_recv(ndev, &ndev->srq_cmds[i]);
+ if (ret)
+ goto out_free_cmds;
+ }
return 0;
+out_free_cmds:
+ nvmet_rdma_free_cmds(ndev, ndev->srq_cmds, ndev->srq_size, false);
out_destroy_srq:
ib_destroy_srq(srq);
return ret;
@@ -793,7 +873,10 @@ static void nvmet_rdma_free_dev(struct kref *ref)
static struct nvmet_rdma_device *
nvmet_rdma_find_get_device(struct rdma_cm_id *cm_id)
{
+ struct nvmet_port *port = cm_id->context;
struct nvmet_rdma_device *ndev;
+ int inline_page_count;
+ int inline_sge_count;
int ret;
mutex_lock(&device_list_mutex);
@@ -807,6 +890,18 @@ nvmet_rdma_find_get_device(struct rdma_cm_id *cm_id)
if (!ndev)
goto out_err;
+ inline_page_count = num_pages(port->inline_data_size);
+ inline_sge_count = max(cm_id->device->attrs.max_sge_rd,
+ cm_id->device->attrs.max_sge) - 1;
+ if (inline_page_count > inline_sge_count) {
+ pr_warn("inline_data_size %d cannot be supported by device %s. Reducing to %lu.\n",
+ port->inline_data_size, cm_id->device->name,
+ inline_sge_count * PAGE_SIZE);
+ port->inline_data_size = inline_sge_count * PAGE_SIZE;
+ inline_page_count = inline_sge_count;
+ }
+ ndev->inline_data_size = port->inline_data_size;
+ ndev->inline_page_count = inline_page_count;
ndev->device = cm_id->device;
kref_init(&ndev->ref);
@@ -881,7 +976,7 @@ static int nvmet_rdma_create_queue_ib(struct nvmet_rdma_queue *queue)
} else {
/* +1 for drain */
qp_attr.cap.max_recv_wr = 1 + queue->recv_queue_size;
- qp_attr.cap.max_recv_sge = 2;
+ qp_attr.cap.max_recv_sge = 1 + ndev->inline_page_count;
}
ret = rdma_create_qp(queue->cm_id, ndev->pd, &qp_attr);
@@ -899,13 +994,17 @@ static int nvmet_rdma_create_queue_ib(struct nvmet_rdma_queue *queue)
if (!ndev->srq) {
for (i = 0; i < queue->recv_queue_size; i++) {
queue->cmds[i].queue = queue;
- nvmet_rdma_post_recv(ndev, &queue->cmds[i]);
+ ret = nvmet_rdma_post_recv(ndev, &queue->cmds[i]);
+ if (ret)
+ goto err_destroy_qp;
}
}
out:
return ret;
+err_destroy_qp:
+ rdma_destroy_qp(queue->cm_id);
err_destroy_cq:
ib_free_cq(queue->cq);
goto out;
@@ -1379,6 +1478,15 @@ static int nvmet_rdma_add_port(struct nvmet_port *port)
return -EINVAL;
}
+ if (port->inline_data_size < 0) {
+ port->inline_data_size = NVMET_RDMA_DEFAULT_INLINE_DATA_SIZE;
+ } else if (port->inline_data_size > NVMET_RDMA_MAX_INLINE_DATA_SIZE) {
+ pr_warn("inline_data_size %u is too large, reducing to %u\n",
+ port->inline_data_size,
+ NVMET_RDMA_MAX_INLINE_DATA_SIZE);
+ port->inline_data_size = NVMET_RDMA_MAX_INLINE_DATA_SIZE;
+ }
+
ret = inet_pton_with_scope(&init_net, af, port->disc_addr.traddr,
port->disc_addr.trsvcid, &addr);
if (ret) {
@@ -1456,7 +1564,6 @@ static void nvmet_rdma_disc_port_addr(struct nvmet_req *req,
static const struct nvmet_fabrics_ops nvmet_rdma_ops = {
.owner = THIS_MODULE,
.type = NVMF_TRTYPE_RDMA,
- .sqe_inline_size = NVMET_RDMA_INLINE_DATA_SIZE,
.msdbd = 1,
.has_keyed_sgls = 1,
.add_port = nvmet_rdma_add_port,