diff options
Diffstat (limited to 'drivers')
656 files changed, 27009 insertions, 12020 deletions
diff --git a/drivers/acpi/nfit/Kconfig b/drivers/acpi/nfit/Kconfig index f7c57e33499e..52eefd732cf2 100644 --- a/drivers/acpi/nfit/Kconfig +++ b/drivers/acpi/nfit/Kconfig @@ -13,3 +13,14 @@ config ACPI_NFIT To compile this driver as a module, choose M here: the module will be called nfit. + +config NFIT_SECURITY_DEBUG + bool "Enable debug for NVDIMM security commands" + depends on ACPI_NFIT + help + Some NVDIMM devices and controllers support encryption and + other security features. The payloads for the commands that + enable those features may contain sensitive clear-text + security material. Disable debug of those command payloads + by default. If you are a kernel developer actively working + on NVDIMM security enabling say Y, otherwise say N. diff --git a/drivers/acpi/nfit/Makefile b/drivers/acpi/nfit/Makefile index a407e769f103..751081c47886 100644 --- a/drivers/acpi/nfit/Makefile +++ b/drivers/acpi/nfit/Makefile @@ -1,3 +1,4 @@ obj-$(CONFIG_ACPI_NFIT) := nfit.o nfit-y := core.o +nfit-y += intel.o nfit-$(CONFIG_X86_MCE) += mce.o diff --git a/drivers/acpi/nfit/core.c b/drivers/acpi/nfit/core.c index 5912d30020c7..011d3db19c80 100644 --- a/drivers/acpi/nfit/core.c +++ b/drivers/acpi/nfit/core.c @@ -24,6 +24,7 @@ #include <linux/nd.h> #include <asm/cacheflush.h> #include <acpi/nfit.h> +#include "intel.h" #include "nfit.h" #include "intel.h" @@ -380,6 +381,16 @@ static u8 nfit_dsm_revid(unsigned family, unsigned func) [NVDIMM_INTEL_QUERY_FWUPDATE] = 2, [NVDIMM_INTEL_SET_THRESHOLD] = 2, [NVDIMM_INTEL_INJECT_ERROR] = 2, + [NVDIMM_INTEL_GET_SECURITY_STATE] = 2, + [NVDIMM_INTEL_SET_PASSPHRASE] = 2, + [NVDIMM_INTEL_DISABLE_PASSPHRASE] = 2, + [NVDIMM_INTEL_UNLOCK_UNIT] = 2, + [NVDIMM_INTEL_FREEZE_LOCK] = 2, + [NVDIMM_INTEL_SECURE_ERASE] = 2, + [NVDIMM_INTEL_OVERWRITE] = 2, + [NVDIMM_INTEL_QUERY_OVERWRITE] = 2, + [NVDIMM_INTEL_SET_MASTER_PASSPHRASE] = 2, + [NVDIMM_INTEL_MASTER_SECURE_ERASE] = 2, }, }; u8 id; @@ -394,6 +405,17 @@ static u8 nfit_dsm_revid(unsigned family, unsigned func) return id; } +static bool payload_dumpable(struct nvdimm *nvdimm, unsigned int func) +{ + struct nfit_mem *nfit_mem = nvdimm_provider_data(nvdimm); + + if (nfit_mem && nfit_mem->family == NVDIMM_FAMILY_INTEL + && func >= NVDIMM_INTEL_GET_SECURITY_STATE + && func <= NVDIMM_INTEL_MASTER_SECURE_ERASE) + return IS_ENABLED(CONFIG_NFIT_SECURITY_DEBUG); + return true; +} + int acpi_nfit_ctl(struct nvdimm_bus_descriptor *nd_desc, struct nvdimm *nvdimm, unsigned int cmd, void *buf, unsigned int buf_len, int *cmd_rc) { @@ -478,9 +500,10 @@ int acpi_nfit_ctl(struct nvdimm_bus_descriptor *nd_desc, struct nvdimm *nvdimm, dev_dbg(dev, "%s cmd: %d: func: %d input length: %d\n", dimm_name, cmd, func, in_buf.buffer.length); - print_hex_dump_debug("nvdimm in ", DUMP_PREFIX_OFFSET, 4, 4, - in_buf.buffer.pointer, - min_t(u32, 256, in_buf.buffer.length), true); + if (payload_dumpable(nvdimm, func)) + print_hex_dump_debug("nvdimm in ", DUMP_PREFIX_OFFSET, 4, 4, + in_buf.buffer.pointer, + min_t(u32, 256, in_buf.buffer.length), true); /* call the BIOS, prefer the named methods over _DSM if available */ if (nvdimm && cmd == ND_CMD_GET_CONFIG_SIZE @@ -1573,18 +1596,10 @@ static DEVICE_ATTR_RO(flags); static ssize_t id_show(struct device *dev, struct device_attribute *attr, char *buf) { - struct acpi_nfit_control_region *dcr = to_nfit_dcr(dev); + struct nvdimm *nvdimm = to_nvdimm(dev); + struct nfit_mem *nfit_mem = nvdimm_provider_data(nvdimm); - if (dcr->valid_fields & ACPI_NFIT_CONTROL_MFG_INFO_VALID) - return sprintf(buf, "%04x-%02x-%04x-%08x\n", - be16_to_cpu(dcr->vendor_id), - dcr->manufacturing_location, - be16_to_cpu(dcr->manufacturing_date), - be32_to_cpu(dcr->serial_number)); - else - return sprintf(buf, "%04x-%08x\n", - be16_to_cpu(dcr->vendor_id), - be32_to_cpu(dcr->serial_number)); + return sprintf(buf, "%s\n", nfit_mem->id); } static DEVICE_ATTR_RO(id); @@ -1780,10 +1795,23 @@ static int acpi_nfit_add_dimm(struct acpi_nfit_desc *acpi_desc, const guid_t *guid; int i; int family = -1; + struct acpi_nfit_control_region *dcr = nfit_mem->dcr; /* nfit test assumes 1:1 relationship between commands and dsms */ nfit_mem->dsm_mask = acpi_desc->dimm_cmd_force_en; nfit_mem->family = NVDIMM_FAMILY_INTEL; + + if (dcr->valid_fields & ACPI_NFIT_CONTROL_MFG_INFO_VALID) + sprintf(nfit_mem->id, "%04x-%02x-%04x-%08x", + be16_to_cpu(dcr->vendor_id), + dcr->manufacturing_location, + be16_to_cpu(dcr->manufacturing_date), + be32_to_cpu(dcr->serial_number)); + else + sprintf(nfit_mem->id, "%04x-%08x", + be16_to_cpu(dcr->vendor_id), + be32_to_cpu(dcr->serial_number)); + adev = to_acpi_dev(acpi_desc); if (!adev) { /* unit test case */ @@ -1904,6 +1932,16 @@ static void shutdown_dimm_notify(void *data) mutex_unlock(&acpi_desc->init_mutex); } +static const struct nvdimm_security_ops *acpi_nfit_get_security_ops(int family) +{ + switch (family) { + case NVDIMM_FAMILY_INTEL: + return intel_security_ops; + default: + return NULL; + } +} + static int acpi_nfit_register_dimms(struct acpi_nfit_desc *acpi_desc) { struct nfit_mem *nfit_mem; @@ -1970,10 +2008,11 @@ static int acpi_nfit_register_dimms(struct acpi_nfit_desc *acpi_desc) flush = nfit_mem->nfit_flush ? nfit_mem->nfit_flush->flush : NULL; - nvdimm = nvdimm_create(acpi_desc->nvdimm_bus, nfit_mem, + nvdimm = __nvdimm_create(acpi_desc->nvdimm_bus, nfit_mem, acpi_nfit_dimm_attribute_groups, flags, cmd_mask, flush ? flush->hint_count : 0, - nfit_mem->flush_wpq); + nfit_mem->flush_wpq, &nfit_mem->id[0], + acpi_nfit_get_security_ops(nfit_mem->family)); if (!nvdimm) return -ENOMEM; @@ -2008,6 +2047,11 @@ static int acpi_nfit_register_dimms(struct acpi_nfit_desc *acpi_desc) if (!nvdimm) continue; + rc = nvdimm_security_setup_events(nvdimm); + if (rc < 0) + dev_warn(acpi_desc->dev, + "security event setup failed: %d\n", rc); + nfit_kernfs = sysfs_get_dirent(nvdimm_kobj(nvdimm)->sd, "nfit"); if (nfit_kernfs) nfit_mem->flags_attr = sysfs_get_dirent(nfit_kernfs, @@ -3337,7 +3381,7 @@ static int acpi_nfit_flush_probe(struct nvdimm_bus_descriptor *nd_desc) return 0; } -static int acpi_nfit_clear_to_send(struct nvdimm_bus_descriptor *nd_desc, +static int __acpi_nfit_clear_to_send(struct nvdimm_bus_descriptor *nd_desc, struct nvdimm *nvdimm, unsigned int cmd) { struct acpi_nfit_desc *acpi_desc = to_acpi_nfit_desc(nd_desc); @@ -3359,6 +3403,23 @@ static int acpi_nfit_clear_to_send(struct nvdimm_bus_descriptor *nd_desc, return 0; } +/* prevent security commands from being issued via ioctl */ +static int acpi_nfit_clear_to_send(struct nvdimm_bus_descriptor *nd_desc, + struct nvdimm *nvdimm, unsigned int cmd, void *buf) +{ + struct nd_cmd_pkg *call_pkg = buf; + unsigned int func; + + if (nvdimm && cmd == ND_CMD_CALL && + call_pkg->nd_family == NVDIMM_FAMILY_INTEL) { + func = call_pkg->nd_command; + if ((1 << func) & NVDIMM_INTEL_SECURITY_CMDMASK) + return -EOPNOTSUPP; + } + + return __acpi_nfit_clear_to_send(nd_desc, nvdimm, cmd); +} + int acpi_nfit_ars_rescan(struct acpi_nfit_desc *acpi_desc, enum nfit_ars_state req_type) { @@ -3474,7 +3535,13 @@ static int acpi_nfit_add(struct acpi_device *adev) status = acpi_get_table(ACPI_SIG_NFIT, 0, &tbl); if (ACPI_FAILURE(status)) { - /* This is ok, we could have an nvdimm hotplugged later */ + /* The NVDIMM root device allows OS to trigger enumeration of + * NVDIMMs through NFIT at boot time and re-enumeration at + * root level via the _FIT method during runtime. + * This is ok to return 0 here, we could have an nvdimm + * hotplugged later and evaluate _FIT method which returns + * data in the format of a series of NFIT Structures. + */ dev_dbg(dev, "failed to find NFIT at startup\n"); return 0; } diff --git a/drivers/acpi/nfit/intel.c b/drivers/acpi/nfit/intel.c new file mode 100644 index 000000000000..850b2927b4e7 --- /dev/null +++ b/drivers/acpi/nfit/intel.c @@ -0,0 +1,388 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright(c) 2018 Intel Corporation. All rights reserved. */ +#include <linux/libnvdimm.h> +#include <linux/ndctl.h> +#include <linux/acpi.h> +#include <asm/smp.h> +#include "intel.h" +#include "nfit.h" + +static enum nvdimm_security_state intel_security_state(struct nvdimm *nvdimm, + enum nvdimm_passphrase_type ptype) +{ + struct nfit_mem *nfit_mem = nvdimm_provider_data(nvdimm); + struct { + struct nd_cmd_pkg pkg; + struct nd_intel_get_security_state cmd; + } nd_cmd = { + .pkg = { + .nd_command = NVDIMM_INTEL_GET_SECURITY_STATE, + .nd_family = NVDIMM_FAMILY_INTEL, + .nd_size_out = + sizeof(struct nd_intel_get_security_state), + .nd_fw_size = + sizeof(struct nd_intel_get_security_state), + }, + }; + int rc; + + if (!test_bit(NVDIMM_INTEL_GET_SECURITY_STATE, &nfit_mem->dsm_mask)) + return -ENXIO; + + /* + * Short circuit the state retrieval while we are doing overwrite. + * The DSM spec states that the security state is indeterminate + * until the overwrite DSM completes. + */ + if (nvdimm_in_overwrite(nvdimm) && ptype == NVDIMM_USER) + return NVDIMM_SECURITY_OVERWRITE; + + rc = nvdimm_ctl(nvdimm, ND_CMD_CALL, &nd_cmd, sizeof(nd_cmd), NULL); + if (rc < 0) + return rc; + if (nd_cmd.cmd.status) + return -EIO; + + /* check and see if security is enabled and locked */ + if (ptype == NVDIMM_MASTER) { + if (nd_cmd.cmd.extended_state & ND_INTEL_SEC_ESTATE_ENABLED) + return NVDIMM_SECURITY_UNLOCKED; + else if (nd_cmd.cmd.extended_state & + ND_INTEL_SEC_ESTATE_PLIMIT) + return NVDIMM_SECURITY_FROZEN; + } else { + if (nd_cmd.cmd.state & ND_INTEL_SEC_STATE_UNSUPPORTED) + return -ENXIO; + else if (nd_cmd.cmd.state & ND_INTEL_SEC_STATE_ENABLED) { + if (nd_cmd.cmd.state & ND_INTEL_SEC_STATE_LOCKED) + return NVDIMM_SECURITY_LOCKED; + else if (nd_cmd.cmd.state & ND_INTEL_SEC_STATE_FROZEN + || nd_cmd.cmd.state & + ND_INTEL_SEC_STATE_PLIMIT) + return NVDIMM_SECURITY_FROZEN; + else + return NVDIMM_SECURITY_UNLOCKED; + } + } + + /* this should cover master security disabled as well */ + return NVDIMM_SECURITY_DISABLED; +} + +static int intel_security_freeze(struct nvdimm *nvdimm) +{ + struct nfit_mem *nfit_mem = nvdimm_provider_data(nvdimm); + struct { + struct nd_cmd_pkg pkg; + struct nd_intel_freeze_lock cmd; + } nd_cmd = { + .pkg = { + .nd_command = NVDIMM_INTEL_FREEZE_LOCK, + .nd_family = NVDIMM_FAMILY_INTEL, + .nd_size_out = ND_INTEL_STATUS_SIZE, + .nd_fw_size = ND_INTEL_STATUS_SIZE, + }, + }; + int rc; + + if (!test_bit(NVDIMM_INTEL_FREEZE_LOCK, &nfit_mem->dsm_mask)) + return -ENOTTY; + + rc = nvdimm_ctl(nvdimm, ND_CMD_CALL, &nd_cmd, sizeof(nd_cmd), NULL); + if (rc < 0) + return rc; + if (nd_cmd.cmd.status) + return -EIO; + return 0; +} + +static int intel_security_change_key(struct nvdimm *nvdimm, + const struct nvdimm_key_data *old_data, + const struct nvdimm_key_data *new_data, + enum nvdimm_passphrase_type ptype) +{ + struct nfit_mem *nfit_mem = nvdimm_provider_data(nvdimm); + unsigned int cmd = ptype == NVDIMM_MASTER ? + NVDIMM_INTEL_SET_MASTER_PASSPHRASE : + NVDIMM_INTEL_SET_PASSPHRASE; + struct { + struct nd_cmd_pkg pkg; + struct nd_intel_set_passphrase cmd; + } nd_cmd = { + .pkg = { + .nd_family = NVDIMM_FAMILY_INTEL, + .nd_size_in = ND_INTEL_PASSPHRASE_SIZE * 2, + .nd_size_out = ND_INTEL_STATUS_SIZE, + .nd_fw_size = ND_INTEL_STATUS_SIZE, + .nd_command = cmd, + }, + }; + int rc; + + if (!test_bit(cmd, &nfit_mem->dsm_mask)) + return -ENOTTY; + + if (old_data) + memcpy(nd_cmd.cmd.old_pass, old_data->data, + sizeof(nd_cmd.cmd.old_pass)); + memcpy(nd_cmd.cmd.new_pass, new_data->data, + sizeof(nd_cmd.cmd.new_pass)); + rc = nvdimm_ctl(nvdimm, ND_CMD_CALL, &nd_cmd, sizeof(nd_cmd), NULL); + if (rc < 0) + return rc; + + switch (nd_cmd.cmd.status) { + case 0: + return 0; + case ND_INTEL_STATUS_INVALID_PASS: + return -EINVAL; + case ND_INTEL_STATUS_NOT_SUPPORTED: + return -EOPNOTSUPP; + case ND_INTEL_STATUS_INVALID_STATE: + default: + return -EIO; + } +} + +static void nvdimm_invalidate_cache(void); + +static int intel_security_unlock(struct nvdimm *nvdimm, + const struct nvdimm_key_data *key_data) +{ + struct nfit_mem *nfit_mem = nvdimm_provider_data(nvdimm); + struct { + struct nd_cmd_pkg pkg; + struct nd_intel_unlock_unit cmd; + } nd_cmd = { + .pkg = { + .nd_command = NVDIMM_INTEL_UNLOCK_UNIT, + .nd_family = NVDIMM_FAMILY_INTEL, + .nd_size_in = ND_INTEL_PASSPHRASE_SIZE, + .nd_size_out = ND_INTEL_STATUS_SIZE, + .nd_fw_size = ND_INTEL_STATUS_SIZE, + }, + }; + int rc; + + if (!test_bit(NVDIMM_INTEL_UNLOCK_UNIT, &nfit_mem->dsm_mask)) + return -ENOTTY; + + memcpy(nd_cmd.cmd.passphrase, key_data->data, + sizeof(nd_cmd.cmd.passphrase)); + rc = nvdimm_ctl(nvdimm, ND_CMD_CALL, &nd_cmd, sizeof(nd_cmd), NULL); + if (rc < 0) + return rc; + switch (nd_cmd.cmd.status) { + case 0: + break; + case ND_INTEL_STATUS_INVALID_PASS: + return -EINVAL; + default: + return -EIO; + } + + /* DIMM unlocked, invalidate all CPU caches before we read it */ + nvdimm_invalidate_cache(); + + return 0; +} + +static int intel_security_disable(struct nvdimm *nvdimm, + const struct nvdimm_key_data *key_data) +{ + int rc; + struct nfit_mem *nfit_mem = nvdimm_provider_data(nvdimm); + struct { + struct nd_cmd_pkg pkg; + struct nd_intel_disable_passphrase cmd; + } nd_cmd = { + .pkg = { + .nd_command = NVDIMM_INTEL_DISABLE_PASSPHRASE, + .nd_family = NVDIMM_FAMILY_INTEL, + .nd_size_in = ND_INTEL_PASSPHRASE_SIZE, + .nd_size_out = ND_INTEL_STATUS_SIZE, + .nd_fw_size = ND_INTEL_STATUS_SIZE, + }, + }; + + if (!test_bit(NVDIMM_INTEL_DISABLE_PASSPHRASE, &nfit_mem->dsm_mask)) + return -ENOTTY; + + memcpy(nd_cmd.cmd.passphrase, key_data->data, + sizeof(nd_cmd.cmd.passphrase)); + rc = nvdimm_ctl(nvdimm, ND_CMD_CALL, &nd_cmd, sizeof(nd_cmd), NULL); + if (rc < 0) + return rc; + + switch (nd_cmd.cmd.status) { + case 0: + break; + case ND_INTEL_STATUS_INVALID_PASS: + return -EINVAL; + case ND_INTEL_STATUS_INVALID_STATE: + default: + return -ENXIO; + } + + return 0; +} + +static int intel_security_erase(struct nvdimm *nvdimm, + const struct nvdimm_key_data *key, + enum nvdimm_passphrase_type ptype) +{ + int rc; + struct nfit_mem *nfit_mem = nvdimm_provider_data(nvdimm); + unsigned int cmd = ptype == NVDIMM_MASTER ? + NVDIMM_INTEL_MASTER_SECURE_ERASE : NVDIMM_INTEL_SECURE_ERASE; + struct { + struct nd_cmd_pkg pkg; + struct nd_intel_secure_erase cmd; + } nd_cmd = { + .pkg = { + .nd_family = NVDIMM_FAMILY_INTEL, + .nd_size_in = ND_INTEL_PASSPHRASE_SIZE, + .nd_size_out = ND_INTEL_STATUS_SIZE, + .nd_fw_size = ND_INTEL_STATUS_SIZE, + .nd_command = cmd, + }, + }; + + if (!test_bit(cmd, &nfit_mem->dsm_mask)) + return -ENOTTY; + + /* flush all cache before we erase DIMM */ + nvdimm_invalidate_cache(); + memcpy(nd_cmd.cmd.passphrase, key->data, + sizeof(nd_cmd.cmd.passphrase)); + rc = nvdimm_ctl(nvdimm, ND_CMD_CALL, &nd_cmd, sizeof(nd_cmd), NULL); + if (rc < 0) + return rc; + + switch (nd_cmd.cmd.status) { + case 0: + break; + case ND_INTEL_STATUS_NOT_SUPPORTED: + return -EOPNOTSUPP; + case ND_INTEL_STATUS_INVALID_PASS: + return -EINVAL; + case ND_INTEL_STATUS_INVALID_STATE: + default: + return -ENXIO; + } + + /* DIMM erased, invalidate all CPU caches before we read it */ + nvdimm_invalidate_cache(); + return 0; +} + +static int intel_security_query_overwrite(struct nvdimm *nvdimm) +{ + int rc; + struct nfit_mem *nfit_mem = nvdimm_provider_data(nvdimm); + struct { + struct nd_cmd_pkg pkg; + struct nd_intel_query_overwrite cmd; + } nd_cmd = { + .pkg = { + .nd_command = NVDIMM_INTEL_QUERY_OVERWRITE, + .nd_family = NVDIMM_FAMILY_INTEL, + .nd_size_out = ND_INTEL_STATUS_SIZE, + .nd_fw_size = ND_INTEL_STATUS_SIZE, + }, + }; + + if (!test_bit(NVDIMM_INTEL_QUERY_OVERWRITE, &nfit_mem->dsm_mask)) + return -ENOTTY; + + rc = nvdimm_ctl(nvdimm, ND_CMD_CALL, &nd_cmd, sizeof(nd_cmd), NULL); + if (rc < 0) + return rc; + + switch (nd_cmd.cmd.status) { + case 0: + break; + case ND_INTEL_STATUS_OQUERY_INPROGRESS: + return -EBUSY; + default: + return -ENXIO; + } + + /* flush all cache before we make the nvdimms available */ + nvdimm_invalidate_cache(); + return 0; +} + +static int intel_security_overwrite(struct nvdimm *nvdimm, + const struct nvdimm_key_data *nkey) +{ + int rc; + struct nfit_mem *nfit_mem = nvdimm_provider_data(nvdimm); + struct { + struct nd_cmd_pkg pkg; + struct nd_intel_overwrite cmd; + } nd_cmd = { + .pkg = { + .nd_command = NVDIMM_INTEL_OVERWRITE, + .nd_family = NVDIMM_FAMILY_INTEL, + .nd_size_in = ND_INTEL_PASSPHRASE_SIZE, + .nd_size_out = ND_INTEL_STATUS_SIZE, + .nd_fw_size = ND_INTEL_STATUS_SIZE, + }, + }; + + if (!test_bit(NVDIMM_INTEL_OVERWRITE, &nfit_mem->dsm_mask)) + return -ENOTTY; + + /* flush all cache before we erase DIMM */ + nvdimm_invalidate_cache(); + if (nkey) + memcpy(nd_cmd.cmd.passphrase, nkey->data, + sizeof(nd_cmd.cmd.passphrase)); + rc = nvdimm_ctl(nvdimm, ND_CMD_CALL, &nd_cmd, sizeof(nd_cmd), NULL); + if (rc < 0) + return rc; + + switch (nd_cmd.cmd.status) { + case 0: + return 0; + case ND_INTEL_STATUS_OVERWRITE_UNSUPPORTED: + return -ENOTSUPP; + case ND_INTEL_STATUS_INVALID_PASS: + return -EINVAL; + case ND_INTEL_STATUS_INVALID_STATE: + default: + return -ENXIO; + } +} + +/* + * TODO: define a cross arch wbinvd equivalent when/if + * NVDIMM_FAMILY_INTEL command support arrives on another arch. + */ +#ifdef CONFIG_X86 +static void nvdimm_invalidate_cache(void) +{ + wbinvd_on_all_cpus(); +} +#else +static void nvdimm_invalidate_cache(void) +{ + WARN_ON_ONCE("cache invalidation required after unlock\n"); +} +#endif + +static const struct nvdimm_security_ops __intel_security_ops = { + .state = intel_security_state, + .freeze = intel_security_freeze, + .change_key = intel_security_change_key, + .disable = intel_security_disable, +#ifdef CONFIG_X86 + .unlock = intel_security_unlock, + .erase = intel_security_erase, + .overwrite = intel_security_overwrite, + .query_overwrite = intel_security_query_overwrite, +#endif +}; + +const struct nvdimm_security_ops *intel_security_ops = &__intel_security_ops; diff --git a/drivers/acpi/nfit/intel.h b/drivers/acpi/nfit/intel.h index 86746312381f..0aca682ab9d7 100644 --- a/drivers/acpi/nfit/intel.h +++ b/drivers/acpi/nfit/intel.h @@ -35,4 +35,80 @@ struct nd_intel_smart { }; } __packed; +extern const struct nvdimm_security_ops *intel_security_ops; + +#define ND_INTEL_STATUS_SIZE 4 +#define ND_INTEL_PASSPHRASE_SIZE 32 + +#define ND_INTEL_STATUS_NOT_SUPPORTED 1 +#define ND_INTEL_STATUS_RETRY 5 +#define ND_INTEL_STATUS_NOT_READY 9 +#define ND_INTEL_STATUS_INVALID_STATE 10 +#define ND_INTEL_STATUS_INVALID_PASS 11 +#define ND_INTEL_STATUS_OVERWRITE_UNSUPPORTED 0x10007 +#define ND_INTEL_STATUS_OQUERY_INPROGRESS 0x10007 +#define ND_INTEL_STATUS_OQUERY_SEQUENCE_ERR 0x20007 + +#define ND_INTEL_SEC_STATE_ENABLED 0x02 +#define ND_INTEL_SEC_STATE_LOCKED 0x04 +#define ND_INTEL_SEC_STATE_FROZEN 0x08 +#define ND_INTEL_SEC_STATE_PLIMIT 0x10 +#define ND_INTEL_SEC_STATE_UNSUPPORTED 0x20 +#define ND_INTEL_SEC_STATE_OVERWRITE 0x40 + +#define ND_INTEL_SEC_ESTATE_ENABLED 0x01 +#define ND_INTEL_SEC_ESTATE_PLIMIT 0x02 + +struct nd_intel_get_security_state { + u32 status; + u8 extended_state; + u8 reserved[3]; + u8 state; + u8 reserved1[3]; +} __packed; + +struct nd_intel_set_passphrase { + u8 old_pass[ND_INTEL_PASSPHRASE_SIZE]; + u8 new_pass[ND_INTEL_PASSPHRASE_SIZE]; + u32 status; +} __packed; + +struct nd_intel_unlock_unit { + u8 passphrase[ND_INTEL_PASSPHRASE_SIZE]; + u32 status; +} __packed; + +struct nd_intel_disable_passphrase { + u8 passphrase[ND_INTEL_PASSPHRASE_SIZE]; + u32 status; +} __packed; + +struct nd_intel_freeze_lock { + u32 status; +} __packed; + +struct nd_intel_secure_erase { + u8 passphrase[ND_INTEL_PASSPHRASE_SIZE]; + u32 status; +} __packed; + +struct nd_intel_overwrite { + u8 passphrase[ND_INTEL_PASSPHRASE_SIZE]; + u32 status; +} __packed; + +struct nd_intel_query_overwrite { + u32 status; +} __packed; + +struct nd_intel_set_master_passphrase { + u8 old_pass[ND_INTEL_PASSPHRASE_SIZE]; + u8 new_pass[ND_INTEL_PASSPHRASE_SIZE]; + u32 status; +} __packed; + +struct nd_intel_master_secure_erase { + u8 passphrase[ND_INTEL_PASSPHRASE_SIZE]; + u32 status; +} __packed; #endif diff --git a/drivers/acpi/nfit/nfit.h b/drivers/acpi/nfit/nfit.h index df0f6b8407e7..33691aecfcee 100644 --- a/drivers/acpi/nfit/nfit.h +++ b/drivers/acpi/nfit/nfit.h @@ -60,14 +60,33 @@ enum nvdimm_family_cmds { NVDIMM_INTEL_QUERY_FWUPDATE = 16, NVDIMM_INTEL_SET_THRESHOLD = 17, NVDIMM_INTEL_INJECT_ERROR = 18, + NVDIMM_INTEL_GET_SECURITY_STATE = 19, + NVDIMM_INTEL_SET_PASSPHRASE = 20, + NVDIMM_INTEL_DISABLE_PASSPHRASE = 21, + NVDIMM_INTEL_UNLOCK_UNIT = 22, + NVDIMM_INTEL_FREEZE_LOCK = 23, + NVDIMM_INTEL_SECURE_ERASE = 24, + NVDIMM_INTEL_OVERWRITE = 25, + NVDIMM_INTEL_QUERY_OVERWRITE = 26, + NVDIMM_INTEL_SET_MASTER_PASSPHRASE = 27, + NVDIMM_INTEL_MASTER_SECURE_ERASE = 28, }; +#define NVDIMM_INTEL_SECURITY_CMDMASK \ +(1 << NVDIMM_INTEL_GET_SECURITY_STATE | 1 << NVDIMM_INTEL_SET_PASSPHRASE \ +| 1 << NVDIMM_INTEL_DISABLE_PASSPHRASE | 1 << NVDIMM_INTEL_UNLOCK_UNIT \ +| 1 << NVDIMM_INTEL_FREEZE_LOCK | 1 << NVDIMM_INTEL_SECURE_ERASE \ +| 1 << NVDIMM_INTEL_OVERWRITE | 1 << NVDIMM_INTEL_QUERY_OVERWRITE \ +| 1 << NVDIMM_INTEL_SET_MASTER_PASSPHRASE \ +| 1 << NVDIMM_INTEL_MASTER_SECURE_ERASE) + #define NVDIMM_INTEL_CMDMASK \ (NVDIMM_STANDARD_CMDMASK | 1 << NVDIMM_INTEL_GET_MODES \ | 1 << NVDIMM_INTEL_GET_FWINFO | 1 << NVDIMM_INTEL_START_FWUPDATE \ | 1 << NVDIMM_INTEL_SEND_FWUPDATE | 1 << NVDIMM_INTEL_FINISH_FWUPDATE \ | 1 << NVDIMM_INTEL_QUERY_FWUPDATE | 1 << NVDIMM_INTEL_SET_THRESHOLD \ - | 1 << NVDIMM_INTEL_INJECT_ERROR | 1 << NVDIMM_INTEL_LATCH_SHUTDOWN) + | 1 << NVDIMM_INTEL_INJECT_ERROR | 1 << NVDIMM_INTEL_LATCH_SHUTDOWN \ + | NVDIMM_INTEL_SECURITY_CMDMASK) enum nfit_uuids { /* for simplicity alias the uuid index with the family id */ @@ -164,6 +183,8 @@ enum nfit_mem_flags { NFIT_MEM_DIRTY_COUNT, }; +#define NFIT_DIMM_ID_LEN 22 + /* assembled tables for a given dimm/memory-device */ struct nfit_mem { struct nvdimm *nvdimm; @@ -181,6 +202,7 @@ struct nfit_mem { struct list_head list; struct acpi_device *adev; struct acpi_nfit_desc *acpi_desc; + char id[NFIT_DIMM_ID_LEN+1]; struct resource *flush_wpq; unsigned long dsm_mask; unsigned long flags; diff --git a/drivers/acpi/scan.c b/drivers/acpi/scan.c index e9eda5558c1f..5efd4219f112 100644 --- a/drivers/acpi/scan.c +++ b/drivers/acpi/scan.c @@ -1456,6 +1456,11 @@ int acpi_dma_configure(struct device *dev, enum dev_dma_attr attr) const struct iommu_ops *iommu; u64 dma_addr = 0, size = 0; + if (attr == DEV_DMA_NOT_SUPPORTED) { + set_dma_ops(dev, &dma_dummy_ops); + return 0; + } + iort_dma_setup(dev, &dma_addr, &size); iommu = iort_iommu_configure(dev); diff --git a/drivers/ata/libata-eh.c b/drivers/ata/libata-eh.c index 01306c018398..938ed513b070 100644 --- a/drivers/ata/libata-eh.c +++ b/drivers/ata/libata-eh.c @@ -919,8 +919,6 @@ static void ata_eh_set_pending(struct ata_port *ap, int fastdrain) void ata_qc_schedule_eh(struct ata_queued_cmd *qc) { struct ata_port *ap = qc->ap; - struct request_queue *q = qc->scsicmd->device->request_queue; - unsigned long flags; WARN_ON(!ap->ops->error_handler); @@ -932,9 +930,7 @@ void ata_qc_schedule_eh(struct ata_queued_cmd *qc) * Note that ATA_QCFLAG_FAILED is unconditionally set after * this function completes. */ - spin_lock_irqsave(q->queue_lock, flags); blk_abort_request(qc->scsicmd->request); - spin_unlock_irqrestore(q->queue_lock, flags); } /** diff --git a/drivers/ata/pata_palmld.c b/drivers/ata/pata_palmld.c index d071ab6864a8..26817fd91700 100644 --- a/drivers/ata/pata_palmld.c +++ b/drivers/ata/pata_palmld.c @@ -26,16 +26,17 @@ #include <linux/irq.h> #include <linux/platform_device.h> #include <linux/delay.h> -#include <linux/gpio.h> +#include <linux/gpio/consumer.h> #include <scsi/scsi_host.h> #include <mach/palmld.h> #define DRV_NAME "pata_palmld" -static struct gpio palmld_hdd_gpios[] = { - { GPIO_NR_PALMLD_IDE_PWEN, GPIOF_INIT_HIGH, "HDD Power" }, - { GPIO_NR_PALMLD_IDE_RESET, GPIOF_INIT_LOW, "HDD Reset" }, +struct palmld_pata { + struct ata_host *host; + struct gpio_desc *power; + struct gpio_desc *reset; }; static struct scsi_host_template palmld_sht = { @@ -50,39 +51,44 @@ static struct ata_port_operations palmld_port_ops = { static int palmld_pata_probe(struct platform_device *pdev) { - struct ata_host *host; + struct palmld_pata *lda; struct ata_port *ap; void __iomem *mem; + struct device *dev = &pdev->dev; int ret; + lda = devm_kzalloc(dev, sizeof(*lda), GFP_KERNEL); + if (!lda) + return -ENOMEM; + /* allocate host */ - host = ata_host_alloc(&pdev->dev, 1); - if (!host) { - ret = -ENOMEM; - goto err1; - } + lda->host = ata_host_alloc(dev, 1); + if (!lda->host) + return -ENOMEM; /* remap drive's physical memory address */ - mem = devm_ioremap(&pdev->dev, PALMLD_IDE_PHYS, 0x1000); - if (!mem) { - ret = -ENOMEM; - goto err1; + mem = devm_ioremap(dev, PALMLD_IDE_PHYS, 0x1000); + if (!mem) + return -ENOMEM; + + /* request and activate power and reset GPIOs */ + lda->power = devm_gpiod_get(dev, "power", GPIOD_OUT_HIGH); + if (IS_ERR(lda->power)) + return PTR_ERR(lda->power); + lda->reset = devm_gpiod_get(dev, "reset", GPIOD_OUT_HIGH); + if (IS_ERR(lda->reset)) { + gpiod_set_value(lda->power, 0); + return PTR_ERR(lda->reset); } - /* request and activate power GPIO, IRQ GPIO */ - ret = gpio_request_array(palmld_hdd_gpios, - ARRAY_SIZE(palmld_hdd_gpios)); - if (ret) - goto err1; - - /* reset the drive */ - gpio_set_value(GPIO_NR_PALMLD_IDE_RESET, 0); + /* Assert reset to reset the drive */ + gpiod_set_value(lda->reset, 1); msleep(30); - gpio_set_value(GPIO_NR_PALMLD_IDE_RESET, 1); + gpiod_set_value(lda->reset, 0); msleep(30); /* setup the ata port */ - ap = host->ports[0]; + ap = lda->host->ports[0]; ap->ops = &palmld_port_ops; ap->pio_mask = ATA_PIO4; ap->flags |= ATA_FLAG_PIO_POLLING; @@ -96,27 +102,26 @@ static int palmld_pata_probe(struct platform_device *pdev) ata_sff_std_ports(&ap->ioaddr); /* activate host */ - ret = ata_host_activate(host, 0, NULL, IRQF_TRIGGER_RISING, - &palmld_sht); - if (ret) - goto err2; - - return ret; + ret = ata_host_activate(lda->host, 0, NULL, IRQF_TRIGGER_RISING, + &palmld_sht); + /* power down on failure */ + if (ret) { + gpiod_set_value(lda->power, 0); + return ret; + } -err2: - gpio_free_array(palmld_hdd_gpios, ARRAY_SIZE(palmld_hdd_gpios)); -err1: - return ret; + platform_set_drvdata(pdev, lda); + return 0; } -static int palmld_pata_remove(struct platform_device *dev) +static int palmld_pata_remove(struct platform_device *pdev) { - ata_platform_remove_one(dev); + struct palmld_pata *lda = platform_get_drvdata(pdev); - /* power down the HDD */ - gpio_set_value(GPIO_NR_PALMLD_IDE_PWEN, 0); + ata_platform_remove_one(pdev); - gpio_free_array(palmld_hdd_gpios, ARRAY_SIZE(palmld_hdd_gpios)); + /* power down the HDD */ + gpiod_set_value(lda->power, 0); return 0; } diff --git a/drivers/ata/pata_pxa.c b/drivers/ata/pata_pxa.c index e8b6a2e464c9..4b9b9e120188 100644 --- a/drivers/ata/pata_pxa.c +++ b/drivers/ata/pata_pxa.c @@ -25,7 +25,6 @@ #include <linux/libata.h> #include <linux/platform_device.h> #include <linux/dmaengine.h> -#include <linux/gpio.h> #include <linux/slab.h> #include <linux/completion.h> diff --git a/drivers/ata/pata_rb532_cf.c b/drivers/ata/pata_rb532_cf.c index 653b9a0bf727..66bb5bff126b 100644 --- a/drivers/ata/pata_rb532_cf.c +++ b/drivers/ata/pata_rb532_cf.c @@ -27,7 +27,7 @@ #include <linux/io.h> #include <linux/interrupt.h> #include <linux/irq.h> -#include <linux/gpio.h> +#include <linux/gpio/consumer.h> #include <linux/libata.h> #include <scsi/scsi_host.h> @@ -49,7 +49,7 @@ struct rb532_cf_info { void __iomem *iobase; - unsigned int gpio_line; + struct gpio_desc *gpio_line; unsigned int irq; }; @@ -60,7 +60,7 @@ static irqreturn_t rb532_pata_irq_handler(int irq, void *dev_instance) struct ata_host *ah = dev_instance; struct rb532_cf_info *info = ah->private_data; - if (gpio_get_value(info->gpio_line)) { + if (gpiod_get_value(info->gpio_line)) { irq_set_irq_type(info->irq, IRQ_TYPE_LEVEL_LOW); ata_sff_interrupt(info->irq, dev_instance); } else { @@ -106,10 +106,9 @@ static void rb532_pata_setup_ports(struct ata_host *ah) static int rb532_pata_driver_probe(struct platform_device *pdev) { int irq; - int gpio; + struct gpio_desc *gpiod; struct resource *res; struct ata_host *ah; - struct cf_device *pdata; struct rb532_cf_info *info; int ret; @@ -125,23 +124,12 @@ static int rb532_pata_driver_probe(struct platform_device *pdev) return -ENOENT; } - pdata = dev_get_platdata(&pdev->dev); - if (!pdata) { - dev_err(&pdev->dev, "no platform data specified\n"); - return -EINVAL; - } - - gpio = pdata->gpio_pin; - if (gpio < 0) { + gpiod = devm_gpiod_get(&pdev->dev, NULL, GPIOD_IN); + if (IS_ERR(gpiod)) { dev_err(&pdev->dev, "no GPIO found for irq%d\n", irq); - return -ENOENT; - } - - ret = gpio_request(gpio, DRV_NAME); - if (ret) { - dev_err(&pdev->dev, "GPIO request failed\n"); - return ret; + return PTR_ERR(gpiod); } + gpiod_set_consumer_name(gpiod, DRV_NAME); /* allocate host */ ah = ata_host_alloc(&pdev->dev, RB500_CF_MAXPORTS); @@ -153,7 +141,7 @@ static int rb532_pata_driver_probe(struct platform_device *pdev) return -ENOMEM; ah->private_data = info; - info->gpio_line = gpio; + info->gpio_line = gpiod; info->irq = irq; info->iobase = devm_ioremap_nocache(&pdev->dev, res->start, @@ -161,26 +149,14 @@ static int rb532_pata_driver_probe(struct platform_device *pdev) if (!info->iobase) return -ENOMEM; - ret = gpio_direction_input(gpio); - if (ret) { - dev_err(&pdev->dev, "unable to set GPIO direction, err=%d\n", - ret); - goto err_free_gpio; - } - rb532_pata_setup_ports(ah); ret = ata_host_activate(ah, irq, rb532_pata_irq_handler, IRQF_TRIGGER_LOW, &rb532_pata_sht); if (ret) - goto err_free_gpio; + return ret; return 0; - -err_free_gpio: - gpio_free(gpio); - - return ret; } static int rb532_pata_driver_remove(struct platform_device *pdev) @@ -189,7 +165,6 @@ static int rb532_pata_driver_remove(struct platform_device *pdev) struct rb532_cf_info *info = ah->private_data; ata_host_detach(ah); - gpio_free(info->gpio_line); return 0; } diff --git a/drivers/ata/sata_highbank.c b/drivers/ata/sata_highbank.c index e67815b896fc..c8fc9280d6e4 100644 --- a/drivers/ata/sata_highbank.c +++ b/drivers/ata/sata_highbank.c @@ -31,8 +31,7 @@ #include <linux/interrupt.h> #include <linux/delay.h> #include <linux/export.h> -#include <linux/gpio.h> -#include <linux/of_gpio.h> +#include <linux/gpio/consumer.h> #include "ahci.h" @@ -85,7 +84,7 @@ struct ecx_plat_data { /* number of extra clocks that the SGPIO PIC controller expects */ u32 pre_clocks; u32 post_clocks; - unsigned sgpio_gpio[SGPIO_PINS]; + struct gpio_desc *sgpio_gpiod[SGPIO_PINS]; u32 sgpio_pattern; u32 port_to_sgpio[SGPIO_PORTS]; }; @@ -131,9 +130,9 @@ static void ecx_parse_sgpio(struct ecx_plat_data *pdata, u32 port, u32 state) */ static void ecx_led_cycle_clock(struct ecx_plat_data *pdata) { - gpio_set_value(pdata->sgpio_gpio[SCLOCK], 1); + gpiod_set_value(pdata->sgpio_gpiod[SCLOCK], 1); udelay(50); - gpio_set_value(pdata->sgpio_gpio[SCLOCK], 0); + gpiod_set_value(pdata->sgpio_gpiod[SCLOCK], 0); udelay(50); } @@ -164,15 +163,15 @@ static ssize_t ecx_transmit_led_message(struct ata_port *ap, u32 state, for (i = 0; i < pdata->pre_clocks; i++) ecx_led_cycle_clock(pdata); - gpio_set_value(pdata->sgpio_gpio[SLOAD], 1); + gpiod_set_value(pdata->sgpio_gpiod[SLOAD], 1); ecx_led_cycle_clock(pdata); - gpio_set_value(pdata->sgpio_gpio[SLOAD], 0); + gpiod_set_value(pdata->sgpio_gpiod[SLOAD], 0); /* * bit-bang out the SGPIO pattern, by consuming a bit and then * clocking it out. */ for (i = 0; i < (SGPIO_SIGNALS * pdata->n_ports); i++) { - gpio_set_value(pdata->sgpio_gpio[SDATA], sgpio_out & 1); + gpiod_set_value(pdata->sgpio_gpiod[SDATA], sgpio_out & 1); sgpio_out >>= 1; ecx_led_cycle_clock(pdata); } @@ -193,21 +192,19 @@ static void highbank_set_em_messages(struct device *dev, struct device_node *np = dev->of_node; struct ecx_plat_data *pdata = hpriv->plat_data; int i; - int err; for (i = 0; i < SGPIO_PINS; i++) { - err = of_get_named_gpio(np, "calxeda,sgpio-gpio", i); - if (err < 0) - return; - - pdata->sgpio_gpio[i] = err; - err = gpio_request(pdata->sgpio_gpio[i], "CX SGPIO"); - if (err) { - pr_err("sata_highbank gpio_request %d failed: %d\n", - i, err); - return; + struct gpio_desc *gpiod; + + gpiod = devm_gpiod_get_index(dev, "calxeda,sgpio", i, + GPIOD_OUT_HIGH); + if (IS_ERR(gpiod)) { + dev_err(dev, "failed to get GPIO %d\n", i); + continue; } - gpio_direction_output(pdata->sgpio_gpio[i], 1); + gpiod_set_consumer_name(gpiod, "CX SGPIO"); + + pdata->sgpio_gpiod[i] = gpiod; } of_property_read_u32_array(np, "calxeda,led-order", pdata->port_to_sgpio, diff --git a/drivers/ata/sata_rcar.c b/drivers/ata/sata_rcar.c index 4b1ff5bc256a..59b2317acea9 100644 --- a/drivers/ata/sata_rcar.c +++ b/drivers/ata/sata_rcar.c @@ -891,7 +891,9 @@ static int sata_rcar_probe(struct platform_device *pdev) int ret = 0; irq = platform_get_irq(pdev, 0); - if (irq <= 0) + if (irq < 0) + return irq; + if (!irq) return -EINVAL; priv = devm_kzalloc(dev, sizeof(struct sata_rcar_priv), GFP_KERNEL); diff --git a/drivers/base/platform.c b/drivers/base/platform.c index 0fb5f140f1b0..445cbd8266ca 100644 --- a/drivers/base/platform.c +++ b/drivers/base/platform.c @@ -1137,8 +1137,7 @@ int platform_dma_configure(struct device *dev) ret = of_dma_configure(dev, dev->of_node, true); } else if (has_acpi_companion(dev)) { attr = acpi_get_dma_attr(to_acpi_device_node(dev->fwnode)); - if (attr != DEV_DMA_NOT_SUPPORTED) - ret = acpi_dma_configure(dev, attr); + ret = acpi_dma_configure(dev, attr); } return ret; @@ -1178,37 +1177,6 @@ int __init platform_bus_init(void) return error; } -#ifndef ARCH_HAS_DMA_GET_REQUIRED_MASK -static u64 dma_default_get_required_mask(struct device *dev) -{ - u32 low_totalram = ((max_pfn - 1) << PAGE_SHIFT); - u32 high_totalram = ((max_pfn - 1) >> (32 - PAGE_SHIFT)); - u64 mask; - - if (!high_totalram) { - /* convert to mask just covering totalram */ - low_totalram = (1 << (fls(low_totalram) - 1)); - low_totalram += low_totalram - 1; - mask = low_totalram; - } else { - high_totalram = (1 << (fls(high_totalram) - 1)); - high_totalram += high_totalram - 1; - mask = (((u64)high_totalram) << 32) + 0xffffffff; - } - return mask; -} - -u64 dma_get_required_mask(struct device *dev) -{ - const struct dma_map_ops *ops = get_dma_ops(dev); - - if (ops->get_required_mask) - return ops->get_required_mask(dev); - return dma_default_get_required_mask(dev); -} -EXPORT_SYMBOL_GPL(dma_get_required_mask); -#endif - static __initdata LIST_HEAD(early_platform_driver_list); static __initdata LIST_HEAD(early_platform_device_list); diff --git a/drivers/block/aoe/aoe.h b/drivers/block/aoe/aoe.h index 7ca76ed2e71a..84d0fcebd6af 100644 --- a/drivers/block/aoe/aoe.h +++ b/drivers/block/aoe/aoe.h @@ -100,6 +100,10 @@ enum { MAX_TAINT = 1000, /* cap on aoetgt taint */ }; +struct aoe_req { + unsigned long nr_bios; +}; + struct buf { ulong nframesout; struct bio *bio; diff --git a/drivers/block/aoe/aoeblk.c b/drivers/block/aoe/aoeblk.c index ed26b7287256..e2c6aae2d636 100644 --- a/drivers/block/aoe/aoeblk.c +++ b/drivers/block/aoe/aoeblk.c @@ -387,6 +387,7 @@ aoeblk_gdalloc(void *vp) set = &d->tag_set; set->ops = &aoeblk_mq_ops; + set->cmd_size = sizeof(struct aoe_req); set->nr_hw_queues = 1; set->queue_depth = 128; set->numa_node = NUMA_NO_NODE; diff --git a/drivers/block/aoe/aoecmd.c b/drivers/block/aoe/aoecmd.c index bb2fba651bd2..3cf9bc5d8d95 100644 --- a/drivers/block/aoe/aoecmd.c +++ b/drivers/block/aoe/aoecmd.c @@ -822,17 +822,6 @@ out: spin_unlock_irqrestore(&d->lock, flags); } -static unsigned long -rqbiocnt(struct request *r) -{ - struct bio *bio; - unsigned long n = 0; - - __rq_for_each_bio(bio, r) - n++; - return n; -} - static void bufinit(struct buf *buf, struct request *rq, struct bio *bio) { @@ -847,6 +836,7 @@ nextbuf(struct aoedev *d) { struct request *rq; struct request_queue *q; + struct aoe_req *req; struct buf *buf; struct bio *bio; @@ -865,7 +855,11 @@ nextbuf(struct aoedev *d) blk_mq_start_request(rq); d->ip.rq = rq; d->ip.nxbio = rq->bio; - rq->special = (void *) rqbiocnt(rq); + + req = blk_mq_rq_to_pdu(rq); + req->nr_bios = 0; + __rq_for_each_bio(bio, rq) + req->nr_bios++; } buf = mempool_alloc(d->bufpool, GFP_ATOMIC); if (buf == NULL) { @@ -1069,16 +1063,13 @@ aoe_end_request(struct aoedev *d, struct request *rq, int fastfail) static void aoe_end_buf(struct aoedev *d, struct buf *buf) { - struct request *rq; - unsigned long n; + struct request *rq = buf->rq; + struct aoe_req *req = blk_mq_rq_to_pdu(rq); if (buf == d->ip.buf) d->ip.buf = NULL; - rq = buf->rq; mempool_free(buf, d->bufpool); - n = (unsigned long) rq->special; - rq->special = (void *) --n; - if (n == 0) + if (--req->nr_bios == 0) aoe_end_request(d, rq, 0); } diff --git a/drivers/block/aoe/aoedev.c b/drivers/block/aoe/aoedev.c index 9063f8efbd3b..5b49f1b33ebe 100644 --- a/drivers/block/aoe/aoedev.c +++ b/drivers/block/aoe/aoedev.c @@ -160,21 +160,22 @@ static void aoe_failip(struct aoedev *d) { struct request *rq; + struct aoe_req *req; struct bio *bio; - unsigned long n; aoe_failbuf(d, d->ip.buf); - rq = d->ip.rq; if (rq == NULL) return; + + req = blk_mq_rq_to_pdu(rq); while ((bio = d->ip.nxbio)) { bio->bi_status = BLK_STS_IOERR; d->ip.nxbio = bio->bi_next; - n = (unsigned long) rq->special; - rq->special = (void *) --n; + req->nr_bios--; } - if ((unsigned long) rq->special == 0) + + if (!req->nr_bios) aoe_end_request(d, rq, 0); } diff --git a/drivers/block/aoe/aoemain.c b/drivers/block/aoe/aoemain.c index 251482066977..1e4e2971171c 100644 --- a/drivers/block/aoe/aoemain.c +++ b/drivers/block/aoe/aoemain.c @@ -24,7 +24,7 @@ static void discover_timer(struct timer_list *t) aoecmd_cfg(0xffff, 0xff); } -static void +static void __exit aoe_exit(void) { del_timer_sync(&timer); diff --git a/drivers/block/ataflop.c b/drivers/block/ataflop.c index f88b4c26d422..b0dbbdfeb33e 100644 --- a/drivers/block/ataflop.c +++ b/drivers/block/ataflop.c @@ -1471,6 +1471,15 @@ static void setup_req_params( int drive ) ReqTrack, ReqSector, (unsigned long)ReqData )); } +static void ataflop_commit_rqs(struct blk_mq_hw_ctx *hctx) +{ + spin_lock_irq(&ataflop_lock); + atari_disable_irq(IRQ_MFP_FDC); + finish_fdc(); + atari_enable_irq(IRQ_MFP_FDC); + spin_unlock_irq(&ataflop_lock); +} + static blk_status_t ataflop_queue_rq(struct blk_mq_hw_ctx *hctx, const struct blk_mq_queue_data *bd) { @@ -1947,6 +1956,7 @@ static const struct block_device_operations floppy_fops = { static const struct blk_mq_ops ataflop_mq_ops = { .queue_rq = ataflop_queue_rq, + .commit_rqs = ataflop_commit_rqs, }; static struct kobject *floppy_find(dev_t dev, int *part, void *data) @@ -1982,6 +1992,7 @@ static int __init atari_floppy_init (void) &ataflop_mq_ops, 2, BLK_MQ_F_SHOULD_MERGE); if (IS_ERR(unit[i].disk->queue)) { + put_disk(unit[i].disk); ret = PTR_ERR(unit[i].disk->queue); unit[i].disk->queue = NULL; goto err; @@ -2033,18 +2044,13 @@ static int __init atari_floppy_init (void) return 0; err: - do { + while (--i >= 0) { struct gendisk *disk = unit[i].disk; - if (disk) { - if (disk->queue) { - blk_cleanup_queue(disk->queue); - disk->queue = NULL; - } - blk_mq_free_tag_set(&unit[i].tag_set); - put_disk(unit[i].disk); - } - } while (i--); + blk_cleanup_queue(disk->queue); + blk_mq_free_tag_set(&unit[i].tag_set); + put_disk(unit[i].disk); + } unregister_blkdev(FLOPPY_MAJOR, "fd"); return ret; diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index fa8204214ac0..f973a2a845c8 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -2792,7 +2792,7 @@ enum drbd_ret_code drbd_create_device(struct drbd_config_context *adm_ctx, unsig drbd_init_set_defaults(device); - q = blk_alloc_queue_node(GFP_KERNEL, NUMA_NO_NODE, &resource->req_lock); + q = blk_alloc_queue_node(GFP_KERNEL, NUMA_NO_NODE); if (!q) goto out_no_q; device->rq_queue = q; diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c index fb23578e9a41..6f2856c6d0f2 100644 --- a/drivers/block/floppy.c +++ b/drivers/block/floppy.c @@ -2231,7 +2231,6 @@ static void request_done(int uptodate) { struct request *req = current_req; struct request_queue *q; - unsigned long flags; int block; char msg[sizeof("request done ") + sizeof(int) * 3]; @@ -2254,10 +2253,7 @@ static void request_done(int uptodate) if (block > _floppy->sect) DRS->maxtrack = 1; - /* unlock chained buffers */ - spin_lock_irqsave(q->queue_lock, flags); floppy_end_request(req, 0); - spin_unlock_irqrestore(q->queue_lock, flags); } else { if (rq_data_dir(req) == WRITE) { /* record write error information */ @@ -2269,9 +2265,7 @@ static void request_done(int uptodate) DRWE->last_error_sector = blk_rq_pos(req); DRWE->last_error_generation = DRS->generation; } - spin_lock_irqsave(q->queue_lock, flags); floppy_end_request(req, BLK_STS_IOERR); - spin_unlock_irqrestore(q->queue_lock, flags); } } diff --git a/drivers/block/loop.c b/drivers/block/loop.c index cb0cc8685076..0939f36548c9 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -77,13 +77,14 @@ #include <linux/falloc.h> #include <linux/uio.h> #include <linux/ioprio.h> +#include <linux/blk-cgroup.h> #include "loop.h" #include <linux/uaccess.h> static DEFINE_IDR(loop_index_idr); -static DEFINE_MUTEX(loop_index_mutex); +static DEFINE_MUTEX(loop_ctl_mutex); static int max_part; static int part_shift; @@ -630,18 +631,7 @@ static void loop_reread_partitions(struct loop_device *lo, { int rc; - /* - * bd_mutex has been held already in release path, so don't - * acquire it if this function is called in such case. - * - * If the reread partition isn't from release path, lo_refcnt - * must be at least one and it can only become zero when the - * current holder is released. - */ - if (!atomic_read(&lo->lo_refcnt)) - rc = __blkdev_reread_part(bdev); - else - rc = blkdev_reread_part(bdev); + rc = blkdev_reread_part(bdev); if (rc) pr_warn("%s: partition scan of loop%d (%s) failed (rc=%d)\n", __func__, lo->lo_number, lo->lo_file_name, rc); @@ -688,26 +678,30 @@ static int loop_validate_file(struct file *file, struct block_device *bdev) static int loop_change_fd(struct loop_device *lo, struct block_device *bdev, unsigned int arg) { - struct file *file, *old_file; + struct file *file = NULL, *old_file; int error; + bool partscan; + error = mutex_lock_killable(&loop_ctl_mutex); + if (error) + return error; error = -ENXIO; if (lo->lo_state != Lo_bound) - goto out; + goto out_err; /* the loop device has to be read-only */ error = -EINVAL; if (!(lo->lo_flags & LO_FLAGS_READ_ONLY)) - goto out; + goto out_err; error = -EBADF; file = fget(arg); if (!file) - goto out; + goto out_err; error = loop_validate_file(file, bdev); if (error) - goto out_putf; + goto out_err; old_file = lo->lo_backing_file; @@ -715,7 +709,7 @@ static int loop_change_fd(struct loop_device *lo, struct block_device *bdev, /* size of the new backing store needs to be the same */ if (get_loop_size(lo, file) != get_loop_size(lo, old_file)) - goto out_putf; + goto out_err; /* and ... switch */ blk_mq_freeze_queue(lo->lo_queue); @@ -726,15 +720,22 @@ static int loop_change_fd(struct loop_device *lo, struct block_device *bdev, lo->old_gfp_mask & ~(__GFP_IO|__GFP_FS)); loop_update_dio(lo); blk_mq_unfreeze_queue(lo->lo_queue); - + partscan = lo->lo_flags & LO_FLAGS_PARTSCAN; + mutex_unlock(&loop_ctl_mutex); + /* + * We must drop file reference outside of loop_ctl_mutex as dropping + * the file ref can take bd_mutex which creates circular locking + * dependency. + */ fput(old_file); - if (lo->lo_flags & LO_FLAGS_PARTSCAN) + if (partscan) loop_reread_partitions(lo, bdev); return 0; - out_putf: - fput(file); - out: +out_err: + mutex_unlock(&loop_ctl_mutex); + if (file) + fput(file); return error; } @@ -909,6 +910,7 @@ static int loop_set_fd(struct loop_device *lo, fmode_t mode, int lo_flags = 0; int error; loff_t size; + bool partscan; /* This is safe, since we have a reference from open(). */ __module_get(THIS_MODULE); @@ -918,13 +920,17 @@ static int loop_set_fd(struct loop_device *lo, fmode_t mode, if (!file) goto out; + error = mutex_lock_killable(&loop_ctl_mutex); + if (error) + goto out_putf; + error = -EBUSY; if (lo->lo_state != Lo_unbound) - goto out_putf; + goto out_unlock; error = loop_validate_file(file, bdev); if (error) - goto out_putf; + goto out_unlock; mapping = file->f_mapping; inode = mapping->host; @@ -936,10 +942,10 @@ static int loop_set_fd(struct loop_device *lo, fmode_t mode, error = -EFBIG; size = get_loop_size(lo, file); if ((loff_t)(sector_t)size != size) - goto out_putf; + goto out_unlock; error = loop_prepare_queue(lo); if (error) - goto out_putf; + goto out_unlock; error = 0; @@ -971,18 +977,22 @@ static int loop_set_fd(struct loop_device *lo, fmode_t mode, lo->lo_state = Lo_bound; if (part_shift) lo->lo_flags |= LO_FLAGS_PARTSCAN; - if (lo->lo_flags & LO_FLAGS_PARTSCAN) - loop_reread_partitions(lo, bdev); + partscan = lo->lo_flags & LO_FLAGS_PARTSCAN; /* Grab the block_device to prevent its destruction after we - * put /dev/loopXX inode. Later in loop_clr_fd() we bdput(bdev). + * put /dev/loopXX inode. Later in __loop_clr_fd() we bdput(bdev). */ bdgrab(bdev); + mutex_unlock(&loop_ctl_mutex); + if (partscan) + loop_reread_partitions(lo, bdev); return 0; - out_putf: +out_unlock: + mutex_unlock(&loop_ctl_mutex); +out_putf: fput(file); - out: +out: /* This is safe: open() is still holding a reference. */ module_put(THIS_MODULE); return error; @@ -1025,39 +1035,31 @@ loop_init_xfer(struct loop_device *lo, struct loop_func_table *xfer, return err; } -static int loop_clr_fd(struct loop_device *lo) +static int __loop_clr_fd(struct loop_device *lo, bool release) { - struct file *filp = lo->lo_backing_file; + struct file *filp = NULL; gfp_t gfp = lo->old_gfp_mask; struct block_device *bdev = lo->lo_device; + int err = 0; + bool partscan = false; + int lo_number; - if (lo->lo_state != Lo_bound) - return -ENXIO; - - /* - * If we've explicitly asked to tear down the loop device, - * and it has an elevated reference count, set it for auto-teardown when - * the last reference goes away. This stops $!~#$@ udev from - * preventing teardown because it decided that it needs to run blkid on - * the loopback device whenever they appear. xfstests is notorious for - * failing tests because blkid via udev races with a losetup - * <dev>/do something like mkfs/losetup -d <dev> causing the losetup -d - * command to fail with EBUSY. - */ - if (atomic_read(&lo->lo_refcnt) > 1) { - lo->lo_flags |= LO_FLAGS_AUTOCLEAR; - mutex_unlock(&lo->lo_ctl_mutex); - return 0; + mutex_lock(&loop_ctl_mutex); + if (WARN_ON_ONCE(lo->lo_state != Lo_rundown)) { + err = -ENXIO; + goto out_unlock; } - if (filp == NULL) - return -EINVAL; + filp = lo->lo_backing_file; + if (filp == NULL) { + err = -EINVAL; + goto out_unlock; + } /* freeze request queue during the transition */ blk_mq_freeze_queue(lo->lo_queue); spin_lock_irq(&lo->lo_lock); - lo->lo_state = Lo_rundown; lo->lo_backing_file = NULL; spin_unlock_irq(&lo->lo_lock); @@ -1093,21 +1095,73 @@ static int loop_clr_fd(struct loop_device *lo) module_put(THIS_MODULE); blk_mq_unfreeze_queue(lo->lo_queue); - if (lo->lo_flags & LO_FLAGS_PARTSCAN && bdev) - loop_reread_partitions(lo, bdev); + partscan = lo->lo_flags & LO_FLAGS_PARTSCAN && bdev; + lo_number = lo->lo_number; lo->lo_flags = 0; if (!part_shift) lo->lo_disk->flags |= GENHD_FL_NO_PART_SCAN; loop_unprepare_queue(lo); - mutex_unlock(&lo->lo_ctl_mutex); +out_unlock: + mutex_unlock(&loop_ctl_mutex); + if (partscan) { + /* + * bd_mutex has been held already in release path, so don't + * acquire it if this function is called in such case. + * + * If the reread partition isn't from release path, lo_refcnt + * must be at least one and it can only become zero when the + * current holder is released. + */ + if (release) + err = __blkdev_reread_part(bdev); + else + err = blkdev_reread_part(bdev); + pr_warn("%s: partition scan of loop%d failed (rc=%d)\n", + __func__, lo_number, err); + /* Device is gone, no point in returning error */ + err = 0; + } /* - * Need not hold lo_ctl_mutex to fput backing file. - * Calling fput holding lo_ctl_mutex triggers a circular + * Need not hold loop_ctl_mutex to fput backing file. + * Calling fput holding loop_ctl_mutex triggers a circular * lock dependency possibility warning as fput can take - * bd_mutex which is usually taken before lo_ctl_mutex. + * bd_mutex which is usually taken before loop_ctl_mutex. */ - fput(filp); - return 0; + if (filp) + fput(filp); + return err; +} + +static int loop_clr_fd(struct loop_device *lo) +{ + int err; + + err = mutex_lock_killable(&loop_ctl_mutex); + if (err) + return err; + if (lo->lo_state != Lo_bound) { + mutex_unlock(&loop_ctl_mutex); + return -ENXIO; + } + /* + * If we've explicitly asked to tear down the loop device, + * and it has an elevated reference count, set it for auto-teardown when + * the last reference goes away. This stops $!~#$@ udev from + * preventing teardown because it decided that it needs to run blkid on + * the loopback device whenever they appear. xfstests is notorious for + * failing tests because blkid via udev races with a losetup + * <dev>/do something like mkfs/losetup -d <dev> causing the losetup -d + * command to fail with EBUSY. + */ + if (atomic_read(&lo->lo_refcnt) > 1) { + lo->lo_flags |= LO_FLAGS_AUTOCLEAR; + mutex_unlock(&loop_ctl_mutex); + return 0; + } + lo->lo_state = Lo_rundown; + mutex_unlock(&loop_ctl_mutex); + + return __loop_clr_fd(lo, false); } static int @@ -1116,47 +1170,58 @@ loop_set_status(struct loop_device *lo, const struct loop_info64 *info) int err; struct loop_func_table *xfer; kuid_t uid = current_uid(); + struct block_device *bdev; + bool partscan = false; + err = mutex_lock_killable(&loop_ctl_mutex); + if (err) + return err; if (lo->lo_encrypt_key_size && !uid_eq(lo->lo_key_owner, uid) && - !capable(CAP_SYS_ADMIN)) - return -EPERM; - if (lo->lo_state != Lo_bound) - return -ENXIO; - if ((unsigned int) info->lo_encrypt_key_size > LO_KEY_SIZE) - return -EINVAL; + !capable(CAP_SYS_ADMIN)) { + err = -EPERM; + goto out_unlock; + } + if (lo->lo_state != Lo_bound) { + err = -ENXIO; + goto out_unlock; + } + if ((unsigned int) info->lo_encrypt_key_size > LO_KEY_SIZE) { + err = -EINVAL; + goto out_unlock; + } /* I/O need to be drained during transfer transition */ blk_mq_freeze_queue(lo->lo_queue); err = loop_release_xfer(lo); if (err) - goto exit; + goto out_unfreeze; if (info->lo_encrypt_type) { unsigned int type = info->lo_encrypt_type; if (type >= MAX_LO_CRYPT) { err = -EINVAL; - goto exit; + goto out_unfreeze; } xfer = xfer_funcs[type]; if (xfer == NULL) { err = -EINVAL; - goto exit; + goto out_unfreeze; } } else xfer = NULL; err = loop_init_xfer(lo, xfer, info); if (err) - goto exit; + goto out_unfreeze; if (lo->lo_offset != info->lo_offset || lo->lo_sizelimit != info->lo_sizelimit) { if (figure_loop_size(lo, info->lo_offset, info->lo_sizelimit)) { err = -EFBIG; - goto exit; + goto out_unfreeze; } } @@ -1188,15 +1253,20 @@ loop_set_status(struct loop_device *lo, const struct loop_info64 *info) /* update dio if lo_offset or transfer is changed */ __loop_update_dio(lo, lo->use_dio); - exit: +out_unfreeze: blk_mq_unfreeze_queue(lo->lo_queue); if (!err && (info->lo_flags & LO_FLAGS_PARTSCAN) && !(lo->lo_flags & LO_FLAGS_PARTSCAN)) { lo->lo_flags |= LO_FLAGS_PARTSCAN; lo->lo_disk->flags &= ~GENHD_FL_NO_PART_SCAN; - loop_reread_partitions(lo, lo->lo_device); + bdev = lo->lo_device; + partscan = true; } +out_unlock: + mutex_unlock(&loop_ctl_mutex); + if (partscan) + loop_reread_partitions(lo, bdev); return err; } @@ -1204,12 +1274,15 @@ loop_set_status(struct loop_device *lo, const struct loop_info64 *info) static int loop_get_status(struct loop_device *lo, struct loop_info64 *info) { - struct file *file; + struct path path; struct kstat stat; int ret; + ret = mutex_lock_killable(&loop_ctl_mutex); + if (ret) + return ret; if (lo->lo_state != Lo_bound) { - mutex_unlock(&lo->lo_ctl_mutex); + mutex_unlock(&loop_ctl_mutex); return -ENXIO; } @@ -1228,17 +1301,17 @@ loop_get_status(struct loop_device *lo, struct loop_info64 *info) lo->lo_encrypt_key_size); } - /* Drop lo_ctl_mutex while we call into the filesystem. */ - file = get_file(lo->lo_backing_file); - mutex_unlock(&lo->lo_ctl_mutex); - ret = vfs_getattr(&file->f_path, &stat, STATX_INO, - AT_STATX_SYNC_AS_STAT); + /* Drop loop_ctl_mutex while we call into the filesystem. */ + path = lo->lo_backing_file->f_path; + path_get(&path); + mutex_unlock(&loop_ctl_mutex); + ret = vfs_getattr(&path, &stat, STATX_INO, AT_STATX_SYNC_AS_STAT); if (!ret) { info->lo_device = huge_encode_dev(stat.dev); info->lo_inode = stat.ino; info->lo_rdevice = huge_encode_dev(stat.rdev); } - fput(file); + path_put(&path); return ret; } @@ -1322,10 +1395,8 @@ loop_get_status_old(struct loop_device *lo, struct loop_info __user *arg) { struct loop_info64 info64; int err; - if (!arg) { - mutex_unlock(&lo->lo_ctl_mutex); + if (!arg) return -EINVAL; - } err = loop_get_status(lo, &info64); if (!err) err = loop_info64_to_old(&info64, &info); @@ -1340,10 +1411,8 @@ loop_get_status64(struct loop_device *lo, struct loop_info64 __user *arg) { struct loop_info64 info64; int err; - if (!arg) { - mutex_unlock(&lo->lo_ctl_mutex); + if (!arg) return -EINVAL; - } err = loop_get_status(lo, &info64); if (!err && copy_to_user(arg, &info64, sizeof(info64))) err = -EFAULT; @@ -1393,70 +1462,73 @@ static int loop_set_block_size(struct loop_device *lo, unsigned long arg) return 0; } +static int lo_simple_ioctl(struct loop_device *lo, unsigned int cmd, + unsigned long arg) +{ + int err; + + err = mutex_lock_killable(&loop_ctl_mutex); + if (err) + return err; + switch (cmd) { + case LOOP_SET_CAPACITY: + err = loop_set_capacity(lo); + break; + case LOOP_SET_DIRECT_IO: + err = loop_set_dio(lo, arg); + break; + case LOOP_SET_BLOCK_SIZE: + err = loop_set_block_size(lo, arg); + break; + default: + err = lo->ioctl ? lo->ioctl(lo, cmd, arg) : -EINVAL; + } + mutex_unlock(&loop_ctl_mutex); + return err; +} + static int lo_ioctl(struct block_device *bdev, fmode_t mode, unsigned int cmd, unsigned long arg) { struct loop_device *lo = bdev->bd_disk->private_data; int err; - err = mutex_lock_killable_nested(&lo->lo_ctl_mutex, 1); - if (err) - goto out_unlocked; - switch (cmd) { case LOOP_SET_FD: - err = loop_set_fd(lo, mode, bdev, arg); - break; + return loop_set_fd(lo, mode, bdev, arg); case LOOP_CHANGE_FD: - err = loop_change_fd(lo, bdev, arg); - break; + return loop_change_fd(lo, bdev, arg); case LOOP_CLR_FD: - /* loop_clr_fd would have unlocked lo_ctl_mutex on success */ - err = loop_clr_fd(lo); - if (!err) - goto out_unlocked; - break; + return loop_clr_fd(lo); case LOOP_SET_STATUS: err = -EPERM; - if ((mode & FMODE_WRITE) || capable(CAP_SYS_ADMIN)) + if ((mode & FMODE_WRITE) || capable(CAP_SYS_ADMIN)) { err = loop_set_status_old(lo, (struct loop_info __user *)arg); + } break; case LOOP_GET_STATUS: - err = loop_get_status_old(lo, (struct loop_info __user *) arg); - /* loop_get_status() unlocks lo_ctl_mutex */ - goto out_unlocked; + return loop_get_status_old(lo, (struct loop_info __user *) arg); case LOOP_SET_STATUS64: err = -EPERM; - if ((mode & FMODE_WRITE) || capable(CAP_SYS_ADMIN)) + if ((mode & FMODE_WRITE) || capable(CAP_SYS_ADMIN)) { err = loop_set_status64(lo, (struct loop_info64 __user *) arg); + } break; case LOOP_GET_STATUS64: - err = loop_get_status64(lo, (struct loop_info64 __user *) arg); - /* loop_get_status() unlocks lo_ctl_mutex */ - goto out_unlocked; + return loop_get_status64(lo, (struct loop_info64 __user *) arg); case LOOP_SET_CAPACITY: - err = -EPERM; - if ((mode & FMODE_WRITE) || capable(CAP_SYS_ADMIN)) - err = loop_set_capacity(lo); - break; case LOOP_SET_DIRECT_IO: - err = -EPERM; - if ((mode & FMODE_WRITE) || capable(CAP_SYS_ADMIN)) - err = loop_set_dio(lo, arg); - break; case LOOP_SET_BLOCK_SIZE: - err = -EPERM; - if ((mode & FMODE_WRITE) || capable(CAP_SYS_ADMIN)) - err = loop_set_block_size(lo, arg); - break; + if (!(mode & FMODE_WRITE) && !capable(CAP_SYS_ADMIN)) + return -EPERM; + /* Fall through */ default: - err = lo->ioctl ? lo->ioctl(lo, cmd, arg) : -EINVAL; + err = lo_simple_ioctl(lo, cmd, arg); + break; } - mutex_unlock(&lo->lo_ctl_mutex); -out_unlocked: return err; } @@ -1570,10 +1642,8 @@ loop_get_status_compat(struct loop_device *lo, struct loop_info64 info64; int err; - if (!arg) { - mutex_unlock(&lo->lo_ctl_mutex); + if (!arg) return -EINVAL; - } err = loop_get_status(lo, &info64); if (!err) err = loop_info64_to_compat(&info64, arg); @@ -1588,20 +1658,12 @@ static int lo_compat_ioctl(struct block_device *bdev, fmode_t mode, switch(cmd) { case LOOP_SET_STATUS: - err = mutex_lock_killable(&lo->lo_ctl_mutex); - if (!err) { - err = loop_set_status_compat(lo, - (const struct compat_loop_info __user *)arg); - mutex_unlock(&lo->lo_ctl_mutex); - } + err = loop_set_status_compat(lo, + (const struct compat_loop_info __user *)arg); break; case LOOP_GET_STATUS: - err = mutex_lock_killable(&lo->lo_ctl_mutex); - if (!err) { - err = loop_get_status_compat(lo, - (struct compat_loop_info __user *)arg); - /* loop_get_status() unlocks lo_ctl_mutex */ - } + err = loop_get_status_compat(lo, + (struct compat_loop_info __user *)arg); break; case LOOP_SET_CAPACITY: case LOOP_CLR_FD: @@ -1625,9 +1687,11 @@ static int lo_compat_ioctl(struct block_device *bdev, fmode_t mode, static int lo_open(struct block_device *bdev, fmode_t mode) { struct loop_device *lo; - int err = 0; + int err; - mutex_lock(&loop_index_mutex); + err = mutex_lock_killable(&loop_ctl_mutex); + if (err) + return err; lo = bdev->bd_disk->private_data; if (!lo) { err = -ENXIO; @@ -1636,26 +1700,30 @@ static int lo_open(struct block_device *bdev, fmode_t mode) atomic_inc(&lo->lo_refcnt); out: - mutex_unlock(&loop_index_mutex); + mutex_unlock(&loop_ctl_mutex); return err; } -static void __lo_release(struct loop_device *lo) +static void lo_release(struct gendisk *disk, fmode_t mode) { - int err; + struct loop_device *lo; + mutex_lock(&loop_ctl_mutex); + lo = disk->private_data; if (atomic_dec_return(&lo->lo_refcnt)) - return; + goto out_unlock; - mutex_lock(&lo->lo_ctl_mutex); if (lo->lo_flags & LO_FLAGS_AUTOCLEAR) { + if (lo->lo_state != Lo_bound) + goto out_unlock; + lo->lo_state = Lo_rundown; + mutex_unlock(&loop_ctl_mutex); /* * In autoclear mode, stop the loop thread * and remove configuration after last close. */ - err = loop_clr_fd(lo); - if (!err) - return; + __loop_clr_fd(lo, true); + return; } else if (lo->lo_state == Lo_bound) { /* * Otherwise keep thread (if running) and config, @@ -1665,14 +1733,8 @@ static void __lo_release(struct loop_device *lo) blk_mq_unfreeze_queue(lo->lo_queue); } - mutex_unlock(&lo->lo_ctl_mutex); -} - -static void lo_release(struct gendisk *disk, fmode_t mode) -{ - mutex_lock(&loop_index_mutex); - __lo_release(disk->private_data); - mutex_unlock(&loop_index_mutex); +out_unlock: + mutex_unlock(&loop_ctl_mutex); } static const struct block_device_operations lo_fops = { @@ -1711,10 +1773,10 @@ static int unregister_transfer_cb(int id, void *ptr, void *data) struct loop_device *lo = ptr; struct loop_func_table *xfer = data; - mutex_lock(&lo->lo_ctl_mutex); + mutex_lock(&loop_ctl_mutex); if (lo->lo_encryption == xfer) loop_release_xfer(lo); - mutex_unlock(&lo->lo_ctl_mutex); + mutex_unlock(&loop_ctl_mutex); return 0; } @@ -1759,8 +1821,8 @@ static blk_status_t loop_queue_rq(struct blk_mq_hw_ctx *hctx, /* always use the first bio's css */ #ifdef CONFIG_BLK_CGROUP - if (cmd->use_aio && rq->bio && rq->bio->bi_css) { - cmd->css = rq->bio->bi_css; + if (cmd->use_aio && rq->bio && rq->bio->bi_blkg) { + cmd->css = &bio_blkcg(rq->bio)->css; css_get(cmd->css); } else #endif @@ -1853,7 +1915,7 @@ static int loop_add(struct loop_device **l, int i) goto out_free_idr; lo->lo_queue = blk_mq_init_queue(&lo->tag_set); - if (IS_ERR_OR_NULL(lo->lo_queue)) { + if (IS_ERR(lo->lo_queue)) { err = PTR_ERR(lo->lo_queue); goto out_cleanup_tags; } @@ -1895,7 +1957,6 @@ static int loop_add(struct loop_device **l, int i) if (!part_shift) disk->flags |= GENHD_FL_NO_PART_SCAN; disk->flags |= GENHD_FL_EXT_DEVT; - mutex_init(&lo->lo_ctl_mutex); atomic_set(&lo->lo_refcnt, 0); lo->lo_number = i; spin_lock_init(&lo->lo_lock); @@ -1974,7 +2035,7 @@ static struct kobject *loop_probe(dev_t dev, int *part, void *data) struct kobject *kobj; int err; - mutex_lock(&loop_index_mutex); + mutex_lock(&loop_ctl_mutex); err = loop_lookup(&lo, MINOR(dev) >> part_shift); if (err < 0) err = loop_add(&lo, MINOR(dev) >> part_shift); @@ -1982,7 +2043,7 @@ static struct kobject *loop_probe(dev_t dev, int *part, void *data) kobj = NULL; else kobj = get_disk_and_module(lo->lo_disk); - mutex_unlock(&loop_index_mutex); + mutex_unlock(&loop_ctl_mutex); *part = 0; return kobj; @@ -1992,9 +2053,13 @@ static long loop_control_ioctl(struct file *file, unsigned int cmd, unsigned long parm) { struct loop_device *lo; - int ret = -ENOSYS; + int ret; - mutex_lock(&loop_index_mutex); + ret = mutex_lock_killable(&loop_ctl_mutex); + if (ret) + return ret; + + ret = -ENOSYS; switch (cmd) { case LOOP_CTL_ADD: ret = loop_lookup(&lo, parm); @@ -2008,21 +2073,15 @@ static long loop_control_ioctl(struct file *file, unsigned int cmd, ret = loop_lookup(&lo, parm); if (ret < 0) break; - ret = mutex_lock_killable(&lo->lo_ctl_mutex); - if (ret) - break; if (lo->lo_state != Lo_unbound) { ret = -EBUSY; - mutex_unlock(&lo->lo_ctl_mutex); break; } if (atomic_read(&lo->lo_refcnt) > 0) { ret = -EBUSY; - mutex_unlock(&lo->lo_ctl_mutex); break; } lo->lo_disk->private_data = NULL; - mutex_unlock(&lo->lo_ctl_mutex); idr_remove(&loop_index_idr, lo->lo_number); loop_remove(lo); break; @@ -2032,7 +2091,7 @@ static long loop_control_ioctl(struct file *file, unsigned int cmd, break; ret = loop_add(&lo, -1); } - mutex_unlock(&loop_index_mutex); + mutex_unlock(&loop_ctl_mutex); return ret; } @@ -2116,10 +2175,10 @@ static int __init loop_init(void) THIS_MODULE, loop_probe, NULL, NULL); /* pre-create number of devices given by config or max_loop */ - mutex_lock(&loop_index_mutex); + mutex_lock(&loop_ctl_mutex); for (i = 0; i < nr; i++) loop_add(&lo, i); - mutex_unlock(&loop_index_mutex); + mutex_unlock(&loop_ctl_mutex); printk(KERN_INFO "loop: module loaded\n"); return 0; diff --git a/drivers/block/loop.h b/drivers/block/loop.h index 4d42c7af7de7..af75a5ee4094 100644 --- a/drivers/block/loop.h +++ b/drivers/block/loop.h @@ -54,7 +54,6 @@ struct loop_device { spinlock_t lo_lock; int lo_state; - struct mutex lo_ctl_mutex; struct kthread_worker worker; struct task_struct *worker_task; bool use_dio; diff --git a/drivers/block/mtip32xx/mtip32xx.c b/drivers/block/mtip32xx/mtip32xx.c index a7daa8acbab3..88e8440e75c3 100644 --- a/drivers/block/mtip32xx/mtip32xx.c +++ b/drivers/block/mtip32xx/mtip32xx.c @@ -168,41 +168,6 @@ static bool mtip_check_surprise_removal(struct pci_dev *pdev) return false; /* device present */ } -/* we have to use runtime tag to setup command header */ -static void mtip_init_cmd_header(struct request *rq) -{ - struct driver_data *dd = rq->q->queuedata; - struct mtip_cmd *cmd = blk_mq_rq_to_pdu(rq); - - /* Point the command headers at the command tables. */ - cmd->command_header = dd->port->command_list + - (sizeof(struct mtip_cmd_hdr) * rq->tag); - cmd->command_header_dma = dd->port->command_list_dma + - (sizeof(struct mtip_cmd_hdr) * rq->tag); - - if (test_bit(MTIP_PF_HOST_CAP_64, &dd->port->flags)) - cmd->command_header->ctbau = __force_bit2int cpu_to_le32((cmd->command_dma >> 16) >> 16); - - cmd->command_header->ctba = __force_bit2int cpu_to_le32(cmd->command_dma & 0xFFFFFFFF); -} - -static struct mtip_cmd *mtip_get_int_command(struct driver_data *dd) -{ - struct request *rq; - - if (mtip_check_surprise_removal(dd->pdev)) - return NULL; - - rq = blk_mq_alloc_request(dd->queue, REQ_OP_DRV_IN, BLK_MQ_REQ_RESERVED); - if (IS_ERR(rq)) - return NULL; - - /* Internal cmd isn't submitted via .queue_rq */ - mtip_init_cmd_header(rq); - - return blk_mq_rq_to_pdu(rq); -} - static struct mtip_cmd *mtip_cmd_from_tag(struct driver_data *dd, unsigned int tag) { @@ -1023,13 +988,14 @@ static int mtip_exec_internal_command(struct mtip_port *port, return -EFAULT; } - int_cmd = mtip_get_int_command(dd); - if (!int_cmd) { + if (mtip_check_surprise_removal(dd->pdev)) + return -EFAULT; + + rq = blk_mq_alloc_request(dd->queue, REQ_OP_DRV_IN, BLK_MQ_REQ_RESERVED); + if (IS_ERR(rq)) { dbg_printk(MTIP_DRV_NAME "Unable to allocate tag for PIO cmd\n"); return -EFAULT; } - rq = blk_mq_rq_from_pdu(int_cmd); - rq->special = &icmd; set_bit(MTIP_PF_IC_ACTIVE_BIT, &port->flags); @@ -1050,6 +1016,8 @@ static int mtip_exec_internal_command(struct mtip_port *port, } /* Copy the command to the command table */ + int_cmd = blk_mq_rq_to_pdu(rq); + int_cmd->icmd = &icmd; memcpy(int_cmd->command, fis, fis_len*4); rq->timeout = timeout; @@ -1423,23 +1391,19 @@ static int mtip_get_smart_attr(struct mtip_port *port, unsigned int id, * @dd pointer to driver_data structure * @lba starting lba * @len # of 512b sectors to trim - * - * return value - * -ENOMEM Out of dma memory - * -EINVAL Invalid parameters passed in, trim not supported - * -EIO Error submitting trim request to hw */ -static int mtip_send_trim(struct driver_data *dd, unsigned int lba, - unsigned int len) +static blk_status_t mtip_send_trim(struct driver_data *dd, unsigned int lba, + unsigned int len) { - int i, rv = 0; u64 tlba, tlen, sect_left; struct mtip_trim_entry *buf; dma_addr_t dma_addr; struct host_to_dev_fis fis; + blk_status_t ret = BLK_STS_OK; + int i; if (!len || dd->trim_supp == false) - return -EINVAL; + return BLK_STS_IOERR; /* Trim request too big */ WARN_ON(len > (MTIP_MAX_TRIM_ENTRY_LEN * MTIP_MAX_TRIM_ENTRIES)); @@ -1454,7 +1418,7 @@ static int mtip_send_trim(struct driver_data *dd, unsigned int lba, buf = dmam_alloc_coherent(&dd->pdev->dev, ATA_SECT_SIZE, &dma_addr, GFP_KERNEL); if (!buf) - return -ENOMEM; + return BLK_STS_RESOURCE; memset(buf, 0, ATA_SECT_SIZE); for (i = 0, sect_left = len, tlba = lba; @@ -1463,8 +1427,8 @@ static int mtip_send_trim(struct driver_data *dd, unsigned int lba, tlen = (sect_left >= MTIP_MAX_TRIM_ENTRY_LEN ? MTIP_MAX_TRIM_ENTRY_LEN : sect_left); - buf[i].lba = __force_bit2int cpu_to_le32(tlba); - buf[i].range = __force_bit2int cpu_to_le16(tlen); + buf[i].lba = cpu_to_le32(tlba); + buf[i].range = cpu_to_le16(tlen); tlba += tlen; sect_left -= tlen; } @@ -1486,10 +1450,10 @@ static int mtip_send_trim(struct driver_data *dd, unsigned int lba, ATA_SECT_SIZE, 0, MTIP_TRIM_TIMEOUT_MS) < 0) - rv = -EIO; + ret = BLK_STS_IOERR; dmam_free_coherent(&dd->pdev->dev, ATA_SECT_SIZE, buf, dma_addr); - return rv; + return ret; } /* @@ -1585,23 +1549,20 @@ static inline void fill_command_sg(struct driver_data *dd, int n; unsigned int dma_len; struct mtip_cmd_sg *command_sg; - struct scatterlist *sg = command->sg; + struct scatterlist *sg; command_sg = command->command + AHCI_CMD_TBL_HDR_SZ; - for (n = 0; n < nents; n++) { + for_each_sg(command->sg, sg, nents, n) { dma_len = sg_dma_len(sg); if (dma_len > 0x400000) dev_err(&dd->pdev->dev, "DMA segment length truncated\n"); - command_sg->info = __force_bit2int - cpu_to_le32((dma_len-1) & 0x3FFFFF); - command_sg->dba = __force_bit2int - cpu_to_le32(sg_dma_address(sg)); - command_sg->dba_upper = __force_bit2int + command_sg->info = cpu_to_le32((dma_len-1) & 0x3FFFFF); + command_sg->dba = cpu_to_le32(sg_dma_address(sg)); + command_sg->dba_upper = cpu_to_le32((sg_dma_address(sg) >> 16) >> 16); command_sg++; - sg++; } } @@ -2171,7 +2132,6 @@ static int mtip_hw_ioctl(struct driver_data *dd, unsigned int cmd, * @dd Pointer to the driver data structure. * @start First sector to read. * @nsect Number of sectors to read. - * @nents Number of entries in scatter list for the read command. * @tag The tag of this read command. * @callback Pointer to the function that should be called * when the read completes. @@ -2183,16 +2143,20 @@ static int mtip_hw_ioctl(struct driver_data *dd, unsigned int cmd, * None */ static void mtip_hw_submit_io(struct driver_data *dd, struct request *rq, - struct mtip_cmd *command, int nents, + struct mtip_cmd *command, struct blk_mq_hw_ctx *hctx) { + struct mtip_cmd_hdr *hdr = + dd->port->command_list + sizeof(struct mtip_cmd_hdr) * rq->tag; struct host_to_dev_fis *fis; struct mtip_port *port = dd->port; int dma_dir = rq_data_dir(rq) == READ ? DMA_FROM_DEVICE : DMA_TO_DEVICE; u64 start = blk_rq_pos(rq); unsigned int nsect = blk_rq_sectors(rq); + unsigned int nents; /* Map the scatter list for DMA access */ + nents = blk_rq_map_sg(hctx->queue, rq, command->sg); nents = dma_map_sg(&dd->pdev->dev, command->sg, nents, dma_dir); prefetch(&port->flags); @@ -2233,10 +2197,11 @@ static void mtip_hw_submit_io(struct driver_data *dd, struct request *rq, fis->device |= 1 << 7; /* Populate the command header */ - command->command_header->opts = - __force_bit2int cpu_to_le32( - (nents << 16) | 5 | AHCI_CMD_PREFETCH); - command->command_header->byte_count = 0; + hdr->ctba = cpu_to_le32(command->command_dma & 0xFFFFFFFF); + if (test_bit(MTIP_PF_HOST_CAP_64, &dd->port->flags)) + hdr->ctbau = cpu_to_le32((command->command_dma >> 16) >> 16); + hdr->opts = cpu_to_le32((nents << 16) | 5 | AHCI_CMD_PREFETCH); + hdr->byte_count = 0; command->direction = dma_dir; @@ -2715,12 +2680,12 @@ static void mtip_softirq_done_fn(struct request *rq) cmd->direction); if (unlikely(cmd->unaligned)) - up(&dd->port->cmd_slot_unal); + atomic_inc(&dd->port->cmd_slot_unal); blk_mq_end_request(rq, cmd->status); } -static void mtip_abort_cmd(struct request *req, void *data, bool reserved) +static bool mtip_abort_cmd(struct request *req, void *data, bool reserved) { struct mtip_cmd *cmd = blk_mq_rq_to_pdu(req); struct driver_data *dd = data; @@ -2730,14 +2695,16 @@ static void mtip_abort_cmd(struct request *req, void *data, bool reserved) clear_bit(req->tag, dd->port->cmds_to_issue); cmd->status = BLK_STS_IOERR; mtip_softirq_done_fn(req); + return true; } -static void mtip_queue_cmd(struct request *req, void *data, bool reserved) +static bool mtip_queue_cmd(struct request *req, void *data, bool reserved) { struct driver_data *dd = data; set_bit(req->tag, dd->port->cmds_to_issue); blk_abort_request(req); + return true; } /* @@ -2803,10 +2770,7 @@ restart_eh: blk_mq_quiesce_queue(dd->queue); - spin_lock(dd->queue->queue_lock); - blk_mq_tagset_busy_iter(&dd->tags, - mtip_queue_cmd, dd); - spin_unlock(dd->queue->queue_lock); + blk_mq_tagset_busy_iter(&dd->tags, mtip_queue_cmd, dd); set_bit(MTIP_PF_ISSUE_CMDS_BIT, &dd->port->flags); @@ -3026,7 +2990,7 @@ static int mtip_hw_init(struct driver_data *dd) else dd->unal_qdepth = 0; - sema_init(&dd->port->cmd_slot_unal, dd->unal_qdepth); + atomic_set(&dd->port->cmd_slot_unal, dd->unal_qdepth); /* Spinlock to prevent concurrent issue */ for (i = 0; i < MTIP_MAX_SLOT_GROUPS; i++) @@ -3531,58 +3495,24 @@ static inline bool is_se_active(struct driver_data *dd) return false; } -/* - * Block layer make request function. - * - * This function is called by the kernel to process a BIO for - * the P320 device. - * - * @queue Pointer to the request queue. Unused other than to obtain - * the driver data structure. - * @rq Pointer to the request. - * - */ -static int mtip_submit_request(struct blk_mq_hw_ctx *hctx, struct request *rq) +static inline bool is_stopped(struct driver_data *dd, struct request *rq) { - struct driver_data *dd = hctx->queue->queuedata; - struct mtip_cmd *cmd = blk_mq_rq_to_pdu(rq); - unsigned int nents; - - if (is_se_active(dd)) - return -ENODATA; - - if (unlikely(dd->dd_flag & MTIP_DDF_STOP_IO)) { - if (unlikely(test_bit(MTIP_DDF_REMOVE_PENDING_BIT, - &dd->dd_flag))) { - return -ENXIO; - } - if (unlikely(test_bit(MTIP_DDF_OVER_TEMP_BIT, &dd->dd_flag))) { - return -ENODATA; - } - if (unlikely(test_bit(MTIP_DDF_WRITE_PROTECT_BIT, - &dd->dd_flag) && - rq_data_dir(rq))) { - return -ENODATA; - } - if (unlikely(test_bit(MTIP_DDF_SEC_LOCK_BIT, &dd->dd_flag) || - test_bit(MTIP_DDF_REBUILD_FAILED_BIT, &dd->dd_flag))) - return -ENODATA; - } - - if (req_op(rq) == REQ_OP_DISCARD) { - int err; - - err = mtip_send_trim(dd, blk_rq_pos(rq), blk_rq_sectors(rq)); - blk_mq_end_request(rq, err ? BLK_STS_IOERR : BLK_STS_OK); - return 0; - } + if (likely(!(dd->dd_flag & MTIP_DDF_STOP_IO))) + return false; - /* Create the scatter list for this request. */ - nents = blk_rq_map_sg(hctx->queue, rq, cmd->sg); + if (test_bit(MTIP_DDF_REMOVE_PENDING_BIT, &dd->dd_flag)) + return true; + if (test_bit(MTIP_DDF_OVER_TEMP_BIT, &dd->dd_flag)) + return true; + if (test_bit(MTIP_DDF_WRITE_PROTECT_BIT, &dd->dd_flag) && + rq_data_dir(rq)) + return true; + if (test_bit(MTIP_DDF_SEC_LOCK_BIT, &dd->dd_flag)) + return true; + if (test_bit(MTIP_DDF_REBUILD_FAILED_BIT, &dd->dd_flag)) + return true; - /* Issue the read/write. */ - mtip_hw_submit_io(dd, rq, cmd, nents, hctx); - return 0; + return false; } static bool mtip_check_unal_depth(struct blk_mq_hw_ctx *hctx, @@ -3603,7 +3533,7 @@ static bool mtip_check_unal_depth(struct blk_mq_hw_ctx *hctx, cmd->unaligned = 1; } - if (cmd->unaligned && down_trylock(&dd->port->cmd_slot_unal)) + if (cmd->unaligned && atomic_dec_if_positive(&dd->port->cmd_slot_unal) >= 0) return true; return false; @@ -3613,32 +3543,33 @@ static blk_status_t mtip_issue_reserved_cmd(struct blk_mq_hw_ctx *hctx, struct request *rq) { struct driver_data *dd = hctx->queue->queuedata; - struct mtip_int_cmd *icmd = rq->special; struct mtip_cmd *cmd = blk_mq_rq_to_pdu(rq); + struct mtip_int_cmd *icmd = cmd->icmd; + struct mtip_cmd_hdr *hdr = + dd->port->command_list + sizeof(struct mtip_cmd_hdr) * rq->tag; struct mtip_cmd_sg *command_sg; if (mtip_commands_active(dd->port)) - return BLK_STS_RESOURCE; + return BLK_STS_DEV_RESOURCE; + hdr->ctba = cpu_to_le32(cmd->command_dma & 0xFFFFFFFF); + if (test_bit(MTIP_PF_HOST_CAP_64, &dd->port->flags)) + hdr->ctbau = cpu_to_le32((cmd->command_dma >> 16) >> 16); /* Populate the SG list */ - cmd->command_header->opts = - __force_bit2int cpu_to_le32(icmd->opts | icmd->fis_len); + hdr->opts = cpu_to_le32(icmd->opts | icmd->fis_len); if (icmd->buf_len) { command_sg = cmd->command + AHCI_CMD_TBL_HDR_SZ; - command_sg->info = - __force_bit2int cpu_to_le32((icmd->buf_len-1) & 0x3FFFFF); - command_sg->dba = - __force_bit2int cpu_to_le32(icmd->buffer & 0xFFFFFFFF); + command_sg->info = cpu_to_le32((icmd->buf_len-1) & 0x3FFFFF); + command_sg->dba = cpu_to_le32(icmd->buffer & 0xFFFFFFFF); command_sg->dba_upper = - __force_bit2int cpu_to_le32((icmd->buffer >> 16) >> 16); + cpu_to_le32((icmd->buffer >> 16) >> 16); - cmd->command_header->opts |= - __force_bit2int cpu_to_le32((1 << 16)); + hdr->opts |= cpu_to_le32((1 << 16)); } /* Populate the command header */ - cmd->command_header->byte_count = 0; + hdr->byte_count = 0; blk_mq_start_request(rq); mtip_issue_non_ncq_command(dd->port, rq->tag); @@ -3648,23 +3579,25 @@ static blk_status_t mtip_issue_reserved_cmd(struct blk_mq_hw_ctx *hctx, static blk_status_t mtip_queue_rq(struct blk_mq_hw_ctx *hctx, const struct blk_mq_queue_data *bd) { + struct driver_data *dd = hctx->queue->queuedata; struct request *rq = bd->rq; - int ret; - - mtip_init_cmd_header(rq); + struct mtip_cmd *cmd = blk_mq_rq_to_pdu(rq); if (blk_rq_is_passthrough(rq)) return mtip_issue_reserved_cmd(hctx, rq); if (unlikely(mtip_check_unal_depth(hctx, rq))) - return BLK_STS_RESOURCE; + return BLK_STS_DEV_RESOURCE; + + if (is_se_active(dd) || is_stopped(dd, rq)) + return BLK_STS_IOERR; blk_mq_start_request(rq); - ret = mtip_submit_request(hctx, rq); - if (likely(!ret)) - return BLK_STS_OK; - return BLK_STS_IOERR; + if (req_op(rq) == REQ_OP_DISCARD) + return mtip_send_trim(dd, blk_rq_pos(rq), blk_rq_sectors(rq)); + mtip_hw_submit_io(dd, rq, cmd, hctx); + return BLK_STS_OK; } static void mtip_free_cmd(struct blk_mq_tag_set *set, struct request *rq, @@ -3920,12 +3853,13 @@ protocol_init_error: return rv; } -static void mtip_no_dev_cleanup(struct request *rq, void *data, bool reserv) +static bool mtip_no_dev_cleanup(struct request *rq, void *data, bool reserv) { struct mtip_cmd *cmd = blk_mq_rq_to_pdu(rq); cmd->status = BLK_STS_IOERR; blk_mq_complete_request(rq); + return true; } /* diff --git a/drivers/block/mtip32xx/mtip32xx.h b/drivers/block/mtip32xx/mtip32xx.h index e20e55dab443..abce25f27f57 100644 --- a/drivers/block/mtip32xx/mtip32xx.h +++ b/drivers/block/mtip32xx/mtip32xx.h @@ -126,8 +126,6 @@ #define MTIP_DFS_MAX_BUF_SIZE 1024 -#define __force_bit2int (unsigned int __force) - enum { /* below are bit numbers in 'flags' defined in mtip_port */ MTIP_PF_IC_ACTIVE_BIT = 0, /* pio/ioctl */ @@ -174,10 +172,10 @@ enum { struct smart_attr { u8 attr_id; - u16 flags; + __le16 flags; u8 cur; u8 worst; - u32 data; + __le32 data; u8 res[3]; } __packed; @@ -200,9 +198,9 @@ struct mtip_work { #define MTIP_MAX_TRIM_ENTRY_LEN 0xfff8 struct mtip_trim_entry { - u32 lba; /* starting lba of region */ - u16 rsvd; /* unused */ - u16 range; /* # of 512b blocks to trim */ + __le32 lba; /* starting lba of region */ + __le16 rsvd; /* unused */ + __le16 range; /* # of 512b blocks to trim */ } __packed; struct mtip_trim { @@ -278,24 +276,24 @@ struct mtip_cmd_hdr { * - Bit 5 Unused in this implementation. * - Bits 4:0 Length of the command FIS in DWords (DWord = 4 bytes). */ - unsigned int opts; + __le32 opts; /* This field is unsed when using NCQ. */ union { - unsigned int byte_count; - unsigned int status; + __le32 byte_count; + __le32 status; }; /* * Lower 32 bits of the command table address associated with this * header. The command table addresses must be 128 byte aligned. */ - unsigned int ctba; + __le32 ctba; /* * If 64 bit addressing is used this field is the upper 32 bits * of the command table address associated with this command. */ - unsigned int ctbau; + __le32 ctbau; /* Reserved and unused. */ - unsigned int res[4]; + u32 res[4]; }; /* Command scatter gather structure (PRD). */ @@ -305,31 +303,28 @@ struct mtip_cmd_sg { * address must be 8 byte aligned signified by bits 2:0 being * set to 0. */ - unsigned int dba; + __le32 dba; /* * When 64 bit addressing is used this field is the upper * 32 bits of the data buffer address. */ - unsigned int dba_upper; + __le32 dba_upper; /* Unused. */ - unsigned int reserved; + __le32 reserved; /* * Bit 31: interrupt when this data block has been transferred. * Bits 30..22: reserved * Bits 21..0: byte count (minus 1). For P320 the byte count must be * 8 byte aligned signified by bits 2:0 being set to 1. */ - unsigned int info; + __le32 info; }; struct mtip_port; +struct mtip_int_cmd; + /* Structure used to describe a command. */ struct mtip_cmd { - - struct mtip_cmd_hdr *command_header; /* ptr to command header entry */ - - dma_addr_t command_header_dma; /* corresponding physical address */ - void *command; /* ptr to command table entry */ dma_addr_t command_dma; /* corresponding physical address */ @@ -338,7 +333,10 @@ struct mtip_cmd { int unaligned; /* command is unaligned on 4k boundary */ - struct scatterlist sg[MTIP_MAX_SG]; /* Scatter list entries */ + union { + struct scatterlist sg[MTIP_MAX_SG]; /* Scatter list entries */ + struct mtip_int_cmd *icmd; + }; int retries; /* The number of retries left for this command. */ @@ -435,8 +433,8 @@ struct mtip_port { */ unsigned long ic_pause_timer; - /* Semaphore to control queue depth of unaligned IOs */ - struct semaphore cmd_slot_unal; + /* Counter to control queue depth of unaligned IOs */ + atomic_t cmd_slot_unal; /* Spinlock for working around command-issue bug. */ spinlock_t cmd_issue_lock[MTIP_MAX_SLOT_GROUPS]; diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c index 4d4d6129ff66..08696f5f00bb 100644 --- a/drivers/block/nbd.c +++ b/drivers/block/nbd.c @@ -734,12 +734,13 @@ static void recv_work(struct work_struct *work) kfree(args); } -static void nbd_clear_req(struct request *req, void *data, bool reserved) +static bool nbd_clear_req(struct request *req, void *data, bool reserved) { struct nbd_cmd *cmd = blk_mq_rq_to_pdu(req); cmd->status = BLK_STS_IOERR; blk_mq_complete_request(req); + return true; } static void nbd_clear_que(struct nbd_device *nbd) diff --git a/drivers/block/null_blk.h b/drivers/block/null_blk.h index 7685df43f1ef..b3df2793e7cd 100644 --- a/drivers/block/null_blk.h +++ b/drivers/block/null_blk.h @@ -49,6 +49,7 @@ struct nullb_device { unsigned long completion_nsec; /* time in ns to complete a request */ unsigned long cache_size; /* disk cache size in MB */ unsigned long zone_size; /* zone size in MB if device is zoned */ + unsigned int zone_nr_conv; /* number of conventional zones */ unsigned int submit_queues; /* number of submission queues */ unsigned int home_node; /* home node for the device */ unsigned int queue_mode; /* block interface */ diff --git a/drivers/block/null_blk_main.c b/drivers/block/null_blk_main.c index 09339203dfba..62c9654b9ce8 100644 --- a/drivers/block/null_blk_main.c +++ b/drivers/block/null_blk_main.c @@ -188,6 +188,10 @@ static unsigned long g_zone_size = 256; module_param_named(zone_size, g_zone_size, ulong, S_IRUGO); MODULE_PARM_DESC(zone_size, "Zone size in MB when block device is zoned. Must be power-of-two: Default: 256"); +static unsigned int g_zone_nr_conv; +module_param_named(zone_nr_conv, g_zone_nr_conv, uint, 0444); +MODULE_PARM_DESC(zone_nr_conv, "Number of conventional zones when block device is zoned. Default: 0"); + static struct nullb_device *null_alloc_dev(void); static void null_free_dev(struct nullb_device *dev); static void null_del_dev(struct nullb *nullb); @@ -293,6 +297,7 @@ NULLB_DEVICE_ATTR(mbps, uint); NULLB_DEVICE_ATTR(cache_size, ulong); NULLB_DEVICE_ATTR(zoned, bool); NULLB_DEVICE_ATTR(zone_size, ulong); +NULLB_DEVICE_ATTR(zone_nr_conv, uint); static ssize_t nullb_device_power_show(struct config_item *item, char *page) { @@ -407,6 +412,7 @@ static struct configfs_attribute *nullb_device_attrs[] = { &nullb_device_attr_badblocks, &nullb_device_attr_zoned, &nullb_device_attr_zone_size, + &nullb_device_attr_zone_nr_conv, NULL, }; @@ -520,6 +526,7 @@ static struct nullb_device *null_alloc_dev(void) dev->use_per_node_hctx = g_use_per_node_hctx; dev->zoned = g_zoned; dev->zone_size = g_zone_size; + dev->zone_nr_conv = g_zone_nr_conv; return dev; } @@ -635,14 +642,9 @@ static void null_cmd_end_timer(struct nullb_cmd *cmd) hrtimer_start(&cmd->timer, kt, HRTIMER_MODE_REL); } -static void null_softirq_done_fn(struct request *rq) +static void null_complete_rq(struct request *rq) { - struct nullb *nullb = rq->q->queuedata; - - if (nullb->dev->queue_mode == NULL_Q_MQ) - end_cmd(blk_mq_rq_to_pdu(rq)); - else - end_cmd(rq->special); + end_cmd(blk_mq_rq_to_pdu(rq)); } static struct nullb_page *null_alloc_page(gfp_t gfp_flags) @@ -1350,7 +1352,7 @@ static blk_status_t null_queue_rq(struct blk_mq_hw_ctx *hctx, static const struct blk_mq_ops null_mq_ops = { .queue_rq = null_queue_rq, - .complete = null_softirq_done_fn, + .complete = null_complete_rq, .timeout = null_timeout_rq, }; @@ -1657,8 +1659,7 @@ static int null_add_dev(struct nullb_device *dev) } null_init_queues(nullb); } else if (dev->queue_mode == NULL_Q_BIO) { - nullb->q = blk_alloc_queue_node(GFP_KERNEL, dev->home_node, - NULL); + nullb->q = blk_alloc_queue_node(GFP_KERNEL, dev->home_node); if (!nullb->q) { rv = -ENOMEM; goto out_cleanup_queues; diff --git a/drivers/block/null_blk_zoned.c b/drivers/block/null_blk_zoned.c index c0b0e4a3fa8f..5d1c261a2cfd 100644 --- a/drivers/block/null_blk_zoned.c +++ b/drivers/block/null_blk_zoned.c @@ -29,7 +29,25 @@ int null_zone_init(struct nullb_device *dev) if (!dev->zones) return -ENOMEM; - for (i = 0; i < dev->nr_zones; i++) { + if (dev->zone_nr_conv >= dev->nr_zones) { + dev->zone_nr_conv = dev->nr_zones - 1; + pr_info("null_blk: changed the number of conventional zones to %u", + dev->zone_nr_conv); + } + + for (i = 0; i < dev->zone_nr_conv; i++) { + struct blk_zone *zone = &dev->zones[i]; + + zone->start = sector; + zone->len = dev->zone_size_sects; + zone->wp = zone->start + zone->len; + zone->type = BLK_ZONE_TYPE_CONVENTIONAL; + zone->cond = BLK_ZONE_COND_NOT_WP; + + sector += dev->zone_size_sects; + } + + for (i = dev->zone_nr_conv; i < dev->nr_zones; i++) { struct blk_zone *zone = &dev->zones[i]; zone->start = zone->wp = sector; @@ -98,6 +116,8 @@ void null_zone_write(struct nullb_cmd *cmd, sector_t sector, if (zone->wp == zone->start + zone->len) zone->cond = BLK_ZONE_COND_FULL; break; + case BLK_ZONE_COND_NOT_WP: + break; default: /* Invalid zone condition */ cmd->error = BLK_STS_IOERR; @@ -111,6 +131,11 @@ void null_zone_reset(struct nullb_cmd *cmd, sector_t sector) unsigned int zno = null_zone_no(dev, sector); struct blk_zone *zone = &dev->zones[zno]; + if (zone->type == BLK_ZONE_TYPE_CONVENTIONAL) { + cmd->error = BLK_STS_IOERR; + return; + } + zone->cond = BLK_ZONE_COND_EMPTY; zone->wp = zone->start; } diff --git a/drivers/block/paride/pd.c b/drivers/block/paride/pd.c index ae4971e5d9a8..0ff9b12d0e35 100644 --- a/drivers/block/paride/pd.c +++ b/drivers/block/paride/pd.c @@ -242,6 +242,11 @@ struct pd_unit { static struct pd_unit pd[PD_UNITS]; +struct pd_req { + /* for REQ_OP_DRV_IN: */ + enum action (*func)(struct pd_unit *disk); +}; + static char pd_scratch[512]; /* scratch block buffer */ static char *pd_errs[17] = { "ERR", "INDEX", "ECC", "DRQ", "SEEK", "WRERR", @@ -502,8 +507,9 @@ static enum action do_pd_io_start(void) static enum action pd_special(void) { - enum action (*func)(struct pd_unit *) = pd_req->special; - return func(pd_current); + struct pd_req *req = blk_mq_rq_to_pdu(pd_req); + + return req->func(pd_current); } static int pd_next_buf(void) @@ -767,12 +773,14 @@ static int pd_special_command(struct pd_unit *disk, enum action (*func)(struct pd_unit *disk)) { struct request *rq; + struct pd_req *req; rq = blk_get_request(disk->gd->queue, REQ_OP_DRV_IN, 0); if (IS_ERR(rq)) return PTR_ERR(rq); + req = blk_mq_rq_to_pdu(rq); - rq->special = func; + req->func = func; blk_execute_rq(disk->gd->queue, disk->gd, rq, 0); blk_put_request(rq); return 0; @@ -892,9 +900,21 @@ static void pd_probe_drive(struct pd_unit *disk) disk->gd = p; p->private_data = disk; - p->queue = blk_mq_init_sq_queue(&disk->tag_set, &pd_mq_ops, 2, - BLK_MQ_F_SHOULD_MERGE | BLK_MQ_F_BLOCKING); + memset(&disk->tag_set, 0, sizeof(disk->tag_set)); + disk->tag_set.ops = &pd_mq_ops; + disk->tag_set.cmd_size = sizeof(struct pd_req); + disk->tag_set.nr_hw_queues = 1; + disk->tag_set.nr_maps = 1; + disk->tag_set.queue_depth = 2; + disk->tag_set.numa_node = NUMA_NO_NODE; + disk->tag_set.flags = BLK_MQ_F_SHOULD_MERGE | BLK_MQ_F_BLOCKING; + + if (blk_mq_alloc_tag_set(&disk->tag_set)) + return; + + p->queue = blk_mq_init_queue(&disk->tag_set); if (IS_ERR(p->queue)) { + blk_mq_free_tag_set(&disk->tag_set); p->queue = NULL; return; } diff --git a/drivers/block/pktcdvd.c b/drivers/block/pktcdvd.c index 9381f4e3b221..f5a71023f76c 100644 --- a/drivers/block/pktcdvd.c +++ b/drivers/block/pktcdvd.c @@ -2203,9 +2203,7 @@ static int pkt_open_dev(struct pktcdvd_device *pd, fmode_t write) * Some CDRW drives can not handle writes larger than one packet, * even if the size is a multiple of the packet size. */ - spin_lock_irq(q->queue_lock); blk_queue_max_hw_sectors(q, pd->settings.size); - spin_unlock_irq(q->queue_lock); set_bit(PACKET_WRITABLE, &pd->flags); } else { pkt_set_speed(pd, MAX_SPEED, MAX_SPEED); diff --git a/drivers/block/skd_main.c b/drivers/block/skd_main.c index 2459dcc04b1c..a10d5736d8f7 100644 --- a/drivers/block/skd_main.c +++ b/drivers/block/skd_main.c @@ -181,6 +181,7 @@ struct skd_request_context { struct fit_completion_entry_v1 completion; struct fit_comp_error_info err_info; + int retries; blk_status_t status; }; @@ -382,11 +383,12 @@ static void skd_log_skreq(struct skd_device *skdev, * READ/WRITE REQUESTS ***************************************************************************** */ -static void skd_inc_in_flight(struct request *rq, void *data, bool reserved) +static bool skd_inc_in_flight(struct request *rq, void *data, bool reserved) { int *count = data; count++; + return true; } static int skd_in_flight(struct skd_device *skdev) @@ -494,6 +496,11 @@ static blk_status_t skd_mq_queue_rq(struct blk_mq_hw_ctx *hctx, if (unlikely(skdev->state != SKD_DRVR_STATE_ONLINE)) return skd_fail_all(q) ? BLK_STS_IOERR : BLK_STS_RESOURCE; + if (!(req->rq_flags & RQF_DONTPREP)) { + skreq->retries = 0; + req->rq_flags |= RQF_DONTPREP; + } + blk_mq_start_request(req); WARN_ONCE(tag >= skd_max_queue_depth, "%#x > %#x (nr_requests = %lu)\n", @@ -1425,7 +1432,7 @@ static void skd_resolve_req_exception(struct skd_device *skdev, break; case SKD_CHECK_STATUS_REQUEUE_REQUEST: - if ((unsigned long) ++req->special < SKD_MAX_RETRIES) { + if (++skreq->retries < SKD_MAX_RETRIES) { skd_log_skreq(skdev, skreq, "retry"); blk_mq_requeue_request(req, true); break; @@ -1887,13 +1894,13 @@ static void skd_isr_fwstate(struct skd_device *skdev) skd_skdev_state_to_str(skdev->state), skdev->state); } -static void skd_recover_request(struct request *req, void *data, bool reserved) +static bool skd_recover_request(struct request *req, void *data, bool reserved) { struct skd_device *const skdev = data; struct skd_request_context *skreq = blk_mq_rq_to_pdu(req); if (skreq->state != SKD_REQ_STATE_BUSY) - return; + return true; skd_log_skreq(skdev, skreq, "recover"); @@ -1904,6 +1911,7 @@ static void skd_recover_request(struct request *req, void *data, bool reserved) skreq->state = SKD_REQ_STATE_IDLE; skreq->status = BLK_STS_IOERR; blk_mq_complete_request(req); + return true; } static void skd_recover_requests(struct skd_device *skdev) diff --git a/drivers/block/sunvdc.c b/drivers/block/sunvdc.c index b54fa6726303..9c0553dd13e7 100644 --- a/drivers/block/sunvdc.c +++ b/drivers/block/sunvdc.c @@ -6,7 +6,7 @@ #include <linux/module.h> #include <linux/kernel.h> #include <linux/types.h> -#include <linux/blkdev.h> +#include <linux/blk-mq.h> #include <linux/hdreg.h> #include <linux/genhd.h> #include <linux/cdrom.h> @@ -45,6 +45,8 @@ MODULE_VERSION(DRV_MODULE_VERSION); #define WAITING_FOR_GEN_CMD 0x04 #define WAITING_FOR_ANY -1 +#define VDC_MAX_RETRIES 10 + static struct workqueue_struct *sunvdc_wq; struct vdc_req_entry { @@ -66,9 +68,10 @@ struct vdc_port { u64 max_xfer_size; u32 vdisk_block_size; + u32 drain; u64 ldc_timeout; - struct timer_list ldc_reset_timer; + struct delayed_work ldc_reset_timer_work; struct work_struct ldc_reset_work; /* The server fills these in for us in the disk attribute @@ -80,12 +83,14 @@ struct vdc_port { u8 vdisk_mtype; u32 vdisk_phys_blksz; + struct blk_mq_tag_set tag_set; + char disk_name[32]; }; static void vdc_ldc_reset(struct vdc_port *port); static void vdc_ldc_reset_work(struct work_struct *work); -static void vdc_ldc_reset_timer(struct timer_list *t); +static void vdc_ldc_reset_timer_work(struct work_struct *work); static inline struct vdc_port *to_vdc_port(struct vio_driver_state *vio) { @@ -175,11 +180,8 @@ static void vdc_blk_queue_start(struct vdc_port *port) * handshake completes, so check for initial handshake before we've * allocated a disk. */ - if (port->disk && blk_queue_stopped(port->disk->queue) && - vdc_tx_dring_avail(dr) * 100 / VDC_TX_RING_SIZE >= 50) { - blk_start_queue(port->disk->queue); - } - + if (port->disk && vdc_tx_dring_avail(dr) * 100 / VDC_TX_RING_SIZE >= 50) + blk_mq_start_hw_queues(port->disk->queue); } static void vdc_finish(struct vio_driver_state *vio, int err, int waiting_for) @@ -197,7 +199,7 @@ static void vdc_handshake_complete(struct vio_driver_state *vio) { struct vdc_port *port = to_vdc_port(vio); - del_timer(&port->ldc_reset_timer); + cancel_delayed_work(&port->ldc_reset_timer_work); vdc_finish(vio, 0, WAITING_FOR_LINK_UP); vdc_blk_queue_start(port); } @@ -320,7 +322,7 @@ static void vdc_end_one(struct vdc_port *port, struct vio_dring_state *dr, rqe->req = NULL; - __blk_end_request(req, (desc->status ? BLK_STS_IOERR : 0), desc->size); + blk_mq_end_request(req, desc->status ? BLK_STS_IOERR : 0); vdc_blk_queue_start(port); } @@ -431,6 +433,7 @@ static int __vdc_tx_trigger(struct vdc_port *port) .end_idx = dr->prod, }; int err, delay; + int retries = 0; hdr.seq = dr->snd_nxt; delay = 1; @@ -443,6 +446,8 @@ static int __vdc_tx_trigger(struct vdc_port *port) udelay(delay); if ((delay <<= 1) > 128) delay = 128; + if (retries++ > VDC_MAX_RETRIES) + break; } while (err == -EAGAIN); if (err == -ENOTCONN) @@ -525,29 +530,40 @@ static int __send_request(struct request *req) return err; } -static void do_vdc_request(struct request_queue *rq) +static blk_status_t vdc_queue_rq(struct blk_mq_hw_ctx *hctx, + const struct blk_mq_queue_data *bd) { - struct request *req; + struct vdc_port *port = hctx->queue->queuedata; + struct vio_dring_state *dr; + unsigned long flags; - while ((req = blk_peek_request(rq)) != NULL) { - struct vdc_port *port; - struct vio_dring_state *dr; + dr = &port->vio.drings[VIO_DRIVER_TX_RING]; - port = req->rq_disk->private_data; - dr = &port->vio.drings[VIO_DRIVER_TX_RING]; - if (unlikely(vdc_tx_dring_avail(dr) < 1)) - goto wait; + blk_mq_start_request(bd->rq); - blk_start_request(req); + spin_lock_irqsave(&port->vio.lock, flags); - if (__send_request(req) < 0) { - blk_requeue_request(rq, req); -wait: - /* Avoid pointless unplugs. */ - blk_stop_queue(rq); - break; - } + /* + * Doing drain, just end the request in error + */ + if (unlikely(port->drain)) { + spin_unlock_irqrestore(&port->vio.lock, flags); + return BLK_STS_IOERR; + } + + if (unlikely(vdc_tx_dring_avail(dr) < 1)) { + spin_unlock_irqrestore(&port->vio.lock, flags); + blk_mq_stop_hw_queue(hctx); + return BLK_STS_DEV_RESOURCE; + } + + if (__send_request(bd->rq) < 0) { + spin_unlock_irqrestore(&port->vio.lock, flags); + return BLK_STS_IOERR; } + + spin_unlock_irqrestore(&port->vio.lock, flags); + return BLK_STS_OK; } static int generic_request(struct vdc_port *port, u8 op, void *buf, int len) @@ -759,6 +775,31 @@ static void vdc_port_down(struct vdc_port *port) vio_ldc_free(&port->vio); } +static const struct blk_mq_ops vdc_mq_ops = { + .queue_rq = vdc_queue_rq, +}; + +static void cleanup_queue(struct request_queue *q) +{ + struct vdc_port *port = q->queuedata; + + blk_cleanup_queue(q); + blk_mq_free_tag_set(&port->tag_set); +} + +static struct request_queue *init_queue(struct vdc_port *port) +{ + struct request_queue *q; + + q = blk_mq_init_sq_queue(&port->tag_set, &vdc_mq_ops, VDC_TX_RING_SIZE, + BLK_MQ_F_SHOULD_MERGE); + if (IS_ERR(q)) + return q; + + q->queuedata = port; + return q; +} + static int probe_disk(struct vdc_port *port) { struct request_queue *q; @@ -796,17 +837,17 @@ static int probe_disk(struct vdc_port *port) (u64)geom.num_sec); } - q = blk_init_queue(do_vdc_request, &port->vio.lock); - if (!q) { + q = init_queue(port); + if (IS_ERR(q)) { printk(KERN_ERR PFX "%s: Could not allocate queue.\n", port->vio.name); - return -ENOMEM; + return PTR_ERR(q); } g = alloc_disk(1 << PARTITION_SHIFT); if (!g) { printk(KERN_ERR PFX "%s: Could not allocate gendisk.\n", port->vio.name); - blk_cleanup_queue(q); + cleanup_queue(q); return -ENOMEM; } @@ -981,7 +1022,7 @@ static int vdc_port_probe(struct vio_dev *vdev, const struct vio_device_id *id) */ ldc_timeout = mdesc_get_property(hp, vdev->mp, "vdc-timeout", NULL); port->ldc_timeout = ldc_timeout ? *ldc_timeout : 0; - timer_setup(&port->ldc_reset_timer, vdc_ldc_reset_timer, 0); + INIT_DELAYED_WORK(&port->ldc_reset_timer_work, vdc_ldc_reset_timer_work); INIT_WORK(&port->ldc_reset_work, vdc_ldc_reset_work); err = vio_driver_init(&port->vio, vdev, VDEV_DISK, @@ -1034,18 +1075,14 @@ static int vdc_port_remove(struct vio_dev *vdev) struct vdc_port *port = dev_get_drvdata(&vdev->dev); if (port) { - unsigned long flags; - - spin_lock_irqsave(&port->vio.lock, flags); - blk_stop_queue(port->disk->queue); - spin_unlock_irqrestore(&port->vio.lock, flags); + blk_mq_stop_hw_queues(port->disk->queue); flush_work(&port->ldc_reset_work); - del_timer_sync(&port->ldc_reset_timer); + cancel_delayed_work_sync(&port->ldc_reset_timer_work); del_timer_sync(&port->vio.timer); del_gendisk(port->disk); - blk_cleanup_queue(port->disk->queue); + cleanup_queue(port->disk->queue); put_disk(port->disk); port->disk = NULL; @@ -1080,32 +1117,46 @@ static void vdc_requeue_inflight(struct vdc_port *port) } rqe->req = NULL; - blk_requeue_request(port->disk->queue, req); + blk_mq_requeue_request(req, false); } } static void vdc_queue_drain(struct vdc_port *port) { - struct request *req; + struct request_queue *q = port->disk->queue; + + /* + * Mark the queue as draining, then freeze/quiesce to ensure + * that all existing requests are seen in ->queue_rq() and killed + */ + port->drain = 1; + spin_unlock_irq(&port->vio.lock); - while ((req = blk_fetch_request(port->disk->queue)) != NULL) - __blk_end_request_all(req, BLK_STS_IOERR); + blk_mq_freeze_queue(q); + blk_mq_quiesce_queue(q); + + spin_lock_irq(&port->vio.lock); + port->drain = 0; + blk_mq_unquiesce_queue(q); + blk_mq_unfreeze_queue(q); } -static void vdc_ldc_reset_timer(struct timer_list *t) +static void vdc_ldc_reset_timer_work(struct work_struct *work) { - struct vdc_port *port = from_timer(port, t, ldc_reset_timer); - struct vio_driver_state *vio = &port->vio; - unsigned long flags; + struct vdc_port *port; + struct vio_driver_state *vio; - spin_lock_irqsave(&vio->lock, flags); + port = container_of(work, struct vdc_port, ldc_reset_timer_work.work); + vio = &port->vio; + + spin_lock_irq(&vio->lock); if (!(port->vio.hs_state & VIO_HS_COMPLETE)) { pr_warn(PFX "%s ldc down %llu seconds, draining queue\n", port->disk_name, port->ldc_timeout); vdc_queue_drain(port); vdc_blk_queue_start(port); } - spin_unlock_irqrestore(&vio->lock, flags); + spin_unlock_irq(&vio->lock); } static void vdc_ldc_reset_work(struct work_struct *work) @@ -1129,7 +1180,7 @@ static void vdc_ldc_reset(struct vdc_port *port) assert_spin_locked(&port->vio.lock); pr_warn(PFX "%s ldc link reset\n", port->disk_name); - blk_stop_queue(port->disk->queue); + blk_mq_stop_hw_queues(port->disk->queue); vdc_requeue_inflight(port); vdc_port_down(port); @@ -1146,7 +1197,7 @@ static void vdc_ldc_reset(struct vdc_port *port) } if (port->ldc_timeout) - mod_timer(&port->ldc_reset_timer, + mod_delayed_work(system_wq, &port->ldc_reset_timer_work, round_jiffies(jiffies + HZ * port->ldc_timeout)); mod_timer(&port->vio.timer, round_jiffies(jiffies + HZ)); return; diff --git a/drivers/block/sx8.c b/drivers/block/sx8.c index 064b8c5c7a32..4478eb7efee0 100644 --- a/drivers/block/sx8.c +++ b/drivers/block/sx8.c @@ -243,7 +243,6 @@ struct carm_port { unsigned int port_no; struct gendisk *disk; struct carm_host *host; - struct blk_mq_tag_set tag_set; /* attached device characteristics */ u64 capacity; @@ -254,13 +253,10 @@ struct carm_port { }; struct carm_request { - unsigned int tag; int n_elem; unsigned int msg_type; unsigned int msg_subtype; unsigned int msg_bucket; - struct request *rq; - struct carm_port *port; struct scatterlist sg[CARM_MAX_REQ_SG]; }; @@ -291,9 +287,6 @@ struct carm_host { unsigned int wait_q_cons; struct request_queue *wait_q[CARM_MAX_WAIT_Q]; - unsigned int n_msgs; - u64 msg_alloc; - struct carm_request req[CARM_MAX_REQ]; void *msg_base; dma_addr_t msg_dma; @@ -478,10 +471,10 @@ static inline dma_addr_t carm_ref_msg_dma(struct carm_host *host, } static int carm_send_msg(struct carm_host *host, - struct carm_request *crq) + struct carm_request *crq, unsigned tag) { void __iomem *mmio = host->mmio; - u32 msg = (u32) carm_ref_msg_dma(host, crq->tag); + u32 msg = (u32) carm_ref_msg_dma(host, tag); u32 cm_bucket = crq->msg_bucket; u32 tmp; int rc = 0; @@ -506,99 +499,24 @@ static int carm_send_msg(struct carm_host *host, return rc; } -static struct carm_request *carm_get_request(struct carm_host *host) -{ - unsigned int i; - - /* obey global hardware limit on S/G entries */ - if (host->hw_sg_used >= (CARM_MAX_HOST_SG - CARM_MAX_REQ_SG)) - return NULL; - - for (i = 0; i < max_queue; i++) - if ((host->msg_alloc & (1ULL << i)) == 0) { - struct carm_request *crq = &host->req[i]; - crq->port = NULL; - crq->n_elem = 0; - - host->msg_alloc |= (1ULL << i); - host->n_msgs++; - - assert(host->n_msgs <= CARM_MAX_REQ); - sg_init_table(crq->sg, CARM_MAX_REQ_SG); - return crq; - } - - DPRINTK("no request available, returning NULL\n"); - return NULL; -} - -static int carm_put_request(struct carm_host *host, struct carm_request *crq) -{ - assert(crq->tag < max_queue); - - if (unlikely((host->msg_alloc & (1ULL << crq->tag)) == 0)) - return -EINVAL; /* tried to clear a tag that was not active */ - - assert(host->hw_sg_used >= crq->n_elem); - - host->msg_alloc &= ~(1ULL << crq->tag); - host->hw_sg_used -= crq->n_elem; - host->n_msgs--; - - return 0; -} - -static struct carm_request *carm_get_special(struct carm_host *host) -{ - unsigned long flags; - struct carm_request *crq = NULL; - struct request *rq; - int tries = 5000; - - while (tries-- > 0) { - spin_lock_irqsave(&host->lock, flags); - crq = carm_get_request(host); - spin_unlock_irqrestore(&host->lock, flags); - - if (crq) - break; - msleep(10); - } - - if (!crq) - return NULL; - - rq = blk_get_request(host->oob_q, REQ_OP_DRV_OUT, 0); - if (IS_ERR(rq)) { - spin_lock_irqsave(&host->lock, flags); - carm_put_request(host, crq); - spin_unlock_irqrestore(&host->lock, flags); - return NULL; - } - - crq->rq = rq; - return crq; -} - static int carm_array_info (struct carm_host *host, unsigned int array_idx) { struct carm_msg_ioctl *ioc; - unsigned int idx; u32 msg_data; dma_addr_t msg_dma; struct carm_request *crq; + struct request *rq; int rc; - crq = carm_get_special(host); - if (!crq) { + rq = blk_mq_alloc_request(host->oob_q, REQ_OP_DRV_OUT, 0); + if (IS_ERR(rq)) { rc = -ENOMEM; goto err_out; } + crq = blk_mq_rq_to_pdu(rq); - idx = crq->tag; - - ioc = carm_ref_msg(host, idx); - msg_dma = carm_ref_msg_dma(host, idx); + ioc = carm_ref_msg(host, rq->tag); + msg_dma = carm_ref_msg_dma(host, rq->tag); msg_data = (u32) (msg_dma + sizeof(struct carm_array_info)); crq->msg_type = CARM_MSG_ARRAY; @@ -612,7 +530,7 @@ static int carm_array_info (struct carm_host *host, unsigned int array_idx) ioc->type = CARM_MSG_ARRAY; ioc->subtype = CARM_ARRAY_INFO; ioc->array_id = (u8) array_idx; - ioc->handle = cpu_to_le32(TAG_ENCODE(idx)); + ioc->handle = cpu_to_le32(TAG_ENCODE(rq->tag)); ioc->data_addr = cpu_to_le32(msg_data); spin_lock_irq(&host->lock); @@ -620,9 +538,8 @@ static int carm_array_info (struct carm_host *host, unsigned int array_idx) host->state == HST_DEV_SCAN); spin_unlock_irq(&host->lock); - DPRINTK("blk_execute_rq_nowait, tag == %u\n", idx); - crq->rq->special = crq; - blk_execute_rq_nowait(host->oob_q, NULL, crq->rq, true, NULL); + DPRINTK("blk_execute_rq_nowait, tag == %u\n", rq->tag); + blk_execute_rq_nowait(host->oob_q, NULL, rq, true, NULL); return 0; @@ -637,21 +554,21 @@ typedef unsigned int (*carm_sspc_t)(struct carm_host *, unsigned int, void *); static int carm_send_special (struct carm_host *host, carm_sspc_t func) { + struct request *rq; struct carm_request *crq; struct carm_msg_ioctl *ioc; void *mem; - unsigned int idx, msg_size; + unsigned int msg_size; int rc; - crq = carm_get_special(host); - if (!crq) + rq = blk_mq_alloc_request(host->oob_q, REQ_OP_DRV_OUT, 0); + if (IS_ERR(rq)) return -ENOMEM; + crq = blk_mq_rq_to_pdu(rq); - idx = crq->tag; + mem = carm_ref_msg(host, rq->tag); - mem = carm_ref_msg(host, idx); - - msg_size = func(host, idx, mem); + msg_size = func(host, rq->tag, mem); ioc = mem; crq->msg_type = ioc->type; @@ -660,9 +577,8 @@ static int carm_send_special (struct carm_host *host, carm_sspc_t func) BUG_ON(rc < 0); crq->msg_bucket = (u32) rc; - DPRINTK("blk_execute_rq_nowait, tag == %u\n", idx); - crq->rq->special = crq; - blk_execute_rq_nowait(host->oob_q, NULL, crq->rq, true, NULL); + DPRINTK("blk_execute_rq_nowait, tag == %u\n", rq->tag); + blk_execute_rq_nowait(host->oob_q, NULL, rq, true, NULL); return 0; } @@ -744,19 +660,6 @@ static unsigned int carm_fill_get_fw_ver(struct carm_host *host, sizeof(struct carm_fw_ver); } -static inline void carm_end_request_queued(struct carm_host *host, - struct carm_request *crq, - blk_status_t error) -{ - struct request *req = crq->rq; - int rc; - - blk_mq_end_request(req, error); - - rc = carm_put_request(host, crq); - assert(rc == 0); -} - static inline void carm_push_q (struct carm_host *host, struct request_queue *q) { unsigned int idx = host->wait_q_prod % CARM_MAX_WAIT_Q; @@ -791,101 +694,50 @@ static inline void carm_round_robin(struct carm_host *host) } } -static inline void carm_end_rq(struct carm_host *host, struct carm_request *crq, - blk_status_t error) -{ - carm_end_request_queued(host, crq, error); - if (max_queue == 1) - carm_round_robin(host); - else if ((host->n_msgs <= CARM_MSG_LOW_WATER) && - (host->hw_sg_used <= CARM_SG_LOW_WATER)) { - carm_round_robin(host); - } -} - -static blk_status_t carm_oob_queue_rq(struct blk_mq_hw_ctx *hctx, - const struct blk_mq_queue_data *bd) +static inline enum dma_data_direction carm_rq_dir(struct request *rq) { - struct request_queue *q = hctx->queue; - struct carm_host *host = q->queuedata; - struct carm_request *crq; - int rc; - - blk_mq_start_request(bd->rq); - - spin_lock_irq(&host->lock); - - crq = bd->rq->special; - assert(crq != NULL); - assert(crq->rq == bd->rq); - - crq->n_elem = 0; - - DPRINTK("send req\n"); - rc = carm_send_msg(host, crq); - if (rc) { - carm_push_q(host, q); - spin_unlock_irq(&host->lock); - return BLK_STS_DEV_RESOURCE; - } - - spin_unlock_irq(&host->lock); - return BLK_STS_OK; + return op_is_write(req_op(rq)) ? DMA_TO_DEVICE : DMA_FROM_DEVICE; } static blk_status_t carm_queue_rq(struct blk_mq_hw_ctx *hctx, const struct blk_mq_queue_data *bd) { struct request_queue *q = hctx->queue; + struct request *rq = bd->rq; struct carm_port *port = q->queuedata; struct carm_host *host = port->host; + struct carm_request *crq = blk_mq_rq_to_pdu(rq); struct carm_msg_rw *msg; - struct carm_request *crq; - struct request *rq = bd->rq; struct scatterlist *sg; - int writing = 0, pci_dir, i, n_elem, rc; - u32 tmp; + int i, n_elem = 0, rc; unsigned int msg_size; + u32 tmp; + + crq->n_elem = 0; + sg_init_table(crq->sg, CARM_MAX_REQ_SG); blk_mq_start_request(rq); spin_lock_irq(&host->lock); - - crq = carm_get_request(host); - if (!crq) { - carm_push_q(host, q); - spin_unlock_irq(&host->lock); - return BLK_STS_DEV_RESOURCE; - } - crq->rq = rq; - - if (rq_data_dir(rq) == WRITE) { - writing = 1; - pci_dir = DMA_TO_DEVICE; - } else { - pci_dir = DMA_FROM_DEVICE; - } + if (req_op(rq) == REQ_OP_DRV_OUT) + goto send_msg; /* get scatterlist from block layer */ sg = &crq->sg[0]; n_elem = blk_rq_map_sg(q, rq, sg); - if (n_elem <= 0) { - /* request with no s/g entries? */ - carm_end_rq(host, crq, BLK_STS_IOERR); - spin_unlock_irq(&host->lock); - return BLK_STS_IOERR; - } + if (n_elem <= 0) + goto out_ioerr; /* map scatterlist to PCI bus addresses */ - n_elem = dma_map_sg(&host->pdev->dev, sg, n_elem, pci_dir); - if (n_elem <= 0) { - /* request with no s/g entries? */ - carm_end_rq(host, crq, BLK_STS_IOERR); - spin_unlock_irq(&host->lock); - return BLK_STS_IOERR; - } + n_elem = dma_map_sg(&host->pdev->dev, sg, n_elem, carm_rq_dir(rq)); + if (n_elem <= 0) + goto out_ioerr; + + /* obey global hardware limit on S/G entries */ + if (host->hw_sg_used >= CARM_MAX_HOST_SG - n_elem) + goto out_resource; + crq->n_elem = n_elem; - crq->port = port; host->hw_sg_used += n_elem; /* @@ -893,9 +745,9 @@ static blk_status_t carm_queue_rq(struct blk_mq_hw_ctx *hctx, */ VPRINTK("build msg\n"); - msg = (struct carm_msg_rw *) carm_ref_msg(host, crq->tag); + msg = (struct carm_msg_rw *) carm_ref_msg(host, rq->tag); - if (writing) { + if (rq_data_dir(rq) == WRITE) { msg->type = CARM_MSG_WRITE; crq->msg_type = CARM_MSG_WRITE; } else { @@ -906,7 +758,7 @@ static blk_status_t carm_queue_rq(struct blk_mq_hw_ctx *hctx, msg->id = port->port_no; msg->sg_count = n_elem; msg->sg_type = SGT_32BIT; - msg->handle = cpu_to_le32(TAG_ENCODE(crq->tag)); + msg->handle = cpu_to_le32(TAG_ENCODE(rq->tag)); msg->lba = cpu_to_le32(blk_rq_pos(rq) & 0xffffffff); tmp = (blk_rq_pos(rq) >> 16) >> 16; msg->lba_high = cpu_to_le16( (u16) tmp ); @@ -923,22 +775,28 @@ static blk_status_t carm_queue_rq(struct blk_mq_hw_ctx *hctx, rc = carm_lookup_bucket(msg_size); BUG_ON(rc < 0); crq->msg_bucket = (u32) rc; - +send_msg: /* * queue read/write message to hardware */ - - VPRINTK("send msg, tag == %u\n", crq->tag); - rc = carm_send_msg(host, crq); + VPRINTK("send msg, tag == %u\n", rq->tag); + rc = carm_send_msg(host, crq, rq->tag); if (rc) { - carm_put_request(host, crq); - carm_push_q(host, q); - spin_unlock_irq(&host->lock); - return BLK_STS_DEV_RESOURCE; + host->hw_sg_used -= n_elem; + goto out_resource; } spin_unlock_irq(&host->lock); return BLK_STS_OK; +out_resource: + dma_unmap_sg(&host->pdev->dev, &crq->sg[0], n_elem, carm_rq_dir(rq)); + carm_push_q(host, q); + spin_unlock_irq(&host->lock); + return BLK_STS_DEV_RESOURCE; +out_ioerr: + carm_round_robin(host); + spin_unlock_irq(&host->lock); + return BLK_STS_IOERR; } static void carm_handle_array_info(struct carm_host *host, @@ -954,8 +812,6 @@ static void carm_handle_array_info(struct carm_host *host, DPRINTK("ENTER\n"); - carm_end_rq(host, crq, error); - if (error) goto out; if (le32_to_cpu(desc->array_status) & ARRAY_NO_EXIST) @@ -1011,8 +867,6 @@ static void carm_handle_scan_chan(struct carm_host *host, DPRINTK("ENTER\n"); - carm_end_rq(host, crq, error); - if (error) { new_state = HST_ERROR; goto out; @@ -1040,8 +894,6 @@ static void carm_handle_generic(struct carm_host *host, { DPRINTK("ENTER\n"); - carm_end_rq(host, crq, error); - assert(host->state == cur_state); if (error) host->state = HST_ERROR; @@ -1050,28 +902,12 @@ static void carm_handle_generic(struct carm_host *host, schedule_work(&host->fsm_task); } -static inline void carm_handle_rw(struct carm_host *host, - struct carm_request *crq, blk_status_t error) -{ - int pci_dir; - - VPRINTK("ENTER\n"); - - if (rq_data_dir(crq->rq) == WRITE) - pci_dir = DMA_TO_DEVICE; - else - pci_dir = DMA_FROM_DEVICE; - - dma_unmap_sg(&host->pdev->dev, &crq->sg[0], crq->n_elem, pci_dir); - - carm_end_rq(host, crq, error); -} - static inline void carm_handle_resp(struct carm_host *host, __le32 ret_handle_le, u32 status) { u32 handle = le32_to_cpu(ret_handle_le); unsigned int msg_idx; + struct request *rq; struct carm_request *crq; blk_status_t error = (status == RMSG_OK) ? 0 : BLK_STS_IOERR; u8 *mem; @@ -1087,13 +923,15 @@ static inline void carm_handle_resp(struct carm_host *host, msg_idx = TAG_DECODE(handle); VPRINTK("tag == %u\n", msg_idx); - crq = &host->req[msg_idx]; + rq = blk_mq_tag_to_rq(host->tag_set.tags[0], msg_idx); + crq = blk_mq_rq_to_pdu(rq); /* fast path */ if (likely(crq->msg_type == CARM_MSG_READ || crq->msg_type == CARM_MSG_WRITE)) { - carm_handle_rw(host, crq, error); - return; + dma_unmap_sg(&host->pdev->dev, &crq->sg[0], crq->n_elem, + carm_rq_dir(rq)); + goto done; } mem = carm_ref_msg(host, msg_idx); @@ -1103,7 +941,7 @@ static inline void carm_handle_resp(struct carm_host *host, switch (crq->msg_subtype) { case CARM_IOC_SCAN_CHAN: carm_handle_scan_chan(host, crq, mem, error); - break; + goto done; default: /* unknown / invalid response */ goto err_out; @@ -1116,11 +954,11 @@ static inline void carm_handle_resp(struct carm_host *host, case MISC_ALLOC_MEM: carm_handle_generic(host, crq, error, HST_ALLOC_BUF, HST_SYNC_TIME); - break; + goto done; case MISC_SET_TIME: carm_handle_generic(host, crq, error, HST_SYNC_TIME, HST_GET_FW_VER); - break; + goto done; case MISC_GET_FW_VER: { struct carm_fw_ver *ver = (struct carm_fw_ver *) (mem + sizeof(struct carm_msg_get_fw_ver)); @@ -1130,7 +968,7 @@ static inline void carm_handle_resp(struct carm_host *host, } carm_handle_generic(host, crq, error, HST_GET_FW_VER, HST_PORT_SCAN); - break; + goto done; } default: /* unknown / invalid response */ @@ -1161,7 +999,13 @@ static inline void carm_handle_resp(struct carm_host *host, err_out: printk(KERN_WARNING DRV_NAME "(%s): BUG: unhandled message type %d/%d\n", pci_name(host->pdev), crq->msg_type, crq->msg_subtype); - carm_end_rq(host, crq, BLK_STS_IOERR); + error = BLK_STS_IOERR; +done: + host->hw_sg_used -= crq->n_elem; + blk_mq_end_request(blk_mq_rq_from_pdu(crq), error); + + if (host->hw_sg_used <= CARM_SG_LOW_WATER) + carm_round_robin(host); } static inline void carm_handle_responses(struct carm_host *host) @@ -1491,78 +1335,56 @@ static int carm_init_host(struct carm_host *host) return 0; } -static const struct blk_mq_ops carm_oob_mq_ops = { - .queue_rq = carm_oob_queue_rq, -}; - static const struct blk_mq_ops carm_mq_ops = { .queue_rq = carm_queue_rq, }; -static int carm_init_disks(struct carm_host *host) +static int carm_init_disk(struct carm_host *host, unsigned int port_no) { - unsigned int i; - int rc = 0; + struct carm_port *port = &host->port[port_no]; + struct gendisk *disk; + struct request_queue *q; - for (i = 0; i < CARM_MAX_PORTS; i++) { - struct gendisk *disk; - struct request_queue *q; - struct carm_port *port; + port->host = host; + port->port_no = port_no; - port = &host->port[i]; - port->host = host; - port->port_no = i; + disk = alloc_disk(CARM_MINORS_PER_MAJOR); + if (!disk) + return -ENOMEM; - disk = alloc_disk(CARM_MINORS_PER_MAJOR); - if (!disk) { - rc = -ENOMEM; - break; - } + port->disk = disk; + sprintf(disk->disk_name, DRV_NAME "/%u", + (unsigned int)host->id * CARM_MAX_PORTS + port_no); + disk->major = host->major; + disk->first_minor = port_no * CARM_MINORS_PER_MAJOR; + disk->fops = &carm_bd_ops; + disk->private_data = port; - port->disk = disk; - sprintf(disk->disk_name, DRV_NAME "/%u", - (unsigned int) (host->id * CARM_MAX_PORTS) + i); - disk->major = host->major; - disk->first_minor = i * CARM_MINORS_PER_MAJOR; - disk->fops = &carm_bd_ops; - disk->private_data = port; - - q = blk_mq_init_sq_queue(&port->tag_set, &carm_mq_ops, - max_queue, BLK_MQ_F_SHOULD_MERGE); - if (IS_ERR(q)) { - rc = PTR_ERR(q); - break; - } - disk->queue = q; - blk_queue_max_segments(q, CARM_MAX_REQ_SG); - blk_queue_segment_boundary(q, CARM_SG_BOUNDARY); + q = blk_mq_init_queue(&host->tag_set); + if (IS_ERR(q)) + return PTR_ERR(q); - q->queuedata = port; - } + blk_queue_max_segments(q, CARM_MAX_REQ_SG); + blk_queue_segment_boundary(q, CARM_SG_BOUNDARY); - return rc; + q->queuedata = port; + disk->queue = q; + return 0; } -static void carm_free_disks(struct carm_host *host) +static void carm_free_disk(struct carm_host *host, unsigned int port_no) { - unsigned int i; - - for (i = 0; i < CARM_MAX_PORTS; i++) { - struct carm_port *port = &host->port[i]; - struct gendisk *disk = port->disk; + struct carm_port *port = &host->port[port_no]; + struct gendisk *disk = port->disk; - if (disk) { - struct request_queue *q = disk->queue; + if (!disk) + return; - if (disk->flags & GENHD_FL_UP) - del_gendisk(disk); - if (q) { - blk_mq_free_tag_set(&port->tag_set); - blk_cleanup_queue(q); - } - put_disk(disk); - } - } + if (disk->flags & GENHD_FL_UP) + del_gendisk(disk); + if (disk->queue) + blk_cleanup_queue(disk->queue); + put_disk(disk); } static int carm_init_shm(struct carm_host *host) @@ -1618,9 +1440,6 @@ static int carm_init_one (struct pci_dev *pdev, const struct pci_device_id *ent) INIT_WORK(&host->fsm_task, carm_fsm_task); init_completion(&host->probe_comp); - for (i = 0; i < ARRAY_SIZE(host->req); i++) - host->req[i].tag = i; - host->mmio = ioremap(pci_resource_start(pdev, 0), pci_resource_len(pdev, 0)); if (!host->mmio) { @@ -1637,14 +1456,26 @@ static int carm_init_one (struct pci_dev *pdev, const struct pci_device_id *ent) goto err_out_iounmap; } - q = blk_mq_init_sq_queue(&host->tag_set, &carm_oob_mq_ops, 1, - BLK_MQ_F_NO_SCHED); + memset(&host->tag_set, 0, sizeof(host->tag_set)); + host->tag_set.ops = &carm_mq_ops; + host->tag_set.cmd_size = sizeof(struct carm_request); + host->tag_set.nr_hw_queues = 1; + host->tag_set.nr_maps = 1; + host->tag_set.queue_depth = max_queue; + host->tag_set.numa_node = NUMA_NO_NODE; + host->tag_set.flags = BLK_MQ_F_SHOULD_MERGE; + + rc = blk_mq_alloc_tag_set(&host->tag_set); + if (rc) + goto err_out_dma_free; + + q = blk_mq_init_queue(&host->tag_set); if (IS_ERR(q)) { - printk(KERN_ERR DRV_NAME "(%s): OOB queue alloc failure\n", - pci_name(pdev)); rc = PTR_ERR(q); + blk_mq_free_tag_set(&host->tag_set); goto err_out_dma_free; } + host->oob_q = q; q->queuedata = host; @@ -1667,9 +1498,11 @@ static int carm_init_one (struct pci_dev *pdev, const struct pci_device_id *ent) if (host->flags & FL_DYN_MAJOR) host->major = rc; - rc = carm_init_disks(host); - if (rc) - goto err_out_blkdev_disks; + for (i = 0; i < CARM_MAX_PORTS; i++) { + rc = carm_init_disk(host, i); + if (rc) + goto err_out_blkdev_disks; + } pci_set_master(pdev); @@ -1699,7 +1532,8 @@ static int carm_init_one (struct pci_dev *pdev, const struct pci_device_id *ent) err_out_free_irq: free_irq(pdev->irq, host); err_out_blkdev_disks: - carm_free_disks(host); + for (i = 0; i < CARM_MAX_PORTS; i++) + carm_free_disk(host, i); unregister_blkdev(host->major, host->name); err_out_free_majors: if (host->major == 160) @@ -1724,6 +1558,7 @@ err_out: static void carm_remove_one (struct pci_dev *pdev) { struct carm_host *host = pci_get_drvdata(pdev); + unsigned int i; if (!host) { printk(KERN_ERR PFX "BUG: no host data for PCI(%s)\n", @@ -1732,7 +1567,8 @@ static void carm_remove_one (struct pci_dev *pdev) } free_irq(pdev->irq, host); - carm_free_disks(host); + for (i = 0; i < CARM_MAX_PORTS; i++) + carm_free_disk(host, i); unregister_blkdev(host->major, host->name); if (host->major == 160) clear_bit(0, &carm_major_alloc); diff --git a/drivers/block/umem.c b/drivers/block/umem.c index be3e3ab79950..aa035cf8a51d 100644 --- a/drivers/block/umem.c +++ b/drivers/block/umem.c @@ -888,8 +888,7 @@ static int mm_pci_probe(struct pci_dev *dev, const struct pci_device_id *id) card->biotail = &card->bio; spin_lock_init(&card->lock); - card->queue = blk_alloc_queue_node(GFP_KERNEL, NUMA_NO_NODE, - &card->lock); + card->queue = blk_alloc_queue_node(GFP_KERNEL, NUMA_NO_NODE); if (!card->queue) goto failed_alloc; diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c index 086c6bb12baa..912c4265e592 100644 --- a/drivers/block/virtio_blk.c +++ b/drivers/block/virtio_blk.c @@ -214,6 +214,20 @@ static void virtblk_done(struct virtqueue *vq) spin_unlock_irqrestore(&vblk->vqs[qid].lock, flags); } +static void virtio_commit_rqs(struct blk_mq_hw_ctx *hctx) +{ + struct virtio_blk *vblk = hctx->queue->queuedata; + struct virtio_blk_vq *vq = &vblk->vqs[hctx->queue_num]; + bool kick; + + spin_lock_irq(&vq->lock); + kick = virtqueue_kick_prepare(vq->vq); + spin_unlock_irq(&vq->lock); + + if (kick) + virtqueue_notify(vq->vq); +} + static blk_status_t virtio_queue_rq(struct blk_mq_hw_ctx *hctx, const struct blk_mq_queue_data *bd) { @@ -624,7 +638,7 @@ static int virtblk_map_queues(struct blk_mq_tag_set *set) { struct virtio_blk *vblk = set->driver_data; - return blk_mq_virtio_map_queues(set, vblk->vdev, 0); + return blk_mq_virtio_map_queues(&set->map[0], vblk->vdev, 0); } #ifdef CONFIG_VIRTIO_BLK_SCSI @@ -638,6 +652,7 @@ static void virtblk_initialize_rq(struct request *req) static const struct blk_mq_ops virtio_mq_ops = { .queue_rq = virtio_queue_rq, + .commit_rqs = virtio_commit_rqs, .complete = virtblk_request_done, .init_request = virtblk_init_request, #ifdef CONFIG_VIRTIO_BLK_SCSI diff --git a/drivers/firewire/sbp2.c b/drivers/firewire/sbp2.c index 6bac03999fd4..09b845e90114 100644 --- a/drivers/firewire/sbp2.c +++ b/drivers/firewire/sbp2.c @@ -1610,7 +1610,6 @@ static struct scsi_host_template scsi_driver_template = { .eh_abort_handler = sbp2_scsi_abort, .this_id = -1, .sg_tablesize = SG_ALL, - .use_clustering = ENABLE_CLUSTERING, .can_queue = 1, .sdev_attrs = sbp2_scsi_sysfs_attrs, }; diff --git a/drivers/gpio/gpio-pca953x.c b/drivers/gpio/gpio-pca953x.c index 023a32cfac42..540166443c34 100644 --- a/drivers/gpio/gpio-pca953x.c +++ b/drivers/gpio/gpio-pca953x.c @@ -449,7 +449,7 @@ static void pca953x_setup_gpio(struct pca953x_chip *chip, int gpios) gc->base = chip->gpio_start; gc->ngpio = gpios; - gc->label = chip->client->name; + gc->label = dev_name(&chip->client->dev); gc->parent = &chip->client->dev; gc->owner = THIS_MODULE; gc->names = chip->names; diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c index f05a29ff586e..9fd5fbe8bebf 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c @@ -583,7 +583,7 @@ static int vmw_dma_select_mode(struct vmw_private *dev_priv) dev_priv->map_mode = vmw_dma_map_populate; - if (dma_ops->sync_single_for_cpu) + if (dma_ops && dma_ops->sync_single_for_cpu) dev_priv->map_mode = vmw_dma_alloc_coherent; #ifdef CONFIG_SWIOTLB if (swiotlb_nr_tbl() == 0) diff --git a/drivers/ide/ide-atapi.c b/drivers/ide/ide-atapi.c index 8b2b72b93885..da58020a144e 100644 --- a/drivers/ide/ide-atapi.c +++ b/drivers/ide/ide-atapi.c @@ -94,7 +94,7 @@ int ide_queue_pc_tail(ide_drive_t *drive, struct gendisk *disk, rq = blk_get_request(drive->queue, REQ_OP_DRV_IN, 0); ide_req(rq)->type = ATA_PRIV_MISC; - rq->special = (char *)pc; + ide_req(rq)->special = pc; if (buf && bufflen) { error = blk_rq_map_kern(drive->queue, rq, buf, bufflen, @@ -172,8 +172,8 @@ EXPORT_SYMBOL_GPL(ide_create_request_sense_cmd); void ide_prep_sense(ide_drive_t *drive, struct request *rq) { struct request_sense *sense = &drive->sense_data; - struct request *sense_rq = drive->sense_rq; - struct scsi_request *req = scsi_req(sense_rq); + struct request *sense_rq; + struct scsi_request *req; unsigned int cmd_len, sense_len; int err; @@ -196,9 +196,16 @@ void ide_prep_sense(ide_drive_t *drive, struct request *rq) if (ata_sense_request(rq) || drive->sense_rq_armed) return; + sense_rq = drive->sense_rq; + if (!sense_rq) { + sense_rq = blk_mq_alloc_request(drive->queue, REQ_OP_DRV_IN, + BLK_MQ_REQ_RESERVED | BLK_MQ_REQ_NOWAIT); + drive->sense_rq = sense_rq; + } + req = scsi_req(sense_rq); + memset(sense, 0, sizeof(*sense)); - blk_rq_init(rq->q, sense_rq); scsi_req_init(req); err = blk_rq_map_kern(drive->queue, sense_rq, sense, sense_len, @@ -207,6 +214,8 @@ void ide_prep_sense(ide_drive_t *drive, struct request *rq) if (printk_ratelimit()) printk(KERN_WARNING PFX "%s: failed to map sense " "buffer\n", drive->name); + blk_mq_free_request(sense_rq); + drive->sense_rq = NULL; return; } @@ -226,6 +235,8 @@ EXPORT_SYMBOL_GPL(ide_prep_sense); int ide_queue_sense_rq(ide_drive_t *drive, void *special) { + struct request *sense_rq = drive->sense_rq; + /* deferred failure from ide_prep_sense() */ if (!drive->sense_rq_armed) { printk(KERN_WARNING PFX "%s: error queuing a sense request\n", @@ -233,12 +244,12 @@ int ide_queue_sense_rq(ide_drive_t *drive, void *special) return -ENOMEM; } - drive->sense_rq->special = special; + ide_req(sense_rq)->special = special; drive->sense_rq_armed = false; drive->hwif->rq = NULL; - elv_add_request(drive->queue, drive->sense_rq, ELEVATOR_INSERT_FRONT); + ide_insert_request_head(drive, sense_rq); return 0; } EXPORT_SYMBOL_GPL(ide_queue_sense_rq); @@ -270,10 +281,8 @@ void ide_retry_pc(ide_drive_t *drive) */ drive->hwif->rq = NULL; ide_requeue_and_plug(drive, failed_rq); - if (ide_queue_sense_rq(drive, pc)) { - blk_start_request(failed_rq); + if (ide_queue_sense_rq(drive, pc)) ide_complete_rq(drive, BLK_STS_IOERR, blk_rq_bytes(failed_rq)); - } } EXPORT_SYMBOL_GPL(ide_retry_pc); diff --git a/drivers/ide/ide-cd.c b/drivers/ide/ide-cd.c index f9b59d41813f..1f03884a6808 100644 --- a/drivers/ide/ide-cd.c +++ b/drivers/ide/ide-cd.c @@ -211,12 +211,12 @@ static void cdrom_analyze_sense_data(ide_drive_t *drive, static void ide_cd_complete_failed_rq(ide_drive_t *drive, struct request *rq) { /* - * For ATA_PRIV_SENSE, "rq->special" points to the original + * For ATA_PRIV_SENSE, "ide_req(rq)->special" points to the original * failed request. Also, the sense data should be read * directly from rq which might be different from the original * sense buffer if it got copied during mapping. */ - struct request *failed = (struct request *)rq->special; + struct request *failed = ide_req(rq)->special; void *sense = bio_data(rq->bio); if (failed) { @@ -258,11 +258,22 @@ static int ide_cd_breathe(ide_drive_t *drive, struct request *rq) /* * take a breather */ - blk_delay_queue(drive->queue, 1); + blk_mq_requeue_request(rq, false); + blk_mq_delay_kick_requeue_list(drive->queue, 1); return 1; } } +static void ide_cd_free_sense(ide_drive_t *drive) +{ + if (!drive->sense_rq) + return; + + blk_mq_free_request(drive->sense_rq); + drive->sense_rq = NULL; + drive->sense_rq_armed = false; +} + /** * Returns: * 0: if the request should be continued. @@ -516,6 +527,82 @@ static bool ide_cd_error_cmd(ide_drive_t *drive, struct ide_cmd *cmd) return false; } +/* standard prep_rq that builds 10 byte cmds */ +static bool ide_cdrom_prep_fs(struct request_queue *q, struct request *rq) +{ + int hard_sect = queue_logical_block_size(q); + long block = (long)blk_rq_pos(rq) / (hard_sect >> 9); + unsigned long blocks = blk_rq_sectors(rq) / (hard_sect >> 9); + struct scsi_request *req = scsi_req(rq); + + if (rq_data_dir(rq) == READ) + req->cmd[0] = GPCMD_READ_10; + else + req->cmd[0] = GPCMD_WRITE_10; + + /* + * fill in lba + */ + req->cmd[2] = (block >> 24) & 0xff; + req->cmd[3] = (block >> 16) & 0xff; + req->cmd[4] = (block >> 8) & 0xff; + req->cmd[5] = block & 0xff; + + /* + * and transfer length + */ + req->cmd[7] = (blocks >> 8) & 0xff; + req->cmd[8] = blocks & 0xff; + req->cmd_len = 10; + return true; +} + +/* + * Most of the SCSI commands are supported directly by ATAPI devices. + * This transform handles the few exceptions. + */ +static bool ide_cdrom_prep_pc(struct request *rq) +{ + u8 *c = scsi_req(rq)->cmd; + + /* transform 6-byte read/write commands to the 10-byte version */ + if (c[0] == READ_6 || c[0] == WRITE_6) { + c[8] = c[4]; + c[5] = c[3]; + c[4] = c[2]; + c[3] = c[1] & 0x1f; + c[2] = 0; + c[1] &= 0xe0; + c[0] += (READ_10 - READ_6); + scsi_req(rq)->cmd_len = 10; + return true; + } + + /* + * it's silly to pretend we understand 6-byte sense commands, just + * reject with ILLEGAL_REQUEST and the caller should take the + * appropriate action + */ + if (c[0] == MODE_SENSE || c[0] == MODE_SELECT) { + scsi_req(rq)->result = ILLEGAL_REQUEST; + return false; + } + + return true; +} + +static bool ide_cdrom_prep_rq(ide_drive_t *drive, struct request *rq) +{ + if (!blk_rq_is_passthrough(rq)) { + scsi_req_init(scsi_req(rq)); + + return ide_cdrom_prep_fs(drive->queue, rq); + } else if (blk_rq_is_scsi(rq)) + return ide_cdrom_prep_pc(rq); + + return true; +} + static ide_startstop_t cdrom_newpc_intr(ide_drive_t *drive) { ide_hwif_t *hwif = drive->hwif; @@ -675,7 +762,7 @@ static ide_startstop_t cdrom_newpc_intr(ide_drive_t *drive) out_end: if (blk_rq_is_scsi(rq) && rc == 0) { scsi_req(rq)->resid_len = 0; - blk_end_request_all(rq, BLK_STS_OK); + blk_mq_end_request(rq, BLK_STS_OK); hwif->rq = NULL; } else { if (sense && uptodate) @@ -705,6 +792,8 @@ out_end: if (sense && rc == 2) ide_error(drive, "request sense failure", stat); } + + ide_cd_free_sense(drive); return ide_stopped; } @@ -729,7 +818,7 @@ static ide_startstop_t cdrom_start_rw(ide_drive_t *drive, struct request *rq) * We may be retrying this request after an error. Fix up any * weirdness which might be present in the request packet. */ - q->prep_rq_fn(q, rq); + ide_cdrom_prep_rq(drive, rq); } /* fs requests *must* be hardware frame aligned */ @@ -1323,82 +1412,6 @@ static int ide_cdrom_probe_capabilities(ide_drive_t *drive) return nslots; } -/* standard prep_rq_fn that builds 10 byte cmds */ -static int ide_cdrom_prep_fs(struct request_queue *q, struct request *rq) -{ - int hard_sect = queue_logical_block_size(q); - long block = (long)blk_rq_pos(rq) / (hard_sect >> 9); - unsigned long blocks = blk_rq_sectors(rq) / (hard_sect >> 9); - struct scsi_request *req = scsi_req(rq); - - q->initialize_rq_fn(rq); - - if (rq_data_dir(rq) == READ) - req->cmd[0] = GPCMD_READ_10; - else - req->cmd[0] = GPCMD_WRITE_10; - - /* - * fill in lba - */ - req->cmd[2] = (block >> 24) & 0xff; - req->cmd[3] = (block >> 16) & 0xff; - req->cmd[4] = (block >> 8) & 0xff; - req->cmd[5] = block & 0xff; - - /* - * and transfer length - */ - req->cmd[7] = (blocks >> 8) & 0xff; - req->cmd[8] = blocks & 0xff; - req->cmd_len = 10; - return BLKPREP_OK; -} - -/* - * Most of the SCSI commands are supported directly by ATAPI devices. - * This transform handles the few exceptions. - */ -static int ide_cdrom_prep_pc(struct request *rq) -{ - u8 *c = scsi_req(rq)->cmd; - - /* transform 6-byte read/write commands to the 10-byte version */ - if (c[0] == READ_6 || c[0] == WRITE_6) { - c[8] = c[4]; - c[5] = c[3]; - c[4] = c[2]; - c[3] = c[1] & 0x1f; - c[2] = 0; - c[1] &= 0xe0; - c[0] += (READ_10 - READ_6); - scsi_req(rq)->cmd_len = 10; - return BLKPREP_OK; - } - - /* - * it's silly to pretend we understand 6-byte sense commands, just - * reject with ILLEGAL_REQUEST and the caller should take the - * appropriate action - */ - if (c[0] == MODE_SENSE || c[0] == MODE_SELECT) { - scsi_req(rq)->result = ILLEGAL_REQUEST; - return BLKPREP_KILL; - } - - return BLKPREP_OK; -} - -static int ide_cdrom_prep_fn(struct request_queue *q, struct request *rq) -{ - if (!blk_rq_is_passthrough(rq)) - return ide_cdrom_prep_fs(q, rq); - else if (blk_rq_is_scsi(rq)) - return ide_cdrom_prep_pc(rq); - - return 0; -} - struct cd_list_entry { const char *id_model; const char *id_firmware; @@ -1508,7 +1521,7 @@ static int ide_cdrom_setup(ide_drive_t *drive) ide_debug_log(IDE_DBG_PROBE, "enter"); - blk_queue_prep_rq(q, ide_cdrom_prep_fn); + drive->prep_rq = ide_cdrom_prep_rq; blk_queue_dma_alignment(q, 31); blk_queue_update_dma_pad(q, 15); @@ -1569,7 +1582,7 @@ static void ide_cd_release(struct device *dev) if (devinfo->handle == drive) unregister_cdrom(devinfo); drive->driver_data = NULL; - blk_queue_prep_rq(drive->queue, NULL); + drive->prep_rq = NULL; g->private_data = NULL; put_disk(g); kfree(info); diff --git a/drivers/ide/ide-devsets.c b/drivers/ide/ide-devsets.c index f4f8afdf8bbe..f2f93ed40356 100644 --- a/drivers/ide/ide-devsets.c +++ b/drivers/ide/ide-devsets.c @@ -171,7 +171,7 @@ int ide_devset_execute(ide_drive_t *drive, const struct ide_devset *setting, scsi_req(rq)->cmd_len = 5; scsi_req(rq)->cmd[0] = REQ_DEVSET_EXEC; *(int *)&scsi_req(rq)->cmd[1] = arg; - rq->special = setting->set; + ide_req(rq)->special = setting->set; blk_execute_rq(q, NULL, rq, 0); ret = scsi_req(rq)->result; @@ -182,7 +182,7 @@ int ide_devset_execute(ide_drive_t *drive, const struct ide_devset *setting, ide_startstop_t ide_do_devset(ide_drive_t *drive, struct request *rq) { - int err, (*setfunc)(ide_drive_t *, int) = rq->special; + int err, (*setfunc)(ide_drive_t *, int) = ide_req(rq)->special; err = setfunc(drive, *(int *)&scsi_req(rq)->cmd[1]); if (err) diff --git a/drivers/ide/ide-disk.c b/drivers/ide/ide-disk.c index e3b4e659082d..197912af5c2f 100644 --- a/drivers/ide/ide-disk.c +++ b/drivers/ide/ide-disk.c @@ -427,16 +427,15 @@ static void ide_disk_unlock_native_capacity(ide_drive_t *drive) drive->dev_flags |= IDE_DFLAG_NOHPA; /* disable HPA on resume */ } -static int idedisk_prep_fn(struct request_queue *q, struct request *rq) +static bool idedisk_prep_rq(ide_drive_t *drive, struct request *rq) { - ide_drive_t *drive = q->queuedata; struct ide_cmd *cmd; if (req_op(rq) != REQ_OP_FLUSH) - return BLKPREP_OK; + return true; - if (rq->special) { - cmd = rq->special; + if (ide_req(rq)->special) { + cmd = ide_req(rq)->special; memset(cmd, 0, sizeof(*cmd)); } else { cmd = kzalloc(sizeof(*cmd), GFP_ATOMIC); @@ -456,10 +455,10 @@ static int idedisk_prep_fn(struct request_queue *q, struct request *rq) rq->cmd_flags &= ~REQ_OP_MASK; rq->cmd_flags |= REQ_OP_DRV_OUT; ide_req(rq)->type = ATA_PRIV_TASKFILE; - rq->special = cmd; + ide_req(rq)->special = cmd; cmd->rq = rq; - return BLKPREP_OK; + return true; } ide_devset_get(multcount, mult_count); @@ -548,7 +547,7 @@ static void update_flush(ide_drive_t *drive) if (barrier) { wc = true; - blk_queue_prep_rq(drive->queue, idedisk_prep_fn); + drive->prep_rq = idedisk_prep_rq; } } diff --git a/drivers/ide/ide-eh.c b/drivers/ide/ide-eh.c index 47d5f3379748..e1323e058454 100644 --- a/drivers/ide/ide-eh.c +++ b/drivers/ide/ide-eh.c @@ -125,7 +125,7 @@ ide_startstop_t ide_error(ide_drive_t *drive, const char *msg, u8 stat) /* retry only "normal" I/O: */ if (blk_rq_is_passthrough(rq)) { if (ata_taskfile_request(rq)) { - struct ide_cmd *cmd = rq->special; + struct ide_cmd *cmd = ide_req(rq)->special; if (cmd) ide_complete_cmd(drive, cmd, stat, err); diff --git a/drivers/ide/ide-floppy.c b/drivers/ide/ide-floppy.c index a8df300f949c..780d33ccc5d8 100644 --- a/drivers/ide/ide-floppy.c +++ b/drivers/ide/ide-floppy.c @@ -276,7 +276,7 @@ static ide_startstop_t ide_floppy_do_request(ide_drive_t *drive, switch (ide_req(rq)->type) { case ATA_PRIV_MISC: case ATA_PRIV_SENSE: - pc = (struct ide_atapi_pc *)rq->special; + pc = (struct ide_atapi_pc *)ide_req(rq)->special; break; default: BUG(); diff --git a/drivers/ide/ide-io.c b/drivers/ide/ide-io.c index 0d93e0cfbeaf..8445b484ae69 100644 --- a/drivers/ide/ide-io.c +++ b/drivers/ide/ide-io.c @@ -67,7 +67,15 @@ int ide_end_rq(ide_drive_t *drive, struct request *rq, blk_status_t error, ide_dma_on(drive); } - return blk_end_request(rq, error, nr_bytes); + if (!blk_update_request(rq, error, nr_bytes)) { + if (rq == drive->sense_rq) + drive->sense_rq = NULL; + + __blk_mq_end_request(rq, error); + return 0; + } + + return 1; } EXPORT_SYMBOL_GPL(ide_end_rq); @@ -103,7 +111,7 @@ void ide_complete_cmd(ide_drive_t *drive, struct ide_cmd *cmd, u8 stat, u8 err) } if (rq && ata_taskfile_request(rq)) { - struct ide_cmd *orig_cmd = rq->special; + struct ide_cmd *orig_cmd = ide_req(rq)->special; if (cmd->tf_flags & IDE_TFLAG_DYN) kfree(orig_cmd); @@ -253,7 +261,7 @@ EXPORT_SYMBOL_GPL(ide_init_sg_cmd); static ide_startstop_t execute_drive_cmd (ide_drive_t *drive, struct request *rq) { - struct ide_cmd *cmd = rq->special; + struct ide_cmd *cmd = ide_req(rq)->special; if (cmd) { if (cmd->protocol == ATA_PROT_PIO) { @@ -307,8 +315,6 @@ static ide_startstop_t start_request (ide_drive_t *drive, struct request *rq) { ide_startstop_t startstop; - BUG_ON(!(rq->rq_flags & RQF_STARTED)); - #ifdef DEBUG printk("%s: start_request: current=0x%08lx\n", drive->hwif->name, (unsigned long) rq); @@ -320,6 +326,9 @@ static ide_startstop_t start_request (ide_drive_t *drive, struct request *rq) goto kill_rq; } + if (drive->prep_rq && !drive->prep_rq(drive, rq)) + return ide_stopped; + if (ata_pm_request(rq)) ide_check_pm_state(drive, rq); @@ -343,7 +352,7 @@ static ide_startstop_t start_request (ide_drive_t *drive, struct request *rq) if (ata_taskfile_request(rq)) return execute_drive_cmd(drive, rq); else if (ata_pm_request(rq)) { - struct ide_pm_state *pm = rq->special; + struct ide_pm_state *pm = ide_req(rq)->special; #ifdef DEBUG_PM printk("%s: start_power_step(step: %d)\n", drive->name, pm->pm_step); @@ -430,44 +439,42 @@ static inline void ide_unlock_host(struct ide_host *host) } } -static void __ide_requeue_and_plug(struct request_queue *q, struct request *rq) -{ - if (rq) - blk_requeue_request(q, rq); - if (rq || blk_peek_request(q)) { - /* Use 3ms as that was the old plug delay */ - blk_delay_queue(q, 3); - } -} - void ide_requeue_and_plug(ide_drive_t *drive, struct request *rq) { struct request_queue *q = drive->queue; - unsigned long flags; - spin_lock_irqsave(q->queue_lock, flags); - __ide_requeue_and_plug(q, rq); - spin_unlock_irqrestore(q->queue_lock, flags); + /* Use 3ms as that was the old plug delay */ + if (rq) { + blk_mq_requeue_request(rq, false); + blk_mq_delay_kick_requeue_list(q, 3); + } else + blk_mq_delay_run_hw_queue(q->queue_hw_ctx[0], 3); } /* * Issue a new request to a device. */ -void do_ide_request(struct request_queue *q) +blk_status_t ide_queue_rq(struct blk_mq_hw_ctx *hctx, + const struct blk_mq_queue_data *bd) { - ide_drive_t *drive = q->queuedata; + ide_drive_t *drive = hctx->queue->queuedata; ide_hwif_t *hwif = drive->hwif; struct ide_host *host = hwif->host; - struct request *rq = NULL; + struct request *rq = bd->rq; ide_startstop_t startstop; - spin_unlock_irq(q->queue_lock); + if (!blk_rq_is_passthrough(rq) && !(rq->rq_flags & RQF_DONTPREP)) { + rq->rq_flags |= RQF_DONTPREP; + ide_req(rq)->special = NULL; + } /* HLD do_request() callback might sleep, make sure it's okay */ might_sleep(); if (ide_lock_host(host, hwif)) - goto plug_device_2; + return BLK_STS_DEV_RESOURCE; + + blk_mq_start_request(rq); spin_lock_irq(&hwif->lock); @@ -503,21 +510,16 @@ repeat: hwif->cur_dev = drive; drive->dev_flags &= ~(IDE_DFLAG_SLEEPING | IDE_DFLAG_PARKED); - spin_unlock_irq(&hwif->lock); - spin_lock_irq(q->queue_lock); /* * we know that the queue isn't empty, but this can happen - * if the q->prep_rq_fn() decides to kill a request + * if ->prep_rq() decides to kill a request */ - if (!rq) - rq = blk_fetch_request(drive->queue); - - spin_unlock_irq(q->queue_lock); - spin_lock_irq(&hwif->lock); - if (!rq) { - ide_unlock_port(hwif); - goto out; + rq = bd->rq; + if (!rq) { + ide_unlock_port(hwif); + goto out; + } } /* @@ -551,23 +553,24 @@ repeat: if (startstop == ide_stopped) { rq = hwif->rq; hwif->rq = NULL; - goto repeat; + if (rq) + goto repeat; + ide_unlock_port(hwif); + goto out; } - } else - goto plug_device; + } else { +plug_device: + spin_unlock_irq(&hwif->lock); + ide_unlock_host(host); + ide_requeue_and_plug(drive, rq); + return BLK_STS_OK; + } + out: spin_unlock_irq(&hwif->lock); if (rq == NULL) ide_unlock_host(host); - spin_lock_irq(q->queue_lock); - return; - -plug_device: - spin_unlock_irq(&hwif->lock); - ide_unlock_host(host); -plug_device_2: - spin_lock_irq(q->queue_lock); - __ide_requeue_and_plug(q, rq); + return BLK_STS_OK; } static int drive_is_ready(ide_drive_t *drive) @@ -887,3 +890,16 @@ void ide_pad_transfer(ide_drive_t *drive, int write, int len) } } EXPORT_SYMBOL_GPL(ide_pad_transfer); + +void ide_insert_request_head(ide_drive_t *drive, struct request *rq) +{ + ide_hwif_t *hwif = drive->hwif; + unsigned long flags; + + spin_lock_irqsave(&hwif->lock, flags); + list_add_tail(&rq->queuelist, &drive->rq_list); + spin_unlock_irqrestore(&hwif->lock, flags); + + kblockd_schedule_work(&drive->rq_work); +} +EXPORT_SYMBOL_GPL(ide_insert_request_head); diff --git a/drivers/ide/ide-park.c b/drivers/ide/ide-park.c index 622f0edb3945..102aa3bc3e7f 100644 --- a/drivers/ide/ide-park.c +++ b/drivers/ide/ide-park.c @@ -27,7 +27,7 @@ static void issue_park_cmd(ide_drive_t *drive, unsigned long timeout) spin_unlock_irq(&hwif->lock); if (start_queue) - blk_run_queue(q); + blk_mq_run_hw_queues(q, true); return; } spin_unlock_irq(&hwif->lock); @@ -36,7 +36,7 @@ static void issue_park_cmd(ide_drive_t *drive, unsigned long timeout) scsi_req(rq)->cmd[0] = REQ_PARK_HEADS; scsi_req(rq)->cmd_len = 1; ide_req(rq)->type = ATA_PRIV_MISC; - rq->special = &timeout; + ide_req(rq)->special = &timeout; blk_execute_rq(q, NULL, rq, 1); rc = scsi_req(rq)->result ? -EIO : 0; blk_put_request(rq); @@ -54,7 +54,7 @@ static void issue_park_cmd(ide_drive_t *drive, unsigned long timeout) scsi_req(rq)->cmd[0] = REQ_UNPARK_HEADS; scsi_req(rq)->cmd_len = 1; ide_req(rq)->type = ATA_PRIV_MISC; - elv_add_request(q, rq, ELEVATOR_INSERT_FRONT); + ide_insert_request_head(drive, rq); out: return; @@ -67,7 +67,7 @@ ide_startstop_t ide_do_park_unpark(ide_drive_t *drive, struct request *rq) memset(&cmd, 0, sizeof(cmd)); if (scsi_req(rq)->cmd[0] == REQ_PARK_HEADS) { - drive->sleep = *(unsigned long *)rq->special; + drive->sleep = *(unsigned long *)ide_req(rq)->special; drive->dev_flags |= IDE_DFLAG_SLEEPING; tf->command = ATA_CMD_IDLEIMMEDIATE; tf->feature = 0x44; diff --git a/drivers/ide/ide-pm.c b/drivers/ide/ide-pm.c index 59217aa1d1fb..192e6c65d34e 100644 --- a/drivers/ide/ide-pm.c +++ b/drivers/ide/ide-pm.c @@ -21,7 +21,7 @@ int generic_ide_suspend(struct device *dev, pm_message_t mesg) memset(&rqpm, 0, sizeof(rqpm)); rq = blk_get_request(drive->queue, REQ_OP_DRV_IN, 0); ide_req(rq)->type = ATA_PRIV_PM_SUSPEND; - rq->special = &rqpm; + ide_req(rq)->special = &rqpm; rqpm.pm_step = IDE_PM_START_SUSPEND; if (mesg.event == PM_EVENT_PRETHAW) mesg.event = PM_EVENT_FREEZE; @@ -40,32 +40,17 @@ int generic_ide_suspend(struct device *dev, pm_message_t mesg) return ret; } -static void ide_end_sync_rq(struct request *rq, blk_status_t error) -{ - complete(rq->end_io_data); -} - static int ide_pm_execute_rq(struct request *rq) { struct request_queue *q = rq->q; - DECLARE_COMPLETION_ONSTACK(wait); - rq->end_io_data = &wait; - rq->end_io = ide_end_sync_rq; - - spin_lock_irq(q->queue_lock); if (unlikely(blk_queue_dying(q))) { rq->rq_flags |= RQF_QUIET; scsi_req(rq)->result = -ENXIO; - __blk_end_request_all(rq, BLK_STS_OK); - spin_unlock_irq(q->queue_lock); + blk_mq_end_request(rq, BLK_STS_OK); return -ENXIO; } - __elv_add_request(q, rq, ELEVATOR_INSERT_FRONT); - __blk_run_queue_uncond(q); - spin_unlock_irq(q->queue_lock); - - wait_for_completion_io(&wait); + blk_execute_rq(q, NULL, rq, true); return scsi_req(rq)->result ? -EIO : 0; } @@ -79,6 +64,8 @@ int generic_ide_resume(struct device *dev) struct ide_pm_state rqpm; int err; + blk_mq_start_stopped_hw_queues(drive->queue, true); + if (ide_port_acpi(hwif)) { /* call ACPI _PS0 / _STM only once */ if ((drive->dn & 1) == 0 || pair == NULL) { @@ -92,7 +79,7 @@ int generic_ide_resume(struct device *dev) memset(&rqpm, 0, sizeof(rqpm)); rq = blk_get_request(drive->queue, REQ_OP_DRV_IN, BLK_MQ_REQ_PREEMPT); ide_req(rq)->type = ATA_PRIV_PM_RESUME; - rq->special = &rqpm; + ide_req(rq)->special = &rqpm; rqpm.pm_step = IDE_PM_START_RESUME; rqpm.pm_state = PM_EVENT_ON; @@ -111,7 +98,7 @@ int generic_ide_resume(struct device *dev) void ide_complete_power_step(ide_drive_t *drive, struct request *rq) { - struct ide_pm_state *pm = rq->special; + struct ide_pm_state *pm = ide_req(rq)->special; #ifdef DEBUG_PM printk(KERN_INFO "%s: complete_power_step(step: %d)\n", @@ -141,7 +128,7 @@ void ide_complete_power_step(ide_drive_t *drive, struct request *rq) ide_startstop_t ide_start_power_step(ide_drive_t *drive, struct request *rq) { - struct ide_pm_state *pm = rq->special; + struct ide_pm_state *pm = ide_req(rq)->special; struct ide_cmd cmd = { }; switch (pm->pm_step) { @@ -213,8 +200,7 @@ out_do_tf: void ide_complete_pm_rq(ide_drive_t *drive, struct request *rq) { struct request_queue *q = drive->queue; - struct ide_pm_state *pm = rq->special; - unsigned long flags; + struct ide_pm_state *pm = ide_req(rq)->special; ide_complete_power_step(drive, rq); if (pm->pm_step != IDE_PM_COMPLETED) @@ -224,22 +210,19 @@ void ide_complete_pm_rq(ide_drive_t *drive, struct request *rq) printk("%s: completing PM request, %s\n", drive->name, (ide_req(rq)->type == ATA_PRIV_PM_SUSPEND) ? "suspend" : "resume"); #endif - spin_lock_irqsave(q->queue_lock, flags); if (ide_req(rq)->type == ATA_PRIV_PM_SUSPEND) - blk_stop_queue(q); + blk_mq_stop_hw_queues(q); else drive->dev_flags &= ~IDE_DFLAG_BLOCKED; - spin_unlock_irqrestore(q->queue_lock, flags); drive->hwif->rq = NULL; - if (blk_end_request(rq, BLK_STS_OK, 0)) - BUG(); + blk_mq_end_request(rq, BLK_STS_OK); } void ide_check_pm_state(ide_drive_t *drive, struct request *rq) { - struct ide_pm_state *pm = rq->special; + struct ide_pm_state *pm = ide_req(rq)->special; if (blk_rq_is_private(rq) && ide_req(rq)->type == ATA_PRIV_PM_SUSPEND && @@ -260,7 +243,6 @@ void ide_check_pm_state(ide_drive_t *drive, struct request *rq) ide_hwif_t *hwif = drive->hwif; const struct ide_tp_ops *tp_ops = hwif->tp_ops; struct request_queue *q = drive->queue; - unsigned long flags; int rc; #ifdef DEBUG_PM printk("%s: Wakeup request inited, waiting for !BSY...\n", drive->name); @@ -274,8 +256,6 @@ void ide_check_pm_state(ide_drive_t *drive, struct request *rq) if (rc) printk(KERN_WARNING "%s: drive not ready on wakeup\n", drive->name); - spin_lock_irqsave(q->queue_lock, flags); - blk_start_queue(q); - spin_unlock_irqrestore(q->queue_lock, flags); + blk_mq_start_hw_queues(q); } } diff --git a/drivers/ide/ide-probe.c b/drivers/ide/ide-probe.c index 3b75a7b7a284..63627be0811a 100644 --- a/drivers/ide/ide-probe.c +++ b/drivers/ide/ide-probe.c @@ -746,10 +746,16 @@ static void ide_initialize_rq(struct request *rq) { struct ide_request *req = blk_mq_rq_to_pdu(rq); + req->special = NULL; scsi_req_init(&req->sreq); req->sreq.sense = req->sense; } +static const struct blk_mq_ops ide_mq_ops = { + .queue_rq = ide_queue_rq, + .initialize_rq_fn = ide_initialize_rq, +}; + /* * init request queue */ @@ -759,6 +765,7 @@ static int ide_init_queue(ide_drive_t *drive) ide_hwif_t *hwif = drive->hwif; int max_sectors = 256; int max_sg_entries = PRD_ENTRIES; + struct blk_mq_tag_set *set; /* * Our default set up assumes the normal IDE case, @@ -767,19 +774,26 @@ static int ide_init_queue(ide_drive_t *drive) * limits and LBA48 we could raise it but as yet * do not. */ - q = blk_alloc_queue_node(GFP_KERNEL, hwif_to_node(hwif), NULL); - if (!q) + + set = &drive->tag_set; + set->ops = &ide_mq_ops; + set->nr_hw_queues = 1; + set->queue_depth = 32; + set->reserved_tags = 1; + set->cmd_size = sizeof(struct ide_request); + set->numa_node = hwif_to_node(hwif); + set->flags = BLK_MQ_F_SHOULD_MERGE | BLK_MQ_F_BLOCKING; + if (blk_mq_alloc_tag_set(set)) return 1; - q->request_fn = do_ide_request; - q->initialize_rq_fn = ide_initialize_rq; - q->cmd_size = sizeof(struct ide_request); - blk_queue_flag_set(QUEUE_FLAG_SCSI_PASSTHROUGH, q); - if (blk_init_allocated_queue(q) < 0) { - blk_cleanup_queue(q); + q = blk_mq_init_queue(set); + if (IS_ERR(q)) { + blk_mq_free_tag_set(set); return 1; } + blk_queue_flag_set(QUEUE_FLAG_SCSI_PASSTHROUGH, q); + q->queuedata = drive; blk_queue_segment_boundary(q, 0xffff); @@ -965,8 +979,12 @@ static void drive_release_dev (struct device *dev) ide_proc_unregister_device(drive); + if (drive->sense_rq) + blk_mq_free_request(drive->sense_rq); + blk_cleanup_queue(drive->queue); drive->queue = NULL; + blk_mq_free_tag_set(&drive->tag_set); drive->dev_flags &= ~IDE_DFLAG_PRESENT; @@ -1133,6 +1151,28 @@ static void ide_port_cable_detect(ide_hwif_t *hwif) } } +/* + * Deferred request list insertion handler + */ +static void drive_rq_insert_work(struct work_struct *work) +{ + ide_drive_t *drive = container_of(work, ide_drive_t, rq_work); + ide_hwif_t *hwif = drive->hwif; + struct request *rq; + LIST_HEAD(list); + + spin_lock_irq(&hwif->lock); + if (!list_empty(&drive->rq_list)) + list_splice_init(&drive->rq_list, &list); + spin_unlock_irq(&hwif->lock); + + while (!list_empty(&list)) { + rq = list_first_entry(&list, struct request, queuelist); + list_del_init(&rq->queuelist); + blk_execute_rq_nowait(drive->queue, rq->rq_disk, rq, true, NULL); + } +} + static const u8 ide_hwif_to_major[] = { IDE0_MAJOR, IDE1_MAJOR, IDE2_MAJOR, IDE3_MAJOR, IDE4_MAJOR, IDE5_MAJOR, IDE6_MAJOR, IDE7_MAJOR, IDE8_MAJOR, IDE9_MAJOR }; @@ -1145,12 +1185,10 @@ static void ide_port_init_devices_data(ide_hwif_t *hwif) ide_port_for_each_dev(i, drive, hwif) { u8 j = (hwif->index * MAX_DRIVES) + i; u16 *saved_id = drive->id; - struct request *saved_sense_rq = drive->sense_rq; memset(drive, 0, sizeof(*drive)); memset(saved_id, 0, SECTOR_SIZE); drive->id = saved_id; - drive->sense_rq = saved_sense_rq; drive->media = ide_disk; drive->select = (i << 4) | ATA_DEVICE_OBS; @@ -1166,6 +1204,9 @@ static void ide_port_init_devices_data(ide_hwif_t *hwif) INIT_LIST_HEAD(&drive->list); init_completion(&drive->gendev_rel_comp); + + INIT_WORK(&drive->rq_work, drive_rq_insert_work); + INIT_LIST_HEAD(&drive->rq_list); } } @@ -1255,7 +1296,6 @@ static void ide_port_free_devices(ide_hwif_t *hwif) int i; ide_port_for_each_dev(i, drive, hwif) { - kfree(drive->sense_rq); kfree(drive->id); kfree(drive); } @@ -1283,17 +1323,10 @@ static int ide_port_alloc_devices(ide_hwif_t *hwif, int node) if (drive->id == NULL) goto out_free_drive; - drive->sense_rq = kmalloc(sizeof(struct request) + - sizeof(struct ide_request), GFP_KERNEL); - if (!drive->sense_rq) - goto out_free_id; - hwif->devices[i] = drive; } return 0; -out_free_id: - kfree(drive->id); out_free_drive: kfree(drive); out_nomem: diff --git a/drivers/ide/ide-tape.c b/drivers/ide/ide-tape.c index 34c1165226a4..db1a65f4b490 100644 --- a/drivers/ide/ide-tape.c +++ b/drivers/ide/ide-tape.c @@ -639,7 +639,7 @@ static ide_startstop_t idetape_do_request(ide_drive_t *drive, goto out; } if (req->cmd[13] & REQ_IDETAPE_PC1) { - pc = (struct ide_atapi_pc *)rq->special; + pc = (struct ide_atapi_pc *)ide_req(rq)->special; req->cmd[13] &= ~(REQ_IDETAPE_PC1); req->cmd[13] |= REQ_IDETAPE_PC2; goto out; diff --git a/drivers/ide/ide-taskfile.c b/drivers/ide/ide-taskfile.c index c21d5c50ae3a..17b2e379e872 100644 --- a/drivers/ide/ide-taskfile.c +++ b/drivers/ide/ide-taskfile.c @@ -440,7 +440,7 @@ int ide_raw_taskfile(ide_drive_t *drive, struct ide_cmd *cmd, u8 *buf, goto put_req; } - rq->special = cmd; + ide_req(rq)->special = cmd; cmd->rq = rq; blk_execute_rq(drive->queue, NULL, rq, 0); diff --git a/drivers/infiniband/core/Makefile b/drivers/infiniband/core/Makefile index 867cee5e27b2..69dee36e0e89 100644 --- a/drivers/infiniband/core/Makefile +++ b/drivers/infiniband/core/Makefile @@ -38,4 +38,4 @@ ib_uverbs-y := uverbs_main.o uverbs_cmd.o uverbs_marshall.o \ uverbs_std_types_cq.o \ uverbs_std_types_flow_action.o uverbs_std_types_dm.o \ uverbs_std_types_mr.o uverbs_std_types_counters.o \ - uverbs_uapi.o + uverbs_uapi.o uverbs_std_types_device.o diff --git a/drivers/infiniband/core/agent.c b/drivers/infiniband/core/agent.c index 324ef85a13b6..f82b4260de42 100644 --- a/drivers/infiniband/core/agent.c +++ b/drivers/infiniband/core/agent.c @@ -137,13 +137,13 @@ void agent_send_response(const struct ib_mad_hdr *mad_hdr, const struct ib_grh * err2: ib_free_send_mad(send_buf); err1: - rdma_destroy_ah(ah); + rdma_destroy_ah(ah, RDMA_DESTROY_AH_SLEEPABLE); } static void agent_send_handler(struct ib_mad_agent *mad_agent, struct ib_mad_send_wc *mad_send_wc) { - rdma_destroy_ah(mad_send_wc->send_buf->ah); + rdma_destroy_ah(mad_send_wc->send_buf->ah, RDMA_DESTROY_AH_SLEEPABLE); ib_free_send_mad(mad_send_wc->send_buf); } diff --git a/drivers/infiniband/core/cache.c b/drivers/infiniband/core/cache.c index 5b2fce4a7091..7b04590f307f 100644 --- a/drivers/infiniband/core/cache.c +++ b/drivers/infiniband/core/cache.c @@ -215,10 +215,6 @@ static void free_gid_entry_locked(struct ib_gid_table_entry *entry) dev_dbg(&device->dev, "%s port=%d index=%d gid %pI6\n", __func__, port_num, entry->attr.index, entry->attr.gid.raw); - if (rdma_cap_roce_gid_table(device, port_num) && - entry->state != GID_TABLE_ENTRY_INVALID) - device->del_gid(&entry->attr, &entry->context); - write_lock_irq(&table->rwlock); /* @@ -324,7 +320,7 @@ static int add_roce_gid(struct ib_gid_table_entry *entry) return -EINVAL; } if (rdma_cap_roce_gid_table(attr->device, attr->port_num)) { - ret = attr->device->add_gid(attr, &entry->context); + ret = attr->device->ops.add_gid(attr, &entry->context); if (ret) { dev_err(&attr->device->dev, "%s GID add failed port=%d index=%d\n", @@ -364,6 +360,9 @@ static void del_gid(struct ib_device *ib_dev, u8 port, table->data_vec[ix] = NULL; write_unlock_irq(&table->rwlock); + if (rdma_cap_roce_gid_table(ib_dev, port)) + ib_dev->ops.del_gid(&entry->attr, &entry->context); + put_gid_entry_locked(entry); } @@ -548,8 +547,8 @@ int ib_cache_gid_add(struct ib_device *ib_dev, u8 port, unsigned long mask; int ret; - if (ib_dev->get_netdev) { - idev = ib_dev->get_netdev(ib_dev, port); + if (ib_dev->ops.get_netdev) { + idev = ib_dev->ops.get_netdev(ib_dev, port); if (idev && attr->ndev != idev) { union ib_gid default_gid; @@ -1296,9 +1295,9 @@ static int config_non_roce_gid_cache(struct ib_device *device, mutex_lock(&table->lock); for (i = 0; i < gid_tbl_len; ++i) { - if (!device->query_gid) + if (!device->ops.query_gid) continue; - ret = device->query_gid(device, port, i, &gid_attr.gid); + ret = device->ops.query_gid(device, port, i, &gid_attr.gid); if (ret) { dev_warn(&device->dev, "query_gid failed (%d) for index %d\n", ret, diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c index edb2cb758be7..37980c7564c0 100644 --- a/drivers/infiniband/core/cm.c +++ b/drivers/infiniband/core/cm.c @@ -343,7 +343,7 @@ static int cm_alloc_msg(struct cm_id_private *cm_id_priv, ret = -ENODEV; goto out; } - ah = rdma_create_ah(mad_agent->qp->pd, &av->ah_attr); + ah = rdma_create_ah(mad_agent->qp->pd, &av->ah_attr, 0); if (IS_ERR(ah)) { ret = PTR_ERR(ah); goto out; @@ -355,7 +355,7 @@ static int cm_alloc_msg(struct cm_id_private *cm_id_priv, GFP_ATOMIC, IB_MGMT_BASE_VERSION); if (IS_ERR(m)) { - rdma_destroy_ah(ah); + rdma_destroy_ah(ah, 0); ret = PTR_ERR(m); goto out; } @@ -400,7 +400,7 @@ static int cm_create_response_msg_ah(struct cm_port *port, static void cm_free_msg(struct ib_mad_send_buf *msg) { if (msg->ah) - rdma_destroy_ah(msg->ah); + rdma_destroy_ah(msg->ah, 0); if (msg->context[0]) cm_deref_id(msg->context[0]); ib_free_send_mad(msg); diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index 15d5bb7bf6bb..63a7cc00bae0 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -494,7 +494,7 @@ static void _cma_attach_to_dev(struct rdma_id_private *id_priv, id_priv->id.route.addr.dev_addr.transport = rdma_node_get_transport(cma_dev->device->node_type); list_add_tail(&id_priv->list, &cma_dev->id_list); - rdma_restrack_add(&id_priv->res); + rdma_restrack_kadd(&id_priv->res); } static void cma_attach_to_dev(struct rdma_id_private *id_priv, diff --git a/drivers/infiniband/core/cma_configfs.c b/drivers/infiniband/core/cma_configfs.c index 8c2dfb3e294e..3ec2c415bb70 100644 --- a/drivers/infiniband/core/cma_configfs.c +++ b/drivers/infiniband/core/cma_configfs.c @@ -33,7 +33,10 @@ #include <linux/module.h> #include <linux/configfs.h> #include <rdma/ib_verbs.h> +#include <rdma/rdma_cm.h> + #include "core_priv.h" +#include "cma_priv.h" struct cma_device; diff --git a/drivers/infiniband/core/cma_priv.h b/drivers/infiniband/core/cma_priv.h index 194cfe78c447..cf47c69436a7 100644 --- a/drivers/infiniband/core/cma_priv.h +++ b/drivers/infiniband/core/cma_priv.h @@ -94,4 +94,32 @@ struct rdma_id_private { */ struct rdma_restrack_entry res; }; + +#if IS_ENABLED(CONFIG_INFINIBAND_ADDR_TRANS_CONFIGFS) +int cma_configfs_init(void); +void cma_configfs_exit(void); +#else +static inline int cma_configfs_init(void) +{ + return 0; +} + +static inline void cma_configfs_exit(void) +{ +} +#endif + +void cma_ref_dev(struct cma_device *dev); +void cma_deref_dev(struct cma_device *dev); +typedef bool (*cma_device_filter)(struct ib_device *, void *); +struct cma_device *cma_enum_devices_by_ibdev(cma_device_filter filter, + void *cookie); +int cma_get_default_gid_type(struct cma_device *dev, unsigned int port); +int cma_set_default_gid_type(struct cma_device *dev, unsigned int port, + enum ib_gid_type default_gid_type); +int cma_get_default_roce_tos(struct cma_device *dev, unsigned int port); +int cma_set_default_roce_tos(struct cma_device *dev, unsigned int port, + u8 default_roce_tos); +struct ib_device *cma_get_ib_dev(struct cma_device *dev); + #endif /* _CMA_PRIV_H */ diff --git a/drivers/infiniband/core/core_priv.h b/drivers/infiniband/core/core_priv.h index bb9007a0cca7..3cd830d52967 100644 --- a/drivers/infiniband/core/core_priv.h +++ b/drivers/infiniband/core/core_priv.h @@ -54,35 +54,6 @@ struct pkey_index_qp_list { struct list_head qp_list; }; -#if IS_ENABLED(CONFIG_INFINIBAND_ADDR_TRANS_CONFIGFS) -int cma_configfs_init(void); -void cma_configfs_exit(void); -#else -static inline int cma_configfs_init(void) -{ - return 0; -} - -static inline void cma_configfs_exit(void) -{ -} -#endif -struct cma_device; -void cma_ref_dev(struct cma_device *cma_dev); -void cma_deref_dev(struct cma_device *cma_dev); -typedef bool (*cma_device_filter)(struct ib_device *, void *); -struct cma_device *cma_enum_devices_by_ibdev(cma_device_filter filter, - void *cookie); -int cma_get_default_gid_type(struct cma_device *cma_dev, - unsigned int port); -int cma_set_default_gid_type(struct cma_device *cma_dev, - unsigned int port, - enum ib_gid_type default_gid_type); -int cma_get_default_roce_tos(struct cma_device *cma_dev, unsigned int port); -int cma_set_default_roce_tos(struct cma_device *a_dev, unsigned int port, - u8 default_roce_tos); -struct ib_device *cma_get_ib_dev(struct cma_device *cma_dev); - int ib_device_register_sysfs(struct ib_device *device, int (*port_callback)(struct ib_device *, u8, struct kobject *)); @@ -244,10 +215,10 @@ static inline int ib_security_modify_qp(struct ib_qp *qp, int qp_attr_mask, struct ib_udata *udata) { - return qp->device->modify_qp(qp->real_qp, - qp_attr, - qp_attr_mask, - udata); + return qp->device->ops.modify_qp(qp->real_qp, + qp_attr, + qp_attr_mask, + udata); } static inline int ib_create_qp_security(struct ib_qp *qp, @@ -296,6 +267,7 @@ static inline int ib_mad_enforce_security(struct ib_mad_agent_private *map, #endif struct ib_device *ib_device_get_by_index(u32 ifindex); +void ib_device_put(struct ib_device *device); /* RDMA device netlink */ void nldev_init(void); void nldev_exit(void); @@ -308,10 +280,10 @@ static inline struct ib_qp *_ib_create_qp(struct ib_device *dev, { struct ib_qp *qp; - if (!dev->create_qp) + if (!dev->ops.create_qp) return ERR_PTR(-EOPNOTSUPP); - qp = dev->create_qp(pd, attr, udata); + qp = dev->ops.create_qp(pd, attr, udata); if (IS_ERR(qp)) return qp; @@ -325,7 +297,10 @@ static inline struct ib_qp *_ib_create_qp(struct ib_device *dev, */ if (attr->qp_type < IB_QPT_XRC_INI) { qp->res.type = RDMA_RESTRACK_QP; - rdma_restrack_add(&qp->res); + if (uobj) + rdma_restrack_uadd(&qp->res); + else + rdma_restrack_kadd(&qp->res); } else qp->res.valid = false; diff --git a/drivers/infiniband/core/cq.c b/drivers/infiniband/core/cq.c index b1e5365ddafa..d61e5e1427c2 100644 --- a/drivers/infiniband/core/cq.c +++ b/drivers/infiniband/core/cq.c @@ -145,7 +145,7 @@ struct ib_cq *__ib_alloc_cq(struct ib_device *dev, void *private, struct ib_cq *cq; int ret = -ENOMEM; - cq = dev->create_cq(dev, &cq_attr, NULL, NULL); + cq = dev->ops.create_cq(dev, &cq_attr, NULL, NULL); if (IS_ERR(cq)) return cq; @@ -162,7 +162,7 @@ struct ib_cq *__ib_alloc_cq(struct ib_device *dev, void *private, cq->res.type = RDMA_RESTRACK_CQ; rdma_restrack_set_task(&cq->res, caller); - rdma_restrack_add(&cq->res); + rdma_restrack_kadd(&cq->res); switch (cq->poll_ctx) { case IB_POLL_DIRECT: @@ -193,7 +193,7 @@ out_free_wc: kfree(cq->wc); rdma_restrack_del(&cq->res); out_destroy_cq: - cq->device->destroy_cq(cq); + cq->device->ops.destroy_cq(cq); return ERR_PTR(ret); } EXPORT_SYMBOL(__ib_alloc_cq); @@ -225,7 +225,7 @@ void ib_free_cq(struct ib_cq *cq) kfree(cq->wc); rdma_restrack_del(&cq->res); - ret = cq->device->destroy_cq(cq); + ret = cq->device->ops.destroy_cq(cq); WARN_ON_ONCE(ret); } EXPORT_SYMBOL(ib_free_cq); diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c index 87eb4f2cdd7d..47ab34ee1a9d 100644 --- a/drivers/infiniband/core/device.c +++ b/drivers/infiniband/core/device.c @@ -96,7 +96,7 @@ static struct notifier_block ibdev_lsm_nb = { static int ib_device_check_mandatory(struct ib_device *device) { -#define IB_MANDATORY_FUNC(x) { offsetof(struct ib_device, x), #x } +#define IB_MANDATORY_FUNC(x) { offsetof(struct ib_device_ops, x), #x } static const struct { size_t offset; char *name; @@ -122,7 +122,8 @@ static int ib_device_check_mandatory(struct ib_device *device) int i; for (i = 0; i < ARRAY_SIZE(mandatory_table); ++i) { - if (!*(void **) ((void *) device + mandatory_table[i].offset)) { + if (!*(void **) ((void *) &device->ops + + mandatory_table[i].offset)) { dev_warn(&device->dev, "Device is missing mandatory function %s\n", mandatory_table[i].name); @@ -145,7 +146,8 @@ static struct ib_device *__ib_device_get_by_index(u32 index) } /* - * Caller is responsible to return refrerence count by calling put_device() + * Caller must perform ib_device_put() to return the device reference count + * when ib_device_get_by_index() returns valid device pointer. */ struct ib_device *ib_device_get_by_index(u32 index) { @@ -153,13 +155,21 @@ struct ib_device *ib_device_get_by_index(u32 index) down_read(&lists_rwsem); device = __ib_device_get_by_index(index); - if (device) - get_device(&device->dev); - + if (device) { + /* Do not return a device if unregistration has started. */ + if (!refcount_inc_not_zero(&device->refcount)) + device = NULL; + } up_read(&lists_rwsem); return device; } +void ib_device_put(struct ib_device *device) +{ + if (refcount_dec_and_test(&device->refcount)) + complete(&device->unreg_completion); +} + static struct ib_device *__ib_device_get_by_name(const char *name) { struct ib_device *device; @@ -293,6 +303,8 @@ struct ib_device *ib_alloc_device(size_t size) rwlock_init(&device->client_data_lock); INIT_LIST_HEAD(&device->client_data_list); INIT_LIST_HEAD(&device->port_list); + refcount_set(&device->refcount, 1); + init_completion(&device->unreg_completion); return device; } @@ -362,8 +374,8 @@ static int read_port_immutable(struct ib_device *device) return -ENOMEM; for (port = start_port; port <= end_port; ++port) { - ret = device->get_port_immutable(device, port, - &device->port_immutable[port]); + ret = device->ops.get_port_immutable( + device, port, &device->port_immutable[port]); if (ret) return ret; @@ -375,8 +387,8 @@ static int read_port_immutable(struct ib_device *device) void ib_get_device_fw_str(struct ib_device *dev, char *str) { - if (dev->get_dev_fw_str) - dev->get_dev_fw_str(dev, str); + if (dev->ops.get_dev_fw_str) + dev->ops.get_dev_fw_str(dev, str); else str[0] = '\0'; } @@ -525,7 +537,7 @@ static int setup_device(struct ib_device *device) } memset(&device->attrs, 0, sizeof(device->attrs)); - ret = device->query_device(device, &device->attrs, &uhw); + ret = device->ops.query_device(device, &device->attrs, &uhw); if (ret) { dev_warn(&device->dev, "Couldn't query the device attributes\n"); @@ -641,6 +653,13 @@ void ib_unregister_device(struct ib_device *device) struct ib_client_data *context, *tmp; unsigned long flags; + /* + * Wait for all netlink command callers to finish working on the + * device. + */ + ib_device_put(device); + wait_for_completion(&device->unreg_completion); + mutex_lock(&device_mutex); down_write(&lists_rwsem); @@ -905,14 +924,14 @@ int ib_query_port(struct ib_device *device, return -EINVAL; memset(port_attr, 0, sizeof(*port_attr)); - err = device->query_port(device, port_num, port_attr); + err = device->ops.query_port(device, port_num, port_attr); if (err || port_attr->subnet_prefix) return err; if (rdma_port_get_link_layer(device, port_num) != IB_LINK_LAYER_INFINIBAND) return 0; - err = device->query_gid(device, port_num, 0, &gid); + err = device->ops.query_gid(device, port_num, 0, &gid); if (err) return err; @@ -946,8 +965,8 @@ void ib_enum_roce_netdev(struct ib_device *ib_dev, if (rdma_protocol_roce(ib_dev, port)) { struct net_device *idev = NULL; - if (ib_dev->get_netdev) - idev = ib_dev->get_netdev(ib_dev, port); + if (ib_dev->ops.get_netdev) + idev = ib_dev->ops.get_netdev(ib_dev, port); if (idev && idev->reg_state >= NETREG_UNREGISTERED) { @@ -1024,7 +1043,10 @@ int ib_enum_all_devs(nldev_callback nldev_cb, struct sk_buff *skb, int ib_query_pkey(struct ib_device *device, u8 port_num, u16 index, u16 *pkey) { - return device->query_pkey(device, port_num, index, pkey); + if (!rdma_is_port_valid(device, port_num)) + return -EINVAL; + + return device->ops.query_pkey(device, port_num, index, pkey); } EXPORT_SYMBOL(ib_query_pkey); @@ -1041,11 +1063,11 @@ int ib_modify_device(struct ib_device *device, int device_modify_mask, struct ib_device_modify *device_modify) { - if (!device->modify_device) + if (!device->ops.modify_device) return -ENOSYS; - return device->modify_device(device, device_modify_mask, - device_modify); + return device->ops.modify_device(device, device_modify_mask, + device_modify); } EXPORT_SYMBOL(ib_modify_device); @@ -1069,9 +1091,10 @@ int ib_modify_port(struct ib_device *device, if (!rdma_is_port_valid(device, port_num)) return -EINVAL; - if (device->modify_port) - rc = device->modify_port(device, port_num, port_modify_mask, - port_modify); + if (device->ops.modify_port) + rc = device->ops.modify_port(device, port_num, + port_modify_mask, + port_modify); else rc = rdma_protocol_roce(device, port_num) ? 0 : -ENOSYS; return rc; @@ -1198,6 +1221,105 @@ struct net_device *ib_get_net_dev_by_params(struct ib_device *dev, } EXPORT_SYMBOL(ib_get_net_dev_by_params); +void ib_set_device_ops(struct ib_device *dev, const struct ib_device_ops *ops) +{ + struct ib_device_ops *dev_ops = &dev->ops; +#define SET_DEVICE_OP(ptr, name) \ + do { \ + if (ops->name) \ + if (!((ptr)->name)) \ + (ptr)->name = ops->name; \ + } while (0) + + SET_DEVICE_OP(dev_ops, add_gid); + SET_DEVICE_OP(dev_ops, alloc_dm); + SET_DEVICE_OP(dev_ops, alloc_fmr); + SET_DEVICE_OP(dev_ops, alloc_hw_stats); + SET_DEVICE_OP(dev_ops, alloc_mr); + SET_DEVICE_OP(dev_ops, alloc_mw); + SET_DEVICE_OP(dev_ops, alloc_pd); + SET_DEVICE_OP(dev_ops, alloc_rdma_netdev); + SET_DEVICE_OP(dev_ops, alloc_ucontext); + SET_DEVICE_OP(dev_ops, alloc_xrcd); + SET_DEVICE_OP(dev_ops, attach_mcast); + SET_DEVICE_OP(dev_ops, check_mr_status); + SET_DEVICE_OP(dev_ops, create_ah); + SET_DEVICE_OP(dev_ops, create_counters); + SET_DEVICE_OP(dev_ops, create_cq); + SET_DEVICE_OP(dev_ops, create_flow); + SET_DEVICE_OP(dev_ops, create_flow_action_esp); + SET_DEVICE_OP(dev_ops, create_qp); + SET_DEVICE_OP(dev_ops, create_rwq_ind_table); + SET_DEVICE_OP(dev_ops, create_srq); + SET_DEVICE_OP(dev_ops, create_wq); + SET_DEVICE_OP(dev_ops, dealloc_dm); + SET_DEVICE_OP(dev_ops, dealloc_fmr); + SET_DEVICE_OP(dev_ops, dealloc_mw); + SET_DEVICE_OP(dev_ops, dealloc_pd); + SET_DEVICE_OP(dev_ops, dealloc_ucontext); + SET_DEVICE_OP(dev_ops, dealloc_xrcd); + SET_DEVICE_OP(dev_ops, del_gid); + SET_DEVICE_OP(dev_ops, dereg_mr); + SET_DEVICE_OP(dev_ops, destroy_ah); + SET_DEVICE_OP(dev_ops, destroy_counters); + SET_DEVICE_OP(dev_ops, destroy_cq); + SET_DEVICE_OP(dev_ops, destroy_flow); + SET_DEVICE_OP(dev_ops, destroy_flow_action); + SET_DEVICE_OP(dev_ops, destroy_qp); + SET_DEVICE_OP(dev_ops, destroy_rwq_ind_table); + SET_DEVICE_OP(dev_ops, destroy_srq); + SET_DEVICE_OP(dev_ops, destroy_wq); + SET_DEVICE_OP(dev_ops, detach_mcast); + SET_DEVICE_OP(dev_ops, disassociate_ucontext); + SET_DEVICE_OP(dev_ops, drain_rq); + SET_DEVICE_OP(dev_ops, drain_sq); + SET_DEVICE_OP(dev_ops, get_dev_fw_str); + SET_DEVICE_OP(dev_ops, get_dma_mr); + SET_DEVICE_OP(dev_ops, get_hw_stats); + SET_DEVICE_OP(dev_ops, get_link_layer); + SET_DEVICE_OP(dev_ops, get_netdev); + SET_DEVICE_OP(dev_ops, get_port_immutable); + SET_DEVICE_OP(dev_ops, get_vector_affinity); + SET_DEVICE_OP(dev_ops, get_vf_config); + SET_DEVICE_OP(dev_ops, get_vf_stats); + SET_DEVICE_OP(dev_ops, map_mr_sg); + SET_DEVICE_OP(dev_ops, map_phys_fmr); + SET_DEVICE_OP(dev_ops, mmap); + SET_DEVICE_OP(dev_ops, modify_ah); + SET_DEVICE_OP(dev_ops, modify_cq); + SET_DEVICE_OP(dev_ops, modify_device); + SET_DEVICE_OP(dev_ops, modify_flow_action_esp); + SET_DEVICE_OP(dev_ops, modify_port); + SET_DEVICE_OP(dev_ops, modify_qp); + SET_DEVICE_OP(dev_ops, modify_srq); + SET_DEVICE_OP(dev_ops, modify_wq); + SET_DEVICE_OP(dev_ops, peek_cq); + SET_DEVICE_OP(dev_ops, poll_cq); + SET_DEVICE_OP(dev_ops, post_recv); + SET_DEVICE_OP(dev_ops, post_send); + SET_DEVICE_OP(dev_ops, post_srq_recv); + SET_DEVICE_OP(dev_ops, process_mad); + SET_DEVICE_OP(dev_ops, query_ah); + SET_DEVICE_OP(dev_ops, query_device); + SET_DEVICE_OP(dev_ops, query_gid); + SET_DEVICE_OP(dev_ops, query_pkey); + SET_DEVICE_OP(dev_ops, query_port); + SET_DEVICE_OP(dev_ops, query_qp); + SET_DEVICE_OP(dev_ops, query_srq); + SET_DEVICE_OP(dev_ops, rdma_netdev_get_params); + SET_DEVICE_OP(dev_ops, read_counters); + SET_DEVICE_OP(dev_ops, reg_dm_mr); + SET_DEVICE_OP(dev_ops, reg_user_mr); + SET_DEVICE_OP(dev_ops, req_ncomp_notif); + SET_DEVICE_OP(dev_ops, req_notify_cq); + SET_DEVICE_OP(dev_ops, rereg_user_mr); + SET_DEVICE_OP(dev_ops, resize_cq); + SET_DEVICE_OP(dev_ops, set_vf_guid); + SET_DEVICE_OP(dev_ops, set_vf_link_state); + SET_DEVICE_OP(dev_ops, unmap_fmr); +} +EXPORT_SYMBOL(ib_set_device_ops); + static const struct rdma_nl_cbs ibnl_ls_cb_table[RDMA_NL_LS_NUM_OPS] = { [RDMA_NL_LS_OP_RESOLVE] = { .doit = ib_nl_handle_resolve_resp, diff --git a/drivers/infiniband/core/fmr_pool.c b/drivers/infiniband/core/fmr_pool.c index 83ba0068e8bb..7d841b689a1e 100644 --- a/drivers/infiniband/core/fmr_pool.c +++ b/drivers/infiniband/core/fmr_pool.c @@ -211,8 +211,8 @@ struct ib_fmr_pool *ib_create_fmr_pool(struct ib_pd *pd, return ERR_PTR(-EINVAL); device = pd->device; - if (!device->alloc_fmr || !device->dealloc_fmr || - !device->map_phys_fmr || !device->unmap_fmr) { + if (!device->ops.alloc_fmr || !device->ops.dealloc_fmr || + !device->ops.map_phys_fmr || !device->ops.unmap_fmr) { dev_info(&device->dev, "Device does not support FMRs\n"); return ERR_PTR(-ENOSYS); } @@ -474,7 +474,7 @@ EXPORT_SYMBOL(ib_fmr_pool_map_phys); * Unmap an FMR. The FMR mapping may remain valid until the FMR is * reused (or until ib_flush_fmr_pool() is called). */ -int ib_fmr_pool_unmap(struct ib_pool_fmr *fmr) +void ib_fmr_pool_unmap(struct ib_pool_fmr *fmr) { struct ib_fmr_pool *pool; unsigned long flags; @@ -503,7 +503,5 @@ int ib_fmr_pool_unmap(struct ib_pool_fmr *fmr) #endif spin_unlock_irqrestore(&pool->pool_lock, flags); - - return 0; } EXPORT_SYMBOL(ib_fmr_pool_unmap); diff --git a/drivers/infiniband/core/iwcm.c b/drivers/infiniband/core/iwcm.c index ba668d49c751..476abc74178e 100644 --- a/drivers/infiniband/core/iwcm.c +++ b/drivers/infiniband/core/iwcm.c @@ -502,17 +502,21 @@ static void iw_cm_check_wildcard(struct sockaddr_storage *pm_addr, */ static int iw_cm_map(struct iw_cm_id *cm_id, bool active) { + const char *devname = dev_name(&cm_id->device->dev); + const char *ifname = cm_id->device->iwcm->ifname; struct iwpm_dev_data pm_reg_msg; struct iwpm_sa_data pm_msg; int status; + if (strlen(devname) >= sizeof(pm_reg_msg.dev_name) || + strlen(ifname) >= sizeof(pm_reg_msg.if_name)) + return -EINVAL; + cm_id->m_local_addr = cm_id->local_addr; cm_id->m_remote_addr = cm_id->remote_addr; - memcpy(pm_reg_msg.dev_name, dev_name(&cm_id->device->dev), - sizeof(pm_reg_msg.dev_name)); - memcpy(pm_reg_msg.if_name, cm_id->device->iwcm->ifname, - sizeof(pm_reg_msg.if_name)); + strncpy(pm_reg_msg.dev_name, devname, sizeof(pm_reg_msg.dev_name)); + strncpy(pm_reg_msg.if_name, ifname, sizeof(pm_reg_msg.if_name)); if (iwpm_register_pid(&pm_reg_msg, RDMA_NL_IWCM) || !iwpm_valid_pid()) diff --git a/drivers/infiniband/core/mad.c b/drivers/infiniband/core/mad.c index d7025cd5be28..7870823bac47 100644 --- a/drivers/infiniband/core/mad.c +++ b/drivers/infiniband/core/mad.c @@ -888,10 +888,10 @@ static int handle_outgoing_dr_smp(struct ib_mad_agent_private *mad_agent_priv, } /* No GRH for DR SMP */ - ret = device->process_mad(device, 0, port_num, &mad_wc, NULL, - (const struct ib_mad_hdr *)smp, mad_size, - (struct ib_mad_hdr *)mad_priv->mad, - &mad_size, &out_mad_pkey_index); + ret = device->ops.process_mad(device, 0, port_num, &mad_wc, NULL, + (const struct ib_mad_hdr *)smp, mad_size, + (struct ib_mad_hdr *)mad_priv->mad, + &mad_size, &out_mad_pkey_index); switch (ret) { case IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY: @@ -2305,14 +2305,12 @@ static void ib_mad_recv_done(struct ib_cq *cq, struct ib_wc *wc) } /* Give driver "right of first refusal" on incoming MAD */ - if (port_priv->device->process_mad) { - ret = port_priv->device->process_mad(port_priv->device, 0, - port_priv->port_num, - wc, &recv->grh, - (const struct ib_mad_hdr *)recv->mad, - recv->mad_size, - (struct ib_mad_hdr *)response->mad, - &mad_size, &resp_mad_pkey_index); + if (port_priv->device->ops.process_mad) { + ret = port_priv->device->ops.process_mad( + port_priv->device, 0, port_priv->port_num, wc, + &recv->grh, (const struct ib_mad_hdr *)recv->mad, + recv->mad_size, (struct ib_mad_hdr *)response->mad, + &mad_size, &resp_mad_pkey_index); if (opa) wc->pkey_index = resp_mad_pkey_index; diff --git a/drivers/infiniband/core/mad_rmpp.c b/drivers/infiniband/core/mad_rmpp.c index e5cf09c66fe6..5ec57abc0849 100644 --- a/drivers/infiniband/core/mad_rmpp.c +++ b/drivers/infiniband/core/mad_rmpp.c @@ -81,7 +81,7 @@ static void destroy_rmpp_recv(struct mad_rmpp_recv *rmpp_recv) { deref_rmpp_recv(rmpp_recv); wait_for_completion(&rmpp_recv->comp); - rdma_destroy_ah(rmpp_recv->ah); + rdma_destroy_ah(rmpp_recv->ah, RDMA_DESTROY_AH_SLEEPABLE); kfree(rmpp_recv); } @@ -171,7 +171,7 @@ static struct ib_mad_send_buf *alloc_response_msg(struct ib_mad_agent *agent, hdr_len, 0, GFP_KERNEL, IB_MGMT_BASE_VERSION); if (IS_ERR(msg)) - rdma_destroy_ah(ah); + rdma_destroy_ah(ah, RDMA_DESTROY_AH_SLEEPABLE); else { msg->ah = ah; msg->context[0] = ah; @@ -201,7 +201,7 @@ static void ack_ds_ack(struct ib_mad_agent_private *agent, ret = ib_post_send_mad(msg, NULL); if (ret) { - rdma_destroy_ah(msg->ah); + rdma_destroy_ah(msg->ah, RDMA_DESTROY_AH_SLEEPABLE); ib_free_send_mad(msg); } } @@ -209,7 +209,8 @@ static void ack_ds_ack(struct ib_mad_agent_private *agent, void ib_rmpp_send_handler(struct ib_mad_send_wc *mad_send_wc) { if (mad_send_wc->send_buf->context[0] == mad_send_wc->send_buf->ah) - rdma_destroy_ah(mad_send_wc->send_buf->ah); + rdma_destroy_ah(mad_send_wc->send_buf->ah, + RDMA_DESTROY_AH_SLEEPABLE); ib_free_send_mad(mad_send_wc->send_buf); } @@ -237,7 +238,7 @@ static void nack_recv(struct ib_mad_agent_private *agent, ret = ib_post_send_mad(msg, NULL); if (ret) { - rdma_destroy_ah(msg->ah); + rdma_destroy_ah(msg->ah, RDMA_DESTROY_AH_SLEEPABLE); ib_free_send_mad(msg); } } diff --git a/drivers/infiniband/core/nldev.c b/drivers/infiniband/core/nldev.c index 573399e3ccc1..e600fc23ae62 100644 --- a/drivers/infiniband/core/nldev.c +++ b/drivers/infiniband/core/nldev.c @@ -227,6 +227,7 @@ static int fill_port_info(struct sk_buff *msg, struct net_device *netdev = NULL; struct ib_port_attr attr; int ret; + u64 cap_flags = 0; if (fill_nldev_handle(msg, device)) return -EMSGSIZE; @@ -239,10 +240,12 @@ static int fill_port_info(struct sk_buff *msg, return ret; if (rdma_protocol_ib(device, port)) { - BUILD_BUG_ON(sizeof(attr.port_cap_flags) > sizeof(u64)); + BUILD_BUG_ON((sizeof(attr.port_cap_flags) + + sizeof(attr.port_cap_flags2)) > sizeof(u64)); + cap_flags = attr.port_cap_flags | + ((u64)attr.port_cap_flags2 << 32); if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_CAP_FLAGS, - (u64)attr.port_cap_flags, - RDMA_NLDEV_ATTR_PAD)) + cap_flags, RDMA_NLDEV_ATTR_PAD)) return -EMSGSIZE; if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_SUBNET_PREFIX, attr.subnet_prefix, RDMA_NLDEV_ATTR_PAD)) @@ -259,8 +262,8 @@ static int fill_port_info(struct sk_buff *msg, if (nla_put_u8(msg, RDMA_NLDEV_ATTR_PORT_PHYS_STATE, attr.phys_state)) return -EMSGSIZE; - if (device->get_netdev) - netdev = device->get_netdev(device, port); + if (device->ops.get_netdev) + netdev = device->ops.get_netdev(device, port); if (netdev && net_eq(dev_net(netdev), net)) { ret = nla_put_u32(msg, @@ -308,6 +311,7 @@ static int fill_res_info(struct sk_buff *msg, struct ib_device *device) [RDMA_RESTRACK_QP] = "qp", [RDMA_RESTRACK_CM_ID] = "cm_id", [RDMA_RESTRACK_MR] = "mr", + [RDMA_RESTRACK_CTX] = "ctx", }; struct rdma_restrack_root *res = &device->res; @@ -636,13 +640,13 @@ static int nldev_get_doit(struct sk_buff *skb, struct nlmsghdr *nlh, nlmsg_end(msg, nlh); - put_device(&device->dev); + ib_device_put(device); return rdma_nl_unicast(msg, NETLINK_CB(skb).portid); err_free: nlmsg_free(msg); err: - put_device(&device->dev); + ib_device_put(device); return err; } @@ -672,7 +676,7 @@ static int nldev_set_doit(struct sk_buff *skb, struct nlmsghdr *nlh, err = ib_device_rename(device, name); } - put_device(&device->dev); + ib_device_put(device); return err; } @@ -756,14 +760,14 @@ static int nldev_port_get_doit(struct sk_buff *skb, struct nlmsghdr *nlh, goto err_free; nlmsg_end(msg, nlh); - put_device(&device->dev); + ib_device_put(device); return rdma_nl_unicast(msg, NETLINK_CB(skb).portid); err_free: nlmsg_free(msg); err: - put_device(&device->dev); + ib_device_put(device); return err; } @@ -820,7 +824,7 @@ static int nldev_port_get_dumpit(struct sk_buff *skb, } out: - put_device(&device->dev); + ib_device_put(device); cb->args[0] = idx; return skb->len; } @@ -859,13 +863,13 @@ static int nldev_res_get_doit(struct sk_buff *skb, struct nlmsghdr *nlh, goto err_free; nlmsg_end(msg, nlh); - put_device(&device->dev); + ib_device_put(device); return rdma_nl_unicast(msg, NETLINK_CB(skb).portid); err_free: nlmsg_free(msg); err: - put_device(&device->dev); + ib_device_put(device); return ret; } @@ -1058,7 +1062,7 @@ next: idx++; if (!filled) goto err; - put_device(&device->dev); + ib_device_put(device); return skb->len; res_err: @@ -1069,7 +1073,7 @@ err: nlmsg_cancel(skb, nlh); err_index: - put_device(&device->dev); + ib_device_put(device); return ret; } diff --git a/drivers/infiniband/core/opa_smi.h b/drivers/infiniband/core/opa_smi.h index 3bfab3505a29..af4879bdf3d6 100644 --- a/drivers/infiniband/core/opa_smi.h +++ b/drivers/infiniband/core/opa_smi.h @@ -55,7 +55,7 @@ static inline enum smi_action opa_smi_check_local_smp(struct opa_smp *smp, { /* C14-9:3 -- We're at the end of the DR segment of path */ /* C14-9:4 -- Hop Pointer = Hop Count + 1 -> give to SMA/SM */ - return (device->process_mad && + return (device->ops.process_mad && !opa_get_smp_direction(smp) && (smp->hop_ptr == smp->hop_cnt + 1)) ? IB_SMI_HANDLE : IB_SMI_DISCARD; @@ -70,7 +70,7 @@ static inline enum smi_action opa_smi_check_local_returning_smp(struct opa_smp * { /* C14-13:3 -- We're at the end of the DR segment of path */ /* C14-13:4 -- Hop Pointer == 0 -> give to SM */ - return (device->process_mad && + return (device->ops.process_mad && opa_get_smp_direction(smp) && !smp->hop_ptr) ? IB_SMI_HANDLE : IB_SMI_DISCARD; } diff --git a/drivers/infiniband/core/rdma_core.c b/drivers/infiniband/core/rdma_core.c index 752a55c6bdce..6c4747e61d2b 100644 --- a/drivers/infiniband/core/rdma_core.c +++ b/drivers/infiniband/core/rdma_core.c @@ -224,12 +224,14 @@ out_unlock: * uverbs_put_destroy. */ struct ib_uobject *__uobj_get_destroy(const struct uverbs_api_object *obj, - u32 id, struct ib_uverbs_file *ufile) + u32 id, + const struct uverbs_attr_bundle *attrs) { struct ib_uobject *uobj; int ret; - uobj = rdma_lookup_get_uobject(obj, ufile, id, UVERBS_LOOKUP_DESTROY); + uobj = rdma_lookup_get_uobject(obj, attrs->ufile, id, + UVERBS_LOOKUP_DESTROY); if (IS_ERR(uobj)) return uobj; @@ -243,21 +245,20 @@ struct ib_uobject *__uobj_get_destroy(const struct uverbs_api_object *obj, } /* - * Does both uobj_get_destroy() and uobj_put_destroy(). Returns success_res - * on success (negative errno on failure). For use by callers that do not need - * the uobj. + * Does both uobj_get_destroy() and uobj_put_destroy(). Returns 0 on success + * (negative errno on failure). For use by callers that do not need the uobj. */ int __uobj_perform_destroy(const struct uverbs_api_object *obj, u32 id, - struct ib_uverbs_file *ufile, int success_res) + const struct uverbs_attr_bundle *attrs) { struct ib_uobject *uobj; - uobj = __uobj_get_destroy(obj, id, ufile); + uobj = __uobj_get_destroy(obj, id, attrs); if (IS_ERR(uobj)) return PTR_ERR(uobj); rdma_lookup_put_uobject(uobj, UVERBS_LOOKUP_WRITE); - return success_res; + return 0; } /* alloc_uobj must be undone by uverbs_destroy_uobject() */ @@ -267,7 +268,7 @@ static struct ib_uobject *alloc_uobj(struct ib_uverbs_file *ufile, struct ib_uobject *uobj; struct ib_ucontext *ucontext; - ucontext = ib_uverbs_get_ucontext(ufile); + ucontext = ib_uverbs_get_ucontext_file(ufile); if (IS_ERR(ucontext)) return ERR_CAST(ucontext); @@ -397,16 +398,23 @@ struct ib_uobject *rdma_lookup_get_uobject(const struct uverbs_api_object *obj, struct ib_uobject *uobj; int ret; - if (!obj) - return ERR_PTR(-EINVAL); + if (IS_ERR(obj) && PTR_ERR(obj) == -ENOMSG) { + /* must be UVERBS_IDR_ANY_OBJECT, see uapi_get_object() */ + uobj = lookup_get_idr_uobject(NULL, ufile, id, mode); + if (IS_ERR(uobj)) + return uobj; + } else { + if (IS_ERR(obj)) + return ERR_PTR(-EINVAL); - uobj = obj->type_class->lookup_get(obj, ufile, id, mode); - if (IS_ERR(uobj)) - return uobj; + uobj = obj->type_class->lookup_get(obj, ufile, id, mode); + if (IS_ERR(uobj)) + return uobj; - if (uobj->uapi_object != obj) { - ret = -EINVAL; - goto free; + if (uobj->uapi_object != obj) { + ret = -EINVAL; + goto free; + } } /* @@ -426,7 +434,7 @@ struct ib_uobject *rdma_lookup_get_uobject(const struct uverbs_api_object *obj, return uobj; free: - obj->type_class->lookup_put(uobj, mode); + uobj->uapi_object->type_class->lookup_put(uobj, mode); uverbs_uobject_put(uobj); return ERR_PTR(ret); } @@ -490,7 +498,7 @@ struct ib_uobject *rdma_alloc_begin_uobject(const struct uverbs_api_object *obj, { struct ib_uobject *ret; - if (!obj) + if (IS_ERR(obj)) return ERR_PTR(-EINVAL); /* @@ -812,18 +820,20 @@ static void ufile_destroy_ucontext(struct ib_uverbs_file *ufile, */ if (reason == RDMA_REMOVE_DRIVER_REMOVE) { uverbs_user_mmap_disassociate(ufile); - if (ib_dev->disassociate_ucontext) - ib_dev->disassociate_ucontext(ucontext); + if (ib_dev->ops.disassociate_ucontext) + ib_dev->ops.disassociate_ucontext(ucontext); } ib_rdmacg_uncharge(&ucontext->cg_obj, ib_dev, RDMACG_RESOURCE_HCA_HANDLE); + rdma_restrack_del(&ucontext->res); + /* * FIXME: Drivers are not permitted to fail dealloc_ucontext, remove * the error return. */ - ret = ib_dev->dealloc_ucontext(ucontext); + ret = ib_dev->ops.dealloc_ucontext(ucontext); WARN_ON(ret); ufile->ucontext = NULL; diff --git a/drivers/infiniband/core/rdma_core.h b/drivers/infiniband/core/rdma_core.h index 4886d2bba7c7..be6b8e1257d0 100644 --- a/drivers/infiniband/core/rdma_core.h +++ b/drivers/infiniband/core/rdma_core.h @@ -118,43 +118,67 @@ void release_ufile_idr_uobject(struct ib_uverbs_file *ufile); * Depending on ID the slot pointer in the radix tree points at one of these * structs. */ -struct uverbs_api_object { - const struct uverbs_obj_type *type_attrs; - const struct uverbs_obj_type_class *type_class; -}; struct uverbs_api_ioctl_method { - int (__rcu *handler)(struct ib_uverbs_file *ufile, - struct uverbs_attr_bundle *ctx); + int(__rcu *handler)(struct uverbs_attr_bundle *attrs); DECLARE_BITMAP(attr_mandatory, UVERBS_API_ATTR_BKEY_LEN); u16 bundle_size; u8 use_stack:1; u8 driver_method:1; + u8 disabled:1; + u8 has_udata:1; u8 key_bitmap_len; u8 destroy_bkey; }; +struct uverbs_api_write_method { + int (*handler)(struct uverbs_attr_bundle *attrs); + u8 disabled:1; + u8 is_ex:1; + u8 has_udata:1; + u8 has_resp:1; + u8 req_size; + u8 resp_size; +}; + struct uverbs_api_attr { struct uverbs_attr_spec spec; }; -struct uverbs_api_object; struct uverbs_api { /* radix tree contains struct uverbs_api_* pointers */ struct radix_tree_root radix; enum rdma_driver_id driver_id; + + unsigned int num_write; + unsigned int num_write_ex; + struct uverbs_api_write_method notsupp_method; + const struct uverbs_api_write_method **write_methods; + const struct uverbs_api_write_method **write_ex_methods; }; +/* + * Get an uverbs_api_object that corresponds to the given object_id. + * Note: + * -ENOMSG means that any object is allowed to match during lookup. + */ static inline const struct uverbs_api_object * uapi_get_object(struct uverbs_api *uapi, u16 object_id) { - return radix_tree_lookup(&uapi->radix, uapi_key_obj(object_id)); + const struct uverbs_api_object *res; + + if (object_id == UVERBS_IDR_ANY_OBJECT) + return ERR_PTR(-ENOMSG); + + res = radix_tree_lookup(&uapi->radix, uapi_key_obj(object_id)); + if (!res) + return ERR_PTR(-ENOENT); + + return res; } char *uapi_key_format(char *S, unsigned int key); -struct uverbs_api *uverbs_alloc_api( - const struct uverbs_object_tree_def *const *driver_specs, - enum rdma_driver_id driver_id); +struct uverbs_api *uverbs_alloc_api(struct ib_device *ibdev); void uverbs_disassociate_api_pre(struct ib_uverbs_device *uverbs_dev); void uverbs_disassociate_api(struct uverbs_api *uapi); void uverbs_destroy_api(struct uverbs_api *uapi); @@ -162,4 +186,37 @@ void uapi_compute_bundle_size(struct uverbs_api_ioctl_method *method_elm, unsigned int num_attrs); void uverbs_user_mmap_disassociate(struct ib_uverbs_file *ufile); +extern const struct uapi_definition uverbs_def_obj_counters[]; +extern const struct uapi_definition uverbs_def_obj_cq[]; +extern const struct uapi_definition uverbs_def_obj_device[]; +extern const struct uapi_definition uverbs_def_obj_dm[]; +extern const struct uapi_definition uverbs_def_obj_flow_action[]; +extern const struct uapi_definition uverbs_def_obj_intf[]; +extern const struct uapi_definition uverbs_def_obj_mr[]; +extern const struct uapi_definition uverbs_def_write_intf[]; + +static inline const struct uverbs_api_write_method * +uapi_get_method(const struct uverbs_api *uapi, u32 command) +{ + u32 cmd_idx = command & IB_USER_VERBS_CMD_COMMAND_MASK; + + if (command & ~(u32)(IB_USER_VERBS_CMD_FLAG_EXTENDED | + IB_USER_VERBS_CMD_COMMAND_MASK)) + return ERR_PTR(-EINVAL); + + if (command & IB_USER_VERBS_CMD_FLAG_EXTENDED) { + if (cmd_idx >= uapi->num_write_ex) + return ERR_PTR(-EOPNOTSUPP); + return uapi->write_ex_methods[cmd_idx]; + } + + if (cmd_idx >= uapi->num_write) + return ERR_PTR(-EOPNOTSUPP); + return uapi->write_methods[cmd_idx]; +} + +void uverbs_fill_udata(struct uverbs_attr_bundle *bundle, + struct ib_udata *udata, unsigned int attr_in, + unsigned int attr_out); + #endif /* RDMA_CORE_H */ diff --git a/drivers/infiniband/core/restrack.c b/drivers/infiniband/core/restrack.c index 06d8657ce583..46a5c553c624 100644 --- a/drivers/infiniband/core/restrack.c +++ b/drivers/infiniband/core/restrack.c @@ -32,6 +32,7 @@ static const char *type2str(enum rdma_restrack_type type) [RDMA_RESTRACK_QP] = "QP", [RDMA_RESTRACK_CM_ID] = "CM_ID", [RDMA_RESTRACK_MR] = "MR", + [RDMA_RESTRACK_CTX] = "CTX", }; return names[type]; @@ -130,31 +131,14 @@ static struct ib_device *res_to_dev(struct rdma_restrack_entry *res) res)->id.device; case RDMA_RESTRACK_MR: return container_of(res, struct ib_mr, res)->device; + case RDMA_RESTRACK_CTX: + return container_of(res, struct ib_ucontext, res)->device; default: WARN_ONCE(true, "Wrong resource tracking type %u\n", res->type); return NULL; } } -static bool res_is_user(struct rdma_restrack_entry *res) -{ - switch (res->type) { - case RDMA_RESTRACK_PD: - return container_of(res, struct ib_pd, res)->uobject; - case RDMA_RESTRACK_CQ: - return container_of(res, struct ib_cq, res)->uobject; - case RDMA_RESTRACK_QP: - return container_of(res, struct ib_qp, res)->uobject; - case RDMA_RESTRACK_CM_ID: - return !res->kern_name; - case RDMA_RESTRACK_MR: - return container_of(res, struct ib_mr, res)->pd->uobject; - default: - WARN_ONCE(true, "Wrong resource tracking type %u\n", res->type); - return false; - } -} - void rdma_restrack_set_task(struct rdma_restrack_entry *res, const char *caller) { @@ -170,17 +154,17 @@ void rdma_restrack_set_task(struct rdma_restrack_entry *res, } EXPORT_SYMBOL(rdma_restrack_set_task); -void rdma_restrack_add(struct rdma_restrack_entry *res) +static void rdma_restrack_add(struct rdma_restrack_entry *res) { struct ib_device *dev = res_to_dev(res); if (!dev) return; - if (res->type != RDMA_RESTRACK_CM_ID || !res_is_user(res)) + if (res->type != RDMA_RESTRACK_CM_ID || rdma_is_kernel_res(res)) res->task = NULL; - if (res_is_user(res)) { + if (!rdma_is_kernel_res(res)) { if (!res->task) rdma_restrack_set_task(res, NULL); res->kern_name = NULL; @@ -196,7 +180,28 @@ void rdma_restrack_add(struct rdma_restrack_entry *res) hash_add(dev->res.hash, &res->node, res->type); up_write(&dev->res.rwsem); } -EXPORT_SYMBOL(rdma_restrack_add); + +/** + * rdma_restrack_kadd() - add kernel object to the reource tracking database + * @res: resource entry + */ +void rdma_restrack_kadd(struct rdma_restrack_entry *res) +{ + res->user = false; + rdma_restrack_add(res); +} +EXPORT_SYMBOL(rdma_restrack_kadd); + +/** + * rdma_restrack_uadd() - add user object to the reource tracking database + * @res: resource entry + */ +void rdma_restrack_uadd(struct rdma_restrack_entry *res) +{ + res->user = true; + rdma_restrack_add(res); +} +EXPORT_SYMBOL(rdma_restrack_uadd); int __must_check rdma_restrack_get(struct rdma_restrack_entry *res) { diff --git a/drivers/infiniband/core/sa_query.c b/drivers/infiniband/core/sa_query.c index be5ba5e15496..97e6d7b69abf 100644 --- a/drivers/infiniband/core/sa_query.c +++ b/drivers/infiniband/core/sa_query.c @@ -1147,7 +1147,7 @@ static void free_sm_ah(struct kref *kref) { struct ib_sa_sm_ah *sm_ah = container_of(kref, struct ib_sa_sm_ah, ref); - rdma_destroy_ah(sm_ah->ah); + rdma_destroy_ah(sm_ah->ah, 0); kfree(sm_ah); } @@ -2276,7 +2276,8 @@ static void update_sm_ah(struct work_struct *work) cpu_to_be64(IB_SA_WELL_KNOWN_GUID)); } - new_ah->ah = rdma_create_ah(port->agent->qp->pd, &ah_attr); + new_ah->ah = rdma_create_ah(port->agent->qp->pd, &ah_attr, + RDMA_CREATE_AH_SLEEPABLE); if (IS_ERR(new_ah->ah)) { pr_warn("Couldn't create new SM AH\n"); kfree(new_ah); diff --git a/drivers/infiniband/core/security.c b/drivers/infiniband/core/security.c index 1143c0448666..1efadbccf394 100644 --- a/drivers/infiniband/core/security.c +++ b/drivers/infiniband/core/security.c @@ -626,10 +626,10 @@ int ib_security_modify_qp(struct ib_qp *qp, } if (!ret) - ret = real_qp->device->modify_qp(real_qp, - qp_attr, - qp_attr_mask, - udata); + ret = real_qp->device->ops.modify_qp(real_qp, + qp_attr, + qp_attr_mask, + udata); if (new_pps) { /* Clean up the lists and free the appropriate diff --git a/drivers/infiniband/core/smi.h b/drivers/infiniband/core/smi.h index 33c91c8a16e9..91d9b353ab85 100644 --- a/drivers/infiniband/core/smi.h +++ b/drivers/infiniband/core/smi.h @@ -67,7 +67,7 @@ static inline enum smi_action smi_check_local_smp(struct ib_smp *smp, { /* C14-9:3 -- We're at the end of the DR segment of path */ /* C14-9:4 -- Hop Pointer = Hop Count + 1 -> give to SMA/SM */ - return ((device->process_mad && + return ((device->ops.process_mad && !ib_get_smp_direction(smp) && (smp->hop_ptr == smp->hop_cnt + 1)) ? IB_SMI_HANDLE : IB_SMI_DISCARD); @@ -82,7 +82,7 @@ static inline enum smi_action smi_check_local_returning_smp(struct ib_smp *smp, { /* C14-13:3 -- We're at the end of the DR segment of path */ /* C14-13:4 -- Hop Pointer == 0 -> give to SM */ - return ((device->process_mad && + return ((device->ops.process_mad && ib_get_smp_direction(smp) && !smp->hop_ptr) ? IB_SMI_HANDLE : IB_SMI_DISCARD); } diff --git a/drivers/infiniband/core/sysfs.c b/drivers/infiniband/core/sysfs.c index 6fcce2c206c6..80f68eb0ba5c 100644 --- a/drivers/infiniband/core/sysfs.c +++ b/drivers/infiniband/core/sysfs.c @@ -462,7 +462,7 @@ static int get_perf_mad(struct ib_device *dev, int port_num, __be16 attr, u16 out_mad_pkey_index = 0; ssize_t ret; - if (!dev->process_mad) + if (!dev->ops.process_mad) return -ENOSYS; in_mad = kzalloc(sizeof *in_mad, GFP_KERNEL); @@ -481,11 +481,11 @@ static int get_perf_mad(struct ib_device *dev, int port_num, __be16 attr, if (attr != IB_PMA_CLASS_PORT_INFO) in_mad->data[41] = port_num; /* PortSelect field */ - if ((dev->process_mad(dev, IB_MAD_IGNORE_MKEY, - port_num, NULL, NULL, - (const struct ib_mad_hdr *)in_mad, mad_size, - (struct ib_mad_hdr *)out_mad, &mad_size, - &out_mad_pkey_index) & + if ((dev->ops.process_mad(dev, IB_MAD_IGNORE_MKEY, + port_num, NULL, NULL, + (const struct ib_mad_hdr *)in_mad, mad_size, + (struct ib_mad_hdr *)out_mad, &mad_size, + &out_mad_pkey_index) & (IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY)) != (IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY)) { ret = -EINVAL; @@ -786,7 +786,7 @@ static int update_hw_stats(struct ib_device *dev, struct rdma_hw_stats *stats, if (time_is_after_eq_jiffies(stats->timestamp + stats->lifespan)) return 0; - ret = dev->get_hw_stats(dev, stats, port_num, index); + ret = dev->ops.get_hw_stats(dev, stats, port_num, index); if (ret < 0) return ret; if (ret == stats->num_counters) @@ -946,7 +946,7 @@ static void setup_hw_stats(struct ib_device *device, struct ib_port *port, struct rdma_hw_stats *stats; int i, ret; - stats = device->alloc_hw_stats(device, port_num); + stats = device->ops.alloc_hw_stats(device, port_num); if (!stats) return; @@ -964,8 +964,8 @@ static void setup_hw_stats(struct ib_device *device, struct ib_port *port, if (!hsag) goto err_free_stats; - ret = device->get_hw_stats(device, stats, port_num, - stats->num_counters); + ret = device->ops.get_hw_stats(device, stats, port_num, + stats->num_counters); if (ret != stats->num_counters) goto err_free_hsag; @@ -1057,7 +1057,7 @@ static int add_port(struct ib_device *device, int port_num, goto err_put; } - if (device->process_mad) { + if (device->ops.process_mad) { p->pma_table = get_counter_table(device, port_num); ret = sysfs_create_group(&p->kobj, p->pma_table); if (ret) @@ -1124,7 +1124,7 @@ static int add_port(struct ib_device *device, int port_num, * port, so holder should be device. Therefore skip per port conunter * initialization. */ - if (device->alloc_hw_stats && port_num) + if (device->ops.alloc_hw_stats && port_num) setup_hw_stats(device, p, port_num); list_add_tail(&p->kobj.entry, &device->port_list); @@ -1245,7 +1245,7 @@ static ssize_t node_desc_store(struct device *device, struct ib_device_modify desc = {}; int ret; - if (!dev->modify_device) + if (!dev->ops.modify_device) return -EIO; memcpy(desc.node_desc, buf, min_t(int, count, IB_DEVICE_NODE_DESC_MAX)); @@ -1341,7 +1341,7 @@ int ib_device_register_sysfs(struct ib_device *device, } } - if (device->alloc_hw_stats) + if (device->ops.alloc_hw_stats) setup_hw_stats(device, NULL, 0); return 0; diff --git a/drivers/infiniband/core/ucm.c b/drivers/infiniband/core/ucm.c index 73332b9a25b5..7541fbaf58a3 100644 --- a/drivers/infiniband/core/ucm.c +++ b/drivers/infiniband/core/ucm.c @@ -1242,7 +1242,7 @@ static void ib_ucm_add_one(struct ib_device *device) dev_t base; struct ib_ucm_device *ucm_dev; - if (!device->alloc_ucontext || !rdma_cap_ib_cm(device, 1)) + if (!device->ops.alloc_ucontext || !rdma_cap_ib_cm(device, 1)) return; ucm_dev = kzalloc(sizeof *ucm_dev, GFP_KERNEL); diff --git a/drivers/infiniband/core/user_mad.c b/drivers/infiniband/core/user_mad.c index f55f48f6b272..de8d31ab8945 100644 --- a/drivers/infiniband/core/user_mad.c +++ b/drivers/infiniband/core/user_mad.c @@ -88,10 +88,9 @@ enum { struct ib_umad_port { struct cdev cdev; - struct device *dev; - + struct device dev; struct cdev sm_cdev; - struct device *sm_dev; + struct device sm_dev; struct semaphore sm_sem; struct mutex file_mutex; @@ -104,8 +103,8 @@ struct ib_umad_port { }; struct ib_umad_device { - struct kobject kobj; - struct ib_umad_port port[0]; + struct kref kref; + struct ib_umad_port ports[]; }; struct ib_umad_file { @@ -130,8 +129,6 @@ struct ib_umad_packet { struct ib_user_mad mad; }; -static struct class *umad_class; - static const dev_t base_umad_dev = MKDEV(IB_UMAD_MAJOR, IB_UMAD_MINOR_BASE); static const dev_t base_issm_dev = MKDEV(IB_UMAD_MAJOR, IB_UMAD_MINOR_BASE) + IB_UMAD_NUM_FIXED_MINOR; @@ -143,17 +140,23 @@ static DEFINE_IDA(umad_ida); static void ib_umad_add_one(struct ib_device *device); static void ib_umad_remove_one(struct ib_device *device, void *client_data); -static void ib_umad_release_dev(struct kobject *kobj) +static void ib_umad_dev_free(struct kref *kref) { struct ib_umad_device *dev = - container_of(kobj, struct ib_umad_device, kobj); + container_of(kref, struct ib_umad_device, kref); kfree(dev); } -static struct kobj_type ib_umad_dev_ktype = { - .release = ib_umad_release_dev, -}; +static void ib_umad_dev_get(struct ib_umad_device *dev) +{ + kref_get(&dev->kref); +} + +static void ib_umad_dev_put(struct ib_umad_device *dev) +{ + kref_put(&dev->kref, ib_umad_dev_free); +} static int hdr_size(struct ib_umad_file *file) { @@ -205,7 +208,7 @@ static void send_handler(struct ib_mad_agent *agent, struct ib_umad_packet *packet = send_wc->send_buf->context[0]; dequeue_send(file, packet); - rdma_destroy_ah(packet->msg->ah); + rdma_destroy_ah(packet->msg->ah, RDMA_DESTROY_AH_SLEEPABLE); ib_free_send_mad(packet->msg); if (send_wc->status == IB_WC_RESP_TIMEOUT_ERR) { @@ -621,7 +624,7 @@ err_send: err_msg: ib_free_send_mad(packet->msg); err_ah: - rdma_destroy_ah(ah); + rdma_destroy_ah(ah, RDMA_DESTROY_AH_SLEEPABLE); err_up: mutex_unlock(&file->mutex); err: @@ -657,7 +660,7 @@ static int ib_umad_reg_agent(struct ib_umad_file *file, void __user *arg, mutex_lock(&file->mutex); if (!file->port->ib_dev) { - dev_notice(file->port->dev, + dev_notice(&file->port->dev, "ib_umad_reg_agent: invalid device\n"); ret = -EPIPE; goto out; @@ -669,7 +672,7 @@ static int ib_umad_reg_agent(struct ib_umad_file *file, void __user *arg, } if (ureq.qpn != 0 && ureq.qpn != 1) { - dev_notice(file->port->dev, + dev_notice(&file->port->dev, "ib_umad_reg_agent: invalid QPN %d specified\n", ureq.qpn); ret = -EINVAL; @@ -680,7 +683,7 @@ static int ib_umad_reg_agent(struct ib_umad_file *file, void __user *arg, if (!__get_agent(file, agent_id)) goto found; - dev_notice(file->port->dev, + dev_notice(&file->port->dev, "ib_umad_reg_agent: Max Agents (%u) reached\n", IB_UMAD_MAX_AGENTS); ret = -ENOMEM; @@ -725,10 +728,10 @@ found: if (!file->already_used) { file->already_used = 1; if (!file->use_pkey_index) { - dev_warn(file->port->dev, + dev_warn(&file->port->dev, "process %s did not enable P_Key index support.\n", current->comm); - dev_warn(file->port->dev, + dev_warn(&file->port->dev, " Documentation/infiniband/user_mad.txt has info on the new ABI.\n"); } } @@ -759,7 +762,7 @@ static int ib_umad_reg_agent2(struct ib_umad_file *file, void __user *arg) mutex_lock(&file->mutex); if (!file->port->ib_dev) { - dev_notice(file->port->dev, + dev_notice(&file->port->dev, "ib_umad_reg_agent2: invalid device\n"); ret = -EPIPE; goto out; @@ -771,7 +774,7 @@ static int ib_umad_reg_agent2(struct ib_umad_file *file, void __user *arg) } if (ureq.qpn != 0 && ureq.qpn != 1) { - dev_notice(file->port->dev, + dev_notice(&file->port->dev, "ib_umad_reg_agent2: invalid QPN %d specified\n", ureq.qpn); ret = -EINVAL; @@ -779,7 +782,7 @@ static int ib_umad_reg_agent2(struct ib_umad_file *file, void __user *arg) } if (ureq.flags & ~IB_USER_MAD_REG_FLAGS_CAP) { - dev_notice(file->port->dev, + dev_notice(&file->port->dev, "ib_umad_reg_agent2 failed: invalid registration flags specified 0x%x; supported 0x%x\n", ureq.flags, IB_USER_MAD_REG_FLAGS_CAP); ret = -EINVAL; @@ -796,7 +799,7 @@ static int ib_umad_reg_agent2(struct ib_umad_file *file, void __user *arg) if (!__get_agent(file, agent_id)) goto found; - dev_notice(file->port->dev, + dev_notice(&file->port->dev, "ib_umad_reg_agent2: Max Agents (%u) reached\n", IB_UMAD_MAX_AGENTS); ret = -ENOMEM; @@ -808,7 +811,7 @@ found: req.mgmt_class = ureq.mgmt_class; req.mgmt_class_version = ureq.mgmt_class_version; if (ureq.oui & 0xff000000) { - dev_notice(file->port->dev, + dev_notice(&file->port->dev, "ib_umad_reg_agent2 failed: oui invalid 0x%08x\n", ureq.oui); ret = -EINVAL; @@ -986,8 +989,7 @@ static int ib_umad_open(struct inode *inode, struct file *filp) goto out; } - kobject_get(&port->umad_dev->kobj); - + ib_umad_dev_get(port->umad_dev); out: mutex_unlock(&port->file_mutex); return ret; @@ -1025,8 +1027,7 @@ static int ib_umad_close(struct inode *inode, struct file *filp) mutex_unlock(&file->port->file_mutex); kfree(file); - kobject_put(&dev->kobj); - + ib_umad_dev_put(dev); return 0; } @@ -1076,8 +1077,7 @@ static int ib_umad_sm_open(struct inode *inode, struct file *filp) if (ret) goto err_clr_sm_cap; - kobject_get(&port->umad_dev->kobj); - + ib_umad_dev_get(port->umad_dev); return 0; err_clr_sm_cap: @@ -1106,8 +1106,7 @@ static int ib_umad_sm_close(struct inode *inode, struct file *filp) up(&port->sm_sem); - kobject_put(&port->umad_dev->kobj); - + ib_umad_dev_put(port->umad_dev); return ret; } @@ -1124,7 +1123,7 @@ static struct ib_client umad_client = { .remove = ib_umad_remove_one }; -static ssize_t show_ibdev(struct device *dev, struct device_attribute *attr, +static ssize_t ibdev_show(struct device *dev, struct device_attribute *attr, char *buf) { struct ib_umad_port *port = dev_get_drvdata(dev); @@ -1134,9 +1133,9 @@ static ssize_t show_ibdev(struct device *dev, struct device_attribute *attr, return sprintf(buf, "%s\n", dev_name(&port->ib_dev->dev)); } -static DEVICE_ATTR(ibdev, S_IRUGO, show_ibdev, NULL); +static DEVICE_ATTR_RO(ibdev); -static ssize_t show_port(struct device *dev, struct device_attribute *attr, +static ssize_t port_show(struct device *dev, struct device_attribute *attr, char *buf) { struct ib_umad_port *port = dev_get_drvdata(dev); @@ -1146,10 +1145,59 @@ static ssize_t show_port(struct device *dev, struct device_attribute *attr, return sprintf(buf, "%d\n", port->port_num); } -static DEVICE_ATTR(port, S_IRUGO, show_port, NULL); +static DEVICE_ATTR_RO(port); -static CLASS_ATTR_STRING(abi_version, S_IRUGO, - __stringify(IB_USER_MAD_ABI_VERSION)); +static struct attribute *umad_class_dev_attrs[] = { + &dev_attr_ibdev.attr, + &dev_attr_port.attr, + NULL, +}; +ATTRIBUTE_GROUPS(umad_class_dev); + +static char *umad_devnode(struct device *dev, umode_t *mode) +{ + return kasprintf(GFP_KERNEL, "infiniband/%s", dev_name(dev)); +} + +static ssize_t abi_version_show(struct class *class, + struct class_attribute *attr, char *buf) +{ + return sprintf(buf, "%d\n", IB_USER_MAD_ABI_VERSION); +} +static CLASS_ATTR_RO(abi_version); + +static struct attribute *umad_class_attrs[] = { + &class_attr_abi_version.attr, + NULL, +}; +ATTRIBUTE_GROUPS(umad_class); + +static struct class umad_class = { + .name = "infiniband_mad", + .devnode = umad_devnode, + .class_groups = umad_class_groups, + .dev_groups = umad_class_dev_groups, +}; + +static void ib_umad_release_port(struct device *device) +{ + struct ib_umad_port *port = dev_get_drvdata(device); + struct ib_umad_device *umad_dev = port->umad_dev; + + ib_umad_dev_put(umad_dev); +} + +static void ib_umad_init_port_dev(struct device *dev, + struct ib_umad_port *port, + const struct ib_device *device) +{ + device_initialize(dev); + ib_umad_dev_get(port->umad_dev); + dev->class = &umad_class; + dev->parent = device->dev.parent; + dev_set_drvdata(dev, port); + dev->release = ib_umad_release_port; +} static int ib_umad_init_port(struct ib_device *device, int port_num, struct ib_umad_device *umad_dev, @@ -1158,6 +1206,7 @@ static int ib_umad_init_port(struct ib_device *device, int port_num, int devnum; dev_t base_umad; dev_t base_issm; + int ret; devnum = ida_alloc_max(&umad_ida, IB_UMAD_MAX_PORTS - 1, GFP_KERNEL); if (devnum < 0) @@ -1172,63 +1221,41 @@ static int ib_umad_init_port(struct ib_device *device, int port_num, } port->ib_dev = device; + port->umad_dev = umad_dev; port->port_num = port_num; sema_init(&port->sm_sem, 1); mutex_init(&port->file_mutex); INIT_LIST_HEAD(&port->file_list); + ib_umad_init_port_dev(&port->dev, port, device); + port->dev.devt = base_umad; + dev_set_name(&port->dev, "umad%d", port->dev_num); cdev_init(&port->cdev, &umad_fops); port->cdev.owner = THIS_MODULE; - cdev_set_parent(&port->cdev, &umad_dev->kobj); - kobject_set_name(&port->cdev.kobj, "umad%d", port->dev_num); - if (cdev_add(&port->cdev, base_umad, 1)) - goto err_cdev; - port->dev = device_create(umad_class, device->dev.parent, - port->cdev.dev, port, - "umad%d", port->dev_num); - if (IS_ERR(port->dev)) + ret = cdev_device_add(&port->cdev, &port->dev); + if (ret) goto err_cdev; - if (device_create_file(port->dev, &dev_attr_ibdev)) - goto err_dev; - if (device_create_file(port->dev, &dev_attr_port)) - goto err_dev; - + ib_umad_init_port_dev(&port->sm_dev, port, device); + port->sm_dev.devt = base_issm; + dev_set_name(&port->sm_dev, "issm%d", port->dev_num); cdev_init(&port->sm_cdev, &umad_sm_fops); port->sm_cdev.owner = THIS_MODULE; - cdev_set_parent(&port->sm_cdev, &umad_dev->kobj); - kobject_set_name(&port->sm_cdev.kobj, "issm%d", port->dev_num); - if (cdev_add(&port->sm_cdev, base_issm, 1)) - goto err_sm_cdev; - - port->sm_dev = device_create(umad_class, device->dev.parent, - port->sm_cdev.dev, port, - "issm%d", port->dev_num); - if (IS_ERR(port->sm_dev)) - goto err_sm_cdev; - - if (device_create_file(port->sm_dev, &dev_attr_ibdev)) - goto err_sm_dev; - if (device_create_file(port->sm_dev, &dev_attr_port)) - goto err_sm_dev; - - return 0; -err_sm_dev: - device_destroy(umad_class, port->sm_cdev.dev); + ret = cdev_device_add(&port->sm_cdev, &port->sm_dev); + if (ret) + goto err_dev; -err_sm_cdev: - cdev_del(&port->sm_cdev); + return 0; err_dev: - device_destroy(umad_class, port->cdev.dev); - + put_device(&port->sm_dev); + cdev_device_del(&port->cdev, &port->dev); err_cdev: - cdev_del(&port->cdev); + put_device(&port->dev); ida_free(&umad_ida, devnum); - - return -1; + return ret; } static void ib_umad_kill_port(struct ib_umad_port *port) @@ -1236,17 +1263,11 @@ static void ib_umad_kill_port(struct ib_umad_port *port) struct ib_umad_file *file; int id; - dev_set_drvdata(port->dev, NULL); - dev_set_drvdata(port->sm_dev, NULL); - - device_destroy(umad_class, port->cdev.dev); - device_destroy(umad_class, port->sm_cdev.dev); - - cdev_del(&port->cdev); - cdev_del(&port->sm_cdev); - mutex_lock(&port->file_mutex); + /* Mark ib_dev NULL and block ioctl or other file ops to progress + * further. + */ port->ib_dev = NULL; list_for_each_entry(file, &port->file_list, port_list) { @@ -1260,6 +1281,11 @@ static void ib_umad_kill_port(struct ib_umad_port *port) } mutex_unlock(&port->file_mutex); + + cdev_device_del(&port->sm_cdev, &port->sm_dev); + put_device(&port->sm_dev); + cdev_device_del(&port->cdev, &port->dev); + put_device(&port->dev); ida_free(&umad_ida, port->dev_num); } @@ -1272,22 +1298,17 @@ static void ib_umad_add_one(struct ib_device *device) s = rdma_start_port(device); e = rdma_end_port(device); - umad_dev = kzalloc(sizeof *umad_dev + - (e - s + 1) * sizeof (struct ib_umad_port), - GFP_KERNEL); + umad_dev = kzalloc(struct_size(umad_dev, ports, e - s + 1), GFP_KERNEL); if (!umad_dev) return; - kobject_init(&umad_dev->kobj, &ib_umad_dev_ktype); - + kref_init(&umad_dev->kref); for (i = s; i <= e; ++i) { if (!rdma_cap_ib_mad(device, i)) continue; - umad_dev->port[i - s].umad_dev = umad_dev; - if (ib_umad_init_port(device, i, umad_dev, - &umad_dev->port[i - s])) + &umad_dev->ports[i - s])) goto err; count++; @@ -1305,10 +1326,10 @@ err: if (!rdma_cap_ib_mad(device, i)) continue; - ib_umad_kill_port(&umad_dev->port[i - s]); + ib_umad_kill_port(&umad_dev->ports[i - s]); } free: - kobject_put(&umad_dev->kobj); + ib_umad_dev_put(umad_dev); } static void ib_umad_remove_one(struct ib_device *device, void *client_data) @@ -1321,15 +1342,9 @@ static void ib_umad_remove_one(struct ib_device *device, void *client_data) for (i = 0; i <= rdma_end_port(device) - rdma_start_port(device); ++i) { if (rdma_cap_ib_mad(device, i + rdma_start_port(device))) - ib_umad_kill_port(&umad_dev->port[i]); + ib_umad_kill_port(&umad_dev->ports[i]); } - - kobject_put(&umad_dev->kobj); -} - -static char *umad_devnode(struct device *dev, umode_t *mode) -{ - return kasprintf(GFP_KERNEL, "infiniband/%s", dev_name(dev)); + ib_umad_dev_put(umad_dev); } static int __init ib_umad_init(void) @@ -1338,7 +1353,7 @@ static int __init ib_umad_init(void) ret = register_chrdev_region(base_umad_dev, IB_UMAD_NUM_FIXED_MINOR * 2, - "infiniband_mad"); + umad_class.name); if (ret) { pr_err("couldn't register device number\n"); goto out; @@ -1346,28 +1361,19 @@ static int __init ib_umad_init(void) ret = alloc_chrdev_region(&dynamic_umad_dev, 0, IB_UMAD_NUM_DYNAMIC_MINOR * 2, - "infiniband_mad"); + umad_class.name); if (ret) { pr_err("couldn't register dynamic device number\n"); goto out_alloc; } dynamic_issm_dev = dynamic_umad_dev + IB_UMAD_NUM_DYNAMIC_MINOR; - umad_class = class_create(THIS_MODULE, "infiniband_mad"); - if (IS_ERR(umad_class)) { - ret = PTR_ERR(umad_class); + ret = class_register(&umad_class); + if (ret) { pr_err("couldn't create class infiniband_mad\n"); goto out_chrdev; } - umad_class->devnode = umad_devnode; - - ret = class_create_file(umad_class, &class_attr_abi_version.attr); - if (ret) { - pr_err("couldn't create abi_version attribute\n"); - goto out_class; - } - ret = ib_register_client(&umad_client); if (ret) { pr_err("couldn't register ib_umad client\n"); @@ -1377,7 +1383,7 @@ static int __init ib_umad_init(void) return 0; out_class: - class_destroy(umad_class); + class_unregister(&umad_class); out_chrdev: unregister_chrdev_region(dynamic_umad_dev, @@ -1394,7 +1400,7 @@ out: static void __exit ib_umad_cleanup(void) { ib_unregister_client(&umad_client); - class_destroy(umad_class); + class_unregister(&umad_class); unregister_chrdev_region(base_umad_dev, IB_UMAD_NUM_FIXED_MINOR * 2); unregister_chrdev_region(dynamic_umad_dev, diff --git a/drivers/infiniband/core/uverbs.h b/drivers/infiniband/core/uverbs.h index c97935a0c7c6..ea0bc6885517 100644 --- a/drivers/infiniband/core/uverbs.h +++ b/drivers/infiniband/core/uverbs.h @@ -161,9 +161,6 @@ struct ib_uverbs_file { struct mutex umap_lock; struct list_head umaps; - u64 uverbs_cmd_mask; - u64 uverbs_ex_cmd_mask; - struct idr idr; /* spinlock protects write access to idr */ spinlock_t idr_lock; @@ -249,7 +246,6 @@ int uverbs_dealloc_mw(struct ib_mw *mw); void ib_uverbs_detach_umcast(struct ib_qp *qp, struct ib_uqp_object *uobj); -void create_udata(struct uverbs_attr_bundle *ctx, struct ib_udata *udata); long ib_uverbs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg); struct ib_uverbs_flow_spec { @@ -297,63 +293,29 @@ extern const struct uverbs_object_def UVERBS_OBJECT(UVERBS_OBJECT_FLOW_ACTION); extern const struct uverbs_object_def UVERBS_OBJECT(UVERBS_OBJECT_DM); extern const struct uverbs_object_def UVERBS_OBJECT(UVERBS_OBJECT_COUNTERS); -#define IB_UVERBS_DECLARE_CMD(name) \ - ssize_t ib_uverbs_##name(struct ib_uverbs_file *file, \ - const char __user *buf, int in_len, \ - int out_len) - -IB_UVERBS_DECLARE_CMD(get_context); -IB_UVERBS_DECLARE_CMD(query_device); -IB_UVERBS_DECLARE_CMD(query_port); -IB_UVERBS_DECLARE_CMD(alloc_pd); -IB_UVERBS_DECLARE_CMD(dealloc_pd); -IB_UVERBS_DECLARE_CMD(reg_mr); -IB_UVERBS_DECLARE_CMD(rereg_mr); -IB_UVERBS_DECLARE_CMD(dereg_mr); -IB_UVERBS_DECLARE_CMD(alloc_mw); -IB_UVERBS_DECLARE_CMD(dealloc_mw); -IB_UVERBS_DECLARE_CMD(create_comp_channel); -IB_UVERBS_DECLARE_CMD(create_cq); -IB_UVERBS_DECLARE_CMD(resize_cq); -IB_UVERBS_DECLARE_CMD(poll_cq); -IB_UVERBS_DECLARE_CMD(req_notify_cq); -IB_UVERBS_DECLARE_CMD(destroy_cq); -IB_UVERBS_DECLARE_CMD(create_qp); -IB_UVERBS_DECLARE_CMD(open_qp); -IB_UVERBS_DECLARE_CMD(query_qp); -IB_UVERBS_DECLARE_CMD(modify_qp); -IB_UVERBS_DECLARE_CMD(destroy_qp); -IB_UVERBS_DECLARE_CMD(post_send); -IB_UVERBS_DECLARE_CMD(post_recv); -IB_UVERBS_DECLARE_CMD(post_srq_recv); -IB_UVERBS_DECLARE_CMD(create_ah); -IB_UVERBS_DECLARE_CMD(destroy_ah); -IB_UVERBS_DECLARE_CMD(attach_mcast); -IB_UVERBS_DECLARE_CMD(detach_mcast); -IB_UVERBS_DECLARE_CMD(create_srq); -IB_UVERBS_DECLARE_CMD(modify_srq); -IB_UVERBS_DECLARE_CMD(query_srq); -IB_UVERBS_DECLARE_CMD(destroy_srq); -IB_UVERBS_DECLARE_CMD(create_xsrq); -IB_UVERBS_DECLARE_CMD(open_xrcd); -IB_UVERBS_DECLARE_CMD(close_xrcd); - -#define IB_UVERBS_DECLARE_EX_CMD(name) \ - int ib_uverbs_ex_##name(struct ib_uverbs_file *file, \ - struct ib_udata *ucore, \ - struct ib_udata *uhw) - -IB_UVERBS_DECLARE_EX_CMD(create_flow); -IB_UVERBS_DECLARE_EX_CMD(destroy_flow); -IB_UVERBS_DECLARE_EX_CMD(query_device); -IB_UVERBS_DECLARE_EX_CMD(create_cq); -IB_UVERBS_DECLARE_EX_CMD(create_qp); -IB_UVERBS_DECLARE_EX_CMD(create_wq); -IB_UVERBS_DECLARE_EX_CMD(modify_wq); -IB_UVERBS_DECLARE_EX_CMD(destroy_wq); -IB_UVERBS_DECLARE_EX_CMD(create_rwq_ind_table); -IB_UVERBS_DECLARE_EX_CMD(destroy_rwq_ind_table); -IB_UVERBS_DECLARE_EX_CMD(modify_qp); -IB_UVERBS_DECLARE_EX_CMD(modify_cq); +/* + * ib_uverbs_query_port_resp.port_cap_flags started out as just a copy of the + * PortInfo CapabilityMask, but was extended with unique bits. + */ +static inline u32 make_port_cap_flags(const struct ib_port_attr *attr) +{ + u32 res; + + /* All IBA CapabilityMask bits are passed through here, except bit 26, + * which is overridden with IP_BASED_GIDS. This is due to a historical + * mistake in the implementation of IP_BASED_GIDS. Otherwise all other + * bits match the IBA definition across all kernel versions. + */ + res = attr->port_cap_flags & ~(u32)IB_UVERBS_PCF_IP_BASED_GIDS; + + if (attr->ip_gids) + res |= IB_UVERBS_PCF_IP_BASED_GIDS; + + return res; +} + +void copy_port_attr_to_resp(struct ib_port_attr *attr, + struct ib_uverbs_query_port_resp *resp, + struct ib_device *ib_dev, u8 port_num); #endif /* UVERBS_H */ diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index a93853770e3c..6b12cc5f97b2 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -47,11 +47,134 @@ #include "uverbs.h" #include "core_priv.h" +/* + * Copy a response to userspace. If the provided 'resp' is larger than the + * user buffer it is silently truncated. If the user provided a larger buffer + * then the trailing portion is zero filled. + * + * These semantics are intended to support future extension of the output + * structures. + */ +static int uverbs_response(struct uverbs_attr_bundle *attrs, const void *resp, + size_t resp_len) +{ + int ret; + + if (copy_to_user(attrs->ucore.outbuf, resp, + min(attrs->ucore.outlen, resp_len))) + return -EFAULT; + + if (resp_len < attrs->ucore.outlen) { + /* + * Zero fill any extra memory that user + * space might have provided. + */ + ret = clear_user(attrs->ucore.outbuf + resp_len, + attrs->ucore.outlen - resp_len); + if (ret) + return -EFAULT; + } + + return 0; +} + +/* + * Copy a request from userspace. If the provided 'req' is larger than the + * user buffer then the user buffer is zero extended into the 'req'. If 'req' + * is smaller than the user buffer then the uncopied bytes in the user buffer + * must be zero. + */ +static int uverbs_request(struct uverbs_attr_bundle *attrs, void *req, + size_t req_len) +{ + if (copy_from_user(req, attrs->ucore.inbuf, + min(attrs->ucore.inlen, req_len))) + return -EFAULT; + + if (attrs->ucore.inlen < req_len) { + memset(req + attrs->ucore.inlen, 0, + req_len - attrs->ucore.inlen); + } else if (attrs->ucore.inlen > req_len) { + if (!ib_is_buffer_cleared(attrs->ucore.inbuf + req_len, + attrs->ucore.inlen - req_len)) + return -EOPNOTSUPP; + } + return 0; +} + +/* + * Generate the value for the 'response_length' protocol used by write_ex. + * This is the number of bytes the kernel actually wrote. Userspace can use + * this to detect what structure members in the response the kernel + * understood. + */ +static u32 uverbs_response_length(struct uverbs_attr_bundle *attrs, + size_t resp_len) +{ + return min_t(size_t, attrs->ucore.outlen, resp_len); +} + +/* + * The iterator version of the request interface is for handlers that need to + * step over a flex array at the end of a command header. + */ +struct uverbs_req_iter { + const void __user *cur; + const void __user *end; +}; + +static int uverbs_request_start(struct uverbs_attr_bundle *attrs, + struct uverbs_req_iter *iter, + void *req, + size_t req_len) +{ + if (attrs->ucore.inlen < req_len) + return -ENOSPC; + + if (copy_from_user(req, attrs->ucore.inbuf, req_len)) + return -EFAULT; + + iter->cur = attrs->ucore.inbuf + req_len; + iter->end = attrs->ucore.inbuf + attrs->ucore.inlen; + return 0; +} + +static int uverbs_request_next(struct uverbs_req_iter *iter, void *val, + size_t len) +{ + if (iter->cur + len > iter->end) + return -ENOSPC; + + if (copy_from_user(val, iter->cur, len)) + return -EFAULT; + + iter->cur += len; + return 0; +} + +static const void __user *uverbs_request_next_ptr(struct uverbs_req_iter *iter, + size_t len) +{ + const void __user *res = iter->cur; + + if (iter->cur + len > iter->end) + return ERR_PTR(-ENOSPC); + iter->cur += len; + return res; +} + +static int uverbs_request_finish(struct uverbs_req_iter *iter) +{ + if (!ib_is_buffer_cleared(iter->cur, iter->end - iter->cur)) + return -EOPNOTSUPP; + return 0; +} + static struct ib_uverbs_completion_event_file * -_ib_uverbs_lookup_comp_file(s32 fd, struct ib_uverbs_file *ufile) +_ib_uverbs_lookup_comp_file(s32 fd, const struct uverbs_attr_bundle *attrs) { struct ib_uobject *uobj = ufd_get_read(UVERBS_OBJECT_COMP_CHANNEL, - fd, ufile); + fd, attrs); if (IS_ERR(uobj)) return (void *)uobj; @@ -65,24 +188,20 @@ _ib_uverbs_lookup_comp_file(s32 fd, struct ib_uverbs_file *ufile) #define ib_uverbs_lookup_comp_file(_fd, _ufile) \ _ib_uverbs_lookup_comp_file((_fd)*typecheck(s32, _fd), _ufile) -ssize_t ib_uverbs_get_context(struct ib_uverbs_file *file, - const char __user *buf, - int in_len, int out_len) +static int ib_uverbs_get_context(struct uverbs_attr_bundle *attrs) { + struct ib_uverbs_file *file = attrs->ufile; struct ib_uverbs_get_context cmd; struct ib_uverbs_get_context_resp resp; - struct ib_udata udata; struct ib_ucontext *ucontext; struct file *filp; struct ib_rdmacg_object cg_obj; struct ib_device *ib_dev; int ret; - if (out_len < sizeof resp) - return -ENOSPC; - - if (copy_from_user(&cmd, buf, sizeof cmd)) - return -EFAULT; + ret = uverbs_request(attrs, &cmd, sizeof(cmd)); + if (ret) + return ret; mutex_lock(&file->ucontext_lock); ib_dev = srcu_dereference(file->device->ib_dev, @@ -97,16 +216,11 @@ ssize_t ib_uverbs_get_context(struct ib_uverbs_file *file, goto err; } - ib_uverbs_init_udata(&udata, buf + sizeof(cmd), - u64_to_user_ptr(cmd.response) + sizeof(resp), - in_len - sizeof(cmd) - sizeof(struct ib_uverbs_cmd_hdr), - out_len - sizeof(resp)); - ret = ib_rdmacg_try_charge(&cg_obj, ib_dev, RDMACG_RESOURCE_HCA_HANDLE); if (ret) goto err; - ucontext = ib_dev->alloc_ucontext(ib_dev, &udata); + ucontext = ib_dev->ops.alloc_ucontext(ib_dev, &attrs->driver_udata); if (IS_ERR(ucontext)) { ret = PTR_ERR(ucontext); goto err_alloc; @@ -141,13 +255,15 @@ ssize_t ib_uverbs_get_context(struct ib_uverbs_file *file, goto err_fd; } - if (copy_to_user(u64_to_user_ptr(cmd.response), &resp, sizeof resp)) { - ret = -EFAULT; + ret = uverbs_response(attrs, &resp, sizeof(resp)); + if (ret) goto err_file; - } fd_install(resp.async_fd, filp); + ucontext->res.type = RDMA_RESTRACK_CTX; + rdma_restrack_uadd(&ucontext->res); + /* * Make sure that ib_uverbs_get_ucontext() sees the pointer update * only after all writes to setup the ucontext have completed @@ -156,7 +272,7 @@ ssize_t ib_uverbs_get_context(struct ib_uverbs_file *file, mutex_unlock(&file->ucontext_lock); - return in_len; + return 0; err_file: ib_uverbs_free_async_event_file(file); @@ -166,7 +282,7 @@ err_fd: put_unused_fd(resp.async_fd); err_free: - ib_dev->dealloc_ucontext(ucontext); + ib_dev->ops.dealloc_ucontext(ucontext); err_alloc: ib_rdmacg_uncharge(&cg_obj, ib_dev, RDMACG_RESOURCE_HCA_HANDLE); @@ -224,57 +340,28 @@ static void copy_query_dev_fields(struct ib_ucontext *ucontext, resp->phys_port_cnt = ib_dev->phys_port_cnt; } -ssize_t ib_uverbs_query_device(struct ib_uverbs_file *file, - const char __user *buf, - int in_len, int out_len) +static int ib_uverbs_query_device(struct uverbs_attr_bundle *attrs) { struct ib_uverbs_query_device cmd; struct ib_uverbs_query_device_resp resp; struct ib_ucontext *ucontext; + int ret; - ucontext = ib_uverbs_get_ucontext(file); + ucontext = ib_uverbs_get_ucontext(attrs); if (IS_ERR(ucontext)) return PTR_ERR(ucontext); - if (out_len < sizeof resp) - return -ENOSPC; - - if (copy_from_user(&cmd, buf, sizeof cmd)) - return -EFAULT; + ret = uverbs_request(attrs, &cmd, sizeof(cmd)); + if (ret) + return ret; memset(&resp, 0, sizeof resp); copy_query_dev_fields(ucontext, &resp, &ucontext->device->attrs); - if (copy_to_user(u64_to_user_ptr(cmd.response), &resp, sizeof resp)) - return -EFAULT; - - return in_len; -} - -/* - * ib_uverbs_query_port_resp.port_cap_flags started out as just a copy of the - * PortInfo CapabilityMask, but was extended with unique bits. - */ -static u32 make_port_cap_flags(const struct ib_port_attr *attr) -{ - u32 res; - - /* All IBA CapabilityMask bits are passed through here, except bit 26, - * which is overridden with IP_BASED_GIDS. This is due to a historical - * mistake in the implementation of IP_BASED_GIDS. Otherwise all other - * bits match the IBA definition across all kernel versions. - */ - res = attr->port_cap_flags & ~(u32)IB_UVERBS_PCF_IP_BASED_GIDS; - - if (attr->ip_gids) - res |= IB_UVERBS_PCF_IP_BASED_GIDS; - - return res; + return uverbs_response(attrs, &resp, sizeof(resp)); } -ssize_t ib_uverbs_query_port(struct ib_uverbs_file *file, - const char __user *buf, - int in_len, int out_len) +static int ib_uverbs_query_port(struct uverbs_attr_bundle *attrs) { struct ib_uverbs_query_port cmd; struct ib_uverbs_query_port_resp resp; @@ -283,88 +370,43 @@ ssize_t ib_uverbs_query_port(struct ib_uverbs_file *file, struct ib_ucontext *ucontext; struct ib_device *ib_dev; - ucontext = ib_uverbs_get_ucontext(file); + ucontext = ib_uverbs_get_ucontext(attrs); if (IS_ERR(ucontext)) return PTR_ERR(ucontext); ib_dev = ucontext->device; - if (out_len < sizeof resp) - return -ENOSPC; - - if (copy_from_user(&cmd, buf, sizeof cmd)) - return -EFAULT; + ret = uverbs_request(attrs, &cmd, sizeof(cmd)); + if (ret) + return ret; ret = ib_query_port(ib_dev, cmd.port_num, &attr); if (ret) return ret; memset(&resp, 0, sizeof resp); + copy_port_attr_to_resp(&attr, &resp, ib_dev, cmd.port_num); - resp.state = attr.state; - resp.max_mtu = attr.max_mtu; - resp.active_mtu = attr.active_mtu; - resp.gid_tbl_len = attr.gid_tbl_len; - resp.port_cap_flags = make_port_cap_flags(&attr); - resp.max_msg_sz = attr.max_msg_sz; - resp.bad_pkey_cntr = attr.bad_pkey_cntr; - resp.qkey_viol_cntr = attr.qkey_viol_cntr; - resp.pkey_tbl_len = attr.pkey_tbl_len; - - if (rdma_is_grh_required(ib_dev, cmd.port_num)) - resp.flags |= IB_UVERBS_QPF_GRH_REQUIRED; - - if (rdma_cap_opa_ah(ib_dev, cmd.port_num)) { - resp.lid = OPA_TO_IB_UCAST_LID(attr.lid); - resp.sm_lid = OPA_TO_IB_UCAST_LID(attr.sm_lid); - } else { - resp.lid = ib_lid_cpu16(attr.lid); - resp.sm_lid = ib_lid_cpu16(attr.sm_lid); - } - resp.lmc = attr.lmc; - resp.max_vl_num = attr.max_vl_num; - resp.sm_sl = attr.sm_sl; - resp.subnet_timeout = attr.subnet_timeout; - resp.init_type_reply = attr.init_type_reply; - resp.active_width = attr.active_width; - resp.active_speed = attr.active_speed; - resp.phys_state = attr.phys_state; - resp.link_layer = rdma_port_get_link_layer(ib_dev, - cmd.port_num); - - if (copy_to_user(u64_to_user_ptr(cmd.response), &resp, sizeof resp)) - return -EFAULT; - - return in_len; + return uverbs_response(attrs, &resp, sizeof(resp)); } -ssize_t ib_uverbs_alloc_pd(struct ib_uverbs_file *file, - const char __user *buf, - int in_len, int out_len) +static int ib_uverbs_alloc_pd(struct uverbs_attr_bundle *attrs) { struct ib_uverbs_alloc_pd cmd; struct ib_uverbs_alloc_pd_resp resp; - struct ib_udata udata; struct ib_uobject *uobj; struct ib_pd *pd; int ret; struct ib_device *ib_dev; - if (out_len < sizeof resp) - return -ENOSPC; - - if (copy_from_user(&cmd, buf, sizeof cmd)) - return -EFAULT; - - ib_uverbs_init_udata(&udata, buf + sizeof(cmd), - u64_to_user_ptr(cmd.response) + sizeof(resp), - in_len - sizeof(cmd) - sizeof(struct ib_uverbs_cmd_hdr), - out_len - sizeof(resp)); + ret = uverbs_request(attrs, &cmd, sizeof(cmd)); + if (ret) + return ret; - uobj = uobj_alloc(UVERBS_OBJECT_PD, file, &ib_dev); + uobj = uobj_alloc(UVERBS_OBJECT_PD, attrs, &ib_dev); if (IS_ERR(uobj)) return PTR_ERR(uobj); - pd = ib_dev->alloc_pd(ib_dev, uobj->context, &udata); + pd = ib_dev->ops.alloc_pd(ib_dev, uobj->context, &attrs->driver_udata); if (IS_ERR(pd)) { ret = PTR_ERR(pd); goto err; @@ -379,14 +421,13 @@ ssize_t ib_uverbs_alloc_pd(struct ib_uverbs_file *file, memset(&resp, 0, sizeof resp); resp.pd_handle = uobj->id; pd->res.type = RDMA_RESTRACK_PD; - rdma_restrack_add(&pd->res); + rdma_restrack_uadd(&pd->res); - if (copy_to_user(u64_to_user_ptr(cmd.response), &resp, sizeof resp)) { - ret = -EFAULT; + ret = uverbs_response(attrs, &resp, sizeof(resp)); + if (ret) goto err_copy; - } - return uobj_alloc_commit(uobj, in_len); + return uobj_alloc_commit(uobj); err_copy: ib_dealloc_pd(pd); @@ -396,17 +437,16 @@ err: return ret; } -ssize_t ib_uverbs_dealloc_pd(struct ib_uverbs_file *file, - const char __user *buf, - int in_len, int out_len) +static int ib_uverbs_dealloc_pd(struct uverbs_attr_bundle *attrs) { struct ib_uverbs_dealloc_pd cmd; + int ret; - if (copy_from_user(&cmd, buf, sizeof cmd)) - return -EFAULT; + ret = uverbs_request(attrs, &cmd, sizeof(cmd)); + if (ret) + return ret; - return uobj_perform_destroy(UVERBS_OBJECT_PD, cmd.pd_handle, file, - in_len); + return uobj_perform_destroy(UVERBS_OBJECT_PD, cmd.pd_handle, attrs); } struct xrcd_table_entry { @@ -494,13 +534,11 @@ static void xrcd_table_delete(struct ib_uverbs_device *dev, } } -ssize_t ib_uverbs_open_xrcd(struct ib_uverbs_file *file, - const char __user *buf, int in_len, - int out_len) +static int ib_uverbs_open_xrcd(struct uverbs_attr_bundle *attrs) { + struct ib_uverbs_device *ibudev = attrs->ufile->device; struct ib_uverbs_open_xrcd cmd; struct ib_uverbs_open_xrcd_resp resp; - struct ib_udata udata; struct ib_uxrcd_object *obj; struct ib_xrcd *xrcd = NULL; struct fd f = {NULL, 0}; @@ -509,18 +547,11 @@ ssize_t ib_uverbs_open_xrcd(struct ib_uverbs_file *file, int new_xrcd = 0; struct ib_device *ib_dev; - if (out_len < sizeof resp) - return -ENOSPC; - - if (copy_from_user(&cmd, buf, sizeof cmd)) - return -EFAULT; - - ib_uverbs_init_udata(&udata, buf + sizeof(cmd), - u64_to_user_ptr(cmd.response) + sizeof(resp), - in_len - sizeof(cmd) - sizeof(struct ib_uverbs_cmd_hdr), - out_len - sizeof(resp)); + ret = uverbs_request(attrs, &cmd, sizeof(cmd)); + if (ret) + return ret; - mutex_lock(&file->device->xrcd_tree_mutex); + mutex_lock(&ibudev->xrcd_tree_mutex); if (cmd.fd != -1) { /* search for file descriptor */ @@ -531,7 +562,7 @@ ssize_t ib_uverbs_open_xrcd(struct ib_uverbs_file *file, } inode = file_inode(f.file); - xrcd = find_xrcd(file->device, inode); + xrcd = find_xrcd(ibudev, inode); if (!xrcd && !(cmd.oflags & O_CREAT)) { /* no file descriptor. Need CREATE flag */ ret = -EAGAIN; @@ -544,7 +575,7 @@ ssize_t ib_uverbs_open_xrcd(struct ib_uverbs_file *file, } } - obj = (struct ib_uxrcd_object *)uobj_alloc(UVERBS_OBJECT_XRCD, file, + obj = (struct ib_uxrcd_object *)uobj_alloc(UVERBS_OBJECT_XRCD, attrs, &ib_dev); if (IS_ERR(obj)) { ret = PTR_ERR(obj); @@ -552,7 +583,8 @@ ssize_t ib_uverbs_open_xrcd(struct ib_uverbs_file *file, } if (!xrcd) { - xrcd = ib_dev->alloc_xrcd(ib_dev, obj->uobject.context, &udata); + xrcd = ib_dev->ops.alloc_xrcd(ib_dev, obj->uobject.context, + &attrs->driver_udata); if (IS_ERR(xrcd)) { ret = PTR_ERR(xrcd); goto err; @@ -574,29 +606,28 @@ ssize_t ib_uverbs_open_xrcd(struct ib_uverbs_file *file, if (inode) { if (new_xrcd) { /* create new inode/xrcd table entry */ - ret = xrcd_table_insert(file->device, inode, xrcd); + ret = xrcd_table_insert(ibudev, inode, xrcd); if (ret) goto err_dealloc_xrcd; } atomic_inc(&xrcd->usecnt); } - if (copy_to_user(u64_to_user_ptr(cmd.response), &resp, sizeof resp)) { - ret = -EFAULT; + ret = uverbs_response(attrs, &resp, sizeof(resp)); + if (ret) goto err_copy; - } if (f.file) fdput(f); - mutex_unlock(&file->device->xrcd_tree_mutex); + mutex_unlock(&ibudev->xrcd_tree_mutex); - return uobj_alloc_commit(&obj->uobject, in_len); + return uobj_alloc_commit(&obj->uobject); err_copy: if (inode) { if (new_xrcd) - xrcd_table_delete(file->device, inode); + xrcd_table_delete(ibudev, inode); atomic_dec(&xrcd->usecnt); } @@ -610,22 +641,21 @@ err_tree_mutex_unlock: if (f.file) fdput(f); - mutex_unlock(&file->device->xrcd_tree_mutex); + mutex_unlock(&ibudev->xrcd_tree_mutex); return ret; } -ssize_t ib_uverbs_close_xrcd(struct ib_uverbs_file *file, - const char __user *buf, int in_len, - int out_len) +static int ib_uverbs_close_xrcd(struct uverbs_attr_bundle *attrs) { struct ib_uverbs_close_xrcd cmd; + int ret; - if (copy_from_user(&cmd, buf, sizeof cmd)) - return -EFAULT; + ret = uverbs_request(attrs, &cmd, sizeof(cmd)); + if (ret) + return ret; - return uobj_perform_destroy(UVERBS_OBJECT_XRCD, cmd.xrcd_handle, file, - in_len); + return uobj_perform_destroy(UVERBS_OBJECT_XRCD, cmd.xrcd_handle, attrs); } int ib_uverbs_dealloc_xrcd(struct ib_uobject *uobject, @@ -653,29 +683,19 @@ int ib_uverbs_dealloc_xrcd(struct ib_uobject *uobject, return ret; } -ssize_t ib_uverbs_reg_mr(struct ib_uverbs_file *file, - const char __user *buf, int in_len, - int out_len) +static int ib_uverbs_reg_mr(struct uverbs_attr_bundle *attrs) { struct ib_uverbs_reg_mr cmd; struct ib_uverbs_reg_mr_resp resp; - struct ib_udata udata; struct ib_uobject *uobj; struct ib_pd *pd; struct ib_mr *mr; int ret; struct ib_device *ib_dev; - if (out_len < sizeof resp) - return -ENOSPC; - - if (copy_from_user(&cmd, buf, sizeof cmd)) - return -EFAULT; - - ib_uverbs_init_udata(&udata, buf + sizeof(cmd), - u64_to_user_ptr(cmd.response) + sizeof(resp), - in_len - sizeof(cmd) - sizeof(struct ib_uverbs_cmd_hdr), - out_len - sizeof(resp)); + ret = uverbs_request(attrs, &cmd, sizeof(cmd)); + if (ret) + return ret; if ((cmd.start & ~PAGE_MASK) != (cmd.hca_va & ~PAGE_MASK)) return -EINVAL; @@ -684,11 +704,11 @@ ssize_t ib_uverbs_reg_mr(struct ib_uverbs_file *file, if (ret) return ret; - uobj = uobj_alloc(UVERBS_OBJECT_MR, file, &ib_dev); + uobj = uobj_alloc(UVERBS_OBJECT_MR, attrs, &ib_dev); if (IS_ERR(uobj)) return PTR_ERR(uobj); - pd = uobj_get_obj_read(pd, UVERBS_OBJECT_PD, cmd.pd_handle, file); + pd = uobj_get_obj_read(pd, UVERBS_OBJECT_PD, cmd.pd_handle, attrs); if (!pd) { ret = -EINVAL; goto err_free; @@ -703,8 +723,9 @@ ssize_t ib_uverbs_reg_mr(struct ib_uverbs_file *file, } } - mr = pd->device->reg_user_mr(pd, cmd.start, cmd.length, cmd.hca_va, - cmd.access_flags, &udata); + mr = pd->device->ops.reg_user_mr(pd, cmd.start, cmd.length, cmd.hca_va, + cmd.access_flags, + &attrs->driver_udata); if (IS_ERR(mr)) { ret = PTR_ERR(mr); goto err_put; @@ -716,7 +737,7 @@ ssize_t ib_uverbs_reg_mr(struct ib_uverbs_file *file, mr->uobject = uobj; atomic_inc(&pd->usecnt); mr->res.type = RDMA_RESTRACK_MR; - rdma_restrack_add(&mr->res); + rdma_restrack_uadd(&mr->res); uobj->object = mr; @@ -725,14 +746,13 @@ ssize_t ib_uverbs_reg_mr(struct ib_uverbs_file *file, resp.rkey = mr->rkey; resp.mr_handle = uobj->id; - if (copy_to_user(u64_to_user_ptr(cmd.response), &resp, sizeof resp)) { - ret = -EFAULT; + ret = uverbs_response(attrs, &resp, sizeof(resp)); + if (ret) goto err_copy; - } uobj_put_obj_read(pd); - return uobj_alloc_commit(uobj, in_len); + return uobj_alloc_commit(uobj); err_copy: ib_dereg_mr(mr); @@ -745,29 +765,19 @@ err_free: return ret; } -ssize_t ib_uverbs_rereg_mr(struct ib_uverbs_file *file, - const char __user *buf, int in_len, - int out_len) +static int ib_uverbs_rereg_mr(struct uverbs_attr_bundle *attrs) { struct ib_uverbs_rereg_mr cmd; struct ib_uverbs_rereg_mr_resp resp; - struct ib_udata udata; struct ib_pd *pd = NULL; struct ib_mr *mr; struct ib_pd *old_pd; int ret; struct ib_uobject *uobj; - if (out_len < sizeof(resp)) - return -ENOSPC; - - if (copy_from_user(&cmd, buf, sizeof(cmd))) - return -EFAULT; - - ib_uverbs_init_udata(&udata, buf + sizeof(cmd), - u64_to_user_ptr(cmd.response) + sizeof(resp), - in_len - sizeof(cmd) - sizeof(struct ib_uverbs_cmd_hdr), - out_len - sizeof(resp)); + ret = uverbs_request(attrs, &cmd, sizeof(cmd)); + if (ret) + return ret; if (cmd.flags & ~IB_MR_REREG_SUPPORTED || !cmd.flags) return -EINVAL; @@ -777,7 +787,7 @@ ssize_t ib_uverbs_rereg_mr(struct ib_uverbs_file *file, (cmd.start & ~PAGE_MASK) != (cmd.hca_va & ~PAGE_MASK))) return -EINVAL; - uobj = uobj_get_write(UVERBS_OBJECT_MR, cmd.mr_handle, file); + uobj = uobj_get_write(UVERBS_OBJECT_MR, cmd.mr_handle, attrs); if (IS_ERR(uobj)) return PTR_ERR(uobj); @@ -796,7 +806,7 @@ ssize_t ib_uverbs_rereg_mr(struct ib_uverbs_file *file, if (cmd.flags & IB_MR_REREG_PD) { pd = uobj_get_obj_read(pd, UVERBS_OBJECT_PD, cmd.pd_handle, - file); + attrs); if (!pd) { ret = -EINVAL; goto put_uobjs; @@ -804,9 +814,10 @@ ssize_t ib_uverbs_rereg_mr(struct ib_uverbs_file *file, } old_pd = mr->pd; - ret = mr->device->rereg_user_mr(mr, cmd.flags, cmd.start, - cmd.length, cmd.hca_va, - cmd.access_flags, pd, &udata); + ret = mr->device->ops.rereg_user_mr(mr, cmd.flags, cmd.start, + cmd.length, cmd.hca_va, + cmd.access_flags, pd, + &attrs->driver_udata); if (!ret) { if (cmd.flags & IB_MR_REREG_PD) { atomic_inc(&pd->usecnt); @@ -821,10 +832,7 @@ ssize_t ib_uverbs_rereg_mr(struct ib_uverbs_file *file, resp.lkey = mr->lkey; resp.rkey = mr->rkey; - if (copy_to_user(u64_to_user_ptr(cmd.response), &resp, sizeof(resp))) - ret = -EFAULT; - else - ret = in_len; + ret = uverbs_response(attrs, &resp, sizeof(resp)); put_uobj_pd: if (cmd.flags & IB_MR_REREG_PD) @@ -836,54 +844,43 @@ put_uobjs: return ret; } -ssize_t ib_uverbs_dereg_mr(struct ib_uverbs_file *file, - const char __user *buf, int in_len, - int out_len) +static int ib_uverbs_dereg_mr(struct uverbs_attr_bundle *attrs) { struct ib_uverbs_dereg_mr cmd; + int ret; - if (copy_from_user(&cmd, buf, sizeof cmd)) - return -EFAULT; + ret = uverbs_request(attrs, &cmd, sizeof(cmd)); + if (ret) + return ret; - return uobj_perform_destroy(UVERBS_OBJECT_MR, cmd.mr_handle, file, - in_len); + return uobj_perform_destroy(UVERBS_OBJECT_MR, cmd.mr_handle, attrs); } -ssize_t ib_uverbs_alloc_mw(struct ib_uverbs_file *file, - const char __user *buf, int in_len, - int out_len) +static int ib_uverbs_alloc_mw(struct uverbs_attr_bundle *attrs) { struct ib_uverbs_alloc_mw cmd; struct ib_uverbs_alloc_mw_resp resp; struct ib_uobject *uobj; struct ib_pd *pd; struct ib_mw *mw; - struct ib_udata udata; int ret; struct ib_device *ib_dev; - if (out_len < sizeof(resp)) - return -ENOSPC; - - if (copy_from_user(&cmd, buf, sizeof(cmd))) - return -EFAULT; + ret = uverbs_request(attrs, &cmd, sizeof(cmd)); + if (ret) + return ret; - uobj = uobj_alloc(UVERBS_OBJECT_MW, file, &ib_dev); + uobj = uobj_alloc(UVERBS_OBJECT_MW, attrs, &ib_dev); if (IS_ERR(uobj)) return PTR_ERR(uobj); - pd = uobj_get_obj_read(pd, UVERBS_OBJECT_PD, cmd.pd_handle, file); + pd = uobj_get_obj_read(pd, UVERBS_OBJECT_PD, cmd.pd_handle, attrs); if (!pd) { ret = -EINVAL; goto err_free; } - ib_uverbs_init_udata(&udata, buf + sizeof(cmd), - u64_to_user_ptr(cmd.response) + sizeof(resp), - in_len - sizeof(cmd) - sizeof(struct ib_uverbs_cmd_hdr), - out_len - sizeof(resp)); - - mw = pd->device->alloc_mw(pd, cmd.mw_type, &udata); + mw = pd->device->ops.alloc_mw(pd, cmd.mw_type, &attrs->driver_udata); if (IS_ERR(mw)) { ret = PTR_ERR(mw); goto err_put; @@ -900,13 +897,12 @@ ssize_t ib_uverbs_alloc_mw(struct ib_uverbs_file *file, resp.rkey = mw->rkey; resp.mw_handle = uobj->id; - if (copy_to_user(u64_to_user_ptr(cmd.response), &resp, sizeof(resp))) { - ret = -EFAULT; + ret = uverbs_response(attrs, &resp, sizeof(resp)); + if (ret) goto err_copy; - } uobj_put_obj_read(pd); - return uobj_alloc_commit(uobj, in_len); + return uobj_alloc_commit(uobj); err_copy: uverbs_dealloc_mw(mw); @@ -917,36 +913,32 @@ err_free: return ret; } -ssize_t ib_uverbs_dealloc_mw(struct ib_uverbs_file *file, - const char __user *buf, int in_len, - int out_len) +static int ib_uverbs_dealloc_mw(struct uverbs_attr_bundle *attrs) { struct ib_uverbs_dealloc_mw cmd; + int ret; - if (copy_from_user(&cmd, buf, sizeof(cmd))) - return -EFAULT; + ret = uverbs_request(attrs, &cmd, sizeof(cmd)); + if (ret) + return ret; - return uobj_perform_destroy(UVERBS_OBJECT_MW, cmd.mw_handle, file, - in_len); + return uobj_perform_destroy(UVERBS_OBJECT_MW, cmd.mw_handle, attrs); } -ssize_t ib_uverbs_create_comp_channel(struct ib_uverbs_file *file, - const char __user *buf, int in_len, - int out_len) +static int ib_uverbs_create_comp_channel(struct uverbs_attr_bundle *attrs) { struct ib_uverbs_create_comp_channel cmd; struct ib_uverbs_create_comp_channel_resp resp; struct ib_uobject *uobj; struct ib_uverbs_completion_event_file *ev_file; struct ib_device *ib_dev; + int ret; - if (out_len < sizeof resp) - return -ENOSPC; - - if (copy_from_user(&cmd, buf, sizeof cmd)) - return -EFAULT; + ret = uverbs_request(attrs, &cmd, sizeof(cmd)); + if (ret) + return ret; - uobj = uobj_alloc(UVERBS_OBJECT_COMP_CHANNEL, file, &ib_dev); + uobj = uobj_alloc(UVERBS_OBJECT_COMP_CHANNEL, attrs, &ib_dev); if (IS_ERR(uobj)) return PTR_ERR(uobj); @@ -956,25 +948,17 @@ ssize_t ib_uverbs_create_comp_channel(struct ib_uverbs_file *file, uobj); ib_uverbs_init_event_queue(&ev_file->ev_queue); - if (copy_to_user(u64_to_user_ptr(cmd.response), &resp, sizeof resp)) { + ret = uverbs_response(attrs, &resp, sizeof(resp)); + if (ret) { uobj_alloc_abort(uobj); - return -EFAULT; + return ret; } - return uobj_alloc_commit(uobj, in_len); + return uobj_alloc_commit(uobj); } -static struct ib_ucq_object *create_cq(struct ib_uverbs_file *file, - struct ib_udata *ucore, - struct ib_udata *uhw, - struct ib_uverbs_ex_create_cq *cmd, - size_t cmd_sz, - int (*cb)(struct ib_uverbs_file *file, - struct ib_ucq_object *obj, - struct ib_uverbs_ex_create_cq_resp *resp, - struct ib_udata *udata, - void *context), - void *context) +static struct ib_ucq_object *create_cq(struct uverbs_attr_bundle *attrs, + struct ib_uverbs_ex_create_cq *cmd) { struct ib_ucq_object *obj; struct ib_uverbs_completion_event_file *ev_file = NULL; @@ -984,21 +968,16 @@ static struct ib_ucq_object *create_cq(struct ib_uverbs_file *file, struct ib_cq_init_attr attr = {}; struct ib_device *ib_dev; - if (cmd->comp_vector >= file->device->num_comp_vectors) + if (cmd->comp_vector >= attrs->ufile->device->num_comp_vectors) return ERR_PTR(-EINVAL); - obj = (struct ib_ucq_object *)uobj_alloc(UVERBS_OBJECT_CQ, file, + obj = (struct ib_ucq_object *)uobj_alloc(UVERBS_OBJECT_CQ, attrs, &ib_dev); if (IS_ERR(obj)) return obj; - if (!ib_dev->create_cq) { - ret = -EOPNOTSUPP; - goto err; - } - if (cmd->comp_channel >= 0) { - ev_file = ib_uverbs_lookup_comp_file(cmd->comp_channel, file); + ev_file = ib_uverbs_lookup_comp_file(cmd->comp_channel, attrs); if (IS_ERR(ev_file)) { ret = PTR_ERR(ev_file); goto err; @@ -1013,11 +992,10 @@ static struct ib_ucq_object *create_cq(struct ib_uverbs_file *file, attr.cqe = cmd->cqe; attr.comp_vector = cmd->comp_vector; + attr.flags = cmd->flags; - if (cmd_sz > offsetof(typeof(*cmd), flags) + sizeof(cmd->flags)) - attr.flags = cmd->flags; - - cq = ib_dev->create_cq(ib_dev, &attr, obj->uobject.context, uhw); + cq = ib_dev->ops.create_cq(ib_dev, &attr, obj->uobject.context, + &attrs->driver_udata); if (IS_ERR(cq)) { ret = PTR_ERR(cq); goto err_file; @@ -1034,18 +1012,16 @@ static struct ib_ucq_object *create_cq(struct ib_uverbs_file *file, memset(&resp, 0, sizeof resp); resp.base.cq_handle = obj->uobject.id; resp.base.cqe = cq->cqe; - - resp.response_length = offsetof(typeof(resp), response_length) + - sizeof(resp.response_length); + resp.response_length = uverbs_response_length(attrs, sizeof(resp)); cq->res.type = RDMA_RESTRACK_CQ; - rdma_restrack_add(&cq->res); + rdma_restrack_uadd(&cq->res); - ret = cb(file, obj, &resp, ucore, context); + ret = uverbs_response(attrs, &resp, sizeof(resp)); if (ret) goto err_cb; - ret = uobj_alloc_commit(&obj->uobject, 0); + ret = uobj_alloc_commit(&obj->uobject); if (ret) return ERR_PTR(ret); return obj; @@ -1055,7 +1031,7 @@ err_cb: err_file: if (ev_file) - ib_uverbs_release_ucq(file, ev_file, obj); + ib_uverbs_release_ucq(attrs->ufile, ev_file, obj); err: uobj_alloc_abort(&obj->uobject); @@ -1063,41 +1039,16 @@ err: return ERR_PTR(ret); } -static int ib_uverbs_create_cq_cb(struct ib_uverbs_file *file, - struct ib_ucq_object *obj, - struct ib_uverbs_ex_create_cq_resp *resp, - struct ib_udata *ucore, void *context) -{ - if (ib_copy_to_udata(ucore, &resp->base, sizeof(resp->base))) - return -EFAULT; - - return 0; -} - -ssize_t ib_uverbs_create_cq(struct ib_uverbs_file *file, - const char __user *buf, int in_len, - int out_len) +static int ib_uverbs_create_cq(struct uverbs_attr_bundle *attrs) { struct ib_uverbs_create_cq cmd; struct ib_uverbs_ex_create_cq cmd_ex; - struct ib_uverbs_create_cq_resp resp; - struct ib_udata ucore; - struct ib_udata uhw; struct ib_ucq_object *obj; + int ret; - if (out_len < sizeof(resp)) - return -ENOSPC; - - if (copy_from_user(&cmd, buf, sizeof(cmd))) - return -EFAULT; - - ib_uverbs_init_udata(&ucore, buf, u64_to_user_ptr(cmd.response), - sizeof(cmd), sizeof(resp)); - - ib_uverbs_init_udata(&uhw, buf + sizeof(cmd), - u64_to_user_ptr(cmd.response) + sizeof(resp), - in_len - sizeof(cmd) - sizeof(struct ib_uverbs_cmd_hdr), - out_len - sizeof(resp)); + ret = uverbs_request(attrs, &cmd, sizeof(cmd)); + if (ret) + return ret; memset(&cmd_ex, 0, sizeof(cmd_ex)); cmd_ex.user_handle = cmd.user_handle; @@ -1105,43 +1056,19 @@ ssize_t ib_uverbs_create_cq(struct ib_uverbs_file *file, cmd_ex.comp_vector = cmd.comp_vector; cmd_ex.comp_channel = cmd.comp_channel; - obj = create_cq(file, &ucore, &uhw, &cmd_ex, - offsetof(typeof(cmd_ex), comp_channel) + - sizeof(cmd.comp_channel), ib_uverbs_create_cq_cb, - NULL); - - if (IS_ERR(obj)) - return PTR_ERR(obj); - - return in_len; -} - -static int ib_uverbs_ex_create_cq_cb(struct ib_uverbs_file *file, - struct ib_ucq_object *obj, - struct ib_uverbs_ex_create_cq_resp *resp, - struct ib_udata *ucore, void *context) -{ - if (ib_copy_to_udata(ucore, resp, resp->response_length)) - return -EFAULT; - - return 0; + obj = create_cq(attrs, &cmd_ex); + return PTR_ERR_OR_ZERO(obj); } -int ib_uverbs_ex_create_cq(struct ib_uverbs_file *file, - struct ib_udata *ucore, - struct ib_udata *uhw) +static int ib_uverbs_ex_create_cq(struct uverbs_attr_bundle *attrs) { - struct ib_uverbs_ex_create_cq_resp resp; struct ib_uverbs_ex_create_cq cmd; struct ib_ucq_object *obj; - int err; - - if (ucore->inlen < sizeof(cmd)) - return -EINVAL; + int ret; - err = ib_copy_from_udata(&cmd, ucore, sizeof(cmd)); - if (err) - return err; + ret = uverbs_request(attrs, &cmd, sizeof(cmd)); + if (ret) + return ret; if (cmd.comp_mask) return -EINVAL; @@ -1149,52 +1076,36 @@ int ib_uverbs_ex_create_cq(struct ib_uverbs_file *file, if (cmd.reserved) return -EINVAL; - if (ucore->outlen < (offsetof(typeof(resp), response_length) + - sizeof(resp.response_length))) - return -ENOSPC; - - obj = create_cq(file, ucore, uhw, &cmd, - min(ucore->inlen, sizeof(cmd)), - ib_uverbs_ex_create_cq_cb, NULL); - + obj = create_cq(attrs, &cmd); return PTR_ERR_OR_ZERO(obj); } -ssize_t ib_uverbs_resize_cq(struct ib_uverbs_file *file, - const char __user *buf, int in_len, - int out_len) +static int ib_uverbs_resize_cq(struct uverbs_attr_bundle *attrs) { struct ib_uverbs_resize_cq cmd; struct ib_uverbs_resize_cq_resp resp = {}; - struct ib_udata udata; struct ib_cq *cq; int ret = -EINVAL; - if (copy_from_user(&cmd, buf, sizeof cmd)) - return -EFAULT; - - ib_uverbs_init_udata(&udata, buf + sizeof(cmd), - u64_to_user_ptr(cmd.response) + sizeof(resp), - in_len - sizeof(cmd) - sizeof(struct ib_uverbs_cmd_hdr), - out_len - sizeof(resp)); + ret = uverbs_request(attrs, &cmd, sizeof(cmd)); + if (ret) + return ret; - cq = uobj_get_obj_read(cq, UVERBS_OBJECT_CQ, cmd.cq_handle, file); + cq = uobj_get_obj_read(cq, UVERBS_OBJECT_CQ, cmd.cq_handle, attrs); if (!cq) return -EINVAL; - ret = cq->device->resize_cq(cq, cmd.cqe, &udata); + ret = cq->device->ops.resize_cq(cq, cmd.cqe, &attrs->driver_udata); if (ret) goto out; resp.cqe = cq->cqe; - if (copy_to_user(u64_to_user_ptr(cmd.response), &resp, sizeof resp.cqe)) - ret = -EFAULT; - + ret = uverbs_response(attrs, &resp, sizeof(resp)); out: uobj_put_obj_read(cq); - return ret ? ret : in_len; + return ret; } static int copy_wc_to_user(struct ib_device *ib_dev, void __user *dest, @@ -1227,9 +1138,7 @@ static int copy_wc_to_user(struct ib_device *ib_dev, void __user *dest, return 0; } -ssize_t ib_uverbs_poll_cq(struct ib_uverbs_file *file, - const char __user *buf, int in_len, - int out_len) +static int ib_uverbs_poll_cq(struct uverbs_attr_bundle *attrs) { struct ib_uverbs_poll_cq cmd; struct ib_uverbs_poll_cq_resp resp; @@ -1239,15 +1148,16 @@ ssize_t ib_uverbs_poll_cq(struct ib_uverbs_file *file, struct ib_wc wc; int ret; - if (copy_from_user(&cmd, buf, sizeof cmd)) - return -EFAULT; + ret = uverbs_request(attrs, &cmd, sizeof(cmd)); + if (ret) + return ret; - cq = uobj_get_obj_read(cq, UVERBS_OBJECT_CQ, cmd.cq_handle, file); + cq = uobj_get_obj_read(cq, UVERBS_OBJECT_CQ, cmd.cq_handle, attrs); if (!cq) return -EINVAL; /* we copy a struct ib_uverbs_poll_cq_resp to user space */ - header_ptr = u64_to_user_ptr(cmd.response); + header_ptr = attrs->ucore.outbuf; data_ptr = header_ptr + sizeof resp; memset(&resp, 0, sizeof resp); @@ -1271,24 +1181,24 @@ ssize_t ib_uverbs_poll_cq(struct ib_uverbs_file *file, goto out_put; } - ret = in_len; + ret = 0; out_put: uobj_put_obj_read(cq); return ret; } -ssize_t ib_uverbs_req_notify_cq(struct ib_uverbs_file *file, - const char __user *buf, int in_len, - int out_len) +static int ib_uverbs_req_notify_cq(struct uverbs_attr_bundle *attrs) { struct ib_uverbs_req_notify_cq cmd; struct ib_cq *cq; + int ret; - if (copy_from_user(&cmd, buf, sizeof cmd)) - return -EFAULT; + ret = uverbs_request(attrs, &cmd, sizeof(cmd)); + if (ret) + return ret; - cq = uobj_get_obj_read(cq, UVERBS_OBJECT_CQ, cmd.cq_handle, file); + cq = uobj_get_obj_read(cq, UVERBS_OBJECT_CQ, cmd.cq_handle, attrs); if (!cq) return -EINVAL; @@ -1297,22 +1207,22 @@ ssize_t ib_uverbs_req_notify_cq(struct ib_uverbs_file *file, uobj_put_obj_read(cq); - return in_len; + return 0; } -ssize_t ib_uverbs_destroy_cq(struct ib_uverbs_file *file, - const char __user *buf, int in_len, - int out_len) +static int ib_uverbs_destroy_cq(struct uverbs_attr_bundle *attrs) { struct ib_uverbs_destroy_cq cmd; struct ib_uverbs_destroy_cq_resp resp; struct ib_uobject *uobj; struct ib_ucq_object *obj; + int ret; - if (copy_from_user(&cmd, buf, sizeof cmd)) - return -EFAULT; + ret = uverbs_request(attrs, &cmd, sizeof(cmd)); + if (ret) + return ret; - uobj = uobj_get_destroy(UVERBS_OBJECT_CQ, cmd.cq_handle, file); + uobj = uobj_get_destroy(UVERBS_OBJECT_CQ, cmd.cq_handle, attrs); if (IS_ERR(uobj)) return PTR_ERR(uobj); @@ -1323,21 +1233,11 @@ ssize_t ib_uverbs_destroy_cq(struct ib_uverbs_file *file, uobj_put_destroy(uobj); - if (copy_to_user(u64_to_user_ptr(cmd.response), &resp, sizeof resp)) - return -EFAULT; - - return in_len; + return uverbs_response(attrs, &resp, sizeof(resp)); } -static int create_qp(struct ib_uverbs_file *file, - struct ib_udata *ucore, - struct ib_udata *uhw, - struct ib_uverbs_ex_create_qp *cmd, - size_t cmd_sz, - int (*cb)(struct ib_uverbs_file *file, - struct ib_uverbs_ex_create_qp_resp *resp, - struct ib_udata *udata), - void *context) +static int create_qp(struct uverbs_attr_bundle *attrs, + struct ib_uverbs_ex_create_qp *cmd) { struct ib_uqp_object *obj; struct ib_device *device; @@ -1347,7 +1247,6 @@ static int create_qp(struct ib_uverbs_file *file, struct ib_cq *scq = NULL, *rcq = NULL; struct ib_srq *srq = NULL; struct ib_qp *qp; - char *buf; struct ib_qp_init_attr attr = {}; struct ib_uverbs_ex_create_qp_resp resp; int ret; @@ -1358,7 +1257,7 @@ static int create_qp(struct ib_uverbs_file *file, if (cmd->qp_type == IB_QPT_RAW_PACKET && !capable(CAP_NET_RAW)) return -EPERM; - obj = (struct ib_uqp_object *)uobj_alloc(UVERBS_OBJECT_QP, file, + obj = (struct ib_uqp_object *)uobj_alloc(UVERBS_OBJECT_QP, attrs, &ib_dev); if (IS_ERR(obj)) return PTR_ERR(obj); @@ -1366,12 +1265,10 @@ static int create_qp(struct ib_uverbs_file *file, obj->uevent.uobject.user_handle = cmd->user_handle; mutex_init(&obj->mcast_lock); - if (cmd_sz >= offsetof(typeof(*cmd), rwq_ind_tbl_handle) + - sizeof(cmd->rwq_ind_tbl_handle) && - (cmd->comp_mask & IB_UVERBS_CREATE_QP_MASK_IND_TABLE)) { + if (cmd->comp_mask & IB_UVERBS_CREATE_QP_MASK_IND_TABLE) { ind_tbl = uobj_get_obj_read(rwq_ind_table, UVERBS_OBJECT_RWQ_IND_TBL, - cmd->rwq_ind_tbl_handle, file); + cmd->rwq_ind_tbl_handle, attrs); if (!ind_tbl) { ret = -EINVAL; goto err_put; @@ -1380,13 +1277,6 @@ static int create_qp(struct ib_uverbs_file *file, attr.rwq_ind_tbl = ind_tbl; } - if (cmd_sz > sizeof(*cmd) && - !ib_is_udata_cleared(ucore, sizeof(*cmd), - cmd_sz - sizeof(*cmd))) { - ret = -EOPNOTSUPP; - goto err_put; - } - if (ind_tbl && (cmd->max_recv_wr || cmd->max_recv_sge || cmd->is_srq)) { ret = -EINVAL; goto err_put; @@ -1397,7 +1287,7 @@ static int create_qp(struct ib_uverbs_file *file, if (cmd->qp_type == IB_QPT_XRC_TGT) { xrcd_uobj = uobj_get_read(UVERBS_OBJECT_XRCD, cmd->pd_handle, - file); + attrs); if (IS_ERR(xrcd_uobj)) { ret = -EINVAL; @@ -1417,7 +1307,7 @@ static int create_qp(struct ib_uverbs_file *file, } else { if (cmd->is_srq) { srq = uobj_get_obj_read(srq, UVERBS_OBJECT_SRQ, - cmd->srq_handle, file); + cmd->srq_handle, attrs); if (!srq || srq->srq_type == IB_SRQT_XRC) { ret = -EINVAL; goto err_put; @@ -1428,7 +1318,7 @@ static int create_qp(struct ib_uverbs_file *file, if (cmd->recv_cq_handle != cmd->send_cq_handle) { rcq = uobj_get_obj_read( cq, UVERBS_OBJECT_CQ, - cmd->recv_cq_handle, file); + cmd->recv_cq_handle, attrs); if (!rcq) { ret = -EINVAL; goto err_put; @@ -1439,11 +1329,11 @@ static int create_qp(struct ib_uverbs_file *file, if (has_sq) scq = uobj_get_obj_read(cq, UVERBS_OBJECT_CQ, - cmd->send_cq_handle, file); + cmd->send_cq_handle, attrs); if (!ind_tbl) rcq = rcq ?: scq; pd = uobj_get_obj_read(pd, UVERBS_OBJECT_PD, cmd->pd_handle, - file); + attrs); if (!pd || (!scq && has_sq)) { ret = -EINVAL; goto err_put; @@ -1453,7 +1343,7 @@ static int create_qp(struct ib_uverbs_file *file, } attr.event_handler = ib_uverbs_qp_event_handler; - attr.qp_context = file; + attr.qp_context = attrs->ufile; attr.send_cq = scq; attr.recv_cq = rcq; attr.srq = srq; @@ -1473,10 +1363,7 @@ static int create_qp(struct ib_uverbs_file *file, INIT_LIST_HEAD(&obj->uevent.event_list); INIT_LIST_HEAD(&obj->mcast_list); - if (cmd_sz >= offsetof(typeof(*cmd), create_flags) + - sizeof(cmd->create_flags)) - attr.create_flags = cmd->create_flags; - + attr.create_flags = cmd->create_flags; if (attr.create_flags & ~(IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK | IB_QP_CREATE_CROSS_CHANNEL | IB_QP_CREATE_MANAGED_SEND | @@ -1498,18 +1385,10 @@ static int create_qp(struct ib_uverbs_file *file, attr.source_qpn = cmd->source_qpn; } - buf = (void *)cmd + sizeof(*cmd); - if (cmd_sz > sizeof(*cmd)) - if (!(buf[0] == 0 && !memcmp(buf, buf + 1, - cmd_sz - sizeof(*cmd) - 1))) { - ret = -EINVAL; - goto err_put; - } - if (cmd->qp_type == IB_QPT_XRC_TGT) qp = ib_create_qp(pd, &attr); else - qp = _ib_create_qp(device, pd, &attr, uhw, + qp = _ib_create_qp(device, pd, &attr, &attrs->driver_udata, &obj->uevent.uobject); if (IS_ERR(qp)) { @@ -1557,11 +1436,9 @@ static int create_qp(struct ib_uverbs_file *file, resp.base.max_recv_wr = attr.cap.max_recv_wr; resp.base.max_send_wr = attr.cap.max_send_wr; resp.base.max_inline_data = attr.cap.max_inline_data; + resp.response_length = uverbs_response_length(attrs, sizeof(resp)); - resp.response_length = offsetof(typeof(resp), response_length) + - sizeof(resp.response_length); - - ret = cb(file, &resp, ucore); + ret = uverbs_response(attrs, &resp, sizeof(resp)); if (ret) goto err_cb; @@ -1583,7 +1460,7 @@ static int create_qp(struct ib_uverbs_file *file, if (ind_tbl) uobj_put_obj_read(ind_tbl); - return uobj_alloc_commit(&obj->uevent.uobject, 0); + return uobj_alloc_commit(&obj->uevent.uobject); err_cb: ib_destroy_qp(qp); @@ -1605,39 +1482,15 @@ err_put: return ret; } -static int ib_uverbs_create_qp_cb(struct ib_uverbs_file *file, - struct ib_uverbs_ex_create_qp_resp *resp, - struct ib_udata *ucore) -{ - if (ib_copy_to_udata(ucore, &resp->base, sizeof(resp->base))) - return -EFAULT; - - return 0; -} - -ssize_t ib_uverbs_create_qp(struct ib_uverbs_file *file, - const char __user *buf, int in_len, - int out_len) +static int ib_uverbs_create_qp(struct uverbs_attr_bundle *attrs) { struct ib_uverbs_create_qp cmd; struct ib_uverbs_ex_create_qp cmd_ex; - struct ib_udata ucore; - struct ib_udata uhw; - ssize_t resp_size = sizeof(struct ib_uverbs_create_qp_resp); - int err; - - if (out_len < resp_size) - return -ENOSPC; - - if (copy_from_user(&cmd, buf, sizeof(cmd))) - return -EFAULT; + int ret; - ib_uverbs_init_udata(&ucore, buf, u64_to_user_ptr(cmd.response), - sizeof(cmd), resp_size); - ib_uverbs_init_udata(&uhw, buf + sizeof(cmd), - u64_to_user_ptr(cmd.response) + resp_size, - in_len - sizeof(cmd) - sizeof(struct ib_uverbs_cmd_hdr), - out_len - resp_size); + ret = uverbs_request(attrs, &cmd, sizeof(cmd)); + if (ret) + return ret; memset(&cmd_ex, 0, sizeof(cmd_ex)); cmd_ex.user_handle = cmd.user_handle; @@ -1654,42 +1507,17 @@ ssize_t ib_uverbs_create_qp(struct ib_uverbs_file *file, cmd_ex.qp_type = cmd.qp_type; cmd_ex.is_srq = cmd.is_srq; - err = create_qp(file, &ucore, &uhw, &cmd_ex, - offsetof(typeof(cmd_ex), is_srq) + - sizeof(cmd.is_srq), ib_uverbs_create_qp_cb, - NULL); - - if (err) - return err; - - return in_len; + return create_qp(attrs, &cmd_ex); } -static int ib_uverbs_ex_create_qp_cb(struct ib_uverbs_file *file, - struct ib_uverbs_ex_create_qp_resp *resp, - struct ib_udata *ucore) +static int ib_uverbs_ex_create_qp(struct uverbs_attr_bundle *attrs) { - if (ib_copy_to_udata(ucore, resp, resp->response_length)) - return -EFAULT; - - return 0; -} - -int ib_uverbs_ex_create_qp(struct ib_uverbs_file *file, - struct ib_udata *ucore, - struct ib_udata *uhw) -{ - struct ib_uverbs_ex_create_qp_resp resp; - struct ib_uverbs_ex_create_qp cmd = {0}; - int err; - - if (ucore->inlen < (offsetof(typeof(cmd), comp_mask) + - sizeof(cmd.comp_mask))) - return -EINVAL; + struct ib_uverbs_ex_create_qp cmd; + int ret; - err = ib_copy_from_udata(&cmd, ucore, min(sizeof(cmd), ucore->inlen)); - if (err) - return err; + ret = uverbs_request(attrs, &cmd, sizeof(cmd)); + if (ret) + return ret; if (cmd.comp_mask & ~IB_UVERBS_CREATE_QP_SUP_COMP_MASK) return -EINVAL; @@ -1697,26 +1525,13 @@ int ib_uverbs_ex_create_qp(struct ib_uverbs_file *file, if (cmd.reserved) return -EINVAL; - if (ucore->outlen < (offsetof(typeof(resp), response_length) + - sizeof(resp.response_length))) - return -ENOSPC; - - err = create_qp(file, ucore, uhw, &cmd, - min(ucore->inlen, sizeof(cmd)), - ib_uverbs_ex_create_qp_cb, NULL); - - if (err) - return err; - - return 0; + return create_qp(attrs, &cmd); } -ssize_t ib_uverbs_open_qp(struct ib_uverbs_file *file, - const char __user *buf, int in_len, int out_len) +static int ib_uverbs_open_qp(struct uverbs_attr_bundle *attrs) { struct ib_uverbs_open_qp cmd; struct ib_uverbs_create_qp_resp resp; - struct ib_udata udata; struct ib_uqp_object *obj; struct ib_xrcd *xrcd; struct ib_uobject *uninitialized_var(xrcd_uobj); @@ -1725,23 +1540,16 @@ ssize_t ib_uverbs_open_qp(struct ib_uverbs_file *file, int ret; struct ib_device *ib_dev; - if (out_len < sizeof resp) - return -ENOSPC; - - if (copy_from_user(&cmd, buf, sizeof cmd)) - return -EFAULT; - - ib_uverbs_init_udata(&udata, buf + sizeof(cmd), - u64_to_user_ptr(cmd.response) + sizeof(resp), - in_len - sizeof(cmd) - sizeof(struct ib_uverbs_cmd_hdr), - out_len - sizeof(resp)); + ret = uverbs_request(attrs, &cmd, sizeof(cmd)); + if (ret) + return ret; - obj = (struct ib_uqp_object *)uobj_alloc(UVERBS_OBJECT_QP, file, + obj = (struct ib_uqp_object *)uobj_alloc(UVERBS_OBJECT_QP, attrs, &ib_dev); if (IS_ERR(obj)) return PTR_ERR(obj); - xrcd_uobj = uobj_get_read(UVERBS_OBJECT_XRCD, cmd.pd_handle, file); + xrcd_uobj = uobj_get_read(UVERBS_OBJECT_XRCD, cmd.pd_handle, attrs); if (IS_ERR(xrcd_uobj)) { ret = -EINVAL; goto err_put; @@ -1754,7 +1562,7 @@ ssize_t ib_uverbs_open_qp(struct ib_uverbs_file *file, } attr.event_handler = ib_uverbs_qp_event_handler; - attr.qp_context = file; + attr.qp_context = attrs->ufile; attr.qp_num = cmd.qpn; attr.qp_type = cmd.qp_type; @@ -1775,17 +1583,16 @@ ssize_t ib_uverbs_open_qp(struct ib_uverbs_file *file, resp.qpn = qp->qp_num; resp.qp_handle = obj->uevent.uobject.id; - if (copy_to_user(u64_to_user_ptr(cmd.response), &resp, sizeof resp)) { - ret = -EFAULT; + ret = uverbs_response(attrs, &resp, sizeof(resp)); + if (ret) goto err_destroy; - } obj->uxrcd = container_of(xrcd_uobj, struct ib_uxrcd_object, uobject); atomic_inc(&obj->uxrcd->refcnt); qp->uobject = &obj->uevent.uobject; uobj_put_read(xrcd_uobj); - return uobj_alloc_commit(&obj->uevent.uobject, in_len); + return uobj_alloc_commit(&obj->uevent.uobject); err_destroy: ib_destroy_qp(qp); @@ -1818,9 +1625,7 @@ static void copy_ah_attr_to_uverbs(struct ib_uverbs_qp_dest *uverb_attr, uverb_attr->port_num = rdma_ah_get_port_num(rdma_attr); } -ssize_t ib_uverbs_query_qp(struct ib_uverbs_file *file, - const char __user *buf, int in_len, - int out_len) +static int ib_uverbs_query_qp(struct uverbs_attr_bundle *attrs) { struct ib_uverbs_query_qp cmd; struct ib_uverbs_query_qp_resp resp; @@ -1829,8 +1634,9 @@ ssize_t ib_uverbs_query_qp(struct ib_uverbs_file *file, struct ib_qp_init_attr *init_attr; int ret; - if (copy_from_user(&cmd, buf, sizeof cmd)) - return -EFAULT; + ret = uverbs_request(attrs, &cmd, sizeof(cmd)); + if (ret) + return ret; attr = kmalloc(sizeof *attr, GFP_KERNEL); init_attr = kmalloc(sizeof *init_attr, GFP_KERNEL); @@ -1839,7 +1645,7 @@ ssize_t ib_uverbs_query_qp(struct ib_uverbs_file *file, goto out; } - qp = uobj_get_obj_read(qp, UVERBS_OBJECT_QP, cmd.qp_handle, file); + qp = uobj_get_obj_read(qp, UVERBS_OBJECT_QP, cmd.qp_handle, attrs); if (!qp) { ret = -EINVAL; goto out; @@ -1886,14 +1692,13 @@ ssize_t ib_uverbs_query_qp(struct ib_uverbs_file *file, resp.max_inline_data = init_attr->cap.max_inline_data; resp.sq_sig_all = init_attr->sq_sig_type == IB_SIGNAL_ALL_WR; - if (copy_to_user(u64_to_user_ptr(cmd.response), &resp, sizeof resp)) - ret = -EFAULT; + ret = uverbs_response(attrs, &resp, sizeof(resp)); out: kfree(attr); kfree(init_attr); - return ret ? ret : in_len; + return ret; } /* Remove ignored fields set in the attribute mask */ @@ -1933,8 +1738,8 @@ static void copy_ah_attr_from_uverbs(struct ib_device *dev, rdma_ah_set_make_grd(rdma_attr, false); } -static int modify_qp(struct ib_uverbs_file *file, - struct ib_uverbs_ex_modify_qp *cmd, struct ib_udata *udata) +static int modify_qp(struct uverbs_attr_bundle *attrs, + struct ib_uverbs_ex_modify_qp *cmd) { struct ib_qp_attr *attr; struct ib_qp *qp; @@ -1944,7 +1749,8 @@ static int modify_qp(struct ib_uverbs_file *file, if (!attr) return -ENOMEM; - qp = uobj_get_obj_read(qp, UVERBS_OBJECT_QP, cmd->base.qp_handle, file); + qp = uobj_get_obj_read(qp, UVERBS_OBJECT_QP, cmd->base.qp_handle, + attrs); if (!qp) { ret = -EINVAL; goto out; @@ -2081,7 +1887,7 @@ static int modify_qp(struct ib_uverbs_file *file, ret = ib_modify_qp_with_udata(qp, attr, modify_qp_mask(qp->qp_type, cmd->base.attr_mask), - udata); + &attrs->driver_udata); release_qp: uobj_put_obj_read(qp); @@ -2091,80 +1897,64 @@ out: return ret; } -ssize_t ib_uverbs_modify_qp(struct ib_uverbs_file *file, - const char __user *buf, int in_len, - int out_len) +static int ib_uverbs_modify_qp(struct uverbs_attr_bundle *attrs) { - struct ib_uverbs_ex_modify_qp cmd = {}; - struct ib_udata udata; + struct ib_uverbs_ex_modify_qp cmd; int ret; - if (copy_from_user(&cmd.base, buf, sizeof(cmd.base))) - return -EFAULT; + ret = uverbs_request(attrs, &cmd.base, sizeof(cmd.base)); + if (ret) + return ret; if (cmd.base.attr_mask & ~((IB_USER_LEGACY_LAST_QP_ATTR_MASK << 1) - 1)) return -EOPNOTSUPP; - ib_uverbs_init_udata(&udata, buf + sizeof(cmd.base), NULL, - in_len - sizeof(cmd.base) - sizeof(struct ib_uverbs_cmd_hdr), - out_len); - - ret = modify_qp(file, &cmd, &udata); - if (ret) - return ret; - - return in_len; + return modify_qp(attrs, &cmd); } -int ib_uverbs_ex_modify_qp(struct ib_uverbs_file *file, - struct ib_udata *ucore, - struct ib_udata *uhw) +static int ib_uverbs_ex_modify_qp(struct uverbs_attr_bundle *attrs) { - struct ib_uverbs_ex_modify_qp cmd = {}; + struct ib_uverbs_ex_modify_qp cmd; + struct ib_uverbs_ex_modify_qp_resp resp = { + .response_length = uverbs_response_length(attrs, sizeof(resp)) + }; int ret; + ret = uverbs_request(attrs, &cmd, sizeof(cmd)); + if (ret) + return ret; + /* * Last bit is reserved for extending the attr_mask by * using another field. */ BUILD_BUG_ON(IB_USER_LAST_QP_ATTR_MASK == (1 << 31)); - if (ucore->inlen < sizeof(cmd.base)) - return -EINVAL; - - ret = ib_copy_from_udata(&cmd, ucore, min(sizeof(cmd), ucore->inlen)); - if (ret) - return ret; - if (cmd.base.attr_mask & ~((IB_USER_LAST_QP_ATTR_MASK << 1) - 1)) return -EOPNOTSUPP; - if (ucore->inlen > sizeof(cmd)) { - if (!ib_is_udata_cleared(ucore, sizeof(cmd), - ucore->inlen - sizeof(cmd))) - return -EOPNOTSUPP; - } - - ret = modify_qp(file, &cmd, uhw); + ret = modify_qp(attrs, &cmd); + if (ret) + return ret; - return ret; + return uverbs_response(attrs, &resp, sizeof(resp)); } -ssize_t ib_uverbs_destroy_qp(struct ib_uverbs_file *file, - const char __user *buf, int in_len, - int out_len) +static int ib_uverbs_destroy_qp(struct uverbs_attr_bundle *attrs) { struct ib_uverbs_destroy_qp cmd; struct ib_uverbs_destroy_qp_resp resp; struct ib_uobject *uobj; struct ib_uqp_object *obj; + int ret; - if (copy_from_user(&cmd, buf, sizeof cmd)) - return -EFAULT; + ret = uverbs_request(attrs, &cmd, sizeof(cmd)); + if (ret) + return ret; - uobj = uobj_get_destroy(UVERBS_OBJECT_QP, cmd.qp_handle, file); + uobj = uobj_get_destroy(UVERBS_OBJECT_QP, cmd.qp_handle, attrs); if (IS_ERR(uobj)) return PTR_ERR(uobj); @@ -2174,10 +1964,7 @@ ssize_t ib_uverbs_destroy_qp(struct ib_uverbs_file *file, uobj_put_destroy(uobj); - if (copy_to_user(u64_to_user_ptr(cmd.response), &resp, sizeof resp)) - return -EFAULT; - - return in_len; + return uverbs_response(attrs, &resp, sizeof(resp)); } static void *alloc_wr(size_t wr_size, __u32 num_sge) @@ -2190,9 +1977,7 @@ static void *alloc_wr(size_t wr_size, __u32 num_sge) num_sge * sizeof (struct ib_sge), GFP_KERNEL); } -ssize_t ib_uverbs_post_send(struct ib_uverbs_file *file, - const char __user *buf, int in_len, - int out_len) +static int ib_uverbs_post_send(struct uverbs_attr_bundle *attrs) { struct ib_uverbs_post_send cmd; struct ib_uverbs_post_send_resp resp; @@ -2202,24 +1987,31 @@ ssize_t ib_uverbs_post_send(struct ib_uverbs_file *file, struct ib_qp *qp; int i, sg_ind; int is_ud; - ssize_t ret = -EINVAL; + int ret, ret2; size_t next_size; + const struct ib_sge __user *sgls; + const void __user *wqes; + struct uverbs_req_iter iter; - if (copy_from_user(&cmd, buf, sizeof cmd)) - return -EFAULT; - - if (in_len < sizeof cmd + cmd.wqe_size * cmd.wr_count + - cmd.sge_count * sizeof (struct ib_uverbs_sge)) - return -EINVAL; - - if (cmd.wqe_size < sizeof (struct ib_uverbs_send_wr)) - return -EINVAL; + ret = uverbs_request_start(attrs, &iter, &cmd, sizeof(cmd)); + if (ret) + return ret; + wqes = uverbs_request_next_ptr(&iter, cmd.wqe_size * cmd.wr_count); + if (IS_ERR(wqes)) + return PTR_ERR(wqes); + sgls = uverbs_request_next_ptr( + &iter, cmd.sge_count * sizeof(struct ib_uverbs_sge)); + if (IS_ERR(sgls)) + return PTR_ERR(sgls); + ret = uverbs_request_finish(&iter); + if (ret) + return ret; user_wr = kmalloc(cmd.wqe_size, GFP_KERNEL); if (!user_wr) return -ENOMEM; - qp = uobj_get_obj_read(qp, UVERBS_OBJECT_QP, cmd.qp_handle, file); + qp = uobj_get_obj_read(qp, UVERBS_OBJECT_QP, cmd.qp_handle, attrs); if (!qp) goto out; @@ -2227,8 +2019,7 @@ ssize_t ib_uverbs_post_send(struct ib_uverbs_file *file, sg_ind = 0; last = NULL; for (i = 0; i < cmd.wr_count; ++i) { - if (copy_from_user(user_wr, - buf + sizeof cmd + i * cmd.wqe_size, + if (copy_from_user(user_wr, wqes + i * cmd.wqe_size, cmd.wqe_size)) { ret = -EFAULT; goto out_put; @@ -2256,7 +2047,7 @@ ssize_t ib_uverbs_post_send(struct ib_uverbs_file *file, } ud->ah = uobj_get_obj_read(ah, UVERBS_OBJECT_AH, - user_wr->wr.ud.ah, file); + user_wr->wr.ud.ah, attrs); if (!ud->ah) { kfree(ud); ret = -EINVAL; @@ -2336,11 +2127,9 @@ ssize_t ib_uverbs_post_send(struct ib_uverbs_file *file, if (next->num_sge) { next->sg_list = (void *) next + ALIGN(next_size, sizeof(struct ib_sge)); - if (copy_from_user(next->sg_list, - buf + sizeof cmd + - cmd.wr_count * cmd.wqe_size + - sg_ind * sizeof (struct ib_sge), - next->num_sge * sizeof (struct ib_sge))) { + if (copy_from_user(next->sg_list, sgls + sg_ind, + next->num_sge * + sizeof(struct ib_sge))) { ret = -EFAULT; goto out_put; } @@ -2350,7 +2139,7 @@ ssize_t ib_uverbs_post_send(struct ib_uverbs_file *file, } resp.bad_wr = 0; - ret = qp->device->post_send(qp->real_qp, wr, &bad_wr); + ret = qp->device->ops.post_send(qp->real_qp, wr, &bad_wr); if (ret) for (next = wr; next; next = next->next) { ++resp.bad_wr; @@ -2358,8 +2147,9 @@ ssize_t ib_uverbs_post_send(struct ib_uverbs_file *file, break; } - if (copy_to_user(u64_to_user_ptr(cmd.response), &resp, sizeof resp)) - ret = -EFAULT; + ret2 = uverbs_response(attrs, &resp, sizeof(resp)); + if (ret2) + ret = ret2; out_put: uobj_put_obj_read(qp); @@ -2375,28 +2165,35 @@ out_put: out: kfree(user_wr); - return ret ? ret : in_len; + return ret; } -static struct ib_recv_wr *ib_uverbs_unmarshall_recv(const char __user *buf, - int in_len, - u32 wr_count, - u32 sge_count, - u32 wqe_size) +static struct ib_recv_wr * +ib_uverbs_unmarshall_recv(struct uverbs_req_iter *iter, u32 wr_count, + u32 wqe_size, u32 sge_count) { struct ib_uverbs_recv_wr *user_wr; struct ib_recv_wr *wr = NULL, *last, *next; int sg_ind; int i; int ret; - - if (in_len < wqe_size * wr_count + - sge_count * sizeof (struct ib_uverbs_sge)) - return ERR_PTR(-EINVAL); + const struct ib_sge __user *sgls; + const void __user *wqes; if (wqe_size < sizeof (struct ib_uverbs_recv_wr)) return ERR_PTR(-EINVAL); + wqes = uverbs_request_next_ptr(iter, wqe_size * wr_count); + if (IS_ERR(wqes)) + return ERR_CAST(wqes); + sgls = uverbs_request_next_ptr( + iter, sge_count * sizeof(struct ib_uverbs_sge)); + if (IS_ERR(sgls)) + return ERR_CAST(sgls); + ret = uverbs_request_finish(iter); + if (ret) + return ERR_PTR(ret); + user_wr = kmalloc(wqe_size, GFP_KERNEL); if (!user_wr) return ERR_PTR(-ENOMEM); @@ -2404,7 +2201,7 @@ static struct ib_recv_wr *ib_uverbs_unmarshall_recv(const char __user *buf, sg_ind = 0; last = NULL; for (i = 0; i < wr_count; ++i) { - if (copy_from_user(user_wr, buf + i * wqe_size, + if (copy_from_user(user_wr, wqes + i * wqe_size, wqe_size)) { ret = -EFAULT; goto err; @@ -2443,10 +2240,9 @@ static struct ib_recv_wr *ib_uverbs_unmarshall_recv(const char __user *buf, if (next->num_sge) { next->sg_list = (void *) next + ALIGN(sizeof *next, sizeof (struct ib_sge)); - if (copy_from_user(next->sg_list, - buf + wr_count * wqe_size + - sg_ind * sizeof (struct ib_sge), - next->num_sge * sizeof (struct ib_sge))) { + if (copy_from_user(next->sg_list, sgls + sg_ind, + next->num_sge * + sizeof(struct ib_sge))) { ret = -EFAULT; goto err; } @@ -2470,32 +2266,33 @@ err: return ERR_PTR(ret); } -ssize_t ib_uverbs_post_recv(struct ib_uverbs_file *file, - const char __user *buf, int in_len, - int out_len) +static int ib_uverbs_post_recv(struct uverbs_attr_bundle *attrs) { struct ib_uverbs_post_recv cmd; struct ib_uverbs_post_recv_resp resp; struct ib_recv_wr *wr, *next; const struct ib_recv_wr *bad_wr; struct ib_qp *qp; - ssize_t ret = -EINVAL; + int ret, ret2; + struct uverbs_req_iter iter; - if (copy_from_user(&cmd, buf, sizeof cmd)) - return -EFAULT; + ret = uverbs_request_start(attrs, &iter, &cmd, sizeof(cmd)); + if (ret) + return ret; - wr = ib_uverbs_unmarshall_recv(buf + sizeof cmd, - in_len - sizeof cmd, cmd.wr_count, - cmd.sge_count, cmd.wqe_size); + wr = ib_uverbs_unmarshall_recv(&iter, cmd.wr_count, cmd.wqe_size, + cmd.sge_count); if (IS_ERR(wr)) return PTR_ERR(wr); - qp = uobj_get_obj_read(qp, UVERBS_OBJECT_QP, cmd.qp_handle, file); - if (!qp) + qp = uobj_get_obj_read(qp, UVERBS_OBJECT_QP, cmd.qp_handle, attrs); + if (!qp) { + ret = -EINVAL; goto out; + } resp.bad_wr = 0; - ret = qp->device->post_recv(qp->real_qp, wr, &bad_wr); + ret = qp->device->ops.post_recv(qp->real_qp, wr, &bad_wr); uobj_put_obj_read(qp); if (ret) { @@ -2506,9 +2303,9 @@ ssize_t ib_uverbs_post_recv(struct ib_uverbs_file *file, } } - if (copy_to_user(u64_to_user_ptr(cmd.response), &resp, sizeof resp)) - ret = -EFAULT; - + ret2 = uverbs_response(attrs, &resp, sizeof(resp)); + if (ret2) + ret = ret2; out: while (wr) { next = wr->next; @@ -2516,36 +2313,36 @@ out: wr = next; } - return ret ? ret : in_len; + return ret; } -ssize_t ib_uverbs_post_srq_recv(struct ib_uverbs_file *file, - const char __user *buf, int in_len, - int out_len) +static int ib_uverbs_post_srq_recv(struct uverbs_attr_bundle *attrs) { struct ib_uverbs_post_srq_recv cmd; struct ib_uverbs_post_srq_recv_resp resp; struct ib_recv_wr *wr, *next; const struct ib_recv_wr *bad_wr; struct ib_srq *srq; - ssize_t ret = -EINVAL; + int ret, ret2; + struct uverbs_req_iter iter; - if (copy_from_user(&cmd, buf, sizeof cmd)) - return -EFAULT; + ret = uverbs_request_start(attrs, &iter, &cmd, sizeof(cmd)); + if (ret) + return ret; - wr = ib_uverbs_unmarshall_recv(buf + sizeof cmd, - in_len - sizeof cmd, cmd.wr_count, - cmd.sge_count, cmd.wqe_size); + wr = ib_uverbs_unmarshall_recv(&iter, cmd.wr_count, cmd.wqe_size, + cmd.sge_count); if (IS_ERR(wr)) return PTR_ERR(wr); - srq = uobj_get_obj_read(srq, UVERBS_OBJECT_SRQ, cmd.srq_handle, file); - if (!srq) + srq = uobj_get_obj_read(srq, UVERBS_OBJECT_SRQ, cmd.srq_handle, attrs); + if (!srq) { + ret = -EINVAL; goto out; + } resp.bad_wr = 0; - ret = srq->device->post_srq_recv ? - srq->device->post_srq_recv(srq, wr, &bad_wr) : -EOPNOTSUPP; + ret = srq->device->ops.post_srq_recv(srq, wr, &bad_wr); uobj_put_obj_read(srq); @@ -2556,8 +2353,9 @@ ssize_t ib_uverbs_post_srq_recv(struct ib_uverbs_file *file, break; } - if (copy_to_user(u64_to_user_ptr(cmd.response), &resp, sizeof resp)) - ret = -EFAULT; + ret2 = uverbs_response(attrs, &resp, sizeof(resp)); + if (ret2) + ret = ret2; out: while (wr) { @@ -2566,12 +2364,10 @@ out: wr = next; } - return ret ? ret : in_len; + return ret; } -ssize_t ib_uverbs_create_ah(struct ib_uverbs_file *file, - const char __user *buf, int in_len, - int out_len) +static int ib_uverbs_create_ah(struct uverbs_attr_bundle *attrs) { struct ib_uverbs_create_ah cmd; struct ib_uverbs_create_ah_resp resp; @@ -2580,21 +2376,13 @@ ssize_t ib_uverbs_create_ah(struct ib_uverbs_file *file, struct ib_ah *ah; struct rdma_ah_attr attr = {}; int ret; - struct ib_udata udata; struct ib_device *ib_dev; - if (out_len < sizeof resp) - return -ENOSPC; - - if (copy_from_user(&cmd, buf, sizeof cmd)) - return -EFAULT; - - ib_uverbs_init_udata(&udata, buf + sizeof(cmd), - u64_to_user_ptr(cmd.response) + sizeof(resp), - in_len - sizeof(cmd) - sizeof(struct ib_uverbs_cmd_hdr), - out_len - sizeof(resp)); + ret = uverbs_request(attrs, &cmd, sizeof(cmd)); + if (ret) + return ret; - uobj = uobj_alloc(UVERBS_OBJECT_AH, file, &ib_dev); + uobj = uobj_alloc(UVERBS_OBJECT_AH, attrs, &ib_dev); if (IS_ERR(uobj)) return PTR_ERR(uobj); @@ -2603,7 +2391,7 @@ ssize_t ib_uverbs_create_ah(struct ib_uverbs_file *file, goto err; } - pd = uobj_get_obj_read(pd, UVERBS_OBJECT_PD, cmd.pd_handle, file); + pd = uobj_get_obj_read(pd, UVERBS_OBJECT_PD, cmd.pd_handle, attrs); if (!pd) { ret = -EINVAL; goto err; @@ -2627,7 +2415,7 @@ ssize_t ib_uverbs_create_ah(struct ib_uverbs_file *file, rdma_ah_set_ah_flags(&attr, 0); } - ah = rdma_create_user_ah(pd, &attr, &udata); + ah = rdma_create_user_ah(pd, &attr, &attrs->driver_udata); if (IS_ERR(ah)) { ret = PTR_ERR(ah); goto err_put; @@ -2639,16 +2427,15 @@ ssize_t ib_uverbs_create_ah(struct ib_uverbs_file *file, resp.ah_handle = uobj->id; - if (copy_to_user(u64_to_user_ptr(cmd.response), &resp, sizeof resp)) { - ret = -EFAULT; + ret = uverbs_response(attrs, &resp, sizeof(resp)); + if (ret) goto err_copy; - } uobj_put_obj_read(pd); - return uobj_alloc_commit(uobj, in_len); + return uobj_alloc_commit(uobj); err_copy: - rdma_destroy_ah(ah); + rdma_destroy_ah(ah, RDMA_DESTROY_AH_SLEEPABLE); err_put: uobj_put_obj_read(pd); @@ -2658,21 +2445,19 @@ err: return ret; } -ssize_t ib_uverbs_destroy_ah(struct ib_uverbs_file *file, - const char __user *buf, int in_len, int out_len) +static int ib_uverbs_destroy_ah(struct uverbs_attr_bundle *attrs) { struct ib_uverbs_destroy_ah cmd; + int ret; - if (copy_from_user(&cmd, buf, sizeof cmd)) - return -EFAULT; + ret = uverbs_request(attrs, &cmd, sizeof(cmd)); + if (ret) + return ret; - return uobj_perform_destroy(UVERBS_OBJECT_AH, cmd.ah_handle, file, - in_len); + return uobj_perform_destroy(UVERBS_OBJECT_AH, cmd.ah_handle, attrs); } -ssize_t ib_uverbs_attach_mcast(struct ib_uverbs_file *file, - const char __user *buf, int in_len, - int out_len) +static int ib_uverbs_attach_mcast(struct uverbs_attr_bundle *attrs) { struct ib_uverbs_attach_mcast cmd; struct ib_qp *qp; @@ -2680,10 +2465,11 @@ ssize_t ib_uverbs_attach_mcast(struct ib_uverbs_file *file, struct ib_uverbs_mcast_entry *mcast; int ret; - if (copy_from_user(&cmd, buf, sizeof cmd)) - return -EFAULT; + ret = uverbs_request(attrs, &cmd, sizeof(cmd)); + if (ret) + return ret; - qp = uobj_get_obj_read(qp, UVERBS_OBJECT_QP, cmd.qp_handle, file); + qp = uobj_get_obj_read(qp, UVERBS_OBJECT_QP, cmd.qp_handle, attrs); if (!qp) return -EINVAL; @@ -2716,12 +2502,10 @@ out_put: mutex_unlock(&obj->mcast_lock); uobj_put_obj_read(qp); - return ret ? ret : in_len; + return ret; } -ssize_t ib_uverbs_detach_mcast(struct ib_uverbs_file *file, - const char __user *buf, int in_len, - int out_len) +static int ib_uverbs_detach_mcast(struct uverbs_attr_bundle *attrs) { struct ib_uverbs_detach_mcast cmd; struct ib_uqp_object *obj; @@ -2730,10 +2514,11 @@ ssize_t ib_uverbs_detach_mcast(struct ib_uverbs_file *file, int ret = -EINVAL; bool found = false; - if (copy_from_user(&cmd, buf, sizeof cmd)) - return -EFAULT; + ret = uverbs_request(attrs, &cmd, sizeof(cmd)); + if (ret) + return ret; - qp = uobj_get_obj_read(qp, UVERBS_OBJECT_QP, cmd.qp_handle, file); + qp = uobj_get_obj_read(qp, UVERBS_OBJECT_QP, cmd.qp_handle, attrs); if (!qp) return -EINVAL; @@ -2759,7 +2544,7 @@ ssize_t ib_uverbs_detach_mcast(struct ib_uverbs_file *file, out_put: mutex_unlock(&obj->mcast_lock); uobj_put_obj_read(qp); - return ret ? ret : in_len; + return ret; } struct ib_uflow_resources *flow_resources_alloc(size_t num_specs) @@ -2838,7 +2623,7 @@ void flow_resources_add(struct ib_uflow_resources *uflow_res, } EXPORT_SYMBOL(flow_resources_add); -static int kern_spec_to_ib_spec_action(struct ib_uverbs_file *ufile, +static int kern_spec_to_ib_spec_action(const struct uverbs_attr_bundle *attrs, struct ib_uverbs_flow_spec *kern_spec, union ib_flow_spec *ib_spec, struct ib_uflow_resources *uflow_res) @@ -2867,7 +2652,7 @@ static int kern_spec_to_ib_spec_action(struct ib_uverbs_file *ufile, ib_spec->action.act = uobj_get_obj_read(flow_action, UVERBS_OBJECT_FLOW_ACTION, kern_spec->action.handle, - ufile); + attrs); if (!ib_spec->action.act) return -EINVAL; ib_spec->action.size = @@ -2885,7 +2670,7 @@ static int kern_spec_to_ib_spec_action(struct ib_uverbs_file *ufile, uobj_get_obj_read(counters, UVERBS_OBJECT_COUNTERS, kern_spec->flow_count.handle, - ufile); + attrs); if (!ib_spec->flow_count.counters) return -EINVAL; ib_spec->flow_count.size = @@ -3066,7 +2851,7 @@ static int kern_spec_to_ib_spec_filter(struct ib_uverbs_flow_spec *kern_spec, kern_filter_sz, ib_spec); } -static int kern_spec_to_ib_spec(struct ib_uverbs_file *ufile, +static int kern_spec_to_ib_spec(struct uverbs_attr_bundle *attrs, struct ib_uverbs_flow_spec *kern_spec, union ib_flow_spec *ib_spec, struct ib_uflow_resources *uflow_res) @@ -3075,17 +2860,15 @@ static int kern_spec_to_ib_spec(struct ib_uverbs_file *ufile, return -EINVAL; if (kern_spec->type >= IB_FLOW_SPEC_ACTION_TAG) - return kern_spec_to_ib_spec_action(ufile, kern_spec, ib_spec, + return kern_spec_to_ib_spec_action(attrs, kern_spec, ib_spec, uflow_res); else return kern_spec_to_ib_spec_filter(kern_spec, ib_spec); } -int ib_uverbs_ex_create_wq(struct ib_uverbs_file *file, - struct ib_udata *ucore, - struct ib_udata *uhw) +static int ib_uverbs_ex_create_wq(struct uverbs_attr_bundle *attrs) { - struct ib_uverbs_ex_create_wq cmd = {}; + struct ib_uverbs_ex_create_wq cmd; struct ib_uverbs_ex_create_wq_resp resp = {}; struct ib_uwq_object *obj; int err = 0; @@ -3093,43 +2876,27 @@ int ib_uverbs_ex_create_wq(struct ib_uverbs_file *file, struct ib_pd *pd; struct ib_wq *wq; struct ib_wq_init_attr wq_init_attr = {}; - size_t required_cmd_sz; - size_t required_resp_len; struct ib_device *ib_dev; - required_cmd_sz = offsetof(typeof(cmd), max_sge) + sizeof(cmd.max_sge); - required_resp_len = offsetof(typeof(resp), wqn) + sizeof(resp.wqn); - - if (ucore->inlen < required_cmd_sz) - return -EINVAL; - - if (ucore->outlen < required_resp_len) - return -ENOSPC; - - if (ucore->inlen > sizeof(cmd) && - !ib_is_udata_cleared(ucore, sizeof(cmd), - ucore->inlen - sizeof(cmd))) - return -EOPNOTSUPP; - - err = ib_copy_from_udata(&cmd, ucore, min(sizeof(cmd), ucore->inlen)); + err = uverbs_request(attrs, &cmd, sizeof(cmd)); if (err) return err; if (cmd.comp_mask) return -EOPNOTSUPP; - obj = (struct ib_uwq_object *)uobj_alloc(UVERBS_OBJECT_WQ, file, + obj = (struct ib_uwq_object *)uobj_alloc(UVERBS_OBJECT_WQ, attrs, &ib_dev); if (IS_ERR(obj)) return PTR_ERR(obj); - pd = uobj_get_obj_read(pd, UVERBS_OBJECT_PD, cmd.pd_handle, file); + pd = uobj_get_obj_read(pd, UVERBS_OBJECT_PD, cmd.pd_handle, attrs); if (!pd) { err = -EINVAL; goto err_uobj; } - cq = uobj_get_obj_read(cq, UVERBS_OBJECT_CQ, cmd.cq_handle, file); + cq = uobj_get_obj_read(cq, UVERBS_OBJECT_CQ, cmd.cq_handle, attrs); if (!cq) { err = -EINVAL; goto err_put_pd; @@ -3138,20 +2905,14 @@ int ib_uverbs_ex_create_wq(struct ib_uverbs_file *file, wq_init_attr.cq = cq; wq_init_attr.max_sge = cmd.max_sge; wq_init_attr.max_wr = cmd.max_wr; - wq_init_attr.wq_context = file; + wq_init_attr.wq_context = attrs->ufile; wq_init_attr.wq_type = cmd.wq_type; wq_init_attr.event_handler = ib_uverbs_wq_event_handler; - if (ucore->inlen >= (offsetof(typeof(cmd), create_flags) + - sizeof(cmd.create_flags))) - wq_init_attr.create_flags = cmd.create_flags; + wq_init_attr.create_flags = cmd.create_flags; obj->uevent.events_reported = 0; INIT_LIST_HEAD(&obj->uevent.event_list); - if (!pd->device->create_wq) { - err = -EOPNOTSUPP; - goto err_put_cq; - } - wq = pd->device->create_wq(pd, &wq_init_attr, uhw); + wq = pd->device->ops.create_wq(pd, &wq_init_attr, &attrs->driver_udata); if (IS_ERR(wq)) { err = PTR_ERR(wq); goto err_put_cq; @@ -3175,15 +2936,14 @@ int ib_uverbs_ex_create_wq(struct ib_uverbs_file *file, resp.max_sge = wq_init_attr.max_sge; resp.max_wr = wq_init_attr.max_wr; resp.wqn = wq->wq_num; - resp.response_length = required_resp_len; - err = ib_copy_to_udata(ucore, - &resp, resp.response_length); + resp.response_length = uverbs_response_length(attrs, sizeof(resp)); + err = uverbs_response(attrs, &resp, sizeof(resp)); if (err) goto err_copy; uobj_put_obj_read(pd); uobj_put_obj_read(cq); - return uobj_alloc_commit(&obj->uevent.uobject, 0); + return uobj_alloc_commit(&obj->uevent.uobject); err_copy: ib_destroy_wq(wq); @@ -3197,41 +2957,23 @@ err_uobj: return err; } -int ib_uverbs_ex_destroy_wq(struct ib_uverbs_file *file, - struct ib_udata *ucore, - struct ib_udata *uhw) +static int ib_uverbs_ex_destroy_wq(struct uverbs_attr_bundle *attrs) { - struct ib_uverbs_ex_destroy_wq cmd = {}; + struct ib_uverbs_ex_destroy_wq cmd; struct ib_uverbs_ex_destroy_wq_resp resp = {}; struct ib_uobject *uobj; struct ib_uwq_object *obj; - size_t required_cmd_sz; - size_t required_resp_len; int ret; - required_cmd_sz = offsetof(typeof(cmd), wq_handle) + sizeof(cmd.wq_handle); - required_resp_len = offsetof(typeof(resp), reserved) + sizeof(resp.reserved); - - if (ucore->inlen < required_cmd_sz) - return -EINVAL; - - if (ucore->outlen < required_resp_len) - return -ENOSPC; - - if (ucore->inlen > sizeof(cmd) && - !ib_is_udata_cleared(ucore, sizeof(cmd), - ucore->inlen - sizeof(cmd))) - return -EOPNOTSUPP; - - ret = ib_copy_from_udata(&cmd, ucore, min(sizeof(cmd), ucore->inlen)); + ret = uverbs_request(attrs, &cmd, sizeof(cmd)); if (ret) return ret; if (cmd.comp_mask) return -EOPNOTSUPP; - resp.response_length = required_resp_len; - uobj = uobj_get_destroy(UVERBS_OBJECT_WQ, cmd.wq_handle, file); + resp.response_length = uverbs_response_length(attrs, sizeof(resp)); + uobj = uobj_get_destroy(UVERBS_OBJECT_WQ, cmd.wq_handle, attrs); if (IS_ERR(uobj)) return PTR_ERR(uobj); @@ -3240,29 +2982,17 @@ int ib_uverbs_ex_destroy_wq(struct ib_uverbs_file *file, uobj_put_destroy(uobj); - return ib_copy_to_udata(ucore, &resp, resp.response_length); + return uverbs_response(attrs, &resp, sizeof(resp)); } -int ib_uverbs_ex_modify_wq(struct ib_uverbs_file *file, - struct ib_udata *ucore, - struct ib_udata *uhw) +static int ib_uverbs_ex_modify_wq(struct uverbs_attr_bundle *attrs) { - struct ib_uverbs_ex_modify_wq cmd = {}; + struct ib_uverbs_ex_modify_wq cmd; struct ib_wq *wq; struct ib_wq_attr wq_attr = {}; - size_t required_cmd_sz; int ret; - required_cmd_sz = offsetof(typeof(cmd), curr_wq_state) + sizeof(cmd.curr_wq_state); - if (ucore->inlen < required_cmd_sz) - return -EINVAL; - - if (ucore->inlen > sizeof(cmd) && - !ib_is_udata_cleared(ucore, sizeof(cmd), - ucore->inlen - sizeof(cmd))) - return -EOPNOTSUPP; - - ret = ib_copy_from_udata(&cmd, ucore, min(sizeof(cmd), ucore->inlen)); + ret = uverbs_request(attrs, &cmd, sizeof(cmd)); if (ret) return ret; @@ -3272,7 +3002,7 @@ int ib_uverbs_ex_modify_wq(struct ib_uverbs_file *file, if (cmd.attr_mask > (IB_WQ_STATE | IB_WQ_CUR_STATE | IB_WQ_FLAGS)) return -EINVAL; - wq = uobj_get_obj_read(wq, UVERBS_OBJECT_WQ, cmd.wq_handle, file); + wq = uobj_get_obj_read(wq, UVERBS_OBJECT_WQ, cmd.wq_handle, attrs); if (!wq) return -EINVAL; @@ -3282,24 +3012,18 @@ int ib_uverbs_ex_modify_wq(struct ib_uverbs_file *file, wq_attr.flags = cmd.flags; wq_attr.flags_mask = cmd.flags_mask; } - if (!wq->device->modify_wq) { - ret = -EOPNOTSUPP; - goto out; - } - ret = wq->device->modify_wq(wq, &wq_attr, cmd.attr_mask, uhw); -out: + ret = wq->device->ops.modify_wq(wq, &wq_attr, cmd.attr_mask, + &attrs->driver_udata); uobj_put_obj_read(wq); return ret; } -int ib_uverbs_ex_create_rwq_ind_table(struct ib_uverbs_file *file, - struct ib_udata *ucore, - struct ib_udata *uhw) +static int ib_uverbs_ex_create_rwq_ind_table(struct uverbs_attr_bundle *attrs) { - struct ib_uverbs_ex_create_rwq_ind_table cmd = {}; + struct ib_uverbs_ex_create_rwq_ind_table cmd; struct ib_uverbs_ex_create_rwq_ind_table_resp resp = {}; struct ib_uobject *uobj; - int err = 0; + int err; struct ib_rwq_ind_table_init_attr init_attr = {}; struct ib_rwq_ind_table *rwq_ind_tbl; struct ib_wq **wqs = NULL; @@ -3307,27 +3031,13 @@ int ib_uverbs_ex_create_rwq_ind_table(struct ib_uverbs_file *file, struct ib_wq *wq = NULL; int i, j, num_read_wqs; u32 num_wq_handles; - u32 expected_in_size; - size_t required_cmd_sz_header; - size_t required_resp_len; + struct uverbs_req_iter iter; struct ib_device *ib_dev; - required_cmd_sz_header = offsetof(typeof(cmd), log_ind_tbl_size) + sizeof(cmd.log_ind_tbl_size); - required_resp_len = offsetof(typeof(resp), ind_tbl_num) + sizeof(resp.ind_tbl_num); - - if (ucore->inlen < required_cmd_sz_header) - return -EINVAL; - - if (ucore->outlen < required_resp_len) - return -ENOSPC; - - err = ib_copy_from_udata(&cmd, ucore, required_cmd_sz_header); + err = uverbs_request_start(attrs, &iter, &cmd, sizeof(cmd)); if (err) return err; - ucore->inbuf += required_cmd_sz_header; - ucore->inlen -= required_cmd_sz_header; - if (cmd.comp_mask) return -EOPNOTSUPP; @@ -3335,26 +3045,17 @@ int ib_uverbs_ex_create_rwq_ind_table(struct ib_uverbs_file *file, return -EINVAL; num_wq_handles = 1 << cmd.log_ind_tbl_size; - expected_in_size = num_wq_handles * sizeof(__u32); - if (num_wq_handles == 1) - /* input size for wq handles is u64 aligned */ - expected_in_size += sizeof(__u32); - - if (ucore->inlen < expected_in_size) - return -EINVAL; - - if (ucore->inlen > expected_in_size && - !ib_is_udata_cleared(ucore, expected_in_size, - ucore->inlen - expected_in_size)) - return -EOPNOTSUPP; - wqs_handles = kcalloc(num_wq_handles, sizeof(*wqs_handles), GFP_KERNEL); if (!wqs_handles) return -ENOMEM; - err = ib_copy_from_udata(wqs_handles, ucore, - num_wq_handles * sizeof(__u32)); + err = uverbs_request_next(&iter, wqs_handles, + num_wq_handles * sizeof(__u32)); + if (err) + goto err_free; + + err = uverbs_request_finish(&iter); if (err) goto err_free; @@ -3367,7 +3068,7 @@ int ib_uverbs_ex_create_rwq_ind_table(struct ib_uverbs_file *file, for (num_read_wqs = 0; num_read_wqs < num_wq_handles; num_read_wqs++) { wq = uobj_get_obj_read(wq, UVERBS_OBJECT_WQ, - wqs_handles[num_read_wqs], file); + wqs_handles[num_read_wqs], attrs); if (!wq) { err = -EINVAL; goto put_wqs; @@ -3376,7 +3077,7 @@ int ib_uverbs_ex_create_rwq_ind_table(struct ib_uverbs_file *file, wqs[num_read_wqs] = wq; } - uobj = uobj_alloc(UVERBS_OBJECT_RWQ_IND_TBL, file, &ib_dev); + uobj = uobj_alloc(UVERBS_OBJECT_RWQ_IND_TBL, attrs, &ib_dev); if (IS_ERR(uobj)) { err = PTR_ERR(uobj); goto put_wqs; @@ -3385,11 +3086,8 @@ int ib_uverbs_ex_create_rwq_ind_table(struct ib_uverbs_file *file, init_attr.log_ind_tbl_size = cmd.log_ind_tbl_size; init_attr.ind_tbl = wqs; - if (!ib_dev->create_rwq_ind_table) { - err = -EOPNOTSUPP; - goto err_uobj; - } - rwq_ind_tbl = ib_dev->create_rwq_ind_table(ib_dev, &init_attr, uhw); + rwq_ind_tbl = ib_dev->ops.create_rwq_ind_table(ib_dev, &init_attr, + &attrs->driver_udata); if (IS_ERR(rwq_ind_tbl)) { err = PTR_ERR(rwq_ind_tbl); @@ -3408,10 +3106,9 @@ int ib_uverbs_ex_create_rwq_ind_table(struct ib_uverbs_file *file, resp.ind_tbl_handle = uobj->id; resp.ind_tbl_num = rwq_ind_tbl->ind_tbl_num; - resp.response_length = required_resp_len; + resp.response_length = uverbs_response_length(attrs, sizeof(resp)); - err = ib_copy_to_udata(ucore, - &resp, resp.response_length); + err = uverbs_response(attrs, &resp, sizeof(resp)); if (err) goto err_copy; @@ -3420,7 +3117,7 @@ int ib_uverbs_ex_create_rwq_ind_table(struct ib_uverbs_file *file, for (j = 0; j < num_read_wqs; j++) uobj_put_obj_read(wqs[j]); - return uobj_alloc_commit(uobj, 0); + return uobj_alloc_commit(uobj); err_copy: ib_destroy_rwq_ind_table(rwq_ind_tbl); @@ -3435,25 +3132,12 @@ err_free: return err; } -int ib_uverbs_ex_destroy_rwq_ind_table(struct ib_uverbs_file *file, - struct ib_udata *ucore, - struct ib_udata *uhw) +static int ib_uverbs_ex_destroy_rwq_ind_table(struct uverbs_attr_bundle *attrs) { - struct ib_uverbs_ex_destroy_rwq_ind_table cmd = {}; - int ret; - size_t required_cmd_sz; - - required_cmd_sz = offsetof(typeof(cmd), ind_tbl_handle) + sizeof(cmd.ind_tbl_handle); - - if (ucore->inlen < required_cmd_sz) - return -EINVAL; - - if (ucore->inlen > sizeof(cmd) && - !ib_is_udata_cleared(ucore, sizeof(cmd), - ucore->inlen - sizeof(cmd))) - return -EOPNOTSUPP; + struct ib_uverbs_ex_destroy_rwq_ind_table cmd; + int ret; - ret = ib_copy_from_udata(&cmd, ucore, min(sizeof(cmd), ucore->inlen)); + ret = uverbs_request(attrs, &cmd, sizeof(cmd)); if (ret) return ret; @@ -3461,12 +3145,10 @@ int ib_uverbs_ex_destroy_rwq_ind_table(struct ib_uverbs_file *file, return -EOPNOTSUPP; return uobj_perform_destroy(UVERBS_OBJECT_RWQ_IND_TBL, - cmd.ind_tbl_handle, file, 0); + cmd.ind_tbl_handle, attrs); } -int ib_uverbs_ex_create_flow(struct ib_uverbs_file *file, - struct ib_udata *ucore, - struct ib_udata *uhw) +static int ib_uverbs_ex_create_flow(struct uverbs_attr_bundle *attrs) { struct ib_uverbs_create_flow cmd; struct ib_uverbs_create_flow_resp resp; @@ -3477,24 +3159,16 @@ int ib_uverbs_ex_create_flow(struct ib_uverbs_file *file, struct ib_qp *qp; struct ib_uflow_resources *uflow_res; struct ib_uverbs_flow_spec_hdr *kern_spec; - int err = 0; + struct uverbs_req_iter iter; + int err; void *ib_spec; int i; struct ib_device *ib_dev; - if (ucore->inlen < sizeof(cmd)) - return -EINVAL; - - if (ucore->outlen < sizeof(resp)) - return -ENOSPC; - - err = ib_copy_from_udata(&cmd, ucore, sizeof(cmd)); + err = uverbs_request_start(attrs, &iter, &cmd, sizeof(cmd)); if (err) return err; - ucore->inbuf += sizeof(cmd); - ucore->inlen -= sizeof(cmd); - if (cmd.comp_mask) return -EINVAL; @@ -3512,8 +3186,7 @@ int ib_uverbs_ex_create_flow(struct ib_uverbs_file *file, if (cmd.flow_attr.num_of_specs > IB_FLOW_SPEC_SUPPORT_LAYERS) return -EINVAL; - if (cmd.flow_attr.size > ucore->inlen || - cmd.flow_attr.size > + if (cmd.flow_attr.size > (cmd.flow_attr.num_of_specs * sizeof(struct ib_uverbs_flow_spec))) return -EINVAL; @@ -3528,21 +3201,25 @@ int ib_uverbs_ex_create_flow(struct ib_uverbs_file *file, return -ENOMEM; *kern_flow_attr = cmd.flow_attr; - err = ib_copy_from_udata(&kern_flow_attr->flow_specs, ucore, - cmd.flow_attr.size); + err = uverbs_request_next(&iter, &kern_flow_attr->flow_specs, + cmd.flow_attr.size); if (err) goto err_free_attr; } else { kern_flow_attr = &cmd.flow_attr; } - uobj = uobj_alloc(UVERBS_OBJECT_FLOW, file, &ib_dev); + err = uverbs_request_finish(&iter); + if (err) + goto err_free_attr; + + uobj = uobj_alloc(UVERBS_OBJECT_FLOW, attrs, &ib_dev); if (IS_ERR(uobj)) { err = PTR_ERR(uobj); goto err_free_attr; } - qp = uobj_get_obj_read(qp, UVERBS_OBJECT_QP, cmd.qp_handle, file); + qp = uobj_get_obj_read(qp, UVERBS_OBJECT_QP, cmd.qp_handle, attrs); if (!qp) { err = -EINVAL; goto err_uobj; @@ -3553,11 +3230,6 @@ int ib_uverbs_ex_create_flow(struct ib_uverbs_file *file, goto err_put; } - if (!qp->device->create_flow) { - err = -EOPNOTSUPP; - goto err_put; - } - flow_attr = kzalloc(struct_size(flow_attr, flows, cmd.flow_attr.num_of_specs), GFP_KERNEL); if (!flow_attr) { @@ -3584,7 +3256,7 @@ int ib_uverbs_ex_create_flow(struct ib_uverbs_file *file, cmd.flow_attr.size >= kern_spec->size; i++) { err = kern_spec_to_ib_spec( - file, (struct ib_uverbs_flow_spec *)kern_spec, + attrs, (struct ib_uverbs_flow_spec *)kern_spec, ib_spec, uflow_res); if (err) goto err_free; @@ -3602,8 +3274,8 @@ int ib_uverbs_ex_create_flow(struct ib_uverbs_file *file, goto err_free; } - flow_id = qp->device->create_flow(qp, flow_attr, - IB_FLOW_DOMAIN_USER, uhw); + flow_id = qp->device->ops.create_flow( + qp, flow_attr, IB_FLOW_DOMAIN_USER, &attrs->driver_udata); if (IS_ERR(flow_id)) { err = PTR_ERR(flow_id); @@ -3615,8 +3287,7 @@ int ib_uverbs_ex_create_flow(struct ib_uverbs_file *file, memset(&resp, 0, sizeof(resp)); resp.flow_handle = uobj->id; - err = ib_copy_to_udata(ucore, - &resp, sizeof(resp)); + err = uverbs_response(attrs, &resp, sizeof(resp)); if (err) goto err_copy; @@ -3624,9 +3295,9 @@ int ib_uverbs_ex_create_flow(struct ib_uverbs_file *file, kfree(flow_attr); if (cmd.flow_attr.num_of_specs) kfree(kern_flow_attr); - return uobj_alloc_commit(uobj, 0); + return uobj_alloc_commit(uobj); err_copy: - if (!qp->device->destroy_flow(flow_id)) + if (!qp->device->ops.destroy_flow(flow_id)) atomic_dec(&qp->usecnt); err_free: ib_uverbs_flow_resources_free(uflow_res); @@ -3642,28 +3313,22 @@ err_free_attr: return err; } -int ib_uverbs_ex_destroy_flow(struct ib_uverbs_file *file, - struct ib_udata *ucore, - struct ib_udata *uhw) +static int ib_uverbs_ex_destroy_flow(struct uverbs_attr_bundle *attrs) { struct ib_uverbs_destroy_flow cmd; int ret; - if (ucore->inlen < sizeof(cmd)) - return -EINVAL; - - ret = ib_copy_from_udata(&cmd, ucore, sizeof(cmd)); + ret = uverbs_request(attrs, &cmd, sizeof(cmd)); if (ret) return ret; if (cmd.comp_mask) return -EINVAL; - return uobj_perform_destroy(UVERBS_OBJECT_FLOW, cmd.flow_handle, file, - 0); + return uobj_perform_destroy(UVERBS_OBJECT_FLOW, cmd.flow_handle, attrs); } -static int __uverbs_create_xsrq(struct ib_uverbs_file *file, +static int __uverbs_create_xsrq(struct uverbs_attr_bundle *attrs, struct ib_uverbs_create_xsrq *cmd, struct ib_udata *udata) { @@ -3676,7 +3341,7 @@ static int __uverbs_create_xsrq(struct ib_uverbs_file *file, int ret; struct ib_device *ib_dev; - obj = (struct ib_usrq_object *)uobj_alloc(UVERBS_OBJECT_SRQ, file, + obj = (struct ib_usrq_object *)uobj_alloc(UVERBS_OBJECT_SRQ, attrs, &ib_dev); if (IS_ERR(obj)) return PTR_ERR(obj); @@ -3686,7 +3351,7 @@ static int __uverbs_create_xsrq(struct ib_uverbs_file *file, if (cmd->srq_type == IB_SRQT_XRC) { xrcd_uobj = uobj_get_read(UVERBS_OBJECT_XRCD, cmd->xrcd_handle, - file); + attrs); if (IS_ERR(xrcd_uobj)) { ret = -EINVAL; goto err; @@ -3704,21 +3369,21 @@ static int __uverbs_create_xsrq(struct ib_uverbs_file *file, if (ib_srq_has_cq(cmd->srq_type)) { attr.ext.cq = uobj_get_obj_read(cq, UVERBS_OBJECT_CQ, - cmd->cq_handle, file); + cmd->cq_handle, attrs); if (!attr.ext.cq) { ret = -EINVAL; goto err_put_xrcd; } } - pd = uobj_get_obj_read(pd, UVERBS_OBJECT_PD, cmd->pd_handle, file); + pd = uobj_get_obj_read(pd, UVERBS_OBJECT_PD, cmd->pd_handle, attrs); if (!pd) { ret = -EINVAL; goto err_put_cq; } attr.event_handler = ib_uverbs_srq_event_handler; - attr.srq_context = file; + attr.srq_context = attrs->ufile; attr.srq_type = cmd->srq_type; attr.attr.max_wr = cmd->max_wr; attr.attr.max_sge = cmd->max_sge; @@ -3727,7 +3392,7 @@ static int __uverbs_create_xsrq(struct ib_uverbs_file *file, obj->uevent.events_reported = 0; INIT_LIST_HEAD(&obj->uevent.event_list); - srq = pd->device->create_srq(pd, &attr, udata); + srq = pd->device->ops.create_srq(pd, &attr, udata); if (IS_ERR(srq)) { ret = PTR_ERR(srq); goto err_put; @@ -3763,11 +3428,9 @@ static int __uverbs_create_xsrq(struct ib_uverbs_file *file, if (cmd->srq_type == IB_SRQT_XRC) resp.srqn = srq->ext.xrc.srq_num; - if (copy_to_user(u64_to_user_ptr(cmd->response), - &resp, sizeof resp)) { - ret = -EFAULT; + ret = uverbs_response(attrs, &resp, sizeof(resp)); + if (ret) goto err_copy; - } if (cmd->srq_type == IB_SRQT_XRC) uobj_put_read(xrcd_uobj); @@ -3776,7 +3439,7 @@ static int __uverbs_create_xsrq(struct ib_uverbs_file *file, uobj_put_obj_read(attr.ext.cq); uobj_put_obj_read(pd); - return uobj_alloc_commit(&obj->uevent.uobject, 0); + return uobj_alloc_commit(&obj->uevent.uobject); err_copy: ib_destroy_srq(srq); @@ -3799,21 +3462,15 @@ err: return ret; } -ssize_t ib_uverbs_create_srq(struct ib_uverbs_file *file, - const char __user *buf, int in_len, - int out_len) +static int ib_uverbs_create_srq(struct uverbs_attr_bundle *attrs) { struct ib_uverbs_create_srq cmd; struct ib_uverbs_create_xsrq xcmd; - struct ib_uverbs_create_srq_resp resp; - struct ib_udata udata; int ret; - if (out_len < sizeof resp) - return -ENOSPC; - - if (copy_from_user(&cmd, buf, sizeof cmd)) - return -EFAULT; + ret = uverbs_request(attrs, &cmd, sizeof(cmd)); + if (ret) + return ret; memset(&xcmd, 0, sizeof(xcmd)); xcmd.response = cmd.response; @@ -3824,77 +3481,48 @@ ssize_t ib_uverbs_create_srq(struct ib_uverbs_file *file, xcmd.max_sge = cmd.max_sge; xcmd.srq_limit = cmd.srq_limit; - ib_uverbs_init_udata(&udata, buf + sizeof(cmd), - u64_to_user_ptr(cmd.response) + sizeof(resp), - in_len - sizeof(cmd) - sizeof(struct ib_uverbs_cmd_hdr), - out_len - sizeof(resp)); - - ret = __uverbs_create_xsrq(file, &xcmd, &udata); - if (ret) - return ret; - - return in_len; + return __uverbs_create_xsrq(attrs, &xcmd, &attrs->driver_udata); } -ssize_t ib_uverbs_create_xsrq(struct ib_uverbs_file *file, - const char __user *buf, int in_len, int out_len) +static int ib_uverbs_create_xsrq(struct uverbs_attr_bundle *attrs) { struct ib_uverbs_create_xsrq cmd; - struct ib_uverbs_create_srq_resp resp; - struct ib_udata udata; int ret; - if (out_len < sizeof resp) - return -ENOSPC; - - if (copy_from_user(&cmd, buf, sizeof cmd)) - return -EFAULT; - - ib_uverbs_init_udata(&udata, buf + sizeof(cmd), - u64_to_user_ptr(cmd.response) + sizeof(resp), - in_len - sizeof(cmd) - sizeof(struct ib_uverbs_cmd_hdr), - out_len - sizeof(resp)); - - ret = __uverbs_create_xsrq(file, &cmd, &udata); + ret = uverbs_request(attrs, &cmd, sizeof(cmd)); if (ret) return ret; - return in_len; + return __uverbs_create_xsrq(attrs, &cmd, &attrs->driver_udata); } -ssize_t ib_uverbs_modify_srq(struct ib_uverbs_file *file, - const char __user *buf, int in_len, - int out_len) +static int ib_uverbs_modify_srq(struct uverbs_attr_bundle *attrs) { struct ib_uverbs_modify_srq cmd; - struct ib_udata udata; struct ib_srq *srq; struct ib_srq_attr attr; int ret; - if (copy_from_user(&cmd, buf, sizeof cmd)) - return -EFAULT; - - ib_uverbs_init_udata(&udata, buf + sizeof cmd, NULL, in_len - sizeof cmd, - out_len); + ret = uverbs_request(attrs, &cmd, sizeof(cmd)); + if (ret) + return ret; - srq = uobj_get_obj_read(srq, UVERBS_OBJECT_SRQ, cmd.srq_handle, file); + srq = uobj_get_obj_read(srq, UVERBS_OBJECT_SRQ, cmd.srq_handle, attrs); if (!srq) return -EINVAL; attr.max_wr = cmd.max_wr; attr.srq_limit = cmd.srq_limit; - ret = srq->device->modify_srq(srq, &attr, cmd.attr_mask, &udata); + ret = srq->device->ops.modify_srq(srq, &attr, cmd.attr_mask, + &attrs->driver_udata); uobj_put_obj_read(srq); - return ret ? ret : in_len; + return ret; } -ssize_t ib_uverbs_query_srq(struct ib_uverbs_file *file, - const char __user *buf, - int in_len, int out_len) +static int ib_uverbs_query_srq(struct uverbs_attr_bundle *attrs) { struct ib_uverbs_query_srq cmd; struct ib_uverbs_query_srq_resp resp; @@ -3902,13 +3530,11 @@ ssize_t ib_uverbs_query_srq(struct ib_uverbs_file *file, struct ib_srq *srq; int ret; - if (out_len < sizeof resp) - return -ENOSPC; - - if (copy_from_user(&cmd, buf, sizeof cmd)) - return -EFAULT; + ret = uverbs_request(attrs, &cmd, sizeof(cmd)); + if (ret) + return ret; - srq = uobj_get_obj_read(srq, UVERBS_OBJECT_SRQ, cmd.srq_handle, file); + srq = uobj_get_obj_read(srq, UVERBS_OBJECT_SRQ, cmd.srq_handle, attrs); if (!srq) return -EINVAL; @@ -3925,25 +3551,22 @@ ssize_t ib_uverbs_query_srq(struct ib_uverbs_file *file, resp.max_sge = attr.max_sge; resp.srq_limit = attr.srq_limit; - if (copy_to_user(u64_to_user_ptr(cmd.response), &resp, sizeof resp)) - return -EFAULT; - - return in_len; + return uverbs_response(attrs, &resp, sizeof(resp)); } -ssize_t ib_uverbs_destroy_srq(struct ib_uverbs_file *file, - const char __user *buf, int in_len, - int out_len) +static int ib_uverbs_destroy_srq(struct uverbs_attr_bundle *attrs) { struct ib_uverbs_destroy_srq cmd; struct ib_uverbs_destroy_srq_resp resp; struct ib_uobject *uobj; struct ib_uevent_object *obj; + int ret; - if (copy_from_user(&cmd, buf, sizeof cmd)) - return -EFAULT; + ret = uverbs_request(attrs, &cmd, sizeof(cmd)); + if (ret) + return ret; - uobj = uobj_get_destroy(UVERBS_OBJECT_SRQ, cmd.srq_handle, file); + uobj = uobj_get_destroy(UVERBS_OBJECT_SRQ, cmd.srq_handle, attrs); if (IS_ERR(uobj)) return PTR_ERR(uobj); @@ -3953,35 +3576,24 @@ ssize_t ib_uverbs_destroy_srq(struct ib_uverbs_file *file, uobj_put_destroy(uobj); - if (copy_to_user(u64_to_user_ptr(cmd.response), &resp, sizeof(resp))) - return -EFAULT; - - return in_len; + return uverbs_response(attrs, &resp, sizeof(resp)); } -int ib_uverbs_ex_query_device(struct ib_uverbs_file *file, - struct ib_udata *ucore, - struct ib_udata *uhw) +static int ib_uverbs_ex_query_device(struct uverbs_attr_bundle *attrs) { - struct ib_uverbs_ex_query_device_resp resp = { {0} }; + struct ib_uverbs_ex_query_device_resp resp = {}; struct ib_uverbs_ex_query_device cmd; struct ib_device_attr attr = {0}; struct ib_ucontext *ucontext; struct ib_device *ib_dev; int err; - ucontext = ib_uverbs_get_ucontext(file); + ucontext = ib_uverbs_get_ucontext(attrs); if (IS_ERR(ucontext)) return PTR_ERR(ucontext); ib_dev = ucontext->device; - if (!ib_dev->query_device) - return -EOPNOTSUPP; - - if (ucore->inlen < sizeof(cmd)) - return -EINVAL; - - err = ib_copy_from_udata(&cmd, ucore, sizeof(cmd)); + err = uverbs_request(attrs, &cmd, sizeof(cmd)); if (err) return err; @@ -3991,20 +3603,12 @@ int ib_uverbs_ex_query_device(struct ib_uverbs_file *file, if (cmd.reserved) return -EINVAL; - resp.response_length = offsetof(typeof(resp), odp_caps); - - if (ucore->outlen < resp.response_length) - return -ENOSPC; - - err = ib_dev->query_device(ib_dev, &attr, uhw); + err = ib_dev->ops.query_device(ib_dev, &attr, &attrs->driver_udata); if (err) return err; copy_query_dev_fields(ucontext, &resp.base, &attr); - if (ucore->outlen < resp.response_length + sizeof(resp.odp_caps)) - goto end; - #ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING resp.odp_caps.general_caps = attr.odp_caps.general_caps; resp.odp_caps.per_transport_caps.rc_odp_caps = @@ -4014,99 +3618,39 @@ int ib_uverbs_ex_query_device(struct ib_uverbs_file *file, resp.odp_caps.per_transport_caps.ud_odp_caps = attr.odp_caps.per_transport_caps.ud_odp_caps; #endif - resp.response_length += sizeof(resp.odp_caps); - - if (ucore->outlen < resp.response_length + sizeof(resp.timestamp_mask)) - goto end; resp.timestamp_mask = attr.timestamp_mask; - resp.response_length += sizeof(resp.timestamp_mask); - - if (ucore->outlen < resp.response_length + sizeof(resp.hca_core_clock)) - goto end; - resp.hca_core_clock = attr.hca_core_clock; - resp.response_length += sizeof(resp.hca_core_clock); - - if (ucore->outlen < resp.response_length + sizeof(resp.device_cap_flags_ex)) - goto end; - resp.device_cap_flags_ex = attr.device_cap_flags; - resp.response_length += sizeof(resp.device_cap_flags_ex); - - if (ucore->outlen < resp.response_length + sizeof(resp.rss_caps)) - goto end; - resp.rss_caps.supported_qpts = attr.rss_caps.supported_qpts; resp.rss_caps.max_rwq_indirection_tables = attr.rss_caps.max_rwq_indirection_tables; resp.rss_caps.max_rwq_indirection_table_size = attr.rss_caps.max_rwq_indirection_table_size; - - resp.response_length += sizeof(resp.rss_caps); - - if (ucore->outlen < resp.response_length + sizeof(resp.max_wq_type_rq)) - goto end; - resp.max_wq_type_rq = attr.max_wq_type_rq; - resp.response_length += sizeof(resp.max_wq_type_rq); - - if (ucore->outlen < resp.response_length + sizeof(resp.raw_packet_caps)) - goto end; - resp.raw_packet_caps = attr.raw_packet_caps; - resp.response_length += sizeof(resp.raw_packet_caps); - - if (ucore->outlen < resp.response_length + sizeof(resp.tm_caps)) - goto end; - resp.tm_caps.max_rndv_hdr_size = attr.tm_caps.max_rndv_hdr_size; resp.tm_caps.max_num_tags = attr.tm_caps.max_num_tags; resp.tm_caps.max_ops = attr.tm_caps.max_ops; resp.tm_caps.max_sge = attr.tm_caps.max_sge; resp.tm_caps.flags = attr.tm_caps.flags; - resp.response_length += sizeof(resp.tm_caps); - - if (ucore->outlen < resp.response_length + sizeof(resp.cq_moderation_caps)) - goto end; - resp.cq_moderation_caps.max_cq_moderation_count = attr.cq_caps.max_cq_moderation_count; resp.cq_moderation_caps.max_cq_moderation_period = attr.cq_caps.max_cq_moderation_period; - resp.response_length += sizeof(resp.cq_moderation_caps); - - if (ucore->outlen < resp.response_length + sizeof(resp.max_dm_size)) - goto end; - resp.max_dm_size = attr.max_dm_size; - resp.response_length += sizeof(resp.max_dm_size); -end: - err = ib_copy_to_udata(ucore, &resp, resp.response_length); - return err; + resp.response_length = uverbs_response_length(attrs, sizeof(resp)); + + return uverbs_response(attrs, &resp, sizeof(resp)); } -int ib_uverbs_ex_modify_cq(struct ib_uverbs_file *file, - struct ib_udata *ucore, - struct ib_udata *uhw) +static int ib_uverbs_ex_modify_cq(struct uverbs_attr_bundle *attrs) { - struct ib_uverbs_ex_modify_cq cmd = {}; + struct ib_uverbs_ex_modify_cq cmd; struct ib_cq *cq; - size_t required_cmd_sz; int ret; - required_cmd_sz = offsetof(typeof(cmd), reserved) + - sizeof(cmd.reserved); - if (ucore->inlen < required_cmd_sz) - return -EINVAL; - - /* sanity checks */ - if (ucore->inlen > sizeof(cmd) && - !ib_is_udata_cleared(ucore, sizeof(cmd), - ucore->inlen - sizeof(cmd))) - return -EOPNOTSUPP; - - ret = ib_copy_from_udata(&cmd, ucore, min(sizeof(cmd), ucore->inlen)); + ret = uverbs_request(attrs, &cmd, sizeof(cmd)); if (ret) return ret; @@ -4116,7 +3660,7 @@ int ib_uverbs_ex_modify_cq(struct ib_uverbs_file *file, if (cmd.attr_mask > IB_CQ_MODERATE) return -EOPNOTSUPP; - cq = uobj_get_obj_read(cq, UVERBS_OBJECT_CQ, cmd.cq_handle, file); + cq = uobj_get_obj_read(cq, UVERBS_OBJECT_CQ, cmd.cq_handle, attrs); if (!cq) return -EINVAL; @@ -4126,3 +3670,381 @@ int ib_uverbs_ex_modify_cq(struct ib_uverbs_file *file, return ret; } + +/* + * Describe the input structs for write(). Some write methods have an input + * only struct, most have an input and output. If the struct has an output then + * the 'response' u64 must be the first field in the request structure. + * + * If udata is present then both the request and response structs have a + * trailing driver_data flex array. In this case the size of the base struct + * cannot be changed. + */ +#define offsetof_after(_struct, _member) \ + (offsetof(_struct, _member) + sizeof(((_struct *)NULL)->_member)) + +#define UAPI_DEF_WRITE_IO(req, resp) \ + .write.has_resp = 1 + \ + BUILD_BUG_ON_ZERO(offsetof(req, response) != 0) + \ + BUILD_BUG_ON_ZERO(sizeof(((req *)0)->response) != \ + sizeof(u64)), \ + .write.req_size = sizeof(req), .write.resp_size = sizeof(resp) + +#define UAPI_DEF_WRITE_I(req) .write.req_size = sizeof(req) + +#define UAPI_DEF_WRITE_UDATA_IO(req, resp) \ + UAPI_DEF_WRITE_IO(req, resp), \ + .write.has_udata = \ + 1 + \ + BUILD_BUG_ON_ZERO(offsetof(req, driver_data) != \ + sizeof(req)) + \ + BUILD_BUG_ON_ZERO(offsetof(resp, driver_data) != \ + sizeof(resp)) + +#define UAPI_DEF_WRITE_UDATA_I(req) \ + UAPI_DEF_WRITE_I(req), \ + .write.has_udata = \ + 1 + BUILD_BUG_ON_ZERO(offsetof(req, driver_data) != \ + sizeof(req)) + +/* + * The _EX versions are for use with WRITE_EX and allow the last struct member + * to be specified. Buffers that do not include that member will be rejected. + */ +#define UAPI_DEF_WRITE_IO_EX(req, req_last_member, resp, resp_last_member) \ + .write.has_resp = 1, \ + .write.req_size = offsetof_after(req, req_last_member), \ + .write.resp_size = offsetof_after(resp, resp_last_member) + +#define UAPI_DEF_WRITE_I_EX(req, req_last_member) \ + .write.req_size = offsetof_after(req, req_last_member) + +const struct uapi_definition uverbs_def_write_intf[] = { + DECLARE_UVERBS_OBJECT( + UVERBS_OBJECT_AH, + DECLARE_UVERBS_WRITE(IB_USER_VERBS_CMD_CREATE_AH, + ib_uverbs_create_ah, + UAPI_DEF_WRITE_UDATA_IO( + struct ib_uverbs_create_ah, + struct ib_uverbs_create_ah_resp), + UAPI_DEF_METHOD_NEEDS_FN(create_ah)), + DECLARE_UVERBS_WRITE( + IB_USER_VERBS_CMD_DESTROY_AH, + ib_uverbs_destroy_ah, + UAPI_DEF_WRITE_I(struct ib_uverbs_destroy_ah), + UAPI_DEF_METHOD_NEEDS_FN(destroy_ah))), + + DECLARE_UVERBS_OBJECT( + UVERBS_OBJECT_COMP_CHANNEL, + DECLARE_UVERBS_WRITE( + IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL, + ib_uverbs_create_comp_channel, + UAPI_DEF_WRITE_IO( + struct ib_uverbs_create_comp_channel, + struct ib_uverbs_create_comp_channel_resp))), + + DECLARE_UVERBS_OBJECT( + UVERBS_OBJECT_CQ, + DECLARE_UVERBS_WRITE(IB_USER_VERBS_CMD_CREATE_CQ, + ib_uverbs_create_cq, + UAPI_DEF_WRITE_UDATA_IO( + struct ib_uverbs_create_cq, + struct ib_uverbs_create_cq_resp), + UAPI_DEF_METHOD_NEEDS_FN(create_cq)), + DECLARE_UVERBS_WRITE( + IB_USER_VERBS_CMD_DESTROY_CQ, + ib_uverbs_destroy_cq, + UAPI_DEF_WRITE_IO(struct ib_uverbs_destroy_cq, + struct ib_uverbs_destroy_cq_resp), + UAPI_DEF_METHOD_NEEDS_FN(destroy_cq)), + DECLARE_UVERBS_WRITE( + IB_USER_VERBS_CMD_POLL_CQ, + ib_uverbs_poll_cq, + UAPI_DEF_WRITE_IO(struct ib_uverbs_poll_cq, + struct ib_uverbs_poll_cq_resp), + UAPI_DEF_METHOD_NEEDS_FN(poll_cq)), + DECLARE_UVERBS_WRITE( + IB_USER_VERBS_CMD_REQ_NOTIFY_CQ, + ib_uverbs_req_notify_cq, + UAPI_DEF_WRITE_I(struct ib_uverbs_req_notify_cq), + UAPI_DEF_METHOD_NEEDS_FN(req_notify_cq)), + DECLARE_UVERBS_WRITE(IB_USER_VERBS_CMD_RESIZE_CQ, + ib_uverbs_resize_cq, + UAPI_DEF_WRITE_UDATA_IO( + struct ib_uverbs_resize_cq, + struct ib_uverbs_resize_cq_resp), + UAPI_DEF_METHOD_NEEDS_FN(resize_cq)), + DECLARE_UVERBS_WRITE_EX( + IB_USER_VERBS_EX_CMD_CREATE_CQ, + ib_uverbs_ex_create_cq, + UAPI_DEF_WRITE_IO_EX(struct ib_uverbs_ex_create_cq, + reserved, + struct ib_uverbs_ex_create_cq_resp, + response_length), + UAPI_DEF_METHOD_NEEDS_FN(create_cq)), + DECLARE_UVERBS_WRITE_EX( + IB_USER_VERBS_EX_CMD_MODIFY_CQ, + ib_uverbs_ex_modify_cq, + UAPI_DEF_WRITE_I(struct ib_uverbs_ex_modify_cq), + UAPI_DEF_METHOD_NEEDS_FN(create_cq))), + + DECLARE_UVERBS_OBJECT( + UVERBS_OBJECT_DEVICE, + DECLARE_UVERBS_WRITE(IB_USER_VERBS_CMD_GET_CONTEXT, + ib_uverbs_get_context, + UAPI_DEF_WRITE_UDATA_IO( + struct ib_uverbs_get_context, + struct ib_uverbs_get_context_resp)), + DECLARE_UVERBS_WRITE( + IB_USER_VERBS_CMD_QUERY_DEVICE, + ib_uverbs_query_device, + UAPI_DEF_WRITE_IO(struct ib_uverbs_query_device, + struct ib_uverbs_query_device_resp)), + DECLARE_UVERBS_WRITE( + IB_USER_VERBS_CMD_QUERY_PORT, + ib_uverbs_query_port, + UAPI_DEF_WRITE_IO(struct ib_uverbs_query_port, + struct ib_uverbs_query_port_resp), + UAPI_DEF_METHOD_NEEDS_FN(query_port)), + DECLARE_UVERBS_WRITE_EX( + IB_USER_VERBS_EX_CMD_QUERY_DEVICE, + ib_uverbs_ex_query_device, + UAPI_DEF_WRITE_IO_EX( + struct ib_uverbs_ex_query_device, + reserved, + struct ib_uverbs_ex_query_device_resp, + response_length), + UAPI_DEF_METHOD_NEEDS_FN(query_device)), + UAPI_DEF_OBJ_NEEDS_FN(alloc_ucontext), + UAPI_DEF_OBJ_NEEDS_FN(dealloc_ucontext)), + + DECLARE_UVERBS_OBJECT( + UVERBS_OBJECT_FLOW, + DECLARE_UVERBS_WRITE_EX( + IB_USER_VERBS_EX_CMD_CREATE_FLOW, + ib_uverbs_ex_create_flow, + UAPI_DEF_WRITE_IO_EX(struct ib_uverbs_create_flow, + flow_attr, + struct ib_uverbs_create_flow_resp, + flow_handle), + UAPI_DEF_METHOD_NEEDS_FN(create_flow)), + DECLARE_UVERBS_WRITE_EX( + IB_USER_VERBS_EX_CMD_DESTROY_FLOW, + ib_uverbs_ex_destroy_flow, + UAPI_DEF_WRITE_I(struct ib_uverbs_destroy_flow), + UAPI_DEF_METHOD_NEEDS_FN(destroy_flow))), + + DECLARE_UVERBS_OBJECT( + UVERBS_OBJECT_MR, + DECLARE_UVERBS_WRITE(IB_USER_VERBS_CMD_DEREG_MR, + ib_uverbs_dereg_mr, + UAPI_DEF_WRITE_I(struct ib_uverbs_dereg_mr), + UAPI_DEF_METHOD_NEEDS_FN(dereg_mr)), + DECLARE_UVERBS_WRITE( + IB_USER_VERBS_CMD_REG_MR, + ib_uverbs_reg_mr, + UAPI_DEF_WRITE_UDATA_IO(struct ib_uverbs_reg_mr, + struct ib_uverbs_reg_mr_resp), + UAPI_DEF_METHOD_NEEDS_FN(reg_user_mr)), + DECLARE_UVERBS_WRITE( + IB_USER_VERBS_CMD_REREG_MR, + ib_uverbs_rereg_mr, + UAPI_DEF_WRITE_UDATA_IO(struct ib_uverbs_rereg_mr, + struct ib_uverbs_rereg_mr_resp), + UAPI_DEF_METHOD_NEEDS_FN(rereg_user_mr))), + + DECLARE_UVERBS_OBJECT( + UVERBS_OBJECT_MW, + DECLARE_UVERBS_WRITE( + IB_USER_VERBS_CMD_ALLOC_MW, + ib_uverbs_alloc_mw, + UAPI_DEF_WRITE_UDATA_IO(struct ib_uverbs_alloc_mw, + struct ib_uverbs_alloc_mw_resp), + UAPI_DEF_METHOD_NEEDS_FN(alloc_mw)), + DECLARE_UVERBS_WRITE( + IB_USER_VERBS_CMD_DEALLOC_MW, + ib_uverbs_dealloc_mw, + UAPI_DEF_WRITE_I(struct ib_uverbs_dealloc_mw), + UAPI_DEF_METHOD_NEEDS_FN(dealloc_mw))), + + DECLARE_UVERBS_OBJECT( + UVERBS_OBJECT_PD, + DECLARE_UVERBS_WRITE( + IB_USER_VERBS_CMD_ALLOC_PD, + ib_uverbs_alloc_pd, + UAPI_DEF_WRITE_UDATA_IO(struct ib_uverbs_alloc_pd, + struct ib_uverbs_alloc_pd_resp), + UAPI_DEF_METHOD_NEEDS_FN(alloc_pd)), + DECLARE_UVERBS_WRITE( + IB_USER_VERBS_CMD_DEALLOC_PD, + ib_uverbs_dealloc_pd, + UAPI_DEF_WRITE_I(struct ib_uverbs_dealloc_pd), + UAPI_DEF_METHOD_NEEDS_FN(dealloc_pd))), + + DECLARE_UVERBS_OBJECT( + UVERBS_OBJECT_QP, + DECLARE_UVERBS_WRITE( + IB_USER_VERBS_CMD_ATTACH_MCAST, + ib_uverbs_attach_mcast, + UAPI_DEF_WRITE_I(struct ib_uverbs_attach_mcast), + UAPI_DEF_METHOD_NEEDS_FN(attach_mcast), + UAPI_DEF_METHOD_NEEDS_FN(detach_mcast)), + DECLARE_UVERBS_WRITE(IB_USER_VERBS_CMD_CREATE_QP, + ib_uverbs_create_qp, + UAPI_DEF_WRITE_UDATA_IO( + struct ib_uverbs_create_qp, + struct ib_uverbs_create_qp_resp), + UAPI_DEF_METHOD_NEEDS_FN(create_qp)), + DECLARE_UVERBS_WRITE( + IB_USER_VERBS_CMD_DESTROY_QP, + ib_uverbs_destroy_qp, + UAPI_DEF_WRITE_IO(struct ib_uverbs_destroy_qp, + struct ib_uverbs_destroy_qp_resp), + UAPI_DEF_METHOD_NEEDS_FN(destroy_qp)), + DECLARE_UVERBS_WRITE( + IB_USER_VERBS_CMD_DETACH_MCAST, + ib_uverbs_detach_mcast, + UAPI_DEF_WRITE_I(struct ib_uverbs_detach_mcast), + UAPI_DEF_METHOD_NEEDS_FN(detach_mcast)), + DECLARE_UVERBS_WRITE( + IB_USER_VERBS_CMD_MODIFY_QP, + ib_uverbs_modify_qp, + UAPI_DEF_WRITE_I(struct ib_uverbs_modify_qp), + UAPI_DEF_METHOD_NEEDS_FN(modify_qp)), + DECLARE_UVERBS_WRITE( + IB_USER_VERBS_CMD_POST_RECV, + ib_uverbs_post_recv, + UAPI_DEF_WRITE_IO(struct ib_uverbs_post_recv, + struct ib_uverbs_post_recv_resp), + UAPI_DEF_METHOD_NEEDS_FN(post_recv)), + DECLARE_UVERBS_WRITE( + IB_USER_VERBS_CMD_POST_SEND, + ib_uverbs_post_send, + UAPI_DEF_WRITE_IO(struct ib_uverbs_post_send, + struct ib_uverbs_post_send_resp), + UAPI_DEF_METHOD_NEEDS_FN(post_send)), + DECLARE_UVERBS_WRITE( + IB_USER_VERBS_CMD_QUERY_QP, + ib_uverbs_query_qp, + UAPI_DEF_WRITE_IO(struct ib_uverbs_query_qp, + struct ib_uverbs_query_qp_resp), + UAPI_DEF_METHOD_NEEDS_FN(query_qp)), + DECLARE_UVERBS_WRITE_EX( + IB_USER_VERBS_EX_CMD_CREATE_QP, + ib_uverbs_ex_create_qp, + UAPI_DEF_WRITE_IO_EX(struct ib_uverbs_ex_create_qp, + comp_mask, + struct ib_uverbs_ex_create_qp_resp, + response_length), + UAPI_DEF_METHOD_NEEDS_FN(create_qp)), + DECLARE_UVERBS_WRITE_EX( + IB_USER_VERBS_EX_CMD_MODIFY_QP, + ib_uverbs_ex_modify_qp, + UAPI_DEF_WRITE_IO_EX(struct ib_uverbs_ex_modify_qp, + base, + struct ib_uverbs_ex_modify_qp_resp, + response_length), + UAPI_DEF_METHOD_NEEDS_FN(modify_qp))), + + DECLARE_UVERBS_OBJECT( + UVERBS_OBJECT_RWQ_IND_TBL, + DECLARE_UVERBS_WRITE_EX( + IB_USER_VERBS_EX_CMD_CREATE_RWQ_IND_TBL, + ib_uverbs_ex_create_rwq_ind_table, + UAPI_DEF_WRITE_IO_EX( + struct ib_uverbs_ex_create_rwq_ind_table, + log_ind_tbl_size, + struct ib_uverbs_ex_create_rwq_ind_table_resp, + ind_tbl_num), + UAPI_DEF_METHOD_NEEDS_FN(create_rwq_ind_table)), + DECLARE_UVERBS_WRITE_EX( + IB_USER_VERBS_EX_CMD_DESTROY_RWQ_IND_TBL, + ib_uverbs_ex_destroy_rwq_ind_table, + UAPI_DEF_WRITE_I( + struct ib_uverbs_ex_destroy_rwq_ind_table), + UAPI_DEF_METHOD_NEEDS_FN(destroy_rwq_ind_table))), + + DECLARE_UVERBS_OBJECT( + UVERBS_OBJECT_WQ, + DECLARE_UVERBS_WRITE_EX( + IB_USER_VERBS_EX_CMD_CREATE_WQ, + ib_uverbs_ex_create_wq, + UAPI_DEF_WRITE_IO_EX(struct ib_uverbs_ex_create_wq, + max_sge, + struct ib_uverbs_ex_create_wq_resp, + wqn), + UAPI_DEF_METHOD_NEEDS_FN(create_wq)), + DECLARE_UVERBS_WRITE_EX( + IB_USER_VERBS_EX_CMD_DESTROY_WQ, + ib_uverbs_ex_destroy_wq, + UAPI_DEF_WRITE_IO_EX(struct ib_uverbs_ex_destroy_wq, + wq_handle, + struct ib_uverbs_ex_destroy_wq_resp, + reserved), + UAPI_DEF_METHOD_NEEDS_FN(destroy_wq)), + DECLARE_UVERBS_WRITE_EX( + IB_USER_VERBS_EX_CMD_MODIFY_WQ, + ib_uverbs_ex_modify_wq, + UAPI_DEF_WRITE_I_EX(struct ib_uverbs_ex_modify_wq, + curr_wq_state), + UAPI_DEF_METHOD_NEEDS_FN(modify_wq))), + + DECLARE_UVERBS_OBJECT( + UVERBS_OBJECT_SRQ, + DECLARE_UVERBS_WRITE(IB_USER_VERBS_CMD_CREATE_SRQ, + ib_uverbs_create_srq, + UAPI_DEF_WRITE_UDATA_IO( + struct ib_uverbs_create_srq, + struct ib_uverbs_create_srq_resp), + UAPI_DEF_METHOD_NEEDS_FN(create_srq)), + DECLARE_UVERBS_WRITE(IB_USER_VERBS_CMD_CREATE_XSRQ, + ib_uverbs_create_xsrq, + UAPI_DEF_WRITE_UDATA_IO( + struct ib_uverbs_create_xsrq, + struct ib_uverbs_create_srq_resp), + UAPI_DEF_METHOD_NEEDS_FN(create_srq)), + DECLARE_UVERBS_WRITE( + IB_USER_VERBS_CMD_DESTROY_SRQ, + ib_uverbs_destroy_srq, + UAPI_DEF_WRITE_IO(struct ib_uverbs_destroy_srq, + struct ib_uverbs_destroy_srq_resp), + UAPI_DEF_METHOD_NEEDS_FN(destroy_srq)), + DECLARE_UVERBS_WRITE( + IB_USER_VERBS_CMD_MODIFY_SRQ, + ib_uverbs_modify_srq, + UAPI_DEF_WRITE_UDATA_I(struct ib_uverbs_modify_srq), + UAPI_DEF_METHOD_NEEDS_FN(modify_srq)), + DECLARE_UVERBS_WRITE( + IB_USER_VERBS_CMD_POST_SRQ_RECV, + ib_uverbs_post_srq_recv, + UAPI_DEF_WRITE_IO(struct ib_uverbs_post_srq_recv, + struct ib_uverbs_post_srq_recv_resp), + UAPI_DEF_METHOD_NEEDS_FN(post_srq_recv)), + DECLARE_UVERBS_WRITE( + IB_USER_VERBS_CMD_QUERY_SRQ, + ib_uverbs_query_srq, + UAPI_DEF_WRITE_IO(struct ib_uverbs_query_srq, + struct ib_uverbs_query_srq_resp), + UAPI_DEF_METHOD_NEEDS_FN(query_srq))), + + DECLARE_UVERBS_OBJECT( + UVERBS_OBJECT_XRCD, + DECLARE_UVERBS_WRITE( + IB_USER_VERBS_CMD_CLOSE_XRCD, + ib_uverbs_close_xrcd, + UAPI_DEF_WRITE_I(struct ib_uverbs_close_xrcd), + UAPI_DEF_METHOD_NEEDS_FN(dealloc_xrcd)), + DECLARE_UVERBS_WRITE(IB_USER_VERBS_CMD_OPEN_QP, + ib_uverbs_open_qp, + UAPI_DEF_WRITE_UDATA_IO( + struct ib_uverbs_open_qp, + struct ib_uverbs_create_qp_resp)), + DECLARE_UVERBS_WRITE(IB_USER_VERBS_CMD_OPEN_XRCD, + ib_uverbs_open_xrcd, + UAPI_DEF_WRITE_UDATA_IO( + struct ib_uverbs_open_xrcd, + struct ib_uverbs_open_xrcd_resp), + UAPI_DEF_METHOD_NEEDS_FN(alloc_xrcd))), + + {}, +}; diff --git a/drivers/infiniband/core/uverbs_ioctl.c b/drivers/infiniband/core/uverbs_ioctl.c index b0e493e8d860..8c81ff698052 100644 --- a/drivers/infiniband/core/uverbs_ioctl.c +++ b/drivers/infiniband/core/uverbs_ioctl.c @@ -404,8 +404,7 @@ static int uverbs_set_attr(struct bundle_priv *pbundle, static int ib_uverbs_run_method(struct bundle_priv *pbundle, unsigned int num_attrs) { - int (*handler)(struct ib_uverbs_file *ufile, - struct uverbs_attr_bundle *ctx); + int (*handler)(struct uverbs_attr_bundle *attrs); size_t uattrs_size = array_size(sizeof(*pbundle->uattrs), num_attrs); unsigned int destroy_bkey = pbundle->method_elm->destroy_bkey; unsigned int i; @@ -436,6 +435,11 @@ static int ib_uverbs_run_method(struct bundle_priv *pbundle, pbundle->method_elm->key_bitmap_len))) return -EINVAL; + if (pbundle->method_elm->has_udata) + uverbs_fill_udata(&pbundle->bundle, + &pbundle->bundle.driver_udata, + UVERBS_ATTR_UHW_IN, UVERBS_ATTR_UHW_OUT); + if (destroy_bkey != UVERBS_API_ATTR_BKEY_LEN) { struct uverbs_obj_attr *destroy_attr = &pbundle->bundle.attrs[destroy_bkey].obj_attr; @@ -445,10 +449,10 @@ static int ib_uverbs_run_method(struct bundle_priv *pbundle, return ret; __clear_bit(destroy_bkey, pbundle->uobj_finalize); - ret = handler(pbundle->bundle.ufile, &pbundle->bundle); + ret = handler(&pbundle->bundle); uobj_put_destroy(destroy_attr->uobject); } else { - ret = handler(pbundle->bundle.ufile, &pbundle->bundle); + ret = handler(&pbundle->bundle); } /* @@ -662,35 +666,37 @@ int uverbs_get_flags32(u32 *to, const struct uverbs_attr_bundle *attrs_bundle, EXPORT_SYMBOL(uverbs_get_flags32); /* - * This is for ease of conversion. The purpose is to convert all drivers to - * use uverbs_attr_bundle instead of ib_udata. Assume attr == 0 is input and - * attr == 1 is output. + * Fill a ib_udata struct (core or uhw) using the given attribute IDs. + * This is primarily used to convert the UVERBS_ATTR_UHW() into the + * ib_udata format used by the drivers. */ -void create_udata(struct uverbs_attr_bundle *bundle, struct ib_udata *udata) +void uverbs_fill_udata(struct uverbs_attr_bundle *bundle, + struct ib_udata *udata, unsigned int attr_in, + unsigned int attr_out) { struct bundle_priv *pbundle = container_of(bundle, struct bundle_priv, bundle); - const struct uverbs_attr *uhw_in = - uverbs_attr_get(bundle, UVERBS_ATTR_UHW_IN); - const struct uverbs_attr *uhw_out = - uverbs_attr_get(bundle, UVERBS_ATTR_UHW_OUT); - - if (!IS_ERR(uhw_in)) { - udata->inlen = uhw_in->ptr_attr.len; - if (uverbs_attr_ptr_is_inline(uhw_in)) + const struct uverbs_attr *in = + uverbs_attr_get(&pbundle->bundle, attr_in); + const struct uverbs_attr *out = + uverbs_attr_get(&pbundle->bundle, attr_out); + + if (!IS_ERR(in)) { + udata->inlen = in->ptr_attr.len; + if (uverbs_attr_ptr_is_inline(in)) udata->inbuf = - &pbundle->user_attrs[uhw_in->ptr_attr.uattr_idx] + &pbundle->user_attrs[in->ptr_attr.uattr_idx] .data; else - udata->inbuf = u64_to_user_ptr(uhw_in->ptr_attr.data); + udata->inbuf = u64_to_user_ptr(in->ptr_attr.data); } else { udata->inbuf = NULL; udata->inlen = 0; } - if (!IS_ERR(uhw_out)) { - udata->outbuf = u64_to_user_ptr(uhw_out->ptr_attr.data); - udata->outlen = uhw_out->ptr_attr.len; + if (!IS_ERR(out)) { + udata->outbuf = u64_to_user_ptr(out->ptr_attr.data); + udata->outlen = out->ptr_attr.len; } else { udata->outbuf = NULL; udata->outlen = 0; @@ -745,3 +751,14 @@ int _uverbs_get_const(s64 *to, const struct uverbs_attr_bundle *attrs_bundle, return 0; } EXPORT_SYMBOL(_uverbs_get_const); + +int uverbs_copy_to_struct_or_zero(const struct uverbs_attr_bundle *bundle, + size_t idx, const void *from, size_t size) +{ + const struct uverbs_attr *attr = uverbs_attr_get(bundle, idx); + + if (clear_user(u64_to_user_ptr(attr->ptr_attr.data), + attr->ptr_attr.len)) + return -EFAULT; + return uverbs_copy_to(bundle, idx, from, size); +} diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c index 6d373f5515b7..9f9172eb1512 100644 --- a/drivers/infiniband/core/uverbs_main.c +++ b/drivers/infiniband/core/uverbs_main.c @@ -74,64 +74,6 @@ static dev_t dynamic_uverbs_dev; static struct class *uverbs_class; static DEFINE_IDA(uverbs_ida); - -static ssize_t (*uverbs_cmd_table[])(struct ib_uverbs_file *file, - const char __user *buf, int in_len, - int out_len) = { - [IB_USER_VERBS_CMD_GET_CONTEXT] = ib_uverbs_get_context, - [IB_USER_VERBS_CMD_QUERY_DEVICE] = ib_uverbs_query_device, - [IB_USER_VERBS_CMD_QUERY_PORT] = ib_uverbs_query_port, - [IB_USER_VERBS_CMD_ALLOC_PD] = ib_uverbs_alloc_pd, - [IB_USER_VERBS_CMD_DEALLOC_PD] = ib_uverbs_dealloc_pd, - [IB_USER_VERBS_CMD_REG_MR] = ib_uverbs_reg_mr, - [IB_USER_VERBS_CMD_REREG_MR] = ib_uverbs_rereg_mr, - [IB_USER_VERBS_CMD_DEREG_MR] = ib_uverbs_dereg_mr, - [IB_USER_VERBS_CMD_ALLOC_MW] = ib_uverbs_alloc_mw, - [IB_USER_VERBS_CMD_DEALLOC_MW] = ib_uverbs_dealloc_mw, - [IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL] = ib_uverbs_create_comp_channel, - [IB_USER_VERBS_CMD_CREATE_CQ] = ib_uverbs_create_cq, - [IB_USER_VERBS_CMD_RESIZE_CQ] = ib_uverbs_resize_cq, - [IB_USER_VERBS_CMD_POLL_CQ] = ib_uverbs_poll_cq, - [IB_USER_VERBS_CMD_REQ_NOTIFY_CQ] = ib_uverbs_req_notify_cq, - [IB_USER_VERBS_CMD_DESTROY_CQ] = ib_uverbs_destroy_cq, - [IB_USER_VERBS_CMD_CREATE_QP] = ib_uverbs_create_qp, - [IB_USER_VERBS_CMD_QUERY_QP] = ib_uverbs_query_qp, - [IB_USER_VERBS_CMD_MODIFY_QP] = ib_uverbs_modify_qp, - [IB_USER_VERBS_CMD_DESTROY_QP] = ib_uverbs_destroy_qp, - [IB_USER_VERBS_CMD_POST_SEND] = ib_uverbs_post_send, - [IB_USER_VERBS_CMD_POST_RECV] = ib_uverbs_post_recv, - [IB_USER_VERBS_CMD_POST_SRQ_RECV] = ib_uverbs_post_srq_recv, - [IB_USER_VERBS_CMD_CREATE_AH] = ib_uverbs_create_ah, - [IB_USER_VERBS_CMD_DESTROY_AH] = ib_uverbs_destroy_ah, - [IB_USER_VERBS_CMD_ATTACH_MCAST] = ib_uverbs_attach_mcast, - [IB_USER_VERBS_CMD_DETACH_MCAST] = ib_uverbs_detach_mcast, - [IB_USER_VERBS_CMD_CREATE_SRQ] = ib_uverbs_create_srq, - [IB_USER_VERBS_CMD_MODIFY_SRQ] = ib_uverbs_modify_srq, - [IB_USER_VERBS_CMD_QUERY_SRQ] = ib_uverbs_query_srq, - [IB_USER_VERBS_CMD_DESTROY_SRQ] = ib_uverbs_destroy_srq, - [IB_USER_VERBS_CMD_OPEN_XRCD] = ib_uverbs_open_xrcd, - [IB_USER_VERBS_CMD_CLOSE_XRCD] = ib_uverbs_close_xrcd, - [IB_USER_VERBS_CMD_CREATE_XSRQ] = ib_uverbs_create_xsrq, - [IB_USER_VERBS_CMD_OPEN_QP] = ib_uverbs_open_qp, -}; - -static int (*uverbs_ex_cmd_table[])(struct ib_uverbs_file *file, - struct ib_udata *ucore, - struct ib_udata *uhw) = { - [IB_USER_VERBS_EX_CMD_CREATE_FLOW] = ib_uverbs_ex_create_flow, - [IB_USER_VERBS_EX_CMD_DESTROY_FLOW] = ib_uverbs_ex_destroy_flow, - [IB_USER_VERBS_EX_CMD_QUERY_DEVICE] = ib_uverbs_ex_query_device, - [IB_USER_VERBS_EX_CMD_CREATE_CQ] = ib_uverbs_ex_create_cq, - [IB_USER_VERBS_EX_CMD_CREATE_QP] = ib_uverbs_ex_create_qp, - [IB_USER_VERBS_EX_CMD_CREATE_WQ] = ib_uverbs_ex_create_wq, - [IB_USER_VERBS_EX_CMD_MODIFY_WQ] = ib_uverbs_ex_modify_wq, - [IB_USER_VERBS_EX_CMD_DESTROY_WQ] = ib_uverbs_ex_destroy_wq, - [IB_USER_VERBS_EX_CMD_CREATE_RWQ_IND_TBL] = ib_uverbs_ex_create_rwq_ind_table, - [IB_USER_VERBS_EX_CMD_DESTROY_RWQ_IND_TBL] = ib_uverbs_ex_destroy_rwq_ind_table, - [IB_USER_VERBS_EX_CMD_MODIFY_QP] = ib_uverbs_ex_modify_qp, - [IB_USER_VERBS_EX_CMD_MODIFY_CQ] = ib_uverbs_ex_modify_cq, -}; - static void ib_uverbs_add_one(struct ib_device *device); static void ib_uverbs_remove_one(struct ib_device *device, void *client_data); @@ -139,7 +81,7 @@ static void ib_uverbs_remove_one(struct ib_device *device, void *client_data); * Must be called with the ufile->device->disassociate_srcu held, and the lock * must be held until use of the ucontext is finished. */ -struct ib_ucontext *ib_uverbs_get_ucontext(struct ib_uverbs_file *ufile) +struct ib_ucontext *ib_uverbs_get_ucontext_file(struct ib_uverbs_file *ufile) { /* * We do not hold the hw_destroy_rwsem lock for this flow, instead @@ -157,14 +99,14 @@ struct ib_ucontext *ib_uverbs_get_ucontext(struct ib_uverbs_file *ufile) return ucontext; } -EXPORT_SYMBOL(ib_uverbs_get_ucontext); +EXPORT_SYMBOL(ib_uverbs_get_ucontext_file); int uverbs_dealloc_mw(struct ib_mw *mw) { struct ib_pd *pd = mw->pd; int ret; - ret = mw->device->dealloc_mw(mw); + ret = mw->device->ops.dealloc_mw(mw); if (!ret) atomic_dec(&pd->usecnt); return ret; @@ -255,7 +197,7 @@ void ib_uverbs_release_file(struct kref *ref) srcu_key = srcu_read_lock(&file->device->disassociate_srcu); ib_dev = srcu_dereference(file->device->ib_dev, &file->device->disassociate_srcu); - if (ib_dev && !ib_dev->disassociate_ucontext) + if (ib_dev && !ib_dev->ops.disassociate_ucontext) module_put(ib_dev->owner); srcu_read_unlock(&file->device->disassociate_srcu, srcu_key); @@ -646,51 +588,19 @@ err_put_refs: return filp; } -static bool verify_command_mask(struct ib_uverbs_file *ufile, u32 command, - bool extended) -{ - if (!extended) - return ufile->uverbs_cmd_mask & BIT_ULL(command); - - return ufile->uverbs_ex_cmd_mask & BIT_ULL(command); -} - -static bool verify_command_idx(u32 command, bool extended) -{ - if (extended) - return command < ARRAY_SIZE(uverbs_ex_cmd_table) && - uverbs_ex_cmd_table[command]; - - return command < ARRAY_SIZE(uverbs_cmd_table) && - uverbs_cmd_table[command]; -} - -static ssize_t process_hdr(struct ib_uverbs_cmd_hdr *hdr, - u32 *command, bool *extended) -{ - if (hdr->command & ~(u32)(IB_USER_VERBS_CMD_FLAG_EXTENDED | - IB_USER_VERBS_CMD_COMMAND_MASK)) - return -EINVAL; - - *command = hdr->command & IB_USER_VERBS_CMD_COMMAND_MASK; - *extended = hdr->command & IB_USER_VERBS_CMD_FLAG_EXTENDED; - - if (!verify_command_idx(*command, *extended)) - return -EOPNOTSUPP; - - return 0; -} - static ssize_t verify_hdr(struct ib_uverbs_cmd_hdr *hdr, - struct ib_uverbs_ex_cmd_hdr *ex_hdr, - size_t count, bool extended) + struct ib_uverbs_ex_cmd_hdr *ex_hdr, size_t count, + const struct uverbs_api_write_method *method_elm) { - if (extended) { + if (method_elm->is_ex) { count -= sizeof(*hdr) + sizeof(*ex_hdr); if ((hdr->in_words + ex_hdr->provider_in_words) * 8 != count) return -EINVAL; + if (hdr->in_words * 8 < method_elm->req_size) + return -ENOSPC; + if (ex_hdr->cmd_hdr_reserved) return -EINVAL; @@ -698,6 +608,9 @@ static ssize_t verify_hdr(struct ib_uverbs_cmd_hdr *hdr, if (!hdr->out_words && !ex_hdr->provider_out_words) return -EINVAL; + if (hdr->out_words * 8 < method_elm->resp_size) + return -ENOSPC; + if (!access_ok(VERIFY_WRITE, u64_to_user_ptr(ex_hdr->response), (hdr->out_words + ex_hdr->provider_out_words) * 8)) @@ -714,6 +627,24 @@ static ssize_t verify_hdr(struct ib_uverbs_cmd_hdr *hdr, if (hdr->in_words * 4 != count) return -EINVAL; + if (count < method_elm->req_size + sizeof(hdr)) { + /* + * rdma-core v18 and v19 have a bug where they send DESTROY_CQ + * with a 16 byte write instead of 24. Old kernels didn't + * check the size so they allowed this. Now that the size is + * checked provide a compatibility work around to not break + * those userspaces. + */ + if (hdr->command == IB_USER_VERBS_CMD_DESTROY_CQ && + count == 16) { + hdr->in_words = 6; + return 0; + } + return -ENOSPC; + } + if (hdr->out_words * 4 < method_elm->resp_size) + return -ENOSPC; + return 0; } @@ -721,11 +652,12 @@ static ssize_t ib_uverbs_write(struct file *filp, const char __user *buf, size_t count, loff_t *pos) { struct ib_uverbs_file *file = filp->private_data; + const struct uverbs_api_write_method *method_elm; + struct uverbs_api *uapi = file->device->uapi; struct ib_uverbs_ex_cmd_hdr ex_hdr; struct ib_uverbs_cmd_hdr hdr; - bool extended; + struct uverbs_attr_bundle bundle; int srcu_key; - u32 command; ssize_t ret; if (!ib_safe_file_access(filp)) { @@ -740,57 +672,92 @@ static ssize_t ib_uverbs_write(struct file *filp, const char __user *buf, if (copy_from_user(&hdr, buf, sizeof(hdr))) return -EFAULT; - ret = process_hdr(&hdr, &command, &extended); - if (ret) - return ret; + method_elm = uapi_get_method(uapi, hdr.command); + if (IS_ERR(method_elm)) + return PTR_ERR(method_elm); - if (extended) { + if (method_elm->is_ex) { if (count < (sizeof(hdr) + sizeof(ex_hdr))) return -EINVAL; if (copy_from_user(&ex_hdr, buf + sizeof(hdr), sizeof(ex_hdr))) return -EFAULT; } - ret = verify_hdr(&hdr, &ex_hdr, count, extended); + ret = verify_hdr(&hdr, &ex_hdr, count, method_elm); if (ret) return ret; srcu_key = srcu_read_lock(&file->device->disassociate_srcu); - if (!verify_command_mask(file, command, extended)) { - ret = -EOPNOTSUPP; - goto out; - } - buf += sizeof(hdr); - if (!extended) { - ret = uverbs_cmd_table[command](file, buf, - hdr.in_words * 4, - hdr.out_words * 4); - } else { - struct ib_udata ucore; - struct ib_udata uhw; + bundle.ufile = file; + if (!method_elm->is_ex) { + size_t in_len = hdr.in_words * 4 - sizeof(hdr); + size_t out_len = hdr.out_words * 4; + u64 response = 0; + + if (method_elm->has_udata) { + bundle.driver_udata.inlen = + in_len - method_elm->req_size; + in_len = method_elm->req_size; + if (bundle.driver_udata.inlen) + bundle.driver_udata.inbuf = buf + in_len; + else + bundle.driver_udata.inbuf = NULL; + } else { + memset(&bundle.driver_udata, 0, + sizeof(bundle.driver_udata)); + } + + if (method_elm->has_resp) { + /* + * The macros check that if has_resp is set + * then the command request structure starts + * with a '__aligned u64 response' member. + */ + ret = get_user(response, (const u64 *)buf); + if (ret) + goto out_unlock; + + if (method_elm->has_udata) { + bundle.driver_udata.outlen = + out_len - method_elm->resp_size; + out_len = method_elm->resp_size; + if (bundle.driver_udata.outlen) + bundle.driver_udata.outbuf = + u64_to_user_ptr(response + + out_len); + else + bundle.driver_udata.outbuf = NULL; + } + } else { + bundle.driver_udata.outlen = 0; + bundle.driver_udata.outbuf = NULL; + } + ib_uverbs_init_udata_buf_or_null( + &bundle.ucore, buf, u64_to_user_ptr(response), + in_len, out_len); + } else { buf += sizeof(ex_hdr); - ib_uverbs_init_udata_buf_or_null(&ucore, buf, + ib_uverbs_init_udata_buf_or_null(&bundle.ucore, buf, u64_to_user_ptr(ex_hdr.response), hdr.in_words * 8, hdr.out_words * 8); - ib_uverbs_init_udata_buf_or_null(&uhw, - buf + ucore.inlen, - u64_to_user_ptr(ex_hdr.response) + ucore.outlen, - ex_hdr.provider_in_words * 8, - ex_hdr.provider_out_words * 8); + ib_uverbs_init_udata_buf_or_null( + &bundle.driver_udata, buf + bundle.ucore.inlen, + u64_to_user_ptr(ex_hdr.response) + bundle.ucore.outlen, + ex_hdr.provider_in_words * 8, + ex_hdr.provider_out_words * 8); - ret = uverbs_ex_cmd_table[command](file, &ucore, &uhw); - ret = (ret) ? : count; } -out: + ret = method_elm->handler(&bundle); +out_unlock: srcu_read_unlock(&file->device->disassociate_srcu, srcu_key); - return ret; + return (ret) ? : count; } static int ib_uverbs_mmap(struct file *filp, struct vm_area_struct *vma) @@ -801,13 +768,13 @@ static int ib_uverbs_mmap(struct file *filp, struct vm_area_struct *vma) int srcu_key; srcu_key = srcu_read_lock(&file->device->disassociate_srcu); - ucontext = ib_uverbs_get_ucontext(file); + ucontext = ib_uverbs_get_ucontext_file(file); if (IS_ERR(ucontext)) { ret = PTR_ERR(ucontext); goto out; } - ret = ucontext->device->mmap(ucontext, vma); + ret = ucontext->device->ops.mmap(ucontext, vma); out: srcu_read_unlock(&file->device->disassociate_srcu, srcu_key); return ret; @@ -1069,7 +1036,7 @@ static int ib_uverbs_open(struct inode *inode, struct file *filp) /* In case IB device supports disassociate ucontext, there is no hard * dependency between uverbs device and its low level device. */ - module_dependent = !(ib_dev->disassociate_ucontext); + module_dependent = !(ib_dev->ops.disassociate_ucontext); if (module_dependent) { if (!try_module_get(ib_dev->owner)) { @@ -1102,9 +1069,6 @@ static int ib_uverbs_open(struct inode *inode, struct file *filp) mutex_unlock(&dev->lists_mutex); srcu_read_unlock(&dev->disassociate_srcu, srcu_key); - file->uverbs_cmd_mask = ib_dev->uverbs_cmd_mask; - file->uverbs_ex_cmd_mask = ib_dev->uverbs_ex_cmd_mask; - setup_ufile_idr_uobject(file); return nonseekable_open(inode, filp); @@ -1224,7 +1188,7 @@ static int ib_uverbs_create_uapi(struct ib_device *device, { struct uverbs_api *uapi; - uapi = uverbs_alloc_api(device->driver_specs, device->driver_id); + uapi = uverbs_alloc_api(device); if (IS_ERR(uapi)) return PTR_ERR(uapi); @@ -1239,7 +1203,7 @@ static void ib_uverbs_add_one(struct ib_device *device) struct ib_uverbs_device *uverbs_dev; int ret; - if (!device->alloc_ucontext) + if (!device->ops.alloc_ucontext) return; uverbs_dev = kzalloc(sizeof(*uverbs_dev), GFP_KERNEL); @@ -1285,7 +1249,7 @@ static void ib_uverbs_add_one(struct ib_device *device) dev_set_name(&uverbs_dev->dev, "uverbs%d", uverbs_dev->devnum); cdev_init(&uverbs_dev->cdev, - device->mmap ? &uverbs_mmap_fops : &uverbs_fops); + device->ops.mmap ? &uverbs_mmap_fops : &uverbs_fops); uverbs_dev->cdev.owner = THIS_MODULE; ret = cdev_device_add(&uverbs_dev->cdev, &uverbs_dev->dev); @@ -1373,7 +1337,7 @@ static void ib_uverbs_remove_one(struct ib_device *device, void *client_data) cdev_device_del(&uverbs_dev->cdev, &uverbs_dev->dev); ida_free(&uverbs_ida, uverbs_dev->devnum); - if (device->disassociate_ucontext) { + if (device->ops.disassociate_ucontext) { /* We disassociate HW resources and immediately return. * Userspace will see a EIO errno for all future access. * Upon returning, ib_device may be freed internally and is not diff --git a/drivers/infiniband/core/uverbs_std_types.c b/drivers/infiniband/core/uverbs_std_types.c index 203cc96ac6f5..cbc72312eb41 100644 --- a/drivers/infiniband/core/uverbs_std_types.c +++ b/drivers/infiniband/core/uverbs_std_types.c @@ -42,7 +42,8 @@ static int uverbs_free_ah(struct ib_uobject *uobject, enum rdma_remove_reason why) { - return rdma_destroy_ah((struct ib_ah *)uobject->object); + return rdma_destroy_ah((struct ib_ah *)uobject->object, + RDMA_DESTROY_AH_SLEEPABLE); } static int uverbs_free_flow(struct ib_uobject *uobject, @@ -54,7 +55,7 @@ static int uverbs_free_flow(struct ib_uobject *uobject, struct ib_qp *qp = flow->qp; int ret; - ret = flow->device->destroy_flow(flow); + ret = flow->device->ops.destroy_flow(flow); if (!ret) { if (qp) atomic_dec(&qp->usecnt); @@ -210,8 +211,7 @@ static int uverbs_hot_unplug_completion_event_file(struct ib_uobject *uobj, return 0; }; -int uverbs_destroy_def_handler(struct ib_uverbs_file *file, - struct uverbs_attr_bundle *attrs) +int uverbs_destroy_def_handler(struct uverbs_attr_bundle *attrs) { return 0; } @@ -229,58 +229,106 @@ DECLARE_UVERBS_NAMED_OBJECT( UVERBS_OBJECT_QP, UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_uqp_object), uverbs_free_qp)); +DECLARE_UVERBS_NAMED_METHOD_DESTROY( + UVERBS_METHOD_MW_DESTROY, + UVERBS_ATTR_IDR(UVERBS_ATTR_DESTROY_MW_HANDLE, + UVERBS_OBJECT_MW, + UVERBS_ACCESS_DESTROY, + UA_MANDATORY)); + DECLARE_UVERBS_NAMED_OBJECT(UVERBS_OBJECT_MW, - UVERBS_TYPE_ALLOC_IDR(uverbs_free_mw)); + UVERBS_TYPE_ALLOC_IDR(uverbs_free_mw), + &UVERBS_METHOD(UVERBS_METHOD_MW_DESTROY)); DECLARE_UVERBS_NAMED_OBJECT( UVERBS_OBJECT_SRQ, UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_usrq_object), uverbs_free_srq)); +DECLARE_UVERBS_NAMED_METHOD_DESTROY( + UVERBS_METHOD_AH_DESTROY, + UVERBS_ATTR_IDR(UVERBS_ATTR_DESTROY_AH_HANDLE, + UVERBS_OBJECT_AH, + UVERBS_ACCESS_DESTROY, + UA_MANDATORY)); + DECLARE_UVERBS_NAMED_OBJECT(UVERBS_OBJECT_AH, - UVERBS_TYPE_ALLOC_IDR(uverbs_free_ah)); + UVERBS_TYPE_ALLOC_IDR(uverbs_free_ah), + &UVERBS_METHOD(UVERBS_METHOD_AH_DESTROY)); + +DECLARE_UVERBS_NAMED_METHOD_DESTROY( + UVERBS_METHOD_FLOW_DESTROY, + UVERBS_ATTR_IDR(UVERBS_ATTR_DESTROY_FLOW_HANDLE, + UVERBS_OBJECT_FLOW, + UVERBS_ACCESS_DESTROY, + UA_MANDATORY)); DECLARE_UVERBS_NAMED_OBJECT( UVERBS_OBJECT_FLOW, UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_uflow_object), - uverbs_free_flow)); + uverbs_free_flow), + &UVERBS_METHOD(UVERBS_METHOD_FLOW_DESTROY)); DECLARE_UVERBS_NAMED_OBJECT( UVERBS_OBJECT_WQ, UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_uwq_object), uverbs_free_wq)); +DECLARE_UVERBS_NAMED_METHOD_DESTROY( + UVERBS_METHOD_RWQ_IND_TBL_DESTROY, + UVERBS_ATTR_IDR(UVERBS_ATTR_DESTROY_RWQ_IND_TBL_HANDLE, + UVERBS_OBJECT_RWQ_IND_TBL, + UVERBS_ACCESS_DESTROY, + UA_MANDATORY)); + DECLARE_UVERBS_NAMED_OBJECT(UVERBS_OBJECT_RWQ_IND_TBL, - UVERBS_TYPE_ALLOC_IDR(uverbs_free_rwq_ind_tbl)); + UVERBS_TYPE_ALLOC_IDR(uverbs_free_rwq_ind_tbl), + &UVERBS_METHOD(UVERBS_METHOD_RWQ_IND_TBL_DESTROY)); + +DECLARE_UVERBS_NAMED_METHOD_DESTROY( + UVERBS_METHOD_XRCD_DESTROY, + UVERBS_ATTR_IDR(UVERBS_ATTR_DESTROY_XRCD_HANDLE, + UVERBS_OBJECT_XRCD, + UVERBS_ACCESS_DESTROY, + UA_MANDATORY)); DECLARE_UVERBS_NAMED_OBJECT( UVERBS_OBJECT_XRCD, UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_uxrcd_object), - uverbs_free_xrcd)); + uverbs_free_xrcd), + &UVERBS_METHOD(UVERBS_METHOD_XRCD_DESTROY)); + +DECLARE_UVERBS_NAMED_METHOD_DESTROY( + UVERBS_METHOD_PD_DESTROY, + UVERBS_ATTR_IDR(UVERBS_ATTR_DESTROY_PD_HANDLE, + UVERBS_OBJECT_PD, + UVERBS_ACCESS_DESTROY, + UA_MANDATORY)); DECLARE_UVERBS_NAMED_OBJECT(UVERBS_OBJECT_PD, - UVERBS_TYPE_ALLOC_IDR(uverbs_free_pd)); - -DECLARE_UVERBS_GLOBAL_METHODS(UVERBS_OBJECT_DEVICE); - -DECLARE_UVERBS_OBJECT_TREE(uverbs_default_objects, - &UVERBS_OBJECT(UVERBS_OBJECT_DEVICE), - &UVERBS_OBJECT(UVERBS_OBJECT_PD), - &UVERBS_OBJECT(UVERBS_OBJECT_MR), - &UVERBS_OBJECT(UVERBS_OBJECT_COMP_CHANNEL), - &UVERBS_OBJECT(UVERBS_OBJECT_CQ), - &UVERBS_OBJECT(UVERBS_OBJECT_QP), - &UVERBS_OBJECT(UVERBS_OBJECT_AH), - &UVERBS_OBJECT(UVERBS_OBJECT_MW), - &UVERBS_OBJECT(UVERBS_OBJECT_SRQ), - &UVERBS_OBJECT(UVERBS_OBJECT_FLOW), - &UVERBS_OBJECT(UVERBS_OBJECT_WQ), - &UVERBS_OBJECT(UVERBS_OBJECT_RWQ_IND_TBL), - &UVERBS_OBJECT(UVERBS_OBJECT_XRCD), - &UVERBS_OBJECT(UVERBS_OBJECT_FLOW_ACTION), - &UVERBS_OBJECT(UVERBS_OBJECT_DM), - &UVERBS_OBJECT(UVERBS_OBJECT_COUNTERS)); - -const struct uverbs_object_tree_def *uverbs_default_get_objects(void) -{ - return &uverbs_default_objects; -} + UVERBS_TYPE_ALLOC_IDR(uverbs_free_pd), + &UVERBS_METHOD(UVERBS_METHOD_PD_DESTROY)); + +const struct uapi_definition uverbs_def_obj_intf[] = { + UAPI_DEF_CHAIN_OBJ_TREE_NAMED(UVERBS_OBJECT_PD, + UAPI_DEF_OBJ_NEEDS_FN(dealloc_pd)), + UAPI_DEF_CHAIN_OBJ_TREE_NAMED(UVERBS_OBJECT_COMP_CHANNEL, + UAPI_DEF_OBJ_NEEDS_FN(dealloc_pd)), + UAPI_DEF_CHAIN_OBJ_TREE_NAMED(UVERBS_OBJECT_QP, + UAPI_DEF_OBJ_NEEDS_FN(destroy_qp)), + UAPI_DEF_CHAIN_OBJ_TREE_NAMED(UVERBS_OBJECT_AH, + UAPI_DEF_OBJ_NEEDS_FN(destroy_ah)), + UAPI_DEF_CHAIN_OBJ_TREE_NAMED(UVERBS_OBJECT_MW, + UAPI_DEF_OBJ_NEEDS_FN(dealloc_mw)), + UAPI_DEF_CHAIN_OBJ_TREE_NAMED(UVERBS_OBJECT_SRQ, + UAPI_DEF_OBJ_NEEDS_FN(destroy_srq)), + UAPI_DEF_CHAIN_OBJ_TREE_NAMED(UVERBS_OBJECT_FLOW, + UAPI_DEF_OBJ_NEEDS_FN(destroy_flow)), + UAPI_DEF_CHAIN_OBJ_TREE_NAMED(UVERBS_OBJECT_WQ, + UAPI_DEF_OBJ_NEEDS_FN(destroy_wq)), + UAPI_DEF_CHAIN_OBJ_TREE_NAMED( + UVERBS_OBJECT_RWQ_IND_TBL, + UAPI_DEF_OBJ_NEEDS_FN(destroy_rwq_ind_table)), + UAPI_DEF_CHAIN_OBJ_TREE_NAMED(UVERBS_OBJECT_XRCD, + UAPI_DEF_OBJ_NEEDS_FN(dealloc_xrcd)), + {} +}; diff --git a/drivers/infiniband/core/uverbs_std_types_counters.c b/drivers/infiniband/core/uverbs_std_types_counters.c index a0ffdcf9a51c..309c5e80988d 100644 --- a/drivers/infiniband/core/uverbs_std_types_counters.c +++ b/drivers/infiniband/core/uverbs_std_types_counters.c @@ -44,11 +44,11 @@ static int uverbs_free_counters(struct ib_uobject *uobject, if (ret) return ret; - return counters->device->destroy_counters(counters); + return counters->device->ops.destroy_counters(counters); } static int UVERBS_HANDLER(UVERBS_METHOD_COUNTERS_CREATE)( - struct ib_uverbs_file *file, struct uverbs_attr_bundle *attrs) + struct uverbs_attr_bundle *attrs) { struct ib_uobject *uobj = uverbs_attr_get_uobject( attrs, UVERBS_ATTR_CREATE_COUNTERS_HANDLE); @@ -61,10 +61,10 @@ static int UVERBS_HANDLER(UVERBS_METHOD_COUNTERS_CREATE)( * have the ability to remove methods from parse tree once * such condition is met. */ - if (!ib_dev->create_counters) + if (!ib_dev->ops.create_counters) return -EOPNOTSUPP; - counters = ib_dev->create_counters(ib_dev, attrs); + counters = ib_dev->ops.create_counters(ib_dev, attrs); if (IS_ERR(counters)) { ret = PTR_ERR(counters); goto err_create_counters; @@ -82,7 +82,7 @@ err_create_counters: } static int UVERBS_HANDLER(UVERBS_METHOD_COUNTERS_READ)( - struct ib_uverbs_file *file, struct uverbs_attr_bundle *attrs) + struct uverbs_attr_bundle *attrs) { struct ib_counters_read_attr read_attr = {}; const struct uverbs_attr *uattr; @@ -90,7 +90,7 @@ static int UVERBS_HANDLER(UVERBS_METHOD_COUNTERS_READ)( uverbs_attr_get_obj(attrs, UVERBS_ATTR_READ_COUNTERS_HANDLE); int ret; - if (!counters->device->read_counters) + if (!counters->device->ops.read_counters) return -EOPNOTSUPP; if (!atomic_read(&counters->usecnt)) @@ -109,7 +109,7 @@ static int UVERBS_HANDLER(UVERBS_METHOD_COUNTERS_READ)( if (IS_ERR(read_attr.counters_buff)) return PTR_ERR(read_attr.counters_buff); - ret = counters->device->read_counters(counters, &read_attr, attrs); + ret = counters->device->ops.read_counters(counters, &read_attr, attrs); if (ret) return ret; @@ -149,3 +149,9 @@ DECLARE_UVERBS_NAMED_OBJECT(UVERBS_OBJECT_COUNTERS, &UVERBS_METHOD(UVERBS_METHOD_COUNTERS_CREATE), &UVERBS_METHOD(UVERBS_METHOD_COUNTERS_DESTROY), &UVERBS_METHOD(UVERBS_METHOD_COUNTERS_READ)); + +const struct uapi_definition uverbs_def_obj_counters[] = { + UAPI_DEF_CHAIN_OBJ_TREE_NAMED(UVERBS_OBJECT_COUNTERS, + UAPI_DEF_OBJ_NEEDS_FN(destroy_counters)), + {} +}; diff --git a/drivers/infiniband/core/uverbs_std_types_cq.c b/drivers/infiniband/core/uverbs_std_types_cq.c index 5b5f2052cd52..a59ea89e3f2b 100644 --- a/drivers/infiniband/core/uverbs_std_types_cq.c +++ b/drivers/infiniband/core/uverbs_std_types_cq.c @@ -58,13 +58,12 @@ static int uverbs_free_cq(struct ib_uobject *uobject, } static int UVERBS_HANDLER(UVERBS_METHOD_CQ_CREATE)( - struct ib_uverbs_file *file, struct uverbs_attr_bundle *attrs) + struct uverbs_attr_bundle *attrs) { struct ib_ucq_object *obj = container_of( uverbs_attr_get_uobject(attrs, UVERBS_ATTR_CREATE_CQ_HANDLE), typeof(*obj), uobject); struct ib_device *ib_dev = obj->uobject.context->device; - struct ib_udata uhw; int ret; u64 user_handle; struct ib_cq_init_attr attr = {}; @@ -72,7 +71,7 @@ static int UVERBS_HANDLER(UVERBS_METHOD_CQ_CREATE)( struct ib_uverbs_completion_event_file *ev_file = NULL; struct ib_uobject *ev_file_uobj; - if (!ib_dev->create_cq || !ib_dev->destroy_cq) + if (!ib_dev->ops.create_cq || !ib_dev->ops.destroy_cq) return -EOPNOTSUPP; ret = uverbs_copy_from(&attr.comp_vector, attrs, @@ -101,7 +100,7 @@ static int UVERBS_HANDLER(UVERBS_METHOD_CQ_CREATE)( uverbs_uobject_get(ev_file_uobj); } - if (attr.comp_vector >= file->device->num_comp_vectors) { + if (attr.comp_vector >= attrs->ufile->device->num_comp_vectors) { ret = -EINVAL; goto err_event_file; } @@ -111,10 +110,8 @@ static int UVERBS_HANDLER(UVERBS_METHOD_CQ_CREATE)( INIT_LIST_HEAD(&obj->comp_list); INIT_LIST_HEAD(&obj->async_list); - /* Temporary, only until drivers get the new uverbs_attr_bundle */ - create_udata(attrs, &uhw); - - cq = ib_dev->create_cq(ib_dev, &attr, obj->uobject.context, &uhw); + cq = ib_dev->ops.create_cq(ib_dev, &attr, obj->uobject.context, + &attrs->driver_udata); if (IS_ERR(cq)) { ret = PTR_ERR(cq); goto err_event_file; @@ -129,7 +126,7 @@ static int UVERBS_HANDLER(UVERBS_METHOD_CQ_CREATE)( obj->uobject.user_handle = user_handle; atomic_set(&cq->usecnt, 0); cq->res.type = RDMA_RESTRACK_CQ; - rdma_restrack_add(&cq->res); + rdma_restrack_uadd(&cq->res); ret = uverbs_copy_to(attrs, UVERBS_ATTR_CREATE_CQ_RESP_CQE, &cq->cqe, sizeof(cq->cqe)); @@ -173,7 +170,7 @@ DECLARE_UVERBS_NAMED_METHOD( UVERBS_ATTR_UHW()); static int UVERBS_HANDLER(UVERBS_METHOD_CQ_DESTROY)( - struct ib_uverbs_file *file, struct uverbs_attr_bundle *attrs) + struct uverbs_attr_bundle *attrs) { struct ib_uobject *uobj = uverbs_attr_get_uobject(attrs, UVERBS_ATTR_DESTROY_CQ_HANDLE); @@ -207,3 +204,9 @@ DECLARE_UVERBS_NAMED_OBJECT( &UVERBS_METHOD(UVERBS_METHOD_CQ_DESTROY) #endif ); + +const struct uapi_definition uverbs_def_obj_cq[] = { + UAPI_DEF_CHAIN_OBJ_TREE_NAMED(UVERBS_OBJECT_CQ, + UAPI_DEF_OBJ_NEEDS_FN(destroy_cq)), + {} +}; diff --git a/drivers/infiniband/core/uverbs_std_types_device.c b/drivers/infiniband/core/uverbs_std_types_device.c new file mode 100644 index 000000000000..5030ec480370 --- /dev/null +++ b/drivers/infiniband/core/uverbs_std_types_device.c @@ -0,0 +1,224 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* + * Copyright (c) 2018, Mellanox Technologies inc. All rights reserved. + */ + +#include <rdma/uverbs_std_types.h> +#include "rdma_core.h" +#include "uverbs.h" +#include <rdma/uverbs_ioctl.h> +#include <rdma/opa_addr.h> + +/* + * This ioctl method allows calling any defined write or write_ex + * handler. This essentially replaces the hdr/ex_hdr system with the ioctl + * marshalling, and brings the non-ex path into the same marshalling as the ex + * path. + */ +static int UVERBS_HANDLER(UVERBS_METHOD_INVOKE_WRITE)( + struct uverbs_attr_bundle *attrs) +{ + struct uverbs_api *uapi = attrs->ufile->device->uapi; + const struct uverbs_api_write_method *method_elm; + u32 cmd; + int rc; + + rc = uverbs_get_const(&cmd, attrs, UVERBS_ATTR_WRITE_CMD); + if (rc) + return rc; + + method_elm = uapi_get_method(uapi, cmd); + if (IS_ERR(method_elm)) + return PTR_ERR(method_elm); + + uverbs_fill_udata(attrs, &attrs->ucore, UVERBS_ATTR_CORE_IN, + UVERBS_ATTR_CORE_OUT); + + if (attrs->ucore.inlen < method_elm->req_size || + attrs->ucore.outlen < method_elm->resp_size) + return -ENOSPC; + + return method_elm->handler(attrs); +} + +DECLARE_UVERBS_NAMED_METHOD(UVERBS_METHOD_INVOKE_WRITE, + UVERBS_ATTR_CONST_IN(UVERBS_ATTR_WRITE_CMD, + enum ib_uverbs_write_cmds, + UA_MANDATORY), + UVERBS_ATTR_PTR_IN(UVERBS_ATTR_CORE_IN, + UVERBS_ATTR_MIN_SIZE(sizeof(u32)), + UA_OPTIONAL), + UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_CORE_OUT, + UVERBS_ATTR_MIN_SIZE(0), + UA_OPTIONAL), + UVERBS_ATTR_UHW()); + +static uint32_t * +gather_objects_handle(struct ib_uverbs_file *ufile, + const struct uverbs_api_object *uapi_object, + struct uverbs_attr_bundle *attrs, + ssize_t out_len, + u64 *total) +{ + u64 max_count = out_len / sizeof(u32); + struct ib_uobject *obj; + u64 count = 0; + u32 *handles; + + /* Allocated memory that cannot page out where we gather + * all object ids under a spin_lock. + */ + handles = uverbs_zalloc(attrs, out_len); + if (IS_ERR(handles)) + return handles; + + spin_lock_irq(&ufile->uobjects_lock); + list_for_each_entry(obj, &ufile->uobjects, list) { + u32 obj_id = obj->id; + + if (obj->uapi_object != uapi_object) + continue; + + if (count >= max_count) + break; + + handles[count] = obj_id; + count++; + } + spin_unlock_irq(&ufile->uobjects_lock); + + *total = count; + return handles; +} + +static int UVERBS_HANDLER(UVERBS_METHOD_INFO_HANDLES)( + struct uverbs_attr_bundle *attrs) +{ + const struct uverbs_api_object *uapi_object; + ssize_t out_len; + u64 total = 0; + u16 object_id; + u32 *handles; + int ret; + + out_len = uverbs_attr_get_len(attrs, UVERBS_ATTR_INFO_HANDLES_LIST); + if (out_len <= 0 || (out_len % sizeof(u32) != 0)) + return -EINVAL; + + ret = uverbs_get_const(&object_id, attrs, UVERBS_ATTR_INFO_OBJECT_ID); + if (ret) + return ret; + + uapi_object = uapi_get_object(attrs->ufile->device->uapi, object_id); + if (!uapi_object) + return -EINVAL; + + handles = gather_objects_handle(attrs->ufile, uapi_object, attrs, + out_len, &total); + if (IS_ERR(handles)) + return PTR_ERR(handles); + + ret = uverbs_copy_to(attrs, UVERBS_ATTR_INFO_HANDLES_LIST, handles, + sizeof(u32) * total); + if (ret) + goto err; + + ret = uverbs_copy_to(attrs, UVERBS_ATTR_INFO_TOTAL_HANDLES, &total, + sizeof(total)); +err: + return ret; +} + +void copy_port_attr_to_resp(struct ib_port_attr *attr, + struct ib_uverbs_query_port_resp *resp, + struct ib_device *ib_dev, u8 port_num) +{ + resp->state = attr->state; + resp->max_mtu = attr->max_mtu; + resp->active_mtu = attr->active_mtu; + resp->gid_tbl_len = attr->gid_tbl_len; + resp->port_cap_flags = make_port_cap_flags(attr); + resp->max_msg_sz = attr->max_msg_sz; + resp->bad_pkey_cntr = attr->bad_pkey_cntr; + resp->qkey_viol_cntr = attr->qkey_viol_cntr; + resp->pkey_tbl_len = attr->pkey_tbl_len; + + if (rdma_is_grh_required(ib_dev, port_num)) + resp->flags |= IB_UVERBS_QPF_GRH_REQUIRED; + + if (rdma_cap_opa_ah(ib_dev, port_num)) { + resp->lid = OPA_TO_IB_UCAST_LID(attr->lid); + resp->sm_lid = OPA_TO_IB_UCAST_LID(attr->sm_lid); + } else { + resp->lid = ib_lid_cpu16(attr->lid); + resp->sm_lid = ib_lid_cpu16(attr->sm_lid); + } + + resp->lmc = attr->lmc; + resp->max_vl_num = attr->max_vl_num; + resp->sm_sl = attr->sm_sl; + resp->subnet_timeout = attr->subnet_timeout; + resp->init_type_reply = attr->init_type_reply; + resp->active_width = attr->active_width; + resp->active_speed = attr->active_speed; + resp->phys_state = attr->phys_state; + resp->link_layer = rdma_port_get_link_layer(ib_dev, port_num); +} + +static int UVERBS_HANDLER(UVERBS_METHOD_QUERY_PORT)( + struct uverbs_attr_bundle *attrs) +{ + struct ib_device *ib_dev = attrs->ufile->device->ib_dev; + struct ib_port_attr attr = {}; + struct ib_uverbs_query_port_resp_ex resp = {}; + int ret; + u8 port_num; + + /* FIXME: Extend the UAPI_DEF_OBJ_NEEDS_FN stuff.. */ + if (!ib_dev->ops.query_port) + return -EOPNOTSUPP; + + ret = uverbs_get_const(&port_num, attrs, + UVERBS_ATTR_QUERY_PORT_PORT_NUM); + if (ret) + return ret; + + ret = ib_query_port(ib_dev, port_num, &attr); + if (ret) + return ret; + + copy_port_attr_to_resp(&attr, &resp.legacy_resp, ib_dev, port_num); + resp.port_cap_flags2 = attr.port_cap_flags2; + + return uverbs_copy_to_struct_or_zero(attrs, UVERBS_ATTR_QUERY_PORT_RESP, + &resp, sizeof(resp)); +} + +DECLARE_UVERBS_NAMED_METHOD( + UVERBS_METHOD_INFO_HANDLES, + /* Also includes any device specific object ids */ + UVERBS_ATTR_CONST_IN(UVERBS_ATTR_INFO_OBJECT_ID, + enum uverbs_default_objects, UA_MANDATORY), + UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_INFO_TOTAL_HANDLES, + UVERBS_ATTR_TYPE(u32), UA_OPTIONAL), + UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_INFO_HANDLES_LIST, + UVERBS_ATTR_MIN_SIZE(sizeof(u32)), UA_OPTIONAL)); + +DECLARE_UVERBS_NAMED_METHOD( + UVERBS_METHOD_QUERY_PORT, + UVERBS_ATTR_CONST_IN(UVERBS_ATTR_QUERY_PORT_PORT_NUM, u8, UA_MANDATORY), + UVERBS_ATTR_PTR_OUT( + UVERBS_ATTR_QUERY_PORT_RESP, + UVERBS_ATTR_STRUCT(struct ib_uverbs_query_port_resp_ex, + reserved), + UA_MANDATORY)); + +DECLARE_UVERBS_GLOBAL_METHODS(UVERBS_OBJECT_DEVICE, + &UVERBS_METHOD(UVERBS_METHOD_INVOKE_WRITE), + &UVERBS_METHOD(UVERBS_METHOD_INFO_HANDLES), + &UVERBS_METHOD(UVERBS_METHOD_QUERY_PORT)); + +const struct uapi_definition uverbs_def_obj_device[] = { + UAPI_DEF_CHAIN_OBJ_TREE_NAMED(UVERBS_OBJECT_DEVICE), + {}, +}; diff --git a/drivers/infiniband/core/uverbs_std_types_dm.c b/drivers/infiniband/core/uverbs_std_types_dm.c index edc3ff7733d4..2ef70637bee1 100644 --- a/drivers/infiniband/core/uverbs_std_types_dm.c +++ b/drivers/infiniband/core/uverbs_std_types_dm.c @@ -43,12 +43,11 @@ static int uverbs_free_dm(struct ib_uobject *uobject, if (ret) return ret; - return dm->device->dealloc_dm(dm); + return dm->device->ops.dealloc_dm(dm); } -static int -UVERBS_HANDLER(UVERBS_METHOD_DM_ALLOC)(struct ib_uverbs_file *file, - struct uverbs_attr_bundle *attrs) +static int UVERBS_HANDLER(UVERBS_METHOD_DM_ALLOC)( + struct uverbs_attr_bundle *attrs) { struct ib_dm_alloc_attr attr = {}; struct ib_uobject *uobj = @@ -58,7 +57,7 @@ UVERBS_HANDLER(UVERBS_METHOD_DM_ALLOC)(struct ib_uverbs_file *file, struct ib_dm *dm; int ret; - if (!ib_dev->alloc_dm) + if (!ib_dev->ops.alloc_dm) return -EOPNOTSUPP; ret = uverbs_copy_from(&attr.length, attrs, @@ -71,7 +70,7 @@ UVERBS_HANDLER(UVERBS_METHOD_DM_ALLOC)(struct ib_uverbs_file *file, if (ret) return ret; - dm = ib_dev->alloc_dm(ib_dev, uobj->context, &attr, attrs); + dm = ib_dev->ops.alloc_dm(ib_dev, uobj->context, &attr, attrs); if (IS_ERR(dm)) return PTR_ERR(dm); @@ -109,3 +108,9 @@ DECLARE_UVERBS_NAMED_OBJECT(UVERBS_OBJECT_DM, UVERBS_TYPE_ALLOC_IDR(uverbs_free_dm), &UVERBS_METHOD(UVERBS_METHOD_DM_ALLOC), &UVERBS_METHOD(UVERBS_METHOD_DM_FREE)); + +const struct uapi_definition uverbs_def_obj_dm[] = { + UAPI_DEF_CHAIN_OBJ_TREE_NAMED(UVERBS_OBJECT_DM, + UAPI_DEF_OBJ_NEEDS_FN(dealloc_dm)), + {} +}; diff --git a/drivers/infiniband/core/uverbs_std_types_flow_action.c b/drivers/infiniband/core/uverbs_std_types_flow_action.c index cb9486ad5c67..4962b87fa600 100644 --- a/drivers/infiniband/core/uverbs_std_types_flow_action.c +++ b/drivers/infiniband/core/uverbs_std_types_flow_action.c @@ -43,7 +43,7 @@ static int uverbs_free_flow_action(struct ib_uobject *uobject, if (ret) return ret; - return action->device->destroy_flow_action(action); + return action->device->ops.destroy_flow_action(action); } static u64 esp_flags_uverbs_to_verbs(struct uverbs_attr_bundle *attrs, @@ -223,7 +223,6 @@ struct ib_flow_action_esp_attr { #define ESP_LAST_SUPPORTED_FLAG IB_UVERBS_FLOW_ACTION_ESP_FLAGS_ESN_NEW_WINDOW static int parse_flow_action_esp(struct ib_device *ib_dev, - struct ib_uverbs_file *file, struct uverbs_attr_bundle *attrs, struct ib_flow_action_esp_attr *esp_attr, bool is_modify) @@ -305,7 +304,7 @@ static int parse_flow_action_esp(struct ib_device *ib_dev, } static int UVERBS_HANDLER(UVERBS_METHOD_FLOW_ACTION_ESP_CREATE)( - struct ib_uverbs_file *file, struct uverbs_attr_bundle *attrs) + struct uverbs_attr_bundle *attrs) { struct ib_uobject *uobj = uverbs_attr_get_uobject( attrs, UVERBS_ATTR_CREATE_FLOW_ACTION_ESP_HANDLE); @@ -314,15 +313,16 @@ static int UVERBS_HANDLER(UVERBS_METHOD_FLOW_ACTION_ESP_CREATE)( struct ib_flow_action *action; struct ib_flow_action_esp_attr esp_attr = {}; - if (!ib_dev->create_flow_action_esp) + if (!ib_dev->ops.create_flow_action_esp) return -EOPNOTSUPP; - ret = parse_flow_action_esp(ib_dev, file, attrs, &esp_attr, false); + ret = parse_flow_action_esp(ib_dev, attrs, &esp_attr, false); if (ret) return ret; /* No need to check as this attribute is marked as MANDATORY */ - action = ib_dev->create_flow_action_esp(ib_dev, &esp_attr.hdr, attrs); + action = ib_dev->ops.create_flow_action_esp(ib_dev, &esp_attr.hdr, + attrs); if (IS_ERR(action)) return PTR_ERR(action); @@ -333,7 +333,7 @@ static int UVERBS_HANDLER(UVERBS_METHOD_FLOW_ACTION_ESP_CREATE)( } static int UVERBS_HANDLER(UVERBS_METHOD_FLOW_ACTION_ESP_MODIFY)( - struct ib_uverbs_file *file, struct uverbs_attr_bundle *attrs) + struct uverbs_attr_bundle *attrs) { struct ib_uobject *uobj = uverbs_attr_get_uobject( attrs, UVERBS_ATTR_MODIFY_FLOW_ACTION_ESP_HANDLE); @@ -341,19 +341,19 @@ static int UVERBS_HANDLER(UVERBS_METHOD_FLOW_ACTION_ESP_MODIFY)( int ret; struct ib_flow_action_esp_attr esp_attr = {}; - if (!action->device->modify_flow_action_esp) + if (!action->device->ops.modify_flow_action_esp) return -EOPNOTSUPP; - ret = parse_flow_action_esp(action->device, file, attrs, &esp_attr, - true); + ret = parse_flow_action_esp(action->device, attrs, &esp_attr, true); if (ret) return ret; if (action->type != IB_FLOW_ACTION_ESP) return -EINVAL; - return action->device->modify_flow_action_esp(action, &esp_attr.hdr, - attrs); + return action->device->ops.modify_flow_action_esp(action, + &esp_attr.hdr, + attrs); } static const struct uverbs_attr_spec uverbs_flow_action_esp_keymat[] = { @@ -438,3 +438,10 @@ DECLARE_UVERBS_NAMED_OBJECT( &UVERBS_METHOD(UVERBS_METHOD_FLOW_ACTION_ESP_CREATE), &UVERBS_METHOD(UVERBS_METHOD_FLOW_ACTION_DESTROY), &UVERBS_METHOD(UVERBS_METHOD_FLOW_ACTION_ESP_MODIFY)); + +const struct uapi_definition uverbs_def_obj_flow_action[] = { + UAPI_DEF_CHAIN_OBJ_TREE_NAMED( + UVERBS_OBJECT_FLOW_ACTION, + UAPI_DEF_OBJ_NEEDS_FN(destroy_flow_action)), + {} +}; diff --git a/drivers/infiniband/core/uverbs_std_types_mr.c b/drivers/infiniband/core/uverbs_std_types_mr.c index cf02e774303e..4d4be0c2b752 100644 --- a/drivers/infiniband/core/uverbs_std_types_mr.c +++ b/drivers/infiniband/core/uverbs_std_types_mr.c @@ -39,8 +39,44 @@ static int uverbs_free_mr(struct ib_uobject *uobject, return ib_dereg_mr((struct ib_mr *)uobject->object); } +static int UVERBS_HANDLER(UVERBS_METHOD_ADVISE_MR)( + struct uverbs_attr_bundle *attrs) +{ + struct ib_pd *pd = + uverbs_attr_get_obj(attrs, UVERBS_ATTR_ADVISE_MR_PD_HANDLE); + enum ib_uverbs_advise_mr_advice advice; + struct ib_device *ib_dev = pd->device; + struct ib_sge *sg_list; + int num_sge; + u32 flags; + int ret; + + /* FIXME: Extend the UAPI_DEF_OBJ_NEEDS_FN stuff.. */ + if (!ib_dev->ops.advise_mr) + return -EOPNOTSUPP; + + ret = uverbs_get_const(&advice, attrs, UVERBS_ATTR_ADVISE_MR_ADVICE); + if (ret) + return ret; + + ret = uverbs_get_flags32(&flags, attrs, UVERBS_ATTR_ADVISE_MR_FLAGS, + IB_UVERBS_ADVISE_MR_FLAG_FLUSH); + if (ret) + return ret; + + num_sge = uverbs_attr_ptr_get_array_size( + attrs, UVERBS_ATTR_ADVISE_MR_SGE_LIST, sizeof(struct ib_sge)); + if (num_sge < 0) + return num_sge; + + sg_list = uverbs_attr_get_alloced_ptr(attrs, + UVERBS_ATTR_ADVISE_MR_SGE_LIST); + return ib_dev->ops.advise_mr(pd, advice, flags, sg_list, num_sge, + attrs); +} + static int UVERBS_HANDLER(UVERBS_METHOD_DM_MR_REG)( - struct ib_uverbs_file *file, struct uverbs_attr_bundle *attrs) + struct uverbs_attr_bundle *attrs) { struct ib_dm_mr_attr attr = {}; struct ib_uobject *uobj = @@ -54,7 +90,7 @@ static int UVERBS_HANDLER(UVERBS_METHOD_DM_MR_REG)( struct ib_mr *mr; int ret; - if (!ib_dev->reg_dm_mr) + if (!ib_dev->ops.reg_dm_mr) return -EOPNOTSUPP; ret = uverbs_copy_from(&attr.offset, attrs, UVERBS_ATTR_REG_DM_MR_OFFSET); @@ -83,7 +119,7 @@ static int UVERBS_HANDLER(UVERBS_METHOD_DM_MR_REG)( attr.length > dm->length - attr.offset) return -EINVAL; - mr = pd->device->reg_dm_mr(pd, dm, &attr, attrs); + mr = pd->device->ops.reg_dm_mr(pd, dm, &attr, attrs); if (IS_ERR(mr)) return PTR_ERR(mr); @@ -115,6 +151,23 @@ err_dereg: } DECLARE_UVERBS_NAMED_METHOD( + UVERBS_METHOD_ADVISE_MR, + UVERBS_ATTR_IDR(UVERBS_ATTR_ADVISE_MR_PD_HANDLE, + UVERBS_OBJECT_PD, + UVERBS_ACCESS_READ, + UA_MANDATORY), + UVERBS_ATTR_CONST_IN(UVERBS_ATTR_ADVISE_MR_ADVICE, + enum ib_uverbs_advise_mr_advice, + UA_MANDATORY), + UVERBS_ATTR_FLAGS_IN(UVERBS_ATTR_ADVISE_MR_FLAGS, + enum ib_uverbs_advise_mr_flag, + UA_MANDATORY), + UVERBS_ATTR_PTR_IN(UVERBS_ATTR_ADVISE_MR_SGE_LIST, + UVERBS_ATTR_MIN_SIZE(sizeof(struct ib_uverbs_sge)), + UA_MANDATORY, + UA_ALLOC_AND_COPY)); + +DECLARE_UVERBS_NAMED_METHOD( UVERBS_METHOD_DM_MR_REG, UVERBS_ATTR_IDR(UVERBS_ATTR_REG_DM_MR_HANDLE, UVERBS_OBJECT_MR, @@ -143,7 +196,22 @@ DECLARE_UVERBS_NAMED_METHOD( UVERBS_ATTR_TYPE(u32), UA_MANDATORY)); +DECLARE_UVERBS_NAMED_METHOD_DESTROY( + UVERBS_METHOD_MR_DESTROY, + UVERBS_ATTR_IDR(UVERBS_ATTR_DESTROY_MR_HANDLE, + UVERBS_OBJECT_MR, + UVERBS_ACCESS_DESTROY, + UA_MANDATORY)); + DECLARE_UVERBS_NAMED_OBJECT( UVERBS_OBJECT_MR, UVERBS_TYPE_ALLOC_IDR(uverbs_free_mr), - &UVERBS_METHOD(UVERBS_METHOD_DM_MR_REG)); + &UVERBS_METHOD(UVERBS_METHOD_DM_MR_REG), + &UVERBS_METHOD(UVERBS_METHOD_MR_DESTROY), + &UVERBS_METHOD(UVERBS_METHOD_ADVISE_MR)); + +const struct uapi_definition uverbs_def_obj_mr[] = { + UAPI_DEF_CHAIN_OBJ_TREE_NAMED(UVERBS_OBJECT_MR, + UAPI_DEF_OBJ_NEEDS_FN(dereg_mr)), + {} +}; diff --git a/drivers/infiniband/core/uverbs_uapi.c b/drivers/infiniband/core/uverbs_uapi.c index 86f3fc5e04b4..9ae08e4b78a3 100644 --- a/drivers/infiniband/core/uverbs_uapi.c +++ b/drivers/infiniband/core/uverbs_uapi.c @@ -8,6 +8,11 @@ #include "rdma_core.h" #include "uverbs.h" +static int ib_uverbs_notsupp(struct uverbs_attr_bundle *attrs) +{ + return -EOPNOTSUPP; +} + static void *uapi_add_elm(struct uverbs_api *uapi, u32 key, size_t alloc_size) { void *elm; @@ -26,6 +31,70 @@ static void *uapi_add_elm(struct uverbs_api *uapi, u32 key, size_t alloc_size) return elm; } +static void *uapi_add_get_elm(struct uverbs_api *uapi, u32 key, + size_t alloc_size, bool *exists) +{ + void *elm; + + elm = uapi_add_elm(uapi, key, alloc_size); + if (!IS_ERR(elm)) { + *exists = false; + return elm; + } + + if (elm != ERR_PTR(-EEXIST)) + return elm; + + elm = radix_tree_lookup(&uapi->radix, key); + if (WARN_ON(!elm)) + return ERR_PTR(-EINVAL); + *exists = true; + return elm; +} + +static int uapi_create_write(struct uverbs_api *uapi, + struct ib_device *ibdev, + const struct uapi_definition *def, + u32 obj_key, + u32 *cur_method_key) +{ + struct uverbs_api_write_method *method_elm; + u32 method_key = obj_key; + bool exists; + + if (def->write.is_ex) + method_key |= uapi_key_write_ex_method(def->write.command_num); + else + method_key |= uapi_key_write_method(def->write.command_num); + + method_elm = uapi_add_get_elm(uapi, method_key, sizeof(*method_elm), + &exists); + if (IS_ERR(method_elm)) + return PTR_ERR(method_elm); + + if (WARN_ON(exists && (def->write.is_ex != method_elm->is_ex))) + return -EINVAL; + + method_elm->is_ex = def->write.is_ex; + method_elm->handler = def->func_write; + if (def->write.is_ex) + method_elm->disabled = !(ibdev->uverbs_ex_cmd_mask & + BIT_ULL(def->write.command_num)); + else + method_elm->disabled = !(ibdev->uverbs_cmd_mask & + BIT_ULL(def->write.command_num)); + + if (!def->write.is_ex && def->func_write) { + method_elm->has_udata = def->write.has_udata; + method_elm->has_resp = def->write.has_resp; + method_elm->req_size = def->write.req_size; + method_elm->resp_size = def->write.resp_size; + } + + *cur_method_key = method_key; + return 0; +} + static int uapi_merge_method(struct uverbs_api *uapi, struct uverbs_api_object *obj_elm, u32 obj_key, const struct uverbs_method_def *method, @@ -34,23 +103,21 @@ static int uapi_merge_method(struct uverbs_api *uapi, u32 method_key = obj_key | uapi_key_ioctl_method(method->id); struct uverbs_api_ioctl_method *method_elm; unsigned int i; + bool exists; if (!method->attrs) return 0; - method_elm = uapi_add_elm(uapi, method_key, sizeof(*method_elm)); - if (IS_ERR(method_elm)) { - if (method_elm != ERR_PTR(-EEXIST)) - return PTR_ERR(method_elm); - + method_elm = uapi_add_get_elm(uapi, method_key, sizeof(*method_elm), + &exists); + if (IS_ERR(method_elm)) + return PTR_ERR(method_elm); + if (exists) { /* * This occurs when a driver uses ADD_UVERBS_ATTRIBUTES_SIMPLE */ if (WARN_ON(method->handler)) return -EINVAL; - method_elm = radix_tree_lookup(&uapi->radix, method_key); - if (WARN_ON(!method_elm)) - return -EINVAL; } else { WARN_ON(!method->handler); rcu_assign_pointer(method_elm->handler, method->handler); @@ -98,72 +165,183 @@ static int uapi_merge_method(struct uverbs_api *uapi, return 0; } -static int uapi_merge_tree(struct uverbs_api *uapi, - const struct uverbs_object_tree_def *tree, - bool is_driver) +static int uapi_merge_obj_tree(struct uverbs_api *uapi, + const struct uverbs_object_def *obj, + bool is_driver) { - unsigned int i, j; + struct uverbs_api_object *obj_elm; + unsigned int i; + u32 obj_key; + bool exists; int rc; - if (!tree->objects) + obj_key = uapi_key_obj(obj->id); + obj_elm = uapi_add_get_elm(uapi, obj_key, sizeof(*obj_elm), &exists); + if (IS_ERR(obj_elm)) + return PTR_ERR(obj_elm); + + if (obj->type_attrs) { + if (WARN_ON(obj_elm->type_attrs)) + return -EINVAL; + + obj_elm->id = obj->id; + obj_elm->type_attrs = obj->type_attrs; + obj_elm->type_class = obj->type_attrs->type_class; + /* + * Today drivers are only permitted to use idr_class + * types. They cannot use FD types because we currently have + * no way to revoke the fops pointer after device + * disassociation. + */ + if (WARN_ON(is_driver && + obj->type_attrs->type_class != &uverbs_idr_class)) + return -EINVAL; + } + + if (!obj->methods) return 0; - for (i = 0; i != tree->num_objects; i++) { - const struct uverbs_object_def *obj = (*tree->objects)[i]; - struct uverbs_api_object *obj_elm; - u32 obj_key; + for (i = 0; i != obj->num_methods; i++) { + const struct uverbs_method_def *method = (*obj->methods)[i]; - if (!obj) + if (!method) continue; - obj_key = uapi_key_obj(obj->id); - obj_elm = uapi_add_elm(uapi, obj_key, sizeof(*obj_elm)); - if (IS_ERR(obj_elm)) { - if (obj_elm != ERR_PTR(-EEXIST)) - return PTR_ERR(obj_elm); + rc = uapi_merge_method(uapi, obj_elm, obj_key, method, + is_driver); + if (rc) + return rc; + } - /* This occurs when a driver uses ADD_UVERBS_METHODS */ - if (WARN_ON(obj->type_attrs)) - return -EINVAL; - obj_elm = radix_tree_lookup(&uapi->radix, obj_key); - if (WARN_ON(!obj_elm)) + return 0; +} + +static int uapi_disable_elm(struct uverbs_api *uapi, + const struct uapi_definition *def, + u32 obj_key, + u32 method_key) +{ + bool exists; + + if (def->scope == UAPI_SCOPE_OBJECT) { + struct uverbs_api_object *obj_elm; + + obj_elm = uapi_add_get_elm( + uapi, obj_key, sizeof(*obj_elm), &exists); + if (IS_ERR(obj_elm)) + return PTR_ERR(obj_elm); + obj_elm->disabled = 1; + return 0; + } + + if (def->scope == UAPI_SCOPE_METHOD && + uapi_key_is_ioctl_method(method_key)) { + struct uverbs_api_ioctl_method *method_elm; + + method_elm = uapi_add_get_elm(uapi, method_key, + sizeof(*method_elm), &exists); + if (IS_ERR(method_elm)) + return PTR_ERR(method_elm); + method_elm->disabled = 1; + return 0; + } + + if (def->scope == UAPI_SCOPE_METHOD && + (uapi_key_is_write_method(method_key) || + uapi_key_is_write_ex_method(method_key))) { + struct uverbs_api_write_method *write_elm; + + write_elm = uapi_add_get_elm(uapi, method_key, + sizeof(*write_elm), &exists); + if (IS_ERR(write_elm)) + return PTR_ERR(write_elm); + write_elm->disabled = 1; + return 0; + } + + WARN_ON(true); + return -EINVAL; +} + +static int uapi_merge_def(struct uverbs_api *uapi, struct ib_device *ibdev, + const struct uapi_definition *def_list, + bool is_driver) +{ + const struct uapi_definition *def = def_list; + u32 cur_obj_key = UVERBS_API_KEY_ERR; + u32 cur_method_key = UVERBS_API_KEY_ERR; + bool exists; + int rc; + + if (!def_list) + return 0; + + for (;; def++) { + switch ((enum uapi_definition_kind)def->kind) { + case UAPI_DEF_CHAIN: + rc = uapi_merge_def(uapi, ibdev, def->chain, is_driver); + if (rc) + return rc; + continue; + + case UAPI_DEF_CHAIN_OBJ_TREE: + if (WARN_ON(def->object_start.object_id != + def->chain_obj_tree->id)) return -EINVAL; - } else { - obj_elm->type_attrs = obj->type_attrs; - if (obj->type_attrs) { - obj_elm->type_class = - obj->type_attrs->type_class; - /* - * Today drivers are only permitted to use - * idr_class types. They cannot use FD types - * because we currently have no way to revoke - * the fops pointer after device - * disassociation. - */ - if (WARN_ON(is_driver && - obj->type_attrs->type_class != - &uverbs_idr_class)) - return -EINVAL; - } - } - if (!obj->methods) + cur_obj_key = uapi_key_obj(def->object_start.object_id); + rc = uapi_merge_obj_tree(uapi, def->chain_obj_tree, + is_driver); + if (rc) + return rc; continue; - for (j = 0; j != obj->num_methods; j++) { - const struct uverbs_method_def *method = - (*obj->methods)[j]; - if (!method) + case UAPI_DEF_END: + return 0; + + case UAPI_DEF_IS_SUPPORTED_DEV_FN: { + void **ibdev_fn = + (void *)(&ibdev->ops) + def->needs_fn_offset; + + if (*ibdev_fn) continue; + rc = uapi_disable_elm( + uapi, def, cur_obj_key, cur_method_key); + if (rc) + return rc; + continue; + } - rc = uapi_merge_method(uapi, obj_elm, obj_key, method, - is_driver); + case UAPI_DEF_IS_SUPPORTED_FUNC: + if (def->func_is_supported(ibdev)) + continue; + rc = uapi_disable_elm( + uapi, def, cur_obj_key, cur_method_key); if (rc) return rc; + continue; + + case UAPI_DEF_OBJECT_START: { + struct uverbs_api_object *obj_elm; + + cur_obj_key = uapi_key_obj(def->object_start.object_id); + obj_elm = uapi_add_get_elm(uapi, cur_obj_key, + sizeof(*obj_elm), &exists); + if (IS_ERR(obj_elm)) + return PTR_ERR(obj_elm); + continue; } - } - return 0; + case UAPI_DEF_WRITE: + rc = uapi_create_write( + uapi, ibdev, def, cur_obj_key, &cur_method_key); + if (rc) + return rc; + continue; + } + WARN_ON(true); + return -EINVAL; + } } static int @@ -186,13 +364,16 @@ uapi_finalize_ioctl_method(struct uverbs_api *uapi, u32 attr_bkey = uapi_bkey_attr(attr_key); u8 type = elm->spec.type; - if (uapi_key_attr_to_method(iter.index) != - uapi_key_attr_to_method(method_key)) + if (uapi_key_attr_to_ioctl_method(iter.index) != + uapi_key_attr_to_ioctl_method(method_key)) break; if (elm->spec.mandatory) __set_bit(attr_bkey, method_elm->attr_mandatory); + if (elm->spec.is_udata) + method_elm->has_udata = true; + if (type == UVERBS_ATTR_TYPE_IDR || type == UVERBS_ATTR_TYPE_FD) { u8 access = elm->spec.u.obj.access; @@ -229,9 +410,13 @@ uapi_finalize_ioctl_method(struct uverbs_api *uapi, static int uapi_finalize(struct uverbs_api *uapi) { + const struct uverbs_api_write_method **data; + unsigned long max_write_ex = 0; + unsigned long max_write = 0; struct radix_tree_iter iter; void __rcu **slot; int rc; + int i; radix_tree_for_each_slot (slot, &uapi->radix, &iter, 0) { struct uverbs_api_ioctl_method *method_elm = @@ -243,29 +428,209 @@ static int uapi_finalize(struct uverbs_api *uapi) if (rc) return rc; } + + if (uapi_key_is_write_method(iter.index)) + max_write = max(max_write, + iter.index & UVERBS_API_ATTR_KEY_MASK); + if (uapi_key_is_write_ex_method(iter.index)) + max_write_ex = + max(max_write_ex, + iter.index & UVERBS_API_ATTR_KEY_MASK); + } + + uapi->notsupp_method.handler = ib_uverbs_notsupp; + uapi->num_write = max_write + 1; + uapi->num_write_ex = max_write_ex + 1; + data = kmalloc_array(uapi->num_write + uapi->num_write_ex, + sizeof(*uapi->write_methods), GFP_KERNEL); + for (i = 0; i != uapi->num_write + uapi->num_write_ex; i++) + data[i] = &uapi->notsupp_method; + uapi->write_methods = data; + uapi->write_ex_methods = data + uapi->num_write; + + radix_tree_for_each_slot (slot, &uapi->radix, &iter, 0) { + if (uapi_key_is_write_method(iter.index)) + uapi->write_methods[iter.index & + UVERBS_API_ATTR_KEY_MASK] = + rcu_dereference_protected(*slot, true); + if (uapi_key_is_write_ex_method(iter.index)) + uapi->write_ex_methods[iter.index & + UVERBS_API_ATTR_KEY_MASK] = + rcu_dereference_protected(*slot, true); } return 0; } -void uverbs_destroy_api(struct uverbs_api *uapi) +static void uapi_remove_range(struct uverbs_api *uapi, u32 start, u32 last) { struct radix_tree_iter iter; void __rcu **slot; - if (!uapi) - return; - - radix_tree_for_each_slot (slot, &uapi->radix, &iter, 0) { + radix_tree_for_each_slot (slot, &uapi->radix, &iter, start) { + if (iter.index > last) + return; kfree(rcu_dereference_protected(*slot, true)); radix_tree_iter_delete(&uapi->radix, &iter, slot); } +} + +static void uapi_remove_object(struct uverbs_api *uapi, u32 obj_key) +{ + uapi_remove_range(uapi, obj_key, + obj_key | UVERBS_API_METHOD_KEY_MASK | + UVERBS_API_ATTR_KEY_MASK); +} + +static void uapi_remove_method(struct uverbs_api *uapi, u32 method_key) +{ + uapi_remove_range(uapi, method_key, + method_key | UVERBS_API_ATTR_KEY_MASK); +} + + +static u32 uapi_get_obj_id(struct uverbs_attr_spec *spec) +{ + if (spec->type == UVERBS_ATTR_TYPE_IDR || + spec->type == UVERBS_ATTR_TYPE_FD) + return spec->u.obj.obj_type; + if (spec->type == UVERBS_ATTR_TYPE_IDRS_ARRAY) + return spec->u2.objs_arr.obj_type; + return UVERBS_API_KEY_ERR; +} + +static void uapi_key_okay(u32 key) +{ + unsigned int count = 0; + + if (uapi_key_is_object(key)) + count++; + if (uapi_key_is_ioctl_method(key)) + count++; + if (uapi_key_is_write_method(key)) + count++; + if (uapi_key_is_write_ex_method(key)) + count++; + if (uapi_key_is_attr(key)) + count++; + WARN(count != 1, "Bad count %d key=%x", count, key); +} + +static void uapi_finalize_disable(struct uverbs_api *uapi) +{ + struct radix_tree_iter iter; + u32 starting_key = 0; + bool scan_again = false; + void __rcu **slot; + +again: + radix_tree_for_each_slot (slot, &uapi->radix, &iter, starting_key) { + uapi_key_okay(iter.index); + + if (uapi_key_is_object(iter.index)) { + struct uverbs_api_object *obj_elm = + rcu_dereference_protected(*slot, true); + + if (obj_elm->disabled) { + /* Have to check all the attrs again */ + scan_again = true; + starting_key = iter.index; + uapi_remove_object(uapi, iter.index); + goto again; + } + continue; + } + + if (uapi_key_is_ioctl_method(iter.index)) { + struct uverbs_api_ioctl_method *method_elm = + rcu_dereference_protected(*slot, true); + + if (method_elm->disabled) { + starting_key = iter.index; + uapi_remove_method(uapi, iter.index); + goto again; + } + continue; + } + + if (uapi_key_is_write_method(iter.index) || + uapi_key_is_write_ex_method(iter.index)) { + struct uverbs_api_write_method *method_elm = + rcu_dereference_protected(*slot, true); + + if (method_elm->disabled) { + kfree(method_elm); + radix_tree_iter_delete(&uapi->radix, &iter, slot); + } + continue; + } + + if (uapi_key_is_attr(iter.index)) { + struct uverbs_api_attr *attr_elm = + rcu_dereference_protected(*slot, true); + const struct uverbs_api_object *tmp_obj; + u32 obj_key; + + /* + * If the method has a mandatory object handle + * attribute which relies on an object which is not + * present then the entire method is uncallable. + */ + if (!attr_elm->spec.mandatory) + continue; + obj_key = uapi_get_obj_id(&attr_elm->spec); + if (obj_key == UVERBS_API_KEY_ERR) + continue; + tmp_obj = uapi_get_object(uapi, obj_key); + if (IS_ERR(tmp_obj)) { + if (PTR_ERR(tmp_obj) == -ENOMSG) + continue; + } else { + if (!tmp_obj->disabled) + continue; + } + + starting_key = iter.index; + uapi_remove_method( + uapi, + iter.index & (UVERBS_API_OBJ_KEY_MASK | + UVERBS_API_METHOD_KEY_MASK)); + goto again; + } + + WARN_ON(false); + } + + if (!scan_again) + return; + scan_again = false; + starting_key = 0; + goto again; +} + +void uverbs_destroy_api(struct uverbs_api *uapi) +{ + if (!uapi) + return; + + uapi_remove_range(uapi, 0, U32_MAX); + kfree(uapi->write_methods); kfree(uapi); } -struct uverbs_api *uverbs_alloc_api( - const struct uverbs_object_tree_def *const *driver_specs, - enum rdma_driver_id driver_id) +static const struct uapi_definition uverbs_core_api[] = { + UAPI_DEF_CHAIN(uverbs_def_obj_counters), + UAPI_DEF_CHAIN(uverbs_def_obj_cq), + UAPI_DEF_CHAIN(uverbs_def_obj_device), + UAPI_DEF_CHAIN(uverbs_def_obj_dm), + UAPI_DEF_CHAIN(uverbs_def_obj_flow_action), + UAPI_DEF_CHAIN(uverbs_def_obj_intf), + UAPI_DEF_CHAIN(uverbs_def_obj_mr), + UAPI_DEF_CHAIN(uverbs_def_write_intf), + {}, +}; + +struct uverbs_api *uverbs_alloc_api(struct ib_device *ibdev) { struct uverbs_api *uapi; int rc; @@ -275,18 +640,16 @@ struct uverbs_api *uverbs_alloc_api( return ERR_PTR(-ENOMEM); INIT_RADIX_TREE(&uapi->radix, GFP_KERNEL); - uapi->driver_id = driver_id; + uapi->driver_id = ibdev->driver_id; - rc = uapi_merge_tree(uapi, uverbs_default_get_objects(), false); + rc = uapi_merge_def(uapi, ibdev, uverbs_core_api, false); + if (rc) + goto err; + rc = uapi_merge_def(uapi, ibdev, ibdev->driver_def, true); if (rc) goto err; - for (; driver_specs && *driver_specs; driver_specs++) { - rc = uapi_merge_tree(uapi, *driver_specs, true); - if (rc) - goto err; - } - + uapi_finalize_disable(uapi); rc = uapi_finalize(uapi); if (rc) goto err; @@ -294,8 +657,9 @@ struct uverbs_api *uverbs_alloc_api( return uapi; err: if (rc != -ENOMEM) - pr_err("Setup of uverbs_api failed, kernel parsing tree description is not valid (%d)??\n", - rc); + dev_err(&ibdev->dev, + "Setup of uverbs_api failed, kernel parsing tree description is not valid (%d)??\n", + rc); uverbs_destroy_api(uapi); return ERR_PTR(rc); diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c index 178899e3ce73..ac011836bb54 100644 --- a/drivers/infiniband/core/verbs.c +++ b/drivers/infiniband/core/verbs.c @@ -141,6 +141,10 @@ __attribute_const__ int ib_rate_to_mult(enum ib_rate rate) case IB_RATE_100_GBPS: return 40; case IB_RATE_200_GBPS: return 80; case IB_RATE_300_GBPS: return 120; + case IB_RATE_28_GBPS: return 11; + case IB_RATE_50_GBPS: return 20; + case IB_RATE_400_GBPS: return 160; + case IB_RATE_600_GBPS: return 240; default: return -1; } } @@ -166,6 +170,10 @@ __attribute_const__ enum ib_rate mult_to_ib_rate(int mult) case 40: return IB_RATE_100_GBPS; case 80: return IB_RATE_200_GBPS; case 120: return IB_RATE_300_GBPS; + case 11: return IB_RATE_28_GBPS; + case 20: return IB_RATE_50_GBPS; + case 160: return IB_RATE_400_GBPS; + case 240: return IB_RATE_600_GBPS; default: return IB_RATE_PORT_CURRENT; } } @@ -191,6 +199,10 @@ __attribute_const__ int ib_rate_to_mbps(enum ib_rate rate) case IB_RATE_100_GBPS: return 103125; case IB_RATE_200_GBPS: return 206250; case IB_RATE_300_GBPS: return 309375; + case IB_RATE_28_GBPS: return 28125; + case IB_RATE_50_GBPS: return 53125; + case IB_RATE_400_GBPS: return 425000; + case IB_RATE_600_GBPS: return 637500; default: return -1; } } @@ -214,8 +226,8 @@ EXPORT_SYMBOL(rdma_node_get_transport); enum rdma_link_layer rdma_port_get_link_layer(struct ib_device *device, u8 port_num) { enum rdma_transport_type lt; - if (device->get_link_layer) - return device->get_link_layer(device, port_num); + if (device->ops.get_link_layer) + return device->ops.get_link_layer(device, port_num); lt = rdma_node_get_transport(device->node_type); if (lt == RDMA_TRANSPORT_IB) @@ -243,7 +255,7 @@ struct ib_pd *__ib_alloc_pd(struct ib_device *device, unsigned int flags, struct ib_pd *pd; int mr_access_flags = 0; - pd = device->alloc_pd(device, NULL, NULL); + pd = device->ops.alloc_pd(device, NULL, NULL); if (IS_ERR(pd)) return pd; @@ -265,12 +277,12 @@ struct ib_pd *__ib_alloc_pd(struct ib_device *device, unsigned int flags, pd->res.type = RDMA_RESTRACK_PD; rdma_restrack_set_task(&pd->res, caller); - rdma_restrack_add(&pd->res); + rdma_restrack_kadd(&pd->res); if (mr_access_flags) { struct ib_mr *mr; - mr = pd->device->get_dma_mr(pd, mr_access_flags); + mr = pd->device->ops.get_dma_mr(pd, mr_access_flags); if (IS_ERR(mr)) { ib_dealloc_pd(pd); return ERR_CAST(mr); @@ -307,7 +319,7 @@ void ib_dealloc_pd(struct ib_pd *pd) int ret; if (pd->__internal_mr) { - ret = pd->device->dereg_mr(pd->__internal_mr); + ret = pd->device->ops.dereg_mr(pd->__internal_mr); WARN_ON(ret); pd->__internal_mr = NULL; } @@ -319,7 +331,7 @@ void ib_dealloc_pd(struct ib_pd *pd) rdma_restrack_del(&pd->res); /* Making delalloc_pd a void return is a WIP, no driver should return an error here. */ - ret = pd->device->dealloc_pd(pd); + ret = pd->device->ops.dealloc_pd(pd); WARN_ONCE(ret, "Infiniband HW driver failed dealloc_pd"); } EXPORT_SYMBOL(ib_dealloc_pd); @@ -475,14 +487,17 @@ rdma_update_sgid_attr(struct rdma_ah_attr *ah_attr, static struct ib_ah *_rdma_create_ah(struct ib_pd *pd, struct rdma_ah_attr *ah_attr, + u32 flags, struct ib_udata *udata) { struct ib_ah *ah; - if (!pd->device->create_ah) + might_sleep_if(flags & RDMA_CREATE_AH_SLEEPABLE); + + if (!pd->device->ops.create_ah) return ERR_PTR(-EOPNOTSUPP); - ah = pd->device->create_ah(pd, ah_attr, udata); + ah = pd->device->ops.create_ah(pd, ah_attr, flags, udata); if (!IS_ERR(ah)) { ah->device = pd->device; @@ -502,12 +517,14 @@ static struct ib_ah *_rdma_create_ah(struct ib_pd *pd, * given address vector. * @pd: The protection domain associated with the address handle. * @ah_attr: The attributes of the address vector. + * @flags: Create address handle flags (see enum rdma_create_ah_flags). * * It returns 0 on success and returns appropriate error code on error. * The address handle is used to reference a local or global destination * in all UD QP post sends. */ -struct ib_ah *rdma_create_ah(struct ib_pd *pd, struct rdma_ah_attr *ah_attr) +struct ib_ah *rdma_create_ah(struct ib_pd *pd, struct rdma_ah_attr *ah_attr, + u32 flags) { const struct ib_gid_attr *old_sgid_attr; struct ib_ah *ah; @@ -517,7 +534,7 @@ struct ib_ah *rdma_create_ah(struct ib_pd *pd, struct rdma_ah_attr *ah_attr) if (ret) return ERR_PTR(ret); - ah = _rdma_create_ah(pd, ah_attr, NULL); + ah = _rdma_create_ah(pd, ah_attr, flags, NULL); rdma_unfill_sgid_attr(ah_attr, old_sgid_attr); return ah; @@ -557,7 +574,7 @@ struct ib_ah *rdma_create_user_ah(struct ib_pd *pd, } } - ah = _rdma_create_ah(pd, ah_attr, udata); + ah = _rdma_create_ah(pd, ah_attr, RDMA_CREATE_AH_SLEEPABLE, udata); out: rdma_unfill_sgid_attr(ah_attr, old_sgid_attr); @@ -869,7 +886,7 @@ struct ib_ah *ib_create_ah_from_wc(struct ib_pd *pd, const struct ib_wc *wc, if (ret) return ERR_PTR(ret); - ah = rdma_create_ah(pd, &ah_attr); + ah = rdma_create_ah(pd, &ah_attr, RDMA_CREATE_AH_SLEEPABLE); rdma_destroy_ah_attr(&ah_attr); return ah; @@ -888,8 +905,8 @@ int rdma_modify_ah(struct ib_ah *ah, struct rdma_ah_attr *ah_attr) if (ret) return ret; - ret = ah->device->modify_ah ? - ah->device->modify_ah(ah, ah_attr) : + ret = ah->device->ops.modify_ah ? + ah->device->ops.modify_ah(ah, ah_attr) : -EOPNOTSUPP; ah->sgid_attr = rdma_update_sgid_attr(ah_attr, ah->sgid_attr); @@ -902,20 +919,22 @@ int rdma_query_ah(struct ib_ah *ah, struct rdma_ah_attr *ah_attr) { ah_attr->grh.sgid_attr = NULL; - return ah->device->query_ah ? - ah->device->query_ah(ah, ah_attr) : + return ah->device->ops.query_ah ? + ah->device->ops.query_ah(ah, ah_attr) : -EOPNOTSUPP; } EXPORT_SYMBOL(rdma_query_ah); -int rdma_destroy_ah(struct ib_ah *ah) +int rdma_destroy_ah(struct ib_ah *ah, u32 flags) { const struct ib_gid_attr *sgid_attr = ah->sgid_attr; struct ib_pd *pd; int ret; + might_sleep_if(flags & RDMA_DESTROY_AH_SLEEPABLE); + pd = ah->pd; - ret = ah->device->destroy_ah(ah); + ret = ah->device->ops.destroy_ah(ah, flags); if (!ret) { atomic_dec(&pd->usecnt); if (sgid_attr) @@ -933,10 +952,10 @@ struct ib_srq *ib_create_srq(struct ib_pd *pd, { struct ib_srq *srq; - if (!pd->device->create_srq) + if (!pd->device->ops.create_srq) return ERR_PTR(-EOPNOTSUPP); - srq = pd->device->create_srq(pd, srq_init_attr, NULL); + srq = pd->device->ops.create_srq(pd, srq_init_attr, NULL); if (!IS_ERR(srq)) { srq->device = pd->device; @@ -965,17 +984,17 @@ int ib_modify_srq(struct ib_srq *srq, struct ib_srq_attr *srq_attr, enum ib_srq_attr_mask srq_attr_mask) { - return srq->device->modify_srq ? - srq->device->modify_srq(srq, srq_attr, srq_attr_mask, NULL) : - -EOPNOTSUPP; + return srq->device->ops.modify_srq ? + srq->device->ops.modify_srq(srq, srq_attr, srq_attr_mask, + NULL) : -EOPNOTSUPP; } EXPORT_SYMBOL(ib_modify_srq); int ib_query_srq(struct ib_srq *srq, struct ib_srq_attr *srq_attr) { - return srq->device->query_srq ? - srq->device->query_srq(srq, srq_attr) : -EOPNOTSUPP; + return srq->device->ops.query_srq ? + srq->device->ops.query_srq(srq, srq_attr) : -EOPNOTSUPP; } EXPORT_SYMBOL(ib_query_srq); @@ -997,7 +1016,7 @@ int ib_destroy_srq(struct ib_srq *srq) if (srq_type == IB_SRQT_XRC) xrcd = srq->ext.xrc.xrcd; - ret = srq->device->destroy_srq(srq); + ret = srq->device->ops.destroy_srq(srq); if (!ret) { atomic_dec(&pd->usecnt); if (srq_type == IB_SRQT_XRC) @@ -1106,7 +1125,7 @@ static struct ib_qp *ib_create_xrc_qp(struct ib_qp *qp, if (!IS_ERR(qp)) __ib_insert_xrcd_qp(qp_init_attr->xrcd, real_qp); else - real_qp->device->destroy_qp(real_qp); + real_qp->device->ops.destroy_qp(real_qp); return qp; } @@ -1692,10 +1711,10 @@ int ib_get_eth_speed(struct ib_device *dev, u8 port_num, u8 *speed, u8 *width) if (rdma_port_get_link_layer(dev, port_num) != IB_LINK_LAYER_ETHERNET) return -EINVAL; - if (!dev->get_netdev) + if (!dev->ops.get_netdev) return -EOPNOTSUPP; - netdev = dev->get_netdev(dev, port_num); + netdev = dev->ops.get_netdev(dev, port_num); if (!netdev) return -ENODEV; @@ -1753,9 +1772,9 @@ int ib_query_qp(struct ib_qp *qp, qp_attr->ah_attr.grh.sgid_attr = NULL; qp_attr->alt_ah_attr.grh.sgid_attr = NULL; - return qp->device->query_qp ? - qp->device->query_qp(qp->real_qp, qp_attr, qp_attr_mask, qp_init_attr) : - -EOPNOTSUPP; + return qp->device->ops.query_qp ? + qp->device->ops.query_qp(qp->real_qp, qp_attr, qp_attr_mask, + qp_init_attr) : -EOPNOTSUPP; } EXPORT_SYMBOL(ib_query_qp); @@ -1841,7 +1860,7 @@ int ib_destroy_qp(struct ib_qp *qp) rdma_rw_cleanup_mrs(qp); rdma_restrack_del(&qp->res); - ret = qp->device->destroy_qp(qp); + ret = qp->device->ops.destroy_qp(qp); if (!ret) { if (alt_path_sgid_attr) rdma_put_gid_attr(alt_path_sgid_attr); @@ -1879,7 +1898,7 @@ struct ib_cq *__ib_create_cq(struct ib_device *device, { struct ib_cq *cq; - cq = device->create_cq(device, cq_attr, NULL, NULL); + cq = device->ops.create_cq(device, cq_attr, NULL, NULL); if (!IS_ERR(cq)) { cq->device = device; @@ -1890,7 +1909,7 @@ struct ib_cq *__ib_create_cq(struct ib_device *device, atomic_set(&cq->usecnt, 0); cq->res.type = RDMA_RESTRACK_CQ; rdma_restrack_set_task(&cq->res, caller); - rdma_restrack_add(&cq->res); + rdma_restrack_kadd(&cq->res); } return cq; @@ -1899,8 +1918,9 @@ EXPORT_SYMBOL(__ib_create_cq); int rdma_set_cq_moderation(struct ib_cq *cq, u16 cq_count, u16 cq_period) { - return cq->device->modify_cq ? - cq->device->modify_cq(cq, cq_count, cq_period) : -EOPNOTSUPP; + return cq->device->ops.modify_cq ? + cq->device->ops.modify_cq(cq, cq_count, + cq_period) : -EOPNOTSUPP; } EXPORT_SYMBOL(rdma_set_cq_moderation); @@ -1910,14 +1930,14 @@ int ib_destroy_cq(struct ib_cq *cq) return -EBUSY; rdma_restrack_del(&cq->res); - return cq->device->destroy_cq(cq); + return cq->device->ops.destroy_cq(cq); } EXPORT_SYMBOL(ib_destroy_cq); int ib_resize_cq(struct ib_cq *cq, int cqe) { - return cq->device->resize_cq ? - cq->device->resize_cq(cq, cqe, NULL) : -EOPNOTSUPP; + return cq->device->ops.resize_cq ? + cq->device->ops.resize_cq(cq, cqe, NULL) : -EOPNOTSUPP; } EXPORT_SYMBOL(ib_resize_cq); @@ -1930,7 +1950,7 @@ int ib_dereg_mr(struct ib_mr *mr) int ret; rdma_restrack_del(&mr->res); - ret = mr->device->dereg_mr(mr); + ret = mr->device->ops.dereg_mr(mr); if (!ret) { atomic_dec(&pd->usecnt); if (dm) @@ -1959,10 +1979,10 @@ struct ib_mr *ib_alloc_mr(struct ib_pd *pd, { struct ib_mr *mr; - if (!pd->device->alloc_mr) + if (!pd->device->ops.alloc_mr) return ERR_PTR(-EOPNOTSUPP); - mr = pd->device->alloc_mr(pd, mr_type, max_num_sg); + mr = pd->device->ops.alloc_mr(pd, mr_type, max_num_sg); if (!IS_ERR(mr)) { mr->device = pd->device; mr->pd = pd; @@ -1971,7 +1991,7 @@ struct ib_mr *ib_alloc_mr(struct ib_pd *pd, atomic_inc(&pd->usecnt); mr->need_inval = false; mr->res.type = RDMA_RESTRACK_MR; - rdma_restrack_add(&mr->res); + rdma_restrack_kadd(&mr->res); } return mr; @@ -1986,10 +2006,10 @@ struct ib_fmr *ib_alloc_fmr(struct ib_pd *pd, { struct ib_fmr *fmr; - if (!pd->device->alloc_fmr) + if (!pd->device->ops.alloc_fmr) return ERR_PTR(-EOPNOTSUPP); - fmr = pd->device->alloc_fmr(pd, mr_access_flags, fmr_attr); + fmr = pd->device->ops.alloc_fmr(pd, mr_access_flags, fmr_attr); if (!IS_ERR(fmr)) { fmr->device = pd->device; fmr->pd = pd; @@ -2008,7 +2028,7 @@ int ib_unmap_fmr(struct list_head *fmr_list) return 0; fmr = list_entry(fmr_list->next, struct ib_fmr, list); - return fmr->device->unmap_fmr(fmr_list); + return fmr->device->ops.unmap_fmr(fmr_list); } EXPORT_SYMBOL(ib_unmap_fmr); @@ -2018,7 +2038,7 @@ int ib_dealloc_fmr(struct ib_fmr *fmr) int ret; pd = fmr->pd; - ret = fmr->device->dealloc_fmr(fmr); + ret = fmr->device->ops.dealloc_fmr(fmr); if (!ret) atomic_dec(&pd->usecnt); @@ -2070,14 +2090,14 @@ int ib_attach_mcast(struct ib_qp *qp, union ib_gid *gid, u16 lid) { int ret; - if (!qp->device->attach_mcast) + if (!qp->device->ops.attach_mcast) return -EOPNOTSUPP; if (!rdma_is_multicast_addr((struct in6_addr *)gid->raw) || qp->qp_type != IB_QPT_UD || !is_valid_mcast_lid(qp, lid)) return -EINVAL; - ret = qp->device->attach_mcast(qp, gid, lid); + ret = qp->device->ops.attach_mcast(qp, gid, lid); if (!ret) atomic_inc(&qp->usecnt); return ret; @@ -2088,14 +2108,14 @@ int ib_detach_mcast(struct ib_qp *qp, union ib_gid *gid, u16 lid) { int ret; - if (!qp->device->detach_mcast) + if (!qp->device->ops.detach_mcast) return -EOPNOTSUPP; if (!rdma_is_multicast_addr((struct in6_addr *)gid->raw) || qp->qp_type != IB_QPT_UD || !is_valid_mcast_lid(qp, lid)) return -EINVAL; - ret = qp->device->detach_mcast(qp, gid, lid); + ret = qp->device->ops.detach_mcast(qp, gid, lid); if (!ret) atomic_dec(&qp->usecnt); return ret; @@ -2106,10 +2126,10 @@ struct ib_xrcd *__ib_alloc_xrcd(struct ib_device *device, const char *caller) { struct ib_xrcd *xrcd; - if (!device->alloc_xrcd) + if (!device->ops.alloc_xrcd) return ERR_PTR(-EOPNOTSUPP); - xrcd = device->alloc_xrcd(device, NULL, NULL); + xrcd = device->ops.alloc_xrcd(device, NULL, NULL); if (!IS_ERR(xrcd)) { xrcd->device = device; xrcd->inode = NULL; @@ -2137,7 +2157,7 @@ int ib_dealloc_xrcd(struct ib_xrcd *xrcd) return ret; } - return xrcd->device->dealloc_xrcd(xrcd); + return xrcd->device->ops.dealloc_xrcd(xrcd); } EXPORT_SYMBOL(ib_dealloc_xrcd); @@ -2160,10 +2180,10 @@ struct ib_wq *ib_create_wq(struct ib_pd *pd, { struct ib_wq *wq; - if (!pd->device->create_wq) + if (!pd->device->ops.create_wq) return ERR_PTR(-EOPNOTSUPP); - wq = pd->device->create_wq(pd, wq_attr, NULL); + wq = pd->device->ops.create_wq(pd, wq_attr, NULL); if (!IS_ERR(wq)) { wq->event_handler = wq_attr->event_handler; wq->wq_context = wq_attr->wq_context; @@ -2193,7 +2213,7 @@ int ib_destroy_wq(struct ib_wq *wq) if (atomic_read(&wq->usecnt)) return -EBUSY; - err = wq->device->destroy_wq(wq); + err = wq->device->ops.destroy_wq(wq); if (!err) { atomic_dec(&pd->usecnt); atomic_dec(&cq->usecnt); @@ -2215,10 +2235,10 @@ int ib_modify_wq(struct ib_wq *wq, struct ib_wq_attr *wq_attr, { int err; - if (!wq->device->modify_wq) + if (!wq->device->ops.modify_wq) return -EOPNOTSUPP; - err = wq->device->modify_wq(wq, wq_attr, wq_attr_mask, NULL); + err = wq->device->ops.modify_wq(wq, wq_attr, wq_attr_mask, NULL); return err; } EXPORT_SYMBOL(ib_modify_wq); @@ -2240,12 +2260,12 @@ struct ib_rwq_ind_table *ib_create_rwq_ind_table(struct ib_device *device, int i; u32 table_size; - if (!device->create_rwq_ind_table) + if (!device->ops.create_rwq_ind_table) return ERR_PTR(-EOPNOTSUPP); table_size = (1 << init_attr->log_ind_tbl_size); - rwq_ind_table = device->create_rwq_ind_table(device, - init_attr, NULL); + rwq_ind_table = device->ops.create_rwq_ind_table(device, + init_attr, NULL); if (IS_ERR(rwq_ind_table)) return rwq_ind_table; @@ -2275,7 +2295,7 @@ int ib_destroy_rwq_ind_table(struct ib_rwq_ind_table *rwq_ind_table) if (atomic_read(&rwq_ind_table->usecnt)) return -EBUSY; - err = rwq_ind_table->device->destroy_rwq_ind_table(rwq_ind_table); + err = rwq_ind_table->device->ops.destroy_rwq_ind_table(rwq_ind_table); if (!err) { for (i = 0; i < table_size; i++) atomic_dec(&ind_tbl[i]->usecnt); @@ -2288,48 +2308,50 @@ EXPORT_SYMBOL(ib_destroy_rwq_ind_table); int ib_check_mr_status(struct ib_mr *mr, u32 check_mask, struct ib_mr_status *mr_status) { - return mr->device->check_mr_status ? - mr->device->check_mr_status(mr, check_mask, mr_status) : -EOPNOTSUPP; + if (!mr->device->ops.check_mr_status) + return -EOPNOTSUPP; + + return mr->device->ops.check_mr_status(mr, check_mask, mr_status); } EXPORT_SYMBOL(ib_check_mr_status); int ib_set_vf_link_state(struct ib_device *device, int vf, u8 port, int state) { - if (!device->set_vf_link_state) + if (!device->ops.set_vf_link_state) return -EOPNOTSUPP; - return device->set_vf_link_state(device, vf, port, state); + return device->ops.set_vf_link_state(device, vf, port, state); } EXPORT_SYMBOL(ib_set_vf_link_state); int ib_get_vf_config(struct ib_device *device, int vf, u8 port, struct ifla_vf_info *info) { - if (!device->get_vf_config) + if (!device->ops.get_vf_config) return -EOPNOTSUPP; - return device->get_vf_config(device, vf, port, info); + return device->ops.get_vf_config(device, vf, port, info); } EXPORT_SYMBOL(ib_get_vf_config); int ib_get_vf_stats(struct ib_device *device, int vf, u8 port, struct ifla_vf_stats *stats) { - if (!device->get_vf_stats) + if (!device->ops.get_vf_stats) return -EOPNOTSUPP; - return device->get_vf_stats(device, vf, port, stats); + return device->ops.get_vf_stats(device, vf, port, stats); } EXPORT_SYMBOL(ib_get_vf_stats); int ib_set_vf_guid(struct ib_device *device, int vf, u8 port, u64 guid, int type) { - if (!device->set_vf_guid) + if (!device->ops.set_vf_guid) return -EOPNOTSUPP; - return device->set_vf_guid(device, vf, port, guid, type); + return device->ops.set_vf_guid(device, vf, port, guid, type); } EXPORT_SYMBOL(ib_set_vf_guid); @@ -2361,12 +2383,12 @@ EXPORT_SYMBOL(ib_set_vf_guid); int ib_map_mr_sg(struct ib_mr *mr, struct scatterlist *sg, int sg_nents, unsigned int *sg_offset, unsigned int page_size) { - if (unlikely(!mr->device->map_mr_sg)) + if (unlikely(!mr->device->ops.map_mr_sg)) return -EOPNOTSUPP; mr->page_size = page_size; - return mr->device->map_mr_sg(mr, sg, sg_nents, sg_offset); + return mr->device->ops.map_mr_sg(mr, sg, sg_nents, sg_offset); } EXPORT_SYMBOL(ib_map_mr_sg); @@ -2565,8 +2587,8 @@ static void __ib_drain_rq(struct ib_qp *qp) */ void ib_drain_sq(struct ib_qp *qp) { - if (qp->device->drain_sq) - qp->device->drain_sq(qp); + if (qp->device->ops.drain_sq) + qp->device->ops.drain_sq(qp); else __ib_drain_sq(qp); } @@ -2593,8 +2615,8 @@ EXPORT_SYMBOL(ib_drain_sq); */ void ib_drain_rq(struct ib_qp *qp) { - if (qp->device->drain_rq) - qp->device->drain_rq(qp); + if (qp->device->ops.drain_rq) + qp->device->ops.drain_rq(qp); else __ib_drain_rq(qp); } @@ -2632,10 +2654,11 @@ struct net_device *rdma_alloc_netdev(struct ib_device *device, u8 port_num, struct net_device *netdev; int rc; - if (!device->rdma_netdev_get_params) + if (!device->ops.rdma_netdev_get_params) return ERR_PTR(-EOPNOTSUPP); - rc = device->rdma_netdev_get_params(device, port_num, type, ¶ms); + rc = device->ops.rdma_netdev_get_params(device, port_num, type, + ¶ms); if (rc) return ERR_PTR(rc); @@ -2657,10 +2680,11 @@ int rdma_init_netdev(struct ib_device *device, u8 port_num, struct rdma_netdev_alloc_params params; int rc; - if (!device->rdma_netdev_get_params) + if (!device->ops.rdma_netdev_get_params) return -EOPNOTSUPP; - rc = device->rdma_netdev_get_params(device, port_num, type, ¶ms); + rc = device->ops.rdma_netdev_get_params(device, port_num, type, + ¶ms); if (rc) return rc; diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.c b/drivers/infiniband/hw/bnxt_re/ib_verbs.c index 54fdd4cf5288..1e2515e2eb62 100644 --- a/drivers/infiniband/hw/bnxt_re/ib_verbs.c +++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.c @@ -647,13 +647,14 @@ fail: } /* Address Handles */ -int bnxt_re_destroy_ah(struct ib_ah *ib_ah) +int bnxt_re_destroy_ah(struct ib_ah *ib_ah, u32 flags) { struct bnxt_re_ah *ah = container_of(ib_ah, struct bnxt_re_ah, ib_ah); struct bnxt_re_dev *rdev = ah->rdev; int rc; - rc = bnxt_qplib_destroy_ah(&rdev->qplib_res, &ah->qplib_ah); + rc = bnxt_qplib_destroy_ah(&rdev->qplib_res, &ah->qplib_ah, + !(flags & RDMA_DESTROY_AH_SLEEPABLE)); if (rc) { dev_err(rdev_to_dev(rdev), "Failed to destroy HW AH"); return rc; @@ -664,6 +665,7 @@ int bnxt_re_destroy_ah(struct ib_ah *ib_ah) struct ib_ah *bnxt_re_create_ah(struct ib_pd *ib_pd, struct rdma_ah_attr *ah_attr, + u32 flags, struct ib_udata *udata) { struct bnxt_re_pd *pd = container_of(ib_pd, struct bnxt_re_pd, ib_pd); @@ -698,7 +700,7 @@ struct ib_ah *bnxt_re_create_ah(struct ib_pd *ib_pd, ah->qplib_ah.flow_label = grh->flow_label; ah->qplib_ah.hop_limit = grh->hop_limit; ah->qplib_ah.sl = rdma_ah_get_sl(ah_attr); - if (ib_pd->uobject && + if (udata && !rdma_is_multicast_addr((struct in6_addr *) grh->dgid.raw) && !rdma_link_local_addr((struct in6_addr *) @@ -722,14 +724,15 @@ struct ib_ah *bnxt_re_create_ah(struct ib_pd *ib_pd, } memcpy(ah->qplib_ah.dmac, ah_attr->roce.dmac, ETH_ALEN); - rc = bnxt_qplib_create_ah(&rdev->qplib_res, &ah->qplib_ah); + rc = bnxt_qplib_create_ah(&rdev->qplib_res, &ah->qplib_ah, + !(flags & RDMA_CREATE_AH_SLEEPABLE)); if (rc) { dev_err(rdev_to_dev(rdev), "Failed to allocate HW AH"); goto fail; } /* Write AVID to shared page. */ - if (ib_pd->uobject) { + if (udata) { struct ib_ucontext *ib_uctx = ib_pd->uobject->context; struct bnxt_re_ucontext *uctx; unsigned long flag; @@ -818,7 +821,7 @@ int bnxt_re_destroy_qp(struct ib_qp *ib_qp) if (ib_qp->qp_type == IB_QPT_GSI && rdev->qp1_sqp) { rc = bnxt_qplib_destroy_ah(&rdev->qplib_res, - &rdev->sqp_ah->qplib_ah); + &rdev->sqp_ah->qplib_ah, false); if (rc) { dev_err(rdev_to_dev(rdev), "Failed to destroy HW AH for shadow QP"); @@ -958,7 +961,7 @@ static struct bnxt_re_ah *bnxt_re_create_shadow_qp_ah /* Have DMAC same as SMAC */ ether_addr_copy(ah->qplib_ah.dmac, rdev->netdev->dev_addr); - rc = bnxt_qplib_create_ah(&rdev->qplib_res, &ah->qplib_ah); + rc = bnxt_qplib_create_ah(&rdev->qplib_res, &ah->qplib_ah, false); if (rc) { dev_err(rdev_to_dev(rdev), "Failed to allocate HW AH for Shadow QP"); diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.h b/drivers/infiniband/hw/bnxt_re/ib_verbs.h index aa33e7b82c84..c4af72604b4f 100644 --- a/drivers/infiniband/hw/bnxt_re/ib_verbs.h +++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.h @@ -169,10 +169,11 @@ struct ib_pd *bnxt_re_alloc_pd(struct ib_device *ibdev, int bnxt_re_dealloc_pd(struct ib_pd *pd); struct ib_ah *bnxt_re_create_ah(struct ib_pd *pd, struct rdma_ah_attr *ah_attr, + u32 flags, struct ib_udata *udata); int bnxt_re_modify_ah(struct ib_ah *ah, struct rdma_ah_attr *ah_attr); int bnxt_re_query_ah(struct ib_ah *ah, struct rdma_ah_attr *ah_attr); -int bnxt_re_destroy_ah(struct ib_ah *ah); +int bnxt_re_destroy_ah(struct ib_ah *ah, u32 flags); struct ib_srq *bnxt_re_create_srq(struct ib_pd *pd, struct ib_srq_init_attr *srq_init_attr, struct ib_udata *udata); diff --git a/drivers/infiniband/hw/bnxt_re/main.c b/drivers/infiniband/hw/bnxt_re/main.c index 77f095e5fbe3..e7a997f2a537 100644 --- a/drivers/infiniband/hw/bnxt_re/main.c +++ b/drivers/infiniband/hw/bnxt_re/main.c @@ -568,6 +568,50 @@ static void bnxt_re_unregister_ib(struct bnxt_re_dev *rdev) ib_unregister_device(&rdev->ibdev); } +static const struct ib_device_ops bnxt_re_dev_ops = { + .add_gid = bnxt_re_add_gid, + .alloc_hw_stats = bnxt_re_ib_alloc_hw_stats, + .alloc_mr = bnxt_re_alloc_mr, + .alloc_pd = bnxt_re_alloc_pd, + .alloc_ucontext = bnxt_re_alloc_ucontext, + .create_ah = bnxt_re_create_ah, + .create_cq = bnxt_re_create_cq, + .create_qp = bnxt_re_create_qp, + .create_srq = bnxt_re_create_srq, + .dealloc_pd = bnxt_re_dealloc_pd, + .dealloc_ucontext = bnxt_re_dealloc_ucontext, + .del_gid = bnxt_re_del_gid, + .dereg_mr = bnxt_re_dereg_mr, + .destroy_ah = bnxt_re_destroy_ah, + .destroy_cq = bnxt_re_destroy_cq, + .destroy_qp = bnxt_re_destroy_qp, + .destroy_srq = bnxt_re_destroy_srq, + .get_dev_fw_str = bnxt_re_query_fw_str, + .get_dma_mr = bnxt_re_get_dma_mr, + .get_hw_stats = bnxt_re_ib_get_hw_stats, + .get_link_layer = bnxt_re_get_link_layer, + .get_netdev = bnxt_re_get_netdev, + .get_port_immutable = bnxt_re_get_port_immutable, + .map_mr_sg = bnxt_re_map_mr_sg, + .mmap = bnxt_re_mmap, + .modify_ah = bnxt_re_modify_ah, + .modify_device = bnxt_re_modify_device, + .modify_qp = bnxt_re_modify_qp, + .modify_srq = bnxt_re_modify_srq, + .poll_cq = bnxt_re_poll_cq, + .post_recv = bnxt_re_post_recv, + .post_send = bnxt_re_post_send, + .post_srq_recv = bnxt_re_post_srq_recv, + .query_ah = bnxt_re_query_ah, + .query_device = bnxt_re_query_device, + .query_pkey = bnxt_re_query_pkey, + .query_port = bnxt_re_query_port, + .query_qp = bnxt_re_query_qp, + .query_srq = bnxt_re_query_srq, + .reg_user_mr = bnxt_re_reg_user_mr, + .req_notify_cq = bnxt_re_req_notify_cq, +}; + static int bnxt_re_register_ib(struct bnxt_re_dev *rdev) { struct ib_device *ibdev = &rdev->ibdev; @@ -614,60 +658,10 @@ static int bnxt_re_register_ib(struct bnxt_re_dev *rdev) (1ull << IB_USER_VERBS_CMD_DESTROY_AH); /* POLL_CQ and REQ_NOTIFY_CQ is directly handled in libbnxt_re */ - /* Kernel verbs */ - ibdev->query_device = bnxt_re_query_device; - ibdev->modify_device = bnxt_re_modify_device; - - ibdev->query_port = bnxt_re_query_port; - ibdev->get_port_immutable = bnxt_re_get_port_immutable; - ibdev->get_dev_fw_str = bnxt_re_query_fw_str; - ibdev->query_pkey = bnxt_re_query_pkey; - ibdev->get_netdev = bnxt_re_get_netdev; - ibdev->add_gid = bnxt_re_add_gid; - ibdev->del_gid = bnxt_re_del_gid; - ibdev->get_link_layer = bnxt_re_get_link_layer; - - ibdev->alloc_pd = bnxt_re_alloc_pd; - ibdev->dealloc_pd = bnxt_re_dealloc_pd; - - ibdev->create_ah = bnxt_re_create_ah; - ibdev->modify_ah = bnxt_re_modify_ah; - ibdev->query_ah = bnxt_re_query_ah; - ibdev->destroy_ah = bnxt_re_destroy_ah; - - ibdev->create_srq = bnxt_re_create_srq; - ibdev->modify_srq = bnxt_re_modify_srq; - ibdev->query_srq = bnxt_re_query_srq; - ibdev->destroy_srq = bnxt_re_destroy_srq; - ibdev->post_srq_recv = bnxt_re_post_srq_recv; - - ibdev->create_qp = bnxt_re_create_qp; - ibdev->modify_qp = bnxt_re_modify_qp; - ibdev->query_qp = bnxt_re_query_qp; - ibdev->destroy_qp = bnxt_re_destroy_qp; - - ibdev->post_send = bnxt_re_post_send; - ibdev->post_recv = bnxt_re_post_recv; - - ibdev->create_cq = bnxt_re_create_cq; - ibdev->destroy_cq = bnxt_re_destroy_cq; - ibdev->poll_cq = bnxt_re_poll_cq; - ibdev->req_notify_cq = bnxt_re_req_notify_cq; - - ibdev->get_dma_mr = bnxt_re_get_dma_mr; - ibdev->dereg_mr = bnxt_re_dereg_mr; - ibdev->alloc_mr = bnxt_re_alloc_mr; - ibdev->map_mr_sg = bnxt_re_map_mr_sg; - - ibdev->reg_user_mr = bnxt_re_reg_user_mr; - ibdev->alloc_ucontext = bnxt_re_alloc_ucontext; - ibdev->dealloc_ucontext = bnxt_re_dealloc_ucontext; - ibdev->mmap = bnxt_re_mmap; - ibdev->get_hw_stats = bnxt_re_ib_get_hw_stats; - ibdev->alloc_hw_stats = bnxt_re_ib_alloc_hw_stats; rdma_set_device_sysfs_group(ibdev, &bnxt_re_dev_attr_group); ibdev->driver_id = RDMA_DRIVER_BNXT_RE; + ib_set_device_ops(ibdev, &bnxt_re_dev_ops); return ib_register_device(ibdev, "bnxt_re%d", NULL); } @@ -1203,6 +1197,35 @@ static int bnxt_re_setup_qos(struct bnxt_re_dev *rdev) return 0; } +static void bnxt_re_query_hwrm_intf_version(struct bnxt_re_dev *rdev) +{ + struct bnxt_en_dev *en_dev = rdev->en_dev; + struct hwrm_ver_get_output resp = {0}; + struct hwrm_ver_get_input req = {0}; + struct bnxt_fw_msg fw_msg; + int rc = 0; + + memset(&fw_msg, 0, sizeof(fw_msg)); + bnxt_re_init_hwrm_hdr(rdev, (void *)&req, + HWRM_VER_GET, -1, -1); + req.hwrm_intf_maj = HWRM_VERSION_MAJOR; + req.hwrm_intf_min = HWRM_VERSION_MINOR; + req.hwrm_intf_upd = HWRM_VERSION_UPDATE; + bnxt_re_fill_fw_msg(&fw_msg, (void *)&req, sizeof(req), (void *)&resp, + sizeof(resp), DFLT_HWRM_CMD_TIMEOUT); + rc = en_dev->en_ops->bnxt_send_fw_msg(en_dev, BNXT_ROCE_ULP, &fw_msg); + if (rc) { + dev_err(rdev_to_dev(rdev), + "Failed to query HW version, rc = 0x%x", rc); + return; + } + rdev->qplib_ctx.hwrm_intf_ver = + (u64)resp.hwrm_intf_major << 48 | + (u64)resp.hwrm_intf_minor << 32 | + (u64)resp.hwrm_intf_build << 16 | + resp.hwrm_intf_patch; +} + static void bnxt_re_ib_unreg(struct bnxt_re_dev *rdev) { int rc; @@ -1285,10 +1308,13 @@ static int bnxt_re_ib_reg(struct bnxt_re_dev *rdev) } set_bit(BNXT_RE_FLAG_GOT_MSIX, &rdev->flags); + bnxt_re_query_hwrm_intf_version(rdev); + /* Establish RCFW Communication Channel to initialize the context * memory for the function and all child VFs */ rc = bnxt_qplib_alloc_rcfw_channel(rdev->en_dev->pdev, &rdev->rcfw, + &rdev->qplib_ctx, BNXT_RE_MAX_QPC_COUNT); if (rc) { pr_err("Failed to allocate RCFW Channel: %#x\n", rc); diff --git a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c index be4e33e9f962..326805461265 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c +++ b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c @@ -58,7 +58,7 @@ static int __wait_for_resp(struct bnxt_qplib_rcfw *rcfw, u16 cookie) u16 cbit; int rc; - cbit = cookie % RCFW_MAX_OUTSTANDING_CMD; + cbit = cookie % rcfw->cmdq_depth; rc = wait_event_timeout(rcfw->waitq, !test_bit(cbit, rcfw->cmdq_bitmap), msecs_to_jiffies(RCFW_CMD_WAIT_TIME_MS)); @@ -70,7 +70,7 @@ static int __block_for_resp(struct bnxt_qplib_rcfw *rcfw, u16 cookie) u32 count = RCFW_BLOCKED_CMD_WAIT_COUNT; u16 cbit; - cbit = cookie % RCFW_MAX_OUTSTANDING_CMD; + cbit = cookie % rcfw->cmdq_depth; if (!test_bit(cbit, rcfw->cmdq_bitmap)) goto done; do { @@ -86,6 +86,7 @@ static int __send_message(struct bnxt_qplib_rcfw *rcfw, struct cmdq_base *req, { struct bnxt_qplib_cmdqe *cmdqe, **cmdq_ptr; struct bnxt_qplib_hwq *cmdq = &rcfw->cmdq; + u32 cmdq_depth = rcfw->cmdq_depth; struct bnxt_qplib_crsq *crsqe; u32 sw_prod, cmdq_prod; unsigned long flags; @@ -124,7 +125,7 @@ static int __send_message(struct bnxt_qplib_rcfw *rcfw, struct cmdq_base *req, cookie = rcfw->seq_num & RCFW_MAX_COOKIE_VALUE; - cbit = cookie % RCFW_MAX_OUTSTANDING_CMD; + cbit = cookie % rcfw->cmdq_depth; if (is_block) cookie |= RCFW_CMD_IS_BLOCKING; @@ -153,7 +154,8 @@ static int __send_message(struct bnxt_qplib_rcfw *rcfw, struct cmdq_base *req, do { /* Locate the next cmdq slot */ sw_prod = HWQ_CMP(cmdq->prod, cmdq); - cmdqe = &cmdq_ptr[get_cmdq_pg(sw_prod)][get_cmdq_idx(sw_prod)]; + cmdqe = &cmdq_ptr[get_cmdq_pg(sw_prod, cmdq_depth)] + [get_cmdq_idx(sw_prod, cmdq_depth)]; if (!cmdqe) { dev_err(&rcfw->pdev->dev, "RCFW request failed with no cmdqe!\n"); @@ -326,7 +328,7 @@ static int bnxt_qplib_process_qp_event(struct bnxt_qplib_rcfw *rcfw, mcookie = qp_event->cookie; blocked = cookie & RCFW_CMD_IS_BLOCKING; cookie &= RCFW_MAX_COOKIE_VALUE; - cbit = cookie % RCFW_MAX_OUTSTANDING_CMD; + cbit = cookie % rcfw->cmdq_depth; crsqe = &rcfw->crsqe_tbl[cbit]; if (crsqe->resp && crsqe->resp->cookie == mcookie) { @@ -555,6 +557,7 @@ void bnxt_qplib_free_rcfw_channel(struct bnxt_qplib_rcfw *rcfw) int bnxt_qplib_alloc_rcfw_channel(struct pci_dev *pdev, struct bnxt_qplib_rcfw *rcfw, + struct bnxt_qplib_ctx *ctx, int qp_tbl_sz) { rcfw->pdev = pdev; @@ -567,11 +570,18 @@ int bnxt_qplib_alloc_rcfw_channel(struct pci_dev *pdev, "HW channel CREQ allocation failed\n"); goto fail; } - rcfw->cmdq.max_elements = BNXT_QPLIB_CMDQE_MAX_CNT; - if (bnxt_qplib_alloc_init_hwq(rcfw->pdev, &rcfw->cmdq, NULL, 0, - &rcfw->cmdq.max_elements, - BNXT_QPLIB_CMDQE_UNITS, 0, PAGE_SIZE, - HWQ_TYPE_CTX)) { + if (ctx->hwrm_intf_ver < HWRM_VERSION_RCFW_CMDQ_DEPTH_CHECK) + rcfw->cmdq_depth = BNXT_QPLIB_CMDQE_MAX_CNT_256; + else + rcfw->cmdq_depth = BNXT_QPLIB_CMDQE_MAX_CNT_8192; + + rcfw->cmdq.max_elements = rcfw->cmdq_depth; + if (bnxt_qplib_alloc_init_hwq + (rcfw->pdev, &rcfw->cmdq, NULL, 0, + &rcfw->cmdq.max_elements, + BNXT_QPLIB_CMDQE_UNITS, 0, + bnxt_qplib_cmdqe_page_size(rcfw->cmdq_depth), + HWQ_TYPE_CTX)) { dev_err(&rcfw->pdev->dev, "HW channel CMDQ allocation failed\n"); goto fail; @@ -674,7 +684,7 @@ int bnxt_qplib_enable_rcfw_channel(struct pci_dev *pdev, /* General */ rcfw->seq_num = 0; set_bit(FIRMWARE_FIRST_FLAG, &rcfw->flags); - bmap_size = BITS_TO_LONGS(RCFW_MAX_OUTSTANDING_CMD * + bmap_size = BITS_TO_LONGS(rcfw->cmdq_depth * sizeof(unsigned long)); rcfw->cmdq_bitmap = kzalloc(bmap_size, GFP_KERNEL); if (!rcfw->cmdq_bitmap) @@ -734,7 +744,7 @@ int bnxt_qplib_enable_rcfw_channel(struct pci_dev *pdev, init.cmdq_pbl = cpu_to_le64(rcfw->cmdq.pbl[PBL_LVL_0].pg_map_arr[0]); init.cmdq_size_cmdq_lvl = cpu_to_le16( - ((BNXT_QPLIB_CMDQE_MAX_CNT << CMDQ_INIT_CMDQ_SIZE_SFT) & + ((rcfw->cmdq_depth << CMDQ_INIT_CMDQ_SIZE_SFT) & CMDQ_INIT_CMDQ_SIZE_MASK) | ((rcfw->cmdq.level << CMDQ_INIT_CMDQ_LVL_SFT) & CMDQ_INIT_CMDQ_LVL_MASK)); diff --git a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.h b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.h index 9a8687dc0a79..be0ef0e8c53e 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.h +++ b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.h @@ -63,32 +63,60 @@ #define RCFW_CMD_WAIT_TIME_MS 20000 /* 20 Seconds timeout */ +/* Cmdq contains a fix number of a 16-Byte slots */ +struct bnxt_qplib_cmdqe { + u8 data[16]; +}; + /* CMDQ elements */ -#define BNXT_QPLIB_CMDQE_MAX_CNT 256 +#define BNXT_QPLIB_CMDQE_MAX_CNT_256 256 +#define BNXT_QPLIB_CMDQE_MAX_CNT_8192 8192 #define BNXT_QPLIB_CMDQE_UNITS sizeof(struct bnxt_qplib_cmdqe) -#define BNXT_QPLIB_CMDQE_CNT_PER_PG (PAGE_SIZE / BNXT_QPLIB_CMDQE_UNITS) +#define BNXT_QPLIB_CMDQE_BYTES(depth) ((depth) * BNXT_QPLIB_CMDQE_UNITS) + +static inline u32 bnxt_qplib_cmdqe_npages(u32 depth) +{ + u32 npages; + + npages = BNXT_QPLIB_CMDQE_BYTES(depth) / PAGE_SIZE; + if (BNXT_QPLIB_CMDQE_BYTES(depth) % PAGE_SIZE) + npages++; + return npages; +} + +static inline u32 bnxt_qplib_cmdqe_page_size(u32 depth) +{ + return (bnxt_qplib_cmdqe_npages(depth) * PAGE_SIZE); +} + +static inline u32 bnxt_qplib_cmdqe_cnt_per_pg(u32 depth) +{ + return (bnxt_qplib_cmdqe_page_size(depth) / + BNXT_QPLIB_CMDQE_UNITS); +} -#define MAX_CMDQ_IDX (BNXT_QPLIB_CMDQE_MAX_CNT - 1) -#define MAX_CMDQ_IDX_PER_PG (BNXT_QPLIB_CMDQE_CNT_PER_PG - 1) +#define MAX_CMDQ_IDX(depth) ((depth) - 1) + +static inline u32 bnxt_qplib_max_cmdq_idx_per_pg(u32 depth) +{ + return (bnxt_qplib_cmdqe_cnt_per_pg(depth) - 1); +} -#define RCFW_MAX_OUTSTANDING_CMD BNXT_QPLIB_CMDQE_MAX_CNT #define RCFW_MAX_COOKIE_VALUE 0x7FFF #define RCFW_CMD_IS_BLOCKING 0x8000 #define RCFW_BLOCKED_CMD_WAIT_COUNT 0x4E20 -/* Cmdq contains a fix number of a 16-Byte slots */ -struct bnxt_qplib_cmdqe { - u8 data[16]; -}; +#define HWRM_VERSION_RCFW_CMDQ_DEPTH_CHECK 0x1000900020011ULL -static inline u32 get_cmdq_pg(u32 val) +static inline u32 get_cmdq_pg(u32 val, u32 depth) { - return (val & ~MAX_CMDQ_IDX_PER_PG) / BNXT_QPLIB_CMDQE_CNT_PER_PG; + return (val & ~(bnxt_qplib_max_cmdq_idx_per_pg(depth))) / + (bnxt_qplib_cmdqe_cnt_per_pg(depth)); } -static inline u32 get_cmdq_idx(u32 val) +static inline u32 get_cmdq_idx(u32 val, u32 depth) { - return val & MAX_CMDQ_IDX_PER_PG; + return val & (bnxt_qplib_max_cmdq_idx_per_pg(depth)); } /* Crsq buf is 1024-Byte */ @@ -194,11 +222,14 @@ struct bnxt_qplib_rcfw { struct bnxt_qplib_qp_node *qp_tbl; u64 oos_prev; u32 init_oos_stats; + u32 cmdq_depth; }; void bnxt_qplib_free_rcfw_channel(struct bnxt_qplib_rcfw *rcfw); int bnxt_qplib_alloc_rcfw_channel(struct pci_dev *pdev, - struct bnxt_qplib_rcfw *rcfw, int qp_tbl_sz); + struct bnxt_qplib_rcfw *rcfw, + struct bnxt_qplib_ctx *ctx, + int qp_tbl_sz); void bnxt_qplib_rcfw_stop_irq(struct bnxt_qplib_rcfw *rcfw, bool kill); void bnxt_qplib_disable_rcfw_channel(struct bnxt_qplib_rcfw *rcfw); int bnxt_qplib_rcfw_start_irq(struct bnxt_qplib_rcfw *rcfw, int msix_vector, diff --git a/drivers/infiniband/hw/bnxt_re/qplib_res.h b/drivers/infiniband/hw/bnxt_re/qplib_res.h index 2e5c052da5a9..1e80aa7bbcce 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_res.h +++ b/drivers/infiniband/hw/bnxt_re/qplib_res.h @@ -177,6 +177,7 @@ struct bnxt_qplib_ctx { struct bnxt_qplib_hwq tqm_tbl[MAX_TQM_ALLOC_REQ]; struct bnxt_qplib_stats stats; struct bnxt_qplib_vf_res vf_res; + u64 hwrm_intf_ver; }; struct bnxt_qplib_res { diff --git a/drivers/infiniband/hw/bnxt_re/qplib_sp.c b/drivers/infiniband/hw/bnxt_re/qplib_sp.c index 5216b5f844cc..be03b5738f71 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_sp.c +++ b/drivers/infiniband/hw/bnxt_re/qplib_sp.c @@ -488,7 +488,8 @@ int bnxt_qplib_add_pkey(struct bnxt_qplib_res *res, } /* AH */ -int bnxt_qplib_create_ah(struct bnxt_qplib_res *res, struct bnxt_qplib_ah *ah) +int bnxt_qplib_create_ah(struct bnxt_qplib_res *res, struct bnxt_qplib_ah *ah, + bool block) { struct bnxt_qplib_rcfw *rcfw = res->rcfw; struct cmdq_create_ah req; @@ -522,7 +523,7 @@ int bnxt_qplib_create_ah(struct bnxt_qplib_res *res, struct bnxt_qplib_ah *ah) req.dest_mac[2] = cpu_to_le16(temp16[2]); rc = bnxt_qplib_rcfw_send_message(rcfw, (void *)&req, (void *)&resp, - NULL, 1); + NULL, block); if (rc) return rc; @@ -530,7 +531,8 @@ int bnxt_qplib_create_ah(struct bnxt_qplib_res *res, struct bnxt_qplib_ah *ah) return 0; } -int bnxt_qplib_destroy_ah(struct bnxt_qplib_res *res, struct bnxt_qplib_ah *ah) +int bnxt_qplib_destroy_ah(struct bnxt_qplib_res *res, struct bnxt_qplib_ah *ah, + bool block) { struct bnxt_qplib_rcfw *rcfw = res->rcfw; struct cmdq_destroy_ah req; @@ -544,7 +546,7 @@ int bnxt_qplib_destroy_ah(struct bnxt_qplib_res *res, struct bnxt_qplib_ah *ah) req.ah_cid = cpu_to_le32(ah->id); rc = bnxt_qplib_rcfw_send_message(rcfw, (void *)&req, (void *)&resp, - NULL, 1); + NULL, block); if (rc) return rc; return 0; diff --git a/drivers/infiniband/hw/bnxt_re/qplib_sp.h b/drivers/infiniband/hw/bnxt_re/qplib_sp.h index 8079d7f5a008..39454b3f738d 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_sp.h +++ b/drivers/infiniband/hw/bnxt_re/qplib_sp.h @@ -241,8 +241,10 @@ int bnxt_qplib_get_dev_attr(struct bnxt_qplib_rcfw *rcfw, int bnxt_qplib_set_func_resources(struct bnxt_qplib_res *res, struct bnxt_qplib_rcfw *rcfw, struct bnxt_qplib_ctx *ctx); -int bnxt_qplib_create_ah(struct bnxt_qplib_res *res, struct bnxt_qplib_ah *ah); -int bnxt_qplib_destroy_ah(struct bnxt_qplib_res *res, struct bnxt_qplib_ah *ah); +int bnxt_qplib_create_ah(struct bnxt_qplib_res *res, struct bnxt_qplib_ah *ah, + bool block); +int bnxt_qplib_destroy_ah(struct bnxt_qplib_res *res, struct bnxt_qplib_ah *ah, + bool block); int bnxt_qplib_alloc_mrw(struct bnxt_qplib_res *res, struct bnxt_qplib_mrw *mrw); int bnxt_qplib_dereg_mrw(struct bnxt_qplib_res *res, struct bnxt_qplib_mrw *mrw, diff --git a/drivers/infiniband/hw/cxgb3/cxio_hal.c b/drivers/infiniband/hw/cxgb3/cxio_hal.c index dcb4bba522ba..df4f7a3f043d 100644 --- a/drivers/infiniband/hw/cxgb3/cxio_hal.c +++ b/drivers/infiniband/hw/cxgb3/cxio_hal.c @@ -291,13 +291,12 @@ int cxio_create_qp(struct cxio_rdev *rdev_p, u32 kernel_domain, if (!wq->sq) goto err3; - wq->queue = dma_alloc_coherent(&(rdev_p->rnic_info.pdev->dev), + wq->queue = dma_zalloc_coherent(&(rdev_p->rnic_info.pdev->dev), depth * sizeof(union t3_wr), &(wq->dma_addr), GFP_KERNEL); if (!wq->queue) goto err4; - memset(wq->queue, 0, depth * sizeof(union t3_wr)); dma_unmap_addr_set(wq, mapping, wq->dma_addr); wq->doorbell = (void __iomem *)rdev_p->rnic_info.kdb_addr; if (!kernel_domain) diff --git a/drivers/infiniband/hw/cxgb3/iwch_provider.c b/drivers/infiniband/hw/cxgb3/iwch_provider.c index ebbec02cebe0..b34b1a1bd94b 100644 --- a/drivers/infiniband/hw/cxgb3/iwch_provider.c +++ b/drivers/infiniband/hw/cxgb3/iwch_provider.c @@ -836,7 +836,7 @@ static struct ib_qp *iwch_create_qp(struct ib_pd *pd, * Kernel users need more wq space for fastreg WRs which can take * 2 WR fragments. */ - ucontext = pd->uobject ? to_iwch_ucontext(pd->uobject->context) : NULL; + ucontext = udata ? to_iwch_ucontext(pd->uobject->context) : NULL; if (!ucontext && wqsize < (rqsize + (2 * sqsize))) wqsize = roundup_pow_of_two(rqsize + roundup_pow_of_two(attrs->cap.max_send_wr * 2)); @@ -1317,6 +1317,39 @@ static void get_dev_fw_ver_str(struct ib_device *ibdev, char *str) snprintf(str, IB_FW_VERSION_NAME_MAX, "%s", info.fw_version); } +static const struct ib_device_ops iwch_dev_ops = { + .alloc_hw_stats = iwch_alloc_stats, + .alloc_mr = iwch_alloc_mr, + .alloc_mw = iwch_alloc_mw, + .alloc_pd = iwch_allocate_pd, + .alloc_ucontext = iwch_alloc_ucontext, + .create_cq = iwch_create_cq, + .create_qp = iwch_create_qp, + .dealloc_mw = iwch_dealloc_mw, + .dealloc_pd = iwch_deallocate_pd, + .dealloc_ucontext = iwch_dealloc_ucontext, + .dereg_mr = iwch_dereg_mr, + .destroy_cq = iwch_destroy_cq, + .destroy_qp = iwch_destroy_qp, + .get_dev_fw_str = get_dev_fw_ver_str, + .get_dma_mr = iwch_get_dma_mr, + .get_hw_stats = iwch_get_mib, + .get_port_immutable = iwch_port_immutable, + .map_mr_sg = iwch_map_mr_sg, + .mmap = iwch_mmap, + .modify_qp = iwch_ib_modify_qp, + .poll_cq = iwch_poll_cq, + .post_recv = iwch_post_receive, + .post_send = iwch_post_send, + .query_device = iwch_query_device, + .query_gid = iwch_query_gid, + .query_pkey = iwch_query_pkey, + .query_port = iwch_query_port, + .reg_user_mr = iwch_reg_user_mr, + .req_notify_cq = iwch_arm_cq, + .resize_cq = iwch_resize_cq, +}; + int iwch_register_device(struct iwch_dev *dev) { int ret; @@ -1356,37 +1389,7 @@ int iwch_register_device(struct iwch_dev *dev) dev->ibdev.phys_port_cnt = dev->rdev.port_info.nports; dev->ibdev.num_comp_vectors = 1; dev->ibdev.dev.parent = &dev->rdev.rnic_info.pdev->dev; - dev->ibdev.query_device = iwch_query_device; - dev->ibdev.query_port = iwch_query_port; - dev->ibdev.query_pkey = iwch_query_pkey; - dev->ibdev.query_gid = iwch_query_gid; - dev->ibdev.alloc_ucontext = iwch_alloc_ucontext; - dev->ibdev.dealloc_ucontext = iwch_dealloc_ucontext; - dev->ibdev.mmap = iwch_mmap; - dev->ibdev.alloc_pd = iwch_allocate_pd; - dev->ibdev.dealloc_pd = iwch_deallocate_pd; - dev->ibdev.create_qp = iwch_create_qp; - dev->ibdev.modify_qp = iwch_ib_modify_qp; - dev->ibdev.destroy_qp = iwch_destroy_qp; - dev->ibdev.create_cq = iwch_create_cq; - dev->ibdev.destroy_cq = iwch_destroy_cq; - dev->ibdev.resize_cq = iwch_resize_cq; - dev->ibdev.poll_cq = iwch_poll_cq; - dev->ibdev.get_dma_mr = iwch_get_dma_mr; - dev->ibdev.reg_user_mr = iwch_reg_user_mr; - dev->ibdev.dereg_mr = iwch_dereg_mr; - dev->ibdev.alloc_mw = iwch_alloc_mw; - dev->ibdev.dealloc_mw = iwch_dealloc_mw; - dev->ibdev.alloc_mr = iwch_alloc_mr; - dev->ibdev.map_mr_sg = iwch_map_mr_sg; - dev->ibdev.req_notify_cq = iwch_arm_cq; - dev->ibdev.post_send = iwch_post_send; - dev->ibdev.post_recv = iwch_post_receive; - dev->ibdev.alloc_hw_stats = iwch_alloc_stats; - dev->ibdev.get_hw_stats = iwch_get_mib; dev->ibdev.uverbs_abi_ver = IWCH_UVERBS_ABI_VERSION; - dev->ibdev.get_port_immutable = iwch_port_immutable; - dev->ibdev.get_dev_fw_str = get_dev_fw_ver_str; dev->ibdev.iwcm = kmalloc(sizeof(struct iw_cm_verbs), GFP_KERNEL); if (!dev->ibdev.iwcm) @@ -1405,6 +1408,7 @@ int iwch_register_device(struct iwch_dev *dev) dev->ibdev.driver_id = RDMA_DRIVER_CXGB3; rdma_set_device_sysfs_group(&dev->ibdev, &iwch_attr_group); + ib_set_device_ops(&dev->ibdev, &iwch_dev_ops); ret = ib_register_device(&dev->ibdev, "cxgb3_%d", NULL); if (ret) kfree(dev->ibdev.iwcm); diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c index 97ecc8c684f5..8221813219e5 100644 --- a/drivers/infiniband/hw/cxgb4/cm.c +++ b/drivers/infiniband/hw/cxgb4/cm.c @@ -2793,7 +2793,8 @@ static int peer_abort(struct c4iw_dev *dev, struct sk_buff *skb) break; case MPA_REQ_SENT: (void)stop_ep_timer(ep); - if (mpa_rev == 1 || (mpa_rev == 2 && ep->tried_with_mpa_v1)) + if (status != CPL_ERR_CONN_RESET || mpa_rev == 1 || + (mpa_rev == 2 && ep->tried_with_mpa_v1)) connect_reply_upcall(ep, -ECONNRESET); else { /* diff --git a/drivers/infiniband/hw/cxgb4/provider.c b/drivers/infiniband/hw/cxgb4/provider.c index cbb3c0ddd990..586b0c37481f 100644 --- a/drivers/infiniband/hw/cxgb4/provider.c +++ b/drivers/infiniband/hw/cxgb4/provider.c @@ -531,6 +531,44 @@ static int fill_res_entry(struct sk_buff *msg, struct rdma_restrack_entry *res) c4iw_restrack_funcs[res->type](msg, res) : 0; } +static const struct ib_device_ops c4iw_dev_ops = { + .alloc_hw_stats = c4iw_alloc_stats, + .alloc_mr = c4iw_alloc_mr, + .alloc_mw = c4iw_alloc_mw, + .alloc_pd = c4iw_allocate_pd, + .alloc_ucontext = c4iw_alloc_ucontext, + .create_cq = c4iw_create_cq, + .create_qp = c4iw_create_qp, + .create_srq = c4iw_create_srq, + .dealloc_mw = c4iw_dealloc_mw, + .dealloc_pd = c4iw_deallocate_pd, + .dealloc_ucontext = c4iw_dealloc_ucontext, + .dereg_mr = c4iw_dereg_mr, + .destroy_cq = c4iw_destroy_cq, + .destroy_qp = c4iw_destroy_qp, + .destroy_srq = c4iw_destroy_srq, + .get_dev_fw_str = get_dev_fw_str, + .get_dma_mr = c4iw_get_dma_mr, + .get_hw_stats = c4iw_get_mib, + .get_netdev = get_netdev, + .get_port_immutable = c4iw_port_immutable, + .map_mr_sg = c4iw_map_mr_sg, + .mmap = c4iw_mmap, + .modify_qp = c4iw_ib_modify_qp, + .modify_srq = c4iw_modify_srq, + .poll_cq = c4iw_poll_cq, + .post_recv = c4iw_post_receive, + .post_send = c4iw_post_send, + .post_srq_recv = c4iw_post_srq_recv, + .query_device = c4iw_query_device, + .query_gid = c4iw_query_gid, + .query_pkey = c4iw_query_pkey, + .query_port = c4iw_query_port, + .query_qp = c4iw_ib_query_qp, + .reg_user_mr = c4iw_reg_user_mr, + .req_notify_cq = c4iw_arm_cq, +}; + void c4iw_register_device(struct work_struct *work) { int ret; @@ -573,42 +611,7 @@ void c4iw_register_device(struct work_struct *work) dev->ibdev.phys_port_cnt = dev->rdev.lldi.nports; dev->ibdev.num_comp_vectors = dev->rdev.lldi.nciq; dev->ibdev.dev.parent = &dev->rdev.lldi.pdev->dev; - dev->ibdev.query_device = c4iw_query_device; - dev->ibdev.query_port = c4iw_query_port; - dev->ibdev.query_pkey = c4iw_query_pkey; - dev->ibdev.query_gid = c4iw_query_gid; - dev->ibdev.alloc_ucontext = c4iw_alloc_ucontext; - dev->ibdev.dealloc_ucontext = c4iw_dealloc_ucontext; - dev->ibdev.mmap = c4iw_mmap; - dev->ibdev.alloc_pd = c4iw_allocate_pd; - dev->ibdev.dealloc_pd = c4iw_deallocate_pd; - dev->ibdev.create_qp = c4iw_create_qp; - dev->ibdev.modify_qp = c4iw_ib_modify_qp; - dev->ibdev.query_qp = c4iw_ib_query_qp; - dev->ibdev.destroy_qp = c4iw_destroy_qp; - dev->ibdev.create_srq = c4iw_create_srq; - dev->ibdev.modify_srq = c4iw_modify_srq; - dev->ibdev.destroy_srq = c4iw_destroy_srq; - dev->ibdev.create_cq = c4iw_create_cq; - dev->ibdev.destroy_cq = c4iw_destroy_cq; - dev->ibdev.poll_cq = c4iw_poll_cq; - dev->ibdev.get_dma_mr = c4iw_get_dma_mr; - dev->ibdev.reg_user_mr = c4iw_reg_user_mr; - dev->ibdev.dereg_mr = c4iw_dereg_mr; - dev->ibdev.alloc_mw = c4iw_alloc_mw; - dev->ibdev.dealloc_mw = c4iw_dealloc_mw; - dev->ibdev.alloc_mr = c4iw_alloc_mr; - dev->ibdev.map_mr_sg = c4iw_map_mr_sg; - dev->ibdev.req_notify_cq = c4iw_arm_cq; - dev->ibdev.post_send = c4iw_post_send; - dev->ibdev.post_recv = c4iw_post_receive; - dev->ibdev.post_srq_recv = c4iw_post_srq_recv; - dev->ibdev.alloc_hw_stats = c4iw_alloc_stats; - dev->ibdev.get_hw_stats = c4iw_get_mib; dev->ibdev.uverbs_abi_ver = C4IW_UVERBS_ABI_VERSION; - dev->ibdev.get_port_immutable = c4iw_port_immutable; - dev->ibdev.get_dev_fw_str = get_dev_fw_str; - dev->ibdev.get_netdev = get_netdev; dev->ibdev.iwcm = kmalloc(sizeof(struct iw_cm_verbs), GFP_KERNEL); if (!dev->ibdev.iwcm) { @@ -630,6 +633,7 @@ void c4iw_register_device(struct work_struct *work) rdma_set_device_sysfs_group(&dev->ibdev, &c4iw_attr_group); dev->ibdev.driver_id = RDMA_DRIVER_CXGB4; + ib_set_device_ops(&dev->ibdev, &c4iw_dev_ops); ret = ib_register_device(&dev->ibdev, "cxgb4_%d", NULL); if (ret) goto err_kfree_iwcm; diff --git a/drivers/infiniband/hw/cxgb4/qp.c b/drivers/infiniband/hw/cxgb4/qp.c index 13478f3b7057..981ff5cfb5d1 100644 --- a/drivers/infiniband/hw/cxgb4/qp.c +++ b/drivers/infiniband/hw/cxgb4/qp.c @@ -2163,7 +2163,7 @@ struct ib_qp *c4iw_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *attrs, if (sqsize < 8) sqsize = 8; - ucontext = pd->uobject ? to_c4iw_ucontext(pd->uobject->context) : NULL; + ucontext = udata ? to_c4iw_ucontext(pd->uobject->context) : NULL; qhp = kzalloc(sizeof(*qhp), GFP_KERNEL); if (!qhp) @@ -2564,13 +2564,12 @@ static int alloc_srq_queue(struct c4iw_srq *srq, struct c4iw_dev_ucontext *uctx, wq->rqt_abs_idx = (wq->rqt_hwaddr - rdev->lldi.vr->rq.start) >> T4_RQT_ENTRY_SHIFT; - wq->queue = dma_alloc_coherent(&rdev->lldi.pdev->dev, + wq->queue = dma_zalloc_coherent(&rdev->lldi.pdev->dev, wq->memsize, &wq->dma_addr, GFP_KERNEL); if (!wq->queue) goto err_free_rqtpool; - memset(wq->queue, 0, wq->memsize); dma_unmap_addr_set(wq, mapping, wq->dma_addr); wq->bar2_va = c4iw_bar2_addrs(rdev, wq->qid, CXGB4_BAR2_QTYPE_EGRESS, @@ -2713,7 +2712,7 @@ struct ib_srq *c4iw_create_srq(struct ib_pd *pd, struct ib_srq_init_attr *attrs, rqsize = attrs->attr.max_wr + 1; rqsize = roundup_pow_of_two(max_t(u16, rqsize, 16)); - ucontext = pd->uobject ? to_c4iw_ucontext(pd->uobject->context) : NULL; + ucontext = udata ? to_c4iw_ucontext(pd->uobject->context) : NULL; srq = kzalloc(sizeof(*srq), GFP_KERNEL); if (!srq) diff --git a/drivers/infiniband/hw/hfi1/Makefile b/drivers/infiniband/hw/hfi1/Makefile index ff790390c91a..3ce9dc8c3463 100644 --- a/drivers/infiniband/hw/hfi1/Makefile +++ b/drivers/infiniband/hw/hfi1/Makefile @@ -34,6 +34,7 @@ hfi1-y := \ ruc.o \ sdma.o \ sysfs.o \ + tid_rdma.o \ trace.o \ uc.o \ ud.o \ diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c index 7e6d70936c63..b443642eac02 100644 --- a/drivers/infiniband/hw/hfi1/chip.c +++ b/drivers/infiniband/hw/hfi1/chip.c @@ -1072,6 +1072,8 @@ static void log_state_transition(struct hfi1_pportdata *ppd, u32 state); static void log_physical_state(struct hfi1_pportdata *ppd, u32 state); static int wait_physical_linkstate(struct hfi1_pportdata *ppd, u32 state, int msecs); +static int wait_phys_link_out_of_offline(struct hfi1_pportdata *ppd, + int msecs); static void read_planned_down_reason_code(struct hfi1_devdata *dd, u8 *pdrrc); static void read_link_down_reason(struct hfi1_devdata *dd, u8 *ldr); static void handle_temp_err(struct hfi1_devdata *dd); @@ -10770,13 +10772,15 @@ int set_link_state(struct hfi1_pportdata *ppd, u32 state) break; ppd->port_error_action = 0; - ppd->host_link_state = HLS_DN_POLL; if (quick_linkup) { /* quick linkup does not go into polling */ ret = do_quick_linkup(dd); } else { ret1 = set_physical_link_state(dd, PLS_POLLING); + if (!ret1) + ret1 = wait_phys_link_out_of_offline(ppd, + 3000); if (ret1 != HCMD_SUCCESS) { dd_dev_err(dd, "Failed to transition to Polling link state, return 0x%x\n", @@ -10784,6 +10788,14 @@ int set_link_state(struct hfi1_pportdata *ppd, u32 state) ret = -EINVAL; } } + + /* + * Change the host link state after requesting DC8051 to + * change its physical state so that we can ignore any + * interrupt with stale LNI(XX) error, which will not be + * cleared until DC8051 transitions to Polling state. + */ + ppd->host_link_state = HLS_DN_POLL; ppd->offline_disabled_reason = HFI1_ODR_MASK(OPA_LINKDOWN_REASON_NONE); /* @@ -12928,6 +12940,39 @@ static int wait_phys_link_offline_substates(struct hfi1_pportdata *ppd, return read_state; } +/* + * wait_phys_link_out_of_offline - wait for any out of offline state + * @ppd: port device + * @msecs: the number of milliseconds to wait + * + * Wait up to msecs milliseconds for any out of offline physical link + * state change to occur. + * Returns 0 if at least one state is reached, otherwise -ETIMEDOUT. + */ +static int wait_phys_link_out_of_offline(struct hfi1_pportdata *ppd, + int msecs) +{ + u32 read_state; + unsigned long timeout; + + timeout = jiffies + msecs_to_jiffies(msecs); + while (1) { + read_state = read_physical_state(ppd->dd); + if ((read_state & 0xF0) != PLS_OFFLINE) + break; + if (time_after(jiffies, timeout)) { + dd_dev_err(ppd->dd, + "timeout waiting for phy link out of offline. Read state 0x%x, %dms\n", + read_state, msecs); + return -ETIMEDOUT; + } + usleep_range(1950, 2050); /* sleep 2ms-ish */ + } + + log_state_transition(ppd, read_state); + return read_state; +} + #define CLEAR_STATIC_RATE_CONTROL_SMASK(r) \ (r &= ~SEND_CTXT_CHECK_ENABLE_DISALLOW_PBC_STATIC_RATE_CONTROL_SMASK) diff --git a/drivers/infiniband/hw/hfi1/chip_registers.h b/drivers/infiniband/hw/hfi1/chip_registers.h index c6163a347e93..c0800ea5a3f8 100644 --- a/drivers/infiniband/hw/hfi1/chip_registers.h +++ b/drivers/infiniband/hw/hfi1/chip_registers.h @@ -935,6 +935,10 @@ #define SEND_CTXT_CREDIT_CTRL_THRESHOLD_MASK 0x7FFull #define SEND_CTXT_CREDIT_CTRL_THRESHOLD_SHIFT 0 #define SEND_CTXT_CREDIT_CTRL_THRESHOLD_SMASK 0x7FFull +#define SEND_CTXT_CREDIT_STATUS (TXE + 0x000000100018) +#define SEND_CTXT_CREDIT_STATUS_CURRENT_FREE_COUNTER_MASK 0x7FFull +#define SEND_CTXT_CREDIT_STATUS_CURRENT_FREE_COUNTER_SHIFT 32 +#define SEND_CTXT_CREDIT_STATUS_LAST_RETURNED_COUNTER_SMASK 0x7FFull #define SEND_CTXT_CREDIT_FORCE (TXE + 0x000000100028) #define SEND_CTXT_CREDIT_FORCE_FORCE_RETURN_SMASK 0x1ull #define SEND_CTXT_CREDIT_RETURN_ADDR (TXE + 0x000000100020) diff --git a/drivers/infiniband/hw/hfi1/common.h b/drivers/infiniband/hw/hfi1/common.h index 7108d4d92259..40d3cfb58bd1 100644 --- a/drivers/infiniband/hw/hfi1/common.h +++ b/drivers/infiniband/hw/hfi1/common.h @@ -1,5 +1,5 @@ /* - * Copyright(c) 2015, 2016 Intel Corporation. + * Copyright(c) 2015 - 2018 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. @@ -136,18 +136,21 @@ HFI1_CAP_ALLOW_PERM_JKEY | \ HFI1_CAP_STATIC_RATE_CTRL | \ HFI1_CAP_PRINT_UNIMPL | \ - HFI1_CAP_TID_UNMAP) + HFI1_CAP_TID_UNMAP | \ + HFI1_CAP_OPFN) /* * A set of capability bits that are "global" and are not allowed to be * set in the user bitmask. */ #define HFI1_CAP_RESERVED_MASK ((HFI1_CAP_SDMA | \ - HFI1_CAP_USE_SDMA_HEAD | \ - HFI1_CAP_EXTENDED_PSN | \ - HFI1_CAP_PRINT_UNIMPL | \ - HFI1_CAP_NO_INTEGRITY | \ - HFI1_CAP_PKEY_CHECK) << \ - HFI1_CAP_USER_SHIFT) + HFI1_CAP_USE_SDMA_HEAD | \ + HFI1_CAP_EXTENDED_PSN | \ + HFI1_CAP_PRINT_UNIMPL | \ + HFI1_CAP_NO_INTEGRITY | \ + HFI1_CAP_PKEY_CHECK | \ + HFI1_CAP_TID_RDMA | \ + HFI1_CAP_OPFN) << \ + HFI1_CAP_USER_SHIFT) /* * Set of capabilities that need to be enabled for kernel context in * order to be allowed for user contexts, as well. diff --git a/drivers/infiniband/hw/hfi1/debugfs.c b/drivers/infiniband/hw/hfi1/debugfs.c index 9f992ae36c89..0a557795563c 100644 --- a/drivers/infiniband/hw/hfi1/debugfs.c +++ b/drivers/infiniband/hw/hfi1/debugfs.c @@ -407,6 +407,54 @@ DEBUGFS_SEQ_FILE_OPS(rcds); DEBUGFS_SEQ_FILE_OPEN(rcds) DEBUGFS_FILE_OPS(rcds); +static void *_pios_seq_start(struct seq_file *s, loff_t *pos) +{ + struct hfi1_ibdev *ibd; + struct hfi1_devdata *dd; + + ibd = (struct hfi1_ibdev *)s->private; + dd = dd_from_dev(ibd); + if (!dd->send_contexts || *pos >= dd->num_send_contexts) + return NULL; + return pos; +} + +static void *_pios_seq_next(struct seq_file *s, void *v, loff_t *pos) +{ + struct hfi1_ibdev *ibd = (struct hfi1_ibdev *)s->private; + struct hfi1_devdata *dd = dd_from_dev(ibd); + + ++*pos; + if (!dd->send_contexts || *pos >= dd->num_send_contexts) + return NULL; + return pos; +} + +static void _pios_seq_stop(struct seq_file *s, void *v) +{ +} + +static int _pios_seq_show(struct seq_file *s, void *v) +{ + struct hfi1_ibdev *ibd = (struct hfi1_ibdev *)s->private; + struct hfi1_devdata *dd = dd_from_dev(ibd); + struct send_context_info *sci; + loff_t *spos = v; + loff_t i = *spos; + unsigned long flags; + + spin_lock_irqsave(&dd->sc_lock, flags); + sci = &dd->send_contexts[i]; + if (sci && sci->type != SC_USER && sci->allocated && sci->sc) + seqfile_dump_sci(s, i, sci); + spin_unlock_irqrestore(&dd->sc_lock, flags); + return 0; +} + +DEBUGFS_SEQ_FILE_OPS(pios); +DEBUGFS_SEQ_FILE_OPEN(pios) +DEBUGFS_FILE_OPS(pios); + /* read the per-device counters */ static ssize_t dev_counters_read(struct file *file, char __user *buf, size_t count, loff_t *ppos) @@ -1143,6 +1191,7 @@ void hfi1_dbg_ibdev_init(struct hfi1_ibdev *ibd) DEBUGFS_SEQ_FILE_CREATE(qp_stats, ibd->hfi1_ibdev_dbg, ibd); DEBUGFS_SEQ_FILE_CREATE(sdes, ibd->hfi1_ibdev_dbg, ibd); DEBUGFS_SEQ_FILE_CREATE(rcds, ibd->hfi1_ibdev_dbg, ibd); + DEBUGFS_SEQ_FILE_CREATE(pios, ibd->hfi1_ibdev_dbg, ibd); DEBUGFS_SEQ_FILE_CREATE(sdma_cpu_list, ibd->hfi1_ibdev_dbg, ibd); /* dev counter files */ for (i = 0; i < ARRAY_SIZE(cntr_ops); i++) diff --git a/drivers/infiniband/hw/hfi1/driver.c b/drivers/infiniband/hw/hfi1/driver.c index a41f85558312..a8ad70730203 100644 --- a/drivers/infiniband/hw/hfi1/driver.c +++ b/drivers/infiniband/hw/hfi1/driver.c @@ -430,40 +430,60 @@ static const hfi1_handle_cnp hfi1_handle_cnp_tbl[2] = { [HFI1_PKT_TYPE_16B] = &return_cnp_16B }; -void hfi1_process_ecn_slowpath(struct rvt_qp *qp, struct hfi1_packet *pkt, - bool do_cnp) +/** + * hfi1_process_ecn_slowpath - Process FECN or BECN bits + * @qp: The packet's destination QP + * @pkt: The packet itself. + * @prescan: Is the caller the RXQ prescan + * + * Process the packet's FECN or BECN bits. By now, the packet + * has already been evaluated whether processing of those bit should + * be done. + * The significance of the @prescan argument is that if the caller + * is the RXQ prescan, a CNP will be send out instead of waiting for the + * normal packet processing to send an ACK with BECN set (or a CNP). + */ +bool hfi1_process_ecn_slowpath(struct rvt_qp *qp, struct hfi1_packet *pkt, + bool prescan) { struct hfi1_ibport *ibp = to_iport(qp->ibqp.device, qp->port_num); struct hfi1_pportdata *ppd = ppd_from_ibp(ibp); struct ib_other_headers *ohdr = pkt->ohdr; struct ib_grh *grh = pkt->grh; - u32 rqpn = 0, bth1; + u32 rqpn = 0; u16 pkey; u32 rlid, slid, dlid = 0; - u8 hdr_type, sc, svc_type; - bool is_mcast = false; + u8 hdr_type, sc, svc_type, opcode; + bool is_mcast = false, ignore_fecn = false, do_cnp = false, + fecn, becn; /* can be called from prescan */ if (pkt->etype == RHF_RCV_TYPE_BYPASS) { - is_mcast = hfi1_is_16B_mcast(dlid); pkey = hfi1_16B_get_pkey(pkt->hdr); sc = hfi1_16B_get_sc(pkt->hdr); dlid = hfi1_16B_get_dlid(pkt->hdr); slid = hfi1_16B_get_slid(pkt->hdr); + is_mcast = hfi1_is_16B_mcast(dlid); + opcode = ib_bth_get_opcode(ohdr); hdr_type = HFI1_PKT_TYPE_16B; + fecn = hfi1_16B_get_fecn(pkt->hdr); + becn = hfi1_16B_get_becn(pkt->hdr); } else { - is_mcast = (dlid > be16_to_cpu(IB_MULTICAST_LID_BASE)) && - (dlid != be16_to_cpu(IB_LID_PERMISSIVE)); pkey = ib_bth_get_pkey(ohdr); sc = hfi1_9B_get_sc5(pkt->hdr, pkt->rhf); - dlid = ib_get_dlid(pkt->hdr); + dlid = qp->ibqp.qp_type != IB_QPT_UD ? ib_get_dlid(pkt->hdr) : + ppd->lid; slid = ib_get_slid(pkt->hdr); + is_mcast = (dlid > be16_to_cpu(IB_MULTICAST_LID_BASE)) && + (dlid != be16_to_cpu(IB_LID_PERMISSIVE)); + opcode = ib_bth_get_opcode(ohdr); hdr_type = HFI1_PKT_TYPE_9B; + fecn = ib_bth_get_fecn(ohdr); + becn = ib_bth_get_becn(ohdr); } switch (qp->ibqp.qp_type) { case IB_QPT_UD: - dlid = ppd->lid; rlid = slid; rqpn = ib_get_sqpn(pkt->ohdr); svc_type = IB_CC_SVCTYPE_UD; @@ -485,22 +505,31 @@ void hfi1_process_ecn_slowpath(struct rvt_qp *qp, struct hfi1_packet *pkt, svc_type = IB_CC_SVCTYPE_RC; break; default: - return; + return false; } - bth1 = be32_to_cpu(ohdr->bth[1]); + ignore_fecn = is_mcast || (opcode == IB_OPCODE_CNP) || + (opcode == IB_OPCODE_RC_ACKNOWLEDGE); + /* + * ACKNOWLEDGE packets do not get a CNP but this will be + * guarded by ignore_fecn above. + */ + do_cnp = prescan || + (opcode >= IB_OPCODE_RC_RDMA_READ_RESPONSE_FIRST && + opcode <= IB_OPCODE_RC_ATOMIC_ACKNOWLEDGE); + /* Call appropriate CNP handler */ - if (do_cnp && (bth1 & IB_FECN_SMASK)) + if (!ignore_fecn && do_cnp && fecn) hfi1_handle_cnp_tbl[hdr_type](ibp, qp, rqpn, pkey, dlid, rlid, sc, grh); - if (!is_mcast && (bth1 & IB_BECN_SMASK)) { - u32 lqpn = bth1 & RVT_QPN_MASK; + if (becn) { + u32 lqpn = be32_to_cpu(ohdr->bth[1]) & RVT_QPN_MASK; u8 sl = ibp->sc_to_sl[sc]; process_becn(ppd, sl, rlid, lqpn, rqpn, svc_type); } - + return !ignore_fecn && fecn; } struct ps_mdata { @@ -599,7 +628,6 @@ static void __prescan_rxq(struct hfi1_packet *packet) struct rvt_dev_info *rdi = &rcd->dd->verbs_dev.rdi; u64 rhf = rhf_to_cpu(rhf_addr); u32 etype = rhf_rcv_type(rhf), qpn, bth1; - int is_ecn = 0; u8 lnh; if (ps_done(&mdata, rhf, rcd)) @@ -625,12 +653,10 @@ static void __prescan_rxq(struct hfi1_packet *packet) goto next; /* just in case */ } - bth1 = be32_to_cpu(packet->ohdr->bth[1]); - is_ecn = !!(bth1 & (IB_FECN_SMASK | IB_BECN_SMASK)); - - if (!is_ecn) + if (!hfi1_may_ecn(packet)) goto next; + bth1 = be32_to_cpu(packet->ohdr->bth[1]); qpn = bth1 & RVT_QPN_MASK; rcu_read_lock(); qp = rvt_lookup_qpn(rdi, &ibp->rvp, qpn); @@ -640,7 +666,7 @@ static void __prescan_rxq(struct hfi1_packet *packet) goto next; } - process_ecn(qp, packet, true); + hfi1_process_ecn_slowpath(qp, packet, true); rcu_read_unlock(); /* turn off BECN, FECN */ @@ -1400,7 +1426,7 @@ static int hfi1_bypass_ingress_pkt_check(struct hfi1_packet *packet) if ((!(hfi1_is_16B_mcast(packet->dlid))) && (packet->dlid != opa_get_lid(be32_to_cpu(OPA_LID_PERMISSIVE), 16B))) { - if (packet->dlid != ppd->lid) + if ((packet->dlid & ~((1 << ppd->lmc) - 1)) != ppd->lid) return -EINVAL; } diff --git a/drivers/infiniband/hw/hfi1/hfi.h b/drivers/infiniband/hw/hfi1/hfi.h index 2b882347d0c2..6db2276f5c13 100644 --- a/drivers/infiniband/hw/hfi1/hfi.h +++ b/drivers/infiniband/hw/hfi1/hfi.h @@ -1804,13 +1804,20 @@ static inline struct hfi1_ibport *rcd_to_iport(struct hfi1_ctxtdata *rcd) return &rcd->ppd->ibport_data; } -void hfi1_process_ecn_slowpath(struct rvt_qp *qp, struct hfi1_packet *pkt, - bool do_cnp); -static inline bool process_ecn(struct rvt_qp *qp, struct hfi1_packet *pkt, - bool do_cnp) +/** + * hfi1_may_ecn - Check whether FECN or BECN processing should be done + * @pkt: the packet to be evaluated + * + * Check whether the FECN or BECN bits in the packet's header are + * enabled, depending on packet type. + * + * This function only checks for FECN and BECN bits. Additional checks + * are done in the slowpath (hfi1_process_ecn_slowpath()) in order to + * ensure correct handling. + */ +static inline bool hfi1_may_ecn(struct hfi1_packet *pkt) { - bool becn; - bool fecn; + bool fecn, becn; if (pkt->etype == RHF_RCV_TYPE_BYPASS) { fecn = hfi1_16B_get_fecn(pkt->hdr); @@ -1819,10 +1826,18 @@ static inline bool process_ecn(struct rvt_qp *qp, struct hfi1_packet *pkt, fecn = ib_bth_get_fecn(pkt->ohdr); becn = ib_bth_get_becn(pkt->ohdr); } - if (unlikely(fecn || becn)) { - hfi1_process_ecn_slowpath(qp, pkt, do_cnp); - return fecn; - } + return fecn || becn; +} + +bool hfi1_process_ecn_slowpath(struct rvt_qp *qp, struct hfi1_packet *pkt, + bool prescan); +static inline bool process_ecn(struct rvt_qp *qp, struct hfi1_packet *pkt) +{ + bool do_work; + + do_work = hfi1_may_ecn(pkt); + if (unlikely(do_work)) + return hfi1_process_ecn_slowpath(qp, pkt, false); return false; } diff --git a/drivers/infiniband/hw/hfi1/mad.c b/drivers/infiniband/hw/hfi1/mad.c index 88a0cf930136..4228393e6c4c 100644 --- a/drivers/infiniband/hw/hfi1/mad.c +++ b/drivers/infiniband/hw/hfi1/mad.c @@ -305,7 +305,7 @@ static struct ib_ah *hfi1_create_qp0_ah(struct hfi1_ibport *ibp, u32 dlid) rcu_read_lock(); qp0 = rcu_dereference(ibp->rvp.qp[0]); if (qp0) - ah = rdma_create_ah(qp0->ibqp.pd, &attr); + ah = rdma_create_ah(qp0->ibqp.pd, &attr, 0); rcu_read_unlock(); return ah; } diff --git a/drivers/infiniband/hw/hfi1/pio.c b/drivers/infiniband/hw/hfi1/pio.c index 9ab50d2308dc..dd5a5c030066 100644 --- a/drivers/infiniband/hw/hfi1/pio.c +++ b/drivers/infiniband/hw/hfi1/pio.c @@ -742,6 +742,7 @@ struct send_context *sc_alloc(struct hfi1_devdata *dd, int type, spin_lock_init(&sc->alloc_lock); spin_lock_init(&sc->release_lock); spin_lock_init(&sc->credit_ctrl_lock); + seqlock_init(&sc->waitlock); INIT_LIST_HEAD(&sc->piowait); INIT_WORK(&sc->halt_work, sc_halted); init_waitqueue_head(&sc->halt_wait); @@ -1593,7 +1594,6 @@ void hfi1_sc_wantpiobuf_intr(struct send_context *sc, u32 needint) static void sc_piobufavail(struct send_context *sc) { struct hfi1_devdata *dd = sc->dd; - struct hfi1_ibdev *dev = &dd->verbs_dev; struct list_head *list; struct rvt_qp *qps[PIO_WAIT_BATCH_SIZE]; struct rvt_qp *qp; @@ -1612,7 +1612,7 @@ static void sc_piobufavail(struct send_context *sc) * could end up with QPs on the wait list with the interrupt * disabled. */ - write_seqlock_irqsave(&dev->iowait_lock, flags); + write_seqlock_irqsave(&sc->waitlock, flags); while (!list_empty(list)) { struct iowait *wait; @@ -1636,7 +1636,7 @@ static void sc_piobufavail(struct send_context *sc) if (!list_empty(list)) hfi1_sc_wantpiobuf_intr(sc, 1); } - write_sequnlock_irqrestore(&dev->iowait_lock, flags); + write_sequnlock_irqrestore(&sc->waitlock, flags); /* Wake up the most starved one first */ if (n) @@ -2137,3 +2137,28 @@ void free_credit_return(struct hfi1_devdata *dd) kfree(dd->cr_base); dd->cr_base = NULL; } + +void seqfile_dump_sci(struct seq_file *s, u32 i, + struct send_context_info *sci) +{ + struct send_context *sc = sci->sc; + u64 reg; + + seq_printf(s, "SCI %u: type %u base %u credits %u\n", + i, sci->type, sci->base, sci->credits); + seq_printf(s, " flags 0x%x sw_inx %u hw_ctxt %u grp %u\n", + sc->flags, sc->sw_index, sc->hw_context, sc->group); + seq_printf(s, " sr_size %u credits %u sr_head %u sr_tail %u\n", + sc->sr_size, sc->credits, sc->sr_head, sc->sr_tail); + seq_printf(s, " fill %lu free %lu fill_wrap %u alloc_free %lu\n", + sc->fill, sc->free, sc->fill_wrap, sc->alloc_free); + seq_printf(s, " credit_intr_count %u credit_ctrl 0x%llx\n", + sc->credit_intr_count, sc->credit_ctrl); + reg = read_kctxt_csr(sc->dd, sc->hw_context, SC(CREDIT_STATUS)); + seq_printf(s, " *hw_free %llu CurrentFree %llu LastReturned %llu\n", + (le64_to_cpu(*sc->hw_free) & CR_COUNTER_SMASK) >> + CR_COUNTER_SHIFT, + (reg >> SC(CREDIT_STATUS_CURRENT_FREE_COUNTER_SHIFT)) & + SC(CREDIT_STATUS_CURRENT_FREE_COUNTER_MASK), + reg & SC(CREDIT_STATUS_LAST_RETURNED_COUNTER_SMASK)); +} diff --git a/drivers/infiniband/hw/hfi1/pio.h b/drivers/infiniband/hw/hfi1/pio.h index aaf372c3e5d6..c9a58b642bdd 100644 --- a/drivers/infiniband/hw/hfi1/pio.h +++ b/drivers/infiniband/hw/hfi1/pio.h @@ -127,6 +127,8 @@ struct send_context { volatile __le64 *hw_free; /* HW free counter */ /* list for PIO waiters */ struct list_head piowait ____cacheline_aligned_in_smp; + seqlock_t waitlock; + spinlock_t credit_ctrl_lock ____cacheline_aligned_in_smp; u32 credit_intr_count; /* count of credit intr users */ u64 credit_ctrl; /* cache for credit control */ @@ -329,4 +331,7 @@ void seg_pio_copy_start(struct pio_buf *pbuf, u64 pbc, void seg_pio_copy_mid(struct pio_buf *pbuf, const void *from, size_t nbytes); void seg_pio_copy_end(struct pio_buf *pbuf); +void seqfile_dump_sci(struct seq_file *s, u32 i, + struct send_context_info *sci); + #endif /* _PIO_H */ diff --git a/drivers/infiniband/hw/hfi1/qp.c b/drivers/infiniband/hw/hfi1/qp.c index 1a016248039f..5344e8993b28 100644 --- a/drivers/infiniband/hw/hfi1/qp.c +++ b/drivers/infiniband/hw/hfi1/qp.c @@ -375,20 +375,18 @@ bool _hfi1_schedule_send(struct rvt_qp *qp) static void qp_pio_drain(struct rvt_qp *qp) { - struct hfi1_ibdev *dev; struct hfi1_qp_priv *priv = qp->priv; if (!priv->s_sendcontext) return; - dev = to_idev(qp->ibqp.device); while (iowait_pio_pending(&priv->s_iowait)) { - write_seqlock_irq(&dev->iowait_lock); + write_seqlock_irq(&priv->s_sendcontext->waitlock); hfi1_sc_wantpiobuf_intr(priv->s_sendcontext, 1); - write_sequnlock_irq(&dev->iowait_lock); + write_sequnlock_irq(&priv->s_sendcontext->waitlock); iowait_pio_drain(&priv->s_iowait); - write_seqlock_irq(&dev->iowait_lock); + write_seqlock_irq(&priv->s_sendcontext->waitlock); hfi1_sc_wantpiobuf_intr(priv->s_sendcontext, 0); - write_sequnlock_irq(&dev->iowait_lock); + write_sequnlock_irq(&priv->s_sendcontext->waitlock); } } @@ -459,7 +457,6 @@ static int iowait_sleep( struct hfi1_qp_priv *priv; unsigned long flags; int ret = 0; - struct hfi1_ibdev *dev; qp = tx->qp; priv = qp->priv; @@ -472,9 +469,8 @@ static int iowait_sleep( * buffer and undoing the side effects of the copy. */ /* Make a common routine? */ - dev = &sde->dd->verbs_dev; list_add_tail(&stx->list, &wait->tx_head); - write_seqlock(&dev->iowait_lock); + write_seqlock(&sde->waitlock); if (sdma_progress(sde, seq, stx)) goto eagain; if (list_empty(&priv->s_iowait.list)) { @@ -485,11 +481,11 @@ static int iowait_sleep( qp->s_flags |= RVT_S_WAIT_DMA_DESC; iowait_queue(pkts_sent, &priv->s_iowait, &sde->dmawait); - priv->s_iowait.lock = &dev->iowait_lock; + priv->s_iowait.lock = &sde->waitlock; trace_hfi1_qpsleep(qp, RVT_S_WAIT_DMA_DESC); rvt_get_qp(qp); } - write_sequnlock(&dev->iowait_lock); + write_sequnlock(&sde->waitlock); hfi1_qp_unbusy(qp, wait); spin_unlock_irqrestore(&qp->s_lock, flags); ret = -EBUSY; @@ -499,7 +495,7 @@ static int iowait_sleep( } return ret; eagain: - write_sequnlock(&dev->iowait_lock); + write_sequnlock(&sde->waitlock); spin_unlock_irqrestore(&qp->s_lock, flags); list_del_init(&stx->list); return -EAGAIN; diff --git a/drivers/infiniband/hw/hfi1/rc.c b/drivers/infiniband/hw/hfi1/rc.c index 188aa4f686a0..be603f35d7e4 100644 --- a/drivers/infiniband/hw/hfi1/rc.c +++ b/drivers/infiniband/hw/hfi1/rc.c @@ -1157,6 +1157,7 @@ void hfi1_rc_send_complete(struct rvt_qp *qp, struct hfi1_opa_header *opah) if (cmp_psn(wqe->lpsn, qp->s_sending_psn) >= 0 && cmp_psn(qp->s_sending_psn, qp->s_sending_hpsn) <= 0) break; + rvt_qp_wqe_unreserve(qp, wqe); s_last = qp->s_last; trace_hfi1_qp_send_completion(qp, wqe, s_last); if (++s_last >= qp->s_size) @@ -1209,6 +1210,7 @@ static struct rvt_swqe *do_rc_completion(struct rvt_qp *qp, u32 s_last; rvt_put_swqe(wqe); + rvt_qp_wqe_unreserve(qp, wqe); s_last = qp->s_last; trace_hfi1_qp_send_completion(qp, wqe, s_last); if (++s_last >= qp->s_size) @@ -2049,8 +2051,7 @@ void hfi1_rc_rcv(struct hfi1_packet *packet) struct ib_reth *reth; unsigned long flags; int ret; - bool is_fecn = false; - bool copy_last = false; + bool copy_last = false, fecn; u32 rkey; u8 extra_bytes = pad + packet->extra_byte + (SIZE_OF_CRC << 2); @@ -2059,7 +2060,7 @@ void hfi1_rc_rcv(struct hfi1_packet *packet) if (hfi1_ruc_check_hdr(ibp, packet)) return; - is_fecn = process_ecn(qp, packet, false); + fecn = process_ecn(qp, packet); /* * Process responses (ACKs) before anything else. Note that the @@ -2070,8 +2071,6 @@ void hfi1_rc_rcv(struct hfi1_packet *packet) if (opcode >= OP(RDMA_READ_RESPONSE_FIRST) && opcode <= OP(ATOMIC_ACKNOWLEDGE)) { rc_rcv_resp(packet); - if (is_fecn) - goto send_ack; return; } @@ -2347,11 +2346,11 @@ send_last: /* Schedule the send engine. */ qp->s_flags |= RVT_S_RESP_PENDING; + if (fecn) + qp->s_flags |= RVT_S_ECN; hfi1_schedule_send(qp); spin_unlock_irqrestore(&qp->s_lock, flags); - if (is_fecn) - goto send_ack; return; } @@ -2413,11 +2412,11 @@ send_last: /* Schedule the send engine. */ qp->s_flags |= RVT_S_RESP_PENDING; + if (fecn) + qp->s_flags |= RVT_S_ECN; hfi1_schedule_send(qp); spin_unlock_irqrestore(&qp->s_lock, flags); - if (is_fecn) - goto send_ack; return; } @@ -2430,16 +2429,9 @@ send_last: qp->r_ack_psn = psn; qp->r_nak_state = 0; /* Send an ACK if requested or required. */ - if (psn & IB_BTH_REQ_ACK) { - if (packet->numpkt == 0) { - rc_cancel_ack(qp); - goto send_ack; - } - if (qp->r_adefered >= HFI1_PSN_CREDIT) { - rc_cancel_ack(qp); - goto send_ack; - } - if (unlikely(is_fecn)) { + if (psn & IB_BTH_REQ_ACK || fecn) { + if (packet->numpkt == 0 || fecn || + qp->r_adefered >= HFI1_PSN_CREDIT) { rc_cancel_ack(qp); goto send_ack; } @@ -2480,7 +2472,7 @@ nack_acc: qp->r_nak_state = IB_NAK_REMOTE_ACCESS_ERROR; qp->r_ack_psn = qp->r_psn; send_ack: - hfi1_send_rc_ack(packet, is_fecn); + hfi1_send_rc_ack(packet, fecn); } void hfi1_rc_hdrerr( diff --git a/drivers/infiniband/hw/hfi1/sdma.c b/drivers/infiniband/hw/hfi1/sdma.c index 891d2386d1ca..b84356e1a4c1 100644 --- a/drivers/infiniband/hw/hfi1/sdma.c +++ b/drivers/infiniband/hw/hfi1/sdma.c @@ -1424,6 +1424,7 @@ int sdma_init(struct hfi1_devdata *dd, u8 port) seqlock_init(&sde->head_lock); spin_lock_init(&sde->senddmactrl_lock); spin_lock_init(&sde->flushlist_lock); + seqlock_init(&sde->waitlock); /* insure there is always a zero bit */ sde->ahg_bits = 0xfffffffe00000000ULL; @@ -1758,7 +1759,6 @@ static void sdma_desc_avail(struct sdma_engine *sde, uint avail) struct iowait *wait, *nw; struct iowait *waits[SDMA_WAIT_BATCH_SIZE]; uint i, n = 0, seq, max_idx = 0; - struct hfi1_ibdev *dev = &sde->dd->verbs_dev; u8 max_starved_cnt = 0; #ifdef CONFIG_SDMA_VERBOSITY @@ -1768,10 +1768,10 @@ static void sdma_desc_avail(struct sdma_engine *sde, uint avail) #endif do { - seq = read_seqbegin(&dev->iowait_lock); + seq = read_seqbegin(&sde->waitlock); if (!list_empty(&sde->dmawait)) { /* at least one item */ - write_seqlock(&dev->iowait_lock); + write_seqlock(&sde->waitlock); /* Harvest waiters wanting DMA descriptors */ list_for_each_entry_safe( wait, @@ -1794,10 +1794,10 @@ static void sdma_desc_avail(struct sdma_engine *sde, uint avail) list_del_init(&wait->list); waits[n++] = wait; } - write_sequnlock(&dev->iowait_lock); + write_sequnlock(&sde->waitlock); break; } - } while (read_seqretry(&dev->iowait_lock, seq)); + } while (read_seqretry(&sde->waitlock, seq)); /* Schedule the most starved one first */ if (n) diff --git a/drivers/infiniband/hw/hfi1/sdma.h b/drivers/infiniband/hw/hfi1/sdma.h index 6dc63d7c5685..1e2e40f79cb2 100644 --- a/drivers/infiniband/hw/hfi1/sdma.h +++ b/drivers/infiniband/hw/hfi1/sdma.h @@ -382,6 +382,7 @@ struct sdma_engine { u64 progress_int_cnt; /* private: */ + seqlock_t waitlock; struct list_head dmawait; /* CONFIG SDMA for now, just blindly duplicate */ diff --git a/drivers/infiniband/hw/hfi1/tid_rdma.c b/drivers/infiniband/hw/hfi1/tid_rdma.c new file mode 100644 index 000000000000..da1ecb68a928 --- /dev/null +++ b/drivers/infiniband/hw/hfi1/tid_rdma.c @@ -0,0 +1,48 @@ +// SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause) +/* + * Copyright(c) 2018 Intel Corporation. + * + */ + +#include "hfi.h" +#include "verbs.h" +#include "tid_rdma.h" + +/** + * qp_to_rcd - determine the receive context used by a qp + * @qp - the qp + * + * This routine returns the receive context associated + * with a a qp's qpn. + * + * Returns the context. + */ +static struct hfi1_ctxtdata *qp_to_rcd(struct rvt_dev_info *rdi, + struct rvt_qp *qp) +{ + struct hfi1_ibdev *verbs_dev = container_of(rdi, + struct hfi1_ibdev, + rdi); + struct hfi1_devdata *dd = container_of(verbs_dev, + struct hfi1_devdata, + verbs_dev); + unsigned int ctxt; + + if (qp->ibqp.qp_num == 0) + ctxt = 0; + else + ctxt = ((qp->ibqp.qp_num >> dd->qos_shift) % + (dd->n_krcv_queues - 1)) + 1; + + return dd->rcd[ctxt]; +} + +int hfi1_qp_priv_init(struct rvt_dev_info *rdi, struct rvt_qp *qp, + struct ib_qp_init_attr *init_attr) +{ + struct hfi1_qp_priv *qpriv = qp->priv; + + qpriv->rcd = qp_to_rcd(rdi, qp); + + return 0; +} diff --git a/drivers/infiniband/hw/hfi1/tid_rdma.h b/drivers/infiniband/hw/hfi1/tid_rdma.h new file mode 100644 index 000000000000..6fcd3adcdcc3 --- /dev/null +++ b/drivers/infiniband/hw/hfi1/tid_rdma.h @@ -0,0 +1,13 @@ +/* SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause) */ +/* + * Copyright(c) 2018 Intel Corporation. + * + */ +#ifndef HFI1_TID_RDMA_H +#define HFI1_TID_RDMA_H + +int hfi1_qp_priv_init(struct rvt_dev_info *rdi, struct rvt_qp *qp, + struct ib_qp_init_attr *init_attr); + +#endif /* HFI1_TID_RDMA_H */ + diff --git a/drivers/infiniband/hw/hfi1/uc.c b/drivers/infiniband/hw/hfi1/uc.c index 6aca0c5a7f97..6ba47037c424 100644 --- a/drivers/infiniband/hw/hfi1/uc.c +++ b/drivers/infiniband/hw/hfi1/uc.c @@ -321,7 +321,7 @@ void hfi1_uc_rcv(struct hfi1_packet *packet) if (hfi1_ruc_check_hdr(ibp, packet)) return; - process_ecn(qp, packet, true); + process_ecn(qp, packet); psn = ib_bth_get_psn(ohdr); /* Compare the PSN verses the expected PSN. */ diff --git a/drivers/infiniband/hw/hfi1/ud.c b/drivers/infiniband/hw/hfi1/ud.c index 4baa8f4d49de..88242fe95eaa 100644 --- a/drivers/infiniband/hw/hfi1/ud.c +++ b/drivers/infiniband/hw/hfi1/ud.c @@ -51,6 +51,7 @@ #include "hfi.h" #include "mad.h" #include "verbs_txreq.h" +#include "trace_ibhdrs.h" #include "qp.h" /* We support only two types - 9B and 16B for now */ @@ -656,18 +657,19 @@ void return_cnp_16B(struct hfi1_ibport *ibp, struct rvt_qp *qp, u32 bth0, plen, vl, hwords = 7; u16 len; u8 l4; - struct hfi1_16b_header hdr; + struct hfi1_opa_header hdr; struct ib_other_headers *ohdr; struct pio_buf *pbuf; struct send_context *ctxt = qp_to_send_context(qp, sc5); struct hfi1_pportdata *ppd = ppd_from_ibp(ibp); u32 nwords; + hdr.hdr_type = HFI1_PKT_TYPE_16B; /* Populate length */ nwords = ((hfi1_get_16b_padding(hwords << 2, 0) + SIZE_OF_LT) >> 2) + SIZE_OF_CRC; if (old_grh) { - struct ib_grh *grh = &hdr.u.l.grh; + struct ib_grh *grh = &hdr.opah.u.l.grh; grh->version_tclass_flow = old_grh->version_tclass_flow; grh->paylen = cpu_to_be16( @@ -675,11 +677,11 @@ void return_cnp_16B(struct hfi1_ibport *ibp, struct rvt_qp *qp, grh->hop_limit = 0xff; grh->sgid = old_grh->dgid; grh->dgid = old_grh->sgid; - ohdr = &hdr.u.l.oth; + ohdr = &hdr.opah.u.l.oth; l4 = OPA_16B_L4_IB_GLOBAL; hwords += sizeof(struct ib_grh) / sizeof(u32); } else { - ohdr = &hdr.u.oth; + ohdr = &hdr.opah.u.oth; l4 = OPA_16B_L4_IB_LOCAL; } @@ -693,7 +695,7 @@ void return_cnp_16B(struct hfi1_ibport *ibp, struct rvt_qp *qp, /* Convert dwords to flits */ len = (hwords + nwords) >> 1; - hfi1_make_16b_hdr(&hdr, slid, dlid, len, pkey, 1, 0, l4, sc5); + hfi1_make_16b_hdr(&hdr.opah, slid, dlid, len, pkey, 1, 0, l4, sc5); plen = 2 /* PBC */ + hwords + nwords; pbc_flags |= PBC_PACKET_BYPASS | PBC_INSERT_BYPASS_ICRC; @@ -701,9 +703,11 @@ void return_cnp_16B(struct hfi1_ibport *ibp, struct rvt_qp *qp, pbc = create_pbc(ppd, pbc_flags, qp->srate_mbps, vl, plen); if (ctxt) { pbuf = sc_buffer_alloc(ctxt, plen, NULL, NULL); - if (pbuf) + if (pbuf) { + trace_pio_output_ibhdr(ppd->dd, &hdr, sc5); ppd->dd->pio_inline_send(ppd->dd, pbuf, pbc, &hdr, hwords); + } } } @@ -715,14 +719,15 @@ void return_cnp(struct hfi1_ibport *ibp, struct rvt_qp *qp, u32 remote_qpn, u32 bth0, plen, vl, hwords = 5; u16 lrh0; u8 sl = ibp->sc_to_sl[sc5]; - struct ib_header hdr; + struct hfi1_opa_header hdr; struct ib_other_headers *ohdr; struct pio_buf *pbuf; struct send_context *ctxt = qp_to_send_context(qp, sc5); struct hfi1_pportdata *ppd = ppd_from_ibp(ibp); + hdr.hdr_type = HFI1_PKT_TYPE_9B; if (old_grh) { - struct ib_grh *grh = &hdr.u.l.grh; + struct ib_grh *grh = &hdr.ibh.u.l.grh; grh->version_tclass_flow = old_grh->version_tclass_flow; grh->paylen = cpu_to_be16( @@ -730,11 +735,11 @@ void return_cnp(struct hfi1_ibport *ibp, struct rvt_qp *qp, u32 remote_qpn, grh->hop_limit = 0xff; grh->sgid = old_grh->dgid; grh->dgid = old_grh->sgid; - ohdr = &hdr.u.l.oth; + ohdr = &hdr.ibh.u.l.oth; lrh0 = HFI1_LRH_GRH; hwords += sizeof(struct ib_grh) / sizeof(u32); } else { - ohdr = &hdr.u.oth; + ohdr = &hdr.ibh.u.oth; lrh0 = HFI1_LRH_BTH; } @@ -746,16 +751,18 @@ void return_cnp(struct hfi1_ibport *ibp, struct rvt_qp *qp, u32 remote_qpn, ohdr->bth[1] = cpu_to_be32(remote_qpn | (1 << IB_BECN_SHIFT)); ohdr->bth[2] = 0; /* PSN 0 */ - hfi1_make_ib_hdr(&hdr, lrh0, hwords + SIZE_OF_CRC, dlid, slid); + hfi1_make_ib_hdr(&hdr.ibh, lrh0, hwords + SIZE_OF_CRC, dlid, slid); plen = 2 /* PBC */ + hwords; pbc_flags |= (ib_is_sc5(sc5) << PBC_DC_INFO_SHIFT); vl = sc_to_vlt(ppd->dd, sc5); pbc = create_pbc(ppd, pbc_flags, qp->srate_mbps, vl, plen); if (ctxt) { pbuf = sc_buffer_alloc(ctxt, plen, NULL, NULL); - if (pbuf) + if (pbuf) { + trace_pio_output_ibhdr(ppd->dd, &hdr, sc5); ppd->dd->pio_inline_send(ppd->dd, pbuf, pbc, &hdr, hwords); + } } } @@ -912,7 +919,7 @@ void hfi1_ud_rcv(struct hfi1_packet *packet) src_qp = hfi1_16B_get_src_qpn(packet->mgmt); } - process_ecn(qp, packet, (opcode != IB_OPCODE_CNP)); + process_ecn(qp, packet); /* * Get the number of bytes the message was padded by * and drop incomplete packets. diff --git a/drivers/infiniband/hw/hfi1/user_sdma.c b/drivers/infiniband/hw/hfi1/user_sdma.c index 3f0aadccd9f6..e5e7fad09f32 100644 --- a/drivers/infiniband/hw/hfi1/user_sdma.c +++ b/drivers/infiniband/hw/hfi1/user_sdma.c @@ -130,7 +130,6 @@ static int defer_packet_queue( { struct hfi1_user_sdma_pkt_q *pq = container_of(wait->iow, struct hfi1_user_sdma_pkt_q, busy); - struct hfi1_ibdev *dev = &pq->dd->verbs_dev; struct user_sdma_txreq *tx = container_of(txreq, struct user_sdma_txreq, txreq); @@ -144,10 +143,10 @@ static int defer_packet_queue( * it is supposed to be enqueued. */ xchg(&pq->state, SDMA_PKT_Q_DEFERRED); - write_seqlock(&dev->iowait_lock); + write_seqlock(&sde->waitlock); if (list_empty(&pq->busy.list)) iowait_queue(pkts_sent, &pq->busy, &sde->dmawait); - write_sequnlock(&dev->iowait_lock); + write_sequnlock(&sde->waitlock); return -EBUSY; eagain: return -EAGAIN; diff --git a/drivers/infiniband/hw/hfi1/verbs.c b/drivers/infiniband/hw/hfi1/verbs.c index a365089a9305..ec582d86025f 100644 --- a/drivers/infiniband/hw/hfi1/verbs.c +++ b/drivers/infiniband/hw/hfi1/verbs.c @@ -765,7 +765,6 @@ static int pio_wait(struct rvt_qp *qp, { struct hfi1_qp_priv *priv = qp->priv; struct hfi1_devdata *dd = sc->dd; - struct hfi1_ibdev *dev = &dd->verbs_dev; unsigned long flags; int ret = 0; @@ -777,7 +776,7 @@ static int pio_wait(struct rvt_qp *qp, */ spin_lock_irqsave(&qp->s_lock, flags); if (ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK) { - write_seqlock(&dev->iowait_lock); + write_seqlock(&sc->waitlock); list_add_tail(&ps->s_txreq->txreq.list, &ps->wait->tx_head); if (list_empty(&priv->s_iowait.list)) { @@ -790,14 +789,14 @@ static int pio_wait(struct rvt_qp *qp, was_empty = list_empty(&sc->piowait); iowait_queue(ps->pkts_sent, &priv->s_iowait, &sc->piowait); - priv->s_iowait.lock = &dev->iowait_lock; + priv->s_iowait.lock = &sc->waitlock; trace_hfi1_qpsleep(qp, RVT_S_WAIT_PIO); rvt_get_qp(qp); /* counting: only call wantpiobuf_intr if first user */ if (was_empty) hfi1_sc_wantpiobuf_intr(sc, 1); } - write_sequnlock(&dev->iowait_lock); + write_sequnlock(&sc->waitlock); hfi1_qp_unbusy(qp, ps->wait); ret = -EBUSY; } @@ -919,6 +918,8 @@ int hfi1_verbs_send_pio(struct rvt_qp *qp, struct hfi1_pkt_state *ps, if (slen > len) slen = len; + if (slen > ss->sge.sge_length) + slen = ss->sge.sge_length; rvt_update_sge(ss, slen, false); seg_pio_copy_mid(pbuf, addr, slen); len -= slen; @@ -1616,6 +1617,16 @@ static int get_hw_stats(struct ib_device *ibdev, struct rdma_hw_stats *stats, return count; } +static const struct ib_device_ops hfi1_dev_ops = { + .alloc_hw_stats = alloc_hw_stats, + .alloc_rdma_netdev = hfi1_vnic_alloc_rn, + .get_dev_fw_str = hfi1_get_dev_fw_str, + .get_hw_stats = get_hw_stats, + .modify_device = modify_device, + /* keep process mad in the driver */ + .process_mad = hfi1_process_mad, +}; + /** * hfi1_register_ib_device - register our device with the infiniband core * @dd: the device data structure @@ -1659,14 +1670,8 @@ int hfi1_register_ib_device(struct hfi1_devdata *dd) ibdev->owner = THIS_MODULE; ibdev->phys_port_cnt = dd->num_pports; ibdev->dev.parent = &dd->pcidev->dev; - ibdev->modify_device = modify_device; - ibdev->alloc_hw_stats = alloc_hw_stats; - ibdev->get_hw_stats = get_hw_stats; - ibdev->alloc_rdma_netdev = hfi1_vnic_alloc_rn; - /* keep process mad in the driver */ - ibdev->process_mad = hfi1_process_mad; - ibdev->get_dev_fw_str = hfi1_get_dev_fw_str; + ib_set_device_ops(ibdev, &hfi1_dev_ops); strlcpy(ibdev->node_desc, init_utsname()->nodename, sizeof(ibdev->node_desc)); @@ -1704,6 +1709,7 @@ int hfi1_register_ib_device(struct hfi1_devdata *dd) dd->verbs_dev.rdi.dparms.max_mad_size = OPA_MGMT_MAD_SIZE; dd->verbs_dev.rdi.driver_f.qp_priv_alloc = qp_priv_alloc; + dd->verbs_dev.rdi.driver_f.qp_priv_init = hfi1_qp_priv_init; dd->verbs_dev.rdi.driver_f.qp_priv_free = qp_priv_free; dd->verbs_dev.rdi.driver_f.free_all_qps = free_all_qps; dd->verbs_dev.rdi.driver_f.notify_qp_reset = notify_qp_reset; diff --git a/drivers/infiniband/hw/hfi1/verbs.h b/drivers/infiniband/hw/hfi1/verbs.h index 64c9054db5f3..1ad0b14bdb3c 100644 --- a/drivers/infiniband/hw/hfi1/verbs.h +++ b/drivers/infiniband/hw/hfi1/verbs.h @@ -71,6 +71,7 @@ struct hfi1_devdata; struct hfi1_packet; #include "iowait.h" +#include "tid_rdma.h" #define HFI1_MAX_RDMA_ATOMIC 16 @@ -156,6 +157,7 @@ struct hfi1_qp_priv { struct hfi1_ahg_info *s_ahg; /* ahg info for next header */ struct sdma_engine *s_sde; /* current sde */ struct send_context *s_sendcontext; /* current sendcontext */ + struct hfi1_ctxtdata *rcd; /* QP's receive context */ u8 s_sc; /* SC[0..4] for next packet */ struct iowait s_iowait; struct rvt_qp *owner; diff --git a/drivers/infiniband/hw/hfi1/vnic_main.c b/drivers/infiniband/hw/hfi1/vnic_main.c index c9876d9e3cb9..a922db58be14 100644 --- a/drivers/infiniband/hw/hfi1/vnic_main.c +++ b/drivers/infiniband/hw/hfi1/vnic_main.c @@ -816,14 +816,14 @@ struct net_device *hfi1_vnic_alloc_rn(struct ib_device *device, size = sizeof(struct opa_vnic_rdma_netdev) + sizeof(*vinfo); netdev = alloc_netdev_mqs(size, name, name_assign_type, setup, - chip_sdma_engines(dd), dd->num_vnic_contexts); + dd->num_sdma, dd->num_vnic_contexts); if (!netdev) return ERR_PTR(-ENOMEM); rn = netdev_priv(netdev); vinfo = opa_vnic_dev_priv(netdev); vinfo->dd = dd; - vinfo->num_tx_q = chip_sdma_engines(dd); + vinfo->num_tx_q = dd->num_sdma; vinfo->num_rx_q = dd->num_vnic_contexts; vinfo->netdev = netdev; rn->free_rdma_netdev = hfi1_vnic_free_rn; diff --git a/drivers/infiniband/hw/hfi1/vnic_sdma.c b/drivers/infiniband/hw/hfi1/vnic_sdma.c index 97bd940a056a..1f81c480e028 100644 --- a/drivers/infiniband/hw/hfi1/vnic_sdma.c +++ b/drivers/infiniband/hw/hfi1/vnic_sdma.c @@ -57,7 +57,6 @@ #define HFI1_VNIC_TXREQ_NAME_LEN 32 #define HFI1_VNIC_SDMA_DESC_WTRMRK 64 -#define HFI1_VNIC_SDMA_RETRY_COUNT 1 /* * struct vnic_txreq - VNIC transmit descriptor @@ -67,7 +66,6 @@ * @pad: pad buffer * @plen: pad length * @pbc_val: pbc value - * @retry_count: tx retry count */ struct vnic_txreq { struct sdma_txreq txreq; @@ -77,8 +75,6 @@ struct vnic_txreq { unsigned char pad[HFI1_VNIC_MAX_PAD]; u16 plen; __le64 pbc_val; - - u32 retry_count; }; static void vnic_sdma_complete(struct sdma_txreq *txreq, @@ -196,7 +192,6 @@ int hfi1_vnic_send_dma(struct hfi1_devdata *dd, u8 q_idx, ret = build_vnic_tx_desc(sde, tx, pbc); if (unlikely(ret)) goto free_desc; - tx->retry_count = 0; ret = sdma_send_txreq(sde, iowait_get_ib_work(&vnic_sdma->wait), &tx->txreq, vnic_sdma->pkts_sent); @@ -237,18 +232,17 @@ static int hfi1_vnic_sdma_sleep(struct sdma_engine *sde, { struct hfi1_vnic_sdma *vnic_sdma = container_of(wait->iow, struct hfi1_vnic_sdma, wait); - struct hfi1_ibdev *dev = &vnic_sdma->dd->verbs_dev; - struct vnic_txreq *tx = container_of(txreq, struct vnic_txreq, txreq); - if (sdma_progress(sde, seq, txreq)) - if (tx->retry_count++ < HFI1_VNIC_SDMA_RETRY_COUNT) - return -EAGAIN; + write_seqlock(&sde->waitlock); + if (sdma_progress(sde, seq, txreq)) { + write_sequnlock(&sde->waitlock); + return -EAGAIN; + } vnic_sdma->state = HFI1_VNIC_SDMA_Q_DEFERRED; - write_seqlock(&dev->iowait_lock); if (list_empty(&vnic_sdma->wait.list)) iowait_queue(pkts_sent, wait->iow, &sde->dmawait); - write_sequnlock(&dev->iowait_lock); + write_sequnlock(&sde->waitlock); return -EBUSY; } diff --git a/drivers/infiniband/hw/hns/Makefile b/drivers/infiniband/hw/hns/Makefile index cf03404b9d58..004c88b32e13 100644 --- a/drivers/infiniband/hw/hns/Makefile +++ b/drivers/infiniband/hw/hns/Makefile @@ -7,7 +7,7 @@ ccflags-y := -Idrivers/net/ethernet/hisilicon/hns3 obj-$(CONFIG_INFINIBAND_HNS) += hns-roce.o hns-roce-objs := hns_roce_main.o hns_roce_cmd.o hns_roce_pd.o \ hns_roce_ah.o hns_roce_hem.o hns_roce_mr.o hns_roce_qp.o \ - hns_roce_cq.o hns_roce_alloc.o hns_roce_db.o + hns_roce_cq.o hns_roce_alloc.o hns_roce_db.o hns_roce_srq.o obj-$(CONFIG_INFINIBAND_HNS_HIP06) += hns-roce-hw-v1.o hns-roce-hw-v1-objs := hns_roce_hw_v1.o obj-$(CONFIG_INFINIBAND_HNS_HIP08) += hns-roce-hw-v2.o diff --git a/drivers/infiniband/hw/hns/hns_roce_ah.c b/drivers/infiniband/hw/hns/hns_roce_ah.c index 9990dc9eb96a..b3c8c45ec1e3 100644 --- a/drivers/infiniband/hw/hns/hns_roce_ah.c +++ b/drivers/infiniband/hw/hns/hns_roce_ah.c @@ -41,6 +41,7 @@ struct ib_ah *hns_roce_create_ah(struct ib_pd *ibpd, struct rdma_ah_attr *ah_attr, + u32 flags, struct ib_udata *udata) { struct hns_roce_dev *hr_dev = to_hr_dev(ibpd->device); @@ -110,7 +111,7 @@ int hns_roce_query_ah(struct ib_ah *ibah, struct rdma_ah_attr *ah_attr) return 0; } -int hns_roce_destroy_ah(struct ib_ah *ah) +int hns_roce_destroy_ah(struct ib_ah *ah, u32 flags) { kfree(to_hr_ah(ah)); diff --git a/drivers/infiniband/hw/hns/hns_roce_alloc.c b/drivers/infiniband/hw/hns/hns_roce_alloc.c index 46f65f9f59d0..6300033a448f 100644 --- a/drivers/infiniband/hw/hns/hns_roce_alloc.c +++ b/drivers/infiniband/hw/hns/hns_roce_alloc.c @@ -239,6 +239,8 @@ err_free: void hns_roce_cleanup_bitmap(struct hns_roce_dev *hr_dev) { + if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_SRQ) + hns_roce_cleanup_srq_table(hr_dev); hns_roce_cleanup_qp_table(hr_dev); hns_roce_cleanup_cq_table(hr_dev); hns_roce_cleanup_mr_table(hr_dev); diff --git a/drivers/infiniband/hw/hns/hns_roce_cmd.h b/drivers/infiniband/hw/hns/hns_roce_cmd.h index 9549ae51a0dd..927701df5eff 100644 --- a/drivers/infiniband/hw/hns/hns_roce_cmd.h +++ b/drivers/infiniband/hw/hns/hns_roce_cmd.h @@ -120,6 +120,10 @@ enum { HNS_ROCE_CMD_SQD2RTS_QP = 0x20, HNS_ROCE_CMD_2RST_QP = 0x21, HNS_ROCE_CMD_QUERY_QP = 0x22, + HNS_ROCE_CMD_SW2HW_SRQ = 0x70, + HNS_ROCE_CMD_MODIFY_SRQC = 0x72, + HNS_ROCE_CMD_QUERY_SRQC = 0x73, + HNS_ROCE_CMD_HW2SW_SRQ = 0x74, }; int hns_roce_cmd_mbox(struct hns_roce_dev *hr_dev, u64 in_param, u64 out_param, diff --git a/drivers/infiniband/hw/hns/hns_roce_common.h b/drivers/infiniband/hw/hns/hns_roce_common.h index 93d4b4ec002d..f4c92a7ac1ce 100644 --- a/drivers/infiniband/hw/hns/hns_roce_common.h +++ b/drivers/infiniband/hw/hns/hns_roce_common.h @@ -376,9 +376,6 @@ #define ROCEE_RX_CMQ_TAIL_REG 0x07024 #define ROCEE_RX_CMQ_HEAD_REG 0x07028 -#define ROCEE_VF_MB_CFG0_REG 0x40 -#define ROCEE_VF_MB_STATUS_REG 0x58 - #define ROCEE_VF_EQ_DB_CFG0_REG 0x238 #define ROCEE_VF_EQ_DB_CFG1_REG 0x23C diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h index d39bdfdb5de9..509e467843f6 100644 --- a/drivers/infiniband/hw/hns/hns_roce_device.h +++ b/drivers/infiniband/hw/hns/hns_roce_device.h @@ -111,6 +111,9 @@ #define PAGES_SHIFT_24 24 #define PAGES_SHIFT_32 32 +#define HNS_ROCE_IDX_QUE_ENTRY_SZ 4 +#define SRQ_DB_REG 0x230 + enum { HNS_ROCE_SUPPORT_RQ_RECORD_DB = 1 << 0, HNS_ROCE_SUPPORT_SQ_RECORD_DB = 1 << 1, @@ -196,6 +199,7 @@ enum { HNS_ROCE_CAP_FLAG_RQ_INLINE = BIT(2), HNS_ROCE_CAP_FLAG_RECORD_DB = BIT(3), HNS_ROCE_CAP_FLAG_SQ_RECORD_DB = BIT(4), + HNS_ROCE_CAP_FLAG_SRQ = BIT(5), HNS_ROCE_CAP_FLAG_MW = BIT(7), HNS_ROCE_CAP_FLAG_FRMR = BIT(8), HNS_ROCE_CAP_FLAG_ATOMIC = BIT(10), @@ -204,6 +208,8 @@ enum { enum hns_roce_mtt_type { MTT_TYPE_WQE, MTT_TYPE_CQE, + MTT_TYPE_SRQWQE, + MTT_TYPE_IDX }; enum { @@ -339,6 +345,10 @@ struct hns_roce_mr_table { struct hns_roce_hem_table mtpt_table; struct hns_roce_buddy mtt_cqe_buddy; struct hns_roce_hem_table mtt_cqe_table; + struct hns_roce_buddy mtt_srqwqe_buddy; + struct hns_roce_hem_table mtt_srqwqe_table; + struct hns_roce_buddy mtt_idx_buddy; + struct hns_roce_hem_table mtt_idx_table; }; struct hns_roce_wq { @@ -429,9 +439,37 @@ struct hns_roce_cq { struct completion free; }; +struct hns_roce_idx_que { + struct hns_roce_buf idx_buf; + int entry_sz; + u32 buf_size; + struct ib_umem *umem; + struct hns_roce_mtt mtt; + u64 *bitmap; +}; + struct hns_roce_srq { struct ib_srq ibsrq; - int srqn; + void (*event)(struct hns_roce_srq *srq, enum hns_roce_event event); + unsigned long srqn; + int max; + int max_gs; + int wqe_shift; + void __iomem *db_reg_l; + + atomic_t refcount; + struct completion free; + + struct hns_roce_buf buf; + u64 *wrid; + struct ib_umem *umem; + struct hns_roce_mtt mtt; + struct hns_roce_idx_que idx_que; + spinlock_t lock; + int head; + int tail; + u16 wqe_ctr; + struct mutex mutex; }; struct hns_roce_uar_table { @@ -453,6 +491,12 @@ struct hns_roce_cq_table { struct hns_roce_hem_table table; }; +struct hns_roce_srq_table { + struct hns_roce_bitmap bitmap; + struct xarray xa; + struct hns_roce_hem_table table; +}; + struct hns_roce_raq_table { struct hns_roce_buf_list *e_raq_buf; }; @@ -603,6 +647,12 @@ struct hns_roce_aeqe { } qp_event; struct { + __le32 srq; + u32 rsv0; + u32 rsv1; + } srq_event; + + struct { __le32 cq; u32 rsv0; u32 rsv1; @@ -679,7 +729,12 @@ struct hns_roce_caps { u32 max_extend_sg; int num_qps; /* 256k */ int reserved_qps; + u32 max_srq_sg; + int num_srqs; u32 max_wqes; /* 16k */ + u32 max_srqs; + u32 max_srq_wrs; + u32 max_srq_sges; u32 max_sq_desc_sz; /* 64 */ u32 max_rq_desc_sz; /* 64 */ u32 max_srq_desc_sz; @@ -690,12 +745,16 @@ struct hns_roce_caps { int min_cqes; u32 min_wqes; int reserved_cqs; + int reserved_srqs; + u32 max_srqwqes; int num_aeq_vectors; /* 1 */ int num_comp_vectors; int num_other_vectors; int num_mtpts; u32 num_mtt_segs; u32 num_cqe_segs; + u32 num_srqwqe_segs; + u32 num_idx_segs; int reserved_mrws; int reserved_uars; int num_pds; @@ -709,6 +768,8 @@ struct hns_roce_caps { int irrl_entry_sz; int trrl_entry_sz; int cqc_entry_sz; + int srqc_entry_sz; + int idx_entry_sz; u32 pbl_ba_pg_sz; u32 pbl_buf_pg_sz; u32 pbl_hop_num; @@ -737,6 +798,12 @@ struct hns_roce_caps { u32 cqe_ba_pg_sz; u32 cqe_buf_pg_sz; u32 cqe_hop_num; + u32 srqwqe_ba_pg_sz; + u32 srqwqe_buf_pg_sz; + u32 srqwqe_hop_num; + u32 idx_ba_pg_sz; + u32 idx_buf_pg_sz; + u32 idx_hop_num; u32 eqe_ba_pg_sz; u32 eqe_buf_pg_sz; u32 eqe_hop_num; @@ -805,6 +872,19 @@ struct hns_roce_hw { int (*modify_cq)(struct ib_cq *cq, u16 cq_count, u16 cq_period); int (*init_eq)(struct hns_roce_dev *hr_dev); void (*cleanup_eq)(struct hns_roce_dev *hr_dev); + void (*write_srqc)(struct hns_roce_dev *hr_dev, + struct hns_roce_srq *srq, u32 pdn, u16 xrcd, u32 cqn, + void *mb_buf, u64 *mtts_wqe, u64 *mtts_idx, + dma_addr_t dma_handle_wqe, + dma_addr_t dma_handle_idx); + int (*modify_srq)(struct ib_srq *ibsrq, struct ib_srq_attr *srq_attr, + enum ib_srq_attr_mask srq_attr_mask, + struct ib_udata *udata); + int (*query_srq)(struct ib_srq *ibsrq, struct ib_srq_attr *attr); + int (*post_srq_recv)(struct ib_srq *ibsrq, const struct ib_recv_wr *wr, + const struct ib_recv_wr **bad_wr); + const struct ib_device_ops *hns_roce_dev_ops; + const struct ib_device_ops *hns_roce_dev_srq_ops; }; struct hns_roce_dev { @@ -839,6 +919,7 @@ struct hns_roce_dev { struct hns_roce_uar_table uar_table; struct hns_roce_mr_table mr_table; struct hns_roce_cq_table cq_table; + struct hns_roce_srq_table srq_table; struct hns_roce_qp_table qp_table; struct hns_roce_eq_table eq_table; @@ -951,12 +1032,14 @@ int hns_roce_init_mr_table(struct hns_roce_dev *hr_dev); int hns_roce_init_eq_table(struct hns_roce_dev *hr_dev); int hns_roce_init_cq_table(struct hns_roce_dev *hr_dev); int hns_roce_init_qp_table(struct hns_roce_dev *hr_dev); +int hns_roce_init_srq_table(struct hns_roce_dev *hr_dev); void hns_roce_cleanup_pd_table(struct hns_roce_dev *hr_dev); void hns_roce_cleanup_mr_table(struct hns_roce_dev *hr_dev); void hns_roce_cleanup_eq_table(struct hns_roce_dev *hr_dev); void hns_roce_cleanup_cq_table(struct hns_roce_dev *hr_dev); void hns_roce_cleanup_qp_table(struct hns_roce_dev *hr_dev); +void hns_roce_cleanup_srq_table(struct hns_roce_dev *hr_dev); int hns_roce_bitmap_alloc(struct hns_roce_bitmap *bitmap, unsigned long *obj); void hns_roce_bitmap_free(struct hns_roce_bitmap *bitmap, unsigned long obj, @@ -973,9 +1056,10 @@ void hns_roce_bitmap_free_range(struct hns_roce_bitmap *bitmap, struct ib_ah *hns_roce_create_ah(struct ib_pd *pd, struct rdma_ah_attr *ah_attr, + u32 flags, struct ib_udata *udata); int hns_roce_query_ah(struct ib_ah *ibah, struct rdma_ah_attr *ah_attr); -int hns_roce_destroy_ah(struct ib_ah *ah); +int hns_roce_destroy_ah(struct ib_ah *ah, u32 flags); struct ib_pd *hns_roce_alloc_pd(struct ib_device *ib_dev, struct ib_ucontext *context, @@ -1011,6 +1095,14 @@ int hns_roce_buf_alloc(struct hns_roce_dev *hr_dev, u32 size, u32 max_direct, int hns_roce_ib_umem_write_mtt(struct hns_roce_dev *hr_dev, struct hns_roce_mtt *mtt, struct ib_umem *umem); +struct ib_srq *hns_roce_create_srq(struct ib_pd *pd, + struct ib_srq_init_attr *srq_init_attr, + struct ib_udata *udata); +int hns_roce_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *srq_attr, + enum ib_srq_attr_mask srq_attr_mask, + struct ib_udata *udata); +int hns_roce_destroy_srq(struct ib_srq *ibsrq); + struct ib_qp *hns_roce_create_qp(struct ib_pd *ib_pd, struct ib_qp_init_attr *init_attr, struct ib_udata *udata); @@ -1052,6 +1144,7 @@ void hns_roce_free_db(struct hns_roce_dev *hr_dev, struct hns_roce_db *db); void hns_roce_cq_completion(struct hns_roce_dev *hr_dev, u32 cqn); void hns_roce_cq_event(struct hns_roce_dev *hr_dev, u32 cqn, int event_type); void hns_roce_qp_event(struct hns_roce_dev *hr_dev, u32 qpn, int event_type); +void hns_roce_srq_event(struct hns_roce_dev *hr_dev, u32 srqn, int event_type); int hns_get_gid_index(struct hns_roce_dev *hr_dev, u8 port, int gid_index); int hns_roce_init(struct hns_roce_dev *hr_dev); void hns_roce_exit(struct hns_roce_dev *hr_dev); diff --git a/drivers/infiniband/hw/hns/hns_roce_hem.c b/drivers/infiniband/hw/hns/hns_roce_hem.c index f6faefed96e8..4cdbcafa5915 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hem.c +++ b/drivers/infiniband/hw/hns/hns_roce_hem.c @@ -46,7 +46,9 @@ bool hns_roce_check_whether_mhop(struct hns_roce_dev *hr_dev, u32 type) (hr_dev->caps.cqc_hop_num && type == HEM_TYPE_CQC) || (hr_dev->caps.srqc_hop_num && type == HEM_TYPE_SRQC) || (hr_dev->caps.cqe_hop_num && type == HEM_TYPE_CQE) || - (hr_dev->caps.mtt_hop_num && type == HEM_TYPE_MTT)) + (hr_dev->caps.mtt_hop_num && type == HEM_TYPE_MTT) || + (hr_dev->caps.srqwqe_hop_num && type == HEM_TYPE_SRQWQE) || + (hr_dev->caps.idx_hop_num && type == HEM_TYPE_IDX)) return true; return false; @@ -147,6 +149,22 @@ int hns_roce_calc_hem_mhop(struct hns_roce_dev *hr_dev, mhop->ba_l0_num = mhop->bt_chunk_size / 8; mhop->hop_num = hr_dev->caps.cqe_hop_num; break; + case HEM_TYPE_SRQWQE: + mhop->buf_chunk_size = 1 << (hr_dev->caps.srqwqe_buf_pg_sz + + PAGE_SHIFT); + mhop->bt_chunk_size = 1 << (hr_dev->caps.srqwqe_ba_pg_sz + + PAGE_SHIFT); + mhop->ba_l0_num = mhop->bt_chunk_size / 8; + mhop->hop_num = hr_dev->caps.srqwqe_hop_num; + break; + case HEM_TYPE_IDX: + mhop->buf_chunk_size = 1 << (hr_dev->caps.idx_buf_pg_sz + + PAGE_SHIFT); + mhop->bt_chunk_size = 1 << (hr_dev->caps.idx_ba_pg_sz + + PAGE_SHIFT); + mhop->ba_l0_num = mhop->bt_chunk_size / 8; + mhop->hop_num = hr_dev->caps.idx_hop_num; + break; default: dev_err(dev, "Table %d not support multi-hop addressing!\n", table->type); @@ -906,6 +924,18 @@ int hns_roce_init_hem_table(struct hns_roce_dev *hr_dev, bt_chunk_size = buf_chunk_size; hop_num = hr_dev->caps.cqe_hop_num; break; + case HEM_TYPE_SRQWQE: + buf_chunk_size = 1 << (hr_dev->caps.srqwqe_ba_pg_sz + + PAGE_SHIFT); + bt_chunk_size = buf_chunk_size; + hop_num = hr_dev->caps.srqwqe_hop_num; + break; + case HEM_TYPE_IDX: + buf_chunk_size = 1 << (hr_dev->caps.idx_ba_pg_sz + + PAGE_SHIFT); + bt_chunk_size = buf_chunk_size; + hop_num = hr_dev->caps.idx_hop_num; + break; default: dev_err(dev, "Table %d not support to init hem table here!\n", @@ -1041,6 +1071,15 @@ void hns_roce_cleanup_hem_table(struct hns_roce_dev *hr_dev, void hns_roce_cleanup_hem(struct hns_roce_dev *hr_dev) { + if ((hr_dev->caps.num_idx_segs)) + hns_roce_cleanup_hem_table(hr_dev, + &hr_dev->mr_table.mtt_idx_table); + if (hr_dev->caps.num_srqwqe_segs) + hns_roce_cleanup_hem_table(hr_dev, + &hr_dev->mr_table.mtt_srqwqe_table); + if (hr_dev->caps.srqc_entry_sz) + hns_roce_cleanup_hem_table(hr_dev, + &hr_dev->srq_table.table); hns_roce_cleanup_hem_table(hr_dev, &hr_dev->cq_table.table); if (hr_dev->caps.trrl_entry_sz) hns_roce_cleanup_hem_table(hr_dev, diff --git a/drivers/infiniband/hw/hns/hns_roce_hem.h b/drivers/infiniband/hw/hns/hns_roce_hem.h index e8850d59e780..a650278c6fbd 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hem.h +++ b/drivers/infiniband/hw/hns/hns_roce_hem.h @@ -48,6 +48,8 @@ enum { /* UNMAP HEM */ HEM_TYPE_MTT, HEM_TYPE_CQE, + HEM_TYPE_SRQWQE, + HEM_TYPE_IDX, HEM_TYPE_IRRL, HEM_TYPE_TRRL, }; diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c index ca05810c92dc..b74c742b000c 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c @@ -3926,7 +3926,7 @@ int hns_roce_v1_destroy_qp(struct ib_qp *ibqp) struct hns_roce_qp_work *qp_work; struct hns_roce_v1_priv *priv; struct hns_roce_cq *send_cq, *recv_cq; - int is_user = !!ibqp->pd->uobject; + bool is_user = ibqp->uobject; int is_timeout = 0; int ret; @@ -4793,6 +4793,16 @@ static void hns_roce_v1_cleanup_eq_table(struct hns_roce_dev *hr_dev) kfree(eq_table->eq); } +static const struct ib_device_ops hns_roce_v1_dev_ops = { + .destroy_qp = hns_roce_v1_destroy_qp, + .modify_cq = hns_roce_v1_modify_cq, + .poll_cq = hns_roce_v1_poll_cq, + .post_recv = hns_roce_v1_post_recv, + .post_send = hns_roce_v1_post_send, + .query_qp = hns_roce_v1_query_qp, + .req_notify_cq = hns_roce_v1_req_notify_cq, +}; + static const struct hns_roce_hw hns_roce_hw_v1 = { .reset = hns_roce_v1_reset, .hw_profile = hns_roce_v1_profile, @@ -4818,6 +4828,7 @@ static const struct hns_roce_hw hns_roce_hw_v1 = { .destroy_cq = hns_roce_v1_destroy_cq, .init_eq = hns_roce_v1_init_eq_table, .cleanup_eq = hns_roce_v1_cleanup_eq_table, + .hns_roce_dev_ops = &hns_roce_v1_dev_ops, }; static const struct of_device_id hns_roce_of_match[] = { diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index 3beb1523e17c..3a669451cf86 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -1082,6 +1082,33 @@ static int hns_roce_query_pf_resource(struct hns_roce_dev *hr_dev) return 0; } +static int hns_roce_set_vf_switch_param(struct hns_roce_dev *hr_dev, + int vf_id) +{ + struct hns_roce_cmq_desc desc; + struct hns_roce_vf_switch *swt; + int ret; + + swt = (struct hns_roce_vf_switch *)desc.data; + hns_roce_cmq_setup_basic_desc(&desc, HNS_SWITCH_PARAMETER_CFG, true); + swt->rocee_sel |= cpu_to_le16(HNS_ICL_SWITCH_CMD_ROCEE_SEL); + roce_set_field(swt->fun_id, + VF_SWITCH_DATA_FUN_ID_VF_ID_M, + VF_SWITCH_DATA_FUN_ID_VF_ID_S, + vf_id); + ret = hns_roce_cmq_send(hr_dev, &desc, 1); + if (ret) + return ret; + desc.flag = + cpu_to_le16(HNS_ROCE_CMD_FLAG_NO_INTR | HNS_ROCE_CMD_FLAG_IN); + desc.flag &= cpu_to_le16(~HNS_ROCE_CMD_FLAG_WR); + roce_set_bit(swt->cfg, VF_SWITCH_DATA_CFG_ALW_LPBK_S, 1); + roce_set_bit(swt->cfg, VF_SWITCH_DATA_CFG_ALW_LCL_LPBK_S, 1); + roce_set_bit(swt->cfg, VF_SWITCH_DATA_CFG_ALW_DST_OVRD_S, 1); + + return hns_roce_cmq_send(hr_dev, &desc, 1); +} + static int hns_roce_alloc_vf_resource(struct hns_roce_dev *hr_dev) { struct hns_roce_cmq_desc desc[2]; @@ -1269,6 +1296,15 @@ static int hns_roce_v2_profile(struct hns_roce_dev *hr_dev) return ret; } + if (hr_dev->pci_dev->revision == 0x21) { + ret = hns_roce_set_vf_switch_param(hr_dev, 0); + if (ret) { + dev_err(hr_dev->dev, + "Set function switch param fail, ret = %d.\n", + ret); + return ret; + } + } hr_dev->vendor_part_id = hr_dev->pci_dev->device; hr_dev->sys_image_guid = be64_to_cpu(hr_dev->ib_dev.node_guid); @@ -1276,11 +1312,14 @@ static int hns_roce_v2_profile(struct hns_roce_dev *hr_dev) caps->num_qps = HNS_ROCE_V2_MAX_QP_NUM; caps->max_wqes = HNS_ROCE_V2_MAX_WQE_NUM; caps->num_cqs = HNS_ROCE_V2_MAX_CQ_NUM; + caps->num_srqs = HNS_ROCE_V2_MAX_SRQ_NUM; caps->max_cqes = HNS_ROCE_V2_MAX_CQE_NUM; + caps->max_srqwqes = HNS_ROCE_V2_MAX_SRQWQE_NUM; caps->max_sq_sg = HNS_ROCE_V2_MAX_SQ_SGE_NUM; caps->max_extend_sg = HNS_ROCE_V2_MAX_EXTEND_SGE_NUM; caps->max_rq_sg = HNS_ROCE_V2_MAX_RQ_SGE_NUM; caps->max_sq_inline = HNS_ROCE_V2_MAX_SQ_INLINE; + caps->max_srq_sg = HNS_ROCE_V2_MAX_SRQ_SGE_NUM; caps->num_uars = HNS_ROCE_V2_UAR_NUM; caps->phy_num_uars = HNS_ROCE_V2_PHY_UAR_NUM; caps->num_aeq_vectors = HNS_ROCE_V2_AEQE_VEC_NUM; @@ -1289,6 +1328,8 @@ static int hns_roce_v2_profile(struct hns_roce_dev *hr_dev) caps->num_mtpts = HNS_ROCE_V2_MAX_MTPT_NUM; caps->num_mtt_segs = HNS_ROCE_V2_MAX_MTT_SEGS; caps->num_cqe_segs = HNS_ROCE_V2_MAX_CQE_SEGS; + caps->num_srqwqe_segs = HNS_ROCE_V2_MAX_SRQWQE_SEGS; + caps->num_idx_segs = HNS_ROCE_V2_MAX_IDX_SEGS; caps->num_pds = HNS_ROCE_V2_MAX_PD_NUM; caps->max_qp_init_rdma = HNS_ROCE_V2_MAX_QP_INIT_RDMA; caps->max_qp_dest_rdma = HNS_ROCE_V2_MAX_QP_DEST_RDMA; @@ -1299,8 +1340,10 @@ static int hns_roce_v2_profile(struct hns_roce_dev *hr_dev) caps->irrl_entry_sz = HNS_ROCE_V2_IRRL_ENTRY_SZ; caps->trrl_entry_sz = HNS_ROCE_V2_TRRL_ENTRY_SZ; caps->cqc_entry_sz = HNS_ROCE_V2_CQC_ENTRY_SZ; + caps->srqc_entry_sz = HNS_ROCE_V2_SRQC_ENTRY_SZ; caps->mtpt_entry_sz = HNS_ROCE_V2_MTPT_ENTRY_SZ; caps->mtt_entry_sz = HNS_ROCE_V2_MTT_ENTRY_SZ; + caps->idx_entry_sz = 4; caps->cq_entry_sz = HNS_ROCE_V2_CQE_ENTRY_SIZE; caps->page_size_cap = HNS_ROCE_V2_PAGE_SIZE_SUPPORTED; caps->reserved_lkey = 0; @@ -1308,6 +1351,7 @@ static int hns_roce_v2_profile(struct hns_roce_dev *hr_dev) caps->reserved_mrws = 1; caps->reserved_uars = 0; caps->reserved_cqs = 0; + caps->reserved_srqs = 0; caps->reserved_qps = HNS_ROCE_V2_RSV_QPS; caps->qpc_ba_pg_sz = 0; @@ -1331,6 +1375,12 @@ static int hns_roce_v2_profile(struct hns_roce_dev *hr_dev) caps->cqe_ba_pg_sz = 0; caps->cqe_buf_pg_sz = 0; caps->cqe_hop_num = HNS_ROCE_CQE_HOP_NUM; + caps->srqwqe_ba_pg_sz = 0; + caps->srqwqe_buf_pg_sz = 0; + caps->srqwqe_hop_num = HNS_ROCE_SRQWQE_HOP_NUM; + caps->idx_ba_pg_sz = 0; + caps->idx_buf_pg_sz = 0; + caps->idx_hop_num = HNS_ROCE_IDX_HOP_NUM; caps->eqe_ba_pg_sz = 0; caps->eqe_buf_pg_sz = 0; caps->eqe_hop_num = HNS_ROCE_EQE_HOP_NUM; @@ -1354,8 +1404,13 @@ static int hns_roce_v2_profile(struct hns_roce_dev *hr_dev) caps->local_ca_ack_delay = 0; caps->max_mtu = IB_MTU_4096; + caps->max_srqs = HNS_ROCE_V2_MAX_SRQ; + caps->max_srq_wrs = HNS_ROCE_V2_MAX_SRQ_WR; + caps->max_srq_sges = HNS_ROCE_V2_MAX_SRQ_SGE; + if (hr_dev->pci_dev->revision == 0x21) - caps->flags |= HNS_ROCE_CAP_FLAG_ATOMIC; + caps->flags |= HNS_ROCE_CAP_FLAG_ATOMIC | + HNS_ROCE_CAP_FLAG_SRQ; ret = hns_roce_v2_set_bt(hr_dev); if (ret) @@ -1587,30 +1642,62 @@ static void hns_roce_v2_exit(struct hns_roce_dev *hr_dev) hns_roce_free_link_table(hr_dev, &priv->tsq); } +static int hns_roce_query_mbox_status(struct hns_roce_dev *hr_dev) +{ + struct hns_roce_cmq_desc desc; + struct hns_roce_mbox_status *mb_st = + (struct hns_roce_mbox_status *)desc.data; + enum hns_roce_cmd_return_status status; + + hns_roce_cmq_setup_basic_desc(&desc, HNS_ROCE_OPC_QUERY_MB_ST, true); + + status = hns_roce_cmq_send(hr_dev, &desc, 1); + if (status) + return status; + + return cpu_to_le32(mb_st->mb_status_hw_run); +} + static int hns_roce_v2_cmd_pending(struct hns_roce_dev *hr_dev) { - u32 status = readl(hr_dev->reg_base + ROCEE_VF_MB_STATUS_REG); + u32 status = hns_roce_query_mbox_status(hr_dev); return status >> HNS_ROCE_HW_RUN_BIT_SHIFT; } static int hns_roce_v2_cmd_complete(struct hns_roce_dev *hr_dev) { - u32 status = readl(hr_dev->reg_base + ROCEE_VF_MB_STATUS_REG); + u32 status = hns_roce_query_mbox_status(hr_dev); return status & HNS_ROCE_HW_MB_STATUS_MASK; } +static int hns_roce_mbox_post(struct hns_roce_dev *hr_dev, u64 in_param, + u64 out_param, u32 in_modifier, u8 op_modifier, + u16 op, u16 token, int event) +{ + struct hns_roce_cmq_desc desc; + struct hns_roce_post_mbox *mb = (struct hns_roce_post_mbox *)desc.data; + + hns_roce_cmq_setup_basic_desc(&desc, HNS_ROCE_OPC_POST_MB, false); + + mb->in_param_l = cpu_to_le64(in_param); + mb->in_param_h = cpu_to_le64(in_param) >> 32; + mb->out_param_l = cpu_to_le64(out_param); + mb->out_param_h = cpu_to_le64(out_param) >> 32; + mb->cmd_tag = cpu_to_le32(in_modifier << 8 | op); + mb->token_event_en = cpu_to_le32(event << 16 | token); + + return hns_roce_cmq_send(hr_dev, &desc, 1); +} + static int hns_roce_v2_post_mbox(struct hns_roce_dev *hr_dev, u64 in_param, u64 out_param, u32 in_modifier, u8 op_modifier, u16 op, u16 token, int event) { struct device *dev = hr_dev->dev; - u32 __iomem *hcr = (u32 __iomem *)(hr_dev->reg_base + - ROCEE_VF_MB_CFG0_REG); unsigned long end; - u32 val0 = 0; - u32 val1 = 0; + int ret; end = msecs_to_jiffies(HNS_ROCE_V2_GO_BIT_TIMEOUT_MSECS) + jiffies; while (hns_roce_v2_cmd_pending(hr_dev)) { @@ -1622,27 +1709,12 @@ static int hns_roce_v2_post_mbox(struct hns_roce_dev *hr_dev, u64 in_param, cond_resched(); } - roce_set_field(val0, HNS_ROCE_VF_MB4_TAG_MASK, - HNS_ROCE_VF_MB4_TAG_SHIFT, in_modifier); - roce_set_field(val0, HNS_ROCE_VF_MB4_CMD_MASK, - HNS_ROCE_VF_MB4_CMD_SHIFT, op); - roce_set_field(val1, HNS_ROCE_VF_MB5_EVENT_MASK, - HNS_ROCE_VF_MB5_EVENT_SHIFT, event); - roce_set_field(val1, HNS_ROCE_VF_MB5_TOKEN_MASK, - HNS_ROCE_VF_MB5_TOKEN_SHIFT, token); - - writeq(in_param, hcr + 0); - writeq(out_param, hcr + 2); - - /* Memory barrier */ - wmb(); - - writel(val0, hcr + 4); - writel(val1, hcr + 5); - - mmiowb(); + ret = hns_roce_mbox_post(hr_dev, in_param, out_param, in_modifier, + op_modifier, op, token, event); + if (ret) + dev_err(dev, "Post mailbox fail(%d)\n", ret); - return 0; + return ret; } static int hns_roce_v2_chk_mbox(struct hns_roce_dev *hr_dev, @@ -2007,6 +2079,27 @@ static struct hns_roce_v2_cqe *next_cqe_sw_v2(struct hns_roce_cq *hr_cq) return get_sw_cqe_v2(hr_cq, hr_cq->cons_index); } +static void *get_srq_wqe(struct hns_roce_srq *srq, int n) +{ + return hns_roce_buf_offset(&srq->buf, n << srq->wqe_shift); +} + +static void hns_roce_free_srq_wqe(struct hns_roce_srq *srq, int wqe_index) +{ + u32 bitmap_num; + int bit_num; + + /* always called with interrupts disabled. */ + spin_lock(&srq->lock); + + bitmap_num = wqe_index / (sizeof(u64) * 8); + bit_num = wqe_index % (sizeof(u64) * 8); + srq->idx_que.bitmap[bitmap_num] |= (1ULL << bit_num); + srq->tail++; + + spin_unlock(&srq->lock); +} + static void hns_roce_v2_cq_set_ci(struct hns_roce_cq *hr_cq, u32 cons_index) { *hr_cq->set_ci_db = cons_index & 0xffffff; @@ -2018,6 +2111,7 @@ static void __hns_roce_v2_cq_clean(struct hns_roce_cq *hr_cq, u32 qpn, struct hns_roce_v2_cqe *cqe, *dest; u32 prod_index; int nfreed = 0; + int wqe_index; u8 owner_bit; for (prod_index = hr_cq->cons_index; get_sw_cqe_v2(hr_cq, prod_index); @@ -2035,7 +2129,13 @@ static void __hns_roce_v2_cq_clean(struct hns_roce_cq *hr_cq, u32 qpn, if ((roce_get_field(cqe->byte_16, V2_CQE_BYTE_16_LCL_QPN_M, V2_CQE_BYTE_16_LCL_QPN_S) & HNS_ROCE_V2_CQE_QPN_MASK) == qpn) { - /* In v1 engine, not support SRQ */ + if (srq && + roce_get_bit(cqe->byte_4, V2_CQE_BYTE_4_S_R_S)) { + wqe_index = roce_get_field(cqe->byte_4, + V2_CQE_BYTE_4_WQE_INDX_M, + V2_CQE_BYTE_4_WQE_INDX_S); + hns_roce_free_srq_wqe(srq, wqe_index); + } ++nfreed; } else if (nfreed) { dest = get_cqe_v2(hr_cq, (prod_index + nfreed) & @@ -2212,6 +2312,7 @@ static int hns_roce_handle_recv_inl_wqe(struct hns_roce_v2_cqe *cqe, static int hns_roce_v2_poll_one(struct hns_roce_cq *hr_cq, struct hns_roce_qp **cur_qp, struct ib_wc *wc) { + struct hns_roce_srq *srq = NULL; struct hns_roce_dev *hr_dev; struct hns_roce_v2_cqe *cqe; struct hns_roce_qp *hr_qp; @@ -2254,6 +2355,37 @@ static int hns_roce_v2_poll_one(struct hns_roce_cq *hr_cq, wc->qp = &(*cur_qp)->ibqp; wc->vendor_err = 0; + if (is_send) { + wq = &(*cur_qp)->sq; + if ((*cur_qp)->sq_signal_bits) { + /* + * If sg_signal_bit is 1, + * firstly tail pointer updated to wqe + * which current cqe correspond to + */ + wqe_ctr = (u16)roce_get_field(cqe->byte_4, + V2_CQE_BYTE_4_WQE_INDX_M, + V2_CQE_BYTE_4_WQE_INDX_S); + wq->tail += (wqe_ctr - (u16)wq->tail) & + (wq->wqe_cnt - 1); + } + + wc->wr_id = wq->wrid[wq->tail & (wq->wqe_cnt - 1)]; + ++wq->tail; + } else if ((*cur_qp)->ibqp.srq) { + srq = to_hr_srq((*cur_qp)->ibqp.srq); + wqe_ctr = le16_to_cpu(roce_get_field(cqe->byte_4, + V2_CQE_BYTE_4_WQE_INDX_M, + V2_CQE_BYTE_4_WQE_INDX_S)); + wc->wr_id = srq->wrid[wqe_ctr]; + hns_roce_free_srq_wqe(srq, wqe_ctr); + } else { + /* Update tail pointer, record wr_id */ + wq = &(*cur_qp)->rq; + wc->wr_id = wq->wrid[wq->tail & (wq->wqe_cnt - 1)]; + ++wq->tail; + } + status = roce_get_field(cqe->byte_4, V2_CQE_BYTE_4_STATUS_M, V2_CQE_BYTE_4_STATUS_S); switch (status & HNS_ROCE_V2_CQE_STATUS_MASK) { @@ -2373,23 +2505,6 @@ static int hns_roce_v2_poll_one(struct hns_roce_cq *hr_cq, wc->status = IB_WC_GENERAL_ERR; break; } - - wq = &(*cur_qp)->sq; - if ((*cur_qp)->sq_signal_bits) { - /* - * If sg_signal_bit is 1, - * firstly tail pointer updated to wqe - * which current cqe correspond to - */ - wqe_ctr = (u16)roce_get_field(cqe->byte_4, - V2_CQE_BYTE_4_WQE_INDX_M, - V2_CQE_BYTE_4_WQE_INDX_S); - wq->tail += (wqe_ctr - (u16)wq->tail) & - (wq->wqe_cnt - 1); - } - - wc->wr_id = wq->wrid[wq->tail & (wq->wqe_cnt - 1)]; - ++wq->tail; } else { /* RQ correspond to CQE */ wc->byte_len = le32_to_cpu(cqe->byte_cnt); @@ -2434,11 +2549,6 @@ static int hns_roce_v2_poll_one(struct hns_roce_cq *hr_cq, return -EAGAIN; } - /* Update tail pointer, record wr_id */ - wq = &(*cur_qp)->rq; - wc->wr_id = wq->wrid[wq->tail & (wq->wqe_cnt - 1)]; - ++wq->tail; - wc->sl = (u8)roce_get_field(cqe->byte_32, V2_CQE_BYTE_32_SL_M, V2_CQE_BYTE_32_SL_S); wc->src_qp = (u8)roce_get_field(cqe->byte_32, @@ -2747,6 +2857,8 @@ static void modify_qp_reset_to_init(struct ib_qp *ibqp, roce_set_field(context->byte_20_smac_sgid_idx, V2_QPC_BYTE_20_RQ_SHIFT_M, V2_QPC_BYTE_20_RQ_SHIFT_S, + (hr_qp->ibqp.qp_type == IB_QPT_XRC_INI || + hr_qp->ibqp.qp_type == IB_QPT_XRC_TGT || ibqp->srq) ? 0 : ilog2((unsigned int)hr_qp->rq.wqe_cnt)); roce_set_field(qpc_mask->byte_20_smac_sgid_idx, V2_QPC_BYTE_20_RQ_SHIFT_M, V2_QPC_BYTE_20_RQ_SHIFT_S, 0); @@ -3088,6 +3200,8 @@ static void modify_qp_init_to_init(struct ib_qp *ibqp, roce_set_field(context->byte_20_smac_sgid_idx, V2_QPC_BYTE_20_RQ_SHIFT_M, V2_QPC_BYTE_20_RQ_SHIFT_S, + (hr_qp->ibqp.qp_type == IB_QPT_XRC_INI || + hr_qp->ibqp.qp_type == IB_QPT_XRC_TGT || ibqp->srq) ? 0 : ilog2((unsigned int)hr_qp->rq.wqe_cnt)); roce_set_field(qpc_mask->byte_20_smac_sgid_idx, V2_QPC_BYTE_20_RQ_SHIFT_M, V2_QPC_BYTE_20_RQ_SHIFT_S, 0); @@ -3601,6 +3715,21 @@ static int modify_qp_rtr_to_rts(struct ib_qp *ibqp, return 0; } +static inline bool hns_roce_v2_check_qp_stat(enum ib_qp_state cur_state, + enum ib_qp_state new_state) +{ + + if ((cur_state != IB_QPS_RESET && + (new_state == IB_QPS_ERR || new_state == IB_QPS_RESET)) || + ((cur_state == IB_QPS_RTS || cur_state == IB_QPS_SQD) && + (new_state == IB_QPS_RTS || new_state == IB_QPS_SQD)) || + (cur_state == IB_QPS_SQE && new_state == IB_QPS_RTS)) + return true; + + return false; + +} + static int hns_roce_v2_modify_qp(struct ib_qp *ibqp, const struct ib_qp_attr *attr, int attr_mask, enum ib_qp_state cur_state, @@ -3626,6 +3755,7 @@ static int hns_roce_v2_modify_qp(struct ib_qp *ibqp, */ memset(qpc_mask, 0xff, sizeof(*qpc_mask)); if (cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT) { + memset(qpc_mask, 0, sizeof(*qpc_mask)); modify_qp_reset_to_init(ibqp, attr, attr_mask, context, qpc_mask); } else if (cur_state == IB_QPS_INIT && new_state == IB_QPS_INIT) { @@ -3641,21 +3771,7 @@ static int hns_roce_v2_modify_qp(struct ib_qp *ibqp, qpc_mask); if (ret) goto out; - } else if ((cur_state == IB_QPS_RTS && new_state == IB_QPS_RTS) || - (cur_state == IB_QPS_SQE && new_state == IB_QPS_RTS) || - (cur_state == IB_QPS_RTS && new_state == IB_QPS_SQD) || - (cur_state == IB_QPS_SQD && new_state == IB_QPS_SQD) || - (cur_state == IB_QPS_SQD && new_state == IB_QPS_RTS) || - (cur_state == IB_QPS_INIT && new_state == IB_QPS_RESET) || - (cur_state == IB_QPS_RTR && new_state == IB_QPS_RESET) || - (cur_state == IB_QPS_RTS && new_state == IB_QPS_RESET) || - (cur_state == IB_QPS_ERR && new_state == IB_QPS_RESET) || - (cur_state == IB_QPS_INIT && new_state == IB_QPS_ERR) || - (cur_state == IB_QPS_RTR && new_state == IB_QPS_ERR) || - (cur_state == IB_QPS_RTS && new_state == IB_QPS_ERR) || - (cur_state == IB_QPS_SQD && new_state == IB_QPS_ERR) || - (cur_state == IB_QPS_SQE && new_state == IB_QPS_ERR) || - (cur_state == IB_QPS_ERR && new_state == IB_QPS_ERR)) { + } else if (hns_roce_v2_check_qp_stat(cur_state, new_state)) { /* Nothing */ ; } else { @@ -3789,6 +3905,11 @@ static int hns_roce_v2_modify_qp(struct ib_qp *ibqp, if (attr_mask & (IB_QP_ACCESS_FLAGS | IB_QP_MAX_DEST_RD_ATOMIC)) set_access_flags(hr_qp, context, qpc_mask, attr, attr_mask); + roce_set_bit(context->byte_108_rx_reqepsn, V2_QPC_BYTE_108_INV_CREDIT_S, + ibqp->srq ? 1 : 0); + roce_set_bit(qpc_mask->byte_108_rx_reqepsn, + V2_QPC_BYTE_108_INV_CREDIT_S, 0); + /* Every status migrate must change state */ roce_set_field(context->byte_60_qpst_tempid, V2_QPC_BYTE_60_QP_ST_M, V2_QPC_BYTE_60_QP_ST_S, new_state); @@ -4012,7 +4133,7 @@ out: static int hns_roce_v2_destroy_qp_common(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp, - int is_user) + bool is_user) { struct hns_roce_cq *send_cq, *recv_cq; struct device *dev = hr_dev->dev; @@ -4074,7 +4195,8 @@ static int hns_roce_v2_destroy_qp_common(struct hns_roce_dev *hr_dev, hns_roce_free_db(hr_dev, &hr_qp->rdb); } - if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RQ_INLINE) { + if ((hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RQ_INLINE) && + hr_qp->rq.wqe_cnt) { kfree(hr_qp->rq_inl_buf.wqe_list[0].sg_list); kfree(hr_qp->rq_inl_buf.wqe_list); } @@ -4088,7 +4210,7 @@ static int hns_roce_v2_destroy_qp(struct ib_qp *ibqp) struct hns_roce_qp *hr_qp = to_hr_qp(ibqp); int ret; - ret = hns_roce_v2_destroy_qp_common(hr_dev, hr_qp, !!ibqp->pd->uobject); + ret = hns_roce_v2_destroy_qp_common(hr_dev, hr_qp, ibqp->uobject); if (ret) { dev_err(hr_dev->dev, "Destroy qp failed(%d)\n", ret); return ret; @@ -4384,6 +4506,7 @@ static int hns_roce_v2_aeq_int(struct hns_roce_dev *hr_dev, int aeqe_found = 0; int event_type; int sub_type; + u32 srqn; u32 qpn; u32 cqn; @@ -4406,6 +4529,9 @@ static int hns_roce_v2_aeq_int(struct hns_roce_dev *hr_dev, cqn = roce_get_field(aeqe->event.cq_event.cq, HNS_ROCE_V2_AEQE_EVENT_QUEUE_NUM_M, HNS_ROCE_V2_AEQE_EVENT_QUEUE_NUM_S); + srqn = roce_get_field(aeqe->event.srq_event.srq, + HNS_ROCE_V2_AEQE_EVENT_QUEUE_NUM_M, + HNS_ROCE_V2_AEQE_EVENT_QUEUE_NUM_S); switch (event_type) { case HNS_ROCE_EVENT_TYPE_PATH_MIG: @@ -4413,13 +4539,14 @@ static int hns_roce_v2_aeq_int(struct hns_roce_dev *hr_dev, case HNS_ROCE_EVENT_TYPE_COMM_EST: case HNS_ROCE_EVENT_TYPE_SQ_DRAINED: case HNS_ROCE_EVENT_TYPE_WQ_CATAS_ERROR: + case HNS_ROCE_EVENT_TYPE_SRQ_LAST_WQE_REACH: case HNS_ROCE_EVENT_TYPE_INV_REQ_LOCAL_WQ_ERROR: case HNS_ROCE_EVENT_TYPE_LOCAL_WQ_ACCESS_ERROR: hns_roce_qp_event(hr_dev, qpn, event_type); break; case HNS_ROCE_EVENT_TYPE_SRQ_LIMIT_REACH: - case HNS_ROCE_EVENT_TYPE_SRQ_LAST_WQE_REACH: case HNS_ROCE_EVENT_TYPE_SRQ_CATAS_ERROR: + hns_roce_srq_event(hr_dev, srqn, event_type); break; case HNS_ROCE_EVENT_TYPE_CQ_ACCESS_ERROR: case HNS_ROCE_EVENT_TYPE_CQ_OVERFLOW: @@ -4964,13 +5091,12 @@ static int hns_roce_mhop_alloc_eq(struct hns_roce_dev *hr_dev, eqe_alloc = i * (buf_chk_sz / eq->eqe_size); size = (eq->entries - eqe_alloc) * eq->eqe_size; } - eq->buf[i] = dma_alloc_coherent(dev, size, + eq->buf[i] = dma_zalloc_coherent(dev, size, &(eq->buf_dma[i]), GFP_KERNEL); if (!eq->buf[i]) goto err_dma_alloc_buf; - memset(eq->buf[i], 0, size); *(eq->bt_l0 + i) = eq->buf_dma[i]; eq_buf_cnt++; @@ -5000,13 +5126,12 @@ static int hns_roce_mhop_alloc_eq(struct hns_roce_dev *hr_dev, size = (eq->entries - eqe_alloc) * eq->eqe_size; } - eq->buf[idx] = dma_alloc_coherent(dev, size, + eq->buf[idx] = dma_zalloc_coherent(dev, size, &(eq->buf_dma[idx]), GFP_KERNEL); if (!eq->buf[idx]) goto err_dma_alloc_buf; - memset(eq->buf[idx], 0, size); *(eq->bt_l1[i] + j) = eq->buf_dma[idx]; eq_buf_cnt++; @@ -5116,7 +5241,7 @@ static int hns_roce_v2_create_eq(struct hns_roce_dev *hr_dev, goto free_cmd_mbox; } - eq->buf_list->buf = dma_alloc_coherent(dev, buf_chk_sz, + eq->buf_list->buf = dma_zalloc_coherent(dev, buf_chk_sz, &(eq->buf_list->map), GFP_KERNEL); if (!eq->buf_list->buf) { @@ -5124,7 +5249,6 @@ static int hns_roce_v2_create_eq(struct hns_roce_dev *hr_dev, goto err_alloc_buf; } - memset(eq->buf_list->buf, 0, buf_chk_sz); } else { ret = hns_roce_mhop_alloc_eq(hr_dev, eq); if (ret) { @@ -5332,6 +5456,300 @@ static void hns_roce_v2_cleanup_eq_table(struct hns_roce_dev *hr_dev) destroy_workqueue(hr_dev->irq_workq); } +static void hns_roce_v2_write_srqc(struct hns_roce_dev *hr_dev, + struct hns_roce_srq *srq, u32 pdn, u16 xrcd, + u32 cqn, void *mb_buf, u64 *mtts_wqe, + u64 *mtts_idx, dma_addr_t dma_handle_wqe, + dma_addr_t dma_handle_idx) +{ + struct hns_roce_srq_context *srq_context; + + srq_context = mb_buf; + memset(srq_context, 0, sizeof(*srq_context)); + + roce_set_field(srq_context->byte_4_srqn_srqst, SRQC_BYTE_4_SRQ_ST_M, + SRQC_BYTE_4_SRQ_ST_S, 1); + + roce_set_field(srq_context->byte_4_srqn_srqst, + SRQC_BYTE_4_SRQ_WQE_HOP_NUM_M, + SRQC_BYTE_4_SRQ_WQE_HOP_NUM_S, + (hr_dev->caps.srqwqe_hop_num == HNS_ROCE_HOP_NUM_0 ? 0 : + hr_dev->caps.srqwqe_hop_num)); + roce_set_field(srq_context->byte_4_srqn_srqst, + SRQC_BYTE_4_SRQ_SHIFT_M, SRQC_BYTE_4_SRQ_SHIFT_S, + ilog2(srq->max)); + + roce_set_field(srq_context->byte_4_srqn_srqst, SRQC_BYTE_4_SRQN_M, + SRQC_BYTE_4_SRQN_S, srq->srqn); + + roce_set_field(srq_context->byte_8_limit_wl, SRQC_BYTE_8_SRQ_LIMIT_WL_M, + SRQC_BYTE_8_SRQ_LIMIT_WL_S, 0); + + roce_set_field(srq_context->byte_12_xrcd, SRQC_BYTE_12_SRQ_XRCD_M, + SRQC_BYTE_12_SRQ_XRCD_S, xrcd); + + srq_context->wqe_bt_ba = cpu_to_le32((u32)(dma_handle_wqe >> 3)); + + roce_set_field(srq_context->byte_24_wqe_bt_ba, + SRQC_BYTE_24_SRQ_WQE_BT_BA_M, + SRQC_BYTE_24_SRQ_WQE_BT_BA_S, + cpu_to_le32(dma_handle_wqe >> 35)); + + roce_set_field(srq_context->byte_28_rqws_pd, SRQC_BYTE_28_PD_M, + SRQC_BYTE_28_PD_S, pdn); + roce_set_field(srq_context->byte_28_rqws_pd, SRQC_BYTE_28_RQWS_M, + SRQC_BYTE_28_RQWS_S, srq->max_gs <= 0 ? 0 : + fls(srq->max_gs - 1)); + + srq_context->idx_bt_ba = (u32)(dma_handle_idx >> 3); + srq_context->idx_bt_ba = cpu_to_le32(srq_context->idx_bt_ba); + roce_set_field(srq_context->rsv_idx_bt_ba, + SRQC_BYTE_36_SRQ_IDX_BT_BA_M, + SRQC_BYTE_36_SRQ_IDX_BT_BA_S, + cpu_to_le32(dma_handle_idx >> 35)); + + srq_context->idx_cur_blk_addr = (u32)(mtts_idx[0] >> PAGE_ADDR_SHIFT); + srq_context->idx_cur_blk_addr = + cpu_to_le32(srq_context->idx_cur_blk_addr); + roce_set_field(srq_context->byte_44_idxbufpgsz_addr, + SRQC_BYTE_44_SRQ_IDX_CUR_BLK_ADDR_M, + SRQC_BYTE_44_SRQ_IDX_CUR_BLK_ADDR_S, + cpu_to_le32((mtts_idx[0]) >> (32 + PAGE_ADDR_SHIFT))); + roce_set_field(srq_context->byte_44_idxbufpgsz_addr, + SRQC_BYTE_44_SRQ_IDX_HOP_NUM_M, + SRQC_BYTE_44_SRQ_IDX_HOP_NUM_S, + hr_dev->caps.idx_hop_num == HNS_ROCE_HOP_NUM_0 ? 0 : + hr_dev->caps.idx_hop_num); + + roce_set_field(srq_context->byte_44_idxbufpgsz_addr, + SRQC_BYTE_44_SRQ_IDX_BA_PG_SZ_M, + SRQC_BYTE_44_SRQ_IDX_BA_PG_SZ_S, + hr_dev->caps.idx_ba_pg_sz); + roce_set_field(srq_context->byte_44_idxbufpgsz_addr, + SRQC_BYTE_44_SRQ_IDX_BUF_PG_SZ_M, + SRQC_BYTE_44_SRQ_IDX_BUF_PG_SZ_S, + hr_dev->caps.idx_buf_pg_sz); + + srq_context->idx_nxt_blk_addr = (u32)(mtts_idx[1] >> PAGE_ADDR_SHIFT); + srq_context->idx_nxt_blk_addr = + cpu_to_le32(srq_context->idx_nxt_blk_addr); + roce_set_field(srq_context->rsv_idxnxtblkaddr, + SRQC_BYTE_52_SRQ_IDX_NXT_BLK_ADDR_M, + SRQC_BYTE_52_SRQ_IDX_NXT_BLK_ADDR_S, + cpu_to_le32((mtts_idx[1]) >> (32 + PAGE_ADDR_SHIFT))); + roce_set_field(srq_context->byte_56_xrc_cqn, + SRQC_BYTE_56_SRQ_XRC_CQN_M, SRQC_BYTE_56_SRQ_XRC_CQN_S, + cqn); + roce_set_field(srq_context->byte_56_xrc_cqn, + SRQC_BYTE_56_SRQ_WQE_BA_PG_SZ_M, + SRQC_BYTE_56_SRQ_WQE_BA_PG_SZ_S, + hr_dev->caps.srqwqe_ba_pg_sz + PG_SHIFT_OFFSET); + roce_set_field(srq_context->byte_56_xrc_cqn, + SRQC_BYTE_56_SRQ_WQE_BUF_PG_SZ_M, + SRQC_BYTE_56_SRQ_WQE_BUF_PG_SZ_S, + hr_dev->caps.srqwqe_buf_pg_sz + PG_SHIFT_OFFSET); + + roce_set_bit(srq_context->db_record_addr_record_en, + SRQC_BYTE_60_SRQ_RECORD_EN_S, 0); +} + +static int hns_roce_v2_modify_srq(struct ib_srq *ibsrq, + struct ib_srq_attr *srq_attr, + enum ib_srq_attr_mask srq_attr_mask, + struct ib_udata *udata) +{ + struct hns_roce_dev *hr_dev = to_hr_dev(ibsrq->device); + struct hns_roce_srq *srq = to_hr_srq(ibsrq); + struct hns_roce_srq_context *srq_context; + struct hns_roce_srq_context *srqc_mask; + struct hns_roce_cmd_mailbox *mailbox; + int ret; + + if (srq_attr_mask & IB_SRQ_LIMIT) { + if (srq_attr->srq_limit >= srq->max) + return -EINVAL; + + mailbox = hns_roce_alloc_cmd_mailbox(hr_dev); + if (IS_ERR(mailbox)) + return PTR_ERR(mailbox); + + srq_context = mailbox->buf; + srqc_mask = (struct hns_roce_srq_context *)mailbox->buf + 1; + + memset(srqc_mask, 0xff, sizeof(*srqc_mask)); + + roce_set_field(srq_context->byte_8_limit_wl, + SRQC_BYTE_8_SRQ_LIMIT_WL_M, + SRQC_BYTE_8_SRQ_LIMIT_WL_S, srq_attr->srq_limit); + roce_set_field(srqc_mask->byte_8_limit_wl, + SRQC_BYTE_8_SRQ_LIMIT_WL_M, + SRQC_BYTE_8_SRQ_LIMIT_WL_S, 0); + + ret = hns_roce_cmd_mbox(hr_dev, mailbox->dma, 0, srq->srqn, 0, + HNS_ROCE_CMD_MODIFY_SRQC, + HNS_ROCE_CMD_TIMEOUT_MSECS); + hns_roce_free_cmd_mailbox(hr_dev, mailbox); + if (ret) { + dev_err(hr_dev->dev, + "MODIFY SRQ Failed to cmd mailbox.\n"); + return ret; + } + } + + return 0; +} + +int hns_roce_v2_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr) +{ + struct hns_roce_dev *hr_dev = to_hr_dev(ibsrq->device); + struct hns_roce_srq *srq = to_hr_srq(ibsrq); + struct hns_roce_srq_context *srq_context; + struct hns_roce_cmd_mailbox *mailbox; + int limit_wl; + int ret; + + mailbox = hns_roce_alloc_cmd_mailbox(hr_dev); + if (IS_ERR(mailbox)) + return PTR_ERR(mailbox); + + srq_context = mailbox->buf; + ret = hns_roce_cmd_mbox(hr_dev, 0, mailbox->dma, srq->srqn, 0, + HNS_ROCE_CMD_QUERY_SRQC, + HNS_ROCE_CMD_TIMEOUT_MSECS); + if (ret) { + dev_err(hr_dev->dev, "QUERY SRQ cmd process error\n"); + goto out; + } + + limit_wl = roce_get_field(srq_context->byte_8_limit_wl, + SRQC_BYTE_8_SRQ_LIMIT_WL_M, + SRQC_BYTE_8_SRQ_LIMIT_WL_S); + + attr->srq_limit = limit_wl; + attr->max_wr = srq->max - 1; + attr->max_sge = srq->max_gs; + + memcpy(srq_context, mailbox->buf, sizeof(*srq_context)); + +out: + hns_roce_free_cmd_mailbox(hr_dev, mailbox); + return ret; +} + +static int find_empty_entry(struct hns_roce_idx_que *idx_que) +{ + int bit_num; + int i; + + /* bitmap[i] is set zero if all bits are allocated */ + for (i = 0; idx_que->bitmap[i] == 0; ++i) + ; + bit_num = ffs(idx_que->bitmap[i]); + idx_que->bitmap[i] &= ~(1ULL << (bit_num - 1)); + + return i * sizeof(u64) * 8 + (bit_num - 1); +} + +static void fill_idx_queue(struct hns_roce_idx_que *idx_que, + int cur_idx, int wqe_idx) +{ + unsigned int *addr; + + addr = (unsigned int *)hns_roce_buf_offset(&idx_que->idx_buf, + cur_idx * idx_que->entry_sz); + *addr = wqe_idx; +} + +static int hns_roce_v2_post_srq_recv(struct ib_srq *ibsrq, + const struct ib_recv_wr *wr, + const struct ib_recv_wr **bad_wr) +{ + struct hns_roce_srq *srq = to_hr_srq(ibsrq); + struct hns_roce_v2_wqe_data_seg *dseg; + struct hns_roce_v2_db srq_db; + unsigned long flags; + int ret = 0; + int wqe_idx; + void *wqe; + int nreq; + int ind; + int i; + + spin_lock_irqsave(&srq->lock, flags); + + ind = srq->head & (srq->max - 1); + + for (nreq = 0; wr; ++nreq, wr = wr->next) { + if (unlikely(wr->num_sge > srq->max_gs)) { + ret = -EINVAL; + *bad_wr = wr; + break; + } + + if (unlikely(srq->head == srq->tail)) { + ret = -ENOMEM; + *bad_wr = wr; + break; + } + + wqe_idx = find_empty_entry(&srq->idx_que); + fill_idx_queue(&srq->idx_que, ind, wqe_idx); + wqe = get_srq_wqe(srq, wqe_idx); + dseg = (struct hns_roce_v2_wqe_data_seg *)wqe; + + for (i = 0; i < wr->num_sge; ++i) { + dseg[i].len = cpu_to_le32(wr->sg_list[i].length); + dseg[i].lkey = cpu_to_le32(wr->sg_list[i].lkey); + dseg[i].addr = cpu_to_le64(wr->sg_list[i].addr); + } + + if (i < srq->max_gs) { + dseg->len = 0; + dseg->lkey = cpu_to_le32(0x100); + dseg->addr = 0; + } + + srq->wrid[wqe_idx] = wr->wr_id; + ind = (ind + 1) & (srq->max - 1); + } + + if (likely(nreq)) { + srq->head += nreq; + + /* + * Make sure that descriptors are written before + * doorbell record. + */ + wmb(); + + srq_db.byte_4 = HNS_ROCE_V2_SRQ_DB << 24 | srq->srqn; + srq_db.parameter = srq->head; + + hns_roce_write64_k((__le32 *)&srq_db, srq->db_reg_l); + + } + + spin_unlock_irqrestore(&srq->lock, flags); + + return ret; +} + +static const struct ib_device_ops hns_roce_v2_dev_ops = { + .destroy_qp = hns_roce_v2_destroy_qp, + .modify_cq = hns_roce_v2_modify_cq, + .poll_cq = hns_roce_v2_poll_cq, + .post_recv = hns_roce_v2_post_recv, + .post_send = hns_roce_v2_post_send, + .query_qp = hns_roce_v2_query_qp, + .req_notify_cq = hns_roce_v2_req_notify_cq, +}; + +static const struct ib_device_ops hns_roce_v2_dev_srq_ops = { + .modify_srq = hns_roce_v2_modify_srq, + .post_srq_recv = hns_roce_v2_post_srq_recv, + .query_srq = hns_roce_v2_query_srq, +}; + static const struct hns_roce_hw hns_roce_hw_v2 = { .cmq_init = hns_roce_v2_cmq_init, .cmq_exit = hns_roce_v2_cmq_exit, @@ -5359,6 +5777,12 @@ static const struct hns_roce_hw hns_roce_hw_v2 = { .poll_cq = hns_roce_v2_poll_cq, .init_eq = hns_roce_v2_init_eq_table, .cleanup_eq = hns_roce_v2_cleanup_eq_table, + .write_srqc = hns_roce_v2_write_srqc, + .modify_srq = hns_roce_v2_modify_srq, + .query_srq = hns_roce_v2_query_srq, + .post_srq_recv = hns_roce_v2_post_srq_recv, + .hns_roce_dev_ops = &hns_roce_v2_dev_ops, + .hns_roce_dev_srq_ops = &hns_roce_v2_dev_srq_ops, }; static const struct pci_device_id hns_roce_hw_v2_pci_tbl[] = { diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h index 8bc820635bbd..b72d0443c835 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h @@ -46,10 +46,16 @@ #define HNS_ROCE_V2_MAX_QP_NUM 0x2000 #define HNS_ROCE_V2_MAX_WQE_NUM 0x8000 +#define HNS_ROCE_V2_MAX_SRQ 0x100000 +#define HNS_ROCE_V2_MAX_SRQ_WR 0x8000 +#define HNS_ROCE_V2_MAX_SRQ_SGE 0x100 #define HNS_ROCE_V2_MAX_CQ_NUM 0x8000 +#define HNS_ROCE_V2_MAX_SRQ_NUM 0x100000 #define HNS_ROCE_V2_MAX_CQE_NUM 0x10000 +#define HNS_ROCE_V2_MAX_SRQWQE_NUM 0x8000 #define HNS_ROCE_V2_MAX_RQ_SGE_NUM 0x100 #define HNS_ROCE_V2_MAX_SQ_SGE_NUM 0xff +#define HNS_ROCE_V2_MAX_SRQ_SGE_NUM 0x100 #define HNS_ROCE_V2_MAX_EXTEND_SGE_NUM 0x200000 #define HNS_ROCE_V2_MAX_SQ_INLINE 0x20 #define HNS_ROCE_V2_UAR_NUM 256 @@ -61,6 +67,8 @@ #define HNS_ROCE_V2_MAX_MTPT_NUM 0x8000 #define HNS_ROCE_V2_MAX_MTT_SEGS 0x1000000 #define HNS_ROCE_V2_MAX_CQE_SEGS 0x1000000 +#define HNS_ROCE_V2_MAX_SRQWQE_SEGS 0x1000000 +#define HNS_ROCE_V2_MAX_IDX_SEGS 0x1000000 #define HNS_ROCE_V2_MAX_PD_NUM 0x1000000 #define HNS_ROCE_V2_MAX_QP_INIT_RDMA 128 #define HNS_ROCE_V2_MAX_QP_DEST_RDMA 128 @@ -71,6 +79,7 @@ #define HNS_ROCE_V2_IRRL_ENTRY_SZ 64 #define HNS_ROCE_V2_TRRL_ENTRY_SZ 48 #define HNS_ROCE_V2_CQC_ENTRY_SZ 64 +#define HNS_ROCE_V2_SRQC_ENTRY_SZ 64 #define HNS_ROCE_V2_MTPT_ENTRY_SZ 64 #define HNS_ROCE_V2_MTT_ENTRY_SZ 64 #define HNS_ROCE_V2_CQE_ENTRY_SIZE 32 @@ -84,8 +93,10 @@ #define HNS_ROCE_CONTEXT_HOP_NUM 1 #define HNS_ROCE_MTT_HOP_NUM 1 #define HNS_ROCE_CQE_HOP_NUM 1 +#define HNS_ROCE_SRQWQE_HOP_NUM 1 #define HNS_ROCE_PBL_HOP_NUM 2 #define HNS_ROCE_EQE_HOP_NUM 2 +#define HNS_ROCE_IDX_HOP_NUM 1 #define HNS_ROCE_V2_GID_INDEX_NUM 256 @@ -113,6 +124,8 @@ ((step_idx == 0 && hop_num == HNS_ROCE_HOP_NUM_0) || \ (step_idx == 1 && hop_num == 1) || \ (step_idx == 2 && hop_num == 2)) +#define HNS_ICL_SWITCH_CMD_ROCEE_SEL_SHIFT 0 +#define HNS_ICL_SWITCH_CMD_ROCEE_SEL BIT(HNS_ICL_SWITCH_CMD_ROCEE_SEL_SHIFT) #define CMD_CSQ_DESC_NUM 1024 #define CMD_CRQ_DESC_NUM 1024 @@ -213,7 +226,10 @@ enum hns_roce_opcode_type { HNS_ROCE_OPC_CFG_TMOUT_LLM = 0x8404, HNS_ROCE_OPC_CFG_SGID_TB = 0x8500, HNS_ROCE_OPC_CFG_SMAC_TB = 0x8501, + HNS_ROCE_OPC_POST_MB = 0x8504, + HNS_ROCE_OPC_QUERY_MB_ST = 0x8505, HNS_ROCE_OPC_CFG_BT_ATTR = 0x8506, + HNS_SWITCH_PARAMETER_CFG = 0x1033, }; enum { @@ -325,6 +341,90 @@ struct hns_roce_v2_cq_context { #define V2_CQC_BYTE_64_SE_CQE_IDX_S 0 #define V2_CQC_BYTE_64_SE_CQE_IDX_M GENMASK(23, 0) +struct hns_roce_srq_context { + __le32 byte_4_srqn_srqst; + __le32 byte_8_limit_wl; + __le32 byte_12_xrcd; + __le32 byte_16_pi_ci; + __le32 wqe_bt_ba; + __le32 byte_24_wqe_bt_ba; + __le32 byte_28_rqws_pd; + __le32 idx_bt_ba; + __le32 rsv_idx_bt_ba; + __le32 idx_cur_blk_addr; + __le32 byte_44_idxbufpgsz_addr; + __le32 idx_nxt_blk_addr; + __le32 rsv_idxnxtblkaddr; + __le32 byte_56_xrc_cqn; + __le32 db_record_addr_record_en; + __le32 db_record_addr; +}; + +#define SRQC_BYTE_4_SRQ_ST_S 0 +#define SRQC_BYTE_4_SRQ_ST_M GENMASK(1, 0) + +#define SRQC_BYTE_4_SRQ_WQE_HOP_NUM_S 2 +#define SRQC_BYTE_4_SRQ_WQE_HOP_NUM_M GENMASK(3, 2) + +#define SRQC_BYTE_4_SRQ_SHIFT_S 4 +#define SRQC_BYTE_4_SRQ_SHIFT_M GENMASK(7, 4) + +#define SRQC_BYTE_4_SRQN_S 8 +#define SRQC_BYTE_4_SRQN_M GENMASK(31, 8) + +#define SRQC_BYTE_8_SRQ_LIMIT_WL_S 0 +#define SRQC_BYTE_8_SRQ_LIMIT_WL_M GENMASK(15, 0) + +#define SRQC_BYTE_12_SRQ_XRCD_S 0 +#define SRQC_BYTE_12_SRQ_XRCD_M GENMASK(23, 0) + +#define SRQC_BYTE_16_SRQ_PRODUCER_IDX_S 0 +#define SRQC_BYTE_16_SRQ_PRODUCER_IDX_M GENMASK(15, 0) + +#define SRQC_BYTE_16_SRQ_CONSUMER_IDX_S 0 +#define SRQC_BYTE_16_SRQ_CONSUMER_IDX_M GENMASK(31, 16) + +#define SRQC_BYTE_24_SRQ_WQE_BT_BA_S 0 +#define SRQC_BYTE_24_SRQ_WQE_BT_BA_M GENMASK(28, 0) + +#define SRQC_BYTE_28_PD_S 0 +#define SRQC_BYTE_28_PD_M GENMASK(23, 0) + +#define SRQC_BYTE_28_RQWS_S 24 +#define SRQC_BYTE_28_RQWS_M GENMASK(27, 24) + +#define SRQC_BYTE_36_SRQ_IDX_BT_BA_S 0 +#define SRQC_BYTE_36_SRQ_IDX_BT_BA_M GENMASK(28, 0) + +#define SRQC_BYTE_44_SRQ_IDX_CUR_BLK_ADDR_S 0 +#define SRQC_BYTE_44_SRQ_IDX_CUR_BLK_ADDR_M GENMASK(19, 0) + +#define SRQC_BYTE_44_SRQ_IDX_HOP_NUM_S 22 +#define SRQC_BYTE_44_SRQ_IDX_HOP_NUM_M GENMASK(23, 22) + +#define SRQC_BYTE_44_SRQ_IDX_BA_PG_SZ_S 24 +#define SRQC_BYTE_44_SRQ_IDX_BA_PG_SZ_M GENMASK(27, 24) + +#define SRQC_BYTE_44_SRQ_IDX_BUF_PG_SZ_S 28 +#define SRQC_BYTE_44_SRQ_IDX_BUF_PG_SZ_M GENMASK(31, 28) + +#define SRQC_BYTE_52_SRQ_IDX_NXT_BLK_ADDR_S 0 +#define SRQC_BYTE_52_SRQ_IDX_NXT_BLK_ADDR_M GENMASK(19, 0) + +#define SRQC_BYTE_56_SRQ_XRC_CQN_S 0 +#define SRQC_BYTE_56_SRQ_XRC_CQN_M GENMASK(23, 0) + +#define SRQC_BYTE_56_SRQ_WQE_BA_PG_SZ_S 24 +#define SRQC_BYTE_56_SRQ_WQE_BA_PG_SZ_M GENMASK(27, 24) + +#define SRQC_BYTE_56_SRQ_WQE_BUF_PG_SZ_S 28 +#define SRQC_BYTE_56_SRQ_WQE_BUF_PG_SZ_M GENMASK(31, 28) + +#define SRQC_BYTE_60_SRQ_RECORD_EN_S 0 + +#define SRQC_BYTE_60_SRQ_DB_RECORD_ADDR_S 1 +#define SRQC_BYTE_60_SRQ_DB_RECORD_ADDR_M GENMASK(31, 1) + enum{ V2_MPT_ST_VALID = 0x1, V2_MPT_ST_FREE = 0x2, @@ -1289,6 +1389,36 @@ struct hns_roce_vf_res_b { #define VF_RES_B_DATA_3_VF_SL_NUM_S 16 #define VF_RES_B_DATA_3_VF_SL_NUM_M GENMASK(19, 16) +struct hns_roce_vf_switch { + __le32 rocee_sel; + __le32 fun_id; + __le32 cfg; + __le32 resv1; + __le32 resv2; + __le32 resv3; +}; + +#define VF_SWITCH_DATA_FUN_ID_VF_ID_S 3 +#define VF_SWITCH_DATA_FUN_ID_VF_ID_M GENMASK(10, 3) + +#define VF_SWITCH_DATA_CFG_ALW_LPBK_S 1 +#define VF_SWITCH_DATA_CFG_ALW_LCL_LPBK_S 2 +#define VF_SWITCH_DATA_CFG_ALW_DST_OVRD_S 3 + +struct hns_roce_post_mbox { + __le32 in_param_l; + __le32 in_param_h; + __le32 out_param_l; + __le32 out_param_h; + __le32 cmd_tag; + __le32 token_event_en; +}; + +struct hns_roce_mbox_status { + __le32 mb_status_hw_run; + __le32 rsv[5]; +}; + struct hns_roce_cfg_bt_attr { __le32 vf_qpc_cfg; __le32 vf_srqc_cfg; @@ -1372,18 +1502,6 @@ struct hns_roce_cmq_desc { #define HNS_ROCE_HW_RUN_BIT_SHIFT 31 #define HNS_ROCE_HW_MB_STATUS_MASK 0xFF -#define HNS_ROCE_VF_MB4_TAG_MASK 0xFFFFFF00 -#define HNS_ROCE_VF_MB4_TAG_SHIFT 8 - -#define HNS_ROCE_VF_MB4_CMD_MASK 0xFF -#define HNS_ROCE_VF_MB4_CMD_SHIFT 0 - -#define HNS_ROCE_VF_MB5_EVENT_MASK 0x10000 -#define HNS_ROCE_VF_MB5_EVENT_SHIFT 16 - -#define HNS_ROCE_VF_MB5_TOKEN_MASK 0xFFFF -#define HNS_ROCE_VF_MB5_TOKEN_SHIFT 0 - struct hns_roce_v2_cmq_ring { dma_addr_t desc_dma_addr; struct hns_roce_cmq_desc *desc; diff --git a/drivers/infiniband/hw/hns/hns_roce_main.c b/drivers/infiniband/hw/hns/hns_roce_main.c index 1b3ee514f2ef..c79054ba9495 100644 --- a/drivers/infiniband/hw/hns/hns_roce_main.c +++ b/drivers/infiniband/hw/hns/hns_roce_main.c @@ -220,6 +220,11 @@ static int hns_roce_query_device(struct ib_device *ib_dev, IB_ATOMIC_HCA : IB_ATOMIC_NONE; props->max_pkeys = 1; props->local_ca_ack_delay = hr_dev->caps.local_ca_ack_delay; + if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_SRQ) { + props->max_srq = hr_dev->caps.max_srqs; + props->max_srq_wr = hr_dev->caps.max_srq_wrs; + props->max_srq_sge = hr_dev->caps.max_srq_sges; + } return 0; } @@ -440,6 +445,54 @@ static void hns_roce_unregister_device(struct hns_roce_dev *hr_dev) ib_unregister_device(&hr_dev->ib_dev); } +static const struct ib_device_ops hns_roce_dev_ops = { + .add_gid = hns_roce_add_gid, + .alloc_pd = hns_roce_alloc_pd, + .alloc_ucontext = hns_roce_alloc_ucontext, + .create_ah = hns_roce_create_ah, + .create_cq = hns_roce_ib_create_cq, + .create_qp = hns_roce_create_qp, + .dealloc_pd = hns_roce_dealloc_pd, + .dealloc_ucontext = hns_roce_dealloc_ucontext, + .del_gid = hns_roce_del_gid, + .dereg_mr = hns_roce_dereg_mr, + .destroy_ah = hns_roce_destroy_ah, + .destroy_cq = hns_roce_ib_destroy_cq, + .disassociate_ucontext = hns_roce_disassociate_ucontext, + .get_dma_mr = hns_roce_get_dma_mr, + .get_link_layer = hns_roce_get_link_layer, + .get_netdev = hns_roce_get_netdev, + .get_port_immutable = hns_roce_port_immutable, + .mmap = hns_roce_mmap, + .modify_device = hns_roce_modify_device, + .modify_port = hns_roce_modify_port, + .modify_qp = hns_roce_modify_qp, + .query_ah = hns_roce_query_ah, + .query_device = hns_roce_query_device, + .query_pkey = hns_roce_query_pkey, + .query_port = hns_roce_query_port, + .reg_user_mr = hns_roce_reg_user_mr, +}; + +static const struct ib_device_ops hns_roce_dev_mr_ops = { + .rereg_user_mr = hns_roce_rereg_user_mr, +}; + +static const struct ib_device_ops hns_roce_dev_mw_ops = { + .alloc_mw = hns_roce_alloc_mw, + .dealloc_mw = hns_roce_dealloc_mw, +}; + +static const struct ib_device_ops hns_roce_dev_frmr_ops = { + .alloc_mr = hns_roce_alloc_mr, + .map_mr_sg = hns_roce_map_mr_sg, +}; + +static const struct ib_device_ops hns_roce_dev_srq_ops = { + .create_srq = hns_roce_create_srq, + .destroy_srq = hns_roce_destroy_srq, +}; + static int hns_roce_register_device(struct hns_roce_dev *hr_dev) { int ret; @@ -479,73 +532,38 @@ static int hns_roce_register_device(struct hns_roce_dev *hr_dev) ib_dev->uverbs_ex_cmd_mask |= (1ULL << IB_USER_VERBS_EX_CMD_MODIFY_CQ); - /* HCA||device||port */ - ib_dev->modify_device = hns_roce_modify_device; - ib_dev->query_device = hns_roce_query_device; - ib_dev->query_port = hns_roce_query_port; - ib_dev->modify_port = hns_roce_modify_port; - ib_dev->get_link_layer = hns_roce_get_link_layer; - ib_dev->get_netdev = hns_roce_get_netdev; - ib_dev->add_gid = hns_roce_add_gid; - ib_dev->del_gid = hns_roce_del_gid; - ib_dev->query_pkey = hns_roce_query_pkey; - ib_dev->alloc_ucontext = hns_roce_alloc_ucontext; - ib_dev->dealloc_ucontext = hns_roce_dealloc_ucontext; - ib_dev->mmap = hns_roce_mmap; - - /* PD */ - ib_dev->alloc_pd = hns_roce_alloc_pd; - ib_dev->dealloc_pd = hns_roce_dealloc_pd; - - /* AH */ - ib_dev->create_ah = hns_roce_create_ah; - ib_dev->query_ah = hns_roce_query_ah; - ib_dev->destroy_ah = hns_roce_destroy_ah; - - /* QP */ - ib_dev->create_qp = hns_roce_create_qp; - ib_dev->modify_qp = hns_roce_modify_qp; - ib_dev->query_qp = hr_dev->hw->query_qp; - ib_dev->destroy_qp = hr_dev->hw->destroy_qp; - ib_dev->post_send = hr_dev->hw->post_send; - ib_dev->post_recv = hr_dev->hw->post_recv; - - /* CQ */ - ib_dev->create_cq = hns_roce_ib_create_cq; - ib_dev->modify_cq = hr_dev->hw->modify_cq; - ib_dev->destroy_cq = hns_roce_ib_destroy_cq; - ib_dev->req_notify_cq = hr_dev->hw->req_notify_cq; - ib_dev->poll_cq = hr_dev->hw->poll_cq; - - /* MR */ - ib_dev->get_dma_mr = hns_roce_get_dma_mr; - ib_dev->reg_user_mr = hns_roce_reg_user_mr; - ib_dev->dereg_mr = hns_roce_dereg_mr; if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_REREG_MR) { - ib_dev->rereg_user_mr = hns_roce_rereg_user_mr; ib_dev->uverbs_cmd_mask |= (1ULL << IB_USER_VERBS_CMD_REREG_MR); + ib_set_device_ops(ib_dev, &hns_roce_dev_mr_ops); } /* MW */ if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_MW) { - ib_dev->alloc_mw = hns_roce_alloc_mw; - ib_dev->dealloc_mw = hns_roce_dealloc_mw; ib_dev->uverbs_cmd_mask |= (1ULL << IB_USER_VERBS_CMD_ALLOC_MW) | (1ULL << IB_USER_VERBS_CMD_DEALLOC_MW); + ib_set_device_ops(ib_dev, &hns_roce_dev_mw_ops); } /* FRMR */ - if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_FRMR) { - ib_dev->alloc_mr = hns_roce_alloc_mr; - ib_dev->map_mr_sg = hns_roce_map_mr_sg; - } + if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_FRMR) + ib_set_device_ops(ib_dev, &hns_roce_dev_frmr_ops); - /* OTHERS */ - ib_dev->get_port_immutable = hns_roce_port_immutable; - ib_dev->disassociate_ucontext = hns_roce_disassociate_ucontext; + /* SRQ */ + if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_SRQ) { + ib_dev->uverbs_cmd_mask |= + (1ULL << IB_USER_VERBS_CMD_CREATE_SRQ) | + (1ULL << IB_USER_VERBS_CMD_MODIFY_SRQ) | + (1ULL << IB_USER_VERBS_CMD_QUERY_SRQ) | + (1ULL << IB_USER_VERBS_CMD_DESTROY_SRQ) | + (1ULL << IB_USER_VERBS_CMD_POST_SRQ_RECV); + ib_set_device_ops(ib_dev, &hns_roce_dev_srq_ops); + ib_set_device_ops(ib_dev, hr_dev->hw->hns_roce_dev_srq_ops); + } ib_dev->driver_id = RDMA_DRIVER_HNS; + ib_set_device_ops(ib_dev, hr_dev->hw->hns_roce_dev_ops); + ib_set_device_ops(ib_dev, &hns_roce_dev_ops); ret = ib_register_device(ib_dev, "hns_%d", NULL); if (ret) { dev_err(dev, "ib_register_device failed!\n"); @@ -646,8 +664,58 @@ static int hns_roce_init_hem(struct hns_roce_dev *hr_dev) goto err_unmap_trrl; } + if (hr_dev->caps.srqc_entry_sz) { + ret = hns_roce_init_hem_table(hr_dev, &hr_dev->srq_table.table, + HEM_TYPE_SRQC, + hr_dev->caps.srqc_entry_sz, + hr_dev->caps.num_srqs, 1); + if (ret) { + dev_err(dev, + "Failed to init SRQ context memory, aborting.\n"); + goto err_unmap_cq; + } + } + + if (hr_dev->caps.num_srqwqe_segs) { + ret = hns_roce_init_hem_table(hr_dev, + &hr_dev->mr_table.mtt_srqwqe_table, + HEM_TYPE_SRQWQE, + hr_dev->caps.mtt_entry_sz, + hr_dev->caps.num_srqwqe_segs, 1); + if (ret) { + dev_err(dev, + "Failed to init MTT srqwqe memory, aborting.\n"); + goto err_unmap_srq; + } + } + + if (hr_dev->caps.num_idx_segs) { + ret = hns_roce_init_hem_table(hr_dev, + &hr_dev->mr_table.mtt_idx_table, + HEM_TYPE_IDX, + hr_dev->caps.idx_entry_sz, + hr_dev->caps.num_idx_segs, 1); + if (ret) { + dev_err(dev, + "Failed to init MTT idx memory, aborting.\n"); + goto err_unmap_srqwqe; + } + } + return 0; +err_unmap_srqwqe: + if (hr_dev->caps.num_srqwqe_segs) + hns_roce_cleanup_hem_table(hr_dev, + &hr_dev->mr_table.mtt_srqwqe_table); + +err_unmap_srq: + if (hr_dev->caps.srqc_entry_sz) + hns_roce_cleanup_hem_table(hr_dev, &hr_dev->srq_table.table); + +err_unmap_cq: + hns_roce_cleanup_hem_table(hr_dev, &hr_dev->cq_table.table); + err_unmap_trrl: if (hr_dev->caps.trrl_entry_sz) hns_roce_cleanup_hem_table(hr_dev, @@ -727,8 +795,21 @@ static int hns_roce_setup_hca(struct hns_roce_dev *hr_dev) goto err_cq_table_free; } + if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_SRQ) { + ret = hns_roce_init_srq_table(hr_dev); + if (ret) { + dev_err(dev, + "Failed to init share receive queue table.\n"); + goto err_qp_table_free; + } + } + return 0; +err_qp_table_free: + if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_SRQ) + hns_roce_cleanup_qp_table(hr_dev); + err_cq_table_free: hns_roce_cleanup_cq_table(hr_dev); diff --git a/drivers/infiniband/hw/hns/hns_roce_mr.c b/drivers/infiniband/hw/hns/hns_roce_mr.c index 521ad2aa3a4e..ee5991bd4171 100644 --- a/drivers/infiniband/hw/hns/hns_roce_mr.c +++ b/drivers/infiniband/hw/hns/hns_roce_mr.c @@ -184,12 +184,27 @@ static int hns_roce_alloc_mtt_range(struct hns_roce_dev *hr_dev, int order, struct hns_roce_buddy *buddy; int ret; - if (mtt_type == MTT_TYPE_WQE) { + switch (mtt_type) { + case MTT_TYPE_WQE: buddy = &mr_table->mtt_buddy; table = &mr_table->mtt_table; - } else { + break; + case MTT_TYPE_CQE: buddy = &mr_table->mtt_cqe_buddy; table = &mr_table->mtt_cqe_table; + break; + case MTT_TYPE_SRQWQE: + buddy = &mr_table->mtt_srqwqe_buddy; + table = &mr_table->mtt_srqwqe_table; + break; + case MTT_TYPE_IDX: + buddy = &mr_table->mtt_idx_buddy; + table = &mr_table->mtt_idx_table; + break; + default: + dev_err(hr_dev->dev, "Unsupport MTT table type: %d\n", + mtt_type); + return -EINVAL; } ret = hns_roce_buddy_alloc(buddy, order, seg); @@ -242,18 +257,40 @@ void hns_roce_mtt_cleanup(struct hns_roce_dev *hr_dev, struct hns_roce_mtt *mtt) if (mtt->order < 0) return; - if (mtt->mtt_type == MTT_TYPE_WQE) { + switch (mtt->mtt_type) { + case MTT_TYPE_WQE: hns_roce_buddy_free(&mr_table->mtt_buddy, mtt->first_seg, mtt->order); hns_roce_table_put_range(hr_dev, &mr_table->mtt_table, mtt->first_seg, mtt->first_seg + (1 << mtt->order) - 1); - } else { + break; + case MTT_TYPE_CQE: hns_roce_buddy_free(&mr_table->mtt_cqe_buddy, mtt->first_seg, mtt->order); hns_roce_table_put_range(hr_dev, &mr_table->mtt_cqe_table, mtt->first_seg, mtt->first_seg + (1 << mtt->order) - 1); + break; + case MTT_TYPE_SRQWQE: + hns_roce_buddy_free(&mr_table->mtt_srqwqe_buddy, mtt->first_seg, + mtt->order); + hns_roce_table_put_range(hr_dev, &mr_table->mtt_srqwqe_table, + mtt->first_seg, + mtt->first_seg + (1 << mtt->order) - 1); + break; + case MTT_TYPE_IDX: + hns_roce_buddy_free(&mr_table->mtt_idx_buddy, mtt->first_seg, + mtt->order); + hns_roce_table_put_range(hr_dev, &mr_table->mtt_idx_table, + mtt->first_seg, + mtt->first_seg + (1 << mtt->order) - 1); + break; + default: + dev_err(hr_dev->dev, + "Unsupport mtt type %d, clean mtt failed\n", + mtt->mtt_type); + break; } } EXPORT_SYMBOL_GPL(hns_roce_mtt_cleanup); @@ -713,10 +750,26 @@ static int hns_roce_write_mtt_chunk(struct hns_roce_dev *hr_dev, u32 bt_page_size; u32 i; - if (mtt->mtt_type == MTT_TYPE_WQE) + switch (mtt->mtt_type) { + case MTT_TYPE_WQE: + table = &hr_dev->mr_table.mtt_table; bt_page_size = 1 << (hr_dev->caps.mtt_ba_pg_sz + PAGE_SHIFT); - else + break; + case MTT_TYPE_CQE: + table = &hr_dev->mr_table.mtt_cqe_table; bt_page_size = 1 << (hr_dev->caps.cqe_ba_pg_sz + PAGE_SHIFT); + break; + case MTT_TYPE_SRQWQE: + table = &hr_dev->mr_table.mtt_srqwqe_table; + bt_page_size = 1 << (hr_dev->caps.srqwqe_ba_pg_sz + PAGE_SHIFT); + break; + case MTT_TYPE_IDX: + table = &hr_dev->mr_table.mtt_idx_table; + bt_page_size = 1 << (hr_dev->caps.idx_ba_pg_sz + PAGE_SHIFT); + break; + default: + return -EINVAL; + } /* All MTTs must fit in the same page */ if (start_index / (bt_page_size / sizeof(u64)) != @@ -726,11 +779,6 @@ static int hns_roce_write_mtt_chunk(struct hns_roce_dev *hr_dev, if (start_index & (HNS_ROCE_MTT_ENTRY_PER_SEG - 1)) return -EINVAL; - if (mtt->mtt_type == MTT_TYPE_WQE) - table = &hr_dev->mr_table.mtt_table; - else - table = &hr_dev->mr_table.mtt_cqe_table; - mtts = hns_roce_table_find(hr_dev, table, mtt->first_seg + s / hr_dev->caps.mtt_entry_sz, &dma_handle); @@ -759,10 +807,25 @@ static int hns_roce_write_mtt(struct hns_roce_dev *hr_dev, if (mtt->order < 0) return -EINVAL; - if (mtt->mtt_type == MTT_TYPE_WQE) + switch (mtt->mtt_type) { + case MTT_TYPE_WQE: bt_page_size = 1 << (hr_dev->caps.mtt_ba_pg_sz + PAGE_SHIFT); - else + break; + case MTT_TYPE_CQE: bt_page_size = 1 << (hr_dev->caps.cqe_ba_pg_sz + PAGE_SHIFT); + break; + case MTT_TYPE_SRQWQE: + bt_page_size = 1 << (hr_dev->caps.srqwqe_ba_pg_sz + PAGE_SHIFT); + break; + case MTT_TYPE_IDX: + bt_page_size = 1 << (hr_dev->caps.idx_ba_pg_sz + PAGE_SHIFT); + break; + default: + dev_err(hr_dev->dev, + "Unsupport mtt type %d, write mtt failed\n", + mtt->mtt_type); + return -EINVAL; + } while (npages > 0) { chunk = min_t(int, bt_page_size / sizeof(u64), npages); @@ -828,8 +891,31 @@ int hns_roce_init_mr_table(struct hns_roce_dev *hr_dev) if (ret) goto err_buddy_cqe; } + + if (hr_dev->caps.num_srqwqe_segs) { + ret = hns_roce_buddy_init(&mr_table->mtt_srqwqe_buddy, + ilog2(hr_dev->caps.num_srqwqe_segs)); + if (ret) + goto err_buddy_srqwqe; + } + + if (hr_dev->caps.num_idx_segs) { + ret = hns_roce_buddy_init(&mr_table->mtt_idx_buddy, + ilog2(hr_dev->caps.num_idx_segs)); + if (ret) + goto err_buddy_idx; + } + return 0; +err_buddy_idx: + if (hr_dev->caps.num_srqwqe_segs) + hns_roce_buddy_cleanup(&mr_table->mtt_srqwqe_buddy); + +err_buddy_srqwqe: + if (hns_roce_check_whether_mhop(hr_dev, HEM_TYPE_CQE)) + hns_roce_buddy_cleanup(&mr_table->mtt_cqe_buddy); + err_buddy_cqe: hns_roce_buddy_cleanup(&mr_table->mtt_buddy); @@ -842,6 +928,10 @@ void hns_roce_cleanup_mr_table(struct hns_roce_dev *hr_dev) { struct hns_roce_mr_table *mr_table = &hr_dev->mr_table; + if (hr_dev->caps.num_idx_segs) + hns_roce_buddy_cleanup(&mr_table->mtt_idx_buddy); + if (hr_dev->caps.num_srqwqe_segs) + hns_roce_buddy_cleanup(&mr_table->mtt_srqwqe_buddy); hns_roce_buddy_cleanup(&mr_table->mtt_buddy); if (hns_roce_check_whether_mhop(hr_dev, HEM_TYPE_CQE)) hns_roce_buddy_cleanup(&mr_table->mtt_cqe_buddy); @@ -897,8 +987,25 @@ int hns_roce_ib_umem_write_mtt(struct hns_roce_dev *hr_dev, u32 bt_page_size; u32 n; - order = mtt->mtt_type == MTT_TYPE_WQE ? hr_dev->caps.mtt_ba_pg_sz : - hr_dev->caps.cqe_ba_pg_sz; + switch (mtt->mtt_type) { + case MTT_TYPE_WQE: + order = hr_dev->caps.mtt_ba_pg_sz; + break; + case MTT_TYPE_CQE: + order = hr_dev->caps.cqe_ba_pg_sz; + break; + case MTT_TYPE_SRQWQE: + order = hr_dev->caps.srqwqe_ba_pg_sz; + break; + case MTT_TYPE_IDX: + order = hr_dev->caps.idx_ba_pg_sz; + break; + default: + dev_err(dev, "Unsupport mtt type %d, write mtt failed\n", + mtt->mtt_type); + return -EINVAL; + } + bt_page_size = 1 << (order + PAGE_SHIFT); pages = (u64 *) __get_free_pages(GFP_KERNEL, order); @@ -1021,14 +1128,14 @@ struct ib_mr *hns_roce_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, goto err_umem; } } else { - int pbl_size = 1; + u64 pbl_size = 1; bt_size = (1 << (hr_dev->caps.pbl_ba_pg_sz + PAGE_SHIFT)) / 8; for (i = 0; i < hr_dev->caps.pbl_hop_num; i++) pbl_size *= bt_size; if (n > pbl_size) { dev_err(dev, - " MR len %lld err. MR page num is limited to %d!\n", + " MR len %lld err. MR page num is limited to %lld!\n", length, pbl_size); ret = -EINVAL; goto err_umem; diff --git a/drivers/infiniband/hw/hns/hns_roce_qp.c b/drivers/infiniband/hw/hns/hns_roce_qp.c index 5ebf481a39d9..54031c5b53fa 100644 --- a/drivers/infiniband/hw/hns/hns_roce_qp.c +++ b/drivers/infiniband/hw/hns/hns_roce_qp.c @@ -280,7 +280,7 @@ void hns_roce_release_range_qp(struct hns_roce_dev *hr_dev, int base_qpn, EXPORT_SYMBOL_GPL(hns_roce_release_range_qp); static int hns_roce_set_rq_size(struct hns_roce_dev *hr_dev, - struct ib_qp_cap *cap, int is_user, int has_srq, + struct ib_qp_cap *cap, bool is_user, int has_rq, struct hns_roce_qp *hr_qp) { struct device *dev = hr_dev->dev; @@ -294,14 +294,12 @@ static int hns_roce_set_rq_size(struct hns_roce_dev *hr_dev, return -EINVAL; } - /* If srq exit, set zero for relative number of rq */ - if (has_srq) { - if (cap->max_recv_wr) { - dev_dbg(dev, "srq no need config max_recv_wr\n"); - return -EINVAL; - } - - hr_qp->rq.wqe_cnt = hr_qp->rq.max_gs = 0; + /* If srq exist, set zero for relative number of rq */ + if (!has_rq) { + hr_qp->rq.wqe_cnt = 0; + hr_qp->rq.max_gs = 0; + cap->max_recv_wr = 0; + cap->max_recv_sge = 0; } else { if (is_user && (!cap->max_recv_wr || !cap->max_recv_sge)) { dev_err(dev, "user space no need config max_recv_wr max_recv_sge\n"); @@ -562,14 +560,15 @@ static int hns_roce_create_qp_common(struct hns_roce_dev *hr_dev, else hr_qp->sq_signal_bits = cpu_to_le32(IB_SIGNAL_REQ_WR); - ret = hns_roce_set_rq_size(hr_dev, &init_attr->cap, !!ib_pd->uobject, - !!init_attr->srq, hr_qp); + ret = hns_roce_set_rq_size(hr_dev, &init_attr->cap, udata, + hns_roce_qp_has_rq(init_attr), hr_qp); if (ret) { dev_err(dev, "hns_roce_set_rq_size failed\n"); goto err_out; } - if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RQ_INLINE) { + if ((hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RQ_INLINE) && + hns_roce_qp_has_rq(init_attr)) { /* allocate recv inline buf */ hr_qp->rq_inl_buf.wqe_list = kcalloc(hr_qp->rq.wqe_cnt, sizeof(struct hns_roce_rinl_wqe), @@ -599,7 +598,7 @@ static int hns_roce_create_qp_common(struct hns_roce_dev *hr_dev, init_attr->cap.max_recv_sge]; } - if (ib_pd->uobject) { + if (udata) { if (ib_copy_from_udata(&ucmd, udata, sizeof(ucmd))) { dev_err(dev, "ib_copy_from_udata error for create qp\n"); ret = -EFAULT; @@ -784,7 +783,7 @@ static int hns_roce_create_qp_common(struct hns_roce_dev *hr_dev, else hr_qp->doorbell_qpn = cpu_to_le64(hr_qp->qpn); - if (ib_pd->uobject && (udata->outlen >= sizeof(resp)) && + if (udata && (udata->outlen >= sizeof(resp)) && (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RECORD_DB)) { /* indicate kernel supports rq record db */ @@ -811,7 +810,7 @@ err_qpn: hns_roce_release_range_qp(hr_dev, qpn, 1); err_wrid: - if (ib_pd->uobject) { + if (udata) { if ((hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RECORD_DB) && (udata->outlen >= sizeof(resp)) && hns_roce_qp_has_rq(init_attr)) @@ -824,7 +823,7 @@ err_wrid: } err_sq_dbmap: - if (ib_pd->uobject) + if (udata) if ((hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_SQ_RECORD_DB) && (udata->inlen >= sizeof(ucmd)) && (udata->outlen >= sizeof(resp)) && @@ -837,13 +836,13 @@ err_mtt: hns_roce_mtt_cleanup(hr_dev, &hr_qp->mtt); err_buf: - if (ib_pd->uobject) + if (hr_qp->umem) ib_umem_release(hr_qp->umem); else hns_roce_buf_free(hr_dev, hr_qp->buff_size, &hr_qp->hr_buf); err_db: - if (!ib_pd->uobject && hns_roce_qp_has_rq(init_attr) && + if (!udata && hns_roce_qp_has_rq(init_attr) && (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RECORD_DB)) hns_roce_free_db(hr_dev, &hr_qp->rdb); @@ -889,7 +888,7 @@ struct ib_qp *hns_roce_create_qp(struct ib_pd *pd, } case IB_QPT_GSI: { /* Userspace is not allowed to create special QPs: */ - if (pd->uobject) { + if (udata) { dev_err(dev, "not support usr space GSI\n"); return ERR_PTR(-EINVAL); } diff --git a/drivers/infiniband/hw/hns/hns_roce_srq.c b/drivers/infiniband/hw/hns/hns_roce_srq.c new file mode 100644 index 000000000000..960b1946c365 --- /dev/null +++ b/drivers/infiniband/hw/hns/hns_roce_srq.c @@ -0,0 +1,457 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* + * Copyright (c) 2018 Hisilicon Limited. + */ + +#include <rdma/ib_umem.h> +#include <rdma/hns-abi.h> +#include "hns_roce_device.h" +#include "hns_roce_cmd.h" +#include "hns_roce_hem.h" + +void hns_roce_srq_event(struct hns_roce_dev *hr_dev, u32 srqn, int event_type) +{ + struct hns_roce_srq_table *srq_table = &hr_dev->srq_table; + struct hns_roce_srq *srq; + + xa_lock(&srq_table->xa); + srq = xa_load(&srq_table->xa, srqn & (hr_dev->caps.num_srqs - 1)); + if (srq) + atomic_inc(&srq->refcount); + xa_unlock(&srq_table->xa); + + if (!srq) { + dev_warn(hr_dev->dev, "Async event for bogus SRQ %08x\n", srqn); + return; + } + + srq->event(srq, event_type); + + if (atomic_dec_and_test(&srq->refcount)) + complete(&srq->free); +} +EXPORT_SYMBOL_GPL(hns_roce_srq_event); + +static void hns_roce_ib_srq_event(struct hns_roce_srq *srq, + enum hns_roce_event event_type) +{ + struct hns_roce_dev *hr_dev = to_hr_dev(srq->ibsrq.device); + struct ib_srq *ibsrq = &srq->ibsrq; + struct ib_event event; + + if (ibsrq->event_handler) { + event.device = ibsrq->device; + event.element.srq = ibsrq; + switch (event_type) { + case HNS_ROCE_EVENT_TYPE_SRQ_LIMIT_REACH: + event.event = IB_EVENT_SRQ_LIMIT_REACHED; + break; + case HNS_ROCE_EVENT_TYPE_SRQ_CATAS_ERROR: + event.event = IB_EVENT_SRQ_ERR; + break; + default: + dev_err(hr_dev->dev, + "hns_roce:Unexpected event type 0x%x on SRQ %06lx\n", + event_type, srq->srqn); + return; + } + + ibsrq->event_handler(&event, ibsrq->srq_context); + } +} + +static int hns_roce_sw2hw_srq(struct hns_roce_dev *dev, + struct hns_roce_cmd_mailbox *mailbox, + unsigned long srq_num) +{ + return hns_roce_cmd_mbox(dev, mailbox->dma, 0, srq_num, 0, + HNS_ROCE_CMD_SW2HW_SRQ, + HNS_ROCE_CMD_TIMEOUT_MSECS); +} + +static int hns_roce_hw2sw_srq(struct hns_roce_dev *dev, + struct hns_roce_cmd_mailbox *mailbox, + unsigned long srq_num) +{ + return hns_roce_cmd_mbox(dev, 0, mailbox ? mailbox->dma : 0, srq_num, + mailbox ? 0 : 1, HNS_ROCE_CMD_HW2SW_SRQ, + HNS_ROCE_CMD_TIMEOUT_MSECS); +} + +int hns_roce_srq_alloc(struct hns_roce_dev *hr_dev, u32 pdn, u32 cqn, u16 xrcd, + struct hns_roce_mtt *hr_mtt, u64 db_rec_addr, + struct hns_roce_srq *srq) +{ + struct hns_roce_srq_table *srq_table = &hr_dev->srq_table; + struct hns_roce_cmd_mailbox *mailbox; + dma_addr_t dma_handle_wqe; + dma_addr_t dma_handle_idx; + u64 *mtts_wqe; + u64 *mtts_idx; + int ret; + + /* Get the physical address of srq buf */ + mtts_wqe = hns_roce_table_find(hr_dev, + &hr_dev->mr_table.mtt_srqwqe_table, + srq->mtt.first_seg, + &dma_handle_wqe); + if (!mtts_wqe) { + dev_err(hr_dev->dev, + "SRQ alloc.Failed to find srq buf addr.\n"); + return -EINVAL; + } + + /* Get physical address of idx que buf */ + mtts_idx = hns_roce_table_find(hr_dev, &hr_dev->mr_table.mtt_idx_table, + srq->idx_que.mtt.first_seg, + &dma_handle_idx); + if (!mtts_idx) { + dev_err(hr_dev->dev, + "SRQ alloc.Failed to find idx que buf addr.\n"); + return -EINVAL; + } + + ret = hns_roce_bitmap_alloc(&srq_table->bitmap, &srq->srqn); + if (ret == -1) { + dev_err(hr_dev->dev, "SRQ alloc.Failed to alloc index.\n"); + return -ENOMEM; + } + + ret = hns_roce_table_get(hr_dev, &srq_table->table, srq->srqn); + if (ret) + goto err_out; + + ret = xa_err(xa_store(&srq_table->xa, srq->srqn, srq, GFP_KERNEL)); + if (ret) + goto err_put; + + mailbox = hns_roce_alloc_cmd_mailbox(hr_dev); + if (IS_ERR(mailbox)) { + ret = PTR_ERR(mailbox); + goto err_xa; + } + + hr_dev->hw->write_srqc(hr_dev, srq, pdn, xrcd, cqn, mailbox->buf, + mtts_wqe, mtts_idx, dma_handle_wqe, + dma_handle_idx); + + ret = hns_roce_sw2hw_srq(hr_dev, mailbox, srq->srqn); + hns_roce_free_cmd_mailbox(hr_dev, mailbox); + if (ret) + goto err_xa; + + atomic_set(&srq->refcount, 1); + init_completion(&srq->free); + return ret; + +err_xa: + xa_erase(&srq_table->xa, srq->srqn); + +err_put: + hns_roce_table_put(hr_dev, &srq_table->table, srq->srqn); + +err_out: + hns_roce_bitmap_free(&srq_table->bitmap, srq->srqn, BITMAP_NO_RR); + return ret; +} + +void hns_roce_srq_free(struct hns_roce_dev *hr_dev, struct hns_roce_srq *srq) +{ + struct hns_roce_srq_table *srq_table = &hr_dev->srq_table; + int ret; + + ret = hns_roce_hw2sw_srq(hr_dev, NULL, srq->srqn); + if (ret) + dev_err(hr_dev->dev, "HW2SW_SRQ failed (%d) for CQN %06lx\n", + ret, srq->srqn); + + xa_erase(&srq_table->xa, srq->srqn); + + if (atomic_dec_and_test(&srq->refcount)) + complete(&srq->free); + wait_for_completion(&srq->free); + + hns_roce_table_put(hr_dev, &srq_table->table, srq->srqn); + hns_roce_bitmap_free(&srq_table->bitmap, srq->srqn, BITMAP_NO_RR); +} + +static int hns_roce_create_idx_que(struct ib_pd *pd, struct hns_roce_srq *srq, + u32 page_shift) +{ + struct hns_roce_dev *hr_dev = to_hr_dev(pd->device); + struct hns_roce_idx_que *idx_que = &srq->idx_que; + u32 bitmap_num; + int i; + + bitmap_num = HNS_ROCE_ALOGN_UP(srq->max, 8 * sizeof(u64)); + + idx_que->bitmap = kcalloc(1, bitmap_num / 8, GFP_KERNEL); + if (!idx_que->bitmap) + return -ENOMEM; + + bitmap_num = bitmap_num / (8 * sizeof(u64)); + + idx_que->buf_size = srq->idx_que.buf_size; + + if (hns_roce_buf_alloc(hr_dev, idx_que->buf_size, (1 << page_shift) * 2, + &idx_que->idx_buf, page_shift)) { + kfree(idx_que->bitmap); + return -ENOMEM; + } + + for (i = 0; i < bitmap_num; i++) + idx_que->bitmap[i] = ~(0UL); + + return 0; +} + +struct ib_srq *hns_roce_create_srq(struct ib_pd *pd, + struct ib_srq_init_attr *srq_init_attr, + struct ib_udata *udata) +{ + struct hns_roce_dev *hr_dev = to_hr_dev(pd->device); + struct hns_roce_srq *srq; + int srq_desc_size; + int srq_buf_size; + u32 page_shift; + int ret = 0; + u32 npages; + u32 cqn; + + /* Check the actual SRQ wqe and SRQ sge num */ + if (srq_init_attr->attr.max_wr >= hr_dev->caps.max_srq_wrs || + srq_init_attr->attr.max_sge > hr_dev->caps.max_srq_sges) + return ERR_PTR(-EINVAL); + + srq = kzalloc(sizeof(*srq), GFP_KERNEL); + if (!srq) + return ERR_PTR(-ENOMEM); + + mutex_init(&srq->mutex); + spin_lock_init(&srq->lock); + + srq->max = roundup_pow_of_two(srq_init_attr->attr.max_wr + 1); + srq->max_gs = srq_init_attr->attr.max_sge; + + srq_desc_size = max(16, 16 * srq->max_gs); + + srq->wqe_shift = ilog2(srq_desc_size); + + srq_buf_size = srq->max * srq_desc_size; + + srq->idx_que.entry_sz = HNS_ROCE_IDX_QUE_ENTRY_SZ; + srq->idx_que.buf_size = srq->max * srq->idx_que.entry_sz; + srq->mtt.mtt_type = MTT_TYPE_SRQWQE; + srq->idx_que.mtt.mtt_type = MTT_TYPE_IDX; + + if (udata) { + struct hns_roce_ib_create_srq ucmd; + + if (ib_copy_from_udata(&ucmd, udata, sizeof(ucmd))) { + ret = -EFAULT; + goto err_srq; + } + + srq->umem = ib_umem_get(pd->uobject->context, ucmd.buf_addr, + srq_buf_size, 0, 0); + if (IS_ERR(srq->umem)) { + ret = PTR_ERR(srq->umem); + goto err_srq; + } + + if (hr_dev->caps.srqwqe_buf_pg_sz) { + npages = (ib_umem_page_count(srq->umem) + + (1 << hr_dev->caps.srqwqe_buf_pg_sz) - 1) / + (1 << hr_dev->caps.srqwqe_buf_pg_sz); + page_shift = PAGE_SHIFT + hr_dev->caps.srqwqe_buf_pg_sz; + ret = hns_roce_mtt_init(hr_dev, npages, + page_shift, + &srq->mtt); + } else + ret = hns_roce_mtt_init(hr_dev, + ib_umem_page_count(srq->umem), + srq->umem->page_shift, + &srq->mtt); + if (ret) + goto err_buf; + + ret = hns_roce_ib_umem_write_mtt(hr_dev, &srq->mtt, srq->umem); + if (ret) + goto err_srq_mtt; + + /* config index queue BA */ + srq->idx_que.umem = ib_umem_get(pd->uobject->context, + ucmd.que_addr, + srq->idx_que.buf_size, 0, 0); + if (IS_ERR(srq->idx_que.umem)) { + dev_err(hr_dev->dev, + "ib_umem_get error for index queue\n"); + ret = PTR_ERR(srq->idx_que.umem); + goto err_srq_mtt; + } + + if (hr_dev->caps.idx_buf_pg_sz) { + npages = (ib_umem_page_count(srq->idx_que.umem) + + (1 << hr_dev->caps.idx_buf_pg_sz) - 1) / + (1 << hr_dev->caps.idx_buf_pg_sz); + page_shift = PAGE_SHIFT + hr_dev->caps.idx_buf_pg_sz; + ret = hns_roce_mtt_init(hr_dev, npages, + page_shift, &srq->idx_que.mtt); + } else { + ret = hns_roce_mtt_init(hr_dev, + ib_umem_page_count(srq->idx_que.umem), + srq->idx_que.umem->page_shift, + &srq->idx_que.mtt); + } + + if (ret) { + dev_err(hr_dev->dev, + "hns_roce_mtt_init error for idx que\n"); + goto err_idx_mtt; + } + + ret = hns_roce_ib_umem_write_mtt(hr_dev, &srq->idx_que.mtt, + srq->idx_que.umem); + if (ret) { + dev_err(hr_dev->dev, + "hns_roce_ib_umem_write_mtt error for idx que\n"); + goto err_idx_buf; + } + } else { + page_shift = PAGE_SHIFT + hr_dev->caps.srqwqe_buf_pg_sz; + if (hns_roce_buf_alloc(hr_dev, srq_buf_size, + (1 << page_shift) * 2, + &srq->buf, page_shift)) { + ret = -ENOMEM; + goto err_srq; + } + + srq->head = 0; + srq->tail = srq->max - 1; + + ret = hns_roce_mtt_init(hr_dev, srq->buf.npages, + srq->buf.page_shift, &srq->mtt); + if (ret) + goto err_buf; + + ret = hns_roce_buf_write_mtt(hr_dev, &srq->mtt, &srq->buf); + if (ret) + goto err_srq_mtt; + + page_shift = PAGE_SHIFT + hr_dev->caps.idx_buf_pg_sz; + ret = hns_roce_create_idx_que(pd, srq, page_shift); + if (ret) { + dev_err(hr_dev->dev, "Create idx queue fail(%d)!\n", + ret); + goto err_srq_mtt; + } + + /* Init mtt table for idx_que */ + ret = hns_roce_mtt_init(hr_dev, srq->idx_que.idx_buf.npages, + srq->idx_que.idx_buf.page_shift, + &srq->idx_que.mtt); + if (ret) + goto err_create_idx; + + /* Write buffer address into the mtt table */ + ret = hns_roce_buf_write_mtt(hr_dev, &srq->idx_que.mtt, + &srq->idx_que.idx_buf); + if (ret) + goto err_idx_buf; + + srq->wrid = kvmalloc_array(srq->max, sizeof(u64), GFP_KERNEL); + if (!srq->wrid) { + ret = -ENOMEM; + goto err_idx_buf; + } + } + + cqn = ib_srq_has_cq(srq_init_attr->srq_type) ? + to_hr_cq(srq_init_attr->ext.cq)->cqn : 0; + + srq->db_reg_l = hr_dev->reg_base + SRQ_DB_REG; + + ret = hns_roce_srq_alloc(hr_dev, to_hr_pd(pd)->pdn, cqn, 0, + &srq->mtt, 0, srq); + if (ret) + goto err_wrid; + + srq->event = hns_roce_ib_srq_event; + srq->ibsrq.ext.xrc.srq_num = srq->srqn; + + if (udata) { + if (ib_copy_to_udata(udata, &srq->srqn, sizeof(__u32))) { + ret = -EFAULT; + goto err_wrid; + } + } + + return &srq->ibsrq; + +err_wrid: + kvfree(srq->wrid); + +err_idx_buf: + hns_roce_mtt_cleanup(hr_dev, &srq->idx_que.mtt); + +err_idx_mtt: + if (udata) + ib_umem_release(srq->idx_que.umem); + +err_create_idx: + hns_roce_buf_free(hr_dev, srq->idx_que.buf_size, + &srq->idx_que.idx_buf); + kfree(srq->idx_que.bitmap); + +err_srq_mtt: + hns_roce_mtt_cleanup(hr_dev, &srq->mtt); + +err_buf: + if (udata) + ib_umem_release(srq->umem); + else + hns_roce_buf_free(hr_dev, srq_buf_size, &srq->buf); + +err_srq: + kfree(srq); + return ERR_PTR(ret); +} + +int hns_roce_destroy_srq(struct ib_srq *ibsrq) +{ + struct hns_roce_dev *hr_dev = to_hr_dev(ibsrq->device); + struct hns_roce_srq *srq = to_hr_srq(ibsrq); + + hns_roce_srq_free(hr_dev, srq); + hns_roce_mtt_cleanup(hr_dev, &srq->mtt); + + if (ibsrq->uobject) { + hns_roce_mtt_cleanup(hr_dev, &srq->idx_que.mtt); + ib_umem_release(srq->idx_que.umem); + ib_umem_release(srq->umem); + } else { + kvfree(srq->wrid); + hns_roce_buf_free(hr_dev, srq->max << srq->wqe_shift, + &srq->buf); + } + + kfree(srq); + + return 0; +} + +int hns_roce_init_srq_table(struct hns_roce_dev *hr_dev) +{ + struct hns_roce_srq_table *srq_table = &hr_dev->srq_table; + + xa_init(&srq_table->xa); + + return hns_roce_bitmap_init(&srq_table->bitmap, hr_dev->caps.num_srqs, + hr_dev->caps.num_srqs - 1, + hr_dev->caps.reserved_srqs, 0); +} + +void hns_roce_cleanup_srq_table(struct hns_roce_dev *hr_dev) +{ + hns_roce_bitmap_cleanup(&hr_dev->srq_table.bitmap); +} diff --git a/drivers/infiniband/hw/i40iw/i40iw_cm.c b/drivers/infiniband/hw/i40iw/i40iw_cm.c index 4b3999d88c9e..206cfb0016f8 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_cm.c +++ b/drivers/infiniband/hw/i40iw/i40iw_cm.c @@ -3478,7 +3478,7 @@ static void i40iw_qp_disconnect(struct i40iw_qp *iwqp) /* Need to free the Last Streaming Mode Message */ if (iwqp->ietf_mem.va) { if (iwqp->lsmm_mr) - iwibdev->ibdev.dereg_mr(iwqp->lsmm_mr); + iwibdev->ibdev.ops.dereg_mr(iwqp->lsmm_mr); i40iw_free_dma_mem(iwdev->sc_dev.hw, &iwqp->ietf_mem); } } diff --git a/drivers/infiniband/hw/i40iw/i40iw_verbs.c b/drivers/infiniband/hw/i40iw/i40iw_verbs.c index 102875872bea..0b675b0742c2 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_verbs.c +++ b/drivers/infiniband/hw/i40iw/i40iw_verbs.c @@ -673,28 +673,26 @@ static struct ib_qp *i40iw_create_qp(struct ib_pd *ibpd, goto error; } iwqp->ctx_info.qp_compl_ctx = req.user_compl_ctx; - if (ibpd->uobject && ibpd->uobject->context) { - iwqp->user_mode = 1; - ucontext = to_ucontext(ibpd->uobject->context); - - if (req.user_wqe_buffers) { - struct i40iw_pbl *iwpbl; - - spin_lock_irqsave( - &ucontext->qp_reg_mem_list_lock, flags); - iwpbl = i40iw_get_pbl( - (unsigned long)req.user_wqe_buffers, - &ucontext->qp_reg_mem_list); - spin_unlock_irqrestore( - &ucontext->qp_reg_mem_list_lock, flags); - - if (!iwpbl) { - err_code = -ENODATA; - i40iw_pr_err("no pbl info\n"); - goto error; - } - memcpy(&iwqp->iwpbl, iwpbl, sizeof(iwqp->iwpbl)); + iwqp->user_mode = 1; + ucontext = to_ucontext(ibpd->uobject->context); + + if (req.user_wqe_buffers) { + struct i40iw_pbl *iwpbl; + + spin_lock_irqsave( + &ucontext->qp_reg_mem_list_lock, flags); + iwpbl = i40iw_get_pbl( + (unsigned long)req.user_wqe_buffers, + &ucontext->qp_reg_mem_list); + spin_unlock_irqrestore( + &ucontext->qp_reg_mem_list_lock, flags); + + if (!iwpbl) { + err_code = -ENODATA; + i40iw_pr_err("no pbl info\n"); + goto error; } + memcpy(&iwqp->iwpbl, iwpbl, sizeof(iwqp->iwpbl)); } err_code = i40iw_setup_virt_qp(iwdev, iwqp, &init_info); } else { @@ -768,7 +766,7 @@ static struct ib_qp *i40iw_create_qp(struct ib_pd *ibpd, iwdev->qp_table[qp_num] = iwqp; i40iw_add_pdusecount(iwqp->iwpd); i40iw_add_devusecount(iwdev); - if (ibpd->uobject && udata) { + if (udata) { memset(&uresp, 0, sizeof(uresp)); uresp.actual_sq_size = sq_size; uresp.actual_rq_size = rq_size; @@ -2092,7 +2090,8 @@ static int i40iw_dereg_mr(struct ib_mr *ib_mr) ib_umem_release(iwmr->region); if (iwmr->type != IW_MEMREG_TYPE_MEM) { - if (ibpd->uobject) { + /* region is released. only test for userness. */ + if (iwmr->region) { struct i40iw_ucontext *ucontext; ucontext = to_ucontext(ibpd->uobject->context); @@ -2721,24 +2720,38 @@ static int i40iw_query_pkey(struct ib_device *ibdev, return 0; } -/** - * i40iw_get_vector_affinity - report IRQ affinity mask - * @ibdev: IB device - * @comp_vector: completion vector index - */ -static const struct cpumask *i40iw_get_vector_affinity(struct ib_device *ibdev, - int comp_vector) -{ - struct i40iw_device *iwdev = to_iwdev(ibdev); - struct i40iw_msix_vector *msix_vec; - - if (iwdev->msix_shared) - msix_vec = &iwdev->iw_msixtbl[comp_vector]; - else - msix_vec = &iwdev->iw_msixtbl[comp_vector + 1]; - - return irq_get_affinity_mask(msix_vec->irq); -} +static const struct ib_device_ops i40iw_dev_ops = { + .alloc_hw_stats = i40iw_alloc_hw_stats, + .alloc_mr = i40iw_alloc_mr, + .alloc_pd = i40iw_alloc_pd, + .alloc_ucontext = i40iw_alloc_ucontext, + .create_cq = i40iw_create_cq, + .create_qp = i40iw_create_qp, + .dealloc_pd = i40iw_dealloc_pd, + .dealloc_ucontext = i40iw_dealloc_ucontext, + .dereg_mr = i40iw_dereg_mr, + .destroy_cq = i40iw_destroy_cq, + .destroy_qp = i40iw_destroy_qp, + .drain_rq = i40iw_drain_rq, + .drain_sq = i40iw_drain_sq, + .get_dev_fw_str = i40iw_get_dev_fw_str, + .get_dma_mr = i40iw_get_dma_mr, + .get_hw_stats = i40iw_get_hw_stats, + .get_port_immutable = i40iw_port_immutable, + .map_mr_sg = i40iw_map_mr_sg, + .mmap = i40iw_mmap, + .modify_qp = i40iw_modify_qp, + .poll_cq = i40iw_poll_cq, + .post_recv = i40iw_post_recv, + .post_send = i40iw_post_send, + .query_device = i40iw_query_device, + .query_gid = i40iw_query_gid, + .query_pkey = i40iw_query_pkey, + .query_port = i40iw_query_port, + .query_qp = i40iw_query_qp, + .reg_user_mr = i40iw_reg_user_mr, + .req_notify_cq = i40iw_req_notify_cq, +}; /** * i40iw_init_rdma_device - initialization of iwarp device @@ -2786,30 +2799,6 @@ static struct i40iw_ib_device *i40iw_init_rdma_device(struct i40iw_device *iwdev iwibdev->ibdev.phys_port_cnt = 1; iwibdev->ibdev.num_comp_vectors = iwdev->ceqs_count; iwibdev->ibdev.dev.parent = &pcidev->dev; - iwibdev->ibdev.query_port = i40iw_query_port; - iwibdev->ibdev.query_pkey = i40iw_query_pkey; - iwibdev->ibdev.query_gid = i40iw_query_gid; - iwibdev->ibdev.alloc_ucontext = i40iw_alloc_ucontext; - iwibdev->ibdev.dealloc_ucontext = i40iw_dealloc_ucontext; - iwibdev->ibdev.mmap = i40iw_mmap; - iwibdev->ibdev.alloc_pd = i40iw_alloc_pd; - iwibdev->ibdev.dealloc_pd = i40iw_dealloc_pd; - iwibdev->ibdev.create_qp = i40iw_create_qp; - iwibdev->ibdev.modify_qp = i40iw_modify_qp; - iwibdev->ibdev.query_qp = i40iw_query_qp; - iwibdev->ibdev.destroy_qp = i40iw_destroy_qp; - iwibdev->ibdev.create_cq = i40iw_create_cq; - iwibdev->ibdev.destroy_cq = i40iw_destroy_cq; - iwibdev->ibdev.get_dma_mr = i40iw_get_dma_mr; - iwibdev->ibdev.reg_user_mr = i40iw_reg_user_mr; - iwibdev->ibdev.dereg_mr = i40iw_dereg_mr; - iwibdev->ibdev.alloc_hw_stats = i40iw_alloc_hw_stats; - iwibdev->ibdev.get_hw_stats = i40iw_get_hw_stats; - iwibdev->ibdev.query_device = i40iw_query_device; - iwibdev->ibdev.drain_sq = i40iw_drain_sq; - iwibdev->ibdev.drain_rq = i40iw_drain_rq; - iwibdev->ibdev.alloc_mr = i40iw_alloc_mr; - iwibdev->ibdev.map_mr_sg = i40iw_map_mr_sg; iwibdev->ibdev.iwcm = kzalloc(sizeof(*iwibdev->ibdev.iwcm), GFP_KERNEL); if (!iwibdev->ibdev.iwcm) { ib_dealloc_device(&iwibdev->ibdev); @@ -2826,13 +2815,7 @@ static struct i40iw_ib_device *i40iw_init_rdma_device(struct i40iw_device *iwdev iwibdev->ibdev.iwcm->destroy_listen = i40iw_destroy_listen; memcpy(iwibdev->ibdev.iwcm->ifname, netdev->name, sizeof(iwibdev->ibdev.iwcm->ifname)); - iwibdev->ibdev.get_port_immutable = i40iw_port_immutable; - iwibdev->ibdev.get_dev_fw_str = i40iw_get_dev_fw_str; - iwibdev->ibdev.poll_cq = i40iw_poll_cq; - iwibdev->ibdev.req_notify_cq = i40iw_req_notify_cq; - iwibdev->ibdev.post_send = i40iw_post_send; - iwibdev->ibdev.post_recv = i40iw_post_recv; - iwibdev->ibdev.get_vector_affinity = i40iw_get_vector_affinity; + ib_set_device_ops(&iwibdev->ibdev, &i40iw_dev_ops); return iwibdev; } diff --git a/drivers/infiniband/hw/mlx4/ah.c b/drivers/infiniband/hw/mlx4/ah.c index e9e3a6f390db..1672808262ba 100644 --- a/drivers/infiniband/hw/mlx4/ah.c +++ b/drivers/infiniband/hw/mlx4/ah.c @@ -144,7 +144,7 @@ static struct ib_ah *create_iboe_ah(struct ib_pd *pd, } struct ib_ah *mlx4_ib_create_ah(struct ib_pd *pd, struct rdma_ah_attr *ah_attr, - struct ib_udata *udata) + u32 flags, struct ib_udata *udata) { struct mlx4_ib_ah *ah; @@ -189,7 +189,7 @@ struct ib_ah *mlx4_ib_create_ah_slave(struct ib_pd *pd, slave_attr.grh.sgid_attr = NULL; slave_attr.grh.sgid_index = slave_sgid_index; - ah = mlx4_ib_create_ah(pd, &slave_attr, NULL); + ah = mlx4_ib_create_ah(pd, &slave_attr, 0, NULL); if (IS_ERR(ah)) return ah; @@ -250,7 +250,7 @@ int mlx4_ib_query_ah(struct ib_ah *ibah, struct rdma_ah_attr *ah_attr) return 0; } -int mlx4_ib_destroy_ah(struct ib_ah *ah) +int mlx4_ib_destroy_ah(struct ib_ah *ah, u32 flags) { kfree(to_mah(ah)); return 0; diff --git a/drivers/infiniband/hw/mlx4/alias_GUID.c b/drivers/infiniband/hw/mlx4/alias_GUID.c index 155b4dfc0ae8..782499abcd98 100644 --- a/drivers/infiniband/hw/mlx4/alias_GUID.c +++ b/drivers/infiniband/hw/mlx4/alias_GUID.c @@ -849,7 +849,7 @@ int mlx4_ib_init_alias_guid_service(struct mlx4_ib_dev *dev) spin_lock_init(&dev->sriov.alias_guid.ag_work_lock); for (i = 1; i <= dev->num_ports; ++i) { - if (dev->ib_dev.query_gid(&dev->ib_dev , i, 0, &gid)) { + if (dev->ib_dev.ops.query_gid(&dev->ib_dev, i, 0, &gid)) { ret = -EFAULT; goto err_unregister; } diff --git a/drivers/infiniband/hw/mlx4/mad.c b/drivers/infiniband/hw/mlx4/mad.c index 8942f5f7f04d..25439da8976c 100644 --- a/drivers/infiniband/hw/mlx4/mad.c +++ b/drivers/infiniband/hw/mlx4/mad.c @@ -202,13 +202,13 @@ static void update_sm_ah(struct mlx4_ib_dev *dev, u8 port_num, u16 lid, u8 sl) rdma_ah_set_port_num(&ah_attr, port_num); new_ah = rdma_create_ah(dev->send_agent[port_num - 1][0]->qp->pd, - &ah_attr); + &ah_attr, 0); if (IS_ERR(new_ah)) return; spin_lock_irqsave(&dev->sm_lock, flags); if (dev->sm_ah[port_num - 1]) - rdma_destroy_ah(dev->sm_ah[port_num - 1]); + rdma_destroy_ah(dev->sm_ah[port_num - 1], 0); dev->sm_ah[port_num - 1] = new_ah; spin_unlock_irqrestore(&dev->sm_lock, flags); } @@ -567,7 +567,7 @@ int mlx4_ib_send_to_slave(struct mlx4_ib_dev *dev, int slave, u8 port, return -EINVAL; rdma_ah_set_grh(&attr, &dgid, 0, 0, 0, 0); } - ah = rdma_create_ah(tun_ctx->pd, &attr); + ah = rdma_create_ah(tun_ctx->pd, &attr, 0); if (IS_ERR(ah)) return -ENOMEM; @@ -584,7 +584,7 @@ int mlx4_ib_send_to_slave(struct mlx4_ib_dev *dev, int slave, u8 port, tun_mad = (struct mlx4_rcv_tunnel_mad *) (tun_qp->tx_ring[tun_tx_ix].buf.addr); if (tun_qp->tx_ring[tun_tx_ix].ah) - rdma_destroy_ah(tun_qp->tx_ring[tun_tx_ix].ah); + rdma_destroy_ah(tun_qp->tx_ring[tun_tx_ix].ah, 0); tun_qp->tx_ring[tun_tx_ix].ah = ah; ib_dma_sync_single_for_cpu(&dev->ib_dev, tun_qp->tx_ring[tun_tx_ix].buf.map, @@ -657,7 +657,7 @@ int mlx4_ib_send_to_slave(struct mlx4_ib_dev *dev, int slave, u8 port, spin_unlock(&tun_qp->tx_lock); tun_qp->tx_ring[tun_tx_ix].ah = NULL; end: - rdma_destroy_ah(ah); + rdma_destroy_ah(ah, 0); return ret; } @@ -1024,7 +1024,7 @@ static void send_handler(struct ib_mad_agent *agent, struct ib_mad_send_wc *mad_send_wc) { if (mad_send_wc->send_buf->context[0]) - rdma_destroy_ah(mad_send_wc->send_buf->context[0]); + rdma_destroy_ah(mad_send_wc->send_buf->context[0], 0); ib_free_send_mad(mad_send_wc->send_buf); } @@ -1079,7 +1079,7 @@ void mlx4_ib_mad_cleanup(struct mlx4_ib_dev *dev) } if (dev->sm_ah[p]) - rdma_destroy_ah(dev->sm_ah[p]); + rdma_destroy_ah(dev->sm_ah[p], 0); } } @@ -1411,7 +1411,7 @@ int mlx4_ib_send_to_wire(struct mlx4_ib_dev *dev, int slave, u8 port, sqp_mad = (struct mlx4_mad_snd_buf *) (sqp->tx_ring[wire_tx_ix].buf.addr); if (sqp->tx_ring[wire_tx_ix].ah) - rdma_destroy_ah(sqp->tx_ring[wire_tx_ix].ah); + rdma_destroy_ah(sqp->tx_ring[wire_tx_ix].ah, 0); sqp->tx_ring[wire_tx_ix].ah = ah; ib_dma_sync_single_for_cpu(&dev->ib_dev, sqp->tx_ring[wire_tx_ix].buf.map, @@ -1450,7 +1450,7 @@ int mlx4_ib_send_to_wire(struct mlx4_ib_dev *dev, int slave, u8 port, spin_unlock(&sqp->tx_lock); sqp->tx_ring[wire_tx_ix].ah = NULL; out: - mlx4_ib_destroy_ah(ah); + mlx4_ib_destroy_ah(ah, 0); return ret; } @@ -1716,7 +1716,7 @@ static void mlx4_ib_free_pv_qp_bufs(struct mlx4_ib_demux_pv_ctx *ctx, tx_buf_size, DMA_TO_DEVICE); kfree(tun_qp->tx_ring[i].buf.addr); if (tun_qp->tx_ring[i].ah) - rdma_destroy_ah(tun_qp->tx_ring[i].ah); + rdma_destroy_ah(tun_qp->tx_ring[i].ah, 0); } kfree(tun_qp->tx_ring); kfree(tun_qp->ring); @@ -1749,7 +1749,7 @@ static void mlx4_ib_tunnel_comp_worker(struct work_struct *work) "wrid=0x%llx, status=0x%x\n", wc.wr_id, wc.status); rdma_destroy_ah(tun_qp->tx_ring[wc.wr_id & - (MLX4_NUM_TUNNEL_BUFS - 1)].ah); + (MLX4_NUM_TUNNEL_BUFS - 1)].ah, 0); tun_qp->tx_ring[wc.wr_id & (MLX4_NUM_TUNNEL_BUFS - 1)].ah = NULL; spin_lock(&tun_qp->tx_lock); @@ -1766,7 +1766,7 @@ static void mlx4_ib_tunnel_comp_worker(struct work_struct *work) ctx->slave, wc.status, wc.wr_id); if (!MLX4_TUN_IS_RECV(wc.wr_id)) { rdma_destroy_ah(tun_qp->tx_ring[wc.wr_id & - (MLX4_NUM_TUNNEL_BUFS - 1)].ah); + (MLX4_NUM_TUNNEL_BUFS - 1)].ah, 0); tun_qp->tx_ring[wc.wr_id & (MLX4_NUM_TUNNEL_BUFS - 1)].ah = NULL; spin_lock(&tun_qp->tx_lock); @@ -1903,7 +1903,7 @@ static void mlx4_ib_sqp_comp_worker(struct work_struct *work) switch (wc.opcode) { case IB_WC_SEND: rdma_destroy_ah(sqp->tx_ring[wc.wr_id & - (MLX4_NUM_TUNNEL_BUFS - 1)].ah); + (MLX4_NUM_TUNNEL_BUFS - 1)].ah, 0); sqp->tx_ring[wc.wr_id & (MLX4_NUM_TUNNEL_BUFS - 1)].ah = NULL; spin_lock(&sqp->tx_lock); @@ -1932,7 +1932,7 @@ static void mlx4_ib_sqp_comp_worker(struct work_struct *work) ctx->slave, wc.status, wc.wr_id); if (!MLX4_TUN_IS_RECV(wc.wr_id)) { rdma_destroy_ah(sqp->tx_ring[wc.wr_id & - (MLX4_NUM_TUNNEL_BUFS - 1)].ah); + (MLX4_NUM_TUNNEL_BUFS - 1)].ah, 0); sqp->tx_ring[wc.wr_id & (MLX4_NUM_TUNNEL_BUFS - 1)].ah = NULL; spin_lock(&sqp->tx_lock); diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c index 0def2323459c..1f15ec3e2b83 100644 --- a/drivers/infiniband/hw/mlx4/main.c +++ b/drivers/infiniband/hw/mlx4/main.c @@ -2220,6 +2220,11 @@ static void mlx4_ib_fill_diag_counters(struct mlx4_ib_dev *ibdev, } } +static const struct ib_device_ops mlx4_ib_hw_stats_ops = { + .alloc_hw_stats = mlx4_ib_alloc_hw_stats, + .get_hw_stats = mlx4_ib_get_hw_stats, +}; + static int mlx4_ib_alloc_diag_counters(struct mlx4_ib_dev *ibdev) { struct mlx4_ib_diag_counters *diag = ibdev->diag_counters; @@ -2246,8 +2251,7 @@ static int mlx4_ib_alloc_diag_counters(struct mlx4_ib_dev *ibdev) diag[i].offset, i); } - ibdev->ib_dev.get_hw_stats = mlx4_ib_get_hw_stats; - ibdev->ib_dev.alloc_hw_stats = mlx4_ib_alloc_hw_stats; + ib_set_device_ops(&ibdev->ib_dev, &mlx4_ib_hw_stats_ops); return 0; @@ -2352,6 +2356,32 @@ static void mlx4_ib_scan_netdevs(struct mlx4_ib_dev *ibdev, event == NETDEV_UP || event == NETDEV_CHANGE)) update_qps_port = port; + if (dev == iboe->netdevs[port - 1] && + (event == NETDEV_UP || event == NETDEV_DOWN)) { + enum ib_port_state port_state; + struct ib_event ibev = { }; + + if (ib_get_cached_port_state(&ibdev->ib_dev, port, + &port_state)) + continue; + + if (event == NETDEV_UP && + (port_state != IB_PORT_ACTIVE || + iboe->last_port_state[port - 1] != IB_PORT_DOWN)) + continue; + if (event == NETDEV_DOWN && + (port_state != IB_PORT_DOWN || + iboe->last_port_state[port - 1] != IB_PORT_ACTIVE)) + continue; + iboe->last_port_state[port - 1] = port_state; + + ibev.device = &ibdev->ib_dev; + ibev.element.port_num = port; + ibev.event = event == NETDEV_UP ? IB_EVENT_PORT_ACTIVE : + IB_EVENT_PORT_ERR; + ib_dispatch_event(&ibev); + } + } spin_unlock_bh(&iboe->lock); @@ -2499,6 +2529,88 @@ static void get_fw_ver_str(struct ib_device *device, char *str) (int) dev->dev->caps.fw_ver & 0xffff); } +static const struct ib_device_ops mlx4_ib_dev_ops = { + .add_gid = mlx4_ib_add_gid, + .alloc_mr = mlx4_ib_alloc_mr, + .alloc_pd = mlx4_ib_alloc_pd, + .alloc_ucontext = mlx4_ib_alloc_ucontext, + .attach_mcast = mlx4_ib_mcg_attach, + .create_ah = mlx4_ib_create_ah, + .create_cq = mlx4_ib_create_cq, + .create_qp = mlx4_ib_create_qp, + .create_srq = mlx4_ib_create_srq, + .dealloc_pd = mlx4_ib_dealloc_pd, + .dealloc_ucontext = mlx4_ib_dealloc_ucontext, + .del_gid = mlx4_ib_del_gid, + .dereg_mr = mlx4_ib_dereg_mr, + .destroy_ah = mlx4_ib_destroy_ah, + .destroy_cq = mlx4_ib_destroy_cq, + .destroy_qp = mlx4_ib_destroy_qp, + .destroy_srq = mlx4_ib_destroy_srq, + .detach_mcast = mlx4_ib_mcg_detach, + .disassociate_ucontext = mlx4_ib_disassociate_ucontext, + .drain_rq = mlx4_ib_drain_rq, + .drain_sq = mlx4_ib_drain_sq, + .get_dev_fw_str = get_fw_ver_str, + .get_dma_mr = mlx4_ib_get_dma_mr, + .get_link_layer = mlx4_ib_port_link_layer, + .get_netdev = mlx4_ib_get_netdev, + .get_port_immutable = mlx4_port_immutable, + .map_mr_sg = mlx4_ib_map_mr_sg, + .mmap = mlx4_ib_mmap, + .modify_cq = mlx4_ib_modify_cq, + .modify_device = mlx4_ib_modify_device, + .modify_port = mlx4_ib_modify_port, + .modify_qp = mlx4_ib_modify_qp, + .modify_srq = mlx4_ib_modify_srq, + .poll_cq = mlx4_ib_poll_cq, + .post_recv = mlx4_ib_post_recv, + .post_send = mlx4_ib_post_send, + .post_srq_recv = mlx4_ib_post_srq_recv, + .process_mad = mlx4_ib_process_mad, + .query_ah = mlx4_ib_query_ah, + .query_device = mlx4_ib_query_device, + .query_gid = mlx4_ib_query_gid, + .query_pkey = mlx4_ib_query_pkey, + .query_port = mlx4_ib_query_port, + .query_qp = mlx4_ib_query_qp, + .query_srq = mlx4_ib_query_srq, + .reg_user_mr = mlx4_ib_reg_user_mr, + .req_notify_cq = mlx4_ib_arm_cq, + .rereg_user_mr = mlx4_ib_rereg_user_mr, + .resize_cq = mlx4_ib_resize_cq, +}; + +static const struct ib_device_ops mlx4_ib_dev_wq_ops = { + .create_rwq_ind_table = mlx4_ib_create_rwq_ind_table, + .create_wq = mlx4_ib_create_wq, + .destroy_rwq_ind_table = mlx4_ib_destroy_rwq_ind_table, + .destroy_wq = mlx4_ib_destroy_wq, + .modify_wq = mlx4_ib_modify_wq, +}; + +static const struct ib_device_ops mlx4_ib_dev_fmr_ops = { + .alloc_fmr = mlx4_ib_fmr_alloc, + .dealloc_fmr = mlx4_ib_fmr_dealloc, + .map_phys_fmr = mlx4_ib_map_phys_fmr, + .unmap_fmr = mlx4_ib_unmap_fmr, +}; + +static const struct ib_device_ops mlx4_ib_dev_mw_ops = { + .alloc_mw = mlx4_ib_alloc_mw, + .dealloc_mw = mlx4_ib_dealloc_mw, +}; + +static const struct ib_device_ops mlx4_ib_dev_xrc_ops = { + .alloc_xrcd = mlx4_ib_alloc_xrcd, + .dealloc_xrcd = mlx4_ib_dealloc_xrcd, +}; + +static const struct ib_device_ops mlx4_ib_dev_fs_ops = { + .create_flow = mlx4_ib_create_flow, + .destroy_flow = mlx4_ib_destroy_flow, +}; + static void *mlx4_ib_add(struct mlx4_dev *dev) { struct mlx4_ib_dev *ibdev; @@ -2554,9 +2666,6 @@ static void *mlx4_ib_add(struct mlx4_dev *dev) 1 : ibdev->num_ports; ibdev->ib_dev.num_comp_vectors = dev->caps.num_comp_vectors; ibdev->ib_dev.dev.parent = &dev->persist->pdev->dev; - ibdev->ib_dev.get_netdev = mlx4_ib_get_netdev; - ibdev->ib_dev.add_gid = mlx4_ib_add_gid; - ibdev->ib_dev.del_gid = mlx4_ib_del_gid; if (dev->caps.userspace_caps) ibdev->ib_dev.uverbs_abi_ver = MLX4_IB_UVERBS_ABI_VERSION; @@ -2589,116 +2698,53 @@ static void *mlx4_ib_add(struct mlx4_dev *dev) (1ull << IB_USER_VERBS_CMD_CREATE_XSRQ) | (1ull << IB_USER_VERBS_CMD_OPEN_QP); - ibdev->ib_dev.query_device = mlx4_ib_query_device; - ibdev->ib_dev.query_port = mlx4_ib_query_port; - ibdev->ib_dev.get_link_layer = mlx4_ib_port_link_layer; - ibdev->ib_dev.query_gid = mlx4_ib_query_gid; - ibdev->ib_dev.query_pkey = mlx4_ib_query_pkey; - ibdev->ib_dev.modify_device = mlx4_ib_modify_device; - ibdev->ib_dev.modify_port = mlx4_ib_modify_port; - ibdev->ib_dev.alloc_ucontext = mlx4_ib_alloc_ucontext; - ibdev->ib_dev.dealloc_ucontext = mlx4_ib_dealloc_ucontext; - ibdev->ib_dev.mmap = mlx4_ib_mmap; - ibdev->ib_dev.alloc_pd = mlx4_ib_alloc_pd; - ibdev->ib_dev.dealloc_pd = mlx4_ib_dealloc_pd; - ibdev->ib_dev.create_ah = mlx4_ib_create_ah; - ibdev->ib_dev.query_ah = mlx4_ib_query_ah; - ibdev->ib_dev.destroy_ah = mlx4_ib_destroy_ah; - ibdev->ib_dev.create_srq = mlx4_ib_create_srq; - ibdev->ib_dev.modify_srq = mlx4_ib_modify_srq; - ibdev->ib_dev.query_srq = mlx4_ib_query_srq; - ibdev->ib_dev.destroy_srq = mlx4_ib_destroy_srq; - ibdev->ib_dev.post_srq_recv = mlx4_ib_post_srq_recv; - ibdev->ib_dev.create_qp = mlx4_ib_create_qp; - ibdev->ib_dev.modify_qp = mlx4_ib_modify_qp; - ibdev->ib_dev.query_qp = mlx4_ib_query_qp; - ibdev->ib_dev.destroy_qp = mlx4_ib_destroy_qp; - ibdev->ib_dev.drain_sq = mlx4_ib_drain_sq; - ibdev->ib_dev.drain_rq = mlx4_ib_drain_rq; - ibdev->ib_dev.post_send = mlx4_ib_post_send; - ibdev->ib_dev.post_recv = mlx4_ib_post_recv; - ibdev->ib_dev.create_cq = mlx4_ib_create_cq; - ibdev->ib_dev.modify_cq = mlx4_ib_modify_cq; - ibdev->ib_dev.resize_cq = mlx4_ib_resize_cq; - ibdev->ib_dev.destroy_cq = mlx4_ib_destroy_cq; - ibdev->ib_dev.poll_cq = mlx4_ib_poll_cq; - ibdev->ib_dev.req_notify_cq = mlx4_ib_arm_cq; - ibdev->ib_dev.get_dma_mr = mlx4_ib_get_dma_mr; - ibdev->ib_dev.reg_user_mr = mlx4_ib_reg_user_mr; - ibdev->ib_dev.rereg_user_mr = mlx4_ib_rereg_user_mr; - ibdev->ib_dev.dereg_mr = mlx4_ib_dereg_mr; - ibdev->ib_dev.alloc_mr = mlx4_ib_alloc_mr; - ibdev->ib_dev.map_mr_sg = mlx4_ib_map_mr_sg; - ibdev->ib_dev.attach_mcast = mlx4_ib_mcg_attach; - ibdev->ib_dev.detach_mcast = mlx4_ib_mcg_detach; - ibdev->ib_dev.process_mad = mlx4_ib_process_mad; - ibdev->ib_dev.get_port_immutable = mlx4_port_immutable; - ibdev->ib_dev.get_dev_fw_str = get_fw_ver_str; - ibdev->ib_dev.disassociate_ucontext = mlx4_ib_disassociate_ucontext; - + ib_set_device_ops(&ibdev->ib_dev, &mlx4_ib_dev_ops); ibdev->ib_dev.uverbs_ex_cmd_mask |= - (1ull << IB_USER_VERBS_EX_CMD_MODIFY_CQ); + (1ull << IB_USER_VERBS_EX_CMD_MODIFY_CQ) | + (1ull << IB_USER_VERBS_EX_CMD_QUERY_DEVICE) | + (1ull << IB_USER_VERBS_EX_CMD_CREATE_CQ) | + (1ull << IB_USER_VERBS_EX_CMD_CREATE_QP); if ((dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_RSS) && ((mlx4_ib_port_link_layer(&ibdev->ib_dev, 1) == IB_LINK_LAYER_ETHERNET) || (mlx4_ib_port_link_layer(&ibdev->ib_dev, 2) == IB_LINK_LAYER_ETHERNET))) { - ibdev->ib_dev.create_wq = mlx4_ib_create_wq; - ibdev->ib_dev.modify_wq = mlx4_ib_modify_wq; - ibdev->ib_dev.destroy_wq = mlx4_ib_destroy_wq; - ibdev->ib_dev.create_rwq_ind_table = - mlx4_ib_create_rwq_ind_table; - ibdev->ib_dev.destroy_rwq_ind_table = - mlx4_ib_destroy_rwq_ind_table; ibdev->ib_dev.uverbs_ex_cmd_mask |= (1ull << IB_USER_VERBS_EX_CMD_CREATE_WQ) | (1ull << IB_USER_VERBS_EX_CMD_MODIFY_WQ) | (1ull << IB_USER_VERBS_EX_CMD_DESTROY_WQ) | (1ull << IB_USER_VERBS_EX_CMD_CREATE_RWQ_IND_TBL) | (1ull << IB_USER_VERBS_EX_CMD_DESTROY_RWQ_IND_TBL); + ib_set_device_ops(&ibdev->ib_dev, &mlx4_ib_dev_wq_ops); } - if (!mlx4_is_slave(ibdev->dev)) { - ibdev->ib_dev.alloc_fmr = mlx4_ib_fmr_alloc; - ibdev->ib_dev.map_phys_fmr = mlx4_ib_map_phys_fmr; - ibdev->ib_dev.unmap_fmr = mlx4_ib_unmap_fmr; - ibdev->ib_dev.dealloc_fmr = mlx4_ib_fmr_dealloc; - } + if (!mlx4_is_slave(ibdev->dev)) + ib_set_device_ops(&ibdev->ib_dev, &mlx4_ib_dev_fmr_ops); if (dev->caps.flags & MLX4_DEV_CAP_FLAG_MEM_WINDOW || dev->caps.bmme_flags & MLX4_BMME_FLAG_TYPE_2_WIN) { - ibdev->ib_dev.alloc_mw = mlx4_ib_alloc_mw; - ibdev->ib_dev.dealloc_mw = mlx4_ib_dealloc_mw; - ibdev->ib_dev.uverbs_cmd_mask |= (1ull << IB_USER_VERBS_CMD_ALLOC_MW) | (1ull << IB_USER_VERBS_CMD_DEALLOC_MW); + ib_set_device_ops(&ibdev->ib_dev, &mlx4_ib_dev_mw_ops); } if (dev->caps.flags & MLX4_DEV_CAP_FLAG_XRC) { - ibdev->ib_dev.alloc_xrcd = mlx4_ib_alloc_xrcd; - ibdev->ib_dev.dealloc_xrcd = mlx4_ib_dealloc_xrcd; ibdev->ib_dev.uverbs_cmd_mask |= (1ull << IB_USER_VERBS_CMD_OPEN_XRCD) | (1ull << IB_USER_VERBS_CMD_CLOSE_XRCD); + ib_set_device_ops(&ibdev->ib_dev, &mlx4_ib_dev_xrc_ops); } if (check_flow_steering_support(dev)) { ibdev->steering_support = MLX4_STEERING_MODE_DEVICE_MANAGED; - ibdev->ib_dev.create_flow = mlx4_ib_create_flow; - ibdev->ib_dev.destroy_flow = mlx4_ib_destroy_flow; - ibdev->ib_dev.uverbs_ex_cmd_mask |= (1ull << IB_USER_VERBS_EX_CMD_CREATE_FLOW) | (1ull << IB_USER_VERBS_EX_CMD_DESTROY_FLOW); + ib_set_device_ops(&ibdev->ib_dev, &mlx4_ib_dev_fs_ops); } - ibdev->ib_dev.uverbs_ex_cmd_mask |= - (1ull << IB_USER_VERBS_EX_CMD_QUERY_DEVICE) | - (1ull << IB_USER_VERBS_EX_CMD_CREATE_CQ) | - (1ull << IB_USER_VERBS_EX_CMD_CREATE_QP); - mlx4_ib_alloc_eqs(dev, ibdev); spin_lock_init(&iboe->lock); @@ -2710,6 +2756,7 @@ static void *mlx4_ib_add(struct mlx4_dev *dev) for (i = 0; i < ibdev->num_ports; ++i) { mutex_init(&ibdev->counters_table[i].mutex); INIT_LIST_HEAD(&ibdev->counters_table[i].counters_list); + iboe->last_port_state[i] = IB_PORT_DOWN; } num_req_counters = mlx4_is_bonded(dev) ? 1 : ibdev->num_ports; diff --git a/drivers/infiniband/hw/mlx4/mlx4_ib.h b/drivers/infiniband/hw/mlx4/mlx4_ib.h index 8850dfc3826d..e491f3eda6e7 100644 --- a/drivers/infiniband/hw/mlx4/mlx4_ib.h +++ b/drivers/infiniband/hw/mlx4/mlx4_ib.h @@ -519,6 +519,7 @@ struct mlx4_ib_iboe { atomic64_t mac[MLX4_MAX_PORTS]; struct notifier_block nb; struct mlx4_port_gid_table gids[MLX4_MAX_PORTS]; + enum ib_port_state last_port_state[MLX4_MAX_PORTS]; }; struct pkey_mgt { @@ -753,13 +754,13 @@ void __mlx4_ib_cq_clean(struct mlx4_ib_cq *cq, u32 qpn, struct mlx4_ib_srq *srq) void mlx4_ib_cq_clean(struct mlx4_ib_cq *cq, u32 qpn, struct mlx4_ib_srq *srq); struct ib_ah *mlx4_ib_create_ah(struct ib_pd *pd, struct rdma_ah_attr *ah_attr, - struct ib_udata *udata); + u32 flags, struct ib_udata *udata); struct ib_ah *mlx4_ib_create_ah_slave(struct ib_pd *pd, struct rdma_ah_attr *ah_attr, int slave_sgid_index, u8 *s_mac, u16 vlan_tag); int mlx4_ib_query_ah(struct ib_ah *ibah, struct rdma_ah_attr *ah_attr); -int mlx4_ib_destroy_ah(struct ib_ah *ah); +int mlx4_ib_destroy_ah(struct ib_ah *ah, u32 flags); struct ib_srq *mlx4_ib_create_srq(struct ib_pd *pd, struct ib_srq_init_attr *init_attr, diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c index 0711ca1dfb8f..971e9a9ebdaf 100644 --- a/drivers/infiniband/hw/mlx4/qp.c +++ b/drivers/infiniband/hw/mlx4/qp.c @@ -323,7 +323,7 @@ static int send_wqe_overhead(enum mlx4_ib_qp_type type, u32 flags) } static int set_rq_size(struct mlx4_ib_dev *dev, struct ib_qp_cap *cap, - int is_user, int has_rq, struct mlx4_ib_qp *qp, + bool is_user, int has_rq, struct mlx4_ib_qp *qp, u32 inl_recv_sz) { /* Sanity check RQ size before proceeding */ @@ -401,7 +401,7 @@ static int set_kernel_sq_size(struct mlx4_ib_dev *dev, struct ib_qp_cap *cap, * We need to leave 2 KB + 1 WR of headroom in the SQ to * allow HW to prefetch. */ - qp->sq_spare_wqes = (2048 >> qp->sq.wqe_shift) + 1; + qp->sq_spare_wqes = MLX4_IB_SQ_HEADROOM(qp->sq.wqe_shift); qp->sq.wqe_cnt = roundup_pow_of_two(cap->max_send_wr + qp->sq_spare_wqes); @@ -942,7 +942,7 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd, qp->sq_signal_bits = cpu_to_be32(MLX4_WQE_CTRL_CQ_UPDATE); - if (pd->uobject) { + if (udata) { union { struct mlx4_ib_create_qp qp; struct mlx4_ib_create_wq wq; @@ -991,7 +991,7 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd, qp->flags |= MLX4_IB_QP_SCATTER_FCS; } - err = set_rq_size(dev, &init_attr->cap, !!pd->uobject, + err = set_rq_size(dev, &init_attr->cap, udata, qp_has_rq(init_attr), qp, qp->inl_recv_sz); if (err) goto err; @@ -1043,7 +1043,7 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd, } qp->mqp.usage = MLX4_RES_USAGE_USER_VERBS; } else { - err = set_rq_size(dev, &init_attr->cap, !!pd->uobject, + err = set_rq_size(dev, &init_attr->cap, udata, qp_has_rq(init_attr), qp, 0); if (err) goto err; @@ -1189,7 +1189,7 @@ err_proxy: if (qp->mlx4_ib_qp_type == MLX4_IB_QPT_PROXY_GSI) free_proxy_bufs(pd->device, qp); err_wrid: - if (pd->uobject) { + if (udata) { if (qp_has_rq(init_attr)) mlx4_ib_db_unmap_user(to_mucontext(pd->uobject->context), &qp->db); } else { @@ -1201,20 +1201,20 @@ err_mtt: mlx4_mtt_cleanup(dev->dev, &qp->mtt); err_buf: - if (pd->uobject) + if (qp->umem) ib_umem_release(qp->umem); else mlx4_buf_free(dev->dev, qp->buf_size, &qp->buf); err_db: - if (!pd->uobject && qp_has_rq(init_attr)) + if (!udata && qp_has_rq(init_attr)) mlx4_db_free(dev->dev, &qp->db); err: - if (sqp) - kfree(sqp); - else if (!*caller_qp) + if (!sqp && !*caller_qp) kfree(qp); + kfree(sqp); + return err; } @@ -1332,7 +1332,7 @@ static void destroy_qp_rss(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp) } static void destroy_qp_common(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp, - enum mlx4_ib_source_type src, int is_user) + enum mlx4_ib_source_type src, bool is_user) { struct mlx4_ib_cq *send_cq, *recv_cq; unsigned long flags; @@ -1609,10 +1609,7 @@ static int _mlx4_ib_destroy_qp(struct ib_qp *qp) if (qp->rwq_ind_tbl) { destroy_qp_rss(dev, mqp); } else { - struct mlx4_ib_pd *pd; - - pd = get_pd(mqp); - destroy_qp_common(dev, mqp, MLX4_IB_QP_SRC, !!pd->ibpd.uobject); + destroy_qp_common(dev, mqp, MLX4_IB_QP_SRC, qp->uobject); } if (is_sqp(dev, mqp)) @@ -4044,7 +4041,7 @@ struct ib_wq *mlx4_ib_create_wq(struct ib_pd *pd, struct mlx4_ib_create_wq ucmd; int err, required_cmd_sz; - if (!(udata && pd->uobject)) + if (!udata) return ERR_PTR(-EINVAL); required_cmd_sz = offsetof(typeof(ucmd), comp_mask) + diff --git a/drivers/infiniband/hw/mlx4/srq.c b/drivers/infiniband/hw/mlx4/srq.c index 3731b31c3653..4456f1b8921d 100644 --- a/drivers/infiniband/hw/mlx4/srq.c +++ b/drivers/infiniband/hw/mlx4/srq.c @@ -105,7 +105,7 @@ struct ib_srq *mlx4_ib_create_srq(struct ib_pd *pd, buf_size = srq->msrq.max * desc_size; - if (pd->uobject) { + if (udata) { struct mlx4_ib_create_srq ucmd; if (ib_copy_from_udata(&ucmd, udata, sizeof ucmd)) { @@ -191,7 +191,7 @@ struct ib_srq *mlx4_ib_create_srq(struct ib_pd *pd, srq->msrq.event = mlx4_ib_srq_event; srq->ibsrq.ext.xrc.srq_num = srq->msrq.srqn; - if (pd->uobject) + if (udata) if (ib_copy_to_udata(udata, &srq->msrq.srqn, sizeof (__u32))) { err = -EFAULT; goto err_wrid; @@ -202,7 +202,7 @@ struct ib_srq *mlx4_ib_create_srq(struct ib_pd *pd, return &srq->ibsrq; err_wrid: - if (pd->uobject) + if (udata) mlx4_ib_db_unmap_user(to_mucontext(pd->uobject->context), &srq->db); else kvfree(srq->wrid); @@ -211,13 +211,13 @@ err_mtt: mlx4_mtt_cleanup(dev->dev, &srq->mtt); err_buf: - if (pd->uobject) + if (srq->umem) ib_umem_release(srq->umem); else mlx4_buf_free(dev->dev, buf_size, &srq->buf); err_db: - if (!pd->uobject) + if (!udata) mlx4_db_free(dev->dev, &srq->db); err_srq: diff --git a/drivers/infiniband/hw/mlx4/sysfs.c b/drivers/infiniband/hw/mlx4/sysfs.c index 752bdd536130..ea1f3a081b05 100644 --- a/drivers/infiniband/hw/mlx4/sysfs.c +++ b/drivers/infiniband/hw/mlx4/sysfs.c @@ -353,16 +353,12 @@ err: static void get_name(struct mlx4_ib_dev *dev, char *name, int i, int max) { - char base_name[9]; - - /* pci_name format is: bus:dev:func -> xxxx:yy:zz.n */ - strlcpy(name, pci_name(dev->dev->persist->pdev), max); - strncpy(base_name, name, 8); /*till xxxx:yy:*/ - base_name[8] = '\0'; - /* with no ARI only 3 last bits are used so when the fn is higher than 8 + /* pci_name format is: bus:dev:func -> xxxx:yy:zz.n + * with no ARI only 3 last bits are used so when the fn is higher than 8 * need to add it to the dev num, so count in the last number will be * modulo 8 */ - sprintf(name, "%s%.2d.%d", base_name, (i/8), (i%8)); + snprintf(name, max, "%.8s%.2d.%d", pci_name(dev->dev->persist->pdev), + i / 8, i % 8); } struct mlx4_port { diff --git a/drivers/infiniband/hw/mlx5/ah.c b/drivers/infiniband/hw/mlx5/ah.c index ffd03bf1a71e..420ae0897333 100644 --- a/drivers/infiniband/hw/mlx5/ah.c +++ b/drivers/infiniband/hw/mlx5/ah.c @@ -72,7 +72,7 @@ static struct ib_ah *create_ib_ah(struct mlx5_ib_dev *dev, } struct ib_ah *mlx5_ib_create_ah(struct ib_pd *pd, struct rdma_ah_attr *ah_attr, - struct ib_udata *udata) + u32 flags, struct ib_udata *udata) { struct mlx5_ib_ah *ah; @@ -131,7 +131,7 @@ int mlx5_ib_query_ah(struct ib_ah *ibah, struct rdma_ah_attr *ah_attr) return 0; } -int mlx5_ib_destroy_ah(struct ib_ah *ah) +int mlx5_ib_destroy_ah(struct ib_ah *ah, u32 flags) { kfree(to_mah(ah)); return 0; diff --git a/drivers/infiniband/hw/mlx5/cmd.c b/drivers/infiniband/hw/mlx5/cmd.c index ca060a2e2b36..356bccc715ee 100644 --- a/drivers/infiniband/hw/mlx5/cmd.c +++ b/drivers/infiniband/hw/mlx5/cmd.c @@ -240,6 +240,7 @@ int mlx5_cmd_alloc_transport_domain(struct mlx5_core_dev *dev, u32 *tdn, MLX5_SET(alloc_transport_domain_in, in, opcode, MLX5_CMD_OP_ALLOC_TRANSPORT_DOMAIN); + MLX5_SET(alloc_transport_domain_in, in, uid, uid); err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); if (!err) @@ -257,6 +258,7 @@ void mlx5_cmd_dealloc_transport_domain(struct mlx5_core_dev *dev, u32 tdn, MLX5_SET(dealloc_transport_domain_in, in, opcode, MLX5_CMD_OP_DEALLOC_TRANSPORT_DOMAIN); + MLX5_SET(dealloc_transport_domain_in, in, uid, uid); MLX5_SET(dealloc_transport_domain_in, in, transport_domain, tdn); mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); } @@ -326,3 +328,20 @@ int mlx5_cmd_xrcd_dealloc(struct mlx5_core_dev *dev, u32 xrcdn, u16 uid) MLX5_SET(dealloc_xrcd_in, in, uid, uid); return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); } + +int mlx5_cmd_alloc_q_counter(struct mlx5_core_dev *dev, u16 *counter_id, + u16 uid) +{ + u32 in[MLX5_ST_SZ_DW(alloc_q_counter_in)] = {0}; + u32 out[MLX5_ST_SZ_DW(alloc_q_counter_out)] = {0}; + int err; + + MLX5_SET(alloc_q_counter_in, in, opcode, MLX5_CMD_OP_ALLOC_Q_COUNTER); + MLX5_SET(alloc_q_counter_in, in, uid, uid); + + err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); + if (!err) + *counter_id = MLX5_GET(alloc_q_counter_out, out, + counter_set_id); + return err; +} diff --git a/drivers/infiniband/hw/mlx5/cmd.h b/drivers/infiniband/hw/mlx5/cmd.h index c03c56455534..1e76dc67a369 100644 --- a/drivers/infiniband/hw/mlx5/cmd.h +++ b/drivers/infiniband/hw/mlx5/cmd.h @@ -61,4 +61,6 @@ int mlx5_cmd_detach_mcg(struct mlx5_core_dev *dev, union ib_gid *mgid, u32 qpn, u16 uid); int mlx5_cmd_xrcd_alloc(struct mlx5_core_dev *dev, u32 *xrcdn, u16 uid); int mlx5_cmd_xrcd_dealloc(struct mlx5_core_dev *dev, u32 xrcdn, u16 uid); +int mlx5_cmd_alloc_q_counter(struct mlx5_core_dev *dev, u16 *counter_id, + u16 uid); #endif /* MLX5_IB_CMD_H */ diff --git a/drivers/infiniband/hw/mlx5/cq.c b/drivers/infiniband/hw/mlx5/cq.c index 26ab9041f94a..90f1b0bae5b5 100644 --- a/drivers/infiniband/hw/mlx5/cq.c +++ b/drivers/infiniband/hw/mlx5/cq.c @@ -330,67 +330,6 @@ static void mlx5_handle_error_cqe(struct mlx5_ib_dev *dev, dump_cqe(dev, cqe); } -static int is_atomic_response(struct mlx5_ib_qp *qp, uint16_t idx) -{ - /* TBD: waiting decision - */ - return 0; -} - -static void *mlx5_get_atomic_laddr(struct mlx5_ib_qp *qp, uint16_t idx) -{ - struct mlx5_wqe_data_seg *dpseg; - void *addr; - - dpseg = mlx5_get_send_wqe(qp, idx) + sizeof(struct mlx5_wqe_ctrl_seg) + - sizeof(struct mlx5_wqe_raddr_seg) + - sizeof(struct mlx5_wqe_atomic_seg); - addr = (void *)(unsigned long)be64_to_cpu(dpseg->addr); - return addr; -} - -static void handle_atomic(struct mlx5_ib_qp *qp, struct mlx5_cqe64 *cqe64, - uint16_t idx) -{ - void *addr; - int byte_count; - int i; - - if (!is_atomic_response(qp, idx)) - return; - - byte_count = be32_to_cpu(cqe64->byte_cnt); - addr = mlx5_get_atomic_laddr(qp, idx); - - if (byte_count == 4) { - *(uint32_t *)addr = be32_to_cpu(*((__be32 *)addr)); - } else { - for (i = 0; i < byte_count; i += 8) { - *(uint64_t *)addr = be64_to_cpu(*((__be64 *)addr)); - addr += 8; - } - } - - return; -} - -static void handle_atomics(struct mlx5_ib_qp *qp, struct mlx5_cqe64 *cqe64, - u16 tail, u16 head) -{ - u16 idx; - - do { - idx = tail & (qp->sq.wqe_cnt - 1); - handle_atomic(qp, cqe64, idx); - if (idx == head) - break; - - tail = qp->sq.w_list[idx].next; - } while (1); - tail = qp->sq.w_list[idx].next; - qp->sq.last_poll = tail; -} - static void free_cq_buf(struct mlx5_ib_dev *dev, struct mlx5_ib_cq_buf *buf) { mlx5_frag_buf_free(dev->mdev, &buf->frag_buf); @@ -428,45 +367,15 @@ static void get_sig_err_item(struct mlx5_sig_err_cqe *cqe, item->key = be32_to_cpu(cqe->mkey); } -static void sw_send_comp(struct mlx5_ib_qp *qp, int num_entries, - struct ib_wc *wc, int *npolled) -{ - struct mlx5_ib_wq *wq; - unsigned int cur; - unsigned int idx; - int np; - int i; - - wq = &qp->sq; - cur = wq->head - wq->tail; - np = *npolled; - - if (cur == 0) - return; - - for (i = 0; i < cur && np < num_entries; i++) { - idx = wq->last_poll & (wq->wqe_cnt - 1); - wc->wr_id = wq->wrid[idx]; - wc->status = IB_WC_WR_FLUSH_ERR; - wc->vendor_err = MLX5_CQE_SYNDROME_WR_FLUSH_ERR; - wq->tail++; - np++; - wc->qp = &qp->ibqp; - wc++; - wq->last_poll = wq->w_list[idx].next; - } - *npolled = np; -} - -static void sw_recv_comp(struct mlx5_ib_qp *qp, int num_entries, - struct ib_wc *wc, int *npolled) +static void sw_comp(struct mlx5_ib_qp *qp, int num_entries, struct ib_wc *wc, + int *npolled, int is_send) { struct mlx5_ib_wq *wq; unsigned int cur; int np; int i; - wq = &qp->rq; + wq = (is_send) ? &qp->sq : &qp->rq; cur = wq->head - wq->tail; np = *npolled; @@ -493,13 +402,13 @@ static void mlx5_ib_poll_sw_comp(struct mlx5_ib_cq *cq, int num_entries, *npolled = 0; /* Find uncompleted WQEs belonging to that cq and return mmics ones */ list_for_each_entry(qp, &cq->list_send_qp, cq_send_list) { - sw_send_comp(qp, num_entries, wc + *npolled, npolled); + sw_comp(qp, num_entries, wc + *npolled, npolled, true); if (*npolled >= num_entries) return; } list_for_each_entry(qp, &cq->list_recv_qp, cq_recv_list) { - sw_recv_comp(qp, num_entries, wc + *npolled, npolled); + sw_comp(qp, num_entries, wc + *npolled, npolled, false); if (*npolled >= num_entries) return; } @@ -567,7 +476,6 @@ repoll: wqe_ctr = be16_to_cpu(cqe64->wqe_counter); idx = wqe_ctr & (wq->wqe_cnt - 1); handle_good_req(wc, cqe64, wq, idx); - handle_atomics(*cur_qp, cqe64, wq->last_poll, idx); wc->wr_id = wq->wrid[idx]; wq->tail = wq->wqe_head[idx] + 1; wc->status = IB_WC_SUCCESS; diff --git a/drivers/infiniband/hw/mlx5/devx.c b/drivers/infiniband/hw/mlx5/devx.c index 45c421c87100..5a588f3cfb1b 100644 --- a/drivers/infiniband/hw/mlx5/devx.c +++ b/drivers/infiniband/hw/mlx5/devx.c @@ -9,6 +9,7 @@ #include <rdma/uverbs_ioctl.h> #include <rdma/mlx5_user_ioctl_cmds.h> #include <rdma/ib_umem.h> +#include <rdma/uverbs_std_types.h> #include <linux/mlx5/driver.h> #include <linux/mlx5/fs.h> #include "mlx5_ib.h" @@ -40,29 +41,32 @@ struct devx_umem_reg_cmd { u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)]; }; -static struct mlx5_ib_ucontext *devx_ufile2uctx(struct ib_uverbs_file *file) +static struct mlx5_ib_ucontext * +devx_ufile2uctx(const struct uverbs_attr_bundle *attrs) { - return to_mucontext(ib_uverbs_get_ucontext(file)); + return to_mucontext(ib_uverbs_get_ucontext(attrs)); } -int mlx5_ib_devx_create(struct mlx5_ib_dev *dev) +int mlx5_ib_devx_create(struct mlx5_ib_dev *dev, bool is_user) { u32 in[MLX5_ST_SZ_DW(create_uctx_in)] = {0}; u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)] = {0}; - u64 general_obj_types; - void *hdr; + void *uctx; int err; u16 uid; + u32 cap = 0; - hdr = MLX5_ADDR_OF(create_uctx_in, in, hdr); - - general_obj_types = MLX5_CAP_GEN_64(dev->mdev, general_obj_types); - if (!(general_obj_types & MLX5_GENERAL_OBJ_TYPES_CAP_UCTX) || - !(general_obj_types & MLX5_GENERAL_OBJ_TYPES_CAP_UMEM)) + /* 0 means not supported */ + if (!MLX5_CAP_GEN(dev->mdev, log_max_uctx)) return -EINVAL; - MLX5_SET(general_obj_in_cmd_hdr, hdr, opcode, MLX5_CMD_OP_CREATE_GENERAL_OBJECT); - MLX5_SET(general_obj_in_cmd_hdr, hdr, obj_type, MLX5_OBJ_TYPE_UCTX); + uctx = MLX5_ADDR_OF(create_uctx_in, in, uctx); + if (is_user && capable(CAP_NET_RAW) && + (MLX5_CAP_GEN(dev->mdev, uctx_cap) & MLX5_UCTX_CAP_RAW_TX)) + cap |= MLX5_UCTX_CAP_RAW_TX; + + MLX5_SET(create_uctx_in, in, opcode, MLX5_CMD_OP_CREATE_UCTX); + MLX5_SET(uctx, uctx, cap, cap); err = mlx5_cmd_exec(dev->mdev, in, sizeof(in), out, sizeof(out)); if (err) @@ -74,12 +78,11 @@ int mlx5_ib_devx_create(struct mlx5_ib_dev *dev) void mlx5_ib_devx_destroy(struct mlx5_ib_dev *dev, u16 uid) { - u32 in[MLX5_ST_SZ_DW(general_obj_in_cmd_hdr)] = {0}; + u32 in[MLX5_ST_SZ_DW(destroy_uctx_in)] = {0}; u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)] = {0}; - MLX5_SET(general_obj_in_cmd_hdr, in, opcode, MLX5_CMD_OP_DESTROY_GENERAL_OBJECT); - MLX5_SET(general_obj_in_cmd_hdr, in, obj_type, MLX5_OBJ_TYPE_UCTX); - MLX5_SET(general_obj_in_cmd_hdr, in, obj_id, uid); + MLX5_SET(destroy_uctx_in, in, opcode, MLX5_CMD_OP_DESTROY_UCTX); + MLX5_SET(destroy_uctx_in, in, uid, uid); mlx5_cmd_exec(dev->mdev, in, sizeof(in), out, sizeof(out)); } @@ -106,6 +109,21 @@ bool mlx5_ib_devx_is_flow_dest(void *obj, int *dest_id, int *dest_type) } } +bool mlx5_ib_devx_is_flow_counter(void *obj, u32 *counter_id) +{ + struct devx_obj *devx_obj = obj; + u16 opcode = MLX5_GET(general_obj_in_cmd_hdr, devx_obj->dinbox, opcode); + + if (opcode == MLX5_CMD_OP_DEALLOC_FLOW_COUNTER) { + *counter_id = MLX5_GET(dealloc_flow_counter_in, + devx_obj->dinbox, + flow_counter_id); + return true; + } + + return false; +} + /* * As the obj_id in the firmware is not globally unique the object type * must be considered upon checking for a valid object id. @@ -116,7 +134,7 @@ static u64 get_enc_obj_id(u16 opcode, u32 obj_id) return ((u64)opcode << 32) | obj_id; } -static int devx_is_valid_obj_id(struct devx_obj *obj, const void *in) +static u64 devx_get_obj_id(const void *in) { u16 opcode = MLX5_GET(general_obj_in_cmd_hdr, in, opcode); u64 obj_id; @@ -290,6 +308,8 @@ static int devx_is_valid_obj_id(struct devx_obj *obj, const void *in) MLX5_GET(query_dct_in, in, dctn)); break; case MLX5_CMD_OP_QUERY_XRQ: + case MLX5_CMD_OP_QUERY_XRQ_DC_PARAMS_ENTRY: + case MLX5_CMD_OP_QUERY_XRQ_ERROR_PARAMS: obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_XRQ, MLX5_GET(query_xrq_in, in, xrqn)); break; @@ -316,17 +336,107 @@ static int devx_is_valid_obj_id(struct devx_obj *obj, const void *in) MLX5_GET(drain_dct_in, in, dctn)); break; case MLX5_CMD_OP_ARM_XRQ: + case MLX5_CMD_OP_SET_XRQ_DC_PARAMS_ENTRY: obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_XRQ, MLX5_GET(arm_xrq_in, in, xrqn)); break; + case MLX5_CMD_OP_QUERY_PACKET_REFORMAT_CONTEXT: + obj_id = get_enc_obj_id + (MLX5_CMD_OP_ALLOC_PACKET_REFORMAT_CONTEXT, + MLX5_GET(query_packet_reformat_context_in, + in, packet_reformat_id)); + break; default: + obj_id = 0; + } + + return obj_id; +} + +static bool devx_is_valid_obj_id(struct ib_uobject *uobj, const void *in) +{ + u64 obj_id = devx_get_obj_id(in); + + if (!obj_id) return false; + + switch (uobj_get_object_id(uobj)) { + case UVERBS_OBJECT_CQ: + return get_enc_obj_id(MLX5_CMD_OP_CREATE_CQ, + to_mcq(uobj->object)->mcq.cqn) == + obj_id; + + case UVERBS_OBJECT_SRQ: + { + struct mlx5_core_srq *srq = &(to_msrq(uobj->object)->msrq); + struct mlx5_ib_dev *dev = to_mdev(uobj->context->device); + u16 opcode; + + switch (srq->common.res) { + case MLX5_RES_XSRQ: + opcode = MLX5_CMD_OP_CREATE_XRC_SRQ; + break; + case MLX5_RES_XRQ: + opcode = MLX5_CMD_OP_CREATE_XRQ; + break; + default: + if (!dev->mdev->issi) + opcode = MLX5_CMD_OP_CREATE_SRQ; + else + opcode = MLX5_CMD_OP_CREATE_RMP; + } + + return get_enc_obj_id(opcode, + to_msrq(uobj->object)->msrq.srqn) == + obj_id; } - if (obj_id == obj->obj_id) - return true; + case UVERBS_OBJECT_QP: + { + struct mlx5_ib_qp *qp = to_mqp(uobj->object); + enum ib_qp_type qp_type = qp->ibqp.qp_type; + + if (qp_type == IB_QPT_RAW_PACKET || + (qp->flags & MLX5_IB_QP_UNDERLAY)) { + struct mlx5_ib_raw_packet_qp *raw_packet_qp = + &qp->raw_packet_qp; + struct mlx5_ib_rq *rq = &raw_packet_qp->rq; + struct mlx5_ib_sq *sq = &raw_packet_qp->sq; + + return (get_enc_obj_id(MLX5_CMD_OP_CREATE_RQ, + rq->base.mqp.qpn) == obj_id || + get_enc_obj_id(MLX5_CMD_OP_CREATE_SQ, + sq->base.mqp.qpn) == obj_id || + get_enc_obj_id(MLX5_CMD_OP_CREATE_TIR, + rq->tirn) == obj_id || + get_enc_obj_id(MLX5_CMD_OP_CREATE_TIS, + sq->tisn) == obj_id); + } + + if (qp_type == MLX5_IB_QPT_DCT) + return get_enc_obj_id(MLX5_CMD_OP_CREATE_DCT, + qp->dct.mdct.mqp.qpn) == obj_id; + + return get_enc_obj_id(MLX5_CMD_OP_CREATE_QP, + qp->ibqp.qp_num) == obj_id; + } - return false; + case UVERBS_OBJECT_WQ: + return get_enc_obj_id(MLX5_CMD_OP_CREATE_RQ, + to_mrwq(uobj->object)->core_qp.qpn) == + obj_id; + + case UVERBS_OBJECT_RWQ_IND_TBL: + return get_enc_obj_id(MLX5_CMD_OP_CREATE_RQT, + to_mrwq_ind_table(uobj->object)->rqtn) == + obj_id; + + case MLX5_IB_OBJECT_DEVX_OBJ: + return ((struct devx_obj *)uobj->object)->obj_id == obj_id; + + default: + return false; + } } static void devx_set_umem_valid(const void *in) @@ -494,6 +604,7 @@ static bool devx_is_obj_modify_cmd(const void *in) case MLX5_CMD_OP_DRAIN_DCT: case MLX5_CMD_OP_ARM_DCT_FOR_KEY_VIOLATION: case MLX5_CMD_OP_ARM_XRQ: + case MLX5_CMD_OP_SET_XRQ_DC_PARAMS_ENTRY: return true; case MLX5_CMD_OP_SET_FLOW_TABLE_ENTRY: { @@ -535,6 +646,9 @@ static bool devx_is_obj_query_cmd(const void *in) case MLX5_CMD_OP_QUERY_XRC_SRQ: case MLX5_CMD_OP_QUERY_DCT: case MLX5_CMD_OP_QUERY_XRQ: + case MLX5_CMD_OP_QUERY_XRQ_DC_PARAMS_ENTRY: + case MLX5_CMD_OP_QUERY_XRQ_ERROR_PARAMS: + case MLX5_CMD_OP_QUERY_PACKET_REFORMAT_CONTEXT: return true; default: return false; @@ -572,15 +686,16 @@ static int devx_get_uid(struct mlx5_ib_ucontext *c, void *cmd_in) if (!c->devx_uid) return -EINVAL; - if (!capable(CAP_NET_RAW)) - return -EPERM; - return c->devx_uid; } static bool devx_is_general_cmd(void *in) { u16 opcode = MLX5_GET(general_obj_in_cmd_hdr, in, opcode); + if (opcode >= MLX5_CMD_OP_GENERAL_START && + opcode < MLX5_CMD_OP_GENERAL_END) + return true; + switch (opcode) { case MLX5_CMD_OP_QUERY_HCA_CAP: case MLX5_CMD_OP_QUERY_HCA_VPORT_CONTEXT: @@ -603,7 +718,7 @@ static bool devx_is_general_cmd(void *in) } static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_QUERY_EQN)( - struct ib_uverbs_file *file, struct uverbs_attr_bundle *attrs) + struct uverbs_attr_bundle *attrs) { struct mlx5_ib_ucontext *c; struct mlx5_ib_dev *dev; @@ -616,7 +731,7 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_QUERY_EQN)( MLX5_IB_ATTR_DEVX_QUERY_EQN_USER_VEC)) return -EFAULT; - c = devx_ufile2uctx(file); + c = devx_ufile2uctx(attrs); if (IS_ERR(c)) return PTR_ERR(c); dev = to_mdev(c->ibucontext.device); @@ -653,14 +768,14 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_QUERY_EQN)( * queue or arm its CQ for event generation), no further harm is expected. */ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_QUERY_UAR)( - struct ib_uverbs_file *file, struct uverbs_attr_bundle *attrs) + struct uverbs_attr_bundle *attrs) { struct mlx5_ib_ucontext *c; struct mlx5_ib_dev *dev; u32 user_idx; s32 dev_idx; - c = devx_ufile2uctx(file); + c = devx_ufile2uctx(attrs); if (IS_ERR(c)) return PTR_ERR(c); dev = to_mdev(c->ibucontext.device); @@ -681,7 +796,7 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_QUERY_UAR)( } static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OTHER)( - struct ib_uverbs_file *file, struct uverbs_attr_bundle *attrs) + struct uverbs_attr_bundle *attrs) { struct mlx5_ib_ucontext *c; struct mlx5_ib_dev *dev; @@ -693,7 +808,7 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OTHER)( int err; int uid; - c = devx_ufile2uctx(file); + c = devx_ufile2uctx(attrs); if (IS_ERR(c)) return PTR_ERR(c); dev = to_mdev(c->ibucontext.device); @@ -740,6 +855,10 @@ static void devx_obj_build_destroy_cmd(void *in, void *out, void *din, MLX5_SET(general_obj_in_cmd_hdr, din, obj_type, obj_type); break; + case MLX5_CMD_OP_CREATE_UMEM: + MLX5_SET(general_obj_in_cmd_hdr, din, opcode, + MLX5_CMD_OP_DESTROY_UMEM); + break; case MLX5_CMD_OP_CREATE_MKEY: MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DESTROY_MKEY); break; @@ -908,7 +1027,7 @@ static int devx_obj_cleanup(struct ib_uobject *uobject, } static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_CREATE)( - struct ib_uverbs_file *file, struct uverbs_attr_bundle *attrs) + struct uverbs_attr_bundle *attrs) { void *cmd_in = uverbs_attr_get_alloced_ptr(attrs, MLX5_IB_ATTR_DEVX_OBJ_CREATE_CMD_IN); int cmd_out_len = uverbs_attr_get_len(attrs, @@ -970,7 +1089,7 @@ obj_free: } static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_MODIFY)( - struct ib_uverbs_file *file, struct uverbs_attr_bundle *attrs) + struct uverbs_attr_bundle *attrs) { void *cmd_in = uverbs_attr_get_alloced_ptr(attrs, MLX5_IB_ATTR_DEVX_OBJ_MODIFY_CMD_IN); int cmd_out_len = uverbs_attr_get_len(attrs, @@ -978,7 +1097,7 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_MODIFY)( struct ib_uobject *uobj = uverbs_attr_get_uobject(attrs, MLX5_IB_ATTR_DEVX_OBJ_MODIFY_HANDLE); struct mlx5_ib_ucontext *c = to_mucontext(uobj->context); - struct devx_obj *obj = uobj->object; + struct mlx5_ib_dev *mdev = to_mdev(uobj->context->device); void *cmd_out; int err; int uid; @@ -990,7 +1109,7 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_MODIFY)( if (!devx_is_obj_modify_cmd(cmd_in)) return -EINVAL; - if (!devx_is_valid_obj_id(obj, cmd_in)) + if (!devx_is_valid_obj_id(uobj, cmd_in)) return -EINVAL; cmd_out = uverbs_zalloc(attrs, cmd_out_len); @@ -1000,7 +1119,7 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_MODIFY)( MLX5_SET(general_obj_in_cmd_hdr, cmd_in, uid, uid); devx_set_umem_valid(cmd_in); - err = mlx5_cmd_exec(obj->mdev, cmd_in, + err = mlx5_cmd_exec(mdev->mdev, cmd_in, uverbs_attr_get_len(attrs, MLX5_IB_ATTR_DEVX_OBJ_MODIFY_CMD_IN), cmd_out, cmd_out_len); if (err) @@ -1011,7 +1130,7 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_MODIFY)( } static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_QUERY)( - struct ib_uverbs_file *file, struct uverbs_attr_bundle *attrs) + struct uverbs_attr_bundle *attrs) { void *cmd_in = uverbs_attr_get_alloced_ptr(attrs, MLX5_IB_ATTR_DEVX_OBJ_QUERY_CMD_IN); int cmd_out_len = uverbs_attr_get_len(attrs, @@ -1019,10 +1138,10 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_QUERY)( struct ib_uobject *uobj = uverbs_attr_get_uobject(attrs, MLX5_IB_ATTR_DEVX_OBJ_QUERY_HANDLE); struct mlx5_ib_ucontext *c = to_mucontext(uobj->context); - struct devx_obj *obj = uobj->object; void *cmd_out; int err; int uid; + struct mlx5_ib_dev *mdev = to_mdev(uobj->context->device); uid = devx_get_uid(c, cmd_in); if (uid < 0) @@ -1031,7 +1150,7 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_QUERY)( if (!devx_is_obj_query_cmd(cmd_in)) return -EINVAL; - if (!devx_is_valid_obj_id(obj, cmd_in)) + if (!devx_is_valid_obj_id(uobj, cmd_in)) return -EINVAL; cmd_out = uverbs_zalloc(attrs, cmd_out_len); @@ -1039,7 +1158,7 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_QUERY)( return PTR_ERR(cmd_out); MLX5_SET(general_obj_in_cmd_hdr, cmd_in, uid, uid); - err = mlx5_cmd_exec(obj->mdev, cmd_in, + err = mlx5_cmd_exec(mdev->mdev, cmd_in, uverbs_attr_get_len(attrs, MLX5_IB_ATTR_DEVX_OBJ_QUERY_CMD_IN), cmd_out, cmd_out_len); if (err) @@ -1115,8 +1234,7 @@ static void devx_umem_reg_cmd_build(struct mlx5_ib_dev *dev, umem = MLX5_ADDR_OF(create_umem_in, cmd->in, umem); mtt = (__be64 *)MLX5_ADDR_OF(umem, umem, mtt); - MLX5_SET(general_obj_in_cmd_hdr, cmd->in, opcode, MLX5_CMD_OP_CREATE_GENERAL_OBJECT); - MLX5_SET(general_obj_in_cmd_hdr, cmd->in, obj_type, MLX5_OBJ_TYPE_UMEM); + MLX5_SET(create_umem_in, cmd->in, opcode, MLX5_CMD_OP_CREATE_UMEM); MLX5_SET64(umem, umem, num_of_mtt, obj->ncont); MLX5_SET(umem, umem, log_page_size, obj->page_shift - MLX5_ADAPTER_PAGE_SHIFT); @@ -1127,7 +1245,7 @@ static void devx_umem_reg_cmd_build(struct mlx5_ib_dev *dev, } static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_UMEM_REG)( - struct ib_uverbs_file *file, struct uverbs_attr_bundle *attrs) + struct uverbs_attr_bundle *attrs) { struct devx_umem_reg_cmd cmd; struct devx_umem *obj; @@ -1141,9 +1259,6 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_UMEM_REG)( if (!c->devx_uid) return -EINVAL; - if (!capable(CAP_NET_RAW)) - return -EPERM; - obj = kzalloc(sizeof(struct devx_umem), GFP_KERNEL); if (!obj) return -ENOMEM; @@ -1158,7 +1273,7 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_UMEM_REG)( devx_umem_reg_cmd_build(dev, obj, &cmd); - MLX5_SET(general_obj_in_cmd_hdr, cmd.in, uid, c->devx_uid); + MLX5_SET(create_umem_in, cmd.in, uid, c->devx_uid); err = mlx5_cmd_exec(dev->mdev, cmd.in, cmd.inlen, cmd.out, sizeof(cmd.out)); if (err) @@ -1279,7 +1394,7 @@ DECLARE_UVERBS_NAMED_METHOD_DESTROY( DECLARE_UVERBS_NAMED_METHOD( MLX5_IB_METHOD_DEVX_OBJ_MODIFY, UVERBS_ATTR_IDR(MLX5_IB_ATTR_DEVX_OBJ_MODIFY_HANDLE, - MLX5_IB_OBJECT_DEVX_OBJ, + UVERBS_IDR_ANY_OBJECT, UVERBS_ACCESS_WRITE, UA_MANDATORY), UVERBS_ATTR_PTR_IN( @@ -1295,7 +1410,7 @@ DECLARE_UVERBS_NAMED_METHOD( DECLARE_UVERBS_NAMED_METHOD( MLX5_IB_METHOD_DEVX_OBJ_QUERY, UVERBS_ATTR_IDR(MLX5_IB_ATTR_DEVX_OBJ_QUERY_HANDLE, - MLX5_IB_OBJECT_DEVX_OBJ, + UVERBS_IDR_ANY_OBJECT, UVERBS_ACCESS_READ, UA_MANDATORY), UVERBS_ATTR_PTR_IN( @@ -1325,12 +1440,22 @@ DECLARE_UVERBS_NAMED_OBJECT(MLX5_IB_OBJECT_DEVX_UMEM, &UVERBS_METHOD(MLX5_IB_METHOD_DEVX_UMEM_REG), &UVERBS_METHOD(MLX5_IB_METHOD_DEVX_UMEM_DEREG)); -DECLARE_UVERBS_OBJECT_TREE(devx_objects, - &UVERBS_OBJECT(MLX5_IB_OBJECT_DEVX), - &UVERBS_OBJECT(MLX5_IB_OBJECT_DEVX_OBJ), - &UVERBS_OBJECT(MLX5_IB_OBJECT_DEVX_UMEM)); - -const struct uverbs_object_tree_def *mlx5_ib_get_devx_tree(void) +static bool devx_is_supported(struct ib_device *device) { - return &devx_objects; + struct mlx5_ib_dev *dev = to_mdev(device); + + return !dev->rep && MLX5_CAP_GEN(dev->mdev, log_max_uctx); } + +const struct uapi_definition mlx5_ib_devx_defs[] = { + UAPI_DEF_CHAIN_OBJ_TREE_NAMED( + MLX5_IB_OBJECT_DEVX, + UAPI_DEF_IS_OBJ_SUPPORTED(devx_is_supported)), + UAPI_DEF_CHAIN_OBJ_TREE_NAMED( + MLX5_IB_OBJECT_DEVX_OBJ, + UAPI_DEF_IS_OBJ_SUPPORTED(devx_is_supported)), + UAPI_DEF_CHAIN_OBJ_TREE_NAMED( + MLX5_IB_OBJECT_DEVX_UMEM, + UAPI_DEF_IS_OBJ_SUPPORTED(devx_is_supported)), + {}, +}; diff --git a/drivers/infiniband/hw/mlx5/flow.c b/drivers/infiniband/hw/mlx5/flow.c index f86cdcafdafc..e8a1e4498e3f 100644 --- a/drivers/infiniband/hw/mlx5/flow.c +++ b/drivers/infiniband/hw/mlx5/flow.c @@ -60,7 +60,7 @@ static const struct uverbs_attr_spec mlx5_ib_flow_type[] = { #define MLX5_IB_CREATE_FLOW_MAX_FLOW_ACTIONS 2 static int UVERBS_HANDLER(MLX5_IB_METHOD_CREATE_FLOW)( - struct ib_uverbs_file *file, struct uverbs_attr_bundle *attrs) + struct uverbs_attr_bundle *attrs) { struct mlx5_flow_act flow_act = {.flow_tag = MLX5_FS_DEFAULT_FLOW_TAG}; struct mlx5_ib_flow_handler *flow_handler; @@ -77,6 +77,7 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_CREATE_FLOW)( uverbs_attr_get_uobject(attrs, MLX5_IB_ATTR_CREATE_FLOW_HANDLE); struct mlx5_ib_dev *dev = to_mdev(uobj->context->device); int len, ret, i; + u32 counter_id = 0; if (!capable(CAP_NET_RAW)) return -EPERM; @@ -92,10 +93,6 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_CREATE_FLOW)( ((dest_devx && dest_qp) || (!dest_devx && !dest_qp))) return -EINVAL; - if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_EGRESS && - (dest_devx || dest_qp)) - return -EINVAL; - if (dest_devx) { devx_obj = uverbs_attr_get_obj( attrs, MLX5_IB_ATTR_CREATE_FLOW_DEST_DEVX); @@ -128,8 +125,19 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_CREATE_FLOW)( dest_type = MLX5_FLOW_DESTINATION_TYPE_PORT; } - if (dev->rep) - return -ENOTSUPP; + len = uverbs_attr_get_uobjs_arr(attrs, + MLX5_IB_ATTR_CREATE_FLOW_ARR_COUNTERS_DEVX, &arr_flow_actions); + if (len) { + devx_obj = arr_flow_actions[0]->object; + + if (!mlx5_ib_devx_is_flow_counter(devx_obj, &counter_id)) + return -EINVAL; + flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_COUNT; + } + + if (dest_type == MLX5_FLOW_DESTINATION_TYPE_TIR && + fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_EGRESS) + return -EINVAL; cmd_in = uverbs_attr_get_alloced_ptr( attrs, MLX5_IB_ATTR_CREATE_FLOW_MATCH_VALUE); @@ -164,6 +172,7 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_CREATE_FLOW)( } flow_handler = mlx5_ib_raw_fs_rule_add(dev, fs_matcher, &flow_act, + counter_id, cmd_in, inlen, dest_id, dest_type); if (IS_ERR(flow_handler)) { @@ -194,7 +203,7 @@ static int flow_matcher_cleanup(struct ib_uobject *uobject, } static int UVERBS_HANDLER(MLX5_IB_METHOD_FLOW_MATCHER_CREATE)( - struct ib_uverbs_file *file, struct uverbs_attr_bundle *attrs) + struct uverbs_attr_bundle *attrs) { struct ib_uobject *uobj = uverbs_attr_get_uobject( attrs, MLX5_IB_ATTR_FLOW_MATCHER_CREATE_HANDLE); @@ -313,7 +322,6 @@ static bool mlx5_ib_modify_header_supported(struct mlx5_ib_dev *dev) } static int UVERBS_HANDLER(MLX5_IB_METHOD_FLOW_ACTION_CREATE_MODIFY_HEADER)( - struct ib_uverbs_file *file, struct uverbs_attr_bundle *attrs) { struct ib_uobject *uobj = uverbs_attr_get_uobject( @@ -321,9 +329,8 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_FLOW_ACTION_CREATE_MODIFY_HEADER)( struct mlx5_ib_dev *mdev = to_mdev(uobj->context->device); enum mlx5_ib_uapi_flow_table_type ft_type; struct ib_flow_action *action; - size_t num_actions; + int num_actions; void *in; - int len; int ret; if (!mlx5_ib_modify_header_supported(mdev)) @@ -331,18 +338,17 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_FLOW_ACTION_CREATE_MODIFY_HEADER)( in = uverbs_attr_get_alloced_ptr(attrs, MLX5_IB_ATTR_CREATE_MODIFY_HEADER_ACTIONS_PRM); - len = uverbs_attr_get_len(attrs, - MLX5_IB_ATTR_CREATE_MODIFY_HEADER_ACTIONS_PRM); - if (len % MLX5_UN_SZ_BYTES(set_action_in_add_action_in_auto)) - return -EINVAL; + num_actions = uverbs_attr_ptr_get_array_size( + attrs, MLX5_IB_ATTR_CREATE_MODIFY_HEADER_ACTIONS_PRM, + MLX5_UN_SZ_BYTES(set_action_in_add_action_in_auto)); + if (num_actions < 0) + return num_actions; ret = uverbs_get_const(&ft_type, attrs, MLX5_IB_ATTR_CREATE_MODIFY_HEADER_FT_TYPE); if (ret) return ret; - - num_actions = len / MLX5_UN_SZ_BYTES(set_action_in_add_action_in_auto), action = mlx5_ib_create_modify_header(mdev, ft_type, num_actions, in); if (IS_ERR(action)) return PTR_ERR(action); @@ -435,7 +441,6 @@ static int mlx5_ib_flow_action_create_packet_reformat_ctx( } static int UVERBS_HANDLER(MLX5_IB_METHOD_FLOW_ACTION_CREATE_PACKET_REFORMAT)( - struct ib_uverbs_file *file, struct uverbs_attr_bundle *attrs) { struct ib_uobject *uobj = uverbs_attr_get_uobject(attrs, @@ -526,7 +531,11 @@ DECLARE_UVERBS_NAMED_METHOD( UA_OPTIONAL), UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_CREATE_FLOW_TAG, UVERBS_ATTR_TYPE(u32), - UA_OPTIONAL)); + UA_OPTIONAL), + UVERBS_ATTR_IDRS_ARR(MLX5_IB_ATTR_CREATE_FLOW_ARR_COUNTERS_DEVX, + MLX5_IB_OBJECT_DEVX_OBJ, + UVERBS_ACCESS_READ, 1, 1, + UA_OPTIONAL)); DECLARE_UVERBS_NAMED_METHOD_DESTROY( MLX5_IB_METHOD_DESTROY_FLOW, @@ -610,16 +619,20 @@ DECLARE_UVERBS_NAMED_OBJECT(MLX5_IB_OBJECT_FLOW_MATCHER, &UVERBS_METHOD(MLX5_IB_METHOD_FLOW_MATCHER_CREATE), &UVERBS_METHOD(MLX5_IB_METHOD_FLOW_MATCHER_DESTROY)); -DECLARE_UVERBS_OBJECT_TREE(flow_objects, - &UVERBS_OBJECT(MLX5_IB_OBJECT_FLOW_MATCHER)); - -int mlx5_ib_get_flow_trees(const struct uverbs_object_tree_def **root) +static bool flow_is_supported(struct ib_device *device) { - int i = 0; - - root[i++] = &flow_objects; - root[i++] = &mlx5_ib_fs; - root[i++] = &mlx5_ib_flow_actions; - - return i; + return !to_mdev(device)->rep; } + +const struct uapi_definition mlx5_ib_flow_defs[] = { + UAPI_DEF_CHAIN_OBJ_TREE_NAMED( + MLX5_IB_OBJECT_FLOW_MATCHER, + UAPI_DEF_IS_OBJ_SUPPORTED(flow_is_supported)), + UAPI_DEF_CHAIN_OBJ_TREE( + UVERBS_OBJECT_FLOW, + &mlx5_ib_fs, + UAPI_DEF_IS_OBJ_SUPPORTED(flow_is_supported)), + UAPI_DEF_CHAIN_OBJ_TREE(UVERBS_OBJECT_FLOW_ACTION, + &mlx5_ib_flow_actions), + {}, +}; diff --git a/drivers/infiniband/hw/mlx5/mad.c b/drivers/infiniband/hw/mlx5/mad.c index 32a9e9228b13..558638468edb 100644 --- a/drivers/infiniband/hw/mlx5/mad.c +++ b/drivers/infiniband/hw/mlx5/mad.c @@ -526,11 +526,6 @@ int mlx5_query_mad_ifc_port(struct ib_device *ibdev, u8 port, int ext_active_speed; int err = -ENOMEM; - if (port < 1 || port > dev->num_ports) { - mlx5_ib_warn(dev, "invalid port number %d\n", port); - return -EINVAL; - } - in_mad = kzalloc(sizeof(*in_mad), GFP_KERNEL); out_mad = kmalloc(sizeof(*out_mad), GFP_KERNEL); if (!in_mad || !out_mad) @@ -568,6 +563,14 @@ int mlx5_query_mad_ifc_port(struct ib_device *ibdev, u8 port, props->max_vl_num = out_mad->data[37] >> 4; props->init_type_reply = out_mad->data[41] >> 4; + if (props->port_cap_flags & IB_PORT_CAP_MASK2_SUP) { + props->port_cap_flags2 = + be16_to_cpup((__be16 *)(out_mad->data + 60)); + + if (props->port_cap_flags2 & IB_PORT_LINK_WIDTH_2X_SUP) + props->active_width = out_mad->data[31] & 0x1f; + } + /* Check if extended speeds (EDR/FDR/...) are supported */ if (props->port_cap_flags & IB_PORT_EXTENDED_SPEEDS_SUP) { ext_active_speed = out_mad->data[62] >> 4; @@ -579,6 +582,11 @@ int mlx5_query_mad_ifc_port(struct ib_device *ibdev, u8 port, case 2: props->active_speed = 32; /* EDR */ break; + case 4: + if (props->port_cap_flags & IB_PORT_CAP_MASK2_SUP && + props->port_cap_flags2 & IB_PORT_LINK_SPEED_HDR_SUP) + props->active_speed = IB_SPEED_HDR; + break; } } diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index e85974ab06c0..94fe253d4956 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -150,7 +150,7 @@ static int get_port_state(struct ib_device *ibdev, int ret; memset(&attr, 0, sizeof(attr)); - ret = ibdev->query_port(ibdev, port_num, &attr); + ret = ibdev->ops.query_port(ibdev, port_num, &attr); if (!ret) *state = attr.state; return ret; @@ -172,7 +172,6 @@ static int mlx5_netdev_event(struct notifier_block *this, switch (event) { case NETDEV_REGISTER: - case NETDEV_UNREGISTER: write_lock(&roce->netdev_lock); if (ibdev->rep) { struct mlx5_eswitch *esw = ibdev->mdev->priv.eswitch; @@ -181,15 +180,20 @@ static int mlx5_netdev_event(struct notifier_block *this, rep_ndev = mlx5_ib_get_rep_netdev(esw, ibdev->rep->vport); if (rep_ndev == ndev) - roce->netdev = (event == NETDEV_UNREGISTER) ? - NULL : ndev; + roce->netdev = ndev; } else if (ndev->dev.parent == &mdev->pdev->dev) { - roce->netdev = (event == NETDEV_UNREGISTER) ? - NULL : ndev; + roce->netdev = ndev; } write_unlock(&roce->netdev_lock); break; + case NETDEV_UNREGISTER: + write_lock(&roce->netdev_lock); + if (roce->netdev == ndev) + roce->netdev = NULL; + write_unlock(&roce->netdev_lock); + break; + case NETDEV_CHANGE: case NETDEV_UP: case NETDEV_DOWN: { @@ -1018,6 +1022,9 @@ static int mlx5_ib_query_device(struct ib_device *ibdev, if (MLX5_CAP_GEN(mdev, cqe_128_always)) resp.flags |= MLX5_IB_QUERY_DEV_RESP_FLAGS_CQE_128B_PAD; + if (MLX5_CAP_GEN(mdev, qp_packet_based)) + resp.flags |= + MLX5_IB_QUERY_DEV_RESP_PACKET_BASED_CREDIT_MODE; } if (field_avail(typeof(resp), sw_parsing_caps, @@ -1105,6 +1112,8 @@ static void translate_active_width(struct ib_device *ibdev, u8 active_width, if (active_width & MLX5_IB_WIDTH_1X) *ib_width = IB_WIDTH_1X; + else if (active_width & MLX5_IB_WIDTH_2X) + *ib_width = IB_WIDTH_2X; else if (active_width & MLX5_IB_WIDTH_4X) *ib_width = IB_WIDTH_4X; else if (active_width & MLX5_IB_WIDTH_8X) @@ -1220,6 +1229,9 @@ static int mlx5_query_hca_port(struct ib_device *ibdev, u8 port, props->subnet_timeout = rep->subnet_timeout; props->init_type_reply = rep->init_type_reply; + if (props->port_cap_flags & IB_PORT_CAP_MASK2_SUP) + props->port_cap_flags2 = rep->cap_mask2; + err = mlx5_query_port_link_width_oper(mdev, &ib_link_width_oper, port); if (err) goto out; @@ -1756,7 +1768,7 @@ static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev, #endif if (req.flags & MLX5_IB_ALLOC_UCTX_DEVX) { - err = mlx5_ib_devx_create(dev); + err = mlx5_ib_devx_create(dev, true); if (err < 0) goto out_uars; context->devx_uid = err; @@ -3710,7 +3722,8 @@ _create_raw_flow_rule(struct mlx5_ib_dev *dev, struct mlx5_flow_destination *dst, struct mlx5_ib_flow_matcher *fs_matcher, struct mlx5_flow_act *flow_act, - void *cmd_in, int inlen) + void *cmd_in, int inlen, + int dst_num) { struct mlx5_ib_flow_handler *handler; struct mlx5_flow_spec *spec; @@ -3732,7 +3745,7 @@ _create_raw_flow_rule(struct mlx5_ib_dev *dev, spec->match_criteria_enable = fs_matcher->match_criteria_enable; handler->rule = mlx5_add_flow_rules(ft, spec, - flow_act, dst, 1); + flow_act, dst, dst_num); if (IS_ERR(handler->rule)) { err = PTR_ERR(handler->rule); @@ -3795,12 +3808,14 @@ struct mlx5_ib_flow_handler * mlx5_ib_raw_fs_rule_add(struct mlx5_ib_dev *dev, struct mlx5_ib_flow_matcher *fs_matcher, struct mlx5_flow_act *flow_act, + u32 counter_id, void *cmd_in, int inlen, int dest_id, int dest_type) { struct mlx5_flow_destination *dst; struct mlx5_ib_flow_prio *ft_prio; struct mlx5_ib_flow_handler *handler; + int dst_num = 0; bool mcast; int err; @@ -3810,7 +3825,7 @@ mlx5_ib_raw_fs_rule_add(struct mlx5_ib_dev *dev, if (fs_matcher->priority > MLX5_IB_FLOW_LAST_PRIO) return ERR_PTR(-ENOMEM); - dst = kzalloc(sizeof(*dst), GFP_KERNEL); + dst = kzalloc(sizeof(*dst) * 2, GFP_KERNEL); if (!dst) return ERR_PTR(-ENOMEM); @@ -3824,20 +3839,28 @@ mlx5_ib_raw_fs_rule_add(struct mlx5_ib_dev *dev, } if (dest_type == MLX5_FLOW_DESTINATION_TYPE_TIR) { - dst->type = dest_type; - dst->tir_num = dest_id; + dst[dst_num].type = dest_type; + dst[dst_num].tir_num = dest_id; flow_act->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; } else if (dest_type == MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE) { - dst->type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE_NUM; - dst->ft_num = dest_id; + dst[dst_num].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE_NUM; + dst[dst_num].ft_num = dest_id; flow_act->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; } else { - dst->type = MLX5_FLOW_DESTINATION_TYPE_PORT; + dst[dst_num].type = MLX5_FLOW_DESTINATION_TYPE_PORT; flow_act->action |= MLX5_FLOW_CONTEXT_ACTION_ALLOW; } + dst_num++; + + if (flow_act->action & MLX5_FLOW_CONTEXT_ACTION_COUNT) { + dst[dst_num].type = MLX5_FLOW_DESTINATION_TYPE_COUNTER; + dst[dst_num].counter_id = counter_id; + dst_num++; + } + handler = _create_raw_flow_rule(dev, ft_prio, dst, fs_matcher, flow_act, - cmd_in, inlen); + cmd_in, inlen, dst_num); if (IS_ERR(handler)) { err = PTR_ERR(handler); @@ -5095,6 +5118,9 @@ static int mlx5_ib_alloc_counters(struct mlx5_ib_dev *dev) { int err = 0; int i; + bool is_shared; + + is_shared = MLX5_CAP_GEN(dev->mdev, log_max_uctx) != 0; for (i = 0; i < dev->num_ports; i++) { err = __mlx5_ib_alloc_counters(dev, &dev->port[i].cnts); @@ -5104,8 +5130,10 @@ static int mlx5_ib_alloc_counters(struct mlx5_ib_dev *dev) mlx5_ib_fill_counters(dev, dev->port[i].cnts.names, dev->port[i].cnts.offsets); - err = mlx5_core_alloc_q_counter(dev->mdev, - &dev->port[i].cnts.set_id); + err = mlx5_cmd_alloc_q_counter(dev->mdev, + &dev->port[i].cnts.set_id, + is_shared ? + MLX5_SHARED_RESOURCE_UID : 0); if (err) { mlx5_ib_warn(dev, "couldn't allocate queue counter for port %d, err %d\n", @@ -5382,14 +5410,6 @@ static void init_delay_drop(struct mlx5_ib_dev *dev) mlx5_ib_warn(dev, "Failed to init delay drop debugfs\n"); } -static const struct cpumask * -mlx5_ib_get_vector_affinity(struct ib_device *ibdev, int comp_vector) -{ - struct mlx5_ib_dev *dev = to_mdev(ibdev); - - return mlx5_comp_irq_get_affinity_mask(dev->mdev, comp_vector); -} - /* The mlx5_ib_multiport_mutex should be held when calling this function */ static void mlx5_ib_unbind_slave_port(struct mlx5_ib_dev *ibdev, struct mlx5_ib_multiport_info *mpi) @@ -5617,30 +5637,17 @@ ADD_UVERBS_ATTRIBUTES_SIMPLE( UVERBS_ATTR_FLAGS_IN(MLX5_IB_ATTR_CREATE_FLOW_ACTION_FLAGS, enum mlx5_ib_uapi_flow_action_flags)); -static int populate_specs_root(struct mlx5_ib_dev *dev) -{ - const struct uverbs_object_tree_def **trees = dev->driver_trees; - size_t num_trees = 0; - - if (mlx5_accel_ipsec_device_caps(dev->mdev) & - MLX5_ACCEL_IPSEC_CAP_DEVICE) - trees[num_trees++] = &mlx5_ib_flow_action; - - if (MLX5_CAP_DEV_MEM(dev->mdev, memic)) - trees[num_trees++] = &mlx5_ib_dm; - - if (MLX5_CAP_GEN_64(dev->mdev, general_obj_types) & - MLX5_GENERAL_OBJ_TYPES_CAP_UCTX) - trees[num_trees++] = mlx5_ib_get_devx_tree(); - - num_trees += mlx5_ib_get_flow_trees(trees + num_trees); - - WARN_ON(num_trees >= ARRAY_SIZE(dev->driver_trees)); - trees[num_trees] = NULL; - dev->ib_dev.driver_specs = trees; +static const struct uapi_definition mlx5_ib_defs[] = { +#if IS_ENABLED(CONFIG_INFINIBAND_USER_ACCESS) + UAPI_DEF_CHAIN(mlx5_ib_devx_defs), + UAPI_DEF_CHAIN(mlx5_ib_flow_defs), +#endif - return 0; -} + UAPI_DEF_CHAIN_OBJ_TREE(UVERBS_OBJECT_FLOW_ACTION, + &mlx5_ib_flow_action), + UAPI_DEF_CHAIN_OBJ_TREE(UVERBS_OBJECT_DM, &mlx5_ib_dm), + {} +}; static int mlx5_ib_read_counters(struct ib_counters *counters, struct ib_counters_read_attr *read_attr, @@ -5717,6 +5724,8 @@ void mlx5_ib_stage_init_cleanup(struct mlx5_ib_dev *dev) mlx5_ib_cleanup_multiport_master(dev); #ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING cleanup_srcu_struct(&dev->mr_srcu); + drain_workqueue(dev->advise_mr_wq); + destroy_workqueue(dev->advise_mr_wq); #endif kfree(dev->port); } @@ -5771,9 +5780,17 @@ int mlx5_ib_stage_init_init(struct mlx5_ib_dev *dev) dev->memic.dev = mdev; #ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING + dev->advise_mr_wq = alloc_ordered_workqueue("mlx5_ib_advise_mr_wq", 0); + if (!dev->advise_mr_wq) { + err = -ENOMEM; + goto err_mp; + } + err = init_srcu_struct(&dev->mr_srcu); - if (err) - goto err_free_port; + if (err) { + destroy_workqueue(dev->advise_mr_wq); + goto err_mp; + } #endif return 0; @@ -5817,6 +5834,94 @@ static void mlx5_ib_stage_flow_db_cleanup(struct mlx5_ib_dev *dev) kfree(dev->flow_db); } +static const struct ib_device_ops mlx5_ib_dev_ops = { + .add_gid = mlx5_ib_add_gid, + .alloc_mr = mlx5_ib_alloc_mr, + .alloc_pd = mlx5_ib_alloc_pd, + .alloc_ucontext = mlx5_ib_alloc_ucontext, + .attach_mcast = mlx5_ib_mcg_attach, + .check_mr_status = mlx5_ib_check_mr_status, + .create_ah = mlx5_ib_create_ah, + .create_counters = mlx5_ib_create_counters, + .create_cq = mlx5_ib_create_cq, + .create_flow = mlx5_ib_create_flow, + .create_qp = mlx5_ib_create_qp, + .create_srq = mlx5_ib_create_srq, + .dealloc_pd = mlx5_ib_dealloc_pd, + .dealloc_ucontext = mlx5_ib_dealloc_ucontext, + .del_gid = mlx5_ib_del_gid, + .dereg_mr = mlx5_ib_dereg_mr, + .destroy_ah = mlx5_ib_destroy_ah, + .destroy_counters = mlx5_ib_destroy_counters, + .destroy_cq = mlx5_ib_destroy_cq, + .destroy_flow = mlx5_ib_destroy_flow, + .destroy_flow_action = mlx5_ib_destroy_flow_action, + .destroy_qp = mlx5_ib_destroy_qp, + .destroy_srq = mlx5_ib_destroy_srq, + .detach_mcast = mlx5_ib_mcg_detach, + .disassociate_ucontext = mlx5_ib_disassociate_ucontext, + .drain_rq = mlx5_ib_drain_rq, + .drain_sq = mlx5_ib_drain_sq, + .get_dev_fw_str = get_dev_fw_str, + .get_dma_mr = mlx5_ib_get_dma_mr, + .get_link_layer = mlx5_ib_port_link_layer, + .map_mr_sg = mlx5_ib_map_mr_sg, + .mmap = mlx5_ib_mmap, + .modify_cq = mlx5_ib_modify_cq, + .modify_device = mlx5_ib_modify_device, + .modify_port = mlx5_ib_modify_port, + .modify_qp = mlx5_ib_modify_qp, + .modify_srq = mlx5_ib_modify_srq, + .poll_cq = mlx5_ib_poll_cq, + .post_recv = mlx5_ib_post_recv, + .post_send = mlx5_ib_post_send, + .post_srq_recv = mlx5_ib_post_srq_recv, + .process_mad = mlx5_ib_process_mad, + .query_ah = mlx5_ib_query_ah, + .query_device = mlx5_ib_query_device, + .query_gid = mlx5_ib_query_gid, + .query_pkey = mlx5_ib_query_pkey, + .query_qp = mlx5_ib_query_qp, + .query_srq = mlx5_ib_query_srq, + .read_counters = mlx5_ib_read_counters, + .reg_user_mr = mlx5_ib_reg_user_mr, + .req_notify_cq = mlx5_ib_arm_cq, + .rereg_user_mr = mlx5_ib_rereg_user_mr, + .resize_cq = mlx5_ib_resize_cq, +}; + +static const struct ib_device_ops mlx5_ib_dev_flow_ipsec_ops = { + .create_flow_action_esp = mlx5_ib_create_flow_action_esp, + .modify_flow_action_esp = mlx5_ib_modify_flow_action_esp, +}; + +static const struct ib_device_ops mlx5_ib_dev_ipoib_enhanced_ops = { + .rdma_netdev_get_params = mlx5_ib_rn_get_params, +}; + +static const struct ib_device_ops mlx5_ib_dev_sriov_ops = { + .get_vf_config = mlx5_ib_get_vf_config, + .get_vf_stats = mlx5_ib_get_vf_stats, + .set_vf_guid = mlx5_ib_set_vf_guid, + .set_vf_link_state = mlx5_ib_set_vf_link_state, +}; + +static const struct ib_device_ops mlx5_ib_dev_mw_ops = { + .alloc_mw = mlx5_ib_alloc_mw, + .dealloc_mw = mlx5_ib_dealloc_mw, +}; + +static const struct ib_device_ops mlx5_ib_dev_xrc_ops = { + .alloc_xrcd = mlx5_ib_alloc_xrcd, + .dealloc_xrcd = mlx5_ib_dealloc_xrcd, +}; + +static const struct ib_device_ops mlx5_ib_dev_dm_ops = { + .alloc_dm = mlx5_ib_alloc_dm, + .dealloc_dm = mlx5_ib_dealloc_dm, + .reg_dm_mr = mlx5_ib_reg_dm_mr, +}; + int mlx5_ib_stage_caps_init(struct mlx5_ib_dev *dev) { struct mlx5_core_dev *mdev = dev->mdev; @@ -5855,104 +5960,45 @@ int mlx5_ib_stage_caps_init(struct mlx5_ib_dev *dev) (1ull << IB_USER_VERBS_EX_CMD_CREATE_CQ) | (1ull << IB_USER_VERBS_EX_CMD_CREATE_QP) | (1ull << IB_USER_VERBS_EX_CMD_MODIFY_QP) | - (1ull << IB_USER_VERBS_EX_CMD_MODIFY_CQ); - - dev->ib_dev.query_device = mlx5_ib_query_device; - dev->ib_dev.get_link_layer = mlx5_ib_port_link_layer; - dev->ib_dev.query_gid = mlx5_ib_query_gid; - dev->ib_dev.add_gid = mlx5_ib_add_gid; - dev->ib_dev.del_gid = mlx5_ib_del_gid; - dev->ib_dev.query_pkey = mlx5_ib_query_pkey; - dev->ib_dev.modify_device = mlx5_ib_modify_device; - dev->ib_dev.modify_port = mlx5_ib_modify_port; - dev->ib_dev.alloc_ucontext = mlx5_ib_alloc_ucontext; - dev->ib_dev.dealloc_ucontext = mlx5_ib_dealloc_ucontext; - dev->ib_dev.mmap = mlx5_ib_mmap; - dev->ib_dev.alloc_pd = mlx5_ib_alloc_pd; - dev->ib_dev.dealloc_pd = mlx5_ib_dealloc_pd; - dev->ib_dev.create_ah = mlx5_ib_create_ah; - dev->ib_dev.query_ah = mlx5_ib_query_ah; - dev->ib_dev.destroy_ah = mlx5_ib_destroy_ah; - dev->ib_dev.create_srq = mlx5_ib_create_srq; - dev->ib_dev.modify_srq = mlx5_ib_modify_srq; - dev->ib_dev.query_srq = mlx5_ib_query_srq; - dev->ib_dev.destroy_srq = mlx5_ib_destroy_srq; - dev->ib_dev.post_srq_recv = mlx5_ib_post_srq_recv; - dev->ib_dev.create_qp = mlx5_ib_create_qp; - dev->ib_dev.modify_qp = mlx5_ib_modify_qp; - dev->ib_dev.query_qp = mlx5_ib_query_qp; - dev->ib_dev.destroy_qp = mlx5_ib_destroy_qp; - dev->ib_dev.drain_sq = mlx5_ib_drain_sq; - dev->ib_dev.drain_rq = mlx5_ib_drain_rq; - dev->ib_dev.post_send = mlx5_ib_post_send; - dev->ib_dev.post_recv = mlx5_ib_post_recv; - dev->ib_dev.create_cq = mlx5_ib_create_cq; - dev->ib_dev.modify_cq = mlx5_ib_modify_cq; - dev->ib_dev.resize_cq = mlx5_ib_resize_cq; - dev->ib_dev.destroy_cq = mlx5_ib_destroy_cq; - dev->ib_dev.poll_cq = mlx5_ib_poll_cq; - dev->ib_dev.req_notify_cq = mlx5_ib_arm_cq; - dev->ib_dev.get_dma_mr = mlx5_ib_get_dma_mr; - dev->ib_dev.reg_user_mr = mlx5_ib_reg_user_mr; - dev->ib_dev.rereg_user_mr = mlx5_ib_rereg_user_mr; - dev->ib_dev.dereg_mr = mlx5_ib_dereg_mr; - dev->ib_dev.attach_mcast = mlx5_ib_mcg_attach; - dev->ib_dev.detach_mcast = mlx5_ib_mcg_detach; - dev->ib_dev.process_mad = mlx5_ib_process_mad; - dev->ib_dev.alloc_mr = mlx5_ib_alloc_mr; - dev->ib_dev.map_mr_sg = mlx5_ib_map_mr_sg; - dev->ib_dev.check_mr_status = mlx5_ib_check_mr_status; - dev->ib_dev.get_dev_fw_str = get_dev_fw_str; - dev->ib_dev.get_vector_affinity = mlx5_ib_get_vector_affinity; + (1ull << IB_USER_VERBS_EX_CMD_MODIFY_CQ) | + (1ull << IB_USER_VERBS_EX_CMD_CREATE_FLOW) | + (1ull << IB_USER_VERBS_EX_CMD_DESTROY_FLOW); + if (MLX5_CAP_GEN(mdev, ipoib_enhanced_offloads) && IS_ENABLED(CONFIG_MLX5_CORE_IPOIB)) - dev->ib_dev.rdma_netdev_get_params = mlx5_ib_rn_get_params; + ib_set_device_ops(&dev->ib_dev, + &mlx5_ib_dev_ipoib_enhanced_ops); - if (mlx5_core_is_pf(mdev)) { - dev->ib_dev.get_vf_config = mlx5_ib_get_vf_config; - dev->ib_dev.set_vf_link_state = mlx5_ib_set_vf_link_state; - dev->ib_dev.get_vf_stats = mlx5_ib_get_vf_stats; - dev->ib_dev.set_vf_guid = mlx5_ib_set_vf_guid; - } - - dev->ib_dev.disassociate_ucontext = mlx5_ib_disassociate_ucontext; + if (mlx5_core_is_pf(mdev)) + ib_set_device_ops(&dev->ib_dev, &mlx5_ib_dev_sriov_ops); dev->umr_fence = mlx5_get_umr_fence(MLX5_CAP_GEN(mdev, umr_fence)); if (MLX5_CAP_GEN(mdev, imaicl)) { - dev->ib_dev.alloc_mw = mlx5_ib_alloc_mw; - dev->ib_dev.dealloc_mw = mlx5_ib_dealloc_mw; dev->ib_dev.uverbs_cmd_mask |= (1ull << IB_USER_VERBS_CMD_ALLOC_MW) | (1ull << IB_USER_VERBS_CMD_DEALLOC_MW); + ib_set_device_ops(&dev->ib_dev, &mlx5_ib_dev_mw_ops); } if (MLX5_CAP_GEN(mdev, xrc)) { - dev->ib_dev.alloc_xrcd = mlx5_ib_alloc_xrcd; - dev->ib_dev.dealloc_xrcd = mlx5_ib_dealloc_xrcd; dev->ib_dev.uverbs_cmd_mask |= (1ull << IB_USER_VERBS_CMD_OPEN_XRCD) | (1ull << IB_USER_VERBS_CMD_CLOSE_XRCD); + ib_set_device_ops(&dev->ib_dev, &mlx5_ib_dev_xrc_ops); } - if (MLX5_CAP_DEV_MEM(mdev, memic)) { - dev->ib_dev.alloc_dm = mlx5_ib_alloc_dm; - dev->ib_dev.dealloc_dm = mlx5_ib_dealloc_dm; - dev->ib_dev.reg_dm_mr = mlx5_ib_reg_dm_mr; - } + if (MLX5_CAP_DEV_MEM(mdev, memic)) + ib_set_device_ops(&dev->ib_dev, &mlx5_ib_dev_dm_ops); - dev->ib_dev.create_flow = mlx5_ib_create_flow; - dev->ib_dev.destroy_flow = mlx5_ib_destroy_flow; - dev->ib_dev.uverbs_ex_cmd_mask |= - (1ull << IB_USER_VERBS_EX_CMD_CREATE_FLOW) | - (1ull << IB_USER_VERBS_EX_CMD_DESTROY_FLOW); - dev->ib_dev.create_flow_action_esp = mlx5_ib_create_flow_action_esp; - dev->ib_dev.destroy_flow_action = mlx5_ib_destroy_flow_action; - dev->ib_dev.modify_flow_action_esp = mlx5_ib_modify_flow_action_esp; + if (mlx5_accel_ipsec_device_caps(dev->mdev) & + MLX5_ACCEL_IPSEC_CAP_DEVICE) + ib_set_device_ops(&dev->ib_dev, &mlx5_ib_dev_flow_ipsec_ops); dev->ib_dev.driver_id = RDMA_DRIVER_MLX5; - dev->ib_dev.create_counters = mlx5_ib_create_counters; - dev->ib_dev.destroy_counters = mlx5_ib_destroy_counters; - dev->ib_dev.read_counters = mlx5_ib_read_counters; + ib_set_device_ops(&dev->ib_dev, &mlx5_ib_dev_ops); + + if (IS_ENABLED(CONFIG_INFINIBAND_USER_ACCESS)) + dev->ib_dev.driver_def = mlx5_ib_defs; err = init_node_data(dev); if (err) @@ -5966,22 +6012,37 @@ int mlx5_ib_stage_caps_init(struct mlx5_ib_dev *dev) return 0; } +static const struct ib_device_ops mlx5_ib_dev_port_ops = { + .get_port_immutable = mlx5_port_immutable, + .query_port = mlx5_ib_query_port, +}; + static int mlx5_ib_stage_non_default_cb(struct mlx5_ib_dev *dev) { - dev->ib_dev.get_port_immutable = mlx5_port_immutable; - dev->ib_dev.query_port = mlx5_ib_query_port; - + ib_set_device_ops(&dev->ib_dev, &mlx5_ib_dev_port_ops); return 0; } +static const struct ib_device_ops mlx5_ib_dev_port_rep_ops = { + .get_port_immutable = mlx5_port_rep_immutable, + .query_port = mlx5_ib_rep_query_port, +}; + int mlx5_ib_stage_rep_non_default_cb(struct mlx5_ib_dev *dev) { - dev->ib_dev.get_port_immutable = mlx5_port_rep_immutable; - dev->ib_dev.query_port = mlx5_ib_rep_query_port; - + ib_set_device_ops(&dev->ib_dev, &mlx5_ib_dev_port_rep_ops); return 0; } +static const struct ib_device_ops mlx5_ib_dev_common_roce_ops = { + .create_rwq_ind_table = mlx5_ib_create_rwq_ind_table, + .create_wq = mlx5_ib_create_wq, + .destroy_rwq_ind_table = mlx5_ib_destroy_rwq_ind_table, + .destroy_wq = mlx5_ib_destroy_wq, + .get_netdev = mlx5_ib_get_netdev, + .modify_wq = mlx5_ib_modify_wq, +}; + static int mlx5_ib_stage_common_roce_init(struct mlx5_ib_dev *dev) { u8 port_num; @@ -5993,19 +6054,13 @@ static int mlx5_ib_stage_common_roce_init(struct mlx5_ib_dev *dev) dev->roce[i].last_port_state = IB_PORT_DOWN; } - dev->ib_dev.get_netdev = mlx5_ib_get_netdev; - dev->ib_dev.create_wq = mlx5_ib_create_wq; - dev->ib_dev.modify_wq = mlx5_ib_modify_wq; - dev->ib_dev.destroy_wq = mlx5_ib_destroy_wq; - dev->ib_dev.create_rwq_ind_table = mlx5_ib_create_rwq_ind_table; - dev->ib_dev.destroy_rwq_ind_table = mlx5_ib_destroy_rwq_ind_table; - dev->ib_dev.uverbs_ex_cmd_mask |= (1ull << IB_USER_VERBS_EX_CMD_CREATE_WQ) | (1ull << IB_USER_VERBS_EX_CMD_MODIFY_WQ) | (1ull << IB_USER_VERBS_EX_CMD_DESTROY_WQ) | (1ull << IB_USER_VERBS_EX_CMD_CREATE_RWQ_IND_TBL) | (1ull << IB_USER_VERBS_EX_CMD_DESTROY_RWQ_IND_TBL); + ib_set_device_ops(&dev->ib_dev, &mlx5_ib_dev_common_roce_ops); port_num = mlx5_core_native_port_num(dev->mdev) - 1; @@ -6104,11 +6159,15 @@ void mlx5_ib_stage_odp_cleanup(struct mlx5_ib_dev *dev) mlx5_ib_odp_cleanup_one(dev); } +static const struct ib_device_ops mlx5_ib_dev_hw_stats_ops = { + .alloc_hw_stats = mlx5_ib_alloc_hw_stats, + .get_hw_stats = mlx5_ib_get_hw_stats, +}; + int mlx5_ib_stage_counters_init(struct mlx5_ib_dev *dev) { if (MLX5_CAP_GEN(dev->mdev, max_qp_cnt)) { - dev->ib_dev.get_hw_stats = mlx5_ib_get_hw_stats; - dev->ib_dev.alloc_hw_stats = mlx5_ib_alloc_hw_stats; + ib_set_device_ops(&dev->ib_dev, &mlx5_ib_dev_hw_stats_ops); return mlx5_ib_alloc_counters(dev); } @@ -6166,11 +6225,6 @@ void mlx5_ib_stage_bfrag_cleanup(struct mlx5_ib_dev *dev) mlx5_free_bfreg(dev->mdev, &dev->bfreg); } -static int mlx5_ib_stage_populate_specs(struct mlx5_ib_dev *dev) -{ - return populate_specs_root(dev); -} - int mlx5_ib_stage_ib_reg_init(struct mlx5_ib_dev *dev) { const char *name; @@ -6226,7 +6280,7 @@ static int mlx5_ib_stage_devx_init(struct mlx5_ib_dev *dev) { int uid; - uid = mlx5_ib_devx_create(dev); + uid = mlx5_ib_devx_create(dev, false); if (uid > 0) dev->devx_whitelist_uid = uid; @@ -6318,9 +6372,6 @@ static const struct mlx5_ib_profile pf_profile = { STAGE_CREATE(MLX5_IB_STAGE_PRE_IB_REG_UMR, NULL, mlx5_ib_stage_pre_ib_reg_umr_cleanup), - STAGE_CREATE(MLX5_IB_STAGE_SPECS, - mlx5_ib_stage_populate_specs, - NULL), STAGE_CREATE(MLX5_IB_STAGE_WHITELIST_UID, mlx5_ib_stage_devx_init, mlx5_ib_stage_devx_cleanup), @@ -6372,9 +6423,6 @@ static const struct mlx5_ib_profile nic_rep_profile = { STAGE_CREATE(MLX5_IB_STAGE_PRE_IB_REG_UMR, NULL, mlx5_ib_stage_pre_ib_reg_umr_cleanup), - STAGE_CREATE(MLX5_IB_STAGE_SPECS, - mlx5_ib_stage_populate_specs, - NULL), STAGE_CREATE(MLX5_IB_STAGE_IB_REG, mlx5_ib_stage_ib_reg_init, mlx5_ib_stage_ib_reg_cleanup), diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index e507b6eb7c09..b06d3b1efea8 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -41,7 +41,6 @@ #include <linux/mlx5/cq.h> #include <linux/mlx5/fs.h> #include <linux/mlx5/qp.h> -#include <linux/mlx5/fs.h> #include <linux/types.h> #include <linux/mlx5/transobj.h> #include <rdma/ib_user_verbs.h> @@ -258,6 +257,7 @@ enum mlx5_ib_rq_flags { }; struct mlx5_ib_wq { + struct mlx5_frag_buf_ctrl fbc; u64 *wrid; u32 *wr_data; struct wr_list *w_list; @@ -275,8 +275,7 @@ struct mlx5_ib_wq { unsigned head; unsigned tail; u16 cur_post; - u16 last_poll; - void *qend; + void *cur_edge; }; enum mlx5_ib_wq_flags { @@ -461,6 +460,7 @@ enum mlx5_ib_qp_flags { MLX5_IB_QP_UNDERLAY = 1 << 10, MLX5_IB_QP_PCI_WRITE_END_PADDING = 1 << 11, MLX5_IB_QP_TUNNEL_OFFLOAD = 1 << 12, + MLX5_IB_QP_PACKET_BASED_CREDIT = 1 << 13, }; struct mlx5_umr_wr { @@ -524,6 +524,7 @@ struct mlx5_ib_srq { struct mlx5_core_srq msrq; struct mlx5_frag_buf buf; struct mlx5_db db; + struct mlx5_frag_buf_ctrl fbc; u64 *wrid; /* protect SRQ hanlding */ @@ -541,7 +542,6 @@ struct mlx5_ib_srq { struct mlx5_ib_xrcd { struct ib_xrcd ibxrcd; u32 xrcdn; - u16 uid; }; enum mlx5_ib_mtt_access_flags { @@ -784,7 +784,6 @@ enum mlx5_ib_stages { MLX5_IB_STAGE_UAR, MLX5_IB_STAGE_BFREG, MLX5_IB_STAGE_PRE_IB_REG_UMR, - MLX5_IB_STAGE_SPECS, MLX5_IB_STAGE_WHITELIST_UID, MLX5_IB_STAGE_IB_REG, MLX5_IB_STAGE_POST_IB_REG_UMR, @@ -895,7 +894,6 @@ struct mlx5_ib_pf_eq { struct mlx5_ib_dev { struct ib_device ib_dev; - const struct uverbs_object_tree_def *driver_trees[7]; struct mlx5_core_dev *mdev; struct notifier_block mdev_events; struct mlx5_roce roce[MLX5_MAX_PORTS]; @@ -924,6 +922,7 @@ struct mlx5_ib_dev { */ struct srcu_struct mr_srcu; u32 null_mkey; + struct workqueue_struct *advise_mr_wq; #endif struct mlx5_ib_flow_db *flow_db; /* protect resources needed as part of reset flow */ @@ -1043,9 +1042,9 @@ int mlx5_MAD_IFC(struct mlx5_ib_dev *dev, int ignore_mkey, int ignore_bkey, u8 port, const struct ib_wc *in_wc, const struct ib_grh *in_grh, const void *in_mad, void *response_mad); struct ib_ah *mlx5_ib_create_ah(struct ib_pd *pd, struct rdma_ah_attr *ah_attr, - struct ib_udata *udata); + u32 flags, struct ib_udata *udata); int mlx5_ib_query_ah(struct ib_ah *ibah, struct rdma_ah_attr *ah_attr); -int mlx5_ib_destroy_ah(struct ib_ah *ah); +int mlx5_ib_destroy_ah(struct ib_ah *ah, u32 flags); struct ib_srq *mlx5_ib_create_srq(struct ib_pd *pd, struct ib_srq_init_attr *init_attr, struct ib_udata *udata); @@ -1071,7 +1070,6 @@ int mlx5_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr, const struct ib_send_wr **bad_wr); int mlx5_ib_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr, const struct ib_recv_wr **bad_wr); -void *mlx5_get_send_wqe(struct mlx5_ib_qp *qp, int n); int mlx5_ib_read_user_wqe(struct mlx5_ib_qp *qp, int send, int wqe_index, void *buffer, u32 length, struct mlx5_ib_qp_base *base); @@ -1088,6 +1086,12 @@ struct ib_mr *mlx5_ib_get_dma_mr(struct ib_pd *pd, int acc); struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, u64 virt_addr, int access_flags, struct ib_udata *udata); +int mlx5_ib_advise_mr(struct ib_pd *pd, + enum ib_uverbs_advise_mr_advice advice, + u32 flags, + struct ib_sge *sg_list, + u32 num_sge, + struct uverbs_attr_bundle *attrs); struct ib_mw *mlx5_ib_alloc_mw(struct ib_pd *pd, enum ib_mw_type type, struct ib_udata *udata); int mlx5_ib_dealloc_mw(struct ib_mw *mw); @@ -1185,6 +1189,10 @@ void mlx5_ib_invalidate_range(struct ib_umem_odp *umem_odp, unsigned long start, void mlx5_odp_init_mr_cache_entry(struct mlx5_cache_ent *ent); void mlx5_odp_populate_klm(struct mlx5_klm *pklm, size_t offset, size_t nentries, struct mlx5_ib_mr *mr, int flags); + +int mlx5_ib_advise_mr_prefetch(struct ib_pd *pd, + enum ib_uverbs_advise_mr_advice advice, + u32 flags, struct ib_sge *sg_list, u32 num_sge); #else /* CONFIG_INFINIBAND_ON_DEMAND_PAGING */ static inline void mlx5_ib_internal_fill_odp_caps(struct mlx5_ib_dev *dev) { @@ -1200,6 +1208,13 @@ static inline void mlx5_odp_populate_klm(struct mlx5_klm *pklm, size_t offset, size_t nentries, struct mlx5_ib_mr *mr, int flags) {} +static inline int +mlx5_ib_advise_mr_prefetch(struct ib_pd *pd, + enum ib_uverbs_advise_mr_advice advice, u32 flags, + struct ib_sge *sg_list, u32 num_sge) +{ + return -EOPNOTSUPP; +} #endif /* CONFIG_INFINIBAND_ON_DEMAND_PAGING */ /* Needed for rep profile */ @@ -1268,32 +1283,29 @@ void mlx5_ib_put_native_port_mdev(struct mlx5_ib_dev *dev, u8 port_num); #if IS_ENABLED(CONFIG_INFINIBAND_USER_ACCESS) -int mlx5_ib_devx_create(struct mlx5_ib_dev *dev); +int mlx5_ib_devx_create(struct mlx5_ib_dev *dev, bool is_user); void mlx5_ib_devx_destroy(struct mlx5_ib_dev *dev, u16 uid); const struct uverbs_object_tree_def *mlx5_ib_get_devx_tree(void); +extern const struct uapi_definition mlx5_ib_devx_defs[]; +extern const struct uapi_definition mlx5_ib_flow_defs[]; struct mlx5_ib_flow_handler *mlx5_ib_raw_fs_rule_add( struct mlx5_ib_dev *dev, struct mlx5_ib_flow_matcher *fs_matcher, - struct mlx5_flow_act *flow_act, void *cmd_in, int inlen, - int dest_id, int dest_type); + struct mlx5_flow_act *flow_act, u32 counter_id, + void *cmd_in, int inlen, int dest_id, int dest_type); bool mlx5_ib_devx_is_flow_dest(void *obj, int *dest_id, int *dest_type); +bool mlx5_ib_devx_is_flow_counter(void *obj, u32 *counter_id); int mlx5_ib_get_flow_trees(const struct uverbs_object_tree_def **root); void mlx5_ib_destroy_flow_action_raw(struct mlx5_ib_flow_action *maction); #else static inline int -mlx5_ib_devx_create(struct mlx5_ib_dev *dev) { return -EOPNOTSUPP; }; +mlx5_ib_devx_create(struct mlx5_ib_dev *dev, + bool is_user) { return -EOPNOTSUPP; } static inline void mlx5_ib_devx_destroy(struct mlx5_ib_dev *dev, u16 uid) {} -static inline const struct uverbs_object_tree_def * -mlx5_ib_get_devx_tree(void) { return NULL; } static inline bool mlx5_ib_devx_is_flow_dest(void *obj, int *dest_id, int *dest_type) { return false; } -static inline int -mlx5_ib_get_flow_trees(const struct uverbs_object_tree_def **root) -{ - return 0; -} static inline void mlx5_ib_destroy_flow_action_raw(struct mlx5_ib_flow_action *maction) { diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index 9b195d65a13e..1bd8c1b1dba1 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -73,7 +73,8 @@ static int destroy_mkey(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr) #ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING /* Wait until all page fault handlers using the mr complete. */ - synchronize_srcu(&dev->mr_srcu); + if (mr->umem && mr->umem->is_odp) + synchronize_srcu(&dev->mr_srcu); #endif return err; @@ -237,6 +238,9 @@ static void remove_keys(struct mlx5_ib_dev *dev, int c, int num) { struct mlx5_mr_cache *cache = &dev->cache; struct mlx5_cache_ent *ent = &cache->ent[c]; +#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING + bool odp_mkey_exist = false; +#endif struct mlx5_ib_mr *tmp_mr; struct mlx5_ib_mr *mr; LIST_HEAD(del_list); @@ -249,6 +253,10 @@ static void remove_keys(struct mlx5_ib_dev *dev, int c, int num) break; } mr = list_first_entry(&ent->head, struct mlx5_ib_mr, list); +#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING + if (mr->umem && mr->umem->is_odp) + odp_mkey_exist = true; +#endif list_move(&mr->list, &del_list); ent->cur--; ent->size--; @@ -257,7 +265,8 @@ static void remove_keys(struct mlx5_ib_dev *dev, int c, int num) } #ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING - synchronize_srcu(&dev->mr_srcu); + if (odp_mkey_exist) + synchronize_srcu(&dev->mr_srcu); #endif list_for_each_entry_safe(mr, tmp_mr, &del_list, list) { @@ -572,6 +581,7 @@ static void clean_keys(struct mlx5_ib_dev *dev, int c) { struct mlx5_mr_cache *cache = &dev->cache; struct mlx5_cache_ent *ent = &cache->ent[c]; + bool odp_mkey_exist = false; struct mlx5_ib_mr *tmp_mr; struct mlx5_ib_mr *mr; LIST_HEAD(del_list); @@ -584,6 +594,8 @@ static void clean_keys(struct mlx5_ib_dev *dev, int c) break; } mr = list_first_entry(&ent->head, struct mlx5_ib_mr, list); + if (mr->umem && mr->umem->is_odp) + odp_mkey_exist = true; list_move(&mr->list, &del_list); ent->cur--; ent->size--; @@ -592,7 +604,8 @@ static void clean_keys(struct mlx5_ib_dev *dev, int c) } #ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING - synchronize_srcu(&dev->mr_srcu); + if (odp_mkey_exist) + synchronize_srcu(&dev->mr_srcu); #endif list_for_each_entry_safe(mr, tmp_mr, &del_list, list) { @@ -1211,7 +1224,7 @@ err_1: return ERR_PTR(err); } -static void set_mr_fileds(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr, +static void set_mr_fields(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr, int npages, u64 length, int access_flags) { mr->npages = npages; @@ -1267,7 +1280,7 @@ static struct ib_mr *mlx5_ib_get_memic_mr(struct ib_pd *pd, u64 memic_addr, kfree(in); mr->umem = NULL; - set_mr_fileds(dev, mr, 0, length, acc); + set_mr_fields(dev, mr, 0, length, acc); return &mr->ibmr; @@ -1280,6 +1293,21 @@ err_free: return ERR_PTR(err); } +int mlx5_ib_advise_mr(struct ib_pd *pd, + enum ib_uverbs_advise_mr_advice advice, + u32 flags, + struct ib_sge *sg_list, + u32 num_sge, + struct uverbs_attr_bundle *attrs) +{ + if (advice != IB_UVERBS_ADVISE_MR_ADVICE_PREFETCH && + advice != IB_UVERBS_ADVISE_MR_ADVICE_PREFETCH_WRITE) + return -EOPNOTSUPP; + + return mlx5_ib_advise_mr_prefetch(pd, advice, flags, + sg_list, num_sge); +} + struct ib_mr *mlx5_ib_reg_dm_mr(struct ib_pd *pd, struct ib_dm *dm, struct ib_dm_mr_attr *attr, struct uverbs_attr_bundle *attrs) @@ -1369,7 +1397,7 @@ struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, mlx5_ib_dbg(dev, "mkey 0x%x\n", mr->mmkey.key); mr->umem = umem; - set_mr_fileds(dev, mr, npages, length, access_flags); + set_mr_fields(dev, mr, npages, length, access_flags); #ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING update_odp_mr(mr); @@ -1536,7 +1564,7 @@ int mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start, goto err; } - set_mr_fileds(dev, mr, npages, len, access_flags); + set_mr_fields(dev, mr, npages, len, access_flags); #ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING update_odp_mr(mr); diff --git a/drivers/infiniband/hw/mlx5/odp.c b/drivers/infiniband/hw/mlx5/odp.c index 7309fb6bf0d2..01e0f6200631 100644 --- a/drivers/infiniband/hw/mlx5/odp.c +++ b/drivers/infiniband/hw/mlx5/odp.c @@ -549,12 +549,17 @@ void mlx5_ib_free_implicit_mr(struct mlx5_ib_mr *imr) wait_event(imr->q_leaf_free, !atomic_read(&imr->num_leaf_free)); } +#define MLX5_PF_FLAGS_PREFETCH BIT(0) +#define MLX5_PF_FLAGS_DOWNGRADE BIT(1) static int pagefault_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr, - u64 io_virt, size_t bcnt, u32 *bytes_mapped) + u64 io_virt, size_t bcnt, u32 *bytes_mapped, + u32 flags) { int npages = 0, current_seq, page_shift, ret, np; bool implicit = false; struct ib_umem_odp *odp_mr = to_ib_umem_odp(mr->umem); + bool downgrade = flags & MLX5_PF_FLAGS_DOWNGRADE; + bool prefetch = flags & MLX5_PF_FLAGS_PREFETCH; u64 access_mask = ODP_READ_ALLOWED_BIT; u64 start_idx, page_mask; struct ib_umem_odp *odp; @@ -578,7 +583,15 @@ next_mr: page_mask = ~(BIT(page_shift) - 1); start_idx = (io_virt - (mr->mmkey.iova & page_mask)) >> page_shift; - if (mr->umem->writable) + if (prefetch && !downgrade && !mr->umem->writable) { + /* prefetch with write-access must + * be supported by the MR + */ + ret = -EINVAL; + goto out; + } + + if (mr->umem->writable && !downgrade) access_mask |= ODP_WRITE_ALLOWED_BIT; current_seq = READ_ONCE(odp->notifiers_seq); @@ -683,12 +696,13 @@ struct pf_frame { * -EFAULT when there's an error mapping the requested pages. The caller will * abort the page fault handling. */ -static int pagefault_single_data_segment(struct mlx5_ib_dev *dev, - u32 key, u64 io_virt, size_t bcnt, +static int pagefault_single_data_segment(struct mlx5_ib_dev *dev, u32 key, + u64 io_virt, size_t bcnt, u32 *bytes_committed, - u32 *bytes_mapped) + u32 *bytes_mapped, u32 flags) { int npages = 0, srcu_key, ret, i, outlen, cur_outlen = 0, depth = 0; + bool prefetch = flags & MLX5_PF_FLAGS_PREFETCH; struct pf_frame *head = NULL, *frame; struct mlx5_core_mkey *mmkey; struct mlx5_ib_mw *mw; @@ -710,6 +724,12 @@ next_mr: goto srcu_unlock; } + if (prefetch && mmkey->type != MLX5_MKEY_MR) { + mlx5_ib_dbg(dev, "prefetch is allowed only for MR\n"); + ret = -EINVAL; + goto srcu_unlock; + } + switch (mmkey->type) { case MLX5_MKEY_MR: mr = container_of(mmkey, struct mlx5_ib_mr, mmkey); @@ -719,6 +739,11 @@ next_mr: goto srcu_unlock; } + if (prefetch && !mr->umem->is_odp) { + ret = -EINVAL; + goto srcu_unlock; + } + if (!mr->umem->is_odp) { mlx5_ib_dbg(dev, "skipping non ODP MR (lkey=0x%06x) in page fault handler.\n", key); @@ -728,7 +753,7 @@ next_mr: goto srcu_unlock; } - ret = pagefault_mr(dev, mr, io_virt, bcnt, bytes_mapped); + ret = pagefault_mr(dev, mr, io_virt, bcnt, bytes_mapped, flags); if (ret < 0) goto srcu_unlock; @@ -905,7 +930,7 @@ static int pagefault_data_segments(struct mlx5_ib_dev *dev, ret = pagefault_single_data_segment(dev, key, io_virt, bcnt, &pfault->bytes_committed, - bytes_mapped); + bytes_mapped, 0); if (ret < 0) break; npages += ret; @@ -1216,7 +1241,8 @@ static void mlx5_ib_mr_rdma_pfault_handler(struct mlx5_ib_dev *dev, } ret = pagefault_single_data_segment(dev, rkey, address, length, - &pfault->bytes_committed, NULL); + &pfault->bytes_committed, NULL, + 0); if (ret == -EAGAIN) { /* We're racing with an invalidation, don't prefetch */ prefetch_activated = 0; @@ -1243,7 +1269,8 @@ static void mlx5_ib_mr_rdma_pfault_handler(struct mlx5_ib_dev *dev, ret = pagefault_single_data_segment(dev, rkey, address, prefetch_len, - &bytes_committed, NULL); + &bytes_committed, NULL, + 0); if (ret < 0 && ret != -EAGAIN) { mlx5_ib_dbg(dev, "Prefetch failed. ret: %d, QP 0x%x, address: 0x%.16llx, length = 0x%.16x\n", ret, pfault->token, address, prefetch_len); @@ -1492,10 +1519,17 @@ void mlx5_odp_init_mr_cache_entry(struct mlx5_cache_ent *ent) } } +static const struct ib_device_ops mlx5_ib_dev_odp_ops = { + .advise_mr = mlx5_ib_advise_mr, +}; + int mlx5_ib_odp_init_one(struct mlx5_ib_dev *dev) { int ret = 0; + if (dev->odp_caps.general_caps & IB_ODP_SUPPORT) + ib_set_device_ops(&dev->ib_dev, &mlx5_ib_dev_odp_ops); + if (dev->odp_caps.general_caps & IB_ODP_SUPPORT_IMPLICIT) { ret = mlx5_cmd_null_mkey(dev->mdev, &dev->null_mkey); if (ret) { @@ -1527,3 +1561,76 @@ int mlx5_ib_odp_init(void) return 0; } + +struct prefetch_mr_work { + struct work_struct work; + struct mlx5_ib_dev *dev; + u32 pf_flags; + u32 num_sge; + struct ib_sge sg_list[0]; +}; + +static int mlx5_ib_prefetch_sg_list(struct mlx5_ib_dev *dev, u32 pf_flags, + struct ib_sge *sg_list, u32 num_sge) +{ + int i; + + for (i = 0; i < num_sge; ++i) { + struct ib_sge *sg = &sg_list[i]; + int bytes_committed = 0; + int ret; + + ret = pagefault_single_data_segment(dev, sg->lkey, sg->addr, + sg->length, + &bytes_committed, NULL, + pf_flags); + if (ret < 0) + return ret; + } + return 0; +} + +static void mlx5_ib_prefetch_mr_work(struct work_struct *work) +{ + struct prefetch_mr_work *w = + container_of(work, struct prefetch_mr_work, work); + + if (w->dev->ib_dev.reg_state == IB_DEV_REGISTERED) + mlx5_ib_prefetch_sg_list(w->dev, w->pf_flags, w->sg_list, + w->num_sge); + + kfree(w); +} + +int mlx5_ib_advise_mr_prefetch(struct ib_pd *pd, + enum ib_uverbs_advise_mr_advice advice, + u32 flags, struct ib_sge *sg_list, u32 num_sge) +{ + struct mlx5_ib_dev *dev = to_mdev(pd->device); + u32 pf_flags = MLX5_PF_FLAGS_PREFETCH; + struct prefetch_mr_work *work; + + if (advice == IB_UVERBS_ADVISE_MR_ADVICE_PREFETCH) + pf_flags |= MLX5_PF_FLAGS_DOWNGRADE; + + if (flags & IB_UVERBS_ADVISE_MR_FLAG_FLUSH) + return mlx5_ib_prefetch_sg_list(dev, pf_flags, sg_list, + num_sge); + + if (dev->ib_dev.reg_state != IB_DEV_REGISTERED) + return -ENODEV; + + work = kvzalloc(struct_size(work, sg_list, num_sge), GFP_KERNEL); + if (!work) + return -ENOMEM; + + memcpy(work->sg_list, sg_list, num_sge * sizeof(struct ib_sge)); + + work->dev = dev; + work->pf_flags = pf_flags; + work->num_sge = num_sge; + + INIT_WORK(&work->work, mlx5_ib_prefetch_mr_work); + schedule_work(&work->work); + return 0; +} diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index a0e9ff763d42..9c94c1b9ec35 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -108,21 +108,6 @@ static int is_sqp(enum ib_qp_type qp_type) return is_qp0(qp_type) || is_qp1(qp_type); } -static void *get_wqe(struct mlx5_ib_qp *qp, int offset) -{ - return mlx5_buf_offset(&qp->buf, offset); -} - -static void *get_recv_wqe(struct mlx5_ib_qp *qp, int n) -{ - return get_wqe(qp, qp->rq.offset + (n << qp->rq.wqe_shift)); -} - -void *mlx5_get_send_wqe(struct mlx5_ib_qp *qp, int n) -{ - return get_wqe(qp, qp->sq.offset + (n << MLX5_IB_SQ_STRIDE)); -} - /** * mlx5_ib_read_user_wqe() - Copy a user-space WQE to kernel space. * @@ -790,6 +775,7 @@ static int create_user_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd, __be64 *pas; void *qpc; int err; + u16 uid; err = ib_copy_from_udata(&ucmd, udata, sizeof(ucmd)); if (err) { @@ -851,7 +837,8 @@ static int create_user_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd, goto err_umem; } - MLX5_SET(create_qp_in, *in, uid, to_mpd(pd)->uid); + uid = (attr->qp_type != IB_QPT_XRC_TGT) ? to_mpd(pd)->uid : 0; + MLX5_SET(create_qp_in, *in, uid, uid); pas = (__be64 *)MLX5_ADDR_OF(create_qp_in, *in, pas); if (ubuffer->umem) mlx5_ib_populate_pas(dev, ubuffer->umem, page_shift, pas, 0); @@ -917,6 +904,30 @@ static void destroy_qp_user(struct mlx5_ib_dev *dev, struct ib_pd *pd, mlx5_ib_free_bfreg(dev, &context->bfregi, qp->bfregn); } +/* get_sq_edge - Get the next nearby edge. + * + * An 'edge' is defined as the first following address after the end + * of the fragment or the SQ. Accordingly, during the WQE construction + * which repetitively increases the pointer to write the next data, it + * simply should check if it gets to an edge. + * + * @sq - SQ buffer. + * @idx - Stride index in the SQ buffer. + * + * Return: + * The new edge. + */ +static void *get_sq_edge(struct mlx5_ib_wq *sq, u32 idx) +{ + void *fragment_end; + + fragment_end = mlx5_frag_buf_get_wqe + (&sq->fbc, + mlx5_frag_buf_get_idx_last_contig_stride(&sq->fbc, idx)); + + return fragment_end + MLX5_SEND_WQE_BB; +} + static int create_kernel_qp(struct mlx5_ib_dev *dev, struct ib_qp_init_attr *init_attr, struct mlx5_ib_qp *qp, @@ -955,13 +966,29 @@ static int create_kernel_qp(struct mlx5_ib_dev *dev, qp->sq.offset = qp->rq.wqe_cnt << qp->rq.wqe_shift; base->ubuffer.buf_size = err + (qp->rq.wqe_cnt << qp->rq.wqe_shift); - err = mlx5_buf_alloc(dev->mdev, base->ubuffer.buf_size, &qp->buf); + err = mlx5_frag_buf_alloc_node(dev->mdev, base->ubuffer.buf_size, + &qp->buf, dev->mdev->priv.numa_node); if (err) { mlx5_ib_dbg(dev, "err %d\n", err); return err; } - qp->sq.qend = mlx5_get_send_wqe(qp, qp->sq.wqe_cnt); + if (qp->rq.wqe_cnt) + mlx5_init_fbc(qp->buf.frags, qp->rq.wqe_shift, + ilog2(qp->rq.wqe_cnt), &qp->rq.fbc); + + if (qp->sq.wqe_cnt) { + int sq_strides_offset = (qp->sq.offset & (PAGE_SIZE - 1)) / + MLX5_SEND_WQE_BB; + mlx5_init_fbc_offset(qp->buf.frags + + (qp->sq.offset / PAGE_SIZE), + ilog2(MLX5_SEND_WQE_BB), + ilog2(qp->sq.wqe_cnt), + sq_strides_offset, &qp->sq.fbc); + + qp->sq.cur_edge = get_sq_edge(&qp->sq, 0); + } + *inlen = MLX5_ST_SZ_BYTES(create_qp_in) + MLX5_FLD_SZ_BYTES(create_qp_in, pas[0]) * qp->buf.npages; *in = kvzalloc(*inlen, GFP_KERNEL); @@ -983,8 +1010,9 @@ static int create_kernel_qp(struct mlx5_ib_dev *dev, qp->flags |= MLX5_IB_QP_SQPN_QP1; } - mlx5_fill_page_array(&qp->buf, - (__be64 *)MLX5_ADDR_OF(create_qp_in, *in, pas)); + mlx5_fill_page_frag_array(&qp->buf, + (__be64 *)MLX5_ADDR_OF(create_qp_in, + *in, pas)); err = mlx5_db_alloc(dev->mdev, &qp->db); if (err) { @@ -1024,7 +1052,7 @@ err_free: kvfree(*in); err_buf: - mlx5_buf_free(dev->mdev, &qp->buf); + mlx5_frag_buf_free(dev->mdev, &qp->buf); return err; } @@ -1036,7 +1064,7 @@ static void destroy_qp_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp) kvfree(qp->sq.wr_data); kvfree(qp->rq.wrid); mlx5_db_free(dev->mdev, &qp->db); - mlx5_buf_free(dev->mdev, &qp->buf); + mlx5_frag_buf_free(dev->mdev, &qp->buf); } static u32 get_rx_type(struct mlx5_ib_qp *qp, struct ib_qp_init_attr *attr) @@ -1876,7 +1904,7 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd, qp->flags |= MLX5_IB_QP_CVLAN_STRIPPING; } - if (pd && pd->uobject) { + if (udata) { if (ib_copy_from_udata(&ucmd, udata, sizeof(ucmd))) { mlx5_ib_dbg(dev, "copy failed\n"); return -EFAULT; @@ -1889,7 +1917,8 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd, MLX5_QP_FLAG_BFREG_INDEX | MLX5_QP_FLAG_TYPE_DCT | MLX5_QP_FLAG_TYPE_DCI | - MLX5_QP_FLAG_ALLOW_SCATTER_CQE)) + MLX5_QP_FLAG_ALLOW_SCATTER_CQE | + MLX5_QP_FLAG_PACKET_BASED_CREDIT_MODE)) return -EINVAL; err = get_qp_user_index(to_mucontext(pd->uobject->context), @@ -1925,6 +1954,15 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd, qp->flags_en |= MLX5_QP_FLAG_TIR_ALLOW_SELF_LB_MC; } + if (ucmd.flags & MLX5_QP_FLAG_PACKET_BASED_CREDIT_MODE) { + if (init_attr->qp_type != IB_QPT_RC || + !MLX5_CAP_GEN(dev->mdev, qp_packet_based)) { + mlx5_ib_dbg(dev, "packet based credit mode isn't supported\n"); + return -EOPNOTSUPP; + } + qp->flags |= MLX5_IB_QP_PACKET_BASED_CREDIT; + } + if (init_attr->create_flags & IB_QP_CREATE_SOURCE_QPN) { if (init_attr->qp_type != IB_QPT_UD || (MLX5_CAP_GEN(dev->mdev, port_type) != @@ -1948,14 +1986,14 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd, qp->has_rq = qp_has_rq(init_attr); err = set_rq_size(dev, &init_attr->cap, qp->has_rq, - qp, (pd && pd->uobject) ? &ucmd : NULL); + qp, udata ? &ucmd : NULL); if (err) { mlx5_ib_dbg(dev, "err %d\n", err); return err; } if (pd) { - if (pd->uobject) { + if (udata) { __u32 max_wqes = 1 << MLX5_CAP_GEN(mdev, log_max_qp_sz); mlx5_ib_dbg(dev, "requested sq_wqe_count (%d)\n", ucmd.sq_wqe_count); @@ -2021,11 +2059,12 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd, MLX5_SET(qpc, qpc, cd_slave_send, 1); if (qp->flags & MLX5_IB_QP_MANAGED_RECV) MLX5_SET(qpc, qpc, cd_slave_receive, 1); - + if (qp->flags & MLX5_IB_QP_PACKET_BASED_CREDIT) + MLX5_SET(qpc, qpc, req_e2e_credit_mode, 1); if (qp->scat_cqe && is_connected(init_attr->qp_type)) { configure_responder_scat_cqe(init_attr, qpc); configure_requester_scat_cqe(dev, init_attr, - (pd && pd->uobject) ? &ucmd : NULL, + udata ? &ucmd : NULL, qpc); } @@ -2465,7 +2504,7 @@ struct ib_qp *mlx5_ib_create_qp(struct ib_pd *pd, dev = to_mdev(pd->device); if (init_attr->qp_type == IB_QPT_RAW_PACKET) { - if (!pd->uobject) { + if (!udata) { mlx5_ib_dbg(dev, "Raw Packet QP is not supported for kernel consumers\n"); return ERR_PTR(-EINVAL); } else if (!to_mucontext(pd->uobject->context)->cqe_version) { @@ -2663,7 +2702,7 @@ static int ib_rate_to_mlx5(struct mlx5_ib_dev *dev, u8 rate) if (rate == IB_RATE_PORT_CURRENT) return 0; - if (rate < IB_RATE_2_5_GBPS || rate > IB_RATE_300_GBPS) + if (rate < IB_RATE_2_5_GBPS || rate > IB_RATE_600_GBPS) return -EINVAL; while (rate != IB_RATE_PORT_CURRENT && @@ -3475,7 +3514,8 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp, qp->sq.head = 0; qp->sq.tail = 0; qp->sq.cur_post = 0; - qp->sq.last_poll = 0; + if (qp->sq.wqe_cnt) + qp->sq.cur_edge = get_sq_edge(&qp->sq, 0); qp->db.db[MLX5_RCV_DBR] = 0; qp->db.db[MLX5_SND_DBR] = 0; } @@ -3515,7 +3555,7 @@ static bool modify_dci_qp_is_ok(enum ib_qp_state cur_state, enum ib_qp_state new return is_valid_mask(attr_mask, req, opt); } else if (cur_state == IB_QPS_INIT && new_state == IB_QPS_RTR) { req |= IB_QP_PATH_MTU; - opt = IB_QP_PKEY_INDEX; + opt = IB_QP_PKEY_INDEX | IB_QP_AV; return is_valid_mask(attr_mask, req, opt); } else if (cur_state == IB_QPS_RTR && new_state == IB_QPS_RTS) { req |= IB_QP_TIMEOUT | IB_QP_RETRY_CNT | IB_QP_RNR_RETRY | @@ -3749,6 +3789,62 @@ out: return err; } +static void _handle_post_send_edge(struct mlx5_ib_wq *sq, void **seg, + u32 wqe_sz, void **cur_edge) +{ + u32 idx; + + idx = (sq->cur_post + (wqe_sz >> 2)) & (sq->wqe_cnt - 1); + *cur_edge = get_sq_edge(sq, idx); + + *seg = mlx5_frag_buf_get_wqe(&sq->fbc, idx); +} + +/* handle_post_send_edge - Check if we get to SQ edge. If yes, update to the + * next nearby edge and get new address translation for current WQE position. + * @sq - SQ buffer. + * @seg: Current WQE position (16B aligned). + * @wqe_sz: Total current WQE size [16B]. + * @cur_edge: Updated current edge. + */ +static inline void handle_post_send_edge(struct mlx5_ib_wq *sq, void **seg, + u32 wqe_sz, void **cur_edge) +{ + if (likely(*seg != *cur_edge)) + return; + + _handle_post_send_edge(sq, seg, wqe_sz, cur_edge); +} + +/* memcpy_send_wqe - copy data from src to WQE and update the relevant WQ's + * pointers. At the end @seg is aligned to 16B regardless the copied size. + * @sq - SQ buffer. + * @cur_edge: Updated current edge. + * @seg: Current WQE position (16B aligned). + * @wqe_sz: Total current WQE size [16B]. + * @src: Pointer to copy from. + * @n: Number of bytes to copy. + */ +static inline void memcpy_send_wqe(struct mlx5_ib_wq *sq, void **cur_edge, + void **seg, u32 *wqe_sz, const void *src, + size_t n) +{ + while (likely(n)) { + size_t leftlen = *cur_edge - *seg; + size_t copysz = min_t(size_t, leftlen, n); + size_t stride; + + memcpy(*seg, src, copysz); + + n -= copysz; + src += copysz; + stride = !n ? ALIGN(copysz, 16) : copysz; + *seg += stride; + *wqe_sz += stride >> 4; + handle_post_send_edge(sq, seg, *wqe_sz, cur_edge); + } +} + static int mlx5_wq_overflow(struct mlx5_ib_wq *wq, int nreq, struct ib_cq *ib_cq) { struct mlx5_ib_cq *cq; @@ -3774,11 +3870,10 @@ static __always_inline void set_raddr_seg(struct mlx5_wqe_raddr_seg *rseg, rseg->reserved = 0; } -static void *set_eth_seg(struct mlx5_wqe_eth_seg *eseg, - const struct ib_send_wr *wr, void *qend, - struct mlx5_ib_qp *qp, int *size) +static void set_eth_seg(const struct ib_send_wr *wr, struct mlx5_ib_qp *qp, + void **seg, int *size, void **cur_edge) { - void *seg = eseg; + struct mlx5_wqe_eth_seg *eseg = *seg; memset(eseg, 0, sizeof(struct mlx5_wqe_eth_seg)); @@ -3786,45 +3881,41 @@ static void *set_eth_seg(struct mlx5_wqe_eth_seg *eseg, eseg->cs_flags = MLX5_ETH_WQE_L3_CSUM | MLX5_ETH_WQE_L4_CSUM; - seg += sizeof(struct mlx5_wqe_eth_seg); - *size += sizeof(struct mlx5_wqe_eth_seg) / 16; - if (wr->opcode == IB_WR_LSO) { struct ib_ud_wr *ud_wr = container_of(wr, struct ib_ud_wr, wr); - int size_of_inl_hdr_start = sizeof(eseg->inline_hdr.start); - u64 left, leftlen, copysz; + size_t left, copysz; void *pdata = ud_wr->header; + size_t stride; left = ud_wr->hlen; eseg->mss = cpu_to_be16(ud_wr->mss); eseg->inline_hdr.sz = cpu_to_be16(left); - /* - * check if there is space till the end of queue, if yes, - * copy all in one shot, otherwise copy till the end of queue, - * rollback and than the copy the left + /* memcpy_send_wqe should get a 16B align address. Hence, we + * first copy up to the current edge and then, if needed, + * fall-through to memcpy_send_wqe. */ - leftlen = qend - (void *)eseg->inline_hdr.start; - copysz = min_t(u64, leftlen, left); - - memcpy(seg - size_of_inl_hdr_start, pdata, copysz); - - if (likely(copysz > size_of_inl_hdr_start)) { - seg += ALIGN(copysz - size_of_inl_hdr_start, 16); - *size += ALIGN(copysz - size_of_inl_hdr_start, 16) / 16; - } - - if (unlikely(copysz < left)) { /* the last wqe in the queue */ - seg = mlx5_get_send_wqe(qp, 0); + copysz = min_t(u64, *cur_edge - (void *)eseg->inline_hdr.start, + left); + memcpy(eseg->inline_hdr.start, pdata, copysz); + stride = ALIGN(sizeof(struct mlx5_wqe_eth_seg) - + sizeof(eseg->inline_hdr.start) + copysz, 16); + *size += stride / 16; + *seg += stride; + + if (copysz < left) { + handle_post_send_edge(&qp->sq, seg, *size, cur_edge); left -= copysz; pdata += copysz; - memcpy(seg, pdata, left); - seg += ALIGN(left, 16); - *size += ALIGN(left, 16) / 16; + memcpy_send_wqe(&qp->sq, cur_edge, seg, size, pdata, + left); } + + return; } - return seg; + *seg += sizeof(struct mlx5_wqe_eth_seg); + *size += sizeof(struct mlx5_wqe_eth_seg) / 16; } static void set_datagram_seg(struct mlx5_wqe_datagram_seg *dseg, @@ -4083,24 +4174,6 @@ static void set_reg_data_seg(struct mlx5_wqe_data_seg *dseg, dseg->lkey = cpu_to_be32(pd->ibpd.local_dma_lkey); } -static void set_reg_umr_inline_seg(void *seg, struct mlx5_ib_qp *qp, - struct mlx5_ib_mr *mr, int mr_list_size) -{ - void *qend = qp->sq.qend; - void *addr = mr->descs; - int copy; - - if (unlikely(seg + mr_list_size > qend)) { - copy = qend - seg; - memcpy(seg, addr, copy); - addr += copy; - mr_list_size -= copy; - seg = mlx5_get_send_wqe(qp, 0); - } - memcpy(seg, addr, mr_list_size); - seg += mr_list_size; -} - static __be32 send_ieth(const struct ib_send_wr *wr) { switch (wr->opcode) { @@ -4134,40 +4207,48 @@ static u8 wq_sig(void *wqe) } static int set_data_inl_seg(struct mlx5_ib_qp *qp, const struct ib_send_wr *wr, - void *wqe, int *sz) + void **wqe, int *wqe_sz, void **cur_edge) { struct mlx5_wqe_inline_seg *seg; - void *qend = qp->sq.qend; - void *addr; + size_t offset; int inl = 0; - int copy; - int len; int i; - seg = wqe; - wqe += sizeof(*seg); + seg = *wqe; + *wqe += sizeof(*seg); + offset = sizeof(*seg); + for (i = 0; i < wr->num_sge; i++) { - addr = (void *)(unsigned long)(wr->sg_list[i].addr); - len = wr->sg_list[i].length; + size_t len = wr->sg_list[i].length; + void *addr = (void *)(unsigned long)(wr->sg_list[i].addr); + inl += len; if (unlikely(inl > qp->max_inline_data)) return -ENOMEM; - if (unlikely(wqe + len > qend)) { - copy = qend - wqe; - memcpy(wqe, addr, copy); - addr += copy; - len -= copy; - wqe = mlx5_get_send_wqe(qp, 0); + while (likely(len)) { + size_t leftlen; + size_t copysz; + + handle_post_send_edge(&qp->sq, wqe, + *wqe_sz + (offset >> 4), + cur_edge); + + leftlen = *cur_edge - *wqe; + copysz = min_t(size_t, leftlen, len); + + memcpy(*wqe, addr, copysz); + len -= copysz; + addr += copysz; + *wqe += copysz; + offset += copysz; } - memcpy(wqe, addr, len); - wqe += len; } seg->byte_count = cpu_to_be32(inl | MLX5_INLINE_SEG); - *sz = ALIGN(inl + sizeof(seg->byte_count), 16) / 16; + *wqe_sz += ALIGN(inl + sizeof(seg->byte_count), 16) / 16; return 0; } @@ -4280,7 +4361,8 @@ static int mlx5_set_bsf(struct ib_mr *sig_mr, } static int set_sig_data_segment(const struct ib_sig_handover_wr *wr, - struct mlx5_ib_qp *qp, void **seg, int *size) + struct mlx5_ib_qp *qp, void **seg, + int *size, void **cur_edge) { struct ib_sig_attrs *sig_attrs = wr->sig_attrs; struct ib_mr *sig_mr = wr->sig_mr; @@ -4364,8 +4446,7 @@ static int set_sig_data_segment(const struct ib_sig_handover_wr *wr, *seg += wqe_size; *size += wqe_size / 16; - if (unlikely((*seg == qp->sq.qend))) - *seg = mlx5_get_send_wqe(qp, 0); + handle_post_send_edge(&qp->sq, seg, *size, cur_edge); bsf = *seg; ret = mlx5_set_bsf(sig_mr, sig_attrs, bsf, data_len); @@ -4374,8 +4455,7 @@ static int set_sig_data_segment(const struct ib_sig_handover_wr *wr, *seg += sizeof(*bsf); *size += sizeof(*bsf) / 16; - if (unlikely((*seg == qp->sq.qend))) - *seg = mlx5_get_send_wqe(qp, 0); + handle_post_send_edge(&qp->sq, seg, *size, cur_edge); return 0; } @@ -4413,7 +4493,8 @@ static void set_sig_umr_segment(struct mlx5_wqe_umr_ctrl_seg *umr, static int set_sig_umr_wr(const struct ib_send_wr *send_wr, - struct mlx5_ib_qp *qp, void **seg, int *size) + struct mlx5_ib_qp *qp, void **seg, int *size, + void **cur_edge) { const struct ib_sig_handover_wr *wr = sig_handover_wr(send_wr); struct mlx5_ib_mr *sig_mr = to_mmr(wr->sig_mr); @@ -4445,16 +4526,14 @@ static int set_sig_umr_wr(const struct ib_send_wr *send_wr, set_sig_umr_segment(*seg, xlt_size); *seg += sizeof(struct mlx5_wqe_umr_ctrl_seg); *size += sizeof(struct mlx5_wqe_umr_ctrl_seg) / 16; - if (unlikely((*seg == qp->sq.qend))) - *seg = mlx5_get_send_wqe(qp, 0); + handle_post_send_edge(&qp->sq, seg, *size, cur_edge); set_sig_mkey_segment(*seg, wr, xlt_size, region_len, pdn); *seg += sizeof(struct mlx5_mkey_seg); *size += sizeof(struct mlx5_mkey_seg) / 16; - if (unlikely((*seg == qp->sq.qend))) - *seg = mlx5_get_send_wqe(qp, 0); + handle_post_send_edge(&qp->sq, seg, *size, cur_edge); - ret = set_sig_data_segment(wr, qp, seg, size); + ret = set_sig_data_segment(wr, qp, seg, size, cur_edge); if (ret) return ret; @@ -4491,11 +4570,11 @@ static int set_psv_wr(struct ib_sig_domain *domain, static int set_reg_wr(struct mlx5_ib_qp *qp, const struct ib_reg_wr *wr, - void **seg, int *size) + void **seg, int *size, void **cur_edge) { struct mlx5_ib_mr *mr = to_mmr(wr->mr); struct mlx5_ib_pd *pd = to_mpd(qp->ibqp.pd); - int mr_list_size = mr->ndescs * mr->desc_size; + size_t mr_list_size = mr->ndescs * mr->desc_size; bool umr_inline = mr_list_size <= MLX5_IB_SQ_UMR_INLINE_THRESHOLD; if (unlikely(wr->wr.send_flags & IB_SEND_INLINE)) { @@ -4507,18 +4586,17 @@ static int set_reg_wr(struct mlx5_ib_qp *qp, set_reg_umr_seg(*seg, mr, umr_inline); *seg += sizeof(struct mlx5_wqe_umr_ctrl_seg); *size += sizeof(struct mlx5_wqe_umr_ctrl_seg) / 16; - if (unlikely((*seg == qp->sq.qend))) - *seg = mlx5_get_send_wqe(qp, 0); + handle_post_send_edge(&qp->sq, seg, *size, cur_edge); set_reg_mkey_seg(*seg, mr, wr->key, wr->access); *seg += sizeof(struct mlx5_mkey_seg); *size += sizeof(struct mlx5_mkey_seg) / 16; - if (unlikely((*seg == qp->sq.qend))) - *seg = mlx5_get_send_wqe(qp, 0); + handle_post_send_edge(&qp->sq, seg, *size, cur_edge); if (umr_inline) { - set_reg_umr_inline_seg(*seg, qp, mr, mr_list_size); - *size += get_xlt_octo(mr_list_size); + memcpy_send_wqe(&qp->sq, cur_edge, seg, size, mr->descs, + mr_list_size); + *size = ALIGN(*size, MLX5_SEND_WQE_BB >> 4); } else { set_reg_data_seg(*seg, mr, pd); *seg += sizeof(struct mlx5_wqe_data_seg); @@ -4527,32 +4605,31 @@ static int set_reg_wr(struct mlx5_ib_qp *qp, return 0; } -static void set_linv_wr(struct mlx5_ib_qp *qp, void **seg, int *size) +static void set_linv_wr(struct mlx5_ib_qp *qp, void **seg, int *size, + void **cur_edge) { set_linv_umr_seg(*seg); *seg += sizeof(struct mlx5_wqe_umr_ctrl_seg); *size += sizeof(struct mlx5_wqe_umr_ctrl_seg) / 16; - if (unlikely((*seg == qp->sq.qend))) - *seg = mlx5_get_send_wqe(qp, 0); + handle_post_send_edge(&qp->sq, seg, *size, cur_edge); set_linv_mkey_seg(*seg); *seg += sizeof(struct mlx5_mkey_seg); *size += sizeof(struct mlx5_mkey_seg) / 16; - if (unlikely((*seg == qp->sq.qend))) - *seg = mlx5_get_send_wqe(qp, 0); + handle_post_send_edge(&qp->sq, seg, *size, cur_edge); } -static void dump_wqe(struct mlx5_ib_qp *qp, int idx, int size_16) +static void dump_wqe(struct mlx5_ib_qp *qp, u32 idx, int size_16) { __be32 *p = NULL; - int tidx = idx; + u32 tidx = idx; int i, j; - pr_debug("dump wqe at %p\n", mlx5_get_send_wqe(qp, tidx)); + pr_debug("dump WQE index %u:\n", idx); for (i = 0, j = 0; i < size_16 * 4; i += 4, j += 4) { if ((i & 0xf) == 0) { - void *buf = mlx5_get_send_wqe(qp, tidx); tidx = (tidx + 1) & (qp->sq.wqe_cnt - 1); - p = buf; + p = mlx5_frag_buf_get_wqe(&qp->sq.fbc, tidx); + pr_debug("WQBB at %p:\n", (void *)p); j = 0; } pr_debug("%08x %08x %08x %08x\n", be32_to_cpu(p[j]), @@ -4562,15 +4639,16 @@ static void dump_wqe(struct mlx5_ib_qp *qp, int idx, int size_16) } static int __begin_wqe(struct mlx5_ib_qp *qp, void **seg, - struct mlx5_wqe_ctrl_seg **ctrl, - const struct ib_send_wr *wr, unsigned *idx, - int *size, int nreq, bool send_signaled, bool solicited) + struct mlx5_wqe_ctrl_seg **ctrl, + const struct ib_send_wr *wr, unsigned int *idx, + int *size, void **cur_edge, int nreq, + bool send_signaled, bool solicited) { if (unlikely(mlx5_wq_overflow(&qp->sq, nreq, qp->ibqp.send_cq))) return -ENOMEM; *idx = qp->sq.cur_post & (qp->sq.wqe_cnt - 1); - *seg = mlx5_get_send_wqe(qp, *idx); + *seg = mlx5_frag_buf_get_wqe(&qp->sq.fbc, *idx); *ctrl = *seg; *(uint32_t *)(*seg + 8) = 0; (*ctrl)->imm = send_ieth(wr); @@ -4580,6 +4658,7 @@ static int __begin_wqe(struct mlx5_ib_qp *qp, void **seg, *seg += sizeof(**ctrl); *size = sizeof(**ctrl) / 16; + *cur_edge = qp->sq.cur_edge; return 0; } @@ -4587,17 +4666,18 @@ static int __begin_wqe(struct mlx5_ib_qp *qp, void **seg, static int begin_wqe(struct mlx5_ib_qp *qp, void **seg, struct mlx5_wqe_ctrl_seg **ctrl, const struct ib_send_wr *wr, unsigned *idx, - int *size, int nreq) + int *size, void **cur_edge, int nreq) { - return __begin_wqe(qp, seg, ctrl, wr, idx, size, nreq, + return __begin_wqe(qp, seg, ctrl, wr, idx, size, cur_edge, nreq, wr->send_flags & IB_SEND_SIGNALED, wr->send_flags & IB_SEND_SOLICITED); } static void finish_wqe(struct mlx5_ib_qp *qp, struct mlx5_wqe_ctrl_seg *ctrl, - u8 size, unsigned idx, u64 wr_id, - int nreq, u8 fence, u32 mlx5_opcode) + void *seg, u8 size, void *cur_edge, + unsigned int idx, u64 wr_id, int nreq, u8 fence, + u32 mlx5_opcode) { u8 opmod = 0; @@ -4613,6 +4693,15 @@ static void finish_wqe(struct mlx5_ib_qp *qp, qp->sq.wqe_head[idx] = qp->sq.head + nreq; qp->sq.cur_post += DIV_ROUND_UP(size * 16, MLX5_SEND_WQE_BB); qp->sq.w_list[idx].next = qp->sq.cur_post; + + /* We save the edge which was possibly updated during the WQE + * construction, into SQ's cache. + */ + seg = PTR_ALIGN(seg, MLX5_SEND_WQE_BB); + qp->sq.cur_edge = (unlikely(seg == cur_edge)) ? + get_sq_edge(&qp->sq, qp->sq.cur_post & + (qp->sq.wqe_cnt - 1)) : + cur_edge; } static int _mlx5_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr, @@ -4623,11 +4712,10 @@ static int _mlx5_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr, struct mlx5_core_dev *mdev = dev->mdev; struct mlx5_ib_qp *qp; struct mlx5_ib_mr *mr; - struct mlx5_wqe_data_seg *dpseg; struct mlx5_wqe_xrc_seg *xrc; struct mlx5_bf *bf; + void *cur_edge; int uninitialized_var(size); - void *qend; unsigned long flags; unsigned idx; int err = 0; @@ -4649,7 +4737,6 @@ static int _mlx5_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr, qp = to_mqp(ibqp); bf = &qp->bf; - qend = qp->sq.qend; spin_lock_irqsave(&qp->sq.lock, flags); @@ -4669,7 +4756,8 @@ static int _mlx5_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr, goto out; } - err = begin_wqe(qp, &seg, &ctrl, wr, &idx, &size, nreq); + err = begin_wqe(qp, &seg, &ctrl, wr, &idx, &size, &cur_edge, + nreq); if (err) { mlx5_ib_warn(dev, "\n"); err = -ENOMEM; @@ -4719,14 +4807,15 @@ static int _mlx5_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr, case IB_WR_LOCAL_INV: qp->sq.wr_data[idx] = IB_WR_LOCAL_INV; ctrl->imm = cpu_to_be32(wr->ex.invalidate_rkey); - set_linv_wr(qp, &seg, &size); + set_linv_wr(qp, &seg, &size, &cur_edge); num_sge = 0; break; case IB_WR_REG_MR: qp->sq.wr_data[idx] = IB_WR_REG_MR; ctrl->imm = cpu_to_be32(reg_wr(wr)->key); - err = set_reg_wr(qp, reg_wr(wr), &seg, &size); + err = set_reg_wr(qp, reg_wr(wr), &seg, &size, + &cur_edge); if (err) { *bad_wr = wr; goto out; @@ -4739,21 +4828,24 @@ static int _mlx5_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr, mr = to_mmr(sig_handover_wr(wr)->sig_mr); ctrl->imm = cpu_to_be32(mr->ibmr.rkey); - err = set_sig_umr_wr(wr, qp, &seg, &size); + err = set_sig_umr_wr(wr, qp, &seg, &size, + &cur_edge); if (err) { mlx5_ib_warn(dev, "\n"); *bad_wr = wr; goto out; } - finish_wqe(qp, ctrl, size, idx, wr->wr_id, nreq, - fence, MLX5_OPCODE_UMR); + finish_wqe(qp, ctrl, seg, size, cur_edge, idx, + wr->wr_id, nreq, fence, + MLX5_OPCODE_UMR); /* * SET_PSV WQEs are not signaled and solicited * on error */ err = __begin_wqe(qp, &seg, &ctrl, wr, &idx, - &size, nreq, false, true); + &size, &cur_edge, nreq, false, + true); if (err) { mlx5_ib_warn(dev, "\n"); err = -ENOMEM; @@ -4770,10 +4862,12 @@ static int _mlx5_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr, goto out; } - finish_wqe(qp, ctrl, size, idx, wr->wr_id, nreq, - fence, MLX5_OPCODE_SET_PSV); + finish_wqe(qp, ctrl, seg, size, cur_edge, idx, + wr->wr_id, nreq, fence, + MLX5_OPCODE_SET_PSV); err = __begin_wqe(qp, &seg, &ctrl, wr, &idx, - &size, nreq, false, true); + &size, &cur_edge, nreq, false, + true); if (err) { mlx5_ib_warn(dev, "\n"); err = -ENOMEM; @@ -4790,8 +4884,9 @@ static int _mlx5_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr, goto out; } - finish_wqe(qp, ctrl, size, idx, wr->wr_id, nreq, - fence, MLX5_OPCODE_SET_PSV); + finish_wqe(qp, ctrl, seg, size, cur_edge, idx, + wr->wr_id, nreq, fence, + MLX5_OPCODE_SET_PSV); qp->next_fence = MLX5_FENCE_MODE_INITIATOR_SMALL; num_sge = 0; goto skip_psv; @@ -4828,16 +4923,14 @@ static int _mlx5_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr, set_datagram_seg(seg, wr); seg += sizeof(struct mlx5_wqe_datagram_seg); size += sizeof(struct mlx5_wqe_datagram_seg) / 16; - if (unlikely((seg == qend))) - seg = mlx5_get_send_wqe(qp, 0); + handle_post_send_edge(&qp->sq, &seg, size, &cur_edge); + break; case IB_QPT_UD: set_datagram_seg(seg, wr); seg += sizeof(struct mlx5_wqe_datagram_seg); size += sizeof(struct mlx5_wqe_datagram_seg) / 16; - - if (unlikely((seg == qend))) - seg = mlx5_get_send_wqe(qp, 0); + handle_post_send_edge(&qp->sq, &seg, size, &cur_edge); /* handle qp that supports ud offload */ if (qp->flags & IB_QP_CREATE_IPOIB_UD_LSO) { @@ -4847,11 +4940,9 @@ static int _mlx5_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr, memset(pad, 0, sizeof(struct mlx5_wqe_eth_pad)); seg += sizeof(struct mlx5_wqe_eth_pad); size += sizeof(struct mlx5_wqe_eth_pad) / 16; - - seg = set_eth_seg(seg, wr, qend, qp, &size); - - if (unlikely((seg == qend))) - seg = mlx5_get_send_wqe(qp, 0); + set_eth_seg(wr, qp, &seg, &size, &cur_edge); + handle_post_send_edge(&qp->sq, &seg, size, + &cur_edge); } break; case MLX5_IB_QPT_REG_UMR: @@ -4867,13 +4958,11 @@ static int _mlx5_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr, goto out; seg += sizeof(struct mlx5_wqe_umr_ctrl_seg); size += sizeof(struct mlx5_wqe_umr_ctrl_seg) / 16; - if (unlikely((seg == qend))) - seg = mlx5_get_send_wqe(qp, 0); + handle_post_send_edge(&qp->sq, &seg, size, &cur_edge); set_reg_mkey_segment(seg, wr); seg += sizeof(struct mlx5_mkey_seg); size += sizeof(struct mlx5_mkey_seg) / 16; - if (unlikely((seg == qend))) - seg = mlx5_get_send_wqe(qp, 0); + handle_post_send_edge(&qp->sq, &seg, size, &cur_edge); break; default: @@ -4881,33 +4970,29 @@ static int _mlx5_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr, } if (wr->send_flags & IB_SEND_INLINE && num_sge) { - int uninitialized_var(sz); - - err = set_data_inl_seg(qp, wr, seg, &sz); + err = set_data_inl_seg(qp, wr, &seg, &size, &cur_edge); if (unlikely(err)) { mlx5_ib_warn(dev, "\n"); *bad_wr = wr; goto out; } - size += sz; } else { - dpseg = seg; for (i = 0; i < num_sge; i++) { - if (unlikely(dpseg == qend)) { - seg = mlx5_get_send_wqe(qp, 0); - dpseg = seg; - } + handle_post_send_edge(&qp->sq, &seg, size, + &cur_edge); if (likely(wr->sg_list[i].length)) { - set_data_ptr_seg(dpseg, wr->sg_list + i); + set_data_ptr_seg + ((struct mlx5_wqe_data_seg *)seg, + wr->sg_list + i); size += sizeof(struct mlx5_wqe_data_seg) / 16; - dpseg++; + seg += sizeof(struct mlx5_wqe_data_seg); } } } qp->next_fence = next_fence; - finish_wqe(qp, ctrl, size, idx, wr->wr_id, nreq, fence, - mlx5_ib_opcode[wr->opcode]); + finish_wqe(qp, ctrl, seg, size, cur_edge, idx, wr->wr_id, nreq, + fence, mlx5_ib_opcode[wr->opcode]); skip_psv: if (0) dump_wqe(qp, idx, size); @@ -4993,7 +5078,7 @@ static int _mlx5_ib_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr, goto out; } - scat = get_recv_wqe(qp, ind); + scat = mlx5_frag_buf_get_wqe(&qp->rq.fbc, ind); if (qp->wq_sig) scat++; @@ -5441,7 +5526,6 @@ struct ib_xrcd *mlx5_ib_alloc_xrcd(struct ib_device *ibdev, struct mlx5_ib_dev *dev = to_mdev(ibdev); struct mlx5_ib_xrcd *xrcd; int err; - u16 uid; if (!MLX5_CAP_GEN(dev->mdev, xrc)) return ERR_PTR(-ENOSYS); @@ -5450,14 +5534,12 @@ struct ib_xrcd *mlx5_ib_alloc_xrcd(struct ib_device *ibdev, if (!xrcd) return ERR_PTR(-ENOMEM); - uid = context ? to_mucontext(context)->devx_uid : 0; - err = mlx5_cmd_xrcd_alloc(dev->mdev, &xrcd->xrcdn, uid); + err = mlx5_cmd_xrcd_alloc(dev->mdev, &xrcd->xrcdn, 0); if (err) { kfree(xrcd); return ERR_PTR(-ENOMEM); } - xrcd->uid = uid; return &xrcd->ibxrcd; } @@ -5465,10 +5547,9 @@ int mlx5_ib_dealloc_xrcd(struct ib_xrcd *xrcd) { struct mlx5_ib_dev *dev = to_mdev(xrcd->device); u32 xrcdn = to_mxrcd(xrcd)->xrcdn; - u16 uid = to_mxrcd(xrcd)->uid; int err; - err = mlx5_cmd_xrcd_dealloc(dev->mdev, xrcdn, uid); + err = mlx5_cmd_xrcd_dealloc(dev->mdev, xrcdn, 0); if (err) mlx5_ib_warn(dev, "failed to dealloc xrcdn 0x%x\n", xrcdn); diff --git a/drivers/infiniband/hw/mlx5/srq.c b/drivers/infiniband/hw/mlx5/srq.c index 91dcd3918d96..4e8d18009f58 100644 --- a/drivers/infiniband/hw/mlx5/srq.c +++ b/drivers/infiniband/hw/mlx5/srq.c @@ -13,7 +13,7 @@ static void *get_wqe(struct mlx5_ib_srq *srq, int n) { - return mlx5_buf_offset(&srq->buf, n << srq->msrq.wqe_shift); + return mlx5_frag_buf_get_wqe(&srq->fbc, n); } static void mlx5_ib_srq_event(struct mlx5_core_srq *srq, enum mlx5_event type) @@ -113,7 +113,7 @@ static int create_srq_user(struct ib_pd *pd, struct mlx5_ib_srq *srq, in->log_page_size = page_shift - MLX5_ADAPTER_PAGE_SHIFT; in->page_offset = offset; - in->uid = to_mpd(pd)->uid; + in->uid = (in->type != IB_SRQT_XRC) ? to_mpd(pd)->uid : 0; if (MLX5_CAP_GEN(dev->mdev, cqe_version) == MLX5_CQE_VERSION_V1 && in->type != IB_SRQT_BASIC) in->user_index = uidx; @@ -142,12 +142,16 @@ static int create_srq_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_srq *srq, return err; } - if (mlx5_buf_alloc(dev->mdev, buf_size, &srq->buf)) { + if (mlx5_frag_buf_alloc_node(dev->mdev, buf_size, &srq->buf, + dev->mdev->priv.numa_node)) { mlx5_ib_dbg(dev, "buf alloc failed\n"); err = -ENOMEM; goto err_db; } + mlx5_init_fbc(srq->buf.frags, srq->msrq.wqe_shift, ilog2(srq->msrq.max), + &srq->fbc); + srq->head = 0; srq->tail = srq->msrq.max - 1; srq->wqe_ctr = 0; @@ -164,7 +168,7 @@ static int create_srq_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_srq *srq, err = -ENOMEM; goto err_buf; } - mlx5_fill_page_array(&srq->buf, in->pas); + mlx5_fill_page_frag_array(&srq->buf, in->pas); srq->wrid = kvmalloc_array(srq->msrq.max, sizeof(u64), GFP_KERNEL); if (!srq->wrid) { @@ -184,7 +188,7 @@ err_in: kvfree(in->pas); err_buf: - mlx5_buf_free(dev->mdev, &srq->buf); + mlx5_frag_buf_free(dev->mdev, &srq->buf); err_db: mlx5_db_free(dev->mdev, &srq->db); @@ -201,7 +205,7 @@ static void destroy_srq_user(struct ib_pd *pd, struct mlx5_ib_srq *srq) static void destroy_srq_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_srq *srq) { kvfree(srq->wrid); - mlx5_buf_free(dev->mdev, &srq->buf); + mlx5_frag_buf_free(dev->mdev, &srq->buf); mlx5_db_free(dev->mdev, &srq->db); } @@ -256,14 +260,14 @@ struct ib_srq *mlx5_ib_create_srq(struct ib_pd *pd, } in.type = init_attr->srq_type; - if (pd->uobject) + if (udata) err = create_srq_user(pd, srq, &in, udata, buf_size); else err = create_srq_kernel(dev, srq, &in, buf_size); if (err) { mlx5_ib_warn(dev, "create srq %s failed, err %d\n", - pd->uobject ? "user" : "kernel", err); + udata ? "user" : "kernel", err); goto err_srq; } @@ -308,7 +312,7 @@ struct ib_srq *mlx5_ib_create_srq(struct ib_pd *pd, srq->msrq.event = mlx5_ib_srq_event; srq->ibsrq.ext.xrc.srq_num = srq->msrq.srqn; - if (pd->uobject) + if (udata) if (ib_copy_to_udata(udata, &srq->msrq.srqn, sizeof(__u32))) { mlx5_ib_dbg(dev, "copy to user failed\n"); err = -EFAULT; @@ -323,7 +327,7 @@ err_core: mlx5_cmd_destroy_srq(dev, &srq->msrq); err_usr_kern_srq: - if (pd->uobject) + if (udata) destroy_srq_user(pd, srq); else destroy_srq_kernel(dev, srq); diff --git a/drivers/infiniband/hw/mthca/mthca_dev.h b/drivers/infiniband/hw/mthca/mthca_dev.h index 220a3e4717a3..bfd4eebc1182 100644 --- a/drivers/infiniband/hw/mthca/mthca_dev.h +++ b/drivers/infiniband/hw/mthca/mthca_dev.h @@ -510,7 +510,8 @@ int mthca_alloc_cq_buf(struct mthca_dev *dev, struct mthca_cq_buf *buf, int nent void mthca_free_cq_buf(struct mthca_dev *dev, struct mthca_cq_buf *buf, int cqe); int mthca_alloc_srq(struct mthca_dev *dev, struct mthca_pd *pd, - struct ib_srq_attr *attr, struct mthca_srq *srq); + struct ib_srq_attr *attr, struct mthca_srq *srq, + struct ib_udata *udata); void mthca_free_srq(struct mthca_dev *dev, struct mthca_srq *srq); int mthca_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr, enum ib_srq_attr_mask attr_mask, struct ib_udata *udata); @@ -547,7 +548,8 @@ int mthca_alloc_qp(struct mthca_dev *dev, enum ib_qp_type type, enum ib_sig_type send_policy, struct ib_qp_cap *cap, - struct mthca_qp *qp); + struct mthca_qp *qp, + struct ib_udata *udata); int mthca_alloc_sqp(struct mthca_dev *dev, struct mthca_pd *pd, struct mthca_cq *send_cq, @@ -556,7 +558,8 @@ int mthca_alloc_sqp(struct mthca_dev *dev, struct ib_qp_cap *cap, int qpn, int port, - struct mthca_sqp *sqp); + struct mthca_sqp *sqp, + struct ib_udata *udata); void mthca_free_qp(struct mthca_dev *dev, struct mthca_qp *qp); int mthca_create_ah(struct mthca_dev *dev, struct mthca_pd *pd, diff --git a/drivers/infiniband/hw/mthca/mthca_mad.c b/drivers/infiniband/hw/mthca/mthca_mad.c index 2e5dc0a67cfc..7ad517da4917 100644 --- a/drivers/infiniband/hw/mthca/mthca_mad.c +++ b/drivers/infiniband/hw/mthca/mthca_mad.c @@ -89,13 +89,13 @@ static void update_sm_ah(struct mthca_dev *dev, rdma_ah_set_port_num(&ah_attr, port_num); new_ah = rdma_create_ah(dev->send_agent[port_num - 1][0]->qp->pd, - &ah_attr); + &ah_attr, 0); if (IS_ERR(new_ah)) return; spin_lock_irqsave(&dev->sm_lock, flags); if (dev->sm_ah[port_num - 1]) - rdma_destroy_ah(dev->sm_ah[port_num - 1]); + rdma_destroy_ah(dev->sm_ah[port_num - 1], 0); dev->sm_ah[port_num - 1] = new_ah; spin_unlock_irqrestore(&dev->sm_lock, flags); } @@ -347,6 +347,7 @@ void mthca_free_agents(struct mthca_dev *dev) } if (dev->sm_ah[p]) - rdma_destroy_ah(dev->sm_ah[p]); + rdma_destroy_ah(dev->sm_ah[p], + RDMA_DESTROY_AH_SLEEPABLE); } } diff --git a/drivers/infiniband/hw/mthca/mthca_provider.c b/drivers/infiniband/hw/mthca/mthca_provider.c index 691c6f048938..82cb6b71ac7c 100644 --- a/drivers/infiniband/hw/mthca/mthca_provider.c +++ b/drivers/infiniband/hw/mthca/mthca_provider.c @@ -412,6 +412,7 @@ static int mthca_dealloc_pd(struct ib_pd *pd) static struct ib_ah *mthca_ah_create(struct ib_pd *pd, struct rdma_ah_attr *ah_attr, + u32 flags, struct ib_udata *udata) { @@ -431,7 +432,7 @@ static struct ib_ah *mthca_ah_create(struct ib_pd *pd, return &ah->ibah; } -static int mthca_ah_destroy(struct ib_ah *ah) +static int mthca_ah_destroy(struct ib_ah *ah, u32 flags) { mthca_destroy_ah(to_mdev(ah->device), to_mah(ah)); kfree(ah); @@ -455,7 +456,7 @@ static struct ib_srq *mthca_create_srq(struct ib_pd *pd, if (!srq) return ERR_PTR(-ENOMEM); - if (pd->uobject) { + if (udata) { context = to_mucontext(pd->uobject->context); if (ib_copy_from_udata(&ucmd, udata, sizeof ucmd)) { @@ -475,9 +476,9 @@ static struct ib_srq *mthca_create_srq(struct ib_pd *pd, } err = mthca_alloc_srq(to_mdev(pd->device), to_mpd(pd), - &init_attr->attr, srq); + &init_attr->attr, srq, udata); - if (err && pd->uobject) + if (err && udata) mthca_unmap_user_db(to_mdev(pd->device), &context->uar, context->db_tab, ucmd.db_index); @@ -537,7 +538,7 @@ static struct ib_qp *mthca_create_qp(struct ib_pd *pd, if (!qp) return ERR_PTR(-ENOMEM); - if (pd->uobject) { + if (udata) { context = to_mucontext(pd->uobject->context); if (ib_copy_from_udata(&ucmd, udata, sizeof ucmd)) { @@ -574,9 +575,9 @@ static struct ib_qp *mthca_create_qp(struct ib_pd *pd, to_mcq(init_attr->send_cq), to_mcq(init_attr->recv_cq), init_attr->qp_type, init_attr->sq_sig_type, - &init_attr->cap, qp); + &init_attr->cap, qp, udata); - if (err && pd->uobject) { + if (err && udata) { context = to_mucontext(pd->uobject->context); mthca_unmap_user_db(to_mdev(pd->device), @@ -596,7 +597,7 @@ static struct ib_qp *mthca_create_qp(struct ib_pd *pd, case IB_QPT_GSI: { /* Don't allow userspace to create special QPs */ - if (pd->uobject) + if (udata) return ERR_PTR(-EINVAL); qp = kmalloc(sizeof (struct mthca_sqp), GFP_KERNEL); @@ -610,7 +611,7 @@ static struct ib_qp *mthca_create_qp(struct ib_pd *pd, to_mcq(init_attr->recv_cq), init_attr->sq_sig_type, &init_attr->cap, qp->ibqp.qp_num, init_attr->port_num, - to_msqp(qp)); + to_msqp(qp), udata); break; } default: @@ -1193,6 +1194,81 @@ static void get_dev_fw_str(struct ib_device *device, char *str) (int) dev->fw_ver & 0xffff); } +static const struct ib_device_ops mthca_dev_ops = { + .alloc_pd = mthca_alloc_pd, + .alloc_ucontext = mthca_alloc_ucontext, + .attach_mcast = mthca_multicast_attach, + .create_ah = mthca_ah_create, + .create_cq = mthca_create_cq, + .create_qp = mthca_create_qp, + .dealloc_pd = mthca_dealloc_pd, + .dealloc_ucontext = mthca_dealloc_ucontext, + .dereg_mr = mthca_dereg_mr, + .destroy_ah = mthca_ah_destroy, + .destroy_cq = mthca_destroy_cq, + .destroy_qp = mthca_destroy_qp, + .detach_mcast = mthca_multicast_detach, + .get_dev_fw_str = get_dev_fw_str, + .get_dma_mr = mthca_get_dma_mr, + .get_port_immutable = mthca_port_immutable, + .mmap = mthca_mmap_uar, + .modify_device = mthca_modify_device, + .modify_port = mthca_modify_port, + .modify_qp = mthca_modify_qp, + .poll_cq = mthca_poll_cq, + .process_mad = mthca_process_mad, + .query_ah = mthca_ah_query, + .query_device = mthca_query_device, + .query_gid = mthca_query_gid, + .query_pkey = mthca_query_pkey, + .query_port = mthca_query_port, + .query_qp = mthca_query_qp, + .reg_user_mr = mthca_reg_user_mr, + .resize_cq = mthca_resize_cq, +}; + +static const struct ib_device_ops mthca_dev_arbel_srq_ops = { + .create_srq = mthca_create_srq, + .destroy_srq = mthca_destroy_srq, + .modify_srq = mthca_modify_srq, + .post_srq_recv = mthca_arbel_post_srq_recv, + .query_srq = mthca_query_srq, +}; + +static const struct ib_device_ops mthca_dev_tavor_srq_ops = { + .create_srq = mthca_create_srq, + .destroy_srq = mthca_destroy_srq, + .modify_srq = mthca_modify_srq, + .post_srq_recv = mthca_tavor_post_srq_recv, + .query_srq = mthca_query_srq, +}; + +static const struct ib_device_ops mthca_dev_arbel_fmr_ops = { + .alloc_fmr = mthca_alloc_fmr, + .dealloc_fmr = mthca_dealloc_fmr, + .map_phys_fmr = mthca_arbel_map_phys_fmr, + .unmap_fmr = mthca_unmap_fmr, +}; + +static const struct ib_device_ops mthca_dev_tavor_fmr_ops = { + .alloc_fmr = mthca_alloc_fmr, + .dealloc_fmr = mthca_dealloc_fmr, + .map_phys_fmr = mthca_tavor_map_phys_fmr, + .unmap_fmr = mthca_unmap_fmr, +}; + +static const struct ib_device_ops mthca_dev_arbel_ops = { + .post_recv = mthca_arbel_post_receive, + .post_send = mthca_arbel_post_send, + .req_notify_cq = mthca_arbel_arm_cq, +}; + +static const struct ib_device_ops mthca_dev_tavor_ops = { + .post_recv = mthca_tavor_post_receive, + .post_send = mthca_tavor_post_send, + .req_notify_cq = mthca_tavor_arm_cq, +}; + int mthca_register_device(struct mthca_dev *dev) { int ret; @@ -1226,26 +1302,8 @@ int mthca_register_device(struct mthca_dev *dev) dev->ib_dev.phys_port_cnt = dev->limits.num_ports; dev->ib_dev.num_comp_vectors = 1; dev->ib_dev.dev.parent = &dev->pdev->dev; - dev->ib_dev.query_device = mthca_query_device; - dev->ib_dev.query_port = mthca_query_port; - dev->ib_dev.modify_device = mthca_modify_device; - dev->ib_dev.modify_port = mthca_modify_port; - dev->ib_dev.query_pkey = mthca_query_pkey; - dev->ib_dev.query_gid = mthca_query_gid; - dev->ib_dev.alloc_ucontext = mthca_alloc_ucontext; - dev->ib_dev.dealloc_ucontext = mthca_dealloc_ucontext; - dev->ib_dev.mmap = mthca_mmap_uar; - dev->ib_dev.alloc_pd = mthca_alloc_pd; - dev->ib_dev.dealloc_pd = mthca_dealloc_pd; - dev->ib_dev.create_ah = mthca_ah_create; - dev->ib_dev.query_ah = mthca_ah_query; - dev->ib_dev.destroy_ah = mthca_ah_destroy; if (dev->mthca_flags & MTHCA_FLAG_SRQ) { - dev->ib_dev.create_srq = mthca_create_srq; - dev->ib_dev.modify_srq = mthca_modify_srq; - dev->ib_dev.query_srq = mthca_query_srq; - dev->ib_dev.destroy_srq = mthca_destroy_srq; dev->ib_dev.uverbs_cmd_mask |= (1ull << IB_USER_VERBS_CMD_CREATE_SRQ) | (1ull << IB_USER_VERBS_CMD_MODIFY_SRQ) | @@ -1253,48 +1311,28 @@ int mthca_register_device(struct mthca_dev *dev) (1ull << IB_USER_VERBS_CMD_DESTROY_SRQ); if (mthca_is_memfree(dev)) - dev->ib_dev.post_srq_recv = mthca_arbel_post_srq_recv; + ib_set_device_ops(&dev->ib_dev, + &mthca_dev_arbel_srq_ops); else - dev->ib_dev.post_srq_recv = mthca_tavor_post_srq_recv; + ib_set_device_ops(&dev->ib_dev, + &mthca_dev_tavor_srq_ops); } - dev->ib_dev.create_qp = mthca_create_qp; - dev->ib_dev.modify_qp = mthca_modify_qp; - dev->ib_dev.query_qp = mthca_query_qp; - dev->ib_dev.destroy_qp = mthca_destroy_qp; - dev->ib_dev.create_cq = mthca_create_cq; - dev->ib_dev.resize_cq = mthca_resize_cq; - dev->ib_dev.destroy_cq = mthca_destroy_cq; - dev->ib_dev.poll_cq = mthca_poll_cq; - dev->ib_dev.get_dma_mr = mthca_get_dma_mr; - dev->ib_dev.reg_user_mr = mthca_reg_user_mr; - dev->ib_dev.dereg_mr = mthca_dereg_mr; - dev->ib_dev.get_port_immutable = mthca_port_immutable; - dev->ib_dev.get_dev_fw_str = get_dev_fw_str; - if (dev->mthca_flags & MTHCA_FLAG_FMR) { - dev->ib_dev.alloc_fmr = mthca_alloc_fmr; - dev->ib_dev.unmap_fmr = mthca_unmap_fmr; - dev->ib_dev.dealloc_fmr = mthca_dealloc_fmr; if (mthca_is_memfree(dev)) - dev->ib_dev.map_phys_fmr = mthca_arbel_map_phys_fmr; + ib_set_device_ops(&dev->ib_dev, + &mthca_dev_arbel_fmr_ops); else - dev->ib_dev.map_phys_fmr = mthca_tavor_map_phys_fmr; + ib_set_device_ops(&dev->ib_dev, + &mthca_dev_tavor_fmr_ops); } - dev->ib_dev.attach_mcast = mthca_multicast_attach; - dev->ib_dev.detach_mcast = mthca_multicast_detach; - dev->ib_dev.process_mad = mthca_process_mad; + ib_set_device_ops(&dev->ib_dev, &mthca_dev_ops); - if (mthca_is_memfree(dev)) { - dev->ib_dev.req_notify_cq = mthca_arbel_arm_cq; - dev->ib_dev.post_send = mthca_arbel_post_send; - dev->ib_dev.post_recv = mthca_arbel_post_receive; - } else { - dev->ib_dev.req_notify_cq = mthca_tavor_arm_cq; - dev->ib_dev.post_send = mthca_tavor_post_send; - dev->ib_dev.post_recv = mthca_tavor_post_receive; - } + if (mthca_is_memfree(dev)) + ib_set_device_ops(&dev->ib_dev, &mthca_dev_arbel_ops); + else + ib_set_device_ops(&dev->ib_dev, &mthca_dev_tavor_ops); mutex_init(&dev->cap_mask_mutex); diff --git a/drivers/infiniband/hw/mthca/mthca_qp.c b/drivers/infiniband/hw/mthca/mthca_qp.c index 9d178ee3c96a..4e5b5cc17f1d 100644 --- a/drivers/infiniband/hw/mthca/mthca_qp.c +++ b/drivers/infiniband/hw/mthca/mthca_qp.c @@ -981,7 +981,8 @@ static void mthca_adjust_qp_caps(struct mthca_dev *dev, */ static int mthca_alloc_wqe_buf(struct mthca_dev *dev, struct mthca_pd *pd, - struct mthca_qp *qp) + struct mthca_qp *qp, + struct ib_udata *udata) { int size; int err = -ENOMEM; @@ -1048,7 +1049,7 @@ static int mthca_alloc_wqe_buf(struct mthca_dev *dev, * allocate anything. All we need is to calculate the WQE * sizes and the send_wqe_offset, so we're done now. */ - if (pd->ibpd.uobject) + if (udata) return 0; size = PAGE_ALIGN(qp->send_wqe_offset + @@ -1155,7 +1156,8 @@ static int mthca_alloc_qp_common(struct mthca_dev *dev, struct mthca_cq *send_cq, struct mthca_cq *recv_cq, enum ib_sig_type send_policy, - struct mthca_qp *qp) + struct mthca_qp *qp, + struct ib_udata *udata) { int ret; int i; @@ -1178,7 +1180,7 @@ static int mthca_alloc_qp_common(struct mthca_dev *dev, if (ret) return ret; - ret = mthca_alloc_wqe_buf(dev, pd, qp); + ret = mthca_alloc_wqe_buf(dev, pd, qp, udata); if (ret) { mthca_unmap_memfree(dev, qp); return ret; @@ -1191,7 +1193,7 @@ static int mthca_alloc_qp_common(struct mthca_dev *dev, * will be allocated and buffers will be initialized in * userspace. */ - if (pd->ibpd.uobject) + if (udata) return 0; ret = mthca_alloc_memfree(dev, qp); @@ -1285,7 +1287,8 @@ int mthca_alloc_qp(struct mthca_dev *dev, enum ib_qp_type type, enum ib_sig_type send_policy, struct ib_qp_cap *cap, - struct mthca_qp *qp) + struct mthca_qp *qp, + struct ib_udata *udata) { int err; @@ -1308,7 +1311,7 @@ int mthca_alloc_qp(struct mthca_dev *dev, qp->port = 0; err = mthca_alloc_qp_common(dev, pd, send_cq, recv_cq, - send_policy, qp); + send_policy, qp, udata); if (err) { mthca_free(&dev->qp_table.alloc, qp->qpn); return err; @@ -1360,7 +1363,8 @@ int mthca_alloc_sqp(struct mthca_dev *dev, struct ib_qp_cap *cap, int qpn, int port, - struct mthca_sqp *sqp) + struct mthca_sqp *sqp, + struct ib_udata *udata) { u32 mqpn = qpn * 2 + dev->qp_table.sqp_start + port - 1; int err; @@ -1391,7 +1395,7 @@ int mthca_alloc_sqp(struct mthca_dev *dev, sqp->qp.transport = MLX; err = mthca_alloc_qp_common(dev, pd, send_cq, recv_cq, - send_policy, &sqp->qp); + send_policy, &sqp->qp, udata); if (err) goto err_out_free; diff --git a/drivers/infiniband/hw/mthca/mthca_srq.c b/drivers/infiniband/hw/mthca/mthca_srq.c index 9a3fc6fb0d7e..b8333c79e3fa 100644 --- a/drivers/infiniband/hw/mthca/mthca_srq.c +++ b/drivers/infiniband/hw/mthca/mthca_srq.c @@ -95,7 +95,8 @@ static inline int *wqe_to_link(void *wqe) static void mthca_tavor_init_srq_context(struct mthca_dev *dev, struct mthca_pd *pd, struct mthca_srq *srq, - struct mthca_tavor_srq_context *context) + struct mthca_tavor_srq_context *context, + bool is_user) { memset(context, 0, sizeof *context); @@ -103,7 +104,7 @@ static void mthca_tavor_init_srq_context(struct mthca_dev *dev, context->state_pd = cpu_to_be32(pd->pd_num); context->lkey = cpu_to_be32(srq->mr.ibmr.lkey); - if (pd->ibpd.uobject) + if (is_user) context->uar = cpu_to_be32(to_mucontext(pd->ibpd.uobject->context)->uar.index); else @@ -113,7 +114,8 @@ static void mthca_tavor_init_srq_context(struct mthca_dev *dev, static void mthca_arbel_init_srq_context(struct mthca_dev *dev, struct mthca_pd *pd, struct mthca_srq *srq, - struct mthca_arbel_srq_context *context) + struct mthca_arbel_srq_context *context, + bool is_user) { int logsize, max; @@ -129,7 +131,7 @@ static void mthca_arbel_init_srq_context(struct mthca_dev *dev, context->lkey = cpu_to_be32(srq->mr.ibmr.lkey); context->db_index = cpu_to_be32(srq->db_index); context->logstride_usrpage = cpu_to_be32((srq->wqe_shift - 4) << 29); - if (pd->ibpd.uobject) + if (is_user) context->logstride_usrpage |= cpu_to_be32(to_mucontext(pd->ibpd.uobject->context)->uar.index); else @@ -145,14 +147,14 @@ static void mthca_free_srq_buf(struct mthca_dev *dev, struct mthca_srq *srq) } static int mthca_alloc_srq_buf(struct mthca_dev *dev, struct mthca_pd *pd, - struct mthca_srq *srq) + struct mthca_srq *srq, struct ib_udata *udata) { struct mthca_data_seg *scatter; void *wqe; int err; int i; - if (pd->ibpd.uobject) + if (udata) return 0; srq->wrid = kmalloc_array(srq->max, sizeof(u64), GFP_KERNEL); @@ -197,7 +199,8 @@ static int mthca_alloc_srq_buf(struct mthca_dev *dev, struct mthca_pd *pd, } int mthca_alloc_srq(struct mthca_dev *dev, struct mthca_pd *pd, - struct ib_srq_attr *attr, struct mthca_srq *srq) + struct ib_srq_attr *attr, struct mthca_srq *srq, + struct ib_udata *udata) { struct mthca_mailbox *mailbox; int ds; @@ -235,7 +238,7 @@ int mthca_alloc_srq(struct mthca_dev *dev, struct mthca_pd *pd, if (err) goto err_out; - if (!pd->ibpd.uobject) { + if (!udata) { srq->db_index = mthca_alloc_db(dev, MTHCA_DB_TYPE_SRQ, srq->srqn, &srq->db); if (srq->db_index < 0) { @@ -251,7 +254,7 @@ int mthca_alloc_srq(struct mthca_dev *dev, struct mthca_pd *pd, goto err_out_db; } - err = mthca_alloc_srq_buf(dev, pd, srq); + err = mthca_alloc_srq_buf(dev, pd, srq, udata); if (err) goto err_out_mailbox; @@ -261,9 +264,9 @@ int mthca_alloc_srq(struct mthca_dev *dev, struct mthca_pd *pd, mutex_init(&srq->mutex); if (mthca_is_memfree(dev)) - mthca_arbel_init_srq_context(dev, pd, srq, mailbox->buf); + mthca_arbel_init_srq_context(dev, pd, srq, mailbox->buf, udata); else - mthca_tavor_init_srq_context(dev, pd, srq, mailbox->buf); + mthca_tavor_init_srq_context(dev, pd, srq, mailbox->buf, udata); err = mthca_SW2HW_SRQ(dev, mailbox, srq->srqn); @@ -297,14 +300,14 @@ err_out_free_srq: mthca_warn(dev, "HW2SW_SRQ failed (%d)\n", err); err_out_free_buf: - if (!pd->ibpd.uobject) + if (!udata) mthca_free_srq_buf(dev, srq); err_out_mailbox: mthca_free_mailbox(dev, mailbox); err_out_db: - if (!pd->ibpd.uobject && mthca_is_memfree(dev)) + if (!udata && mthca_is_memfree(dev)) mthca_free_db(dev, MTHCA_DB_TYPE_SRQ, srq->db_index); err_out_icm: diff --git a/drivers/infiniband/hw/nes/nes_cm.c b/drivers/infiniband/hw/nes/nes_cm.c index 2b67ace5b614..032883180f65 100644 --- a/drivers/infiniband/hw/nes/nes_cm.c +++ b/drivers/infiniband/hw/nes/nes_cm.c @@ -3033,7 +3033,7 @@ static int nes_disconnect(struct nes_qp *nesqp, int abrupt) /* Need to free the Last Streaming Mode Message */ if (nesqp->ietf_frame) { if (nesqp->lsmm_mr) - nesibdev->ibdev.dereg_mr(nesqp->lsmm_mr); + nesibdev->ibdev.ops.dereg_mr(nesqp->lsmm_mr); pci_free_consistent(nesdev->pcidev, nesqp->private_data_len + nesqp->ietf_frame_size, nesqp->ietf_frame, nesqp->ietf_frame_pbase); diff --git a/drivers/infiniband/hw/nes/nes_verbs.c b/drivers/infiniband/hw/nes/nes_verbs.c index 92d1cadd4cfd..4e7f08ee1907 100644 --- a/drivers/infiniband/hw/nes/nes_verbs.c +++ b/drivers/infiniband/hw/nes/nes_verbs.c @@ -1066,7 +1066,7 @@ static struct ib_qp *nes_create_qp(struct ib_pd *ibpd, } if (req.user_qp_buffer) nesqp->nesuqp_addr = req.user_qp_buffer; - if ((ibpd->uobject) && (ibpd->uobject->context)) { + if (udata && (ibpd->uobject->context)) { nesqp->user_mode = 1; nes_ucontext = to_nesucontext(ibpd->uobject->context); if (virt_wqs) { @@ -1257,7 +1257,7 @@ static struct ib_qp *nes_create_qp(struct ib_pd *ibpd, nes_put_cqp_request(nesdev, cqp_request); - if (ibpd->uobject) { + if (udata) { uresp.mmap_sq_db_index = nesqp->mmap_sq_db_index; uresp.mmap_rq_db_index = 0; uresp.actual_sq_size = sq_size; @@ -3627,6 +3627,39 @@ static void get_dev_fw_str(struct ib_device *dev, char *str) (nesvnic->nesdev->nesadapter->firmware_version & 0x000000ff)); } +static const struct ib_device_ops nes_dev_ops = { + .alloc_mr = nes_alloc_mr, + .alloc_mw = nes_alloc_mw, + .alloc_pd = nes_alloc_pd, + .alloc_ucontext = nes_alloc_ucontext, + .create_cq = nes_create_cq, + .create_qp = nes_create_qp, + .dealloc_mw = nes_dealloc_mw, + .dealloc_pd = nes_dealloc_pd, + .dealloc_ucontext = nes_dealloc_ucontext, + .dereg_mr = nes_dereg_mr, + .destroy_cq = nes_destroy_cq, + .destroy_qp = nes_destroy_qp, + .drain_rq = nes_drain_rq, + .drain_sq = nes_drain_sq, + .get_dev_fw_str = get_dev_fw_str, + .get_dma_mr = nes_get_dma_mr, + .get_port_immutable = nes_port_immutable, + .map_mr_sg = nes_map_mr_sg, + .mmap = nes_mmap, + .modify_qp = nes_modify_qp, + .poll_cq = nes_poll_cq, + .post_recv = nes_post_recv, + .post_send = nes_post_send, + .query_device = nes_query_device, + .query_gid = nes_query_gid, + .query_pkey = nes_query_pkey, + .query_port = nes_query_port, + .query_qp = nes_query_qp, + .reg_user_mr = nes_reg_user_mr, + .req_notify_cq = nes_req_notify_cq, +}; + /** * nes_init_ofa_device */ @@ -3673,36 +3706,6 @@ struct nes_ib_device *nes_init_ofa_device(struct net_device *netdev) nesibdev->ibdev.phys_port_cnt = 1; nesibdev->ibdev.num_comp_vectors = 1; nesibdev->ibdev.dev.parent = &nesdev->pcidev->dev; - nesibdev->ibdev.query_device = nes_query_device; - nesibdev->ibdev.query_port = nes_query_port; - nesibdev->ibdev.query_pkey = nes_query_pkey; - nesibdev->ibdev.query_gid = nes_query_gid; - nesibdev->ibdev.alloc_ucontext = nes_alloc_ucontext; - nesibdev->ibdev.dealloc_ucontext = nes_dealloc_ucontext; - nesibdev->ibdev.mmap = nes_mmap; - nesibdev->ibdev.alloc_pd = nes_alloc_pd; - nesibdev->ibdev.dealloc_pd = nes_dealloc_pd; - nesibdev->ibdev.create_qp = nes_create_qp; - nesibdev->ibdev.modify_qp = nes_modify_qp; - nesibdev->ibdev.query_qp = nes_query_qp; - nesibdev->ibdev.destroy_qp = nes_destroy_qp; - nesibdev->ibdev.create_cq = nes_create_cq; - nesibdev->ibdev.destroy_cq = nes_destroy_cq; - nesibdev->ibdev.poll_cq = nes_poll_cq; - nesibdev->ibdev.get_dma_mr = nes_get_dma_mr; - nesibdev->ibdev.reg_user_mr = nes_reg_user_mr; - nesibdev->ibdev.dereg_mr = nes_dereg_mr; - nesibdev->ibdev.alloc_mw = nes_alloc_mw; - nesibdev->ibdev.dealloc_mw = nes_dealloc_mw; - - nesibdev->ibdev.alloc_mr = nes_alloc_mr; - nesibdev->ibdev.map_mr_sg = nes_map_mr_sg; - - nesibdev->ibdev.req_notify_cq = nes_req_notify_cq; - nesibdev->ibdev.post_send = nes_post_send; - nesibdev->ibdev.post_recv = nes_post_recv; - nesibdev->ibdev.drain_sq = nes_drain_sq; - nesibdev->ibdev.drain_rq = nes_drain_rq; nesibdev->ibdev.iwcm = kzalloc(sizeof(*nesibdev->ibdev.iwcm), GFP_KERNEL); if (nesibdev->ibdev.iwcm == NULL) { @@ -3717,8 +3720,8 @@ struct nes_ib_device *nes_init_ofa_device(struct net_device *netdev) nesibdev->ibdev.iwcm->reject = nes_reject; nesibdev->ibdev.iwcm->create_listen = nes_create_listen; nesibdev->ibdev.iwcm->destroy_listen = nes_destroy_listen; - nesibdev->ibdev.get_port_immutable = nes_port_immutable; - nesibdev->ibdev.get_dev_fw_str = get_dev_fw_str; + + ib_set_device_ops(&nesibdev->ibdev, &nes_dev_ops); memcpy(nesibdev->ibdev.iwcm->ifname, netdev->name, sizeof(nesibdev->ibdev.iwcm->ifname)); diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_ah.c b/drivers/infiniband/hw/ocrdma/ocrdma_ah.c index 58188fe5aed2..a7295322efbc 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_ah.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_ah.c @@ -157,7 +157,7 @@ static inline int set_av_attr(struct ocrdma_dev *dev, struct ocrdma_ah *ah, } struct ib_ah *ocrdma_create_ah(struct ib_pd *ibpd, struct rdma_ah_attr *attr, - struct ib_udata *udata) + u32 flags, struct ib_udata *udata) { u32 *ahid_addr; int status; @@ -219,7 +219,7 @@ av_err: return ERR_PTR(status); } -int ocrdma_destroy_ah(struct ib_ah *ibah) +int ocrdma_destroy_ah(struct ib_ah *ibah, u32 flags) { struct ocrdma_ah *ah = get_ocrdma_ah(ibah); struct ocrdma_dev *dev = get_ocrdma_dev(ibah->device); diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_ah.h b/drivers/infiniband/hw/ocrdma/ocrdma_ah.h index c0c32c9b80ae..eb996e14b520 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_ah.h +++ b/drivers/infiniband/hw/ocrdma/ocrdma_ah.h @@ -52,8 +52,8 @@ enum { }; struct ib_ah *ocrdma_create_ah(struct ib_pd *pd, struct rdma_ah_attr *ah_attr, - struct ib_udata *udata); -int ocrdma_destroy_ah(struct ib_ah *ah); + u32 flags, struct ib_udata *udata); +int ocrdma_destroy_ah(struct ib_ah *ah, u32 flags); int ocrdma_query_ah(struct ib_ah *ah, struct rdma_ah_attr *ah_attr); int ocrdma_process_mad(struct ib_device *, diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_main.c b/drivers/infiniband/hw/ocrdma/ocrdma_main.c index 873cc7f6fe61..1f393842453a 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_main.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_main.c @@ -143,6 +143,50 @@ static const struct attribute_group ocrdma_attr_group = { .attrs = ocrdma_attributes, }; +static const struct ib_device_ops ocrdma_dev_ops = { + .alloc_mr = ocrdma_alloc_mr, + .alloc_pd = ocrdma_alloc_pd, + .alloc_ucontext = ocrdma_alloc_ucontext, + .create_ah = ocrdma_create_ah, + .create_cq = ocrdma_create_cq, + .create_qp = ocrdma_create_qp, + .dealloc_pd = ocrdma_dealloc_pd, + .dealloc_ucontext = ocrdma_dealloc_ucontext, + .dereg_mr = ocrdma_dereg_mr, + .destroy_ah = ocrdma_destroy_ah, + .destroy_cq = ocrdma_destroy_cq, + .destroy_qp = ocrdma_destroy_qp, + .get_dev_fw_str = get_dev_fw_str, + .get_dma_mr = ocrdma_get_dma_mr, + .get_link_layer = ocrdma_link_layer, + .get_netdev = ocrdma_get_netdev, + .get_port_immutable = ocrdma_port_immutable, + .map_mr_sg = ocrdma_map_mr_sg, + .mmap = ocrdma_mmap, + .modify_port = ocrdma_modify_port, + .modify_qp = ocrdma_modify_qp, + .poll_cq = ocrdma_poll_cq, + .post_recv = ocrdma_post_recv, + .post_send = ocrdma_post_send, + .process_mad = ocrdma_process_mad, + .query_ah = ocrdma_query_ah, + .query_device = ocrdma_query_device, + .query_pkey = ocrdma_query_pkey, + .query_port = ocrdma_query_port, + .query_qp = ocrdma_query_qp, + .reg_user_mr = ocrdma_reg_user_mr, + .req_notify_cq = ocrdma_arm_cq, + .resize_cq = ocrdma_resize_cq, +}; + +static const struct ib_device_ops ocrdma_dev_srq_ops = { + .create_srq = ocrdma_create_srq, + .destroy_srq = ocrdma_destroy_srq, + .modify_srq = ocrdma_modify_srq, + .post_srq_recv = ocrdma_post_srq_recv, + .query_srq = ocrdma_query_srq, +}; + static int ocrdma_register_device(struct ocrdma_dev *dev) { ocrdma_get_guid(dev, (u8 *)&dev->ibdev.node_guid); @@ -182,50 +226,10 @@ static int ocrdma_register_device(struct ocrdma_dev *dev) dev->ibdev.phys_port_cnt = 1; dev->ibdev.num_comp_vectors = dev->eq_cnt; - /* mandatory verbs. */ - dev->ibdev.query_device = ocrdma_query_device; - dev->ibdev.query_port = ocrdma_query_port; - dev->ibdev.modify_port = ocrdma_modify_port; - dev->ibdev.get_netdev = ocrdma_get_netdev; - dev->ibdev.get_link_layer = ocrdma_link_layer; - dev->ibdev.alloc_pd = ocrdma_alloc_pd; - dev->ibdev.dealloc_pd = ocrdma_dealloc_pd; - - dev->ibdev.create_cq = ocrdma_create_cq; - dev->ibdev.destroy_cq = ocrdma_destroy_cq; - dev->ibdev.resize_cq = ocrdma_resize_cq; - - dev->ibdev.create_qp = ocrdma_create_qp; - dev->ibdev.modify_qp = ocrdma_modify_qp; - dev->ibdev.query_qp = ocrdma_query_qp; - dev->ibdev.destroy_qp = ocrdma_destroy_qp; - - dev->ibdev.query_pkey = ocrdma_query_pkey; - dev->ibdev.create_ah = ocrdma_create_ah; - dev->ibdev.destroy_ah = ocrdma_destroy_ah; - dev->ibdev.query_ah = ocrdma_query_ah; - - dev->ibdev.poll_cq = ocrdma_poll_cq; - dev->ibdev.post_send = ocrdma_post_send; - dev->ibdev.post_recv = ocrdma_post_recv; - dev->ibdev.req_notify_cq = ocrdma_arm_cq; - - dev->ibdev.get_dma_mr = ocrdma_get_dma_mr; - dev->ibdev.dereg_mr = ocrdma_dereg_mr; - dev->ibdev.reg_user_mr = ocrdma_reg_user_mr; - - dev->ibdev.alloc_mr = ocrdma_alloc_mr; - dev->ibdev.map_mr_sg = ocrdma_map_mr_sg; - /* mandatory to support user space verbs consumer. */ - dev->ibdev.alloc_ucontext = ocrdma_alloc_ucontext; - dev->ibdev.dealloc_ucontext = ocrdma_dealloc_ucontext; - dev->ibdev.mmap = ocrdma_mmap; dev->ibdev.dev.parent = &dev->nic_info.pdev->dev; - dev->ibdev.process_mad = ocrdma_process_mad; - dev->ibdev.get_port_immutable = ocrdma_port_immutable; - dev->ibdev.get_dev_fw_str = get_dev_fw_str; + ib_set_device_ops(&dev->ibdev, &ocrdma_dev_ops); if (ocrdma_get_asic_type(dev) == OCRDMA_ASIC_GEN_SKH_R) { dev->ibdev.uverbs_cmd_mask |= @@ -235,11 +239,7 @@ static int ocrdma_register_device(struct ocrdma_dev *dev) OCRDMA_UVERBS(DESTROY_SRQ) | OCRDMA_UVERBS(POST_SRQ_RECV); - dev->ibdev.create_srq = ocrdma_create_srq; - dev->ibdev.modify_srq = ocrdma_modify_srq; - dev->ibdev.query_srq = ocrdma_query_srq; - dev->ibdev.destroy_srq = ocrdma_destroy_srq; - dev->ibdev.post_srq_recv = ocrdma_post_srq_recv; + ib_set_device_ops(&dev->ibdev, &ocrdma_dev_srq_ops); } rdma_set_device_sysfs_group(&dev->ibdev, &ocrdma_attr_group); dev->ibdev.driver_id = RDMA_DRIVER_OCRDMA; diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_stats.c b/drivers/infiniband/hw/ocrdma/ocrdma_stats.c index 290d776edf48..dd15474b19b7 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_stats.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_stats.c @@ -760,12 +760,13 @@ static const struct file_operations ocrdma_dbg_ops = { void ocrdma_add_port_stats(struct ocrdma_dev *dev) { + const struct pci_dev *pdev = dev->nic_info.pdev; + if (!ocrdma_dbgfs_dir) return; /* Create post stats base dir */ - dev->dir = - debugfs_create_dir(dev_name(&dev->ibdev.dev), ocrdma_dbgfs_dir); + dev->dir = debugfs_create_dir(pci_name(pdev), ocrdma_dbgfs_dir); if (!dev->dir) goto err; diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c index 06d2a7f3304c..c46bed0c5513 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c @@ -177,11 +177,6 @@ int ocrdma_query_port(struct ib_device *ibdev, /* props being zeroed by the caller, avoid zeroing it here */ dev = get_ocrdma_dev(ibdev); - if (port > 1) { - pr_err("%s(%d) invalid_port=0x%x\n", __func__, - dev->id, port); - return -EINVAL; - } netdev = dev->nic_info.netdev; if (netif_running(netdev) && netif_oper_up(netdev)) { port_state = IB_PORT_ACTIVE; @@ -215,13 +210,6 @@ int ocrdma_query_port(struct ib_device *ibdev, int ocrdma_modify_port(struct ib_device *ibdev, u8 port, int mask, struct ib_port_modify *props) { - struct ocrdma_dev *dev; - - dev = get_ocrdma_dev(ibdev); - if (port > 1) { - pr_err("%s(%d) invalid_port=0x%x\n", __func__, dev->id, port); - return -EINVAL; - } return 0; } @@ -1169,7 +1157,8 @@ static void ocrdma_del_qpn_map(struct ocrdma_dev *dev, struct ocrdma_qp *qp) } static int ocrdma_check_qp_params(struct ib_pd *ibpd, struct ocrdma_dev *dev, - struct ib_qp_init_attr *attrs) + struct ib_qp_init_attr *attrs, + struct ib_udata *udata) { if ((attrs->qp_type != IB_QPT_GSI) && (attrs->qp_type != IB_QPT_RC) && @@ -1217,7 +1206,7 @@ static int ocrdma_check_qp_params(struct ib_pd *ibpd, struct ocrdma_dev *dev, return -EINVAL; } /* unprivileged user space cannot create special QP */ - if (ibpd->uobject && attrs->qp_type == IB_QPT_GSI) { + if (udata && attrs->qp_type == IB_QPT_GSI) { pr_err ("%s(%d) Userspace can't create special QPs of type=0x%x\n", __func__, dev->id, attrs->qp_type); @@ -1374,7 +1363,7 @@ struct ib_qp *ocrdma_create_qp(struct ib_pd *ibpd, struct ocrdma_create_qp_ureq ureq; u16 dpp_credit_lmt, dpp_offset; - status = ocrdma_check_qp_params(ibpd, dev, attrs); + status = ocrdma_check_qp_params(ibpd, dev, attrs, udata); if (status) goto gen_err; diff --git a/drivers/infiniband/hw/qedr/main.c b/drivers/infiniband/hw/qedr/main.c index 8d6ff9df49fe..75940e2a8791 100644 --- a/drivers/infiniband/hw/qedr/main.c +++ b/drivers/infiniband/hw/qedr/main.c @@ -160,12 +160,16 @@ static const struct attribute_group qedr_attr_group = { .attrs = qedr_attributes, }; +static const struct ib_device_ops qedr_iw_dev_ops = { + .get_port_immutable = qedr_iw_port_immutable, + .query_gid = qedr_iw_query_gid, +}; + static int qedr_iw_register_device(struct qedr_dev *dev) { dev->ibdev.node_type = RDMA_NODE_RNIC; - dev->ibdev.query_gid = qedr_iw_query_gid; - dev->ibdev.get_port_immutable = qedr_iw_port_immutable; + ib_set_device_ops(&dev->ibdev, &qedr_iw_dev_ops); dev->ibdev.iwcm = kzalloc(sizeof(*dev->ibdev.iwcm), GFP_KERNEL); if (!dev->ibdev.iwcm) @@ -186,13 +190,56 @@ static int qedr_iw_register_device(struct qedr_dev *dev) return 0; } +static const struct ib_device_ops qedr_roce_dev_ops = { + .get_port_immutable = qedr_roce_port_immutable, +}; + static void qedr_roce_register_device(struct qedr_dev *dev) { dev->ibdev.node_type = RDMA_NODE_IB_CA; - dev->ibdev.get_port_immutable = qedr_roce_port_immutable; + ib_set_device_ops(&dev->ibdev, &qedr_roce_dev_ops); } +static const struct ib_device_ops qedr_dev_ops = { + .alloc_mr = qedr_alloc_mr, + .alloc_pd = qedr_alloc_pd, + .alloc_ucontext = qedr_alloc_ucontext, + .create_ah = qedr_create_ah, + .create_cq = qedr_create_cq, + .create_qp = qedr_create_qp, + .create_srq = qedr_create_srq, + .dealloc_pd = qedr_dealloc_pd, + .dealloc_ucontext = qedr_dealloc_ucontext, + .dereg_mr = qedr_dereg_mr, + .destroy_ah = qedr_destroy_ah, + .destroy_cq = qedr_destroy_cq, + .destroy_qp = qedr_destroy_qp, + .destroy_srq = qedr_destroy_srq, + .get_dev_fw_str = qedr_get_dev_fw_str, + .get_dma_mr = qedr_get_dma_mr, + .get_link_layer = qedr_link_layer, + .get_netdev = qedr_get_netdev, + .map_mr_sg = qedr_map_mr_sg, + .mmap = qedr_mmap, + .modify_port = qedr_modify_port, + .modify_qp = qedr_modify_qp, + .modify_srq = qedr_modify_srq, + .poll_cq = qedr_poll_cq, + .post_recv = qedr_post_recv, + .post_send = qedr_post_send, + .post_srq_recv = qedr_post_srq_recv, + .process_mad = qedr_process_mad, + .query_device = qedr_query_device, + .query_pkey = qedr_query_pkey, + .query_port = qedr_query_port, + .query_qp = qedr_query_qp, + .query_srq = qedr_query_srq, + .reg_user_mr = qedr_reg_user_mr, + .req_notify_cq = qedr_arm_cq, + .resize_cq = qedr_resize_cq, +}; + static int qedr_register_device(struct qedr_dev *dev) { int rc; @@ -237,57 +284,11 @@ static int qedr_register_device(struct qedr_dev *dev) dev->ibdev.phys_port_cnt = 1; dev->ibdev.num_comp_vectors = dev->num_cnq; - - dev->ibdev.query_device = qedr_query_device; - dev->ibdev.query_port = qedr_query_port; - dev->ibdev.modify_port = qedr_modify_port; - - dev->ibdev.alloc_ucontext = qedr_alloc_ucontext; - dev->ibdev.dealloc_ucontext = qedr_dealloc_ucontext; - dev->ibdev.mmap = qedr_mmap; - - dev->ibdev.alloc_pd = qedr_alloc_pd; - dev->ibdev.dealloc_pd = qedr_dealloc_pd; - - dev->ibdev.create_cq = qedr_create_cq; - dev->ibdev.destroy_cq = qedr_destroy_cq; - dev->ibdev.resize_cq = qedr_resize_cq; - dev->ibdev.req_notify_cq = qedr_arm_cq; - - dev->ibdev.create_qp = qedr_create_qp; - dev->ibdev.modify_qp = qedr_modify_qp; - dev->ibdev.query_qp = qedr_query_qp; - dev->ibdev.destroy_qp = qedr_destroy_qp; - - dev->ibdev.create_srq = qedr_create_srq; - dev->ibdev.destroy_srq = qedr_destroy_srq; - dev->ibdev.modify_srq = qedr_modify_srq; - dev->ibdev.query_srq = qedr_query_srq; - dev->ibdev.post_srq_recv = qedr_post_srq_recv; - dev->ibdev.query_pkey = qedr_query_pkey; - - dev->ibdev.create_ah = qedr_create_ah; - dev->ibdev.destroy_ah = qedr_destroy_ah; - - dev->ibdev.get_dma_mr = qedr_get_dma_mr; - dev->ibdev.dereg_mr = qedr_dereg_mr; - dev->ibdev.reg_user_mr = qedr_reg_user_mr; - dev->ibdev.alloc_mr = qedr_alloc_mr; - dev->ibdev.map_mr_sg = qedr_map_mr_sg; - - dev->ibdev.poll_cq = qedr_poll_cq; - dev->ibdev.post_send = qedr_post_send; - dev->ibdev.post_recv = qedr_post_recv; - - dev->ibdev.process_mad = qedr_process_mad; - - dev->ibdev.get_netdev = qedr_get_netdev; - dev->ibdev.dev.parent = &dev->pdev->dev; - dev->ibdev.get_link_layer = qedr_link_layer; - dev->ibdev.get_dev_fw_str = qedr_get_dev_fw_str; rdma_set_device_sysfs_group(&dev->ibdev, &qedr_attr_group); + ib_set_device_ops(&dev->ibdev, &qedr_dev_ops); + dev->ibdev.driver_id = RDMA_DRIVER_QEDR; return ib_register_device(&dev->ibdev, "qedr%d", NULL); } diff --git a/drivers/infiniband/hw/qedr/verbs.c b/drivers/infiniband/hw/qedr/verbs.c index 82ee4b4a7084..b342a70e2814 100644 --- a/drivers/infiniband/hw/qedr/verbs.c +++ b/drivers/infiniband/hw/qedr/verbs.c @@ -216,10 +216,6 @@ int qedr_query_port(struct ib_device *ibdev, u8 port, struct ib_port_attr *attr) struct qed_rdma_port *rdma_port; dev = get_qedr_dev(ibdev); - if (port > 1) { - DP_ERR(dev, "invalid_port=0x%x\n", port); - return -EINVAL; - } if (!dev->rdma_ctx) { DP_ERR(dev, "rdma_ctx is NULL\n"); @@ -263,14 +259,6 @@ int qedr_query_port(struct ib_device *ibdev, u8 port, struct ib_port_attr *attr) int qedr_modify_port(struct ib_device *ibdev, u8 port, int mask, struct ib_port_modify *props) { - struct qedr_dev *dev; - - dev = get_qedr_dev(ibdev); - if (port > 1) { - DP_ERR(dev, "invalid_port=0x%x\n", port); - return -EINVAL; - } - return 0; } @@ -1148,7 +1136,8 @@ static inline int get_gid_info_from_table(struct ib_qp *ibqp, } static int qedr_check_qp_attrs(struct ib_pd *ibpd, struct qedr_dev *dev, - struct ib_qp_init_attr *attrs) + struct ib_qp_init_attr *attrs, + struct ib_udata *udata) { struct qedr_device_attr *qattr = &dev->attr; @@ -1189,7 +1178,7 @@ static int qedr_check_qp_attrs(struct ib_pd *ibpd, struct qedr_dev *dev, } /* Unprivileged user space cannot create special QP */ - if (ibpd->uobject && attrs->qp_type == IB_QPT_GSI) { + if (udata && attrs->qp_type == IB_QPT_GSI) { DP_ERR(dev, "create qp: userspace can't create special QPs of type=0x%x\n", attrs->qp_type); @@ -1552,7 +1541,7 @@ int qedr_destroy_srq(struct ib_srq *ibsrq) in_params.srq_id = srq->srq_id; dev->ops->rdma_destroy_srq(dev->rdma_ctx, &in_params); - if (ibsrq->pd->uobject) + if (ibsrq->uobject) qedr_free_srq_user_params(srq); else qedr_free_srq_kernel_params(srq); @@ -2005,7 +1994,7 @@ struct ib_qp *qedr_create_qp(struct ib_pd *ibpd, DP_DEBUG(dev, QEDR_MSG_QP, "create qp: called from %s, pd=%p\n", udata ? "user library" : "kernel", pd); - rc = qedr_check_qp_attrs(ibpd, dev, attrs); + rc = qedr_check_qp_attrs(ibpd, dev, attrs, udata); if (rc) return ERR_PTR(rc); @@ -2626,7 +2615,7 @@ int qedr_destroy_qp(struct ib_qp *ibqp) } struct ib_ah *qedr_create_ah(struct ib_pd *ibpd, struct rdma_ah_attr *attr, - struct ib_udata *udata) + u32 flags, struct ib_udata *udata) { struct qedr_ah *ah; @@ -2639,7 +2628,7 @@ struct ib_ah *qedr_create_ah(struct ib_pd *ibpd, struct rdma_ah_attr *attr, return &ah->ibah; } -int qedr_destroy_ah(struct ib_ah *ibah) +int qedr_destroy_ah(struct ib_ah *ibah, u32 flags) { struct qedr_ah *ah = get_qedr_ah(ibah); diff --git a/drivers/infiniband/hw/qedr/verbs.h b/drivers/infiniband/hw/qedr/verbs.h index 0b7d0124b16c..1852b7012bf4 100644 --- a/drivers/infiniband/hw/qedr/verbs.h +++ b/drivers/infiniband/hw/qedr/verbs.h @@ -76,8 +76,8 @@ int qedr_destroy_srq(struct ib_srq *ibsrq); int qedr_post_srq_recv(struct ib_srq *ibsrq, const struct ib_recv_wr *wr, const struct ib_recv_wr **bad_recv_wr); struct ib_ah *qedr_create_ah(struct ib_pd *ibpd, struct rdma_ah_attr *attr, - struct ib_udata *udata); -int qedr_destroy_ah(struct ib_ah *ibah); + u32 flags, struct ib_udata *udata); +int qedr_destroy_ah(struct ib_ah *ibah, u32 flags); int qedr_dereg_mr(struct ib_mr *); struct ib_mr *qedr_get_dma_mr(struct ib_pd *, int acc); diff --git a/drivers/infiniband/hw/qib/qib_iba6120.c b/drivers/infiniband/hw/qib/qib_iba6120.c index fb1ff59f40bd..cdbf707fa267 100644 --- a/drivers/infiniband/hw/qib/qib_iba6120.c +++ b/drivers/infiniband/hw/qib/qib_iba6120.c @@ -3237,7 +3237,6 @@ static int init_6120_variables(struct qib_devdata *dd) /* we always allocate at least 2048 bytes for eager buffers */ ret = ib_mtu_enum_to_int(qib_ibmtu); dd->rcvegrbufsize = ret != -1 ? max(ret, 2048) : QIB_DEFAULT_MTU; - BUG_ON(!is_power_of_2(dd->rcvegrbufsize)); dd->rcvegrbufsize_shift = ilog2(dd->rcvegrbufsize); qib_6120_tidtemplate(dd); diff --git a/drivers/infiniband/hw/qib/qib_iba7220.c b/drivers/infiniband/hw/qib/qib_iba7220.c index 163a57a88742..9fde45538f6e 100644 --- a/drivers/infiniband/hw/qib/qib_iba7220.c +++ b/drivers/infiniband/hw/qib/qib_iba7220.c @@ -4043,7 +4043,6 @@ static int qib_init_7220_variables(struct qib_devdata *dd) /* we always allocate at least 2048 bytes for eager buffers */ ret = ib_mtu_enum_to_int(qib_ibmtu); dd->rcvegrbufsize = ret != -1 ? max(ret, 2048) : QIB_DEFAULT_MTU; - BUG_ON(!is_power_of_2(dd->rcvegrbufsize)); dd->rcvegrbufsize_shift = ilog2(dd->rcvegrbufsize); qib_7220_tidtemplate(dd); @@ -4252,7 +4251,6 @@ static int init_sdma_7220_regs(struct qib_pportdata *ppd) unsigned word = i / 64; unsigned bit = i & 63; - BUG_ON(word >= 3); senddmabufmask[word] |= 1ULL << bit; } qib_write_kreg(dd, kr_senddmabufmask0, senddmabufmask[0]); diff --git a/drivers/infiniband/hw/qib/qib_iba7322.c b/drivers/infiniband/hw/qib/qib_iba7322.c index bf5e222eed8e..17d6b24b3473 100644 --- a/drivers/infiniband/hw/qib/qib_iba7322.c +++ b/drivers/infiniband/hw/qib/qib_iba7322.c @@ -1382,7 +1382,6 @@ static void err_decode(char *msg, size_t len, u64 errs, *msg++ = ','; len--; } - BUG_ON(!msp->sz); /* msp->sz counts the nul */ took = min_t(size_t, msp->sz - (size_t)1, len); memcpy(msg, msp->msg, took); @@ -6599,7 +6598,6 @@ static int qib_init_7322_variables(struct qib_devdata *dd) /* we always allocate at least 2048 bytes for eager buffers */ dd->rcvegrbufsize = max(mtu, 2048); - BUG_ON(!is_power_of_2(dd->rcvegrbufsize)); dd->rcvegrbufsize_shift = ilog2(dd->rcvegrbufsize); qib_7322_tidtemplate(dd); @@ -6904,7 +6902,6 @@ static int init_sdma_7322_regs(struct qib_pportdata *ppd) unsigned word = erstbuf / BITS_PER_LONG; unsigned bit = erstbuf & (BITS_PER_LONG - 1); - BUG_ON(word >= 3); senddmabufmask[word] |= 1ULL << bit; } qib_write_kreg_port(ppd, krp_senddmabufmask0, senddmabufmask[0]); diff --git a/drivers/infiniband/hw/qib/qib_init.c b/drivers/infiniband/hw/qib/qib_init.c index d7cdc77d6306..9fd69903ca57 100644 --- a/drivers/infiniband/hw/qib/qib_init.c +++ b/drivers/infiniband/hw/qib/qib_init.c @@ -209,7 +209,6 @@ struct qib_ctxtdata *qib_create_ctxtdata(struct qib_pportdata *ppd, u32 ctxt, rcd->rcvegrbuf_chunks = (rcd->rcvegrcnt + rcd->rcvegrbufs_perchunk - 1) / rcd->rcvegrbufs_perchunk; - BUG_ON(!is_power_of_2(rcd->rcvegrbufs_perchunk)); rcd->rcvegrbufs_perchunk_shift = ilog2(rcd->rcvegrbufs_perchunk); } diff --git a/drivers/infiniband/hw/qib/qib_mad.c b/drivers/infiniband/hw/qib/qib_mad.c index 4845d000c22f..f92faf5ec369 100644 --- a/drivers/infiniband/hw/qib/qib_mad.c +++ b/drivers/infiniband/hw/qib/qib_mad.c @@ -2494,5 +2494,6 @@ void qib_notify_free_mad_agent(struct rvt_dev_info *rdi, int port_idx) del_timer_sync(&dd->pport[port_idx].cong_stats.timer); if (dd->pport[port_idx].ibport_data.smi_ah) - rdma_destroy_ah(&dd->pport[port_idx].ibport_data.smi_ah->ibah); + rdma_destroy_ah(&dd->pport[port_idx].ibport_data.smi_ah->ibah, + RDMA_DESTROY_AH_SLEEPABLE); } diff --git a/drivers/infiniband/hw/qib/qib_pcie.c b/drivers/infiniband/hw/qib/qib_pcie.c index 30595b358d8f..864f2af171f7 100644 --- a/drivers/infiniband/hw/qib/qib_pcie.c +++ b/drivers/infiniband/hw/qib/qib_pcie.c @@ -387,7 +387,7 @@ void qib_pcie_reenable(struct qib_devdata *dd, u16 cmd, u8 iline, u8 cline) static int qib_pcie_coalesce; module_param_named(pcie_coalesce, qib_pcie_coalesce, int, S_IRUGO); -MODULE_PARM_DESC(pcie_coalesce, "tune PCIe colescing on some Intel chipsets"); +MODULE_PARM_DESC(pcie_coalesce, "tune PCIe coalescing on some Intel chipsets"); /* * Enable PCIe completion and data coalescing, on Intel 5x00 and 7300 diff --git a/drivers/infiniband/hw/qib/qib_sdma.c b/drivers/infiniband/hw/qib/qib_sdma.c index 757d4c9d713d..3d64081c4819 100644 --- a/drivers/infiniband/hw/qib/qib_sdma.c +++ b/drivers/infiniband/hw/qib/qib_sdma.c @@ -572,12 +572,13 @@ retry: len = sge->length; if (len > sge->sge_length) len = sge->sge_length; - BUG_ON(len == 0); dw = (len + 3) >> 2; addr = dma_map_single(&ppd->dd->pcidev->dev, sge->vaddr, dw << 2, DMA_TO_DEVICE); - if (dma_mapping_error(&ppd->dd->pcidev->dev, addr)) + if (dma_mapping_error(&ppd->dd->pcidev->dev, addr)) { + ret = -ENOMEM; goto unmap; + } sdmadesc[0] = 0; make_sdma_desc(ppd, sdmadesc, (u64) addr, dw, dwoffset); /* SDmaUseLargeBuf has to be set in every descriptor */ diff --git a/drivers/infiniband/hw/qib/qib_ud.c b/drivers/infiniband/hw/qib/qib_ud.c index 4d4c31ea4e2d..868da0ece7ba 100644 --- a/drivers/infiniband/hw/qib/qib_ud.c +++ b/drivers/infiniband/hw/qib/qib_ud.c @@ -178,7 +178,6 @@ static void qib_ud_loopback(struct rvt_qp *sqp, struct rvt_swqe *swqe) len = length; if (len > sge->sge_length) len = sge->sge_length; - BUG_ON(len == 0); rvt_copy_sge(qp, &qp->r_sge, sge->vaddr, len, true, false); sge->vaddr += len; sge->length -= len; diff --git a/drivers/infiniband/hw/qib/qib_user_sdma.c b/drivers/infiniband/hw/qib/qib_user_sdma.c index 926f3c8eba69..31c523b2a9f5 100644 --- a/drivers/infiniband/hw/qib/qib_user_sdma.c +++ b/drivers/infiniband/hw/qib/qib_user_sdma.c @@ -237,7 +237,6 @@ qib_user_sdma_queue_create(struct device *dev, int unit, int ctxt, int sctxt) ret = qib_user_sdma_rb_insert(&qib_user_sdma_rb_root, sdma_rb_node); - BUG_ON(ret == 0); } pq->sdma_rb_node = sdma_rb_node; diff --git a/drivers/infiniband/hw/qib/qib_verbs.c b/drivers/infiniband/hw/qib/qib_verbs.c index 4b0f5761a646..276304f611ab 100644 --- a/drivers/infiniband/hw/qib/qib_verbs.c +++ b/drivers/infiniband/hw/qib/qib_verbs.c @@ -150,7 +150,6 @@ static u32 qib_count_sge(struct rvt_sge_state *ss, u32 length) len = length; if (len > sge.sge_length) len = sge.sge_length; - BUG_ON(len == 0); if (((long) sge.vaddr & (sizeof(u32) - 1)) || (len != length && (len & (sizeof(u32) - 1)))) { ndesc = 0; @@ -193,7 +192,6 @@ static void qib_copy_from_sge(void *data, struct rvt_sge_state *ss, u32 length) len = length; if (len > sge->sge_length) len = sge->sge_length; - BUG_ON(len == 0); memcpy(data, sge->vaddr, len); sge->vaddr += len; sge->length -= len; @@ -449,7 +447,6 @@ static void copy_io(u32 __iomem *piobuf, struct rvt_sge_state *ss, len = length; if (len > ss->sge.sge_length) len = ss->sge.sge_length; - BUG_ON(len == 0); /* If the source address is not aligned, try to align it. */ off = (unsigned long)ss->sge.vaddr & (sizeof(u32) - 1); if (off) { @@ -1365,7 +1362,7 @@ struct ib_ah *qib_create_qp0_ah(struct qib_ibport *ibp, u16 dlid) rcu_read_lock(); qp0 = rcu_dereference(ibp->rvp.qp[0]); if (qp0) - ah = rdma_create_ah(qp0->ibqp.pd, &attr); + ah = rdma_create_ah(qp0->ibqp.pd, &attr, 0); rcu_read_unlock(); return ah; } @@ -1496,6 +1493,11 @@ static void qib_fill_device_attr(struct qib_devdata *dd) dd->verbs_dev.rdi.wc_opcode = ib_qib_wc_opcode; } +static const struct ib_device_ops qib_dev_ops = { + .modify_device = qib_modify_device, + .process_mad = qib_process_mad, +}; + /** * qib_register_ib_device - register our device with the infiniband core * @dd: the device data structure @@ -1558,8 +1560,6 @@ int qib_register_ib_device(struct qib_devdata *dd) ibdev->node_guid = ppd->guid; ibdev->phys_port_cnt = dd->num_pports; ibdev->dev.parent = &dd->pcidev->dev; - ibdev->modify_device = qib_modify_device; - ibdev->process_mad = qib_process_mad; snprintf(ibdev->node_desc, sizeof(ibdev->node_desc), "Intel Infiniband HCA %s", init_utsname()->nodename); @@ -1627,6 +1627,7 @@ int qib_register_ib_device(struct qib_devdata *dd) } rdma_set_device_sysfs_group(&dd->verbs_dev.rdi.ibdev, &qib_attr_group); + ib_set_device_ops(ibdev, &qib_dev_ops); ret = rvt_register_device(&dd->verbs_dev.rdi, RDMA_DRIVER_QIB); if (ret) goto err_tx; diff --git a/drivers/infiniband/hw/usnic/usnic_ib_main.c b/drivers/infiniband/hw/usnic/usnic_ib_main.c index 73bd00f8d2c8..b2323a52a0dd 100644 --- a/drivers/infiniband/hw/usnic/usnic_ib_main.c +++ b/drivers/infiniband/hw/usnic/usnic_ib_main.c @@ -330,6 +330,37 @@ static void usnic_get_dev_fw_str(struct ib_device *device, char *str) snprintf(str, IB_FW_VERSION_NAME_MAX, "%s", info.fw_version); } +static const struct ib_device_ops usnic_dev_ops = { + .alloc_pd = usnic_ib_alloc_pd, + .alloc_ucontext = usnic_ib_alloc_ucontext, + .create_ah = usnic_ib_create_ah, + .create_cq = usnic_ib_create_cq, + .create_qp = usnic_ib_create_qp, + .dealloc_pd = usnic_ib_dealloc_pd, + .dealloc_ucontext = usnic_ib_dealloc_ucontext, + .dereg_mr = usnic_ib_dereg_mr, + .destroy_ah = usnic_ib_destroy_ah, + .destroy_cq = usnic_ib_destroy_cq, + .destroy_qp = usnic_ib_destroy_qp, + .get_dev_fw_str = usnic_get_dev_fw_str, + .get_dma_mr = usnic_ib_get_dma_mr, + .get_link_layer = usnic_ib_port_link_layer, + .get_netdev = usnic_get_netdev, + .get_port_immutable = usnic_port_immutable, + .mmap = usnic_ib_mmap, + .modify_qp = usnic_ib_modify_qp, + .poll_cq = usnic_ib_poll_cq, + .post_recv = usnic_ib_post_recv, + .post_send = usnic_ib_post_send, + .query_device = usnic_ib_query_device, + .query_gid = usnic_ib_query_gid, + .query_pkey = usnic_ib_query_pkey, + .query_port = usnic_ib_query_port, + .query_qp = usnic_ib_query_qp, + .reg_user_mr = usnic_ib_reg_mr, + .req_notify_cq = usnic_ib_req_notify_cq, +}; + /* Start of PF discovery section */ static void *usnic_ib_device_add(struct pci_dev *dev) { @@ -386,35 +417,7 @@ static void *usnic_ib_device_add(struct pci_dev *dev) (1ull << IB_USER_VERBS_CMD_DETACH_MCAST) | (1ull << IB_USER_VERBS_CMD_OPEN_QP); - us_ibdev->ib_dev.query_device = usnic_ib_query_device; - us_ibdev->ib_dev.query_port = usnic_ib_query_port; - us_ibdev->ib_dev.query_pkey = usnic_ib_query_pkey; - us_ibdev->ib_dev.query_gid = usnic_ib_query_gid; - us_ibdev->ib_dev.get_netdev = usnic_get_netdev; - us_ibdev->ib_dev.get_link_layer = usnic_ib_port_link_layer; - us_ibdev->ib_dev.alloc_pd = usnic_ib_alloc_pd; - us_ibdev->ib_dev.dealloc_pd = usnic_ib_dealloc_pd; - us_ibdev->ib_dev.create_qp = usnic_ib_create_qp; - us_ibdev->ib_dev.modify_qp = usnic_ib_modify_qp; - us_ibdev->ib_dev.query_qp = usnic_ib_query_qp; - us_ibdev->ib_dev.destroy_qp = usnic_ib_destroy_qp; - us_ibdev->ib_dev.create_cq = usnic_ib_create_cq; - us_ibdev->ib_dev.destroy_cq = usnic_ib_destroy_cq; - us_ibdev->ib_dev.reg_user_mr = usnic_ib_reg_mr; - us_ibdev->ib_dev.dereg_mr = usnic_ib_dereg_mr; - us_ibdev->ib_dev.alloc_ucontext = usnic_ib_alloc_ucontext; - us_ibdev->ib_dev.dealloc_ucontext = usnic_ib_dealloc_ucontext; - us_ibdev->ib_dev.mmap = usnic_ib_mmap; - us_ibdev->ib_dev.create_ah = usnic_ib_create_ah; - us_ibdev->ib_dev.destroy_ah = usnic_ib_destroy_ah; - us_ibdev->ib_dev.post_send = usnic_ib_post_send; - us_ibdev->ib_dev.post_recv = usnic_ib_post_recv; - us_ibdev->ib_dev.poll_cq = usnic_ib_poll_cq; - us_ibdev->ib_dev.req_notify_cq = usnic_ib_req_notify_cq; - us_ibdev->ib_dev.get_dma_mr = usnic_ib_get_dma_mr; - us_ibdev->ib_dev.get_port_immutable = usnic_port_immutable; - us_ibdev->ib_dev.get_dev_fw_str = usnic_get_dev_fw_str; - + ib_set_device_ops(&us_ibdev->ib_dev, &usnic_dev_ops); us_ibdev->ib_dev.driver_id = RDMA_DRIVER_USNIC; rdma_set_device_sysfs_group(&us_ibdev->ib_dev, &usnic_attr_group); @@ -649,7 +652,7 @@ static int __init usnic_ib_init(void) err = usnic_uiom_init(DRV_NAME); if (err) { - usnic_err("Unable to initalize umem with err %d\n", err); + usnic_err("Unable to initialize umem with err %d\n", err); return err; } diff --git a/drivers/infiniband/hw/usnic/usnic_ib_qp_grp.c b/drivers/infiniband/hw/usnic/usnic_ib_qp_grp.c index bf5136533d49..0cdb156e165e 100644 --- a/drivers/infiniband/hw/usnic/usnic_ib_qp_grp.c +++ b/drivers/infiniband/hw/usnic/usnic_ib_qp_grp.c @@ -681,7 +681,7 @@ usnic_ib_qp_grp_create(struct usnic_fwd_dev *ufdev, struct usnic_ib_vf *vf, err = usnic_vnic_res_spec_satisfied(&min_transport_spec[transport], res_spec); if (err) { - usnic_err("Spec does not meet miniumum req for transport %d\n", + usnic_err("Spec does not meet minimum req for transport %d\n", transport); log_spec(res_spec); return ERR_PTR(err); diff --git a/drivers/infiniband/hw/usnic/usnic_ib_verbs.c b/drivers/infiniband/hw/usnic/usnic_ib_verbs.c index 0b91ff36768a..1d4abef17e38 100644 --- a/drivers/infiniband/hw/usnic/usnic_ib_verbs.c +++ b/drivers/infiniband/hw/usnic/usnic_ib_verbs.c @@ -336,13 +336,16 @@ int usnic_ib_query_port(struct ib_device *ibdev, u8 port, usnic_dbg("\n"); - mutex_lock(&us_ibdev->usdev_lock); if (ib_get_eth_speed(ibdev, port, &props->active_speed, - &props->active_width)) { - mutex_unlock(&us_ibdev->usdev_lock); + &props->active_width)) return -EINVAL; - } + /* + * usdev_lock is acquired after (and not before) ib_get_eth_speed call + * because acquiring rtnl_lock in ib_get_eth_speed, while holding + * usdev_lock could lead to a deadlock. + */ + mutex_lock(&us_ibdev->usdev_lock); /* props being zeroed by the caller, avoid zeroing it here */ props->lid = 0; @@ -760,6 +763,7 @@ int usnic_ib_mmap(struct ib_ucontext *context, /* In ib callbacks section - Start of stub funcs */ struct ib_ah *usnic_ib_create_ah(struct ib_pd *pd, struct rdma_ah_attr *ah_attr, + u32 flags, struct ib_udata *udata) { @@ -767,7 +771,7 @@ struct ib_ah *usnic_ib_create_ah(struct ib_pd *pd, return ERR_PTR(-EPERM); } -int usnic_ib_destroy_ah(struct ib_ah *ah) +int usnic_ib_destroy_ah(struct ib_ah *ah, u32 flags) { usnic_dbg("\n"); return -EINVAL; diff --git a/drivers/infiniband/hw/usnic/usnic_ib_verbs.h b/drivers/infiniband/hw/usnic/usnic_ib_verbs.h index 2a2c9beb715f..e33144261b9a 100644 --- a/drivers/infiniband/hw/usnic/usnic_ib_verbs.h +++ b/drivers/infiniband/hw/usnic/usnic_ib_verbs.h @@ -77,9 +77,10 @@ int usnic_ib_mmap(struct ib_ucontext *context, struct vm_area_struct *vma); struct ib_ah *usnic_ib_create_ah(struct ib_pd *pd, struct rdma_ah_attr *ah_attr, + u32 flags, struct ib_udata *udata); -int usnic_ib_destroy_ah(struct ib_ah *ah); +int usnic_ib_destroy_ah(struct ib_ah *ah, u32 flags); int usnic_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr, const struct ib_send_wr **bad_wr); int usnic_ib_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr, diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c index 398443f43dc3..eaa109dbc96a 100644 --- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c @@ -161,6 +161,49 @@ static struct net_device *pvrdma_get_netdev(struct ib_device *ibdev, return netdev; } +static const struct ib_device_ops pvrdma_dev_ops = { + .add_gid = pvrdma_add_gid, + .alloc_mr = pvrdma_alloc_mr, + .alloc_pd = pvrdma_alloc_pd, + .alloc_ucontext = pvrdma_alloc_ucontext, + .create_ah = pvrdma_create_ah, + .create_cq = pvrdma_create_cq, + .create_qp = pvrdma_create_qp, + .dealloc_pd = pvrdma_dealloc_pd, + .dealloc_ucontext = pvrdma_dealloc_ucontext, + .del_gid = pvrdma_del_gid, + .dereg_mr = pvrdma_dereg_mr, + .destroy_ah = pvrdma_destroy_ah, + .destroy_cq = pvrdma_destroy_cq, + .destroy_qp = pvrdma_destroy_qp, + .get_dev_fw_str = pvrdma_get_fw_ver_str, + .get_dma_mr = pvrdma_get_dma_mr, + .get_link_layer = pvrdma_port_link_layer, + .get_netdev = pvrdma_get_netdev, + .get_port_immutable = pvrdma_port_immutable, + .map_mr_sg = pvrdma_map_mr_sg, + .mmap = pvrdma_mmap, + .modify_port = pvrdma_modify_port, + .modify_qp = pvrdma_modify_qp, + .poll_cq = pvrdma_poll_cq, + .post_recv = pvrdma_post_recv, + .post_send = pvrdma_post_send, + .query_device = pvrdma_query_device, + .query_gid = pvrdma_query_gid, + .query_pkey = pvrdma_query_pkey, + .query_port = pvrdma_query_port, + .query_qp = pvrdma_query_qp, + .reg_user_mr = pvrdma_reg_user_mr, + .req_notify_cq = pvrdma_req_notify_cq, +}; + +static const struct ib_device_ops pvrdma_dev_srq_ops = { + .create_srq = pvrdma_create_srq, + .destroy_srq = pvrdma_destroy_srq, + .modify_srq = pvrdma_modify_srq, + .query_srq = pvrdma_query_srq, +}; + static int pvrdma_register_device(struct pvrdma_dev *dev) { int ret = -1; @@ -197,39 +240,7 @@ static int pvrdma_register_device(struct pvrdma_dev *dev) dev->ib_dev.node_type = RDMA_NODE_IB_CA; dev->ib_dev.phys_port_cnt = dev->dsr->caps.phys_port_cnt; - dev->ib_dev.query_device = pvrdma_query_device; - dev->ib_dev.query_port = pvrdma_query_port; - dev->ib_dev.query_gid = pvrdma_query_gid; - dev->ib_dev.query_pkey = pvrdma_query_pkey; - dev->ib_dev.modify_port = pvrdma_modify_port; - dev->ib_dev.alloc_ucontext = pvrdma_alloc_ucontext; - dev->ib_dev.dealloc_ucontext = pvrdma_dealloc_ucontext; - dev->ib_dev.mmap = pvrdma_mmap; - dev->ib_dev.alloc_pd = pvrdma_alloc_pd; - dev->ib_dev.dealloc_pd = pvrdma_dealloc_pd; - dev->ib_dev.create_ah = pvrdma_create_ah; - dev->ib_dev.destroy_ah = pvrdma_destroy_ah; - dev->ib_dev.create_qp = pvrdma_create_qp; - dev->ib_dev.modify_qp = pvrdma_modify_qp; - dev->ib_dev.query_qp = pvrdma_query_qp; - dev->ib_dev.destroy_qp = pvrdma_destroy_qp; - dev->ib_dev.post_send = pvrdma_post_send; - dev->ib_dev.post_recv = pvrdma_post_recv; - dev->ib_dev.create_cq = pvrdma_create_cq; - dev->ib_dev.destroy_cq = pvrdma_destroy_cq; - dev->ib_dev.poll_cq = pvrdma_poll_cq; - dev->ib_dev.req_notify_cq = pvrdma_req_notify_cq; - dev->ib_dev.get_dma_mr = pvrdma_get_dma_mr; - dev->ib_dev.reg_user_mr = pvrdma_reg_user_mr; - dev->ib_dev.dereg_mr = pvrdma_dereg_mr; - dev->ib_dev.alloc_mr = pvrdma_alloc_mr; - dev->ib_dev.map_mr_sg = pvrdma_map_mr_sg; - dev->ib_dev.add_gid = pvrdma_add_gid; - dev->ib_dev.del_gid = pvrdma_del_gid; - dev->ib_dev.get_netdev = pvrdma_get_netdev; - dev->ib_dev.get_port_immutable = pvrdma_port_immutable; - dev->ib_dev.get_link_layer = pvrdma_port_link_layer; - dev->ib_dev.get_dev_fw_str = pvrdma_get_fw_ver_str; + ib_set_device_ops(&dev->ib_dev, &pvrdma_dev_ops); mutex_init(&dev->port_mutex); spin_lock_init(&dev->desc_lock); @@ -255,10 +266,7 @@ static int pvrdma_register_device(struct pvrdma_dev *dev) (1ull << IB_USER_VERBS_CMD_DESTROY_SRQ) | (1ull << IB_USER_VERBS_CMD_POST_SRQ_RECV); - dev->ib_dev.create_srq = pvrdma_create_srq; - dev->ib_dev.modify_srq = pvrdma_modify_srq; - dev->ib_dev.query_srq = pvrdma_query_srq; - dev->ib_dev.destroy_srq = pvrdma_destroy_srq; + ib_set_device_ops(&dev->ib_dev, &pvrdma_dev_srq_ops); dev->srq_tbl = kcalloc(dev->dsr->caps.max_srq, sizeof(struct pvrdma_srq *), diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c index cf22f57a9f0d..3acf74cbe266 100644 --- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c @@ -249,7 +249,7 @@ struct ib_qp *pvrdma_create_qp(struct ib_pd *pd, init_completion(&qp->free); qp->state = IB_QPS_RESET; - qp->is_kernel = !(pd->uobject && udata); + qp->is_kernel = !udata; if (!qp->is_kernel) { dev_dbg(&dev->pdev->dev, diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_srq.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_srq.c index dc0ce877c7a3..06ba7c7a2235 100644 --- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_srq.c +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_srq.c @@ -111,7 +111,7 @@ struct ib_srq *pvrdma_create_srq(struct ib_pd *pd, unsigned long flags; int ret; - if (!(pd->uobject && udata)) { + if (!udata) { /* No support for kernel clients. */ dev_warn(&dev->pdev->dev, "no shared receive queue support for kernel client\n"); diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.c index b65d10b0a875..4d238d0e484b 100644 --- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.c +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.c @@ -533,11 +533,12 @@ int pvrdma_dealloc_pd(struct ib_pd *pd) * @pd: the protection domain * @ah_attr: the attributes of the AH * @udata: user data blob + * @flags: create address handle flags (see enum rdma_create_ah_flags) * * @return: the ib_ah pointer on success, otherwise errno. */ struct ib_ah *pvrdma_create_ah(struct ib_pd *pd, struct rdma_ah_attr *ah_attr, - struct ib_udata *udata) + u32 flags, struct ib_udata *udata) { struct pvrdma_dev *dev = to_vdev(pd->device); struct pvrdma_ah *ah; @@ -555,7 +556,7 @@ struct ib_ah *pvrdma_create_ah(struct ib_pd *pd, struct rdma_ah_attr *ah_attr, if (!atomic_add_unless(&dev->num_ahs, 1, dev->dsr->caps.max_ah)) return ERR_PTR(-ENOMEM); - ah = kzalloc(sizeof(*ah), GFP_KERNEL); + ah = kzalloc(sizeof(*ah), GFP_ATOMIC); if (!ah) { atomic_dec(&dev->num_ahs); return ERR_PTR(-ENOMEM); @@ -581,10 +582,11 @@ struct ib_ah *pvrdma_create_ah(struct ib_pd *pd, struct rdma_ah_attr *ah_attr, /** * pvrdma_destroy_ah - destroy an address handle * @ah: the address handle to destroyed + * @flags: destroy address handle flags (see enum rdma_destroy_ah_flags) * * @return: 0 on success. */ -int pvrdma_destroy_ah(struct ib_ah *ah) +int pvrdma_destroy_ah(struct ib_ah *ah, u32 flags) { struct pvrdma_dev *dev = to_vdev(ah->device); diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.h b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.h index b2e3ab50cb08..f7f758d60110 100644 --- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.h +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.h @@ -420,8 +420,8 @@ int pvrdma_destroy_cq(struct ib_cq *cq); int pvrdma_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc); int pvrdma_req_notify_cq(struct ib_cq *cq, enum ib_cq_notify_flags flags); struct ib_ah *pvrdma_create_ah(struct ib_pd *pd, struct rdma_ah_attr *ah_attr, - struct ib_udata *udata); -int pvrdma_destroy_ah(struct ib_ah *ah); + u32 flags, struct ib_udata *udata); +int pvrdma_destroy_ah(struct ib_ah *ah, u32 flags); struct ib_srq *pvrdma_create_srq(struct ib_pd *pd, struct ib_srq_init_attr *init_attr, diff --git a/drivers/infiniband/sw/rdmavt/ah.c b/drivers/infiniband/sw/rdmavt/ah.c index 084bb4baebb5..fc10e4e26ca7 100644 --- a/drivers/infiniband/sw/rdmavt/ah.c +++ b/drivers/infiniband/sw/rdmavt/ah.c @@ -91,6 +91,7 @@ EXPORT_SYMBOL(rvt_check_ah); * rvt_create_ah - create an address handle * @pd: the protection domain * @ah_attr: the attributes of the AH + * @create_flags: create address handle flags (see enum rdma_create_ah_flags) * @udata: pointer to user's input output buffer information. * * This may be called from interrupt context. @@ -99,6 +100,7 @@ EXPORT_SYMBOL(rvt_check_ah); */ struct ib_ah *rvt_create_ah(struct ib_pd *pd, struct rdma_ah_attr *ah_attr, + u32 create_flags, struct ib_udata *udata) { struct rvt_ah *ah; @@ -135,10 +137,11 @@ struct ib_ah *rvt_create_ah(struct ib_pd *pd, /** * rvt_destory_ah - Destory an address handle * @ibah: address handle + * @destroy_flags: destroy address handle flags (see enum rdma_destroy_ah_flags) * * Return: 0 on success */ -int rvt_destroy_ah(struct ib_ah *ibah) +int rvt_destroy_ah(struct ib_ah *ibah, u32 destroy_flags) { struct rvt_dev_info *dev = ib_to_rvt(ibah->device); struct rvt_ah *ah = ibah_to_rvtah(ibah); diff --git a/drivers/infiniband/sw/rdmavt/ah.h b/drivers/infiniband/sw/rdmavt/ah.h index 25271b48a683..72431a618d5d 100644 --- a/drivers/infiniband/sw/rdmavt/ah.h +++ b/drivers/infiniband/sw/rdmavt/ah.h @@ -52,8 +52,9 @@ struct ib_ah *rvt_create_ah(struct ib_pd *pd, struct rdma_ah_attr *ah_attr, + u32 create_flags, struct ib_udata *udata); -int rvt_destroy_ah(struct ib_ah *ibah); +int rvt_destroy_ah(struct ib_ah *ibah, u32 destroy_flags); int rvt_modify_ah(struct ib_ah *ibah, struct rdma_ah_attr *ah_attr); int rvt_query_ah(struct ib_ah *ibah, struct rdma_ah_attr *ah_attr); diff --git a/drivers/infiniband/sw/rdmavt/mad.c b/drivers/infiniband/sw/rdmavt/mad.c index d6981dc04adb..108c71e3ac23 100644 --- a/drivers/infiniband/sw/rdmavt/mad.c +++ b/drivers/infiniband/sw/rdmavt/mad.c @@ -160,7 +160,8 @@ void rvt_free_mad_agents(struct rvt_dev_info *rdi) ib_unregister_mad_agent(agent); } if (rvp->sm_ah) { - rdma_destroy_ah(&rvp->sm_ah->ibah); + rdma_destroy_ah(&rvp->sm_ah->ibah, + RDMA_DESTROY_AH_SLEEPABLE); rvp->sm_ah = NULL; } diff --git a/drivers/infiniband/sw/rdmavt/qp.c b/drivers/infiniband/sw/rdmavt/qp.c index 1735deb1a9d4..a1bd8cfc2c25 100644 --- a/drivers/infiniband/sw/rdmavt/qp.c +++ b/drivers/infiniband/sw/rdmavt/qp.c @@ -1,5 +1,5 @@ /* - * Copyright(c) 2016, 2017 Intel Corporation. + * Copyright(c) 2016 - 2018 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. @@ -1094,6 +1094,13 @@ struct ib_qp *rvt_create_qp(struct ib_pd *ibpd, qp->ibqp.qp_num = err; qp->port_num = init_attr->port_num; rvt_init_qp(rdi, qp, init_attr->qp_type); + if (rdi->driver_f.qp_priv_init) { + err = rdi->driver_f.qp_priv_init(rdi, qp, init_attr); + if (err) { + ret = ERR_PTR(err); + goto bail_rq_wq; + } + } break; default: diff --git a/drivers/infiniband/sw/rdmavt/vt.c b/drivers/infiniband/sw/rdmavt/vt.c index 723d3daf2eba..aef3aa3fe667 100644 --- a/drivers/infiniband/sw/rdmavt/vt.c +++ b/drivers/infiniband/sw/rdmavt/vt.c @@ -392,16 +392,51 @@ enum { _VERB_IDX_MAX /* Must always be last! */ }; -static inline int check_driver_override(struct rvt_dev_info *rdi, - size_t offset, void *func) -{ - if (!*(void **)((void *)&rdi->ibdev + offset)) { - *(void **)((void *)&rdi->ibdev + offset) = func; - return 0; - } - - return 1; -} +static const struct ib_device_ops rvt_dev_ops = { + .alloc_fmr = rvt_alloc_fmr, + .alloc_mr = rvt_alloc_mr, + .alloc_pd = rvt_alloc_pd, + .alloc_ucontext = rvt_alloc_ucontext, + .attach_mcast = rvt_attach_mcast, + .create_ah = rvt_create_ah, + .create_cq = rvt_create_cq, + .create_qp = rvt_create_qp, + .create_srq = rvt_create_srq, + .dealloc_fmr = rvt_dealloc_fmr, + .dealloc_pd = rvt_dealloc_pd, + .dealloc_ucontext = rvt_dealloc_ucontext, + .dereg_mr = rvt_dereg_mr, + .destroy_ah = rvt_destroy_ah, + .destroy_cq = rvt_destroy_cq, + .destroy_qp = rvt_destroy_qp, + .destroy_srq = rvt_destroy_srq, + .detach_mcast = rvt_detach_mcast, + .get_dma_mr = rvt_get_dma_mr, + .get_port_immutable = rvt_get_port_immutable, + .map_mr_sg = rvt_map_mr_sg, + .map_phys_fmr = rvt_map_phys_fmr, + .mmap = rvt_mmap, + .modify_ah = rvt_modify_ah, + .modify_device = rvt_modify_device, + .modify_port = rvt_modify_port, + .modify_qp = rvt_modify_qp, + .modify_srq = rvt_modify_srq, + .poll_cq = rvt_poll_cq, + .post_recv = rvt_post_recv, + .post_send = rvt_post_send, + .post_srq_recv = rvt_post_srq_recv, + .query_ah = rvt_query_ah, + .query_device = rvt_query_device, + .query_gid = rvt_query_gid, + .query_pkey = rvt_query_pkey, + .query_port = rvt_query_port, + .query_qp = rvt_query_qp, + .query_srq = rvt_query_srq, + .reg_user_mr = rvt_reg_user_mr, + .req_notify_cq = rvt_req_notify_cq, + .resize_cq = rvt_resize_cq, + .unmap_fmr = rvt_unmap_fmr, +}; static noinline int check_support(struct rvt_dev_info *rdi, int verb) { @@ -416,76 +451,36 @@ static noinline int check_support(struct rvt_dev_info *rdi, int verb) return -EINVAL; break; - case QUERY_DEVICE: - check_driver_override(rdi, offsetof(struct ib_device, - query_device), - rvt_query_device); - break; - case MODIFY_DEVICE: /* * rdmavt does not support modify device currently drivers must * provide. */ - if (!check_driver_override(rdi, offsetof(struct ib_device, - modify_device), - rvt_modify_device)) + if (!rdi->ibdev.ops.modify_device) return -EOPNOTSUPP; break; case QUERY_PORT: - if (!check_driver_override(rdi, offsetof(struct ib_device, - query_port), - rvt_query_port)) + if (!rdi->ibdev.ops.query_port) if (!rdi->driver_f.query_port_state) return -EINVAL; break; case MODIFY_PORT: - if (!check_driver_override(rdi, offsetof(struct ib_device, - modify_port), - rvt_modify_port)) + if (!rdi->ibdev.ops.modify_port) if (!rdi->driver_f.cap_mask_chg || !rdi->driver_f.shut_down_port) return -EINVAL; break; - case QUERY_PKEY: - check_driver_override(rdi, offsetof(struct ib_device, - query_pkey), - rvt_query_pkey); - break; - case QUERY_GID: - if (!check_driver_override(rdi, offsetof(struct ib_device, - query_gid), - rvt_query_gid)) + if (!rdi->ibdev.ops.query_gid) if (!rdi->driver_f.get_guid_be) return -EINVAL; break; - case ALLOC_UCONTEXT: - check_driver_override(rdi, offsetof(struct ib_device, - alloc_ucontext), - rvt_alloc_ucontext); - break; - - case DEALLOC_UCONTEXT: - check_driver_override(rdi, offsetof(struct ib_device, - dealloc_ucontext), - rvt_dealloc_ucontext); - break; - - case GET_PORT_IMMUTABLE: - check_driver_override(rdi, offsetof(struct ib_device, - get_port_immutable), - rvt_get_port_immutable); - break; - case CREATE_QP: - if (!check_driver_override(rdi, offsetof(struct ib_device, - create_qp), - rvt_create_qp)) + if (!rdi->ibdev.ops.create_qp) if (!rdi->driver_f.qp_priv_alloc || !rdi->driver_f.qp_priv_free || !rdi->driver_f.notify_qp_reset || @@ -496,9 +491,7 @@ static noinline int check_support(struct rvt_dev_info *rdi, int verb) break; case MODIFY_QP: - if (!check_driver_override(rdi, offsetof(struct ib_device, - modify_qp), - rvt_modify_qp)) + if (!rdi->ibdev.ops.modify_qp) if (!rdi->driver_f.notify_qp_reset || !rdi->driver_f.schedule_send || !rdi->driver_f.get_pmtu_from_attr || @@ -512,9 +505,7 @@ static noinline int check_support(struct rvt_dev_info *rdi, int verb) break; case DESTROY_QP: - if (!check_driver_override(rdi, offsetof(struct ib_device, - destroy_qp), - rvt_destroy_qp)) + if (!rdi->ibdev.ops.destroy_qp) if (!rdi->driver_f.qp_priv_free || !rdi->driver_f.notify_qp_reset || !rdi->driver_f.flush_qp_waiters || @@ -523,197 +514,14 @@ static noinline int check_support(struct rvt_dev_info *rdi, int verb) return -EINVAL; break; - case QUERY_QP: - check_driver_override(rdi, offsetof(struct ib_device, - query_qp), - rvt_query_qp); - break; - case POST_SEND: - if (!check_driver_override(rdi, offsetof(struct ib_device, - post_send), - rvt_post_send)) + if (!rdi->ibdev.ops.post_send) if (!rdi->driver_f.schedule_send || !rdi->driver_f.do_send || !rdi->post_parms) return -EINVAL; break; - case POST_RECV: - check_driver_override(rdi, offsetof(struct ib_device, - post_recv), - rvt_post_recv); - break; - case POST_SRQ_RECV: - check_driver_override(rdi, offsetof(struct ib_device, - post_srq_recv), - rvt_post_srq_recv); - break; - - case CREATE_AH: - check_driver_override(rdi, offsetof(struct ib_device, - create_ah), - rvt_create_ah); - break; - - case DESTROY_AH: - check_driver_override(rdi, offsetof(struct ib_device, - destroy_ah), - rvt_destroy_ah); - break; - - case MODIFY_AH: - check_driver_override(rdi, offsetof(struct ib_device, - modify_ah), - rvt_modify_ah); - break; - - case QUERY_AH: - check_driver_override(rdi, offsetof(struct ib_device, - query_ah), - rvt_query_ah); - break; - - case CREATE_SRQ: - check_driver_override(rdi, offsetof(struct ib_device, - create_srq), - rvt_create_srq); - break; - - case MODIFY_SRQ: - check_driver_override(rdi, offsetof(struct ib_device, - modify_srq), - rvt_modify_srq); - break; - - case DESTROY_SRQ: - check_driver_override(rdi, offsetof(struct ib_device, - destroy_srq), - rvt_destroy_srq); - break; - - case QUERY_SRQ: - check_driver_override(rdi, offsetof(struct ib_device, - query_srq), - rvt_query_srq); - break; - - case ATTACH_MCAST: - check_driver_override(rdi, offsetof(struct ib_device, - attach_mcast), - rvt_attach_mcast); - break; - - case DETACH_MCAST: - check_driver_override(rdi, offsetof(struct ib_device, - detach_mcast), - rvt_detach_mcast); - break; - - case GET_DMA_MR: - check_driver_override(rdi, offsetof(struct ib_device, - get_dma_mr), - rvt_get_dma_mr); - break; - - case REG_USER_MR: - check_driver_override(rdi, offsetof(struct ib_device, - reg_user_mr), - rvt_reg_user_mr); - break; - - case DEREG_MR: - check_driver_override(rdi, offsetof(struct ib_device, - dereg_mr), - rvt_dereg_mr); - break; - - case ALLOC_FMR: - check_driver_override(rdi, offsetof(struct ib_device, - alloc_fmr), - rvt_alloc_fmr); - break; - - case ALLOC_MR: - check_driver_override(rdi, offsetof(struct ib_device, - alloc_mr), - rvt_alloc_mr); - break; - - case MAP_MR_SG: - check_driver_override(rdi, offsetof(struct ib_device, - map_mr_sg), - rvt_map_mr_sg); - break; - - case MAP_PHYS_FMR: - check_driver_override(rdi, offsetof(struct ib_device, - map_phys_fmr), - rvt_map_phys_fmr); - break; - - case UNMAP_FMR: - check_driver_override(rdi, offsetof(struct ib_device, - unmap_fmr), - rvt_unmap_fmr); - break; - - case DEALLOC_FMR: - check_driver_override(rdi, offsetof(struct ib_device, - dealloc_fmr), - rvt_dealloc_fmr); - break; - - case MMAP: - check_driver_override(rdi, offsetof(struct ib_device, - mmap), - rvt_mmap); - break; - - case CREATE_CQ: - check_driver_override(rdi, offsetof(struct ib_device, - create_cq), - rvt_create_cq); - break; - - case DESTROY_CQ: - check_driver_override(rdi, offsetof(struct ib_device, - destroy_cq), - rvt_destroy_cq); - break; - - case POLL_CQ: - check_driver_override(rdi, offsetof(struct ib_device, - poll_cq), - rvt_poll_cq); - break; - - case REQ_NOTFIY_CQ: - check_driver_override(rdi, offsetof(struct ib_device, - req_notify_cq), - rvt_req_notify_cq); - break; - - case RESIZE_CQ: - check_driver_override(rdi, offsetof(struct ib_device, - resize_cq), - rvt_resize_cq); - break; - - case ALLOC_PD: - check_driver_override(rdi, offsetof(struct ib_device, - alloc_pd), - rvt_alloc_pd); - break; - - case DEALLOC_PD: - check_driver_override(rdi, offsetof(struct ib_device, - dealloc_pd), - rvt_dealloc_pd); - break; - - default: - return -EINVAL; } return 0; @@ -745,6 +553,7 @@ int rvt_register_device(struct rvt_dev_info *rdi, u32 driver_id) return -EINVAL; } + ib_set_device_ops(&rdi->ibdev, &rvt_dev_ops); /* Once we get past here we can use rvt_pr macros and tracepoints */ trace_rvt_dbg(rdi, "Driver attempting registration"); diff --git a/drivers/infiniband/sw/rxe/rxe.h b/drivers/infiniband/sw/rxe/rxe.h index d9ec2de68738..5bde2ad964d2 100644 --- a/drivers/infiniband/sw/rxe/rxe.h +++ b/drivers/infiniband/sw/rxe/rxe.h @@ -65,8 +65,9 @@ */ #define RXE_UVERBS_ABI_VERSION 2 -#define IB_PHYS_STATE_LINK_UP (5) -#define IB_PHYS_STATE_LINK_DOWN (3) +#define RDMA_LINK_PHYS_STATE_LINK_UP (5) +#define RDMA_LINK_PHYS_STATE_DISABLED (3) +#define RDMA_LINK_PHYS_STATE_POLLING (2) #define RXE_ROCE_V2_SPORT (0xc000) @@ -109,5 +110,6 @@ struct rxe_dev *get_rxe_by_name(const char *name); void rxe_port_up(struct rxe_dev *rxe); void rxe_port_down(struct rxe_dev *rxe); +void rxe_set_port_state(struct rxe_dev *rxe); #endif /* RXE_H */ diff --git a/drivers/infiniband/sw/rxe/rxe_comp.c b/drivers/infiniband/sw/rxe/rxe_comp.c index ea089cb091ad..e996da67a851 100644 --- a/drivers/infiniband/sw/rxe/rxe_comp.c +++ b/drivers/infiniband/sw/rxe/rxe_comp.c @@ -439,6 +439,7 @@ static void make_send_cqe(struct rxe_qp *qp, struct rxe_send_wqe *wqe, */ static void do_complete(struct rxe_qp *qp, struct rxe_send_wqe *wqe) { + struct rxe_dev *rxe = to_rdev(qp->ibqp.device); struct rxe_cqe cqe; if ((qp->sq_sig_type == IB_SIGNAL_ALL_WR) || @@ -451,6 +452,11 @@ static void do_complete(struct rxe_qp *qp, struct rxe_send_wqe *wqe) advance_consumer(qp->sq.queue); } + if (wqe->wr.opcode == IB_WR_SEND || + wqe->wr.opcode == IB_WR_SEND_WITH_IMM || + wqe->wr.opcode == IB_WR_SEND_WITH_INV) + rxe_counter_inc(rxe, RXE_CNT_RDMA_SEND); + /* * we completed something so let req run again * if it is trying to fence diff --git a/drivers/infiniband/sw/rxe/rxe_hw_counters.c b/drivers/infiniband/sw/rxe/rxe_hw_counters.c index 6aeb7a165e46..636edb5f4cf4 100644 --- a/drivers/infiniband/sw/rxe/rxe_hw_counters.c +++ b/drivers/infiniband/sw/rxe/rxe_hw_counters.c @@ -37,15 +37,18 @@ static const char * const rxe_counter_name[] = { [RXE_CNT_SENT_PKTS] = "sent_pkts", [RXE_CNT_RCVD_PKTS] = "rcvd_pkts", [RXE_CNT_DUP_REQ] = "duplicate_request", - [RXE_CNT_OUT_OF_SEQ_REQ] = "out_of_sequence", + [RXE_CNT_OUT_OF_SEQ_REQ] = "out_of_seq_request", [RXE_CNT_RCV_RNR] = "rcvd_rnr_err", [RXE_CNT_SND_RNR] = "send_rnr_err", [RXE_CNT_RCV_SEQ_ERR] = "rcvd_seq_err", - [RXE_CNT_COMPLETER_SCHED] = "ack_deffered", + [RXE_CNT_COMPLETER_SCHED] = "ack_deferred", [RXE_CNT_RETRY_EXCEEDED] = "retry_exceeded_err", [RXE_CNT_RNR_RETRY_EXCEEDED] = "retry_rnr_exceeded_err", [RXE_CNT_COMP_RETRY] = "completer_retry_err", [RXE_CNT_SEND_ERR] = "send_err", + [RXE_CNT_LINK_DOWNED] = "link_downed", + [RXE_CNT_RDMA_SEND] = "rdma_sends", + [RXE_CNT_RDMA_RECV] = "rdma_recvs", }; int rxe_ib_get_hw_stats(struct ib_device *ibdev, @@ -59,7 +62,7 @@ int rxe_ib_get_hw_stats(struct ib_device *ibdev, return -EINVAL; for (cnt = 0; cnt < ARRAY_SIZE(rxe_counter_name); cnt++) - stats->value[cnt] = dev->stats_counters[cnt]; + stats->value[cnt] = atomic64_read(&dev->stats_counters[cnt]); return ARRAY_SIZE(rxe_counter_name); } diff --git a/drivers/infiniband/sw/rxe/rxe_hw_counters.h b/drivers/infiniband/sw/rxe/rxe_hw_counters.h index f44df1b76742..72c0d63c79e0 100644 --- a/drivers/infiniband/sw/rxe/rxe_hw_counters.h +++ b/drivers/infiniband/sw/rxe/rxe_hw_counters.h @@ -50,6 +50,9 @@ enum rxe_counters { RXE_CNT_RNR_RETRY_EXCEEDED, RXE_CNT_COMP_RETRY, RXE_CNT_SEND_ERR, + RXE_CNT_LINK_DOWNED, + RXE_CNT_RDMA_SEND, + RXE_CNT_RDMA_RECV, RXE_NUM_OF_COUNTERS }; diff --git a/drivers/infiniband/sw/rxe/rxe_loc.h b/drivers/infiniband/sw/rxe/rxe_loc.h index afd53f57a62b..01b74597b36a 100644 --- a/drivers/infiniband/sw/rxe/rxe_loc.h +++ b/drivers/infiniband/sw/rxe/rxe_loc.h @@ -157,7 +157,7 @@ int rxe_qp_chk_init(struct rxe_dev *rxe, struct ib_qp_init_attr *init); int rxe_qp_from_init(struct rxe_dev *rxe, struct rxe_qp *qp, struct rxe_pd *pd, struct ib_qp_init_attr *init, struct rxe_create_qp_resp __user *uresp, - struct ib_pd *ibpd); + struct ib_pd *ibpd, struct ib_udata *udata); int rxe_qp_to_init(struct rxe_qp *qp, struct ib_qp_init_attr *init); @@ -250,11 +250,12 @@ static inline unsigned int wr_opcode_mask(int opcode, struct rxe_qp *qp) return rxe_wr_opcode_info[opcode].mask[qp->ibqp.qp_type]; } -static inline int rxe_xmit_packet(struct rxe_dev *rxe, struct rxe_qp *qp, - struct rxe_pkt_info *pkt, struct sk_buff *skb) +static inline int rxe_xmit_packet(struct rxe_qp *qp, struct rxe_pkt_info *pkt, + struct sk_buff *skb) { int err; int is_request = pkt->mask & RXE_REQ_MASK; + struct rxe_dev *rxe = to_rdev(qp->ibqp.device); if ((is_request && (qp->req.state != QP_STATE_READY)) || (!is_request && (qp->resp.state != QP_STATE_READY))) { diff --git a/drivers/infiniband/sw/rxe/rxe_net.c b/drivers/infiniband/sw/rxe/rxe_net.c index 40e82e0f6c2d..8fd03ae20efc 100644 --- a/drivers/infiniband/sw/rxe/rxe_net.c +++ b/drivers/infiniband/sw/rxe/rxe_net.c @@ -607,7 +607,6 @@ void rxe_port_up(struct rxe_dev *rxe) port = &rxe->port; port->attr.state = IB_PORT_ACTIVE; - port->attr.phys_state = IB_PHYS_STATE_LINK_UP; rxe_port_event(rxe, IB_EVENT_PORT_ACTIVE); dev_info(&rxe->ib_dev.dev, "set active\n"); @@ -620,12 +619,20 @@ void rxe_port_down(struct rxe_dev *rxe) port = &rxe->port; port->attr.state = IB_PORT_DOWN; - port->attr.phys_state = IB_PHYS_STATE_LINK_DOWN; rxe_port_event(rxe, IB_EVENT_PORT_ERR); + rxe_counter_inc(rxe, RXE_CNT_LINK_DOWNED); dev_info(&rxe->ib_dev.dev, "set down\n"); } +void rxe_set_port_state(struct rxe_dev *rxe) +{ + if (netif_running(rxe->ndev) && netif_carrier_ok(rxe->ndev)) + rxe_port_up(rxe); + else + rxe_port_down(rxe); +} + static int rxe_notify(struct notifier_block *not_blk, unsigned long event, void *arg) @@ -652,10 +659,7 @@ static int rxe_notify(struct notifier_block *not_blk, rxe_set_mtu(rxe, ndev->mtu); break; case NETDEV_CHANGE: - if (netif_running(ndev) && netif_carrier_ok(ndev)) - rxe_port_up(rxe); - else - rxe_port_down(rxe); + rxe_set_port_state(rxe); break; case NETDEV_REBOOT: case NETDEV_GOING_DOWN: diff --git a/drivers/infiniband/sw/rxe/rxe_pool.c b/drivers/infiniband/sw/rxe/rxe_pool.c index 36b53fb94a49..b5c91df22047 100644 --- a/drivers/infiniband/sw/rxe/rxe_pool.c +++ b/drivers/infiniband/sw/rxe/rxe_pool.c @@ -112,6 +112,18 @@ static inline struct kmem_cache *pool_cache(struct rxe_pool *pool) return rxe_type_info[pool->type].cache; } +static void rxe_cache_clean(size_t cnt) +{ + int i; + struct rxe_type_info *type; + + for (i = 0; i < cnt; i++) { + type = &rxe_type_info[i]; + kmem_cache_destroy(type->cache); + type->cache = NULL; + } +} + int rxe_cache_init(void) { int err; @@ -136,24 +148,14 @@ int rxe_cache_init(void) return 0; err1: - while (--i >= 0) { - kmem_cache_destroy(type->cache); - type->cache = NULL; - } + rxe_cache_clean(i); return err; } void rxe_cache_exit(void) { - int i; - struct rxe_type_info *type; - - for (i = 0; i < RXE_NUM_TYPES; i++) { - type = &rxe_type_info[i]; - kmem_cache_destroy(type->cache); - type->cache = NULL; - } + rxe_cache_clean(RXE_NUM_TYPES); } static int rxe_pool_init_index(struct rxe_pool *pool, u32 max, u32 min) @@ -241,7 +243,7 @@ static void rxe_pool_put(struct rxe_pool *pool) kref_put(&pool->ref_cnt, rxe_pool_release); } -int rxe_pool_cleanup(struct rxe_pool *pool) +void rxe_pool_cleanup(struct rxe_pool *pool) { unsigned long flags; @@ -253,8 +255,6 @@ int rxe_pool_cleanup(struct rxe_pool *pool) write_unlock_irqrestore(&pool->pool_lock, flags); rxe_pool_put(pool); - - return 0; } static u32 alloc_index(struct rxe_pool *pool) diff --git a/drivers/infiniband/sw/rxe/rxe_pool.h b/drivers/infiniband/sw/rxe/rxe_pool.h index aa4ba307097b..72968c29e01f 100644 --- a/drivers/infiniband/sw/rxe/rxe_pool.h +++ b/drivers/infiniband/sw/rxe/rxe_pool.h @@ -126,7 +126,7 @@ int rxe_pool_init(struct rxe_dev *rxe, struct rxe_pool *pool, enum rxe_elem_type type, u32 max_elem); /* free resources from object pool */ -int rxe_pool_cleanup(struct rxe_pool *pool); +void rxe_pool_cleanup(struct rxe_pool *pool); /* allocate an object from pool */ void *rxe_alloc(struct rxe_pool *pool); diff --git a/drivers/infiniband/sw/rxe/rxe_qp.c b/drivers/infiniband/sw/rxe/rxe_qp.c index b9710907dac2..fd86fd2fbb26 100644 --- a/drivers/infiniband/sw/rxe/rxe_qp.c +++ b/drivers/infiniband/sw/rxe/rxe_qp.c @@ -97,7 +97,7 @@ int rxe_qp_chk_init(struct rxe_dev *rxe, struct ib_qp_init_attr *init) goto err1; if (init->qp_type == IB_QPT_SMI || init->qp_type == IB_QPT_GSI) { - if (port_num != 1) { + if (!rdma_is_port_valid(&rxe->ib_dev, port_num)) { pr_warn("invalid port = %d\n", port_num); goto err1; } @@ -336,13 +336,14 @@ static int rxe_qp_init_resp(struct rxe_dev *rxe, struct rxe_qp *qp, int rxe_qp_from_init(struct rxe_dev *rxe, struct rxe_qp *qp, struct rxe_pd *pd, struct ib_qp_init_attr *init, struct rxe_create_qp_resp __user *uresp, - struct ib_pd *ibpd) + struct ib_pd *ibpd, + struct ib_udata *udata) { int err; struct rxe_cq *rcq = to_rcq(init->recv_cq); struct rxe_cq *scq = to_rcq(init->send_cq); struct rxe_srq *srq = init->srq ? to_rsrq(init->srq) : NULL; - struct ib_ucontext *context = ibpd->uobject ? ibpd->uobject->context : NULL; + struct ib_ucontext *context = udata ? ibpd->uobject->context : NULL; rxe_add_ref(pd); rxe_add_ref(rcq); @@ -433,7 +434,7 @@ int rxe_qp_chk_attr(struct rxe_dev *rxe, struct rxe_qp *qp, } if (mask & IB_QP_PORT) { - if (attr->port_num != 1) { + if (!rdma_is_port_valid(&rxe->ib_dev, attr->port_num)) { pr_warn("invalid port %d\n", attr->port_num); goto err1; } @@ -448,7 +449,7 @@ int rxe_qp_chk_attr(struct rxe_dev *rxe, struct rxe_qp *qp, if (mask & IB_QP_ALT_PATH) { if (rxe_av_chk_attr(rxe, &attr->alt_ah_attr)) goto err1; - if (attr->alt_port_num != 1) { + if (!rdma_is_port_valid(&rxe->ib_dev, attr->alt_port_num)) { pr_warn("invalid alt port %d\n", attr->alt_port_num); goto err1; } diff --git a/drivers/infiniband/sw/rxe/rxe_req.c b/drivers/infiniband/sw/rxe/rxe_req.c index 6c361d70d7cd..c5d9b558fa90 100644 --- a/drivers/infiniband/sw/rxe/rxe_req.c +++ b/drivers/infiniband/sw/rxe/rxe_req.c @@ -643,6 +643,7 @@ next_wqe: rmr->access = wqe->wr.wr.reg.access; rmr->lkey = wqe->wr.wr.reg.key; rmr->rkey = wqe->wr.wr.reg.key; + rmr->iova = wqe->wr.wr.reg.mr->iova; wqe->state = wqe_state_done; wqe->status = IB_WC_SUCCESS; } else { @@ -728,7 +729,7 @@ next_wqe: save_state(wqe, qp, &rollback_wqe, &rollback_psn); update_wqe_state(qp, wqe, &pkt); update_wqe_psn(qp, wqe, &pkt, payload); - ret = rxe_xmit_packet(to_rdev(qp->ibqp.device), qp, &pkt, skb); + ret = rxe_xmit_packet(qp, &pkt, skb); if (ret) { qp->need_req_skb = 1; diff --git a/drivers/infiniband/sw/rxe/rxe_resp.c b/drivers/infiniband/sw/rxe/rxe_resp.c index c962160292f4..231528188250 100644 --- a/drivers/infiniband/sw/rxe/rxe_resp.c +++ b/drivers/infiniband/sw/rxe/rxe_resp.c @@ -124,12 +124,9 @@ static inline enum resp_states get_req(struct rxe_qp *qp, struct sk_buff *skb; if (qp->resp.state == QP_STATE_ERROR) { - skb = skb_dequeue(&qp->req_pkts); - if (skb) { - /* drain request packet queue */ + while ((skb = skb_dequeue(&qp->req_pkts))) { rxe_drop_ref(qp); kfree_skb(skb); - return RESPST_GET_REQ; } /* go drain recv wr queue */ @@ -660,7 +657,6 @@ static struct sk_buff *prepare_ack_packet(struct rxe_qp *qp, static enum resp_states read_reply(struct rxe_qp *qp, struct rxe_pkt_info *req_pkt) { - struct rxe_dev *rxe = to_rdev(qp->ibqp.device); struct rxe_pkt_info ack_pkt; struct sk_buff *skb; int mtu = qp->mtu; @@ -739,7 +735,7 @@ static enum resp_states read_reply(struct rxe_qp *qp, p = payload_addr(&ack_pkt) + payload + bth_pad(&ack_pkt); *p = ~icrc; - err = rxe_xmit_packet(rxe, qp, &ack_pkt, skb); + err = rxe_xmit_packet(qp, &ack_pkt, skb); if (err) { pr_err("Failed sending RDMA reply.\n"); return RESPST_ERR_RNR; @@ -838,18 +834,25 @@ static enum resp_states do_complete(struct rxe_qp *qp, struct ib_wc *wc = &cqe.ibwc; struct ib_uverbs_wc *uwc = &cqe.uibwc; struct rxe_recv_wqe *wqe = qp->resp.wqe; + struct rxe_dev *rxe = to_rdev(qp->ibqp.device); if (unlikely(!wqe)) return RESPST_CLEANUP; memset(&cqe, 0, sizeof(cqe)); - wc->wr_id = wqe->wr_id; - wc->status = qp->resp.status; - wc->qp = &qp->ibqp; + if (qp->rcq->is_user) { + uwc->status = qp->resp.status; + uwc->qp_num = qp->ibqp.qp_num; + uwc->wr_id = wqe->wr_id; + } else { + wc->status = qp->resp.status; + wc->qp = &qp->ibqp; + wc->wr_id = wqe->wr_id; + } - /* fields after status are not required for errors */ if (wc->status == IB_WC_SUCCESS) { + rxe_counter_inc(rxe, RXE_CNT_RDMA_RECV); wc->opcode = (pkt->mask & RXE_IMMDT_MASK && pkt->mask & RXE_WRITE_MASK) ? IB_WC_RECV_RDMA_WITH_IMM : IB_WC_RECV; @@ -898,7 +901,6 @@ static enum resp_states do_complete(struct rxe_qp *qp, } if (pkt->mask & RXE_IETH_MASK) { - struct rxe_dev *rxe = to_rdev(qp->ibqp.device); struct rxe_mem *rmr; wc->wc_flags |= IB_WC_WITH_INVALIDATE; @@ -950,7 +952,6 @@ static int send_ack(struct rxe_qp *qp, struct rxe_pkt_info *pkt, int err = 0; struct rxe_pkt_info ack_pkt; struct sk_buff *skb; - struct rxe_dev *rxe = to_rdev(qp->ibqp.device); skb = prepare_ack_packet(qp, pkt, &ack_pkt, IB_OPCODE_RC_ACKNOWLEDGE, 0, psn, syndrome, NULL); @@ -959,7 +960,7 @@ static int send_ack(struct rxe_qp *qp, struct rxe_pkt_info *pkt, goto err1; } - err = rxe_xmit_packet(rxe, qp, &ack_pkt, skb); + err = rxe_xmit_packet(qp, &ack_pkt, skb); if (err) pr_err_ratelimited("Failed sending ack\n"); @@ -973,7 +974,6 @@ static int send_atomic_ack(struct rxe_qp *qp, struct rxe_pkt_info *pkt, int rc = 0; struct rxe_pkt_info ack_pkt; struct sk_buff *skb; - struct rxe_dev *rxe = to_rdev(qp->ibqp.device); struct resp_res *res; skb = prepare_ack_packet(qp, pkt, &ack_pkt, @@ -1001,7 +1001,7 @@ static int send_atomic_ack(struct rxe_qp *qp, struct rxe_pkt_info *pkt, res->last_psn = ack_pkt.psn; res->cur_psn = ack_pkt.psn; - rc = rxe_xmit_packet(rxe, qp, &ack_pkt, skb); + rc = rxe_xmit_packet(qp, &ack_pkt, skb); if (rc) { pr_err_ratelimited("Failed sending ack\n"); rxe_drop_ref(qp); @@ -1131,8 +1131,7 @@ static enum resp_states duplicate_request(struct rxe_qp *qp, if (res) { skb_get(res->atomic.skb); /* Resend the result. */ - rc = rxe_xmit_packet(to_rdev(qp->ibqp.device), qp, - pkt, res->atomic.skb); + rc = rxe_xmit_packet(qp, pkt, res->atomic.skb); if (rc) { pr_err("Failed resending result. This flow is not handled - skb ignored\n"); rc = RESPST_CLEANUP; diff --git a/drivers/infiniband/sw/rxe/rxe_sysfs.c b/drivers/infiniband/sw/rxe/rxe_sysfs.c index 73a19f808e1b..95a15892f7e6 100644 --- a/drivers/infiniband/sw/rxe/rxe_sysfs.c +++ b/drivers/infiniband/sw/rxe/rxe_sysfs.c @@ -53,22 +53,6 @@ static int sanitize_arg(const char *val, char *intf, int intf_len) return len; } -static void rxe_set_port_state(struct net_device *ndev) -{ - struct rxe_dev *rxe = net_to_rxe(ndev); - bool is_up = netif_running(ndev) && netif_carrier_ok(ndev); - - if (!rxe) - goto out; - - if (is_up) - rxe_port_up(rxe); - else - rxe_port_down(rxe); /* down for unknown state */ -out: - return; -} - static int rxe_param_set_add(const char *val, const struct kernel_param *kp) { int len; @@ -104,7 +88,7 @@ static int rxe_param_set_add(const char *val, const struct kernel_param *kp) goto err; } - rxe_set_port_state(ndev); + rxe_set_port_state(rxe); dev_info(&rxe->ib_dev.dev, "added %s\n", intf); err: if (ndev) diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.c b/drivers/infiniband/sw/rxe/rxe_verbs.c index 9c19f2027511..b20e6e0415f5 100644 --- a/drivers/infiniband/sw/rxe/rxe_verbs.c +++ b/drivers/infiniband/sw/rxe/rxe_verbs.c @@ -56,12 +56,7 @@ static int rxe_query_port(struct ib_device *dev, { struct rxe_dev *rxe = to_rdev(dev); struct rxe_port *port; - int rc = -EINVAL; - - if (unlikely(port_num != 1)) { - pr_warn("invalid port_number %d\n", port_num); - goto out; - } + int rc; port = &rxe->port; @@ -71,9 +66,16 @@ static int rxe_query_port(struct ib_device *dev, mutex_lock(&rxe->usdev_lock); rc = ib_get_eth_speed(dev, port_num, &attr->active_speed, &attr->active_width); + + if (attr->state == IB_PORT_ACTIVE) + attr->phys_state = RDMA_LINK_PHYS_STATE_LINK_UP; + else if (dev_get_flags(rxe->ndev) & IFF_UP) + attr->phys_state = RDMA_LINK_PHYS_STATE_POLLING; + else + attr->phys_state = RDMA_LINK_PHYS_STATE_DISABLED; + mutex_unlock(&rxe->usdev_lock); -out: return rc; } @@ -96,12 +98,6 @@ static int rxe_query_pkey(struct ib_device *device, struct rxe_dev *rxe = to_rdev(device); struct rxe_port *port; - if (unlikely(port_num != 1)) { - dev_warn(device->dev.parent, "invalid port_num = %d\n", - port_num); - goto err1; - } - port = &rxe->port; if (unlikely(index >= port->attr.pkey_tbl_len)) { @@ -139,11 +135,6 @@ static int rxe_modify_port(struct ib_device *dev, struct rxe_dev *rxe = to_rdev(dev); struct rxe_port *port; - if (unlikely(port_num != 1)) { - pr_warn("invalid port_num = %d\n", port_num); - goto err1; - } - port = &rxe->port; port->attr.port_cap_flags |= attr->set_port_cap_mask; @@ -153,9 +144,6 @@ static int rxe_modify_port(struct ib_device *dev, port->attr.qkey_viol_cntr = 0; return 0; - -err1: - return -EINVAL; } static enum rdma_link_layer rxe_get_link_layer(struct ib_device *dev, @@ -231,6 +219,7 @@ static void rxe_init_av(struct rxe_dev *rxe, struct rdma_ah_attr *attr, static struct ib_ah *rxe_create_ah(struct ib_pd *ibpd, struct rdma_ah_attr *attr, + u32 flags, struct ib_udata *udata) { @@ -278,7 +267,7 @@ static int rxe_query_ah(struct ib_ah *ibah, struct rdma_ah_attr *attr) return 0; } -static int rxe_destroy_ah(struct ib_ah *ibah) +static int rxe_destroy_ah(struct ib_ah *ibah, u32 flags) { struct rxe_ah *ah = to_rah(ibah); @@ -498,7 +487,7 @@ static struct ib_qp *rxe_create_qp(struct ib_pd *ibpd, rxe_add_index(qp); - err = rxe_qp_from_init(rxe, qp, pd, init, uresp, ibpd); + err = rxe_qp_from_init(rxe, qp, pd, init, uresp, ibpd, udata); if (err) goto err3; @@ -1157,6 +1146,52 @@ static const struct attribute_group rxe_attr_group = { .attrs = rxe_dev_attributes, }; +static const struct ib_device_ops rxe_dev_ops = { + .alloc_hw_stats = rxe_ib_alloc_hw_stats, + .alloc_mr = rxe_alloc_mr, + .alloc_pd = rxe_alloc_pd, + .alloc_ucontext = rxe_alloc_ucontext, + .attach_mcast = rxe_attach_mcast, + .create_ah = rxe_create_ah, + .create_cq = rxe_create_cq, + .create_qp = rxe_create_qp, + .create_srq = rxe_create_srq, + .dealloc_pd = rxe_dealloc_pd, + .dealloc_ucontext = rxe_dealloc_ucontext, + .dereg_mr = rxe_dereg_mr, + .destroy_ah = rxe_destroy_ah, + .destroy_cq = rxe_destroy_cq, + .destroy_qp = rxe_destroy_qp, + .destroy_srq = rxe_destroy_srq, + .detach_mcast = rxe_detach_mcast, + .get_dma_mr = rxe_get_dma_mr, + .get_hw_stats = rxe_ib_get_hw_stats, + .get_link_layer = rxe_get_link_layer, + .get_netdev = rxe_get_netdev, + .get_port_immutable = rxe_port_immutable, + .map_mr_sg = rxe_map_mr_sg, + .mmap = rxe_mmap, + .modify_ah = rxe_modify_ah, + .modify_device = rxe_modify_device, + .modify_port = rxe_modify_port, + .modify_qp = rxe_modify_qp, + .modify_srq = rxe_modify_srq, + .peek_cq = rxe_peek_cq, + .poll_cq = rxe_poll_cq, + .post_recv = rxe_post_recv, + .post_send = rxe_post_send, + .post_srq_recv = rxe_post_srq_recv, + .query_ah = rxe_query_ah, + .query_device = rxe_query_device, + .query_pkey = rxe_query_pkey, + .query_port = rxe_query_port, + .query_qp = rxe_query_qp, + .query_srq = rxe_query_srq, + .reg_user_mr = rxe_reg_user_mr, + .req_notify_cq = rxe_req_notify_cq, + .resize_cq = rxe_resize_cq, +}; + int rxe_register_device(struct rxe_dev *rxe) { int err; @@ -1211,49 +1246,7 @@ int rxe_register_device(struct rxe_dev *rxe) | BIT_ULL(IB_USER_VERBS_CMD_DETACH_MCAST) ; - dev->query_device = rxe_query_device; - dev->modify_device = rxe_modify_device; - dev->query_port = rxe_query_port; - dev->modify_port = rxe_modify_port; - dev->get_link_layer = rxe_get_link_layer; - dev->get_netdev = rxe_get_netdev; - dev->query_pkey = rxe_query_pkey; - dev->alloc_ucontext = rxe_alloc_ucontext; - dev->dealloc_ucontext = rxe_dealloc_ucontext; - dev->mmap = rxe_mmap; - dev->get_port_immutable = rxe_port_immutable; - dev->alloc_pd = rxe_alloc_pd; - dev->dealloc_pd = rxe_dealloc_pd; - dev->create_ah = rxe_create_ah; - dev->modify_ah = rxe_modify_ah; - dev->query_ah = rxe_query_ah; - dev->destroy_ah = rxe_destroy_ah; - dev->create_srq = rxe_create_srq; - dev->modify_srq = rxe_modify_srq; - dev->query_srq = rxe_query_srq; - dev->destroy_srq = rxe_destroy_srq; - dev->post_srq_recv = rxe_post_srq_recv; - dev->create_qp = rxe_create_qp; - dev->modify_qp = rxe_modify_qp; - dev->query_qp = rxe_query_qp; - dev->destroy_qp = rxe_destroy_qp; - dev->post_send = rxe_post_send; - dev->post_recv = rxe_post_recv; - dev->create_cq = rxe_create_cq; - dev->destroy_cq = rxe_destroy_cq; - dev->resize_cq = rxe_resize_cq; - dev->poll_cq = rxe_poll_cq; - dev->peek_cq = rxe_peek_cq; - dev->req_notify_cq = rxe_req_notify_cq; - dev->get_dma_mr = rxe_get_dma_mr; - dev->reg_user_mr = rxe_reg_user_mr; - dev->dereg_mr = rxe_dereg_mr; - dev->alloc_mr = rxe_alloc_mr; - dev->map_mr_sg = rxe_map_mr_sg; - dev->attach_mcast = rxe_attach_mcast; - dev->detach_mcast = rxe_detach_mcast; - dev->get_hw_stats = rxe_ib_get_hw_stats; - dev->alloc_hw_stats = rxe_ib_alloc_hw_stats; + ib_set_device_ops(dev, &rxe_dev_ops); tfm = crypto_alloc_shash("crc32", 0, 0); if (IS_ERR(tfm)) { @@ -1279,11 +1272,9 @@ err1: return err; } -int rxe_unregister_device(struct rxe_dev *rxe) +void rxe_unregister_device(struct rxe_dev *rxe) { struct ib_device *dev = &rxe->ib_dev; ib_unregister_device(dev); - - return 0; } diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.h b/drivers/infiniband/sw/rxe/rxe_verbs.h index 82e670d6eeea..74e04801d34d 100644 --- a/drivers/infiniband/sw/rxe/rxe_verbs.h +++ b/drivers/infiniband/sw/rxe/rxe_verbs.h @@ -409,16 +409,16 @@ struct rxe_dev { spinlock_t mmap_offset_lock; /* guard mmap_offset */ int mmap_offset; - u64 stats_counters[RXE_NUM_OF_COUNTERS]; + atomic64_t stats_counters[RXE_NUM_OF_COUNTERS]; struct rxe_port port; struct list_head list; struct crypto_shash *tfm; }; -static inline void rxe_counter_inc(struct rxe_dev *rxe, enum rxe_counters cnt) +static inline void rxe_counter_inc(struct rxe_dev *rxe, enum rxe_counters index) { - rxe->stats_counters[cnt]++; + atomic64_inc(&rxe->stats_counters[index]); } static inline struct rxe_dev *to_rdev(struct ib_device *dev) @@ -467,7 +467,7 @@ static inline struct rxe_mem *to_rmw(struct ib_mw *mw) } int rxe_register_device(struct rxe_dev *rxe); -int rxe_unregister_device(struct rxe_dev *rxe); +void rxe_unregister_device(struct rxe_dev *rxe); void rxe_mc_cleanup(struct rxe_pool_entry *arg); diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ib.c b/drivers/infiniband/ulp/ipoib/ipoib_ib.c index 9006a13af1de..6d35570092d6 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_ib.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_ib.c @@ -66,7 +66,7 @@ struct ipoib_ah *ipoib_create_ah(struct net_device *dev, ah->last_send = 0; kref_init(&ah->ref); - vah = rdma_create_ah(pd, attr); + vah = rdma_create_ah(pd, attr, RDMA_CREATE_AH_SLEEPABLE); if (IS_ERR(vah)) { kfree(ah); ah = (struct ipoib_ah *)vah; @@ -678,7 +678,7 @@ static void __ipoib_reap_ah(struct net_device *dev) list_for_each_entry_safe(ah, tah, &priv->dead_ahs, list) if ((int) priv->tx_tail - (int) ah->last_send >= 0) { list_del(&ah->list); - rdma_destroy_ah(ah->ah); + rdma_destroy_ah(ah->ah, 0); kfree(ah); } diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c index 6214d8c0d546..d932f99201d1 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_main.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c @@ -2453,8 +2453,8 @@ static struct net_device *ipoib_add_port(const char *format, return ERR_PTR(result); } - if (hca->rdma_netdev_get_params) { - int rc = hca->rdma_netdev_get_params(hca, port, + if (hca->ops.rdma_netdev_get_params) { + int rc = hca->ops.rdma_netdev_get_params(hca, port, RDMA_NETDEV_IPOIB, ¶ms); diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.c b/drivers/infiniband/ulp/iser/iscsi_iser.c index 3fecd87c9f2b..8c707accd148 100644 --- a/drivers/infiniband/ulp/iser/iscsi_iser.c +++ b/drivers/infiniband/ulp/iser/iscsi_iser.c @@ -997,7 +997,6 @@ static struct scsi_host_template iscsi_iser_sht = { .eh_device_reset_handler= iscsi_eh_device_reset, .eh_target_reset_handler = iscsi_eh_recover_target, .target_alloc = iscsi_target_alloc, - .use_clustering = ENABLE_CLUSTERING, .slave_alloc = iscsi_iser_slave_alloc, .proc_name = "iscsi_iser", .this_id = -1, diff --git a/drivers/infiniband/ulp/iser/iser_memory.c b/drivers/infiniband/ulp/iser/iser_memory.c index 009be8889d71..e9b7efc302d0 100644 --- a/drivers/infiniband/ulp/iser/iser_memory.c +++ b/drivers/infiniband/ulp/iser/iser_memory.c @@ -77,8 +77,8 @@ int iser_assign_reg_ops(struct iser_device *device) struct ib_device *ib_dev = device->ib_device; /* Assign function handles - based on FMR support */ - if (ib_dev->alloc_fmr && ib_dev->dealloc_fmr && - ib_dev->map_phys_fmr && ib_dev->unmap_fmr) { + if (ib_dev->ops.alloc_fmr && ib_dev->ops.dealloc_fmr && + ib_dev->ops.map_phys_fmr && ib_dev->ops.unmap_fmr) { iser_info("FMR supported, using FMR for registration\n"); device->reg_ops = &fmr_ops; } else if (ib_dev->attrs.device_cap_flags & IB_DEVICE_MEM_MGT_EXTENSIONS) { @@ -277,16 +277,13 @@ void iser_unreg_mem_fmr(struct iscsi_iser_task *iser_task, enum iser_data_dir cmd_dir) { struct iser_mem_reg *reg = &iser_task->rdma_reg[cmd_dir]; - int ret; if (!reg->mem_h) return; iser_dbg("PHYSICAL Mem.Unregister mem_h %p\n", reg->mem_h); - ret = ib_fmr_pool_unmap((struct ib_pool_fmr *)reg->mem_h); - if (ret) - iser_err("ib_fmr_pool_unmap failed %d\n", ret); + ib_fmr_pool_unmap((struct ib_pool_fmr *)reg->mem_h); reg->mem_h = NULL; } diff --git a/drivers/infiniband/ulp/opa_vnic/opa_vnic_netdev.c b/drivers/infiniband/ulp/opa_vnic/opa_vnic_netdev.c index 61558788b3fa..ae70cd18903e 100644 --- a/drivers/infiniband/ulp/opa_vnic/opa_vnic_netdev.c +++ b/drivers/infiniband/ulp/opa_vnic/opa_vnic_netdev.c @@ -330,10 +330,10 @@ struct opa_vnic_adapter *opa_vnic_add_netdev(struct ib_device *ibdev, struct rdma_netdev *rn; int rc; - netdev = ibdev->alloc_rdma_netdev(ibdev, port_num, - RDMA_NETDEV_OPA_VNIC, - "veth%d", NET_NAME_UNKNOWN, - ether_setup); + netdev = ibdev->ops.alloc_rdma_netdev(ibdev, port_num, + RDMA_NETDEV_OPA_VNIC, + "veth%d", NET_NAME_UNKNOWN, + ether_setup); if (!netdev) return ERR_PTR(-ENOMEM); else if (IS_ERR(netdev)) diff --git a/drivers/infiniband/ulp/opa_vnic/opa_vnic_vema.c b/drivers/infiniband/ulp/opa_vnic/opa_vnic_vema.c index d119d9afa845..560e4f2d466e 100644 --- a/drivers/infiniband/ulp/opa_vnic/opa_vnic_vema.c +++ b/drivers/infiniband/ulp/opa_vnic/opa_vnic_vema.c @@ -606,7 +606,7 @@ static void vema_set(struct opa_vnic_vema_port *port, static void vema_send(struct ib_mad_agent *mad_agent, struct ib_mad_send_wc *mad_wc) { - rdma_destroy_ah(mad_wc->send_buf->ah); + rdma_destroy_ah(mad_wc->send_buf->ah, RDMA_DESTROY_AH_SLEEPABLE); ib_free_send_mad(mad_wc->send_buf); } @@ -680,7 +680,7 @@ static void vema_recv(struct ib_mad_agent *mad_agent, ib_free_send_mad(rsp); err_rsp: - rdma_destroy_ah(ah); + rdma_destroy_ah(ah, RDMA_DESTROY_AH_SLEEPABLE); free_recv_mad: ib_free_recv_mad(mad_wc); } @@ -777,7 +777,7 @@ void opa_vnic_vema_send_trap(struct opa_vnic_adapter *adapter, } rdma_ah_set_dlid(&ah_attr, trap_lid); - ah = rdma_create_ah(port->mad_agent->qp->pd, &ah_attr); + ah = rdma_create_ah(port->mad_agent->qp->pd, &ah_attr, 0); if (IS_ERR(ah)) { c_err("%s:Couldn't create new AH = %p\n", __func__, ah); c_err("%s:dlid = %d, sl = %d, port = %d\n", __func__, @@ -848,7 +848,7 @@ void opa_vnic_vema_send_trap(struct opa_vnic_adapter *adapter, } err_sndbuf: - rdma_destroy_ah(ah); + rdma_destroy_ah(ah, 0); err_exit: v_err("Aborting trap\n"); } diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c index eed0eb3bb04c..31d91538bbf4 100644 --- a/drivers/infiniband/ulp/srp/ib_srp.c +++ b/drivers/infiniband/ulp/srp/ib_srp.c @@ -132,6 +132,15 @@ MODULE_PARM_DESC(dev_loss_tmo, " if fast_io_fail_tmo has not been set. \"off\" means that" " this functionality is disabled."); +static bool srp_use_imm_data = true; +module_param_named(use_imm_data, srp_use_imm_data, bool, 0644); +MODULE_PARM_DESC(use_imm_data, + "Whether or not to request permission to use immediate data during SRP login."); + +static unsigned int srp_max_imm_data = 8 * 1024; +module_param_named(max_imm_data, srp_max_imm_data, uint, 0644); +MODULE_PARM_DESC(max_imm_data, "Maximum immediate data size."); + static unsigned ch_count; module_param(ch_count, uint, 0444); MODULE_PARM_DESC(ch_count, @@ -573,7 +582,7 @@ static int srp_create_ch_ib(struct srp_rdma_ch *ch) init_attr->cap.max_send_wr = m * target->queue_size; init_attr->cap.max_recv_wr = target->queue_size + 1; init_attr->cap.max_recv_sge = 1; - init_attr->cap.max_send_sge = 1; + init_attr->cap.max_send_sge = SRP_MAX_SGE; init_attr->sq_sig_type = IB_SIGNAL_REQ_WR; init_attr->qp_type = IB_QPT_RC; init_attr->send_cq = send_cq; @@ -823,7 +832,8 @@ static u8 srp_get_subnet_timeout(struct srp_host *host) return subnet_timeout; } -static int srp_send_req(struct srp_rdma_ch *ch, bool multich) +static int srp_send_req(struct srp_rdma_ch *ch, uint32_t max_iu_len, + bool multich) { struct srp_target_port *target = ch->target; struct { @@ -852,11 +862,15 @@ static int srp_send_req(struct srp_rdma_ch *ch, bool multich) req->ib_req.opcode = SRP_LOGIN_REQ; req->ib_req.tag = 0; - req->ib_req.req_it_iu_len = cpu_to_be32(target->max_iu_len); + req->ib_req.req_it_iu_len = cpu_to_be32(max_iu_len); req->ib_req.req_buf_fmt = cpu_to_be16(SRP_BUF_FORMAT_DIRECT | SRP_BUF_FORMAT_INDIRECT); req->ib_req.req_flags = (multich ? SRP_MULTICHAN_MULTI : SRP_MULTICHAN_SINGLE); + if (srp_use_imm_data) { + req->ib_req.req_flags |= SRP_IMMED_REQUESTED; + req->ib_req.imm_data_offset = cpu_to_be16(SRP_IMM_DATA_OFFSET); + } if (target->using_rdma_cm) { req->rdma_param.flow_control = req->ib_param.flow_control; @@ -873,6 +887,7 @@ static int srp_send_req(struct srp_rdma_ch *ch, bool multich) req->rdma_req.req_it_iu_len = req->ib_req.req_it_iu_len; req->rdma_req.req_buf_fmt = req->ib_req.req_buf_fmt; req->rdma_req.req_flags = req->ib_req.req_flags; + req->rdma_req.imm_data_offset = req->ib_req.imm_data_offset; ipi = req->rdma_req.initiator_port_id; tpi = req->rdma_req.target_port_id; @@ -1145,7 +1160,8 @@ static int srp_connected_ch(struct srp_target_port *target) return c; } -static int srp_connect_ch(struct srp_rdma_ch *ch, bool multich) +static int srp_connect_ch(struct srp_rdma_ch *ch, uint32_t max_iu_len, + bool multich) { struct srp_target_port *target = ch->target; int ret; @@ -1158,7 +1174,7 @@ static int srp_connect_ch(struct srp_rdma_ch *ch, bool multich) while (1) { init_completion(&ch->done); - ret = srp_send_req(ch, multich); + ret = srp_send_req(ch, max_iu_len, multich); if (ret) goto out; ret = wait_for_completion_interruptible(&ch->done); @@ -1344,6 +1360,20 @@ static void srp_terminate_io(struct srp_rport *rport) } } +/* Calculate maximum initiator to target information unit length. */ +static uint32_t srp_max_it_iu_len(int cmd_sg_cnt, bool use_imm_data) +{ + uint32_t max_iu_len = sizeof(struct srp_cmd) + SRP_MAX_ADD_CDB_LEN + + sizeof(struct srp_indirect_buf) + + cmd_sg_cnt * sizeof(struct srp_direct_buf); + + if (use_imm_data) + max_iu_len = max(max_iu_len, SRP_IMM_DATA_OFFSET + + srp_max_imm_data); + + return max_iu_len; +} + /* * It is up to the caller to ensure that srp_rport_reconnect() calls are * serialized and that no concurrent srp_queuecommand(), srp_abort(), @@ -1357,6 +1387,8 @@ static int srp_rport_reconnect(struct srp_rport *rport) { struct srp_target_port *target = rport->lld_data; struct srp_rdma_ch *ch; + uint32_t max_iu_len = srp_max_it_iu_len(target->cmd_sg_cnt, + srp_use_imm_data); int i, j, ret = 0; bool multich = false; @@ -1402,7 +1434,7 @@ static int srp_rport_reconnect(struct srp_rport *rport) ch = &target->ch[i]; if (ret) break; - ret = srp_connect_ch(ch, multich); + ret = srp_connect_ch(ch, max_iu_len, multich); multich = true; } @@ -1764,25 +1796,29 @@ static void srp_check_mapping(struct srp_map_state *state, * @req: SRP request * * Returns the length in bytes of the SRP_CMD IU or a negative value if - * mapping failed. + * mapping failed. The size of any immediate data is not included in the + * return value. */ static int srp_map_data(struct scsi_cmnd *scmnd, struct srp_rdma_ch *ch, struct srp_request *req) { struct srp_target_port *target = ch->target; - struct scatterlist *scat; + struct scatterlist *scat, *sg; struct srp_cmd *cmd = req->cmd->buf; - int len, nents, count, ret; + int i, len, nents, count, ret; struct srp_device *dev; struct ib_device *ibdev; struct srp_map_state state; struct srp_indirect_buf *indirect_hdr; + u64 data_len; u32 idb_len, table_len; __be32 idb_rkey; u8 fmt; + req->cmd->num_sge = 1; + if (!scsi_sglist(scmnd) || scmnd->sc_data_direction == DMA_NONE) - return sizeof (struct srp_cmd); + return sizeof(struct srp_cmd) + cmd->add_cdb_len; if (scmnd->sc_data_direction != DMA_FROM_DEVICE && scmnd->sc_data_direction != DMA_TO_DEVICE) { @@ -1794,6 +1830,7 @@ static int srp_map_data(struct scsi_cmnd *scmnd, struct srp_rdma_ch *ch, nents = scsi_sg_count(scmnd); scat = scsi_sglist(scmnd); + data_len = scsi_bufflen(scmnd); dev = target->srp_host->srp_dev; ibdev = dev->dev; @@ -1802,8 +1839,31 @@ static int srp_map_data(struct scsi_cmnd *scmnd, struct srp_rdma_ch *ch, if (unlikely(count == 0)) return -EIO; + if (ch->use_imm_data && + count <= SRP_MAX_IMM_SGE && + SRP_IMM_DATA_OFFSET + data_len <= ch->max_it_iu_len && + scmnd->sc_data_direction == DMA_TO_DEVICE) { + struct srp_imm_buf *buf; + struct ib_sge *sge = &req->cmd->sge[1]; + + fmt = SRP_DATA_DESC_IMM; + len = SRP_IMM_DATA_OFFSET; + req->nmdesc = 0; + buf = (void *)cmd->add_data + cmd->add_cdb_len; + buf->len = cpu_to_be32(data_len); + WARN_ON_ONCE((void *)(buf + 1) > (void *)cmd + len); + for_each_sg(scat, sg, count, i) { + sge[i].addr = ib_sg_dma_address(ibdev, sg); + sge[i].length = ib_sg_dma_len(ibdev, sg); + sge[i].lkey = target->lkey; + } + req->cmd->num_sge += count; + goto map_complete; + } + fmt = SRP_DATA_DESC_DIRECT; - len = sizeof (struct srp_cmd) + sizeof (struct srp_direct_buf); + len = sizeof(struct srp_cmd) + cmd->add_cdb_len + + sizeof(struct srp_direct_buf); if (count == 1 && target->global_rkey) { /* @@ -1812,8 +1872,9 @@ static int srp_map_data(struct scsi_cmnd *scmnd, struct srp_rdma_ch *ch, * single entry. So a direct descriptor along with * the DMA MR suffices. */ - struct srp_direct_buf *buf = (void *) cmd->add_data; + struct srp_direct_buf *buf; + buf = (void *)cmd->add_data + cmd->add_cdb_len; buf->va = cpu_to_be64(ib_sg_dma_address(ibdev, scat)); buf->key = cpu_to_be32(target->global_rkey); buf->len = cpu_to_be32(ib_sg_dma_len(ibdev, scat)); @@ -1826,7 +1887,7 @@ static int srp_map_data(struct scsi_cmnd *scmnd, struct srp_rdma_ch *ch, * We have more than one scatter/gather entry, so build our indirect * descriptor table, trying to merge as many entries as we can. */ - indirect_hdr = (void *) cmd->add_data; + indirect_hdr = (void *)cmd->add_data + cmd->add_cdb_len; ib_dma_sync_single_for_cpu(ibdev, req->indirect_dma_addr, target->indirect_size, DMA_TO_DEVICE); @@ -1861,8 +1922,9 @@ static int srp_map_data(struct scsi_cmnd *scmnd, struct srp_rdma_ch *ch, * Memory registration collapsed the sg-list into one entry, * so use a direct descriptor. */ - struct srp_direct_buf *buf = (void *) cmd->add_data; + struct srp_direct_buf *buf; + buf = (void *)cmd->add_data + cmd->add_cdb_len; *buf = req->indirect_desc[0]; goto map_complete; } @@ -1880,7 +1942,8 @@ static int srp_map_data(struct scsi_cmnd *scmnd, struct srp_rdma_ch *ch, idb_len = sizeof(struct srp_indirect_buf) + table_len; fmt = SRP_DATA_DESC_INDIRECT; - len = sizeof(struct srp_cmd) + sizeof (struct srp_indirect_buf); + len = sizeof(struct srp_cmd) + cmd->add_cdb_len + + sizeof(struct srp_indirect_buf); len += count * sizeof (struct srp_direct_buf); memcpy(indirect_hdr->desc_list, req->indirect_desc, @@ -2001,22 +2064,30 @@ static void srp_send_done(struct ib_cq *cq, struct ib_wc *wc) list_add(&iu->list, &ch->free_tx); } +/** + * srp_post_send() - send an SRP information unit + * @ch: RDMA channel over which to send the information unit. + * @iu: Information unit to send. + * @len: Length of the information unit excluding immediate data. + */ static int srp_post_send(struct srp_rdma_ch *ch, struct srp_iu *iu, int len) { struct srp_target_port *target = ch->target; - struct ib_sge list; struct ib_send_wr wr; - list.addr = iu->dma; - list.length = len; - list.lkey = target->lkey; + if (WARN_ON_ONCE(iu->num_sge > SRP_MAX_SGE)) + return -EINVAL; + + iu->sge[0].addr = iu->dma; + iu->sge[0].length = len; + iu->sge[0].lkey = target->lkey; iu->cqe.done = srp_send_done; wr.next = NULL; wr.wr_cqe = &iu->cqe; - wr.sg_list = &list; - wr.num_sge = 1; + wr.sg_list = &iu->sge[0]; + wr.num_sge = iu->num_sge; wr.opcode = IB_WR_SEND; wr.send_flags = IB_SEND_SIGNALED; @@ -2129,6 +2200,7 @@ static int srp_response_common(struct srp_rdma_ch *ch, s32 req_delta, return 1; } + iu->num_sge = 1; ib_dma_sync_single_for_cpu(dev, iu->dma, len, DMA_TO_DEVICE); memcpy(iu->buf, rsp, len); ib_dma_sync_single_for_device(dev, iu->dma, len, DMA_TO_DEVICE); @@ -2312,7 +2384,7 @@ static int srp_queuecommand(struct Scsi_Host *shost, struct scsi_cmnd *scmnd) req = &ch->req_ring[idx]; dev = target->srp_host->srp_dev->dev; - ib_dma_sync_single_for_cpu(dev, iu->dma, target->max_iu_len, + ib_dma_sync_single_for_cpu(dev, iu->dma, ch->max_it_iu_len, DMA_TO_DEVICE); scmnd->host_scribble = (void *) req; @@ -2324,6 +2396,12 @@ static int srp_queuecommand(struct Scsi_Host *shost, struct scsi_cmnd *scmnd) int_to_scsilun(scmnd->device->lun, &cmd->lun); cmd->tag = tag; memcpy(cmd->cdb, scmnd->cmnd, scmnd->cmd_len); + if (unlikely(scmnd->cmd_len > sizeof(cmd->cdb))) { + cmd->add_cdb_len = round_up(scmnd->cmd_len - sizeof(cmd->cdb), + 4); + if (WARN_ON_ONCE(cmd->add_cdb_len > SRP_MAX_ADD_CDB_LEN)) + goto err_iu; + } req->scmnd = scmnd; req->cmd = iu; @@ -2343,11 +2421,12 @@ static int srp_queuecommand(struct Scsi_Host *shost, struct scsi_cmnd *scmnd) goto err_iu; } - ib_dma_sync_single_for_device(dev, iu->dma, target->max_iu_len, + ib_dma_sync_single_for_device(dev, iu->dma, ch->max_it_iu_len, DMA_TO_DEVICE); if (srp_post_send(ch, iu, len)) { shost_printk(KERN_ERR, target->scsi_host, PFX "Send failed\n"); + scmnd->result = DID_ERROR << 16; goto err_unmap; } @@ -2410,7 +2489,7 @@ static int srp_alloc_iu_bufs(struct srp_rdma_ch *ch) for (i = 0; i < target->queue_size; ++i) { ch->tx_ring[i] = srp_alloc_iu(target->srp_host, - target->max_iu_len, + ch->max_it_iu_len, GFP_KERNEL, DMA_TO_DEVICE); if (!ch->tx_ring[i]) goto err; @@ -2476,6 +2555,15 @@ static void srp_cm_rep_handler(struct ib_cm_id *cm_id, if (lrsp->opcode == SRP_LOGIN_RSP) { ch->max_ti_iu_len = be32_to_cpu(lrsp->max_ti_iu_len); ch->req_lim = be32_to_cpu(lrsp->req_lim_delta); + ch->use_imm_data = lrsp->rsp_flags & SRP_LOGIN_RSP_IMMED_SUPP; + ch->max_it_iu_len = srp_max_it_iu_len(target->cmd_sg_cnt, + ch->use_imm_data); + WARN_ON_ONCE(ch->max_it_iu_len > + be32_to_cpu(lrsp->max_it_iu_len)); + + if (ch->use_imm_data) + shost_printk(KERN_DEBUG, target->scsi_host, + PFX "using immediate data\n"); /* * Reserve credits for task management so we don't @@ -2864,6 +2952,8 @@ static int srp_send_tsk_mgmt(struct srp_rdma_ch *ch, u64 req_tag, u64 lun, return -1; } + iu->num_sge = 1; + ib_dma_sync_single_for_cpu(dev, iu->dma, sizeof *tsk_mgmt, DMA_TO_DEVICE); tsk_mgmt = iu->buf; @@ -3215,7 +3305,6 @@ static struct scsi_host_template srp_template = { .can_queue = SRP_DEFAULT_CMD_SQ_SIZE, .this_id = -1, .cmd_per_lun = SRP_DEFAULT_CMD_SQ_SIZE, - .use_clustering = ENABLE_CLUSTERING, .shost_attrs = srp_host_attrs, .track_queue_depth = 1, }; @@ -3403,6 +3492,9 @@ static const match_table_t srp_opt_tokens = { /** * srp_parse_in - parse an IP address and port number combination + * @net: [in] Network namespace. + * @sa: [out] Address family, IP address and port number. + * @addr_port_str: [in] IP address and port number. * * Parse the following address formats: * - IPv4: <ip_address>:<port>, e.g. 1.2.3.4:5. @@ -3720,6 +3812,7 @@ static ssize_t srp_create_target(struct device *dev, int ret, node_idx, node, cpu, i; unsigned int max_sectors_per_mr, mr_per_cmd = 0; bool multich = false; + uint32_t max_iu_len; target_host = scsi_host_alloc(&srp_template, sizeof (struct srp_target_port)); @@ -3825,9 +3918,7 @@ static ssize_t srp_create_target(struct device *dev, target->mr_per_cmd = mr_per_cmd; target->indirect_size = target->sg_tablesize * sizeof (struct srp_direct_buf); - target->max_iu_len = sizeof (struct srp_cmd) + - sizeof (struct srp_indirect_buf) + - target->cmd_sg_cnt * sizeof (struct srp_direct_buf); + max_iu_len = srp_max_it_iu_len(target->cmd_sg_cnt, srp_use_imm_data); INIT_WORK(&target->tl_err_work, srp_tl_err_work); INIT_WORK(&target->remove_work, srp_remove_work); @@ -3882,7 +3973,7 @@ static ssize_t srp_create_target(struct device *dev, if (ret) goto err_disconnect; - ret = srp_connect_ch(ch, multich); + ret = srp_connect_ch(ch, max_iu_len, multich); if (ret) { char dst[64]; @@ -4063,8 +4154,10 @@ static void srp_add_one(struct ib_device *device) srp_dev->max_pages_per_mr = min_t(u64, SRP_MAX_PAGES_PER_MR, max_pages_per_mr); - srp_dev->has_fmr = (device->alloc_fmr && device->dealloc_fmr && - device->map_phys_fmr && device->unmap_fmr); + srp_dev->has_fmr = (device->ops.alloc_fmr && + device->ops.dealloc_fmr && + device->ops.map_phys_fmr && + device->ops.unmap_fmr); srp_dev->has_fr = (attr->device_cap_flags & IB_DEVICE_MEM_MGT_EXTENSIONS); if (!never_register && !srp_dev->has_fmr && !srp_dev->has_fr) { @@ -4172,6 +4265,11 @@ static int __init srp_init_module(void) { int ret; + BUILD_BUG_ON(sizeof(struct srp_imm_buf) != 4); + BUILD_BUG_ON(sizeof(struct srp_login_req) != 64); + BUILD_BUG_ON(sizeof(struct srp_login_req_rdma) != 56); + BUILD_BUG_ON(sizeof(struct srp_cmd) != 48); + if (srp_sg_tablesize) { pr_warn("srp_sg_tablesize is deprecated, please use cmd_sg_entries\n"); if (!cmd_sg_entries) diff --git a/drivers/infiniband/ulp/srp/ib_srp.h b/drivers/infiniband/ulp/srp/ib_srp.h index a2706086b9c7..b2861cd2087a 100644 --- a/drivers/infiniband/ulp/srp/ib_srp.h +++ b/drivers/infiniband/ulp/srp/ib_srp.h @@ -67,6 +67,17 @@ enum { SRP_TAG_TSK_MGMT = 1U << 31, SRP_MAX_PAGES_PER_MR = 512, + + SRP_MAX_ADD_CDB_LEN = 16, + + SRP_MAX_IMM_SGE = 2, + SRP_MAX_SGE = SRP_MAX_IMM_SGE + 1, + /* + * Choose the immediate data offset such that a 32 byte CDB still fits. + */ + SRP_IMM_DATA_OFFSET = sizeof(struct srp_cmd) + + SRP_MAX_ADD_CDB_LEN + + sizeof(struct srp_imm_buf), }; enum srp_target_state { @@ -130,6 +141,8 @@ struct srp_request { /** * struct srp_rdma_ch * @comp_vector: Completion vector used by this RDMA channel. + * @max_it_iu_len: Maximum initiator-to-target information unit length. + * @max_ti_iu_len: Maximum target-to-initiator information unit length. */ struct srp_rdma_ch { /* These are RW in the hot path, and commonly used together */ @@ -146,6 +159,9 @@ struct srp_rdma_ch { struct ib_fmr_pool *fmr_pool; struct srp_fr_pool *fr_pool; }; + uint32_t max_it_iu_len; + uint32_t max_ti_iu_len; + bool use_imm_data; /* Everything above this point is used in the hot path of * command processing. Try to keep them packed into cachelines. @@ -169,7 +185,6 @@ struct srp_rdma_ch { struct srp_iu **tx_ring; struct srp_iu **rx_ring; struct srp_request *req_ring; - int max_ti_iu_len; int comp_vector; u64 tsk_mgmt_tag; @@ -194,7 +209,6 @@ struct srp_target_port { u32 ch_count; u32 lkey; enum srp_target_state state; - unsigned int max_iu_len; unsigned int cmd_sg_cnt; unsigned int indirect_size; bool allow_ext_sg; @@ -259,6 +273,8 @@ struct srp_iu { void *buf; size_t size; enum dma_data_direction direction; + u32 num_sge; + struct ib_sge sge[SRP_MAX_SGE]; struct ib_cqe cqe; }; diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.c b/drivers/infiniband/ulp/srpt/ib_srpt.c index 2357aa727dcf..e9c336cff8f5 100644 --- a/drivers/infiniband/ulp/srpt/ib_srpt.c +++ b/drivers/infiniband/ulp/srpt/ib_srpt.c @@ -51,8 +51,6 @@ /* Name of this kernel module. */ #define DRV_NAME "ib_srpt" -#define DRV_VERSION "2.0.0" -#define DRV_RELDATE "2011-02-14" #define SRPT_ID_STRING "Linux SRP target" @@ -60,8 +58,7 @@ #define pr_fmt(fmt) DRV_NAME " " fmt MODULE_AUTHOR("Vu Pham and Bart Van Assche"); -MODULE_DESCRIPTION("InfiniBand SCSI RDMA Protocol target " - "v" DRV_VERSION " (" DRV_RELDATE ")"); +MODULE_DESCRIPTION("SCSI RDMA Protocol target driver"); MODULE_LICENSE("Dual BSD/GPL"); /* @@ -89,8 +86,7 @@ static int srpt_get_u64_x(char *buffer, const struct kernel_param *kp) module_param_call(srpt_service_guid, NULL, srpt_get_u64_x, &srpt_service_guid, 0444); MODULE_PARM_DESC(srpt_service_guid, - "Using this value for ioc_guid, id_ext, and cm_listen_id" - " instead of using the node_guid of the first HCA."); + "Using this value for ioc_guid, id_ext, and cm_listen_id instead of using the node_guid of the first HCA."); static struct ib_client srpt_client; /* Protects both rdma_cm_port and rdma_cm_id. */ @@ -462,7 +458,7 @@ static void srpt_mgmt_method_get(struct srpt_port *sp, struct ib_mad *rq_mad, static void srpt_mad_send_handler(struct ib_mad_agent *mad_agent, struct ib_mad_send_wc *mad_wc) { - rdma_destroy_ah(mad_wc->send_buf->ah); + rdma_destroy_ah(mad_wc->send_buf->ah, RDMA_DESTROY_AH_SLEEPABLE); ib_free_send_mad(mad_wc->send_buf); } @@ -529,7 +525,7 @@ static void srpt_mad_recv_handler(struct ib_mad_agent *mad_agent, ib_free_send_mad(rsp); err_rsp: - rdma_destroy_ah(ah); + rdma_destroy_ah(ah, RDMA_DESTROY_AH_SLEEPABLE); err: ib_free_recv_mad(mad_wc); } @@ -652,31 +648,33 @@ static void srpt_unregister_mad_agent(struct srpt_device *sdev) * srpt_alloc_ioctx - allocate a SRPT I/O context structure * @sdev: SRPT HCA pointer. * @ioctx_size: I/O context size. - * @dma_size: Size of I/O context DMA buffer. + * @buf_cache: I/O buffer cache. * @dir: DMA data direction. */ static struct srpt_ioctx *srpt_alloc_ioctx(struct srpt_device *sdev, - int ioctx_size, int dma_size, + int ioctx_size, + struct kmem_cache *buf_cache, enum dma_data_direction dir) { struct srpt_ioctx *ioctx; - ioctx = kmalloc(ioctx_size, GFP_KERNEL); + ioctx = kzalloc(ioctx_size, GFP_KERNEL); if (!ioctx) goto err; - ioctx->buf = kmalloc(dma_size, GFP_KERNEL); + ioctx->buf = kmem_cache_alloc(buf_cache, GFP_KERNEL); if (!ioctx->buf) goto err_free_ioctx; - ioctx->dma = ib_dma_map_single(sdev->device, ioctx->buf, dma_size, dir); + ioctx->dma = ib_dma_map_single(sdev->device, ioctx->buf, + kmem_cache_size(buf_cache), dir); if (ib_dma_mapping_error(sdev->device, ioctx->dma)) goto err_free_buf; return ioctx; err_free_buf: - kfree(ioctx->buf); + kmem_cache_free(buf_cache, ioctx->buf); err_free_ioctx: kfree(ioctx); err: @@ -687,17 +685,19 @@ err: * srpt_free_ioctx - free a SRPT I/O context structure * @sdev: SRPT HCA pointer. * @ioctx: I/O context pointer. - * @dma_size: Size of I/O context DMA buffer. + * @buf_cache: I/O buffer cache. * @dir: DMA data direction. */ static void srpt_free_ioctx(struct srpt_device *sdev, struct srpt_ioctx *ioctx, - int dma_size, enum dma_data_direction dir) + struct kmem_cache *buf_cache, + enum dma_data_direction dir) { if (!ioctx) return; - ib_dma_unmap_single(sdev->device, ioctx->dma, dma_size, dir); - kfree(ioctx->buf); + ib_dma_unmap_single(sdev->device, ioctx->dma, + kmem_cache_size(buf_cache), dir); + kmem_cache_free(buf_cache, ioctx->buf); kfree(ioctx); } @@ -706,33 +706,38 @@ static void srpt_free_ioctx(struct srpt_device *sdev, struct srpt_ioctx *ioctx, * @sdev: Device to allocate the I/O context ring for. * @ring_size: Number of elements in the I/O context ring. * @ioctx_size: I/O context size. - * @dma_size: DMA buffer size. + * @buf_cache: I/O buffer cache. + * @alignment_offset: Offset in each ring buffer at which the SRP information + * unit starts. * @dir: DMA data direction. */ static struct srpt_ioctx **srpt_alloc_ioctx_ring(struct srpt_device *sdev, int ring_size, int ioctx_size, - int dma_size, enum dma_data_direction dir) + struct kmem_cache *buf_cache, + int alignment_offset, + enum dma_data_direction dir) { struct srpt_ioctx **ring; int i; - WARN_ON(ioctx_size != sizeof(struct srpt_recv_ioctx) - && ioctx_size != sizeof(struct srpt_send_ioctx)); + WARN_ON(ioctx_size != sizeof(struct srpt_recv_ioctx) && + ioctx_size != sizeof(struct srpt_send_ioctx)); ring = kvmalloc_array(ring_size, sizeof(ring[0]), GFP_KERNEL); if (!ring) goto out; for (i = 0; i < ring_size; ++i) { - ring[i] = srpt_alloc_ioctx(sdev, ioctx_size, dma_size, dir); + ring[i] = srpt_alloc_ioctx(sdev, ioctx_size, buf_cache, dir); if (!ring[i]) goto err; ring[i]->index = i; + ring[i]->offset = alignment_offset; } goto out; err: while (--i >= 0) - srpt_free_ioctx(sdev, ring[i], dma_size, dir); + srpt_free_ioctx(sdev, ring[i], buf_cache, dir); kvfree(ring); ring = NULL; out: @@ -744,12 +749,13 @@ out: * @ioctx_ring: I/O context ring to be freed. * @sdev: SRPT HCA pointer. * @ring_size: Number of ring elements. - * @dma_size: Size of I/O context DMA buffer. + * @buf_cache: I/O buffer cache. * @dir: DMA data direction. */ static void srpt_free_ioctx_ring(struct srpt_ioctx **ioctx_ring, struct srpt_device *sdev, int ring_size, - int dma_size, enum dma_data_direction dir) + struct kmem_cache *buf_cache, + enum dma_data_direction dir) { int i; @@ -757,7 +763,7 @@ static void srpt_free_ioctx_ring(struct srpt_ioctx **ioctx_ring, return; for (i = 0; i < ring_size; ++i) - srpt_free_ioctx(sdev, ioctx_ring[i], dma_size, dir); + srpt_free_ioctx(sdev, ioctx_ring[i], buf_cache, dir); kvfree(ioctx_ring); } @@ -819,7 +825,7 @@ static int srpt_post_recv(struct srpt_device *sdev, struct srpt_rdma_ch *ch, struct ib_recv_wr wr; BUG_ON(!sdev); - list.addr = ioctx->ioctx.dma; + list.addr = ioctx->ioctx.dma + ioctx->ioctx.offset; list.length = srp_max_req_size; list.lkey = sdev->lkey; @@ -985,23 +991,28 @@ static inline void *srpt_get_desc_buf(struct srp_cmd *srp_cmd) /** * srpt_get_desc_tbl - parse the data descriptors of a SRP_CMD request - * @ioctx: Pointer to the I/O context associated with the request. + * @recv_ioctx: I/O context associated with the received command @srp_cmd. + * @ioctx: I/O context that will be used for responding to the initiator. * @srp_cmd: Pointer to the SRP_CMD request data. * @dir: Pointer to the variable to which the transfer direction will be * written. - * @sg: [out] scatterlist allocated for the parsed SRP_CMD. + * @sg: [out] scatterlist for the parsed SRP_CMD. * @sg_cnt: [out] length of @sg. * @data_len: Pointer to the variable to which the total data length of all * descriptors in the SRP_CMD request will be written. + * @imm_data_offset: [in] Offset in SRP_CMD requests at which immediate data + * starts. * * This function initializes ioctx->nrbuf and ioctx->r_bufs. * * Returns -EINVAL when the SRP_CMD request contains inconsistent descriptors; * -ENOMEM when memory allocation fails and zero upon success. */ -static int srpt_get_desc_tbl(struct srpt_send_ioctx *ioctx, +static int srpt_get_desc_tbl(struct srpt_recv_ioctx *recv_ioctx, + struct srpt_send_ioctx *ioctx, struct srp_cmd *srp_cmd, enum dma_data_direction *dir, - struct scatterlist **sg, unsigned *sg_cnt, u64 *data_len) + struct scatterlist **sg, unsigned int *sg_cnt, u64 *data_len, + u16 imm_data_offset) { BUG_ON(!dir); BUG_ON(!data_len); @@ -1025,7 +1036,7 @@ static int srpt_get_desc_tbl(struct srpt_send_ioctx *ioctx, if (((srp_cmd->buf_fmt & 0xf) == SRP_DATA_DESC_DIRECT) || ((srp_cmd->buf_fmt >> 4) == SRP_DATA_DESC_DIRECT)) { - struct srp_direct_buf *db = srpt_get_desc_buf(srp_cmd); + struct srp_direct_buf *db = srpt_get_desc_buf(srp_cmd); *data_len = be32_to_cpu(db->len); return srpt_alloc_rw_ctxs(ioctx, db, 1, sg, sg_cnt); @@ -1037,8 +1048,7 @@ static int srpt_get_desc_tbl(struct srpt_send_ioctx *ioctx, if (nbufs > (srp_cmd->data_out_desc_cnt + srp_cmd->data_in_desc_cnt)) { - pr_err("received unsupported SRP_CMD request" - " type (%u out + %u in != %u / %zu)\n", + pr_err("received unsupported SRP_CMD request type (%u out + %u in != %u / %zu)\n", srp_cmd->data_out_desc_cnt, srp_cmd->data_in_desc_cnt, be32_to_cpu(idb->table_desc.len), @@ -1049,6 +1059,40 @@ static int srpt_get_desc_tbl(struct srpt_send_ioctx *ioctx, *data_len = be32_to_cpu(idb->len); return srpt_alloc_rw_ctxs(ioctx, idb->desc_list, nbufs, sg, sg_cnt); + } else if ((srp_cmd->buf_fmt >> 4) == SRP_DATA_DESC_IMM) { + struct srp_imm_buf *imm_buf = srpt_get_desc_buf(srp_cmd); + void *data = (void *)srp_cmd + imm_data_offset; + uint32_t len = be32_to_cpu(imm_buf->len); + uint32_t req_size = imm_data_offset + len; + + if (req_size > srp_max_req_size) { + pr_err("Immediate data (length %d + %d) exceeds request size %d\n", + imm_data_offset, len, srp_max_req_size); + return -EINVAL; + } + if (recv_ioctx->byte_len < req_size) { + pr_err("Received too few data - %d < %d\n", + recv_ioctx->byte_len, req_size); + return -EIO; + } + /* + * The immediate data buffer descriptor must occur before the + * immediate data itself. + */ + if ((void *)(imm_buf + 1) > (void *)data) { + pr_err("Received invalid write request\n"); + return -EINVAL; + } + *data_len = len; + ioctx->recv_ioctx = recv_ioctx; + if ((uintptr_t)data & 511) { + pr_warn_once("Internal error - the receive buffers are not aligned properly.\n"); + return -EINVAL; + } + sg_init_one(&ioctx->imm_sg, data, len); + *sg = &ioctx->imm_sg; + *sg_cnt = 1; + return 0; } else { *data_len = 0; return 0; @@ -1191,6 +1235,7 @@ static struct srpt_send_ioctx *srpt_get_send_ioctx(struct srpt_rdma_ch *ch) BUG_ON(ioctx->ch != ch); ioctx->state = SRPT_STATE_NEW; + WARN_ON_ONCE(ioctx->recv_ioctx); ioctx->n_rdma = 0; ioctx->n_rw_ctx = 0; ioctx->queue_status_only = false; @@ -1352,8 +1397,8 @@ static int srpt_build_cmd_rsp(struct srpt_rdma_ch *ch, BUILD_BUG_ON(MIN_MAX_RSP_SIZE <= sizeof(*srp_rsp)); max_sense_len = ch->max_ti_iu_len - sizeof(*srp_rsp); if (sense_data_len > max_sense_len) { - pr_warn("truncated sense data from %d to %d" - " bytes\n", sense_data_len, max_sense_len); + pr_warn("truncated sense data from %d to %d bytes\n", + sense_data_len, max_sense_len); sense_data_len = max_sense_len; } @@ -1433,7 +1478,7 @@ static void srpt_handle_cmd(struct srpt_rdma_ch *ch, BUG_ON(!send_ioctx); - srp_cmd = recv_ioctx->ioctx.buf; + srp_cmd = recv_ioctx->ioctx.buf + recv_ioctx->ioctx.offset; cmd = &send_ioctx->cmd; cmd->tag = srp_cmd->tag; @@ -1453,8 +1498,8 @@ static void srpt_handle_cmd(struct srpt_rdma_ch *ch, break; } - rc = srpt_get_desc_tbl(send_ioctx, srp_cmd, &dir, &sg, &sg_cnt, - &data_len); + rc = srpt_get_desc_tbl(recv_ioctx, send_ioctx, srp_cmd, &dir, + &sg, &sg_cnt, &data_len, ch->imm_data_offset); if (rc) { if (rc != -EAGAIN) { pr_err("0x%llx: parsing SRP descriptor table failed.\n", @@ -1521,7 +1566,7 @@ static void srpt_handle_tsk_mgmt(struct srpt_rdma_ch *ch, BUG_ON(!send_ioctx); - srp_tsk = recv_ioctx->ioctx.buf; + srp_tsk = recv_ioctx->ioctx.buf + recv_ioctx->ioctx.offset; cmd = &send_ioctx->cmd; pr_debug("recv tsk_mgmt fn %d for task_tag %lld and cmd tag %lld ch %p sess %p\n", @@ -1564,10 +1609,11 @@ srpt_handle_new_iu(struct srpt_rdma_ch *ch, struct srpt_recv_ioctx *recv_ioctx) goto push; ib_dma_sync_single_for_cpu(ch->sport->sdev->device, - recv_ioctx->ioctx.dma, srp_max_req_size, + recv_ioctx->ioctx.dma, + recv_ioctx->ioctx.offset + srp_max_req_size, DMA_FROM_DEVICE); - srp_cmd = recv_ioctx->ioctx.buf; + srp_cmd = recv_ioctx->ioctx.buf + recv_ioctx->ioctx.offset; opcode = srp_cmd->opcode; if (opcode == SRP_CMD || opcode == SRP_TSK_MGMT) { send_ioctx = srpt_get_send_ioctx(ch); @@ -1604,7 +1650,8 @@ srpt_handle_new_iu(struct srpt_rdma_ch *ch, struct srpt_recv_ioctx *recv_ioctx) break; } - srpt_post_recv(ch->sport->sdev, ch, recv_ioctx); + if (!send_ioctx || !send_ioctx->recv_ioctx) + srpt_post_recv(ch->sport->sdev, ch, recv_ioctx); res = true; out: @@ -1630,6 +1677,7 @@ static void srpt_recv_done(struct ib_cq *cq, struct ib_wc *wc) req_lim = atomic_dec_return(&ch->req_lim); if (unlikely(req_lim < 0)) pr_err("req_lim = %d < 0\n", req_lim); + ioctx->byte_len = wc->byte_len; srpt_handle_new_iu(ch, ioctx); } else { pr_info_ratelimited("receiving failed for ioctx %p with status %d\n", @@ -1693,14 +1741,14 @@ static void srpt_send_done(struct ib_cq *cq, struct ib_wc *wc) atomic_add(1 + ioctx->n_rdma, &ch->sq_wr_avail); if (wc->status != IB_WC_SUCCESS) - pr_info("sending response for ioctx 0x%p failed" - " with status %d\n", ioctx, wc->status); + pr_info("sending response for ioctx 0x%p failed with status %d\n", + ioctx, wc->status); if (state != SRPT_STATE_DONE) { transport_generic_free_cmd(&ioctx->cmd, 0); } else { - pr_err("IB completion has been received too late for" - " wr_id = %u.\n", ioctx->ioctx.index); + pr_err("IB completion has been received too late for wr_id = %u.\n", + ioctx->ioctx.index); } srpt_process_wait_list(ch); @@ -1754,6 +1802,8 @@ retry: qp_init->cap.max_rdma_ctxs = sq_size / 2; qp_init->cap.max_send_sge = min(attrs->max_send_sge, SRPT_MAX_SG_PER_WQE); + qp_init->cap.max_recv_sge = min(attrs->max_recv_sge, + SRPT_MAX_SG_PER_WQE); qp_init->port_num = ch->sport->port; if (sdev->use_srq) { qp_init->srq = sdev->srq; @@ -2010,6 +2060,14 @@ static void srpt_free_ch(struct kref *kref) kfree_rcu(ch, rcu); } +/* + * Shut down the SCSI target session, tell the connection manager to + * disconnect the associated RDMA channel, transition the QP to the error + * state and remove the channel from the channel list. This function is + * typically called from inside srpt_zerolength_write_done(). Concurrent + * srpt_zerolength_write() calls from inside srpt_close_ch() are possible + * as long as the channel is on sport->nexus_list. + */ static void srpt_release_channel_work(struct work_struct *w) { struct srpt_rdma_ch *ch; @@ -2037,20 +2095,24 @@ static void srpt_release_channel_work(struct work_struct *w) else ib_destroy_cm_id(ch->ib_cm.cm_id); + sport = ch->sport; + mutex_lock(&sport->mutex); + list_del_rcu(&ch->list); + mutex_unlock(&sport->mutex); + srpt_destroy_ch_ib(ch); srpt_free_ioctx_ring((struct srpt_ioctx **)ch->ioctx_ring, ch->sport->sdev, ch->rq_size, - ch->max_rsp_size, DMA_TO_DEVICE); + ch->rsp_buf_cache, DMA_TO_DEVICE); + + kmem_cache_destroy(ch->rsp_buf_cache); srpt_free_ioctx_ring((struct srpt_ioctx **)ch->ioctx_recv_ring, sdev, ch->rq_size, - srp_max_req_size, DMA_FROM_DEVICE); + ch->req_buf_cache, DMA_FROM_DEVICE); - sport = ch->sport; - mutex_lock(&sport->mutex); - list_del_rcu(&ch->list); - mutex_unlock(&sport->mutex); + kmem_cache_destroy(ch->req_buf_cache); wake_up(&sport->ch_releaseQ); @@ -2174,14 +2236,19 @@ static int srpt_cm_req_recv(struct srpt_device *const sdev, INIT_LIST_HEAD(&ch->cmd_wait_list); ch->max_rsp_size = ch->sport->port_attrib.srp_max_rsp_size; + ch->rsp_buf_cache = kmem_cache_create("srpt-rsp-buf", ch->max_rsp_size, + 512, 0, NULL); + if (!ch->rsp_buf_cache) + goto free_ch; + ch->ioctx_ring = (struct srpt_send_ioctx **) srpt_alloc_ioctx_ring(ch->sport->sdev, ch->rq_size, sizeof(*ch->ioctx_ring[0]), - ch->max_rsp_size, DMA_TO_DEVICE); + ch->rsp_buf_cache, 0, DMA_TO_DEVICE); if (!ch->ioctx_ring) { pr_err("rejected SRP_LOGIN_REQ because creating a new QP SQ ring failed.\n"); rej->reason = cpu_to_be32(SRP_LOGIN_REJ_INSUFFICIENT_RESOURCES); - goto free_ch; + goto free_rsp_cache; } INIT_LIST_HEAD(&ch->free_list); @@ -2190,16 +2257,39 @@ static int srpt_cm_req_recv(struct srpt_device *const sdev, list_add_tail(&ch->ioctx_ring[i]->free_list, &ch->free_list); } if (!sdev->use_srq) { + u16 imm_data_offset = req->req_flags & SRP_IMMED_REQUESTED ? + be16_to_cpu(req->imm_data_offset) : 0; + u16 alignment_offset; + u32 req_sz; + + if (req->req_flags & SRP_IMMED_REQUESTED) + pr_debug("imm_data_offset = %d\n", + be16_to_cpu(req->imm_data_offset)); + if (imm_data_offset >= sizeof(struct srp_cmd)) { + ch->imm_data_offset = imm_data_offset; + rsp->rsp_flags |= SRP_LOGIN_RSP_IMMED_SUPP; + } else { + ch->imm_data_offset = 0; + } + alignment_offset = round_up(imm_data_offset, 512) - + imm_data_offset; + req_sz = alignment_offset + imm_data_offset + srp_max_req_size; + ch->req_buf_cache = kmem_cache_create("srpt-req-buf", req_sz, + 512, 0, NULL); + if (!ch->req_buf_cache) + goto free_rsp_ring; + ch->ioctx_recv_ring = (struct srpt_recv_ioctx **) srpt_alloc_ioctx_ring(ch->sport->sdev, ch->rq_size, sizeof(*ch->ioctx_recv_ring[0]), - srp_max_req_size, + ch->req_buf_cache, + alignment_offset, DMA_FROM_DEVICE); if (!ch->ioctx_recv_ring) { pr_err("rejected SRP_LOGIN_REQ because creating a new QP RQ ring failed.\n"); rej->reason = cpu_to_be32(SRP_LOGIN_REJ_INSUFFICIENT_RESOURCES); - goto free_ring; + goto free_recv_cache; } for (i = 0; i < ch->rq_size; i++) INIT_LIST_HEAD(&ch->ioctx_recv_ring[i]->wait_list); @@ -2249,17 +2339,15 @@ static int srpt_cm_req_recv(struct srpt_device *const sdev, if ((req->req_flags & SRP_MTCH_ACTION) == SRP_MULTICHAN_SINGLE) { struct srpt_rdma_ch *ch2; - rsp->rsp_flags = SRP_LOGIN_RSP_MULTICHAN_NO_CHAN; - list_for_each_entry(ch2, &nexus->ch_list, list) { if (srpt_disconnect_ch(ch2) < 0) continue; pr_info("Relogin - closed existing channel %s\n", ch2->sess_name); - rsp->rsp_flags = SRP_LOGIN_RSP_MULTICHAN_TERMINATED; + rsp->rsp_flags |= SRP_LOGIN_RSP_MULTICHAN_TERMINATED; } } else { - rsp->rsp_flags = SRP_LOGIN_RSP_MULTICHAN_MAINTAINED; + rsp->rsp_flags |= SRP_LOGIN_RSP_MULTICHAN_MAINTAINED; } list_add_tail_rcu(&ch->list, &nexus->ch_list); @@ -2289,7 +2377,7 @@ static int srpt_cm_req_recv(struct srpt_device *const sdev, /* create srp_login_response */ rsp->opcode = SRP_LOGIN_RSP; rsp->tag = req->tag; - rsp->max_it_iu_len = req->req_it_iu_len; + rsp->max_it_iu_len = cpu_to_be32(srp_max_req_size); rsp->max_ti_iu_len = req->req_it_iu_len; ch->max_ti_iu_len = it_iu_len; rsp->buf_fmt = cpu_to_be16(SRP_BUF_FORMAT_DIRECT | @@ -2353,12 +2441,18 @@ destroy_ib: free_recv_ring: srpt_free_ioctx_ring((struct srpt_ioctx **)ch->ioctx_recv_ring, ch->sport->sdev, ch->rq_size, - srp_max_req_size, DMA_FROM_DEVICE); + ch->req_buf_cache, DMA_FROM_DEVICE); + +free_recv_cache: + kmem_cache_destroy(ch->req_buf_cache); -free_ring: +free_rsp_ring: srpt_free_ioctx_ring((struct srpt_ioctx **)ch->ioctx_ring, ch->sport->sdev, ch->rq_size, - ch->max_rsp_size, DMA_TO_DEVICE); + ch->rsp_buf_cache, DMA_TO_DEVICE); + +free_rsp_cache: + kmem_cache_destroy(ch->rsp_buf_cache); free_ch: if (rdma_cm_id) @@ -2439,6 +2533,7 @@ static int srpt_rdma_cm_req_recv(struct rdma_cm_id *cm_id, req.req_flags = req_rdma->req_flags; memcpy(req.initiator_port_id, req_rdma->initiator_port_id, 16); memcpy(req.target_port_id, req_rdma->target_port_id, 16); + req.imm_data_offset = req_rdma->imm_data_offset; snprintf(src_addr, sizeof(src_addr), "%pIS", &cm_id->route.addr.src_addr); @@ -2629,6 +2724,12 @@ static int srpt_write_pending(struct se_cmd *se_cmd) enum srpt_command_state new_state; int ret, i; + if (ioctx->recv_ioctx) { + srpt_set_cmd_state(ioctx, SRPT_STATE_DATA_IN); + target_execute_cmd(&ioctx->cmd); + return 0; + } + new_state = srpt_set_cmd_state(ioctx, SRPT_STATE_NEED_DATA); WARN_ON(new_state == SRPT_STATE_DONE); @@ -2908,7 +3009,9 @@ static void srpt_free_srq(struct srpt_device *sdev) ib_destroy_srq(sdev->srq); srpt_free_ioctx_ring((struct srpt_ioctx **)sdev->ioctx_ring, sdev, - sdev->srq_size, srp_max_req_size, DMA_FROM_DEVICE); + sdev->srq_size, sdev->req_buf_cache, + DMA_FROM_DEVICE); + kmem_cache_destroy(sdev->req_buf_cache); sdev->srq = NULL; } @@ -2935,14 +3038,17 @@ static int srpt_alloc_srq(struct srpt_device *sdev) pr_debug("create SRQ #wr= %d max_allow=%d dev= %s\n", sdev->srq_size, sdev->device->attrs.max_srq_wr, dev_name(&device->dev)); + sdev->req_buf_cache = kmem_cache_create("srpt-srq-req-buf", + srp_max_req_size, 0, 0, NULL); + if (!sdev->req_buf_cache) + goto free_srq; + sdev->ioctx_ring = (struct srpt_recv_ioctx **) srpt_alloc_ioctx_ring(sdev, sdev->srq_size, sizeof(*sdev->ioctx_ring[0]), - srp_max_req_size, DMA_FROM_DEVICE); - if (!sdev->ioctx_ring) { - ib_destroy_srq(srq); - return -ENOMEM; - } + sdev->req_buf_cache, 0, DMA_FROM_DEVICE); + if (!sdev->ioctx_ring) + goto free_cache; sdev->use_srq = true; sdev->srq = srq; @@ -2953,6 +3059,13 @@ static int srpt_alloc_srq(struct srpt_device *sdev) } return 0; + +free_cache: + kmem_cache_destroy(sdev->req_buf_cache); + +free_srq: + ib_destroy_srq(srq); + return -ENOMEM; } static int srpt_use_srq(struct srpt_device *sdev, bool use_srq) @@ -3015,9 +3128,8 @@ static void srpt_add_one(struct ib_device *device) } /* print out target login information */ - pr_debug("Target login info: id_ext=%016llx,ioc_guid=%016llx," - "pkey=ffff,service_id=%016llx\n", srpt_service_guid, - srpt_service_guid, srpt_service_guid); + pr_debug("Target login info: id_ext=%016llx,ioc_guid=%016llx,pkey=ffff,service_id=%016llx\n", + srpt_service_guid, srpt_service_guid, srpt_service_guid); /* * We do not have a consistent service_id (ie. also id_ext of target_id) @@ -3147,11 +3259,6 @@ static int srpt_check_false(struct se_portal_group *se_tpg) return 0; } -static char *srpt_get_fabric_name(void) -{ - return "srpt"; -} - static struct srpt_port *srpt_tpg_to_sport(struct se_portal_group *tpg) { return tpg->se_tpg_wwn->priv; @@ -3182,11 +3289,18 @@ static void srpt_release_cmd(struct se_cmd *se_cmd) struct srpt_send_ioctx *ioctx = container_of(se_cmd, struct srpt_send_ioctx, cmd); struct srpt_rdma_ch *ch = ioctx->ch; + struct srpt_recv_ioctx *recv_ioctx = ioctx->recv_ioctx; unsigned long flags; WARN_ON_ONCE(ioctx->state != SRPT_STATE_DONE && !(ioctx->cmd.transport_state & CMD_T_ABORTED)); + if (recv_ioctx) { + WARN_ON_ONCE(!list_empty(&recv_ioctx->wait_list)); + ioctx->recv_ioctx = NULL; + srpt_post_recv(ch->sport->sdev, ch, recv_ioctx); + } + if (ioctx->n_rw_ctx) { srpt_free_rw_ctxs(ch, ioctx); ioctx->n_rw_ctx = 0; @@ -3572,7 +3686,7 @@ static ssize_t srpt_tpg_enable_show(struct config_item *item, char *page) struct se_portal_group *se_tpg = to_tpg(item); struct srpt_port *sport = srpt_tpg_to_sport(se_tpg); - return snprintf(page, PAGE_SIZE, "%d\n", (sport->enabled) ? 1: 0); + return snprintf(page, PAGE_SIZE, "%d\n", sport->enabled); } static ssize_t srpt_tpg_enable_store(struct config_item *item, @@ -3581,7 +3695,7 @@ static ssize_t srpt_tpg_enable_store(struct config_item *item, struct se_portal_group *se_tpg = to_tpg(item); struct srpt_port *sport = srpt_tpg_to_sport(se_tpg); unsigned long tmp; - int ret; + int ret; ret = kstrtoul(page, 0, &tmp); if (ret < 0) { @@ -3617,7 +3731,7 @@ static struct se_portal_group *srpt_make_tpg(struct se_wwn *wwn, const char *name) { struct srpt_port *sport = wwn->priv; - static struct se_portal_group *tpg; + struct se_portal_group *tpg; int res; WARN_ON_ONCE(wwn != &sport->port_guid_wwn && @@ -3666,7 +3780,7 @@ static void srpt_drop_tport(struct se_wwn *wwn) static ssize_t srpt_wwn_version_show(struct config_item *item, char *buf) { - return scnprintf(buf, PAGE_SIZE, "%s\n", DRV_VERSION); + return scnprintf(buf, PAGE_SIZE, "\n"); } CONFIGFS_ATTR_RO(srpt_wwn_, version); @@ -3678,8 +3792,7 @@ static struct configfs_attribute *srpt_wwn_attrs[] = { static const struct target_core_fabric_ops srpt_template = { .module = THIS_MODULE, - .name = "srpt", - .get_fabric_name = srpt_get_fabric_name, + .fabric_name = "srpt", .tpg_get_wwn = srpt_get_fabric_wwn, .tpg_get_tag = srpt_get_tag, .tpg_check_demo_mode = srpt_check_false, @@ -3730,16 +3843,14 @@ static int __init srpt_init_module(void) ret = -EINVAL; if (srp_max_req_size < MIN_MAX_REQ_SIZE) { - pr_err("invalid value %d for kernel module parameter" - " srp_max_req_size -- must be at least %d.\n", + pr_err("invalid value %d for kernel module parameter srp_max_req_size -- must be at least %d.\n", srp_max_req_size, MIN_MAX_REQ_SIZE); goto out; } if (srpt_srq_size < MIN_SRPT_SRQ_SIZE || srpt_srq_size > MAX_SRPT_SRQ_SIZE) { - pr_err("invalid value %d for kernel module parameter" - " srpt_srq_size -- must be in the range [%d..%d].\n", + pr_err("invalid value %d for kernel module parameter srpt_srq_size -- must be in the range [%d..%d].\n", srpt_srq_size, MIN_SRPT_SRQ_SIZE, MAX_SRPT_SRQ_SIZE); goto out; } diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.h b/drivers/infiniband/ulp/srpt/ib_srpt.h index 444dfd7281b5..39b3e50baf3d 100644 --- a/drivers/infiniband/ulp/srpt/ib_srpt.h +++ b/drivers/infiniband/ulp/srpt/ib_srpt.h @@ -104,10 +104,6 @@ enum { SRP_CMD_ORDERED_Q = 0x2, SRP_CMD_ACA = 0x4, - SRP_LOGIN_RSP_MULTICHAN_NO_CHAN = 0x0, - SRP_LOGIN_RSP_MULTICHAN_TERMINATED = 0x1, - SRP_LOGIN_RSP_MULTICHAN_MAINTAINED = 0x2, - SRPT_DEF_SG_TABLESIZE = 128, /* * An experimentally determined value that avoids that QP creation @@ -124,11 +120,18 @@ enum { MAX_SRPT_RDMA_SIZE = 1U << 24, MAX_SRPT_RSP_SIZE = 1024, + SRP_MAX_ADD_CDB_LEN = 16, + SRP_MAX_IMM_DATA_OFFSET = 80, + SRP_MAX_IMM_DATA = 8 * 1024, MIN_MAX_REQ_SIZE = 996, - DEFAULT_MAX_REQ_SIZE - = sizeof(struct srp_cmd)/*48*/ - + sizeof(struct srp_indirect_buf)/*20*/ - + 128 * sizeof(struct srp_direct_buf)/*16*/, + DEFAULT_MAX_REQ_SIZE_1 = sizeof(struct srp_cmd)/*48*/ + + SRP_MAX_ADD_CDB_LEN + + sizeof(struct srp_indirect_buf)/*20*/ + + 128 * sizeof(struct srp_direct_buf)/*16*/, + DEFAULT_MAX_REQ_SIZE_2 = SRP_MAX_IMM_DATA_OFFSET + + sizeof(struct srp_imm_buf) + SRP_MAX_IMM_DATA, + DEFAULT_MAX_REQ_SIZE = DEFAULT_MAX_REQ_SIZE_1 > DEFAULT_MAX_REQ_SIZE_2 ? + DEFAULT_MAX_REQ_SIZE_1 : DEFAULT_MAX_REQ_SIZE_2, MIN_MAX_RSP_SIZE = sizeof(struct srp_rsp)/*36*/ + 4, DEFAULT_MAX_RSP_SIZE = 256, /* leaves 220 bytes for sense data */ @@ -165,12 +168,14 @@ enum srpt_command_state { * @cqe: Completion queue element. * @buf: Pointer to the buffer. * @dma: DMA address of the buffer. + * @offset: Offset of the first byte in @buf and @dma that is actually used. * @index: Index of the I/O context in its ioctx_ring array. */ struct srpt_ioctx { struct ib_cqe cqe; void *buf; dma_addr_t dma; + uint32_t offset; uint32_t index; }; @@ -178,12 +183,14 @@ struct srpt_ioctx { * struct srpt_recv_ioctx - SRPT receive I/O context * @ioctx: See above. * @wait_list: Node for insertion in srpt_rdma_ch.cmd_wait_list. + * @byte_len: Number of bytes in @ioctx.buf. */ struct srpt_recv_ioctx { struct srpt_ioctx ioctx; struct list_head wait_list; + int byte_len; }; - + struct srpt_rw_ctx { struct rdma_rw_ctx rw; struct scatterlist *sg; @@ -194,8 +201,11 @@ struct srpt_rw_ctx { * struct srpt_send_ioctx - SRPT send I/O context * @ioctx: See above. * @ch: Channel pointer. + * @recv_ioctx: Receive I/O context associated with this send I/O context. + * Only used for processing immediate data. * @s_rw_ctx: @rw_ctxs points here if only a single rw_ctx is needed. * @rw_ctxs: RDMA read/write contexts. + * @imm_sg: Scatterlist for immediate data. * @rdma_cqe: RDMA completion queue element. * @free_list: Node in srpt_rdma_ch.free_list. * @state: I/O context state. @@ -209,10 +219,13 @@ struct srpt_rw_ctx { struct srpt_send_ioctx { struct srpt_ioctx ioctx; struct srpt_rdma_ch *ch; + struct srpt_recv_ioctx *recv_ioctx; struct srpt_rw_ctx s_rw_ctx; struct srpt_rw_ctx *rw_ctxs; + struct scatterlist imm_sg; + struct ib_cqe rdma_cqe; struct list_head free_list; enum srpt_command_state state; @@ -245,7 +258,10 @@ enum rdma_ch_state { * struct srpt_rdma_ch - RDMA channel * @nexus: I_T nexus this channel is associated with. * @qp: IB queue pair used for communicating over this channel. - * @cm_id: IB CM ID associated with the channel. + * @ib_cm: See below. + * @ib_cm.cm_id: IB CM ID associated with the channel. + * @rdma_cm: See below. + * @rdma_cm.cm_id: RDMA CM ID associated with the channel. * @cq: IB completion queue for this channel. * @zw_cqe: Zero-length write CQE. * @rcu: RCU head. @@ -259,12 +275,15 @@ enum rdma_ch_state { * @req_lim: request limit: maximum number of requests that may be sent * by the initiator without having received a response. * @req_lim_delta: Number of credits not yet sent back to the initiator. + * @imm_data_offset: Offset from start of SRP_CMD for immediate data. * @spinlock: Protects free_list and state. * @free_list: Head of list with free send I/O contexts. * @state: channel state. See also enum rdma_ch_state. * @using_rdma_cm: Whether the RDMA/CM or IB/CM is used for this channel. * @processing_wait_list: Whether or not cmd_wait_list is being processed. + * @rsp_buf_cache: kmem_cache for @ioctx_ring. * @ioctx_ring: Send ring. + * @req_buf_cache: kmem_cache for @ioctx_recv_ring. * @ioctx_recv_ring: Receive I/O context ring. * @list: Node in srpt_nexus.ch_list. * @cmd_wait_list: List of SCSI commands that arrived before the RTU event. This @@ -297,10 +316,13 @@ struct srpt_rdma_ch { int max_ti_iu_len; atomic_t req_lim; atomic_t req_lim_delta; + u16 imm_data_offset; spinlock_t spinlock; struct list_head free_list; enum rdma_ch_state state; + struct kmem_cache *rsp_buf_cache; struct srpt_send_ioctx **ioctx_ring; + struct kmem_cache *req_buf_cache; struct srpt_recv_ioctx **ioctx_recv_ring; struct list_head list; struct list_head cmd_wait_list; @@ -395,6 +417,7 @@ struct srpt_port { * @srq_size: SRQ size. * @sdev_mutex: Serializes use_srq changes. * @use_srq: Whether or not to use SRQ. + * @req_buf_cache: kmem_cache for @ioctx_ring buffers. * @ioctx_ring: Per-HCA SRQ. * @event_handler: Per-HCA asynchronous IB event handler. * @list: Node in srpt_dev_list. @@ -409,6 +432,7 @@ struct srpt_device { int srq_size; struct mutex sdev_mutex; bool use_srq; + struct kmem_cache *req_buf_cache; struct srpt_recv_ioctx **ioctx_ring; struct ib_event_handler event_handler; struct list_head list; diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c index 1167ff0416cf..567221cca13c 100644 --- a/drivers/iommu/amd_iommu.c +++ b/drivers/iommu/amd_iommu.c @@ -55,8 +55,6 @@ #include "amd_iommu_types.h" #include "irq_remapping.h" -#define AMD_IOMMU_MAPPING_ERROR 0 - #define CMD_SET_TYPE(cmd, t) ((cmd)->data[1] |= ((t) << 28)) #define LOOP_TIMEOUT 100000 @@ -2186,7 +2184,7 @@ static int amd_iommu_add_device(struct device *dev) dev_name(dev)); iommu_ignore_device(dev); - dev->dma_ops = &dma_direct_ops; + dev->dma_ops = NULL; goto out; } init_iommu_group(dev); @@ -2339,7 +2337,7 @@ static dma_addr_t __map_single(struct device *dev, paddr &= PAGE_MASK; address = dma_ops_alloc_iova(dev, dma_dom, pages, dma_mask); - if (address == AMD_IOMMU_MAPPING_ERROR) + if (!address) goto out; prot = dir2prot(direction); @@ -2376,7 +2374,7 @@ out_unmap: dma_ops_free_iova(dma_dom, address, pages); - return AMD_IOMMU_MAPPING_ERROR; + return DMA_MAPPING_ERROR; } /* @@ -2427,7 +2425,7 @@ static dma_addr_t map_page(struct device *dev, struct page *page, if (PTR_ERR(domain) == -EINVAL) return (dma_addr_t)paddr; else if (IS_ERR(domain)) - return AMD_IOMMU_MAPPING_ERROR; + return DMA_MAPPING_ERROR; dma_mask = *dev->dma_mask; dma_dom = to_dma_ops_domain(domain); @@ -2504,7 +2502,7 @@ static int map_sg(struct device *dev, struct scatterlist *sglist, npages = sg_num_pages(dev, sglist, nelems); address = dma_ops_alloc_iova(dev, dma_dom, npages, dma_mask); - if (address == AMD_IOMMU_MAPPING_ERROR) + if (address == DMA_MAPPING_ERROR) goto out_err; prot = dir2prot(direction); @@ -2627,7 +2625,7 @@ static void *alloc_coherent(struct device *dev, size_t size, *dma_addr = __map_single(dev, dma_dom, page_to_phys(page), size, DMA_BIDIRECTIONAL, dma_mask); - if (*dma_addr == AMD_IOMMU_MAPPING_ERROR) + if (*dma_addr == DMA_MAPPING_ERROR) goto out_free; return page_address(page); @@ -2678,11 +2676,6 @@ static int amd_iommu_dma_supported(struct device *dev, u64 mask) return check_device(dev); } -static int amd_iommu_mapping_error(struct device *dev, dma_addr_t dma_addr) -{ - return dma_addr == AMD_IOMMU_MAPPING_ERROR; -} - static const struct dma_map_ops amd_iommu_dma_ops = { .alloc = alloc_coherent, .free = free_coherent, @@ -2691,7 +2684,6 @@ static const struct dma_map_ops amd_iommu_dma_ops = { .map_sg = map_sg, .unmap_sg = unmap_sg, .dma_supported = amd_iommu_dma_supported, - .mapping_error = amd_iommu_mapping_error, }; static int init_reserved_iova_ranges(void) @@ -2778,17 +2770,6 @@ int __init amd_iommu_init_dma_ops(void) swiotlb = (iommu_pass_through || sme_me_mask) ? 1 : 0; iommu_detected = 1; - /* - * In case we don't initialize SWIOTLB (actually the common case - * when AMD IOMMU is enabled and SME is not active), make sure there - * are global dma_ops set as a fall-back for devices not handled by - * this driver (for example non-PCI devices). When SME is active, - * make sure that swiotlb variable remains set so the global dma_ops - * continue to be SWIOTLB. - */ - if (!swiotlb) - dma_ops = &dma_direct_ops; - if (amd_iommu_unmap_flush) pr_info("AMD-Vi: IO/TLB flush on unmap enabled\n"); else diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c index d1b04753b204..60c7e9e9901e 100644 --- a/drivers/iommu/dma-iommu.c +++ b/drivers/iommu/dma-iommu.c @@ -32,8 +32,6 @@ #include <linux/scatterlist.h> #include <linux/vmalloc.h> -#define IOMMU_MAPPING_ERROR 0 - struct iommu_dma_msi_page { struct list_head list; dma_addr_t iova; @@ -523,7 +521,7 @@ void iommu_dma_free(struct device *dev, struct page **pages, size_t size, { __iommu_dma_unmap(iommu_get_dma_domain(dev), *handle, size); __iommu_dma_free_pages(pages, PAGE_ALIGN(size) >> PAGE_SHIFT); - *handle = IOMMU_MAPPING_ERROR; + *handle = DMA_MAPPING_ERROR; } /** @@ -556,7 +554,7 @@ struct page **iommu_dma_alloc(struct device *dev, size_t size, gfp_t gfp, dma_addr_t iova; unsigned int count, min_size, alloc_sizes = domain->pgsize_bitmap; - *handle = IOMMU_MAPPING_ERROR; + *handle = DMA_MAPPING_ERROR; min_size = alloc_sizes & -alloc_sizes; if (min_size < PAGE_SIZE) { @@ -649,11 +647,11 @@ static dma_addr_t __iommu_dma_map(struct device *dev, phys_addr_t phys, iova = iommu_dma_alloc_iova(domain, size, dma_get_mask(dev), dev); if (!iova) - return IOMMU_MAPPING_ERROR; + return DMA_MAPPING_ERROR; if (iommu_map(domain, iova, phys - iova_off, size, prot)) { iommu_dma_free_iova(cookie, iova, size); - return IOMMU_MAPPING_ERROR; + return DMA_MAPPING_ERROR; } return iova + iova_off; } @@ -694,7 +692,7 @@ static int __finalise_sg(struct device *dev, struct scatterlist *sg, int nents, s->offset += s_iova_off; s->length = s_length; - sg_dma_address(s) = IOMMU_MAPPING_ERROR; + sg_dma_address(s) = DMA_MAPPING_ERROR; sg_dma_len(s) = 0; /* @@ -737,11 +735,11 @@ static void __invalidate_sg(struct scatterlist *sg, int nents) int i; for_each_sg(sg, s, nents, i) { - if (sg_dma_address(s) != IOMMU_MAPPING_ERROR) + if (sg_dma_address(s) != DMA_MAPPING_ERROR) s->offset += sg_dma_address(s); if (sg_dma_len(s)) s->length = sg_dma_len(s); - sg_dma_address(s) = IOMMU_MAPPING_ERROR; + sg_dma_address(s) = DMA_MAPPING_ERROR; sg_dma_len(s) = 0; } } @@ -858,11 +856,6 @@ void iommu_dma_unmap_resource(struct device *dev, dma_addr_t handle, __iommu_dma_unmap(iommu_get_dma_domain(dev), handle, size); } -int iommu_dma_mapping_error(struct device *dev, dma_addr_t dma_addr) -{ - return dma_addr == IOMMU_MAPPING_ERROR; -} - static struct iommu_dma_msi_page *iommu_dma_get_msi_page(struct device *dev, phys_addr_t msi_addr, struct iommu_domain *domain) { @@ -882,7 +875,7 @@ static struct iommu_dma_msi_page *iommu_dma_get_msi_page(struct device *dev, return NULL; iova = __iommu_dma_map(dev, msi_addr, size, prot, domain); - if (iommu_dma_mapping_error(dev, iova)) + if (iova == DMA_MAPPING_ERROR) goto out_free_page; INIT_LIST_HEAD(&msi_page->list); diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index 41a4b8808802..0ad67d65bbce 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -3597,9 +3597,11 @@ static int iommu_no_mapping(struct device *dev) return 0; } -static dma_addr_t __intel_map_single(struct device *dev, phys_addr_t paddr, - size_t size, int dir, u64 dma_mask) +static dma_addr_t __intel_map_page(struct device *dev, struct page *page, + unsigned long offset, size_t size, int dir, + u64 dma_mask) { + phys_addr_t paddr = page_to_phys(page) + offset; struct dmar_domain *domain; phys_addr_t start_paddr; unsigned long iova_pfn; @@ -3615,7 +3617,7 @@ static dma_addr_t __intel_map_single(struct device *dev, phys_addr_t paddr, domain = get_valid_domain_for_dev(dev); if (!domain) - return 0; + return DMA_MAPPING_ERROR; iommu = domain_get_iommu(domain); size = aligned_nrpages(paddr, size); @@ -3653,7 +3655,7 @@ error: free_iova_fast(&domain->iovad, iova_pfn, dma_to_mm_pfn(size)); pr_err("Device %s request: %zx@%llx dir %d --- failed\n", dev_name(dev), size, (unsigned long long)paddr, dir); - return 0; + return DMA_MAPPING_ERROR; } static dma_addr_t intel_map_page(struct device *dev, struct page *page, @@ -3661,8 +3663,7 @@ static dma_addr_t intel_map_page(struct device *dev, struct page *page, enum dma_data_direction dir, unsigned long attrs) { - return __intel_map_single(dev, page_to_phys(page) + offset, size, - dir, *dev->dma_mask); + return __intel_map_page(dev, page, offset, size, dir, *dev->dma_mask); } static void intel_unmap(struct device *dev, dma_addr_t dev_addr, size_t size) @@ -3753,10 +3754,9 @@ static void *intel_alloc_coherent(struct device *dev, size_t size, return NULL; memset(page_address(page), 0, size); - *dma_handle = __intel_map_single(dev, page_to_phys(page), size, - DMA_BIDIRECTIONAL, - dev->coherent_dma_mask); - if (*dma_handle) + *dma_handle = __intel_map_page(dev, page, 0, size, DMA_BIDIRECTIONAL, + dev->coherent_dma_mask); + if (*dma_handle != DMA_MAPPING_ERROR) return page_address(page); if (!dma_release_from_contiguous(dev, page, size >> PAGE_SHIFT)) __free_pages(page, order); @@ -3865,11 +3865,6 @@ static int intel_map_sg(struct device *dev, struct scatterlist *sglist, int nele return nelems; } -static int intel_mapping_error(struct device *dev, dma_addr_t dma_addr) -{ - return !dma_addr; -} - static const struct dma_map_ops intel_dma_ops = { .alloc = intel_alloc_coherent, .free = intel_free_coherent, @@ -3877,7 +3872,6 @@ static const struct dma_map_ops intel_dma_ops = { .unmap_sg = intel_unmap_sg, .map_page = intel_map_page, .unmap_page = intel_unmap_page, - .mapping_error = intel_mapping_error, .dma_supported = dma_direct_supported, }; diff --git a/drivers/lightnvm/core.c b/drivers/lightnvm/core.c index efb976a863d2..5f82036fe322 100644 --- a/drivers/lightnvm/core.c +++ b/drivers/lightnvm/core.c @@ -389,7 +389,7 @@ static int nvm_create_tgt(struct nvm_dev *dev, struct nvm_ioctl_create *create) goto err_dev; } - tqueue = blk_alloc_queue_node(GFP_KERNEL, dev->q->node, NULL); + tqueue = blk_alloc_queue_node(GFP_KERNEL, dev->q->node); if (!tqueue) { ret = -ENOMEM; goto err_disk; @@ -974,7 +974,7 @@ static int nvm_get_bb_meta(struct nvm_dev *dev, sector_t slba, struct ppa_addr ppa; u8 *blks; int ch, lun, nr_blks; - int ret; + int ret = 0; ppa.ppa = slba; ppa = dev_to_generic_addr(dev, ppa); @@ -1140,30 +1140,33 @@ EXPORT_SYMBOL(nvm_alloc_dev); int nvm_register(struct nvm_dev *dev) { - int ret; + int ret, exp_pool_size; if (!dev->q || !dev->ops) return -EINVAL; - dev->dma_pool = dev->ops->create_dma_pool(dev, "ppalist"); + ret = nvm_init(dev); + if (ret) + return ret; + + exp_pool_size = max_t(int, PAGE_SIZE, + (NVM_MAX_VLBA * (sizeof(u64) + dev->geo.sos))); + exp_pool_size = round_up(exp_pool_size, PAGE_SIZE); + + dev->dma_pool = dev->ops->create_dma_pool(dev, "ppalist", + exp_pool_size); if (!dev->dma_pool) { pr_err("nvm: could not create dma pool\n"); + nvm_free(dev); return -ENOMEM; } - ret = nvm_init(dev); - if (ret) - goto err_init; - /* register device with a supported media manager */ down_write(&nvm_lock); list_add(&dev->devices, &nvm_devices); up_write(&nvm_lock); return 0; -err_init: - dev->ops->destroy_dma_pool(dev->dma_pool); - return ret; } EXPORT_SYMBOL(nvm_register); diff --git a/drivers/lightnvm/pblk-core.c b/drivers/lightnvm/pblk-core.c index 6944aac43b01..1ff165351180 100644 --- a/drivers/lightnvm/pblk-core.c +++ b/drivers/lightnvm/pblk-core.c @@ -250,8 +250,8 @@ int pblk_alloc_rqd_meta(struct pblk *pblk, struct nvm_rq *rqd) if (rqd->nr_ppas == 1) return 0; - rqd->ppa_list = rqd->meta_list + pblk_dma_meta_size; - rqd->dma_ppa_list = rqd->dma_meta_list + pblk_dma_meta_size; + rqd->ppa_list = rqd->meta_list + pblk_dma_meta_size(pblk); + rqd->dma_ppa_list = rqd->dma_meta_list + pblk_dma_meta_size(pblk); return 0; } @@ -376,7 +376,7 @@ void pblk_write_should_kick(struct pblk *pblk) { unsigned int secs_avail = pblk_rb_read_count(&pblk->rwb); - if (secs_avail >= pblk->min_write_pgs) + if (secs_avail >= pblk->min_write_pgs_data) pblk_write_kick(pblk); } @@ -407,7 +407,9 @@ struct list_head *pblk_line_gc_list(struct pblk *pblk, struct pblk_line *line) struct pblk_line_meta *lm = &pblk->lm; struct pblk_line_mgmt *l_mg = &pblk->l_mg; struct list_head *move_list = NULL; - int vsc = le32_to_cpu(*line->vsc); + int packed_meta = (le32_to_cpu(*line->vsc) / pblk->min_write_pgs_data) + * (pblk->min_write_pgs - pblk->min_write_pgs_data); + int vsc = le32_to_cpu(*line->vsc) + packed_meta; lockdep_assert_held(&line->lock); @@ -531,7 +533,7 @@ void pblk_check_chunk_state_update(struct pblk *pblk, struct nvm_rq *rqd) if (caddr == 0) trace_pblk_chunk_state(pblk_disk_name(pblk), ppa, NVM_CHK_ST_OPEN); - else if (caddr == chunk->cnlb) + else if (caddr == (chunk->cnlb - 1)) trace_pblk_chunk_state(pblk_disk_name(pblk), ppa, NVM_CHK_ST_CLOSED); } @@ -620,12 +622,15 @@ out: } int pblk_calc_secs(struct pblk *pblk, unsigned long secs_avail, - unsigned long secs_to_flush) + unsigned long secs_to_flush, bool skip_meta) { int max = pblk->sec_per_write; int min = pblk->min_write_pgs; int secs_to_sync = 0; + if (skip_meta && pblk->min_write_pgs_data != pblk->min_write_pgs) + min = max = pblk->min_write_pgs_data; + if (secs_avail >= max) secs_to_sync = max; else if (secs_avail >= min) @@ -796,10 +801,11 @@ static int pblk_line_smeta_write(struct pblk *pblk, struct pblk_line *line, rqd.is_seq = 1; for (i = 0; i < lm->smeta_sec; i++, paddr++) { - struct pblk_sec_meta *meta_list = rqd.meta_list; + struct pblk_sec_meta *meta = pblk_get_meta(pblk, + rqd.meta_list, i); rqd.ppa_list[i] = addr_to_gen_ppa(pblk, paddr, line->id); - meta_list[i].lba = lba_list[paddr] = addr_empty; + meta->lba = lba_list[paddr] = addr_empty; } ret = pblk_submit_io_sync_sem(pblk, &rqd); @@ -845,13 +851,13 @@ int pblk_line_emeta_read(struct pblk *pblk, struct pblk_line *line, if (!meta_list) return -ENOMEM; - ppa_list = meta_list + pblk_dma_meta_size; - dma_ppa_list = dma_meta_list + pblk_dma_meta_size; + ppa_list = meta_list + pblk_dma_meta_size(pblk); + dma_ppa_list = dma_meta_list + pblk_dma_meta_size(pblk); next_rq: memset(&rqd, 0, sizeof(struct nvm_rq)); - rq_ppas = pblk_calc_secs(pblk, left_ppas, 0); + rq_ppas = pblk_calc_secs(pblk, left_ppas, 0, false); rq_len = rq_ppas * geo->csecs; bio = pblk_bio_map_addr(pblk, emeta_buf, rq_ppas, rq_len, @@ -1276,6 +1282,7 @@ static int pblk_line_prepare(struct pblk *pblk, struct pblk_line *line) return 0; } +/* Line allocations in the recovery path are always single threaded */ int pblk_line_recov_alloc(struct pblk *pblk, struct pblk_line *line) { struct pblk_line_mgmt *l_mg = &pblk->l_mg; @@ -1295,15 +1302,22 @@ int pblk_line_recov_alloc(struct pblk *pblk, struct pblk_line *line) ret = pblk_line_alloc_bitmaps(pblk, line); if (ret) - return ret; + goto fail; if (!pblk_line_init_bb(pblk, line, 0)) { - list_add(&line->list, &l_mg->free_list); - return -EINTR; + ret = -EINTR; + goto fail; } pblk_rl_free_lines_dec(&pblk->rl, line, true); return 0; + +fail: + spin_lock(&l_mg->free_lock); + list_add(&line->list, &l_mg->free_list); + spin_unlock(&l_mg->free_lock); + + return ret; } void pblk_line_recov_close(struct pblk *pblk, struct pblk_line *line) @@ -2160,3 +2174,38 @@ void pblk_lookup_l2p_rand(struct pblk *pblk, struct ppa_addr *ppas, } spin_unlock(&pblk->trans_lock); } + +void *pblk_get_meta_for_writes(struct pblk *pblk, struct nvm_rq *rqd) +{ + void *buffer; + + if (pblk_is_oob_meta_supported(pblk)) { + /* Just use OOB metadata buffer as always */ + buffer = rqd->meta_list; + } else { + /* We need to reuse last page of request (packed metadata) + * in similar way as traditional oob metadata + */ + buffer = page_to_virt( + rqd->bio->bi_io_vec[rqd->bio->bi_vcnt - 1].bv_page); + } + + return buffer; +} + +void pblk_get_packed_meta(struct pblk *pblk, struct nvm_rq *rqd) +{ + void *meta_list = rqd->meta_list; + void *page; + int i = 0; + + if (pblk_is_oob_meta_supported(pblk)) + return; + + page = page_to_virt(rqd->bio->bi_io_vec[rqd->bio->bi_vcnt - 1].bv_page); + /* We need to fill oob meta buffer with data from packed metadata */ + for (; i < rqd->nr_ppas; i++) + memcpy(pblk_get_meta(pblk, meta_list, i), + page + (i * sizeof(struct pblk_sec_meta)), + sizeof(struct pblk_sec_meta)); +} diff --git a/drivers/lightnvm/pblk-init.c b/drivers/lightnvm/pblk-init.c index 13822594647c..f9a3e47b6a93 100644 --- a/drivers/lightnvm/pblk-init.c +++ b/drivers/lightnvm/pblk-init.c @@ -207,9 +207,6 @@ static int pblk_rwb_init(struct pblk *pblk) return pblk_rb_init(&pblk->rwb, buffer_size, threshold, geo->csecs); } -/* Minimum pages needed within a lun */ -#define ADDR_POOL_SIZE 64 - static int pblk_set_addrf_12(struct pblk *pblk, struct nvm_geo *geo, struct nvm_addrf_12 *dst) { @@ -350,23 +347,19 @@ fail_destroy_ws: static int pblk_get_global_caches(void) { - int ret; + int ret = 0; mutex_lock(&pblk_caches.mutex); - if (kref_read(&pblk_caches.kref) > 0) { - kref_get(&pblk_caches.kref); - mutex_unlock(&pblk_caches.mutex); - return 0; - } + if (kref_get_unless_zero(&pblk_caches.kref)) + goto out; ret = pblk_create_global_caches(); - if (!ret) - kref_get(&pblk_caches.kref); + kref_init(&pblk_caches.kref); +out: mutex_unlock(&pblk_caches.mutex); - return ret; } @@ -406,12 +399,45 @@ static int pblk_core_init(struct pblk *pblk) pblk->nr_flush_rst = 0; pblk->min_write_pgs = geo->ws_opt; + pblk->min_write_pgs_data = pblk->min_write_pgs; max_write_ppas = pblk->min_write_pgs * geo->all_luns; pblk->max_write_pgs = min_t(int, max_write_ppas, NVM_MAX_VLBA); pblk->max_write_pgs = min_t(int, pblk->max_write_pgs, queue_max_hw_sectors(dev->q) / (geo->csecs >> SECTOR_SHIFT)); pblk_set_sec_per_write(pblk, pblk->min_write_pgs); + pblk->oob_meta_size = geo->sos; + if (!pblk_is_oob_meta_supported(pblk)) { + /* For drives which does not have OOB metadata feature + * in order to support recovery feature we need to use + * so called packed metadata. Packed metada will store + * the same information as OOB metadata (l2p table mapping, + * but in the form of the single page at the end of + * every write request. + */ + if (pblk->min_write_pgs + * sizeof(struct pblk_sec_meta) > PAGE_SIZE) { + /* We want to keep all the packed metadata on single + * page per write requests. So we need to ensure that + * it will fit. + * + * This is more like sanity check, since there is + * no device with such a big minimal write size + * (above 1 metabytes). + */ + pblk_err(pblk, "Not supported min write size\n"); + return -EINVAL; + } + /* For packed meta approach we do some simplification. + * On read path we always issue requests which size + * equal to max_write_pgs, with all pages filled with + * user payload except of last one page which will be + * filled with packed metadata. + */ + pblk->max_write_pgs = pblk->min_write_pgs; + pblk->min_write_pgs_data = pblk->min_write_pgs - 1; + } + pblk->pad_dist = kcalloc(pblk->min_write_pgs - 1, sizeof(atomic64_t), GFP_KERNEL); if (!pblk->pad_dist) @@ -635,40 +661,61 @@ static unsigned int calc_emeta_len(struct pblk *pblk) return (lm->emeta_len[1] + lm->emeta_len[2] + lm->emeta_len[3]); } -static void pblk_set_provision(struct pblk *pblk, long nr_free_blks) +static int pblk_set_provision(struct pblk *pblk, int nr_free_chks) { struct nvm_tgt_dev *dev = pblk->dev; struct pblk_line_mgmt *l_mg = &pblk->l_mg; struct pblk_line_meta *lm = &pblk->lm; struct nvm_geo *geo = &dev->geo; sector_t provisioned; - int sec_meta, blk_meta; + int sec_meta, blk_meta, clba; + int minimum; if (geo->op == NVM_TARGET_DEFAULT_OP) pblk->op = PBLK_DEFAULT_OP; else pblk->op = geo->op; - provisioned = nr_free_blks; + minimum = pblk_get_min_chks(pblk); + provisioned = nr_free_chks; provisioned *= (100 - pblk->op); sector_div(provisioned, 100); - pblk->op_blks = nr_free_blks - provisioned; + if ((nr_free_chks - provisioned) < minimum) { + if (geo->op != NVM_TARGET_DEFAULT_OP) { + pblk_err(pblk, "OP too small to create a sane instance\n"); + return -EINTR; + } + + /* If the user did not specify an OP value, and PBLK_DEFAULT_OP + * is not enough, calculate and set sane value + */ + + provisioned = nr_free_chks - minimum; + pblk->op = (100 * minimum) / nr_free_chks; + pblk_info(pblk, "Default OP insufficient, adjusting OP to %d\n", + pblk->op); + } + + pblk->op_blks = nr_free_chks - provisioned; /* Internally pblk manages all free blocks, but all calculations based * on user capacity consider only provisioned blocks */ - pblk->rl.total_blocks = nr_free_blks; - pblk->rl.nr_secs = nr_free_blks * geo->clba; + pblk->rl.total_blocks = nr_free_chks; + pblk->rl.nr_secs = nr_free_chks * geo->clba; /* Consider sectors used for metadata */ sec_meta = (lm->smeta_sec + lm->emeta_sec[0]) * l_mg->nr_free_lines; blk_meta = DIV_ROUND_UP(sec_meta, geo->clba); - pblk->capacity = (provisioned - blk_meta) * geo->clba; + clba = (geo->clba / pblk->min_write_pgs) * pblk->min_write_pgs_data; + pblk->capacity = (provisioned - blk_meta) * clba; - atomic_set(&pblk->rl.free_blocks, nr_free_blks); - atomic_set(&pblk->rl.free_user_blocks, nr_free_blks); + atomic_set(&pblk->rl.free_blocks, nr_free_chks); + atomic_set(&pblk->rl.free_user_blocks, nr_free_chks); + + return 0; } static int pblk_setup_line_meta_chk(struct pblk *pblk, struct pblk_line *line, @@ -984,7 +1031,7 @@ static int pblk_lines_init(struct pblk *pblk) struct pblk_line_mgmt *l_mg = &pblk->l_mg; struct pblk_line *line; void *chunk_meta; - long nr_free_chks = 0; + int nr_free_chks = 0; int i, ret; ret = pblk_line_meta_init(pblk); @@ -1031,7 +1078,9 @@ static int pblk_lines_init(struct pblk *pblk) goto fail_free_lines; } - pblk_set_provision(pblk, nr_free_chks); + ret = pblk_set_provision(pblk, nr_free_chks); + if (ret) + goto fail_free_lines; vfree(chunk_meta); return 0; @@ -1041,7 +1090,7 @@ fail_free_lines: pblk_line_meta_free(l_mg, &pblk->lines[i]); kfree(pblk->lines); fail_free_chunk_meta: - kfree(chunk_meta); + vfree(chunk_meta); fail_free_luns: kfree(pblk->luns); fail_free_meta: @@ -1154,6 +1203,12 @@ static void *pblk_init(struct nvm_tgt_dev *dev, struct gendisk *tdisk, return ERR_PTR(-EINVAL); } + if (geo->ext) { + pblk_err(pblk, "extended metadata not supported\n"); + kfree(pblk); + return ERR_PTR(-EINVAL); + } + spin_lock_init(&pblk->resubmit_lock); spin_lock_init(&pblk->trans_lock); spin_lock_init(&pblk->lock); diff --git a/drivers/lightnvm/pblk-map.c b/drivers/lightnvm/pblk-map.c index 6dcbd44e3acb..79df583ea709 100644 --- a/drivers/lightnvm/pblk-map.c +++ b/drivers/lightnvm/pblk-map.c @@ -22,7 +22,7 @@ static int pblk_map_page_data(struct pblk *pblk, unsigned int sentry, struct ppa_addr *ppa_list, unsigned long *lun_bitmap, - struct pblk_sec_meta *meta_list, + void *meta_list, unsigned int valid_secs) { struct pblk_line *line = pblk_line_get_data(pblk); @@ -33,6 +33,9 @@ static int pblk_map_page_data(struct pblk *pblk, unsigned int sentry, int nr_secs = pblk->min_write_pgs; int i; + if (!line) + return -ENOSPC; + if (pblk_line_is_full(line)) { struct pblk_line *prev_line = line; @@ -42,8 +45,11 @@ static int pblk_map_page_data(struct pblk *pblk, unsigned int sentry, line = pblk_line_replace_data(pblk); pblk_line_close_meta(pblk, prev_line); - if (!line) - return -EINTR; + if (!line) { + pblk_pipeline_stop(pblk); + return -ENOSPC; + } + } emeta = line->emeta; @@ -52,6 +58,7 @@ static int pblk_map_page_data(struct pblk *pblk, unsigned int sentry, paddr = pblk_alloc_page(pblk, line, nr_secs); for (i = 0; i < nr_secs; i++, paddr++) { + struct pblk_sec_meta *meta = pblk_get_meta(pblk, meta_list, i); __le64 addr_empty = cpu_to_le64(ADDR_EMPTY); /* ppa to be sent to the device */ @@ -68,14 +75,15 @@ static int pblk_map_page_data(struct pblk *pblk, unsigned int sentry, kref_get(&line->ref); w_ctx = pblk_rb_w_ctx(&pblk->rwb, sentry + i); w_ctx->ppa = ppa_list[i]; - meta_list[i].lba = cpu_to_le64(w_ctx->lba); + meta->lba = cpu_to_le64(w_ctx->lba); lba_list[paddr] = cpu_to_le64(w_ctx->lba); if (lba_list[paddr] != addr_empty) line->nr_valid_lbas++; else atomic64_inc(&pblk->pad_wa); } else { - lba_list[paddr] = meta_list[i].lba = addr_empty; + lba_list[paddr] = addr_empty; + meta->lba = addr_empty; __pblk_map_invalidate(pblk, line, paddr); } } @@ -84,50 +92,57 @@ static int pblk_map_page_data(struct pblk *pblk, unsigned int sentry, return 0; } -void pblk_map_rq(struct pblk *pblk, struct nvm_rq *rqd, unsigned int sentry, +int pblk_map_rq(struct pblk *pblk, struct nvm_rq *rqd, unsigned int sentry, unsigned long *lun_bitmap, unsigned int valid_secs, unsigned int off) { - struct pblk_sec_meta *meta_list = rqd->meta_list; + void *meta_list = pblk_get_meta_for_writes(pblk, rqd); + void *meta_buffer; struct ppa_addr *ppa_list = nvm_rq_to_ppa_list(rqd); unsigned int map_secs; int min = pblk->min_write_pgs; int i; + int ret; for (i = off; i < rqd->nr_ppas; i += min) { map_secs = (i + min > valid_secs) ? (valid_secs % min) : min; - if (pblk_map_page_data(pblk, sentry + i, &ppa_list[i], - lun_bitmap, &meta_list[i], map_secs)) { - bio_put(rqd->bio); - pblk_free_rqd(pblk, rqd, PBLK_WRITE); - pblk_pipeline_stop(pblk); - } + meta_buffer = pblk_get_meta(pblk, meta_list, i); + + ret = pblk_map_page_data(pblk, sentry + i, &ppa_list[i], + lun_bitmap, meta_buffer, map_secs); + if (ret) + return ret; } + + return 0; } /* only if erase_ppa is set, acquire erase semaphore */ -void pblk_map_erase_rq(struct pblk *pblk, struct nvm_rq *rqd, +int pblk_map_erase_rq(struct pblk *pblk, struct nvm_rq *rqd, unsigned int sentry, unsigned long *lun_bitmap, unsigned int valid_secs, struct ppa_addr *erase_ppa) { struct nvm_tgt_dev *dev = pblk->dev; struct nvm_geo *geo = &dev->geo; struct pblk_line_meta *lm = &pblk->lm; - struct pblk_sec_meta *meta_list = rqd->meta_list; + void *meta_list = pblk_get_meta_for_writes(pblk, rqd); + void *meta_buffer; struct ppa_addr *ppa_list = nvm_rq_to_ppa_list(rqd); struct pblk_line *e_line, *d_line; unsigned int map_secs; int min = pblk->min_write_pgs; int i, erase_lun; + int ret; + for (i = 0; i < rqd->nr_ppas; i += min) { map_secs = (i + min > valid_secs) ? (valid_secs % min) : min; - if (pblk_map_page_data(pblk, sentry + i, &ppa_list[i], - lun_bitmap, &meta_list[i], map_secs)) { - bio_put(rqd->bio); - pblk_free_rqd(pblk, rqd, PBLK_WRITE); - pblk_pipeline_stop(pblk); - } + meta_buffer = pblk_get_meta(pblk, meta_list, i); + + ret = pblk_map_page_data(pblk, sentry + i, &ppa_list[i], + lun_bitmap, meta_buffer, map_secs); + if (ret) + return ret; erase_lun = pblk_ppa_to_pos(geo, ppa_list[i]); @@ -163,7 +178,7 @@ void pblk_map_erase_rq(struct pblk *pblk, struct nvm_rq *rqd, */ e_line = pblk_line_get_erase(pblk); if (!e_line) - return; + return -ENOSPC; /* Erase blocks that are bad in this line but might not be in next */ if (unlikely(pblk_ppa_empty(*erase_ppa)) && @@ -174,7 +189,7 @@ retry: bit = find_next_bit(d_line->blk_bitmap, lm->blk_per_line, bit + 1); if (bit >= lm->blk_per_line) - return; + return 0; spin_lock(&e_line->lock); if (test_bit(bit, e_line->erase_bitmap)) { @@ -188,4 +203,6 @@ retry: *erase_ppa = pblk->luns[bit].bppa; /* set ch and lun */ erase_ppa->a.blk = e_line->id; } + + return 0; } diff --git a/drivers/lightnvm/pblk-rb.c b/drivers/lightnvm/pblk-rb.c index b1f4b51783f4..d4ca8c64ee0f 100644 --- a/drivers/lightnvm/pblk-rb.c +++ b/drivers/lightnvm/pblk-rb.c @@ -147,7 +147,7 @@ int pblk_rb_init(struct pblk_rb *rb, unsigned int size, unsigned int threshold, /* * Initialize rate-limiter, which controls access to the write buffer - * but user and GC I/O + * by user and GC I/O */ pblk_rl_init(&pblk->rl, rb->nr_entries); @@ -552,6 +552,9 @@ unsigned int pblk_rb_read_to_bio(struct pblk_rb *rb, struct nvm_rq *rqd, to_read = count; } + /* Add space for packed metadata if in use*/ + pad += (pblk->min_write_pgs - pblk->min_write_pgs_data); + c_ctx->sentry = pos; c_ctx->nr_valid = to_read; c_ctx->nr_padded = pad; diff --git a/drivers/lightnvm/pblk-read.c b/drivers/lightnvm/pblk-read.c index 9fba614adeeb..3789185144da 100644 --- a/drivers/lightnvm/pblk-read.c +++ b/drivers/lightnvm/pblk-read.c @@ -43,7 +43,7 @@ static void pblk_read_ppalist_rq(struct pblk *pblk, struct nvm_rq *rqd, struct bio *bio, sector_t blba, unsigned long *read_bitmap) { - struct pblk_sec_meta *meta_list = rqd->meta_list; + void *meta_list = rqd->meta_list; struct ppa_addr ppas[NVM_MAX_VLBA]; int nr_secs = rqd->nr_ppas; bool advanced_bio = false; @@ -53,12 +53,15 @@ static void pblk_read_ppalist_rq(struct pblk *pblk, struct nvm_rq *rqd, for (i = 0; i < nr_secs; i++) { struct ppa_addr p = ppas[i]; + struct pblk_sec_meta *meta = pblk_get_meta(pblk, meta_list, i); sector_t lba = blba + i; retry: if (pblk_ppa_empty(p)) { + __le64 addr_empty = cpu_to_le64(ADDR_EMPTY); + WARN_ON(test_and_set_bit(i, read_bitmap)); - meta_list[i].lba = cpu_to_le64(ADDR_EMPTY); + meta->lba = addr_empty; if (unlikely(!advanced_bio)) { bio_advance(bio, (i) * PBLK_EXPOSED_PAGE_SIZE); @@ -78,7 +81,7 @@ retry: goto retry; } WARN_ON(test_and_set_bit(i, read_bitmap)); - meta_list[i].lba = cpu_to_le64(lba); + meta->lba = cpu_to_le64(lba); advanced_bio = true; #ifdef CONFIG_NVM_PBLK_DEBUG atomic_long_inc(&pblk->cache_reads); @@ -105,12 +108,16 @@ next: static void pblk_read_check_seq(struct pblk *pblk, struct nvm_rq *rqd, sector_t blba) { - struct pblk_sec_meta *meta_lba_list = rqd->meta_list; + void *meta_list = rqd->meta_list; int nr_lbas = rqd->nr_ppas; int i; + if (!pblk_is_oob_meta_supported(pblk)) + return; + for (i = 0; i < nr_lbas; i++) { - u64 lba = le64_to_cpu(meta_lba_list[i].lba); + struct pblk_sec_meta *meta = pblk_get_meta(pblk, meta_list, i); + u64 lba = le64_to_cpu(meta->lba); if (lba == ADDR_EMPTY) continue; @@ -134,17 +141,22 @@ static void pblk_read_check_seq(struct pblk *pblk, struct nvm_rq *rqd, static void pblk_read_check_rand(struct pblk *pblk, struct nvm_rq *rqd, u64 *lba_list, int nr_lbas) { - struct pblk_sec_meta *meta_lba_list = rqd->meta_list; + void *meta_lba_list = rqd->meta_list; int i, j; + if (!pblk_is_oob_meta_supported(pblk)) + return; + for (i = 0, j = 0; i < nr_lbas; i++) { + struct pblk_sec_meta *meta = pblk_get_meta(pblk, + meta_lba_list, j); u64 lba = lba_list[i]; u64 meta_lba; if (lba == ADDR_EMPTY) continue; - meta_lba = le64_to_cpu(meta_lba_list[j].lba); + meta_lba = le64_to_cpu(meta->lba); if (lba != meta_lba) { #ifdef CONFIG_NVM_PBLK_DEBUG @@ -216,15 +228,15 @@ static void pblk_end_partial_read(struct nvm_rq *rqd) struct pblk *pblk = rqd->private; struct pblk_g_ctx *r_ctx = nvm_rq_to_pdu(rqd); struct pblk_pr_ctx *pr_ctx = r_ctx->private; + struct pblk_sec_meta *meta; struct bio *new_bio = rqd->bio; struct bio *bio = pr_ctx->orig_bio; struct bio_vec src_bv, dst_bv; - struct pblk_sec_meta *meta_list = rqd->meta_list; + void *meta_list = rqd->meta_list; int bio_init_idx = pr_ctx->bio_init_idx; unsigned long *read_bitmap = pr_ctx->bitmap; int nr_secs = pr_ctx->orig_nr_secs; int nr_holes = nr_secs - bitmap_weight(read_bitmap, nr_secs); - __le64 *lba_list_mem, *lba_list_media; void *src_p, *dst_p; int hole, i; @@ -237,13 +249,10 @@ static void pblk_end_partial_read(struct nvm_rq *rqd) rqd->ppa_list[0] = ppa; } - /* Re-use allocated memory for intermediate lbas */ - lba_list_mem = (((void *)rqd->ppa_list) + pblk_dma_ppa_size); - lba_list_media = (((void *)rqd->ppa_list) + 2 * pblk_dma_ppa_size); - for (i = 0; i < nr_secs; i++) { - lba_list_media[i] = meta_list[i].lba; - meta_list[i].lba = lba_list_mem[i]; + meta = pblk_get_meta(pblk, meta_list, i); + pr_ctx->lba_list_media[i] = le64_to_cpu(meta->lba); + meta->lba = cpu_to_le64(pr_ctx->lba_list_mem[i]); } /* Fill the holes in the original bio */ @@ -255,7 +264,8 @@ static void pblk_end_partial_read(struct nvm_rq *rqd) line = pblk_ppa_to_line(pblk, rqd->ppa_list[i]); kref_put(&line->ref, pblk_line_put); - meta_list[hole].lba = lba_list_media[i]; + meta = pblk_get_meta(pblk, meta_list, hole); + meta->lba = cpu_to_le64(pr_ctx->lba_list_media[i]); src_bv = new_bio->bi_io_vec[i++]; dst_bv = bio->bi_io_vec[bio_init_idx + hole]; @@ -291,17 +301,13 @@ static int pblk_setup_partial_read(struct pblk *pblk, struct nvm_rq *rqd, unsigned long *read_bitmap, int nr_holes) { - struct pblk_sec_meta *meta_list = rqd->meta_list; + void *meta_list = rqd->meta_list; struct pblk_g_ctx *r_ctx = nvm_rq_to_pdu(rqd); struct pblk_pr_ctx *pr_ctx; struct bio *new_bio, *bio = r_ctx->private; - __le64 *lba_list_mem; int nr_secs = rqd->nr_ppas; int i; - /* Re-use allocated memory for intermediate lbas */ - lba_list_mem = (((void *)rqd->ppa_list) + pblk_dma_ppa_size); - new_bio = bio_alloc(GFP_KERNEL, nr_holes); if (pblk_bio_add_pages(pblk, new_bio, GFP_KERNEL, nr_holes)) @@ -312,12 +318,15 @@ static int pblk_setup_partial_read(struct pblk *pblk, struct nvm_rq *rqd, goto fail_free_pages; } - pr_ctx = kmalloc(sizeof(struct pblk_pr_ctx), GFP_KERNEL); + pr_ctx = kzalloc(sizeof(struct pblk_pr_ctx), GFP_KERNEL); if (!pr_ctx) goto fail_free_pages; - for (i = 0; i < nr_secs; i++) - lba_list_mem[i] = meta_list[i].lba; + for (i = 0; i < nr_secs; i++) { + struct pblk_sec_meta *meta = pblk_get_meta(pblk, meta_list, i); + + pr_ctx->lba_list_mem[i] = le64_to_cpu(meta->lba); + } new_bio->bi_iter.bi_sector = 0; /* internal bio */ bio_set_op_attrs(new_bio, REQ_OP_READ, 0); @@ -325,7 +334,6 @@ static int pblk_setup_partial_read(struct pblk *pblk, struct nvm_rq *rqd, rqd->bio = new_bio; rqd->nr_ppas = nr_holes; - pr_ctx->ppa_ptr = NULL; pr_ctx->orig_bio = bio; bitmap_copy(pr_ctx->bitmap, read_bitmap, NVM_MAX_VLBA); pr_ctx->bio_init_idx = bio_init_idx; @@ -383,7 +391,7 @@ err: static void pblk_read_rq(struct pblk *pblk, struct nvm_rq *rqd, struct bio *bio, sector_t lba, unsigned long *read_bitmap) { - struct pblk_sec_meta *meta_list = rqd->meta_list; + struct pblk_sec_meta *meta = pblk_get_meta(pblk, rqd->meta_list, 0); struct ppa_addr ppa; pblk_lookup_l2p_seq(pblk, &ppa, lba, 1); @@ -394,8 +402,10 @@ static void pblk_read_rq(struct pblk *pblk, struct nvm_rq *rqd, struct bio *bio, retry: if (pblk_ppa_empty(ppa)) { + __le64 addr_empty = cpu_to_le64(ADDR_EMPTY); + WARN_ON(test_and_set_bit(0, read_bitmap)); - meta_list[0].lba = cpu_to_le64(ADDR_EMPTY); + meta->lba = addr_empty; return; } @@ -409,7 +419,7 @@ retry: } WARN_ON(test_and_set_bit(0, read_bitmap)); - meta_list[0].lba = cpu_to_le64(lba); + meta->lba = cpu_to_le64(lba); #ifdef CONFIG_NVM_PBLK_DEBUG atomic_long_inc(&pblk->cache_reads); diff --git a/drivers/lightnvm/pblk-recovery.c b/drivers/lightnvm/pblk-recovery.c index 5740b7509bd8..3fcf062d752c 100644 --- a/drivers/lightnvm/pblk-recovery.c +++ b/drivers/lightnvm/pblk-recovery.c @@ -13,6 +13,9 @@ * General Public License for more details. * * pblk-recovery.c - pblk's recovery path + * + * The L2P recovery path is single threaded as the L2P table is updated in order + * following the line sequence ID. */ #include "pblk.h" @@ -124,7 +127,7 @@ static u64 pblk_sec_in_open_line(struct pblk *pblk, struct pblk_line *line) struct pblk_recov_alloc { struct ppa_addr *ppa_list; - struct pblk_sec_meta *meta_list; + void *meta_list; struct nvm_rq *rqd; void *data; dma_addr_t dma_ppa_list; @@ -158,7 +161,7 @@ static int pblk_recov_pad_line(struct pblk *pblk, struct pblk_line *line, { struct nvm_tgt_dev *dev = pblk->dev; struct nvm_geo *geo = &dev->geo; - struct pblk_sec_meta *meta_list; + void *meta_list; struct pblk_pad_rq *pad_rq; struct nvm_rq *rqd; struct bio *bio; @@ -188,7 +191,7 @@ static int pblk_recov_pad_line(struct pblk *pblk, struct pblk_line *line, kref_init(&pad_rq->ref); next_pad_rq: - rq_ppas = pblk_calc_secs(pblk, left_ppas, 0); + rq_ppas = pblk_calc_secs(pblk, left_ppas, 0, false); if (rq_ppas < pblk->min_write_pgs) { pblk_err(pblk, "corrupted pad line %d\n", line->id); goto fail_free_pad; @@ -237,12 +240,15 @@ next_pad_rq: for (j = 0; j < pblk->min_write_pgs; j++, i++, w_ptr++) { struct ppa_addr dev_ppa; + struct pblk_sec_meta *meta; __le64 addr_empty = cpu_to_le64(ADDR_EMPTY); dev_ppa = addr_to_gen_ppa(pblk, w_ptr, line->id); pblk_map_invalidate(pblk, dev_ppa); - lba_list[w_ptr] = meta_list[i].lba = addr_empty; + lba_list[w_ptr] = addr_empty; + meta = pblk_get_meta(pblk, meta_list, i); + meta->lba = addr_empty; rqd->ppa_list[i] = dev_ppa; } } @@ -334,20 +340,21 @@ static int pblk_recov_scan_oob(struct pblk *pblk, struct pblk_line *line, struct pblk_recov_alloc p) { struct nvm_tgt_dev *dev = pblk->dev; + struct pblk_line_meta *lm = &pblk->lm; struct nvm_geo *geo = &dev->geo; struct ppa_addr *ppa_list; - struct pblk_sec_meta *meta_list; + void *meta_list; struct nvm_rq *rqd; struct bio *bio; void *data; dma_addr_t dma_ppa_list, dma_meta_list; __le64 *lba_list; - u64 paddr = 0; + u64 paddr = pblk_line_smeta_start(pblk, line) + lm->smeta_sec; bool padded = false; int rq_ppas, rq_len; int i, j; int ret; - u64 left_ppas = pblk_sec_in_open_line(pblk, line); + u64 left_ppas = pblk_sec_in_open_line(pblk, line) - lm->smeta_sec; if (pblk_line_wp_is_unbalanced(pblk, line)) pblk_warn(pblk, "recovering unbalanced line (%d)\n", line->id); @@ -364,17 +371,19 @@ static int pblk_recov_scan_oob(struct pblk *pblk, struct pblk_line *line, next_rq: memset(rqd, 0, pblk_g_rq_size); - rq_ppas = pblk_calc_secs(pblk, left_ppas, 0); + rq_ppas = pblk_calc_secs(pblk, left_ppas, 0, false); if (!rq_ppas) rq_ppas = pblk->min_write_pgs; rq_len = rq_ppas * geo->csecs; +retry_rq: bio = bio_map_kern(dev->q, data, rq_len, GFP_KERNEL); if (IS_ERR(bio)) return PTR_ERR(bio); bio->bi_iter.bi_sector = 0; /* internal bio */ bio_set_op_attrs(bio, REQ_OP_READ, 0); + bio_get(bio); rqd->bio = bio; rqd->opcode = NVM_OP_PREAD; @@ -387,7 +396,6 @@ next_rq: if (pblk_io_aligned(pblk, rq_ppas)) rqd->is_seq = 1; -retry_rq: for (i = 0; i < rqd->nr_ppas; ) { struct ppa_addr ppa; int pos; @@ -410,6 +418,7 @@ retry_rq: if (ret) { pblk_err(pblk, "I/O submission failed: %d\n", ret); bio_put(bio); + bio_put(bio); return ret; } @@ -421,20 +430,28 @@ retry_rq: if (padded) { pblk_log_read_err(pblk, rqd); + bio_put(bio); return -EINTR; } pad_distance = pblk_pad_distance(pblk, line); ret = pblk_recov_pad_line(pblk, line, pad_distance); - if (ret) + if (ret) { + bio_put(bio); return ret; + } padded = true; + bio_put(bio); goto retry_rq; } + pblk_get_packed_meta(pblk, rqd); + bio_put(bio); + for (i = 0; i < rqd->nr_ppas; i++) { - u64 lba = le64_to_cpu(meta_list[i].lba); + struct pblk_sec_meta *meta = pblk_get_meta(pblk, meta_list, i); + u64 lba = le64_to_cpu(meta->lba); lba_list[paddr++] = cpu_to_le64(lba); @@ -463,7 +480,7 @@ static int pblk_recov_l2p_from_oob(struct pblk *pblk, struct pblk_line *line) struct nvm_geo *geo = &dev->geo; struct nvm_rq *rqd; struct ppa_addr *ppa_list; - struct pblk_sec_meta *meta_list; + void *meta_list; struct pblk_recov_alloc p; void *data; dma_addr_t dma_ppa_list, dma_meta_list; @@ -473,8 +490,8 @@ static int pblk_recov_l2p_from_oob(struct pblk *pblk, struct pblk_line *line) if (!meta_list) return -ENOMEM; - ppa_list = (void *)(meta_list) + pblk_dma_meta_size; - dma_ppa_list = dma_meta_list + pblk_dma_meta_size; + ppa_list = (void *)(meta_list) + pblk_dma_meta_size(pblk); + dma_ppa_list = dma_meta_list + pblk_dma_meta_size(pblk); data = kcalloc(pblk->max_write_pgs, geo->csecs, GFP_KERNEL); if (!data) { @@ -804,7 +821,6 @@ next: WARN_ON_ONCE(!test_and_clear_bit(meta_line, &l_mg->meta_bitmap)); spin_unlock(&l_mg->free_lock); - pblk_line_replace_data(pblk); } else { spin_lock(&l_mg->free_lock); /* Allocate next line for preparation */ diff --git a/drivers/lightnvm/pblk-rl.c b/drivers/lightnvm/pblk-rl.c index db55a1c89997..76116d5f78e4 100644 --- a/drivers/lightnvm/pblk-rl.c +++ b/drivers/lightnvm/pblk-rl.c @@ -214,11 +214,10 @@ void pblk_rl_init(struct pblk_rl *rl, int budget) struct nvm_geo *geo = &dev->geo; struct pblk_line_mgmt *l_mg = &pblk->l_mg; struct pblk_line_meta *lm = &pblk->lm; - int min_blocks = lm->blk_per_line * PBLK_GC_RSV_LINE; int sec_meta, blk_meta; - unsigned int rb_windows; + /* Consider sectors used for metadata */ sec_meta = (lm->smeta_sec + lm->emeta_sec[0]) * l_mg->nr_free_lines; blk_meta = DIV_ROUND_UP(sec_meta, geo->clba); @@ -226,7 +225,7 @@ void pblk_rl_init(struct pblk_rl *rl, int budget) rl->high = pblk->op_blks - blk_meta - lm->blk_per_line; rl->high_pw = get_count_order(rl->high); - rl->rsv_blocks = min_blocks; + rl->rsv_blocks = pblk_get_min_chks(pblk); /* This will always be a power-of-2 */ rb_windows = budget / NVM_MAX_VLBA; diff --git a/drivers/lightnvm/pblk-sysfs.c b/drivers/lightnvm/pblk-sysfs.c index 2d2818155aa8..7d8958df9472 100644 --- a/drivers/lightnvm/pblk-sysfs.c +++ b/drivers/lightnvm/pblk-sysfs.c @@ -479,6 +479,13 @@ static ssize_t pblk_sysfs_set_sec_per_write(struct pblk *pblk, if (kstrtouint(page, 0, &sec_per_write)) return -EINVAL; + if (!pblk_is_oob_meta_supported(pblk)) { + /* For packed metadata case it is + * not allowed to change sec_per_write. + */ + return -EINVAL; + } + if (sec_per_write < pblk->min_write_pgs || sec_per_write > pblk->max_write_pgs || sec_per_write % pblk->min_write_pgs != 0) diff --git a/drivers/lightnvm/pblk-write.c b/drivers/lightnvm/pblk-write.c index fa8726493b39..06d56deb645d 100644 --- a/drivers/lightnvm/pblk-write.c +++ b/drivers/lightnvm/pblk-write.c @@ -105,14 +105,20 @@ retry: } /* Map remaining sectors in chunk, starting from ppa */ -static void pblk_map_remaining(struct pblk *pblk, struct ppa_addr *ppa) +static void pblk_map_remaining(struct pblk *pblk, struct ppa_addr *ppa, + int rqd_ppas) { struct pblk_line *line; struct ppa_addr map_ppa = *ppa; + __le64 addr_empty = cpu_to_le64(ADDR_EMPTY); + __le64 *lba_list; u64 paddr; int done = 0; + int n = 0; line = pblk_ppa_to_line(pblk, *ppa); + lba_list = emeta_to_lbas(pblk, line->emeta->buf); + spin_lock(&line->lock); while (!done) { @@ -121,10 +127,17 @@ static void pblk_map_remaining(struct pblk *pblk, struct ppa_addr *ppa) if (!test_and_set_bit(paddr, line->map_bitmap)) line->left_msecs--; + if (n < rqd_ppas && lba_list[paddr] != addr_empty) + line->nr_valid_lbas--; + + lba_list[paddr] = addr_empty; + if (!test_and_set_bit(paddr, line->invalid_bitmap)) le32_add_cpu(line->vsc, -1); done = nvm_next_ppa_in_chk(pblk->dev, &map_ppa); + + n++; } line->w_err_gc->has_write_err = 1; @@ -148,9 +161,11 @@ static void pblk_prepare_resubmit(struct pblk *pblk, unsigned int sentry, w_ctx = &entry->w_ctx; /* Check if the lba has been overwritten */ - ppa_l2p = pblk_trans_map_get(pblk, w_ctx->lba); - if (!pblk_ppa_comp(ppa_l2p, entry->cacheline)) - w_ctx->lba = ADDR_EMPTY; + if (w_ctx->lba != ADDR_EMPTY) { + ppa_l2p = pblk_trans_map_get(pblk, w_ctx->lba); + if (!pblk_ppa_comp(ppa_l2p, entry->cacheline)) + w_ctx->lba = ADDR_EMPTY; + } /* Mark up the entry as submittable again */ flags = READ_ONCE(w_ctx->flags); @@ -200,7 +215,7 @@ static void pblk_submit_rec(struct work_struct *work) pblk_log_write_err(pblk, rqd); - pblk_map_remaining(pblk, ppa_list); + pblk_map_remaining(pblk, ppa_list, rqd->nr_ppas); pblk_queue_resubmit(pblk, c_ctx); pblk_up_rq(pblk, c_ctx->lun_bitmap); @@ -319,12 +334,13 @@ static int pblk_setup_w_rq(struct pblk *pblk, struct nvm_rq *rqd, } if (likely(!e_line || !atomic_read(&e_line->left_eblks))) - pblk_map_rq(pblk, rqd, c_ctx->sentry, lun_bitmap, valid, 0); + ret = pblk_map_rq(pblk, rqd, c_ctx->sentry, lun_bitmap, + valid, 0); else - pblk_map_erase_rq(pblk, rqd, c_ctx->sentry, lun_bitmap, + ret = pblk_map_erase_rq(pblk, rqd, c_ctx->sentry, lun_bitmap, valid, erase_ppa); - return 0; + return ret; } static int pblk_calc_secs_to_sync(struct pblk *pblk, unsigned int secs_avail, @@ -332,7 +348,7 @@ static int pblk_calc_secs_to_sync(struct pblk *pblk, unsigned int secs_avail, { int secs_to_sync; - secs_to_sync = pblk_calc_secs(pblk, secs_avail, secs_to_flush); + secs_to_sync = pblk_calc_secs(pblk, secs_avail, secs_to_flush, true); #ifdef CONFIG_NVM_PBLK_DEBUG if ((!secs_to_sync && secs_to_flush) @@ -548,15 +564,17 @@ static void pblk_free_write_rqd(struct pblk *pblk, struct nvm_rq *rqd) c_ctx->nr_padded); } -static int pblk_submit_write(struct pblk *pblk) +static int pblk_submit_write(struct pblk *pblk, int *secs_left) { struct bio *bio; struct nvm_rq *rqd; unsigned int secs_avail, secs_to_sync, secs_to_com; - unsigned int secs_to_flush; + unsigned int secs_to_flush, packed_meta_pgs; unsigned long pos; unsigned int resubmit; + *secs_left = 0; + spin_lock(&pblk->resubmit_lock); resubmit = !list_empty(&pblk->resubmit_list); spin_unlock(&pblk->resubmit_lock); @@ -586,17 +604,17 @@ static int pblk_submit_write(struct pblk *pblk) */ secs_avail = pblk_rb_read_count(&pblk->rwb); if (!secs_avail) - return 1; + return 0; secs_to_flush = pblk_rb_flush_point_count(&pblk->rwb); - if (!secs_to_flush && secs_avail < pblk->min_write_pgs) - return 1; + if (!secs_to_flush && secs_avail < pblk->min_write_pgs_data) + return 0; secs_to_sync = pblk_calc_secs_to_sync(pblk, secs_avail, secs_to_flush); if (secs_to_sync > pblk->max_write_pgs) { pblk_err(pblk, "bad buffer sync calculation\n"); - return 1; + return 0; } secs_to_com = (secs_to_sync > secs_avail) ? @@ -604,7 +622,8 @@ static int pblk_submit_write(struct pblk *pblk) pos = pblk_rb_read_commit(&pblk->rwb, secs_to_com); } - bio = bio_alloc(GFP_KERNEL, secs_to_sync); + packed_meta_pgs = (pblk->min_write_pgs - pblk->min_write_pgs_data); + bio = bio_alloc(GFP_KERNEL, secs_to_sync + packed_meta_pgs); bio->bi_iter.bi_sector = 0; /* internal bio */ bio_set_op_attrs(bio, REQ_OP_WRITE, 0); @@ -625,6 +644,7 @@ static int pblk_submit_write(struct pblk *pblk) atomic_long_add(secs_to_sync, &pblk->sub_writes); #endif + *secs_left = 1; return 0; fail_free_bio: @@ -633,16 +653,22 @@ fail_put_bio: bio_put(bio); pblk_free_rqd(pblk, rqd, PBLK_WRITE); - return 1; + return -EINTR; } int pblk_write_ts(void *data) { struct pblk *pblk = data; + int secs_left; + int write_failure = 0; while (!kthread_should_stop()) { - if (!pblk_submit_write(pblk)) - continue; + if (!write_failure) { + write_failure = pblk_submit_write(pblk, &secs_left); + + if (secs_left) + continue; + } set_current_state(TASK_INTERRUPTIBLE); io_schedule(); } diff --git a/drivers/lightnvm/pblk.h b/drivers/lightnvm/pblk.h index 02bb2e98f8a9..85e38ed62f85 100644 --- a/drivers/lightnvm/pblk.h +++ b/drivers/lightnvm/pblk.h @@ -104,7 +104,6 @@ enum { PBLK_RL_LOW = 4 }; -#define pblk_dma_meta_size (sizeof(struct pblk_sec_meta) * NVM_MAX_VLBA) #define pblk_dma_ppa_size (sizeof(u64) * NVM_MAX_VLBA) /* write buffer completion context */ @@ -132,6 +131,8 @@ struct pblk_pr_ctx { unsigned int bio_init_idx; void *ppa_ptr; dma_addr_t dma_ppa_list; + __le64 lba_list_mem[NVM_MAX_VLBA]; + __le64 lba_list_media[NVM_MAX_VLBA]; }; /* Pad context */ @@ -631,7 +632,9 @@ struct pblk { int state; /* pblk line state */ int min_write_pgs; /* Minimum amount of pages required by controller */ + int min_write_pgs_data; /* Minimum amount of payload pages */ int max_write_pgs; /* Maximum amount of pages supported by controller */ + int oob_meta_size; /* Size of OOB sector metadata */ sector_t capacity; /* Device capacity when bad blocks are subtracted */ @@ -836,7 +839,7 @@ void pblk_dealloc_page(struct pblk *pblk, struct pblk_line *line, int nr_secs); u64 pblk_alloc_page(struct pblk *pblk, struct pblk_line *line, int nr_secs); u64 __pblk_alloc_page(struct pblk *pblk, struct pblk_line *line, int nr_secs); int pblk_calc_secs(struct pblk *pblk, unsigned long secs_avail, - unsigned long secs_to_flush); + unsigned long secs_to_flush, bool skip_meta); void pblk_down_rq(struct pblk *pblk, struct ppa_addr ppa, unsigned long *lun_bitmap); void pblk_down_chunk(struct pblk *pblk, struct ppa_addr ppa); @@ -860,6 +863,8 @@ void pblk_lookup_l2p_rand(struct pblk *pblk, struct ppa_addr *ppas, u64 *lba_list, int nr_secs); void pblk_lookup_l2p_seq(struct pblk *pblk, struct ppa_addr *ppas, sector_t blba, int nr_secs); +void *pblk_get_meta_for_writes(struct pblk *pblk, struct nvm_rq *rqd); +void pblk_get_packed_meta(struct pblk *pblk, struct nvm_rq *rqd); /* * pblk user I/O write path @@ -871,10 +876,10 @@ int pblk_write_gc_to_cache(struct pblk *pblk, struct pblk_gc_rq *gc_rq); /* * pblk map */ -void pblk_map_erase_rq(struct pblk *pblk, struct nvm_rq *rqd, +int pblk_map_erase_rq(struct pblk *pblk, struct nvm_rq *rqd, unsigned int sentry, unsigned long *lun_bitmap, unsigned int valid_secs, struct ppa_addr *erase_ppa); -void pblk_map_rq(struct pblk *pblk, struct nvm_rq *rqd, unsigned int sentry, +int pblk_map_rq(struct pblk *pblk, struct nvm_rq *rqd, unsigned int sentry, unsigned long *lun_bitmap, unsigned int valid_secs, unsigned int off); @@ -905,7 +910,6 @@ int pblk_recov_check_emeta(struct pblk *pblk, struct line_emeta *emeta); #define PBLK_GC_MAX_READERS 8 /* Max number of outstanding GC reader jobs */ #define PBLK_GC_RQ_QD 128 /* Queue depth for inflight GC requests */ #define PBLK_GC_L_QD 4 /* Queue depth for inflight GC lines */ -#define PBLK_GC_RSV_LINE 1 /* Reserved lines for GC */ int pblk_gc_init(struct pblk *pblk); void pblk_gc_exit(struct pblk *pblk, bool graceful); @@ -1370,4 +1374,33 @@ static inline char *pblk_disk_name(struct pblk *pblk) return disk->disk_name; } + +static inline unsigned int pblk_get_min_chks(struct pblk *pblk) +{ + struct pblk_line_meta *lm = &pblk->lm; + /* In a worst-case scenario every line will have OP invalid sectors. + * We will then need a minimum of 1/OP lines to free up a single line + */ + + return DIV_ROUND_UP(100, pblk->op) * lm->blk_per_line; +} + +static inline struct pblk_sec_meta *pblk_get_meta(struct pblk *pblk, + void *meta, int index) +{ + return meta + + max_t(int, sizeof(struct pblk_sec_meta), pblk->oob_meta_size) + * index; +} + +static inline int pblk_dma_meta_size(struct pblk *pblk) +{ + return max_t(int, sizeof(struct pblk_sec_meta), pblk->oob_meta_size) + * NVM_MAX_VLBA; +} + +static inline int pblk_is_oob_meta_supported(struct pblk *pblk) +{ + return pblk->oob_meta_size >= sizeof(struct pblk_sec_meta); +} #endif /* PBLK_H_ */ diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h index b61b83bbcfff..fdf75352e16a 100644 --- a/drivers/md/bcache/bcache.h +++ b/drivers/md/bcache/bcache.h @@ -627,6 +627,20 @@ struct cache_set { struct bkey gc_done; /* + * For automatical garbage collection after writeback completed, this + * varialbe is used as bit fields, + * - 0000 0001b (BCH_ENABLE_AUTO_GC): enable gc after writeback + * - 0000 0010b (BCH_DO_AUTO_GC): do gc after writeback + * This is an optimization for following write request after writeback + * finished, but read hit rate dropped due to clean data on cache is + * discarded. Unless user explicitly sets it via sysfs, it won't be + * enabled. + */ +#define BCH_ENABLE_AUTO_GC 1 +#define BCH_DO_AUTO_GC 2 + uint8_t gc_after_writeback; + + /* * The allocation code needs gc_mark in struct bucket to be correct, but * it's not while a gc is in progress. Protected by bucket_lock. */ @@ -658,7 +672,11 @@ struct cache_set { /* * A btree node on disk could have too many bsets for an iterator to fit - * on the stack - have to dynamically allocate them + * on the stack - have to dynamically allocate them. + * bch_cache_set_alloc() will make sure the pool can allocate iterators + * equipped with enough room that can host + * (sb.bucket_size / sb.block_size) + * btree_iter_sets, which is more than static MAX_BSETS. */ mempool_t fill_iter; diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c index 3f4211b5cd33..23cb1dc7296b 100644 --- a/drivers/md/bcache/btree.c +++ b/drivers/md/bcache/btree.c @@ -207,6 +207,11 @@ void bch_btree_node_read_done(struct btree *b) struct bset *i = btree_bset_first(b); struct btree_iter *iter; + /* + * c->fill_iter can allocate an iterator with more memory space + * than static MAX_BSETS. + * See the comment arount cache_set->fill_iter. + */ iter = mempool_alloc(&b->c->fill_iter, GFP_NOIO); iter->size = b->c->sb.bucket_size / b->c->sb.block_size; iter->used = 0; diff --git a/drivers/md/bcache/btree.h b/drivers/md/bcache/btree.h index a68d6c55783b..d1c72ef64edf 100644 --- a/drivers/md/bcache/btree.h +++ b/drivers/md/bcache/btree.h @@ -266,6 +266,24 @@ static inline void wake_up_gc(struct cache_set *c) wake_up(&c->gc_wait); } +static inline void force_wake_up_gc(struct cache_set *c) +{ + /* + * Garbage collection thread only works when sectors_to_gc < 0, + * calling wake_up_gc() won't start gc thread if sectors_to_gc is + * not a nagetive value. + * Therefore sectors_to_gc is set to -1 here, before waking up + * gc thread by calling wake_up_gc(). Then gc_should_run() will + * give a chance to permit gc thread to run. "Give a chance" means + * before going into gc_should_run(), there is still possibility + * that c->sectors_to_gc being set to other positive value. So + * this routine won't 100% make sure gc thread will be woken up + * to run. + */ + atomic_set(&c->sectors_to_gc, -1); + wake_up_gc(c); +} + #define MAP_DONE 0 #define MAP_CONTINUE 1 diff --git a/drivers/md/bcache/debug.c b/drivers/md/bcache/debug.c index 8f448b9c96a1..8b123be05254 100644 --- a/drivers/md/bcache/debug.c +++ b/drivers/md/bcache/debug.c @@ -249,8 +249,7 @@ void bch_debug_init_cache_set(struct cache_set *c) void bch_debug_exit(void) { - if (!IS_ERR_OR_NULL(bcache_debug)) - debugfs_remove_recursive(bcache_debug); + debugfs_remove_recursive(bcache_debug); } void __init bch_debug_init(void) diff --git a/drivers/md/bcache/journal.c b/drivers/md/bcache/journal.c index 522c7426f3a0..b2fd412715b1 100644 --- a/drivers/md/bcache/journal.c +++ b/drivers/md/bcache/journal.c @@ -663,7 +663,7 @@ static void journal_write_unlocked(struct closure *cl) REQ_SYNC|REQ_META|REQ_PREFLUSH|REQ_FUA); bch_bio_map(bio, w->data); - trace_bcache_journal_write(bio); + trace_bcache_journal_write(bio, w->data->keys); bio_list_add(&list, bio); SET_PTR_OFFSET(k, i, PTR_OFFSET(k, i) + sectors); diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c index 3bf35914bb57..15070412a32e 100644 --- a/drivers/md/bcache/request.c +++ b/drivers/md/bcache/request.c @@ -311,11 +311,11 @@ err: * data is written it calls bch_journal, and after the keys have been added to * the next journal write they're inserted into the btree. * - * It inserts the data in s->cache_bio; bi_sector is used for the key offset, + * It inserts the data in op->bio; bi_sector is used for the key offset, * and op->inode is used for the key inode. * - * If s->bypass is true, instead of inserting the data it invalidates the - * region of the cache represented by s->cache_bio and op->inode. + * If op->bypass is true, instead of inserting the data it invalidates the + * region of the cache represented by op->bio and op->inode. */ void bch_data_insert(struct closure *cl) { diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c index 7bbd670a5a84..4dee119c3664 100644 --- a/drivers/md/bcache/super.c +++ b/drivers/md/bcache/super.c @@ -25,8 +25,8 @@ #include <linux/reboot.h> #include <linux/sysfs.h> -MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Kent Overstreet <kent.overstreet@gmail.com>"); +unsigned int bch_cutoff_writeback; +unsigned int bch_cutoff_writeback_sync; static const char bcache_magic[] = { 0xc6, 0x85, 0x73, 0xf6, 0x4e, 0x1a, 0x45, 0xca, @@ -1510,8 +1510,7 @@ static void cache_set_free(struct closure *cl) struct cache *ca; unsigned int i; - if (!IS_ERR_OR_NULL(c->debug)) - debugfs_remove(c->debug); + debugfs_remove(c->debug); bch_open_buckets_free(c); bch_btree_cache_free(c); @@ -2424,6 +2423,32 @@ static void bcache_exit(void) mutex_destroy(&bch_register_lock); } +/* Check and fixup module parameters */ +static void check_module_parameters(void) +{ + if (bch_cutoff_writeback_sync == 0) + bch_cutoff_writeback_sync = CUTOFF_WRITEBACK_SYNC; + else if (bch_cutoff_writeback_sync > CUTOFF_WRITEBACK_SYNC_MAX) { + pr_warn("set bch_cutoff_writeback_sync (%u) to max value %u", + bch_cutoff_writeback_sync, CUTOFF_WRITEBACK_SYNC_MAX); + bch_cutoff_writeback_sync = CUTOFF_WRITEBACK_SYNC_MAX; + } + + if (bch_cutoff_writeback == 0) + bch_cutoff_writeback = CUTOFF_WRITEBACK; + else if (bch_cutoff_writeback > CUTOFF_WRITEBACK_MAX) { + pr_warn("set bch_cutoff_writeback (%u) to max value %u", + bch_cutoff_writeback, CUTOFF_WRITEBACK_MAX); + bch_cutoff_writeback = CUTOFF_WRITEBACK_MAX; + } + + if (bch_cutoff_writeback > bch_cutoff_writeback_sync) { + pr_warn("set bch_cutoff_writeback (%u) to %u", + bch_cutoff_writeback, bch_cutoff_writeback_sync); + bch_cutoff_writeback = bch_cutoff_writeback_sync; + } +} + static int __init bcache_init(void) { static const struct attribute *files[] = { @@ -2432,6 +2457,8 @@ static int __init bcache_init(void) NULL }; + check_module_parameters(); + mutex_init(&bch_register_lock); init_waitqueue_head(&unregister_wait); register_reboot_notifier(&reboot); @@ -2468,5 +2495,18 @@ err: return -ENOMEM; } +/* + * Module hooks + */ module_exit(bcache_exit); module_init(bcache_init); + +module_param(bch_cutoff_writeback, uint, 0); +MODULE_PARM_DESC(bch_cutoff_writeback, "threshold to cutoff writeback"); + +module_param(bch_cutoff_writeback_sync, uint, 0); +MODULE_PARM_DESC(bch_cutoff_writeback_sync, "hard threshold to cutoff writeback"); + +MODULE_DESCRIPTION("Bcache: a Linux block layer cache"); +MODULE_AUTHOR("Kent Overstreet <kent.overstreet@gmail.com>"); +MODULE_LICENSE("GPL"); diff --git a/drivers/md/bcache/sysfs.c b/drivers/md/bcache/sysfs.c index 26f035a0c5b9..557a8a3270a1 100644 --- a/drivers/md/bcache/sysfs.c +++ b/drivers/md/bcache/sysfs.c @@ -16,7 +16,7 @@ #include <linux/sort.h> #include <linux/sched/clock.h> -/* Default is -1; we skip past it for struct cached_dev's cache mode */ +/* Default is 0 ("writethrough") */ static const char * const bch_cache_modes[] = { "writethrough", "writeback", @@ -25,7 +25,7 @@ static const char * const bch_cache_modes[] = { NULL }; -/* Default is -1; we skip past it for stop_when_cache_set_failed */ +/* Default is 0 ("auto") */ static const char * const bch_stop_on_failure_modes[] = { "auto", "always", @@ -88,6 +88,8 @@ read_attribute(writeback_keys_done); read_attribute(writeback_keys_failed); read_attribute(io_errors); read_attribute(congested); +read_attribute(cutoff_writeback); +read_attribute(cutoff_writeback_sync); rw_attribute(congested_read_threshold_us); rw_attribute(congested_write_threshold_us); @@ -128,6 +130,7 @@ rw_attribute(expensive_debug_checks); rw_attribute(cache_replacement_policy); rw_attribute(btree_shrinker_disabled); rw_attribute(copy_gc_enabled); +rw_attribute(gc_after_writeback); rw_attribute(size); static ssize_t bch_snprint_string_list(char *buf, @@ -264,7 +267,8 @@ STORE(__cached_dev) d_strtoul(writeback_running); d_strtoul(writeback_delay); - sysfs_strtoul_clamp(writeback_percent, dc->writeback_percent, 0, 40); + sysfs_strtoul_clamp(writeback_percent, dc->writeback_percent, + 0, bch_cutoff_writeback); if (attr == &sysfs_writeback_rate) { ssize_t ret; @@ -384,8 +388,25 @@ STORE(bch_cached_dev) mutex_lock(&bch_register_lock); size = __cached_dev_store(kobj, attr, buf, size); - if (attr == &sysfs_writeback_running) - bch_writeback_queue(dc); + if (attr == &sysfs_writeback_running) { + /* dc->writeback_running changed in __cached_dev_store() */ + if (IS_ERR_OR_NULL(dc->writeback_thread)) { + /* + * reject setting it to 1 via sysfs if writeback + * kthread is not created yet. + */ + if (dc->writeback_running) { + dc->writeback_running = false; + pr_err("%s: failed to run non-existent writeback thread", + dc->disk.disk->disk_name); + } + } else + /* + * writeback kthread will check if dc->writeback_running + * is true or false. + */ + bch_writeback_queue(dc); + } if (attr == &sysfs_writeback_percent) if (!test_and_set_bit(BCACHE_DEV_WB_RUNNING, &dc->disk.flags)) @@ -668,6 +689,9 @@ SHOW(__bch_cache_set) sysfs_print(congested_write_threshold_us, c->congested_write_threshold_us); + sysfs_print(cutoff_writeback, bch_cutoff_writeback); + sysfs_print(cutoff_writeback_sync, bch_cutoff_writeback_sync); + sysfs_print(active_journal_entries, fifo_used(&c->journal.pin)); sysfs_printf(verify, "%i", c->verify); sysfs_printf(key_merging_disabled, "%i", c->key_merging_disabled); @@ -676,6 +700,7 @@ SHOW(__bch_cache_set) sysfs_printf(gc_always_rewrite, "%i", c->gc_always_rewrite); sysfs_printf(btree_shrinker_disabled, "%i", c->shrinker_disabled); sysfs_printf(copy_gc_enabled, "%i", c->copy_gc_enabled); + sysfs_printf(gc_after_writeback, "%i", c->gc_after_writeback); sysfs_printf(io_disable, "%i", test_bit(CACHE_SET_IO_DISABLE, &c->flags)); @@ -725,21 +750,8 @@ STORE(__bch_cache_set) bch_cache_accounting_clear(&c->accounting); } - if (attr == &sysfs_trigger_gc) { - /* - * Garbage collection thread only works when sectors_to_gc < 0, - * when users write to sysfs entry trigger_gc, most of time - * they want to forcibly triger gargage collection. Here -1 is - * set to c->sectors_to_gc, to make gc_should_run() give a - * chance to permit gc thread to run. "give a chance" means - * before going into gc_should_run(), there is still chance - * that c->sectors_to_gc being set to other positive value. So - * writing sysfs entry trigger_gc won't always make sure gc - * thread takes effect. - */ - atomic_set(&c->sectors_to_gc, -1); - wake_up_gc(c); - } + if (attr == &sysfs_trigger_gc) + force_wake_up_gc(c); if (attr == &sysfs_prune_cache) { struct shrink_control sc; @@ -789,6 +801,12 @@ STORE(__bch_cache_set) sysfs_strtoul(gc_always_rewrite, c->gc_always_rewrite); sysfs_strtoul(btree_shrinker_disabled, c->shrinker_disabled); sysfs_strtoul(copy_gc_enabled, c->copy_gc_enabled); + /* + * write gc_after_writeback here may overwrite an already set + * BCH_DO_AUTO_GC, it doesn't matter because this flag will be + * set in next chance. + */ + sysfs_strtoul_clamp(gc_after_writeback, c->gc_after_writeback, 0, 1); return size; } @@ -869,7 +887,10 @@ static struct attribute *bch_cache_set_internal_files[] = { &sysfs_gc_always_rewrite, &sysfs_btree_shrinker_disabled, &sysfs_copy_gc_enabled, + &sysfs_gc_after_writeback, &sysfs_io_disable, + &sysfs_cutoff_writeback, + &sysfs_cutoff_writeback_sync, NULL }; KTYPE(bch_cache_set_internal); diff --git a/drivers/md/bcache/writeback.c b/drivers/md/bcache/writeback.c index 08c3a9f9676c..73f0efac2b9f 100644 --- a/drivers/md/bcache/writeback.c +++ b/drivers/md/bcache/writeback.c @@ -17,6 +17,15 @@ #include <linux/sched/clock.h> #include <trace/events/bcache.h> +static void update_gc_after_writeback(struct cache_set *c) +{ + if (c->gc_after_writeback != (BCH_ENABLE_AUTO_GC) || + c->gc_stats.in_use < BCH_AUTO_GC_DIRTY_THRESHOLD) + return; + + c->gc_after_writeback |= BCH_DO_AUTO_GC; +} + /* Rate limiting */ static uint64_t __calc_target_rate(struct cached_dev *dc) { @@ -191,6 +200,7 @@ static void update_writeback_rate(struct work_struct *work) if (!set_at_max_writeback_rate(c, dc)) { down_read(&dc->writeback_lock); __update_writeback_rate(dc); + update_gc_after_writeback(c); up_read(&dc->writeback_lock); } } @@ -689,6 +699,23 @@ static int bch_writeback_thread(void *arg) up_write(&dc->writeback_lock); break; } + + /* + * When dirty data rate is high (e.g. 50%+), there might + * be heavy buckets fragmentation after writeback + * finished, which hurts following write performance. + * If users really care about write performance they + * may set BCH_ENABLE_AUTO_GC via sysfs, then when + * BCH_DO_AUTO_GC is set, garbage collection thread + * will be wake up here. After moving gc, the shrunk + * btree and discarded free buckets SSD space may be + * helpful for following write requests. + */ + if (c->gc_after_writeback == + (BCH_ENABLE_AUTO_GC|BCH_DO_AUTO_GC)) { + c->gc_after_writeback &= ~BCH_DO_AUTO_GC; + force_wake_up_gc(c); + } } up_write(&dc->writeback_lock); @@ -777,7 +804,7 @@ void bch_cached_dev_writeback_init(struct cached_dev *dc) bch_keybuf_init(&dc->writeback_keys); dc->writeback_metadata = true; - dc->writeback_running = true; + dc->writeback_running = false; dc->writeback_percent = 10; dc->writeback_delay = 30; atomic_long_set(&dc->writeback_rate.rate, 1024); @@ -805,6 +832,7 @@ int bch_cached_dev_writeback_start(struct cached_dev *dc) cached_dev_put(dc); return PTR_ERR(dc->writeback_thread); } + dc->writeback_running = true; WARN_ON(test_and_set_bit(BCACHE_DEV_WB_RUNNING, &dc->disk.flags)); schedule_delayed_work(&dc->writeback_rate_update, diff --git a/drivers/md/bcache/writeback.h b/drivers/md/bcache/writeback.h index d2b9fdbc8994..6a743d3bb338 100644 --- a/drivers/md/bcache/writeback.h +++ b/drivers/md/bcache/writeback.h @@ -5,12 +5,17 @@ #define CUTOFF_WRITEBACK 40 #define CUTOFF_WRITEBACK_SYNC 70 +#define CUTOFF_WRITEBACK_MAX 70 +#define CUTOFF_WRITEBACK_SYNC_MAX 90 + #define MAX_WRITEBACKS_IN_PASS 5 #define MAX_WRITESIZE_IN_PASS 5000 /* *512b */ #define WRITEBACK_RATE_UPDATE_SECS_MAX 60 #define WRITEBACK_RATE_UPDATE_SECS_DEFAULT 5 +#define BCH_AUTO_GC_DIRTY_THRESHOLD 50 + /* * 14 (16384ths) is chosen here as something that each backing device * should be a reasonable fraction of the share, and not to blow up @@ -53,6 +58,9 @@ static inline bool bcache_dev_stripe_dirty(struct cached_dev *dc, } } +extern unsigned int bch_cutoff_writeback; +extern unsigned int bch_cutoff_writeback_sync; + static inline bool should_writeback(struct cached_dev *dc, struct bio *bio, unsigned int cache_mode, bool would_skip) { @@ -60,7 +68,7 @@ static inline bool should_writeback(struct cached_dev *dc, struct bio *bio, if (cache_mode != CACHE_MODE_WRITEBACK || test_bit(BCACHE_DEV_DETACHING, &dc->disk.flags) || - in_use > CUTOFF_WRITEBACK_SYNC) + in_use > bch_cutoff_writeback_sync) return false; if (dc->partial_stripes_expensive && @@ -73,7 +81,7 @@ static inline bool should_writeback(struct cached_dev *dc, struct bio *bio, return (op_is_sync(bio->bi_opf) || bio->bi_opf & (REQ_META|REQ_PRIO) || - in_use <= CUTOFF_WRITEBACK); + in_use <= bch_cutoff_writeback); } static inline void bch_writeback_queue(struct cached_dev *dc) diff --git a/drivers/md/dm-bufio.c b/drivers/md/dm-bufio.c index 8b0b628e5d1c..1ecef76225a1 100644 --- a/drivers/md/dm-bufio.c +++ b/drivers/md/dm-bufio.c @@ -65,7 +65,7 @@ /* * Linking of buffers: - * All buffers are linked to cache_hash with their hash_list field. + * All buffers are linked to buffer_tree with their node field. * * Clean buffers that are not being written (B_WRITING not set) * are linked to lru[LIST_CLEAN] with their lru_list field. @@ -457,7 +457,7 @@ static void free_buffer(struct dm_buffer *b) } /* - * Link buffer to the hash list and clean or dirty queue. + * Link buffer to the buffer tree and clean or dirty queue. */ static void __link_buffer(struct dm_buffer *b, sector_t block, int dirty) { @@ -472,7 +472,7 @@ static void __link_buffer(struct dm_buffer *b, sector_t block, int dirty) } /* - * Unlink buffer from the hash list and dirty or clean queue. + * Unlink buffer from the buffer tree and dirty or clean queue. */ static void __unlink_buffer(struct dm_buffer *b) { @@ -993,7 +993,7 @@ static struct dm_buffer *__bufio_new(struct dm_bufio_client *c, sector_t block, /* * We've had a period where the mutex was unlocked, so need to - * recheck the hash table. + * recheck the buffer tree. */ b = __find(c, block); if (b) { @@ -1327,7 +1327,7 @@ again: EXPORT_SYMBOL_GPL(dm_bufio_write_dirty_buffers); /* - * Use dm-io to send and empty barrier flush the device. + * Use dm-io to send an empty barrier to flush the device. */ int dm_bufio_issue_flush(struct dm_bufio_client *c) { @@ -1356,7 +1356,7 @@ EXPORT_SYMBOL_GPL(dm_bufio_issue_flush); * Then, we write the buffer to the original location if it was dirty. * * Then, if we are the only one who is holding the buffer, relink the buffer - * in the hash queue for the new location. + * in the buffer tree for the new location. * * If there was someone else holding the buffer, we write it to the new * location but not relink it, because that other user needs to have the buffer diff --git a/drivers/md/dm-core.h b/drivers/md/dm-core.h index 224d44503a06..95c6d86ab5e8 100644 --- a/drivers/md/dm-core.h +++ b/drivers/md/dm-core.h @@ -65,7 +65,6 @@ struct mapped_device { */ struct work_struct work; wait_queue_head_t wait; - atomic_t pending[2]; spinlock_t deferred_lock; struct bio_list deferred; @@ -107,9 +106,6 @@ struct mapped_device { struct block_device *bdev; - /* zero-length flush that will be cloned and submitted to targets */ - struct bio flush_bio; - struct dm_stats stats; /* for blk-mq request-based DM support */ @@ -119,7 +115,6 @@ struct mapped_device { struct srcu_struct io_barrier; }; -int md_in_flight(struct mapped_device *md); void disable_write_same(struct mapped_device *md); void disable_write_zeroes(struct mapped_device *md); diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c index a8c32de29e3f..0ff22159a0ca 100644 --- a/drivers/md/dm-crypt.c +++ b/drivers/md/dm-crypt.c @@ -49,7 +49,7 @@ struct convert_context { struct bio *bio_out; struct bvec_iter iter_in; struct bvec_iter iter_out; - sector_t cc_sector; + u64 cc_sector; atomic_t cc_pending; union { struct skcipher_request *req; @@ -81,7 +81,7 @@ struct dm_crypt_request { struct convert_context *ctx; struct scatterlist sg_in[4]; struct scatterlist sg_out[4]; - sector_t iv_sector; + u64 iv_sector; }; struct crypt_config; @@ -160,7 +160,7 @@ struct crypt_config { struct iv_lmk_private lmk; struct iv_tcw_private tcw; } iv_gen_private; - sector_t iv_offset; + u64 iv_offset; unsigned int iv_size; unsigned short int sector_size; unsigned char sector_shift; @@ -1885,6 +1885,13 @@ static int crypt_alloc_tfms_skcipher(struct crypt_config *cc, char *ciphermode) } } + /* + * dm-crypt performance can vary greatly depending on which crypto + * algorithm implementation is used. Help people debug performance + * problems by logging the ->cra_driver_name. + */ + DMINFO("%s using implementation \"%s\"", ciphermode, + crypto_skcipher_alg(any_tfm(cc))->base.cra_driver_name); return 0; } @@ -1903,6 +1910,8 @@ static int crypt_alloc_tfms_aead(struct crypt_config *cc, char *ciphermode) return err; } + DMINFO("%s using implementation \"%s\"", ciphermode, + crypto_aead_alg(any_tfm_aead(cc))->base.cra_driver_name); return 0; } @@ -2781,7 +2790,7 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv) } ret = -EINVAL; - if (sscanf(argv[4], "%llu%c", &tmpll, &dummy) != 1) { + if (sscanf(argv[4], "%llu%c", &tmpll, &dummy) != 1 || tmpll != (sector_t)tmpll) { ti->error = "Invalid device sector"; goto bad; } diff --git a/drivers/md/dm-delay.c b/drivers/md/dm-delay.c index 2fb7bb4304ad..fddffe251bf6 100644 --- a/drivers/md/dm-delay.c +++ b/drivers/md/dm-delay.c @@ -141,7 +141,7 @@ static int delay_class_ctr(struct dm_target *ti, struct delay_class *c, char **a unsigned long long tmpll; char dummy; - if (sscanf(argv[1], "%llu%c", &tmpll, &dummy) != 1) { + if (sscanf(argv[1], "%llu%c", &tmpll, &dummy) != 1 || tmpll != (sector_t)tmpll) { ti->error = "Invalid device sector"; return -EINVAL; } diff --git a/drivers/md/dm-flakey.c b/drivers/md/dm-flakey.c index 3cb97fa4c11d..a9bc518156f2 100644 --- a/drivers/md/dm-flakey.c +++ b/drivers/md/dm-flakey.c @@ -213,7 +213,7 @@ static int flakey_ctr(struct dm_target *ti, unsigned int argc, char **argv) devname = dm_shift_arg(&as); r = -EINVAL; - if (sscanf(dm_shift_arg(&as), "%llu%c", &tmpll, &dummy) != 1) { + if (sscanf(dm_shift_arg(&as), "%llu%c", &tmpll, &dummy) != 1 || tmpll != (sector_t)tmpll) { ti->error = "Invalid device sector"; goto bad; } @@ -287,20 +287,31 @@ static void flakey_map_bio(struct dm_target *ti, struct bio *bio) static void corrupt_bio_data(struct bio *bio, struct flakey_c *fc) { - unsigned bio_bytes = bio_cur_bytes(bio); - char *data = bio_data(bio); + unsigned int corrupt_bio_byte = fc->corrupt_bio_byte - 1; + + struct bvec_iter iter; + struct bio_vec bvec; + + if (!bio_has_data(bio)) + return; /* - * Overwrite the Nth byte of the data returned. + * Overwrite the Nth byte of the bio's data, on whichever page + * it falls. */ - if (data && bio_bytes >= fc->corrupt_bio_byte) { - data[fc->corrupt_bio_byte - 1] = fc->corrupt_bio_value; - - DMDEBUG("Corrupting data bio=%p by writing %u to byte %u " - "(rw=%c bi_opf=%u bi_sector=%llu cur_bytes=%u)\n", - bio, fc->corrupt_bio_value, fc->corrupt_bio_byte, - (bio_data_dir(bio) == WRITE) ? 'w' : 'r', bio->bi_opf, - (unsigned long long)bio->bi_iter.bi_sector, bio_bytes); + bio_for_each_segment(bvec, bio, iter) { + if (bio_iter_len(bio, iter) > corrupt_bio_byte) { + char *segment = (page_address(bio_iter_page(bio, iter)) + + bio_iter_offset(bio, iter)); + segment[corrupt_bio_byte] = fc->corrupt_bio_value; + DMDEBUG("Corrupting data bio=%p by writing %u to byte %u " + "(rw=%c bi_opf=%u bi_sector=%llu size=%u)\n", + bio, fc->corrupt_bio_value, fc->corrupt_bio_byte, + (bio_data_dir(bio) == WRITE) ? 'w' : 'r', bio->bi_opf, + (unsigned long long)bio->bi_iter.bi_sector, bio->bi_iter.bi_size); + break; + } + corrupt_bio_byte -= bio_iter_len(bio, iter); } } diff --git a/drivers/md/dm-integrity.c b/drivers/md/dm-integrity.c index 62baa3214cc7..457200ca6287 100644 --- a/drivers/md/dm-integrity.c +++ b/drivers/md/dm-integrity.c @@ -3460,7 +3460,7 @@ try_smaller_buffer: ti->error = "Recalculate is only valid with internal hash"; goto bad; } - ic->recalc_wq = alloc_workqueue("dm-intergrity-recalc", WQ_MEM_RECLAIM, 1); + ic->recalc_wq = alloc_workqueue("dm-integrity-recalc", WQ_MEM_RECLAIM, 1); if (!ic->recalc_wq ) { ti->error = "Cannot allocate workqueue"; r = -ENOMEM; diff --git a/drivers/md/dm-kcopyd.c b/drivers/md/dm-kcopyd.c index 2fc4213e02b5..671c24332802 100644 --- a/drivers/md/dm-kcopyd.c +++ b/drivers/md/dm-kcopyd.c @@ -56,15 +56,17 @@ struct dm_kcopyd_client { atomic_t nr_jobs; /* - * We maintain three lists of jobs: + * We maintain four lists of jobs: * * i) jobs waiting for pages * ii) jobs that have pages, and are waiting for the io to be issued. - * iii) jobs that have completed. + * iii) jobs that don't need to do any IO and just run a callback + * iv) jobs that have completed. * - * All three of these are protected by job_lock. + * All four of these are protected by job_lock. */ spinlock_t job_lock; + struct list_head callback_jobs; struct list_head complete_jobs; struct list_head io_jobs; struct list_head pages_jobs; @@ -625,6 +627,7 @@ static void do_work(struct work_struct *work) struct dm_kcopyd_client *kc = container_of(work, struct dm_kcopyd_client, kcopyd_work); struct blk_plug plug; + unsigned long flags; /* * The order that these are called is *very* important. @@ -633,6 +636,10 @@ static void do_work(struct work_struct *work) * list. io jobs call wake when they complete and it all * starts again. */ + spin_lock_irqsave(&kc->job_lock, flags); + list_splice_tail_init(&kc->callback_jobs, &kc->complete_jobs); + spin_unlock_irqrestore(&kc->job_lock, flags); + blk_start_plug(&plug); process_jobs(&kc->complete_jobs, kc, run_complete_job); process_jobs(&kc->pages_jobs, kc, run_pages_job); @@ -650,7 +657,7 @@ static void dispatch_job(struct kcopyd_job *job) struct dm_kcopyd_client *kc = job->kc; atomic_inc(&kc->nr_jobs); if (unlikely(!job->source.count)) - push(&kc->complete_jobs, job); + push(&kc->callback_jobs, job); else if (job->pages == &zero_page_list) push(&kc->io_jobs, job); else @@ -858,7 +865,7 @@ void dm_kcopyd_do_callback(void *j, int read_err, unsigned long write_err) job->read_err = read_err; job->write_err = write_err; - push(&kc->complete_jobs, job); + push(&kc->callback_jobs, job); wake(kc); } EXPORT_SYMBOL(dm_kcopyd_do_callback); @@ -888,6 +895,7 @@ struct dm_kcopyd_client *dm_kcopyd_client_create(struct dm_kcopyd_throttle *thro return ERR_PTR(-ENOMEM); spin_lock_init(&kc->job_lock); + INIT_LIST_HEAD(&kc->callback_jobs); INIT_LIST_HEAD(&kc->complete_jobs); INIT_LIST_HEAD(&kc->io_jobs); INIT_LIST_HEAD(&kc->pages_jobs); @@ -939,6 +947,7 @@ void dm_kcopyd_client_destroy(struct dm_kcopyd_client *kc) /* Wait for completion of all jobs submitted by this client. */ wait_event(kc->destroyq, !atomic_read(&kc->nr_jobs)); + BUG_ON(!list_empty(&kc->callback_jobs)); BUG_ON(!list_empty(&kc->complete_jobs)); BUG_ON(!list_empty(&kc->io_jobs)); BUG_ON(!list_empty(&kc->pages_jobs)); diff --git a/drivers/md/dm-linear.c b/drivers/md/dm-linear.c index 8d7ddee6ac4d..ad980a38fb1e 100644 --- a/drivers/md/dm-linear.c +++ b/drivers/md/dm-linear.c @@ -45,7 +45,7 @@ static int linear_ctr(struct dm_target *ti, unsigned int argc, char **argv) } ret = -EINVAL; - if (sscanf(argv[1], "%llu%c", &tmp, &dummy) != 1) { + if (sscanf(argv[1], "%llu%c", &tmp, &dummy) != 1 || tmp != (sector_t)tmp) { ti->error = "Invalid device sector"; goto bad; } diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c index d6a66921daf4..2ee5e357a0a7 100644 --- a/drivers/md/dm-mpath.c +++ b/drivers/md/dm-mpath.c @@ -1211,14 +1211,16 @@ static void flush_multipath_work(struct multipath *m) set_bit(MPATHF_PG_INIT_DISABLED, &m->flags); smp_mb__after_atomic(); - flush_workqueue(kmpath_handlerd); + if (atomic_read(&m->pg_init_in_progress)) + flush_workqueue(kmpath_handlerd); multipath_wait_for_pg_init_completion(m); clear_bit(MPATHF_PG_INIT_DISABLED, &m->flags); smp_mb__after_atomic(); } - flush_workqueue(kmultipathd); + if (m->queue_mode == DM_TYPE_BIO_BASED) + flush_work(&m->process_queued_bios); flush_work(&m->trigger_event); } diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c index e1dd1622a290..adcfe8ae10aa 100644 --- a/drivers/md/dm-raid.c +++ b/drivers/md/dm-raid.c @@ -3690,8 +3690,7 @@ static int raid_message(struct dm_target *ti, unsigned int argc, char **argv, set_bit(MD_RECOVERY_INTR, &mddev->recovery); md_reap_sync_thread(mddev); } - } else if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) || - test_bit(MD_RECOVERY_NEEDED, &mddev->recovery)) + } else if (decipher_sync_action(mddev, mddev->recovery) != st_idle) return -EBUSY; else if (!strcasecmp(argv[0], "resync")) ; /* MD_RECOVERY_NEEDED set below */ diff --git a/drivers/md/dm-raid1.c b/drivers/md/dm-raid1.c index 79eab1071ec2..5a51151f680d 100644 --- a/drivers/md/dm-raid1.c +++ b/drivers/md/dm-raid1.c @@ -943,7 +943,8 @@ static int get_mirror(struct mirror_set *ms, struct dm_target *ti, char dummy; int ret; - if (sscanf(argv[1], "%llu%c", &offset, &dummy) != 1) { + if (sscanf(argv[1], "%llu%c", &offset, &dummy) != 1 || + offset != (sector_t)offset) { ti->error = "Invalid offset"; return -EINVAL; } diff --git a/drivers/md/dm-rq.c b/drivers/md/dm-rq.c index 7cd36e4d1310..4eb5f8c56535 100644 --- a/drivers/md/dm-rq.c +++ b/drivers/md/dm-rq.c @@ -43,7 +43,7 @@ static unsigned dm_get_blk_mq_queue_depth(void) int dm_request_based(struct mapped_device *md) { - return queue_is_rq_based(md->queue); + return queue_is_mq(md->queue); } void dm_start_queue(struct request_queue *q) @@ -128,12 +128,10 @@ static void rq_end_stats(struct mapped_device *md, struct request *orig) * the md may be freed in dm_put() at the end of this function. * Or do dm_get() before calling this function and dm_put() later. */ -static void rq_completed(struct mapped_device *md, int rw, bool run_queue) +static void rq_completed(struct mapped_device *md) { - atomic_dec(&md->pending[rw]); - /* nudge anyone waiting on suspend queue */ - if (!md_in_flight(md)) + if (unlikely(waitqueue_active(&md->wait))) wake_up(&md->wait); /* @@ -149,7 +147,6 @@ static void rq_completed(struct mapped_device *md, int rw, bool run_queue) */ static void dm_end_request(struct request *clone, blk_status_t error) { - int rw = rq_data_dir(clone); struct dm_rq_target_io *tio = clone->end_io_data; struct mapped_device *md = tio->md; struct request *rq = tio->orig; @@ -159,7 +156,7 @@ static void dm_end_request(struct request *clone, blk_status_t error) rq_end_stats(md, rq); blk_mq_end_request(rq, error); - rq_completed(md, rw, true); + rq_completed(md); } static void __dm_mq_kick_requeue_list(struct request_queue *q, unsigned long msecs) @@ -183,7 +180,6 @@ static void dm_requeue_original_request(struct dm_rq_target_io *tio, bool delay_ { struct mapped_device *md = tio->md; struct request *rq = tio->orig; - int rw = rq_data_dir(rq); unsigned long delay_ms = delay_requeue ? 100 : 0; rq_end_stats(md, rq); @@ -193,7 +189,7 @@ static void dm_requeue_original_request(struct dm_rq_target_io *tio, bool delay_ } dm_mq_delay_requeue_request(rq, delay_ms); - rq_completed(md, rw, false); + rq_completed(md); } static void dm_done(struct request *clone, blk_status_t error, bool mapped) @@ -248,15 +244,13 @@ static void dm_softirq_done(struct request *rq) bool mapped = true; struct dm_rq_target_io *tio = tio_from_request(rq); struct request *clone = tio->clone; - int rw; if (!clone) { struct mapped_device *md = tio->md; rq_end_stats(md, rq); - rw = rq_data_dir(rq); blk_mq_end_request(rq, tio->error); - rq_completed(md, rw, false); + rq_completed(md); return; } @@ -378,7 +372,6 @@ static int map_request(struct dm_rq_target_io *tio) blk_status_t ret; r = ti->type->clone_and_map_rq(ti, rq, &tio->info, &clone); -check_again: switch (r) { case DM_MAPIO_SUBMITTED: /* The target has taken the I/O to submit by itself later */ @@ -398,8 +391,7 @@ check_again: blk_rq_unprep_clone(clone); tio->ti->type->release_clone_rq(clone); tio->clone = NULL; - r = DM_MAPIO_REQUEUE; - goto check_again; + return DM_MAPIO_REQUEUE; } break; case DM_MAPIO_REQUEUE: @@ -436,7 +428,6 @@ ssize_t dm_attr_rq_based_seq_io_merge_deadline_store(struct mapped_device *md, static void dm_start_request(struct mapped_device *md, struct request *orig) { blk_mq_start_request(orig); - atomic_inc(&md->pending[rq_data_dir(orig)]); if (unlikely(dm_stats_used(&md->stats))) { struct dm_rq_target_io *tio = tio_from_request(orig); @@ -510,7 +501,7 @@ static blk_status_t dm_mq_queue_rq(struct blk_mq_hw_ctx *hctx, if (map_request(tio) == DM_MAPIO_REQUEUE) { /* Undo dm_start_request() before requeuing */ rq_end_stats(md, rq); - rq_completed(md, rq_data_dir(rq), false); + rq_completed(md); return BLK_STS_RESOURCE; } diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c index ae4b33d10924..36805b12661e 100644 --- a/drivers/md/dm-snap.c +++ b/drivers/md/dm-snap.c @@ -19,6 +19,7 @@ #include <linux/vmalloc.h> #include <linux/log2.h> #include <linux/dm-kcopyd.h> +#include <linux/semaphore.h> #include "dm.h" @@ -105,6 +106,9 @@ struct dm_snapshot { /* The on disk metadata handler */ struct dm_exception_store *store; + /* Maximum number of in-flight COW jobs. */ + struct semaphore cow_count; + struct dm_kcopyd_client *kcopyd_client; /* Wait for events based on state_bits */ @@ -145,6 +149,19 @@ struct dm_snapshot { #define RUNNING_MERGE 0 #define SHUTDOWN_MERGE 1 +/* + * Maximum number of chunks being copied on write. + * + * The value was decided experimentally as a trade-off between memory + * consumption, stalling the kernel's workqueues and maintaining a high enough + * throughput. + */ +#define DEFAULT_COW_THRESHOLD 2048 + +static int cow_threshold = DEFAULT_COW_THRESHOLD; +module_param_named(snapshot_cow_threshold, cow_threshold, int, 0644); +MODULE_PARM_DESC(snapshot_cow_threshold, "Maximum number of chunks being copied on write"); + DECLARE_DM_KCOPYD_THROTTLE_WITH_MODULE_PARM(snapshot_copy_throttle, "A percentage of time allocated for copy on write"); @@ -1190,6 +1207,8 @@ static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv) goto bad_hash_tables; } + sema_init(&s->cow_count, (cow_threshold > 0) ? cow_threshold : INT_MAX); + s->kcopyd_client = dm_kcopyd_client_create(&dm_kcopyd_throttle); if (IS_ERR(s->kcopyd_client)) { r = PTR_ERR(s->kcopyd_client); @@ -1575,6 +1594,7 @@ static void copy_callback(int read_err, unsigned long write_err, void *context) rb_link_node(&pe->out_of_order_node, parent, p); rb_insert_color(&pe->out_of_order_node, &s->out_of_order_tree); } + up(&s->cow_count); } /* @@ -1598,6 +1618,7 @@ static void start_copy(struct dm_snap_pending_exception *pe) dest.count = src.count; /* Hand over to kcopyd */ + down(&s->cow_count); dm_kcopyd_copy(s->kcopyd_client, &src, 1, &dest, 0, copy_callback, pe); } @@ -1617,6 +1638,7 @@ static void start_full_bio(struct dm_snap_pending_exception *pe, pe->full_bio = bio; pe->full_bio_end_io = bio->bi_end_io; + down(&s->cow_count); callback_data = dm_kcopyd_prepare_callback(s->kcopyd_client, copy_callback, pe); diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c index 9038c302d5c2..4b1be754cc41 100644 --- a/drivers/md/dm-table.c +++ b/drivers/md/dm-table.c @@ -919,12 +919,12 @@ static int device_is_rq_based(struct dm_target *ti, struct dm_dev *dev, struct request_queue *q = bdev_get_queue(dev->bdev); struct verify_rq_based_data *v = data; - if (q->mq_ops) + if (queue_is_mq(q)) v->mq_count++; else v->sq_count++; - return queue_is_rq_based(q); + return queue_is_mq(q); } static int dm_table_determine_type(struct dm_table *t) @@ -1927,6 +1927,9 @@ void dm_table_set_restrictions(struct dm_table *t, struct request_queue *q, */ if (blk_queue_is_zoned(q)) blk_revalidate_disk_zones(t->md->disk); + + /* Allow reads to exceed readahead limits */ + q->backing_dev_info->io_pages = limits->max_sectors >> (PAGE_SHIFT - 9); } unsigned int dm_table_get_num_targets(struct dm_table *t) diff --git a/drivers/md/dm-unstripe.c b/drivers/md/dm-unstripe.c index 954b7ab4e684..e673dacf6418 100644 --- a/drivers/md/dm-unstripe.c +++ b/drivers/md/dm-unstripe.c @@ -78,7 +78,7 @@ static int unstripe_ctr(struct dm_target *ti, unsigned int argc, char **argv) goto err; } - if (sscanf(argv[4], "%llu%c", &start, &dummy) != 1) { + if (sscanf(argv[4], "%llu%c", &start, &dummy) != 1 || start != (sector_t)start) { ti->error = "Invalid striped device offset"; goto err; } diff --git a/drivers/md/dm-verity-target.c b/drivers/md/dm-verity-target.c index fc65f0dedf7f..f4c31ffaa88e 100644 --- a/drivers/md/dm-verity-target.c +++ b/drivers/md/dm-verity-target.c @@ -1040,6 +1040,15 @@ static int verity_ctr(struct dm_target *ti, unsigned argc, char **argv) v->tfm = NULL; goto bad; } + + /* + * dm-verity performance can vary greatly depending on which hash + * algorithm implementation is used. Help people debug performance + * problems by logging the ->cra_driver_name. + */ + DMINFO("%s using implementation \"%s\"", v->alg_name, + crypto_hash_alg_common(v->tfm)->base.cra_driver_name); + v->digest_size = crypto_ahash_digestsize(v->tfm); if ((1 << v->hash_dev_block_bits) < v->digest_size * 2) { ti->error = "Digest size too big"; diff --git a/drivers/md/dm-writecache.c b/drivers/md/dm-writecache.c index 2d50eec94cd7..2b8cee35e4d5 100644 --- a/drivers/md/dm-writecache.c +++ b/drivers/md/dm-writecache.c @@ -2061,7 +2061,7 @@ invalid_optional: if (IS_ERR(wc->flush_thread)) { r = PTR_ERR(wc->flush_thread); wc->flush_thread = NULL; - ti->error = "Couldn't spawn endio thread"; + ti->error = "Couldn't spawn flush thread"; goto bad; } wake_up_process(wc->flush_thread); diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 63a7c416b224..d67c95ef8d7e 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -646,26 +646,38 @@ static void free_tio(struct dm_target_io *tio) bio_put(&tio->clone); } -int md_in_flight(struct mapped_device *md) +static bool md_in_flight_bios(struct mapped_device *md) { - return atomic_read(&md->pending[READ]) + - atomic_read(&md->pending[WRITE]); + int cpu; + struct hd_struct *part = &dm_disk(md)->part0; + long sum = 0; + + for_each_possible_cpu(cpu) { + sum += part_stat_local_read_cpu(part, in_flight[0], cpu); + sum += part_stat_local_read_cpu(part, in_flight[1], cpu); + } + + return sum != 0; +} + +static bool md_in_flight(struct mapped_device *md) +{ + if (queue_is_mq(md->queue)) + return blk_mq_queue_inflight(md->queue); + else + return md_in_flight_bios(md); } static void start_io_acct(struct dm_io *io) { struct mapped_device *md = io->md; struct bio *bio = io->orig_bio; - int rw = bio_data_dir(bio); io->start_time = jiffies; generic_start_io_acct(md->queue, bio_op(bio), bio_sectors(bio), &dm_disk(md)->part0); - atomic_set(&dm_disk(md)->part0.in_flight[rw], - atomic_inc_return(&md->pending[rw])); - if (unlikely(dm_stats_used(&md->stats))) dm_stats_account_io(&md->stats, bio_data_dir(bio), bio->bi_iter.bi_sector, bio_sectors(bio), @@ -677,8 +689,6 @@ static void end_io_acct(struct dm_io *io) struct mapped_device *md = io->md; struct bio *bio = io->orig_bio; unsigned long duration = jiffies - io->start_time; - int pending; - int rw = bio_data_dir(bio); generic_end_io_acct(md->queue, bio_op(bio), &dm_disk(md)->part0, io->start_time); @@ -688,16 +698,8 @@ static void end_io_acct(struct dm_io *io) bio->bi_iter.bi_sector, bio_sectors(bio), true, duration, &io->stats_aux); - /* - * After this is decremented the bio must not be touched if it is - * a flush. - */ - pending = atomic_dec_return(&md->pending[rw]); - atomic_set(&dm_disk(md)->part0.in_flight[rw], pending); - pending += atomic_read(&md->pending[rw^0x1]); - /* nudge anyone waiting on suspend queue */ - if (!pending) + if (unlikely(waitqueue_active(&md->wait))) wake_up(&md->wait); } @@ -1417,10 +1419,21 @@ static int __send_empty_flush(struct clone_info *ci) unsigned target_nr = 0; struct dm_target *ti; + /* + * Empty flush uses a statically initialized bio, as the base for + * cloning. However, blkg association requires that a bdev is + * associated with a gendisk, which doesn't happen until the bdev is + * opened. So, blkg association is done at issue time of the flush + * rather than when the device is created in alloc_dev(). + */ + bio_set_dev(ci->bio, ci->io->md->bdev); + BUG_ON(bio_has_data(ci->bio)); while ((ti = dm_table_get_target(ci->map, target_nr++))) __send_duplicate_bios(ci, ti, ti->num_flush_bios, NULL); + bio_disassociate_blkg(ci->bio); + return 0; } @@ -1473,11 +1486,9 @@ static bool is_split_required_for_discard(struct dm_target *ti) } static int __send_changing_extent_only(struct clone_info *ci, struct dm_target *ti, - get_num_bios_fn get_num_bios, - is_split_required_fn is_split_required) + unsigned num_bios, bool is_split_required) { unsigned len; - unsigned num_bios; /* * Even though the device advertised support for this type of @@ -1485,11 +1496,10 @@ static int __send_changing_extent_only(struct clone_info *ci, struct dm_target * * reconfiguration might also have changed that since the * check was performed. */ - num_bios = get_num_bios ? get_num_bios(ti) : 0; if (!num_bios) return -EOPNOTSUPP; - if (is_split_required && !is_split_required(ti)) + if (!is_split_required) len = min((sector_t)ci->sector_count, max_io_len_target_boundary(ci->sector, ti)); else len = min((sector_t)ci->sector_count, max_io_len(ci->sector, ti)); @@ -1504,23 +1514,23 @@ static int __send_changing_extent_only(struct clone_info *ci, struct dm_target * static int __send_discard(struct clone_info *ci, struct dm_target *ti) { - return __send_changing_extent_only(ci, ti, get_num_discard_bios, - is_split_required_for_discard); + return __send_changing_extent_only(ci, ti, get_num_discard_bios(ti), + is_split_required_for_discard(ti)); } static int __send_secure_erase(struct clone_info *ci, struct dm_target *ti) { - return __send_changing_extent_only(ci, ti, get_num_secure_erase_bios, NULL); + return __send_changing_extent_only(ci, ti, get_num_secure_erase_bios(ti), false); } static int __send_write_same(struct clone_info *ci, struct dm_target *ti) { - return __send_changing_extent_only(ci, ti, get_num_write_same_bios, NULL); + return __send_changing_extent_only(ci, ti, get_num_write_same_bios(ti), false); } static int __send_write_zeroes(struct clone_info *ci, struct dm_target *ti) { - return __send_changing_extent_only(ci, ti, get_num_write_zeroes_bios, NULL); + return __send_changing_extent_only(ci, ti, get_num_write_zeroes_bios(ti), false); } static bool __process_abnormal_io(struct clone_info *ci, struct dm_target *ti, @@ -1598,7 +1608,16 @@ static blk_qc_t __split_and_process_bio(struct mapped_device *md, init_clone_info(&ci, md, map, bio); if (bio->bi_opf & REQ_PREFLUSH) { - ci.bio = &ci.io->md->flush_bio; + struct bio flush_bio; + + /* + * Use an on-stack bio for this, it's safe since we don't + * need to reference it after submit. It's just used as + * the basis for the clone(s). + */ + bio_init(&flush_bio, NULL, 0); + flush_bio.bi_opf = REQ_OP_WRITE | REQ_PREFLUSH | REQ_SYNC; + ci.bio = &flush_bio; ci.sector_count = 0; error = __send_empty_flush(&ci); /* dec_pending submits any data associated with flush */ @@ -1654,7 +1673,16 @@ static blk_qc_t __process_bio(struct mapped_device *md, init_clone_info(&ci, md, map, bio); if (bio->bi_opf & REQ_PREFLUSH) { - ci.bio = &ci.io->md->flush_bio; + struct bio flush_bio; + + /* + * Use an on-stack bio for this, it's safe since we don't + * need to reference it after submit. It's just used as + * the basis for the clone(s). + */ + bio_init(&flush_bio, NULL, 0); + flush_bio.bi_opf = REQ_OP_WRITE | REQ_PREFLUSH | REQ_SYNC; + ci.bio = &flush_bio; ci.sector_count = 0; error = __send_empty_flush(&ci); /* dec_pending submits any data associated with flush */ @@ -1685,10 +1713,7 @@ out: return ret; } -typedef blk_qc_t (process_bio_fn)(struct mapped_device *, struct dm_table *, struct bio *); - -static blk_qc_t __dm_make_request(struct request_queue *q, struct bio *bio, - process_bio_fn process_bio) +static blk_qc_t dm_make_request(struct request_queue *q, struct bio *bio) { struct mapped_device *md = q->queuedata; blk_qc_t ret = BLK_QC_T_NONE; @@ -1708,26 +1733,15 @@ static blk_qc_t __dm_make_request(struct request_queue *q, struct bio *bio, return ret; } - ret = process_bio(md, map, bio); + if (dm_get_md_type(md) == DM_TYPE_NVME_BIO_BASED) + ret = __process_bio(md, map, bio); + else + ret = __split_and_process_bio(md, map, bio); dm_put_live_table(md, srcu_idx); return ret; } -/* - * The request function that remaps the bio to one target and - * splits off any remainder. - */ -static blk_qc_t dm_make_request(struct request_queue *q, struct bio *bio) -{ - return __dm_make_request(q, bio, __split_and_process_bio); -} - -static blk_qc_t dm_make_request_nvme(struct request_queue *q, struct bio *bio) -{ - return __dm_make_request(q, bio, __process_bio); -} - static int dm_any_congested(void *congested_data, int bdi_bits) { int r = bdi_bits; @@ -1898,7 +1912,7 @@ static struct mapped_device *alloc_dev(int minor) INIT_LIST_HEAD(&md->table_devices); spin_lock_init(&md->uevent_lock); - md->queue = blk_alloc_queue_node(GFP_KERNEL, numa_node_id, NULL); + md->queue = blk_alloc_queue_node(GFP_KERNEL, numa_node_id); if (!md->queue) goto bad; md->queue->queuedata = md; @@ -1908,8 +1922,6 @@ static struct mapped_device *alloc_dev(int minor) if (!md->disk) goto bad; - atomic_set(&md->pending[0], 0); - atomic_set(&md->pending[1], 0); init_waitqueue_head(&md->wait); INIT_WORK(&md->work, dm_wq_work); init_waitqueue_head(&md->eventq); @@ -1940,10 +1952,6 @@ static struct mapped_device *alloc_dev(int minor) if (!md->bdev) goto bad; - bio_init(&md->flush_bio, NULL, 0); - bio_set_dev(&md->flush_bio, md->bdev); - md->flush_bio.bi_opf = REQ_OP_WRITE | REQ_PREFLUSH | REQ_SYNC; - dm_stats_init(&md->stats); /* Populate the mapping, nobody knows we exist yet */ @@ -2221,12 +2229,9 @@ int dm_setup_md_queue(struct mapped_device *md, struct dm_table *t) break; case DM_TYPE_BIO_BASED: case DM_TYPE_DAX_BIO_BASED: - dm_init_normal_md_queue(md); - blk_queue_make_request(md->queue, dm_make_request); - break; case DM_TYPE_NVME_BIO_BASED: dm_init_normal_md_queue(md); - blk_queue_make_request(md->queue, dm_make_request_nvme); + blk_queue_make_request(md->queue, dm_make_request); break; case DM_TYPE_NONE: WARN_ON_ONCE(true); diff --git a/drivers/md/md.c b/drivers/md/md.c index fc488cb30a94..9a0a1e0934d5 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -334,7 +334,6 @@ static blk_qc_t md_make_request(struct request_queue *q, struct bio *bio) const int sgrp = op_stat_group(bio_op(bio)); struct mddev *mddev = q->queuedata; unsigned int sectors; - int cpu; blk_queue_split(q, &bio); @@ -359,9 +358,9 @@ static blk_qc_t md_make_request(struct request_queue *q, struct bio *bio) md_handle_request(mddev, bio); - cpu = part_stat_lock(); - part_stat_inc(cpu, &mddev->gendisk->part0, ios[sgrp]); - part_stat_add(cpu, &mddev->gendisk->part0, sectors[sgrp], sectors); + part_stat_lock(); + part_stat_inc(&mddev->gendisk->part0, ios[sgrp]); + part_stat_add(&mddev->gendisk->part0, sectors[sgrp], sectors); part_stat_unlock(); return BLK_QC_T_NONE; diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c index ac1cffd2a09b..f3fb5bb8c82a 100644 --- a/drivers/md/raid0.c +++ b/drivers/md/raid0.c @@ -542,7 +542,7 @@ static void raid0_handle_discard(struct mddev *mddev, struct bio *bio) !discard_bio) continue; bio_chain(discard_bio, bio); - bio_clone_blkcg_association(discard_bio, bio); + bio_clone_blkg_association(discard_bio, bio); if (mddev->gendisk) trace_block_bio_remap(bdev_get_queue(rdev->bdev), discard_bio, disk_devt(mddev->gendisk), diff --git a/drivers/memstick/core/memstick.c b/drivers/memstick/core/memstick.c index 76382c858c35..1246d69ba187 100644 --- a/drivers/memstick/core/memstick.c +++ b/drivers/memstick/core/memstick.c @@ -18,6 +18,7 @@ #include <linux/delay.h> #include <linux/slab.h> #include <linux/module.h> +#include <linux/pm_runtime.h> #define DRIVER_NAME "memstick" @@ -436,6 +437,7 @@ static void memstick_check(struct work_struct *work) struct memstick_dev *card; dev_dbg(&host->dev, "memstick_check started\n"); + pm_runtime_get_noresume(host->dev.parent); mutex_lock(&host->lock); if (!host->card) { if (memstick_power_on(host)) @@ -479,6 +481,7 @@ out_power_off: host->set_param(host, MEMSTICK_POWER, MEMSTICK_POWER_OFF); mutex_unlock(&host->lock); + pm_runtime_put(host->dev.parent); dev_dbg(&host->dev, "memstick_check finished\n"); } diff --git a/drivers/memstick/core/ms_block.c b/drivers/memstick/core/ms_block.c index 8a02f11076f9..82daccc9ea62 100644 --- a/drivers/memstick/core/ms_block.c +++ b/drivers/memstick/core/ms_block.c @@ -15,7 +15,7 @@ #define pr_fmt(fmt) DRIVER_NAME ": " fmt #include <linux/module.h> -#include <linux/blkdev.h> +#include <linux/blk-mq.h> #include <linux/memstick.h> #include <linux/idr.h> #include <linux/hdreg.h> @@ -1873,69 +1873,65 @@ static void msb_io_work(struct work_struct *work) struct msb_data *msb = container_of(work, struct msb_data, io_work); int page, error, len; sector_t lba; - unsigned long flags; struct scatterlist *sg = msb->prealloc_sg; + struct request *req; dbg_verbose("IO: work started"); while (1) { - spin_lock_irqsave(&msb->q_lock, flags); + spin_lock_irq(&msb->q_lock); if (msb->need_flush_cache) { msb->need_flush_cache = false; - spin_unlock_irqrestore(&msb->q_lock, flags); + spin_unlock_irq(&msb->q_lock); msb_cache_flush(msb); continue; } - if (!msb->req) { - msb->req = blk_fetch_request(msb->queue); - if (!msb->req) { - dbg_verbose("IO: no more requests exiting"); - spin_unlock_irqrestore(&msb->q_lock, flags); - return; - } + req = msb->req; + if (!req) { + dbg_verbose("IO: no more requests exiting"); + spin_unlock_irq(&msb->q_lock); + return; } - spin_unlock_irqrestore(&msb->q_lock, flags); - - /* If card was removed meanwhile */ - if (!msb->req) - return; + spin_unlock_irq(&msb->q_lock); /* process the request */ dbg_verbose("IO: processing new request"); - blk_rq_map_sg(msb->queue, msb->req, sg); + blk_rq_map_sg(msb->queue, req, sg); - lba = blk_rq_pos(msb->req); + lba = blk_rq_pos(req); sector_div(lba, msb->page_size / 512); page = sector_div(lba, msb->pages_in_block); if (rq_data_dir(msb->req) == READ) error = msb_do_read_request(msb, lba, page, sg, - blk_rq_bytes(msb->req), &len); + blk_rq_bytes(req), &len); else error = msb_do_write_request(msb, lba, page, sg, - blk_rq_bytes(msb->req), &len); - - spin_lock_irqsave(&msb->q_lock, flags); + blk_rq_bytes(req), &len); - if (len) - if (!__blk_end_request(msb->req, BLK_STS_OK, len)) - msb->req = NULL; + if (len && !blk_update_request(req, BLK_STS_OK, len)) { + __blk_mq_end_request(req, BLK_STS_OK); + spin_lock_irq(&msb->q_lock); + msb->req = NULL; + spin_unlock_irq(&msb->q_lock); + } if (error && msb->req) { blk_status_t ret = errno_to_blk_status(error); + dbg_verbose("IO: ending one sector of the request with error"); - if (!__blk_end_request(msb->req, ret, msb->page_size)) - msb->req = NULL; + blk_mq_end_request(req, ret); + spin_lock_irq(&msb->q_lock); + msb->req = NULL; + spin_unlock_irq(&msb->q_lock); } if (msb->req) dbg_verbose("IO: request still pending"); - - spin_unlock_irqrestore(&msb->q_lock, flags); } } @@ -2002,29 +1998,40 @@ static int msb_bd_getgeo(struct block_device *bdev, return 0; } -static void msb_submit_req(struct request_queue *q) +static blk_status_t msb_queue_rq(struct blk_mq_hw_ctx *hctx, + const struct blk_mq_queue_data *bd) { - struct memstick_dev *card = q->queuedata; + struct memstick_dev *card = hctx->queue->queuedata; struct msb_data *msb = memstick_get_drvdata(card); - struct request *req = NULL; + struct request *req = bd->rq; dbg_verbose("Submit request"); + spin_lock_irq(&msb->q_lock); + if (msb->card_dead) { dbg("Refusing requests on removed card"); WARN_ON(!msb->io_queue_stopped); - while ((req = blk_fetch_request(q)) != NULL) - __blk_end_request_all(req, BLK_STS_IOERR); - return; + spin_unlock_irq(&msb->q_lock); + blk_mq_start_request(req); + return BLK_STS_IOERR; } - if (msb->req) - return; + if (msb->req) { + spin_unlock_irq(&msb->q_lock); + return BLK_STS_DEV_RESOURCE; + } + + blk_mq_start_request(req); + msb->req = req; if (!msb->io_queue_stopped) queue_work(msb->io_queue, &msb->io_work); + + spin_unlock_irq(&msb->q_lock); + return BLK_STS_OK; } static int msb_check_card(struct memstick_dev *card) @@ -2040,21 +2047,20 @@ static void msb_stop(struct memstick_dev *card) dbg("Stopping all msblock IO"); + blk_mq_stop_hw_queues(msb->queue); spin_lock_irqsave(&msb->q_lock, flags); - blk_stop_queue(msb->queue); msb->io_queue_stopped = true; spin_unlock_irqrestore(&msb->q_lock, flags); del_timer_sync(&msb->cache_flush_timer); flush_workqueue(msb->io_queue); + spin_lock_irqsave(&msb->q_lock, flags); if (msb->req) { - spin_lock_irqsave(&msb->q_lock, flags); - blk_requeue_request(msb->queue, msb->req); + blk_mq_requeue_request(msb->req, false); msb->req = NULL; - spin_unlock_irqrestore(&msb->q_lock, flags); } - + spin_unlock_irqrestore(&msb->q_lock, flags); } static void msb_start(struct memstick_dev *card) @@ -2077,9 +2083,7 @@ static void msb_start(struct memstick_dev *card) msb->need_flush_cache = true; msb->io_queue_stopped = false; - spin_lock_irqsave(&msb->q_lock, flags); - blk_start_queue(msb->queue); - spin_unlock_irqrestore(&msb->q_lock, flags); + blk_mq_start_hw_queues(msb->queue); queue_work(msb->io_queue, &msb->io_work); @@ -2092,6 +2096,10 @@ static const struct block_device_operations msb_bdops = { .owner = THIS_MODULE }; +static const struct blk_mq_ops msb_mq_ops = { + .queue_rq = msb_queue_rq, +}; + /* Registers the block device */ static int msb_init_disk(struct memstick_dev *card) { @@ -2112,9 +2120,11 @@ static int msb_init_disk(struct memstick_dev *card) goto out_release_id; } - msb->queue = blk_init_queue(msb_submit_req, &msb->q_lock); - if (!msb->queue) { - rc = -ENOMEM; + msb->queue = blk_mq_init_sq_queue(&msb->tag_set, &msb_mq_ops, 2, + BLK_MQ_F_SHOULD_MERGE); + if (IS_ERR(msb->queue)) { + rc = PTR_ERR(msb->queue); + msb->queue = NULL; goto out_put_disk; } @@ -2202,12 +2212,13 @@ static void msb_remove(struct memstick_dev *card) /* Take care of unhandled + new requests from now on */ spin_lock_irqsave(&msb->q_lock, flags); msb->card_dead = true; - blk_start_queue(msb->queue); spin_unlock_irqrestore(&msb->q_lock, flags); + blk_mq_start_hw_queues(msb->queue); /* Remove the disk */ del_gendisk(msb->disk); blk_cleanup_queue(msb->queue); + blk_mq_free_tag_set(&msb->tag_set); msb->queue = NULL; mutex_lock(&msb_disk_lock); diff --git a/drivers/memstick/core/ms_block.h b/drivers/memstick/core/ms_block.h index 53962c3b21df..9ba84e0ced63 100644 --- a/drivers/memstick/core/ms_block.h +++ b/drivers/memstick/core/ms_block.h @@ -152,6 +152,7 @@ struct msb_data { struct gendisk *disk; struct request_queue *queue; spinlock_t q_lock; + struct blk_mq_tag_set tag_set; struct hd_geometry geometry; struct attribute_group attr_group; struct request *req; diff --git a/drivers/memstick/core/mspro_block.c b/drivers/memstick/core/mspro_block.c index 0cd30dcb6801..aba50ec98b4d 100644 --- a/drivers/memstick/core/mspro_block.c +++ b/drivers/memstick/core/mspro_block.c @@ -12,7 +12,7 @@ * */ -#include <linux/blkdev.h> +#include <linux/blk-mq.h> #include <linux/idr.h> #include <linux/hdreg.h> #include <linux/kthread.h> @@ -142,6 +142,7 @@ struct mspro_block_data { struct gendisk *disk; struct request_queue *queue; struct request *block_req; + struct blk_mq_tag_set tag_set; spinlock_t q_lock; unsigned short page_size; @@ -152,7 +153,6 @@ struct mspro_block_data { unsigned char system; unsigned char read_only:1, eject:1, - has_request:1, data_dir:1, active:1; unsigned char transfer_cmd; @@ -694,13 +694,12 @@ static void h_mspro_block_setup_cmd(struct memstick_dev *card, u64 offset, /*** Data transfer ***/ -static int mspro_block_issue_req(struct memstick_dev *card, int chunk) +static int mspro_block_issue_req(struct memstick_dev *card, bool chunk) { struct mspro_block_data *msb = memstick_get_drvdata(card); u64 t_off; unsigned int count; -try_again: while (chunk) { msb->current_page = 0; msb->current_seg = 0; @@ -709,9 +708,17 @@ try_again: msb->req_sg); if (!msb->seg_count) { - chunk = __blk_end_request_cur(msb->block_req, - BLK_STS_RESOURCE); - continue; + unsigned int bytes = blk_rq_cur_bytes(msb->block_req); + + chunk = blk_update_request(msb->block_req, + BLK_STS_RESOURCE, + bytes); + if (chunk) + continue; + __blk_mq_end_request(msb->block_req, + BLK_STS_RESOURCE); + msb->block_req = NULL; + break; } t_off = blk_rq_pos(msb->block_req); @@ -729,30 +736,22 @@ try_again: return 0; } - dev_dbg(&card->dev, "blk_fetch\n"); - msb->block_req = blk_fetch_request(msb->queue); - if (!msb->block_req) { - dev_dbg(&card->dev, "issue end\n"); - return -EAGAIN; - } - - dev_dbg(&card->dev, "trying again\n"); - chunk = 1; - goto try_again; + return 1; } static int mspro_block_complete_req(struct memstick_dev *card, int error) { struct mspro_block_data *msb = memstick_get_drvdata(card); - int chunk, cnt; + int cnt; + bool chunk; unsigned int t_len = 0; unsigned long flags; spin_lock_irqsave(&msb->q_lock, flags); - dev_dbg(&card->dev, "complete %d, %d\n", msb->has_request ? 1 : 0, + dev_dbg(&card->dev, "complete %d, %d\n", msb->block_req ? 1 : 0, error); - if (msb->has_request) { + if (msb->block_req) { /* Nothing to do - not really an error */ if (error == -EAGAIN) error = 0; @@ -777,15 +776,17 @@ static int mspro_block_complete_req(struct memstick_dev *card, int error) if (error && !t_len) t_len = blk_rq_cur_bytes(msb->block_req); - chunk = __blk_end_request(msb->block_req, + chunk = blk_update_request(msb->block_req, errno_to_blk_status(error), t_len); - - error = mspro_block_issue_req(card, chunk); - - if (!error) - goto out; - else - msb->has_request = 0; + if (chunk) { + error = mspro_block_issue_req(card, chunk); + if (!error) + goto out; + } else { + __blk_mq_end_request(msb->block_req, + errno_to_blk_status(error)); + msb->block_req = NULL; + } } else { if (!error) error = -EAGAIN; @@ -806,8 +807,8 @@ static void mspro_block_stop(struct memstick_dev *card) while (1) { spin_lock_irqsave(&msb->q_lock, flags); - if (!msb->has_request) { - blk_stop_queue(msb->queue); + if (!msb->block_req) { + blk_mq_stop_hw_queues(msb->queue); rc = 1; } spin_unlock_irqrestore(&msb->q_lock, flags); @@ -822,32 +823,37 @@ static void mspro_block_stop(struct memstick_dev *card) static void mspro_block_start(struct memstick_dev *card) { struct mspro_block_data *msb = memstick_get_drvdata(card); - unsigned long flags; - spin_lock_irqsave(&msb->q_lock, flags); - blk_start_queue(msb->queue); - spin_unlock_irqrestore(&msb->q_lock, flags); + blk_mq_start_hw_queues(msb->queue); } -static void mspro_block_submit_req(struct request_queue *q) +static blk_status_t mspro_queue_rq(struct blk_mq_hw_ctx *hctx, + const struct blk_mq_queue_data *bd) { - struct memstick_dev *card = q->queuedata; + struct memstick_dev *card = hctx->queue->queuedata; struct mspro_block_data *msb = memstick_get_drvdata(card); - struct request *req = NULL; - if (msb->has_request) - return; + spin_lock_irq(&msb->q_lock); - if (msb->eject) { - while ((req = blk_fetch_request(q)) != NULL) - __blk_end_request_all(req, BLK_STS_IOERR); + if (msb->block_req) { + spin_unlock_irq(&msb->q_lock); + return BLK_STS_DEV_RESOURCE; + } - return; + if (msb->eject) { + spin_unlock_irq(&msb->q_lock); + blk_mq_start_request(bd->rq); + return BLK_STS_IOERR; } - msb->has_request = 1; - if (mspro_block_issue_req(card, 0)) - msb->has_request = 0; + msb->block_req = bd->rq; + blk_mq_start_request(bd->rq); + + if (mspro_block_issue_req(card, true)) + msb->block_req = NULL; + + spin_unlock_irq(&msb->q_lock); + return BLK_STS_OK; } /*** Initialization ***/ @@ -1167,6 +1173,10 @@ static int mspro_block_init_card(struct memstick_dev *card) } +static const struct blk_mq_ops mspro_mq_ops = { + .queue_rq = mspro_queue_rq, +}; + static int mspro_block_init_disk(struct memstick_dev *card) { struct mspro_block_data *msb = memstick_get_drvdata(card); @@ -1206,9 +1216,11 @@ static int mspro_block_init_disk(struct memstick_dev *card) goto out_release_id; } - msb->queue = blk_init_queue(mspro_block_submit_req, &msb->q_lock); - if (!msb->queue) { - rc = -ENOMEM; + msb->queue = blk_mq_init_sq_queue(&msb->tag_set, &mspro_mq_ops, 2, + BLK_MQ_F_SHOULD_MERGE); + if (IS_ERR(msb->queue)) { + rc = PTR_ERR(msb->queue); + msb->queue = NULL; goto out_put_disk; } @@ -1318,13 +1330,14 @@ static void mspro_block_remove(struct memstick_dev *card) spin_lock_irqsave(&msb->q_lock, flags); msb->eject = 1; - blk_start_queue(msb->queue); spin_unlock_irqrestore(&msb->q_lock, flags); + blk_mq_start_hw_queues(msb->queue); del_gendisk(msb->disk); dev_dbg(&card->dev, "mspro block remove\n"); blk_cleanup_queue(msb->queue); + blk_mq_free_tag_set(&msb->tag_set); msb->queue = NULL; sysfs_remove_group(&card->dev.kobj, &msb->attr_group); @@ -1344,8 +1357,9 @@ static int mspro_block_suspend(struct memstick_dev *card, pm_message_t state) struct mspro_block_data *msb = memstick_get_drvdata(card); unsigned long flags; + blk_mq_stop_hw_queues(msb->queue); + spin_lock_irqsave(&msb->q_lock, flags); - blk_stop_queue(msb->queue); msb->active = 0; spin_unlock_irqrestore(&msb->q_lock, flags); @@ -1355,7 +1369,6 @@ static int mspro_block_suspend(struct memstick_dev *card, pm_message_t state) static int mspro_block_resume(struct memstick_dev *card) { struct mspro_block_data *msb = memstick_get_drvdata(card); - unsigned long flags; int rc = 0; #ifdef CONFIG_MEMSTICK_UNSAFE_RESUME @@ -1401,9 +1414,7 @@ out_unlock: #endif /* CONFIG_MEMSTICK_UNSAFE_RESUME */ - spin_lock_irqsave(&msb->q_lock, flags); - blk_start_queue(msb->queue); - spin_unlock_irqrestore(&msb->q_lock, flags); + blk_mq_start_hw_queues(msb->queue); return rc; } diff --git a/drivers/memstick/host/rtsx_usb_ms.c b/drivers/memstick/host/rtsx_usb_ms.c index 4f64563df7de..97308dc28ccf 100644 --- a/drivers/memstick/host/rtsx_usb_ms.c +++ b/drivers/memstick/host/rtsx_usb_ms.c @@ -40,15 +40,14 @@ struct rtsx_usb_ms { struct mutex host_mutex; struct work_struct handle_req; - - struct task_struct *detect_ms; - struct completion detect_ms_exit; + struct delayed_work poll_card; u8 ssc_depth; unsigned int clock; int power_mode; unsigned char ifmode; bool eject; + bool system_suspending; }; static inline struct device *ms_dev(struct rtsx_usb_ms *host) @@ -545,7 +544,7 @@ static void rtsx_usb_ms_handle_req(struct work_struct *work) host->req->error); } } while (!rc); - pm_runtime_put(ms_dev(host)); + pm_runtime_put_sync(ms_dev(host)); } } @@ -585,14 +584,14 @@ static int rtsx_usb_ms_set_param(struct memstick_host *msh, break; if (value == MEMSTICK_POWER_ON) { - pm_runtime_get_sync(ms_dev(host)); + pm_runtime_get_noresume(ms_dev(host)); err = ms_power_on(host); + if (err) + pm_runtime_put_noidle(ms_dev(host)); } else if (value == MEMSTICK_POWER_OFF) { err = ms_power_off(host); - if (host->msh->card) + if (!err) pm_runtime_put_noidle(ms_dev(host)); - else - pm_runtime_put(ms_dev(host)); } else err = -EINVAL; if (!err) @@ -638,12 +637,16 @@ static int rtsx_usb_ms_set_param(struct memstick_host *msh, } out: mutex_unlock(&ucr->dev_mutex); - pm_runtime_put(ms_dev(host)); + pm_runtime_put_sync(ms_dev(host)); /* power-on delay */ - if (param == MEMSTICK_POWER && value == MEMSTICK_POWER_ON) + if (param == MEMSTICK_POWER && value == MEMSTICK_POWER_ON) { usleep_range(10000, 12000); + if (!host->eject) + schedule_delayed_work(&host->poll_card, 100); + } + dev_dbg(ms_dev(host), "%s: return = %d\n", __func__, err); return err; } @@ -654,9 +657,24 @@ static int rtsx_usb_ms_suspend(struct device *dev) struct rtsx_usb_ms *host = dev_get_drvdata(dev); struct memstick_host *msh = host->msh; - dev_dbg(ms_dev(host), "--> %s\n", __func__); + /* Since we use rtsx_usb's resume callback to runtime resume its + * children to implement remote wakeup signaling, this causes + * rtsx_usb_ms' runtime resume callback runs after its suspend + * callback: + * rtsx_usb_ms_suspend() + * rtsx_usb_resume() + * -> rtsx_usb_ms_runtime_resume() + * -> memstick_detect_change() + * + * rtsx_usb_suspend() + * + * To avoid this, skip runtime resume/suspend if system suspend is + * underway. + */ + host->system_suspending = true; memstick_suspend_host(msh); + return 0; } @@ -665,58 +683,85 @@ static int rtsx_usb_ms_resume(struct device *dev) struct rtsx_usb_ms *host = dev_get_drvdata(dev); struct memstick_host *msh = host->msh; - dev_dbg(ms_dev(host), "--> %s\n", __func__); - memstick_resume_host(msh); + host->system_suspending = false; + return 0; } #endif /* CONFIG_PM_SLEEP */ -/* - * Thread function of ms card slot detection. The thread starts right after - * successful host addition. It stops while the driver removal function sets - * host->eject true. - */ -static int rtsx_usb_detect_ms_card(void *__host) +#ifdef CONFIG_PM +static int rtsx_usb_ms_runtime_suspend(struct device *dev) { - struct rtsx_usb_ms *host = (struct rtsx_usb_ms *)__host; + struct rtsx_usb_ms *host = dev_get_drvdata(dev); + + if (host->system_suspending) + return 0; + + if (host->msh->card || host->power_mode != MEMSTICK_POWER_OFF) + return -EAGAIN; + + return 0; +} + +static int rtsx_usb_ms_runtime_resume(struct device *dev) +{ + struct rtsx_usb_ms *host = dev_get_drvdata(dev); + + + if (host->system_suspending) + return 0; + + memstick_detect_change(host->msh); + + return 0; +} +#endif /* CONFIG_PM */ + +static const struct dev_pm_ops rtsx_usb_ms_pm_ops = { + SET_SYSTEM_SLEEP_PM_OPS(rtsx_usb_ms_suspend, rtsx_usb_ms_resume) + SET_RUNTIME_PM_OPS(rtsx_usb_ms_runtime_suspend, rtsx_usb_ms_runtime_resume, NULL) +}; + + +static void rtsx_usb_ms_poll_card(struct work_struct *work) +{ + struct rtsx_usb_ms *host = container_of(work, struct rtsx_usb_ms, + poll_card.work); struct rtsx_ucr *ucr = host->ucr; - u8 val = 0; int err; + u8 val; - for (;;) { - pm_runtime_get_sync(ms_dev(host)); - mutex_lock(&ucr->dev_mutex); - - /* Check pending MS card changes */ - err = rtsx_usb_read_register(ucr, CARD_INT_PEND, &val); - if (err) { - mutex_unlock(&ucr->dev_mutex); - goto poll_again; - } + if (host->eject || host->power_mode != MEMSTICK_POWER_ON) + return; - /* Clear the pending */ - rtsx_usb_write_register(ucr, CARD_INT_PEND, - XD_INT | MS_INT | SD_INT, - XD_INT | MS_INT | SD_INT); + pm_runtime_get_sync(ms_dev(host)); + mutex_lock(&ucr->dev_mutex); + /* Check pending MS card changes */ + err = rtsx_usb_read_register(ucr, CARD_INT_PEND, &val); + if (err) { mutex_unlock(&ucr->dev_mutex); + goto poll_again; + } - if (val & MS_INT) { - dev_dbg(ms_dev(host), "MS slot change detected\n"); - memstick_detect_change(host->msh); - } + /* Clear the pending */ + rtsx_usb_write_register(ucr, CARD_INT_PEND, + XD_INT | MS_INT | SD_INT, + XD_INT | MS_INT | SD_INT); -poll_again: - pm_runtime_put(ms_dev(host)); - if (host->eject) - break; + mutex_unlock(&ucr->dev_mutex); - schedule_timeout_idle(HZ); + if (val & MS_INT) { + dev_dbg(ms_dev(host), "MS slot change detected\n"); + memstick_detect_change(host->msh); } - complete(&host->detect_ms_exit); - return 0; +poll_again: + pm_runtime_put_sync(ms_dev(host)); + + if (!host->eject && host->power_mode == MEMSTICK_POWER_ON) + schedule_delayed_work(&host->poll_card, 100); } static int rtsx_usb_ms_drv_probe(struct platform_device *pdev) @@ -747,45 +792,42 @@ static int rtsx_usb_ms_drv_probe(struct platform_device *pdev) mutex_init(&host->host_mutex); INIT_WORK(&host->handle_req, rtsx_usb_ms_handle_req); - init_completion(&host->detect_ms_exit); - host->detect_ms = kthread_create(rtsx_usb_detect_ms_card, host, - "rtsx_usb_ms_%d", pdev->id); - if (IS_ERR(host->detect_ms)) { - dev_dbg(&(pdev->dev), - "Unable to create polling thread.\n"); - err = PTR_ERR(host->detect_ms); - goto err_out; - } + INIT_DELAYED_WORK(&host->poll_card, rtsx_usb_ms_poll_card); msh->request = rtsx_usb_ms_request; msh->set_param = rtsx_usb_ms_set_param; msh->caps = MEMSTICK_CAP_PAR4; - pm_runtime_enable(&pdev->dev); + pm_runtime_get_noresume(ms_dev(host)); + pm_runtime_set_active(ms_dev(host)); + pm_runtime_enable(ms_dev(host)); + err = memstick_add_host(msh); if (err) goto err_out; - wake_up_process(host->detect_ms); + pm_runtime_put(ms_dev(host)); + return 0; err_out: memstick_free_host(msh); + pm_runtime_disable(ms_dev(host)); + pm_runtime_put_noidle(ms_dev(host)); return err; } static int rtsx_usb_ms_drv_remove(struct platform_device *pdev) { struct rtsx_usb_ms *host = platform_get_drvdata(pdev); - struct memstick_host *msh; + struct memstick_host *msh = host->msh; int err; - msh = host->msh; host->eject = true; cancel_work_sync(&host->handle_req); mutex_lock(&host->host_mutex); if (host->req) { - dev_dbg(&(pdev->dev), + dev_dbg(ms_dev(host), "%s: Controller removed during transfer\n", dev_name(&msh->dev)); host->req->error = -ENOMEDIUM; @@ -797,7 +839,6 @@ static int rtsx_usb_ms_drv_remove(struct platform_device *pdev) } mutex_unlock(&host->host_mutex); - wait_for_completion(&host->detect_ms_exit); memstick_remove_host(msh); memstick_free_host(msh); @@ -807,18 +848,15 @@ static int rtsx_usb_ms_drv_remove(struct platform_device *pdev) if (pm_runtime_active(ms_dev(host))) pm_runtime_put(ms_dev(host)); - pm_runtime_disable(&pdev->dev); + pm_runtime_disable(ms_dev(host)); platform_set_drvdata(pdev, NULL); - dev_dbg(&(pdev->dev), + dev_dbg(ms_dev(host), ": Realtek USB Memstick controller has been removed\n"); return 0; } -static SIMPLE_DEV_PM_OPS(rtsx_usb_ms_pm_ops, - rtsx_usb_ms_suspend, rtsx_usb_ms_resume); - static struct platform_device_id rtsx_usb_ms_ids[] = { { .name = "rtsx_usb_ms", diff --git a/drivers/message/fusion/mptfc.c b/drivers/message/fusion/mptfc.c index b15fdc626fb8..4314a3352b96 100644 --- a/drivers/message/fusion/mptfc.c +++ b/drivers/message/fusion/mptfc.c @@ -129,7 +129,6 @@ static struct scsi_host_template mptfc_driver_template = { .sg_tablesize = MPT_SCSI_SG_DEPTH, .max_sectors = 8192, .cmd_per_lun = 7, - .use_clustering = ENABLE_CLUSTERING, .shost_attrs = mptscsih_host_attrs, }; diff --git a/drivers/message/fusion/mptsas.c b/drivers/message/fusion/mptsas.c index 9b404fc69c90..612cb5bc1333 100644 --- a/drivers/message/fusion/mptsas.c +++ b/drivers/message/fusion/mptsas.c @@ -1992,7 +1992,6 @@ static struct scsi_host_template mptsas_driver_template = { .sg_tablesize = MPT_SCSI_SG_DEPTH, .max_sectors = 8192, .cmd_per_lun = 7, - .use_clustering = ENABLE_CLUSTERING, .shost_attrs = mptscsih_host_attrs, .no_write_same = 1, }; diff --git a/drivers/message/fusion/mptspi.c b/drivers/message/fusion/mptspi.c index 9a336a161d9f..7172b0b16bdd 100644 --- a/drivers/message/fusion/mptspi.c +++ b/drivers/message/fusion/mptspi.c @@ -848,7 +848,6 @@ static struct scsi_host_template mptspi_driver_template = { .sg_tablesize = MPT_SCSI_SG_DEPTH, .max_sectors = 8192, .cmd_per_lun = 7, - .use_clustering = ENABLE_CLUSTERING, .shost_attrs = mptscsih_host_attrs, }; diff --git a/drivers/misc/Makefile b/drivers/misc/Makefile index af22bbc3d00c..fe3134cf3008 100644 --- a/drivers/misc/Makefile +++ b/drivers/misc/Makefile @@ -57,4 +57,4 @@ obj-$(CONFIG_ASPEED_LPC_CTRL) += aspeed-lpc-ctrl.o obj-$(CONFIG_ASPEED_LPC_SNOOP) += aspeed-lpc-snoop.o obj-$(CONFIG_PCI_ENDPOINT_TEST) += pci_endpoint_test.o obj-$(CONFIG_OCXL) += ocxl/ -obj-$(CONFIG_MISC_RTSX) += cardreader/ +obj-y += cardreader/ diff --git a/drivers/misc/cardreader/Kconfig b/drivers/misc/cardreader/Kconfig index 69e815e32a8c..ed8993b5d058 100644 --- a/drivers/misc/cardreader/Kconfig +++ b/drivers/misc/cardreader/Kconfig @@ -1,3 +1,14 @@ +config MISC_ALCOR_PCI + tristate "Alcor Micro/Alcor Link PCI-E card reader" + depends on PCI + select MFD_CORE + help + This supports for Alcor Micro PCI-Express card reader including au6601, + au6621. + Alcor Micro card readers support access to many types of memory cards, + such as Memory Stick, Memory Stick Pro, Secure Digital and + MultiMediaCard. + config MISC_RTSX_PCI tristate "Realtek PCI-E card reader" depends on PCI diff --git a/drivers/misc/cardreader/Makefile b/drivers/misc/cardreader/Makefile index 9fabfcc6fa7a..9882d2a1025c 100644 --- a/drivers/misc/cardreader/Makefile +++ b/drivers/misc/cardreader/Makefile @@ -1,4 +1,4 @@ -rtsx_pci-objs := rtsx_pcr.o rts5209.o rts5229.o rtl8411.o rts5227.o rts5249.o rts5260.o - +obj-$(CONFIG_MISC_ALCOR_PCI) += alcor_pci.o obj-$(CONFIG_MISC_RTSX_PCI) += rtsx_pci.o +rtsx_pci-objs := rtsx_pcr.o rts5209.o rts5229.o rtl8411.o rts5227.o rts5249.o rts5260.o obj-$(CONFIG_MISC_RTSX_USB) += rtsx_usb.o diff --git a/drivers/misc/cardreader/alcor_pci.c b/drivers/misc/cardreader/alcor_pci.c new file mode 100644 index 000000000000..bcb10fa4bc3a --- /dev/null +++ b/drivers/misc/cardreader/alcor_pci.c @@ -0,0 +1,371 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* + * Copyright (C) 2018 Oleksij Rempel <linux@rempel-privat.de> + * + * Driver for Alcor Micro AU6601 and AU6621 controllers + */ + +#include <linux/delay.h> +#include <linux/interrupt.h> +#include <linux/io.h> +#include <linux/irq.h> +#include <linux/mfd/core.h> +#include <linux/module.h> +#include <linux/pci.h> +#include <linux/platform_device.h> +#include <linux/pm.h> + +#include <linux/alcor_pci.h> + +#define DRV_NAME_ALCOR_PCI "alcor_pci" + +static DEFINE_IDA(alcor_pci_idr); + +static struct mfd_cell alcor_pci_cells[] = { + [ALCOR_SD_CARD] = { + .name = DRV_NAME_ALCOR_PCI_SDMMC, + }, + [ALCOR_MS_CARD] = { + .name = DRV_NAME_ALCOR_PCI_MS, + }, +}; + +static const struct alcor_dev_cfg alcor_cfg = { + .dma = 0, +}; + +static const struct alcor_dev_cfg au6621_cfg = { + .dma = 1, +}; + +static const struct pci_device_id pci_ids[] = { + { PCI_DEVICE(PCI_ID_ALCOR_MICRO, PCI_ID_AU6601), + .driver_data = (kernel_ulong_t)&alcor_cfg }, + { PCI_DEVICE(PCI_ID_ALCOR_MICRO, PCI_ID_AU6621), + .driver_data = (kernel_ulong_t)&au6621_cfg }, + { }, +}; +MODULE_DEVICE_TABLE(pci, pci_ids); + +void alcor_write8(struct alcor_pci_priv *priv, u8 val, unsigned int addr) +{ + writeb(val, priv->iobase + addr); +} +EXPORT_SYMBOL_GPL(alcor_write8); + +void alcor_write16(struct alcor_pci_priv *priv, u16 val, unsigned int addr) +{ + writew(val, priv->iobase + addr); +} +EXPORT_SYMBOL_GPL(alcor_write16); + +void alcor_write32(struct alcor_pci_priv *priv, u32 val, unsigned int addr) +{ + writel(val, priv->iobase + addr); +} +EXPORT_SYMBOL_GPL(alcor_write32); + +void alcor_write32be(struct alcor_pci_priv *priv, u32 val, unsigned int addr) +{ + iowrite32be(val, priv->iobase + addr); +} +EXPORT_SYMBOL_GPL(alcor_write32be); + +u8 alcor_read8(struct alcor_pci_priv *priv, unsigned int addr) +{ + return readb(priv->iobase + addr); +} +EXPORT_SYMBOL_GPL(alcor_read8); + +u32 alcor_read32(struct alcor_pci_priv *priv, unsigned int addr) +{ + return readl(priv->iobase + addr); +} +EXPORT_SYMBOL_GPL(alcor_read32); + +u32 alcor_read32be(struct alcor_pci_priv *priv, unsigned int addr) +{ + return ioread32be(priv->iobase + addr); +} +EXPORT_SYMBOL_GPL(alcor_read32be); + +static int alcor_pci_find_cap_offset(struct alcor_pci_priv *priv, + struct pci_dev *pci) +{ + int where; + u8 val8; + u32 val32; + + where = ALCOR_CAP_START_OFFSET; + pci_read_config_byte(pci, where, &val8); + if (!val8) + return 0; + + where = (int)val8; + while (1) { + pci_read_config_dword(pci, where, &val32); + if (val32 == 0xffffffff) { + dev_dbg(priv->dev, "find_cap_offset invalid value %x.\n", + val32); + return 0; + } + + if ((val32 & 0xff) == 0x10) { + dev_dbg(priv->dev, "pcie cap offset: %x\n", where); + return where; + } + + if ((val32 & 0xff00) == 0x00) { + dev_dbg(priv->dev, "pci_find_cap_offset invalid value %x.\n", + val32); + break; + } + where = (int)((val32 >> 8) & 0xff); + } + + return 0; +} + +static void alcor_pci_init_check_aspm(struct alcor_pci_priv *priv) +{ + struct pci_dev *pci; + int where; + u32 val32; + + priv->pdev_cap_off = alcor_pci_find_cap_offset(priv, priv->pdev); + priv->parent_cap_off = alcor_pci_find_cap_offset(priv, + priv->parent_pdev); + + if ((priv->pdev_cap_off == 0) || (priv->parent_cap_off == 0)) { + dev_dbg(priv->dev, "pci_cap_off: %x, parent_cap_off: %x\n", + priv->pdev_cap_off, priv->parent_cap_off); + return; + } + + /* link capability */ + pci = priv->pdev; + where = priv->pdev_cap_off + ALCOR_PCIE_LINK_CAP_OFFSET; + pci_read_config_dword(pci, where, &val32); + priv->pdev_aspm_cap = (u8)(val32 >> 10) & 0x03; + + pci = priv->parent_pdev; + where = priv->parent_cap_off + ALCOR_PCIE_LINK_CAP_OFFSET; + pci_read_config_dword(pci, where, &val32); + priv->parent_aspm_cap = (u8)(val32 >> 10) & 0x03; + + if (priv->pdev_aspm_cap != priv->parent_aspm_cap) { + u8 aspm_cap; + + dev_dbg(priv->dev, "pdev_aspm_cap: %x, parent_aspm_cap: %x\n", + priv->pdev_aspm_cap, priv->parent_aspm_cap); + aspm_cap = priv->pdev_aspm_cap & priv->parent_aspm_cap; + priv->pdev_aspm_cap = aspm_cap; + priv->parent_aspm_cap = aspm_cap; + } + + dev_dbg(priv->dev, "ext_config_dev_aspm: %x, pdev_aspm_cap: %x\n", + priv->ext_config_dev_aspm, priv->pdev_aspm_cap); + priv->ext_config_dev_aspm &= priv->pdev_aspm_cap; +} + +static void alcor_pci_aspm_ctrl(struct alcor_pci_priv *priv, u8 aspm_enable) +{ + struct pci_dev *pci; + u8 aspm_ctrl, i; + int where; + u32 val32; + + if ((!priv->pdev_cap_off) || (!priv->parent_cap_off)) { + dev_dbg(priv->dev, "pci_cap_off: %x, parent_cap_off: %x\n", + priv->pdev_cap_off, priv->parent_cap_off); + return; + } + + if (!priv->pdev_aspm_cap) + return; + + aspm_ctrl = 0; + if (aspm_enable) { + aspm_ctrl = priv->ext_config_dev_aspm; + + if (!aspm_ctrl) { + dev_dbg(priv->dev, "aspm_ctrl == 0\n"); + return; + } + } + + for (i = 0; i < 2; i++) { + + if (i) { + pci = priv->parent_pdev; + where = priv->parent_cap_off + + ALCOR_PCIE_LINK_CTRL_OFFSET; + } else { + pci = priv->pdev; + where = priv->pdev_cap_off + + ALCOR_PCIE_LINK_CTRL_OFFSET; + } + + pci_read_config_dword(pci, where, &val32); + val32 &= (~0x03); + val32 |= (aspm_ctrl & priv->pdev_aspm_cap); + pci_write_config_byte(pci, where, (u8)val32); + } + +} + +static inline void alcor_mask_sd_irqs(struct alcor_pci_priv *priv) +{ + alcor_write32(priv, 0, AU6601_REG_INT_ENABLE); +} + +static inline void alcor_unmask_sd_irqs(struct alcor_pci_priv *priv) +{ + alcor_write32(priv, AU6601_INT_CMD_MASK | AU6601_INT_DATA_MASK | + AU6601_INT_CARD_INSERT | AU6601_INT_CARD_REMOVE | + AU6601_INT_OVER_CURRENT_ERR, + AU6601_REG_INT_ENABLE); +} + +static inline void alcor_mask_ms_irqs(struct alcor_pci_priv *priv) +{ + alcor_write32(priv, 0, AU6601_MS_INT_ENABLE); +} + +static inline void alcor_unmask_ms_irqs(struct alcor_pci_priv *priv) +{ + alcor_write32(priv, 0x3d00fa, AU6601_MS_INT_ENABLE); +} + +static int alcor_pci_probe(struct pci_dev *pdev, + const struct pci_device_id *ent) +{ + struct alcor_dev_cfg *cfg; + struct alcor_pci_priv *priv; + int ret, i, bar = 0; + + cfg = (void *)ent->driver_data; + + ret = pcim_enable_device(pdev); + if (ret) + return ret; + + priv = devm_kzalloc(&pdev->dev, sizeof(*priv), GFP_KERNEL); + if (!priv) + return -ENOMEM; + + ret = ida_simple_get(&alcor_pci_idr, 0, 0, GFP_KERNEL); + if (ret < 0) + return ret; + priv->id = ret; + + priv->pdev = pdev; + priv->parent_pdev = pdev->bus->self; + priv->dev = &pdev->dev; + priv->cfg = cfg; + priv->irq = pdev->irq; + + ret = pci_request_regions(pdev, DRV_NAME_ALCOR_PCI); + if (ret) { + dev_err(&pdev->dev, "Cannot request region\n"); + return -ENOMEM; + } + + if (!(pci_resource_flags(pdev, bar) & IORESOURCE_MEM)) { + dev_err(&pdev->dev, "BAR %d is not iomem. Aborting.\n", bar); + ret = -ENODEV; + goto error_release_regions; + } + + priv->iobase = pcim_iomap(pdev, bar, 0); + if (!priv->iobase) { + ret = -ENOMEM; + goto error_release_regions; + } + + /* make sure irqs are disabled */ + alcor_write32(priv, 0, AU6601_REG_INT_ENABLE); + alcor_write32(priv, 0, AU6601_MS_INT_ENABLE); + + ret = dma_set_mask_and_coherent(priv->dev, AU6601_SDMA_MASK); + if (ret) { + dev_err(priv->dev, "Failed to set DMA mask\n"); + goto error_release_regions; + } + + pci_set_master(pdev); + pci_set_drvdata(pdev, priv); + alcor_pci_init_check_aspm(priv); + + for (i = 0; i < ARRAY_SIZE(alcor_pci_cells); i++) { + alcor_pci_cells[i].platform_data = priv; + alcor_pci_cells[i].pdata_size = sizeof(*priv); + } + ret = mfd_add_devices(&pdev->dev, priv->id, alcor_pci_cells, + ARRAY_SIZE(alcor_pci_cells), NULL, 0, NULL); + if (ret < 0) + goto error_release_regions; + + alcor_pci_aspm_ctrl(priv, 0); + + return 0; + +error_release_regions: + pci_release_regions(pdev); + return ret; +} + +static void alcor_pci_remove(struct pci_dev *pdev) +{ + struct alcor_pci_priv *priv; + + priv = pci_get_drvdata(pdev); + + alcor_pci_aspm_ctrl(priv, 1); + + mfd_remove_devices(&pdev->dev); + + ida_simple_remove(&alcor_pci_idr, priv->id); + + pci_release_regions(pdev); + pci_set_drvdata(pdev, NULL); +} + +#ifdef CONFIG_PM_SLEEP +static int alcor_suspend(struct device *dev) +{ + struct pci_dev *pdev = to_pci_dev(dev); + struct alcor_pci_priv *priv = pci_get_drvdata(pdev); + + alcor_pci_aspm_ctrl(priv, 1); + return 0; +} + +static int alcor_resume(struct device *dev) +{ + + struct pci_dev *pdev = to_pci_dev(dev); + struct alcor_pci_priv *priv = pci_get_drvdata(pdev); + + alcor_pci_aspm_ctrl(priv, 0); + return 0; +} +#endif /* CONFIG_PM_SLEEP */ + +static SIMPLE_DEV_PM_OPS(alcor_pci_pm_ops, alcor_suspend, alcor_resume); + +static struct pci_driver alcor_driver = { + .name = DRV_NAME_ALCOR_PCI, + .id_table = pci_ids, + .probe = alcor_pci_probe, + .remove = alcor_pci_remove, + .driver = { + .pm = &alcor_pci_pm_ops + }, +}; + +module_pci_driver(alcor_driver); + +MODULE_AUTHOR("Oleksij Rempel <linux@rempel-privat.de>"); +MODULE_DESCRIPTION("PCI driver for Alcor Micro AU6601 Secure Digital Host Controller Interface"); +MODULE_LICENSE("GPL"); diff --git a/drivers/misc/cardreader/rtsx_usb.c b/drivers/misc/cardreader/rtsx_usb.c index b97903ff1a72..f7a66f614085 100644 --- a/drivers/misc/cardreader/rtsx_usb.c +++ b/drivers/misc/cardreader/rtsx_usb.c @@ -723,8 +723,15 @@ static int rtsx_usb_suspend(struct usb_interface *intf, pm_message_t message) return 0; } +static int rtsx_usb_resume_child(struct device *dev, void *data) +{ + pm_request_resume(dev); + return 0; +} + static int rtsx_usb_resume(struct usb_interface *intf) { + device_for_each_child(&intf->dev, NULL, rtsx_usb_resume_child); return 0; } @@ -734,6 +741,7 @@ static int rtsx_usb_reset_resume(struct usb_interface *intf) (struct rtsx_ucr *)usb_get_intfdata(intf); rtsx_usb_reset_chip(ucr); + device_for_each_child(&intf->dev, NULL, rtsx_usb_resume_child); return 0; } diff --git a/drivers/misc/mic/host/mic_boot.c b/drivers/misc/mic/host/mic_boot.c index c327985c9523..6479435ac96b 100644 --- a/drivers/misc/mic/host/mic_boot.c +++ b/drivers/misc/mic/host/mic_boot.c @@ -149,7 +149,7 @@ static void *__mic_dma_alloc(struct device *dev, size_t size, struct scif_hw_dev *scdev = dev_get_drvdata(dev); struct mic_device *mdev = scdev_to_mdev(scdev); dma_addr_t tmp; - void *va = kmalloc(size, gfp); + void *va = kmalloc(size, gfp | __GFP_ZERO); if (va) { tmp = mic_map_single(mdev, va, size); diff --git a/drivers/mmc/core/block.c b/drivers/mmc/core/block.c index 111934838da2..aef1185f383d 100644 --- a/drivers/mmc/core/block.c +++ b/drivers/mmc/core/block.c @@ -100,7 +100,6 @@ static DEFINE_IDA(mmc_rpmb_ida); * There is one mmc_blk_data per slot. */ struct mmc_blk_data { - spinlock_t lock; struct device *parent; struct gendisk *disk; struct mmc_queue queue; @@ -1488,7 +1487,7 @@ static void mmc_blk_cqe_complete_rq(struct mmc_queue *mq, struct request *req) blk_mq_end_request(req, BLK_STS_OK); } - spin_lock_irqsave(q->queue_lock, flags); + spin_lock_irqsave(&mq->lock, flags); mq->in_flight[mmc_issue_type(mq, req)] -= 1; @@ -1496,7 +1495,7 @@ static void mmc_blk_cqe_complete_rq(struct mmc_queue *mq, struct request *req) mmc_cqe_check_busy(mq); - spin_unlock_irqrestore(q->queue_lock, flags); + spin_unlock_irqrestore(&mq->lock, flags); if (!mq->cqe_busy) blk_mq_run_hw_queues(q, true); @@ -1961,7 +1960,7 @@ static void mmc_blk_urgent_bkops(struct mmc_queue *mq, struct mmc_queue_req *mqrq) { if (mmc_blk_urgent_bkops_needed(mq, mqrq)) - mmc_start_bkops(mq->card, true); + mmc_run_bkops(mq->card); } void mmc_blk_mq_complete(struct request *req) @@ -1993,17 +1992,16 @@ static void mmc_blk_mq_poll_completion(struct mmc_queue *mq, static void mmc_blk_mq_dec_in_flight(struct mmc_queue *mq, struct request *req) { - struct request_queue *q = req->q; unsigned long flags; bool put_card; - spin_lock_irqsave(q->queue_lock, flags); + spin_lock_irqsave(&mq->lock, flags); mq->in_flight[mmc_issue_type(mq, req)] -= 1; put_card = (mmc_tot_in_flight(mq) == 0); - spin_unlock_irqrestore(q->queue_lock, flags); + spin_unlock_irqrestore(&mq->lock, flags); if (put_card) mmc_put_card(mq->card, &mq->ctx); @@ -2099,11 +2097,11 @@ static void mmc_blk_mq_req_done(struct mmc_request *mrq) * request does not need to wait (although it does need to * complete complete_req first). */ - spin_lock_irqsave(q->queue_lock, flags); + spin_lock_irqsave(&mq->lock, flags); mq->complete_req = req; mq->rw_wait = false; waiting = mq->waiting; - spin_unlock_irqrestore(q->queue_lock, flags); + spin_unlock_irqrestore(&mq->lock, flags); /* * If 'waiting' then the waiting task will complete this @@ -2122,10 +2120,10 @@ static void mmc_blk_mq_req_done(struct mmc_request *mrq) /* Take the recovery path for errors or urgent background operations */ if (mmc_blk_rq_error(&mqrq->brq) || mmc_blk_urgent_bkops_needed(mq, mqrq)) { - spin_lock_irqsave(q->queue_lock, flags); + spin_lock_irqsave(&mq->lock, flags); mq->recovery_needed = true; mq->recovery_req = req; - spin_unlock_irqrestore(q->queue_lock, flags); + spin_unlock_irqrestore(&mq->lock, flags); wake_up(&mq->wait); schedule_work(&mq->recovery_work); return; @@ -2141,7 +2139,6 @@ static void mmc_blk_mq_req_done(struct mmc_request *mrq) static bool mmc_blk_rw_wait_cond(struct mmc_queue *mq, int *err) { - struct request_queue *q = mq->queue; unsigned long flags; bool done; @@ -2149,7 +2146,7 @@ static bool mmc_blk_rw_wait_cond(struct mmc_queue *mq, int *err) * Wait while there is another request in progress, but not if recovery * is needed. Also indicate whether there is a request waiting to start. */ - spin_lock_irqsave(q->queue_lock, flags); + spin_lock_irqsave(&mq->lock, flags); if (mq->recovery_needed) { *err = -EBUSY; done = true; @@ -2157,7 +2154,7 @@ static bool mmc_blk_rw_wait_cond(struct mmc_queue *mq, int *err) done = !mq->rw_wait; } mq->waiting = !done; - spin_unlock_irqrestore(q->queue_lock, flags); + spin_unlock_irqrestore(&mq->lock, flags); return done; } @@ -2334,12 +2331,11 @@ static struct mmc_blk_data *mmc_blk_alloc_req(struct mmc_card *card, goto err_kfree; } - spin_lock_init(&md->lock); INIT_LIST_HEAD(&md->part); INIT_LIST_HEAD(&md->rpmbs); md->usage = 1; - ret = mmc_init_queue(&md->queue, card, &md->lock, subname); + ret = mmc_init_queue(&md->queue, card); if (ret) goto err_putdisk; diff --git a/drivers/mmc/core/card.h b/drivers/mmc/core/card.h index 1170feb8f969..eef301452406 100644 --- a/drivers/mmc/core/card.h +++ b/drivers/mmc/core/card.h @@ -23,15 +23,13 @@ #define MMC_STATE_BLOCKADDR (1<<2) /* card uses block-addressing */ #define MMC_CARD_SDXC (1<<3) /* card is SDXC */ #define MMC_CARD_REMOVED (1<<4) /* card has been removed */ -#define MMC_STATE_DOING_BKOPS (1<<5) /* card is doing BKOPS */ -#define MMC_STATE_SUSPENDED (1<<6) /* card is suspended */ +#define MMC_STATE_SUSPENDED (1<<5) /* card is suspended */ #define mmc_card_present(c) ((c)->state & MMC_STATE_PRESENT) #define mmc_card_readonly(c) ((c)->state & MMC_STATE_READONLY) #define mmc_card_blockaddr(c) ((c)->state & MMC_STATE_BLOCKADDR) #define mmc_card_ext_capacity(c) ((c)->state & MMC_CARD_SDXC) #define mmc_card_removed(c) ((c) && ((c)->state & MMC_CARD_REMOVED)) -#define mmc_card_doing_bkops(c) ((c)->state & MMC_STATE_DOING_BKOPS) #define mmc_card_suspended(c) ((c)->state & MMC_STATE_SUSPENDED) #define mmc_card_set_present(c) ((c)->state |= MMC_STATE_PRESENT) @@ -39,8 +37,6 @@ #define mmc_card_set_blockaddr(c) ((c)->state |= MMC_STATE_BLOCKADDR) #define mmc_card_set_ext_capacity(c) ((c)->state |= MMC_CARD_SDXC) #define mmc_card_set_removed(c) ((c)->state |= MMC_CARD_REMOVED) -#define mmc_card_set_doing_bkops(c) ((c)->state |= MMC_STATE_DOING_BKOPS) -#define mmc_card_clr_doing_bkops(c) ((c)->state &= ~MMC_STATE_DOING_BKOPS) #define mmc_card_set_suspended(c) ((c)->state |= MMC_STATE_SUSPENDED) #define mmc_card_clr_suspended(c) ((c)->state &= ~MMC_STATE_SUSPENDED) diff --git a/drivers/mmc/core/core.c b/drivers/mmc/core/core.c index 50a5c340307b..5bd58b95d318 100644 --- a/drivers/mmc/core/core.c +++ b/drivers/mmc/core/core.c @@ -887,7 +887,10 @@ void mmc_release_host(struct mmc_host *host) spin_unlock_irqrestore(&host->lock, flags); wake_up(&host->wq); pm_runtime_mark_last_busy(mmc_dev(host)); - pm_runtime_put_autosuspend(mmc_dev(host)); + if (host->caps & MMC_CAP_SYNC_RUNTIME_PM) + pm_runtime_put_sync_suspend(mmc_dev(host)); + else + pm_runtime_put_autosuspend(mmc_dev(host)); } } EXPORT_SYMBOL(mmc_release_host); @@ -2413,20 +2416,6 @@ int mmc_set_blocklen(struct mmc_card *card, unsigned int blocklen) } EXPORT_SYMBOL(mmc_set_blocklen); -int mmc_set_blockcount(struct mmc_card *card, unsigned int blockcount, - bool is_rel_write) -{ - struct mmc_command cmd = {}; - - cmd.opcode = MMC_SET_BLOCK_COUNT; - cmd.arg = blockcount & 0x0000FFFF; - if (is_rel_write) - cmd.arg |= 1 << 31; - cmd.flags = MMC_RSP_SPI_R1 | MMC_RSP_R1 | MMC_CMD_AC; - return mmc_wait_for_cmd(card->host, &cmd, 5); -} -EXPORT_SYMBOL(mmc_set_blockcount); - static void mmc_hw_reset_for_init(struct mmc_host *host) { mmc_pwrseq_reset(host); diff --git a/drivers/mmc/core/core.h b/drivers/mmc/core/core.h index 087ba68b2920..8fb6bc37f808 100644 --- a/drivers/mmc/core/core.h +++ b/drivers/mmc/core/core.h @@ -118,8 +118,6 @@ int mmc_erase_group_aligned(struct mmc_card *card, unsigned int from, unsigned int mmc_calc_max_discard(struct mmc_card *card); int mmc_set_blocklen(struct mmc_card *card, unsigned int blocklen); -int mmc_set_blockcount(struct mmc_card *card, unsigned int blockcount, - bool is_rel_write); int __mmc_claim_host(struct mmc_host *host, struct mmc_ctx *ctx, atomic_t *abort); diff --git a/drivers/mmc/core/mmc.c b/drivers/mmc/core/mmc.c index 55997cf84b39..da892a599524 100644 --- a/drivers/mmc/core/mmc.c +++ b/drivers/mmc/core/mmc.c @@ -1181,6 +1181,9 @@ static int mmc_select_hs400(struct mmc_card *card) if (err) goto out_err; + if (host->ops->hs400_prepare_ddr) + host->ops->hs400_prepare_ddr(host); + /* Switch card to DDR */ err = mmc_switch(card, EXT_CSD_CMD_SET_NORMAL, EXT_CSD_BUS_WIDTH, @@ -2011,12 +2014,6 @@ static int _mmc_suspend(struct mmc_host *host, bool is_suspend) if (mmc_card_suspended(host->card)) goto out; - if (mmc_card_doing_bkops(host->card)) { - err = mmc_stop_bkops(host->card); - if (err) - goto out; - } - err = mmc_flush_cache(host->card); if (err) goto out; diff --git a/drivers/mmc/core/mmc_ops.c b/drivers/mmc/core/mmc_ops.c index 873b2aa0c155..9054329fe903 100644 --- a/drivers/mmc/core/mmc_ops.c +++ b/drivers/mmc/core/mmc_ops.c @@ -802,12 +802,6 @@ static int mmc_send_hpi_cmd(struct mmc_card *card, u32 *status) unsigned int opcode; int err; - if (!card->ext_csd.hpi) { - pr_warn("%s: Card didn't support HPI command\n", - mmc_hostname(card->host)); - return -EINVAL; - } - opcode = card->ext_csd.hpi_cmd; if (opcode == MMC_STOP_TRANSMISSION) cmd.flags = MMC_RSP_R1B | MMC_CMD_AC; @@ -897,34 +891,6 @@ int mmc_can_ext_csd(struct mmc_card *card) return (card && card->csd.mmca_vsn > CSD_SPEC_VER_3); } -/** - * mmc_stop_bkops - stop ongoing BKOPS - * @card: MMC card to check BKOPS - * - * Send HPI command to stop ongoing background operations to - * allow rapid servicing of foreground operations, e.g. read/ - * writes. Wait until the card comes out of the programming state - * to avoid errors in servicing read/write requests. - */ -int mmc_stop_bkops(struct mmc_card *card) -{ - int err = 0; - - err = mmc_interrupt_hpi(card); - - /* - * If err is EINVAL, we can't issue an HPI. - * It should complete the BKOPS. - */ - if (!err || (err == -EINVAL)) { - mmc_card_clr_doing_bkops(card); - mmc_retune_release(card->host); - err = 0; - } - - return err; -} - static int mmc_read_bkops_status(struct mmc_card *card) { int err; @@ -941,22 +907,17 @@ static int mmc_read_bkops_status(struct mmc_card *card) } /** - * mmc_start_bkops - start BKOPS for supported cards - * @card: MMC card to start BKOPS - * @from_exception: A flag to indicate if this function was - * called due to an exception raised by the card + * mmc_run_bkops - Run BKOPS for supported cards + * @card: MMC card to run BKOPS for * - * Start background operations whenever requested. - * When the urgent BKOPS bit is set in a R1 command response - * then background operations should be started immediately. + * Run background operations synchronously for cards having manual BKOPS + * enabled and in case it reports urgent BKOPS level. */ -void mmc_start_bkops(struct mmc_card *card, bool from_exception) +void mmc_run_bkops(struct mmc_card *card) { int err; - int timeout; - bool use_busy_signal; - if (!card->ext_csd.man_bkops_en || mmc_card_doing_bkops(card)) + if (!card->ext_csd.man_bkops_en) return; err = mmc_read_bkops_status(card); @@ -966,44 +927,26 @@ void mmc_start_bkops(struct mmc_card *card, bool from_exception) return; } - if (!card->ext_csd.raw_bkops_status) - return; - - if (card->ext_csd.raw_bkops_status < EXT_CSD_BKOPS_LEVEL_2 && - from_exception) + if (!card->ext_csd.raw_bkops_status || + card->ext_csd.raw_bkops_status < EXT_CSD_BKOPS_LEVEL_2) return; - if (card->ext_csd.raw_bkops_status >= EXT_CSD_BKOPS_LEVEL_2) { - timeout = MMC_OPS_TIMEOUT_MS; - use_busy_signal = true; - } else { - timeout = 0; - use_busy_signal = false; - } - mmc_retune_hold(card->host); - err = __mmc_switch(card, EXT_CSD_CMD_SET_NORMAL, - EXT_CSD_BKOPS_START, 1, timeout, 0, - use_busy_signal, true, false); - if (err) { + /* + * For urgent BKOPS status, LEVEL_2 and higher, let's execute + * synchronously. Future wise, we may consider to start BKOPS, for less + * urgent levels by using an asynchronous background task, when idle. + */ + err = mmc_switch(card, EXT_CSD_CMD_SET_NORMAL, + EXT_CSD_BKOPS_START, 1, MMC_OPS_TIMEOUT_MS); + if (err) pr_warn("%s: Error %d starting bkops\n", mmc_hostname(card->host), err); - mmc_retune_release(card->host); - return; - } - /* - * For urgent bkops status (LEVEL_2 and more) - * bkops executed synchronously, otherwise - * the operation is in progress - */ - if (!use_busy_signal) - mmc_card_set_doing_bkops(card); - else - mmc_retune_release(card->host); + mmc_retune_release(card->host); } -EXPORT_SYMBOL(mmc_start_bkops); +EXPORT_SYMBOL(mmc_run_bkops); /* * Flush the cache to the non-volatile storage. diff --git a/drivers/mmc/core/mmc_ops.h b/drivers/mmc/core/mmc_ops.h index a1390d486381..018a5e3f66d6 100644 --- a/drivers/mmc/core/mmc_ops.h +++ b/drivers/mmc/core/mmc_ops.h @@ -40,8 +40,7 @@ int __mmc_switch(struct mmc_card *card, u8 set, u8 index, u8 value, bool use_busy_signal, bool send_status, bool retry_crc_err); int mmc_switch(struct mmc_card *card, u8 set, u8 index, u8 value, unsigned int timeout_ms); -int mmc_stop_bkops(struct mmc_card *card); -void mmc_start_bkops(struct mmc_card *card, bool from_exception); +void mmc_run_bkops(struct mmc_card *card); int mmc_flush_cache(struct mmc_card *card); int mmc_cmdq_enable(struct mmc_card *card); int mmc_cmdq_disable(struct mmc_card *card); diff --git a/drivers/mmc/core/mmc_test.c b/drivers/mmc/core/mmc_test.c index ef18daeaa4cc..eabb1cab1765 100644 --- a/drivers/mmc/core/mmc_test.c +++ b/drivers/mmc/core/mmc_test.c @@ -3145,17 +3145,7 @@ static int mtf_testlist_show(struct seq_file *sf, void *data) return 0; } -static int mtf_testlist_open(struct inode *inode, struct file *file) -{ - return single_open(file, mtf_testlist_show, inode->i_private); -} - -static const struct file_operations mmc_test_fops_testlist = { - .open = mtf_testlist_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; +DEFINE_SHOW_ATTRIBUTE(mtf_testlist); static void mmc_test_free_dbgfs_file(struct mmc_card *card) { @@ -3216,7 +3206,7 @@ static int mmc_test_register_dbgfs_file(struct mmc_card *card) goto err; ret = __mmc_test_register_dbgfs_file(card, "testlist", S_IRUGO, - &mmc_test_fops_testlist); + &mtf_testlist_fops); if (ret) goto err; diff --git a/drivers/mmc/core/queue.c b/drivers/mmc/core/queue.c index 6edffeed9953..35cc138b096d 100644 --- a/drivers/mmc/core/queue.c +++ b/drivers/mmc/core/queue.c @@ -89,9 +89,9 @@ void mmc_cqe_recovery_notifier(struct mmc_request *mrq) struct mmc_queue *mq = q->queuedata; unsigned long flags; - spin_lock_irqsave(q->queue_lock, flags); + spin_lock_irqsave(&mq->lock, flags); __mmc_cqe_recovery_notifier(mq); - spin_unlock_irqrestore(q->queue_lock, flags); + spin_unlock_irqrestore(&mq->lock, flags); } static enum blk_eh_timer_return mmc_cqe_timed_out(struct request *req) @@ -128,14 +128,14 @@ static enum blk_eh_timer_return mmc_mq_timed_out(struct request *req, unsigned long flags; int ret; - spin_lock_irqsave(q->queue_lock, flags); + spin_lock_irqsave(&mq->lock, flags); if (mq->recovery_needed || !mq->use_cqe) ret = BLK_EH_RESET_TIMER; else ret = mmc_cqe_timed_out(req); - spin_unlock_irqrestore(q->queue_lock, flags); + spin_unlock_irqrestore(&mq->lock, flags); return ret; } @@ -157,9 +157,9 @@ static void mmc_mq_recovery_handler(struct work_struct *work) mq->in_recovery = false; - spin_lock_irq(q->queue_lock); + spin_lock_irq(&mq->lock); mq->recovery_needed = false; - spin_unlock_irq(q->queue_lock); + spin_unlock_irq(&mq->lock); mmc_put_card(mq->card, &mq->ctx); @@ -258,10 +258,10 @@ static blk_status_t mmc_mq_queue_rq(struct blk_mq_hw_ctx *hctx, issue_type = mmc_issue_type(mq, req); - spin_lock_irq(q->queue_lock); + spin_lock_irq(&mq->lock); if (mq->recovery_needed || mq->busy) { - spin_unlock_irq(q->queue_lock); + spin_unlock_irq(&mq->lock); return BLK_STS_RESOURCE; } @@ -269,7 +269,7 @@ static blk_status_t mmc_mq_queue_rq(struct blk_mq_hw_ctx *hctx, case MMC_ISSUE_DCMD: if (mmc_cqe_dcmd_busy(mq)) { mq->cqe_busy |= MMC_CQE_DCMD_BUSY; - spin_unlock_irq(q->queue_lock); + spin_unlock_irq(&mq->lock); return BLK_STS_RESOURCE; } break; @@ -294,7 +294,7 @@ static blk_status_t mmc_mq_queue_rq(struct blk_mq_hw_ctx *hctx, get_card = (mmc_tot_in_flight(mq) == 1); cqe_retune_ok = (mmc_cqe_qcnt(mq) == 1); - spin_unlock_irq(q->queue_lock); + spin_unlock_irq(&mq->lock); if (!(req->rq_flags & RQF_DONTPREP)) { req_to_mmc_queue_req(req)->retries = 0; @@ -328,12 +328,12 @@ static blk_status_t mmc_mq_queue_rq(struct blk_mq_hw_ctx *hctx, if (issued != MMC_REQ_STARTED) { bool put_card = false; - spin_lock_irq(q->queue_lock); + spin_lock_irq(&mq->lock); mq->in_flight[issue_type] -= 1; if (mmc_tot_in_flight(mq) == 0) put_card = true; mq->busy = false; - spin_unlock_irq(q->queue_lock); + spin_unlock_irq(&mq->lock); if (put_card) mmc_put_card(card, &mq->ctx); } else { @@ -378,14 +378,37 @@ static void mmc_setup_queue(struct mmc_queue *mq, struct mmc_card *card) init_waitqueue_head(&mq->wait); } -static int mmc_mq_init_queue(struct mmc_queue *mq, int q_depth, - const struct blk_mq_ops *mq_ops, spinlock_t *lock) +/* Set queue depth to get a reasonable value for q->nr_requests */ +#define MMC_QUEUE_DEPTH 64 + +/** + * mmc_init_queue - initialise a queue structure. + * @mq: mmc queue + * @card: mmc card to attach this queue + * + * Initialise a MMC card request queue. + */ +int mmc_init_queue(struct mmc_queue *mq, struct mmc_card *card) { + struct mmc_host *host = card->host; int ret; + mq->card = card; + mq->use_cqe = host->cqe_enabled; + + spin_lock_init(&mq->lock); + memset(&mq->tag_set, 0, sizeof(mq->tag_set)); - mq->tag_set.ops = mq_ops; - mq->tag_set.queue_depth = q_depth; + mq->tag_set.ops = &mmc_mq_ops; + /* + * The queue depth for CQE must match the hardware because the request + * tag is used to index the hardware queue. + */ + if (mq->use_cqe) + mq->tag_set.queue_depth = + min_t(int, card->ext_csd.cmdq_depth, host->cqe_qdepth); + else + mq->tag_set.queue_depth = MMC_QUEUE_DEPTH; mq->tag_set.numa_node = NUMA_NO_NODE; mq->tag_set.flags = BLK_MQ_F_SHOULD_MERGE | BLK_MQ_F_SG_MERGE | BLK_MQ_F_BLOCKING; @@ -403,68 +426,17 @@ static int mmc_mq_init_queue(struct mmc_queue *mq, int q_depth, goto free_tag_set; } - mq->queue->queue_lock = lock; mq->queue->queuedata = mq; + blk_queue_rq_timeout(mq->queue, 60 * HZ); + mmc_setup_queue(mq, card); return 0; free_tag_set: blk_mq_free_tag_set(&mq->tag_set); - return ret; } -/* Set queue depth to get a reasonable value for q->nr_requests */ -#define MMC_QUEUE_DEPTH 64 - -static int mmc_mq_init(struct mmc_queue *mq, struct mmc_card *card, - spinlock_t *lock) -{ - struct mmc_host *host = card->host; - int q_depth; - int ret; - - /* - * The queue depth for CQE must match the hardware because the request - * tag is used to index the hardware queue. - */ - if (mq->use_cqe) - q_depth = min_t(int, card->ext_csd.cmdq_depth, host->cqe_qdepth); - else - q_depth = MMC_QUEUE_DEPTH; - - ret = mmc_mq_init_queue(mq, q_depth, &mmc_mq_ops, lock); - if (ret) - return ret; - - blk_queue_rq_timeout(mq->queue, 60 * HZ); - - mmc_setup_queue(mq, card); - - return 0; -} - -/** - * mmc_init_queue - initialise a queue structure. - * @mq: mmc queue - * @card: mmc card to attach this queue - * @lock: queue lock - * @subname: partition subname - * - * Initialise a MMC card request queue. - */ -int mmc_init_queue(struct mmc_queue *mq, struct mmc_card *card, - spinlock_t *lock, const char *subname) -{ - struct mmc_host *host = card->host; - - mq->card = card; - - mq->use_cqe = host->cqe_enabled; - - return mmc_mq_init(mq, card, lock); -} - void mmc_queue_suspend(struct mmc_queue *mq) { blk_mq_quiesce_queue(mq->queue); diff --git a/drivers/mmc/core/queue.h b/drivers/mmc/core/queue.h index 9bf3c9245075..fd11491ced9f 100644 --- a/drivers/mmc/core/queue.h +++ b/drivers/mmc/core/queue.h @@ -77,6 +77,7 @@ struct mmc_queue { struct blk_mq_tag_set tag_set; struct mmc_blk_data *blkdata; struct request_queue *queue; + spinlock_t lock; int in_flight[MMC_ISSUE_MAX]; unsigned int cqe_busy; #define MMC_CQE_DCMD_BUSY BIT(0) @@ -95,8 +96,7 @@ struct mmc_queue { struct work_struct complete_work; }; -extern int mmc_init_queue(struct mmc_queue *, struct mmc_card *, spinlock_t *, - const char *); +extern int mmc_init_queue(struct mmc_queue *, struct mmc_card *); extern void mmc_cleanup_queue(struct mmc_queue *); extern void mmc_queue_suspend(struct mmc_queue *); extern void mmc_queue_resume(struct mmc_queue *); diff --git a/drivers/mmc/core/slot-gpio.c b/drivers/mmc/core/slot-gpio.c index 86803a3a04dc..319ccd93383d 100644 --- a/drivers/mmc/core/slot-gpio.c +++ b/drivers/mmc/core/slot-gpio.c @@ -9,7 +9,6 @@ */ #include <linux/err.h> -#include <linux/gpio.h> #include <linux/gpio/consumer.h> #include <linux/interrupt.h> #include <linux/jiffies.h> @@ -27,8 +26,8 @@ struct mmc_gpio { bool override_cd_active_level; irqreturn_t (*cd_gpio_isr)(int irq, void *dev_id); char *ro_label; + char *cd_label; u32 cd_debounce_delay_ms; - char cd_label[]; }; static irqreturn_t mmc_gpio_cd_irqt(int irq, void *dev_id) @@ -45,15 +44,19 @@ static irqreturn_t mmc_gpio_cd_irqt(int irq, void *dev_id) int mmc_gpio_alloc(struct mmc_host *host) { - size_t len = strlen(dev_name(host->parent)) + 4; struct mmc_gpio *ctx = devm_kzalloc(host->parent, - sizeof(*ctx) + 2 * len, GFP_KERNEL); + sizeof(*ctx), GFP_KERNEL); if (ctx) { - ctx->ro_label = ctx->cd_label + len; ctx->cd_debounce_delay_ms = 200; - snprintf(ctx->cd_label, len, "%s cd", dev_name(host->parent)); - snprintf(ctx->ro_label, len, "%s ro", dev_name(host->parent)); + ctx->cd_label = devm_kasprintf(host->parent, GFP_KERNEL, + "%s cd", dev_name(host->parent)); + if (!ctx->cd_label) + return -ENOMEM; + ctx->ro_label = devm_kasprintf(host->parent, GFP_KERNEL, + "%s ro", dev_name(host->parent)); + if (!ctx->ro_label) + return -ENOMEM; host->slot.handler_priv = ctx; host->slot.cd_irq = -EINVAL; } @@ -98,36 +101,6 @@ int mmc_gpio_get_cd(struct mmc_host *host) } EXPORT_SYMBOL(mmc_gpio_get_cd); -/** - * mmc_gpio_request_ro - request a gpio for write-protection - * @host: mmc host - * @gpio: gpio number requested - * - * As devm_* managed functions are used in mmc_gpio_request_ro(), client - * drivers do not need to worry about freeing up memory. - * - * Returns zero on success, else an error. - */ -int mmc_gpio_request_ro(struct mmc_host *host, unsigned int gpio) -{ - struct mmc_gpio *ctx = host->slot.handler_priv; - int ret; - - if (!gpio_is_valid(gpio)) - return -EINVAL; - - ret = devm_gpio_request_one(host->parent, gpio, GPIOF_DIR_IN, - ctx->ro_label); - if (ret < 0) - return ret; - - ctx->override_ro_active_level = true; - ctx->ro_gpio = gpio_to_desc(gpio); - - return 0; -} -EXPORT_SYMBOL(mmc_gpio_request_ro); - void mmc_gpiod_request_cd_irq(struct mmc_host *host) { struct mmc_gpio *ctx = host->slot.handler_priv; @@ -197,50 +170,6 @@ void mmc_gpio_set_cd_isr(struct mmc_host *host, EXPORT_SYMBOL(mmc_gpio_set_cd_isr); /** - * mmc_gpio_request_cd - request a gpio for card-detection - * @host: mmc host - * @gpio: gpio number requested - * @debounce: debounce time in microseconds - * - * As devm_* managed functions are used in mmc_gpio_request_cd(), client - * drivers do not need to worry about freeing up memory. - * - * If GPIO debouncing is desired, set the debounce parameter to a non-zero - * value. The caller is responsible for ensuring that the GPIO driver associated - * with the GPIO supports debouncing, otherwise an error will be returned. - * - * Returns zero on success, else an error. - */ -int mmc_gpio_request_cd(struct mmc_host *host, unsigned int gpio, - unsigned int debounce) -{ - struct mmc_gpio *ctx = host->slot.handler_priv; - int ret; - - ret = devm_gpio_request_one(host->parent, gpio, GPIOF_DIR_IN, - ctx->cd_label); - if (ret < 0) - /* - * don't bother freeing memory. It might still get used by other - * slot functions, in any case it will be freed, when the device - * is destroyed. - */ - return ret; - - if (debounce) { - ret = gpio_set_debounce(gpio, debounce); - if (ret < 0) - return ret; - } - - ctx->override_cd_active_level = true; - ctx->cd_gpio = gpio_to_desc(gpio); - - return 0; -} -EXPORT_SYMBOL(mmc_gpio_request_cd); - -/** * mmc_gpiod_request_cd - request a gpio descriptor for card-detection * @host: mmc host * @con_id: function within the GPIO consumer @@ -250,8 +179,7 @@ EXPORT_SYMBOL(mmc_gpio_request_cd); * @gpio_invert: will return whether the GPIO line is inverted or not, set * to NULL to ignore * - * Use this function in place of mmc_gpio_request_cd() to use the GPIO - * descriptor API. Note that it must be called prior to mmc_add_host() + * Note that this must be called prior to mmc_add_host() * otherwise the caller must also call mmc_gpiod_request_cd_irq(). * * Returns zero on success, else an error. @@ -302,9 +230,6 @@ EXPORT_SYMBOL(mmc_can_gpio_cd); * @gpio_invert: will return whether the GPIO line is inverted or not, * set to NULL to ignore * - * Use this function in place of mmc_gpio_request_ro() to use the GPIO - * descriptor API. - * * Returns zero on success, else an error. */ int mmc_gpiod_request_ro(struct mmc_host *host, const char *con_id, diff --git a/drivers/mmc/host/Kconfig b/drivers/mmc/host/Kconfig index 1b58739d9744..e26b8145efb3 100644 --- a/drivers/mmc/host/Kconfig +++ b/drivers/mmc/host/Kconfig @@ -441,6 +441,13 @@ config MMC_WBSD If unsure, say N. +config MMC_ALCOR + tristate "Alcor Micro/Alcor Link SD/MMC controller" + depends on MISC_ALCOR_PCI + help + Say Y here to include driver code to support SD/MMC card interface + of Alcor Micro PCI-E card reader + config MMC_AU1X tristate "Alchemy AU1XX0 MMC Card Interface support" depends on MIPS_ALCHEMY @@ -646,13 +653,14 @@ config MMC_SDHI_SYS_DMAC config MMC_SDHI_INTERNAL_DMAC tristate "DMA for SDHI SD/SDIO controllers using on-chip bus mastering" - depends on ARM64 || ARCH_R8A77470 || COMPILE_TEST + depends on ARM64 || ARCH_R7S9210 || ARCH_R8A77470 || COMPILE_TEST depends on MMC_SDHI - default MMC_SDHI if (ARM64 || ARCH_R8A77470) + default MMC_SDHI if (ARM64 || ARCH_R7S9210 || ARCH_R8A77470) help This provides DMA support for SDHI SD/SDIO controllers using on-chip bus mastering. This supports the controllers - found in arm64 based SoCs. + found in arm64 based SoCs. This controller is also found in + some RZ family SoCs. config MMC_UNIPHIER tristate "UniPhier SD/eMMC Host Controller support" @@ -969,6 +977,8 @@ config MMC_SDHCI_XENON config MMC_SDHCI_OMAP tristate "TI SDHCI Controller Support" depends on MMC_SDHCI_PLTFM && OF + select THERMAL + select TI_SOC_THERMAL help This selects the Secure Digital Host Controller Interface (SDHCI) support present in TI's DRA7 SOCs. The controller supports @@ -977,3 +987,15 @@ config MMC_SDHCI_OMAP If you have a controller with this interface, say Y or M here. If unsure, say N. + +config MMC_SDHCI_AM654 + tristate "Support for the SDHCI Controller in TI's AM654 SOCs" + depends on MMC_SDHCI_PLTFM && OF + help + This selects the Secure Digital Host Controller Interface (SDHCI) + support present in TI's AM654 SOCs. The controller supports + SD/MMC/SDIO devices. + + If you have a controller with this interface, say Y or M here. + + If unsure, say N. diff --git a/drivers/mmc/host/Makefile b/drivers/mmc/host/Makefile index 720d37777098..73578718f119 100644 --- a/drivers/mmc/host/Makefile +++ b/drivers/mmc/host/Makefile @@ -22,8 +22,10 @@ obj-$(CONFIG_MMC_SDHCI_S3C) += sdhci-s3c.o obj-$(CONFIG_MMC_SDHCI_SIRF) += sdhci-sirf.o obj-$(CONFIG_MMC_SDHCI_F_SDH30) += sdhci_f_sdh30.o obj-$(CONFIG_MMC_SDHCI_SPEAR) += sdhci-spear.o +obj-$(CONFIG_MMC_SDHCI_AM654) += sdhci_am654.o obj-$(CONFIG_MMC_WBSD) += wbsd.o obj-$(CONFIG_MMC_AU1X) += au1xmmc.o +obj-$(CONFIG_MMC_ALCOR) += alcor.o obj-$(CONFIG_MMC_MTK) += mtk-sd.o obj-$(CONFIG_MMC_OMAP) += omap.o obj-$(CONFIG_MMC_OMAP_HS) += omap_hsmmc.o diff --git a/drivers/mmc/host/alcor.c b/drivers/mmc/host/alcor.c new file mode 100644 index 000000000000..c712b7deb3a9 --- /dev/null +++ b/drivers/mmc/host/alcor.c @@ -0,0 +1,1162 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* + * Copyright (C) 2018 Oleksij Rempel <linux@rempel-privat.de> + * + * Driver for Alcor Micro AU6601 and AU6621 controllers + */ + +/* Note: this driver was created without any documentation. Based + * on sniffing, testing and in some cases mimic of original driver. + * As soon as some one with documentation or more experience in SD/MMC, or + * reverse engineering then me, please review this driver and question every + * thing what I did. 2018 Oleksij Rempel <linux@rempel-privat.de> + */ + +#include <linux/delay.h> +#include <linux/pci.h> +#include <linux/module.h> +#include <linux/io.h> +#include <linux/pm.h> +#include <linux/irq.h> +#include <linux/interrupt.h> +#include <linux/platform_device.h> + +#include <linux/mmc/host.h> +#include <linux/mmc/mmc.h> + +#include <linux/alcor_pci.h> + +enum alcor_cookie { + COOKIE_UNMAPPED, + COOKIE_PRE_MAPPED, + COOKIE_MAPPED, +}; + +struct alcor_pll_conf { + unsigned int clk_src_freq; + unsigned int clk_src_reg; + unsigned int min_div; + unsigned int max_div; +}; + +struct alcor_sdmmc_host { + struct device *dev; + struct alcor_pci_priv *alcor_pci; + + struct mmc_host *mmc; + struct mmc_request *mrq; + struct mmc_command *cmd; + struct mmc_data *data; + unsigned int dma_on:1; + unsigned int early_data:1; + + struct mutex cmd_mutex; + + struct delayed_work timeout_work; + + struct sg_mapping_iter sg_miter; /* SG state for PIO */ + struct scatterlist *sg; + unsigned int blocks; /* remaining PIO blocks */ + int sg_count; + + u32 irq_status_sd; + unsigned char cur_power_mode; +}; + +static const struct alcor_pll_conf alcor_pll_cfg[] = { + /* MHZ, CLK src, max div, min div */ + { 31250000, AU6601_CLK_31_25_MHZ, 1, 511}, + { 48000000, AU6601_CLK_48_MHZ, 1, 511}, + {125000000, AU6601_CLK_125_MHZ, 1, 511}, + {384000000, AU6601_CLK_384_MHZ, 1, 511}, +}; + +static inline void alcor_rmw8(struct alcor_sdmmc_host *host, unsigned int addr, + u8 clear, u8 set) +{ + struct alcor_pci_priv *priv = host->alcor_pci; + u32 var; + + var = alcor_read8(priv, addr); + var &= ~clear; + var |= set; + alcor_write8(priv, var, addr); +} + +/* As soon as irqs are masked, some status updates may be missed. + * Use this with care. + */ +static inline void alcor_mask_sd_irqs(struct alcor_sdmmc_host *host) +{ + struct alcor_pci_priv *priv = host->alcor_pci; + + alcor_write32(priv, 0, AU6601_REG_INT_ENABLE); +} + +static inline void alcor_unmask_sd_irqs(struct alcor_sdmmc_host *host) +{ + struct alcor_pci_priv *priv = host->alcor_pci; + + alcor_write32(priv, AU6601_INT_CMD_MASK | AU6601_INT_DATA_MASK | + AU6601_INT_CARD_INSERT | AU6601_INT_CARD_REMOVE | + AU6601_INT_OVER_CURRENT_ERR, + AU6601_REG_INT_ENABLE); +} + +static void alcor_reset(struct alcor_sdmmc_host *host, u8 val) +{ + struct alcor_pci_priv *priv = host->alcor_pci; + int i; + + alcor_write8(priv, val | AU6601_BUF_CTRL_RESET, + AU6601_REG_SW_RESET); + for (i = 0; i < 100; i++) { + if (!(alcor_read8(priv, AU6601_REG_SW_RESET) & val)) + return; + udelay(50); + } + dev_err(host->dev, "%s: timeout\n", __func__); +} + +static void alcor_data_set_dma(struct alcor_sdmmc_host *host) +{ + struct alcor_pci_priv *priv = host->alcor_pci; + u32 addr; + + if (!host->sg_count) + return; + + if (!host->sg) { + dev_err(host->dev, "have blocks, but no SG\n"); + return; + } + + if (!sg_dma_len(host->sg)) { + dev_err(host->dev, "DMA SG len == 0\n"); + return; + } + + + addr = (u32)sg_dma_address(host->sg); + + alcor_write32(priv, addr, AU6601_REG_SDMA_ADDR); + host->sg = sg_next(host->sg); + host->sg_count--; +} + +static void alcor_trigger_data_transfer(struct alcor_sdmmc_host *host, + bool early) +{ + struct alcor_pci_priv *priv = host->alcor_pci; + struct mmc_data *data = host->data; + u8 ctrl = 0; + + if (data->flags & MMC_DATA_WRITE) + ctrl |= AU6601_DATA_WRITE; + + if (data->host_cookie == COOKIE_MAPPED) { + if (host->early_data) { + host->early_data = false; + return; + } + + host->early_data = early; + + alcor_data_set_dma(host); + ctrl |= AU6601_DATA_DMA_MODE; + host->dma_on = 1; + alcor_write32(priv, data->sg_count * 0x1000, + AU6601_REG_BLOCK_SIZE); + } else { + alcor_write32(priv, data->blksz, AU6601_REG_BLOCK_SIZE); + } + + alcor_write8(priv, ctrl | AU6601_DATA_START_XFER, + AU6601_DATA_XFER_CTRL); +} + +static void alcor_trf_block_pio(struct alcor_sdmmc_host *host, bool read) +{ + struct alcor_pci_priv *priv = host->alcor_pci; + size_t blksize, len; + u8 *buf; + + if (!host->blocks) + return; + + if (host->dma_on) { + dev_err(host->dev, "configured DMA but got PIO request.\n"); + return; + } + + if (!!(host->data->flags & MMC_DATA_READ) != read) { + dev_err(host->dev, "got unexpected direction %i != %i\n", + !!(host->data->flags & MMC_DATA_READ), read); + } + + if (!sg_miter_next(&host->sg_miter)) + return; + + blksize = host->data->blksz; + len = min(host->sg_miter.length, blksize); + + dev_dbg(host->dev, "PIO, %s block size: 0x%zx\n", + read ? "read" : "write", blksize); + + host->sg_miter.consumed = len; + host->blocks--; + + buf = host->sg_miter.addr; + + if (read) + ioread32_rep(priv->iobase + AU6601_REG_BUFFER, buf, len >> 2); + else + iowrite32_rep(priv->iobase + AU6601_REG_BUFFER, buf, len >> 2); + + sg_miter_stop(&host->sg_miter); +} + +static void alcor_prepare_sg_miter(struct alcor_sdmmc_host *host) +{ + unsigned int flags = SG_MITER_ATOMIC; + struct mmc_data *data = host->data; + + if (data->flags & MMC_DATA_READ) + flags |= SG_MITER_TO_SG; + else + flags |= SG_MITER_FROM_SG; + sg_miter_start(&host->sg_miter, data->sg, data->sg_len, flags); +} + +static void alcor_prepare_data(struct alcor_sdmmc_host *host, + struct mmc_command *cmd) +{ + struct mmc_data *data = cmd->data; + + if (!data) + return; + + + host->data = data; + host->data->bytes_xfered = 0; + host->blocks = data->blocks; + host->sg = data->sg; + host->sg_count = data->sg_count; + dev_dbg(host->dev, "prepare DATA: sg %i, blocks: %i\n", + host->sg_count, host->blocks); + + if (data->host_cookie != COOKIE_MAPPED) + alcor_prepare_sg_miter(host); + + alcor_trigger_data_transfer(host, true); +} + +static void alcor_send_cmd(struct alcor_sdmmc_host *host, + struct mmc_command *cmd, bool set_timeout) +{ + struct alcor_pci_priv *priv = host->alcor_pci; + unsigned long timeout = 0; + u8 ctrl = 0; + + host->cmd = cmd; + alcor_prepare_data(host, cmd); + + dev_dbg(host->dev, "send CMD. opcode: 0x%02x, arg; 0x%08x\n", + cmd->opcode, cmd->arg); + alcor_write8(priv, cmd->opcode | 0x40, AU6601_REG_CMD_OPCODE); + alcor_write32be(priv, cmd->arg, AU6601_REG_CMD_ARG); + + switch (mmc_resp_type(cmd)) { + case MMC_RSP_NONE: + ctrl = AU6601_CMD_NO_RESP; + break; + case MMC_RSP_R1: + ctrl = AU6601_CMD_6_BYTE_CRC; + break; + case MMC_RSP_R1B: + ctrl = AU6601_CMD_6_BYTE_CRC | AU6601_CMD_STOP_WAIT_RDY; + break; + case MMC_RSP_R2: + ctrl = AU6601_CMD_17_BYTE_CRC; + break; + case MMC_RSP_R3: + ctrl = AU6601_CMD_6_BYTE_WO_CRC; + break; + default: + dev_err(host->dev, "%s: cmd->flag (0x%02x) is not valid\n", + mmc_hostname(host->mmc), mmc_resp_type(cmd)); + break; + } + + if (set_timeout) { + if (!cmd->data && cmd->busy_timeout) + timeout = cmd->busy_timeout; + else + timeout = 10000; + + schedule_delayed_work(&host->timeout_work, + msecs_to_jiffies(timeout)); + } + + dev_dbg(host->dev, "xfer ctrl: 0x%02x; timeout: %lu\n", ctrl, timeout); + alcor_write8(priv, ctrl | AU6601_CMD_START_XFER, + AU6601_CMD_XFER_CTRL); +} + +static void alcor_request_complete(struct alcor_sdmmc_host *host, + bool cancel_timeout) +{ + struct mmc_request *mrq; + + /* + * If this work gets rescheduled while running, it will + * be run again afterwards but without any active request. + */ + if (!host->mrq) + return; + + if (cancel_timeout) + cancel_delayed_work(&host->timeout_work); + + mrq = host->mrq; + + host->mrq = NULL; + host->cmd = NULL; + host->data = NULL; + host->dma_on = 0; + + mmc_request_done(host->mmc, mrq); +} + +static void alcor_finish_data(struct alcor_sdmmc_host *host) +{ + struct mmc_data *data; + + data = host->data; + host->data = NULL; + host->dma_on = 0; + + /* + * The specification states that the block count register must + * be updated, but it does not specify at what point in the + * data flow. That makes the register entirely useless to read + * back so we have to assume that nothing made it to the card + * in the event of an error. + */ + if (data->error) + data->bytes_xfered = 0; + else + data->bytes_xfered = data->blksz * data->blocks; + + /* + * Need to send CMD12 if - + * a) open-ended multiblock transfer (no CMD23) + * b) error in multiblock transfer + */ + if (data->stop && + (data->error || + !host->mrq->sbc)) { + + /* + * The controller needs a reset of internal state machines + * upon error conditions. + */ + if (data->error) + alcor_reset(host, AU6601_RESET_CMD | AU6601_RESET_DATA); + + alcor_unmask_sd_irqs(host); + alcor_send_cmd(host, data->stop, false); + return; + } + + alcor_request_complete(host, 1); +} + +static void alcor_err_irq(struct alcor_sdmmc_host *host, u32 intmask) +{ + dev_dbg(host->dev, "ERR IRQ %x\n", intmask); + + if (host->cmd) { + if (intmask & AU6601_INT_CMD_TIMEOUT_ERR) + host->cmd->error = -ETIMEDOUT; + else + host->cmd->error = -EILSEQ; + } + + if (host->data) { + if (intmask & AU6601_INT_DATA_TIMEOUT_ERR) + host->data->error = -ETIMEDOUT; + else + host->data->error = -EILSEQ; + + host->data->bytes_xfered = 0; + } + + alcor_reset(host, AU6601_RESET_CMD | AU6601_RESET_DATA); + alcor_request_complete(host, 1); +} + +static int alcor_cmd_irq_done(struct alcor_sdmmc_host *host, u32 intmask) +{ + struct alcor_pci_priv *priv = host->alcor_pci; + + intmask &= AU6601_INT_CMD_END; + + if (!intmask) + return true; + + /* got CMD_END but no CMD is in progress, wake thread an process the + * error + */ + if (!host->cmd) + return false; + + if (host->cmd->flags & MMC_RSP_PRESENT) { + struct mmc_command *cmd = host->cmd; + + cmd->resp[0] = alcor_read32be(priv, AU6601_REG_CMD_RSP0); + dev_dbg(host->dev, "RSP0: 0x%04x\n", cmd->resp[0]); + if (host->cmd->flags & MMC_RSP_136) { + cmd->resp[1] = + alcor_read32be(priv, AU6601_REG_CMD_RSP1); + cmd->resp[2] = + alcor_read32be(priv, AU6601_REG_CMD_RSP2); + cmd->resp[3] = + alcor_read32be(priv, AU6601_REG_CMD_RSP3); + dev_dbg(host->dev, "RSP1,2,3: 0x%04x 0x%04x 0x%04x\n", + cmd->resp[1], cmd->resp[2], cmd->resp[3]); + } + + } + + host->cmd->error = 0; + + /* Processed actual command. */ + if (!host->data) + return false; + + alcor_trigger_data_transfer(host, false); + host->cmd = NULL; + return true; +} + +static void alcor_cmd_irq_thread(struct alcor_sdmmc_host *host, u32 intmask) +{ + intmask &= AU6601_INT_CMD_END; + + if (!intmask) + return; + + if (!host->cmd && intmask & AU6601_INT_CMD_END) { + dev_dbg(host->dev, "Got command interrupt 0x%08x even though no command operation was in progress.\n", + intmask); + } + + /* Processed actual command. */ + if (!host->data) + alcor_request_complete(host, 1); + else + alcor_trigger_data_transfer(host, false); + host->cmd = NULL; +} + +static int alcor_data_irq_done(struct alcor_sdmmc_host *host, u32 intmask) +{ + u32 tmp; + + intmask &= AU6601_INT_DATA_MASK; + + /* nothing here to do */ + if (!intmask) + return 1; + + /* we was too fast and got DATA_END after it was processed? + * lets ignore it for now. + */ + if (!host->data && intmask == AU6601_INT_DATA_END) + return 1; + + /* looks like an error, so lets handle it. */ + if (!host->data) + return 0; + + tmp = intmask & (AU6601_INT_READ_BUF_RDY | AU6601_INT_WRITE_BUF_RDY + | AU6601_INT_DMA_END); + switch (tmp) { + case 0: + break; + case AU6601_INT_READ_BUF_RDY: + alcor_trf_block_pio(host, true); + if (!host->blocks) + break; + alcor_trigger_data_transfer(host, false); + return 1; + case AU6601_INT_WRITE_BUF_RDY: + alcor_trf_block_pio(host, false); + if (!host->blocks) + break; + alcor_trigger_data_transfer(host, false); + return 1; + case AU6601_INT_DMA_END: + if (!host->sg_count) + break; + + alcor_data_set_dma(host); + break; + default: + dev_err(host->dev, "Got READ_BUF_RDY and WRITE_BUF_RDY at same time\n"); + break; + } + + if (intmask & AU6601_INT_DATA_END) + return 0; + + return 1; +} + +static void alcor_data_irq_thread(struct alcor_sdmmc_host *host, u32 intmask) +{ + intmask &= AU6601_INT_DATA_MASK; + + if (!intmask) + return; + + if (!host->data) { + dev_dbg(host->dev, "Got data interrupt 0x%08x even though no data operation was in progress.\n", + intmask); + alcor_reset(host, AU6601_RESET_DATA); + return; + } + + if (alcor_data_irq_done(host, intmask)) + return; + + if ((intmask & AU6601_INT_DATA_END) || !host->blocks || + (host->dma_on && !host->sg_count)) + alcor_finish_data(host); +} + +static void alcor_cd_irq(struct alcor_sdmmc_host *host, u32 intmask) +{ + dev_dbg(host->dev, "card %s\n", + intmask & AU6601_INT_CARD_REMOVE ? "removed" : "inserted"); + + if (host->mrq) { + dev_dbg(host->dev, "cancel all pending tasks.\n"); + + if (host->data) + host->data->error = -ENOMEDIUM; + + if (host->cmd) + host->cmd->error = -ENOMEDIUM; + else + host->mrq->cmd->error = -ENOMEDIUM; + + alcor_request_complete(host, 1); + } + + mmc_detect_change(host->mmc, msecs_to_jiffies(1)); +} + +static irqreturn_t alcor_irq_thread(int irq, void *d) +{ + struct alcor_sdmmc_host *host = d; + irqreturn_t ret = IRQ_HANDLED; + u32 intmask, tmp; + + mutex_lock(&host->cmd_mutex); + + intmask = host->irq_status_sd; + + /* some thing bad */ + if (unlikely(!intmask || AU6601_INT_ALL_MASK == intmask)) { + dev_dbg(host->dev, "unexpected IRQ: 0x%04x\n", intmask); + ret = IRQ_NONE; + goto exit; + } + + tmp = intmask & (AU6601_INT_CMD_MASK | AU6601_INT_DATA_MASK); + if (tmp) { + if (tmp & AU6601_INT_ERROR_MASK) + alcor_err_irq(host, tmp); + else { + alcor_cmd_irq_thread(host, tmp); + alcor_data_irq_thread(host, tmp); + } + intmask &= ~(AU6601_INT_CMD_MASK | AU6601_INT_DATA_MASK); + } + + if (intmask & (AU6601_INT_CARD_INSERT | AU6601_INT_CARD_REMOVE)) { + alcor_cd_irq(host, intmask); + intmask &= ~(AU6601_INT_CARD_INSERT | AU6601_INT_CARD_REMOVE); + } + + if (intmask & AU6601_INT_OVER_CURRENT_ERR) { + dev_warn(host->dev, + "warning: over current detected!\n"); + intmask &= ~AU6601_INT_OVER_CURRENT_ERR; + } + + if (intmask) + dev_dbg(host->dev, "got not handled IRQ: 0x%04x\n", intmask); + +exit: + mutex_unlock(&host->cmd_mutex); + alcor_unmask_sd_irqs(host); + return ret; +} + + +static irqreturn_t alcor_irq(int irq, void *d) +{ + struct alcor_sdmmc_host *host = d; + struct alcor_pci_priv *priv = host->alcor_pci; + u32 status, tmp; + irqreturn_t ret; + int cmd_done, data_done; + + status = alcor_read32(priv, AU6601_REG_INT_STATUS); + if (!status) + return IRQ_NONE; + + alcor_write32(priv, status, AU6601_REG_INT_STATUS); + + tmp = status & (AU6601_INT_READ_BUF_RDY | AU6601_INT_WRITE_BUF_RDY + | AU6601_INT_DATA_END | AU6601_INT_DMA_END + | AU6601_INT_CMD_END); + if (tmp == status) { + cmd_done = alcor_cmd_irq_done(host, tmp); + data_done = alcor_data_irq_done(host, tmp); + /* use fast path for simple tasks */ + if (cmd_done && data_done) { + ret = IRQ_HANDLED; + goto alcor_irq_done; + } + } + + host->irq_status_sd = status; + ret = IRQ_WAKE_THREAD; + alcor_mask_sd_irqs(host); +alcor_irq_done: + return ret; +} + +static void alcor_set_clock(struct alcor_sdmmc_host *host, unsigned int clock) +{ + struct alcor_pci_priv *priv = host->alcor_pci; + int i, diff = 0x7fffffff, tmp_clock = 0; + u16 clk_src = 0; + u8 clk_div = 0; + + if (clock == 0) { + alcor_write16(priv, 0, AU6601_CLK_SELECT); + return; + } + + for (i = 0; i < ARRAY_SIZE(alcor_pll_cfg); i++) { + unsigned int tmp_div, tmp_diff; + const struct alcor_pll_conf *cfg = &alcor_pll_cfg[i]; + + tmp_div = DIV_ROUND_UP(cfg->clk_src_freq, clock); + if (cfg->min_div > tmp_div || tmp_div > cfg->max_div) + continue; + + tmp_clock = DIV_ROUND_UP(cfg->clk_src_freq, tmp_div); + tmp_diff = abs(clock - tmp_clock); + + if (tmp_diff >= 0 && tmp_diff < diff) { + diff = tmp_diff; + clk_src = cfg->clk_src_reg; + clk_div = tmp_div; + } + } + + clk_src |= ((clk_div - 1) << 8); + clk_src |= AU6601_CLK_ENABLE; + + dev_dbg(host->dev, "set freq %d cal freq %d, use div %d, mod %x\n", + clock, tmp_clock, clk_div, clk_src); + + alcor_write16(priv, clk_src, AU6601_CLK_SELECT); + +} + +static void alcor_set_timing(struct mmc_host *mmc, struct mmc_ios *ios) +{ + struct alcor_sdmmc_host *host = mmc_priv(mmc); + + if (ios->timing == MMC_TIMING_LEGACY) { + alcor_rmw8(host, AU6601_CLK_DELAY, + AU6601_CLK_POSITIVE_EDGE_ALL, 0); + } else { + alcor_rmw8(host, AU6601_CLK_DELAY, + 0, AU6601_CLK_POSITIVE_EDGE_ALL); + } +} + +static void alcor_set_bus_width(struct mmc_host *mmc, struct mmc_ios *ios) +{ + struct alcor_sdmmc_host *host = mmc_priv(mmc); + struct alcor_pci_priv *priv = host->alcor_pci; + + if (ios->bus_width == MMC_BUS_WIDTH_1) { + alcor_write8(priv, 0, AU6601_REG_BUS_CTRL); + } else if (ios->bus_width == MMC_BUS_WIDTH_4) { + alcor_write8(priv, AU6601_BUS_WIDTH_4BIT, + AU6601_REG_BUS_CTRL); + } else + dev_err(host->dev, "Unknown BUS mode\n"); + +} + +static int alcor_card_busy(struct mmc_host *mmc) +{ + struct alcor_sdmmc_host *host = mmc_priv(mmc); + struct alcor_pci_priv *priv = host->alcor_pci; + u8 status; + + /* Check whether dat[0:3] low */ + status = alcor_read8(priv, AU6601_DATA_PIN_STATE); + + return !(status & AU6601_BUS_STAT_DAT_MASK); +} + +static int alcor_get_cd(struct mmc_host *mmc) +{ + struct alcor_sdmmc_host *host = mmc_priv(mmc); + struct alcor_pci_priv *priv = host->alcor_pci; + u8 detect; + + detect = alcor_read8(priv, AU6601_DETECT_STATUS) + & AU6601_DETECT_STATUS_M; + /* check if card is present then send command and data */ + return (detect == AU6601_SD_DETECTED); +} + +static int alcor_get_ro(struct mmc_host *mmc) +{ + struct alcor_sdmmc_host *host = mmc_priv(mmc); + struct alcor_pci_priv *priv = host->alcor_pci; + u8 status; + + /* get write protect pin status */ + status = alcor_read8(priv, AU6601_INTERFACE_MODE_CTRL); + + return !!(status & AU6601_SD_CARD_WP); +} + +static void alcor_request(struct mmc_host *mmc, struct mmc_request *mrq) +{ + struct alcor_sdmmc_host *host = mmc_priv(mmc); + + mutex_lock(&host->cmd_mutex); + + host->mrq = mrq; + + /* check if card is present then send command and data */ + if (alcor_get_cd(mmc)) + alcor_send_cmd(host, mrq->cmd, true); + else { + mrq->cmd->error = -ENOMEDIUM; + alcor_request_complete(host, 1); + } + + mutex_unlock(&host->cmd_mutex); +} + +static void alcor_pre_req(struct mmc_host *mmc, + struct mmc_request *mrq) +{ + struct alcor_sdmmc_host *host = mmc_priv(mmc); + struct mmc_data *data = mrq->data; + struct mmc_command *cmd = mrq->cmd; + struct scatterlist *sg; + unsigned int i, sg_len; + + if (!data || !cmd) + return; + + data->host_cookie = COOKIE_UNMAPPED; + + /* FIXME: looks like the DMA engine works only with CMD18 */ + if (cmd->opcode != 18) + return; + /* + * We don't do DMA on "complex" transfers, i.e. with + * non-word-aligned buffers or lengths. Also, we don't bother + * with all the DMA setup overhead for short transfers. + */ + if (data->blocks * data->blksz < AU6601_MAX_DMA_BLOCK_SIZE) + return; + + if (data->blksz & 3) + return; + + for_each_sg(data->sg, sg, data->sg_len, i) { + if (sg->length != AU6601_MAX_DMA_BLOCK_SIZE) + return; + } + + /* This data might be unmapped at this time */ + + sg_len = dma_map_sg(host->dev, data->sg, data->sg_len, + mmc_get_dma_dir(data)); + if (sg_len) + data->host_cookie = COOKIE_MAPPED; + + data->sg_count = sg_len; +} + +static void alcor_post_req(struct mmc_host *mmc, + struct mmc_request *mrq, + int err) +{ + struct alcor_sdmmc_host *host = mmc_priv(mmc); + struct mmc_data *data = mrq->data; + + if (!data) + return; + + if (data->host_cookie == COOKIE_MAPPED) { + dma_unmap_sg(host->dev, + data->sg, + data->sg_len, + mmc_get_dma_dir(data)); + } + + data->host_cookie = COOKIE_UNMAPPED; +} + +static void alcor_set_power_mode(struct mmc_host *mmc, struct mmc_ios *ios) +{ + struct alcor_sdmmc_host *host = mmc_priv(mmc); + struct alcor_pci_priv *priv = host->alcor_pci; + + switch (ios->power_mode) { + case MMC_POWER_OFF: + alcor_set_clock(host, ios->clock); + /* set all pins to input */ + alcor_write8(priv, 0, AU6601_OUTPUT_ENABLE); + /* turn of VDD */ + alcor_write8(priv, 0, AU6601_POWER_CONTROL); + break; + case MMC_POWER_UP: + break; + case MMC_POWER_ON: + /* This is most trickiest part. The order and timings of + * instructions seems to play important role. Any changes may + * confuse internal state engine if this HW. + * FIXME: If we will ever get access to documentation, then this + * part should be reviewed again. + */ + + /* enable SD card mode */ + alcor_write8(priv, AU6601_SD_CARD, + AU6601_ACTIVE_CTRL); + /* set signal voltage to 3.3V */ + alcor_write8(priv, 0, AU6601_OPT); + /* no documentation about clk delay, for now just try to mimic + * original driver. + */ + alcor_write8(priv, 0x20, AU6601_CLK_DELAY); + /* set BUS width to 1 bit */ + alcor_write8(priv, 0, AU6601_REG_BUS_CTRL); + /* set CLK first time */ + alcor_set_clock(host, ios->clock); + /* power on VDD */ + alcor_write8(priv, AU6601_SD_CARD, + AU6601_POWER_CONTROL); + /* wait until the CLK will get stable */ + mdelay(20); + /* set CLK again, mimic original driver. */ + alcor_set_clock(host, ios->clock); + + /* enable output */ + alcor_write8(priv, AU6601_SD_CARD, + AU6601_OUTPUT_ENABLE); + /* The clk will not work on au6621. We need to trigger data + * transfer. + */ + alcor_write8(priv, AU6601_DATA_WRITE, + AU6601_DATA_XFER_CTRL); + /* configure timeout. Not clear what exactly it means. */ + alcor_write8(priv, 0x7d, AU6601_TIME_OUT_CTRL); + mdelay(100); + break; + default: + dev_err(host->dev, "Unknown power parameter\n"); + } +} + +static void alcor_set_ios(struct mmc_host *mmc, struct mmc_ios *ios) +{ + struct alcor_sdmmc_host *host = mmc_priv(mmc); + + mutex_lock(&host->cmd_mutex); + + dev_dbg(host->dev, "set ios. bus width: %x, power mode: %x\n", + ios->bus_width, ios->power_mode); + + if (ios->power_mode != host->cur_power_mode) { + alcor_set_power_mode(mmc, ios); + host->cur_power_mode = ios->power_mode; + } else { + alcor_set_timing(mmc, ios); + alcor_set_bus_width(mmc, ios); + alcor_set_clock(host, ios->clock); + } + + mutex_unlock(&host->cmd_mutex); +} + +static int alcor_signal_voltage_switch(struct mmc_host *mmc, + struct mmc_ios *ios) +{ + struct alcor_sdmmc_host *host = mmc_priv(mmc); + + mutex_lock(&host->cmd_mutex); + + switch (ios->signal_voltage) { + case MMC_SIGNAL_VOLTAGE_330: + alcor_rmw8(host, AU6601_OPT, AU6601_OPT_SD_18V, 0); + break; + case MMC_SIGNAL_VOLTAGE_180: + alcor_rmw8(host, AU6601_OPT, 0, AU6601_OPT_SD_18V); + break; + default: + /* No signal voltage switch required */ + break; + } + + mutex_unlock(&host->cmd_mutex); + return 0; +} + +static const struct mmc_host_ops alcor_sdc_ops = { + .card_busy = alcor_card_busy, + .get_cd = alcor_get_cd, + .get_ro = alcor_get_ro, + .post_req = alcor_post_req, + .pre_req = alcor_pre_req, + .request = alcor_request, + .set_ios = alcor_set_ios, + .start_signal_voltage_switch = alcor_signal_voltage_switch, +}; + +static void alcor_timeout_timer(struct work_struct *work) +{ + struct delayed_work *d = to_delayed_work(work); + struct alcor_sdmmc_host *host = container_of(d, struct alcor_sdmmc_host, + timeout_work); + mutex_lock(&host->cmd_mutex); + + dev_dbg(host->dev, "triggered timeout\n"); + if (host->mrq) { + dev_err(host->dev, "Timeout waiting for hardware interrupt.\n"); + + if (host->data) { + host->data->error = -ETIMEDOUT; + } else { + if (host->cmd) + host->cmd->error = -ETIMEDOUT; + else + host->mrq->cmd->error = -ETIMEDOUT; + } + + alcor_reset(host, AU6601_RESET_CMD | AU6601_RESET_DATA); + alcor_request_complete(host, 0); + } + + mmiowb(); + mutex_unlock(&host->cmd_mutex); +} + +static void alcor_hw_init(struct alcor_sdmmc_host *host) +{ + struct alcor_pci_priv *priv = host->alcor_pci; + struct alcor_dev_cfg *cfg = priv->cfg; + + /* FIXME: This part is a mimics HW init of original driver. + * If we will ever get access to documentation, then this part + * should be reviewed again. + */ + + /* reset command state engine */ + alcor_reset(host, AU6601_RESET_CMD); + + alcor_write8(priv, 0, AU6601_DMA_BOUNDARY); + /* enable sd card mode */ + alcor_write8(priv, AU6601_SD_CARD, AU6601_ACTIVE_CTRL); + + /* set BUS width to 1 bit */ + alcor_write8(priv, 0, AU6601_REG_BUS_CTRL); + + /* reset data state engine */ + alcor_reset(host, AU6601_RESET_DATA); + /* Not sure if a voodoo with AU6601_DMA_BOUNDARY is really needed */ + alcor_write8(priv, 0, AU6601_DMA_BOUNDARY); + + alcor_write8(priv, 0, AU6601_INTERFACE_MODE_CTRL); + /* not clear what we are doing here. */ + alcor_write8(priv, 0x44, AU6601_PAD_DRIVE0); + alcor_write8(priv, 0x44, AU6601_PAD_DRIVE1); + alcor_write8(priv, 0x00, AU6601_PAD_DRIVE2); + + /* for 6601 - dma_boundary; for 6621 - dma_page_cnt + * exact meaning of this register is not clear. + */ + alcor_write8(priv, cfg->dma, AU6601_DMA_BOUNDARY); + + /* make sure all pins are set to input and VDD is off */ + alcor_write8(priv, 0, AU6601_OUTPUT_ENABLE); + alcor_write8(priv, 0, AU6601_POWER_CONTROL); + + alcor_write8(priv, AU6601_DETECT_EN, AU6601_DETECT_STATUS); + /* now we should be safe to enable IRQs */ + alcor_unmask_sd_irqs(host); +} + +static void alcor_hw_uninit(struct alcor_sdmmc_host *host) +{ + struct alcor_pci_priv *priv = host->alcor_pci; + + alcor_mask_sd_irqs(host); + alcor_reset(host, AU6601_RESET_CMD | AU6601_RESET_DATA); + + alcor_write8(priv, 0, AU6601_DETECT_STATUS); + + alcor_write8(priv, 0, AU6601_OUTPUT_ENABLE); + alcor_write8(priv, 0, AU6601_POWER_CONTROL); + + alcor_write8(priv, 0, AU6601_OPT); +} + +static void alcor_init_mmc(struct alcor_sdmmc_host *host) +{ + struct mmc_host *mmc = host->mmc; + + mmc->f_min = AU6601_MIN_CLOCK; + mmc->f_max = AU6601_MAX_CLOCK; + mmc->ocr_avail = MMC_VDD_33_34; + mmc->caps = MMC_CAP_4_BIT_DATA | MMC_CAP_SD_HIGHSPEED + | MMC_CAP_UHS_SDR12 | MMC_CAP_UHS_SDR25 | MMC_CAP_UHS_SDR50 + | MMC_CAP_UHS_SDR104 | MMC_CAP_UHS_DDR50; + mmc->caps2 = MMC_CAP2_NO_SDIO; + mmc->ops = &alcor_sdc_ops; + + /* Hardware cannot do scatter lists */ + mmc->max_segs = AU6601_MAX_DMA_SEGMENTS; + mmc->max_seg_size = AU6601_MAX_DMA_BLOCK_SIZE; + + mmc->max_blk_size = mmc->max_seg_size; + mmc->max_blk_count = mmc->max_segs; + + mmc->max_req_size = mmc->max_seg_size * mmc->max_segs; +} + +static int alcor_pci_sdmmc_drv_probe(struct platform_device *pdev) +{ + struct alcor_pci_priv *priv = pdev->dev.platform_data; + struct mmc_host *mmc; + struct alcor_sdmmc_host *host; + int ret; + + mmc = mmc_alloc_host(sizeof(*host), &pdev->dev); + if (!mmc) { + dev_err(&pdev->dev, "Can't allocate MMC\n"); + return -ENOMEM; + } + + host = mmc_priv(mmc); + host->mmc = mmc; + host->dev = &pdev->dev; + host->cur_power_mode = MMC_POWER_UNDEFINED; + host->alcor_pci = priv; + + /* make sure irqs are disabled */ + alcor_write32(priv, 0, AU6601_REG_INT_ENABLE); + alcor_write32(priv, 0, AU6601_MS_INT_ENABLE); + + ret = devm_request_threaded_irq(&pdev->dev, priv->irq, + alcor_irq, alcor_irq_thread, IRQF_SHARED, + DRV_NAME_ALCOR_PCI_SDMMC, host); + + if (ret) { + dev_err(&pdev->dev, "Failed to get irq for data line\n"); + return ret; + } + + mutex_init(&host->cmd_mutex); + INIT_DELAYED_WORK(&host->timeout_work, alcor_timeout_timer); + + alcor_init_mmc(host); + alcor_hw_init(host); + + dev_set_drvdata(&pdev->dev, host); + mmc_add_host(mmc); + return 0; +} + +static int alcor_pci_sdmmc_drv_remove(struct platform_device *pdev) +{ + struct alcor_sdmmc_host *host = dev_get_drvdata(&pdev->dev); + + if (cancel_delayed_work_sync(&host->timeout_work)) + alcor_request_complete(host, 0); + + alcor_hw_uninit(host); + mmc_remove_host(host->mmc); + mmc_free_host(host->mmc); + + return 0; +} + +#ifdef CONFIG_PM_SLEEP +static int alcor_pci_sdmmc_suspend(struct device *dev) +{ + struct alcor_sdmmc_host *host = dev_get_drvdata(dev); + + if (cancel_delayed_work_sync(&host->timeout_work)) + alcor_request_complete(host, 0); + + alcor_hw_uninit(host); + + return 0; +} + +static int alcor_pci_sdmmc_resume(struct device *dev) +{ + struct alcor_sdmmc_host *host = dev_get_drvdata(dev); + + alcor_hw_init(host); + + return 0; +} +#endif /* CONFIG_PM_SLEEP */ + +static SIMPLE_DEV_PM_OPS(alcor_mmc_pm_ops, alcor_pci_sdmmc_suspend, + alcor_pci_sdmmc_resume); + +static const struct platform_device_id alcor_pci_sdmmc_ids[] = { + { + .name = DRV_NAME_ALCOR_PCI_SDMMC, + }, { + /* sentinel */ + } +}; +MODULE_DEVICE_TABLE(platform, alcor_pci_sdmmc_ids); + +static struct platform_driver alcor_pci_sdmmc_driver = { + .probe = alcor_pci_sdmmc_drv_probe, + .remove = alcor_pci_sdmmc_drv_remove, + .id_table = alcor_pci_sdmmc_ids, + .driver = { + .name = DRV_NAME_ALCOR_PCI_SDMMC, + .pm = &alcor_mmc_pm_ops + }, +}; +module_platform_driver(alcor_pci_sdmmc_driver); + +MODULE_AUTHOR("Oleksij Rempel <linux@rempel-privat.de>"); +MODULE_DESCRIPTION("PCI driver for Alcor Micro AU6601 Secure Digital Host Controller Interface"); +MODULE_LICENSE("GPL"); diff --git a/drivers/mmc/host/atmel-mci.c b/drivers/mmc/host/atmel-mci.c index be53044086c7..47189f9ed4e2 100644 --- a/drivers/mmc/host/atmel-mci.c +++ b/drivers/mmc/host/atmel-mci.c @@ -446,18 +446,7 @@ static int atmci_req_show(struct seq_file *s, void *v) return 0; } -static int atmci_req_open(struct inode *inode, struct file *file) -{ - return single_open(file, atmci_req_show, inode->i_private); -} - -static const struct file_operations atmci_req_fops = { - .owner = THIS_MODULE, - .open = atmci_req_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; +DEFINE_SHOW_ATTRIBUTE(atmci_req); static void atmci_show_status_reg(struct seq_file *s, const char *regname, u32 value) @@ -583,18 +572,7 @@ static int atmci_regs_show(struct seq_file *s, void *v) return ret; } -static int atmci_regs_open(struct inode *inode, struct file *file) -{ - return single_open(file, atmci_regs_show, inode->i_private); -} - -static const struct file_operations atmci_regs_fops = { - .owner = THIS_MODULE, - .open = atmci_regs_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; +DEFINE_SHOW_ATTRIBUTE(atmci_regs); static void atmci_init_debugfs(struct atmel_mci_slot *slot) { @@ -608,13 +586,14 @@ static void atmci_init_debugfs(struct atmel_mci_slot *slot) return; node = debugfs_create_file("regs", S_IRUSR, root, host, - &atmci_regs_fops); + &atmci_regs_fops); if (IS_ERR(node)) return; if (!node) goto err; - node = debugfs_create_file("req", S_IRUSR, root, slot, &atmci_req_fops); + node = debugfs_create_file("req", S_IRUSR, root, slot, + &atmci_req_fops); if (!node) goto err; @@ -1954,13 +1933,14 @@ static void atmci_tasklet_func(unsigned long priv) } atmci_request_end(host, host->mrq); - state = STATE_IDLE; + goto unlock; /* atmci_request_end() sets host->state */ break; } } while (state != prev_state); host->state = state; +unlock: spin_unlock(&host->lock); } diff --git a/drivers/mmc/host/bcm2835.c b/drivers/mmc/host/bcm2835.c index 768972af8b85..50293529d6de 100644 --- a/drivers/mmc/host/bcm2835.c +++ b/drivers/mmc/host/bcm2835.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * bcm2835 sdhost driver. * @@ -25,18 +26,6 @@ * sdhci-bcm2708.c by Broadcom * sdhci-bcm2835.c by Stephen Warren and Oleksandr Tymoshenko * sdhci.c and sdhci-pci.c by Pierre Ossman - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see <http://www.gnu.org/licenses/>. */ #include <linux/clk.h> #include <linux/delay.h> @@ -286,6 +275,7 @@ static void bcm2835_reset(struct mmc_host *mmc) if (host->dma_chan) dmaengine_terminate_sync(host->dma_chan); + host->dma_chan = NULL; bcm2835_reset_internal(host); } @@ -463,7 +453,7 @@ static void bcm2835_transfer_pio(struct bcm2835_host *host) static void bcm2835_prepare_dma(struct bcm2835_host *host, struct mmc_data *data) { - int len, dir_data, dir_slave; + int sg_len, dir_data, dir_slave; struct dma_async_tx_descriptor *desc = NULL; struct dma_chan *dma_chan; @@ -509,23 +499,24 @@ void bcm2835_prepare_dma(struct bcm2835_host *host, struct mmc_data *data) &host->dma_cfg_rx : &host->dma_cfg_tx); - len = dma_map_sg(dma_chan->device->dev, data->sg, data->sg_len, - dir_data); + sg_len = dma_map_sg(dma_chan->device->dev, data->sg, data->sg_len, + dir_data); + if (!sg_len) + return; - if (len > 0) { - desc = dmaengine_prep_slave_sg(dma_chan, data->sg, - len, dir_slave, - DMA_PREP_INTERRUPT | - DMA_CTRL_ACK); - } + desc = dmaengine_prep_slave_sg(dma_chan, data->sg, sg_len, dir_slave, + DMA_PREP_INTERRUPT | DMA_CTRL_ACK); - if (desc) { - desc->callback = bcm2835_dma_complete; - desc->callback_param = host; - host->dma_desc = desc; - host->dma_chan = dma_chan; - host->dma_dir = dir_data; + if (!desc) { + dma_unmap_sg(dma_chan->device->dev, data->sg, sg_len, dir_data); + return; } + + desc->callback = bcm2835_dma_complete; + desc->callback_param = host; + host->dma_desc = desc; + host->dma_chan = dma_chan; + host->dma_dir = dir_data; } static void bcm2835_start_dma(struct bcm2835_host *host) @@ -607,7 +598,7 @@ static void bcm2835_finish_request(struct bcm2835_host *host) struct dma_chan *terminate_chan = NULL; struct mmc_request *mrq; - cancel_delayed_work(&host->timeout_work); + cancel_delayed_work_sync(&host->timeout_work); mrq = host->mrq; @@ -772,6 +763,8 @@ static void bcm2835_finish_command(struct bcm2835_host *host) if (!(sdhsts & SDHSTS_CRC7_ERROR) || (host->cmd->opcode != MMC_SEND_OP_COND)) { + u32 edm, fsm; + if (sdhsts & SDHSTS_CMD_TIME_OUT) { host->cmd->error = -ETIMEDOUT; } else { @@ -780,6 +773,13 @@ static void bcm2835_finish_command(struct bcm2835_host *host) bcm2835_dumpregs(host); host->cmd->error = -EILSEQ; } + edm = readl(host->ioaddr + SDEDM); + fsm = edm & SDEDM_FSM_MASK; + if (fsm == SDEDM_FSM_READWAIT || + fsm == SDEDM_FSM_WRITESTART1) + /* Kick the FSM out of its wait */ + writel(edm | SDEDM_FORCE_DATA_MODE, + host->ioaddr + SDEDM); bcm2835_finish_request(host); return; } @@ -837,6 +837,8 @@ static void bcm2835_timeout(struct work_struct *work) dev_err(dev, "timeout waiting for hardware interrupt.\n"); bcm2835_dumpregs(host); + bcm2835_reset(host->mmc); + if (host->data) { host->data->error = -ETIMEDOUT; bcm2835_finish_data(host); @@ -1052,10 +1054,12 @@ static void bcm2835_dma_complete_work(struct work_struct *work) { struct bcm2835_host *host = container_of(work, struct bcm2835_host, dma_work); - struct mmc_data *data = host->data; + struct mmc_data *data; mutex_lock(&host->mutex); + data = host->data; + if (host->dma_chan) { dma_unmap_sg(host->dma_chan->device->dev, data->sg, data->sg_len, @@ -1180,9 +1184,6 @@ static void bcm2835_request(struct mmc_host *mmc, struct mmc_request *mrq) return; } - if (host->use_dma && mrq->data && (mrq->data->blocks > PIO_THRESHOLD)) - bcm2835_prepare_dma(host, mrq->data); - mutex_lock(&host->mutex); WARN_ON(host->mrq); @@ -1206,6 +1207,9 @@ static void bcm2835_request(struct mmc_host *mmc, struct mmc_request *mrq) return; } + if (host->use_dma && mrq->data && (mrq->data->blocks > PIO_THRESHOLD)) + bcm2835_prepare_dma(host, mrq->data); + host->use_sbc = !!mrq->sbc && host->mrq->data && (host->mrq->data->flags & MMC_DATA_READ); if (host->use_sbc) { @@ -1445,6 +1449,9 @@ static int bcm2835_remove(struct platform_device *pdev) cancel_work_sync(&host->dma_work); cancel_delayed_work_sync(&host->timeout_work); + if (host->dma_chan_rxtx) + dma_release_channel(host->dma_chan_rxtx); + mmc_free_host(host->mmc); platform_set_drvdata(pdev, NULL); diff --git a/drivers/mmc/host/dw_mmc-bluefield.c b/drivers/mmc/host/dw_mmc-bluefield.c index 54c3fbb4a391..ed8f2254b66a 100644 --- a/drivers/mmc/host/dw_mmc-bluefield.c +++ b/drivers/mmc/host/dw_mmc-bluefield.c @@ -52,16 +52,7 @@ MODULE_DEVICE_TABLE(of, dw_mci_bluefield_match); static int dw_mci_bluefield_probe(struct platform_device *pdev) { - const struct dw_mci_drv_data *drv_data = NULL; - const struct of_device_id *match; - - if (pdev->dev.of_node) { - match = of_match_node(dw_mci_bluefield_match, - pdev->dev.of_node); - drv_data = match->data; - } - - return dw_mci_pltfm_register(pdev, drv_data); + return dw_mci_pltfm_register(pdev, &bluefield_drv_data); } static struct platform_driver dw_mci_bluefield_pltfm_driver = { diff --git a/drivers/mmc/host/jz4740_mmc.c b/drivers/mmc/host/jz4740_mmc.c index 0c1efd5100b7..33215d66afa2 100644 --- a/drivers/mmc/host/jz4740_mmc.c +++ b/drivers/mmc/host/jz4740_mmc.c @@ -21,7 +21,7 @@ #include <linux/dmaengine.h> #include <linux/dma-mapping.h> #include <linux/err.h> -#include <linux/gpio.h> +#include <linux/gpio/consumer.h> #include <linux/interrupt.h> #include <linux/io.h> #include <linux/irq.h> @@ -126,9 +126,23 @@ enum jz4740_mmc_state { JZ4740_MMC_STATE_DONE, }; -struct jz4740_mmc_host_next { - int sg_len; - s32 cookie; +/* + * The MMC core allows to prepare a mmc_request while another mmc_request + * is in-flight. This is used via the pre_req/post_req hooks. + * This driver uses the pre_req/post_req hooks to map/unmap the mmc_request. + * Following what other drivers do (sdhci, dw_mmc) we use the following cookie + * flags to keep track of the mmc_request mapping state. + * + * COOKIE_UNMAPPED: the request is not mapped. + * COOKIE_PREMAPPED: the request was mapped in pre_req, + * and should be unmapped in post_req. + * COOKIE_MAPPED: the request was mapped in the irq handler, + * and should be unmapped before mmc_request_done is called.. + */ +enum jz4780_cookie { + COOKIE_UNMAPPED = 0, + COOKIE_PREMAPPED, + COOKIE_MAPPED, }; struct jz4740_mmc_host { @@ -136,6 +150,7 @@ struct jz4740_mmc_host { struct platform_device *pdev; struct jz4740_mmc_platform_data *pdata; struct clk *clk; + struct gpio_desc *power; enum jz4740_mmc_version version; @@ -162,9 +177,7 @@ struct jz4740_mmc_host { /* DMA support */ struct dma_chan *dma_rx; struct dma_chan *dma_tx; - struct jz4740_mmc_host_next next_data; bool use_dma; - int sg_len; /* The DMA trigger level is 8 words, that is to say, the DMA read * trigger is when data words in MSC_RXFIFO is >= 8 and the DMA write @@ -226,9 +239,6 @@ static int jz4740_mmc_acquire_dma_channels(struct jz4740_mmc_host *host) return PTR_ERR(host->dma_rx); } - /* Initialize DMA pre request cookie */ - host->next_data.cookie = 1; - return 0; } @@ -245,60 +255,44 @@ static void jz4740_mmc_dma_unmap(struct jz4740_mmc_host *host, enum dma_data_direction dir = mmc_get_dma_dir(data); dma_unmap_sg(chan->device->dev, data->sg, data->sg_len, dir); + data->host_cookie = COOKIE_UNMAPPED; } -/* Prepares DMA data for current/next transfer, returns non-zero on failure */ +/* Prepares DMA data for current or next transfer. + * A request can be in-flight when this is called. + */ static int jz4740_mmc_prepare_dma_data(struct jz4740_mmc_host *host, struct mmc_data *data, - struct jz4740_mmc_host_next *next, - struct dma_chan *chan) + int cookie) { - struct jz4740_mmc_host_next *next_data = &host->next_data; + struct dma_chan *chan = jz4740_mmc_get_dma_chan(host, data); enum dma_data_direction dir = mmc_get_dma_dir(data); - int sg_len; - - if (!next && data->host_cookie && - data->host_cookie != host->next_data.cookie) { - dev_warn(mmc_dev(host->mmc), - "[%s] invalid cookie: data->host_cookie %d host->next_data.cookie %d\n", - __func__, - data->host_cookie, - host->next_data.cookie); - data->host_cookie = 0; - } + int sg_count; - /* Check if next job is already prepared */ - if (next || data->host_cookie != host->next_data.cookie) { - sg_len = dma_map_sg(chan->device->dev, - data->sg, - data->sg_len, - dir); + if (data->host_cookie == COOKIE_PREMAPPED) + return data->sg_count; - } else { - sg_len = next_data->sg_len; - next_data->sg_len = 0; - } + sg_count = dma_map_sg(chan->device->dev, + data->sg, + data->sg_len, + dir); - if (sg_len <= 0) { + if (sg_count <= 0) { dev_err(mmc_dev(host->mmc), "Failed to map scatterlist for DMA operation\n"); return -EINVAL; } - if (next) { - next->sg_len = sg_len; - data->host_cookie = ++next->cookie < 0 ? 1 : next->cookie; - } else - host->sg_len = sg_len; + data->sg_count = sg_count; + data->host_cookie = cookie; - return 0; + return data->sg_count; } static int jz4740_mmc_start_dma_transfer(struct jz4740_mmc_host *host, struct mmc_data *data) { - int ret; - struct dma_chan *chan; + struct dma_chan *chan = jz4740_mmc_get_dma_chan(host, data); struct dma_async_tx_descriptor *desc; struct dma_slave_config conf = { .src_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES, @@ -306,29 +300,26 @@ static int jz4740_mmc_start_dma_transfer(struct jz4740_mmc_host *host, .src_maxburst = JZ4740_MMC_FIFO_HALF_SIZE, .dst_maxburst = JZ4740_MMC_FIFO_HALF_SIZE, }; + int sg_count; if (data->flags & MMC_DATA_WRITE) { conf.direction = DMA_MEM_TO_DEV; conf.dst_addr = host->mem_res->start + JZ_REG_MMC_TXFIFO; conf.slave_id = JZ4740_DMA_TYPE_MMC_TRANSMIT; - chan = host->dma_tx; } else { conf.direction = DMA_DEV_TO_MEM; conf.src_addr = host->mem_res->start + JZ_REG_MMC_RXFIFO; conf.slave_id = JZ4740_DMA_TYPE_MMC_RECEIVE; - chan = host->dma_rx; } - ret = jz4740_mmc_prepare_dma_data(host, data, NULL, chan); - if (ret) - return ret; + sg_count = jz4740_mmc_prepare_dma_data(host, data, COOKIE_MAPPED); + if (sg_count < 0) + return sg_count; dmaengine_slave_config(chan, &conf); - desc = dmaengine_prep_slave_sg(chan, - data->sg, - host->sg_len, - conf.direction, - DMA_PREP_INTERRUPT | DMA_CTRL_ACK); + desc = dmaengine_prep_slave_sg(chan, data->sg, sg_count, + conf.direction, + DMA_PREP_INTERRUPT | DMA_CTRL_ACK); if (!desc) { dev_err(mmc_dev(host->mmc), "Failed to allocate DMA %s descriptor", @@ -342,7 +333,8 @@ static int jz4740_mmc_start_dma_transfer(struct jz4740_mmc_host *host, return 0; dma_unmap: - jz4740_mmc_dma_unmap(host, data); + if (data->host_cookie == COOKIE_MAPPED) + jz4740_mmc_dma_unmap(host, data); return -ENOMEM; } @@ -351,16 +343,13 @@ static void jz4740_mmc_pre_request(struct mmc_host *mmc, { struct jz4740_mmc_host *host = mmc_priv(mmc); struct mmc_data *data = mrq->data; - struct jz4740_mmc_host_next *next_data = &host->next_data; - BUG_ON(data->host_cookie); - - if (host->use_dma) { - struct dma_chan *chan = jz4740_mmc_get_dma_chan(host, data); + if (!host->use_dma) + return; - if (jz4740_mmc_prepare_dma_data(host, data, next_data, chan)) - data->host_cookie = 0; - } + data->host_cookie = COOKIE_UNMAPPED; + if (jz4740_mmc_prepare_dma_data(host, data, COOKIE_PREMAPPED) < 0) + data->host_cookie = COOKIE_UNMAPPED; } static void jz4740_mmc_post_request(struct mmc_host *mmc, @@ -370,10 +359,8 @@ static void jz4740_mmc_post_request(struct mmc_host *mmc, struct jz4740_mmc_host *host = mmc_priv(mmc); struct mmc_data *data = mrq->data; - if (host->use_dma && data->host_cookie) { + if (data && data->host_cookie != COOKIE_UNMAPPED) jz4740_mmc_dma_unmap(host, data); - data->host_cookie = 0; - } if (err) { struct dma_chan *chan = jz4740_mmc_get_dma_chan(host, data); @@ -436,10 +423,14 @@ static void jz4740_mmc_reset(struct jz4740_mmc_host *host) static void jz4740_mmc_request_done(struct jz4740_mmc_host *host) { struct mmc_request *req; + struct mmc_data *data; req = host->req; + data = req->data; host->req = NULL; + if (data && data->host_cookie == COOKIE_MAPPED) + jz4740_mmc_dma_unmap(host, data); mmc_request_done(host->mmc, req); } @@ -903,18 +894,16 @@ static void jz4740_mmc_set_ios(struct mmc_host *mmc, struct mmc_ios *ios) switch (ios->power_mode) { case MMC_POWER_UP: jz4740_mmc_reset(host); - if (host->pdata && gpio_is_valid(host->pdata->gpio_power)) - gpio_set_value(host->pdata->gpio_power, - !host->pdata->power_active_low); + if (host->power) + gpiod_set_value(host->power, 1); host->cmdat |= JZ_MMC_CMDAT_INIT; clk_prepare_enable(host->clk); break; case MMC_POWER_ON: break; default: - if (host->pdata && gpio_is_valid(host->pdata->gpio_power)) - gpio_set_value(host->pdata->gpio_power, - host->pdata->power_active_low); + if (host->power) + gpiod_set_value(host->power, 0); clk_disable_unprepare(host->clk); break; } @@ -947,30 +936,9 @@ static const struct mmc_host_ops jz4740_mmc_ops = { .enable_sdio_irq = jz4740_mmc_enable_sdio_irq, }; -static int jz4740_mmc_request_gpio(struct device *dev, int gpio, - const char *name, bool output, int value) -{ - int ret; - - if (!gpio_is_valid(gpio)) - return 0; - - ret = gpio_request(gpio, name); - if (ret) { - dev_err(dev, "Failed to request %s gpio: %d\n", name, ret); - return ret; - } - - if (output) - gpio_direction_output(gpio, value); - else - gpio_direction_input(gpio); - - return 0; -} - -static int jz4740_mmc_request_gpios(struct mmc_host *mmc, - struct platform_device *pdev) +static int jz4740_mmc_request_gpios(struct jz4740_mmc_host *host, + struct mmc_host *mmc, + struct platform_device *pdev) { struct jz4740_mmc_platform_data *pdata = dev_get_platdata(&pdev->dev); int ret = 0; @@ -983,31 +951,21 @@ static int jz4740_mmc_request_gpios(struct mmc_host *mmc, if (!pdata->read_only_active_low) mmc->caps2 |= MMC_CAP2_RO_ACTIVE_HIGH; - if (gpio_is_valid(pdata->gpio_card_detect)) { - ret = mmc_gpio_request_cd(mmc, pdata->gpio_card_detect, 0); - if (ret) - return ret; - } - - if (gpio_is_valid(pdata->gpio_read_only)) { - ret = mmc_gpio_request_ro(mmc, pdata->gpio_read_only); - if (ret) - return ret; - } - - return jz4740_mmc_request_gpio(&pdev->dev, pdata->gpio_power, - "MMC read only", true, pdata->power_active_low); -} - -static void jz4740_mmc_free_gpios(struct platform_device *pdev) -{ - struct jz4740_mmc_platform_data *pdata = dev_get_platdata(&pdev->dev); + /* + * Get optional card detect and write protect GPIOs, + * only back out on probe deferral. + */ + ret = mmc_gpiod_request_cd(mmc, "cd", 0, false, 0, NULL); + if (ret == -EPROBE_DEFER) + return ret; - if (!pdata) - return; + ret = mmc_gpiod_request_ro(mmc, "wp", 0, false, 0, NULL); + if (ret == -EPROBE_DEFER) + return ret; - if (gpio_is_valid(pdata->gpio_power)) - gpio_free(pdata->gpio_power); + host->power = devm_gpiod_get_optional(&pdev->dev, "power", + GPIOD_OUT_HIGH); + return PTR_ERR_OR_ZERO(host->power); } static const struct of_device_id jz4740_mmc_of_match[] = { @@ -1053,7 +1011,7 @@ static int jz4740_mmc_probe(struct platform_device* pdev) mmc->caps |= MMC_CAP_SDIO_IRQ; if (!(pdata && pdata->data_1bit)) mmc->caps |= MMC_CAP_4_BIT_DATA; - ret = jz4740_mmc_request_gpios(mmc, pdev); + ret = jz4740_mmc_request_gpios(host, mmc, pdev); if (ret) goto err_free_host; } @@ -1104,7 +1062,7 @@ static int jz4740_mmc_probe(struct platform_device* pdev) dev_name(&pdev->dev), host); if (ret) { dev_err(&pdev->dev, "Failed to request irq: %d\n", ret); - goto err_free_gpios; + goto err_free_host; } jz4740_mmc_clock_disable(host); @@ -1135,8 +1093,6 @@ err_release_dma: jz4740_mmc_release_dma_channels(host); err_free_irq: free_irq(host->irq, host); -err_free_gpios: - jz4740_mmc_free_gpios(pdev); err_free_host: mmc_free_host(mmc); @@ -1155,8 +1111,6 @@ static int jz4740_mmc_remove(struct platform_device *pdev) free_irq(host->irq, host); - jz4740_mmc_free_gpios(pdev); - if (host->use_dma) jz4740_mmc_release_dma_channels(host); diff --git a/drivers/mmc/host/meson-gx-mmc.c b/drivers/mmc/host/meson-gx-mmc.c index c201c378537e..c2690c1a50ff 100644 --- a/drivers/mmc/host/meson-gx-mmc.c +++ b/drivers/mmc/host/meson-gx-mmc.c @@ -21,11 +21,11 @@ #include <linux/kernel.h> #include <linux/module.h> #include <linux/init.h> +#include <linux/delay.h> #include <linux/device.h> #include <linux/of_device.h> #include <linux/platform_device.h> #include <linux/ioport.h> -#include <linux/spinlock.h> #include <linux/dma-mapping.h> #include <linux/mmc/host.h> #include <linux/mmc/mmc.h> @@ -66,6 +66,9 @@ #define SD_EMMC_DELAY 0x4 #define SD_EMMC_ADJUST 0x8 +#define ADJUST_ADJ_DELAY_MASK GENMASK(21, 16) +#define ADJUST_DS_EN BIT(15) +#define ADJUST_ADJ_EN BIT(13) #define SD_EMMC_DELAY1 0x4 #define SD_EMMC_DELAY2 0x8 @@ -90,9 +93,11 @@ #define CFG_CLK_ALWAYS_ON BIT(18) #define CFG_CHK_DS BIT(20) #define CFG_AUTO_CLK BIT(23) +#define CFG_ERR_ABORT BIT(27) #define SD_EMMC_STATUS 0x48 #define STATUS_BUSY BIT(31) +#define STATUS_DESC_BUSY BIT(30) #define STATUS_DATI GENMASK(23, 16) #define SD_EMMC_IRQ_EN 0x4c @@ -141,6 +146,7 @@ struct meson_mmc_data { unsigned int tx_delay_mask; unsigned int rx_delay_mask; unsigned int always_on; + unsigned int adjust; }; struct sd_emmc_desc { @@ -156,7 +162,6 @@ struct meson_host { struct mmc_host *mmc; struct mmc_command *cmd; - spinlock_t lock; void __iomem *regs; struct clk *core_clk; struct clk *mmc_clk; @@ -633,14 +638,8 @@ static int meson_mmc_clk_init(struct meson_host *host) if (ret) return ret; - /* - * Set phases : These values are mostly the datasheet recommended ones - * except for the Tx phase. Datasheet recommends 180 but some cards - * fail at initialisation with it. 270 works just fine, it fixes these - * initialisation issues and enable eMMC DDR52 mode. - */ clk_set_phase(host->mmc_clk, 180); - clk_set_phase(host->tx_clk, 270); + clk_set_phase(host->tx_clk, 0); clk_set_phase(host->rx_clk, 0); return clk_prepare_enable(host->mmc_clk); @@ -928,6 +927,7 @@ static void meson_mmc_start_cmd(struct mmc_host *mmc, struct mmc_command *cmd) cmd_cfg |= FIELD_PREP(CMD_CFG_CMD_INDEX_MASK, cmd->opcode); cmd_cfg |= CMD_CFG_OWNER; /* owned by CPU */ + cmd_cfg |= CMD_CFG_ERROR; /* stop in case of error */ meson_mmc_set_response_bits(cmd, &cmd_cfg); @@ -1022,29 +1022,34 @@ static irqreturn_t meson_mmc_irq(int irq, void *dev_id) u32 irq_en, status, raw_status; irqreturn_t ret = IRQ_NONE; - if (WARN_ON(!host) || WARN_ON(!host->cmd)) + irq_en = readl(host->regs + SD_EMMC_IRQ_EN); + raw_status = readl(host->regs + SD_EMMC_STATUS); + status = raw_status & irq_en; + + if (!status) { + dev_dbg(host->dev, + "Unexpected IRQ! irq_en 0x%08x - status 0x%08x\n", + irq_en, raw_status); return IRQ_NONE; + } - spin_lock(&host->lock); + if (WARN_ON(!host) || WARN_ON(!host->cmd)) + return IRQ_NONE; cmd = host->cmd; data = cmd->data; - irq_en = readl(host->regs + SD_EMMC_IRQ_EN); - raw_status = readl(host->regs + SD_EMMC_STATUS); - status = raw_status & irq_en; - cmd->error = 0; if (status & IRQ_CRC_ERR) { dev_dbg(host->dev, "CRC Error - status 0x%08x\n", status); cmd->error = -EILSEQ; - ret = IRQ_HANDLED; + ret = IRQ_WAKE_THREAD; goto out; } if (status & IRQ_TIMEOUTS) { dev_dbg(host->dev, "Timeout - status 0x%08x\n", status); cmd->error = -ETIMEDOUT; - ret = IRQ_HANDLED; + ret = IRQ_WAKE_THREAD; goto out; } @@ -1069,17 +1074,48 @@ out: /* ack all enabled interrupts */ writel(irq_en, host->regs + SD_EMMC_STATUS); + if (cmd->error) { + /* Stop desc in case of errors */ + u32 start = readl(host->regs + SD_EMMC_START); + + start &= ~START_DESC_BUSY; + writel(start, host->regs + SD_EMMC_START); + } + if (ret == IRQ_HANDLED) meson_mmc_request_done(host->mmc, cmd->mrq); - else if (ret == IRQ_NONE) - dev_warn(host->dev, - "Unexpected IRQ! status=0x%08x, irq_en=0x%08x\n", - raw_status, irq_en); - spin_unlock(&host->lock); return ret; } +static int meson_mmc_wait_desc_stop(struct meson_host *host) +{ + int loop; + u32 status; + + /* + * It may sometimes take a while for it to actually halt. Here, we + * are giving it 5ms to comply + * + * If we don't confirm the descriptor is stopped, it might raise new + * IRQs after we have called mmc_request_done() which is bad. + */ + for (loop = 50; loop; loop--) { + status = readl(host->regs + SD_EMMC_STATUS); + if (status & (STATUS_BUSY | STATUS_DESC_BUSY)) + udelay(100); + else + break; + } + + if (status & (STATUS_BUSY | STATUS_DESC_BUSY)) { + dev_err(host->dev, "Timed out waiting for host to stop\n"); + return -ETIMEDOUT; + } + + return 0; +} + static irqreturn_t meson_mmc_irq_thread(int irq, void *dev_id) { struct meson_host *host = dev_id; @@ -1090,6 +1126,13 @@ static irqreturn_t meson_mmc_irq_thread(int irq, void *dev_id) if (WARN_ON(!cmd)) return IRQ_NONE; + if (cmd->error) { + meson_mmc_wait_desc_stop(host); + meson_mmc_request_done(host->mmc, cmd->mrq); + + return IRQ_HANDLED; + } + data = cmd->data; if (meson_mmc_bounce_buf_read(data)) { xfer_bytes = data->blksz * data->blocks; @@ -1123,14 +1166,21 @@ static int meson_mmc_get_cd(struct mmc_host *mmc) static void meson_mmc_cfg_init(struct meson_host *host) { - u32 cfg = 0; + u32 cfg = 0, adj = 0; cfg |= FIELD_PREP(CFG_RESP_TIMEOUT_MASK, ilog2(SD_EMMC_CFG_RESP_TIMEOUT)); cfg |= FIELD_PREP(CFG_RC_CC_MASK, ilog2(SD_EMMC_CFG_CMD_GAP)); cfg |= FIELD_PREP(CFG_BLK_LEN_MASK, ilog2(SD_EMMC_CFG_BLK_SIZE)); + /* abort chain on R/W errors */ + cfg |= CFG_ERR_ABORT; + writel(cfg, host->regs + SD_EMMC_CFG); + + /* enable signal resampling w/o delay */ + adj = ADJUST_ADJ_EN; + writel(adj, host->regs + host->data->adjust); } static int meson_mmc_card_busy(struct mmc_host *mmc) @@ -1191,8 +1241,6 @@ static int meson_mmc_probe(struct platform_device *pdev) host->dev = &pdev->dev; dev_set_drvdata(&pdev->dev, host); - spin_lock_init(&host->lock); - /* Get regulators and the supported OCR mask */ host->vqmmc_enabled = false; ret = mmc_regulator_get_supply(mmc); @@ -1356,12 +1404,14 @@ static const struct meson_mmc_data meson_gx_data = { .tx_delay_mask = CLK_V2_TX_DELAY_MASK, .rx_delay_mask = CLK_V2_RX_DELAY_MASK, .always_on = CLK_V2_ALWAYS_ON, + .adjust = SD_EMMC_ADJUST, }; static const struct meson_mmc_data meson_axg_data = { .tx_delay_mask = CLK_V3_TX_DELAY_MASK, .rx_delay_mask = CLK_V3_RX_DELAY_MASK, .always_on = CLK_V3_ALWAYS_ON, + .adjust = SD_EMMC_V3_ADJUST, }; static const struct of_device_id meson_mmc_of_match[] = { diff --git a/drivers/mmc/host/meson-mx-sdio.c b/drivers/mmc/host/meson-mx-sdio.c index abe253c262a2..ec980bda071c 100644 --- a/drivers/mmc/host/meson-mx-sdio.c +++ b/drivers/mmc/host/meson-mx-sdio.c @@ -596,6 +596,9 @@ static int meson_mx_mmc_register_clks(struct meson_mx_mmc_host *host) init.name = devm_kasprintf(host->controller_dev, GFP_KERNEL, "%s#fixed_factor", dev_name(host->controller_dev)); + if (!init.name) + return -ENOMEM; + init.ops = &clk_fixed_factor_ops; init.flags = 0; init.parent_names = &clk_fixed_factor_parent; @@ -612,6 +615,9 @@ static int meson_mx_mmc_register_clks(struct meson_mx_mmc_host *host) clk_div_parent = __clk_get_name(host->fixed_factor_clk); init.name = devm_kasprintf(host->controller_dev, GFP_KERNEL, "%s#div", dev_name(host->controller_dev)); + if (!init.name) + return -ENOMEM; + init.ops = &clk_divider_ops; init.flags = CLK_SET_RATE_PARENT; init.parent_names = &clk_div_parent; diff --git a/drivers/mmc/host/mmc_spi.c b/drivers/mmc/host/mmc_spi.c index 476e53d30128..10ba46b728e8 100644 --- a/drivers/mmc/host/mmc_spi.c +++ b/drivers/mmc/host/mmc_spi.c @@ -1434,13 +1434,16 @@ static int mmc_spi_probe(struct spi_device *spi) if (status != 0) goto fail_add_host; - if (host->pdata && host->pdata->flags & MMC_SPI_USE_CD_GPIO) { - status = mmc_gpio_request_cd(mmc, host->pdata->cd_gpio, - host->pdata->cd_debounce); - if (status != 0) - goto fail_add_host; - - /* The platform has a CD GPIO signal that may support + /* + * Index 0 is card detect + * Old boardfiles were specifying 1 ms as debounce + */ + status = mmc_gpiod_request_cd(mmc, NULL, 0, false, 1, NULL); + if (status == -EPROBE_DEFER) + goto fail_add_host; + if (!status) { + /* + * The platform has a CD GPIO signal that may support * interrupts, so let mmc_gpiod_request_cd_irq() decide * if polling is needed or not. */ @@ -1448,12 +1451,12 @@ static int mmc_spi_probe(struct spi_device *spi) mmc_gpiod_request_cd_irq(mmc); } - if (host->pdata && host->pdata->flags & MMC_SPI_USE_RO_GPIO) { + /* Index 1 is write protect/read only */ + status = mmc_gpiod_request_ro(mmc, NULL, 1, false, 0, NULL); + if (status == -EPROBE_DEFER) + goto fail_add_host; + if (!status) has_ro = true; - status = mmc_gpio_request_ro(mmc, host->pdata->ro_gpio); - if (status != 0) - goto fail_add_host; - } dev_info(&spi->dev, "SD/MMC host %s%s%s%s%s\n", dev_name(&mmc->class_dev), diff --git a/drivers/mmc/host/mmci.c b/drivers/mmc/host/mmci.c index 82bab35fff41..e352f5ad5801 100644 --- a/drivers/mmc/host/mmci.c +++ b/drivers/mmc/host/mmci.c @@ -21,6 +21,7 @@ #include <linux/err.h> #include <linux/highmem.h> #include <linux/log2.h> +#include <linux/mmc/mmc.h> #include <linux/mmc/pm.h> #include <linux/mmc/host.h> #include <linux/mmc/card.h> @@ -274,6 +275,7 @@ static struct variant_data variant_stm32_sdmmc = { .cmdreg_lrsp_crc = MCI_CPSM_STM32_LRSP_CRC, .cmdreg_srsp_crc = MCI_CPSM_STM32_SRSP_CRC, .cmdreg_srsp = MCI_CPSM_STM32_SRSP, + .cmdreg_stop = MCI_CPSM_STM32_CMDSTOP, .data_cmd_enable = MCI_CPSM_STM32_CMDTRANS, .irq_pio_mask = MCI_IRQ_PIO_STM32_MASK, .datactrl_first = true, @@ -1100,6 +1102,10 @@ mmci_start_command(struct mmci_host *host, struct mmc_command *cmd, u32 c) mmci_reg_delay(host); } + if (host->variant->cmdreg_stop && + cmd->opcode == MMC_STOP_TRANSMISSION) + c |= host->variant->cmdreg_stop; + c |= cmd->opcode | host->variant->cmdreg_cpsm_enable; if (cmd->flags & MMC_RSP_PRESENT) { if (cmd->flags & MMC_RSP_136) @@ -1190,11 +1196,10 @@ mmci_data_irq(struct mmci_host *host, struct mmc_data *data, /* The error clause is handled above, success! */ data->bytes_xfered = data->blksz * data->blocks; - if (!data->stop || host->mrq->sbc) { + if (!data->stop || (host->mrq->sbc && !data->error)) mmci_request_end(host, data->mrq); - } else { + else mmci_start_command(host, data->stop, 0); - } } } diff --git a/drivers/mmc/host/mmci.h b/drivers/mmc/host/mmci.h index 550dd3914461..24229097d05c 100644 --- a/drivers/mmc/host/mmci.h +++ b/drivers/mmc/host/mmci.h @@ -264,6 +264,7 @@ struct mmci_host; * @cmdreg_lrsp_crc: enable value for long response with crc * @cmdreg_srsp_crc: enable value for short response with crc * @cmdreg_srsp: enable value for short response without crc + * @cmdreg_stop: enable value for stop and abort transmission * @datalength_bits: number of bits in the MMCIDATALENGTH register * @fifosize: number of bytes that can be written when MMCI_TXFIFOEMPTY * is asserted (likewise for RX) @@ -316,6 +317,7 @@ struct variant_data { unsigned int cmdreg_lrsp_crc; unsigned int cmdreg_srsp_crc; unsigned int cmdreg_srsp; + unsigned int cmdreg_stop; unsigned int datalength_bits; unsigned int fifosize; unsigned int fifohalfsize; diff --git a/drivers/mmc/host/mtk-sd.c b/drivers/mmc/host/mtk-sd.c index 6334cc752d8b..8afeaf81ae66 100644 --- a/drivers/mmc/host/mtk-sd.c +++ b/drivers/mmc/host/mtk-sd.c @@ -1114,6 +1114,7 @@ static void msdc_start_command(struct msdc_host *host, struct mmc_request *mrq, struct mmc_command *cmd) { u32 rawcmd; + unsigned long flags; WARN_ON(host->cmd); host->cmd = cmd; @@ -1131,7 +1132,10 @@ static void msdc_start_command(struct msdc_host *host, cmd->error = 0; rawcmd = msdc_cmd_prepare_raw_cmd(host, mrq, cmd); + spin_lock_irqsave(&host->lock, flags); sdr_set_bits(host->base + MSDC_INTEN, cmd_ints_mask); + spin_unlock_irqrestore(&host->lock, flags); + writel(cmd->arg, host->base + SDC_ARG); writel(rawcmd, host->base + SDC_CMD); } @@ -1351,6 +1355,31 @@ static void msdc_request_timeout(struct work_struct *work) } } +static void __msdc_enable_sdio_irq(struct mmc_host *mmc, int enb) +{ + unsigned long flags; + struct msdc_host *host = mmc_priv(mmc); + + spin_lock_irqsave(&host->lock, flags); + if (enb) + sdr_set_bits(host->base + MSDC_INTEN, MSDC_INTEN_SDIOIRQ); + else + sdr_clr_bits(host->base + MSDC_INTEN, MSDC_INTEN_SDIOIRQ); + spin_unlock_irqrestore(&host->lock, flags); +} + +static void msdc_enable_sdio_irq(struct mmc_host *mmc, int enb) +{ + struct msdc_host *host = mmc_priv(mmc); + + __msdc_enable_sdio_irq(mmc, enb); + + if (enb) + pm_runtime_get_noresume(host->dev); + else + pm_runtime_put_noidle(host->dev); +} + static irqreturn_t msdc_irq(int irq, void *dev_id) { struct msdc_host *host = (struct msdc_host *) dev_id; @@ -1373,7 +1402,12 @@ static irqreturn_t msdc_irq(int irq, void *dev_id) data = host->data; spin_unlock_irqrestore(&host->lock, flags); - if (!(events & event_mask)) + if ((events & event_mask) & MSDC_INT_SDIOIRQ) { + __msdc_enable_sdio_irq(host->mmc, 0); + sdio_signal_irq(host->mmc); + } + + if (!(events & (event_mask & ~MSDC_INT_SDIOIRQ))) break; if (!mrq) { @@ -1493,8 +1527,11 @@ static void msdc_init_hw(struct msdc_host *host) */ sdr_set_bits(host->base + SDC_CFG, SDC_CFG_SDIO); - /* disable detect SDIO device interrupt function */ - sdr_clr_bits(host->base + SDC_CFG, SDC_CFG_SDIOIDE); + /* Config SDIO device detect interrupt function */ + if (host->mmc->caps & MMC_CAP_SDIO_IRQ) + sdr_set_bits(host->base + SDC_CFG, SDC_CFG_SDIOIDE); + else + sdr_clr_bits(host->base + SDC_CFG, SDC_CFG_SDIOIDE); /* Configure to default data timeout */ sdr_set_field(host->base + SDC_CFG, SDC_CFG_DTOC, 3); @@ -2013,6 +2050,11 @@ static void msdc_hw_reset(struct mmc_host *mmc) sdr_clr_bits(host->base + EMMC_IOCON, 1); } +static void msdc_ack_sdio_irq(struct mmc_host *mmc) +{ + __msdc_enable_sdio_irq(mmc, 1); +} + static const struct mmc_host_ops mt_msdc_ops = { .post_req = msdc_post_req, .pre_req = msdc_pre_req, @@ -2020,6 +2062,8 @@ static const struct mmc_host_ops mt_msdc_ops = { .set_ios = msdc_ops_set_ios, .get_ro = mmc_gpio_get_ro, .get_cd = mmc_gpio_get_cd, + .enable_sdio_irq = msdc_enable_sdio_irq, + .ack_sdio_irq = msdc_ack_sdio_irq, .start_signal_voltage_switch = msdc_ops_switch_volt, .card_busy = msdc_card_busy, .execute_tuning = msdc_execute_tuning, @@ -2147,6 +2191,9 @@ static int msdc_drv_probe(struct platform_device *pdev) else mmc->f_min = DIV_ROUND_UP(host->src_clk_freq, 4 * 4095); + if (mmc->caps & MMC_CAP_SDIO_IRQ) + mmc->caps2 |= MMC_CAP2_SDIO_IRQ_NOTHREAD; + mmc->caps |= MMC_CAP_ERASE | MMC_CAP_CMD23; /* MMC core transfer sizes tunable parameters */ mmc->max_segs = MAX_BD_NUM; diff --git a/drivers/mmc/host/of_mmc_spi.c b/drivers/mmc/host/of_mmc_spi.c index c9eed8436b6b..b294b221f225 100644 --- a/drivers/mmc/host/of_mmc_spi.c +++ b/drivers/mmc/host/of_mmc_spi.c @@ -16,9 +16,7 @@ #include <linux/device.h> #include <linux/slab.h> #include <linux/irq.h> -#include <linux/gpio.h> #include <linux/of.h> -#include <linux/of_gpio.h> #include <linux/of_irq.h> #include <linux/spi/spi.h> #include <linux/spi/mmc_spi.h> @@ -32,15 +30,7 @@ MODULE_LICENSE("GPL"); -enum { - CD_GPIO = 0, - WP_GPIO, - NUM_GPIOS, -}; - struct of_mmc_spi { - int gpios[NUM_GPIOS]; - bool alow_gpios[NUM_GPIOS]; int detect_irq; struct mmc_spi_platform_data pdata; }; @@ -102,30 +92,6 @@ struct mmc_spi_platform_data *mmc_spi_get_pdata(struct spi_device *spi) oms->pdata.ocr_mask |= mask; } - for (i = 0; i < ARRAY_SIZE(oms->gpios); i++) { - enum of_gpio_flags gpio_flags; - - oms->gpios[i] = of_get_gpio_flags(np, i, &gpio_flags); - if (!gpio_is_valid(oms->gpios[i])) - continue; - - if (gpio_flags & OF_GPIO_ACTIVE_LOW) - oms->alow_gpios[i] = true; - } - - if (gpio_is_valid(oms->gpios[CD_GPIO])) { - oms->pdata.cd_gpio = oms->gpios[CD_GPIO]; - oms->pdata.flags |= MMC_SPI_USE_CD_GPIO; - if (!oms->alow_gpios[CD_GPIO]) - oms->pdata.caps2 |= MMC_CAP2_CD_ACTIVE_HIGH; - } - if (gpio_is_valid(oms->gpios[WP_GPIO])) { - oms->pdata.ro_gpio = oms->gpios[WP_GPIO]; - oms->pdata.flags |= MMC_SPI_USE_RO_GPIO; - if (!oms->alow_gpios[WP_GPIO]) - oms->pdata.caps2 |= MMC_CAP2_RO_ACTIVE_HIGH; - } - oms->detect_irq = irq_of_parse_and_map(np, 0); if (oms->detect_irq != 0) { oms->pdata.init = of_mmc_spi_init; diff --git a/drivers/mmc/host/omap_hsmmc.c b/drivers/mmc/host/omap_hsmmc.c index 3f4ea8f624be..29a1ddaa7466 100644 --- a/drivers/mmc/host/omap_hsmmc.c +++ b/drivers/mmc/host/omap_hsmmc.c @@ -1652,7 +1652,7 @@ static struct mmc_host_ops omap_hsmmc_ops = { #ifdef CONFIG_DEBUG_FS -static int omap_hsmmc_regs_show(struct seq_file *s, void *data) +static int mmc_regs_show(struct seq_file *s, void *data) { struct mmc_host *mmc = s->private; struct omap_hsmmc_host *host = mmc_priv(mmc); @@ -1691,17 +1691,7 @@ static int omap_hsmmc_regs_show(struct seq_file *s, void *data) return 0; } -static int omap_hsmmc_regs_open(struct inode *inode, struct file *file) -{ - return single_open(file, omap_hsmmc_regs_show, inode->i_private); -} - -static const struct file_operations mmc_regs_fops = { - .open = omap_hsmmc_regs_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; +DEFINE_SHOW_ATTRIBUTE(mmc_regs); static void omap_hsmmc_debugfs(struct mmc_host *mmc) { diff --git a/drivers/mmc/host/pxamci.c b/drivers/mmc/host/pxamci.c index f7ffbf1676b1..8779bbaa6b69 100644 --- a/drivers/mmc/host/pxamci.c +++ b/drivers/mmc/host/pxamci.c @@ -30,10 +30,9 @@ #include <linux/mmc/slot-gpio.h> #include <linux/io.h> #include <linux/regulator/consumer.h> -#include <linux/gpio.h> +#include <linux/gpio/consumer.h> #include <linux/gfp.h> #include <linux/of.h> -#include <linux/of_gpio.h> #include <linux/of_device.h> #include <asm/sizes.h> @@ -63,6 +62,8 @@ struct pxamci_host { unsigned int imask; unsigned int power_mode; unsigned long detect_delay_ms; + bool use_ro_gpio; + struct gpio_desc *power; struct pxamci_platform_data *pdata; struct mmc_request *mrq; @@ -101,16 +102,13 @@ static inline int pxamci_set_power(struct pxamci_host *host, { struct mmc_host *mmc = host->mmc; struct regulator *supply = mmc->supply.vmmc; - int on; if (!IS_ERR(supply)) return mmc_regulator_set_ocr(mmc, supply, vdd); - if (host->pdata && - gpio_is_valid(host->pdata->gpio_power)) { - on = ((1 << vdd) & host->pdata->ocr_mask); - gpio_set_value(host->pdata->gpio_power, - !!on ^ host->pdata->gpio_power_invert); + if (host->power) { + bool on = !!((1 << vdd) & host->pdata->ocr_mask); + gpiod_set_value(host->power, on); } if (host->pdata && host->pdata->setpower) @@ -432,7 +430,7 @@ static int pxamci_get_ro(struct mmc_host *mmc) { struct pxamci_host *host = mmc_priv(mmc); - if (host->pdata && gpio_is_valid(host->pdata->gpio_card_ro)) + if (host->use_ro_gpio) return mmc_gpio_get_ro(mmc); if (host->pdata && host->pdata->get_ro) return !!host->pdata->get_ro(mmc_dev(mmc)); @@ -730,52 +728,38 @@ static int pxamci_probe(struct platform_device *pdev) } if (host->pdata) { - int gpio_cd = host->pdata->gpio_card_detect; - int gpio_ro = host->pdata->gpio_card_ro; - int gpio_power = host->pdata->gpio_power; - host->detect_delay_ms = host->pdata->detect_delay_ms; - if (gpio_is_valid(gpio_power)) { - ret = devm_gpio_request(dev, gpio_power, - "mmc card power"); - if (ret) { - dev_err(dev, - "Failed requesting gpio_power %d\n", - gpio_power); - goto out; - } - gpio_direction_output(gpio_power, - host->pdata->gpio_power_invert); + host->power = devm_gpiod_get_optional(dev, "power", GPIOD_OUT_LOW); + if (IS_ERR(host->power)) { + dev_err(dev, "Failed requesting gpio_power\n"); + goto out; } - if (gpio_is_valid(gpio_ro)) { - ret = mmc_gpio_request_ro(mmc, gpio_ro); - if (ret) { - dev_err(dev, - "Failed requesting gpio_ro %d\n", - gpio_ro); - goto out; - } else { - mmc->caps2 |= host->pdata->gpio_card_ro_invert ? - 0 : MMC_CAP2_RO_ACTIVE_HIGH; - } + /* FIXME: should we pass detection delay to debounce? */ + ret = mmc_gpiod_request_cd(mmc, "cd", 0, false, 0, NULL); + if (ret && ret != -ENOENT) { + dev_err(dev, "Failed requesting gpio_cd\n"); + goto out; } - if (gpio_is_valid(gpio_cd)) - ret = mmc_gpio_request_cd(mmc, gpio_cd, 0); - if (ret) { - dev_err(dev, "Failed requesting gpio_cd %d\n", - gpio_cd); + ret = mmc_gpiod_request_ro(mmc, "wp", 0, false, 0, NULL); + if (ret && ret != -ENOENT) { + dev_err(dev, "Failed requesting gpio_ro\n"); goto out; } + if (!ret) { + host->use_ro_gpio = true; + mmc->caps2 |= host->pdata->gpio_card_ro_invert ? + 0 : MMC_CAP2_RO_ACTIVE_HIGH; + } if (host->pdata->init) host->pdata->init(dev, pxamci_detect_irq, mmc); - if (gpio_is_valid(gpio_power) && host->pdata->setpower) + if (host->power && host->pdata->setpower) dev_warn(dev, "gpio_power and setpower() both defined\n"); - if (gpio_is_valid(gpio_ro) && host->pdata->get_ro) + if (host->use_ro_gpio && host->pdata->get_ro) dev_warn(dev, "gpio_ro and get_ro() both defined\n"); } diff --git a/drivers/mmc/host/renesas_sdhi_core.c b/drivers/mmc/host/renesas_sdhi_core.c index d3ac43c3d0b6..31a351a20dc0 100644 --- a/drivers/mmc/host/renesas_sdhi_core.c +++ b/drivers/mmc/host/renesas_sdhi_core.c @@ -32,6 +32,7 @@ #include <linux/pinctrl/consumer.h> #include <linux/pinctrl/pinctrl-state.h> #include <linux/regulator/consumer.h> +#include <linux/sys_soc.h> #include "renesas_sdhi.h" #include "tmio_mmc.h" @@ -45,6 +46,11 @@ #define SDHI_VER_GEN3_SD 0xcc10 #define SDHI_VER_GEN3_SDMMC 0xcd10 +struct renesas_sdhi_quirks { + bool hs400_disabled; + bool hs400_4taps; +}; + static void renesas_sdhi_sdbuf_width(struct tmio_mmc_host *host, int width) { u32 val; @@ -163,15 +169,6 @@ static void renesas_sdhi_set_clock(struct tmio_mmc_host *host, if (new_clock == 0) goto out; - /* - * Both HS400 and HS200/SD104 set 200MHz, but some devices need to - * set 400MHz to distinguish the CPG settings in HS400. - */ - if (host->mmc->ios.timing == MMC_TIMING_MMC_HS400 && - host->pdata->flags & TMIO_MMC_HAVE_4TAP_HS400 && - new_clock == 200000000) - new_clock = 400000000; - clock = renesas_sdhi_clk_update(host, new_clock) / 512; for (clk = 0x80000080; new_clock >= (clock << 1); clk >>= 1) @@ -532,6 +529,10 @@ static void renesas_sdhi_hw_reset(struct tmio_mmc_host *host) sd_scc_write32(host, priv, SH_MOBILE_SDHI_SCC_RVSCNTL, ~SH_MOBILE_SDHI_SCC_RVSCNTL_RVSEN & sd_scc_read32(host, priv, SH_MOBILE_SDHI_SCC_RVSCNTL)); + + if (host->pdata->flags & TMIO_MMC_MIN_RCAR2) + sd_ctrl_write32_as_16_and_16(host, CTL_IRQ_MASK, + TMIO_MASK_INIT_RCAR2); } static int renesas_sdhi_wait_idle(struct tmio_mmc_host *host, u32 bit) @@ -602,11 +603,31 @@ static void renesas_sdhi_enable_dma(struct tmio_mmc_host *host, bool enable) renesas_sdhi_sdbuf_width(host, enable ? width : 16); } +static const struct renesas_sdhi_quirks sdhi_quirks_h3_m3w_es1 = { + .hs400_disabled = true, + .hs400_4taps = true, +}; + +static const struct renesas_sdhi_quirks sdhi_quirks_h3_es2 = { + .hs400_disabled = false, + .hs400_4taps = true, +}; + +static const struct soc_device_attribute sdhi_quirks_match[] = { + { .soc_id = "r8a7795", .revision = "ES1.*", .data = &sdhi_quirks_h3_m3w_es1 }, + { .soc_id = "r8a7795", .revision = "ES2.0", .data = &sdhi_quirks_h3_es2 }, + { .soc_id = "r8a7796", .revision = "ES1.0", .data = &sdhi_quirks_h3_m3w_es1 }, + { .soc_id = "r8a7796", .revision = "ES1.1", .data = &sdhi_quirks_h3_m3w_es1 }, + { /* Sentinel. */ }, +}; + int renesas_sdhi_probe(struct platform_device *pdev, const struct tmio_mmc_dma_ops *dma_ops) { struct tmio_mmc_data *mmd = pdev->dev.platform_data; + const struct renesas_sdhi_quirks *quirks = NULL; const struct renesas_sdhi_of_data *of_data; + const struct soc_device_attribute *attr; struct tmio_mmc_data *mmc_data; struct tmio_mmc_dma *dma_priv; struct tmio_mmc_host *host; @@ -616,6 +637,10 @@ int renesas_sdhi_probe(struct platform_device *pdev, of_data = of_device_get_match_data(&pdev->dev); + attr = soc_device_match(sdhi_quirks_match); + if (attr) + quirks = attr->data; + res = platform_get_resource(pdev, IORESOURCE_MEM, 0); if (!res) return -EINVAL; @@ -681,6 +706,12 @@ int renesas_sdhi_probe(struct platform_device *pdev, host->multi_io_quirk = renesas_sdhi_multi_io_quirk; host->dma_ops = dma_ops; + if (quirks && quirks->hs400_disabled) + host->mmc->caps2 &= ~(MMC_CAP2_HS400 | MMC_CAP2_HS400_ES); + + if (quirks && quirks->hs400_4taps) + mmc_data->flags |= TMIO_MMC_HAVE_4TAP_HS400; + /* For some SoC, we disable internal WP. GPIO may override this */ if (mmc_can_gpio_ro(host->mmc)) mmc_data->capabilities2 &= ~MMC_CAP2_NO_WRITE_PROTECT; @@ -691,6 +722,7 @@ int renesas_sdhi_probe(struct platform_device *pdev, host->ops.card_busy = renesas_sdhi_card_busy; host->ops.start_signal_voltage_switch = renesas_sdhi_start_signal_voltage_switch; + host->sdcard_irq_setbit_mask = TMIO_STAT_ALWAYS_SET_27; } /* Orginally registers were 16 bit apart, could be 32 or 64 nowadays */ diff --git a/drivers/mmc/host/renesas_sdhi_internal_dmac.c b/drivers/mmc/host/renesas_sdhi_internal_dmac.c index b6f54102bfdd..92c9b15252da 100644 --- a/drivers/mmc/host/renesas_sdhi_internal_dmac.c +++ b/drivers/mmc/host/renesas_sdhi_internal_dmac.c @@ -34,7 +34,7 @@ #define DTRAN_MODE_CH_NUM_CH0 0 /* "downstream" = for write commands */ #define DTRAN_MODE_CH_NUM_CH1 BIT(16) /* "upstream" = for read commands */ #define DTRAN_MODE_BUS_WIDTH (BIT(5) | BIT(4)) -#define DTRAN_MODE_ADDR_MODE BIT(0) /* 1 = Increment address */ +#define DTRAN_MODE_ADDR_MODE BIT(0) /* 1 = Increment address, 0 = Fixed */ /* DM_CM_DTRAN_CTRL */ #define DTRAN_CTRL_DM_START BIT(0) @@ -73,6 +73,9 @@ static unsigned long global_flags; #define SDHI_INTERNAL_DMAC_ONE_RX_ONLY 0 #define SDHI_INTERNAL_DMAC_RX_IN_USE 1 +/* RZ/A2 does not have the ADRR_MODE bit */ +#define SDHI_INTERNAL_DMAC_ADDR_MODE_FIXED_ONLY 2 + /* Definitions for sampling clocks */ static struct renesas_sdhi_scc rcar_gen3_scc_taps[] = { { @@ -81,15 +84,14 @@ static struct renesas_sdhi_scc rcar_gen3_scc_taps[] = { }, }; -static const struct renesas_sdhi_of_data of_rcar_r8a7795_compatible = { +static const struct renesas_sdhi_of_data of_rza2_compatible = { .tmio_flags = TMIO_MMC_HAS_IDLE_WAIT | TMIO_MMC_CLK_ACTUAL | - TMIO_MMC_HAVE_CBSY | TMIO_MMC_MIN_RCAR2 | - TMIO_MMC_HAVE_4TAP_HS400, + TMIO_MMC_HAVE_CBSY, + .tmio_ocr_mask = MMC_VDD_32_33, .capabilities = MMC_CAP_SD_HIGHSPEED | MMC_CAP_SDIO_IRQ | MMC_CAP_CMD23, - .capabilities2 = MMC_CAP2_NO_WRITE_PROTECT, .bus_shift = 2, - .scc_offset = 0x1000, + .scc_offset = 0 - 0x1000, .taps = rcar_gen3_scc_taps, .taps_num = ARRAY_SIZE(rcar_gen3_scc_taps), /* DMAC can handle 0xffffffff blk count but only 1 segment */ @@ -113,9 +115,10 @@ static const struct renesas_sdhi_of_data of_rcar_gen3_compatible = { }; static const struct of_device_id renesas_sdhi_internal_dmac_of_match[] = { + { .compatible = "renesas,sdhi-r7s9210", .data = &of_rza2_compatible, }, { .compatible = "renesas,sdhi-mmc-r8a77470", .data = &of_rcar_gen3_compatible, }, - { .compatible = "renesas,sdhi-r8a7795", .data = &of_rcar_r8a7795_compatible, }, - { .compatible = "renesas,sdhi-r8a7796", .data = &of_rcar_r8a7795_compatible, }, + { .compatible = "renesas,sdhi-r8a7795", .data = &of_rcar_gen3_compatible, }, + { .compatible = "renesas,sdhi-r8a7796", .data = &of_rcar_gen3_compatible, }, { .compatible = "renesas,rcar-gen3-sdhi", .data = &of_rcar_gen3_compatible, }, {}, }; @@ -172,7 +175,10 @@ renesas_sdhi_internal_dmac_start_dma(struct tmio_mmc_host *host, struct mmc_data *data) { struct scatterlist *sg = host->sg_ptr; - u32 dtran_mode = DTRAN_MODE_BUS_WIDTH | DTRAN_MODE_ADDR_MODE; + u32 dtran_mode = DTRAN_MODE_BUS_WIDTH; + + if (!test_bit(SDHI_INTERNAL_DMAC_ADDR_MODE_FIXED_ONLY, &global_flags)) + dtran_mode |= DTRAN_MODE_ADDR_MODE; if (!dma_map_sg(&host->pdev->dev, sg, host->sg_len, mmc_get_dma_dir(data))) @@ -292,18 +298,22 @@ static const struct tmio_mmc_dma_ops renesas_sdhi_internal_dmac_dma_ops = { */ static const struct soc_device_attribute soc_whitelist[] = { /* specific ones */ + { .soc_id = "r7s9210", + .data = (void *)BIT(SDHI_INTERNAL_DMAC_ADDR_MODE_FIXED_ONLY) }, { .soc_id = "r8a7795", .revision = "ES1.*", .data = (void *)BIT(SDHI_INTERNAL_DMAC_ONE_RX_ONLY) }, { .soc_id = "r8a7796", .revision = "ES1.0", .data = (void *)BIT(SDHI_INTERNAL_DMAC_ONE_RX_ONLY) }, /* generic ones */ { .soc_id = "r8a774a1" }, + { .soc_id = "r8a774c0" }, { .soc_id = "r8a77470" }, { .soc_id = "r8a7795" }, { .soc_id = "r8a7796" }, { .soc_id = "r8a77965" }, { .soc_id = "r8a77970" }, { .soc_id = "r8a77980" }, + { .soc_id = "r8a77990" }, { .soc_id = "r8a77995" }, { /* sentinel */ } }; diff --git a/drivers/mmc/host/renesas_sdhi_sys_dmac.c b/drivers/mmc/host/renesas_sdhi_sys_dmac.c index 1a4016f635d3..8471160316e0 100644 --- a/drivers/mmc/host/renesas_sdhi_sys_dmac.c +++ b/drivers/mmc/host/renesas_sdhi_sys_dmac.c @@ -75,19 +75,6 @@ static struct renesas_sdhi_scc rcar_gen3_scc_taps[] = { }, }; -static const struct renesas_sdhi_of_data of_rcar_r8a7795_compatible = { - .tmio_flags = TMIO_MMC_HAS_IDLE_WAIT | TMIO_MMC_CLK_ACTUAL | - TMIO_MMC_HAVE_CBSY | TMIO_MMC_MIN_RCAR2 | - TMIO_MMC_HAVE_4TAP_HS400, - .capabilities = MMC_CAP_SD_HIGHSPEED | MMC_CAP_SDIO_IRQ | - MMC_CAP_CMD23, - .capabilities2 = MMC_CAP2_NO_WRITE_PROTECT, - .bus_shift = 2, - .scc_offset = 0x1000, - .taps = rcar_gen3_scc_taps, - .taps_num = ARRAY_SIZE(rcar_gen3_scc_taps), -}; - static const struct renesas_sdhi_of_data of_rcar_gen3_compatible = { .tmio_flags = TMIO_MMC_HAS_IDLE_WAIT | TMIO_MMC_CLK_ACTUAL | TMIO_MMC_HAVE_CBSY | TMIO_MMC_MIN_RCAR2, @@ -114,8 +101,8 @@ static const struct of_device_id renesas_sdhi_sys_dmac_of_match[] = { { .compatible = "renesas,sdhi-r8a7792", .data = &of_rcar_gen2_compatible, }, { .compatible = "renesas,sdhi-r8a7793", .data = &of_rcar_gen2_compatible, }, { .compatible = "renesas,sdhi-r8a7794", .data = &of_rcar_gen2_compatible, }, - { .compatible = "renesas,sdhi-r8a7795", .data = &of_rcar_r8a7795_compatible, }, - { .compatible = "renesas,sdhi-r8a7796", .data = &of_rcar_r8a7795_compatible, }, + { .compatible = "renesas,sdhi-r8a7795", .data = &of_rcar_gen3_compatible, }, + { .compatible = "renesas,sdhi-r8a7796", .data = &of_rcar_gen3_compatible, }, { .compatible = "renesas,rcar-gen1-sdhi", .data = &of_rcar_gen1_compatible, }, { .compatible = "renesas,rcar-gen2-sdhi", .data = &of_rcar_gen2_compatible, }, { .compatible = "renesas,rcar-gen3-sdhi", .data = &of_rcar_gen3_compatible, }, @@ -493,8 +480,7 @@ static const struct soc_device_attribute gen3_soc_whitelist[] = { static int renesas_sdhi_sys_dmac_probe(struct platform_device *pdev) { - if ((of_device_get_match_data(&pdev->dev) == &of_rcar_gen3_compatible || - of_device_get_match_data(&pdev->dev) == &of_rcar_r8a7795_compatible) && + if (of_device_get_match_data(&pdev->dev) == &of_rcar_gen3_compatible && !soc_device_match(gen3_soc_whitelist)) return -ENODEV; diff --git a/drivers/mmc/host/rtsx_usb_sdmmc.c b/drivers/mmc/host/rtsx_usb_sdmmc.c index 9a3ff22dd0fe..669c6ab021c8 100644 --- a/drivers/mmc/host/rtsx_usb_sdmmc.c +++ b/drivers/mmc/host/rtsx_usb_sdmmc.c @@ -28,6 +28,7 @@ #include <linux/mmc/sd.h> #include <linux/mmc/card.h> #include <linux/scatterlist.h> +#include <linux/pm.h> #include <linux/pm_runtime.h> #include <linux/rtsx_usb.h> @@ -1042,9 +1043,9 @@ static int sd_set_power_mode(struct rtsx_usb_sdmmc *host, if (power_mode == MMC_POWER_OFF) { err = sd_power_off(host); - pm_runtime_put(sdmmc_dev(host)); + pm_runtime_put_noidle(sdmmc_dev(host)); } else { - pm_runtime_get_sync(sdmmc_dev(host)); + pm_runtime_get_noresume(sdmmc_dev(host)); err = sd_power_on(host); } @@ -1297,16 +1298,20 @@ static void rtsx_usb_update_led(struct work_struct *work) container_of(work, struct rtsx_usb_sdmmc, led_work); struct rtsx_ucr *ucr = host->ucr; - pm_runtime_get_sync(sdmmc_dev(host)); + pm_runtime_get_noresume(sdmmc_dev(host)); mutex_lock(&ucr->dev_mutex); + if (host->power_mode == MMC_POWER_OFF) + goto out; + if (host->led.brightness == LED_OFF) rtsx_usb_turn_off_led(ucr); else rtsx_usb_turn_on_led(ucr); +out: mutex_unlock(&ucr->dev_mutex); - pm_runtime_put(sdmmc_dev(host)); + pm_runtime_put_sync_suspend(sdmmc_dev(host)); } #endif @@ -1320,7 +1325,7 @@ static void rtsx_usb_init_host(struct rtsx_usb_sdmmc *host) mmc->caps = MMC_CAP_4_BIT_DATA | MMC_CAP_SD_HIGHSPEED | MMC_CAP_MMC_HIGHSPEED | MMC_CAP_BUS_WIDTH_TEST | MMC_CAP_UHS_SDR12 | MMC_CAP_UHS_SDR25 | MMC_CAP_UHS_SDR50 | - MMC_CAP_NEEDS_POLL | MMC_CAP_ERASE; + MMC_CAP_ERASE | MMC_CAP_SYNC_RUNTIME_PM; mmc->caps2 = MMC_CAP2_NO_PRESCAN_POWERUP | MMC_CAP2_FULL_PWR_CYCLE | MMC_CAP2_NO_SDIO; @@ -1363,8 +1368,6 @@ static int rtsx_usb_sdmmc_drv_probe(struct platform_device *pdev) mutex_init(&host->host_mutex); rtsx_usb_init_host(host); - pm_runtime_use_autosuspend(&pdev->dev); - pm_runtime_set_autosuspend_delay(&pdev->dev, 50); pm_runtime_enable(&pdev->dev); #ifdef RTSX_USB_USE_LEDS_CLASS @@ -1419,7 +1422,6 @@ static int rtsx_usb_sdmmc_drv_remove(struct platform_device *pdev) mmc_free_host(mmc); pm_runtime_disable(&pdev->dev); - pm_runtime_dont_use_autosuspend(&pdev->dev); platform_set_drvdata(pdev, NULL); dev_dbg(&(pdev->dev), @@ -1428,6 +1430,31 @@ static int rtsx_usb_sdmmc_drv_remove(struct platform_device *pdev) return 0; } +#ifdef CONFIG_PM +static int rtsx_usb_sdmmc_runtime_suspend(struct device *dev) +{ + struct rtsx_usb_sdmmc *host = dev_get_drvdata(dev); + + host->mmc->caps &= ~MMC_CAP_NEEDS_POLL; + return 0; +} + +static int rtsx_usb_sdmmc_runtime_resume(struct device *dev) +{ + struct rtsx_usb_sdmmc *host = dev_get_drvdata(dev); + + host->mmc->caps |= MMC_CAP_NEEDS_POLL; + if (sdmmc_get_cd(host->mmc) == 1) + mmc_detect_change(host->mmc, 0); + return 0; +} +#endif + +static const struct dev_pm_ops rtsx_usb_sdmmc_dev_pm_ops = { + SET_RUNTIME_PM_OPS(rtsx_usb_sdmmc_runtime_suspend, + rtsx_usb_sdmmc_runtime_resume, NULL) +}; + static const struct platform_device_id rtsx_usb_sdmmc_ids[] = { { .name = "rtsx_usb_sdmmc", @@ -1443,6 +1470,7 @@ static struct platform_driver rtsx_usb_sdmmc_driver = { .id_table = rtsx_usb_sdmmc_ids, .driver = { .name = "rtsx_usb_sdmmc", + .pm = &rtsx_usb_sdmmc_dev_pm_ops, }, }; module_platform_driver(rtsx_usb_sdmmc_driver); diff --git a/drivers/mmc/host/s3cmci.c b/drivers/mmc/host/s3cmci.c index f77493604312..10f5219b3b40 100644 --- a/drivers/mmc/host/s3cmci.c +++ b/drivers/mmc/host/s3cmci.c @@ -26,7 +26,6 @@ #include <linux/io.h> #include <linux/of.h> #include <linux/of_device.h> -#include <linux/of_gpio.h> #include <linux/mmc/slot-gpio.h> #include <plat/gpio-cfg.h> @@ -1406,18 +1405,7 @@ static int s3cmci_state_show(struct seq_file *seq, void *v) return 0; } -static int s3cmci_state_open(struct inode *inode, struct file *file) -{ - return single_open(file, s3cmci_state_show, inode->i_private); -} - -static const struct file_operations s3cmci_fops_state = { - .owner = THIS_MODULE, - .open = s3cmci_state_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; +DEFINE_SHOW_ATTRIBUTE(s3cmci_state); #define DBG_REG(_r) { .addr = S3C2410_SDI##_r, .name = #_r } @@ -1459,18 +1447,7 @@ static int s3cmci_regs_show(struct seq_file *seq, void *v) return 0; } -static int s3cmci_regs_open(struct inode *inode, struct file *file) -{ - return single_open(file, s3cmci_regs_show, inode->i_private); -} - -static const struct file_operations s3cmci_fops_regs = { - .owner = THIS_MODULE, - .open = s3cmci_regs_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; +DEFINE_SHOW_ATTRIBUTE(s3cmci_regs); static void s3cmci_debugfs_attach(struct s3cmci_host *host) { @@ -1484,14 +1461,14 @@ static void s3cmci_debugfs_attach(struct s3cmci_host *host) host->debug_state = debugfs_create_file("state", 0444, host->debug_root, host, - &s3cmci_fops_state); + &s3cmci_state_fops); if (IS_ERR(host->debug_state)) dev_err(dev, "failed to create debug state file\n"); host->debug_regs = debugfs_create_file("regs", 0444, host->debug_root, host, - &s3cmci_fops_regs); + &s3cmci_regs_fops); if (IS_ERR(host->debug_regs)) dev_err(dev, "failed to create debug regs file\n"); @@ -1545,25 +1522,19 @@ static int s3cmci_probe_pdata(struct s3cmci_host *host) if (pdata->wprotect_invert) mmc->caps2 |= MMC_CAP2_RO_ACTIVE_HIGH; - if (pdata->detect_invert) - mmc->caps2 |= MMC_CAP2_CD_ACTIVE_HIGH; - - if (gpio_is_valid(pdata->gpio_detect)) { - ret = mmc_gpio_request_cd(mmc, pdata->gpio_detect, 0); - if (ret) { - dev_err(&pdev->dev, "error requesting GPIO for CD %d\n", - ret); - return ret; - } + /* If we get -ENOENT we have no card detect GPIO line */ + ret = mmc_gpiod_request_cd(mmc, "cd", 0, false, 0, NULL); + if (ret != -ENOENT) { + dev_err(&pdev->dev, "error requesting GPIO for CD %d\n", + ret); + return ret; } - if (gpio_is_valid(pdata->gpio_wprotect)) { - ret = mmc_gpio_request_ro(mmc, pdata->gpio_wprotect); - if (ret) { - dev_err(&pdev->dev, "error requesting GPIO for WP %d\n", - ret); - return ret; - } + ret = mmc_gpiod_request_ro(host->mmc, "wp", 0, false, 0, NULL); + if (ret != -ENOENT) { + dev_err(&pdev->dev, "error requesting GPIO for WP %d\n", + ret); + return ret; } return 0; diff --git a/drivers/mmc/host/sdhci-acpi.c b/drivers/mmc/host/sdhci-acpi.c index 057e24f4a620..6669e540851d 100644 --- a/drivers/mmc/host/sdhci-acpi.c +++ b/drivers/mmc/host/sdhci-acpi.c @@ -437,7 +437,8 @@ static const struct sdhci_acpi_slot sdhci_acpi_slot_int_emmc = { MMC_CAP_HW_RESET | MMC_CAP_1_8V_DDR | MMC_CAP_CMD_DURING_TFR | MMC_CAP_WAIT_WHILE_BUSY, .flags = SDHCI_ACPI_RUNTIME_PM, - .quirks = SDHCI_QUIRK_NO_ENDATTR_IN_NOPDESC, + .quirks = SDHCI_QUIRK_NO_ENDATTR_IN_NOPDESC | + SDHCI_QUIRK_NO_LED, .quirks2 = SDHCI_QUIRK2_PRESET_VALUE_BROKEN | SDHCI_QUIRK2_STOP_WITH_TC | SDHCI_QUIRK2_CAPS_BIT63_FOR_HS400, @@ -448,6 +449,7 @@ static const struct sdhci_acpi_slot sdhci_acpi_slot_int_emmc = { static const struct sdhci_acpi_slot sdhci_acpi_slot_int_sdio = { .quirks = SDHCI_QUIRK_BROKEN_CARD_DETECTION | + SDHCI_QUIRK_NO_LED | SDHCI_QUIRK_NO_ENDATTR_IN_NOPDESC, .quirks2 = SDHCI_QUIRK2_HOST_OFF_CARD_ON, .caps = MMC_CAP_NONREMOVABLE | MMC_CAP_POWER_OFF_CARD | @@ -462,7 +464,8 @@ static const struct sdhci_acpi_slot sdhci_acpi_slot_int_sdio = { static const struct sdhci_acpi_slot sdhci_acpi_slot_int_sd = { .flags = SDHCI_ACPI_SD_CD | SDHCI_ACPI_SD_CD_OVERRIDE_LEVEL | SDHCI_ACPI_RUNTIME_PM, - .quirks = SDHCI_QUIRK_NO_ENDATTR_IN_NOPDESC, + .quirks = SDHCI_QUIRK_NO_ENDATTR_IN_NOPDESC | + SDHCI_QUIRK_NO_LED, .quirks2 = SDHCI_QUIRK2_CARD_ON_NEEDS_BUS_ON | SDHCI_QUIRK2_STOP_WITH_TC, .caps = MMC_CAP_WAIT_WHILE_BUSY | MMC_CAP_AGGRESSIVE_PM, diff --git a/drivers/mmc/host/sdhci-cadence.c b/drivers/mmc/host/sdhci-cadence.c index 7a343b87b5e5..e2412875dac5 100644 --- a/drivers/mmc/host/sdhci-cadence.c +++ b/drivers/mmc/host/sdhci-cadence.c @@ -14,7 +14,7 @@ */ #include <linux/bitfield.h> -#include <linux/bitops.h> +#include <linux/bits.h> #include <linux/iopoll.h> #include <linux/module.h> #include <linux/mmc/host.h> diff --git a/drivers/mmc/host/sdhci-esdhc-imx.c b/drivers/mmc/host/sdhci-esdhc-imx.c index f44e49014a44..d0d319398a54 100644 --- a/drivers/mmc/host/sdhci-esdhc-imx.c +++ b/drivers/mmc/host/sdhci-esdhc-imx.c @@ -12,7 +12,6 @@ #include <linux/delay.h> #include <linux/err.h> #include <linux/clk.h> -#include <linux/gpio.h> #include <linux/module.h> #include <linux/slab.h> #include <linux/mmc/host.h> @@ -21,7 +20,6 @@ #include <linux/mmc/slot-gpio.h> #include <linux/of.h> #include <linux/of_device.h> -#include <linux/of_gpio.h> #include <linux/pinctrl/consumer.h> #include <linux/platform_data/mmc-esdhc-imx.h> #include <linux/pm_runtime.h> @@ -429,7 +427,7 @@ static u16 esdhc_readw_le(struct sdhci_host *host, int reg) val = readl(host->ioaddr + ESDHC_MIX_CTRL); else if (imx_data->socdata->flags & ESDHC_FLAG_STD_TUNING) /* the std tuning bits is in ACMD12_ERR for imx6sl */ - val = readl(host->ioaddr + SDHCI_ACMD12_ERR); + val = readl(host->ioaddr + SDHCI_AUTO_CMD_STATUS); } if (val & ESDHC_MIX_CTRL_EXE_TUNE) @@ -494,7 +492,7 @@ static void esdhc_writew_le(struct sdhci_host *host, u16 val, int reg) } writel(new_val , host->ioaddr + ESDHC_MIX_CTRL); } else if (imx_data->socdata->flags & ESDHC_FLAG_STD_TUNING) { - u32 v = readl(host->ioaddr + SDHCI_ACMD12_ERR); + u32 v = readl(host->ioaddr + SDHCI_AUTO_CMD_STATUS); u32 m = readl(host->ioaddr + ESDHC_MIX_CTRL); if (val & SDHCI_CTRL_TUNED_CLK) { v |= ESDHC_MIX_CTRL_SMPCLK_SEL; @@ -512,7 +510,7 @@ static void esdhc_writew_le(struct sdhci_host *host, u16 val, int reg) v &= ~ESDHC_MIX_CTRL_EXE_TUNE; } - writel(v, host->ioaddr + SDHCI_ACMD12_ERR); + writel(v, host->ioaddr + SDHCI_AUTO_CMD_STATUS); writel(m, host->ioaddr + ESDHC_MIX_CTRL); } return; @@ -957,9 +955,9 @@ static void esdhc_reset_tuning(struct sdhci_host *host) writel(ctrl, host->ioaddr + ESDHC_MIX_CTRL); writel(0, host->ioaddr + ESDHC_TUNE_CTRL_STATUS); } else if (imx_data->socdata->flags & ESDHC_FLAG_STD_TUNING) { - ctrl = readl(host->ioaddr + SDHCI_ACMD12_ERR); + ctrl = readl(host->ioaddr + SDHCI_AUTO_CMD_STATUS); ctrl &= ~ESDHC_MIX_CTRL_SMPCLK_SEL; - writel(ctrl, host->ioaddr + SDHCI_ACMD12_ERR); + writel(ctrl, host->ioaddr + SDHCI_AUTO_CMD_STATUS); } } } @@ -1139,8 +1137,12 @@ sdhci_esdhc_imx_probe_dt(struct platform_device *pdev, if (of_get_property(np, "fsl,wp-controller", NULL)) boarddata->wp_type = ESDHC_WP_CONTROLLER; - boarddata->wp_gpio = of_get_named_gpio(np, "wp-gpios", 0); - if (gpio_is_valid(boarddata->wp_gpio)) + /* + * If we have this property, then activate WP check. + * Retrieveing and requesting the actual WP GPIO will happen + * in the call to mmc_of_parse(). + */ + if (of_property_read_bool(np, "wp-gpios")) boarddata->wp_type = ESDHC_WP_GPIO; of_property_read_u32(np, "fsl,tuning-step", &boarddata->tuning_step); @@ -1198,7 +1200,7 @@ static int sdhci_esdhc_imx_probe_nondt(struct platform_device *pdev, host->mmc->parent->platform_data); /* write_protect */ if (boarddata->wp_type == ESDHC_WP_GPIO) { - err = mmc_gpio_request_ro(host->mmc, boarddata->wp_gpio); + err = mmc_gpiod_request_ro(host->mmc, "wp", 0, false, 0, NULL); if (err) { dev_err(mmc_dev(host->mmc), "failed to request write-protect gpio!\n"); @@ -1210,7 +1212,7 @@ static int sdhci_esdhc_imx_probe_nondt(struct platform_device *pdev, /* card_detect */ switch (boarddata->cd_type) { case ESDHC_CD_GPIO: - err = mmc_gpio_request_cd(host->mmc, boarddata->cd_gpio, 0); + err = mmc_gpiod_request_cd(host->mmc, "cd", 0, false, 0, NULL); if (err) { dev_err(mmc_dev(host->mmc), "failed to request card-detect gpio!\n"); @@ -1317,7 +1319,7 @@ static int sdhci_esdhc_imx_probe(struct platform_device *pdev) /* clear tuning bits in case ROM has set it already */ writel(0x0, host->ioaddr + ESDHC_MIX_CTRL); - writel(0x0, host->ioaddr + SDHCI_ACMD12_ERR); + writel(0x0, host->ioaddr + SDHCI_AUTO_CMD_STATUS); writel(0x0, host->ioaddr + ESDHC_TUNE_CTRL_STATUS); } diff --git a/drivers/mmc/host/sdhci-esdhc.h b/drivers/mmc/host/sdhci-esdhc.h index 3f16d9c90ba2..39dbbd6eaf28 100644 --- a/drivers/mmc/host/sdhci-esdhc.h +++ b/drivers/mmc/host/sdhci-esdhc.h @@ -59,9 +59,33 @@ /* Tuning Block Control Register */ #define ESDHC_TBCTL 0x120 +#define ESDHC_HS400_WNDW_ADJUST 0x00000040 +#define ESDHC_HS400_MODE 0x00000010 #define ESDHC_TB_EN 0x00000004 #define ESDHC_TBPTR 0x128 +/* SD Clock Control Register */ +#define ESDHC_SDCLKCTL 0x144 +#define ESDHC_LPBK_CLK_SEL 0x80000000 +#define ESDHC_CMD_CLK_CTL 0x00008000 + +/* SD Timing Control Register */ +#define ESDHC_SDTIMNGCTL 0x148 +#define ESDHC_FLW_CTL_BG 0x00008000 + +/* DLL Config 0 Register */ +#define ESDHC_DLLCFG0 0x160 +#define ESDHC_DLL_ENABLE 0x80000000 +#define ESDHC_DLL_FREQ_SEL 0x08000000 + +/* DLL Config 1 Register */ +#define ESDHC_DLLCFG1 0x164 +#define ESDHC_DLL_PD_PULSE_STRETCH_SEL 0x80000000 + +/* DLL Status 0 Register */ +#define ESDHC_DLLSTAT0 0x170 +#define ESDHC_DLL_STS_SLV_LOCK 0x08000000 + /* Control Register for DMA transfer */ #define ESDHC_DMA_SYSCTL 0x40c #define ESDHC_PERIPHERAL_CLK_SEL 0x00080000 diff --git a/drivers/mmc/host/sdhci-msm.c b/drivers/mmc/host/sdhci-msm.c index 3cc8bfee6c18..d6c9ebd8d263 100644 --- a/drivers/mmc/host/sdhci-msm.c +++ b/drivers/mmc/host/sdhci-msm.c @@ -232,6 +232,7 @@ struct sdhci_msm_variant_ops { */ struct sdhci_msm_variant_info { bool mci_removed; + bool restore_dll_config; const struct sdhci_msm_variant_ops *var_ops; const struct sdhci_msm_offset *offset; }; @@ -256,8 +257,11 @@ struct sdhci_msm_host { bool pwr_irq_flag; u32 caps_0; bool mci_removed; + bool restore_dll_config; const struct sdhci_msm_variant_ops *var_ops; const struct sdhci_msm_offset *offset; + bool use_cdr; + u32 transfer_mode; }; static const struct sdhci_msm_offset *sdhci_priv_msm_offset(struct sdhci_host *host) @@ -1025,6 +1029,69 @@ out: return ret; } +static bool sdhci_msm_is_tuning_needed(struct sdhci_host *host) +{ + struct mmc_ios *ios = &host->mmc->ios; + + /* + * Tuning is required for SDR104, HS200 and HS400 cards and + * if clock frequency is greater than 100MHz in these modes. + */ + if (host->clock <= CORE_FREQ_100MHZ || + !(ios->timing == MMC_TIMING_MMC_HS400 || + ios->timing == MMC_TIMING_MMC_HS200 || + ios->timing == MMC_TIMING_UHS_SDR104) || + ios->enhanced_strobe) + return false; + + return true; +} + +static int sdhci_msm_restore_sdr_dll_config(struct sdhci_host *host) +{ + struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host); + struct sdhci_msm_host *msm_host = sdhci_pltfm_priv(pltfm_host); + int ret; + + /* + * SDR DLL comes into picture only for timing modes which needs + * tuning. + */ + if (!sdhci_msm_is_tuning_needed(host)) + return 0; + + /* Reset the tuning block */ + ret = msm_init_cm_dll(host); + if (ret) + return ret; + + /* Restore the tuning block */ + ret = msm_config_cm_dll_phase(host, msm_host->saved_tuning_phase); + + return ret; +} + +static void sdhci_msm_set_cdr(struct sdhci_host *host, bool enable) +{ + const struct sdhci_msm_offset *msm_offset = sdhci_priv_msm_offset(host); + u32 config, oldconfig = readl_relaxed(host->ioaddr + + msm_offset->core_dll_config); + + config = oldconfig; + if (enable) { + config |= CORE_CDR_EN; + config &= ~CORE_CDR_EXT_EN; + } else { + config &= ~CORE_CDR_EN; + config |= CORE_CDR_EXT_EN; + } + + if (config != oldconfig) { + writel_relaxed(config, host->ioaddr + + msm_offset->core_dll_config); + } +} + static int sdhci_msm_execute_tuning(struct mmc_host *mmc, u32 opcode) { struct sdhci_host *host = mmc_priv(mmc); @@ -1035,15 +1102,14 @@ static int sdhci_msm_execute_tuning(struct mmc_host *mmc, u32 opcode) struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host); struct sdhci_msm_host *msm_host = sdhci_pltfm_priv(pltfm_host); - /* - * Tuning is required for SDR104, HS200 and HS400 cards and - * if clock frequency is greater than 100MHz in these modes. - */ - if (host->clock <= CORE_FREQ_100MHZ || - !(ios.timing == MMC_TIMING_MMC_HS400 || - ios.timing == MMC_TIMING_MMC_HS200 || - ios.timing == MMC_TIMING_UHS_SDR104)) + if (!sdhci_msm_is_tuning_needed(host)) { + msm_host->use_cdr = false; + sdhci_msm_set_cdr(host, false); return 0; + } + + /* Clock-Data-Recovery used to dynamically adjust RX sampling point */ + msm_host->use_cdr = true; /* * For HS400 tuning in HS200 timing requires: @@ -1069,7 +1135,6 @@ retry: if (rc) return rc; - msm_host->saved_tuning_phase = phase; rc = mmc_send_tuning(mmc, opcode, NULL); if (!rc) { /* Tuning is successful at this tuning point */ @@ -1094,6 +1159,7 @@ retry: rc = msm_config_cm_dll_phase(host, phase); if (rc) return rc; + msm_host->saved_tuning_phase = phase; dev_dbg(mmc_dev(mmc), "%s: Setting the tuning phase to %d\n", mmc_hostname(mmc), phase); } else { @@ -1525,6 +1591,19 @@ static int __sdhci_msm_check_write(struct sdhci_host *host, u16 val, int reg) case SDHCI_POWER_CONTROL: req_type = !val ? REQ_BUS_OFF : REQ_BUS_ON; break; + case SDHCI_TRANSFER_MODE: + msm_host->transfer_mode = val; + break; + case SDHCI_COMMAND: + if (!msm_host->use_cdr) + break; + if ((msm_host->transfer_mode & SDHCI_TRNS_READ) && + SDHCI_GET_CMD(val) != MMC_SEND_TUNING_BLOCK_HS200 && + SDHCI_GET_CMD(val) != MMC_SEND_TUNING_BLOCK) + sdhci_msm_set_cdr(host, true); + else + sdhci_msm_set_cdr(host, false); + break; } if (req_type) { @@ -1616,7 +1695,6 @@ static const struct sdhci_msm_variant_ops v5_var_ops = { }; static const struct sdhci_msm_variant_info sdhci_msm_mci_var = { - .mci_removed = false, .var_ops = &mci_var_ops, .offset = &sdhci_msm_mci_offset, }; @@ -1627,9 +1705,17 @@ static const struct sdhci_msm_variant_info sdhci_msm_v5_var = { .offset = &sdhci_msm_v5_offset, }; +static const struct sdhci_msm_variant_info sdm845_sdhci_var = { + .mci_removed = true, + .restore_dll_config = true, + .var_ops = &v5_var_ops, + .offset = &sdhci_msm_v5_offset, +}; + static const struct of_device_id sdhci_msm_dt_match[] = { {.compatible = "qcom,sdhci-msm-v4", .data = &sdhci_msm_mci_var}, {.compatible = "qcom,sdhci-msm-v5", .data = &sdhci_msm_v5_var}, + {.compatible = "qcom,sdm845-sdhci", .data = &sdm845_sdhci_var}, {}, }; @@ -1689,6 +1775,7 @@ static int sdhci_msm_probe(struct platform_device *pdev) var_info = of_device_get_match_data(&pdev->dev); msm_host->mci_removed = var_info->mci_removed; + msm_host->restore_dll_config = var_info->restore_dll_config; msm_host->var_ops = var_info->var_ops; msm_host->offset = var_info->offset; @@ -1910,8 +1997,7 @@ static int sdhci_msm_remove(struct platform_device *pdev) return 0; } -#ifdef CONFIG_PM -static int sdhci_msm_runtime_suspend(struct device *dev) +static __maybe_unused int sdhci_msm_runtime_suspend(struct device *dev) { struct sdhci_host *host = dev_get_drvdata(dev); struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host); @@ -1923,16 +2009,26 @@ static int sdhci_msm_runtime_suspend(struct device *dev) return 0; } -static int sdhci_msm_runtime_resume(struct device *dev) +static __maybe_unused int sdhci_msm_runtime_resume(struct device *dev) { struct sdhci_host *host = dev_get_drvdata(dev); struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host); struct sdhci_msm_host *msm_host = sdhci_pltfm_priv(pltfm_host); + int ret; - return clk_bulk_prepare_enable(ARRAY_SIZE(msm_host->bulk_clks), + ret = clk_bulk_prepare_enable(ARRAY_SIZE(msm_host->bulk_clks), msm_host->bulk_clks); + if (ret) + return ret; + /* + * Whenever core-clock is gated dynamically, it's needed to + * restore the SDR DLL settings when the clock is ungated. + */ + if (msm_host->restore_dll_config && msm_host->clk_rate) + return sdhci_msm_restore_sdr_dll_config(host); + + return 0; } -#endif static const struct dev_pm_ops sdhci_msm_pm_ops = { SET_SYSTEM_SLEEP_PM_OPS(pm_runtime_force_suspend, diff --git a/drivers/mmc/host/sdhci-of-arasan.c b/drivers/mmc/host/sdhci-of-arasan.c index 142c4b802f31..c9e3e050ccc8 100644 --- a/drivers/mmc/host/sdhci-of-arasan.c +++ b/drivers/mmc/host/sdhci-of-arasan.c @@ -231,25 +231,6 @@ static void sdhci_arasan_set_clock(struct sdhci_host *host, unsigned int clock) } } -static void sdhci_arasan_am654_set_clock(struct sdhci_host *host, - unsigned int clock) -{ - struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host); - struct sdhci_arasan_data *sdhci_arasan = sdhci_pltfm_priv(pltfm_host); - - if (sdhci_arasan->is_phy_on) { - phy_power_off(sdhci_arasan->phy); - sdhci_arasan->is_phy_on = false; - } - - sdhci_set_clock(host, clock); - - if (clock > PHY_CLK_TOO_SLOW_HZ) { - phy_power_on(sdhci_arasan->phy); - sdhci_arasan->is_phy_on = true; - } -} - static void sdhci_arasan_hs400_enhanced_strobe(struct mmc_host *mmc, struct mmc_ios *ios) { @@ -335,29 +316,6 @@ static struct sdhci_arasan_of_data sdhci_arasan_data = { .pdata = &sdhci_arasan_pdata, }; -static const struct sdhci_ops sdhci_arasan_am654_ops = { - .set_clock = sdhci_arasan_am654_set_clock, - .get_max_clock = sdhci_pltfm_clk_get_max_clock, - .get_timeout_clock = sdhci_pltfm_clk_get_max_clock, - .set_bus_width = sdhci_set_bus_width, - .reset = sdhci_arasan_reset, - .set_uhs_signaling = sdhci_set_uhs_signaling, -}; - -static const struct sdhci_pltfm_data sdhci_arasan_am654_pdata = { - .ops = &sdhci_arasan_am654_ops, - .quirks = SDHCI_QUIRK_CAP_CLOCK_BASE_BROKEN | - SDHCI_QUIRK_INVERTED_WRITE_PROTECT | - SDHCI_QUIRK_MULTIBLOCK_READ_ACMD12, - .quirks2 = SDHCI_QUIRK2_PRESET_VALUE_BROKEN | - SDHCI_QUIRK2_CLOCK_DIV_ZERO_BROKEN | - SDHCI_QUIRK2_CAPS_BIT63_FOR_HS400, -}; - -static const struct sdhci_arasan_of_data sdhci_arasan_am654_data = { - .pdata = &sdhci_arasan_am654_pdata, -}; - static u32 sdhci_arasan_cqhci_irq(struct sdhci_host *host, u32 intmask) { int cmd_error = 0; @@ -520,10 +478,6 @@ static const struct of_device_id sdhci_arasan_of_match[] = { .compatible = "rockchip,rk3399-sdhci-5.1", .data = &sdhci_arasan_rk3399_data, }, - { - .compatible = "ti,am654-sdhci-5.1", - .data = &sdhci_arasan_am654_data, - }, /* Generic compatible below here */ { .compatible = "arasan,sdhci-8.9a", diff --git a/drivers/mmc/host/sdhci-of-esdhc.c b/drivers/mmc/host/sdhci-of-esdhc.c index 86fc9f022002..4e669b4edfc1 100644 --- a/drivers/mmc/host/sdhci-of-esdhc.c +++ b/drivers/mmc/host/sdhci-of-esdhc.c @@ -78,6 +78,8 @@ struct sdhci_esdhc { u8 vendor_ver; u8 spec_ver; bool quirk_incorrect_hostver; + bool quirk_limited_clk_division; + bool quirk_unreliable_pulse_detection; bool quirk_fixup_tuning; unsigned int peripheral_clock; const struct esdhc_clk_fixup *clk_fixup; @@ -528,8 +530,12 @@ static void esdhc_clock_enable(struct sdhci_host *host, bool enable) /* Wait max 20 ms */ timeout = ktime_add_ms(ktime_get(), 20); val = ESDHC_CLOCK_STABLE; - while (!(sdhci_readl(host, ESDHC_PRSSTAT) & val)) { - if (ktime_after(ktime_get(), timeout)) { + while (1) { + bool timedout = ktime_after(ktime_get(), timeout); + + if (sdhci_readl(host, ESDHC_PRSSTAT) & val) + break; + if (timedout) { pr_err("%s: Internal clock never stabilised.\n", mmc_hostname(host->mmc)); break; @@ -544,6 +550,7 @@ static void esdhc_of_set_clock(struct sdhci_host *host, unsigned int clock) struct sdhci_esdhc *esdhc = sdhci_pltfm_priv(pltfm_host); int pre_div = 1; int div = 1; + int division; ktime_t timeout; long fixup = 0; u32 temp; @@ -579,6 +586,26 @@ static void esdhc_of_set_clock(struct sdhci_host *host, unsigned int clock) while (host->max_clk / pre_div / div > clock && div < 16) div++; + if (esdhc->quirk_limited_clk_division && + clock == MMC_HS200_MAX_DTR && + (host->mmc->ios.timing == MMC_TIMING_MMC_HS400 || + host->flags & SDHCI_HS400_TUNING)) { + division = pre_div * div; + if (division <= 4) { + pre_div = 4; + div = 1; + } else if (division <= 8) { + pre_div = 4; + div = 2; + } else if (division <= 12) { + pre_div = 4; + div = 3; + } else { + pr_warn("%s: using unsupported clock division.\n", + mmc_hostname(host->mmc)); + } + } + dev_dbg(mmc_dev(host->mmc), "desired SD clock: %d, actual: %d\n", clock, host->max_clk / pre_div / div); host->mmc->actual_clock = host->max_clk / pre_div / div; @@ -592,10 +619,36 @@ static void esdhc_of_set_clock(struct sdhci_host *host, unsigned int clock) | (pre_div << ESDHC_PREDIV_SHIFT)); sdhci_writel(host, temp, ESDHC_SYSTEM_CONTROL); + if (host->mmc->ios.timing == MMC_TIMING_MMC_HS400 && + clock == MMC_HS200_MAX_DTR) { + temp = sdhci_readl(host, ESDHC_TBCTL); + sdhci_writel(host, temp | ESDHC_HS400_MODE, ESDHC_TBCTL); + temp = sdhci_readl(host, ESDHC_SDCLKCTL); + sdhci_writel(host, temp | ESDHC_CMD_CLK_CTL, ESDHC_SDCLKCTL); + esdhc_clock_enable(host, true); + + temp = sdhci_readl(host, ESDHC_DLLCFG0); + temp |= ESDHC_DLL_ENABLE; + if (host->mmc->actual_clock == MMC_HS200_MAX_DTR) + temp |= ESDHC_DLL_FREQ_SEL; + sdhci_writel(host, temp, ESDHC_DLLCFG0); + temp = sdhci_readl(host, ESDHC_TBCTL); + sdhci_writel(host, temp | ESDHC_HS400_WNDW_ADJUST, ESDHC_TBCTL); + + esdhc_clock_enable(host, false); + temp = sdhci_readl(host, ESDHC_DMA_SYSCTL); + temp |= ESDHC_FLUSH_ASYNC_FIFO; + sdhci_writel(host, temp, ESDHC_DMA_SYSCTL); + } + /* Wait max 20 ms */ timeout = ktime_add_ms(ktime_get(), 20); - while (!(sdhci_readl(host, ESDHC_PRSSTAT) & ESDHC_CLOCK_STABLE)) { - if (ktime_after(ktime_get(), timeout)) { + while (1) { + bool timedout = ktime_after(ktime_get(), timeout); + + if (sdhci_readl(host, ESDHC_PRSSTAT) & ESDHC_CLOCK_STABLE) + break; + if (timedout) { pr_err("%s: Internal clock never stabilised.\n", mmc_hostname(host->mmc)); return; @@ -603,6 +656,7 @@ static void esdhc_of_set_clock(struct sdhci_host *host, unsigned int clock) udelay(10); } + temp = sdhci_readl(host, ESDHC_SYSTEM_CONTROL); temp |= ESDHC_CLOCK_SDCLKEN; sdhci_writel(host, temp, ESDHC_SYSTEM_CONTROL); } @@ -631,6 +685,8 @@ static void esdhc_pltfm_set_bus_width(struct sdhci_host *host, int width) static void esdhc_reset(struct sdhci_host *host, u8 mask) { + struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host); + struct sdhci_esdhc *esdhc = sdhci_pltfm_priv(pltfm_host); u32 val; sdhci_reset(host, mask); @@ -642,6 +698,12 @@ static void esdhc_reset(struct sdhci_host *host, u8 mask) val = sdhci_readl(host, ESDHC_TBCTL); val &= ~ESDHC_TB_EN; sdhci_writel(host, val, ESDHC_TBCTL); + + if (esdhc->quirk_unreliable_pulse_detection) { + val = sdhci_readl(host, ESDHC_DLLCFG1); + val &= ~ESDHC_DLL_PD_PULSE_STRETCH_SEL; + sdhci_writel(host, val, ESDHC_DLLCFG1); + } } } @@ -728,25 +790,50 @@ static struct soc_device_attribute soc_fixup_tuning[] = { { }, }; -static int esdhc_execute_tuning(struct mmc_host *mmc, u32 opcode) +static void esdhc_tuning_block_enable(struct sdhci_host *host, bool enable) { - struct sdhci_host *host = mmc_priv(mmc); - struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host); - struct sdhci_esdhc *esdhc = sdhci_pltfm_priv(pltfm_host); u32 val; - /* Use tuning block for tuning procedure */ esdhc_clock_enable(host, false); + val = sdhci_readl(host, ESDHC_DMA_SYSCTL); val |= ESDHC_FLUSH_ASYNC_FIFO; sdhci_writel(host, val, ESDHC_DMA_SYSCTL); val = sdhci_readl(host, ESDHC_TBCTL); - val |= ESDHC_TB_EN; + if (enable) + val |= ESDHC_TB_EN; + else + val &= ~ESDHC_TB_EN; sdhci_writel(host, val, ESDHC_TBCTL); + esdhc_clock_enable(host, true); +} + +static int esdhc_execute_tuning(struct mmc_host *mmc, u32 opcode) +{ + struct sdhci_host *host = mmc_priv(mmc); + struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host); + struct sdhci_esdhc *esdhc = sdhci_pltfm_priv(pltfm_host); + bool hs400_tuning; + u32 val; + int ret; + + if (esdhc->quirk_limited_clk_division && + host->flags & SDHCI_HS400_TUNING) + esdhc_of_set_clock(host, host->clock); + + esdhc_tuning_block_enable(host, true); + + hs400_tuning = host->flags & SDHCI_HS400_TUNING; + ret = sdhci_execute_tuning(mmc, opcode); + + if (hs400_tuning) { + val = sdhci_readl(host, ESDHC_SDTIMNGCTL); + val |= ESDHC_FLW_CTL_BG; + sdhci_writel(host, val, ESDHC_SDTIMNGCTL); + } - sdhci_execute_tuning(mmc, opcode); if (host->tuning_err == -EAGAIN && esdhc->quirk_fixup_tuning) { /* program TBPTR[TB_WNDW_END_PTR] = 3*DIV_RATIO and @@ -765,7 +852,16 @@ static int esdhc_execute_tuning(struct mmc_host *mmc, u32 opcode) sdhci_writel(host, val, ESDHC_TBCTL); sdhci_execute_tuning(mmc, opcode); } - return 0; + return ret; +} + +static void esdhc_set_uhs_signaling(struct sdhci_host *host, + unsigned int timing) +{ + if (timing == MMC_TIMING_MMC_HS400) + esdhc_tuning_block_enable(host, true); + else + sdhci_set_uhs_signaling(host, timing); } #ifdef CONFIG_PM_SLEEP @@ -814,7 +910,7 @@ static const struct sdhci_ops sdhci_esdhc_be_ops = { .adma_workaround = esdhc_of_adma_workaround, .set_bus_width = esdhc_pltfm_set_bus_width, .reset = esdhc_reset, - .set_uhs_signaling = sdhci_set_uhs_signaling, + .set_uhs_signaling = esdhc_set_uhs_signaling, }; static const struct sdhci_ops sdhci_esdhc_le_ops = { @@ -831,7 +927,7 @@ static const struct sdhci_ops sdhci_esdhc_le_ops = { .adma_workaround = esdhc_of_adma_workaround, .set_bus_width = esdhc_pltfm_set_bus_width, .reset = esdhc_reset, - .set_uhs_signaling = sdhci_set_uhs_signaling, + .set_uhs_signaling = esdhc_set_uhs_signaling, }; static const struct sdhci_pltfm_data sdhci_esdhc_be_pdata = { @@ -857,6 +953,16 @@ static struct soc_device_attribute soc_incorrect_hostver[] = { { }, }; +static struct soc_device_attribute soc_fixup_sdhc_clkdivs[] = { + { .family = "QorIQ LX2160A", .revision = "1.0", }, + { }, +}; + +static struct soc_device_attribute soc_unreliable_pulse_detection[] = { + { .family = "QorIQ LX2160A", .revision = "1.0", }, + { }, +}; + static void esdhc_init(struct platform_device *pdev, struct sdhci_host *host) { const struct of_device_id *match; @@ -879,6 +985,16 @@ static void esdhc_init(struct platform_device *pdev, struct sdhci_host *host) else esdhc->quirk_incorrect_hostver = false; + if (soc_device_match(soc_fixup_sdhc_clkdivs)) + esdhc->quirk_limited_clk_division = true; + else + esdhc->quirk_limited_clk_division = false; + + if (soc_device_match(soc_unreliable_pulse_detection)) + esdhc->quirk_unreliable_pulse_detection = true; + else + esdhc->quirk_unreliable_pulse_detection = false; + match = of_match_node(sdhci_esdhc_of_match, pdev->dev.of_node); if (match) esdhc->clk_fixup = match->data; @@ -909,6 +1025,12 @@ static void esdhc_init(struct platform_device *pdev, struct sdhci_host *host) } } +static int esdhc_hs400_prepare_ddr(struct mmc_host *mmc) +{ + esdhc_tuning_block_enable(mmc_priv(mmc), false); + return 0; +} + static int sdhci_esdhc_probe(struct platform_device *pdev) { struct sdhci_host *host; @@ -932,6 +1054,7 @@ static int sdhci_esdhc_probe(struct platform_device *pdev) host->mmc_host_ops.start_signal_voltage_switch = esdhc_signal_voltage_switch; host->mmc_host_ops.execute_tuning = esdhc_execute_tuning; + host->mmc_host_ops.hs400_prepare_ddr = esdhc_hs400_prepare_ddr; host->tuning_delay = 1; esdhc_init(pdev, host); diff --git a/drivers/mmc/host/sdhci-omap.c b/drivers/mmc/host/sdhci-omap.c index d264391616f9..c11c18a9aacb 100644 --- a/drivers/mmc/host/sdhci-omap.c +++ b/drivers/mmc/host/sdhci-omap.c @@ -27,6 +27,7 @@ #include <linux/regulator/consumer.h> #include <linux/pinctrl/consumer.h> #include <linux/sys_soc.h> +#include <linux/thermal.h> #include "sdhci-pltfm.h" @@ -115,6 +116,7 @@ struct sdhci_omap_host { struct pinctrl *pinctrl; struct pinctrl_state **pinctrl_state; + bool is_tuning; }; static void sdhci_omap_start_clock(struct sdhci_omap_host *omap_host); @@ -220,8 +222,12 @@ static void sdhci_omap_conf_bus_power(struct sdhci_omap_host *omap_host, /* wait 1ms */ timeout = ktime_add_ms(ktime_get(), SDHCI_OMAP_TIMEOUT); - while (!(sdhci_omap_readl(omap_host, SDHCI_OMAP_HCTL) & HCTL_SDBP)) { - if (WARN_ON(ktime_after(ktime_get(), timeout))) + while (1) { + bool timedout = ktime_after(ktime_get(), timeout); + + if (sdhci_omap_readl(omap_host, SDHCI_OMAP_HCTL) & HCTL_SDBP) + break; + if (WARN_ON(timedout)) return; usleep_range(5, 10); } @@ -285,19 +291,19 @@ static int sdhci_omap_execute_tuning(struct mmc_host *mmc, u32 opcode) struct sdhci_host *host = mmc_priv(mmc); struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host); struct sdhci_omap_host *omap_host = sdhci_pltfm_priv(pltfm_host); + struct thermal_zone_device *thermal_dev; struct device *dev = omap_host->dev; struct mmc_ios *ios = &mmc->ios; u32 start_window = 0, max_window = 0; + bool single_point_failure = false; bool dcrc_was_enabled = false; u8 cur_match, prev_match = 0; u32 length = 0, max_len = 0; u32 phase_delay = 0; + int temperature; int ret = 0; u32 reg; - - pltfm_host = sdhci_priv(host); - omap_host = sdhci_pltfm_priv(pltfm_host); - dev = omap_host->dev; + int i; /* clock tuning is not needed for upto 52MHz */ if (ios->clock <= 52000000) @@ -307,6 +313,16 @@ static int sdhci_omap_execute_tuning(struct mmc_host *mmc, u32 opcode) if (ios->timing == MMC_TIMING_UHS_SDR50 && !(reg & CAPA2_TSDR50)) return 0; + thermal_dev = thermal_zone_get_zone_by_name("cpu_thermal"); + if (IS_ERR(thermal_dev)) { + dev_err(dev, "Unable to get thermal zone for tuning\n"); + return PTR_ERR(thermal_dev); + } + + ret = thermal_zone_get_temp(thermal_dev, &temperature); + if (ret) + return ret; + reg = sdhci_omap_readl(omap_host, SDHCI_OMAP_DLL); reg |= DLL_SWT; sdhci_omap_writel(omap_host, SDHCI_OMAP_DLL, reg); @@ -322,6 +338,13 @@ static int sdhci_omap_execute_tuning(struct mmc_host *mmc, u32 opcode) dcrc_was_enabled = true; } + omap_host->is_tuning = true; + + /* + * Stage 1: Search for a maximum pass window ignoring any + * any single point failures. If the tuning value ends up + * near it, move away from it in stage 2 below + */ while (phase_delay <= MAX_PHASE_DELAY) { sdhci_omap_set_dll(omap_host, phase_delay); @@ -329,10 +352,15 @@ static int sdhci_omap_execute_tuning(struct mmc_host *mmc, u32 opcode) if (cur_match) { if (prev_match) { length++; + } else if (single_point_failure) { + /* ignore single point failure */ + length++; } else { start_window = phase_delay; length = 1; } + } else { + single_point_failure = prev_match; } if (length > max_len) { @@ -350,18 +378,84 @@ static int sdhci_omap_execute_tuning(struct mmc_host *mmc, u32 opcode) goto tuning_error; } + /* + * Assign tuning value as a ratio of maximum pass window based + * on temperature + */ + if (temperature < -20000) + phase_delay = min(max_window + 4 * max_len - 24, + max_window + + DIV_ROUND_UP(13 * max_len, 16) * 4); + else if (temperature < 20000) + phase_delay = max_window + DIV_ROUND_UP(9 * max_len, 16) * 4; + else if (temperature < 40000) + phase_delay = max_window + DIV_ROUND_UP(8 * max_len, 16) * 4; + else if (temperature < 70000) + phase_delay = max_window + DIV_ROUND_UP(7 * max_len, 16) * 4; + else if (temperature < 90000) + phase_delay = max_window + DIV_ROUND_UP(5 * max_len, 16) * 4; + else if (temperature < 120000) + phase_delay = max_window + DIV_ROUND_UP(4 * max_len, 16) * 4; + else + phase_delay = max_window + DIV_ROUND_UP(3 * max_len, 16) * 4; + + /* + * Stage 2: Search for a single point failure near the chosen tuning + * value in two steps. First in the +3 to +10 range and then in the + * +2 to -10 range. If found, move away from it in the appropriate + * direction by the appropriate amount depending on the temperature. + */ + for (i = 3; i <= 10; i++) { + sdhci_omap_set_dll(omap_host, phase_delay + i); + + if (mmc_send_tuning(mmc, opcode, NULL)) { + if (temperature < 10000) + phase_delay += i + 6; + else if (temperature < 20000) + phase_delay += i - 12; + else if (temperature < 70000) + phase_delay += i - 8; + else + phase_delay += i - 6; + + goto single_failure_found; + } + } + + for (i = 2; i >= -10; i--) { + sdhci_omap_set_dll(omap_host, phase_delay + i); + + if (mmc_send_tuning(mmc, opcode, NULL)) { + if (temperature < 10000) + phase_delay += i + 12; + else if (temperature < 20000) + phase_delay += i + 8; + else if (temperature < 70000) + phase_delay += i + 8; + else if (temperature < 90000) + phase_delay += i + 10; + else + phase_delay += i + 12; + + goto single_failure_found; + } + } + +single_failure_found: reg = sdhci_omap_readl(omap_host, SDHCI_OMAP_AC12); if (!(reg & AC12_SCLK_SEL)) { ret = -EIO; goto tuning_error; } - phase_delay = max_window + 4 * (max_len >> 1); sdhci_omap_set_dll(omap_host, phase_delay); + omap_host->is_tuning = false; + goto ret; tuning_error: + omap_host->is_tuning = false; dev_err(dev, "Tuning failed\n"); sdhci_omap_disable_tuning(omap_host); @@ -653,8 +747,12 @@ static void sdhci_omap_init_74_clocks(struct sdhci_host *host, u8 power_mode) /* wait 1ms */ timeout = ktime_add_ms(ktime_get(), SDHCI_OMAP_TIMEOUT); - while (!(sdhci_omap_readl(omap_host, SDHCI_OMAP_STAT) & INT_CC_EN)) { - if (WARN_ON(ktime_after(ktime_get(), timeout))) + while (1) { + bool timedout = ktime_after(ktime_get(), timeout); + + if (sdhci_omap_readl(omap_host, SDHCI_OMAP_STAT) & INT_CC_EN) + break; + if (WARN_ON(timedout)) return; usleep_range(5, 10); } @@ -687,6 +785,18 @@ static void sdhci_omap_set_uhs_signaling(struct sdhci_host *host, sdhci_omap_start_clock(omap_host); } +void sdhci_omap_reset(struct sdhci_host *host, u8 mask) +{ + struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host); + struct sdhci_omap_host *omap_host = sdhci_pltfm_priv(pltfm_host); + + /* Don't reset data lines during tuning operation */ + if (omap_host->is_tuning) + mask &= ~SDHCI_RESET_DATA; + + sdhci_reset(host, mask); +} + static struct sdhci_ops sdhci_omap_ops = { .set_clock = sdhci_omap_set_clock, .set_power = sdhci_omap_set_power, @@ -695,7 +805,7 @@ static struct sdhci_ops sdhci_omap_ops = { .get_min_clock = sdhci_omap_get_min_clock, .set_bus_width = sdhci_omap_set_bus_width, .platform_send_init_74_clocks = sdhci_omap_init_74_clocks, - .reset = sdhci_reset, + .reset = sdhci_omap_reset, .set_uhs_signaling = sdhci_omap_set_uhs_signaling, }; diff --git a/drivers/mmc/host/sdhci-pci-core.c b/drivers/mmc/host/sdhci-pci-core.c index c4115bae5db1..2a6eba74b94e 100644 --- a/drivers/mmc/host/sdhci-pci-core.c +++ b/drivers/mmc/host/sdhci-pci-core.c @@ -710,11 +710,15 @@ static int intel_execute_tuning(struct mmc_host *mmc, u32 opcode) static void byt_probe_slot(struct sdhci_pci_slot *slot) { struct mmc_host_ops *ops = &slot->host->mmc_host_ops; + struct device *dev = &slot->chip->pdev->dev; + struct mmc_host *mmc = slot->host->mmc; byt_read_dsm(slot); ops->execute_tuning = intel_execute_tuning; ops->start_signal_voltage_switch = intel_start_signal_voltage_switch; + + device_property_read_u32(dev, "max-frequency", &mmc->f_max); } static int byt_emmc_probe_slot(struct sdhci_pci_slot *slot) @@ -937,7 +941,8 @@ static int byt_sd_probe_slot(struct sdhci_pci_slot *slot) static const struct sdhci_pci_fixes sdhci_intel_byt_emmc = { .allow_runtime_pm = true, .probe_slot = byt_emmc_probe_slot, - .quirks = SDHCI_QUIRK_NO_ENDATTR_IN_NOPDESC, + .quirks = SDHCI_QUIRK_NO_ENDATTR_IN_NOPDESC | + SDHCI_QUIRK_NO_LED, .quirks2 = SDHCI_QUIRK2_PRESET_VALUE_BROKEN | SDHCI_QUIRK2_CAPS_BIT63_FOR_HS400 | SDHCI_QUIRK2_STOP_WITH_TC, @@ -957,7 +962,8 @@ static const struct sdhci_pci_fixes sdhci_intel_glk_emmc = { .runtime_suspend = glk_runtime_suspend, .runtime_resume = glk_runtime_resume, #endif - .quirks = SDHCI_QUIRK_NO_ENDATTR_IN_NOPDESC, + .quirks = SDHCI_QUIRK_NO_ENDATTR_IN_NOPDESC | + SDHCI_QUIRK_NO_LED, .quirks2 = SDHCI_QUIRK2_PRESET_VALUE_BROKEN | SDHCI_QUIRK2_CAPS_BIT63_FOR_HS400 | SDHCI_QUIRK2_STOP_WITH_TC, @@ -966,7 +972,8 @@ static const struct sdhci_pci_fixes sdhci_intel_glk_emmc = { }; static const struct sdhci_pci_fixes sdhci_ni_byt_sdio = { - .quirks = SDHCI_QUIRK_NO_ENDATTR_IN_NOPDESC, + .quirks = SDHCI_QUIRK_NO_ENDATTR_IN_NOPDESC | + SDHCI_QUIRK_NO_LED, .quirks2 = SDHCI_QUIRK2_HOST_OFF_CARD_ON | SDHCI_QUIRK2_PRESET_VALUE_BROKEN, .allow_runtime_pm = true, @@ -976,7 +983,8 @@ static const struct sdhci_pci_fixes sdhci_ni_byt_sdio = { }; static const struct sdhci_pci_fixes sdhci_intel_byt_sdio = { - .quirks = SDHCI_QUIRK_NO_ENDATTR_IN_NOPDESC, + .quirks = SDHCI_QUIRK_NO_ENDATTR_IN_NOPDESC | + SDHCI_QUIRK_NO_LED, .quirks2 = SDHCI_QUIRK2_HOST_OFF_CARD_ON | SDHCI_QUIRK2_PRESET_VALUE_BROKEN, .allow_runtime_pm = true, @@ -986,7 +994,8 @@ static const struct sdhci_pci_fixes sdhci_intel_byt_sdio = { }; static const struct sdhci_pci_fixes sdhci_intel_byt_sd = { - .quirks = SDHCI_QUIRK_NO_ENDATTR_IN_NOPDESC, + .quirks = SDHCI_QUIRK_NO_ENDATTR_IN_NOPDESC | + SDHCI_QUIRK_NO_LED, .quirks2 = SDHCI_QUIRK2_CARD_ON_NEEDS_BUS_ON | SDHCI_QUIRK2_PRESET_VALUE_BROKEN | SDHCI_QUIRK2_STOP_WITH_TC, diff --git a/drivers/mmc/host/sdhci-xenon-phy.c b/drivers/mmc/host/sdhci-xenon-phy.c index 5956e90380e8..5b5eb53a63d2 100644 --- a/drivers/mmc/host/sdhci-xenon-phy.c +++ b/drivers/mmc/host/sdhci-xenon-phy.c @@ -357,9 +357,13 @@ static int xenon_emmc_phy_enable_dll(struct sdhci_host *host) /* Wait max 32 ms */ timeout = ktime_add_ms(ktime_get(), 32); - while (!(sdhci_readw(host, XENON_SLOT_EXT_PRESENT_STATE) & - XENON_DLL_LOCK_STATE)) { - if (ktime_after(ktime_get(), timeout)) { + while (1) { + bool timedout = ktime_after(ktime_get(), timeout); + + if (sdhci_readw(host, XENON_SLOT_EXT_PRESENT_STATE) & + XENON_DLL_LOCK_STATE) + break; + if (timedout) { dev_err(mmc_dev(host->mmc), "Wait for DLL Lock time-out\n"); return -ETIMEDOUT; } diff --git a/drivers/mmc/host/sdhci-xenon.c b/drivers/mmc/host/sdhci-xenon.c index 4d0791f6ec23..a0b5089b3274 100644 --- a/drivers/mmc/host/sdhci-xenon.c +++ b/drivers/mmc/host/sdhci-xenon.c @@ -34,9 +34,13 @@ static int xenon_enable_internal_clk(struct sdhci_host *host) sdhci_writel(host, reg, SDHCI_CLOCK_CONTROL); /* Wait max 20 ms */ timeout = ktime_add_ms(ktime_get(), 20); - while (!((reg = sdhci_readw(host, SDHCI_CLOCK_CONTROL)) - & SDHCI_CLOCK_INT_STABLE)) { - if (ktime_after(ktime_get(), timeout)) { + while (1) { + bool timedout = ktime_after(ktime_get(), timeout); + + reg = sdhci_readw(host, SDHCI_CLOCK_CONTROL); + if (reg & SDHCI_CLOCK_INT_STABLE) + break; + if (timedout) { dev_err(mmc_dev(host->mmc), "Internal clock never stabilised.\n"); return -ETIMEDOUT; } diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c index df05352b6a4a..a22e11a65658 100644 --- a/drivers/mmc/host/sdhci.c +++ b/drivers/mmc/host/sdhci.c @@ -82,8 +82,8 @@ void sdhci_dumpregs(struct sdhci_host *host) SDHCI_DUMP("Int enab: 0x%08x | Sig enab: 0x%08x\n", sdhci_readl(host, SDHCI_INT_ENABLE), sdhci_readl(host, SDHCI_SIGNAL_ENABLE)); - SDHCI_DUMP("AC12 err: 0x%08x | Slot int: 0x%08x\n", - sdhci_readw(host, SDHCI_ACMD12_ERR), + SDHCI_DUMP("ACmd stat: 0x%08x | Slot int: 0x%08x\n", + sdhci_readw(host, SDHCI_AUTO_CMD_STATUS), sdhci_readw(host, SDHCI_SLOT_INT_STATUS)); SDHCI_DUMP("Caps: 0x%08x | Caps_1: 0x%08x\n", sdhci_readl(host, SDHCI_CAPABILITIES), @@ -349,6 +349,9 @@ static void __sdhci_led_activate(struct sdhci_host *host) { u8 ctrl; + if (host->quirks & SDHCI_QUIRK_NO_LED) + return; + ctrl = sdhci_readb(host, SDHCI_HOST_CONTROL); ctrl |= SDHCI_CTRL_LED; sdhci_writeb(host, ctrl, SDHCI_HOST_CONTROL); @@ -358,6 +361,9 @@ static void __sdhci_led_deactivate(struct sdhci_host *host) { u8 ctrl; + if (host->quirks & SDHCI_QUIRK_NO_LED) + return; + ctrl = sdhci_readb(host, SDHCI_HOST_CONTROL); ctrl &= ~SDHCI_CTRL_LED; sdhci_writeb(host, ctrl, SDHCI_HOST_CONTROL); @@ -387,6 +393,9 @@ static int sdhci_led_register(struct sdhci_host *host) { struct mmc_host *mmc = host->mmc; + if (host->quirks & SDHCI_QUIRK_NO_LED) + return 0; + snprintf(host->led_name, sizeof(host->led_name), "%s::", mmc_hostname(mmc)); @@ -400,6 +409,9 @@ static int sdhci_led_register(struct sdhci_host *host) static void sdhci_led_unregister(struct sdhci_host *host) { + if (host->quirks & SDHCI_QUIRK_NO_LED) + return; + led_classdev_unregister(&host->led); } @@ -933,6 +945,11 @@ static void sdhci_set_transfer_irqs(struct sdhci_host *host) else host->ier = (host->ier & ~dma_irqs) | pio_irqs; + if (host->flags & (SDHCI_AUTO_CMD23 | SDHCI_AUTO_CMD12)) + host->ier |= SDHCI_INT_AUTO_CMD_ERR; + else + host->ier &= ~SDHCI_INT_AUTO_CMD_ERR; + sdhci_writel(host, host->ier, SDHCI_INT_ENABLE); sdhci_writel(host, host->ier, SDHCI_SIGNAL_ENABLE); } @@ -1191,8 +1208,7 @@ static bool sdhci_needs_reset(struct sdhci_host *host, struct mmc_request *mrq) return (!(host->flags & SDHCI_DEVICE_DEAD) && ((mrq->cmd && mrq->cmd->error) || (mrq->sbc && mrq->sbc->error) || - (mrq->data && ((mrq->data->error && !mrq->data->stop) || - (mrq->data->stop && mrq->data->stop->error))) || + (mrq->data && mrq->data->stop && mrq->data->stop->error) || (host->quirks & SDHCI_QUIRK_RESET_AFTER_REQUEST))); } @@ -1244,6 +1260,16 @@ static void sdhci_finish_data(struct sdhci_host *host) host->data = NULL; host->data_cmd = NULL; + /* + * The controller needs a reset of internal state machines upon error + * conditions. + */ + if (data->error) { + if (!host->cmd || host->cmd == data_cmd) + sdhci_do_reset(host, SDHCI_RESET_CMD); + sdhci_do_reset(host, SDHCI_RESET_DATA); + } + if ((host->flags & (SDHCI_REQ_USE_DMA | SDHCI_USE_ADMA)) == (SDHCI_REQ_USE_DMA | SDHCI_USE_ADMA)) sdhci_adma_table_post(host, data); @@ -1268,17 +1294,6 @@ static void sdhci_finish_data(struct sdhci_host *host) if (data->stop && (data->error || !data->mrq->sbc)) { - - /* - * The controller needs a reset of internal state machines - * upon error conditions. - */ - if (data->error) { - if (!host->cmd || host->cmd == data_cmd) - sdhci_do_reset(host, SDHCI_RESET_CMD); - sdhci_do_reset(host, SDHCI_RESET_DATA); - } - /* * 'cap_cmd_during_tfr' request must not use the command line * after mmc_command_done() has been called. It is upper layer's @@ -2757,8 +2772,23 @@ static void sdhci_timeout_data_timer(struct timer_list *t) * * \*****************************************************************************/ -static void sdhci_cmd_irq(struct sdhci_host *host, u32 intmask) +static void sdhci_cmd_irq(struct sdhci_host *host, u32 intmask, u32 *intmask_p) { + /* Handle auto-CMD12 error */ + if (intmask & SDHCI_INT_AUTO_CMD_ERR && host->data_cmd) { + struct mmc_request *mrq = host->data_cmd->mrq; + u16 auto_cmd_status = sdhci_readw(host, SDHCI_AUTO_CMD_STATUS); + int data_err_bit = (auto_cmd_status & SDHCI_AUTO_CMD_TIMEOUT) ? + SDHCI_INT_DATA_TIMEOUT : + SDHCI_INT_DATA_CRC; + + /* Treat auto-CMD12 error the same as data error */ + if (!mrq->sbc && (host->flags & SDHCI_AUTO_CMD12)) { + *intmask_p |= data_err_bit; + return; + } + } + if (!host->cmd) { /* * SDHCI recovers from errors by resetting the cmd and data @@ -2780,20 +2810,12 @@ static void sdhci_cmd_irq(struct sdhci_host *host, u32 intmask) else host->cmd->error = -EILSEQ; - /* - * If this command initiates a data phase and a response - * CRC error is signalled, the card can start transferring - * data - the card may have received the command without - * error. We must not terminate the mmc_request early. - * - * If the card did not receive the command or returned an - * error which prevented it sending data, the data phase - * will time out. - */ + /* Treat data command CRC error the same as data CRC error */ if (host->cmd->data && (intmask & (SDHCI_INT_CRC | SDHCI_INT_TIMEOUT)) == SDHCI_INT_CRC) { host->cmd = NULL; + *intmask_p |= SDHCI_INT_DATA_CRC; return; } @@ -2801,6 +2823,21 @@ static void sdhci_cmd_irq(struct sdhci_host *host, u32 intmask) return; } + /* Handle auto-CMD23 error */ + if (intmask & SDHCI_INT_AUTO_CMD_ERR) { + struct mmc_request *mrq = host->cmd->mrq; + u16 auto_cmd_status = sdhci_readw(host, SDHCI_AUTO_CMD_STATUS); + int err = (auto_cmd_status & SDHCI_AUTO_CMD_TIMEOUT) ? + -ETIMEDOUT : + -EILSEQ; + + if (mrq->sbc && (host->flags & SDHCI_AUTO_CMD23)) { + mrq->sbc->error = err; + sdhci_finish_mrq(host, mrq); + return; + } + } + if (intmask & SDHCI_INT_RESPONSE) sdhci_finish_command(host); } @@ -3021,7 +3058,7 @@ static irqreturn_t sdhci_irq(int irq, void *dev_id) } if (intmask & SDHCI_INT_CMD_MASK) - sdhci_cmd_irq(host, intmask & SDHCI_INT_CMD_MASK); + sdhci_cmd_irq(host, intmask & SDHCI_INT_CMD_MASK, &intmask); if (intmask & SDHCI_INT_DATA_MASK) sdhci_data_irq(host, intmask & SDHCI_INT_DATA_MASK); @@ -3541,7 +3578,7 @@ void __sdhci_read_caps(struct sdhci_host *host, u16 *ver, u32 *caps, u32 *caps1) } EXPORT_SYMBOL_GPL(__sdhci_read_caps); -static int sdhci_allocate_bounce_buffer(struct sdhci_host *host) +static void sdhci_allocate_bounce_buffer(struct sdhci_host *host) { struct mmc_host *mmc = host->mmc; unsigned int max_blocks; @@ -3579,7 +3616,7 @@ static int sdhci_allocate_bounce_buffer(struct sdhci_host *host) * Exiting with zero here makes sure we proceed with * mmc->max_segs == 1. */ - return 0; + return; } host->bounce_addr = dma_map_single(mmc->parent, @@ -3589,7 +3626,7 @@ static int sdhci_allocate_bounce_buffer(struct sdhci_host *host) ret = dma_mapping_error(mmc->parent, host->bounce_addr); if (ret) /* Again fall back to max_segs == 1 */ - return 0; + return; host->bounce_buffer_size = bounce_size; /* Lie about this since we're bouncing */ @@ -3599,8 +3636,6 @@ static int sdhci_allocate_bounce_buffer(struct sdhci_host *host) pr_info("%s bounce up to %u segments into one, max segment size %u bytes\n", mmc_hostname(mmc), max_blocks, bounce_size); - - return 0; } static inline bool sdhci_can_64bit_dma(struct sdhci_host *host) @@ -4134,12 +4169,9 @@ int sdhci_setup_host(struct sdhci_host *host) */ mmc->max_blk_count = (host->quirks & SDHCI_QUIRK_NO_MULTIBLOCK) ? 1 : 65535; - if (mmc->max_segs == 1) { + if (mmc->max_segs == 1) /* This may alter mmc->*_blk_* parameters */ - ret = sdhci_allocate_bounce_buffer(host); - if (ret) - return ret; - } + sdhci_allocate_bounce_buffer(host); return 0; diff --git a/drivers/mmc/host/sdhci.h b/drivers/mmc/host/sdhci.h index b001cf4d3d7e..6cc9a3c2ac66 100644 --- a/drivers/mmc/host/sdhci.h +++ b/drivers/mmc/host/sdhci.h @@ -146,14 +146,15 @@ #define SDHCI_INT_DATA_CRC 0x00200000 #define SDHCI_INT_DATA_END_BIT 0x00400000 #define SDHCI_INT_BUS_POWER 0x00800000 -#define SDHCI_INT_ACMD12ERR 0x01000000 +#define SDHCI_INT_AUTO_CMD_ERR 0x01000000 #define SDHCI_INT_ADMA_ERROR 0x02000000 #define SDHCI_INT_NORMAL_MASK 0x00007FFF #define SDHCI_INT_ERROR_MASK 0xFFFF8000 #define SDHCI_INT_CMD_MASK (SDHCI_INT_RESPONSE | SDHCI_INT_TIMEOUT | \ - SDHCI_INT_CRC | SDHCI_INT_END_BIT | SDHCI_INT_INDEX) + SDHCI_INT_CRC | SDHCI_INT_END_BIT | SDHCI_INT_INDEX | \ + SDHCI_INT_AUTO_CMD_ERR) #define SDHCI_INT_DATA_MASK (SDHCI_INT_DATA_END | SDHCI_INT_DMA_END | \ SDHCI_INT_DATA_AVAIL | SDHCI_INT_SPACE_AVAIL | \ SDHCI_INT_DATA_TIMEOUT | SDHCI_INT_DATA_CRC | \ @@ -168,7 +169,11 @@ #define SDHCI_CQE_INT_MASK (SDHCI_CQE_INT_ERR_MASK | SDHCI_INT_CQE) -#define SDHCI_ACMD12_ERR 0x3C +#define SDHCI_AUTO_CMD_STATUS 0x3C +#define SDHCI_AUTO_CMD_TIMEOUT 0x00000002 +#define SDHCI_AUTO_CMD_CRC 0x00000004 +#define SDHCI_AUTO_CMD_END_BIT 0x00000008 +#define SDHCI_AUTO_CMD_INDEX 0x00000010 #define SDHCI_HOST_CONTROL2 0x3E #define SDHCI_CTRL_UHS_MASK 0x0007 @@ -403,6 +408,8 @@ struct sdhci_host { #define SDHCI_QUIRK_INVERTED_WRITE_PROTECT (1<<16) /* Controller does not like fast PIO transfers */ #define SDHCI_QUIRK_PIO_NEEDS_DELAY (1<<18) +/* Controller does not have a LED */ +#define SDHCI_QUIRK_NO_LED (1<<19) /* Controller has to be forced to use block size of 2048 bytes */ #define SDHCI_QUIRK_FORCE_BLK_SZ_2048 (1<<20) /* Controller cannot do multi-block transfers */ diff --git a/drivers/mmc/host/sdhci_am654.c b/drivers/mmc/host/sdhci_am654.c new file mode 100644 index 000000000000..8c05879850a0 --- /dev/null +++ b/drivers/mmc/host/sdhci_am654.c @@ -0,0 +1,374 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * sdhci_am654.c - SDHCI driver for TI's AM654 SOCs + * + * Copyright (C) 2018 Texas Instruments Incorporated - http://www.ti.com + * + */ +#include <linux/clk.h> +#include <linux/module.h> +#include <linux/pm_runtime.h> +#include <linux/property.h> +#include <linux/regmap.h> + +#include "sdhci-pltfm.h" + +/* CTL_CFG Registers */ +#define CTL_CFG_2 0x14 + +#define SLOTTYPE_MASK GENMASK(31, 30) +#define SLOTTYPE_EMBEDDED BIT(30) + +/* PHY Registers */ +#define PHY_CTRL1 0x100 +#define PHY_CTRL2 0x104 +#define PHY_CTRL3 0x108 +#define PHY_CTRL4 0x10C +#define PHY_CTRL5 0x110 +#define PHY_CTRL6 0x114 +#define PHY_STAT1 0x130 +#define PHY_STAT2 0x134 + +#define IOMUX_ENABLE_SHIFT 31 +#define IOMUX_ENABLE_MASK BIT(IOMUX_ENABLE_SHIFT) +#define OTAPDLYENA_SHIFT 20 +#define OTAPDLYENA_MASK BIT(OTAPDLYENA_SHIFT) +#define OTAPDLYSEL_SHIFT 12 +#define OTAPDLYSEL_MASK GENMASK(15, 12) +#define STRBSEL_SHIFT 24 +#define STRBSEL_MASK GENMASK(27, 24) +#define SEL50_SHIFT 8 +#define SEL50_MASK BIT(SEL50_SHIFT) +#define SEL100_SHIFT 9 +#define SEL100_MASK BIT(SEL100_SHIFT) +#define DLL_TRIM_ICP_SHIFT 4 +#define DLL_TRIM_ICP_MASK GENMASK(7, 4) +#define DR_TY_SHIFT 20 +#define DR_TY_MASK GENMASK(22, 20) +#define ENDLL_SHIFT 1 +#define ENDLL_MASK BIT(ENDLL_SHIFT) +#define DLLRDY_SHIFT 0 +#define DLLRDY_MASK BIT(DLLRDY_SHIFT) +#define PDB_SHIFT 0 +#define PDB_MASK BIT(PDB_SHIFT) +#define CALDONE_SHIFT 1 +#define CALDONE_MASK BIT(CALDONE_SHIFT) +#define RETRIM_SHIFT 17 +#define RETRIM_MASK BIT(RETRIM_SHIFT) + +#define DRIVER_STRENGTH_50_OHM 0x0 +#define DRIVER_STRENGTH_33_OHM 0x1 +#define DRIVER_STRENGTH_66_OHM 0x2 +#define DRIVER_STRENGTH_100_OHM 0x3 +#define DRIVER_STRENGTH_40_OHM 0x4 + +#define CLOCK_TOO_SLOW_HZ 400000 + +static struct regmap_config sdhci_am654_regmap_config = { + .reg_bits = 32, + .val_bits = 32, + .reg_stride = 4, + .fast_io = true, +}; + +struct sdhci_am654_data { + struct regmap *base; + int otap_del_sel; + int trm_icp; + int drv_strength; + bool dll_on; +}; + +static void sdhci_am654_set_clock(struct sdhci_host *host, unsigned int clock) +{ + struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host); + struct sdhci_am654_data *sdhci_am654 = sdhci_pltfm_priv(pltfm_host); + int sel50, sel100; + u32 mask, val; + int ret; + + if (sdhci_am654->dll_on) { + regmap_update_bits(sdhci_am654->base, PHY_CTRL1, + ENDLL_MASK, 0); + + sdhci_am654->dll_on = false; + } + + sdhci_set_clock(host, clock); + + if (clock > CLOCK_TOO_SLOW_HZ) { + /* Setup DLL Output TAP delay */ + mask = OTAPDLYENA_MASK | OTAPDLYSEL_MASK; + val = (1 << OTAPDLYENA_SHIFT) | + (sdhci_am654->otap_del_sel << OTAPDLYSEL_SHIFT); + regmap_update_bits(sdhci_am654->base, PHY_CTRL4, + mask, val); + switch (clock) { + case 200000000: + sel50 = 0; + sel100 = 0; + break; + case 100000000: + sel50 = 0; + sel100 = 1; + break; + default: + sel50 = 1; + sel100 = 0; + } + + /* Configure PHY DLL frequency */ + mask = SEL50_MASK | SEL100_MASK; + val = (sel50 << SEL50_SHIFT) | (sel100 << SEL100_SHIFT); + regmap_update_bits(sdhci_am654->base, PHY_CTRL5, + mask, val); + /* Configure DLL TRIM */ + mask = DLL_TRIM_ICP_MASK; + val = sdhci_am654->trm_icp << DLL_TRIM_ICP_SHIFT; + + /* Configure DLL driver strength */ + mask |= DR_TY_MASK; + val |= sdhci_am654->drv_strength << DR_TY_SHIFT; + regmap_update_bits(sdhci_am654->base, PHY_CTRL1, + mask, val); + /* Enable DLL */ + regmap_update_bits(sdhci_am654->base, PHY_CTRL1, + ENDLL_MASK, 0x1 << ENDLL_SHIFT); + /* + * Poll for DLL ready. Use a one second timeout. + * Works in all experiments done so far + */ + ret = regmap_read_poll_timeout(sdhci_am654->base, + PHY_STAT1, val, + val & DLLRDY_MASK, + 1000, 1000000); + + sdhci_am654->dll_on = true; + } +} + +static void sdhci_am654_set_power(struct sdhci_host *host, unsigned char mode, + unsigned short vdd) +{ + if (!IS_ERR(host->mmc->supply.vmmc)) { + struct mmc_host *mmc = host->mmc; + + mmc_regulator_set_ocr(mmc, mmc->supply.vmmc, vdd); + } + sdhci_set_power_noreg(host, mode, vdd); +} + +struct sdhci_ops sdhci_am654_ops = { + .get_max_clock = sdhci_pltfm_clk_get_max_clock, + .get_timeout_clock = sdhci_pltfm_clk_get_max_clock, + .set_uhs_signaling = sdhci_set_uhs_signaling, + .set_bus_width = sdhci_set_bus_width, + .set_power = sdhci_am654_set_power, + .set_clock = sdhci_am654_set_clock, + .reset = sdhci_reset, +}; + +static const struct sdhci_pltfm_data sdhci_am654_pdata = { + .ops = &sdhci_am654_ops, + .quirks = SDHCI_QUIRK_INVERTED_WRITE_PROTECT | + SDHCI_QUIRK_MULTIBLOCK_READ_ACMD12, + .quirks2 = SDHCI_QUIRK2_PRESET_VALUE_BROKEN, +}; + +static int sdhci_am654_init(struct sdhci_host *host) +{ + struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host); + struct sdhci_am654_data *sdhci_am654 = sdhci_pltfm_priv(pltfm_host); + u32 ctl_cfg_2 = 0; + u32 mask; + u32 val; + int ret; + + /* Reset OTAP to default value */ + mask = OTAPDLYENA_MASK | OTAPDLYSEL_MASK; + regmap_update_bits(sdhci_am654->base, PHY_CTRL4, + mask, 0x0); + + regmap_read(sdhci_am654->base, PHY_STAT1, &val); + if (~val & CALDONE_MASK) { + /* Calibrate IO lines */ + regmap_update_bits(sdhci_am654->base, PHY_CTRL1, + PDB_MASK, PDB_MASK); + ret = regmap_read_poll_timeout(sdhci_am654->base, PHY_STAT1, + val, val & CALDONE_MASK, 1, 20); + if (ret) + return ret; + } + + /* Enable pins by setting IO mux to 0 */ + regmap_update_bits(sdhci_am654->base, PHY_CTRL1, + IOMUX_ENABLE_MASK, 0); + + /* Set slot type based on SD or eMMC */ + if (host->mmc->caps & MMC_CAP_NONREMOVABLE) + ctl_cfg_2 = SLOTTYPE_EMBEDDED; + + regmap_update_bits(sdhci_am654->base, CTL_CFG_2, + ctl_cfg_2, SLOTTYPE_MASK); + + return sdhci_add_host(host); +} + +static int sdhci_am654_get_of_property(struct platform_device *pdev, + struct sdhci_am654_data *sdhci_am654) +{ + struct device *dev = &pdev->dev; + int drv_strength; + int ret; + + ret = device_property_read_u32(dev, "ti,trm-icp", + &sdhci_am654->trm_icp); + if (ret) + return ret; + + ret = device_property_read_u32(dev, "ti,otap-del-sel", + &sdhci_am654->otap_del_sel); + if (ret) + return ret; + + ret = device_property_read_u32(dev, "ti,driver-strength-ohm", + &drv_strength); + if (ret) + return ret; + + switch (drv_strength) { + case 50: + sdhci_am654->drv_strength = DRIVER_STRENGTH_50_OHM; + break; + case 33: + sdhci_am654->drv_strength = DRIVER_STRENGTH_33_OHM; + break; + case 66: + sdhci_am654->drv_strength = DRIVER_STRENGTH_66_OHM; + break; + case 100: + sdhci_am654->drv_strength = DRIVER_STRENGTH_100_OHM; + break; + case 40: + sdhci_am654->drv_strength = DRIVER_STRENGTH_40_OHM; + break; + default: + dev_err(dev, "Invalid driver strength\n"); + return -EINVAL; + } + + sdhci_get_of_property(pdev); + + return 0; +} + +static int sdhci_am654_probe(struct platform_device *pdev) +{ + struct sdhci_pltfm_host *pltfm_host; + struct sdhci_am654_data *sdhci_am654; + struct sdhci_host *host; + struct resource *res; + struct clk *clk_xin; + struct device *dev = &pdev->dev; + void __iomem *base; + int ret; + + host = sdhci_pltfm_init(pdev, &sdhci_am654_pdata, sizeof(*sdhci_am654)); + if (IS_ERR(host)) + return PTR_ERR(host); + + pltfm_host = sdhci_priv(host); + sdhci_am654 = sdhci_pltfm_priv(pltfm_host); + + clk_xin = devm_clk_get(dev, "clk_xin"); + if (IS_ERR(clk_xin)) { + dev_err(dev, "clk_xin clock not found.\n"); + ret = PTR_ERR(clk_xin); + goto err_pltfm_free; + } + + pltfm_host->clk = clk_xin; + + /* Clocks are enabled using pm_runtime */ + pm_runtime_enable(dev); + ret = pm_runtime_get_sync(dev); + if (ret < 0) { + pm_runtime_put_noidle(dev); + goto pm_runtime_disable; + } + + res = platform_get_resource(pdev, IORESOURCE_MEM, 1); + base = devm_ioremap_resource(dev, res); + if (IS_ERR(base)) { + ret = PTR_ERR(base); + goto pm_runtime_put; + } + + sdhci_am654->base = devm_regmap_init_mmio(dev, base, + &sdhci_am654_regmap_config); + if (IS_ERR(sdhci_am654->base)) { + dev_err(dev, "Failed to initialize regmap\n"); + ret = PTR_ERR(sdhci_am654->base); + goto pm_runtime_put; + } + + ret = sdhci_am654_get_of_property(pdev, sdhci_am654); + if (ret) + goto pm_runtime_put; + + ret = mmc_of_parse(host->mmc); + if (ret) { + dev_err(dev, "parsing dt failed (%d)\n", ret); + goto pm_runtime_put; + } + + ret = sdhci_am654_init(host); + if (ret) + goto pm_runtime_put; + + return 0; + +pm_runtime_put: + pm_runtime_put_sync(dev); +pm_runtime_disable: + pm_runtime_disable(dev); +err_pltfm_free: + sdhci_pltfm_free(pdev); + return ret; +} + +static int sdhci_am654_remove(struct platform_device *pdev) +{ + struct sdhci_host *host = platform_get_drvdata(pdev); + int ret; + + sdhci_remove_host(host, true); + ret = pm_runtime_put_sync(&pdev->dev); + if (ret < 0) + return ret; + + pm_runtime_disable(&pdev->dev); + sdhci_pltfm_free(pdev); + + return 0; +} + +static const struct of_device_id sdhci_am654_of_match[] = { + { .compatible = "ti,am654-sdhci-5.1" }, + { /* sentinel */ } +}; + +static struct platform_driver sdhci_am654_driver = { + .driver = { + .name = "sdhci-am654", + .of_match_table = sdhci_am654_of_match, + }, + .probe = sdhci_am654_probe, + .remove = sdhci_am654_remove, +}; + +module_platform_driver(sdhci_am654_driver); + +MODULE_DESCRIPTION("Driver for SDHCI Controller on TI's AM654 devices"); +MODULE_AUTHOR("Faiz Abbas <faiz_abbas@ti.com>"); +MODULE_LICENSE("GPL"); diff --git a/drivers/mmc/host/tmio_mmc.h b/drivers/mmc/host/tmio_mmc.h index 1e317027bf53..c03529e3f01a 100644 --- a/drivers/mmc/host/tmio_mmc.h +++ b/drivers/mmc/host/tmio_mmc.h @@ -70,6 +70,7 @@ #define TMIO_STAT_DAT0 BIT(23) /* only known on R-Car so far */ #define TMIO_STAT_RXRDY BIT(24) #define TMIO_STAT_TXRQ BIT(25) +#define TMIO_STAT_ALWAYS_SET_27 BIT(27) /* only known on R-Car 2+ so far */ #define TMIO_STAT_ILL_FUNC BIT(29) /* only when !TMIO_MMC_HAS_IDLE_WAIT */ #define TMIO_STAT_SCLKDIVEN BIT(29) /* only when TMIO_MMC_HAS_IDLE_WAIT */ #define TMIO_STAT_CMD_BUSY BIT(30) @@ -96,6 +97,7 @@ /* Define some IRQ masks */ /* This is the mask used at reset by the chip */ +#define TMIO_MASK_INIT_RCAR2 0x8b7f031d /* Initial value for R-Car Gen2+ */ #define TMIO_MASK_ALL 0x837f031d #define TMIO_MASK_READOP (TMIO_STAT_RXRDY | TMIO_STAT_DATAEND) #define TMIO_MASK_WRITEOP (TMIO_STAT_TXRQ | TMIO_STAT_DATAEND) @@ -153,6 +155,7 @@ struct tmio_mmc_host { u32 sdcard_irq_mask; u32 sdio_irq_mask; unsigned int clk_cache; + u32 sdcard_irq_setbit_mask; spinlock_t lock; /* protect host private data */ unsigned long last_req_ts; @@ -267,6 +270,9 @@ static inline void sd_ctrl_write16_rep(struct tmio_mmc_host *host, int addr, static inline void sd_ctrl_write32_as_16_and_16(struct tmio_mmc_host *host, int addr, u32 val) { + if (addr == CTL_IRQ_MASK || addr == CTL_STATUS) + val |= host->sdcard_irq_setbit_mask; + iowrite16(val & 0xffff, host->ctl + (addr << host->bus_shift)); iowrite16(val >> 16, host->ctl + ((addr + 2) << host->bus_shift)); } diff --git a/drivers/mmc/host/tmio_mmc_core.c b/drivers/mmc/host/tmio_mmc_core.c index 8d64f6196f33..085a0fab769c 100644 --- a/drivers/mmc/host/tmio_mmc_core.c +++ b/drivers/mmc/host/tmio_mmc_core.c @@ -171,6 +171,18 @@ static void tmio_mmc_reset(struct tmio_mmc_host *host) } } +static void tmio_mmc_hw_reset(struct mmc_host *mmc) +{ + struct tmio_mmc_host *host = mmc_priv(mmc); + + host->reset(host); + + tmio_mmc_abort_dma(host); + + if (host->hw_reset) + host->hw_reset(host); +} + static void tmio_mmc_reset_work(struct work_struct *work) { struct tmio_mmc_host *host = container_of(work, struct tmio_mmc_host, @@ -209,12 +221,11 @@ static void tmio_mmc_reset_work(struct work_struct *work) spin_unlock_irqrestore(&host->lock, flags); - host->reset(host); + tmio_mmc_hw_reset(host->mmc); /* Ready for new calls */ host->mrq = NULL; - tmio_mmc_abort_dma(host); mmc_request_done(host->mmc, mrq); } @@ -696,14 +707,6 @@ static int tmio_mmc_start_data(struct tmio_mmc_host *host, return 0; } -static void tmio_mmc_hw_reset(struct mmc_host *mmc) -{ - struct tmio_mmc_host *host = mmc_priv(mmc); - - if (host->hw_reset) - host->hw_reset(host); -} - static int tmio_mmc_execute_tuning(struct mmc_host *mmc, u32 opcode) { struct tmio_mmc_host *host = mmc_priv(mmc); @@ -734,8 +737,6 @@ static int tmio_mmc_execute_tuning(struct mmc_host *mmc, u32 opcode) ret = mmc_send_tuning(mmc, opcode, NULL); if (ret == 0) set_bit(i, host->taps); - - usleep_range(1000, 1200); } ret = host->select_tuning(host); @@ -1167,11 +1168,13 @@ int tmio_mmc_host_probe(struct tmio_mmc_host *_host) if (ret < 0) return ret; - if (pdata->flags & TMIO_MMC_USE_GPIO_CD) { - ret = mmc_gpio_request_cd(mmc, pdata->cd_gpio, 0); - if (ret) - return ret; - } + /* + * Look for a card detect GPIO, if it fails with anything + * else than a probe deferral, just live without it. + */ + ret = mmc_gpiod_request_cd(mmc, "cd", 0, false, 0, NULL); + if (ret == -EPROBE_DEFER) + return ret; mmc->caps |= MMC_CAP_4_BIT_DATA | pdata->capabilities; mmc->caps2 |= pdata->capabilities2; @@ -1228,7 +1231,7 @@ int tmio_mmc_host_probe(struct tmio_mmc_host *_host) _host->sdio_irq_mask = TMIO_SDIO_MASK_ALL; _host->set_clock(_host, 0); - _host->reset(_host); + tmio_mmc_hw_reset(mmc); _host->sdcard_irq_mask = sd_ctrl_read16_and_16_as_32(_host, CTL_IRQ_MASK); tmio_mmc_disable_mmc_irqs(_host, TMIO_MASK_ALL); @@ -1328,8 +1331,8 @@ int tmio_mmc_host_runtime_resume(struct device *dev) { struct tmio_mmc_host *host = dev_get_drvdata(dev); - host->reset(host); tmio_mmc_clk_enable(host); + tmio_mmc_hw_reset(host->mmc); if (host->clk_cache) host->set_clock(host, host->clk_cache); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index 77896c11f6f3..be81b319b0dc 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -669,11 +669,9 @@ static int mlx5_pci_init(struct mlx5_core_dev *dev, struct mlx5_priv *priv) priv->numa_node = dev_to_node(&dev->pdev->dev); - priv->dbg_root = debugfs_create_dir(dev_name(&pdev->dev), mlx5_debugfs_root); - if (!priv->dbg_root) { - dev_err(&pdev->dev, "Cannot create debugfs dir, aborting\n"); - return -ENOMEM; - } + if (mlx5_debugfs_root) + priv->dbg_root = + debugfs_create_dir(pci_name(pdev), mlx5_debugfs_root); err = mlx5_pci_enable_device(dev); if (err) { diff --git a/drivers/net/wireless/ath/ath6kl/cfg80211.c b/drivers/net/wireless/ath/ath6kl/cfg80211.c index 59dd50866932..5477a014e1fb 100644 --- a/drivers/net/wireless/ath/ath6kl/cfg80211.c +++ b/drivers/net/wireless/ath/ath6kl/cfg80211.c @@ -1322,7 +1322,7 @@ static int ath6kl_cfg80211_set_default_key(struct wiphy *wiphy, struct ath6kl_vif *vif = netdev_priv(ndev); struct ath6kl_key *key = NULL; u8 key_usage; - enum crypto_type key_type = NONE_CRYPT; + enum ath6kl_crypto_type key_type = NONE_CRYPT; ath6kl_dbg(ATH6KL_DBG_WLAN_CFG, "%s: index %d\n", __func__, key_index); diff --git a/drivers/net/wireless/ath/ath6kl/common.h b/drivers/net/wireless/ath/ath6kl/common.h index 4f82e8632d37..d6e5234f67a1 100644 --- a/drivers/net/wireless/ath/ath6kl/common.h +++ b/drivers/net/wireless/ath/ath6kl/common.h @@ -67,7 +67,7 @@ struct ath6kl_llc_snap_hdr { __be16 eth_type; } __packed; -enum crypto_type { +enum ath6kl_crypto_type { NONE_CRYPT = 0x01, WEP_CRYPT = 0x02, TKIP_CRYPT = 0x04, diff --git a/drivers/net/wireless/ath/ath6kl/wmi.c b/drivers/net/wireless/ath/ath6kl/wmi.c index 777acc564ac9..9d7ac1ab2d02 100644 --- a/drivers/net/wireless/ath/ath6kl/wmi.c +++ b/drivers/net/wireless/ath/ath6kl/wmi.c @@ -1849,9 +1849,9 @@ int ath6kl_wmi_connect_cmd(struct wmi *wmi, u8 if_idx, enum network_type nw_type, enum dot11_auth_mode dot11_auth_mode, enum auth_mode auth_mode, - enum crypto_type pairwise_crypto, + enum ath6kl_crypto_type pairwise_crypto, u8 pairwise_crypto_len, - enum crypto_type group_crypto, + enum ath6kl_crypto_type group_crypto, u8 group_crypto_len, int ssid_len, u8 *ssid, u8 *bssid, u16 channel, u32 ctrl_flags, u8 nw_subtype) @@ -2301,7 +2301,7 @@ int ath6kl_wmi_disctimeout_cmd(struct wmi *wmi, u8 if_idx, u8 timeout) } int ath6kl_wmi_addkey_cmd(struct wmi *wmi, u8 if_idx, u8 key_index, - enum crypto_type key_type, + enum ath6kl_crypto_type key_type, u8 key_usage, u8 key_len, u8 *key_rsc, unsigned int key_rsc_len, u8 *key_material, diff --git a/drivers/net/wireless/ath/ath6kl/wmi.h b/drivers/net/wireless/ath/ath6kl/wmi.h index a60bb49fe920..784940ba4c90 100644 --- a/drivers/net/wireless/ath/ath6kl/wmi.h +++ b/drivers/net/wireless/ath/ath6kl/wmi.h @@ -2556,9 +2556,9 @@ int ath6kl_wmi_connect_cmd(struct wmi *wmi, u8 if_idx, enum network_type nw_type, enum dot11_auth_mode dot11_auth_mode, enum auth_mode auth_mode, - enum crypto_type pairwise_crypto, + enum ath6kl_crypto_type pairwise_crypto, u8 pairwise_crypto_len, - enum crypto_type group_crypto, + enum ath6kl_crypto_type group_crypto, u8 group_crypto_len, int ssid_len, u8 *ssid, u8 *bssid, u16 channel, u32 ctrl_flags, u8 nw_subtype); @@ -2610,7 +2610,7 @@ int ath6kl_wmi_config_debug_module_cmd(struct wmi *wmi, u32 valid, u32 config); int ath6kl_wmi_get_stats_cmd(struct wmi *wmi, u8 if_idx); int ath6kl_wmi_addkey_cmd(struct wmi *wmi, u8 if_idx, u8 key_index, - enum crypto_type key_type, + enum ath6kl_crypto_type key_type, u8 key_usage, u8 key_len, u8 *key_rsc, unsigned int key_rsc_len, u8 *key_material, diff --git a/drivers/nvdimm/Kconfig b/drivers/nvdimm/Kconfig index 9d36473dc2a2..5e27918e4624 100644 --- a/drivers/nvdimm/Kconfig +++ b/drivers/nvdimm/Kconfig @@ -112,4 +112,9 @@ config OF_PMEM Select Y if unsure. +config NVDIMM_KEYS + def_bool y + depends on ENCRYPTED_KEYS + depends on (LIBNVDIMM=ENCRYPTED_KEYS) || LIBNVDIMM=m + endif diff --git a/drivers/nvdimm/Makefile b/drivers/nvdimm/Makefile index e8847045dac0..6f2a088afad6 100644 --- a/drivers/nvdimm/Makefile +++ b/drivers/nvdimm/Makefile @@ -27,3 +27,4 @@ libnvdimm-$(CONFIG_ND_CLAIM) += claim.o libnvdimm-$(CONFIG_BTT) += btt_devs.o libnvdimm-$(CONFIG_NVDIMM_PFN) += pfn_devs.o libnvdimm-$(CONFIG_NVDIMM_DAX) += dax_devs.o +libnvdimm-$(CONFIG_NVDIMM_KEYS) += security.o diff --git a/drivers/nvdimm/bus.c b/drivers/nvdimm/bus.c index f1fb39921236..dca5f7a805cb 100644 --- a/drivers/nvdimm/bus.c +++ b/drivers/nvdimm/bus.c @@ -331,6 +331,12 @@ struct nvdimm_bus *to_nvdimm_bus(struct device *dev) } EXPORT_SYMBOL_GPL(to_nvdimm_bus); +struct nvdimm_bus *nvdimm_to_bus(struct nvdimm *nvdimm) +{ + return to_nvdimm_bus(nvdimm->dev.parent); +} +EXPORT_SYMBOL_GPL(nvdimm_to_bus); + struct nvdimm_bus *nvdimm_bus_register(struct device *parent, struct nvdimm_bus_descriptor *nd_desc) { @@ -344,12 +350,12 @@ struct nvdimm_bus *nvdimm_bus_register(struct device *parent, INIT_LIST_HEAD(&nvdimm_bus->mapping_list); init_waitqueue_head(&nvdimm_bus->probe_wait); nvdimm_bus->id = ida_simple_get(&nd_ida, 0, 0, GFP_KERNEL); - mutex_init(&nvdimm_bus->reconfig_mutex); - badrange_init(&nvdimm_bus->badrange); if (nvdimm_bus->id < 0) { kfree(nvdimm_bus); return NULL; } + mutex_init(&nvdimm_bus->reconfig_mutex); + badrange_init(&nvdimm_bus->badrange); nvdimm_bus->nd_desc = nd_desc; nvdimm_bus->dev.parent = parent; nvdimm_bus->dev.release = nvdimm_bus_release; @@ -387,9 +393,24 @@ static int child_unregister(struct device *dev, void *data) * i.e. remove classless children */ if (dev->class) - /* pass */; - else - nd_device_unregister(dev, ND_SYNC); + return 0; + + if (is_nvdimm(dev)) { + struct nvdimm *nvdimm = to_nvdimm(dev); + bool dev_put = false; + + /* We are shutting down. Make state frozen artificially. */ + nvdimm_bus_lock(dev); + nvdimm->sec.state = NVDIMM_SECURITY_FROZEN; + if (test_and_clear_bit(NDD_WORK_PENDING, &nvdimm->flags)) + dev_put = true; + nvdimm_bus_unlock(dev); + cancel_delayed_work_sync(&nvdimm->dwork); + if (dev_put) + put_device(dev); + } + nd_device_unregister(dev, ND_SYNC); + return 0; } @@ -902,7 +923,7 @@ static int nd_cmd_clear_to_send(struct nvdimm_bus *nvdimm_bus, /* ask the bus provider if it would like to block this request */ if (nd_desc->clear_to_send) { - int rc = nd_desc->clear_to_send(nd_desc, nvdimm, cmd); + int rc = nd_desc->clear_to_send(nd_desc, nvdimm, cmd, data); if (rc) return rc; diff --git a/drivers/nvdimm/dimm.c b/drivers/nvdimm/dimm.c index 9899c97138a3..0cf58cabc9ed 100644 --- a/drivers/nvdimm/dimm.c +++ b/drivers/nvdimm/dimm.c @@ -34,7 +34,11 @@ static int nvdimm_probe(struct device *dev) return rc; } - /* reset locked, to be validated below... */ + /* + * The locked status bit reflects explicit status codes from the + * label reading commands, revalidate it each time the driver is + * activated and re-reads the label area. + */ nvdimm_clear_locked(dev); ndd = kzalloc(sizeof(*ndd), GFP_KERNEL); @@ -52,6 +56,16 @@ static int nvdimm_probe(struct device *dev) kref_init(&ndd->kref); /* + * Attempt to unlock, if the DIMM supports security commands, + * otherwise the locked indication is determined by explicit + * status codes from the label reading commands. + */ + rc = nvdimm_security_unlock(dev); + if (rc < 0) + dev_dbg(dev, "failed to unlock dimm: %d\n", rc); + + + /* * EACCES failures reading the namespace label-area-properties * are interpreted as the DIMM capacity being locked but the * namespace labels themselves being accessible. diff --git a/drivers/nvdimm/dimm_devs.c b/drivers/nvdimm/dimm_devs.c index 6c3de2317390..4890310df874 100644 --- a/drivers/nvdimm/dimm_devs.c +++ b/drivers/nvdimm/dimm_devs.c @@ -370,23 +370,172 @@ static ssize_t available_slots_show(struct device *dev, } static DEVICE_ATTR_RO(available_slots); +__weak ssize_t security_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct nvdimm *nvdimm = to_nvdimm(dev); + + switch (nvdimm->sec.state) { + case NVDIMM_SECURITY_DISABLED: + return sprintf(buf, "disabled\n"); + case NVDIMM_SECURITY_UNLOCKED: + return sprintf(buf, "unlocked\n"); + case NVDIMM_SECURITY_LOCKED: + return sprintf(buf, "locked\n"); + case NVDIMM_SECURITY_FROZEN: + return sprintf(buf, "frozen\n"); + case NVDIMM_SECURITY_OVERWRITE: + return sprintf(buf, "overwrite\n"); + default: + return -ENOTTY; + } + + return -ENOTTY; +} + +#define OPS \ + C( OP_FREEZE, "freeze", 1), \ + C( OP_DISABLE, "disable", 2), \ + C( OP_UPDATE, "update", 3), \ + C( OP_ERASE, "erase", 2), \ + C( OP_OVERWRITE, "overwrite", 2), \ + C( OP_MASTER_UPDATE, "master_update", 3), \ + C( OP_MASTER_ERASE, "master_erase", 2) +#undef C +#define C(a, b, c) a +enum nvdimmsec_op_ids { OPS }; +#undef C +#define C(a, b, c) { b, c } +static struct { + const char *name; + int args; +} ops[] = { OPS }; +#undef C + +#define SEC_CMD_SIZE 32 +#define KEY_ID_SIZE 10 + +static ssize_t __security_store(struct device *dev, const char *buf, size_t len) +{ + struct nvdimm *nvdimm = to_nvdimm(dev); + ssize_t rc; + char cmd[SEC_CMD_SIZE+1], keystr[KEY_ID_SIZE+1], + nkeystr[KEY_ID_SIZE+1]; + unsigned int key, newkey; + int i; + + if (atomic_read(&nvdimm->busy)) + return -EBUSY; + + rc = sscanf(buf, "%"__stringify(SEC_CMD_SIZE)"s" + " %"__stringify(KEY_ID_SIZE)"s" + " %"__stringify(KEY_ID_SIZE)"s", + cmd, keystr, nkeystr); + if (rc < 1) + return -EINVAL; + for (i = 0; i < ARRAY_SIZE(ops); i++) + if (sysfs_streq(cmd, ops[i].name)) + break; + if (i >= ARRAY_SIZE(ops)) + return -EINVAL; + if (ops[i].args > 1) + rc = kstrtouint(keystr, 0, &key); + if (rc >= 0 && ops[i].args > 2) + rc = kstrtouint(nkeystr, 0, &newkey); + if (rc < 0) + return rc; + + if (i == OP_FREEZE) { + dev_dbg(dev, "freeze\n"); + rc = nvdimm_security_freeze(nvdimm); + } else if (i == OP_DISABLE) { + dev_dbg(dev, "disable %u\n", key); + rc = nvdimm_security_disable(nvdimm, key); + } else if (i == OP_UPDATE) { + dev_dbg(dev, "update %u %u\n", key, newkey); + rc = nvdimm_security_update(nvdimm, key, newkey, NVDIMM_USER); + } else if (i == OP_ERASE) { + dev_dbg(dev, "erase %u\n", key); + rc = nvdimm_security_erase(nvdimm, key, NVDIMM_USER); + } else if (i == OP_OVERWRITE) { + dev_dbg(dev, "overwrite %u\n", key); + rc = nvdimm_security_overwrite(nvdimm, key); + } else if (i == OP_MASTER_UPDATE) { + dev_dbg(dev, "master_update %u %u\n", key, newkey); + rc = nvdimm_security_update(nvdimm, key, newkey, + NVDIMM_MASTER); + } else if (i == OP_MASTER_ERASE) { + dev_dbg(dev, "master_erase %u\n", key); + rc = nvdimm_security_erase(nvdimm, key, + NVDIMM_MASTER); + } else + return -EINVAL; + + if (rc == 0) + rc = len; + return rc; +} + +static ssize_t security_store(struct device *dev, + struct device_attribute *attr, const char *buf, size_t len) + +{ + ssize_t rc; + + /* + * Require all userspace triggered security management to be + * done while probing is idle and the DIMM is not in active use + * in any region. + */ + device_lock(dev); + nvdimm_bus_lock(dev); + wait_nvdimm_bus_probe_idle(dev); + rc = __security_store(dev, buf, len); + nvdimm_bus_unlock(dev); + device_unlock(dev); + + return rc; +} +static DEVICE_ATTR_RW(security); + static struct attribute *nvdimm_attributes[] = { &dev_attr_state.attr, &dev_attr_flags.attr, &dev_attr_commands.attr, &dev_attr_available_slots.attr, + &dev_attr_security.attr, NULL, }; +static umode_t nvdimm_visible(struct kobject *kobj, struct attribute *a, int n) +{ + struct device *dev = container_of(kobj, typeof(*dev), kobj); + struct nvdimm *nvdimm = to_nvdimm(dev); + + if (a != &dev_attr_security.attr) + return a->mode; + if (nvdimm->sec.state < 0) + return 0; + /* Are there any state mutation ops? */ + if (nvdimm->sec.ops->freeze || nvdimm->sec.ops->disable + || nvdimm->sec.ops->change_key + || nvdimm->sec.ops->erase + || nvdimm->sec.ops->overwrite) + return a->mode; + return 0444; +} + struct attribute_group nvdimm_attribute_group = { .attrs = nvdimm_attributes, + .is_visible = nvdimm_visible, }; EXPORT_SYMBOL_GPL(nvdimm_attribute_group); -struct nvdimm *nvdimm_create(struct nvdimm_bus *nvdimm_bus, void *provider_data, - const struct attribute_group **groups, unsigned long flags, - unsigned long cmd_mask, int num_flush, - struct resource *flush_wpq) +struct nvdimm *__nvdimm_create(struct nvdimm_bus *nvdimm_bus, + void *provider_data, const struct attribute_group **groups, + unsigned long flags, unsigned long cmd_mask, int num_flush, + struct resource *flush_wpq, const char *dimm_id, + const struct nvdimm_security_ops *sec_ops) { struct nvdimm *nvdimm = kzalloc(sizeof(*nvdimm), GFP_KERNEL); struct device *dev; @@ -399,6 +548,8 @@ struct nvdimm *nvdimm_create(struct nvdimm_bus *nvdimm_bus, void *provider_data, kfree(nvdimm); return NULL; } + + nvdimm->dimm_id = dimm_id; nvdimm->provider_data = provider_data; nvdimm->flags = flags; nvdimm->cmd_mask = cmd_mask; @@ -411,11 +562,60 @@ struct nvdimm *nvdimm_create(struct nvdimm_bus *nvdimm_bus, void *provider_data, dev->type = &nvdimm_device_type; dev->devt = MKDEV(nvdimm_major, nvdimm->id); dev->groups = groups; + nvdimm->sec.ops = sec_ops; + nvdimm->sec.overwrite_tmo = 0; + INIT_DELAYED_WORK(&nvdimm->dwork, nvdimm_security_overwrite_query); + /* + * Security state must be initialized before device_add() for + * attribute visibility. + */ + /* get security state and extended (master) state */ + nvdimm->sec.state = nvdimm_security_state(nvdimm, NVDIMM_USER); + nvdimm->sec.ext_state = nvdimm_security_state(nvdimm, NVDIMM_MASTER); nd_device_register(dev); return nvdimm; } -EXPORT_SYMBOL_GPL(nvdimm_create); +EXPORT_SYMBOL_GPL(__nvdimm_create); + +int nvdimm_security_setup_events(struct nvdimm *nvdimm) +{ + nvdimm->sec.overwrite_state = sysfs_get_dirent(nvdimm->dev.kobj.sd, + "security"); + if (!nvdimm->sec.overwrite_state) + return -ENODEV; + return 0; +} +EXPORT_SYMBOL_GPL(nvdimm_security_setup_events); + +int nvdimm_in_overwrite(struct nvdimm *nvdimm) +{ + return test_bit(NDD_SECURITY_OVERWRITE, &nvdimm->flags); +} +EXPORT_SYMBOL_GPL(nvdimm_in_overwrite); + +int nvdimm_security_freeze(struct nvdimm *nvdimm) +{ + int rc; + + WARN_ON_ONCE(!is_nvdimm_bus_locked(&nvdimm->dev)); + + if (!nvdimm->sec.ops || !nvdimm->sec.ops->freeze) + return -EOPNOTSUPP; + + if (nvdimm->sec.state < 0) + return -EIO; + + if (test_bit(NDD_SECURITY_OVERWRITE, &nvdimm->flags)) { + dev_warn(&nvdimm->dev, "Overwrite operation in progress.\n"); + return -EBUSY; + } + + rc = nvdimm->sec.ops->freeze(nvdimm); + nvdimm->sec.state = nvdimm_security_state(nvdimm, NVDIMM_USER); + + return rc; +} int alias_dpa_busy(struct device *dev, void *data) { diff --git a/drivers/nvdimm/label.c b/drivers/nvdimm/label.c index 750dbaa6ce82..a11bf4e6b451 100644 --- a/drivers/nvdimm/label.c +++ b/drivers/nvdimm/label.c @@ -944,8 +944,7 @@ static int __blk_label_update(struct nd_region *nd_region, victims = 0; if (old_num_resources) { /* convert old local-label-map to dimm-slot victim-map */ - victim_map = kcalloc(BITS_TO_LONGS(nslot), sizeof(long), - GFP_KERNEL); + victim_map = bitmap_zalloc(nslot, GFP_KERNEL); if (!victim_map) return -ENOMEM; @@ -968,7 +967,7 @@ static int __blk_label_update(struct nd_region *nd_region, /* don't allow updates that consume the last label */ if (nfree - alloc < 0 || nfree - alloc + victims < 1) { dev_info(&nsblk->common.dev, "insufficient label space\n"); - kfree(victim_map); + bitmap_free(victim_map); return -ENOSPC; } /* from here on we need to abort on error */ @@ -1140,7 +1139,7 @@ static int __blk_label_update(struct nd_region *nd_region, out: kfree(old_res_list); - kfree(victim_map); + bitmap_free(victim_map); return rc; abort: diff --git a/drivers/nvdimm/namespace_devs.c b/drivers/nvdimm/namespace_devs.c index 681af3a8fd62..4b077555ac70 100644 --- a/drivers/nvdimm/namespace_devs.c +++ b/drivers/nvdimm/namespace_devs.c @@ -270,11 +270,10 @@ static ssize_t __alt_name_store(struct device *dev, const char *buf, if (dev->driver || to_ndns(dev)->claim) return -EBUSY; - input = kmemdup(buf, len + 1, GFP_KERNEL); + input = kstrndup(buf, len, GFP_KERNEL); if (!input) return -ENOMEM; - input[len] = '\0'; pos = strim(input); if (strlen(pos) + 1 > NSLABEL_NAME_LEN) { rc = -EINVAL; diff --git a/drivers/nvdimm/nd-core.h b/drivers/nvdimm/nd-core.h index d0c621b32f72..2b2cf4e554d3 100644 --- a/drivers/nvdimm/nd-core.h +++ b/drivers/nvdimm/nd-core.h @@ -21,6 +21,7 @@ extern struct list_head nvdimm_bus_list; extern struct mutex nvdimm_bus_list_mutex; extern int nvdimm_major; +extern struct workqueue_struct *nvdimm_wq; struct nvdimm_bus { struct nvdimm_bus_descriptor *nd_desc; @@ -41,8 +42,64 @@ struct nvdimm { atomic_t busy; int id, num_flush; struct resource *flush_wpq; + const char *dimm_id; + struct { + const struct nvdimm_security_ops *ops; + enum nvdimm_security_state state; + enum nvdimm_security_state ext_state; + unsigned int overwrite_tmo; + struct kernfs_node *overwrite_state; + } sec; + struct delayed_work dwork; }; +static inline enum nvdimm_security_state nvdimm_security_state( + struct nvdimm *nvdimm, bool master) +{ + if (!nvdimm->sec.ops) + return -ENXIO; + + return nvdimm->sec.ops->state(nvdimm, master); +} +int nvdimm_security_freeze(struct nvdimm *nvdimm); +#if IS_ENABLED(CONFIG_NVDIMM_KEYS) +int nvdimm_security_disable(struct nvdimm *nvdimm, unsigned int keyid); +int nvdimm_security_update(struct nvdimm *nvdimm, unsigned int keyid, + unsigned int new_keyid, + enum nvdimm_passphrase_type pass_type); +int nvdimm_security_erase(struct nvdimm *nvdimm, unsigned int keyid, + enum nvdimm_passphrase_type pass_type); +int nvdimm_security_overwrite(struct nvdimm *nvdimm, unsigned int keyid); +void nvdimm_security_overwrite_query(struct work_struct *work); +#else +static inline int nvdimm_security_disable(struct nvdimm *nvdimm, + unsigned int keyid) +{ + return -EOPNOTSUPP; +} +static inline int nvdimm_security_update(struct nvdimm *nvdimm, + unsigned int keyid, + unsigned int new_keyid, + enum nvdimm_passphrase_type pass_type) +{ + return -EOPNOTSUPP; +} +static inline int nvdimm_security_erase(struct nvdimm *nvdimm, + unsigned int keyid, + enum nvdimm_passphrase_type pass_type) +{ + return -EOPNOTSUPP; +} +static inline int nvdimm_security_overwrite(struct nvdimm *nvdimm, + unsigned int keyid) +{ + return -EOPNOTSUPP; +} +static inline void nvdimm_security_overwrite_query(struct work_struct *work) +{ +} +#endif + /** * struct blk_alloc_info - tracking info for BLK dpa scanning * @nd_mapping: blk region mapping boundaries diff --git a/drivers/nvdimm/nd.h b/drivers/nvdimm/nd.h index e79cc8e5c114..cfde992684e7 100644 --- a/drivers/nvdimm/nd.h +++ b/drivers/nvdimm/nd.h @@ -250,6 +250,14 @@ long nvdimm_clear_poison(struct device *dev, phys_addr_t phys, void nvdimm_set_aliasing(struct device *dev); void nvdimm_set_locked(struct device *dev); void nvdimm_clear_locked(struct device *dev); +#if IS_ENABLED(CONFIG_NVDIMM_KEYS) +int nvdimm_security_unlock(struct device *dev); +#else +static inline int nvdimm_security_unlock(struct device *dev) +{ + return 0; +} +#endif struct nd_btt *to_nd_btt(struct device *dev); struct nd_gen_sb { diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c index d28418b05a04..bc2f700feef8 100644 --- a/drivers/nvdimm/pmem.c +++ b/drivers/nvdimm/pmem.c @@ -396,7 +396,7 @@ static int pmem_attach_disk(struct device *dev, return -EBUSY; } - q = blk_alloc_queue_node(GFP_KERNEL, dev_to_node(dev), NULL); + q = blk_alloc_queue_node(GFP_KERNEL, dev_to_node(dev)); if (!q) return -ENOMEM; diff --git a/drivers/nvdimm/region_devs.c b/drivers/nvdimm/region_devs.c index e7377f1028ef..e2818f94f292 100644 --- a/drivers/nvdimm/region_devs.c +++ b/drivers/nvdimm/region_devs.c @@ -79,6 +79,11 @@ int nd_region_activate(struct nd_region *nd_region) struct nd_mapping *nd_mapping = &nd_region->mapping[i]; struct nvdimm *nvdimm = nd_mapping->nvdimm; + if (test_bit(NDD_SECURITY_OVERWRITE, &nvdimm->flags)) { + nvdimm_bus_unlock(&nd_region->dev); + return -EBUSY; + } + /* at least one null hint slot per-dimm for the "no-hint" case */ flush_data_size += sizeof(void *); num_flush = min_not_zero(num_flush, nvdimm->num_flush); diff --git a/drivers/nvdimm/security.c b/drivers/nvdimm/security.c new file mode 100644 index 000000000000..f8bb746a549f --- /dev/null +++ b/drivers/nvdimm/security.c @@ -0,0 +1,454 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright(c) 2018 Intel Corporation. All rights reserved. */ + +#include <linux/module.h> +#include <linux/device.h> +#include <linux/ndctl.h> +#include <linux/slab.h> +#include <linux/io.h> +#include <linux/mm.h> +#include <linux/cred.h> +#include <linux/key.h> +#include <linux/key-type.h> +#include <keys/user-type.h> +#include <keys/encrypted-type.h> +#include "nd-core.h" +#include "nd.h" + +#define NVDIMM_BASE_KEY 0 +#define NVDIMM_NEW_KEY 1 + +static bool key_revalidate = true; +module_param(key_revalidate, bool, 0444); +MODULE_PARM_DESC(key_revalidate, "Require key validation at init."); + +static void *key_data(struct key *key) +{ + struct encrypted_key_payload *epayload = dereference_key_locked(key); + + lockdep_assert_held_read(&key->sem); + + return epayload->decrypted_data; +} + +static void nvdimm_put_key(struct key *key) +{ + if (!key) + return; + + up_read(&key->sem); + key_put(key); +} + +/* + * Retrieve kernel key for DIMM and request from user space if + * necessary. Returns a key held for read and must be put by + * nvdimm_put_key() before the usage goes out of scope. + */ +static struct key *nvdimm_request_key(struct nvdimm *nvdimm) +{ + struct key *key = NULL; + static const char NVDIMM_PREFIX[] = "nvdimm:"; + char desc[NVDIMM_KEY_DESC_LEN + sizeof(NVDIMM_PREFIX)]; + struct device *dev = &nvdimm->dev; + + sprintf(desc, "%s%s", NVDIMM_PREFIX, nvdimm->dimm_id); + key = request_key(&key_type_encrypted, desc, ""); + if (IS_ERR(key)) { + if (PTR_ERR(key) == -ENOKEY) + dev_dbg(dev, "request_key() found no key\n"); + else + dev_dbg(dev, "request_key() upcall failed\n"); + key = NULL; + } else { + struct encrypted_key_payload *epayload; + + down_read(&key->sem); + epayload = dereference_key_locked(key); + if (epayload->decrypted_datalen != NVDIMM_PASSPHRASE_LEN) { + up_read(&key->sem); + key_put(key); + key = NULL; + } + } + + return key; +} + +static struct key *nvdimm_lookup_user_key(struct nvdimm *nvdimm, + key_serial_t id, int subclass) +{ + key_ref_t keyref; + struct key *key; + struct encrypted_key_payload *epayload; + struct device *dev = &nvdimm->dev; + + keyref = lookup_user_key(id, 0, 0); + if (IS_ERR(keyref)) + return NULL; + + key = key_ref_to_ptr(keyref); + if (key->type != &key_type_encrypted) { + key_put(key); + return NULL; + } + + dev_dbg(dev, "%s: key found: %#x\n", __func__, key_serial(key)); + + down_read_nested(&key->sem, subclass); + epayload = dereference_key_locked(key); + if (epayload->decrypted_datalen != NVDIMM_PASSPHRASE_LEN) { + up_read(&key->sem); + key_put(key); + key = NULL; + } + return key; +} + +static struct key *nvdimm_key_revalidate(struct nvdimm *nvdimm) +{ + struct key *key; + int rc; + + if (!nvdimm->sec.ops->change_key) + return NULL; + + key = nvdimm_request_key(nvdimm); + if (!key) + return NULL; + + /* + * Send the same key to the hardware as new and old key to + * verify that the key is good. + */ + rc = nvdimm->sec.ops->change_key(nvdimm, key_data(key), + key_data(key), NVDIMM_USER); + if (rc < 0) { + nvdimm_put_key(key); + key = NULL; + } + return key; +} + +static int __nvdimm_security_unlock(struct nvdimm *nvdimm) +{ + struct device *dev = &nvdimm->dev; + struct nvdimm_bus *nvdimm_bus = walk_to_nvdimm_bus(dev); + struct key *key = NULL; + int rc; + + /* The bus lock should be held at the top level of the call stack */ + lockdep_assert_held(&nvdimm_bus->reconfig_mutex); + + if (!nvdimm->sec.ops || !nvdimm->sec.ops->unlock + || nvdimm->sec.state < 0) + return -EIO; + + if (test_bit(NDD_SECURITY_OVERWRITE, &nvdimm->flags)) { + dev_dbg(dev, "Security operation in progress.\n"); + return -EBUSY; + } + + /* + * If the pre-OS has unlocked the DIMM, attempt to send the key + * from request_key() to the hardware for verification. Failure + * to revalidate the key against the hardware results in a + * freeze of the security configuration. I.e. if the OS does not + * have the key, security is being managed pre-OS. + */ + if (nvdimm->sec.state == NVDIMM_SECURITY_UNLOCKED) { + if (!key_revalidate) + return 0; + + key = nvdimm_key_revalidate(nvdimm); + if (!key) + return nvdimm_security_freeze(nvdimm); + } else + key = nvdimm_request_key(nvdimm); + + if (!key) + return -ENOKEY; + + rc = nvdimm->sec.ops->unlock(nvdimm, key_data(key)); + dev_dbg(dev, "key: %d unlock: %s\n", key_serial(key), + rc == 0 ? "success" : "fail"); + + nvdimm_put_key(key); + nvdimm->sec.state = nvdimm_security_state(nvdimm, NVDIMM_USER); + return rc; +} + +int nvdimm_security_unlock(struct device *dev) +{ + struct nvdimm *nvdimm = to_nvdimm(dev); + int rc; + + nvdimm_bus_lock(dev); + rc = __nvdimm_security_unlock(nvdimm); + nvdimm_bus_unlock(dev); + return rc; +} + +int nvdimm_security_disable(struct nvdimm *nvdimm, unsigned int keyid) +{ + struct device *dev = &nvdimm->dev; + struct nvdimm_bus *nvdimm_bus = walk_to_nvdimm_bus(dev); + struct key *key; + int rc; + + /* The bus lock should be held at the top level of the call stack */ + lockdep_assert_held(&nvdimm_bus->reconfig_mutex); + + if (!nvdimm->sec.ops || !nvdimm->sec.ops->disable + || nvdimm->sec.state < 0) + return -EOPNOTSUPP; + + if (nvdimm->sec.state >= NVDIMM_SECURITY_FROZEN) { + dev_dbg(dev, "Incorrect security state: %d\n", + nvdimm->sec.state); + return -EIO; + } + + if (test_bit(NDD_SECURITY_OVERWRITE, &nvdimm->flags)) { + dev_dbg(dev, "Security operation in progress.\n"); + return -EBUSY; + } + + key = nvdimm_lookup_user_key(nvdimm, keyid, NVDIMM_BASE_KEY); + if (!key) + return -ENOKEY; + + rc = nvdimm->sec.ops->disable(nvdimm, key_data(key)); + dev_dbg(dev, "key: %d disable: %s\n", key_serial(key), + rc == 0 ? "success" : "fail"); + + nvdimm_put_key(key); + nvdimm->sec.state = nvdimm_security_state(nvdimm, NVDIMM_USER); + return rc; +} + +int nvdimm_security_update(struct nvdimm *nvdimm, unsigned int keyid, + unsigned int new_keyid, + enum nvdimm_passphrase_type pass_type) +{ + struct device *dev = &nvdimm->dev; + struct nvdimm_bus *nvdimm_bus = walk_to_nvdimm_bus(dev); + struct key *key, *newkey; + int rc; + + /* The bus lock should be held at the top level of the call stack */ + lockdep_assert_held(&nvdimm_bus->reconfig_mutex); + + if (!nvdimm->sec.ops || !nvdimm->sec.ops->change_key + || nvdimm->sec.state < 0) + return -EOPNOTSUPP; + + if (nvdimm->sec.state >= NVDIMM_SECURITY_FROZEN) { + dev_dbg(dev, "Incorrect security state: %d\n", + nvdimm->sec.state); + return -EIO; + } + + if (keyid == 0) + key = NULL; + else { + key = nvdimm_lookup_user_key(nvdimm, keyid, NVDIMM_BASE_KEY); + if (!key) + return -ENOKEY; + } + + newkey = nvdimm_lookup_user_key(nvdimm, new_keyid, NVDIMM_NEW_KEY); + if (!newkey) { + nvdimm_put_key(key); + return -ENOKEY; + } + + rc = nvdimm->sec.ops->change_key(nvdimm, key ? key_data(key) : NULL, + key_data(newkey), pass_type); + dev_dbg(dev, "key: %d %d update%s: %s\n", + key_serial(key), key_serial(newkey), + pass_type == NVDIMM_MASTER ? "(master)" : "(user)", + rc == 0 ? "success" : "fail"); + + nvdimm_put_key(newkey); + nvdimm_put_key(key); + if (pass_type == NVDIMM_MASTER) + nvdimm->sec.ext_state = nvdimm_security_state(nvdimm, + NVDIMM_MASTER); + else + nvdimm->sec.state = nvdimm_security_state(nvdimm, + NVDIMM_USER); + return rc; +} + +int nvdimm_security_erase(struct nvdimm *nvdimm, unsigned int keyid, + enum nvdimm_passphrase_type pass_type) +{ + struct device *dev = &nvdimm->dev; + struct nvdimm_bus *nvdimm_bus = walk_to_nvdimm_bus(dev); + struct key *key; + int rc; + + /* The bus lock should be held at the top level of the call stack */ + lockdep_assert_held(&nvdimm_bus->reconfig_mutex); + + if (!nvdimm->sec.ops || !nvdimm->sec.ops->erase + || nvdimm->sec.state < 0) + return -EOPNOTSUPP; + + if (atomic_read(&nvdimm->busy)) { + dev_dbg(dev, "Unable to secure erase while DIMM active.\n"); + return -EBUSY; + } + + if (nvdimm->sec.state >= NVDIMM_SECURITY_FROZEN) { + dev_dbg(dev, "Incorrect security state: %d\n", + nvdimm->sec.state); + return -EIO; + } + + if (test_bit(NDD_SECURITY_OVERWRITE, &nvdimm->flags)) { + dev_dbg(dev, "Security operation in progress.\n"); + return -EBUSY; + } + + if (nvdimm->sec.ext_state != NVDIMM_SECURITY_UNLOCKED + && pass_type == NVDIMM_MASTER) { + dev_dbg(dev, + "Attempt to secure erase in wrong master state.\n"); + return -EOPNOTSUPP; + } + + key = nvdimm_lookup_user_key(nvdimm, keyid, NVDIMM_BASE_KEY); + if (!key) + return -ENOKEY; + + rc = nvdimm->sec.ops->erase(nvdimm, key_data(key), pass_type); + dev_dbg(dev, "key: %d erase%s: %s\n", key_serial(key), + pass_type == NVDIMM_MASTER ? "(master)" : "(user)", + rc == 0 ? "success" : "fail"); + + nvdimm_put_key(key); + nvdimm->sec.state = nvdimm_security_state(nvdimm, NVDIMM_USER); + return rc; +} + +int nvdimm_security_overwrite(struct nvdimm *nvdimm, unsigned int keyid) +{ + struct device *dev = &nvdimm->dev; + struct nvdimm_bus *nvdimm_bus = walk_to_nvdimm_bus(dev); + struct key *key; + int rc; + + /* The bus lock should be held at the top level of the call stack */ + lockdep_assert_held(&nvdimm_bus->reconfig_mutex); + + if (!nvdimm->sec.ops || !nvdimm->sec.ops->overwrite + || nvdimm->sec.state < 0) + return -EOPNOTSUPP; + + if (atomic_read(&nvdimm->busy)) { + dev_dbg(dev, "Unable to overwrite while DIMM active.\n"); + return -EBUSY; + } + + if (dev->driver == NULL) { + dev_dbg(dev, "Unable to overwrite while DIMM active.\n"); + return -EINVAL; + } + + if (nvdimm->sec.state >= NVDIMM_SECURITY_FROZEN) { + dev_dbg(dev, "Incorrect security state: %d\n", + nvdimm->sec.state); + return -EIO; + } + + if (test_bit(NDD_SECURITY_OVERWRITE, &nvdimm->flags)) { + dev_dbg(dev, "Security operation in progress.\n"); + return -EBUSY; + } + + if (keyid == 0) + key = NULL; + else { + key = nvdimm_lookup_user_key(nvdimm, keyid, NVDIMM_BASE_KEY); + if (!key) + return -ENOKEY; + } + + rc = nvdimm->sec.ops->overwrite(nvdimm, key ? key_data(key) : NULL); + dev_dbg(dev, "key: %d overwrite submission: %s\n", key_serial(key), + rc == 0 ? "success" : "fail"); + + nvdimm_put_key(key); + if (rc == 0) { + set_bit(NDD_SECURITY_OVERWRITE, &nvdimm->flags); + set_bit(NDD_WORK_PENDING, &nvdimm->flags); + nvdimm->sec.state = NVDIMM_SECURITY_OVERWRITE; + /* + * Make sure we don't lose device while doing overwrite + * query. + */ + get_device(dev); + queue_delayed_work(system_wq, &nvdimm->dwork, 0); + } + + return rc; +} + +void __nvdimm_security_overwrite_query(struct nvdimm *nvdimm) +{ + struct nvdimm_bus *nvdimm_bus = walk_to_nvdimm_bus(&nvdimm->dev); + int rc; + unsigned int tmo; + + /* The bus lock should be held at the top level of the call stack */ + lockdep_assert_held(&nvdimm_bus->reconfig_mutex); + + /* + * Abort and release device if we no longer have the overwrite + * flag set. It means the work has been canceled. + */ + if (!test_bit(NDD_WORK_PENDING, &nvdimm->flags)) + return; + + tmo = nvdimm->sec.overwrite_tmo; + + if (!nvdimm->sec.ops || !nvdimm->sec.ops->query_overwrite + || nvdimm->sec.state < 0) + return; + + rc = nvdimm->sec.ops->query_overwrite(nvdimm); + if (rc == -EBUSY) { + + /* setup delayed work again */ + tmo += 10; + queue_delayed_work(system_wq, &nvdimm->dwork, tmo * HZ); + nvdimm->sec.overwrite_tmo = min(15U * 60U, tmo); + return; + } + + if (rc < 0) + dev_dbg(&nvdimm->dev, "overwrite failed\n"); + else + dev_dbg(&nvdimm->dev, "overwrite completed\n"); + + if (nvdimm->sec.overwrite_state) + sysfs_notify_dirent(nvdimm->sec.overwrite_state); + nvdimm->sec.overwrite_tmo = 0; + clear_bit(NDD_SECURITY_OVERWRITE, &nvdimm->flags); + clear_bit(NDD_WORK_PENDING, &nvdimm->flags); + put_device(&nvdimm->dev); + nvdimm->sec.state = nvdimm_security_state(nvdimm, NVDIMM_USER); + nvdimm->sec.ext_state = nvdimm_security_state(nvdimm, NVDIMM_MASTER); +} + +void nvdimm_security_overwrite_query(struct work_struct *work) +{ + struct nvdimm *nvdimm = + container_of(work, typeof(*nvdimm), dwork.work); + + nvdimm_bus_lock(&nvdimm->dev); + __nvdimm_security_overwrite_query(nvdimm); + nvdimm_bus_unlock(&nvdimm->dev); +} diff --git a/drivers/nvme/host/Kconfig b/drivers/nvme/host/Kconfig index 88a8b5916624..0f345e207675 100644 --- a/drivers/nvme/host/Kconfig +++ b/drivers/nvme/host/Kconfig @@ -57,3 +57,18 @@ config NVME_FC from https://github.com/linux-nvme/nvme-cli. If unsure, say N. + +config NVME_TCP + tristate "NVM Express over Fabrics TCP host driver" + depends on INET + depends on BLK_DEV_NVME + select NVME_FABRICS + help + This provides support for the NVMe over Fabrics protocol using + the TCP transport. This allows you to use remote block devices + exported using the NVMe protocol set. + + To configure a NVMe over Fabrics controller use the nvme-cli tool + from https://github.com/linux-nvme/nvme-cli. + + If unsure, say N. diff --git a/drivers/nvme/host/Makefile b/drivers/nvme/host/Makefile index aea459c65ae1..8a4b671c5f0c 100644 --- a/drivers/nvme/host/Makefile +++ b/drivers/nvme/host/Makefile @@ -7,6 +7,7 @@ obj-$(CONFIG_BLK_DEV_NVME) += nvme.o obj-$(CONFIG_NVME_FABRICS) += nvme-fabrics.o obj-$(CONFIG_NVME_RDMA) += nvme-rdma.o obj-$(CONFIG_NVME_FC) += nvme-fc.o +obj-$(CONFIG_NVME_TCP) += nvme-tcp.o nvme-core-y := core.o nvme-core-$(CONFIG_TRACING) += trace.o @@ -21,3 +22,5 @@ nvme-fabrics-y += fabrics.o nvme-rdma-y += rdma.o nvme-fc-y += fc.o + +nvme-tcp-y += tcp.o diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 962012135b62..08f2c92602f4 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -97,7 +97,6 @@ static dev_t nvme_chr_devt; static struct class *nvme_class; static struct class *nvme_subsys_class; -static void nvme_ns_remove(struct nvme_ns *ns); static int nvme_revalidate_disk(struct gendisk *disk); static void nvme_put_subsystem(struct nvme_subsystem *subsys); static void nvme_remove_invalid_namespaces(struct nvme_ctrl *ctrl, @@ -245,12 +244,31 @@ static inline bool nvme_req_needs_retry(struct request *req) return true; } +static void nvme_retry_req(struct request *req) +{ + struct nvme_ns *ns = req->q->queuedata; + unsigned long delay = 0; + u16 crd; + + /* The mask and shift result must be <= 3 */ + crd = (nvme_req(req)->status & NVME_SC_CRD) >> 11; + if (ns && crd) + delay = ns->ctrl->crdt[crd - 1] * 100; + + nvme_req(req)->retries++; + blk_mq_requeue_request(req, false); + blk_mq_delay_kick_requeue_list(req->q, delay); +} + void nvme_complete_rq(struct request *req) { blk_status_t status = nvme_error_status(req); trace_nvme_complete_rq(req); + if (nvme_req(req)->ctrl->kas) + nvme_req(req)->ctrl->comp_seen = true; + if (unlikely(status != BLK_STS_OK && nvme_req_needs_retry(req))) { if ((req->cmd_flags & REQ_NVME_MPATH) && blk_path_error(status)) { @@ -259,8 +277,7 @@ void nvme_complete_rq(struct request *req) } if (!blk_queue_dying(req->q)) { - nvme_req(req)->retries++; - blk_mq_requeue_request(req, true); + nvme_retry_req(req); return; } } @@ -268,14 +285,14 @@ void nvme_complete_rq(struct request *req) } EXPORT_SYMBOL_GPL(nvme_complete_rq); -void nvme_cancel_request(struct request *req, void *data, bool reserved) +bool nvme_cancel_request(struct request *req, void *data, bool reserved) { dev_dbg_ratelimited(((struct nvme_ctrl *) data)->device, "Cancelling I/O %d", req->tag); nvme_req(req)->status = NVME_SC_ABORT_REQ; blk_mq_complete_request(req); - + return true; } EXPORT_SYMBOL_GPL(nvme_cancel_request); @@ -536,7 +553,6 @@ static void nvme_assign_write_stream(struct nvme_ctrl *ctrl, static inline void nvme_setup_flush(struct nvme_ns *ns, struct nvme_command *cmnd) { - memset(cmnd, 0, sizeof(*cmnd)); cmnd->common.opcode = nvme_cmd_flush; cmnd->common.nsid = cpu_to_le32(ns->head->ns_id); } @@ -548,9 +564,19 @@ static blk_status_t nvme_setup_discard(struct nvme_ns *ns, struct request *req, struct nvme_dsm_range *range; struct bio *bio; - range = kmalloc_array(segments, sizeof(*range), GFP_ATOMIC); - if (!range) - return BLK_STS_RESOURCE; + range = kmalloc_array(segments, sizeof(*range), + GFP_ATOMIC | __GFP_NOWARN); + if (!range) { + /* + * If we fail allocation our range, fallback to the controller + * discard page. If that's also busy, it's safe to return + * busy, as we know we can make progress once that's freed. + */ + if (test_and_set_bit_lock(0, &ns->ctrl->discard_page_busy)) + return BLK_STS_RESOURCE; + + range = page_address(ns->ctrl->discard_page); + } __rq_for_each_bio(bio, req) { u64 slba = nvme_block_nr(ns, bio->bi_iter.bi_sector); @@ -565,11 +591,13 @@ static blk_status_t nvme_setup_discard(struct nvme_ns *ns, struct request *req, } if (WARN_ON_ONCE(n != segments)) { - kfree(range); + if (virt_to_page(range) == ns->ctrl->discard_page) + clear_bit_unlock(0, &ns->ctrl->discard_page_busy); + else + kfree(range); return BLK_STS_IOERR; } - memset(cmnd, 0, sizeof(*cmnd)); cmnd->dsm.opcode = nvme_cmd_dsm; cmnd->dsm.nsid = cpu_to_le32(ns->head->ns_id); cmnd->dsm.nr = cpu_to_le32(segments - 1); @@ -598,7 +626,6 @@ static inline blk_status_t nvme_setup_rw(struct nvme_ns *ns, if (req->cmd_flags & REQ_RAHEAD) dsmgmt |= NVME_RW_DSM_FREQ_PREFETCH; - memset(cmnd, 0, sizeof(*cmnd)); cmnd->rw.opcode = (rq_data_dir(req) ? nvme_cmd_write : nvme_cmd_read); cmnd->rw.nsid = cpu_to_le32(ns->head->ns_id); cmnd->rw.slba = cpu_to_le64(nvme_block_nr(ns, blk_rq_pos(req))); @@ -650,8 +677,13 @@ void nvme_cleanup_cmd(struct request *req) blk_rq_bytes(req) >> ns->lba_shift); } if (req->rq_flags & RQF_SPECIAL_PAYLOAD) { - kfree(page_address(req->special_vec.bv_page) + - req->special_vec.bv_offset); + struct nvme_ns *ns = req->rq_disk->private_data; + struct page *page = req->special_vec.bv_page; + + if (page == ns->ctrl->discard_page) + clear_bit_unlock(0, &ns->ctrl->discard_page_busy); + else + kfree(page_address(page) + req->special_vec.bv_offset); } } EXPORT_SYMBOL_GPL(nvme_cleanup_cmd); @@ -663,6 +695,7 @@ blk_status_t nvme_setup_cmd(struct nvme_ns *ns, struct request *req, nvme_clear_nvme_request(req); + memset(cmd, 0, sizeof(*cmd)); switch (req_op(req)) { case REQ_OP_DRV_IN: case REQ_OP_DRV_OUT: @@ -691,6 +724,31 @@ blk_status_t nvme_setup_cmd(struct nvme_ns *ns, struct request *req, } EXPORT_SYMBOL_GPL(nvme_setup_cmd); +static void nvme_end_sync_rq(struct request *rq, blk_status_t error) +{ + struct completion *waiting = rq->end_io_data; + + rq->end_io_data = NULL; + complete(waiting); +} + +static void nvme_execute_rq_polled(struct request_queue *q, + struct gendisk *bd_disk, struct request *rq, int at_head) +{ + DECLARE_COMPLETION_ONSTACK(wait); + + WARN_ON_ONCE(!test_bit(QUEUE_FLAG_POLL, &q->queue_flags)); + + rq->cmd_flags |= REQ_HIPRI; + rq->end_io_data = &wait; + blk_execute_rq_nowait(q, bd_disk, rq, at_head, nvme_end_sync_rq); + + while (!completion_done(&wait)) { + blk_poll(q, request_to_qc_t(rq->mq_hctx, rq), true); + cond_resched(); + } +} + /* * Returns 0 on success. If the result is negative, it's a Linux error code; * if the result is positive, it's an NVM Express status code @@ -698,7 +756,7 @@ EXPORT_SYMBOL_GPL(nvme_setup_cmd); int __nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd, union nvme_result *result, void *buffer, unsigned bufflen, unsigned timeout, int qid, int at_head, - blk_mq_req_flags_t flags) + blk_mq_req_flags_t flags, bool poll) { struct request *req; int ret; @@ -715,7 +773,10 @@ int __nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd, goto out; } - blk_execute_rq(req->q, NULL, req, at_head); + if (poll) + nvme_execute_rq_polled(req->q, NULL, req, at_head); + else + blk_execute_rq(req->q, NULL, req, at_head); if (result) *result = nvme_req(req)->result; if (nvme_req(req)->flags & NVME_REQ_CANCELLED) @@ -732,7 +793,7 @@ int nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd, void *buffer, unsigned bufflen) { return __nvme_submit_sync_cmd(q, cmd, NULL, buffer, bufflen, 0, - NVME_QID_ANY, 0, 0); + NVME_QID_ANY, 0, 0, false); } EXPORT_SYMBOL_GPL(nvme_submit_sync_cmd); @@ -843,6 +904,7 @@ static void nvme_keep_alive_end_io(struct request *rq, blk_status_t status) return; } + ctrl->comp_seen = false; spin_lock_irqsave(&ctrl->lock, flags); if (ctrl->state == NVME_CTRL_LIVE || ctrl->state == NVME_CTRL_CONNECTING) @@ -873,6 +935,15 @@ static void nvme_keep_alive_work(struct work_struct *work) { struct nvme_ctrl *ctrl = container_of(to_delayed_work(work), struct nvme_ctrl, ka_work); + bool comp_seen = ctrl->comp_seen; + + if ((ctrl->ctratt & NVME_CTRL_ATTR_TBKAS) && comp_seen) { + dev_dbg(ctrl->device, + "reschedule traffic based keep-alive timer\n"); + ctrl->comp_seen = false; + schedule_delayed_work(&ctrl->ka_work, ctrl->kato * HZ); + return; + } if (nvme_keep_alive(ctrl)) { /* allocation failure, reset the controller */ @@ -1041,7 +1112,7 @@ static int nvme_set_features(struct nvme_ctrl *dev, unsigned fid, unsigned dword c.features.dword11 = cpu_to_le32(dword11); ret = __nvme_submit_sync_cmd(dev->admin_q, &c, &res, - buffer, buflen, 0, NVME_QID_ANY, 0, 0); + buffer, buflen, 0, NVME_QID_ANY, 0, 0, false); if (ret >= 0 && result) *result = le32_to_cpu(res.u32); return ret; @@ -1240,12 +1311,12 @@ static int nvme_user_cmd(struct nvme_ctrl *ctrl, struct nvme_ns *ns, c.common.nsid = cpu_to_le32(cmd.nsid); c.common.cdw2[0] = cpu_to_le32(cmd.cdw2); c.common.cdw2[1] = cpu_to_le32(cmd.cdw3); - c.common.cdw10[0] = cpu_to_le32(cmd.cdw10); - c.common.cdw10[1] = cpu_to_le32(cmd.cdw11); - c.common.cdw10[2] = cpu_to_le32(cmd.cdw12); - c.common.cdw10[3] = cpu_to_le32(cmd.cdw13); - c.common.cdw10[4] = cpu_to_le32(cmd.cdw14); - c.common.cdw10[5] = cpu_to_le32(cmd.cdw15); + c.common.cdw10 = cpu_to_le32(cmd.cdw10); + c.common.cdw11 = cpu_to_le32(cmd.cdw11); + c.common.cdw12 = cpu_to_le32(cmd.cdw12); + c.common.cdw13 = cpu_to_le32(cmd.cdw13); + c.common.cdw14 = cpu_to_le32(cmd.cdw14); + c.common.cdw15 = cpu_to_le32(cmd.cdw15); if (cmd.timeout_ms) timeout = msecs_to_jiffies(cmd.timeout_ms); @@ -1524,8 +1595,6 @@ static void __nvme_revalidate_disk(struct gendisk *disk, struct nvme_id_ns *id) if (ns->noiob) nvme_set_chunk_size(ns); nvme_update_disk_info(disk, ns, id); - if (ns->ndev) - nvme_nvm_update_nvm_info(ns); #ifdef CONFIG_NVME_MULTIPATH if (ns->head->disk) { nvme_update_disk_info(ns->head->disk, ns, id); @@ -1608,7 +1677,7 @@ static int nvme_pr_command(struct block_device *bdev, u32 cdw10, memset(&c, 0, sizeof(c)); c.common.opcode = op; c.common.nsid = cpu_to_le32(ns->head->ns_id); - c.common.cdw10[0] = cpu_to_le32(cdw10); + c.common.cdw10 = cpu_to_le32(cdw10); ret = nvme_submit_sync_cmd(ns->queue, &c, data, 16); nvme_put_ns_from_disk(head, srcu_idx); @@ -1682,11 +1751,11 @@ int nvme_sec_submit(void *data, u16 spsp, u8 secp, void *buffer, size_t len, else cmd.common.opcode = nvme_admin_security_recv; cmd.common.nsid = 0; - cmd.common.cdw10[0] = cpu_to_le32(((u32)secp) << 24 | ((u32)spsp) << 8); - cmd.common.cdw10[1] = cpu_to_le32(len); + cmd.common.cdw10 = cpu_to_le32(((u32)secp) << 24 | ((u32)spsp) << 8); + cmd.common.cdw11 = cpu_to_le32(len); return __nvme_submit_sync_cmd(ctrl->admin_q, &cmd, NULL, buffer, len, - ADMIN_TIMEOUT, NVME_QID_ANY, 1, 0); + ADMIN_TIMEOUT, NVME_QID_ANY, 1, 0, false); } EXPORT_SYMBOL_GPL(nvme_sec_submit); #endif /* CONFIG_BLK_SED_OPAL */ @@ -1881,6 +1950,26 @@ static int nvme_configure_timestamp(struct nvme_ctrl *ctrl) return ret; } +static int nvme_configure_acre(struct nvme_ctrl *ctrl) +{ + struct nvme_feat_host_behavior *host; + int ret; + + /* Don't bother enabling the feature if retry delay is not reported */ + if (!ctrl->crdt[0]) + return 0; + + host = kzalloc(sizeof(*host), GFP_KERNEL); + if (!host) + return 0; + + host->acre = NVME_ENABLE_ACRE; + ret = nvme_set_features(ctrl, NVME_FEAT_HOST_BEHAVIOR, 0, + host, sizeof(*host), NULL); + kfree(host); + return ret; +} + static int nvme_configure_apst(struct nvme_ctrl *ctrl) { /* @@ -2402,6 +2491,10 @@ int nvme_init_identify(struct nvme_ctrl *ctrl) ctrl->quirks &= ~NVME_QUIRK_NO_DEEPEST_PS; } + ctrl->crdt[0] = le16_to_cpu(id->crdt1); + ctrl->crdt[1] = le16_to_cpu(id->crdt2); + ctrl->crdt[2] = le16_to_cpu(id->crdt3); + ctrl->oacs = le16_to_cpu(id->oacs); ctrl->oncs = le16_to_cpup(&id->oncs); ctrl->oaes = le32_to_cpu(id->oaes); @@ -2419,6 +2512,7 @@ int nvme_init_identify(struct nvme_ctrl *ctrl) ctrl->sgls = le32_to_cpu(id->sgls); ctrl->kas = le16_to_cpu(id->kas); ctrl->max_namespaces = le32_to_cpu(id->mnan); + ctrl->ctratt = le32_to_cpu(id->ctratt); if (id->rtd3e) { /* us -> s */ @@ -2501,6 +2595,10 @@ int nvme_init_identify(struct nvme_ctrl *ctrl) if (ret < 0) return ret; + ret = nvme_configure_acre(ctrl); + if (ret < 0) + return ret; + ctrl->identified = true; return 0; @@ -2776,6 +2874,7 @@ static ssize_t field##_show(struct device *dev, \ static DEVICE_ATTR(field, S_IRUGO, field##_show, NULL); nvme_show_int_function(cntlid); +nvme_show_int_function(numa_node); static ssize_t nvme_sysfs_delete(struct device *dev, struct device_attribute *attr, const char *buf, @@ -2855,6 +2954,7 @@ static struct attribute *nvme_dev_attrs[] = { &dev_attr_subsysnqn.attr, &dev_attr_address.attr, &dev_attr_state.attr, + &dev_attr_numa_node.attr, NULL }; @@ -3065,7 +3165,7 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid) struct gendisk *disk; struct nvme_id_ns *id; char disk_name[DISK_NAME_LEN]; - int node = dev_to_node(ctrl->dev), flags = GENHD_FL_EXT_DEVT; + int node = ctrl->numa_node, flags = GENHD_FL_EXT_DEVT; ns = kzalloc_node(sizeof(*ns), GFP_KERNEL, node); if (!ns) @@ -3100,13 +3200,6 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid) nvme_setup_streams_ns(ctrl, ns); nvme_set_disk_name(disk_name, ns, ctrl, &flags); - if ((ctrl->quirks & NVME_QUIRK_LIGHTNVM) && id->vs[0] == 0x1) { - if (nvme_nvm_register(ns, disk_name, node)) { - dev_warn(ctrl->device, "LightNVM init failure\n"); - goto out_unlink_ns; - } - } - disk = alloc_disk_node(0, node); if (!disk) goto out_unlink_ns; @@ -3120,6 +3213,13 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid) __nvme_revalidate_disk(disk, id); + if ((ctrl->quirks & NVME_QUIRK_LIGHTNVM) && id->vs[0] == 0x1) { + if (nvme_nvm_register(ns, disk_name, node)) { + dev_warn(ctrl->device, "LightNVM init failure\n"); + goto out_put_disk; + } + } + down_write(&ctrl->namespaces_rwsem); list_add_tail(&ns->list, &ctrl->namespaces); up_write(&ctrl->namespaces_rwsem); @@ -3133,6 +3233,8 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid) kfree(id); return; + out_put_disk: + put_disk(ns->disk); out_unlink_ns: mutex_lock(&ctrl->subsys->lock); list_del_rcu(&ns->siblings); @@ -3522,6 +3624,7 @@ static void nvme_free_ctrl(struct device *dev) ida_simple_remove(&nvme_instance_ida, ctrl->instance); kfree(ctrl->effects); nvme_mpath_uninit(ctrl); + __free_page(ctrl->discard_page); if (subsys) { mutex_lock(&subsys->lock); @@ -3562,6 +3665,14 @@ int nvme_init_ctrl(struct nvme_ctrl *ctrl, struct device *dev, memset(&ctrl->ka_cmd, 0, sizeof(ctrl->ka_cmd)); ctrl->ka_cmd.common.opcode = nvme_admin_keep_alive; + BUILD_BUG_ON(NVME_DSM_MAX_RANGES * sizeof(struct nvme_dsm_range) > + PAGE_SIZE); + ctrl->discard_page = alloc_page(GFP_KERNEL); + if (!ctrl->discard_page) { + ret = -ENOMEM; + goto out; + } + ret = ida_simple_get(&nvme_instance_ida, 0, 0, GFP_KERNEL); if (ret < 0) goto out; @@ -3599,6 +3710,8 @@ out_free_name: out_release_instance: ida_simple_remove(&nvme_instance_ida, ctrl->instance); out: + if (ctrl->discard_page) + __free_page(ctrl->discard_page); return ret; } EXPORT_SYMBOL_GPL(nvme_init_ctrl); @@ -3746,7 +3859,7 @@ out: return result; } -void nvme_core_exit(void) +void __exit nvme_core_exit(void) { ida_destroy(&nvme_subsystems_ida); class_destroy(nvme_subsys_class); diff --git a/drivers/nvme/host/fabrics.c b/drivers/nvme/host/fabrics.c index bd0969db6225..b2ab213f43de 100644 --- a/drivers/nvme/host/fabrics.c +++ b/drivers/nvme/host/fabrics.c @@ -159,7 +159,7 @@ int nvmf_reg_read32(struct nvme_ctrl *ctrl, u32 off, u32 *val) cmd.prop_get.offset = cpu_to_le32(off); ret = __nvme_submit_sync_cmd(ctrl->admin_q, &cmd, &res, NULL, 0, 0, - NVME_QID_ANY, 0, 0); + NVME_QID_ANY, 0, 0, false); if (ret >= 0) *val = le64_to_cpu(res.u64); @@ -206,7 +206,7 @@ int nvmf_reg_read64(struct nvme_ctrl *ctrl, u32 off, u64 *val) cmd.prop_get.offset = cpu_to_le32(off); ret = __nvme_submit_sync_cmd(ctrl->admin_q, &cmd, &res, NULL, 0, 0, - NVME_QID_ANY, 0, 0); + NVME_QID_ANY, 0, 0, false); if (ret >= 0) *val = le64_to_cpu(res.u64); @@ -252,7 +252,7 @@ int nvmf_reg_write32(struct nvme_ctrl *ctrl, u32 off, u32 val) cmd.prop_set.value = cpu_to_le64(val); ret = __nvme_submit_sync_cmd(ctrl->admin_q, &cmd, NULL, NULL, 0, 0, - NVME_QID_ANY, 0, 0); + NVME_QID_ANY, 0, 0, false); if (unlikely(ret)) dev_err(ctrl->device, "Property Set error: %d, offset %#x\n", @@ -392,6 +392,9 @@ int nvmf_connect_admin_queue(struct nvme_ctrl *ctrl) cmd.connect.kato = ctrl->opts->discovery_nqn ? 0 : cpu_to_le32((ctrl->kato + NVME_KATO_GRACE) * 1000); + if (ctrl->opts->disable_sqflow) + cmd.connect.cattr |= NVME_CONNECT_DISABLE_SQFLOW; + data = kzalloc(sizeof(*data), GFP_KERNEL); if (!data) return -ENOMEM; @@ -403,7 +406,7 @@ int nvmf_connect_admin_queue(struct nvme_ctrl *ctrl) ret = __nvme_submit_sync_cmd(ctrl->admin_q, &cmd, &res, data, sizeof(*data), 0, NVME_QID_ANY, 1, - BLK_MQ_REQ_RESERVED | BLK_MQ_REQ_NOWAIT); + BLK_MQ_REQ_RESERVED | BLK_MQ_REQ_NOWAIT, false); if (ret) { nvmf_log_connect_error(ctrl, ret, le32_to_cpu(res.u32), &cmd, data); @@ -438,7 +441,7 @@ EXPORT_SYMBOL_GPL(nvmf_connect_admin_queue); * > 0: NVMe error status code * < 0: Linux errno error code */ -int nvmf_connect_io_queue(struct nvme_ctrl *ctrl, u16 qid) +int nvmf_connect_io_queue(struct nvme_ctrl *ctrl, u16 qid, bool poll) { struct nvme_command cmd; struct nvmf_connect_data *data; @@ -451,6 +454,9 @@ int nvmf_connect_io_queue(struct nvme_ctrl *ctrl, u16 qid) cmd.connect.qid = cpu_to_le16(qid); cmd.connect.sqsize = cpu_to_le16(ctrl->sqsize); + if (ctrl->opts->disable_sqflow) + cmd.connect.cattr |= NVME_CONNECT_DISABLE_SQFLOW; + data = kzalloc(sizeof(*data), GFP_KERNEL); if (!data) return -ENOMEM; @@ -462,7 +468,7 @@ int nvmf_connect_io_queue(struct nvme_ctrl *ctrl, u16 qid) ret = __nvme_submit_sync_cmd(ctrl->connect_q, &cmd, &res, data, sizeof(*data), 0, qid, 1, - BLK_MQ_REQ_RESERVED | BLK_MQ_REQ_NOWAIT); + BLK_MQ_REQ_RESERVED | BLK_MQ_REQ_NOWAIT, poll); if (ret) { nvmf_log_connect_error(ctrl, ret, le32_to_cpu(res.u32), &cmd, data); @@ -607,6 +613,11 @@ static const match_table_t opt_tokens = { { NVMF_OPT_HOST_TRADDR, "host_traddr=%s" }, { NVMF_OPT_HOST_ID, "hostid=%s" }, { NVMF_OPT_DUP_CONNECT, "duplicate_connect" }, + { NVMF_OPT_DISABLE_SQFLOW, "disable_sqflow" }, + { NVMF_OPT_HDR_DIGEST, "hdr_digest" }, + { NVMF_OPT_DATA_DIGEST, "data_digest" }, + { NVMF_OPT_NR_WRITE_QUEUES, "nr_write_queues=%d" }, + { NVMF_OPT_NR_POLL_QUEUES, "nr_poll_queues=%d" }, { NVMF_OPT_ERR, NULL } }; @@ -626,6 +637,8 @@ static int nvmf_parse_options(struct nvmf_ctrl_options *opts, opts->reconnect_delay = NVMF_DEF_RECONNECT_DELAY; opts->kato = NVME_DEFAULT_KATO; opts->duplicate_connect = false; + opts->hdr_digest = false; + opts->data_digest = false; options = o = kstrdup(buf, GFP_KERNEL); if (!options) @@ -817,6 +830,39 @@ static int nvmf_parse_options(struct nvmf_ctrl_options *opts, case NVMF_OPT_DUP_CONNECT: opts->duplicate_connect = true; break; + case NVMF_OPT_DISABLE_SQFLOW: + opts->disable_sqflow = true; + break; + case NVMF_OPT_HDR_DIGEST: + opts->hdr_digest = true; + break; + case NVMF_OPT_DATA_DIGEST: + opts->data_digest = true; + break; + case NVMF_OPT_NR_WRITE_QUEUES: + if (match_int(args, &token)) { + ret = -EINVAL; + goto out; + } + if (token <= 0) { + pr_err("Invalid nr_write_queues %d\n", token); + ret = -EINVAL; + goto out; + } + opts->nr_write_queues = token; + break; + case NVMF_OPT_NR_POLL_QUEUES: + if (match_int(args, &token)) { + ret = -EINVAL; + goto out; + } + if (token <= 0) { + pr_err("Invalid nr_poll_queues %d\n", token); + ret = -EINVAL; + goto out; + } + opts->nr_poll_queues = token; + break; default: pr_warn("unknown parameter or missing value '%s' in ctrl creation request\n", p); @@ -933,7 +979,8 @@ EXPORT_SYMBOL_GPL(nvmf_free_options); #define NVMF_REQUIRED_OPTS (NVMF_OPT_TRANSPORT | NVMF_OPT_NQN) #define NVMF_ALLOWED_OPTS (NVMF_OPT_QUEUE_SIZE | NVMF_OPT_NR_IO_QUEUES | \ NVMF_OPT_KATO | NVMF_OPT_HOSTNQN | \ - NVMF_OPT_HOST_ID | NVMF_OPT_DUP_CONNECT) + NVMF_OPT_HOST_ID | NVMF_OPT_DUP_CONNECT |\ + NVMF_OPT_DISABLE_SQFLOW) static struct nvme_ctrl * nvmf_create_ctrl(struct device *dev, const char *buf, size_t count) diff --git a/drivers/nvme/host/fabrics.h b/drivers/nvme/host/fabrics.h index 6ea6275f332a..478343b73e38 100644 --- a/drivers/nvme/host/fabrics.h +++ b/drivers/nvme/host/fabrics.h @@ -58,6 +58,11 @@ enum { NVMF_OPT_CTRL_LOSS_TMO = 1 << 11, NVMF_OPT_HOST_ID = 1 << 12, NVMF_OPT_DUP_CONNECT = 1 << 13, + NVMF_OPT_DISABLE_SQFLOW = 1 << 14, + NVMF_OPT_HDR_DIGEST = 1 << 15, + NVMF_OPT_DATA_DIGEST = 1 << 16, + NVMF_OPT_NR_WRITE_QUEUES = 1 << 17, + NVMF_OPT_NR_POLL_QUEUES = 1 << 18, }; /** @@ -85,6 +90,11 @@ enum { * @max_reconnects: maximum number of allowed reconnect attempts before removing * the controller, (-1) means reconnect forever, zero means remove * immediately; + * @disable_sqflow: disable controller sq flow control + * @hdr_digest: generate/verify header digest (TCP) + * @data_digest: generate/verify data digest (TCP) + * @nr_write_queues: number of queues for write I/O + * @nr_poll_queues: number of queues for polling I/O */ struct nvmf_ctrl_options { unsigned mask; @@ -101,6 +111,11 @@ struct nvmf_ctrl_options { unsigned int kato; struct nvmf_host *host; int max_reconnects; + bool disable_sqflow; + bool hdr_digest; + bool data_digest; + unsigned int nr_write_queues; + unsigned int nr_poll_queues; }; /* @@ -156,7 +171,7 @@ int nvmf_reg_read32(struct nvme_ctrl *ctrl, u32 off, u32 *val); int nvmf_reg_read64(struct nvme_ctrl *ctrl, u32 off, u64 *val); int nvmf_reg_write32(struct nvme_ctrl *ctrl, u32 off, u32 val); int nvmf_connect_admin_queue(struct nvme_ctrl *ctrl); -int nvmf_connect_io_queue(struct nvme_ctrl *ctrl, u16 qid); +int nvmf_connect_io_queue(struct nvme_ctrl *ctrl, u16 qid, bool poll); int nvmf_register_transport(struct nvmf_transport_ops *ops); void nvmf_unregister_transport(struct nvmf_transport_ops *ops); void nvmf_free_options(struct nvmf_ctrl_options *opts); diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c index feb86b59170e..89accc76d71c 100644 --- a/drivers/nvme/host/fc.c +++ b/drivers/nvme/host/fc.c @@ -1975,7 +1975,7 @@ nvme_fc_connect_io_queues(struct nvme_fc_ctrl *ctrl, u16 qsize) (qsize / 5)); if (ret) break; - ret = nvmf_connect_io_queue(&ctrl->ctrl, i); + ret = nvmf_connect_io_queue(&ctrl->ctrl, i, false); if (ret) break; @@ -2326,38 +2326,6 @@ nvme_fc_queue_rq(struct blk_mq_hw_ctx *hctx, return nvme_fc_start_fcp_op(ctrl, queue, op, data_len, io_dir); } -static struct blk_mq_tags * -nvme_fc_tagset(struct nvme_fc_queue *queue) -{ - if (queue->qnum == 0) - return queue->ctrl->admin_tag_set.tags[queue->qnum]; - - return queue->ctrl->tag_set.tags[queue->qnum - 1]; -} - -static int -nvme_fc_poll(struct blk_mq_hw_ctx *hctx, unsigned int tag) - -{ - struct nvme_fc_queue *queue = hctx->driver_data; - struct nvme_fc_ctrl *ctrl = queue->ctrl; - struct request *req; - struct nvme_fc_fcp_op *op; - - req = blk_mq_tag_to_rq(nvme_fc_tagset(queue), tag); - if (!req) - return 0; - - op = blk_mq_rq_to_pdu(req); - - if ((atomic_read(&op->state) == FCPOP_STATE_ACTIVE) && - (ctrl->lport->ops->poll_queue)) - ctrl->lport->ops->poll_queue(&ctrl->lport->localport, - queue->lldd_handle); - - return ((atomic_read(&op->state) != FCPOP_STATE_ACTIVE)); -} - static void nvme_fc_submit_async_event(struct nvme_ctrl *arg) { @@ -2410,7 +2378,7 @@ nvme_fc_complete_rq(struct request *rq) * status. The done path will return the io request back to the block * layer with an error status. */ -static void +static bool nvme_fc_terminate_exchange(struct request *req, void *data, bool reserved) { struct nvme_ctrl *nctrl = data; @@ -2418,6 +2386,7 @@ nvme_fc_terminate_exchange(struct request *req, void *data, bool reserved) struct nvme_fc_fcp_op *op = blk_mq_rq_to_pdu(req); __nvme_fc_abort_op(ctrl, op); + return true; } @@ -2427,7 +2396,6 @@ static const struct blk_mq_ops nvme_fc_mq_ops = { .init_request = nvme_fc_init_request, .exit_request = nvme_fc_exit_request, .init_hctx = nvme_fc_init_hctx, - .poll = nvme_fc_poll, .timeout = nvme_fc_timeout, }; @@ -2457,7 +2425,7 @@ nvme_fc_create_io_queues(struct nvme_fc_ctrl *ctrl) ctrl->tag_set.ops = &nvme_fc_mq_ops; ctrl->tag_set.queue_depth = ctrl->ctrl.opts->queue_size; ctrl->tag_set.reserved_tags = 1; /* fabric connect */ - ctrl->tag_set.numa_node = NUMA_NO_NODE; + ctrl->tag_set.numa_node = ctrl->ctrl.numa_node; ctrl->tag_set.flags = BLK_MQ_F_SHOULD_MERGE; ctrl->tag_set.cmd_size = struct_size((struct nvme_fcp_op_w_sgl *)NULL, priv, @@ -3050,6 +3018,7 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts, ctrl->ctrl.opts = opts; ctrl->ctrl.nr_reconnects = 0; + ctrl->ctrl.numa_node = dev_to_node(lport->dev); INIT_LIST_HEAD(&ctrl->ctrl_list); ctrl->lport = lport; ctrl->rport = rport; @@ -3090,7 +3059,7 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts, ctrl->admin_tag_set.ops = &nvme_fc_admin_mq_ops; ctrl->admin_tag_set.queue_depth = NVME_AQ_MQ_TAG_DEPTH; ctrl->admin_tag_set.reserved_tags = 2; /* fabric connect + Keep-Alive */ - ctrl->admin_tag_set.numa_node = NUMA_NO_NODE; + ctrl->admin_tag_set.numa_node = ctrl->ctrl.numa_node; ctrl->admin_tag_set.cmd_size = struct_size((struct nvme_fcp_op_w_sgl *)NULL, priv, ctrl->lport->ops->fcprqst_priv_sz); diff --git a/drivers/nvme/host/lightnvm.c b/drivers/nvme/host/lightnvm.c index a4f3b263cd6c..b759c25c89c8 100644 --- a/drivers/nvme/host/lightnvm.c +++ b/drivers/nvme/host/lightnvm.c @@ -577,7 +577,8 @@ static int nvme_nvm_get_chk_meta(struct nvm_dev *ndev, struct ppa_addr ppa; size_t left = nchks * sizeof(struct nvme_nvm_chk_meta); size_t log_pos, offset, len; - int ret, i, max_len; + int i, max_len; + int ret = 0; /* * limit requests to maximum 256K to avoid issuing arbitrary large @@ -731,11 +732,12 @@ static int nvme_nvm_submit_io_sync(struct nvm_dev *dev, struct nvm_rq *rqd) return ret; } -static void *nvme_nvm_create_dma_pool(struct nvm_dev *nvmdev, char *name) +static void *nvme_nvm_create_dma_pool(struct nvm_dev *nvmdev, char *name, + int size) { struct nvme_ns *ns = nvmdev->q->queuedata; - return dma_pool_create(name, ns->ctrl->dev, PAGE_SIZE, PAGE_SIZE, 0); + return dma_pool_create(name, ns->ctrl->dev, size, PAGE_SIZE, 0); } static void nvme_nvm_destroy_dma_pool(void *pool) @@ -935,9 +937,9 @@ static int nvme_nvm_user_vcmd(struct nvme_ns *ns, int admin, /* cdw11-12 */ c.ph_rw.length = cpu_to_le16(vcmd.nppas); c.ph_rw.control = cpu_to_le16(vcmd.control); - c.common.cdw10[3] = cpu_to_le32(vcmd.cdw13); - c.common.cdw10[4] = cpu_to_le32(vcmd.cdw14); - c.common.cdw10[5] = cpu_to_le32(vcmd.cdw15); + c.common.cdw13 = cpu_to_le32(vcmd.cdw13); + c.common.cdw14 = cpu_to_le32(vcmd.cdw14); + c.common.cdw15 = cpu_to_le32(vcmd.cdw15); if (vcmd.timeout_ms) timeout = msecs_to_jiffies(vcmd.timeout_ms); @@ -972,22 +974,11 @@ int nvme_nvm_ioctl(struct nvme_ns *ns, unsigned int cmd, unsigned long arg) } } -void nvme_nvm_update_nvm_info(struct nvme_ns *ns) -{ - struct nvm_dev *ndev = ns->ndev; - struct nvm_geo *geo = &ndev->geo; - - if (geo->version == NVM_OCSSD_SPEC_12) - return; - - geo->csecs = 1 << ns->lba_shift; - geo->sos = ns->ms; -} - int nvme_nvm_register(struct nvme_ns *ns, char *disk_name, int node) { struct request_queue *q = ns->queue; struct nvm_dev *dev; + struct nvm_geo *geo; _nvme_nvm_check_size(); @@ -995,6 +986,12 @@ int nvme_nvm_register(struct nvme_ns *ns, char *disk_name, int node) if (!dev) return -ENOMEM; + /* Note that csecs and sos will be overridden if it is a 1.2 drive. */ + geo = &dev->geo; + geo->csecs = 1 << ns->lba_shift; + geo->sos = ns->ms; + geo->ext = ns->ext; + dev->q = q; memcpy(dev->name, disk_name, DISK_NAME_LEN); dev->ops = &nvme_nvm_dev_ops; diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c index 9901afd804ce..183ec17ba067 100644 --- a/drivers/nvme/host/multipath.c +++ b/drivers/nvme/host/multipath.c @@ -141,7 +141,7 @@ static struct nvme_ns *__nvme_find_path(struct nvme_ns_head *head, int node) test_bit(NVME_NS_ANA_PENDING, &ns->flags)) continue; - distance = node_distance(node, dev_to_node(ns->ctrl->dev)); + distance = node_distance(node, ns->ctrl->numa_node); switch (ns->ana_state) { case NVME_ANA_OPTIMIZED: @@ -220,21 +220,6 @@ static blk_qc_t nvme_ns_head_make_request(struct request_queue *q, return ret; } -static bool nvme_ns_head_poll(struct request_queue *q, blk_qc_t qc) -{ - struct nvme_ns_head *head = q->queuedata; - struct nvme_ns *ns; - bool found = false; - int srcu_idx; - - srcu_idx = srcu_read_lock(&head->srcu); - ns = srcu_dereference(head->current_path[numa_node_id()], &head->srcu); - if (likely(ns && nvme_path_is_optimized(ns))) - found = ns->queue->poll_fn(q, qc); - srcu_read_unlock(&head->srcu, srcu_idx); - return found; -} - static void nvme_requeue_work(struct work_struct *work) { struct nvme_ns_head *head = @@ -276,12 +261,11 @@ int nvme_mpath_alloc_disk(struct nvme_ctrl *ctrl, struct nvme_ns_head *head) if (!(ctrl->subsys->cmic & (1 << 1)) || !multipath) return 0; - q = blk_alloc_queue_node(GFP_KERNEL, NUMA_NO_NODE, NULL); + q = blk_alloc_queue_node(GFP_KERNEL, ctrl->numa_node); if (!q) goto out; q->queuedata = head; blk_queue_make_request(q, nvme_ns_head_make_request); - q->poll_fn = nvme_ns_head_poll; blk_queue_flag_set(QUEUE_FLAG_NONROT, q); /* set to a default value for 512 until disk is validated */ blk_queue_logical_block_size(q, 512); diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index 081cbdcce880..2b36ac922596 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -145,6 +145,7 @@ enum nvme_ctrl_state { }; struct nvme_ctrl { + bool comp_seen; enum nvme_ctrl_state state; bool identified; spinlock_t lock; @@ -153,6 +154,7 @@ struct nvme_ctrl { struct request_queue *connect_q; struct device *dev; int instance; + int numa_node; struct blk_mq_tag_set *tagset; struct blk_mq_tag_set *admin_tagset; struct list_head namespaces; @@ -179,6 +181,7 @@ struct nvme_ctrl { u32 page_size; u32 max_hw_sectors; u32 max_segments; + u16 crdt[3]; u16 oncs; u16 oacs; u16 nssa; @@ -193,6 +196,7 @@ struct nvme_ctrl { u8 apsta; u32 oaes; u32 aen_result; + u32 ctratt; unsigned int shutdown_timeout; unsigned int kato; bool subsystem; @@ -237,6 +241,9 @@ struct nvme_ctrl { u16 maxcmd; int nr_reconnects; struct nvmf_ctrl_options *opts; + + struct page *discard_page; + unsigned long discard_page_busy; }; struct nvme_subsystem { @@ -364,15 +371,6 @@ static inline void nvme_fault_inject_fini(struct nvme_ns *ns) {} static inline void nvme_should_fail(struct request *req) {} #endif -static inline bool nvme_ctrl_ready(struct nvme_ctrl *ctrl) -{ - u32 val = 0; - - if (ctrl->ops->reg_read32(ctrl, NVME_REG_CSTS, &val)) - return false; - return val & NVME_CSTS_RDY; -} - static inline int nvme_reset_subsystem(struct nvme_ctrl *ctrl) { if (!ctrl->subsystem) @@ -408,7 +406,7 @@ static inline void nvme_put_ctrl(struct nvme_ctrl *ctrl) } void nvme_complete_rq(struct request *req); -void nvme_cancel_request(struct request *req, void *data, bool reserved); +bool nvme_cancel_request(struct request *req, void *data, bool reserved); bool nvme_change_ctrl_state(struct nvme_ctrl *ctrl, enum nvme_ctrl_state new_state); int nvme_disable_ctrl(struct nvme_ctrl *ctrl, u64 cap); @@ -449,7 +447,7 @@ int nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd, int __nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd, union nvme_result *result, void *buffer, unsigned bufflen, unsigned timeout, int qid, int at_head, - blk_mq_req_flags_t flags); + blk_mq_req_flags_t flags, bool poll); int nvme_set_queue_count(struct nvme_ctrl *ctrl, int *count); void nvme_stop_keep_alive(struct nvme_ctrl *ctrl); int nvme_reset_ctrl(struct nvme_ctrl *ctrl); @@ -545,13 +543,11 @@ static inline void nvme_mpath_stop(struct nvme_ctrl *ctrl) #endif /* CONFIG_NVME_MULTIPATH */ #ifdef CONFIG_NVM -void nvme_nvm_update_nvm_info(struct nvme_ns *ns); int nvme_nvm_register(struct nvme_ns *ns, char *disk_name, int node); void nvme_nvm_unregister(struct nvme_ns *ns); extern const struct attribute_group nvme_nvm_attr_group; int nvme_nvm_ioctl(struct nvme_ns *ns, unsigned int cmd, unsigned long arg); #else -static inline void nvme_nvm_update_nvm_info(struct nvme_ns *ns) {}; static inline int nvme_nvm_register(struct nvme_ns *ns, char *disk_name, int node) { @@ -572,6 +568,6 @@ static inline struct nvme_ns *nvme_get_ns_from_dev(struct device *dev) } int __init nvme_core_init(void); -void nvme_core_exit(void); +void __exit nvme_core_exit(void); #endif /* _NVME_H */ diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index c33bb201b884..5a0bf6a24d50 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -32,6 +32,7 @@ #include <linux/sed-opal.h> #include <linux/pci-p2pdma.h> +#include "trace.h" #include "nvme.h" #define SQ_SIZE(depth) (depth * sizeof(struct nvme_command)) @@ -74,6 +75,22 @@ static int io_queue_depth = 1024; module_param_cb(io_queue_depth, &io_queue_depth_ops, &io_queue_depth, 0644); MODULE_PARM_DESC(io_queue_depth, "set io queue depth, should >= 2"); +static int queue_count_set(const char *val, const struct kernel_param *kp); +static const struct kernel_param_ops queue_count_ops = { + .set = queue_count_set, + .get = param_get_int, +}; + +static int write_queues; +module_param_cb(write_queues, &queue_count_ops, &write_queues, 0644); +MODULE_PARM_DESC(write_queues, + "Number of queues to use for writes. If not set, reads and writes " + "will share a queue set."); + +static int poll_queues = 0; +module_param_cb(poll_queues, &queue_count_ops, &poll_queues, 0644); +MODULE_PARM_DESC(poll_queues, "Number of queues to use for polled IO."); + struct nvme_dev; struct nvme_queue; @@ -92,6 +109,7 @@ struct nvme_dev { struct dma_pool *prp_small_pool; unsigned online_queues; unsigned max_qid; + unsigned io_queues[HCTX_MAX_TYPES]; unsigned int num_vecs; int q_depth; u32 db_stride; @@ -105,7 +123,6 @@ struct nvme_dev { u32 cmbsz; u32 cmbloc; struct nvme_ctrl ctrl; - struct completion ioq_wait; mempool_t *iod_mempool; @@ -134,6 +151,17 @@ static int io_queue_depth_set(const char *val, const struct kernel_param *kp) return param_set_int(val, kp); } +static int queue_count_set(const char *val, const struct kernel_param *kp) +{ + int n = 0, ret; + + ret = kstrtoint(val, 10, &n); + if (n > num_possible_cpus()) + n = num_possible_cpus(); + + return param_set_int(val, kp); +} + static inline unsigned int sq_idx(unsigned int qid, u32 stride) { return qid * 2 * stride; @@ -158,8 +186,8 @@ struct nvme_queue { struct nvme_dev *dev; spinlock_t sq_lock; struct nvme_command *sq_cmds; - bool sq_cmds_is_io; - spinlock_t cq_lock ____cacheline_aligned_in_smp; + /* only used for poll queues: */ + spinlock_t cq_poll_lock ____cacheline_aligned_in_smp; volatile struct nvme_completion *cqes; struct blk_mq_tags **tags; dma_addr_t sq_dma_addr; @@ -168,14 +196,20 @@ struct nvme_queue { u16 q_depth; s16 cq_vector; u16 sq_tail; + u16 last_sq_tail; u16 cq_head; u16 last_cq_head; u16 qid; u8 cq_phase; + unsigned long flags; +#define NVMEQ_ENABLED 0 +#define NVMEQ_SQ_CMB 1 +#define NVMEQ_DELETE_ERROR 2 u32 *dbbuf_sq_db; u32 *dbbuf_cq_db; u32 *dbbuf_sq_ei; u32 *dbbuf_cq_ei; + struct completion delete_done; }; /* @@ -218,9 +252,20 @@ static inline void _nvme_check_size(void) BUILD_BUG_ON(sizeof(struct nvme_dbbuf) != 64); } +static unsigned int max_io_queues(void) +{ + return num_possible_cpus() + write_queues + poll_queues; +} + +static unsigned int max_queue_count(void) +{ + /* IO queues + admin queue */ + return 1 + max_io_queues(); +} + static inline unsigned int nvme_dbbuf_size(u32 stride) { - return ((num_possible_cpus() + 1) * 8 * stride); + return (max_queue_count() * 8 * stride); } static int nvme_dbbuf_dma_alloc(struct nvme_dev *dev) @@ -431,30 +476,90 @@ static int nvme_init_request(struct blk_mq_tag_set *set, struct request *req, return 0; } +static int queue_irq_offset(struct nvme_dev *dev) +{ + /* if we have more than 1 vec, admin queue offsets us by 1 */ + if (dev->num_vecs > 1) + return 1; + + return 0; +} + static int nvme_pci_map_queues(struct blk_mq_tag_set *set) { struct nvme_dev *dev = set->driver_data; + int i, qoff, offset; + + offset = queue_irq_offset(dev); + for (i = 0, qoff = 0; i < set->nr_maps; i++) { + struct blk_mq_queue_map *map = &set->map[i]; + + map->nr_queues = dev->io_queues[i]; + if (!map->nr_queues) { + BUG_ON(i == HCTX_TYPE_DEFAULT); + continue; + } - return blk_mq_pci_map_queues(set, to_pci_dev(dev->dev), - dev->num_vecs > 1 ? 1 /* admin queue */ : 0); + /* + * The poll queue(s) doesn't have an IRQ (and hence IRQ + * affinity), so use the regular blk-mq cpu mapping + */ + map->queue_offset = qoff; + if (i != HCTX_TYPE_POLL) + blk_mq_pci_map_queues(map, to_pci_dev(dev->dev), offset); + else + blk_mq_map_queues(map); + qoff += map->nr_queues; + offset += map->nr_queues; + } + + return 0; +} + +/* + * Write sq tail if we are asked to, or if the next command would wrap. + */ +static inline void nvme_write_sq_db(struct nvme_queue *nvmeq, bool write_sq) +{ + if (!write_sq) { + u16 next_tail = nvmeq->sq_tail + 1; + + if (next_tail == nvmeq->q_depth) + next_tail = 0; + if (next_tail != nvmeq->last_sq_tail) + return; + } + + if (nvme_dbbuf_update_and_check_event(nvmeq->sq_tail, + nvmeq->dbbuf_sq_db, nvmeq->dbbuf_sq_ei)) + writel(nvmeq->sq_tail, nvmeq->q_db); + nvmeq->last_sq_tail = nvmeq->sq_tail; } /** * nvme_submit_cmd() - Copy a command into a queue and ring the doorbell * @nvmeq: The queue to use * @cmd: The command to send + * @write_sq: whether to write to the SQ doorbell */ -static void nvme_submit_cmd(struct nvme_queue *nvmeq, struct nvme_command *cmd) +static void nvme_submit_cmd(struct nvme_queue *nvmeq, struct nvme_command *cmd, + bool write_sq) { spin_lock(&nvmeq->sq_lock); - memcpy(&nvmeq->sq_cmds[nvmeq->sq_tail], cmd, sizeof(*cmd)); - if (++nvmeq->sq_tail == nvmeq->q_depth) nvmeq->sq_tail = 0; - if (nvme_dbbuf_update_and_check_event(nvmeq->sq_tail, - nvmeq->dbbuf_sq_db, nvmeq->dbbuf_sq_ei)) - writel(nvmeq->sq_tail, nvmeq->q_db); + nvme_write_sq_db(nvmeq, write_sq); + spin_unlock(&nvmeq->sq_lock); +} + +static void nvme_commit_rqs(struct blk_mq_hw_ctx *hctx) +{ + struct nvme_queue *nvmeq = hctx->driver_data; + + spin_lock(&nvmeq->sq_lock); + if (nvmeq->sq_tail != nvmeq->last_sq_tail) + nvme_write_sq_db(nvmeq, true); spin_unlock(&nvmeq->sq_lock); } @@ -822,7 +927,7 @@ static blk_status_t nvme_queue_rq(struct blk_mq_hw_ctx *hctx, * We should not need to do this, but we're still using this to * ensure we can drain requests on a dying queue. */ - if (unlikely(nvmeq->cq_vector < 0)) + if (unlikely(!test_bit(NVMEQ_ENABLED, &nvmeq->flags))) return BLK_STS_IOERR; ret = nvme_setup_cmd(ns, req, &cmnd); @@ -840,7 +945,7 @@ static blk_status_t nvme_queue_rq(struct blk_mq_hw_ctx *hctx, } blk_mq_start_request(req); - nvme_submit_cmd(nvmeq, &cmnd); + nvme_submit_cmd(nvmeq, &cmnd, bd->last); return BLK_STS_OK; out_cleanup_iod: nvme_free_iod(dev, req); @@ -899,6 +1004,7 @@ static inline void nvme_handle_cqe(struct nvme_queue *nvmeq, u16 idx) } req = blk_mq_tag_to_rq(*nvmeq->tags, cqe->command_id); + trace_nvme_sq(req, cqe->sq_head, nvmeq->sq_tail); nvme_end_request(req, cqe->status, cqe->result); } @@ -919,15 +1025,15 @@ static inline void nvme_update_cq_head(struct nvme_queue *nvmeq) } } -static inline bool nvme_process_cq(struct nvme_queue *nvmeq, u16 *start, - u16 *end, int tag) +static inline int nvme_process_cq(struct nvme_queue *nvmeq, u16 *start, + u16 *end, unsigned int tag) { - bool found = false; + int found = 0; *start = nvmeq->cq_head; - while (!found && nvme_cqe_pending(nvmeq)) { - if (nvmeq->cqes[nvmeq->cq_head].command_id == tag) - found = true; + while (nvme_cqe_pending(nvmeq)) { + if (tag == -1U || nvmeq->cqes[nvmeq->cq_head].command_id == tag) + found++; nvme_update_cq_head(nvmeq); } *end = nvmeq->cq_head; @@ -943,12 +1049,16 @@ static irqreturn_t nvme_irq(int irq, void *data) irqreturn_t ret = IRQ_NONE; u16 start, end; - spin_lock(&nvmeq->cq_lock); + /* + * The rmb/wmb pair ensures we see all updates from a previous run of + * the irq handler, even if that was on another CPU. + */ + rmb(); if (nvmeq->cq_head != nvmeq->last_cq_head) ret = IRQ_HANDLED; nvme_process_cq(nvmeq, &start, &end, -1); nvmeq->last_cq_head = nvmeq->cq_head; - spin_unlock(&nvmeq->cq_lock); + wmb(); if (start != end) { nvme_complete_cqes(nvmeq, start, end); @@ -966,27 +1076,50 @@ static irqreturn_t nvme_irq_check(int irq, void *data) return IRQ_NONE; } -static int __nvme_poll(struct nvme_queue *nvmeq, unsigned int tag) +/* + * Poll for completions any queue, including those not dedicated to polling. + * Can be called from any context. + */ +static int nvme_poll_irqdisable(struct nvme_queue *nvmeq, unsigned int tag) { + struct pci_dev *pdev = to_pci_dev(nvmeq->dev->dev); u16 start, end; - bool found; + int found; - if (!nvme_cqe_pending(nvmeq)) - return 0; - - spin_lock_irq(&nvmeq->cq_lock); - found = nvme_process_cq(nvmeq, &start, &end, tag); - spin_unlock_irq(&nvmeq->cq_lock); + /* + * For a poll queue we need to protect against the polling thread + * using the CQ lock. For normal interrupt driven threads we have + * to disable the interrupt to avoid racing with it. + */ + if (nvmeq->cq_vector == -1) { + spin_lock(&nvmeq->cq_poll_lock); + found = nvme_process_cq(nvmeq, &start, &end, tag); + spin_unlock(&nvmeq->cq_poll_lock); + } else { + disable_irq(pci_irq_vector(pdev, nvmeq->cq_vector)); + found = nvme_process_cq(nvmeq, &start, &end, tag); + enable_irq(pci_irq_vector(pdev, nvmeq->cq_vector)); + } nvme_complete_cqes(nvmeq, start, end); return found; } -static int nvme_poll(struct blk_mq_hw_ctx *hctx, unsigned int tag) +static int nvme_poll(struct blk_mq_hw_ctx *hctx) { struct nvme_queue *nvmeq = hctx->driver_data; + u16 start, end; + bool found; + + if (!nvme_cqe_pending(nvmeq)) + return 0; + + spin_lock(&nvmeq->cq_poll_lock); + found = nvme_process_cq(nvmeq, &start, &end, -1); + spin_unlock(&nvmeq->cq_poll_lock); - return __nvme_poll(nvmeq, tag); + nvme_complete_cqes(nvmeq, start, end); + return found; } static void nvme_pci_submit_async_event(struct nvme_ctrl *ctrl) @@ -998,7 +1131,7 @@ static void nvme_pci_submit_async_event(struct nvme_ctrl *ctrl) memset(&c, 0, sizeof(c)); c.common.opcode = nvme_admin_async_event; c.common.command_id = NVME_AQ_BLK_MQ_DEPTH; - nvme_submit_cmd(nvmeq, &c); + nvme_submit_cmd(nvmeq, &c, true); } static int adapter_delete_queue(struct nvme_dev *dev, u8 opcode, u16 id) @@ -1016,7 +1149,10 @@ static int adapter_alloc_cq(struct nvme_dev *dev, u16 qid, struct nvme_queue *nvmeq, s16 vector) { struct nvme_command c; - int flags = NVME_QUEUE_PHYS_CONTIG | NVME_CQ_IRQ_ENABLED; + int flags = NVME_QUEUE_PHYS_CONTIG; + + if (vector != -1) + flags |= NVME_CQ_IRQ_ENABLED; /* * Note: we (ab)use the fact that the prp fields survive if no data @@ -1028,7 +1164,10 @@ static int adapter_alloc_cq(struct nvme_dev *dev, u16 qid, c.create_cq.cqid = cpu_to_le16(qid); c.create_cq.qsize = cpu_to_le16(nvmeq->q_depth - 1); c.create_cq.cq_flags = cpu_to_le16(flags); - c.create_cq.irq_vector = cpu_to_le16(vector); + if (vector != -1) + c.create_cq.irq_vector = cpu_to_le16(vector); + else + c.create_cq.irq_vector = 0; return nvme_submit_sync_cmd(dev->ctrl.admin_q, &c, NULL, 0); } @@ -1157,7 +1296,7 @@ static enum blk_eh_timer_return nvme_timeout(struct request *req, bool reserved) /* * Did we miss an interrupt? */ - if (__nvme_poll(nvmeq, req->tag)) { + if (nvme_poll_irqdisable(nvmeq, req->tag)) { dev_warn(dev->ctrl.device, "I/O %d QID %d timeout, completion polled\n", req->tag, nvmeq->qid); @@ -1237,17 +1376,15 @@ static void nvme_free_queue(struct nvme_queue *nvmeq) { dma_free_coherent(nvmeq->q_dmadev, CQ_SIZE(nvmeq->q_depth), (void *)nvmeq->cqes, nvmeq->cq_dma_addr); + if (!nvmeq->sq_cmds) + return; - if (nvmeq->sq_cmds) { - if (nvmeq->sq_cmds_is_io) - pci_free_p2pmem(to_pci_dev(nvmeq->q_dmadev), - nvmeq->sq_cmds, - SQ_SIZE(nvmeq->q_depth)); - else - dma_free_coherent(nvmeq->q_dmadev, - SQ_SIZE(nvmeq->q_depth), - nvmeq->sq_cmds, - nvmeq->sq_dma_addr); + if (test_and_clear_bit(NVMEQ_SQ_CMB, &nvmeq->flags)) { + pci_free_p2pmem(to_pci_dev(nvmeq->q_dmadev), + nvmeq->sq_cmds, SQ_SIZE(nvmeq->q_depth)); + } else { + dma_free_coherent(nvmeq->q_dmadev, SQ_SIZE(nvmeq->q_depth), + nvmeq->sq_cmds, nvmeq->sq_dma_addr); } } @@ -1267,47 +1404,32 @@ static void nvme_free_queues(struct nvme_dev *dev, int lowest) */ static int nvme_suspend_queue(struct nvme_queue *nvmeq) { - int vector; - - spin_lock_irq(&nvmeq->cq_lock); - if (nvmeq->cq_vector == -1) { - spin_unlock_irq(&nvmeq->cq_lock); + if (!test_and_clear_bit(NVMEQ_ENABLED, &nvmeq->flags)) return 1; - } - vector = nvmeq->cq_vector; - nvmeq->dev->online_queues--; - nvmeq->cq_vector = -1; - spin_unlock_irq(&nvmeq->cq_lock); - /* - * Ensure that nvme_queue_rq() sees it ->cq_vector == -1 without - * having to grab the lock. - */ + /* ensure that nvme_queue_rq() sees NVMEQ_ENABLED cleared */ mb(); + nvmeq->dev->online_queues--; if (!nvmeq->qid && nvmeq->dev->ctrl.admin_q) blk_mq_quiesce_queue(nvmeq->dev->ctrl.admin_q); - - pci_free_irq(to_pci_dev(nvmeq->dev->dev), vector, nvmeq); - + if (nvmeq->cq_vector == -1) + return 0; + pci_free_irq(to_pci_dev(nvmeq->dev->dev), nvmeq->cq_vector, nvmeq); + nvmeq->cq_vector = -1; return 0; } static void nvme_disable_admin_queue(struct nvme_dev *dev, bool shutdown) { struct nvme_queue *nvmeq = &dev->queues[0]; - u16 start, end; if (shutdown) nvme_shutdown_ctrl(&dev->ctrl); else nvme_disable_ctrl(&dev->ctrl, dev->ctrl.cap); - spin_lock_irq(&nvmeq->cq_lock); - nvme_process_cq(nvmeq, &start, &end, -1); - spin_unlock_irq(&nvmeq->cq_lock); - - nvme_complete_cqes(nvmeq, start, end); + nvme_poll_irqdisable(nvmeq, -1); } static int nvme_cmb_qdepth(struct nvme_dev *dev, int nr_io_queues, @@ -1343,15 +1465,14 @@ static int nvme_alloc_sq_cmds(struct nvme_dev *dev, struct nvme_queue *nvmeq, nvmeq->sq_cmds = pci_alloc_p2pmem(pdev, SQ_SIZE(depth)); nvmeq->sq_dma_addr = pci_p2pmem_virt_to_bus(pdev, nvmeq->sq_cmds); - nvmeq->sq_cmds_is_io = true; - } - - if (!nvmeq->sq_cmds) { - nvmeq->sq_cmds = dma_alloc_coherent(dev->dev, SQ_SIZE(depth), - &nvmeq->sq_dma_addr, GFP_KERNEL); - nvmeq->sq_cmds_is_io = false; + if (nvmeq->sq_dma_addr) { + set_bit(NVMEQ_SQ_CMB, &nvmeq->flags); + return 0; + } } + nvmeq->sq_cmds = dma_alloc_coherent(dev->dev, SQ_SIZE(depth), + &nvmeq->sq_dma_addr, GFP_KERNEL); if (!nvmeq->sq_cmds) return -ENOMEM; return 0; @@ -1375,7 +1496,7 @@ static int nvme_alloc_queue(struct nvme_dev *dev, int qid, int depth) nvmeq->q_dmadev = dev->dev; nvmeq->dev = dev; spin_lock_init(&nvmeq->sq_lock); - spin_lock_init(&nvmeq->cq_lock); + spin_lock_init(&nvmeq->cq_poll_lock); nvmeq->cq_head = 0; nvmeq->cq_phase = 1; nvmeq->q_db = &dev->dbs[qid * 2 * dev->db_stride]; @@ -1411,28 +1532,34 @@ static void nvme_init_queue(struct nvme_queue *nvmeq, u16 qid) { struct nvme_dev *dev = nvmeq->dev; - spin_lock_irq(&nvmeq->cq_lock); nvmeq->sq_tail = 0; + nvmeq->last_sq_tail = 0; nvmeq->cq_head = 0; nvmeq->cq_phase = 1; nvmeq->q_db = &dev->dbs[qid * 2 * dev->db_stride]; memset((void *)nvmeq->cqes, 0, CQ_SIZE(nvmeq->q_depth)); nvme_dbbuf_init(dev, nvmeq, qid); dev->online_queues++; - spin_unlock_irq(&nvmeq->cq_lock); + wmb(); /* ensure the first interrupt sees the initialization */ } -static int nvme_create_queue(struct nvme_queue *nvmeq, int qid) +static int nvme_create_queue(struct nvme_queue *nvmeq, int qid, bool polled) { struct nvme_dev *dev = nvmeq->dev; int result; s16 vector; + clear_bit(NVMEQ_DELETE_ERROR, &nvmeq->flags); + /* * A queue's vector matches the queue identifier unless the controller * has only one vector available. */ - vector = dev->num_vecs == 1 ? 0 : qid; + if (!polled) + vector = dev->num_vecs == 1 ? 0 : qid; + else + vector = -1; + result = adapter_alloc_cq(dev, qid, nvmeq, vector); if (result) return result; @@ -1443,17 +1570,16 @@ static int nvme_create_queue(struct nvme_queue *nvmeq, int qid) else if (result) goto release_cq; - /* - * Set cq_vector after alloc cq/sq, otherwise nvme_suspend_queue will - * invoke free_irq for it and cause a 'Trying to free already-free IRQ - * xxx' warning if the create CQ/SQ command times out. - */ nvmeq->cq_vector = vector; nvme_init_queue(nvmeq, qid); - result = queue_request_irq(nvmeq); - if (result < 0) - goto release_sq; + if (vector != -1) { + result = queue_request_irq(nvmeq); + if (result < 0) + goto release_sq; + } + + set_bit(NVMEQ_ENABLED, &nvmeq->flags); return result; release_sq: @@ -1477,6 +1603,7 @@ static const struct blk_mq_ops nvme_mq_admin_ops = { static const struct blk_mq_ops nvme_mq_ops = { .queue_rq = nvme_queue_rq, .complete = nvme_pci_complete_rq, + .commit_rqs = nvme_commit_rqs, .init_hctx = nvme_init_hctx, .init_request = nvme_init_request, .map_queues = nvme_pci_map_queues, @@ -1602,12 +1729,13 @@ static int nvme_pci_configure_admin_queue(struct nvme_dev *dev) return result; } + set_bit(NVMEQ_ENABLED, &nvmeq->flags); return result; } static int nvme_create_io_queues(struct nvme_dev *dev) { - unsigned i, max; + unsigned i, max, rw_queues; int ret = 0; for (i = dev->ctrl.queue_count; i <= dev->max_qid; i++) { @@ -1618,8 +1746,17 @@ static int nvme_create_io_queues(struct nvme_dev *dev) } max = min(dev->max_qid, dev->ctrl.queue_count - 1); + if (max != 1 && dev->io_queues[HCTX_TYPE_POLL]) { + rw_queues = dev->io_queues[HCTX_TYPE_DEFAULT] + + dev->io_queues[HCTX_TYPE_READ]; + } else { + rw_queues = max; + } + for (i = dev->online_queues; i <= max; i++) { - ret = nvme_create_queue(&dev->queues[i], i); + bool polled = i > rw_queues; + + ret = nvme_create_queue(&dev->queues[i], i, polled); if (ret) break; } @@ -1891,6 +2028,110 @@ static int nvme_setup_host_mem(struct nvme_dev *dev) return ret; } +static void nvme_calc_io_queues(struct nvme_dev *dev, unsigned int irq_queues) +{ + unsigned int this_w_queues = write_queues; + + /* + * Setup read/write queue split + */ + if (irq_queues == 1) { + dev->io_queues[HCTX_TYPE_DEFAULT] = 1; + dev->io_queues[HCTX_TYPE_READ] = 0; + return; + } + + /* + * If 'write_queues' is set, ensure it leaves room for at least + * one read queue + */ + if (this_w_queues >= irq_queues) + this_w_queues = irq_queues - 1; + + /* + * If 'write_queues' is set to zero, reads and writes will share + * a queue set. + */ + if (!this_w_queues) { + dev->io_queues[HCTX_TYPE_DEFAULT] = irq_queues; + dev->io_queues[HCTX_TYPE_READ] = 0; + } else { + dev->io_queues[HCTX_TYPE_DEFAULT] = this_w_queues; + dev->io_queues[HCTX_TYPE_READ] = irq_queues - this_w_queues; + } +} + +static int nvme_setup_irqs(struct nvme_dev *dev, unsigned int nr_io_queues) +{ + struct pci_dev *pdev = to_pci_dev(dev->dev); + int irq_sets[2]; + struct irq_affinity affd = { + .pre_vectors = 1, + .nr_sets = ARRAY_SIZE(irq_sets), + .sets = irq_sets, + }; + int result = 0; + unsigned int irq_queues, this_p_queues; + + /* + * Poll queues don't need interrupts, but we need at least one IO + * queue left over for non-polled IO. + */ + this_p_queues = poll_queues; + if (this_p_queues >= nr_io_queues) { + this_p_queues = nr_io_queues - 1; + irq_queues = 1; + } else { + irq_queues = nr_io_queues - this_p_queues; + } + dev->io_queues[HCTX_TYPE_POLL] = this_p_queues; + + /* + * For irq sets, we have to ask for minvec == maxvec. This passes + * any reduction back to us, so we can adjust our queue counts and + * IRQ vector needs. + */ + do { + nvme_calc_io_queues(dev, irq_queues); + irq_sets[0] = dev->io_queues[HCTX_TYPE_DEFAULT]; + irq_sets[1] = dev->io_queues[HCTX_TYPE_READ]; + if (!irq_sets[1]) + affd.nr_sets = 1; + + /* + * If we got a failure and we're down to asking for just + * 1 + 1 queues, just ask for a single vector. We'll share + * that between the single IO queue and the admin queue. + */ + if (result >= 0 && irq_queues > 1) + irq_queues = irq_sets[0] + irq_sets[1] + 1; + + result = pci_alloc_irq_vectors_affinity(pdev, irq_queues, + irq_queues, + PCI_IRQ_ALL_TYPES | PCI_IRQ_AFFINITY, &affd); + + /* + * Need to reduce our vec counts. If we get ENOSPC, the + * platform should support mulitple vecs, we just need + * to decrease our ask. If we get EINVAL, the platform + * likely does not. Back down to ask for just one vector. + */ + if (result == -ENOSPC) { + irq_queues--; + if (!irq_queues) + return result; + continue; + } else if (result == -EINVAL) { + irq_queues = 1; + continue; + } else if (result <= 0) + return -EIO; + break; + } while (1); + + return result; +} + static int nvme_setup_io_queues(struct nvme_dev *dev) { struct nvme_queue *adminq = &dev->queues[0]; @@ -1898,17 +2139,15 @@ static int nvme_setup_io_queues(struct nvme_dev *dev) int result, nr_io_queues; unsigned long size; - struct irq_affinity affd = { - .pre_vectors = 1 - }; - - nr_io_queues = num_possible_cpus(); + nr_io_queues = max_io_queues(); result = nvme_set_queue_count(&dev->ctrl, &nr_io_queues); if (result < 0) return result; if (nr_io_queues == 0) return 0; + + clear_bit(NVMEQ_ENABLED, &adminq->flags); if (dev->cmb_use_sqes) { result = nvme_cmb_qdepth(dev, nr_io_queues, @@ -1937,12 +2176,19 @@ static int nvme_setup_io_queues(struct nvme_dev *dev) * setting up the full range we need. */ pci_free_irq_vectors(pdev); - result = pci_alloc_irq_vectors_affinity(pdev, 1, nr_io_queues + 1, - PCI_IRQ_ALL_TYPES | PCI_IRQ_AFFINITY, &affd); + + result = nvme_setup_irqs(dev, nr_io_queues); if (result <= 0) return -EIO; + dev->num_vecs = result; - dev->max_qid = max(result - 1, 1); + result = max(result - 1, 1); + dev->max_qid = result + dev->io_queues[HCTX_TYPE_POLL]; + + dev_info(dev->ctrl.device, "%d/%d/%d default/read/poll queues\n", + dev->io_queues[HCTX_TYPE_DEFAULT], + dev->io_queues[HCTX_TYPE_READ], + dev->io_queues[HCTX_TYPE_POLL]); /* * Should investigate if there's a performance win from allocating @@ -1956,6 +2202,7 @@ static int nvme_setup_io_queues(struct nvme_dev *dev) adminq->cq_vector = -1; return result; } + set_bit(NVMEQ_ENABLED, &adminq->flags); return nvme_create_io_queues(dev); } @@ -1964,23 +2211,15 @@ static void nvme_del_queue_end(struct request *req, blk_status_t error) struct nvme_queue *nvmeq = req->end_io_data; blk_mq_free_request(req); - complete(&nvmeq->dev->ioq_wait); + complete(&nvmeq->delete_done); } static void nvme_del_cq_end(struct request *req, blk_status_t error) { struct nvme_queue *nvmeq = req->end_io_data; - u16 start, end; - - if (!error) { - unsigned long flags; - spin_lock_irqsave(&nvmeq->cq_lock, flags); - nvme_process_cq(nvmeq, &start, &end, -1); - spin_unlock_irqrestore(&nvmeq->cq_lock, flags); - - nvme_complete_cqes(nvmeq, start, end); - } + if (error) + set_bit(NVMEQ_DELETE_ERROR, &nvmeq->flags); nvme_del_queue_end(req, error); } @@ -2002,37 +2241,44 @@ static int nvme_delete_queue(struct nvme_queue *nvmeq, u8 opcode) req->timeout = ADMIN_TIMEOUT; req->end_io_data = nvmeq; + init_completion(&nvmeq->delete_done); blk_execute_rq_nowait(q, NULL, req, false, opcode == nvme_admin_delete_cq ? nvme_del_cq_end : nvme_del_queue_end); return 0; } -static void nvme_disable_io_queues(struct nvme_dev *dev) +static bool nvme_disable_io_queues(struct nvme_dev *dev, u8 opcode) { - int pass, queues = dev->online_queues - 1; + int nr_queues = dev->online_queues - 1, sent = 0; unsigned long timeout; - u8 opcode = nvme_admin_delete_sq; - - for (pass = 0; pass < 2; pass++) { - int sent = 0, i = queues; - reinit_completion(&dev->ioq_wait); retry: - timeout = ADMIN_TIMEOUT; - for (; i > 0; i--, sent++) - if (nvme_delete_queue(&dev->queues[i], opcode)) - break; - - while (sent--) { - timeout = wait_for_completion_io_timeout(&dev->ioq_wait, timeout); - if (timeout == 0) - return; - if (i) - goto retry; - } - opcode = nvme_admin_delete_cq; + timeout = ADMIN_TIMEOUT; + while (nr_queues > 0) { + if (nvme_delete_queue(&dev->queues[nr_queues], opcode)) + break; + nr_queues--; + sent++; } + while (sent) { + struct nvme_queue *nvmeq = &dev->queues[nr_queues + sent]; + + timeout = wait_for_completion_io_timeout(&nvmeq->delete_done, + timeout); + if (timeout == 0) + return false; + + /* handle any remaining CQEs */ + if (opcode == nvme_admin_delete_cq && + !test_bit(NVMEQ_DELETE_ERROR, &nvmeq->flags)) + nvme_poll_irqdisable(nvmeq, -1); + + sent--; + if (nr_queues) + goto retry; + } + return true; } /* @@ -2045,6 +2291,10 @@ static int nvme_dev_add(struct nvme_dev *dev) if (!dev->ctrl.tagset) { dev->tagset.ops = &nvme_mq_ops; dev->tagset.nr_hw_queues = dev->online_queues - 1; + dev->tagset.nr_maps = 2; /* default + read */ + if (dev->io_queues[HCTX_TYPE_POLL]) + dev->tagset.nr_maps++; + dev->tagset.nr_maps = HCTX_MAX_TYPES; dev->tagset.timeout = NVME_IO_TIMEOUT; dev->tagset.numa_node = dev_to_node(dev->dev); dev->tagset.queue_depth = @@ -2187,7 +2437,8 @@ static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown) nvme_stop_queues(&dev->ctrl); if (!dead && dev->ctrl.queue_count > 0) { - nvme_disable_io_queues(dev); + if (nvme_disable_io_queues(dev, nvme_admin_delete_sq)) + nvme_disable_io_queues(dev, nvme_admin_delete_cq); nvme_disable_admin_queue(dev, shutdown); } for (i = dev->ctrl.queue_count - 1; i >= 0; i--) @@ -2491,8 +2742,8 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id) if (!dev) return -ENOMEM; - dev->queues = kcalloc_node(num_possible_cpus() + 1, - sizeof(struct nvme_queue), GFP_KERNEL, node); + dev->queues = kcalloc_node(max_queue_count(), sizeof(struct nvme_queue), + GFP_KERNEL, node); if (!dev->queues) goto free; @@ -2506,7 +2757,6 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id) INIT_WORK(&dev->ctrl.reset_work, nvme_reset_work); INIT_WORK(&dev->remove_work, nvme_remove_dead_ctrl_work); mutex_init(&dev->shutdown_lock); - init_completion(&dev->ioq_wait); result = nvme_setup_prp_pools(dev); if (result) diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index ab6ec7295bf9..0a2fd2949ad7 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -162,6 +162,13 @@ static inline int nvme_rdma_queue_idx(struct nvme_rdma_queue *queue) return queue - queue->ctrl->queues; } +static bool nvme_rdma_poll_queue(struct nvme_rdma_queue *queue) +{ + return nvme_rdma_queue_idx(queue) > + queue->ctrl->ctrl.opts->nr_io_queues + + queue->ctrl->ctrl.opts->nr_write_queues; +} + static inline size_t nvme_rdma_inline_data_size(struct nvme_rdma_queue *queue) { return queue->cmnd_capsule_len - sizeof(struct nvme_command); @@ -440,6 +447,7 @@ static int nvme_rdma_create_queue_ib(struct nvme_rdma_queue *queue) const int send_wr_factor = 3; /* MR, SEND, INV */ const int cq_factor = send_wr_factor + 1; /* + RECV */ int comp_vector, idx = nvme_rdma_queue_idx(queue); + enum ib_poll_context poll_ctx; int ret; queue->device = nvme_rdma_find_get_device(queue->cm_id); @@ -456,10 +464,16 @@ static int nvme_rdma_create_queue_ib(struct nvme_rdma_queue *queue) */ comp_vector = idx == 0 ? idx : idx - 1; + /* Polling queues need direct cq polling context */ + if (nvme_rdma_poll_queue(queue)) + poll_ctx = IB_POLL_DIRECT; + else + poll_ctx = IB_POLL_SOFTIRQ; + /* +1 for ib_stop_cq */ queue->ib_cq = ib_alloc_cq(ibdev, queue, cq_factor * queue->queue_size + 1, - comp_vector, IB_POLL_SOFTIRQ); + comp_vector, poll_ctx); if (IS_ERR(queue->ib_cq)) { ret = PTR_ERR(queue->ib_cq); goto out_put_dev; @@ -595,15 +609,17 @@ static void nvme_rdma_stop_io_queues(struct nvme_rdma_ctrl *ctrl) static int nvme_rdma_start_queue(struct nvme_rdma_ctrl *ctrl, int idx) { + struct nvme_rdma_queue *queue = &ctrl->queues[idx]; + bool poll = nvme_rdma_poll_queue(queue); int ret; if (idx) - ret = nvmf_connect_io_queue(&ctrl->ctrl, idx); + ret = nvmf_connect_io_queue(&ctrl->ctrl, idx, poll); else ret = nvmf_connect_admin_queue(&ctrl->ctrl); if (!ret) - set_bit(NVME_RDMA_Q_LIVE, &ctrl->queues[idx].flags); + set_bit(NVME_RDMA_Q_LIVE, &queue->flags); else dev_info(ctrl->ctrl.device, "failed to connect queue: %d ret=%d\n", idx, ret); @@ -645,6 +661,9 @@ static int nvme_rdma_alloc_io_queues(struct nvme_rdma_ctrl *ctrl) nr_io_queues = min_t(unsigned int, nr_io_queues, ibdev->num_comp_vectors); + nr_io_queues += min(opts->nr_write_queues, num_online_cpus()); + nr_io_queues += min(opts->nr_poll_queues, num_online_cpus()); + ret = nvme_set_queue_count(&ctrl->ctrl, &nr_io_queues); if (ret) return ret; @@ -694,7 +713,7 @@ static struct blk_mq_tag_set *nvme_rdma_alloc_tagset(struct nvme_ctrl *nctrl, set->ops = &nvme_rdma_admin_mq_ops; set->queue_depth = NVME_AQ_MQ_TAG_DEPTH; set->reserved_tags = 2; /* connect + keep-alive */ - set->numa_node = NUMA_NO_NODE; + set->numa_node = nctrl->numa_node; set->cmd_size = sizeof(struct nvme_rdma_request) + SG_CHUNK_SIZE * sizeof(struct scatterlist); set->driver_data = ctrl; @@ -707,13 +726,14 @@ static struct blk_mq_tag_set *nvme_rdma_alloc_tagset(struct nvme_ctrl *nctrl, set->ops = &nvme_rdma_mq_ops; set->queue_depth = nctrl->sqsize + 1; set->reserved_tags = 1; /* fabric connect */ - set->numa_node = NUMA_NO_NODE; + set->numa_node = nctrl->numa_node; set->flags = BLK_MQ_F_SHOULD_MERGE; set->cmd_size = sizeof(struct nvme_rdma_request) + SG_CHUNK_SIZE * sizeof(struct scatterlist); set->driver_data = ctrl; set->nr_hw_queues = nctrl->queue_count - 1; set->timeout = NVME_IO_TIMEOUT; + set->nr_maps = nctrl->opts->nr_poll_queues ? HCTX_MAX_TYPES : 2; } ret = blk_mq_alloc_tag_set(set); @@ -763,6 +783,7 @@ static int nvme_rdma_configure_admin_queue(struct nvme_rdma_ctrl *ctrl, return error; ctrl->device = ctrl->queues[0].device; + ctrl->ctrl.numa_node = dev_to_node(ctrl->device->dev->dma_device); ctrl->max_fr_pages = nvme_rdma_get_max_fr_pages(ctrl->device->dev); @@ -1411,12 +1432,11 @@ static void nvme_rdma_submit_async_event(struct nvme_ctrl *arg) WARN_ON_ONCE(ret); } -static int nvme_rdma_process_nvme_rsp(struct nvme_rdma_queue *queue, - struct nvme_completion *cqe, struct ib_wc *wc, int tag) +static void nvme_rdma_process_nvme_rsp(struct nvme_rdma_queue *queue, + struct nvme_completion *cqe, struct ib_wc *wc) { struct request *rq; struct nvme_rdma_request *req; - int ret = 0; rq = blk_mq_tag_to_rq(nvme_rdma_tagset(queue), cqe->command_id); if (!rq) { @@ -1424,7 +1444,7 @@ static int nvme_rdma_process_nvme_rsp(struct nvme_rdma_queue *queue, "tag 0x%x on QP %#x not found\n", cqe->command_id, queue->qp->qp_num); nvme_rdma_error_recovery(queue->ctrl); - return ret; + return; } req = blk_mq_rq_to_pdu(rq); @@ -1439,6 +1459,8 @@ static int nvme_rdma_process_nvme_rsp(struct nvme_rdma_queue *queue, nvme_rdma_error_recovery(queue->ctrl); } } else if (req->mr) { + int ret; + ret = nvme_rdma_inv_rkey(queue, req); if (unlikely(ret < 0)) { dev_err(queue->ctrl->ctrl.device, @@ -1447,19 +1469,14 @@ static int nvme_rdma_process_nvme_rsp(struct nvme_rdma_queue *queue, nvme_rdma_error_recovery(queue->ctrl); } /* the local invalidation completion will end the request */ - return 0; + return; } - if (refcount_dec_and_test(&req->ref)) { - if (rq->tag == tag) - ret = 1; + if (refcount_dec_and_test(&req->ref)) nvme_end_request(rq, req->status, req->result); - } - - return ret; } -static int __nvme_rdma_recv_done(struct ib_cq *cq, struct ib_wc *wc, int tag) +static void nvme_rdma_recv_done(struct ib_cq *cq, struct ib_wc *wc) { struct nvme_rdma_qe *qe = container_of(wc->wr_cqe, struct nvme_rdma_qe, cqe); @@ -1467,11 +1484,10 @@ static int __nvme_rdma_recv_done(struct ib_cq *cq, struct ib_wc *wc, int tag) struct ib_device *ibdev = queue->device->dev; struct nvme_completion *cqe = qe->data; const size_t len = sizeof(struct nvme_completion); - int ret = 0; if (unlikely(wc->status != IB_WC_SUCCESS)) { nvme_rdma_wr_error(cq, wc, "RECV"); - return 0; + return; } ib_dma_sync_single_for_cpu(ibdev, qe->dma, len, DMA_FROM_DEVICE); @@ -1486,16 +1502,10 @@ static int __nvme_rdma_recv_done(struct ib_cq *cq, struct ib_wc *wc, int tag) nvme_complete_async_event(&queue->ctrl->ctrl, cqe->status, &cqe->result); else - ret = nvme_rdma_process_nvme_rsp(queue, cqe, wc, tag); + nvme_rdma_process_nvme_rsp(queue, cqe, wc); ib_dma_sync_single_for_device(ibdev, qe->dma, len, DMA_FROM_DEVICE); nvme_rdma_post_recv(queue, qe); - return ret; -} - -static void nvme_rdma_recv_done(struct ib_cq *cq, struct ib_wc *wc) -{ - __nvme_rdma_recv_done(cq, wc, -1); } static int nvme_rdma_conn_established(struct nvme_rdma_queue *queue) @@ -1749,25 +1759,11 @@ err: return BLK_STS_IOERR; } -static int nvme_rdma_poll(struct blk_mq_hw_ctx *hctx, unsigned int tag) +static int nvme_rdma_poll(struct blk_mq_hw_ctx *hctx) { struct nvme_rdma_queue *queue = hctx->driver_data; - struct ib_cq *cq = queue->ib_cq; - struct ib_wc wc; - int found = 0; - - while (ib_poll_cq(cq, 1, &wc) > 0) { - struct ib_cqe *cqe = wc.wr_cqe; - - if (cqe) { - if (cqe->done == nvme_rdma_recv_done) - found |= __nvme_rdma_recv_done(cq, &wc, tag); - else - cqe->done(cq, &wc); - } - } - return found; + return ib_process_cq_direct(queue->ib_cq, -1); } static void nvme_rdma_complete_rq(struct request *rq) @@ -1782,7 +1778,36 @@ static int nvme_rdma_map_queues(struct blk_mq_tag_set *set) { struct nvme_rdma_ctrl *ctrl = set->driver_data; - return blk_mq_rdma_map_queues(set, ctrl->device->dev, 0); + set->map[HCTX_TYPE_DEFAULT].queue_offset = 0; + set->map[HCTX_TYPE_READ].nr_queues = ctrl->ctrl.opts->nr_io_queues; + if (ctrl->ctrl.opts->nr_write_queues) { + /* separate read/write queues */ + set->map[HCTX_TYPE_DEFAULT].nr_queues = + ctrl->ctrl.opts->nr_write_queues; + set->map[HCTX_TYPE_READ].queue_offset = + ctrl->ctrl.opts->nr_write_queues; + } else { + /* mixed read/write queues */ + set->map[HCTX_TYPE_DEFAULT].nr_queues = + ctrl->ctrl.opts->nr_io_queues; + set->map[HCTX_TYPE_READ].queue_offset = 0; + } + blk_mq_rdma_map_queues(&set->map[HCTX_TYPE_DEFAULT], + ctrl->device->dev, 0); + blk_mq_rdma_map_queues(&set->map[HCTX_TYPE_READ], + ctrl->device->dev, 0); + + if (ctrl->ctrl.opts->nr_poll_queues) { + set->map[HCTX_TYPE_POLL].nr_queues = + ctrl->ctrl.opts->nr_poll_queues; + set->map[HCTX_TYPE_POLL].queue_offset = + ctrl->ctrl.opts->nr_io_queues; + if (ctrl->ctrl.opts->nr_write_queues) + set->map[HCTX_TYPE_POLL].queue_offset += + ctrl->ctrl.opts->nr_write_queues; + blk_mq_map_queues(&set->map[HCTX_TYPE_POLL]); + } + return 0; } static const struct blk_mq_ops nvme_rdma_mq_ops = { @@ -1791,9 +1816,9 @@ static const struct blk_mq_ops nvme_rdma_mq_ops = { .init_request = nvme_rdma_init_request, .exit_request = nvme_rdma_exit_request, .init_hctx = nvme_rdma_init_hctx, - .poll = nvme_rdma_poll, .timeout = nvme_rdma_timeout, .map_queues = nvme_rdma_map_queues, + .poll = nvme_rdma_poll, }; static const struct blk_mq_ops nvme_rdma_admin_mq_ops = { @@ -1938,7 +1963,8 @@ static struct nvme_ctrl *nvme_rdma_create_ctrl(struct device *dev, INIT_WORK(&ctrl->err_work, nvme_rdma_error_recovery_work); INIT_WORK(&ctrl->ctrl.reset_work, nvme_rdma_reset_ctrl_work); - ctrl->ctrl.queue_count = opts->nr_io_queues + 1; /* +1 for admin queue */ + ctrl->ctrl.queue_count = opts->nr_io_queues + opts->nr_write_queues + + opts->nr_poll_queues + 1; ctrl->ctrl.sqsize = opts->queue_size - 1; ctrl->ctrl.kato = opts->kato; @@ -1989,7 +2015,8 @@ static struct nvmf_transport_ops nvme_rdma_transport = { .module = THIS_MODULE, .required_opts = NVMF_OPT_TRADDR, .allowed_opts = NVMF_OPT_TRSVCID | NVMF_OPT_RECONNECT_DELAY | - NVMF_OPT_HOST_TRADDR | NVMF_OPT_CTRL_LOSS_TMO, + NVMF_OPT_HOST_TRADDR | NVMF_OPT_CTRL_LOSS_TMO | + NVMF_OPT_NR_WRITE_QUEUES | NVMF_OPT_NR_POLL_QUEUES, .create_ctrl = nvme_rdma_create_ctrl, }; diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c new file mode 100644 index 000000000000..de174912445e --- /dev/null +++ b/drivers/nvme/host/tcp.c @@ -0,0 +1,2278 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * NVMe over Fabrics TCP host. + * Copyright (c) 2018 Lightbits Labs. All rights reserved. + */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt +#include <linux/module.h> +#include <linux/init.h> +#include <linux/slab.h> +#include <linux/err.h> +#include <linux/nvme-tcp.h> +#include <net/sock.h> +#include <net/tcp.h> +#include <linux/blk-mq.h> +#include <crypto/hash.h> + +#include "nvme.h" +#include "fabrics.h" + +struct nvme_tcp_queue; + +enum nvme_tcp_send_state { + NVME_TCP_SEND_CMD_PDU = 0, + NVME_TCP_SEND_H2C_PDU, + NVME_TCP_SEND_DATA, + NVME_TCP_SEND_DDGST, +}; + +struct nvme_tcp_request { + struct nvme_request req; + void *pdu; + struct nvme_tcp_queue *queue; + u32 data_len; + u32 pdu_len; + u32 pdu_sent; + u16 ttag; + struct list_head entry; + __le32 ddgst; + + struct bio *curr_bio; + struct iov_iter iter; + + /* send state */ + size_t offset; + size_t data_sent; + enum nvme_tcp_send_state state; +}; + +enum nvme_tcp_queue_flags { + NVME_TCP_Q_ALLOCATED = 0, + NVME_TCP_Q_LIVE = 1, +}; + +enum nvme_tcp_recv_state { + NVME_TCP_RECV_PDU = 0, + NVME_TCP_RECV_DATA, + NVME_TCP_RECV_DDGST, +}; + +struct nvme_tcp_ctrl; +struct nvme_tcp_queue { + struct socket *sock; + struct work_struct io_work; + int io_cpu; + + spinlock_t lock; + struct list_head send_list; + + /* recv state */ + void *pdu; + int pdu_remaining; + int pdu_offset; + size_t data_remaining; + size_t ddgst_remaining; + + /* send state */ + struct nvme_tcp_request *request; + + int queue_size; + size_t cmnd_capsule_len; + struct nvme_tcp_ctrl *ctrl; + unsigned long flags; + bool rd_enabled; + + bool hdr_digest; + bool data_digest; + struct ahash_request *rcv_hash; + struct ahash_request *snd_hash; + __le32 exp_ddgst; + __le32 recv_ddgst; + + struct page_frag_cache pf_cache; + + void (*state_change)(struct sock *); + void (*data_ready)(struct sock *); + void (*write_space)(struct sock *); +}; + +struct nvme_tcp_ctrl { + /* read only in the hot path */ + struct nvme_tcp_queue *queues; + struct blk_mq_tag_set tag_set; + + /* other member variables */ + struct list_head list; + struct blk_mq_tag_set admin_tag_set; + struct sockaddr_storage addr; + struct sockaddr_storage src_addr; + struct nvme_ctrl ctrl; + + struct work_struct err_work; + struct delayed_work connect_work; + struct nvme_tcp_request async_req; +}; + +static LIST_HEAD(nvme_tcp_ctrl_list); +static DEFINE_MUTEX(nvme_tcp_ctrl_mutex); +static struct workqueue_struct *nvme_tcp_wq; +static struct blk_mq_ops nvme_tcp_mq_ops; +static struct blk_mq_ops nvme_tcp_admin_mq_ops; + +static inline struct nvme_tcp_ctrl *to_tcp_ctrl(struct nvme_ctrl *ctrl) +{ + return container_of(ctrl, struct nvme_tcp_ctrl, ctrl); +} + +static inline int nvme_tcp_queue_id(struct nvme_tcp_queue *queue) +{ + return queue - queue->ctrl->queues; +} + +static inline struct blk_mq_tags *nvme_tcp_tagset(struct nvme_tcp_queue *queue) +{ + u32 queue_idx = nvme_tcp_queue_id(queue); + + if (queue_idx == 0) + return queue->ctrl->admin_tag_set.tags[queue_idx]; + return queue->ctrl->tag_set.tags[queue_idx - 1]; +} + +static inline u8 nvme_tcp_hdgst_len(struct nvme_tcp_queue *queue) +{ + return queue->hdr_digest ? NVME_TCP_DIGEST_LENGTH : 0; +} + +static inline u8 nvme_tcp_ddgst_len(struct nvme_tcp_queue *queue) +{ + return queue->data_digest ? NVME_TCP_DIGEST_LENGTH : 0; +} + +static inline size_t nvme_tcp_inline_data_size(struct nvme_tcp_queue *queue) +{ + return queue->cmnd_capsule_len - sizeof(struct nvme_command); +} + +static inline bool nvme_tcp_async_req(struct nvme_tcp_request *req) +{ + return req == &req->queue->ctrl->async_req; +} + +static inline bool nvme_tcp_has_inline_data(struct nvme_tcp_request *req) +{ + struct request *rq; + unsigned int bytes; + + if (unlikely(nvme_tcp_async_req(req))) + return false; /* async events don't have a request */ + + rq = blk_mq_rq_from_pdu(req); + bytes = blk_rq_payload_bytes(rq); + + return rq_data_dir(rq) == WRITE && bytes && + bytes <= nvme_tcp_inline_data_size(req->queue); +} + +static inline struct page *nvme_tcp_req_cur_page(struct nvme_tcp_request *req) +{ + return req->iter.bvec->bv_page; +} + +static inline size_t nvme_tcp_req_cur_offset(struct nvme_tcp_request *req) +{ + return req->iter.bvec->bv_offset + req->iter.iov_offset; +} + +static inline size_t nvme_tcp_req_cur_length(struct nvme_tcp_request *req) +{ + return min_t(size_t, req->iter.bvec->bv_len - req->iter.iov_offset, + req->pdu_len - req->pdu_sent); +} + +static inline size_t nvme_tcp_req_offset(struct nvme_tcp_request *req) +{ + return req->iter.iov_offset; +} + +static inline size_t nvme_tcp_pdu_data_left(struct nvme_tcp_request *req) +{ + return rq_data_dir(blk_mq_rq_from_pdu(req)) == WRITE ? + req->pdu_len - req->pdu_sent : 0; +} + +static inline size_t nvme_tcp_pdu_last_send(struct nvme_tcp_request *req, + int len) +{ + return nvme_tcp_pdu_data_left(req) <= len; +} + +static void nvme_tcp_init_iter(struct nvme_tcp_request *req, + unsigned int dir) +{ + struct request *rq = blk_mq_rq_from_pdu(req); + struct bio_vec *vec; + unsigned int size; + int nsegs; + size_t offset; + + if (rq->rq_flags & RQF_SPECIAL_PAYLOAD) { + vec = &rq->special_vec; + nsegs = 1; + size = blk_rq_payload_bytes(rq); + offset = 0; + } else { + struct bio *bio = req->curr_bio; + + vec = __bvec_iter_bvec(bio->bi_io_vec, bio->bi_iter); + nsegs = bio_segments(bio); + size = bio->bi_iter.bi_size; + offset = bio->bi_iter.bi_bvec_done; + } + + iov_iter_bvec(&req->iter, dir, vec, nsegs, size); + req->iter.iov_offset = offset; +} + +static inline void nvme_tcp_advance_req(struct nvme_tcp_request *req, + int len) +{ + req->data_sent += len; + req->pdu_sent += len; + iov_iter_advance(&req->iter, len); + if (!iov_iter_count(&req->iter) && + req->data_sent < req->data_len) { + req->curr_bio = req->curr_bio->bi_next; + nvme_tcp_init_iter(req, WRITE); + } +} + +static inline void nvme_tcp_queue_request(struct nvme_tcp_request *req) +{ + struct nvme_tcp_queue *queue = req->queue; + + spin_lock(&queue->lock); + list_add_tail(&req->entry, &queue->send_list); + spin_unlock(&queue->lock); + + queue_work_on(queue->io_cpu, nvme_tcp_wq, &queue->io_work); +} + +static inline struct nvme_tcp_request * +nvme_tcp_fetch_request(struct nvme_tcp_queue *queue) +{ + struct nvme_tcp_request *req; + + spin_lock(&queue->lock); + req = list_first_entry_or_null(&queue->send_list, + struct nvme_tcp_request, entry); + if (req) + list_del(&req->entry); + spin_unlock(&queue->lock); + + return req; +} + +static inline void nvme_tcp_ddgst_final(struct ahash_request *hash, + __le32 *dgst) +{ + ahash_request_set_crypt(hash, NULL, (u8 *)dgst, 0); + crypto_ahash_final(hash); +} + +static inline void nvme_tcp_ddgst_update(struct ahash_request *hash, + struct page *page, off_t off, size_t len) +{ + struct scatterlist sg; + + sg_init_marker(&sg, 1); + sg_set_page(&sg, page, len, off); + ahash_request_set_crypt(hash, &sg, NULL, len); + crypto_ahash_update(hash); +} + +static inline void nvme_tcp_hdgst(struct ahash_request *hash, + void *pdu, size_t len) +{ + struct scatterlist sg; + + sg_init_one(&sg, pdu, len); + ahash_request_set_crypt(hash, &sg, pdu + len, len); + crypto_ahash_digest(hash); +} + +static int nvme_tcp_verify_hdgst(struct nvme_tcp_queue *queue, + void *pdu, size_t pdu_len) +{ + struct nvme_tcp_hdr *hdr = pdu; + __le32 recv_digest; + __le32 exp_digest; + + if (unlikely(!(hdr->flags & NVME_TCP_F_HDGST))) { + dev_err(queue->ctrl->ctrl.device, + "queue %d: header digest flag is cleared\n", + nvme_tcp_queue_id(queue)); + return -EPROTO; + } + + recv_digest = *(__le32 *)(pdu + hdr->hlen); + nvme_tcp_hdgst(queue->rcv_hash, pdu, pdu_len); + exp_digest = *(__le32 *)(pdu + hdr->hlen); + if (recv_digest != exp_digest) { + dev_err(queue->ctrl->ctrl.device, + "header digest error: recv %#x expected %#x\n", + le32_to_cpu(recv_digest), le32_to_cpu(exp_digest)); + return -EIO; + } + + return 0; +} + +static int nvme_tcp_check_ddgst(struct nvme_tcp_queue *queue, void *pdu) +{ + struct nvme_tcp_hdr *hdr = pdu; + u8 digest_len = nvme_tcp_hdgst_len(queue); + u32 len; + + len = le32_to_cpu(hdr->plen) - hdr->hlen - + ((hdr->flags & NVME_TCP_F_HDGST) ? digest_len : 0); + + if (unlikely(len && !(hdr->flags & NVME_TCP_F_DDGST))) { + dev_err(queue->ctrl->ctrl.device, + "queue %d: data digest flag is cleared\n", + nvme_tcp_queue_id(queue)); + return -EPROTO; + } + crypto_ahash_init(queue->rcv_hash); + + return 0; +} + +static void nvme_tcp_exit_request(struct blk_mq_tag_set *set, + struct request *rq, unsigned int hctx_idx) +{ + struct nvme_tcp_request *req = blk_mq_rq_to_pdu(rq); + + page_frag_free(req->pdu); +} + +static int nvme_tcp_init_request(struct blk_mq_tag_set *set, + struct request *rq, unsigned int hctx_idx, + unsigned int numa_node) +{ + struct nvme_tcp_ctrl *ctrl = set->driver_data; + struct nvme_tcp_request *req = blk_mq_rq_to_pdu(rq); + int queue_idx = (set == &ctrl->tag_set) ? hctx_idx + 1 : 0; + struct nvme_tcp_queue *queue = &ctrl->queues[queue_idx]; + u8 hdgst = nvme_tcp_hdgst_len(queue); + + req->pdu = page_frag_alloc(&queue->pf_cache, + sizeof(struct nvme_tcp_cmd_pdu) + hdgst, + GFP_KERNEL | __GFP_ZERO); + if (!req->pdu) + return -ENOMEM; + + req->queue = queue; + nvme_req(rq)->ctrl = &ctrl->ctrl; + + return 0; +} + +static int nvme_tcp_init_hctx(struct blk_mq_hw_ctx *hctx, void *data, + unsigned int hctx_idx) +{ + struct nvme_tcp_ctrl *ctrl = data; + struct nvme_tcp_queue *queue = &ctrl->queues[hctx_idx + 1]; + + hctx->driver_data = queue; + return 0; +} + +static int nvme_tcp_init_admin_hctx(struct blk_mq_hw_ctx *hctx, void *data, + unsigned int hctx_idx) +{ + struct nvme_tcp_ctrl *ctrl = data; + struct nvme_tcp_queue *queue = &ctrl->queues[0]; + + hctx->driver_data = queue; + return 0; +} + +static enum nvme_tcp_recv_state +nvme_tcp_recv_state(struct nvme_tcp_queue *queue) +{ + return (queue->pdu_remaining) ? NVME_TCP_RECV_PDU : + (queue->ddgst_remaining) ? NVME_TCP_RECV_DDGST : + NVME_TCP_RECV_DATA; +} + +static void nvme_tcp_init_recv_ctx(struct nvme_tcp_queue *queue) +{ + queue->pdu_remaining = sizeof(struct nvme_tcp_rsp_pdu) + + nvme_tcp_hdgst_len(queue); + queue->pdu_offset = 0; + queue->data_remaining = -1; + queue->ddgst_remaining = 0; +} + +static void nvme_tcp_error_recovery(struct nvme_ctrl *ctrl) +{ + if (!nvme_change_ctrl_state(ctrl, NVME_CTRL_RESETTING)) + return; + + queue_work(nvme_wq, &to_tcp_ctrl(ctrl)->err_work); +} + +static int nvme_tcp_process_nvme_cqe(struct nvme_tcp_queue *queue, + struct nvme_completion *cqe) +{ + struct request *rq; + + rq = blk_mq_tag_to_rq(nvme_tcp_tagset(queue), cqe->command_id); + if (!rq) { + dev_err(queue->ctrl->ctrl.device, + "queue %d tag 0x%x not found\n", + nvme_tcp_queue_id(queue), cqe->command_id); + nvme_tcp_error_recovery(&queue->ctrl->ctrl); + return -EINVAL; + } + + nvme_end_request(rq, cqe->status, cqe->result); + + return 0; +} + +static int nvme_tcp_handle_c2h_data(struct nvme_tcp_queue *queue, + struct nvme_tcp_data_pdu *pdu) +{ + struct request *rq; + + rq = blk_mq_tag_to_rq(nvme_tcp_tagset(queue), pdu->command_id); + if (!rq) { + dev_err(queue->ctrl->ctrl.device, + "queue %d tag %#x not found\n", + nvme_tcp_queue_id(queue), pdu->command_id); + return -ENOENT; + } + + if (!blk_rq_payload_bytes(rq)) { + dev_err(queue->ctrl->ctrl.device, + "queue %d tag %#x unexpected data\n", + nvme_tcp_queue_id(queue), rq->tag); + return -EIO; + } + + queue->data_remaining = le32_to_cpu(pdu->data_length); + + return 0; + +} + +static int nvme_tcp_handle_comp(struct nvme_tcp_queue *queue, + struct nvme_tcp_rsp_pdu *pdu) +{ + struct nvme_completion *cqe = &pdu->cqe; + int ret = 0; + + /* + * AEN requests are special as they don't time out and can + * survive any kind of queue freeze and often don't respond to + * aborts. We don't even bother to allocate a struct request + * for them but rather special case them here. + */ + if (unlikely(nvme_tcp_queue_id(queue) == 0 && + cqe->command_id >= NVME_AQ_BLK_MQ_DEPTH)) + nvme_complete_async_event(&queue->ctrl->ctrl, cqe->status, + &cqe->result); + else + ret = nvme_tcp_process_nvme_cqe(queue, cqe); + + return ret; +} + +static int nvme_tcp_setup_h2c_data_pdu(struct nvme_tcp_request *req, + struct nvme_tcp_r2t_pdu *pdu) +{ + struct nvme_tcp_data_pdu *data = req->pdu; + struct nvme_tcp_queue *queue = req->queue; + struct request *rq = blk_mq_rq_from_pdu(req); + u8 hdgst = nvme_tcp_hdgst_len(queue); + u8 ddgst = nvme_tcp_ddgst_len(queue); + + req->pdu_len = le32_to_cpu(pdu->r2t_length); + req->pdu_sent = 0; + + if (unlikely(req->data_sent + req->pdu_len > req->data_len)) { + dev_err(queue->ctrl->ctrl.device, + "req %d r2t len %u exceeded data len %u (%zu sent)\n", + rq->tag, req->pdu_len, req->data_len, + req->data_sent); + return -EPROTO; + } + + if (unlikely(le32_to_cpu(pdu->r2t_offset) < req->data_sent)) { + dev_err(queue->ctrl->ctrl.device, + "req %d unexpected r2t offset %u (expected %zu)\n", + rq->tag, le32_to_cpu(pdu->r2t_offset), + req->data_sent); + return -EPROTO; + } + + memset(data, 0, sizeof(*data)); + data->hdr.type = nvme_tcp_h2c_data; + data->hdr.flags = NVME_TCP_F_DATA_LAST; + if (queue->hdr_digest) + data->hdr.flags |= NVME_TCP_F_HDGST; + if (queue->data_digest) + data->hdr.flags |= NVME_TCP_F_DDGST; + data->hdr.hlen = sizeof(*data); + data->hdr.pdo = data->hdr.hlen + hdgst; + data->hdr.plen = + cpu_to_le32(data->hdr.hlen + hdgst + req->pdu_len + ddgst); + data->ttag = pdu->ttag; + data->command_id = rq->tag; + data->data_offset = cpu_to_le32(req->data_sent); + data->data_length = cpu_to_le32(req->pdu_len); + return 0; +} + +static int nvme_tcp_handle_r2t(struct nvme_tcp_queue *queue, + struct nvme_tcp_r2t_pdu *pdu) +{ + struct nvme_tcp_request *req; + struct request *rq; + int ret; + + rq = blk_mq_tag_to_rq(nvme_tcp_tagset(queue), pdu->command_id); + if (!rq) { + dev_err(queue->ctrl->ctrl.device, + "queue %d tag %#x not found\n", + nvme_tcp_queue_id(queue), pdu->command_id); + return -ENOENT; + } + req = blk_mq_rq_to_pdu(rq); + + ret = nvme_tcp_setup_h2c_data_pdu(req, pdu); + if (unlikely(ret)) + return ret; + + req->state = NVME_TCP_SEND_H2C_PDU; + req->offset = 0; + + nvme_tcp_queue_request(req); + + return 0; +} + +static int nvme_tcp_recv_pdu(struct nvme_tcp_queue *queue, struct sk_buff *skb, + unsigned int *offset, size_t *len) +{ + struct nvme_tcp_hdr *hdr; + char *pdu = queue->pdu; + size_t rcv_len = min_t(size_t, *len, queue->pdu_remaining); + int ret; + + ret = skb_copy_bits(skb, *offset, + &pdu[queue->pdu_offset], rcv_len); + if (unlikely(ret)) + return ret; + + queue->pdu_remaining -= rcv_len; + queue->pdu_offset += rcv_len; + *offset += rcv_len; + *len -= rcv_len; + if (queue->pdu_remaining) + return 0; + + hdr = queue->pdu; + if (queue->hdr_digest) { + ret = nvme_tcp_verify_hdgst(queue, queue->pdu, hdr->hlen); + if (unlikely(ret)) + return ret; + } + + + if (queue->data_digest) { + ret = nvme_tcp_check_ddgst(queue, queue->pdu); + if (unlikely(ret)) + return ret; + } + + switch (hdr->type) { + case nvme_tcp_c2h_data: + ret = nvme_tcp_handle_c2h_data(queue, (void *)queue->pdu); + break; + case nvme_tcp_rsp: + nvme_tcp_init_recv_ctx(queue); + ret = nvme_tcp_handle_comp(queue, (void *)queue->pdu); + break; + case nvme_tcp_r2t: + nvme_tcp_init_recv_ctx(queue); + ret = nvme_tcp_handle_r2t(queue, (void *)queue->pdu); + break; + default: + dev_err(queue->ctrl->ctrl.device, + "unsupported pdu type (%d)\n", hdr->type); + return -EINVAL; + } + + return ret; +} + +static int nvme_tcp_recv_data(struct nvme_tcp_queue *queue, struct sk_buff *skb, + unsigned int *offset, size_t *len) +{ + struct nvme_tcp_data_pdu *pdu = (void *)queue->pdu; + struct nvme_tcp_request *req; + struct request *rq; + + rq = blk_mq_tag_to_rq(nvme_tcp_tagset(queue), pdu->command_id); + if (!rq) { + dev_err(queue->ctrl->ctrl.device, + "queue %d tag %#x not found\n", + nvme_tcp_queue_id(queue), pdu->command_id); + return -ENOENT; + } + req = blk_mq_rq_to_pdu(rq); + + while (true) { + int recv_len, ret; + + recv_len = min_t(size_t, *len, queue->data_remaining); + if (!recv_len) + break; + + if (!iov_iter_count(&req->iter)) { + req->curr_bio = req->curr_bio->bi_next; + + /* + * If we don`t have any bios it means that controller + * sent more data than we requested, hence error + */ + if (!req->curr_bio) { + dev_err(queue->ctrl->ctrl.device, + "queue %d no space in request %#x", + nvme_tcp_queue_id(queue), rq->tag); + nvme_tcp_init_recv_ctx(queue); + return -EIO; + } + nvme_tcp_init_iter(req, READ); + } + + /* we can read only from what is left in this bio */ + recv_len = min_t(size_t, recv_len, + iov_iter_count(&req->iter)); + + if (queue->data_digest) + ret = skb_copy_and_hash_datagram_iter(skb, *offset, + &req->iter, recv_len, queue->rcv_hash); + else + ret = skb_copy_datagram_iter(skb, *offset, + &req->iter, recv_len); + if (ret) { + dev_err(queue->ctrl->ctrl.device, + "queue %d failed to copy request %#x data", + nvme_tcp_queue_id(queue), rq->tag); + return ret; + } + + *len -= recv_len; + *offset += recv_len; + queue->data_remaining -= recv_len; + } + + if (!queue->data_remaining) { + if (queue->data_digest) { + nvme_tcp_ddgst_final(queue->rcv_hash, &queue->exp_ddgst); + queue->ddgst_remaining = NVME_TCP_DIGEST_LENGTH; + } else { + nvme_tcp_init_recv_ctx(queue); + } + } + + return 0; +} + +static int nvme_tcp_recv_ddgst(struct nvme_tcp_queue *queue, + struct sk_buff *skb, unsigned int *offset, size_t *len) +{ + char *ddgst = (char *)&queue->recv_ddgst; + size_t recv_len = min_t(size_t, *len, queue->ddgst_remaining); + off_t off = NVME_TCP_DIGEST_LENGTH - queue->ddgst_remaining; + int ret; + + ret = skb_copy_bits(skb, *offset, &ddgst[off], recv_len); + if (unlikely(ret)) + return ret; + + queue->ddgst_remaining -= recv_len; + *offset += recv_len; + *len -= recv_len; + if (queue->ddgst_remaining) + return 0; + + if (queue->recv_ddgst != queue->exp_ddgst) { + dev_err(queue->ctrl->ctrl.device, + "data digest error: recv %#x expected %#x\n", + le32_to_cpu(queue->recv_ddgst), + le32_to_cpu(queue->exp_ddgst)); + return -EIO; + } + + nvme_tcp_init_recv_ctx(queue); + return 0; +} + +static int nvme_tcp_recv_skb(read_descriptor_t *desc, struct sk_buff *skb, + unsigned int offset, size_t len) +{ + struct nvme_tcp_queue *queue = desc->arg.data; + size_t consumed = len; + int result; + + while (len) { + switch (nvme_tcp_recv_state(queue)) { + case NVME_TCP_RECV_PDU: + result = nvme_tcp_recv_pdu(queue, skb, &offset, &len); + break; + case NVME_TCP_RECV_DATA: + result = nvme_tcp_recv_data(queue, skb, &offset, &len); + break; + case NVME_TCP_RECV_DDGST: + result = nvme_tcp_recv_ddgst(queue, skb, &offset, &len); + break; + default: + result = -EFAULT; + } + if (result) { + dev_err(queue->ctrl->ctrl.device, + "receive failed: %d\n", result); + queue->rd_enabled = false; + nvme_tcp_error_recovery(&queue->ctrl->ctrl); + return result; + } + } + + return consumed; +} + +static void nvme_tcp_data_ready(struct sock *sk) +{ + struct nvme_tcp_queue *queue; + + read_lock(&sk->sk_callback_lock); + queue = sk->sk_user_data; + if (likely(queue && queue->rd_enabled)) + queue_work_on(queue->io_cpu, nvme_tcp_wq, &queue->io_work); + read_unlock(&sk->sk_callback_lock); +} + +static void nvme_tcp_write_space(struct sock *sk) +{ + struct nvme_tcp_queue *queue; + + read_lock_bh(&sk->sk_callback_lock); + queue = sk->sk_user_data; + if (likely(queue && sk_stream_is_writeable(sk))) { + clear_bit(SOCK_NOSPACE, &sk->sk_socket->flags); + queue_work_on(queue->io_cpu, nvme_tcp_wq, &queue->io_work); + } + read_unlock_bh(&sk->sk_callback_lock); +} + +static void nvme_tcp_state_change(struct sock *sk) +{ + struct nvme_tcp_queue *queue; + + read_lock(&sk->sk_callback_lock); + queue = sk->sk_user_data; + if (!queue) + goto done; + + switch (sk->sk_state) { + case TCP_CLOSE: + case TCP_CLOSE_WAIT: + case TCP_LAST_ACK: + case TCP_FIN_WAIT1: + case TCP_FIN_WAIT2: + /* fallthrough */ + nvme_tcp_error_recovery(&queue->ctrl->ctrl); + break; + default: + dev_info(queue->ctrl->ctrl.device, + "queue %d socket state %d\n", + nvme_tcp_queue_id(queue), sk->sk_state); + } + + queue->state_change(sk); +done: + read_unlock(&sk->sk_callback_lock); +} + +static inline void nvme_tcp_done_send_req(struct nvme_tcp_queue *queue) +{ + queue->request = NULL; +} + +static void nvme_tcp_fail_request(struct nvme_tcp_request *req) +{ + union nvme_result res = {}; + + nvme_end_request(blk_mq_rq_from_pdu(req), + cpu_to_le16(NVME_SC_DATA_XFER_ERROR), res); +} + +static int nvme_tcp_try_send_data(struct nvme_tcp_request *req) +{ + struct nvme_tcp_queue *queue = req->queue; + + while (true) { + struct page *page = nvme_tcp_req_cur_page(req); + size_t offset = nvme_tcp_req_cur_offset(req); + size_t len = nvme_tcp_req_cur_length(req); + bool last = nvme_tcp_pdu_last_send(req, len); + int ret, flags = MSG_DONTWAIT; + + if (last && !queue->data_digest) + flags |= MSG_EOR; + else + flags |= MSG_MORE; + + ret = kernel_sendpage(queue->sock, page, offset, len, flags); + if (ret <= 0) + return ret; + + nvme_tcp_advance_req(req, ret); + if (queue->data_digest) + nvme_tcp_ddgst_update(queue->snd_hash, page, + offset, ret); + + /* fully successful last write*/ + if (last && ret == len) { + if (queue->data_digest) { + nvme_tcp_ddgst_final(queue->snd_hash, + &req->ddgst); + req->state = NVME_TCP_SEND_DDGST; + req->offset = 0; + } else { + nvme_tcp_done_send_req(queue); + } + return 1; + } + } + return -EAGAIN; +} + +static int nvme_tcp_try_send_cmd_pdu(struct nvme_tcp_request *req) +{ + struct nvme_tcp_queue *queue = req->queue; + struct nvme_tcp_cmd_pdu *pdu = req->pdu; + bool inline_data = nvme_tcp_has_inline_data(req); + int flags = MSG_DONTWAIT | (inline_data ? MSG_MORE : MSG_EOR); + u8 hdgst = nvme_tcp_hdgst_len(queue); + int len = sizeof(*pdu) + hdgst - req->offset; + int ret; + + if (queue->hdr_digest && !req->offset) + nvme_tcp_hdgst(queue->snd_hash, pdu, sizeof(*pdu)); + + ret = kernel_sendpage(queue->sock, virt_to_page(pdu), + offset_in_page(pdu) + req->offset, len, flags); + if (unlikely(ret <= 0)) + return ret; + + len -= ret; + if (!len) { + if (inline_data) { + req->state = NVME_TCP_SEND_DATA; + if (queue->data_digest) + crypto_ahash_init(queue->snd_hash); + nvme_tcp_init_iter(req, WRITE); + } else { + nvme_tcp_done_send_req(queue); + } + return 1; + } + req->offset += ret; + + return -EAGAIN; +} + +static int nvme_tcp_try_send_data_pdu(struct nvme_tcp_request *req) +{ + struct nvme_tcp_queue *queue = req->queue; + struct nvme_tcp_data_pdu *pdu = req->pdu; + u8 hdgst = nvme_tcp_hdgst_len(queue); + int len = sizeof(*pdu) - req->offset + hdgst; + int ret; + + if (queue->hdr_digest && !req->offset) + nvme_tcp_hdgst(queue->snd_hash, pdu, sizeof(*pdu)); + + ret = kernel_sendpage(queue->sock, virt_to_page(pdu), + offset_in_page(pdu) + req->offset, len, + MSG_DONTWAIT | MSG_MORE); + if (unlikely(ret <= 0)) + return ret; + + len -= ret; + if (!len) { + req->state = NVME_TCP_SEND_DATA; + if (queue->data_digest) + crypto_ahash_init(queue->snd_hash); + if (!req->data_sent) + nvme_tcp_init_iter(req, WRITE); + return 1; + } + req->offset += ret; + + return -EAGAIN; +} + +static int nvme_tcp_try_send_ddgst(struct nvme_tcp_request *req) +{ + struct nvme_tcp_queue *queue = req->queue; + int ret; + struct msghdr msg = { .msg_flags = MSG_DONTWAIT | MSG_EOR }; + struct kvec iov = { + .iov_base = &req->ddgst + req->offset, + .iov_len = NVME_TCP_DIGEST_LENGTH - req->offset + }; + + ret = kernel_sendmsg(queue->sock, &msg, &iov, 1, iov.iov_len); + if (unlikely(ret <= 0)) + return ret; + + if (req->offset + ret == NVME_TCP_DIGEST_LENGTH) { + nvme_tcp_done_send_req(queue); + return 1; + } + + req->offset += ret; + return -EAGAIN; +} + +static int nvme_tcp_try_send(struct nvme_tcp_queue *queue) +{ + struct nvme_tcp_request *req; + int ret = 1; + + if (!queue->request) { + queue->request = nvme_tcp_fetch_request(queue); + if (!queue->request) + return 0; + } + req = queue->request; + + if (req->state == NVME_TCP_SEND_CMD_PDU) { + ret = nvme_tcp_try_send_cmd_pdu(req); + if (ret <= 0) + goto done; + if (!nvme_tcp_has_inline_data(req)) + return ret; + } + + if (req->state == NVME_TCP_SEND_H2C_PDU) { + ret = nvme_tcp_try_send_data_pdu(req); + if (ret <= 0) + goto done; + } + + if (req->state == NVME_TCP_SEND_DATA) { + ret = nvme_tcp_try_send_data(req); + if (ret <= 0) + goto done; + } + + if (req->state == NVME_TCP_SEND_DDGST) + ret = nvme_tcp_try_send_ddgst(req); +done: + if (ret == -EAGAIN) + ret = 0; + return ret; +} + +static int nvme_tcp_try_recv(struct nvme_tcp_queue *queue) +{ + struct sock *sk = queue->sock->sk; + read_descriptor_t rd_desc; + int consumed; + + rd_desc.arg.data = queue; + rd_desc.count = 1; + lock_sock(sk); + consumed = tcp_read_sock(sk, &rd_desc, nvme_tcp_recv_skb); + release_sock(sk); + return consumed; +} + +static void nvme_tcp_io_work(struct work_struct *w) +{ + struct nvme_tcp_queue *queue = + container_of(w, struct nvme_tcp_queue, io_work); + unsigned long start = jiffies + msecs_to_jiffies(1); + + do { + bool pending = false; + int result; + + result = nvme_tcp_try_send(queue); + if (result > 0) { + pending = true; + } else if (unlikely(result < 0)) { + dev_err(queue->ctrl->ctrl.device, + "failed to send request %d\n", result); + if (result != -EPIPE) + nvme_tcp_fail_request(queue->request); + nvme_tcp_done_send_req(queue); + return; + } + + result = nvme_tcp_try_recv(queue); + if (result > 0) + pending = true; + + if (!pending) + return; + + } while (time_after(jiffies, start)); /* quota is exhausted */ + + queue_work_on(queue->io_cpu, nvme_tcp_wq, &queue->io_work); +} + +static void nvme_tcp_free_crypto(struct nvme_tcp_queue *queue) +{ + struct crypto_ahash *tfm = crypto_ahash_reqtfm(queue->rcv_hash); + + ahash_request_free(queue->rcv_hash); + ahash_request_free(queue->snd_hash); + crypto_free_ahash(tfm); +} + +static int nvme_tcp_alloc_crypto(struct nvme_tcp_queue *queue) +{ + struct crypto_ahash *tfm; + + tfm = crypto_alloc_ahash("crc32c", 0, CRYPTO_ALG_ASYNC); + if (IS_ERR(tfm)) + return PTR_ERR(tfm); + + queue->snd_hash = ahash_request_alloc(tfm, GFP_KERNEL); + if (!queue->snd_hash) + goto free_tfm; + ahash_request_set_callback(queue->snd_hash, 0, NULL, NULL); + + queue->rcv_hash = ahash_request_alloc(tfm, GFP_KERNEL); + if (!queue->rcv_hash) + goto free_snd_hash; + ahash_request_set_callback(queue->rcv_hash, 0, NULL, NULL); + + return 0; +free_snd_hash: + ahash_request_free(queue->snd_hash); +free_tfm: + crypto_free_ahash(tfm); + return -ENOMEM; +} + +static void nvme_tcp_free_async_req(struct nvme_tcp_ctrl *ctrl) +{ + struct nvme_tcp_request *async = &ctrl->async_req; + + page_frag_free(async->pdu); +} + +static int nvme_tcp_alloc_async_req(struct nvme_tcp_ctrl *ctrl) +{ + struct nvme_tcp_queue *queue = &ctrl->queues[0]; + struct nvme_tcp_request *async = &ctrl->async_req; + u8 hdgst = nvme_tcp_hdgst_len(queue); + + async->pdu = page_frag_alloc(&queue->pf_cache, + sizeof(struct nvme_tcp_cmd_pdu) + hdgst, + GFP_KERNEL | __GFP_ZERO); + if (!async->pdu) + return -ENOMEM; + + async->queue = &ctrl->queues[0]; + return 0; +} + +static void nvme_tcp_free_queue(struct nvme_ctrl *nctrl, int qid) +{ + struct nvme_tcp_ctrl *ctrl = to_tcp_ctrl(nctrl); + struct nvme_tcp_queue *queue = &ctrl->queues[qid]; + + if (!test_and_clear_bit(NVME_TCP_Q_ALLOCATED, &queue->flags)) + return; + + if (queue->hdr_digest || queue->data_digest) + nvme_tcp_free_crypto(queue); + + sock_release(queue->sock); + kfree(queue->pdu); +} + +static int nvme_tcp_init_connection(struct nvme_tcp_queue *queue) +{ + struct nvme_tcp_icreq_pdu *icreq; + struct nvme_tcp_icresp_pdu *icresp; + struct msghdr msg = {}; + struct kvec iov; + bool ctrl_hdgst, ctrl_ddgst; + int ret; + + icreq = kzalloc(sizeof(*icreq), GFP_KERNEL); + if (!icreq) + return -ENOMEM; + + icresp = kzalloc(sizeof(*icresp), GFP_KERNEL); + if (!icresp) { + ret = -ENOMEM; + goto free_icreq; + } + + icreq->hdr.type = nvme_tcp_icreq; + icreq->hdr.hlen = sizeof(*icreq); + icreq->hdr.pdo = 0; + icreq->hdr.plen = cpu_to_le32(icreq->hdr.hlen); + icreq->pfv = cpu_to_le16(NVME_TCP_PFV_1_0); + icreq->maxr2t = 0; /* single inflight r2t supported */ + icreq->hpda = 0; /* no alignment constraint */ + if (queue->hdr_digest) + icreq->digest |= NVME_TCP_HDR_DIGEST_ENABLE; + if (queue->data_digest) + icreq->digest |= NVME_TCP_DATA_DIGEST_ENABLE; + + iov.iov_base = icreq; + iov.iov_len = sizeof(*icreq); + ret = kernel_sendmsg(queue->sock, &msg, &iov, 1, iov.iov_len); + if (ret < 0) + goto free_icresp; + + memset(&msg, 0, sizeof(msg)); + iov.iov_base = icresp; + iov.iov_len = sizeof(*icresp); + ret = kernel_recvmsg(queue->sock, &msg, &iov, 1, + iov.iov_len, msg.msg_flags); + if (ret < 0) + goto free_icresp; + + ret = -EINVAL; + if (icresp->hdr.type != nvme_tcp_icresp) { + pr_err("queue %d: bad type returned %d\n", + nvme_tcp_queue_id(queue), icresp->hdr.type); + goto free_icresp; + } + + if (le32_to_cpu(icresp->hdr.plen) != sizeof(*icresp)) { + pr_err("queue %d: bad pdu length returned %d\n", + nvme_tcp_queue_id(queue), icresp->hdr.plen); + goto free_icresp; + } + + if (icresp->pfv != NVME_TCP_PFV_1_0) { + pr_err("queue %d: bad pfv returned %d\n", + nvme_tcp_queue_id(queue), icresp->pfv); + goto free_icresp; + } + + ctrl_ddgst = !!(icresp->digest & NVME_TCP_DATA_DIGEST_ENABLE); + if ((queue->data_digest && !ctrl_ddgst) || + (!queue->data_digest && ctrl_ddgst)) { + pr_err("queue %d: data digest mismatch host: %s ctrl: %s\n", + nvme_tcp_queue_id(queue), + queue->data_digest ? "enabled" : "disabled", + ctrl_ddgst ? "enabled" : "disabled"); + goto free_icresp; + } + + ctrl_hdgst = !!(icresp->digest & NVME_TCP_HDR_DIGEST_ENABLE); + if ((queue->hdr_digest && !ctrl_hdgst) || + (!queue->hdr_digest && ctrl_hdgst)) { + pr_err("queue %d: header digest mismatch host: %s ctrl: %s\n", + nvme_tcp_queue_id(queue), + queue->hdr_digest ? "enabled" : "disabled", + ctrl_hdgst ? "enabled" : "disabled"); + goto free_icresp; + } + + if (icresp->cpda != 0) { + pr_err("queue %d: unsupported cpda returned %d\n", + nvme_tcp_queue_id(queue), icresp->cpda); + goto free_icresp; + } + + ret = 0; +free_icresp: + kfree(icresp); +free_icreq: + kfree(icreq); + return ret; +} + +static int nvme_tcp_alloc_queue(struct nvme_ctrl *nctrl, + int qid, size_t queue_size) +{ + struct nvme_tcp_ctrl *ctrl = to_tcp_ctrl(nctrl); + struct nvme_tcp_queue *queue = &ctrl->queues[qid]; + struct linger sol = { .l_onoff = 1, .l_linger = 0 }; + int ret, opt, rcv_pdu_size, n; + + queue->ctrl = ctrl; + INIT_LIST_HEAD(&queue->send_list); + spin_lock_init(&queue->lock); + INIT_WORK(&queue->io_work, nvme_tcp_io_work); + queue->queue_size = queue_size; + + if (qid > 0) + queue->cmnd_capsule_len = ctrl->ctrl.ioccsz * 16; + else + queue->cmnd_capsule_len = sizeof(struct nvme_command) + + NVME_TCP_ADMIN_CCSZ; + + ret = sock_create(ctrl->addr.ss_family, SOCK_STREAM, + IPPROTO_TCP, &queue->sock); + if (ret) { + dev_err(ctrl->ctrl.device, + "failed to create socket: %d\n", ret); + return ret; + } + + /* Single syn retry */ + opt = 1; + ret = kernel_setsockopt(queue->sock, IPPROTO_TCP, TCP_SYNCNT, + (char *)&opt, sizeof(opt)); + if (ret) { + dev_err(ctrl->ctrl.device, + "failed to set TCP_SYNCNT sock opt %d\n", ret); + goto err_sock; + } + + /* Set TCP no delay */ + opt = 1; + ret = kernel_setsockopt(queue->sock, IPPROTO_TCP, + TCP_NODELAY, (char *)&opt, sizeof(opt)); + if (ret) { + dev_err(ctrl->ctrl.device, + "failed to set TCP_NODELAY sock opt %d\n", ret); + goto err_sock; + } + + /* + * Cleanup whatever is sitting in the TCP transmit queue on socket + * close. This is done to prevent stale data from being sent should + * the network connection be restored before TCP times out. + */ + ret = kernel_setsockopt(queue->sock, SOL_SOCKET, SO_LINGER, + (char *)&sol, sizeof(sol)); + if (ret) { + dev_err(ctrl->ctrl.device, + "failed to set SO_LINGER sock opt %d\n", ret); + goto err_sock; + } + + queue->sock->sk->sk_allocation = GFP_ATOMIC; + if (!qid) + n = 0; + else + n = (qid - 1) % num_online_cpus(); + queue->io_cpu = cpumask_next_wrap(n - 1, cpu_online_mask, -1, false); + queue->request = NULL; + queue->data_remaining = 0; + queue->ddgst_remaining = 0; + queue->pdu_remaining = 0; + queue->pdu_offset = 0; + sk_set_memalloc(queue->sock->sk); + + if (ctrl->ctrl.opts->mask & NVMF_OPT_HOST_TRADDR) { + ret = kernel_bind(queue->sock, (struct sockaddr *)&ctrl->src_addr, + sizeof(ctrl->src_addr)); + if (ret) { + dev_err(ctrl->ctrl.device, + "failed to bind queue %d socket %d\n", + qid, ret); + goto err_sock; + } + } + + queue->hdr_digest = nctrl->opts->hdr_digest; + queue->data_digest = nctrl->opts->data_digest; + if (queue->hdr_digest || queue->data_digest) { + ret = nvme_tcp_alloc_crypto(queue); + if (ret) { + dev_err(ctrl->ctrl.device, + "failed to allocate queue %d crypto\n", qid); + goto err_sock; + } + } + + rcv_pdu_size = sizeof(struct nvme_tcp_rsp_pdu) + + nvme_tcp_hdgst_len(queue); + queue->pdu = kmalloc(rcv_pdu_size, GFP_KERNEL); + if (!queue->pdu) { + ret = -ENOMEM; + goto err_crypto; + } + + dev_dbg(ctrl->ctrl.device, "connecting queue %d\n", + nvme_tcp_queue_id(queue)); + + ret = kernel_connect(queue->sock, (struct sockaddr *)&ctrl->addr, + sizeof(ctrl->addr), 0); + if (ret) { + dev_err(ctrl->ctrl.device, + "failed to connect socket: %d\n", ret); + goto err_rcv_pdu; + } + + ret = nvme_tcp_init_connection(queue); + if (ret) + goto err_init_connect; + + queue->rd_enabled = true; + set_bit(NVME_TCP_Q_ALLOCATED, &queue->flags); + nvme_tcp_init_recv_ctx(queue); + + write_lock_bh(&queue->sock->sk->sk_callback_lock); + queue->sock->sk->sk_user_data = queue; + queue->state_change = queue->sock->sk->sk_state_change; + queue->data_ready = queue->sock->sk->sk_data_ready; + queue->write_space = queue->sock->sk->sk_write_space; + queue->sock->sk->sk_data_ready = nvme_tcp_data_ready; + queue->sock->sk->sk_state_change = nvme_tcp_state_change; + queue->sock->sk->sk_write_space = nvme_tcp_write_space; + write_unlock_bh(&queue->sock->sk->sk_callback_lock); + + return 0; + +err_init_connect: + kernel_sock_shutdown(queue->sock, SHUT_RDWR); +err_rcv_pdu: + kfree(queue->pdu); +err_crypto: + if (queue->hdr_digest || queue->data_digest) + nvme_tcp_free_crypto(queue); +err_sock: + sock_release(queue->sock); + queue->sock = NULL; + return ret; +} + +static void nvme_tcp_restore_sock_calls(struct nvme_tcp_queue *queue) +{ + struct socket *sock = queue->sock; + + write_lock_bh(&sock->sk->sk_callback_lock); + sock->sk->sk_user_data = NULL; + sock->sk->sk_data_ready = queue->data_ready; + sock->sk->sk_state_change = queue->state_change; + sock->sk->sk_write_space = queue->write_space; + write_unlock_bh(&sock->sk->sk_callback_lock); +} + +static void __nvme_tcp_stop_queue(struct nvme_tcp_queue *queue) +{ + kernel_sock_shutdown(queue->sock, SHUT_RDWR); + nvme_tcp_restore_sock_calls(queue); + cancel_work_sync(&queue->io_work); +} + +static void nvme_tcp_stop_queue(struct nvme_ctrl *nctrl, int qid) +{ + struct nvme_tcp_ctrl *ctrl = to_tcp_ctrl(nctrl); + struct nvme_tcp_queue *queue = &ctrl->queues[qid]; + + if (!test_and_clear_bit(NVME_TCP_Q_LIVE, &queue->flags)) + return; + + __nvme_tcp_stop_queue(queue); +} + +static int nvme_tcp_start_queue(struct nvme_ctrl *nctrl, int idx) +{ + struct nvme_tcp_ctrl *ctrl = to_tcp_ctrl(nctrl); + int ret; + + if (idx) + ret = nvmf_connect_io_queue(nctrl, idx, false); + else + ret = nvmf_connect_admin_queue(nctrl); + + if (!ret) { + set_bit(NVME_TCP_Q_LIVE, &ctrl->queues[idx].flags); + } else { + __nvme_tcp_stop_queue(&ctrl->queues[idx]); + dev_err(nctrl->device, + "failed to connect queue: %d ret=%d\n", idx, ret); + } + return ret; +} + +static struct blk_mq_tag_set *nvme_tcp_alloc_tagset(struct nvme_ctrl *nctrl, + bool admin) +{ + struct nvme_tcp_ctrl *ctrl = to_tcp_ctrl(nctrl); + struct blk_mq_tag_set *set; + int ret; + + if (admin) { + set = &ctrl->admin_tag_set; + memset(set, 0, sizeof(*set)); + set->ops = &nvme_tcp_admin_mq_ops; + set->queue_depth = NVME_AQ_MQ_TAG_DEPTH; + set->reserved_tags = 2; /* connect + keep-alive */ + set->numa_node = NUMA_NO_NODE; + set->cmd_size = sizeof(struct nvme_tcp_request); + set->driver_data = ctrl; + set->nr_hw_queues = 1; + set->timeout = ADMIN_TIMEOUT; + } else { + set = &ctrl->tag_set; + memset(set, 0, sizeof(*set)); + set->ops = &nvme_tcp_mq_ops; + set->queue_depth = nctrl->sqsize + 1; + set->reserved_tags = 1; /* fabric connect */ + set->numa_node = NUMA_NO_NODE; + set->flags = BLK_MQ_F_SHOULD_MERGE; + set->cmd_size = sizeof(struct nvme_tcp_request); + set->driver_data = ctrl; + set->nr_hw_queues = nctrl->queue_count - 1; + set->timeout = NVME_IO_TIMEOUT; + set->nr_maps = 2 /* default + read */; + } + + ret = blk_mq_alloc_tag_set(set); + if (ret) + return ERR_PTR(ret); + + return set; +} + +static void nvme_tcp_free_admin_queue(struct nvme_ctrl *ctrl) +{ + if (to_tcp_ctrl(ctrl)->async_req.pdu) { + nvme_tcp_free_async_req(to_tcp_ctrl(ctrl)); + to_tcp_ctrl(ctrl)->async_req.pdu = NULL; + } + + nvme_tcp_free_queue(ctrl, 0); +} + +static void nvme_tcp_free_io_queues(struct nvme_ctrl *ctrl) +{ + int i; + + for (i = 1; i < ctrl->queue_count; i++) + nvme_tcp_free_queue(ctrl, i); +} + +static void nvme_tcp_stop_io_queues(struct nvme_ctrl *ctrl) +{ + int i; + + for (i = 1; i < ctrl->queue_count; i++) + nvme_tcp_stop_queue(ctrl, i); +} + +static int nvme_tcp_start_io_queues(struct nvme_ctrl *ctrl) +{ + int i, ret = 0; + + for (i = 1; i < ctrl->queue_count; i++) { + ret = nvme_tcp_start_queue(ctrl, i); + if (ret) + goto out_stop_queues; + } + + return 0; + +out_stop_queues: + for (i--; i >= 1; i--) + nvme_tcp_stop_queue(ctrl, i); + return ret; +} + +static int nvme_tcp_alloc_admin_queue(struct nvme_ctrl *ctrl) +{ + int ret; + + ret = nvme_tcp_alloc_queue(ctrl, 0, NVME_AQ_DEPTH); + if (ret) + return ret; + + ret = nvme_tcp_alloc_async_req(to_tcp_ctrl(ctrl)); + if (ret) + goto out_free_queue; + + return 0; + +out_free_queue: + nvme_tcp_free_queue(ctrl, 0); + return ret; +} + +static int nvme_tcp_alloc_io_queues(struct nvme_ctrl *ctrl) +{ + int i, ret; + + for (i = 1; i < ctrl->queue_count; i++) { + ret = nvme_tcp_alloc_queue(ctrl, i, + ctrl->sqsize + 1); + if (ret) + goto out_free_queues; + } + + return 0; + +out_free_queues: + for (i--; i >= 1; i--) + nvme_tcp_free_queue(ctrl, i); + + return ret; +} + +static unsigned int nvme_tcp_nr_io_queues(struct nvme_ctrl *ctrl) +{ + unsigned int nr_io_queues; + + nr_io_queues = min(ctrl->opts->nr_io_queues, num_online_cpus()); + nr_io_queues += min(ctrl->opts->nr_write_queues, num_online_cpus()); + + return nr_io_queues; +} + +static int nvme_alloc_io_queues(struct nvme_ctrl *ctrl) +{ + unsigned int nr_io_queues; + int ret; + + nr_io_queues = nvme_tcp_nr_io_queues(ctrl); + ret = nvme_set_queue_count(ctrl, &nr_io_queues); + if (ret) + return ret; + + ctrl->queue_count = nr_io_queues + 1; + if (ctrl->queue_count < 2) + return 0; + + dev_info(ctrl->device, + "creating %d I/O queues.\n", nr_io_queues); + + return nvme_tcp_alloc_io_queues(ctrl); +} + +static void nvme_tcp_destroy_io_queues(struct nvme_ctrl *ctrl, bool remove) +{ + nvme_tcp_stop_io_queues(ctrl); + if (remove) { + if (ctrl->ops->flags & NVME_F_FABRICS) + blk_cleanup_queue(ctrl->connect_q); + blk_mq_free_tag_set(ctrl->tagset); + } + nvme_tcp_free_io_queues(ctrl); +} + +static int nvme_tcp_configure_io_queues(struct nvme_ctrl *ctrl, bool new) +{ + int ret; + + ret = nvme_alloc_io_queues(ctrl); + if (ret) + return ret; + + if (new) { + ctrl->tagset = nvme_tcp_alloc_tagset(ctrl, false); + if (IS_ERR(ctrl->tagset)) { + ret = PTR_ERR(ctrl->tagset); + goto out_free_io_queues; + } + + if (ctrl->ops->flags & NVME_F_FABRICS) { + ctrl->connect_q = blk_mq_init_queue(ctrl->tagset); + if (IS_ERR(ctrl->connect_q)) { + ret = PTR_ERR(ctrl->connect_q); + goto out_free_tag_set; + } + } + } else { + blk_mq_update_nr_hw_queues(ctrl->tagset, + ctrl->queue_count - 1); + } + + ret = nvme_tcp_start_io_queues(ctrl); + if (ret) + goto out_cleanup_connect_q; + + return 0; + +out_cleanup_connect_q: + if (new && (ctrl->ops->flags & NVME_F_FABRICS)) + blk_cleanup_queue(ctrl->connect_q); +out_free_tag_set: + if (new) + blk_mq_free_tag_set(ctrl->tagset); +out_free_io_queues: + nvme_tcp_free_io_queues(ctrl); + return ret; +} + +static void nvme_tcp_destroy_admin_queue(struct nvme_ctrl *ctrl, bool remove) +{ + nvme_tcp_stop_queue(ctrl, 0); + if (remove) { + free_opal_dev(ctrl->opal_dev); + blk_cleanup_queue(ctrl->admin_q); + blk_mq_free_tag_set(ctrl->admin_tagset); + } + nvme_tcp_free_admin_queue(ctrl); +} + +static int nvme_tcp_configure_admin_queue(struct nvme_ctrl *ctrl, bool new) +{ + int error; + + error = nvme_tcp_alloc_admin_queue(ctrl); + if (error) + return error; + + if (new) { + ctrl->admin_tagset = nvme_tcp_alloc_tagset(ctrl, true); + if (IS_ERR(ctrl->admin_tagset)) { + error = PTR_ERR(ctrl->admin_tagset); + goto out_free_queue; + } + + ctrl->admin_q = blk_mq_init_queue(ctrl->admin_tagset); + if (IS_ERR(ctrl->admin_q)) { + error = PTR_ERR(ctrl->admin_q); + goto out_free_tagset; + } + } + + error = nvme_tcp_start_queue(ctrl, 0); + if (error) + goto out_cleanup_queue; + + error = ctrl->ops->reg_read64(ctrl, NVME_REG_CAP, &ctrl->cap); + if (error) { + dev_err(ctrl->device, + "prop_get NVME_REG_CAP failed\n"); + goto out_stop_queue; + } + + ctrl->sqsize = min_t(int, NVME_CAP_MQES(ctrl->cap), ctrl->sqsize); + + error = nvme_enable_ctrl(ctrl, ctrl->cap); + if (error) + goto out_stop_queue; + + error = nvme_init_identify(ctrl); + if (error) + goto out_stop_queue; + + return 0; + +out_stop_queue: + nvme_tcp_stop_queue(ctrl, 0); +out_cleanup_queue: + if (new) + blk_cleanup_queue(ctrl->admin_q); +out_free_tagset: + if (new) + blk_mq_free_tag_set(ctrl->admin_tagset); +out_free_queue: + nvme_tcp_free_admin_queue(ctrl); + return error; +} + +static void nvme_tcp_teardown_admin_queue(struct nvme_ctrl *ctrl, + bool remove) +{ + blk_mq_quiesce_queue(ctrl->admin_q); + nvme_tcp_stop_queue(ctrl, 0); + blk_mq_tagset_busy_iter(ctrl->admin_tagset, nvme_cancel_request, ctrl); + blk_mq_unquiesce_queue(ctrl->admin_q); + nvme_tcp_destroy_admin_queue(ctrl, remove); +} + +static void nvme_tcp_teardown_io_queues(struct nvme_ctrl *ctrl, + bool remove) +{ + if (ctrl->queue_count <= 1) + return; + nvme_stop_queues(ctrl); + nvme_tcp_stop_io_queues(ctrl); + blk_mq_tagset_busy_iter(ctrl->tagset, nvme_cancel_request, ctrl); + if (remove) + nvme_start_queues(ctrl); + nvme_tcp_destroy_io_queues(ctrl, remove); +} + +static void nvme_tcp_reconnect_or_remove(struct nvme_ctrl *ctrl) +{ + /* If we are resetting/deleting then do nothing */ + if (ctrl->state != NVME_CTRL_CONNECTING) { + WARN_ON_ONCE(ctrl->state == NVME_CTRL_NEW || + ctrl->state == NVME_CTRL_LIVE); + return; + } + + if (nvmf_should_reconnect(ctrl)) { + dev_info(ctrl->device, "Reconnecting in %d seconds...\n", + ctrl->opts->reconnect_delay); + queue_delayed_work(nvme_wq, &to_tcp_ctrl(ctrl)->connect_work, + ctrl->opts->reconnect_delay * HZ); + } else { + dev_info(ctrl->device, "Removing controller...\n"); + nvme_delete_ctrl(ctrl); + } +} + +static int nvme_tcp_setup_ctrl(struct nvme_ctrl *ctrl, bool new) +{ + struct nvmf_ctrl_options *opts = ctrl->opts; + int ret = -EINVAL; + + ret = nvme_tcp_configure_admin_queue(ctrl, new); + if (ret) + return ret; + + if (ctrl->icdoff) { + dev_err(ctrl->device, "icdoff is not supported!\n"); + goto destroy_admin; + } + + if (opts->queue_size > ctrl->sqsize + 1) + dev_warn(ctrl->device, + "queue_size %zu > ctrl sqsize %u, clamping down\n", + opts->queue_size, ctrl->sqsize + 1); + + if (ctrl->sqsize + 1 > ctrl->maxcmd) { + dev_warn(ctrl->device, + "sqsize %u > ctrl maxcmd %u, clamping down\n", + ctrl->sqsize + 1, ctrl->maxcmd); + ctrl->sqsize = ctrl->maxcmd - 1; + } + + if (ctrl->queue_count > 1) { + ret = nvme_tcp_configure_io_queues(ctrl, new); + if (ret) + goto destroy_admin; + } + + if (!nvme_change_ctrl_state(ctrl, NVME_CTRL_LIVE)) { + /* state change failure is ok if we're in DELETING state */ + WARN_ON_ONCE(ctrl->state != NVME_CTRL_DELETING); + ret = -EINVAL; + goto destroy_io; + } + + nvme_start_ctrl(ctrl); + return 0; + +destroy_io: + if (ctrl->queue_count > 1) + nvme_tcp_destroy_io_queues(ctrl, new); +destroy_admin: + nvme_tcp_stop_queue(ctrl, 0); + nvme_tcp_destroy_admin_queue(ctrl, new); + return ret; +} + +static void nvme_tcp_reconnect_ctrl_work(struct work_struct *work) +{ + struct nvme_tcp_ctrl *tcp_ctrl = container_of(to_delayed_work(work), + struct nvme_tcp_ctrl, connect_work); + struct nvme_ctrl *ctrl = &tcp_ctrl->ctrl; + + ++ctrl->nr_reconnects; + + if (nvme_tcp_setup_ctrl(ctrl, false)) + goto requeue; + + dev_info(ctrl->device, "Successfully reconnected (%d attempt)\n", + ctrl->nr_reconnects); + + ctrl->nr_reconnects = 0; + + return; + +requeue: + dev_info(ctrl->device, "Failed reconnect attempt %d\n", + ctrl->nr_reconnects); + nvme_tcp_reconnect_or_remove(ctrl); +} + +static void nvme_tcp_error_recovery_work(struct work_struct *work) +{ + struct nvme_tcp_ctrl *tcp_ctrl = container_of(work, + struct nvme_tcp_ctrl, err_work); + struct nvme_ctrl *ctrl = &tcp_ctrl->ctrl; + + nvme_stop_keep_alive(ctrl); + nvme_tcp_teardown_io_queues(ctrl, false); + /* unquiesce to fail fast pending requests */ + nvme_start_queues(ctrl); + nvme_tcp_teardown_admin_queue(ctrl, false); + + if (!nvme_change_ctrl_state(ctrl, NVME_CTRL_CONNECTING)) { + /* state change failure is ok if we're in DELETING state */ + WARN_ON_ONCE(ctrl->state != NVME_CTRL_DELETING); + return; + } + + nvme_tcp_reconnect_or_remove(ctrl); +} + +static void nvme_tcp_teardown_ctrl(struct nvme_ctrl *ctrl, bool shutdown) +{ + nvme_tcp_teardown_io_queues(ctrl, shutdown); + if (shutdown) + nvme_shutdown_ctrl(ctrl); + else + nvme_disable_ctrl(ctrl, ctrl->cap); + nvme_tcp_teardown_admin_queue(ctrl, shutdown); +} + +static void nvme_tcp_delete_ctrl(struct nvme_ctrl *ctrl) +{ + nvme_tcp_teardown_ctrl(ctrl, true); +} + +static void nvme_reset_ctrl_work(struct work_struct *work) +{ + struct nvme_ctrl *ctrl = + container_of(work, struct nvme_ctrl, reset_work); + + nvme_stop_ctrl(ctrl); + nvme_tcp_teardown_ctrl(ctrl, false); + + if (!nvme_change_ctrl_state(ctrl, NVME_CTRL_CONNECTING)) { + /* state change failure is ok if we're in DELETING state */ + WARN_ON_ONCE(ctrl->state != NVME_CTRL_DELETING); + return; + } + + if (nvme_tcp_setup_ctrl(ctrl, false)) + goto out_fail; + + return; + +out_fail: + ++ctrl->nr_reconnects; + nvme_tcp_reconnect_or_remove(ctrl); +} + +static void nvme_tcp_stop_ctrl(struct nvme_ctrl *ctrl) +{ + cancel_work_sync(&to_tcp_ctrl(ctrl)->err_work); + cancel_delayed_work_sync(&to_tcp_ctrl(ctrl)->connect_work); +} + +static void nvme_tcp_free_ctrl(struct nvme_ctrl *nctrl) +{ + struct nvme_tcp_ctrl *ctrl = to_tcp_ctrl(nctrl); + + if (list_empty(&ctrl->list)) + goto free_ctrl; + + mutex_lock(&nvme_tcp_ctrl_mutex); + list_del(&ctrl->list); + mutex_unlock(&nvme_tcp_ctrl_mutex); + + nvmf_free_options(nctrl->opts); +free_ctrl: + kfree(ctrl->queues); + kfree(ctrl); +} + +static void nvme_tcp_set_sg_null(struct nvme_command *c) +{ + struct nvme_sgl_desc *sg = &c->common.dptr.sgl; + + sg->addr = 0; + sg->length = 0; + sg->type = (NVME_TRANSPORT_SGL_DATA_DESC << 4) | + NVME_SGL_FMT_TRANSPORT_A; +} + +static void nvme_tcp_set_sg_inline(struct nvme_tcp_queue *queue, + struct nvme_command *c, u32 data_len) +{ + struct nvme_sgl_desc *sg = &c->common.dptr.sgl; + + sg->addr = cpu_to_le64(queue->ctrl->ctrl.icdoff); + sg->length = cpu_to_le32(data_len); + sg->type = (NVME_SGL_FMT_DATA_DESC << 4) | NVME_SGL_FMT_OFFSET; +} + +static void nvme_tcp_set_sg_host_data(struct nvme_command *c, + u32 data_len) +{ + struct nvme_sgl_desc *sg = &c->common.dptr.sgl; + + sg->addr = 0; + sg->length = cpu_to_le32(data_len); + sg->type = (NVME_TRANSPORT_SGL_DATA_DESC << 4) | + NVME_SGL_FMT_TRANSPORT_A; +} + +static void nvme_tcp_submit_async_event(struct nvme_ctrl *arg) +{ + struct nvme_tcp_ctrl *ctrl = to_tcp_ctrl(arg); + struct nvme_tcp_queue *queue = &ctrl->queues[0]; + struct nvme_tcp_cmd_pdu *pdu = ctrl->async_req.pdu; + struct nvme_command *cmd = &pdu->cmd; + u8 hdgst = nvme_tcp_hdgst_len(queue); + + memset(pdu, 0, sizeof(*pdu)); + pdu->hdr.type = nvme_tcp_cmd; + if (queue->hdr_digest) + pdu->hdr.flags |= NVME_TCP_F_HDGST; + pdu->hdr.hlen = sizeof(*pdu); + pdu->hdr.plen = cpu_to_le32(pdu->hdr.hlen + hdgst); + + cmd->common.opcode = nvme_admin_async_event; + cmd->common.command_id = NVME_AQ_BLK_MQ_DEPTH; + cmd->common.flags |= NVME_CMD_SGL_METABUF; + nvme_tcp_set_sg_null(cmd); + + ctrl->async_req.state = NVME_TCP_SEND_CMD_PDU; + ctrl->async_req.offset = 0; + ctrl->async_req.curr_bio = NULL; + ctrl->async_req.data_len = 0; + + nvme_tcp_queue_request(&ctrl->async_req); +} + +static enum blk_eh_timer_return +nvme_tcp_timeout(struct request *rq, bool reserved) +{ + struct nvme_tcp_request *req = blk_mq_rq_to_pdu(rq); + struct nvme_tcp_ctrl *ctrl = req->queue->ctrl; + struct nvme_tcp_cmd_pdu *pdu = req->pdu; + + dev_dbg(ctrl->ctrl.device, + "queue %d: timeout request %#x type %d\n", + nvme_tcp_queue_id(req->queue), rq->tag, + pdu->hdr.type); + + if (ctrl->ctrl.state != NVME_CTRL_LIVE) { + union nvme_result res = {}; + + nvme_req(rq)->flags |= NVME_REQ_CANCELLED; + nvme_end_request(rq, cpu_to_le16(NVME_SC_ABORT_REQ), res); + return BLK_EH_DONE; + } + + /* queue error recovery */ + nvme_tcp_error_recovery(&ctrl->ctrl); + + return BLK_EH_RESET_TIMER; +} + +static blk_status_t nvme_tcp_map_data(struct nvme_tcp_queue *queue, + struct request *rq) +{ + struct nvme_tcp_request *req = blk_mq_rq_to_pdu(rq); + struct nvme_tcp_cmd_pdu *pdu = req->pdu; + struct nvme_command *c = &pdu->cmd; + + c->common.flags |= NVME_CMD_SGL_METABUF; + + if (rq_data_dir(rq) == WRITE && req->data_len && + req->data_len <= nvme_tcp_inline_data_size(queue)) + nvme_tcp_set_sg_inline(queue, c, req->data_len); + else + nvme_tcp_set_sg_host_data(c, req->data_len); + + return 0; +} + +static blk_status_t nvme_tcp_setup_cmd_pdu(struct nvme_ns *ns, + struct request *rq) +{ + struct nvme_tcp_request *req = blk_mq_rq_to_pdu(rq); + struct nvme_tcp_cmd_pdu *pdu = req->pdu; + struct nvme_tcp_queue *queue = req->queue; + u8 hdgst = nvme_tcp_hdgst_len(queue), ddgst = 0; + blk_status_t ret; + + ret = nvme_setup_cmd(ns, rq, &pdu->cmd); + if (ret) + return ret; + + req->state = NVME_TCP_SEND_CMD_PDU; + req->offset = 0; + req->data_sent = 0; + req->pdu_len = 0; + req->pdu_sent = 0; + req->data_len = blk_rq_payload_bytes(rq); + req->curr_bio = rq->bio; + + if (rq_data_dir(rq) == WRITE && + req->data_len <= nvme_tcp_inline_data_size(queue)) + req->pdu_len = req->data_len; + else if (req->curr_bio) + nvme_tcp_init_iter(req, READ); + + pdu->hdr.type = nvme_tcp_cmd; + pdu->hdr.flags = 0; + if (queue->hdr_digest) + pdu->hdr.flags |= NVME_TCP_F_HDGST; + if (queue->data_digest && req->pdu_len) { + pdu->hdr.flags |= NVME_TCP_F_DDGST; + ddgst = nvme_tcp_ddgst_len(queue); + } + pdu->hdr.hlen = sizeof(*pdu); + pdu->hdr.pdo = req->pdu_len ? pdu->hdr.hlen + hdgst : 0; + pdu->hdr.plen = + cpu_to_le32(pdu->hdr.hlen + hdgst + req->pdu_len + ddgst); + + ret = nvme_tcp_map_data(queue, rq); + if (unlikely(ret)) { + dev_err(queue->ctrl->ctrl.device, + "Failed to map data (%d)\n", ret); + return ret; + } + + return 0; +} + +static blk_status_t nvme_tcp_queue_rq(struct blk_mq_hw_ctx *hctx, + const struct blk_mq_queue_data *bd) +{ + struct nvme_ns *ns = hctx->queue->queuedata; + struct nvme_tcp_queue *queue = hctx->driver_data; + struct request *rq = bd->rq; + struct nvme_tcp_request *req = blk_mq_rq_to_pdu(rq); + bool queue_ready = test_bit(NVME_TCP_Q_LIVE, &queue->flags); + blk_status_t ret; + + if (!nvmf_check_ready(&queue->ctrl->ctrl, rq, queue_ready)) + return nvmf_fail_nonready_command(&queue->ctrl->ctrl, rq); + + ret = nvme_tcp_setup_cmd_pdu(ns, rq); + if (unlikely(ret)) + return ret; + + blk_mq_start_request(rq); + + nvme_tcp_queue_request(req); + + return BLK_STS_OK; +} + +static int nvme_tcp_map_queues(struct blk_mq_tag_set *set) +{ + struct nvme_tcp_ctrl *ctrl = set->driver_data; + + set->map[HCTX_TYPE_DEFAULT].queue_offset = 0; + set->map[HCTX_TYPE_READ].nr_queues = ctrl->ctrl.opts->nr_io_queues; + if (ctrl->ctrl.opts->nr_write_queues) { + /* separate read/write queues */ + set->map[HCTX_TYPE_DEFAULT].nr_queues = + ctrl->ctrl.opts->nr_write_queues; + set->map[HCTX_TYPE_READ].queue_offset = + ctrl->ctrl.opts->nr_write_queues; + } else { + /* mixed read/write queues */ + set->map[HCTX_TYPE_DEFAULT].nr_queues = + ctrl->ctrl.opts->nr_io_queues; + set->map[HCTX_TYPE_READ].queue_offset = 0; + } + blk_mq_map_queues(&set->map[HCTX_TYPE_DEFAULT]); + blk_mq_map_queues(&set->map[HCTX_TYPE_READ]); + return 0; +} + +static struct blk_mq_ops nvme_tcp_mq_ops = { + .queue_rq = nvme_tcp_queue_rq, + .complete = nvme_complete_rq, + .init_request = nvme_tcp_init_request, + .exit_request = nvme_tcp_exit_request, + .init_hctx = nvme_tcp_init_hctx, + .timeout = nvme_tcp_timeout, + .map_queues = nvme_tcp_map_queues, +}; + +static struct blk_mq_ops nvme_tcp_admin_mq_ops = { + .queue_rq = nvme_tcp_queue_rq, + .complete = nvme_complete_rq, + .init_request = nvme_tcp_init_request, + .exit_request = nvme_tcp_exit_request, + .init_hctx = nvme_tcp_init_admin_hctx, + .timeout = nvme_tcp_timeout, +}; + +static const struct nvme_ctrl_ops nvme_tcp_ctrl_ops = { + .name = "tcp", + .module = THIS_MODULE, + .flags = NVME_F_FABRICS, + .reg_read32 = nvmf_reg_read32, + .reg_read64 = nvmf_reg_read64, + .reg_write32 = nvmf_reg_write32, + .free_ctrl = nvme_tcp_free_ctrl, + .submit_async_event = nvme_tcp_submit_async_event, + .delete_ctrl = nvme_tcp_delete_ctrl, + .get_address = nvmf_get_address, + .stop_ctrl = nvme_tcp_stop_ctrl, +}; + +static bool +nvme_tcp_existing_controller(struct nvmf_ctrl_options *opts) +{ + struct nvme_tcp_ctrl *ctrl; + bool found = false; + + mutex_lock(&nvme_tcp_ctrl_mutex); + list_for_each_entry(ctrl, &nvme_tcp_ctrl_list, list) { + found = nvmf_ip_options_match(&ctrl->ctrl, opts); + if (found) + break; + } + mutex_unlock(&nvme_tcp_ctrl_mutex); + + return found; +} + +static struct nvme_ctrl *nvme_tcp_create_ctrl(struct device *dev, + struct nvmf_ctrl_options *opts) +{ + struct nvme_tcp_ctrl *ctrl; + int ret; + + ctrl = kzalloc(sizeof(*ctrl), GFP_KERNEL); + if (!ctrl) + return ERR_PTR(-ENOMEM); + + INIT_LIST_HEAD(&ctrl->list); + ctrl->ctrl.opts = opts; + ctrl->ctrl.queue_count = opts->nr_io_queues + opts->nr_write_queues + 1; + ctrl->ctrl.sqsize = opts->queue_size - 1; + ctrl->ctrl.kato = opts->kato; + + INIT_DELAYED_WORK(&ctrl->connect_work, + nvme_tcp_reconnect_ctrl_work); + INIT_WORK(&ctrl->err_work, nvme_tcp_error_recovery_work); + INIT_WORK(&ctrl->ctrl.reset_work, nvme_reset_ctrl_work); + + if (!(opts->mask & NVMF_OPT_TRSVCID)) { + opts->trsvcid = + kstrdup(__stringify(NVME_TCP_DISC_PORT), GFP_KERNEL); + if (!opts->trsvcid) { + ret = -ENOMEM; + goto out_free_ctrl; + } + opts->mask |= NVMF_OPT_TRSVCID; + } + + ret = inet_pton_with_scope(&init_net, AF_UNSPEC, + opts->traddr, opts->trsvcid, &ctrl->addr); + if (ret) { + pr_err("malformed address passed: %s:%s\n", + opts->traddr, opts->trsvcid); + goto out_free_ctrl; + } + + if (opts->mask & NVMF_OPT_HOST_TRADDR) { + ret = inet_pton_with_scope(&init_net, AF_UNSPEC, + opts->host_traddr, NULL, &ctrl->src_addr); + if (ret) { + pr_err("malformed src address passed: %s\n", + opts->host_traddr); + goto out_free_ctrl; + } + } + + if (!opts->duplicate_connect && nvme_tcp_existing_controller(opts)) { + ret = -EALREADY; + goto out_free_ctrl; + } + + ctrl->queues = kcalloc(ctrl->ctrl.queue_count, sizeof(*ctrl->queues), + GFP_KERNEL); + if (!ctrl->queues) { + ret = -ENOMEM; + goto out_free_ctrl; + } + + ret = nvme_init_ctrl(&ctrl->ctrl, dev, &nvme_tcp_ctrl_ops, 0); + if (ret) + goto out_kfree_queues; + + if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_CONNECTING)) { + WARN_ON_ONCE(1); + ret = -EINTR; + goto out_uninit_ctrl; + } + + ret = nvme_tcp_setup_ctrl(&ctrl->ctrl, true); + if (ret) + goto out_uninit_ctrl; + + dev_info(ctrl->ctrl.device, "new ctrl: NQN \"%s\", addr %pISp\n", + ctrl->ctrl.opts->subsysnqn, &ctrl->addr); + + nvme_get_ctrl(&ctrl->ctrl); + + mutex_lock(&nvme_tcp_ctrl_mutex); + list_add_tail(&ctrl->list, &nvme_tcp_ctrl_list); + mutex_unlock(&nvme_tcp_ctrl_mutex); + + return &ctrl->ctrl; + +out_uninit_ctrl: + nvme_uninit_ctrl(&ctrl->ctrl); + nvme_put_ctrl(&ctrl->ctrl); + if (ret > 0) + ret = -EIO; + return ERR_PTR(ret); +out_kfree_queues: + kfree(ctrl->queues); +out_free_ctrl: + kfree(ctrl); + return ERR_PTR(ret); +} + +static struct nvmf_transport_ops nvme_tcp_transport = { + .name = "tcp", + .module = THIS_MODULE, + .required_opts = NVMF_OPT_TRADDR, + .allowed_opts = NVMF_OPT_TRSVCID | NVMF_OPT_RECONNECT_DELAY | + NVMF_OPT_HOST_TRADDR | NVMF_OPT_CTRL_LOSS_TMO | + NVMF_OPT_HDR_DIGEST | NVMF_OPT_DATA_DIGEST | + NVMF_OPT_NR_WRITE_QUEUES, + .create_ctrl = nvme_tcp_create_ctrl, +}; + +static int __init nvme_tcp_init_module(void) +{ + nvme_tcp_wq = alloc_workqueue("nvme_tcp_wq", + WQ_MEM_RECLAIM | WQ_HIGHPRI, 0); + if (!nvme_tcp_wq) + return -ENOMEM; + + nvmf_register_transport(&nvme_tcp_transport); + return 0; +} + +static void __exit nvme_tcp_cleanup_module(void) +{ + struct nvme_tcp_ctrl *ctrl; + + nvmf_unregister_transport(&nvme_tcp_transport); + + mutex_lock(&nvme_tcp_ctrl_mutex); + list_for_each_entry(ctrl, &nvme_tcp_ctrl_list, list) + nvme_delete_ctrl(&ctrl->ctrl); + mutex_unlock(&nvme_tcp_ctrl_mutex); + flush_workqueue(nvme_delete_wq); + + destroy_workqueue(nvme_tcp_wq); +} + +module_init(nvme_tcp_init_module); +module_exit(nvme_tcp_cleanup_module); + +MODULE_LICENSE("GPL v2"); diff --git a/drivers/nvme/host/trace.c b/drivers/nvme/host/trace.c index 25b0e310f4a8..5566dda3237a 100644 --- a/drivers/nvme/host/trace.c +++ b/drivers/nvme/host/trace.c @@ -139,3 +139,6 @@ const char *nvme_trace_disk_name(struct trace_seq *p, char *name) return ret; } +EXPORT_SYMBOL_GPL(nvme_trace_disk_name); + +EXPORT_TRACEPOINT_SYMBOL_GPL(nvme_sq); diff --git a/drivers/nvme/host/trace.h b/drivers/nvme/host/trace.h index 196d5bd56718..3564120aa7b3 100644 --- a/drivers/nvme/host/trace.h +++ b/drivers/nvme/host/trace.h @@ -115,8 +115,8 @@ TRACE_EVENT(nvme_setup_cmd, __entry->nsid = le32_to_cpu(cmd->common.nsid); __entry->metadata = le64_to_cpu(cmd->common.metadata); __assign_disk_name(__entry->disk, req->rq_disk); - memcpy(__entry->cdw10, cmd->common.cdw10, - sizeof(__entry->cdw10)); + memcpy(__entry->cdw10, &cmd->common.cdw10, + 6 * sizeof(__entry->cdw10)); ), TP_printk("nvme%d: %sqid=%d, cmdid=%u, nsid=%u, flags=0x%x, meta=0x%llx, cmd=(%s %s)", __entry->ctrl_id, __print_disk_name(__entry->disk), @@ -184,6 +184,29 @@ TRACE_EVENT(nvme_async_event, #undef aer_name +TRACE_EVENT(nvme_sq, + TP_PROTO(struct request *req, __le16 sq_head, int sq_tail), + TP_ARGS(req, sq_head, sq_tail), + TP_STRUCT__entry( + __field(int, ctrl_id) + __array(char, disk, DISK_NAME_LEN) + __field(int, qid) + __field(u16, sq_head) + __field(u16, sq_tail) + ), + TP_fast_assign( + __entry->ctrl_id = nvme_req(req)->ctrl->instance; + __assign_disk_name(__entry->disk, req->rq_disk); + __entry->qid = nvme_req_qid(req); + __entry->sq_head = le16_to_cpu(sq_head); + __entry->sq_tail = sq_tail; + ), + TP_printk("nvme%d: %sqid=%d, head=%u, tail=%u", + __entry->ctrl_id, __print_disk_name(__entry->disk), + __entry->qid, __entry->sq_head, __entry->sq_tail + ) +); + #endif /* _TRACE_NVME_H */ #undef TRACE_INCLUDE_PATH diff --git a/drivers/nvme/target/Kconfig b/drivers/nvme/target/Kconfig index 3c7b61ddb0d1..d94f25cde019 100644 --- a/drivers/nvme/target/Kconfig +++ b/drivers/nvme/target/Kconfig @@ -60,3 +60,13 @@ config NVME_TARGET_FCLOOP to test NVMe-FC transport interfaces. If unsure, say N. + +config NVME_TARGET_TCP + tristate "NVMe over Fabrics TCP target support" + depends on INET + depends on NVME_TARGET + help + This enables the NVMe TCP target support, which allows exporting NVMe + devices over TCP. + + If unsure, say N. diff --git a/drivers/nvme/target/Makefile b/drivers/nvme/target/Makefile index 8118c93391c6..8c3ad0fb6860 100644 --- a/drivers/nvme/target/Makefile +++ b/drivers/nvme/target/Makefile @@ -5,6 +5,7 @@ obj-$(CONFIG_NVME_TARGET_LOOP) += nvme-loop.o obj-$(CONFIG_NVME_TARGET_RDMA) += nvmet-rdma.o obj-$(CONFIG_NVME_TARGET_FC) += nvmet-fc.o obj-$(CONFIG_NVME_TARGET_FCLOOP) += nvme-fcloop.o +obj-$(CONFIG_NVME_TARGET_TCP) += nvmet-tcp.o nvmet-y += core.o configfs.o admin-cmd.o fabrics-cmd.o \ discovery.o io-cmd-file.o io-cmd-bdev.o @@ -12,3 +13,4 @@ nvme-loop-y += loop.o nvmet-rdma-y += rdma.o nvmet-fc-y += fc.o nvme-fcloop-y += fcloop.o +nvmet-tcp-y += tcp.o diff --git a/drivers/nvme/target/admin-cmd.c b/drivers/nvme/target/admin-cmd.c index 1179f6314323..11baeb14c388 100644 --- a/drivers/nvme/target/admin-cmd.c +++ b/drivers/nvme/target/admin-cmd.c @@ -19,19 +19,6 @@ #include <asm/unaligned.h> #include "nvmet.h" -/* - * This helper allows us to clear the AEN based on the RAE bit, - * Please use this helper when processing the log pages which are - * associated with the AEN. - */ -static inline void nvmet_clear_aen(struct nvmet_req *req, u32 aen_bit) -{ - int rae = le32_to_cpu(req->cmd->common.cdw10[0]) & 1 << 15; - - if (!rae) - clear_bit(aen_bit, &req->sq->ctrl->aen_masked); -} - u32 nvmet_get_log_page_len(struct nvme_command *cmd) { u32 len = le16_to_cpu(cmd->get_log_page.numdu); @@ -50,6 +37,34 @@ static void nvmet_execute_get_log_page_noop(struct nvmet_req *req) nvmet_req_complete(req, nvmet_zero_sgl(req, 0, req->data_len)); } +static void nvmet_execute_get_log_page_error(struct nvmet_req *req) +{ + struct nvmet_ctrl *ctrl = req->sq->ctrl; + u16 status = NVME_SC_SUCCESS; + unsigned long flags; + off_t offset = 0; + u64 slot; + u64 i; + + spin_lock_irqsave(&ctrl->error_lock, flags); + slot = ctrl->err_counter % NVMET_ERROR_LOG_SLOTS; + + for (i = 0; i < NVMET_ERROR_LOG_SLOTS; i++) { + status = nvmet_copy_to_sgl(req, offset, &ctrl->slots[slot], + sizeof(struct nvme_error_slot)); + if (status) + break; + + if (slot == 0) + slot = NVMET_ERROR_LOG_SLOTS - 1; + else + slot--; + offset += sizeof(struct nvme_error_slot); + } + spin_unlock_irqrestore(&ctrl->error_lock, flags); + nvmet_req_complete(req, status); +} + static u16 nvmet_get_smart_log_nsid(struct nvmet_req *req, struct nvme_smart_log *slog) { @@ -60,6 +75,7 @@ static u16 nvmet_get_smart_log_nsid(struct nvmet_req *req, if (!ns) { pr_err("Could not find namespace id : %d\n", le32_to_cpu(req->cmd->get_log_page.nsid)); + req->error_loc = offsetof(struct nvme_rw_command, nsid); return NVME_SC_INVALID_NS; } @@ -119,6 +135,7 @@ static void nvmet_execute_get_log_page_smart(struct nvmet_req *req) { struct nvme_smart_log *log; u16 status = NVME_SC_INTERNAL; + unsigned long flags; if (req->data_len != sizeof(*log)) goto out; @@ -134,6 +151,11 @@ static void nvmet_execute_get_log_page_smart(struct nvmet_req *req) if (status) goto out_free_log; + spin_lock_irqsave(&req->sq->ctrl->error_lock, flags); + put_unaligned_le64(req->sq->ctrl->err_counter, + &log->num_err_log_entries); + spin_unlock_irqrestore(&req->sq->ctrl->error_lock, flags); + status = nvmet_copy_to_sgl(req, 0, log, sizeof(*log)); out_free_log: kfree(log); @@ -189,7 +211,7 @@ static void nvmet_execute_get_log_changed_ns(struct nvmet_req *req) if (!status) status = nvmet_zero_sgl(req, len, req->data_len - len); ctrl->nr_changed_ns = 0; - nvmet_clear_aen(req, NVME_AEN_CFG_NS_ATTR); + nvmet_clear_aen_bit(req, NVME_AEN_BIT_NS_ATTR); mutex_unlock(&ctrl->lock); out: nvmet_req_complete(req, status); @@ -252,7 +274,7 @@ static void nvmet_execute_get_log_page_ana(struct nvmet_req *req) hdr.chgcnt = cpu_to_le64(nvmet_ana_chgcnt); hdr.ngrps = cpu_to_le16(ngrps); - nvmet_clear_aen(req, NVME_AEN_CFG_ANA_CHANGE); + nvmet_clear_aen_bit(req, NVME_AEN_BIT_ANA_CHANGE); up_read(&nvmet_ana_sem); kfree(desc); @@ -304,7 +326,8 @@ static void nvmet_execute_identify_ctrl(struct nvmet_req *req) /* XXX: figure out what to do about RTD3R/RTD3 */ id->oaes = cpu_to_le32(NVMET_AEN_CFG_OPTIONAL); - id->ctratt = cpu_to_le32(1 << 0); + id->ctratt = cpu_to_le32(NVME_CTRL_ATTR_HID_128_BIT | + NVME_CTRL_ATTR_TBKAS); id->oacs = 0; @@ -392,6 +415,7 @@ static void nvmet_execute_identify_ns(struct nvmet_req *req) u16 status = 0; if (le32_to_cpu(req->cmd->identify.nsid) == NVME_NSID_ALL) { + req->error_loc = offsetof(struct nvme_identify, nsid); status = NVME_SC_INVALID_NS | NVME_SC_DNR; goto out; } @@ -512,6 +536,7 @@ static void nvmet_execute_identify_desclist(struct nvmet_req *req) ns = nvmet_find_namespace(req->sq->ctrl, req->cmd->identify.nsid); if (!ns) { + req->error_loc = offsetof(struct nvme_identify, nsid); status = NVME_SC_INVALID_NS | NVME_SC_DNR; goto out; } @@ -569,13 +594,15 @@ static u16 nvmet_write_protect_flush_sync(struct nvmet_req *req) static u16 nvmet_set_feat_write_protect(struct nvmet_req *req) { - u32 write_protect = le32_to_cpu(req->cmd->common.cdw10[1]); + u32 write_protect = le32_to_cpu(req->cmd->common.cdw11); struct nvmet_subsys *subsys = req->sq->ctrl->subsys; u16 status = NVME_SC_FEATURE_NOT_CHANGEABLE; req->ns = nvmet_find_namespace(req->sq->ctrl, req->cmd->rw.nsid); - if (unlikely(!req->ns)) + if (unlikely(!req->ns)) { + req->error_loc = offsetof(struct nvme_common_command, nsid); return status; + } mutex_lock(&subsys->lock); switch (write_protect) { @@ -599,11 +626,36 @@ static u16 nvmet_set_feat_write_protect(struct nvmet_req *req) return status; } +u16 nvmet_set_feat_kato(struct nvmet_req *req) +{ + u32 val32 = le32_to_cpu(req->cmd->common.cdw11); + + req->sq->ctrl->kato = DIV_ROUND_UP(val32, 1000); + + nvmet_set_result(req, req->sq->ctrl->kato); + + return 0; +} + +u16 nvmet_set_feat_async_event(struct nvmet_req *req, u32 mask) +{ + u32 val32 = le32_to_cpu(req->cmd->common.cdw11); + + if (val32 & ~mask) { + req->error_loc = offsetof(struct nvme_common_command, cdw11); + return NVME_SC_INVALID_FIELD | NVME_SC_DNR; + } + + WRITE_ONCE(req->sq->ctrl->aen_enabled, val32); + nvmet_set_result(req, val32); + + return 0; +} + static void nvmet_execute_set_features(struct nvmet_req *req) { struct nvmet_subsys *subsys = req->sq->ctrl->subsys; - u32 cdw10 = le32_to_cpu(req->cmd->common.cdw10[0]); - u32 val32; + u32 cdw10 = le32_to_cpu(req->cmd->common.cdw10); u16 status = 0; switch (cdw10 & 0xff) { @@ -612,19 +664,10 @@ static void nvmet_execute_set_features(struct nvmet_req *req) (subsys->max_qid - 1) | ((subsys->max_qid - 1) << 16)); break; case NVME_FEAT_KATO: - val32 = le32_to_cpu(req->cmd->common.cdw10[1]); - req->sq->ctrl->kato = DIV_ROUND_UP(val32, 1000); - nvmet_set_result(req, req->sq->ctrl->kato); + status = nvmet_set_feat_kato(req); break; case NVME_FEAT_ASYNC_EVENT: - val32 = le32_to_cpu(req->cmd->common.cdw10[1]); - if (val32 & ~NVMET_AEN_CFG_ALL) { - status = NVME_SC_INVALID_FIELD | NVME_SC_DNR; - break; - } - - WRITE_ONCE(req->sq->ctrl->aen_enabled, val32); - nvmet_set_result(req, val32); + status = nvmet_set_feat_async_event(req, NVMET_AEN_CFG_ALL); break; case NVME_FEAT_HOST_ID: status = NVME_SC_CMD_SEQ_ERROR | NVME_SC_DNR; @@ -633,6 +676,7 @@ static void nvmet_execute_set_features(struct nvmet_req *req) status = nvmet_set_feat_write_protect(req); break; default: + req->error_loc = offsetof(struct nvme_common_command, cdw10); status = NVME_SC_INVALID_FIELD | NVME_SC_DNR; break; } @@ -646,9 +690,10 @@ static u16 nvmet_get_feat_write_protect(struct nvmet_req *req) u32 result; req->ns = nvmet_find_namespace(req->sq->ctrl, req->cmd->common.nsid); - if (!req->ns) + if (!req->ns) { + req->error_loc = offsetof(struct nvme_common_command, nsid); return NVME_SC_INVALID_NS | NVME_SC_DNR; - + } mutex_lock(&subsys->lock); if (req->ns->readonly == true) result = NVME_NS_WRITE_PROTECT; @@ -660,10 +705,20 @@ static u16 nvmet_get_feat_write_protect(struct nvmet_req *req) return 0; } +void nvmet_get_feat_kato(struct nvmet_req *req) +{ + nvmet_set_result(req, req->sq->ctrl->kato * 1000); +} + +void nvmet_get_feat_async_event(struct nvmet_req *req) +{ + nvmet_set_result(req, READ_ONCE(req->sq->ctrl->aen_enabled)); +} + static void nvmet_execute_get_features(struct nvmet_req *req) { struct nvmet_subsys *subsys = req->sq->ctrl->subsys; - u32 cdw10 = le32_to_cpu(req->cmd->common.cdw10[0]); + u32 cdw10 = le32_to_cpu(req->cmd->common.cdw10); u16 status = 0; switch (cdw10 & 0xff) { @@ -689,7 +744,7 @@ static void nvmet_execute_get_features(struct nvmet_req *req) break; #endif case NVME_FEAT_ASYNC_EVENT: - nvmet_set_result(req, READ_ONCE(req->sq->ctrl->aen_enabled)); + nvmet_get_feat_async_event(req); break; case NVME_FEAT_VOLATILE_WC: nvmet_set_result(req, 1); @@ -699,11 +754,13 @@ static void nvmet_execute_get_features(struct nvmet_req *req) (subsys->max_qid-1) | ((subsys->max_qid-1) << 16)); break; case NVME_FEAT_KATO: - nvmet_set_result(req, req->sq->ctrl->kato * 1000); + nvmet_get_feat_kato(req); break; case NVME_FEAT_HOST_ID: /* need 128-bit host identifier flag */ - if (!(req->cmd->common.cdw10[1] & cpu_to_le32(1 << 0))) { + if (!(req->cmd->common.cdw11 & cpu_to_le32(1 << 0))) { + req->error_loc = + offsetof(struct nvme_common_command, cdw11); status = NVME_SC_INVALID_FIELD | NVME_SC_DNR; break; } @@ -715,6 +772,8 @@ static void nvmet_execute_get_features(struct nvmet_req *req) status = nvmet_get_feat_write_protect(req); break; default: + req->error_loc = + offsetof(struct nvme_common_command, cdw10); status = NVME_SC_INVALID_FIELD | NVME_SC_DNR; break; } @@ -722,7 +781,7 @@ static void nvmet_execute_get_features(struct nvmet_req *req) nvmet_req_complete(req, status); } -static void nvmet_execute_async_event(struct nvmet_req *req) +void nvmet_execute_async_event(struct nvmet_req *req) { struct nvmet_ctrl *ctrl = req->sq->ctrl; @@ -738,7 +797,7 @@ static void nvmet_execute_async_event(struct nvmet_req *req) schedule_work(&ctrl->async_event_work); } -static void nvmet_execute_keep_alive(struct nvmet_req *req) +void nvmet_execute_keep_alive(struct nvmet_req *req) { struct nvmet_ctrl *ctrl = req->sq->ctrl; @@ -764,13 +823,7 @@ u16 nvmet_parse_admin_cmd(struct nvmet_req *req) switch (cmd->get_log_page.lid) { case NVME_LOG_ERROR: - /* - * We currently never set the More bit in the status - * field, so all error log entries are invalid and can - * be zeroed out. This is called a minum viable - * implementation (TM) of this mandatory log page. - */ - req->execute = nvmet_execute_get_log_page_noop; + req->execute = nvmet_execute_get_log_page_error; return 0; case NVME_LOG_SMART: req->execute = nvmet_execute_get_log_page_smart; @@ -836,5 +889,6 @@ u16 nvmet_parse_admin_cmd(struct nvmet_req *req) pr_err("unhandled cmd %d on qid %d\n", cmd->common.opcode, req->sq->qid); + req->error_loc = offsetof(struct nvme_common_command, opcode); return NVME_SC_INVALID_OPCODE | NVME_SC_DNR; } diff --git a/drivers/nvme/target/configfs.c b/drivers/nvme/target/configfs.c index d895579b6c5d..618bbd006544 100644 --- a/drivers/nvme/target/configfs.c +++ b/drivers/nvme/target/configfs.c @@ -25,12 +25,16 @@ static const struct config_item_type nvmet_host_type; static const struct config_item_type nvmet_subsys_type; +static LIST_HEAD(nvmet_ports_list); +struct list_head *nvmet_ports = &nvmet_ports_list; + static const struct nvmet_transport_name { u8 type; const char *name; } nvmet_transport_names[] = { { NVMF_TRTYPE_RDMA, "rdma" }, { NVMF_TRTYPE_FC, "fc" }, + { NVMF_TRTYPE_TCP, "tcp" }, { NVMF_TRTYPE_LOOP, "loop" }, }; @@ -150,7 +154,8 @@ CONFIGFS_ATTR(nvmet_, addr_traddr); static ssize_t nvmet_addr_treq_show(struct config_item *item, char *page) { - switch (to_nvmet_port(item)->disc_addr.treq) { + switch (to_nvmet_port(item)->disc_addr.treq & + NVME_TREQ_SECURE_CHANNEL_MASK) { case NVMF_TREQ_NOT_SPECIFIED: return sprintf(page, "not specified\n"); case NVMF_TREQ_REQUIRED: @@ -166,6 +171,7 @@ static ssize_t nvmet_addr_treq_store(struct config_item *item, const char *page, size_t count) { struct nvmet_port *port = to_nvmet_port(item); + u8 treq = port->disc_addr.treq & ~NVME_TREQ_SECURE_CHANNEL_MASK; if (port->enabled) { pr_err("Cannot modify address while enabled\n"); @@ -174,15 +180,16 @@ static ssize_t nvmet_addr_treq_store(struct config_item *item, } if (sysfs_streq(page, "not specified")) { - port->disc_addr.treq = NVMF_TREQ_NOT_SPECIFIED; + treq |= NVMF_TREQ_NOT_SPECIFIED; } else if (sysfs_streq(page, "required")) { - port->disc_addr.treq = NVMF_TREQ_REQUIRED; + treq |= NVMF_TREQ_REQUIRED; } else if (sysfs_streq(page, "not required")) { - port->disc_addr.treq = NVMF_TREQ_NOT_REQUIRED; + treq |= NVMF_TREQ_NOT_REQUIRED; } else { pr_err("Invalid value '%s' for treq\n", page); return -EINVAL; } + port->disc_addr.treq = treq; return count; } @@ -646,7 +653,8 @@ static int nvmet_port_subsys_allow_link(struct config_item *parent, } list_add_tail(&link->entry, &port->subsystems); - nvmet_genctr++; + nvmet_port_disc_changed(port, subsys); + up_write(&nvmet_config_sem); return 0; @@ -673,7 +681,8 @@ static void nvmet_port_subsys_drop_link(struct config_item *parent, found: list_del(&p->entry); - nvmet_genctr++; + nvmet_port_disc_changed(port, subsys); + if (list_empty(&port->subsystems)) nvmet_disable_port(port); up_write(&nvmet_config_sem); @@ -722,7 +731,8 @@ static int nvmet_allowed_hosts_allow_link(struct config_item *parent, goto out_free_link; } list_add_tail(&link->entry, &subsys->hosts); - nvmet_genctr++; + nvmet_subsys_disc_changed(subsys, host); + up_write(&nvmet_config_sem); return 0; out_free_link: @@ -748,7 +758,8 @@ static void nvmet_allowed_hosts_drop_link(struct config_item *parent, found: list_del(&p->entry); - nvmet_genctr++; + nvmet_subsys_disc_changed(subsys, host); + up_write(&nvmet_config_sem); kfree(p); } @@ -787,7 +798,11 @@ static ssize_t nvmet_subsys_attr_allow_any_host_store(struct config_item *item, goto out_unlock; } - subsys->allow_any_host = allow_any_host; + if (subsys->allow_any_host != allow_any_host) { + subsys->allow_any_host = allow_any_host; + nvmet_subsys_disc_changed(subsys, NULL); + } + out_unlock: up_write(&nvmet_config_sem); return ret ? ret : count; @@ -936,7 +951,7 @@ static ssize_t nvmet_referral_enable_store(struct config_item *item, if (enable) nvmet_referral_enable(parent, port); else - nvmet_referral_disable(port); + nvmet_referral_disable(parent, port); return count; inval: @@ -962,9 +977,10 @@ static struct configfs_attribute *nvmet_referral_attrs[] = { static void nvmet_referral_release(struct config_item *item) { + struct nvmet_port *parent = to_nvmet_port(item->ci_parent->ci_parent); struct nvmet_port *port = to_nvmet_port(item); - nvmet_referral_disable(port); + nvmet_referral_disable(parent, port); kfree(port); } @@ -1137,6 +1153,8 @@ static void nvmet_port_release(struct config_item *item) { struct nvmet_port *port = to_nvmet_port(item); + list_del(&port->global_entry); + kfree(port->ana_state); kfree(port); } @@ -1189,12 +1207,15 @@ static struct config_group *nvmet_ports_make(struct config_group *group, port->ana_state[i] = NVME_ANA_INACCESSIBLE; } + list_add(&port->global_entry, &nvmet_ports_list); + INIT_LIST_HEAD(&port->entry); INIT_LIST_HEAD(&port->subsystems); INIT_LIST_HEAD(&port->referrals); port->inline_data_size = -1; /* < 0 == let the transport choose */ port->disc_addr.portid = cpu_to_le16(portid); + port->disc_addr.treq = NVMF_TREQ_DISABLE_SQFLOW; config_group_init_type_name(&port->group, name, &nvmet_port_type); config_group_init_type_name(&port->subsys_group, diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c index a5f9bbce863f..88d260f31835 100644 --- a/drivers/nvme/target/core.c +++ b/drivers/nvme/target/core.c @@ -45,28 +45,72 @@ u32 nvmet_ana_group_enabled[NVMET_MAX_ANAGRPS + 1]; u64 nvmet_ana_chgcnt; DECLARE_RWSEM(nvmet_ana_sem); +inline u16 errno_to_nvme_status(struct nvmet_req *req, int errno) +{ + u16 status; + + switch (errno) { + case -ENOSPC: + req->error_loc = offsetof(struct nvme_rw_command, length); + status = NVME_SC_CAP_EXCEEDED | NVME_SC_DNR; + break; + case -EREMOTEIO: + req->error_loc = offsetof(struct nvme_rw_command, slba); + status = NVME_SC_LBA_RANGE | NVME_SC_DNR; + break; + case -EOPNOTSUPP: + req->error_loc = offsetof(struct nvme_common_command, opcode); + switch (req->cmd->common.opcode) { + case nvme_cmd_dsm: + case nvme_cmd_write_zeroes: + status = NVME_SC_ONCS_NOT_SUPPORTED | NVME_SC_DNR; + break; + default: + status = NVME_SC_INVALID_OPCODE | NVME_SC_DNR; + } + break; + case -ENODATA: + req->error_loc = offsetof(struct nvme_rw_command, nsid); + status = NVME_SC_ACCESS_DENIED; + break; + case -EIO: + /* FALLTHRU */ + default: + req->error_loc = offsetof(struct nvme_common_command, opcode); + status = NVME_SC_INTERNAL | NVME_SC_DNR; + } + + return status; +} + static struct nvmet_subsys *nvmet_find_get_subsys(struct nvmet_port *port, const char *subsysnqn); u16 nvmet_copy_to_sgl(struct nvmet_req *req, off_t off, const void *buf, size_t len) { - if (sg_pcopy_from_buffer(req->sg, req->sg_cnt, buf, len, off) != len) + if (sg_pcopy_from_buffer(req->sg, req->sg_cnt, buf, len, off) != len) { + req->error_loc = offsetof(struct nvme_common_command, dptr); return NVME_SC_SGL_INVALID_DATA | NVME_SC_DNR; + } return 0; } u16 nvmet_copy_from_sgl(struct nvmet_req *req, off_t off, void *buf, size_t len) { - if (sg_pcopy_to_buffer(req->sg, req->sg_cnt, buf, len, off) != len) + if (sg_pcopy_to_buffer(req->sg, req->sg_cnt, buf, len, off) != len) { + req->error_loc = offsetof(struct nvme_common_command, dptr); return NVME_SC_SGL_INVALID_DATA | NVME_SC_DNR; + } return 0; } u16 nvmet_zero_sgl(struct nvmet_req *req, off_t off, size_t len) { - if (sg_zero_buffer(req->sg, req->sg_cnt, len, off) != len) + if (sg_zero_buffer(req->sg, req->sg_cnt, len, off) != len) { + req->error_loc = offsetof(struct nvme_common_command, dptr); return NVME_SC_SGL_INVALID_DATA | NVME_SC_DNR; + } return 0; } @@ -130,7 +174,7 @@ static void nvmet_async_event_work(struct work_struct *work) } } -static void nvmet_add_async_event(struct nvmet_ctrl *ctrl, u8 event_type, +void nvmet_add_async_event(struct nvmet_ctrl *ctrl, u8 event_type, u8 event_info, u8 log_page) { struct nvmet_async_event *aen; @@ -150,13 +194,6 @@ static void nvmet_add_async_event(struct nvmet_ctrl *ctrl, u8 event_type, schedule_work(&ctrl->async_event_work); } -static bool nvmet_aen_disabled(struct nvmet_ctrl *ctrl, u32 aen) -{ - if (!(READ_ONCE(ctrl->aen_enabled) & aen)) - return true; - return test_and_set_bit(aen, &ctrl->aen_masked); -} - static void nvmet_add_to_changed_ns_log(struct nvmet_ctrl *ctrl, __le32 nsid) { u32 i; @@ -187,7 +224,7 @@ void nvmet_ns_changed(struct nvmet_subsys *subsys, u32 nsid) list_for_each_entry(ctrl, &subsys->ctrls, subsys_entry) { nvmet_add_to_changed_ns_log(ctrl, cpu_to_le32(nsid)); - if (nvmet_aen_disabled(ctrl, NVME_AEN_CFG_NS_ATTR)) + if (nvmet_aen_bit_disabled(ctrl, NVME_AEN_BIT_NS_ATTR)) continue; nvmet_add_async_event(ctrl, NVME_AER_TYPE_NOTICE, NVME_AER_NOTICE_NS_CHANGED, @@ -204,7 +241,7 @@ void nvmet_send_ana_event(struct nvmet_subsys *subsys, list_for_each_entry(ctrl, &subsys->ctrls, subsys_entry) { if (port && ctrl->port != port) continue; - if (nvmet_aen_disabled(ctrl, NVME_AEN_CFG_ANA_CHANGE)) + if (nvmet_aen_bit_disabled(ctrl, NVME_AEN_BIT_ANA_CHANGE)) continue; nvmet_add_async_event(ctrl, NVME_AER_TYPE_NOTICE, NVME_AER_NOTICE_ANA, NVME_LOG_ANA); @@ -299,6 +336,15 @@ static void nvmet_keep_alive_timer(struct work_struct *work) { struct nvmet_ctrl *ctrl = container_of(to_delayed_work(work), struct nvmet_ctrl, ka_work); + bool cmd_seen = ctrl->cmd_seen; + + ctrl->cmd_seen = false; + if (cmd_seen) { + pr_debug("ctrl %d reschedule traffic based keep-alive timer\n", + ctrl->cntlid); + schedule_delayed_work(&ctrl->ka_work, ctrl->kato * HZ); + return; + } pr_err("ctrl %d keep-alive timer (%d seconds) expired!\n", ctrl->cntlid, ctrl->kato); @@ -595,26 +641,58 @@ struct nvmet_ns *nvmet_ns_alloc(struct nvmet_subsys *subsys, u32 nsid) return ns; } -static void __nvmet_req_complete(struct nvmet_req *req, u16 status) +static void nvmet_update_sq_head(struct nvmet_req *req) { - u32 old_sqhd, new_sqhd; - u16 sqhd; - - if (status) - nvmet_set_status(req, status); - if (req->sq->size) { + u32 old_sqhd, new_sqhd; + do { old_sqhd = req->sq->sqhd; new_sqhd = (old_sqhd + 1) % req->sq->size; } while (cmpxchg(&req->sq->sqhd, old_sqhd, new_sqhd) != old_sqhd); } - sqhd = req->sq->sqhd & 0x0000FFFF; - req->rsp->sq_head = cpu_to_le16(sqhd); + req->rsp->sq_head = cpu_to_le16(req->sq->sqhd & 0x0000FFFF); +} + +static void nvmet_set_error(struct nvmet_req *req, u16 status) +{ + struct nvmet_ctrl *ctrl = req->sq->ctrl; + struct nvme_error_slot *new_error_slot; + unsigned long flags; + + req->rsp->status = cpu_to_le16(status << 1); + + if (!ctrl || req->error_loc == NVMET_NO_ERROR_LOC) + return; + + spin_lock_irqsave(&ctrl->error_lock, flags); + ctrl->err_counter++; + new_error_slot = + &ctrl->slots[ctrl->err_counter % NVMET_ERROR_LOG_SLOTS]; + + new_error_slot->error_count = cpu_to_le64(ctrl->err_counter); + new_error_slot->sqid = cpu_to_le16(req->sq->qid); + new_error_slot->cmdid = cpu_to_le16(req->cmd->common.command_id); + new_error_slot->status_field = cpu_to_le16(status << 1); + new_error_slot->param_error_location = cpu_to_le16(req->error_loc); + new_error_slot->lba = cpu_to_le64(req->error_slba); + new_error_slot->nsid = req->cmd->common.nsid; + spin_unlock_irqrestore(&ctrl->error_lock, flags); + + /* set the more bit for this request */ + req->rsp->status |= cpu_to_le16(1 << 14); +} + +static void __nvmet_req_complete(struct nvmet_req *req, u16 status) +{ + if (!req->sq->sqhd_disabled) + nvmet_update_sq_head(req); req->rsp->sq_id = cpu_to_le16(req->sq->qid); req->rsp->command_id = req->cmd->common.command_id; + if (unlikely(status)) + nvmet_set_error(req, status); if (req->ns) nvmet_put_namespace(req->ns); req->ops->queue_response(req); @@ -735,14 +813,20 @@ static u16 nvmet_parse_io_cmd(struct nvmet_req *req) return ret; req->ns = nvmet_find_namespace(req->sq->ctrl, cmd->rw.nsid); - if (unlikely(!req->ns)) + if (unlikely(!req->ns)) { + req->error_loc = offsetof(struct nvme_common_command, nsid); return NVME_SC_INVALID_NS | NVME_SC_DNR; + } ret = nvmet_check_ana_state(req->port, req->ns); - if (unlikely(ret)) + if (unlikely(ret)) { + req->error_loc = offsetof(struct nvme_common_command, nsid); return ret; + } ret = nvmet_io_cmd_check_access(req); - if (unlikely(ret)) + if (unlikely(ret)) { + req->error_loc = offsetof(struct nvme_common_command, nsid); return ret; + } if (req->ns->file) return nvmet_file_parse_io_cmd(req); @@ -763,10 +847,14 @@ bool nvmet_req_init(struct nvmet_req *req, struct nvmet_cq *cq, req->sg_cnt = 0; req->transfer_len = 0; req->rsp->status = 0; + req->rsp->sq_head = 0; req->ns = NULL; + req->error_loc = NVMET_NO_ERROR_LOC; + req->error_slba = 0; /* no support for fused commands yet */ if (unlikely(flags & (NVME_CMD_FUSE_FIRST | NVME_CMD_FUSE_SECOND))) { + req->error_loc = offsetof(struct nvme_common_command, flags); status = NVME_SC_INVALID_FIELD | NVME_SC_DNR; goto fail; } @@ -777,6 +865,7 @@ bool nvmet_req_init(struct nvmet_req *req, struct nvmet_cq *cq, * byte aligned. */ if (unlikely((flags & NVME_CMD_SGL_ALL) != NVME_CMD_SGL_METABUF)) { + req->error_loc = offsetof(struct nvme_common_command, flags); status = NVME_SC_INVALID_FIELD | NVME_SC_DNR; goto fail; } @@ -801,6 +890,9 @@ bool nvmet_req_init(struct nvmet_req *req, struct nvmet_cq *cq, goto fail; } + if (sq->ctrl) + sq->ctrl->cmd_seen = true; + return true; fail: @@ -819,9 +911,10 @@ EXPORT_SYMBOL_GPL(nvmet_req_uninit); void nvmet_req_execute(struct nvmet_req *req) { - if (unlikely(req->data_len != req->transfer_len)) + if (unlikely(req->data_len != req->transfer_len)) { + req->error_loc = offsetof(struct nvme_common_command, dptr); nvmet_req_complete(req, NVME_SC_SGL_INVALID_DATA | NVME_SC_DNR); - else + } else req->execute(req); } EXPORT_SYMBOL_GPL(nvmet_req_execute); @@ -1027,14 +1120,18 @@ u16 nvmet_check_ctrl_status(struct nvmet_req *req, struct nvme_command *cmd) return 0; } -static bool __nvmet_host_allowed(struct nvmet_subsys *subsys, - const char *hostnqn) +bool nvmet_host_allowed(struct nvmet_subsys *subsys, const char *hostnqn) { struct nvmet_host_link *p; + lockdep_assert_held(&nvmet_config_sem); + if (subsys->allow_any_host) return true; + if (subsys->type == NVME_NQN_DISC) /* allow all access to disc subsys */ + return true; + list_for_each_entry(p, &subsys->hosts, entry) { if (!strcmp(nvmet_host_name(p->host), hostnqn)) return true; @@ -1043,30 +1140,6 @@ static bool __nvmet_host_allowed(struct nvmet_subsys *subsys, return false; } -static bool nvmet_host_discovery_allowed(struct nvmet_req *req, - const char *hostnqn) -{ - struct nvmet_subsys_link *s; - - list_for_each_entry(s, &req->port->subsystems, entry) { - if (__nvmet_host_allowed(s->subsys, hostnqn)) - return true; - } - - return false; -} - -bool nvmet_host_allowed(struct nvmet_req *req, struct nvmet_subsys *subsys, - const char *hostnqn) -{ - lockdep_assert_held(&nvmet_config_sem); - - if (subsys->type == NVME_NQN_DISC) - return nvmet_host_discovery_allowed(req, hostnqn); - else - return __nvmet_host_allowed(subsys, hostnqn); -} - /* * Note: ctrl->subsys->lock should be held when calling this function */ @@ -1117,7 +1190,7 @@ u16 nvmet_alloc_ctrl(const char *subsysnqn, const char *hostnqn, status = NVME_SC_CONNECT_INVALID_PARAM | NVME_SC_DNR; down_read(&nvmet_config_sem); - if (!nvmet_host_allowed(req, subsys, hostnqn)) { + if (!nvmet_host_allowed(subsys, hostnqn)) { pr_info("connect by host %s for subsystem %s not allowed\n", hostnqn, subsysnqn); req->rsp->result.u32 = IPO_IATTR_CONNECT_DATA(hostnqn); @@ -1175,31 +1248,20 @@ u16 nvmet_alloc_ctrl(const char *subsysnqn, const char *hostnqn, ctrl->cntlid = ret; ctrl->ops = req->ops; - if (ctrl->subsys->type == NVME_NQN_DISC) { - /* Don't accept keep-alive timeout for discovery controllers */ - if (kato) { - status = NVME_SC_INVALID_FIELD | NVME_SC_DNR; - goto out_remove_ida; - } - /* - * Discovery controllers use some arbitrary high value in order - * to cleanup stale discovery sessions - * - * From the latest base diff RC: - * "The Keep Alive command is not supported by - * Discovery controllers. A transport may specify a - * fixed Discovery controller activity timeout value - * (e.g., 2 minutes). If no commands are received - * by a Discovery controller within that time - * period, the controller may perform the - * actions for Keep Alive Timer expiration". - */ - ctrl->kato = NVMET_DISC_KATO; - } else { - /* keep-alive timeout in seconds */ - ctrl->kato = DIV_ROUND_UP(kato, 1000); - } + /* + * Discovery controllers may use some arbitrary high value + * in order to cleanup stale discovery sessions + */ + if ((ctrl->subsys->type == NVME_NQN_DISC) && !kato) + kato = NVMET_DISC_KATO_MS; + + /* keep-alive timeout in seconds */ + ctrl->kato = DIV_ROUND_UP(kato, 1000); + + ctrl->err_counter = 0; + spin_lock_init(&ctrl->error_lock); + nvmet_start_keep_alive_timer(ctrl); mutex_lock(&subsys->lock); @@ -1210,8 +1272,6 @@ u16 nvmet_alloc_ctrl(const char *subsysnqn, const char *hostnqn, *ctrlp = ctrl; return 0; -out_remove_ida: - ida_simple_remove(&cntlid_ida, ctrl->cntlid); out_free_sqs: kfree(ctrl->sqs); out_free_cqs: diff --git a/drivers/nvme/target/discovery.c b/drivers/nvme/target/discovery.c index bc0aa0bf1543..d2cb71a0b419 100644 --- a/drivers/nvme/target/discovery.c +++ b/drivers/nvme/target/discovery.c @@ -18,7 +18,65 @@ struct nvmet_subsys *nvmet_disc_subsys; -u64 nvmet_genctr; +static u64 nvmet_genctr; + +static void __nvmet_disc_changed(struct nvmet_port *port, + struct nvmet_ctrl *ctrl) +{ + if (ctrl->port != port) + return; + + if (nvmet_aen_bit_disabled(ctrl, NVME_AEN_BIT_DISC_CHANGE)) + return; + + nvmet_add_async_event(ctrl, NVME_AER_TYPE_NOTICE, + NVME_AER_NOTICE_DISC_CHANGED, NVME_LOG_DISC); +} + +void nvmet_port_disc_changed(struct nvmet_port *port, + struct nvmet_subsys *subsys) +{ + struct nvmet_ctrl *ctrl; + + nvmet_genctr++; + + list_for_each_entry(ctrl, &nvmet_disc_subsys->ctrls, subsys_entry) { + if (subsys && !nvmet_host_allowed(subsys, ctrl->hostnqn)) + continue; + + __nvmet_disc_changed(port, ctrl); + } +} + +static void __nvmet_subsys_disc_changed(struct nvmet_port *port, + struct nvmet_subsys *subsys, + struct nvmet_host *host) +{ + struct nvmet_ctrl *ctrl; + + list_for_each_entry(ctrl, &nvmet_disc_subsys->ctrls, subsys_entry) { + if (host && strcmp(nvmet_host_name(host), ctrl->hostnqn)) + continue; + + __nvmet_disc_changed(port, ctrl); + } +} + +void nvmet_subsys_disc_changed(struct nvmet_subsys *subsys, + struct nvmet_host *host) +{ + struct nvmet_port *port; + struct nvmet_subsys_link *s; + + nvmet_genctr++; + + list_for_each_entry(port, nvmet_ports, global_entry) + list_for_each_entry(s, &port->subsystems, entry) { + if (s->subsys != subsys) + continue; + __nvmet_subsys_disc_changed(port, subsys, host); + } +} void nvmet_referral_enable(struct nvmet_port *parent, struct nvmet_port *port) { @@ -26,18 +84,18 @@ void nvmet_referral_enable(struct nvmet_port *parent, struct nvmet_port *port) if (list_empty(&port->entry)) { list_add_tail(&port->entry, &parent->referrals); port->enabled = true; - nvmet_genctr++; + nvmet_port_disc_changed(parent, NULL); } up_write(&nvmet_config_sem); } -void nvmet_referral_disable(struct nvmet_port *port) +void nvmet_referral_disable(struct nvmet_port *parent, struct nvmet_port *port) { down_write(&nvmet_config_sem); if (!list_empty(&port->entry)) { port->enabled = false; list_del_init(&port->entry); - nvmet_genctr++; + nvmet_port_disc_changed(parent, NULL); } up_write(&nvmet_config_sem); } @@ -107,7 +165,7 @@ static void nvmet_execute_get_disc_log_page(struct nvmet_req *req) down_read(&nvmet_config_sem); list_for_each_entry(p, &req->port->subsystems, entry) { - if (!nvmet_host_allowed(req, p->subsys, ctrl->hostnqn)) + if (!nvmet_host_allowed(p->subsys, ctrl->hostnqn)) continue; if (residual_len >= entry_size) { char traddr[NVMF_TRADDR_SIZE]; @@ -136,6 +194,8 @@ static void nvmet_execute_get_disc_log_page(struct nvmet_req *req) hdr->numrec = cpu_to_le64(numrec); hdr->recfmt = cpu_to_le16(0); + nvmet_clear_aen_bit(req, NVME_AEN_BIT_DISC_CHANGE); + up_read(&nvmet_config_sem); status = nvmet_copy_to_sgl(req, 0, hdr, data_len); @@ -174,6 +234,8 @@ static void nvmet_execute_identify_disc_ctrl(struct nvmet_req *req) if (req->port->inline_data_size) id->sgls |= cpu_to_le32(1 << 20); + id->oaes = cpu_to_le32(NVMET_DISC_AEN_CFG_OPTIONAL); + strlcpy(id->subnqn, ctrl->subsys->subsysnqn, sizeof(id->subnqn)); status = nvmet_copy_to_sgl(req, 0, id, sizeof(*id)); @@ -183,6 +245,51 @@ out: nvmet_req_complete(req, status); } +static void nvmet_execute_disc_set_features(struct nvmet_req *req) +{ + u32 cdw10 = le32_to_cpu(req->cmd->common.cdw10); + u16 stat; + + switch (cdw10 & 0xff) { + case NVME_FEAT_KATO: + stat = nvmet_set_feat_kato(req); + break; + case NVME_FEAT_ASYNC_EVENT: + stat = nvmet_set_feat_async_event(req, + NVMET_DISC_AEN_CFG_OPTIONAL); + break; + default: + req->error_loc = + offsetof(struct nvme_common_command, cdw10); + stat = NVME_SC_INVALID_FIELD | NVME_SC_DNR; + break; + } + + nvmet_req_complete(req, stat); +} + +static void nvmet_execute_disc_get_features(struct nvmet_req *req) +{ + u32 cdw10 = le32_to_cpu(req->cmd->common.cdw10); + u16 stat = 0; + + switch (cdw10 & 0xff) { + case NVME_FEAT_KATO: + nvmet_get_feat_kato(req); + break; + case NVME_FEAT_ASYNC_EVENT: + nvmet_get_feat_async_event(req); + break; + default: + req->error_loc = + offsetof(struct nvme_common_command, cdw10); + stat = NVME_SC_INVALID_FIELD | NVME_SC_DNR; + break; + } + + nvmet_req_complete(req, stat); +} + u16 nvmet_parse_discovery_cmd(struct nvmet_req *req) { struct nvme_command *cmd = req->cmd; @@ -190,10 +297,28 @@ u16 nvmet_parse_discovery_cmd(struct nvmet_req *req) if (unlikely(!(req->sq->ctrl->csts & NVME_CSTS_RDY))) { pr_err("got cmd %d while not ready\n", cmd->common.opcode); + req->error_loc = + offsetof(struct nvme_common_command, opcode); return NVME_SC_INVALID_OPCODE | NVME_SC_DNR; } switch (cmd->common.opcode) { + case nvme_admin_set_features: + req->execute = nvmet_execute_disc_set_features; + req->data_len = 0; + return 0; + case nvme_admin_get_features: + req->execute = nvmet_execute_disc_get_features; + req->data_len = 0; + return 0; + case nvme_admin_async_event: + req->execute = nvmet_execute_async_event; + req->data_len = 0; + return 0; + case nvme_admin_keep_alive: + req->execute = nvmet_execute_keep_alive; + req->data_len = 0; + return 0; case nvme_admin_get_log_page: req->data_len = nvmet_get_log_page_len(cmd); @@ -204,6 +329,8 @@ u16 nvmet_parse_discovery_cmd(struct nvmet_req *req) default: pr_err("unsupported get_log_page lid %d\n", cmd->get_log_page.lid); + req->error_loc = + offsetof(struct nvme_get_log_page_command, lid); return NVME_SC_INVALID_OPCODE | NVME_SC_DNR; } case nvme_admin_identify: @@ -216,10 +343,12 @@ u16 nvmet_parse_discovery_cmd(struct nvmet_req *req) default: pr_err("unsupported identify cns %d\n", cmd->identify.cns); + req->error_loc = offsetof(struct nvme_identify, cns); return NVME_SC_INVALID_OPCODE | NVME_SC_DNR; } default: pr_err("unhandled cmd %d\n", cmd->common.opcode); + req->error_loc = offsetof(struct nvme_common_command, opcode); return NVME_SC_INVALID_OPCODE | NVME_SC_DNR; } diff --git a/drivers/nvme/target/fabrics-cmd.c b/drivers/nvme/target/fabrics-cmd.c index d84ae004cb85..6cf1fd9eb32e 100644 --- a/drivers/nvme/target/fabrics-cmd.c +++ b/drivers/nvme/target/fabrics-cmd.c @@ -17,23 +17,26 @@ static void nvmet_execute_prop_set(struct nvmet_req *req) { + u64 val = le64_to_cpu(req->cmd->prop_set.value); u16 status = 0; - if (!(req->cmd->prop_set.attrib & 1)) { - u64 val = le64_to_cpu(req->cmd->prop_set.value); - - switch (le32_to_cpu(req->cmd->prop_set.offset)) { - case NVME_REG_CC: - nvmet_update_cc(req->sq->ctrl, val); - break; - default: - status = NVME_SC_INVALID_FIELD | NVME_SC_DNR; - break; - } - } else { + if (req->cmd->prop_set.attrib & 1) { + req->error_loc = + offsetof(struct nvmf_property_set_command, attrib); status = NVME_SC_INVALID_FIELD | NVME_SC_DNR; + goto out; } + switch (le32_to_cpu(req->cmd->prop_set.offset)) { + case NVME_REG_CC: + nvmet_update_cc(req->sq->ctrl, val); + break; + default: + req->error_loc = + offsetof(struct nvmf_property_set_command, offset); + status = NVME_SC_INVALID_FIELD | NVME_SC_DNR; + } +out: nvmet_req_complete(req, status); } @@ -69,6 +72,14 @@ static void nvmet_execute_prop_get(struct nvmet_req *req) } } + if (status && req->cmd->prop_get.attrib & 1) { + req->error_loc = + offsetof(struct nvmf_property_get_command, offset); + } else { + req->error_loc = + offsetof(struct nvmf_property_get_command, attrib); + } + req->rsp->result.u64 = cpu_to_le64(val); nvmet_req_complete(req, status); } @@ -89,6 +100,7 @@ u16 nvmet_parse_fabrics_cmd(struct nvmet_req *req) default: pr_err("received unknown capsule type 0x%x\n", cmd->fabrics.fctype); + req->error_loc = offsetof(struct nvmf_common_command, fctype); return NVME_SC_INVALID_OPCODE | NVME_SC_DNR; } @@ -105,16 +117,34 @@ static u16 nvmet_install_queue(struct nvmet_ctrl *ctrl, struct nvmet_req *req) old = cmpxchg(&req->sq->ctrl, NULL, ctrl); if (old) { pr_warn("queue already connected!\n"); + req->error_loc = offsetof(struct nvmf_connect_command, opcode); return NVME_SC_CONNECT_CTRL_BUSY | NVME_SC_DNR; } if (!sqsize) { pr_warn("queue size zero!\n"); + req->error_loc = offsetof(struct nvmf_connect_command, sqsize); return NVME_SC_CONNECT_INVALID_PARAM | NVME_SC_DNR; } /* note: convert queue size from 0's-based value to 1's-based value */ nvmet_cq_setup(ctrl, req->cq, qid, sqsize + 1); nvmet_sq_setup(ctrl, req->sq, qid, sqsize + 1); + + if (c->cattr & NVME_CONNECT_DISABLE_SQFLOW) { + req->sq->sqhd_disabled = true; + req->rsp->sq_head = cpu_to_le16(0xffff); + } + + if (ctrl->ops->install_queue) { + u16 ret = ctrl->ops->install_queue(req->sq); + + if (ret) { + pr_err("failed to install queue %d cntlid %d ret %x\n", + qid, ret, ctrl->cntlid); + return ret; + } + } + return 0; } @@ -141,6 +171,7 @@ static void nvmet_execute_admin_connect(struct nvmet_req *req) if (c->recfmt != 0) { pr_warn("invalid connect version (%d).\n", le16_to_cpu(c->recfmt)); + req->error_loc = offsetof(struct nvmf_connect_command, recfmt); status = NVME_SC_CONNECT_FORMAT | NVME_SC_DNR; goto out; } @@ -155,8 +186,13 @@ static void nvmet_execute_admin_connect(struct nvmet_req *req) status = nvmet_alloc_ctrl(d->subsysnqn, d->hostnqn, req, le32_to_cpu(c->kato), &ctrl); - if (status) + if (status) { + if (status == (NVME_SC_INVALID_FIELD | NVME_SC_DNR)) + req->error_loc = + offsetof(struct nvme_common_command, opcode); goto out; + } + uuid_copy(&ctrl->hostid, &d->hostid); status = nvmet_install_queue(ctrl, req); @@ -243,11 +279,13 @@ u16 nvmet_parse_connect_cmd(struct nvmet_req *req) if (cmd->common.opcode != nvme_fabrics_command) { pr_err("invalid command 0x%x on unconnected queue.\n", cmd->fabrics.opcode); + req->error_loc = offsetof(struct nvme_common_command, opcode); return NVME_SC_INVALID_OPCODE | NVME_SC_DNR; } if (cmd->fabrics.fctype != nvme_fabrics_type_connect) { pr_err("invalid capsule type 0x%x on unconnected queue.\n", cmd->fabrics.fctype); + req->error_loc = offsetof(struct nvmf_common_command, fctype); return NVME_SC_INVALID_OPCODE | NVME_SC_DNR; } diff --git a/drivers/nvme/target/fc.c b/drivers/nvme/target/fc.c index 409081a03b24..f98f5c5bea26 100644 --- a/drivers/nvme/target/fc.c +++ b/drivers/nvme/target/fc.c @@ -86,8 +86,6 @@ struct nvmet_fc_fcp_iod { spinlock_t flock; struct nvmet_req req; - struct work_struct work; - struct work_struct done_work; struct work_struct defer_work; struct nvmet_fc_tgtport *tgtport; @@ -134,7 +132,6 @@ struct nvmet_fc_tgt_queue { u16 sqsize; u16 ersp_ratio; __le16 sqhd; - int cpu; atomic_t connected; atomic_t sqtail; atomic_t zrspcnt; @@ -232,8 +229,6 @@ static LIST_HEAD(nvmet_fc_portentry_list); static void nvmet_fc_handle_ls_rqst_work(struct work_struct *work); -static void nvmet_fc_handle_fcp_rqst_work(struct work_struct *work); -static void nvmet_fc_fcp_rqst_op_done_work(struct work_struct *work); static void nvmet_fc_fcp_rqst_op_defer_work(struct work_struct *work); static void nvmet_fc_tgt_a_put(struct nvmet_fc_tgt_assoc *assoc); static int nvmet_fc_tgt_a_get(struct nvmet_fc_tgt_assoc *assoc); @@ -438,8 +433,6 @@ nvmet_fc_prep_fcp_iodlist(struct nvmet_fc_tgtport *tgtport, int i; for (i = 0; i < queue->sqsize; fod++, i++) { - INIT_WORK(&fod->work, nvmet_fc_handle_fcp_rqst_work); - INIT_WORK(&fod->done_work, nvmet_fc_fcp_rqst_op_done_work); INIT_WORK(&fod->defer_work, nvmet_fc_fcp_rqst_op_defer_work); fod->tgtport = tgtport; fod->queue = queue; @@ -517,10 +510,7 @@ nvmet_fc_queue_fcp_req(struct nvmet_fc_tgtport *tgtport, fcpreq->hwqid = queue->qid ? ((queue->qid - 1) % tgtport->ops->max_hw_queues) : 0; - if (tgtport->ops->target_features & NVMET_FCTGTFEAT_CMD_IN_ISR) - queue_work_on(queue->cpu, queue->work_q, &fod->work); - else - nvmet_fc_handle_fcp_rqst(tgtport, fod); + nvmet_fc_handle_fcp_rqst(tgtport, fod); } static void @@ -599,30 +589,6 @@ nvmet_fc_free_fcp_iod(struct nvmet_fc_tgt_queue *queue, queue_work(queue->work_q, &fod->defer_work); } -static int -nvmet_fc_queue_to_cpu(struct nvmet_fc_tgtport *tgtport, int qid) -{ - int cpu, idx, cnt; - - if (tgtport->ops->max_hw_queues == 1) - return WORK_CPU_UNBOUND; - - /* Simple cpu selection based on qid modulo active cpu count */ - idx = !qid ? 0 : (qid - 1) % num_active_cpus(); - - /* find the n'th active cpu */ - for (cpu = 0, cnt = 0; ; ) { - if (cpu_active(cpu)) { - if (cnt == idx) - break; - cnt++; - } - cpu = (cpu + 1) % num_possible_cpus(); - } - - return cpu; -} - static struct nvmet_fc_tgt_queue * nvmet_fc_alloc_target_queue(struct nvmet_fc_tgt_assoc *assoc, u16 qid, u16 sqsize) @@ -653,7 +619,6 @@ nvmet_fc_alloc_target_queue(struct nvmet_fc_tgt_assoc *assoc, queue->qid = qid; queue->sqsize = sqsize; queue->assoc = assoc; - queue->cpu = nvmet_fc_queue_to_cpu(assoc->tgtport, qid); INIT_LIST_HEAD(&queue->fod_list); INIT_LIST_HEAD(&queue->avail_defer_list); INIT_LIST_HEAD(&queue->pending_cmd_list); @@ -2146,25 +2111,11 @@ nvmet_fc_fod_op_done(struct nvmet_fc_fcp_iod *fod) } static void -nvmet_fc_fcp_rqst_op_done_work(struct work_struct *work) -{ - struct nvmet_fc_fcp_iod *fod = - container_of(work, struct nvmet_fc_fcp_iod, done_work); - - nvmet_fc_fod_op_done(fod); -} - -static void nvmet_fc_xmt_fcp_op_done(struct nvmefc_tgt_fcp_req *fcpreq) { struct nvmet_fc_fcp_iod *fod = fcpreq->nvmet_fc_private; - struct nvmet_fc_tgt_queue *queue = fod->queue; - if (fod->tgtport->ops->target_features & NVMET_FCTGTFEAT_OPDONE_IN_ISR) - /* context switch so completion is not in ISR context */ - queue_work_on(queue->cpu, queue->work_q, &fod->done_work); - else - nvmet_fc_fod_op_done(fod); + nvmet_fc_fod_op_done(fod); } /* @@ -2332,19 +2283,6 @@ transport_error: nvmet_fc_abort_op(tgtport, fod); } -/* - * Actual processing routine for received FC-NVME LS Requests from the LLD - */ -static void -nvmet_fc_handle_fcp_rqst_work(struct work_struct *work) -{ - struct nvmet_fc_fcp_iod *fod = - container_of(work, struct nvmet_fc_fcp_iod, work); - struct nvmet_fc_tgtport *tgtport = fod->tgtport; - - nvmet_fc_handle_fcp_rqst(tgtport, fod); -} - /** * nvmet_fc_rcv_fcp_req - transport entry point called by an LLDD * upon the reception of a NVME FCP CMD IU. diff --git a/drivers/nvme/target/io-cmd-bdev.c b/drivers/nvme/target/io-cmd-bdev.c index c1ec3475a140..b6d030d3259f 100644 --- a/drivers/nvme/target/io-cmd-bdev.c +++ b/drivers/nvme/target/io-cmd-bdev.c @@ -44,13 +44,69 @@ void nvmet_bdev_ns_disable(struct nvmet_ns *ns) } } +static u16 blk_to_nvme_status(struct nvmet_req *req, blk_status_t blk_sts) +{ + u16 status = NVME_SC_SUCCESS; + + if (likely(blk_sts == BLK_STS_OK)) + return status; + /* + * Right now there exists M : 1 mapping between block layer error + * to the NVMe status code (see nvme_error_status()). For consistency, + * when we reverse map we use most appropriate NVMe Status code from + * the group of the NVMe staus codes used in the nvme_error_status(). + */ + switch (blk_sts) { + case BLK_STS_NOSPC: + status = NVME_SC_CAP_EXCEEDED | NVME_SC_DNR; + req->error_loc = offsetof(struct nvme_rw_command, length); + break; + case BLK_STS_TARGET: + status = NVME_SC_LBA_RANGE | NVME_SC_DNR; + req->error_loc = offsetof(struct nvme_rw_command, slba); + break; + case BLK_STS_NOTSUPP: + req->error_loc = offsetof(struct nvme_common_command, opcode); + switch (req->cmd->common.opcode) { + case nvme_cmd_dsm: + case nvme_cmd_write_zeroes: + status = NVME_SC_ONCS_NOT_SUPPORTED | NVME_SC_DNR; + break; + default: + status = NVME_SC_INVALID_OPCODE | NVME_SC_DNR; + } + break; + case BLK_STS_MEDIUM: + status = NVME_SC_ACCESS_DENIED; + req->error_loc = offsetof(struct nvme_rw_command, nsid); + break; + case BLK_STS_IOERR: + /* fallthru */ + default: + status = NVME_SC_INTERNAL | NVME_SC_DNR; + req->error_loc = offsetof(struct nvme_common_command, opcode); + } + + switch (req->cmd->common.opcode) { + case nvme_cmd_read: + case nvme_cmd_write: + req->error_slba = le64_to_cpu(req->cmd->rw.slba); + break; + case nvme_cmd_write_zeroes: + req->error_slba = + le64_to_cpu(req->cmd->write_zeroes.slba); + break; + default: + req->error_slba = 0; + } + return status; +} + static void nvmet_bio_done(struct bio *bio) { struct nvmet_req *req = bio->bi_private; - nvmet_req_complete(req, - bio->bi_status ? NVME_SC_INTERNAL | NVME_SC_DNR : 0); - + nvmet_req_complete(req, blk_to_nvme_status(req, bio->bi_status)); if (bio != &req->b.inline_bio) bio_put(bio); } @@ -61,7 +117,6 @@ static void nvmet_bdev_execute_rw(struct nvmet_req *req) struct bio *bio; struct scatterlist *sg; sector_t sector; - blk_qc_t cookie; int op, op_flags = 0, i; if (!req->sg_cnt) { @@ -114,9 +169,7 @@ static void nvmet_bdev_execute_rw(struct nvmet_req *req) sg_cnt--; } - cookie = submit_bio(bio); - - blk_poll(bdev_get_queue(req->ns->bdev), cookie); + submit_bio(bio); } static void nvmet_bdev_execute_flush(struct nvmet_req *req) @@ -139,18 +192,21 @@ u16 nvmet_bdev_flush(struct nvmet_req *req) return 0; } -static u16 nvmet_bdev_discard_range(struct nvmet_ns *ns, +static u16 nvmet_bdev_discard_range(struct nvmet_req *req, struct nvme_dsm_range *range, struct bio **bio) { + struct nvmet_ns *ns = req->ns; int ret; ret = __blkdev_issue_discard(ns->bdev, le64_to_cpu(range->slba) << (ns->blksize_shift - 9), le32_to_cpu(range->nlb) << (ns->blksize_shift - 9), GFP_KERNEL, 0, bio); - if (ret && ret != -EOPNOTSUPP) - return NVME_SC_INTERNAL | NVME_SC_DNR; - return 0; + + if (ret) + req->error_slba = le64_to_cpu(range->slba); + + return blk_to_nvme_status(req, errno_to_blk_status(ret)); } static void nvmet_bdev_execute_discard(struct nvmet_req *req) @@ -166,7 +222,7 @@ static void nvmet_bdev_execute_discard(struct nvmet_req *req) if (status) break; - status = nvmet_bdev_discard_range(req->ns, &range, &bio); + status = nvmet_bdev_discard_range(req, &range, &bio); if (status) break; } @@ -207,16 +263,16 @@ static void nvmet_bdev_execute_write_zeroes(struct nvmet_req *req) u16 status = NVME_SC_SUCCESS; sector_t sector; sector_t nr_sector; + int ret; sector = le64_to_cpu(write_zeroes->slba) << (req->ns->blksize_shift - 9); nr_sector = (((sector_t)le16_to_cpu(write_zeroes->length) + 1) << (req->ns->blksize_shift - 9)); - if (__blkdev_issue_zeroout(req->ns->bdev, sector, nr_sector, - GFP_KERNEL, &bio, 0)) - status = NVME_SC_INTERNAL | NVME_SC_DNR; - + ret = __blkdev_issue_zeroout(req->ns->bdev, sector, nr_sector, + GFP_KERNEL, &bio, 0); + status = blk_to_nvme_status(req, errno_to_blk_status(ret)); if (bio) { bio->bi_private = req; bio->bi_end_io = nvmet_bio_done; @@ -251,6 +307,7 @@ u16 nvmet_bdev_parse_io_cmd(struct nvmet_req *req) default: pr_err("unhandled cmd %d on qid %d\n", cmd->common.opcode, req->sq->qid); + req->error_loc = offsetof(struct nvme_common_command, opcode); return NVME_SC_INVALID_OPCODE | NVME_SC_DNR; } } diff --git a/drivers/nvme/target/io-cmd-file.c b/drivers/nvme/target/io-cmd-file.c index 01feebec29ea..517522305e5c 100644 --- a/drivers/nvme/target/io-cmd-file.c +++ b/drivers/nvme/target/io-cmd-file.c @@ -83,17 +83,16 @@ static void nvmet_file_init_bvec(struct bio_vec *bv, struct sg_page_iter *iter) } static ssize_t nvmet_file_submit_bvec(struct nvmet_req *req, loff_t pos, - unsigned long nr_segs, size_t count) + unsigned long nr_segs, size_t count, int ki_flags) { struct kiocb *iocb = &req->f.iocb; ssize_t (*call_iter)(struct kiocb *iocb, struct iov_iter *iter); struct iov_iter iter; - int ki_flags = 0, rw; - ssize_t ret; + int rw; if (req->cmd->rw.opcode == nvme_cmd_write) { if (req->cmd->rw.control & cpu_to_le16(NVME_RW_FUA)) - ki_flags = IOCB_DSYNC; + ki_flags |= IOCB_DSYNC; call_iter = req->ns->file->f_op->write_iter; rw = WRITE; } else { @@ -107,17 +106,13 @@ static ssize_t nvmet_file_submit_bvec(struct nvmet_req *req, loff_t pos, iocb->ki_filp = req->ns->file; iocb->ki_flags = ki_flags | iocb_flags(req->ns->file); - ret = call_iter(iocb, &iter); - - if (ret != -EIOCBQUEUED && iocb->ki_complete) - iocb->ki_complete(iocb, ret, 0); - - return ret; + return call_iter(iocb, &iter); } static void nvmet_file_io_done(struct kiocb *iocb, long ret, long ret2) { struct nvmet_req *req = container_of(iocb, struct nvmet_req, f.iocb); + u16 status = NVME_SC_SUCCESS; if (req->f.bvec != req->inline_bvec) { if (likely(req->f.mpool_alloc == false)) @@ -126,11 +121,12 @@ static void nvmet_file_io_done(struct kiocb *iocb, long ret, long ret2) mempool_free(req->f.bvec, req->ns->bvec_pool); } - nvmet_req_complete(req, ret != req->data_len ? - NVME_SC_INTERNAL | NVME_SC_DNR : 0); + if (unlikely(ret != req->data_len)) + status = errno_to_nvme_status(req, ret); + nvmet_req_complete(req, status); } -static void nvmet_file_execute_rw(struct nvmet_req *req) +static bool nvmet_file_execute_io(struct nvmet_req *req, int ki_flags) { ssize_t nr_bvec = DIV_ROUND_UP(req->data_len, PAGE_SIZE); struct sg_page_iter sg_pg_iter; @@ -140,30 +136,14 @@ static void nvmet_file_execute_rw(struct nvmet_req *req) ssize_t ret = 0; loff_t pos; - if (!req->sg_cnt || !nr_bvec) { - nvmet_req_complete(req, 0); - return; - } + + if (req->f.mpool_alloc && nr_bvec > NVMET_MAX_MPOOL_BVEC) + is_sync = true; pos = le64_to_cpu(req->cmd->rw.slba) << req->ns->blksize_shift; if (unlikely(pos + req->data_len > req->ns->size)) { - nvmet_req_complete(req, NVME_SC_LBA_RANGE | NVME_SC_DNR); - return; - } - - if (nr_bvec > NVMET_MAX_INLINE_BIOVEC) - req->f.bvec = kmalloc_array(nr_bvec, sizeof(struct bio_vec), - GFP_KERNEL); - else - req->f.bvec = req->inline_bvec; - - req->f.mpool_alloc = false; - if (unlikely(!req->f.bvec)) { - /* fallback under memory pressure */ - req->f.bvec = mempool_alloc(req->ns->bvec_pool, GFP_KERNEL); - req->f.mpool_alloc = true; - if (nr_bvec > NVMET_MAX_MPOOL_BVEC) - is_sync = true; + nvmet_req_complete(req, errno_to_nvme_status(req, -ENOSPC)); + return true; } memset(&req->f.iocb, 0, sizeof(struct kiocb)); @@ -177,9 +157,10 @@ static void nvmet_file_execute_rw(struct nvmet_req *req) if (unlikely(is_sync) && (nr_bvec - 1 == 0 || bv_cnt == NVMET_MAX_MPOOL_BVEC)) { - ret = nvmet_file_submit_bvec(req, pos, bv_cnt, len); + ret = nvmet_file_submit_bvec(req, pos, bv_cnt, len, 0); if (ret < 0) - goto out; + goto complete; + pos += len; bv_cnt = 0; len = 0; @@ -187,35 +168,95 @@ static void nvmet_file_execute_rw(struct nvmet_req *req) nr_bvec--; } - if (WARN_ON_ONCE(total_len != req->data_len)) + if (WARN_ON_ONCE(total_len != req->data_len)) { ret = -EIO; -out: - if (unlikely(is_sync || ret)) { - nvmet_file_io_done(&req->f.iocb, ret < 0 ? ret : total_len, 0); - return; + goto complete; + } + + if (unlikely(is_sync)) { + ret = total_len; + goto complete; } - req->f.iocb.ki_complete = nvmet_file_io_done; - nvmet_file_submit_bvec(req, pos, bv_cnt, total_len); + + /* + * A NULL ki_complete ask for synchronous execution, which we want + * for the IOCB_NOWAIT case. + */ + if (!(ki_flags & IOCB_NOWAIT)) + req->f.iocb.ki_complete = nvmet_file_io_done; + + ret = nvmet_file_submit_bvec(req, pos, bv_cnt, total_len, ki_flags); + + switch (ret) { + case -EIOCBQUEUED: + return true; + case -EAGAIN: + if (WARN_ON_ONCE(!(ki_flags & IOCB_NOWAIT))) + goto complete; + return false; + case -EOPNOTSUPP: + /* + * For file systems returning error -EOPNOTSUPP, handle + * IOCB_NOWAIT error case separately and retry without + * IOCB_NOWAIT. + */ + if ((ki_flags & IOCB_NOWAIT)) + return false; + break; + } + +complete: + nvmet_file_io_done(&req->f.iocb, ret, 0); + return true; } static void nvmet_file_buffered_io_work(struct work_struct *w) { struct nvmet_req *req = container_of(w, struct nvmet_req, f.work); - nvmet_file_execute_rw(req); + nvmet_file_execute_io(req, 0); } -static void nvmet_file_execute_rw_buffered_io(struct nvmet_req *req) +static void nvmet_file_submit_buffered_io(struct nvmet_req *req) { INIT_WORK(&req->f.work, nvmet_file_buffered_io_work); queue_work(buffered_io_wq, &req->f.work); } +static void nvmet_file_execute_rw(struct nvmet_req *req) +{ + ssize_t nr_bvec = DIV_ROUND_UP(req->data_len, PAGE_SIZE); + + if (!req->sg_cnt || !nr_bvec) { + nvmet_req_complete(req, 0); + return; + } + + if (nr_bvec > NVMET_MAX_INLINE_BIOVEC) + req->f.bvec = kmalloc_array(nr_bvec, sizeof(struct bio_vec), + GFP_KERNEL); + else + req->f.bvec = req->inline_bvec; + + if (unlikely(!req->f.bvec)) { + /* fallback under memory pressure */ + req->f.bvec = mempool_alloc(req->ns->bvec_pool, GFP_KERNEL); + req->f.mpool_alloc = true; + } else + req->f.mpool_alloc = false; + + if (req->ns->buffered_io) { + if (likely(!req->f.mpool_alloc) && + nvmet_file_execute_io(req, IOCB_NOWAIT)) + return; + nvmet_file_submit_buffered_io(req); + } else + nvmet_file_execute_io(req, 0); +} + u16 nvmet_file_flush(struct nvmet_req *req) { - if (vfs_fsync(req->ns->file, 1) < 0) - return NVME_SC_INTERNAL | NVME_SC_DNR; - return 0; + return errno_to_nvme_status(req, vfs_fsync(req->ns->file, 1)); } static void nvmet_file_flush_work(struct work_struct *w) @@ -236,30 +277,34 @@ static void nvmet_file_execute_discard(struct nvmet_req *req) int mode = FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE; struct nvme_dsm_range range; loff_t offset, len; - u16 ret; + u16 status = 0; + int ret; int i; for (i = 0; i <= le32_to_cpu(req->cmd->dsm.nr); i++) { - ret = nvmet_copy_from_sgl(req, i * sizeof(range), &range, + status = nvmet_copy_from_sgl(req, i * sizeof(range), &range, sizeof(range)); - if (ret) + if (status) break; offset = le64_to_cpu(range.slba) << req->ns->blksize_shift; len = le32_to_cpu(range.nlb); len <<= req->ns->blksize_shift; if (offset + len > req->ns->size) { - ret = NVME_SC_LBA_RANGE | NVME_SC_DNR; + req->error_slba = le64_to_cpu(range.slba); + status = errno_to_nvme_status(req, -ENOSPC); break; } - if (vfs_fallocate(req->ns->file, mode, offset, len)) { - ret = NVME_SC_INTERNAL | NVME_SC_DNR; + ret = vfs_fallocate(req->ns->file, mode, offset, len); + if (ret) { + req->error_slba = le64_to_cpu(range.slba); + status = errno_to_nvme_status(req, ret); break; } } - nvmet_req_complete(req, ret); + nvmet_req_complete(req, status); } static void nvmet_file_dsm_work(struct work_struct *w) @@ -299,12 +344,12 @@ static void nvmet_file_write_zeroes_work(struct work_struct *w) req->ns->blksize_shift); if (unlikely(offset + len > req->ns->size)) { - nvmet_req_complete(req, NVME_SC_LBA_RANGE | NVME_SC_DNR); + nvmet_req_complete(req, errno_to_nvme_status(req, -ENOSPC)); return; } ret = vfs_fallocate(req->ns->file, mode, offset, len); - nvmet_req_complete(req, ret < 0 ? NVME_SC_INTERNAL | NVME_SC_DNR : 0); + nvmet_req_complete(req, ret < 0 ? errno_to_nvme_status(req, ret) : 0); } static void nvmet_file_execute_write_zeroes(struct nvmet_req *req) @@ -320,10 +365,7 @@ u16 nvmet_file_parse_io_cmd(struct nvmet_req *req) switch (cmd->common.opcode) { case nvme_cmd_read: case nvme_cmd_write: - if (req->ns->buffered_io) - req->execute = nvmet_file_execute_rw_buffered_io; - else - req->execute = nvmet_file_execute_rw; + req->execute = nvmet_file_execute_rw; req->data_len = nvmet_rw_len(req); return 0; case nvme_cmd_flush: @@ -342,6 +384,7 @@ u16 nvmet_file_parse_io_cmd(struct nvmet_req *req) default: pr_err("unhandled cmd for file ns %d on qid %d\n", cmd->common.opcode, req->sq->qid); + req->error_loc = offsetof(struct nvme_common_command, opcode); return NVME_SC_INVALID_OPCODE | NVME_SC_DNR; } } diff --git a/drivers/nvme/target/loop.c b/drivers/nvme/target/loop.c index 9908082b32c4..4aac1b4a8112 100644 --- a/drivers/nvme/target/loop.c +++ b/drivers/nvme/target/loop.c @@ -345,7 +345,7 @@ static int nvme_loop_connect_io_queues(struct nvme_loop_ctrl *ctrl) int i, ret; for (i = 1; i < ctrl->ctrl.queue_count; i++) { - ret = nvmf_connect_io_queue(&ctrl->ctrl, i); + ret = nvmf_connect_io_queue(&ctrl->ctrl, i, false); if (ret) return ret; set_bit(NVME_LOOP_Q_LIVE, &ctrl->queues[i].flags); diff --git a/drivers/nvme/target/nvmet.h b/drivers/nvme/target/nvmet.h index c2b4d9ee6391..3e4719fdba85 100644 --- a/drivers/nvme/target/nvmet.h +++ b/drivers/nvme/target/nvmet.h @@ -30,12 +30,15 @@ #define NVMET_ASYNC_EVENTS 4 #define NVMET_ERROR_LOG_SLOTS 128 +#define NVMET_NO_ERROR_LOC ((u16)-1) /* * Supported optional AENs: */ #define NVMET_AEN_CFG_OPTIONAL \ (NVME_AEN_CFG_NS_ATTR | NVME_AEN_CFG_ANA_CHANGE) +#define NVMET_DISC_AEN_CFG_OPTIONAL \ + (NVME_AEN_CFG_DISC_CHANGE) /* * Plus mandatory SMART AENs (we'll never send them, but allow enabling them): @@ -104,6 +107,7 @@ struct nvmet_sq { u16 qid; u16 size; u32 sqhd; + bool sqhd_disabled; struct completion free_done; struct completion confirm_done; }; @@ -137,6 +141,7 @@ struct nvmet_port { struct list_head subsystems; struct config_group referrals_group; struct list_head referrals; + struct list_head global_entry; struct config_group ana_groups_group; struct nvmet_ana_group ana_default_group; enum nvme_ana_state *ana_state; @@ -163,6 +168,8 @@ struct nvmet_ctrl { struct nvmet_cq **cqs; struct nvmet_sq **sqs; + bool cmd_seen; + struct mutex lock; u64 cap; u32 cc; @@ -194,8 +201,12 @@ struct nvmet_ctrl { char subsysnqn[NVMF_NQN_FIELD_LEN]; char hostnqn[NVMF_NQN_FIELD_LEN]; - struct device *p2p_client; - struct radix_tree_root p2p_ns_map; + struct device *p2p_client; + struct radix_tree_root p2p_ns_map; + + spinlock_t error_lock; + u64 err_counter; + struct nvme_error_slot slots[NVMET_ERROR_LOG_SLOTS]; }; struct nvmet_subsys { @@ -273,6 +284,7 @@ struct nvmet_fabrics_ops { void (*delete_ctrl)(struct nvmet_ctrl *ctrl); void (*disc_traddr)(struct nvmet_req *req, struct nvmet_port *port, char *traddr); + u16 (*install_queue)(struct nvmet_sq *nvme_sq); }; #define NVMET_MAX_INLINE_BIOVEC 8 @@ -308,17 +320,14 @@ struct nvmet_req { void (*execute)(struct nvmet_req *req); const struct nvmet_fabrics_ops *ops; - struct pci_dev *p2p_dev; - struct device *p2p_client; + struct pci_dev *p2p_dev; + struct device *p2p_client; + u16 error_loc; + u64 error_slba; }; extern struct workqueue_struct *buffered_io_wq; -static inline void nvmet_set_status(struct nvmet_req *req, u16 status) -{ - req->rsp->status = cpu_to_le16(status << 1); -} - static inline void nvmet_set_result(struct nvmet_req *req, u32 result) { req->rsp->result.u32 = cpu_to_le32(result); @@ -340,6 +349,27 @@ struct nvmet_async_event { u8 log_page; }; +static inline void nvmet_clear_aen_bit(struct nvmet_req *req, u32 bn) +{ + int rae = le32_to_cpu(req->cmd->common.cdw10) & 1 << 15; + + if (!rae) + clear_bit(bn, &req->sq->ctrl->aen_masked); +} + +static inline bool nvmet_aen_bit_disabled(struct nvmet_ctrl *ctrl, u32 bn) +{ + if (!(READ_ONCE(ctrl->aen_enabled) & (1 << bn))) + return true; + return test_and_set_bit(bn, &ctrl->aen_masked); +} + +void nvmet_get_feat_kato(struct nvmet_req *req); +void nvmet_get_feat_async_event(struct nvmet_req *req); +u16 nvmet_set_feat_kato(struct nvmet_req *req); +u16 nvmet_set_feat_async_event(struct nvmet_req *req, u32 mask); +void nvmet_execute_async_event(struct nvmet_req *req); + u16 nvmet_parse_connect_cmd(struct nvmet_req *req); u16 nvmet_bdev_parse_io_cmd(struct nvmet_req *req); u16 nvmet_file_parse_io_cmd(struct nvmet_req *req); @@ -355,6 +385,8 @@ void nvmet_req_complete(struct nvmet_req *req, u16 status); int nvmet_req_alloc_sgl(struct nvmet_req *req); void nvmet_req_free_sgl(struct nvmet_req *req); +void nvmet_execute_keep_alive(struct nvmet_req *req); + void nvmet_cq_setup(struct nvmet_ctrl *ctrl, struct nvmet_cq *cq, u16 qid, u16 size); void nvmet_sq_setup(struct nvmet_ctrl *ctrl, struct nvmet_sq *sq, u16 qid, @@ -395,7 +427,7 @@ int nvmet_enable_port(struct nvmet_port *port); void nvmet_disable_port(struct nvmet_port *port); void nvmet_referral_enable(struct nvmet_port *parent, struct nvmet_port *port); -void nvmet_referral_disable(struct nvmet_port *port); +void nvmet_referral_disable(struct nvmet_port *parent, struct nvmet_port *port); u16 nvmet_copy_to_sgl(struct nvmet_req *req, off_t off, const void *buf, size_t len); @@ -405,6 +437,14 @@ u16 nvmet_zero_sgl(struct nvmet_req *req, off_t off, size_t len); u32 nvmet_get_log_page_len(struct nvme_command *cmd); +extern struct list_head *nvmet_ports; +void nvmet_port_disc_changed(struct nvmet_port *port, + struct nvmet_subsys *subsys); +void nvmet_subsys_disc_changed(struct nvmet_subsys *subsys, + struct nvmet_host *host); +void nvmet_add_async_event(struct nvmet_ctrl *ctrl, u8 event_type, + u8 event_info, u8 log_page); + #define NVMET_QUEUE_SIZE 1024 #define NVMET_NR_QUEUES 128 #define NVMET_MAX_CMD NVMET_QUEUE_SIZE @@ -425,7 +465,7 @@ u32 nvmet_get_log_page_len(struct nvme_command *cmd); #define NVMET_DEFAULT_ANA_GRPID 1 #define NVMET_KAS 10 -#define NVMET_DISC_KATO 120 +#define NVMET_DISC_KATO_MS 120000 int __init nvmet_init_configfs(void); void __exit nvmet_exit_configfs(void); @@ -434,15 +474,13 @@ int __init nvmet_init_discovery(void); void nvmet_exit_discovery(void); extern struct nvmet_subsys *nvmet_disc_subsys; -extern u64 nvmet_genctr; extern struct rw_semaphore nvmet_config_sem; extern u32 nvmet_ana_group_enabled[NVMET_MAX_ANAGRPS + 1]; extern u64 nvmet_ana_chgcnt; extern struct rw_semaphore nvmet_ana_sem; -bool nvmet_host_allowed(struct nvmet_req *req, struct nvmet_subsys *subsys, - const char *hostnqn); +bool nvmet_host_allowed(struct nvmet_subsys *subsys, const char *hostnqn); int nvmet_bdev_ns_enable(struct nvmet_ns *ns); int nvmet_file_ns_enable(struct nvmet_ns *ns); @@ -457,4 +495,6 @@ static inline u32 nvmet_rw_len(struct nvmet_req *req) return ((u32)le16_to_cpu(req->cmd->rw.length) + 1) << req->ns->blksize_shift; } + +u16 errno_to_nvme_status(struct nvmet_req *req, int errno); #endif /* _NVMET_H */ diff --git a/drivers/nvme/target/rdma.c b/drivers/nvme/target/rdma.c index 583086dd9cb9..a8d23eb80192 100644 --- a/drivers/nvme/target/rdma.c +++ b/drivers/nvme/target/rdma.c @@ -196,7 +196,7 @@ nvmet_rdma_put_rsp(struct nvmet_rdma_rsp *rsp) { unsigned long flags; - if (rsp->allocated) { + if (unlikely(rsp->allocated)) { kfree(rsp); return; } @@ -630,8 +630,11 @@ static u16 nvmet_rdma_map_sgl_inline(struct nvmet_rdma_rsp *rsp) u64 off = le64_to_cpu(sgl->addr); u32 len = le32_to_cpu(sgl->length); - if (!nvme_is_write(rsp->req.cmd)) + if (!nvme_is_write(rsp->req.cmd)) { + rsp->req.error_loc = + offsetof(struct nvme_common_command, opcode); return NVME_SC_INVALID_FIELD | NVME_SC_DNR; + } if (off + len > rsp->queue->dev->inline_data_size) { pr_err("invalid inline data offset!\n"); @@ -696,6 +699,8 @@ static u16 nvmet_rdma_map_sgl(struct nvmet_rdma_rsp *rsp) return nvmet_rdma_map_sgl_inline(rsp); default: pr_err("invalid SGL subtype: %#x\n", sgl->type); + rsp->req.error_loc = + offsetof(struct nvme_common_command, dptr); return NVME_SC_INVALID_FIELD | NVME_SC_DNR; } case NVME_KEY_SGL_FMT_DATA_DESC: @@ -706,10 +711,13 @@ static u16 nvmet_rdma_map_sgl(struct nvmet_rdma_rsp *rsp) return nvmet_rdma_map_sgl_keyed(rsp, sgl, false); default: pr_err("invalid SGL subtype: %#x\n", sgl->type); + rsp->req.error_loc = + offsetof(struct nvme_common_command, dptr); return NVME_SC_INVALID_FIELD | NVME_SC_DNR; } default: pr_err("invalid SGL type: %#x\n", sgl->type); + rsp->req.error_loc = offsetof(struct nvme_common_command, dptr); return NVME_SC_SGL_INVALID_TYPE | NVME_SC_DNR; } } diff --git a/drivers/nvme/target/tcp.c b/drivers/nvme/target/tcp.c new file mode 100644 index 000000000000..44b37b202e39 --- /dev/null +++ b/drivers/nvme/target/tcp.c @@ -0,0 +1,1737 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * NVMe over Fabrics TCP target. + * Copyright (c) 2018 Lightbits Labs. All rights reserved. + */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt +#include <linux/module.h> +#include <linux/init.h> +#include <linux/slab.h> +#include <linux/err.h> +#include <linux/nvme-tcp.h> +#include <net/sock.h> +#include <net/tcp.h> +#include <linux/inet.h> +#include <linux/llist.h> +#include <crypto/hash.h> + +#include "nvmet.h" + +#define NVMET_TCP_DEF_INLINE_DATA_SIZE (4 * PAGE_SIZE) + +#define NVMET_TCP_RECV_BUDGET 8 +#define NVMET_TCP_SEND_BUDGET 8 +#define NVMET_TCP_IO_WORK_BUDGET 64 + +enum nvmet_tcp_send_state { + NVMET_TCP_SEND_DATA_PDU, + NVMET_TCP_SEND_DATA, + NVMET_TCP_SEND_R2T, + NVMET_TCP_SEND_DDGST, + NVMET_TCP_SEND_RESPONSE +}; + +enum nvmet_tcp_recv_state { + NVMET_TCP_RECV_PDU, + NVMET_TCP_RECV_DATA, + NVMET_TCP_RECV_DDGST, + NVMET_TCP_RECV_ERR, +}; + +enum { + NVMET_TCP_F_INIT_FAILED = (1 << 0), +}; + +struct nvmet_tcp_cmd { + struct nvmet_tcp_queue *queue; + struct nvmet_req req; + + struct nvme_tcp_cmd_pdu *cmd_pdu; + struct nvme_tcp_rsp_pdu *rsp_pdu; + struct nvme_tcp_data_pdu *data_pdu; + struct nvme_tcp_r2t_pdu *r2t_pdu; + + u32 rbytes_done; + u32 wbytes_done; + + u32 pdu_len; + u32 pdu_recv; + int sg_idx; + int nr_mapped; + struct msghdr recv_msg; + struct kvec *iov; + u32 flags; + + struct list_head entry; + struct llist_node lentry; + + /* send state */ + u32 offset; + struct scatterlist *cur_sg; + enum nvmet_tcp_send_state state; + + __le32 exp_ddgst; + __le32 recv_ddgst; +}; + +enum nvmet_tcp_queue_state { + NVMET_TCP_Q_CONNECTING, + NVMET_TCP_Q_LIVE, + NVMET_TCP_Q_DISCONNECTING, +}; + +struct nvmet_tcp_queue { + struct socket *sock; + struct nvmet_tcp_port *port; + struct work_struct io_work; + int cpu; + struct nvmet_cq nvme_cq; + struct nvmet_sq nvme_sq; + + /* send state */ + struct nvmet_tcp_cmd *cmds; + unsigned int nr_cmds; + struct list_head free_list; + struct llist_head resp_list; + struct list_head resp_send_list; + int send_list_len; + struct nvmet_tcp_cmd *snd_cmd; + + /* recv state */ + int offset; + int left; + enum nvmet_tcp_recv_state rcv_state; + struct nvmet_tcp_cmd *cmd; + union nvme_tcp_pdu pdu; + + /* digest state */ + bool hdr_digest; + bool data_digest; + struct ahash_request *snd_hash; + struct ahash_request *rcv_hash; + + spinlock_t state_lock; + enum nvmet_tcp_queue_state state; + + struct sockaddr_storage sockaddr; + struct sockaddr_storage sockaddr_peer; + struct work_struct release_work; + + int idx; + struct list_head queue_list; + + struct nvmet_tcp_cmd connect; + + struct page_frag_cache pf_cache; + + void (*data_ready)(struct sock *); + void (*state_change)(struct sock *); + void (*write_space)(struct sock *); +}; + +struct nvmet_tcp_port { + struct socket *sock; + struct work_struct accept_work; + struct nvmet_port *nport; + struct sockaddr_storage addr; + int last_cpu; + void (*data_ready)(struct sock *); +}; + +static DEFINE_IDA(nvmet_tcp_queue_ida); +static LIST_HEAD(nvmet_tcp_queue_list); +static DEFINE_MUTEX(nvmet_tcp_queue_mutex); + +static struct workqueue_struct *nvmet_tcp_wq; +static struct nvmet_fabrics_ops nvmet_tcp_ops; +static void nvmet_tcp_free_cmd(struct nvmet_tcp_cmd *c); +static void nvmet_tcp_finish_cmd(struct nvmet_tcp_cmd *cmd); + +static inline u16 nvmet_tcp_cmd_tag(struct nvmet_tcp_queue *queue, + struct nvmet_tcp_cmd *cmd) +{ + return cmd - queue->cmds; +} + +static inline bool nvmet_tcp_has_data_in(struct nvmet_tcp_cmd *cmd) +{ + return nvme_is_write(cmd->req.cmd) && + cmd->rbytes_done < cmd->req.transfer_len; +} + +static inline bool nvmet_tcp_need_data_in(struct nvmet_tcp_cmd *cmd) +{ + return nvmet_tcp_has_data_in(cmd) && !cmd->req.rsp->status; +} + +static inline bool nvmet_tcp_need_data_out(struct nvmet_tcp_cmd *cmd) +{ + return !nvme_is_write(cmd->req.cmd) && + cmd->req.transfer_len > 0 && + !cmd->req.rsp->status; +} + +static inline bool nvmet_tcp_has_inline_data(struct nvmet_tcp_cmd *cmd) +{ + return nvme_is_write(cmd->req.cmd) && cmd->pdu_len && + !cmd->rbytes_done; +} + +static inline struct nvmet_tcp_cmd * +nvmet_tcp_get_cmd(struct nvmet_tcp_queue *queue) +{ + struct nvmet_tcp_cmd *cmd; + + cmd = list_first_entry_or_null(&queue->free_list, + struct nvmet_tcp_cmd, entry); + if (!cmd) + return NULL; + list_del_init(&cmd->entry); + + cmd->rbytes_done = cmd->wbytes_done = 0; + cmd->pdu_len = 0; + cmd->pdu_recv = 0; + cmd->iov = NULL; + cmd->flags = 0; + return cmd; +} + +static inline void nvmet_tcp_put_cmd(struct nvmet_tcp_cmd *cmd) +{ + if (unlikely(cmd == &cmd->queue->connect)) + return; + + list_add_tail(&cmd->entry, &cmd->queue->free_list); +} + +static inline u8 nvmet_tcp_hdgst_len(struct nvmet_tcp_queue *queue) +{ + return queue->hdr_digest ? NVME_TCP_DIGEST_LENGTH : 0; +} + +static inline u8 nvmet_tcp_ddgst_len(struct nvmet_tcp_queue *queue) +{ + return queue->data_digest ? NVME_TCP_DIGEST_LENGTH : 0; +} + +static inline void nvmet_tcp_hdgst(struct ahash_request *hash, + void *pdu, size_t len) +{ + struct scatterlist sg; + + sg_init_one(&sg, pdu, len); + ahash_request_set_crypt(hash, &sg, pdu + len, len); + crypto_ahash_digest(hash); +} + +static int nvmet_tcp_verify_hdgst(struct nvmet_tcp_queue *queue, + void *pdu, size_t len) +{ + struct nvme_tcp_hdr *hdr = pdu; + __le32 recv_digest; + __le32 exp_digest; + + if (unlikely(!(hdr->flags & NVME_TCP_F_HDGST))) { + pr_err("queue %d: header digest enabled but no header digest\n", + queue->idx); + return -EPROTO; + } + + recv_digest = *(__le32 *)(pdu + hdr->hlen); + nvmet_tcp_hdgst(queue->rcv_hash, pdu, len); + exp_digest = *(__le32 *)(pdu + hdr->hlen); + if (recv_digest != exp_digest) { + pr_err("queue %d: header digest error: recv %#x expected %#x\n", + queue->idx, le32_to_cpu(recv_digest), + le32_to_cpu(exp_digest)); + return -EPROTO; + } + + return 0; +} + +static int nvmet_tcp_check_ddgst(struct nvmet_tcp_queue *queue, void *pdu) +{ + struct nvme_tcp_hdr *hdr = pdu; + u8 digest_len = nvmet_tcp_hdgst_len(queue); + u32 len; + + len = le32_to_cpu(hdr->plen) - hdr->hlen - + (hdr->flags & NVME_TCP_F_HDGST ? digest_len : 0); + + if (unlikely(len && !(hdr->flags & NVME_TCP_F_DDGST))) { + pr_err("queue %d: data digest flag is cleared\n", queue->idx); + return -EPROTO; + } + + return 0; +} + +static void nvmet_tcp_unmap_pdu_iovec(struct nvmet_tcp_cmd *cmd) +{ + struct scatterlist *sg; + int i; + + sg = &cmd->req.sg[cmd->sg_idx]; + + for (i = 0; i < cmd->nr_mapped; i++) + kunmap(sg_page(&sg[i])); +} + +static void nvmet_tcp_map_pdu_iovec(struct nvmet_tcp_cmd *cmd) +{ + struct kvec *iov = cmd->iov; + struct scatterlist *sg; + u32 length, offset, sg_offset; + + length = cmd->pdu_len; + cmd->nr_mapped = DIV_ROUND_UP(length, PAGE_SIZE); + offset = cmd->rbytes_done; + cmd->sg_idx = DIV_ROUND_UP(offset, PAGE_SIZE); + sg_offset = offset % PAGE_SIZE; + sg = &cmd->req.sg[cmd->sg_idx]; + + while (length) { + u32 iov_len = min_t(u32, length, sg->length - sg_offset); + + iov->iov_base = kmap(sg_page(sg)) + sg->offset + sg_offset; + iov->iov_len = iov_len; + + length -= iov_len; + sg = sg_next(sg); + iov++; + } + + iov_iter_kvec(&cmd->recv_msg.msg_iter, READ, cmd->iov, + cmd->nr_mapped, cmd->pdu_len); +} + +static void nvmet_tcp_fatal_error(struct nvmet_tcp_queue *queue) +{ + queue->rcv_state = NVMET_TCP_RECV_ERR; + if (queue->nvme_sq.ctrl) + nvmet_ctrl_fatal_error(queue->nvme_sq.ctrl); + else + kernel_sock_shutdown(queue->sock, SHUT_RDWR); +} + +static int nvmet_tcp_map_data(struct nvmet_tcp_cmd *cmd) +{ + struct nvme_sgl_desc *sgl = &cmd->req.cmd->common.dptr.sgl; + u32 len = le32_to_cpu(sgl->length); + + if (!cmd->req.data_len) + return 0; + + if (sgl->type == ((NVME_SGL_FMT_DATA_DESC << 4) | + NVME_SGL_FMT_OFFSET)) { + if (!nvme_is_write(cmd->req.cmd)) + return NVME_SC_INVALID_FIELD | NVME_SC_DNR; + + if (len > cmd->req.port->inline_data_size) + return NVME_SC_SGL_INVALID_OFFSET | NVME_SC_DNR; + cmd->pdu_len = len; + } + cmd->req.transfer_len += len; + + cmd->req.sg = sgl_alloc(len, GFP_KERNEL, &cmd->req.sg_cnt); + if (!cmd->req.sg) + return NVME_SC_INTERNAL; + cmd->cur_sg = cmd->req.sg; + + if (nvmet_tcp_has_data_in(cmd)) { + cmd->iov = kmalloc_array(cmd->req.sg_cnt, + sizeof(*cmd->iov), GFP_KERNEL); + if (!cmd->iov) + goto err; + } + + return 0; +err: + sgl_free(cmd->req.sg); + return NVME_SC_INTERNAL; +} + +static void nvmet_tcp_ddgst(struct ahash_request *hash, + struct nvmet_tcp_cmd *cmd) +{ + ahash_request_set_crypt(hash, cmd->req.sg, + (void *)&cmd->exp_ddgst, cmd->req.transfer_len); + crypto_ahash_digest(hash); +} + +static void nvmet_setup_c2h_data_pdu(struct nvmet_tcp_cmd *cmd) +{ + struct nvme_tcp_data_pdu *pdu = cmd->data_pdu; + struct nvmet_tcp_queue *queue = cmd->queue; + u8 hdgst = nvmet_tcp_hdgst_len(cmd->queue); + u8 ddgst = nvmet_tcp_ddgst_len(cmd->queue); + + cmd->offset = 0; + cmd->state = NVMET_TCP_SEND_DATA_PDU; + + pdu->hdr.type = nvme_tcp_c2h_data; + pdu->hdr.flags = NVME_TCP_F_DATA_LAST; + pdu->hdr.hlen = sizeof(*pdu); + pdu->hdr.pdo = pdu->hdr.hlen + hdgst; + pdu->hdr.plen = + cpu_to_le32(pdu->hdr.hlen + hdgst + + cmd->req.transfer_len + ddgst); + pdu->command_id = cmd->req.rsp->command_id; + pdu->data_length = cpu_to_le32(cmd->req.transfer_len); + pdu->data_offset = cpu_to_le32(cmd->wbytes_done); + + if (queue->data_digest) { + pdu->hdr.flags |= NVME_TCP_F_DDGST; + nvmet_tcp_ddgst(queue->snd_hash, cmd); + } + + if (cmd->queue->hdr_digest) { + pdu->hdr.flags |= NVME_TCP_F_HDGST; + nvmet_tcp_hdgst(queue->snd_hash, pdu, sizeof(*pdu)); + } +} + +static void nvmet_setup_r2t_pdu(struct nvmet_tcp_cmd *cmd) +{ + struct nvme_tcp_r2t_pdu *pdu = cmd->r2t_pdu; + struct nvmet_tcp_queue *queue = cmd->queue; + u8 hdgst = nvmet_tcp_hdgst_len(cmd->queue); + + cmd->offset = 0; + cmd->state = NVMET_TCP_SEND_R2T; + + pdu->hdr.type = nvme_tcp_r2t; + pdu->hdr.flags = 0; + pdu->hdr.hlen = sizeof(*pdu); + pdu->hdr.pdo = 0; + pdu->hdr.plen = cpu_to_le32(pdu->hdr.hlen + hdgst); + + pdu->command_id = cmd->req.cmd->common.command_id; + pdu->ttag = nvmet_tcp_cmd_tag(cmd->queue, cmd); + pdu->r2t_length = cpu_to_le32(cmd->req.transfer_len - cmd->rbytes_done); + pdu->r2t_offset = cpu_to_le32(cmd->rbytes_done); + if (cmd->queue->hdr_digest) { + pdu->hdr.flags |= NVME_TCP_F_HDGST; + nvmet_tcp_hdgst(queue->snd_hash, pdu, sizeof(*pdu)); + } +} + +static void nvmet_setup_response_pdu(struct nvmet_tcp_cmd *cmd) +{ + struct nvme_tcp_rsp_pdu *pdu = cmd->rsp_pdu; + struct nvmet_tcp_queue *queue = cmd->queue; + u8 hdgst = nvmet_tcp_hdgst_len(cmd->queue); + + cmd->offset = 0; + cmd->state = NVMET_TCP_SEND_RESPONSE; + + pdu->hdr.type = nvme_tcp_rsp; + pdu->hdr.flags = 0; + pdu->hdr.hlen = sizeof(*pdu); + pdu->hdr.pdo = 0; + pdu->hdr.plen = cpu_to_le32(pdu->hdr.hlen + hdgst); + if (cmd->queue->hdr_digest) { + pdu->hdr.flags |= NVME_TCP_F_HDGST; + nvmet_tcp_hdgst(queue->snd_hash, pdu, sizeof(*pdu)); + } +} + +static void nvmet_tcp_process_resp_list(struct nvmet_tcp_queue *queue) +{ + struct llist_node *node; + + node = llist_del_all(&queue->resp_list); + if (!node) + return; + + while (node) { + struct nvmet_tcp_cmd *cmd = llist_entry(node, + struct nvmet_tcp_cmd, lentry); + + list_add(&cmd->entry, &queue->resp_send_list); + node = node->next; + queue->send_list_len++; + } +} + +static struct nvmet_tcp_cmd *nvmet_tcp_fetch_cmd(struct nvmet_tcp_queue *queue) +{ + queue->snd_cmd = list_first_entry_or_null(&queue->resp_send_list, + struct nvmet_tcp_cmd, entry); + if (!queue->snd_cmd) { + nvmet_tcp_process_resp_list(queue); + queue->snd_cmd = + list_first_entry_or_null(&queue->resp_send_list, + struct nvmet_tcp_cmd, entry); + if (unlikely(!queue->snd_cmd)) + return NULL; + } + + list_del_init(&queue->snd_cmd->entry); + queue->send_list_len--; + + if (nvmet_tcp_need_data_out(queue->snd_cmd)) + nvmet_setup_c2h_data_pdu(queue->snd_cmd); + else if (nvmet_tcp_need_data_in(queue->snd_cmd)) + nvmet_setup_r2t_pdu(queue->snd_cmd); + else + nvmet_setup_response_pdu(queue->snd_cmd); + + return queue->snd_cmd; +} + +static void nvmet_tcp_queue_response(struct nvmet_req *req) +{ + struct nvmet_tcp_cmd *cmd = + container_of(req, struct nvmet_tcp_cmd, req); + struct nvmet_tcp_queue *queue = cmd->queue; + + llist_add(&cmd->lentry, &queue->resp_list); + queue_work_on(cmd->queue->cpu, nvmet_tcp_wq, &cmd->queue->io_work); +} + +static int nvmet_try_send_data_pdu(struct nvmet_tcp_cmd *cmd) +{ + u8 hdgst = nvmet_tcp_hdgst_len(cmd->queue); + int left = sizeof(*cmd->data_pdu) - cmd->offset + hdgst; + int ret; + + ret = kernel_sendpage(cmd->queue->sock, virt_to_page(cmd->data_pdu), + offset_in_page(cmd->data_pdu) + cmd->offset, + left, MSG_DONTWAIT | MSG_MORE); + if (ret <= 0) + return ret; + + cmd->offset += ret; + left -= ret; + + if (left) + return -EAGAIN; + + cmd->state = NVMET_TCP_SEND_DATA; + cmd->offset = 0; + return 1; +} + +static int nvmet_try_send_data(struct nvmet_tcp_cmd *cmd) +{ + struct nvmet_tcp_queue *queue = cmd->queue; + int ret; + + while (cmd->cur_sg) { + struct page *page = sg_page(cmd->cur_sg); + u32 left = cmd->cur_sg->length - cmd->offset; + + ret = kernel_sendpage(cmd->queue->sock, page, cmd->offset, + left, MSG_DONTWAIT | MSG_MORE); + if (ret <= 0) + return ret; + + cmd->offset += ret; + cmd->wbytes_done += ret; + + /* Done with sg?*/ + if (cmd->offset == cmd->cur_sg->length) { + cmd->cur_sg = sg_next(cmd->cur_sg); + cmd->offset = 0; + } + } + + if (queue->data_digest) { + cmd->state = NVMET_TCP_SEND_DDGST; + cmd->offset = 0; + } else { + nvmet_setup_response_pdu(cmd); + } + return 1; + +} + +static int nvmet_try_send_response(struct nvmet_tcp_cmd *cmd, + bool last_in_batch) +{ + u8 hdgst = nvmet_tcp_hdgst_len(cmd->queue); + int left = sizeof(*cmd->rsp_pdu) - cmd->offset + hdgst; + int flags = MSG_DONTWAIT; + int ret; + + if (!last_in_batch && cmd->queue->send_list_len) + flags |= MSG_MORE; + else + flags |= MSG_EOR; + + ret = kernel_sendpage(cmd->queue->sock, virt_to_page(cmd->rsp_pdu), + offset_in_page(cmd->rsp_pdu) + cmd->offset, left, flags); + if (ret <= 0) + return ret; + cmd->offset += ret; + left -= ret; + + if (left) + return -EAGAIN; + + kfree(cmd->iov); + sgl_free(cmd->req.sg); + cmd->queue->snd_cmd = NULL; + nvmet_tcp_put_cmd(cmd); + return 1; +} + +static int nvmet_try_send_r2t(struct nvmet_tcp_cmd *cmd, bool last_in_batch) +{ + u8 hdgst = nvmet_tcp_hdgst_len(cmd->queue); + int left = sizeof(*cmd->r2t_pdu) - cmd->offset + hdgst; + int flags = MSG_DONTWAIT; + int ret; + + if (!last_in_batch && cmd->queue->send_list_len) + flags |= MSG_MORE; + else + flags |= MSG_EOR; + + ret = kernel_sendpage(cmd->queue->sock, virt_to_page(cmd->r2t_pdu), + offset_in_page(cmd->r2t_pdu) + cmd->offset, left, flags); + if (ret <= 0) + return ret; + cmd->offset += ret; + left -= ret; + + if (left) + return -EAGAIN; + + cmd->queue->snd_cmd = NULL; + return 1; +} + +static int nvmet_try_send_ddgst(struct nvmet_tcp_cmd *cmd) +{ + struct nvmet_tcp_queue *queue = cmd->queue; + struct msghdr msg = { .msg_flags = MSG_DONTWAIT }; + struct kvec iov = { + .iov_base = &cmd->exp_ddgst + cmd->offset, + .iov_len = NVME_TCP_DIGEST_LENGTH - cmd->offset + }; + int ret; + + ret = kernel_sendmsg(queue->sock, &msg, &iov, 1, iov.iov_len); + if (unlikely(ret <= 0)) + return ret; + + cmd->offset += ret; + nvmet_setup_response_pdu(cmd); + return 1; +} + +static int nvmet_tcp_try_send_one(struct nvmet_tcp_queue *queue, + bool last_in_batch) +{ + struct nvmet_tcp_cmd *cmd = queue->snd_cmd; + int ret = 0; + + if (!cmd || queue->state == NVMET_TCP_Q_DISCONNECTING) { + cmd = nvmet_tcp_fetch_cmd(queue); + if (unlikely(!cmd)) + return 0; + } + + if (cmd->state == NVMET_TCP_SEND_DATA_PDU) { + ret = nvmet_try_send_data_pdu(cmd); + if (ret <= 0) + goto done_send; + } + + if (cmd->state == NVMET_TCP_SEND_DATA) { + ret = nvmet_try_send_data(cmd); + if (ret <= 0) + goto done_send; + } + + if (cmd->state == NVMET_TCP_SEND_DDGST) { + ret = nvmet_try_send_ddgst(cmd); + if (ret <= 0) + goto done_send; + } + + if (cmd->state == NVMET_TCP_SEND_R2T) { + ret = nvmet_try_send_r2t(cmd, last_in_batch); + if (ret <= 0) + goto done_send; + } + + if (cmd->state == NVMET_TCP_SEND_RESPONSE) + ret = nvmet_try_send_response(cmd, last_in_batch); + +done_send: + if (ret < 0) { + if (ret == -EAGAIN) + return 0; + return ret; + } + + return 1; +} + +static int nvmet_tcp_try_send(struct nvmet_tcp_queue *queue, + int budget, int *sends) +{ + int i, ret = 0; + + for (i = 0; i < budget; i++) { + ret = nvmet_tcp_try_send_one(queue, i == budget - 1); + if (ret <= 0) + break; + (*sends)++; + } + + return ret; +} + +static void nvmet_prepare_receive_pdu(struct nvmet_tcp_queue *queue) +{ + queue->offset = 0; + queue->left = sizeof(struct nvme_tcp_hdr); + queue->cmd = NULL; + queue->rcv_state = NVMET_TCP_RECV_PDU; +} + +static void nvmet_tcp_free_crypto(struct nvmet_tcp_queue *queue) +{ + struct crypto_ahash *tfm = crypto_ahash_reqtfm(queue->rcv_hash); + + ahash_request_free(queue->rcv_hash); + ahash_request_free(queue->snd_hash); + crypto_free_ahash(tfm); +} + +static int nvmet_tcp_alloc_crypto(struct nvmet_tcp_queue *queue) +{ + struct crypto_ahash *tfm; + + tfm = crypto_alloc_ahash("crc32c", 0, CRYPTO_ALG_ASYNC); + if (IS_ERR(tfm)) + return PTR_ERR(tfm); + + queue->snd_hash = ahash_request_alloc(tfm, GFP_KERNEL); + if (!queue->snd_hash) + goto free_tfm; + ahash_request_set_callback(queue->snd_hash, 0, NULL, NULL); + + queue->rcv_hash = ahash_request_alloc(tfm, GFP_KERNEL); + if (!queue->rcv_hash) + goto free_snd_hash; + ahash_request_set_callback(queue->rcv_hash, 0, NULL, NULL); + + return 0; +free_snd_hash: + ahash_request_free(queue->snd_hash); +free_tfm: + crypto_free_ahash(tfm); + return -ENOMEM; +} + + +static int nvmet_tcp_handle_icreq(struct nvmet_tcp_queue *queue) +{ + struct nvme_tcp_icreq_pdu *icreq = &queue->pdu.icreq; + struct nvme_tcp_icresp_pdu *icresp = &queue->pdu.icresp; + struct msghdr msg = {}; + struct kvec iov; + int ret; + + if (le32_to_cpu(icreq->hdr.plen) != sizeof(struct nvme_tcp_icreq_pdu)) { + pr_err("bad nvme-tcp pdu length (%d)\n", + le32_to_cpu(icreq->hdr.plen)); + nvmet_tcp_fatal_error(queue); + } + + if (icreq->pfv != NVME_TCP_PFV_1_0) { + pr_err("queue %d: bad pfv %d\n", queue->idx, icreq->pfv); + return -EPROTO; + } + + if (icreq->hpda != 0) { + pr_err("queue %d: unsupported hpda %d\n", queue->idx, + icreq->hpda); + return -EPROTO; + } + + if (icreq->maxr2t != 0) { + pr_err("queue %d: unsupported maxr2t %d\n", queue->idx, + le32_to_cpu(icreq->maxr2t) + 1); + return -EPROTO; + } + + queue->hdr_digest = !!(icreq->digest & NVME_TCP_HDR_DIGEST_ENABLE); + queue->data_digest = !!(icreq->digest & NVME_TCP_DATA_DIGEST_ENABLE); + if (queue->hdr_digest || queue->data_digest) { + ret = nvmet_tcp_alloc_crypto(queue); + if (ret) + return ret; + } + + memset(icresp, 0, sizeof(*icresp)); + icresp->hdr.type = nvme_tcp_icresp; + icresp->hdr.hlen = sizeof(*icresp); + icresp->hdr.pdo = 0; + icresp->hdr.plen = cpu_to_le32(icresp->hdr.hlen); + icresp->pfv = cpu_to_le16(NVME_TCP_PFV_1_0); + icresp->maxdata = cpu_to_le32(0xffff); /* FIXME: support r2t */ + icresp->cpda = 0; + if (queue->hdr_digest) + icresp->digest |= NVME_TCP_HDR_DIGEST_ENABLE; + if (queue->data_digest) + icresp->digest |= NVME_TCP_DATA_DIGEST_ENABLE; + + iov.iov_base = icresp; + iov.iov_len = sizeof(*icresp); + ret = kernel_sendmsg(queue->sock, &msg, &iov, 1, iov.iov_len); + if (ret < 0) + goto free_crypto; + + queue->state = NVMET_TCP_Q_LIVE; + nvmet_prepare_receive_pdu(queue); + return 0; +free_crypto: + if (queue->hdr_digest || queue->data_digest) + nvmet_tcp_free_crypto(queue); + return ret; +} + +static void nvmet_tcp_handle_req_failure(struct nvmet_tcp_queue *queue, + struct nvmet_tcp_cmd *cmd, struct nvmet_req *req) +{ + int ret; + + /* recover the expected data transfer length */ + req->data_len = le32_to_cpu(req->cmd->common.dptr.sgl.length); + + if (!nvme_is_write(cmd->req.cmd) || + req->data_len > cmd->req.port->inline_data_size) { + nvmet_prepare_receive_pdu(queue); + return; + } + + ret = nvmet_tcp_map_data(cmd); + if (unlikely(ret)) { + pr_err("queue %d: failed to map data\n", queue->idx); + nvmet_tcp_fatal_error(queue); + return; + } + + queue->rcv_state = NVMET_TCP_RECV_DATA; + nvmet_tcp_map_pdu_iovec(cmd); + cmd->flags |= NVMET_TCP_F_INIT_FAILED; +} + +static int nvmet_tcp_handle_h2c_data_pdu(struct nvmet_tcp_queue *queue) +{ + struct nvme_tcp_data_pdu *data = &queue->pdu.data; + struct nvmet_tcp_cmd *cmd; + + cmd = &queue->cmds[data->ttag]; + + if (le32_to_cpu(data->data_offset) != cmd->rbytes_done) { + pr_err("ttag %u unexpected data offset %u (expected %u)\n", + data->ttag, le32_to_cpu(data->data_offset), + cmd->rbytes_done); + /* FIXME: use path and transport errors */ + nvmet_req_complete(&cmd->req, + NVME_SC_INVALID_FIELD | NVME_SC_DNR); + return -EPROTO; + } + + cmd->pdu_len = le32_to_cpu(data->data_length); + cmd->pdu_recv = 0; + nvmet_tcp_map_pdu_iovec(cmd); + queue->cmd = cmd; + queue->rcv_state = NVMET_TCP_RECV_DATA; + + return 0; +} + +static int nvmet_tcp_done_recv_pdu(struct nvmet_tcp_queue *queue) +{ + struct nvme_tcp_hdr *hdr = &queue->pdu.cmd.hdr; + struct nvme_command *nvme_cmd = &queue->pdu.cmd.cmd; + struct nvmet_req *req; + int ret; + + if (unlikely(queue->state == NVMET_TCP_Q_CONNECTING)) { + if (hdr->type != nvme_tcp_icreq) { + pr_err("unexpected pdu type (%d) before icreq\n", + hdr->type); + nvmet_tcp_fatal_error(queue); + return -EPROTO; + } + return nvmet_tcp_handle_icreq(queue); + } + + if (hdr->type == nvme_tcp_h2c_data) { + ret = nvmet_tcp_handle_h2c_data_pdu(queue); + if (unlikely(ret)) + return ret; + return 0; + } + + queue->cmd = nvmet_tcp_get_cmd(queue); + if (unlikely(!queue->cmd)) { + /* This should never happen */ + pr_err("queue %d: out of commands (%d) send_list_len: %d, opcode: %d", + queue->idx, queue->nr_cmds, queue->send_list_len, + nvme_cmd->common.opcode); + nvmet_tcp_fatal_error(queue); + return -ENOMEM; + } + + req = &queue->cmd->req; + memcpy(req->cmd, nvme_cmd, sizeof(*nvme_cmd)); + + if (unlikely(!nvmet_req_init(req, &queue->nvme_cq, + &queue->nvme_sq, &nvmet_tcp_ops))) { + pr_err("failed cmd %p id %d opcode %d, data_len: %d\n", + req->cmd, req->cmd->common.command_id, + req->cmd->common.opcode, + le32_to_cpu(req->cmd->common.dptr.sgl.length)); + + nvmet_tcp_handle_req_failure(queue, queue->cmd, req); + return -EAGAIN; + } + + ret = nvmet_tcp_map_data(queue->cmd); + if (unlikely(ret)) { + pr_err("queue %d: failed to map data\n", queue->idx); + if (nvmet_tcp_has_inline_data(queue->cmd)) + nvmet_tcp_fatal_error(queue); + else + nvmet_req_complete(req, ret); + ret = -EAGAIN; + goto out; + } + + if (nvmet_tcp_need_data_in(queue->cmd)) { + if (nvmet_tcp_has_inline_data(queue->cmd)) { + queue->rcv_state = NVMET_TCP_RECV_DATA; + nvmet_tcp_map_pdu_iovec(queue->cmd); + return 0; + } + /* send back R2T */ + nvmet_tcp_queue_response(&queue->cmd->req); + goto out; + } + + nvmet_req_execute(&queue->cmd->req); +out: + nvmet_prepare_receive_pdu(queue); + return ret; +} + +static const u8 nvme_tcp_pdu_sizes[] = { + [nvme_tcp_icreq] = sizeof(struct nvme_tcp_icreq_pdu), + [nvme_tcp_cmd] = sizeof(struct nvme_tcp_cmd_pdu), + [nvme_tcp_h2c_data] = sizeof(struct nvme_tcp_data_pdu), +}; + +static inline u8 nvmet_tcp_pdu_size(u8 type) +{ + size_t idx = type; + + return (idx < ARRAY_SIZE(nvme_tcp_pdu_sizes) && + nvme_tcp_pdu_sizes[idx]) ? + nvme_tcp_pdu_sizes[idx] : 0; +} + +static inline bool nvmet_tcp_pdu_valid(u8 type) +{ + switch (type) { + case nvme_tcp_icreq: + case nvme_tcp_cmd: + case nvme_tcp_h2c_data: + /* fallthru */ + return true; + } + + return false; +} + +static int nvmet_tcp_try_recv_pdu(struct nvmet_tcp_queue *queue) +{ + struct nvme_tcp_hdr *hdr = &queue->pdu.cmd.hdr; + int len; + struct kvec iov; + struct msghdr msg = { .msg_flags = MSG_DONTWAIT }; + +recv: + iov.iov_base = (void *)&queue->pdu + queue->offset; + iov.iov_len = queue->left; + len = kernel_recvmsg(queue->sock, &msg, &iov, 1, + iov.iov_len, msg.msg_flags); + if (unlikely(len < 0)) + return len; + + queue->offset += len; + queue->left -= len; + if (queue->left) + return -EAGAIN; + + if (queue->offset == sizeof(struct nvme_tcp_hdr)) { + u8 hdgst = nvmet_tcp_hdgst_len(queue); + + if (unlikely(!nvmet_tcp_pdu_valid(hdr->type))) { + pr_err("unexpected pdu type %d\n", hdr->type); + nvmet_tcp_fatal_error(queue); + return -EIO; + } + + if (unlikely(hdr->hlen != nvmet_tcp_pdu_size(hdr->type))) { + pr_err("pdu %d bad hlen %d\n", hdr->type, hdr->hlen); + return -EIO; + } + + queue->left = hdr->hlen - queue->offset + hdgst; + goto recv; + } + + if (queue->hdr_digest && + nvmet_tcp_verify_hdgst(queue, &queue->pdu, queue->offset)) { + nvmet_tcp_fatal_error(queue); /* fatal */ + return -EPROTO; + } + + if (queue->data_digest && + nvmet_tcp_check_ddgst(queue, &queue->pdu)) { + nvmet_tcp_fatal_error(queue); /* fatal */ + return -EPROTO; + } + + return nvmet_tcp_done_recv_pdu(queue); +} + +static void nvmet_tcp_prep_recv_ddgst(struct nvmet_tcp_cmd *cmd) +{ + struct nvmet_tcp_queue *queue = cmd->queue; + + nvmet_tcp_ddgst(queue->rcv_hash, cmd); + queue->offset = 0; + queue->left = NVME_TCP_DIGEST_LENGTH; + queue->rcv_state = NVMET_TCP_RECV_DDGST; +} + +static int nvmet_tcp_try_recv_data(struct nvmet_tcp_queue *queue) +{ + struct nvmet_tcp_cmd *cmd = queue->cmd; + int ret; + + while (msg_data_left(&cmd->recv_msg)) { + ret = sock_recvmsg(cmd->queue->sock, &cmd->recv_msg, + cmd->recv_msg.msg_flags); + if (ret <= 0) + return ret; + + cmd->pdu_recv += ret; + cmd->rbytes_done += ret; + } + + nvmet_tcp_unmap_pdu_iovec(cmd); + + if (!(cmd->flags & NVMET_TCP_F_INIT_FAILED) && + cmd->rbytes_done == cmd->req.transfer_len) { + if (queue->data_digest) { + nvmet_tcp_prep_recv_ddgst(cmd); + return 0; + } + nvmet_req_execute(&cmd->req); + } + + nvmet_prepare_receive_pdu(queue); + return 0; +} + +static int nvmet_tcp_try_recv_ddgst(struct nvmet_tcp_queue *queue) +{ + struct nvmet_tcp_cmd *cmd = queue->cmd; + int ret; + struct msghdr msg = { .msg_flags = MSG_DONTWAIT }; + struct kvec iov = { + .iov_base = (void *)&cmd->recv_ddgst + queue->offset, + .iov_len = queue->left + }; + + ret = kernel_recvmsg(queue->sock, &msg, &iov, 1, + iov.iov_len, msg.msg_flags); + if (unlikely(ret < 0)) + return ret; + + queue->offset += ret; + queue->left -= ret; + if (queue->left) + return -EAGAIN; + + if (queue->data_digest && cmd->exp_ddgst != cmd->recv_ddgst) { + pr_err("queue %d: cmd %d pdu (%d) data digest error: recv %#x expected %#x\n", + queue->idx, cmd->req.cmd->common.command_id, + queue->pdu.cmd.hdr.type, le32_to_cpu(cmd->recv_ddgst), + le32_to_cpu(cmd->exp_ddgst)); + nvmet_tcp_finish_cmd(cmd); + nvmet_tcp_fatal_error(queue); + ret = -EPROTO; + goto out; + } + + if (!(cmd->flags & NVMET_TCP_F_INIT_FAILED) && + cmd->rbytes_done == cmd->req.transfer_len) + nvmet_req_execute(&cmd->req); + ret = 0; +out: + nvmet_prepare_receive_pdu(queue); + return ret; +} + +static int nvmet_tcp_try_recv_one(struct nvmet_tcp_queue *queue) +{ + int result; + + if (unlikely(queue->rcv_state == NVMET_TCP_RECV_ERR)) + return 0; + + if (queue->rcv_state == NVMET_TCP_RECV_PDU) { + result = nvmet_tcp_try_recv_pdu(queue); + if (result != 0) + goto done_recv; + } + + if (queue->rcv_state == NVMET_TCP_RECV_DATA) { + result = nvmet_tcp_try_recv_data(queue); + if (result != 0) + goto done_recv; + } + + if (queue->rcv_state == NVMET_TCP_RECV_DDGST) { + result = nvmet_tcp_try_recv_ddgst(queue); + if (result != 0) + goto done_recv; + } + +done_recv: + if (result < 0) { + if (result == -EAGAIN) + return 0; + return result; + } + return 1; +} + +static int nvmet_tcp_try_recv(struct nvmet_tcp_queue *queue, + int budget, int *recvs) +{ + int i, ret = 0; + + for (i = 0; i < budget; i++) { + ret = nvmet_tcp_try_recv_one(queue); + if (ret <= 0) + break; + (*recvs)++; + } + + return ret; +} + +static void nvmet_tcp_schedule_release_queue(struct nvmet_tcp_queue *queue) +{ + spin_lock(&queue->state_lock); + if (queue->state != NVMET_TCP_Q_DISCONNECTING) { + queue->state = NVMET_TCP_Q_DISCONNECTING; + schedule_work(&queue->release_work); + } + spin_unlock(&queue->state_lock); +} + +static void nvmet_tcp_io_work(struct work_struct *w) +{ + struct nvmet_tcp_queue *queue = + container_of(w, struct nvmet_tcp_queue, io_work); + bool pending; + int ret, ops = 0; + + do { + pending = false; + + ret = nvmet_tcp_try_recv(queue, NVMET_TCP_RECV_BUDGET, &ops); + if (ret > 0) { + pending = true; + } else if (ret < 0) { + if (ret == -EPIPE || ret == -ECONNRESET) + kernel_sock_shutdown(queue->sock, SHUT_RDWR); + else + nvmet_tcp_fatal_error(queue); + return; + } + + ret = nvmet_tcp_try_send(queue, NVMET_TCP_SEND_BUDGET, &ops); + if (ret > 0) { + /* transmitted message/data */ + pending = true; + } else if (ret < 0) { + if (ret == -EPIPE || ret == -ECONNRESET) + kernel_sock_shutdown(queue->sock, SHUT_RDWR); + else + nvmet_tcp_fatal_error(queue); + return; + } + + } while (pending && ops < NVMET_TCP_IO_WORK_BUDGET); + + /* + * We exahusted our budget, requeue our selves + */ + if (pending) + queue_work_on(queue->cpu, nvmet_tcp_wq, &queue->io_work); +} + +static int nvmet_tcp_alloc_cmd(struct nvmet_tcp_queue *queue, + struct nvmet_tcp_cmd *c) +{ + u8 hdgst = nvmet_tcp_hdgst_len(queue); + + c->queue = queue; + c->req.port = queue->port->nport; + + c->cmd_pdu = page_frag_alloc(&queue->pf_cache, + sizeof(*c->cmd_pdu) + hdgst, GFP_KERNEL | __GFP_ZERO); + if (!c->cmd_pdu) + return -ENOMEM; + c->req.cmd = &c->cmd_pdu->cmd; + + c->rsp_pdu = page_frag_alloc(&queue->pf_cache, + sizeof(*c->rsp_pdu) + hdgst, GFP_KERNEL | __GFP_ZERO); + if (!c->rsp_pdu) + goto out_free_cmd; + c->req.rsp = &c->rsp_pdu->cqe; + + c->data_pdu = page_frag_alloc(&queue->pf_cache, + sizeof(*c->data_pdu) + hdgst, GFP_KERNEL | __GFP_ZERO); + if (!c->data_pdu) + goto out_free_rsp; + + c->r2t_pdu = page_frag_alloc(&queue->pf_cache, + sizeof(*c->r2t_pdu) + hdgst, GFP_KERNEL | __GFP_ZERO); + if (!c->r2t_pdu) + goto out_free_data; + + c->recv_msg.msg_flags = MSG_DONTWAIT | MSG_NOSIGNAL; + + list_add_tail(&c->entry, &queue->free_list); + + return 0; +out_free_data: + page_frag_free(c->data_pdu); +out_free_rsp: + page_frag_free(c->rsp_pdu); +out_free_cmd: + page_frag_free(c->cmd_pdu); + return -ENOMEM; +} + +static void nvmet_tcp_free_cmd(struct nvmet_tcp_cmd *c) +{ + page_frag_free(c->r2t_pdu); + page_frag_free(c->data_pdu); + page_frag_free(c->rsp_pdu); + page_frag_free(c->cmd_pdu); +} + +static int nvmet_tcp_alloc_cmds(struct nvmet_tcp_queue *queue) +{ + struct nvmet_tcp_cmd *cmds; + int i, ret = -EINVAL, nr_cmds = queue->nr_cmds; + + cmds = kcalloc(nr_cmds, sizeof(struct nvmet_tcp_cmd), GFP_KERNEL); + if (!cmds) + goto out; + + for (i = 0; i < nr_cmds; i++) { + ret = nvmet_tcp_alloc_cmd(queue, cmds + i); + if (ret) + goto out_free; + } + + queue->cmds = cmds; + + return 0; +out_free: + while (--i >= 0) + nvmet_tcp_free_cmd(cmds + i); + kfree(cmds); +out: + return ret; +} + +static void nvmet_tcp_free_cmds(struct nvmet_tcp_queue *queue) +{ + struct nvmet_tcp_cmd *cmds = queue->cmds; + int i; + + for (i = 0; i < queue->nr_cmds; i++) + nvmet_tcp_free_cmd(cmds + i); + + nvmet_tcp_free_cmd(&queue->connect); + kfree(cmds); +} + +static void nvmet_tcp_restore_socket_callbacks(struct nvmet_tcp_queue *queue) +{ + struct socket *sock = queue->sock; + + write_lock_bh(&sock->sk->sk_callback_lock); + sock->sk->sk_data_ready = queue->data_ready; + sock->sk->sk_state_change = queue->state_change; + sock->sk->sk_write_space = queue->write_space; + sock->sk->sk_user_data = NULL; + write_unlock_bh(&sock->sk->sk_callback_lock); +} + +static void nvmet_tcp_finish_cmd(struct nvmet_tcp_cmd *cmd) +{ + nvmet_req_uninit(&cmd->req); + nvmet_tcp_unmap_pdu_iovec(cmd); + sgl_free(cmd->req.sg); +} + +static void nvmet_tcp_uninit_data_in_cmds(struct nvmet_tcp_queue *queue) +{ + struct nvmet_tcp_cmd *cmd = queue->cmds; + int i; + + for (i = 0; i < queue->nr_cmds; i++, cmd++) { + if (nvmet_tcp_need_data_in(cmd)) + nvmet_tcp_finish_cmd(cmd); + } + + if (!queue->nr_cmds && nvmet_tcp_need_data_in(&queue->connect)) { + /* failed in connect */ + nvmet_tcp_finish_cmd(&queue->connect); + } +} + +static void nvmet_tcp_release_queue_work(struct work_struct *w) +{ + struct nvmet_tcp_queue *queue = + container_of(w, struct nvmet_tcp_queue, release_work); + + mutex_lock(&nvmet_tcp_queue_mutex); + list_del_init(&queue->queue_list); + mutex_unlock(&nvmet_tcp_queue_mutex); + + nvmet_tcp_restore_socket_callbacks(queue); + flush_work(&queue->io_work); + + nvmet_tcp_uninit_data_in_cmds(queue); + nvmet_sq_destroy(&queue->nvme_sq); + cancel_work_sync(&queue->io_work); + sock_release(queue->sock); + nvmet_tcp_free_cmds(queue); + if (queue->hdr_digest || queue->data_digest) + nvmet_tcp_free_crypto(queue); + ida_simple_remove(&nvmet_tcp_queue_ida, queue->idx); + + kfree(queue); +} + +static void nvmet_tcp_data_ready(struct sock *sk) +{ + struct nvmet_tcp_queue *queue; + + read_lock_bh(&sk->sk_callback_lock); + queue = sk->sk_user_data; + if (likely(queue)) + queue_work_on(queue->cpu, nvmet_tcp_wq, &queue->io_work); + read_unlock_bh(&sk->sk_callback_lock); +} + +static void nvmet_tcp_write_space(struct sock *sk) +{ + struct nvmet_tcp_queue *queue; + + read_lock_bh(&sk->sk_callback_lock); + queue = sk->sk_user_data; + if (unlikely(!queue)) + goto out; + + if (unlikely(queue->state == NVMET_TCP_Q_CONNECTING)) { + queue->write_space(sk); + goto out; + } + + if (sk_stream_is_writeable(sk)) { + clear_bit(SOCK_NOSPACE, &sk->sk_socket->flags); + queue_work_on(queue->cpu, nvmet_tcp_wq, &queue->io_work); + } +out: + read_unlock_bh(&sk->sk_callback_lock); +} + +static void nvmet_tcp_state_change(struct sock *sk) +{ + struct nvmet_tcp_queue *queue; + + write_lock_bh(&sk->sk_callback_lock); + queue = sk->sk_user_data; + if (!queue) + goto done; + + switch (sk->sk_state) { + case TCP_FIN_WAIT1: + case TCP_CLOSE_WAIT: + case TCP_CLOSE: + /* FALLTHRU */ + sk->sk_user_data = NULL; + nvmet_tcp_schedule_release_queue(queue); + break; + default: + pr_warn("queue %d unhandled state %d\n", + queue->idx, sk->sk_state); + } +done: + write_unlock_bh(&sk->sk_callback_lock); +} + +static int nvmet_tcp_set_queue_sock(struct nvmet_tcp_queue *queue) +{ + struct socket *sock = queue->sock; + struct linger sol = { .l_onoff = 1, .l_linger = 0 }; + int ret; + + ret = kernel_getsockname(sock, + (struct sockaddr *)&queue->sockaddr); + if (ret < 0) + return ret; + + ret = kernel_getpeername(sock, + (struct sockaddr *)&queue->sockaddr_peer); + if (ret < 0) + return ret; + + /* + * Cleanup whatever is sitting in the TCP transmit queue on socket + * close. This is done to prevent stale data from being sent should + * the network connection be restored before TCP times out. + */ + ret = kernel_setsockopt(sock, SOL_SOCKET, SO_LINGER, + (char *)&sol, sizeof(sol)); + if (ret) + return ret; + + write_lock_bh(&sock->sk->sk_callback_lock); + sock->sk->sk_user_data = queue; + queue->data_ready = sock->sk->sk_data_ready; + sock->sk->sk_data_ready = nvmet_tcp_data_ready; + queue->state_change = sock->sk->sk_state_change; + sock->sk->sk_state_change = nvmet_tcp_state_change; + queue->write_space = sock->sk->sk_write_space; + sock->sk->sk_write_space = nvmet_tcp_write_space; + write_unlock_bh(&sock->sk->sk_callback_lock); + + return 0; +} + +static int nvmet_tcp_alloc_queue(struct nvmet_tcp_port *port, + struct socket *newsock) +{ + struct nvmet_tcp_queue *queue; + int ret; + + queue = kzalloc(sizeof(*queue), GFP_KERNEL); + if (!queue) + return -ENOMEM; + + INIT_WORK(&queue->release_work, nvmet_tcp_release_queue_work); + INIT_WORK(&queue->io_work, nvmet_tcp_io_work); + queue->sock = newsock; + queue->port = port; + queue->nr_cmds = 0; + spin_lock_init(&queue->state_lock); + queue->state = NVMET_TCP_Q_CONNECTING; + INIT_LIST_HEAD(&queue->free_list); + init_llist_head(&queue->resp_list); + INIT_LIST_HEAD(&queue->resp_send_list); + + queue->idx = ida_simple_get(&nvmet_tcp_queue_ida, 0, 0, GFP_KERNEL); + if (queue->idx < 0) { + ret = queue->idx; + goto out_free_queue; + } + + ret = nvmet_tcp_alloc_cmd(queue, &queue->connect); + if (ret) + goto out_ida_remove; + + ret = nvmet_sq_init(&queue->nvme_sq); + if (ret) + goto out_free_connect; + + port->last_cpu = cpumask_next_wrap(port->last_cpu, + cpu_online_mask, -1, false); + queue->cpu = port->last_cpu; + nvmet_prepare_receive_pdu(queue); + + mutex_lock(&nvmet_tcp_queue_mutex); + list_add_tail(&queue->queue_list, &nvmet_tcp_queue_list); + mutex_unlock(&nvmet_tcp_queue_mutex); + + ret = nvmet_tcp_set_queue_sock(queue); + if (ret) + goto out_destroy_sq; + + queue_work_on(queue->cpu, nvmet_tcp_wq, &queue->io_work); + + return 0; +out_destroy_sq: + mutex_lock(&nvmet_tcp_queue_mutex); + list_del_init(&queue->queue_list); + mutex_unlock(&nvmet_tcp_queue_mutex); + nvmet_sq_destroy(&queue->nvme_sq); +out_free_connect: + nvmet_tcp_free_cmd(&queue->connect); +out_ida_remove: + ida_simple_remove(&nvmet_tcp_queue_ida, queue->idx); +out_free_queue: + kfree(queue); + return ret; +} + +static void nvmet_tcp_accept_work(struct work_struct *w) +{ + struct nvmet_tcp_port *port = + container_of(w, struct nvmet_tcp_port, accept_work); + struct socket *newsock; + int ret; + + while (true) { + ret = kernel_accept(port->sock, &newsock, O_NONBLOCK); + if (ret < 0) { + if (ret != -EAGAIN) + pr_warn("failed to accept err=%d\n", ret); + return; + } + ret = nvmet_tcp_alloc_queue(port, newsock); + if (ret) { + pr_err("failed to allocate queue\n"); + sock_release(newsock); + } + } +} + +static void nvmet_tcp_listen_data_ready(struct sock *sk) +{ + struct nvmet_tcp_port *port; + + read_lock_bh(&sk->sk_callback_lock); + port = sk->sk_user_data; + if (!port) + goto out; + + if (sk->sk_state == TCP_LISTEN) + schedule_work(&port->accept_work); +out: + read_unlock_bh(&sk->sk_callback_lock); +} + +static int nvmet_tcp_add_port(struct nvmet_port *nport) +{ + struct nvmet_tcp_port *port; + __kernel_sa_family_t af; + int opt, ret; + + port = kzalloc(sizeof(*port), GFP_KERNEL); + if (!port) + return -ENOMEM; + + switch (nport->disc_addr.adrfam) { + case NVMF_ADDR_FAMILY_IP4: + af = AF_INET; + break; + case NVMF_ADDR_FAMILY_IP6: + af = AF_INET6; + break; + default: + pr_err("address family %d not supported\n", + nport->disc_addr.adrfam); + ret = -EINVAL; + goto err_port; + } + + ret = inet_pton_with_scope(&init_net, af, nport->disc_addr.traddr, + nport->disc_addr.trsvcid, &port->addr); + if (ret) { + pr_err("malformed ip/port passed: %s:%s\n", + nport->disc_addr.traddr, nport->disc_addr.trsvcid); + goto err_port; + } + + port->nport = nport; + port->last_cpu = -1; + INIT_WORK(&port->accept_work, nvmet_tcp_accept_work); + if (port->nport->inline_data_size < 0) + port->nport->inline_data_size = NVMET_TCP_DEF_INLINE_DATA_SIZE; + + ret = sock_create(port->addr.ss_family, SOCK_STREAM, + IPPROTO_TCP, &port->sock); + if (ret) { + pr_err("failed to create a socket\n"); + goto err_port; + } + + port->sock->sk->sk_user_data = port; + port->data_ready = port->sock->sk->sk_data_ready; + port->sock->sk->sk_data_ready = nvmet_tcp_listen_data_ready; + + opt = 1; + ret = kernel_setsockopt(port->sock, IPPROTO_TCP, + TCP_NODELAY, (char *)&opt, sizeof(opt)); + if (ret) { + pr_err("failed to set TCP_NODELAY sock opt %d\n", ret); + goto err_sock; + } + + ret = kernel_setsockopt(port->sock, SOL_SOCKET, SO_REUSEADDR, + (char *)&opt, sizeof(opt)); + if (ret) { + pr_err("failed to set SO_REUSEADDR sock opt %d\n", ret); + goto err_sock; + } + + ret = kernel_bind(port->sock, (struct sockaddr *)&port->addr, + sizeof(port->addr)); + if (ret) { + pr_err("failed to bind port socket %d\n", ret); + goto err_sock; + } + + ret = kernel_listen(port->sock, 128); + if (ret) { + pr_err("failed to listen %d on port sock\n", ret); + goto err_sock; + } + + nport->priv = port; + pr_info("enabling port %d (%pISpc)\n", + le16_to_cpu(nport->disc_addr.portid), &port->addr); + + return 0; + +err_sock: + sock_release(port->sock); +err_port: + kfree(port); + return ret; +} + +static void nvmet_tcp_remove_port(struct nvmet_port *nport) +{ + struct nvmet_tcp_port *port = nport->priv; + + write_lock_bh(&port->sock->sk->sk_callback_lock); + port->sock->sk->sk_data_ready = port->data_ready; + port->sock->sk->sk_user_data = NULL; + write_unlock_bh(&port->sock->sk->sk_callback_lock); + cancel_work_sync(&port->accept_work); + + sock_release(port->sock); + kfree(port); +} + +static void nvmet_tcp_delete_ctrl(struct nvmet_ctrl *ctrl) +{ + struct nvmet_tcp_queue *queue; + + mutex_lock(&nvmet_tcp_queue_mutex); + list_for_each_entry(queue, &nvmet_tcp_queue_list, queue_list) + if (queue->nvme_sq.ctrl == ctrl) + kernel_sock_shutdown(queue->sock, SHUT_RDWR); + mutex_unlock(&nvmet_tcp_queue_mutex); +} + +static u16 nvmet_tcp_install_queue(struct nvmet_sq *sq) +{ + struct nvmet_tcp_queue *queue = + container_of(sq, struct nvmet_tcp_queue, nvme_sq); + + if (sq->qid == 0) { + /* Let inflight controller teardown complete */ + flush_scheduled_work(); + } + + queue->nr_cmds = sq->size * 2; + if (nvmet_tcp_alloc_cmds(queue)) + return NVME_SC_INTERNAL; + return 0; +} + +static void nvmet_tcp_disc_port_addr(struct nvmet_req *req, + struct nvmet_port *nport, char *traddr) +{ + struct nvmet_tcp_port *port = nport->priv; + + if (inet_addr_is_any((struct sockaddr *)&port->addr)) { + struct nvmet_tcp_cmd *cmd = + container_of(req, struct nvmet_tcp_cmd, req); + struct nvmet_tcp_queue *queue = cmd->queue; + + sprintf(traddr, "%pISc", (struct sockaddr *)&queue->sockaddr); + } else { + memcpy(traddr, nport->disc_addr.traddr, NVMF_TRADDR_SIZE); + } +} + +static struct nvmet_fabrics_ops nvmet_tcp_ops = { + .owner = THIS_MODULE, + .type = NVMF_TRTYPE_TCP, + .msdbd = 1, + .has_keyed_sgls = 0, + .add_port = nvmet_tcp_add_port, + .remove_port = nvmet_tcp_remove_port, + .queue_response = nvmet_tcp_queue_response, + .delete_ctrl = nvmet_tcp_delete_ctrl, + .install_queue = nvmet_tcp_install_queue, + .disc_traddr = nvmet_tcp_disc_port_addr, +}; + +static int __init nvmet_tcp_init(void) +{ + int ret; + + nvmet_tcp_wq = alloc_workqueue("nvmet_tcp_wq", WQ_HIGHPRI, 0); + if (!nvmet_tcp_wq) + return -ENOMEM; + + ret = nvmet_register_transport(&nvmet_tcp_ops); + if (ret) + goto err; + + return 0; +err: + destroy_workqueue(nvmet_tcp_wq); + return ret; +} + +static void __exit nvmet_tcp_exit(void) +{ + struct nvmet_tcp_queue *queue; + + nvmet_unregister_transport(&nvmet_tcp_ops); + + flush_scheduled_work(); + mutex_lock(&nvmet_tcp_queue_mutex); + list_for_each_entry(queue, &nvmet_tcp_queue_list, queue_list) + kernel_sock_shutdown(queue->sock, SHUT_RDWR); + mutex_unlock(&nvmet_tcp_queue_mutex); + flush_scheduled_work(); + + destroy_workqueue(nvmet_tcp_wq); +} + +module_init(nvmet_tcp_init); +module_exit(nvmet_tcp_exit); + +MODULE_LICENSE("GPL v2"); +MODULE_ALIAS("nvmet-transport-3"); /* 3 == NVMF_TRTYPE_TCP */ diff --git a/drivers/parisc/ccio-dma.c b/drivers/parisc/ccio-dma.c index 358e380eb7fa..8d2fc84119c6 100644 --- a/drivers/parisc/ccio-dma.c +++ b/drivers/parisc/ccio-dma.c @@ -110,8 +110,6 @@ #define CMD_TLB_DIRECT_WRITE 35 /* IO_COMMAND for I/O TLB Writes */ #define CMD_TLB_PURGE 33 /* IO_COMMAND to Purge I/O TLB entry */ -#define CCIO_MAPPING_ERROR (~(dma_addr_t)0) - struct ioa_registers { /* Runway Supervisory Set */ int32_t unused1[12]; @@ -740,7 +738,7 @@ ccio_map_single(struct device *dev, void *addr, size_t size, BUG_ON(!dev); ioc = GET_IOC(dev); if (!ioc) - return CCIO_MAPPING_ERROR; + return DMA_MAPPING_ERROR; BUG_ON(size <= 0); @@ -1021,11 +1019,6 @@ ccio_unmap_sg(struct device *dev, struct scatterlist *sglist, int nents, DBG_RUN_SG("%s() DONE (nents %d)\n", __func__, nents); } -static int ccio_mapping_error(struct device *dev, dma_addr_t dma_addr) -{ - return dma_addr == CCIO_MAPPING_ERROR; -} - static const struct dma_map_ops ccio_ops = { .dma_supported = ccio_dma_supported, .alloc = ccio_alloc, @@ -1034,7 +1027,6 @@ static const struct dma_map_ops ccio_ops = { .unmap_page = ccio_unmap_page, .map_sg = ccio_map_sg, .unmap_sg = ccio_unmap_sg, - .mapping_error = ccio_mapping_error, }; #ifdef CONFIG_PROC_FS diff --git a/drivers/parisc/sba_iommu.c b/drivers/parisc/sba_iommu.c index e0655949480a..42172eb32235 100644 --- a/drivers/parisc/sba_iommu.c +++ b/drivers/parisc/sba_iommu.c @@ -93,8 +93,6 @@ #define DEFAULT_DMA_HINT_REG 0 -#define SBA_MAPPING_ERROR (~(dma_addr_t)0) - struct sba_device *sba_list; EXPORT_SYMBOL_GPL(sba_list); @@ -725,7 +723,7 @@ sba_map_single(struct device *dev, void *addr, size_t size, ioc = GET_IOC(dev); if (!ioc) - return SBA_MAPPING_ERROR; + return DMA_MAPPING_ERROR; /* save offset bits */ offset = ((dma_addr_t) (long) addr) & ~IOVP_MASK; @@ -1080,11 +1078,6 @@ sba_unmap_sg(struct device *dev, struct scatterlist *sglist, int nents, } -static int sba_mapping_error(struct device *dev, dma_addr_t dma_addr) -{ - return dma_addr == SBA_MAPPING_ERROR; -} - static const struct dma_map_ops sba_ops = { .dma_supported = sba_dma_supported, .alloc = sba_alloc, @@ -1093,7 +1086,6 @@ static const struct dma_map_ops sba_ops = { .unmap_page = sba_unmap_page, .map_sg = sba_map_sg, .unmap_sg = sba_unmap_sg, - .mapping_error = sba_mapping_error, }; diff --git a/drivers/pci/controller/vmd.c b/drivers/pci/controller/vmd.c index e50b0b5815ff..3890812cdf87 100644 --- a/drivers/pci/controller/vmd.c +++ b/drivers/pci/controller/vmd.c @@ -307,39 +307,32 @@ static struct device *to_vmd_dev(struct device *dev) return &vmd->dev->dev; } -static const struct dma_map_ops *vmd_dma_ops(struct device *dev) -{ - return get_dma_ops(to_vmd_dev(dev)); -} - static void *vmd_alloc(struct device *dev, size_t size, dma_addr_t *addr, gfp_t flag, unsigned long attrs) { - return vmd_dma_ops(dev)->alloc(to_vmd_dev(dev), size, addr, flag, - attrs); + return dma_alloc_attrs(to_vmd_dev(dev), size, addr, flag, attrs); } static void vmd_free(struct device *dev, size_t size, void *vaddr, dma_addr_t addr, unsigned long attrs) { - return vmd_dma_ops(dev)->free(to_vmd_dev(dev), size, vaddr, addr, - attrs); + return dma_free_attrs(to_vmd_dev(dev), size, vaddr, addr, attrs); } static int vmd_mmap(struct device *dev, struct vm_area_struct *vma, void *cpu_addr, dma_addr_t addr, size_t size, unsigned long attrs) { - return vmd_dma_ops(dev)->mmap(to_vmd_dev(dev), vma, cpu_addr, addr, - size, attrs); + return dma_mmap_attrs(to_vmd_dev(dev), vma, cpu_addr, addr, size, + attrs); } static int vmd_get_sgtable(struct device *dev, struct sg_table *sgt, void *cpu_addr, dma_addr_t addr, size_t size, unsigned long attrs) { - return vmd_dma_ops(dev)->get_sgtable(to_vmd_dev(dev), sgt, cpu_addr, - addr, size, attrs); + return dma_get_sgtable_attrs(to_vmd_dev(dev), sgt, cpu_addr, addr, size, + attrs); } static dma_addr_t vmd_map_page(struct device *dev, struct page *page, @@ -347,66 +340,60 @@ static dma_addr_t vmd_map_page(struct device *dev, struct page *page, enum dma_data_direction dir, unsigned long attrs) { - return vmd_dma_ops(dev)->map_page(to_vmd_dev(dev), page, offset, size, - dir, attrs); + return dma_map_page_attrs(to_vmd_dev(dev), page, offset, size, dir, + attrs); } static void vmd_unmap_page(struct device *dev, dma_addr_t addr, size_t size, enum dma_data_direction dir, unsigned long attrs) { - vmd_dma_ops(dev)->unmap_page(to_vmd_dev(dev), addr, size, dir, attrs); + dma_unmap_page_attrs(to_vmd_dev(dev), addr, size, dir, attrs); } static int vmd_map_sg(struct device *dev, struct scatterlist *sg, int nents, enum dma_data_direction dir, unsigned long attrs) { - return vmd_dma_ops(dev)->map_sg(to_vmd_dev(dev), sg, nents, dir, attrs); + return dma_map_sg_attrs(to_vmd_dev(dev), sg, nents, dir, attrs); } static void vmd_unmap_sg(struct device *dev, struct scatterlist *sg, int nents, enum dma_data_direction dir, unsigned long attrs) { - vmd_dma_ops(dev)->unmap_sg(to_vmd_dev(dev), sg, nents, dir, attrs); + dma_unmap_sg_attrs(to_vmd_dev(dev), sg, nents, dir, attrs); } static void vmd_sync_single_for_cpu(struct device *dev, dma_addr_t addr, size_t size, enum dma_data_direction dir) { - vmd_dma_ops(dev)->sync_single_for_cpu(to_vmd_dev(dev), addr, size, dir); + dma_sync_single_for_cpu(to_vmd_dev(dev), addr, size, dir); } static void vmd_sync_single_for_device(struct device *dev, dma_addr_t addr, size_t size, enum dma_data_direction dir) { - vmd_dma_ops(dev)->sync_single_for_device(to_vmd_dev(dev), addr, size, - dir); + dma_sync_single_for_device(to_vmd_dev(dev), addr, size, dir); } static void vmd_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg, int nents, enum dma_data_direction dir) { - vmd_dma_ops(dev)->sync_sg_for_cpu(to_vmd_dev(dev), sg, nents, dir); + dma_sync_sg_for_cpu(to_vmd_dev(dev), sg, nents, dir); } static void vmd_sync_sg_for_device(struct device *dev, struct scatterlist *sg, int nents, enum dma_data_direction dir) { - vmd_dma_ops(dev)->sync_sg_for_device(to_vmd_dev(dev), sg, nents, dir); -} - -static int vmd_mapping_error(struct device *dev, dma_addr_t addr) -{ - return vmd_dma_ops(dev)->mapping_error(to_vmd_dev(dev), addr); + dma_sync_sg_for_device(to_vmd_dev(dev), sg, nents, dir); } static int vmd_dma_supported(struct device *dev, u64 mask) { - return vmd_dma_ops(dev)->dma_supported(to_vmd_dev(dev), mask); + return dma_supported(to_vmd_dev(dev), mask); } static u64 vmd_get_required_mask(struct device *dev) { - return vmd_dma_ops(dev)->get_required_mask(to_vmd_dev(dev)); + return dma_get_required_mask(to_vmd_dev(dev)); } static void vmd_teardown_dma_ops(struct vmd_dev *vmd) @@ -446,7 +433,6 @@ static void vmd_setup_dma_ops(struct vmd_dev *vmd) ASSIGN_VMD_DMA_OPS(source, dest, sync_single_for_device); ASSIGN_VMD_DMA_OPS(source, dest, sync_sg_for_cpu); ASSIGN_VMD_DMA_OPS(source, dest, sync_sg_for_device); - ASSIGN_VMD_DMA_OPS(source, dest, mapping_error); ASSIGN_VMD_DMA_OPS(source, dest, dma_supported); ASSIGN_VMD_DMA_OPS(source, dest, get_required_mask); add_dma_domain(domain); diff --git a/drivers/pci/pci-driver.c b/drivers/pci/pci-driver.c index bef17c3fca67..ea55444e6ead 100644 --- a/drivers/pci/pci-driver.c +++ b/drivers/pci/pci-driver.c @@ -1600,10 +1600,8 @@ static int pci_dma_configure(struct device *dev) ret = of_dma_configure(dev, bridge->parent->of_node, true); } else if (has_acpi_companion(bridge)) { struct acpi_device *adev = to_acpi_device_node(bridge->fwnode); - enum dev_dma_attr attr = acpi_get_dma_attr(adev); - if (attr != DEV_DMA_NOT_SUPPORTED) - ret = acpi_dma_configure(dev, attr); + ret = acpi_dma_configure(dev, acpi_get_dma_attr(adev)); } pci_put_host_bridge_device(bridge); diff --git a/drivers/s390/block/dasd_ioctl.c b/drivers/s390/block/dasd_ioctl.c index 2016e0ed5865..8e26001dc11c 100644 --- a/drivers/s390/block/dasd_ioctl.c +++ b/drivers/s390/block/dasd_ioctl.c @@ -412,6 +412,7 @@ static int dasd_ioctl_information(struct dasd_block *block, struct ccw_dev_id dev_id; struct dasd_device *base; struct ccw_device *cdev; + struct list_head *l; unsigned long flags; int rc; @@ -462,23 +463,10 @@ static int dasd_ioctl_information(struct dasd_block *block, memcpy(dasd_info->type, base->discipline->name, 4); - if (block->request_queue->request_fn) { - struct list_head *l; -#ifdef DASD_EXTENDED_PROFILING - { - struct list_head *l; - spin_lock_irqsave(&block->lock, flags); - list_for_each(l, &block->request_queue->queue_head) - dasd_info->req_queue_len++; - spin_unlock_irqrestore(&block->lock, flags); - } -#endif /* DASD_EXTENDED_PROFILING */ - spin_lock_irqsave(get_ccwdev_lock(base->cdev), flags); - list_for_each(l, &base->ccw_queue) - dasd_info->chanq_len++; - spin_unlock_irqrestore(get_ccwdev_lock(base->cdev), - flags); - } + spin_lock_irqsave(&block->queue_lock, flags); + list_for_each(l, &base->ccw_queue) + dasd_info->chanq_len++; + spin_unlock_irqrestore(&block->queue_lock, flags); rc = 0; if (copy_to_user(argp, dasd_info, diff --git a/drivers/s390/scsi/zfcp_aux.c b/drivers/s390/scsi/zfcp_aux.c index 94f4d8fe85e0..9cf30d124b9e 100644 --- a/drivers/s390/scsi/zfcp_aux.c +++ b/drivers/s390/scsi/zfcp_aux.c @@ -4,7 +4,7 @@ * * Module interface and handling of zfcp data structures. * - * Copyright IBM Corp. 2002, 2013 + * Copyright IBM Corp. 2002, 2017 */ /* @@ -124,6 +124,9 @@ static int __init zfcp_module_init(void) { int retval = -ENOMEM; + if (zfcp_experimental_dix) + pr_warn("DIX is enabled. It is experimental and might cause problems\n"); + zfcp_fsf_qtcb_cache = zfcp_cache_hw_align("zfcp_fsf_qtcb", sizeof(struct fsf_qtcb)); if (!zfcp_fsf_qtcb_cache) @@ -248,43 +251,36 @@ static int zfcp_allocate_low_mem_buffers(struct zfcp_adapter *adapter) static void zfcp_free_low_mem_buffers(struct zfcp_adapter *adapter) { - if (adapter->pool.erp_req) - mempool_destroy(adapter->pool.erp_req); - if (adapter->pool.scsi_req) - mempool_destroy(adapter->pool.scsi_req); - if (adapter->pool.scsi_abort) - mempool_destroy(adapter->pool.scsi_abort); - if (adapter->pool.qtcb_pool) - mempool_destroy(adapter->pool.qtcb_pool); - if (adapter->pool.status_read_req) - mempool_destroy(adapter->pool.status_read_req); - if (adapter->pool.sr_data) - mempool_destroy(adapter->pool.sr_data); - if (adapter->pool.gid_pn) - mempool_destroy(adapter->pool.gid_pn); + mempool_destroy(adapter->pool.erp_req); + mempool_destroy(adapter->pool.scsi_req); + mempool_destroy(adapter->pool.scsi_abort); + mempool_destroy(adapter->pool.qtcb_pool); + mempool_destroy(adapter->pool.status_read_req); + mempool_destroy(adapter->pool.sr_data); + mempool_destroy(adapter->pool.gid_pn); } /** * zfcp_status_read_refill - refill the long running status_read_requests * @adapter: ptr to struct zfcp_adapter for which the buffers should be refilled * - * Returns: 0 on success, 1 otherwise - * - * if there are 16 or more status_read requests missing an adapter_reopen - * is triggered + * Return: + * * 0 on success meaning at least one status read is pending + * * 1 if posting failed and not a single status read buffer is pending, + * also triggers adapter reopen recovery */ int zfcp_status_read_refill(struct zfcp_adapter *adapter) { - while (atomic_read(&adapter->stat_miss) > 0) + while (atomic_add_unless(&adapter->stat_miss, -1, 0)) if (zfcp_fsf_status_read(adapter->qdio)) { + atomic_inc(&adapter->stat_miss); /* undo add -1 */ if (atomic_read(&adapter->stat_miss) >= adapter->stat_read_buf_num) { zfcp_erp_adapter_reopen(adapter, 0, "axsref1"); return 1; } break; - } else - atomic_dec(&adapter->stat_miss); + } return 0; } @@ -542,45 +538,3 @@ err_out: zfcp_ccw_adapter_put(adapter); return ERR_PTR(retval); } - -/** - * zfcp_sg_free_table - free memory used by scatterlists - * @sg: pointer to scatterlist - * @count: number of scatterlist which are to be free'ed - * the scatterlist are expected to reference pages always - */ -void zfcp_sg_free_table(struct scatterlist *sg, int count) -{ - int i; - - for (i = 0; i < count; i++, sg++) - if (sg) - free_page((unsigned long) sg_virt(sg)); - else - break; -} - -/** - * zfcp_sg_setup_table - init scatterlist and allocate, assign buffers - * @sg: pointer to struct scatterlist - * @count: number of scatterlists which should be assigned with buffers - * of size page - * - * Returns: 0 on success, -ENOMEM otherwise - */ -int zfcp_sg_setup_table(struct scatterlist *sg, int count) -{ - void *addr; - int i; - - sg_init_table(sg, count); - for (i = 0; i < count; i++, sg++) { - addr = (void *) get_zeroed_page(GFP_KERNEL); - if (!addr) { - zfcp_sg_free_table(sg, i); - return -ENOMEM; - } - sg_set_buf(sg, addr, PAGE_SIZE); - } - return 0; -} diff --git a/drivers/s390/scsi/zfcp_dbf.c b/drivers/s390/scsi/zfcp_dbf.c index 3b368fcf13f4..dccdb41bed8c 100644 --- a/drivers/s390/scsi/zfcp_dbf.c +++ b/drivers/s390/scsi/zfcp_dbf.c @@ -63,7 +63,8 @@ void zfcp_dbf_pl_write(struct zfcp_dbf *dbf, void *data, u16 length, char *area, /** * zfcp_dbf_hba_fsf_res - trace event for fsf responses - * @tag: tag indicating which kind of unsolicited status has been received + * @tag: tag indicating which kind of FSF response has been received + * @level: trace level to be used for event * @req: request for which a response was received */ void zfcp_dbf_hba_fsf_res(char *tag, int level, struct zfcp_fsf_req *req) @@ -81,8 +82,8 @@ void zfcp_dbf_hba_fsf_res(char *tag, int level, struct zfcp_fsf_req *req) rec->id = ZFCP_DBF_HBA_RES; rec->fsf_req_id = req->req_id; rec->fsf_req_status = req->status; - rec->fsf_cmd = req->fsf_command; - rec->fsf_seq_no = req->seq_no; + rec->fsf_cmd = q_head->fsf_command; + rec->fsf_seq_no = q_pref->req_seq_no; rec->u.res.req_issued = req->issued; rec->u.res.prot_status = q_pref->prot_status; rec->u.res.fsf_status = q_head->fsf_status; @@ -94,7 +95,7 @@ void zfcp_dbf_hba_fsf_res(char *tag, int level, struct zfcp_fsf_req *req) memcpy(rec->u.res.fsf_status_qual, &q_head->fsf_status_qual, FSF_STATUS_QUALIFIER_SIZE); - if (req->fsf_command != FSF_QTCB_FCP_CMND) { + if (q_head->fsf_command != FSF_QTCB_FCP_CMND) { rec->pl_len = q_head->log_length; zfcp_dbf_pl_write(dbf, (char *)q_pref + q_head->log_start, rec->pl_len, "fsf_res", req->req_id); @@ -127,7 +128,7 @@ void zfcp_dbf_hba_fsf_uss(char *tag, struct zfcp_fsf_req *req) rec->id = ZFCP_DBF_HBA_USS; rec->fsf_req_id = req->req_id; rec->fsf_req_status = req->status; - rec->fsf_cmd = req->fsf_command; + rec->fsf_cmd = FSF_QTCB_UNSOLICITED_STATUS; if (!srb) goto log; @@ -153,7 +154,7 @@ log: /** * zfcp_dbf_hba_bit_err - trace event for bit error conditions - * @tag: tag indicating which kind of unsolicited status has been received + * @tag: tag indicating which kind of bit error unsolicited status was received * @req: request which caused the bit_error condition */ void zfcp_dbf_hba_bit_err(char *tag, struct zfcp_fsf_req *req) @@ -174,7 +175,7 @@ void zfcp_dbf_hba_bit_err(char *tag, struct zfcp_fsf_req *req) rec->id = ZFCP_DBF_HBA_BIT; rec->fsf_req_id = req->req_id; rec->fsf_req_status = req->status; - rec->fsf_cmd = req->fsf_command; + rec->fsf_cmd = FSF_QTCB_UNSOLICITED_STATUS; memcpy(&rec->u.be, &sr_buf->payload.bit_error, sizeof(struct fsf_bit_error_payload)); @@ -224,6 +225,7 @@ void zfcp_dbf_hba_def_err(struct zfcp_adapter *adapter, u64 req_id, u16 scount, /** * zfcp_dbf_hba_basic - trace event for basic adapter events + * @tag: identifier for event * @adapter: pointer to struct zfcp_adapter */ void zfcp_dbf_hba_basic(char *tag, struct zfcp_adapter *adapter) @@ -357,7 +359,7 @@ void zfcp_dbf_rec_run_lvl(int level, char *tag, struct zfcp_erp_action *erp) rec->u.run.fsf_req_id = erp->fsf_req_id; rec->u.run.rec_status = erp->status; rec->u.run.rec_step = erp->step; - rec->u.run.rec_action = erp->action; + rec->u.run.rec_action = erp->type; if (erp->sdev) rec->u.run.rec_count = @@ -478,7 +480,8 @@ out: /** * zfcp_dbf_san_req - trace event for issued SAN request * @tag: identifier for event - * @fsf_req: request containing issued CT data + * @fsf: request containing issued CT or ELS data + * @d_id: N_Port_ID where SAN request is sent to * d_id: destination ID */ void zfcp_dbf_san_req(char *tag, struct zfcp_fsf_req *fsf, u32 d_id) @@ -560,7 +563,7 @@ static u16 zfcp_dbf_san_res_cap_len_if_gpn_ft(char *tag, /** * zfcp_dbf_san_res - trace event for received SAN request * @tag: identifier for event - * @fsf_req: request containing issued CT data + * @fsf: request containing received CT or ELS data */ void zfcp_dbf_san_res(char *tag, struct zfcp_fsf_req *fsf) { @@ -580,7 +583,7 @@ void zfcp_dbf_san_res(char *tag, struct zfcp_fsf_req *fsf) /** * zfcp_dbf_san_in_els - trace event for incoming ELS * @tag: identifier for event - * @fsf_req: request containing issued CT data + * @fsf: request containing received ELS data */ void zfcp_dbf_san_in_els(char *tag, struct zfcp_fsf_req *fsf) { diff --git a/drivers/s390/scsi/zfcp_dbf.h b/drivers/s390/scsi/zfcp_dbf.h index d116c07ed77a..900c779cc39b 100644 --- a/drivers/s390/scsi/zfcp_dbf.h +++ b/drivers/s390/scsi/zfcp_dbf.h @@ -42,7 +42,8 @@ struct zfcp_dbf_rec_trigger { * @fsf_req_id: request id for fsf requests * @rec_status: status of the fsf request * @rec_step: current step of the recovery action - * rec_count: recovery counter + * @rec_action: ERP action type + * @rec_count: recoveries including retries for particular @rec_action */ struct zfcp_dbf_rec_running { u64 fsf_req_id; @@ -72,6 +73,7 @@ enum zfcp_dbf_rec_id { * @adapter_status: current status of the adapter * @port_status: current status of the port * @lun_status: current status of the lun + * @u: record type specific data * @u.trig: structure zfcp_dbf_rec_trigger * @u.run: structure zfcp_dbf_rec_running */ @@ -126,6 +128,8 @@ struct zfcp_dbf_san { * @prot_status_qual: protocol status qualifier * @fsf_status: fsf status * @fsf_status_qual: fsf status qualifier + * @port_handle: handle for port + * @lun_handle: handle for LUN */ struct zfcp_dbf_hba_res { u64 req_issued; @@ -158,6 +162,7 @@ struct zfcp_dbf_hba_uss { * @ZFCP_DBF_HBA_RES: response trace record * @ZFCP_DBF_HBA_USS: unsolicited status trace record * @ZFCP_DBF_HBA_BIT: bit error trace record + * @ZFCP_DBF_HBA_BASIC: basic adapter event, only trace tag, no other data */ enum zfcp_dbf_hba_id { ZFCP_DBF_HBA_RES = 1, @@ -176,6 +181,9 @@ enum zfcp_dbf_hba_id { * @fsf_seq_no: fsf sequence number * @pl_len: length of payload stored as zfcp_dbf_pay * @u: record type specific data + * @u.res: data for fsf responses + * @u.uss: data for unsolicited status buffer + * @u.be: data for bit error unsolicited status buffer */ struct zfcp_dbf_hba { u8 id; @@ -339,8 +347,8 @@ void zfcp_dbf_hba_fsf_response(struct zfcp_fsf_req *req) zfcp_dbf_hba_fsf_resp_suppress(req) ? 5 : 1, req); - } else if ((req->fsf_command == FSF_QTCB_OPEN_PORT_WITH_DID) || - (req->fsf_command == FSF_QTCB_OPEN_LUN)) { + } else if ((qtcb->header.fsf_command == FSF_QTCB_OPEN_PORT_WITH_DID) || + (qtcb->header.fsf_command == FSF_QTCB_OPEN_LUN)) { zfcp_dbf_hba_fsf_resp("fs_open", 4, req); } else if (qtcb->header.log_length) { diff --git a/drivers/s390/scsi/zfcp_def.h b/drivers/s390/scsi/zfcp_def.h index 3396a47721a7..87d2f47a6990 100644 --- a/drivers/s390/scsi/zfcp_def.h +++ b/drivers/s390/scsi/zfcp_def.h @@ -4,7 +4,7 @@ * * Global definitions for the zfcp device driver. * - * Copyright IBM Corp. 2002, 2010 + * Copyright IBM Corp. 2002, 2017 */ #ifndef ZFCP_DEF_H @@ -41,24 +41,16 @@ #include "zfcp_fc.h" #include "zfcp_qdio.h" -struct zfcp_reqlist; - -/********************* SCSI SPECIFIC DEFINES *********************************/ -#define ZFCP_SCSI_ER_TIMEOUT (10*HZ) - /********************* FSF SPECIFIC DEFINES *********************************/ /* ATTENTION: value must not be used by hardware */ #define FSF_QTCB_UNSOLICITED_STATUS 0x6305 -/* timeout value for "default timer" for fsf requests */ -#define ZFCP_FSF_REQUEST_TIMEOUT (60*HZ) - /*************** ADAPTER/PORT/UNIT AND FSF_REQ STATUS FLAGS ******************/ /* - * Note, the leftmost status byte is common among adapter, port - * and unit + * Note, the leftmost 12 status bits (3 nibbles) are common among adapter, port + * and unit. This is a mask for bitwise 'and' with status values. */ #define ZFCP_COMMON_FLAGS 0xfff00000 @@ -97,49 +89,60 @@ struct zfcp_reqlist; /************************* STRUCTURE DEFINITIONS *****************************/ -struct zfcp_fsf_req; +/** + * enum zfcp_erp_act_type - Type of ERP action object. + * @ZFCP_ERP_ACTION_REOPEN_LUN: LUN recovery. + * @ZFCP_ERP_ACTION_REOPEN_PORT: Port recovery. + * @ZFCP_ERP_ACTION_REOPEN_PORT_FORCED: Forced port recovery. + * @ZFCP_ERP_ACTION_REOPEN_ADAPTER: Adapter recovery. + * + * Values must fit into u8 because of code dependencies: + * zfcp_dbf_rec_trig(), &zfcp_dbf_rec_trigger.want, &zfcp_dbf_rec_trigger.need; + * zfcp_dbf_rec_run_lvl(), zfcp_dbf_rec_run(), &zfcp_dbf_rec_running.rec_action. + */ +enum zfcp_erp_act_type { + ZFCP_ERP_ACTION_REOPEN_LUN = 1, + ZFCP_ERP_ACTION_REOPEN_PORT = 2, + ZFCP_ERP_ACTION_REOPEN_PORT_FORCED = 3, + ZFCP_ERP_ACTION_REOPEN_ADAPTER = 4, +}; -/* holds various memory pools of an adapter */ -struct zfcp_adapter_mempool { - mempool_t *erp_req; - mempool_t *gid_pn_req; - mempool_t *scsi_req; - mempool_t *scsi_abort; - mempool_t *status_read_req; - mempool_t *sr_data; - mempool_t *gid_pn; - mempool_t *qtcb_pool; +/* + * Values must fit into u16 because of code dependencies: + * zfcp_dbf_rec_run_lvl(), zfcp_dbf_rec_run(), zfcp_dbf_rec_run_wka(), + * &zfcp_dbf_rec_running.rec_step. + */ +enum zfcp_erp_steps { + ZFCP_ERP_STEP_UNINITIALIZED = 0x0000, + ZFCP_ERP_STEP_PHYS_PORT_CLOSING = 0x0010, + ZFCP_ERP_STEP_PORT_CLOSING = 0x0100, + ZFCP_ERP_STEP_PORT_OPENING = 0x0800, + ZFCP_ERP_STEP_LUN_CLOSING = 0x1000, + ZFCP_ERP_STEP_LUN_OPENING = 0x2000, }; struct zfcp_erp_action { struct list_head list; - int action; /* requested action code */ + enum zfcp_erp_act_type type; /* requested action code */ struct zfcp_adapter *adapter; /* device which should be recovered */ struct zfcp_port *port; struct scsi_device *sdev; u32 status; /* recovery status */ - u32 step; /* active step of this erp action */ + enum zfcp_erp_steps step; /* active step of this erp action */ unsigned long fsf_req_id; struct timer_list timer; }; -struct fsf_latency_record { - u32 min; - u32 max; - u64 sum; -}; - -struct latency_cont { - struct fsf_latency_record channel; - struct fsf_latency_record fabric; - u64 counter; -}; - -struct zfcp_latencies { - struct latency_cont read; - struct latency_cont write; - struct latency_cont cmd; - spinlock_t lock; +/* holds various memory pools of an adapter */ +struct zfcp_adapter_mempool { + mempool_t *erp_req; + mempool_t *gid_pn_req; + mempool_t *scsi_req; + mempool_t *scsi_abort; + mempool_t *status_read_req; + mempool_t *sr_data; + mempool_t *gid_pn; + mempool_t *qtcb_pool; }; struct zfcp_adapter { @@ -220,6 +223,25 @@ struct zfcp_port { unsigned int starget_id; }; +struct zfcp_latency_record { + u32 min; + u32 max; + u64 sum; +}; + +struct zfcp_latency_cont { + struct zfcp_latency_record channel; + struct zfcp_latency_record fabric; + u64 counter; +}; + +struct zfcp_latencies { + struct zfcp_latency_cont read; + struct zfcp_latency_cont write; + struct zfcp_latency_cont cmd; + spinlock_t lock; +}; + /** * struct zfcp_unit - LUN configured via zfcp sysfs * @dev: struct device for sysfs representation and reference counting @@ -287,9 +309,7 @@ static inline u64 zfcp_scsi_dev_lun(struct scsi_device *sdev) * @qdio_req: qdio queue related values * @completion: used to signal the completion of the request * @status: status of the request - * @fsf_command: FSF command issued * @qtcb: associated QTCB - * @seq_no: sequence number of this request * @data: private data * @timer: timer data of this request * @erp_action: reference to erp action if request issued on behalf of ERP @@ -304,9 +324,7 @@ struct zfcp_fsf_req { struct zfcp_qdio_req qdio_req; struct completion completion; u32 status; - u32 fsf_command; struct fsf_qtcb *qtcb; - u32 seq_no; void *data; struct timer_list timer; struct zfcp_erp_action *erp_action; @@ -321,4 +339,9 @@ int zfcp_adapter_multi_buffer_active(struct zfcp_adapter *adapter) return atomic_read(&adapter->status) & ZFCP_STATUS_ADAPTER_MB_ACT; } +static inline bool zfcp_fsf_req_is_status_read_buffer(struct zfcp_fsf_req *req) +{ + return req->qtcb == NULL; +} + #endif /* ZFCP_DEF_H */ diff --git a/drivers/s390/scsi/zfcp_erp.c b/drivers/s390/scsi/zfcp_erp.c index e7e6b63905e2..744a64680d5b 100644 --- a/drivers/s390/scsi/zfcp_erp.c +++ b/drivers/s390/scsi/zfcp_erp.c @@ -4,7 +4,7 @@ * * Error Recovery Procedures (ERP). * - * Copyright IBM Corp. 2002, 2016 + * Copyright IBM Corp. 2002, 2017 */ #define KMSG_COMPONENT "zfcp" @@ -24,38 +24,18 @@ enum zfcp_erp_act_flags { ZFCP_STATUS_ERP_NO_REF = 0x00800000, }; -enum zfcp_erp_steps { - ZFCP_ERP_STEP_UNINITIALIZED = 0x0000, - ZFCP_ERP_STEP_PHYS_PORT_CLOSING = 0x0010, - ZFCP_ERP_STEP_PORT_CLOSING = 0x0100, - ZFCP_ERP_STEP_PORT_OPENING = 0x0800, - ZFCP_ERP_STEP_LUN_CLOSING = 0x1000, - ZFCP_ERP_STEP_LUN_OPENING = 0x2000, -}; - -/** - * enum zfcp_erp_act_type - Type of ERP action object. - * @ZFCP_ERP_ACTION_REOPEN_LUN: LUN recovery. - * @ZFCP_ERP_ACTION_REOPEN_PORT: Port recovery. - * @ZFCP_ERP_ACTION_REOPEN_PORT_FORCED: Forced port recovery. - * @ZFCP_ERP_ACTION_REOPEN_ADAPTER: Adapter recovery. - * @ZFCP_ERP_ACTION_NONE: Eyecatcher pseudo flag to bitwise or-combine with - * either of the first four enum values. - * Used to indicate that an ERP action could not be - * set up despite a detected need for some recovery. - * @ZFCP_ERP_ACTION_FAILED: Eyecatcher pseudo flag to bitwise or-combine with - * either of the first four enum values. - * Used to indicate that ERP not needed because - * the object has ZFCP_STATUS_COMMON_ERP_FAILED. +/* + * Eyecatcher pseudo flag to bitwise or-combine with enum zfcp_erp_act_type. + * Used to indicate that an ERP action could not be set up despite a detected + * need for some recovery. */ -enum zfcp_erp_act_type { - ZFCP_ERP_ACTION_REOPEN_LUN = 1, - ZFCP_ERP_ACTION_REOPEN_PORT = 2, - ZFCP_ERP_ACTION_REOPEN_PORT_FORCED = 3, - ZFCP_ERP_ACTION_REOPEN_ADAPTER = 4, - ZFCP_ERP_ACTION_NONE = 0xc0, - ZFCP_ERP_ACTION_FAILED = 0xe0, -}; +#define ZFCP_ERP_ACTION_NONE 0xc0 +/* + * Eyecatcher pseudo flag to bitwise or-combine with enum zfcp_erp_act_type. + * Used to indicate that ERP not needed because the object has + * ZFCP_STATUS_COMMON_ERP_FAILED. + */ +#define ZFCP_ERP_ACTION_FAILED 0xe0 enum zfcp_erp_act_result { ZFCP_ERP_SUCCEEDED = 0, @@ -136,11 +116,11 @@ static void zfcp_erp_action_dismiss_adapter(struct zfcp_adapter *adapter) } } -static int zfcp_erp_handle_failed(int want, struct zfcp_adapter *adapter, - struct zfcp_port *port, - struct scsi_device *sdev) +static enum zfcp_erp_act_type zfcp_erp_handle_failed( + enum zfcp_erp_act_type want, struct zfcp_adapter *adapter, + struct zfcp_port *port, struct scsi_device *sdev) { - int need = want; + enum zfcp_erp_act_type need = want; struct zfcp_scsi_dev *zsdev; switch (want) { @@ -171,19 +151,17 @@ static int zfcp_erp_handle_failed(int want, struct zfcp_adapter *adapter, adapter, ZFCP_STATUS_COMMON_ERP_FAILED); } break; - default: - need = 0; - break; } return need; } -static int zfcp_erp_required_act(int want, struct zfcp_adapter *adapter, +static enum zfcp_erp_act_type zfcp_erp_required_act(enum zfcp_erp_act_type want, + struct zfcp_adapter *adapter, struct zfcp_port *port, struct scsi_device *sdev) { - int need = want; + enum zfcp_erp_act_type need = want; int l_status, p_status, a_status; struct zfcp_scsi_dev *zfcp_sdev; @@ -230,7 +208,8 @@ static int zfcp_erp_required_act(int want, struct zfcp_adapter *adapter, return need; } -static struct zfcp_erp_action *zfcp_erp_setup_act(int need, u32 act_status, +static struct zfcp_erp_action *zfcp_erp_setup_act(enum zfcp_erp_act_type need, + u32 act_status, struct zfcp_adapter *adapter, struct zfcp_port *port, struct scsi_device *sdev) @@ -278,9 +257,6 @@ static struct zfcp_erp_action *zfcp_erp_setup_act(int need, u32 act_status, ZFCP_STATUS_COMMON_RUNNING)) act_status |= ZFCP_STATUS_ERP_CLOSE_ONLY; break; - - default: - return NULL; } WARN_ON_ONCE(erp_action->adapter != adapter); @@ -288,18 +264,19 @@ static struct zfcp_erp_action *zfcp_erp_setup_act(int need, u32 act_status, memset(&erp_action->timer, 0, sizeof(erp_action->timer)); erp_action->step = ZFCP_ERP_STEP_UNINITIALIZED; erp_action->fsf_req_id = 0; - erp_action->action = need; + erp_action->type = need; erp_action->status = act_status; return erp_action; } -static void zfcp_erp_action_enqueue(int want, struct zfcp_adapter *adapter, +static void zfcp_erp_action_enqueue(enum zfcp_erp_act_type want, + struct zfcp_adapter *adapter, struct zfcp_port *port, struct scsi_device *sdev, - char *id, u32 act_status) + char *dbftag, u32 act_status) { - int need; + enum zfcp_erp_act_type need; struct zfcp_erp_action *act; need = zfcp_erp_handle_failed(want, adapter, port, sdev); @@ -327,10 +304,11 @@ static void zfcp_erp_action_enqueue(int want, struct zfcp_adapter *adapter, list_add_tail(&act->list, &adapter->erp_ready_head); wake_up(&adapter->erp_ready_wq); out: - zfcp_dbf_rec_trig(id, adapter, port, sdev, want, need); + zfcp_dbf_rec_trig(dbftag, adapter, port, sdev, want, need); } -void zfcp_erp_port_forced_no_port_dbf(char *id, struct zfcp_adapter *adapter, +void zfcp_erp_port_forced_no_port_dbf(char *dbftag, + struct zfcp_adapter *adapter, u64 port_name, u32 port_id) { unsigned long flags; @@ -344,29 +322,30 @@ void zfcp_erp_port_forced_no_port_dbf(char *id, struct zfcp_adapter *adapter, atomic_set(&tmpport.status, -1); /* unknown */ tmpport.wwpn = port_name; tmpport.d_id = port_id; - zfcp_dbf_rec_trig(id, adapter, &tmpport, NULL, + zfcp_dbf_rec_trig(dbftag, adapter, &tmpport, NULL, ZFCP_ERP_ACTION_REOPEN_PORT_FORCED, ZFCP_ERP_ACTION_NONE); write_unlock_irqrestore(&adapter->erp_lock, flags); } static void _zfcp_erp_adapter_reopen(struct zfcp_adapter *adapter, - int clear_mask, char *id) + int clear_mask, char *dbftag) { zfcp_erp_adapter_block(adapter, clear_mask); zfcp_scsi_schedule_rports_block(adapter); zfcp_erp_action_enqueue(ZFCP_ERP_ACTION_REOPEN_ADAPTER, - adapter, NULL, NULL, id, 0); + adapter, NULL, NULL, dbftag, 0); } /** * zfcp_erp_adapter_reopen - Reopen adapter. * @adapter: Adapter to reopen. * @clear: Status flags to clear. - * @id: Id for debug trace event. + * @dbftag: Tag for debug trace event. */ -void zfcp_erp_adapter_reopen(struct zfcp_adapter *adapter, int clear, char *id) +void zfcp_erp_adapter_reopen(struct zfcp_adapter *adapter, int clear, + char *dbftag) { unsigned long flags; @@ -375,7 +354,7 @@ void zfcp_erp_adapter_reopen(struct zfcp_adapter *adapter, int clear, char *id) write_lock_irqsave(&adapter->erp_lock, flags); zfcp_erp_action_enqueue(ZFCP_ERP_ACTION_REOPEN_ADAPTER, adapter, - NULL, NULL, id, 0); + NULL, NULL, dbftag, 0); write_unlock_irqrestore(&adapter->erp_lock, flags); } @@ -383,25 +362,25 @@ void zfcp_erp_adapter_reopen(struct zfcp_adapter *adapter, int clear, char *id) * zfcp_erp_adapter_shutdown - Shutdown adapter. * @adapter: Adapter to shut down. * @clear: Status flags to clear. - * @id: Id for debug trace event. + * @dbftag: Tag for debug trace event. */ void zfcp_erp_adapter_shutdown(struct zfcp_adapter *adapter, int clear, - char *id) + char *dbftag) { int flags = ZFCP_STATUS_COMMON_RUNNING | ZFCP_STATUS_COMMON_ERP_FAILED; - zfcp_erp_adapter_reopen(adapter, clear | flags, id); + zfcp_erp_adapter_reopen(adapter, clear | flags, dbftag); } /** * zfcp_erp_port_shutdown - Shutdown port * @port: Port to shut down. * @clear: Status flags to clear. - * @id: Id for debug trace event. + * @dbftag: Tag for debug trace event. */ -void zfcp_erp_port_shutdown(struct zfcp_port *port, int clear, char *id) +void zfcp_erp_port_shutdown(struct zfcp_port *port, int clear, char *dbftag) { int flags = ZFCP_STATUS_COMMON_RUNNING | ZFCP_STATUS_COMMON_ERP_FAILED; - zfcp_erp_port_reopen(port, clear | flags, id); + zfcp_erp_port_reopen(port, clear | flags, dbftag); } static void zfcp_erp_port_block(struct zfcp_port *port, int clear) @@ -411,53 +390,55 @@ static void zfcp_erp_port_block(struct zfcp_port *port, int clear) } static void _zfcp_erp_port_forced_reopen(struct zfcp_port *port, int clear, - char *id) + char *dbftag) { zfcp_erp_port_block(port, clear); zfcp_scsi_schedule_rport_block(port); zfcp_erp_action_enqueue(ZFCP_ERP_ACTION_REOPEN_PORT_FORCED, - port->adapter, port, NULL, id, 0); + port->adapter, port, NULL, dbftag, 0); } /** * zfcp_erp_port_forced_reopen - Forced close of port and open again * @port: Port to force close and to reopen. * @clear: Status flags to clear. - * @id: Id for debug trace event. + * @dbftag: Tag for debug trace event. */ -void zfcp_erp_port_forced_reopen(struct zfcp_port *port, int clear, char *id) +void zfcp_erp_port_forced_reopen(struct zfcp_port *port, int clear, + char *dbftag) { unsigned long flags; struct zfcp_adapter *adapter = port->adapter; write_lock_irqsave(&adapter->erp_lock, flags); - _zfcp_erp_port_forced_reopen(port, clear, id); + _zfcp_erp_port_forced_reopen(port, clear, dbftag); write_unlock_irqrestore(&adapter->erp_lock, flags); } -static void _zfcp_erp_port_reopen(struct zfcp_port *port, int clear, char *id) +static void _zfcp_erp_port_reopen(struct zfcp_port *port, int clear, + char *dbftag) { zfcp_erp_port_block(port, clear); zfcp_scsi_schedule_rport_block(port); zfcp_erp_action_enqueue(ZFCP_ERP_ACTION_REOPEN_PORT, - port->adapter, port, NULL, id, 0); + port->adapter, port, NULL, dbftag, 0); } /** * zfcp_erp_port_reopen - trigger remote port recovery * @port: port to recover - * @clear_mask: flags in port status to be cleared - * @id: Id for debug trace event. + * @clear: flags in port status to be cleared + * @dbftag: Tag for debug trace event. */ -void zfcp_erp_port_reopen(struct zfcp_port *port, int clear, char *id) +void zfcp_erp_port_reopen(struct zfcp_port *port, int clear, char *dbftag) { unsigned long flags; struct zfcp_adapter *adapter = port->adapter; write_lock_irqsave(&adapter->erp_lock, flags); - _zfcp_erp_port_reopen(port, clear, id); + _zfcp_erp_port_reopen(port, clear, dbftag); write_unlock_irqrestore(&adapter->erp_lock, flags); } @@ -467,8 +448,8 @@ static void zfcp_erp_lun_block(struct scsi_device *sdev, int clear_mask) ZFCP_STATUS_COMMON_UNBLOCKED | clear_mask); } -static void _zfcp_erp_lun_reopen(struct scsi_device *sdev, int clear, char *id, - u32 act_status) +static void _zfcp_erp_lun_reopen(struct scsi_device *sdev, int clear, + char *dbftag, u32 act_status) { struct zfcp_scsi_dev *zfcp_sdev = sdev_to_zfcp(sdev); struct zfcp_adapter *adapter = zfcp_sdev->port->adapter; @@ -476,18 +457,18 @@ static void _zfcp_erp_lun_reopen(struct scsi_device *sdev, int clear, char *id, zfcp_erp_lun_block(sdev, clear); zfcp_erp_action_enqueue(ZFCP_ERP_ACTION_REOPEN_LUN, adapter, - zfcp_sdev->port, sdev, id, act_status); + zfcp_sdev->port, sdev, dbftag, act_status); } /** * zfcp_erp_lun_reopen - initiate reopen of a LUN * @sdev: SCSI device / LUN to be reopened - * @clear_mask: specifies flags in LUN status to be cleared - * @id: Id for debug trace event. + * @clear: specifies flags in LUN status to be cleared + * @dbftag: Tag for debug trace event. * * Return: 0 on success, < 0 on error */ -void zfcp_erp_lun_reopen(struct scsi_device *sdev, int clear, char *id) +void zfcp_erp_lun_reopen(struct scsi_device *sdev, int clear, char *dbftag) { unsigned long flags; struct zfcp_scsi_dev *zfcp_sdev = sdev_to_zfcp(sdev); @@ -495,7 +476,7 @@ void zfcp_erp_lun_reopen(struct scsi_device *sdev, int clear, char *id) struct zfcp_adapter *adapter = port->adapter; write_lock_irqsave(&adapter->erp_lock, flags); - _zfcp_erp_lun_reopen(sdev, clear, id, 0); + _zfcp_erp_lun_reopen(sdev, clear, dbftag, 0); write_unlock_irqrestore(&adapter->erp_lock, flags); } @@ -503,25 +484,25 @@ void zfcp_erp_lun_reopen(struct scsi_device *sdev, int clear, char *id) * zfcp_erp_lun_shutdown - Shutdown LUN * @sdev: SCSI device / LUN to shut down. * @clear: Status flags to clear. - * @id: Id for debug trace event. + * @dbftag: Tag for debug trace event. */ -void zfcp_erp_lun_shutdown(struct scsi_device *sdev, int clear, char *id) +void zfcp_erp_lun_shutdown(struct scsi_device *sdev, int clear, char *dbftag) { int flags = ZFCP_STATUS_COMMON_RUNNING | ZFCP_STATUS_COMMON_ERP_FAILED; - zfcp_erp_lun_reopen(sdev, clear | flags, id); + zfcp_erp_lun_reopen(sdev, clear | flags, dbftag); } /** * zfcp_erp_lun_shutdown_wait - Shutdown LUN and wait for erp completion * @sdev: SCSI device / LUN to shut down. - * @id: Id for debug trace event. + * @dbftag: Tag for debug trace event. * * Do not acquire a reference for the LUN when creating the ERP * action. It is safe, because this function waits for the ERP to * complete first. This allows to shutdown the LUN, even when the SCSI * device is in the state SDEV_DEL when scsi_device_get will fail. */ -void zfcp_erp_lun_shutdown_wait(struct scsi_device *sdev, char *id) +void zfcp_erp_lun_shutdown_wait(struct scsi_device *sdev, char *dbftag) { unsigned long flags; struct zfcp_scsi_dev *zfcp_sdev = sdev_to_zfcp(sdev); @@ -530,7 +511,7 @@ void zfcp_erp_lun_shutdown_wait(struct scsi_device *sdev, char *id) int clear = ZFCP_STATUS_COMMON_RUNNING | ZFCP_STATUS_COMMON_ERP_FAILED; write_lock_irqsave(&adapter->erp_lock, flags); - _zfcp_erp_lun_reopen(sdev, clear, id, ZFCP_STATUS_ERP_NO_REF); + _zfcp_erp_lun_reopen(sdev, clear, dbftag, ZFCP_STATUS_ERP_NO_REF); write_unlock_irqrestore(&adapter->erp_lock, flags); zfcp_erp_wait(adapter); @@ -619,7 +600,7 @@ void zfcp_erp_notify(struct zfcp_erp_action *erp_action, unsigned long set_mask) /** * zfcp_erp_timeout_handler - Trigger ERP action from timed out ERP request - * @data: ERP action (from timer data) + * @t: timer list entry embedded in zfcp FSF request */ void zfcp_erp_timeout_handler(struct timer_list *t) { @@ -644,31 +625,31 @@ static void zfcp_erp_strategy_memwait(struct zfcp_erp_action *erp_action) } static void _zfcp_erp_port_reopen_all(struct zfcp_adapter *adapter, - int clear, char *id) + int clear, char *dbftag) { struct zfcp_port *port; read_lock(&adapter->port_list_lock); list_for_each_entry(port, &adapter->port_list, list) - _zfcp_erp_port_reopen(port, clear, id); + _zfcp_erp_port_reopen(port, clear, dbftag); read_unlock(&adapter->port_list_lock); } static void _zfcp_erp_lun_reopen_all(struct zfcp_port *port, int clear, - char *id) + char *dbftag) { struct scsi_device *sdev; spin_lock(port->adapter->scsi_host->host_lock); __shost_for_each_device(sdev, port->adapter->scsi_host) if (sdev_to_zfcp(sdev)->port == port) - _zfcp_erp_lun_reopen(sdev, clear, id, 0); + _zfcp_erp_lun_reopen(sdev, clear, dbftag, 0); spin_unlock(port->adapter->scsi_host->host_lock); } static void zfcp_erp_strategy_followup_failed(struct zfcp_erp_action *act) { - switch (act->action) { + switch (act->type) { case ZFCP_ERP_ACTION_REOPEN_ADAPTER: _zfcp_erp_adapter_reopen(act->adapter, 0, "ersff_1"); break; @@ -686,7 +667,7 @@ static void zfcp_erp_strategy_followup_failed(struct zfcp_erp_action *act) static void zfcp_erp_strategy_followup_success(struct zfcp_erp_action *act) { - switch (act->action) { + switch (act->type) { case ZFCP_ERP_ACTION_REOPEN_ADAPTER: _zfcp_erp_port_reopen_all(act->adapter, 0, "ersfs_1"); break; @@ -696,6 +677,9 @@ static void zfcp_erp_strategy_followup_success(struct zfcp_erp_action *act) case ZFCP_ERP_ACTION_REOPEN_PORT: _zfcp_erp_lun_reopen_all(act->port, 0, "ersfs_3"); break; + case ZFCP_ERP_ACTION_REOPEN_LUN: + /* NOP */ + break; } } @@ -723,7 +707,8 @@ static void zfcp_erp_enqueue_ptp_port(struct zfcp_adapter *adapter) _zfcp_erp_port_reopen(port, 0, "ereptp1"); } -static int zfcp_erp_adapter_strat_fsf_xconf(struct zfcp_erp_action *erp_action) +static enum zfcp_erp_act_result zfcp_erp_adapter_strat_fsf_xconf( + struct zfcp_erp_action *erp_action) { int retries; int sleep = 1; @@ -768,7 +753,8 @@ static int zfcp_erp_adapter_strat_fsf_xconf(struct zfcp_erp_action *erp_action) return ZFCP_ERP_SUCCEEDED; } -static int zfcp_erp_adapter_strategy_open_fsf_xport(struct zfcp_erp_action *act) +static enum zfcp_erp_act_result zfcp_erp_adapter_strategy_open_fsf_xport( + struct zfcp_erp_action *act) { int ret; struct zfcp_adapter *adapter = act->adapter; @@ -793,7 +779,8 @@ static int zfcp_erp_adapter_strategy_open_fsf_xport(struct zfcp_erp_action *act) return ZFCP_ERP_SUCCEEDED; } -static int zfcp_erp_adapter_strategy_open_fsf(struct zfcp_erp_action *act) +static enum zfcp_erp_act_result zfcp_erp_adapter_strategy_open_fsf( + struct zfcp_erp_action *act) { if (zfcp_erp_adapter_strat_fsf_xconf(act) == ZFCP_ERP_FAILED) return ZFCP_ERP_FAILED; @@ -832,7 +819,8 @@ static void zfcp_erp_adapter_strategy_close(struct zfcp_erp_action *act) ZFCP_STATUS_ADAPTER_LINK_UNPLUGGED, &adapter->status); } -static int zfcp_erp_adapter_strategy_open(struct zfcp_erp_action *act) +static enum zfcp_erp_act_result zfcp_erp_adapter_strategy_open( + struct zfcp_erp_action *act) { struct zfcp_adapter *adapter = act->adapter; @@ -853,7 +841,8 @@ static int zfcp_erp_adapter_strategy_open(struct zfcp_erp_action *act) return ZFCP_ERP_SUCCEEDED; } -static int zfcp_erp_adapter_strategy(struct zfcp_erp_action *act) +static enum zfcp_erp_act_result zfcp_erp_adapter_strategy( + struct zfcp_erp_action *act) { struct zfcp_adapter *adapter = act->adapter; @@ -871,7 +860,8 @@ static int zfcp_erp_adapter_strategy(struct zfcp_erp_action *act) return ZFCP_ERP_SUCCEEDED; } -static int zfcp_erp_port_forced_strategy_close(struct zfcp_erp_action *act) +static enum zfcp_erp_act_result zfcp_erp_port_forced_strategy_close( + struct zfcp_erp_action *act) { int retval; @@ -885,7 +875,8 @@ static int zfcp_erp_port_forced_strategy_close(struct zfcp_erp_action *act) return ZFCP_ERP_CONTINUES; } -static int zfcp_erp_port_forced_strategy(struct zfcp_erp_action *erp_action) +static enum zfcp_erp_act_result zfcp_erp_port_forced_strategy( + struct zfcp_erp_action *erp_action) { struct zfcp_port *port = erp_action->port; int status = atomic_read(&port->status); @@ -901,11 +892,19 @@ static int zfcp_erp_port_forced_strategy(struct zfcp_erp_action *erp_action) case ZFCP_ERP_STEP_PHYS_PORT_CLOSING: if (!(status & ZFCP_STATUS_PORT_PHYS_OPEN)) return ZFCP_ERP_SUCCEEDED; + break; + case ZFCP_ERP_STEP_PORT_CLOSING: + case ZFCP_ERP_STEP_PORT_OPENING: + case ZFCP_ERP_STEP_LUN_CLOSING: + case ZFCP_ERP_STEP_LUN_OPENING: + /* NOP */ + break; } return ZFCP_ERP_FAILED; } -static int zfcp_erp_port_strategy_close(struct zfcp_erp_action *erp_action) +static enum zfcp_erp_act_result zfcp_erp_port_strategy_close( + struct zfcp_erp_action *erp_action) { int retval; @@ -918,7 +917,8 @@ static int zfcp_erp_port_strategy_close(struct zfcp_erp_action *erp_action) return ZFCP_ERP_CONTINUES; } -static int zfcp_erp_port_strategy_open_port(struct zfcp_erp_action *erp_action) +static enum zfcp_erp_act_result zfcp_erp_port_strategy_open_port( + struct zfcp_erp_action *erp_action) { int retval; @@ -944,7 +944,8 @@ static int zfcp_erp_open_ptp_port(struct zfcp_erp_action *act) return zfcp_erp_port_strategy_open_port(act); } -static int zfcp_erp_port_strategy_open_common(struct zfcp_erp_action *act) +static enum zfcp_erp_act_result zfcp_erp_port_strategy_open_common( + struct zfcp_erp_action *act) { struct zfcp_adapter *adapter = act->adapter; struct zfcp_port *port = act->port; @@ -975,12 +976,18 @@ static int zfcp_erp_port_strategy_open_common(struct zfcp_erp_action *act) port->d_id = 0; return ZFCP_ERP_FAILED; } - /* fall through otherwise */ + /* no early return otherwise, continue after switch case */ + break; + case ZFCP_ERP_STEP_LUN_CLOSING: + case ZFCP_ERP_STEP_LUN_OPENING: + /* NOP */ + break; } return ZFCP_ERP_FAILED; } -static int zfcp_erp_port_strategy(struct zfcp_erp_action *erp_action) +static enum zfcp_erp_act_result zfcp_erp_port_strategy( + struct zfcp_erp_action *erp_action) { struct zfcp_port *port = erp_action->port; int p_status = atomic_read(&port->status); @@ -999,6 +1006,12 @@ static int zfcp_erp_port_strategy(struct zfcp_erp_action *erp_action) if (p_status & ZFCP_STATUS_COMMON_OPEN) return ZFCP_ERP_FAILED; break; + case ZFCP_ERP_STEP_PHYS_PORT_CLOSING: + case ZFCP_ERP_STEP_PORT_OPENING: + case ZFCP_ERP_STEP_LUN_CLOSING: + case ZFCP_ERP_STEP_LUN_OPENING: + /* NOP */ + break; } close_init_done: @@ -1016,7 +1029,8 @@ static void zfcp_erp_lun_strategy_clearstati(struct scsi_device *sdev) &zfcp_sdev->status); } -static int zfcp_erp_lun_strategy_close(struct zfcp_erp_action *erp_action) +static enum zfcp_erp_act_result zfcp_erp_lun_strategy_close( + struct zfcp_erp_action *erp_action) { int retval = zfcp_fsf_close_lun(erp_action); if (retval == -ENOMEM) @@ -1027,7 +1041,8 @@ static int zfcp_erp_lun_strategy_close(struct zfcp_erp_action *erp_action) return ZFCP_ERP_CONTINUES; } -static int zfcp_erp_lun_strategy_open(struct zfcp_erp_action *erp_action) +static enum zfcp_erp_act_result zfcp_erp_lun_strategy_open( + struct zfcp_erp_action *erp_action) { int retval = zfcp_fsf_open_lun(erp_action); if (retval == -ENOMEM) @@ -1038,7 +1053,8 @@ static int zfcp_erp_lun_strategy_open(struct zfcp_erp_action *erp_action) return ZFCP_ERP_CONTINUES; } -static int zfcp_erp_lun_strategy(struct zfcp_erp_action *erp_action) +static enum zfcp_erp_act_result zfcp_erp_lun_strategy( + struct zfcp_erp_action *erp_action) { struct scsi_device *sdev = erp_action->sdev; struct zfcp_scsi_dev *zfcp_sdev = sdev_to_zfcp(sdev); @@ -1048,7 +1064,8 @@ static int zfcp_erp_lun_strategy(struct zfcp_erp_action *erp_action) zfcp_erp_lun_strategy_clearstati(sdev); if (atomic_read(&zfcp_sdev->status) & ZFCP_STATUS_COMMON_OPEN) return zfcp_erp_lun_strategy_close(erp_action); - /* already closed, fall through */ + /* already closed */ + /* fall through */ case ZFCP_ERP_STEP_LUN_CLOSING: if (atomic_read(&zfcp_sdev->status) & ZFCP_STATUS_COMMON_OPEN) return ZFCP_ERP_FAILED; @@ -1059,11 +1076,18 @@ static int zfcp_erp_lun_strategy(struct zfcp_erp_action *erp_action) case ZFCP_ERP_STEP_LUN_OPENING: if (atomic_read(&zfcp_sdev->status) & ZFCP_STATUS_COMMON_OPEN) return ZFCP_ERP_SUCCEEDED; + break; + case ZFCP_ERP_STEP_PHYS_PORT_CLOSING: + case ZFCP_ERP_STEP_PORT_CLOSING: + case ZFCP_ERP_STEP_PORT_OPENING: + /* NOP */ + break; } return ZFCP_ERP_FAILED; } -static int zfcp_erp_strategy_check_lun(struct scsi_device *sdev, int result) +static enum zfcp_erp_act_result zfcp_erp_strategy_check_lun( + struct scsi_device *sdev, enum zfcp_erp_act_result result) { struct zfcp_scsi_dev *zfcp_sdev = sdev_to_zfcp(sdev); @@ -1084,6 +1108,12 @@ static int zfcp_erp_strategy_check_lun(struct scsi_device *sdev, int result) ZFCP_STATUS_COMMON_ERP_FAILED); } break; + case ZFCP_ERP_CONTINUES: + case ZFCP_ERP_EXIT: + case ZFCP_ERP_DISMISSED: + case ZFCP_ERP_NOMEM: + /* NOP */ + break; } if (atomic_read(&zfcp_sdev->status) & ZFCP_STATUS_COMMON_ERP_FAILED) { @@ -1093,7 +1123,8 @@ static int zfcp_erp_strategy_check_lun(struct scsi_device *sdev, int result) return result; } -static int zfcp_erp_strategy_check_port(struct zfcp_port *port, int result) +static enum zfcp_erp_act_result zfcp_erp_strategy_check_port( + struct zfcp_port *port, enum zfcp_erp_act_result result) { switch (result) { case ZFCP_ERP_SUCCEEDED : @@ -1115,6 +1146,12 @@ static int zfcp_erp_strategy_check_port(struct zfcp_port *port, int result) ZFCP_STATUS_COMMON_ERP_FAILED); } break; + case ZFCP_ERP_CONTINUES: + case ZFCP_ERP_EXIT: + case ZFCP_ERP_DISMISSED: + case ZFCP_ERP_NOMEM: + /* NOP */ + break; } if (atomic_read(&port->status) & ZFCP_STATUS_COMMON_ERP_FAILED) { @@ -1124,8 +1161,8 @@ static int zfcp_erp_strategy_check_port(struct zfcp_port *port, int result) return result; } -static int zfcp_erp_strategy_check_adapter(struct zfcp_adapter *adapter, - int result) +static enum zfcp_erp_act_result zfcp_erp_strategy_check_adapter( + struct zfcp_adapter *adapter, enum zfcp_erp_act_result result) { switch (result) { case ZFCP_ERP_SUCCEEDED : @@ -1143,6 +1180,12 @@ static int zfcp_erp_strategy_check_adapter(struct zfcp_adapter *adapter, ZFCP_STATUS_COMMON_ERP_FAILED); } break; + case ZFCP_ERP_CONTINUES: + case ZFCP_ERP_EXIT: + case ZFCP_ERP_DISMISSED: + case ZFCP_ERP_NOMEM: + /* NOP */ + break; } if (atomic_read(&adapter->status) & ZFCP_STATUS_COMMON_ERP_FAILED) { @@ -1152,14 +1195,14 @@ static int zfcp_erp_strategy_check_adapter(struct zfcp_adapter *adapter, return result; } -static int zfcp_erp_strategy_check_target(struct zfcp_erp_action *erp_action, - int result) +static enum zfcp_erp_act_result zfcp_erp_strategy_check_target( + struct zfcp_erp_action *erp_action, enum zfcp_erp_act_result result) { struct zfcp_adapter *adapter = erp_action->adapter; struct zfcp_port *port = erp_action->port; struct scsi_device *sdev = erp_action->sdev; - switch (erp_action->action) { + switch (erp_action->type) { case ZFCP_ERP_ACTION_REOPEN_LUN: result = zfcp_erp_strategy_check_lun(sdev, result); @@ -1192,16 +1235,17 @@ static int zfcp_erp_strat_change_det(atomic_t *target_status, u32 erp_status) return 0; } -static int zfcp_erp_strategy_statechange(struct zfcp_erp_action *act, int ret) +static enum zfcp_erp_act_result zfcp_erp_strategy_statechange( + struct zfcp_erp_action *act, enum zfcp_erp_act_result result) { - int action = act->action; + enum zfcp_erp_act_type type = act->type; struct zfcp_adapter *adapter = act->adapter; struct zfcp_port *port = act->port; struct scsi_device *sdev = act->sdev; struct zfcp_scsi_dev *zfcp_sdev; u32 erp_status = act->status; - switch (action) { + switch (type) { case ZFCP_ERP_ACTION_REOPEN_ADAPTER: if (zfcp_erp_strat_change_det(&adapter->status, erp_status)) { _zfcp_erp_adapter_reopen(adapter, @@ -1231,7 +1275,7 @@ static int zfcp_erp_strategy_statechange(struct zfcp_erp_action *act, int ret) } break; } - return ret; + return result; } static void zfcp_erp_action_dequeue(struct zfcp_erp_action *erp_action) @@ -1248,7 +1292,7 @@ static void zfcp_erp_action_dequeue(struct zfcp_erp_action *erp_action) list_del(&erp_action->list); zfcp_dbf_rec_run("eractd1", erp_action); - switch (erp_action->action) { + switch (erp_action->type) { case ZFCP_ERP_ACTION_REOPEN_LUN: zfcp_sdev = sdev_to_zfcp(erp_action->sdev); atomic_andnot(ZFCP_STATUS_COMMON_ERP_INUSE, @@ -1324,13 +1368,14 @@ static void zfcp_erp_try_rport_unblock(struct zfcp_port *port) write_unlock_irqrestore(&adapter->erp_lock, flags); } -static void zfcp_erp_action_cleanup(struct zfcp_erp_action *act, int result) +static void zfcp_erp_action_cleanup(struct zfcp_erp_action *act, + enum zfcp_erp_act_result result) { struct zfcp_adapter *adapter = act->adapter; struct zfcp_port *port = act->port; struct scsi_device *sdev = act->sdev; - switch (act->action) { + switch (act->type) { case ZFCP_ERP_ACTION_REOPEN_LUN: if (!(act->status & ZFCP_STATUS_ERP_NO_REF)) scsi_device_put(sdev); @@ -1364,9 +1409,10 @@ static void zfcp_erp_action_cleanup(struct zfcp_erp_action *act, int result) } } -static int zfcp_erp_strategy_do_action(struct zfcp_erp_action *erp_action) +static enum zfcp_erp_act_result zfcp_erp_strategy_do_action( + struct zfcp_erp_action *erp_action) { - switch (erp_action->action) { + switch (erp_action->type) { case ZFCP_ERP_ACTION_REOPEN_ADAPTER: return zfcp_erp_adapter_strategy(erp_action); case ZFCP_ERP_ACTION_REOPEN_PORT_FORCED: @@ -1379,9 +1425,10 @@ static int zfcp_erp_strategy_do_action(struct zfcp_erp_action *erp_action) return ZFCP_ERP_FAILED; } -static int zfcp_erp_strategy(struct zfcp_erp_action *erp_action) +static enum zfcp_erp_act_result zfcp_erp_strategy( + struct zfcp_erp_action *erp_action) { - int retval; + enum zfcp_erp_act_result result; unsigned long flags; struct zfcp_adapter *adapter = erp_action->adapter; @@ -1392,12 +1439,12 @@ static int zfcp_erp_strategy(struct zfcp_erp_action *erp_action) if (erp_action->status & ZFCP_STATUS_ERP_DISMISSED) { zfcp_erp_action_dequeue(erp_action); - retval = ZFCP_ERP_DISMISSED; + result = ZFCP_ERP_DISMISSED; goto unlock; } if (erp_action->status & ZFCP_STATUS_ERP_TIMEDOUT) { - retval = ZFCP_ERP_FAILED; + result = ZFCP_ERP_FAILED; goto check_target; } @@ -1405,13 +1452,13 @@ static int zfcp_erp_strategy(struct zfcp_erp_action *erp_action) /* no lock to allow for blocking operations */ write_unlock_irqrestore(&adapter->erp_lock, flags); - retval = zfcp_erp_strategy_do_action(erp_action); + result = zfcp_erp_strategy_do_action(erp_action); write_lock_irqsave(&adapter->erp_lock, flags); if (erp_action->status & ZFCP_STATUS_ERP_DISMISSED) - retval = ZFCP_ERP_CONTINUES; + result = ZFCP_ERP_CONTINUES; - switch (retval) { + switch (result) { case ZFCP_ERP_NOMEM: if (!(erp_action->status & ZFCP_STATUS_ERP_LOWMEM)) { ++adapter->erp_low_mem_count; @@ -1421,7 +1468,7 @@ static int zfcp_erp_strategy(struct zfcp_erp_action *erp_action) _zfcp_erp_adapter_reopen(adapter, 0, "erstgy1"); else { zfcp_erp_strategy_memwait(erp_action); - retval = ZFCP_ERP_CONTINUES; + result = ZFCP_ERP_CONTINUES; } goto unlock; @@ -1431,27 +1478,33 @@ static int zfcp_erp_strategy(struct zfcp_erp_action *erp_action) erp_action->status &= ~ZFCP_STATUS_ERP_LOWMEM; } goto unlock; + case ZFCP_ERP_SUCCEEDED: + case ZFCP_ERP_FAILED: + case ZFCP_ERP_EXIT: + case ZFCP_ERP_DISMISSED: + /* NOP */ + break; } check_target: - retval = zfcp_erp_strategy_check_target(erp_action, retval); + result = zfcp_erp_strategy_check_target(erp_action, result); zfcp_erp_action_dequeue(erp_action); - retval = zfcp_erp_strategy_statechange(erp_action, retval); - if (retval == ZFCP_ERP_EXIT) + result = zfcp_erp_strategy_statechange(erp_action, result); + if (result == ZFCP_ERP_EXIT) goto unlock; - if (retval == ZFCP_ERP_SUCCEEDED) + if (result == ZFCP_ERP_SUCCEEDED) zfcp_erp_strategy_followup_success(erp_action); - if (retval == ZFCP_ERP_FAILED) + if (result == ZFCP_ERP_FAILED) zfcp_erp_strategy_followup_failed(erp_action); unlock: write_unlock_irqrestore(&adapter->erp_lock, flags); - if (retval != ZFCP_ERP_CONTINUES) - zfcp_erp_action_cleanup(erp_action, retval); + if (result != ZFCP_ERP_CONTINUES) + zfcp_erp_action_cleanup(erp_action, result); kref_put(&adapter->ref, zfcp_adapter_release); - return retval; + return result; } static int zfcp_erp_thread(void *data) @@ -1489,7 +1542,7 @@ static int zfcp_erp_thread(void *data) * zfcp_erp_thread_setup - Start ERP thread for adapter * @adapter: Adapter to start the ERP thread for * - * Returns 0 on success or error code from kernel_thread() + * Return: 0 on success, or error code from kthread_run(). */ int zfcp_erp_thread_setup(struct zfcp_adapter *adapter) { @@ -1694,11 +1747,11 @@ void zfcp_erp_clear_lun_status(struct scsi_device *sdev, u32 mask) /** * zfcp_erp_adapter_reset_sync() - Really reopen adapter and wait. * @adapter: Pointer to zfcp_adapter to reopen. - * @id: Trace tag string of length %ZFCP_DBF_TAG_LEN. + * @dbftag: Trace tag string of length %ZFCP_DBF_TAG_LEN. */ -void zfcp_erp_adapter_reset_sync(struct zfcp_adapter *adapter, char *id) +void zfcp_erp_adapter_reset_sync(struct zfcp_adapter *adapter, char *dbftag) { zfcp_erp_set_adapter_status(adapter, ZFCP_STATUS_COMMON_RUNNING); - zfcp_erp_adapter_reopen(adapter, ZFCP_STATUS_COMMON_ERP_FAILED, id); + zfcp_erp_adapter_reopen(adapter, ZFCP_STATUS_COMMON_ERP_FAILED, dbftag); zfcp_erp_wait(adapter); } diff --git a/drivers/s390/scsi/zfcp_ext.h b/drivers/s390/scsi/zfcp_ext.h index bd0c5a9f04cb..3fce47b0b21b 100644 --- a/drivers/s390/scsi/zfcp_ext.h +++ b/drivers/s390/scsi/zfcp_ext.h @@ -59,14 +59,15 @@ extern void zfcp_dbf_scsi_eh(char *tag, struct zfcp_adapter *adapter, /* zfcp_erp.c */ extern void zfcp_erp_set_adapter_status(struct zfcp_adapter *, u32); extern void zfcp_erp_clear_adapter_status(struct zfcp_adapter *, u32); -extern void zfcp_erp_port_forced_no_port_dbf(char *id, +extern void zfcp_erp_port_forced_no_port_dbf(char *dbftag, struct zfcp_adapter *adapter, u64 port_name, u32 port_id); extern void zfcp_erp_adapter_reopen(struct zfcp_adapter *, int, char *); extern void zfcp_erp_adapter_shutdown(struct zfcp_adapter *, int, char *); extern void zfcp_erp_set_port_status(struct zfcp_port *, u32); extern void zfcp_erp_clear_port_status(struct zfcp_port *, u32); -extern void zfcp_erp_port_reopen(struct zfcp_port *port, int clear, char *id); +extern void zfcp_erp_port_reopen(struct zfcp_port *port, int clear, + char *dbftag); extern void zfcp_erp_port_shutdown(struct zfcp_port *, int, char *); extern void zfcp_erp_port_forced_reopen(struct zfcp_port *, int, char *); extern void zfcp_erp_set_lun_status(struct scsi_device *, u32); @@ -79,7 +80,8 @@ extern void zfcp_erp_thread_kill(struct zfcp_adapter *); extern void zfcp_erp_wait(struct zfcp_adapter *); extern void zfcp_erp_notify(struct zfcp_erp_action *, unsigned long); extern void zfcp_erp_timeout_handler(struct timer_list *t); -extern void zfcp_erp_adapter_reset_sync(struct zfcp_adapter *adapter, char *id); +extern void zfcp_erp_adapter_reset_sync(struct zfcp_adapter *adapter, + char *dbftag); /* zfcp_fc.c */ extern struct kmem_cache *zfcp_fc_req_cache; @@ -144,6 +146,7 @@ extern void zfcp_qdio_close(struct zfcp_qdio *); extern void zfcp_qdio_siosl(struct zfcp_adapter *); /* zfcp_scsi.c */ +extern bool zfcp_experimental_dix; extern struct scsi_transport_template *zfcp_scsi_transport_template; extern int zfcp_scsi_adapter_register(struct zfcp_adapter *); extern void zfcp_scsi_adapter_unregister(struct zfcp_adapter *); diff --git a/drivers/s390/scsi/zfcp_fc.c b/drivers/s390/scsi/zfcp_fc.c index f6c415d6ef48..db00b5e3abbe 100644 --- a/drivers/s390/scsi/zfcp_fc.c +++ b/drivers/s390/scsi/zfcp_fc.c @@ -312,7 +312,7 @@ static void zfcp_fc_incoming_logo(struct zfcp_fsf_req *req) /** * zfcp_fc_incoming_els - handle incoming ELS - * @fsf_req - request which contains incoming ELS + * @fsf_req: request which contains incoming ELS */ void zfcp_fc_incoming_els(struct zfcp_fsf_req *fsf_req) { @@ -597,6 +597,48 @@ void zfcp_fc_test_link(struct zfcp_port *port) put_device(&port->dev); } +/** + * zfcp_fc_sg_free_table - free memory used by scatterlists + * @sg: pointer to scatterlist + * @count: number of scatterlist which are to be free'ed + * the scatterlist are expected to reference pages always + */ +static void zfcp_fc_sg_free_table(struct scatterlist *sg, int count) +{ + int i; + + for (i = 0; i < count; i++, sg++) + if (sg) + free_page((unsigned long) sg_virt(sg)); + else + break; +} + +/** + * zfcp_fc_sg_setup_table - init scatterlist and allocate, assign buffers + * @sg: pointer to struct scatterlist + * @count: number of scatterlists which should be assigned with buffers + * of size page + * + * Returns: 0 on success, -ENOMEM otherwise + */ +static int zfcp_fc_sg_setup_table(struct scatterlist *sg, int count) +{ + void *addr; + int i; + + sg_init_table(sg, count); + for (i = 0; i < count; i++, sg++) { + addr = (void *) get_zeroed_page(GFP_KERNEL); + if (!addr) { + zfcp_fc_sg_free_table(sg, i); + return -ENOMEM; + } + sg_set_buf(sg, addr, PAGE_SIZE); + } + return 0; +} + static struct zfcp_fc_req *zfcp_fc_alloc_sg_env(int buf_num) { struct zfcp_fc_req *fc_req; @@ -605,7 +647,7 @@ static struct zfcp_fc_req *zfcp_fc_alloc_sg_env(int buf_num) if (!fc_req) return NULL; - if (zfcp_sg_setup_table(&fc_req->sg_rsp, buf_num)) { + if (zfcp_fc_sg_setup_table(&fc_req->sg_rsp, buf_num)) { kmem_cache_free(zfcp_fc_req_cache, fc_req); return NULL; } @@ -763,7 +805,7 @@ void zfcp_fc_scan_ports(struct work_struct *work) break; } } - zfcp_sg_free_table(&fc_req->sg_rsp, buf_num); + zfcp_fc_sg_free_table(&fc_req->sg_rsp, buf_num); kmem_cache_free(zfcp_fc_req_cache, fc_req); out: zfcp_fc_wka_port_put(&adapter->gs->ds); diff --git a/drivers/s390/scsi/zfcp_fc.h b/drivers/s390/scsi/zfcp_fc.h index 3cd74729cfb9..6902ae1f8e4f 100644 --- a/drivers/s390/scsi/zfcp_fc.h +++ b/drivers/s390/scsi/zfcp_fc.h @@ -121,9 +121,24 @@ struct zfcp_fc_rspn_req { /** * struct zfcp_fc_req - Container for FC ELS and CT requests sent from zfcp * @ct_els: data required for issuing fsf command - * @sg_req: scatterlist entry for request data - * @sg_rsp: scatterlist entry for response data - * @u: request specific data + * @sg_req: scatterlist entry for request data, refers to embedded @u submember + * @sg_rsp: scatterlist entry for response data, refers to embedded @u submember + * @u: request and response specific data + * @u.adisc: ADISC specific data + * @u.adisc.req: ADISC request + * @u.adisc.rsp: ADISC response + * @u.gid_pn: GID_PN specific data + * @u.gid_pn.req: GID_PN request + * @u.gid_pn.rsp: GID_PN response + * @u.gpn_ft: GPN_FT specific data + * @u.gpn_ft.sg_rsp2: GPN_FT response, not embedded here, allocated elsewhere + * @u.gpn_ft.req: GPN_FT request + * @u.gspn: GSPN specific data + * @u.gspn.req: GSPN request + * @u.gspn.rsp: GSPN response + * @u.rspn: RSPN specific data + * @u.rspn.req: RSPN request + * @u.rspn.rsp: RSPN response */ struct zfcp_fc_req { struct zfcp_fsf_ct_els ct_els; diff --git a/drivers/s390/scsi/zfcp_fsf.c b/drivers/s390/scsi/zfcp_fsf.c index 3c86e27f094d..d94496ee6883 100644 --- a/drivers/s390/scsi/zfcp_fsf.c +++ b/drivers/s390/scsi/zfcp_fsf.c @@ -19,6 +19,11 @@ #include "zfcp_qdio.h" #include "zfcp_reqlist.h" +/* timeout for FSF requests sent during scsi_eh: abort or FCP TMF */ +#define ZFCP_FSF_SCSI_ER_TIMEOUT (10*HZ) +/* timeout for: exchange config/port data outside ERP, or open/close WKA port */ +#define ZFCP_FSF_REQUEST_TIMEOUT (60*HZ) + struct kmem_cache *zfcp_fsf_qtcb_cache; static void zfcp_fsf_request_timeout_handler(struct timer_list *t) @@ -74,18 +79,18 @@ static void zfcp_fsf_class_not_supp(struct zfcp_fsf_req *req) /** * zfcp_fsf_req_free - free memory used by fsf request - * @fsf_req: pointer to struct zfcp_fsf_req + * @req: pointer to struct zfcp_fsf_req */ void zfcp_fsf_req_free(struct zfcp_fsf_req *req) { if (likely(req->pool)) { - if (likely(req->qtcb)) + if (likely(!zfcp_fsf_req_is_status_read_buffer(req))) mempool_free(req->qtcb, req->adapter->pool.qtcb_pool); mempool_free(req, req->pool); return; } - if (likely(req->qtcb)) + if (likely(!zfcp_fsf_req_is_status_read_buffer(req))) kmem_cache_free(zfcp_fsf_qtcb_cache, req->qtcb); kfree(req); } @@ -379,7 +384,7 @@ static void zfcp_fsf_protstatus_eval(struct zfcp_fsf_req *req) /** * zfcp_fsf_req_complete - process completion of a FSF request - * @fsf_req: The FSF request that has been completed. + * @req: The FSF request that has been completed. * * When a request has been completed either from the FCP adapter, * or it has been dismissed due to a queue shutdown, this function @@ -388,7 +393,7 @@ static void zfcp_fsf_protstatus_eval(struct zfcp_fsf_req *req) */ static void zfcp_fsf_req_complete(struct zfcp_fsf_req *req) { - if (unlikely(req->fsf_command == FSF_QTCB_UNSOLICITED_STATUS)) { + if (unlikely(zfcp_fsf_req_is_status_read_buffer(req))) { zfcp_fsf_status_read_handler(req); return; } @@ -705,7 +710,6 @@ static struct zfcp_fsf_req *zfcp_fsf_req_create(struct zfcp_qdio *qdio, init_completion(&req->completion); req->adapter = adapter; - req->fsf_command = fsf_cmd; req->req_id = adapter->req_no; if (likely(fsf_cmd != FSF_QTCB_UNSOLICITED_STATUS)) { @@ -720,14 +724,13 @@ static struct zfcp_fsf_req *zfcp_fsf_req_create(struct zfcp_qdio *qdio, return ERR_PTR(-ENOMEM); } - req->seq_no = adapter->fsf_req_seq_no; req->qtcb->prefix.req_seq_no = adapter->fsf_req_seq_no; req->qtcb->prefix.req_id = req->req_id; req->qtcb->prefix.ulp_info = 26; - req->qtcb->prefix.qtcb_type = fsf_qtcb_type[req->fsf_command]; + req->qtcb->prefix.qtcb_type = fsf_qtcb_type[fsf_cmd]; req->qtcb->prefix.qtcb_version = FSF_QTCB_CURRENT_VERSION; req->qtcb->header.req_handle = req->req_id; - req->qtcb->header.fsf_command = req->fsf_command; + req->qtcb->header.fsf_command = fsf_cmd; } zfcp_qdio_req_init(adapter->qdio, &req->qdio_req, req->req_id, sbtype, @@ -740,7 +743,6 @@ static int zfcp_fsf_req_send(struct zfcp_fsf_req *req) { struct zfcp_adapter *adapter = req->adapter; struct zfcp_qdio *qdio = adapter->qdio; - int with_qtcb = (req->qtcb != NULL); int req_id = req->req_id; zfcp_reqlist_add(adapter->req_list, req); @@ -756,7 +758,7 @@ static int zfcp_fsf_req_send(struct zfcp_fsf_req *req) } /* Don't increase for unsolicited status */ - if (with_qtcb) + if (!zfcp_fsf_req_is_status_read_buffer(req)) adapter->fsf_req_seq_no++; adapter->req_no++; @@ -765,8 +767,7 @@ static int zfcp_fsf_req_send(struct zfcp_fsf_req *req) /** * zfcp_fsf_status_read - send status read request - * @adapter: pointer to struct zfcp_adapter - * @req_flags: request flags + * @qdio: pointer to struct zfcp_qdio * Returns: 0 on success, ERROR otherwise */ int zfcp_fsf_status_read(struct zfcp_qdio *qdio) @@ -912,7 +913,7 @@ struct zfcp_fsf_req *zfcp_fsf_abort_fcp_cmnd(struct scsi_cmnd *scmnd) req->qtcb->header.port_handle = zfcp_sdev->port->handle; req->qtcb->bottom.support.req_handle = (u64) old_req_id; - zfcp_fsf_start_timer(req, ZFCP_SCSI_ER_TIMEOUT); + zfcp_fsf_start_timer(req, ZFCP_FSF_SCSI_ER_TIMEOUT); if (!zfcp_fsf_req_send(req)) goto out; @@ -1057,8 +1058,10 @@ static int zfcp_fsf_setup_ct_els(struct zfcp_fsf_req *req, /** * zfcp_fsf_send_ct - initiate a Generic Service request (FC-GS) + * @wka_port: pointer to zfcp WKA port to send CT/GS to * @ct: pointer to struct zfcp_send_ct with data for request * @pool: if non-null this mempool is used to allocate struct zfcp_fsf_req + * @timeout: timeout that hardware should use, and a later software timeout */ int zfcp_fsf_send_ct(struct zfcp_fc_wka_port *wka_port, struct zfcp_fsf_ct_els *ct, mempool_t *pool, @@ -1151,7 +1154,10 @@ skip_fsfstatus: /** * zfcp_fsf_send_els - initiate an ELS command (FC-FS) + * @adapter: pointer to zfcp adapter + * @d_id: N_Port_ID to send ELS to * @els: pointer to struct zfcp_send_els with data for the command + * @timeout: timeout that hardware should use, and a later software timeout */ int zfcp_fsf_send_els(struct zfcp_adapter *adapter, u32 d_id, struct zfcp_fsf_ct_els *els, unsigned int timeout) @@ -1809,7 +1815,7 @@ static void zfcp_fsf_open_lun_handler(struct zfcp_fsf_req *req) case FSF_LUN_SHARING_VIOLATION: if (qual->word[0]) dev_warn(&zfcp_sdev->port->adapter->ccw_device->dev, - "LUN 0x%Lx on port 0x%Lx is already in " + "LUN 0x%016Lx on port 0x%016Lx is already in " "use by CSS%d, MIF Image ID %x\n", zfcp_scsi_dev_lun(sdev), (unsigned long long)zfcp_sdev->port->wwpn, @@ -1986,7 +1992,7 @@ out: return retval; } -static void zfcp_fsf_update_lat(struct fsf_latency_record *lat_rec, u32 lat) +static void zfcp_fsf_update_lat(struct zfcp_latency_record *lat_rec, u32 lat) { lat_rec->sum += lat; lat_rec->min = min(lat_rec->min, lat); @@ -1996,7 +2002,7 @@ static void zfcp_fsf_update_lat(struct fsf_latency_record *lat_rec, u32 lat) static void zfcp_fsf_req_trace(struct zfcp_fsf_req *req, struct scsi_cmnd *scsi) { struct fsf_qual_latency_info *lat_in; - struct latency_cont *lat = NULL; + struct zfcp_latency_cont *lat = NULL; struct zfcp_scsi_dev *zfcp_sdev; struct zfcp_blk_drv_data blktrc; int ticks = req->adapter->timer_ticks; @@ -2088,11 +2094,8 @@ static void zfcp_fsf_fcp_handler_common(struct zfcp_fsf_req *req, break; case FSF_CMND_LENGTH_NOT_VALID: dev_err(&req->adapter->ccw_device->dev, - "Incorrect CDB length %d, LUN 0x%016Lx on " - "port 0x%016Lx closed\n", - req->qtcb->bottom.io.fcp_cmnd_length, - (unsigned long long)zfcp_scsi_dev_lun(sdev), - (unsigned long long)zfcp_sdev->port->wwpn); + "Incorrect FCP_CMND length %d, FCP device closed\n", + req->qtcb->bottom.io.fcp_cmnd_length); zfcp_erp_adapter_shutdown(req->adapter, 0, "fssfch4"); req->status |= ZFCP_STATUS_FSFREQ_ERROR; break; @@ -2369,7 +2372,7 @@ struct zfcp_fsf_req *zfcp_fsf_fcp_task_mgmt(struct scsi_device *sdev, fcp_cmnd = &req->qtcb->bottom.io.fcp_cmnd.iu; zfcp_fc_fcp_tm(fcp_cmnd, sdev, tm_flags); - zfcp_fsf_start_timer(req, ZFCP_SCSI_ER_TIMEOUT); + zfcp_fsf_start_timer(req, ZFCP_FSF_SCSI_ER_TIMEOUT); if (!zfcp_fsf_req_send(req)) goto out; @@ -2382,7 +2385,7 @@ out: /** * zfcp_fsf_reqid_check - validate req_id contained in SBAL returned by QDIO - * @adapter: pointer to struct zfcp_adapter + * @qdio: pointer to struct zfcp_qdio * @sbal_idx: response queue index of SBAL to be processed */ void zfcp_fsf_reqid_check(struct zfcp_qdio *qdio, int sbal_idx) diff --git a/drivers/s390/scsi/zfcp_fsf.h b/drivers/s390/scsi/zfcp_fsf.h index 535628b92f0a..2c658b66318c 100644 --- a/drivers/s390/scsi/zfcp_fsf.h +++ b/drivers/s390/scsi/zfcp_fsf.h @@ -438,8 +438,8 @@ struct zfcp_blk_drv_data { /** * struct zfcp_fsf_ct_els - zfcp data for ct or els request - * @req: scatter-gather list for request - * @resp: scatter-gather list for response + * @req: scatter-gather list for request, points to &zfcp_fc_req.sg_req or BSG + * @resp: scatter-gather list for response, points to &zfcp_fc_req.sg_rsp or BSG * @handler: handler function (called for response to the request) * @handler_data: data passed to handler function * @port: Optional pointer to port for zfcp internal ELS (only test link ADISC) diff --git a/drivers/s390/scsi/zfcp_qdio.c b/drivers/s390/scsi/zfcp_qdio.c index 4ab02e8d36f3..10c4e8e3fd59 100644 --- a/drivers/s390/scsi/zfcp_qdio.c +++ b/drivers/s390/scsi/zfcp_qdio.c @@ -4,7 +4,7 @@ * * Setup and helper functions to access QDIO. * - * Copyright IBM Corp. 2002, 2010 + * Copyright IBM Corp. 2002, 2017 */ #define KMSG_COMPONENT "zfcp" @@ -19,7 +19,7 @@ static bool enable_multibuffer = true; module_param_named(datarouter, enable_multibuffer, bool, 0400); MODULE_PARM_DESC(datarouter, "Enable hardware data router support (default on)"); -static void zfcp_qdio_handler_error(struct zfcp_qdio *qdio, char *id, +static void zfcp_qdio_handler_error(struct zfcp_qdio *qdio, char *dbftag, unsigned int qdio_err) { struct zfcp_adapter *adapter = qdio->adapter; @@ -28,12 +28,12 @@ static void zfcp_qdio_handler_error(struct zfcp_qdio *qdio, char *id, if (qdio_err & QDIO_ERROR_SLSB_STATE) { zfcp_qdio_siosl(adapter); - zfcp_erp_adapter_shutdown(adapter, 0, id); + zfcp_erp_adapter_shutdown(adapter, 0, dbftag); return; } zfcp_erp_adapter_reopen(adapter, ZFCP_STATUS_ADAPTER_LINK_UNPLUGGED | - ZFCP_STATUS_COMMON_ERP_FAILED, id); + ZFCP_STATUS_COMMON_ERP_FAILED, dbftag); } static void zfcp_qdio_zero_sbals(struct qdio_buffer *sbal[], int first, int cnt) @@ -180,7 +180,6 @@ zfcp_qdio_sbale_next(struct zfcp_qdio *qdio, struct zfcp_qdio_req *q_req) * @qdio: pointer to struct zfcp_qdio * @q_req: pointer to struct zfcp_qdio_req * @sg: scatter-gather list - * @max_sbals: upper bound for number of SBALs to be used * Returns: zero or -EINVAL on error */ int zfcp_qdio_sbals_from_sg(struct zfcp_qdio *qdio, struct zfcp_qdio_req *q_req, @@ -303,7 +302,7 @@ static void zfcp_qdio_setup_init_data(struct qdio_initialize *id, /** * zfcp_qdio_allocate - allocate queue memory and initialize QDIO data - * @adapter: pointer to struct zfcp_adapter + * @qdio: pointer to struct zfcp_qdio * Returns: -ENOMEM on memory allocation error or return value from * qdio_allocate */ diff --git a/drivers/s390/scsi/zfcp_qdio.h b/drivers/s390/scsi/zfcp_qdio.h index 886c662cc154..2a816a37b3c0 100644 --- a/drivers/s390/scsi/zfcp_qdio.h +++ b/drivers/s390/scsi/zfcp_qdio.h @@ -30,6 +30,8 @@ * @req_q_full: queue full incidents * @req_q_wq: used to wait for SBAL availability * @adapter: adapter used in conjunction with this qdio structure + * @max_sbale_per_sbal: qdio limit per sbal + * @max_sbale_per_req: qdio limit per request */ struct zfcp_qdio { struct qdio_buffer *res_q[QDIO_MAX_BUFFERS_PER_Q]; @@ -70,7 +72,7 @@ struct zfcp_qdio_req { /** * zfcp_qdio_sbale_req - return pointer to sbale on req_q for a request * @qdio: pointer to struct zfcp_qdio - * @q_rec: pointer to struct zfcp_qdio_req + * @q_req: pointer to struct zfcp_qdio_req * Returns: pointer to qdio_buffer_element (sbale) structure */ static inline struct qdio_buffer_element * @@ -82,7 +84,7 @@ zfcp_qdio_sbale_req(struct zfcp_qdio *qdio, struct zfcp_qdio_req *q_req) /** * zfcp_qdio_sbale_curr - return current sbale on req_q for a request * @qdio: pointer to struct zfcp_qdio - * @fsf_req: pointer to struct zfcp_fsf_req + * @q_req: pointer to struct zfcp_qdio_req * Returns: pointer to qdio_buffer_element (sbale) structure */ static inline struct qdio_buffer_element * @@ -135,6 +137,8 @@ void zfcp_qdio_req_init(struct zfcp_qdio *qdio, struct zfcp_qdio_req *q_req, * zfcp_qdio_fill_next - Fill next sbale, only for single sbal requests * @qdio: pointer to struct zfcp_qdio * @q_req: pointer to struct zfcp_queue_req + * @data: pointer to data + * @len: length of data * * This is only required for single sbal requests, calling it when * wrapping around to the next sbal is a bug. @@ -182,6 +186,7 @@ int zfcp_qdio_sg_one_sbale(struct scatterlist *sg) /** * zfcp_qdio_skip_to_last_sbale - skip to last sbale in sbal + * @qdio: pointer to struct zfcp_qdio * @q_req: The current zfcp_qdio_req */ static inline diff --git a/drivers/s390/scsi/zfcp_reqlist.h b/drivers/s390/scsi/zfcp_reqlist.h index 59a943c0d51d..9b8ff249e31c 100644 --- a/drivers/s390/scsi/zfcp_reqlist.h +++ b/drivers/s390/scsi/zfcp_reqlist.h @@ -17,7 +17,7 @@ /** * struct zfcp_reqlist - Container for request list (reqlist) * @lock: Spinlock for protecting the hash list - * @list: Array of hashbuckets, each is a list of requests in this bucket + * @buckets: Array of hashbuckets, each is a list of requests in this bucket */ struct zfcp_reqlist { spinlock_t lock; diff --git a/drivers/s390/scsi/zfcp_scsi.c b/drivers/s390/scsi/zfcp_scsi.c index a8efcb330bc1..00acc7144bbc 100644 --- a/drivers/s390/scsi/zfcp_scsi.c +++ b/drivers/s390/scsi/zfcp_scsi.c @@ -27,7 +27,11 @@ MODULE_PARM_DESC(queue_depth, "Default queue depth for new SCSI devices"); static bool enable_dif; module_param_named(dif, enable_dif, bool, 0400); -MODULE_PARM_DESC(dif, "Enable DIF/DIX data integrity support"); +MODULE_PARM_DESC(dif, "Enable DIF data integrity support (default off)"); + +bool zfcp_experimental_dix; +module_param_named(dix, zfcp_experimental_dix, bool, 0400); +MODULE_PARM_DESC(dix, "Enable experimental DIX (data integrity extension) support which implies DIF support (default off)"); static bool allow_lun_scan = true; module_param(allow_lun_scan, bool, 0600); @@ -226,7 +230,9 @@ static void zfcp_scsi_forget_cmnd(struct zfcp_fsf_req *old_req, void *data) (struct zfcp_scsi_req_filter *)data; /* already aborted - prevent side-effects - or not a SCSI command */ - if (old_req->data == NULL || old_req->fsf_command != FSF_QTCB_FCP_CMND) + if (old_req->data == NULL || + zfcp_fsf_req_is_status_read_buffer(old_req) || + old_req->qtcb->header.fsf_command != FSF_QTCB_FCP_CMND) return; /* (tmf_scope == FCP_TMF_TGT_RESET || tmf_scope == FCP_TMF_LUN_RESET) */ @@ -423,7 +429,6 @@ static struct scsi_host_template zfcp_scsi_host_template = { * ZFCP_QDIO_MAX_SBALS_PER_REQ) - 2) * 8, /* GCD, adjusted later */ .dma_boundary = ZFCP_QDIO_SBALE_LEN - 1, - .use_clustering = 1, .shost_attrs = zfcp_sysfs_shost_attrs, .sdev_attrs = zfcp_sysfs_sdev_attrs, .track_queue_depth = 1, @@ -788,11 +793,11 @@ void zfcp_scsi_set_prot(struct zfcp_adapter *adapter) data_div = atomic_read(&adapter->status) & ZFCP_STATUS_ADAPTER_DATA_DIV_ENABLED; - if (enable_dif && + if ((enable_dif || zfcp_experimental_dix) && adapter->adapter_features & FSF_FEATURE_DIF_PROT_TYPE1) mask |= SHOST_DIF_TYPE1_PROTECTION; - if (enable_dif && data_div && + if (zfcp_experimental_dix && data_div && adapter->adapter_features & FSF_FEATURE_DIX_PROT_TCPIP) { mask |= SHOST_DIX_TYPE1_PROTECTION; scsi_host_set_guard(shost, SHOST_DIX_GUARD_IP); diff --git a/drivers/scsi/3w-9xxx.c b/drivers/scsi/3w-9xxx.c index 2d655a97b959..a3c20e3a8b7c 100644 --- a/drivers/scsi/3w-9xxx.c +++ b/drivers/scsi/3w-9xxx.c @@ -1998,7 +1998,6 @@ static struct scsi_host_template driver_template = { .sg_tablesize = TW_APACHE_MAX_SGL_LENGTH, .max_sectors = TW_MAX_SECTORS, .cmd_per_lun = TW_MAX_CMDS_PER_LUN, - .use_clustering = ENABLE_CLUSTERING, .shost_attrs = twa_host_attrs, .emulated = 1, .no_write_same = 1, diff --git a/drivers/scsi/3w-sas.c b/drivers/scsi/3w-sas.c index 480cf82700e9..e8f5f7c63190 100644 --- a/drivers/scsi/3w-sas.c +++ b/drivers/scsi/3w-sas.c @@ -1550,7 +1550,6 @@ static struct scsi_host_template driver_template = { .sg_tablesize = TW_LIBERATOR_MAX_SGL_LENGTH, .max_sectors = TW_MAX_SECTORS, .cmd_per_lun = TW_MAX_CMDS_PER_LUN, - .use_clustering = ENABLE_CLUSTERING, .shost_attrs = twl_host_attrs, .emulated = 1, .no_write_same = 1, diff --git a/drivers/scsi/3w-xxxx.c b/drivers/scsi/3w-xxxx.c index a58257645e94..2b1e0d503020 100644 --- a/drivers/scsi/3w-xxxx.c +++ b/drivers/scsi/3w-xxxx.c @@ -1174,7 +1174,7 @@ static int tw_setfeature(TW_Device_Extension *tw_dev, int parm, int param_size, command_que_value = tw_dev->command_packet_physical_address[request_id]; if (command_que_value == 0) { printk(KERN_WARNING "3w-xxxx: tw_setfeature(): Bad command packet physical address.\n"); - return 1; + return 1; } /* Send command packet to the board */ @@ -2247,7 +2247,6 @@ static struct scsi_host_template driver_template = { .sg_tablesize = TW_MAX_SGL_LENGTH, .max_sectors = TW_MAX_SECTORS, .cmd_per_lun = TW_MAX_CMDS_PER_LUN, - .use_clustering = ENABLE_CLUSTERING, .shost_attrs = tw_host_attrs, .emulated = 1, .no_write_same = 1, diff --git a/drivers/scsi/53c700.c b/drivers/scsi/53c700.c index 6be77b3aa8a5..128d658d472a 100644 --- a/drivers/scsi/53c700.c +++ b/drivers/scsi/53c700.c @@ -318,7 +318,6 @@ NCR_700_detect(struct scsi_host_template *tpnt, tpnt->can_queue = NCR_700_COMMAND_SLOTS_PER_HOST; tpnt->sg_tablesize = NCR_700_SG_SEGMENTS; tpnt->cmd_per_lun = NCR_700_CMD_PER_LUN; - tpnt->use_clustering = ENABLE_CLUSTERING; tpnt->slave_configure = NCR_700_slave_configure; tpnt->slave_destroy = NCR_700_slave_destroy; tpnt->slave_alloc = NCR_700_slave_alloc; diff --git a/drivers/scsi/BusLogic.c b/drivers/scsi/BusLogic.c index 9cee941f97d6..e41e51f1da71 100644 --- a/drivers/scsi/BusLogic.c +++ b/drivers/scsi/BusLogic.c @@ -2641,6 +2641,7 @@ static int blogic_resultcode(struct blogic_adapter *adapter, case BLOGIC_BAD_CMD_PARAM: blogic_warn("BusLogic Driver Protocol Error 0x%02X\n", adapter, adapter_status); + /* fall through */ case BLOGIC_DATA_UNDERRUN: case BLOGIC_DATA_OVERRUN: case BLOGIC_NOEXPECT_BUSFREE: @@ -3857,7 +3858,6 @@ static struct scsi_host_template blogic_template = { #endif .unchecked_isa_dma = 1, .max_sectors = 128, - .use_clustering = ENABLE_CLUSTERING, }; /* diff --git a/drivers/scsi/Kconfig b/drivers/scsi/Kconfig index 640cd1b31a18..f38882f6f37d 100644 --- a/drivers/scsi/Kconfig +++ b/drivers/scsi/Kconfig @@ -50,18 +50,6 @@ config SCSI_NETLINK default n depends on NET -config SCSI_MQ_DEFAULT - bool "SCSI: use blk-mq I/O path by default" - default y - depends on SCSI - ---help--- - This option enables the blk-mq based I/O path for SCSI devices by - default. With this option the scsi_mod.use_blk_mq module/boot - option defaults to Y, without it to N, but it can still be - overridden either way. - - If unsure say Y. - config SCSI_PROC_FS bool "legacy /proc/scsi/ support" depends on SCSI && PROC_FS diff --git a/drivers/scsi/a100u2w.c b/drivers/scsi/a100u2w.c index 00072ed9540b..ff53fd0d12f2 100644 --- a/drivers/scsi/a100u2w.c +++ b/drivers/scsi/a100u2w.c @@ -1078,7 +1078,6 @@ static struct scsi_host_template inia100_template = { .can_queue = 1, .this_id = 1, .sg_tablesize = SG_ALL, - .use_clustering = ENABLE_CLUSTERING, }; static int inia100_probe_one(struct pci_dev *pdev, diff --git a/drivers/scsi/a2091.c b/drivers/scsi/a2091.c index 61aadc7acb49..c96bc7261a42 100644 --- a/drivers/scsi/a2091.c +++ b/drivers/scsi/a2091.c @@ -160,7 +160,7 @@ static struct scsi_host_template a2091_scsi_template = { .this_id = 7, .sg_tablesize = SG_ALL, .cmd_per_lun = CMD_PER_LUN, - .use_clustering = DISABLE_CLUSTERING + .dma_boundary = PAGE_SIZE - 1, }; static int a2091_probe(struct zorro_dev *z, const struct zorro_device_id *ent) diff --git a/drivers/scsi/a3000.c b/drivers/scsi/a3000.c index 2427a8541247..dcf435f312dd 100644 --- a/drivers/scsi/a3000.c +++ b/drivers/scsi/a3000.c @@ -175,7 +175,6 @@ static struct scsi_host_template amiga_a3000_scsi_template = { .this_id = 7, .sg_tablesize = SG_ALL, .cmd_per_lun = CMD_PER_LUN, - .use_clustering = ENABLE_CLUSTERING }; static int __init amiga_a3000_scsi_probe(struct platform_device *pdev) diff --git a/drivers/scsi/aacraid/aachba.c b/drivers/scsi/aacraid/aachba.c index bd7f352c28f3..75ab5ff6b78c 100644 --- a/drivers/scsi/aacraid/aachba.c +++ b/drivers/scsi/aacraid/aachba.c @@ -2892,6 +2892,7 @@ int aac_scsi_cmd(struct scsi_cmnd * scsicmd) !(dev->raw_io_64) || ((scsicmd->cmnd[1] & 0x1f) != SAI_READ_CAPACITY_16)) break; + /* fall through */ case INQUIRY: case READ_CAPACITY: case TEST_UNIT_READY: @@ -2966,6 +2967,7 @@ int aac_scsi_cmd(struct scsi_cmnd * scsicmd) /* Issue FIB to tell Firmware to flush it's cache */ if ((aac_cache & 6) != 2) return aac_synchronize(scsicmd); + /* fall through */ case INQUIRY: { struct inquiry_data inq_data; @@ -3319,8 +3321,9 @@ int aac_scsi_cmd(struct scsi_cmnd * scsicmd) min_t(size_t, sizeof(dev->fsa_dev[cid].sense_data), SCSI_SENSE_BUFFERSIZE)); - break; + break; } + /* fall through */ case RESERVE: case RELEASE: case REZERO_UNIT: diff --git a/drivers/scsi/aacraid/aacraid.h b/drivers/scsi/aacraid/aacraid.h index 39eb415987fc..3291d1c16864 100644 --- a/drivers/scsi/aacraid/aacraid.h +++ b/drivers/scsi/aacraid/aacraid.h @@ -40,6 +40,7 @@ #define nblank(x) _nblank(x)[0] #include <linux/interrupt.h> +#include <linux/completion.h> #include <linux/pci.h> #include <scsi/scsi_host.h> @@ -1241,7 +1242,7 @@ struct aac_fib_context { u32 unique; // unique value representing this context ulong jiffies; // used for cleanup - dmb changed to ulong struct list_head next; // used to link context's into a linked list - struct semaphore wait_sem; // this is used to wait for the next fib to arrive. + struct completion completion; // this is used to wait for the next fib to arrive. int wait; // Set to true when thread is in WaitForSingleObject unsigned long count; // total number of FIBs on FibList struct list_head fib_list; // this holds fibs and their attachd hw_fibs @@ -1313,7 +1314,7 @@ struct fib { * This is the event the sendfib routine will wait on if the * caller did not pass one and this is synch io. */ - struct semaphore event_wait; + struct completion event_wait; spinlock_t event_lock; u32 done; /* gets set to 1 when fib is complete */ diff --git a/drivers/scsi/aacraid/commctrl.c b/drivers/scsi/aacraid/commctrl.c index 25f6600d6c09..e2899ff7913e 100644 --- a/drivers/scsi/aacraid/commctrl.c +++ b/drivers/scsi/aacraid/commctrl.c @@ -41,7 +41,6 @@ #include <linux/blkdev.h> #include <linux/delay.h> /* ssleep prototype */ #include <linux/kthread.h> -#include <linux/semaphore.h> #include <linux/uaccess.h> #include <scsi/scsi_host.h> @@ -203,7 +202,7 @@ static int open_getadapter_fib(struct aac_dev * dev, void __user *arg) /* * Initialize the mutex used to wait for the next AIF. */ - sema_init(&fibctx->wait_sem, 0); + init_completion(&fibctx->completion); fibctx->wait = 0; /* * Initialize the fibs and set the count of fibs on @@ -335,7 +334,7 @@ return_fib: ssleep(1); } if (f.wait) { - if(down_interruptible(&fibctx->wait_sem) < 0) { + if (wait_for_completion_interruptible(&fibctx->completion) < 0) { status = -ERESTARTSYS; } else { /* Lock again and retry */ diff --git a/drivers/scsi/aacraid/commsup.c b/drivers/scsi/aacraid/commsup.c index 1e77d96a18f2..d5a6aa9676c8 100644 --- a/drivers/scsi/aacraid/commsup.c +++ b/drivers/scsi/aacraid/commsup.c @@ -44,7 +44,6 @@ #include <linux/delay.h> #include <linux/kthread.h> #include <linux/interrupt.h> -#include <linux/semaphore.h> #include <linux/bcd.h> #include <scsi/scsi.h> #include <scsi/scsi_host.h> @@ -189,7 +188,7 @@ int aac_fib_setup(struct aac_dev * dev) fibptr->hw_fib_va = hw_fib; fibptr->data = (void *) fibptr->hw_fib_va->data; fibptr->next = fibptr+1; /* Forward chain the fibs */ - sema_init(&fibptr->event_wait, 0); + init_completion(&fibptr->event_wait); spin_lock_init(&fibptr->event_lock); hw_fib->header.XferState = cpu_to_le32(0xffffffff); hw_fib->header.SenderSize = @@ -623,7 +622,7 @@ int aac_fib_send(u16 command, struct fib *fibptr, unsigned long size, } if (wait) { fibptr->flags |= FIB_CONTEXT_FLAG_WAIT; - if (down_interruptible(&fibptr->event_wait)) { + if (wait_for_completion_interruptible(&fibptr->event_wait)) { fibptr->flags &= ~FIB_CONTEXT_FLAG_WAIT; return -EFAULT; } @@ -659,7 +658,7 @@ int aac_fib_send(u16 command, struct fib *fibptr, unsigned long size, * hardware failure has occurred. */ unsigned long timeout = jiffies + (180 * HZ); /* 3 minutes */ - while (down_trylock(&fibptr->event_wait)) { + while (!try_wait_for_completion(&fibptr->event_wait)) { int blink; if (time_is_before_eq_jiffies(timeout)) { struct aac_queue * q = &dev->queues->queue[AdapNormCmdQueue]; @@ -689,9 +688,9 @@ int aac_fib_send(u16 command, struct fib *fibptr, unsigned long size, */ schedule(); } - } else if (down_interruptible(&fibptr->event_wait)) { + } else if (wait_for_completion_interruptible(&fibptr->event_wait)) { /* Do nothing ... satisfy - * down_interruptible must_check */ + * wait_for_completion_interruptible must_check */ } spin_lock_irqsave(&fibptr->event_lock, flags); @@ -777,7 +776,7 @@ int aac_hba_send(u8 command, struct fib *fibptr, fib_callback callback, return -EFAULT; fibptr->flags |= FIB_CONTEXT_FLAG_WAIT; - if (down_interruptible(&fibptr->event_wait)) + if (wait_for_completion_interruptible(&fibptr->event_wait)) fibptr->done = 2; fibptr->flags &= ~(FIB_CONTEXT_FLAG_WAIT); @@ -1538,7 +1537,7 @@ static int _aac_reset_adapter(struct aac_dev *aac, int forced, u8 reset_type) || fib->flags & FIB_CONTEXT_FLAG_WAIT) { unsigned long flagv; spin_lock_irqsave(&fib->event_lock, flagv); - up(&fib->event_wait); + complete(&fib->event_wait); spin_unlock_irqrestore(&fib->event_lock, flagv); schedule(); retval = 0; @@ -1828,7 +1827,7 @@ int aac_check_health(struct aac_dev * aac) * Set the event to wake up the * thread that will waiting. */ - up(&fibctx->wait_sem); + complete(&fibctx->completion); } else { printk(KERN_WARNING "aifd: didn't allocate NewFib.\n"); kfree(fib); @@ -2165,7 +2164,7 @@ static void wakeup_fibctx_threads(struct aac_dev *dev, * Set the event to wake up the * thread that is waiting. */ - up(&fibctx->wait_sem); + complete(&fibctx->completion); entry = entry->next; } diff --git a/drivers/scsi/aacraid/dpcsup.c b/drivers/scsi/aacraid/dpcsup.c index ddc69738375f..40a771dd1c0e 100644 --- a/drivers/scsi/aacraid/dpcsup.c +++ b/drivers/scsi/aacraid/dpcsup.c @@ -38,7 +38,6 @@ #include <linux/slab.h> #include <linux/completion.h> #include <linux/blkdev.h> -#include <linux/semaphore.h> #include "aacraid.h" @@ -129,7 +128,7 @@ unsigned int aac_response_normal(struct aac_queue * q) spin_lock_irqsave(&fib->event_lock, flagv); if (!fib->done) { fib->done = 1; - up(&fib->event_wait); + complete(&fib->event_wait); } spin_unlock_irqrestore(&fib->event_lock, flagv); @@ -376,16 +375,16 @@ unsigned int aac_intr_normal(struct aac_dev *dev, u32 index, int isAif, start_callback = 1; } else { unsigned long flagv; - int complete = 0; + int completed = 0; dprintk((KERN_INFO "event_wait up\n")); spin_lock_irqsave(&fib->event_lock, flagv); if (fib->done == 2) { fib->done = 1; - complete = 1; + completed = 1; } else { fib->done = 1; - up(&fib->event_wait); + complete(&fib->event_wait); } spin_unlock_irqrestore(&fib->event_lock, flagv); @@ -395,7 +394,7 @@ unsigned int aac_intr_normal(struct aac_dev *dev, u32 index, int isAif, mflags); FIB_COUNTER_INCREMENT(aac_config.NativeRecved); - if (complete) + if (completed) aac_fib_complete(fib); } } else { @@ -428,16 +427,16 @@ unsigned int aac_intr_normal(struct aac_dev *dev, u32 index, int isAif, start_callback = 1; } else { unsigned long flagv; - int complete = 0; + int completed = 0; dprintk((KERN_INFO "event_wait up\n")); spin_lock_irqsave(&fib->event_lock, flagv); if (fib->done == 2) { fib->done = 1; - complete = 1; + completed = 1; } else { fib->done = 1; - up(&fib->event_wait); + complete(&fib->event_wait); } spin_unlock_irqrestore(&fib->event_lock, flagv); @@ -447,7 +446,7 @@ unsigned int aac_intr_normal(struct aac_dev *dev, u32 index, int isAif, mflags); FIB_COUNTER_INCREMENT(aac_config.NormalRecved); - if (complete) + if (completed) aac_fib_complete(fib); } } diff --git a/drivers/scsi/aacraid/linit.c b/drivers/scsi/aacraid/linit.c index 2d4e4ddc5ace..634ddb90e7aa 100644 --- a/drivers/scsi/aacraid/linit.c +++ b/drivers/scsi/aacraid/linit.c @@ -759,6 +759,7 @@ static int aac_eh_abort(struct scsi_cmnd* cmd) !(aac->raw_io_64) || ((cmd->cmnd[1] & 0x1f) != SAI_READ_CAPACITY_16)) break; + /* fall through */ case INQUIRY: case READ_CAPACITY: /* @@ -1539,7 +1540,6 @@ static struct scsi_host_template aac_driver_template = { #else .cmd_per_lun = AAC_NUM_IO_FIB, #endif - .use_clustering = ENABLE_CLUSTERING, .emulated = 1, .no_write_same = 1, }; @@ -1559,7 +1559,7 @@ static void __aac_shutdown(struct aac_dev * aac) struct fib *fib = &aac->fibs[i]; if (!(fib->hw_fib_va->header.XferState & cpu_to_le32(NoResponseExpected | Async)) && (fib->hw_fib_va->header.XferState & cpu_to_le32(ResponseExpected))) - up(&fib->event_wait); + complete(&fib->event_wait); } kthread_stop(aac->thread); aac->thread = NULL; diff --git a/drivers/scsi/aacraid/src.c b/drivers/scsi/aacraid/src.c index 7a51ccfa8662..8377aec0649d 100644 --- a/drivers/scsi/aacraid/src.c +++ b/drivers/scsi/aacraid/src.c @@ -106,7 +106,7 @@ static irqreturn_t aac_src_intr_message(int irq, void *dev_id) spin_lock_irqsave(&dev->sync_fib->event_lock, sflags); if (dev->sync_fib->flags & FIB_CONTEXT_FLAG_WAIT) { dev->management_fib_count--; - up(&dev->sync_fib->event_wait); + complete(&dev->sync_fib->event_wait); } spin_unlock_irqrestore(&dev->sync_fib->event_lock, sflags); diff --git a/drivers/scsi/advansys.c b/drivers/scsi/advansys.c index 223ef6f4e258..d37584403c33 100644 --- a/drivers/scsi/advansys.c +++ b/drivers/scsi/advansys.c @@ -3192,8 +3192,8 @@ static void asc_prt_driver_conf(struct seq_file *m, struct Scsi_Host *shost) shost->sg_tablesize, shost->cmd_per_lun); seq_printf(m, - " unchecked_isa_dma %d, use_clustering %d\n", - shost->unchecked_isa_dma, shost->use_clustering); + " unchecked_isa_dma %d\n", + shost->unchecked_isa_dma); seq_printf(m, " flags 0x%x, last_reset 0x%lx, jiffies 0x%lx, asc_n_io_port 0x%x\n", @@ -10808,14 +10808,6 @@ static struct scsi_host_template advansys_template = { * for non-ISA adapters. */ .unchecked_isa_dma = true, - /* - * All adapters controlled by this driver are capable of large - * scatter-gather lists. According to the mid-level SCSI documentation - * this obviates any performance gain provided by setting - * 'use_clustering'. But empirically while CPU utilization is increased - * by enabling clustering, I/O throughput increases as well. - */ - .use_clustering = ENABLE_CLUSTERING, }; static int advansys_wide_init_chip(struct Scsi_Host *shost) diff --git a/drivers/scsi/aha152x.c b/drivers/scsi/aha152x.c index 301b3cad15f8..97872838b983 100644 --- a/drivers/scsi/aha152x.c +++ b/drivers/scsi/aha152x.c @@ -2920,7 +2920,7 @@ static struct scsi_host_template aha152x_driver_template = { .can_queue = 1, .this_id = 7, .sg_tablesize = SG_ALL, - .use_clustering = DISABLE_CLUSTERING, + .dma_boundary = PAGE_SIZE - 1, .slave_alloc = aha152x_adjust_queue, }; diff --git a/drivers/scsi/aha1542.c b/drivers/scsi/aha1542.c index 41add33e3f1f..ba7a5725be04 100644 --- a/drivers/scsi/aha1542.c +++ b/drivers/scsi/aha1542.c @@ -58,8 +58,15 @@ struct aha1542_hostdata { int aha1542_last_mbi_used; int aha1542_last_mbo_used; struct scsi_cmnd *int_cmds[AHA1542_MAILBOXES]; - struct mailbox mb[2 * AHA1542_MAILBOXES]; - struct ccb ccb[AHA1542_MAILBOXES]; + struct mailbox *mb; + dma_addr_t mb_handle; + struct ccb *ccb; + dma_addr_t ccb_handle; +}; + +struct aha1542_cmd { + struct chain *chain; + dma_addr_t chain_handle; }; static inline void aha1542_intr_reset(u16 base) @@ -233,6 +240,21 @@ static int aha1542_test_port(struct Scsi_Host *sh) return 1; } +static void aha1542_free_cmd(struct scsi_cmnd *cmd) +{ + struct aha1542_cmd *acmd = scsi_cmd_priv(cmd); + struct device *dev = cmd->device->host->dma_dev; + size_t len = scsi_sg_count(cmd) * sizeof(struct chain); + + if (acmd->chain) { + dma_unmap_single(dev, acmd->chain_handle, len, DMA_TO_DEVICE); + kfree(acmd->chain); + } + + acmd->chain = NULL; + scsi_dma_unmap(cmd); +} + static irqreturn_t aha1542_interrupt(int irq, void *dev_id) { struct Scsi_Host *sh = dev_id; @@ -303,7 +325,7 @@ static irqreturn_t aha1542_interrupt(int irq, void *dev_id) return IRQ_HANDLED; }; - mbo = (scsi2int(mb[mbi].ccbptr) - (isa_virt_to_bus(&ccb[0]))) / sizeof(struct ccb); + mbo = (scsi2int(mb[mbi].ccbptr) - (unsigned long)aha1542->ccb_handle) / sizeof(struct ccb); mbistatus = mb[mbi].status; mb[mbi].status = 0; aha1542->aha1542_last_mbi_used = mbi; @@ -331,8 +353,7 @@ static irqreturn_t aha1542_interrupt(int irq, void *dev_id) return IRQ_HANDLED; } my_done = tmp_cmd->scsi_done; - kfree(tmp_cmd->host_scribble); - tmp_cmd->host_scribble = NULL; + aha1542_free_cmd(tmp_cmd); /* Fetch the sense data, and tuck it away, in the required slot. The Adaptec automatically fetches it, and there is no guarantee that we will still have it in the cdb when we come back */ @@ -369,6 +390,7 @@ static irqreturn_t aha1542_interrupt(int irq, void *dev_id) static int aha1542_queuecommand(struct Scsi_Host *sh, struct scsi_cmnd *cmd) { + struct aha1542_cmd *acmd = scsi_cmd_priv(cmd); struct aha1542_hostdata *aha1542 = shost_priv(sh); u8 direction; u8 target = cmd->device->id; @@ -378,7 +400,6 @@ static int aha1542_queuecommand(struct Scsi_Host *sh, struct scsi_cmnd *cmd) int mbo, sg_count; struct mailbox *mb = aha1542->mb; struct ccb *ccb = aha1542->ccb; - struct chain *cptr; if (*cmd->cmnd == REQUEST_SENSE) { /* Don't do the command - we have the sense data already */ @@ -398,15 +419,17 @@ static int aha1542_queuecommand(struct Scsi_Host *sh, struct scsi_cmnd *cmd) print_hex_dump_bytes("command: ", DUMP_PREFIX_NONE, cmd->cmnd, cmd->cmd_len); } #endif - if (bufflen) { /* allocate memory before taking host_lock */ - sg_count = scsi_sg_count(cmd); - cptr = kmalloc_array(sg_count, sizeof(*cptr), - GFP_KERNEL | GFP_DMA); - if (!cptr) - return SCSI_MLQUEUE_HOST_BUSY; - } else { - sg_count = 0; - cptr = NULL; + sg_count = scsi_dma_map(cmd); + if (sg_count) { + size_t len = sg_count * sizeof(struct chain); + + acmd->chain = kmalloc(len, GFP_DMA); + if (!acmd->chain) + goto out_unmap; + acmd->chain_handle = dma_map_single(sh->dma_dev, acmd->chain, + len, DMA_TO_DEVICE); + if (dma_mapping_error(sh->dma_dev, acmd->chain_handle)) + goto out_free_chain; } /* Use the outgoing mailboxes in a round-robin fashion, because this @@ -437,7 +460,8 @@ static int aha1542_queuecommand(struct Scsi_Host *sh, struct scsi_cmnd *cmd) shost_printk(KERN_DEBUG, sh, "Sending command (%d %p)...", mbo, cmd->scsi_done); #endif - any2scsi(mb[mbo].ccbptr, isa_virt_to_bus(&ccb[mbo])); /* This gets trashed for some reason */ + /* This gets trashed for some reason */ + any2scsi(mb[mbo].ccbptr, aha1542->ccb_handle + mbo * sizeof(*ccb)); memset(&ccb[mbo], 0, sizeof(struct ccb)); @@ -456,21 +480,18 @@ static int aha1542_queuecommand(struct Scsi_Host *sh, struct scsi_cmnd *cmd) int i; ccb[mbo].op = 2; /* SCSI Initiator Command w/scatter-gather */ - cmd->host_scribble = (void *)cptr; scsi_for_each_sg(cmd, sg, sg_count, i) { - any2scsi(cptr[i].dataptr, isa_page_to_bus(sg_page(sg)) - + sg->offset); - any2scsi(cptr[i].datalen, sg->length); + any2scsi(acmd->chain[i].dataptr, sg_dma_address(sg)); + any2scsi(acmd->chain[i].datalen, sg_dma_len(sg)); }; any2scsi(ccb[mbo].datalen, sg_count * sizeof(struct chain)); - any2scsi(ccb[mbo].dataptr, isa_virt_to_bus(cptr)); + any2scsi(ccb[mbo].dataptr, acmd->chain_handle); #ifdef DEBUG - shost_printk(KERN_DEBUG, sh, "cptr %p: ", cptr); - print_hex_dump_bytes("cptr: ", DUMP_PREFIX_NONE, cptr, 18); + shost_printk(KERN_DEBUG, sh, "cptr %p: ", acmd->chain); + print_hex_dump_bytes("cptr: ", DUMP_PREFIX_NONE, acmd->chain, 18); #endif } else { ccb[mbo].op = 0; /* SCSI Initiator Command */ - cmd->host_scribble = NULL; any2scsi(ccb[mbo].datalen, 0); any2scsi(ccb[mbo].dataptr, 0); }; @@ -488,24 +509,29 @@ static int aha1542_queuecommand(struct Scsi_Host *sh, struct scsi_cmnd *cmd) spin_unlock_irqrestore(sh->host_lock, flags); return 0; +out_free_chain: + kfree(acmd->chain); + acmd->chain = NULL; +out_unmap: + scsi_dma_unmap(cmd); + return SCSI_MLQUEUE_HOST_BUSY; } /* Initialize mailboxes */ static void setup_mailboxes(struct Scsi_Host *sh) { struct aha1542_hostdata *aha1542 = shost_priv(sh); - int i; - struct mailbox *mb = aha1542->mb; - struct ccb *ccb = aha1542->ccb; - u8 mb_cmd[5] = { CMD_MBINIT, AHA1542_MAILBOXES, 0, 0, 0}; + int i; for (i = 0; i < AHA1542_MAILBOXES; i++) { - mb[i].status = mb[AHA1542_MAILBOXES + i].status = 0; - any2scsi(mb[i].ccbptr, isa_virt_to_bus(&ccb[i])); + aha1542->mb[i].status = 0; + any2scsi(aha1542->mb[i].ccbptr, + aha1542->ccb_handle + i * sizeof(struct ccb)); + aha1542->mb[AHA1542_MAILBOXES + i].status = 0; }; aha1542_intr_reset(sh->io_port); /* reset interrupts, so they don't block */ - any2scsi((mb_cmd + 2), isa_virt_to_bus(mb)); + any2scsi(mb_cmd + 2, aha1542->mb_handle); if (aha1542_out(sh->io_port, mb_cmd, 5)) shost_printk(KERN_ERR, sh, "failed setting up mailboxes\n"); aha1542_intr_reset(sh->io_port); @@ -739,11 +765,26 @@ static struct Scsi_Host *aha1542_hw_init(struct scsi_host_template *tpnt, struct if (aha1542->bios_translation == BIOS_TRANSLATION_25563) shost_printk(KERN_INFO, sh, "Using extended bios translation\n"); + if (dma_set_mask_and_coherent(pdev, DMA_BIT_MASK(24)) < 0) + goto unregister; + + aha1542->mb = dma_alloc_coherent(pdev, + AHA1542_MAILBOXES * 2 * sizeof(struct mailbox), + &aha1542->mb_handle, GFP_KERNEL); + if (!aha1542->mb) + goto unregister; + + aha1542->ccb = dma_alloc_coherent(pdev, + AHA1542_MAILBOXES * sizeof(struct ccb), + &aha1542->ccb_handle, GFP_KERNEL); + if (!aha1542->ccb) + goto free_mb; + setup_mailboxes(sh); if (request_irq(sh->irq, aha1542_interrupt, 0, "aha1542", sh)) { shost_printk(KERN_ERR, sh, "Unable to allocate IRQ.\n"); - goto unregister; + goto free_ccb; } if (sh->dma_channel != 0xFF) { if (request_dma(sh->dma_channel, "aha1542")) { @@ -762,11 +803,18 @@ static struct Scsi_Host *aha1542_hw_init(struct scsi_host_template *tpnt, struct scsi_scan_host(sh); return sh; + free_dma: if (sh->dma_channel != 0xff) free_dma(sh->dma_channel); free_irq: free_irq(sh->irq, sh); +free_ccb: + dma_free_coherent(pdev, AHA1542_MAILBOXES * sizeof(struct ccb), + aha1542->ccb, aha1542->ccb_handle); +free_mb: + dma_free_coherent(pdev, AHA1542_MAILBOXES * 2 * sizeof(struct mailbox), + aha1542->mb, aha1542->mb_handle); unregister: scsi_host_put(sh); release: @@ -777,9 +825,16 @@ release: static int aha1542_release(struct Scsi_Host *sh) { + struct aha1542_hostdata *aha1542 = shost_priv(sh); + struct device *dev = sh->dma_dev; + scsi_remove_host(sh); if (sh->dma_channel != 0xff) free_dma(sh->dma_channel); + dma_free_coherent(dev, AHA1542_MAILBOXES * sizeof(struct ccb), + aha1542->ccb, aha1542->ccb_handle); + dma_free_coherent(dev, AHA1542_MAILBOXES * 2 * sizeof(struct mailbox), + aha1542->mb, aha1542->mb_handle); if (sh->irq) free_irq(sh->irq, sh); if (sh->io_port && sh->n_io_port) @@ -826,7 +881,8 @@ static int aha1542_dev_reset(struct scsi_cmnd *cmd) aha1542->aha1542_last_mbo_used = mbo; - any2scsi(mb[mbo].ccbptr, isa_virt_to_bus(&ccb[mbo])); /* This gets trashed for some reason */ + /* This gets trashed for some reason */ + any2scsi(mb[mbo].ccbptr, aha1542->ccb_handle + mbo * sizeof(*ccb)); memset(&ccb[mbo], 0, sizeof(struct ccb)); @@ -901,8 +957,7 @@ static int aha1542_reset(struct scsi_cmnd *cmd, u8 reset_cmd) */ continue; } - kfree(tmp_cmd->host_scribble); - tmp_cmd->host_scribble = NULL; + aha1542_free_cmd(tmp_cmd); aha1542->int_cmds[i] = NULL; aha1542->mb[i].status = 0; } @@ -946,6 +1001,7 @@ static struct scsi_host_template driver_template = { .module = THIS_MODULE, .proc_name = "aha1542", .name = "Adaptec 1542", + .cmd_size = sizeof(struct aha1542_cmd), .queuecommand = aha1542_queuecommand, .eh_device_reset_handler= aha1542_dev_reset, .eh_bus_reset_handler = aha1542_bus_reset, @@ -955,7 +1011,6 @@ static struct scsi_host_template driver_template = { .this_id = 7, .sg_tablesize = 16, .unchecked_isa_dma = 1, - .use_clustering = ENABLE_CLUSTERING, }; static int aha1542_isa_match(struct device *pdev, unsigned int ndev) diff --git a/drivers/scsi/aha1740.c b/drivers/scsi/aha1740.c index 786bf7f32c64..da4150c17781 100644 --- a/drivers/scsi/aha1740.c +++ b/drivers/scsi/aha1740.c @@ -545,7 +545,6 @@ static struct scsi_host_template aha1740_template = { .can_queue = AHA1740_ECBS, .this_id = 7, .sg_tablesize = AHA1740_SCATTER, - .use_clustering = ENABLE_CLUSTERING, .eh_abort_handler = aha1740_eh_abort_handler, }; diff --git a/drivers/scsi/aic7xxx/aic79xx_osm.c b/drivers/scsi/aic7xxx/aic79xx_osm.c index 2588b8f84ba0..57992519384e 100644 --- a/drivers/scsi/aic7xxx/aic79xx_osm.c +++ b/drivers/scsi/aic7xxx/aic79xx_osm.c @@ -920,7 +920,6 @@ struct scsi_host_template aic79xx_driver_template = { .this_id = -1, .max_sectors = 8192, .cmd_per_lun = 2, - .use_clustering = ENABLE_CLUSTERING, .slave_alloc = ahd_linux_slave_alloc, .slave_configure = ahd_linux_slave_configure, .target_alloc = ahd_linux_target_alloc, diff --git a/drivers/scsi/aic7xxx/aic7xxx_osm.c b/drivers/scsi/aic7xxx/aic7xxx_osm.c index c6be3aeb302b..3c9c17450bb3 100644 --- a/drivers/scsi/aic7xxx/aic7xxx_osm.c +++ b/drivers/scsi/aic7xxx/aic7xxx_osm.c @@ -807,7 +807,6 @@ struct scsi_host_template aic7xxx_driver_template = { .this_id = -1, .max_sectors = 8192, .cmd_per_lun = 2, - .use_clustering = ENABLE_CLUSTERING, .slave_alloc = ahc_linux_slave_alloc, .slave_configure = ahc_linux_slave_configure, .target_alloc = ahc_linux_target_alloc, diff --git a/drivers/scsi/aic94xx/aic94xx_hwi.c b/drivers/scsi/aic94xx/aic94xx_hwi.c index 3b8ad55e59de..2bc7615193bd 100644 --- a/drivers/scsi/aic94xx/aic94xx_hwi.c +++ b/drivers/scsi/aic94xx/aic94xx_hwi.c @@ -1057,14 +1057,13 @@ static struct asd_ascb *asd_ascb_alloc(struct asd_ha_struct *asd_ha, if (ascb) { ascb->dma_scb.size = sizeof(struct scb); - ascb->dma_scb.vaddr = dma_pool_alloc(asd_ha->scb_pool, + ascb->dma_scb.vaddr = dma_pool_zalloc(asd_ha->scb_pool, gfp_flags, &ascb->dma_scb.dma_handle); if (!ascb->dma_scb.vaddr) { kmem_cache_free(asd_ascb_cache, ascb); return NULL; } - memset(ascb->dma_scb.vaddr, 0, sizeof(struct scb)); asd_init_ascb(asd_ha, ascb); spin_lock_irqsave(&seq->tc_index_lock, flags); diff --git a/drivers/scsi/aic94xx/aic94xx_init.c b/drivers/scsi/aic94xx/aic94xx_init.c index 41c4d8abdd4a..f83f79b07b50 100644 --- a/drivers/scsi/aic94xx/aic94xx_init.c +++ b/drivers/scsi/aic94xx/aic94xx_init.c @@ -68,7 +68,6 @@ static struct scsi_host_template aic94xx_sht = { .this_id = -1, .sg_tablesize = SG_ALL, .max_sectors = SCSI_DEFAULT_MAX_SECTORS, - .use_clustering = ENABLE_CLUSTERING, .eh_device_reset_handler = sas_eh_device_reset_handler, .eh_target_reset_handler = sas_eh_target_reset_handler, .target_destroy = sas_target_destroy, diff --git a/drivers/scsi/arcmsr/arcmsr_hba.c b/drivers/scsi/arcmsr/arcmsr_hba.c index d4404eea24fb..0f6751b0a633 100644 --- a/drivers/scsi/arcmsr/arcmsr_hba.c +++ b/drivers/scsi/arcmsr/arcmsr_hba.c @@ -156,7 +156,6 @@ static struct scsi_host_template arcmsr_scsi_host_template = { .sg_tablesize = ARCMSR_DEFAULT_SG_ENTRIES, .max_sectors = ARCMSR_MAX_XFER_SECTORS_C, .cmd_per_lun = ARCMSR_DEFAULT_CMD_PERLUN, - .use_clustering = ENABLE_CLUSTERING, .shost_attrs = arcmsr_host_attrs, .no_write_same = 1, }; @@ -903,9 +902,9 @@ static int arcmsr_probe(struct pci_dev *pdev, const struct pci_device_id *id) if(!host){ goto pci_disable_dev; } - error = pci_set_dma_mask(pdev, DMA_BIT_MASK(64)); + error = dma_set_mask(&pdev->dev, DMA_BIT_MASK(64)); if(error){ - error = pci_set_dma_mask(pdev, DMA_BIT_MASK(32)); + error = dma_set_mask(&pdev->dev, DMA_BIT_MASK(32)); if(error){ printk(KERN_WARNING "scsi%d: No suitable DMA mask available\n", @@ -1049,9 +1048,9 @@ static int arcmsr_resume(struct pci_dev *pdev) pr_warn("%s: pci_enable_device error\n", __func__); return -ENODEV; } - error = pci_set_dma_mask(pdev, DMA_BIT_MASK(64)); + error = dma_set_mask(&pdev->dev, DMA_BIT_MASK(64)); if (error) { - error = pci_set_dma_mask(pdev, DMA_BIT_MASK(32)); + error = dma_set_mask(&pdev->dev, DMA_BIT_MASK(32)); if (error) { pr_warn("scsi%d: No suitable DMA mask available\n", host->host_no); diff --git a/drivers/scsi/arm/acornscsi.c b/drivers/scsi/arm/acornscsi.c index 421fe869a11e..d7509859dc00 100644 --- a/drivers/scsi/arm/acornscsi.c +++ b/drivers/scsi/arm/acornscsi.c @@ -2890,7 +2890,7 @@ static struct scsi_host_template acornscsi_template = { .this_id = 7, .sg_tablesize = SG_ALL, .cmd_per_lun = 2, - .use_clustering = DISABLE_CLUSTERING, + .dma_boundary = PAGE_SIZE - 1, .proc_name = "acornscsi", }; diff --git a/drivers/scsi/arm/arxescsi.c b/drivers/scsi/arm/arxescsi.c index 3110736fd337..5e9dd9f34821 100644 --- a/drivers/scsi/arm/arxescsi.c +++ b/drivers/scsi/arm/arxescsi.c @@ -245,7 +245,7 @@ static struct scsi_host_template arxescsi_template = { .can_queue = 0, .this_id = 7, .sg_tablesize = SG_ALL, - .use_clustering = DISABLE_CLUSTERING, + .dma_boundary = PAGE_SIZE - 1, .proc_name = "arxescsi", }; diff --git a/drivers/scsi/arm/cumana_1.c b/drivers/scsi/arm/cumana_1.c index ae1d809904fb..e2d2a81d8e0b 100644 --- a/drivers/scsi/arm/cumana_1.c +++ b/drivers/scsi/arm/cumana_1.c @@ -221,10 +221,10 @@ static struct scsi_host_template cumanascsi_template = { .this_id = 7, .sg_tablesize = SG_ALL, .cmd_per_lun = 2, - .use_clustering = DISABLE_CLUSTERING, .proc_name = "CumanaSCSI-1", .cmd_size = NCR5380_CMD_SIZE, .max_sectors = 128, + .dma_boundary = PAGE_SIZE - 1, }; static int cumanascsi1_probe(struct expansion_card *ec, diff --git a/drivers/scsi/arm/cumana_2.c b/drivers/scsi/arm/cumana_2.c index edce5f3cfdba..40afcbd8de61 100644 --- a/drivers/scsi/arm/cumana_2.c +++ b/drivers/scsi/arm/cumana_2.c @@ -367,7 +367,6 @@ static struct scsi_host_template cumanascsi2_template = { .this_id = 7, .sg_tablesize = SG_MAX_SEGMENTS, .dma_boundary = IOMD_DMA_BOUNDARY, - .use_clustering = DISABLE_CLUSTERING, .proc_name = "cumanascsi2", }; diff --git a/drivers/scsi/arm/eesox.c b/drivers/scsi/arm/eesox.c index e93e047f4316..8f64c370a8a7 100644 --- a/drivers/scsi/arm/eesox.c +++ b/drivers/scsi/arm/eesox.c @@ -486,7 +486,6 @@ static struct scsi_host_template eesox_template = { .this_id = 7, .sg_tablesize = SG_MAX_SEGMENTS, .dma_boundary = IOMD_DMA_BOUNDARY, - .use_clustering = DISABLE_CLUSTERING, .proc_name = "eesox", }; diff --git a/drivers/scsi/arm/oak.c b/drivers/scsi/arm/oak.c index 05b7f755499b..8f2efaab8d46 100644 --- a/drivers/scsi/arm/oak.c +++ b/drivers/scsi/arm/oak.c @@ -110,7 +110,7 @@ static struct scsi_host_template oakscsi_template = { .this_id = 7, .sg_tablesize = SG_ALL, .cmd_per_lun = 2, - .use_clustering = DISABLE_CLUSTERING, + .dma_boundary = PAGE_SIZE - 1, .proc_name = "oakscsi", .cmd_size = NCR5380_CMD_SIZE, .max_sectors = 128, diff --git a/drivers/scsi/arm/powertec.c b/drivers/scsi/arm/powertec.c index 79aa88911b7f..759f95ba993c 100644 --- a/drivers/scsi/arm/powertec.c +++ b/drivers/scsi/arm/powertec.c @@ -294,7 +294,6 @@ static struct scsi_host_template powertecscsi_template = { .sg_tablesize = SG_MAX_SEGMENTS, .dma_boundary = IOMD_DMA_BOUNDARY, .cmd_per_lun = 2, - .use_clustering = ENABLE_CLUSTERING, .proc_name = "powertec", }; diff --git a/drivers/scsi/atari_scsi.c b/drivers/scsi/atari_scsi.c index 89f5154c40b6..a503dc50c4f8 100644 --- a/drivers/scsi/atari_scsi.c +++ b/drivers/scsi/atari_scsi.c @@ -714,7 +714,7 @@ static struct scsi_host_template atari_scsi_template = { .eh_host_reset_handler = atari_scsi_host_reset, .this_id = 7, .cmd_per_lun = 2, - .use_clustering = DISABLE_CLUSTERING, + .dma_boundary = PAGE_SIZE - 1, .cmd_size = NCR5380_CMD_SIZE, }; diff --git a/drivers/scsi/atp870u.c b/drivers/scsi/atp870u.c index 802d15018ec0..1267200380f8 100644 --- a/drivers/scsi/atp870u.c +++ b/drivers/scsi/atp870u.c @@ -1681,7 +1681,6 @@ static struct scsi_host_template atp870u_template = { .can_queue = qcnt /* can_queue */, .this_id = 7 /* SCSI ID */, .sg_tablesize = ATP870U_SCATTER /*SG_ALL*/ /*SG_NONE*/, - .use_clustering = ENABLE_CLUSTERING, .max_sectors = ATP870U_MAX_SECTORS, }; diff --git a/drivers/scsi/be2iscsi/be_main.c b/drivers/scsi/be2iscsi/be_main.c index effb6fc95af4..39f3820572b4 100644 --- a/drivers/scsi/be2iscsi/be_main.c +++ b/drivers/scsi/be2iscsi/be_main.c @@ -214,12 +214,6 @@ static char const *cqe_desc[] = { "CXN_KILLED_IMM_DATA_RCVD" }; -static int beiscsi_slave_configure(struct scsi_device *sdev) -{ - blk_queue_max_segment_size(sdev->request_queue, 65536); - return 0; -} - static int beiscsi_eh_abort(struct scsi_cmnd *sc) { struct iscsi_task *abrt_task = (struct iscsi_task *)sc->SCp.ptr; @@ -393,7 +387,6 @@ static struct scsi_host_template beiscsi_sht = { .proc_name = DRV_NAME, .queuecommand = iscsi_queuecommand, .change_queue_depth = scsi_change_queue_depth, - .slave_configure = beiscsi_slave_configure, .target_alloc = iscsi_target_alloc, .eh_timed_out = iscsi_eh_cmd_timed_out, .eh_abort_handler = beiscsi_eh_abort, @@ -404,8 +397,8 @@ static struct scsi_host_template beiscsi_sht = { .can_queue = BE2_IO_DEPTH, .this_id = -1, .max_sectors = BEISCSI_MAX_SECTORS, + .max_segment_size = 65536, .cmd_per_lun = BEISCSI_CMD_PER_LUN, - .use_clustering = ENABLE_CLUSTERING, .vendor_id = SCSI_NL_VID_TYPE_PCI | BE_VENDOR_ID, .track_queue_depth = 1, }; diff --git a/drivers/scsi/bfa/bfa_ioc.c b/drivers/scsi/bfa/bfa_ioc.c index 16d3aeb0e572..9631877aba4f 100644 --- a/drivers/scsi/bfa/bfa_ioc.c +++ b/drivers/scsi/bfa/bfa_ioc.c @@ -3819,7 +3819,7 @@ bfa_sfp_scn(struct bfa_sfp_s *sfp, struct bfi_mbmsg_s *msg) sfp->state = BFA_SFP_STATE_REMOVED; sfp->data_valid = 0; bfa_sfp_scn_aen_post(sfp, rsp); - break; + break; case BFA_SFP_SCN_FAILED: sfp->state = BFA_SFP_STATE_FAILED; sfp->data_valid = 0; @@ -5763,7 +5763,7 @@ bfa_phy_intr(void *phyarg, struct bfi_mbmsg_s *msg) (struct bfa_phy_stats_s *) phy->ubuf; bfa_phy_ntoh32((u32 *)stats, (u32 *)phy->dbuf_kva, sizeof(struct bfa_phy_stats_s)); - bfa_trc(phy, stats->status); + bfa_trc(phy, stats->status); } phy->status = status; diff --git a/drivers/scsi/bfa/bfad.c b/drivers/scsi/bfa/bfad.c index 911efc98d1fd..42a0caf6740d 100644 --- a/drivers/scsi/bfa/bfad.c +++ b/drivers/scsi/bfa/bfad.c @@ -739,14 +739,10 @@ bfad_pci_init(struct pci_dev *pdev, struct bfad_s *bfad) pci_set_master(pdev); - - if ((pci_set_dma_mask(pdev, DMA_BIT_MASK(64)) != 0) || - (pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64)) != 0)) { - if ((pci_set_dma_mask(pdev, DMA_BIT_MASK(32)) != 0) || - (pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32)) != 0)) { - printk(KERN_ERR "pci_set_dma_mask fail %p\n", pdev); - goto out_release_region; - } + if (dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64)) || + dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32))) { + printk(KERN_ERR "dma_set_mask_and_coherent fail %p\n", pdev); + goto out_release_region; } /* Enable PCIE Advanced Error Recovery (AER) if kernel supports */ @@ -1565,9 +1561,9 @@ bfad_pci_slot_reset(struct pci_dev *pdev) pci_save_state(pdev); pci_set_master(pdev); - if (pci_set_dma_mask(bfad->pcidev, DMA_BIT_MASK(64)) != 0) - if (pci_set_dma_mask(bfad->pcidev, DMA_BIT_MASK(32)) != 0) - goto out_disable_device; + if (dma_set_mask_and_coherent(&bfad->pcidev->dev, DMA_BIT_MASK(64)) || + dma_set_mask_and_coherent(&bfad->pcidev->dev, DMA_BIT_MASK(32))) + goto out_disable_device; if (restart_bfa(bfad) == -1) goto out_disable_device; diff --git a/drivers/scsi/bfa/bfad_im.c b/drivers/scsi/bfa/bfad_im.c index c4a33317d344..394930cbaa13 100644 --- a/drivers/scsi/bfa/bfad_im.c +++ b/drivers/scsi/bfa/bfad_im.c @@ -817,7 +817,6 @@ struct scsi_host_template bfad_im_scsi_host_template = { .this_id = -1, .sg_tablesize = BFAD_IO_MAX_SGE, .cmd_per_lun = 3, - .use_clustering = ENABLE_CLUSTERING, .shost_attrs = bfad_im_host_attrs, .max_sectors = BFAD_MAX_SECTORS, .vendor_id = BFA_PCI_VENDOR_ID_BROCADE, @@ -840,7 +839,6 @@ struct scsi_host_template bfad_im_vport_template = { .this_id = -1, .sg_tablesize = BFAD_IO_MAX_SGE, .cmd_per_lun = 3, - .use_clustering = ENABLE_CLUSTERING, .shost_attrs = bfad_im_vport_attrs, .max_sectors = BFAD_MAX_SECTORS, }; diff --git a/drivers/scsi/bnx2fc/bnx2fc_fcoe.c b/drivers/scsi/bnx2fc/bnx2fc_fcoe.c index bcd30e2374f1..2e4e7159ebf9 100644 --- a/drivers/scsi/bnx2fc/bnx2fc_fcoe.c +++ b/drivers/scsi/bnx2fc/bnx2fc_fcoe.c @@ -2970,7 +2970,6 @@ static struct scsi_host_template bnx2fc_shost_template = { .change_queue_depth = scsi_change_queue_depth, .this_id = -1, .cmd_per_lun = 3, - .use_clustering = ENABLE_CLUSTERING, .sg_tablesize = BNX2FC_MAX_BDS_PER_CMD, .max_sectors = 1024, .track_queue_depth = 1, diff --git a/drivers/scsi/bnx2i/bnx2i_hwi.c b/drivers/scsi/bnx2i/bnx2i_hwi.c index e9e669a6c2bc..91f5316aa3ab 100644 --- a/drivers/scsi/bnx2i/bnx2i_hwi.c +++ b/drivers/scsi/bnx2i/bnx2i_hwi.c @@ -1906,7 +1906,6 @@ static int bnx2i_queue_scsi_cmd_resp(struct iscsi_session *session, struct iscsi_task *task; struct scsi_cmnd *sc; int rc = 0; - int cpu; spin_lock(&session->back_lock); task = iscsi_itt_to_task(bnx2i_conn->cls_conn->dd_data, @@ -1917,14 +1916,9 @@ static int bnx2i_queue_scsi_cmd_resp(struct iscsi_session *session, } sc = task->sc; - if (!blk_rq_cpu_valid(sc->request)) - cpu = smp_processor_id(); - else - cpu = sc->request->cpu; - spin_unlock(&session->back_lock); - p = &per_cpu(bnx2i_percpu, cpu); + p = &per_cpu(bnx2i_percpu, blk_mq_rq_cpu(sc->request)); spin_lock(&p->p_work_lock); if (unlikely(!p->iothread)) { rc = -EINVAL; @@ -2433,7 +2427,6 @@ static void bnx2i_process_ofld_cmpl(struct bnx2i_hba *hba, { u32 cid_addr; struct bnx2i_endpoint *ep; - u32 cid_num; ep = bnx2i_find_ep_in_ofld_list(hba, ofld_kcqe->iscsi_conn_id); if (!ep) { @@ -2468,7 +2461,6 @@ static void bnx2i_process_ofld_cmpl(struct bnx2i_hba *hba, } else { ep->state = EP_STATE_OFLD_COMPL; cid_addr = ofld_kcqe->iscsi_conn_context_id; - cid_num = bnx2i_get_cid_num(ep); ep->ep_cid = cid_addr; ep->qp.ctx_base = NULL; } diff --git a/drivers/scsi/bnx2i/bnx2i_iscsi.c b/drivers/scsi/bnx2i/bnx2i_iscsi.c index de0a507577ef..69c75426c5eb 100644 --- a/drivers/scsi/bnx2i/bnx2i_iscsi.c +++ b/drivers/scsi/bnx2i/bnx2i_iscsi.c @@ -2263,7 +2263,6 @@ static struct scsi_host_template bnx2i_host_template = { .max_sectors = 127, .cmd_per_lun = 128, .this_id = -1, - .use_clustering = ENABLE_CLUSTERING, .sg_tablesize = ISCSI_MAX_BDS_PER_CMD, .shost_attrs = bnx2i_dev_attributes, .track_queue_depth = 1, diff --git a/drivers/scsi/csiostor/csio_init.c b/drivers/scsi/csiostor/csio_init.c index 1a458ce08210..cf629380a981 100644 --- a/drivers/scsi/csiostor/csio_init.c +++ b/drivers/scsi/csiostor/csio_init.c @@ -255,7 +255,6 @@ static void csio_hw_exit_workers(struct csio_hw *hw) { cancel_work_sync(&hw->evtq_work); - flush_scheduled_work(); } static int @@ -646,7 +645,7 @@ csio_shost_init(struct csio_hw *hw, struct device *dev, if (csio_lnode_init(ln, hw, pln)) goto err_shost_put; - if (scsi_add_host(shost, dev)) + if (scsi_add_host_with_dma(shost, dev, &hw->pdev->dev)) goto err_lnode_exit; return ln; diff --git a/drivers/scsi/csiostor/csio_scsi.c b/drivers/scsi/csiostor/csio_scsi.c index 8c15b7acb4b7..bc5547a62c00 100644 --- a/drivers/scsi/csiostor/csio_scsi.c +++ b/drivers/scsi/csiostor/csio_scsi.c @@ -1780,16 +1780,10 @@ csio_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *cmnd) int nsge = 0; int rv = SCSI_MLQUEUE_HOST_BUSY, nr; int retval; - int cpu; struct csio_scsi_qset *sqset; struct fc_rport *rport = starget_to_rport(scsi_target(cmnd->device)); - if (!blk_rq_cpu_valid(cmnd->request)) - cpu = smp_processor_id(); - else - cpu = cmnd->request->cpu; - - sqset = &hw->sqset[ln->portid][cpu]; + sqset = &hw->sqset[ln->portid][blk_mq_rq_cpu(cmnd->request)]; nr = fc_remote_port_chkready(rport); if (nr) { @@ -2280,7 +2274,6 @@ struct scsi_host_template csio_fcoe_shost_template = { .this_id = -1, .sg_tablesize = CSIO_SCSI_MAX_SGE, .cmd_per_lun = CSIO_MAX_CMD_PER_LUN, - .use_clustering = ENABLE_CLUSTERING, .shost_attrs = csio_fcoe_lport_attrs, .max_sectors = CSIO_MAX_SECTOR_SIZE, }; @@ -2300,7 +2293,6 @@ struct scsi_host_template csio_fcoe_shost_vport_template = { .this_id = -1, .sg_tablesize = CSIO_SCSI_MAX_SGE, .cmd_per_lun = CSIO_MAX_CMD_PER_LUN, - .use_clustering = ENABLE_CLUSTERING, .shost_attrs = csio_fcoe_vport_attrs, .max_sectors = CSIO_MAX_SECTOR_SIZE, }; diff --git a/drivers/scsi/cxgbi/cxgb3i/cxgb3i.c b/drivers/scsi/cxgbi/cxgb3i/cxgb3i.c index bf07735275a4..8a20411699d9 100644 --- a/drivers/scsi/cxgbi/cxgb3i/cxgb3i.c +++ b/drivers/scsi/cxgbi/cxgb3i/cxgb3i.c @@ -95,7 +95,7 @@ static struct scsi_host_template cxgb3i_host_template = { .eh_device_reset_handler = iscsi_eh_device_reset, .eh_target_reset_handler = iscsi_eh_recover_target, .target_alloc = iscsi_target_alloc, - .use_clustering = DISABLE_CLUSTERING, + .dma_boundary = PAGE_SIZE - 1, .this_id = -1, .track_queue_depth = 1, }; diff --git a/drivers/scsi/cxgbi/cxgb4i/Kconfig b/drivers/scsi/cxgbi/cxgb4i/Kconfig index 594f593c8821..f36b76e8e12c 100644 --- a/drivers/scsi/cxgbi/cxgb4i/Kconfig +++ b/drivers/scsi/cxgbi/cxgb4i/Kconfig @@ -1,8 +1,8 @@ config SCSI_CXGB4_ISCSI tristate "Chelsio T4 iSCSI support" depends on PCI && INET && (IPV6 || IPV6=n) - select NETDEVICES - select ETHERNET + depends on THERMAL || !THERMAL + depends on ETHERNET select NET_VENDOR_CHELSIO select CHELSIO_T4 select CHELSIO_LIB diff --git a/drivers/scsi/cxgbi/cxgb4i/cxgb4i.c b/drivers/scsi/cxgbi/cxgb4i/cxgb4i.c index 907dd8792a0a..49f8028ac524 100644 --- a/drivers/scsi/cxgbi/cxgb4i/cxgb4i.c +++ b/drivers/scsi/cxgbi/cxgb4i/cxgb4i.c @@ -113,7 +113,7 @@ static struct scsi_host_template cxgb4i_host_template = { .eh_device_reset_handler = iscsi_eh_device_reset, .eh_target_reset_handler = iscsi_eh_recover_target, .target_alloc = iscsi_target_alloc, - .use_clustering = DISABLE_CLUSTERING, + .dma_boundary = PAGE_SIZE - 1, .this_id = -1, .track_queue_depth = 1, }; diff --git a/drivers/scsi/cxlflash/main.c b/drivers/scsi/cxlflash/main.c index 6637116529aa..bfa13e3b191c 100644 --- a/drivers/scsi/cxlflash/main.c +++ b/drivers/scsi/cxlflash/main.c @@ -3088,12 +3088,6 @@ static ssize_t hwq_mode_store(struct device *dev, return -EINVAL; } - if ((mode == HWQ_MODE_TAG) && !shost_use_blk_mq(shost)) { - dev_info(cfgdev, "SCSI-MQ is not enabled, use a different " - "HWQ steering mode.\n"); - return -EINVAL; - } - afu->hwq_mode = mode; return count; @@ -3180,7 +3174,6 @@ static struct scsi_host_template driver_template = { .this_id = -1, .sg_tablesize = 1, /* No scatter gather support */ .max_sectors = CXLFLASH_MAX_SECTORS, - .use_clustering = ENABLE_CLUSTERING, .shost_attrs = cxlflash_host_attrs, .sdev_attrs = cxlflash_dev_attrs, }; diff --git a/drivers/scsi/dc395x.c b/drivers/scsi/dc395x.c index 8c55ec6e1827..13fbb2eab842 100644 --- a/drivers/scsi/dc395x.c +++ b/drivers/scsi/dc395x.c @@ -4631,7 +4631,7 @@ static struct scsi_host_template dc395x_driver_template = { .cmd_per_lun = DC395x_MAX_CMD_PER_LUN, .eh_abort_handler = dc395x_eh_abort, .eh_bus_reset_handler = dc395x_eh_bus_reset, - .use_clustering = DISABLE_CLUSTERING, + .dma_boundary = PAGE_SIZE - 1, }; diff --git a/drivers/scsi/device_handler/scsi_dh_alua.c b/drivers/scsi/device_handler/scsi_dh_alua.c index 12dc7100bb4c..d7ac498ba35a 100644 --- a/drivers/scsi/device_handler/scsi_dh_alua.c +++ b/drivers/scsi/device_handler/scsi_dh_alua.c @@ -1071,28 +1071,29 @@ static void alua_check(struct scsi_device *sdev, bool force) * Fail I/O to all paths not in state * active/optimized or active/non-optimized. */ -static int alua_prep_fn(struct scsi_device *sdev, struct request *req) +static blk_status_t alua_prep_fn(struct scsi_device *sdev, struct request *req) { struct alua_dh_data *h = sdev->handler_data; struct alua_port_group *pg; unsigned char state = SCSI_ACCESS_STATE_OPTIMAL; - int ret = BLKPREP_OK; rcu_read_lock(); pg = rcu_dereference(h->pg); if (pg) state = pg->state; rcu_read_unlock(); - if (state == SCSI_ACCESS_STATE_TRANSITIONING) - ret = BLKPREP_DEFER; - else if (state != SCSI_ACCESS_STATE_OPTIMAL && - state != SCSI_ACCESS_STATE_ACTIVE && - state != SCSI_ACCESS_STATE_LBA) { - ret = BLKPREP_KILL; + + switch (state) { + case SCSI_ACCESS_STATE_OPTIMAL: + case SCSI_ACCESS_STATE_ACTIVE: + case SCSI_ACCESS_STATE_LBA: + return BLK_STS_OK; + case SCSI_ACCESS_STATE_TRANSITIONING: + return BLK_STS_RESOURCE; + default: req->rq_flags |= RQF_QUIET; + return BLK_STS_IOERR; } - return ret; - } static void alua_rescan(struct scsi_device *sdev) diff --git a/drivers/scsi/device_handler/scsi_dh_emc.c b/drivers/scsi/device_handler/scsi_dh_emc.c index 95c47909a58f..bea8e13febb6 100644 --- a/drivers/scsi/device_handler/scsi_dh_emc.c +++ b/drivers/scsi/device_handler/scsi_dh_emc.c @@ -341,17 +341,17 @@ static int clariion_check_sense(struct scsi_device *sdev, return SCSI_RETURN_NOT_HANDLED; } -static int clariion_prep_fn(struct scsi_device *sdev, struct request *req) +static blk_status_t clariion_prep_fn(struct scsi_device *sdev, + struct request *req) { struct clariion_dh_data *h = sdev->handler_data; - int ret = BLKPREP_OK; if (h->lun_state != CLARIION_LUN_OWNED) { - ret = BLKPREP_KILL; req->rq_flags |= RQF_QUIET; + return BLK_STS_IOERR; } - return ret; + return BLK_STS_OK; } static int clariion_std_inquiry(struct scsi_device *sdev, diff --git a/drivers/scsi/device_handler/scsi_dh_hp_sw.c b/drivers/scsi/device_handler/scsi_dh_hp_sw.c index e65a0ebb4b54..80129b033855 100644 --- a/drivers/scsi/device_handler/scsi_dh_hp_sw.c +++ b/drivers/scsi/device_handler/scsi_dh_hp_sw.c @@ -172,17 +172,16 @@ retry: return rc; } -static int hp_sw_prep_fn(struct scsi_device *sdev, struct request *req) +static blk_status_t hp_sw_prep_fn(struct scsi_device *sdev, struct request *req) { struct hp_sw_dh_data *h = sdev->handler_data; - int ret = BLKPREP_OK; if (h->path_state != HP_SW_PATH_ACTIVE) { - ret = BLKPREP_KILL; req->rq_flags |= RQF_QUIET; + return BLK_STS_IOERR; } - return ret; + return BLK_STS_OK; } /* diff --git a/drivers/scsi/device_handler/scsi_dh_rdac.c b/drivers/scsi/device_handler/scsi_dh_rdac.c index d27fabae8ddd..65f1fe343c64 100644 --- a/drivers/scsi/device_handler/scsi_dh_rdac.c +++ b/drivers/scsi/device_handler/scsi_dh_rdac.c @@ -642,17 +642,16 @@ done: return 0; } -static int rdac_prep_fn(struct scsi_device *sdev, struct request *req) +static blk_status_t rdac_prep_fn(struct scsi_device *sdev, struct request *req) { struct rdac_dh_data *h = sdev->handler_data; - int ret = BLKPREP_OK; if (h->state != RDAC_STATE_ACTIVE) { - ret = BLKPREP_KILL; req->rq_flags |= RQF_QUIET; + return BLK_STS_IOERR; } - return ret; + return BLK_STS_OK; } static int rdac_check_sense(struct scsi_device *sdev, diff --git a/drivers/scsi/dmx3191d.c b/drivers/scsi/dmx3191d.c index 003c3d726238..8db1cc552932 100644 --- a/drivers/scsi/dmx3191d.c +++ b/drivers/scsi/dmx3191d.c @@ -63,7 +63,7 @@ static struct scsi_host_template dmx3191d_driver_template = { .this_id = 7, .sg_tablesize = SG_ALL, .cmd_per_lun = 2, - .use_clustering = DISABLE_CLUSTERING, + .dma_boundary = PAGE_SIZE - 1, .cmd_size = NCR5380_CMD_SIZE, }; diff --git a/drivers/scsi/dpt_i2o.c b/drivers/scsi/dpt_i2o.c index 37de8fb186d7..70d1a18278af 100644 --- a/drivers/scsi/dpt_i2o.c +++ b/drivers/scsi/dpt_i2o.c @@ -934,15 +934,15 @@ static int adpt_install_hba(struct scsi_host_template* sht, struct pci_dev* pDev * See if we should enable dma64 mode. */ if (sizeof(dma_addr_t) > 4 && - pci_set_dma_mask(pDev, DMA_BIT_MASK(64)) == 0) { - if (dma_get_required_mask(&pDev->dev) > DMA_BIT_MASK(32)) - dma64 = 1; - } - if (!dma64 && pci_set_dma_mask(pDev, DMA_BIT_MASK(32)) != 0) + dma_get_required_mask(&pDev->dev) > DMA_BIT_MASK(32) && + dma_set_mask(&pDev->dev, DMA_BIT_MASK(64)) == 0) + dma64 = 1; + + if (!dma64 && dma_set_mask(&pDev->dev, DMA_BIT_MASK(32)) != 0) return -EINVAL; /* adapter only supports message blocks below 4GB */ - pci_set_consistent_dma_mask(pDev, DMA_BIT_MASK(32)); + dma_set_coherent_mask(&pDev->dev, DMA_BIT_MASK(32)); base_addr0_phys = pci_resource_start(pDev,0); hba_map0_area_size = pci_resource_len(pDev,0); @@ -3569,7 +3569,6 @@ static struct scsi_host_template driver_template = { .slave_configure = adpt_slave_configure, .can_queue = MAX_TO_IOP_MESSAGES, .this_id = 7, - .use_clustering = ENABLE_CLUSTERING, }; static int __init adpt_init(void) diff --git a/drivers/scsi/esas2r/esas2r_init.c b/drivers/scsi/esas2r/esas2r_init.c index bbe77db8938d..46b2c83ba21f 100644 --- a/drivers/scsi/esas2r/esas2r_init.c +++ b/drivers/scsi/esas2r/esas2r_init.c @@ -266,6 +266,7 @@ int esas2r_init_adapter(struct Scsi_Host *host, struct pci_dev *pcid, int i; void *next_uncached; struct esas2r_request *first_request, *last_request; + bool dma64 = false; if (index >= MAX_ADAPTERS) { esas2r_log(ESAS2R_LOG_CRIT, @@ -286,42 +287,20 @@ int esas2r_init_adapter(struct Scsi_Host *host, struct pci_dev *pcid, a->pcid = pcid; a->host = host; - if (sizeof(dma_addr_t) > 4) { - const uint64_t required_mask = dma_get_required_mask - (&pcid->dev); - if (required_mask > DMA_BIT_MASK(32) - && !pci_set_dma_mask(pcid, DMA_BIT_MASK(64)) - && !pci_set_consistent_dma_mask(pcid, - DMA_BIT_MASK(64))) { - esas2r_log_dev(ESAS2R_LOG_INFO, - &(a->pcid->dev), - "64-bit PCI addressing enabled\n"); - } else if (!pci_set_dma_mask(pcid, DMA_BIT_MASK(32)) - && !pci_set_consistent_dma_mask(pcid, - DMA_BIT_MASK(32))) { - esas2r_log_dev(ESAS2R_LOG_INFO, - &(a->pcid->dev), - "32-bit PCI addressing enabled\n"); - } else { - esas2r_log(ESAS2R_LOG_CRIT, - "failed to set DMA mask"); - esas2r_kill_adapter(index); - return 0; - } - } else { - if (!pci_set_dma_mask(pcid, DMA_BIT_MASK(32)) - && !pci_set_consistent_dma_mask(pcid, - DMA_BIT_MASK(32))) { - esas2r_log_dev(ESAS2R_LOG_INFO, - &(a->pcid->dev), - "32-bit PCI addressing enabled\n"); - } else { - esas2r_log(ESAS2R_LOG_CRIT, - "failed to set DMA mask"); - esas2r_kill_adapter(index); - return 0; - } + if (sizeof(dma_addr_t) > 4 && + dma_get_required_mask(&pcid->dev) > DMA_BIT_MASK(32) && + !dma_set_mask_and_coherent(&pcid->dev, DMA_BIT_MASK(64))) + dma64 = true; + + if (!dma64 && dma_set_mask_and_coherent(&pcid->dev, DMA_BIT_MASK(32))) { + esas2r_log(ESAS2R_LOG_CRIT, "failed to set DMA mask"); + esas2r_kill_adapter(index); + return 0; } + + esas2r_log_dev(ESAS2R_LOG_INFO, &pcid->dev, + "%s-bit PCI addressing enabled\n", dma64 ? "64" : "32"); + esas2r_adapters[index] = a; sprintf(a->name, ESAS2R_DRVR_NAME "_%02d", index); esas2r_debug("new adapter %p, name %s", a, a->name); diff --git a/drivers/scsi/esas2r/esas2r_main.c b/drivers/scsi/esas2r/esas2r_main.c index c07118617d89..64397d441bae 100644 --- a/drivers/scsi/esas2r/esas2r_main.c +++ b/drivers/scsi/esas2r/esas2r_main.c @@ -250,7 +250,6 @@ static struct scsi_host_template driver_template = { ESAS2R_DEFAULT_CMD_PER_LUN, .present = 0, .unchecked_isa_dma = 0, - .use_clustering = ENABLE_CLUSTERING, .emulated = 0, .proc_name = ESAS2R_DRVR_NAME, .change_queue_depth = scsi_change_queue_depth, diff --git a/drivers/scsi/esp_scsi.c b/drivers/scsi/esp_scsi.c index ac7da9db7317..465df475f753 100644 --- a/drivers/scsi/esp_scsi.c +++ b/drivers/scsi/esp_scsi.c @@ -2676,7 +2676,6 @@ struct scsi_host_template scsi_esp_template = { .can_queue = 7, .this_id = 7, .sg_tablesize = SG_ALL, - .use_clustering = ENABLE_CLUSTERING, .max_sectors = 0xffff, .skip_settle_delay = 1, }; diff --git a/drivers/scsi/fcoe/fcoe.c b/drivers/scsi/fcoe/fcoe.c index f46b312d04bc..cd19be3f3405 100644 --- a/drivers/scsi/fcoe/fcoe.c +++ b/drivers/scsi/fcoe/fcoe.c @@ -286,7 +286,6 @@ static struct scsi_host_template fcoe_shost_template = { .this_id = -1, .cmd_per_lun = 3, .can_queue = FCOE_MAX_OUTSTANDING_COMMANDS, - .use_clustering = ENABLE_CLUSTERING, .sg_tablesize = SG_ALL, .max_sectors = 0xffff, .track_queue_depth = 1, @@ -1670,7 +1669,6 @@ static void fcoe_recv_frame(struct sk_buff *skb) struct fc_stats *stats; struct fcoe_crc_eof crc_eof; struct fc_frame *fp; - struct fcoe_port *port; struct fcoe_hdr *hp; fr = fcoe_dev_from_skb(skb); @@ -1688,7 +1686,6 @@ static void fcoe_recv_frame(struct sk_buff *skb) skb_end_pointer(skb), skb->csum, skb->dev ? skb->dev->name : "<NULL>"); - port = lport_priv(lport); skb_linearize(skb); /* check for skb_is_nonlinear is within skb_linearize */ /* @@ -1859,7 +1856,6 @@ static int fcoe_device_notification(struct notifier_block *notifier, struct net_device *netdev = netdev_notifier_info_to_dev(ptr); struct fcoe_ctlr *ctlr; struct fcoe_interface *fcoe; - struct fcoe_port *port; struct fc_stats *stats; u32 link_possible = 1; u32 mfs; @@ -1897,7 +1893,6 @@ static int fcoe_device_notification(struct notifier_block *notifier, break; case NETDEV_UNREGISTER: list_del(&fcoe->list); - port = lport_priv(ctlr->lp); fcoe_vport_remove(lport); mutex_lock(&fcoe_config_mutex); fcoe_if_destroy(lport); diff --git a/drivers/scsi/fnic/fnic_main.c b/drivers/scsi/fnic/fnic_main.c index cc461fd7bef1..5b3534b0deda 100644 --- a/drivers/scsi/fnic/fnic_main.c +++ b/drivers/scsi/fnic/fnic_main.c @@ -115,7 +115,6 @@ static struct scsi_host_template fnic_host_template = { .this_id = -1, .cmd_per_lun = 3, .can_queue = FNIC_DFLT_IO_REQ, - .use_clustering = ENABLE_CLUSTERING, .sg_tablesize = FNIC_MAX_SG_DESC_CNT, .max_sectors = 0xffff, .shost_attrs = fnic_attrs, diff --git a/drivers/scsi/fnic/fnic_scsi.c b/drivers/scsi/fnic/fnic_scsi.c index 96acfcecd540..cafbcfb85bfa 100644 --- a/drivers/scsi/fnic/fnic_scsi.c +++ b/drivers/scsi/fnic/fnic_scsi.c @@ -2274,7 +2274,7 @@ fnic_scsi_host_start_tag(struct fnic *fnic, struct scsi_cmnd *sc) return SCSI_NO_TAG; sc->tag = sc->request->tag = dummy->tag; - sc->request->special = sc; + sc->host_scribble = (unsigned char *)dummy; return dummy->tag; } @@ -2286,7 +2286,7 @@ fnic_scsi_host_start_tag(struct fnic *fnic, struct scsi_cmnd *sc) static inline void fnic_scsi_host_end_tag(struct fnic *fnic, struct scsi_cmnd *sc) { - struct request *dummy = sc->request->special; + struct request *dummy = (struct request *)sc->host_scribble; blk_mq_free_request(dummy); } diff --git a/drivers/scsi/fnic/fnic_trace.c b/drivers/scsi/fnic/fnic_trace.c index 8271785bdb93..bf0fd2aeb92e 100644 --- a/drivers/scsi/fnic/fnic_trace.c +++ b/drivers/scsi/fnic/fnic_trace.c @@ -468,14 +468,13 @@ int fnic_trace_buf_init(void) fnic_max_trace_entries = (trace_max_pages * PAGE_SIZE)/ FNIC_ENTRY_SIZE_BYTES; - fnic_trace_buf_p = (unsigned long)vmalloc((trace_max_pages * PAGE_SIZE)); + fnic_trace_buf_p = (unsigned long)vzalloc(trace_max_pages * PAGE_SIZE); if (!fnic_trace_buf_p) { printk(KERN_ERR PFX "Failed to allocate memory " "for fnic_trace_buf_p\n"); err = -ENOMEM; goto err_fnic_trace_buf_init; } - memset((void *)fnic_trace_buf_p, 0, (trace_max_pages * PAGE_SIZE)); fnic_trace_entries.page_offset = vmalloc(array_size(fnic_max_trace_entries, diff --git a/drivers/scsi/g_NCR5380.c b/drivers/scsi/g_NCR5380.c index fc538181f8df..9cdca0625498 100644 --- a/drivers/scsi/g_NCR5380.c +++ b/drivers/scsi/g_NCR5380.c @@ -700,7 +700,7 @@ static struct scsi_host_template driver_template = { .this_id = 7, .sg_tablesize = SG_ALL, .cmd_per_lun = 2, - .use_clustering = DISABLE_CLUSTERING, + .dma_boundary = PAGE_SIZE - 1, .cmd_size = NCR5380_CMD_SIZE, .max_sectors = 128, }; diff --git a/drivers/scsi/gdth.c b/drivers/scsi/gdth.c index 16709735b546..194c294f9b6c 100644 --- a/drivers/scsi/gdth.c +++ b/drivers/scsi/gdth.c @@ -4680,7 +4680,6 @@ static struct scsi_host_template gdth_template = { .sg_tablesize = GDTH_MAXSG, .cmd_per_lun = GDTH_MAXC_P_L, .unchecked_isa_dma = 1, - .use_clustering = ENABLE_CLUSTERING, .no_write_same = 1, }; diff --git a/drivers/scsi/gvp11.c b/drivers/scsi/gvp11.c index a27fc49ebd3a..d2acd0d826e2 100644 --- a/drivers/scsi/gvp11.c +++ b/drivers/scsi/gvp11.c @@ -184,7 +184,7 @@ static struct scsi_host_template gvp11_scsi_template = { .this_id = 7, .sg_tablesize = SG_ALL, .cmd_per_lun = CMD_PER_LUN, - .use_clustering = DISABLE_CLUSTERING + .dma_boundary = PAGE_SIZE - 1, }; static int check_wd33c93(struct gvp11_scsiregs *regs) diff --git a/drivers/scsi/hisi_sas/hisi_sas.h b/drivers/scsi/hisi_sas/hisi_sas.h index 0ddb53c8a2e2..af291947a54d 100644 --- a/drivers/scsi/hisi_sas/hisi_sas.h +++ b/drivers/scsi/hisi_sas/hisi_sas.h @@ -69,6 +69,12 @@ #define HISI_SAS_SATA_PROTOCOL_FPDMA 0x8 #define HISI_SAS_SATA_PROTOCOL_ATAPI 0x10 +#define HISI_SAS_DIF_PROT_MASK (SHOST_DIF_TYPE1_PROTECTION | \ + SHOST_DIF_TYPE2_PROTECTION | \ + SHOST_DIF_TYPE3_PROTECTION) + +#define HISI_SAS_PROT_MASK (HISI_SAS_DIF_PROT_MASK) + struct hisi_hba; enum { @@ -211,7 +217,7 @@ struct hisi_sas_slot { /* Do not reorder/change members after here */ void *buf; dma_addr_t buf_dma; - int idx; + u16 idx; }; struct hisi_sas_hw { @@ -268,6 +274,8 @@ struct hisi_hba { struct pci_dev *pci_dev; struct device *dev; + int prot_mask; + void __iomem *regs; void __iomem *sgpio_regs; struct regmap *ctrl; @@ -322,6 +330,8 @@ struct hisi_hba { unsigned long sata_dev_bitmap[BITS_TO_LONGS(HISI_SAS_MAX_DEVICES)]; struct work_struct rst_work; u32 phy_state; + u32 intr_coal_ticks; /* Time of interrupt coalesce in us */ + u32 intr_coal_count; /* Interrupt count to coalesce */ }; /* Generic HW DMA host memory structures */ @@ -468,7 +478,6 @@ extern int hisi_sas_remove(struct platform_device *pdev); extern int hisi_sas_slave_configure(struct scsi_device *sdev); extern int hisi_sas_scan_finished(struct Scsi_Host *shost, unsigned long time); extern void hisi_sas_scan_start(struct Scsi_Host *shost); -extern struct device_attribute *host_attrs[]; extern int hisi_sas_host_reset(struct Scsi_Host *shost, int reset_type); extern void hisi_sas_phy_down(struct hisi_hba *hisi_hba, int phy_no, int rdy); extern void hisi_sas_slot_task_free(struct hisi_hba *hisi_hba, diff --git a/drivers/scsi/hisi_sas/hisi_sas_main.c b/drivers/scsi/hisi_sas/hisi_sas_main.c index b3f01d5b821b..eed7fc5b3389 100644 --- a/drivers/scsi/hisi_sas/hisi_sas_main.c +++ b/drivers/scsi/hisi_sas/hisi_sas_main.c @@ -296,42 +296,109 @@ static void hisi_sas_task_prep_abort(struct hisi_hba *hisi_hba, device_id, abort_flag, tag_to_abort); } +static void hisi_sas_dma_unmap(struct hisi_hba *hisi_hba, + struct sas_task *task, int n_elem, + int n_elem_req, int n_elem_resp) +{ + struct device *dev = hisi_hba->dev; + + if (!sas_protocol_ata(task->task_proto)) { + if (task->num_scatter) { + if (n_elem) + dma_unmap_sg(dev, task->scatter, + task->num_scatter, + task->data_dir); + } else if (task->task_proto & SAS_PROTOCOL_SMP) { + if (n_elem_req) + dma_unmap_sg(dev, &task->smp_task.smp_req, + 1, DMA_TO_DEVICE); + if (n_elem_resp) + dma_unmap_sg(dev, &task->smp_task.smp_resp, + 1, DMA_FROM_DEVICE); + } + } +} + +static int hisi_sas_dma_map(struct hisi_hba *hisi_hba, + struct sas_task *task, int *n_elem, + int *n_elem_req, int *n_elem_resp) +{ + struct device *dev = hisi_hba->dev; + int rc; + + if (sas_protocol_ata(task->task_proto)) { + *n_elem = task->num_scatter; + } else { + unsigned int req_len, resp_len; + + if (task->num_scatter) { + *n_elem = dma_map_sg(dev, task->scatter, + task->num_scatter, task->data_dir); + if (!*n_elem) { + rc = -ENOMEM; + goto prep_out; + } + } else if (task->task_proto & SAS_PROTOCOL_SMP) { + *n_elem_req = dma_map_sg(dev, &task->smp_task.smp_req, + 1, DMA_TO_DEVICE); + if (!*n_elem_req) { + rc = -ENOMEM; + goto prep_out; + } + req_len = sg_dma_len(&task->smp_task.smp_req); + if (req_len & 0x3) { + rc = -EINVAL; + goto err_out_dma_unmap; + } + *n_elem_resp = dma_map_sg(dev, &task->smp_task.smp_resp, + 1, DMA_FROM_DEVICE); + if (!*n_elem_resp) { + rc = -ENOMEM; + goto err_out_dma_unmap; + } + resp_len = sg_dma_len(&task->smp_task.smp_resp); + if (resp_len & 0x3) { + rc = -EINVAL; + goto err_out_dma_unmap; + } + } + } + + if (*n_elem > HISI_SAS_SGE_PAGE_CNT) { + dev_err(dev, "task prep: n_elem(%d) > HISI_SAS_SGE_PAGE_CNT", + *n_elem); + rc = -EINVAL; + goto err_out_dma_unmap; + } + return 0; + +err_out_dma_unmap: + /* It would be better to call dma_unmap_sg() here, but it's messy */ + hisi_sas_dma_unmap(hisi_hba, task, *n_elem, + *n_elem_req, *n_elem_resp); +prep_out: + return rc; +} + static int hisi_sas_task_prep(struct sas_task *task, struct hisi_sas_dq **dq_pointer, bool is_tmf, struct hisi_sas_tmf_task *tmf, int *pass) { struct domain_device *device = task->dev; - struct hisi_hba *hisi_hba; + struct hisi_hba *hisi_hba = dev_to_hisi_hba(device); struct hisi_sas_device *sas_dev = device->lldd_dev; struct hisi_sas_port *port; struct hisi_sas_slot *slot; struct hisi_sas_cmd_hdr *cmd_hdr_base; struct asd_sas_port *sas_port = device->port; - struct device *dev; + struct device *dev = hisi_hba->dev; int dlvry_queue_slot, dlvry_queue, rc, slot_idx; int n_elem = 0, n_elem_req = 0, n_elem_resp = 0; struct hisi_sas_dq *dq; unsigned long flags; int wr_q_index; - if (!sas_port) { - struct task_status_struct *ts = &task->task_status; - - ts->resp = SAS_TASK_UNDELIVERED; - ts->stat = SAS_PHY_DOWN; - /* - * libsas will use dev->port, should - * not call task_done for sata - */ - if (device->dev_type != SAS_SATA_DEV) - task->task_done(task); - return -ECOMM; - } - - hisi_hba = dev_to_hisi_hba(device); - dev = hisi_hba->dev; - if (DEV_IS_GONE(sas_dev)) { if (sas_dev) dev_info(dev, "task prep: device %d not ready\n", @@ -355,49 +422,10 @@ static int hisi_sas_task_prep(struct sas_task *task, return -ECOMM; } - if (!sas_protocol_ata(task->task_proto)) { - unsigned int req_len, resp_len; - - if (task->num_scatter) { - n_elem = dma_map_sg(dev, task->scatter, - task->num_scatter, task->data_dir); - if (!n_elem) { - rc = -ENOMEM; - goto prep_out; - } - } else if (task->task_proto & SAS_PROTOCOL_SMP) { - n_elem_req = dma_map_sg(dev, &task->smp_task.smp_req, - 1, DMA_TO_DEVICE); - if (!n_elem_req) { - rc = -ENOMEM; - goto prep_out; - } - req_len = sg_dma_len(&task->smp_task.smp_req); - if (req_len & 0x3) { - rc = -EINVAL; - goto err_out_dma_unmap; - } - n_elem_resp = dma_map_sg(dev, &task->smp_task.smp_resp, - 1, DMA_FROM_DEVICE); - if (!n_elem_resp) { - rc = -ENOMEM; - goto err_out_dma_unmap; - } - resp_len = sg_dma_len(&task->smp_task.smp_resp); - if (resp_len & 0x3) { - rc = -EINVAL; - goto err_out_dma_unmap; - } - } - } else - n_elem = task->num_scatter; - - if (n_elem > HISI_SAS_SGE_PAGE_CNT) { - dev_err(dev, "task prep: n_elem(%d) > HISI_SAS_SGE_PAGE_CNT", - n_elem); - rc = -EINVAL; - goto err_out_dma_unmap; - } + rc = hisi_sas_dma_map(hisi_hba, task, &n_elem, + &n_elem_req, &n_elem_resp); + if (rc < 0) + goto prep_out; if (hisi_hba->hw->slot_index_alloc) rc = hisi_hba->hw->slot_index_alloc(hisi_hba, device); @@ -482,19 +510,8 @@ static int hisi_sas_task_prep(struct sas_task *task, err_out_tag: hisi_sas_slot_index_free(hisi_hba, slot_idx); err_out_dma_unmap: - if (!sas_protocol_ata(task->task_proto)) { - if (task->num_scatter) { - dma_unmap_sg(dev, task->scatter, task->num_scatter, - task->data_dir); - } else if (task->task_proto & SAS_PROTOCOL_SMP) { - if (n_elem_req) - dma_unmap_sg(dev, &task->smp_task.smp_req, - 1, DMA_TO_DEVICE); - if (n_elem_resp) - dma_unmap_sg(dev, &task->smp_task.smp_resp, - 1, DMA_FROM_DEVICE); - } - } + hisi_sas_dma_unmap(hisi_hba, task, n_elem, + n_elem_req, n_elem_resp); prep_out: dev_err(dev, "task prep: failed[%d]!\n", rc); return rc; @@ -506,10 +523,29 @@ static int hisi_sas_task_exec(struct sas_task *task, gfp_t gfp_flags, u32 rc; u32 pass = 0; unsigned long flags; - struct hisi_hba *hisi_hba = dev_to_hisi_hba(task->dev); - struct device *dev = hisi_hba->dev; + struct hisi_hba *hisi_hba; + struct device *dev; + struct domain_device *device = task->dev; + struct asd_sas_port *sas_port = device->port; struct hisi_sas_dq *dq = NULL; + if (!sas_port) { + struct task_status_struct *ts = &task->task_status; + + ts->resp = SAS_TASK_UNDELIVERED; + ts->stat = SAS_PHY_DOWN; + /* + * libsas will use dev->port, should + * not call task_done for sata + */ + if (device->dev_type != SAS_SATA_DEV) + task->task_done(task); + return -ECOMM; + } + + hisi_hba = dev_to_hisi_hba(device); + dev = hisi_hba->dev; + if (unlikely(test_bit(HISI_SAS_REJECT_CMD_BIT, &hisi_hba->flags))) { if (in_softirq()) return -EINVAL; @@ -1459,12 +1495,12 @@ static int hisi_sas_abort_task(struct sas_task *task) if (task->lldd_task && task->task_proto & SAS_PROTOCOL_SSP) { struct scsi_cmnd *cmnd = task->uldd_task; struct hisi_sas_slot *slot = task->lldd_task; - u32 tag = slot->idx; + u16 tag = slot->idx; int rc2; int_to_scsilun(cmnd->device->lun, &lun); tmf_task.tmf = TMF_ABORT_TASK; - tmf_task.tag_of_task_to_be_managed = cpu_to_le16(tag); + tmf_task.tag_of_task_to_be_managed = tag; rc = hisi_sas_debug_issue_ssp_tmf(task->dev, lun.scsi_lun, &tmf_task); @@ -1718,7 +1754,7 @@ static int hisi_sas_query_task(struct sas_task *task) int_to_scsilun(cmnd->device->lun, &lun); tmf_task.tmf = TMF_QUERY_TASK; - tmf_task.tag_of_task_to_be_managed = cpu_to_le16(tag); + tmf_task.tag_of_task_to_be_managed = tag; rc = hisi_sas_debug_issue_ssp_tmf(device, lun.scsi_lun, @@ -1994,12 +2030,6 @@ EXPORT_SYMBOL_GPL(hisi_sas_kill_tasklets); struct scsi_transport_template *hisi_sas_stt; EXPORT_SYMBOL_GPL(hisi_sas_stt); -struct device_attribute *host_attrs[] = { - &dev_attr_phy_event_threshold, - NULL, -}; -EXPORT_SYMBOL_GPL(host_attrs); - static struct sas_domain_function_template hisi_sas_transport_ops = { .lldd_dev_found = hisi_sas_dev_found, .lldd_dev_gone = hisi_sas_dev_gone, @@ -2380,7 +2410,6 @@ int hisi_sas_probe(struct platform_device *pdev, shost->max_lun = ~0; shost->max_channel = 1; shost->max_cmd_len = 16; - shost->sg_tablesize = min_t(u16, SG_ALL, HISI_SAS_SGE_PAGE_CNT); if (hisi_hba->hw->slot_index_alloc) { shost->can_queue = hisi_hba->hw->max_command_entries; shost->cmd_per_lun = hisi_hba->hw->max_command_entries; diff --git a/drivers/scsi/hisi_sas/hisi_sas_v1_hw.c b/drivers/scsi/hisi_sas/hisi_sas_v1_hw.c index 8df822a4a1bd..28ab52a021cf 100644 --- a/drivers/scsi/hisi_sas/hisi_sas_v1_hw.c +++ b/drivers/scsi/hisi_sas/hisi_sas_v1_hw.c @@ -510,6 +510,7 @@ static void setup_itct_v1_hw(struct hisi_hba *hisi_hba, struct hisi_sas_itct *itct = &hisi_hba->itct[device_id]; struct asd_sas_port *sas_port = device->port; struct hisi_sas_port *port = to_hisi_sas_port(sas_port); + u64 sas_addr; memset(itct, 0, sizeof(*itct)); @@ -534,8 +535,8 @@ static void setup_itct_v1_hw(struct hisi_hba *hisi_hba, itct->qw0 = cpu_to_le64(qw0); /* qw1 */ - memcpy(&itct->sas_addr, device->sas_addr, SAS_ADDR_SIZE); - itct->sas_addr = __swab64(itct->sas_addr); + memcpy(&sas_addr, device->sas_addr, SAS_ADDR_SIZE); + itct->sas_addr = cpu_to_le64(__swab64(sas_addr)); /* qw2 */ itct->qw2 = cpu_to_le64((500ULL << ITCT_HDR_IT_NEXUS_LOSS_TL_OFF) | @@ -561,7 +562,7 @@ static void clear_itct_v1_hw(struct hisi_hba *hisi_hba, reg_val &= ~CFG_AGING_TIME_ITCT_REL_MSK; hisi_sas_write32(hisi_hba, CFG_AGING_TIME, reg_val); - qw0 = cpu_to_le64(itct->qw0); + qw0 = le64_to_cpu(itct->qw0); qw0 &= ~ITCT_HDR_VALID_MSK; itct->qw0 = cpu_to_le64(qw0); } @@ -1100,7 +1101,7 @@ static void slot_err_v1_hw(struct hisi_hba *hisi_hba, case SAS_PROTOCOL_SSP: { int error = -1; - u32 dma_err_type = cpu_to_le32(err_record->dma_err_type); + u32 dma_err_type = le32_to_cpu(err_record->dma_err_type); u32 dma_tx_err_type = ((dma_err_type & ERR_HDR_DMA_TX_ERR_TYPE_MSK)) >> ERR_HDR_DMA_TX_ERR_TYPE_OFF; @@ -1108,9 +1109,9 @@ static void slot_err_v1_hw(struct hisi_hba *hisi_hba, ERR_HDR_DMA_RX_ERR_TYPE_MSK)) >> ERR_HDR_DMA_RX_ERR_TYPE_OFF; u32 trans_tx_fail_type = - cpu_to_le32(err_record->trans_tx_fail_type); + le32_to_cpu(err_record->trans_tx_fail_type); u32 trans_rx_fail_type = - cpu_to_le32(err_record->trans_rx_fail_type); + le32_to_cpu(err_record->trans_rx_fail_type); if (dma_tx_err_type) { /* dma tx err */ @@ -1558,7 +1559,7 @@ static irqreturn_t cq_interrupt_v1_hw(int irq, void *p) u32 cmplt_hdr_data; complete_hdr = &complete_queue[rd_point]; - cmplt_hdr_data = cpu_to_le32(complete_hdr->data); + cmplt_hdr_data = le32_to_cpu(complete_hdr->data); idx = (cmplt_hdr_data & CMPLT_HDR_IPTT_MSK) >> CMPLT_HDR_IPTT_OFF; slot = &hisi_hba->slot_info[idx]; @@ -1797,6 +1798,11 @@ static int hisi_sas_v1_init(struct hisi_hba *hisi_hba) return 0; } +static struct device_attribute *host_attrs_v1_hw[] = { + &dev_attr_phy_event_threshold, + NULL +}; + static struct scsi_host_template sht_v1_hw = { .name = DRV_NAME, .module = THIS_MODULE, @@ -1808,14 +1814,13 @@ static struct scsi_host_template sht_v1_hw = { .change_queue_depth = sas_change_queue_depth, .bios_param = sas_bios_param, .this_id = -1, - .sg_tablesize = SG_ALL, + .sg_tablesize = HISI_SAS_SGE_PAGE_CNT, .max_sectors = SCSI_DEFAULT_MAX_SECTORS, - .use_clustering = ENABLE_CLUSTERING, .eh_device_reset_handler = sas_eh_device_reset_handler, .eh_target_reset_handler = sas_eh_target_reset_handler, .target_destroy = sas_target_destroy, .ioctl = sas_ioctl, - .shost_attrs = host_attrs, + .shost_attrs = host_attrs_v1_hw, }; static const struct hisi_sas_hw hisi_sas_v1_hw = { diff --git a/drivers/scsi/hisi_sas/hisi_sas_v2_hw.c b/drivers/scsi/hisi_sas/hisi_sas_v2_hw.c index 77a85ead483e..c8ebff3ba559 100644 --- a/drivers/scsi/hisi_sas/hisi_sas_v2_hw.c +++ b/drivers/scsi/hisi_sas/hisi_sas_v2_hw.c @@ -934,6 +934,7 @@ static void setup_itct_v2_hw(struct hisi_hba *hisi_hba, struct domain_device *parent_dev = device->parent; struct asd_sas_port *sas_port = device->port; struct hisi_sas_port *port = to_hisi_sas_port(sas_port); + u64 sas_addr; memset(itct, 0, sizeof(*itct)); @@ -966,8 +967,8 @@ static void setup_itct_v2_hw(struct hisi_hba *hisi_hba, itct->qw0 = cpu_to_le64(qw0); /* qw1 */ - memcpy(&itct->sas_addr, device->sas_addr, SAS_ADDR_SIZE); - itct->sas_addr = __swab64(itct->sas_addr); + memcpy(&sas_addr, device->sas_addr, SAS_ADDR_SIZE); + itct->sas_addr = cpu_to_le64(__swab64(sas_addr)); /* qw2 */ if (!dev_is_sata(device)) @@ -2044,11 +2045,11 @@ static void slot_err_v2_hw(struct hisi_hba *hisi_hba, struct task_status_struct *ts = &task->task_status; struct hisi_sas_err_record_v2 *err_record = hisi_sas_status_buf_addr_mem(slot); - u32 trans_tx_fail_type = cpu_to_le32(err_record->trans_tx_fail_type); - u32 trans_rx_fail_type = cpu_to_le32(err_record->trans_rx_fail_type); - u16 dma_tx_err_type = cpu_to_le16(err_record->dma_tx_err_type); - u16 sipc_rx_err_type = cpu_to_le16(err_record->sipc_rx_err_type); - u32 dma_rx_err_type = cpu_to_le32(err_record->dma_rx_err_type); + u32 trans_tx_fail_type = le32_to_cpu(err_record->trans_tx_fail_type); + u32 trans_rx_fail_type = le32_to_cpu(err_record->trans_rx_fail_type); + u16 dma_tx_err_type = le16_to_cpu(err_record->dma_tx_err_type); + u16 sipc_rx_err_type = le16_to_cpu(err_record->sipc_rx_err_type); + u32 dma_rx_err_type = le32_to_cpu(err_record->dma_rx_err_type); int error = -1; if (err_phase == 1) { @@ -2059,8 +2060,7 @@ static void slot_err_v2_hw(struct hisi_hba *hisi_hba, trans_tx_fail_type); } else if (err_phase == 2) { /* error in RX phase, the priority is: DW1 > DW3 > DW2 */ - error = parse_trans_rx_err_code_v2_hw( - trans_rx_fail_type); + error = parse_trans_rx_err_code_v2_hw(trans_rx_fail_type); if (error == -1) { error = parse_dma_rx_err_code_v2_hw( dma_rx_err_type); @@ -2358,6 +2358,7 @@ slot_complete_v2_hw(struct hisi_hba *hisi_hba, struct hisi_sas_slot *slot) &complete_queue[slot->cmplt_queue_slot]; unsigned long flags; bool is_internal = slot->is_internal; + u32 dw0; if (unlikely(!task || !task->lldd_task || !task->dev)) return -EINVAL; @@ -2382,8 +2383,9 @@ slot_complete_v2_hw(struct hisi_hba *hisi_hba, struct hisi_sas_slot *slot) } /* Use SAS+TMF status codes */ - switch ((complete_hdr->dw0 & CMPLT_HDR_ABORT_STAT_MSK) - >> CMPLT_HDR_ABORT_STAT_OFF) { + dw0 = le32_to_cpu(complete_hdr->dw0); + switch ((dw0 & CMPLT_HDR_ABORT_STAT_MSK) >> + CMPLT_HDR_ABORT_STAT_OFF) { case STAT_IO_ABORTED: /* this io has been aborted by abort command */ ts->stat = SAS_ABORTED_TASK; @@ -2408,9 +2410,8 @@ slot_complete_v2_hw(struct hisi_hba *hisi_hba, struct hisi_sas_slot *slot) break; } - if ((complete_hdr->dw0 & CMPLT_HDR_ERX_MSK) && - (!(complete_hdr->dw0 & CMPLT_HDR_RSPNS_XFRD_MSK))) { - u32 err_phase = (complete_hdr->dw0 & CMPLT_HDR_ERR_PHASE_MSK) + if ((dw0 & CMPLT_HDR_ERX_MSK) && (!(dw0 & CMPLT_HDR_RSPNS_XFRD_MSK))) { + u32 err_phase = (dw0 & CMPLT_HDR_ERR_PHASE_MSK) >> CMPLT_HDR_ERR_PHASE_OFF; u32 *error_info = hisi_sas_status_buf_addr_mem(slot); @@ -2526,22 +2527,23 @@ static void prep_ata_v2_hw(struct hisi_hba *hisi_hba, struct hisi_sas_tmf_task *tmf = slot->tmf; u8 *buf_cmd; int has_data = 0, hdr_tag = 0; - u32 dw1 = 0, dw2 = 0; + u32 dw0, dw1 = 0, dw2 = 0; /* create header */ /* dw0 */ - hdr->dw0 = cpu_to_le32(port->id << CMD_HDR_PORT_OFF); + dw0 = port->id << CMD_HDR_PORT_OFF; if (parent_dev && DEV_IS_EXPANDER(parent_dev->dev_type)) - hdr->dw0 |= cpu_to_le32(3 << CMD_HDR_CMD_OFF); + dw0 |= 3 << CMD_HDR_CMD_OFF; else - hdr->dw0 |= cpu_to_le32(4 << CMD_HDR_CMD_OFF); + dw0 |= 4 << CMD_HDR_CMD_OFF; if (tmf && tmf->force_phy) { - hdr->dw0 |= CMD_HDR_FORCE_PHY_MSK; - hdr->dw0 |= cpu_to_le32((1 << tmf->phy_id) - << CMD_HDR_PHY_ID_OFF); + dw0 |= CMD_HDR_FORCE_PHY_MSK; + dw0 |= (1 << tmf->phy_id) << CMD_HDR_PHY_ID_OFF; } + hdr->dw0 = cpu_to_le32(dw0); + /* dw1 */ switch (task->data_dir) { case DMA_TO_DEVICE: @@ -3152,20 +3154,24 @@ static void cq_tasklet_v2_hw(unsigned long val) /* Check for NCQ completion */ if (complete_hdr->act) { - u32 act_tmp = complete_hdr->act; + u32 act_tmp = le32_to_cpu(complete_hdr->act); int ncq_tag_count = ffs(act_tmp); + u32 dw1 = le32_to_cpu(complete_hdr->dw1); - dev_id = (complete_hdr->dw1 & CMPLT_HDR_DEV_ID_MSK) >> + dev_id = (dw1 & CMPLT_HDR_DEV_ID_MSK) >> CMPLT_HDR_DEV_ID_OFF; itct = &hisi_hba->itct[dev_id]; /* The NCQ tags are held in the itct header */ while (ncq_tag_count) { - __le64 *ncq_tag = &itct->qw4_15[0]; + __le64 *_ncq_tag = &itct->qw4_15[0], __ncq_tag; + u64 ncq_tag; - ncq_tag_count -= 1; - iptt = (ncq_tag[ncq_tag_count / 5] - >> (ncq_tag_count % 5) * 12) & 0xfff; + ncq_tag_count--; + __ncq_tag = _ncq_tag[ncq_tag_count / 5]; + ncq_tag = le64_to_cpu(__ncq_tag); + iptt = (ncq_tag >> (ncq_tag_count % 5) * 12) & + 0xfff; slot = &hisi_hba->slot_info[iptt]; slot->cmplt_queue_slot = rd_point; @@ -3176,7 +3182,9 @@ static void cq_tasklet_v2_hw(unsigned long val) ncq_tag_count = ffs(act_tmp); } } else { - iptt = (complete_hdr->dw1) & CMPLT_HDR_IPTT_MSK; + u32 dw1 = le32_to_cpu(complete_hdr->dw1); + + iptt = dw1 & CMPLT_HDR_IPTT_MSK; slot = &hisi_hba->slot_info[iptt]; slot->cmplt_queue_slot = rd_point; slot->cmplt_queue = queue; @@ -3552,6 +3560,11 @@ static void wait_cmds_complete_timeout_v2_hw(struct hisi_hba *hisi_hba, dev_dbg(dev, "wait commands complete %dms\n", time); } +static struct device_attribute *host_attrs_v2_hw[] = { + &dev_attr_phy_event_threshold, + NULL +}; + static struct scsi_host_template sht_v2_hw = { .name = DRV_NAME, .module = THIS_MODULE, @@ -3563,14 +3576,13 @@ static struct scsi_host_template sht_v2_hw = { .change_queue_depth = sas_change_queue_depth, .bios_param = sas_bios_param, .this_id = -1, - .sg_tablesize = SG_ALL, + .sg_tablesize = HISI_SAS_SGE_PAGE_CNT, .max_sectors = SCSI_DEFAULT_MAX_SECTORS, - .use_clustering = ENABLE_CLUSTERING, .eh_device_reset_handler = sas_eh_device_reset_handler, .eh_target_reset_handler = sas_eh_target_reset_handler, .target_destroy = sas_target_destroy, .ioctl = sas_ioctl, - .shost_attrs = host_attrs, + .shost_attrs = host_attrs_v2_hw, }; static const struct hisi_sas_hw hisi_sas_v2_hw = { diff --git a/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c b/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c index a369450a1fa7..e2420a810e99 100644 --- a/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c +++ b/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c @@ -42,6 +42,7 @@ #define MAX_CON_TIME_LIMIT_TIME 0xa4 #define BUS_INACTIVE_LIMIT_TIME 0xa8 #define REJECT_TO_OPEN_LIMIT_TIME 0xac +#define CQ_INT_CONVERGE_EN 0xb0 #define CFG_AGING_TIME 0xbc #define HGC_DFX_CFG2 0xc0 #define CFG_ABT_SET_QUERY_IPTT 0xd4 @@ -126,6 +127,8 @@ #define PHY_CTRL (PORT_BASE + 0x14) #define PHY_CTRL_RESET_OFF 0 #define PHY_CTRL_RESET_MSK (0x1 << PHY_CTRL_RESET_OFF) +#define CMD_HDR_PIR_OFF 8 +#define CMD_HDR_PIR_MSK (0x1 << CMD_HDR_PIR_OFF) #define SL_CFG (PORT_BASE + 0x84) #define AIP_LIMIT (PORT_BASE + 0x90) #define SL_CONTROL (PORT_BASE + 0x94) @@ -332,6 +335,16 @@ #define ITCT_HDR_RTOLT_OFF 48 #define ITCT_HDR_RTOLT_MSK (0xffffULL << ITCT_HDR_RTOLT_OFF) +struct hisi_sas_protect_iu_v3_hw { + u32 dw0; + u32 lbrtcv; + u32 lbrtgv; + u32 dw3; + u32 dw4; + u32 dw5; + u32 rsv; +}; + struct hisi_sas_complete_v3_hdr { __le32 dw0; __le32 dw1; @@ -371,6 +384,28 @@ struct hisi_sas_err_record_v3 { ((fis.command == ATA_CMD_DEV_RESET) && \ ((fis.control & ATA_SRST) != 0))) +#define T10_INSRT_EN_OFF 0 +#define T10_INSRT_EN_MSK (1 << T10_INSRT_EN_OFF) +#define T10_RMV_EN_OFF 1 +#define T10_RMV_EN_MSK (1 << T10_RMV_EN_OFF) +#define T10_RPLC_EN_OFF 2 +#define T10_RPLC_EN_MSK (1 << T10_RPLC_EN_OFF) +#define T10_CHK_EN_OFF 3 +#define T10_CHK_EN_MSK (1 << T10_CHK_EN_OFF) +#define INCR_LBRT_OFF 5 +#define INCR_LBRT_MSK (1 << INCR_LBRT_OFF) +#define USR_DATA_BLOCK_SZ_OFF 20 +#define USR_DATA_BLOCK_SZ_MSK (0x3 << USR_DATA_BLOCK_SZ_OFF) +#define T10_CHK_MSK_OFF 16 + +static bool hisi_sas_intr_conv; +MODULE_PARM_DESC(intr_conv, "interrupt converge enable (0-1)"); + +/* permit overriding the host protection capabilities mask (EEDP/T10 PI) */ +static int prot_mask; +module_param(prot_mask, int, 0); +MODULE_PARM_DESC(prot_mask, " host protection capabilities mask, def=0x0 "); + static u32 hisi_sas_read32(struct hisi_hba *hisi_hba, u32 off) { void __iomem *regs = hisi_hba->regs + off; @@ -436,6 +471,8 @@ static void init_reg_v3_hw(struct hisi_hba *hisi_hba) hisi_sas_write32(hisi_hba, INT_COAL_EN, 0x1); hisi_sas_write32(hisi_hba, OQ_INT_COAL_TIME, 0x1); hisi_sas_write32(hisi_hba, OQ_INT_COAL_CNT, 0x1); + hisi_sas_write32(hisi_hba, CQ_INT_CONVERGE_EN, + hisi_sas_intr_conv); hisi_sas_write32(hisi_hba, OQ_INT_SRC, 0xffff); hisi_sas_write32(hisi_hba, ENT_INT_SRC1, 0xffffffff); hisi_sas_write32(hisi_hba, ENT_INT_SRC2, 0xffffffff); @@ -494,7 +531,7 @@ static void init_reg_v3_hw(struct hisi_hba *hisi_hba) hisi_sas_phy_write32(hisi_hba, i, PHYCTRL_OOB_RESTART_MSK, 0x1); hisi_sas_phy_write32(hisi_hba, i, STP_LINK_TIMER, 0x7f7a120); hisi_sas_phy_write32(hisi_hba, i, CON_CFG_DRIVER, 0x2a0a01); - + hisi_sas_phy_write32(hisi_hba, i, SAS_SSP_CON_TIMER_CFG, 0x32); /* used for 12G negotiate */ hisi_sas_phy_write32(hisi_hba, i, COARSETUNE_TIME, 0x1e); hisi_sas_phy_write32(hisi_hba, i, AIP_LIMIT, 0x2ffff); @@ -622,6 +659,7 @@ static void setup_itct_v3_hw(struct hisi_hba *hisi_hba, struct domain_device *parent_dev = device->parent; struct asd_sas_port *sas_port = device->port; struct hisi_sas_port *port = to_hisi_sas_port(sas_port); + u64 sas_addr; memset(itct, 0, sizeof(*itct)); @@ -654,8 +692,8 @@ static void setup_itct_v3_hw(struct hisi_hba *hisi_hba, itct->qw0 = cpu_to_le64(qw0); /* qw1 */ - memcpy(&itct->sas_addr, device->sas_addr, SAS_ADDR_SIZE); - itct->sas_addr = __swab64(itct->sas_addr); + memcpy(&sas_addr, device->sas_addr, SAS_ADDR_SIZE); + itct->sas_addr = cpu_to_le64(__swab64(sas_addr)); /* qw2 */ if (!dev_is_sata(device)) @@ -932,6 +970,58 @@ static void prep_prd_sge_v3_hw(struct hisi_hba *hisi_hba, hdr->sg_len = cpu_to_le32(n_elem << CMD_HDR_DATA_SGL_LEN_OFF); } +static u32 get_prot_chk_msk_v3_hw(struct scsi_cmnd *scsi_cmnd) +{ + unsigned char prot_flags = scsi_cmnd->prot_flags; + + if (prot_flags & SCSI_PROT_TRANSFER_PI) { + if (prot_flags & SCSI_PROT_REF_CHECK) + return 0xc << 16; + return 0xfc << 16; + } + return 0; +} + +static void fill_prot_v3_hw(struct scsi_cmnd *scsi_cmnd, + struct hisi_sas_protect_iu_v3_hw *prot) +{ + unsigned char prot_op = scsi_get_prot_op(scsi_cmnd); + unsigned int interval = scsi_prot_interval(scsi_cmnd); + u32 lbrt_chk_val = t10_pi_ref_tag(scsi_cmnd->request); + + switch (prot_op) { + case SCSI_PROT_READ_STRIP: + prot->dw0 |= (T10_RMV_EN_MSK | T10_CHK_EN_MSK); + prot->lbrtcv = lbrt_chk_val; + prot->dw4 |= get_prot_chk_msk_v3_hw(scsi_cmnd); + break; + case SCSI_PROT_WRITE_INSERT: + prot->dw0 |= T10_INSRT_EN_MSK; + prot->lbrtgv = lbrt_chk_val; + break; + default: + WARN(1, "prot_op(0x%x) is not valid\n", prot_op); + break; + } + + switch (interval) { + case 512: + break; + case 4096: + prot->dw0 |= (0x1 << USR_DATA_BLOCK_SZ_OFF); + break; + case 520: + prot->dw0 |= (0x2 << USR_DATA_BLOCK_SZ_OFF); + break; + default: + WARN(1, "protection interval (0x%x) invalid\n", + interval); + break; + } + + prot->dw0 |= INCR_LBRT_MSK; +} + static void prep_ssp_v3_hw(struct hisi_hba *hisi_hba, struct hisi_sas_slot *slot) { @@ -943,9 +1033,10 @@ static void prep_ssp_v3_hw(struct hisi_hba *hisi_hba, struct sas_ssp_task *ssp_task = &task->ssp_task; struct scsi_cmnd *scsi_cmnd = ssp_task->cmd; struct hisi_sas_tmf_task *tmf = slot->tmf; + unsigned char prot_op = scsi_get_prot_op(scsi_cmnd); int has_data = 0, priority = !!tmf; u8 *buf_cmd; - u32 dw1 = 0, dw2 = 0; + u32 dw1 = 0, dw2 = 0, len = 0; hdr->dw0 = cpu_to_le32((1 << CMD_HDR_RESP_REPORT_OFF) | (2 << CMD_HDR_TLR_CTRL_OFF) | @@ -975,7 +1066,6 @@ static void prep_ssp_v3_hw(struct hisi_hba *hisi_hba, /* map itct entry */ dw1 |= sas_dev->device_id << CMD_HDR_DEV_ID_OFF; - hdr->dw1 = cpu_to_le32(dw1); dw2 = (((sizeof(struct ssp_command_iu) + sizeof(struct ssp_frame_hdr) + 3) / 4) << CMD_HDR_CFL_OFF) | @@ -988,7 +1078,6 @@ static void prep_ssp_v3_hw(struct hisi_hba *hisi_hba, prep_prd_sge_v3_hw(hisi_hba, slot, hdr, task->scatter, slot->n_elem); - hdr->data_transfer_len = cpu_to_le32(task->total_xfer_len); hdr->cmd_table_addr = cpu_to_le64(hisi_sas_cmd_hdr_addr_dma(slot)); hdr->sts_buffer_addr = cpu_to_le64(hisi_sas_status_buf_addr_dma(slot)); @@ -1013,6 +1102,38 @@ static void prep_ssp_v3_hw(struct hisi_hba *hisi_hba, break; } } + + if (has_data && (prot_op != SCSI_PROT_NORMAL)) { + struct hisi_sas_protect_iu_v3_hw prot; + u8 *buf_cmd_prot; + + hdr->dw7 |= cpu_to_le32(1 << CMD_HDR_ADDR_MODE_SEL_OFF); + dw1 |= CMD_HDR_PIR_MSK; + buf_cmd_prot = hisi_sas_cmd_hdr_addr_mem(slot) + + sizeof(struct ssp_frame_hdr) + + sizeof(struct ssp_command_iu); + + memset(&prot, 0, sizeof(struct hisi_sas_protect_iu_v3_hw)); + fill_prot_v3_hw(scsi_cmnd, &prot); + memcpy(buf_cmd_prot, &prot, + sizeof(struct hisi_sas_protect_iu_v3_hw)); + + /* + * For READ, we need length of info read to memory, while for + * WRITE we need length of data written to the disk. + */ + if (prot_op == SCSI_PROT_WRITE_INSERT) { + unsigned int interval = scsi_prot_interval(scsi_cmnd); + unsigned int ilog2_interval = ilog2(interval); + + len = (task->total_xfer_len >> ilog2_interval) * 8; + } + + } + + hdr->dw1 = cpu_to_le32(dw1); + + hdr->data_transfer_len = cpu_to_le32(task->total_xfer_len + len); } static void prep_smp_v3_hw(struct hisi_hba *hisi_hba, @@ -1584,15 +1705,16 @@ slot_err_v3_hw(struct hisi_hba *hisi_hba, struct sas_task *task, &complete_queue[slot->cmplt_queue_slot]; struct hisi_sas_err_record_v3 *record = hisi_sas_status_buf_addr_mem(slot); - u32 dma_rx_err_type = record->dma_rx_err_type; - u32 trans_tx_fail_type = record->trans_tx_fail_type; + u32 dma_rx_err_type = le32_to_cpu(record->dma_rx_err_type); + u32 trans_tx_fail_type = le32_to_cpu(record->trans_tx_fail_type); + u32 dw3 = le32_to_cpu(complete_hdr->dw3); switch (task->task_proto) { case SAS_PROTOCOL_SSP: if (dma_rx_err_type & RX_DATA_LEN_UNDERFLOW_MSK) { ts->residual = trans_tx_fail_type; ts->stat = SAS_DATA_UNDERRUN; - } else if (complete_hdr->dw3 & CMPLT_HDR_IO_IN_TARGET_MSK) { + } else if (dw3 & CMPLT_HDR_IO_IN_TARGET_MSK) { ts->stat = SAS_QUEUE_FULL; slot->abort = 1; } else { @@ -1606,7 +1728,7 @@ slot_err_v3_hw(struct hisi_hba *hisi_hba, struct sas_task *task, if (dma_rx_err_type & RX_DATA_LEN_UNDERFLOW_MSK) { ts->residual = trans_tx_fail_type; ts->stat = SAS_DATA_UNDERRUN; - } else if (complete_hdr->dw3 & CMPLT_HDR_IO_IN_TARGET_MSK) { + } else if (dw3 & CMPLT_HDR_IO_IN_TARGET_MSK) { ts->stat = SAS_PHY_DOWN; slot->abort = 1; } else { @@ -1639,6 +1761,7 @@ slot_complete_v3_hw(struct hisi_hba *hisi_hba, struct hisi_sas_slot *slot) &complete_queue[slot->cmplt_queue_slot]; unsigned long flags; bool is_internal = slot->is_internal; + u32 dw0, dw1, dw3; if (unlikely(!task || !task->lldd_task || !task->dev)) return -EINVAL; @@ -1662,11 +1785,14 @@ slot_complete_v3_hw(struct hisi_hba *hisi_hba, struct hisi_sas_slot *slot) goto out; } + dw0 = le32_to_cpu(complete_hdr->dw0); + dw1 = le32_to_cpu(complete_hdr->dw1); + dw3 = le32_to_cpu(complete_hdr->dw3); + /* * Use SAS+TMF status codes */ - switch ((complete_hdr->dw0 & CMPLT_HDR_ABORT_STAT_MSK) - >> CMPLT_HDR_ABORT_STAT_OFF) { + switch ((dw0 & CMPLT_HDR_ABORT_STAT_MSK) >> CMPLT_HDR_ABORT_STAT_OFF) { case STAT_IO_ABORTED: /* this IO has been aborted by abort command */ ts->stat = SAS_ABORTED_TASK; @@ -1689,7 +1815,7 @@ slot_complete_v3_hw(struct hisi_hba *hisi_hba, struct hisi_sas_slot *slot) } /* check for erroneous completion */ - if ((complete_hdr->dw0 & CMPLT_HDR_CMPLT_MSK) == 0x3) { + if ((dw0 & CMPLT_HDR_CMPLT_MSK) == 0x3) { u32 *error_info = hisi_sas_status_buf_addr_mem(slot); slot_err_v3_hw(hisi_hba, task, slot); @@ -1698,8 +1824,7 @@ slot_complete_v3_hw(struct hisi_hba *hisi_hba, struct hisi_sas_slot *slot) "CQ hdr: 0x%x 0x%x 0x%x 0x%x " "Error info: 0x%x 0x%x 0x%x 0x%x\n", slot->idx, task, sas_dev->device_id, - complete_hdr->dw0, complete_hdr->dw1, - complete_hdr->act, complete_hdr->dw3, + dw0, dw1, complete_hdr->act, dw3, error_info[0], error_info[1], error_info[2], error_info[3]); if (unlikely(slot->abort)) @@ -1797,11 +1922,13 @@ static void cq_tasklet_v3_hw(unsigned long val) while (rd_point != wr_point) { struct hisi_sas_complete_v3_hdr *complete_hdr; struct device *dev = hisi_hba->dev; + u32 dw1; int iptt; complete_hdr = &complete_queue[rd_point]; + dw1 = le32_to_cpu(complete_hdr->dw1); - iptt = (complete_hdr->dw1) & CMPLT_HDR_IPTT_MSK; + iptt = dw1 & CMPLT_HDR_IPTT_MSK; if (likely(iptt < HISI_SAS_COMMAND_ENTRIES_V3_HW)) { slot = &hisi_hba->slot_info[iptt]; slot->cmplt_queue_slot = rd_point; @@ -1878,10 +2005,12 @@ static int interrupt_init_v3_hw(struct hisi_hba *hisi_hba) for (i = 0; i < hisi_hba->queue_count; i++) { struct hisi_sas_cq *cq = &hisi_hba->cq[i]; struct tasklet_struct *t = &cq->tasklet; + int nr = hisi_sas_intr_conv ? 16 : 16 + i; + unsigned long irqflags = hisi_sas_intr_conv ? IRQF_SHARED : 0; - rc = devm_request_irq(dev, pci_irq_vector(pdev, i+16), - cq_interrupt_v3_hw, 0, - DRV_NAME " cq", cq); + rc = devm_request_irq(dev, pci_irq_vector(pdev, nr), + cq_interrupt_v3_hw, irqflags, + DRV_NAME " cq", cq); if (rc) { dev_err(dev, "could not request cq%d interrupt, rc=%d\n", @@ -1898,8 +2027,9 @@ static int interrupt_init_v3_hw(struct hisi_hba *hisi_hba) free_cq_irqs: for (k = 0; k < i; k++) { struct hisi_sas_cq *cq = &hisi_hba->cq[k]; + int nr = hisi_sas_intr_conv ? 16 : 16 + k; - free_irq(pci_irq_vector(pdev, k+16), cq); + free_irq(pci_irq_vector(pdev, nr), cq); } free_irq(pci_irq_vector(pdev, 11), hisi_hba); free_chnl_interrupt: @@ -2089,6 +2219,119 @@ static void wait_cmds_complete_timeout_v3_hw(struct hisi_hba *hisi_hba, dev_dbg(dev, "wait commands complete %dms\n", time); } +static ssize_t intr_conv_v3_hw_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + return scnprintf(buf, PAGE_SIZE, "%u\n", hisi_sas_intr_conv); +} +static DEVICE_ATTR_RO(intr_conv_v3_hw); + +static void config_intr_coal_v3_hw(struct hisi_hba *hisi_hba) +{ + /* config those registers between enable and disable PHYs */ + hisi_sas_stop_phys(hisi_hba); + + if (hisi_hba->intr_coal_ticks == 0 || + hisi_hba->intr_coal_count == 0) { + hisi_sas_write32(hisi_hba, INT_COAL_EN, 0x1); + hisi_sas_write32(hisi_hba, OQ_INT_COAL_TIME, 0x1); + hisi_sas_write32(hisi_hba, OQ_INT_COAL_CNT, 0x1); + } else { + hisi_sas_write32(hisi_hba, INT_COAL_EN, 0x3); + hisi_sas_write32(hisi_hba, OQ_INT_COAL_TIME, + hisi_hba->intr_coal_ticks); + hisi_sas_write32(hisi_hba, OQ_INT_COAL_CNT, + hisi_hba->intr_coal_count); + } + phys_init_v3_hw(hisi_hba); +} + +static ssize_t intr_coal_ticks_v3_hw_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct Scsi_Host *shost = class_to_shost(dev); + struct hisi_hba *hisi_hba = shost_priv(shost); + + return scnprintf(buf, PAGE_SIZE, "%u\n", + hisi_hba->intr_coal_ticks); +} + +static ssize_t intr_coal_ticks_v3_hw_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct Scsi_Host *shost = class_to_shost(dev); + struct hisi_hba *hisi_hba = shost_priv(shost); + u32 intr_coal_ticks; + int ret; + + ret = kstrtou32(buf, 10, &intr_coal_ticks); + if (ret) { + dev_err(dev, "Input data of interrupt coalesce unmatch\n"); + return -EINVAL; + } + + if (intr_coal_ticks >= BIT(24)) { + dev_err(dev, "intr_coal_ticks must be less than 2^24!\n"); + return -EINVAL; + } + + hisi_hba->intr_coal_ticks = intr_coal_ticks; + + config_intr_coal_v3_hw(hisi_hba); + + return count; +} +static DEVICE_ATTR_RW(intr_coal_ticks_v3_hw); + +static ssize_t intr_coal_count_v3_hw_show(struct device *dev, + struct device_attribute + *attr, char *buf) +{ + struct Scsi_Host *shost = class_to_shost(dev); + struct hisi_hba *hisi_hba = shost_priv(shost); + + return scnprintf(buf, PAGE_SIZE, "%u\n", + hisi_hba->intr_coal_count); +} + +static ssize_t intr_coal_count_v3_hw_store(struct device *dev, + struct device_attribute + *attr, const char *buf, size_t count) +{ + struct Scsi_Host *shost = class_to_shost(dev); + struct hisi_hba *hisi_hba = shost_priv(shost); + u32 intr_coal_count; + int ret; + + ret = kstrtou32(buf, 10, &intr_coal_count); + if (ret) { + dev_err(dev, "Input data of interrupt coalesce unmatch\n"); + return -EINVAL; + } + + if (intr_coal_count >= BIT(8)) { + dev_err(dev, "intr_coal_count must be less than 2^8!\n"); + return -EINVAL; + } + + hisi_hba->intr_coal_count = intr_coal_count; + + config_intr_coal_v3_hw(hisi_hba); + + return count; +} +static DEVICE_ATTR_RW(intr_coal_count_v3_hw); + +static struct device_attribute *host_attrs_v3_hw[] = { + &dev_attr_phy_event_threshold, + &dev_attr_intr_conv_v3_hw, + &dev_attr_intr_coal_ticks_v3_hw, + &dev_attr_intr_coal_count_v3_hw, + NULL +}; + static struct scsi_host_template sht_v3_hw = { .name = DRV_NAME, .module = THIS_MODULE, @@ -2100,14 +2343,13 @@ static struct scsi_host_template sht_v3_hw = { .change_queue_depth = sas_change_queue_depth, .bios_param = sas_bios_param, .this_id = -1, - .sg_tablesize = SG_ALL, + .sg_tablesize = HISI_SAS_SGE_PAGE_CNT, .max_sectors = SCSI_DEFAULT_MAX_SECTORS, - .use_clustering = ENABLE_CLUSTERING, .eh_device_reset_handler = sas_eh_device_reset_handler, .eh_target_reset_handler = sas_eh_target_reset_handler, .target_destroy = sas_target_destroy, .ioctl = sas_ioctl, - .shost_attrs = host_attrs, + .shost_attrs = host_attrs_v3_hw, .tag_alloc_policy = BLK_TAG_ALLOC_RR, }; @@ -2161,6 +2403,12 @@ hisi_sas_shost_alloc_pci(struct pci_dev *pdev) hisi_hba->shost = shost; SHOST_TO_SAS_HA(shost) = &hisi_hba->sha; + if (prot_mask & ~HISI_SAS_PROT_MASK) + dev_err(dev, "unsupported protection mask 0x%x, using default (0x0)\n", + prot_mask); + else + hisi_hba->prot_mask = prot_mask; + timer_setup(&hisi_hba->timer, NULL, 0); if (hisi_sas_get_fw_info(hisi_hba) < 0) @@ -2199,14 +2447,11 @@ hisi_sas_v3_probe(struct pci_dev *pdev, const struct pci_device_id *id) if (rc) goto err_out_disable_device; - if ((pci_set_dma_mask(pdev, DMA_BIT_MASK(64)) != 0) || - (pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64)) != 0)) { - if ((pci_set_dma_mask(pdev, DMA_BIT_MASK(32)) != 0) || - (pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32)) != 0)) { - dev_err(dev, "No usable DMA addressing method\n"); - rc = -EIO; - goto err_out_regions; - } + if (dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64)) || + dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32))) { + dev_err(dev, "No usable DMA addressing method\n"); + rc = -EIO; + goto err_out_regions; } shost = hisi_sas_shost_alloc_pci(pdev); @@ -2245,7 +2490,6 @@ hisi_sas_v3_probe(struct pci_dev *pdev, const struct pci_device_id *id) shost->max_lun = ~0; shost->max_channel = 1; shost->max_cmd_len = 16; - shost->sg_tablesize = min_t(u16, SG_ALL, HISI_SAS_SGE_PAGE_CNT); shost->can_queue = hisi_hba->hw->max_command_entries - HISI_SAS_RESERVED_IPTT_CNT; shost->cmd_per_lun = hisi_hba->hw->max_command_entries - @@ -2275,6 +2519,12 @@ hisi_sas_v3_probe(struct pci_dev *pdev, const struct pci_device_id *id) if (rc) goto err_out_register_ha; + if (hisi_hba->prot_mask) { + dev_info(dev, "Registering for DIF/DIX prot_mask=0x%x\n", + prot_mask); + scsi_host_set_prot(hisi_hba->shost, prot_mask); + } + scsi_scan_host(shost); return 0; @@ -2301,8 +2551,9 @@ hisi_sas_v3_destroy_irqs(struct pci_dev *pdev, struct hisi_hba *hisi_hba) free_irq(pci_irq_vector(pdev, 11), hisi_hba); for (i = 0; i < hisi_hba->queue_count; i++) { struct hisi_sas_cq *cq = &hisi_hba->cq[i]; + int nr = hisi_sas_intr_conv ? 16 : 16 + i; - free_irq(pci_irq_vector(pdev, i+16), cq); + free_irq(pci_irq_vector(pdev, nr), cq); } pci_free_irq_vectors(pdev); } @@ -2529,7 +2780,7 @@ static int hisi_sas_v3_suspend(struct pci_dev *pdev, pm_message_t state) struct hisi_hba *hisi_hba = sha->lldd_ha; struct device *dev = hisi_hba->dev; struct Scsi_Host *shost = hisi_hba->shost; - u32 device_state; + pci_power_t device_state; int rc; if (!pdev->pm_cap) { @@ -2575,7 +2826,7 @@ static int hisi_sas_v3_resume(struct pci_dev *pdev) struct Scsi_Host *shost = hisi_hba->shost; struct device *dev = hisi_hba->dev; unsigned int rc; - u32 device_state = pdev->current_state; + pci_power_t device_state = pdev->current_state; dev_warn(dev, "resuming from operating state [D%d]\n", device_state); @@ -2624,6 +2875,7 @@ static struct pci_driver sas_v3_pci_driver = { }; module_pci_driver(sas_v3_pci_driver); +module_param_named(intr_conv, hisi_sas_intr_conv, bool, 0444); MODULE_LICENSE("GPL"); MODULE_AUTHOR("John Garry <john.garry@huawei.com>"); diff --git a/drivers/scsi/hosts.c b/drivers/scsi/hosts.c index ea4b0bb0c1cd..eaf329db3973 100644 --- a/drivers/scsi/hosts.c +++ b/drivers/scsi/hosts.c @@ -222,18 +222,9 @@ int scsi_add_host_with_dma(struct Scsi_Host *shost, struct device *dev, if (error) goto fail; - if (shost_use_blk_mq(shost)) { - error = scsi_mq_setup_tags(shost); - if (error) - goto fail; - } else { - shost->bqt = blk_init_tags(shost->can_queue, - shost->hostt->tag_alloc_policy); - if (!shost->bqt) { - error = -ENOMEM; - goto fail; - } - } + error = scsi_mq_setup_tags(shost); + if (error) + goto fail; if (!shost->shost_gendev.parent) shost->shost_gendev.parent = dev ? dev : &platform_bus; @@ -309,8 +300,7 @@ int scsi_add_host_with_dma(struct Scsi_Host *shost, struct device *dev, pm_runtime_disable(&shost->shost_gendev); pm_runtime_set_suspended(&shost->shost_gendev); pm_runtime_put_noidle(&shost->shost_gendev); - if (shost_use_blk_mq(shost)) - scsi_mq_destroy_tags(shost); + scsi_mq_destroy_tags(shost); fail: return error; } @@ -344,13 +334,8 @@ static void scsi_host_dev_release(struct device *dev) kfree(dev_name(&shost->shost_dev)); } - if (shost_use_blk_mq(shost)) { - if (shost->tag_set.tags) - scsi_mq_destroy_tags(shost); - } else { - if (shost->bqt) - blk_free_tags(shost->bqt); - } + if (shost->tag_set.tags) + scsi_mq_destroy_tags(shost); kfree(shost->shost_data); @@ -431,7 +416,6 @@ struct Scsi_Host *scsi_host_alloc(struct scsi_host_template *sht, int privsize) shost->sg_prot_tablesize = sht->sg_prot_tablesize; shost->cmd_per_lun = sht->cmd_per_lun; shost->unchecked_isa_dma = sht->unchecked_isa_dma; - shost->use_clustering = sht->use_clustering; shost->no_write_same = sht->no_write_same; if (shost_eh_deadline == -1 || !sht->eh_host_reset_handler) @@ -464,6 +448,11 @@ struct Scsi_Host *scsi_host_alloc(struct scsi_host_template *sht, int privsize) else shost->max_sectors = SCSI_DEFAULT_MAX_SECTORS; + if (sht->max_segment_size) + shost->max_segment_size = sht->max_segment_size; + else + shost->max_segment_size = BLK_MAX_SEGMENT_SIZE; + /* * assume a 4GB boundary, if not set */ @@ -472,8 +461,6 @@ struct Scsi_Host *scsi_host_alloc(struct scsi_host_template *sht, int privsize) else shost->dma_boundary = 0xffffffff; - shost->use_blk_mq = scsi_use_blk_mq || shost->hostt->force_blk_mq; - device_initialize(&shost->shost_gendev); dev_set_name(&shost->shost_gendev, "host%d", shost->host_no); shost->shost_gendev.bus = &scsi_bus_type; diff --git a/drivers/scsi/hpsa.c b/drivers/scsi/hpsa.c index c9cccf35e9d7..ff67ef5d5347 100644 --- a/drivers/scsi/hpsa.c +++ b/drivers/scsi/hpsa.c @@ -965,7 +965,6 @@ static struct scsi_host_template hpsa_driver_template = { .scan_finished = hpsa_scan_finished, .change_queue_depth = hpsa_change_queue_depth, .this_id = -1, - .use_clustering = ENABLE_CLUSTERING, .eh_device_reset_handler = hpsa_eh_device_reset_handler, .ioctl = hpsa_ioctl, .slave_alloc = hpsa_slave_alloc, @@ -4663,6 +4662,7 @@ static int fixup_ioaccel_cdb(u8 *cdb, int *cdb_len) case WRITE_6: case WRITE_12: is_write = 1; + /* fall through */ case READ_6: case READ_12: if (*cdb_len == 6) { @@ -5093,6 +5093,7 @@ static int hpsa_scsi_ioaccel_raid_map(struct ctlr_info *h, switch (cmd->cmnd[0]) { case WRITE_6: is_write = 1; + /* fall through */ case READ_6: first_block = (((cmd->cmnd[1] & 0x1F) << 16) | (cmd->cmnd[2] << 8) | @@ -5103,6 +5104,7 @@ static int hpsa_scsi_ioaccel_raid_map(struct ctlr_info *h, break; case WRITE_10: is_write = 1; + /* fall through */ case READ_10: first_block = (((u64) cmd->cmnd[2]) << 24) | @@ -5115,6 +5117,7 @@ static int hpsa_scsi_ioaccel_raid_map(struct ctlr_info *h, break; case WRITE_12: is_write = 1; + /* fall through */ case READ_12: first_block = (((u64) cmd->cmnd[2]) << 24) | @@ -5129,6 +5132,7 @@ static int hpsa_scsi_ioaccel_raid_map(struct ctlr_info *h, break; case WRITE_16: is_write = 1; + /* fall through */ case READ_16: first_block = (((u64) cmd->cmnd[2]) << 56) | diff --git a/drivers/scsi/hptiop.c b/drivers/scsi/hptiop.c index 2fad7f03aa02..3eedfd4f8f57 100644 --- a/drivers/scsi/hptiop.c +++ b/drivers/scsi/hptiop.c @@ -1180,7 +1180,6 @@ static struct scsi_host_template driver_template = { .eh_host_reset_handler = hptiop_reset, .info = hptiop_info, .emulated = 0, - .use_clustering = ENABLE_CLUSTERING, .proc_name = driver_name, .shost_attrs = hptiop_attrs, .slave_configure = hptiop_slave_config, @@ -1309,11 +1308,11 @@ static int hptiop_probe(struct pci_dev *pcidev, const struct pci_device_id *id) /* Enable 64bit DMA if possible */ iop_ops = (struct hptiop_adapter_ops *)id->driver_data; - if (pci_set_dma_mask(pcidev, DMA_BIT_MASK(iop_ops->hw_dma_bit_mask))) { - if (pci_set_dma_mask(pcidev, DMA_BIT_MASK(32))) { - printk(KERN_ERR "hptiop: fail to set dma_mask\n"); - goto disable_pci_device; - } + if (dma_set_mask(&pcidev->dev, + DMA_BIT_MASK(iop_ops->hw_dma_bit_mask)) || + dma_set_mask(&pcidev->dev, DMA_BIT_MASK(32))) { + printk(KERN_ERR "hptiop: fail to set dma_mask\n"); + goto disable_pci_device; } if (pci_request_regions(pcidev, driver_name)) { diff --git a/drivers/scsi/ibmvscsi/ibmvfc.c b/drivers/scsi/ibmvscsi/ibmvfc.c index b64ca977825d..dbaa4f131433 100644 --- a/drivers/scsi/ibmvscsi/ibmvfc.c +++ b/drivers/scsi/ibmvscsi/ibmvfc.c @@ -3100,7 +3100,6 @@ static struct scsi_host_template driver_template = { .this_id = -1, .sg_tablesize = SG_ALL, .max_sectors = IBMVFC_MAX_SECTORS, - .use_clustering = ENABLE_CLUSTERING, .shost_attrs = ibmvfc_attrs, .track_queue_depth = 1, }; diff --git a/drivers/scsi/ibmvscsi/ibmvscsi.c b/drivers/scsi/ibmvscsi/ibmvscsi.c index 9df8a1a2299c..1135e74646e2 100644 --- a/drivers/scsi/ibmvscsi/ibmvscsi.c +++ b/drivers/scsi/ibmvscsi/ibmvscsi.c @@ -2079,7 +2079,6 @@ static struct scsi_host_template driver_template = { .can_queue = IBMVSCSI_MAX_REQUESTS_DEFAULT, .this_id = -1, .sg_tablesize = SG_ALL, - .use_clustering = ENABLE_CLUSTERING, .shost_attrs = ibmvscsi_attrs, }; diff --git a/drivers/scsi/ibmvscsi_tgt/ibmvscsi_tgt.c b/drivers/scsi/ibmvscsi_tgt/ibmvscsi_tgt.c index e63aadd10dfd..cc9cae469c4b 100644 --- a/drivers/scsi/ibmvscsi_tgt/ibmvscsi_tgt.c +++ b/drivers/scsi/ibmvscsi_tgt/ibmvscsi_tgt.c @@ -3695,11 +3695,6 @@ static int ibmvscsis_get_system_info(void) return 0; } -static char *ibmvscsis_get_fabric_name(void) -{ - return "ibmvscsis"; -} - static char *ibmvscsis_get_fabric_wwn(struct se_portal_group *se_tpg) { struct ibmvscsis_tport *tport = @@ -4044,9 +4039,8 @@ static struct configfs_attribute *ibmvscsis_tpg_attrs[] = { static const struct target_core_fabric_ops ibmvscsis_ops = { .module = THIS_MODULE, - .name = "ibmvscsis", + .fabric_name = "ibmvscsis", .max_data_sg_nents = MAX_TXU / PAGE_SIZE, - .get_fabric_name = ibmvscsis_get_fabric_name, .tpg_get_wwn = ibmvscsis_get_fabric_wwn, .tpg_get_tag = ibmvscsis_get_tag, .tpg_get_default_depth = ibmvscsis_get_default_depth, diff --git a/drivers/scsi/imm.c b/drivers/scsi/imm.c index 8c6627bc8a39..cea7f502e8ca 100644 --- a/drivers/scsi/imm.c +++ b/drivers/scsi/imm.c @@ -1110,7 +1110,6 @@ static struct scsi_host_template imm_template = { .bios_param = imm_biosparam, .this_id = 7, .sg_tablesize = SG_ALL, - .use_clustering = ENABLE_CLUSTERING, .can_queue = 1, .slave_alloc = imm_adjust_queue, }; diff --git a/drivers/scsi/initio.c b/drivers/scsi/initio.c index 7a91cf3ff173..eb2778b5c81b 100644 --- a/drivers/scsi/initio.c +++ b/drivers/scsi/initio.c @@ -2817,7 +2817,6 @@ static struct scsi_host_template initio_template = { .can_queue = MAX_TARGETS * i91u_MAXQUEUE, .this_id = 1, .sg_tablesize = SG_ALL, - .use_clustering = ENABLE_CLUSTERING, }; static int initio_probe_one(struct pci_dev *pdev, @@ -2840,7 +2839,7 @@ static int initio_probe_one(struct pci_dev *pdev, reg = 0; bios_seg = (bios_seg << 8) + ((u16) ((reg & 0xFF00) >> 8)); - if (pci_set_dma_mask(pdev, DMA_BIT_MASK(32))) { + if (dma_set_mask(&pdev->dev, DMA_BIT_MASK(32))) { printk(KERN_WARNING "i91u: Could not set 32 bit DMA mask\n"); error = -ENODEV; goto out_disable_device; diff --git a/drivers/scsi/ipr.c b/drivers/scsi/ipr.c index 271990bc065b..d1b4025a4503 100644 --- a/drivers/scsi/ipr.c +++ b/drivers/scsi/ipr.c @@ -6754,7 +6754,6 @@ static struct scsi_host_template driver_template = { .sg_tablesize = IPR_MAX_SGLIST, .max_sectors = IPR_IOA_MAX_SECTORS, .cmd_per_lun = IPR_MAX_CMD_PER_LUN, - .use_clustering = ENABLE_CLUSTERING, .shost_attrs = ipr_ioa_attrs, .sdev_attrs = ipr_dev_attrs, .proc_name = IPR_NAME, diff --git a/drivers/scsi/ips.c b/drivers/scsi/ips.c index ee8a1ecd58fd..e8bc8d328bab 100644 --- a/drivers/scsi/ips.c +++ b/drivers/scsi/ips.c @@ -365,7 +365,6 @@ static struct scsi_host_template ips_driver_template = { .this_id = -1, .sg_tablesize = IPS_MAX_SG, .cmd_per_lun = 3, - .use_clustering = ENABLE_CLUSTERING, .no_write_same = 1, }; @@ -1801,13 +1800,13 @@ ips_fill_scb_sg_single(ips_ha_t * ha, dma_addr_t busaddr, } if (IPS_USE_ENH_SGLIST(ha)) { scb->sg_list.enh_list[indx].address_lo = - cpu_to_le32(pci_dma_lo32(busaddr)); + cpu_to_le32(lower_32_bits(busaddr)); scb->sg_list.enh_list[indx].address_hi = - cpu_to_le32(pci_dma_hi32(busaddr)); + cpu_to_le32(upper_32_bits(busaddr)); scb->sg_list.enh_list[indx].length = cpu_to_le32(e_len); } else { scb->sg_list.std_list[indx].address = - cpu_to_le32(pci_dma_lo32(busaddr)); + cpu_to_le32(lower_32_bits(busaddr)); scb->sg_list.std_list[indx].length = cpu_to_le32(e_len); } @@ -6678,7 +6677,6 @@ ips_register_scsi(int index) sh->sg_tablesize = sh->hostt->sg_tablesize; sh->can_queue = sh->hostt->can_queue; sh->cmd_per_lun = sh->hostt->cmd_per_lun; - sh->use_clustering = sh->hostt->use_clustering; sh->max_sectors = 128; sh->max_id = ha->ntargets; @@ -6926,7 +6924,7 @@ ips_init_phase1(struct pci_dev *pci_dev, int *indexPtr) * it! Also, don't use 64bit addressing if dma addresses * are guaranteed to be < 4G. */ - if (IPS_ENABLE_DMA64 && IPS_HAS_ENH_SGLIST(ha) && + if (sizeof(dma_addr_t) > 4 && IPS_HAS_ENH_SGLIST(ha) && !dma_set_mask(&ha->pcidev->dev, DMA_BIT_MASK(64))) { (ha)->flags |= IPS_HA_ENH_SG; } else { diff --git a/drivers/scsi/ips.h b/drivers/scsi/ips.h index db546171e97f..6c0678fb9a67 100644 --- a/drivers/scsi/ips.h +++ b/drivers/scsi/ips.h @@ -96,15 +96,6 @@ #define __iomem #endif - #define pci_dma_hi32(a) ((a >> 16) >> 16) - #define pci_dma_lo32(a) (a & 0xffffffff) - - #if (BITS_PER_LONG > 32) || defined(CONFIG_HIGHMEM64G) - #define IPS_ENABLE_DMA64 (1) - #else - #define IPS_ENABLE_DMA64 (0) - #endif - /* * Adapter address map equates */ diff --git a/drivers/scsi/isci/init.c b/drivers/scsi/isci/init.c index 08c7b1e25fe4..68b90c4f79a3 100644 --- a/drivers/scsi/isci/init.c +++ b/drivers/scsi/isci/init.c @@ -163,7 +163,6 @@ static struct scsi_host_template isci_sht = { .this_id = -1, .sg_tablesize = SG_ALL, .max_sectors = SCSI_DEFAULT_MAX_SECTORS, - .use_clustering = ENABLE_CLUSTERING, .eh_abort_handler = sas_eh_abort_handler, .eh_device_reset_handler = sas_eh_device_reset_handler, .eh_target_reset_handler = sas_eh_target_reset_handler, @@ -304,21 +303,10 @@ static int isci_pci_init(struct pci_dev *pdev) pci_set_master(pdev); - err = pci_set_dma_mask(pdev, DMA_BIT_MASK(64)); - if (err) { - err = pci_set_dma_mask(pdev, DMA_BIT_MASK(32)); - if (err) - return err; - } - - err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64)); - if (err) { - err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32)); - if (err) - return err; - } - - return 0; + err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64)); + if (err) + err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32)); + return err; } static int num_controllers(struct pci_dev *pdev) diff --git a/drivers/scsi/isci/phy.c b/drivers/scsi/isci/phy.c index 1deca8c5a94f..7f9b3f20e5e4 100644 --- a/drivers/scsi/isci/phy.c +++ b/drivers/scsi/isci/phy.c @@ -778,6 +778,7 @@ enum sci_status sci_phy_event_handler(struct isci_phy *iphy, u32 event_code) break; case SCU_EVENT_LINK_FAILURE: scu_link_layer_set_txcomsas_timeout(iphy, SCU_SAS_LINK_LAYER_TXCOMSAS_NEGTIME_DEFAULT); + /* fall through */ case SCU_EVENT_HARD_RESET_RECEIVED: /* Start the oob/sn state machine over again */ sci_change_state(&iphy->sm, SCI_PHY_STARTING); diff --git a/drivers/scsi/isci/remote_device.c b/drivers/scsi/isci/remote_device.c index cc51f38b116d..9d29edb9f590 100644 --- a/drivers/scsi/isci/remote_device.c +++ b/drivers/scsi/isci/remote_device.c @@ -310,7 +310,7 @@ static void isci_remote_device_not_ready(struct isci_host *ihost, /* Kill all outstanding requests for the device. */ sci_remote_device_terminate_requests(idev); - /* Fall through into the default case... */ + /* Fall through - into the default case... */ default: clear_bit(IDEV_IO_READY, &idev->flags); break; @@ -593,7 +593,7 @@ enum sci_status sci_remote_device_event_handler(struct isci_remote_device *idev, break; } - /* Else, fall through and treat as unhandled... */ + /* fall through - and treat as unhandled... */ default: dev_dbg(scirdev_to_dev(idev), "%s: device: %p event code: %x: %s\n", diff --git a/drivers/scsi/isci/remote_node_context.c b/drivers/scsi/isci/remote_node_context.c index e3f2a5359d71..474a43460963 100644 --- a/drivers/scsi/isci/remote_node_context.c +++ b/drivers/scsi/isci/remote_node_context.c @@ -601,9 +601,9 @@ enum sci_status sci_remote_node_context_suspend( __func__, sci_rnc); return SCI_FAILURE_INVALID_STATE; } - /* Fall through and handle like SCI_RNC_POSTING */ + /* Fall through - and handle like SCI_RNC_POSTING */ case SCI_RNC_RESUMING: - /* Fall through and handle like SCI_RNC_POSTING */ + /* Fall through - and handle like SCI_RNC_POSTING */ case SCI_RNC_POSTING: /* Set the destination state to AWAIT - this signals the * entry into the SCI_RNC_READY state that a suspension diff --git a/drivers/scsi/isci/request.c b/drivers/scsi/isci/request.c index 2f151708b59a..1b18cf55167e 100644 --- a/drivers/scsi/isci/request.c +++ b/drivers/scsi/isci/request.c @@ -894,7 +894,7 @@ sci_io_request_terminate(struct isci_request *ireq) * and don't wait for the task response. */ sci_change_state(&ireq->sm, SCI_REQ_ABORTING); - /* Fall through and handle like ABORTING... */ + /* Fall through - and handle like ABORTING... */ case SCI_REQ_ABORTING: if (!isci_remote_device_is_safe_to_abort(ireq->target_device)) set_bit(IREQ_PENDING_ABORT, &ireq->flags); diff --git a/drivers/scsi/iscsi_tcp.c b/drivers/scsi/iscsi_tcp.c index 23354f206533..cae6368ebb98 100644 --- a/drivers/scsi/iscsi_tcp.c +++ b/drivers/scsi/iscsi_tcp.c @@ -44,6 +44,7 @@ #include <scsi/scsi_host.h> #include <scsi/scsi.h> #include <scsi/scsi_transport_iscsi.h> +#include <trace/events/iscsi.h> #include "iscsi_tcp.h" @@ -72,6 +73,9 @@ MODULE_PARM_DESC(debug_iscsi_tcp, "Turn on debugging for iscsi_tcp module " iscsi_conn_printk(KERN_INFO, _conn, \ "%s " dbg_fmt, \ __func__, ##arg); \ + iscsi_dbg_trace(trace_iscsi_dbg_sw_tcp, \ + &(_conn)->cls_conn->dev, \ + "%s " dbg_fmt, __func__, ##arg);\ } while (0); @@ -980,7 +984,7 @@ static struct scsi_host_template iscsi_sw_tcp_sht = { .eh_abort_handler = iscsi_eh_abort, .eh_device_reset_handler= iscsi_eh_device_reset, .eh_target_reset_handler = iscsi_eh_recover_target, - .use_clustering = DISABLE_CLUSTERING, + .dma_boundary = PAGE_SIZE - 1, .slave_alloc = iscsi_sw_tcp_slave_alloc, .slave_configure = iscsi_sw_tcp_slave_configure, .target_alloc = iscsi_target_alloc, diff --git a/drivers/scsi/libfc/fc_rport.c b/drivers/scsi/libfc/fc_rport.c index 1e1c0f1b9e69..9192a1d9dec6 100644 --- a/drivers/scsi/libfc/fc_rport.c +++ b/drivers/scsi/libfc/fc_rport.c @@ -860,7 +860,6 @@ static void fc_rport_enter_flogi(struct fc_rport_priv *rdata) static void fc_rport_recv_flogi_req(struct fc_lport *lport, struct fc_frame *rx_fp) { - struct fc_disc *disc; struct fc_els_flogi *flp; struct fc_rport_priv *rdata; struct fc_frame *fp = rx_fp; @@ -871,7 +870,6 @@ static void fc_rport_recv_flogi_req(struct fc_lport *lport, FC_RPORT_ID_DBG(lport, sid, "Received FLOGI request\n"); - disc = &lport->disc; if (!lport->point_to_multipoint) { rjt_data.reason = ELS_RJT_UNSUP; rjt_data.explan = ELS_EXPL_NONE; @@ -1724,6 +1722,7 @@ static void fc_rport_recv_els_req(struct fc_lport *lport, struct fc_frame *fp) kref_put(&rdata->kref, fc_rport_destroy); goto busy; } + /* fall through */ default: FC_RPORT_DBG(rdata, "Reject ELS 0x%02x while in state %s\n", diff --git a/drivers/scsi/libiscsi.c b/drivers/scsi/libiscsi.c index f78d2e5c1471..b8d325ce8754 100644 --- a/drivers/scsi/libiscsi.c +++ b/drivers/scsi/libiscsi.c @@ -40,6 +40,7 @@ #include <scsi/scsi_transport.h> #include <scsi/scsi_transport_iscsi.h> #include <scsi/libiscsi.h> +#include <trace/events/iscsi.h> static int iscsi_dbg_lib_conn; module_param_named(debug_libiscsi_conn, iscsi_dbg_lib_conn, int, @@ -68,6 +69,9 @@ MODULE_PARM_DESC(debug_libiscsi_eh, iscsi_conn_printk(KERN_INFO, _conn, \ "%s " dbg_fmt, \ __func__, ##arg); \ + iscsi_dbg_trace(trace_iscsi_dbg_conn, \ + &(_conn)->cls_conn->dev, \ + "%s " dbg_fmt, __func__, ##arg);\ } while (0); #define ISCSI_DBG_SESSION(_session, dbg_fmt, arg...) \ @@ -76,6 +80,9 @@ MODULE_PARM_DESC(debug_libiscsi_eh, iscsi_session_printk(KERN_INFO, _session, \ "%s " dbg_fmt, \ __func__, ##arg); \ + iscsi_dbg_trace(trace_iscsi_dbg_session, \ + &(_session)->cls_session->dev, \ + "%s " dbg_fmt, __func__, ##arg); \ } while (0); #define ISCSI_DBG_EH(_session, dbg_fmt, arg...) \ @@ -84,6 +91,9 @@ MODULE_PARM_DESC(debug_libiscsi_eh, iscsi_session_printk(KERN_INFO, _session, \ "%s " dbg_fmt, \ __func__, ##arg); \ + iscsi_dbg_trace(trace_iscsi_dbg_eh, \ + &(_session)->cls_session->dev, \ + "%s " dbg_fmt, __func__, ##arg); \ } while (0); inline void iscsi_conn_queue_work(struct iscsi_conn *conn) diff --git a/drivers/scsi/libiscsi_tcp.c b/drivers/scsi/libiscsi_tcp.c index 4fcb9e65be57..8a6b1b3f8277 100644 --- a/drivers/scsi/libiscsi_tcp.c +++ b/drivers/scsi/libiscsi_tcp.c @@ -43,6 +43,7 @@ #include <scsi/scsi_host.h> #include <scsi/scsi.h> #include <scsi/scsi_transport_iscsi.h> +#include <trace/events/iscsi.h> #include "iscsi_tcp.h" @@ -65,6 +66,9 @@ MODULE_PARM_DESC(debug_libiscsi_tcp, "Turn on debugging for libiscsi_tcp " iscsi_conn_printk(KERN_INFO, _conn, \ "%s " dbg_fmt, \ __func__, ##arg); \ + iscsi_dbg_trace(trace_iscsi_dbg_tcp, \ + &(_conn)->cls_conn->dev, \ + "%s " dbg_fmt, __func__, ##arg);\ } while (0); static int iscsi_tcp_hdr_recv_done(struct iscsi_tcp_conn *tcp_conn, diff --git a/drivers/scsi/libsas/Makefile b/drivers/scsi/libsas/Makefile index 2e70140f70c3..5d51520c6f23 100644 --- a/drivers/scsi/libsas/Makefile +++ b/drivers/scsi/libsas/Makefile @@ -26,10 +26,11 @@ libsas-y += sas_init.o \ sas_phy.o \ sas_port.o \ sas_event.o \ - sas_dump.o \ sas_discover.o \ sas_expander.o \ sas_scsi_host.o \ sas_task.o libsas-$(CONFIG_SCSI_SAS_ATA) += sas_ata.o libsas-$(CONFIG_SCSI_SAS_HOST_SMP) += sas_host_smp.o + +ccflags-y := -DDEBUG diff --git a/drivers/scsi/libsas/sas_ata.c b/drivers/scsi/libsas/sas_ata.c index 4f6cdf53e913..6f93fee2b21b 100644 --- a/drivers/scsi/libsas/sas_ata.c +++ b/drivers/scsi/libsas/sas_ata.c @@ -75,8 +75,8 @@ static enum ata_completion_errors sas_to_ata_err(struct task_status_struct *ts) case SAS_OPEN_TO: case SAS_OPEN_REJECT: - SAS_DPRINTK("%s: Saw error %d. What to do?\n", - __func__, ts->stat); + pr_warn("%s: Saw error %d. What to do?\n", + __func__, ts->stat); return AC_ERR_OTHER; case SAM_STAT_CHECK_CONDITION: @@ -151,8 +151,7 @@ static void sas_ata_task_done(struct sas_task *task) } else { ac = sas_to_ata_err(stat); if (ac) { - SAS_DPRINTK("%s: SAS error %x\n", __func__, - stat->stat); + pr_warn("%s: SAS error %x\n", __func__, stat->stat); /* We saw a SAS error. Send a vague error. */ if (!link->sactive) { qc->err_mask = ac; @@ -237,7 +236,7 @@ static unsigned int sas_ata_qc_issue(struct ata_queued_cmd *qc) ret = i->dft->lldd_execute_task(task, GFP_ATOMIC); if (ret) { - SAS_DPRINTK("lldd_execute_task returned: %d\n", ret); + pr_debug("lldd_execute_task returned: %d\n", ret); if (qc->scsicmd) ASSIGN_SAS_TASK(qc->scsicmd, NULL); @@ -282,9 +281,9 @@ int sas_get_ata_info(struct domain_device *dev, struct ex_phy *phy) res = sas_get_report_phy_sata(dev->parent, phy->phy_id, &dev->sata_dev.rps_resp); if (res) { - SAS_DPRINTK("report phy sata to %016llx:0x%x returned " - "0x%x\n", SAS_ADDR(dev->parent->sas_addr), - phy->phy_id, res); + pr_debug("report phy sata to %016llx:0x%x returned 0x%x\n", + SAS_ADDR(dev->parent->sas_addr), + phy->phy_id, res); return res; } memcpy(dev->frame_rcvd, &dev->sata_dev.rps_resp.rps.fis, @@ -375,7 +374,7 @@ static int sas_ata_printk(const char *level, const struct domain_device *ddev, vaf.fmt = fmt; vaf.va = &args; - r = printk("%ssas: ata%u: %s: %pV", + r = printk("%s" SAS_FMT "ata%u: %s: %pV", level, ap->print_id, dev_name(dev), &vaf); va_end(args); @@ -431,8 +430,7 @@ static void sas_ata_internal_abort(struct sas_task *task) if (task->task_state_flags & SAS_TASK_STATE_ABORTED || task->task_state_flags & SAS_TASK_STATE_DONE) { spin_unlock_irqrestore(&task->task_state_lock, flags); - SAS_DPRINTK("%s: Task %p already finished.\n", __func__, - task); + pr_debug("%s: Task %p already finished.\n", __func__, task); goto out; } task->task_state_flags |= SAS_TASK_STATE_ABORTED; @@ -452,7 +450,7 @@ static void sas_ata_internal_abort(struct sas_task *task) * aborted ata tasks, otherwise we (likely) leak the sas task * here */ - SAS_DPRINTK("%s: Task %p leaked.\n", __func__, task); + pr_warn("%s: Task %p leaked.\n", __func__, task); if (!(task->task_state_flags & SAS_TASK_STATE_DONE)) task->task_state_flags &= ~SAS_TASK_STATE_ABORTED; @@ -558,7 +556,7 @@ int sas_ata_init(struct domain_device *found_dev) ata_host = kzalloc(sizeof(*ata_host), GFP_KERNEL); if (!ata_host) { - SAS_DPRINTK("ata host alloc failed.\n"); + pr_err("ata host alloc failed.\n"); return -ENOMEM; } @@ -566,7 +564,7 @@ int sas_ata_init(struct domain_device *found_dev) ap = ata_sas_port_alloc(ata_host, &sata_port_info, shost); if (!ap) { - SAS_DPRINTK("ata_sas_port_alloc failed.\n"); + pr_err("ata_sas_port_alloc failed.\n"); rc = -ENODEV; goto free_host; } @@ -601,12 +599,7 @@ void sas_ata_task_abort(struct sas_task *task) /* Bounce SCSI-initiated commands to the SCSI EH */ if (qc->scsicmd) { - struct request_queue *q = qc->scsicmd->device->request_queue; - unsigned long flags; - - spin_lock_irqsave(q->queue_lock, flags); blk_abort_request(qc->scsicmd->request); - spin_unlock_irqrestore(q->queue_lock, flags); return; } diff --git a/drivers/scsi/libsas/sas_discover.c b/drivers/scsi/libsas/sas_discover.c index dde433aa59c2..726ada9b8c79 100644 --- a/drivers/scsi/libsas/sas_discover.c +++ b/drivers/scsi/libsas/sas_discover.c @@ -128,7 +128,7 @@ static int sas_get_port_device(struct asd_sas_port *port) SAS_FANOUT_EXPANDER_DEVICE); break; default: - printk("ERROR: Unidentified device type %d\n", dev->dev_type); + pr_warn("ERROR: Unidentified device type %d\n", dev->dev_type); rphy = NULL; break; } @@ -186,10 +186,9 @@ int sas_notify_lldd_dev_found(struct domain_device *dev) res = i->dft->lldd_dev_found(dev); if (res) { - printk("sas: driver on pcidev %s cannot handle " - "device %llx, error:%d\n", - dev_name(sas_ha->dev), - SAS_ADDR(dev->sas_addr), res); + pr_warn("driver on host %s cannot handle device %llx, error:%d\n", + dev_name(sas_ha->dev), + SAS_ADDR(dev->sas_addr), res); } set_bit(SAS_DEV_FOUND, &dev->state); kref_get(&dev->kref); @@ -456,8 +455,8 @@ static void sas_discover_domain(struct work_struct *work) return; dev = port->port_dev; - SAS_DPRINTK("DOING DISCOVERY on port %d, pid:%d\n", port->id, - task_pid_nr(current)); + pr_debug("DOING DISCOVERY on port %d, pid:%d\n", port->id, + task_pid_nr(current)); switch (dev->dev_type) { case SAS_END_DEVICE: @@ -473,12 +472,12 @@ static void sas_discover_domain(struct work_struct *work) error = sas_discover_sata(dev); break; #else - SAS_DPRINTK("ATA device seen but CONFIG_SCSI_SAS_ATA=N so cannot attach\n"); + pr_notice("ATA device seen but CONFIG_SCSI_SAS_ATA=N so cannot attach\n"); /* Fall through */ #endif default: error = -ENXIO; - SAS_DPRINTK("unhandled device %d\n", dev->dev_type); + pr_err("unhandled device %d\n", dev->dev_type); break; } @@ -495,8 +494,8 @@ static void sas_discover_domain(struct work_struct *work) sas_probe_devices(port); - SAS_DPRINTK("DONE DISCOVERY on port %d, pid:%d, result:%d\n", port->id, - task_pid_nr(current), error); + pr_debug("DONE DISCOVERY on port %d, pid:%d, result:%d\n", port->id, + task_pid_nr(current), error); } static void sas_revalidate_domain(struct work_struct *work) @@ -510,22 +509,22 @@ static void sas_revalidate_domain(struct work_struct *work) /* prevent revalidation from finding sata links in recovery */ mutex_lock(&ha->disco_mutex); if (test_bit(SAS_HA_ATA_EH_ACTIVE, &ha->state)) { - SAS_DPRINTK("REVALIDATION DEFERRED on port %d, pid:%d\n", - port->id, task_pid_nr(current)); + pr_debug("REVALIDATION DEFERRED on port %d, pid:%d\n", + port->id, task_pid_nr(current)); goto out; } clear_bit(DISCE_REVALIDATE_DOMAIN, &port->disc.pending); - SAS_DPRINTK("REVALIDATING DOMAIN on port %d, pid:%d\n", port->id, - task_pid_nr(current)); + pr_debug("REVALIDATING DOMAIN on port %d, pid:%d\n", port->id, + task_pid_nr(current)); if (ddev && (ddev->dev_type == SAS_FANOUT_EXPANDER_DEVICE || ddev->dev_type == SAS_EDGE_EXPANDER_DEVICE)) res = sas_ex_revalidate_domain(ddev); - SAS_DPRINTK("done REVALIDATING DOMAIN on port %d, pid:%d, res 0x%x\n", - port->id, task_pid_nr(current), res); + pr_debug("done REVALIDATING DOMAIN on port %d, pid:%d, res 0x%x\n", + port->id, task_pid_nr(current), res); out: mutex_unlock(&ha->disco_mutex); diff --git a/drivers/scsi/libsas/sas_dump.c b/drivers/scsi/libsas/sas_dump.c deleted file mode 100644 index 7e5d262e7a7d..000000000000 --- a/drivers/scsi/libsas/sas_dump.c +++ /dev/null @@ -1,63 +0,0 @@ -/* - * Serial Attached SCSI (SAS) Dump/Debugging routines - * - * Copyright (C) 2005 Adaptec, Inc. All rights reserved. - * Copyright (C) 2005 Luben Tuikov <luben_tuikov@adaptec.com> - * - * This file is licensed under GPLv2. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation; either version 2 of the - * License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - * - */ - -#include "sas_dump.h" - -static const char *sas_porte_str[] = { - [0] = "PORTE_BYTES_DMAED", - [1] = "PORTE_BROADCAST_RCVD", - [2] = "PORTE_LINK_RESET_ERR", - [3] = "PORTE_TIMER_EVENT", - [4] = "PORTE_HARD_RESET", -}; - -static const char *sas_phye_str[] = { - [0] = "PHYE_LOSS_OF_SIGNAL", - [1] = "PHYE_OOB_DONE", - [2] = "PHYE_OOB_ERROR", - [3] = "PHYE_SPINUP_HOLD", - [4] = "PHYE_RESUME_TIMEOUT", -}; - -void sas_dprint_porte(int phyid, enum port_event pe) -{ - SAS_DPRINTK("phy%d: port event: %s\n", phyid, sas_porte_str[pe]); -} -void sas_dprint_phye(int phyid, enum phy_event pe) -{ - SAS_DPRINTK("phy%d: phy event: %s\n", phyid, sas_phye_str[pe]); -} - -void sas_dump_port(struct asd_sas_port *port) -{ - SAS_DPRINTK("port%d: class:0x%x\n", port->id, port->class); - SAS_DPRINTK("port%d: sas_addr:%llx\n", port->id, - SAS_ADDR(port->sas_addr)); - SAS_DPRINTK("port%d: attached_sas_addr:%llx\n", port->id, - SAS_ADDR(port->attached_sas_addr)); - SAS_DPRINTK("port%d: iproto:0x%x\n", port->id, port->iproto); - SAS_DPRINTK("port%d: tproto:0x%x\n", port->id, port->tproto); - SAS_DPRINTK("port%d: oob_mode:0x%x\n", port->id, port->oob_mode); - SAS_DPRINTK("port%d: num_phys:%d\n", port->id, port->num_phys); -} diff --git a/drivers/scsi/libsas/sas_dump.h b/drivers/scsi/libsas/sas_dump.h deleted file mode 100644 index 6aaee6b0fcdb..000000000000 --- a/drivers/scsi/libsas/sas_dump.h +++ /dev/null @@ -1,29 +0,0 @@ -/* - * Serial Attached SCSI (SAS) Dump/Debugging routines header file - * - * Copyright (C) 2005 Adaptec, Inc. All rights reserved. - * Copyright (C) 2005 Luben Tuikov <luben_tuikov@adaptec.com> - * - * This file is licensed under GPLv2. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation; either version 2 of the - * License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - * - */ - -#include "sas_internal.h" - -void sas_dprint_porte(int phyid, enum port_event pe); -void sas_dprint_phye(int phyid, enum phy_event pe); -void sas_dump_port(struct asd_sas_port *port); diff --git a/drivers/scsi/libsas/sas_event.c b/drivers/scsi/libsas/sas_event.c index ae923eb6de95..b1e0f7d2b396 100644 --- a/drivers/scsi/libsas/sas_event.c +++ b/drivers/scsi/libsas/sas_event.c @@ -25,7 +25,6 @@ #include <linux/export.h> #include <scsi/scsi_host.h> #include "sas_internal.h" -#include "sas_dump.h" int sas_queue_work(struct sas_ha_struct *ha, struct sas_work *sw) { diff --git a/drivers/scsi/libsas/sas_expander.c b/drivers/scsi/libsas/sas_expander.c index 0d1f72752ca2..17eb4185f29d 100644 --- a/drivers/scsi/libsas/sas_expander.c +++ b/drivers/scsi/libsas/sas_expander.c @@ -99,17 +99,17 @@ static int smp_execute_task_sg(struct domain_device *dev, if (res) { del_timer(&task->slow_task->timer); - SAS_DPRINTK("executing SMP task failed:%d\n", res); + pr_notice("executing SMP task failed:%d\n", res); break; } wait_for_completion(&task->slow_task->completion); res = -ECOMM; if ((task->task_state_flags & SAS_TASK_STATE_ABORTED)) { - SAS_DPRINTK("smp task timed out or aborted\n"); + pr_notice("smp task timed out or aborted\n"); i->dft->lldd_abort_task(task); if (!(task->task_state_flags & SAS_TASK_STATE_DONE)) { - SAS_DPRINTK("SMP task aborted and not done\n"); + pr_notice("SMP task aborted and not done\n"); break; } } @@ -134,11 +134,11 @@ static int smp_execute_task_sg(struct domain_device *dev, task->task_status.stat == SAS_DEVICE_UNKNOWN) break; else { - SAS_DPRINTK("%s: task to dev %016llx response: 0x%x " - "status 0x%x\n", __func__, - SAS_ADDR(dev->sas_addr), - task->task_status.resp, - task->task_status.stat); + pr_notice("%s: task to dev %016llx response: 0x%x status 0x%x\n", + __func__, + SAS_ADDR(dev->sas_addr), + task->task_status.resp, + task->task_status.stat); sas_free_task(task); task = NULL; } @@ -347,11 +347,11 @@ static void sas_set_ex_phy(struct domain_device *dev, int phy_id, void *rsp) if (test_bit(SAS_HA_ATA_EH_ACTIVE, &ha->state)) set_bit(DISCE_REVALIDATE_DOMAIN, &dev->port->disc.pending); - SAS_DPRINTK("%sex %016llx phy%02d:%c:%X attached: %016llx (%s)\n", - test_bit(SAS_HA_ATA_EH_ACTIVE, &ha->state) ? "ata: " : "", - SAS_ADDR(dev->sas_addr), phy->phy_id, - sas_route_char(dev, phy), phy->linkrate, - SAS_ADDR(phy->attached_sas_addr), type); + pr_debug("%sex %016llx phy%02d:%c:%X attached: %016llx (%s)\n", + test_bit(SAS_HA_ATA_EH_ACTIVE, &ha->state) ? "ata: " : "", + SAS_ADDR(dev->sas_addr), phy->phy_id, + sas_route_char(dev, phy), phy->linkrate, + SAS_ADDR(phy->attached_sas_addr), type); } /* check if we have an existing attached ata device on this expander phy */ @@ -393,7 +393,7 @@ static int sas_ex_phy_discover_helper(struct domain_device *dev, u8 *disc_req, return res; dr = &((struct smp_resp *)disc_resp)->disc; if (memcmp(dev->sas_addr, dr->attached_sas_addr, SAS_ADDR_SIZE) == 0) { - sas_printk("Found loopback topology, just ignore it!\n"); + pr_notice("Found loopback topology, just ignore it!\n"); return 0; } sas_set_ex_phy(dev, single, disc_resp); @@ -500,12 +500,12 @@ static int sas_ex_general(struct domain_device *dev) RG_RESP_SIZE); if (res) { - SAS_DPRINTK("RG to ex %016llx failed:0x%x\n", - SAS_ADDR(dev->sas_addr), res); + pr_notice("RG to ex %016llx failed:0x%x\n", + SAS_ADDR(dev->sas_addr), res); goto out; } else if (rg_resp->result != SMP_RESP_FUNC_ACC) { - SAS_DPRINTK("RG:ex %016llx returned SMP result:0x%x\n", - SAS_ADDR(dev->sas_addr), rg_resp->result); + pr_debug("RG:ex %016llx returned SMP result:0x%x\n", + SAS_ADDR(dev->sas_addr), rg_resp->result); res = rg_resp->result; goto out; } @@ -513,8 +513,8 @@ static int sas_ex_general(struct domain_device *dev) ex_assign_report_general(dev, rg_resp); if (dev->ex_dev.configuring) { - SAS_DPRINTK("RG: ex %llx self-configuring...\n", - SAS_ADDR(dev->sas_addr)); + pr_debug("RG: ex %llx self-configuring...\n", + SAS_ADDR(dev->sas_addr)); schedule_timeout_interruptible(5*HZ); } else break; @@ -568,12 +568,12 @@ static int sas_ex_manuf_info(struct domain_device *dev) res = smp_execute_task(dev, mi_req, MI_REQ_SIZE, mi_resp,MI_RESP_SIZE); if (res) { - SAS_DPRINTK("MI: ex %016llx failed:0x%x\n", - SAS_ADDR(dev->sas_addr), res); + pr_notice("MI: ex %016llx failed:0x%x\n", + SAS_ADDR(dev->sas_addr), res); goto out; } else if (mi_resp[2] != SMP_RESP_FUNC_ACC) { - SAS_DPRINTK("MI ex %016llx returned SMP result:0x%x\n", - SAS_ADDR(dev->sas_addr), mi_resp[2]); + pr_debug("MI ex %016llx returned SMP result:0x%x\n", + SAS_ADDR(dev->sas_addr), mi_resp[2]); goto out; } @@ -836,10 +836,9 @@ static struct domain_device *sas_ex_discover_end_dev( res = sas_discover_sata(child); if (res) { - SAS_DPRINTK("sas_discover_sata() for device %16llx at " - "%016llx:0x%x returned 0x%x\n", - SAS_ADDR(child->sas_addr), - SAS_ADDR(parent->sas_addr), phy_id, res); + pr_notice("sas_discover_sata() for device %16llx at %016llx:0x%x returned 0x%x\n", + SAS_ADDR(child->sas_addr), + SAS_ADDR(parent->sas_addr), phy_id, res); goto out_list_del; } } else @@ -861,16 +860,15 @@ static struct domain_device *sas_ex_discover_end_dev( res = sas_discover_end_dev(child); if (res) { - SAS_DPRINTK("sas_discover_end_dev() for device %16llx " - "at %016llx:0x%x returned 0x%x\n", - SAS_ADDR(child->sas_addr), - SAS_ADDR(parent->sas_addr), phy_id, res); + pr_notice("sas_discover_end_dev() for device %16llx at %016llx:0x%x returned 0x%x\n", + SAS_ADDR(child->sas_addr), + SAS_ADDR(parent->sas_addr), phy_id, res); goto out_list_del; } } else { - SAS_DPRINTK("target proto 0x%x at %016llx:0x%x not handled\n", - phy->attached_tproto, SAS_ADDR(parent->sas_addr), - phy_id); + pr_notice("target proto 0x%x at %016llx:0x%x not handled\n", + phy->attached_tproto, SAS_ADDR(parent->sas_addr), + phy_id); goto out_free; } @@ -927,11 +925,10 @@ static struct domain_device *sas_ex_discover_expander( int res; if (phy->routing_attr == DIRECT_ROUTING) { - SAS_DPRINTK("ex %016llx:0x%x:D <--> ex %016llx:0x%x is not " - "allowed\n", - SAS_ADDR(parent->sas_addr), phy_id, - SAS_ADDR(phy->attached_sas_addr), - phy->attached_phy_id); + pr_warn("ex %016llx:0x%x:D <--> ex %016llx:0x%x is not allowed\n", + SAS_ADDR(parent->sas_addr), phy_id, + SAS_ADDR(phy->attached_sas_addr), + phy->attached_phy_id); return NULL; } child = sas_alloc_device(); @@ -1038,25 +1035,24 @@ static int sas_ex_discover_dev(struct domain_device *dev, int phy_id) ex_phy->attached_dev_type != SAS_FANOUT_EXPANDER_DEVICE && ex_phy->attached_dev_type != SAS_EDGE_EXPANDER_DEVICE && ex_phy->attached_dev_type != SAS_SATA_PENDING) { - SAS_DPRINTK("unknown device type(0x%x) attached to ex %016llx " - "phy 0x%x\n", ex_phy->attached_dev_type, - SAS_ADDR(dev->sas_addr), - phy_id); + pr_warn("unknown device type(0x%x) attached to ex %016llx phy 0x%x\n", + ex_phy->attached_dev_type, + SAS_ADDR(dev->sas_addr), + phy_id); return 0; } res = sas_configure_routing(dev, ex_phy->attached_sas_addr); if (res) { - SAS_DPRINTK("configure routing for dev %016llx " - "reported 0x%x. Forgotten\n", - SAS_ADDR(ex_phy->attached_sas_addr), res); + pr_notice("configure routing for dev %016llx reported 0x%x. Forgotten\n", + SAS_ADDR(ex_phy->attached_sas_addr), res); sas_disable_routing(dev, ex_phy->attached_sas_addr); return res; } if (sas_ex_join_wide_port(dev, phy_id)) { - SAS_DPRINTK("Attaching ex phy%d to wide port %016llx\n", - phy_id, SAS_ADDR(ex_phy->attached_sas_addr)); + pr_debug("Attaching ex phy%d to wide port %016llx\n", + phy_id, SAS_ADDR(ex_phy->attached_sas_addr)); return res; } @@ -1067,12 +1063,11 @@ static int sas_ex_discover_dev(struct domain_device *dev, int phy_id) break; case SAS_FANOUT_EXPANDER_DEVICE: if (SAS_ADDR(dev->port->disc.fanout_sas_addr)) { - SAS_DPRINTK("second fanout expander %016llx phy 0x%x " - "attached to ex %016llx phy 0x%x\n", - SAS_ADDR(ex_phy->attached_sas_addr), - ex_phy->attached_phy_id, - SAS_ADDR(dev->sas_addr), - phy_id); + pr_debug("second fanout expander %016llx phy 0x%x attached to ex %016llx phy 0x%x\n", + SAS_ADDR(ex_phy->attached_sas_addr), + ex_phy->attached_phy_id, + SAS_ADDR(dev->sas_addr), + phy_id); sas_ex_disable_phy(dev, phy_id); break; } else @@ -1101,9 +1096,8 @@ static int sas_ex_discover_dev(struct domain_device *dev, int phy_id) SAS_ADDR(child->sas_addr)) { ex->ex_phy[i].phy_state= PHY_DEVICE_DISCOVERED; if (sas_ex_join_wide_port(dev, i)) - SAS_DPRINTK("Attaching ex phy%d to wide port %016llx\n", - i, SAS_ADDR(ex->ex_phy[i].attached_sas_addr)); - + pr_debug("Attaching ex phy%d to wide port %016llx\n", + i, SAS_ADDR(ex->ex_phy[i].attached_sas_addr)); } } } @@ -1154,13 +1148,11 @@ static int sas_check_level_subtractive_boundary(struct domain_device *dev) if (sas_find_sub_addr(child, s2) && (SAS_ADDR(sub_addr) != SAS_ADDR(s2))) { - SAS_DPRINTK("ex %016llx->%016llx-?->%016llx " - "diverges from subtractive " - "boundary %016llx\n", - SAS_ADDR(dev->sas_addr), - SAS_ADDR(child->sas_addr), - SAS_ADDR(s2), - SAS_ADDR(sub_addr)); + pr_notice("ex %016llx->%016llx-?->%016llx diverges from subtractive boundary %016llx\n", + SAS_ADDR(dev->sas_addr), + SAS_ADDR(child->sas_addr), + SAS_ADDR(s2), + SAS_ADDR(sub_addr)); sas_ex_disable_port(child, s2); } @@ -1239,12 +1231,10 @@ static int sas_check_ex_subtractive_boundary(struct domain_device *dev) else if (SAS_ADDR(sub_sas_addr) != SAS_ADDR(phy->attached_sas_addr)) { - SAS_DPRINTK("ex %016llx phy 0x%x " - "diverges(%016llx) on subtractive " - "boundary(%016llx). Disabled\n", - SAS_ADDR(dev->sas_addr), i, - SAS_ADDR(phy->attached_sas_addr), - SAS_ADDR(sub_sas_addr)); + pr_notice("ex %016llx phy 0x%x diverges(%016llx) on subtractive boundary(%016llx). Disabled\n", + SAS_ADDR(dev->sas_addr), i, + SAS_ADDR(phy->attached_sas_addr), + SAS_ADDR(sub_sas_addr)); sas_ex_disable_phy(dev, i); } } @@ -1262,19 +1252,17 @@ static void sas_print_parent_topology_bug(struct domain_device *child, }; struct domain_device *parent = child->parent; - sas_printk("%s ex %016llx phy 0x%x <--> %s ex %016llx " - "phy 0x%x has %c:%c routing link!\n", - - ex_type[parent->dev_type], - SAS_ADDR(parent->sas_addr), - parent_phy->phy_id, + pr_notice("%s ex %016llx phy 0x%x <--> %s ex %016llx phy 0x%x has %c:%c routing link!\n", + ex_type[parent->dev_type], + SAS_ADDR(parent->sas_addr), + parent_phy->phy_id, - ex_type[child->dev_type], - SAS_ADDR(child->sas_addr), - child_phy->phy_id, + ex_type[child->dev_type], + SAS_ADDR(child->sas_addr), + child_phy->phy_id, - sas_route_char(parent, parent_phy), - sas_route_char(child, child_phy)); + sas_route_char(parent, parent_phy), + sas_route_char(child, child_phy)); } static int sas_check_eeds(struct domain_device *child, @@ -1286,13 +1274,12 @@ static int sas_check_eeds(struct domain_device *child, if (SAS_ADDR(parent->port->disc.fanout_sas_addr) != 0) { res = -ENODEV; - SAS_DPRINTK("edge ex %016llx phy S:0x%x <--> edge ex %016llx " - "phy S:0x%x, while there is a fanout ex %016llx\n", - SAS_ADDR(parent->sas_addr), - parent_phy->phy_id, - SAS_ADDR(child->sas_addr), - child_phy->phy_id, - SAS_ADDR(parent->port->disc.fanout_sas_addr)); + pr_warn("edge ex %016llx phy S:0x%x <--> edge ex %016llx phy S:0x%x, while there is a fanout ex %016llx\n", + SAS_ADDR(parent->sas_addr), + parent_phy->phy_id, + SAS_ADDR(child->sas_addr), + child_phy->phy_id, + SAS_ADDR(parent->port->disc.fanout_sas_addr)); } else if (SAS_ADDR(parent->port->disc.eeds_a) == 0) { memcpy(parent->port->disc.eeds_a, parent->sas_addr, SAS_ADDR_SIZE); @@ -1310,12 +1297,11 @@ static int sas_check_eeds(struct domain_device *child, ; else { res = -ENODEV; - SAS_DPRINTK("edge ex %016llx phy 0x%x <--> edge ex %016llx " - "phy 0x%x link forms a third EEDS!\n", - SAS_ADDR(parent->sas_addr), - parent_phy->phy_id, - SAS_ADDR(child->sas_addr), - child_phy->phy_id); + pr_warn("edge ex %016llx phy 0x%x <--> edge ex %016llx phy 0x%x link forms a third EEDS!\n", + SAS_ADDR(parent->sas_addr), + parent_phy->phy_id, + SAS_ADDR(child->sas_addr), + child_phy->phy_id); } return res; @@ -1429,14 +1415,13 @@ static int sas_configure_present(struct domain_device *dev, int phy_id, goto out; res = rri_resp[2]; if (res == SMP_RESP_NO_INDEX) { - SAS_DPRINTK("overflow of indexes: dev %016llx " - "phy 0x%x index 0x%x\n", - SAS_ADDR(dev->sas_addr), phy_id, i); + pr_warn("overflow of indexes: dev %016llx phy 0x%x index 0x%x\n", + SAS_ADDR(dev->sas_addr), phy_id, i); goto out; } else if (res != SMP_RESP_FUNC_ACC) { - SAS_DPRINTK("%s: dev %016llx phy 0x%x index 0x%x " - "result 0x%x\n", __func__, - SAS_ADDR(dev->sas_addr), phy_id, i, res); + pr_notice("%s: dev %016llx phy 0x%x index 0x%x result 0x%x\n", + __func__, SAS_ADDR(dev->sas_addr), phy_id, + i, res); goto out; } if (SAS_ADDR(sas_addr) != 0) { @@ -1500,9 +1485,8 @@ static int sas_configure_set(struct domain_device *dev, int phy_id, goto out; res = cri_resp[2]; if (res == SMP_RESP_NO_INDEX) { - SAS_DPRINTK("overflow of indexes: dev %016llx phy 0x%x " - "index 0x%x\n", - SAS_ADDR(dev->sas_addr), phy_id, index); + pr_warn("overflow of indexes: dev %016llx phy 0x%x index 0x%x\n", + SAS_ADDR(dev->sas_addr), phy_id, index); } out: kfree(cri_req); @@ -1549,8 +1533,8 @@ static int sas_configure_parent(struct domain_device *parent, } if (ex_parent->conf_route_table == 0) { - SAS_DPRINTK("ex %016llx has self-configuring routing table\n", - SAS_ADDR(parent->sas_addr)); + pr_debug("ex %016llx has self-configuring routing table\n", + SAS_ADDR(parent->sas_addr)); return 0; } @@ -1611,8 +1595,8 @@ static int sas_discover_expander(struct domain_device *dev) res = sas_expander_discover(dev); if (res) { - SAS_DPRINTK("expander %016llx discovery failed(0x%x)\n", - SAS_ADDR(dev->sas_addr), res); + pr_warn("expander %016llx discovery failed(0x%x)\n", + SAS_ADDR(dev->sas_addr), res); goto out_err; } @@ -1856,10 +1840,10 @@ static int sas_find_bcast_dev(struct domain_device *dev, if (phy_id != -1) { *src_dev = dev; ex->ex_change_count = ex_change_count; - SAS_DPRINTK("Expander phy change count has changed\n"); + pr_info("Expander phy change count has changed\n"); return res; } else - SAS_DPRINTK("Expander phys DID NOT change\n"); + pr_info("Expander phys DID NOT change\n"); } list_for_each_entry(ch, &ex->children, siblings) { if (ch->dev_type == SAS_EDGE_EXPANDER_DEVICE || ch->dev_type == SAS_FANOUT_EXPANDER_DEVICE) { @@ -1969,8 +1953,8 @@ static int sas_discover_new(struct domain_device *dev, int phy_id) struct domain_device *child; int res; - SAS_DPRINTK("ex %016llx phy%d new device attached\n", - SAS_ADDR(dev->sas_addr), phy_id); + pr_debug("ex %016llx phy%d new device attached\n", + SAS_ADDR(dev->sas_addr), phy_id); res = sas_ex_phy_discover(dev, phy_id); if (res) return res; @@ -2048,15 +2032,15 @@ static int sas_rediscover_dev(struct domain_device *dev, int phy_id, bool last) if (ata_dev && phy->attached_dev_type == SAS_SATA_PENDING) action = ", needs recovery"; - SAS_DPRINTK("ex %016llx phy 0x%x broadcast flutter%s\n", - SAS_ADDR(dev->sas_addr), phy_id, action); + pr_debug("ex %016llx phy 0x%x broadcast flutter%s\n", + SAS_ADDR(dev->sas_addr), phy_id, action); return res; } /* we always have to delete the old device when we went here */ - SAS_DPRINTK("ex %016llx phy 0x%x replace %016llx\n", - SAS_ADDR(dev->sas_addr), phy_id, - SAS_ADDR(phy->attached_sas_addr)); + pr_info("ex %016llx phy 0x%x replace %016llx\n", + SAS_ADDR(dev->sas_addr), phy_id, + SAS_ADDR(phy->attached_sas_addr)); sas_unregister_devs_sas_addr(dev, phy_id, last); return sas_discover_new(dev, phy_id); @@ -2084,8 +2068,8 @@ static int sas_rediscover(struct domain_device *dev, const int phy_id) int i; bool last = true; /* is this the last phy of the port */ - SAS_DPRINTK("ex %016llx phy%d originated BROADCAST(CHANGE)\n", - SAS_ADDR(dev->sas_addr), phy_id); + pr_debug("ex %016llx phy%d originated BROADCAST(CHANGE)\n", + SAS_ADDR(dev->sas_addr), phy_id); if (SAS_ADDR(changed_phy->attached_sas_addr) != 0) { for (i = 0; i < ex->num_phys; i++) { @@ -2095,8 +2079,8 @@ static int sas_rediscover(struct domain_device *dev, const int phy_id) continue; if (SAS_ADDR(phy->attached_sas_addr) == SAS_ADDR(changed_phy->attached_sas_addr)) { - SAS_DPRINTK("phy%d part of wide port with " - "phy%d\n", phy_id, i); + pr_debug("phy%d part of wide port with phy%d\n", + phy_id, i); last = false; break; } @@ -2154,23 +2138,23 @@ void sas_smp_handler(struct bsg_job *job, struct Scsi_Host *shost, case SAS_FANOUT_EXPANDER_DEVICE: break; default: - printk("%s: can we send a smp request to a device?\n", + pr_err("%s: can we send a smp request to a device?\n", __func__); goto out; } dev = sas_find_dev_by_rphy(rphy); if (!dev) { - printk("%s: fail to find a domain_device?\n", __func__); + pr_err("%s: fail to find a domain_device?\n", __func__); goto out; } /* do we need to support multiple segments? */ if (job->request_payload.sg_cnt > 1 || job->reply_payload.sg_cnt > 1) { - printk("%s: multiple segments req %u, rsp %u\n", - __func__, job->request_payload.payload_len, - job->reply_payload.payload_len); + pr_info("%s: multiple segments req %u, rsp %u\n", + __func__, job->request_payload.payload_len, + job->reply_payload.payload_len); goto out; } diff --git a/drivers/scsi/libsas/sas_init.c b/drivers/scsi/libsas/sas_init.c index ede0af78144f..221340ee8651 100644 --- a/drivers/scsi/libsas/sas_init.c +++ b/drivers/scsi/libsas/sas_init.c @@ -128,19 +128,19 @@ int sas_register_ha(struct sas_ha_struct *sas_ha) error = sas_register_phys(sas_ha); if (error) { - printk(KERN_NOTICE "couldn't register sas phys:%d\n", error); + pr_notice("couldn't register sas phys:%d\n", error); return error; } error = sas_register_ports(sas_ha); if (error) { - printk(KERN_NOTICE "couldn't register sas ports:%d\n", error); + pr_notice("couldn't register sas ports:%d\n", error); goto Undo_phys; } error = sas_init_events(sas_ha); if (error) { - printk(KERN_NOTICE "couldn't start event thread:%d\n", error); + pr_notice("couldn't start event thread:%d\n", error); goto Undo_ports; } @@ -623,8 +623,8 @@ struct asd_sas_event *sas_alloc_event(struct asd_sas_phy *phy) if (atomic_read(&phy->event_nr) > phy->ha->event_thres) { if (i->dft->lldd_control_phy) { if (cmpxchg(&phy->in_shutdown, 0, 1) == 0) { - sas_printk("The phy%02d bursting events, shut it down.\n", - phy->id); + pr_notice("The phy%02d bursting events, shut it down.\n", + phy->id); sas_notify_phy_event(phy, PHYE_SHUTDOWN); } } else { diff --git a/drivers/scsi/libsas/sas_internal.h b/drivers/scsi/libsas/sas_internal.h index 50e12d662ffe..2cdb981cf476 100644 --- a/drivers/scsi/libsas/sas_internal.h +++ b/drivers/scsi/libsas/sas_internal.h @@ -32,9 +32,13 @@ #include <scsi/libsas.h> #include <scsi/sas_ata.h> -#define sas_printk(fmt, ...) printk(KERN_NOTICE "sas: " fmt, ## __VA_ARGS__) +#ifdef pr_fmt +#undef pr_fmt +#endif + +#define SAS_FMT "sas: " -#define SAS_DPRINTK(fmt, ...) printk(KERN_DEBUG "sas: " fmt, ## __VA_ARGS__) +#define pr_fmt(fmt) SAS_FMT fmt #define TO_SAS_TASK(_scsi_cmd) ((void *)(_scsi_cmd)->host_scribble) #define ASSIGN_SAS_TASK(_sc, _t) do { (_sc)->host_scribble = (void *) _t; } while (0) @@ -120,10 +124,10 @@ static inline void sas_smp_host_handler(struct bsg_job *job, static inline void sas_fail_probe(struct domain_device *dev, const char *func, int err) { - SAS_DPRINTK("%s: for %s device %16llx returned %d\n", - func, dev->parent ? "exp-attached" : - "direct-attached", - SAS_ADDR(dev->sas_addr), err); + pr_warn("%s: for %s device %16llx returned %d\n", + func, dev->parent ? "exp-attached" : + "direct-attached", + SAS_ADDR(dev->sas_addr), err); sas_unregister_dev(dev->port, dev); } diff --git a/drivers/scsi/libsas/sas_phy.c b/drivers/scsi/libsas/sas_phy.c index bf3e1b979ca6..0374243c85d0 100644 --- a/drivers/scsi/libsas/sas_phy.c +++ b/drivers/scsi/libsas/sas_phy.c @@ -122,11 +122,11 @@ static void sas_phye_shutdown(struct work_struct *work) phy->enabled = 0; ret = i->dft->lldd_control_phy(phy, PHY_FUNC_DISABLE, NULL); if (ret) - sas_printk("lldd disable phy%02d returned %d\n", - phy->id, ret); + pr_notice("lldd disable phy%02d returned %d\n", + phy->id, ret); } else - sas_printk("phy%02d is not enabled, cannot shutdown\n", - phy->id); + pr_notice("phy%02d is not enabled, cannot shutdown\n", + phy->id); } /* ---------- Phy class registration ---------- */ diff --git a/drivers/scsi/libsas/sas_port.c b/drivers/scsi/libsas/sas_port.c index fad23dd39114..03fe479359b6 100644 --- a/drivers/scsi/libsas/sas_port.c +++ b/drivers/scsi/libsas/sas_port.c @@ -110,9 +110,9 @@ static void sas_form_port(struct asd_sas_phy *phy) wake_up(&sas_ha->eh_wait_q); return; } else { - SAS_DPRINTK("%s: phy%d belongs to port%d already(%d)!\n", - __func__, phy->id, phy->port->id, - phy->port->num_phys); + pr_info("%s: phy%d belongs to port%d already(%d)!\n", + __func__, phy->id, phy->port->id, + phy->port->num_phys); return; } } @@ -125,8 +125,8 @@ static void sas_form_port(struct asd_sas_phy *phy) if (*(u64 *) port->sas_addr && phy_is_wideport_member(port, phy) && port->num_phys > 0) { /* wide port */ - SAS_DPRINTK("phy%d matched wide port%d\n", phy->id, - port->id); + pr_debug("phy%d matched wide port%d\n", phy->id, + port->id); break; } spin_unlock(&port->phy_list_lock); @@ -147,8 +147,7 @@ static void sas_form_port(struct asd_sas_phy *phy) } if (i >= sas_ha->num_phys) { - printk(KERN_NOTICE "%s: couldn't find a free port, bug?\n", - __func__); + pr_err("%s: couldn't find a free port, bug?\n", __func__); spin_unlock_irqrestore(&sas_ha->phy_port_lock, flags); return; } @@ -180,10 +179,10 @@ static void sas_form_port(struct asd_sas_phy *phy) } sas_port_add_phy(port->port, phy->phy); - SAS_DPRINTK("%s added to %s, phy_mask:0x%x (%16llx)\n", - dev_name(&phy->phy->dev), dev_name(&port->port->dev), - port->phy_mask, - SAS_ADDR(port->attached_sas_addr)); + pr_debug("%s added to %s, phy_mask:0x%x (%16llx)\n", + dev_name(&phy->phy->dev), dev_name(&port->port->dev), + port->phy_mask, + SAS_ADDR(port->attached_sas_addr)); if (port->port_dev) port->port_dev->pathways = port->num_phys; @@ -279,7 +278,7 @@ void sas_porte_broadcast_rcvd(struct work_struct *work) prim = phy->sas_prim; spin_unlock_irqrestore(&phy->sas_prim_lock, flags); - SAS_DPRINTK("broadcast received: %d\n", prim); + pr_debug("broadcast received: %d\n", prim); sas_discover_event(phy->port, DISCE_REVALIDATE_DOMAIN); if (phy->port) diff --git a/drivers/scsi/libsas/sas_scsi_host.c b/drivers/scsi/libsas/sas_scsi_host.c index 33229348dcb6..c43a00a9d819 100644 --- a/drivers/scsi/libsas/sas_scsi_host.c +++ b/drivers/scsi/libsas/sas_scsi_host.c @@ -93,9 +93,8 @@ static void sas_end_task(struct scsi_cmnd *sc, struct sas_task *task) hs = DID_ERROR; break; case SAS_PROTO_RESPONSE: - SAS_DPRINTK("LLDD:%s sent SAS_PROTO_RESP for an SSP " - "task; please report this\n", - task->dev->port->ha->sas_ha_name); + pr_notice("LLDD:%s sent SAS_PROTO_RESP for an SSP task; please report this\n", + task->dev->port->ha->sas_ha_name); break; case SAS_ABORTED_TASK: hs = DID_ABORT; @@ -132,12 +131,12 @@ static void sas_scsi_task_done(struct sas_task *task) if (unlikely(!task)) { /* task will be completed by the error handler */ - SAS_DPRINTK("task done but aborted\n"); + pr_debug("task done but aborted\n"); return; } if (unlikely(!sc)) { - SAS_DPRINTK("task_done called with non existing SCSI cmnd!\n"); + pr_debug("task_done called with non existing SCSI cmnd!\n"); sas_free_task(task); return; } @@ -208,7 +207,7 @@ int sas_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *cmd) return 0; out_free_task: - SAS_DPRINTK("lldd_execute_task returned: %d\n", res); + pr_debug("lldd_execute_task returned: %d\n", res); ASSIGN_SAS_TASK(cmd, NULL); sas_free_task(task); if (res == -SAS_QUEUE_FULL) @@ -301,40 +300,38 @@ static enum task_disposition sas_scsi_find_task(struct sas_task *task) to_sas_internal(task->dev->port->ha->core.shost->transportt); for (i = 0; i < 5; i++) { - SAS_DPRINTK("%s: aborting task 0x%p\n", __func__, task); + pr_notice("%s: aborting task 0x%p\n", __func__, task); res = si->dft->lldd_abort_task(task); spin_lock_irqsave(&task->task_state_lock, flags); if (task->task_state_flags & SAS_TASK_STATE_DONE) { spin_unlock_irqrestore(&task->task_state_lock, flags); - SAS_DPRINTK("%s: task 0x%p is done\n", __func__, - task); + pr_debug("%s: task 0x%p is done\n", __func__, task); return TASK_IS_DONE; } spin_unlock_irqrestore(&task->task_state_lock, flags); if (res == TMF_RESP_FUNC_COMPLETE) { - SAS_DPRINTK("%s: task 0x%p is aborted\n", - __func__, task); + pr_notice("%s: task 0x%p is aborted\n", + __func__, task); return TASK_IS_ABORTED; } else if (si->dft->lldd_query_task) { - SAS_DPRINTK("%s: querying task 0x%p\n", - __func__, task); + pr_notice("%s: querying task 0x%p\n", __func__, task); res = si->dft->lldd_query_task(task); switch (res) { case TMF_RESP_FUNC_SUCC: - SAS_DPRINTK("%s: task 0x%p at LU\n", - __func__, task); + pr_notice("%s: task 0x%p at LU\n", __func__, + task); return TASK_IS_AT_LU; case TMF_RESP_FUNC_COMPLETE: - SAS_DPRINTK("%s: task 0x%p not at LU\n", - __func__, task); + pr_notice("%s: task 0x%p not at LU\n", + __func__, task); return TASK_IS_NOT_AT_LU; case TMF_RESP_FUNC_FAILED: - SAS_DPRINTK("%s: task 0x%p failed to abort\n", - __func__, task); - return TASK_ABORT_FAILED; - } + pr_notice("%s: task 0x%p failed to abort\n", + __func__, task); + return TASK_ABORT_FAILED; + } } } @@ -350,9 +347,9 @@ static int sas_recover_lu(struct domain_device *dev, struct scsi_cmnd *cmd) int_to_scsilun(cmd->device->lun, &lun); - SAS_DPRINTK("eh: device %llx LUN %llx has the task\n", - SAS_ADDR(dev->sas_addr), - cmd->device->lun); + pr_notice("eh: device %llx LUN %llx has the task\n", + SAS_ADDR(dev->sas_addr), + cmd->device->lun); if (i->dft->lldd_abort_task_set) res = i->dft->lldd_abort_task_set(dev, lun.scsi_lun); @@ -376,8 +373,8 @@ static int sas_recover_I_T(struct domain_device *dev) struct sas_internal *i = to_sas_internal(dev->port->ha->core.shost->transportt); - SAS_DPRINTK("I_T nexus reset for dev %016llx\n", - SAS_ADDR(dev->sas_addr)); + pr_notice("I_T nexus reset for dev %016llx\n", + SAS_ADDR(dev->sas_addr)); if (i->dft->lldd_I_T_nexus_reset) res = i->dft->lldd_I_T_nexus_reset(dev); @@ -471,9 +468,9 @@ static int sas_queue_reset(struct domain_device *dev, int reset_type, return SUCCESS; } - SAS_DPRINTK("%s reset of %s failed\n", - reset_type == SAS_DEV_LU_RESET ? "LUN" : "Bus", - dev_name(&dev->rphy->dev)); + pr_warn("%s reset of %s failed\n", + reset_type == SAS_DEV_LU_RESET ? "LUN" : "Bus", + dev_name(&dev->rphy->dev)); return FAILED; } @@ -501,7 +498,7 @@ int sas_eh_abort_handler(struct scsi_cmnd *cmd) if (task) res = i->dft->lldd_abort_task(task); else - SAS_DPRINTK("no task to abort\n"); + pr_notice("no task to abort\n"); if (res == TMF_RESP_FUNC_SUCC || res == TMF_RESP_FUNC_COMPLETE) return SUCCESS; @@ -612,34 +609,33 @@ static void sas_eh_handle_sas_errors(struct Scsi_Host *shost, struct list_head * spin_unlock_irqrestore(&task->task_state_lock, flags); if (need_reset) { - SAS_DPRINTK("%s: task 0x%p requests reset\n", - __func__, task); + pr_notice("%s: task 0x%p requests reset\n", + __func__, task); goto reset; } - SAS_DPRINTK("trying to find task 0x%p\n", task); + pr_debug("trying to find task 0x%p\n", task); res = sas_scsi_find_task(task); switch (res) { case TASK_IS_DONE: - SAS_DPRINTK("%s: task 0x%p is done\n", __func__, + pr_notice("%s: task 0x%p is done\n", __func__, task); sas_eh_finish_cmd(cmd); continue; case TASK_IS_ABORTED: - SAS_DPRINTK("%s: task 0x%p is aborted\n", - __func__, task); + pr_notice("%s: task 0x%p is aborted\n", + __func__, task); sas_eh_finish_cmd(cmd); continue; case TASK_IS_AT_LU: - SAS_DPRINTK("task 0x%p is at LU: lu recover\n", task); + pr_info("task 0x%p is at LU: lu recover\n", task); reset: tmf_resp = sas_recover_lu(task->dev, cmd); if (tmf_resp == TMF_RESP_FUNC_COMPLETE) { - SAS_DPRINTK("dev %016llx LU %llx is " - "recovered\n", - SAS_ADDR(task->dev), - cmd->device->lun); + pr_notice("dev %016llx LU %llx is recovered\n", + SAS_ADDR(task->dev), + cmd->device->lun); sas_eh_finish_cmd(cmd); sas_scsi_clear_queue_lu(work_q, cmd); goto Again; @@ -647,14 +643,14 @@ static void sas_eh_handle_sas_errors(struct Scsi_Host *shost, struct list_head * /* fallthrough */ case TASK_IS_NOT_AT_LU: case TASK_ABORT_FAILED: - SAS_DPRINTK("task 0x%p is not at LU: I_T recover\n", - task); + pr_notice("task 0x%p is not at LU: I_T recover\n", + task); tmf_resp = sas_recover_I_T(task->dev); if (tmf_resp == TMF_RESP_FUNC_COMPLETE || tmf_resp == -ENODEV) { struct domain_device *dev = task->dev; - SAS_DPRINTK("I_T %016llx recovered\n", - SAS_ADDR(task->dev->sas_addr)); + pr_notice("I_T %016llx recovered\n", + SAS_ADDR(task->dev->sas_addr)); sas_eh_finish_cmd(cmd); sas_scsi_clear_queue_I_T(work_q, dev); goto Again; @@ -663,12 +659,12 @@ static void sas_eh_handle_sas_errors(struct Scsi_Host *shost, struct list_head * try_to_reset_cmd_device(cmd); if (i->dft->lldd_clear_nexus_port) { struct asd_sas_port *port = task->dev->port; - SAS_DPRINTK("clearing nexus for port:%d\n", - port->id); + pr_debug("clearing nexus for port:%d\n", + port->id); res = i->dft->lldd_clear_nexus_port(port); if (res == TMF_RESP_FUNC_COMPLETE) { - SAS_DPRINTK("clear nexus port:%d " - "succeeded\n", port->id); + pr_notice("clear nexus port:%d succeeded\n", + port->id); sas_eh_finish_cmd(cmd); sas_scsi_clear_queue_port(work_q, port); @@ -676,11 +672,10 @@ static void sas_eh_handle_sas_errors(struct Scsi_Host *shost, struct list_head * } } if (i->dft->lldd_clear_nexus_ha) { - SAS_DPRINTK("clear nexus ha\n"); + pr_debug("clear nexus ha\n"); res = i->dft->lldd_clear_nexus_ha(ha); if (res == TMF_RESP_FUNC_COMPLETE) { - SAS_DPRINTK("clear nexus ha " - "succeeded\n"); + pr_notice("clear nexus ha succeeded\n"); sas_eh_finish_cmd(cmd); goto clear_q; } @@ -689,10 +684,9 @@ static void sas_eh_handle_sas_errors(struct Scsi_Host *shost, struct list_head * * of effort could recover from errors. Quite * possibly the HA just disappeared. */ - SAS_DPRINTK("error from device %llx, LUN %llx " - "couldn't be recovered in any way\n", - SAS_ADDR(task->dev->sas_addr), - cmd->device->lun); + pr_err("error from device %llx, LUN %llx couldn't be recovered in any way\n", + SAS_ADDR(task->dev->sas_addr), + cmd->device->lun); sas_eh_finish_cmd(cmd); goto clear_q; @@ -704,7 +698,7 @@ static void sas_eh_handle_sas_errors(struct Scsi_Host *shost, struct list_head * return; clear_q: - SAS_DPRINTK("--- Exit %s -- clear_q\n", __func__); + pr_debug("--- Exit %s -- clear_q\n", __func__); list_for_each_entry_safe(cmd, n, work_q, eh_entry) sas_eh_finish_cmd(cmd); goto out; @@ -758,8 +752,8 @@ retry: list_splice_init(&shost->eh_cmd_q, &eh_work_q); spin_unlock_irq(shost->host_lock); - SAS_DPRINTK("Enter %s busy: %d failed: %d\n", - __func__, scsi_host_busy(shost), shost->host_failed); + pr_notice("Enter %s busy: %d failed: %d\n", + __func__, scsi_host_busy(shost), shost->host_failed); /* * Deal with commands that still have SAS tasks (i.e. they didn't * complete via the normal sas_task completion mechanism), @@ -800,9 +794,9 @@ out: if (retry) goto retry; - SAS_DPRINTK("--- Exit %s: busy: %d failed: %d tries: %d\n", - __func__, scsi_host_busy(shost), - shost->host_failed, tries); + pr_notice("--- Exit %s: busy: %d failed: %d tries: %d\n", + __func__, scsi_host_busy(shost), + shost->host_failed, tries); } int sas_ioctl(struct scsi_device *sdev, int cmd, void __user *arg) @@ -875,9 +869,8 @@ int sas_slave_configure(struct scsi_device *scsi_dev) if (scsi_dev->tagged_supported) { scsi_change_queue_depth(scsi_dev, SAS_DEF_QD); } else { - SAS_DPRINTK("device %llx, LUN %llx doesn't support " - "TCQ\n", SAS_ADDR(dev->sas_addr), - scsi_dev->lun); + pr_notice("device %llx, LUN %llx doesn't support TCQ\n", + SAS_ADDR(dev->sas_addr), scsi_dev->lun); scsi_change_queue_depth(scsi_dev, 1); } @@ -930,16 +923,10 @@ void sas_task_abort(struct sas_task *task) return; } - if (dev_is_sata(task->dev)) { + if (dev_is_sata(task->dev)) sas_ata_task_abort(task); - } else { - struct request_queue *q = sc->device->request_queue; - unsigned long flags; - - spin_lock_irqsave(q->queue_lock, flags); + else blk_abort_request(sc->request); - spin_unlock_irqrestore(q->queue_lock, flags); - } } void sas_target_destroy(struct scsi_target *starget) diff --git a/drivers/scsi/libsas/sas_task.c b/drivers/scsi/libsas/sas_task.c index a78e5bd3e514..c3b9befad4e6 100644 --- a/drivers/scsi/libsas/sas_task.c +++ b/drivers/scsi/libsas/sas_task.c @@ -1,3 +1,6 @@ + +#include "sas_internal.h" + #include <linux/kernel.h> #include <linux/export.h> #include <scsi/sas.h> @@ -23,11 +26,8 @@ void sas_ssp_task_response(struct device *dev, struct sas_task *task, memcpy(tstat->buf, iu->sense_data, tstat->buf_valid_size); if (iu->status != SAM_STAT_CHECK_CONDITION) - dev_printk(KERN_WARNING, dev, - "dev %llx sent sense data, but " - "stat(%x) is not CHECK CONDITION\n", - SAS_ADDR(task->dev->sas_addr), - iu->status); + dev_warn(dev, "dev %llx sent sense data, but stat(%x) is not CHECK CONDITION\n", + SAS_ADDR(task->dev->sas_addr), iu->status); } else /* when datapres contains corrupt/unknown value... */ diff --git a/drivers/scsi/lpfc/lpfc.h b/drivers/scsi/lpfc/lpfc.h index c1eb2b00ca7f..ebdfe5b26937 100644 --- a/drivers/scsi/lpfc/lpfc.h +++ b/drivers/scsi/lpfc/lpfc.h @@ -335,6 +335,18 @@ enum hba_state { LPFC_HBA_ERROR = -1 }; +struct lpfc_trunk_link_state { + enum hba_state state; + uint8_t fault; +}; + +struct lpfc_trunk_link { + struct lpfc_trunk_link_state link0, + link1, + link2, + link3; +}; + struct lpfc_vport { struct lpfc_hba *phba; struct list_head listentry; @@ -490,6 +502,7 @@ struct lpfc_vport { struct nvme_fc_local_port *localport; uint8_t nvmei_support; /* driver supports NVME Initiator */ uint32_t last_fcp_wqidx; + uint32_t rcv_flogi_cnt; /* How many unsol FLOGIs ACK'd. */ }; struct hbq_s { @@ -683,6 +696,7 @@ struct lpfc_hba { uint32_t iocb_cmd_size; uint32_t iocb_rsp_size; + struct lpfc_trunk_link trunk_link; enum hba_state link_state; uint32_t link_flag; /* link state flags */ #define LS_LOOPBACK_MODE 0x1 /* NPort is in Loopback mode */ @@ -717,6 +731,7 @@ struct lpfc_hba { * capability */ #define HBA_NVME_IOQ_FLUSH 0x80000 /* NVME IO queues flushed. */ +#define HBA_FLOGI_ISSUED 0x100000 /* FLOGI was issued */ uint32_t fcp_ring_in_use; /* When polling test if intr-hndlr active*/ struct lpfc_dmabuf slim2p; @@ -783,6 +798,7 @@ struct lpfc_hba { #define LPFC_FCF_PRIORITY 2 /* Priority fcf failover */ uint32_t cfg_fcf_failover_policy; uint32_t cfg_fcp_io_sched; + uint32_t cfg_ns_query; uint32_t cfg_fcp2_no_tgt_reset; uint32_t cfg_cr_delay; uint32_t cfg_cr_count; @@ -989,7 +1005,8 @@ struct lpfc_hba { spinlock_t port_list_lock; /* lock for port_list mutations */ struct lpfc_vport *pport; /* physical lpfc_vport pointer */ uint16_t max_vpi; /* Maximum virtual nports */ -#define LPFC_MAX_VPI 0xFFFF /* Max number of VPI supported */ +#define LPFC_MAX_VPI 0xFF /* Max number VPI supported 0 - 0xff */ +#define LPFC_MAX_VPORTS 0x100 /* Max vports per port, with pport */ uint16_t max_vports; /* * For IOV HBAs max_vpi can change * after a reset. max_vports is max @@ -1111,6 +1128,10 @@ struct lpfc_hba { uint16_t vlan_id; struct list_head fcf_conn_rec_list; + bool defer_flogi_acc_flag; + uint16_t defer_flogi_acc_rx_id; + uint16_t defer_flogi_acc_ox_id; + spinlock_t ct_ev_lock; /* synchronize access to ct_ev_waiters */ struct list_head ct_ev_waiters; struct unsol_rcv_ct_ctx ct_ctx[LPFC_CT_CTX_MAX]; @@ -1262,6 +1283,12 @@ lpfc_sli_read_hs(struct lpfc_hba *phba) static inline struct lpfc_sli_ring * lpfc_phba_elsring(struct lpfc_hba *phba) { + /* Return NULL if sli_rev has become invalid due to bad fw */ + if (phba->sli_rev != LPFC_SLI_REV4 && + phba->sli_rev != LPFC_SLI_REV3 && + phba->sli_rev != LPFC_SLI_REV2) + return NULL; + if (phba->sli_rev == LPFC_SLI_REV4) { if (phba->sli4_hba.els_wq) return phba->sli4_hba.els_wq->pring; diff --git a/drivers/scsi/lpfc/lpfc_attr.c b/drivers/scsi/lpfc/lpfc_attr.c index dda7f450b96d..4bae72cbf3f6 100644 --- a/drivers/scsi/lpfc/lpfc_attr.c +++ b/drivers/scsi/lpfc/lpfc_attr.c @@ -883,6 +883,42 @@ lpfc_link_state_show(struct device *dev, struct device_attribute *attr, } } + if ((phba->sli_rev == LPFC_SLI_REV4) && + ((bf_get(lpfc_sli_intf_if_type, + &phba->sli4_hba.sli_intf) == + LPFC_SLI_INTF_IF_TYPE_6))) { + struct lpfc_trunk_link link = phba->trunk_link; + + if (bf_get(lpfc_conf_trunk_port0, &phba->sli4_hba)) + len += snprintf(buf + len, PAGE_SIZE - len, + "Trunk port 0: Link %s %s\n", + (link.link0.state == LPFC_LINK_UP) ? + "Up" : "Down. ", + trunk_errmsg[link.link0.fault]); + + if (bf_get(lpfc_conf_trunk_port1, &phba->sli4_hba)) + len += snprintf(buf + len, PAGE_SIZE - len, + "Trunk port 1: Link %s %s\n", + (link.link1.state == LPFC_LINK_UP) ? + "Up" : "Down. ", + trunk_errmsg[link.link1.fault]); + + if (bf_get(lpfc_conf_trunk_port2, &phba->sli4_hba)) + len += snprintf(buf + len, PAGE_SIZE - len, + "Trunk port 2: Link %s %s\n", + (link.link2.state == LPFC_LINK_UP) ? + "Up" : "Down. ", + trunk_errmsg[link.link2.fault]); + + if (bf_get(lpfc_conf_trunk_port3, &phba->sli4_hba)) + len += snprintf(buf + len, PAGE_SIZE - len, + "Trunk port 3: Link %s %s\n", + (link.link3.state == LPFC_LINK_UP) ? + "Up" : "Down. ", + trunk_errmsg[link.link3.fault]); + + } + return len; } @@ -1156,6 +1192,82 @@ out: } /** + * lpfc_reset_pci_bus - resets PCI bridge controller's secondary bus of an HBA + * @phba: lpfc_hba pointer. + * + * Description: + * Issues a PCI secondary bus reset for the phba->pcidev. + * + * Notes: + * First walks the bus_list to ensure only PCI devices with Emulex + * vendor id, device ids that support hot reset, only one occurrence + * of function 0, and all ports on the bus are in offline mode to ensure the + * hot reset only affects one valid HBA. + * + * Returns: + * -ENOTSUPP, cfg_enable_hba_reset must be of value 2 + * -ENODEV, NULL ptr to pcidev + * -EBADSLT, detected invalid device + * -EBUSY, port is not in offline state + * 0, successful + */ +int +lpfc_reset_pci_bus(struct lpfc_hba *phba) +{ + struct pci_dev *pdev = phba->pcidev; + struct Scsi_Host *shost = NULL; + struct lpfc_hba *phba_other = NULL; + struct pci_dev *ptr = NULL; + int res; + + if (phba->cfg_enable_hba_reset != 2) + return -ENOTSUPP; + + if (!pdev) { + lpfc_printf_log(phba, KERN_INFO, LOG_INIT, "8345 pdev NULL!\n"); + return -ENODEV; + } + + res = lpfc_check_pci_resettable(phba); + if (res) + return res; + + /* Walk the list of devices on the pci_dev's bus */ + list_for_each_entry(ptr, &pdev->bus->devices, bus_list) { + /* Check port is offline */ + shost = pci_get_drvdata(ptr); + if (shost) { + phba_other = + ((struct lpfc_vport *)shost->hostdata)->phba; + if (!(phba_other->pport->fc_flag & FC_OFFLINE_MODE)) { + lpfc_printf_log(phba_other, KERN_INFO, LOG_INIT, + "8349 WWPN = 0x%02x%02x%02x%02x" + "%02x%02x%02x%02x is not " + "offline!\n", + phba_other->wwpn[0], + phba_other->wwpn[1], + phba_other->wwpn[2], + phba_other->wwpn[3], + phba_other->wwpn[4], + phba_other->wwpn[5], + phba_other->wwpn[6], + phba_other->wwpn[7]); + return -EBUSY; + } + } + } + + /* Issue PCI bus reset */ + res = pci_reset_bus(pdev); + if (res) { + lpfc_printf_log(phba, KERN_ERR, LOG_INIT, + "8350 PCI reset bus failed: %d\n", res); + } + + return res; +} + +/** * lpfc_selective_reset - Offline then onlines the port * @phba: lpfc_hba pointer. * @@ -1322,7 +1434,7 @@ lpfc_sli4_pdev_reg_request(struct lpfc_hba *phba, uint32_t opcode) return -EACCES; if ((phba->sli_rev < LPFC_SLI_REV4) || - (bf_get(lpfc_sli_intf_if_type, &phba->sli4_hba.sli_intf) != + (bf_get(lpfc_sli_intf_if_type, &phba->sli4_hba.sli_intf) < LPFC_SLI_INTF_IF_TYPE_2)) return -EPERM; @@ -1430,6 +1542,66 @@ lpfc_nport_evt_cnt_show(struct device *dev, struct device_attribute *attr, return snprintf(buf, PAGE_SIZE, "%d\n", phba->nport_event_cnt); } +int +lpfc_set_trunking(struct lpfc_hba *phba, char *buff_out) +{ + LPFC_MBOXQ_t *mbox = NULL; + unsigned long val = 0; + char *pval = 0; + int rc = 0; + + if (!strncmp("enable", buff_out, + strlen("enable"))) { + pval = buff_out + strlen("enable") + 1; + rc = kstrtoul(pval, 0, &val); + if (rc) + return rc; /* Invalid number */ + } else if (!strncmp("disable", buff_out, + strlen("disable"))) { + val = 0; + } else { + return -EINVAL; /* Invalid command */ + } + + switch (val) { + case 0: + val = 0x0; /* Disable */ + break; + case 2: + val = 0x1; /* Enable two port trunk */ + break; + case 4: + val = 0x2; /* Enable four port trunk */ + break; + default: + return -EINVAL; + } + + lpfc_printf_log(phba, KERN_ERR, LOG_MBOX, + "0070 Set trunk mode with val %ld ", val); + + mbox = mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL); + if (!mbox) + return -ENOMEM; + + lpfc_sli4_config(phba, mbox, LPFC_MBOX_SUBSYSTEM_FCOE, + LPFC_MBOX_OPCODE_FCOE_FC_SET_TRUNK_MODE, + 12, LPFC_SLI4_MBX_EMBED); + + bf_set(lpfc_mbx_set_trunk_mode, + &mbox->u.mqe.un.set_trunk_mode, + val); + rc = lpfc_sli_issue_mbox(phba, mbox, MBX_POLL); + if (rc) + lpfc_printf_log(phba, KERN_ERR, LOG_MBOX, + "0071 Set trunk mode failed with status: %d", + rc); + if (rc != MBX_TIMEOUT) + mempool_free(mbox, phba->mbox_mem_pool); + + return 0; +} + /** * lpfc_board_mode_show - Return the state of the board * @dev: class device that is converted into a Scsi_host. @@ -1522,6 +1694,11 @@ lpfc_board_mode_store(struct device *dev, struct device_attribute *attr, status = lpfc_sli4_pdev_reg_request(phba, LPFC_FW_RESET); else if (strncmp(buf, "dv_reset", sizeof("dv_reset") - 1) == 0) status = lpfc_sli4_pdev_reg_request(phba, LPFC_DV_RESET); + else if (strncmp(buf, "pci_bus_reset", sizeof("pci_bus_reset") - 1) + == 0) + status = lpfc_reset_pci_bus(phba); + else if (strncmp(buf, "trunk", sizeof("trunk") - 1) == 0) + status = lpfc_set_trunking(phba, (char *)buf + sizeof("trunk")); else status = -EINVAL; @@ -1590,7 +1767,7 @@ lpfc_get_hba_info(struct lpfc_hba *phba, pmb = &pmboxq->u.mb; pmb->mbxCommand = MBX_READ_CONFIG; pmb->mbxOwner = OWN_HOST; - pmboxq->context1 = NULL; + pmboxq->ctx_buf = NULL; if (phba->pport->fc_flag & FC_OFFLINE_MODE) rc = MBX_NOT_FINISHED; @@ -1622,6 +1799,9 @@ lpfc_get_hba_info(struct lpfc_hba *phba, max_vpi = (bf_get(lpfc_mbx_rd_conf_vpi_count, rd_config) > 0) ? (bf_get(lpfc_mbx_rd_conf_vpi_count, rd_config) - 1) : 0; + /* Limit the max we support */ + if (max_vpi > LPFC_MAX_VPI) + max_vpi = LPFC_MAX_VPI; if (mvpi) *mvpi = max_vpi; if (avpi) @@ -1637,8 +1817,13 @@ lpfc_get_hba_info(struct lpfc_hba *phba, *axri = pmb->un.varRdConfig.avail_xri; if (mvpi) *mvpi = pmb->un.varRdConfig.max_vpi; - if (avpi) - *avpi = pmb->un.varRdConfig.avail_vpi; + if (avpi) { + /* avail_vpi is only valid if link is up and ready */ + if (phba->link_state == LPFC_HBA_READY) + *avpi = pmb->un.varRdConfig.avail_vpi; + else + *avpi = pmb->un.varRdConfig.max_vpi; + } } mempool_free(pmboxq, phba->mbox_mem_pool); @@ -3831,8 +4016,9 @@ lpfc_topology_store(struct device *dev, struct device_attribute *attr, val); return -EINVAL; } - if (phba->pcidev->device == PCI_DEVICE_ID_LANCER_G6_FC && - val == 4) { + if ((phba->pcidev->device == PCI_DEVICE_ID_LANCER_G6_FC || + phba->pcidev->device == PCI_DEVICE_ID_LANCER_G7_FC) && + val == 4) { lpfc_printf_vlog(vport, KERN_ERR, LOG_INIT, "3114 Loop mode not supported\n"); return -EINVAL; @@ -4254,7 +4440,7 @@ lpfc_link_speed_store(struct device *dev, struct device_attribute *attr, uint32_t prev_val, if_type; if_type = bf_get(lpfc_sli_intf_if_type, &phba->sli4_hba.sli_intf); - if (if_type == LPFC_SLI_INTF_IF_TYPE_2 && + if (if_type >= LPFC_SLI_INTF_IF_TYPE_2 && phba->hba_flag & HBA_FORCED_LINK_SPEED) return -EPERM; @@ -5070,6 +5256,18 @@ LPFC_ATTR_RW(fcp_io_sched, LPFC_FCP_SCHED_ROUND_ROBIN, "issuing commands [0] - Round Robin, [1] - Current CPU"); /* + * lpfc_ns_query: Determine algrithmn for NameServer queries after RSCN + * range is [0,1]. Default value is 0. + * For [0], GID_FT is used for NameServer queries after RSCN (default) + * For [1], GID_PT is used for NameServer queries after RSCN + * + */ +LPFC_ATTR_RW(ns_query, LPFC_NS_QUERY_GID_FT, + LPFC_NS_QUERY_GID_FT, LPFC_NS_QUERY_GID_PT, + "Determine algorithm NameServer queries after RSCN " + "[0] - GID_FT, [1] - GID_PT"); + +/* # lpfc_fcp2_no_tgt_reset: Determine bus reset behavior # range is [0,1]. Default value is 0. # For [0], bus reset issues target reset to ALL devices @@ -5257,9 +5455,10 @@ LPFC_ATTR_R(nvme_io_channel, # lpfc_enable_hba_reset: Allow or prevent HBA resets to the hardware. # 0 = HBA resets disabled # 1 = HBA resets enabled (default) -# Value range is [0,1]. Default value is 1. +# 2 = HBA reset via PCI bus reset enabled +# Value range is [0,2]. Default value is 1. */ -LPFC_ATTR_R(enable_hba_reset, 1, 0, 1, "Enable HBA resets from the driver."); +LPFC_ATTR_RW(enable_hba_reset, 1, 0, 2, "Enable HBA resets from the driver."); /* # lpfc_enable_hba_heartbeat: Disable HBA heartbeat timer.. @@ -5514,6 +5713,7 @@ struct device_attribute *lpfc_hba_attrs[] = { &dev_attr_lpfc_scan_down, &dev_attr_lpfc_link_speed, &dev_attr_lpfc_fcp_io_sched, + &dev_attr_lpfc_ns_query, &dev_attr_lpfc_fcp2_no_tgt_reset, &dev_attr_lpfc_cr_delay, &dev_attr_lpfc_cr_count, @@ -6006,6 +6206,9 @@ lpfc_get_host_speed(struct Scsi_Host *shost) case LPFC_LINK_SPEED_64GHZ: fc_host_speed(shost) = FC_PORTSPEED_64GBIT; break; + case LPFC_LINK_SPEED_128GHZ: + fc_host_speed(shost) = FC_PORTSPEED_128GBIT; + break; default: fc_host_speed(shost) = FC_PORTSPEED_UNKNOWN; break; @@ -6105,7 +6308,7 @@ lpfc_get_stats(struct Scsi_Host *shost) pmb = &pmboxq->u.mb; pmb->mbxCommand = MBX_READ_STATUS; pmb->mbxOwner = OWN_HOST; - pmboxq->context1 = NULL; + pmboxq->ctx_buf = NULL; pmboxq->vport = vport; if (vport->fc_flag & FC_OFFLINE_MODE) @@ -6137,7 +6340,7 @@ lpfc_get_stats(struct Scsi_Host *shost) memset(pmboxq, 0, sizeof (LPFC_MBOXQ_t)); pmb->mbxCommand = MBX_READ_LNK_STAT; pmb->mbxOwner = OWN_HOST; - pmboxq->context1 = NULL; + pmboxq->ctx_buf = NULL; pmboxq->vport = vport; if (vport->fc_flag & FC_OFFLINE_MODE) @@ -6217,7 +6420,7 @@ lpfc_reset_stats(struct Scsi_Host *shost) pmb->mbxCommand = MBX_READ_STATUS; pmb->mbxOwner = OWN_HOST; pmb->un.varWords[0] = 0x1; /* reset request */ - pmboxq->context1 = NULL; + pmboxq->ctx_buf = NULL; pmboxq->vport = vport; if ((vport->fc_flag & FC_OFFLINE_MODE) || @@ -6235,7 +6438,7 @@ lpfc_reset_stats(struct Scsi_Host *shost) memset(pmboxq, 0, sizeof(LPFC_MBOXQ_t)); pmb->mbxCommand = MBX_READ_LNK_STAT; pmb->mbxOwner = OWN_HOST; - pmboxq->context1 = NULL; + pmboxq->ctx_buf = NULL; pmboxq->vport = vport; if ((vport->fc_flag & FC_OFFLINE_MODE) || @@ -6564,6 +6767,7 @@ void lpfc_get_cfgparam(struct lpfc_hba *phba) { lpfc_fcp_io_sched_init(phba, lpfc_fcp_io_sched); + lpfc_ns_query_init(phba, lpfc_ns_query); lpfc_fcp2_no_tgt_reset_init(phba, lpfc_fcp2_no_tgt_reset); lpfc_cr_delay_init(phba, lpfc_cr_delay); lpfc_cr_count_init(phba, lpfc_cr_count); diff --git a/drivers/scsi/lpfc/lpfc_bsg.c b/drivers/scsi/lpfc/lpfc_bsg.c index 7bd7ae86bed5..8698af86485d 100644 --- a/drivers/scsi/lpfc/lpfc_bsg.c +++ b/drivers/scsi/lpfc/lpfc_bsg.c @@ -2222,7 +2222,7 @@ lpfc_bsg_diag_loopback_mode(struct bsg_job *job) if (phba->sli_rev < LPFC_SLI_REV4) rc = lpfc_sli3_bsg_diag_loopback_mode(phba, job); - else if (bf_get(lpfc_sli_intf_if_type, &phba->sli4_hba.sli_intf) == + else if (bf_get(lpfc_sli_intf_if_type, &phba->sli4_hba.sli_intf) >= LPFC_SLI_INTF_IF_TYPE_2) rc = lpfc_sli4_bsg_diag_loopback_mode(phba, job); else @@ -2262,7 +2262,7 @@ lpfc_sli4_bsg_diag_mode_end(struct bsg_job *job) if (phba->sli_rev < LPFC_SLI_REV4) return -ENODEV; - if (bf_get(lpfc_sli_intf_if_type, &phba->sli4_hba.sli_intf) != + if (bf_get(lpfc_sli_intf_if_type, &phba->sli4_hba.sli_intf) < LPFC_SLI_INTF_IF_TYPE_2) return -ENODEV; @@ -2354,7 +2354,7 @@ lpfc_sli4_bsg_link_diag_test(struct bsg_job *job) rc = -ENODEV; goto job_error; } - if (bf_get(lpfc_sli_intf_if_type, &phba->sli4_hba.sli_intf) != + if (bf_get(lpfc_sli_intf_if_type, &phba->sli4_hba.sli_intf) < LPFC_SLI_INTF_IF_TYPE_2) { rc = -ENODEV; goto job_error; @@ -2501,9 +2501,9 @@ static int lpfcdiag_loop_self_reg(struct lpfc_hba *phba, uint16_t *rpi) return -ENOMEM; } - dmabuff = (struct lpfc_dmabuf *) mbox->context1; - mbox->context1 = NULL; - mbox->context2 = NULL; + dmabuff = (struct lpfc_dmabuf *)mbox->ctx_buf; + mbox->ctx_buf = NULL; + mbox->ctx_ndlp = NULL; status = lpfc_sli_issue_mbox_wait(phba, mbox, LPFC_MBOX_TMO); if ((status != MBX_SUCCESS) || (mbox->u.mb.mbxStatus)) { @@ -3388,7 +3388,7 @@ lpfc_bsg_issue_mbox_cmpl(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmboxq) unsigned long flags; uint8_t *pmb, *pmb_buf; - dd_data = pmboxq->context1; + dd_data = pmboxq->ctx_ndlp; /* * The outgoing buffer is readily referred from the dma buffer, @@ -3573,7 +3573,7 @@ lpfc_bsg_issue_mbox_ext_handle_job(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmboxq) struct lpfc_sli_config_mbox *sli_cfg_mbx; uint8_t *pmbx; - dd_data = pmboxq->context1; + dd_data = pmboxq->ctx_buf; /* Determine if job has been aborted */ spin_lock_irqsave(&phba->ct_ev_lock, flags); @@ -3960,7 +3960,7 @@ lpfc_bsg_sli_cfg_read_cmd_ext(struct lpfc_hba *phba, struct bsg_job *job, pmboxq->mbox_cmpl = lpfc_bsg_issue_read_mbox_ext_cmpl; /* context fields to callback function */ - pmboxq->context1 = dd_data; + pmboxq->ctx_buf = dd_data; dd_data->type = TYPE_MBOX; dd_data->set_job = job; dd_data->context_un.mbox.pmboxq = pmboxq; @@ -4131,7 +4131,7 @@ lpfc_bsg_sli_cfg_write_cmd_ext(struct lpfc_hba *phba, struct bsg_job *job, pmboxq->mbox_cmpl = lpfc_bsg_issue_write_mbox_ext_cmpl; /* context fields to callback function */ - pmboxq->context1 = dd_data; + pmboxq->ctx_buf = dd_data; dd_data->type = TYPE_MBOX; dd_data->set_job = job; dd_data->context_un.mbox.pmboxq = pmboxq; @@ -4476,7 +4476,7 @@ lpfc_bsg_write_ebuf_set(struct lpfc_hba *phba, struct bsg_job *job, pmboxq->mbox_cmpl = lpfc_bsg_issue_write_mbox_ext_cmpl; /* context fields to callback function */ - pmboxq->context1 = dd_data; + pmboxq->ctx_buf = dd_data; dd_data->type = TYPE_MBOX; dd_data->set_job = job; dd_data->context_un.mbox.pmboxq = pmboxq; @@ -4761,7 +4761,7 @@ lpfc_bsg_issue_mbox(struct lpfc_hba *phba, struct bsg_job *job, if (mbox_req->inExtWLen || mbox_req->outExtWLen) { from = pmbx; ext = from + sizeof(MAILBOX_t); - pmboxq->context2 = ext; + pmboxq->ctx_buf = ext; pmboxq->in_ext_byte_len = mbox_req->inExtWLen * sizeof(uint32_t); pmboxq->out_ext_byte_len = @@ -4889,7 +4889,7 @@ lpfc_bsg_issue_mbox(struct lpfc_hba *phba, struct bsg_job *job, pmboxq->mbox_cmpl = lpfc_bsg_issue_mbox_cmpl; /* setup context field to pass wait_queue pointer to wake function */ - pmboxq->context1 = dd_data; + pmboxq->ctx_ndlp = dd_data; dd_data->type = TYPE_MBOX; dd_data->set_job = job; dd_data->context_un.mbox.pmboxq = pmboxq; @@ -5348,7 +5348,7 @@ lpfc_bsg_get_ras_config(struct bsg_job *job) sizeof(struct fc_bsg_request) + sizeof(struct lpfc_bsg_ras_req)) { lpfc_printf_log(phba, KERN_ERR, LOG_LIBDFC, - "6181 Received RAS_LOG request " + "6192 FW_LOG request received " "below minimum size\n"); rc = -EINVAL; goto ras_job_error; @@ -5356,7 +5356,7 @@ lpfc_bsg_get_ras_config(struct bsg_job *job) /* Check FW log status */ rc = lpfc_check_fwlog_support(phba); - if (rc == -EACCES || rc == -EPERM) + if (rc) goto ras_job_error; ras_reply = (struct lpfc_bsg_get_ras_config_reply *) @@ -5381,25 +5381,6 @@ ras_job_error: } /** - * lpfc_ras_stop_fwlog: Disable FW logging by the adapter - * @phba: Pointer to HBA context object. - * - * Disable FW logging into host memory on the adapter. To - * be done before reading logs from the host memory. - **/ -static void -lpfc_ras_stop_fwlog(struct lpfc_hba *phba) -{ - struct lpfc_ras_fwlog *ras_fwlog = &phba->ras_fwlog; - - ras_fwlog->ras_active = false; - - /* Disable FW logging to host memory */ - writel(LPFC_CTL_PDEV_CTL_DDL_RAS, - phba->sli4_hba.conf_regs_memmap_p + LPFC_CTL_PDEV_CTL_OFFSET); -} - -/** * lpfc_bsg_set_ras_config: Set FW logging parameters * @job: fc_bsg_job to handle * @@ -5416,7 +5397,7 @@ lpfc_bsg_set_ras_config(struct bsg_job *job) struct lpfc_ras_fwlog *ras_fwlog = &phba->ras_fwlog; struct fc_bsg_reply *bsg_reply = job->reply; uint8_t action = 0, log_level = 0; - int rc = 0; + int rc = 0, action_status = 0; if (job->request_len < sizeof(struct fc_bsg_request) + @@ -5430,7 +5411,7 @@ lpfc_bsg_set_ras_config(struct bsg_job *job) /* Check FW log status */ rc = lpfc_check_fwlog_support(phba); - if (rc == -EACCES || rc == -EPERM) + if (rc) goto ras_job_error; ras_req = (struct lpfc_bsg_set_ras_config_req *) @@ -5449,16 +5430,25 @@ lpfc_bsg_set_ras_config(struct bsg_job *job) lpfc_ras_stop_fwlog(phba); } else { /*action = LPFC_RASACTION_START_LOGGING*/ - if (ras_fwlog->ras_active == true) { - rc = -EINPROGRESS; - goto ras_job_error; - } + + /* Even though FW-logging is active re-initialize + * FW-logging with new log-level. Return status + * "Logging already Running" to caller. + **/ + if (ras_fwlog->ras_active) + action_status = -EINPROGRESS; /* Enable logging */ rc = lpfc_sli4_ras_fwlog_init(phba, log_level, LPFC_RAS_ENABLE_LOGGING); - if (rc) + if (rc) { rc = -EINVAL; + goto ras_job_error; + } + + /* Check if FW-logging is re-initialized */ + if (action_status == -EINPROGRESS) + rc = action_status; } ras_job_error: /* make error code available to userspace */ @@ -5487,12 +5477,11 @@ lpfc_bsg_get_ras_lwpd(struct bsg_job *job) struct lpfc_hba *phba = vport->phba; struct lpfc_ras_fwlog *ras_fwlog = &phba->ras_fwlog; struct fc_bsg_reply *bsg_reply = job->reply; - uint32_t lwpd_offset = 0; - uint64_t wrap_value = 0; + u32 *lwpd_ptr = NULL; int rc = 0; rc = lpfc_check_fwlog_support(phba); - if (rc == -EACCES || rc == -EPERM) + if (rc) goto ras_job_error; if (job->request_len < @@ -5508,11 +5497,19 @@ lpfc_bsg_get_ras_lwpd(struct bsg_job *job) ras_reply = (struct lpfc_bsg_get_ras_lwpd *) bsg_reply->reply_data.vendor_reply.vendor_rsp; - lwpd_offset = *((uint32_t *)ras_fwlog->lwpd.virt) & 0xffffffff; - ras_reply->offset = be32_to_cpu(lwpd_offset); + if (!ras_fwlog->lwpd.virt) { + lpfc_printf_log(phba, KERN_ERR, LOG_LIBDFC, + "6193 Restart FW Logging\n"); + rc = -EINVAL; + goto ras_job_error; + } + + /* Get lwpd offset */ + lwpd_ptr = (uint32_t *)(ras_fwlog->lwpd.virt); + ras_reply->offset = be32_to_cpu(*lwpd_ptr & 0xffffffff); - wrap_value = *((uint64_t *)ras_fwlog->lwpd.virt); - ras_reply->wrap_count = be32_to_cpu((wrap_value >> 32) & 0xffffffff); + /* Get wrap count */ + ras_reply->wrap_count = be32_to_cpu(*(++lwpd_ptr) & 0xffffffff); ras_job_error: /* make error code available to userspace */ @@ -5539,9 +5536,8 @@ lpfc_bsg_get_ras_fwlog(struct bsg_job *job) struct fc_bsg_request *bsg_request = job->request; struct fc_bsg_reply *bsg_reply = job->reply; struct lpfc_bsg_get_fwlog_req *ras_req; - uint32_t rd_offset, rd_index, offset, pending_wlen; - uint32_t boundary = 0, align_len = 0, write_len = 0; - void *dest, *src, *fwlog_buff; + u32 rd_offset, rd_index, offset; + void *src, *fwlog_buff; struct lpfc_ras_fwlog *ras_fwlog = NULL; struct lpfc_dmabuf *dmabuf, *next; int rc = 0; @@ -5549,7 +5545,7 @@ lpfc_bsg_get_ras_fwlog(struct bsg_job *job) ras_fwlog = &phba->ras_fwlog; rc = lpfc_check_fwlog_support(phba); - if (rc == -EACCES || rc == -EPERM) + if (rc) goto ras_job_error; /* Logging to be stopped before reading */ @@ -5581,8 +5577,6 @@ lpfc_bsg_get_ras_fwlog(struct bsg_job *job) rd_index = (rd_offset / LPFC_RAS_MAX_ENTRY_SIZE); offset = (rd_offset % LPFC_RAS_MAX_ENTRY_SIZE); - pending_wlen = ras_req->read_size; - dest = fwlog_buff; list_for_each_entry_safe(dmabuf, next, &ras_fwlog->fwlog_buff_list, list) { @@ -5590,29 +5584,9 @@ lpfc_bsg_get_ras_fwlog(struct bsg_job *job) if (dmabuf->buffer_tag < rd_index) continue; - /* Align read to buffer size */ - if (offset) { - boundary = ((dmabuf->buffer_tag + 1) * - LPFC_RAS_MAX_ENTRY_SIZE); - - align_len = (boundary - offset); - write_len = min_t(u32, align_len, - LPFC_RAS_MAX_ENTRY_SIZE); - } else { - write_len = min_t(u32, pending_wlen, - LPFC_RAS_MAX_ENTRY_SIZE); - align_len = 0; - boundary = 0; - } src = dmabuf->virt + offset; - memcpy(dest, src, write_len); - - pending_wlen -= write_len; - if (!pending_wlen) - break; - - dest += write_len; - offset = (offset + write_len) % LPFC_RAS_MAX_ENTRY_SIZE; + memcpy(fwlog_buff, src, ras_req->read_size); + break; } bsg_reply->reply_payload_rcv_len = @@ -5629,6 +5603,77 @@ ras_job_error: return rc; } +static int +lpfc_get_trunk_info(struct bsg_job *job) +{ + struct lpfc_vport *vport = shost_priv(fc_bsg_to_shost(job)); + struct lpfc_hba *phba = vport->phba; + struct fc_bsg_reply *bsg_reply = job->reply; + struct lpfc_trunk_info *event_reply; + int rc = 0; + + if (job->request_len < + sizeof(struct fc_bsg_request) + sizeof(struct get_trunk_info_req)) { + lpfc_printf_log(phba, KERN_ERR, LOG_LIBDFC, + "2744 Received GET TRUNK _INFO request below " + "minimum size\n"); + rc = -EINVAL; + goto job_error; + } + + event_reply = (struct lpfc_trunk_info *) + bsg_reply->reply_data.vendor_reply.vendor_rsp; + + if (job->reply_len < + sizeof(struct fc_bsg_request) + sizeof(struct lpfc_trunk_info)) { + lpfc_printf_log(phba, KERN_WARNING, LOG_LIBDFC, + "2728 Received GET TRUNK _INFO reply below " + "minimum size\n"); + rc = -EINVAL; + goto job_error; + } + if (event_reply == NULL) { + rc = -EINVAL; + goto job_error; + } + + bsg_bf_set(lpfc_trunk_info_link_status, event_reply, + (phba->link_state >= LPFC_LINK_UP) ? 1 : 0); + + bsg_bf_set(lpfc_trunk_info_trunk_active0, event_reply, + (phba->trunk_link.link0.state == LPFC_LINK_UP) ? 1 : 0); + + bsg_bf_set(lpfc_trunk_info_trunk_active1, event_reply, + (phba->trunk_link.link1.state == LPFC_LINK_UP) ? 1 : 0); + + bsg_bf_set(lpfc_trunk_info_trunk_active2, event_reply, + (phba->trunk_link.link2.state == LPFC_LINK_UP) ? 1 : 0); + + bsg_bf_set(lpfc_trunk_info_trunk_active3, event_reply, + (phba->trunk_link.link3.state == LPFC_LINK_UP) ? 1 : 0); + + bsg_bf_set(lpfc_trunk_info_trunk_config0, event_reply, + bf_get(lpfc_conf_trunk_port0, &phba->sli4_hba)); + + bsg_bf_set(lpfc_trunk_info_trunk_config1, event_reply, + bf_get(lpfc_conf_trunk_port1, &phba->sli4_hba)); + + bsg_bf_set(lpfc_trunk_info_trunk_config2, event_reply, + bf_get(lpfc_conf_trunk_port2, &phba->sli4_hba)); + + bsg_bf_set(lpfc_trunk_info_trunk_config3, event_reply, + bf_get(lpfc_conf_trunk_port3, &phba->sli4_hba)); + + event_reply->port_speed = phba->sli4_hba.link_state.speed / 1000; + event_reply->logical_speed = + phba->sli4_hba.link_state.logical_speed / 100; +job_error: + bsg_reply->result = rc; + bsg_job_done(job, bsg_reply->result, + bsg_reply->reply_payload_rcv_len); + return rc; + +} /** * lpfc_bsg_hst_vendor - process a vendor-specific fc_bsg_job @@ -5689,6 +5734,9 @@ lpfc_bsg_hst_vendor(struct bsg_job *job) case LPFC_BSG_VENDOR_RAS_SET_CONFIG: rc = lpfc_bsg_set_ras_config(job); break; + case LPFC_BSG_VENDOR_GET_TRUNK_INFO: + rc = lpfc_get_trunk_info(job); + break; default: rc = -EINVAL; bsg_reply->reply_payload_rcv_len = 0; diff --git a/drivers/scsi/lpfc/lpfc_bsg.h b/drivers/scsi/lpfc/lpfc_bsg.h index 820323f1139b..9151824beea4 100644 --- a/drivers/scsi/lpfc/lpfc_bsg.h +++ b/drivers/scsi/lpfc/lpfc_bsg.h @@ -42,6 +42,7 @@ #define LPFC_BSG_VENDOR_RAS_GET_FWLOG 17 #define LPFC_BSG_VENDOR_RAS_GET_CONFIG 18 #define LPFC_BSG_VENDOR_RAS_SET_CONFIG 19 +#define LPFC_BSG_VENDOR_GET_TRUNK_INFO 20 struct set_ct_event { uint32_t command; @@ -331,6 +332,43 @@ struct lpfc_bsg_get_ras_config_reply { uint32_t log_buff_sz; }; +struct lpfc_trunk_info { + uint32_t word0; +#define lpfc_trunk_info_link_status_SHIFT 0 +#define lpfc_trunk_info_link_status_MASK 1 +#define lpfc_trunk_info_link_status_WORD word0 +#define lpfc_trunk_info_trunk_active0_SHIFT 8 +#define lpfc_trunk_info_trunk_active0_MASK 1 +#define lpfc_trunk_info_trunk_active0_WORD word0 +#define lpfc_trunk_info_trunk_active1_SHIFT 9 +#define lpfc_trunk_info_trunk_active1_MASK 1 +#define lpfc_trunk_info_trunk_active1_WORD word0 +#define lpfc_trunk_info_trunk_active2_SHIFT 10 +#define lpfc_trunk_info_trunk_active2_MASK 1 +#define lpfc_trunk_info_trunk_active2_WORD word0 +#define lpfc_trunk_info_trunk_active3_SHIFT 11 +#define lpfc_trunk_info_trunk_active3_MASK 1 +#define lpfc_trunk_info_trunk_active3_WORD word0 +#define lpfc_trunk_info_trunk_config0_SHIFT 12 +#define lpfc_trunk_info_trunk_config0_MASK 1 +#define lpfc_trunk_info_trunk_config0_WORD word0 +#define lpfc_trunk_info_trunk_config1_SHIFT 13 +#define lpfc_trunk_info_trunk_config1_MASK 1 +#define lpfc_trunk_info_trunk_config1_WORD word0 +#define lpfc_trunk_info_trunk_config2_SHIFT 14 +#define lpfc_trunk_info_trunk_config2_MASK 1 +#define lpfc_trunk_info_trunk_config2_WORD word0 +#define lpfc_trunk_info_trunk_config3_SHIFT 15 +#define lpfc_trunk_info_trunk_config3_MASK 1 +#define lpfc_trunk_info_trunk_config3_WORD word0 + uint16_t port_speed; + uint16_t logical_speed; + uint32_t reserved3; +}; + +struct get_trunk_info_req { + uint32_t command; +}; /* driver only */ #define SLI_CONFIG_NOT_HANDLED 0 diff --git a/drivers/scsi/lpfc/lpfc_crtn.h b/drivers/scsi/lpfc/lpfc_crtn.h index e01136507780..39f3fa988732 100644 --- a/drivers/scsi/lpfc/lpfc_crtn.h +++ b/drivers/scsi/lpfc/lpfc_crtn.h @@ -74,7 +74,6 @@ void lpfc_mbx_cmpl_read_topology(struct lpfc_hba *, LPFC_MBOXQ_t *); void lpfc_init_vpi_cmpl(struct lpfc_hba *, LPFC_MBOXQ_t *); void lpfc_cancel_all_vport_retry_delay_timer(struct lpfc_hba *); void lpfc_retry_pport_discovery(struct lpfc_hba *); -void lpfc_release_rpi(struct lpfc_hba *, struct lpfc_vport *, uint16_t); int lpfc_init_iocb_list(struct lpfc_hba *phba, int cnt); void lpfc_free_iocb_list(struct lpfc_hba *phba); int lpfc_post_rq_buffer(struct lpfc_hba *phba, struct lpfc_queue *hrq, @@ -175,6 +174,7 @@ void lpfc_hb_timeout_handler(struct lpfc_hba *); void lpfc_ct_unsol_event(struct lpfc_hba *, struct lpfc_sli_ring *, struct lpfc_iocbq *); int lpfc_ct_handle_unsol_abort(struct lpfc_hba *, struct hbq_dmabuf *); +int lpfc_issue_gidpt(struct lpfc_vport *vport); int lpfc_issue_gidft(struct lpfc_vport *vport); int lpfc_get_gidft_type(struct lpfc_vport *vport, struct lpfc_iocbq *iocbq); int lpfc_ns_cmd(struct lpfc_vport *, int, uint8_t, uint32_t); @@ -380,8 +380,10 @@ void lpfc_nvmet_buf_free(struct lpfc_hba *phba, void *virtp, dma_addr_t dma); void lpfc_in_buf_free(struct lpfc_hba *, struct lpfc_dmabuf *); void lpfc_rq_buf_free(struct lpfc_hba *phba, struct lpfc_dmabuf *mp); +int lpfc_link_reset(struct lpfc_vport *vport); /* Function prototypes. */ +int lpfc_check_pci_resettable(const struct lpfc_hba *phba); const char* lpfc_info(struct Scsi_Host *); int lpfc_scan_finished(struct Scsi_Host *, unsigned long); @@ -550,6 +552,7 @@ void lpfc_sli4_ras_init(struct lpfc_hba *phba); void lpfc_sli4_ras_setup(struct lpfc_hba *phba); int lpfc_sli4_ras_fwlog_init(struct lpfc_hba *phba, uint32_t fwlog_level, uint32_t fwlog_enable); +void lpfc_ras_stop_fwlog(struct lpfc_hba *phba); int lpfc_check_fwlog_support(struct lpfc_hba *phba); /* NVME interfaces. */ diff --git a/drivers/scsi/lpfc/lpfc_ct.c b/drivers/scsi/lpfc/lpfc_ct.c index 789ad1502534..552da8bf43e4 100644 --- a/drivers/scsi/lpfc/lpfc_ct.c +++ b/drivers/scsi/lpfc/lpfc_ct.c @@ -540,7 +540,17 @@ lpfc_ns_rsp_audit_did(struct lpfc_vport *vport, uint32_t Did, uint8_t fc4_type) struct lpfc_hba *phba = vport->phba; struct lpfc_nodelist *ndlp = NULL; struct Scsi_Host *shost = lpfc_shost_from_vport(vport); + char *str; + if (phba->cfg_ns_query == LPFC_NS_QUERY_GID_FT) + str = "GID_FT"; + else + str = "GID_PT"; + lpfc_printf_vlog(vport, KERN_INFO, LOG_DISCOVERY, + "6430 Process %s rsp for %08x type %x %s %s\n", + str, Did, fc4_type, + (fc4_type == FC_TYPE_FCP) ? "FCP" : " ", + (fc4_type == FC_TYPE_NVME) ? "NVME" : " "); /* * To conserve rpi's, filter out addresses for other * vports on the same physical HBAs. @@ -832,6 +842,198 @@ out: } static void +lpfc_cmpl_ct_cmd_gid_pt(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb, + struct lpfc_iocbq *rspiocb) +{ + struct lpfc_vport *vport = cmdiocb->vport; + struct Scsi_Host *shost = lpfc_shost_from_vport(vport); + IOCB_t *irsp; + struct lpfc_dmabuf *outp; + struct lpfc_dmabuf *inp; + struct lpfc_sli_ct_request *CTrsp; + struct lpfc_sli_ct_request *CTreq; + struct lpfc_nodelist *ndlp; + int rc; + + /* First save ndlp, before we overwrite it */ + ndlp = cmdiocb->context_un.ndlp; + + /* we pass cmdiocb to state machine which needs rspiocb as well */ + cmdiocb->context_un.rsp_iocb = rspiocb; + inp = (struct lpfc_dmabuf *)cmdiocb->context1; + outp = (struct lpfc_dmabuf *)cmdiocb->context2; + irsp = &rspiocb->iocb; + + lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_CT, + "GID_PT cmpl: status:x%x/x%x rtry:%d", + irsp->ulpStatus, irsp->un.ulpWord[4], + vport->fc_ns_retry); + + /* Don't bother processing response if vport is being torn down. */ + if (vport->load_flag & FC_UNLOADING) { + if (vport->fc_flag & FC_RSCN_MODE) + lpfc_els_flush_rscn(vport); + goto out; + } + + if (lpfc_els_chk_latt(vport)) { + lpfc_printf_vlog(vport, KERN_INFO, LOG_DISCOVERY, + "4108 Link event during NS query\n"); + if (vport->fc_flag & FC_RSCN_MODE) + lpfc_els_flush_rscn(vport); + lpfc_vport_set_state(vport, FC_VPORT_FAILED); + goto out; + } + if (lpfc_error_lost_link(irsp)) { + lpfc_printf_vlog(vport, KERN_INFO, LOG_DISCOVERY, + "4101 NS query failed due to link event\n"); + if (vport->fc_flag & FC_RSCN_MODE) + lpfc_els_flush_rscn(vport); + goto out; + } + + spin_lock_irq(shost->host_lock); + if (vport->fc_flag & FC_RSCN_DEFERRED) { + vport->fc_flag &= ~FC_RSCN_DEFERRED; + spin_unlock_irq(shost->host_lock); + + /* This is a GID_PT completing so the gidft_inp counter was + * incremented before the GID_PT was issued to the wire. + */ + vport->gidft_inp--; + + /* + * Skip processing the NS response + * Re-issue the NS cmd + */ + lpfc_printf_vlog(vport, KERN_INFO, LOG_ELS, + "4102 Process Deferred RSCN Data: x%x x%x\n", + vport->fc_flag, vport->fc_rscn_id_cnt); + lpfc_els_handle_rscn(vport); + + goto out; + } + spin_unlock_irq(shost->host_lock); + + if (irsp->ulpStatus) { + /* Check for retry */ + if (vport->fc_ns_retry < LPFC_MAX_NS_RETRY) { + if (irsp->ulpStatus != IOSTAT_LOCAL_REJECT || + (irsp->un.ulpWord[4] & IOERR_PARAM_MASK) != + IOERR_NO_RESOURCES) + vport->fc_ns_retry++; + + /* CT command is being retried */ + vport->gidft_inp--; + rc = lpfc_ns_cmd(vport, SLI_CTNS_GID_PT, + vport->fc_ns_retry, GID_PT_N_PORT); + if (rc == 0) + goto out; + } + if (vport->fc_flag & FC_RSCN_MODE) + lpfc_els_flush_rscn(vport); + lpfc_vport_set_state(vport, FC_VPORT_FAILED); + lpfc_printf_vlog(vport, KERN_ERR, LOG_ELS, + "4103 GID_FT Query error: 0x%x 0x%x\n", + irsp->ulpStatus, vport->fc_ns_retry); + } else { + /* Good status, continue checking */ + CTreq = (struct lpfc_sli_ct_request *)inp->virt; + CTrsp = (struct lpfc_sli_ct_request *)outp->virt; + if (CTrsp->CommandResponse.bits.CmdRsp == + cpu_to_be16(SLI_CT_RESPONSE_FS_ACC)) { + lpfc_printf_vlog(vport, KERN_INFO, LOG_DISCOVERY, + "4105 NameServer Rsp Data: x%x x%x\n", + vport->fc_flag, + CTreq->un.gid.Fc4Type); + + lpfc_ns_rsp(vport, + outp, + CTreq->un.gid.Fc4Type, + (uint32_t)(irsp->un.genreq64.bdl.bdeSize)); + } else if (CTrsp->CommandResponse.bits.CmdRsp == + be16_to_cpu(SLI_CT_RESPONSE_FS_RJT)) { + /* NameServer Rsp Error */ + if ((CTrsp->ReasonCode == SLI_CT_UNABLE_TO_PERFORM_REQ) + && (CTrsp->Explanation == SLI_CT_NO_FC4_TYPES)) { + lpfc_printf_vlog( + vport, KERN_INFO, LOG_DISCOVERY, + "4106 No NameServer Entries " + "Data: x%x x%x x%x x%x\n", + CTrsp->CommandResponse.bits.CmdRsp, + (uint32_t)CTrsp->ReasonCode, + (uint32_t)CTrsp->Explanation, + vport->fc_flag); + + lpfc_debugfs_disc_trc( + vport, LPFC_DISC_TRC_CT, + "GID_PT no entry cmd:x%x rsn:x%x exp:x%x", + (uint32_t)CTrsp->CommandResponse.bits.CmdRsp, + (uint32_t)CTrsp->ReasonCode, + (uint32_t)CTrsp->Explanation); + } else { + lpfc_printf_vlog( + vport, KERN_INFO, LOG_DISCOVERY, + "4107 NameServer Rsp Error " + "Data: x%x x%x x%x x%x\n", + CTrsp->CommandResponse.bits.CmdRsp, + (uint32_t)CTrsp->ReasonCode, + (uint32_t)CTrsp->Explanation, + vport->fc_flag); + + lpfc_debugfs_disc_trc( + vport, LPFC_DISC_TRC_CT, + "GID_PT rsp err1 cmd:x%x rsn:x%x exp:x%x", + (uint32_t)CTrsp->CommandResponse.bits.CmdRsp, + (uint32_t)CTrsp->ReasonCode, + (uint32_t)CTrsp->Explanation); + } + } else { + /* NameServer Rsp Error */ + lpfc_printf_vlog(vport, KERN_ERR, LOG_DISCOVERY, + "4109 NameServer Rsp Error " + "Data: x%x x%x x%x x%x\n", + CTrsp->CommandResponse.bits.CmdRsp, + (uint32_t)CTrsp->ReasonCode, + (uint32_t)CTrsp->Explanation, + vport->fc_flag); + + lpfc_debugfs_disc_trc( + vport, LPFC_DISC_TRC_CT, + "GID_PT rsp err2 cmd:x%x rsn:x%x exp:x%x", + (uint32_t)CTrsp->CommandResponse.bits.CmdRsp, + (uint32_t)CTrsp->ReasonCode, + (uint32_t)CTrsp->Explanation); + } + vport->gidft_inp--; + } + /* Link up / RSCN discovery */ + if ((vport->num_disc_nodes == 0) && + (vport->gidft_inp == 0)) { + /* + * The driver has cycled through all Nports in the RSCN payload. + * Complete the handling by cleaning up and marking the + * current driver state. + */ + if (vport->port_state >= LPFC_DISC_AUTH) { + if (vport->fc_flag & FC_RSCN_MODE) { + lpfc_els_flush_rscn(vport); + spin_lock_irq(shost->host_lock); + vport->fc_flag |= FC_RSCN_MODE; /* RSCN still */ + spin_unlock_irq(shost->host_lock); + } else { + lpfc_els_flush_rscn(vport); + } + } + + lpfc_disc_start(vport); + } +out: + cmdiocb->context_un.ndlp = ndlp; /* Now restore ndlp for free */ + lpfc_ct_free_iocb(phba, cmdiocb); +} + +static void lpfc_cmpl_ct_cmd_gff_id(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb, struct lpfc_iocbq *rspiocb) { @@ -857,6 +1059,13 @@ lpfc_cmpl_ct_cmd_gff_id(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb, CTrsp = (struct lpfc_sli_ct_request *) outp->virt; fbits = CTrsp->un.gff_acc.fbits[FCP_TYPE_FEATURE_OFFSET]; + lpfc_printf_vlog(vport, KERN_INFO, LOG_DISCOVERY, + "6431 Process GFF_ID rsp for %08x " + "fbits %02x %s %s\n", + did, fbits, + (fbits & FC4_FEATURE_INIT) ? "Initiator" : " ", + (fbits & FC4_FEATURE_TARGET) ? "Target" : " "); + if (CTrsp->CommandResponse.bits.CmdRsp == be16_to_cpu(SLI_CT_RESPONSE_FS_ACC)) { if ((fbits & FC4_FEATURE_INIT) && @@ -979,9 +1188,15 @@ lpfc_cmpl_ct_cmd_gft_id(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb, CTrsp = (struct lpfc_sli_ct_request *)outp->virt; fc4_data_0 = be32_to_cpu(CTrsp->un.gft_acc.fc4_types[0]); fc4_data_1 = be32_to_cpu(CTrsp->un.gft_acc.fc4_types[1]); + lpfc_printf_vlog(vport, KERN_INFO, LOG_DISCOVERY, - "3062 DID x%06x GFT Wd0 x%08x Wd1 x%08x\n", - did, fc4_data_0, fc4_data_1); + "6432 Process GFT_ID rsp for %08x " + "Data %08x %08x %s %s\n", + did, fc4_data_0, fc4_data_1, + (fc4_data_0 & LPFC_FC4_TYPE_BITMASK) ? + "FCP" : " ", + (fc4_data_1 & LPFC_FC4_TYPE_BITMASK) ? + "NVME" : " "); ndlp = lpfc_findnode_did(vport, did); if (ndlp) { @@ -1312,6 +1527,7 @@ lpfc_ns_cmd(struct lpfc_vport *vport, int cmdcode, struct ulp_bde64 *bpl; void (*cmpl) (struct lpfc_hba *, struct lpfc_iocbq *, struct lpfc_iocbq *) = NULL; + uint32_t *ptr; uint32_t rsp_size = 1024; size_t size; int rc = 0; @@ -1365,6 +1581,8 @@ lpfc_ns_cmd(struct lpfc_vport *vport, int cmdcode, bpl->tus.f.bdeFlags = 0; if (cmdcode == SLI_CTNS_GID_FT) bpl->tus.f.bdeSize = GID_REQUEST_SZ; + else if (cmdcode == SLI_CTNS_GID_PT) + bpl->tus.f.bdeSize = GID_REQUEST_SZ; else if (cmdcode == SLI_CTNS_GFF_ID) bpl->tus.f.bdeSize = GFF_REQUEST_SZ; else if (cmdcode == SLI_CTNS_GFT_ID) @@ -1405,6 +1623,18 @@ lpfc_ns_cmd(struct lpfc_vport *vport, int cmdcode, rsp_size = FC_MAX_NS_RSP; break; + case SLI_CTNS_GID_PT: + CtReq->CommandResponse.bits.CmdRsp = + cpu_to_be16(SLI_CTNS_GID_PT); + CtReq->un.gid.PortType = context; + + if (vport->port_state < LPFC_NS_QRY) + vport->port_state = LPFC_NS_QRY; + lpfc_set_disctmo(vport); + cmpl = lpfc_cmpl_ct_cmd_gid_pt; + rsp_size = FC_MAX_NS_RSP; + break; + case SLI_CTNS_GFF_ID: CtReq->CommandResponse.bits.CmdRsp = cpu_to_be16(SLI_CTNS_GFF_ID); @@ -1436,8 +1666,18 @@ lpfc_ns_cmd(struct lpfc_vport *vport, int cmdcode, */ if ((phba->cfg_enable_fc4_type == LPFC_ENABLE_BOTH) || (phba->cfg_enable_fc4_type == LPFC_ENABLE_NVME)) - CtReq->un.rft.rsvd[0] = cpu_to_be32(0x00000100); + CtReq->un.rft.rsvd[0] = + cpu_to_be32(LPFC_FC4_TYPE_BITMASK); + ptr = (uint32_t *)CtReq; + lpfc_printf_vlog(vport, KERN_INFO, LOG_DISCOVERY, + "6433 Issue RFT (%s %s): %08x %08x %08x %08x " + "%08x %08x %08x %08x\n", + CtReq->un.rft.fcpReg ? "FCP" : " ", + CtReq->un.rft.rsvd[0] ? "NVME" : " ", + *ptr, *(ptr + 1), *(ptr + 2), *(ptr + 3), + *(ptr + 4), *(ptr + 5), + *(ptr + 6), *(ptr + 7)); cmpl = lpfc_cmpl_ct_cmd_rft_id; break; @@ -1512,6 +1752,14 @@ lpfc_ns_cmd(struct lpfc_vport *vport, int cmdcode, else goto ns_cmd_free_bmpvirt; + ptr = (uint32_t *)CtReq; + lpfc_printf_vlog(vport, KERN_INFO, LOG_DISCOVERY, + "6434 Issue RFF (%s): %08x %08x %08x %08x " + "%08x %08x %08x %08x\n", + (context == FC_TYPE_NVME) ? "NVME" : "FCP", + *ptr, *(ptr + 1), *(ptr + 2), *(ptr + 3), + *(ptr + 4), *(ptr + 5), + *(ptr + 6), *(ptr + 7)); cmpl = lpfc_cmpl_ct_cmd_rff_id; break; } @@ -1758,7 +2006,7 @@ lpfc_fdmi_hba_attr_manufacturer(struct lpfc_vport *vport, memset(ae, 0, 256); strncpy(ae->un.AttrString, - "Emulex Corporation", + "Broadcom Inc.", sizeof(ae->un.AttrString)); len = strnlen(ae->un.AttrString, sizeof(ae->un.AttrString)); @@ -2134,6 +2382,8 @@ lpfc_fdmi_port_attr_support_speed(struct lpfc_vport *vport, ae->un.AttrInt = 0; if (!(phba->hba_flag & HBA_FCOE_MODE)) { + if (phba->lmt & LMT_128Gb) + ae->un.AttrInt |= HBA_PORTSPEED_128GFC; if (phba->lmt & LMT_64Gb) ae->un.AttrInt |= HBA_PORTSPEED_64GFC; if (phba->lmt & LMT_32Gb) @@ -2210,6 +2460,9 @@ lpfc_fdmi_port_attr_speed(struct lpfc_vport *vport, case LPFC_LINK_SPEED_64GHZ: ae->un.AttrInt = HBA_PORTSPEED_64GFC; break; + case LPFC_LINK_SPEED_128GHZ: + ae->un.AttrInt = HBA_PORTSPEED_128GFC; + break; default: ae->un.AttrInt = HBA_PORTSPEED_UNKNOWN; break; diff --git a/drivers/scsi/lpfc/lpfc_debugfs.c b/drivers/scsi/lpfc/lpfc_debugfs.c index 34d311a7dbef..a58f0b3f03a9 100644 --- a/drivers/scsi/lpfc/lpfc_debugfs.c +++ b/drivers/scsi/lpfc/lpfc_debugfs.c @@ -645,6 +645,8 @@ lpfc_debugfs_nodelist_data(struct lpfc_vport *vport, char *buf, int size) i, ndlp->cmd_qdepth); outio += i; } + len += snprintf(buf + len, size - len, "defer:%x ", + ndlp->nlp_defer_did); len += snprintf(buf+len, size-len, "\n"); } spin_unlock_irq(shost->host_lock); diff --git a/drivers/scsi/lpfc/lpfc_disc.h b/drivers/scsi/lpfc/lpfc_disc.h index 28e2b60fc5c0..1c89c9f314fa 100644 --- a/drivers/scsi/lpfc/lpfc_disc.h +++ b/drivers/scsi/lpfc/lpfc_disc.h @@ -138,6 +138,7 @@ struct lpfc_nodelist { uint32_t nvme_fb_size; /* NVME target's supported byte cnt */ #define NVME_FB_BIT_SHIFT 9 /* PRLI Rsp first burst in 512B units. */ + uint32_t nlp_defer_did; }; struct lpfc_node_rrq { struct list_head list; @@ -165,6 +166,7 @@ struct lpfc_node_rrq { #define NLP_ELS_SND_MASK 0x000007e0 /* sent ELS request for this entry */ #define NLP_NVMET_RECOV 0x00001000 /* NVMET auditing node for recovery. */ #define NLP_FCP_PRLI_RJT 0x00002000 /* Rport does not support FCP PRLI. */ +#define NLP_UNREG_INP 0x00008000 /* UNREG_RPI cmd is in progress */ #define NLP_DEFER_RM 0x00010000 /* Remove this ndlp if no longer used */ #define NLP_DELAY_TMO 0x00020000 /* delay timeout is running for node */ #define NLP_NPR_2B_DISC 0x00040000 /* node is included in num_disc_nodes */ @@ -293,4 +295,4 @@ struct lpfc_node_rrq { #define NLP_EVT_DEVICE_RM 0xb /* Device not found in NS / ALPAmap */ #define NLP_EVT_DEVICE_RECOVERY 0xc /* Device existence unknown */ #define NLP_EVT_MAX_EVENT 0xd - +#define NLP_EVT_NOTHING_PENDING 0xff diff --git a/drivers/scsi/lpfc/lpfc_els.c b/drivers/scsi/lpfc/lpfc_els.c index f1c1faa74b46..b3a4789468c3 100644 --- a/drivers/scsi/lpfc/lpfc_els.c +++ b/drivers/scsi/lpfc/lpfc_els.c @@ -242,6 +242,8 @@ lpfc_prep_els_iocb(struct lpfc_vport *vport, uint8_t expectRsp, icmd->ulpCommand = CMD_ELS_REQUEST64_CR; if (elscmd == ELS_CMD_FLOGI) icmd->ulpTimeout = FF_DEF_RATOV * 2; + else if (elscmd == ELS_CMD_LOGO) + icmd->ulpTimeout = phba->fc_ratov; else icmd->ulpTimeout = phba->fc_ratov * 2; } else { @@ -313,20 +315,20 @@ lpfc_prep_els_iocb(struct lpfc_vport *vport, uint8_t expectRsp, /* Xmit ELS command <elsCmd> to remote NPORT <did> */ lpfc_printf_vlog(vport, KERN_INFO, LOG_ELS, "0116 Xmit ELS command x%x to remote " - "NPORT x%x I/O tag: x%x, port state:x%x" - " fc_flag:x%x\n", + "NPORT x%x I/O tag: x%x, port state:x%x " + "rpi x%x fc_flag:x%x\n", elscmd, did, elsiocb->iotag, - vport->port_state, + vport->port_state, ndlp->nlp_rpi, vport->fc_flag); } else { /* Xmit ELS response <elsCmd> to remote NPORT <did> */ lpfc_printf_vlog(vport, KERN_INFO, LOG_ELS, "0117 Xmit ELS response x%x to remote " "NPORT x%x I/O tag: x%x, size: x%x " - "port_state x%x fc_flag x%x\n", + "port_state x%x rpi x%x fc_flag x%x\n", elscmd, ndlp->nlp_DID, elsiocb->iotag, cmdSize, vport->port_state, - vport->fc_flag); + ndlp->nlp_rpi, vport->fc_flag); } return elsiocb; @@ -413,7 +415,7 @@ lpfc_issue_fabric_reglogin(struct lpfc_vport *vport) /* increment the reference count on ndlp to hold reference * for the callback routine. */ - mbox->context2 = lpfc_nlp_get(ndlp); + mbox->ctx_ndlp = lpfc_nlp_get(ndlp); rc = lpfc_sli_issue_mbox(phba, mbox, MBX_NOWAIT); if (rc == MBX_NOT_FINISHED) { @@ -428,7 +430,7 @@ fail_issue_reg_login: * for the failed mbox command. */ lpfc_nlp_put(ndlp); - mp = (struct lpfc_dmabuf *) mbox->context1; + mp = (struct lpfc_dmabuf *)mbox->ctx_buf; lpfc_mbuf_free(phba, mp->virt, mp->phys); kfree(mp); fail_free_mbox: @@ -502,7 +504,7 @@ lpfc_issue_reg_vfi(struct lpfc_vport *vport) mboxq->mbox_cmpl = lpfc_mbx_cmpl_reg_vfi; mboxq->vport = vport; - mboxq->context1 = dmabuf; + mboxq->ctx_buf = dmabuf; rc = lpfc_sli_issue_mbox(phba, mboxq, MBX_NOWAIT); if (rc == MBX_NOT_FINISHED) { rc = -ENXIO; @@ -1055,9 +1057,9 @@ stop_rr_fcf_flogi: goto flogifail; lpfc_printf_vlog(vport, KERN_WARNING, LOG_ELS, - "0150 FLOGI failure Status:x%x/x%x TMO:x%x\n", + "0150 FLOGI failure Status:x%x/x%x xri x%x TMO:x%x\n", irsp->ulpStatus, irsp->un.ulpWord[4], - irsp->ulpTimeout); + cmdiocb->sli4_xritag, irsp->ulpTimeout); /* FLOGI failed, so there is no fabric */ spin_lock_irq(shost->host_lock); @@ -1111,7 +1113,8 @@ stop_rr_fcf_flogi: /* FLOGI completes successfully */ lpfc_printf_vlog(vport, KERN_INFO, LOG_ELS, "0101 FLOGI completes successfully, I/O tag:x%x, " - "Data: x%x x%x x%x x%x x%x x%x\n", cmdiocb->iotag, + "xri x%x Data: x%x x%x x%x x%x x%x %x\n", + cmdiocb->iotag, cmdiocb->sli4_xritag, irsp->un.ulpWord[4], sp->cmn.e_d_tov, sp->cmn.w2.r_a_tov, sp->cmn.edtovResolution, vport->port_state, vport->fc_flag); @@ -1155,6 +1158,7 @@ stop_rr_fcf_flogi: phba->fcf.fcf_flag &= ~FCF_DISCOVERY; phba->hba_flag &= ~(FCF_RR_INPROG | HBA_DEVLOSS_TMO); spin_unlock_irq(&phba->hbalock); + phba->fcf.fcf_redisc_attempted = 0; /* reset */ goto out; } if (!rc) { @@ -1169,6 +1173,7 @@ stop_rr_fcf_flogi: phba->fcf.fcf_flag &= ~FCF_DISCOVERY; phba->hba_flag &= ~(FCF_RR_INPROG | HBA_DEVLOSS_TMO); spin_unlock_irq(&phba->hbalock); + phba->fcf.fcf_redisc_attempted = 0; /* reset */ goto out; } } @@ -1229,9 +1234,10 @@ lpfc_issue_els_flogi(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp, struct serv_parm *sp; IOCB_t *icmd; struct lpfc_iocbq *elsiocb; + struct lpfc_iocbq defer_flogi_acc; uint8_t *pcmd; uint16_t cmdsize; - uint32_t tmo; + uint32_t tmo, did; int rc; cmdsize = (sizeof(uint32_t) + sizeof(struct serv_parm)); @@ -1303,6 +1309,35 @@ lpfc_issue_els_flogi(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp, phba->sli3_options, 0, 0); rc = lpfc_issue_fabric_iocb(phba, elsiocb); + + phba->hba_flag |= HBA_FLOGI_ISSUED; + + /* Check for a deferred FLOGI ACC condition */ + if (phba->defer_flogi_acc_flag) { + did = vport->fc_myDID; + vport->fc_myDID = Fabric_DID; + + memset(&defer_flogi_acc, 0, sizeof(struct lpfc_iocbq)); + + defer_flogi_acc.iocb.ulpContext = phba->defer_flogi_acc_rx_id; + defer_flogi_acc.iocb.unsli3.rcvsli3.ox_id = + phba->defer_flogi_acc_ox_id; + + lpfc_printf_vlog(vport, KERN_INFO, LOG_ELS, + "3354 Xmit deferred FLOGI ACC: rx_id: x%x," + " ox_id: x%x, hba_flag x%x\n", + phba->defer_flogi_acc_rx_id, + phba->defer_flogi_acc_ox_id, phba->hba_flag); + + /* Send deferred FLOGI ACC */ + lpfc_els_rsp_acc(vport, ELS_CMD_FLOGI, &defer_flogi_acc, + ndlp, NULL); + + phba->defer_flogi_acc_flag = false; + + vport->fc_myDID = did; + } + if (rc == IOCB_ERROR) { lpfc_els_free_iocb(phba, elsiocb); return 1; @@ -1338,6 +1373,8 @@ lpfc_els_abort_flogi(struct lpfc_hba *phba) Fabric_DID); pring = lpfc_phba_elsring(phba); + if (unlikely(!pring)) + return -EIO; /* * Check the txcmplq for an iocb that matches the nport the driver is @@ -1531,7 +1568,9 @@ lpfc_plogi_confirm_nport(struct lpfc_hba *phba, uint32_t *prsp, struct serv_parm *sp; uint8_t name[sizeof(struct lpfc_name)]; uint32_t rc, keepDID = 0, keep_nlp_flag = 0; + uint32_t keep_new_nlp_flag = 0; uint16_t keep_nlp_state; + u32 keep_nlp_fc4_type = 0; struct lpfc_nvme_rport *keep_nrport = NULL; int put_node; int put_rport; @@ -1551,8 +1590,10 @@ lpfc_plogi_confirm_nport(struct lpfc_hba *phba, uint32_t *prsp, */ new_ndlp = lpfc_findnode_wwpn(vport, &sp->portName); + /* return immediately if the WWPN matches ndlp */ if (new_ndlp == ndlp && NLP_CHK_NODE_ACT(new_ndlp)) return ndlp; + if (phba->sli_rev == LPFC_SLI_REV4) { active_rrqs_xri_bitmap = mempool_alloc(phba->active_rrq_pool, GFP_KERNEL); @@ -1561,9 +1602,13 @@ lpfc_plogi_confirm_nport(struct lpfc_hba *phba, uint32_t *prsp, phba->cfg_rrq_xri_bitmap_sz); } - lpfc_printf_vlog(vport, KERN_INFO, LOG_ELS, - "3178 PLOGI confirm: ndlp %p x%x: new_ndlp %p\n", - ndlp, ndlp->nlp_DID, new_ndlp); + lpfc_printf_vlog(vport, KERN_INFO, LOG_ELS | LOG_NODE, + "3178 PLOGI confirm: ndlp x%x x%x x%x: " + "new_ndlp x%x x%x x%x\n", + ndlp->nlp_DID, ndlp->nlp_flag, ndlp->nlp_fc4_type, + (new_ndlp ? new_ndlp->nlp_DID : 0), + (new_ndlp ? new_ndlp->nlp_flag : 0), + (new_ndlp ? new_ndlp->nlp_fc4_type : 0)); if (!new_ndlp) { rc = memcmp(&ndlp->nlp_portname, name, @@ -1612,6 +1657,16 @@ lpfc_plogi_confirm_nport(struct lpfc_hba *phba, uint32_t *prsp, phba->cfg_rrq_xri_bitmap_sz); } + /* At this point in this routine, we know new_ndlp will be + * returned. however, any previous GID_FTs that were done + * would have updated nlp_fc4_type in ndlp, so we must ensure + * new_ndlp has the right value. + */ + if (vport->fc_flag & FC_FABRIC) { + keep_nlp_fc4_type = new_ndlp->nlp_fc4_type; + new_ndlp->nlp_fc4_type = ndlp->nlp_fc4_type; + } + lpfc_unreg_rpi(vport, new_ndlp); new_ndlp->nlp_DID = ndlp->nlp_DID; new_ndlp->nlp_prev_state = ndlp->nlp_prev_state; @@ -1621,9 +1676,36 @@ lpfc_plogi_confirm_nport(struct lpfc_hba *phba, uint32_t *prsp, phba->cfg_rrq_xri_bitmap_sz); spin_lock_irq(shost->host_lock); - keep_nlp_flag = new_ndlp->nlp_flag; + keep_new_nlp_flag = new_ndlp->nlp_flag; + keep_nlp_flag = ndlp->nlp_flag; new_ndlp->nlp_flag = ndlp->nlp_flag; - ndlp->nlp_flag = keep_nlp_flag; + + /* if new_ndlp had NLP_UNREG_INP set, keep it */ + if (keep_new_nlp_flag & NLP_UNREG_INP) + new_ndlp->nlp_flag |= NLP_UNREG_INP; + else + new_ndlp->nlp_flag &= ~NLP_UNREG_INP; + + /* if new_ndlp had NLP_RPI_REGISTERED set, keep it */ + if (keep_new_nlp_flag & NLP_RPI_REGISTERED) + new_ndlp->nlp_flag |= NLP_RPI_REGISTERED; + else + new_ndlp->nlp_flag &= ~NLP_RPI_REGISTERED; + + ndlp->nlp_flag = keep_new_nlp_flag; + + /* if ndlp had NLP_UNREG_INP set, keep it */ + if (keep_nlp_flag & NLP_UNREG_INP) + ndlp->nlp_flag |= NLP_UNREG_INP; + else + ndlp->nlp_flag &= ~NLP_UNREG_INP; + + /* if ndlp had NLP_RPI_REGISTERED set, keep it */ + if (keep_nlp_flag & NLP_RPI_REGISTERED) + ndlp->nlp_flag |= NLP_RPI_REGISTERED; + else + ndlp->nlp_flag &= ~NLP_RPI_REGISTERED; + spin_unlock_irq(shost->host_lock); /* Set nlp_states accordingly */ @@ -1661,7 +1743,6 @@ lpfc_plogi_confirm_nport(struct lpfc_hba *phba, uint32_t *prsp, if (ndlp->nrport) { ndlp->nrport = NULL; lpfc_nlp_put(ndlp); - new_ndlp->nlp_fc4_type = ndlp->nlp_fc4_type; } /* We shall actually free the ndlp with both nlp_DID and @@ -1674,7 +1755,10 @@ lpfc_plogi_confirm_nport(struct lpfc_hba *phba, uint32_t *prsp, spin_unlock_irq(&phba->ndlp_lock); } - /* Two ndlps cannot have the same did on the nodelist */ + /* Two ndlps cannot have the same did on the nodelist. + * Note: for this case, ndlp has a NULL WWPN so setting + * the nlp_fc4_type isn't required. + */ ndlp->nlp_DID = keepDID; lpfc_nlp_set_state(vport, ndlp, keep_nlp_state); if (phba->sli_rev == LPFC_SLI_REV4 && @@ -1693,8 +1777,13 @@ lpfc_plogi_confirm_nport(struct lpfc_hba *phba, uint32_t *prsp, lpfc_unreg_rpi(vport, ndlp); - /* Two ndlps cannot have the same did */ + /* Two ndlps cannot have the same did and the fc4 + * type must be transferred because the ndlp is in + * flight. + */ ndlp->nlp_DID = keepDID; + ndlp->nlp_fc4_type = keep_nlp_fc4_type; + if (phba->sli_rev == LPFC_SLI_REV4 && active_rrqs_xri_bitmap) memcpy(ndlp->active_rrqs_xri_bitmap, @@ -1735,6 +1824,12 @@ lpfc_plogi_confirm_nport(struct lpfc_hba *phba, uint32_t *prsp, active_rrqs_xri_bitmap) mempool_free(active_rrqs_xri_bitmap, phba->active_rrq_pool); + + lpfc_printf_vlog(vport, KERN_INFO, LOG_ELS | LOG_NODE, + "3173 PLOGI confirm exit: new_ndlp x%x x%x x%x\n", + new_ndlp->nlp_DID, new_ndlp->nlp_flag, + new_ndlp->nlp_fc4_type); + return new_ndlp; } @@ -1895,7 +1990,7 @@ lpfc_cmpl_els_plogi(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb, disc = (ndlp->nlp_flag & NLP_NPR_2B_DISC); ndlp->nlp_flag &= ~NLP_NPR_2B_DISC; spin_unlock_irq(shost->host_lock); - rc = 0; + rc = 0; /* PLOGI completes to NPort <nlp_DID> */ lpfc_printf_vlog(vport, KERN_INFO, LOG_ELS, @@ -2002,8 +2097,29 @@ lpfc_issue_els_plogi(struct lpfc_vport *vport, uint32_t did, uint8_t retry) int ret; ndlp = lpfc_findnode_did(vport, did); - if (ndlp && !NLP_CHK_NODE_ACT(ndlp)) - ndlp = NULL; + + if (ndlp) { + /* Defer the processing of the issue PLOGI until after the + * outstanding UNREG_RPI mbox command completes, unless we + * are going offline. This logic does not apply for Fabric DIDs + */ + if ((ndlp->nlp_flag & NLP_UNREG_INP) && + ((ndlp->nlp_DID & Fabric_DID_MASK) != Fabric_DID_MASK) && + !(vport->fc_flag & FC_OFFLINE_MODE)) { + lpfc_printf_vlog(vport, KERN_INFO, LOG_DISCOVERY, + "4110 Issue PLOGI x%x deferred " + "on NPort x%x rpi x%x Data: %p\n", + ndlp->nlp_defer_did, ndlp->nlp_DID, + ndlp->nlp_rpi, ndlp); + + /* We can only defer 1st PLOGI */ + if (ndlp->nlp_defer_did == NLP_EVT_NOTHING_PENDING) + ndlp->nlp_defer_did = did; + return 0; + } + if (!NLP_CHK_NODE_ACT(ndlp)) + ndlp = NULL; + } /* If ndlp is not NULL, we will bump the reference count on it */ cmdsize = (sizeof(uint32_t) + sizeof(struct serv_parm)); @@ -2137,7 +2253,7 @@ lpfc_cmpl_els_prli(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb, else lpfc_disc_state_machine(vport, ndlp, cmdiocb, NLP_EVT_CMPL_PRLI); - } else + } else { /* Good status, call state machine. However, if another * PRLI is outstanding, don't call the state machine * because final disposition to Mapped or Unmapped is @@ -2145,6 +2261,7 @@ lpfc_cmpl_els_prli(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb, */ lpfc_disc_state_machine(vport, ndlp, cmdiocb, NLP_EVT_CMPL_PRLI); + } out: lpfc_els_free_iocb(phba, cmdiocb); @@ -2203,7 +2320,7 @@ lpfc_issue_els_prli(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp, ndlp->nlp_type &= ~(NLP_FCP_TARGET | NLP_FCP_INITIATOR); ndlp->nlp_type &= ~(NLP_NVME_TARGET | NLP_NVME_INITIATOR); ndlp->nlp_fcp_info &= ~NLP_FCP_2_DEVICE; - ndlp->nlp_flag &= ~NLP_FIRSTBURST; + ndlp->nlp_flag &= ~(NLP_FIRSTBURST | NLP_NPR_2B_DISC); ndlp->nvme_fb_size = 0; send_next_prli: @@ -2682,16 +2799,15 @@ lpfc_cmpl_els_logo(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb, goto out; } + /* The LOGO will not be retried on failure. A LOGO was + * issued to the remote rport and a ACC or RJT or no Answer are + * all acceptable. Note the failure and move forward with + * discovery. The PLOGI will retry. + */ if (irsp->ulpStatus) { - /* Check for retry */ - if (lpfc_els_retry(phba, cmdiocb, rspiocb)) { - /* ELS command is being retried */ - skip_recovery = 1; - goto out; - } /* LOGO failed */ lpfc_printf_vlog(vport, KERN_ERR, LOG_ELS, - "2756 LOGO failure DID:%06X Status:x%x/x%x\n", + "2756 LOGO failure, No Retry DID:%06X Status:x%x/x%x\n", ndlp->nlp_DID, irsp->ulpStatus, irsp->un.ulpWord[4]); /* Do not call DSM for lpfc_els_abort'ed ELS cmds */ @@ -2737,7 +2853,8 @@ out: * For any other port type, the rpi is unregistered as an implicit * LOGO. */ - if ((ndlp->nlp_type & NLP_FCP_TARGET) && (skip_recovery == 0)) { + if (ndlp->nlp_type & (NLP_FCP_TARGET | NLP_NVME_TARGET) && + skip_recovery == 0) { lpfc_cancel_retry_delay_tmo(vport, ndlp); spin_lock_irqsave(shost->host_lock, flags); ndlp->nlp_flag |= NLP_NPR_2B_DISC; @@ -2770,6 +2887,8 @@ out: * will be stored into the context1 field of the IOCB for the completion * callback function to the LOGO ELS command. * + * Callers of this routine are expected to unregister the RPI first + * * Return code * 0 - successfully issued logo * 1 - failed to issue logo @@ -2811,22 +2930,6 @@ lpfc_issue_els_logo(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp, "Issue LOGO: did:x%x", ndlp->nlp_DID, 0, 0); - /* - * If we are issuing a LOGO, we may try to recover the remote NPort - * by issuing a PLOGI later. Even though we issue ELS cmds by the - * VPI, if we have a valid RPI, and that RPI gets unreg'ed while - * that ELS command is in-flight, the HBA returns a IOERR_INVALID_RPI - * for that ELS cmd. To avoid this situation, lets get rid of the - * RPI right now, before any ELS cmds are sent. - */ - spin_lock_irq(shost->host_lock); - ndlp->nlp_flag |= NLP_ISSUE_LOGO; - spin_unlock_irq(shost->host_lock); - if (lpfc_unreg_rpi(vport, ndlp)) { - lpfc_els_free_iocb(phba, elsiocb); - return 0; - } - phba->fc_stat.elsXmitLOGO++; elsiocb->iocb_cmpl = lpfc_cmpl_els_logo; spin_lock_irq(shost->host_lock); @@ -2834,7 +2937,6 @@ lpfc_issue_els_logo(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp, ndlp->nlp_flag &= ~NLP_ISSUE_LOGO; spin_unlock_irq(shost->host_lock); rc = lpfc_sli_issue_iocb(phba, LPFC_ELS_RING, elsiocb, 0); - if (rc == IOCB_ERROR) { spin_lock_irq(shost->host_lock); ndlp->nlp_flag &= ~NLP_LOGO_SND; @@ -2842,6 +2944,11 @@ lpfc_issue_els_logo(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp, lpfc_els_free_iocb(phba, elsiocb); return 1; } + + spin_lock_irq(shost->host_lock); + ndlp->nlp_prev_state = ndlp->nlp_state; + spin_unlock_irq(shost->host_lock); + lpfc_nlp_set_state(vport, ndlp, NLP_STE_LOGO_ISSUE); return 0; } @@ -3250,6 +3357,62 @@ lpfc_els_retry_delay_handler(struct lpfc_nodelist *ndlp) } /** + * lpfc_link_reset - Issue link reset + * @vport: pointer to a virtual N_Port data structure. + * + * This routine performs link reset by sending INIT_LINK mailbox command. + * For SLI-3 adapter, link attention interrupt is enabled before issuing + * INIT_LINK mailbox command. + * + * Return code + * 0 - Link reset initiated successfully + * 1 - Failed to initiate link reset + **/ +int +lpfc_link_reset(struct lpfc_vport *vport) +{ + struct lpfc_hba *phba = vport->phba; + LPFC_MBOXQ_t *mbox; + uint32_t control; + int rc; + + lpfc_printf_vlog(vport, KERN_ERR, LOG_ELS, + "2851 Attempt link reset\n"); + mbox = mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL); + if (!mbox) { + lpfc_printf_log(phba, KERN_ERR, LOG_MBOX, + "2852 Failed to allocate mbox memory"); + return 1; + } + + /* Enable Link attention interrupts */ + if (phba->sli_rev <= LPFC_SLI_REV3) { + spin_lock_irq(&phba->hbalock); + phba->sli.sli_flag |= LPFC_PROCESS_LA; + control = readl(phba->HCregaddr); + control |= HC_LAINT_ENA; + writel(control, phba->HCregaddr); + readl(phba->HCregaddr); /* flush */ + spin_unlock_irq(&phba->hbalock); + } + + lpfc_init_link(phba, mbox, phba->cfg_topology, + phba->cfg_link_speed); + mbox->mbox_cmpl = lpfc_sli_def_mbox_cmpl; + mbox->vport = vport; + rc = lpfc_sli_issue_mbox(phba, mbox, MBX_NOWAIT); + if ((rc != MBX_BUSY) && (rc != MBX_SUCCESS)) { + lpfc_printf_log(phba, KERN_ERR, LOG_MBOX, + "2853 Failed to issue INIT_LINK " + "mbox command, rc:x%x\n", rc); + mempool_free(mbox, phba->mbox_mem_pool); + return 1; + } + + return 0; +} + +/** * lpfc_els_retry - Make retry decision on an els command iocb * @phba: pointer to lpfc hba data structure. * @cmdiocb: pointer to lpfc command iocb data structure. @@ -3285,6 +3448,7 @@ lpfc_els_retry(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb, int logerr = 0; uint32_t cmd = 0; uint32_t did; + int link_reset = 0, rc; /* Note: context2 may be 0 for internal driver abort @@ -3366,7 +3530,6 @@ lpfc_els_retry(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb, retry = 1; break; - case IOERR_SEQUENCE_TIMEOUT: case IOERR_INVALID_RPI: if (cmd == ELS_CMD_PLOGI && did == NameServer_DID) { @@ -3377,6 +3540,18 @@ lpfc_els_retry(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb, } retry = 1; break; + + case IOERR_SEQUENCE_TIMEOUT: + if (cmd == ELS_CMD_PLOGI && + did == NameServer_DID && + (cmdiocb->retry + 1) == maxretry) { + /* Reset the Link */ + link_reset = 1; + break; + } + retry = 1; + delay = 100; + break; } break; @@ -3533,6 +3708,19 @@ lpfc_els_retry(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb, break; } + if (link_reset) { + rc = lpfc_link_reset(vport); + if (rc) { + /* Do not give up. Retry PLOGI one more time and attempt + * link reset if PLOGI fails again. + */ + retry = 1; + delay = 100; + goto out_retry; + } + return 1; + } + if (did == FDMI_DID) retry = 1; @@ -3895,11 +4083,11 @@ lpfc_cmpl_els_logo_acc(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb, void lpfc_mbx_cmpl_dflt_rpi(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb) { - struct lpfc_dmabuf *mp = (struct lpfc_dmabuf *) (pmb->context1); - struct lpfc_nodelist *ndlp = (struct lpfc_nodelist *) pmb->context2; + struct lpfc_dmabuf *mp = (struct lpfc_dmabuf *)(pmb->ctx_buf); + struct lpfc_nodelist *ndlp = (struct lpfc_nodelist *)pmb->ctx_ndlp; - pmb->context1 = NULL; - pmb->context2 = NULL; + pmb->ctx_buf = NULL; + pmb->ctx_ndlp = NULL; lpfc_mbuf_free(phba, mp->virt, mp->phys); kfree(mp); @@ -3975,7 +4163,7 @@ lpfc_cmpl_els_rsp(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb, /* Check to see if link went down during discovery */ if (!ndlp || !NLP_CHK_NODE_ACT(ndlp) || lpfc_els_chk_latt(vport)) { if (mbox) { - mp = (struct lpfc_dmabuf *) mbox->context1; + mp = (struct lpfc_dmabuf *)mbox->ctx_buf; if (mp) { lpfc_mbuf_free(phba, mp->virt, mp->phys); kfree(mp); @@ -4019,7 +4207,7 @@ lpfc_cmpl_els_rsp(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb, "Data: x%x x%x x%x\n", ndlp->nlp_DID, ndlp->nlp_state, ndlp->nlp_rpi, ndlp->nlp_flag); - mp = mbox->context1; + mp = mbox->ctx_buf; if (mp) { lpfc_mbuf_free(phba, mp->virt, mp->phys); @@ -4032,7 +4220,7 @@ lpfc_cmpl_els_rsp(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb, /* Increment reference count to ndlp to hold the * reference to ndlp for the callback function. */ - mbox->context2 = lpfc_nlp_get(ndlp); + mbox->ctx_ndlp = lpfc_nlp_get(ndlp); mbox->vport = vport; if (ndlp->nlp_flag & NLP_RM_DFLT_RPI) { mbox->mbox_flag |= LPFC_MBX_IMED_UNREG; @@ -4086,7 +4274,7 @@ lpfc_cmpl_els_rsp(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb, } } } - mp = (struct lpfc_dmabuf *) mbox->context1; + mp = (struct lpfc_dmabuf *)mbox->ctx_buf; if (mp) { lpfc_mbuf_free(phba, mp->virt, mp->phys); kfree(mp); @@ -4272,14 +4460,6 @@ lpfc_els_rsp_acc(struct lpfc_vport *vport, uint32_t flag, default: return 1; } - /* Xmit ELS ACC response tag <ulpIoTag> */ - lpfc_printf_vlog(vport, KERN_INFO, LOG_ELS, - "0128 Xmit ELS ACC response tag x%x, XRI: x%x, " - "DID: x%x, nlp_flag: x%x nlp_state: x%x RPI: x%x " - "fc_flag x%x\n", - elsiocb->iotag, elsiocb->iocb.ulpContext, - ndlp->nlp_DID, ndlp->nlp_flag, ndlp->nlp_state, - ndlp->nlp_rpi, vport->fc_flag); if (ndlp->nlp_flag & NLP_LOGO_ACC) { spin_lock_irq(shost->host_lock); if (!(ndlp->nlp_flag & NLP_RPI_REGISTERED || @@ -4448,6 +4628,15 @@ lpfc_els_rsp_adisc_acc(struct lpfc_vport *vport, struct lpfc_iocbq *oldiocb, lpfc_els_free_iocb(phba, elsiocb); return 1; } + + /* Xmit ELS ACC response tag <ulpIoTag> */ + lpfc_printf_vlog(vport, KERN_INFO, LOG_ELS, + "0128 Xmit ELS ACC response Status: x%x, IoTag: x%x, " + "XRI: x%x, DID: x%x, nlp_flag: x%x nlp_state: x%x " + "RPI: x%x, fc_flag x%x\n", + rc, elsiocb->iotag, elsiocb->sli4_xritag, + ndlp->nlp_DID, ndlp->nlp_flag, ndlp->nlp_state, + ndlp->nlp_rpi, vport->fc_flag); return 0; } @@ -5281,6 +5470,8 @@ lpfc_rdp_res_speed(struct fc_rdp_port_speed_desc *desc, struct lpfc_hba *phba) desc->info.port_speed.speed = cpu_to_be16(rdp_speed); + if (phba->lmt & LMT_128Gb) + rdp_cap |= RDP_PS_128GB; if (phba->lmt & LMT_64Gb) rdp_cap |= RDP_PS_64GB; if (phba->lmt & LMT_32Gb) @@ -5499,7 +5690,7 @@ lpfc_get_rdp_info(struct lpfc_hba *phba, struct lpfc_rdp_context *rdp_context) goto prep_mbox_fail; mbox->vport = rdp_context->ndlp->vport; mbox->mbox_cmpl = lpfc_mbx_cmpl_rdp_page_a0; - mbox->context2 = (struct lpfc_rdp_context *) rdp_context; + mbox->ctx_ndlp = (struct lpfc_rdp_context *)rdp_context; rc = lpfc_sli_issue_mbox(phba, mbox, MBX_NOWAIT); if (rc == MBX_NOT_FINISHED) goto issue_mbox_fail; @@ -5542,7 +5733,7 @@ lpfc_els_rcv_rdp(struct lpfc_vport *vport, struct lpfc_iocbq *cmdiocb, struct ls_rjt stat; if (phba->sli_rev < LPFC_SLI_REV4 || - bf_get(lpfc_sli_intf_if_type, &phba->sli4_hba.sli_intf) != + bf_get(lpfc_sli_intf_if_type, &phba->sli4_hba.sli_intf) < LPFC_SLI_INTF_IF_TYPE_2) { rjt_err = LSRJT_UNABLE_TPC; rjt_expl = LSEXP_REQ_UNSUPPORTED; @@ -5624,10 +5815,10 @@ lpfc_els_lcb_rsp(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb) int rc; mb = &pmb->u.mb; - lcb_context = (struct lpfc_lcb_context *)pmb->context1; + lcb_context = (struct lpfc_lcb_context *)pmb->ctx_ndlp; ndlp = lcb_context->ndlp; - pmb->context1 = NULL; - pmb->context2 = NULL; + pmb->ctx_ndlp = NULL; + pmb->ctx_buf = NULL; shdr = (union lpfc_sli4_cfg_shdr *) &pmb->u.mqe.un.beacon_config.header.cfg_shdr; @@ -5701,6 +5892,9 @@ error: stat = (struct ls_rjt *)(pcmd + sizeof(uint32_t)); stat->un.b.lsRjtRsnCode = LSRJT_UNABLE_TPC; + if (shdr_add_status == ADD_STATUS_OPERATION_ALREADY_ACTIVE) + stat->un.b.lsRjtRsnCodeExp = LSEXP_CMD_IN_PROGRESS; + elsiocb->iocb_cmpl = lpfc_cmpl_els_rsp; phba->fc_stat.elsXmitLSRJT++; rc = lpfc_sli_issue_iocb(phba, LPFC_ELS_RING, elsiocb, 0); @@ -5731,7 +5925,7 @@ lpfc_sli4_set_beacon(struct lpfc_vport *vport, lpfc_sli4_config(phba, mbox, LPFC_MBOX_SUBSYSTEM_COMMON, LPFC_MBOX_OPCODE_SET_BEACON_CONFIG, len, LPFC_SLI4_MBX_EMBED); - mbox->context1 = (void *)lcb_context; + mbox->ctx_ndlp = (void *)lcb_context; mbox->vport = phba->pport; mbox->mbox_cmpl = lpfc_els_lcb_rsp; bf_set(lpfc_mbx_set_beacon_port_num, &mbox->u.mqe.un.beacon_config, @@ -6011,6 +6205,19 @@ lpfc_rscn_recovery_check(struct lpfc_vport *vport) if (vport->phba->nvmet_support) continue; + /* If we are in the process of doing discovery on this + * NPort, let it continue on its own. + */ + switch (ndlp->nlp_state) { + case NLP_STE_PLOGI_ISSUE: + case NLP_STE_ADISC_ISSUE: + case NLP_STE_REG_LOGIN_ISSUE: + case NLP_STE_PRLI_ISSUE: + case NLP_STE_LOGO_ISSUE: + continue; + } + + lpfc_disc_state_machine(vport, ndlp, NULL, NLP_EVT_DEVICE_RECOVERY); lpfc_cancel_retry_delay_tmo(vport, ndlp); @@ -6272,6 +6479,7 @@ int lpfc_els_handle_rscn(struct lpfc_vport *vport) { struct lpfc_nodelist *ndlp; + struct lpfc_hba *phba = vport->phba; /* Ignore RSCN if the port is being torn down. */ if (vport->load_flag & FC_UNLOADING) { @@ -6300,8 +6508,15 @@ lpfc_els_handle_rscn(struct lpfc_vport *vport) * flush the RSCN. Otherwise, the outstanding requests * need to complete. */ - if (lpfc_issue_gidft(vport) > 0) + if (phba->cfg_ns_query == LPFC_NS_QUERY_GID_FT) { + if (lpfc_issue_gidft(vport) > 0) + return 1; + } else if (phba->cfg_ns_query == LPFC_NS_QUERY_GID_PT) { + if (lpfc_issue_gidpt(vport) > 0) + return 1; + } else { return 1; + } } else { /* Nameserver login in question. Revalidate. */ if (ndlp) { @@ -6455,6 +6670,11 @@ lpfc_els_rcv_flogi(struct lpfc_vport *vport, struct lpfc_iocbq *cmdiocb, port_state = vport->port_state; vport->fc_flag |= FC_PT2PT; vport->fc_flag &= ~(FC_FABRIC | FC_PUBLIC_LOOP); + + /* Acking an unsol FLOGI. Count 1 for link bounce + * work-around. + */ + vport->rcv_flogi_cnt++; spin_unlock_irq(shost->host_lock); lpfc_printf_vlog(vport, KERN_INFO, LOG_ELS, "3311 Rcv Flogi PS x%x new PS x%x " @@ -6472,6 +6692,25 @@ lpfc_els_rcv_flogi(struct lpfc_vport *vport, struct lpfc_iocbq *cmdiocb, memcpy(&phba->fc_fabparam, sp, sizeof(struct serv_parm)); + /* Defer ACC response until AFTER we issue a FLOGI */ + if (!(phba->hba_flag & HBA_FLOGI_ISSUED)) { + phba->defer_flogi_acc_rx_id = cmdiocb->iocb.ulpContext; + phba->defer_flogi_acc_ox_id = + cmdiocb->iocb.unsli3.rcvsli3.ox_id; + + vport->fc_myDID = did; + + lpfc_printf_vlog(vport, KERN_INFO, LOG_ELS, + "3344 Deferring FLOGI ACC: rx_id: x%x," + " ox_id: x%x, hba_flag x%x\n", + phba->defer_flogi_acc_rx_id, + phba->defer_flogi_acc_ox_id, phba->hba_flag); + + phba->defer_flogi_acc_flag = true; + + return 0; + } + /* Send back ACC */ lpfc_els_rsp_acc(vport, ELS_CMD_FLOGI, cmdiocb, ndlp, NULL); @@ -6644,11 +6883,11 @@ lpfc_els_rsp_rls_acc(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb) mb = &pmb->u.mb; - ndlp = (struct lpfc_nodelist *) pmb->context2; - rxid = (uint16_t) ((unsigned long)(pmb->context1) & 0xffff); - oxid = (uint16_t) (((unsigned long)(pmb->context1) >> 16) & 0xffff); - pmb->context1 = NULL; - pmb->context2 = NULL; + ndlp = (struct lpfc_nodelist *)pmb->ctx_ndlp; + rxid = (uint16_t)((unsigned long)(pmb->ctx_buf) & 0xffff); + oxid = (uint16_t)(((unsigned long)(pmb->ctx_buf) >> 16) & 0xffff); + pmb->ctx_buf = NULL; + pmb->ctx_ndlp = NULL; if (mb->mbxStatus) { mempool_free(pmb, phba->mbox_mem_pool); @@ -6732,11 +6971,11 @@ lpfc_els_rsp_rps_acc(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb) mb = &pmb->u.mb; - ndlp = (struct lpfc_nodelist *) pmb->context2; - rxid = (uint16_t) ((unsigned long)(pmb->context1) & 0xffff); - oxid = (uint16_t) (((unsigned long)(pmb->context1) >> 16) & 0xffff); - pmb->context1 = NULL; - pmb->context2 = NULL; + ndlp = (struct lpfc_nodelist *)pmb->ctx_ndlp; + rxid = (uint16_t)((unsigned long)(pmb->ctx_buf) & 0xffff); + oxid = (uint16_t)(((unsigned long)(pmb->ctx_buf) >> 16) & 0xffff); + pmb->ctx_ndlp = NULL; + pmb->ctx_buf = NULL; if (mb->mbxStatus) { mempool_free(pmb, phba->mbox_mem_pool); @@ -6827,10 +7066,10 @@ lpfc_els_rcv_rls(struct lpfc_vport *vport, struct lpfc_iocbq *cmdiocb, mbox = mempool_alloc(phba->mbox_mem_pool, GFP_ATOMIC); if (mbox) { lpfc_read_lnk_stat(phba, mbox); - mbox->context1 = (void *)((unsigned long) + mbox->ctx_buf = (void *)((unsigned long) ((cmdiocb->iocb.unsli3.rcvsli3.ox_id << 16) | cmdiocb->iocb.ulpContext)); /* rx_id */ - mbox->context2 = lpfc_nlp_get(ndlp); + mbox->ctx_ndlp = lpfc_nlp_get(ndlp); mbox->vport = vport; mbox->mbox_cmpl = lpfc_els_rsp_rls_acc; if (lpfc_sli_issue_mbox(phba, mbox, MBX_NOWAIT) @@ -6990,10 +7229,10 @@ lpfc_els_rcv_rps(struct lpfc_vport *vport, struct lpfc_iocbq *cmdiocb, mbox = mempool_alloc(phba->mbox_mem_pool, GFP_ATOMIC); if (mbox) { lpfc_read_lnk_stat(phba, mbox); - mbox->context1 = (void *)((unsigned long) + mbox->ctx_buf = (void *)((unsigned long) ((cmdiocb->iocb.unsli3.rcvsli3.ox_id << 16) | cmdiocb->iocb.ulpContext)); /* rx_id */ - mbox->context2 = lpfc_nlp_get(ndlp); + mbox->ctx_ndlp = lpfc_nlp_get(ndlp); mbox->vport = vport; mbox->mbox_cmpl = lpfc_els_rsp_rps_acc; if (lpfc_sli_issue_mbox(phba, mbox, MBX_NOWAIT) @@ -7852,8 +8091,9 @@ lpfc_els_unsol_buffer(struct lpfc_hba *phba, struct lpfc_sli_ring *pring, struct ls_rjt stat; uint32_t *payload; uint32_t cmd, did, newnode; - uint8_t rjt_exp, rjt_err = 0; + uint8_t rjt_exp, rjt_err = 0, init_link = 0; IOCB_t *icmd = &elsiocb->iocb; + LPFC_MBOXQ_t *mbox; if (!vport || !(elsiocb->context2)) goto dropit; @@ -7940,9 +8180,10 @@ lpfc_els_unsol_buffer(struct lpfc_hba *phba, struct lpfc_sli_ring *pring, cmd, did, vport->port_state, vport->fc_flag, vport->fc_myDID, vport->fc_prevDID); - /* reject till our FLOGI completes */ + /* reject till our FLOGI completes or PLOGI assigned DID via PT2PT */ if ((vport->port_state < LPFC_FABRIC_CFG_LINK) && - (cmd != ELS_CMD_FLOGI)) { + (cmd != ELS_CMD_FLOGI) && + !((cmd == ELS_CMD_PLOGI) && (vport->fc_flag & FC_PT2PT))) { rjt_err = LSRJT_LOGICAL_BSY; rjt_exp = LSEXP_NOTHING_MORE; goto lsrjt; @@ -8002,6 +8243,19 @@ lpfc_els_unsol_buffer(struct lpfc_hba *phba, struct lpfc_sli_ring *pring, did, vport->port_state, ndlp->nlp_flag); phba->fc_stat.elsRcvFLOGI++; + + /* If the driver believes fabric discovery is done and is ready, + * bounce the link. There is some descrepancy. + */ + if (vport->port_state >= LPFC_LOCAL_CFG_LINK && + vport->fc_flag & FC_PT2PT && + vport->rcv_flogi_cnt >= 1) { + rjt_err = LSRJT_LOGICAL_BSY; + rjt_exp = LSEXP_NOTHING_MORE; + init_link++; + goto lsrjt; + } + lpfc_els_rcv_flogi(vport, elsiocb, ndlp); if (newnode) lpfc_nlp_put(ndlp); @@ -8230,6 +8484,27 @@ lsrjt: lpfc_nlp_put(elsiocb->context1); elsiocb->context1 = NULL; + + /* Special case. Driver received an unsolicited command that + * unsupportable given the driver's current state. Reset the + * link and start over. + */ + if (init_link) { + mbox = mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL); + if (!mbox) + return; + lpfc_linkdown(phba); + lpfc_init_link(phba, mbox, + phba->cfg_topology, + phba->cfg_link_speed); + mbox->u.mb.un.varInitLnk.lipsr_AL_PA = 0; + mbox->mbox_cmpl = lpfc_sli_def_mbox_cmpl; + mbox->vport = vport; + if (lpfc_sli_issue_mbox(phba, mbox, MBX_NOWAIT) == + MBX_NOT_FINISHED) + mempool_free(mbox, phba->mbox_mem_pool); + } + return; dropit: @@ -8453,7 +8728,7 @@ lpfc_cmpl_reg_new_vport(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb) { struct lpfc_vport *vport = pmb->vport; struct Scsi_Host *shost = lpfc_shost_from_vport(vport); - struct lpfc_nodelist *ndlp = (struct lpfc_nodelist *) pmb->context2; + struct lpfc_nodelist *ndlp = (struct lpfc_nodelist *)pmb->ctx_ndlp; MAILBOX_t *mb = &pmb->u.mb; int rc; @@ -8571,7 +8846,7 @@ lpfc_register_new_vport(struct lpfc_hba *phba, struct lpfc_vport *vport, if (mbox) { lpfc_reg_vpi(vport, mbox); mbox->vport = vport; - mbox->context2 = lpfc_nlp_get(ndlp); + mbox->ctx_ndlp = lpfc_nlp_get(ndlp); mbox->mbox_cmpl = lpfc_cmpl_reg_new_vport; if (lpfc_sli_issue_mbox(phba, mbox, MBX_NOWAIT) == MBX_NOT_FINISHED) { @@ -9505,7 +9780,8 @@ lpfc_sli_abts_recover_port(struct lpfc_vport *vport, "rport in state 0x%x\n", ndlp->nlp_state); return; } - lpfc_printf_log(phba, KERN_INFO, LOG_SLI, + lpfc_printf_log(phba, KERN_ERR, + LOG_ELS | LOG_FCP_ERROR | LOG_NVME_IOERR, "3094 Start rport recovery on shost id 0x%x " "fc_id 0x%06x vpi 0x%x rpi 0x%x state 0x%x " "flags 0x%x\n", @@ -9518,8 +9794,8 @@ lpfc_sli_abts_recover_port(struct lpfc_vport *vport, */ spin_lock_irqsave(shost->host_lock, flags); ndlp->nlp_fcp_info &= ~NLP_FCP_2_DEVICE; + ndlp->nlp_flag |= NLP_ISSUE_LOGO; spin_unlock_irqrestore(shost->host_lock, flags); - lpfc_issue_els_logo(vport, ndlp, 0); - lpfc_nlp_set_state(vport, ndlp, NLP_STE_LOGO_ISSUE); + lpfc_unreg_rpi(vport, ndlp); } diff --git a/drivers/scsi/lpfc/lpfc_hbadisc.c b/drivers/scsi/lpfc/lpfc_hbadisc.c index f4deb862efc6..b183b882d506 100644 --- a/drivers/scsi/lpfc/lpfc_hbadisc.c +++ b/drivers/scsi/lpfc/lpfc_hbadisc.c @@ -888,17 +888,31 @@ lpfc_linkdown(struct lpfc_hba *phba) LPFC_MBOXQ_t *mb; int i; - if (phba->link_state == LPFC_LINK_DOWN) + if (phba->link_state == LPFC_LINK_DOWN) { + if (phba->sli4_hba.conf_trunk) { + phba->trunk_link.link0.state = 0; + phba->trunk_link.link1.state = 0; + phba->trunk_link.link2.state = 0; + phba->trunk_link.link3.state = 0; + } return 0; - + } /* Block all SCSI stack I/Os */ lpfc_scsi_dev_block(phba); + phba->defer_flogi_acc_flag = false; + spin_lock_irq(&phba->hbalock); phba->fcf.fcf_flag &= ~(FCF_AVAILABLE | FCF_SCAN_DONE); spin_unlock_irq(&phba->hbalock); if (phba->link_state > LPFC_LINK_DOWN) { phba->link_state = LPFC_LINK_DOWN; + if (phba->sli4_hba.conf_trunk) { + phba->trunk_link.link0.state = 0; + phba->trunk_link.link1.state = 0; + phba->trunk_link.link2.state = 0; + phba->trunk_link.link3.state = 0; + } spin_lock_irq(shost->host_lock); phba->pport->fc_flag &= ~FC_LBIT; spin_unlock_irq(shost->host_lock); @@ -947,6 +961,7 @@ lpfc_linkdown(struct lpfc_hba *phba) } spin_lock_irq(shost->host_lock); phba->pport->fc_flag &= ~(FC_PT2PT | FC_PT2PT_PLOGI); + phba->pport->rcv_flogi_cnt = 0; spin_unlock_irq(shost->host_lock); } return 0; @@ -1018,6 +1033,7 @@ lpfc_linkup(struct lpfc_hba *phba) { struct lpfc_vport **vports; int i; + struct Scsi_Host *shost = lpfc_shost_from_vport(phba->pport); phba->link_state = LPFC_LINK_UP; @@ -1031,6 +1047,18 @@ lpfc_linkup(struct lpfc_hba *phba) lpfc_linkup_port(vports[i]); lpfc_destroy_vport_work_array(phba, vports); + /* Clear the pport flogi counter in case the link down was + * absorbed without an ACQE. No lock here - in worker thread + * and discovery is synchronized. + */ + spin_lock_irq(shost->host_lock); + phba->pport->rcv_flogi_cnt = 0; + spin_unlock_irq(shost->host_lock); + + /* reinitialize initial FLOGI flag */ + phba->hba_flag &= ~(HBA_FLOGI_ISSUED); + phba->defer_flogi_acc_flag = false; + return 0; } @@ -1992,6 +2020,26 @@ int lpfc_sli4_fcf_rr_next_proc(struct lpfc_vport *vport, uint16_t fcf_index) "failover and change port state:x%x/x%x\n", phba->pport->port_state, LPFC_VPORT_UNKNOWN); phba->pport->port_state = LPFC_VPORT_UNKNOWN; + + if (!phba->fcf.fcf_redisc_attempted) { + lpfc_unregister_fcf(phba); + + rc = lpfc_sli4_redisc_fcf_table(phba); + if (!rc) { + lpfc_printf_log(phba, KERN_INFO, LOG_FIP, + "3195 Rediscover FCF table\n"); + phba->fcf.fcf_redisc_attempted = 1; + lpfc_sli4_clear_fcf_rr_bmask(phba); + } else { + lpfc_printf_log(phba, KERN_WARNING, LOG_FIP, + "3196 Rediscover FCF table " + "failed. Status:x%x\n", rc); + } + } else { + lpfc_printf_log(phba, KERN_WARNING, LOG_FIP, + "3197 Already rediscover FCF table " + "attempted. No more retry\n"); + } goto stop_flogi_current_fcf; } else { lpfc_printf_log(phba, KERN_INFO, LOG_FIP | LOG_ELS, @@ -2915,7 +2963,7 @@ lpfc_start_fdiscs(struct lpfc_hba *phba) void lpfc_mbx_cmpl_reg_vfi(struct lpfc_hba *phba, LPFC_MBOXQ_t *mboxq) { - struct lpfc_dmabuf *dmabuf = mboxq->context1; + struct lpfc_dmabuf *dmabuf = mboxq->ctx_buf; struct lpfc_vport *vport = mboxq->vport; struct Scsi_Host *shost = lpfc_shost_from_vport(vport); @@ -3008,7 +3056,7 @@ static void lpfc_mbx_cmpl_read_sparam(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb) { MAILBOX_t *mb = &pmb->u.mb; - struct lpfc_dmabuf *mp = (struct lpfc_dmabuf *) pmb->context1; + struct lpfc_dmabuf *mp = (struct lpfc_dmabuf *)pmb->ctx_buf; struct lpfc_vport *vport = pmb->vport; struct Scsi_Host *shost = lpfc_shost_from_vport(vport); struct serv_parm *sp = &vport->fc_sparam; @@ -3052,7 +3100,7 @@ lpfc_mbx_cmpl_read_sparam(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb) return; out: - pmb->context1 = NULL; + pmb->ctx_buf = NULL; lpfc_mbuf_free(phba, mp->virt, mp->phys); kfree(mp); lpfc_issue_clear_la(phba, vport); @@ -3085,6 +3133,7 @@ lpfc_mbx_process_link_up(struct lpfc_hba *phba, struct lpfc_mbx_read_top *la) case LPFC_LINK_SPEED_16GHZ: case LPFC_LINK_SPEED_32GHZ: case LPFC_LINK_SPEED_64GHZ: + case LPFC_LINK_SPEED_128GHZ: break; default: phba->fc_linkspeed = LPFC_LINK_SPEED_UNKNOWN; @@ -3190,7 +3239,7 @@ lpfc_mbx_process_link_up(struct lpfc_hba *phba, struct lpfc_mbx_read_top *la) sparam_mbox->mbox_cmpl = lpfc_mbx_cmpl_read_sparam; rc = lpfc_sli_issue_mbox(phba, sparam_mbox, MBX_NOWAIT); if (rc == MBX_NOT_FINISHED) { - mp = (struct lpfc_dmabuf *) sparam_mbox->context1; + mp = (struct lpfc_dmabuf *)sparam_mbox->ctx_buf; lpfc_mbuf_free(phba, mp->virt, mp->phys); kfree(mp); mempool_free(sparam_mbox, phba->mbox_mem_pool); @@ -3319,7 +3368,7 @@ lpfc_mbx_cmpl_read_topology(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb) struct lpfc_mbx_read_top *la; struct lpfc_sli_ring *pring; MAILBOX_t *mb = &pmb->u.mb; - struct lpfc_dmabuf *mp = (struct lpfc_dmabuf *) (pmb->context1); + struct lpfc_dmabuf *mp = (struct lpfc_dmabuf *)(pmb->ctx_buf); uint8_t attn_type; /* Unblock ELS traffic */ @@ -3476,12 +3525,12 @@ void lpfc_mbx_cmpl_reg_login(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb) { struct lpfc_vport *vport = pmb->vport; - struct lpfc_dmabuf *mp = (struct lpfc_dmabuf *) (pmb->context1); - struct lpfc_nodelist *ndlp = (struct lpfc_nodelist *) pmb->context2; + struct lpfc_dmabuf *mp = (struct lpfc_dmabuf *)(pmb->ctx_buf); + struct lpfc_nodelist *ndlp = (struct lpfc_nodelist *)pmb->ctx_ndlp; struct Scsi_Host *shost = lpfc_shost_from_vport(vport); - pmb->context1 = NULL; - pmb->context2 = NULL; + pmb->ctx_buf = NULL; + pmb->ctx_ndlp = NULL; lpfc_printf_vlog(vport, KERN_INFO, LOG_SLI, "0002 rpi:%x DID:%x flg:%x %d map:%x %p\n", @@ -3689,8 +3738,8 @@ lpfc_create_static_vport(struct lpfc_hba *phba) vport_buff = (uint8_t *) vport_info; do { /* free dma buffer from previous round */ - if (pmb->context1) { - mp = (struct lpfc_dmabuf *)pmb->context1; + if (pmb->ctx_buf) { + mp = (struct lpfc_dmabuf *)pmb->ctx_buf; lpfc_mbuf_free(phba, mp->virt, mp->phys); kfree(mp); } @@ -3712,7 +3761,7 @@ lpfc_create_static_vport(struct lpfc_hba *phba) if (phba->sli_rev == LPFC_SLI_REV4) { byte_count = pmb->u.mqe.un.mb_words[5]; - mp = (struct lpfc_dmabuf *)pmb->context1; + mp = (struct lpfc_dmabuf *)pmb->ctx_buf; if (byte_count > sizeof(struct static_vport_info) - offset) byte_count = sizeof(struct static_vport_info) @@ -3777,8 +3826,8 @@ lpfc_create_static_vport(struct lpfc_hba *phba) out: kfree(vport_info); if (mbx_wait_rc != MBX_TIMEOUT) { - if (pmb->context1) { - mp = (struct lpfc_dmabuf *)pmb->context1; + if (pmb->ctx_buf) { + mp = (struct lpfc_dmabuf *)pmb->ctx_buf; lpfc_mbuf_free(phba, mp->virt, mp->phys); kfree(mp); } @@ -3799,13 +3848,13 @@ lpfc_mbx_cmpl_fabric_reg_login(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb) { struct lpfc_vport *vport = pmb->vport; MAILBOX_t *mb = &pmb->u.mb; - struct lpfc_dmabuf *mp = (struct lpfc_dmabuf *) (pmb->context1); + struct lpfc_dmabuf *mp = (struct lpfc_dmabuf *)(pmb->ctx_buf); struct lpfc_nodelist *ndlp; struct Scsi_Host *shost; - ndlp = (struct lpfc_nodelist *) pmb->context2; - pmb->context1 = NULL; - pmb->context2 = NULL; + ndlp = (struct lpfc_nodelist *)pmb->ctx_ndlp; + pmb->ctx_ndlp = NULL; + pmb->ctx_buf = NULL; if (mb->mbxStatus) { lpfc_printf_vlog(vport, KERN_ERR, LOG_MBOX, @@ -3913,6 +3962,35 @@ lpfc_issue_gidft(struct lpfc_vport *vport) return vport->gidft_inp; } +/** + * lpfc_issue_gidpt - issue a GID_PT for all N_Ports + * @vport: The virtual port for which this call is being executed. + * + * This routine will issue a GID_PT to get a list of all N_Ports + * + * Return value : + * 0 - Failure to issue a GID_PT + * 1 - GID_PT issued + **/ +int +lpfc_issue_gidpt(struct lpfc_vport *vport) +{ + /* Good status, issue CT Request to NameServer */ + if (lpfc_ns_cmd(vport, SLI_CTNS_GID_PT, 0, GID_PT_N_PORT)) { + /* Cannot issue NameServer FCP Query, so finish up + * discovery + */ + lpfc_printf_vlog(vport, KERN_ERR, LOG_SLI, + "0606 %s Port TYPE %x %s\n", + "Failed to issue GID_PT to ", + GID_PT_N_PORT, + "Finishing discovery."); + return 0; + } + vport->gidft_inp++; + return 1; +} + /* * This routine handles processing a NameServer REG_LOGIN mailbox * command upon completion. It is setup in the LPFC_MBOXQ @@ -3923,12 +4001,12 @@ void lpfc_mbx_cmpl_ns_reg_login(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb) { MAILBOX_t *mb = &pmb->u.mb; - struct lpfc_dmabuf *mp = (struct lpfc_dmabuf *) (pmb->context1); - struct lpfc_nodelist *ndlp = (struct lpfc_nodelist *) pmb->context2; + struct lpfc_dmabuf *mp = (struct lpfc_dmabuf *)(pmb->ctx_buf); + struct lpfc_nodelist *ndlp = (struct lpfc_nodelist *)pmb->ctx_ndlp; struct lpfc_vport *vport = pmb->vport; - pmb->context1 = NULL; - pmb->context2 = NULL; + pmb->ctx_buf = NULL; + pmb->ctx_ndlp = NULL; vport->gidft_inp = 0; if (mb->mbxStatus) { @@ -4385,6 +4463,7 @@ lpfc_initialize_node(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp, NLP_INT_NODE_ACT(ndlp); atomic_set(&ndlp->cmd_pending, 0); ndlp->cmd_qdepth = vport->cfg_tgt_queue_depth; + ndlp->nlp_defer_did = NLP_EVT_NOTHING_PENDING; } struct lpfc_nodelist * @@ -4392,10 +4471,11 @@ lpfc_enable_node(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp, int state) { struct lpfc_hba *phba = vport->phba; - uint32_t did; + uint32_t did, flag; unsigned long flags; unsigned long *active_rrqs_xri_bitmap = NULL; int rpi = LPFC_RPI_ALLOC_ERROR; + uint32_t defer_did = 0; if (!ndlp) return NULL; @@ -4428,16 +4508,23 @@ lpfc_enable_node(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp, goto free_rpi; } - /* Keep the original DID */ + /* First preserve the orginal DID, xri_bitmap and some flags */ did = ndlp->nlp_DID; + flag = (ndlp->nlp_flag & NLP_UNREG_INP); + if (flag & NLP_UNREG_INP) + defer_did = ndlp->nlp_defer_did; if (phba->sli_rev == LPFC_SLI_REV4) active_rrqs_xri_bitmap = ndlp->active_rrqs_xri_bitmap; - /* re-initialize ndlp except of ndlp linked list pointer */ + /* Zero ndlp except of ndlp linked list pointer */ memset((((char *)ndlp) + sizeof (struct list_head)), 0, sizeof (struct lpfc_nodelist) - sizeof (struct list_head)); - lpfc_initialize_node(vport, ndlp, did); + /* Next reinitialize and restore saved objects */ + lpfc_initialize_node(vport, ndlp, did); + ndlp->nlp_flag |= flag; + if (flag & NLP_UNREG_INP) + ndlp->nlp_defer_did = defer_did; if (phba->sli_rev == LPFC_SLI_REV4) ndlp->active_rrqs_xri_bitmap = active_rrqs_xri_bitmap; @@ -4697,11 +4784,27 @@ lpfc_nlp_logo_unreg(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb) struct lpfc_vport *vport = pmb->vport; struct lpfc_nodelist *ndlp; - ndlp = (struct lpfc_nodelist *)(pmb->context1); + ndlp = (struct lpfc_nodelist *)(pmb->ctx_ndlp); if (!ndlp) return; lpfc_issue_els_logo(vport, ndlp, 0); mempool_free(pmb, phba->mbox_mem_pool); + + /* Check to see if there are any deferred events to process */ + if ((ndlp->nlp_flag & NLP_UNREG_INP) && + (ndlp->nlp_defer_did != NLP_EVT_NOTHING_PENDING)) { + lpfc_printf_vlog(vport, KERN_INFO, LOG_DISCOVERY, + "1434 UNREG cmpl deferred logo x%x " + "on NPort x%x Data: x%x %p\n", + ndlp->nlp_rpi, ndlp->nlp_DID, + ndlp->nlp_defer_did, ndlp); + + ndlp->nlp_flag &= ~NLP_UNREG_INP; + ndlp->nlp_defer_did = NLP_EVT_NOTHING_PENDING; + lpfc_issue_els_plogi(vport, ndlp->nlp_DID, 0); + } else { + ndlp->nlp_flag &= ~NLP_UNREG_INP; + } } /* @@ -4730,6 +4833,21 @@ lpfc_unreg_rpi(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp) "did x%x\n", ndlp->nlp_rpi, ndlp->nlp_flag, ndlp->nlp_DID); + + /* If there is already an UNREG in progress for this ndlp, + * no need to queue up another one. + */ + if (ndlp->nlp_flag & NLP_UNREG_INP) { + lpfc_printf_vlog(vport, KERN_INFO, LOG_DISCOVERY, + "1436 unreg_rpi SKIP UNREG x%x on " + "NPort x%x deferred x%x flg x%x " + "Data: %p\n", + ndlp->nlp_rpi, ndlp->nlp_DID, + ndlp->nlp_defer_did, + ndlp->nlp_flag, ndlp); + goto out; + } + mbox = mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL); if (mbox) { /* SLI4 ports require the physical rpi value. */ @@ -4740,26 +4858,38 @@ lpfc_unreg_rpi(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp) lpfc_unreg_login(phba, vport->vpi, rpi, mbox); mbox->vport = vport; if (ndlp->nlp_flag & NLP_ISSUE_LOGO) { - mbox->context1 = ndlp; + mbox->ctx_ndlp = ndlp; mbox->mbox_cmpl = lpfc_nlp_logo_unreg; } else { if (phba->sli_rev == LPFC_SLI_REV4 && (!(vport->load_flag & FC_UNLOADING)) && (bf_get(lpfc_sli_intf_if_type, - &phba->sli4_hba.sli_intf) == + &phba->sli4_hba.sli_intf) >= LPFC_SLI_INTF_IF_TYPE_2) && (kref_read(&ndlp->kref) > 0)) { - mbox->context1 = lpfc_nlp_get(ndlp); + mbox->ctx_ndlp = lpfc_nlp_get(ndlp); mbox->mbox_cmpl = lpfc_sli4_unreg_rpi_cmpl_clr; /* * accept PLOGIs after unreg_rpi_cmpl */ acc_plogi = 0; - } else + } else { + mbox->ctx_ndlp = ndlp; mbox->mbox_cmpl = lpfc_sli_def_mbox_cmpl; + } } + if (((ndlp->nlp_DID & Fabric_DID_MASK) != + Fabric_DID_MASK) && + (!(vport->fc_flag & FC_OFFLINE_MODE))) + ndlp->nlp_flag |= NLP_UNREG_INP; + + lpfc_printf_vlog(vport, KERN_INFO, LOG_DISCOVERY, + "1433 unreg_rpi UNREG x%x on " + "NPort x%x deferred flg x%x Data:%p\n", + ndlp->nlp_rpi, ndlp->nlp_DID, + ndlp->nlp_flag, ndlp); rc = lpfc_sli_issue_mbox(phba, mbox, MBX_NOWAIT); if (rc == MBX_NOT_FINISHED) { @@ -4768,7 +4898,7 @@ lpfc_unreg_rpi(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp) } } lpfc_no_rpi(phba, ndlp); - +out: if (phba->sli_rev != LPFC_SLI_REV4) ndlp->nlp_rpi = 0; ndlp->nlp_flag &= ~NLP_RPI_REGISTERED; @@ -4836,7 +4966,7 @@ lpfc_unreg_all_rpis(struct lpfc_vport *vport) mbox); mbox->vport = vport; mbox->mbox_cmpl = lpfc_sli_def_mbox_cmpl; - mbox->context1 = NULL; + mbox->ctx_ndlp = NULL; rc = lpfc_sli_issue_mbox_wait(phba, mbox, LPFC_MBOX_TMO); if (rc != MBX_TIMEOUT) mempool_free(mbox, phba->mbox_mem_pool); @@ -4861,7 +4991,7 @@ lpfc_unreg_default_rpis(struct lpfc_vport *vport) mbox); mbox->vport = vport; mbox->mbox_cmpl = lpfc_sli_def_mbox_cmpl; - mbox->context1 = NULL; + mbox->ctx_ndlp = NULL; rc = lpfc_sli_issue_mbox_wait(phba, mbox, LPFC_MBOX_TMO); if (rc != MBX_TIMEOUT) mempool_free(mbox, phba->mbox_mem_pool); @@ -4915,8 +5045,8 @@ lpfc_cleanup_node(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp) if ((mb = phba->sli.mbox_active)) { if ((mb->u.mb.mbxCommand == MBX_REG_LOGIN64) && !(mb->mbox_flag & LPFC_MBX_IMED_UNREG) && - (ndlp == (struct lpfc_nodelist *) mb->context2)) { - mb->context2 = NULL; + (ndlp == (struct lpfc_nodelist *)mb->ctx_ndlp)) { + mb->ctx_ndlp = NULL; mb->mbox_cmpl = lpfc_sli_def_mbox_cmpl; } } @@ -4926,18 +5056,18 @@ lpfc_cleanup_node(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp) list_for_each_entry(mb, &phba->sli.mboxq_cmpl, list) { if ((mb->u.mb.mbxCommand != MBX_REG_LOGIN64) || (mb->mbox_flag & LPFC_MBX_IMED_UNREG) || - (ndlp != (struct lpfc_nodelist *) mb->context2)) + (ndlp != (struct lpfc_nodelist *)mb->ctx_ndlp)) continue; - mb->context2 = NULL; + mb->ctx_ndlp = NULL; mb->mbox_cmpl = lpfc_sli_def_mbox_cmpl; } list_for_each_entry_safe(mb, nextmb, &phba->sli.mboxq, list) { if ((mb->u.mb.mbxCommand == MBX_REG_LOGIN64) && !(mb->mbox_flag & LPFC_MBX_IMED_UNREG) && - (ndlp == (struct lpfc_nodelist *) mb->context2)) { - mp = (struct lpfc_dmabuf *) (mb->context1); + (ndlp == (struct lpfc_nodelist *)mb->ctx_ndlp)) { + mp = (struct lpfc_dmabuf *)(mb->ctx_buf); if (mp) { __lpfc_mbuf_free(phba, mp->virt, mp->phys); kfree(mp); @@ -5007,7 +5137,7 @@ lpfc_nlp_remove(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp) mbox->mbox_flag |= LPFC_MBX_IMED_UNREG; mbox->mbox_cmpl = lpfc_mbx_cmpl_dflt_rpi; mbox->vport = vport; - mbox->context2 = ndlp; + mbox->ctx_ndlp = ndlp; rc =lpfc_sli_issue_mbox(phba, mbox, MBX_NOWAIT); if (rc == MBX_NOT_FINISHED) { mempool_free(mbox, phba->mbox_mem_pool); @@ -5772,12 +5902,12 @@ void lpfc_mbx_cmpl_fdmi_reg_login(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb) { MAILBOX_t *mb = &pmb->u.mb; - struct lpfc_dmabuf *mp = (struct lpfc_dmabuf *) (pmb->context1); - struct lpfc_nodelist *ndlp = (struct lpfc_nodelist *) pmb->context2; + struct lpfc_dmabuf *mp = (struct lpfc_dmabuf *)(pmb->ctx_buf); + struct lpfc_nodelist *ndlp = (struct lpfc_nodelist *)pmb->ctx_ndlp; struct lpfc_vport *vport = pmb->vport; - pmb->context1 = NULL; - pmb->context2 = NULL; + pmb->ctx_buf = NULL; + pmb->ctx_ndlp = NULL; if (phba->sli_rev < LPFC_SLI_REV4) ndlp->nlp_rpi = mb->un.varWords[0]; diff --git a/drivers/scsi/lpfc/lpfc_hw.h b/drivers/scsi/lpfc/lpfc_hw.h index 009aa0eee040..ec1227018913 100644 --- a/drivers/scsi/lpfc/lpfc_hw.h +++ b/drivers/scsi/lpfc/lpfc_hw.h @@ -115,6 +115,7 @@ struct lpfc_sli_ct_request { uint32_t PortID; struct gid { uint8_t PortType; /* for GID_PT requests */ +#define GID_PT_N_PORT 1 uint8_t DomainScope; uint8_t AreaScope; uint8_t Fc4Type; /* for GID_FT requests */ diff --git a/drivers/scsi/lpfc/lpfc_hw4.h b/drivers/scsi/lpfc/lpfc_hw4.h index bbd0a57e953f..c15b9b6fb840 100644 --- a/drivers/scsi/lpfc/lpfc_hw4.h +++ b/drivers/scsi/lpfc/lpfc_hw4.h @@ -197,6 +197,10 @@ struct lpfc_sli_intf { #define LPFC_FCP_SCHED_ROUND_ROBIN 0 #define LPFC_FCP_SCHED_BY_CPU 1 +/* Algrithmns for NameServer Query after RSCN */ +#define LPFC_NS_QUERY_GID_FT 0 +#define LPFC_NS_QUERY_GID_PT 1 + /* Delay Multiplier constant */ #define LPFC_DMULT_CONST 651042 #define LPFC_DMULT_MAX 1023 @@ -1031,6 +1035,7 @@ struct mbox_header { #define LPFC_MBOX_OPCODE_FCOE_SET_FCLINK_SETTINGS 0x21 #define LPFC_MBOX_OPCODE_FCOE_LINK_DIAG_STATE 0x22 #define LPFC_MBOX_OPCODE_FCOE_LINK_DIAG_LOOPBACK 0x23 +#define LPFC_MBOX_OPCODE_FCOE_FC_SET_TRUNK_MODE 0x42 /* Low level Opcodes */ #define LPFC_MBOX_OPCODE_SET_DIAG_LOG_OPTION 0x37 @@ -2777,6 +2782,9 @@ struct lpfc_mbx_read_config { #define lpfc_mbx_rd_conf_lnk_ldv_SHIFT 8 #define lpfc_mbx_rd_conf_lnk_ldv_MASK 0x00000001 #define lpfc_mbx_rd_conf_lnk_ldv_WORD word2 +#define lpfc_mbx_rd_conf_trunk_SHIFT 12 +#define lpfc_mbx_rd_conf_trunk_MASK 0x0000000F +#define lpfc_mbx_rd_conf_trunk_WORD word2 #define lpfc_mbx_rd_conf_topology_SHIFT 24 #define lpfc_mbx_rd_conf_topology_MASK 0x000000FF #define lpfc_mbx_rd_conf_topology_WORD word2 @@ -3512,6 +3520,15 @@ struct lpfc_mbx_set_host_data { uint8_t data[LPFC_HOST_OS_DRIVER_VERSION_SIZE]; }; +struct lpfc_mbx_set_trunk_mode { + struct mbox_header header; + uint32_t word0; +#define lpfc_mbx_set_trunk_mode_WORD word0 +#define lpfc_mbx_set_trunk_mode_SHIFT 0 +#define lpfc_mbx_set_trunk_mode_MASK 0xFF + uint32_t word1; + uint32_t word2; +}; struct lpfc_mbx_get_sli4_parameters { struct mbox_header header; @@ -3833,6 +3850,9 @@ struct lpfc_mbx_wr_object { #define lpfc_wr_object_eof_SHIFT 31 #define lpfc_wr_object_eof_MASK 0x00000001 #define lpfc_wr_object_eof_WORD word4 +#define lpfc_wr_object_eas_SHIFT 29 +#define lpfc_wr_object_eas_MASK 0x00000001 +#define lpfc_wr_object_eas_WORD word4 #define lpfc_wr_object_write_length_SHIFT 0 #define lpfc_wr_object_write_length_MASK 0x00FFFFFF #define lpfc_wr_object_write_length_WORD word4 @@ -3843,6 +3863,15 @@ struct lpfc_mbx_wr_object { } request; struct { uint32_t actual_write_length; + uint32_t word5; +#define lpfc_wr_object_change_status_SHIFT 0 +#define lpfc_wr_object_change_status_MASK 0x000000FF +#define lpfc_wr_object_change_status_WORD word5 +#define LPFC_CHANGE_STATUS_NO_RESET_NEEDED 0x00 +#define LPFC_CHANGE_STATUS_PHYS_DEV_RESET 0x01 +#define LPFC_CHANGE_STATUS_FW_RESET 0x02 +#define LPFC_CHANGE_STATUS_PORT_MIGRATION 0x04 +#define LPFC_CHANGE_STATUS_PCI_RESET 0x05 } response; } u; }; @@ -3911,6 +3940,7 @@ struct lpfc_mqe { struct lpfc_mbx_set_feature set_feature; struct lpfc_mbx_memory_dump_type3 mem_dump_type3; struct lpfc_mbx_set_host_data set_host_data; + struct lpfc_mbx_set_trunk_mode set_trunk_mode; struct lpfc_mbx_nop nop; struct lpfc_mbx_set_ras_fwlog ras_fwlog; } un; @@ -4047,6 +4077,23 @@ struct lpfc_acqe_grp5 { uint32_t trailer; }; +static char *const trunk_errmsg[] = { /* map errcode */ + "", /* There is no such error code at index 0*/ + "link negotiated speed does not match existing" + " trunk - link was \"low\" speed", + "link negotiated speed does not match" + " existing trunk - link was \"middle\" speed", + "link negotiated speed does not match existing" + " trunk - link was \"high\" speed", + "Attached to non-trunking port - F_Port", + "Attached to non-trunking port - N_Port", + "FLOGI response timeout", + "non-FLOGI frame received", + "Invalid FLOGI response", + "Trunking initialization protocol", + "Trunk peer device mismatch", +}; + struct lpfc_acqe_fc_la { uint32_t word0; #define lpfc_acqe_fc_la_speed_SHIFT 24 @@ -4080,6 +4127,7 @@ struct lpfc_acqe_fc_la { #define LPFC_FC_LA_TYPE_MDS_LINK_DOWN 0x4 #define LPFC_FC_LA_TYPE_MDS_LOOPBACK 0x5 #define LPFC_FC_LA_TYPE_UNEXP_WWPN 0x6 +#define LPFC_FC_LA_TYPE_TRUNKING_EVENT 0x7 #define lpfc_acqe_fc_la_port_type_SHIFT 6 #define lpfc_acqe_fc_la_port_type_MASK 0x00000003 #define lpfc_acqe_fc_la_port_type_WORD word0 @@ -4088,6 +4136,32 @@ struct lpfc_acqe_fc_la { #define lpfc_acqe_fc_la_port_number_SHIFT 0 #define lpfc_acqe_fc_la_port_number_MASK 0x0000003F #define lpfc_acqe_fc_la_port_number_WORD word0 + +/* Attention Type is 0x07 (Trunking Event) word0 */ +#define lpfc_acqe_fc_la_trunk_link_status_port0_SHIFT 16 +#define lpfc_acqe_fc_la_trunk_link_status_port0_MASK 0x0000001 +#define lpfc_acqe_fc_la_trunk_link_status_port0_WORD word0 +#define lpfc_acqe_fc_la_trunk_link_status_port1_SHIFT 17 +#define lpfc_acqe_fc_la_trunk_link_status_port1_MASK 0x0000001 +#define lpfc_acqe_fc_la_trunk_link_status_port1_WORD word0 +#define lpfc_acqe_fc_la_trunk_link_status_port2_SHIFT 18 +#define lpfc_acqe_fc_la_trunk_link_status_port2_MASK 0x0000001 +#define lpfc_acqe_fc_la_trunk_link_status_port2_WORD word0 +#define lpfc_acqe_fc_la_trunk_link_status_port3_SHIFT 19 +#define lpfc_acqe_fc_la_trunk_link_status_port3_MASK 0x0000001 +#define lpfc_acqe_fc_la_trunk_link_status_port3_WORD word0 +#define lpfc_acqe_fc_la_trunk_config_port0_SHIFT 20 +#define lpfc_acqe_fc_la_trunk_config_port0_MASK 0x0000001 +#define lpfc_acqe_fc_la_trunk_config_port0_WORD word0 +#define lpfc_acqe_fc_la_trunk_config_port1_SHIFT 21 +#define lpfc_acqe_fc_la_trunk_config_port1_MASK 0x0000001 +#define lpfc_acqe_fc_la_trunk_config_port1_WORD word0 +#define lpfc_acqe_fc_la_trunk_config_port2_SHIFT 22 +#define lpfc_acqe_fc_la_trunk_config_port2_MASK 0x0000001 +#define lpfc_acqe_fc_la_trunk_config_port2_WORD word0 +#define lpfc_acqe_fc_la_trunk_config_port3_SHIFT 23 +#define lpfc_acqe_fc_la_trunk_config_port3_MASK 0x0000001 +#define lpfc_acqe_fc_la_trunk_config_port3_WORD word0 uint32_t word1; #define lpfc_acqe_fc_la_llink_spd_SHIFT 16 #define lpfc_acqe_fc_la_llink_spd_MASK 0x0000FFFF @@ -4095,6 +4169,12 @@ struct lpfc_acqe_fc_la { #define lpfc_acqe_fc_la_fault_SHIFT 0 #define lpfc_acqe_fc_la_fault_MASK 0x000000FF #define lpfc_acqe_fc_la_fault_WORD word1 +#define lpfc_acqe_fc_la_trunk_fault_SHIFT 0 +#define lpfc_acqe_fc_la_trunk_fault_MASK 0x0000000F +#define lpfc_acqe_fc_la_trunk_fault_WORD word1 +#define lpfc_acqe_fc_la_trunk_linkmask_SHIFT 4 +#define lpfc_acqe_fc_la_trunk_linkmask_MASK 0x000000F +#define lpfc_acqe_fc_la_trunk_linkmask_WORD word1 #define LPFC_FC_LA_FAULT_NONE 0x0 #define LPFC_FC_LA_FAULT_LOCAL 0x1 #define LPFC_FC_LA_FAULT_REMOTE 0x2 diff --git a/drivers/scsi/lpfc/lpfc_init.c b/drivers/scsi/lpfc/lpfc_init.c index 68d62d55a3a5..c1c36812c3d2 100644 --- a/drivers/scsi/lpfc/lpfc_init.c +++ b/drivers/scsi/lpfc/lpfc_init.c @@ -447,19 +447,19 @@ lpfc_config_port_post(struct lpfc_hba *phba) "READ_SPARM mbxStatus x%x\n", mb->mbxCommand, mb->mbxStatus); phba->link_state = LPFC_HBA_ERROR; - mp = (struct lpfc_dmabuf *) pmb->context1; + mp = (struct lpfc_dmabuf *)pmb->ctx_buf; mempool_free(pmb, phba->mbox_mem_pool); lpfc_mbuf_free(phba, mp->virt, mp->phys); kfree(mp); return -EIO; } - mp = (struct lpfc_dmabuf *) pmb->context1; + mp = (struct lpfc_dmabuf *)pmb->ctx_buf; memcpy(&vport->fc_sparam, mp->virt, sizeof (struct serv_parm)); lpfc_mbuf_free(phba, mp->virt, mp->phys); kfree(mp); - pmb->context1 = NULL; + pmb->ctx_buf = NULL; lpfc_update_vport_wwn(vport); /* Update the fc_host data structures with new wwn. */ @@ -1801,7 +1801,12 @@ lpfc_sli4_port_sta_fn_reset(struct lpfc_hba *phba, int mbx_action, lpfc_offline(phba); /* release interrupt for possible resource change */ lpfc_sli4_disable_intr(phba); - lpfc_sli_brdrestart(phba); + rc = lpfc_sli_brdrestart(phba); + if (rc) { + lpfc_printf_log(phba, KERN_ERR, LOG_INIT, + "6309 Failed to restart board\n"); + return rc; + } /* request and enable interrupt */ intr_mode = lpfc_sli4_enable_intr(phba, phba->intr_mode); if (intr_mode == LPFC_INTR_ERROR) { @@ -4106,6 +4111,32 @@ finished: return stat; } +void lpfc_host_supported_speeds_set(struct Scsi_Host *shost) +{ + struct lpfc_vport *vport = (struct lpfc_vport *)shost->hostdata; + struct lpfc_hba *phba = vport->phba; + + fc_host_supported_speeds(shost) = 0; + if (phba->lmt & LMT_128Gb) + fc_host_supported_speeds(shost) |= FC_PORTSPEED_128GBIT; + if (phba->lmt & LMT_64Gb) + fc_host_supported_speeds(shost) |= FC_PORTSPEED_64GBIT; + if (phba->lmt & LMT_32Gb) + fc_host_supported_speeds(shost) |= FC_PORTSPEED_32GBIT; + if (phba->lmt & LMT_16Gb) + fc_host_supported_speeds(shost) |= FC_PORTSPEED_16GBIT; + if (phba->lmt & LMT_10Gb) + fc_host_supported_speeds(shost) |= FC_PORTSPEED_10GBIT; + if (phba->lmt & LMT_8Gb) + fc_host_supported_speeds(shost) |= FC_PORTSPEED_8GBIT; + if (phba->lmt & LMT_4Gb) + fc_host_supported_speeds(shost) |= FC_PORTSPEED_4GBIT; + if (phba->lmt & LMT_2Gb) + fc_host_supported_speeds(shost) |= FC_PORTSPEED_2GBIT; + if (phba->lmt & LMT_1Gb) + fc_host_supported_speeds(shost) |= FC_PORTSPEED_1GBIT; +} + /** * lpfc_host_attrib_init - Initialize SCSI host attributes on a FC port * @shost: pointer to SCSI host data structure. @@ -4133,23 +4164,7 @@ void lpfc_host_attrib_init(struct Scsi_Host *shost) lpfc_vport_symbolic_node_name(vport, fc_host_symbolic_name(shost), sizeof fc_host_symbolic_name(shost)); - fc_host_supported_speeds(shost) = 0; - if (phba->lmt & LMT_64Gb) - fc_host_supported_speeds(shost) |= FC_PORTSPEED_64GBIT; - if (phba->lmt & LMT_32Gb) - fc_host_supported_speeds(shost) |= FC_PORTSPEED_32GBIT; - if (phba->lmt & LMT_16Gb) - fc_host_supported_speeds(shost) |= FC_PORTSPEED_16GBIT; - if (phba->lmt & LMT_10Gb) - fc_host_supported_speeds(shost) |= FC_PORTSPEED_10GBIT; - if (phba->lmt & LMT_8Gb) - fc_host_supported_speeds(shost) |= FC_PORTSPEED_8GBIT; - if (phba->lmt & LMT_4Gb) - fc_host_supported_speeds(shost) |= FC_PORTSPEED_4GBIT; - if (phba->lmt & LMT_2Gb) - fc_host_supported_speeds(shost) |= FC_PORTSPEED_2GBIT; - if (phba->lmt & LMT_1Gb) - fc_host_supported_speeds(shost) |= FC_PORTSPEED_1GBIT; + lpfc_host_supported_speeds_set(shost); fc_host_maxframe_size(shost) = (((uint32_t) vport->fc_sparam.cmn.bbRcvSizeMsb & 0x0F) << 8) | @@ -4467,6 +4482,9 @@ lpfc_sli4_port_speed_parse(struct lpfc_hba *phba, uint32_t evt_code, case LPFC_FC_LA_SPEED_64G: port_speed = 64000; break; + case LPFC_FC_LA_SPEED_128G: + port_speed = 128000; + break; default: port_speed = 0; } @@ -4609,6 +4627,136 @@ out_free_pmb: } /** + * lpfc_async_link_speed_to_read_top - Parse async evt link speed code to read + * topology. + * @phba: pointer to lpfc hba data structure. + * @evt_code: asynchronous event code. + * @speed_code: asynchronous event link speed code. + * + * This routine is to parse the giving SLI4 async event link speed code into + * value of Read topology link speed. + * + * Return: link speed in terms of Read topology. + **/ +static uint8_t +lpfc_async_link_speed_to_read_top(struct lpfc_hba *phba, uint8_t speed_code) +{ + uint8_t port_speed; + + switch (speed_code) { + case LPFC_FC_LA_SPEED_1G: + port_speed = LPFC_LINK_SPEED_1GHZ; + break; + case LPFC_FC_LA_SPEED_2G: + port_speed = LPFC_LINK_SPEED_2GHZ; + break; + case LPFC_FC_LA_SPEED_4G: + port_speed = LPFC_LINK_SPEED_4GHZ; + break; + case LPFC_FC_LA_SPEED_8G: + port_speed = LPFC_LINK_SPEED_8GHZ; + break; + case LPFC_FC_LA_SPEED_16G: + port_speed = LPFC_LINK_SPEED_16GHZ; + break; + case LPFC_FC_LA_SPEED_32G: + port_speed = LPFC_LINK_SPEED_32GHZ; + break; + case LPFC_FC_LA_SPEED_64G: + port_speed = LPFC_LINK_SPEED_64GHZ; + break; + case LPFC_FC_LA_SPEED_128G: + port_speed = LPFC_LINK_SPEED_128GHZ; + break; + case LPFC_FC_LA_SPEED_256G: + port_speed = LPFC_LINK_SPEED_256GHZ; + break; + default: + port_speed = 0; + break; + } + + return port_speed; +} + +#define trunk_link_status(__idx)\ + bf_get(lpfc_acqe_fc_la_trunk_config_port##__idx, acqe_fc) ?\ + ((phba->trunk_link.link##__idx.state == LPFC_LINK_UP) ?\ + "Link up" : "Link down") : "NA" +/* Did port __idx reported an error */ +#define trunk_port_fault(__idx)\ + bf_get(lpfc_acqe_fc_la_trunk_config_port##__idx, acqe_fc) ?\ + (port_fault & (1 << __idx) ? "YES" : "NO") : "NA" + +static void +lpfc_update_trunk_link_status(struct lpfc_hba *phba, + struct lpfc_acqe_fc_la *acqe_fc) +{ + uint8_t port_fault = bf_get(lpfc_acqe_fc_la_trunk_linkmask, acqe_fc); + uint8_t err = bf_get(lpfc_acqe_fc_la_trunk_fault, acqe_fc); + + phba->sli4_hba.link_state.speed = + lpfc_sli4_port_speed_parse(phba, LPFC_TRAILER_CODE_FC, + bf_get(lpfc_acqe_fc_la_speed, acqe_fc)); + + phba->sli4_hba.link_state.logical_speed = + bf_get(lpfc_acqe_fc_la_llink_spd, acqe_fc); + /* We got FC link speed, convert to fc_linkspeed (READ_TOPOLOGY) */ + phba->fc_linkspeed = + lpfc_async_link_speed_to_read_top( + phba, + bf_get(lpfc_acqe_fc_la_speed, acqe_fc)); + + if (bf_get(lpfc_acqe_fc_la_trunk_config_port0, acqe_fc)) { + phba->trunk_link.link0.state = + bf_get(lpfc_acqe_fc_la_trunk_link_status_port0, acqe_fc) + ? LPFC_LINK_UP : LPFC_LINK_DOWN; + phba->trunk_link.link0.fault = port_fault & 0x1 ? err : 0; + } + if (bf_get(lpfc_acqe_fc_la_trunk_config_port1, acqe_fc)) { + phba->trunk_link.link1.state = + bf_get(lpfc_acqe_fc_la_trunk_link_status_port1, acqe_fc) + ? LPFC_LINK_UP : LPFC_LINK_DOWN; + phba->trunk_link.link1.fault = port_fault & 0x2 ? err : 0; + } + if (bf_get(lpfc_acqe_fc_la_trunk_config_port2, acqe_fc)) { + phba->trunk_link.link2.state = + bf_get(lpfc_acqe_fc_la_trunk_link_status_port2, acqe_fc) + ? LPFC_LINK_UP : LPFC_LINK_DOWN; + phba->trunk_link.link2.fault = port_fault & 0x4 ? err : 0; + } + if (bf_get(lpfc_acqe_fc_la_trunk_config_port3, acqe_fc)) { + phba->trunk_link.link3.state = + bf_get(lpfc_acqe_fc_la_trunk_link_status_port3, acqe_fc) + ? LPFC_LINK_UP : LPFC_LINK_DOWN; + phba->trunk_link.link3.fault = port_fault & 0x8 ? err : 0; + } + + lpfc_printf_log(phba, KERN_ERR, LOG_SLI, + "2910 Async FC Trunking Event - Speed:%d\n" + "\tLogical speed:%d " + "port0: %s port1: %s port2: %s port3: %s\n", + phba->sli4_hba.link_state.speed, + phba->sli4_hba.link_state.logical_speed, + trunk_link_status(0), trunk_link_status(1), + trunk_link_status(2), trunk_link_status(3)); + + if (port_fault) + lpfc_printf_log(phba, KERN_ERR, LOG_SLI, + "3202 trunk error:0x%x (%s) seen on port0:%s " + /* + * SLI-4: We have only 0xA error codes + * defined as of now. print an appropriate + * message in case driver needs to be updated. + */ + "port1:%s port2:%s port3:%s\n", err, err > 0xA ? + "UNDEFINED. update driver." : trunk_errmsg[err], + trunk_port_fault(0), trunk_port_fault(1), + trunk_port_fault(2), trunk_port_fault(3)); +} + + +/** * lpfc_sli4_async_fc_evt - Process the asynchronous FC link event * @phba: pointer to lpfc hba data structure. * @acqe_fc: pointer to the async fc completion queue entry. @@ -4633,6 +4781,13 @@ lpfc_sli4_async_fc_evt(struct lpfc_hba *phba, struct lpfc_acqe_fc_la *acqe_fc) bf_get(lpfc_trailer_type, acqe_fc)); return; } + + if (bf_get(lpfc_acqe_fc_la_att_type, acqe_fc) == + LPFC_FC_LA_TYPE_TRUNKING_EVENT) { + lpfc_update_trunk_link_status(phba, acqe_fc); + return; + } + /* Keep the link status for extra SLI4 state machine reference */ phba->sli4_hba.link_state.speed = lpfc_sli4_port_speed_parse(phba, LPFC_TRAILER_CODE_FC, @@ -4762,6 +4917,8 @@ lpfc_sli4_async_sli_evt(struct lpfc_hba *phba, struct lpfc_acqe_sli *acqe_sli) struct temp_event temp_event_data; struct lpfc_acqe_misconfigured_event *misconfigured; struct Scsi_Host *shost; + struct lpfc_vport **vports; + int rc, i; evt_type = bf_get(lpfc_trailer_type, acqe_sli); @@ -4887,6 +5044,25 @@ lpfc_sli4_async_sli_evt(struct lpfc_hba *phba, struct lpfc_acqe_sli *acqe_sli) sprintf(message, "Unknown event status x%02x", status); break; } + + /* Issue READ_CONFIG mbox command to refresh supported speeds */ + rc = lpfc_sli4_read_config(phba); + if (rc) { + phba->lmt = 0; + lpfc_printf_log(phba, KERN_ERR, LOG_SLI, + "3194 Unable to retrieve supported " + "speeds, rc = 0x%x\n", rc); + } + vports = lpfc_create_vport_work_array(phba); + if (vports != NULL) { + for (i = 0; i <= phba->max_vports && vports[i] != NULL; + i++) { + shost = lpfc_shost_from_vport(vports[i]); + lpfc_host_supported_speeds_set(shost); + } + } + lpfc_destroy_vport_work_array(phba, vports); + phba->sli4_hba.lnk_info.optic_state = status; lpfc_printf_log(phba, KERN_ERR, LOG_SLI, "3176 Port Name %c %s\n", port_name, message); @@ -5044,7 +5220,7 @@ lpfc_sli4_async_fip_evt(struct lpfc_hba *phba, break; } /* If fast FCF failover rescan event is pending, do nothing */ - if (phba->fcf.fcf_flag & FCF_REDISC_EVT) { + if (phba->fcf.fcf_flag & (FCF_REDISC_EVT | FCF_REDISC_PEND)) { spin_unlock_irq(&phba->hbalock); break; } @@ -7181,26 +7357,19 @@ lpfc_post_init_setup(struct lpfc_hba *phba) static int lpfc_sli_pci_mem_setup(struct lpfc_hba *phba) { - struct pci_dev *pdev; + struct pci_dev *pdev = phba->pcidev; unsigned long bar0map_len, bar2map_len; int i, hbq_count; void *ptr; int error = -ENODEV; - /* Obtain PCI device reference */ - if (!phba->pcidev) + if (!pdev) return error; - else - pdev = phba->pcidev; /* Set the device DMA mask size */ - if (pci_set_dma_mask(pdev, DMA_BIT_MASK(64)) != 0 - || pci_set_consistent_dma_mask(pdev,DMA_BIT_MASK(64)) != 0) { - if (pci_set_dma_mask(pdev, DMA_BIT_MASK(32)) != 0 - || pci_set_consistent_dma_mask(pdev,DMA_BIT_MASK(32)) != 0) { - return error; - } - } + if (dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64)) || + dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32))) + return error; /* Get the bus address of Bar0 and Bar2 and the number of bytes * required by each mapping. @@ -7779,6 +7948,8 @@ lpfc_sli4_read_config(struct lpfc_hba *phba) phba->sli4_hba.bbscn_params.word0 = rd_config->word8; } + phba->sli4_hba.conf_trunk = + bf_get(lpfc_mbx_rd_conf_trunk, rd_config); phba->sli4_hba.extents_in_use = bf_get(lpfc_mbx_rd_conf_extnts_inuse, rd_config); phba->sli4_hba.max_cfg_param.max_xri = @@ -7787,6 +7958,9 @@ lpfc_sli4_read_config(struct lpfc_hba *phba) bf_get(lpfc_mbx_rd_conf_xri_base, rd_config); phba->sli4_hba.max_cfg_param.max_vpi = bf_get(lpfc_mbx_rd_conf_vpi_count, rd_config); + /* Limit the max we support */ + if (phba->sli4_hba.max_cfg_param.max_vpi > LPFC_MAX_VPORTS) + phba->sli4_hba.max_cfg_param.max_vpi = LPFC_MAX_VPORTS; phba->sli4_hba.max_cfg_param.vpi_base = bf_get(lpfc_mbx_rd_conf_vpi_base, rd_config); phba->sli4_hba.max_cfg_param.max_rpi = @@ -9562,25 +9736,18 @@ out: static int lpfc_sli4_pci_mem_setup(struct lpfc_hba *phba) { - struct pci_dev *pdev; + struct pci_dev *pdev = phba->pcidev; unsigned long bar0map_len, bar1map_len, bar2map_len; int error = -ENODEV; uint32_t if_type; - /* Obtain PCI device reference */ - if (!phba->pcidev) + if (!pdev) return error; - else - pdev = phba->pcidev; /* Set the device DMA mask size */ - if (pci_set_dma_mask(pdev, DMA_BIT_MASK(64)) != 0 - || pci_set_consistent_dma_mask(pdev,DMA_BIT_MASK(64)) != 0) { - if (pci_set_dma_mask(pdev, DMA_BIT_MASK(32)) != 0 - || pci_set_consistent_dma_mask(pdev,DMA_BIT_MASK(32)) != 0) { - return error; - } - } + if (dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64)) || + dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32))) + return error; /* * The BARs and register set definitions and offset locations are @@ -10523,12 +10690,7 @@ lpfc_sli4_hba_unset(struct lpfc_hba *phba) kthread_stop(phba->worker_thread); /* Disable FW logging to host memory */ - writel(LPFC_CTL_PDEV_CTL_DDL_RAS, - phba->sli4_hba.conf_regs_memmap_p + LPFC_CTL_PDEV_CTL_OFFSET); - - /* Free RAS DMA memory */ - if (phba->ras_fwlog.ras_enabled == true) - lpfc_sli4_ras_dma_free(phba); + lpfc_ras_stop_fwlog(phba); /* Unset the queues shared with the hardware then release all * allocated resources. @@ -10539,6 +10701,10 @@ lpfc_sli4_hba_unset(struct lpfc_hba *phba) /* Reset SLI4 HBA FCoE function */ lpfc_pci_function_reset(phba); + /* Free RAS DMA memory */ + if (phba->ras_fwlog.ras_enabled) + lpfc_sli4_ras_dma_free(phba); + /* Stop the SLI4 device port */ phba->pport->work_port_events = 0; } @@ -12476,7 +12642,8 @@ lpfc_sli4_ras_init(struct lpfc_hba *phba) case PCI_DEVICE_ID_LANCER_G6_FC: case PCI_DEVICE_ID_LANCER_G7_FC: phba->ras_fwlog.ras_hwsupport = true; - if (phba->cfg_ras_fwlog_func == PCI_FUNC(phba->pcidev->devfn)) + if (phba->cfg_ras_fwlog_func == PCI_FUNC(phba->pcidev->devfn) && + phba->cfg_ras_fwlog_buffsize) phba->ras_fwlog.ras_enabled = true; else phba->ras_fwlog.ras_enabled = false; diff --git a/drivers/scsi/lpfc/lpfc_mbox.c b/drivers/scsi/lpfc/lpfc_mbox.c index deb094fdbb79..f6a5083a621e 100644 --- a/drivers/scsi/lpfc/lpfc_mbox.c +++ b/drivers/scsi/lpfc/lpfc_mbox.c @@ -94,7 +94,7 @@ lpfc_dump_static_vport(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb, memset(mp->virt, 0, LPFC_BPL_SIZE); INIT_LIST_HEAD(&mp->list); /* save address for completion */ - pmb->context1 = (uint8_t *)mp; + pmb->ctx_buf = (uint8_t *)mp; mb->un.varWords[3] = putPaddrLow(mp->phys); mb->un.varWords[4] = putPaddrHigh(mp->phys); mb->un.varDmp.sli4_length = sizeof(struct static_vport_info); @@ -139,7 +139,7 @@ lpfc_dump_mem(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb, uint16_t offset, void *ctx; mb = &pmb->u.mb; - ctx = pmb->context2; + ctx = pmb->ctx_buf; /* Setup to dump VPD region */ memset(pmb, 0, sizeof (LPFC_MBOXQ_t)); @@ -151,7 +151,7 @@ lpfc_dump_mem(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb, uint16_t offset, mb->un.varDmp.word_cnt = (DMP_RSP_SIZE / sizeof (uint32_t)); mb->un.varDmp.co = 0; mb->un.varDmp.resp_offset = 0; - pmb->context2 = ctx; + pmb->ctx_buf = ctx; mb->mbxOwner = OWN_HOST; return; } @@ -172,7 +172,7 @@ lpfc_dump_wakeup_param(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb) mb = &pmb->u.mb; /* Save context so that we can restore after memset */ - ctx = pmb->context2; + ctx = pmb->ctx_buf; /* Setup to dump VPD region */ memset(pmb, 0, sizeof(LPFC_MBOXQ_t)); @@ -186,7 +186,7 @@ lpfc_dump_wakeup_param(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb) mb->un.varDmp.word_cnt = WAKE_UP_PARMS_WORD_SIZE; mb->un.varDmp.co = 0; mb->un.varDmp.resp_offset = 0; - pmb->context2 = ctx; + pmb->ctx_buf = ctx; return; } @@ -304,7 +304,7 @@ lpfc_read_topology(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb, /* Save address for later completion and set the owner to host so that * the FW knows this mailbox is available for processing. */ - pmb->context1 = (uint8_t *)mp; + pmb->ctx_buf = (uint8_t *)mp; mb->mbxOwner = OWN_HOST; return (0); } @@ -513,9 +513,9 @@ lpfc_init_link(struct lpfc_hba * phba, break; } - if (phba->pcidev->device == PCI_DEVICE_ID_LANCER_G6_FC && - mb->un.varInitLnk.link_flags & FLAGS_TOPOLOGY_MODE_LOOP) { - /* Failover is not tried for Lancer G6 */ + if ((phba->pcidev->device == PCI_DEVICE_ID_LANCER_G6_FC || + phba->pcidev->device == PCI_DEVICE_ID_LANCER_G7_FC) && + mb->un.varInitLnk.link_flags & FLAGS_TOPOLOGY_MODE_LOOP) { mb->un.varInitLnk.link_flags = FLAGS_TOPOLOGY_MODE_PT_PT; phba->cfg_topology = FLAGS_TOPOLOGY_MODE_PT_PT; } @@ -631,7 +631,7 @@ lpfc_read_sparam(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb, int vpi) mb->un.varRdSparm.vpi = phba->vpi_ids[vpi]; /* save address for completion */ - pmb->context1 = mp; + pmb->ctx_buf = mp; return (0); } @@ -783,7 +783,7 @@ lpfc_reg_rpi(struct lpfc_hba *phba, uint16_t vpi, uint32_t did, memcpy(sparam, param, sizeof (struct serv_parm)); /* save address for completion */ - pmb->context1 = (uint8_t *) mp; + pmb->ctx_buf = (uint8_t *)mp; mb->mbxCommand = MBX_REG_LOGIN64; mb->un.varRegLogin.un.sp64.tus.f.bdeSize = sizeof (struct serv_parm); @@ -858,7 +858,7 @@ lpfc_sli4_unreg_all_rpis(struct lpfc_vport *vport) mbox->u.mb.un.varUnregLogin.rsvd1 = 0x4000; mbox->vport = vport; mbox->mbox_cmpl = lpfc_sli_def_mbox_cmpl; - mbox->context1 = NULL; + mbox->ctx_ndlp = NULL; rc = lpfc_sli_issue_mbox(phba, mbox, MBX_NOWAIT); if (rc == MBX_NOT_FINISHED) mempool_free(mbox, phba->mbox_mem_pool); @@ -2288,7 +2288,7 @@ lpfc_sli4_dump_cfg_rg23(struct lpfc_hba *phba, struct lpfcMboxq *mbox) INIT_LIST_HEAD(&mp->list); /* save address for completion */ - mbox->context1 = (uint8_t *) mp; + mbox->ctx_buf = (uint8_t *)mp; mb->mbxCommand = MBX_DUMP_MEMORY; mb->un.varDmp.type = DMP_NV_PARAMS; @@ -2305,7 +2305,7 @@ lpfc_mbx_cmpl_rdp_link_stat(struct lpfc_hba *phba, LPFC_MBOXQ_t *mboxq) MAILBOX_t *mb; int rc = FAILURE; struct lpfc_rdp_context *rdp_context = - (struct lpfc_rdp_context *)(mboxq->context2); + (struct lpfc_rdp_context *)(mboxq->ctx_ndlp); mb = &mboxq->u.mb; if (mb->mbxStatus) @@ -2323,9 +2323,9 @@ mbx_failed: static void lpfc_mbx_cmpl_rdp_page_a2(struct lpfc_hba *phba, LPFC_MBOXQ_t *mbox) { - struct lpfc_dmabuf *mp = (struct lpfc_dmabuf *) mbox->context1; + struct lpfc_dmabuf *mp = (struct lpfc_dmabuf *)mbox->ctx_buf; struct lpfc_rdp_context *rdp_context = - (struct lpfc_rdp_context *)(mbox->context2); + (struct lpfc_rdp_context *)(mbox->ctx_ndlp); if (bf_get(lpfc_mqe_status, &mbox->u.mqe)) goto error_mbuf_free; @@ -2341,7 +2341,7 @@ lpfc_mbx_cmpl_rdp_page_a2(struct lpfc_hba *phba, LPFC_MBOXQ_t *mbox) lpfc_read_lnk_stat(phba, mbox); mbox->vport = rdp_context->ndlp->vport; mbox->mbox_cmpl = lpfc_mbx_cmpl_rdp_link_stat; - mbox->context2 = (struct lpfc_rdp_context *) rdp_context; + mbox->ctx_ndlp = (struct lpfc_rdp_context *)rdp_context; if (lpfc_sli_issue_mbox(phba, mbox, MBX_NOWAIT) == MBX_NOT_FINISHED) goto error_cmd_free; @@ -2359,9 +2359,9 @@ void lpfc_mbx_cmpl_rdp_page_a0(struct lpfc_hba *phba, LPFC_MBOXQ_t *mbox) { int rc; - struct lpfc_dmabuf *mp = (struct lpfc_dmabuf *) (mbox->context1); + struct lpfc_dmabuf *mp = (struct lpfc_dmabuf *)(mbox->ctx_buf); struct lpfc_rdp_context *rdp_context = - (struct lpfc_rdp_context *)(mbox->context2); + (struct lpfc_rdp_context *)(mbox->ctx_ndlp); if (bf_get(lpfc_mqe_status, &mbox->u.mqe)) goto error; @@ -2375,7 +2375,7 @@ lpfc_mbx_cmpl_rdp_page_a0(struct lpfc_hba *phba, LPFC_MBOXQ_t *mbox) INIT_LIST_HEAD(&mp->list); /* save address for completion */ - mbox->context1 = mp; + mbox->ctx_buf = mp; mbox->vport = rdp_context->ndlp->vport; bf_set(lpfc_mqe_command, &mbox->u.mqe, MBX_DUMP_MEMORY); @@ -2391,7 +2391,7 @@ lpfc_mbx_cmpl_rdp_page_a0(struct lpfc_hba *phba, LPFC_MBOXQ_t *mbox) mbox->u.mqe.un.mem_dump_type3.addr_hi = putPaddrHigh(mp->phys); mbox->mbox_cmpl = lpfc_mbx_cmpl_rdp_page_a2; - mbox->context2 = (struct lpfc_rdp_context *) rdp_context; + mbox->ctx_ndlp = (struct lpfc_rdp_context *)rdp_context; rc = lpfc_sli_issue_mbox(phba, mbox, MBX_NOWAIT); if (rc == MBX_NOT_FINISHED) goto error; @@ -2436,7 +2436,7 @@ lpfc_sli4_dump_page_a0(struct lpfc_hba *phba, struct lpfcMboxq *mbox) bf_set(lpfc_mqe_command, &mbox->u.mqe, MBX_DUMP_MEMORY); /* save address for completion */ - mbox->context1 = mp; + mbox->ctx_buf = mp; bf_set(lpfc_mbx_memory_dump_type3_type, &mbox->u.mqe.un.mem_dump_type3, DMP_LMSD); diff --git a/drivers/scsi/lpfc/lpfc_mem.c b/drivers/scsi/lpfc/lpfc_mem.c index 9c22a2c93462..66191fa35f63 100644 --- a/drivers/scsi/lpfc/lpfc_mem.c +++ b/drivers/scsi/lpfc/lpfc_mem.c @@ -330,7 +330,7 @@ lpfc_mem_free_all(struct lpfc_hba *phba) /* Free memory used in mailbox queue back to mailbox memory pool */ list_for_each_entry_safe(mbox, next_mbox, &psli->mboxq, list) { - mp = (struct lpfc_dmabuf *) (mbox->context1); + mp = (struct lpfc_dmabuf *)(mbox->ctx_buf); if (mp) { lpfc_mbuf_free(phba, mp->virt, mp->phys); kfree(mp); @@ -340,7 +340,7 @@ lpfc_mem_free_all(struct lpfc_hba *phba) } /* Free memory used in mailbox cmpl list back to mailbox memory pool */ list_for_each_entry_safe(mbox, next_mbox, &psli->mboxq_cmpl, list) { - mp = (struct lpfc_dmabuf *) (mbox->context1); + mp = (struct lpfc_dmabuf *)(mbox->ctx_buf); if (mp) { lpfc_mbuf_free(phba, mp->virt, mp->phys); kfree(mp); @@ -354,7 +354,7 @@ lpfc_mem_free_all(struct lpfc_hba *phba) spin_unlock_irq(&phba->hbalock); if (psli->mbox_active) { mbox = psli->mbox_active; - mp = (struct lpfc_dmabuf *) (mbox->context1); + mp = (struct lpfc_dmabuf *)(mbox->ctx_buf); if (mp) { lpfc_mbuf_free(phba, mp->virt, mp->phys); kfree(mp); diff --git a/drivers/scsi/lpfc/lpfc_nportdisc.c b/drivers/scsi/lpfc/lpfc_nportdisc.c index 269808e8480f..96bc3789a166 100644 --- a/drivers/scsi/lpfc/lpfc_nportdisc.c +++ b/drivers/scsi/lpfc/lpfc_nportdisc.c @@ -467,7 +467,7 @@ lpfc_rcv_plogi(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp, */ mbox->mbox_cmpl = lpfc_mbx_cmpl_reg_login; /* - * mbox->context2 = lpfc_nlp_get(ndlp) deferred until mailbox + * mbox->ctx_ndlp = lpfc_nlp_get(ndlp) deferred until mailbox * command issued in lpfc_cmpl_els_acc(). */ mbox->vport = vport; @@ -535,8 +535,8 @@ lpfc_mbx_cmpl_resume_rpi(struct lpfc_hba *phba, LPFC_MBOXQ_t *mboxq) struct lpfc_nodelist *ndlp; uint32_t cmd; - elsiocb = (struct lpfc_iocbq *)mboxq->context1; - ndlp = (struct lpfc_nodelist *) mboxq->context2; + elsiocb = (struct lpfc_iocbq *)mboxq->ctx_buf; + ndlp = (struct lpfc_nodelist *)mboxq->ctx_ndlp; vport = mboxq->vport; cmd = elsiocb->drvrTimeout; @@ -836,7 +836,9 @@ lpfc_disc_set_adisc(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp) struct Scsi_Host *shost = lpfc_shost_from_vport(vport); if (!(ndlp->nlp_flag & NLP_RPI_REGISTERED)) { + spin_lock_irq(shost->host_lock); ndlp->nlp_flag &= ~NLP_NPR_ADISC; + spin_unlock_irq(shost->host_lock); return 0; } @@ -851,7 +853,10 @@ lpfc_disc_set_adisc(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp) return 1; } } + + spin_lock_irq(shost->host_lock); ndlp->nlp_flag &= ~NLP_NPR_ADISC; + spin_unlock_irq(shost->host_lock); lpfc_unreg_rpi(vport, ndlp); return 0; } @@ -866,13 +871,26 @@ lpfc_disc_set_adisc(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp) * to release a rpi. **/ void -lpfc_release_rpi(struct lpfc_hba *phba, - struct lpfc_vport *vport, - uint16_t rpi) +lpfc_release_rpi(struct lpfc_hba *phba, struct lpfc_vport *vport, + struct lpfc_nodelist *ndlp, uint16_t rpi) { LPFC_MBOXQ_t *pmb; int rc; + /* If there is already an UNREG in progress for this ndlp, + * no need to queue up another one. + */ + if (ndlp->nlp_flag & NLP_UNREG_INP) { + lpfc_printf_vlog(vport, KERN_INFO, LOG_DISCOVERY, + "1435 release_rpi SKIP UNREG x%x on " + "NPort x%x deferred x%x flg x%x " + "Data: %p\n", + ndlp->nlp_rpi, ndlp->nlp_DID, + ndlp->nlp_defer_did, + ndlp->nlp_flag, ndlp); + return; + } + pmb = (LPFC_MBOXQ_t *) mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL); if (!pmb) @@ -881,6 +899,18 @@ lpfc_release_rpi(struct lpfc_hba *phba, else { lpfc_unreg_login(phba, vport->vpi, rpi, pmb); pmb->mbox_cmpl = lpfc_sli_def_mbox_cmpl; + pmb->vport = vport; + pmb->ctx_ndlp = ndlp; + + if (((ndlp->nlp_DID & Fabric_DID_MASK) != Fabric_DID_MASK) && + (!(vport->fc_flag & FC_OFFLINE_MODE))) + ndlp->nlp_flag |= NLP_UNREG_INP; + + lpfc_printf_vlog(vport, KERN_INFO, LOG_DISCOVERY, + "1437 release_rpi UNREG x%x " + "on NPort x%x flg x%x\n", + ndlp->nlp_rpi, ndlp->nlp_DID, ndlp->nlp_flag); + rc = lpfc_sli_issue_mbox(phba, pmb, MBX_NOWAIT); if (rc == MBX_NOT_FINISHED) mempool_free(pmb, phba->mbox_mem_pool); @@ -901,7 +931,7 @@ lpfc_disc_illegal(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp, (evt == NLP_EVT_CMPL_REG_LOGIN) && (!pmb->u.mb.mbxStatus)) { rpi = pmb->u.mb.un.varWords[0]; - lpfc_release_rpi(phba, vport, rpi); + lpfc_release_rpi(phba, vport, ndlp, rpi); } lpfc_printf_vlog(vport, KERN_ERR, LOG_DISCOVERY, "0271 Illegal State Transition: node x%x " @@ -1253,7 +1283,7 @@ lpfc_cmpl_plogi_plogi_issue(struct lpfc_vport *vport, ndlp->nlp_flag |= NLP_REG_LOGIN_SEND; mbox->mbox_cmpl = lpfc_mbx_cmpl_reg_login; } - mbox->context2 = lpfc_nlp_get(ndlp); + mbox->ctx_ndlp = lpfc_nlp_get(ndlp); mbox->vport = vport; if (lpfc_sli_issue_mbox(phba, mbox, MBX_NOWAIT) != MBX_NOT_FINISHED) { @@ -1267,7 +1297,7 @@ lpfc_cmpl_plogi_plogi_issue(struct lpfc_vport *vport, * command */ lpfc_nlp_put(ndlp); - mp = (struct lpfc_dmabuf *) mbox->context1; + mp = (struct lpfc_dmabuf *)mbox->ctx_buf; lpfc_mbuf_free(phba, mp->virt, mp->phys); kfree(mp); mempool_free(mbox, phba->mbox_mem_pool); @@ -1329,7 +1359,7 @@ lpfc_cmpl_reglogin_plogi_issue(struct lpfc_vport *vport, if (!(phba->pport->load_flag & FC_UNLOADING) && !mb->mbxStatus) { rpi = pmb->u.mb.un.varWords[0]; - lpfc_release_rpi(phba, vport, rpi); + lpfc_release_rpi(phba, vport, ndlp, rpi); } return ndlp->nlp_state; } @@ -1636,10 +1666,10 @@ lpfc_rcv_logo_reglogin_issue(struct lpfc_vport *vport, /* cleanup any ndlp on mbox q waiting for reglogin cmpl */ if ((mb = phba->sli.mbox_active)) { if ((mb->u.mb.mbxCommand == MBX_REG_LOGIN64) && - (ndlp == (struct lpfc_nodelist *) mb->context2)) { + (ndlp == (struct lpfc_nodelist *)mb->ctx_ndlp)) { ndlp->nlp_flag &= ~NLP_REG_LOGIN_SEND; lpfc_nlp_put(ndlp); - mb->context2 = NULL; + mb->ctx_ndlp = NULL; mb->mbox_cmpl = lpfc_sli_def_mbox_cmpl; } } @@ -1647,8 +1677,8 @@ lpfc_rcv_logo_reglogin_issue(struct lpfc_vport *vport, spin_lock_irq(&phba->hbalock); list_for_each_entry_safe(mb, nextmb, &phba->sli.mboxq, list) { if ((mb->u.mb.mbxCommand == MBX_REG_LOGIN64) && - (ndlp == (struct lpfc_nodelist *) mb->context2)) { - mp = (struct lpfc_dmabuf *) (mb->context1); + (ndlp == (struct lpfc_nodelist *)mb->ctx_ndlp)) { + mp = (struct lpfc_dmabuf *)(mb->ctx_buf); if (mp) { __lpfc_mbuf_free(phba, mp->virt, mp->phys); kfree(mp); @@ -1770,9 +1800,16 @@ lpfc_cmpl_reglogin_reglogin_issue(struct lpfc_vport *vport, ndlp->nlp_fc4_type |= NLP_FC4_FCP; } else if (ndlp->nlp_fc4_type == 0) { - rc = lpfc_ns_cmd(vport, SLI_CTNS_GFT_ID, - 0, ndlp->nlp_DID); - return ndlp->nlp_state; + /* If we are only configured for FCP, the driver + * should just issue PRLI for FCP. Otherwise issue + * GFT_ID to determine if remote port supports NVME. + */ + if (phba->cfg_enable_fc4_type != LPFC_ENABLE_FCP) { + rc = lpfc_ns_cmd(vport, SLI_CTNS_GFT_ID, + 0, ndlp->nlp_DID); + return ndlp->nlp_state; + } + ndlp->nlp_fc4_type = NLP_FC4_FCP; } ndlp->nlp_prev_state = NLP_STE_REG_LOGIN_ISSUE; @@ -2863,8 +2900,9 @@ lpfc_disc_state_machine(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp, /* DSM in event <evt> on NPort <nlp_DID> in state <cur_state> */ lpfc_printf_vlog(vport, KERN_INFO, LOG_DISCOVERY, "0211 DSM in event x%x on NPort x%x in " - "state %d Data: x%x\n", - evt, ndlp->nlp_DID, cur_state, ndlp->nlp_flag); + "state %d rpi x%x Data: x%x x%x\n", + evt, ndlp->nlp_DID, cur_state, ndlp->nlp_rpi, + ndlp->nlp_flag, ndlp->nlp_fc4_type); lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_DSM, "DSM in: evt:%d ste:%d did:x%x", @@ -2876,8 +2914,9 @@ lpfc_disc_state_machine(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp, /* DSM out state <rc> on NPort <nlp_DID> */ if (got_ndlp) { lpfc_printf_vlog(vport, KERN_INFO, LOG_DISCOVERY, - "0212 DSM out state %d on NPort x%x Data: x%x\n", - rc, ndlp->nlp_DID, ndlp->nlp_flag); + "0212 DSM out state %d on NPort x%x " + "rpi x%x Data: x%x\n", + rc, ndlp->nlp_DID, ndlp->nlp_rpi, ndlp->nlp_flag); lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_DSM, "DSM out: ste:%d did:x%x flg:x%x", diff --git a/drivers/scsi/lpfc/lpfc_nvme.c b/drivers/scsi/lpfc/lpfc_nvme.c index ba831def9301..4c66b19e6199 100644 --- a/drivers/scsi/lpfc/lpfc_nvme.c +++ b/drivers/scsi/lpfc/lpfc_nvme.c @@ -1855,7 +1855,6 @@ lpfc_nvme_fcp_abort(struct nvme_fc_local_port *pnvme_lport, bf_set(abort_cmd_criteria, &abts_wqe->abort_cmd, T_XRI_TAG); /* word 7 */ - bf_set(wqe_ct, &abts_wqe->abort_cmd.wqe_com, 0); bf_set(wqe_cmnd, &abts_wqe->abort_cmd.wqe_com, CMD_ABORT_XRI_CX); bf_set(wqe_class, &abts_wqe->abort_cmd.wqe_com, nvmereq_wqe->iocb.ulpClass); @@ -1870,7 +1869,6 @@ lpfc_nvme_fcp_abort(struct nvme_fc_local_port *pnvme_lport, abts_buf->iotag); /* word 10 */ - bf_set(wqe_wqid, &abts_wqe->abort_cmd.wqe_com, nvmereq_wqe->hba_wqidx); bf_set(wqe_qosd, &abts_wqe->abort_cmd.wqe_com, 1); bf_set(wqe_lenloc, &abts_wqe->abort_cmd.wqe_com, LPFC_WQE_LENLOC_NONE); diff --git a/drivers/scsi/lpfc/lpfc_scsi.c b/drivers/scsi/lpfc/lpfc_scsi.c index 4fa6703a9ec9..b4f1a840b3b4 100644 --- a/drivers/scsi/lpfc/lpfc_scsi.c +++ b/drivers/scsi/lpfc/lpfc_scsi.c @@ -2734,6 +2734,7 @@ lpfc_bg_scsi_prep_dma_buf_s3(struct lpfc_hba *phba, int datasegcnt, protsegcnt, datadir = scsi_cmnd->sc_data_direction; int prot_group_type = 0; int fcpdl; + struct lpfc_vport *vport = phba->pport; /* * Start the lpfc command prep by bumping the bpl beyond fcp_cmnd @@ -2839,6 +2840,14 @@ lpfc_bg_scsi_prep_dma_buf_s3(struct lpfc_hba *phba, */ iocb_cmd->un.fcpi.fcpi_parm = fcpdl; + /* + * For First burst, we may need to adjust the initial transfer + * length for DIF + */ + if (iocb_cmd->un.fcpi.fcpi_XRdy && + (fcpdl < vport->cfg_first_burst_size)) + iocb_cmd->un.fcpi.fcpi_XRdy = fcpdl; + return 0; err: if (lpfc_cmd->seg_cnt) @@ -3403,6 +3412,7 @@ lpfc_bg_scsi_prep_dma_buf_s4(struct lpfc_hba *phba, int datasegcnt, protsegcnt, datadir = scsi_cmnd->sc_data_direction; int prot_group_type = 0; int fcpdl; + struct lpfc_vport *vport = phba->pport; /* * Start the lpfc command prep by bumping the sgl beyond fcp_cmnd @@ -3519,6 +3529,14 @@ lpfc_bg_scsi_prep_dma_buf_s4(struct lpfc_hba *phba, iocb_cmd->un.fcpi.fcpi_parm = fcpdl; /* + * For First burst, we may need to adjust the initial transfer + * length for DIF + */ + if (iocb_cmd->un.fcpi.fcpi_XRdy && + (fcpdl < vport->cfg_first_burst_size)) + iocb_cmd->un.fcpi.fcpi_XRdy = fcpdl; + + /* * If the OAS driver feature is enabled and the lun is enabled for * OAS, set the oas iocb related flags. */ @@ -3914,7 +3932,7 @@ int lpfc_sli4_scmd_to_wqidx_distr(struct lpfc_hba *phba, uint32_t tag; uint16_t hwq; - if (cmnd && shost_use_blk_mq(cmnd->device->host)) { + if (cmnd) { tag = blk_mq_unique_tag(cmnd->request); hwq = blk_mq_unique_tag_to_hwq(tag); @@ -4163,7 +4181,7 @@ lpfc_scsi_cmd_iocb_cmpl(struct lpfc_hba *phba, struct lpfc_iocbq *pIocbIn, /* If pCmd was set to NULL from abort path, do not call scsi_done */ if (xchg(&lpfc_cmd->pCmd, NULL) == NULL) { lpfc_printf_vlog(vport, KERN_INFO, LOG_FCP, - "0711 FCP cmd already NULL, sid: 0x%06x, " + "5688 FCP cmd already NULL, sid: 0x%06x, " "did: 0x%06x, oxid: 0x%04x\n", vport->fc_myDID, (pnode) ? pnode->nlp_DID : 0, @@ -4442,6 +4460,66 @@ lpfc_tskmgmt_def_cmpl(struct lpfc_hba *phba, } /** + * lpfc_check_pci_resettable - Walks list of devices on pci_dev's bus to check + * if issuing a pci_bus_reset is possibly unsafe + * @phba: lpfc_hba pointer. + * + * Description: + * Walks the bus_list to ensure only PCI devices with Emulex + * vendor id, device ids that support hot reset, and only one occurrence + * of function 0. + * + * Returns: + * -EBADSLT, detected invalid device + * 0, successful + */ +int +lpfc_check_pci_resettable(const struct lpfc_hba *phba) +{ + const struct pci_dev *pdev = phba->pcidev; + struct pci_dev *ptr = NULL; + u8 counter = 0; + + /* Walk the list of devices on the pci_dev's bus */ + list_for_each_entry(ptr, &pdev->bus->devices, bus_list) { + /* Check for Emulex Vendor ID */ + if (ptr->vendor != PCI_VENDOR_ID_EMULEX) { + lpfc_printf_log(phba, KERN_INFO, LOG_INIT, + "8346 Non-Emulex vendor found: " + "0x%04x\n", ptr->vendor); + return -EBADSLT; + } + + /* Check for valid Emulex Device ID */ + switch (ptr->device) { + case PCI_DEVICE_ID_LANCER_FC: + case PCI_DEVICE_ID_LANCER_G6_FC: + case PCI_DEVICE_ID_LANCER_G7_FC: + break; + default: + lpfc_printf_log(phba, KERN_INFO, LOG_INIT, + "8347 Invalid device found: " + "0x%04x\n", ptr->device); + return -EBADSLT; + } + + /* Check for only one function 0 ID to ensure only one HBA on + * secondary bus + */ + if (ptr->devfn == 0) { + if (++counter > 1) { + lpfc_printf_log(phba, KERN_INFO, LOG_INIT, + "8348 More than one device on " + "secondary bus found\n"); + return -EBADSLT; + } + } + } + + return 0; +} + +/** * lpfc_info - Info entry point of scsi_host_template data structure * @host: The scsi host for which this call is being executed. * @@ -4455,32 +4533,53 @@ lpfc_info(struct Scsi_Host *host) { struct lpfc_vport *vport = (struct lpfc_vport *) host->hostdata; struct lpfc_hba *phba = vport->phba; - int len, link_speed = 0; - static char lpfcinfobuf[384]; + int link_speed = 0; + static char lpfcinfobuf[384]; + char tmp[384] = {0}; - memset(lpfcinfobuf,0,384); + memset(lpfcinfobuf, 0, sizeof(lpfcinfobuf)); if (phba && phba->pcidev){ - strncpy(lpfcinfobuf, phba->ModelDesc, 256); - len = strlen(lpfcinfobuf); - snprintf(lpfcinfobuf + len, - 384-len, - " on PCI bus %02x device %02x irq %d", - phba->pcidev->bus->number, - phba->pcidev->devfn, - phba->pcidev->irq); - len = strlen(lpfcinfobuf); + /* Model Description */ + scnprintf(tmp, sizeof(tmp), phba->ModelDesc); + if (strlcat(lpfcinfobuf, tmp, sizeof(lpfcinfobuf)) >= + sizeof(lpfcinfobuf)) + goto buffer_done; + + /* PCI Info */ + scnprintf(tmp, sizeof(tmp), + " on PCI bus %02x device %02x irq %d", + phba->pcidev->bus->number, phba->pcidev->devfn, + phba->pcidev->irq); + if (strlcat(lpfcinfobuf, tmp, sizeof(lpfcinfobuf)) >= + sizeof(lpfcinfobuf)) + goto buffer_done; + + /* Port Number */ if (phba->Port[0]) { - snprintf(lpfcinfobuf + len, - 384-len, - " port %s", - phba->Port); + scnprintf(tmp, sizeof(tmp), " port %s", phba->Port); + if (strlcat(lpfcinfobuf, tmp, sizeof(lpfcinfobuf)) >= + sizeof(lpfcinfobuf)) + goto buffer_done; } - len = strlen(lpfcinfobuf); + + /* Link Speed */ link_speed = lpfc_sli_port_speed_get(phba); - if (link_speed != 0) - snprintf(lpfcinfobuf + len, 384-len, - " Logical Link Speed: %d Mbps", link_speed); + if (link_speed != 0) { + scnprintf(tmp, sizeof(tmp), + " Logical Link Speed: %d Mbps", link_speed); + if (strlcat(lpfcinfobuf, tmp, sizeof(lpfcinfobuf)) >= + sizeof(lpfcinfobuf)) + goto buffer_done; + } + + /* PCI resettable */ + if (!lpfc_check_pci_resettable(phba)) { + scnprintf(tmp, sizeof(tmp), " PCI resettable"); + strlcat(lpfcinfobuf, tmp, sizeof(lpfcinfobuf)); + } } + +buffer_done: return lpfcinfobuf; } @@ -6036,7 +6135,6 @@ struct scsi_host_template lpfc_template_nvme = { .this_id = -1, .sg_tablesize = 1, .cmd_per_lun = 1, - .use_clustering = ENABLE_CLUSTERING, .shost_attrs = lpfc_hba_attrs, .max_sectors = 0xFFFF, .vendor_id = LPFC_NL_VENDOR_ID, @@ -6061,7 +6159,6 @@ struct scsi_host_template lpfc_template_no_hr = { .this_id = -1, .sg_tablesize = LPFC_DEFAULT_SG_SEG_CNT, .cmd_per_lun = LPFC_CMD_PER_LUN, - .use_clustering = ENABLE_CLUSTERING, .shost_attrs = lpfc_hba_attrs, .max_sectors = 0xFFFF, .vendor_id = LPFC_NL_VENDOR_ID, @@ -6088,7 +6185,6 @@ struct scsi_host_template lpfc_template = { .this_id = -1, .sg_tablesize = LPFC_DEFAULT_SG_SEG_CNT, .cmd_per_lun = LPFC_CMD_PER_LUN, - .use_clustering = ENABLE_CLUSTERING, .shost_attrs = lpfc_hba_attrs, .max_sectors = 0xFFFF, .vendor_id = LPFC_NL_VENDOR_ID, @@ -6113,7 +6209,6 @@ struct scsi_host_template lpfc_vport_template = { .this_id = -1, .sg_tablesize = LPFC_DEFAULT_SG_SEG_CNT, .cmd_per_lun = LPFC_CMD_PER_LUN, - .use_clustering = ENABLE_CLUSTERING, .shost_attrs = lpfc_vport_attrs, .max_sectors = 0xFFFF, .change_queue_depth = scsi_change_queue_depth, diff --git a/drivers/scsi/lpfc/lpfc_scsi.h b/drivers/scsi/lpfc/lpfc_scsi.h index cc99859774ff..b759b089432c 100644 --- a/drivers/scsi/lpfc/lpfc_scsi.h +++ b/drivers/scsi/lpfc/lpfc_scsi.h @@ -194,6 +194,10 @@ struct lpfc_scsi_buf { #define NO_MORE_OAS_LUN -1 #define NOT_OAS_ENABLED_LUN NO_MORE_OAS_LUN +#ifndef FC_PORTSPEED_128GBIT +#define FC_PORTSPEED_128GBIT 0x2000 +#endif + #define TXRDY_PAYLOAD_LEN 12 int lpfc_sli4_scmd_to_wqidx_distr(struct lpfc_hba *phba, diff --git a/drivers/scsi/lpfc/lpfc_sli.c b/drivers/scsi/lpfc/lpfc_sli.c index b9e5cd79931a..30734caf77e1 100644 --- a/drivers/scsi/lpfc/lpfc_sli.c +++ b/drivers/scsi/lpfc/lpfc_sli.c @@ -2456,7 +2456,7 @@ lpfc_sli_def_mbox_cmpl(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb) uint16_t rpi, vpi; int rc; - mp = (struct lpfc_dmabuf *) (pmb->context1); + mp = (struct lpfc_dmabuf *)(pmb->ctx_buf); if (mp) { lpfc_mbuf_free(phba, mp->virt, mp->phys); @@ -2491,9 +2491,35 @@ lpfc_sli_def_mbox_cmpl(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb) } if (pmb->u.mb.mbxCommand == MBX_REG_LOGIN64) { - ndlp = (struct lpfc_nodelist *)pmb->context2; + ndlp = (struct lpfc_nodelist *)pmb->ctx_ndlp; lpfc_nlp_put(ndlp); - pmb->context2 = NULL; + pmb->ctx_buf = NULL; + pmb->ctx_ndlp = NULL; + } + + if (pmb->u.mb.mbxCommand == MBX_UNREG_LOGIN) { + ndlp = (struct lpfc_nodelist *)pmb->ctx_ndlp; + + /* Check to see if there are any deferred events to process */ + if (ndlp) { + lpfc_printf_vlog( + vport, + KERN_INFO, LOG_MBOX | LOG_DISCOVERY, + "1438 UNREG cmpl deferred mbox x%x " + "on NPort x%x Data: x%x x%x %p\n", + ndlp->nlp_rpi, ndlp->nlp_DID, + ndlp->nlp_flag, ndlp->nlp_defer_did, ndlp); + + if ((ndlp->nlp_flag & NLP_UNREG_INP) && + (ndlp->nlp_defer_did != NLP_EVT_NOTHING_PENDING)) { + ndlp->nlp_flag &= ~NLP_UNREG_INP; + ndlp->nlp_defer_did = NLP_EVT_NOTHING_PENDING; + lpfc_issue_els_plogi(vport, ndlp->nlp_DID, 0); + } else { + ndlp->nlp_flag &= ~NLP_UNREG_INP; + } + } + pmb->ctx_ndlp = NULL; } /* Check security permission status on INIT_LINK mailbox command */ @@ -2527,21 +2553,46 @@ lpfc_sli4_unreg_rpi_cmpl_clr(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb) struct lpfc_vport *vport = pmb->vport; struct lpfc_nodelist *ndlp; - ndlp = pmb->context1; + ndlp = pmb->ctx_ndlp; if (pmb->u.mb.mbxCommand == MBX_UNREG_LOGIN) { if (phba->sli_rev == LPFC_SLI_REV4 && (bf_get(lpfc_sli_intf_if_type, &phba->sli4_hba.sli_intf) >= LPFC_SLI_INTF_IF_TYPE_2)) { if (ndlp) { - lpfc_printf_vlog(vport, KERN_INFO, LOG_SLI, - "0010 UNREG_LOGIN vpi:%x " - "rpi:%x DID:%x map:%x %p\n", - vport->vpi, ndlp->nlp_rpi, - ndlp->nlp_DID, - ndlp->nlp_usg_map, ndlp); + lpfc_printf_vlog( + vport, KERN_INFO, LOG_MBOX | LOG_SLI, + "0010 UNREG_LOGIN vpi:%x " + "rpi:%x DID:%x defer x%x flg x%x " + "map:%x %p\n", + vport->vpi, ndlp->nlp_rpi, + ndlp->nlp_DID, ndlp->nlp_defer_did, + ndlp->nlp_flag, + ndlp->nlp_usg_map, ndlp); ndlp->nlp_flag &= ~NLP_LOGO_ACC; lpfc_nlp_put(ndlp); + + /* Check to see if there are any deferred + * events to process + */ + if ((ndlp->nlp_flag & NLP_UNREG_INP) && + (ndlp->nlp_defer_did != + NLP_EVT_NOTHING_PENDING)) { + lpfc_printf_vlog( + vport, KERN_INFO, LOG_DISCOVERY, + "4111 UNREG cmpl deferred " + "clr x%x on " + "NPort x%x Data: x%x %p\n", + ndlp->nlp_rpi, ndlp->nlp_DID, + ndlp->nlp_defer_did, ndlp); + ndlp->nlp_flag &= ~NLP_UNREG_INP; + ndlp->nlp_defer_did = + NLP_EVT_NOTHING_PENDING; + lpfc_issue_els_plogi( + vport, ndlp->nlp_DID, 0); + } else { + ndlp->nlp_flag &= ~NLP_UNREG_INP; + } } } } @@ -4640,6 +4691,8 @@ lpfc_sli_brdrestart_s4(struct lpfc_hba *phba) hba_aer_enabled = phba->hba_flag & HBA_AER_ENABLED; rc = lpfc_sli4_brdreset(phba); + if (rc) + return rc; spin_lock_irq(&phba->hbalock); phba->pport->stopped = 0; @@ -5228,7 +5281,7 @@ lpfc_sli4_read_fcoe_params(struct lpfc_hba *phba) goto out_free_mboxq; } - mp = (struct lpfc_dmabuf *) mboxq->context1; + mp = (struct lpfc_dmabuf *)mboxq->ctx_buf; rc = lpfc_sli_issue_mbox(phba, mboxq, MBX_POLL); lpfc_printf_log(phba, KERN_INFO, LOG_MBOX | LOG_SLI, @@ -6148,6 +6201,25 @@ lpfc_set_features(struct lpfc_hba *phba, LPFC_MBOXQ_t *mbox, } /** + * lpfc_ras_stop_fwlog: Disable FW logging by the adapter + * @phba: Pointer to HBA context object. + * + * Disable FW logging into host memory on the adapter. To + * be done before reading logs from the host memory. + **/ +void +lpfc_ras_stop_fwlog(struct lpfc_hba *phba) +{ + struct lpfc_ras_fwlog *ras_fwlog = &phba->ras_fwlog; + + ras_fwlog->ras_active = false; + + /* Disable FW logging to host memory */ + writel(LPFC_CTL_PDEV_CTL_DDL_RAS, + phba->sli4_hba.conf_regs_memmap_p + LPFC_CTL_PDEV_CTL_OFFSET); +} + +/** * lpfc_sli4_ras_dma_free - Free memory allocated for FW logging. * @phba: Pointer to HBA context object. * @@ -6211,7 +6283,7 @@ lpfc_sli4_ras_dma_alloc(struct lpfc_hba *phba, &ras_fwlog->lwpd.phys, GFP_KERNEL); if (!ras_fwlog->lwpd.virt) { - lpfc_printf_log(phba, KERN_WARNING, LOG_INIT, + lpfc_printf_log(phba, KERN_ERR, LOG_INIT, "6185 LWPD Memory Alloc Failed\n"); return -ENOMEM; @@ -6228,7 +6300,7 @@ lpfc_sli4_ras_dma_alloc(struct lpfc_hba *phba, goto free_mem; } - dmabuf->virt = dma_alloc_coherent(&phba->pcidev->dev, + dmabuf->virt = dma_zalloc_coherent(&phba->pcidev->dev, LPFC_RAS_MAX_ENTRY_SIZE, &dmabuf->phys, GFP_KERNEL); @@ -6239,7 +6311,6 @@ lpfc_sli4_ras_dma_alloc(struct lpfc_hba *phba, "6187 DMA Alloc Failed FW logging"); goto free_mem; } - memset(dmabuf->virt, 0, LPFC_RAS_MAX_ENTRY_SIZE); dmabuf->buffer_tag = i; list_add_tail(&dmabuf->list, &ras_fwlog->fwlog_buff_list); } @@ -6274,11 +6345,13 @@ lpfc_sli4_ras_mbox_cmpl(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb) shdr_add_status = bf_get(lpfc_mbox_hdr_add_status, &shdr->response); if (mb->mbxStatus != MBX_SUCCESS || shdr_status) { - lpfc_printf_log(phba, KERN_WARNING, LOG_MBOX, + lpfc_printf_log(phba, KERN_ERR, LOG_MBOX, "6188 FW LOG mailbox " "completed with status x%x add_status x%x," " mbx status x%x\n", shdr_status, shdr_add_status, mb->mbxStatus); + + ras_fwlog->ras_hwsupport = false; goto disable_ras; } @@ -6326,7 +6399,7 @@ lpfc_sli4_ras_fwlog_init(struct lpfc_hba *phba, rc = lpfc_sli4_ras_dma_alloc(phba, fwlog_entry_count); if (rc) { lpfc_printf_log(phba, KERN_WARNING, LOG_INIT, - "6189 RAS FW Log Support Not Enabled"); + "6189 FW Log Memory Allocation Failed"); return rc; } } @@ -6334,7 +6407,7 @@ lpfc_sli4_ras_fwlog_init(struct lpfc_hba *phba, /* Setup Mailbox command */ mbox = mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL); if (!mbox) { - lpfc_printf_log(phba, KERN_WARNING, LOG_INIT, + lpfc_printf_log(phba, KERN_ERR, LOG_INIT, "6190 RAS MBX Alloc Failed"); rc = -ENOMEM; goto mem_free; @@ -6379,8 +6452,8 @@ lpfc_sli4_ras_fwlog_init(struct lpfc_hba *phba, rc = lpfc_sli_issue_mbox(phba, mbox, MBX_NOWAIT); if (rc == MBX_NOT_FINISHED) { - lpfc_printf_log(phba, KERN_WARNING, LOG_INIT, - "6191 RAS Mailbox failed. " + lpfc_printf_log(phba, KERN_ERR, LOG_INIT, + "6191 FW-Log Mailbox failed. " "status %d mbxStatus : x%x", rc, bf_get(lpfc_mqe_status, &mbox->u.mqe)); mempool_free(mbox, phba->mbox_mem_pool); @@ -7348,7 +7421,7 @@ lpfc_sli4_hba_setup(struct lpfc_hba *phba) mboxq->vport = vport; rc = lpfc_sli_issue_mbox(phba, mboxq, MBX_POLL); - mp = (struct lpfc_dmabuf *) mboxq->context1; + mp = (struct lpfc_dmabuf *)mboxq->ctx_buf; if (rc == MBX_SUCCESS) { memcpy(&vport->fc_sparam, mp->virt, sizeof(struct serv_parm)); rc = 0; @@ -7360,7 +7433,7 @@ lpfc_sli4_hba_setup(struct lpfc_hba *phba) */ lpfc_mbuf_free(phba, mp->virt, mp->phys); kfree(mp); - mboxq->context1 = NULL; + mboxq->ctx_buf = NULL; if (unlikely(rc)) { lpfc_printf_log(phba, KERN_ERR, LOG_MBOX | LOG_SLI, "0382 READ_SPARAM command failed " @@ -7635,7 +7708,18 @@ lpfc_sli4_hba_setup(struct lpfc_hba *phba) */ spin_lock_irq(&phba->hbalock); phba->link_state = LPFC_LINK_DOWN; + + /* Check if physical ports are trunked */ + if (bf_get(lpfc_conf_trunk_port0, &phba->sli4_hba)) + phba->trunk_link.link0.state = LPFC_LINK_DOWN; + if (bf_get(lpfc_conf_trunk_port1, &phba->sli4_hba)) + phba->trunk_link.link1.state = LPFC_LINK_DOWN; + if (bf_get(lpfc_conf_trunk_port2, &phba->sli4_hba)) + phba->trunk_link.link2.state = LPFC_LINK_DOWN; + if (bf_get(lpfc_conf_trunk_port3, &phba->sli4_hba)) + phba->trunk_link.link3.state = LPFC_LINK_DOWN; spin_unlock_irq(&phba->hbalock); + if (!(phba->hba_flag & HBA_FCOE_MODE) && (phba->hba_flag & LINK_DISABLED)) { lpfc_printf_log(phba, KERN_ERR, LOG_INIT | LOG_SLI, @@ -8119,10 +8203,10 @@ lpfc_sli_issue_mbox_s3(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmbox, } /* Copy the mailbox extension data */ - if (pmbox->in_ext_byte_len && pmbox->context2) { - lpfc_sli_pcimem_bcopy(pmbox->context2, - (uint8_t *)phba->mbox_ext, - pmbox->in_ext_byte_len); + if (pmbox->in_ext_byte_len && pmbox->ctx_buf) { + lpfc_sli_pcimem_bcopy(pmbox->ctx_buf, + (uint8_t *)phba->mbox_ext, + pmbox->in_ext_byte_len); } /* Copy command data to host SLIM area */ lpfc_sli_pcimem_bcopy(mbx, phba->mbox, MAILBOX_CMD_SIZE); @@ -8133,10 +8217,10 @@ lpfc_sli_issue_mbox_s3(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmbox, = MAILBOX_HBA_EXT_OFFSET; /* Copy the mailbox extension data */ - if (pmbox->in_ext_byte_len && pmbox->context2) + if (pmbox->in_ext_byte_len && pmbox->ctx_buf) lpfc_memcpy_to_slim(phba->MBslimaddr + MAILBOX_HBA_EXT_OFFSET, - pmbox->context2, pmbox->in_ext_byte_len); + pmbox->ctx_buf, pmbox->in_ext_byte_len); if (mbx->mbxCommand == MBX_CONFIG_PORT) /* copy command data into host mbox for cmpl */ @@ -8259,9 +8343,9 @@ lpfc_sli_issue_mbox_s3(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmbox, lpfc_sli_pcimem_bcopy(phba->mbox, mbx, MAILBOX_CMD_SIZE); /* Copy the mailbox extension data */ - if (pmbox->out_ext_byte_len && pmbox->context2) { + if (pmbox->out_ext_byte_len && pmbox->ctx_buf) { lpfc_sli_pcimem_bcopy(phba->mbox_ext, - pmbox->context2, + pmbox->ctx_buf, pmbox->out_ext_byte_len); } } else { @@ -8269,8 +8353,9 @@ lpfc_sli_issue_mbox_s3(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmbox, lpfc_memcpy_from_slim(mbx, phba->MBslimaddr, MAILBOX_CMD_SIZE); /* Copy the mailbox extension data */ - if (pmbox->out_ext_byte_len && pmbox->context2) { - lpfc_memcpy_from_slim(pmbox->context2, + if (pmbox->out_ext_byte_len && pmbox->ctx_buf) { + lpfc_memcpy_from_slim( + pmbox->ctx_buf, phba->MBslimaddr + MAILBOX_HBA_EXT_OFFSET, pmbox->out_ext_byte_len); @@ -11265,19 +11350,12 @@ lpfc_sli4_abort_nvme_io(struct lpfc_hba *phba, struct lpfc_sli_ring *pring, /* Complete prepping the abort wqe and issue to the FW. */ abts_wqe = &abtsiocbp->wqe; - bf_set(abort_cmd_ia, &abts_wqe->abort_cmd, 0); - bf_set(abort_cmd_criteria, &abts_wqe->abort_cmd, T_XRI_TAG); - - /* Explicitly set reserved fields to zero.*/ - abts_wqe->abort_cmd.rsrvd4 = 0; - abts_wqe->abort_cmd.rsrvd5 = 0; - /* WQE Common - word 6. Context is XRI tag. Set 0. */ - bf_set(wqe_xri_tag, &abts_wqe->abort_cmd.wqe_com, 0); - bf_set(wqe_ctxt_tag, &abts_wqe->abort_cmd.wqe_com, 0); + /* Clear any stale WQE contents */ + memset(abts_wqe, 0, sizeof(union lpfc_wqe)); + bf_set(abort_cmd_criteria, &abts_wqe->abort_cmd, T_XRI_TAG); /* word 7 */ - bf_set(wqe_ct, &abts_wqe->abort_cmd.wqe_com, 0); bf_set(wqe_cmnd, &abts_wqe->abort_cmd.wqe_com, CMD_ABORT_XRI_CX); bf_set(wqe_class, &abts_wqe->abort_cmd.wqe_com, cmdiocb->iocb.ulpClass); @@ -11292,7 +11370,6 @@ lpfc_sli4_abort_nvme_io(struct lpfc_hba *phba, struct lpfc_sli_ring *pring, abtsiocbp->iotag); /* word 10 */ - bf_set(wqe_wqid, &abts_wqe->abort_cmd.wqe_com, cmdiocb->hba_wqidx); bf_set(wqe_qosd, &abts_wqe->abort_cmd.wqe_com, 1); bf_set(wqe_lenloc, &abts_wqe->abort_cmd.wqe_com, LPFC_WQE_LENLOC_NONE); @@ -12545,10 +12622,10 @@ lpfc_sli_sp_intr_handler(int irq, void *dev_id) lpfc_sli_pcimem_bcopy(mbox, pmbox, MAILBOX_CMD_SIZE); if (pmb->out_ext_byte_len && - pmb->context2) + pmb->ctx_buf) lpfc_sli_pcimem_bcopy( phba->mbox_ext, - pmb->context2, + pmb->ctx_buf, pmb->out_ext_byte_len); } if (pmb->mbox_flag & LPFC_MBX_IMED_UNREG) { @@ -12563,9 +12640,9 @@ lpfc_sli_sp_intr_handler(int irq, void *dev_id) if (!pmbox->mbxStatus) { mp = (struct lpfc_dmabuf *) - (pmb->context1); + (pmb->ctx_buf); ndlp = (struct lpfc_nodelist *) - pmb->context2; + pmb->ctx_ndlp; /* Reg_LOGIN of dflt RPI was * successful. new lets get @@ -12578,8 +12655,8 @@ lpfc_sli_sp_intr_handler(int irq, void *dev_id) pmb); pmb->mbox_cmpl = lpfc_mbx_cmpl_dflt_rpi; - pmb->context1 = mp; - pmb->context2 = ndlp; + pmb->ctx_buf = mp; + pmb->ctx_ndlp = ndlp; pmb->vport = vport; rc = lpfc_sli_issue_mbox(phba, pmb, @@ -13185,16 +13262,16 @@ lpfc_sli4_sp_handle_mbox_event(struct lpfc_hba *phba, struct lpfc_mcqe *mcqe) mcqe_status, pmbox->un.varWords[0], 0); if (mcqe_status == MB_CQE_STATUS_SUCCESS) { - mp = (struct lpfc_dmabuf *)(pmb->context1); - ndlp = (struct lpfc_nodelist *)pmb->context2; + mp = (struct lpfc_dmabuf *)(pmb->ctx_buf); + ndlp = (struct lpfc_nodelist *)pmb->ctx_ndlp; /* Reg_LOGIN of dflt RPI was successful. Now lets get * RID of the PPI using the same mbox buffer. */ lpfc_unreg_login(phba, vport->vpi, pmbox->un.varWords[0], pmb); pmb->mbox_cmpl = lpfc_mbx_cmpl_dflt_rpi; - pmb->context1 = mp; - pmb->context2 = ndlp; + pmb->ctx_buf = mp; + pmb->ctx_ndlp = ndlp; pmb->vport = vport; rc = lpfc_sli_issue_mbox(phba, pmb, MBX_NOWAIT); if (rc != MBX_BUSY) @@ -13413,6 +13490,8 @@ lpfc_sli4_sp_handle_abort_xri_wcqe(struct lpfc_hba *phba, return workposted; } +#define FC_RCTL_MDS_DIAGS 0xF4 + /** * lpfc_sli4_sp_handle_rcqe - Process a receive-queue completion queue entry * @phba: Pointer to HBA context object. @@ -13426,6 +13505,7 @@ static bool lpfc_sli4_sp_handle_rcqe(struct lpfc_hba *phba, struct lpfc_rcqe *rcqe) { bool workposted = false; + struct fc_frame_header *fc_hdr; struct lpfc_queue *hrq = phba->sli4_hba.hdr_rq; struct lpfc_queue *drq = phba->sli4_hba.dat_rq; struct lpfc_nvmet_tgtport *tgtp; @@ -13462,7 +13542,17 @@ lpfc_sli4_sp_handle_rcqe(struct lpfc_hba *phba, struct lpfc_rcqe *rcqe) hrq->RQ_buf_posted--; memcpy(&dma_buf->cq_event.cqe.rcqe_cmpl, rcqe, sizeof(*rcqe)); - /* save off the frame for the word thread to process */ + fc_hdr = (struct fc_frame_header *)dma_buf->hbuf.virt; + + if (fc_hdr->fh_r_ctl == FC_RCTL_MDS_DIAGS || + fc_hdr->fh_r_ctl == FC_RCTL_DD_UNSOL_DATA) { + spin_unlock_irqrestore(&phba->hbalock, iflags); + /* Handle MDS Loopback frames */ + lpfc_sli4_handle_mds_loopback(phba->pport, dma_buf); + break; + } + + /* save off the frame for the work thread to process */ list_add_tail(&dma_buf->cq_event.list, &phba->sli4_hba.sp_queue_event); /* Frame received */ @@ -14501,7 +14591,8 @@ lpfc_sli4_queue_alloc(struct lpfc_hba *phba, uint32_t page_size, hw_page_size))/hw_page_size; /* If needed, Adjust page count to match the max the adapter supports */ - if (queue->page_count > phba->sli4_hba.pc_sli4_params.wqpcnt) + if (phba->sli4_hba.pc_sli4_params.wqpcnt && + (queue->page_count > phba->sli4_hba.pc_sli4_params.wqpcnt)) queue->page_count = phba->sli4_hba.pc_sli4_params.wqpcnt; INIT_LIST_HEAD(&queue->list); @@ -14669,7 +14760,8 @@ lpfc_modify_hba_eq_delay(struct lpfc_hba *phba, uint32_t startq, mbox->vport = phba->pport; mbox->mbox_cmpl = lpfc_sli_def_mbox_cmpl; - mbox->context1 = NULL; + mbox->ctx_buf = NULL; + mbox->ctx_ndlp = NULL; rc = lpfc_sli_issue_mbox(phba, mbox, MBX_POLL); shdr = (union lpfc_sli4_cfg_shdr *) &eq_delay->header.cfg_shdr; shdr_status = bf_get(lpfc_mbox_hdr_status, &shdr->response); @@ -14789,7 +14881,8 @@ lpfc_eq_create(struct lpfc_hba *phba, struct lpfc_queue *eq, uint32_t imax) } mbox->vport = phba->pport; mbox->mbox_cmpl = lpfc_sli_def_mbox_cmpl; - mbox->context1 = NULL; + mbox->ctx_buf = NULL; + mbox->ctx_ndlp = NULL; rc = lpfc_sli_issue_mbox(phba, mbox, MBX_POLL); shdr_status = bf_get(lpfc_mbox_hdr_status, &shdr->response); shdr_add_status = bf_get(lpfc_mbox_hdr_add_status, &shdr->response); @@ -16863,8 +16956,6 @@ lpfc_fc_frame_check(struct lpfc_hba *phba, struct fc_frame_header *fc_hdr) struct fc_vft_header *fc_vft_hdr; uint32_t *header = (uint32_t *) fc_hdr; -#define FC_RCTL_MDS_DIAGS 0xF4 - switch (fc_hdr->fh_r_ctl) { case FC_RCTL_DD_UNCAT: /* uncategorized information */ case FC_RCTL_DD_SOL_DATA: /* solicited data */ @@ -16903,15 +16994,12 @@ lpfc_fc_frame_check(struct lpfc_hba *phba, struct fc_frame_header *fc_hdr) goto drop; } -#define FC_TYPE_VENDOR_UNIQUE 0xFF - switch (fc_hdr->fh_type) { case FC_TYPE_BLS: case FC_TYPE_ELS: case FC_TYPE_FCP: case FC_TYPE_CT: case FC_TYPE_NVME: - case FC_TYPE_VENDOR_UNIQUE: break; case FC_TYPE_IP: case FC_TYPE_ILS: @@ -17741,6 +17829,7 @@ lpfc_sli4_mds_loopback_cmpl(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb, dma_pool_free(phba->lpfc_drb_pool, pcmd->virt, pcmd->phys); kfree(pcmd); lpfc_sli_release_iocbq(phba, cmdiocb); + lpfc_drain_txq(phba); } static void @@ -17754,14 +17843,23 @@ lpfc_sli4_handle_mds_loopback(struct lpfc_vport *vport, struct lpfc_dmabuf *pcmd = NULL; uint32_t frame_len; int rc; + unsigned long iflags; fc_hdr = (struct fc_frame_header *)dmabuf->hbuf.virt; frame_len = bf_get(lpfc_rcqe_length, &dmabuf->cq_event.cqe.rcqe_cmpl); /* Send the received frame back */ iocbq = lpfc_sli_get_iocbq(phba); - if (!iocbq) - goto exit; + if (!iocbq) { + /* Queue cq event and wakeup worker thread to process it */ + spin_lock_irqsave(&phba->hbalock, iflags); + list_add_tail(&dmabuf->cq_event.list, + &phba->sli4_hba.sp_queue_event); + phba->hba_flag |= HBA_SP_QUEUE_EVT; + spin_unlock_irqrestore(&phba->hbalock, iflags); + lpfc_worker_wake_up(phba); + return; + } /* Allocate buffer for command payload */ pcmd = kmalloc(sizeof(struct lpfc_dmabuf), GFP_KERNEL); @@ -17846,6 +17944,14 @@ lpfc_sli4_handle_received_buffer(struct lpfc_hba *phba, /* Process each received buffer */ fc_hdr = (struct fc_frame_header *)dmabuf->hbuf.virt; + if (fc_hdr->fh_r_ctl == FC_RCTL_MDS_DIAGS || + fc_hdr->fh_r_ctl == FC_RCTL_DD_UNSOL_DATA) { + vport = phba->pport; + /* Handle MDS Loopback frames */ + lpfc_sli4_handle_mds_loopback(vport, dmabuf); + return; + } + /* check to see if this a valid type of frame */ if (lpfc_fc_frame_check(phba, fc_hdr)) { lpfc_in_buf_free(phba, &dmabuf->dbuf); @@ -17860,13 +17966,6 @@ lpfc_sli4_handle_received_buffer(struct lpfc_hba *phba, fcfi = bf_get(lpfc_rcqe_fcf_id, &dmabuf->cq_event.cqe.rcqe_cmpl); - if (fc_hdr->fh_r_ctl == 0xF4 && fc_hdr->fh_type == 0xFF) { - vport = phba->pport; - /* Handle MDS Loopback frames */ - lpfc_sli4_handle_mds_loopback(vport, dmabuf); - return; - } - /* d_id this frame is directed to */ did = sli4_did_from_fc_hdr(fc_hdr); @@ -18207,8 +18306,8 @@ lpfc_sli4_resume_rpi(struct lpfc_nodelist *ndlp, lpfc_resume_rpi(mboxq, ndlp); if (cmpl) { mboxq->mbox_cmpl = cmpl; - mboxq->context1 = arg; - mboxq->context2 = ndlp; + mboxq->ctx_buf = arg; + mboxq->ctx_ndlp = ndlp; } else mboxq->mbox_cmpl = lpfc_sli_def_mbox_cmpl; mboxq->vport = ndlp->vport; @@ -18711,15 +18810,8 @@ next_priority: goto initial_priority; lpfc_printf_log(phba, KERN_WARNING, LOG_FIP, "2844 No roundrobin failover FCF available\n"); - if (next_fcf_index >= LPFC_SLI4_FCF_TBL_INDX_MAX) - return LPFC_FCOE_FCF_NEXT_NONE; - else { - lpfc_printf_log(phba, KERN_WARNING, LOG_FIP, - "3063 Only FCF available idx %d, flag %x\n", - next_fcf_index, - phba->fcf.fcf_pri[next_fcf_index].fcf_rec.flag); - return next_fcf_index; - } + + return LPFC_FCOE_FCF_NEXT_NONE; } if (next_fcf_index < LPFC_SLI4_FCF_TBL_INDX_MAX && @@ -19026,7 +19118,7 @@ lpfc_sli4_get_config_region23(struct lpfc_hba *phba, char *rgn23_data) if (lpfc_sli4_dump_cfg_rg23(phba, mboxq)) goto out; mqe = &mboxq->u.mqe; - mp = (struct lpfc_dmabuf *) mboxq->context1; + mp = (struct lpfc_dmabuf *)mboxq->ctx_buf; rc = lpfc_sli_issue_mbox(phba, mboxq, MBX_POLL); if (rc) goto out; @@ -19171,11 +19263,11 @@ lpfc_wr_object(struct lpfc_hba *phba, struct list_head *dmabuf_list, struct lpfc_mbx_wr_object *wr_object; LPFC_MBOXQ_t *mbox; int rc = 0, i = 0; - uint32_t shdr_status, shdr_add_status; + uint32_t shdr_status, shdr_add_status, shdr_change_status; uint32_t mbox_tmo; - union lpfc_sli4_cfg_shdr *shdr; struct lpfc_dmabuf *dmabuf; uint32_t written = 0; + bool check_change_status = false; mbox = mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL); if (!mbox) @@ -19203,6 +19295,8 @@ lpfc_wr_object(struct lpfc_hba *phba, struct list_head *dmabuf_list, (size - written); written += (size - written); bf_set(lpfc_wr_object_eof, &wr_object->u.request, 1); + bf_set(lpfc_wr_object_eas, &wr_object->u.request, 1); + check_change_status = true; } else { wr_object->u.request.bde[i].tus.f.bdeSize = SLI4_PAGE_SIZE; @@ -19219,9 +19313,39 @@ lpfc_wr_object(struct lpfc_hba *phba, struct list_head *dmabuf_list, rc = lpfc_sli_issue_mbox_wait(phba, mbox, mbox_tmo); } /* The IOCTL status is embedded in the mailbox subheader. */ - shdr = (union lpfc_sli4_cfg_shdr *) &wr_object->header.cfg_shdr; - shdr_status = bf_get(lpfc_mbox_hdr_status, &shdr->response); - shdr_add_status = bf_get(lpfc_mbox_hdr_add_status, &shdr->response); + shdr_status = bf_get(lpfc_mbox_hdr_status, + &wr_object->header.cfg_shdr.response); + shdr_add_status = bf_get(lpfc_mbox_hdr_add_status, + &wr_object->header.cfg_shdr.response); + if (check_change_status) { + shdr_change_status = bf_get(lpfc_wr_object_change_status, + &wr_object->u.response); + switch (shdr_change_status) { + case (LPFC_CHANGE_STATUS_PHYS_DEV_RESET): + lpfc_printf_log(phba, KERN_INFO, LOG_INIT, + "3198 Firmware write complete: System " + "reboot required to instantiate\n"); + break; + case (LPFC_CHANGE_STATUS_FW_RESET): + lpfc_printf_log(phba, KERN_INFO, LOG_INIT, + "3199 Firmware write complete: Firmware" + " reset required to instantiate\n"); + break; + case (LPFC_CHANGE_STATUS_PORT_MIGRATION): + lpfc_printf_log(phba, KERN_INFO, LOG_INIT, + "3200 Firmware write complete: Port " + "Migration or PCI Reset required to " + "instantiate\n"); + break; + case (LPFC_CHANGE_STATUS_PCI_RESET): + lpfc_printf_log(phba, KERN_INFO, LOG_INIT, + "3201 Firmware write complete: PCI " + "Reset required to instantiate\n"); + break; + default: + break; + } + } if (rc != MBX_TIMEOUT) mempool_free(mbox, phba->mbox_mem_pool); if (shdr_status || shdr_add_status || rc) { @@ -19277,7 +19401,7 @@ lpfc_cleanup_pending_mbox(struct lpfc_vport *vport) (mb->u.mb.mbxCommand == MBX_REG_VPI)) mb->mbox_cmpl = lpfc_sli_def_mbox_cmpl; if (mb->u.mb.mbxCommand == MBX_REG_LOGIN64) { - act_mbx_ndlp = (struct lpfc_nodelist *)mb->context2; + act_mbx_ndlp = (struct lpfc_nodelist *)mb->ctx_ndlp; /* Put reference count for delayed processing */ act_mbx_ndlp = lpfc_nlp_get(act_mbx_ndlp); /* Unregister the RPI when mailbox complete */ @@ -19302,7 +19426,7 @@ lpfc_cleanup_pending_mbox(struct lpfc_vport *vport) mb->mbox_cmpl = lpfc_sli_def_mbox_cmpl; if (mb->u.mb.mbxCommand == MBX_REG_LOGIN64) { - ndlp = (struct lpfc_nodelist *)mb->context2; + ndlp = (struct lpfc_nodelist *)mb->ctx_ndlp; /* Unregister the RPI when mailbox complete */ mb->mbox_flag |= LPFC_MBX_IMED_UNREG; restart_loop = 1; @@ -19322,13 +19446,14 @@ lpfc_cleanup_pending_mbox(struct lpfc_vport *vport) while (!list_empty(&mbox_cmd_list)) { list_remove_head(&mbox_cmd_list, mb, LPFC_MBOXQ_t, list); if (mb->u.mb.mbxCommand == MBX_REG_LOGIN64) { - mp = (struct lpfc_dmabuf *) (mb->context1); + mp = (struct lpfc_dmabuf *)(mb->ctx_buf); if (mp) { __lpfc_mbuf_free(phba, mp->virt, mp->phys); kfree(mp); } - ndlp = (struct lpfc_nodelist *) mb->context2; - mb->context2 = NULL; + mb->ctx_buf = NULL; + ndlp = (struct lpfc_nodelist *)mb->ctx_ndlp; + mb->ctx_ndlp = NULL; if (ndlp) { spin_lock(shost->host_lock); ndlp->nlp_flag &= ~NLP_IGNR_REG_CMPL; diff --git a/drivers/scsi/lpfc/lpfc_sli.h b/drivers/scsi/lpfc/lpfc_sli.h index 34b7ab69b9b4..7abb395bb64a 100644 --- a/drivers/scsi/lpfc/lpfc_sli.h +++ b/drivers/scsi/lpfc/lpfc_sli.h @@ -144,9 +144,9 @@ typedef struct lpfcMboxq { MAILBOX_t mb; /* Mailbox cmd */ struct lpfc_mqe mqe; } u; - struct lpfc_vport *vport;/* virtual port pointer */ - void *context1; /* caller context information */ - void *context2; /* caller context information */ + struct lpfc_vport *vport; /* virtual port pointer */ + void *ctx_ndlp; /* caller ndlp information */ + void *ctx_buf; /* caller buffer information */ void *context3; void (*mbox_cmpl) (struct lpfc_hba *, struct lpfcMboxq *); diff --git a/drivers/scsi/lpfc/lpfc_sli4.h b/drivers/scsi/lpfc/lpfc_sli4.h index e76c380e1a84..6b2d2350e2c6 100644 --- a/drivers/scsi/lpfc/lpfc_sli4.h +++ b/drivers/scsi/lpfc/lpfc_sli4.h @@ -279,6 +279,7 @@ struct lpfc_fcf { #define FCF_REDISC_EVT 0x100 /* FCF rediscovery event to worker thread */ #define FCF_REDISC_FOV 0x200 /* Post FCF rediscovery fast failover */ #define FCF_REDISC_PROG (FCF_REDISC_PEND | FCF_REDISC_EVT) + uint16_t fcf_redisc_attempted; uint32_t addr_mode; uint32_t eligible_fcf_cnt; struct lpfc_fcf_rec current_rec; @@ -717,6 +718,19 @@ struct lpfc_sli4_hba { uint16_t num_online_cpu; uint16_t num_present_cpu; uint16_t curr_disp_cpu; + uint32_t conf_trunk; +#define lpfc_conf_trunk_port0_WORD conf_trunk +#define lpfc_conf_trunk_port0_SHIFT 0 +#define lpfc_conf_trunk_port0_MASK 0x1 +#define lpfc_conf_trunk_port1_WORD conf_trunk +#define lpfc_conf_trunk_port1_SHIFT 1 +#define lpfc_conf_trunk_port1_MASK 0x1 +#define lpfc_conf_trunk_port2_WORD conf_trunk +#define lpfc_conf_trunk_port2_SHIFT 2 +#define lpfc_conf_trunk_port2_MASK 0x1 +#define lpfc_conf_trunk_port3_WORD conf_trunk +#define lpfc_conf_trunk_port3_SHIFT 3 +#define lpfc_conf_trunk_port3_MASK 0x1 }; enum lpfc_sge_type { diff --git a/drivers/scsi/lpfc/lpfc_version.h b/drivers/scsi/lpfc/lpfc_version.h index 5a0d512ff497..3f4398ffb567 100644 --- a/drivers/scsi/lpfc/lpfc_version.h +++ b/drivers/scsi/lpfc/lpfc_version.h @@ -20,7 +20,7 @@ * included with this package. * *******************************************************************/ -#define LPFC_DRIVER_VERSION "12.0.0.7" +#define LPFC_DRIVER_VERSION "12.0.0.10" #define LPFC_DRIVER_NAME "lpfc" /* Used for SLI 2/3 */ diff --git a/drivers/scsi/lpfc/lpfc_vport.c b/drivers/scsi/lpfc/lpfc_vport.c index c340e0e47473..102a011ff6d4 100644 --- a/drivers/scsi/lpfc/lpfc_vport.c +++ b/drivers/scsi/lpfc/lpfc_vport.c @@ -138,8 +138,8 @@ lpfc_vport_sparm(struct lpfc_hba *phba, struct lpfc_vport *vport) * Grab buffer pointer and clear context1 so we can use * lpfc_sli_issue_box_wait */ - mp = (struct lpfc_dmabuf *) pmb->context1; - pmb->context1 = NULL; + mp = (struct lpfc_dmabuf *)pmb->ctx_buf; + pmb->ctx_buf = NULL; pmb->vport = vport; rc = lpfc_sli_issue_mbox_wait(phba, pmb, phba->fc_ratov * 2); diff --git a/drivers/scsi/mac53c94.c b/drivers/scsi/mac53c94.c index 177701dfdfcb..c8e6ae98a4a6 100644 --- a/drivers/scsi/mac53c94.c +++ b/drivers/scsi/mac53c94.c @@ -403,7 +403,7 @@ static struct scsi_host_template mac53c94_template = { .can_queue = 1, .this_id = 7, .sg_tablesize = SG_ALL, - .use_clustering = DISABLE_CLUSTERING, + .max_segment_size = 65535, }; static int mac53c94_probe(struct macio_dev *mdev, const struct of_device_id *match) diff --git a/drivers/scsi/mac_esp.c b/drivers/scsi/mac_esp.c index 764d320bb2ca..ee741207fd4e 100644 --- a/drivers/scsi/mac_esp.c +++ b/drivers/scsi/mac_esp.c @@ -307,7 +307,7 @@ static int esp_mac_probe(struct platform_device *dev) goto fail; host->max_id = 8; - host->use_clustering = DISABLE_CLUSTERING; + host->dma_boundary = PAGE_SIZE - 1; esp = shost_priv(host); esp->host = host; diff --git a/drivers/scsi/mac_scsi.c b/drivers/scsi/mac_scsi.c index dd6057359d7c..8b4b5b1a13d7 100644 --- a/drivers/scsi/mac_scsi.c +++ b/drivers/scsi/mac_scsi.c @@ -333,7 +333,7 @@ static struct scsi_host_template mac_scsi_template = { .this_id = 7, .sg_tablesize = 1, .cmd_per_lun = 2, - .use_clustering = DISABLE_CLUSTERING, + .dma_boundary = PAGE_SIZE - 1, .cmd_size = NCR5380_CMD_SIZE, .max_sectors = 128, }; diff --git a/drivers/scsi/megaraid.c b/drivers/scsi/megaraid.c index 8c7154143a4e..4862f65ec3e8 100644 --- a/drivers/scsi/megaraid.c +++ b/drivers/scsi/megaraid.c @@ -4148,7 +4148,6 @@ static struct scsi_host_template megaraid_template = { .this_id = DEFAULT_INITIATOR_ID, .sg_tablesize = MAX_SGLIST, .cmd_per_lun = DEF_CMD_PER_LUN, - .use_clustering = ENABLE_CLUSTERING, .eh_abort_handler = megaraid_abort, .eh_device_reset_handler = megaraid_reset, .eh_bus_reset_handler = megaraid_reset, diff --git a/drivers/scsi/megaraid/megaraid_mbox.c b/drivers/scsi/megaraid/megaraid_mbox.c index 3b7abe5ca7f5..e836392b75e8 100644 --- a/drivers/scsi/megaraid/megaraid_mbox.c +++ b/drivers/scsi/megaraid/megaraid_mbox.c @@ -336,7 +336,6 @@ static struct scsi_host_template megaraid_template_g = { .eh_abort_handler = megaraid_abort_handler, .eh_host_reset_handler = megaraid_reset_handler, .change_queue_depth = scsi_change_queue_depth, - .use_clustering = ENABLE_CLUSTERING, .no_write_same = 1, .sdev_attrs = megaraid_sdev_attrs, .shost_attrs = megaraid_shost_attrs, @@ -1243,8 +1242,7 @@ megaraid_mbox_teardown_dma_pools(adapter_t *adapter) dma_pool_free(raid_dev->sg_pool_handle, sg_pci_blk[i].vaddr, sg_pci_blk[i].dma_addr); } - if (raid_dev->sg_pool_handle) - dma_pool_destroy(raid_dev->sg_pool_handle); + dma_pool_destroy(raid_dev->sg_pool_handle); epthru_pci_blk = raid_dev->epthru_pool; @@ -1252,8 +1250,7 @@ megaraid_mbox_teardown_dma_pools(adapter_t *adapter) dma_pool_free(raid_dev->epthru_pool_handle, epthru_pci_blk[i].vaddr, epthru_pci_blk[i].dma_addr); } - if (raid_dev->epthru_pool_handle) - dma_pool_destroy(raid_dev->epthru_pool_handle); + dma_pool_destroy(raid_dev->epthru_pool_handle); mbox_pci_blk = raid_dev->mbox_pool; @@ -1261,8 +1258,7 @@ megaraid_mbox_teardown_dma_pools(adapter_t *adapter) dma_pool_free(raid_dev->mbox_pool_handle, mbox_pci_blk[i].vaddr, mbox_pci_blk[i].dma_addr); } - if (raid_dev->mbox_pool_handle) - dma_pool_destroy(raid_dev->mbox_pool_handle); + dma_pool_destroy(raid_dev->mbox_pool_handle); return; } diff --git a/drivers/scsi/megaraid/megaraid_mm.c b/drivers/scsi/megaraid/megaraid_mm.c index 8428247015db..3ce837e4b24c 100644 --- a/drivers/scsi/megaraid/megaraid_mm.c +++ b/drivers/scsi/megaraid/megaraid_mm.c @@ -1017,8 +1017,7 @@ memalloc_error: kfree(adapter->kioc_list); kfree(adapter->mbox_list); - if (adapter->pthru_dma_pool) - dma_pool_destroy(adapter->pthru_dma_pool); + dma_pool_destroy(adapter->pthru_dma_pool); kfree(adapter); diff --git a/drivers/scsi/megaraid/megaraid_sas.h b/drivers/scsi/megaraid/megaraid_sas.h index 67d356d84717..16536c41f0c5 100644 --- a/drivers/scsi/megaraid/megaraid_sas.h +++ b/drivers/scsi/megaraid/megaraid_sas.h @@ -2,7 +2,8 @@ * Linux MegaRAID driver for SAS based RAID controllers * * Copyright (c) 2003-2013 LSI Corporation - * Copyright (c) 2013-2014 Avago Technologies + * Copyright (c) 2013-2016 Avago Technologies + * Copyright (c) 2016-2018 Broadcom Inc. * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License @@ -19,14 +20,11 @@ * * FILE: megaraid_sas.h * - * Authors: Avago Technologies - * Kashyap Desai <kashyap.desai@avagotech.com> - * Sumit Saxena <sumit.saxena@avagotech.com> + * Authors: Broadcom Inc. + * Kashyap Desai <kashyap.desai@broadcom.com> + * Sumit Saxena <sumit.saxena@broadcom.com> * - * Send feedback to: megaraidlinux.pdl@avagotech.com - * - * Mail to: Avago Technologies, 350 West Trimble Road, Building 90, - * San Jose, California 95131 + * Send feedback to: megaraidlinux.pdl@broadcom.com */ #ifndef LSI_MEGARAID_SAS_H @@ -35,8 +33,8 @@ /* * MegaRAID SAS Driver meta data */ -#define MEGASAS_VERSION "07.706.03.00-rc1" -#define MEGASAS_RELDATE "May 21, 2018" +#define MEGASAS_VERSION "07.707.50.00-rc1" +#define MEGASAS_RELDATE "December 18, 2018" /* * Device IDs @@ -62,6 +60,10 @@ #define PCI_DEVICE_ID_LSI_TOMCAT 0x0017 #define PCI_DEVICE_ID_LSI_VENTURA_4PORT 0x001B #define PCI_DEVICE_ID_LSI_CRUSADER_4PORT 0x001C +#define PCI_DEVICE_ID_LSI_AERO_10E1 0x10e1 +#define PCI_DEVICE_ID_LSI_AERO_10E2 0x10e2 +#define PCI_DEVICE_ID_LSI_AERO_10E5 0x10e5 +#define PCI_DEVICE_ID_LSI_AERO_10E6 0x10e6 /* * Intel HBA SSDIDs @@ -142,6 +144,7 @@ * CLR_HANDSHAKE: FW is waiting for HANDSHAKE from BIOS or Driver * HOTPLUG : Resume from Hotplug * MFI_STOP_ADP : Send signal to FW to stop processing + * MFI_ADP_TRIGGER_SNAP_DUMP: Inform firmware to initiate snap dump */ #define WRITE_SEQUENCE_OFFSET (0x0000000FC) /* I20 */ #define HOST_DIAGNOSTIC_OFFSET (0x000000F8) /* I20 */ @@ -158,6 +161,7 @@ #define MFI_RESET_FLAGS MFI_INIT_READY| \ MFI_INIT_MFIMODE| \ MFI_INIT_ABORT +#define MFI_ADP_TRIGGER_SNAP_DUMP 0x00000100 #define MPI2_IOCINIT_MSGFLAG_RDPQ_ARRAY_MODE (0x01) /* @@ -860,8 +864,22 @@ struct megasas_ctrl_prop { u32 reserved:18; #endif } OnOffProperties; - u8 autoSnapVDSpace; - u8 viewSpace; + + union { + u8 autoSnapVDSpace; + u8 viewSpace; + struct { +#if defined(__BIG_ENDIAN_BITFIELD) + u16 reserved2:11; + u16 enable_snap_dump:1; + u16 reserved1:4; +#else + u16 reserved1:4; + u16 enable_snap_dump:1; + u16 reserved2:11; +#endif + } on_off_properties2; + }; __le16 spinDownTime; u8 reserved[24]; } __packed; @@ -1485,7 +1503,6 @@ enum FW_BOOT_CONTEXT { #define MEGASAS_IOCTL_CMD 0 #define MEGASAS_DEFAULT_CMD_TIMEOUT 90 #define MEGASAS_THROTTLE_QUEUE_DEPTH 16 -#define MEGASAS_BLOCKED_CMD_TIMEOUT 60 #define MEGASAS_DEFAULT_TM_TIMEOUT 50 /* * FW reports the maximum of number of commands that it can accept (maximum @@ -1544,11 +1561,16 @@ enum FW_BOOT_CONTEXT { #define MR_CAN_HANDLE_64_BIT_DMA_OFFSET (1 << 25) +#define MEGASAS_WATCHDOG_THREAD_INTERVAL 1000 +#define MEGASAS_WAIT_FOR_NEXT_DMA_MSECS 20 +#define MEGASAS_WATCHDOG_WAIT_COUNT 50 + enum MR_ADAPTER_TYPE { MFI_SERIES = 1, THUNDERBOLT_SERIES = 2, INVADER_SERIES = 3, VENTURA_SERIES = 4, + AERO_SERIES = 5, }; /* @@ -1588,11 +1610,10 @@ struct megasas_register_set { u32 reserved_3[3]; /*00A4h*/ - u32 outbound_scratch_pad ; /*00B0h*/ - u32 outbound_scratch_pad_2; /*00B4h*/ - u32 outbound_scratch_pad_3; /*00B8h*/ - u32 outbound_scratch_pad_4; /*00BCh*/ - + u32 outbound_scratch_pad_0; /*00B0h*/ + u32 outbound_scratch_pad_1; /*00B4h*/ + u32 outbound_scratch_pad_2; /*00B8h*/ + u32 outbound_scratch_pad_3; /*00BCh*/ u32 inbound_low_queue_port ; /*00C0h*/ @@ -2181,6 +2202,9 @@ struct megasas_instance { struct MR_LD_TARGETID_LIST *ld_targetid_list_buf; dma_addr_t ld_targetid_list_buf_h; + struct MR_SNAPDUMP_PROPERTIES *snapdump_prop; + dma_addr_t snapdump_prop_h; + void *crash_buf[MAX_CRASH_DUMP_SIZE]; unsigned int fw_crash_buffer_size; unsigned int fw_crash_state; @@ -2250,7 +2274,9 @@ struct megasas_instance { struct megasas_instance_template *instancet; struct tasklet_struct isr_tasklet; struct work_struct work_init; - struct work_struct crash_init; + struct delayed_work fw_fault_work; + struct workqueue_struct *fw_fault_work_q; + char fault_handler_work_q_name[48]; u8 flag; u8 unload; @@ -2310,6 +2336,7 @@ struct megasas_instance { bool support_nvme_passthru; u8 task_abort_tmo; u8 max_reset_tmo; + u8 snapdump_wait_time; }; struct MR_LD_VF_MAP { u32 size; @@ -2386,9 +2413,9 @@ struct megasas_instance_template { void (*enable_intr)(struct megasas_instance *); void (*disable_intr)(struct megasas_instance *); - int (*clear_intr)(struct megasas_register_set __iomem *); + int (*clear_intr)(struct megasas_instance *); - u32 (*read_fw_status_reg)(struct megasas_register_set __iomem *); + u32 (*read_fw_status_reg)(struct megasas_instance *); int (*adp_reset)(struct megasas_instance *, \ struct megasas_register_set __iomem *); int (*check_reset)(struct megasas_instance *, \ @@ -2535,11 +2562,11 @@ void megasas_set_dynamic_target_properties(struct scsi_device *sdev, bool is_target_prop); int megasas_get_target_prop(struct megasas_instance *instance, struct scsi_device *sdev); +void megasas_get_snapdump_properties(struct megasas_instance *instance); int megasas_set_crash_dump_params(struct megasas_instance *instance, u8 crash_buf_state); void megasas_free_host_crash_buffer(struct megasas_instance *instance); -void megasas_fusion_crash_dump_wq(struct work_struct *work); void megasas_return_cmd_fusion(struct megasas_instance *instance, struct megasas_cmd_fusion *cmd); @@ -2560,6 +2587,9 @@ int megasas_reset_target_fusion(struct scsi_cmnd *scmd); u32 mega_mod64(u64 dividend, u32 divisor); int megasas_alloc_fusion_context(struct megasas_instance *instance); void megasas_free_fusion_context(struct megasas_instance *instance); +int megasas_fusion_start_watchdog(struct megasas_instance *instance); +void megasas_fusion_stop_watchdog(struct megasas_instance *instance); + void megasas_set_dma_settings(struct megasas_instance *instance, struct megasas_dcmd_frame *dcmd, dma_addr_t dma_addr, u32 dma_len); diff --git a/drivers/scsi/megaraid/megaraid_sas_base.c b/drivers/scsi/megaraid/megaraid_sas_base.c index 9b90c716f06d..f7bdd783360a 100644 --- a/drivers/scsi/megaraid/megaraid_sas_base.c +++ b/drivers/scsi/megaraid/megaraid_sas_base.c @@ -2,7 +2,8 @@ * Linux MegaRAID driver for SAS based RAID controllers * * Copyright (c) 2003-2013 LSI Corporation - * Copyright (c) 2013-2014 Avago Technologies + * Copyright (c) 2013-2016 Avago Technologies + * Copyright (c) 2016-2018 Broadcom Inc. * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License @@ -17,18 +18,15 @@ * You should have received a copy of the GNU General Public License * along with this program. If not, see <http://www.gnu.org/licenses/>. * - * Authors: Avago Technologies + * Authors: Broadcom Inc. * Sreenivas Bagalkote * Sumant Patro * Bo Yang * Adam Radford - * Kashyap Desai <kashyap.desai@avagotech.com> - * Sumit Saxena <sumit.saxena@avagotech.com> + * Kashyap Desai <kashyap.desai@broadcom.com> + * Sumit Saxena <sumit.saxena@broadcom.com> * - * Send feedback to: megaraidlinux.pdl@avagotech.com - * - * Mail to: Avago Technologies, 350 West Trimble Road, Building 90, - * San Jose, California 95131 + * Send feedback to: megaraidlinux.pdl@broadcom.com */ #include <linux/kernel.h> @@ -87,8 +85,7 @@ MODULE_PARM_DESC(throttlequeuedepth, unsigned int resetwaittime = MEGASAS_RESET_WAIT_TIME; module_param(resetwaittime, int, S_IRUGO); -MODULE_PARM_DESC(resetwaittime, "Wait time in seconds after I/O timeout " - "before resetting adapter. Default: 180"); +MODULE_PARM_DESC(resetwaittime, "Wait time in (1-180s) after I/O timeout before resetting adapter. Default: 180s"); int smp_affinity_enable = 1; module_param(smp_affinity_enable, int, S_IRUGO); @@ -96,7 +93,7 @@ MODULE_PARM_DESC(smp_affinity_enable, "SMP affinity feature enable/disable Defau int rdpq_enable = 1; module_param(rdpq_enable, int, S_IRUGO); -MODULE_PARM_DESC(rdpq_enable, " Allocate reply queue in chunks for large queue depth enable/disable Default: disable(0)"); +MODULE_PARM_DESC(rdpq_enable, "Allocate reply queue in chunks for large queue depth enable/disable Default: enable(1)"); unsigned int dual_qdepth_disable; module_param(dual_qdepth_disable, int, S_IRUGO); @@ -108,8 +105,8 @@ MODULE_PARM_DESC(scmd_timeout, "scsi command timeout (10-90s), default 90s. See MODULE_LICENSE("GPL"); MODULE_VERSION(MEGASAS_VERSION); -MODULE_AUTHOR("megaraidlinux.pdl@avagotech.com"); -MODULE_DESCRIPTION("Avago MegaRAID SAS Driver"); +MODULE_AUTHOR("megaraidlinux.pdl@broadcom.com"); +MODULE_DESCRIPTION("Broadcom MegaRAID SAS Driver"); int megasas_transition_to_ready(struct megasas_instance *instance, int ocr); static int megasas_get_pd_list(struct megasas_instance *instance); @@ -165,6 +162,10 @@ static struct pci_device_id megasas_pci_table[] = { {PCI_DEVICE(PCI_VENDOR_ID_LSI_LOGIC, PCI_DEVICE_ID_LSI_TOMCAT)}, {PCI_DEVICE(PCI_VENDOR_ID_LSI_LOGIC, PCI_DEVICE_ID_LSI_VENTURA_4PORT)}, {PCI_DEVICE(PCI_VENDOR_ID_LSI_LOGIC, PCI_DEVICE_ID_LSI_CRUSADER_4PORT)}, + {PCI_DEVICE(PCI_VENDOR_ID_LSI_LOGIC, PCI_DEVICE_ID_LSI_AERO_10E1)}, + {PCI_DEVICE(PCI_VENDOR_ID_LSI_LOGIC, PCI_DEVICE_ID_LSI_AERO_10E2)}, + {PCI_DEVICE(PCI_VENDOR_ID_LSI_LOGIC, PCI_DEVICE_ID_LSI_AERO_10E5)}, + {PCI_DEVICE(PCI_VENDOR_ID_LSI_LOGIC, PCI_DEVICE_ID_LSI_AERO_10E6)}, {} }; @@ -189,7 +190,7 @@ void megasas_complete_cmd(struct megasas_instance *instance, struct megasas_cmd *cmd, u8 alt_status); static u32 -megasas_read_fw_status_reg_gen2(struct megasas_register_set __iomem *regs); +megasas_read_fw_status_reg_gen2(struct megasas_instance *instance); static int megasas_adp_reset_gen2(struct megasas_instance *instance, struct megasas_register_set __iomem *reg_set); @@ -219,6 +220,28 @@ megasas_free_ctrl_dma_buffers(struct megasas_instance *instance); static inline void megasas_init_ctrl_params(struct megasas_instance *instance); +u32 megasas_readl(struct megasas_instance *instance, + const volatile void __iomem *addr) +{ + u32 i = 0, ret_val; + /* + * Due to a HW errata in Aero controllers, reads to certain + * Fusion registers could intermittently return all zeroes. + * This behavior is transient in nature and subsequent reads will + * return valid value. As a workaround in driver, retry readl for + * upto three times until a non-zero value is read. + */ + if (instance->adapter_type == AERO_SERIES) { + do { + ret_val = readl(addr); + i++; + } while (ret_val == 0 && i < 3); + return ret_val; + } else { + return readl(addr); + } +} + /** * megasas_set_dma_settings - Populate DMA address, length and flags for DCMDs * @instance: Adapter soft state @@ -419,19 +442,21 @@ megasas_disable_intr_xscale(struct megasas_instance *instance) * @regs: MFI register set */ static u32 -megasas_read_fw_status_reg_xscale(struct megasas_register_set __iomem * regs) +megasas_read_fw_status_reg_xscale(struct megasas_instance *instance) { - return readl(&(regs)->outbound_msg_0); + return readl(&instance->reg_set->outbound_msg_0); } /** * megasas_clear_interrupt_xscale - Check & clear interrupt * @regs: MFI register set */ static int -megasas_clear_intr_xscale(struct megasas_register_set __iomem * regs) +megasas_clear_intr_xscale(struct megasas_instance *instance) { u32 status; u32 mfiStatus = 0; + struct megasas_register_set __iomem *regs; + regs = instance->reg_set; /* * Check if it is our interrupt @@ -596,9 +621,9 @@ megasas_disable_intr_ppc(struct megasas_instance *instance) * @regs: MFI register set */ static u32 -megasas_read_fw_status_reg_ppc(struct megasas_register_set __iomem * regs) +megasas_read_fw_status_reg_ppc(struct megasas_instance *instance) { - return readl(&(regs)->outbound_scratch_pad); + return readl(&instance->reg_set->outbound_scratch_pad_0); } /** @@ -606,9 +631,11 @@ megasas_read_fw_status_reg_ppc(struct megasas_register_set __iomem * regs) * @regs: MFI register set */ static int -megasas_clear_intr_ppc(struct megasas_register_set __iomem * regs) +megasas_clear_intr_ppc(struct megasas_instance *instance) { u32 status, mfiStatus = 0; + struct megasas_register_set __iomem *regs; + regs = instance->reg_set; /* * Check if it is our interrupt @@ -721,9 +748,9 @@ megasas_disable_intr_skinny(struct megasas_instance *instance) * @regs: MFI register set */ static u32 -megasas_read_fw_status_reg_skinny(struct megasas_register_set __iomem *regs) +megasas_read_fw_status_reg_skinny(struct megasas_instance *instance) { - return readl(&(regs)->outbound_scratch_pad); + return readl(&instance->reg_set->outbound_scratch_pad_0); } /** @@ -731,10 +758,12 @@ megasas_read_fw_status_reg_skinny(struct megasas_register_set __iomem *regs) * @regs: MFI register set */ static int -megasas_clear_intr_skinny(struct megasas_register_set __iomem *regs) +megasas_clear_intr_skinny(struct megasas_instance *instance) { u32 status; u32 mfiStatus = 0; + struct megasas_register_set __iomem *regs; + regs = instance->reg_set; /* * Check if it is our interrupt @@ -748,7 +777,7 @@ megasas_clear_intr_skinny(struct megasas_register_set __iomem *regs) /* * Check if it is our interrupt */ - if ((megasas_read_fw_status_reg_skinny(regs) & MFI_STATE_MASK) == + if ((megasas_read_fw_status_reg_skinny(instance) & MFI_STATE_MASK) == MFI_STATE_FAULT) { mfiStatus = MFI_INTR_FLAG_FIRMWARE_STATE_CHANGE; } else @@ -866,9 +895,9 @@ megasas_disable_intr_gen2(struct megasas_instance *instance) * @regs: MFI register set */ static u32 -megasas_read_fw_status_reg_gen2(struct megasas_register_set __iomem *regs) +megasas_read_fw_status_reg_gen2(struct megasas_instance *instance) { - return readl(&(regs)->outbound_scratch_pad); + return readl(&instance->reg_set->outbound_scratch_pad_0); } /** @@ -876,10 +905,12 @@ megasas_read_fw_status_reg_gen2(struct megasas_register_set __iomem *regs) * @regs: MFI register set */ static int -megasas_clear_intr_gen2(struct megasas_register_set __iomem *regs) +megasas_clear_intr_gen2(struct megasas_instance *instance) { u32 status; u32 mfiStatus = 0; + struct megasas_register_set __iomem *regs; + regs = instance->reg_set; /* * Check if it is our interrupt @@ -2079,9 +2110,11 @@ void megaraid_sas_kill_hba(struct megasas_instance *instance) if ((instance->pdev->device == PCI_DEVICE_ID_LSI_SAS0073SKINNY) || (instance->pdev->device == PCI_DEVICE_ID_LSI_SAS0071SKINNY) || (instance->adapter_type != MFI_SERIES)) { - writel(MFI_STOP_ADP, &instance->reg_set->doorbell); - /* Flush */ - readl(&instance->reg_set->doorbell); + if (!instance->requestorId) { + writel(MFI_STOP_ADP, &instance->reg_set->doorbell); + /* Flush */ + readl(&instance->reg_set->doorbell); + } if (instance->requestorId && instance->peerIsPresent) memset(instance->ld_ids, 0xff, MEGASAS_MAX_LD_IDS); } else { @@ -2682,7 +2715,7 @@ static int megasas_wait_for_outstanding(struct megasas_instance *instance) i = 0; outstanding = atomic_read(&instance->fw_outstanding); - fw_state = instance->instancet->read_fw_status_reg(instance->reg_set) & MFI_STATE_MASK; + fw_state = instance->instancet->read_fw_status_reg(instance) & MFI_STATE_MASK; if ((!outstanding && (fw_state == MFI_STATE_OPERATIONAL))) goto no_outstanding; @@ -2711,7 +2744,7 @@ static int megasas_wait_for_outstanding(struct megasas_instance *instance) outstanding = atomic_read(&instance->fw_outstanding); - fw_state = instance->instancet->read_fw_status_reg(instance->reg_set) & MFI_STATE_MASK; + fw_state = instance->instancet->read_fw_status_reg(instance) & MFI_STATE_MASK; if ((!outstanding && (fw_state == MFI_STATE_OPERATIONAL))) goto no_outstanding; } @@ -3186,7 +3219,6 @@ static struct scsi_host_template megasas_template = { .eh_timed_out = megasas_reset_timer, .shost_attrs = megaraid_host_attrs, .bios_param = megasas_bios_param, - .use_clustering = ENABLE_CLUSTERING, .change_queue_depth = scsi_change_queue_depth, .no_write_same = 1, }; @@ -3278,6 +3310,7 @@ megasas_complete_cmd(struct megasas_instance *instance, struct megasas_cmd *cmd, megasas_complete_int_cmd(instance, cmd); break; } + /* fall through */ case MFI_CMD_LD_READ: case MFI_CMD_LD_WRITE: @@ -3665,9 +3698,8 @@ megasas_deplete_reply_queue(struct megasas_instance *instance, return IRQ_HANDLED; } - if ((mfiStatus = instance->instancet->clear_intr( - instance->reg_set) - ) == 0) { + mfiStatus = instance->instancet->clear_intr(instance); + if (mfiStatus == 0) { /* Hardware may not set outbound_intr_status in MSI-X mode */ if (!instance->msix_vectors) return IRQ_NONE; @@ -3677,7 +3709,7 @@ megasas_deplete_reply_queue(struct megasas_instance *instance, if ((mfiStatus & MFI_INTR_FLAG_FIRMWARE_STATE_CHANGE)) { fw_state = instance->instancet->read_fw_status_reg( - instance->reg_set) & MFI_STATE_MASK; + instance) & MFI_STATE_MASK; if (fw_state != MFI_STATE_FAULT) { dev_notice(&instance->pdev->dev, "fw state:%x\n", @@ -3760,7 +3792,7 @@ megasas_transition_to_ready(struct megasas_instance *instance, int ocr) u32 cur_state; u32 abs_state, curr_abs_state; - abs_state = instance->instancet->read_fw_status_reg(instance->reg_set); + abs_state = instance->instancet->read_fw_status_reg(instance); fw_state = abs_state & MFI_STATE_MASK; if (fw_state != MFI_STATE_READY) @@ -3832,7 +3864,8 @@ megasas_transition_to_ready(struct megasas_instance *instance, int ocr) if (instance->adapter_type != MFI_SERIES) { for (i = 0; i < (10 * 1000); i += 20) { - if (readl( + if (megasas_readl( + instance, &instance-> reg_set-> doorbell) & 1) @@ -3891,12 +3924,12 @@ megasas_transition_to_ready(struct megasas_instance *instance, int ocr) /* * The cur_state should not last for more than max_wait secs */ - for (i = 0; i < (max_wait * 1000); i++) { + for (i = 0; i < max_wait; i++) { curr_abs_state = instance->instancet-> - read_fw_status_reg(instance->reg_set); + read_fw_status_reg(instance); if (abs_state == curr_abs_state) { - msleep(1); + msleep(1000); } else break; } @@ -4634,9 +4667,9 @@ static void megasas_update_ext_vd_details(struct megasas_instance *instance) } dev_info(&instance->pdev->dev, - "firmware type\t: %s\n", - instance->supportmax256vd ? "Extended VD(240 VD)firmware" : - "Legacy(64 VD) firmware"); + "FW provided supportMaxExtLDs: %d\tmax_lds: %d\n", + instance->ctrl_info_buf->adapterOperations3.supportMaxExtLDs ? 1 : 0, + instance->ctrl_info_buf->max_lds); if (instance->max_raid_mapsize) { ventura_map_sz = instance->max_raid_mapsize * @@ -4661,6 +4694,87 @@ static void megasas_update_ext_vd_details(struct megasas_instance *instance) fusion->drv_map_sz = sizeof(struct MR_DRV_RAID_MAP_ALL); } +/* + * dcmd.opcode - MR_DCMD_CTRL_SNAPDUMP_GET_PROPERTIES + * dcmd.hdr.length - number of bytes to read + * dcmd.sge - Ptr to MR_SNAPDUMP_PROPERTIES + * Desc: Fill in snapdump properties + * Status: MFI_STAT_OK- Command successful + */ +void megasas_get_snapdump_properties(struct megasas_instance *instance) +{ + int ret = 0; + struct megasas_cmd *cmd; + struct megasas_dcmd_frame *dcmd; + struct MR_SNAPDUMP_PROPERTIES *ci; + dma_addr_t ci_h = 0; + + ci = instance->snapdump_prop; + ci_h = instance->snapdump_prop_h; + + if (!ci) + return; + + cmd = megasas_get_cmd(instance); + + if (!cmd) { + dev_dbg(&instance->pdev->dev, "Failed to get a free cmd\n"); + return; + } + + dcmd = &cmd->frame->dcmd; + + memset(ci, 0, sizeof(*ci)); + memset(dcmd->mbox.b, 0, MFI_MBOX_SIZE); + + dcmd->cmd = MFI_CMD_DCMD; + dcmd->cmd_status = MFI_STAT_INVALID_STATUS; + dcmd->sge_count = 1; + dcmd->flags = MFI_FRAME_DIR_READ; + dcmd->timeout = 0; + dcmd->pad_0 = 0; + dcmd->data_xfer_len = cpu_to_le32(sizeof(struct MR_SNAPDUMP_PROPERTIES)); + dcmd->opcode = cpu_to_le32(MR_DCMD_CTRL_SNAPDUMP_GET_PROPERTIES); + + megasas_set_dma_settings(instance, dcmd, ci_h, + sizeof(struct MR_SNAPDUMP_PROPERTIES)); + + if (!instance->mask_interrupts) { + ret = megasas_issue_blocked_cmd(instance, cmd, + MFI_IO_TIMEOUT_SECS); + } else { + ret = megasas_issue_polled(instance, cmd); + cmd->flags |= DRV_DCMD_SKIP_REFIRE; + } + + switch (ret) { + case DCMD_SUCCESS: + instance->snapdump_wait_time = + min_t(u8, ci->trigger_min_num_sec_before_ocr, + MEGASAS_MAX_SNAP_DUMP_WAIT_TIME); + break; + + case DCMD_TIMEOUT: + switch (dcmd_timeout_ocr_possible(instance)) { + case INITIATE_OCR: + cmd->flags |= DRV_DCMD_SKIP_REFIRE; + megasas_reset_fusion(instance->host, + MFI_IO_TIMEOUT_OCR); + break; + case KILL_ADAPTER: + megaraid_sas_kill_hba(instance); + break; + case IGNORE_TIMEOUT: + dev_info(&instance->pdev->dev, "Ignore DCMD timeout: %s %d\n", + __func__, __LINE__); + break; + } + } + + if (ret != DCMD_TIMEOUT) + megasas_return_cmd(instance, cmd); +} + /** * megasas_get_controller_info - Returns FW's controller structure * @instance: Adapter soft state @@ -4720,6 +4834,7 @@ megasas_get_ctrl_info(struct megasas_instance *instance) * CPU endianness format. */ le32_to_cpus((u32 *)&ci->properties.OnOffProperties); + le16_to_cpus((u16 *)&ci->properties.on_off_properties2); le32_to_cpus((u32 *)&ci->adapterOperations2); le32_to_cpus((u32 *)&ci->adapterOperations3); le16_to_cpus((u16 *)&ci->adapter_operations4); @@ -4741,6 +4856,11 @@ megasas_get_ctrl_info(struct megasas_instance *instance) /*Check whether controller is iMR or MR */ instance->is_imr = (ci->memory_size ? 0 : 1); + + instance->snapdump_wait_time = + (ci->properties.on_off_properties2.enable_snap_dump ? + MEGASAS_DEFAULT_SNAP_DUMP_WAIT_TIME : 0); + dev_info(&instance->pdev->dev, "controller type\t: %s(%dMB)\n", instance->is_imr ? "iMR" : "MR", @@ -4942,16 +5062,13 @@ fail_fw_init: static u32 megasas_init_adapter_mfi(struct megasas_instance *instance) { - struct megasas_register_set __iomem *reg_set; u32 context_sz; u32 reply_q_sz; - reg_set = instance->reg_set; - /* * Get various operational parameters from status register */ - instance->max_fw_cmds = instance->instancet->read_fw_status_reg(reg_set) & 0x00FFFF; + instance->max_fw_cmds = instance->instancet->read_fw_status_reg(instance) & 0x00FFFF; /* * Reduce the max supported cmds by 1. This is to ensure that the * reply_q_sz (1 more than the max cmd that driver may send) @@ -4959,7 +5076,7 @@ megasas_init_adapter_mfi(struct megasas_instance *instance) */ instance->max_fw_cmds = instance->max_fw_cmds-1; instance->max_mfi_cmds = instance->max_fw_cmds; - instance->max_num_sge = (instance->instancet->read_fw_status_reg(reg_set) & 0xFF0000) >> + instance->max_num_sge = (instance->instancet->read_fw_status_reg(instance) & 0xFF0000) >> 0x10; /* * For MFI skinny adapters, MEGASAS_SKINNY_INT_CMDS commands @@ -5015,7 +5132,7 @@ megasas_init_adapter_mfi(struct megasas_instance *instance) instance->fw_support_ieee = 0; instance->fw_support_ieee = - (instance->instancet->read_fw_status_reg(reg_set) & + (instance->instancet->read_fw_status_reg(instance) & 0x04000000); dev_notice(&instance->pdev->dev, "megasas_init_mfi: fw_support_ieee=%d", @@ -5213,14 +5330,14 @@ static int megasas_init_fw(struct megasas_instance *instance) { u32 max_sectors_1; u32 max_sectors_2, tmp_sectors, msix_enable; - u32 scratch_pad_2, scratch_pad_3, scratch_pad_4; + u32 scratch_pad_1, scratch_pad_2, scratch_pad_3, status_reg; resource_size_t base_addr; - struct megasas_register_set __iomem *reg_set; struct megasas_ctrl_info *ctrl_info = NULL; unsigned long bar_list; int i, j, loop, fw_msix_count = 0; struct IOV_111 *iovPtr; struct fusion_context *fusion; + bool do_adp_reset = true; fusion = instance->ctrl_context; @@ -5241,8 +5358,6 @@ static int megasas_init_fw(struct megasas_instance *instance) goto fail_ioremap; } - reg_set = instance->reg_set; - if (instance->adapter_type != MFI_SERIES) instance->instancet = &megasas_instance_template_fusion; else { @@ -5269,19 +5384,29 @@ static int megasas_init_fw(struct megasas_instance *instance) } if (megasas_transition_to_ready(instance, 0)) { - atomic_set(&instance->fw_reset_no_pci_access, 1); - instance->instancet->adp_reset - (instance, instance->reg_set); - atomic_set(&instance->fw_reset_no_pci_access, 0); - dev_info(&instance->pdev->dev, - "FW restarted successfully from %s!\n", - __func__); + if (instance->adapter_type >= INVADER_SERIES) { + status_reg = instance->instancet->read_fw_status_reg( + instance); + do_adp_reset = status_reg & MFI_RESET_ADAPTER; + } - /*waitting for about 30 second before retry*/ - ssleep(30); + if (do_adp_reset) { + atomic_set(&instance->fw_reset_no_pci_access, 1); + instance->instancet->adp_reset + (instance, instance->reg_set); + atomic_set(&instance->fw_reset_no_pci_access, 0); + dev_info(&instance->pdev->dev, + "FW restarted successfully from %s!\n", + __func__); + + /*waiting for about 30 second before retry*/ + ssleep(30); - if (megasas_transition_to_ready(instance, 0)) + if (megasas_transition_to_ready(instance, 0)) + goto fail_ready_state; + } else { goto fail_ready_state; + } } megasas_init_ctrl_params(instance); @@ -5297,38 +5422,57 @@ static int megasas_init_fw(struct megasas_instance *instance) fusion = instance->ctrl_context; - if (instance->adapter_type == VENTURA_SERIES) { - scratch_pad_3 = - readl(&instance->reg_set->outbound_scratch_pad_3); - instance->max_raid_mapsize = ((scratch_pad_3 >> + if (instance->adapter_type >= VENTURA_SERIES) { + scratch_pad_2 = + megasas_readl(instance, + &instance->reg_set->outbound_scratch_pad_2); + instance->max_raid_mapsize = ((scratch_pad_2 >> MR_MAX_RAID_MAP_SIZE_OFFSET_SHIFT) & MR_MAX_RAID_MAP_SIZE_MASK); } /* Check if MSI-X is supported while in ready state */ - msix_enable = (instance->instancet->read_fw_status_reg(reg_set) & + msix_enable = (instance->instancet->read_fw_status_reg(instance) & 0x4000000) >> 0x1a; if (msix_enable && !msix_disable) { int irq_flags = PCI_IRQ_MSIX; - scratch_pad_2 = readl - (&instance->reg_set->outbound_scratch_pad_2); + scratch_pad_1 = megasas_readl + (instance, &instance->reg_set->outbound_scratch_pad_1); /* Check max MSI-X vectors */ if (fusion) { if (instance->adapter_type == THUNDERBOLT_SERIES) { /* Thunderbolt Series*/ - instance->msix_vectors = (scratch_pad_2 + instance->msix_vectors = (scratch_pad_1 & MR_MAX_REPLY_QUEUES_OFFSET) + 1; fw_msix_count = instance->msix_vectors; - } else { /* Invader series supports more than 8 MSI-x vectors*/ - instance->msix_vectors = ((scratch_pad_2 + } else { + instance->msix_vectors = ((scratch_pad_1 & MR_MAX_REPLY_QUEUES_EXT_OFFSET) >> MR_MAX_REPLY_QUEUES_EXT_OFFSET_SHIFT) + 1; - if (instance->msix_vectors > 16) - instance->msix_combined = true; + + /* + * For Invader series, > 8 MSI-x vectors + * supported by FW/HW implies combined + * reply queue mode is enabled. + * For Ventura series, > 16 MSI-x vectors + * supported by FW/HW implies combined + * reply queue mode is enabled. + */ + switch (instance->adapter_type) { + case INVADER_SERIES: + if (instance->msix_vectors > 8) + instance->msix_combined = true; + break; + case AERO_SERIES: + case VENTURA_SERIES: + if (instance->msix_vectors > 16) + instance->msix_combined = true; + break; + } if (rdpq_enable) - instance->is_rdpq = (scratch_pad_2 & MR_RDPQ_MODE_OFFSET) ? + instance->is_rdpq = (scratch_pad_1 & MR_RDPQ_MODE_OFFSET) ? 1 : 0; fw_msix_count = instance->msix_vectors; /* Save 1-15 reply post index address to local memory @@ -5377,7 +5521,7 @@ static int megasas_init_fw(struct megasas_instance *instance) if (!instance->msix_vectors) { i = pci_alloc_irq_vectors(instance->pdev, 1, 1, PCI_IRQ_LEGACY); if (i < 0) - goto fail_setup_irqs; + goto fail_init_adapter; } megasas_setup_reply_map(instance); @@ -5403,13 +5547,14 @@ static int megasas_init_fw(struct megasas_instance *instance) if (instance->instancet->init_adapter(instance)) goto fail_init_adapter; - if (instance->adapter_type == VENTURA_SERIES) { - scratch_pad_4 = - readl(&instance->reg_set->outbound_scratch_pad_4); - if ((scratch_pad_4 & MR_NVME_PAGE_SIZE_MASK) >= + if (instance->adapter_type >= VENTURA_SERIES) { + scratch_pad_3 = + megasas_readl(instance, + &instance->reg_set->outbound_scratch_pad_3); + if ((scratch_pad_3 & MR_NVME_PAGE_SIZE_MASK) >= MR_DEFAULT_NVME_PAGE_SHIFT) instance->nvme_page_size = - (1 << (scratch_pad_4 & MR_NVME_PAGE_SIZE_MASK)); + (1 << (scratch_pad_3 & MR_NVME_PAGE_SIZE_MASK)); dev_info(&instance->pdev->dev, "NVME page size\t: (%d)\n", instance->nvme_page_size); @@ -5439,7 +5584,7 @@ static int megasas_init_fw(struct megasas_instance *instance) memset(instance->ld_ids, 0xff, MEGASAS_MAX_LD_IDS); /* stream detection initialization */ - if (instance->adapter_type == VENTURA_SERIES) { + if (instance->adapter_type >= VENTURA_SERIES) { fusion->stream_detect_by_ld = kcalloc(MAX_LOGICAL_DRIVES_EXT, sizeof(struct LD_STREAM_DETECT *), @@ -5539,6 +5684,11 @@ static int megasas_init_fw(struct megasas_instance *instance) instance->crash_dump_buf = NULL; } + if (instance->snapdump_wait_time) { + megasas_get_snapdump_properties(instance); + dev_info(&instance->pdev->dev, "Snap dump wait time\t: %d\n", + instance->snapdump_wait_time); + } dev_info(&instance->pdev->dev, "pci id\t\t: (0x%04x)/(0x%04x)/(0x%04x)/(0x%04x)\n", @@ -5553,7 +5703,6 @@ static int megasas_init_fw(struct megasas_instance *instance) dev_info(&instance->pdev->dev, "jbod sync map : %s\n", instance->use_seqnum_jbod_fp ? "yes" : "no"); - instance->max_sectors_per_req = instance->max_num_sge * SGE_BUFFER_SIZE / 512; if (tmp_sectors && (instance->max_sectors_per_req > tmp_sectors)) @@ -5576,19 +5725,32 @@ static int megasas_init_fw(struct megasas_instance *instance) /* Launch SR-IOV heartbeat timer */ if (instance->requestorId) { - if (!megasas_sriov_start_heartbeat(instance, 1)) + if (!megasas_sriov_start_heartbeat(instance, 1)) { megasas_start_timer(instance); - else + } else { instance->skip_heartbeat_timer_del = 1; + goto fail_get_ld_pd_list; + } } + /* + * Create and start watchdog thread which will monitor + * controller state every 1 sec and trigger OCR when + * it enters fault state + */ + if (instance->adapter_type != MFI_SERIES) + if (megasas_fusion_start_watchdog(instance) != SUCCESS) + goto fail_start_watchdog; + return 0; +fail_start_watchdog: + if (instance->requestorId && !instance->skip_heartbeat_timer_del) + del_timer_sync(&instance->sriov_heartbeat_timer); fail_get_ld_pd_list: instance->instancet->disable_intr(instance); -fail_init_adapter: megasas_destroy_irqs(instance); -fail_setup_irqs: +fail_init_adapter: if (instance->msix_vectors) pci_free_irq_vectors(instance->pdev); instance->msix_vectors = 0; @@ -6022,13 +6184,13 @@ static int megasas_io_attach(struct megasas_instance *instance) * @instance: Adapter soft state * Description: * - * For Ventura, driver/FW will operate in 64bit DMA addresses. + * For Ventura, driver/FW will operate in 63bit DMA addresses. * * For invader- * By default, driver/FW will operate in 32bit DMA addresses * for consistent DMA mapping but if 32 bit consistent - * DMA mask fails, driver will try with 64 bit consistent - * mask provided FW is true 64bit DMA capable + * DMA mask fails, driver will try with 63 bit consistent + * mask provided FW is true 63bit DMA capable * * For older controllers(Thunderbolt and MFI based adapters)- * driver/FW will operate in 32 bit consistent DMA addresses. @@ -6038,31 +6200,31 @@ megasas_set_dma_mask(struct megasas_instance *instance) { u64 consistent_mask; struct pci_dev *pdev; - u32 scratch_pad_2; + u32 scratch_pad_1; pdev = instance->pdev; - consistent_mask = (instance->adapter_type == VENTURA_SERIES) ? - DMA_BIT_MASK(64) : DMA_BIT_MASK(32); + consistent_mask = (instance->adapter_type >= VENTURA_SERIES) ? + DMA_BIT_MASK(63) : DMA_BIT_MASK(32); if (IS_DMA64) { - if (dma_set_mask(&pdev->dev, DMA_BIT_MASK(64)) && + if (dma_set_mask(&pdev->dev, DMA_BIT_MASK(63)) && dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32))) goto fail_set_dma_mask; - if ((*pdev->dev.dma_mask == DMA_BIT_MASK(64)) && + if ((*pdev->dev.dma_mask == DMA_BIT_MASK(63)) && (dma_set_coherent_mask(&pdev->dev, consistent_mask) && dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32)))) { /* * If 32 bit DMA mask fails, then try for 64 bit mask * for FW capable of handling 64 bit DMA. */ - scratch_pad_2 = readl - (&instance->reg_set->outbound_scratch_pad_2); + scratch_pad_1 = megasas_readl + (instance, &instance->reg_set->outbound_scratch_pad_1); - if (!(scratch_pad_2 & MR_CAN_HANDLE_64_BIT_DMA_OFFSET)) + if (!(scratch_pad_1 & MR_CAN_HANDLE_64_BIT_DMA_OFFSET)) goto fail_set_dma_mask; else if (dma_set_mask_and_coherent(&pdev->dev, - DMA_BIT_MASK(64))) + DMA_BIT_MASK(63))) goto fail_set_dma_mask; } } else if (dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32))) @@ -6074,8 +6236,8 @@ megasas_set_dma_mask(struct megasas_instance *instance) instance->consistent_mask_64bit = true; dev_info(&pdev->dev, "%s bit DMA mask and %s bit consistent mask\n", - ((*pdev->dev.dma_mask == DMA_BIT_MASK(64)) ? "64" : "32"), - (instance->consistent_mask_64bit ? "64" : "32")); + ((*pdev->dev.dma_mask == DMA_BIT_MASK(64)) ? "63" : "32"), + (instance->consistent_mask_64bit ? "63" : "32")); return 0; @@ -6088,12 +6250,14 @@ fail_set_dma_mask: /* * megasas_set_adapter_type - Set adapter type. * Supported controllers can be divided in - * 4 categories- enum MR_ADAPTER_TYPE { - * MFI_SERIES = 1, - * THUNDERBOLT_SERIES = 2, - * INVADER_SERIES = 3, - * VENTURA_SERIES = 4, - * }; + * different categories- + * enum MR_ADAPTER_TYPE { + * MFI_SERIES = 1, + * THUNDERBOLT_SERIES = 2, + * INVADER_SERIES = 3, + * VENTURA_SERIES = 4, + * AERO_SERIES = 5, + * }; * @instance: Adapter soft state * return: void */ @@ -6104,6 +6268,12 @@ static inline void megasas_set_adapter_type(struct megasas_instance *instance) instance->adapter_type = MFI_SERIES; } else { switch (instance->pdev->device) { + case PCI_DEVICE_ID_LSI_AERO_10E1: + case PCI_DEVICE_ID_LSI_AERO_10E2: + case PCI_DEVICE_ID_LSI_AERO_10E5: + case PCI_DEVICE_ID_LSI_AERO_10E6: + instance->adapter_type = AERO_SERIES; + break; case PCI_DEVICE_ID_LSI_VENTURA: case PCI_DEVICE_ID_LSI_CRUSADER: case PCI_DEVICE_ID_LSI_HARPOON: @@ -6171,6 +6341,7 @@ static int megasas_alloc_ctrl_mem(struct megasas_instance *instance) if (megasas_alloc_mfi_ctrl_mem(instance)) goto fail; break; + case AERO_SERIES: case VENTURA_SERIES: case THUNDERBOLT_SERIES: case INVADER_SERIES: @@ -6245,6 +6416,14 @@ int megasas_alloc_ctrl_dma_buffers(struct megasas_instance *instance) "Failed to allocate PD list buffer\n"); return -ENOMEM; } + + instance->snapdump_prop = dma_alloc_coherent(&pdev->dev, + sizeof(struct MR_SNAPDUMP_PROPERTIES), + &instance->snapdump_prop_h, GFP_KERNEL); + + if (!instance->snapdump_prop) + dev_err(&pdev->dev, + "Failed to allocate snapdump properties buffer\n"); } instance->pd_list_buf = @@ -6388,6 +6567,12 @@ void megasas_free_ctrl_dma_buffers(struct megasas_instance *instance) dma_free_coherent(&pdev->dev, CRASH_DMA_BUF_SIZE, instance->crash_dump_buf, instance->crash_dump_h); + + if (instance->snapdump_prop) + dma_free_coherent(&pdev->dev, + sizeof(struct MR_SNAPDUMP_PROPERTIES), + instance->snapdump_prop, + instance->snapdump_prop_h); } /* @@ -6434,12 +6619,10 @@ static inline void megasas_init_ctrl_params(struct megasas_instance *instance) instance->disableOnlineCtrlReset = 1; instance->UnevenSpanSupport = 0; - if (instance->adapter_type != MFI_SERIES) { + if (instance->adapter_type != MFI_SERIES) INIT_WORK(&instance->work_init, megasas_fusion_ocr_wq); - INIT_WORK(&instance->crash_init, megasas_fusion_crash_dump_wq); - } else { + else INIT_WORK(&instance->work_init, process_fw_state_change_wq); - } } /** @@ -6455,6 +6638,13 @@ static int megasas_probe_one(struct pci_dev *pdev, struct megasas_instance *instance; u16 control = 0; + switch (pdev->device) { + case PCI_DEVICE_ID_LSI_AERO_10E1: + case PCI_DEVICE_ID_LSI_AERO_10E5: + dev_info(&pdev->dev, "Adapter is in configurable secure mode\n"); + break; + } + /* Reset MSI-X in the kdump kernel */ if (reset_devices) { pos = pci_find_capability(pdev, PCI_CAP_ID_MSIX); @@ -6708,6 +6898,10 @@ megasas_suspend(struct pci_dev *pdev, pm_message_t state) if (instance->requestorId && !instance->skip_heartbeat_timer_del) del_timer_sync(&instance->sriov_heartbeat_timer); + /* Stop the FW fault detection watchdog */ + if (instance->adapter_type != MFI_SERIES) + megasas_fusion_stop_watchdog(instance); + megasas_flush_cache(instance); megasas_shutdown_controller(instance, MR_DCMD_HIBERNATE_SHUTDOWN); @@ -6843,8 +7037,16 @@ megasas_resume(struct pci_dev *pdev) if (megasas_start_aen(instance)) dev_err(&instance->pdev->dev, "Start AEN failed\n"); + /* Re-launch FW fault watchdog */ + if (instance->adapter_type != MFI_SERIES) + if (megasas_fusion_start_watchdog(instance) != SUCCESS) + goto fail_start_watchdog; + return 0; +fail_start_watchdog: + if (instance->requestorId && !instance->skip_heartbeat_timer_del) + del_timer_sync(&instance->sriov_heartbeat_timer); fail_init_mfi: megasas_free_ctrl_dma_buffers(instance); megasas_free_ctrl_mem(instance); @@ -6912,6 +7114,10 @@ static void megasas_detach_one(struct pci_dev *pdev) if (instance->requestorId && !instance->skip_heartbeat_timer_del) del_timer_sync(&instance->sriov_heartbeat_timer); + /* Stop the FW fault detection watchdog */ + if (instance->adapter_type != MFI_SERIES) + megasas_fusion_stop_watchdog(instance); + if (instance->fw_crash_state != UNAVAILABLE) megasas_free_host_crash_buffer(instance); scsi_remove_host(instance->host); @@ -6956,7 +7162,7 @@ skip_firing_dcmds: if (instance->msix_vectors) pci_free_irq_vectors(instance->pdev); - if (instance->adapter_type == VENTURA_SERIES) { + if (instance->adapter_type >= VENTURA_SERIES) { for (i = 0; i < MAX_LOGICAL_DRIVES_EXT; ++i) kfree(fusion->stream_detect_by_ld[i]); kfree(fusion->stream_detect_by_ld); @@ -7737,8 +7943,15 @@ megasas_aen_polling(struct work_struct *work) break; case MR_EVT_CTRL_PROP_CHANGED: - dcmd_ret = megasas_get_ctrl_info(instance); - break; + dcmd_ret = megasas_get_ctrl_info(instance); + if (dcmd_ret == DCMD_SUCCESS && + instance->snapdump_wait_time) { + megasas_get_snapdump_properties(instance); + dev_info(&instance->pdev->dev, + "Snap dump wait time\t: %d\n", + instance->snapdump_wait_time); + } + break; default: doscan = 0; break; diff --git a/drivers/scsi/megaraid/megaraid_sas_fp.c b/drivers/scsi/megaraid/megaraid_sas_fp.c index 59ecbb3b53b5..87c2c0472c8f 100644 --- a/drivers/scsi/megaraid/megaraid_sas_fp.c +++ b/drivers/scsi/megaraid/megaraid_sas_fp.c @@ -2,7 +2,8 @@ * Linux MegaRAID driver for SAS based RAID controllers * * Copyright (c) 2009-2013 LSI Corporation - * Copyright (c) 2013-2014 Avago Technologies + * Copyright (c) 2013-2016 Avago Technologies + * Copyright (c) 2016-2018 Broadcom Inc. * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License @@ -19,17 +20,14 @@ * * FILE: megaraid_sas_fp.c * - * Authors: Avago Technologies + * Authors: Broadcom Inc. * Sumant Patro * Varad Talamacki * Manoj Jose - * Kashyap Desai <kashyap.desai@avagotech.com> - * Sumit Saxena <sumit.saxena@avagotech.com> + * Kashyap Desai <kashyap.desai@broadcom.com> + * Sumit Saxena <sumit.saxena@broadcom.com> * - * Send feedback to: megaraidlinux.pdl@avagotech.com - * - * Mail to: Avago Technologies, 350 West Trimble Road, Building 90, - * San Jose, California 95131 + * Send feedback to: megaraidlinux.pdl@broadcom.com */ #include <linux/kernel.h> @@ -745,7 +743,7 @@ static u8 mr_spanset_get_phy_params(struct megasas_instance *instance, u32 ld, *pDevHandle = MR_PdDevHandleGet(pd, map); *pPdInterface = MR_PdInterfaceTypeGet(pd, map); /* get second pd also for raid 1/10 fast path writes*/ - if ((instance->adapter_type == VENTURA_SERIES) && + if ((instance->adapter_type >= VENTURA_SERIES) && (raid->level == 1) && !io_info->isRead) { r1_alt_pd = MR_ArPdGet(arRef, physArm + 1, map); @@ -770,7 +768,7 @@ static u8 mr_spanset_get_phy_params(struct megasas_instance *instance, u32 ld, } *pdBlock += stripRef + le64_to_cpu(MR_LdSpanPtrGet(ld, span, map)->startBlk); - if (instance->adapter_type == VENTURA_SERIES) { + if (instance->adapter_type >= VENTURA_SERIES) { ((struct RAID_CONTEXT_G35 *)pRAID_Context)->span_arm = (span << RAID_CTX_SPANARM_SPAN_SHIFT) | physArm; io_info->span_arm = @@ -861,7 +859,7 @@ u8 MR_GetPhyParams(struct megasas_instance *instance, u32 ld, u64 stripRow, *pDevHandle = MR_PdDevHandleGet(pd, map); *pPdInterface = MR_PdInterfaceTypeGet(pd, map); /* get second pd also for raid 1/10 fast path writes*/ - if ((instance->adapter_type == VENTURA_SERIES) && + if ((instance->adapter_type >= VENTURA_SERIES) && (raid->level == 1) && !io_info->isRead) { r1_alt_pd = MR_ArPdGet(arRef, physArm + 1, map); @@ -888,7 +886,7 @@ u8 MR_GetPhyParams(struct megasas_instance *instance, u32 ld, u64 stripRow, } *pdBlock += stripRef + le64_to_cpu(MR_LdSpanPtrGet(ld, span, map)->startBlk); - if (instance->adapter_type == VENTURA_SERIES) { + if (instance->adapter_type >= VENTURA_SERIES) { ((struct RAID_CONTEXT_G35 *)pRAID_Context)->span_arm = (span << RAID_CTX_SPANARM_SPAN_SHIFT) | physArm; io_info->span_arm = @@ -1266,7 +1264,7 @@ void mr_update_load_balance_params(struct MR_DRV_RAID_MAP_ALL *drv_map, for (ldCount = 0; ldCount < MAX_LOGICAL_DRIVES_EXT; ldCount++) { ld = MR_TargetIdToLdGet(ldCount, drv_map); - if (ld >= MAX_LOGICAL_DRIVES_EXT) { + if (ld >= MAX_LOGICAL_DRIVES_EXT - 1) { lbInfo[ldCount].loadBalanceFlag = 0; continue; } diff --git a/drivers/scsi/megaraid/megaraid_sas_fusion.c b/drivers/scsi/megaraid/megaraid_sas_fusion.c index f74b5ea24f0f..211c17c33aa0 100644 --- a/drivers/scsi/megaraid/megaraid_sas_fusion.c +++ b/drivers/scsi/megaraid/megaraid_sas_fusion.c @@ -2,7 +2,8 @@ * Linux MegaRAID driver for SAS based RAID controllers * * Copyright (c) 2009-2013 LSI Corporation - * Copyright (c) 2013-2014 Avago Technologies + * Copyright (c) 2013-2016 Avago Technologies + * Copyright (c) 2016-2018 Broadcom Inc. * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License @@ -19,16 +20,13 @@ * * FILE: megaraid_sas_fusion.c * - * Authors: Avago Technologies + * Authors: Broadcom Inc. * Sumant Patro * Adam Radford - * Kashyap Desai <kashyap.desai@avagotech.com> - * Sumit Saxena <sumit.saxena@avagotech.com> + * Kashyap Desai <kashyap.desai@broadcom.com> + * Sumit Saxena <sumit.saxena@broadcom.com> * - * Send feedback to: megaraidlinux.pdl@avagotech.com - * - * Mail to: Avago Technologies, 350 West Trimble Road, Building 90, - * San Jose, California 95131 + * Send feedback to: megaraidlinux.pdl@broadcom.com */ #include <linux/kernel.h> @@ -48,6 +46,7 @@ #include <linux/mutex.h> #include <linux/poll.h> #include <linux/vmalloc.h> +#include <linux/workqueue.h> #include <scsi/scsi.h> #include <scsi/scsi_cmnd.h> @@ -74,7 +73,7 @@ void megasas_return_cmd(struct megasas_instance *instance, struct megasas_cmd *cmd); int megasas_alloc_cmds(struct megasas_instance *instance); int -megasas_clear_intr_fusion(struct megasas_register_set __iomem *regs); +megasas_clear_intr_fusion(struct megasas_instance *instance); int megasas_issue_polled(struct megasas_instance *instance, struct megasas_cmd *cmd); @@ -95,6 +94,9 @@ static void megasas_free_rdpq_fusion(struct megasas_instance *instance); static void megasas_free_reply_fusion(struct megasas_instance *instance); static inline void megasas_configure_queue_sizes(struct megasas_instance *instance); +static void megasas_fusion_crash_dump(struct megasas_instance *instance); +extern u32 megasas_readl(struct megasas_instance *instance, + const volatile void __iomem *addr); /** * megasas_check_same_4gb_region - check if allocation @@ -165,9 +167,11 @@ megasas_disable_intr_fusion(struct megasas_instance *instance) } int -megasas_clear_intr_fusion(struct megasas_register_set __iomem *regs) +megasas_clear_intr_fusion(struct megasas_instance *instance) { u32 status; + struct megasas_register_set __iomem *regs; + regs = instance->reg_set; /* * Check if it is our interrupt */ @@ -262,16 +266,17 @@ megasas_fusion_update_can_queue(struct megasas_instance *instance, int fw_boot_c reg_set = instance->reg_set; - /* ventura FW does not fill outbound_scratch_pad_3 with queue depth */ + /* ventura FW does not fill outbound_scratch_pad_2 with queue depth */ if (instance->adapter_type < VENTURA_SERIES) cur_max_fw_cmds = - readl(&instance->reg_set->outbound_scratch_pad_3) & 0x00FFFF; + megasas_readl(instance, + &instance->reg_set->outbound_scratch_pad_2) & 0x00FFFF; if (dual_qdepth_disable || !cur_max_fw_cmds) - cur_max_fw_cmds = instance->instancet->read_fw_status_reg(reg_set) & 0x00FFFF; + cur_max_fw_cmds = instance->instancet->read_fw_status_reg(instance) & 0x00FFFF; else ldio_threshold = - (instance->instancet->read_fw_status_reg(reg_set) & 0x00FFFF) - MEGASAS_FUSION_IOCTL_CMDS; + (instance->instancet->read_fw_status_reg(instance) & 0x00FFFF) - MEGASAS_FUSION_IOCTL_CMDS; dev_info(&instance->pdev->dev, "Current firmware supports maximum commands: %d\t LDIO threshold: %d\n", @@ -807,10 +812,8 @@ megasas_free_rdpq_fusion(struct megasas_instance *instance) { } - if (fusion->reply_frames_desc_pool) - dma_pool_destroy(fusion->reply_frames_desc_pool); - if (fusion->reply_frames_desc_pool_align) - dma_pool_destroy(fusion->reply_frames_desc_pool_align); + dma_pool_destroy(fusion->reply_frames_desc_pool); + dma_pool_destroy(fusion->reply_frames_desc_pool_align); if (fusion->rdpq_virt) dma_free_coherent(&instance->pdev->dev, @@ -830,8 +833,7 @@ megasas_free_reply_fusion(struct megasas_instance *instance) { fusion->reply_frames_desc[0], fusion->reply_frames_desc_phys[0]); - if (fusion->reply_frames_desc_pool) - dma_pool_destroy(fusion->reply_frames_desc_pool); + dma_pool_destroy(fusion->reply_frames_desc_pool); } @@ -974,7 +976,7 @@ megasas_ioc_init_fusion(struct megasas_instance *instance) struct megasas_header *frame_hdr; const char *sys_info; MFI_CAPABILITIES *drv_ops; - u32 scratch_pad_2; + u32 scratch_pad_1; ktime_t time; bool cur_fw_64bit_dma_capable; @@ -985,14 +987,14 @@ megasas_ioc_init_fusion(struct megasas_instance *instance) cmd = fusion->ioc_init_cmd; - scratch_pad_2 = readl - (&instance->reg_set->outbound_scratch_pad_2); + scratch_pad_1 = megasas_readl + (instance, &instance->reg_set->outbound_scratch_pad_1); - cur_rdpq_mode = (scratch_pad_2 & MR_RDPQ_MODE_OFFSET) ? 1 : 0; + cur_rdpq_mode = (scratch_pad_1 & MR_RDPQ_MODE_OFFSET) ? 1 : 0; if (instance->adapter_type == INVADER_SERIES) { cur_fw_64bit_dma_capable = - (scratch_pad_2 & MR_CAN_HANDLE_64_BIT_DMA_OFFSET) ? true : false; + (scratch_pad_1 & MR_CAN_HANDLE_64_BIT_DMA_OFFSET) ? true : false; if (instance->consistent_mask_64bit && !cur_fw_64bit_dma_capable) { dev_err(&instance->pdev->dev, "Driver was operating on 64bit " @@ -1010,7 +1012,7 @@ megasas_ioc_init_fusion(struct megasas_instance *instance) goto fail_fw_init; } - instance->fw_sync_cache_support = (scratch_pad_2 & + instance->fw_sync_cache_support = (scratch_pad_1 & MR_CAN_HANDLE_SYNC_CACHE_OFFSET) ? 1 : 0; dev_info(&instance->pdev->dev, "FW supports sync cache\t: %s\n", instance->fw_sync_cache_support ? "Yes" : "No"); @@ -1043,9 +1045,7 @@ megasas_ioc_init_fusion(struct megasas_instance *instance) frame_hdr = &cmd->frame->hdr; frame_hdr->cmd_status = 0xFF; - frame_hdr->flags = cpu_to_le16( - le16_to_cpu(frame_hdr->flags) | - MFI_FRAME_DONT_POST_IN_REPLY_QUEUE); + frame_hdr->flags |= cpu_to_le16(MFI_FRAME_DONT_POST_IN_REPLY_QUEUE); init_frame->cmd = MFI_CMD_INIT; init_frame->cmd_status = 0xFF; @@ -1107,7 +1107,7 @@ megasas_ioc_init_fusion(struct megasas_instance *instance) instance->instancet->disable_intr(instance); for (i = 0; i < (10 * 1000); i += 20) { - if (readl(&instance->reg_set->doorbell) & 1) + if (megasas_readl(instance, &instance->reg_set->doorbell) & 1) msleep(20); else break; @@ -1115,7 +1115,7 @@ megasas_ioc_init_fusion(struct megasas_instance *instance) megasas_fire_cmd_fusion(instance, &req_desc); - wait_and_poll(instance, cmd, MFI_POLL_TIMEOUT_SECS); + wait_and_poll(instance, cmd, MFI_IO_TIMEOUT_SECS); frame_hdr = &cmd->frame->hdr; if (frame_hdr->cmd_status != 0) { @@ -1559,14 +1559,12 @@ void megasas_configure_queue_sizes(struct megasas_instance *instance) fusion = instance->ctrl_context; max_cmd = instance->max_fw_cmds; - if (instance->adapter_type == VENTURA_SERIES) + if (instance->adapter_type >= VENTURA_SERIES) instance->max_mpt_cmds = instance->max_fw_cmds * RAID_1_PEER_CMDS; else instance->max_mpt_cmds = instance->max_fw_cmds; - instance->max_scsi_cmds = instance->max_fw_cmds - - (MEGASAS_FUSION_INTERNAL_CMDS + - MEGASAS_FUSION_IOCTL_CMDS); + instance->max_scsi_cmds = instance->max_fw_cmds - instance->max_mfi_cmds; instance->cur_can_queue = instance->max_scsi_cmds; instance->host->can_queue = instance->cur_can_queue; @@ -1627,8 +1625,7 @@ static inline void megasas_free_ioc_init_cmd(struct megasas_instance *instance) fusion->ioc_init_cmd->frame, fusion->ioc_init_cmd->frame_phys_addr); - if (fusion->ioc_init_cmd) - kfree(fusion->ioc_init_cmd); + kfree(fusion->ioc_init_cmd); } /** @@ -1642,7 +1639,7 @@ megasas_init_adapter_fusion(struct megasas_instance *instance) { struct megasas_register_set __iomem *reg_set; struct fusion_context *fusion; - u32 scratch_pad_2; + u32 scratch_pad_1; int i = 0, count; fusion = instance->ctrl_context; @@ -1659,20 +1656,21 @@ megasas_init_adapter_fusion(struct megasas_instance *instance) megasas_configure_queue_sizes(instance); - scratch_pad_2 = readl(&instance->reg_set->outbound_scratch_pad_2); - /* If scratch_pad_2 & MEGASAS_MAX_CHAIN_SIZE_UNITS_MASK is set, + scratch_pad_1 = megasas_readl(instance, + &instance->reg_set->outbound_scratch_pad_1); + /* If scratch_pad_1 & MEGASAS_MAX_CHAIN_SIZE_UNITS_MASK is set, * Firmware support extended IO chain frame which is 4 times more than * legacy Firmware. * Legacy Firmware - Frame size is (8 * 128) = 1K * 1M IO Firmware - Frame size is (8 * 128 * 4) = 4K */ - if (scratch_pad_2 & MEGASAS_MAX_CHAIN_SIZE_UNITS_MASK) + if (scratch_pad_1 & MEGASAS_MAX_CHAIN_SIZE_UNITS_MASK) instance->max_chain_frame_sz = - ((scratch_pad_2 & MEGASAS_MAX_CHAIN_SIZE_MASK) >> + ((scratch_pad_1 & MEGASAS_MAX_CHAIN_SIZE_MASK) >> MEGASAS_MAX_CHAIN_SHIFT) * MEGASAS_1MB_IO; else instance->max_chain_frame_sz = - ((scratch_pad_2 & MEGASAS_MAX_CHAIN_SIZE_MASK) >> + ((scratch_pad_1 & MEGASAS_MAX_CHAIN_SIZE_MASK) >> MEGASAS_MAX_CHAIN_SHIFT) * MEGASAS_256K_IO; if (instance->max_chain_frame_sz < MEGASAS_CHAIN_FRAME_SZ_MIN) { @@ -1760,6 +1758,90 @@ fail_alloc_mfi_cmds: } /** + * megasas_fault_detect_work - Worker function of + * FW fault handling workqueue. + */ +static void +megasas_fault_detect_work(struct work_struct *work) +{ + struct megasas_instance *instance = + container_of(work, struct megasas_instance, + fw_fault_work.work); + u32 fw_state, dma_state, status; + + /* Check the fw state */ + fw_state = instance->instancet->read_fw_status_reg(instance) & + MFI_STATE_MASK; + + if (fw_state == MFI_STATE_FAULT) { + dma_state = instance->instancet->read_fw_status_reg(instance) & + MFI_STATE_DMADONE; + /* Start collecting crash, if DMA bit is done */ + if (instance->crash_dump_drv_support && + instance->crash_dump_app_support && dma_state) { + megasas_fusion_crash_dump(instance); + } else { + if (instance->unload == 0) { + status = megasas_reset_fusion(instance->host, 0); + if (status != SUCCESS) { + dev_err(&instance->pdev->dev, + "Failed from %s %d, do not re-arm timer\n", + __func__, __LINE__); + return; + } + } + } + } + + if (instance->fw_fault_work_q) + queue_delayed_work(instance->fw_fault_work_q, + &instance->fw_fault_work, + msecs_to_jiffies(MEGASAS_WATCHDOG_THREAD_INTERVAL)); +} + +int +megasas_fusion_start_watchdog(struct megasas_instance *instance) +{ + /* Check if the Fault WQ is already started */ + if (instance->fw_fault_work_q) + return SUCCESS; + + INIT_DELAYED_WORK(&instance->fw_fault_work, megasas_fault_detect_work); + + snprintf(instance->fault_handler_work_q_name, + sizeof(instance->fault_handler_work_q_name), + "poll_megasas%d_status", instance->host->host_no); + + instance->fw_fault_work_q = + create_singlethread_workqueue(instance->fault_handler_work_q_name); + if (!instance->fw_fault_work_q) { + dev_err(&instance->pdev->dev, "Failed from %s %d\n", + __func__, __LINE__); + return FAILED; + } + + queue_delayed_work(instance->fw_fault_work_q, + &instance->fw_fault_work, + msecs_to_jiffies(MEGASAS_WATCHDOG_THREAD_INTERVAL)); + + return SUCCESS; +} + +void +megasas_fusion_stop_watchdog(struct megasas_instance *instance) +{ + struct workqueue_struct *wq; + + if (instance->fw_fault_work_q) { + wq = instance->fw_fault_work_q; + instance->fw_fault_work_q = NULL; + if (!cancel_delayed_work_sync(&instance->fw_fault_work)) + flush_workqueue(wq); + destroy_workqueue(wq); + } +} + +/** * map_cmd_status - Maps FW cmd status to OS cmd status * @cmd : Pointer to cmd * @status : status of cmd returned by FW @@ -2543,19 +2625,22 @@ megasas_build_ldio_fusion(struct megasas_instance *instance, struct MR_DRV_RAID_MAP_ALL *local_map_ptr; u8 *raidLUN; unsigned long spinlock_flags; - union RAID_CONTEXT_UNION *praid_context; struct MR_LD_RAID *raid = NULL; struct MR_PRIV_DEVICE *mrdev_priv; + struct RAID_CONTEXT *rctx; + struct RAID_CONTEXT_G35 *rctx_g35; device_id = MEGASAS_DEV_INDEX(scp); fusion = instance->ctrl_context; io_request = cmd->io_request; - io_request->RaidContext.raid_context.virtual_disk_tgt_id = - cpu_to_le16(device_id); - io_request->RaidContext.raid_context.status = 0; - io_request->RaidContext.raid_context.ex_status = 0; + rctx = &io_request->RaidContext.raid_context; + rctx_g35 = &io_request->RaidContext.raid_context_g35; + + rctx->virtual_disk_tgt_id = cpu_to_le16(device_id); + rctx->status = 0; + rctx->ex_status = 0; req_desc = (union MEGASAS_REQUEST_DESCRIPTOR_UNION *)cmd->request_desc; @@ -2631,11 +2716,10 @@ megasas_build_ldio_fusion(struct megasas_instance *instance, raid = MR_LdRaidGet(ld, local_map_ptr); if (!raid || (!fusion->fast_path_io)) { - io_request->RaidContext.raid_context.reg_lock_flags = 0; + rctx->reg_lock_flags = 0; fp_possible = false; } else { - if (MR_BuildRaidContext(instance, &io_info, - &io_request->RaidContext.raid_context, + if (MR_BuildRaidContext(instance, &io_info, rctx, local_map_ptr, &raidLUN)) fp_possible = (io_info.fpOkForIo > 0) ? true : false; } @@ -2643,9 +2727,7 @@ megasas_build_ldio_fusion(struct megasas_instance *instance, cmd->request_desc->SCSIIO.MSIxIndex = instance->reply_map[raw_smp_processor_id()]; - praid_context = &io_request->RaidContext; - - if (instance->adapter_type == VENTURA_SERIES) { + if (instance->adapter_type >= VENTURA_SERIES) { /* FP for Optimal raid level 1. * All large RAID-1 writes (> 32 KiB, both WT and WB modes) * are built by the driver as LD I/Os. @@ -2681,17 +2763,17 @@ megasas_build_ldio_fusion(struct megasas_instance *instance, /* In ventura if stream detected for a read and it is * read ahead capable make this IO as LDIO */ - if (is_stream_detected(&io_request->RaidContext.raid_context_g35)) + if (is_stream_detected(rctx_g35)) fp_possible = false; } /* If raid is NULL, set CPU affinity to default CPU0 */ if (raid) - megasas_set_raidflag_cpu_affinity(praid_context, + megasas_set_raidflag_cpu_affinity(&io_request->RaidContext, raid, fp_possible, io_info.isRead, scsi_buff_len); else - praid_context->raid_context_g35.routing_flags |= + rctx_g35->routing_flags |= (MR_RAID_CTX_CPUSEL_0 << MR_RAID_CTX_ROUTINGFLAGS_CPUSEL_SHIFT); } @@ -2703,25 +2785,20 @@ megasas_build_ldio_fusion(struct megasas_instance *instance, (MPI2_REQ_DESCRIPT_FLAGS_FP_IO << MEGASAS_REQ_DESCRIPT_FLAGS_TYPE_SHIFT); if (instance->adapter_type == INVADER_SERIES) { - if (io_request->RaidContext.raid_context.reg_lock_flags == - REGION_TYPE_UNUSED) + if (rctx->reg_lock_flags == REGION_TYPE_UNUSED) cmd->request_desc->SCSIIO.RequestFlags = (MEGASAS_REQ_DESCRIPT_FLAGS_NO_LOCK << MEGASAS_REQ_DESCRIPT_FLAGS_TYPE_SHIFT); - io_request->RaidContext.raid_context.type - = MPI2_TYPE_CUDA; - io_request->RaidContext.raid_context.nseg = 0x1; + rctx->type = MPI2_TYPE_CUDA; + rctx->nseg = 0x1; io_request->IoFlags |= cpu_to_le16(MPI25_SAS_DEVICE0_FLAGS_ENABLED_FAST_PATH); - io_request->RaidContext.raid_context.reg_lock_flags |= + rctx->reg_lock_flags |= (MR_RL_FLAGS_GRANT_DESTINATION_CUDA | MR_RL_FLAGS_SEQ_NUM_ENABLE); - } else if (instance->adapter_type == VENTURA_SERIES) { - io_request->RaidContext.raid_context_g35.nseg_type |= - (1 << RAID_CONTEXT_NSEG_SHIFT); - io_request->RaidContext.raid_context_g35.nseg_type |= - (MPI2_TYPE_CUDA << RAID_CONTEXT_TYPE_SHIFT); - io_request->RaidContext.raid_context_g35.routing_flags |= - (1 << MR_RAID_CTX_ROUTINGFLAGS_SQN_SHIFT); + } else if (instance->adapter_type >= VENTURA_SERIES) { + rctx_g35->nseg_type |= (1 << RAID_CONTEXT_NSEG_SHIFT); + rctx_g35->nseg_type |= (MPI2_TYPE_CUDA << RAID_CONTEXT_TYPE_SHIFT); + rctx_g35->routing_flags |= (1 << MR_RAID_CTX_ROUTINGFLAGS_SQN_SHIFT); io_request->IoFlags |= cpu_to_le16(MPI25_SAS_DEVICE0_FLAGS_ENABLED_FAST_PATH); } @@ -2734,17 +2811,15 @@ megasas_build_ldio_fusion(struct megasas_instance *instance, &io_info, local_map_ptr); scp->SCp.Status |= MEGASAS_LOAD_BALANCE_FLAG; cmd->pd_r1_lb = io_info.pd_after_lb; - if (instance->adapter_type == VENTURA_SERIES) - io_request->RaidContext.raid_context_g35.span_arm - = io_info.span_arm; + if (instance->adapter_type >= VENTURA_SERIES) + rctx_g35->span_arm = io_info.span_arm; else - io_request->RaidContext.raid_context.span_arm - = io_info.span_arm; + rctx->span_arm = io_info.span_arm; } else scp->SCp.Status &= ~MEGASAS_LOAD_BALANCE_FLAG; - if (instance->adapter_type == VENTURA_SERIES) + if (instance->adapter_type >= VENTURA_SERIES) cmd->r1_alt_dev_handle = io_info.r1_alt_dev_handle; else cmd->r1_alt_dev_handle = MR_DEVHANDLE_INVALID; @@ -2762,31 +2837,26 @@ megasas_build_ldio_fusion(struct megasas_instance *instance, /* populate the LUN field */ memcpy(io_request->LUN, raidLUN, 8); } else { - io_request->RaidContext.raid_context.timeout_value = + rctx->timeout_value = cpu_to_le16(local_map_ptr->raidMap.fpPdIoTimeoutSec); cmd->request_desc->SCSIIO.RequestFlags = (MEGASAS_REQ_DESCRIPT_FLAGS_LD_IO << MEGASAS_REQ_DESCRIPT_FLAGS_TYPE_SHIFT); if (instance->adapter_type == INVADER_SERIES) { if (io_info.do_fp_rlbypass || - (io_request->RaidContext.raid_context.reg_lock_flags - == REGION_TYPE_UNUSED)) + (rctx->reg_lock_flags == REGION_TYPE_UNUSED)) cmd->request_desc->SCSIIO.RequestFlags = (MEGASAS_REQ_DESCRIPT_FLAGS_NO_LOCK << MEGASAS_REQ_DESCRIPT_FLAGS_TYPE_SHIFT); - io_request->RaidContext.raid_context.type - = MPI2_TYPE_CUDA; - io_request->RaidContext.raid_context.reg_lock_flags |= + rctx->type = MPI2_TYPE_CUDA; + rctx->reg_lock_flags |= (MR_RL_FLAGS_GRANT_DESTINATION_CPU0 | - MR_RL_FLAGS_SEQ_NUM_ENABLE); - io_request->RaidContext.raid_context.nseg = 0x1; - } else if (instance->adapter_type == VENTURA_SERIES) { - io_request->RaidContext.raid_context_g35.routing_flags |= - (1 << MR_RAID_CTX_ROUTINGFLAGS_SQN_SHIFT); - io_request->RaidContext.raid_context_g35.nseg_type |= - (1 << RAID_CONTEXT_NSEG_SHIFT); - io_request->RaidContext.raid_context_g35.nseg_type |= - (MPI2_TYPE_CUDA << RAID_CONTEXT_TYPE_SHIFT); + MR_RL_FLAGS_SEQ_NUM_ENABLE); + rctx->nseg = 0x1; + } else if (instance->adapter_type >= VENTURA_SERIES) { + rctx_g35->routing_flags |= (1 << MR_RAID_CTX_ROUTINGFLAGS_SQN_SHIFT); + rctx_g35->nseg_type |= (1 << RAID_CONTEXT_NSEG_SHIFT); + rctx_g35->nseg_type |= (MPI2_TYPE_CUDA << RAID_CONTEXT_TYPE_SHIFT); } io_request->Function = MEGASAS_MPI2_FUNCTION_LD_IO_REQUEST; io_request->DevHandle = cpu_to_le16(device_id); @@ -2832,7 +2902,7 @@ static void megasas_build_ld_nonrw_fusion(struct megasas_instance *instance, device_id < instance->fw_supported_vd_count)) { ld = MR_TargetIdToLdGet(device_id, local_map_ptr); - if (ld >= instance->fw_supported_vd_count) + if (ld >= instance->fw_supported_vd_count - 1) fp_possible = 0; else { raid = MR_LdRaidGet(ld, local_map_ptr); @@ -2855,7 +2925,7 @@ static void megasas_build_ld_nonrw_fusion(struct megasas_instance *instance, /* set RAID context values */ pRAID_Context->config_seq_num = raid->seqNum; - if (instance->adapter_type != VENTURA_SERIES) + if (instance->adapter_type < VENTURA_SERIES) pRAID_Context->reg_lock_flags = REGION_TYPE_SHARED_READ; pRAID_Context->timeout_value = cpu_to_le16(raid->fpIoTimeoutForLd); @@ -2940,7 +3010,7 @@ megasas_build_syspd_fusion(struct megasas_instance *instance, cpu_to_le16(device_id + (MAX_PHYSICAL_DEVICES - 1)); pRAID_Context->config_seq_num = pd_sync->seq[pd_index].seqNum; io_request->DevHandle = pd_sync->seq[pd_index].devHandle; - if (instance->adapter_type == VENTURA_SERIES) { + if (instance->adapter_type >= VENTURA_SERIES) { io_request->RaidContext.raid_context_g35.routing_flags |= (1 << MR_RAID_CTX_ROUTINGFLAGS_SQN_SHIFT); io_request->RaidContext.raid_context_g35.nseg_type |= @@ -3073,7 +3143,7 @@ megasas_build_io_fusion(struct megasas_instance *instance, return 1; } - if (instance->adapter_type == VENTURA_SERIES) { + if (instance->adapter_type >= VENTURA_SERIES) { set_num_sge(&io_request->RaidContext.raid_context_g35, sge_count); cpu_to_le16s(&io_request->RaidContext.raid_context_g35.routing_flags); cpu_to_le16s(&io_request->RaidContext.raid_context_g35.nseg_type); @@ -3385,7 +3455,7 @@ complete_cmd_fusion(struct megasas_instance *instance, u32 MSIxIndex) atomic_dec(&lbinfo->scsi_pending_cmds[cmd_fusion->pd_r1_lb]); cmd_fusion->scmd->SCp.Status &= ~MEGASAS_LOAD_BALANCE_FLAG; } - //Fall thru and complete IO + /* Fall through - and complete IO */ case MEGASAS_MPI2_FUNCTION_LD_IO_REQUEST: /* LD-IO Path */ atomic_dec(&instance->fw_outstanding); if (cmd_fusion->r1_alt_dev_handle == MR_DEVHANDLE_INVALID) { @@ -3501,18 +3571,13 @@ megasas_complete_cmd_dpc_fusion(unsigned long instance_addr) { struct megasas_instance *instance = (struct megasas_instance *)instance_addr; - unsigned long flags; u32 count, MSIxIndex; count = instance->msix_vectors > 0 ? instance->msix_vectors : 1; /* If we have already declared adapter dead, donot complete cmds */ - spin_lock_irqsave(&instance->hba_lock, flags); - if (atomic_read(&instance->adprecovery) == MEGASAS_HW_CRITICAL_ERROR) { - spin_unlock_irqrestore(&instance->hba_lock, flags); + if (atomic_read(&instance->adprecovery) == MEGASAS_HW_CRITICAL_ERROR) return; - } - spin_unlock_irqrestore(&instance->hba_lock, flags); for (MSIxIndex = 0 ; MSIxIndex < count; MSIxIndex++) complete_cmd_fusion(instance, MSIxIndex); @@ -3525,48 +3590,24 @@ irqreturn_t megasas_isr_fusion(int irq, void *devp) { struct megasas_irq_context *irq_context = devp; struct megasas_instance *instance = irq_context->instance; - u32 mfiStatus, fw_state, dma_state; + u32 mfiStatus; if (instance->mask_interrupts) return IRQ_NONE; if (!instance->msix_vectors) { - mfiStatus = instance->instancet->clear_intr(instance->reg_set); + mfiStatus = instance->instancet->clear_intr(instance); if (!mfiStatus) return IRQ_NONE; } /* If we are resetting, bail */ if (test_bit(MEGASAS_FUSION_IN_RESET, &instance->reset_flags)) { - instance->instancet->clear_intr(instance->reg_set); + instance->instancet->clear_intr(instance); return IRQ_HANDLED; } - if (!complete_cmd_fusion(instance, irq_context->MSIxIndex)) { - instance->instancet->clear_intr(instance->reg_set); - /* If we didn't complete any commands, check for FW fault */ - fw_state = instance->instancet->read_fw_status_reg( - instance->reg_set) & MFI_STATE_MASK; - dma_state = instance->instancet->read_fw_status_reg - (instance->reg_set) & MFI_STATE_DMADONE; - if (instance->crash_dump_drv_support && - instance->crash_dump_app_support) { - /* Start collecting crash, if DMA bit is done */ - if ((fw_state == MFI_STATE_FAULT) && dma_state) - schedule_work(&instance->crash_init); - else if (fw_state == MFI_STATE_FAULT) { - if (instance->unload == 0) - schedule_work(&instance->work_init); - } - } else if (fw_state == MFI_STATE_FAULT) { - dev_warn(&instance->pdev->dev, "Iop2SysDoorbellInt" - "for scsi%d\n", instance->host->host_no); - if (instance->unload == 0) - schedule_work(&instance->work_init); - } - } - - return IRQ_HANDLED; + return complete_cmd_fusion(instance, irq_context->MSIxIndex); } /** @@ -3692,9 +3733,9 @@ megasas_release_fusion(struct megasas_instance *instance) * @regs: MFI register set */ static u32 -megasas_read_fw_status_reg_fusion(struct megasas_register_set __iomem *regs) +megasas_read_fw_status_reg_fusion(struct megasas_instance *instance) { - return readl(&(regs)->outbound_scratch_pad); + return megasas_readl(instance, &instance->reg_set->outbound_scratch_pad_0); } /** @@ -3756,11 +3797,12 @@ megasas_adp_reset_fusion(struct megasas_instance *instance, writel(MPI2_WRSEQ_6TH_KEY_VALUE, &instance->reg_set->fusion_seq_offset); /* Check that the diag write enable (DRWE) bit is on */ - host_diag = readl(&instance->reg_set->fusion_host_diag); + host_diag = megasas_readl(instance, &instance->reg_set->fusion_host_diag); retry = 0; while (!(host_diag & HOST_DIAG_WRITE_ENABLE)) { msleep(100); - host_diag = readl(&instance->reg_set->fusion_host_diag); + host_diag = megasas_readl(instance, + &instance->reg_set->fusion_host_diag); if (retry++ == 100) { dev_warn(&instance->pdev->dev, "Host diag unlock failed from %s %d\n", @@ -3777,11 +3819,12 @@ megasas_adp_reset_fusion(struct megasas_instance *instance, msleep(3000); /* Make sure reset adapter bit is cleared */ - host_diag = readl(&instance->reg_set->fusion_host_diag); + host_diag = megasas_readl(instance, &instance->reg_set->fusion_host_diag); retry = 0; while (host_diag & HOST_DIAG_RESET_ADAPTER) { msleep(100); - host_diag = readl(&instance->reg_set->fusion_host_diag); + host_diag = megasas_readl(instance, + &instance->reg_set->fusion_host_diag); if (retry++ == 1000) { dev_warn(&instance->pdev->dev, "Diag reset adapter never cleared %s %d\n", @@ -3792,14 +3835,14 @@ megasas_adp_reset_fusion(struct megasas_instance *instance, if (host_diag & HOST_DIAG_RESET_ADAPTER) return -1; - abs_state = instance->instancet->read_fw_status_reg(instance->reg_set) + abs_state = instance->instancet->read_fw_status_reg(instance) & MFI_STATE_MASK; retry = 0; while ((abs_state <= MFI_STATE_FW_INIT) && (retry++ < 1000)) { msleep(100); abs_state = instance->instancet-> - read_fw_status_reg(instance->reg_set) & MFI_STATE_MASK; + read_fw_status_reg(instance) & MFI_STATE_MASK; } if (abs_state <= MFI_STATE_FW_INIT) { dev_warn(&instance->pdev->dev, @@ -3822,17 +3865,60 @@ megasas_check_reset_fusion(struct megasas_instance *instance, return 0; } +/** + * megasas_trigger_snap_dump - Trigger snap dump in FW + * @instance: Soft instance of adapter + */ +static inline void megasas_trigger_snap_dump(struct megasas_instance *instance) +{ + int j; + u32 fw_state; + + if (!instance->disableOnlineCtrlReset) { + dev_info(&instance->pdev->dev, "Trigger snap dump\n"); + writel(MFI_ADP_TRIGGER_SNAP_DUMP, + &instance->reg_set->doorbell); + readl(&instance->reg_set->doorbell); + } + + for (j = 0; j < instance->snapdump_wait_time; j++) { + fw_state = instance->instancet->read_fw_status_reg(instance) & + MFI_STATE_MASK; + if (fw_state == MFI_STATE_FAULT) { + dev_err(&instance->pdev->dev, + "Found FW in FAULT state, after snap dump trigger\n"); + return; + } + msleep(1000); + } +} + /* This function waits for outstanding commands on fusion to complete */ int megasas_wait_for_outstanding_fusion(struct megasas_instance *instance, int reason, int *convert) { int i, outstanding, retval = 0, hb_seconds_missed = 0; u32 fw_state; + u32 waittime_for_io_completion; + + waittime_for_io_completion = + min_t(u32, resetwaittime, + (resetwaittime - instance->snapdump_wait_time)); - for (i = 0; i < resetwaittime; i++) { + if (reason == MFI_IO_TIMEOUT_OCR) { + dev_info(&instance->pdev->dev, + "MFI command is timed out\n"); + megasas_complete_cmd_dpc_fusion((unsigned long)instance); + if (instance->snapdump_wait_time) + megasas_trigger_snap_dump(instance); + retval = 1; + goto out; + } + + for (i = 0; i < waittime_for_io_completion; i++) { /* Check if firmware is in fault state */ - fw_state = instance->instancet->read_fw_status_reg( - instance->reg_set) & MFI_STATE_MASK; + fw_state = instance->instancet->read_fw_status_reg(instance) & + MFI_STATE_MASK; if (fw_state == MFI_STATE_FAULT) { dev_warn(&instance->pdev->dev, "Found FW in FAULT state," " will reset adapter scsi%d.\n", @@ -3850,13 +3936,6 @@ int megasas_wait_for_outstanding_fusion(struct megasas_instance *instance, goto out; } - if (reason == MFI_IO_TIMEOUT_OCR) { - dev_info(&instance->pdev->dev, - "MFI IO is timed out, initiating OCR\n"); - megasas_complete_cmd_dpc_fusion((unsigned long)instance); - retval = 1; - goto out; - } /* If SR-IOV VF mode & heartbeat timeout, don't wait */ if (instance->requestorId && !reason) { @@ -3901,6 +3980,12 @@ int megasas_wait_for_outstanding_fusion(struct megasas_instance *instance, msleep(1000); } + if (instance->snapdump_wait_time) { + megasas_trigger_snap_dump(instance); + retval = 1; + goto out; + } + if (atomic_read(&instance->fw_outstanding)) { dev_err(&instance->pdev->dev, "pending commands remain after waiting, " "will reset adapter scsi%d.\n", @@ -3908,6 +3993,7 @@ int megasas_wait_for_outstanding_fusion(struct megasas_instance *instance, *convert = 1; retval = 1; } + out: return retval; } @@ -4518,7 +4604,7 @@ int megasas_reset_fusion(struct Scsi_Host *shost, int reason) mutex_unlock(&instance->reset_mutex); return FAILED; } - status_reg = instance->instancet->read_fw_status_reg(instance->reg_set); + status_reg = instance->instancet->read_fw_status_reg(instance); abs_state = status_reg & MFI_STATE_MASK; /* IO timeout detected, forcibly put FW in FAULT state */ @@ -4527,7 +4613,7 @@ int megasas_reset_fusion(struct Scsi_Host *shost, int reason) dev_info(&instance->pdev->dev, "IO/DCMD timeout is detected, " "forcibly FAULT Firmware\n"); atomic_set(&instance->adprecovery, MEGASAS_ADPRESET_SM_INFAULT); - status_reg = readl(&instance->reg_set->doorbell); + status_reg = megasas_readl(instance, &instance->reg_set->doorbell); writel(status_reg | MFI_STATE_FORCE_OCR, &instance->reg_set->doorbell); readl(&instance->reg_set->doorbell); @@ -4578,7 +4664,7 @@ int megasas_reset_fusion(struct Scsi_Host *shost, int reason) for (i = 0 ; i < instance->max_scsi_cmds; i++) { cmd_fusion = fusion->cmd_list[i]; /*check for extra commands issued by driver*/ - if (instance->adapter_type == VENTURA_SERIES) { + if (instance->adapter_type >= VENTURA_SERIES) { r1_cmd = fusion->cmd_list[i + instance->max_fw_cmds]; megasas_return_cmd_fusion(instance, r1_cmd); } @@ -4605,8 +4691,7 @@ int megasas_reset_fusion(struct Scsi_Host *shost, int reason) atomic_set(&instance->fw_outstanding, 0); - status_reg = instance->instancet->read_fw_status_reg( - instance->reg_set); + status_reg = instance->instancet->read_fw_status_reg(instance); abs_state = status_reg & MFI_STATE_MASK; reset_adapter = status_reg & MFI_RESET_ADAPTER; if (instance->disableOnlineCtrlReset || @@ -4677,7 +4762,7 @@ transition_to_ready: megasas_setup_jbod_map(instance); /* reset stream detection array */ - if (instance->adapter_type == VENTURA_SERIES) { + if (instance->adapter_type >= VENTURA_SERIES) { for (j = 0; j < MAX_LOGICAL_DRIVES_EXT; ++j) { memset(fusion->stream_detect_by_ld[j], 0, sizeof(struct LD_STREAM_DETECT)); @@ -4721,6 +4806,13 @@ transition_to_ready: megasas_set_crash_dump_params(instance, MR_CRASH_BUF_TURN_OFF); + if (instance->snapdump_wait_time) { + megasas_get_snapdump_properties(instance); + dev_info(&instance->pdev->dev, + "Snap dump wait time\t: %d\n", + instance->snapdump_wait_time); + } + retval = SUCCESS; /* Adapter reset completed successfully */ @@ -4752,16 +4844,15 @@ out: return retval; } -/* Fusion Crash dump collection work queue */ -void megasas_fusion_crash_dump_wq(struct work_struct *work) +/* Fusion Crash dump collection */ +void megasas_fusion_crash_dump(struct megasas_instance *instance) { - struct megasas_instance *instance = - container_of(work, struct megasas_instance, crash_init); u32 status_reg; u8 partial_copy = 0; + int wait = 0; - status_reg = instance->instancet->read_fw_status_reg(instance->reg_set); + status_reg = instance->instancet->read_fw_status_reg(instance); /* * Allocate host crash buffers to copy data from 1 MB DMA crash buffer @@ -4777,8 +4868,8 @@ void megasas_fusion_crash_dump_wq(struct work_struct *work) "crash dump and initiating OCR\n"); status_reg |= MFI_STATE_CRASH_DUMP_DONE; writel(status_reg, - &instance->reg_set->outbound_scratch_pad); - readl(&instance->reg_set->outbound_scratch_pad); + &instance->reg_set->outbound_scratch_pad_0); + readl(&instance->reg_set->outbound_scratch_pad_0); return; } megasas_alloc_host_crash_buffer(instance); @@ -4786,21 +4877,41 @@ void megasas_fusion_crash_dump_wq(struct work_struct *work) "allocated: %d\n", instance->drv_buf_alloc); } - /* - * Driver has allocated max buffers, which can be allocated - * and FW has more crash dump data, then driver will - * ignore the data. - */ - if (instance->drv_buf_index >= (instance->drv_buf_alloc)) { - dev_info(&instance->pdev->dev, "Driver is done copying " - "the buffer: %d\n", instance->drv_buf_alloc); - status_reg |= MFI_STATE_CRASH_DUMP_DONE; - partial_copy = 1; - } else { - memcpy(instance->crash_buf[instance->drv_buf_index], - instance->crash_dump_buf, CRASH_DMA_BUF_SIZE); - instance->drv_buf_index++; - status_reg &= ~MFI_STATE_DMADONE; + while (!(status_reg & MFI_STATE_CRASH_DUMP_DONE) && + (wait < MEGASAS_WATCHDOG_WAIT_COUNT)) { + if (!(status_reg & MFI_STATE_DMADONE)) { + /* + * Next crash dump buffer is not yet DMA'd by FW + * Check after 10ms. Wait for 1 second for FW to + * post the next buffer. If not bail out. + */ + wait++; + msleep(MEGASAS_WAIT_FOR_NEXT_DMA_MSECS); + status_reg = instance->instancet->read_fw_status_reg( + instance); + continue; + } + + wait = 0; + if (instance->drv_buf_index >= instance->drv_buf_alloc) { + dev_info(&instance->pdev->dev, + "Driver is done copying the buffer: %d\n", + instance->drv_buf_alloc); + status_reg |= MFI_STATE_CRASH_DUMP_DONE; + partial_copy = 1; + break; + } else { + memcpy(instance->crash_buf[instance->drv_buf_index], + instance->crash_dump_buf, CRASH_DMA_BUF_SIZE); + instance->drv_buf_index++; + status_reg &= ~MFI_STATE_DMADONE; + } + + writel(status_reg, &instance->reg_set->outbound_scratch_pad_0); + readl(&instance->reg_set->outbound_scratch_pad_0); + + msleep(MEGASAS_WAIT_FOR_NEXT_DMA_MSECS); + status_reg = instance->instancet->read_fw_status_reg(instance); } if (status_reg & MFI_STATE_CRASH_DUMP_DONE) { @@ -4809,13 +4920,10 @@ void megasas_fusion_crash_dump_wq(struct work_struct *work) instance->fw_crash_buffer_size = instance->drv_buf_index; instance->fw_crash_state = AVAILABLE; instance->drv_buf_index = 0; - writel(status_reg, &instance->reg_set->outbound_scratch_pad); - readl(&instance->reg_set->outbound_scratch_pad); + writel(status_reg, &instance->reg_set->outbound_scratch_pad_0); + readl(&instance->reg_set->outbound_scratch_pad_0); if (!partial_copy) megasas_reset_fusion(instance->host, 0); - } else { - writel(status_reg, &instance->reg_set->outbound_scratch_pad); - readl(&instance->reg_set->outbound_scratch_pad); } } diff --git a/drivers/scsi/megaraid/megaraid_sas_fusion.h b/drivers/scsi/megaraid/megaraid_sas_fusion.h index 8e5ebee6517f..ca73c50fe723 100644 --- a/drivers/scsi/megaraid/megaraid_sas_fusion.h +++ b/drivers/scsi/megaraid/megaraid_sas_fusion.h @@ -2,7 +2,8 @@ * Linux MegaRAID driver for SAS based RAID controllers * * Copyright (c) 2009-2013 LSI Corporation - * Copyright (c) 2013-2014 Avago Technologies + * Copyright (c) 2013-2016 Avago Technologies + * Copyright (c) 2016-2018 Broadcom Inc. * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License @@ -19,16 +20,13 @@ * * FILE: megaraid_sas_fusion.h * - * Authors: Avago Technologies + * Authors: Broadcom Inc. * Manoj Jose * Sumant Patro - * Kashyap Desai <kashyap.desai@avagotech.com> - * Sumit Saxena <sumit.saxena@avagotech.com> + * Kashyap Desai <kashyap.desai@broadcom.com> + * Sumit Saxena <sumit.saxena@broadcom.com> * - * Send feedback to: megaraidlinux.pdl@avagotech.com - * - * Mail to: Avago Technologies, 350 West Trimble Road, Building 90, - * San Jose, California 95131 + * Send feedback to: megaraidlinux.pdl@broadcom.com */ #ifndef _MEGARAID_SAS_FUSION_H_ @@ -725,6 +723,7 @@ struct MPI2_IOC_INIT_REQUEST { #define MR_DCMD_CTRL_SHARED_HOST_MEM_ALLOC 0x010e8485 /* SR-IOV HB alloc*/ #define MR_DCMD_LD_VF_MAP_GET_ALL_LDS_111 0x03200200 #define MR_DCMD_LD_VF_MAP_GET_ALL_LDS 0x03150200 +#define MR_DCMD_CTRL_SNAPDUMP_GET_PROPERTIES 0x01200100 struct MR_DEV_HANDLE_INFO { __le16 curDevHdl; @@ -1063,6 +1062,9 @@ struct MR_FW_RAID_MAP_DYNAMIC { #define MPI26_IEEE_SGE_FLAGS_NSF_NVME_PRP (0x08) #define MPI26_IEEE_SGE_FLAGS_NSF_NVME_SGL (0x10) +#define MEGASAS_DEFAULT_SNAP_DUMP_WAIT_TIME 15 +#define MEGASAS_MAX_SNAP_DUMP_WAIT_TIME 60 + struct megasas_register_set; struct megasas_instance; @@ -1350,6 +1352,14 @@ enum CMD_RET_VALUES { RETURN_CMD = 3, }; +struct MR_SNAPDUMP_PROPERTIES { + u8 offload_num; + u8 max_num_supported; + u8 cur_num_supported; + u8 trigger_min_num_sec_before_ocr; + u8 reserved[12]; +}; + void megasas_free_cmds_fusion(struct megasas_instance *instance); int megasas_ioc_init_fusion(struct megasas_instance *instance); u8 megasas_get_map_info(struct megasas_instance *instance); diff --git a/drivers/scsi/mesh.c b/drivers/scsi/mesh.c index ec6940f2fcb3..f3e182eb0970 100644 --- a/drivers/scsi/mesh.c +++ b/drivers/scsi/mesh.c @@ -1838,7 +1838,7 @@ static struct scsi_host_template mesh_template = { .this_id = 7, .sg_tablesize = SG_ALL, .cmd_per_lun = 2, - .use_clustering = DISABLE_CLUSTERING, + .max_segment_size = 65535, }; static int mesh_probe(struct macio_dev *mdev, const struct of_device_id *match) diff --git a/drivers/scsi/mpt3sas/mpi/mpi2.h b/drivers/scsi/mpt3sas/mpi/mpi2.h index 1e45268a78fc..7efd17a3c25b 100644 --- a/drivers/scsi/mpt3sas/mpi/mpi2.h +++ b/drivers/scsi/mpt3sas/mpi/mpi2.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0 */ /* - * Copyright 2000-2015 Avago Technologies. All rights reserved. + * Copyright 2000-2020 Broadcom Inc. All rights reserved. * * * Name: mpi2.h @@ -9,7 +9,7 @@ * scatter/gather formats. * Creation Date: June 21, 2006 * - * mpi2.h Version: 02.00.50 + * mpi2.h Version: 02.00.53 * * NOTE: Names (typedefs, defines, etc.) beginning with an MPI25 or Mpi25 * prefix are for use only on MPI v2.5 products, and must not be used @@ -116,7 +116,12 @@ * 02-03-17 02.00.48 Bumped MPI2_HEADER_VERSION_UNIT. * 06-13-17 02.00.49 Bumped MPI2_HEADER_VERSION_UNIT. * 09-29-17 02.00.50 Bumped MPI2_HEADER_VERSION_UNIT. - * -------------------------------------------------------------------------- + * 07-22-18 02.00.51 Added SECURE_BOOT define. + * Bumped MPI2_HEADER_VERSION_UNIT + * 08-15-18 02.00.52 Bumped MPI2_HEADER_VERSION_UNIT. + * 08-28-18 02.00.53 Bumped MPI2_HEADER_VERSION_UNIT. + * Added MPI2_IOCSTATUS_FAILURE + * -------------------------------------------------------------------------- */ #ifndef MPI2_H @@ -156,7 +161,7 @@ /* Unit and Dev versioning for this MPI header set */ -#define MPI2_HEADER_VERSION_UNIT (0x32) +#define MPI2_HEADER_VERSION_UNIT (0x35) #define MPI2_HEADER_VERSION_DEV (0x00) #define MPI2_HEADER_VERSION_UNIT_MASK (0xFF00) #define MPI2_HEADER_VERSION_UNIT_SHIFT (8) @@ -257,6 +262,8 @@ typedef volatile struct _MPI2_SYSTEM_INTERFACE_REGS { */ #define MPI2_HOST_DIAGNOSTIC_OFFSET (0x00000008) +#define MPI26_DIAG_SECURE_BOOT (0x80000000) + #define MPI2_DIAG_SBR_RELOAD (0x00002000) #define MPI2_DIAG_BOOT_DEVICE_SELECT_MASK (0x00001800) @@ -687,7 +694,9 @@ typedef union _MPI2_REPLY_DESCRIPTORS_UNION { #define MPI2_IOCSTATUS_INVALID_FIELD (0x0007) #define MPI2_IOCSTATUS_INVALID_STATE (0x0008) #define MPI2_IOCSTATUS_OP_STATE_NOT_SUPPORTED (0x0009) +/*MPI v2.6 and later */ #define MPI2_IOCSTATUS_INSUFFICIENT_POWER (0x000A) +#define MPI2_IOCSTATUS_FAILURE (0x000F) /**************************************************************************** * Config IOCStatus values diff --git a/drivers/scsi/mpt3sas/mpi/mpi2_cnfg.h b/drivers/scsi/mpt3sas/mpi/mpi2_cnfg.h index 5122920a961a..398fa6fde960 100644 --- a/drivers/scsi/mpt3sas/mpi/mpi2_cnfg.h +++ b/drivers/scsi/mpt3sas/mpi/mpi2_cnfg.h @@ -1,13 +1,13 @@ /* SPDX-License-Identifier: GPL-2.0 */ /* - * Copyright 2000-2015 Avago Technologies. All rights reserved. + * Copyright 2000-2020 Broadcom Inc. All rights reserved. * * * Name: mpi2_cnfg.h * Title: MPI Configuration messages and pages * Creation Date: November 10, 2006 * - * mpi2_cnfg.h Version: 02.00.42 + * mpi2_cnfg.h Version: 02.00.46 * * NOTE: Names (typedefs, defines, etc.) beginning with an MPI25 or Mpi25 * prefix are for use only on MPI v2.5 products, and must not be used @@ -231,6 +231,18 @@ * Added NOIOB field to PCIe Device Page 2. * Added MPI26_PCIEDEV2_CAP_DATA_BLK_ALIGN_AND_GRAN to * the Capabilities field of PCIe Device Page 2. + * 07-22-18 02.00.43 Added defines for SAS3916 and SAS3816. + * Added WRiteCache defines to IO Unit Page 1. + * Added MaxEnclosureLevel to BIOS Page 1. + * Added OEMRD to SAS Enclosure Page 1. + * Added DMDReportPCIe to PCIe IO Unit Page 1. + * Added Flags field and flags for Retimers to + * PCIe Switch Page 1. + * 08-02-18 02.00.44 Added Slotx2, Slotx4 to ManPage 7. + * 08-15-18 02.00.45 Added ProductSpecific field at end of IOC Page 1 + * 08-28-18 02.00.46 Added NVMs Write Cache flag to IOUnitPage1 + * Added DMDReport Delay Time defines to + * PCIeIOUnitPage1 * -------------------------------------------------------------------------- */ @@ -568,8 +580,17 @@ typedef struct _MPI2_CONFIG_REPLY { #define MPI26_MFGPAGE_DEVID_SAS3616 (0x00D1) #define MPI26_MFGPAGE_DEVID_SAS3708 (0x00D2) -#define MPI26_MFGPAGE_DEVID_SAS3816 (0x00A1) -#define MPI26_MFGPAGE_DEVID_SAS3916 (0x00A0) +#define MPI26_MFGPAGE_DEVID_SEC_MASK_3916 (0x0003) +#define MPI26_MFGPAGE_DEVID_INVALID0_3916 (0x00E0) +#define MPI26_MFGPAGE_DEVID_CFG_SEC_3916 (0x00E1) +#define MPI26_MFGPAGE_DEVID_HARD_SEC_3916 (0x00E2) +#define MPI26_MFGPAGE_DEVID_INVALID1_3916 (0x00E3) + +#define MPI26_MFGPAGE_DEVID_SEC_MASK_3816 (0x0003) +#define MPI26_MFGPAGE_DEVID_INVALID0_3816 (0x00E4) +#define MPI26_MFGPAGE_DEVID_CFG_SEC_3816 (0x00E5) +#define MPI26_MFGPAGE_DEVID_HARD_SEC_3816 (0x00E6) +#define MPI26_MFGPAGE_DEVID_INVALID1_3816 (0x00E7) /*Manufacturing Page 0 */ @@ -932,7 +953,11 @@ typedef struct _MPI2_CONFIG_PAGE_IO_UNIT_1 { #define MPI2_IOUNITPAGE1_PAGEVERSION (0x04) -/*IO Unit Page 1 Flags defines */ +/* IO Unit Page 1 Flags defines */ +#define MPI26_IOUNITPAGE1_NVME_WRCACHE_MASK (0x00030000) +#define MPI26_IOUNITPAGE1_NVME_WRCACHE_ENABLE (0x00000000) +#define MPI26_IOUNITPAGE1_NVME_WRCACHE_DISABLE (0x00010000) +#define MPI26_IOUNITPAGE1_NVME_WRCACHE_NO_CHANGE (0x00020000) #define MPI2_IOUNITPAGE1_ATA_SECURITY_FREEZE_LOCK (0x00004000) #define MPI25_IOUNITPAGE1_NEW_DEVICE_FAST_PATH_DISABLE (0x00002000) #define MPI25_IOUNITPAGE1_DISABLE_FAST_PATH (0x00001000) @@ -1511,7 +1536,7 @@ typedef struct _MPI2_CONFIG_PAGE_BIOS_1 { U32 BiosOptions; /*0x04 */ U32 IOCSettings; /*0x08 */ U8 SSUTimeout; /*0x0C */ - U8 Reserved1; /*0x0D */ + U8 MaxEnclosureLevel; /*0x0D */ U16 Reserved2; /*0x0E */ U32 DeviceSettings; /*0x10 */ U16 NumberOfDevices; /*0x14 */ @@ -1531,7 +1556,6 @@ typedef struct _MPI2_CONFIG_PAGE_BIOS_1 { #define MPI2_BIOSPAGE1_OPTIONS_ADVANCED_CONFIG (0x00004000) #define MPI2_BIOSPAGE1_OPTIONS_PNS_MASK (0x00003800) -#define MPI2_BIOSPAGE1_OPTIONS_PNS_MASK (0x00003800) #define MPI2_BIOSPAGE1_OPTIONS_PNS_PBDHL (0x00000000) #define MPI2_BIOSPAGE1_OPTIONS_PNS_ENCSLOSURE (0x00000800) #define MPI2_BIOSPAGE1_OPTIONS_PNS_LWWID (0x00001000) @@ -3271,10 +3295,12 @@ typedef struct _MPI2_CONFIG_PAGE_SAS_ENCLOSURE_0 { U16 NumSlots; /*0x18 */ U16 StartSlot; /*0x1A */ U8 ChassisSlot; /*0x1C */ - U8 EnclosureLeve; /*0x1D */ + U8 EnclosureLevel; /*0x1D */ U16 SEPDevHandle; /*0x1E */ - U32 Reserved3; /*0x20 */ - U32 Reserved4; /*0x24 */ + U8 OEMRD; /*0x20 */ + U8 Reserved1a; /*0x21 */ + U16 Reserved2; /*0x22 */ + U32 Reserved3; /*0x24 */ } MPI2_CONFIG_PAGE_SAS_ENCLOSURE_0, *PTR_MPI2_CONFIG_PAGE_SAS_ENCLOSURE_0, Mpi2SasEnclosurePage0_t, *pMpi2SasEnclosurePage0_t, @@ -3285,6 +3311,8 @@ typedef struct _MPI2_CONFIG_PAGE_SAS_ENCLOSURE_0 { #define MPI2_SASENCLOSURE0_PAGEVERSION (0x04) /*values for SAS Enclosure Page 0 Flags field */ +#define MPI26_SAS_ENCLS0_FLAGS_OEMRD_VALID (0x0080) +#define MPI26_SAS_ENCLS0_FLAGS_OEMRD_COLLECTING (0x0040) #define MPI2_SAS_ENCLS0_FLAGS_CHASSIS_SLOT_VALID (0x0020) #define MPI2_SAS_ENCLS0_FLAGS_ENCL_LEVEL_VALID (0x0010) #define MPI2_SAS_ENCLS0_FLAGS_MNG_MASK (0x000F) @@ -3298,6 +3326,8 @@ typedef struct _MPI2_CONFIG_PAGE_SAS_ENCLOSURE_0 { #define MPI26_ENCLOSURE0_PAGEVERSION (0x04) /*Values for Enclosure Page 0 Flags field */ +#define MPI26_ENCLS0_FLAGS_OEMRD_VALID (0x0080) +#define MPI26_ENCLS0_FLAGS_OEMRD_COLLECTING (0x0040) #define MPI26_ENCLS0_FLAGS_CHASSIS_SLOT_VALID (0x0020) #define MPI26_ENCLS0_FLAGS_ENCL_LEVEL_VALID (0x0010) #define MPI26_ENCLS0_FLAGS_MNG_MASK (0x000F) @@ -3696,8 +3726,9 @@ typedef struct _MPI26_PCIE_IO_UNIT1_PHY_DATA { Mpi26PCIeIOUnit1PhyData_t, *pMpi26PCIeIOUnit1PhyData_t; /*values for LinkFlags */ -#define MPI26_PCIEIOUNIT1_LINKFLAGS_DIS_SRIS (0x00) -#define MPI26_PCIEIOUNIT1_LINKFLAGS_EN_SRIS (0x01) +#define MPI26_PCIEIOUNIT1_LINKFLAGS_DIS_SEPARATE_REFCLK (0x00) +#define MPI26_PCIEIOUNIT1_LINKFLAGS_SRIS_EN (0x01) +#define MPI26_PCIEIOUNIT1_LINKFLAGS_SRNS_EN (0x02) /* *Host code (drivers, BIOS, utilities, etc.) should leave this define set to @@ -3714,7 +3745,7 @@ typedef struct _MPI26_CONFIG_PAGE_PIOUNIT_1 { U16 AdditionalControlFlags; /*0x0C */ U16 NVMeMaxQueueDepth; /*0x0E */ U8 NumPhys; /*0x10 */ - U8 Reserved1; /*0x11 */ + U8 DMDReportPCIe; /*0x11 */ U16 Reserved2; /*0x12 */ MPI26_PCIE_IO_UNIT1_PHY_DATA PhyData[MPI26_PCIE_IOUNIT1_PHY_MAX];/*0x14 */ @@ -3736,6 +3767,12 @@ typedef struct _MPI26_CONFIG_PAGE_PIOUNIT_1 { #define MPI26_PCIEIOUNIT1_MAX_RATE_8_0 (0x40) #define MPI26_PCIEIOUNIT1_MAX_RATE_16_0 (0x50) +/*values for PCIe IO Unit Page 1 DMDReportPCIe */ +#define MPI26_PCIEIOUNIT1_DMDRPT_UNIT_MASK (0x80) +#define MPI26_PCIEIOUNIT1_DMDRPT_UNIT_1_SEC (0x00) +#define MPI26_PCIEIOUNIT1_DMDRPT_UNIT_16_SEC (0x80) +#define MPI26_PCIEIOUNIT1_DMDRPT_DELAY_TIME_MASK (0x7F) + /*see mpi2_pci.h for values for PCIe IO Unit Page 0 ControllerPhyDeviceInfo *values */ @@ -3788,6 +3825,9 @@ typedef struct _MPI26_CONFIG_PAGE_PSWITCH_1 { /*use MPI26_PCIE_NEG_LINK_RATE_ defines for the NegotiatedLinkRate field */ +/* defines for the Flags field */ +#define MPI26_PCIESWITCH1_2_RETIMER_PRESENCE (0x0002) +#define MPI26_PCIESWITCH1_RETIMER_PRESENCE (0x0001) /**************************************************************************** * PCIe Device Config Pages (MPI v2.6 and later) @@ -3849,19 +3889,21 @@ typedef struct _MPI26_CONFIG_PAGE_PCIEDEV_0 { *field */ -/*values for PCIe Device Page 0 Flags field */ -#define MPI26_PCIEDEV0_FLAGS_UNAUTHORIZED_DEVICE (0x8000) -#define MPI26_PCIEDEV0_FLAGS_ENABLED_FAST_PATH (0x4000) -#define MPI26_PCIEDEV0_FLAGS_FAST_PATH_CAPABLE (0x2000) -#define MPI26_PCIEDEV0_FLAGS_ASYNCHRONOUS_NOTIFICATION (0x0400) -#define MPI26_PCIEDEV0_FLAGS_ATA_SW_PRESERVATION (0x0200) -#define MPI26_PCIEDEV0_FLAGS_UNSUPPORTED_DEVICE (0x0100) -#define MPI26_PCIEDEV0_FLAGS_ATA_48BIT_LBA_SUPPORTED (0x0080) -#define MPI26_PCIEDEV0_FLAGS_ATA_SMART_SUPPORTED (0x0040) -#define MPI26_PCIEDEV0_FLAGS_ATA_NCQ_SUPPORTED (0x0020) -#define MPI26_PCIEDEV0_FLAGS_ATA_FUA_SUPPORTED (0x0010) -#define MPI26_PCIEDEV0_FLAGS_ENCL_LEVEL_VALID (0x0002) -#define MPI26_PCIEDEV0_FLAGS_DEVICE_PRESENT (0x0001) +/*values for PCIe Device Page 0 Flags field*/ +#define MPI26_PCIEDEV0_FLAGS_2_RETIMER_PRESENCE (0x00020000) +#define MPI26_PCIEDEV0_FLAGS_RETIMER_PRESENCE (0x00010000) +#define MPI26_PCIEDEV0_FLAGS_UNAUTHORIZED_DEVICE (0x00008000) +#define MPI26_PCIEDEV0_FLAGS_ENABLED_FAST_PATH (0x00004000) +#define MPI26_PCIEDEV0_FLAGS_FAST_PATH_CAPABLE (0x00002000) +#define MPI26_PCIEDEV0_FLAGS_ASYNCHRONOUS_NOTIFICATION (0x00000400) +#define MPI26_PCIEDEV0_FLAGS_ATA_SW_PRESERVATION (0x00000200) +#define MPI26_PCIEDEV0_FLAGS_UNSUPPORTED_DEVICE (0x00000100) +#define MPI26_PCIEDEV0_FLAGS_ATA_48BIT_LBA_SUPPORTED (0x00000080) +#define MPI26_PCIEDEV0_FLAGS_ATA_SMART_SUPPORTED (0x00000040) +#define MPI26_PCIEDEV0_FLAGS_ATA_NCQ_SUPPORTED (0x00000020) +#define MPI26_PCIEDEV0_FLAGS_ATA_FUA_SUPPORTED (0x00000010) +#define MPI26_PCIEDEV0_FLAGS_ENCL_LEVEL_VALID (0x00000002) +#define MPI26_PCIEDEV0_FLAGS_DEVICE_PRESENT (0x00000001) /* values for PCIe Device Page 0 SupportedLinkRates field */ #define MPI26_PCIEDEV0_LINK_RATE_16_0_SUPPORTED (0x08) diff --git a/drivers/scsi/mpt3sas/mpi/mpi2_image.h b/drivers/scsi/mpt3sas/mpi/mpi2_image.h new file mode 100644 index 000000000000..4959585f029d --- /dev/null +++ b/drivers/scsi/mpt3sas/mpi/mpi2_image.h @@ -0,0 +1,506 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright 2016-2020 Broadcom Limited. All rights reserved. + * + * Name: mpi2_image.h + * Description: Contains definitions for firmware and other component images + * Creation Date: 04/02/2018 + * Version: 02.06.03 + * + * + * Version History + * --------------- + * + * Date Version Description + * -------- -------- ------------------------------------------------------ + * 08-01-18 02.06.00 Initial version for MPI 2.6.5. + * 08-14-18 02.06.01 Corrected define for MPI26_IMAGE_HEADER_SIGNATURE0_MPI26 + * 08-28-18 02.06.02 Added MPI2_EXT_IMAGE_TYPE_RDE + * 09-07-18 02.06.03 Added MPI26_EVENT_PCIE_TOPO_PI_16_LANES + */ +#ifndef MPI2_IMAGE_H +#define MPI2_IMAGE_H + + +/*FW Image Header */ +typedef struct _MPI2_FW_IMAGE_HEADER { + U32 Signature; /*0x00 */ + U32 Signature0; /*0x04 */ + U32 Signature1; /*0x08 */ + U32 Signature2; /*0x0C */ + MPI2_VERSION_UNION MPIVersion; /*0x10 */ + MPI2_VERSION_UNION FWVersion; /*0x14 */ + MPI2_VERSION_UNION NVDATAVersion; /*0x18 */ + MPI2_VERSION_UNION PackageVersion; /*0x1C */ + U16 VendorID; /*0x20 */ + U16 ProductID; /*0x22 */ + U16 ProtocolFlags; /*0x24 */ + U16 Reserved26; /*0x26 */ + U32 IOCCapabilities; /*0x28 */ + U32 ImageSize; /*0x2C */ + U32 NextImageHeaderOffset; /*0x30 */ + U32 Checksum; /*0x34 */ + U32 Reserved38; /*0x38 */ + U32 Reserved3C; /*0x3C */ + U32 Reserved40; /*0x40 */ + U32 Reserved44; /*0x44 */ + U32 Reserved48; /*0x48 */ + U32 Reserved4C; /*0x4C */ + U32 Reserved50; /*0x50 */ + U32 Reserved54; /*0x54 */ + U32 Reserved58; /*0x58 */ + U32 Reserved5C; /*0x5C */ + U32 BootFlags; /*0x60 */ + U32 FirmwareVersionNameWhat; /*0x64 */ + U8 FirmwareVersionName[32]; /*0x68 */ + U32 VendorNameWhat; /*0x88 */ + U8 VendorName[32]; /*0x8C */ + U32 PackageNameWhat; /*0x88 */ + U8 PackageName[32]; /*0x8C */ + U32 ReservedD0; /*0xD0 */ + U32 ReservedD4; /*0xD4 */ + U32 ReservedD8; /*0xD8 */ + U32 ReservedDC; /*0xDC */ + U32 ReservedE0; /*0xE0 */ + U32 ReservedE4; /*0xE4 */ + U32 ReservedE8; /*0xE8 */ + U32 ReservedEC; /*0xEC */ + U32 ReservedF0; /*0xF0 */ + U32 ReservedF4; /*0xF4 */ + U32 ReservedF8; /*0xF8 */ + U32 ReservedFC; /*0xFC */ +} MPI2_FW_IMAGE_HEADER, *PTR_MPI2_FW_IMAGE_HEADER, + Mpi2FWImageHeader_t, *pMpi2FWImageHeader_t; + +/*Signature field */ +#define MPI2_FW_HEADER_SIGNATURE_OFFSET (0x00) +#define MPI2_FW_HEADER_SIGNATURE_MASK (0xFF000000) +#define MPI2_FW_HEADER_SIGNATURE (0xEA000000) +#define MPI26_FW_HEADER_SIGNATURE (0xEB000000) + +/*Signature0 field */ +#define MPI2_FW_HEADER_SIGNATURE0_OFFSET (0x04) +#define MPI2_FW_HEADER_SIGNATURE0 (0x5AFAA55A) +/*Last byte is defined by architecture */ +#define MPI26_FW_HEADER_SIGNATURE0_BASE (0x5AEAA500) +#define MPI26_FW_HEADER_SIGNATURE0_ARC_0 (0x5A) +#define MPI26_FW_HEADER_SIGNATURE0_ARC_1 (0x00) +#define MPI26_FW_HEADER_SIGNATURE0_ARC_2 (0x01) +/*legacy (0x5AEAA55A) */ +#define MPI26_FW_HEADER_SIGNATURE0_ARC_3 (0x02) +#define MPI26_FW_HEADER_SIGNATURE0 \ + (MPI26_FW_HEADER_SIGNATURE0_BASE+MPI26_FW_HEADER_SIGNATURE0_ARC_0) +#define MPI26_FW_HEADER_SIGNATURE0_3516 \ + (MPI26_FW_HEADER_SIGNATURE0_BASE+MPI26_FW_HEADER_SIGNATURE0_ARC_1) +#define MPI26_FW_HEADER_SIGNATURE0_4008 \ + (MPI26_FW_HEADER_SIGNATURE0_BASE+MPI26_FW_HEADER_SIGNATURE0_ARC_3) + +/*Signature1 field */ +#define MPI2_FW_HEADER_SIGNATURE1_OFFSET (0x08) +#define MPI2_FW_HEADER_SIGNATURE1 (0xA55AFAA5) +#define MPI26_FW_HEADER_SIGNATURE1 (0xA55AEAA5) + +/*Signature2 field */ +#define MPI2_FW_HEADER_SIGNATURE2_OFFSET (0x0C) +#define MPI2_FW_HEADER_SIGNATURE2 (0x5AA55AFA) +#define MPI26_FW_HEADER_SIGNATURE2 (0x5AA55AEA) + +/*defines for using the ProductID field */ +#define MPI2_FW_HEADER_PID_TYPE_MASK (0xF000) +#define MPI2_FW_HEADER_PID_TYPE_SAS (0x2000) + +#define MPI2_FW_HEADER_PID_PROD_MASK (0x0F00) +#define MPI2_FW_HEADER_PID_PROD_A (0x0000) +#define MPI2_FW_HEADER_PID_PROD_TARGET_INITIATOR_SCSI (0x0200) +#define MPI2_FW_HEADER_PID_PROD_IR_SCSI (0x0700) + +#define MPI2_FW_HEADER_PID_FAMILY_MASK (0x00FF) +/*SAS ProductID Family bits */ +#define MPI2_FW_HEADER_PID_FAMILY_2108_SAS (0x0013) +#define MPI2_FW_HEADER_PID_FAMILY_2208_SAS (0x0014) +#define MPI25_FW_HEADER_PID_FAMILY_3108_SAS (0x0021) +#define MPI26_FW_HEADER_PID_FAMILY_3324_SAS (0x0028) +#define MPI26_FW_HEADER_PID_FAMILY_3516_SAS (0x0031) + +/*use MPI2_IOCFACTS_PROTOCOL_ defines for ProtocolFlags field */ + +/*use MPI2_IOCFACTS_CAPABILITY_ defines for IOCCapabilities field */ + +#define MPI2_FW_HEADER_IMAGESIZE_OFFSET (0x2C) +#define MPI2_FW_HEADER_NEXTIMAGE_OFFSET (0x30) + +#define MPI26_FW_HEADER_BOOTFLAGS_OFFSET (0x60) +#define MPI2_FW_HEADER_BOOTFLAGS_ISSI32M_FLAG (0x00000001) +#define MPI2_FW_HEADER_BOOTFLAGS_W25Q256JW_FLAG (0x00000002) +/*This image has a auto-discovery version of SPI */ +#define MPI2_FW_HEADER_BOOTFLAGS_AUTO_SPI_FLAG (0x00000004) + + +#define MPI2_FW_HEADER_VERNMHWAT_OFFSET (0x64) + +#define MPI2_FW_HEADER_WHAT_SIGNATURE (0x29232840) + +#define MPI2_FW_HEADER_SIZE (0x100) + + +/**************************************************************************** + * Component Image Format and related defines * + ****************************************************************************/ + +/*Maximum number of Hash Exclusion entries in a Component Image Header */ +#define MPI26_COMP_IMG_HDR_NUM_HASH_EXCL (4) + +/*Hash Exclusion Format */ +typedef struct _MPI26_HASH_EXCLUSION_FORMAT { + U32 Offset; /*0x00 */ + U32 Size; /*0x04 */ +} MPI26_HASH_EXCLUSION_FORMAT, + *PTR_MPI26_HASH_EXCLUSION_FORMAT, + Mpi26HashSxclusionFormat_t, + *pMpi26HashExclusionFormat_t; + +/*FW Image Header */ +typedef struct _MPI26_COMPONENT_IMAGE_HEADER { + U32 Signature0; /*0x00 */ + U32 LoadAddress; /*0x04 */ + U32 DataSize; /*0x08 */ + U32 StartAddress; /*0x0C */ + U32 Signature1; /*0x10 */ + U32 FlashOffset; /*0x14 */ + U32 FlashSize; /*0x18 */ + U32 VersionStringOffset; /*0x1C */ + U32 BuildDateStringOffset; /*0x20 */ + U32 BuildTimeStringOffset; /*0x24 */ + U32 EnvironmentVariableOffset; /*0x28 */ + U32 ApplicationSpecific; /*0x2C */ + U32 Signature2; /*0x30 */ + U32 HeaderSize; /*0x34 */ + U32 Crc; /*0x38 */ + U8 NotFlashImage; /*0x3C */ + U8 Compressed; /*0x3D */ + U16 Reserved3E; /*0x3E */ + U32 SecondaryFlashOffset; /*0x40 */ + U32 Reserved44; /*0x44 */ + U32 Reserved48; /*0x48 */ + MPI2_VERSION_UNION RMCInterfaceVersion; /*0x4C */ + MPI2_VERSION_UNION Reserved50; /*0x50 */ + MPI2_VERSION_UNION FWVersion; /*0x54 */ + MPI2_VERSION_UNION NvdataVersion; /*0x58 */ + MPI26_HASH_EXCLUSION_FORMAT + HashExclusion[MPI26_COMP_IMG_HDR_NUM_HASH_EXCL];/*0x5C */ + U32 NextImageHeaderOffset; /*0x7C */ + U32 Reserved80[32]; /*0x80 -- 0xFC */ +} MPI26_COMPONENT_IMAGE_HEADER, + *PTR_MPI26_COMPONENT_IMAGE_HEADER, + Mpi26ComponentImageHeader_t, + *pMpi26ComponentImageHeader_t; + + +/**** Definitions for Signature0 field ****/ +#define MPI26_IMAGE_HEADER_SIGNATURE0_MPI26 (0xEB000042) + +/**** Definitions for Signature1 field ****/ +#define MPI26_IMAGE_HEADER_SIGNATURE1_APPLICATION (0x20505041) +#define MPI26_IMAGE_HEADER_SIGNATURE1_CBB (0x20424243) +#define MPI26_IMAGE_HEADER_SIGNATURE1_MFG (0x2047464D) +#define MPI26_IMAGE_HEADER_SIGNATURE1_BIOS (0x534F4942) +#define MPI26_IMAGE_HEADER_SIGNATURE1_HIIM (0x4D494948) +#define MPI26_IMAGE_HEADER_SIGNATURE1_HIIA (0x41494948) +#define MPI26_IMAGE_HEADER_SIGNATURE1_CPLD (0x444C5043) +#define MPI26_IMAGE_HEADER_SIGNATURE1_SPD (0x20445053) +#define MPI26_IMAGE_HEADER_SIGNATURE1_NVDATA (0x5444564E) +#define MPI26_IMAGE_HEADER_SIGNATURE1_GAS_GAUGE (0x20534147) +#define MPI26_IMAGE_HEADER_SIGNATURE1_PBLP (0x50424C50) + +/**** Definitions for Signature2 field ****/ +#define MPI26_IMAGE_HEADER_SIGNATURE2_VALUE (0x50584546) + +/**** Offsets for Image Header Fields ****/ +#define MPI26_IMAGE_HEADER_SIGNATURE0_OFFSET (0x00) +#define MPI26_IMAGE_HEADER_LOAD_ADDRESS_OFFSET (0x04) +#define MPI26_IMAGE_HEADER_DATA_SIZE_OFFSET (0x08) +#define MPI26_IMAGE_HEADER_START_ADDRESS_OFFSET (0x0C) +#define MPI26_IMAGE_HEADER_SIGNATURE1_OFFSET (0x10) +#define MPI26_IMAGE_HEADER_FLASH_OFFSET_OFFSET (0x14) +#define MPI26_IMAGE_HEADER_FLASH_SIZE_OFFSET (0x18) +#define MPI26_IMAGE_HEADER_VERSION_STRING_OFFSET_OFFSET (0x1C) +#define MPI26_IMAGE_HEADER_BUILD_DATE_STRING_OFFSET_OFFSET (0x20) +#define MPI26_IMAGE_HEADER_BUILD_TIME_OFFSET_OFFSET (0x24) +#define MPI26_IMAGE_HEADER_ENVIROMENT_VAR_OFFSET_OFFSET (0x28) +#define MPI26_IMAGE_HEADER_APPLICATION_SPECIFIC_OFFSET (0x2C) +#define MPI26_IMAGE_HEADER_SIGNATURE2_OFFSET (0x30) +#define MPI26_IMAGE_HEADER_HEADER_SIZE_OFFSET (0x34) +#define MPI26_IMAGE_HEADER_CRC_OFFSET (0x38) +#define MPI26_IMAGE_HEADER_NOT_FLASH_IMAGE_OFFSET (0x3C) +#define MPI26_IMAGE_HEADER_COMPRESSED_OFFSET (0x3D) +#define MPI26_IMAGE_HEADER_SECONDARY_FLASH_OFFSET_OFFSET (0x40) +#define MPI26_IMAGE_HEADER_RMC_INTERFACE_VER_OFFSET (0x4C) +#define MPI26_IMAGE_HEADER_COMPONENT_IMAGE_VER_OFFSET (0x54) +#define MPI26_IMAGE_HEADER_HASH_EXCLUSION_OFFSET (0x5C) +#define MPI26_IMAGE_HEADER_NEXT_IMAGE_HEADER_OFFSET_OFFSET (0x7C) + + +#define MPI26_IMAGE_HEADER_SIZE (0x100) + + +/*Extended Image Header */ +typedef struct _MPI2_EXT_IMAGE_HEADER { + U8 ImageType; /*0x00 */ + U8 Reserved1; /*0x01 */ + U16 Reserved2; /*0x02 */ + U32 Checksum; /*0x04 */ + U32 ImageSize; /*0x08 */ + U32 NextImageHeaderOffset; /*0x0C */ + U32 PackageVersion; /*0x10 */ + U32 Reserved3; /*0x14 */ + U32 Reserved4; /*0x18 */ + U32 Reserved5; /*0x1C */ + U8 IdentifyString[32]; /*0x20 */ +} MPI2_EXT_IMAGE_HEADER, *PTR_MPI2_EXT_IMAGE_HEADER, + Mpi2ExtImageHeader_t, *pMpi2ExtImageHeader_t; + +/*useful offsets */ +#define MPI2_EXT_IMAGE_IMAGETYPE_OFFSET (0x00) +#define MPI2_EXT_IMAGE_IMAGESIZE_OFFSET (0x08) +#define MPI2_EXT_IMAGE_NEXTIMAGE_OFFSET (0x0C) +#define MPI2_EXT_IMAGE_PACKAGEVERSION_OFFSET (0x10) + +#define MPI2_EXT_IMAGE_HEADER_SIZE (0x40) + +/*defines for the ImageType field */ +#define MPI2_EXT_IMAGE_TYPE_UNSPECIFIED (0x00) +#define MPI2_EXT_IMAGE_TYPE_FW (0x01) +#define MPI2_EXT_IMAGE_TYPE_NVDATA (0x03) +#define MPI2_EXT_IMAGE_TYPE_BOOTLOADER (0x04) +#define MPI2_EXT_IMAGE_TYPE_INITIALIZATION (0x05) +#define MPI2_EXT_IMAGE_TYPE_FLASH_LAYOUT (0x06) +#define MPI2_EXT_IMAGE_TYPE_SUPPORTED_DEVICES (0x07) +#define MPI2_EXT_IMAGE_TYPE_MEGARAID (0x08) +#define MPI2_EXT_IMAGE_TYPE_ENCRYPTED_HASH (0x09) +#define MPI2_EXT_IMAGE_TYPE_RDE (0x0A) +#define MPI2_EXT_IMAGE_TYPE_MIN_PRODUCT_SPECIFIC (0x80) +#define MPI2_EXT_IMAGE_TYPE_MAX_PRODUCT_SPECIFIC (0xFF) + +#define MPI2_EXT_IMAGE_TYPE_MAX (MPI2_EXT_IMAGE_TYPE_MAX_PRODUCT_SPECIFIC) + +/*FLASH Layout Extended Image Data */ + +/* + *Host code (drivers, BIOS, utilities, etc.) should leave this define set to + *one and check RegionsPerLayout at runtime. + */ +#ifndef MPI2_FLASH_NUMBER_OF_REGIONS +#define MPI2_FLASH_NUMBER_OF_REGIONS (1) +#endif + +/* + *Host code (drivers, BIOS, utilities, etc.) should leave this define set to + *one and check NumberOfLayouts at runtime. + */ +#ifndef MPI2_FLASH_NUMBER_OF_LAYOUTS +#define MPI2_FLASH_NUMBER_OF_LAYOUTS (1) +#endif + +typedef struct _MPI2_FLASH_REGION { + U8 RegionType; /*0x00 */ + U8 Reserved1; /*0x01 */ + U16 Reserved2; /*0x02 */ + U32 RegionOffset; /*0x04 */ + U32 RegionSize; /*0x08 */ + U32 Reserved3; /*0x0C */ +} MPI2_FLASH_REGION, *PTR_MPI2_FLASH_REGION, + Mpi2FlashRegion_t, *pMpi2FlashRegion_t; + +typedef struct _MPI2_FLASH_LAYOUT { + U32 FlashSize; /*0x00 */ + U32 Reserved1; /*0x04 */ + U32 Reserved2; /*0x08 */ + U32 Reserved3; /*0x0C */ + MPI2_FLASH_REGION Region[MPI2_FLASH_NUMBER_OF_REGIONS]; /*0x10 */ +} MPI2_FLASH_LAYOUT, *PTR_MPI2_FLASH_LAYOUT, + Mpi2FlashLayout_t, *pMpi2FlashLayout_t; + +typedef struct _MPI2_FLASH_LAYOUT_DATA { + U8 ImageRevision; /*0x00 */ + U8 Reserved1; /*0x01 */ + U8 SizeOfRegion; /*0x02 */ + U8 Reserved2; /*0x03 */ + U16 NumberOfLayouts; /*0x04 */ + U16 RegionsPerLayout; /*0x06 */ + U16 MinimumSectorAlignment; /*0x08 */ + U16 Reserved3; /*0x0A */ + U32 Reserved4; /*0x0C */ + MPI2_FLASH_LAYOUT Layout[MPI2_FLASH_NUMBER_OF_LAYOUTS]; /*0x10 */ +} MPI2_FLASH_LAYOUT_DATA, *PTR_MPI2_FLASH_LAYOUT_DATA, + Mpi2FlashLayoutData_t, *pMpi2FlashLayoutData_t; + +/*defines for the RegionType field */ +#define MPI2_FLASH_REGION_UNUSED (0x00) +#define MPI2_FLASH_REGION_FIRMWARE (0x01) +#define MPI2_FLASH_REGION_BIOS (0x02) +#define MPI2_FLASH_REGION_NVDATA (0x03) +#define MPI2_FLASH_REGION_FIRMWARE_BACKUP (0x05) +#define MPI2_FLASH_REGION_MFG_INFORMATION (0x06) +#define MPI2_FLASH_REGION_CONFIG_1 (0x07) +#define MPI2_FLASH_REGION_CONFIG_2 (0x08) +#define MPI2_FLASH_REGION_MEGARAID (0x09) +#define MPI2_FLASH_REGION_COMMON_BOOT_BLOCK (0x0A) +#define MPI2_FLASH_REGION_INIT (MPI2_FLASH_REGION_COMMON_BOOT_BLOCK) +#define MPI2_FLASH_REGION_CBB_BACKUP (0x0D) +#define MPI2_FLASH_REGION_SBR (0x0E) +#define MPI2_FLASH_REGION_SBR_BACKUP (0x0F) +#define MPI2_FLASH_REGION_HIIM (0x10) +#define MPI2_FLASH_REGION_HIIA (0x11) +#define MPI2_FLASH_REGION_CTLR (0x12) +#define MPI2_FLASH_REGION_IMR_FIRMWARE (0x13) +#define MPI2_FLASH_REGION_MR_NVDATA (0x14) +#define MPI2_FLASH_REGION_CPLD (0x15) +#define MPI2_FLASH_REGION_PSOC (0x16) + +/*ImageRevision */ +#define MPI2_FLASH_LAYOUT_IMAGE_REVISION (0x00) + +/*Supported Devices Extended Image Data */ + +/* + *Host code (drivers, BIOS, utilities, etc.) should leave this define set to + *one and check NumberOfDevices at runtime. + */ +#ifndef MPI2_SUPPORTED_DEVICES_IMAGE_NUM_DEVICES +#define MPI2_SUPPORTED_DEVICES_IMAGE_NUM_DEVICES (1) +#endif + +typedef struct _MPI2_SUPPORTED_DEVICE { + U16 DeviceID; /*0x00 */ + U16 VendorID; /*0x02 */ + U16 DeviceIDMask; /*0x04 */ + U16 Reserved1; /*0x06 */ + U8 LowPCIRev; /*0x08 */ + U8 HighPCIRev; /*0x09 */ + U16 Reserved2; /*0x0A */ + U32 Reserved3; /*0x0C */ +} MPI2_SUPPORTED_DEVICE, *PTR_MPI2_SUPPORTED_DEVICE, + Mpi2SupportedDevice_t, *pMpi2SupportedDevice_t; + +typedef struct _MPI2_SUPPORTED_DEVICES_DATA { + U8 ImageRevision; /*0x00 */ + U8 Reserved1; /*0x01 */ + U8 NumberOfDevices; /*0x02 */ + U8 Reserved2; /*0x03 */ + U32 Reserved3; /*0x04 */ + MPI2_SUPPORTED_DEVICE + SupportedDevice[MPI2_SUPPORTED_DEVICES_IMAGE_NUM_DEVICES];/*0x08 */ +} MPI2_SUPPORTED_DEVICES_DATA, *PTR_MPI2_SUPPORTED_DEVICES_DATA, + Mpi2SupportedDevicesData_t, *pMpi2SupportedDevicesData_t; + +/*ImageRevision */ +#define MPI2_SUPPORTED_DEVICES_IMAGE_REVISION (0x00) + +/*Init Extended Image Data */ + +typedef struct _MPI2_INIT_IMAGE_FOOTER { + U32 BootFlags; /*0x00 */ + U32 ImageSize; /*0x04 */ + U32 Signature0; /*0x08 */ + U32 Signature1; /*0x0C */ + U32 Signature2; /*0x10 */ + U32 ResetVector; /*0x14 */ +} MPI2_INIT_IMAGE_FOOTER, *PTR_MPI2_INIT_IMAGE_FOOTER, + Mpi2InitImageFooter_t, *pMpi2InitImageFooter_t; + +/*defines for the BootFlags field */ +#define MPI2_INIT_IMAGE_BOOTFLAGS_OFFSET (0x00) + +/*defines for the ImageSize field */ +#define MPI2_INIT_IMAGE_IMAGESIZE_OFFSET (0x04) + +/*defines for the Signature0 field */ +#define MPI2_INIT_IMAGE_SIGNATURE0_OFFSET (0x08) +#define MPI2_INIT_IMAGE_SIGNATURE0 (0x5AA55AEA) + +/*defines for the Signature1 field */ +#define MPI2_INIT_IMAGE_SIGNATURE1_OFFSET (0x0C) +#define MPI2_INIT_IMAGE_SIGNATURE1 (0xA55AEAA5) + +/*defines for the Signature2 field */ +#define MPI2_INIT_IMAGE_SIGNATURE2_OFFSET (0x10) +#define MPI2_INIT_IMAGE_SIGNATURE2 (0x5AEAA55A) + +/*Signature fields as individual bytes */ +#define MPI2_INIT_IMAGE_SIGNATURE_BYTE_0 (0xEA) +#define MPI2_INIT_IMAGE_SIGNATURE_BYTE_1 (0x5A) +#define MPI2_INIT_IMAGE_SIGNATURE_BYTE_2 (0xA5) +#define MPI2_INIT_IMAGE_SIGNATURE_BYTE_3 (0x5A) + +#define MPI2_INIT_IMAGE_SIGNATURE_BYTE_4 (0xA5) +#define MPI2_INIT_IMAGE_SIGNATURE_BYTE_5 (0xEA) +#define MPI2_INIT_IMAGE_SIGNATURE_BYTE_6 (0x5A) +#define MPI2_INIT_IMAGE_SIGNATURE_BYTE_7 (0xA5) + +#define MPI2_INIT_IMAGE_SIGNATURE_BYTE_8 (0x5A) +#define MPI2_INIT_IMAGE_SIGNATURE_BYTE_9 (0xA5) +#define MPI2_INIT_IMAGE_SIGNATURE_BYTE_A (0xEA) +#define MPI2_INIT_IMAGE_SIGNATURE_BYTE_B (0x5A) + +/*defines for the ResetVector field */ +#define MPI2_INIT_IMAGE_RESETVECTOR_OFFSET (0x14) + + +/* Encrypted Hash Extended Image Data */ + +typedef struct _MPI25_ENCRYPTED_HASH_ENTRY { + U8 HashImageType; /*0x00 */ + U8 HashAlgorithm; /*0x01 */ + U8 EncryptionAlgorithm; /*0x02 */ + U8 Reserved1; /*0x03 */ + U32 Reserved2; /*0x04 */ + U32 EncryptedHash[1]; /*0x08 */ /* variable length */ +} MPI25_ENCRYPTED_HASH_ENTRY, *PTR_MPI25_ENCRYPTED_HASH_ENTRY, +Mpi25EncryptedHashEntry_t, *pMpi25EncryptedHashEntry_t; + +/* values for HashImageType */ +#define MPI25_HASH_IMAGE_TYPE_UNUSED (0x00) +#define MPI25_HASH_IMAGE_TYPE_FIRMWARE (0x01) +#define MPI25_HASH_IMAGE_TYPE_BIOS (0x02) + +#define MPI26_HASH_IMAGE_TYPE_UNUSED (0x00) +#define MPI26_HASH_IMAGE_TYPE_FIRMWARE (0x01) +#define MPI26_HASH_IMAGE_TYPE_BIOS (0x02) +#define MPI26_HASH_IMAGE_TYPE_KEY_HASH (0x03) + +/* values for HashAlgorithm */ +#define MPI25_HASH_ALGORITHM_UNUSED (0x00) +#define MPI25_HASH_ALGORITHM_SHA256 (0x01) + +#define MPI26_HASH_ALGORITHM_VERSION_MASK (0xE0) +#define MPI26_HASH_ALGORITHM_VERSION_NONE (0x00) +#define MPI26_HASH_ALGORITHM_VERSION_SHA1 (0x20) +#define MPI26_HASH_ALGORITHM_VERSION_SHA2 (0x40) +#define MPI26_HASH_ALGORITHM_VERSION_SHA3 (0x60) +#define MPI26_HASH_ALGORITHM_SIZE_MASK (0x1F) +#define MPI26_HASH_ALGORITHM_SIZE_256 (0x01) +#define MPI26_HASH_ALGORITHM_SIZE_512 (0x02) + + +/* values for EncryptionAlgorithm */ +#define MPI25_ENCRYPTION_ALG_UNUSED (0x00) +#define MPI25_ENCRYPTION_ALG_RSA256 (0x01) + +#define MPI26_ENCRYPTION_ALG_UNUSED (0x00) +#define MPI26_ENCRYPTION_ALG_RSA256 (0x01) +#define MPI26_ENCRYPTION_ALG_RSA512 (0x02) +#define MPI26_ENCRYPTION_ALG_RSA1024 (0x03) +#define MPI26_ENCRYPTION_ALG_RSA2048 (0x04) +#define MPI26_ENCRYPTION_ALG_RSA4096 (0x05) + +typedef struct _MPI25_ENCRYPTED_HASH_DATA { + U8 ImageVersion; /*0x00 */ + U8 NumHash; /*0x01 */ + U16 Reserved1; /*0x02 */ + U32 Reserved2; /*0x04 */ + MPI25_ENCRYPTED_HASH_ENTRY EncryptedHashEntry[1]; /*0x08 */ +} MPI25_ENCRYPTED_HASH_DATA, *PTR_MPI25_ENCRYPTED_HASH_DATA, +Mpi25EncryptedHashData_t, *pMpi25EncryptedHashData_t; + + +#endif /* MPI2_IMAGE_H */ diff --git a/drivers/scsi/mpt3sas/mpi/mpi2_init.h b/drivers/scsi/mpt3sas/mpi/mpi2_init.h index 6213ce6791ac..8f1b903fe0a9 100644 --- a/drivers/scsi/mpt3sas/mpi/mpi2_init.h +++ b/drivers/scsi/mpt3sas/mpi/mpi2_init.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0 */ /* - * Copyright 2000-2015 Avago Technologies. All rights reserved. + * Copyright 2000-2020 Broadcom Inc. All rights reserved. * * * Name: mpi2_init.h diff --git a/drivers/scsi/mpt3sas/mpi/mpi2_ioc.h b/drivers/scsi/mpt3sas/mpi/mpi2_ioc.h index 1faec3a93e69..68ea408cd5c5 100644 --- a/drivers/scsi/mpt3sas/mpi/mpi2_ioc.h +++ b/drivers/scsi/mpt3sas/mpi/mpi2_ioc.h @@ -1,13 +1,13 @@ /* SPDX-License-Identifier: GPL-2.0 */ /* - * Copyright 2000-2015 Avago Technologies. All rights reserved. + * Copyright 2000-2020 Broadcom Inc. All rights reserved. * * * Name: mpi2_ioc.h * Title: MPI IOC, Port, Event, FW Download, and FW Upload messages * Creation Date: October 11, 2006 * - * mpi2_ioc.h Version: 02.00.34 + * mpi2_ioc.h Version: 02.00.37 * * NOTE: Names (typedefs, defines, etc.) beginning with an MPI25 or Mpi25 * prefix are for use only on MPI v2.5 products, and must not be used @@ -171,6 +171,10 @@ * 09-29-17 02.00.34 Added MPI26_EVENT_PCIDEV_STAT_RC_PCIE_HOT_RESET_FAILED * to the ReasonCode field in PCIe Device Status Change * Event Data. + * 07-22-18 02.00.35 Added FW_DOWNLOAD_ITYPE_CPLD and _PSOC. + * Moved FW image definitions ionto new mpi2_image,h + * 08-14-18 02.00.36 Fixed definition of MPI2_FW_DOWNLOAD_ITYPE_PSOC (0x16) + * 09-07-18 02.00.37 Added MPI26_EVENT_PCIE_TOPO_PI_16_LANES * -------------------------------------------------------------------------- */ @@ -1255,6 +1259,7 @@ typedef struct _MPI26_EVENT_PCIE_TOPO_PORT_ENTRY { #define MPI26_EVENT_PCIE_TOPO_PI_2_LANES (0x20) #define MPI26_EVENT_PCIE_TOPO_PI_4_LANES (0x30) #define MPI26_EVENT_PCIE_TOPO_PI_8_LANES (0x40) +#define MPI26_EVENT_PCIE_TOPO_PI_16_LANES (0x50) #define MPI26_EVENT_PCIE_TOPO_PI_RATE_MASK (0x0F) #define MPI26_EVENT_PCIE_TOPO_PI_RATE_UNKNOWN (0x00) @@ -1450,7 +1455,11 @@ typedef struct _MPI2_FW_DOWNLOAD_REQUEST { #define MPI2_FW_DOWNLOAD_ITYPE_CTLR (0x12) #define MPI2_FW_DOWNLOAD_ITYPE_IMR_FIRMWARE (0x13) #define MPI2_FW_DOWNLOAD_ITYPE_MR_NVDATA (0x14) +/*MPI v2.6 and newer */ +#define MPI2_FW_DOWNLOAD_ITYPE_CPLD (0x15) +#define MPI2_FW_DOWNLOAD_ITYPE_PSOC (0x16) #define MPI2_FW_DOWNLOAD_ITYPE_MIN_PRODUCT_SPECIFIC (0xF0) +#define MPI2_FW_DOWNLOAD_ITYPE_TERMINATE (0xFF) /*MPI v2.0 FWDownload TransactionContext Element */ typedef struct _MPI2_FW_DOWNLOAD_TCSGE { @@ -1597,352 +1606,6 @@ typedef struct _MPI2_FW_UPLOAD_REPLY { } MPI2_FW_UPLOAD_REPLY, *PTR_MPI2_FW_UPLOAD_REPLY, Mpi2FWUploadReply_t, *pMPi2FWUploadReply_t; -/*FW Image Header */ -typedef struct _MPI2_FW_IMAGE_HEADER { - U32 Signature; /*0x00 */ - U32 Signature0; /*0x04 */ - U32 Signature1; /*0x08 */ - U32 Signature2; /*0x0C */ - MPI2_VERSION_UNION MPIVersion; /*0x10 */ - MPI2_VERSION_UNION FWVersion; /*0x14 */ - MPI2_VERSION_UNION NVDATAVersion; /*0x18 */ - MPI2_VERSION_UNION PackageVersion; /*0x1C */ - U16 VendorID; /*0x20 */ - U16 ProductID; /*0x22 */ - U16 ProtocolFlags; /*0x24 */ - U16 Reserved26; /*0x26 */ - U32 IOCCapabilities; /*0x28 */ - U32 ImageSize; /*0x2C */ - U32 NextImageHeaderOffset; /*0x30 */ - U32 Checksum; /*0x34 */ - U32 Reserved38; /*0x38 */ - U32 Reserved3C; /*0x3C */ - U32 Reserved40; /*0x40 */ - U32 Reserved44; /*0x44 */ - U32 Reserved48; /*0x48 */ - U32 Reserved4C; /*0x4C */ - U32 Reserved50; /*0x50 */ - U32 Reserved54; /*0x54 */ - U32 Reserved58; /*0x58 */ - U32 Reserved5C; /*0x5C */ - U32 BootFlags; /*0x60 */ - U32 FirmwareVersionNameWhat; /*0x64 */ - U8 FirmwareVersionName[32]; /*0x68 */ - U32 VendorNameWhat; /*0x88 */ - U8 VendorName[32]; /*0x8C */ - U32 PackageNameWhat; /*0x88 */ - U8 PackageName[32]; /*0x8C */ - U32 ReservedD0; /*0xD0 */ - U32 ReservedD4; /*0xD4 */ - U32 ReservedD8; /*0xD8 */ - U32 ReservedDC; /*0xDC */ - U32 ReservedE0; /*0xE0 */ - U32 ReservedE4; /*0xE4 */ - U32 ReservedE8; /*0xE8 */ - U32 ReservedEC; /*0xEC */ - U32 ReservedF0; /*0xF0 */ - U32 ReservedF4; /*0xF4 */ - U32 ReservedF8; /*0xF8 */ - U32 ReservedFC; /*0xFC */ -} MPI2_FW_IMAGE_HEADER, *PTR_MPI2_FW_IMAGE_HEADER, - Mpi2FWImageHeader_t, *pMpi2FWImageHeader_t; - -/*Signature field */ -#define MPI2_FW_HEADER_SIGNATURE_OFFSET (0x00) -#define MPI2_FW_HEADER_SIGNATURE_MASK (0xFF000000) -#define MPI2_FW_HEADER_SIGNATURE (0xEA000000) -#define MPI26_FW_HEADER_SIGNATURE (0xEB000000) - -/*Signature0 field */ -#define MPI2_FW_HEADER_SIGNATURE0_OFFSET (0x04) -#define MPI2_FW_HEADER_SIGNATURE0 (0x5AFAA55A) -/* Last byte is defined by architecture */ -#define MPI26_FW_HEADER_SIGNATURE0_BASE (0x5AEAA500) -#define MPI26_FW_HEADER_SIGNATURE0_ARC_0 (0x5A) -#define MPI26_FW_HEADER_SIGNATURE0_ARC_1 (0x00) -#define MPI26_FW_HEADER_SIGNATURE0_ARC_2 (0x01) -/* legacy (0x5AEAA55A) */ -#define MPI26_FW_HEADER_SIGNATURE0_ARC_3 (0x02) -#define MPI26_FW_HEADER_SIGNATURE0 \ - (MPI26_FW_HEADER_SIGNATURE0_BASE+MPI26_FW_HEADER_SIGNATURE0_ARC_0) -#define MPI26_FW_HEADER_SIGNATURE0_3516 \ - (MPI26_FW_HEADER_SIGNATURE0_BASE+MPI26_FW_HEADER_SIGNATURE0_ARC_1) -#define MPI26_FW_HEADER_SIGNATURE0_4008 \ - (MPI26_FW_HEADER_SIGNATURE0_BASE+MPI26_FW_HEADER_SIGNATURE0_ARC_3) - -/*Signature1 field */ -#define MPI2_FW_HEADER_SIGNATURE1_OFFSET (0x08) -#define MPI2_FW_HEADER_SIGNATURE1 (0xA55AFAA5) -#define MPI26_FW_HEADER_SIGNATURE1 (0xA55AEAA5) - -/*Signature2 field */ -#define MPI2_FW_HEADER_SIGNATURE2_OFFSET (0x0C) -#define MPI2_FW_HEADER_SIGNATURE2 (0x5AA55AFA) -#define MPI26_FW_HEADER_SIGNATURE2 (0x5AA55AEA) - -/*defines for using the ProductID field */ -#define MPI2_FW_HEADER_PID_TYPE_MASK (0xF000) -#define MPI2_FW_HEADER_PID_TYPE_SAS (0x2000) - -#define MPI2_FW_HEADER_PID_PROD_MASK (0x0F00) -#define MPI2_FW_HEADER_PID_PROD_A (0x0000) -#define MPI2_FW_HEADER_PID_PROD_TARGET_INITIATOR_SCSI (0x0200) -#define MPI2_FW_HEADER_PID_PROD_IR_SCSI (0x0700) - -#define MPI2_FW_HEADER_PID_FAMILY_MASK (0x00FF) -/*SAS ProductID Family bits */ -#define MPI2_FW_HEADER_PID_FAMILY_2108_SAS (0x0013) -#define MPI2_FW_HEADER_PID_FAMILY_2208_SAS (0x0014) -#define MPI25_FW_HEADER_PID_FAMILY_3108_SAS (0x0021) -#define MPI26_FW_HEADER_PID_FAMILY_3324_SAS (0x0028) -#define MPI26_FW_HEADER_PID_FAMILY_3516_SAS (0x0031) - -/*use MPI2_IOCFACTS_PROTOCOL_ defines for ProtocolFlags field */ - -/*use MPI2_IOCFACTS_CAPABILITY_ defines for IOCCapabilities field */ - -#define MPI2_FW_HEADER_IMAGESIZE_OFFSET (0x2C) -#define MPI2_FW_HEADER_NEXTIMAGE_OFFSET (0x30) -#define MPI26_FW_HEADER_BOOTFLAGS_OFFSET (0x60) -#define MPI2_FW_HEADER_VERNMHWAT_OFFSET (0x64) - -#define MPI2_FW_HEADER_WHAT_SIGNATURE (0x29232840) - -#define MPI2_FW_HEADER_SIZE (0x100) - -/*Extended Image Header */ -typedef struct _MPI2_EXT_IMAGE_HEADER { - U8 ImageType; /*0x00 */ - U8 Reserved1; /*0x01 */ - U16 Reserved2; /*0x02 */ - U32 Checksum; /*0x04 */ - U32 ImageSize; /*0x08 */ - U32 NextImageHeaderOffset; /*0x0C */ - U32 PackageVersion; /*0x10 */ - U32 Reserved3; /*0x14 */ - U32 Reserved4; /*0x18 */ - U32 Reserved5; /*0x1C */ - U8 IdentifyString[32]; /*0x20 */ -} MPI2_EXT_IMAGE_HEADER, *PTR_MPI2_EXT_IMAGE_HEADER, - Mpi2ExtImageHeader_t, *pMpi2ExtImageHeader_t; - -/*useful offsets */ -#define MPI2_EXT_IMAGE_IMAGETYPE_OFFSET (0x00) -#define MPI2_EXT_IMAGE_IMAGESIZE_OFFSET (0x08) -#define MPI2_EXT_IMAGE_NEXTIMAGE_OFFSET (0x0C) - -#define MPI2_EXT_IMAGE_HEADER_SIZE (0x40) - -/*defines for the ImageType field */ -#define MPI2_EXT_IMAGE_TYPE_UNSPECIFIED (0x00) -#define MPI2_EXT_IMAGE_TYPE_FW (0x01) -#define MPI2_EXT_IMAGE_TYPE_NVDATA (0x03) -#define MPI2_EXT_IMAGE_TYPE_BOOTLOADER (0x04) -#define MPI2_EXT_IMAGE_TYPE_INITIALIZATION (0x05) -#define MPI2_EXT_IMAGE_TYPE_FLASH_LAYOUT (0x06) -#define MPI2_EXT_IMAGE_TYPE_SUPPORTED_DEVICES (0x07) -#define MPI2_EXT_IMAGE_TYPE_MEGARAID (0x08) -#define MPI2_EXT_IMAGE_TYPE_ENCRYPTED_HASH (0x09) -#define MPI2_EXT_IMAGE_TYPE_MIN_PRODUCT_SPECIFIC (0x80) -#define MPI2_EXT_IMAGE_TYPE_MAX_PRODUCT_SPECIFIC (0xFF) - -#define MPI2_EXT_IMAGE_TYPE_MAX (MPI2_EXT_IMAGE_TYPE_MAX_PRODUCT_SPECIFIC) - -/*FLASH Layout Extended Image Data */ - -/* - *Host code (drivers, BIOS, utilities, etc.) should leave this define set to - *one and check RegionsPerLayout at runtime. - */ -#ifndef MPI2_FLASH_NUMBER_OF_REGIONS -#define MPI2_FLASH_NUMBER_OF_REGIONS (1) -#endif - -/* - *Host code (drivers, BIOS, utilities, etc.) should leave this define set to - *one and check NumberOfLayouts at runtime. - */ -#ifndef MPI2_FLASH_NUMBER_OF_LAYOUTS -#define MPI2_FLASH_NUMBER_OF_LAYOUTS (1) -#endif - -typedef struct _MPI2_FLASH_REGION { - U8 RegionType; /*0x00 */ - U8 Reserved1; /*0x01 */ - U16 Reserved2; /*0x02 */ - U32 RegionOffset; /*0x04 */ - U32 RegionSize; /*0x08 */ - U32 Reserved3; /*0x0C */ -} MPI2_FLASH_REGION, *PTR_MPI2_FLASH_REGION, - Mpi2FlashRegion_t, *pMpi2FlashRegion_t; - -typedef struct _MPI2_FLASH_LAYOUT { - U32 FlashSize; /*0x00 */ - U32 Reserved1; /*0x04 */ - U32 Reserved2; /*0x08 */ - U32 Reserved3; /*0x0C */ - MPI2_FLASH_REGION Region[MPI2_FLASH_NUMBER_OF_REGIONS]; /*0x10 */ -} MPI2_FLASH_LAYOUT, *PTR_MPI2_FLASH_LAYOUT, - Mpi2FlashLayout_t, *pMpi2FlashLayout_t; - -typedef struct _MPI2_FLASH_LAYOUT_DATA { - U8 ImageRevision; /*0x00 */ - U8 Reserved1; /*0x01 */ - U8 SizeOfRegion; /*0x02 */ - U8 Reserved2; /*0x03 */ - U16 NumberOfLayouts; /*0x04 */ - U16 RegionsPerLayout; /*0x06 */ - U16 MinimumSectorAlignment; /*0x08 */ - U16 Reserved3; /*0x0A */ - U32 Reserved4; /*0x0C */ - MPI2_FLASH_LAYOUT Layout[MPI2_FLASH_NUMBER_OF_LAYOUTS]; /*0x10 */ -} MPI2_FLASH_LAYOUT_DATA, *PTR_MPI2_FLASH_LAYOUT_DATA, - Mpi2FlashLayoutData_t, *pMpi2FlashLayoutData_t; - -/*defines for the RegionType field */ -#define MPI2_FLASH_REGION_UNUSED (0x00) -#define MPI2_FLASH_REGION_FIRMWARE (0x01) -#define MPI2_FLASH_REGION_BIOS (0x02) -#define MPI2_FLASH_REGION_NVDATA (0x03) -#define MPI2_FLASH_REGION_FIRMWARE_BACKUP (0x05) -#define MPI2_FLASH_REGION_MFG_INFORMATION (0x06) -#define MPI2_FLASH_REGION_CONFIG_1 (0x07) -#define MPI2_FLASH_REGION_CONFIG_2 (0x08) -#define MPI2_FLASH_REGION_MEGARAID (0x09) -#define MPI2_FLASH_REGION_COMMON_BOOT_BLOCK (0x0A) -#define MPI2_FLASH_REGION_INIT (MPI2_FLASH_REGION_COMMON_BOOT_BLOCK) -#define MPI2_FLASH_REGION_CBB_BACKUP (0x0D) -#define MPI2_FLASH_REGION_SBR (0x0E) -#define MPI2_FLASH_REGION_SBR_BACKUP (0x0F) -#define MPI2_FLASH_REGION_HIIM (0x10) -#define MPI2_FLASH_REGION_HIIA (0x11) -#define MPI2_FLASH_REGION_CTLR (0x12) -#define MPI2_FLASH_REGION_IMR_FIRMWARE (0x13) -#define MPI2_FLASH_REGION_MR_NVDATA (0x14) - -/*ImageRevision */ -#define MPI2_FLASH_LAYOUT_IMAGE_REVISION (0x00) - -/*Supported Devices Extended Image Data */ - -/* - *Host code (drivers, BIOS, utilities, etc.) should leave this define set to - *one and check NumberOfDevices at runtime. - */ -#ifndef MPI2_SUPPORTED_DEVICES_IMAGE_NUM_DEVICES -#define MPI2_SUPPORTED_DEVICES_IMAGE_NUM_DEVICES (1) -#endif - -typedef struct _MPI2_SUPPORTED_DEVICE { - U16 DeviceID; /*0x00 */ - U16 VendorID; /*0x02 */ - U16 DeviceIDMask; /*0x04 */ - U16 Reserved1; /*0x06 */ - U8 LowPCIRev; /*0x08 */ - U8 HighPCIRev; /*0x09 */ - U16 Reserved2; /*0x0A */ - U32 Reserved3; /*0x0C */ -} MPI2_SUPPORTED_DEVICE, *PTR_MPI2_SUPPORTED_DEVICE, - Mpi2SupportedDevice_t, *pMpi2SupportedDevice_t; - -typedef struct _MPI2_SUPPORTED_DEVICES_DATA { - U8 ImageRevision; /*0x00 */ - U8 Reserved1; /*0x01 */ - U8 NumberOfDevices; /*0x02 */ - U8 Reserved2; /*0x03 */ - U32 Reserved3; /*0x04 */ - MPI2_SUPPORTED_DEVICE - SupportedDevice[MPI2_SUPPORTED_DEVICES_IMAGE_NUM_DEVICES];/*0x08 */ -} MPI2_SUPPORTED_DEVICES_DATA, *PTR_MPI2_SUPPORTED_DEVICES_DATA, - Mpi2SupportedDevicesData_t, *pMpi2SupportedDevicesData_t; - -/*ImageRevision */ -#define MPI2_SUPPORTED_DEVICES_IMAGE_REVISION (0x00) - -/*Init Extended Image Data */ - -typedef struct _MPI2_INIT_IMAGE_FOOTER { - U32 BootFlags; /*0x00 */ - U32 ImageSize; /*0x04 */ - U32 Signature0; /*0x08 */ - U32 Signature1; /*0x0C */ - U32 Signature2; /*0x10 */ - U32 ResetVector; /*0x14 */ -} MPI2_INIT_IMAGE_FOOTER, *PTR_MPI2_INIT_IMAGE_FOOTER, - Mpi2InitImageFooter_t, *pMpi2InitImageFooter_t; - -/*defines for the BootFlags field */ -#define MPI2_INIT_IMAGE_BOOTFLAGS_OFFSET (0x00) - -/*defines for the ImageSize field */ -#define MPI2_INIT_IMAGE_IMAGESIZE_OFFSET (0x04) - -/*defines for the Signature0 field */ -#define MPI2_INIT_IMAGE_SIGNATURE0_OFFSET (0x08) -#define MPI2_INIT_IMAGE_SIGNATURE0 (0x5AA55AEA) - -/*defines for the Signature1 field */ -#define MPI2_INIT_IMAGE_SIGNATURE1_OFFSET (0x0C) -#define MPI2_INIT_IMAGE_SIGNATURE1 (0xA55AEAA5) - -/*defines for the Signature2 field */ -#define MPI2_INIT_IMAGE_SIGNATURE2_OFFSET (0x10) -#define MPI2_INIT_IMAGE_SIGNATURE2 (0x5AEAA55A) - -/*Signature fields as individual bytes */ -#define MPI2_INIT_IMAGE_SIGNATURE_BYTE_0 (0xEA) -#define MPI2_INIT_IMAGE_SIGNATURE_BYTE_1 (0x5A) -#define MPI2_INIT_IMAGE_SIGNATURE_BYTE_2 (0xA5) -#define MPI2_INIT_IMAGE_SIGNATURE_BYTE_3 (0x5A) - -#define MPI2_INIT_IMAGE_SIGNATURE_BYTE_4 (0xA5) -#define MPI2_INIT_IMAGE_SIGNATURE_BYTE_5 (0xEA) -#define MPI2_INIT_IMAGE_SIGNATURE_BYTE_6 (0x5A) -#define MPI2_INIT_IMAGE_SIGNATURE_BYTE_7 (0xA5) - -#define MPI2_INIT_IMAGE_SIGNATURE_BYTE_8 (0x5A) -#define MPI2_INIT_IMAGE_SIGNATURE_BYTE_9 (0xA5) -#define MPI2_INIT_IMAGE_SIGNATURE_BYTE_A (0xEA) -#define MPI2_INIT_IMAGE_SIGNATURE_BYTE_B (0x5A) - -/*defines for the ResetVector field */ -#define MPI2_INIT_IMAGE_RESETVECTOR_OFFSET (0x14) - - -/* Encrypted Hash Extended Image Data */ - -typedef struct _MPI25_ENCRYPTED_HASH_ENTRY { - U8 HashImageType; /* 0x00 */ - U8 HashAlgorithm; /* 0x01 */ - U8 EncryptionAlgorithm; /* 0x02 */ - U8 Reserved1; /* 0x03 */ - U32 Reserved2; /* 0x04 */ - U32 EncryptedHash[1]; /* 0x08 */ /* variable length */ -} MPI25_ENCRYPTED_HASH_ENTRY, *PTR_MPI25_ENCRYPTED_HASH_ENTRY, -Mpi25EncryptedHashEntry_t, *pMpi25EncryptedHashEntry_t; - -/* values for HashImageType */ -#define MPI25_HASH_IMAGE_TYPE_UNUSED (0x00) -#define MPI25_HASH_IMAGE_TYPE_FIRMWARE (0x01) -#define MPI25_HASH_IMAGE_TYPE_BIOS (0x02) - -/* values for HashAlgorithm */ -#define MPI25_HASH_ALGORITHM_UNUSED (0x00) -#define MPI25_HASH_ALGORITHM_SHA256 (0x01) - -/* values for EncryptionAlgorithm */ -#define MPI25_ENCRYPTION_ALG_UNUSED (0x00) -#define MPI25_ENCRYPTION_ALG_RSA256 (0x01) - -typedef struct _MPI25_ENCRYPTED_HASH_DATA { - U8 ImageVersion; /* 0x00 */ - U8 NumHash; /* 0x01 */ - U16 Reserved1; /* 0x02 */ - U32 Reserved2; /* 0x04 */ - MPI25_ENCRYPTED_HASH_ENTRY EncryptedHashEntry[1]; /* 0x08 */ -} MPI25_ENCRYPTED_HASH_DATA, *PTR_MPI25_ENCRYPTED_HASH_DATA, -Mpi25EncryptedHashData_t, *pMpi25EncryptedHashData_t; - /**************************************************************************** * PowerManagementControl message diff --git a/drivers/scsi/mpt3sas/mpi/mpi2_pci.h b/drivers/scsi/mpt3sas/mpi/mpi2_pci.h index f0281f943ec9..63a09509d7d1 100644 --- a/drivers/scsi/mpt3sas/mpi/mpi2_pci.h +++ b/drivers/scsi/mpt3sas/mpi/mpi2_pci.h @@ -1,12 +1,12 @@ /* - * Copyright 2012-2015 Avago Technologies. All rights reserved. + * Copyright 2000-2020 Broadcom Inc. All rights reserved. * * * Name: mpi2_pci.h * Title: MPI PCIe Attached Devices structures and definitions. * Creation Date: October 9, 2012 * - * mpi2_pci.h Version: 02.00.02 + * mpi2_pci.h Version: 02.00.03 * * NOTE: Names (typedefs, defines, etc.) beginning with an MPI25 or Mpi25 * prefix are for use only on MPI v2.5 products, and must not be used @@ -23,6 +23,7 @@ * Removed SOP support. * 07-01-16 02.00.02 Added MPI26_NVME_FLAGS_FORCE_ADMIN_ERR_RESP to * NVME Encapsulated Request. + * 07-22-18 02.00.03 Updted flags field for NVME Encapsulated req * -------------------------------------------------------------------------- */ @@ -75,10 +76,10 @@ typedef struct _MPI26_NVME_ENCAPSULATED_REQUEST { #define MPI26_NVME_FLAGS_SUBMISSIONQ_ADMIN (0x0010) /*Error Response Address Space */ #define MPI26_NVME_FLAGS_MASK_ERROR_RSP_ADDR (0x000C) +#define MPI26_NVME_FLAGS_MASK_ERROR_RSP_ADDR_MASK (0x000C) #define MPI26_NVME_FLAGS_SYSTEM_RSP_ADDR (0x0000) -#define MPI26_NVME_FLAGS_IOCPLB_RSP_ADDR (0x0008) -#define MPI26_NVME_FLAGS_IOCPLBNTA_RSP_ADDR (0x000C) -/*Data Direction*/ +#define MPI26_NVME_FLAGS_IOCCTL_RSP_ADDR (0x0008) +/* Data Direction*/ #define MPI26_NVME_FLAGS_DATADIRECTION_MASK (0x0003) #define MPI26_NVME_FLAGS_NODATATRANSFER (0x0000) #define MPI26_NVME_FLAGS_WRITE (0x0001) diff --git a/drivers/scsi/mpt3sas/mpi/mpi2_raid.h b/drivers/scsi/mpt3sas/mpi/mpi2_raid.h index b9bb1c178f12..b770eb516c14 100644 --- a/drivers/scsi/mpt3sas/mpi/mpi2_raid.h +++ b/drivers/scsi/mpt3sas/mpi/mpi2_raid.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0 */ /* - * Copyright 2000-2014 Avago Technologies. All rights reserved. + * Copyright 2000-2020 Broadcom Inc. All rights reserved. * * * Name: mpi2_raid.h diff --git a/drivers/scsi/mpt3sas/mpi/mpi2_sas.h b/drivers/scsi/mpt3sas/mpi/mpi2_sas.h index afa17ff246b4..16c922a8a02b 100644 --- a/drivers/scsi/mpt3sas/mpi/mpi2_sas.h +++ b/drivers/scsi/mpt3sas/mpi/mpi2_sas.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0 */ /* - * Copyright 2000-2015 Avago Technologies. All rights reserved. + * Copyright 2000-2020 Broadcom Inc. All rights reserved. * * * Name: mpi2_sas.h diff --git a/drivers/scsi/mpt3sas/mpi/mpi2_tool.h b/drivers/scsi/mpt3sas/mpi/mpi2_tool.h index 629296ee9236..3f966b6796b3 100644 --- a/drivers/scsi/mpt3sas/mpi/mpi2_tool.h +++ b/drivers/scsi/mpt3sas/mpi/mpi2_tool.h @@ -1,13 +1,13 @@ /* SPDX-License-Identifier: GPL-2.0 */ /* - * Copyright 2000-2014 Avago Technologies. All rights reserved. + * Copyright 2000-2020 Broadcom Inc. All rights reserved. * * * Name: mpi2_tool.h * Title: MPI diagnostic tool structures and definitions * Creation Date: March 26, 2007 * - * mpi2_tool.h Version: 02.00.14 + * mpi2_tool.h Version: 02.00.15 * * Version History * --------------- @@ -38,6 +38,8 @@ * 11-18-14 02.00.13 Updated copyright information. * 08-25-16 02.00.14 Added new values for the Flags field of Toolbox Clean * Tool Request Message. + * 07-22-18 02.00.15 Added defines for new TOOLBOX_PCIE_LANE_MARGINING tool. + * Added option for DeviceInfo field in ISTWI tool. * -------------------------------------------------------------------------- */ @@ -58,6 +60,7 @@ #define MPI2_TOOLBOX_BEACON_TOOL (0x05) #define MPI2_TOOLBOX_DIAGNOSTIC_CLI_TOOL (0x06) #define MPI2_TOOLBOX_TEXT_DISPLAY_TOOL (0x07) +#define MPI26_TOOLBOX_BACKEND_PCIE_LANE_MARGIN (0x08) /**************************************************************************** * Toolbox reply @@ -226,6 +229,13 @@ typedef struct _MPI2_TOOLBOX_ISTWI_READ_WRITE_REQUEST { #define MPI2_TOOL_ISTWI_FLAG_AUTO_RESERVE_RELEASE (0x80) #define MPI2_TOOL_ISTWI_FLAG_PAGE_ADDR_MASK (0x07) +/*MPI26 TOOLBOX Request MsgFlags defines */ +#define MPI26_TOOLBOX_REQ_MSGFLAGS_ADDRESSING_MASK (0x01) +/*Request uses Man Page 43 device index addressing */ +#define MPI26_TOOLBOX_REQ_MSGFLAGS_ADDRESSING_DEVINDEX (0x00) +/*Request uses Man Page 43 device info struct addressing */ +#define MPI26_TOOLBOX_REQ_MSGFLAGS_ADDRESSING_DEVINFO (0x01) + /*Toolbox ISTWI Read Write Tool reply message */ typedef struct _MPI2_TOOLBOX_ISTWI_REPLY { U8 Tool; /*0x00 */ @@ -387,6 +397,64 @@ Mpi2ToolboxTextDisplayRequest_t, #define MPI2_TOOLBOX_CONSOLE_FLAG_TIMESTAMP (0x01) +/*************************************************************************** + * Toolbox Backend Lane Margining Tool + *************************************************************************** + */ + +/*Toolbox Backend Lane Margining Tool request message */ +typedef struct _MPI26_TOOLBOX_LANE_MARGINING_REQUEST { + U8 Tool; /*0x00 */ + U8 Reserved1; /*0x01 */ + U8 ChainOffset; /*0x02 */ + U8 Function; /*0x03 */ + U16 Reserved2; /*0x04 */ + U8 Reserved3; /*0x06 */ + U8 MsgFlags; /*0x07 */ + U8 VP_ID; /*0x08 */ + U8 VF_ID; /*0x09 */ + U16 Reserved4; /*0x0A */ + U8 Command; /*0x0C */ + U8 SwitchPort; /*0x0D */ + U16 DevHandle; /*0x0E */ + U8 RegisterOffset; /*0x10 */ + U8 Reserved5; /*0x11 */ + U16 DataLength; /*0x12 */ + MPI25_SGE_IO_UNION SGL; /*0x14 */ +} MPI26_TOOLBOX_LANE_MARGINING_REQUEST, + *PTR_MPI2_TOOLBOX_LANE_MARGINING_REQUEST, + Mpi26ToolboxLaneMarginingRequest_t, + *pMpi2ToolboxLaneMarginingRequest_t; + +/* defines for the Command field */ +#define MPI26_TOOL_MARGIN_COMMAND_ENTER_MARGIN_MODE (0x01) +#define MPI26_TOOL_MARGIN_COMMAND_READ_REGISTER_DATA (0x02) +#define MPI26_TOOL_MARGIN_COMMAND_WRITE_REGISTER_DATA (0x03) +#define MPI26_TOOL_MARGIN_COMMAND_EXIT_MARGIN_MODE (0x04) + + +/*Toolbox Backend Lane Margining Tool reply message */ +typedef struct _MPI26_TOOLBOX_LANE_MARGINING_REPLY { + U8 Tool; /*0x00 */ + U8 Reserved1; /*0x01 */ + U8 MsgLength; /*0x02 */ + U8 Function; /*0x03 */ + U16 Reserved2; /*0x04 */ + U8 Reserved3; /*0x06 */ + U8 MsgFlags; /*0x07 */ + U8 VP_ID; /*0x08 */ + U8 VF_ID; /*0x09 */ + U16 Reserved4; /*0x0A */ + U16 Reserved5; /*0x0C */ + U16 IOCStatus; /*0x0E */ + U32 IOCLogInfo; /*0x10 */ + U16 ReturnedDataLength; /*0x14 */ + U16 Reserved6; /*0x16 */ +} MPI26_TOOLBOX_LANE_MARGINING_REPLY, + *PTR_MPI26_TOOLBOX_LANE_MARGINING_REPLY, + Mpi26ToolboxLaneMarginingReply_t, + *pMpi26ToolboxLaneMarginingReply_t; + /***************************************************************************** * diff --git a/drivers/scsi/mpt3sas/mpt3sas_base.c b/drivers/scsi/mpt3sas/mpt3sas_base.c index 2500377d0723..0a6cb8f0680c 100644 --- a/drivers/scsi/mpt3sas/mpt3sas_base.c +++ b/drivers/scsi/mpt3sas/mpt3sas_base.c @@ -157,6 +157,32 @@ module_param_call(mpt3sas_fwfault_debug, _scsih_set_fwfault_debug, param_get_int, &mpt3sas_fwfault_debug, 0644); /** + * _base_readl_aero - retry readl for max three times. + * @addr - MPT Fusion system interface register address + * + * Retry the readl() for max three times if it gets zero value + * while reading the system interface register. + */ +static inline u32 +_base_readl_aero(const volatile void __iomem *addr) +{ + u32 i = 0, ret_val; + + do { + ret_val = readl(addr); + i++; + } while (ret_val == 0 && i < 3); + + return ret_val; +} + +static inline u32 +_base_readl(const volatile void __iomem *addr) +{ + return readl(addr); +} + +/** * _base_clone_reply_to_sys_mem - copies reply to reply free iomem * in BAR0 space. * @@ -716,7 +742,7 @@ mpt3sas_halt_firmware(struct MPT3SAS_ADAPTER *ioc) dump_stack(); - doorbell = readl(&ioc->chip->Doorbell); + doorbell = ioc->base_readl(&ioc->chip->Doorbell); if ((doorbell & MPI2_IOC_STATE_MASK) == MPI2_IOC_STATE_FAULT) mpt3sas_base_fault_info(ioc , doorbell); else { @@ -1325,10 +1351,10 @@ _base_mask_interrupts(struct MPT3SAS_ADAPTER *ioc) u32 him_register; ioc->mask_interrupts = 1; - him_register = readl(&ioc->chip->HostInterruptMask); + him_register = ioc->base_readl(&ioc->chip->HostInterruptMask); him_register |= MPI2_HIM_DIM + MPI2_HIM_RIM + MPI2_HIM_RESET_IRQ_MASK; writel(him_register, &ioc->chip->HostInterruptMask); - readl(&ioc->chip->HostInterruptMask); + ioc->base_readl(&ioc->chip->HostInterruptMask); } /** @@ -1342,7 +1368,7 @@ _base_unmask_interrupts(struct MPT3SAS_ADAPTER *ioc) { u32 him_register; - him_register = readl(&ioc->chip->HostInterruptMask); + him_register = ioc->base_readl(&ioc->chip->HostInterruptMask); him_register &= ~MPI2_HIM_RIM; writel(him_register, &ioc->chip->HostInterruptMask); ioc->mask_interrupts = 0; @@ -3319,8 +3345,9 @@ _base_mpi_ep_writeq(__u64 b, volatile void __iomem *addr, static inline void _base_writeq(__u64 b, volatile void __iomem *addr, spinlock_t *writeq_lock) { + wmb(); __raw_writeq(b, addr); - mmiowb(); + barrier(); } #else static inline void @@ -4060,7 +4087,7 @@ _base_static_config_pages(struct MPT3SAS_ADAPTER *ioc) * flag unset in NVDATA. */ mpt3sas_config_get_manufacturing_pg11(ioc, &mpi_reply, &ioc->manu_pg11); - if (ioc->manu_pg11.EEDPTagMode == 0) { + if (!ioc->is_gen35_ioc && ioc->manu_pg11.EEDPTagMode == 0) { pr_err("%s: overriding NVDATA EEDPTagMode setting\n", ioc->name); ioc->manu_pg11.EEDPTagMode &= ~0x3; @@ -4854,7 +4881,7 @@ mpt3sas_base_get_iocstate(struct MPT3SAS_ADAPTER *ioc, int cooked) { u32 s, sc; - s = readl(&ioc->chip->Doorbell); + s = ioc->base_readl(&ioc->chip->Doorbell); sc = s & MPI2_IOC_STATE_MASK; return cooked ? sc : s; } @@ -4910,7 +4937,7 @@ _base_wait_for_doorbell_int(struct MPT3SAS_ADAPTER *ioc, int timeout) count = 0; cntdn = 1000 * timeout; do { - int_status = readl(&ioc->chip->HostInterruptStatus); + int_status = ioc->base_readl(&ioc->chip->HostInterruptStatus); if (int_status & MPI2_HIS_IOC2SYS_DB_STATUS) { dhsprintk(ioc, ioc_info(ioc, "%s: successful count(%d), timeout(%d)\n", @@ -4936,7 +4963,7 @@ _base_spin_on_doorbell_int(struct MPT3SAS_ADAPTER *ioc, int timeout) count = 0; cntdn = 2000 * timeout; do { - int_status = readl(&ioc->chip->HostInterruptStatus); + int_status = ioc->base_readl(&ioc->chip->HostInterruptStatus); if (int_status & MPI2_HIS_IOC2SYS_DB_STATUS) { dhsprintk(ioc, ioc_info(ioc, "%s: successful count(%d), timeout(%d)\n", @@ -4974,14 +5001,14 @@ _base_wait_for_doorbell_ack(struct MPT3SAS_ADAPTER *ioc, int timeout) count = 0; cntdn = 1000 * timeout; do { - int_status = readl(&ioc->chip->HostInterruptStatus); + int_status = ioc->base_readl(&ioc->chip->HostInterruptStatus); if (!(int_status & MPI2_HIS_SYS2IOC_DB_STATUS)) { dhsprintk(ioc, ioc_info(ioc, "%s: successful count(%d), timeout(%d)\n", __func__, count, timeout)); return 0; } else if (int_status & MPI2_HIS_IOC2SYS_DB_STATUS) { - doorbell = readl(&ioc->chip->Doorbell); + doorbell = ioc->base_readl(&ioc->chip->Doorbell); if ((doorbell & MPI2_IOC_STATE_MASK) == MPI2_IOC_STATE_FAULT) { mpt3sas_base_fault_info(ioc , doorbell); @@ -5016,7 +5043,7 @@ _base_wait_for_doorbell_not_used(struct MPT3SAS_ADAPTER *ioc, int timeout) count = 0; cntdn = 1000 * timeout; do { - doorbell_reg = readl(&ioc->chip->Doorbell); + doorbell_reg = ioc->base_readl(&ioc->chip->Doorbell); if (!(doorbell_reg & MPI2_DOORBELL_USED)) { dhsprintk(ioc, ioc_info(ioc, "%s: successful count(%d), timeout(%d)\n", @@ -5078,6 +5105,39 @@ _base_send_ioc_reset(struct MPT3SAS_ADAPTER *ioc, u8 reset_type, int timeout) } /** + * mpt3sas_wait_for_ioc - IOC's operational state is checked here. + * @ioc: per adapter object + * @wait_count: timeout in seconds + * + * Return: Waits up to timeout seconds for the IOC to + * become operational. Returns 0 if IOC is present + * and operational; otherwise returns -EFAULT. + */ + +int +mpt3sas_wait_for_ioc(struct MPT3SAS_ADAPTER *ioc, int timeout) +{ + int wait_state_count = 0; + u32 ioc_state; + + do { + ioc_state = mpt3sas_base_get_iocstate(ioc, 1); + if (ioc_state == MPI2_IOC_STATE_OPERATIONAL) + break; + ssleep(1); + ioc_info(ioc, "%s: waiting for operational state(count=%d)\n", + __func__, ++wait_state_count); + } while (--timeout); + if (!timeout) { + ioc_err(ioc, "%s: failed due to ioc not operational\n", __func__); + return -EFAULT; + } + if (wait_state_count) + ioc_info(ioc, "ioc is operational\n"); + return 0; +} + +/** * _base_handshake_req_reply_wait - send request thru doorbell interface * @ioc: per adapter object * @request_bytes: request length @@ -5098,13 +5158,13 @@ _base_handshake_req_reply_wait(struct MPT3SAS_ADAPTER *ioc, int request_bytes, __le32 *mfp; /* make sure doorbell is not in use */ - if ((readl(&ioc->chip->Doorbell) & MPI2_DOORBELL_USED)) { + if ((ioc->base_readl(&ioc->chip->Doorbell) & MPI2_DOORBELL_USED)) { ioc_err(ioc, "doorbell is in use (line=%d)\n", __LINE__); return -EFAULT; } /* clear pending doorbell interrupts from previous state changes */ - if (readl(&ioc->chip->HostInterruptStatus) & + if (ioc->base_readl(&ioc->chip->HostInterruptStatus) & MPI2_HIS_IOC2SYS_DB_STATUS) writel(0, &ioc->chip->HostInterruptStatus); @@ -5147,7 +5207,7 @@ _base_handshake_req_reply_wait(struct MPT3SAS_ADAPTER *ioc, int request_bytes, } /* read the first two 16-bits, it gives the total length of the reply */ - reply[0] = le16_to_cpu(readl(&ioc->chip->Doorbell) + reply[0] = le16_to_cpu(ioc->base_readl(&ioc->chip->Doorbell) & MPI2_DOORBELL_DATA_MASK); writel(0, &ioc->chip->HostInterruptStatus); if ((_base_wait_for_doorbell_int(ioc, 5))) { @@ -5155,7 +5215,7 @@ _base_handshake_req_reply_wait(struct MPT3SAS_ADAPTER *ioc, int request_bytes, __LINE__); return -EFAULT; } - reply[1] = le16_to_cpu(readl(&ioc->chip->Doorbell) + reply[1] = le16_to_cpu(ioc->base_readl(&ioc->chip->Doorbell) & MPI2_DOORBELL_DATA_MASK); writel(0, &ioc->chip->HostInterruptStatus); @@ -5166,9 +5226,10 @@ _base_handshake_req_reply_wait(struct MPT3SAS_ADAPTER *ioc, int request_bytes, return -EFAULT; } if (i >= reply_bytes/2) /* overflow case */ - readl(&ioc->chip->Doorbell); + ioc->base_readl(&ioc->chip->Doorbell); else - reply[i] = le16_to_cpu(readl(&ioc->chip->Doorbell) + reply[i] = le16_to_cpu( + ioc->base_readl(&ioc->chip->Doorbell) & MPI2_DOORBELL_DATA_MASK); writel(0, &ioc->chip->HostInterruptStatus); } @@ -5211,11 +5272,9 @@ mpt3sas_base_sas_iounit_control(struct MPT3SAS_ADAPTER *ioc, Mpi2SasIoUnitControlRequest_t *mpi_request) { u16 smid; - u32 ioc_state; u8 issue_reset = 0; int rc; void *request; - u16 wait_state_count; dinitprintk(ioc, ioc_info(ioc, "%s\n", __func__)); @@ -5227,20 +5286,9 @@ mpt3sas_base_sas_iounit_control(struct MPT3SAS_ADAPTER *ioc, goto out; } - wait_state_count = 0; - ioc_state = mpt3sas_base_get_iocstate(ioc, 1); - while (ioc_state != MPI2_IOC_STATE_OPERATIONAL) { - if (wait_state_count++ == 10) { - ioc_err(ioc, "%s: failed due to ioc not operational\n", - __func__); - rc = -EFAULT; - goto out; - } - ssleep(1); - ioc_state = mpt3sas_base_get_iocstate(ioc, 1); - ioc_info(ioc, "%s: waiting for operational state(count=%d)\n", - __func__, wait_state_count); - } + rc = mpt3sas_wait_for_ioc(ioc, IOC_OPERATIONAL_WAIT_COUNT); + if (rc) + goto out; smid = mpt3sas_base_get_smid(ioc, ioc->base_cb_idx); if (!smid) { @@ -5306,11 +5354,9 @@ mpt3sas_base_scsi_enclosure_processor(struct MPT3SAS_ADAPTER *ioc, Mpi2SepReply_t *mpi_reply, Mpi2SepRequest_t *mpi_request) { u16 smid; - u32 ioc_state; u8 issue_reset = 0; int rc; void *request; - u16 wait_state_count; dinitprintk(ioc, ioc_info(ioc, "%s\n", __func__)); @@ -5322,20 +5368,9 @@ mpt3sas_base_scsi_enclosure_processor(struct MPT3SAS_ADAPTER *ioc, goto out; } - wait_state_count = 0; - ioc_state = mpt3sas_base_get_iocstate(ioc, 1); - while (ioc_state != MPI2_IOC_STATE_OPERATIONAL) { - if (wait_state_count++ == 10) { - ioc_err(ioc, "%s: failed due to ioc not operational\n", - __func__); - rc = -EFAULT; - goto out; - } - ssleep(1); - ioc_state = mpt3sas_base_get_iocstate(ioc, 1); - ioc_info(ioc, "%s: waiting for operational state(count=%d)\n", - __func__, wait_state_count); - } + rc = mpt3sas_wait_for_ioc(ioc, IOC_OPERATIONAL_WAIT_COUNT); + if (rc) + goto out; smid = mpt3sas_base_get_smid(ioc, ioc->base_cb_idx); if (!smid) { @@ -6020,14 +6055,14 @@ _base_diag_reset(struct MPT3SAS_ADAPTER *ioc) if (count++ > 20) goto out; - host_diagnostic = readl(&ioc->chip->HostDiagnostic); + host_diagnostic = ioc->base_readl(&ioc->chip->HostDiagnostic); drsprintk(ioc, ioc_info(ioc, "wrote magic sequence: count(%d), host_diagnostic(0x%08x)\n", count, host_diagnostic)); } while ((host_diagnostic & MPI2_DIAG_DIAG_WRITE_ENABLE) == 0); - hcb_size = readl(&ioc->chip->HCBSize); + hcb_size = ioc->base_readl(&ioc->chip->HCBSize); drsprintk(ioc, ioc_info(ioc, "diag reset: issued\n")); writel(host_diagnostic | MPI2_DIAG_RESET_ADAPTER, @@ -6040,7 +6075,7 @@ _base_diag_reset(struct MPT3SAS_ADAPTER *ioc) for (count = 0; count < (300000000 / MPI2_HARD_RESET_PCIE_SECOND_READ_DELAY_MICRO_SEC); count++) { - host_diagnostic = readl(&ioc->chip->HostDiagnostic); + host_diagnostic = ioc->base_readl(&ioc->chip->HostDiagnostic); if (host_diagnostic == 0xFFFFFFFF) goto out; @@ -6391,6 +6426,10 @@ mpt3sas_base_attach(struct MPT3SAS_ADAPTER *ioc) ioc->rdpq_array_enable_assigned = 0; ioc->dma_mask = 0; + if (ioc->is_aero_ioc) + ioc->base_readl = &_base_readl_aero; + else + ioc->base_readl = &_base_readl; r = mpt3sas_base_map_resources(ioc); if (r) goto out_free_resources; diff --git a/drivers/scsi/mpt3sas/mpt3sas_base.h b/drivers/scsi/mpt3sas/mpt3sas_base.h index 8f1d6b071b39..800351932cc3 100644 --- a/drivers/scsi/mpt3sas/mpt3sas_base.h +++ b/drivers/scsi/mpt3sas/mpt3sas_base.h @@ -55,6 +55,7 @@ #include "mpi/mpi2_tool.h" #include "mpi/mpi2_sas.h" #include "mpi/mpi2_pci.h" +#include "mpi/mpi2_image.h" #include <scsi/scsi.h> #include <scsi/scsi_cmnd.h> @@ -74,9 +75,9 @@ #define MPT3SAS_DRIVER_NAME "mpt3sas" #define MPT3SAS_AUTHOR "Avago Technologies <MPT-FusionLinux.pdl@avagotech.com>" #define MPT3SAS_DESCRIPTION "LSI MPT Fusion SAS 3.0 Device Driver" -#define MPT3SAS_DRIVER_VERSION "26.100.00.00" -#define MPT3SAS_MAJOR_VERSION 26 -#define MPT3SAS_MINOR_VERSION 100 +#define MPT3SAS_DRIVER_VERSION "27.101.00.00" +#define MPT3SAS_MAJOR_VERSION 27 +#define MPT3SAS_MINOR_VERSION 101 #define MPT3SAS_BUILD_VERSION 0 #define MPT3SAS_RELEASE_VERSION 00 @@ -139,6 +140,9 @@ #define DEFAULT_NUM_FWCHAIN_ELEMTS 8 #define FW_IMG_HDR_READ_TIMEOUT 15 + +#define IOC_OPERATIONAL_WAIT_COUNT 10 + /* * NVMe defines */ @@ -908,6 +912,7 @@ typedef void (*NVME_BUILD_PRP)(struct MPT3SAS_ADAPTER *ioc, u16 smid, typedef void (*PUT_SMID_IO_FP_HIP) (struct MPT3SAS_ADAPTER *ioc, u16 smid, u16 funcdep); typedef void (*PUT_SMID_DEFAULT) (struct MPT3SAS_ADAPTER *ioc, u16 smid); +typedef u32 (*BASE_READ_REG) (const volatile void __iomem *addr); /* IOC Facts and Port Facts converted from little endian to cpu */ union mpi3_version_union { @@ -1388,6 +1393,7 @@ struct MPT3SAS_ADAPTER { u8 hide_drives; spinlock_t diag_trigger_lock; u8 diag_trigger_active; + BASE_READ_REG base_readl; struct SL_WH_MASTER_TRIGGER_T diag_trigger_master; struct SL_WH_EVENT_TRIGGERS_T diag_trigger_event; struct SL_WH_SCSI_TRIGGERS_T diag_trigger_scsi; @@ -1395,6 +1401,7 @@ struct MPT3SAS_ADAPTER { void *device_remove_in_progress; u16 device_remove_in_progress_sz; u8 is_gen35_ioc; + u8 is_aero_ioc; PUT_SMID_IO_FP_HIP put_smid_scsi_io; }; @@ -1487,6 +1494,7 @@ mpt3sas_wait_for_commands_to_complete(struct MPT3SAS_ADAPTER *ioc); u8 mpt3sas_base_check_cmd_timeout(struct MPT3SAS_ADAPTER *ioc, u8 status, void *mpi_request, int sz); +int mpt3sas_wait_for_ioc(struct MPT3SAS_ADAPTER *ioc, int wait_count); /* scsih shared API */ struct scsi_cmnd *mpt3sas_scsih_scsi_lookup_get(struct MPT3SAS_ADAPTER *ioc, diff --git a/drivers/scsi/mpt3sas/mpt3sas_config.c b/drivers/scsi/mpt3sas/mpt3sas_config.c index 02209447f4ef..fb0a17252f86 100644 --- a/drivers/scsi/mpt3sas/mpt3sas_config.c +++ b/drivers/scsi/mpt3sas/mpt3sas_config.c @@ -119,7 +119,7 @@ _config_display_some_debug(struct MPT3SAS_ADAPTER *ioc, u16 smid, desc = "raid_volume"; break; case MPI2_CONFIG_PAGETYPE_MANUFACTURING: - desc = "manufaucturing"; + desc = "manufacturing"; break; case MPI2_CONFIG_PAGETYPE_RAID_PHYSDISK: desc = "physdisk"; @@ -300,11 +300,9 @@ _config_request(struct MPT3SAS_ADAPTER *ioc, Mpi2ConfigRequest_t void *config_page, u16 config_page_sz) { u16 smid; - u32 ioc_state; Mpi2ConfigRequest_t *config_request; int r; u8 retry_count, issue_host_reset = 0; - u16 wait_state_count; struct config_request mem; u32 ioc_status = UINT_MAX; @@ -361,23 +359,10 @@ _config_request(struct MPT3SAS_ADAPTER *ioc, Mpi2ConfigRequest_t ioc_info(ioc, "%s: attempting retry (%d)\n", __func__, retry_count); } - wait_state_count = 0; - ioc_state = mpt3sas_base_get_iocstate(ioc, 1); - while (ioc_state != MPI2_IOC_STATE_OPERATIONAL) { - if (wait_state_count++ == MPT3_CONFIG_PAGE_DEFAULT_TIMEOUT) { - ioc_err(ioc, "%s: failed due to ioc not operational\n", - __func__); - ioc->config_cmds.status = MPT3_CMD_NOT_USED; - r = -EFAULT; - goto free_mem; - } - ssleep(1); - ioc_state = mpt3sas_base_get_iocstate(ioc, 1); - ioc_info(ioc, "%s: waiting for operational state(count=%d)\n", - __func__, wait_state_count); - } - if (wait_state_count) - ioc_info(ioc, "%s: ioc is operational\n", __func__); + + r = mpt3sas_wait_for_ioc(ioc, MPT3_CONFIG_PAGE_DEFAULT_TIMEOUT); + if (r) + goto free_mem; smid = mpt3sas_base_get_smid(ioc, ioc->config_cb_idx); if (!smid) { @@ -673,10 +658,6 @@ mpt3sas_config_set_manufacturing_pg11(struct MPT3SAS_ADAPTER *ioc, r = _config_request(ioc, &mpi_request, mpi_reply, MPT3_CONFIG_PAGE_DEFAULT_TIMEOUT, config_page, sizeof(*config_page)); - mpi_request.Action = MPI2_CONFIG_ACTION_PAGE_WRITE_NVRAM; - r = _config_request(ioc, &mpi_request, mpi_reply, - MPT3_CONFIG_PAGE_DEFAULT_TIMEOUT, config_page, - sizeof(*config_page)); out: return r; } diff --git a/drivers/scsi/mpt3sas/mpt3sas_ctl.c b/drivers/scsi/mpt3sas/mpt3sas_ctl.c index 4afa597cbfba..b2bb47c14d35 100644 --- a/drivers/scsi/mpt3sas/mpt3sas_ctl.c +++ b/drivers/scsi/mpt3sas/mpt3sas_ctl.c @@ -641,7 +641,6 @@ _ctl_do_mpt_command(struct MPT3SAS_ADAPTER *ioc, struct mpt3_ioctl_command karg, MPI2DefaultReply_t *mpi_reply; Mpi26NVMeEncapsulatedRequest_t *nvme_encap_request = NULL; struct _pcie_device *pcie_device = NULL; - u32 ioc_state; u16 smid; u8 timeout; u8 issue_reset; @@ -654,7 +653,6 @@ _ctl_do_mpt_command(struct MPT3SAS_ADAPTER *ioc, struct mpt3_ioctl_command karg, dma_addr_t data_in_dma = 0; size_t data_in_sz = 0; long ret; - u16 wait_state_count; u16 device_handle = MPT3SAS_INVALID_DEVICE_HANDLE; u8 tr_method = MPI26_SCSITASKMGMT_MSGFLAGS_PROTOCOL_LVL_RST_PCIE; @@ -666,22 +664,9 @@ _ctl_do_mpt_command(struct MPT3SAS_ADAPTER *ioc, struct mpt3_ioctl_command karg, goto out; } - wait_state_count = 0; - ioc_state = mpt3sas_base_get_iocstate(ioc, 1); - while (ioc_state != MPI2_IOC_STATE_OPERATIONAL) { - if (wait_state_count++ == 10) { - ioc_err(ioc, "%s: failed due to ioc not operational\n", - __func__); - ret = -EFAULT; - goto out; - } - ssleep(1); - ioc_state = mpt3sas_base_get_iocstate(ioc, 1); - ioc_info(ioc, "%s: waiting for operational state(count=%d)\n", - __func__, wait_state_count); - } - if (wait_state_count) - ioc_info(ioc, "%s: ioc is operational\n", __func__); + ret = mpt3sas_wait_for_ioc(ioc, IOC_OPERATIONAL_WAIT_COUNT); + if (ret) + goto out; mpi_request = kzalloc(ioc->request_sz, GFP_KERNEL); if (!mpi_request) { diff --git a/drivers/scsi/mpt3sas/mpt3sas_scsih.c b/drivers/scsi/mpt3sas/mpt3sas_scsih.c index 03c52847ed07..6be39dc27103 100644 --- a/drivers/scsi/mpt3sas/mpt3sas_scsih.c +++ b/drivers/scsi/mpt3sas/mpt3sas_scsih.c @@ -3748,6 +3748,40 @@ _scsih_tm_tr_complete(struct MPT3SAS_ADAPTER *ioc, u16 smid, u8 msix_index, return _scsih_check_for_pending_tm(ioc, smid); } +/** _scsih_allow_scmd_to_device - check whether scmd needs to + * issue to IOC or not. + * @ioc: per adapter object + * @scmd: pointer to scsi command object + * + * Returns true if scmd can be issued to IOC otherwise returns false. + */ +inline bool _scsih_allow_scmd_to_device(struct MPT3SAS_ADAPTER *ioc, + struct scsi_cmnd *scmd) +{ + + if (ioc->pci_error_recovery) + return false; + + if (ioc->hba_mpi_version_belonged == MPI2_VERSION) { + if (ioc->remove_host) + return false; + + return true; + } + + if (ioc->remove_host) { + + switch (scmd->cmnd[0]) { + case SYNCHRONIZE_CACHE: + case START_STOP: + return true; + default: + return false; + } + } + + return true; +} /** * _scsih_sas_control_complete - completion routine @@ -4571,7 +4605,7 @@ scsih_qcmd(struct Scsi_Host *shost, struct scsi_cmnd *scmd) return 0; } - if (ioc->pci_error_recovery || ioc->remove_host) { + if (!(_scsih_allow_scmd_to_device(ioc, scmd))) { scmd->result = DID_NO_CONNECT << 16; scmd->scsi_done(scmd); return 0; @@ -9641,6 +9675,7 @@ static void scsih_remove(struct pci_dev *pdev) /* release all the volumes */ _scsih_ir_shutdown(ioc); + sas_remove_host(shost); list_for_each_entry_safe(raid_device, next, &ioc->raid_device_list, list) { if (raid_device->starget) { @@ -9682,7 +9717,6 @@ static void scsih_remove(struct pci_dev *pdev) ioc->sas_hba.num_phys = 0; } - sas_remove_host(shost); mpt3sas_base_detach(ioc); spin_lock(&gioc_lock); list_del(&ioc->list); @@ -10139,7 +10173,6 @@ static struct scsi_host_template mpt2sas_driver_template = { .sg_tablesize = MPT2SAS_SG_DEPTH, .max_sectors = 32767, .cmd_per_lun = 7, - .use_clustering = ENABLE_CLUSTERING, .shost_attrs = mpt3sas_host_attrs, .sdev_attrs = mpt3sas_dev_attrs, .track_queue_depth = 1, @@ -10178,7 +10211,6 @@ static struct scsi_host_template mpt3sas_driver_template = { .sg_tablesize = MPT3SAS_SG_DEPTH, .max_sectors = 32767, .cmd_per_lun = 7, - .use_clustering = ENABLE_CLUSTERING, .shost_attrs = mpt3sas_host_attrs, .sdev_attrs = mpt3sas_dev_attrs, .track_queue_depth = 1, @@ -10250,6 +10282,10 @@ _scsih_determine_hba_mpi_version(struct pci_dev *pdev) case MPI26_MFGPAGE_DEVID_SAS3516_1: case MPI26_MFGPAGE_DEVID_SAS3416: case MPI26_MFGPAGE_DEVID_SAS3616: + case MPI26_MFGPAGE_DEVID_CFG_SEC_3916: + case MPI26_MFGPAGE_DEVID_HARD_SEC_3916: + case MPI26_MFGPAGE_DEVID_CFG_SEC_3816: + case MPI26_MFGPAGE_DEVID_HARD_SEC_3816: return MPI26_VERSION; } return 0; @@ -10337,8 +10373,17 @@ _scsih_probe(struct pci_dev *pdev, const struct pci_device_id *id) case MPI26_MFGPAGE_DEVID_SAS3616: ioc->is_gen35_ioc = 1; break; + case MPI26_MFGPAGE_DEVID_CFG_SEC_3816: + case MPI26_MFGPAGE_DEVID_CFG_SEC_3916: + dev_info(&pdev->dev, + "HBA is in Configurable Secure mode\n"); + /* fall through */ + case MPI26_MFGPAGE_DEVID_HARD_SEC_3816: + case MPI26_MFGPAGE_DEVID_HARD_SEC_3916: + ioc->is_aero_ioc = ioc->is_gen35_ioc = 1; + break; default: - ioc->is_gen35_ioc = 0; + ioc->is_gen35_ioc = ioc->is_aero_ioc = 0; } if ((ioc->hba_mpi_version_belonged == MPI25_VERSION && pdev->revision >= SAS3_PCI_DEVICE_C0_REVISION) || @@ -10795,6 +10840,23 @@ static const struct pci_device_id mpt3sas_pci_table[] = { /* Mercator ~ 3616*/ { MPI2_MFGPAGE_VENDORID_LSI, MPI26_MFGPAGE_DEVID_SAS3616, PCI_ANY_ID, PCI_ANY_ID }, + + /* Aero SI 0x00E1 Configurable Secure + * 0x00E2 Hard Secure + */ + { MPI2_MFGPAGE_VENDORID_LSI, MPI26_MFGPAGE_DEVID_CFG_SEC_3916, + PCI_ANY_ID, PCI_ANY_ID }, + { MPI2_MFGPAGE_VENDORID_LSI, MPI26_MFGPAGE_DEVID_HARD_SEC_3916, + PCI_ANY_ID, PCI_ANY_ID }, + + /* Sea SI 0x00E5 Configurable Secure + * 0x00E6 Hard Secure + */ + { MPI2_MFGPAGE_VENDORID_LSI, MPI26_MFGPAGE_DEVID_CFG_SEC_3816, + PCI_ANY_ID, PCI_ANY_ID }, + { MPI2_MFGPAGE_VENDORID_LSI, MPI26_MFGPAGE_DEVID_HARD_SEC_3816, + PCI_ANY_ID, PCI_ANY_ID }, + {0} /* Terminating entry */ }; MODULE_DEVICE_TABLE(pci, mpt3sas_pci_table); diff --git a/drivers/scsi/mpt3sas/mpt3sas_transport.c b/drivers/scsi/mpt3sas/mpt3sas_transport.c index 6a8a3c09b4b1..60ae2d0feb2b 100644 --- a/drivers/scsi/mpt3sas/mpt3sas_transport.c +++ b/drivers/scsi/mpt3sas/mpt3sas_transport.c @@ -296,7 +296,6 @@ _transport_expander_report_manufacture(struct MPT3SAS_ADAPTER *ioc, struct rep_manu_request *manufacture_request; int rc; u16 smid; - u32 ioc_state; void *psge; u8 issue_reset = 0; void *data_out = NULL; @@ -304,7 +303,6 @@ _transport_expander_report_manufacture(struct MPT3SAS_ADAPTER *ioc, dma_addr_t data_in_dma; size_t data_in_sz; size_t data_out_sz; - u16 wait_state_count; if (ioc->shost_recovery || ioc->pci_error_recovery) { ioc_info(ioc, "%s: host reset in progress!\n", __func__); @@ -320,22 +318,9 @@ _transport_expander_report_manufacture(struct MPT3SAS_ADAPTER *ioc, } ioc->transport_cmds.status = MPT3_CMD_PENDING; - wait_state_count = 0; - ioc_state = mpt3sas_base_get_iocstate(ioc, 1); - while (ioc_state != MPI2_IOC_STATE_OPERATIONAL) { - if (wait_state_count++ == 10) { - ioc_err(ioc, "%s: failed due to ioc not operational\n", - __func__); - rc = -EFAULT; - goto out; - } - ssleep(1); - ioc_state = mpt3sas_base_get_iocstate(ioc, 1); - ioc_info(ioc, "%s: waiting for operational state(count=%d)\n", - __func__, wait_state_count); - } - if (wait_state_count) - ioc_info(ioc, "%s: ioc is operational\n", __func__); + rc = mpt3sas_wait_for_ioc(ioc, IOC_OPERATIONAL_WAIT_COUNT); + if (rc) + goto out; smid = mpt3sas_base_get_smid(ioc, ioc->transport_cb_idx); if (!smid) { @@ -821,10 +806,13 @@ mpt3sas_transport_port_remove(struct MPT3SAS_ADAPTER *ioc, u64 sas_address, mpt3sas_port->remote_identify.sas_address, mpt3sas_phy->phy_id); mpt3sas_phy->phy_belongs_to_port = 0; - sas_port_delete_phy(mpt3sas_port->port, mpt3sas_phy->phy); + if (!ioc->remove_host) + sas_port_delete_phy(mpt3sas_port->port, + mpt3sas_phy->phy); list_del(&mpt3sas_phy->port_siblings); } - sas_port_delete(mpt3sas_port->port); + if (!ioc->remove_host) + sas_port_delete(mpt3sas_port->port); kfree(mpt3sas_port); } @@ -1076,13 +1064,11 @@ _transport_get_expander_phy_error_log(struct MPT3SAS_ADAPTER *ioc, struct phy_error_log_reply *phy_error_log_reply; int rc; u16 smid; - u32 ioc_state; void *psge; u8 issue_reset = 0; void *data_out = NULL; dma_addr_t data_out_dma; u32 sz; - u16 wait_state_count; if (ioc->shost_recovery || ioc->pci_error_recovery) { ioc_info(ioc, "%s: host reset in progress!\n", __func__); @@ -1098,22 +1084,9 @@ _transport_get_expander_phy_error_log(struct MPT3SAS_ADAPTER *ioc, } ioc->transport_cmds.status = MPT3_CMD_PENDING; - wait_state_count = 0; - ioc_state = mpt3sas_base_get_iocstate(ioc, 1); - while (ioc_state != MPI2_IOC_STATE_OPERATIONAL) { - if (wait_state_count++ == 10) { - ioc_err(ioc, "%s: failed due to ioc not operational\n", - __func__); - rc = -EFAULT; - goto out; - } - ssleep(1); - ioc_state = mpt3sas_base_get_iocstate(ioc, 1); - ioc_info(ioc, "%s: waiting for operational state(count=%d)\n", - __func__, wait_state_count); - } - if (wait_state_count) - ioc_info(ioc, "%s: ioc is operational\n", __func__); + rc = mpt3sas_wait_for_ioc(ioc, IOC_OPERATIONAL_WAIT_COUNT); + if (rc) + goto out; smid = mpt3sas_base_get_smid(ioc, ioc->transport_cb_idx); if (!smid) { @@ -1381,13 +1354,11 @@ _transport_expander_phy_control(struct MPT3SAS_ADAPTER *ioc, struct phy_control_reply *phy_control_reply; int rc; u16 smid; - u32 ioc_state; void *psge; u8 issue_reset = 0; void *data_out = NULL; dma_addr_t data_out_dma; u32 sz; - u16 wait_state_count; if (ioc->shost_recovery || ioc->pci_error_recovery) { ioc_info(ioc, "%s: host reset in progress!\n", __func__); @@ -1403,22 +1374,9 @@ _transport_expander_phy_control(struct MPT3SAS_ADAPTER *ioc, } ioc->transport_cmds.status = MPT3_CMD_PENDING; - wait_state_count = 0; - ioc_state = mpt3sas_base_get_iocstate(ioc, 1); - while (ioc_state != MPI2_IOC_STATE_OPERATIONAL) { - if (wait_state_count++ == 10) { - ioc_err(ioc, "%s: failed due to ioc not operational\n", - __func__); - rc = -EFAULT; - goto out; - } - ssleep(1); - ioc_state = mpt3sas_base_get_iocstate(ioc, 1); - ioc_info(ioc, "%s: waiting for operational state(count=%d)\n", - __func__, wait_state_count); - } - if (wait_state_count) - ioc_info(ioc, "%s: ioc is operational\n", __func__); + rc = mpt3sas_wait_for_ioc(ioc, IOC_OPERATIONAL_WAIT_COUNT); + if (rc) + goto out; smid = mpt3sas_base_get_smid(ioc, ioc->transport_cb_idx); if (!smid) { @@ -1880,7 +1838,6 @@ _transport_smp_handler(struct bsg_job *job, struct Scsi_Host *shost, Mpi2SmpPassthroughReply_t *mpi_reply; int rc; u16 smid; - u32 ioc_state; void *psge; dma_addr_t dma_addr_in; dma_addr_t dma_addr_out; @@ -1888,7 +1845,6 @@ _transport_smp_handler(struct bsg_job *job, struct Scsi_Host *shost, void *addr_out = NULL; size_t dma_len_in; size_t dma_len_out; - u16 wait_state_count; unsigned int reslen = 0; if (ioc->shost_recovery || ioc->pci_error_recovery) { @@ -1924,22 +1880,9 @@ _transport_smp_handler(struct bsg_job *job, struct Scsi_Host *shost, if (rc) goto unmap_out; - wait_state_count = 0; - ioc_state = mpt3sas_base_get_iocstate(ioc, 1); - while (ioc_state != MPI2_IOC_STATE_OPERATIONAL) { - if (wait_state_count++ == 10) { - ioc_err(ioc, "%s: failed due to ioc not operational\n", - __func__); - rc = -EFAULT; - goto unmap_in; - } - ssleep(1); - ioc_state = mpt3sas_base_get_iocstate(ioc, 1); - ioc_info(ioc, "%s: waiting for operational state(count=%d)\n", - __func__, wait_state_count); - } - if (wait_state_count) - ioc_info(ioc, "%s: ioc is operational\n", __func__); + rc = mpt3sas_wait_for_ioc(ioc, IOC_OPERATIONAL_WAIT_COUNT); + if (rc) + goto unmap_in; smid = mpt3sas_base_get_smid(ioc, ioc->transport_cb_idx); if (!smid) { diff --git a/drivers/scsi/mvme147.c b/drivers/scsi/mvme147.c index 7d1ab414b78f..ca96d6d9c350 100644 --- a/drivers/scsi/mvme147.c +++ b/drivers/scsi/mvme147.c @@ -78,7 +78,6 @@ static struct scsi_host_template mvme147_host_template = { .this_id = 7, .sg_tablesize = SG_ALL, .cmd_per_lun = CMD_PER_LUN, - .use_clustering = ENABLE_CLUSTERING }; static struct Scsi_Host *mvme147_shost; diff --git a/drivers/scsi/mvsas/mv_init.c b/drivers/scsi/mvsas/mv_init.c index 3ac34373746c..030d911ee374 100644 --- a/drivers/scsi/mvsas/mv_init.c +++ b/drivers/scsi/mvsas/mv_init.c @@ -59,7 +59,6 @@ static struct scsi_host_template mvs_sht = { .this_id = -1, .sg_tablesize = SG_ALL, .max_sectors = SCSI_DEFAULT_MAX_SECTORS, - .use_clustering = ENABLE_CLUSTERING, .eh_device_reset_handler = sas_eh_device_reset_handler, .eh_target_reset_handler = sas_eh_target_reset_handler, .target_destroy = sas_target_destroy, diff --git a/drivers/scsi/mvumi.c b/drivers/scsi/mvumi.c index 2458974d1af6..dbe753fba486 100644 --- a/drivers/scsi/mvumi.c +++ b/drivers/scsi/mvumi.c @@ -2197,6 +2197,7 @@ static struct scsi_host_template mvumi_template = { .eh_timed_out = mvumi_timed_out, .eh_host_reset_handler = mvumi_host_reset, .bios_param = mvumi_bios_param, + .dma_boundary = PAGE_SIZE - 1, .this_id = -1, }; @@ -2620,7 +2621,7 @@ static int __maybe_unused mvumi_resume(struct pci_dev *pdev) } ret = mvumi_pci_set_master(pdev); - ret = pci_set_dma_mask(pdev, DMA_BIT_MASK(32)); + ret = dma_set_mask(&pdev->dev, DMA_BIT_MASK(32)); if (ret) goto fail; ret = pci_request_regions(mhba->pdev, MV_DRIVER_NAME); diff --git a/drivers/scsi/myrb.c b/drivers/scsi/myrb.c index 0642f2d0a3bb..539ac8ce4fcd 100644 --- a/drivers/scsi/myrb.c +++ b/drivers/scsi/myrb.c @@ -1528,6 +1528,7 @@ static int myrb_ldev_queuecommand(struct Scsi_Host *shost, scmd->scsi_done(scmd); return 0; } + /* fall through */ case WRITE_6: lba = (((scmd->cmnd[1] & 0x1F) << 16) | (scmd->cmnd[2] << 8) | @@ -1544,6 +1545,7 @@ static int myrb_ldev_queuecommand(struct Scsi_Host *shost, scmd->scsi_done(scmd); return 0; } + /* fall through */ case WRITE_10: case VERIFY: /* 0x2F */ case WRITE_VERIFY: /* 0x2E */ @@ -1560,6 +1562,7 @@ static int myrb_ldev_queuecommand(struct Scsi_Host *shost, scmd->scsi_done(scmd); return 0; } + /* fall through */ case WRITE_12: case VERIFY_12: /* 0xAF */ case WRITE_VERIFY_12: /* 0xAE */ diff --git a/drivers/scsi/ncr53c8xx.c b/drivers/scsi/ncr53c8xx.c index 6cd3e289ef99..1a236a3dfd51 100644 --- a/drivers/scsi/ncr53c8xx.c +++ b/drivers/scsi/ncr53c8xx.c @@ -8313,7 +8313,6 @@ struct Scsi_Host * __init ncr_attach(struct scsi_host_template *tpnt, tpnt->this_id = 7; tpnt->sg_tablesize = SCSI_NCR_SG_TABLESIZE; tpnt->cmd_per_lun = SCSI_NCR_CMD_PER_LUN; - tpnt->use_clustering = ENABLE_CLUSTERING; if (device->differential) driver_setup.diff_support = device->differential; diff --git a/drivers/scsi/nsp32.c b/drivers/scsi/nsp32.c index 5aac3e801903..00e3cbee55b8 100644 --- a/drivers/scsi/nsp32.c +++ b/drivers/scsi/nsp32.c @@ -274,7 +274,7 @@ static struct scsi_host_template nsp32_template = { .sg_tablesize = NSP32_SG_SIZE, .max_sectors = 128, .this_id = NSP32_HOST_SCSIID, - .use_clustering = DISABLE_CLUSTERING, + .dma_boundary = PAGE_SIZE - 1, .eh_abort_handler = nsp32_eh_abort, .eh_host_reset_handler = nsp32_eh_host_reset, /* .highmem_io = 1, */ diff --git a/drivers/scsi/osd/osd_initiator.c b/drivers/scsi/osd/osd_initiator.c index e19fa883376f..60cf7c5eb880 100644 --- a/drivers/scsi/osd/osd_initiator.c +++ b/drivers/scsi/osd/osd_initiator.c @@ -506,11 +506,11 @@ static void osd_request_async_done(struct request *req, blk_status_t error) _set_error_resid(or, req, error); if (req->next_rq) { - __blk_put_request(req->q, req->next_rq); + blk_put_request(req->next_rq); req->next_rq = NULL; } - __blk_put_request(req->q, req); + blk_put_request(req); or->request = NULL; or->in.req = NULL; or->out.req = NULL; diff --git a/drivers/scsi/osst.c b/drivers/scsi/osst.c index 7a1a1edde35d..664c1238a87f 100644 --- a/drivers/scsi/osst.c +++ b/drivers/scsi/osst.c @@ -341,7 +341,7 @@ static void osst_end_async(struct request *req, blk_status_t status) blk_rq_unmap_user(SRpnt->bio); } - __blk_put_request(req->q, req); + blk_put_request(req); } /* osst_request memory management */ diff --git a/drivers/scsi/pcmcia/nsp_cs.c b/drivers/scsi/pcmcia/nsp_cs.c index f3230494a8c9..1bd6825a4f14 100644 --- a/drivers/scsi/pcmcia/nsp_cs.c +++ b/drivers/scsi/pcmcia/nsp_cs.c @@ -86,7 +86,7 @@ static struct scsi_host_template nsp_driver_template = { .can_queue = 1, .this_id = NSP_INITIATOR_ID, .sg_tablesize = SG_ALL, - .use_clustering = DISABLE_CLUSTERING, + .dma_boundary = PAGE_SIZE - 1, }; static nsp_hw_data nsp_data_base; /* attach <-> detect glue */ diff --git a/drivers/scsi/pcmcia/qlogic_stub.c b/drivers/scsi/pcmcia/qlogic_stub.c index 173351a8554b..828d53faf09a 100644 --- a/drivers/scsi/pcmcia/qlogic_stub.c +++ b/drivers/scsi/pcmcia/qlogic_stub.c @@ -72,7 +72,7 @@ static struct scsi_host_template qlogicfas_driver_template = { .can_queue = 1, .this_id = -1, .sg_tablesize = SG_ALL, - .use_clustering = DISABLE_CLUSTERING, + .dma_boundary = PAGE_SIZE - 1, }; /*====================================================================*/ diff --git a/drivers/scsi/pcmcia/sym53c500_cs.c b/drivers/scsi/pcmcia/sym53c500_cs.c index a3b63bea0e50..d1e98a6ea28f 100644 --- a/drivers/scsi/pcmcia/sym53c500_cs.c +++ b/drivers/scsi/pcmcia/sym53c500_cs.c @@ -680,7 +680,6 @@ static struct scsi_host_template sym53c500_driver_template = { .can_queue = 1, .this_id = 7, .sg_tablesize = 32, - .use_clustering = ENABLE_CLUSTERING, .shost_attrs = SYM53C500_shost_attrs }; diff --git a/drivers/scsi/pm8001/pm8001_init.c b/drivers/scsi/pm8001/pm8001_init.c index d71e7e4ec29c..a36060c23b37 100644 --- a/drivers/scsi/pm8001/pm8001_init.c +++ b/drivers/scsi/pm8001/pm8001_init.c @@ -84,7 +84,6 @@ static struct scsi_host_template pm8001_sht = { .this_id = -1, .sg_tablesize = SG_ALL, .max_sectors = SCSI_DEFAULT_MAX_SECTORS, - .use_clustering = ENABLE_CLUSTERING, .eh_device_reset_handler = sas_eh_device_reset_handler, .eh_target_reset_handler = sas_eh_target_reset_handler, .target_destroy = sas_target_destroy, diff --git a/drivers/scsi/pmcraid.c b/drivers/scsi/pmcraid.c index 4e86994e10e8..7c4673308f5b 100644 --- a/drivers/scsi/pmcraid.c +++ b/drivers/scsi/pmcraid.c @@ -846,16 +846,9 @@ static void pmcraid_erp_done(struct pmcraid_cmd *cmd) cmd->ioa_cb->ioarcb.cdb[0], ioasc); } - /* if we had allocated sense buffers for request sense, copy the sense - * release the buffers - */ - if (cmd->sense_buffer != NULL) { - memcpy(scsi_cmd->sense_buffer, - cmd->sense_buffer, - SCSI_SENSE_BUFFERSIZE); - pci_free_consistent(pinstance->pdev, - SCSI_SENSE_BUFFERSIZE, - cmd->sense_buffer, cmd->sense_buffer_dma); + if (cmd->sense_buffer) { + dma_unmap_single(&pinstance->pdev->dev, cmd->sense_buffer_dma, + SCSI_SENSE_BUFFERSIZE, DMA_FROM_DEVICE); cmd->sense_buffer = NULL; cmd->sense_buffer_dma = 0; } @@ -2444,13 +2437,12 @@ static void pmcraid_request_sense(struct pmcraid_cmd *cmd) { struct pmcraid_ioarcb *ioarcb = &cmd->ioa_cb->ioarcb; struct pmcraid_ioadl_desc *ioadl = ioarcb->add_data.u.ioadl; + struct device *dev = &cmd->drv_inst->pdev->dev; - /* allocate DMAable memory for sense buffers */ - cmd->sense_buffer = pci_alloc_consistent(cmd->drv_inst->pdev, - SCSI_SENSE_BUFFERSIZE, - &cmd->sense_buffer_dma); - - if (cmd->sense_buffer == NULL) { + cmd->sense_buffer = cmd->scsi_cmd->sense_buffer; + cmd->sense_buffer_dma = dma_map_single(dev, cmd->sense_buffer, + SCSI_SENSE_BUFFERSIZE, DMA_FROM_DEVICE); + if (dma_mapping_error(dev, cmd->sense_buffer_dma)) { pmcraid_err ("couldn't allocate sense buffer for request sense\n"); pmcraid_erp_done(cmd); @@ -2491,17 +2483,15 @@ static void pmcraid_request_sense(struct pmcraid_cmd *cmd) /** * pmcraid_cancel_all - cancel all outstanding IOARCBs as part of error recovery * @cmd: command that failed - * @sense: true if request_sense is required after cancel all + * @need_sense: true if request_sense is required after cancel all * * This function sends a cancel all to a device to clear the queue. */ -static void pmcraid_cancel_all(struct pmcraid_cmd *cmd, u32 sense) +static void pmcraid_cancel_all(struct pmcraid_cmd *cmd, bool need_sense) { struct scsi_cmnd *scsi_cmd = cmd->scsi_cmd; struct pmcraid_ioarcb *ioarcb = &cmd->ioa_cb->ioarcb; struct pmcraid_resource_entry *res = scsi_cmd->device->hostdata; - void (*cmd_done) (struct pmcraid_cmd *) = sense ? pmcraid_erp_done - : pmcraid_request_sense; memset(ioarcb->cdb, 0, PMCRAID_MAX_CDB_LEN); ioarcb->request_flags0 = SYNC_OVERRIDE; @@ -2519,7 +2509,8 @@ static void pmcraid_cancel_all(struct pmcraid_cmd *cmd, u32 sense) /* writing to IOARRIN must be protected by host_lock, as mid-layer * schedule queuecommand while we are doing this */ - pmcraid_send_cmd(cmd, cmd_done, + pmcraid_send_cmd(cmd, need_sense ? + pmcraid_erp_done : pmcraid_request_sense, PMCRAID_REQUEST_SENSE_TIMEOUT, pmcraid_timeout_handler); } @@ -2612,7 +2603,7 @@ static int pmcraid_error_handler(struct pmcraid_cmd *cmd) struct pmcraid_ioasa *ioasa = &cmd->ioa_cb->ioasa; u32 ioasc = le32_to_cpu(ioasa->ioasc); u32 masked_ioasc = ioasc & PMCRAID_IOASC_SENSE_MASK; - u32 sense_copied = 0; + bool sense_copied = false; if (!res) { pmcraid_info("resource pointer is NULL\n"); @@ -2684,7 +2675,7 @@ static int pmcraid_error_handler(struct pmcraid_cmd *cmd) memcpy(scsi_cmd->sense_buffer, ioasa->sense_data, data_size); - sense_copied = 1; + sense_copied = true; } if (RES_IS_GSCSI(res->cfg_entry)) @@ -3523,7 +3514,7 @@ static int pmcraid_build_passthrough_ioadls( return -ENOMEM; } - sglist->num_dma_sg = pci_map_sg(cmd->drv_inst->pdev, + sglist->num_dma_sg = dma_map_sg(&cmd->drv_inst->pdev->dev, sglist->scatterlist, sglist->num_sg, direction); @@ -3572,7 +3563,7 @@ static void pmcraid_release_passthrough_ioadls( struct pmcraid_sglist *sglist = cmd->sglist; if (buflen > 0) { - pci_unmap_sg(cmd->drv_inst->pdev, + dma_unmap_sg(&cmd->drv_inst->pdev->dev, sglist->scatterlist, sglist->num_sg, direction); @@ -4158,7 +4149,6 @@ static struct scsi_host_template pmcraid_host_template = { .max_sectors = PMCRAID_IOA_MAX_SECTORS, .no_write_same = 1, .cmd_per_lun = PMCRAID_MAX_CMD_PER_LUN, - .use_clustering = ENABLE_CLUSTERING, .shost_attrs = pmcraid_host_attrs, .proc_name = PMCRAID_DRIVER_NAME, }; @@ -4708,9 +4698,9 @@ static void pmcraid_release_host_rrqs(struct pmcraid_instance *pinstance, int maxindex) { int i; - for (i = 0; i < maxindex; i++) { - pci_free_consistent(pinstance->pdev, + for (i = 0; i < maxindex; i++) { + dma_free_coherent(&pinstance->pdev->dev, HRRQ_ENTRY_SIZE * PMCRAID_MAX_CMD, pinstance->hrrq_start[i], pinstance->hrrq_start_bus_addr[i]); @@ -4737,11 +4727,9 @@ static int pmcraid_allocate_host_rrqs(struct pmcraid_instance *pinstance) for (i = 0; i < pinstance->num_hrrq; i++) { pinstance->hrrq_start[i] = - pci_alloc_consistent( - pinstance->pdev, - buffer_size, - &(pinstance->hrrq_start_bus_addr[i])); - + dma_alloc_coherent(&pinstance->pdev->dev, buffer_size, + &pinstance->hrrq_start_bus_addr[i], + GFP_KERNEL); if (!pinstance->hrrq_start[i]) { pmcraid_err("pci_alloc failed for hrrq vector : %d\n", i); @@ -4770,7 +4758,7 @@ static int pmcraid_allocate_host_rrqs(struct pmcraid_instance *pinstance) static void pmcraid_release_hcams(struct pmcraid_instance *pinstance) { if (pinstance->ccn.msg != NULL) { - pci_free_consistent(pinstance->pdev, + dma_free_coherent(&pinstance->pdev->dev, PMCRAID_AEN_HDR_SIZE + sizeof(struct pmcraid_hcam_ccn_ext), pinstance->ccn.msg, @@ -4782,7 +4770,7 @@ static void pmcraid_release_hcams(struct pmcraid_instance *pinstance) } if (pinstance->ldn.msg != NULL) { - pci_free_consistent(pinstance->pdev, + dma_free_coherent(&pinstance->pdev->dev, PMCRAID_AEN_HDR_SIZE + sizeof(struct pmcraid_hcam_ldn), pinstance->ldn.msg, @@ -4803,17 +4791,15 @@ static void pmcraid_release_hcams(struct pmcraid_instance *pinstance) */ static int pmcraid_allocate_hcams(struct pmcraid_instance *pinstance) { - pinstance->ccn.msg = pci_alloc_consistent( - pinstance->pdev, + pinstance->ccn.msg = dma_alloc_coherent(&pinstance->pdev->dev, PMCRAID_AEN_HDR_SIZE + sizeof(struct pmcraid_hcam_ccn_ext), - &(pinstance->ccn.baddr)); + &pinstance->ccn.baddr, GFP_KERNEL); - pinstance->ldn.msg = pci_alloc_consistent( - pinstance->pdev, + pinstance->ldn.msg = dma_alloc_coherent(&pinstance->pdev->dev, PMCRAID_AEN_HDR_SIZE + sizeof(struct pmcraid_hcam_ldn), - &(pinstance->ldn.baddr)); + &pinstance->ldn.baddr, GFP_KERNEL); if (pinstance->ldn.msg == NULL || pinstance->ccn.msg == NULL) { pmcraid_release_hcams(pinstance); @@ -4841,7 +4827,7 @@ static void pmcraid_release_config_buffers(struct pmcraid_instance *pinstance) { if (pinstance->cfg_table != NULL && pinstance->cfg_table_bus_addr != 0) { - pci_free_consistent(pinstance->pdev, + dma_free_coherent(&pinstance->pdev->dev, sizeof(struct pmcraid_config_table), pinstance->cfg_table, pinstance->cfg_table_bus_addr); @@ -4886,10 +4872,10 @@ static int pmcraid_allocate_config_buffers(struct pmcraid_instance *pinstance) list_add_tail(&pinstance->res_entries[i].queue, &pinstance->free_res_q); - pinstance->cfg_table = - pci_alloc_consistent(pinstance->pdev, + pinstance->cfg_table = dma_alloc_coherent(&pinstance->pdev->dev, sizeof(struct pmcraid_config_table), - &pinstance->cfg_table_bus_addr); + &pinstance->cfg_table_bus_addr, + GFP_KERNEL); if (NULL == pinstance->cfg_table) { pmcraid_err("couldn't alloc DMA memory for config table\n"); @@ -4954,7 +4940,7 @@ static void pmcraid_release_buffers(struct pmcraid_instance *pinstance) pmcraid_release_host_rrqs(pinstance, pinstance->num_hrrq); if (pinstance->inq_data != NULL) { - pci_free_consistent(pinstance->pdev, + dma_free_coherent(&pinstance->pdev->dev, sizeof(struct pmcraid_inquiry_data), pinstance->inq_data, pinstance->inq_data_baddr); @@ -4964,7 +4950,7 @@ static void pmcraid_release_buffers(struct pmcraid_instance *pinstance) } if (pinstance->timestamp_data != NULL) { - pci_free_consistent(pinstance->pdev, + dma_free_coherent(&pinstance->pdev->dev, sizeof(struct pmcraid_timestamp_data), pinstance->timestamp_data, pinstance->timestamp_data_baddr); @@ -4981,8 +4967,8 @@ static void pmcraid_release_buffers(struct pmcraid_instance *pinstance) * This routine pre-allocates memory based on the type of block as below: * cmdblocks(PMCRAID_MAX_CMD): kernel memory using kernel's slab_allocator, * IOARCBs(PMCRAID_MAX_CMD) : DMAable memory, using pci pool allocator - * config-table entries : DMAable memory using pci_alloc_consistent - * HostRRQs : DMAable memory, using pci_alloc_consistent + * config-table entries : DMAable memory using dma_alloc_coherent + * HostRRQs : DMAable memory, using dma_alloc_coherent * * Return Value * 0 in case all of the blocks are allocated, -ENOMEM otherwise. @@ -5019,11 +5005,9 @@ static int pmcraid_init_buffers(struct pmcraid_instance *pinstance) } /* allocate DMAable memory for page D0 INQUIRY buffer */ - pinstance->inq_data = pci_alloc_consistent( - pinstance->pdev, + pinstance->inq_data = dma_alloc_coherent(&pinstance->pdev->dev, sizeof(struct pmcraid_inquiry_data), - &pinstance->inq_data_baddr); - + &pinstance->inq_data_baddr, GFP_KERNEL); if (pinstance->inq_data == NULL) { pmcraid_err("couldn't allocate DMA memory for INQUIRY\n"); pmcraid_release_buffers(pinstance); @@ -5031,11 +5015,10 @@ static int pmcraid_init_buffers(struct pmcraid_instance *pinstance) } /* allocate DMAable memory for set timestamp data buffer */ - pinstance->timestamp_data = pci_alloc_consistent( - pinstance->pdev, + pinstance->timestamp_data = dma_alloc_coherent(&pinstance->pdev->dev, sizeof(struct pmcraid_timestamp_data), - &pinstance->timestamp_data_baddr); - + &pinstance->timestamp_data_baddr, + GFP_KERNEL); if (pinstance->timestamp_data == NULL) { pmcraid_err("couldn't allocate DMA memory for \ set time_stamp \n"); @@ -5324,12 +5307,12 @@ static int pmcraid_resume(struct pci_dev *pdev) pci_set_master(pdev); - if ((sizeof(dma_addr_t) == 4) || - pci_set_dma_mask(pdev, DMA_BIT_MASK(64))) - rc = pci_set_dma_mask(pdev, DMA_BIT_MASK(32)); + if (sizeof(dma_addr_t) == 4 || + dma_set_mask(&pdev->dev, DMA_BIT_MASK(64))) + rc = dma_set_mask(&pdev->dev, DMA_BIT_MASK(32)); if (rc == 0) - rc = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32)); + rc = dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(32)); if (rc != 0) { dev_err(&pdev->dev, "resume: Failed to set PCI DMA mask\n"); @@ -5733,19 +5716,19 @@ static int pmcraid_probe(struct pci_dev *pdev, /* Firmware requires the system bus address of IOARCB to be within * 32-bit addressable range though it has 64-bit IOARRIN register. * However, firmware supports 64-bit streaming DMA buffers, whereas - * coherent buffers are to be 32-bit. Since pci_alloc_consistent always + * coherent buffers are to be 32-bit. Since dma_alloc_coherent always * returns memory within 4GB (if not, change this logic), coherent * buffers are within firmware acceptable address ranges. */ - if ((sizeof(dma_addr_t) == 4) || - pci_set_dma_mask(pdev, DMA_BIT_MASK(64))) - rc = pci_set_dma_mask(pdev, DMA_BIT_MASK(32)); + if (sizeof(dma_addr_t) == 4 || + dma_set_mask(&pdev->dev, DMA_BIT_MASK(64))) + rc = dma_set_mask(&pdev->dev, DMA_BIT_MASK(32)); /* firmware expects 32-bit DMA addresses for IOARRIN register; set 32 - * bit mask for pci_alloc_consistent to return addresses within 4GB + * bit mask for dma_alloc_coherent to return addresses within 4GB */ if (rc == 0) - rc = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32)); + rc = dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(32)); if (rc != 0) { dev_err(&pdev->dev, "Failed to set PCI DMA mask\n"); diff --git a/drivers/scsi/ppa.c b/drivers/scsi/ppa.c index ee86a0c62dbf..c182b5458f98 100644 --- a/drivers/scsi/ppa.c +++ b/drivers/scsi/ppa.c @@ -978,7 +978,6 @@ static struct scsi_host_template ppa_template = { .bios_param = ppa_biosparam, .this_id = -1, .sg_tablesize = SG_ALL, - .use_clustering = ENABLE_CLUSTERING, .can_queue = 1, .slave_alloc = ppa_adjust_queue, }; diff --git a/drivers/scsi/ps3rom.c b/drivers/scsi/ps3rom.c index 4924424d20fe..8d769138c01c 100644 --- a/drivers/scsi/ps3rom.c +++ b/drivers/scsi/ps3rom.c @@ -349,7 +349,6 @@ static struct scsi_host_template ps3rom_host_template = { .sg_tablesize = SG_ALL, .emulated = 1, /* only sg driver uses this */ .max_sectors = PS3ROM_MAX_SECTORS, - .use_clustering = ENABLE_CLUSTERING, .module = THIS_MODULE, }; diff --git a/drivers/scsi/qedf/qedf_main.c b/drivers/scsi/qedf/qedf_main.c index d5a4f17fce51..edcaf4b0cb0b 100644 --- a/drivers/scsi/qedf/qedf_main.c +++ b/drivers/scsi/qedf/qedf_main.c @@ -785,7 +785,6 @@ static struct scsi_host_template qedf_host_template = { .name = QEDF_MODULE_NAME, .this_id = -1, .cmd_per_lun = 32, - .use_clustering = ENABLE_CLUSTERING, .max_sectors = 0xffff, .queuecommand = qedf_queuecommand, .shost_attrs = qedf_host_attrs, @@ -2935,8 +2934,7 @@ static void qedf_free_fcoe_pf_param(struct qedf_ctx *qedf) qedf_free_global_queues(qedf); - if (qedf->global_queues) - kfree(qedf->global_queues); + kfree(qedf->global_queues); } /* diff --git a/drivers/scsi/qedi/qedi.h b/drivers/scsi/qedi/qedi.h index a6f96b35e971..a26bb5066b90 100644 --- a/drivers/scsi/qedi/qedi.h +++ b/drivers/scsi/qedi/qedi.h @@ -45,7 +45,7 @@ struct qedi_endpoint; #define QEDI_MAX_TASK_NUM 0x0FFF #define QEDI_MAX_ISCSI_CONNS_PER_HBA 1024 #define QEDI_ISCSI_MAX_BDS_PER_CMD 255 /* Firmware max BDs is 255 */ -#define MAX_OUSTANDING_TASKS_PER_CON 1024 +#define MAX_OUTSTANDING_TASKS_PER_CON 1024 #define QEDI_MAX_BD_LEN 0xffff #define QEDI_BD_SPLIT_SZ 0x1000 @@ -63,12 +63,9 @@ struct qedi_endpoint; #define QEDI_LOCAL_PORT_INVALID 0xffff #define TX_RX_RING 16 #define RX_RING (TX_RX_RING - 1) -#define LL2_SINGLE_BUF_SIZE 0x400 -#define QEDI_PAGE_SIZE 4096 #define QEDI_PAGE_ALIGN(addr) ALIGN(addr, QEDI_PAGE_SIZE) #define QEDI_PAGE_MASK (~((QEDI_PAGE_SIZE) - 1)) -#define QEDI_PAGE_SIZE 4096 #define QEDI_HW_DMA_BOUNDARY 0xfff #define QEDI_PATH_HANDLE 0xFE0000000UL @@ -146,7 +143,7 @@ struct skb_work_list { }; /* Queue sizes in number of elements */ -#define QEDI_SQ_SIZE MAX_OUSTANDING_TASKS_PER_CON +#define QEDI_SQ_SIZE MAX_OUTSTANDING_TASKS_PER_CON #define QEDI_CQ_SIZE 2048 #define QEDI_CMDQ_SIZE QEDI_MAX_ISCSI_TASK #define QEDI_PROTO_CQ_PROD_IDX 0 diff --git a/drivers/scsi/qedi/qedi_iscsi.c b/drivers/scsi/qedi/qedi_iscsi.c index 2f0a4f2c5ff8..4da660c1c431 100644 --- a/drivers/scsi/qedi/qedi_iscsi.c +++ b/drivers/scsi/qedi/qedi_iscsi.c @@ -61,7 +61,6 @@ struct scsi_host_template qedi_host_template = { .max_sectors = 0xffff, .dma_boundary = QEDI_HW_DMA_BOUNDARY, .cmd_per_lun = 128, - .use_clustering = ENABLE_CLUSTERING, .shost_attrs = qedi_shost_attrs, }; diff --git a/drivers/scsi/qedi/qedi_main.c b/drivers/scsi/qedi/qedi_main.c index 105b0e4d7818..5c53409a8cea 100644 --- a/drivers/scsi/qedi/qedi_main.c +++ b/drivers/scsi/qedi/qedi_main.c @@ -44,6 +44,11 @@ module_param(qedi_io_tracing, uint, 0644); MODULE_PARM_DESC(qedi_io_tracing, " Enable logging of SCSI requests/completions into trace buffer. (default off)."); +uint qedi_ll2_buf_size = 0x400; +module_param(qedi_ll2_buf_size, uint, 0644); +MODULE_PARM_DESC(qedi_ll2_buf_size, + "parameter to set ping packet size, default - 0x400, Jumbo packets - 0x2400."); + const struct qed_iscsi_ops *qedi_ops; static struct scsi_transport_template *qedi_scsi_transport; static struct pci_driver qedi_pci_driver; @@ -228,7 +233,7 @@ static int __qedi_alloc_uio_rings(struct qedi_uio_dev *udev) } /* Allocating memory for Tx/Rx pkt buffer */ - udev->ll2_buf_size = TX_RX_RING * LL2_SINGLE_BUF_SIZE; + udev->ll2_buf_size = TX_RX_RING * qedi_ll2_buf_size; udev->ll2_buf_size = QEDI_PAGE_ALIGN(udev->ll2_buf_size); udev->ll2_buf = (void *)__get_free_pages(GFP_KERNEL | __GFP_COMP | __GFP_ZERO, 2); @@ -283,7 +288,7 @@ static int qedi_alloc_uio_rings(struct qedi_ctx *qedi) qedi->udev = udev; udev->tx_pkt = udev->ll2_buf; - udev->rx_pkt = udev->ll2_buf + LL2_SINGLE_BUF_SIZE; + udev->rx_pkt = udev->ll2_buf + qedi_ll2_buf_size; return 0; err_uctrl: @@ -644,8 +649,7 @@ static struct qedi_ctx *qedi_host_alloc(struct pci_dev *pdev) qedi->max_active_conns = ISCSI_MAX_SESS_PER_HBA; qedi->max_sqes = QEDI_SQ_SIZE; - if (shost_use_blk_mq(shost)) - shost->nr_hw_queues = MIN_NUM_CPUS_MSIX(qedi); + shost->nr_hw_queues = MIN_NUM_CPUS_MSIX(qedi); pci_set_drvdata(pdev, qedi); @@ -659,7 +663,7 @@ static int qedi_ll2_rx(void *cookie, struct sk_buff *skb, u32 arg1, u32 arg2) struct qedi_uio_dev *udev; struct qedi_uio_ctrl *uctrl; struct skb_work_list *work; - u32 prod; + struct ethhdr *eh; if (!qedi) { QEDI_ERR(NULL, "qedi is NULL\n"); @@ -673,6 +677,29 @@ static int qedi_ll2_rx(void *cookie, struct sk_buff *skb, u32 arg1, u32 arg2) return 0; } + eh = (struct ethhdr *)skb->data; + /* Undo VLAN encapsulation */ + if (eh->h_proto == htons(ETH_P_8021Q)) { + memmove((u8 *)eh + VLAN_HLEN, eh, ETH_ALEN * 2); + eh = (struct ethhdr *)skb_pull(skb, VLAN_HLEN); + skb_reset_mac_header(skb); + } + + /* Filter out non FIP/FCoE frames here to free them faster */ + if (eh->h_proto != htons(ETH_P_ARP) && + eh->h_proto != htons(ETH_P_IP) && + eh->h_proto != htons(ETH_P_IPV6)) { + QEDI_INFO(&qedi->dbg_ctx, QEDI_LOG_LL2, + "Dropping frame ethertype [0x%x] len [0x%x].\n", + eh->h_proto, skb->len); + kfree_skb(skb); + return 0; + } + + QEDI_INFO(&qedi->dbg_ctx, QEDI_LOG_LL2, + "Allowed frame ethertype [0x%x] len [0x%x].\n", + eh->h_proto, skb->len); + udev = qedi->udev; uctrl = udev->uctrl; @@ -695,17 +722,10 @@ static int qedi_ll2_rx(void *cookie, struct sk_buff *skb, u32 arg1, u32 arg2) spin_lock_bh(&qedi->ll2_lock); list_add_tail(&work->list, &qedi->ll2_skb_list); + spin_unlock_bh(&qedi->ll2_lock); - ++uctrl->hw_rx_prod_cnt; - prod = (uctrl->hw_rx_prod + 1) % RX_RING; - if (prod != uctrl->host_rx_cons) { - uctrl->hw_rx_prod = prod; - spin_unlock_bh(&qedi->ll2_lock); - wake_up_process(qedi->ll2_recv_thread); - return 0; - } + wake_up_process(qedi->ll2_recv_thread); - spin_unlock_bh(&qedi->ll2_lock); return 0; } @@ -720,6 +740,7 @@ static int qedi_ll2_process_skb(struct qedi_ctx *qedi, struct sk_buff *skb, u32 rx_bd_prod; void *pkt; int len = 0; + u32 prod; if (!qedi) { QEDI_ERR(NULL, "qedi is NULL\n"); @@ -728,12 +749,16 @@ static int qedi_ll2_process_skb(struct qedi_ctx *qedi, struct sk_buff *skb, udev = qedi->udev; uctrl = udev->uctrl; - pkt = udev->rx_pkt + (uctrl->hw_rx_prod * LL2_SINGLE_BUF_SIZE); - len = min_t(u32, skb->len, (u32)LL2_SINGLE_BUF_SIZE); + + ++uctrl->hw_rx_prod_cnt; + prod = (uctrl->hw_rx_prod + 1) % RX_RING; + + pkt = udev->rx_pkt + (prod * qedi_ll2_buf_size); + len = min_t(u32, skb->len, (u32)qedi_ll2_buf_size); memcpy(pkt, skb->data, len); memset(&rxbd, 0, sizeof(rxbd)); - rxbd.rx_pkt_index = uctrl->hw_rx_prod; + rxbd.rx_pkt_index = prod; rxbd.rx_pkt_len = len; rxbd.vlan_id = vlan_id; @@ -744,6 +769,16 @@ static int qedi_ll2_process_skb(struct qedi_ctx *qedi, struct sk_buff *skb, memcpy(p_rxbd, &rxbd, sizeof(rxbd)); + QEDI_INFO(&qedi->dbg_ctx, QEDI_LOG_LL2, + "hw_rx_prod [%d] prod [%d] hw_rx_bd_prod [%d] rx_pkt_idx [%d] rx_len [%d].\n", + uctrl->hw_rx_prod, prod, uctrl->hw_rx_bd_prod, + rxbd.rx_pkt_index, rxbd.rx_pkt_len); + QEDI_INFO(&qedi->dbg_ctx, QEDI_LOG_LL2, + "host_rx_cons [%d] hw_rx_bd_cons [%d].\n", + uctrl->host_rx_cons, uctrl->host_rx_bd_cons); + + uctrl->hw_rx_prod = prod; + /* notify the iscsiuio about new packet */ uio_event_notify(&udev->qedi_uinfo); @@ -796,7 +831,7 @@ static int qedi_set_iscsi_pf_param(struct qedi_ctx *qedi) int rval = 0; - num_sq_pages = (MAX_OUSTANDING_TASKS_PER_CON * 8) / PAGE_SIZE; + num_sq_pages = (MAX_OUTSTANDING_TASKS_PER_CON * 8) / QEDI_PAGE_SIZE; qedi->num_queues = MIN_NUM_CPUS_MSIX(qedi); @@ -834,7 +869,7 @@ static int qedi_set_iscsi_pf_param(struct qedi_ctx *qedi) qedi->pf_params.iscsi_pf_params.max_fin_rt = 2; for (log_page_size = 0 ; log_page_size < 32 ; log_page_size++) { - if ((1 << log_page_size) == PAGE_SIZE) + if ((1 << log_page_size) == QEDI_PAGE_SIZE) break; } qedi->pf_params.iscsi_pf_params.log_page_size = log_page_size; @@ -952,6 +987,9 @@ static int qedi_find_boot_info(struct qedi_ctx *qedi, cls_sess = iscsi_conn_to_session(cls_conn); sess = cls_sess->dd_data; + if (!iscsi_is_session_online(cls_sess)) + continue; + if (pri_ctrl_flags) { if (!strcmp(pri_tgt->iscsi_name, sess->targetname) && !strcmp(pri_tgt->ip_addr, ep_ip_addr)) { @@ -1298,7 +1336,7 @@ static int qedi_request_msix_irq(struct qedi_ctx *qedi) int i, rc, cpu; cpu = cpumask_first(cpu_online_mask); - for (i = 0; i < MIN_NUM_CPUS_MSIX(qedi); i++) { + for (i = 0; i < qedi->int_info.msix_cnt; i++) { rc = request_irq(qedi->int_info.msix[i].vector, qedi_msix_handler, 0, "qedi", &qedi->fp_array[i]); @@ -1376,7 +1414,7 @@ static void qedi_free_bdq(struct qedi_ctx *qedi) int i; if (qedi->bdq_pbl_list) - dma_free_coherent(&qedi->pdev->dev, PAGE_SIZE, + dma_free_coherent(&qedi->pdev->dev, QEDI_PAGE_SIZE, qedi->bdq_pbl_list, qedi->bdq_pbl_list_dma); if (qedi->bdq_pbl) @@ -1437,7 +1475,7 @@ static int qedi_alloc_bdq(struct qedi_ctx *qedi) /* Alloc dma memory for BDQ page buffer list */ qedi->bdq_pbl_mem_size = QEDI_BDQ_NUM * sizeof(struct scsi_bd); - qedi->bdq_pbl_mem_size = ALIGN(qedi->bdq_pbl_mem_size, PAGE_SIZE); + qedi->bdq_pbl_mem_size = ALIGN(qedi->bdq_pbl_mem_size, QEDI_PAGE_SIZE); qedi->rq_num_entries = qedi->bdq_pbl_mem_size / sizeof(struct scsi_bd); QEDI_INFO(&qedi->dbg_ctx, QEDI_LOG_CONN, "rq_num_entries = %d.\n", @@ -1472,7 +1510,8 @@ static int qedi_alloc_bdq(struct qedi_ctx *qedi) } /* Allocate list of PBL pages */ - qedi->bdq_pbl_list = dma_zalloc_coherent(&qedi->pdev->dev, PAGE_SIZE, + qedi->bdq_pbl_list = dma_zalloc_coherent(&qedi->pdev->dev, + QEDI_PAGE_SIZE, &qedi->bdq_pbl_list_dma, GFP_KERNEL); if (!qedi->bdq_pbl_list) { @@ -1485,13 +1524,14 @@ static int qedi_alloc_bdq(struct qedi_ctx *qedi) * Now populate PBL list with pages that contain pointers to the * individual buffers. */ - qedi->bdq_pbl_list_num_entries = qedi->bdq_pbl_mem_size / PAGE_SIZE; + qedi->bdq_pbl_list_num_entries = qedi->bdq_pbl_mem_size / + QEDI_PAGE_SIZE; list = (u64 *)qedi->bdq_pbl_list; page = qedi->bdq_pbl_list_dma; for (i = 0; i < qedi->bdq_pbl_list_num_entries; i++) { *list = qedi->bdq_pbl_dma; list++; - page += PAGE_SIZE; + page += QEDI_PAGE_SIZE; } return 0; diff --git a/drivers/scsi/qedi/qedi_version.h b/drivers/scsi/qedi/qedi_version.h index 8a0e523fc089..41bcbbafebd4 100644 --- a/drivers/scsi/qedi/qedi_version.h +++ b/drivers/scsi/qedi/qedi_version.h @@ -7,8 +7,8 @@ * this source tree. */ -#define QEDI_MODULE_VERSION "8.33.0.20" +#define QEDI_MODULE_VERSION "8.33.0.21" #define QEDI_DRIVER_MAJOR_VER 8 #define QEDI_DRIVER_MINOR_VER 33 #define QEDI_DRIVER_REV_VER 0 -#define QEDI_DRIVER_ENG_VER 20 +#define QEDI_DRIVER_ENG_VER 21 diff --git a/drivers/scsi/qla1280.c b/drivers/scsi/qla1280.c index 15a50cc7e4b3..a414f51302b7 100644 --- a/drivers/scsi/qla1280.c +++ b/drivers/scsi/qla1280.c @@ -383,20 +383,10 @@ #include "qla1280.h" -#ifndef BITS_PER_LONG -#error "BITS_PER_LONG not defined!" -#endif -#if (BITS_PER_LONG == 64) || defined CONFIG_HIGHMEM +#ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT #define QLA_64BIT_PTR 1 #endif -#ifdef QLA_64BIT_PTR -#define pci_dma_hi32(a) ((a >> 16) >> 16) -#else -#define pci_dma_hi32(a) 0 -#endif -#define pci_dma_lo32(a) (a & 0xffffffff) - #define NVRAM_DELAY() udelay(500) /* 2 microseconds */ #if defined(__ia64__) && !defined(ia64_platform_is) @@ -1790,8 +1780,8 @@ qla1280_load_firmware_dma(struct scsi_qla_host *ha) mb[4] = cnt; mb[3] = ha->request_dma & 0xffff; mb[2] = (ha->request_dma >> 16) & 0xffff; - mb[7] = pci_dma_hi32(ha->request_dma) & 0xffff; - mb[6] = pci_dma_hi32(ha->request_dma) >> 16; + mb[7] = upper_32_bits(ha->request_dma) & 0xffff; + mb[6] = upper_32_bits(ha->request_dma) >> 16; dprintk(2, "%s: op=%d 0x%p = 0x%4x,0x%4x,0x%4x,0x%4x\n", __func__, mb[0], (void *)(long)ha->request_dma, @@ -1810,8 +1800,8 @@ qla1280_load_firmware_dma(struct scsi_qla_host *ha) mb[4] = cnt; mb[3] = p_tbuf & 0xffff; mb[2] = (p_tbuf >> 16) & 0xffff; - mb[7] = pci_dma_hi32(p_tbuf) & 0xffff; - mb[6] = pci_dma_hi32(p_tbuf) >> 16; + mb[7] = upper_32_bits(p_tbuf) & 0xffff; + mb[6] = upper_32_bits(p_tbuf) >> 16; err = qla1280_mailbox_command(ha, BIT_4 | BIT_3 | BIT_2 | BIT_1 | BIT_0, mb); @@ -1933,8 +1923,8 @@ qla1280_init_rings(struct scsi_qla_host *ha) mb[3] = ha->request_dma & 0xffff; mb[2] = (ha->request_dma >> 16) & 0xffff; mb[4] = 0; - mb[7] = pci_dma_hi32(ha->request_dma) & 0xffff; - mb[6] = pci_dma_hi32(ha->request_dma) >> 16; + mb[7] = upper_32_bits(ha->request_dma) & 0xffff; + mb[6] = upper_32_bits(ha->request_dma) >> 16; if (!(status = qla1280_mailbox_command(ha, BIT_7 | BIT_6 | BIT_4 | BIT_3 | BIT_2 | BIT_1 | BIT_0, &mb[0]))) { @@ -1947,8 +1937,8 @@ qla1280_init_rings(struct scsi_qla_host *ha) mb[3] = ha->response_dma & 0xffff; mb[2] = (ha->response_dma >> 16) & 0xffff; mb[5] = 0; - mb[7] = pci_dma_hi32(ha->response_dma) & 0xffff; - mb[6] = pci_dma_hi32(ha->response_dma) >> 16; + mb[7] = upper_32_bits(ha->response_dma) & 0xffff; + mb[6] = upper_32_bits(ha->response_dma) >> 16; status = qla1280_mailbox_command(ha, BIT_7 | BIT_6 | BIT_5 | BIT_3 | BIT_2 | BIT_1 | BIT_0, &mb[0]); @@ -2914,13 +2904,13 @@ qla1280_64bit_start_scsi(struct scsi_qla_host *ha, struct srb * sp) SCSI_BUS_32(cmd)); #endif *dword_ptr++ = - cpu_to_le32(pci_dma_lo32(dma_handle)); + cpu_to_le32(lower_32_bits(dma_handle)); *dword_ptr++ = - cpu_to_le32(pci_dma_hi32(dma_handle)); + cpu_to_le32(upper_32_bits(dma_handle)); *dword_ptr++ = cpu_to_le32(sg_dma_len(s)); dprintk(3, "S/G Segment phys_addr=%x %x, len=0x%x\n", - cpu_to_le32(pci_dma_hi32(dma_handle)), - cpu_to_le32(pci_dma_lo32(dma_handle)), + cpu_to_le32(upper_32_bits(dma_handle)), + cpu_to_le32(lower_32_bits(dma_handle)), cpu_to_le32(sg_dma_len(sg_next(s)))); remseg--; } @@ -2976,14 +2966,14 @@ qla1280_64bit_start_scsi(struct scsi_qla_host *ha, struct srb * sp) SCSI_BUS_32(cmd)); #endif *dword_ptr++ = - cpu_to_le32(pci_dma_lo32(dma_handle)); + cpu_to_le32(lower_32_bits(dma_handle)); *dword_ptr++ = - cpu_to_le32(pci_dma_hi32(dma_handle)); + cpu_to_le32(upper_32_bits(dma_handle)); *dword_ptr++ = cpu_to_le32(sg_dma_len(s)); dprintk(3, "S/G Segment Cont. phys_addr=%x %x, len=0x%x\n", - cpu_to_le32(pci_dma_hi32(dma_handle)), - cpu_to_le32(pci_dma_lo32(dma_handle)), + cpu_to_le32(upper_32_bits(dma_handle)), + cpu_to_le32(lower_32_bits(dma_handle)), cpu_to_le32(sg_dma_len(s))); } remseg -= cnt; @@ -3178,10 +3168,10 @@ qla1280_32bit_start_scsi(struct scsi_qla_host *ha, struct srb * sp) if (cnt == 4) break; *dword_ptr++ = - cpu_to_le32(pci_dma_lo32(sg_dma_address(s))); + cpu_to_le32(lower_32_bits(sg_dma_address(s))); *dword_ptr++ = cpu_to_le32(sg_dma_len(s)); dprintk(3, "S/G Segment phys_addr=0x%lx, len=0x%x\n", - (pci_dma_lo32(sg_dma_address(s))), + (lower_32_bits(sg_dma_address(s))), (sg_dma_len(s))); remseg--; } @@ -3224,13 +3214,13 @@ qla1280_32bit_start_scsi(struct scsi_qla_host *ha, struct srb * sp) if (cnt == 7) break; *dword_ptr++ = - cpu_to_le32(pci_dma_lo32(sg_dma_address(s))); + cpu_to_le32(lower_32_bits(sg_dma_address(s))); *dword_ptr++ = cpu_to_le32(sg_dma_len(s)); dprintk(1, "S/G Segment Cont. phys_addr=0x%x, " "len=0x%x\n", - cpu_to_le32(pci_dma_lo32(sg_dma_address(s))), + cpu_to_le32(lower_32_bits(sg_dma_address(s))), cpu_to_le32(sg_dma_len(s))); } remseg -= cnt; @@ -4213,7 +4203,6 @@ static struct scsi_host_template qla1280_driver_template = { .can_queue = MAX_OUTSTANDING_COMMANDS, .this_id = -1, .sg_tablesize = SG_ALL, - .use_clustering = ENABLE_CLUSTERING, }; diff --git a/drivers/scsi/qla2xxx/qla_attr.c b/drivers/scsi/qla2xxx/qla_attr.c index 0bb9ac6ece92..00444dc79756 100644 --- a/drivers/scsi/qla2xxx/qla_attr.c +++ b/drivers/scsi/qla2xxx/qla_attr.c @@ -2712,6 +2712,8 @@ qla24xx_vport_delete(struct fc_vport *fc_vport) test_bit(FCPORT_UPDATE_NEEDED, &vha->dpc_flags)) msleep(1000); + qla_nvme_delete(vha); + qla24xx_disable_vp(vha); qla2x00_wait_for_sess_deletion(vha); diff --git a/drivers/scsi/qla2xxx/qla_init.c b/drivers/scsi/qla2xxx/qla_init.c index eb59c796a795..364bb52ed2a6 100644 --- a/drivers/scsi/qla2xxx/qla_init.c +++ b/drivers/scsi/qla2xxx/qla_init.c @@ -237,15 +237,13 @@ qla2x00_async_login(struct scsi_qla_host *vha, fc_port_t *fcport, qla2x00_init_timer(sp, qla2x00_get_async_timeout(vha) + 2); sp->done = qla2x00_async_login_sp_done; - if (N2N_TOPO(fcport->vha->hw) && fcport_is_bigger(fcport)) { + if (N2N_TOPO(fcport->vha->hw) && fcport_is_bigger(fcport)) lio->u.logio.flags |= SRB_LOGIN_PRLI_ONLY; - } else { + else lio->u.logio.flags |= SRB_LOGIN_COND_PLOGI; - if (fcport->fc4f_nvme) - lio->u.logio.flags |= SRB_LOGIN_SKIP_PRLI; - - } + if (fcport->fc4f_nvme) + lio->u.logio.flags |= SRB_LOGIN_SKIP_PRLI; ql_dbg(ql_dbg_disc, vha, 0x2072, "Async-login - %8phC hdl=%x, loopid=%x portid=%02x%02x%02x " diff --git a/drivers/scsi/qla2xxx/qla_mid.c b/drivers/scsi/qla2xxx/qla_mid.c index d620f4bebcd0..099d8e9851cb 100644 --- a/drivers/scsi/qla2xxx/qla_mid.c +++ b/drivers/scsi/qla2xxx/qla_mid.c @@ -507,6 +507,7 @@ qla24xx_create_vhost(struct fc_vport *fc_vport) qla2x00_start_timer(vha, WATCH_INTERVAL); vha->req = base_vha->req; + vha->flags.nvme_enabled = base_vha->flags.nvme_enabled; host->can_queue = base_vha->req->length + 128; host->cmd_per_lun = 3; if (IS_T10_PI_CAPABLE(ha) && ql2xenabledif) diff --git a/drivers/scsi/qla2xxx/qla_nvme.c b/drivers/scsi/qla2xxx/qla_nvme.c index 7e78e7eff783..39d892bbd219 100644 --- a/drivers/scsi/qla2xxx/qla_nvme.c +++ b/drivers/scsi/qla2xxx/qla_nvme.c @@ -272,17 +272,6 @@ static void qla_nvme_fcp_abort(struct nvme_fc_local_port *lport, schedule_work(&priv->abort_work); } -static void qla_nvme_poll(struct nvme_fc_local_port *lport, void *hw_queue_handle) -{ - struct qla_qpair *qpair = hw_queue_handle; - unsigned long flags; - struct scsi_qla_host *vha = lport->private; - - spin_lock_irqsave(&qpair->qp_lock, flags); - qla24xx_process_response_queue(vha, qpair->rsp); - spin_unlock_irqrestore(&qpair->qp_lock, flags); -} - static inline int qla2x00_start_nvme_mq(srb_t *sp) { unsigned long flags; @@ -474,21 +463,10 @@ static int qla_nvme_post_cmd(struct nvme_fc_local_port *lport, int rval = -ENODEV; srb_t *sp; struct qla_qpair *qpair = hw_queue_handle; - struct nvme_private *priv; + struct nvme_private *priv = fd->private; struct qla_nvme_rport *qla_rport = rport->private; - if (!fd || !qpair) { - ql_log(ql_log_warn, NULL, 0x2134, - "NO NVMe request or Queue Handle\n"); - return rval; - } - - priv = fd->private; fcport = qla_rport->fcport; - if (!fcport) { - ql_log(ql_log_warn, NULL, 0x210e, "No fcport ptr\n"); - return rval; - } vha = fcport->vha; @@ -517,6 +495,7 @@ static int qla_nvme_post_cmd(struct nvme_fc_local_port *lport, sp->name = "nvme_cmd"; sp->done = qla_nvme_sp_done; sp->qpair = qpair; + sp->vha = vha; nvme = &sp->u.iocb_cmd; nvme->u.nvme.desc = fd; @@ -564,7 +543,7 @@ static void qla_nvme_remoteport_delete(struct nvme_fc_remote_port *rport) schedule_work(&fcport->free_work); } - fcport->nvme_flag &= ~(NVME_FLAG_REGISTERED | NVME_FLAG_DELETING); + fcport->nvme_flag &= ~NVME_FLAG_DELETING; ql_log(ql_log_info, fcport->vha, 0x2110, "remoteport_delete of %p completed.\n", fcport); } @@ -578,7 +557,6 @@ static struct nvme_fc_port_template qla_nvme_fc_transport = { .ls_abort = qla_nvme_ls_abort, .fcp_io = qla_nvme_post_cmd, .fcp_abort = qla_nvme_fcp_abort, - .poll_queue = qla_nvme_poll, .max_hw_queues = 8, .max_sgl_segments = 128, .max_dif_sgl_segments = 64, diff --git a/drivers/scsi/qla2xxx/qla_os.c b/drivers/scsi/qla2xxx/qla_os.c index d0ecc729a90a..ea69dafc9774 100644 --- a/drivers/scsi/qla2xxx/qla_os.c +++ b/drivers/scsi/qla2xxx/qla_os.c @@ -328,7 +328,6 @@ struct scsi_host_template qla2xxx_driver_template = { .map_queues = qla2xxx_map_queues, .this_id = -1, .cmd_per_lun = 3, - .use_clustering = ENABLE_CLUSTERING, .sg_tablesize = SG_ALL, .max_sectors = 0xFFFF, @@ -857,13 +856,9 @@ qla2xxx_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *cmd) } if (ha->mqenable) { - if (shost_use_blk_mq(vha->host)) { - tag = blk_mq_unique_tag(cmd->request); - hwq = blk_mq_unique_tag_to_hwq(tag); - qpair = ha->queue_pair_map[hwq]; - } else if (vha->vp_idx && vha->qpair) { - qpair = vha->qpair; - } + tag = blk_mq_unique_tag(cmd->request); + hwq = blk_mq_unique_tag_to_hwq(tag); + qpair = ha->queue_pair_map[hwq]; if (qpair) return qla2xxx_mqueuecommand(host, cmd, qpair); @@ -1464,7 +1459,7 @@ __qla2xxx_eh_generic_reset(char *name, enum nexus_wait_type type, goto eh_reset_failed; } err = 2; - if (do_reset(fcport, cmd->device->lun, cmd->request->cpu + 1) + if (do_reset(fcport, cmd->device->lun, blk_mq_rq_cpu(cmd->request) + 1) != QLA_SUCCESS) { ql_log(ql_log_warn, vha, 0x800c, "do_reset failed for cmd=%p.\n", cmd); @@ -1746,10 +1741,53 @@ qla2x00_loop_reset(scsi_qla_host_t *vha) return QLA_SUCCESS; } +static void qla2x00_abort_srb(struct qla_qpair *qp, srb_t *sp, const int res, + unsigned long *flags) + __releases(qp->qp_lock_ptr) + __acquires(qp->qp_lock_ptr) +{ + scsi_qla_host_t *vha = qp->vha; + struct qla_hw_data *ha = vha->hw; + + if (sp->type == SRB_NVME_CMD || sp->type == SRB_NVME_LS) { + if (!sp_get(sp)) { + /* got sp */ + spin_unlock_irqrestore(qp->qp_lock_ptr, *flags); + qla_nvme_abort(ha, sp, res); + spin_lock_irqsave(qp->qp_lock_ptr, *flags); + } + } else if (GET_CMD_SP(sp) && !ha->flags.eeh_busy && + !test_bit(ABORT_ISP_ACTIVE, &vha->dpc_flags) && + !qla2x00_isp_reg_stat(ha) && sp->type == SRB_SCSI_CMD) { + /* + * Don't abort commands in adapter during EEH recovery as it's + * not accessible/responding. + * + * Get a reference to the sp and drop the lock. The reference + * ensures this sp->done() call and not the call in + * qla2xxx_eh_abort() ends the SCSI cmd (with result 'res'). + */ + if (!sp_get(sp)) { + int status; + + spin_unlock_irqrestore(qp->qp_lock_ptr, *flags); + status = qla2xxx_eh_abort(GET_CMD_SP(sp)); + spin_lock_irqsave(qp->qp_lock_ptr, *flags); + /* + * Get rid of extra reference caused + * by early exit from qla2xxx_eh_abort + */ + if (status == FAST_IO_FAIL) + atomic_dec(&sp->ref_count); + } + } + sp->done(sp, res); +} + static void __qla2x00_abort_all_cmds(struct qla_qpair *qp, int res) { - int cnt, status; + int cnt; unsigned long flags; srb_t *sp; scsi_qla_host_t *vha = qp->vha; @@ -1768,50 +1806,7 @@ __qla2x00_abort_all_cmds(struct qla_qpair *qp, int res) req->outstanding_cmds[cnt] = NULL; switch (sp->cmd_type) { case TYPE_SRB: - if (sp->type == SRB_NVME_CMD || - sp->type == SRB_NVME_LS) { - if (!sp_get(sp)) { - /* got sp */ - spin_unlock_irqrestore - (qp->qp_lock_ptr, - flags); - qla_nvme_abort(ha, sp, res); - spin_lock_irqsave - (qp->qp_lock_ptr, flags); - } - } else if (GET_CMD_SP(sp) && - !ha->flags.eeh_busy && - (!test_bit(ABORT_ISP_ACTIVE, - &vha->dpc_flags)) && - !qla2x00_isp_reg_stat(ha) && - (sp->type == SRB_SCSI_CMD)) { - /* - * Don't abort commands in adapter - * during EEH recovery as it's not - * accessible/responding. - * - * Get a reference to the sp and drop - * the lock. The reference ensures this - * sp->done() call and not the call in - * qla2xxx_eh_abort() ends the SCSI cmd - * (with result 'res'). - */ - if (!sp_get(sp)) { - spin_unlock_irqrestore - (qp->qp_lock_ptr, flags); - status = qla2xxx_eh_abort( - GET_CMD_SP(sp)); - spin_lock_irqsave - (qp->qp_lock_ptr, flags); - /* - * Get rid of extra reference caused - * by early exit from qla2xxx_eh_abort - */ - if (status == FAST_IO_FAIL) - atomic_dec(&sp->ref_count); - } - } - sp->done(sp, res); + qla2x00_abort_srb(qp, sp, res, &flags); break; case TYPE_TGT_CMD: if (!vha->hw->tgt.tgt_ops || !tgt || @@ -3159,7 +3154,7 @@ qla2x00_probe_one(struct pci_dev *pdev, const struct pci_device_id *id) goto probe_failed; } - if (ha->mqenable && shost_use_blk_mq(host)) { + if (ha->mqenable) { /* number of hardware queues supported by blk/scsi-mq*/ host->nr_hw_queues = ha->max_qpairs; @@ -3271,25 +3266,17 @@ qla2x00_probe_one(struct pci_dev *pdev, const struct pci_device_id *id) base_vha->mgmt_svr_loop_id, host->sg_tablesize); if (ha->mqenable) { - bool mq = false; bool startit = false; - if (QLA_TGT_MODE_ENABLED()) { - mq = true; + if (QLA_TGT_MODE_ENABLED()) startit = false; - } - if ((ql2x_ini_mode == QLA2XXX_INI_MODE_ENABLED) && - shost_use_blk_mq(host)) { - mq = true; + if (ql2x_ini_mode == QLA2XXX_INI_MODE_ENABLED) startit = true; - } - if (mq) { - /* Create start of day qpairs for Block MQ */ - for (i = 0; i < ha->max_qpairs; i++) - qla2xxx_create_qpair(base_vha, 5, 0, startit); - } + /* Create start of day qpairs for Block MQ */ + for (i = 0; i < ha->max_qpairs; i++) + qla2xxx_create_qpair(base_vha, 5, 0, startit); } if (ha->flags.running_gold_fw) @@ -3570,6 +3557,8 @@ qla2x00_delete_all_vps(struct qla_hw_data *ha, scsi_qla_host_t *base_vha) spin_unlock_irqrestore(&ha->vport_slock, flags); mutex_unlock(&ha->vport_lock); + qla_nvme_delete(vha); + fc_vport_terminate(vha->fc_vport); scsi_host_put(vha->host); @@ -4191,12 +4180,10 @@ fail_free_nvram: kfree(ha->nvram); ha->nvram = NULL; fail_free_ctx_mempool: - if (ha->ctx_mempool) - mempool_destroy(ha->ctx_mempool); + mempool_destroy(ha->ctx_mempool); ha->ctx_mempool = NULL; fail_free_srb_mempool: - if (ha->srb_mempool) - mempool_destroy(ha->srb_mempool); + mempool_destroy(ha->srb_mempool); ha->srb_mempool = NULL; fail_free_gid_list: dma_free_coherent(&ha->pdev->dev, qla2x00_gid_list_size(ha), @@ -4498,8 +4485,7 @@ qla2x00_mem_free(struct qla_hw_data *ha) dma_free_coherent(&ha->pdev->dev, MCTP_DUMP_SIZE, ha->mctp_dump, ha->mctp_dump_dma); - if (ha->srb_mempool) - mempool_destroy(ha->srb_mempool); + mempool_destroy(ha->srb_mempool); if (ha->dcbx_tlv) dma_free_coherent(&ha->pdev->dev, DCBX_TLV_DATA_SIZE, @@ -4531,8 +4517,7 @@ qla2x00_mem_free(struct qla_hw_data *ha) if (ha->async_pd) dma_pool_free(ha->s_dma_pool, ha->async_pd, ha->async_pd_dma); - if (ha->s_dma_pool) - dma_pool_destroy(ha->s_dma_pool); + dma_pool_destroy(ha->s_dma_pool); if (ha->gid_list) dma_free_coherent(&ha->pdev->dev, qla2x00_gid_list_size(ha), @@ -4553,14 +4538,11 @@ qla2x00_mem_free(struct qla_hw_data *ha) } } - if (ha->dl_dma_pool) - dma_pool_destroy(ha->dl_dma_pool); + dma_pool_destroy(ha->dl_dma_pool); - if (ha->fcp_cmnd_dma_pool) - dma_pool_destroy(ha->fcp_cmnd_dma_pool); + dma_pool_destroy(ha->fcp_cmnd_dma_pool); - if (ha->ctx_mempool) - mempool_destroy(ha->ctx_mempool); + mempool_destroy(ha->ctx_mempool); qlt_mem_free(ha); @@ -6952,11 +6934,12 @@ static int qla2xxx_map_queues(struct Scsi_Host *shost) { int rc; scsi_qla_host_t *vha = (scsi_qla_host_t *)shost->hostdata; + struct blk_mq_queue_map *qmap = &shost->tag_set.map[0]; if (USER_CTRL_IRQ(vha->hw)) - rc = blk_mq_map_queues(&shost->tag_set); + rc = blk_mq_map_queues(qmap); else - rc = blk_mq_pci_map_queues(&shost->tag_set, vha->hw->pdev, 0); + rc = blk_mq_pci_map_queues(qmap, vha->hw->pdev, 0); return rc; } @@ -7106,8 +7089,7 @@ qla2x00_module_exit(void) qla2x00_release_firmware(); kmem_cache_destroy(srb_cachep); qlt_exit(); - if (ctx_cachep) - kmem_cache_destroy(ctx_cachep); + kmem_cache_destroy(ctx_cachep); fc_release_transport(qla2xxx_transport_template); fc_release_transport(qla2xxx_transport_vport_template); } diff --git a/drivers/scsi/qla2xxx/qla_target.c b/drivers/scsi/qla2xxx/qla_target.c index c4504740f0e2..510337eac106 100644 --- a/drivers/scsi/qla2xxx/qla_target.c +++ b/drivers/scsi/qla2xxx/qla_target.c @@ -2379,20 +2379,20 @@ void qlt_xmit_tm_rsp(struct qla_tgt_mgmt_cmd *mcmd) } if (mcmd->flags == QLA24XX_MGMT_SEND_NACK) { - if (mcmd->orig_iocb.imm_ntfy.u.isp24.status_subcode == - ELS_LOGO || - mcmd->orig_iocb.imm_ntfy.u.isp24.status_subcode == - ELS_PRLO || - mcmd->orig_iocb.imm_ntfy.u.isp24.status_subcode == - ELS_TPRLO) { + switch (mcmd->orig_iocb.imm_ntfy.u.isp24.status_subcode) { + case ELS_LOGO: + case ELS_PRLO: + case ELS_TPRLO: ql_dbg(ql_dbg_disc, vha, 0x2106, "TM response logo %phC status %#x state %#x", mcmd->sess->port_name, mcmd->fc_tm_rsp, mcmd->flags); qlt_schedule_sess_for_deletion(mcmd->sess); - } else { + break; + default: qlt_send_notify_ack(vha->hw->base_qpair, &mcmd->orig_iocb.imm_ntfy, 0, 0, 0, 0, 0, 0); + break; } } else { if (mcmd->orig_iocb.atio.u.raw.entry_type == ABTS_RECV_24XX) { @@ -2660,9 +2660,9 @@ static void qlt_load_cont_data_segments(struct qla_tgt_prm *prm) cnt < QLA_TGT_DATASEGS_PER_CONT_24XX && prm->seg_cnt; cnt++, prm->seg_cnt--) { *dword_ptr++ = - cpu_to_le32(pci_dma_lo32 + cpu_to_le32(lower_32_bits (sg_dma_address(prm->sg))); - *dword_ptr++ = cpu_to_le32(pci_dma_hi32 + *dword_ptr++ = cpu_to_le32(upper_32_bits (sg_dma_address(prm->sg))); *dword_ptr++ = cpu_to_le32(sg_dma_len(prm->sg)); @@ -2704,9 +2704,9 @@ static void qlt_load_data_segments(struct qla_tgt_prm *prm) (cnt < QLA_TGT_DATASEGS_PER_CMD_24XX) && prm->seg_cnt; cnt++, prm->seg_cnt--) { *dword_ptr++ = - cpu_to_le32(pci_dma_lo32(sg_dma_address(prm->sg))); + cpu_to_le32(lower_32_bits(sg_dma_address(prm->sg))); - *dword_ptr++ = cpu_to_le32(pci_dma_hi32( + *dword_ptr++ = cpu_to_le32(upper_32_bits( sg_dma_address(prm->sg))); *dword_ptr++ = cpu_to_le32(sg_dma_len(prm->sg)); diff --git a/drivers/scsi/qla2xxx/qla_target.h b/drivers/scsi/qla2xxx/qla_target.h index 721da593b1bc..577e1786a3f1 100644 --- a/drivers/scsi/qla2xxx/qla_target.h +++ b/drivers/scsi/qla2xxx/qla_target.h @@ -771,14 +771,6 @@ int qla2x00_wait_for_hba_online(struct scsi_qla_host *); #define FC_TM_REJECT 4 #define FC_TM_FAILED 5 -#if (BITS_PER_LONG > 32) || defined(CONFIG_HIGHMEM64G) -#define pci_dma_lo32(a) (a & 0xffffffff) -#define pci_dma_hi32(a) ((((a) >> 16)>>16) & 0xffffffff) -#else -#define pci_dma_lo32(a) (a & 0xffffffff) -#define pci_dma_hi32(a) 0 -#endif - #define QLA_TGT_SENSE_VALID(sense) ((sense != NULL) && \ (((const uint8_t *)(sense))[0] & 0x70) == 0x70) diff --git a/drivers/scsi/qla2xxx/qla_version.h b/drivers/scsi/qla2xxx/qla_version.h index 12bafff71a1a..ca7945cb959b 100644 --- a/drivers/scsi/qla2xxx/qla_version.h +++ b/drivers/scsi/qla2xxx/qla_version.h @@ -7,7 +7,7 @@ /* * Driver version */ -#define QLA2XXX_VERSION "10.00.00.11-k" +#define QLA2XXX_VERSION "10.00.00.12-k" #define QLA_DRIVER_MAJOR_VER 10 #define QLA_DRIVER_MINOR_VER 0 diff --git a/drivers/scsi/qla2xxx/tcm_qla2xxx.c b/drivers/scsi/qla2xxx/tcm_qla2xxx.c index 65053c066680..283e6b80abb5 100644 --- a/drivers/scsi/qla2xxx/tcm_qla2xxx.c +++ b/drivers/scsi/qla2xxx/tcm_qla2xxx.c @@ -108,11 +108,6 @@ static ssize_t tcm_qla2xxx_format_wwn(char *buf, size_t len, u64 wwn) b[0], b[1], b[2], b[3], b[4], b[5], b[6], b[7]); } -static char *tcm_qla2xxx_get_fabric_name(void) -{ - return "qla2xxx"; -} - /* * From drivers/scsi/scsi_transport_fc.c:fc_parse_wwn */ @@ -178,11 +173,6 @@ static int tcm_qla2xxx_npiv_parse_wwn( return 0; } -static char *tcm_qla2xxx_npiv_get_fabric_name(void) -{ - return "qla2xxx_npiv"; -} - static char *tcm_qla2xxx_get_fabric_wwn(struct se_portal_group *se_tpg) { struct tcm_qla2xxx_tpg *tpg = container_of(se_tpg, @@ -964,38 +954,14 @@ static ssize_t tcm_qla2xxx_tpg_enable_show(struct config_item *item, atomic_read(&tpg->lport_tpg_enabled)); } -static void tcm_qla2xxx_depend_tpg(struct work_struct *work) -{ - struct tcm_qla2xxx_tpg *base_tpg = container_of(work, - struct tcm_qla2xxx_tpg, tpg_base_work); - struct se_portal_group *se_tpg = &base_tpg->se_tpg; - struct scsi_qla_host *base_vha = base_tpg->lport->qla_vha; - - if (!target_depend_item(&se_tpg->tpg_group.cg_item)) { - atomic_set(&base_tpg->lport_tpg_enabled, 1); - qlt_enable_vha(base_vha); - } - complete(&base_tpg->tpg_base_comp); -} - -static void tcm_qla2xxx_undepend_tpg(struct work_struct *work) -{ - struct tcm_qla2xxx_tpg *base_tpg = container_of(work, - struct tcm_qla2xxx_tpg, tpg_base_work); - struct se_portal_group *se_tpg = &base_tpg->se_tpg; - struct scsi_qla_host *base_vha = base_tpg->lport->qla_vha; - - if (!qlt_stop_phase1(base_vha->vha_tgt.qla_tgt)) { - atomic_set(&base_tpg->lport_tpg_enabled, 0); - target_undepend_item(&se_tpg->tpg_group.cg_item); - } - complete(&base_tpg->tpg_base_comp); -} - static ssize_t tcm_qla2xxx_tpg_enable_store(struct config_item *item, const char *page, size_t count) { struct se_portal_group *se_tpg = to_tpg(item); + struct se_wwn *se_wwn = se_tpg->se_tpg_wwn; + struct tcm_qla2xxx_lport *lport = container_of(se_wwn, + struct tcm_qla2xxx_lport, lport_wwn); + struct scsi_qla_host *vha = lport->qla_vha; struct tcm_qla2xxx_tpg *tpg = container_of(se_tpg, struct tcm_qla2xxx_tpg, se_tpg); unsigned long op; @@ -1014,24 +980,16 @@ static ssize_t tcm_qla2xxx_tpg_enable_store(struct config_item *item, if (atomic_read(&tpg->lport_tpg_enabled)) return -EEXIST; - INIT_WORK(&tpg->tpg_base_work, tcm_qla2xxx_depend_tpg); + atomic_set(&tpg->lport_tpg_enabled, 1); + qlt_enable_vha(vha); } else { if (!atomic_read(&tpg->lport_tpg_enabled)) return count; - INIT_WORK(&tpg->tpg_base_work, tcm_qla2xxx_undepend_tpg); + atomic_set(&tpg->lport_tpg_enabled, 0); + qlt_stop_phase1(vha->vha_tgt.qla_tgt); } - init_completion(&tpg->tpg_base_comp); - schedule_work(&tpg->tpg_base_work); - wait_for_completion(&tpg->tpg_base_comp); - if (op) { - if (!atomic_read(&tpg->lport_tpg_enabled)) - return -ENODEV; - } else { - if (atomic_read(&tpg->lport_tpg_enabled)) - return -EPERM; - } return count; } @@ -1920,14 +1878,13 @@ static struct configfs_attribute *tcm_qla2xxx_wwn_attrs[] = { static const struct target_core_fabric_ops tcm_qla2xxx_ops = { .module = THIS_MODULE, - .name = "qla2xxx", + .fabric_name = "qla2xxx", .node_acl_size = sizeof(struct tcm_qla2xxx_nacl), /* * XXX: Limit assumes single page per scatter-gather-list entry. * Current maximum is ~4.9 MB per se_cmd->t_data_sg with PAGE_SIZE=4096 */ .max_data_sg_nents = 1200, - .get_fabric_name = tcm_qla2xxx_get_fabric_name, .tpg_get_wwn = tcm_qla2xxx_get_fabric_wwn, .tpg_get_tag = tcm_qla2xxx_get_tag, .tpg_check_demo_mode = tcm_qla2xxx_check_demo_mode, @@ -1969,9 +1926,8 @@ static const struct target_core_fabric_ops tcm_qla2xxx_ops = { static const struct target_core_fabric_ops tcm_qla2xxx_npiv_ops = { .module = THIS_MODULE, - .name = "qla2xxx_npiv", + .fabric_name = "qla2xxx_npiv", .node_acl_size = sizeof(struct tcm_qla2xxx_nacl), - .get_fabric_name = tcm_qla2xxx_npiv_get_fabric_name, .tpg_get_wwn = tcm_qla2xxx_get_fabric_wwn, .tpg_get_tag = tcm_qla2xxx_get_tag, .tpg_check_demo_mode = tcm_qla2xxx_check_demo_mode, diff --git a/drivers/scsi/qla2xxx/tcm_qla2xxx.h b/drivers/scsi/qla2xxx/tcm_qla2xxx.h index 7550ba2831c3..147cf6c90366 100644 --- a/drivers/scsi/qla2xxx/tcm_qla2xxx.h +++ b/drivers/scsi/qla2xxx/tcm_qla2xxx.h @@ -48,9 +48,6 @@ struct tcm_qla2xxx_tpg { struct tcm_qla2xxx_tpg_attrib tpg_attrib; /* Returned by tcm_qla2xxx_make_tpg() */ struct se_portal_group se_tpg; - /* Items for dealing with configfs_depend_item */ - struct completion tpg_base_comp; - struct work_struct tpg_base_work; }; struct tcm_qla2xxx_fc_loopid { diff --git a/drivers/scsi/qla4xxx/ql4_os.c b/drivers/scsi/qla4xxx/ql4_os.c index 051164f755a4..949e186cc5d7 100644 --- a/drivers/scsi/qla4xxx/ql4_os.c +++ b/drivers/scsi/qla4xxx/ql4_os.c @@ -205,7 +205,6 @@ static struct scsi_host_template qla4xxx_driver_template = { .this_id = -1, .cmd_per_lun = 3, - .use_clustering = ENABLE_CLUSTERING, .sg_tablesize = SG_ALL, .max_sectors = 0xFFFF, @@ -4160,20 +4159,16 @@ static void qla4xxx_mem_free(struct scsi_qla_host *ha) ha->fw_dump_size = 0; /* Free srb pool. */ - if (ha->srb_mempool) - mempool_destroy(ha->srb_mempool); - + mempool_destroy(ha->srb_mempool); ha->srb_mempool = NULL; - if (ha->chap_dma_pool) - dma_pool_destroy(ha->chap_dma_pool); + dma_pool_destroy(ha->chap_dma_pool); if (ha->chap_list) vfree(ha->chap_list); ha->chap_list = NULL; - if (ha->fw_ddb_dma_pool) - dma_pool_destroy(ha->fw_ddb_dma_pool); + dma_pool_destroy(ha->fw_ddb_dma_pool); /* release io space registers */ if (is_qla8022(ha)) { diff --git a/drivers/scsi/qlogicfas.c b/drivers/scsi/qlogicfas.c index 95431d605c24..8f709002f746 100644 --- a/drivers/scsi/qlogicfas.c +++ b/drivers/scsi/qlogicfas.c @@ -193,7 +193,7 @@ static struct scsi_host_template qlogicfas_driver_template = { .can_queue = 1, .this_id = -1, .sg_tablesize = SG_ALL, - .use_clustering = DISABLE_CLUSTERING, + .dma_boundary = PAGE_SIZE - 1, }; static __init int qlogicfas_init(void) diff --git a/drivers/scsi/qlogicpti.c b/drivers/scsi/qlogicpti.c index 9d09228eee28..e35ce762d454 100644 --- a/drivers/scsi/qlogicpti.c +++ b/drivers/scsi/qlogicpti.c @@ -1287,7 +1287,6 @@ static struct scsi_host_template qpti_template = { .can_queue = QLOGICPTI_REQ_QUEUE_LEN, .this_id = 7, .sg_tablesize = QLOGICPTI_MAX_SG(QLOGICPTI_REQ_QUEUE_LEN), - .use_clustering = ENABLE_CLUSTERING, }; static const struct of_device_id qpti_match[]; diff --git a/drivers/scsi/scsi.c b/drivers/scsi/scsi.c index fc1356d101b0..7675ff0ca2ea 100644 --- a/drivers/scsi/scsi.c +++ b/drivers/scsi/scsi.c @@ -780,11 +780,8 @@ MODULE_LICENSE("GPL"); module_param(scsi_logging_level, int, S_IRUGO|S_IWUSR); MODULE_PARM_DESC(scsi_logging_level, "a bit mask of logging levels"); -#ifdef CONFIG_SCSI_MQ_DEFAULT +/* This should go away in the future, it doesn't do anything anymore */ bool scsi_use_blk_mq = true; -#else -bool scsi_use_blk_mq = false; -#endif module_param_named(use_blk_mq, scsi_use_blk_mq, bool, S_IWUSR | S_IRUGO); static int __init init_scsi(void) diff --git a/drivers/scsi/scsi_debug.c b/drivers/scsi/scsi_debug.c index 60bcc6df97a9..661512bec3ac 100644 --- a/drivers/scsi/scsi_debug.c +++ b/drivers/scsi/scsi_debug.c @@ -3973,7 +3973,6 @@ static int scsi_debug_slave_configure(struct scsi_device *sdp) return 1; /* no resources, will be marked offline */ } sdp->hostdata = devip; - blk_queue_max_segment_size(sdp->request_queue, -1U); if (sdebug_no_uld) sdp->no_uld_attach = 1; config_cdb_len(sdp); @@ -5851,7 +5850,7 @@ static struct scsi_host_template sdebug_driver_template = { .sg_tablesize = SG_MAX_SEGMENTS, .cmd_per_lun = DEF_CMD_PER_LUN, .max_sectors = -1U, - .use_clustering = DISABLE_CLUSTERING, + .max_segment_size = -1U, .module = THIS_MODULE, .track_queue_depth = 1, }; @@ -5866,8 +5865,9 @@ static int sdebug_driver_probe(struct device *dev) sdbg_host = to_sdebug_host(dev); sdebug_driver_template.can_queue = sdebug_max_queue; - if (sdebug_clustering) - sdebug_driver_template.use_clustering = ENABLE_CLUSTERING; + if (!sdebug_clustering) + sdebug_driver_template.dma_boundary = PAGE_SIZE - 1; + hpnt = scsi_host_alloc(&sdebug_driver_template, sizeof(sdbg_host)); if (NULL == hpnt) { pr_err("scsi_host_alloc failed\n"); @@ -5881,8 +5881,7 @@ static int sdebug_driver_probe(struct device *dev) } /* Decide whether to tell scsi subsystem that we want mq */ /* Following should give the same answer for each host */ - if (shost_use_blk_mq(hpnt)) - hpnt->nr_hw_queues = submit_queues; + hpnt->nr_hw_queues = submit_queues; sdbg_host->shost = hpnt; *((struct sdebug_host_info **)hpnt->hostdata) = sdbg_host; diff --git a/drivers/scsi/scsi_error.c b/drivers/scsi/scsi_error.c index c736d61b1648..16eef068e9e9 100644 --- a/drivers/scsi/scsi_error.c +++ b/drivers/scsi/scsi_error.c @@ -297,19 +297,19 @@ enum blk_eh_timer_return scsi_times_out(struct request *req) if (rtn == BLK_EH_DONE) { /* - * For blk-mq, we must set the request state to complete now - * before sending the request to the scsi error handler. This - * will prevent a use-after-free in the event the LLD manages - * to complete the request before the error handler finishes - * processing this timed out request. + * Set the command to complete first in order to prevent a real + * completion from releasing the command while error handling + * is using it. If the command was already completed, then the + * lower level driver beat the timeout handler, and it is safe + * to return without escalating error recovery. * - * If the request was already completed, then the LLD beat the - * time out handler from transferring the request to the scsi - * error handler. In that case we can return immediately as no - * further action is required. + * If timeout handling lost the race to a real completion, the + * block layer may ignore that due to a fake timeout injection, + * so return RESET_TIMER to allow error handling another shot + * at this command. */ - if (req->q->mq_ops && !blk_mq_mark_complete(req)) - return rtn; + if (test_and_set_bit(SCMD_STATE_COMPLETE, &scmd->state)) + return BLK_EH_RESET_TIMER; if (scsi_abort_command(scmd) != SUCCESS) { set_host_byte(scmd, DID_TIME_OUT); scsi_eh_scmd_add(scmd); @@ -1932,7 +1932,7 @@ maybe_retry: static void eh_lock_door_done(struct request *req, blk_status_t status) { - __blk_put_request(req->q, req); + blk_put_request(req); } /** diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c index fa6e0c3b3aa6..b13cc9288ba0 100644 --- a/drivers/scsi/scsi_lib.c +++ b/drivers/scsi/scsi_lib.c @@ -168,8 +168,6 @@ static void scsi_mq_requeue_cmd(struct scsi_cmnd *cmd) static void __scsi_queue_insert(struct scsi_cmnd *cmd, int reason, bool unbusy) { struct scsi_device *device = cmd->device; - struct request_queue *q = device->request_queue; - unsigned long flags; SCSI_LOG_MLQUEUE(1, scmd_printk(KERN_INFO, cmd, "Inserting command %p into mlqueue\n", cmd)); @@ -190,26 +188,20 @@ static void __scsi_queue_insert(struct scsi_cmnd *cmd, int reason, bool unbusy) * before blk_cleanup_queue() finishes. */ cmd->result = 0; - if (q->mq_ops) { - /* - * Before a SCSI command is dispatched, - * get_device(&sdev->sdev_gendev) is called and the host, - * target and device busy counters are increased. Since - * requeuing a request causes these actions to be repeated and - * since scsi_device_unbusy() has already been called, - * put_device(&device->sdev_gendev) must still be called. Call - * put_device() after blk_mq_requeue_request() to avoid that - * removal of the SCSI device can start before requeueing has - * happened. - */ - blk_mq_requeue_request(cmd->request, true); - put_device(&device->sdev_gendev); - return; - } - spin_lock_irqsave(q->queue_lock, flags); - blk_requeue_request(q, cmd->request); - kblockd_schedule_work(&device->requeue_work); - spin_unlock_irqrestore(q->queue_lock, flags); + + /* + * Before a SCSI command is dispatched, + * get_device(&sdev->sdev_gendev) is called and the host, + * target and device busy counters are increased. Since + * requeuing a request causes these actions to be repeated and + * since scsi_device_unbusy() has already been called, + * put_device(&device->sdev_gendev) must still be called. Call + * put_device() after blk_mq_requeue_request() to avoid that + * removal of the SCSI device can start before requeueing has + * happened. + */ + blk_mq_requeue_request(cmd->request, true); + put_device(&device->sdev_gendev); } /* @@ -370,10 +362,7 @@ void scsi_device_unbusy(struct scsi_device *sdev) static void scsi_kick_queue(struct request_queue *q) { - if (q->mq_ops) - blk_mq_run_hw_queues(q, false); - else - blk_run_queue(q); + blk_mq_run_hw_queues(q, false); } /* @@ -534,10 +523,7 @@ static void scsi_run_queue(struct request_queue *q) if (!list_empty(&sdev->host->starved_list)) scsi_starved_list_run(sdev->host); - if (q->mq_ops) - blk_mq_run_hw_queues(q, false); - else - blk_run_queue(q); + blk_mq_run_hw_queues(q, false); } void scsi_requeue_run_queue(struct work_struct *work) @@ -550,42 +536,6 @@ void scsi_requeue_run_queue(struct work_struct *work) scsi_run_queue(q); } -/* - * Function: scsi_requeue_command() - * - * Purpose: Handle post-processing of completed commands. - * - * Arguments: q - queue to operate on - * cmd - command that may need to be requeued. - * - * Returns: Nothing - * - * Notes: After command completion, there may be blocks left - * over which weren't finished by the previous command - * this can be for a number of reasons - the main one is - * I/O errors in the middle of the request, in which case - * we need to request the blocks that come after the bad - * sector. - * Notes: Upon return, cmd is a stale pointer. - */ -static void scsi_requeue_command(struct request_queue *q, struct scsi_cmnd *cmd) -{ - struct scsi_device *sdev = cmd->device; - struct request *req = cmd->request; - unsigned long flags; - - spin_lock_irqsave(q->queue_lock, flags); - blk_unprep_request(req); - req->special = NULL; - scsi_put_command(cmd); - blk_requeue_request(q, req); - spin_unlock_irqrestore(q->queue_lock, flags); - - scsi_run_queue(q); - - put_device(&sdev->sdev_gendev); -} - void scsi_run_host_queues(struct Scsi_Host *shost) { struct scsi_device *sdev; @@ -626,42 +576,6 @@ static void scsi_mq_uninit_cmd(struct scsi_cmnd *cmd) scsi_del_cmd_from_list(cmd); } -/* - * Function: scsi_release_buffers() - * - * Purpose: Free resources allocate for a scsi_command. - * - * Arguments: cmd - command that we are bailing. - * - * Lock status: Assumed that no lock is held upon entry. - * - * Returns: Nothing - * - * Notes: In the event that an upper level driver rejects a - * command, we must release resources allocated during - * the __init_io() function. Primarily this would involve - * the scatter-gather table. - */ -static void scsi_release_buffers(struct scsi_cmnd *cmd) -{ - if (cmd->sdb.table.nents) - sg_free_table_chained(&cmd->sdb.table, false); - - memset(&cmd->sdb, 0, sizeof(cmd->sdb)); - - if (scsi_prot_sg_count(cmd)) - sg_free_table_chained(&cmd->prot_sdb->table, false); -} - -static void scsi_release_bidi_buffers(struct scsi_cmnd *cmd) -{ - struct scsi_data_buffer *bidi_sdb = cmd->request->next_rq->special; - - sg_free_table_chained(&bidi_sdb->table, false); - kmem_cache_free(scsi_sdb_cache, bidi_sdb); - cmd->request->next_rq->special = NULL; -} - /* Returns false when no more bytes to process, true if there are more */ static bool scsi_end_request(struct request *req, blk_status_t error, unsigned int bytes, unsigned int bidi_bytes) @@ -687,46 +601,30 @@ static bool scsi_end_request(struct request *req, blk_status_t error, destroy_rcu_head(&cmd->rcu); } - if (req->mq_ctx) { - /* - * In the MQ case the command gets freed by __blk_mq_end_request, - * so we have to do all cleanup that depends on it earlier. - * - * We also can't kick the queues from irq context, so we - * will have to defer it to a workqueue. - */ - scsi_mq_uninit_cmd(cmd); - - /* - * queue is still alive, so grab the ref for preventing it - * from being cleaned up during running queue. - */ - percpu_ref_get(&q->q_usage_counter); - - __blk_mq_end_request(req, error); - - if (scsi_target(sdev)->single_lun || - !list_empty(&sdev->host->starved_list)) - kblockd_schedule_work(&sdev->requeue_work); - else - blk_mq_run_hw_queues(q, true); - - percpu_ref_put(&q->q_usage_counter); - } else { - unsigned long flags; + /* + * In the MQ case the command gets freed by __blk_mq_end_request, + * so we have to do all cleanup that depends on it earlier. + * + * We also can't kick the queues from irq context, so we + * will have to defer it to a workqueue. + */ + scsi_mq_uninit_cmd(cmd); - if (bidi_bytes) - scsi_release_bidi_buffers(cmd); - scsi_release_buffers(cmd); - scsi_put_command(cmd); + /* + * queue is still alive, so grab the ref for preventing it + * from being cleaned up during running queue. + */ + percpu_ref_get(&q->q_usage_counter); - spin_lock_irqsave(q->queue_lock, flags); - blk_finish_request(req, error); - spin_unlock_irqrestore(q->queue_lock, flags); + __blk_mq_end_request(req, error); - scsi_run_queue(q); - } + if (scsi_target(sdev)->single_lun || + !list_empty(&sdev->host->starved_list)) + kblockd_schedule_work(&sdev->requeue_work); + else + blk_mq_run_hw_queues(q, true); + percpu_ref_put(&q->q_usage_counter); put_device(&sdev->sdev_gendev); return false; } @@ -774,13 +672,7 @@ static void scsi_io_completion_reprep(struct scsi_cmnd *cmd, struct request_queue *q) { /* A new command will be prepared and issued. */ - if (q->mq_ops) { - scsi_mq_requeue_cmd(cmd); - } else { - /* Unprep request and put it back at head of the queue. */ - scsi_release_buffers(cmd); - scsi_requeue_command(q, cmd); - } + scsi_mq_requeue_cmd(cmd); } /* Helper for scsi_io_completion() when special action required. */ @@ -1120,7 +1012,8 @@ void scsi_io_completion(struct scsi_cmnd *cmd, unsigned int good_bytes) scsi_io_completion_action(cmd, result); } -static int scsi_init_sgtable(struct request *req, struct scsi_data_buffer *sdb) +static blk_status_t scsi_init_sgtable(struct request *req, + struct scsi_data_buffer *sdb) { int count; @@ -1129,7 +1022,7 @@ static int scsi_init_sgtable(struct request *req, struct scsi_data_buffer *sdb) */ if (unlikely(sg_alloc_table_chained(&sdb->table, blk_rq_nr_phys_segments(req), sdb->table.sgl))) - return BLKPREP_DEFER; + return BLK_STS_RESOURCE; /* * Next, walk the list, and fill in the addresses and sizes of @@ -1139,7 +1032,7 @@ static int scsi_init_sgtable(struct request *req, struct scsi_data_buffer *sdb) BUG_ON(count > sdb->table.nents); sdb->table.nents = count; sdb->length = blk_rq_payload_bytes(req); - return BLKPREP_OK; + return BLK_STS_OK; } /* @@ -1149,62 +1042,48 @@ static int scsi_init_sgtable(struct request *req, struct scsi_data_buffer *sdb) * * Arguments: cmd - Command descriptor we wish to initialize * - * Returns: 0 on success - * BLKPREP_DEFER if the failure is retryable - * BLKPREP_KILL if the failure is fatal + * Returns: BLK_STS_OK on success + * BLK_STS_RESOURCE if the failure is retryable + * BLK_STS_IOERR if the failure is fatal */ -int scsi_init_io(struct scsi_cmnd *cmd) +blk_status_t scsi_init_io(struct scsi_cmnd *cmd) { - struct scsi_device *sdev = cmd->device; struct request *rq = cmd->request; - bool is_mq = (rq->mq_ctx != NULL); - int error = BLKPREP_KILL; + blk_status_t ret; if (WARN_ON_ONCE(!blk_rq_nr_phys_segments(rq))) - goto err_exit; + return BLK_STS_IOERR; - error = scsi_init_sgtable(rq, &cmd->sdb); - if (error) - goto err_exit; + ret = scsi_init_sgtable(rq, &cmd->sdb); + if (ret) + return ret; if (blk_bidi_rq(rq)) { - if (!rq->q->mq_ops) { - struct scsi_data_buffer *bidi_sdb = - kmem_cache_zalloc(scsi_sdb_cache, GFP_ATOMIC); - if (!bidi_sdb) { - error = BLKPREP_DEFER; - goto err_exit; - } - - rq->next_rq->special = bidi_sdb; - } - - error = scsi_init_sgtable(rq->next_rq, rq->next_rq->special); - if (error) - goto err_exit; + ret = scsi_init_sgtable(rq->next_rq, rq->next_rq->special); + if (ret) + goto out_free_sgtables; } if (blk_integrity_rq(rq)) { struct scsi_data_buffer *prot_sdb = cmd->prot_sdb; int ivecs, count; - if (prot_sdb == NULL) { + if (WARN_ON_ONCE(!prot_sdb)) { /* * This can happen if someone (e.g. multipath) * queues a command to a device on an adapter * that does not support DIX. */ - WARN_ON_ONCE(1); - error = BLKPREP_KILL; - goto err_exit; + ret = BLK_STS_IOERR; + goto out_free_sgtables; } ivecs = blk_rq_count_integrity_sg(rq->q, rq->bio); if (sg_alloc_table_chained(&prot_sdb->table, ivecs, prot_sdb->table.sgl)) { - error = BLKPREP_DEFER; - goto err_exit; + ret = BLK_STS_RESOURCE; + goto out_free_sgtables; } count = blk_rq_map_integrity_sg(rq->q, rq->bio, @@ -1216,17 +1095,10 @@ int scsi_init_io(struct scsi_cmnd *cmd) cmd->prot_sdb->table.nents = count; } - return BLKPREP_OK; -err_exit: - if (is_mq) { - scsi_mq_free_sgtables(cmd); - } else { - scsi_release_buffers(cmd); - cmd->request->special = NULL; - scsi_put_command(cmd); - put_device(&sdev->sdev_gendev); - } - return error; + return BLK_STS_OK; +out_free_sgtables: + scsi_mq_free_sgtables(cmd); + return ret; } EXPORT_SYMBOL(scsi_init_io); @@ -1312,7 +1184,8 @@ void scsi_init_command(struct scsi_device *dev, struct scsi_cmnd *cmd) scsi_add_cmd_to_list(cmd); } -static int scsi_setup_scsi_cmnd(struct scsi_device *sdev, struct request *req) +static blk_status_t scsi_setup_scsi_cmnd(struct scsi_device *sdev, + struct request *req) { struct scsi_cmnd *cmd = blk_mq_rq_to_pdu(req); @@ -1323,8 +1196,8 @@ static int scsi_setup_scsi_cmnd(struct scsi_device *sdev, struct request *req) * submit a request without an attached bio. */ if (req->bio) { - int ret = scsi_init_io(cmd); - if (unlikely(ret)) + blk_status_t ret = scsi_init_io(cmd); + if (unlikely(ret != BLK_STS_OK)) return ret; } else { BUG_ON(blk_rq_bytes(req)); @@ -1336,20 +1209,21 @@ static int scsi_setup_scsi_cmnd(struct scsi_device *sdev, struct request *req) cmd->cmnd = scsi_req(req)->cmd; cmd->transfersize = blk_rq_bytes(req); cmd->allowed = scsi_req(req)->retries; - return BLKPREP_OK; + return BLK_STS_OK; } /* * Setup a normal block command. These are simple request from filesystems * that still need to be translated to SCSI CDBs from the ULD. */ -static int scsi_setup_fs_cmnd(struct scsi_device *sdev, struct request *req) +static blk_status_t scsi_setup_fs_cmnd(struct scsi_device *sdev, + struct request *req) { struct scsi_cmnd *cmd = blk_mq_rq_to_pdu(req); if (unlikely(sdev->handler && sdev->handler->prep_fn)) { - int ret = sdev->handler->prep_fn(sdev, req); - if (ret != BLKPREP_OK) + blk_status_t ret = sdev->handler->prep_fn(sdev, req); + if (ret != BLK_STS_OK) return ret; } @@ -1358,7 +1232,8 @@ static int scsi_setup_fs_cmnd(struct scsi_device *sdev, struct request *req) return scsi_cmd_to_driver(cmd)->init_command(cmd); } -static int scsi_setup_cmnd(struct scsi_device *sdev, struct request *req) +static blk_status_t scsi_setup_cmnd(struct scsi_device *sdev, + struct request *req) { struct scsi_cmnd *cmd = blk_mq_rq_to_pdu(req); @@ -1375,129 +1250,48 @@ static int scsi_setup_cmnd(struct scsi_device *sdev, struct request *req) return scsi_setup_fs_cmnd(sdev, req); } -static int +static blk_status_t scsi_prep_state_check(struct scsi_device *sdev, struct request *req) { - int ret = BLKPREP_OK; - - /* - * If the device is not in running state we will reject some - * or all commands. - */ - if (unlikely(sdev->sdev_state != SDEV_RUNNING)) { - switch (sdev->sdev_state) { - case SDEV_OFFLINE: - case SDEV_TRANSPORT_OFFLINE: - /* - * If the device is offline we refuse to process any - * commands. The device must be brought online - * before trying any recovery commands. - */ - sdev_printk(KERN_ERR, sdev, - "rejecting I/O to offline device\n"); - ret = BLKPREP_KILL; - break; - case SDEV_DEL: - /* - * If the device is fully deleted, we refuse to - * process any commands as well. - */ - sdev_printk(KERN_ERR, sdev, - "rejecting I/O to dead device\n"); - ret = BLKPREP_KILL; - break; - case SDEV_BLOCK: - case SDEV_CREATED_BLOCK: - ret = BLKPREP_DEFER; - break; - case SDEV_QUIESCE: - /* - * If the devices is blocked we defer normal commands. - */ - if (req && !(req->rq_flags & RQF_PREEMPT)) - ret = BLKPREP_DEFER; - break; - default: - /* - * For any other not fully online state we only allow - * special commands. In particular any user initiated - * command is not allowed. - */ - if (req && !(req->rq_flags & RQF_PREEMPT)) - ret = BLKPREP_KILL; - break; - } - } - return ret; -} - -static int -scsi_prep_return(struct request_queue *q, struct request *req, int ret) -{ - struct scsi_device *sdev = q->queuedata; - - switch (ret) { - case BLKPREP_KILL: - case BLKPREP_INVALID: - scsi_req(req)->result = DID_NO_CONNECT << 16; - /* release the command and kill it */ - if (req->special) { - struct scsi_cmnd *cmd = req->special; - scsi_release_buffers(cmd); - scsi_put_command(cmd); - put_device(&sdev->sdev_gendev); - req->special = NULL; - } - break; - case BLKPREP_DEFER: + switch (sdev->sdev_state) { + case SDEV_OFFLINE: + case SDEV_TRANSPORT_OFFLINE: /* - * If we defer, the blk_peek_request() returns NULL, but the - * queue must be restarted, so we schedule a callback to happen - * shortly. + * If the device is offline we refuse to process any + * commands. The device must be brought online + * before trying any recovery commands. */ - if (atomic_read(&sdev->device_busy) == 0) - blk_delay_queue(q, SCSI_QUEUE_DELAY); - break; + sdev_printk(KERN_ERR, sdev, + "rejecting I/O to offline device\n"); + return BLK_STS_IOERR; + case SDEV_DEL: + /* + * If the device is fully deleted, we refuse to + * process any commands as well. + */ + sdev_printk(KERN_ERR, sdev, + "rejecting I/O to dead device\n"); + return BLK_STS_IOERR; + case SDEV_BLOCK: + case SDEV_CREATED_BLOCK: + return BLK_STS_RESOURCE; + case SDEV_QUIESCE: + /* + * If the devices is blocked we defer normal commands. + */ + if (req && !(req->rq_flags & RQF_PREEMPT)) + return BLK_STS_RESOURCE; + return BLK_STS_OK; default: - req->rq_flags |= RQF_DONTPREP; - } - - return ret; -} - -static int scsi_prep_fn(struct request_queue *q, struct request *req) -{ - struct scsi_device *sdev = q->queuedata; - struct scsi_cmnd *cmd = blk_mq_rq_to_pdu(req); - int ret; - - ret = scsi_prep_state_check(sdev, req); - if (ret != BLKPREP_OK) - goto out; - - if (!req->special) { - /* Bail if we can't get a reference to the device */ - if (unlikely(!get_device(&sdev->sdev_gendev))) { - ret = BLKPREP_DEFER; - goto out; - } - - scsi_init_command(sdev, cmd); - req->special = cmd; + /* + * For any other not fully online state we only allow + * special commands. In particular any user initiated + * command is not allowed. + */ + if (req && !(req->rq_flags & RQF_PREEMPT)) + return BLK_STS_IOERR; + return BLK_STS_OK; } - - cmd->tag = req->tag; - cmd->request = req; - cmd->prot_op = SCSI_PROT_NORMAL; - - ret = scsi_setup_cmnd(sdev, req); -out: - return scsi_prep_return(q, req, ret); -} - -static void scsi_unprep_fn(struct request_queue *q, struct request *req) -{ - scsi_uninit_cmd(blk_mq_rq_to_pdu(req)); } /* @@ -1519,14 +1313,8 @@ static inline int scsi_dev_queue_ready(struct request_queue *q, /* * unblock after device_blocked iterates to zero */ - if (atomic_dec_return(&sdev->device_blocked) > 0) { - /* - * For the MQ case we take care of this in the caller. - */ - if (!q->mq_ops) - blk_delay_queue(q, SCSI_QUEUE_DELAY); + if (atomic_dec_return(&sdev->device_blocked) > 0) goto out_dec; - } SCSI_LOG_MLQUEUE(3, sdev_printk(KERN_INFO, sdev, "unblocking device at zero depth\n")); } @@ -1661,13 +1449,13 @@ out_dec: * needs to return 'not busy'. Otherwise, request stacking drivers * may hold requests forever. */ -static int scsi_lld_busy(struct request_queue *q) +static bool scsi_mq_lld_busy(struct request_queue *q) { struct scsi_device *sdev = q->queuedata; struct Scsi_Host *shost; if (blk_queue_dying(q)) - return 0; + return false; shost = sdev->host; @@ -1678,43 +1466,9 @@ static int scsi_lld_busy(struct request_queue *q) * in SCSI layer. */ if (scsi_host_in_recovery(shost) || scsi_device_is_busy(sdev)) - return 1; - - return 0; -} - -/* - * Kill a request for a dead device - */ -static void scsi_kill_request(struct request *req, struct request_queue *q) -{ - struct scsi_cmnd *cmd = blk_mq_rq_to_pdu(req); - struct scsi_device *sdev; - struct scsi_target *starget; - struct Scsi_Host *shost; - - blk_start_request(req); - - scmd_printk(KERN_INFO, cmd, "killing request\n"); - - sdev = cmd->device; - starget = scsi_target(sdev); - shost = sdev->host; - scsi_init_cmd_errh(cmd); - cmd->result = DID_NO_CONNECT << 16; - atomic_inc(&cmd->device->iorequest_cnt); - - /* - * SCSI request completion path will do scsi_device_unbusy(), - * bump busy counts. To bump the counters, we need to dance - * with the locks as normal issue path does. - */ - atomic_inc(&sdev->device_busy); - atomic_inc(&shost->host_busy); - if (starget->can_queue > 0) - atomic_inc(&starget->target_busy); + return true; - blk_complete_request(req); + return false; } static void scsi_softirq_done(struct request *rq) @@ -1837,170 +1591,6 @@ static int scsi_dispatch_cmd(struct scsi_cmnd *cmd) return 0; } -/** - * scsi_done - Invoke completion on finished SCSI command. - * @cmd: The SCSI Command for which a low-level device driver (LLDD) gives - * ownership back to SCSI Core -- i.e. the LLDD has finished with it. - * - * Description: This function is the mid-level's (SCSI Core) interrupt routine, - * which regains ownership of the SCSI command (de facto) from a LLDD, and - * calls blk_complete_request() for further processing. - * - * This function is interrupt context safe. - */ -static void scsi_done(struct scsi_cmnd *cmd) -{ - trace_scsi_dispatch_cmd_done(cmd); - blk_complete_request(cmd->request); -} - -/* - * Function: scsi_request_fn() - * - * Purpose: Main strategy routine for SCSI. - * - * Arguments: q - Pointer to actual queue. - * - * Returns: Nothing - * - * Lock status: request queue lock assumed to be held when called. - * - * Note: See sd_zbc.c sd_zbc_write_lock_zone() for write order - * protection for ZBC disks. - */ -static void scsi_request_fn(struct request_queue *q) - __releases(q->queue_lock) - __acquires(q->queue_lock) -{ - struct scsi_device *sdev = q->queuedata; - struct Scsi_Host *shost; - struct scsi_cmnd *cmd; - struct request *req; - - /* - * To start with, we keep looping until the queue is empty, or until - * the host is no longer able to accept any more requests. - */ - shost = sdev->host; - for (;;) { - int rtn; - /* - * get next queueable request. We do this early to make sure - * that the request is fully prepared even if we cannot - * accept it. - */ - req = blk_peek_request(q); - if (!req) - break; - - if (unlikely(!scsi_device_online(sdev))) { - sdev_printk(KERN_ERR, sdev, - "rejecting I/O to offline device\n"); - scsi_kill_request(req, q); - continue; - } - - if (!scsi_dev_queue_ready(q, sdev)) - break; - - /* - * Remove the request from the request list. - */ - if (!(blk_queue_tagged(q) && !blk_queue_start_tag(q, req))) - blk_start_request(req); - - spin_unlock_irq(q->queue_lock); - cmd = blk_mq_rq_to_pdu(req); - if (cmd != req->special) { - printk(KERN_CRIT "impossible request in %s.\n" - "please mail a stack trace to " - "linux-scsi@vger.kernel.org\n", - __func__); - blk_dump_rq_flags(req, "foo"); - BUG(); - } - - /* - * We hit this when the driver is using a host wide - * tag map. For device level tag maps the queue_depth check - * in the device ready fn would prevent us from trying - * to allocate a tag. Since the map is a shared host resource - * we add the dev to the starved list so it eventually gets - * a run when a tag is freed. - */ - if (blk_queue_tagged(q) && !(req->rq_flags & RQF_QUEUED)) { - spin_lock_irq(shost->host_lock); - if (list_empty(&sdev->starved_entry)) - list_add_tail(&sdev->starved_entry, - &shost->starved_list); - spin_unlock_irq(shost->host_lock); - goto not_ready; - } - - if (!scsi_target_queue_ready(shost, sdev)) - goto not_ready; - - if (!scsi_host_queue_ready(q, shost, sdev)) - goto host_not_ready; - - if (sdev->simple_tags) - cmd->flags |= SCMD_TAGGED; - else - cmd->flags &= ~SCMD_TAGGED; - - /* - * Finally, initialize any error handling parameters, and set up - * the timers for timeouts. - */ - scsi_init_cmd_errh(cmd); - - /* - * Dispatch the command to the low-level driver. - */ - cmd->scsi_done = scsi_done; - rtn = scsi_dispatch_cmd(cmd); - if (rtn) { - scsi_queue_insert(cmd, rtn); - spin_lock_irq(q->queue_lock); - goto out_delay; - } - spin_lock_irq(q->queue_lock); - } - - return; - - host_not_ready: - if (scsi_target(sdev)->can_queue > 0) - atomic_dec(&scsi_target(sdev)->target_busy); - not_ready: - /* - * lock q, handle tag, requeue req, and decrement device_busy. We - * must return with queue_lock held. - * - * Decrementing device_busy without checking it is OK, as all such - * cases (host limits or settings) should run the queue at some - * later time. - */ - spin_lock_irq(q->queue_lock); - blk_requeue_request(q, req); - atomic_dec(&sdev->device_busy); -out_delay: - if (!atomic_read(&sdev->device_busy) && !scsi_device_blocked(sdev)) - blk_delay_queue(q, SCSI_QUEUE_DELAY); -} - -static inline blk_status_t prep_to_mq(int ret) -{ - switch (ret) { - case BLKPREP_OK: - return BLK_STS_OK; - case BLKPREP_DEFER: - return BLK_STS_RESOURCE; - default: - return BLK_STS_IOERR; - } -} - /* Size in bytes of the sg-list stored in the scsi-mq command-private data. */ static unsigned int scsi_mq_sgl_size(struct Scsi_Host *shost) { @@ -2008,7 +1598,7 @@ static unsigned int scsi_mq_sgl_size(struct Scsi_Host *shost) sizeof(struct scatterlist); } -static int scsi_mq_prep_fn(struct request *req) +static blk_status_t scsi_mq_prep_fn(struct request *req) { struct scsi_cmnd *cmd = blk_mq_rq_to_pdu(req); struct scsi_device *sdev = req->q->queuedata; @@ -2052,8 +1642,18 @@ static int scsi_mq_prep_fn(struct request *req) static void scsi_mq_done(struct scsi_cmnd *cmd) { + if (unlikely(test_and_set_bit(SCMD_STATE_COMPLETE, &cmd->state))) + return; trace_scsi_dispatch_cmd_done(cmd); - blk_mq_complete_request(cmd->request); + + /* + * If the block layer didn't complete the request due to a timeout + * injection, scsi must clear its internal completed state so that the + * timeout handler will see it needs to escalate its own error + * recovery. + */ + if (unlikely(!blk_mq_complete_request(cmd->request))) + clear_bit(SCMD_STATE_COMPLETE, &cmd->state); } static void scsi_mq_put_budget(struct blk_mq_hw_ctx *hctx) @@ -2096,9 +1696,15 @@ static blk_status_t scsi_queue_rq(struct blk_mq_hw_ctx *hctx, blk_status_t ret; int reason; - ret = prep_to_mq(scsi_prep_state_check(sdev, req)); - if (ret != BLK_STS_OK) - goto out_put_budget; + /* + * If the device is not in running state we will reject some or all + * commands. + */ + if (unlikely(sdev->sdev_state != SDEV_RUNNING)) { + ret = scsi_prep_state_check(sdev, req); + if (ret != BLK_STS_OK) + goto out_put_budget; + } ret = BLK_STS_RESOURCE; if (!scsi_target_queue_ready(shost, sdev)) @@ -2106,8 +1712,9 @@ static blk_status_t scsi_queue_rq(struct blk_mq_hw_ctx *hctx, if (!scsi_host_queue_ready(q, shost, sdev)) goto out_dec_target_busy; + clear_bit(SCMD_STATE_COMPLETE, &cmd->state); if (!(req->rq_flags & RQF_DONTPREP)) { - ret = prep_to_mq(scsi_mq_prep_fn(req)); + ret = scsi_mq_prep_fn(req); if (ret != BLK_STS_OK) goto out_dec_host_busy; req->rq_flags |= RQF_DONTPREP; @@ -2208,7 +1815,7 @@ static int scsi_map_queues(struct blk_mq_tag_set *set) if (shost->hostt->map_queues) return shost->hostt->map_queues(shost); - return blk_mq_map_queues(set); + return blk_mq_map_queues(&set->map[0]); } void __scsi_init_queue(struct Scsi_Host *shost, struct request_queue *q) @@ -2235,10 +1842,8 @@ void __scsi_init_queue(struct Scsi_Host *shost, struct request_queue *q) blk_queue_segment_boundary(q, shost->dma_boundary); dma_set_seg_boundary(dev, shost->dma_boundary); - blk_queue_max_segment_size(q, dma_get_max_seg_size(dev)); - - if (!shost->use_clustering) - q->limits.cluster = 0; + blk_queue_max_segment_size(q, + min(shost->max_segment_size, dma_get_max_seg_size(dev))); /* * Set a reasonable default alignment: The larger of 32-byte (dword), @@ -2251,77 +1856,6 @@ void __scsi_init_queue(struct Scsi_Host *shost, struct request_queue *q) } EXPORT_SYMBOL_GPL(__scsi_init_queue); -static int scsi_old_init_rq(struct request_queue *q, struct request *rq, - gfp_t gfp) -{ - struct Scsi_Host *shost = q->rq_alloc_data; - const bool unchecked_isa_dma = shost->unchecked_isa_dma; - struct scsi_cmnd *cmd = blk_mq_rq_to_pdu(rq); - - memset(cmd, 0, sizeof(*cmd)); - - if (unchecked_isa_dma) - cmd->flags |= SCMD_UNCHECKED_ISA_DMA; - cmd->sense_buffer = scsi_alloc_sense_buffer(unchecked_isa_dma, gfp, - NUMA_NO_NODE); - if (!cmd->sense_buffer) - goto fail; - cmd->req.sense = cmd->sense_buffer; - - if (scsi_host_get_prot(shost) >= SHOST_DIX_TYPE0_PROTECTION) { - cmd->prot_sdb = kmem_cache_zalloc(scsi_sdb_cache, gfp); - if (!cmd->prot_sdb) - goto fail_free_sense; - } - - return 0; - -fail_free_sense: - scsi_free_sense_buffer(unchecked_isa_dma, cmd->sense_buffer); -fail: - return -ENOMEM; -} - -static void scsi_old_exit_rq(struct request_queue *q, struct request *rq) -{ - struct scsi_cmnd *cmd = blk_mq_rq_to_pdu(rq); - - if (cmd->prot_sdb) - kmem_cache_free(scsi_sdb_cache, cmd->prot_sdb); - scsi_free_sense_buffer(cmd->flags & SCMD_UNCHECKED_ISA_DMA, - cmd->sense_buffer); -} - -struct request_queue *scsi_old_alloc_queue(struct scsi_device *sdev) -{ - struct Scsi_Host *shost = sdev->host; - struct request_queue *q; - - q = blk_alloc_queue_node(GFP_KERNEL, NUMA_NO_NODE, NULL); - if (!q) - return NULL; - q->cmd_size = sizeof(struct scsi_cmnd) + shost->hostt->cmd_size; - q->rq_alloc_data = shost; - q->request_fn = scsi_request_fn; - q->init_rq_fn = scsi_old_init_rq; - q->exit_rq_fn = scsi_old_exit_rq; - q->initialize_rq_fn = scsi_initialize_rq; - - if (blk_init_allocated_queue(q) < 0) { - blk_cleanup_queue(q); - return NULL; - } - - __scsi_init_queue(shost, q); - blk_queue_flag_set(QUEUE_FLAG_SCSI_PASSTHROUGH, q); - blk_queue_prep_rq(q, scsi_prep_fn); - blk_queue_unprep_rq(q, scsi_unprep_fn); - blk_queue_softirq_done(q, scsi_softirq_done); - blk_queue_rq_timed_out(q, scsi_times_out); - blk_queue_lld_busy(q, scsi_lld_busy); - return q; -} - static const struct blk_mq_ops scsi_mq_ops = { .get_budget = scsi_mq_get_budget, .put_budget = scsi_mq_put_budget, @@ -2334,6 +1868,7 @@ static const struct blk_mq_ops scsi_mq_ops = { .init_request = scsi_mq_init_request, .exit_request = scsi_mq_exit_request, .initialize_rq_fn = scsi_initialize_rq, + .busy = scsi_mq_lld_busy, .map_queues = scsi_map_queues, }; @@ -2388,10 +1923,7 @@ struct scsi_device *scsi_device_from_queue(struct request_queue *q) { struct scsi_device *sdev = NULL; - if (q->mq_ops) { - if (q->mq_ops == &scsi_mq_ops) - sdev = q->queuedata; - } else if (q->request_fn == scsi_request_fn) + if (q->mq_ops == &scsi_mq_ops) sdev = q->queuedata; if (!sdev || !get_device(&sdev->sdev_gendev)) sdev = NULL; @@ -2995,39 +2527,6 @@ void sdev_evt_send_simple(struct scsi_device *sdev, EXPORT_SYMBOL_GPL(sdev_evt_send_simple); /** - * scsi_request_fn_active() - number of kernel threads inside scsi_request_fn() - * @sdev: SCSI device to count the number of scsi_request_fn() callers for. - */ -static int scsi_request_fn_active(struct scsi_device *sdev) -{ - struct request_queue *q = sdev->request_queue; - int request_fn_active; - - WARN_ON_ONCE(sdev->host->use_blk_mq); - - spin_lock_irq(q->queue_lock); - request_fn_active = q->request_fn_active; - spin_unlock_irq(q->queue_lock); - - return request_fn_active; -} - -/** - * scsi_wait_for_queuecommand() - wait for ongoing queuecommand() calls - * @sdev: SCSI device pointer. - * - * Wait until the ongoing shost->hostt->queuecommand() calls that are - * invoked from scsi_request_fn() have finished. - */ -static void scsi_wait_for_queuecommand(struct scsi_device *sdev) -{ - WARN_ON_ONCE(sdev->host->use_blk_mq); - - while (scsi_request_fn_active(sdev)) - msleep(20); -} - -/** * scsi_device_quiesce - Block user issued commands. * @sdev: scsi device to quiesce. * @@ -3150,7 +2649,6 @@ EXPORT_SYMBOL(scsi_target_resume); int scsi_internal_device_block_nowait(struct scsi_device *sdev) { struct request_queue *q = sdev->request_queue; - unsigned long flags; int err = 0; err = scsi_device_set_state(sdev, SDEV_BLOCK); @@ -3166,14 +2664,7 @@ int scsi_internal_device_block_nowait(struct scsi_device *sdev) * block layer from calling the midlayer with this device's * request queue. */ - if (q->mq_ops) { - blk_mq_quiesce_queue_nowait(q); - } else { - spin_lock_irqsave(q->queue_lock, flags); - blk_stop_queue(q); - spin_unlock_irqrestore(q->queue_lock, flags); - } - + blk_mq_quiesce_queue_nowait(q); return 0; } EXPORT_SYMBOL_GPL(scsi_internal_device_block_nowait); @@ -3204,12 +2695,8 @@ static int scsi_internal_device_block(struct scsi_device *sdev) mutex_lock(&sdev->state_mutex); err = scsi_internal_device_block_nowait(sdev); - if (err == 0) { - if (q->mq_ops) - blk_mq_quiesce_queue(q); - else - scsi_wait_for_queuecommand(sdev); - } + if (err == 0) + blk_mq_quiesce_queue(q); mutex_unlock(&sdev->state_mutex); return err; @@ -3218,15 +2705,8 @@ static int scsi_internal_device_block(struct scsi_device *sdev) void scsi_start_queue(struct scsi_device *sdev) { struct request_queue *q = sdev->request_queue; - unsigned long flags; - if (q->mq_ops) { - blk_mq_unquiesce_queue(q); - } else { - spin_lock_irqsave(q->queue_lock, flags); - blk_start_queue(q); - spin_unlock_irqrestore(q->queue_lock, flags); - } + blk_mq_unquiesce_queue(q); } /** diff --git a/drivers/scsi/scsi_priv.h b/drivers/scsi/scsi_priv.h index 99f1db5e467e..5f21547b2ad2 100644 --- a/drivers/scsi/scsi_priv.h +++ b/drivers/scsi/scsi_priv.h @@ -92,7 +92,6 @@ extern void scsi_queue_insert(struct scsi_cmnd *cmd, int reason); extern void scsi_io_completion(struct scsi_cmnd *, unsigned int); extern void scsi_run_host_queues(struct Scsi_Host *shost); extern void scsi_requeue_run_queue(struct work_struct *work); -extern struct request_queue *scsi_old_alloc_queue(struct scsi_device *sdev); extern struct request_queue *scsi_mq_alloc_queue(struct scsi_device *sdev); extern void scsi_start_queue(struct scsi_device *sdev); extern int scsi_mq_setup_tags(struct Scsi_Host *shost); diff --git a/drivers/scsi/scsi_scan.c b/drivers/scsi/scsi_scan.c index 78ca63dfba4a..dd0d516f65e2 100644 --- a/drivers/scsi/scsi_scan.c +++ b/drivers/scsi/scsi_scan.c @@ -266,10 +266,7 @@ static struct scsi_device *scsi_alloc_sdev(struct scsi_target *starget, */ sdev->borken = 1; - if (shost_use_blk_mq(shost)) - sdev->request_queue = scsi_mq_alloc_queue(sdev); - else - sdev->request_queue = scsi_old_alloc_queue(sdev); + sdev->request_queue = scsi_mq_alloc_queue(sdev); if (!sdev->request_queue) { /* release fn is set up in scsi_sysfs_device_initialise, so * have to free and put manually here */ @@ -280,11 +277,6 @@ static struct scsi_device *scsi_alloc_sdev(struct scsi_target *starget, WARN_ON_ONCE(!blk_get_queue(sdev->request_queue)); sdev->request_queue->queuedata = sdev; - if (!shost_use_blk_mq(sdev->host)) { - blk_queue_init_tags(sdev->request_queue, - sdev->host->cmd_per_lun, shost->bqt, - shost->hostt->tag_alloc_policy); - } scsi_change_queue_depth(sdev, sdev->host->cmd_per_lun ? sdev->host->cmd_per_lun : 1); diff --git a/drivers/scsi/scsi_sysfs.c b/drivers/scsi/scsi_sysfs.c index 3aee9464a7bf..6a9040faed00 100644 --- a/drivers/scsi/scsi_sysfs.c +++ b/drivers/scsi/scsi_sysfs.c @@ -367,7 +367,6 @@ store_shost_eh_deadline(struct device *dev, struct device_attribute *attr, static DEVICE_ATTR(eh_deadline, S_IRUGO | S_IWUSR, show_shost_eh_deadline, store_shost_eh_deadline); -shost_rd_attr(use_blk_mq, "%d\n"); shost_rd_attr(unique_id, "%u\n"); shost_rd_attr(cmd_per_lun, "%hd\n"); shost_rd_attr(can_queue, "%hd\n"); @@ -386,6 +385,13 @@ show_host_busy(struct device *dev, struct device_attribute *attr, char *buf) } static DEVICE_ATTR(host_busy, S_IRUGO, show_host_busy, NULL); +static ssize_t +show_use_blk_mq(struct device *dev, struct device_attribute *attr, char *buf) +{ + return sprintf(buf, "1\n"); +} +static DEVICE_ATTR(use_blk_mq, S_IRUGO, show_use_blk_mq, NULL); + static struct attribute *scsi_sysfs_shost_attrs[] = { &dev_attr_use_blk_mq.attr, &dev_attr_unique_id.attr, diff --git a/drivers/scsi/scsi_transport_fc.c b/drivers/scsi/scsi_transport_fc.c index 381668fa135d..d7035270d274 100644 --- a/drivers/scsi/scsi_transport_fc.c +++ b/drivers/scsi/scsi_transport_fc.c @@ -3592,7 +3592,7 @@ fc_bsg_job_timeout(struct request *req) /* the blk_end_sync_io() doesn't check the error */ if (inflight) - __blk_complete_request(req); + blk_mq_end_request(req, BLK_STS_IOERR); return BLK_EH_DONE; } @@ -3684,14 +3684,9 @@ static void fc_bsg_goose_queue(struct fc_rport *rport) { struct request_queue *q = rport->rqst_q; - unsigned long flags; - - if (!q) - return; - spin_lock_irqsave(q->queue_lock, flags); - blk_run_queue_async(q); - spin_unlock_irqrestore(q->queue_lock, flags); + if (q) + blk_mq_run_hw_queues(q, true); } /** @@ -3759,6 +3754,37 @@ static int fc_bsg_dispatch(struct bsg_job *job) return fc_bsg_host_dispatch(shost, job); } +static blk_status_t fc_bsg_rport_prep(struct fc_rport *rport) +{ + if (rport->port_state == FC_PORTSTATE_BLOCKED && + !(rport->flags & FC_RPORT_FAST_FAIL_TIMEDOUT)) + return BLK_STS_RESOURCE; + + if (rport->port_state != FC_PORTSTATE_ONLINE) + return BLK_STS_IOERR; + + return BLK_STS_OK; +} + + +static int fc_bsg_dispatch_prep(struct bsg_job *job) +{ + struct fc_rport *rport = fc_bsg_to_rport(job); + blk_status_t ret; + + ret = fc_bsg_rport_prep(rport); + switch (ret) { + case BLK_STS_OK: + break; + case BLK_STS_RESOURCE: + return -EAGAIN; + default: + return -EIO; + } + + return fc_bsg_dispatch(job); +} + /** * fc_bsg_hostadd - Create and add the bsg hooks so we can receive requests * @shost: shost for fc_host @@ -3780,7 +3806,8 @@ fc_bsg_hostadd(struct Scsi_Host *shost, struct fc_host_attrs *fc_host) snprintf(bsg_name, sizeof(bsg_name), "fc_host%d", shost->host_no); - q = bsg_setup_queue(dev, bsg_name, fc_bsg_dispatch, i->f->dd_bsg_size); + q = bsg_setup_queue(dev, bsg_name, fc_bsg_dispatch, fc_bsg_job_timeout, + i->f->dd_bsg_size); if (IS_ERR(q)) { dev_err(dev, "fc_host%d: bsg interface failed to initialize - setup queue\n", @@ -3788,26 +3815,11 @@ fc_bsg_hostadd(struct Scsi_Host *shost, struct fc_host_attrs *fc_host) return PTR_ERR(q); } __scsi_init_queue(shost, q); - blk_queue_rq_timed_out(q, fc_bsg_job_timeout); blk_queue_rq_timeout(q, FC_DEFAULT_BSG_TIMEOUT); fc_host->rqst_q = q; return 0; } -static int fc_bsg_rport_prep(struct request_queue *q, struct request *req) -{ - struct fc_rport *rport = dev_to_rport(q->queuedata); - - if (rport->port_state == FC_PORTSTATE_BLOCKED && - !(rport->flags & FC_RPORT_FAST_FAIL_TIMEDOUT)) - return BLKPREP_DEFER; - - if (rport->port_state != FC_PORTSTATE_ONLINE) - return BLKPREP_KILL; - - return BLKPREP_OK; -} - /** * fc_bsg_rportadd - Create and add the bsg hooks so we can receive requests * @shost: shost that rport is attached to @@ -3825,15 +3837,13 @@ fc_bsg_rportadd(struct Scsi_Host *shost, struct fc_rport *rport) if (!i->f->bsg_request) return -ENOTSUPP; - q = bsg_setup_queue(dev, dev_name(dev), fc_bsg_dispatch, - i->f->dd_bsg_size); + q = bsg_setup_queue(dev, dev_name(dev), fc_bsg_dispatch_prep, + fc_bsg_job_timeout, i->f->dd_bsg_size); if (IS_ERR(q)) { dev_err(dev, "failed to setup bsg queue\n"); return PTR_ERR(q); } __scsi_init_queue(shost, q); - blk_queue_prep_rq(q, fc_bsg_rport_prep); - blk_queue_rq_timed_out(q, fc_bsg_job_timeout); blk_queue_rq_timeout(q, BLK_DEFAULT_SG_TIMEOUT); rport->rqst_q = q; return 0; @@ -3852,10 +3862,7 @@ fc_bsg_rportadd(struct Scsi_Host *shost, struct fc_rport *rport) static void fc_bsg_remove(struct request_queue *q) { - if (q) { - bsg_unregister_queue(q); - blk_cleanup_queue(q); - } + bsg_remove_queue(q); } diff --git a/drivers/scsi/scsi_transport_iscsi.c b/drivers/scsi/scsi_transport_iscsi.c index 6fd2fe210fc3..0508831d6fb9 100644 --- a/drivers/scsi/scsi_transport_iscsi.c +++ b/drivers/scsi/scsi_transport_iscsi.c @@ -37,6 +37,18 @@ #define ISCSI_TRANSPORT_VERSION "2.0-870" +#define CREATE_TRACE_POINTS +#include <trace/events/iscsi.h> + +/* + * Export tracepoint symbols to be used by other modules. + */ +EXPORT_TRACEPOINT_SYMBOL_GPL(iscsi_dbg_conn); +EXPORT_TRACEPOINT_SYMBOL_GPL(iscsi_dbg_eh); +EXPORT_TRACEPOINT_SYMBOL_GPL(iscsi_dbg_session); +EXPORT_TRACEPOINT_SYMBOL_GPL(iscsi_dbg_tcp); +EXPORT_TRACEPOINT_SYMBOL_GPL(iscsi_dbg_sw_tcp); + static int dbg_session; module_param_named(debug_session, dbg_session, int, S_IRUGO | S_IWUSR); @@ -59,6 +71,9 @@ MODULE_PARM_DESC(debug_conn, iscsi_cls_session_printk(KERN_INFO, _session, \ "%s: " dbg_fmt, \ __func__, ##arg); \ + iscsi_dbg_trace(trace_iscsi_dbg_trans_session, \ + &(_session)->dev, \ + "%s " dbg_fmt, __func__, ##arg); \ } while (0); #define ISCSI_DBG_TRANS_CONN(_conn, dbg_fmt, arg...) \ @@ -66,7 +81,10 @@ MODULE_PARM_DESC(debug_conn, if (dbg_conn) \ iscsi_cls_conn_printk(KERN_INFO, _conn, \ "%s: " dbg_fmt, \ - __func__, ##arg); \ + __func__, ##arg); \ + iscsi_dbg_trace(trace_iscsi_dbg_trans_conn, \ + &(_conn)->dev, \ + "%s " dbg_fmt, __func__, ##arg); \ } while (0); struct iscsi_internal { @@ -1542,7 +1560,7 @@ iscsi_bsg_host_add(struct Scsi_Host *shost, struct iscsi_cls_host *ihost) return -ENOTSUPP; snprintf(bsg_name, sizeof(bsg_name), "iscsi_host%d", shost->host_no); - q = bsg_setup_queue(dev, bsg_name, iscsi_bsg_host_dispatch, 0); + q = bsg_setup_queue(dev, bsg_name, iscsi_bsg_host_dispatch, NULL, 0); if (IS_ERR(q)) { shost_printk(KERN_ERR, shost, "bsg interface failed to " "initialize - no request queue\n"); @@ -1576,10 +1594,7 @@ static int iscsi_remove_host(struct transport_container *tc, struct Scsi_Host *shost = dev_to_shost(dev); struct iscsi_cls_host *ihost = shost->shost_data; - if (ihost->bsg_q) { - bsg_unregister_queue(ihost->bsg_q); - blk_cleanup_queue(ihost->bsg_q); - } + bsg_remove_queue(ihost->bsg_q); return 0; } @@ -4497,6 +4512,20 @@ int iscsi_unregister_transport(struct iscsi_transport *tt) } EXPORT_SYMBOL_GPL(iscsi_unregister_transport); +void iscsi_dbg_trace(void (*trace)(struct device *dev, struct va_format *), + struct device *dev, const char *fmt, ...) +{ + struct va_format vaf; + va_list args; + + va_start(args, fmt); + vaf.fmt = fmt; + vaf.va = &args; + trace(dev, &vaf); + va_end(args); +} +EXPORT_SYMBOL_GPL(iscsi_dbg_trace); + static __init int iscsi_transport_init(void) { int err; diff --git a/drivers/scsi/scsi_transport_sas.c b/drivers/scsi/scsi_transport_sas.c index 0a165b2b3e81..692b46937e52 100644 --- a/drivers/scsi/scsi_transport_sas.c +++ b/drivers/scsi/scsi_transport_sas.c @@ -198,7 +198,7 @@ static int sas_bsg_initialize(struct Scsi_Host *shost, struct sas_rphy *rphy) if (rphy) { q = bsg_setup_queue(&rphy->dev, dev_name(&rphy->dev), - sas_smp_dispatch, 0); + sas_smp_dispatch, NULL, 0); if (IS_ERR(q)) return PTR_ERR(q); rphy->q = q; @@ -207,7 +207,7 @@ static int sas_bsg_initialize(struct Scsi_Host *shost, struct sas_rphy *rphy) snprintf(name, sizeof(name), "sas_host%d", shost->host_no); q = bsg_setup_queue(&shost->shost_gendev, name, - sas_smp_dispatch, 0); + sas_smp_dispatch, NULL, 0); if (IS_ERR(q)) return PTR_ERR(q); to_sas_host_attrs(shost)->q = q; @@ -246,11 +246,7 @@ static int sas_host_remove(struct transport_container *tc, struct device *dev, struct Scsi_Host *shost = dev_to_shost(dev); struct request_queue *q = to_sas_host_attrs(shost)->q; - if (q) { - bsg_unregister_queue(q); - blk_cleanup_queue(q); - } - + bsg_remove_queue(q); return 0; } diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index bd0a5c694a97..a1a44f52e0e8 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -114,7 +114,7 @@ static int sd_suspend_system(struct device *); static int sd_suspend_runtime(struct device *); static int sd_resume(struct device *); static void sd_rescan(struct device *); -static int sd_init_command(struct scsi_cmnd *SCpnt); +static blk_status_t sd_init_command(struct scsi_cmnd *SCpnt); static void sd_uninit_command(struct scsi_cmnd *SCpnt); static int sd_done(struct scsi_cmnd *); static void sd_eh_reset(struct scsi_cmnd *); @@ -751,7 +751,7 @@ static void sd_config_discard(struct scsi_disk *sdkp, unsigned int mode) blk_queue_flag_set(QUEUE_FLAG_DISCARD, q); } -static int sd_setup_unmap_cmnd(struct scsi_cmnd *cmd) +static blk_status_t sd_setup_unmap_cmnd(struct scsi_cmnd *cmd) { struct scsi_device *sdp = cmd->device; struct request *rq = cmd->request; @@ -762,7 +762,7 @@ static int sd_setup_unmap_cmnd(struct scsi_cmnd *cmd) rq->special_vec.bv_page = mempool_alloc(sd_page_pool, GFP_ATOMIC); if (!rq->special_vec.bv_page) - return BLKPREP_DEFER; + return BLK_STS_RESOURCE; clear_highpage(rq->special_vec.bv_page); rq->special_vec.bv_offset = 0; rq->special_vec.bv_len = data_len; @@ -786,7 +786,8 @@ static int sd_setup_unmap_cmnd(struct scsi_cmnd *cmd) return scsi_init_io(cmd); } -static int sd_setup_write_same16_cmnd(struct scsi_cmnd *cmd, bool unmap) +static blk_status_t sd_setup_write_same16_cmnd(struct scsi_cmnd *cmd, + bool unmap) { struct scsi_device *sdp = cmd->device; struct request *rq = cmd->request; @@ -796,7 +797,7 @@ static int sd_setup_write_same16_cmnd(struct scsi_cmnd *cmd, bool unmap) rq->special_vec.bv_page = mempool_alloc(sd_page_pool, GFP_ATOMIC); if (!rq->special_vec.bv_page) - return BLKPREP_DEFER; + return BLK_STS_RESOURCE; clear_highpage(rq->special_vec.bv_page); rq->special_vec.bv_offset = 0; rq->special_vec.bv_len = data_len; @@ -817,7 +818,8 @@ static int sd_setup_write_same16_cmnd(struct scsi_cmnd *cmd, bool unmap) return scsi_init_io(cmd); } -static int sd_setup_write_same10_cmnd(struct scsi_cmnd *cmd, bool unmap) +static blk_status_t sd_setup_write_same10_cmnd(struct scsi_cmnd *cmd, + bool unmap) { struct scsi_device *sdp = cmd->device; struct request *rq = cmd->request; @@ -827,7 +829,7 @@ static int sd_setup_write_same10_cmnd(struct scsi_cmnd *cmd, bool unmap) rq->special_vec.bv_page = mempool_alloc(sd_page_pool, GFP_ATOMIC); if (!rq->special_vec.bv_page) - return BLKPREP_DEFER; + return BLK_STS_RESOURCE; clear_highpage(rq->special_vec.bv_page); rq->special_vec.bv_offset = 0; rq->special_vec.bv_len = data_len; @@ -848,7 +850,7 @@ static int sd_setup_write_same10_cmnd(struct scsi_cmnd *cmd, bool unmap) return scsi_init_io(cmd); } -static int sd_setup_write_zeroes_cmnd(struct scsi_cmnd *cmd) +static blk_status_t sd_setup_write_zeroes_cmnd(struct scsi_cmnd *cmd) { struct request *rq = cmd->request; struct scsi_device *sdp = cmd->device; @@ -866,7 +868,7 @@ static int sd_setup_write_zeroes_cmnd(struct scsi_cmnd *cmd) } if (sdp->no_write_same) - return BLKPREP_INVALID; + return BLK_STS_TARGET; if (sdkp->ws16 || sector > 0xffffffff || nr_sectors > 0xffff) return sd_setup_write_same16_cmnd(cmd, false); @@ -943,7 +945,7 @@ out: * Will set up either WRITE SAME(10) or WRITE SAME(16) depending on * the preference indicated by the target device. **/ -static int sd_setup_write_same_cmnd(struct scsi_cmnd *cmd) +static blk_status_t sd_setup_write_same_cmnd(struct scsi_cmnd *cmd) { struct request *rq = cmd->request; struct scsi_device *sdp = cmd->device; @@ -952,10 +954,10 @@ static int sd_setup_write_same_cmnd(struct scsi_cmnd *cmd) sector_t sector = blk_rq_pos(rq); unsigned int nr_sectors = blk_rq_sectors(rq); unsigned int nr_bytes = blk_rq_bytes(rq); - int ret; + blk_status_t ret; if (sdkp->device->no_write_same) - return BLKPREP_INVALID; + return BLK_STS_TARGET; BUG_ON(bio_offset(bio) || bio_iovec(bio).bv_len != sdp->sector_size); @@ -996,7 +998,7 @@ static int sd_setup_write_same_cmnd(struct scsi_cmnd *cmd) return ret; } -static int sd_setup_flush_cmnd(struct scsi_cmnd *cmd) +static blk_status_t sd_setup_flush_cmnd(struct scsi_cmnd *cmd) { struct request *rq = cmd->request; @@ -1009,10 +1011,10 @@ static int sd_setup_flush_cmnd(struct scsi_cmnd *cmd) cmd->allowed = SD_MAX_RETRIES; rq->timeout = rq->q->rq_timeout * SD_FLUSH_TIMEOUT_MULTIPLIER; - return BLKPREP_OK; + return BLK_STS_OK; } -static int sd_setup_read_write_cmnd(struct scsi_cmnd *SCpnt) +static blk_status_t sd_setup_read_write_cmnd(struct scsi_cmnd *SCpnt) { struct request *rq = SCpnt->request; struct scsi_device *sdp = SCpnt->device; @@ -1022,18 +1024,14 @@ static int sd_setup_read_write_cmnd(struct scsi_cmnd *SCpnt) sector_t threshold; unsigned int this_count = blk_rq_sectors(rq); unsigned int dif, dix; - int ret; unsigned char protect; + blk_status_t ret; ret = scsi_init_io(SCpnt); - if (ret != BLKPREP_OK) + if (ret != BLK_STS_OK) return ret; WARN_ON_ONCE(SCpnt != rq->special); - /* from here on until we're complete, any goto out - * is used for a killable error condition */ - ret = BLKPREP_KILL; - SCSI_LOG_HLQUEUE(1, scmd_printk(KERN_INFO, SCpnt, "%s: block=%llu, count=%d\n", @@ -1046,7 +1044,7 @@ static int sd_setup_read_write_cmnd(struct scsi_cmnd *SCpnt) blk_rq_sectors(rq))); SCSI_LOG_HLQUEUE(2, scmd_printk(KERN_INFO, SCpnt, "Retry with 0x%p\n", SCpnt)); - goto out; + return BLK_STS_IOERR; } if (sdp->changed) { @@ -1055,7 +1053,7 @@ static int sd_setup_read_write_cmnd(struct scsi_cmnd *SCpnt) * the changed bit has been reset */ /* printk("SCSI disk has been changed or is not present. Prohibiting further I/O.\n"); */ - goto out; + return BLK_STS_IOERR; } /* @@ -1093,31 +1091,28 @@ static int sd_setup_read_write_cmnd(struct scsi_cmnd *SCpnt) if ((block & 1) || (blk_rq_sectors(rq) & 1)) { scmd_printk(KERN_ERR, SCpnt, "Bad block number requested\n"); - goto out; - } else { - block = block >> 1; - this_count = this_count >> 1; + return BLK_STS_IOERR; } + block = block >> 1; + this_count = this_count >> 1; } if (sdp->sector_size == 2048) { if ((block & 3) || (blk_rq_sectors(rq) & 3)) { scmd_printk(KERN_ERR, SCpnt, "Bad block number requested\n"); - goto out; - } else { - block = block >> 2; - this_count = this_count >> 2; + return BLK_STS_IOERR; } + block = block >> 2; + this_count = this_count >> 2; } if (sdp->sector_size == 4096) { if ((block & 7) || (blk_rq_sectors(rq) & 7)) { scmd_printk(KERN_ERR, SCpnt, "Bad block number requested\n"); - goto out; - } else { - block = block >> 3; - this_count = this_count >> 3; + return BLK_STS_IOERR; } + block = block >> 3; + this_count = this_count >> 3; } if (rq_data_dir(rq) == WRITE) { SCpnt->cmnd[0] = WRITE_6; @@ -1129,7 +1124,7 @@ static int sd_setup_read_write_cmnd(struct scsi_cmnd *SCpnt) SCpnt->cmnd[0] = READ_6; } else { scmd_printk(KERN_ERR, SCpnt, "Unknown command %d\n", req_op(rq)); - goto out; + return BLK_STS_IOERR; } SCSI_LOG_HLQUEUE(2, scmd_printk(KERN_INFO, SCpnt, @@ -1149,10 +1144,8 @@ static int sd_setup_read_write_cmnd(struct scsi_cmnd *SCpnt) if (protect && sdkp->protection_type == T10_PI_TYPE2_PROTECTION) { SCpnt->cmnd = mempool_alloc(sd_cdb_pool, GFP_ATOMIC); - if (unlikely(SCpnt->cmnd == NULL)) { - ret = BLKPREP_DEFER; - goto out; - } + if (unlikely(!SCpnt->cmnd)) + return BLK_STS_RESOURCE; SCpnt->cmd_len = SD_EXT_CDB_SIZE; memset(SCpnt->cmnd, 0, SCpnt->cmd_len); @@ -1220,7 +1213,7 @@ static int sd_setup_read_write_cmnd(struct scsi_cmnd *SCpnt) */ scmd_printk(KERN_ERR, SCpnt, "FUA write on READ/WRITE(6) drive\n"); - goto out; + return BLK_STS_IOERR; } SCpnt->cmnd[1] |= (unsigned char) ((block >> 16) & 0x1f); @@ -1244,12 +1237,10 @@ static int sd_setup_read_write_cmnd(struct scsi_cmnd *SCpnt) * This indicates that the command is ready from our end to be * queued. */ - ret = BLKPREP_OK; - out: - return ret; + return BLK_STS_OK; } -static int sd_init_command(struct scsi_cmnd *cmd) +static blk_status_t sd_init_command(struct scsi_cmnd *cmd) { struct request *rq = cmd->request; @@ -1265,7 +1256,7 @@ static int sd_init_command(struct scsi_cmnd *cmd) case SD_LBP_ZERO: return sd_setup_write_same10_cmnd(cmd, false); default: - return BLKPREP_INVALID; + return BLK_STS_TARGET; } case REQ_OP_WRITE_ZEROES: return sd_setup_write_zeroes_cmnd(cmd); @@ -1280,7 +1271,7 @@ static int sd_init_command(struct scsi_cmnd *cmd) return sd_zbc_setup_reset_cmnd(cmd); default: WARN_ON_ONCE(1); - return BLKPREP_KILL; + return BLK_STS_NOTSUPP; } } diff --git a/drivers/scsi/sd.h b/drivers/scsi/sd.h index 1d63f3a23ffb..7f43e6839bce 100644 --- a/drivers/scsi/sd.h +++ b/drivers/scsi/sd.h @@ -271,7 +271,7 @@ static inline int sd_is_zoned(struct scsi_disk *sdkp) extern int sd_zbc_read_zones(struct scsi_disk *sdkp, unsigned char *buffer); extern void sd_zbc_print_zones(struct scsi_disk *sdkp); -extern int sd_zbc_setup_reset_cmnd(struct scsi_cmnd *cmd); +extern blk_status_t sd_zbc_setup_reset_cmnd(struct scsi_cmnd *cmd); extern void sd_zbc_complete(struct scsi_cmnd *cmd, unsigned int good_bytes, struct scsi_sense_hdr *sshdr); extern int sd_zbc_report_zones(struct gendisk *disk, sector_t sector, @@ -288,9 +288,9 @@ static inline int sd_zbc_read_zones(struct scsi_disk *sdkp, static inline void sd_zbc_print_zones(struct scsi_disk *sdkp) {} -static inline int sd_zbc_setup_reset_cmnd(struct scsi_cmnd *cmd) +static inline blk_status_t sd_zbc_setup_reset_cmnd(struct scsi_cmnd *cmd) { - return BLKPREP_INVALID; + return BLK_STS_TARGET; } static inline void sd_zbc_complete(struct scsi_cmnd *cmd, diff --git a/drivers/scsi/sd_zbc.c b/drivers/scsi/sd_zbc.c index e06c48c866e4..83365b29a4d8 100644 --- a/drivers/scsi/sd_zbc.c +++ b/drivers/scsi/sd_zbc.c @@ -185,7 +185,7 @@ static inline sector_t sd_zbc_zone_sectors(struct scsi_disk *sdkp) * * Called from sd_init_command() for a REQ_OP_ZONE_RESET request. */ -int sd_zbc_setup_reset_cmnd(struct scsi_cmnd *cmd) +blk_status_t sd_zbc_setup_reset_cmnd(struct scsi_cmnd *cmd) { struct request *rq = cmd->request; struct scsi_disk *sdkp = scsi_disk(rq->rq_disk); @@ -194,14 +194,14 @@ int sd_zbc_setup_reset_cmnd(struct scsi_cmnd *cmd) if (!sd_is_zoned(sdkp)) /* Not a zoned device */ - return BLKPREP_KILL; + return BLK_STS_IOERR; if (sdkp->device->changed) - return BLKPREP_KILL; + return BLK_STS_IOERR; if (sector & (sd_zbc_zone_sectors(sdkp) - 1)) /* Unaligned request */ - return BLKPREP_KILL; + return BLK_STS_IOERR; cmd->cmd_len = 16; memset(cmd->cmnd, 0, cmd->cmd_len); @@ -214,7 +214,7 @@ int sd_zbc_setup_reset_cmnd(struct scsi_cmnd *cmd) cmd->transfersize = 0; cmd->allowed = 0; - return BLKPREP_OK; + return BLK_STS_OK; } /** diff --git a/drivers/scsi/sg.c b/drivers/scsi/sg.c index c6ad00703c5b..4e27460ec926 100644 --- a/drivers/scsi/sg.c +++ b/drivers/scsi/sg.c @@ -1390,7 +1390,7 @@ sg_rq_end_io(struct request *rq, blk_status_t status) */ srp->rq = NULL; scsi_req_free_cmd(scsi_req(rq)); - __blk_put_request(rq->q, rq); + blk_put_request(rq); write_lock_irqsave(&sfp->rq_list_lock, iflags); if (unlikely(srp->orphan)) { diff --git a/drivers/scsi/sgiwd93.c b/drivers/scsi/sgiwd93.c index 5ed696dc9bbd..713bce998b0e 100644 --- a/drivers/scsi/sgiwd93.c +++ b/drivers/scsi/sgiwd93.c @@ -208,7 +208,7 @@ static struct scsi_host_template sgiwd93_template = { .this_id = 7, .sg_tablesize = SG_ALL, .cmd_per_lun = 8, - .use_clustering = DISABLE_CLUSTERING, + .dma_boundary = PAGE_SIZE - 1, }; static int sgiwd93_probe(struct platform_device *pdev) diff --git a/drivers/scsi/smartpqi/smartpqi.h b/drivers/scsi/smartpqi/smartpqi.h index e97bf2670315..af962368818b 100644 --- a/drivers/scsi/smartpqi/smartpqi.h +++ b/drivers/scsi/smartpqi/smartpqi.h @@ -21,6 +21,9 @@ #if !defined(_SMARTPQI_H) #define _SMARTPQI_H +#include <scsi/scsi_host.h> +#include <linux/bsg-lib.h> + #pragma pack(1) #define PQI_DEVICE_SIGNATURE "PQI DREG" @@ -97,6 +100,12 @@ struct pqi_ctrl_registers { struct pqi_device_registers pqi_registers; /* 4000h */ }; +#if ((HZ) < 1000) +#define PQI_HZ 1000 +#else +#define PQI_HZ (HZ) +#endif + #define PQI_DEVICE_REGISTERS_OFFSET 0x4000 enum pqi_io_path { @@ -347,6 +356,10 @@ struct pqi_event_config { #define PQI_MAX_EVENT_DESCRIPTORS 255 +#define PQI_EVENT_OFA_MEMORY_ALLOCATION 0x0 +#define PQI_EVENT_OFA_QUIESCE 0x1 +#define PQI_EVENT_OFA_CANCELLED 0x2 + struct pqi_event_response { struct pqi_iu_header header; u8 event_type; @@ -354,7 +367,17 @@ struct pqi_event_response { u8 request_acknowlege : 1; __le16 event_id; __le32 additional_event_id; - u8 data[16]; + union { + struct { + __le32 bytes_requested; + u8 reserved[12]; + } ofa_memory_allocation; + + struct { + __le16 reason; /* reason for cancellation */ + u8 reserved[14]; + } ofa_cancelled; + } data; }; struct pqi_event_acknowledge_request { @@ -389,6 +412,54 @@ struct pqi_task_management_response { u8 response_code; }; +struct pqi_vendor_general_request { + struct pqi_iu_header header; + __le16 request_id; + __le16 function_code; + union { + struct { + __le16 first_section; + __le16 last_section; + u8 reserved[48]; + } config_table_update; + + struct { + __le64 buffer_address; + __le32 buffer_length; + u8 reserved[40]; + } ofa_memory_allocation; + } data; +}; + +struct pqi_vendor_general_response { + struct pqi_iu_header header; + __le16 request_id; + __le16 function_code; + __le16 status; + u8 reserved[2]; +}; + +#define PQI_VENDOR_GENERAL_CONFIG_TABLE_UPDATE 0 +#define PQI_VENDOR_GENERAL_HOST_MEMORY_UPDATE 1 + +#define PQI_OFA_VERSION 1 +#define PQI_OFA_SIGNATURE "OFA_QRM" +#define PQI_OFA_MAX_SG_DESCRIPTORS 64 + +#define PQI_OFA_MEMORY_DESCRIPTOR_LENGTH \ + (offsetof(struct pqi_ofa_memory, sg_descriptor) + \ + (PQI_OFA_MAX_SG_DESCRIPTORS * sizeof(struct pqi_sg_descriptor))) + +struct pqi_ofa_memory { + __le64 signature; /* "OFA_QRM" */ + __le16 version; /* version of this struct(1 = 1st version) */ + u8 reserved[62]; + __le32 bytes_allocated; /* total allocated memory in bytes */ + __le16 num_memory_descriptors; + u8 reserved1[2]; + struct pqi_sg_descriptor sg_descriptor[1]; +}; + struct pqi_aio_error_info { u8 status; u8 service_response; @@ -419,6 +490,7 @@ struct pqi_raid_error_info { #define PQI_REQUEST_IU_GENERAL_ADMIN 0x60 #define PQI_REQUEST_IU_REPORT_VENDOR_EVENT_CONFIG 0x72 #define PQI_REQUEST_IU_SET_VENDOR_EVENT_CONFIG 0x73 +#define PQI_REQUEST_IU_VENDOR_GENERAL 0x75 #define PQI_REQUEST_IU_ACKNOWLEDGE_VENDOR_EVENT 0xf6 #define PQI_RESPONSE_IU_GENERAL_MANAGEMENT 0x81 @@ -430,6 +502,7 @@ struct pqi_raid_error_info { #define PQI_RESPONSE_IU_AIO_PATH_IO_ERROR 0xf3 #define PQI_RESPONSE_IU_AIO_PATH_DISABLED 0xf4 #define PQI_RESPONSE_IU_VENDOR_EVENT 0xf5 +#define PQI_RESPONSE_IU_VENDOR_GENERAL 0xf7 #define PQI_GENERAL_ADMIN_FUNCTION_REPORT_DEVICE_CAPABILITY 0x0 #define PQI_GENERAL_ADMIN_FUNCTION_CREATE_IQ 0x10 @@ -492,6 +565,7 @@ struct pqi_raid_error_info { #define PQI_EVENT_TYPE_HARDWARE 0x2 #define PQI_EVENT_TYPE_PHYSICAL_DEVICE 0x4 #define PQI_EVENT_TYPE_LOGICAL_DEVICE 0x5 +#define PQI_EVENT_TYPE_OFA 0xfb #define PQI_EVENT_TYPE_AIO_STATE_CHANGE 0xfd #define PQI_EVENT_TYPE_AIO_CONFIG_CHANGE 0xfe @@ -556,6 +630,7 @@ typedef u32 pqi_index_t; #define SOP_TASK_ATTRIBUTE_ACA 4 #define SOP_TMF_COMPLETE 0x0 +#define SOP_TMF_REJECTED 0x4 #define SOP_TMF_FUNCTION_SUCCEEDED 0x8 /* additional CDB bytes usage field codes */ @@ -644,11 +719,13 @@ struct pqi_encryption_info { #define PQI_CONFIG_TABLE_MAX_LENGTH ((u16)~0) /* configuration table section IDs */ +#define PQI_CONFIG_TABLE_ALL_SECTIONS (-1) #define PQI_CONFIG_TABLE_SECTION_GENERAL_INFO 0 #define PQI_CONFIG_TABLE_SECTION_FIRMWARE_FEATURES 1 #define PQI_CONFIG_TABLE_SECTION_FIRMWARE_ERRATA 2 #define PQI_CONFIG_TABLE_SECTION_DEBUG 3 #define PQI_CONFIG_TABLE_SECTION_HEARTBEAT 4 +#define PQI_CONFIG_TABLE_SECTION_SOFT_RESET 5 struct pqi_config_table { u8 signature[8]; /* "CFGTABLE" */ @@ -680,6 +757,18 @@ struct pqi_config_table_general_info { /* command */ }; +struct pqi_config_table_firmware_features { + struct pqi_config_table_section_header header; + __le16 num_elements; + u8 features_supported[]; +/* u8 features_requested_by_host[]; */ +/* u8 features_enabled[]; */ +}; + +#define PQI_FIRMWARE_FEATURE_OFA 0 +#define PQI_FIRMWARE_FEATURE_SMP 1 +#define PQI_FIRMWARE_FEATURE_SOFT_RESET_HANDSHAKE 11 + struct pqi_config_table_debug { struct pqi_config_table_section_header header; __le32 scratchpad; @@ -690,6 +779,22 @@ struct pqi_config_table_heartbeat { __le32 heartbeat_counter; }; +struct pqi_config_table_soft_reset { + struct pqi_config_table_section_header header; + u8 soft_reset_status; +}; + +#define PQI_SOFT_RESET_INITIATE 0x1 +#define PQI_SOFT_RESET_ABORT 0x2 + +enum pqi_soft_reset_status { + RESET_INITIATE_FIRMWARE, + RESET_INITIATE_DRIVER, + RESET_ABORT, + RESET_NORESPONSE, + RESET_TIMEDOUT +}; + union pqi_reset_register { struct { u32 reset_type : 3; @@ -808,8 +913,10 @@ struct pqi_scsi_dev { u8 scsi3addr[8]; __be64 wwid; u8 volume_id[16]; + u8 unique_id[16]; u8 is_physical_device : 1; u8 is_external_raid_device : 1; + u8 is_expander_smp_device : 1; u8 target_lun_valid : 1; u8 device_gone : 1; u8 new_device : 1; @@ -817,6 +924,7 @@ struct pqi_scsi_dev { u8 volume_offline : 1; bool aio_enabled; /* only valid for physical disks */ bool in_reset; + bool in_remove; bool device_offline; u8 vendor[8]; /* bytes 8-15 of inquiry data */ u8 model[16]; /* bytes 16-31 of inquiry data */ @@ -854,6 +962,8 @@ struct pqi_scsi_dev { #define CISS_VPD_LV_DEVICE_GEOMETRY 0xc1 /* vendor-specific page */ #define CISS_VPD_LV_BYPASS_STATUS 0xc2 /* vendor-specific page */ #define CISS_VPD_LV_STATUS 0xc3 /* vendor-specific page */ +#define SCSI_VPD_HEADER_SZ 4 +#define SCSI_VPD_DEVICE_ID_IDX 8 /* Index of page id in page */ #define VPD_PAGE (1 << 8) @@ -916,6 +1026,7 @@ struct pqi_sas_node { struct pqi_sas_port { struct list_head port_list_entry; u64 sas_address; + struct pqi_scsi_dev *device; struct sas_port *port; int next_phy_index; struct list_head phy_list_head; @@ -947,13 +1058,15 @@ struct pqi_io_request { struct list_head request_list_entry; }; -#define PQI_NUM_SUPPORTED_EVENTS 6 +#define PQI_NUM_SUPPORTED_EVENTS 7 struct pqi_event { bool pending; u8 event_type; __le16 event_id; __le32 additional_event_id; + __le32 ofa_bytes_requested; + __le16 ofa_cancel_reason; }; #define PQI_RESERVED_IO_SLOTS_LUN_RESET 1 @@ -1014,12 +1127,16 @@ struct pqi_ctrl_info { struct mutex scan_mutex; struct mutex lun_reset_mutex; + struct mutex ofa_mutex; /* serialize ofa */ bool controller_online; bool block_requests; + bool in_shutdown; + bool in_ofa; u8 inbound_spanning_supported : 1; u8 outbound_spanning_supported : 1; u8 pqi_mode_enabled : 1; u8 pqi_reset_quiesce_supported : 1; + u8 soft_reset_handshake_supported : 1; struct list_head scsi_device_list; spinlock_t scsi_device_list_lock; @@ -1040,6 +1157,7 @@ struct pqi_ctrl_info { int previous_num_interrupts; u32 previous_heartbeat_count; __le32 __iomem *heartbeat_counter; + u8 __iomem *soft_reset_status; struct timer_list heartbeat_timer; struct work_struct ctrl_offline_work; @@ -1051,6 +1169,10 @@ struct pqi_ctrl_info { struct list_head raid_bypass_retry_list; spinlock_t raid_bypass_retry_list_lock; struct work_struct raid_bypass_retry_work; + + struct pqi_ofa_memory *pqi_ofa_mem_virt_addr; + dma_addr_t pqi_ofa_mem_dma_handle; + void **pqi_ofa_chunk_virt_addr; }; enum pqi_ctrl_mode { @@ -1080,8 +1202,13 @@ enum pqi_ctrl_mode { #define BMIC_WRITE 0x27 #define BMIC_SENSE_CONTROLLER_PARAMETERS 0x64 #define BMIC_SENSE_SUBSYSTEM_INFORMATION 0x66 +#define BMIC_CSMI_PASSTHRU 0x68 #define BMIC_WRITE_HOST_WELLNESS 0xa5 #define BMIC_FLUSH_CACHE 0xc2 +#define BMIC_SET_DIAG_OPTIONS 0xf4 +#define BMIC_SENSE_DIAG_OPTIONS 0xf5 + +#define CSMI_CC_SAS_SMP_PASSTHRU 0X17 #define SA_FLUSH_CACHE 0x1 @@ -1109,6 +1236,10 @@ struct bmic_identify_controller { u8 reserved3[32]; }; +#define SA_EXPANDER_SMP_DEVICE 0x05 +/*SCSI Invalid Device Type for SAS devices*/ +#define PQI_SAS_SCSI_INVALID_DEVTYPE 0xff + struct bmic_identify_physical_device { u8 scsi_bus; /* SCSI Bus number on controller */ u8 scsi_id; /* SCSI ID on this bus */ @@ -1189,6 +1320,50 @@ struct bmic_identify_physical_device { u8 padding_to_multiple_of_512[9]; }; +struct bmic_smp_request { + u8 frame_type; + u8 function; + u8 allocated_response_length; + u8 request_length; + u8 additional_request_bytes[1016]; +}; + +struct bmic_smp_response { + u8 frame_type; + u8 function; + u8 function_result; + u8 response_length; + u8 additional_response_bytes[1016]; +}; + +struct bmic_csmi_ioctl_header { + __le32 header_length; + u8 signature[8]; + __le32 timeout; + __le32 control_code; + __le32 return_code; + __le32 length; +}; + +struct bmic_csmi_smp_passthru { + u8 phy_identifier; + u8 port_identifier; + u8 connection_rate; + u8 reserved; + __be64 destination_sas_address; + __le32 request_length; + struct bmic_smp_request request; + u8 connection_status; + u8 reserved1[3]; + __le32 response_length; + struct bmic_smp_response response; +}; + +struct bmic_csmi_smp_passthru_buffer { + struct bmic_csmi_ioctl_header ioctl_header; + struct bmic_csmi_smp_passthru parameters; +}; + struct bmic_flush_cache { u8 disable_flag; u8 system_power_action; @@ -1206,8 +1381,42 @@ enum bmic_flush_cache_shutdown_event { RESTART = 4 }; +struct bmic_diag_options { + __le32 options; +}; + #pragma pack() +static inline struct pqi_ctrl_info *shost_to_hba(struct Scsi_Host *shost) +{ + void *hostdata = shost_priv(shost); + + return *((struct pqi_ctrl_info **)hostdata); +} + +static inline bool pqi_ctrl_offline(struct pqi_ctrl_info *ctrl_info) +{ + return !ctrl_info->controller_online; +} + +static inline void pqi_ctrl_busy(struct pqi_ctrl_info *ctrl_info) +{ + atomic_inc(&ctrl_info->num_busy_threads); +} + +static inline void pqi_ctrl_unbusy(struct pqi_ctrl_info *ctrl_info) +{ + atomic_dec(&ctrl_info->num_busy_threads); +} + +static inline bool pqi_ctrl_blocked(struct pqi_ctrl_info *ctrl_info) +{ + return ctrl_info->block_requests; +} + +void pqi_sas_smp_handler(struct bsg_job *job, struct Scsi_Host *shost, + struct sas_rphy *rphy); + int pqi_add_sas_host(struct Scsi_Host *shost, struct pqi_ctrl_info *ctrl_info); void pqi_delete_sas_host(struct pqi_ctrl_info *ctrl_info); int pqi_add_sas_device(struct pqi_sas_node *pqi_sas_node, @@ -1216,6 +1425,9 @@ void pqi_remove_sas_device(struct pqi_scsi_dev *device); struct pqi_scsi_dev *pqi_find_device_by_sas_rphy( struct pqi_ctrl_info *ctrl_info, struct sas_rphy *rphy); void pqi_prep_for_scsi_done(struct scsi_cmnd *scmd); +int pqi_csmi_smp_passthru(struct pqi_ctrl_info *ctrl_info, + struct bmic_csmi_smp_passthru_buffer *buffer, size_t buffer_length, + struct pqi_raid_error_info *error_info); extern struct sas_function_template pqi_sas_transport_functions; diff --git a/drivers/scsi/smartpqi/smartpqi_init.c b/drivers/scsi/smartpqi/smartpqi_init.c index a25a07a0b7f0..e2fa3f476227 100644 --- a/drivers/scsi/smartpqi/smartpqi_init.c +++ b/drivers/scsi/smartpqi/smartpqi_init.c @@ -40,11 +40,11 @@ #define BUILD_TIMESTAMP #endif -#define DRIVER_VERSION "1.1.4-130" +#define DRIVER_VERSION "1.2.4-070" #define DRIVER_MAJOR 1 -#define DRIVER_MINOR 1 +#define DRIVER_MINOR 2 #define DRIVER_RELEASE 4 -#define DRIVER_REVISION 130 +#define DRIVER_REVISION 70 #define DRIVER_NAME "Microsemi PQI Driver (v" \ DRIVER_VERSION BUILD_TIMESTAMP ")" @@ -74,6 +74,15 @@ static int pqi_aio_submit_io(struct pqi_ctrl_info *ctrl_info, struct scsi_cmnd *scmd, u32 aio_handle, u8 *cdb, unsigned int cdb_length, struct pqi_queue_group *queue_group, struct pqi_encryption_info *encryption_info, bool raid_bypass); +static void pqi_ofa_ctrl_quiesce(struct pqi_ctrl_info *ctrl_info); +static void pqi_ofa_ctrl_unquiesce(struct pqi_ctrl_info *ctrl_info); +static int pqi_ofa_ctrl_restart(struct pqi_ctrl_info *ctrl_info); +static void pqi_ofa_setup_host_buffer(struct pqi_ctrl_info *ctrl_info, + u32 bytes_requested); +static void pqi_ofa_free_host_buffer(struct pqi_ctrl_info *ctrl_info); +static int pqi_ofa_host_memory_update(struct pqi_ctrl_info *ctrl_info); +static int pqi_device_wait_for_pending_io(struct pqi_ctrl_info *ctrl_info, + struct pqi_scsi_dev *device, unsigned long timeout_secs); /* for flags argument to pqi_submit_raid_request_synchronous() */ #define PQI_SYNC_FLAGS_INTERRUPTABLE 0x1 @@ -113,6 +122,7 @@ static unsigned int pqi_supported_event_types[] = { PQI_EVENT_TYPE_HARDWARE, PQI_EVENT_TYPE_PHYSICAL_DEVICE, PQI_EVENT_TYPE_LOGICAL_DEVICE, + PQI_EVENT_TYPE_OFA, PQI_EVENT_TYPE_AIO_STATE_CHANGE, PQI_EVENT_TYPE_AIO_CONFIG_CHANGE, }; @@ -176,16 +186,14 @@ static inline void pqi_scsi_done(struct scsi_cmnd *scmd) scmd->scsi_done(scmd); } -static inline bool pqi_scsi3addr_equal(u8 *scsi3addr1, u8 *scsi3addr2) +static inline void pqi_disable_write_same(struct scsi_device *sdev) { - return memcmp(scsi3addr1, scsi3addr2, 8) == 0; + sdev->no_write_same = 1; } -static inline struct pqi_ctrl_info *shost_to_hba(struct Scsi_Host *shost) +static inline bool pqi_scsi3addr_equal(u8 *scsi3addr1, u8 *scsi3addr2) { - void *hostdata = shost_priv(shost); - - return *((struct pqi_ctrl_info **)hostdata); + return memcmp(scsi3addr1, scsi3addr2, 8) == 0; } static inline bool pqi_is_logical_device(struct pqi_scsi_dev *device) @@ -198,11 +206,6 @@ static inline bool pqi_is_external_raid_addr(u8 *scsi3addr) return scsi3addr[2] != 0; } -static inline bool pqi_ctrl_offline(struct pqi_ctrl_info *ctrl_info) -{ - return !ctrl_info->controller_online; -} - static inline void pqi_check_ctrl_health(struct pqi_ctrl_info *ctrl_info) { if (ctrl_info->controller_online) @@ -241,11 +244,6 @@ static inline void pqi_ctrl_unblock_requests(struct pqi_ctrl_info *ctrl_info) scsi_unblock_requests(ctrl_info->scsi_host); } -static inline bool pqi_ctrl_blocked(struct pqi_ctrl_info *ctrl_info) -{ - return ctrl_info->block_requests; -} - static unsigned long pqi_wait_if_ctrl_blocked(struct pqi_ctrl_info *ctrl_info, unsigned long timeout_msecs) { @@ -275,16 +273,6 @@ static unsigned long pqi_wait_if_ctrl_blocked(struct pqi_ctrl_info *ctrl_info, return remaining_msecs; } -static inline void pqi_ctrl_busy(struct pqi_ctrl_info *ctrl_info) -{ - atomic_inc(&ctrl_info->num_busy_threads); -} - -static inline void pqi_ctrl_unbusy(struct pqi_ctrl_info *ctrl_info) -{ - atomic_dec(&ctrl_info->num_busy_threads); -} - static inline void pqi_ctrl_wait_until_quiesced(struct pqi_ctrl_info *ctrl_info) { while (atomic_read(&ctrl_info->num_busy_threads) > @@ -312,11 +300,39 @@ static inline bool pqi_device_in_reset(struct pqi_scsi_dev *device) return device->in_reset; } +static inline void pqi_ctrl_ofa_start(struct pqi_ctrl_info *ctrl_info) +{ + ctrl_info->in_ofa = true; +} + +static inline void pqi_ctrl_ofa_done(struct pqi_ctrl_info *ctrl_info) +{ + ctrl_info->in_ofa = false; +} + +static inline bool pqi_ctrl_in_ofa(struct pqi_ctrl_info *ctrl_info) +{ + return ctrl_info->in_ofa; +} + +static inline void pqi_device_remove_start(struct pqi_scsi_dev *device) +{ + device->in_remove = true; +} + +static inline bool pqi_device_in_remove(struct pqi_ctrl_info *ctrl_info, + struct pqi_scsi_dev *device) +{ + return device->in_remove & !ctrl_info->in_shutdown; +} + static inline void pqi_schedule_rescan_worker_with_delay( struct pqi_ctrl_info *ctrl_info, unsigned long delay) { if (pqi_ctrl_offline(ctrl_info)) return; + if (pqi_ctrl_in_ofa(ctrl_info)) + return; schedule_delayed_work(&ctrl_info->rescan_work, delay); } @@ -326,7 +342,7 @@ static inline void pqi_schedule_rescan_worker(struct pqi_ctrl_info *ctrl_info) pqi_schedule_rescan_worker_with_delay(ctrl_info, 0); } -#define PQI_RESCAN_WORK_DELAY (10 * HZ) +#define PQI_RESCAN_WORK_DELAY (10 * PQI_HZ) static inline void pqi_schedule_rescan_worker_delayed( struct pqi_ctrl_info *ctrl_info) @@ -347,6 +363,27 @@ static inline u32 pqi_read_heartbeat_counter(struct pqi_ctrl_info *ctrl_info) return readl(ctrl_info->heartbeat_counter); } +static inline u8 pqi_read_soft_reset_status(struct pqi_ctrl_info *ctrl_info) +{ + if (!ctrl_info->soft_reset_status) + return 0; + + return readb(ctrl_info->soft_reset_status); +} + +static inline void pqi_clear_soft_reset_status(struct pqi_ctrl_info *ctrl_info, + u8 clear) +{ + u8 status; + + if (!ctrl_info->soft_reset_status) + return; + + status = pqi_read_soft_reset_status(ctrl_info); + status &= ~clear; + writeb(status, ctrl_info->soft_reset_status); +} + static int pqi_map_single(struct pci_dev *pci_dev, struct pqi_sg_descriptor *sg_descriptor, void *buffer, size_t buffer_length, enum dma_data_direction data_direction) @@ -390,6 +427,7 @@ static int pqi_build_raid_path_request(struct pqi_ctrl_info *ctrl_info, u16 vpd_page, enum dma_data_direction *dir) { u8 *cdb; + size_t cdb_length = buffer_length; memset(request, 0, sizeof(*request)); @@ -412,7 +450,7 @@ static int pqi_build_raid_path_request(struct pqi_ctrl_info *ctrl_info, cdb[1] = 0x1; cdb[2] = (u8)vpd_page; } - cdb[4] = (u8)buffer_length; + cdb[4] = (u8)cdb_length; break; case CISS_REPORT_LOG: case CISS_REPORT_PHYS: @@ -422,32 +460,45 @@ static int pqi_build_raid_path_request(struct pqi_ctrl_info *ctrl_info, cdb[1] = CISS_REPORT_PHYS_EXTENDED; else cdb[1] = CISS_REPORT_LOG_EXTENDED; - put_unaligned_be32(buffer_length, &cdb[6]); + put_unaligned_be32(cdb_length, &cdb[6]); break; case CISS_GET_RAID_MAP: request->data_direction = SOP_READ_FLAG; cdb[0] = CISS_READ; cdb[1] = CISS_GET_RAID_MAP; - put_unaligned_be32(buffer_length, &cdb[6]); + put_unaligned_be32(cdb_length, &cdb[6]); break; case SA_FLUSH_CACHE: request->data_direction = SOP_WRITE_FLAG; cdb[0] = BMIC_WRITE; cdb[6] = BMIC_FLUSH_CACHE; - put_unaligned_be16(buffer_length, &cdb[7]); + put_unaligned_be16(cdb_length, &cdb[7]); break; + case BMIC_SENSE_DIAG_OPTIONS: + cdb_length = 0; + /* fall through */ case BMIC_IDENTIFY_CONTROLLER: case BMIC_IDENTIFY_PHYSICAL_DEVICE: request->data_direction = SOP_READ_FLAG; cdb[0] = BMIC_READ; cdb[6] = cmd; - put_unaligned_be16(buffer_length, &cdb[7]); + put_unaligned_be16(cdb_length, &cdb[7]); break; + case BMIC_SET_DIAG_OPTIONS: + cdb_length = 0; + /* fall through */ case BMIC_WRITE_HOST_WELLNESS: request->data_direction = SOP_WRITE_FLAG; cdb[0] = BMIC_WRITE; cdb[6] = cmd; - put_unaligned_be16(buffer_length, &cdb[7]); + put_unaligned_be16(cdb_length, &cdb[7]); + break; + case BMIC_CSMI_PASSTHRU: + request->data_direction = SOP_BIDIRECTIONAL; + cdb[0] = BMIC_WRITE; + cdb[5] = CSMI_CC_SAS_SMP_PASSTHRU; + cdb[6] = cmd; + put_unaligned_be16(cdb_length, &cdb[7]); break; default: dev_err(&ctrl_info->pci_dev->dev, "unknown command 0x%c\n", @@ -509,43 +560,130 @@ static void pqi_free_io_request(struct pqi_io_request *io_request) atomic_dec(&io_request->refcount); } -static int pqi_identify_controller(struct pqi_ctrl_info *ctrl_info, - struct bmic_identify_controller *buffer) +static int pqi_send_scsi_raid_request(struct pqi_ctrl_info *ctrl_info, u8 cmd, + u8 *scsi3addr, void *buffer, size_t buffer_length, u16 vpd_page, + struct pqi_raid_error_info *error_info, + unsigned long timeout_msecs) { int rc; enum dma_data_direction dir; struct pqi_raid_path_request request; rc = pqi_build_raid_path_request(ctrl_info, &request, - BMIC_IDENTIFY_CONTROLLER, RAID_CTLR_LUNID, buffer, - sizeof(*buffer), 0, &dir); + cmd, scsi3addr, buffer, + buffer_length, vpd_page, &dir); if (rc) return rc; - rc = pqi_submit_raid_request_synchronous(ctrl_info, &request.header, 0, - NULL, NO_TIMEOUT); + rc = pqi_submit_raid_request_synchronous(ctrl_info, &request.header, + 0, error_info, timeout_msecs); pqi_pci_unmap(ctrl_info->pci_dev, request.sg_descriptors, 1, dir); return rc; } -static int pqi_scsi_inquiry(struct pqi_ctrl_info *ctrl_info, +/* Helper functions for pqi_send_scsi_raid_request */ + +static inline int pqi_send_ctrl_raid_request(struct pqi_ctrl_info *ctrl_info, + u8 cmd, void *buffer, size_t buffer_length) +{ + return pqi_send_scsi_raid_request(ctrl_info, cmd, RAID_CTLR_LUNID, + buffer, buffer_length, 0, NULL, NO_TIMEOUT); +} + +static inline int pqi_send_ctrl_raid_with_error(struct pqi_ctrl_info *ctrl_info, + u8 cmd, void *buffer, size_t buffer_length, + struct pqi_raid_error_info *error_info) +{ + return pqi_send_scsi_raid_request(ctrl_info, cmd, RAID_CTLR_LUNID, + buffer, buffer_length, 0, error_info, NO_TIMEOUT); +} + + +static inline int pqi_identify_controller(struct pqi_ctrl_info *ctrl_info, + struct bmic_identify_controller *buffer) +{ + return pqi_send_ctrl_raid_request(ctrl_info, BMIC_IDENTIFY_CONTROLLER, + buffer, sizeof(*buffer)); +} + +static inline int pqi_scsi_inquiry(struct pqi_ctrl_info *ctrl_info, u8 *scsi3addr, u16 vpd_page, void *buffer, size_t buffer_length) { + return pqi_send_scsi_raid_request(ctrl_info, INQUIRY, scsi3addr, + buffer, buffer_length, vpd_page, NULL, NO_TIMEOUT); +} + +static bool pqi_vpd_page_supported(struct pqi_ctrl_info *ctrl_info, + u8 *scsi3addr, u16 vpd_page) +{ int rc; - enum dma_data_direction dir; - struct pqi_raid_path_request request; + int i; + int pages; + unsigned char *buf, bufsize; - rc = pqi_build_raid_path_request(ctrl_info, &request, - INQUIRY, scsi3addr, buffer, buffer_length, vpd_page, - &dir); - if (rc) - return rc; + buf = kzalloc(256, GFP_KERNEL); + if (!buf) + return false; - rc = pqi_submit_raid_request_synchronous(ctrl_info, &request.header, 0, - NULL, NO_TIMEOUT); + /* Get the size of the page list first */ + rc = pqi_scsi_inquiry(ctrl_info, scsi3addr, + VPD_PAGE | SCSI_VPD_SUPPORTED_PAGES, + buf, SCSI_VPD_HEADER_SZ); + if (rc != 0) + goto exit_unsupported; + + pages = buf[3]; + if ((pages + SCSI_VPD_HEADER_SZ) <= 255) + bufsize = pages + SCSI_VPD_HEADER_SZ; + else + bufsize = 255; + + /* Get the whole VPD page list */ + rc = pqi_scsi_inquiry(ctrl_info, scsi3addr, + VPD_PAGE | SCSI_VPD_SUPPORTED_PAGES, + buf, bufsize); + if (rc != 0) + goto exit_unsupported; + + pages = buf[3]; + for (i = 1; i <= pages; i++) + if (buf[3 + i] == vpd_page) + goto exit_supported; + +exit_unsupported: + kfree(buf); + return false; + +exit_supported: + kfree(buf); + return true; +} + +static int pqi_get_device_id(struct pqi_ctrl_info *ctrl_info, + u8 *scsi3addr, u8 *device_id, int buflen) +{ + int rc; + unsigned char *buf; + + if (!pqi_vpd_page_supported(ctrl_info, scsi3addr, SCSI_VPD_DEVICE_ID)) + return 1; /* function not supported */ + + buf = kzalloc(64, GFP_KERNEL); + if (!buf) + return -ENOMEM; + + rc = pqi_scsi_inquiry(ctrl_info, scsi3addr, + VPD_PAGE | SCSI_VPD_DEVICE_ID, + buf, 64); + if (rc == 0) { + if (buflen > 16) + buflen = 16; + memcpy(device_id, &buf[SCSI_VPD_DEVICE_ID_IDX], buflen); + } + + kfree(buf); - pqi_pci_unmap(ctrl_info->pci_dev, request.sg_descriptors, 1, dir); return rc; } @@ -580,9 +718,7 @@ static int pqi_flush_cache(struct pqi_ctrl_info *ctrl_info, enum bmic_flush_cache_shutdown_event shutdown_event) { int rc; - struct pqi_raid_path_request request; struct bmic_flush_cache *flush_cache; - enum dma_data_direction dir; /* * Don't bother trying to flush the cache if the controller is @@ -597,42 +733,55 @@ static int pqi_flush_cache(struct pqi_ctrl_info *ctrl_info, flush_cache->shutdown_event = shutdown_event; - rc = pqi_build_raid_path_request(ctrl_info, &request, - SA_FLUSH_CACHE, RAID_CTLR_LUNID, flush_cache, - sizeof(*flush_cache), 0, &dir); - if (rc) - goto out; - - rc = pqi_submit_raid_request_synchronous(ctrl_info, &request.header, - 0, NULL, NO_TIMEOUT); + rc = pqi_send_ctrl_raid_request(ctrl_info, SA_FLUSH_CACHE, flush_cache, + sizeof(*flush_cache)); - pqi_pci_unmap(ctrl_info->pci_dev, request.sg_descriptors, 1, dir); -out: kfree(flush_cache); return rc; } -static int pqi_write_host_wellness(struct pqi_ctrl_info *ctrl_info, - void *buffer, size_t buffer_length) +int pqi_csmi_smp_passthru(struct pqi_ctrl_info *ctrl_info, + struct bmic_csmi_smp_passthru_buffer *buffer, size_t buffer_length, + struct pqi_raid_error_info *error_info) +{ + return pqi_send_ctrl_raid_with_error(ctrl_info, BMIC_CSMI_PASSTHRU, + buffer, buffer_length, error_info); +} + +#define PQI_FETCH_PTRAID_DATA (1UL<<31) + +static int pqi_set_diag_rescan(struct pqi_ctrl_info *ctrl_info) { int rc; - struct pqi_raid_path_request request; - enum dma_data_direction dir; + struct bmic_diag_options *diag; - rc = pqi_build_raid_path_request(ctrl_info, &request, - BMIC_WRITE_HOST_WELLNESS, RAID_CTLR_LUNID, buffer, - buffer_length, 0, &dir); + diag = kzalloc(sizeof(*diag), GFP_KERNEL); + if (!diag) + return -ENOMEM; + + rc = pqi_send_ctrl_raid_request(ctrl_info, BMIC_SENSE_DIAG_OPTIONS, + diag, sizeof(*diag)); if (rc) - return rc; + goto out; - rc = pqi_submit_raid_request_synchronous(ctrl_info, &request.header, - 0, NULL, NO_TIMEOUT); + diag->options |= cpu_to_le32(PQI_FETCH_PTRAID_DATA); + + rc = pqi_send_ctrl_raid_request(ctrl_info, BMIC_SET_DIAG_OPTIONS, + diag, sizeof(*diag)); +out: + kfree(diag); - pqi_pci_unmap(ctrl_info->pci_dev, request.sg_descriptors, 1, dir); return rc; } +static inline int pqi_write_host_wellness(struct pqi_ctrl_info *ctrl_info, + void *buffer, size_t buffer_length) +{ + return pqi_send_ctrl_raid_request(ctrl_info, BMIC_WRITE_HOST_WELLNESS, + buffer, buffer_length); +} + #pragma pack(1) struct bmic_host_wellness_driver_version { @@ -640,6 +789,7 @@ struct bmic_host_wellness_driver_version { u8 driver_version_tag[2]; __le16 driver_version_length; char driver_version[32]; + u8 dont_write_tag[2]; u8 end_tag[2]; }; @@ -669,6 +819,8 @@ static int pqi_write_driver_version_to_host_wellness( strncpy(buffer->driver_version, "Linux " DRIVER_VERSION, sizeof(buffer->driver_version) - 1); buffer->driver_version[sizeof(buffer->driver_version) - 1] = '\0'; + buffer->dont_write_tag[0] = 'D'; + buffer->dont_write_tag[1] = 'W'; buffer->end_tag[0] = 'Z'; buffer->end_tag[1] = 'Z'; @@ -742,7 +894,7 @@ static int pqi_write_current_time_to_host_wellness( return rc; } -#define PQI_UPDATE_TIME_WORK_INTERVAL (24UL * 60 * 60 * HZ) +#define PQI_UPDATE_TIME_WORK_INTERVAL (24UL * 60 * 60 * PQI_HZ) static void pqi_update_time_worker(struct work_struct *work) { @@ -776,23 +928,11 @@ static inline void pqi_cancel_update_time_worker( cancel_delayed_work_sync(&ctrl_info->update_time_work); } -static int pqi_report_luns(struct pqi_ctrl_info *ctrl_info, u8 cmd, +static inline int pqi_report_luns(struct pqi_ctrl_info *ctrl_info, u8 cmd, void *buffer, size_t buffer_length) { - int rc; - enum dma_data_direction dir; - struct pqi_raid_path_request request; - - rc = pqi_build_raid_path_request(ctrl_info, &request, - cmd, RAID_CTLR_LUNID, buffer, buffer_length, 0, &dir); - if (rc) - return rc; - - rc = pqi_submit_raid_request_synchronous(ctrl_info, &request.header, 0, - NULL, NO_TIMEOUT); - - pqi_pci_unmap(ctrl_info->pci_dev, request.sg_descriptors, 1, dir); - return rc; + return pqi_send_ctrl_raid_request(ctrl_info, cmd, buffer, + buffer_length); } static int pqi_report_phys_logical_luns(struct pqi_ctrl_info *ctrl_info, u8 cmd, @@ -1010,8 +1150,6 @@ static int pqi_validate_raid_map(struct pqi_ctrl_info *ctrl_info, char *err_msg; u32 raid_map_size; u32 r5or6_blocks_per_row; - unsigned int num_phys_disks; - unsigned int num_raid_map_entries; raid_map_size = get_unaligned_le32(&raid_map->structure_size); @@ -1020,22 +1158,6 @@ static int pqi_validate_raid_map(struct pqi_ctrl_info *ctrl_info, goto bad_raid_map; } - if (raid_map_size > sizeof(*raid_map)) { - err_msg = "RAID map too large"; - goto bad_raid_map; - } - - num_phys_disks = get_unaligned_le16(&raid_map->layout_map_count) * - (get_unaligned_le16(&raid_map->data_disks_per_row) + - get_unaligned_le16(&raid_map->metadata_disks_per_row)); - num_raid_map_entries = num_phys_disks * - get_unaligned_le16(&raid_map->row_cnt); - - if (num_raid_map_entries > RAID_MAP_MAX_ENTRIES) { - err_msg = "invalid number of map entries in RAID map"; - goto bad_raid_map; - } - if (device->raid_level == SA_RAID_1) { if (get_unaligned_le16(&raid_map->layout_map_count) != 2) { err_msg = "invalid RAID-1 map"; @@ -1074,27 +1196,45 @@ static int pqi_get_raid_map(struct pqi_ctrl_info *ctrl_info, struct pqi_scsi_dev *device) { int rc; - enum dma_data_direction dir; - struct pqi_raid_path_request request; + u32 raid_map_size; struct raid_map *raid_map; raid_map = kmalloc(sizeof(*raid_map), GFP_KERNEL); if (!raid_map) return -ENOMEM; - rc = pqi_build_raid_path_request(ctrl_info, &request, - CISS_GET_RAID_MAP, device->scsi3addr, raid_map, - sizeof(*raid_map), 0, &dir); + rc = pqi_send_scsi_raid_request(ctrl_info, CISS_GET_RAID_MAP, + device->scsi3addr, raid_map, sizeof(*raid_map), + 0, NULL, NO_TIMEOUT); + if (rc) goto error; - rc = pqi_submit_raid_request_synchronous(ctrl_info, &request.header, 0, - NULL, NO_TIMEOUT); + raid_map_size = get_unaligned_le32(&raid_map->structure_size); - pqi_pci_unmap(ctrl_info->pci_dev, request.sg_descriptors, 1, dir); + if (raid_map_size > sizeof(*raid_map)) { - if (rc) - goto error; + kfree(raid_map); + + raid_map = kmalloc(raid_map_size, GFP_KERNEL); + if (!raid_map) + return -ENOMEM; + + rc = pqi_send_scsi_raid_request(ctrl_info, CISS_GET_RAID_MAP, + device->scsi3addr, raid_map, raid_map_size, + 0, NULL, NO_TIMEOUT); + if (rc) + goto error; + + if (get_unaligned_le32(&raid_map->structure_size) + != raid_map_size) { + dev_warn(&ctrl_info->pci_dev->dev, + "Requested %d bytes, received %d bytes", + raid_map_size, + get_unaligned_le32(&raid_map->structure_size)); + goto error; + } + } rc = pqi_validate_raid_map(ctrl_info, device, raid_map); if (rc) @@ -1165,6 +1305,9 @@ static void pqi_get_volume_status(struct pqi_ctrl_info *ctrl_info, if (rc) goto out; + if (vpd->page_code != CISS_VPD_LV_STATUS) + goto out; + page_length = offsetof(struct ciss_vpd_logical_volume_status, volume_status) + vpd->page_length; if (page_length < sizeof(*vpd)) @@ -1190,6 +1333,9 @@ static int pqi_get_device_info(struct pqi_ctrl_info *ctrl_info, u8 *buffer; unsigned int retries; + if (device->is_expander_smp_device) + return 0; + buffer = kmalloc(64, GFP_KERNEL); if (!buffer) return -ENOMEM; @@ -1225,6 +1371,14 @@ static int pqi_get_device_info(struct pqi_ctrl_info *ctrl_info, } } + if (pqi_get_device_id(ctrl_info, device->scsi3addr, + device->unique_id, sizeof(device->unique_id)) < 0) + dev_warn(&ctrl_info->pci_dev->dev, + "Can't get device id for scsi %d:%d:%d:%d\n", + ctrl_info->scsi_host->host_no, + device->bus, device->target, + device->lun); + out: kfree(buffer); @@ -1387,9 +1541,24 @@ static int pqi_add_device(struct pqi_ctrl_info *ctrl_info, return rc; } +#define PQI_PENDING_IO_TIMEOUT_SECS 20 + static inline void pqi_remove_device(struct pqi_ctrl_info *ctrl_info, struct pqi_scsi_dev *device) { + int rc; + + pqi_device_remove_start(device); + + rc = pqi_device_wait_for_pending_io(ctrl_info, device, + PQI_PENDING_IO_TIMEOUT_SECS); + if (rc) + dev_err(&ctrl_info->pci_dev->dev, + "scsi %d:%d:%d:%d removing device with %d outstanding commands\n", + ctrl_info->scsi_host->host_no, device->bus, + device->target, device->lun, + atomic_read(&device->scsi_cmds_outstanding)); + if (pqi_is_logical_device(device)) scsi_remove_device(device->sdev); else @@ -1454,6 +1623,14 @@ static enum pqi_find_result pqi_scsi_find_entry(struct pqi_ctrl_info *ctrl_info, return DEVICE_NOT_FOUND; } +static inline const char *pqi_device_type(struct pqi_scsi_dev *device) +{ + if (device->is_expander_smp_device) + return "Enclosure SMP "; + + return scsi_device_type(device->devtype); +} + #define PQI_DEV_INFO_BUFFER_LENGTH 128 static void pqi_dev_info(struct pqi_ctrl_info *ctrl_info, @@ -1489,7 +1666,7 @@ static void pqi_dev_info(struct pqi_ctrl_info *ctrl_info, count += snprintf(buffer + count, PQI_DEV_INFO_BUFFER_LENGTH - count, " %s %.8s %.16s ", - scsi_device_type(device->devtype), + pqi_device_type(device), device->vendor, device->model); @@ -1534,6 +1711,8 @@ static void pqi_scsi_update_device(struct pqi_scsi_dev *existing_device, existing_device->is_physical_device = new_device->is_physical_device; existing_device->is_external_raid_device = new_device->is_external_raid_device; + existing_device->is_expander_smp_device = + new_device->is_expander_smp_device; existing_device->aio_enabled = new_device->aio_enabled; memcpy(existing_device->vendor, new_device->vendor, sizeof(existing_device->vendor)); @@ -1558,6 +1737,7 @@ static void pqi_scsi_update_device(struct pqi_scsi_dev *existing_device, new_device->raid_bypass_configured; existing_device->raid_bypass_enabled = new_device->raid_bypass_enabled; + existing_device->device_offline = false; /* To prevent this from being freed later. */ new_device->raid_map = NULL; @@ -1589,6 +1769,14 @@ static inline void pqi_fixup_botched_add(struct pqi_ctrl_info *ctrl_info, device->keep_device = false; } +static inline bool pqi_is_device_added(struct pqi_scsi_dev *device) +{ + if (device->is_expander_smp_device) + return device->sas_port != NULL; + + return device->sdev != NULL; +} + static void pqi_update_device_list(struct pqi_ctrl_info *ctrl_info, struct pqi_scsi_dev *new_device_list[], unsigned int num_new_devices) { @@ -1674,6 +1862,9 @@ static void pqi_update_device_list(struct pqi_ctrl_info *ctrl_info, spin_unlock_irqrestore(&ctrl_info->scsi_device_list_lock, flags); + if (pqi_ctrl_in_ofa(ctrl_info)) + pqi_ctrl_ofa_done(ctrl_info); + /* Remove all devices that have gone away. */ list_for_each_entry_safe(device, next, &delete_list, delete_list_entry) { @@ -1683,7 +1874,7 @@ static void pqi_update_device_list(struct pqi_ctrl_info *ctrl_info, } else { pqi_dev_info(ctrl_info, "removed", device); } - if (device->sdev) + if (pqi_is_device_added(device)) pqi_remove_device(ctrl_info, device); list_del(&device->delete_list_entry); pqi_free_device(device); @@ -1705,7 +1896,7 @@ static void pqi_update_device_list(struct pqi_ctrl_info *ctrl_info, /* Expose any new devices. */ list_for_each_entry_safe(device, next, &add_list, add_list_entry) { - if (!device->sdev) { + if (!pqi_is_device_added(device)) { pqi_dev_info(ctrl_info, "added", device); rc = pqi_add_device(ctrl_info, device); if (rc) { @@ -1722,7 +1913,12 @@ static void pqi_update_device_list(struct pqi_ctrl_info *ctrl_info, static bool pqi_is_supported_device(struct pqi_scsi_dev *device) { - bool is_supported = false; + bool is_supported; + + if (device->is_expander_smp_device) + return true; + + is_supported = false; switch (device->devtype) { case TYPE_DISK: @@ -1756,6 +1952,30 @@ static inline bool pqi_skip_device(u8 *scsi3addr) return false; } +static inline bool pqi_is_device_with_sas_address(struct pqi_scsi_dev *device) +{ + if (!device->is_physical_device) + return false; + + if (device->is_expander_smp_device) + return true; + + switch (device->devtype) { + case TYPE_DISK: + case TYPE_ZBC: + case TYPE_ENCLOSURE: + return true; + } + + return false; +} + +static inline bool pqi_expose_device(struct pqi_scsi_dev *device) +{ + return !device->is_physical_device || + !pqi_skip_device(device->scsi3addr); +} + static int pqi_update_scsi_devices(struct pqi_ctrl_info *ctrl_info) { int i; @@ -1864,9 +2084,14 @@ static int pqi_update_scsi_devices(struct pqi_ctrl_info *ctrl_info) memcpy(device->scsi3addr, scsi3addr, sizeof(device->scsi3addr)); device->is_physical_device = is_physical_device; - if (!is_physical_device) + if (is_physical_device) { + if (phys_lun_ext_entry->device_type == + SA_EXPANDER_SMP_DEVICE) + device->is_expander_smp_device = true; + } else { device->is_external_raid_device = pqi_is_external_raid_addr(scsi3addr); + } /* Gather information about the device. */ rc = pqi_get_device_info(ctrl_info, device); @@ -1899,30 +2124,23 @@ static int pqi_update_scsi_devices(struct pqi_ctrl_info *ctrl_info) device->wwid = phys_lun_ext_entry->wwid; if ((phys_lun_ext_entry->device_flags & REPORT_PHYS_LUN_DEV_FLAG_AIO_ENABLED) && - phys_lun_ext_entry->aio_handle) + phys_lun_ext_entry->aio_handle) { device->aio_enabled = true; + device->aio_handle = + phys_lun_ext_entry->aio_handle; + } + if (device->devtype == TYPE_DISK || + device->devtype == TYPE_ZBC) { + pqi_get_physical_disk_info(ctrl_info, + device, id_phys); + } } else { memcpy(device->volume_id, log_lun_ext_entry->volume_id, sizeof(device->volume_id)); } - switch (device->devtype) { - case TYPE_DISK: - case TYPE_ZBC: - case TYPE_ENCLOSURE: - if (device->is_physical_device) { - device->sas_address = - get_unaligned_be64(&device->wwid); - if (device->devtype == TYPE_DISK || - device->devtype == TYPE_ZBC) { - device->aio_handle = - phys_lun_ext_entry->aio_handle; - pqi_get_physical_disk_info(ctrl_info, - device, id_phys); - } - } - break; - } + if (pqi_is_device_with_sas_address(device)) + device->sas_address = get_unaligned_be64(&device->wwid); new_device_list[num_valid_devices++] = device; } @@ -1965,7 +2183,7 @@ static void pqi_remove_all_scsi_devices(struct pqi_ctrl_info *ctrl_info) if (!device) break; - if (device->sdev) + if (pqi_is_device_added(device)) pqi_remove_device(ctrl_info, device); pqi_free_device(device); } @@ -1991,7 +2209,13 @@ static int pqi_scan_scsi_devices(struct pqi_ctrl_info *ctrl_info) static void pqi_scan_start(struct Scsi_Host *shost) { - pqi_scan_scsi_devices(shost_to_hba(shost)); + struct pqi_ctrl_info *ctrl_info; + + ctrl_info = shost_to_hba(shost); + if (pqi_ctrl_in_ofa(ctrl_info)) + return; + + pqi_scan_scsi_devices(ctrl_info); } /* Returns TRUE if scan is finished. */ @@ -2018,6 +2242,12 @@ static void pqi_wait_until_lun_reset_finished(struct pqi_ctrl_info *ctrl_info) mutex_unlock(&ctrl_info->lun_reset_mutex); } +static void pqi_wait_until_ofa_finished(struct pqi_ctrl_info *ctrl_info) +{ + mutex_lock(&ctrl_info->ofa_mutex); + mutex_unlock(&ctrl_info->ofa_mutex); +} + static inline void pqi_set_encryption_info( struct pqi_encryption_info *encryption_info, struct raid_map *raid_map, u64 first_block) @@ -2325,9 +2555,6 @@ static int pqi_raid_bypass_submit_scsi_cmd(struct pqi_ctrl_info *ctrl_info, (map_row * total_disks_per_row) + first_column; } - if (unlikely(map_index >= RAID_MAP_MAX_ENTRIES)) - return PQI_RAID_BYPASS_INELIGIBLE; - aio_handle = raid_map->disk_data[map_index].aio_handle; disk_block = get_unaligned_le64(&raid_map->disk_starting_blk) + first_row * strip_size + @@ -2397,7 +2624,7 @@ static int pqi_wait_for_pqi_mode_ready(struct pqi_ctrl_info *ctrl_info) u8 status; pqi_registers = ctrl_info->pqi_registers; - timeout = (PQI_MODE_READY_TIMEOUT_SECS * HZ) + jiffies; + timeout = (PQI_MODE_READY_TIMEOUT_SECS * PQI_HZ) + jiffies; while (1) { signature = readq(&pqi_registers->signature); @@ -2458,10 +2685,9 @@ static inline void pqi_take_device_offline(struct scsi_device *sdev, char *path) return; device->device_offline = true; - scsi_device_set_state(sdev, SDEV_OFFLINE); ctrl_info = shost_to_hba(sdev->host); pqi_schedule_rescan_worker(ctrl_info); - dev_err(&ctrl_info->pci_dev->dev, "offlined %s scsi %d:%d:%d:%d\n", + dev_err(&ctrl_info->pci_dev->dev, "re-scanning %s scsi %d:%d:%d:%d\n", path, ctrl_info->scsi_host->host_no, device->bus, device->target, device->lun); } @@ -2665,6 +2891,9 @@ static int pqi_interpret_task_management_response( case SOP_TMF_FUNCTION_SUCCEEDED: rc = 0; break; + case SOP_TMF_REJECTED: + rc = -EAGAIN; + break; default: rc = -EIO; break; @@ -2704,8 +2933,17 @@ static unsigned int pqi_process_io_intr(struct pqi_ctrl_info *ctrl_info, switch (response->header.iu_type) { case PQI_RESPONSE_IU_RAID_PATH_IO_SUCCESS: case PQI_RESPONSE_IU_AIO_PATH_IO_SUCCESS: + if (io_request->scmd) + io_request->scmd->result = 0; + /* fall through */ case PQI_RESPONSE_IU_GENERAL_MANAGEMENT: break; + case PQI_RESPONSE_IU_VENDOR_GENERAL: + io_request->status = + get_unaligned_le16( + &((struct pqi_vendor_general_response *) + response)->status); + break; case PQI_RESPONSE_IU_TASK_MANAGEMENT: io_request->status = pqi_interpret_task_management_response( @@ -2825,6 +3063,111 @@ static void pqi_acknowledge_event(struct pqi_ctrl_info *ctrl_info, pqi_send_event_ack(ctrl_info, &request, sizeof(request)); } +#define PQI_SOFT_RESET_STATUS_TIMEOUT_SECS 30 +#define PQI_SOFT_RESET_STATUS_POLL_INTERVAL_SECS 1 + +static enum pqi_soft_reset_status pqi_poll_for_soft_reset_status( + struct pqi_ctrl_info *ctrl_info) +{ + unsigned long timeout; + u8 status; + + timeout = (PQI_SOFT_RESET_STATUS_TIMEOUT_SECS * PQI_HZ) + jiffies; + + while (1) { + status = pqi_read_soft_reset_status(ctrl_info); + if (status & PQI_SOFT_RESET_INITIATE) + return RESET_INITIATE_DRIVER; + + if (status & PQI_SOFT_RESET_ABORT) + return RESET_ABORT; + + if (time_after(jiffies, timeout)) { + dev_err(&ctrl_info->pci_dev->dev, + "timed out waiting for soft reset status\n"); + return RESET_TIMEDOUT; + } + + if (!sis_is_firmware_running(ctrl_info)) + return RESET_NORESPONSE; + + ssleep(PQI_SOFT_RESET_STATUS_POLL_INTERVAL_SECS); + } +} + +static void pqi_process_soft_reset(struct pqi_ctrl_info *ctrl_info, + enum pqi_soft_reset_status reset_status) +{ + int rc; + + switch (reset_status) { + case RESET_INITIATE_DRIVER: + /* fall through */ + case RESET_TIMEDOUT: + dev_info(&ctrl_info->pci_dev->dev, + "resetting controller %u\n", ctrl_info->ctrl_id); + sis_soft_reset(ctrl_info); + /* fall through */ + case RESET_INITIATE_FIRMWARE: + rc = pqi_ofa_ctrl_restart(ctrl_info); + pqi_ofa_free_host_buffer(ctrl_info); + dev_info(&ctrl_info->pci_dev->dev, + "Online Firmware Activation for controller %u: %s\n", + ctrl_info->ctrl_id, rc == 0 ? "SUCCESS" : "FAILED"); + break; + case RESET_ABORT: + pqi_ofa_ctrl_unquiesce(ctrl_info); + dev_info(&ctrl_info->pci_dev->dev, + "Online Firmware Activation for controller %u: %s\n", + ctrl_info->ctrl_id, "ABORTED"); + break; + case RESET_NORESPONSE: + pqi_ofa_free_host_buffer(ctrl_info); + pqi_take_ctrl_offline(ctrl_info); + break; + } +} + +static void pqi_ofa_process_event(struct pqi_ctrl_info *ctrl_info, + struct pqi_event *event) +{ + u16 event_id; + enum pqi_soft_reset_status status; + + event_id = get_unaligned_le16(&event->event_id); + + mutex_lock(&ctrl_info->ofa_mutex); + + if (event_id == PQI_EVENT_OFA_QUIESCE) { + dev_info(&ctrl_info->pci_dev->dev, + "Received Online Firmware Activation quiesce event for controller %u\n", + ctrl_info->ctrl_id); + pqi_ofa_ctrl_quiesce(ctrl_info); + pqi_acknowledge_event(ctrl_info, event); + if (ctrl_info->soft_reset_handshake_supported) { + status = pqi_poll_for_soft_reset_status(ctrl_info); + pqi_process_soft_reset(ctrl_info, status); + } else { + pqi_process_soft_reset(ctrl_info, + RESET_INITIATE_FIRMWARE); + } + + } else if (event_id == PQI_EVENT_OFA_MEMORY_ALLOCATION) { + pqi_acknowledge_event(ctrl_info, event); + pqi_ofa_setup_host_buffer(ctrl_info, + le32_to_cpu(event->ofa_bytes_requested)); + pqi_ofa_host_memory_update(ctrl_info); + } else if (event_id == PQI_EVENT_OFA_CANCELLED) { + pqi_ofa_free_host_buffer(ctrl_info); + pqi_acknowledge_event(ctrl_info, event); + dev_info(&ctrl_info->pci_dev->dev, + "Online Firmware Activation(%u) cancel reason : %u\n", + ctrl_info->ctrl_id, event->ofa_cancel_reason); + } + + mutex_unlock(&ctrl_info->ofa_mutex); +} + static void pqi_event_worker(struct work_struct *work) { unsigned int i; @@ -2844,6 +3187,11 @@ static void pqi_event_worker(struct work_struct *work) for (i = 0; i < PQI_NUM_SUPPORTED_EVENTS; i++) { if (event->pending) { event->pending = false; + if (event->event_type == PQI_EVENT_TYPE_OFA) { + pqi_ctrl_unbusy(ctrl_info); + pqi_ofa_process_event(ctrl_info, event); + return; + } pqi_acknowledge_event(ctrl_info, event); } event++; @@ -2853,7 +3201,7 @@ out: pqi_ctrl_unbusy(ctrl_info); } -#define PQI_HEARTBEAT_TIMER_INTERVAL (10 * HZ) +#define PQI_HEARTBEAT_TIMER_INTERVAL (10 * PQI_HZ) static void pqi_heartbeat_timer_handler(struct timer_list *t) { @@ -2922,6 +3270,24 @@ static inline bool pqi_is_supported_event(unsigned int event_type) return pqi_event_type_to_event_index(event_type) != -1; } +static void pqi_ofa_capture_event_payload(struct pqi_event *event, + struct pqi_event_response *response) +{ + u16 event_id; + + event_id = get_unaligned_le16(&event->event_id); + + if (event->event_type == PQI_EVENT_TYPE_OFA) { + if (event_id == PQI_EVENT_OFA_MEMORY_ALLOCATION) { + event->ofa_bytes_requested = + response->data.ofa_memory_allocation.bytes_requested; + } else if (event_id == PQI_EVENT_OFA_CANCELLED) { + event->ofa_cancel_reason = + response->data.ofa_cancelled.reason; + } + } +} + static unsigned int pqi_process_event_intr(struct pqi_ctrl_info *ctrl_info) { unsigned int num_events; @@ -2956,6 +3322,7 @@ static unsigned int pqi_process_event_intr(struct pqi_ctrl_info *ctrl_info) event->event_id = response->event_id; event->additional_event_id = response->additional_event_id; + pqi_ofa_capture_event_payload(event, response); } } @@ -3389,7 +3756,7 @@ static int pqi_alloc_admin_queues(struct pqi_ctrl_info *ctrl_info) return 0; } -#define PQI_ADMIN_QUEUE_CREATE_TIMEOUT_JIFFIES HZ +#define PQI_ADMIN_QUEUE_CREATE_TIMEOUT_JIFFIES PQI_HZ #define PQI_ADMIN_QUEUE_CREATE_POLL_INTERVAL_MSECS 1 static int pqi_create_admin_queues(struct pqi_ctrl_info *ctrl_info) @@ -3482,7 +3849,7 @@ static int pqi_poll_for_admin_response(struct pqi_ctrl_info *ctrl_info, admin_queues = &ctrl_info->admin_queues; oq_ci = admin_queues->oq_ci_copy; - timeout = (PQI_ADMIN_REQUEST_TIMEOUT_SECS * HZ) + jiffies; + timeout = (PQI_ADMIN_REQUEST_TIMEOUT_SECS * PQI_HZ) + jiffies; while (1) { oq_pi = readl(admin_queues->oq_pi); @@ -3597,7 +3964,7 @@ static int pqi_wait_for_completion_io(struct pqi_ctrl_info *ctrl_info, while (1) { if (wait_for_completion_io_timeout(wait, - PQI_WAIT_FOR_COMPLETION_IO_TIMEOUT_SECS * HZ)) { + PQI_WAIT_FOR_COMPLETION_IO_TIMEOUT_SECS * PQI_HZ)) { rc = 0; break; } @@ -4927,7 +5294,17 @@ void pqi_prep_for_scsi_done(struct scsi_cmnd *scmd) { struct pqi_scsi_dev *device; + if (!scmd->device) { + set_host_byte(scmd, DID_NO_CONNECT); + return; + } + device = scmd->device->hostdata; + if (!device) { + set_host_byte(scmd, DID_NO_CONNECT); + return; + } + atomic_dec(&device->scsi_cmds_outstanding); } @@ -4944,16 +5321,24 @@ static int pqi_scsi_queue_command(struct Scsi_Host *shost, device = scmd->device->hostdata; ctrl_info = shost_to_hba(shost); + if (!device) { + set_host_byte(scmd, DID_NO_CONNECT); + pqi_scsi_done(scmd); + return 0; + } + atomic_inc(&device->scsi_cmds_outstanding); - if (pqi_ctrl_offline(ctrl_info)) { + if (pqi_ctrl_offline(ctrl_info) || pqi_device_in_remove(ctrl_info, + device)) { set_host_byte(scmd, DID_NO_CONNECT); pqi_scsi_done(scmd); return 0; } pqi_ctrl_busy(ctrl_info); - if (pqi_ctrl_blocked(ctrl_info) || pqi_device_in_reset(device)) { + if (pqi_ctrl_blocked(ctrl_info) || pqi_device_in_reset(device) || + pqi_ctrl_in_ofa(ctrl_info)) { rc = SCSI_MLQUEUE_HOST_BUSY; goto out; } @@ -5098,25 +5483,75 @@ static void pqi_fail_io_queued_for_device(struct pqi_ctrl_info *ctrl_info, } } +static void pqi_fail_io_queued_for_all_devices(struct pqi_ctrl_info *ctrl_info) +{ + unsigned int i; + unsigned int path; + struct pqi_queue_group *queue_group; + unsigned long flags; + struct pqi_io_request *io_request; + struct pqi_io_request *next; + struct scsi_cmnd *scmd; + + for (i = 0; i < ctrl_info->num_queue_groups; i++) { + queue_group = &ctrl_info->queue_groups[i]; + + for (path = 0; path < 2; path++) { + spin_lock_irqsave(&queue_group->submit_lock[path], + flags); + + list_for_each_entry_safe(io_request, next, + &queue_group->request_list[path], + request_list_entry) { + + scmd = io_request->scmd; + if (!scmd) + continue; + + list_del(&io_request->request_list_entry); + set_host_byte(scmd, DID_RESET); + pqi_scsi_done(scmd); + } + + spin_unlock_irqrestore( + &queue_group->submit_lock[path], flags); + } + } +} + static int pqi_device_wait_for_pending_io(struct pqi_ctrl_info *ctrl_info, - struct pqi_scsi_dev *device) + struct pqi_scsi_dev *device, unsigned long timeout_secs) { + unsigned long timeout; + + timeout = (timeout_secs * PQI_HZ) + jiffies; + while (atomic_read(&device->scsi_cmds_outstanding)) { pqi_check_ctrl_health(ctrl_info); if (pqi_ctrl_offline(ctrl_info)) return -ENXIO; + if (timeout_secs != NO_TIMEOUT) { + if (time_after(jiffies, timeout)) { + dev_err(&ctrl_info->pci_dev->dev, + "timed out waiting for pending IO\n"); + return -ETIMEDOUT; + } + } usleep_range(1000, 2000); } return 0; } -static int pqi_ctrl_wait_for_pending_io(struct pqi_ctrl_info *ctrl_info) +static int pqi_ctrl_wait_for_pending_io(struct pqi_ctrl_info *ctrl_info, + unsigned long timeout_secs) { bool io_pending; unsigned long flags; + unsigned long timeout; struct pqi_scsi_dev *device; + timeout = (timeout_secs * PQI_HZ) + jiffies; while (1) { io_pending = false; @@ -5138,6 +5573,13 @@ static int pqi_ctrl_wait_for_pending_io(struct pqi_ctrl_info *ctrl_info) if (pqi_ctrl_offline(ctrl_info)) return -ENXIO; + if (timeout_secs != NO_TIMEOUT) { + if (time_after(jiffies, timeout)) { + dev_err(&ctrl_info->pci_dev->dev, + "timed out waiting for pending IO\n"); + return -ETIMEDOUT; + } + } usleep_range(1000, 2000); } @@ -5161,7 +5603,7 @@ static int pqi_wait_for_lun_reset_completion(struct pqi_ctrl_info *ctrl_info, while (1) { if (wait_for_completion_io_timeout(wait, - PQI_LUN_RESET_TIMEOUT_SECS * HZ)) { + PQI_LUN_RESET_TIMEOUT_SECS * PQI_HZ)) { rc = 0; break; } @@ -5212,20 +5654,56 @@ static int pqi_lun_reset(struct pqi_ctrl_info *ctrl_info, return rc; } +#define PQI_LUN_RESET_RETRIES 3 +#define PQI_LUN_RESET_RETRY_INTERVAL_MSECS 10000 /* Performs a reset at the LUN level. */ -static int pqi_device_reset(struct pqi_ctrl_info *ctrl_info, +static int _pqi_device_reset(struct pqi_ctrl_info *ctrl_info, struct pqi_scsi_dev *device) { int rc; + unsigned int retries; + unsigned long timeout_secs; - rc = pqi_lun_reset(ctrl_info, device); - if (rc == 0) - rc = pqi_device_wait_for_pending_io(ctrl_info, device); + for (retries = 0;;) { + rc = pqi_lun_reset(ctrl_info, device); + if (rc != -EAGAIN || + ++retries > PQI_LUN_RESET_RETRIES) + break; + msleep(PQI_LUN_RESET_RETRY_INTERVAL_MSECS); + } + timeout_secs = rc ? PQI_LUN_RESET_TIMEOUT_SECS : NO_TIMEOUT; + + rc |= pqi_device_wait_for_pending_io(ctrl_info, device, timeout_secs); return rc == 0 ? SUCCESS : FAILED; } +static int pqi_device_reset(struct pqi_ctrl_info *ctrl_info, + struct pqi_scsi_dev *device) +{ + int rc; + + mutex_lock(&ctrl_info->lun_reset_mutex); + + pqi_ctrl_block_requests(ctrl_info); + pqi_ctrl_wait_until_quiesced(ctrl_info); + pqi_fail_io_queued_for_device(ctrl_info, device); + rc = pqi_wait_until_inbound_queues_empty(ctrl_info); + pqi_device_reset_start(device); + pqi_ctrl_unblock_requests(ctrl_info); + + if (rc) + rc = FAILED; + else + rc = _pqi_device_reset(ctrl_info, device); + + pqi_device_reset_done(device); + + mutex_unlock(&ctrl_info->lun_reset_mutex); + return rc; +} + static int pqi_eh_device_reset_handler(struct scsi_cmnd *scmd) { int rc; @@ -5243,28 +5721,16 @@ static int pqi_eh_device_reset_handler(struct scsi_cmnd *scmd) pqi_check_ctrl_health(ctrl_info); if (pqi_ctrl_offline(ctrl_info)) { + dev_err(&ctrl_info->pci_dev->dev, + "controller %u offlined - cannot send device reset\n", + ctrl_info->ctrl_id); rc = FAILED; goto out; } - mutex_lock(&ctrl_info->lun_reset_mutex); - - pqi_ctrl_block_requests(ctrl_info); - pqi_ctrl_wait_until_quiesced(ctrl_info); - pqi_fail_io_queued_for_device(ctrl_info, device); - rc = pqi_wait_until_inbound_queues_empty(ctrl_info); - pqi_device_reset_start(device); - pqi_ctrl_unblock_requests(ctrl_info); - - if (rc) - rc = FAILED; - else - rc = pqi_device_reset(ctrl_info, device); - - pqi_device_reset_done(device); - - mutex_unlock(&ctrl_info->lun_reset_mutex); + pqi_wait_until_ofa_finished(ctrl_info); + rc = pqi_device_reset(ctrl_info, device); out: dev_err(&ctrl_info->pci_dev->dev, "reset of scsi %d:%d:%d:%d: %s\n", @@ -5308,6 +5774,10 @@ static int pqi_slave_alloc(struct scsi_device *sdev) scsi_change_queue_depth(sdev, device->advertised_queue_depth); } + if (pqi_is_logical_device(device)) + pqi_disable_write_same(sdev); + else + sdev->allow_restart = 1; } spin_unlock_irqrestore(&ctrl_info->scsi_device_list_lock, flags); @@ -5319,7 +5789,8 @@ static int pqi_map_queues(struct Scsi_Host *shost) { struct pqi_ctrl_info *ctrl_info = shost_to_hba(shost); - return blk_mq_pci_map_queues(&shost->tag_set, ctrl_info->pci_dev, 0); + return blk_mq_pci_map_queues(&shost->tag_set.map[0], + ctrl_info->pci_dev, 0); } static int pqi_getpciinfo_ioctl(struct pqi_ctrl_info *ctrl_info, @@ -5579,6 +6050,9 @@ static int pqi_ioctl(struct scsi_device *sdev, int cmd, void __user *arg) ctrl_info = shost_to_hba(sdev->host); + if (pqi_ctrl_in_ofa(ctrl_info)) + return -EBUSY; + switch (cmd) { case CCISS_DEREGDISK: case CCISS_REGNEWDISK: @@ -5683,6 +6157,150 @@ static struct device_attribute *pqi_shost_attrs[] = { NULL }; +static ssize_t pqi_unique_id_show(struct device *dev, + struct device_attribute *attr, char *buffer) +{ + struct pqi_ctrl_info *ctrl_info; + struct scsi_device *sdev; + struct pqi_scsi_dev *device; + unsigned long flags; + unsigned char uid[16]; + + sdev = to_scsi_device(dev); + ctrl_info = shost_to_hba(sdev->host); + + spin_lock_irqsave(&ctrl_info->scsi_device_list_lock, flags); + + device = sdev->hostdata; + if (!device) { + spin_unlock_irqrestore(&ctrl_info->scsi_device_list_lock, + flags); + return -ENODEV; + } + memcpy(uid, device->unique_id, sizeof(uid)); + + spin_unlock_irqrestore(&ctrl_info->scsi_device_list_lock, flags); + + return snprintf(buffer, PAGE_SIZE, + "%02X%02X%02X%02X%02X%02X%02X%02X%02X%02X%02X%02X%02X%02X%02X%02X\n", + uid[0], uid[1], uid[2], uid[3], + uid[4], uid[5], uid[6], uid[7], + uid[8], uid[9], uid[10], uid[11], + uid[12], uid[13], uid[14], uid[15]); +} + +static ssize_t pqi_lunid_show(struct device *dev, + struct device_attribute *attr, char *buffer) +{ + struct pqi_ctrl_info *ctrl_info; + struct scsi_device *sdev; + struct pqi_scsi_dev *device; + unsigned long flags; + u8 lunid[8]; + + sdev = to_scsi_device(dev); + ctrl_info = shost_to_hba(sdev->host); + + spin_lock_irqsave(&ctrl_info->scsi_device_list_lock, flags); + + device = sdev->hostdata; + if (!device) { + spin_unlock_irqrestore(&ctrl_info->scsi_device_list_lock, + flags); + return -ENODEV; + } + memcpy(lunid, device->scsi3addr, sizeof(lunid)); + + spin_unlock_irqrestore(&ctrl_info->scsi_device_list_lock, flags); + + return snprintf(buffer, PAGE_SIZE, "0x%8phN\n", lunid); +} + +#define MAX_PATHS 8 +static ssize_t pqi_path_info_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct pqi_ctrl_info *ctrl_info; + struct scsi_device *sdev; + struct pqi_scsi_dev *device; + unsigned long flags; + int i; + int output_len = 0; + u8 box; + u8 bay; + u8 path_map_index = 0; + char *active; + unsigned char phys_connector[2]; + + sdev = to_scsi_device(dev); + ctrl_info = shost_to_hba(sdev->host); + + spin_lock_irqsave(&ctrl_info->scsi_device_list_lock, flags); + + device = sdev->hostdata; + if (!device) { + spin_unlock_irqrestore(&ctrl_info->scsi_device_list_lock, + flags); + return -ENODEV; + } + + bay = device->bay; + for (i = 0; i < MAX_PATHS; i++) { + path_map_index = 1<<i; + if (i == device->active_path_index) + active = "Active"; + else if (device->path_map & path_map_index) + active = "Inactive"; + else + continue; + + output_len += scnprintf(buf + output_len, + PAGE_SIZE - output_len, + "[%d:%d:%d:%d] %20.20s ", + ctrl_info->scsi_host->host_no, + device->bus, device->target, + device->lun, + scsi_device_type(device->devtype)); + + if (device->devtype == TYPE_RAID || + pqi_is_logical_device(device)) + goto end_buffer; + + memcpy(&phys_connector, &device->phys_connector[i], + sizeof(phys_connector)); + if (phys_connector[0] < '0') + phys_connector[0] = '0'; + if (phys_connector[1] < '0') + phys_connector[1] = '0'; + + output_len += scnprintf(buf + output_len, + PAGE_SIZE - output_len, + "PORT: %.2s ", phys_connector); + + box = device->box[i]; + if (box != 0 && box != 0xFF) + output_len += scnprintf(buf + output_len, + PAGE_SIZE - output_len, + "BOX: %hhu ", box); + + if ((device->devtype == TYPE_DISK || + device->devtype == TYPE_ZBC) && + pqi_expose_device(device)) + output_len += scnprintf(buf + output_len, + PAGE_SIZE - output_len, + "BAY: %hhu ", bay); + +end_buffer: + output_len += scnprintf(buf + output_len, + PAGE_SIZE - output_len, + "%s\n", active); + } + + spin_unlock_irqrestore(&ctrl_info->scsi_device_list_lock, flags); + return output_len; +} + + static ssize_t pqi_sas_address_show(struct device *dev, struct device_attribute *attr, char *buffer) { @@ -5759,12 +6377,18 @@ static ssize_t pqi_raid_level_show(struct device *dev, return snprintf(buffer, PAGE_SIZE, "%s\n", raid_level); } +static DEVICE_ATTR(lunid, 0444, pqi_lunid_show, NULL); +static DEVICE_ATTR(unique_id, 0444, pqi_unique_id_show, NULL); +static DEVICE_ATTR(path_info, 0444, pqi_path_info_show, NULL); static DEVICE_ATTR(sas_address, 0444, pqi_sas_address_show, NULL); static DEVICE_ATTR(ssd_smart_path_enabled, 0444, pqi_ssd_smart_path_enabled_show, NULL); static DEVICE_ATTR(raid_level, 0444, pqi_raid_level_show, NULL); static struct device_attribute *pqi_sdev_attrs[] = { + &dev_attr_lunid, + &dev_attr_unique_id, + &dev_attr_path_info, &dev_attr_sas_address, &dev_attr_ssd_smart_path_enabled, &dev_attr_raid_level, @@ -5779,7 +6403,6 @@ static struct scsi_host_template pqi_driver_template = { .scan_start = pqi_scan_start, .scan_finished = pqi_scan_finished, .this_id = -1, - .use_clustering = ENABLE_CLUSTERING, .eh_device_reset_handler = pqi_eh_device_reset_handler, .ioctl = pqi_ioctl, .slave_alloc = pqi_slave_alloc, @@ -5947,6 +6570,244 @@ out: return rc; } +struct pqi_config_table_section_info { + struct pqi_ctrl_info *ctrl_info; + void *section; + u32 section_offset; + void __iomem *section_iomem_addr; +}; + +static inline bool pqi_is_firmware_feature_supported( + struct pqi_config_table_firmware_features *firmware_features, + unsigned int bit_position) +{ + unsigned int byte_index; + + byte_index = bit_position / BITS_PER_BYTE; + + if (byte_index >= le16_to_cpu(firmware_features->num_elements)) + return false; + + return firmware_features->features_supported[byte_index] & + (1 << (bit_position % BITS_PER_BYTE)) ? true : false; +} + +static inline bool pqi_is_firmware_feature_enabled( + struct pqi_config_table_firmware_features *firmware_features, + void __iomem *firmware_features_iomem_addr, + unsigned int bit_position) +{ + unsigned int byte_index; + u8 __iomem *features_enabled_iomem_addr; + + byte_index = (bit_position / BITS_PER_BYTE) + + (le16_to_cpu(firmware_features->num_elements) * 2); + + features_enabled_iomem_addr = firmware_features_iomem_addr + + offsetof(struct pqi_config_table_firmware_features, + features_supported) + byte_index; + + return *((__force u8 *)features_enabled_iomem_addr) & + (1 << (bit_position % BITS_PER_BYTE)) ? true : false; +} + +static inline void pqi_request_firmware_feature( + struct pqi_config_table_firmware_features *firmware_features, + unsigned int bit_position) +{ + unsigned int byte_index; + + byte_index = (bit_position / BITS_PER_BYTE) + + le16_to_cpu(firmware_features->num_elements); + + firmware_features->features_supported[byte_index] |= + (1 << (bit_position % BITS_PER_BYTE)); +} + +static int pqi_config_table_update(struct pqi_ctrl_info *ctrl_info, + u16 first_section, u16 last_section) +{ + struct pqi_vendor_general_request request; + + memset(&request, 0, sizeof(request)); + + request.header.iu_type = PQI_REQUEST_IU_VENDOR_GENERAL; + put_unaligned_le16(sizeof(request) - PQI_REQUEST_HEADER_LENGTH, + &request.header.iu_length); + put_unaligned_le16(PQI_VENDOR_GENERAL_CONFIG_TABLE_UPDATE, + &request.function_code); + put_unaligned_le16(first_section, + &request.data.config_table_update.first_section); + put_unaligned_le16(last_section, + &request.data.config_table_update.last_section); + + return pqi_submit_raid_request_synchronous(ctrl_info, &request.header, + 0, NULL, NO_TIMEOUT); +} + +static int pqi_enable_firmware_features(struct pqi_ctrl_info *ctrl_info, + struct pqi_config_table_firmware_features *firmware_features, + void __iomem *firmware_features_iomem_addr) +{ + void *features_requested; + void __iomem *features_requested_iomem_addr; + + features_requested = firmware_features->features_supported + + le16_to_cpu(firmware_features->num_elements); + + features_requested_iomem_addr = firmware_features_iomem_addr + + (features_requested - (void *)firmware_features); + + memcpy_toio(features_requested_iomem_addr, features_requested, + le16_to_cpu(firmware_features->num_elements)); + + return pqi_config_table_update(ctrl_info, + PQI_CONFIG_TABLE_SECTION_FIRMWARE_FEATURES, + PQI_CONFIG_TABLE_SECTION_FIRMWARE_FEATURES); +} + +struct pqi_firmware_feature { + char *feature_name; + unsigned int feature_bit; + bool supported; + bool enabled; + void (*feature_status)(struct pqi_ctrl_info *ctrl_info, + struct pqi_firmware_feature *firmware_feature); +}; + +static void pqi_firmware_feature_status(struct pqi_ctrl_info *ctrl_info, + struct pqi_firmware_feature *firmware_feature) +{ + if (!firmware_feature->supported) { + dev_info(&ctrl_info->pci_dev->dev, "%s not supported by controller\n", + firmware_feature->feature_name); + return; + } + + if (firmware_feature->enabled) { + dev_info(&ctrl_info->pci_dev->dev, + "%s enabled\n", firmware_feature->feature_name); + return; + } + + dev_err(&ctrl_info->pci_dev->dev, "failed to enable %s\n", + firmware_feature->feature_name); +} + +static inline void pqi_firmware_feature_update(struct pqi_ctrl_info *ctrl_info, + struct pqi_firmware_feature *firmware_feature) +{ + if (firmware_feature->feature_status) + firmware_feature->feature_status(ctrl_info, firmware_feature); +} + +static DEFINE_MUTEX(pqi_firmware_features_mutex); + +static struct pqi_firmware_feature pqi_firmware_features[] = { + { + .feature_name = "Online Firmware Activation", + .feature_bit = PQI_FIRMWARE_FEATURE_OFA, + .feature_status = pqi_firmware_feature_status, + }, + { + .feature_name = "Serial Management Protocol", + .feature_bit = PQI_FIRMWARE_FEATURE_SMP, + .feature_status = pqi_firmware_feature_status, + }, + { + .feature_name = "New Soft Reset Handshake", + .feature_bit = PQI_FIRMWARE_FEATURE_SOFT_RESET_HANDSHAKE, + .feature_status = pqi_firmware_feature_status, + }, +}; + +static void pqi_process_firmware_features( + struct pqi_config_table_section_info *section_info) +{ + int rc; + struct pqi_ctrl_info *ctrl_info; + struct pqi_config_table_firmware_features *firmware_features; + void __iomem *firmware_features_iomem_addr; + unsigned int i; + unsigned int num_features_supported; + + ctrl_info = section_info->ctrl_info; + firmware_features = section_info->section; + firmware_features_iomem_addr = section_info->section_iomem_addr; + + for (i = 0, num_features_supported = 0; + i < ARRAY_SIZE(pqi_firmware_features); i++) { + if (pqi_is_firmware_feature_supported(firmware_features, + pqi_firmware_features[i].feature_bit)) { + pqi_firmware_features[i].supported = true; + num_features_supported++; + } else { + pqi_firmware_feature_update(ctrl_info, + &pqi_firmware_features[i]); + } + } + + if (num_features_supported == 0) + return; + + for (i = 0; i < ARRAY_SIZE(pqi_firmware_features); i++) { + if (!pqi_firmware_features[i].supported) + continue; + pqi_request_firmware_feature(firmware_features, + pqi_firmware_features[i].feature_bit); + } + + rc = pqi_enable_firmware_features(ctrl_info, firmware_features, + firmware_features_iomem_addr); + if (rc) { + dev_err(&ctrl_info->pci_dev->dev, + "failed to enable firmware features in PQI configuration table\n"); + for (i = 0; i < ARRAY_SIZE(pqi_firmware_features); i++) { + if (!pqi_firmware_features[i].supported) + continue; + pqi_firmware_feature_update(ctrl_info, + &pqi_firmware_features[i]); + } + return; + } + + ctrl_info->soft_reset_handshake_supported = false; + for (i = 0; i < ARRAY_SIZE(pqi_firmware_features); i++) { + if (!pqi_firmware_features[i].supported) + continue; + if (pqi_is_firmware_feature_enabled(firmware_features, + firmware_features_iomem_addr, + pqi_firmware_features[i].feature_bit)) { + pqi_firmware_features[i].enabled = true; + if (pqi_firmware_features[i].feature_bit == + PQI_FIRMWARE_FEATURE_SOFT_RESET_HANDSHAKE) + ctrl_info->soft_reset_handshake_supported = + true; + } + pqi_firmware_feature_update(ctrl_info, + &pqi_firmware_features[i]); + } +} + +static void pqi_init_firmware_features(void) +{ + unsigned int i; + + for (i = 0; i < ARRAY_SIZE(pqi_firmware_features); i++) { + pqi_firmware_features[i].supported = false; + pqi_firmware_features[i].enabled = false; + } +} + +static void pqi_process_firmware_features_section( + struct pqi_config_table_section_info *section_info) +{ + mutex_lock(&pqi_firmware_features_mutex); + pqi_init_firmware_features(); + pqi_process_firmware_features(section_info); + mutex_unlock(&pqi_firmware_features_mutex); +} + static int pqi_process_config_table(struct pqi_ctrl_info *ctrl_info) { u32 table_length; @@ -5954,8 +6815,11 @@ static int pqi_process_config_table(struct pqi_ctrl_info *ctrl_info) void __iomem *table_iomem_addr; struct pqi_config_table *config_table; struct pqi_config_table_section_header *section; + struct pqi_config_table_section_info section_info; table_length = ctrl_info->config_table_length; + if (table_length == 0) + return 0; config_table = kmalloc(table_length, GFP_KERNEL); if (!config_table) { @@ -5972,13 +6836,22 @@ static int pqi_process_config_table(struct pqi_ctrl_info *ctrl_info) ctrl_info->config_table_offset; memcpy_fromio(config_table, table_iomem_addr, table_length); + section_info.ctrl_info = ctrl_info; section_offset = get_unaligned_le32(&config_table->first_section_offset); while (section_offset) { section = (void *)config_table + section_offset; + section_info.section = section; + section_info.section_offset = section_offset; + section_info.section_iomem_addr = + table_iomem_addr + section_offset; + switch (get_unaligned_le16(§ion->section_id)) { + case PQI_CONFIG_TABLE_SECTION_FIRMWARE_FEATURES: + pqi_process_firmware_features_section(§ion_info); + break; case PQI_CONFIG_TABLE_SECTION_HEARTBEAT: if (pqi_disable_heartbeat) dev_warn(&ctrl_info->pci_dev->dev, @@ -5991,6 +6864,13 @@ static int pqi_process_config_table(struct pqi_ctrl_info *ctrl_info) struct pqi_config_table_heartbeat, heartbeat_counter); break; + case PQI_CONFIG_TABLE_SECTION_SOFT_RESET: + ctrl_info->soft_reset_status = + table_iomem_addr + + section_offset + + offsetof(struct pqi_config_table_soft_reset, + soft_reset_status); + break; } section_offset = @@ -6123,10 +7003,6 @@ static int pqi_ctrl_init(struct pqi_ctrl_info *ctrl_info) ctrl_info->pqi_mode_enabled = true; pqi_save_ctrl_mode(ctrl_info, PQI_MODE); - rc = pqi_process_config_table(ctrl_info); - if (rc) - return rc; - rc = pqi_alloc_admin_queues(ctrl_info); if (rc) { dev_err(&ctrl_info->pci_dev->dev, @@ -6188,6 +7064,11 @@ static int pqi_ctrl_init(struct pqi_ctrl_info *ctrl_info) pqi_change_irq_mode(ctrl_info, IRQ_MODE_MSIX); ctrl_info->controller_online = true; + + rc = pqi_process_config_table(ctrl_info); + if (rc) + return rc; + pqi_start_heartbeat_timer(ctrl_info); rc = pqi_enable_events(ctrl_info); @@ -6209,6 +7090,13 @@ static int pqi_ctrl_init(struct pqi_ctrl_info *ctrl_info) return rc; } + rc = pqi_set_diag_rescan(ctrl_info); + if (rc) { + dev_err(&ctrl_info->pci_dev->dev, + "error enabling multi-lun rescan\n"); + return rc; + } + rc = pqi_write_driver_version_to_host_wellness(ctrl_info); if (rc) { dev_err(&ctrl_info->pci_dev->dev, @@ -6266,6 +7154,24 @@ static int pqi_ctrl_init_resume(struct pqi_ctrl_info *ctrl_info) return rc; /* + * Get the controller properties. This allows us to determine + * whether or not it supports PQI mode. + */ + rc = sis_get_ctrl_properties(ctrl_info); + if (rc) { + dev_err(&ctrl_info->pci_dev->dev, + "error obtaining controller properties\n"); + return rc; + } + + rc = sis_get_pqi_capabilities(ctrl_info); + if (rc) { + dev_err(&ctrl_info->pci_dev->dev, + "error obtaining controller capabilities\n"); + return rc; + } + + /* * If the function we are about to call succeeds, the * controller will transition from legacy SIS mode * into PQI mode. @@ -6305,9 +7211,14 @@ static int pqi_ctrl_init_resume(struct pqi_ctrl_info *ctrl_info) pqi_change_irq_mode(ctrl_info, IRQ_MODE_MSIX); ctrl_info->controller_online = true; - pqi_start_heartbeat_timer(ctrl_info); pqi_ctrl_unblock_requests(ctrl_info); + rc = pqi_process_config_table(ctrl_info); + if (rc) + return rc; + + pqi_start_heartbeat_timer(ctrl_info); + rc = pqi_enable_events(ctrl_info); if (rc) { dev_err(&ctrl_info->pci_dev->dev, @@ -6315,6 +7226,20 @@ static int pqi_ctrl_init_resume(struct pqi_ctrl_info *ctrl_info) return rc; } + rc = pqi_get_ctrl_firmware_version(ctrl_info); + if (rc) { + dev_err(&ctrl_info->pci_dev->dev, + "error obtaining firmware version\n"); + return rc; + } + + rc = pqi_set_diag_rescan(ctrl_info); + if (rc) { + dev_err(&ctrl_info->pci_dev->dev, + "error enabling multi-lun rescan\n"); + return rc; + } + rc = pqi_write_driver_version_to_host_wellness(ctrl_info); if (rc) { dev_err(&ctrl_info->pci_dev->dev, @@ -6425,6 +7350,7 @@ static struct pqi_ctrl_info *pqi_alloc_ctrl_info(int numa_node) mutex_init(&ctrl_info->scan_mutex); mutex_init(&ctrl_info->lun_reset_mutex); + mutex_init(&ctrl_info->ofa_mutex); INIT_LIST_HEAD(&ctrl_info->scsi_device_list); spin_lock_init(&ctrl_info->scsi_device_list_lock); @@ -6501,6 +7427,217 @@ static void pqi_remove_ctrl(struct pqi_ctrl_info *ctrl_info) pqi_free_ctrl_resources(ctrl_info); } +static void pqi_ofa_ctrl_quiesce(struct pqi_ctrl_info *ctrl_info) +{ + pqi_cancel_update_time_worker(ctrl_info); + pqi_cancel_rescan_worker(ctrl_info); + pqi_wait_until_lun_reset_finished(ctrl_info); + pqi_wait_until_scan_finished(ctrl_info); + pqi_ctrl_ofa_start(ctrl_info); + pqi_ctrl_block_requests(ctrl_info); + pqi_ctrl_wait_until_quiesced(ctrl_info); + pqi_ctrl_wait_for_pending_io(ctrl_info, PQI_PENDING_IO_TIMEOUT_SECS); + pqi_fail_io_queued_for_all_devices(ctrl_info); + pqi_wait_until_inbound_queues_empty(ctrl_info); + pqi_stop_heartbeat_timer(ctrl_info); + ctrl_info->pqi_mode_enabled = false; + pqi_save_ctrl_mode(ctrl_info, SIS_MODE); +} + +static void pqi_ofa_ctrl_unquiesce(struct pqi_ctrl_info *ctrl_info) +{ + pqi_ofa_free_host_buffer(ctrl_info); + ctrl_info->pqi_mode_enabled = true; + pqi_save_ctrl_mode(ctrl_info, PQI_MODE); + ctrl_info->controller_online = true; + pqi_ctrl_unblock_requests(ctrl_info); + pqi_start_heartbeat_timer(ctrl_info); + pqi_schedule_update_time_worker(ctrl_info); + pqi_clear_soft_reset_status(ctrl_info, + PQI_SOFT_RESET_ABORT); + pqi_scan_scsi_devices(ctrl_info); +} + +static int pqi_ofa_alloc_mem(struct pqi_ctrl_info *ctrl_info, + u32 total_size, u32 chunk_size) +{ + u32 sg_count; + u32 size; + int i; + struct pqi_sg_descriptor *mem_descriptor = NULL; + struct device *dev; + struct pqi_ofa_memory *ofap; + + dev = &ctrl_info->pci_dev->dev; + + sg_count = (total_size + chunk_size - 1); + sg_count /= chunk_size; + + ofap = ctrl_info->pqi_ofa_mem_virt_addr; + + if (sg_count*chunk_size < total_size) + goto out; + + ctrl_info->pqi_ofa_chunk_virt_addr = + kcalloc(sg_count, sizeof(void *), GFP_KERNEL); + if (!ctrl_info->pqi_ofa_chunk_virt_addr) + goto out; + + for (size = 0, i = 0; size < total_size; size += chunk_size, i++) { + dma_addr_t dma_handle; + + ctrl_info->pqi_ofa_chunk_virt_addr[i] = + dma_zalloc_coherent(dev, chunk_size, &dma_handle, + GFP_KERNEL); + + if (!ctrl_info->pqi_ofa_chunk_virt_addr[i]) + break; + + mem_descriptor = &ofap->sg_descriptor[i]; + put_unaligned_le64 ((u64) dma_handle, &mem_descriptor->address); + put_unaligned_le32 (chunk_size, &mem_descriptor->length); + } + + if (!size || size < total_size) + goto out_free_chunks; + + put_unaligned_le32(CISS_SG_LAST, &mem_descriptor->flags); + put_unaligned_le16(sg_count, &ofap->num_memory_descriptors); + put_unaligned_le32(size, &ofap->bytes_allocated); + + return 0; + +out_free_chunks: + while (--i >= 0) { + mem_descriptor = &ofap->sg_descriptor[i]; + dma_free_coherent(dev, chunk_size, + ctrl_info->pqi_ofa_chunk_virt_addr[i], + get_unaligned_le64(&mem_descriptor->address)); + } + kfree(ctrl_info->pqi_ofa_chunk_virt_addr); + +out: + put_unaligned_le32 (0, &ofap->bytes_allocated); + return -ENOMEM; +} + +static int pqi_ofa_alloc_host_buffer(struct pqi_ctrl_info *ctrl_info) +{ + u32 total_size; + u32 min_chunk_size; + u32 chunk_sz; + + total_size = le32_to_cpu( + ctrl_info->pqi_ofa_mem_virt_addr->bytes_allocated); + min_chunk_size = total_size / PQI_OFA_MAX_SG_DESCRIPTORS; + + for (chunk_sz = total_size; chunk_sz >= min_chunk_size; chunk_sz /= 2) + if (!pqi_ofa_alloc_mem(ctrl_info, total_size, chunk_sz)) + return 0; + + return -ENOMEM; +} + +static void pqi_ofa_setup_host_buffer(struct pqi_ctrl_info *ctrl_info, + u32 bytes_requested) +{ + struct pqi_ofa_memory *pqi_ofa_memory; + struct device *dev; + + dev = &ctrl_info->pci_dev->dev; + pqi_ofa_memory = dma_zalloc_coherent(dev, + PQI_OFA_MEMORY_DESCRIPTOR_LENGTH, + &ctrl_info->pqi_ofa_mem_dma_handle, + GFP_KERNEL); + + if (!pqi_ofa_memory) + return; + + put_unaligned_le16(PQI_OFA_VERSION, &pqi_ofa_memory->version); + memcpy(&pqi_ofa_memory->signature, PQI_OFA_SIGNATURE, + sizeof(pqi_ofa_memory->signature)); + pqi_ofa_memory->bytes_allocated = cpu_to_le32(bytes_requested); + + ctrl_info->pqi_ofa_mem_virt_addr = pqi_ofa_memory; + + if (pqi_ofa_alloc_host_buffer(ctrl_info) < 0) { + dev_err(dev, "Failed to allocate host buffer of size = %u", + bytes_requested); + } +} + +static void pqi_ofa_free_host_buffer(struct pqi_ctrl_info *ctrl_info) +{ + int i; + struct pqi_sg_descriptor *mem_descriptor; + struct pqi_ofa_memory *ofap; + + ofap = ctrl_info->pqi_ofa_mem_virt_addr; + + if (!ofap) + return; + + if (!ofap->bytes_allocated) + goto out; + + mem_descriptor = ofap->sg_descriptor; + + for (i = 0; i < get_unaligned_le16(&ofap->num_memory_descriptors); + i++) { + dma_free_coherent(&ctrl_info->pci_dev->dev, + get_unaligned_le32(&mem_descriptor[i].length), + ctrl_info->pqi_ofa_chunk_virt_addr[i], + get_unaligned_le64(&mem_descriptor[i].address)); + } + kfree(ctrl_info->pqi_ofa_chunk_virt_addr); + +out: + dma_free_coherent(&ctrl_info->pci_dev->dev, + PQI_OFA_MEMORY_DESCRIPTOR_LENGTH, ofap, + ctrl_info->pqi_ofa_mem_dma_handle); + ctrl_info->pqi_ofa_mem_virt_addr = NULL; +} + +static int pqi_ofa_host_memory_update(struct pqi_ctrl_info *ctrl_info) +{ + struct pqi_vendor_general_request request; + size_t size; + struct pqi_ofa_memory *ofap; + + memset(&request, 0, sizeof(request)); + + ofap = ctrl_info->pqi_ofa_mem_virt_addr; + + request.header.iu_type = PQI_REQUEST_IU_VENDOR_GENERAL; + put_unaligned_le16(sizeof(request) - PQI_REQUEST_HEADER_LENGTH, + &request.header.iu_length); + put_unaligned_le16(PQI_VENDOR_GENERAL_HOST_MEMORY_UPDATE, + &request.function_code); + + if (ofap) { + size = offsetof(struct pqi_ofa_memory, sg_descriptor) + + get_unaligned_le16(&ofap->num_memory_descriptors) * + sizeof(struct pqi_sg_descriptor); + + put_unaligned_le64((u64)ctrl_info->pqi_ofa_mem_dma_handle, + &request.data.ofa_memory_allocation.buffer_address); + put_unaligned_le32(size, + &request.data.ofa_memory_allocation.buffer_length); + + } + + return pqi_submit_raid_request_synchronous(ctrl_info, &request.header, + 0, NULL, NO_TIMEOUT); +} + +#define PQI_POST_RESET_DELAY_B4_MSGU_READY 5000 + +static int pqi_ofa_ctrl_restart(struct pqi_ctrl_info *ctrl_info) +{ + msleep(PQI_POST_RESET_DELAY_B4_MSGU_READY); + return pqi_ctrl_init_resume(ctrl_info); +} + static void pqi_perform_lockup_action(void) { switch (pqi_lockup_action) { @@ -6599,7 +7736,7 @@ static int pqi_pci_probe(struct pci_dev *pci_dev, const struct pci_device_id *id) { int rc; - int node; + int node, cp_node; struct pqi_ctrl_info *ctrl_info; pqi_print_ctrl_info(pci_dev, id); @@ -6617,8 +7754,12 @@ static int pqi_pci_probe(struct pci_dev *pci_dev, "controller device ID matched using wildcards\n"); node = dev_to_node(&pci_dev->dev); - if (node == NUMA_NO_NODE) - set_dev_node(&pci_dev->dev, 0); + if (node == NUMA_NO_NODE) { + cp_node = cpu_to_node(0); + if (cp_node == NUMA_NO_NODE) + cp_node = 0; + set_dev_node(&pci_dev->dev, cp_node); + } ctrl_info = pqi_alloc_ctrl_info(node); if (!ctrl_info) { @@ -6653,6 +7794,8 @@ static void pqi_pci_remove(struct pci_dev *pci_dev) if (!ctrl_info) return; + ctrl_info->in_shutdown = true; + pqi_remove_ctrl(ctrl_info); } @@ -6670,6 +7813,7 @@ static void pqi_shutdown(struct pci_dev *pci_dev) * storage. */ rc = pqi_flush_cache(ctrl_info, SHUTDOWN); + pqi_free_interrupts(ctrl_info); pqi_reset(ctrl_info); if (rc == 0) return; @@ -6714,11 +7858,12 @@ static __maybe_unused int pqi_suspend(struct pci_dev *pci_dev, pm_message_t stat pqi_cancel_rescan_worker(ctrl_info); pqi_wait_until_scan_finished(ctrl_info); pqi_wait_until_lun_reset_finished(ctrl_info); + pqi_wait_until_ofa_finished(ctrl_info); pqi_flush_cache(ctrl_info, SUSPEND); pqi_ctrl_block_requests(ctrl_info); pqi_ctrl_wait_until_quiesced(ctrl_info); pqi_wait_until_inbound_queues_empty(ctrl_info); - pqi_ctrl_wait_for_pending_io(ctrl_info); + pqi_ctrl_wait_for_pending_io(ctrl_info, NO_TIMEOUT); pqi_stop_heartbeat_timer(ctrl_info); if (state.event == PM_EVENT_FREEZE) @@ -6804,6 +7949,14 @@ static const struct pci_device_id pqi_pci_id_table[] = { }, { PCI_DEVICE_SUB(PCI_VENDOR_ID_ADAPTEC2, 0x028f, + 0x193d, 0xc460) + }, + { + PCI_DEVICE_SUB(PCI_VENDOR_ID_ADAPTEC2, 0x028f, + 0x193d, 0xc461) + }, + { + PCI_DEVICE_SUB(PCI_VENDOR_ID_ADAPTEC2, 0x028f, 0x193d, 0xf460) }, { @@ -6840,6 +7993,30 @@ static const struct pci_device_id pqi_pci_id_table[] = { }, { PCI_DEVICE_SUB(PCI_VENDOR_ID_ADAPTEC2, 0x028f, + 0x19e5, 0xd227) + }, + { + PCI_DEVICE_SUB(PCI_VENDOR_ID_ADAPTEC2, 0x028f, + 0x19e5, 0xd228) + }, + { + PCI_DEVICE_SUB(PCI_VENDOR_ID_ADAPTEC2, 0x028f, + 0x19e5, 0xd229) + }, + { + PCI_DEVICE_SUB(PCI_VENDOR_ID_ADAPTEC2, 0x028f, + 0x19e5, 0xd22a) + }, + { + PCI_DEVICE_SUB(PCI_VENDOR_ID_ADAPTEC2, 0x028f, + 0x19e5, 0xd22b) + }, + { + PCI_DEVICE_SUB(PCI_VENDOR_ID_ADAPTEC2, 0x028f, + 0x19e5, 0xd22c) + }, + { + PCI_DEVICE_SUB(PCI_VENDOR_ID_ADAPTEC2, 0x028f, PCI_VENDOR_ID_ADAPTEC2, 0x0110) }, { diff --git a/drivers/scsi/smartpqi/smartpqi_sas_transport.c b/drivers/scsi/smartpqi/smartpqi_sas_transport.c index b209a35e482e..0e4ef215115f 100644 --- a/drivers/scsi/smartpqi/smartpqi_sas_transport.c +++ b/drivers/scsi/smartpqi/smartpqi_sas_transport.c @@ -17,9 +17,11 @@ */ #include <linux/kernel.h> +#include <linux/bsg-lib.h> #include <scsi/scsi_host.h> #include <scsi/scsi_cmnd.h> #include <scsi/scsi_transport_sas.h> +#include <asm/unaligned.h> #include "smartpqi.h" static struct pqi_sas_phy *pqi_alloc_sas_phy(struct pqi_sas_port *pqi_sas_port) @@ -97,14 +99,32 @@ static int pqi_sas_port_add_rphy(struct pqi_sas_port *pqi_sas_port, identify = &rphy->identify; identify->sas_address = pqi_sas_port->sas_address; - identify->initiator_port_protocols = SAS_PROTOCOL_STP; - identify->target_port_protocols = SAS_PROTOCOL_STP; + + if (pqi_sas_port->device && + pqi_sas_port->device->is_expander_smp_device) { + identify->initiator_port_protocols = SAS_PROTOCOL_SMP; + identify->target_port_protocols = SAS_PROTOCOL_SMP; + } else { + identify->initiator_port_protocols = SAS_PROTOCOL_STP; + identify->target_port_protocols = SAS_PROTOCOL_STP; + } return sas_rphy_add(rphy); } +static struct sas_rphy *pqi_sas_rphy_alloc(struct pqi_sas_port *pqi_sas_port) +{ + if (pqi_sas_port->device && + pqi_sas_port->device->is_expander_smp_device) + return sas_expander_alloc(pqi_sas_port->port, + SAS_FANOUT_EXPANDER_DEVICE); + + return sas_end_device_alloc(pqi_sas_port->port); +} + static struct pqi_sas_port *pqi_alloc_sas_port( - struct pqi_sas_node *pqi_sas_node, u64 sas_address) + struct pqi_sas_node *pqi_sas_node, u64 sas_address, + struct pqi_scsi_dev *device) { int rc; struct pqi_sas_port *pqi_sas_port; @@ -127,6 +147,7 @@ static struct pqi_sas_port *pqi_alloc_sas_port( pqi_sas_port->port = port; pqi_sas_port->sas_address = sas_address; + pqi_sas_port->device = device; list_add_tail(&pqi_sas_port->port_list_entry, &pqi_sas_node->port_list_head); @@ -146,7 +167,7 @@ static void pqi_free_sas_port(struct pqi_sas_port *pqi_sas_port) struct pqi_sas_phy *next; list_for_each_entry_safe(pqi_sas_phy, next, - &pqi_sas_port->phy_list_head, phy_list_entry) + &pqi_sas_port->phy_list_head, phy_list_entry) pqi_free_sas_phy(pqi_sas_phy); sas_port_delete(pqi_sas_port->port); @@ -176,7 +197,7 @@ static void pqi_free_sas_node(struct pqi_sas_node *pqi_sas_node) return; list_for_each_entry_safe(pqi_sas_port, next, - &pqi_sas_node->port_list_head, port_list_entry) + &pqi_sas_node->port_list_head, port_list_entry) pqi_free_sas_port(pqi_sas_port); kfree(pqi_sas_node); @@ -206,13 +227,14 @@ int pqi_add_sas_host(struct Scsi_Host *shost, struct pqi_ctrl_info *ctrl_info) struct pqi_sas_port *pqi_sas_port; struct pqi_sas_phy *pqi_sas_phy; - parent_dev = &shost->shost_gendev; + parent_dev = &shost->shost_dev; pqi_sas_node = pqi_alloc_sas_node(parent_dev); if (!pqi_sas_node) return -ENOMEM; - pqi_sas_port = pqi_alloc_sas_port(pqi_sas_node, ctrl_info->sas_address); + pqi_sas_port = pqi_alloc_sas_port(pqi_sas_node, + ctrl_info->sas_address, NULL); if (!pqi_sas_port) { rc = -ENODEV; goto free_sas_node; @@ -254,11 +276,12 @@ int pqi_add_sas_device(struct pqi_sas_node *pqi_sas_node, struct pqi_sas_port *pqi_sas_port; struct sas_rphy *rphy; - pqi_sas_port = pqi_alloc_sas_port(pqi_sas_node, device->sas_address); + pqi_sas_port = pqi_alloc_sas_port(pqi_sas_node, + device->sas_address, device); if (!pqi_sas_port) return -ENOMEM; - rphy = sas_end_device_alloc(pqi_sas_port->port); + rphy = pqi_sas_rphy_alloc(pqi_sas_port); if (!rphy) { rc = -ENODEV; goto free_sas_port; @@ -329,6 +352,128 @@ static int pqi_sas_phy_speed(struct sas_phy *phy, return -EINVAL; } +#define CSMI_IOCTL_TIMEOUT 60 +#define SMP_CRC_FIELD_LENGTH 4 + +static struct bmic_csmi_smp_passthru_buffer * +pqi_build_csmi_smp_passthru_buffer(struct sas_rphy *rphy, + struct bsg_job *job) +{ + struct bmic_csmi_smp_passthru_buffer *smp_buf; + struct bmic_csmi_ioctl_header *ioctl_header; + struct bmic_csmi_smp_passthru *parameters; + u32 req_size; + u32 resp_size; + + smp_buf = kzalloc(sizeof(*smp_buf), GFP_KERNEL); + if (!smp_buf) + return NULL; + + req_size = job->request_payload.payload_len; + resp_size = job->reply_payload.payload_len; + + ioctl_header = &smp_buf->ioctl_header; + put_unaligned_le32(sizeof(smp_buf->ioctl_header), + &ioctl_header->header_length); + put_unaligned_le32(CSMI_IOCTL_TIMEOUT, &ioctl_header->timeout); + put_unaligned_le32(CSMI_CC_SAS_SMP_PASSTHRU, + &ioctl_header->control_code); + put_unaligned_le32(sizeof(smp_buf->parameters), &ioctl_header->length); + + parameters = &smp_buf->parameters; + parameters->phy_identifier = rphy->identify.phy_identifier; + parameters->port_identifier = 0; + parameters->connection_rate = 0; + put_unaligned_be64(rphy->identify.sas_address, + ¶meters->destination_sas_address); + + if (req_size > SMP_CRC_FIELD_LENGTH) + req_size -= SMP_CRC_FIELD_LENGTH; + + put_unaligned_le32(req_size, ¶meters->request_length); + + put_unaligned_le32(resp_size, ¶meters->response_length); + + sg_copy_to_buffer(job->request_payload.sg_list, + job->reply_payload.sg_cnt, ¶meters->request, + req_size); + + return smp_buf; +} + +static unsigned int pqi_build_sas_smp_handler_reply( + struct bmic_csmi_smp_passthru_buffer *smp_buf, struct bsg_job *job, + struct pqi_raid_error_info *error_info) +{ + sg_copy_from_buffer(job->reply_payload.sg_list, + job->reply_payload.sg_cnt, &smp_buf->parameters.response, + le32_to_cpu(smp_buf->parameters.response_length)); + + job->reply_len = le16_to_cpu(error_info->sense_data_length); + memcpy(job->reply, error_info->data, + le16_to_cpu(error_info->sense_data_length)); + + return job->reply_payload.payload_len - + get_unaligned_le32(&error_info->data_in_transferred); +} + +void pqi_sas_smp_handler(struct bsg_job *job, struct Scsi_Host *shost, + struct sas_rphy *rphy) +{ + int rc; + struct pqi_ctrl_info *ctrl_info = shost_to_hba(shost); + struct bmic_csmi_smp_passthru_buffer *smp_buf; + struct pqi_raid_error_info error_info; + unsigned int reslen = 0; + + pqi_ctrl_busy(ctrl_info); + + if (job->reply_payload.payload_len == 0) { + rc = -ENOMEM; + goto out; + } + + if (!rphy) { + rc = -EINVAL; + goto out; + } + + if (rphy->identify.device_type != SAS_FANOUT_EXPANDER_DEVICE) { + rc = -EINVAL; + goto out; + } + + if (job->request_payload.sg_cnt > 1 || job->reply_payload.sg_cnt > 1) { + rc = -EINVAL; + goto out; + } + + if (pqi_ctrl_offline(ctrl_info)) { + rc = -ENXIO; + goto out; + } + + if (pqi_ctrl_blocked(ctrl_info)) { + rc = -EBUSY; + goto out; + } + + smp_buf = pqi_build_csmi_smp_passthru_buffer(rphy, job); + if (!smp_buf) { + rc = -ENOMEM; + goto out; + } + + rc = pqi_csmi_smp_passthru(ctrl_info, smp_buf, sizeof(*smp_buf), + &error_info); + if (rc) + goto out; + + reslen = pqi_build_sas_smp_handler_reply(smp_buf, job, &error_info); +out: + bsg_job_done(job, rc, reslen); + pqi_ctrl_unbusy(ctrl_info); +} struct sas_function_template pqi_sas_transport_functions = { .get_linkerrors = pqi_sas_get_linkerrors, .get_enclosure_identifier = pqi_sas_get_enclosure_identifier, @@ -338,4 +483,5 @@ struct sas_function_template pqi_sas_transport_functions = { .phy_setup = pqi_sas_phy_setup, .phy_release = pqi_sas_phy_release, .set_phy_speed = pqi_sas_phy_speed, + .smp_handler = pqi_sas_smp_handler, }; diff --git a/drivers/scsi/smartpqi/smartpqi_sis.c b/drivers/scsi/smartpqi/smartpqi_sis.c index ea91658c7060..dcd11c6418cc 100644 --- a/drivers/scsi/smartpqi/smartpqi_sis.c +++ b/drivers/scsi/smartpqi/smartpqi_sis.c @@ -34,6 +34,7 @@ #define SIS_REENABLE_SIS_MODE 0x1 #define SIS_ENABLE_MSIX 0x40 #define SIS_ENABLE_INTX 0x80 +#define SIS_SOFT_RESET 0x100 #define SIS_CMD_READY 0x200 #define SIS_TRIGGER_SHUTDOWN 0x800000 #define SIS_PQI_RESET_QUIESCE 0x1000000 @@ -59,7 +60,7 @@ #define SIS_CTRL_KERNEL_UP 0x80 #define SIS_CTRL_KERNEL_PANIC 0x100 -#define SIS_CTRL_READY_TIMEOUT_SECS 30 +#define SIS_CTRL_READY_TIMEOUT_SECS 180 #define SIS_CTRL_READY_RESUME_TIMEOUT_SECS 90 #define SIS_CTRL_READY_POLL_INTERVAL_MSECS 10 @@ -90,7 +91,7 @@ static int sis_wait_for_ctrl_ready_with_timeout(struct pqi_ctrl_info *ctrl_info, unsigned long timeout; u32 status; - timeout = (timeout_secs * HZ) + jiffies; + timeout = (timeout_secs * PQI_HZ) + jiffies; while (1) { status = readl(&ctrl_info->registers->sis_firmware_status); @@ -202,7 +203,7 @@ static int sis_send_sync_cmd(struct pqi_ctrl_info *ctrl_info, * the top of the loop in order to give the controller time to start * processing the command before we start polling. */ - timeout = (SIS_CMD_COMPLETE_TIMEOUT_SECS * HZ) + jiffies; + timeout = (SIS_CMD_COMPLETE_TIMEOUT_SECS * PQI_HZ) + jiffies; while (1) { msleep(SIS_CMD_COMPLETE_POLL_INTERVAL_MSECS); doorbell = readl(®isters->sis_ctrl_to_host_doorbell); @@ -348,7 +349,7 @@ static int sis_wait_for_doorbell_bit_to_clear( u32 doorbell_register; unsigned long timeout; - timeout = (SIS_DOORBELL_BIT_CLEAR_TIMEOUT_SECS * HZ) + jiffies; + timeout = (SIS_DOORBELL_BIT_CLEAR_TIMEOUT_SECS * PQI_HZ) + jiffies; while (1) { doorbell_register = @@ -420,6 +421,12 @@ u32 sis_read_driver_scratch(struct pqi_ctrl_info *ctrl_info) return readl(&ctrl_info->registers->sis_driver_scratch); } +void sis_soft_reset(struct pqi_ctrl_info *ctrl_info) +{ + writel(SIS_SOFT_RESET, + &ctrl_info->registers->sis_host_to_ctrl_doorbell); +} + static void __attribute__((unused)) verify_structures(void) { BUILD_BUG_ON(offsetof(struct sis_base_struct, diff --git a/drivers/scsi/smartpqi/smartpqi_sis.h b/drivers/scsi/smartpqi/smartpqi_sis.h index 2bf889dbf5ab..d018cb9c3f82 100644 --- a/drivers/scsi/smartpqi/smartpqi_sis.h +++ b/drivers/scsi/smartpqi/smartpqi_sis.h @@ -33,5 +33,6 @@ int sis_pqi_reset_quiesce(struct pqi_ctrl_info *ctrl_info); int sis_reenable_sis_mode(struct pqi_ctrl_info *ctrl_info); void sis_write_driver_scratch(struct pqi_ctrl_info *ctrl_info, u32 value); u32 sis_read_driver_scratch(struct pqi_ctrl_info *ctrl_info); +void sis_soft_reset(struct pqi_ctrl_info *ctrl_info); #endif /* _SMARTPQI_SIS_H */ diff --git a/drivers/scsi/snic/snic_main.c b/drivers/scsi/snic/snic_main.c index 5295277d6325..5e824fd6047a 100644 --- a/drivers/scsi/snic/snic_main.c +++ b/drivers/scsi/snic/snic_main.c @@ -127,7 +127,6 @@ static struct scsi_host_template snic_host_template = { .this_id = -1, .cmd_per_lun = SNIC_DFLT_QUEUE_DEPTH, .can_queue = SNIC_MAX_IO_REQ, - .use_clustering = ENABLE_CLUSTERING, .sg_tablesize = SNIC_MAX_SG_DESC_CNT, .max_sectors = 0x800, .shost_attrs = snic_attrs, diff --git a/drivers/scsi/snic/snic_trc.c b/drivers/scsi/snic/snic_trc.c index fc60c933d6c0..458eaba24c78 100644 --- a/drivers/scsi/snic/snic_trc.c +++ b/drivers/scsi/snic/snic_trc.c @@ -126,7 +126,7 @@ snic_trc_init(void) int tbuf_sz = 0, ret; tbuf_sz = (snic_trace_max_pages * PAGE_SIZE); - tbuf = vmalloc(tbuf_sz); + tbuf = vzalloc(tbuf_sz); if (!tbuf) { SNIC_ERR("Failed to Allocate Trace Buffer Size. %d\n", tbuf_sz); SNIC_ERR("Trace Facility not enabled.\n"); @@ -135,7 +135,6 @@ snic_trc_init(void) return ret; } - memset(tbuf, 0, tbuf_sz); trc->buf = (struct snic_trc_data *) tbuf; spin_lock_init(&trc->lock); diff --git a/drivers/scsi/sr.c b/drivers/scsi/sr.c index 54dd70ae9731..38ddbbfe5f3c 100644 --- a/drivers/scsi/sr.c +++ b/drivers/scsi/sr.c @@ -80,7 +80,7 @@ MODULE_ALIAS_SCSI_DEVICE(TYPE_WORM); static DEFINE_MUTEX(sr_mutex); static int sr_probe(struct device *); static int sr_remove(struct device *); -static int sr_init_command(struct scsi_cmnd *SCpnt); +static blk_status_t sr_init_command(struct scsi_cmnd *SCpnt); static int sr_done(struct scsi_cmnd *); static int sr_runtime_suspend(struct device *dev); @@ -384,22 +384,22 @@ static int sr_done(struct scsi_cmnd *SCpnt) return good_bytes; } -static int sr_init_command(struct scsi_cmnd *SCpnt) +static blk_status_t sr_init_command(struct scsi_cmnd *SCpnt) { int block = 0, this_count, s_size; struct scsi_cd *cd; struct request *rq = SCpnt->request; - int ret; + blk_status_t ret; ret = scsi_init_io(SCpnt); - if (ret != BLKPREP_OK) + if (ret != BLK_STS_OK) goto out; WARN_ON_ONCE(SCpnt != rq->special); cd = scsi_cd(rq->rq_disk); /* from here on until we're complete, any goto out * is used for a killable error condition */ - ret = BLKPREP_KILL; + ret = BLK_STS_IOERR; SCSI_LOG_HLQUEUE(1, scmd_printk(KERN_INFO, SCpnt, "Doing sr request, block = %d\n", block)); @@ -516,7 +516,7 @@ static int sr_init_command(struct scsi_cmnd *SCpnt) * This indicates that the command is ready from our end to be * queued. */ - ret = BLKPREP_OK; + ret = BLK_STS_OK; out: return ret; } diff --git a/drivers/scsi/st.c b/drivers/scsi/st.c index 307df2fa39a3..7ff22d3f03e3 100644 --- a/drivers/scsi/st.c +++ b/drivers/scsi/st.c @@ -530,7 +530,7 @@ static void st_scsi_execute_end(struct request *req, blk_status_t status) complete(SRpnt->waiting); blk_rq_unmap_user(tmp); - __blk_put_request(req->q, req); + blk_put_request(req); } static int st_scsi_execute(struct st_request *SRpnt, const unsigned char *cmd, diff --git a/drivers/scsi/stex.c b/drivers/scsi/stex.c index 9b20643ab49d..f6bef7ad65e7 100644 --- a/drivers/scsi/stex.c +++ b/drivers/scsi/stex.c @@ -1489,6 +1489,7 @@ static struct scsi_host_template driver_template = { .eh_abort_handler = stex_abort, .eh_host_reset_handler = stex_reset, .this_id = -1, + .dma_boundary = PAGE_SIZE - 1, }; static struct pci_device_id stex_pci_tbl[] = { @@ -1617,19 +1618,6 @@ static struct st_card_info stex_card_info[] = { }, }; -static int stex_set_dma_mask(struct pci_dev * pdev) -{ - int ret; - - if (!pci_set_dma_mask(pdev, DMA_BIT_MASK(64)) - && !pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64))) - return 0; - ret = pci_set_dma_mask(pdev, DMA_BIT_MASK(32)); - if (!ret) - ret = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32)); - return ret; -} - static int stex_request_irq(struct st_hba *hba) { struct pci_dev *pdev = hba->pdev; @@ -1710,7 +1698,9 @@ static int stex_probe(struct pci_dev *pdev, const struct pci_device_id *id) goto out_release_regions; } - err = stex_set_dma_mask(pdev); + err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64)); + if (err) + err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32)); if (err) { printk(KERN_ERR DRV_NAME "(%s): set dma mask failed\n", pci_name(pdev)); diff --git a/drivers/scsi/storvsc_drv.c b/drivers/scsi/storvsc_drv.c index 8f88348ebe42..84380bae20f1 100644 --- a/drivers/scsi/storvsc_drv.c +++ b/drivers/scsi/storvsc_drv.c @@ -1698,7 +1698,6 @@ static struct scsi_host_template scsi_driver = { .slave_configure = storvsc_device_configure, .cmd_per_lun = 2048, .this_id = -1, - .use_clustering = ENABLE_CLUSTERING, /* Make sure we dont get a sg segment crosses a page boundary */ .dma_boundary = PAGE_SIZE-1, .no_write_same = 1, diff --git a/drivers/scsi/sun3_scsi.c b/drivers/scsi/sun3_scsi.c index 9492638296c8..95a7ea7eefa0 100644 --- a/drivers/scsi/sun3_scsi.c +++ b/drivers/scsi/sun3_scsi.c @@ -500,7 +500,7 @@ static struct scsi_host_template sun3_scsi_template = { .this_id = 7, .sg_tablesize = SG_NONE, .cmd_per_lun = 2, - .use_clustering = DISABLE_CLUSTERING, + .dma_boundary = PAGE_SIZE - 1, .cmd_size = NCR5380_CMD_SIZE, }; diff --git a/drivers/scsi/sun_esp.c b/drivers/scsi/sun_esp.c index a11efbcb7f8b..c71bd01fef94 100644 --- a/drivers/scsi/sun_esp.c +++ b/drivers/scsi/sun_esp.c @@ -529,11 +529,10 @@ static int esp_sbus_probe(struct platform_device *op) int hme = 0; int ret; - if (dp->parent && - (!strcmp(dp->parent->name, "espdma") || - !strcmp(dp->parent->name, "dma"))) + if (of_node_name_eq(dp->parent, "espdma") || + of_node_name_eq(dp->parent, "dma")) dma_node = dp->parent; - else if (!strcmp(dp->name, "SUNW,fas")) { + else if (of_node_name_eq(dp, "SUNW,fas")) { dma_node = op->dev.of_node; hme = 1; } diff --git a/drivers/scsi/sym53c8xx_2/sym_glue.c b/drivers/scsi/sym53c8xx_2/sym_glue.c index 5f10aa9bad9b..57f6d63e4c40 100644 --- a/drivers/scsi/sym53c8xx_2/sym_glue.c +++ b/drivers/scsi/sym53c8xx_2/sym_glue.c @@ -1312,9 +1312,9 @@ static struct Scsi_Host *sym_attach(struct scsi_host_template *tpnt, int unit, sprintf(np->s.inst_name, "sym%d", np->s.unit); if ((SYM_CONF_DMA_ADDRESSING_MODE > 0) && (np->features & FE_DAC) && - !pci_set_dma_mask(pdev, DMA_DAC_MASK)) { + !dma_set_mask(&pdev->dev, DMA_DAC_MASK)) { set_dac(np); - } else if (pci_set_dma_mask(pdev, DMA_BIT_MASK(32))) { + } else if (dma_set_mask(&pdev->dev, DMA_BIT_MASK(32))) { printf_warning("%s: No suitable DMA available\n", sym_name(np)); goto attach_failed; } @@ -1660,7 +1660,6 @@ static struct scsi_host_template sym2_template = { .eh_bus_reset_handler = sym53c8xx_eh_bus_reset_handler, .eh_host_reset_handler = sym53c8xx_eh_host_reset_handler, .this_id = 7, - .use_clustering = ENABLE_CLUSTERING, .max_sectors = 0xFFFF, #ifdef SYM_LINUX_PROC_INFO_SUPPORT .show_info = sym_show_info, diff --git a/drivers/scsi/ufs/Kconfig b/drivers/scsi/ufs/Kconfig index 2ddd426323e9..2ddbb26d9c26 100644 --- a/drivers/scsi/ufs/Kconfig +++ b/drivers/scsi/ufs/Kconfig @@ -80,6 +80,14 @@ config SCSI_UFSHCD_PLATFORM If unsure, say N. +config SCSI_UFS_CDNS_PLATFORM + tristate "Cadence UFS Controller platform driver" + depends on SCSI_UFSHCD_PLATFORM + help + This selects the Cadence-specific additions to UFSHCD platform driver. + + If unsure, say N. + config SCSI_UFS_DWC_TC_PLATFORM tristate "DesignWare platform support using a G210 Test Chip" depends on SCSI_UFSHCD_PLATFORM diff --git a/drivers/scsi/ufs/Makefile b/drivers/scsi/ufs/Makefile index aca481329828..a3bd70c3652c 100644 --- a/drivers/scsi/ufs/Makefile +++ b/drivers/scsi/ufs/Makefile @@ -2,6 +2,7 @@ # UFSHCD makefile obj-$(CONFIG_SCSI_UFS_DWC_TC_PCI) += tc-dwc-g210-pci.o ufshcd-dwc.o tc-dwc-g210.o obj-$(CONFIG_SCSI_UFS_DWC_TC_PLATFORM) += tc-dwc-g210-pltfrm.o ufshcd-dwc.o tc-dwc-g210.o +obj-$(CONFIG_SCSI_UFS_CDNS_PLATFORM) += cdns-pltfrm.o obj-$(CONFIG_SCSI_UFS_QCOM) += ufs-qcom.o obj-$(CONFIG_SCSI_UFSHCD) += ufshcd-core.o ufshcd-core-y += ufshcd.o ufs-sysfs.o diff --git a/drivers/scsi/ufs/cdns-pltfrm.c b/drivers/scsi/ufs/cdns-pltfrm.c new file mode 100644 index 000000000000..4a37b4f57164 --- /dev/null +++ b/drivers/scsi/ufs/cdns-pltfrm.c @@ -0,0 +1,148 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Platform UFS Host driver for Cadence controller + * + * Copyright (C) 2018 Cadence Design Systems, Inc. + * + * Authors: + * Jan Kotas <jank@cadence.com> + * + */ + +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/platform_device.h> +#include <linux/of.h> +#include <linux/time.h> + +#include "ufshcd-pltfrm.h" + +#define CDNS_UFS_REG_HCLKDIV 0xFC + +/** + * Sets HCLKDIV register value based on the core_clk + * @hba: host controller instance + * + * Return zero for success and non-zero for failure + */ +static int cdns_ufs_set_hclkdiv(struct ufs_hba *hba) +{ + struct ufs_clk_info *clki; + struct list_head *head = &hba->clk_list_head; + unsigned long core_clk_rate = 0; + u32 core_clk_div = 0; + + if (list_empty(head)) + return 0; + + list_for_each_entry(clki, head, list) { + if (IS_ERR_OR_NULL(clki->clk)) + continue; + if (!strcmp(clki->name, "core_clk")) + core_clk_rate = clk_get_rate(clki->clk); + } + + if (!core_clk_rate) { + dev_err(hba->dev, "%s: unable to find core_clk rate\n", + __func__); + return -EINVAL; + } + + core_clk_div = core_clk_rate / USEC_PER_SEC; + + ufshcd_writel(hba, core_clk_div, CDNS_UFS_REG_HCLKDIV); + /** + * Make sure the register was updated, + * UniPro layer will not work with an incorrect value. + */ + mb(); + + return 0; +} + +/** + * Sets clocks used by the controller + * @hba: host controller instance + * @on: if true, enable clocks, otherwise disable + * @status: notify stage (pre, post change) + * + * Return zero for success and non-zero for failure + */ +static int cdns_ufs_setup_clocks(struct ufs_hba *hba, bool on, + enum ufs_notify_change_status status) +{ + if ((!on) || (status == PRE_CHANGE)) + return 0; + + return cdns_ufs_set_hclkdiv(hba); +} + +static struct ufs_hba_variant_ops cdns_pltfm_hba_vops = { + .name = "cdns-ufs-pltfm", + .setup_clocks = cdns_ufs_setup_clocks, +}; + +/** + * cdns_ufs_pltfrm_probe - probe routine of the driver + * @pdev: pointer to platform device handle + * + * Return zero for success and non-zero for failure + */ +static int cdns_ufs_pltfrm_probe(struct platform_device *pdev) +{ + int err; + struct device *dev = &pdev->dev; + + /* Perform generic probe */ + err = ufshcd_pltfrm_init(pdev, &cdns_pltfm_hba_vops); + if (err) + dev_err(dev, "ufshcd_pltfrm_init() failed %d\n", err); + + return err; +} + +/** + * cdns_ufs_pltfrm_remove - removes the ufs driver + * @pdev: pointer to platform device handle + * + * Always returns 0 + */ +static int cdns_ufs_pltfrm_remove(struct platform_device *pdev) +{ + struct ufs_hba *hba = platform_get_drvdata(pdev); + + ufshcd_remove(hba); + return 0; +} + +static const struct of_device_id cdns_ufs_of_match[] = { + { .compatible = "cdns,ufshc" }, + {}, +}; + +MODULE_DEVICE_TABLE(of, cdns_ufs_of_match); + +static const struct dev_pm_ops cdns_ufs_dev_pm_ops = { + .suspend = ufshcd_pltfrm_suspend, + .resume = ufshcd_pltfrm_resume, + .runtime_suspend = ufshcd_pltfrm_runtime_suspend, + .runtime_resume = ufshcd_pltfrm_runtime_resume, + .runtime_idle = ufshcd_pltfrm_runtime_idle, +}; + +static struct platform_driver cdns_ufs_pltfrm_driver = { + .probe = cdns_ufs_pltfrm_probe, + .remove = cdns_ufs_pltfrm_remove, + .driver = { + .name = "cdns-ufshcd", + .pm = &cdns_ufs_dev_pm_ops, + .of_match_table = cdns_ufs_of_match, + }, +}; + +module_platform_driver(cdns_ufs_pltfrm_driver); + +MODULE_AUTHOR("Jan Kotas <jank@cadence.com>"); +MODULE_DESCRIPTION("Cadence UFS host controller platform driver"); +MODULE_LICENSE("GPL v2"); +MODULE_VERSION(UFSHCD_DRIVER_VERSION); diff --git a/drivers/scsi/ufs/ufs.h b/drivers/scsi/ufs/ufs.h index 58087d3916d0..dd65fea07687 100644 --- a/drivers/scsi/ufs/ufs.h +++ b/drivers/scsi/ufs/ufs.h @@ -46,7 +46,7 @@ #define QUERY_DESC_HDR_SIZE 2 #define QUERY_OSF_SIZE (GENERAL_UPIU_REQUEST_SIZE - \ (sizeof(struct utp_upiu_header))) -#define RESPONSE_UPIU_SENSE_DATA_LENGTH 18 +#define UFS_SENSE_SIZE 18 #define UPIU_HEADER_DWORD(byte3, byte2, byte1, byte0)\ cpu_to_be32((byte3 << 24) | (byte2 << 16) |\ @@ -378,6 +378,20 @@ enum query_opcode { UPIU_QUERY_OPCODE_TOGGLE_FLAG = 0x8, }; +/* bRefClkFreq attribute values */ +enum ufs_ref_clk_freq { + REF_CLK_FREQ_19_2_MHZ = 0, + REF_CLK_FREQ_26_MHZ = 1, + REF_CLK_FREQ_38_4_MHZ = 2, + REF_CLK_FREQ_52_MHZ = 3, + REF_CLK_FREQ_INVAL = -1, +}; + +struct ufs_ref_clk { + unsigned long freq_hz; + enum ufs_ref_clk_freq val; +}; + /* Query response result code */ enum { QUERY_RESULT_SUCCESS = 0x00, @@ -444,7 +458,7 @@ struct utp_cmd_rsp { __be32 residual_transfer_count; __be32 reserved[4]; __be16 sense_data_len; - u8 sense_data[RESPONSE_UPIU_SENSE_DATA_LENGTH]; + u8 sense_data[UFS_SENSE_SIZE]; }; /** diff --git a/drivers/scsi/ufs/ufs_bsg.c b/drivers/scsi/ufs/ufs_bsg.c index e5f8e54bf644..775bb4e5e36e 100644 --- a/drivers/scsi/ufs/ufs_bsg.c +++ b/drivers/scsi/ufs/ufs_bsg.c @@ -157,7 +157,7 @@ void ufs_bsg_remove(struct ufs_hba *hba) if (!hba->bsg_queue) return; - bsg_unregister_queue(hba->bsg_queue); + bsg_remove_queue(hba->bsg_queue); device_del(bsg_dev); put_device(bsg_dev); @@ -193,7 +193,7 @@ int ufs_bsg_probe(struct ufs_hba *hba) if (ret) goto out; - q = bsg_setup_queue(bsg_dev, dev_name(bsg_dev), ufs_bsg_request, 0); + q = bsg_setup_queue(bsg_dev, dev_name(bsg_dev), ufs_bsg_request, NULL, 0); if (IS_ERR(q)) { ret = PTR_ERR(q); goto out; diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c index f1c57cd33b5b..9ba7671b84f8 100644 --- a/drivers/scsi/ufs/ufshcd.c +++ b/drivers/scsi/ufs/ufshcd.c @@ -51,8 +51,6 @@ #define CREATE_TRACE_POINTS #include <trace/events/ufs.h> -#define UFSHCD_REQ_SENSE_SIZE 18 - #define UFSHCD_ENABLE_INTRS (UTP_TRANSFER_REQ_COMPL |\ UTP_TASK_REQ_COMPL |\ UFSHCD_ERROR_MASK) @@ -1551,6 +1549,7 @@ start: * currently running. Hence, fall through to cancel gating * work and to enable clocks. */ + /* fallthrough */ case CLKS_OFF: ufshcd_scsi_block_requests(hba); hba->clk_gating.state = REQ_CLKS_ON; @@ -1562,6 +1561,7 @@ start: * fall through to check if we should wait for this * work to be done or not. */ + /* fallthrough */ case REQ_CLKS_ON: if (async) { rc = -EAGAIN; @@ -1890,11 +1890,10 @@ static inline void ufshcd_copy_sense_data(struct ufshcd_lrb *lrbp) int len_to_copy; len = be16_to_cpu(lrbp->ucd_rsp_ptr->sr.sense_data_len); - len_to_copy = min_t(int, RESPONSE_UPIU_SENSE_DATA_LENGTH, len); + len_to_copy = min_t(int, UFS_SENSE_SIZE, len); - memcpy(lrbp->sense_buffer, - lrbp->ucd_rsp_ptr->sr.sense_data, - min_t(int, len_to_copy, UFSHCD_REQ_SENSE_SIZE)); + memcpy(lrbp->sense_buffer, lrbp->ucd_rsp_ptr->sr.sense_data, + len_to_copy); } } @@ -2456,7 +2455,7 @@ static int ufshcd_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *cmd) WARN_ON(lrbp->cmd); lrbp->cmd = cmd; - lrbp->sense_bufflen = UFSHCD_REQ_SENSE_SIZE; + lrbp->sense_bufflen = UFS_SENSE_SIZE; lrbp->sense_buffer = cmd->sense_buffer; lrbp->task_tag = tag; lrbp->lun = ufshcd_scsi_to_upiu_lun(cmd->device->lun); @@ -4620,6 +4619,7 @@ ufshcd_scsi_cmd_status(struct ufshcd_lrb *lrbp, int scsi_status) switch (scsi_status) { case SAM_STAT_CHECK_CONDITION: ufshcd_copy_sense_data(lrbp); + /* fallthrough */ case SAM_STAT_GOOD: result |= DID_OK << 16 | COMMAND_COMPLETE << 8 | @@ -6701,6 +6701,74 @@ static void ufshcd_def_desc_sizes(struct ufs_hba *hba) hba->desc_size.hlth_desc = QUERY_DESC_HEALTH_DEF_SIZE; } +static struct ufs_ref_clk ufs_ref_clk_freqs[] = { + {19200000, REF_CLK_FREQ_19_2_MHZ}, + {26000000, REF_CLK_FREQ_26_MHZ}, + {38400000, REF_CLK_FREQ_38_4_MHZ}, + {52000000, REF_CLK_FREQ_52_MHZ}, + {0, REF_CLK_FREQ_INVAL}, +}; + +static enum ufs_ref_clk_freq +ufs_get_bref_clk_from_hz(unsigned long freq) +{ + int i; + + for (i = 0; ufs_ref_clk_freqs[i].freq_hz; i++) + if (ufs_ref_clk_freqs[i].freq_hz == freq) + return ufs_ref_clk_freqs[i].val; + + return REF_CLK_FREQ_INVAL; +} + +void ufshcd_parse_dev_ref_clk_freq(struct ufs_hba *hba, struct clk *refclk) +{ + unsigned long freq; + + freq = clk_get_rate(refclk); + + hba->dev_ref_clk_freq = + ufs_get_bref_clk_from_hz(freq); + + if (hba->dev_ref_clk_freq == REF_CLK_FREQ_INVAL) + dev_err(hba->dev, + "invalid ref_clk setting = %ld\n", freq); +} + +static int ufshcd_set_dev_ref_clk(struct ufs_hba *hba) +{ + int err; + u32 ref_clk; + u32 freq = hba->dev_ref_clk_freq; + + err = ufshcd_query_attr_retry(hba, UPIU_QUERY_OPCODE_READ_ATTR, + QUERY_ATTR_IDN_REF_CLK_FREQ, 0, 0, &ref_clk); + + if (err) { + dev_err(hba->dev, "failed reading bRefClkFreq. err = %d\n", + err); + goto out; + } + + if (ref_clk == freq) + goto out; /* nothing to update */ + + err = ufshcd_query_attr_retry(hba, UPIU_QUERY_OPCODE_WRITE_ATTR, + QUERY_ATTR_IDN_REF_CLK_FREQ, 0, 0, &freq); + + if (err) { + dev_err(hba->dev, "bRefClkFreq setting to %lu Hz failed\n", + ufs_ref_clk_freqs[freq].freq_hz); + goto out; + } + + dev_dbg(hba->dev, "bRefClkFreq setting to %lu Hz succeeded\n", + ufs_ref_clk_freqs[freq].freq_hz); + +out: + return err; +} + /** * ufshcd_probe_hba - probe hba to detect device and initialize * @hba: per-adapter instance @@ -6766,6 +6834,12 @@ static int ufshcd_probe_hba(struct ufs_hba *hba) "%s: Failed getting max supported power mode\n", __func__); } else { + /* + * Set the right value to bRefClkFreq before attempting to + * switch to HS gears. + */ + if (hba->dev_ref_clk_freq != REF_CLK_FREQ_INVAL) + ufshcd_set_dev_ref_clk(hba); ret = ufshcd_config_pwr_mode(hba, &hba->max_pwr_info.info); if (ret) { dev_err(hba->dev, "%s: Failed setting power mode, err = %d\n", @@ -6910,6 +6984,7 @@ static struct scsi_host_template ufshcd_driver_template = { .max_host_blocked = 1, .track_queue_depth = 1, .sdev_groups = ufshcd_driver_groups, + .dma_boundary = PAGE_SIZE - 1, }; static int ufshcd_config_vreg_load(struct device *dev, struct ufs_vreg *vreg, @@ -7252,6 +7327,14 @@ static int ufshcd_init_clocks(struct ufs_hba *hba) goto out; } + /* + * Parse device ref clk freq as per device tree "ref_clk". + * Default dev_ref_clk_freq is set to REF_CLK_FREQ_INVAL + * in ufshcd_alloc_host(). + */ + if (!strcmp(clki->name, "ref_clk")) + ufshcd_parse_dev_ref_clk_freq(hba, clki->clk); + if (clki->max_freq) { ret = clk_set_rate(clki->clk, clki->max_freq); if (ret) { @@ -7379,19 +7462,19 @@ ufshcd_send_request_sense(struct ufs_hba *hba, struct scsi_device *sdp) 0, 0, 0, - UFSHCD_REQ_SENSE_SIZE, + UFS_SENSE_SIZE, 0}; char *buffer; int ret; - buffer = kzalloc(UFSHCD_REQ_SENSE_SIZE, GFP_KERNEL); + buffer = kzalloc(UFS_SENSE_SIZE, GFP_KERNEL); if (!buffer) { ret = -ENOMEM; goto out; } ret = scsi_execute(sdp, cmd, DMA_FROM_DEVICE, buffer, - UFSHCD_REQ_SENSE_SIZE, NULL, NULL, + UFS_SENSE_SIZE, NULL, NULL, msecs_to_jiffies(1000), 3, 0, RQF_PM, NULL); if (ret) pr_err("%s: failed with err %d\n", __func__, ret); @@ -8105,6 +8188,7 @@ int ufshcd_alloc_host(struct device *dev, struct ufs_hba **hba_handle) hba->host = host; hba->dev = dev; *hba_handle = hba; + hba->dev_ref_clk_freq = REF_CLK_FREQ_INVAL; INIT_LIST_HEAD(&hba->clk_list_head); diff --git a/drivers/scsi/ufs/ufshcd.h b/drivers/scsi/ufs/ufshcd.h index 1a1c2b487a4e..69ba7445d2b3 100644 --- a/drivers/scsi/ufs/ufshcd.h +++ b/drivers/scsi/ufs/ufshcd.h @@ -550,6 +550,7 @@ struct ufs_hba { void *priv; unsigned int irq; bool is_irq_enabled; + enum ufs_ref_clk_freq dev_ref_clk_freq; /* Interrupt aggregation support is broken */ #define UFSHCD_QUIRK_BROKEN_INTR_AGGR 0x1 @@ -768,6 +769,7 @@ void ufshcd_remove(struct ufs_hba *); int ufshcd_wait_for_register(struct ufs_hba *hba, u32 reg, u32 mask, u32 val, unsigned long interval_us, unsigned long timeout_ms, bool can_sleep); +void ufshcd_parse_dev_ref_clk_freq(struct ufs_hba *hba, struct clk *refclk); static inline void check_upiu_size(void) { diff --git a/drivers/scsi/virtio_scsi.c b/drivers/scsi/virtio_scsi.c index 1c72db94270e..772b976e4ee4 100644 --- a/drivers/scsi/virtio_scsi.c +++ b/drivers/scsi/virtio_scsi.c @@ -68,33 +68,6 @@ struct virtio_scsi_vq { struct virtqueue *vq; }; -/* - * Per-target queue state. - * - * This struct holds the data needed by the queue steering policy. When a - * target is sent multiple requests, we need to drive them to the same queue so - * that FIFO processing order is kept. However, if a target was idle, we can - * choose a queue arbitrarily. In this case the queue is chosen according to - * the current VCPU, so the driver expects the number of request queues to be - * equal to the number of VCPUs. This makes it easy and fast to select the - * queue, and also lets the driver optimize the IRQ affinity for the virtqueues - * (each virtqueue's affinity is set to the CPU that "owns" the queue). - * - * tgt_seq is held to serialize reading and writing req_vq. - * - * Decrements of reqs are never concurrent with writes of req_vq: before the - * decrement reqs will be != 0; after the decrement the virtqueue completion - * routine will not use the req_vq so it can be changed by a new request. - * Thus they can happen outside the tgt_seq, provided of course we make reqs - * an atomic_t. - */ -struct virtio_scsi_target_state { - seqcount_t tgt_seq; - - /* Currently active virtqueue for requests sent to this target. */ - struct virtio_scsi_vq *req_vq; -}; - /* Driver instance state */ struct virtio_scsi { struct virtio_device *vdev; @@ -693,34 +666,12 @@ static int virtscsi_abort(struct scsi_cmnd *sc) return virtscsi_tmf(vscsi, cmd); } -static int virtscsi_target_alloc(struct scsi_target *starget) -{ - struct Scsi_Host *sh = dev_to_shost(starget->dev.parent); - struct virtio_scsi *vscsi = shost_priv(sh); - - struct virtio_scsi_target_state *tgt = - kmalloc(sizeof(*tgt), GFP_KERNEL); - if (!tgt) - return -ENOMEM; - - seqcount_init(&tgt->tgt_seq); - tgt->req_vq = &vscsi->req_vqs[0]; - - starget->hostdata = tgt; - return 0; -} - -static void virtscsi_target_destroy(struct scsi_target *starget) -{ - struct virtio_scsi_target_state *tgt = starget->hostdata; - kfree(tgt); -} - static int virtscsi_map_queues(struct Scsi_Host *shost) { struct virtio_scsi *vscsi = shost_priv(shost); + struct blk_mq_queue_map *qmap = &shost->tag_set.map[0]; - return blk_mq_virtio_map_queues(&shost->tag_set, vscsi->vdev, 2); + return blk_mq_virtio_map_queues(qmap, vscsi->vdev, 2); } /* @@ -747,9 +698,6 @@ static struct scsi_host_template virtscsi_host_template = { .slave_alloc = virtscsi_device_alloc, .dma_boundary = UINT_MAX, - .use_clustering = ENABLE_CLUSTERING, - .target_alloc = virtscsi_target_alloc, - .target_destroy = virtscsi_target_destroy, .map_queues = virtscsi_map_queues, .track_queue_depth = 1, .force_blk_mq = 1, diff --git a/drivers/scsi/vmw_pvscsi.c b/drivers/scsi/vmw_pvscsi.c index 0d6b2a88fc8e..ecee4b3ff073 100644 --- a/drivers/scsi/vmw_pvscsi.c +++ b/drivers/scsi/vmw_pvscsi.c @@ -1007,7 +1007,6 @@ static struct scsi_host_template pvscsi_template = { .sg_tablesize = PVSCSI_MAX_NUM_SG_ENTRIES_PER_SEGMENT, .dma_boundary = UINT_MAX, .max_sectors = 0xffff, - .use_clustering = ENABLE_CLUSTERING, .change_queue_depth = pvscsi_change_queue_depth, .eh_abort_handler = pvscsi_abort, .eh_device_reset_handler = pvscsi_device_reset, diff --git a/drivers/scsi/wd719x.c b/drivers/scsi/wd719x.c index 974bfb3f30f4..e3310e9488d2 100644 --- a/drivers/scsi/wd719x.c +++ b/drivers/scsi/wd719x.c @@ -153,8 +153,6 @@ static int wd719x_direct_cmd(struct wd719x *wd, u8 opcode, u8 dev, u8 lun, static void wd719x_destroy(struct wd719x *wd) { - struct wd719x_scb *scb; - /* stop the RISC */ if (wd719x_direct_cmd(wd, WD719X_CMD_SLEEP, 0, 0, 0, 0, WD719X_WAIT_FOR_RISC)) @@ -162,37 +160,35 @@ static void wd719x_destroy(struct wd719x *wd) /* disable RISC */ wd719x_writeb(wd, WD719X_PCI_MODE_SELECT, 0); - /* free all SCBs */ - list_for_each_entry(scb, &wd->active_scbs, list) - pci_free_consistent(wd->pdev, sizeof(struct wd719x_scb), scb, - scb->phys); - list_for_each_entry(scb, &wd->free_scbs, list) - pci_free_consistent(wd->pdev, sizeof(struct wd719x_scb), scb, - scb->phys); + WARN_ON_ONCE(!list_empty(&wd->active_scbs)); + /* free internal buffers */ - pci_free_consistent(wd->pdev, wd->fw_size, wd->fw_virt, wd->fw_phys); + dma_free_coherent(&wd->pdev->dev, wd->fw_size, wd->fw_virt, + wd->fw_phys); wd->fw_virt = NULL; - pci_free_consistent(wd->pdev, WD719X_HASH_TABLE_SIZE, wd->hash_virt, - wd->hash_phys); + dma_free_coherent(&wd->pdev->dev, WD719X_HASH_TABLE_SIZE, wd->hash_virt, + wd->hash_phys); wd->hash_virt = NULL; - pci_free_consistent(wd->pdev, sizeof(struct wd719x_host_param), - wd->params, wd->params_phys); + dma_free_coherent(&wd->pdev->dev, sizeof(struct wd719x_host_param), + wd->params, wd->params_phys); wd->params = NULL; free_irq(wd->pdev->irq, wd); } -/* finish a SCSI command, mark SCB (if any) as free, unmap buffers */ -static void wd719x_finish_cmd(struct scsi_cmnd *cmd, int result) +/* finish a SCSI command, unmap buffers */ +static void wd719x_finish_cmd(struct wd719x_scb *scb, int result) { + struct scsi_cmnd *cmd = scb->cmd; struct wd719x *wd = shost_priv(cmd->device->host); - struct wd719x_scb *scb = (struct wd719x_scb *) cmd->host_scribble; - if (scb) { - list_move(&scb->list, &wd->free_scbs); - dma_unmap_single(&wd->pdev->dev, cmd->SCp.dma_handle, - SCSI_SENSE_BUFFERSIZE, DMA_FROM_DEVICE); - scsi_dma_unmap(cmd); - } + list_del(&scb->list); + + dma_unmap_single(&wd->pdev->dev, scb->phys, + sizeof(struct wd719x_scb), DMA_BIDIRECTIONAL); + scsi_dma_unmap(cmd); + dma_unmap_single(&wd->pdev->dev, cmd->SCp.dma_handle, + SCSI_SENSE_BUFFERSIZE, DMA_FROM_DEVICE); + cmd->result = result << 16; cmd->scsi_done(cmd); } @@ -202,36 +198,10 @@ static int wd719x_queuecommand(struct Scsi_Host *sh, struct scsi_cmnd *cmd) { int i, count_sg; unsigned long flags; - struct wd719x_scb *scb; + struct wd719x_scb *scb = scsi_cmd_priv(cmd); struct wd719x *wd = shost_priv(sh); - dma_addr_t phys; - cmd->host_scribble = NULL; - - /* get a free SCB - either from existing ones or allocate a new one */ - spin_lock_irqsave(wd->sh->host_lock, flags); - scb = list_first_entry_or_null(&wd->free_scbs, struct wd719x_scb, list); - if (scb) { - list_del(&scb->list); - phys = scb->phys; - } else { - spin_unlock_irqrestore(wd->sh->host_lock, flags); - scb = pci_alloc_consistent(wd->pdev, sizeof(struct wd719x_scb), - &phys); - spin_lock_irqsave(wd->sh->host_lock, flags); - if (!scb) { - dev_err(&wd->pdev->dev, "unable to allocate SCB\n"); - wd719x_finish_cmd(cmd, DID_ERROR); - spin_unlock_irqrestore(wd->sh->host_lock, flags); - return 0; - } - } - memset(scb, 0, sizeof(struct wd719x_scb)); - list_add(&scb->list, &wd->active_scbs); - - scb->phys = phys; scb->cmd = cmd; - cmd->host_scribble = (char *) scb; scb->CDB_tag = 0; /* Tagged queueing not supported yet */ scb->devid = cmd->device->id; @@ -240,10 +210,19 @@ static int wd719x_queuecommand(struct Scsi_Host *sh, struct scsi_cmnd *cmd) /* copy the command */ memcpy(scb->CDB, cmd->cmnd, cmd->cmd_len); + /* map SCB */ + scb->phys = dma_map_single(&wd->pdev->dev, scb, sizeof(*scb), + DMA_BIDIRECTIONAL); + + if (dma_mapping_error(&wd->pdev->dev, scb->phys)) + goto out_error; + /* map sense buffer */ scb->sense_buf_length = SCSI_SENSE_BUFFERSIZE; cmd->SCp.dma_handle = dma_map_single(&wd->pdev->dev, cmd->sense_buffer, SCSI_SENSE_BUFFERSIZE, DMA_FROM_DEVICE); + if (dma_mapping_error(&wd->pdev->dev, cmd->SCp.dma_handle)) + goto out_unmap_scb; scb->sense_buf = cpu_to_le32(cmd->SCp.dma_handle); /* request autosense */ @@ -258,11 +237,8 @@ static int wd719x_queuecommand(struct Scsi_Host *sh, struct scsi_cmnd *cmd) /* Scather/gather */ count_sg = scsi_dma_map(cmd); - if (count_sg < 0) { - wd719x_finish_cmd(cmd, DID_ERROR); - spin_unlock_irqrestore(wd->sh->host_lock, flags); - return 0; - } + if (count_sg < 0) + goto out_unmap_sense; BUG_ON(count_sg > WD719X_SG); if (count_sg) { @@ -283,19 +259,33 @@ static int wd719x_queuecommand(struct Scsi_Host *sh, struct scsi_cmnd *cmd) scb->data_p = 0; } + spin_lock_irqsave(wd->sh->host_lock, flags); + /* check if the Command register is free */ if (wd719x_readb(wd, WD719X_AMR_COMMAND) != WD719X_CMD_READY) { spin_unlock_irqrestore(wd->sh->host_lock, flags); return SCSI_MLQUEUE_HOST_BUSY; } + list_add(&scb->list, &wd->active_scbs); + /* write pointer to the AMR */ wd719x_writel(wd, WD719X_AMR_SCB_IN, scb->phys); /* send SCB opcode */ wd719x_writeb(wd, WD719X_AMR_COMMAND, WD719X_CMD_PROCESS_SCB); spin_unlock_irqrestore(wd->sh->host_lock, flags); + return 0; +out_unmap_sense: + dma_unmap_single(&wd->pdev->dev, cmd->SCp.dma_handle, + SCSI_SENSE_BUFFERSIZE, DMA_FROM_DEVICE); +out_unmap_scb: + dma_unmap_single(&wd->pdev->dev, scb->phys, sizeof(*scb), + DMA_BIDIRECTIONAL); +out_error: + cmd->result = DID_ERROR << 16; + cmd->scsi_done(cmd); return 0; } @@ -327,8 +317,8 @@ static int wd719x_chip_init(struct wd719x *wd) wd->fw_size = ALIGN(fw_wcs->size, 4) + fw_risc->size; if (!wd->fw_virt) - wd->fw_virt = pci_alloc_consistent(wd->pdev, wd->fw_size, - &wd->fw_phys); + wd->fw_virt = dma_alloc_coherent(&wd->pdev->dev, wd->fw_size, + &wd->fw_phys, GFP_KERNEL); if (!wd->fw_virt) { ret = -ENOMEM; goto wd719x_init_end; @@ -464,7 +454,7 @@ static int wd719x_abort(struct scsi_cmnd *cmd) { int action, result; unsigned long flags; - struct wd719x_scb *scb = (struct wd719x_scb *)cmd->host_scribble; + struct wd719x_scb *scb = scsi_cmd_priv(cmd); struct wd719x *wd = shost_priv(cmd->device->host); dev_info(&wd->pdev->dev, "abort command, tag: %x\n", cmd->tag); @@ -526,10 +516,8 @@ static int wd719x_host_reset(struct scsi_cmnd *cmd) result = FAILED; /* flush all SCBs */ - list_for_each_entry_safe(scb, tmp, &wd->active_scbs, list) { - struct scsi_cmnd *tmp_cmd = scb->cmd; - wd719x_finish_cmd(tmp_cmd, result); - } + list_for_each_entry_safe(scb, tmp, &wd->active_scbs, list) + wd719x_finish_cmd(scb, result); spin_unlock_irqrestore(wd->sh->host_lock, flags); return result; @@ -555,7 +543,6 @@ static inline void wd719x_interrupt_SCB(struct wd719x *wd, union wd719x_regs regs, struct wd719x_scb *scb) { - struct scsi_cmnd *cmd; int result; /* now have to find result from card */ @@ -643,9 +630,8 @@ static inline void wd719x_interrupt_SCB(struct wd719x *wd, result = DID_ERROR; break; } - cmd = scb->cmd; - wd719x_finish_cmd(cmd, result); + wd719x_finish_cmd(scb, result); } static irqreturn_t wd719x_interrupt(int irq, void *dev_id) @@ -809,7 +795,6 @@ static int wd719x_board_found(struct Scsi_Host *sh) int ret; INIT_LIST_HEAD(&wd->active_scbs); - INIT_LIST_HEAD(&wd->free_scbs); sh->base = pci_resource_start(wd->pdev, 0); @@ -820,17 +805,18 @@ static int wd719x_board_found(struct Scsi_Host *sh) wd->fw_virt = NULL; /* memory area for host (EEPROM) parameters */ - wd->params = pci_alloc_consistent(wd->pdev, - sizeof(struct wd719x_host_param), - &wd->params_phys); + wd->params = dma_alloc_coherent(&wd->pdev->dev, + sizeof(struct wd719x_host_param), + &wd->params_phys, GFP_KERNEL); if (!wd->params) { dev_warn(&wd->pdev->dev, "unable to allocate parameter buffer\n"); return -ENOMEM; } /* memory area for the RISC for hash table of outstanding requests */ - wd->hash_virt = pci_alloc_consistent(wd->pdev, WD719X_HASH_TABLE_SIZE, - &wd->hash_phys); + wd->hash_virt = dma_alloc_coherent(&wd->pdev->dev, + WD719X_HASH_TABLE_SIZE, + &wd->hash_phys, GFP_KERNEL); if (!wd->hash_virt) { dev_warn(&wd->pdev->dev, "unable to allocate hash buffer\n"); ret = -ENOMEM; @@ -862,10 +848,10 @@ static int wd719x_board_found(struct Scsi_Host *sh) fail_free_irq: free_irq(wd->pdev->irq, wd); fail_free_hash: - pci_free_consistent(wd->pdev, WD719X_HASH_TABLE_SIZE, wd->hash_virt, + dma_free_coherent(&wd->pdev->dev, WD719X_HASH_TABLE_SIZE, wd->hash_virt, wd->hash_phys); fail_free_params: - pci_free_consistent(wd->pdev, sizeof(struct wd719x_host_param), + dma_free_coherent(&wd->pdev->dev, sizeof(struct wd719x_host_param), wd->params, wd->params_phys); return ret; @@ -874,6 +860,7 @@ fail_free_params: static struct scsi_host_template wd719x_template = { .module = THIS_MODULE, .name = "Western Digital 719x", + .cmd_size = sizeof(struct wd719x_scb), .queuecommand = wd719x_queuecommand, .eh_abort_handler = wd719x_abort, .eh_device_reset_handler = wd719x_dev_reset, @@ -884,7 +871,6 @@ static struct scsi_host_template wd719x_template = { .can_queue = 255, .this_id = 7, .sg_tablesize = WD719X_SG, - .use_clustering = ENABLE_CLUSTERING, }; static int wd719x_pci_probe(struct pci_dev *pdev, const struct pci_device_id *d) @@ -897,7 +883,7 @@ static int wd719x_pci_probe(struct pci_dev *pdev, const struct pci_device_id *d) if (err) goto fail; - if (pci_set_dma_mask(pdev, DMA_BIT_MASK(32))) { + if (dma_set_mask(&pdev->dev, DMA_BIT_MASK(32))) { dev_warn(&pdev->dev, "Unable to set 32-bit DMA mask\n"); goto disable_device; } diff --git a/drivers/scsi/wd719x.h b/drivers/scsi/wd719x.h index 0455b1633ca7..abaabd419a54 100644 --- a/drivers/scsi/wd719x.h +++ b/drivers/scsi/wd719x.h @@ -74,7 +74,6 @@ struct wd719x { void *hash_virt; /* hash table CPU address */ dma_addr_t hash_phys; /* hash table bus address */ struct list_head active_scbs; - struct list_head free_scbs; }; /* timeout delays in microsecs */ diff --git a/drivers/scsi/xen-scsifront.c b/drivers/scsi/xen-scsifront.c index 61389bdc7926..f0068e96a177 100644 --- a/drivers/scsi/xen-scsifront.c +++ b/drivers/scsi/xen-scsifront.c @@ -696,7 +696,6 @@ static struct scsi_host_template scsifront_sht = { .this_id = -1, .cmd_size = sizeof(struct vscsifrnt_shadow), .sg_tablesize = VSCSIIF_SG_TABLESIZE, - .use_clustering = DISABLE_CLUSTERING, .proc_name = "scsifront", }; @@ -1112,7 +1111,7 @@ static void scsifront_backend_changed(struct xenbus_device *dev, case XenbusStateClosed: if (dev->state == XenbusStateClosed) break; - /* Missed the backend's Closing state -- fallthrough */ + /* fall through - Missed the backend's Closing state */ case XenbusStateClosing: scsifront_disconnect(info); break; diff --git a/drivers/staging/rts5208/rtsx.c b/drivers/staging/rts5208/rtsx.c index 69e6abe14abf..c57d66a7405f 100644 --- a/drivers/staging/rts5208/rtsx.c +++ b/drivers/staging/rts5208/rtsx.c @@ -237,12 +237,6 @@ static struct scsi_host_template rtsx_host_template = { /* limit the total size of a transfer to 120 KB */ .max_sectors = 240, - /* merge commands... this seems to help performance, but - * periodically someone should test to see which setting is more - * optimal. - */ - .use_clustering = 1, - /* emulated HBA */ .emulated = 1, diff --git a/drivers/staging/unisys/visorhba/visorhba_main.c b/drivers/staging/unisys/visorhba/visorhba_main.c index 4fc521c51c0e..5cf93e8eb77c 100644 --- a/drivers/staging/unisys/visorhba/visorhba_main.c +++ b/drivers/staging/unisys/visorhba/visorhba_main.c @@ -645,7 +645,6 @@ static struct scsi_host_template visorhba_driver_template = { .this_id = -1, .slave_alloc = visorhba_slave_alloc, .slave_destroy = visorhba_slave_destroy, - .use_clustering = ENABLE_CLUSTERING, }; /* diff --git a/drivers/target/iscsi/iscsi_target.c b/drivers/target/iscsi/iscsi_target.c index c1d5a173553d..984941e036c8 100644 --- a/drivers/target/iscsi/iscsi_target.c +++ b/drivers/target/iscsi/iscsi_target.c @@ -1493,8 +1493,6 @@ __iscsit_check_dataout_hdr(struct iscsi_conn *conn, void *buf, if (hdr->flags & ISCSI_FLAG_CMD_FINAL) iscsit_stop_dataout_timer(cmd); - transport_check_aborted_status(se_cmd, - (hdr->flags & ISCSI_FLAG_CMD_FINAL)); return iscsit_dump_data_payload(conn, payload_length, 1); } } else { @@ -1509,12 +1507,9 @@ __iscsit_check_dataout_hdr(struct iscsi_conn *conn, void *buf, * TASK_ABORTED status. */ if (se_cmd->transport_state & CMD_T_ABORTED) { - if (hdr->flags & ISCSI_FLAG_CMD_FINAL) - if (--cmd->outstanding_r2ts < 1) { - iscsit_stop_dataout_timer(cmd); - transport_check_aborted_status( - se_cmd, 1); - } + if (hdr->flags & ISCSI_FLAG_CMD_FINAL && + --cmd->outstanding_r2ts < 1) + iscsit_stop_dataout_timer(cmd); return iscsit_dump_data_payload(conn, payload_length, 1); } diff --git a/drivers/target/iscsi/iscsi_target_configfs.c b/drivers/target/iscsi/iscsi_target_configfs.c index 95d0a22b2ad6..a5481dfeae8d 100644 --- a/drivers/target/iscsi/iscsi_target_configfs.c +++ b/drivers/target/iscsi/iscsi_target_configfs.c @@ -1343,11 +1343,6 @@ static struct configfs_attribute *lio_target_discovery_auth_attrs[] = { /* Start functions for target_core_fabric_ops */ -static char *iscsi_get_fabric_name(void) -{ - return "iSCSI"; -} - static int iscsi_get_cmd_state(struct se_cmd *se_cmd) { struct iscsi_cmd *cmd = container_of(se_cmd, struct iscsi_cmd, se_cmd); @@ -1549,9 +1544,9 @@ static void lio_release_cmd(struct se_cmd *se_cmd) const struct target_core_fabric_ops iscsi_ops = { .module = THIS_MODULE, - .name = "iscsi", + .fabric_alias = "iscsi", + .fabric_name = "iSCSI", .node_acl_size = sizeof(struct iscsi_node_acl), - .get_fabric_name = iscsi_get_fabric_name, .tpg_get_wwn = lio_tpg_get_endpoint_wwn, .tpg_get_tag = lio_tpg_get_tag, .tpg_get_default_depth = lio_tpg_get_default_depth, @@ -1596,4 +1591,6 @@ const struct target_core_fabric_ops iscsi_ops = { .tfc_tpg_nacl_attrib_attrs = lio_target_nacl_attrib_attrs, .tfc_tpg_nacl_auth_attrs = lio_target_nacl_auth_attrs, .tfc_tpg_nacl_param_attrs = lio_target_nacl_param_attrs, + + .write_pending_must_be_called = true, }; diff --git a/drivers/target/iscsi/iscsi_target_erl1.c b/drivers/target/iscsi/iscsi_target_erl1.c index a211e8154f4c..1b54a9c70851 100644 --- a/drivers/target/iscsi/iscsi_target_erl1.c +++ b/drivers/target/iscsi/iscsi_target_erl1.c @@ -943,20 +943,8 @@ int iscsit_execute_cmd(struct iscsi_cmd *cmd, int ooo) return 0; } spin_unlock_bh(&cmd->istate_lock); - /* - * Determine if delayed TASK_ABORTED status for WRITEs - * should be sent now if no unsolicited data out - * payloads are expected, or if the delayed status - * should be sent after unsolicited data out with - * ISCSI_FLAG_CMD_FINAL set in iscsi_handle_data_out() - */ - if (transport_check_aborted_status(se_cmd, - (cmd->unsolicited_data == 0)) != 0) + if (cmd->se_cmd.transport_state & CMD_T_ABORTED) return 0; - /* - * Otherwise send CHECK_CONDITION and sense for - * exception - */ return transport_send_check_condition_and_sense(se_cmd, cmd->sense_reason, 0); } @@ -974,13 +962,7 @@ int iscsit_execute_cmd(struct iscsi_cmd *cmd, int ooo) if (!(cmd->cmd_flags & ICF_NON_IMMEDIATE_UNSOLICITED_DATA)) { - /* - * Send the delayed TASK_ABORTED status for - * WRITEs if no more unsolicitied data is - * expected. - */ - if (transport_check_aborted_status(se_cmd, 1) - != 0) + if (cmd->se_cmd.transport_state & CMD_T_ABORTED) return 0; iscsit_set_dataout_sequence_values(cmd); @@ -995,11 +977,7 @@ int iscsit_execute_cmd(struct iscsi_cmd *cmd, int ooo) if ((cmd->data_direction == DMA_TO_DEVICE) && !(cmd->cmd_flags & ICF_NON_IMMEDIATE_UNSOLICITED_DATA)) { - /* - * Send the delayed TASK_ABORTED status for WRITEs if - * no more nsolicitied data is expected. - */ - if (transport_check_aborted_status(se_cmd, 1) != 0) + if (cmd->se_cmd.transport_state & CMD_T_ABORTED) return 0; iscsit_set_unsoliticed_dataout(cmd); diff --git a/drivers/target/iscsi/iscsi_target_util.c b/drivers/target/iscsi/iscsi_target_util.c index 36b742932c72..86987da86dd6 100644 --- a/drivers/target/iscsi/iscsi_target_util.c +++ b/drivers/target/iscsi/iscsi_target_util.c @@ -150,24 +150,26 @@ void iscsit_free_r2ts_from_list(struct iscsi_cmd *cmd) static int iscsit_wait_for_tag(struct se_session *se_sess, int state, int *cpup) { int tag = -1; - DEFINE_WAIT(wait); + DEFINE_SBQ_WAIT(wait); struct sbq_wait_state *ws; + struct sbitmap_queue *sbq; if (state == TASK_RUNNING) return tag; - ws = &se_sess->sess_tag_pool.ws[0]; + sbq = &se_sess->sess_tag_pool; + ws = &sbq->ws[0]; for (;;) { - prepare_to_wait_exclusive(&ws->wait, &wait, state); + sbitmap_prepare_to_wait(sbq, ws, &wait, state); if (signal_pending_state(state, current)) break; - tag = sbitmap_queue_get(&se_sess->sess_tag_pool, cpup); + tag = sbitmap_queue_get(sbq, cpup); if (tag >= 0) break; schedule(); } - finish_wait(&ws->wait, &wait); + sbitmap_finish_wait(sbq, ws, &wait); return tag; } diff --git a/drivers/target/loopback/tcm_loop.c b/drivers/target/loopback/tcm_loop.c index bc8918f382e4..7bd7c0c0db6f 100644 --- a/drivers/target/loopback/tcm_loop.c +++ b/drivers/target/loopback/tcm_loop.c @@ -324,7 +324,7 @@ static struct scsi_host_template tcm_loop_driver_template = { .sg_tablesize = 256, .cmd_per_lun = 1024, .max_sectors = 0xFFFF, - .use_clustering = DISABLE_CLUSTERING, + .dma_boundary = PAGE_SIZE - 1, .slave_alloc = tcm_loop_slave_alloc, .module = THIS_MODULE, .track_queue_depth = 1, @@ -460,11 +460,6 @@ static void tcm_loop_release_core_bus(void) pr_debug("Releasing TCM Loop Core BUS\n"); } -static char *tcm_loop_get_fabric_name(void) -{ - return "loopback"; -} - static inline struct tcm_loop_tpg *tl_tpg(struct se_portal_group *se_tpg) { return container_of(se_tpg, struct tcm_loop_tpg, tl_se_tpg); @@ -1149,8 +1144,7 @@ static struct configfs_attribute *tcm_loop_wwn_attrs[] = { static const struct target_core_fabric_ops loop_ops = { .module = THIS_MODULE, - .name = "loopback", - .get_fabric_name = tcm_loop_get_fabric_name, + .fabric_name = "loopback", .tpg_get_wwn = tcm_loop_get_endpoint_wwn, .tpg_get_tag = tcm_loop_get_tag, .tpg_check_demo_mode = tcm_loop_check_demo_mode, diff --git a/drivers/target/sbp/sbp_target.c b/drivers/target/sbp/sbp_target.c index 3d10189ecedc..08cee13dfb9a 100644 --- a/drivers/target/sbp/sbp_target.c +++ b/drivers/target/sbp/sbp_target.c @@ -1694,11 +1694,6 @@ static int sbp_check_false(struct se_portal_group *se_tpg) return 0; } -static char *sbp_get_fabric_name(void) -{ - return "sbp"; -} - static char *sbp_get_fabric_wwn(struct se_portal_group *se_tpg) { struct sbp_tpg *tpg = container_of(se_tpg, struct sbp_tpg, se_tpg); @@ -2323,8 +2318,7 @@ static struct configfs_attribute *sbp_tpg_attrib_attrs[] = { static const struct target_core_fabric_ops sbp_ops = { .module = THIS_MODULE, - .name = "sbp", - .get_fabric_name = sbp_get_fabric_name, + .fabric_name = "sbp", .tpg_get_wwn = sbp_get_fabric_wwn, .tpg_get_tag = sbp_get_tag, .tpg_check_demo_mode = sbp_check_true, diff --git a/drivers/target/target_core_alua.c b/drivers/target/target_core_alua.c index 4f134b0c3e29..6b0d9beacf90 100644 --- a/drivers/target/target_core_alua.c +++ b/drivers/target/target_core_alua.c @@ -451,7 +451,7 @@ static inline void set_ascq(struct se_cmd *cmd, u8 alua_ascq) pr_debug("[%s]: ALUA TG Port not available, " "SenseKey: NOT_READY, ASC/ASCQ: " "0x04/0x%02x\n", - cmd->se_tfo->get_fabric_name(), alua_ascq); + cmd->se_tfo->fabric_name, alua_ascq); cmd->scsi_asc = 0x04; cmd->scsi_ascq = alua_ascq; @@ -1229,13 +1229,13 @@ static int core_alua_update_tpg_secondary_metadata(struct se_lun *lun) if (se_tpg->se_tpg_tfo->tpg_get_tag != NULL) { path = kasprintf(GFP_KERNEL, "%s/alua/%s/%s+%hu/lun_%llu", - db_root, se_tpg->se_tpg_tfo->get_fabric_name(), + db_root, se_tpg->se_tpg_tfo->fabric_name, se_tpg->se_tpg_tfo->tpg_get_wwn(se_tpg), se_tpg->se_tpg_tfo->tpg_get_tag(se_tpg), lun->unpacked_lun); } else { path = kasprintf(GFP_KERNEL, "%s/alua/%s/%s/lun_%llu", - db_root, se_tpg->se_tpg_tfo->get_fabric_name(), + db_root, se_tpg->se_tpg_tfo->fabric_name, se_tpg->se_tpg_tfo->tpg_get_wwn(se_tpg), lun->unpacked_lun); } diff --git a/drivers/target/target_core_configfs.c b/drivers/target/target_core_configfs.c index f6b1549f4142..72016d0dfca5 100644 --- a/drivers/target/target_core_configfs.c +++ b/drivers/target/target_core_configfs.c @@ -172,7 +172,10 @@ static struct target_fabric_configfs *target_core_get_fabric( mutex_lock(&g_tf_lock); list_for_each_entry(tf, &g_tf_list, tf_list) { - if (!strcmp(tf->tf_ops->name, name)) { + const char *cmp_name = tf->tf_ops->fabric_alias; + if (!cmp_name) + cmp_name = tf->tf_ops->fabric_name; + if (!strcmp(cmp_name, name)) { atomic_inc(&tf->tf_access_cnt); mutex_unlock(&g_tf_lock); return tf; @@ -249,7 +252,7 @@ static struct config_group *target_core_register_fabric( return ERR_PTR(-EINVAL); } pr_debug("Target_Core_ConfigFS: REGISTER -> Located fabric:" - " %s\n", tf->tf_ops->name); + " %s\n", tf->tf_ops->fabric_name); /* * On a successful target_core_get_fabric() look, the returned * struct target_fabric_configfs *tf will contain a usage reference. @@ -282,7 +285,7 @@ static void target_core_deregister_fabric( " tf list\n", config_item_name(item)); pr_debug("Target_Core_ConfigFS: DEREGISTER -> located fabric:" - " %s\n", tf->tf_ops->name); + " %s\n", tf->tf_ops->fabric_name); atomic_dec(&tf->tf_access_cnt); pr_debug("Target_Core_ConfigFS: DEREGISTER -> Releasing ci" @@ -342,17 +345,20 @@ EXPORT_SYMBOL(target_undepend_item); static int target_fabric_tf_ops_check(const struct target_core_fabric_ops *tfo) { - if (!tfo->name) { - pr_err("Missing tfo->name\n"); - return -EINVAL; + if (tfo->fabric_alias) { + if (strlen(tfo->fabric_alias) >= TARGET_FABRIC_NAME_SIZE) { + pr_err("Passed alias: %s exceeds " + "TARGET_FABRIC_NAME_SIZE\n", tfo->fabric_alias); + return -EINVAL; + } } - if (strlen(tfo->name) >= TARGET_FABRIC_NAME_SIZE) { - pr_err("Passed name: %s exceeds TARGET_FABRIC" - "_NAME_SIZE\n", tfo->name); + if (!tfo->fabric_name) { + pr_err("Missing tfo->fabric_name\n"); return -EINVAL; } - if (!tfo->get_fabric_name) { - pr_err("Missing tfo->get_fabric_name()\n"); + if (strlen(tfo->fabric_name) >= TARGET_FABRIC_NAME_SIZE) { + pr_err("Passed name: %s exceeds " + "TARGET_FABRIC_NAME_SIZE\n", tfo->fabric_name); return -EINVAL; } if (!tfo->tpg_get_wwn) { @@ -486,7 +492,7 @@ void target_unregister_template(const struct target_core_fabric_ops *fo) mutex_lock(&g_tf_lock); list_for_each_entry(t, &g_tf_list, tf_list) { - if (!strcmp(t->tf_ops->name, fo->name)) { + if (!strcmp(t->tf_ops->fabric_name, fo->fabric_name)) { BUG_ON(atomic_read(&t->tf_access_cnt)); list_del(&t->tf_list); mutex_unlock(&g_tf_lock); @@ -532,9 +538,9 @@ DEF_CONFIGFS_ATTRIB_SHOW(emulate_tpu); DEF_CONFIGFS_ATTRIB_SHOW(emulate_tpws); DEF_CONFIGFS_ATTRIB_SHOW(emulate_caw); DEF_CONFIGFS_ATTRIB_SHOW(emulate_3pc); +DEF_CONFIGFS_ATTRIB_SHOW(emulate_pr); DEF_CONFIGFS_ATTRIB_SHOW(pi_prot_type); DEF_CONFIGFS_ATTRIB_SHOW(hw_pi_prot_type); -DEF_CONFIGFS_ATTRIB_SHOW(pi_prot_format); DEF_CONFIGFS_ATTRIB_SHOW(pi_prot_verify); DEF_CONFIGFS_ATTRIB_SHOW(enforce_pr_isids); DEF_CONFIGFS_ATTRIB_SHOW(is_nonrot); @@ -592,6 +598,7 @@ static ssize_t _name##_store(struct config_item *item, const char *page, \ DEF_CONFIGFS_ATTRIB_STORE_BOOL(emulate_fua_write); DEF_CONFIGFS_ATTRIB_STORE_BOOL(emulate_caw); DEF_CONFIGFS_ATTRIB_STORE_BOOL(emulate_3pc); +DEF_CONFIGFS_ATTRIB_STORE_BOOL(emulate_pr); DEF_CONFIGFS_ATTRIB_STORE_BOOL(enforce_pr_isids); DEF_CONFIGFS_ATTRIB_STORE_BOOL(is_nonrot); @@ -613,12 +620,17 @@ static void dev_set_t10_wwn_model_alias(struct se_device *dev) const char *configname; configname = config_item_name(&dev->dev_group.cg_item); - if (strlen(configname) >= 16) { + if (strlen(configname) >= INQUIRY_MODEL_LEN) { pr_warn("dev[%p]: Backstore name '%s' is too long for " - "INQUIRY_MODEL, truncating to 16 bytes\n", dev, + "INQUIRY_MODEL, truncating to 15 characters\n", dev, configname); } - snprintf(&dev->t10_wwn.model[0], 16, "%s", configname); + /* + * XXX We can't use sizeof(dev->t10_wwn.model) (INQUIRY_MODEL_LEN + 1) + * here without potentially breaking existing setups, so continue to + * truncate one byte shorter than what can be carried in INQUIRY. + */ + strlcpy(dev->t10_wwn.model, configname, INQUIRY_MODEL_LEN); } static ssize_t emulate_model_alias_store(struct config_item *item, @@ -640,11 +652,12 @@ static ssize_t emulate_model_alias_store(struct config_item *item, if (ret < 0) return ret; + BUILD_BUG_ON(sizeof(dev->t10_wwn.model) != INQUIRY_MODEL_LEN + 1); if (flag) { dev_set_t10_wwn_model_alias(dev); } else { - strncpy(&dev->t10_wwn.model[0], - dev->transport->inquiry_prod, 16); + strlcpy(dev->t10_wwn.model, dev->transport->inquiry_prod, + sizeof(dev->t10_wwn.model)); } da->emulate_model_alias = flag; return count; @@ -1116,9 +1129,10 @@ CONFIGFS_ATTR(, emulate_tpu); CONFIGFS_ATTR(, emulate_tpws); CONFIGFS_ATTR(, emulate_caw); CONFIGFS_ATTR(, emulate_3pc); +CONFIGFS_ATTR(, emulate_pr); CONFIGFS_ATTR(, pi_prot_type); CONFIGFS_ATTR_RO(, hw_pi_prot_type); -CONFIGFS_ATTR(, pi_prot_format); +CONFIGFS_ATTR_WO(, pi_prot_format); CONFIGFS_ATTR(, pi_prot_verify); CONFIGFS_ATTR(, enforce_pr_isids); CONFIGFS_ATTR(, is_nonrot); @@ -1156,6 +1170,7 @@ struct configfs_attribute *sbc_attrib_attrs[] = { &attr_emulate_tpws, &attr_emulate_caw, &attr_emulate_3pc, + &attr_emulate_pr, &attr_pi_prot_type, &attr_hw_pi_prot_type, &attr_pi_prot_format, @@ -1211,6 +1226,74 @@ static struct t10_wwn *to_t10_wwn(struct config_item *item) } /* + * STANDARD and VPD page 0x83 T10 Vendor Identification + */ +static ssize_t target_wwn_vendor_id_show(struct config_item *item, + char *page) +{ + return sprintf(page, "%s\n", &to_t10_wwn(item)->vendor[0]); +} + +static ssize_t target_wwn_vendor_id_store(struct config_item *item, + const char *page, size_t count) +{ + struct t10_wwn *t10_wwn = to_t10_wwn(item); + struct se_device *dev = t10_wwn->t10_dev; + /* +2 to allow for a trailing (stripped) '\n' and null-terminator */ + unsigned char buf[INQUIRY_VENDOR_LEN + 2]; + char *stripped = NULL; + size_t len; + int i; + + len = strlcpy(buf, page, sizeof(buf)); + if (len < sizeof(buf)) { + /* Strip any newline added from userspace. */ + stripped = strstrip(buf); + len = strlen(stripped); + } + if (len > INQUIRY_VENDOR_LEN) { + pr_err("Emulated T10 Vendor Identification exceeds" + " INQUIRY_VENDOR_LEN: " __stringify(INQUIRY_VENDOR_LEN) + "\n"); + return -EOVERFLOW; + } + + /* + * SPC 4.3.1: + * ASCII data fields shall contain only ASCII printable characters (i.e., + * code values 20h to 7Eh) and may be terminated with one or more ASCII + * null (00h) characters. + */ + for (i = 0; i < len; i++) { + if ((stripped[i] < 0x20) || (stripped[i] > 0x7E)) { + pr_err("Emulated T10 Vendor Identification contains" + " non-ASCII-printable characters\n"); + return -EINVAL; + } + } + + /* + * Check to see if any active exports exist. If they do exist, fail + * here as changing this information on the fly (underneath the + * initiator side OS dependent multipath code) could cause negative + * effects. + */ + if (dev->export_count) { + pr_err("Unable to set T10 Vendor Identification while" + " active %d exports exist\n", dev->export_count); + return -EINVAL; + } + + BUILD_BUG_ON(sizeof(dev->t10_wwn.vendor) != INQUIRY_VENDOR_LEN + 1); + strlcpy(dev->t10_wwn.vendor, stripped, sizeof(dev->t10_wwn.vendor)); + + pr_debug("Target_Core_ConfigFS: Set emulated T10 Vendor Identification:" + " %s\n", dev->t10_wwn.vendor); + + return count; +} + +/* * VPD page 0x80 Unit serial */ static ssize_t target_wwn_vpd_unit_serial_show(struct config_item *item, @@ -1356,6 +1439,7 @@ DEF_DEV_WWN_ASSOC_SHOW(vpd_assoc_target_port, 0x10); /* VPD page 0x83 Association: SCSI Target Device */ DEF_DEV_WWN_ASSOC_SHOW(vpd_assoc_scsi_target_device, 0x20); +CONFIGFS_ATTR(target_wwn_, vendor_id); CONFIGFS_ATTR(target_wwn_, vpd_unit_serial); CONFIGFS_ATTR_RO(target_wwn_, vpd_protocol_identifier); CONFIGFS_ATTR_RO(target_wwn_, vpd_assoc_logical_unit); @@ -1363,6 +1447,7 @@ CONFIGFS_ATTR_RO(target_wwn_, vpd_assoc_target_port); CONFIGFS_ATTR_RO(target_wwn_, vpd_assoc_scsi_target_device); static struct configfs_attribute *target_core_dev_wwn_attrs[] = { + &target_wwn_attr_vendor_id, &target_wwn_attr_vpd_unit_serial, &target_wwn_attr_vpd_protocol_identifier, &target_wwn_attr_vpd_assoc_logical_unit, @@ -1400,7 +1485,7 @@ static ssize_t target_core_dev_pr_show_spc3_res(struct se_device *dev, core_pr_dump_initiator_port(pr_reg, i_buf, PR_REG_ISID_ID_LEN); return sprintf(page, "SPC-3 Reservation: %s Initiator: %s%s\n", - se_nacl->se_tpg->se_tpg_tfo->get_fabric_name(), + se_nacl->se_tpg->se_tpg_tfo->fabric_name, se_nacl->initiatorname, i_buf); } @@ -1414,7 +1499,7 @@ static ssize_t target_core_dev_pr_show_spc2_res(struct se_device *dev, if (se_nacl) { len = sprintf(page, "SPC-2 Reservation: %s Initiator: %s\n", - se_nacl->se_tpg->se_tpg_tfo->get_fabric_name(), + se_nacl->se_tpg->se_tpg_tfo->fabric_name, se_nacl->initiatorname); } else { len = sprintf(page, "No SPC-2 Reservation holder\n"); @@ -1427,6 +1512,9 @@ static ssize_t target_pr_res_holder_show(struct config_item *item, char *page) struct se_device *dev = pr_to_dev(item); int ret; + if (!dev->dev_attrib.emulate_pr) + return sprintf(page, "SPC_RESERVATIONS_DISABLED\n"); + if (dev->transport->transport_flags & TRANSPORT_FLAG_PASSTHROUGH_PGR) return sprintf(page, "Passthrough\n"); @@ -1489,13 +1577,13 @@ static ssize_t target_pr_res_pr_holder_tg_port_show(struct config_item *item, tfo = se_tpg->se_tpg_tfo; len += sprintf(page+len, "SPC-3 Reservation: %s" - " Target Node Endpoint: %s\n", tfo->get_fabric_name(), + " Target Node Endpoint: %s\n", tfo->fabric_name, tfo->tpg_get_wwn(se_tpg)); len += sprintf(page+len, "SPC-3 Reservation: Relative Port" " Identifier Tag: %hu %s Portal Group Tag: %hu" " %s Logical Unit: %llu\n", pr_reg->tg_pt_sep_rtpi, - tfo->get_fabric_name(), tfo->tpg_get_tag(se_tpg), - tfo->get_fabric_name(), pr_reg->pr_aptpl_target_lun); + tfo->fabric_name, tfo->tpg_get_tag(se_tpg), + tfo->fabric_name, pr_reg->pr_aptpl_target_lun); out_unlock: spin_unlock(&dev->dev_reservation_lock); @@ -1526,7 +1614,7 @@ static ssize_t target_pr_res_pr_registered_i_pts_show(struct config_item *item, core_pr_dump_initiator_port(pr_reg, i_buf, PR_REG_ISID_ID_LEN); sprintf(buf, "%s Node: %s%s Key: 0x%016Lx PRgen: 0x%08x\n", - tfo->get_fabric_name(), + tfo->fabric_name, pr_reg->pr_reg_nacl->initiatorname, i_buf, pr_reg->pr_res_key, pr_reg->pr_res_generation); @@ -1567,12 +1655,14 @@ static ssize_t target_pr_res_type_show(struct config_item *item, char *page) { struct se_device *dev = pr_to_dev(item); + if (!dev->dev_attrib.emulate_pr) + return sprintf(page, "SPC_RESERVATIONS_DISABLED\n"); if (dev->transport->transport_flags & TRANSPORT_FLAG_PASSTHROUGH_PGR) return sprintf(page, "SPC_PASSTHROUGH\n"); - else if (dev->dev_reservation_flags & DRF_SPC2_RESERVATIONS) + if (dev->dev_reservation_flags & DRF_SPC2_RESERVATIONS) return sprintf(page, "SPC2_RESERVATIONS\n"); - else - return sprintf(page, "SPC3_PERSISTENT_RESERVATIONS\n"); + + return sprintf(page, "SPC3_PERSISTENT_RESERVATIONS\n"); } static ssize_t target_pr_res_aptpl_active_show(struct config_item *item, @@ -1580,7 +1670,8 @@ static ssize_t target_pr_res_aptpl_active_show(struct config_item *item, { struct se_device *dev = pr_to_dev(item); - if (dev->transport->transport_flags & TRANSPORT_FLAG_PASSTHROUGH_PGR) + if (!dev->dev_attrib.emulate_pr || + (dev->transport->transport_flags & TRANSPORT_FLAG_PASSTHROUGH_PGR)) return 0; return sprintf(page, "APTPL Bit Status: %s\n", @@ -1592,7 +1683,8 @@ static ssize_t target_pr_res_aptpl_metadata_show(struct config_item *item, { struct se_device *dev = pr_to_dev(item); - if (dev->transport->transport_flags & TRANSPORT_FLAG_PASSTHROUGH_PGR) + if (!dev->dev_attrib.emulate_pr || + (dev->transport->transport_flags & TRANSPORT_FLAG_PASSTHROUGH_PGR)) return 0; return sprintf(page, "Ready to process PR APTPL metadata..\n"); @@ -1638,7 +1730,8 @@ static ssize_t target_pr_res_aptpl_metadata_store(struct config_item *item, u16 tpgt = 0; u8 type = 0; - if (dev->transport->transport_flags & TRANSPORT_FLAG_PASSTHROUGH_PGR) + if (!dev->dev_attrib.emulate_pr || + (dev->transport->transport_flags & TRANSPORT_FLAG_PASSTHROUGH_PGR)) return count; if (dev->dev_reservation_flags & DRF_SPC2_RESERVATIONS) return count; @@ -2746,7 +2839,7 @@ static ssize_t target_tg_pt_gp_members_show(struct config_item *item, struct se_portal_group *tpg = lun->lun_tpg; cur_len = snprintf(buf, TG_PT_GROUP_NAME_BUF, "%s/%s/tpgt_%hu" - "/%s\n", tpg->se_tpg_tfo->get_fabric_name(), + "/%s\n", tpg->se_tpg_tfo->fabric_name, tpg->se_tpg_tfo->tpg_get_wwn(tpg), tpg->se_tpg_tfo->tpg_get_tag(tpg), config_item_name(&lun->lun_group.cg_item)); diff --git a/drivers/target/target_core_device.c b/drivers/target/target_core_device.c index 47b5ef153135..93c56f4a9911 100644 --- a/drivers/target/target_core_device.c +++ b/drivers/target/target_core_device.c @@ -95,7 +95,7 @@ transport_lookup_cmd_lun(struct se_cmd *se_cmd, u64 unpacked_lun) deve->lun_access_ro) { pr_err("TARGET_CORE[%s]: Detected WRITE_PROTECTED LUN" " Access for 0x%08llx\n", - se_cmd->se_tfo->get_fabric_name(), + se_cmd->se_tfo->fabric_name, unpacked_lun); rcu_read_unlock(); ret = TCM_WRITE_PROTECTED; @@ -114,7 +114,7 @@ out_unlock: if (unpacked_lun != 0) { pr_err("TARGET_CORE[%s]: Detected NON_EXISTENT_LUN" " Access for 0x%08llx\n", - se_cmd->se_tfo->get_fabric_name(), + se_cmd->se_tfo->fabric_name, unpacked_lun); return TCM_NON_EXISTENT_LUN; } @@ -188,7 +188,7 @@ out_unlock: if (!se_lun) { pr_debug("TARGET_CORE[%s]: Detected NON_EXISTENT_LUN" " Access for 0x%08llx\n", - se_cmd->se_tfo->get_fabric_name(), + se_cmd->se_tfo->fabric_name, unpacked_lun); return -ENODEV; } @@ -237,7 +237,7 @@ struct se_dev_entry *core_get_se_deve_from_rtpi( if (!lun) { pr_err("%s device entries device pointer is" " NULL, but Initiator has access.\n", - tpg->se_tpg_tfo->get_fabric_name()); + tpg->se_tpg_tfo->fabric_name); continue; } if (lun->lun_rtpi != rtpi) @@ -571,9 +571,9 @@ int core_dev_add_lun( return rc; pr_debug("%s_TPG[%u]_LUN[%llu] - Activated %s Logical Unit from" - " CORE HBA: %u\n", tpg->se_tpg_tfo->get_fabric_name(), + " CORE HBA: %u\n", tpg->se_tpg_tfo->fabric_name, tpg->se_tpg_tfo->tpg_get_tag(tpg), lun->unpacked_lun, - tpg->se_tpg_tfo->get_fabric_name(), dev->se_hba->hba_id); + tpg->se_tpg_tfo->fabric_name, dev->se_hba->hba_id); /* * Update LUN maps for dynamically added initiators when * generate_node_acl is enabled. @@ -604,9 +604,9 @@ void core_dev_del_lun( struct se_lun *lun) { pr_debug("%s_TPG[%u]_LUN[%llu] - Deactivating %s Logical Unit from" - " device object\n", tpg->se_tpg_tfo->get_fabric_name(), + " device object\n", tpg->se_tpg_tfo->fabric_name, tpg->se_tpg_tfo->tpg_get_tag(tpg), lun->unpacked_lun, - tpg->se_tpg_tfo->get_fabric_name()); + tpg->se_tpg_tfo->fabric_name); core_tpg_remove_lun(tpg, lun); } @@ -621,7 +621,7 @@ struct se_lun_acl *core_dev_init_initiator_node_lun_acl( if (strlen(nacl->initiatorname) >= TRANSPORT_IQN_LEN) { pr_err("%s InitiatorName exceeds maximum size.\n", - tpg->se_tpg_tfo->get_fabric_name()); + tpg->se_tpg_tfo->fabric_name); *ret = -EOVERFLOW; return NULL; } @@ -664,7 +664,7 @@ int core_dev_add_initiator_node_lun_acl( return -EINVAL; pr_debug("%s_TPG[%hu]_LUN[%llu->%llu] - Added %s ACL for " - " InitiatorNode: %s\n", tpg->se_tpg_tfo->get_fabric_name(), + " InitiatorNode: %s\n", tpg->se_tpg_tfo->fabric_name, tpg->se_tpg_tfo->tpg_get_tag(tpg), lun->unpacked_lun, lacl->mapped_lun, lun_access_ro ? "RO" : "RW", nacl->initiatorname); @@ -697,7 +697,7 @@ int core_dev_del_initiator_node_lun_acl( pr_debug("%s_TPG[%hu]_LUN[%llu] - Removed ACL for" " InitiatorNode: %s Mapped LUN: %llu\n", - tpg->se_tpg_tfo->get_fabric_name(), + tpg->se_tpg_tfo->fabric_name, tpg->se_tpg_tfo->tpg_get_tag(tpg), lun->unpacked_lun, nacl->initiatorname, lacl->mapped_lun); @@ -709,9 +709,9 @@ void core_dev_free_initiator_node_lun_acl( struct se_lun_acl *lacl) { pr_debug("%s_TPG[%hu] - Freeing ACL for %s InitiatorNode: %s" - " Mapped LUN: %llu\n", tpg->se_tpg_tfo->get_fabric_name(), + " Mapped LUN: %llu\n", tpg->se_tpg_tfo->fabric_name, tpg->se_tpg_tfo->tpg_get_tag(tpg), - tpg->se_tpg_tfo->get_fabric_name(), + tpg->se_tpg_tfo->fabric_name, lacl->se_lun_nacl->initiatorname, lacl->mapped_lun); kfree(lacl); @@ -720,36 +720,17 @@ void core_dev_free_initiator_node_lun_acl( static void scsi_dump_inquiry(struct se_device *dev) { struct t10_wwn *wwn = &dev->t10_wwn; - char buf[17]; - int i, device_type; + int device_type = dev->transport->get_device_type(dev); + /* * Print Linux/SCSI style INQUIRY formatting to the kernel ring buffer */ - for (i = 0; i < 8; i++) - if (wwn->vendor[i] >= 0x20) - buf[i] = wwn->vendor[i]; - else - buf[i] = ' '; - buf[i] = '\0'; - pr_debug(" Vendor: %s\n", buf); - - for (i = 0; i < 16; i++) - if (wwn->model[i] >= 0x20) - buf[i] = wwn->model[i]; - else - buf[i] = ' '; - buf[i] = '\0'; - pr_debug(" Model: %s\n", buf); - - for (i = 0; i < 4; i++) - if (wwn->revision[i] >= 0x20) - buf[i] = wwn->revision[i]; - else - buf[i] = ' '; - buf[i] = '\0'; - pr_debug(" Revision: %s\n", buf); - - device_type = dev->transport->get_device_type(dev); + pr_debug(" Vendor: %-" __stringify(INQUIRY_VENDOR_LEN) "s\n", + wwn->vendor); + pr_debug(" Model: %-" __stringify(INQUIRY_MODEL_LEN) "s\n", + wwn->model); + pr_debug(" Revision: %-" __stringify(INQUIRY_REVISION_LEN) "s\n", + wwn->revision); pr_debug(" Type: %s ", scsi_device_type(device_type)); } @@ -805,6 +786,7 @@ struct se_device *target_alloc_device(struct se_hba *hba, const char *name) dev->dev_attrib.emulate_tpws = DA_EMULATE_TPWS; dev->dev_attrib.emulate_caw = DA_EMULATE_CAW; dev->dev_attrib.emulate_3pc = DA_EMULATE_3PC; + dev->dev_attrib.emulate_pr = DA_EMULATE_PR; dev->dev_attrib.pi_prot_type = TARGET_DIF_TYPE0_PROT; dev->dev_attrib.enforce_pr_isids = DA_ENFORCE_PR_ISIDS; dev->dev_attrib.force_pr_aptpl = DA_FORCE_PR_APTPL; @@ -822,13 +804,19 @@ struct se_device *target_alloc_device(struct se_hba *hba, const char *name) xcopy_lun = &dev->xcopy_lun; rcu_assign_pointer(xcopy_lun->lun_se_dev, dev); - init_completion(&xcopy_lun->lun_ref_comp); init_completion(&xcopy_lun->lun_shutdown_comp); INIT_LIST_HEAD(&xcopy_lun->lun_deve_list); INIT_LIST_HEAD(&xcopy_lun->lun_dev_link); mutex_init(&xcopy_lun->lun_tg_pt_md_mutex); xcopy_lun->lun_tpg = &xcopy_pt_tpg; + /* Preload the default INQUIRY const values */ + strlcpy(dev->t10_wwn.vendor, "LIO-ORG", sizeof(dev->t10_wwn.vendor)); + strlcpy(dev->t10_wwn.model, dev->transport->inquiry_prod, + sizeof(dev->t10_wwn.model)); + strlcpy(dev->t10_wwn.revision, dev->transport->inquiry_rev, + sizeof(dev->t10_wwn.revision)); + return dev; } @@ -987,35 +975,10 @@ int target_configure_device(struct se_device *dev) goto out_destroy_device; /* - * Startup the struct se_device processing thread - */ - dev->tmr_wq = alloc_workqueue("tmr-%s", WQ_MEM_RECLAIM | WQ_UNBOUND, 1, - dev->transport->name); - if (!dev->tmr_wq) { - pr_err("Unable to create tmr workqueue for %s\n", - dev->transport->name); - ret = -ENOMEM; - goto out_free_alua; - } - - /* * Setup work_queue for QUEUE_FULL */ INIT_WORK(&dev->qf_work_queue, target_qf_do_work); - /* - * Preload the initial INQUIRY const values if we are doing - * anything virtual (IBLOCK, FILEIO, RAMDISK), but not for TCM/pSCSI - * passthrough because this is being provided by the backend LLD. - */ - if (!(dev->transport->transport_flags & TRANSPORT_FLAG_PASSTHROUGH)) { - strncpy(&dev->t10_wwn.vendor[0], "LIO-ORG", 8); - strncpy(&dev->t10_wwn.model[0], - dev->transport->inquiry_prod, 16); - strncpy(&dev->t10_wwn.revision[0], - dev->transport->inquiry_rev, 4); - } - scsi_dump_inquiry(dev); spin_lock(&hba->device_lock); @@ -1026,8 +989,6 @@ int target_configure_device(struct se_device *dev) return 0; -out_free_alua: - core_alua_free_lu_gp_mem(dev); out_destroy_device: dev->transport->destroy_device(dev); out_free_index: @@ -1046,8 +1007,6 @@ void target_free_device(struct se_device *dev) WARN_ON(!list_empty(&dev->dev_sep_list)); if (target_dev_configured(dev)) { - destroy_workqueue(dev->tmr_wq); - dev->transport->destroy_device(dev); mutex_lock(&device_mutex); @@ -1159,6 +1118,18 @@ passthrough_parse_cdb(struct se_cmd *cmd, } /* + * With emulate_pr disabled, all reservation requests should fail, + * regardless of whether or not TRANSPORT_FLAG_PASSTHROUGH_PGR is set. + */ + if (!dev->dev_attrib.emulate_pr && + ((cdb[0] == PERSISTENT_RESERVE_IN) || + (cdb[0] == PERSISTENT_RESERVE_OUT) || + (cdb[0] == RELEASE || cdb[0] == RELEASE_10) || + (cdb[0] == RESERVE || cdb[0] == RESERVE_10))) { + return TCM_UNSUPPORTED_SCSI_OPCODE; + } + + /* * For PERSISTENT RESERVE IN/OUT, RELEASE, and RESERVE we need to * emulate the response, since tcmu does not have the information * required to process these commands. diff --git a/drivers/target/target_core_fabric_configfs.c b/drivers/target/target_core_fabric_configfs.c index aa2f4f632ebe..9a6e20a2af7d 100644 --- a/drivers/target/target_core_fabric_configfs.c +++ b/drivers/target/target_core_fabric_configfs.c @@ -203,7 +203,7 @@ static ssize_t target_fabric_mappedlun_write_protect_store( pr_debug("%s_ConfigFS: Changed Initiator ACL: %s" " Mapped LUN: %llu Write Protect bit to %s\n", - se_tpg->se_tpg_tfo->get_fabric_name(), + se_tpg->se_tpg_tfo->fabric_name, se_nacl->initiatorname, lacl->mapped_lun, (wp) ? "ON" : "OFF"); return count; diff --git a/drivers/target/target_core_internal.h b/drivers/target/target_core_internal.h index 0c6635587930..853344415963 100644 --- a/drivers/target/target_core_internal.h +++ b/drivers/target/target_core_internal.h @@ -138,7 +138,6 @@ int init_se_kmem_caches(void); void release_se_kmem_caches(void); u32 scsi_get_new_index(scsi_index_t); void transport_subsystem_check_init(void); -int transport_cmd_finish_abort(struct se_cmd *); unsigned char *transport_dump_cmd_direction(struct se_cmd *); void transport_dump_dev_state(struct se_device *, char *, int *); void transport_dump_dev_info(struct se_device *, struct se_lun *, @@ -148,7 +147,6 @@ int transport_dump_vpd_assoc(struct t10_vpd *, unsigned char *, int); int transport_dump_vpd_ident_type(struct t10_vpd *, unsigned char *, int); int transport_dump_vpd_ident(struct t10_vpd *, unsigned char *, int); void transport_clear_lun_ref(struct se_lun *); -void transport_send_task_abort(struct se_cmd *); sense_reason_t target_cmd_size_check(struct se_cmd *cmd, unsigned int size); void target_qf_do_work(struct work_struct *work); bool target_check_wce(struct se_device *dev); diff --git a/drivers/target/target_core_pr.c b/drivers/target/target_core_pr.c index 10db5656fd5d..397f38cb7f4e 100644 --- a/drivers/target/target_core_pr.c +++ b/drivers/target/target_core_pr.c @@ -235,7 +235,7 @@ target_scsi2_reservation_release(struct se_cmd *cmd) tpg = sess->se_tpg; pr_debug("SCSI-2 Released reservation for %s LUN: %llu ->" " MAPPED LUN: %llu for %s\n", - tpg->se_tpg_tfo->get_fabric_name(), + tpg->se_tpg_tfo->fabric_name, cmd->se_lun->unpacked_lun, cmd->orig_fe_lun, sess->se_node_acl->initiatorname); @@ -278,7 +278,7 @@ target_scsi2_reservation_reserve(struct se_cmd *cmd) if (dev->dev_reserved_node_acl && (dev->dev_reserved_node_acl != sess->se_node_acl)) { pr_err("SCSI-2 RESERVATION CONFLIFT for %s fabric\n", - tpg->se_tpg_tfo->get_fabric_name()); + tpg->se_tpg_tfo->fabric_name); pr_err("Original reserver LUN: %llu %s\n", cmd->se_lun->unpacked_lun, dev->dev_reserved_node_acl->initiatorname); @@ -297,7 +297,7 @@ target_scsi2_reservation_reserve(struct se_cmd *cmd) dev->dev_reservation_flags |= DRF_SPC2_RESERVATIONS_WITH_ISID; } pr_debug("SCSI-2 Reserved %s LUN: %llu -> MAPPED LUN: %llu" - " for %s\n", tpg->se_tpg_tfo->get_fabric_name(), + " for %s\n", tpg->se_tpg_tfo->fabric_name, cmd->se_lun->unpacked_lun, cmd->orig_fe_lun, sess->se_node_acl->initiatorname); @@ -914,11 +914,11 @@ static void core_scsi3_aptpl_reserve( pr_debug("SPC-3 PR [%s] Service Action: APTPL RESERVE created" " new reservation holder TYPE: %s ALL_TG_PT: %d\n", - tpg->se_tpg_tfo->get_fabric_name(), + tpg->se_tpg_tfo->fabric_name, core_scsi3_pr_dump_type(pr_reg->pr_res_type), (pr_reg->pr_reg_all_tg_pt) ? 1 : 0); pr_debug("SPC-3 PR [%s] RESERVE Node: %s%s\n", - tpg->se_tpg_tfo->get_fabric_name(), node_acl->initiatorname, + tpg->se_tpg_tfo->fabric_name, node_acl->initiatorname, i_buf); } @@ -1036,19 +1036,19 @@ static void __core_scsi3_dump_registration( core_pr_dump_initiator_port(pr_reg, i_buf, PR_REG_ISID_ID_LEN); pr_debug("SPC-3 PR [%s] Service Action: REGISTER%s Initiator" - " Node: %s%s\n", tfo->get_fabric_name(), (register_type == REGISTER_AND_MOVE) ? + " Node: %s%s\n", tfo->fabric_name, (register_type == REGISTER_AND_MOVE) ? "_AND_MOVE" : (register_type == REGISTER_AND_IGNORE_EXISTING_KEY) ? "_AND_IGNORE_EXISTING_KEY" : "", nacl->initiatorname, i_buf); pr_debug("SPC-3 PR [%s] registration on Target Port: %s,0x%04x\n", - tfo->get_fabric_name(), tfo->tpg_get_wwn(se_tpg), + tfo->fabric_name, tfo->tpg_get_wwn(se_tpg), tfo->tpg_get_tag(se_tpg)); pr_debug("SPC-3 PR [%s] for %s TCM Subsystem %s Object Target" - " Port(s)\n", tfo->get_fabric_name(), + " Port(s)\n", tfo->fabric_name, (pr_reg->pr_reg_all_tg_pt) ? "ALL" : "SINGLE", dev->transport->name); pr_debug("SPC-3 PR [%s] SA Res Key: 0x%016Lx PRgeneration:" - " 0x%08x APTPL: %d\n", tfo->get_fabric_name(), + " 0x%08x APTPL: %d\n", tfo->fabric_name, pr_reg->pr_res_key, pr_reg->pr_res_generation, pr_reg->pr_reg_aptpl); } @@ -1329,7 +1329,7 @@ static void __core_scsi3_free_registration( */ while (atomic_read(&pr_reg->pr_res_holders) != 0) { pr_debug("SPC-3 PR [%s] waiting for pr_res_holders\n", - tfo->get_fabric_name()); + tfo->fabric_name); cpu_relax(); } @@ -1341,15 +1341,15 @@ static void __core_scsi3_free_registration( spin_lock(&pr_tmpl->registration_lock); pr_debug("SPC-3 PR [%s] Service Action: UNREGISTER Initiator" - " Node: %s%s\n", tfo->get_fabric_name(), + " Node: %s%s\n", tfo->fabric_name, pr_reg->pr_reg_nacl->initiatorname, i_buf); pr_debug("SPC-3 PR [%s] for %s TCM Subsystem %s Object Target" - " Port(s)\n", tfo->get_fabric_name(), + " Port(s)\n", tfo->fabric_name, (pr_reg->pr_reg_all_tg_pt) ? "ALL" : "SINGLE", dev->transport->name); pr_debug("SPC-3 PR [%s] SA Res Key: 0x%016Lx PRgeneration:" - " 0x%08x\n", tfo->get_fabric_name(), pr_reg->pr_res_key, + " 0x%08x\n", tfo->fabric_name, pr_reg->pr_res_key, pr_reg->pr_res_generation); if (!preempt_and_abort_list) { @@ -1645,7 +1645,7 @@ core_scsi3_decode_spec_i_port( dest_tpg = tmp_tpg; pr_debug("SPC-3 PR SPEC_I_PT: Located %s Node:" " %s Port RTPI: %hu\n", - dest_tpg->se_tpg_tfo->get_fabric_name(), + dest_tpg->se_tpg_tfo->fabric_name, dest_node_acl->initiatorname, dest_rtpi); spin_lock(&dev->se_port_lock); @@ -1662,7 +1662,7 @@ core_scsi3_decode_spec_i_port( pr_debug("SPC-3 PR SPEC_I_PT: Got %s data_length: %u tpdl: %u" " tid_len: %d for %s + %s\n", - dest_tpg->se_tpg_tfo->get_fabric_name(), cmd->data_length, + dest_tpg->se_tpg_tfo->fabric_name, cmd->data_length, tpdl, tid_len, i_str, iport_ptr); if (tid_len > tpdl) { @@ -1683,7 +1683,7 @@ core_scsi3_decode_spec_i_port( if (!dest_se_deve) { pr_err("Unable to locate %s dest_se_deve" " from destination RTPI: %hu\n", - dest_tpg->se_tpg_tfo->get_fabric_name(), + dest_tpg->se_tpg_tfo->fabric_name, dest_rtpi); core_scsi3_nodeacl_undepend_item(dest_node_acl); @@ -1704,7 +1704,7 @@ core_scsi3_decode_spec_i_port( pr_debug("SPC-3 PR SPEC_I_PT: Located %s Node: %s" " dest_se_deve mapped_lun: %llu\n", - dest_tpg->se_tpg_tfo->get_fabric_name(), + dest_tpg->se_tpg_tfo->fabric_name, dest_node_acl->initiatorname, dest_se_deve->mapped_lun); /* @@ -1815,7 +1815,7 @@ core_scsi3_decode_spec_i_port( pr_debug("SPC-3 PR [%s] SPEC_I_PT: Successfully" " registered Transport ID for Node: %s%s Mapped LUN:" - " %llu\n", dest_tpg->se_tpg_tfo->get_fabric_name(), + " %llu\n", dest_tpg->se_tpg_tfo->fabric_name, dest_node_acl->initiatorname, i_buf, (dest_se_deve) ? dest_se_deve->mapped_lun : 0); @@ -1913,7 +1913,7 @@ static int core_scsi3_update_aptpl_buf( "res_holder=1\nres_type=%02x\n" "res_scope=%02x\nres_all_tg_pt=%d\n" "mapped_lun=%llu\n", reg_count, - tpg->se_tpg_tfo->get_fabric_name(), + tpg->se_tpg_tfo->fabric_name, pr_reg->pr_reg_nacl->initiatorname, isid_buf, pr_reg->pr_res_key, pr_reg->pr_res_type, pr_reg->pr_res_scope, pr_reg->pr_reg_all_tg_pt, @@ -1923,7 +1923,7 @@ static int core_scsi3_update_aptpl_buf( "initiator_fabric=%s\ninitiator_node=%s\n%s" "sa_res_key=%llu\nres_holder=0\n" "res_all_tg_pt=%d\nmapped_lun=%llu\n", - reg_count, tpg->se_tpg_tfo->get_fabric_name(), + reg_count, tpg->se_tpg_tfo->fabric_name, pr_reg->pr_reg_nacl->initiatorname, isid_buf, pr_reg->pr_res_key, pr_reg->pr_reg_all_tg_pt, pr_reg->pr_res_mapped_lun); @@ -1942,7 +1942,7 @@ static int core_scsi3_update_aptpl_buf( */ snprintf(tmp, 512, "target_fabric=%s\ntarget_node=%s\n" "tpgt=%hu\nport_rtpi=%hu\ntarget_lun=%llu\nPR_REG_END:" - " %d\n", tpg->se_tpg_tfo->get_fabric_name(), + " %d\n", tpg->se_tpg_tfo->fabric_name, tpg->se_tpg_tfo->tpg_get_wwn(tpg), tpg->se_tpg_tfo->tpg_get_tag(tpg), pr_reg->tg_pt_sep_rtpi, pr_reg->pr_aptpl_target_lun, @@ -2168,7 +2168,7 @@ core_scsi3_emulate_pro_register(struct se_cmd *cmd, u64 res_key, u64 sa_res_key, pr_reg->pr_res_key = sa_res_key; pr_debug("SPC-3 PR [%s] REGISTER%s: Changed Reservation" " Key for %s to: 0x%016Lx PRgeneration:" - " 0x%08x\n", cmd->se_tfo->get_fabric_name(), + " 0x%08x\n", cmd->se_tfo->fabric_name, (register_type == REGISTER_AND_IGNORE_EXISTING_KEY) ? "_AND_IGNORE_EXISTING_KEY" : "", pr_reg->pr_reg_nacl->initiatorname, pr_reg->pr_res_key, pr_reg->pr_res_generation); @@ -2356,9 +2356,9 @@ core_scsi3_pro_reserve(struct se_cmd *cmd, int type, int scope, u64 res_key) pr_err("SPC-3 PR: Attempted RESERVE from" " [%s]: %s while reservation already held by" " [%s]: %s, returning RESERVATION_CONFLICT\n", - cmd->se_tfo->get_fabric_name(), + cmd->se_tfo->fabric_name, se_sess->se_node_acl->initiatorname, - pr_res_nacl->se_tpg->se_tpg_tfo->get_fabric_name(), + pr_res_nacl->se_tpg->se_tpg_tfo->fabric_name, pr_res_holder->pr_reg_nacl->initiatorname); spin_unlock(&dev->dev_reservation_lock); @@ -2379,9 +2379,9 @@ core_scsi3_pro_reserve(struct se_cmd *cmd, int type, int scope, u64 res_key) " [%s]: %s trying to change TYPE and/or SCOPE," " while reservation already held by [%s]: %s," " returning RESERVATION_CONFLICT\n", - cmd->se_tfo->get_fabric_name(), + cmd->se_tfo->fabric_name, se_sess->se_node_acl->initiatorname, - pr_res_nacl->se_tpg->se_tpg_tfo->get_fabric_name(), + pr_res_nacl->se_tpg->se_tpg_tfo->fabric_name, pr_res_holder->pr_reg_nacl->initiatorname); spin_unlock(&dev->dev_reservation_lock); @@ -2414,10 +2414,10 @@ core_scsi3_pro_reserve(struct se_cmd *cmd, int type, int scope, u64 res_key) pr_debug("SPC-3 PR [%s] Service Action: RESERVE created new" " reservation holder TYPE: %s ALL_TG_PT: %d\n", - cmd->se_tfo->get_fabric_name(), core_scsi3_pr_dump_type(type), + cmd->se_tfo->fabric_name, core_scsi3_pr_dump_type(type), (pr_reg->pr_reg_all_tg_pt) ? 1 : 0); pr_debug("SPC-3 PR [%s] RESERVE Node: %s%s\n", - cmd->se_tfo->get_fabric_name(), + cmd->se_tfo->fabric_name, se_sess->se_node_acl->initiatorname, i_buf); spin_unlock(&dev->dev_reservation_lock); @@ -2506,12 +2506,12 @@ out: if (!dev->dev_pr_res_holder) { pr_debug("SPC-3 PR [%s] Service Action: %s RELEASE cleared" " reservation holder TYPE: %s ALL_TG_PT: %d\n", - tfo->get_fabric_name(), (explicit) ? "explicit" : + tfo->fabric_name, (explicit) ? "explicit" : "implicit", core_scsi3_pr_dump_type(pr_res_type), (pr_reg->pr_reg_all_tg_pt) ? 1 : 0); } pr_debug("SPC-3 PR [%s] RELEASE Node: %s%s\n", - tfo->get_fabric_name(), se_nacl->initiatorname, + tfo->fabric_name, se_nacl->initiatorname, i_buf); /* * Clear TYPE and SCOPE for the next PROUT Service Action: RESERVE @@ -2609,9 +2609,9 @@ core_scsi3_emulate_pro_release(struct se_cmd *cmd, int type, int scope, " reservation from [%s]: %s with different TYPE " "and/or SCOPE while reservation already held by" " [%s]: %s, returning RESERVATION_CONFLICT\n", - cmd->se_tfo->get_fabric_name(), + cmd->se_tfo->fabric_name, se_sess->se_node_acl->initiatorname, - pr_res_nacl->se_tpg->se_tpg_tfo->get_fabric_name(), + pr_res_nacl->se_tpg->se_tpg_tfo->fabric_name, pr_res_holder->pr_reg_nacl->initiatorname); spin_unlock(&dev->dev_reservation_lock); @@ -2752,7 +2752,7 @@ core_scsi3_emulate_pro_clear(struct se_cmd *cmd, u64 res_key) spin_unlock(&pr_tmpl->registration_lock); pr_debug("SPC-3 PR [%s] Service Action: CLEAR complete\n", - cmd->se_tfo->get_fabric_name()); + cmd->se_tfo->fabric_name); core_scsi3_update_and_write_aptpl(cmd->se_dev, false); @@ -2791,11 +2791,11 @@ static void __core_scsi3_complete_pro_preempt( pr_debug("SPC-3 PR [%s] Service Action: PREEMPT%s created new" " reservation holder TYPE: %s ALL_TG_PT: %d\n", - tfo->get_fabric_name(), (preempt_type == PREEMPT_AND_ABORT) ? "_AND_ABORT" : "", + tfo->fabric_name, (preempt_type == PREEMPT_AND_ABORT) ? "_AND_ABORT" : "", core_scsi3_pr_dump_type(type), (pr_reg->pr_reg_all_tg_pt) ? 1 : 0); pr_debug("SPC-3 PR [%s] PREEMPT%s from Node: %s%s\n", - tfo->get_fabric_name(), (preempt_type == PREEMPT_AND_ABORT) ? "_AND_ABORT" : "", + tfo->fabric_name, (preempt_type == PREEMPT_AND_ABORT) ? "_AND_ABORT" : "", nacl->initiatorname, i_buf); /* * For PREEMPT_AND_ABORT, add the preempting reservation's @@ -3282,7 +3282,7 @@ core_scsi3_emulate_pro_register_and_move(struct se_cmd *cmd, u64 res_key, " proto_ident: 0x%02x does not match ident: 0x%02x" " from fabric: %s\n", proto_ident, dest_se_tpg->proto_id, - dest_tf_ops->get_fabric_name()); + dest_tf_ops->fabric_name); ret = TCM_INVALID_PARAMETER_LIST; goto out; } @@ -3299,7 +3299,7 @@ core_scsi3_emulate_pro_register_and_move(struct se_cmd *cmd, u64 res_key, buf = NULL; pr_debug("SPC-3 PR [%s] Extracted initiator %s identifier: %s" - " %s\n", dest_tf_ops->get_fabric_name(), (iport_ptr != NULL) ? + " %s\n", dest_tf_ops->fabric_name, (iport_ptr != NULL) ? "port" : "device", initiator_str, (iport_ptr != NULL) ? iport_ptr : ""); /* @@ -3344,7 +3344,7 @@ after_iport_check: if (!dest_node_acl) { pr_err("Unable to locate %s dest_node_acl for" - " TransportID%s\n", dest_tf_ops->get_fabric_name(), + " TransportID%s\n", dest_tf_ops->fabric_name, initiator_str); ret = TCM_INVALID_PARAMETER_LIST; goto out; @@ -3360,7 +3360,7 @@ after_iport_check: } pr_debug("SPC-3 PR REGISTER_AND_MOVE: Found %s dest_node_acl:" - " %s from TransportID\n", dest_tf_ops->get_fabric_name(), + " %s from TransportID\n", dest_tf_ops->fabric_name, dest_node_acl->initiatorname); /* @@ -3370,7 +3370,7 @@ after_iport_check: dest_se_deve = core_get_se_deve_from_rtpi(dest_node_acl, rtpi); if (!dest_se_deve) { pr_err("Unable to locate %s dest_se_deve from RTPI:" - " %hu\n", dest_tf_ops->get_fabric_name(), rtpi); + " %hu\n", dest_tf_ops->fabric_name, rtpi); ret = TCM_INVALID_PARAMETER_LIST; goto out; } @@ -3385,7 +3385,7 @@ after_iport_check: pr_debug("SPC-3 PR REGISTER_AND_MOVE: Located %s node %s LUN" " ACL for dest_se_deve->mapped_lun: %llu\n", - dest_tf_ops->get_fabric_name(), dest_node_acl->initiatorname, + dest_tf_ops->fabric_name, dest_node_acl->initiatorname, dest_se_deve->mapped_lun); /* @@ -3501,13 +3501,13 @@ after_iport_check: pr_debug("SPC-3 PR [%s] Service Action: REGISTER_AND_MOVE" " created new reservation holder TYPE: %s on object RTPI:" - " %hu PRGeneration: 0x%08x\n", dest_tf_ops->get_fabric_name(), + " %hu PRGeneration: 0x%08x\n", dest_tf_ops->fabric_name, core_scsi3_pr_dump_type(type), rtpi, dest_pr_reg->pr_res_generation); pr_debug("SPC-3 PR Successfully moved reservation from" " %s Fabric Node: %s%s -> %s Fabric Node: %s %s\n", - tf_ops->get_fabric_name(), pr_reg_nacl->initiatorname, - i_buf, dest_tf_ops->get_fabric_name(), + tf_ops->fabric_name, pr_reg_nacl->initiatorname, + i_buf, dest_tf_ops->fabric_name, dest_node_acl->initiatorname, (iport_ptr != NULL) ? iport_ptr : ""); /* @@ -4095,6 +4095,8 @@ target_check_reservation(struct se_cmd *cmd) return 0; if (dev->se_hba->hba_flags & HBA_FLAGS_INTERNAL_USE) return 0; + if (!dev->dev_attrib.emulate_pr) + return 0; if (dev->transport->transport_flags & TRANSPORT_FLAG_PASSTHROUGH_PGR) return 0; diff --git a/drivers/target/target_core_pscsi.c b/drivers/target/target_core_pscsi.c index 47d76c862014..b5388a106567 100644 --- a/drivers/target/target_core_pscsi.c +++ b/drivers/target/target_core_pscsi.c @@ -179,20 +179,20 @@ out_free: static void pscsi_set_inquiry_info(struct scsi_device *sdev, struct t10_wwn *wwn) { - unsigned char *buf; - if (sdev->inquiry_len < INQUIRY_LEN) return; - - buf = sdev->inquiry; - if (!buf) - return; /* - * Use sdev->inquiry from drivers/scsi/scsi_scan.c:scsi_alloc_sdev() + * Use sdev->inquiry data from drivers/scsi/scsi_scan.c:scsi_add_lun() */ - memcpy(&wwn->vendor[0], &buf[8], sizeof(wwn->vendor)); - memcpy(&wwn->model[0], &buf[16], sizeof(wwn->model)); - memcpy(&wwn->revision[0], &buf[32], sizeof(wwn->revision)); + BUILD_BUG_ON(sizeof(wwn->vendor) != INQUIRY_VENDOR_LEN + 1); + snprintf(wwn->vendor, sizeof(wwn->vendor), + "%." __stringify(INQUIRY_VENDOR_LEN) "s", sdev->vendor); + BUILD_BUG_ON(sizeof(wwn->model) != INQUIRY_MODEL_LEN + 1); + snprintf(wwn->model, sizeof(wwn->model), + "%." __stringify(INQUIRY_MODEL_LEN) "s", sdev->model); + BUILD_BUG_ON(sizeof(wwn->revision) != INQUIRY_REVISION_LEN + 1); + snprintf(wwn->revision, sizeof(wwn->revision), + "%." __stringify(INQUIRY_REVISION_LEN) "s", sdev->rev); } static int @@ -811,7 +811,6 @@ static ssize_t pscsi_show_configfs_dev_params(struct se_device *dev, char *b) struct scsi_device *sd = pdv->pdv_sd; unsigned char host_id[16]; ssize_t bl; - int i; if (phv->phv_mode == PHV_VIRTUAL_HOST_ID) snprintf(host_id, 16, "%d", pdv->pdv_host_id); @@ -824,29 +823,12 @@ static ssize_t pscsi_show_configfs_dev_params(struct se_device *dev, char *b) host_id); if (sd) { - bl += sprintf(b + bl, " "); - bl += sprintf(b + bl, "Vendor: "); - for (i = 0; i < 8; i++) { - if (ISPRINT(sd->vendor[i])) /* printable character? */ - bl += sprintf(b + bl, "%c", sd->vendor[i]); - else - bl += sprintf(b + bl, " "); - } - bl += sprintf(b + bl, " Model: "); - for (i = 0; i < 16; i++) { - if (ISPRINT(sd->model[i])) /* printable character ? */ - bl += sprintf(b + bl, "%c", sd->model[i]); - else - bl += sprintf(b + bl, " "); - } - bl += sprintf(b + bl, " Rev: "); - for (i = 0; i < 4; i++) { - if (ISPRINT(sd->rev[i])) /* printable character ? */ - bl += sprintf(b + bl, "%c", sd->rev[i]); - else - bl += sprintf(b + bl, " "); - } - bl += sprintf(b + bl, "\n"); + bl += sprintf(b + bl, " Vendor: %." + __stringify(INQUIRY_VENDOR_LEN) "s", sd->vendor); + bl += sprintf(b + bl, " Model: %." + __stringify(INQUIRY_MODEL_LEN) "s", sd->model); + bl += sprintf(b + bl, " Rev: %." + __stringify(INQUIRY_REVISION_LEN) "s\n", sd->rev); } return bl; } @@ -1094,7 +1076,7 @@ static void pscsi_req_done(struct request *req, blk_status_t status) break; } - __blk_put_request(req->q, req); + blk_put_request(req); kfree(pt); } diff --git a/drivers/target/target_core_spc.c b/drivers/target/target_core_spc.c index f459118bc11b..47094ae01c04 100644 --- a/drivers/target/target_core_spc.c +++ b/drivers/target/target_core_spc.c @@ -108,12 +108,19 @@ spc_emulate_inquiry_std(struct se_cmd *cmd, unsigned char *buf) buf[7] = 0x2; /* CmdQue=1 */ - memcpy(&buf[8], "LIO-ORG ", 8); - memset(&buf[16], 0x20, 16); + /* + * ASCII data fields described as being left-aligned shall have any + * unused bytes at the end of the field (i.e., highest offset) and the + * unused bytes shall be filled with ASCII space characters (20h). + */ + memset(&buf[8], 0x20, + INQUIRY_VENDOR_LEN + INQUIRY_MODEL_LEN + INQUIRY_REVISION_LEN); + memcpy(&buf[8], dev->t10_wwn.vendor, + strnlen(dev->t10_wwn.vendor, INQUIRY_VENDOR_LEN)); memcpy(&buf[16], dev->t10_wwn.model, - min_t(size_t, strlen(dev->t10_wwn.model), 16)); + strnlen(dev->t10_wwn.model, INQUIRY_MODEL_LEN)); memcpy(&buf[32], dev->t10_wwn.revision, - min_t(size_t, strlen(dev->t10_wwn.revision), 4)); + strnlen(dev->t10_wwn.revision, INQUIRY_REVISION_LEN)); buf[4] = 31; /* Set additional length to 31 */ return 0; @@ -251,7 +258,10 @@ check_t10_vend_desc: buf[off] = 0x2; /* ASCII */ buf[off+1] = 0x1; /* T10 Vendor ID */ buf[off+2] = 0x0; - memcpy(&buf[off+4], "LIO-ORG", 8); + /* left align Vendor ID and pad with spaces */ + memset(&buf[off+4], 0x20, INQUIRY_VENDOR_LEN); + memcpy(&buf[off+4], dev->t10_wwn.vendor, + strnlen(dev->t10_wwn.vendor, INQUIRY_VENDOR_LEN)); /* Extra Byte for NULL Terminator */ id_len++; /* Identifier Length */ @@ -1281,6 +1291,14 @@ spc_parse_cdb(struct se_cmd *cmd, unsigned int *size) struct se_device *dev = cmd->se_dev; unsigned char *cdb = cmd->t_task_cdb; + if (!dev->dev_attrib.emulate_pr && + ((cdb[0] == PERSISTENT_RESERVE_IN) || + (cdb[0] == PERSISTENT_RESERVE_OUT) || + (cdb[0] == RELEASE || cdb[0] == RELEASE_10) || + (cdb[0] == RESERVE || cdb[0] == RESERVE_10))) { + return TCM_UNSUPPORTED_SCSI_OPCODE; + } + switch (cdb[0]) { case MODE_SELECT: *size = cdb[4]; diff --git a/drivers/target/target_core_stat.c b/drivers/target/target_core_stat.c index f0db91ebd735..8d9ceedfd455 100644 --- a/drivers/target/target_core_stat.c +++ b/drivers/target/target_core_stat.c @@ -246,43 +246,25 @@ static ssize_t target_stat_lu_lu_name_show(struct config_item *item, char *page) static ssize_t target_stat_lu_vend_show(struct config_item *item, char *page) { struct se_device *dev = to_stat_lu_dev(item); - int i; - char str[sizeof(dev->t10_wwn.vendor)+1]; - /* scsiLuVendorId */ - for (i = 0; i < sizeof(dev->t10_wwn.vendor); i++) - str[i] = ISPRINT(dev->t10_wwn.vendor[i]) ? - dev->t10_wwn.vendor[i] : ' '; - str[i] = '\0'; - return snprintf(page, PAGE_SIZE, "%s\n", str); + return snprintf(page, PAGE_SIZE, "%-" __stringify(INQUIRY_VENDOR_LEN) + "s\n", dev->t10_wwn.vendor); } static ssize_t target_stat_lu_prod_show(struct config_item *item, char *page) { struct se_device *dev = to_stat_lu_dev(item); - int i; - char str[sizeof(dev->t10_wwn.model)+1]; - /* scsiLuProductId */ - for (i = 0; i < sizeof(dev->t10_wwn.model); i++) - str[i] = ISPRINT(dev->t10_wwn.model[i]) ? - dev->t10_wwn.model[i] : ' '; - str[i] = '\0'; - return snprintf(page, PAGE_SIZE, "%s\n", str); + return snprintf(page, PAGE_SIZE, "%-" __stringify(INQUIRY_MODEL_LEN) + "s\n", dev->t10_wwn.model); } static ssize_t target_stat_lu_rev_show(struct config_item *item, char *page) { struct se_device *dev = to_stat_lu_dev(item); - int i; - char str[sizeof(dev->t10_wwn.revision)+1]; - /* scsiLuRevisionId */ - for (i = 0; i < sizeof(dev->t10_wwn.revision); i++) - str[i] = ISPRINT(dev->t10_wwn.revision[i]) ? - dev->t10_wwn.revision[i] : ' '; - str[i] = '\0'; - return snprintf(page, PAGE_SIZE, "%s\n", str); + return snprintf(page, PAGE_SIZE, "%-" __stringify(INQUIRY_REVISION_LEN) + "s\n", dev->t10_wwn.revision); } static ssize_t target_stat_lu_dev_type_show(struct config_item *item, char *page) @@ -612,7 +594,7 @@ static ssize_t target_stat_tgt_port_name_show(struct config_item *item, dev = rcu_dereference(lun->lun_se_dev); if (dev) ret = snprintf(page, PAGE_SIZE, "%sPort#%u\n", - tpg->se_tpg_tfo->get_fabric_name(), + tpg->se_tpg_tfo->fabric_name, lun->lun_rtpi); rcu_read_unlock(); return ret; @@ -767,7 +749,7 @@ static ssize_t target_stat_transport_device_show(struct config_item *item, if (dev) { /* scsiTransportType */ ret = snprintf(page, PAGE_SIZE, "scsiTransport%s\n", - tpg->se_tpg_tfo->get_fabric_name()); + tpg->se_tpg_tfo->fabric_name); } rcu_read_unlock(); return ret; diff --git a/drivers/target/target_core_tmr.c b/drivers/target/target_core_tmr.c index 6d1179a7f043..ad0061e09d4c 100644 --- a/drivers/target/target_core_tmr.c +++ b/drivers/target/target_core_tmr.c @@ -163,18 +163,23 @@ void core_tmr_abort_task( continue; printk("ABORT_TASK: Found referenced %s task_tag: %llu\n", - se_cmd->se_tfo->get_fabric_name(), ref_tag); + se_cmd->se_tfo->fabric_name, ref_tag); - if (!__target_check_io_state(se_cmd, se_sess, 0)) + if (!__target_check_io_state(se_cmd, se_sess, + dev->dev_attrib.emulate_tas)) continue; spin_unlock_irqrestore(&se_sess->sess_cmd_lock, flags); - cancel_work_sync(&se_cmd->work); - transport_wait_for_tasks(se_cmd); + /* + * Ensure that this ABORT request is visible to the LU RESET + * code. + */ + if (!tmr->tmr_dev) + WARN_ON_ONCE(transport_lookup_tmr_lun(tmr->task_cmd, + se_cmd->orig_fe_lun) < 0); - if (!transport_cmd_finish_abort(se_cmd)) - target_put_sess_cmd(se_cmd); + target_put_cmd_and_wait(se_cmd); printk("ABORT_TASK: Sending TMR_FUNCTION_COMPLETE for" " ref_tag: %llu\n", ref_tag); @@ -268,14 +273,28 @@ static void core_tmr_drain_tmr_list( (preempt_and_abort_list) ? "Preempt" : "", tmr_p, tmr_p->function, tmr_p->response, cmd->t_state); - cancel_work_sync(&cmd->work); - transport_wait_for_tasks(cmd); - - if (!transport_cmd_finish_abort(cmd)) - target_put_sess_cmd(cmd); + target_put_cmd_and_wait(cmd); } } +/** + * core_tmr_drain_state_list() - abort SCSI commands associated with a device + * + * @dev: Device for which to abort outstanding SCSI commands. + * @prout_cmd: Pointer to the SCSI PREEMPT AND ABORT if this function is called + * to realize the PREEMPT AND ABORT functionality. + * @tmr_sess: Session through which the LUN RESET has been received. + * @tas: Task Aborted Status (TAS) bit from the SCSI control mode page. + * A quote from SPC-4, paragraph "7.5.10 Control mode page": + * "A task aborted status (TAS) bit set to zero specifies that + * aborted commands shall be terminated by the device server + * without any response to the application client. A TAS bit set + * to one specifies that commands aborted by the actions of an I_T + * nexus other than the I_T nexus on which the command was + * received shall be completed with TASK ABORTED status." + * @preempt_and_abort_list: For the PREEMPT AND ABORT functionality, a list + * with registrations that will be preempted. + */ static void core_tmr_drain_state_list( struct se_device *dev, struct se_cmd *prout_cmd, @@ -350,18 +369,7 @@ static void core_tmr_drain_state_list( cmd->tag, (preempt_and_abort_list) ? "preempt" : "", cmd->pr_res_key); - /* - * If the command may be queued onto a workqueue cancel it now. - * - * This is equivalent to removal from the execute queue in the - * loop above, but we do it down here given that - * cancel_work_sync may block. - */ - cancel_work_sync(&cmd->work); - transport_wait_for_tasks(cmd); - - if (!transport_cmd_finish_abort(cmd)) - target_put_sess_cmd(cmd); + target_put_cmd_and_wait(cmd); } } @@ -398,7 +406,7 @@ int core_tmr_lun_reset( if (tmr_nacl && tmr_tpg) { pr_debug("LUN_RESET: TMR caller fabric: %s" " initiator port %s\n", - tmr_tpg->se_tpg_tfo->get_fabric_name(), + tmr_tpg->se_tpg_tfo->fabric_name, tmr_nacl->initiatorname); } } diff --git a/drivers/target/target_core_tpg.c b/drivers/target/target_core_tpg.c index 02e8a5d86658..e2ace1059437 100644 --- a/drivers/target/target_core_tpg.c +++ b/drivers/target/target_core_tpg.c @@ -151,7 +151,7 @@ void core_tpg_add_node_to_devs( pr_debug("TARGET_CORE[%s]->TPG[%u]_LUN[%llu] - Adding %s" " access for LUN in Demo Mode\n", - tpg->se_tpg_tfo->get_fabric_name(), + tpg->se_tpg_tfo->fabric_name, tpg->se_tpg_tfo->tpg_get_tag(tpg), lun->unpacked_lun, lun_access_ro ? "READ-ONLY" : "READ-WRITE"); @@ -176,7 +176,7 @@ target_set_nacl_queue_depth(struct se_portal_group *tpg, if (!acl->queue_depth) { pr_warn("Queue depth for %s Initiator Node: %s is 0," - "defaulting to 1.\n", tpg->se_tpg_tfo->get_fabric_name(), + "defaulting to 1.\n", tpg->se_tpg_tfo->fabric_name, acl->initiatorname); acl->queue_depth = 1; } @@ -227,11 +227,11 @@ static void target_add_node_acl(struct se_node_acl *acl) pr_debug("%s_TPG[%hu] - Added %s ACL with TCQ Depth: %d for %s" " Initiator Node: %s\n", - tpg->se_tpg_tfo->get_fabric_name(), + tpg->se_tpg_tfo->fabric_name, tpg->se_tpg_tfo->tpg_get_tag(tpg), acl->dynamic_node_acl ? "DYNAMIC" : "", acl->queue_depth, - tpg->se_tpg_tfo->get_fabric_name(), + tpg->se_tpg_tfo->fabric_name, acl->initiatorname); } @@ -313,7 +313,7 @@ struct se_node_acl *core_tpg_add_initiator_node_acl( if (acl->dynamic_node_acl) { acl->dynamic_node_acl = 0; pr_debug("%s_TPG[%u] - Replacing dynamic ACL" - " for %s\n", tpg->se_tpg_tfo->get_fabric_name(), + " for %s\n", tpg->se_tpg_tfo->fabric_name, tpg->se_tpg_tfo->tpg_get_tag(tpg), initiatorname); mutex_unlock(&tpg->acl_node_mutex); return acl; @@ -321,7 +321,7 @@ struct se_node_acl *core_tpg_add_initiator_node_acl( pr_err("ACL entry for %s Initiator" " Node %s already exists for TPG %u, ignoring" - " request.\n", tpg->se_tpg_tfo->get_fabric_name(), + " request.\n", tpg->se_tpg_tfo->fabric_name, initiatorname, tpg->se_tpg_tfo->tpg_get_tag(tpg)); mutex_unlock(&tpg->acl_node_mutex); return ERR_PTR(-EEXIST); @@ -380,9 +380,9 @@ void core_tpg_del_initiator_node_acl(struct se_node_acl *acl) core_free_device_list_for_node(acl, tpg); pr_debug("%s_TPG[%hu] - Deleted ACL with TCQ Depth: %d for %s" - " Initiator Node: %s\n", tpg->se_tpg_tfo->get_fabric_name(), + " Initiator Node: %s\n", tpg->se_tpg_tfo->fabric_name, tpg->se_tpg_tfo->tpg_get_tag(tpg), acl->queue_depth, - tpg->se_tpg_tfo->get_fabric_name(), acl->initiatorname); + tpg->se_tpg_tfo->fabric_name, acl->initiatorname); kfree(acl); } @@ -418,7 +418,7 @@ int core_tpg_set_initiator_node_queue_depth( pr_debug("Successfully changed queue depth to: %d for Initiator" " Node: %s on %s Target Portal Group: %u\n", acl->queue_depth, - acl->initiatorname, tpg->se_tpg_tfo->get_fabric_name(), + acl->initiatorname, tpg->se_tpg_tfo->fabric_name, tpg->se_tpg_tfo->tpg_get_tag(tpg)); return 0; @@ -512,7 +512,7 @@ int core_tpg_register( spin_unlock_bh(&tpg_lock); pr_debug("TARGET_CORE[%s]: Allocated portal_group for endpoint: %s, " - "Proto: %d, Portal Tag: %u\n", se_tpg->se_tpg_tfo->get_fabric_name(), + "Proto: %d, Portal Tag: %u\n", se_tpg->se_tpg_tfo->fabric_name, se_tpg->se_tpg_tfo->tpg_get_wwn(se_tpg) ? se_tpg->se_tpg_tfo->tpg_get_wwn(se_tpg) : NULL, se_tpg->proto_id, se_tpg->se_tpg_tfo->tpg_get_tag(se_tpg)); @@ -528,7 +528,7 @@ int core_tpg_deregister(struct se_portal_group *se_tpg) LIST_HEAD(node_list); pr_debug("TARGET_CORE[%s]: Deallocating portal_group for endpoint: %s, " - "Proto: %d, Portal Tag: %u\n", tfo->get_fabric_name(), + "Proto: %d, Portal Tag: %u\n", tfo->fabric_name, tfo->tpg_get_wwn(se_tpg) ? tfo->tpg_get_wwn(se_tpg) : NULL, se_tpg->proto_id, tfo->tpg_get_tag(se_tpg)); @@ -577,7 +577,6 @@ struct se_lun *core_tpg_alloc_lun( } lun->unpacked_lun = unpacked_lun; atomic_set(&lun->lun_acl_count, 0); - init_completion(&lun->lun_ref_comp); init_completion(&lun->lun_shutdown_comp); INIT_LIST_HEAD(&lun->lun_deve_list); INIT_LIST_HEAD(&lun->lun_dev_link); diff --git a/drivers/target/target_core_transport.c b/drivers/target/target_core_transport.c index 2cfd61d62e97..ef9e75b359d4 100644 --- a/drivers/target/target_core_transport.c +++ b/drivers/target/target_core_transport.c @@ -224,19 +224,28 @@ void transport_subsystem_check_init(void) sub_api_initialized = 1; } +static void target_release_sess_cmd_refcnt(struct percpu_ref *ref) +{ + struct se_session *sess = container_of(ref, typeof(*sess), cmd_count); + + wake_up(&sess->cmd_list_wq); +} + /** * transport_init_session - initialize a session object * @se_sess: Session object pointer. * * The caller must have zero-initialized @se_sess before calling this function. */ -void transport_init_session(struct se_session *se_sess) +int transport_init_session(struct se_session *se_sess) { INIT_LIST_HEAD(&se_sess->sess_list); INIT_LIST_HEAD(&se_sess->sess_acl_list); INIT_LIST_HEAD(&se_sess->sess_cmd_list); spin_lock_init(&se_sess->sess_cmd_lock); init_waitqueue_head(&se_sess->cmd_list_wq); + return percpu_ref_init(&se_sess->cmd_count, + target_release_sess_cmd_refcnt, 0, GFP_KERNEL); } EXPORT_SYMBOL(transport_init_session); @@ -247,6 +256,7 @@ EXPORT_SYMBOL(transport_init_session); struct se_session *transport_alloc_session(enum target_prot_op sup_prot_ops) { struct se_session *se_sess; + int ret; se_sess = kmem_cache_zalloc(se_sess_cache, GFP_KERNEL); if (!se_sess) { @@ -254,7 +264,11 @@ struct se_session *transport_alloc_session(enum target_prot_op sup_prot_ops) " se_sess_cache\n"); return ERR_PTR(-ENOMEM); } - transport_init_session(se_sess); + ret = transport_init_session(se_sess); + if (ret < 0) { + kmem_cache_free(se_sess_cache, se_sess); + return ERR_PTR(ret); + } se_sess->sup_prot_ops = sup_prot_ops; return se_sess; @@ -273,14 +287,11 @@ int transport_alloc_session_tags(struct se_session *se_sess, { int rc; - se_sess->sess_cmd_map = kcalloc(tag_size, tag_num, - GFP_KERNEL | __GFP_NOWARN | __GFP_RETRY_MAYFAIL); + se_sess->sess_cmd_map = kvcalloc(tag_size, tag_num, + GFP_KERNEL | __GFP_RETRY_MAYFAIL); if (!se_sess->sess_cmd_map) { - se_sess->sess_cmd_map = vzalloc(array_size(tag_size, tag_num)); - if (!se_sess->sess_cmd_map) { - pr_err("Unable to allocate se_sess->sess_cmd_map\n"); - return -ENOMEM; - } + pr_err("Unable to allocate se_sess->sess_cmd_map\n"); + return -ENOMEM; } rc = sbitmap_queue_init_node(&se_sess->sess_tag_pool, tag_num, -1, @@ -397,7 +408,7 @@ void __transport_register_session( list_add_tail(&se_sess->sess_list, &se_tpg->tpg_sess_list); pr_debug("TARGET_CORE[%s]: Registered fabric_sess_ptr: %p\n", - se_tpg->se_tpg_tfo->get_fabric_name(), se_sess->fabric_sess_ptr); + se_tpg->se_tpg_tfo->fabric_name, se_sess->fabric_sess_ptr); } EXPORT_SYMBOL(__transport_register_session); @@ -581,6 +592,7 @@ void transport_free_session(struct se_session *se_sess) sbitmap_queue_free(&se_sess->sess_tag_pool); kvfree(se_sess->sess_cmd_map); } + percpu_ref_exit(&se_sess->cmd_count); kmem_cache_free(se_sess_cache, se_sess); } EXPORT_SYMBOL(transport_free_session); @@ -602,7 +614,7 @@ void transport_deregister_session(struct se_session *se_sess) spin_unlock_irqrestore(&se_tpg->session_lock, flags); pr_debug("TARGET_CORE[%s]: Deregistered fabric_sess\n", - se_tpg->se_tpg_tfo->get_fabric_name()); + se_tpg->se_tpg_tfo->fabric_name); /* * If last kref is dropping now for an explicit NodeACL, awake sleeping * ->acl_free_comp caller to wakeup configfs se_node_acl->acl_group @@ -695,32 +707,6 @@ static void transport_lun_remove_cmd(struct se_cmd *cmd) percpu_ref_put(&lun->lun_ref); } -int transport_cmd_finish_abort(struct se_cmd *cmd) -{ - bool send_tas = cmd->transport_state & CMD_T_TAS; - bool ack_kref = (cmd->se_cmd_flags & SCF_ACK_KREF); - int ret = 0; - - if (send_tas) - transport_send_task_abort(cmd); - - if (cmd->se_cmd_flags & SCF_SE_LUN_CMD) - transport_lun_remove_cmd(cmd); - /* - * Allow the fabric driver to unmap any resources before - * releasing the descriptor via TFO->release_cmd() - */ - if (!send_tas) - cmd->se_tfo->aborted_task(cmd); - - if (transport_cmd_check_stop_to_fabric(cmd)) - return 1; - if (!send_tas && ack_kref) - ret = target_put_sess_cmd(cmd); - - return ret; -} - static void target_complete_failure_work(struct work_struct *work) { struct se_cmd *cmd = container_of(work, struct se_cmd, work); @@ -770,12 +756,88 @@ void transport_copy_sense_to_cmd(struct se_cmd *cmd, unsigned char *sense) } EXPORT_SYMBOL(transport_copy_sense_to_cmd); +static void target_handle_abort(struct se_cmd *cmd) +{ + bool tas = cmd->transport_state & CMD_T_TAS; + bool ack_kref = cmd->se_cmd_flags & SCF_ACK_KREF; + int ret; + + pr_debug("tag %#llx: send_abort_response = %d\n", cmd->tag, tas); + + if (tas) { + if (!(cmd->se_cmd_flags & SCF_SCSI_TMR_CDB)) { + cmd->scsi_status = SAM_STAT_TASK_ABORTED; + pr_debug("Setting SAM_STAT_TASK_ABORTED status for CDB: 0x%02x, ITT: 0x%08llx\n", + cmd->t_task_cdb[0], cmd->tag); + trace_target_cmd_complete(cmd); + ret = cmd->se_tfo->queue_status(cmd); + if (ret) { + transport_handle_queue_full(cmd, cmd->se_dev, + ret, false); + return; + } + } else { + cmd->se_tmr_req->response = TMR_FUNCTION_REJECTED; + cmd->se_tfo->queue_tm_rsp(cmd); + } + } else { + /* + * Allow the fabric driver to unmap any resources before + * releasing the descriptor via TFO->release_cmd(). + */ + cmd->se_tfo->aborted_task(cmd); + if (ack_kref) + WARN_ON_ONCE(target_put_sess_cmd(cmd) != 0); + /* + * To do: establish a unit attention condition on the I_T + * nexus associated with cmd. See also the paragraph "Aborting + * commands" in SAM. + */ + } + + WARN_ON_ONCE(kref_read(&cmd->cmd_kref) == 0); + + transport_lun_remove_cmd(cmd); + + transport_cmd_check_stop_to_fabric(cmd); +} + +static void target_abort_work(struct work_struct *work) +{ + struct se_cmd *cmd = container_of(work, struct se_cmd, work); + + target_handle_abort(cmd); +} + +static bool target_cmd_interrupted(struct se_cmd *cmd) +{ + int post_ret; + + if (cmd->transport_state & CMD_T_ABORTED) { + if (cmd->transport_complete_callback) + cmd->transport_complete_callback(cmd, false, &post_ret); + INIT_WORK(&cmd->work, target_abort_work); + queue_work(target_completion_wq, &cmd->work); + return true; + } else if (cmd->transport_state & CMD_T_STOP) { + if (cmd->transport_complete_callback) + cmd->transport_complete_callback(cmd, false, &post_ret); + complete_all(&cmd->t_transport_stop_comp); + return true; + } + + return false; +} + +/* May be called from interrupt context so must not sleep. */ void target_complete_cmd(struct se_cmd *cmd, u8 scsi_status) { - struct se_device *dev = cmd->se_dev; int success; unsigned long flags; + if (target_cmd_interrupted(cmd)) + return; + cmd->scsi_status = scsi_status; spin_lock_irqsave(&cmd->t_state_lock, flags); @@ -791,34 +853,12 @@ void target_complete_cmd(struct se_cmd *cmd, u8 scsi_status) break; } - /* - * Check for case where an explicit ABORT_TASK has been received - * and transport_wait_for_tasks() will be waiting for completion.. - */ - if (cmd->transport_state & CMD_T_ABORTED || - cmd->transport_state & CMD_T_STOP) { - spin_unlock_irqrestore(&cmd->t_state_lock, flags); - /* - * If COMPARE_AND_WRITE was stopped by __transport_wait_for_tasks(), - * release se_device->caw_sem obtained by sbc_compare_and_write() - * since target_complete_ok_work() or target_complete_failure_work() - * won't be called to invoke the normal CAW completion callbacks. - */ - if (cmd->se_cmd_flags & SCF_COMPARE_AND_WRITE) { - up(&dev->caw_sem); - } - complete_all(&cmd->t_transport_stop_comp); - return; - } else if (!success) { - INIT_WORK(&cmd->work, target_complete_failure_work); - } else { - INIT_WORK(&cmd->work, target_complete_ok_work); - } - cmd->t_state = TRANSPORT_COMPLETE; cmd->transport_state |= (CMD_T_COMPLETE | CMD_T_ACTIVE); spin_unlock_irqrestore(&cmd->t_state_lock, flags); + INIT_WORK(&cmd->work, success ? target_complete_ok_work : + target_complete_failure_work); if (cmd->se_cmd_flags & SCF_USE_CPUID) queue_work_on(cmd->cpuid, target_completion_wq, &cmd->work); else @@ -880,7 +920,7 @@ void target_qf_do_work(struct work_struct *work) atomic_dec_mb(&dev->dev_qf_count); pr_debug("Processing %s cmd: %p QUEUE_FULL in work queue" - " context: %s\n", cmd->se_tfo->get_fabric_name(), cmd, + " context: %s\n", cmd->se_tfo->fabric_name, cmd, (cmd->t_state == TRANSPORT_COMPLETE_QF_OK) ? "COMPLETE_OK" : (cmd->t_state == TRANSPORT_COMPLETE_QF_WP) ? "WRITE_PENDING" : "UNKNOWN"); @@ -1244,7 +1284,7 @@ target_cmd_size_check(struct se_cmd *cmd, unsigned int size) } else if (size != cmd->data_length) { pr_warn_ratelimited("TARGET_CORE[%s]: Expected Transfer Length:" " %u does not match SCSI CDB Length: %u for SAM Opcode:" - " 0x%02x\n", cmd->se_tfo->get_fabric_name(), + " 0x%02x\n", cmd->se_tfo->fabric_name, cmd->data_length, size, cmd->t_task_cdb[0]); if (cmd->data_direction == DMA_TO_DEVICE) { @@ -1316,7 +1356,8 @@ void transport_init_se_cmd( INIT_LIST_HEAD(&cmd->se_cmd_list); INIT_LIST_HEAD(&cmd->state_list); init_completion(&cmd->t_transport_stop_comp); - cmd->compl = NULL; + cmd->free_compl = NULL; + cmd->abrt_compl = NULL; spin_lock_init(&cmd->t_state_lock); INIT_WORK(&cmd->work, NULL); kref_init(&cmd->cmd_kref); @@ -1396,7 +1437,7 @@ target_setup_cmd_from_cdb(struct se_cmd *cmd, unsigned char *cdb) ret = dev->transport->parse_cdb(cmd); if (ret == TCM_UNSUPPORTED_SCSI_OPCODE) pr_warn_ratelimited("%s/%s: Unsupported SCSI Opcode 0x%02x, sending CHECK_CONDITION.\n", - cmd->se_tfo->get_fabric_name(), + cmd->se_tfo->fabric_name, cmd->se_sess->se_node_acl->initiatorname, cmd->t_task_cdb[0]); if (ret) @@ -1792,8 +1833,11 @@ void transport_generic_request_failure(struct se_cmd *cmd, if (cmd->transport_complete_callback) cmd->transport_complete_callback(cmd, false, &post_ret); - if (transport_check_aborted_status(cmd, 1)) + if (cmd->transport_state & CMD_T_ABORTED) { + INIT_WORK(&cmd->work, target_abort_work); + queue_work(target_completion_wq, &cmd->work); return; + } switch (sense_reason) { case TCM_NON_EXISTENT_LUN: @@ -1999,8 +2043,6 @@ static bool target_handle_task_attr(struct se_cmd *cmd) return true; } -static int __transport_check_aborted_status(struct se_cmd *, int); - void target_execute_cmd(struct se_cmd *cmd) { /* @@ -2009,20 +2051,10 @@ void target_execute_cmd(struct se_cmd *cmd) * * If the received CDB has already been aborted stop processing it here. */ - spin_lock_irq(&cmd->t_state_lock); - if (__transport_check_aborted_status(cmd, 1)) { - spin_unlock_irq(&cmd->t_state_lock); - return; - } - if (cmd->transport_state & CMD_T_STOP) { - pr_debug("%s:%d CMD_T_STOP for ITT: 0x%08llx\n", - __func__, __LINE__, cmd->tag); - - spin_unlock_irq(&cmd->t_state_lock); - complete_all(&cmd->t_transport_stop_comp); + if (target_cmd_interrupted(cmd)) return; - } + spin_lock_irq(&cmd->t_state_lock); cmd->t_state = TRANSPORT_PROCESSING; cmd->transport_state &= ~CMD_T_PRE_EXECUTE; cmd->transport_state |= CMD_T_ACTIVE | CMD_T_SENT; @@ -2571,7 +2603,8 @@ transport_generic_new_cmd(struct se_cmd *cmd) * Determine if frontend context caller is requesting the stopping of * this command for frontend exceptions. */ - if (cmd->transport_state & CMD_T_STOP) { + if (cmd->transport_state & CMD_T_STOP && + !cmd->se_tfo->write_pending_must_be_called) { pr_debug("%s:%d CMD_T_STOP for ITT: 0x%08llx\n", __func__, __LINE__, cmd->tag); @@ -2635,13 +2668,29 @@ static void target_wait_free_cmd(struct se_cmd *cmd, bool *aborted, bool *tas) } /* + * Call target_put_sess_cmd() and wait until target_release_cmd_kref(@cmd) has + * finished. + */ +void target_put_cmd_and_wait(struct se_cmd *cmd) +{ + DECLARE_COMPLETION_ONSTACK(compl); + + WARN_ON_ONCE(cmd->abrt_compl); + cmd->abrt_compl = &compl; + target_put_sess_cmd(cmd); + wait_for_completion(&compl); +} + +/* * This function is called by frontend drivers after processing of a command * has finished. * - * The protocol for ensuring that either the regular flow or the TMF - * code drops one reference is as follows: + * The protocol for ensuring that either the regular frontend command + * processing flow or target_handle_abort() code drops one reference is as + * follows: * - Calling .queue_data_in(), .queue_status() or queue_tm_rsp() will cause - * the frontend driver to drop one reference, synchronously or asynchronously. + * the frontend driver to call this function synchronously or asynchronously. + * That will cause one reference to be dropped. * - During regular command processing the target core sets CMD_T_COMPLETE * before invoking one of the .queue_*() functions. * - The code that aborts commands skips commands and TMFs for which @@ -2653,7 +2702,7 @@ static void target_wait_free_cmd(struct se_cmd *cmd, bool *aborted, bool *tas) * - For aborted commands for which CMD_T_TAS has been set .queue_status() will * be called and will drop a reference. * - For aborted commands for which CMD_T_TAS has not been set .aborted_task() - * will be called. transport_cmd_finish_abort() will drop the final reference. + * will be called. target_handle_abort() will drop the final reference. */ int transport_generic_free_cmd(struct se_cmd *cmd, int wait_for_tasks) { @@ -2677,9 +2726,8 @@ int transport_generic_free_cmd(struct se_cmd *cmd, int wait_for_tasks) transport_lun_remove_cmd(cmd); } if (aborted) - cmd->compl = &compl; - if (!aborted || tas) - ret = target_put_sess_cmd(cmd); + cmd->free_compl = &compl; + ret = target_put_sess_cmd(cmd); if (aborted) { pr_debug("Detected CMD_T_ABORTED for ITT: %llu\n", cmd->tag); wait_for_completion(&compl); @@ -2719,6 +2767,7 @@ int target_get_sess_cmd(struct se_cmd *se_cmd, bool ack_kref) } se_cmd->transport_state |= CMD_T_PRE_EXECUTE; list_add_tail(&se_cmd->se_cmd_list, &se_sess->sess_cmd_list); + percpu_ref_get(&se_sess->cmd_count); out: spin_unlock_irqrestore(&se_sess->sess_cmd_lock, flags); @@ -2743,21 +2792,24 @@ static void target_release_cmd_kref(struct kref *kref) { struct se_cmd *se_cmd = container_of(kref, struct se_cmd, cmd_kref); struct se_session *se_sess = se_cmd->se_sess; - struct completion *compl = se_cmd->compl; + struct completion *free_compl = se_cmd->free_compl; + struct completion *abrt_compl = se_cmd->abrt_compl; unsigned long flags; if (se_sess) { spin_lock_irqsave(&se_sess->sess_cmd_lock, flags); list_del_init(&se_cmd->se_cmd_list); - if (se_sess->sess_tearing_down && list_empty(&se_sess->sess_cmd_list)) - wake_up(&se_sess->cmd_list_wq); spin_unlock_irqrestore(&se_sess->sess_cmd_lock, flags); } target_free_cmd_mem(se_cmd); se_cmd->se_tfo->release_cmd(se_cmd); - if (compl) - complete(compl); + if (free_compl) + complete(free_compl); + if (abrt_compl) + complete(abrt_compl); + + percpu_ref_put(&se_sess->cmd_count); } /** @@ -2886,6 +2938,8 @@ void target_sess_cmd_list_set_waiting(struct se_session *se_sess) spin_lock_irqsave(&se_sess->sess_cmd_lock, flags); se_sess->sess_tearing_down = 1; spin_unlock_irqrestore(&se_sess->sess_cmd_lock, flags); + + percpu_ref_kill(&se_sess->cmd_count); } EXPORT_SYMBOL(target_sess_cmd_list_set_waiting); @@ -2900,52 +2954,24 @@ void target_wait_for_sess_cmds(struct se_session *se_sess) WARN_ON_ONCE(!se_sess->sess_tearing_down); - spin_lock_irq(&se_sess->sess_cmd_lock); do { - ret = wait_event_lock_irq_timeout( - se_sess->cmd_list_wq, - list_empty(&se_sess->sess_cmd_list), - se_sess->sess_cmd_lock, 180 * HZ); + ret = wait_event_timeout(se_sess->cmd_list_wq, + percpu_ref_is_zero(&se_sess->cmd_count), + 180 * HZ); list_for_each_entry(cmd, &se_sess->sess_cmd_list, se_cmd_list) target_show_cmd("session shutdown: still waiting for ", cmd); } while (ret <= 0); - spin_unlock_irq(&se_sess->sess_cmd_lock); } EXPORT_SYMBOL(target_wait_for_sess_cmds); -static void target_lun_confirm(struct percpu_ref *ref) -{ - struct se_lun *lun = container_of(ref, struct se_lun, lun_ref); - - complete(&lun->lun_ref_comp); -} - +/* + * Prevent that new percpu_ref_tryget_live() calls succeed and wait until + * all references to the LUN have been released. Called during LUN shutdown. + */ void transport_clear_lun_ref(struct se_lun *lun) { - /* - * Mark the percpu-ref as DEAD, switch to atomic_t mode, drop - * the initial reference and schedule confirm kill to be - * executed after one full RCU grace period has completed. - */ - percpu_ref_kill_and_confirm(&lun->lun_ref, target_lun_confirm); - /* - * The first completion waits for percpu_ref_switch_to_atomic_rcu() - * to call target_lun_confirm after lun->lun_ref has been marked - * as __PERCPU_REF_DEAD on all CPUs, and switches to atomic_t - * mode so that percpu_ref_tryget_live() lookup of lun->lun_ref - * fails for all new incoming I/O. - */ - wait_for_completion(&lun->lun_ref_comp); - /* - * The second completion waits for percpu_ref_put_many() to - * invoke ->release() after lun->lun_ref has switched to - * atomic_t mode, and lun->lun_ref.count has reached zero. - * - * At this point all target-core lun->lun_ref references have - * been dropped via transport_lun_remove_cmd(), and it's safe - * to proceed with the remaining LUN shutdown. - */ + percpu_ref_kill(&lun->lun_ref); wait_for_completion(&lun->lun_shutdown_comp); } @@ -3229,6 +3255,8 @@ transport_send_check_condition_and_sense(struct se_cmd *cmd, { unsigned long flags; + WARN_ON_ONCE(cmd->se_cmd_flags & SCF_SCSI_TMR_CDB); + spin_lock_irqsave(&cmd->t_state_lock, flags); if (cmd->se_cmd_flags & SCF_SENT_CHECK_CONDITION) { spin_unlock_irqrestore(&cmd->t_state_lock, flags); @@ -3245,114 +3273,15 @@ transport_send_check_condition_and_sense(struct se_cmd *cmd, } EXPORT_SYMBOL(transport_send_check_condition_and_sense); -static int __transport_check_aborted_status(struct se_cmd *cmd, int send_status) - __releases(&cmd->t_state_lock) - __acquires(&cmd->t_state_lock) -{ - int ret; - - assert_spin_locked(&cmd->t_state_lock); - WARN_ON_ONCE(!irqs_disabled()); - - if (!(cmd->transport_state & CMD_T_ABORTED)) - return 0; - /* - * If cmd has been aborted but either no status is to be sent or it has - * already been sent, just return - */ - if (!send_status || !(cmd->se_cmd_flags & SCF_SEND_DELAYED_TAS)) { - if (send_status) - cmd->se_cmd_flags |= SCF_SEND_DELAYED_TAS; - return 1; - } - - pr_debug("Sending delayed SAM_STAT_TASK_ABORTED status for CDB:" - " 0x%02x ITT: 0x%08llx\n", cmd->t_task_cdb[0], cmd->tag); - - cmd->se_cmd_flags &= ~SCF_SEND_DELAYED_TAS; - cmd->scsi_status = SAM_STAT_TASK_ABORTED; - trace_target_cmd_complete(cmd); - - spin_unlock_irq(&cmd->t_state_lock); - ret = cmd->se_tfo->queue_status(cmd); - if (ret) - transport_handle_queue_full(cmd, cmd->se_dev, ret, false); - spin_lock_irq(&cmd->t_state_lock); - - return 1; -} - -int transport_check_aborted_status(struct se_cmd *cmd, int send_status) -{ - int ret; - - spin_lock_irq(&cmd->t_state_lock); - ret = __transport_check_aborted_status(cmd, send_status); - spin_unlock_irq(&cmd->t_state_lock); - - return ret; -} -EXPORT_SYMBOL(transport_check_aborted_status); - -void transport_send_task_abort(struct se_cmd *cmd) -{ - unsigned long flags; - int ret; - - spin_lock_irqsave(&cmd->t_state_lock, flags); - if (cmd->se_cmd_flags & (SCF_SENT_CHECK_CONDITION)) { - spin_unlock_irqrestore(&cmd->t_state_lock, flags); - return; - } - spin_unlock_irqrestore(&cmd->t_state_lock, flags); - - /* - * If there are still expected incoming fabric WRITEs, we wait - * until until they have completed before sending a TASK_ABORTED - * response. This response with TASK_ABORTED status will be - * queued back to fabric module by transport_check_aborted_status(). - */ - if (cmd->data_direction == DMA_TO_DEVICE) { - if (cmd->se_tfo->write_pending_status(cmd) != 0) { - spin_lock_irqsave(&cmd->t_state_lock, flags); - if (cmd->se_cmd_flags & SCF_SEND_DELAYED_TAS) { - spin_unlock_irqrestore(&cmd->t_state_lock, flags); - goto send_abort; - } - cmd->se_cmd_flags |= SCF_SEND_DELAYED_TAS; - spin_unlock_irqrestore(&cmd->t_state_lock, flags); - return; - } - } -send_abort: - cmd->scsi_status = SAM_STAT_TASK_ABORTED; - - transport_lun_remove_cmd(cmd); - - pr_debug("Setting SAM_STAT_TASK_ABORTED status for CDB: 0x%02x, ITT: 0x%08llx\n", - cmd->t_task_cdb[0], cmd->tag); - - trace_target_cmd_complete(cmd); - ret = cmd->se_tfo->queue_status(cmd); - if (ret) - transport_handle_queue_full(cmd, cmd->se_dev, ret, false); -} - static void target_tmr_work(struct work_struct *work) { struct se_cmd *cmd = container_of(work, struct se_cmd, work); struct se_device *dev = cmd->se_dev; struct se_tmr_req *tmr = cmd->se_tmr_req; - unsigned long flags; int ret; - spin_lock_irqsave(&cmd->t_state_lock, flags); - if (cmd->transport_state & CMD_T_ABORTED) { - tmr->response = TMR_FUNCTION_REJECTED; - spin_unlock_irqrestore(&cmd->t_state_lock, flags); - goto check_stop; - } - spin_unlock_irqrestore(&cmd->t_state_lock, flags); + if (cmd->transport_state & CMD_T_ABORTED) + goto aborted; switch (tmr->function) { case TMR_ABORT_TASK: @@ -3386,18 +3315,16 @@ static void target_tmr_work(struct work_struct *work) break; } - spin_lock_irqsave(&cmd->t_state_lock, flags); - if (cmd->transport_state & CMD_T_ABORTED) { - spin_unlock_irqrestore(&cmd->t_state_lock, flags); - goto check_stop; - } - spin_unlock_irqrestore(&cmd->t_state_lock, flags); + if (cmd->transport_state & CMD_T_ABORTED) + goto aborted; cmd->se_tfo->queue_tm_rsp(cmd); -check_stop: - transport_lun_remove_cmd(cmd); transport_cmd_check_stop_to_fabric(cmd); + return; + +aborted: + target_handle_abort(cmd); } int transport_generic_handle_tmr( @@ -3416,16 +3343,15 @@ int transport_generic_handle_tmr( spin_unlock_irqrestore(&cmd->t_state_lock, flags); if (aborted) { - pr_warn_ratelimited("handle_tmr caught CMD_T_ABORTED TMR %d" - "ref_tag: %llu tag: %llu\n", cmd->se_tmr_req->function, - cmd->se_tmr_req->ref_task_tag, cmd->tag); - transport_lun_remove_cmd(cmd); - transport_cmd_check_stop_to_fabric(cmd); + pr_warn_ratelimited("handle_tmr caught CMD_T_ABORTED TMR %d ref_tag: %llu tag: %llu\n", + cmd->se_tmr_req->function, + cmd->se_tmr_req->ref_task_tag, cmd->tag); + target_handle_abort(cmd); return 0; } INIT_WORK(&cmd->work, target_tmr_work); - queue_work(cmd->se_dev->tmr_wq, &cmd->work); + schedule_work(&cmd->work); return 0; } EXPORT_SYMBOL(transport_generic_handle_tmr); diff --git a/drivers/target/target_core_ua.c b/drivers/target/target_core_ua.c index c8ac242ce888..ced1c10364eb 100644 --- a/drivers/target/target_core_ua.c +++ b/drivers/target/target_core_ua.c @@ -266,7 +266,7 @@ bool core_scsi3_ua_for_check_condition(struct se_cmd *cmd, u8 *key, u8 *asc, pr_debug("[%s]: %s UNIT ATTENTION condition with" " INTLCK_CTRL: %d, mapped LUN: %llu, got CDB: 0x%02x" " reported ASC: 0x%02x, ASCQ: 0x%02x\n", - nacl->se_tpg->se_tpg_tfo->get_fabric_name(), + nacl->se_tpg->se_tpg_tfo->fabric_name, (dev->dev_attrib.emulate_ua_intlck_ctrl != 0) ? "Reporting" : "Releasing", dev->dev_attrib.emulate_ua_intlck_ctrl, cmd->orig_fe_lun, cmd->t_task_cdb[0], *asc, *ascq); @@ -327,7 +327,7 @@ int core_scsi3_ua_clear_for_request_sense( pr_debug("[%s]: Released UNIT ATTENTION condition, mapped" " LUN: %llu, got REQUEST_SENSE reported ASC: 0x%02x," - " ASCQ: 0x%02x\n", nacl->se_tpg->se_tpg_tfo->get_fabric_name(), + " ASCQ: 0x%02x\n", nacl->se_tpg->se_tpg_tfo->fabric_name, cmd->orig_fe_lun, *asc, *ascq); return (head) ? -EPERM : 0; diff --git a/drivers/target/target_core_user.c b/drivers/target/target_core_user.c index 9cd404acdb82..1e6d24943565 100644 --- a/drivers/target/target_core_user.c +++ b/drivers/target/target_core_user.c @@ -958,7 +958,7 @@ static int add_to_cmdr_queue(struct tcmu_cmd *tcmu_cmd) * 0 success * 1 internally queued to wait for ring memory to free. */ -static sense_reason_t queue_cmd_ring(struct tcmu_cmd *tcmu_cmd, int *scsi_err) +static int queue_cmd_ring(struct tcmu_cmd *tcmu_cmd, sense_reason_t *scsi_err) { struct tcmu_dev *udev = tcmu_cmd->tcmu_dev; struct se_cmd *se_cmd = tcmu_cmd->se_cmd; diff --git a/drivers/target/target_core_xcopy.c b/drivers/target/target_core_xcopy.c index 70adcfdca8d1..c2e1fc927fdf 100644 --- a/drivers/target/target_core_xcopy.c +++ b/drivers/target/target_core_xcopy.c @@ -399,11 +399,6 @@ struct se_portal_group xcopy_pt_tpg; static struct se_session xcopy_pt_sess; static struct se_node_acl xcopy_pt_nacl; -static char *xcopy_pt_get_fabric_name(void) -{ - return "xcopy-pt"; -} - static int xcopy_pt_get_cmd_state(struct se_cmd *se_cmd) { return 0; @@ -463,7 +458,7 @@ static int xcopy_pt_queue_status(struct se_cmd *se_cmd) } static const struct target_core_fabric_ops xcopy_pt_tfo = { - .get_fabric_name = xcopy_pt_get_fabric_name, + .fabric_name = "xcopy-pt", .get_cmd_state = xcopy_pt_get_cmd_state, .release_cmd = xcopy_pt_release_cmd, .check_stop_free = xcopy_pt_check_stop_free, @@ -479,6 +474,8 @@ static const struct target_core_fabric_ops xcopy_pt_tfo = { int target_xcopy_setup_pt(void) { + int ret; + xcopy_wq = alloc_workqueue("xcopy_wq", WQ_MEM_RECLAIM, 0); if (!xcopy_wq) { pr_err("Unable to allocate xcopy_wq\n"); @@ -496,7 +493,9 @@ int target_xcopy_setup_pt(void) INIT_LIST_HEAD(&xcopy_pt_nacl.acl_list); INIT_LIST_HEAD(&xcopy_pt_nacl.acl_sess_list); memset(&xcopy_pt_sess, 0, sizeof(struct se_session)); - transport_init_session(&xcopy_pt_sess); + ret = transport_init_session(&xcopy_pt_sess); + if (ret < 0) + return ret; xcopy_pt_nacl.se_tpg = &xcopy_pt_tpg; xcopy_pt_nacl.nacl_sess = &xcopy_pt_sess; diff --git a/drivers/target/tcm_fc/tfc_conf.c b/drivers/target/tcm_fc/tfc_conf.c index e55c4d537592..1ce49518d440 100644 --- a/drivers/target/tcm_fc/tfc_conf.c +++ b/drivers/target/tcm_fc/tfc_conf.c @@ -392,11 +392,6 @@ static inline struct ft_tpg *ft_tpg(struct se_portal_group *se_tpg) return container_of(se_tpg, struct ft_tpg, se_tpg); } -static char *ft_get_fabric_name(void) -{ - return "fc"; -} - static char *ft_get_fabric_wwn(struct se_portal_group *se_tpg) { return ft_tpg(se_tpg)->lport_wwn->name; @@ -427,9 +422,8 @@ static u32 ft_tpg_get_inst_index(struct se_portal_group *se_tpg) static const struct target_core_fabric_ops ft_fabric_ops = { .module = THIS_MODULE, - .name = "fc", + .fabric_name = "fc", .node_acl_size = sizeof(struct ft_node_acl), - .get_fabric_name = ft_get_fabric_name, .tpg_get_wwn = ft_get_fabric_wwn, .tpg_get_tag = ft_get_tag, .tpg_check_demo_mode = ft_check_false, diff --git a/drivers/usb/gadget/function/f_tcm.c b/drivers/usb/gadget/function/f_tcm.c index 106988a6661a..34f5982cab78 100644 --- a/drivers/usb/gadget/function/f_tcm.c +++ b/drivers/usb/gadget/function/f_tcm.c @@ -1256,11 +1256,6 @@ static int usbg_check_false(struct se_portal_group *se_tpg) return 0; } -static char *usbg_get_fabric_name(void) -{ - return "usb_gadget"; -} - static char *usbg_get_fabric_wwn(struct se_portal_group *se_tpg) { struct usbg_tpg *tpg = container_of(se_tpg, @@ -1718,8 +1713,7 @@ static int usbg_check_stop_free(struct se_cmd *se_cmd) static const struct target_core_fabric_ops usbg_ops = { .module = THIS_MODULE, - .name = "usb_gadget", - .get_fabric_name = usbg_get_fabric_name, + .fabric_name = "usb_gadget", .tpg_get_wwn = usbg_get_fabric_wwn, .tpg_get_tag = usbg_get_tag, .tpg_check_demo_mode = usbg_check_true, diff --git a/drivers/usb/image/microtek.c b/drivers/usb/image/microtek.c index 9f2f563c82ed..607be1f4fe27 100644 --- a/drivers/usb/image/microtek.c +++ b/drivers/usb/image/microtek.c @@ -632,7 +632,6 @@ static struct scsi_host_template mts_scsi_host_template = { .sg_tablesize = SG_ALL, .can_queue = 1, .this_id = -1, - .use_clustering = 1, .emulated = 1, .slave_alloc = mts_slave_alloc, .slave_configure = mts_slave_configure, diff --git a/drivers/usb/storage/scsiglue.c b/drivers/usb/storage/scsiglue.c index e227bb5b794f..fde2e71a6ade 100644 --- a/drivers/usb/storage/scsiglue.c +++ b/drivers/usb/storage/scsiglue.c @@ -639,13 +639,6 @@ static const struct scsi_host_template usb_stor_host_template = { */ .max_sectors = 240, - /* - * merge commands... this seems to help performance, but - * periodically someone should test to see which setting is more - * optimal. - */ - .use_clustering = 1, - /* emulated HBA */ .emulated = 1, diff --git a/drivers/usb/storage/uas.c b/drivers/usb/storage/uas.c index 1f7b401c4d04..36742e8e7edc 100644 --- a/drivers/usb/storage/uas.c +++ b/drivers/usb/storage/uas.c @@ -879,6 +879,7 @@ static struct scsi_host_template uas_host_template = { .this_id = -1, .sg_tablesize = SG_NONE, .skip_settle_delay = 1, + .dma_boundary = PAGE_SIZE - 1, }; #define UNUSUAL_DEV(id_vendor, id_product, bcdDeviceMin, bcdDeviceMax, \ diff --git a/drivers/vhost/scsi.c b/drivers/vhost/scsi.c index 50dffe83714c..a08472ae5b1b 100644 --- a/drivers/vhost/scsi.c +++ b/drivers/vhost/scsi.c @@ -285,11 +285,6 @@ static int vhost_scsi_check_false(struct se_portal_group *se_tpg) return 0; } -static char *vhost_scsi_get_fabric_name(void) -{ - return "vhost"; -} - static char *vhost_scsi_get_fabric_wwn(struct se_portal_group *se_tpg) { struct vhost_scsi_tpg *tpg = container_of(se_tpg, @@ -2289,8 +2284,7 @@ static struct configfs_attribute *vhost_scsi_wwn_attrs[] = { static const struct target_core_fabric_ops vhost_scsi_ops = { .module = THIS_MODULE, - .name = "vhost", - .get_fabric_name = vhost_scsi_get_fabric_name, + .fabric_name = "vhost", .tpg_get_wwn = vhost_scsi_get_fabric_wwn, .tpg_get_tag = vhost_scsi_get_tpgt, .tpg_check_demo_mode = vhost_scsi_check_true, diff --git a/drivers/xen/swiotlb-xen.c b/drivers/xen/swiotlb-xen.c index 2a7f545bd0b5..989cf872b98c 100644 --- a/drivers/xen/swiotlb-xen.c +++ b/drivers/xen/swiotlb-xen.c @@ -53,8 +53,6 @@ * API. */ -#define XEN_SWIOTLB_ERROR_CODE (~(dma_addr_t)0x0) - static char *xen_io_tlb_start, *xen_io_tlb_end; static unsigned long xen_io_tlb_nslabs; /* @@ -405,8 +403,8 @@ static dma_addr_t xen_swiotlb_map_page(struct device *dev, struct page *page, map = swiotlb_tbl_map_single(dev, start_dma_addr, phys, size, dir, attrs); - if (map == SWIOTLB_MAP_ERROR) - return XEN_SWIOTLB_ERROR_CODE; + if (map == DMA_MAPPING_ERROR) + return DMA_MAPPING_ERROR; dev_addr = xen_phys_to_bus(map); xen_dma_map_page(dev, pfn_to_page(map >> PAGE_SHIFT), @@ -421,7 +419,7 @@ static dma_addr_t xen_swiotlb_map_page(struct device *dev, struct page *page, attrs |= DMA_ATTR_SKIP_CPU_SYNC; swiotlb_tbl_unmap_single(dev, map, size, dir, attrs); - return XEN_SWIOTLB_ERROR_CODE; + return DMA_MAPPING_ERROR; } /* @@ -443,21 +441,8 @@ static void xen_unmap_single(struct device *hwdev, dma_addr_t dev_addr, xen_dma_unmap_page(hwdev, dev_addr, size, dir, attrs); /* NOTE: We use dev_addr here, not paddr! */ - if (is_xen_swiotlb_buffer(dev_addr)) { + if (is_xen_swiotlb_buffer(dev_addr)) swiotlb_tbl_unmap_single(hwdev, paddr, size, dir, attrs); - return; - } - - if (dir != DMA_FROM_DEVICE) - return; - - /* - * phys_to_virt doesn't work with hihgmem page but we could - * call dma_mark_clean() with hihgmem page here. However, we - * are fine since dma_mark_clean() is null on POWERPC. We can - * make dma_mark_clean() take a physical address if necessary. - */ - dma_mark_clean(phys_to_virt(paddr), size); } static void xen_swiotlb_unmap_page(struct device *hwdev, dma_addr_t dev_addr, @@ -495,11 +480,6 @@ xen_swiotlb_sync_single(struct device *hwdev, dma_addr_t dev_addr, if (target == SYNC_FOR_DEVICE) xen_dma_sync_single_for_device(hwdev, dev_addr, size, dir); - - if (dir != DMA_FROM_DEVICE) - return; - - dma_mark_clean(phys_to_virt(paddr), size); } void @@ -574,7 +554,7 @@ xen_swiotlb_map_sg_attrs(struct device *hwdev, struct scatterlist *sgl, sg_phys(sg), sg->length, dir, attrs); - if (map == SWIOTLB_MAP_ERROR) { + if (map == DMA_MAPPING_ERROR) { dev_warn(hwdev, "swiotlb buffer is full\n"); /* Don't panic here, we expect map_sg users to do proper error handling. */ @@ -700,11 +680,6 @@ xen_swiotlb_get_sgtable(struct device *dev, struct sg_table *sgt, return dma_common_get_sgtable(dev, sgt, cpu_addr, handle, size, attrs); } -static int xen_swiotlb_mapping_error(struct device *dev, dma_addr_t dma_addr) -{ - return dma_addr == XEN_SWIOTLB_ERROR_CODE; -} - const struct dma_map_ops xen_swiotlb_dma_ops = { .alloc = xen_swiotlb_alloc_coherent, .free = xen_swiotlb_free_coherent, @@ -719,5 +694,4 @@ const struct dma_map_ops xen_swiotlb_dma_ops = { .dma_supported = xen_swiotlb_dma_supported, .mmap = xen_swiotlb_dma_mmap, .get_sgtable = xen_swiotlb_get_sgtable, - .mapping_error = xen_swiotlb_mapping_error, }; diff --git a/drivers/xen/xen-scsiback.c b/drivers/xen/xen-scsiback.c index 14a3d4cbc2a7..c9e23a126218 100644 --- a/drivers/xen/xen-scsiback.c +++ b/drivers/xen/xen-scsiback.c @@ -1712,11 +1712,6 @@ static struct configfs_attribute *scsiback_wwn_attrs[] = { NULL, }; -static char *scsiback_get_fabric_name(void) -{ - return "xen-pvscsi"; -} - static int scsiback_port_link(struct se_portal_group *se_tpg, struct se_lun *lun) { @@ -1810,8 +1805,7 @@ static int scsiback_check_false(struct se_portal_group *se_tpg) static const struct target_core_fabric_ops scsiback_ops = { .module = THIS_MODULE, - .name = "xen-pvscsi", - .get_fabric_name = scsiback_get_fabric_name, + .fabric_name = "xen-pvscsi", .tpg_get_wwn = scsiback_get_fabric_wwn, .tpg_get_tag = scsiback_get_tag, .tpg_check_demo_mode = scsiback_check_true, |