summaryrefslogtreecommitdiffstats
path: root/drivers
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2018-10-25 15:31:56 +0200
committerLinus Torvalds <torvalds@linux-foundation.org>2018-10-25 15:31:56 +0200
commit6078e07dcf5348075713124508d03786dc9ffa8b (patch)
tree0a93be6dc3de629352ea269a7b99018ab40b35e0 /drivers
parentMerge tag 'for-v4.20' of git://git.kernel.org/pub/scm/linux/kernel/git/sre/li... (diff)
parentacpi, nfit: Further restrict userspace ARS start requests (diff)
downloadlinux-6078e07dcf5348075713124508d03786dc9ffa8b.tar.xz
linux-6078e07dcf5348075713124508d03786dc9ffa8b.zip
Merge tag 'libnvdimm-for-4.20' of git://git.kernel.org/pub/scm/linux/kernel/git/nvdimm/nvdimm
Pull libnvdimm updates from Dan Williams: - Improve the efficiency and performance of reading nvdimm-namespace labels. Reduce the amount of label data read at driver load time by a few orders of magnitude. Reduce heavyweight call-outs to platform-firmware routines. - Handle media errors located in the 'struct page' array stored on a persistent memory namespace. Let the kernel clear these errors rather than an awkward userspace workaround. - Fix Address Range Scrub (ARS) completion tracking. Correct occasions where the kernel indicates completion of ARS before submission. - Fix asynchronous device registration reference counting. - Add support for reporting an nvdimm dirty-shutdown-count via sysfs. - Fix various small libnvdimm core and uapi issues. * tag 'libnvdimm-for-4.20' of git://git.kernel.org/pub/scm/linux/kernel/git/nvdimm/nvdimm: (21 commits) acpi, nfit: Further restrict userspace ARS start requests acpi, nfit: Fix Address Range Scrub completion tracking UAPI: ndctl: Remove use of PAGE_SIZE UAPI: ndctl: Fix g++-unsupported initialisation in headers tools/testing/nvdimm: Populate dirty shutdown data acpi, nfit: Collect shutdown status acpi, nfit: Introduce nfit_mem flags libnvdimm, label: Fix sparse warning nvdimm: Use namespace index data to reduce number of label reads needed nvdimm: Split label init out from the logic for getting config data nvdimm: Remove empty if statement nvdimm: Clarify comment in sizeof_namespace_index nvdimm: Sanity check labeloff libnvdimm, dimm: Maximize label transfer size libnvdimm, pmem: Fix badblocks population for 'raw' namespaces libnvdimm, namespace: Drop the repeat assignment for variable dev->parent libnvdimm, region: Fail badblocks listing for inactive regions libnvdimm, pfn: during init, clear errors in the metadata area libnvdimm: Set device node in nd_device_register libnvdimm: Hold reference on parent while scheduling async init ...
Diffstat (limited to 'drivers')
-rw-r--r--drivers/acpi/nfit/core.c297
-rw-r--r--drivers/acpi/nfit/intel.h38
-rw-r--r--drivers/acpi/nfit/nfit.h21
-rw-r--r--drivers/nvdimm/bus.c20
-rw-r--r--drivers/nvdimm/dimm.c6
-rw-r--r--drivers/nvdimm/dimm_devs.c60
-rw-r--r--drivers/nvdimm/label.c144
-rw-r--r--drivers/nvdimm/label.h4
-rw-r--r--drivers/nvdimm/namespace_devs.c1
-rw-r--r--drivers/nvdimm/nd-core.h1
-rw-r--r--drivers/nvdimm/nd.h2
-rw-r--r--drivers/nvdimm/pfn_devs.c61
-rw-r--r--drivers/nvdimm/pmem.c4
-rw-r--r--drivers/nvdimm/region_devs.c11
14 files changed, 509 insertions, 161 deletions
diff --git a/drivers/acpi/nfit/core.c b/drivers/acpi/nfit/core.c
index b072cfc5f20e..f8c638f3c946 100644
--- a/drivers/acpi/nfit/core.c
+++ b/drivers/acpi/nfit/core.c
@@ -25,6 +25,7 @@
#include <asm/cacheflush.h>
#include <acpi/nfit.h>
#include "nfit.h"
+#include "intel.h"
/*
* For readq() and writeq() on 32-bit builds, the hi-lo, lo-hi order is
@@ -191,18 +192,20 @@ static int xlat_nvdimm_status(struct nvdimm *nvdimm, void *buf, unsigned int cmd
* In the _LSI, _LSR, _LSW case the locked status is
* communicated via the read/write commands
*/
- if (nfit_mem->has_lsr)
+ if (test_bit(NFIT_MEM_LSR, &nfit_mem->flags))
break;
if (status >> 16 & ND_CONFIG_LOCKED)
return -EACCES;
break;
case ND_CMD_GET_CONFIG_DATA:
- if (nfit_mem->has_lsr && status == ACPI_LABELS_LOCKED)
+ if (test_bit(NFIT_MEM_LSR, &nfit_mem->flags)
+ && status == ACPI_LABELS_LOCKED)
return -EACCES;
break;
case ND_CMD_SET_CONFIG_DATA:
- if (nfit_mem->has_lsw && status == ACPI_LABELS_LOCKED)
+ if (test_bit(NFIT_MEM_LSW, &nfit_mem->flags)
+ && status == ACPI_LABELS_LOCKED)
return -EACCES;
break;
default:
@@ -480,14 +483,16 @@ int acpi_nfit_ctl(struct nvdimm_bus_descriptor *nd_desc, struct nvdimm *nvdimm,
min_t(u32, 256, in_buf.buffer.length), true);
/* call the BIOS, prefer the named methods over _DSM if available */
- if (nvdimm && cmd == ND_CMD_GET_CONFIG_SIZE && nfit_mem->has_lsr)
+ if (nvdimm && cmd == ND_CMD_GET_CONFIG_SIZE
+ && test_bit(NFIT_MEM_LSR, &nfit_mem->flags))
out_obj = acpi_label_info(handle);
- else if (nvdimm && cmd == ND_CMD_GET_CONFIG_DATA && nfit_mem->has_lsr) {
+ else if (nvdimm && cmd == ND_CMD_GET_CONFIG_DATA
+ && test_bit(NFIT_MEM_LSR, &nfit_mem->flags)) {
struct nd_cmd_get_config_data_hdr *p = buf;
out_obj = acpi_label_read(handle, p->in_offset, p->in_length);
} else if (nvdimm && cmd == ND_CMD_SET_CONFIG_DATA
- && nfit_mem->has_lsw) {
+ && test_bit(NFIT_MEM_LSW, &nfit_mem->flags)) {
struct nd_cmd_set_config_hdr *p = buf;
out_obj = acpi_label_write(handle, p->in_offset, p->in_length,
@@ -1547,7 +1552,12 @@ static DEVICE_ATTR_RO(dsm_mask);
static ssize_t flags_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
- u16 flags = to_nfit_memdev(dev)->flags;
+ struct nvdimm *nvdimm = to_nvdimm(dev);
+ struct nfit_mem *nfit_mem = nvdimm_provider_data(nvdimm);
+ u16 flags = __to_nfit_memdev(nfit_mem)->flags;
+
+ if (test_bit(NFIT_MEM_DIRTY, &nfit_mem->flags))
+ flags |= ACPI_NFIT_MEM_FLUSH_FAILED;
return sprintf(buf, "%s%s%s%s%s%s%s\n",
flags & ACPI_NFIT_MEM_SAVE_FAILED ? "save_fail " : "",
@@ -1578,6 +1588,16 @@ static ssize_t id_show(struct device *dev,
}
static DEVICE_ATTR_RO(id);
+static ssize_t dirty_shutdown_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct nvdimm *nvdimm = to_nvdimm(dev);
+ struct nfit_mem *nfit_mem = nvdimm_provider_data(nvdimm);
+
+ return sprintf(buf, "%d\n", nfit_mem->dirty_shutdown);
+}
+static DEVICE_ATTR_RO(dirty_shutdown);
+
static struct attribute *acpi_nfit_dimm_attributes[] = {
&dev_attr_handle.attr,
&dev_attr_phys_id.attr,
@@ -1595,6 +1615,7 @@ static struct attribute *acpi_nfit_dimm_attributes[] = {
&dev_attr_id.attr,
&dev_attr_family.attr,
&dev_attr_dsm_mask.attr,
+ &dev_attr_dirty_shutdown.attr,
NULL,
};
@@ -1603,6 +1624,7 @@ static umode_t acpi_nfit_dimm_attr_visible(struct kobject *kobj,
{
struct device *dev = container_of(kobj, struct device, kobj);
struct nvdimm *nvdimm = to_nvdimm(dev);
+ struct nfit_mem *nfit_mem = nvdimm_provider_data(nvdimm);
if (!to_nfit_dcr(dev)) {
/* Without a dcr only the memdev attributes can be surfaced */
@@ -1616,6 +1638,11 @@ static umode_t acpi_nfit_dimm_attr_visible(struct kobject *kobj,
if (a == &dev_attr_format1.attr && num_nvdimm_formats(nvdimm) <= 1)
return 0;
+
+ if (!test_bit(NFIT_MEM_DIRTY_COUNT, &nfit_mem->flags)
+ && a == &dev_attr_dirty_shutdown.attr)
+ return 0;
+
return a->mode;
}
@@ -1694,6 +1721,56 @@ static bool acpi_nvdimm_has_method(struct acpi_device *adev, char *method)
return false;
}
+__weak void nfit_intel_shutdown_status(struct nfit_mem *nfit_mem)
+{
+ struct nd_intel_smart smart = { 0 };
+ union acpi_object in_buf = {
+ .type = ACPI_TYPE_BUFFER,
+ .buffer.pointer = (char *) &smart,
+ .buffer.length = sizeof(smart),
+ };
+ union acpi_object in_obj = {
+ .type = ACPI_TYPE_PACKAGE,
+ .package.count = 1,
+ .package.elements = &in_buf,
+ };
+ const u8 func = ND_INTEL_SMART;
+ const guid_t *guid = to_nfit_uuid(nfit_mem->family);
+ u8 revid = nfit_dsm_revid(nfit_mem->family, func);
+ struct acpi_device *adev = nfit_mem->adev;
+ acpi_handle handle = adev->handle;
+ union acpi_object *out_obj;
+
+ if ((nfit_mem->dsm_mask & (1 << func)) == 0)
+ return;
+
+ out_obj = acpi_evaluate_dsm(handle, guid, revid, func, &in_obj);
+ if (!out_obj)
+ return;
+
+ if (smart.flags & ND_INTEL_SMART_SHUTDOWN_VALID) {
+ if (smart.shutdown_state)
+ set_bit(NFIT_MEM_DIRTY, &nfit_mem->flags);
+ }
+
+ if (smart.flags & ND_INTEL_SMART_SHUTDOWN_COUNT_VALID) {
+ set_bit(NFIT_MEM_DIRTY_COUNT, &nfit_mem->flags);
+ nfit_mem->dirty_shutdown = smart.shutdown_count;
+ }
+ ACPI_FREE(out_obj);
+}
+
+static void populate_shutdown_status(struct nfit_mem *nfit_mem)
+{
+ /*
+ * For DIMMs that provide a dynamic facility to retrieve a
+ * dirty-shutdown status and/or a dirty-shutdown count, cache
+ * these values in nfit_mem.
+ */
+ if (nfit_mem->family == NVDIMM_FAMILY_INTEL)
+ nfit_intel_shutdown_status(nfit_mem);
+}
+
static int acpi_nfit_add_dimm(struct acpi_nfit_desc *acpi_desc,
struct nfit_mem *nfit_mem, u32 device_handle)
{
@@ -1708,8 +1785,11 @@ static int acpi_nfit_add_dimm(struct acpi_nfit_desc *acpi_desc,
nfit_mem->dsm_mask = acpi_desc->dimm_cmd_force_en;
nfit_mem->family = NVDIMM_FAMILY_INTEL;
adev = to_acpi_dev(acpi_desc);
- if (!adev)
+ if (!adev) {
+ /* unit test case */
+ populate_shutdown_status(nfit_mem);
return 0;
+ }
adev_dimm = acpi_find_child_device(adev, device_handle, false);
nfit_mem->adev = adev_dimm;
@@ -1784,14 +1864,17 @@ static int acpi_nfit_add_dimm(struct acpi_nfit_desc *acpi_desc,
if (acpi_nvdimm_has_method(adev_dimm, "_LSI")
&& acpi_nvdimm_has_method(adev_dimm, "_LSR")) {
dev_dbg(dev, "%s: has _LSR\n", dev_name(&adev_dimm->dev));
- nfit_mem->has_lsr = true;
+ set_bit(NFIT_MEM_LSR, &nfit_mem->flags);
}
- if (nfit_mem->has_lsr && acpi_nvdimm_has_method(adev_dimm, "_LSW")) {
+ if (test_bit(NFIT_MEM_LSR, &nfit_mem->flags)
+ && acpi_nvdimm_has_method(adev_dimm, "_LSW")) {
dev_dbg(dev, "%s: has _LSW\n", dev_name(&adev_dimm->dev));
- nfit_mem->has_lsw = true;
+ set_bit(NFIT_MEM_LSW, &nfit_mem->flags);
}
+ populate_shutdown_status(nfit_mem);
+
return 0;
}
@@ -1878,11 +1961,11 @@ static int acpi_nfit_register_dimms(struct acpi_nfit_desc *acpi_desc)
cmd_mask |= nfit_mem->dsm_mask & NVDIMM_STANDARD_CMDMASK;
}
- if (nfit_mem->has_lsr) {
+ if (test_bit(NFIT_MEM_LSR, &nfit_mem->flags)) {
set_bit(ND_CMD_GET_CONFIG_SIZE, &cmd_mask);
set_bit(ND_CMD_GET_CONFIG_DATA, &cmd_mask);
}
- if (nfit_mem->has_lsw)
+ if (test_bit(NFIT_MEM_LSW, &nfit_mem->flags))
set_bit(ND_CMD_SET_CONFIG_DATA, &cmd_mask);
flush = nfit_mem->nfit_flush ? nfit_mem->nfit_flush->flush
@@ -2466,7 +2549,8 @@ static int ars_get_cap(struct acpi_nfit_desc *acpi_desc,
return cmd_rc;
}
-static int ars_start(struct acpi_nfit_desc *acpi_desc, struct nfit_spa *nfit_spa)
+static int ars_start(struct acpi_nfit_desc *acpi_desc,
+ struct nfit_spa *nfit_spa, enum nfit_ars_state req_type)
{
int rc;
int cmd_rc;
@@ -2477,7 +2561,7 @@ static int ars_start(struct acpi_nfit_desc *acpi_desc, struct nfit_spa *nfit_spa
memset(&ars_start, 0, sizeof(ars_start));
ars_start.address = spa->address;
ars_start.length = spa->length;
- if (test_bit(ARS_SHORT, &nfit_spa->ars_state))
+ if (req_type == ARS_REQ_SHORT)
ars_start.flags = ND_ARS_RETURN_PREV_DATA;
if (nfit_spa_type(spa) == NFIT_SPA_PM)
ars_start.type = ND_ARS_PERSISTENT;
@@ -2534,6 +2618,15 @@ static void ars_complete(struct acpi_nfit_desc *acpi_desc,
struct nd_region *nd_region = nfit_spa->nd_region;
struct device *dev;
+ lockdep_assert_held(&acpi_desc->init_mutex);
+ /*
+ * Only advance the ARS state for ARS runs initiated by the
+ * kernel, ignore ARS results from BIOS initiated runs for scrub
+ * completion tracking.
+ */
+ if (acpi_desc->scrub_spa != nfit_spa)
+ return;
+
if ((ars_status->address >= spa->address && ars_status->address
< spa->address + spa->length)
|| (ars_status->address < spa->address)) {
@@ -2553,28 +2646,13 @@ static void ars_complete(struct acpi_nfit_desc *acpi_desc,
} else
return;
- if (test_bit(ARS_DONE, &nfit_spa->ars_state))
- return;
-
- if (!test_and_clear_bit(ARS_REQ, &nfit_spa->ars_state))
- return;
-
+ acpi_desc->scrub_spa = NULL;
if (nd_region) {
dev = nd_region_dev(nd_region);
nvdimm_region_notify(nd_region, NVDIMM_REVALIDATE_POISON);
} else
dev = acpi_desc->dev;
-
- dev_dbg(dev, "ARS: range %d %s complete\n", spa->range_index,
- test_bit(ARS_SHORT, &nfit_spa->ars_state)
- ? "short" : "long");
- clear_bit(ARS_SHORT, &nfit_spa->ars_state);
- if (test_and_clear_bit(ARS_REQ_REDO, &nfit_spa->ars_state)) {
- set_bit(ARS_SHORT, &nfit_spa->ars_state);
- set_bit(ARS_REQ, &nfit_spa->ars_state);
- dev_dbg(dev, "ARS: processing scrub request received while in progress\n");
- } else
- set_bit(ARS_DONE, &nfit_spa->ars_state);
+ dev_dbg(dev, "ARS: range %d complete\n", spa->range_index);
}
static int ars_status_process_records(struct acpi_nfit_desc *acpi_desc)
@@ -2855,46 +2933,55 @@ static int acpi_nfit_query_poison(struct acpi_nfit_desc *acpi_desc)
return 0;
}
-static int ars_register(struct acpi_nfit_desc *acpi_desc, struct nfit_spa *nfit_spa,
- int *query_rc)
+static int ars_register(struct acpi_nfit_desc *acpi_desc,
+ struct nfit_spa *nfit_spa)
{
- int rc = *query_rc;
+ int rc;
- if (no_init_ars)
+ if (no_init_ars || test_bit(ARS_FAILED, &nfit_spa->ars_state))
return acpi_nfit_register_region(acpi_desc, nfit_spa);
- set_bit(ARS_REQ, &nfit_spa->ars_state);
- set_bit(ARS_SHORT, &nfit_spa->ars_state);
+ set_bit(ARS_REQ_SHORT, &nfit_spa->ars_state);
+ set_bit(ARS_REQ_LONG, &nfit_spa->ars_state);
- switch (rc) {
+ switch (acpi_nfit_query_poison(acpi_desc)) {
case 0:
case -EAGAIN:
- rc = ars_start(acpi_desc, nfit_spa);
- if (rc == -EBUSY) {
- *query_rc = rc;
+ rc = ars_start(acpi_desc, nfit_spa, ARS_REQ_SHORT);
+ /* shouldn't happen, try again later */
+ if (rc == -EBUSY)
break;
- } else if (rc == 0) {
- rc = acpi_nfit_query_poison(acpi_desc);
- } else {
+ if (rc) {
set_bit(ARS_FAILED, &nfit_spa->ars_state);
break;
}
- if (rc == -EAGAIN)
- clear_bit(ARS_SHORT, &nfit_spa->ars_state);
- else if (rc == 0)
- ars_complete(acpi_desc, nfit_spa);
+ clear_bit(ARS_REQ_SHORT, &nfit_spa->ars_state);
+ rc = acpi_nfit_query_poison(acpi_desc);
+ if (rc)
+ break;
+ acpi_desc->scrub_spa = nfit_spa;
+ ars_complete(acpi_desc, nfit_spa);
+ /*
+ * If ars_complete() says we didn't complete the
+ * short scrub, we'll try again with a long
+ * request.
+ */
+ acpi_desc->scrub_spa = NULL;
break;
case -EBUSY:
+ case -ENOMEM:
case -ENOSPC:
+ /*
+ * BIOS was using ARS, wait for it to complete (or
+ * resources to become available) and then perform our
+ * own scrubs.
+ */
break;
default:
set_bit(ARS_FAILED, &nfit_spa->ars_state);
break;
}
- if (test_and_clear_bit(ARS_DONE, &nfit_spa->ars_state))
- set_bit(ARS_REQ, &nfit_spa->ars_state);
-
return acpi_nfit_register_region(acpi_desc, nfit_spa);
}
@@ -2916,6 +3003,8 @@ static unsigned int __acpi_nfit_scrub(struct acpi_nfit_desc *acpi_desc,
struct device *dev = acpi_desc->dev;
struct nfit_spa *nfit_spa;
+ lockdep_assert_held(&acpi_desc->init_mutex);
+
if (acpi_desc->cancel)
return 0;
@@ -2939,21 +3028,49 @@ static unsigned int __acpi_nfit_scrub(struct acpi_nfit_desc *acpi_desc,
ars_complete_all(acpi_desc);
list_for_each_entry(nfit_spa, &acpi_desc->spas, list) {
+ enum nfit_ars_state req_type;
+ int rc;
+
if (test_bit(ARS_FAILED, &nfit_spa->ars_state))
continue;
- if (test_bit(ARS_REQ, &nfit_spa->ars_state)) {
- int rc = ars_start(acpi_desc, nfit_spa);
-
- clear_bit(ARS_DONE, &nfit_spa->ars_state);
- dev = nd_region_dev(nfit_spa->nd_region);
- dev_dbg(dev, "ARS: range %d ARS start (%d)\n",
- nfit_spa->spa->range_index, rc);
- if (rc == 0 || rc == -EBUSY)
- return 1;
- dev_err(dev, "ARS: range %d ARS failed (%d)\n",
- nfit_spa->spa->range_index, rc);
- set_bit(ARS_FAILED, &nfit_spa->ars_state);
+
+ /* prefer short ARS requests first */
+ if (test_bit(ARS_REQ_SHORT, &nfit_spa->ars_state))
+ req_type = ARS_REQ_SHORT;
+ else if (test_bit(ARS_REQ_LONG, &nfit_spa->ars_state))
+ req_type = ARS_REQ_LONG;
+ else
+ continue;
+ rc = ars_start(acpi_desc, nfit_spa, req_type);
+
+ dev = nd_region_dev(nfit_spa->nd_region);
+ dev_dbg(dev, "ARS: range %d ARS start %s (%d)\n",
+ nfit_spa->spa->range_index,
+ req_type == ARS_REQ_SHORT ? "short" : "long",
+ rc);
+ /*
+ * Hmm, we raced someone else starting ARS? Try again in
+ * a bit.
+ */
+ if (rc == -EBUSY)
+ return 1;
+ if (rc == 0) {
+ dev_WARN_ONCE(dev, acpi_desc->scrub_spa,
+ "scrub start while range %d active\n",
+ acpi_desc->scrub_spa->spa->range_index);
+ clear_bit(req_type, &nfit_spa->ars_state);
+ acpi_desc->scrub_spa = nfit_spa;
+ /*
+ * Consider this spa last for future scrub
+ * requests
+ */
+ list_move_tail(&nfit_spa->list, &acpi_desc->spas);
+ return 1;
}
+
+ dev_err(dev, "ARS: range %d ARS failed (%d)\n",
+ nfit_spa->spa->range_index, rc);
+ set_bit(ARS_FAILED, &nfit_spa->ars_state);
}
return 0;
}
@@ -3009,6 +3126,7 @@ static void acpi_nfit_init_ars(struct acpi_nfit_desc *acpi_desc,
struct nd_cmd_ars_cap ars_cap;
int rc;
+ set_bit(ARS_FAILED, &nfit_spa->ars_state);
memset(&ars_cap, 0, sizeof(ars_cap));
rc = ars_get_cap(acpi_desc, &ars_cap, nfit_spa);
if (rc < 0)
@@ -3025,16 +3143,14 @@ static void acpi_nfit_init_ars(struct acpi_nfit_desc *acpi_desc,
nfit_spa->clear_err_unit = ars_cap.clear_err_unit;
acpi_desc->max_ars = max(nfit_spa->max_ars, acpi_desc->max_ars);
clear_bit(ARS_FAILED, &nfit_spa->ars_state);
- set_bit(ARS_REQ, &nfit_spa->ars_state);
}
static int acpi_nfit_register_regions(struct acpi_nfit_desc *acpi_desc)
{
struct nfit_spa *nfit_spa;
- int rc, query_rc;
+ int rc;
list_for_each_entry(nfit_spa, &acpi_desc->spas, list) {
- set_bit(ARS_FAILED, &nfit_spa->ars_state);
switch (nfit_spa_type(nfit_spa->spa)) {
case NFIT_SPA_VOLATILE:
case NFIT_SPA_PM:
@@ -3043,20 +3159,12 @@ static int acpi_nfit_register_regions(struct acpi_nfit_desc *acpi_desc)
}
}
- /*
- * Reap any results that might be pending before starting new
- * short requests.
- */
- query_rc = acpi_nfit_query_poison(acpi_desc);
- if (query_rc == 0)
- ars_complete_all(acpi_desc);
-
list_for_each_entry(nfit_spa, &acpi_desc->spas, list)
switch (nfit_spa_type(nfit_spa->spa)) {
case NFIT_SPA_VOLATILE:
case NFIT_SPA_PM:
/* register regions and kick off initial ARS run */
- rc = ars_register(acpi_desc, nfit_spa, &query_rc);
+ rc = ars_register(acpi_desc, nfit_spa);
if (rc)
return rc;
break;
@@ -3233,6 +3341,8 @@ static int acpi_nfit_clear_to_send(struct nvdimm_bus_descriptor *nd_desc,
struct nvdimm *nvdimm, unsigned int cmd)
{
struct acpi_nfit_desc *acpi_desc = to_acpi_nfit_desc(nd_desc);
+ struct nfit_spa *nfit_spa;
+ int rc = 0;
if (nvdimm)
return 0;
@@ -3242,16 +3352,24 @@ static int acpi_nfit_clear_to_send(struct nvdimm_bus_descriptor *nd_desc,
/*
* The kernel and userspace may race to initiate a scrub, but
* the scrub thread is prepared to lose that initial race. It
- * just needs guarantees that any ars it initiates are not
- * interrupted by any intervening start reqeusts from userspace.
+ * just needs guarantees that any ARS it initiates are not
+ * interrupted by any intervening start requests from userspace.
*/
- if (work_busy(&acpi_desc->dwork.work))
- return -EBUSY;
+ mutex_lock(&acpi_desc->init_mutex);
+ list_for_each_entry(nfit_spa, &acpi_desc->spas, list)
+ if (acpi_desc->scrub_spa
+ || test_bit(ARS_REQ_SHORT, &nfit_spa->ars_state)
+ || test_bit(ARS_REQ_LONG, &nfit_spa->ars_state)) {
+ rc = -EBUSY;
+ break;
+ }
+ mutex_unlock(&acpi_desc->init_mutex);
- return 0;
+ return rc;
}
-int acpi_nfit_ars_rescan(struct acpi_nfit_desc *acpi_desc, unsigned long flags)
+int acpi_nfit_ars_rescan(struct acpi_nfit_desc *acpi_desc,
+ enum nfit_ars_state req_type)
{
struct device *dev = acpi_desc->dev;
int scheduled = 0, busy = 0;
@@ -3271,14 +3389,10 @@ int acpi_nfit_ars_rescan(struct acpi_nfit_desc *acpi_desc, unsigned long flags)
if (test_bit(ARS_FAILED, &nfit_spa->ars_state))
continue;
- if (test_and_set_bit(ARS_REQ, &nfit_spa->ars_state)) {
+ if (test_and_set_bit(req_type, &nfit_spa->ars_state))
busy++;
- set_bit(ARS_REQ_REDO, &nfit_spa->ars_state);
- } else {
- if (test_bit(ARS_SHORT, &flags))
- set_bit(ARS_SHORT, &nfit_spa->ars_state);
+ else
scheduled++;
- }
}
if (scheduled) {
sched_ars(acpi_desc);
@@ -3464,10 +3578,11 @@ static void acpi_nfit_update_notify(struct device *dev, acpi_handle handle)
static void acpi_nfit_uc_error_notify(struct device *dev, acpi_handle handle)
{
struct acpi_nfit_desc *acpi_desc = dev_get_drvdata(dev);
- unsigned long flags = (acpi_desc->scrub_mode == HW_ERROR_SCRUB_ON) ?
- 0 : 1 << ARS_SHORT;
- acpi_nfit_ars_rescan(acpi_desc, flags);
+ if (acpi_desc->scrub_mode == HW_ERROR_SCRUB_ON)
+ acpi_nfit_ars_rescan(acpi_desc, ARS_REQ_LONG);
+ else
+ acpi_nfit_ars_rescan(acpi_desc, ARS_REQ_SHORT);
}
void __acpi_nfit_notify(struct device *dev, acpi_handle handle, u32 event)
diff --git a/drivers/acpi/nfit/intel.h b/drivers/acpi/nfit/intel.h
new file mode 100644
index 000000000000..86746312381f
--- /dev/null
+++ b/drivers/acpi/nfit/intel.h
@@ -0,0 +1,38 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright(c) 2018 Intel Corporation. All rights reserved.
+ * Intel specific definitions for NVDIMM Firmware Interface Table - NFIT
+ */
+#ifndef _NFIT_INTEL_H_
+#define _NFIT_INTEL_H_
+
+#define ND_INTEL_SMART 1
+
+#define ND_INTEL_SMART_SHUTDOWN_COUNT_VALID (1 << 5)
+#define ND_INTEL_SMART_SHUTDOWN_VALID (1 << 10)
+
+struct nd_intel_smart {
+ u32 status;
+ union {
+ struct {
+ u32 flags;
+ u8 reserved0[4];
+ u8 health;
+ u8 spares;
+ u8 life_used;
+ u8 alarm_flags;
+ u16 media_temperature;
+ u16 ctrl_temperature;
+ u32 shutdown_count;
+ u8 ait_status;
+ u16 pmic_temperature;
+ u8 reserved1[8];
+ u8 shutdown_state;
+ u32 vendor_size;
+ u8 vendor_data[92];
+ } __packed;
+ u8 data[128];
+ };
+} __packed;
+
+#endif
diff --git a/drivers/acpi/nfit/nfit.h b/drivers/acpi/nfit/nfit.h
index d1274ea2d251..df0f6b8407e7 100644
--- a/drivers/acpi/nfit/nfit.h
+++ b/drivers/acpi/nfit/nfit.h
@@ -118,10 +118,8 @@ enum nfit_dimm_notifiers {
};
enum nfit_ars_state {
- ARS_REQ,
- ARS_REQ_REDO,
- ARS_DONE,
- ARS_SHORT,
+ ARS_REQ_SHORT,
+ ARS_REQ_LONG,
ARS_FAILED,
};
@@ -159,6 +157,13 @@ struct nfit_memdev {
struct acpi_nfit_memory_map memdev[0];
};
+enum nfit_mem_flags {
+ NFIT_MEM_LSR,
+ NFIT_MEM_LSW,
+ NFIT_MEM_DIRTY,
+ NFIT_MEM_DIRTY_COUNT,
+};
+
/* assembled tables for a given dimm/memory-device */
struct nfit_mem {
struct nvdimm *nvdimm;
@@ -178,9 +183,9 @@ struct nfit_mem {
struct acpi_nfit_desc *acpi_desc;
struct resource *flush_wpq;
unsigned long dsm_mask;
+ unsigned long flags;
+ u32 dirty_shutdown;
int family;
- bool has_lsr;
- bool has_lsw;
};
struct acpi_nfit_desc {
@@ -198,6 +203,7 @@ struct acpi_nfit_desc {
struct device *dev;
u8 ars_start_flags;
struct nd_cmd_ars_status *ars_status;
+ struct nfit_spa *scrub_spa;
struct delayed_work dwork;
struct list_head list;
struct kernfs_node *scrub_count_state;
@@ -252,7 +258,8 @@ struct nfit_blk {
extern struct list_head acpi_descs;
extern struct mutex acpi_desc_lock;
-int acpi_nfit_ars_rescan(struct acpi_nfit_desc *acpi_desc, unsigned long flags);
+int acpi_nfit_ars_rescan(struct acpi_nfit_desc *acpi_desc,
+ enum nfit_ars_state req_type);
#ifdef CONFIG_X86_MCE
void nfit_mce_register(void);
diff --git a/drivers/nvdimm/bus.c b/drivers/nvdimm/bus.c
index 8aae6dcc839f..f1fb39921236 100644
--- a/drivers/nvdimm/bus.c
+++ b/drivers/nvdimm/bus.c
@@ -54,12 +54,6 @@ static int to_nd_device_type(struct device *dev)
static int nvdimm_bus_uevent(struct device *dev, struct kobj_uevent_env *env)
{
- /*
- * Ensure that region devices always have their numa node set as
- * early as possible.
- */
- if (is_nd_region(dev))
- set_dev_node(dev, to_nd_region(dev)->numa_node);
return add_uevent_var(env, "MODALIAS=" ND_DEVICE_MODALIAS_FMT,
to_nd_device_type(dev));
}
@@ -488,6 +482,8 @@ static void nd_async_device_register(void *d, async_cookie_t cookie)
put_device(dev);
}
put_device(dev);
+ if (dev->parent)
+ put_device(dev->parent);
}
static void nd_async_device_unregister(void *d, async_cookie_t cookie)
@@ -506,7 +502,19 @@ void __nd_device_register(struct device *dev)
{
if (!dev)
return;
+
+ /*
+ * Ensure that region devices always have their NUMA node set as
+ * early as possible. This way we are able to make certain that
+ * any memory associated with the creation and the creation
+ * itself of the region is associated with the correct node.
+ */
+ if (is_nd_region(dev))
+ set_dev_node(dev, to_nd_region(dev)->numa_node);
+
dev->bus = &nvdimm_bus_type;
+ if (dev->parent)
+ get_device(dev->parent);
get_device(dev);
async_schedule_domain(nd_async_device_register, dev,
&nd_async_domain);
diff --git a/drivers/nvdimm/dimm.c b/drivers/nvdimm/dimm.c
index 6c8fb7590838..9899c97138a3 100644
--- a/drivers/nvdimm/dimm.c
+++ b/drivers/nvdimm/dimm.c
@@ -75,7 +75,7 @@ static int nvdimm_probe(struct device *dev)
* DIMM capacity. We fail the dimm probe to prevent regions from
* attempting to parse the label area.
*/
- rc = nvdimm_init_config_data(ndd);
+ rc = nd_label_data_init(ndd);
if (rc == -EACCES)
nvdimm_set_locked(dev);
if (rc)
@@ -84,10 +84,6 @@ static int nvdimm_probe(struct device *dev)
dev_dbg(dev, "config data size: %d\n", ndd->nsarea.config_size);
nvdimm_bus_lock(dev);
- ndd->ns_current = nd_label_validate(ndd);
- ndd->ns_next = nd_label_next_nsindex(ndd->ns_current);
- nd_label_copy(ndd, to_next_namespace_index(ndd),
- to_current_namespace_index(ndd));
if (ndd->ns_current >= 0) {
rc = nd_label_reserve_dpa(ndd);
if (rc == 0)
diff --git a/drivers/nvdimm/dimm_devs.c b/drivers/nvdimm/dimm_devs.c
index 863cabc35215..6c3de2317390 100644
--- a/drivers/nvdimm/dimm_devs.c
+++ b/drivers/nvdimm/dimm_devs.c
@@ -85,56 +85,48 @@ int nvdimm_init_nsarea(struct nvdimm_drvdata *ndd)
return cmd_rc;
}
-int nvdimm_init_config_data(struct nvdimm_drvdata *ndd)
+int nvdimm_get_config_data(struct nvdimm_drvdata *ndd, void *buf,
+ size_t offset, size_t len)
{
struct nvdimm_bus *nvdimm_bus = walk_to_nvdimm_bus(ndd->dev);
+ struct nvdimm_bus_descriptor *nd_desc = nvdimm_bus->nd_desc;
int rc = validate_dimm(ndd), cmd_rc = 0;
struct nd_cmd_get_config_data_hdr *cmd;
- struct nvdimm_bus_descriptor *nd_desc;
- u32 max_cmd_size, config_size;
- size_t offset;
+ size_t max_cmd_size, buf_offset;
if (rc)
return rc;
- if (ndd->data)
- return 0;
-
- if (ndd->nsarea.status || ndd->nsarea.max_xfer == 0
- || ndd->nsarea.config_size < ND_LABEL_MIN_SIZE) {
- dev_dbg(ndd->dev, "failed to init config data area: (%d:%d)\n",
- ndd->nsarea.max_xfer, ndd->nsarea.config_size);
+ if (offset + len > ndd->nsarea.config_size)
return -ENXIO;
- }
- ndd->data = kvmalloc(ndd->nsarea.config_size, GFP_KERNEL);
- if (!ndd->data)
- return -ENOMEM;
-
- max_cmd_size = min_t(u32, PAGE_SIZE, ndd->nsarea.max_xfer);
- cmd = kzalloc(max_cmd_size + sizeof(*cmd), GFP_KERNEL);
+ max_cmd_size = min_t(u32, len, ndd->nsarea.max_xfer);
+ cmd = kvzalloc(max_cmd_size + sizeof(*cmd), GFP_KERNEL);
if (!cmd)
return -ENOMEM;
- nd_desc = nvdimm_bus->nd_desc;
- for (config_size = ndd->nsarea.config_size, offset = 0;
- config_size; config_size -= cmd->in_length,
- offset += cmd->in_length) {
- cmd->in_length = min(config_size, max_cmd_size);
- cmd->in_offset = offset;
+ for (buf_offset = 0; len;
+ len -= cmd->in_length, buf_offset += cmd->in_length) {
+ size_t cmd_size;
+
+ cmd->in_offset = offset + buf_offset;
+ cmd->in_length = min(max_cmd_size, len);
+
+ cmd_size = sizeof(*cmd) + cmd->in_length;
+
rc = nd_desc->ndctl(nd_desc, to_nvdimm(ndd->dev),
- ND_CMD_GET_CONFIG_DATA, cmd,
- cmd->in_length + sizeof(*cmd), &cmd_rc);
+ ND_CMD_GET_CONFIG_DATA, cmd, cmd_size, &cmd_rc);
if (rc < 0)
break;
if (cmd_rc < 0) {
rc = cmd_rc;
break;
}
- memcpy(ndd->data + offset, cmd->out_buf, cmd->in_length);
+
+ /* out_buf should be valid, copy it into our output buffer */
+ memcpy(buf + buf_offset, cmd->out_buf, cmd->in_length);
}
- dev_dbg(ndd->dev, "len: %zu rc: %d\n", offset, rc);
- kfree(cmd);
+ kvfree(cmd);
return rc;
}
@@ -151,15 +143,11 @@ int nvdimm_set_config_data(struct nvdimm_drvdata *ndd, size_t offset,
if (rc)
return rc;
- if (!ndd->data)
- return -ENXIO;
-
if (offset + len > ndd->nsarea.config_size)
return -ENXIO;
- max_cmd_size = min_t(u32, PAGE_SIZE, len);
- max_cmd_size = min_t(u32, max_cmd_size, ndd->nsarea.max_xfer);
- cmd = kzalloc(max_cmd_size + sizeof(*cmd) + sizeof(u32), GFP_KERNEL);
+ max_cmd_size = min_t(u32, len, ndd->nsarea.max_xfer);
+ cmd = kvzalloc(max_cmd_size + sizeof(*cmd) + sizeof(u32), GFP_KERNEL);
if (!cmd)
return -ENOMEM;
@@ -183,7 +171,7 @@ int nvdimm_set_config_data(struct nvdimm_drvdata *ndd, size_t offset,
break;
}
}
- kfree(cmd);
+ kvfree(cmd);
return rc;
}
diff --git a/drivers/nvdimm/label.c b/drivers/nvdimm/label.c
index 1d28cd656536..750dbaa6ce82 100644
--- a/drivers/nvdimm/label.c
+++ b/drivers/nvdimm/label.c
@@ -75,7 +75,8 @@ size_t sizeof_namespace_index(struct nvdimm_drvdata *ndd)
/*
* Per UEFI 2.7, the minimum size of the Label Storage Area is large
* enough to hold 2 index blocks and 2 labels. The minimum index
- * block size is 256 bytes, and the minimum label size is 256 bytes.
+ * block size is 256 bytes. The label size is 128 for namespaces
+ * prior to version 1.2 and at minimum 256 for version 1.2 and later.
*/
nslot = nvdimm_num_label_slots(ndd);
space = ndd->nsarea.config_size - nslot * sizeof_namespace_label(ndd);
@@ -183,6 +184,13 @@ static int __nd_label_validate(struct nvdimm_drvdata *ndd)
__le64_to_cpu(nsindex[i]->otheroff));
continue;
}
+ if (__le64_to_cpu(nsindex[i]->labeloff)
+ != 2 * sizeof_namespace_index(ndd)) {
+ dev_dbg(dev, "nsindex%d labeloff: %#llx invalid\n",
+ i, (unsigned long long)
+ __le64_to_cpu(nsindex[i]->labeloff));
+ continue;
+ }
size = __le64_to_cpu(nsindex[i]->mysize);
if (size > sizeof_namespace_index(ndd)
@@ -227,7 +235,7 @@ static int __nd_label_validate(struct nvdimm_drvdata *ndd)
return -1;
}
-int nd_label_validate(struct nvdimm_drvdata *ndd)
+static int nd_label_validate(struct nvdimm_drvdata *ndd)
{
/*
* In order to probe for and validate namespace index blocks we
@@ -250,12 +258,12 @@ int nd_label_validate(struct nvdimm_drvdata *ndd)
return -1;
}
-void nd_label_copy(struct nvdimm_drvdata *ndd, struct nd_namespace_index *dst,
- struct nd_namespace_index *src)
+static void nd_label_copy(struct nvdimm_drvdata *ndd,
+ struct nd_namespace_index *dst,
+ struct nd_namespace_index *src)
{
- if (dst && src)
- /* pass */;
- else
+ /* just exit if either destination or source is NULL */
+ if (!dst || !src)
return;
memcpy(dst, src, sizeof_namespace_index(ndd));
@@ -410,6 +418,128 @@ int nd_label_reserve_dpa(struct nvdimm_drvdata *ndd)
return 0;
}
+int nd_label_data_init(struct nvdimm_drvdata *ndd)
+{
+ size_t config_size, read_size, max_xfer, offset;
+ struct nd_namespace_index *nsindex;
+ unsigned int i;
+ int rc = 0;
+ u32 nslot;
+
+ if (ndd->data)
+ return 0;
+
+ if (ndd->nsarea.status || ndd->nsarea.max_xfer == 0) {
+ dev_dbg(ndd->dev, "failed to init config data area: (%u:%u)\n",
+ ndd->nsarea.max_xfer, ndd->nsarea.config_size);
+ return -ENXIO;
+ }
+
+ /*
+ * We need to determine the maximum index area as this is the section
+ * we must read and validate before we can start processing labels.
+ *
+ * If the area is too small to contain the two indexes and 2 labels
+ * then we abort.
+ *
+ * Start at a label size of 128 as this should result in the largest
+ * possible namespace index size.
+ */
+ ndd->nslabel_size = 128;
+ read_size = sizeof_namespace_index(ndd) * 2;
+ if (!read_size)
+ return -ENXIO;
+
+ /* Allocate config data */
+ config_size = ndd->nsarea.config_size;
+ ndd->data = kvzalloc(config_size, GFP_KERNEL);
+ if (!ndd->data)
+ return -ENOMEM;
+
+ /*
+ * We want to guarantee as few reads as possible while conserving
+ * memory. To do that we figure out how much unused space will be left
+ * in the last read, divide that by the total number of reads it is
+ * going to take given our maximum transfer size, and then reduce our
+ * maximum transfer size based on that result.
+ */
+ max_xfer = min_t(size_t, ndd->nsarea.max_xfer, config_size);
+ if (read_size < max_xfer) {
+ /* trim waste */
+ max_xfer -= ((max_xfer - 1) - (config_size - 1) % max_xfer) /
+ DIV_ROUND_UP(config_size, max_xfer);
+ /* make certain we read indexes in exactly 1 read */
+ if (max_xfer < read_size)
+ max_xfer = read_size;
+ }
+
+ /* Make our initial read size a multiple of max_xfer size */
+ read_size = min(DIV_ROUND_UP(read_size, max_xfer) * max_xfer,
+ config_size);
+
+ /* Read the index data */
+ rc = nvdimm_get_config_data(ndd, ndd->data, 0, read_size);
+ if (rc)
+ goto out_err;
+
+ /* Validate index data, if not valid assume all labels are invalid */
+ ndd->ns_current = nd_label_validate(ndd);
+ if (ndd->ns_current < 0)
+ return 0;
+
+ /* Record our index values */
+ ndd->ns_next = nd_label_next_nsindex(ndd->ns_current);
+
+ /* Copy "current" index on top of the "next" index */
+ nsindex = to_current_namespace_index(ndd);
+ nd_label_copy(ndd, to_next_namespace_index(ndd), nsindex);
+
+ /* Determine starting offset for label data */
+ offset = __le64_to_cpu(nsindex->labeloff);
+ nslot = __le32_to_cpu(nsindex->nslot);
+
+ /* Loop through the free list pulling in any active labels */
+ for (i = 0; i < nslot; i++, offset += ndd->nslabel_size) {
+ size_t label_read_size;
+
+ /* zero out the unused labels */
+ if (test_bit_le(i, nsindex->free)) {
+ memset(ndd->data + offset, 0, ndd->nslabel_size);
+ continue;
+ }
+
+ /* if we already read past here then just continue */
+ if (offset + ndd->nslabel_size <= read_size)
+ continue;
+
+ /* if we haven't read in a while reset our read_size offset */
+ if (read_size < offset)
+ read_size = offset;
+
+ /* determine how much more will be read after this next call. */
+ label_read_size = offset + ndd->nslabel_size - read_size;
+ label_read_size = DIV_ROUND_UP(label_read_size, max_xfer) *
+ max_xfer;
+
+ /* truncate last read if needed */
+ if (read_size + label_read_size > config_size)
+ label_read_size = config_size - read_size;
+
+ /* Read the label data */
+ rc = nvdimm_get_config_data(ndd, ndd->data + read_size,
+ read_size, label_read_size);
+ if (rc)
+ goto out_err;
+
+ /* push read_size to next read offset */
+ read_size += label_read_size;
+ }
+
+ dev_dbg(ndd->dev, "len: %zu rc: %d\n", offset, rc);
+out_err:
+ return rc;
+}
+
int nd_label_active_count(struct nvdimm_drvdata *ndd)
{
struct nd_namespace_index *nsindex;
diff --git a/drivers/nvdimm/label.h b/drivers/nvdimm/label.h
index 18bbe183b3a9..e9a2ad3c2150 100644
--- a/drivers/nvdimm/label.h
+++ b/drivers/nvdimm/label.h
@@ -138,9 +138,7 @@ static inline int nd_label_next_nsindex(int index)
}
struct nvdimm_drvdata;
-int nd_label_validate(struct nvdimm_drvdata *ndd);
-void nd_label_copy(struct nvdimm_drvdata *ndd, struct nd_namespace_index *dst,
- struct nd_namespace_index *src);
+int nd_label_data_init(struct nvdimm_drvdata *ndd);
size_t sizeof_namespace_index(struct nvdimm_drvdata *ndd);
int nd_label_active_count(struct nvdimm_drvdata *ndd);
struct nd_namespace_label *nd_label_active(struct nvdimm_drvdata *ndd, int n);
diff --git a/drivers/nvdimm/namespace_devs.c b/drivers/nvdimm/namespace_devs.c
index 4a4266250c28..681af3a8fd62 100644
--- a/drivers/nvdimm/namespace_devs.c
+++ b/drivers/nvdimm/namespace_devs.c
@@ -2099,7 +2099,6 @@ static struct device *nd_namespace_pmem_create(struct nd_region *nd_region)
return NULL;
}
dev_set_name(dev, "namespace%d.%d", nd_region->id, nspm->id);
- dev->parent = &nd_region->dev;
dev->groups = nd_namespace_attribute_groups;
nd_namespace_pmem_set_resource(nd_region, nspm, 0);
diff --git a/drivers/nvdimm/nd-core.h b/drivers/nvdimm/nd-core.h
index ac68072fb8cd..182258f64417 100644
--- a/drivers/nvdimm/nd-core.h
+++ b/drivers/nvdimm/nd-core.h
@@ -14,7 +14,6 @@
#define __ND_CORE_H__
#include <linux/libnvdimm.h>
#include <linux/device.h>
-#include <linux/libnvdimm.h>
#include <linux/sizes.h>
#include <linux/mutex.h>
#include <linux/nd.h>
diff --git a/drivers/nvdimm/nd.h b/drivers/nvdimm/nd.h
index 98317e7ce5b5..e79cc8e5c114 100644
--- a/drivers/nvdimm/nd.h
+++ b/drivers/nvdimm/nd.h
@@ -241,6 +241,8 @@ struct nvdimm_drvdata *to_ndd(struct nd_mapping *nd_mapping);
int nvdimm_check_config_data(struct device *dev);
int nvdimm_init_nsarea(struct nvdimm_drvdata *ndd);
int nvdimm_init_config_data(struct nvdimm_drvdata *ndd);
+int nvdimm_get_config_data(struct nvdimm_drvdata *ndd, void *buf,
+ size_t offset, size_t len);
int nvdimm_set_config_data(struct nvdimm_drvdata *ndd, size_t offset,
void *buf, size_t len);
long nvdimm_clear_poison(struct device *dev, phys_addr_t phys,
diff --git a/drivers/nvdimm/pfn_devs.c b/drivers/nvdimm/pfn_devs.c
index 3f7ad5bc443e..24c64090169e 100644
--- a/drivers/nvdimm/pfn_devs.c
+++ b/drivers/nvdimm/pfn_devs.c
@@ -361,6 +361,65 @@ struct device *nd_pfn_create(struct nd_region *nd_region)
return dev;
}
+/*
+ * nd_pfn_clear_memmap_errors() clears any errors in the volatile memmap
+ * space associated with the namespace. If the memmap is set to DRAM, then
+ * this is a no-op. Since the memmap area is freshly initialized during
+ * probe, we have an opportunity to clear any badblocks in this area.
+ */
+static int nd_pfn_clear_memmap_errors(struct nd_pfn *nd_pfn)
+{
+ struct nd_region *nd_region = to_nd_region(nd_pfn->dev.parent);
+ struct nd_namespace_common *ndns = nd_pfn->ndns;
+ void *zero_page = page_address(ZERO_PAGE(0));
+ struct nd_pfn_sb *pfn_sb = nd_pfn->pfn_sb;
+ int num_bad, meta_num, rc, bb_present;
+ sector_t first_bad, meta_start;
+ struct nd_namespace_io *nsio;
+
+ if (nd_pfn->mode != PFN_MODE_PMEM)
+ return 0;
+
+ nsio = to_nd_namespace_io(&ndns->dev);
+ meta_start = (SZ_4K + sizeof(*pfn_sb)) >> 9;
+ meta_num = (le64_to_cpu(pfn_sb->dataoff) >> 9) - meta_start;
+
+ do {
+ unsigned long zero_len;
+ u64 nsoff;
+
+ bb_present = badblocks_check(&nd_region->bb, meta_start,
+ meta_num, &first_bad, &num_bad);
+ if (bb_present) {
+ dev_dbg(&nd_pfn->dev, "meta: %x badblocks at %lx\n",
+ num_bad, first_bad);
+ nsoff = ALIGN_DOWN((nd_region->ndr_start
+ + (first_bad << 9)) - nsio->res.start,
+ PAGE_SIZE);
+ zero_len = ALIGN(num_bad << 9, PAGE_SIZE);
+ while (zero_len) {
+ unsigned long chunk = min(zero_len, PAGE_SIZE);
+
+ rc = nvdimm_write_bytes(ndns, nsoff, zero_page,
+ chunk, 0);
+ if (rc)
+ break;
+
+ zero_len -= chunk;
+ nsoff += chunk;
+ }
+ if (rc) {
+ dev_err(&nd_pfn->dev,
+ "error clearing %x badblocks at %lx\n",
+ num_bad, first_bad);
+ return rc;
+ }
+ }
+ } while (bb_present);
+
+ return 0;
+}
+
int nd_pfn_validate(struct nd_pfn *nd_pfn, const char *sig)
{
u64 checksum, offset;
@@ -477,7 +536,7 @@ int nd_pfn_validate(struct nd_pfn *nd_pfn, const char *sig)
return -ENXIO;
}
- return 0;
+ return nd_pfn_clear_memmap_errors(nd_pfn);
}
EXPORT_SYMBOL(nd_pfn_validate);
diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c
index a75d10c23d80..0e39e3d1846f 100644
--- a/drivers/nvdimm/pmem.c
+++ b/drivers/nvdimm/pmem.c
@@ -421,9 +421,11 @@ static int pmem_attach_disk(struct device *dev,
addr = devm_memremap_pages(dev, &pmem->pgmap);
pmem->pfn_flags |= PFN_MAP;
memcpy(&bb_res, &pmem->pgmap.res, sizeof(bb_res));
- } else
+ } else {
addr = devm_memremap(dev, pmem->phys_addr,
pmem->size, ARCH_MEMREMAP_PMEM);
+ memcpy(&bb_res, &nsio->res, sizeof(bb_res));
+ }
/*
* At release time the queue must be frozen before
diff --git a/drivers/nvdimm/region_devs.c b/drivers/nvdimm/region_devs.c
index fa37afcd43ff..174a418cb171 100644
--- a/drivers/nvdimm/region_devs.c
+++ b/drivers/nvdimm/region_devs.c
@@ -560,10 +560,17 @@ static ssize_t region_badblocks_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
struct nd_region *nd_region = to_nd_region(dev);
+ ssize_t rc;
- return badblocks_show(&nd_region->bb, buf, 0);
-}
+ device_lock(dev);
+ if (dev->driver)
+ rc = badblocks_show(&nd_region->bb, buf, 0);
+ else
+ rc = -ENXIO;
+ device_unlock(dev);
+ return rc;
+}
static DEVICE_ATTR(badblocks, 0444, region_badblocks_show, NULL);
static ssize_t resource_show(struct device *dev,