From 00289cd87676e14913d2d8492d1ce05c4baafdae Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Wed, 17 Jul 2019 18:07:53 -0700 Subject: drivers/base: Introduce kill_device() The libnvdimm subsystem arranges for devices to be destroyed as a result of a sysfs operation. Since device_unregister() cannot be called from an actively running sysfs attribute of the same device libnvdimm arranges for device_unregister() to be performed in an out-of-line async context. The driver core maintains a 'dead' state for coordinating its own racing async registration / de-registration requests. Rather than add local 'dead' state tracking infrastructure to libnvdimm device objects, export the existing state tracking via a new kill_device() helper. The kill_device() helper simply marks the device as dead, i.e. that it is on its way to device_del(), or returns that the device was already dead. This can be used in advance of calling device_unregister() for subsystems like libnvdimm that might need to handle multiple user threads racing to delete a device. This refactoring does not change any behavior, but it is a pre-requisite for follow-on fixes and therefore marked for -stable. Cc: Greg Kroah-Hartman Cc: "Rafael J. Wysocki" Fixes: 4d88a97aa9e8 ("libnvdimm, nvdimm: dimm driver and base libnvdimm device-driver...") Cc: Tested-by: Jane Chu Reviewed-by: Greg Kroah-Hartman Link: https://lore.kernel.org/r/156341207332.292348.14959761496009347574.stgit@dwillia2-desk3.amr.corp.intel.com Signed-off-by: Dan Williams --- drivers/base/core.c | 27 +++++++++++++++++++-------- 1 file changed, 19 insertions(+), 8 deletions(-) (limited to 'drivers/base') diff --git a/drivers/base/core.c b/drivers/base/core.c index fd7511e04e62..eaf3aa0cb803 100644 --- a/drivers/base/core.c +++ b/drivers/base/core.c @@ -2211,6 +2211,24 @@ void put_device(struct device *dev) } EXPORT_SYMBOL_GPL(put_device); +bool kill_device(struct device *dev) +{ + /* + * Require the device lock and set the "dead" flag to guarantee that + * the update behavior is consistent with the other bitfields near + * it and that we cannot have an asynchronous probe routine trying + * to run while we are tearing out the bus/class/sysfs from + * underneath the device. + */ + lockdep_assert_held(&dev->mutex); + + if (dev->p->dead) + return false; + dev->p->dead = true; + return true; +} +EXPORT_SYMBOL_GPL(kill_device); + /** * device_del - delete device from system. * @dev: device. @@ -2230,15 +2248,8 @@ void device_del(struct device *dev) struct kobject *glue_dir = NULL; struct class_interface *class_intf; - /* - * Hold the device lock and set the "dead" flag to guarantee that - * the update behavior is consistent with the other bitfields near - * it and that we cannot have an asynchronous probe routine trying - * to run while we are tearing out the bus/class/sysfs from - * underneath the device. - */ device_lock(dev); - dev->p->dead = true; + kill_device(dev); device_unlock(dev); /* Notify clients of device removal. This call must come -- cgit v1.2.3 From 87a30e1f05d73a34e6d1895065541369131aaf1c Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Wed, 17 Jul 2019 18:08:26 -0700 Subject: driver-core, libnvdimm: Let device subsystems add local lockdep coverage For good reason, the standard device_lock() is marked lockdep_set_novalidate_class() because there is simply no sane way to describe the myriad ways the device_lock() ordered with other locks. However, that leaves subsystems that know their own local device_lock() ordering rules to find lock ordering mistakes manually. Instead, introduce an optional / additional lockdep-enabled lock that a subsystem can acquire in all the same paths that the device_lock() is acquired. A conversion of the NFIT driver and NVDIMM subsystem to a lockdep-validate device_lock() scheme is included. The debug_nvdimm_lock() implementation implements the correct lock-class and stacking order for the libnvdimm device topology hierarchy. Yes, this is a hack, but hopefully it is a useful hack for other subsystems device_lock() debug sessions. Quoting Greg: "Yeah, it feels a bit hacky but it's really up to a subsystem to mess up using it as much as anything else, so user beware :) I don't object to it if it makes things easier for you to debug." Cc: Ingo Molnar Cc: Ira Weiny Cc: Will Deacon Cc: Dave Jiang Cc: Keith Busch Cc: Peter Zijlstra Cc: Vishal Verma Cc: "Rafael J. Wysocki" Cc: Greg Kroah-Hartman Signed-off-by: Dan Williams Acked-by: Greg Kroah-Hartman Reviewed-by: Ira Weiny Link: https://lore.kernel.org/r/156341210661.292348.7014034644265455704.stgit@dwillia2-desk3.amr.corp.intel.com --- drivers/acpi/nfit/core.c | 28 ++++++++--------- drivers/acpi/nfit/nfit.h | 24 +++++++++++++++ drivers/base/core.c | 3 ++ drivers/nvdimm/btt_devs.c | 16 +++++----- drivers/nvdimm/bus.c | 28 ++++++++++------- drivers/nvdimm/core.c | 10 +++--- drivers/nvdimm/dimm_devs.c | 4 +-- drivers/nvdimm/namespace_devs.c | 36 +++++++++++----------- drivers/nvdimm/nd-core.h | 68 +++++++++++++++++++++++++++++++++++++++++ drivers/nvdimm/pfn_devs.c | 24 +++++++-------- drivers/nvdimm/pmem.c | 4 +-- drivers/nvdimm/region.c | 2 +- drivers/nvdimm/region_devs.c | 16 +++++----- include/linux/device.h | 5 +++ 14 files changed, 187 insertions(+), 81 deletions(-) (limited to 'drivers/base') diff --git a/drivers/acpi/nfit/core.c b/drivers/acpi/nfit/core.c index 23022cf20d26..f22139458ce1 100644 --- a/drivers/acpi/nfit/core.c +++ b/drivers/acpi/nfit/core.c @@ -1282,7 +1282,7 @@ static ssize_t hw_error_scrub_store(struct device *dev, if (rc) return rc; - device_lock(dev); + nfit_device_lock(dev); nd_desc = dev_get_drvdata(dev); if (nd_desc) { struct acpi_nfit_desc *acpi_desc = to_acpi_desc(nd_desc); @@ -1299,7 +1299,7 @@ static ssize_t hw_error_scrub_store(struct device *dev, break; } } - device_unlock(dev); + nfit_device_unlock(dev); if (rc) return rc; return size; @@ -1319,7 +1319,7 @@ static ssize_t scrub_show(struct device *dev, ssize_t rc = -ENXIO; bool busy; - device_lock(dev); + nfit_device_lock(dev); nd_desc = dev_get_drvdata(dev); if (!nd_desc) { device_unlock(dev); @@ -1339,7 +1339,7 @@ static ssize_t scrub_show(struct device *dev, } mutex_unlock(&acpi_desc->init_mutex); - device_unlock(dev); + nfit_device_unlock(dev); return rc; } @@ -1356,14 +1356,14 @@ static ssize_t scrub_store(struct device *dev, if (val != 1) return -EINVAL; - device_lock(dev); + nfit_device_lock(dev); nd_desc = dev_get_drvdata(dev); if (nd_desc) { struct acpi_nfit_desc *acpi_desc = to_acpi_desc(nd_desc); rc = acpi_nfit_ars_rescan(acpi_desc, ARS_REQ_LONG); } - device_unlock(dev); + nfit_device_unlock(dev); if (rc) return rc; return size; @@ -1749,9 +1749,9 @@ static void acpi_nvdimm_notify(acpi_handle handle, u32 event, void *data) struct acpi_device *adev = data; struct device *dev = &adev->dev; - device_lock(dev->parent); + nfit_device_lock(dev->parent); __acpi_nvdimm_notify(dev, event); - device_unlock(dev->parent); + nfit_device_unlock(dev->parent); } static bool acpi_nvdimm_has_method(struct acpi_device *adev, char *method) @@ -3457,8 +3457,8 @@ static int acpi_nfit_flush_probe(struct nvdimm_bus_descriptor *nd_desc) struct device *dev = acpi_desc->dev; /* Bounce the device lock to flush acpi_nfit_add / acpi_nfit_notify */ - device_lock(dev); - device_unlock(dev); + nfit_device_lock(dev); + nfit_device_unlock(dev); /* Bounce the init_mutex to complete initial registration */ mutex_lock(&acpi_desc->init_mutex); @@ -3602,8 +3602,8 @@ void acpi_nfit_shutdown(void *data) * acpi_nfit_ars_rescan() submissions have had a chance to * either submit or see ->cancel set. */ - device_lock(bus_dev); - device_unlock(bus_dev); + nfit_device_lock(bus_dev); + nfit_device_unlock(bus_dev); flush_workqueue(nfit_wq); } @@ -3746,9 +3746,9 @@ EXPORT_SYMBOL_GPL(__acpi_nfit_notify); static void acpi_nfit_notify(struct acpi_device *adev, u32 event) { - device_lock(&adev->dev); + nfit_device_lock(&adev->dev); __acpi_nfit_notify(&adev->dev, adev->handle, event); - device_unlock(&adev->dev); + nfit_device_unlock(&adev->dev); } static const struct acpi_device_id acpi_nfit_ids[] = { diff --git a/drivers/acpi/nfit/nfit.h b/drivers/acpi/nfit/nfit.h index 6ee2b02af73e..24241941181c 100644 --- a/drivers/acpi/nfit/nfit.h +++ b/drivers/acpi/nfit/nfit.h @@ -312,6 +312,30 @@ static inline struct acpi_nfit_desc *to_acpi_desc( return container_of(nd_desc, struct acpi_nfit_desc, nd_desc); } +#ifdef CONFIG_PROVE_LOCKING +static inline void nfit_device_lock(struct device *dev) +{ + device_lock(dev); + mutex_lock(&dev->lockdep_mutex); +} + +static inline void nfit_device_unlock(struct device *dev) +{ + mutex_unlock(&dev->lockdep_mutex); + device_unlock(dev); +} +#else +static inline void nfit_device_lock(struct device *dev) +{ + device_lock(dev); +} + +static inline void nfit_device_unlock(struct device *dev) +{ + device_unlock(dev); +} +#endif + const guid_t *to_nfit_uuid(enum nfit_uuids id); int acpi_nfit_init(struct acpi_nfit_desc *acpi_desc, void *nfit, acpi_size sz); void acpi_nfit_shutdown(void *data); diff --git a/drivers/base/core.c b/drivers/base/core.c index eaf3aa0cb803..4825949d6547 100644 --- a/drivers/base/core.c +++ b/drivers/base/core.c @@ -1663,6 +1663,9 @@ void device_initialize(struct device *dev) kobject_init(&dev->kobj, &device_ktype); INIT_LIST_HEAD(&dev->dma_pools); mutex_init(&dev->mutex); +#ifdef CONFIG_PROVE_LOCKING + mutex_init(&dev->lockdep_mutex); +#endif lockdep_set_novalidate_class(&dev->mutex); spin_lock_init(&dev->devres_lock); INIT_LIST_HEAD(&dev->devres_head); diff --git a/drivers/nvdimm/btt_devs.c b/drivers/nvdimm/btt_devs.c index 62d00fffa4af..3508a79110c7 100644 --- a/drivers/nvdimm/btt_devs.c +++ b/drivers/nvdimm/btt_devs.c @@ -62,14 +62,14 @@ static ssize_t sector_size_store(struct device *dev, struct nd_btt *nd_btt = to_nd_btt(dev); ssize_t rc; - device_lock(dev); + nd_device_lock(dev); nvdimm_bus_lock(dev); rc = nd_size_select_store(dev, buf, &nd_btt->lbasize, btt_lbasize_supported); dev_dbg(dev, "result: %zd wrote: %s%s", rc, buf, buf[len - 1] == '\n' ? "" : "\n"); nvdimm_bus_unlock(dev); - device_unlock(dev); + nd_device_unlock(dev); return rc ? rc : len; } @@ -91,11 +91,11 @@ static ssize_t uuid_store(struct device *dev, struct nd_btt *nd_btt = to_nd_btt(dev); ssize_t rc; - device_lock(dev); + nd_device_lock(dev); rc = nd_uuid_store(dev, &nd_btt->uuid, buf, len); dev_dbg(dev, "result: %zd wrote: %s%s", rc, buf, buf[len - 1] == '\n' ? "" : "\n"); - device_unlock(dev); + nd_device_unlock(dev); return rc ? rc : len; } @@ -120,13 +120,13 @@ static ssize_t namespace_store(struct device *dev, struct nd_btt *nd_btt = to_nd_btt(dev); ssize_t rc; - device_lock(dev); + nd_device_lock(dev); nvdimm_bus_lock(dev); rc = nd_namespace_store(dev, &nd_btt->ndns, buf, len); dev_dbg(dev, "result: %zd wrote: %s%s", rc, buf, buf[len - 1] == '\n' ? "" : "\n"); nvdimm_bus_unlock(dev); - device_unlock(dev); + nd_device_unlock(dev); return rc; } @@ -138,14 +138,14 @@ static ssize_t size_show(struct device *dev, struct nd_btt *nd_btt = to_nd_btt(dev); ssize_t rc; - device_lock(dev); + nd_device_lock(dev); if (dev->driver) rc = sprintf(buf, "%llu\n", nd_btt->size); else { /* no size to convey if the btt instance is disabled */ rc = -ENXIO; } - device_unlock(dev); + nd_device_unlock(dev); return rc; } diff --git a/drivers/nvdimm/bus.c b/drivers/nvdimm/bus.c index df41f3571dc9..798c5c4aea9c 100644 --- a/drivers/nvdimm/bus.c +++ b/drivers/nvdimm/bus.c @@ -26,7 +26,7 @@ int nvdimm_major; static int nvdimm_bus_major; -static struct class *nd_class; +struct class *nd_class; static DEFINE_IDA(nd_ida); static int to_nd_device_type(struct device *dev) @@ -91,7 +91,10 @@ static int nvdimm_bus_probe(struct device *dev) dev->driver->name, dev_name(dev)); nvdimm_bus_probe_start(nvdimm_bus); + debug_nvdimm_lock(dev); rc = nd_drv->probe(dev); + debug_nvdimm_unlock(dev); + if (rc == 0) nd_region_probe_success(nvdimm_bus, dev); else @@ -113,8 +116,11 @@ static int nvdimm_bus_remove(struct device *dev) struct nvdimm_bus *nvdimm_bus = walk_to_nvdimm_bus(dev); int rc = 0; - if (nd_drv->remove) + if (nd_drv->remove) { + debug_nvdimm_lock(dev); rc = nd_drv->remove(dev); + debug_nvdimm_unlock(dev); + } nd_region_disable(nvdimm_bus, dev); dev_dbg(&nvdimm_bus->dev, "%s.remove(%s) = %d\n", dev->driver->name, @@ -140,7 +146,7 @@ static void nvdimm_bus_shutdown(struct device *dev) void nd_device_notify(struct device *dev, enum nvdimm_event event) { - device_lock(dev); + nd_device_lock(dev); if (dev->driver) { struct nd_device_driver *nd_drv; @@ -148,7 +154,7 @@ void nd_device_notify(struct device *dev, enum nvdimm_event event) if (nd_drv->notify) nd_drv->notify(dev, event); } - device_unlock(dev); + nd_device_unlock(dev); } EXPORT_SYMBOL(nd_device_notify); @@ -296,7 +302,7 @@ static void nvdimm_bus_release(struct device *dev) kfree(nvdimm_bus); } -static bool is_nvdimm_bus(struct device *dev) +bool is_nvdimm_bus(struct device *dev) { return dev->release == nvdimm_bus_release; } @@ -575,9 +581,9 @@ void nd_device_unregister(struct device *dev, enum nd_async_mode mode) * or otherwise let the async path handle it if the * unregistration was already queued. */ - device_lock(dev); + nd_device_lock(dev); killed = kill_device(dev); - device_unlock(dev); + nd_device_unlock(dev); if (!killed) return; @@ -888,10 +894,10 @@ void wait_nvdimm_bus_probe_idle(struct device *dev) if (nvdimm_bus->probe_active == 0) break; nvdimm_bus_unlock(dev); - device_unlock(dev); + nd_device_unlock(dev); wait_event(nvdimm_bus->wait, nvdimm_bus->probe_active == 0); - device_lock(dev); + nd_device_lock(dev); nvdimm_bus_lock(dev); } while (true); } @@ -1107,7 +1113,7 @@ static int __nd_ioctl(struct nvdimm_bus *nvdimm_bus, struct nvdimm *nvdimm, goto out; } - device_lock(dev); + nd_device_lock(dev); nvdimm_bus_lock(dev); rc = nd_cmd_clear_to_send(nvdimm_bus, nvdimm, func, buf); if (rc) @@ -1129,7 +1135,7 @@ static int __nd_ioctl(struct nvdimm_bus *nvdimm_bus, struct nvdimm *nvdimm, out_unlock: nvdimm_bus_unlock(dev); - device_unlock(dev); + nd_device_unlock(dev); out: kfree(in_env); kfree(out_env); diff --git a/drivers/nvdimm/core.c b/drivers/nvdimm/core.c index 5e1f060547bf..9204f1e9fd14 100644 --- a/drivers/nvdimm/core.c +++ b/drivers/nvdimm/core.c @@ -246,7 +246,7 @@ static int nd_uuid_parse(struct device *dev, u8 *uuid_out, const char *buf, * * Enforce that uuids can only be changed while the device is disabled * (driver detached) - * LOCKING: expects device_lock() is held on entry + * LOCKING: expects nd_device_lock() is held on entry */ int nd_uuid_store(struct device *dev, u8 **uuid_out, const char *buf, size_t len) @@ -347,15 +347,15 @@ static DEVICE_ATTR_RO(provider); static int flush_namespaces(struct device *dev, void *data) { - device_lock(dev); - device_unlock(dev); + nd_device_lock(dev); + nd_device_unlock(dev); return 0; } static int flush_regions_dimms(struct device *dev, void *data) { - device_lock(dev); - device_unlock(dev); + nd_device_lock(dev); + nd_device_unlock(dev); device_for_each_child(dev, NULL, flush_namespaces); return 0; } diff --git a/drivers/nvdimm/dimm_devs.c b/drivers/nvdimm/dimm_devs.c index dfecd6e17043..29a065e769ea 100644 --- a/drivers/nvdimm/dimm_devs.c +++ b/drivers/nvdimm/dimm_devs.c @@ -484,12 +484,12 @@ static ssize_t security_store(struct device *dev, * done while probing is idle and the DIMM is not in active use * in any region. */ - device_lock(dev); + nd_device_lock(dev); nvdimm_bus_lock(dev); wait_nvdimm_bus_probe_idle(dev); rc = __security_store(dev, buf, len); nvdimm_bus_unlock(dev); - device_unlock(dev); + nd_device_unlock(dev); return rc; } diff --git a/drivers/nvdimm/namespace_devs.c b/drivers/nvdimm/namespace_devs.c index a434a5964cb9..92cd809d7e43 100644 --- a/drivers/nvdimm/namespace_devs.c +++ b/drivers/nvdimm/namespace_devs.c @@ -410,7 +410,7 @@ static ssize_t alt_name_store(struct device *dev, struct nd_region *nd_region = to_nd_region(dev->parent); ssize_t rc; - device_lock(dev); + nd_device_lock(dev); nvdimm_bus_lock(dev); wait_nvdimm_bus_probe_idle(dev); rc = __alt_name_store(dev, buf, len); @@ -418,7 +418,7 @@ static ssize_t alt_name_store(struct device *dev, rc = nd_namespace_label_update(nd_region, dev); dev_dbg(dev, "%s(%zd)\n", rc < 0 ? "fail " : "", rc); nvdimm_bus_unlock(dev); - device_unlock(dev); + nd_device_unlock(dev); return rc < 0 ? rc : len; } @@ -1077,7 +1077,7 @@ static ssize_t size_store(struct device *dev, if (rc) return rc; - device_lock(dev); + nd_device_lock(dev); nvdimm_bus_lock(dev); wait_nvdimm_bus_probe_idle(dev); rc = __size_store(dev, val); @@ -1103,7 +1103,7 @@ static ssize_t size_store(struct device *dev, dev_dbg(dev, "%llx %s (%d)\n", val, rc < 0 ? "fail" : "success", rc); nvdimm_bus_unlock(dev); - device_unlock(dev); + nd_device_unlock(dev); return rc < 0 ? rc : len; } @@ -1286,7 +1286,7 @@ static ssize_t uuid_store(struct device *dev, } else return -ENXIO; - device_lock(dev); + nd_device_lock(dev); nvdimm_bus_lock(dev); wait_nvdimm_bus_probe_idle(dev); if (to_ndns(dev)->claim) @@ -1302,7 +1302,7 @@ static ssize_t uuid_store(struct device *dev, dev_dbg(dev, "result: %zd wrote: %s%s", rc, buf, buf[len - 1] == '\n' ? "" : "\n"); nvdimm_bus_unlock(dev); - device_unlock(dev); + nd_device_unlock(dev); return rc < 0 ? rc : len; } @@ -1376,7 +1376,7 @@ static ssize_t sector_size_store(struct device *dev, } else return -ENXIO; - device_lock(dev); + nd_device_lock(dev); nvdimm_bus_lock(dev); if (to_ndns(dev)->claim) rc = -EBUSY; @@ -1387,7 +1387,7 @@ static ssize_t sector_size_store(struct device *dev, dev_dbg(dev, "result: %zd %s: %s%s", rc, rc < 0 ? "tried" : "wrote", buf, buf[len - 1] == '\n' ? "" : "\n"); nvdimm_bus_unlock(dev); - device_unlock(dev); + nd_device_unlock(dev); return rc ? rc : len; } @@ -1502,9 +1502,9 @@ static ssize_t holder_show(struct device *dev, struct nd_namespace_common *ndns = to_ndns(dev); ssize_t rc; - device_lock(dev); + nd_device_lock(dev); rc = sprintf(buf, "%s\n", ndns->claim ? dev_name(ndns->claim) : ""); - device_unlock(dev); + nd_device_unlock(dev); return rc; } @@ -1541,7 +1541,7 @@ static ssize_t holder_class_store(struct device *dev, struct nd_region *nd_region = to_nd_region(dev->parent); ssize_t rc; - device_lock(dev); + nd_device_lock(dev); nvdimm_bus_lock(dev); wait_nvdimm_bus_probe_idle(dev); rc = __holder_class_store(dev, buf); @@ -1549,7 +1549,7 @@ static ssize_t holder_class_store(struct device *dev, rc = nd_namespace_label_update(nd_region, dev); dev_dbg(dev, "%s(%zd)\n", rc < 0 ? "fail " : "", rc); nvdimm_bus_unlock(dev); - device_unlock(dev); + nd_device_unlock(dev); return rc < 0 ? rc : len; } @@ -1560,7 +1560,7 @@ static ssize_t holder_class_show(struct device *dev, struct nd_namespace_common *ndns = to_ndns(dev); ssize_t rc; - device_lock(dev); + nd_device_lock(dev); if (ndns->claim_class == NVDIMM_CCLASS_NONE) rc = sprintf(buf, "\n"); else if ((ndns->claim_class == NVDIMM_CCLASS_BTT) || @@ -1572,7 +1572,7 @@ static ssize_t holder_class_show(struct device *dev, rc = sprintf(buf, "dax\n"); else rc = sprintf(buf, "\n"); - device_unlock(dev); + nd_device_unlock(dev); return rc; } @@ -1586,7 +1586,7 @@ static ssize_t mode_show(struct device *dev, char *mode; ssize_t rc; - device_lock(dev); + nd_device_lock(dev); claim = ndns->claim; if (claim && is_nd_btt(claim)) mode = "safe"; @@ -1599,7 +1599,7 @@ static ssize_t mode_show(struct device *dev, else mode = "raw"; rc = sprintf(buf, "%s\n", mode); - device_unlock(dev); + nd_device_unlock(dev); return rc; } @@ -1703,8 +1703,8 @@ struct nd_namespace_common *nvdimm_namespace_common_probe(struct device *dev) * Flush any in-progess probes / removals in the driver * for the raw personality of this namespace. */ - device_lock(&ndns->dev); - device_unlock(&ndns->dev); + nd_device_lock(&ndns->dev); + nd_device_unlock(&ndns->dev); if (ndns->dev.driver) { dev_dbg(&ndns->dev, "is active, can't bind %s\n", dev_name(dev)); diff --git a/drivers/nvdimm/nd-core.h b/drivers/nvdimm/nd-core.h index 6cd470547106..0ac52b6eb00e 100644 --- a/drivers/nvdimm/nd-core.h +++ b/drivers/nvdimm/nd-core.h @@ -9,6 +9,7 @@ #include #include #include +#include "nd.h" extern struct list_head nvdimm_bus_list; extern struct mutex nvdimm_bus_list_mutex; @@ -182,4 +183,71 @@ ssize_t nd_namespace_store(struct device *dev, struct nd_namespace_common **_ndns, const char *buf, size_t len); struct nd_pfn *to_nd_pfn_safe(struct device *dev); +bool is_nvdimm_bus(struct device *dev); + +#ifdef CONFIG_PROVE_LOCKING +extern struct class *nd_class; + +enum { + LOCK_BUS, + LOCK_NDCTL, + LOCK_REGION, + LOCK_DIMM = LOCK_REGION, + LOCK_NAMESPACE, + LOCK_CLAIM, +}; + +static inline void debug_nvdimm_lock(struct device *dev) +{ + if (is_nd_region(dev)) + mutex_lock_nested(&dev->lockdep_mutex, LOCK_REGION); + else if (is_nvdimm(dev)) + mutex_lock_nested(&dev->lockdep_mutex, LOCK_DIMM); + else if (is_nd_btt(dev) || is_nd_pfn(dev) || is_nd_dax(dev)) + mutex_lock_nested(&dev->lockdep_mutex, LOCK_CLAIM); + else if (dev->parent && (is_nd_region(dev->parent))) + mutex_lock_nested(&dev->lockdep_mutex, LOCK_NAMESPACE); + else if (is_nvdimm_bus(dev)) + mutex_lock_nested(&dev->lockdep_mutex, LOCK_BUS); + else if (dev->class && dev->class == nd_class) + mutex_lock_nested(&dev->lockdep_mutex, LOCK_NDCTL); + else + dev_WARN(dev, "unknown lock level\n"); +} + +static inline void debug_nvdimm_unlock(struct device *dev) +{ + mutex_unlock(&dev->lockdep_mutex); +} + +static inline void nd_device_lock(struct device *dev) +{ + device_lock(dev); + debug_nvdimm_lock(dev); +} + +static inline void nd_device_unlock(struct device *dev) +{ + debug_nvdimm_unlock(dev); + device_unlock(dev); +} +#else +static inline void nd_device_lock(struct device *dev) +{ + device_lock(dev); +} + +static inline void nd_device_unlock(struct device *dev) +{ + device_unlock(dev); +} + +static inline void debug_nvdimm_lock(struct device *dev) +{ +} + +static inline void debug_nvdimm_unlock(struct device *dev) +{ +} +#endif #endif /* __ND_CORE_H__ */ diff --git a/drivers/nvdimm/pfn_devs.c b/drivers/nvdimm/pfn_devs.c index 0f81fc56bbfd..9b09fe18e666 100644 --- a/drivers/nvdimm/pfn_devs.c +++ b/drivers/nvdimm/pfn_devs.c @@ -67,7 +67,7 @@ static ssize_t mode_store(struct device *dev, struct nd_pfn *nd_pfn = to_nd_pfn_safe(dev); ssize_t rc = 0; - device_lock(dev); + nd_device_lock(dev); nvdimm_bus_lock(dev); if (dev->driver) rc = -EBUSY; @@ -89,7 +89,7 @@ static ssize_t mode_store(struct device *dev, dev_dbg(dev, "result: %zd wrote: %s%s", rc, buf, buf[len - 1] == '\n' ? "" : "\n"); nvdimm_bus_unlock(dev); - device_unlock(dev); + nd_device_unlock(dev); return rc ? rc : len; } @@ -132,14 +132,14 @@ static ssize_t align_store(struct device *dev, struct nd_pfn *nd_pfn = to_nd_pfn_safe(dev); ssize_t rc; - device_lock(dev); + nd_device_lock(dev); nvdimm_bus_lock(dev); rc = nd_size_select_store(dev, buf, &nd_pfn->align, nd_pfn_supported_alignments()); dev_dbg(dev, "result: %zd wrote: %s%s", rc, buf, buf[len - 1] == '\n' ? "" : "\n"); nvdimm_bus_unlock(dev); - device_unlock(dev); + nd_device_unlock(dev); return rc ? rc : len; } @@ -161,11 +161,11 @@ static ssize_t uuid_store(struct device *dev, struct nd_pfn *nd_pfn = to_nd_pfn_safe(dev); ssize_t rc; - device_lock(dev); + nd_device_lock(dev); rc = nd_uuid_store(dev, &nd_pfn->uuid, buf, len); dev_dbg(dev, "result: %zd wrote: %s%s", rc, buf, buf[len - 1] == '\n' ? "" : "\n"); - device_unlock(dev); + nd_device_unlock(dev); return rc ? rc : len; } @@ -190,13 +190,13 @@ static ssize_t namespace_store(struct device *dev, struct nd_pfn *nd_pfn = to_nd_pfn_safe(dev); ssize_t rc; - device_lock(dev); + nd_device_lock(dev); nvdimm_bus_lock(dev); rc = nd_namespace_store(dev, &nd_pfn->ndns, buf, len); dev_dbg(dev, "result: %zd wrote: %s%s", rc, buf, buf[len - 1] == '\n' ? "" : "\n"); nvdimm_bus_unlock(dev); - device_unlock(dev); + nd_device_unlock(dev); return rc; } @@ -208,7 +208,7 @@ static ssize_t resource_show(struct device *dev, struct nd_pfn *nd_pfn = to_nd_pfn_safe(dev); ssize_t rc; - device_lock(dev); + nd_device_lock(dev); if (dev->driver) { struct nd_pfn_sb *pfn_sb = nd_pfn->pfn_sb; u64 offset = __le64_to_cpu(pfn_sb->dataoff); @@ -222,7 +222,7 @@ static ssize_t resource_show(struct device *dev, /* no address to convey if the pfn instance is disabled */ rc = -ENXIO; } - device_unlock(dev); + nd_device_unlock(dev); return rc; } @@ -234,7 +234,7 @@ static ssize_t size_show(struct device *dev, struct nd_pfn *nd_pfn = to_nd_pfn_safe(dev); ssize_t rc; - device_lock(dev); + nd_device_lock(dev); if (dev->driver) { struct nd_pfn_sb *pfn_sb = nd_pfn->pfn_sb; u64 offset = __le64_to_cpu(pfn_sb->dataoff); @@ -250,7 +250,7 @@ static ssize_t size_show(struct device *dev, /* no size to convey if the pfn instance is disabled */ rc = -ENXIO; } - device_unlock(dev); + nd_device_unlock(dev); return rc; } diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c index 28cb44c61d4a..53797e7be18a 100644 --- a/drivers/nvdimm/pmem.c +++ b/drivers/nvdimm/pmem.c @@ -520,8 +520,8 @@ static int nd_pmem_remove(struct device *dev) nvdimm_namespace_detach_btt(to_nd_btt(dev)); else { /* - * Note, this assumes device_lock() context to not race - * nd_pmem_notify() + * Note, this assumes nd_device_lock() context to not + * race nd_pmem_notify() */ sysfs_put(pmem->bb_state); pmem->bb_state = NULL; diff --git a/drivers/nvdimm/region.c b/drivers/nvdimm/region.c index 488c47ac4c4a..37bf8719a2a4 100644 --- a/drivers/nvdimm/region.c +++ b/drivers/nvdimm/region.c @@ -102,7 +102,7 @@ static int nd_region_remove(struct device *dev) nvdimm_bus_unlock(dev); /* - * Note, this assumes device_lock() context to not race + * Note, this assumes nd_device_lock() context to not race * nd_region_notify() */ sysfs_put(nd_region->bb_state); diff --git a/drivers/nvdimm/region_devs.c b/drivers/nvdimm/region_devs.c index a15276cdec7d..91b5a7ade0d5 100644 --- a/drivers/nvdimm/region_devs.c +++ b/drivers/nvdimm/region_devs.c @@ -329,7 +329,7 @@ static ssize_t set_cookie_show(struct device *dev, * the v1.1 namespace label cookie definition. To read all this * data we need to wait for probing to settle. */ - device_lock(dev); + nd_device_lock(dev); nvdimm_bus_lock(dev); wait_nvdimm_bus_probe_idle(dev); if (nd_region->ndr_mappings) { @@ -346,7 +346,7 @@ static ssize_t set_cookie_show(struct device *dev, } } nvdimm_bus_unlock(dev); - device_unlock(dev); + nd_device_unlock(dev); if (rc) return rc; @@ -422,12 +422,12 @@ static ssize_t available_size_show(struct device *dev, * memory nvdimm_bus_lock() is dropped, but that's userspace's * problem to not race itself. */ - device_lock(dev); + nd_device_lock(dev); nvdimm_bus_lock(dev); wait_nvdimm_bus_probe_idle(dev); available = nd_region_available_dpa(nd_region); nvdimm_bus_unlock(dev); - device_unlock(dev); + nd_device_unlock(dev); return sprintf(buf, "%llu\n", available); } @@ -439,12 +439,12 @@ static ssize_t max_available_extent_show(struct device *dev, struct nd_region *nd_region = to_nd_region(dev); unsigned long long available = 0; - device_lock(dev); + nd_device_lock(dev); nvdimm_bus_lock(dev); wait_nvdimm_bus_probe_idle(dev); available = nd_region_allocatable_dpa(nd_region); nvdimm_bus_unlock(dev); - device_unlock(dev); + nd_device_unlock(dev); return sprintf(buf, "%llu\n", available); } @@ -563,12 +563,12 @@ static ssize_t region_badblocks_show(struct device *dev, struct nd_region *nd_region = to_nd_region(dev); ssize_t rc; - device_lock(dev); + nd_device_lock(dev); if (dev->driver) rc = badblocks_show(&nd_region->bb, buf, 0); else rc = -ENXIO; - device_unlock(dev); + nd_device_unlock(dev); return rc; } diff --git a/include/linux/device.h b/include/linux/device.h index 0da5c67f6be1..9237b857b598 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -909,6 +909,8 @@ struct dev_links_info { * This identifies the device type and carries type-specific * information. * @mutex: Mutex to synchronize calls to its driver. + * @lockdep_mutex: An optional debug lock that a subsystem can use as a + * peer lock to gain localized lockdep coverage of the device_lock. * @bus: Type of bus device is on. * @driver: Which driver has allocated this * @platform_data: Platform data specific to the device. @@ -991,6 +993,9 @@ struct device { core doesn't touch it */ void *driver_data; /* Driver data, set and get with dev_set_drvdata/dev_get_drvdata */ +#ifdef CONFIG_PROVE_LOCKING + struct mutex lockdep_mutex; +#endif struct mutex mutex; /* mutex to synchronize calls to * its driver. */ -- cgit v1.2.3 From c8917b8ff09e8a4d6ef77e32ce0052f7158baa1f Mon Sep 17 00:00:00 2001 From: Mauro Rossi Date: Mon, 22 Jul 2019 07:55:36 +0200 Subject: firmware: fix build errors in paged buffer handling code fw_{grow,map}_paged_buf() need to be defined as static inline when CONFIG_FW_LOADER_PAGED_BUF is not enabled, infact fw_free_paged_buf() is also defined as static inline when CONFIG_FW_LOADER_PAGED_BUF is not enabled. Fixes the following mutiple definition building errors for Android kernel: drivers/base/firmware_loader/fallback_efi.o: In function `fw_grow_paged_buf': fallback_efi.c:(.text+0x0): multiple definition of `fw_grow_paged_buf' drivers/base/firmware_loader/main.o:(.text+0x73b): first defined here drivers/base/firmware_loader/fallback_efi.o: In function `fw_map_paged_buf': fallback_efi.c:(.text+0xf): multiple definition of `fw_map_paged_buf' drivers/base/firmware_loader/main.o:(.text+0x74a): first defined here [ slightly corrected the patch description -- tiwai ] Fixes: 5342e7093ff2 ("firmware: Factor out the paged buffer handling code") Fixes: 82fd7a8142a1 ("firmware: Add support for loading compressed files") Signed-off-by: Mauro Rossi Signed-off-by: Takashi Iwai Link: https://lore.kernel.org/r/20190722055536.15342-1-tiwai@suse.de Signed-off-by: Greg Kroah-Hartman --- drivers/base/firmware_loader/firmware.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/base') diff --git a/drivers/base/firmware_loader/firmware.h b/drivers/base/firmware_loader/firmware.h index 7048a41973ed..842e63f19f22 100644 --- a/drivers/base/firmware_loader/firmware.h +++ b/drivers/base/firmware_loader/firmware.h @@ -141,8 +141,8 @@ int fw_grow_paged_buf(struct fw_priv *fw_priv, int pages_needed); int fw_map_paged_buf(struct fw_priv *fw_priv); #else static inline void fw_free_paged_buf(struct fw_priv *fw_priv) {} -int fw_grow_paged_buf(struct fw_priv *fw_priv, int pages_needed) { return -ENXIO; } -int fw_map_paged_buf(struct fw_priv *fw_priv) { return -ENXIO; } +static int fw_grow_paged_buf(struct fw_priv *fw_priv, int pages_needed) { return -ENXIO; } +static int fw_map_paged_buf(struct fw_priv *fw_priv) { return -ENXIO; } #endif #endif /* __FIRMWARE_LOADER_H */ -- cgit v1.2.3 From 333a2101f47c4360ee78bb827277615d405b1cf7 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 23 Jul 2019 10:11:59 +0200 Subject: firmware: Fix missing inline I mistakenly dropped the inline while resolving the patch conflicts in the previous fix patch. Without inline, we get compiler warnings wrt unused functions. Note that Mauro's original patch contained the correct changes; it's all my fault to submit a patch before a morning coffee. Fixes: c8917b8ff09e ("firmware: fix build errors in paged buffer handling code") Signed-off-by: Takashi Iwai Link: https://lore.kernel.org/r/20190723081159.22624-1-tiwai@suse.de Signed-off-by: Greg Kroah-Hartman --- drivers/base/firmware_loader/firmware.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/base') diff --git a/drivers/base/firmware_loader/firmware.h b/drivers/base/firmware_loader/firmware.h index 842e63f19f22..7ecd590e67fe 100644 --- a/drivers/base/firmware_loader/firmware.h +++ b/drivers/base/firmware_loader/firmware.h @@ -141,8 +141,8 @@ int fw_grow_paged_buf(struct fw_priv *fw_priv, int pages_needed); int fw_map_paged_buf(struct fw_priv *fw_priv); #else static inline void fw_free_paged_buf(struct fw_priv *fw_priv) {} -static int fw_grow_paged_buf(struct fw_priv *fw_priv, int pages_needed) { return -ENXIO; } -static int fw_map_paged_buf(struct fw_priv *fw_priv) { return -ENXIO; } +static inline int fw_grow_paged_buf(struct fw_priv *fw_priv, int pages_needed) { return -ENXIO; } +static inline int fw_map_paged_buf(struct fw_priv *fw_priv) { return -ENXIO; } #endif #endif /* __FIRMWARE_LOADER_H */ -- cgit v1.2.3 From 46c42d844211ef5902e32aa507beac0817c585e9 Mon Sep 17 00:00:00 2001 From: Brian Norris Date: Mon, 29 Jul 2019 13:49:54 -0700 Subject: driver core: platform: return -ENXIO for missing GpioInt Commit daaef255dc96 ("driver: platform: Support parsing GpioInt 0 in platform_get_irq()") broke the Embedded Controller driver on most LPC Chromebooks (i.e., most x86 Chromebooks), because cros_ec_lpc expects platform_get_irq() to return -ENXIO for non-existent IRQs. Unfortunately, acpi_dev_gpio_irq_get() doesn't follow this convention and returns -ENOENT instead. So we get this error from cros_ec_lpc: couldn't retrieve IRQ number (-2) I see a variety of drivers that treat -ENXIO specially, so rather than fix all of them, let's fix up the API to restore its previous behavior. I reported this on v2 of this patch: https://lore.kernel.org/lkml/20190220180538.GA42642@google.com/ but apparently the patch had already been merged before v3 got sent out: https://lore.kernel.org/lkml/20190221193429.161300-1-egranata@chromium.org/ and the result is that the bug landed and remains unfixed. I differ from the v3 patch by: * allowing for ret==0, even though acpi_dev_gpio_irq_get() specifically documents (and enforces) that 0 is not a valid return value (noted on the v3 review) * adding a small comment Reported-by: Brian Norris Reported-by: Salvatore Bellizzi Cc: Enrico Granata Cc: Fixes: daaef255dc96 ("driver: platform: Support parsing GpioInt 0 in platform_get_irq()") Signed-off-by: Brian Norris Reviewed-by: Andy Shevchenko Acked-by: Enrico Granata Link: https://lore.kernel.org/r/20190729204954.25510-1-briannorris@chromium.org Signed-off-by: Greg Kroah-Hartman --- drivers/base/platform.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'drivers/base') diff --git a/drivers/base/platform.c b/drivers/base/platform.c index 506a0175a5a7..ec974ba9c0c4 100644 --- a/drivers/base/platform.c +++ b/drivers/base/platform.c @@ -157,8 +157,13 @@ int platform_get_irq(struct platform_device *dev, unsigned int num) * the device will only expose one IRQ, and this fallback * allows a common code path across either kind of resource. */ - if (num == 0 && has_acpi_companion(&dev->dev)) - return acpi_dev_gpio_irq_get(ACPI_COMPANION(&dev->dev), num); + if (num == 0 && has_acpi_companion(&dev->dev)) { + int ret = acpi_dev_gpio_irq_get(ACPI_COMPANION(&dev->dev), num); + + /* Our callers expect -ENXIO for missing IRQs. */ + if (ret >= 0 || ret == -EPROBE_DEFER) + return ret; + } return -ENXIO; #endif -- cgit v1.2.3 From ac43432cb1f5c2950408534987e57c2071e24d8f Mon Sep 17 00:00:00 2001 From: Muchun Song Date: Sat, 27 Jul 2019 11:21:22 +0800 Subject: driver core: Fix use-after-free and double free on glue directory There is a race condition between removing glue directory and adding a new device under the glue dir. It can be reproduced in following test: CPU1: CPU2: device_add() get_device_parent() class_dir_create_and_add() kobject_add_internal() create_dir() // create glue_dir device_add() get_device_parent() kobject_get() // get glue_dir device_del() cleanup_glue_dir() kobject_del(glue_dir) kobject_add() kobject_add_internal() create_dir() // in glue_dir sysfs_create_dir_ns() kernfs_create_dir_ns(sd) sysfs_remove_dir() // glue_dir->sd=NULL sysfs_put() // free glue_dir->sd // sd is freed kernfs_new_node(sd) kernfs_get(glue_dir) kernfs_add_one() kernfs_put() Before CPU1 remove last child device under glue dir, if CPU2 add a new device under glue dir, the glue_dir kobject reference count will be increase to 2 via kobject_get() in get_device_parent(). And CPU2 has been called kernfs_create_dir_ns(), but not call kernfs_new_node(). Meanwhile, CPU1 call sysfs_remove_dir() and sysfs_put(). This result in glue_dir->sd is freed and it's reference count will be 0. Then CPU2 call kernfs_get(glue_dir) will trigger a warning in kernfs_get() and increase it's reference count to 1. Because glue_dir->sd is freed by CPU1, the next call kernfs_add_one() by CPU2 will fail(This is also use-after-free) and call kernfs_put() to decrease reference count. Because the reference count is decremented to 0, it will also call kmem_cache_free() to free the glue_dir->sd again. This will result in double free. In order to avoid this happening, we also should make sure that kernfs_node for glue_dir is released in CPU1 only when refcount for glue_dir kobj is 1 to fix this race. The following calltrace is captured in kernel 4.14 with the following patch applied: commit 726e41097920 ("drivers: core: Remove glue dirs from sysfs earlier") -------------------------------------------------------------------------- [ 3.633703] WARNING: CPU: 4 PID: 513 at .../fs/kernfs/dir.c:494 Here is WARN_ON(!atomic_read(&kn->count) in kernfs_get(). .... [ 3.633986] Call trace: [ 3.633991] kernfs_create_dir_ns+0xa8/0xb0 [ 3.633994] sysfs_create_dir_ns+0x54/0xe8 [ 3.634001] kobject_add_internal+0x22c/0x3f0 [ 3.634005] kobject_add+0xe4/0x118 [ 3.634011] device_add+0x200/0x870 [ 3.634017] _request_firmware+0x958/0xc38 [ 3.634020] request_firmware_into_buf+0x4c/0x70 .... [ 3.634064] kernel BUG at .../mm/slub.c:294! Here is BUG_ON(object == fp) in set_freepointer(). .... [ 3.634346] Call trace: [ 3.634351] kmem_cache_free+0x504/0x6b8 [ 3.634355] kernfs_put+0x14c/0x1d8 [ 3.634359] kernfs_create_dir_ns+0x88/0xb0 [ 3.634362] sysfs_create_dir_ns+0x54/0xe8 [ 3.634366] kobject_add_internal+0x22c/0x3f0 [ 3.634370] kobject_add+0xe4/0x118 [ 3.634374] device_add+0x200/0x870 [ 3.634378] _request_firmware+0x958/0xc38 [ 3.634381] request_firmware_into_buf+0x4c/0x70 -------------------------------------------------------------------------- Fixes: 726e41097920 ("drivers: core: Remove glue dirs from sysfs earlier") Signed-off-by: Muchun Song Reviewed-by: Mukesh Ojha Signed-off-by: Prateek Sood Link: https://lore.kernel.org/r/20190727032122.24639-1-smuchun@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/base/core.c | 53 ++++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 52 insertions(+), 1 deletion(-) (limited to 'drivers/base') diff --git a/drivers/base/core.c b/drivers/base/core.c index da84a73f2ba6..c83d7e87764a 100644 --- a/drivers/base/core.c +++ b/drivers/base/core.c @@ -1820,12 +1820,63 @@ static inline struct kobject *get_glue_dir(struct device *dev) */ static void cleanup_glue_dir(struct device *dev, struct kobject *glue_dir) { + unsigned int ref; + /* see if we live in a "glue" directory */ if (!live_in_glue_dir(glue_dir, dev)) return; mutex_lock(&gdp_mutex); - if (!kobject_has_children(glue_dir)) + /** + * There is a race condition between removing glue directory + * and adding a new device under the glue directory. + * + * CPU1: CPU2: + * + * device_add() + * get_device_parent() + * class_dir_create_and_add() + * kobject_add_internal() + * create_dir() // create glue_dir + * + * device_add() + * get_device_parent() + * kobject_get() // get glue_dir + * + * device_del() + * cleanup_glue_dir() + * kobject_del(glue_dir) + * + * kobject_add() + * kobject_add_internal() + * create_dir() // in glue_dir + * sysfs_create_dir_ns() + * kernfs_create_dir_ns(sd) + * + * sysfs_remove_dir() // glue_dir->sd=NULL + * sysfs_put() // free glue_dir->sd + * + * // sd is freed + * kernfs_new_node(sd) + * kernfs_get(glue_dir) + * kernfs_add_one() + * kernfs_put() + * + * Before CPU1 remove last child device under glue dir, if CPU2 add + * a new device under glue dir, the glue_dir kobject reference count + * will be increase to 2 in kobject_get(k). And CPU2 has been called + * kernfs_create_dir_ns(). Meanwhile, CPU1 call sysfs_remove_dir() + * and sysfs_put(). This result in glue_dir->sd is freed. + * + * Then the CPU2 will see a stale "empty" but still potentially used + * glue dir around in kernfs_new_node(). + * + * In order to avoid this happening, we also should make sure that + * kernfs_node for glue_dir is released in CPU1 only when refcount + * for glue_dir kobj is 1. + */ + ref = kref_read(&glue_dir->kref); + if (!kobject_has_children(glue_dir) && !--ref) kobject_del(glue_dir); kobject_put(glue_dir); mutex_unlock(&gdp_mutex); -- cgit v1.2.3