diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2022-05-28 06:24:19 +0200 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2022-05-28 06:24:19 +0200 |
commit | 9d004b2f4fea97cde123e7f1939b80e77bf2e695 (patch) | |
tree | 3d7055413cecd081d09d30799add0c17c3fb8eb9 /drivers | |
parent | Merge tag 'clang-format-for-linus-v5.19-rc1' of https://github.com/ojeda/linux (diff) | |
parent | cxl/port: Enable HDM Capability after validating DVSEC Ranges (diff) | |
download | linux-9d004b2f4fea97cde123e7f1939b80e77bf2e695.tar.xz linux-9d004b2f4fea97cde123e7f1939b80e77bf2e695.zip |
Merge tag 'cxl-for-5.19' of git://git.kernel.org/pub/scm/linux/kernel/git/cxl/cxl
Pull cxl updates from Dan Williams:
"Compute Express Link (CXL) updates for this cycle.
The highlight is new driver-core infrastructure and CXL subsystem
changes for allowing lockdep to validate device_lock() usage. Thanks
to PeterZ for setting me straight on the current capabilities of the
lockdep API, and Greg acked it as well.
On the CXL ACPI side this update adds support for CXL _OSC so that
platform firmware knows that it is safe to still grant Linux native
control of PCIe hotplug and error handling in the presence of CXL
devices. A circular dependency problem was discovered between suspend
and CXL memory for cases where the suspend image might be stored in
CXL memory where that image also contains the PCI register state to
restore to re-enable the device. Disable suspend for now until an
architecture is defined to clarify that conflict.
Lastly a collection of reworks, fixes, and cleanups to the CXL
subsystem where support for snooping mailbox commands and properly
handling the "mem_enable" flow are the highlights.
Summary:
- Add driver-core infrastructure for lockdep validation of
device_lock(), and fixup a deadlock report that was previously
hidden behind the 'lockdep no validate' policy.
- Add CXL _OSC support for claiming native control of CXL hotplug and
error handling.
- Disable suspend in the presence of CXL memory unless and until a
protocol is identified for restoring PCI device context from memory
hosted on CXL PCI devices.
- Add support for snooping CXL mailbox commands to protect against
inopportune changes, like set-partition with the 'immediate' flag
set.
- Rework how the driver detects legacy CXL 1.1 configurations (CXL
DVSEC / 'mem_enable') before enabling new CXL 2.0 decode
configurations (CXL HDM Capability).
- Miscellaneous cleanups and fixes from -next exposure"
* tag 'cxl-for-5.19' of git://git.kernel.org/pub/scm/linux/kernel/git/cxl/cxl: (47 commits)
cxl/port: Enable HDM Capability after validating DVSEC Ranges
cxl/port: Reuse 'struct cxl_hdm' context for hdm init
cxl/port: Move endpoint HDM Decoder Capability init to port driver
cxl/pci: Drop @info argument to cxl_hdm_decode_init()
cxl/mem: Merge cxl_dvsec_ranges() and cxl_hdm_decode_init()
cxl/mem: Skip range enumeration if mem_enable clear
cxl/mem: Consolidate CXL DVSEC Range enumeration in the core
cxl/pci: Move cxl_await_media_ready() to the core
cxl/mem: Validate port connectivity before dvsec ranges
cxl/mem: Fix cxl_mem_probe() error exit
cxl/pci: Drop wait_for_valid() from cxl_await_media_ready()
cxl/pci: Consolidate wait_for_media() and wait_for_media_ready()
cxl/mem: Drop mem_enabled check from wait_for_media()
nvdimm: Fix firmware activation deadlock scenarios
device-core: Kill the lockdep_mutex
nvdimm: Drop nd_device_lock()
ACPI: NFIT: Drop nfit_device_lock()
nvdimm: Replace lockdep_mutex with local lock classes
cxl: Drop cxl_device_lock()
cxl/acpi: Add root device lockdep validation
...
Diffstat (limited to 'drivers')
34 files changed, 1113 insertions, 793 deletions
diff --git a/drivers/Makefile b/drivers/Makefile index 020780b6b4d2..f735c4955143 100644 --- a/drivers/Makefile +++ b/drivers/Makefile @@ -72,9 +72,9 @@ obj-$(CONFIG_PARPORT) += parport/ obj-y += base/ block/ misc/ mfd/ nfc/ obj-$(CONFIG_LIBNVDIMM) += nvdimm/ obj-$(CONFIG_DAX) += dax/ -obj-$(CONFIG_CXL_BUS) += cxl/ obj-$(CONFIG_DMA_SHARED_BUFFER) += dma-buf/ obj-$(CONFIG_NUBUS) += nubus/ +obj-y += cxl/ obj-y += macintosh/ obj-y += scsi/ obj-y += nvme/ diff --git a/drivers/acpi/bus.c b/drivers/acpi/bus.c index b67d2ee77cd1..86fa61a21826 100644 --- a/drivers/acpi/bus.c +++ b/drivers/acpi/bus.c @@ -443,7 +443,7 @@ static void acpi_bus_osc_negotiate_usb_control(void) } osc_sb_native_usb4_control = - control & ((u32 *)context.ret.pointer)[OSC_CONTROL_DWORD]; + control & acpi_osc_ctx_get_pci_control(&context); acpi_bus_decode_usb_osc("USB4 _OSC: OS supports", control); acpi_bus_decode_usb_osc("USB4 _OSC: OS controls", diff --git a/drivers/acpi/nfit/core.c b/drivers/acpi/nfit/core.c index fe61f617a943..ae5f4acf2675 100644 --- a/drivers/acpi/nfit/core.c +++ b/drivers/acpi/nfit/core.c @@ -1230,7 +1230,7 @@ static ssize_t hw_error_scrub_store(struct device *dev, if (rc) return rc; - nfit_device_lock(dev); + device_lock(dev); nd_desc = dev_get_drvdata(dev); if (nd_desc) { struct acpi_nfit_desc *acpi_desc = to_acpi_desc(nd_desc); @@ -1247,7 +1247,7 @@ static ssize_t hw_error_scrub_store(struct device *dev, break; } } - nfit_device_unlock(dev); + device_unlock(dev); if (rc) return rc; return size; @@ -1267,10 +1267,10 @@ static ssize_t scrub_show(struct device *dev, ssize_t rc = -ENXIO; bool busy; - nfit_device_lock(dev); + device_lock(dev); nd_desc = dev_get_drvdata(dev); if (!nd_desc) { - nfit_device_unlock(dev); + device_unlock(dev); return rc; } acpi_desc = to_acpi_desc(nd_desc); @@ -1287,7 +1287,7 @@ static ssize_t scrub_show(struct device *dev, } mutex_unlock(&acpi_desc->init_mutex); - nfit_device_unlock(dev); + device_unlock(dev); return rc; } @@ -1304,14 +1304,14 @@ static ssize_t scrub_store(struct device *dev, if (val != 1) return -EINVAL; - nfit_device_lock(dev); + device_lock(dev); nd_desc = dev_get_drvdata(dev); if (nd_desc) { struct acpi_nfit_desc *acpi_desc = to_acpi_desc(nd_desc); rc = acpi_nfit_ars_rescan(acpi_desc, ARS_REQ_LONG); } - nfit_device_unlock(dev); + device_unlock(dev); if (rc) return rc; return size; @@ -1697,9 +1697,9 @@ static void acpi_nvdimm_notify(acpi_handle handle, u32 event, void *data) struct acpi_device *adev = data; struct device *dev = &adev->dev; - nfit_device_lock(dev->parent); + device_lock(dev->parent); __acpi_nvdimm_notify(dev, event); - nfit_device_unlock(dev->parent); + device_unlock(dev->parent); } static bool acpi_nvdimm_has_method(struct acpi_device *adev, char *method) @@ -3152,8 +3152,8 @@ static int acpi_nfit_flush_probe(struct nvdimm_bus_descriptor *nd_desc) struct device *dev = acpi_desc->dev; /* Bounce the device lock to flush acpi_nfit_add / acpi_nfit_notify */ - nfit_device_lock(dev); - nfit_device_unlock(dev); + device_lock(dev); + device_unlock(dev); /* Bounce the init_mutex to complete initial registration */ mutex_lock(&acpi_desc->init_mutex); @@ -3305,8 +3305,8 @@ void acpi_nfit_shutdown(void *data) * acpi_nfit_ars_rescan() submissions have had a chance to * either submit or see ->cancel set. */ - nfit_device_lock(bus_dev); - nfit_device_unlock(bus_dev); + device_lock(bus_dev); + device_unlock(bus_dev); flush_workqueue(nfit_wq); } @@ -3449,9 +3449,9 @@ EXPORT_SYMBOL_GPL(__acpi_nfit_notify); static void acpi_nfit_notify(struct acpi_device *adev, u32 event) { - nfit_device_lock(&adev->dev); + device_lock(&adev->dev); __acpi_nfit_notify(&adev->dev, adev->handle, event); - nfit_device_unlock(&adev->dev); + device_unlock(&adev->dev); } static const struct acpi_device_id acpi_nfit_ids[] = { diff --git a/drivers/acpi/nfit/nfit.h b/drivers/acpi/nfit/nfit.h index 50882bdbeb96..6023ad61831a 100644 --- a/drivers/acpi/nfit/nfit.h +++ b/drivers/acpi/nfit/nfit.h @@ -337,30 +337,6 @@ static inline struct acpi_nfit_desc *to_acpi_desc( return container_of(nd_desc, struct acpi_nfit_desc, nd_desc); } -#ifdef CONFIG_PROVE_LOCKING -static inline void nfit_device_lock(struct device *dev) -{ - device_lock(dev); - mutex_lock(&dev->lockdep_mutex); -} - -static inline void nfit_device_unlock(struct device *dev) -{ - mutex_unlock(&dev->lockdep_mutex); - device_unlock(dev); -} -#else -static inline void nfit_device_lock(struct device *dev) -{ - device_lock(dev); -} - -static inline void nfit_device_unlock(struct device *dev) -{ - device_unlock(dev); -} -#endif - const guid_t *to_nfit_uuid(enum nfit_uuids id); int acpi_nfit_init(struct acpi_nfit_desc *acpi_desc, void *nfit, acpi_size sz); void acpi_nfit_shutdown(void *data); diff --git a/drivers/acpi/pci_root.c b/drivers/acpi/pci_root.c index b3b507f20e87..d57cf8454b93 100644 --- a/drivers/acpi/pci_root.c +++ b/drivers/acpi/pci_root.c @@ -140,6 +140,17 @@ static struct pci_osc_bit_struct pci_osc_control_bit[] = { { OSC_PCI_EXPRESS_DPC_CONTROL, "DPC" }, }; +static struct pci_osc_bit_struct cxl_osc_support_bit[] = { + { OSC_CXL_1_1_PORT_REG_ACCESS_SUPPORT, "CXL11PortRegAccess" }, + { OSC_CXL_2_0_PORT_DEV_REG_ACCESS_SUPPORT, "CXL20PortDevRegAccess" }, + { OSC_CXL_PROTOCOL_ERR_REPORTING_SUPPORT, "CXLProtocolErrorReporting" }, + { OSC_CXL_NATIVE_HP_SUPPORT, "CXLNativeHotPlug" }, +}; + +static struct pci_osc_bit_struct cxl_osc_control_bit[] = { + { OSC_CXL_ERROR_REPORTING_CONTROL, "CXLMemErrorReporting" }, +}; + static void decode_osc_bits(struct acpi_pci_root *root, char *msg, u32 word, struct pci_osc_bit_struct *table, int size) { @@ -168,33 +179,73 @@ static void decode_osc_control(struct acpi_pci_root *root, char *msg, u32 word) ARRAY_SIZE(pci_osc_control_bit)); } +static void decode_cxl_osc_support(struct acpi_pci_root *root, char *msg, u32 word) +{ + decode_osc_bits(root, msg, word, cxl_osc_support_bit, + ARRAY_SIZE(cxl_osc_support_bit)); +} + +static void decode_cxl_osc_control(struct acpi_pci_root *root, char *msg, u32 word) +{ + decode_osc_bits(root, msg, word, cxl_osc_control_bit, + ARRAY_SIZE(cxl_osc_control_bit)); +} + +static inline bool is_pcie(struct acpi_pci_root *root) +{ + return root->bridge_type == ACPI_BRIDGE_TYPE_PCIE; +} + +static inline bool is_cxl(struct acpi_pci_root *root) +{ + return root->bridge_type == ACPI_BRIDGE_TYPE_CXL; +} + static u8 pci_osc_uuid_str[] = "33DB4D5B-1FF7-401C-9657-7441C03DD766"; +static u8 cxl_osc_uuid_str[] = "68F2D50B-C469-4d8A-BD3D-941A103FD3FC"; -static acpi_status acpi_pci_run_osc(acpi_handle handle, - const u32 *capbuf, u32 *retval) +static char *to_uuid(struct acpi_pci_root *root) +{ + if (is_cxl(root)) + return cxl_osc_uuid_str; + return pci_osc_uuid_str; +} + +static int cap_length(struct acpi_pci_root *root) +{ + if (is_cxl(root)) + return sizeof(u32) * OSC_CXL_CAPABILITY_DWORDS; + return sizeof(u32) * OSC_PCI_CAPABILITY_DWORDS; +} + +static acpi_status acpi_pci_run_osc(struct acpi_pci_root *root, + const u32 *capbuf, u32 *pci_control, + u32 *cxl_control) { struct acpi_osc_context context = { - .uuid_str = pci_osc_uuid_str, + .uuid_str = to_uuid(root), .rev = 1, - .cap.length = 12, + .cap.length = cap_length(root), .cap.pointer = (void *)capbuf, }; acpi_status status; - status = acpi_run_osc(handle, &context); + status = acpi_run_osc(root->device->handle, &context); if (ACPI_SUCCESS(status)) { - *retval = *((u32 *)(context.ret.pointer + 8)); + *pci_control = acpi_osc_ctx_get_pci_control(&context); + if (is_cxl(root)) + *cxl_control = acpi_osc_ctx_get_cxl_control(&context); kfree(context.ret.pointer); } return status; } -static acpi_status acpi_pci_query_osc(struct acpi_pci_root *root, - u32 support, - u32 *control) +static acpi_status acpi_pci_query_osc(struct acpi_pci_root *root, u32 support, + u32 *control, u32 cxl_support, + u32 *cxl_control) { acpi_status status; - u32 result, capbuf[3]; + u32 pci_result, cxl_result, capbuf[OSC_CXL_CAPABILITY_DWORDS]; support |= root->osc_support_set; @@ -202,10 +253,28 @@ static acpi_status acpi_pci_query_osc(struct acpi_pci_root *root, capbuf[OSC_SUPPORT_DWORD] = support; capbuf[OSC_CONTROL_DWORD] = *control | root->osc_control_set; - status = acpi_pci_run_osc(root->device->handle, capbuf, &result); + if (is_cxl(root)) { + cxl_support |= root->osc_ext_support_set; + capbuf[OSC_EXT_SUPPORT_DWORD] = cxl_support; + capbuf[OSC_EXT_CONTROL_DWORD] = *cxl_control | root->osc_ext_control_set; + } + +retry: + status = acpi_pci_run_osc(root, capbuf, &pci_result, &cxl_result); if (ACPI_SUCCESS(status)) { root->osc_support_set = support; - *control = result; + *control = pci_result; + if (is_cxl(root)) { + root->osc_ext_support_set = cxl_support; + *cxl_control = cxl_result; + } + } else if (is_cxl(root)) { + /* + * CXL _OSC is optional on CXL 1.1 hosts. Fall back to PCIe _OSC + * upon any failure using CXL _OSC. + */ + root->bridge_type = ACPI_BRIDGE_TYPE_PCIE; + goto retry; } return status; } @@ -321,6 +390,8 @@ EXPORT_SYMBOL_GPL(acpi_get_pci_dev); * @handle: ACPI handle of a PCI root bridge (or PCIe Root Complex). * @mask: Mask of _OSC bits to request control of, place to store control mask. * @support: _OSC supported capability. + * @cxl_mask: Mask of CXL _OSC control bits, place to store control mask. + * @cxl_support: CXL _OSC supported capability. * * Run _OSC query for @mask and if that is successful, compare the returned * mask of control bits with @req. If all of the @req bits are set in the @@ -331,12 +402,14 @@ EXPORT_SYMBOL_GPL(acpi_get_pci_dev); * _OSC bits the BIOS has granted control of, but its contents are meaningless * on failure. **/ -static acpi_status acpi_pci_osc_control_set(acpi_handle handle, u32 *mask, u32 support) +static acpi_status acpi_pci_osc_control_set(acpi_handle handle, u32 *mask, + u32 support, u32 *cxl_mask, + u32 cxl_support) { u32 req = OSC_PCI_EXPRESS_CAPABILITY_CONTROL; struct acpi_pci_root *root; acpi_status status; - u32 ctrl, capbuf[3]; + u32 ctrl, cxl_ctrl = 0, capbuf[OSC_CXL_CAPABILITY_DWORDS]; if (!mask) return AE_BAD_PARAMETER; @@ -348,20 +421,42 @@ static acpi_status acpi_pci_osc_control_set(acpi_handle handle, u32 *mask, u32 s ctrl = *mask; *mask |= root->osc_control_set; + if (is_cxl(root)) { + cxl_ctrl = *cxl_mask; + *cxl_mask |= root->osc_ext_control_set; + } + /* Need to check the available controls bits before requesting them. */ do { - status = acpi_pci_query_osc(root, support, mask); + u32 pci_missing = 0, cxl_missing = 0; + + status = acpi_pci_query_osc(root, support, mask, cxl_support, + cxl_mask); if (ACPI_FAILURE(status)) return status; - if (ctrl == *mask) - break; - decode_osc_control(root, "platform does not support", - ctrl & ~(*mask)); + if (is_cxl(root)) { + if (ctrl == *mask && cxl_ctrl == *cxl_mask) + break; + pci_missing = ctrl & ~(*mask); + cxl_missing = cxl_ctrl & ~(*cxl_mask); + } else { + if (ctrl == *mask) + break; + pci_missing = ctrl & ~(*mask); + } + if (pci_missing) + decode_osc_control(root, "platform does not support", + pci_missing); + if (cxl_missing) + decode_cxl_osc_control(root, "CXL platform does not support", + cxl_missing); ctrl = *mask; - } while (*mask); + cxl_ctrl = *cxl_mask; + } while (*mask || *cxl_mask); /* No need to request _OSC if the control was already granted. */ - if ((root->osc_control_set & ctrl) == ctrl) + if ((root->osc_control_set & ctrl) == ctrl && + (root->osc_ext_control_set & cxl_ctrl) == cxl_ctrl) return AE_OK; if ((ctrl & req) != req) { @@ -373,11 +468,17 @@ static acpi_status acpi_pci_osc_control_set(acpi_handle handle, u32 *mask, u32 s capbuf[OSC_QUERY_DWORD] = 0; capbuf[OSC_SUPPORT_DWORD] = root->osc_support_set; capbuf[OSC_CONTROL_DWORD] = ctrl; - status = acpi_pci_run_osc(handle, capbuf, mask); + if (is_cxl(root)) { + capbuf[OSC_EXT_SUPPORT_DWORD] = root->osc_ext_support_set; + capbuf[OSC_EXT_CONTROL_DWORD] = cxl_ctrl; + } + + status = acpi_pci_run_osc(root, capbuf, mask, cxl_mask); if (ACPI_FAILURE(status)) return status; root->osc_control_set = *mask; + root->osc_ext_control_set = *cxl_mask; return AE_OK; } @@ -403,6 +504,53 @@ static u32 calculate_support(void) return support; } +/* + * Background on hotplug support, and making it depend on only + * CONFIG_HOTPLUG_PCI_PCIE vs. also considering CONFIG_MEMORY_HOTPLUG: + * + * CONFIG_ACPI_HOTPLUG_MEMORY does depend on CONFIG_MEMORY_HOTPLUG, but + * there is no existing _OSC for memory hotplug support. The reason is that + * ACPI memory hotplug requires the OS to acknowledge / coordinate with + * memory plug events via a scan handler. On the CXL side the equivalent + * would be if Linux supported the Mechanical Retention Lock [1], or + * otherwise had some coordination for the driver of a PCI device + * undergoing hotplug to be consulted on whether the hotplug should + * proceed or not. + * + * The concern is that if Linux says no to supporting CXL hotplug then + * the BIOS may say no to giving the OS hotplug control of any other PCIe + * device. So the question here is not whether hotplug is enabled, it's + * whether it is handled natively by the at all OS, and if + * CONFIG_HOTPLUG_PCI_PCIE is enabled then the answer is "yes". + * + * Otherwise, the plan for CXL coordinated remove, since the kernel does + * not support blocking hotplug, is to require the memory device to be + * disabled before hotplug is attempted. When CONFIG_MEMORY_HOTPLUG is + * disabled that step will fail and the remove attempt cancelled by the + * user. If that is not honored and the card is removed anyway then it + * does not matter if CONFIG_MEMORY_HOTPLUG is enabled or not, it will + * cause a crash and other badness. + * + * Therefore, just say yes to CXL hotplug and require removal to + * be coordinated by userspace unless and until the kernel grows better + * mechanisms for doing "managed" removal of devices in consultation with + * the driver. + * + * [1]: https://lore.kernel.org/all/20201122014203.4706-1-ashok.raj@intel.com/ + */ +static u32 calculate_cxl_support(void) +{ + u32 support; + + support = OSC_CXL_2_0_PORT_DEV_REG_ACCESS_SUPPORT; + if (pci_aer_available()) + support |= OSC_CXL_PROTOCOL_ERR_REPORTING_SUPPORT; + if (IS_ENABLED(CONFIG_HOTPLUG_PCI_PCIE)) + support |= OSC_CXL_NATIVE_HP_SUPPORT; + + return support; +} + static u32 calculate_control(void) { u32 control; @@ -434,6 +582,16 @@ static u32 calculate_control(void) return control; } +static u32 calculate_cxl_control(void) +{ + u32 control = 0; + + if (IS_ENABLED(CONFIG_MEMORY_FAILURE)) + control |= OSC_CXL_ERROR_REPORTING_CONTROL; + + return control; +} + static bool os_control_query_checks(struct acpi_pci_root *root, u32 support) { struct acpi_device *device = root->device; @@ -452,10 +610,10 @@ static bool os_control_query_checks(struct acpi_pci_root *root, u32 support) return true; } -static void negotiate_os_control(struct acpi_pci_root *root, int *no_aspm, - bool is_pcie) +static void negotiate_os_control(struct acpi_pci_root *root, int *no_aspm) { u32 support, control = 0, requested = 0; + u32 cxl_support = 0, cxl_control = 0, cxl_requested = 0; acpi_status status; struct acpi_device *device = root->device; acpi_handle handle = device->handle; @@ -479,10 +637,20 @@ static void negotiate_os_control(struct acpi_pci_root *root, int *no_aspm, if (os_control_query_checks(root, support)) requested = control = calculate_control(); - status = acpi_pci_osc_control_set(handle, &control, support); + if (is_cxl(root)) { + cxl_support = calculate_cxl_support(); + decode_cxl_osc_support(root, "OS supports", cxl_support); + cxl_requested = cxl_control = calculate_cxl_control(); + } + + status = acpi_pci_osc_control_set(handle, &control, support, + &cxl_control, cxl_support); if (ACPI_SUCCESS(status)) { if (control) decode_osc_control(root, "OS now controls", control); + if (cxl_control) + decode_cxl_osc_control(root, "OS now controls", + cxl_control); if (acpi_gbl_FADT.boot_flags & ACPI_FADT_NO_ASPM) { /* @@ -504,13 +672,18 @@ static void negotiate_os_control(struct acpi_pci_root *root, int *no_aspm, *no_aspm = 1; /* _OSC is optional for PCI host bridges */ - if ((status == AE_NOT_FOUND) && !is_pcie) + if (status == AE_NOT_FOUND && !is_pcie(root)) return; if (control) { decode_osc_control(root, "OS requested", requested); decode_osc_control(root, "platform willing to grant", control); } + if (cxl_control) { + decode_cxl_osc_control(root, "OS requested", cxl_requested); + decode_cxl_osc_control(root, "platform willing to grant", + cxl_control); + } dev_info(&device->dev, "_OSC: platform retains control of PCIe features (%s)\n", acpi_format_exception(status)); @@ -527,7 +700,7 @@ static int acpi_pci_root_add(struct acpi_device *device, acpi_handle handle = device->handle; int no_aspm = 0; bool hotadd = system_state == SYSTEM_RUNNING; - bool is_pcie; + const char *acpi_hid; root = kzalloc(sizeof(struct acpi_pci_root), GFP_KERNEL); if (!root) @@ -585,8 +758,15 @@ static int acpi_pci_root_add(struct acpi_device *device, root->mcfg_addr = acpi_pci_root_get_mcfg_addr(handle); - is_pcie = strcmp(acpi_device_hid(device), "PNP0A08") == 0; - negotiate_os_control(root, &no_aspm, is_pcie); + acpi_hid = acpi_device_hid(root->device); + if (strcmp(acpi_hid, "PNP0A08") == 0) + root->bridge_type = ACPI_BRIDGE_TYPE_PCIE; + else if (strcmp(acpi_hid, "ACPI0016") == 0) + root->bridge_type = ACPI_BRIDGE_TYPE_CXL; + else + dev_dbg(&device->dev, "Assuming non-PCIe host bridge\n"); + + negotiate_os_control(root, &no_aspm); /* * TBD: Need PCI interface for enumeration/configuration of roots. diff --git a/drivers/base/core.c b/drivers/base/core.c index 3d6430eb0c6a..2eede2ec3d64 100644 --- a/drivers/base/core.c +++ b/drivers/base/core.c @@ -2864,9 +2864,6 @@ void device_initialize(struct device *dev) kobject_init(&dev->kobj, &device_ktype); INIT_LIST_HEAD(&dev->dma_pools); mutex_init(&dev->mutex); -#ifdef CONFIG_PROVE_LOCKING - mutex_init(&dev->lockdep_mutex); -#endif lockdep_set_novalidate_class(&dev->mutex); spin_lock_init(&dev->devres_lock); INIT_LIST_HEAD(&dev->devres_head); diff --git a/drivers/cxl/Kconfig b/drivers/cxl/Kconfig index b88ab956bb7c..f64e3984689f 100644 --- a/drivers/cxl/Kconfig +++ b/drivers/cxl/Kconfig @@ -98,4 +98,8 @@ config CXL_PORT default CXL_BUS tristate +config CXL_SUSPEND + def_bool y + depends on SUSPEND && CXL_MEM + endif diff --git a/drivers/cxl/Makefile b/drivers/cxl/Makefile index ce267ef11d93..a78270794150 100644 --- a/drivers/cxl/Makefile +++ b/drivers/cxl/Makefile @@ -1,5 +1,5 @@ # SPDX-License-Identifier: GPL-2.0 -obj-$(CONFIG_CXL_BUS) += core/ +obj-y += core/ obj-$(CONFIG_CXL_PCI) += cxl_pci.o obj-$(CONFIG_CXL_MEM) += cxl_mem.o obj-$(CONFIG_CXL_ACPI) += cxl_acpi.o diff --git a/drivers/cxl/acpi.c b/drivers/cxl/acpi.c index d15a6aec0331..40286f5df812 100644 --- a/drivers/cxl/acpi.c +++ b/drivers/cxl/acpi.c @@ -275,6 +275,13 @@ static int add_root_nvdimm_bridge(struct device *match, void *data) return 1; } +static struct lock_class_key cxl_root_key; + +static void cxl_acpi_lock_reset_class(void *dev) +{ + device_lock_reset_class(dev); +} + static int cxl_acpi_probe(struct platform_device *pdev) { int rc; @@ -283,6 +290,12 @@ static int cxl_acpi_probe(struct platform_device *pdev) struct acpi_device *adev = ACPI_COMPANION(host); struct cxl_cfmws_context ctx; + device_lock_set_class(&pdev->dev, &cxl_root_key); + rc = devm_add_action_or_reset(&pdev->dev, cxl_acpi_lock_reset_class, + &pdev->dev); + if (rc) + return rc; + root_port = devm_cxl_add_port(host, host, CXL_RESOURCE_NONE, NULL); if (IS_ERR(root_port)) return PTR_ERR(root_port); diff --git a/drivers/cxl/core/Makefile b/drivers/cxl/core/Makefile index 6d37cd78b151..9d35085d25af 100644 --- a/drivers/cxl/core/Makefile +++ b/drivers/cxl/core/Makefile @@ -1,5 +1,6 @@ # SPDX-License-Identifier: GPL-2.0 obj-$(CONFIG_CXL_BUS) += cxl_core.o +obj-$(CONFIG_CXL_SUSPEND) += suspend.o ccflags-y += -I$(srctree)/drivers/cxl cxl_core-y := port.o diff --git a/drivers/cxl/core/mbox.c b/drivers/cxl/core/mbox.c index be61a0d8016b..54f434733b56 100644 --- a/drivers/cxl/core/mbox.c +++ b/drivers/cxl/core/mbox.c @@ -35,6 +35,7 @@ static bool cxl_raw_allow_all; .flags = _flags, \ } +#define CXL_VARIABLE_PAYLOAD ~0U /* * This table defines the supported mailbox commands for the driver. This table * is made up of a UAPI structure. Non-negative values as parameters in the @@ -44,26 +45,26 @@ static bool cxl_raw_allow_all; static struct cxl_mem_command cxl_mem_commands[CXL_MEM_COMMAND_ID_MAX] = { CXL_CMD(IDENTIFY, 0, 0x43, CXL_CMD_FLAG_FORCE_ENABLE), #ifdef CONFIG_CXL_MEM_RAW_COMMANDS - CXL_CMD(RAW, ~0, ~0, 0), + CXL_CMD(RAW, CXL_VARIABLE_PAYLOAD, CXL_VARIABLE_PAYLOAD, 0), #endif - CXL_CMD(GET_SUPPORTED_LOGS, 0, ~0, CXL_CMD_FLAG_FORCE_ENABLE), + CXL_CMD(GET_SUPPORTED_LOGS, 0, CXL_VARIABLE_PAYLOAD, CXL_CMD_FLAG_FORCE_ENABLE), CXL_CMD(GET_FW_INFO, 0, 0x50, 0), CXL_CMD(GET_PARTITION_INFO, 0, 0x20, 0), - CXL_CMD(GET_LSA, 0x8, ~0, 0), + CXL_CMD(GET_LSA, 0x8, CXL_VARIABLE_PAYLOAD, 0), CXL_CMD(GET_HEALTH_INFO, 0, 0x12, 0), - CXL_CMD(GET_LOG, 0x18, ~0, CXL_CMD_FLAG_FORCE_ENABLE), + CXL_CMD(GET_LOG, 0x18, CXL_VARIABLE_PAYLOAD, CXL_CMD_FLAG_FORCE_ENABLE), CXL_CMD(SET_PARTITION_INFO, 0x0a, 0, 0), - CXL_CMD(SET_LSA, ~0, 0, 0), + CXL_CMD(SET_LSA, CXL_VARIABLE_PAYLOAD, 0, 0), CXL_CMD(GET_ALERT_CONFIG, 0, 0x10, 0), CXL_CMD(SET_ALERT_CONFIG, 0xc, 0, 0), CXL_CMD(GET_SHUTDOWN_STATE, 0, 0x1, 0), CXL_CMD(SET_SHUTDOWN_STATE, 0x1, 0, 0), - CXL_CMD(GET_POISON, 0x10, ~0, 0), + CXL_CMD(GET_POISON, 0x10, CXL_VARIABLE_PAYLOAD, 0), CXL_CMD(INJECT_POISON, 0x8, 0, 0), CXL_CMD(CLEAR_POISON, 0x48, 0, 0), CXL_CMD(GET_SCAN_MEDIA_CAPS, 0x10, 0x4, 0), CXL_CMD(SCAN_MEDIA, 0x11, 0, 0), - CXL_CMD(GET_SCAN_MEDIA, 0, ~0, 0), + CXL_CMD(GET_SCAN_MEDIA, 0, CXL_VARIABLE_PAYLOAD, 0), }; /* @@ -127,6 +128,17 @@ static struct cxl_mem_command *cxl_mem_find_command(u16 opcode) return NULL; } +static const char *cxl_mem_opcode_to_name(u16 opcode) +{ + struct cxl_mem_command *c; + + c = cxl_mem_find_command(opcode); + if (!c) + return NULL; + + return cxl_command_names[c->info.id].name; +} + /** * cxl_mbox_send_cmd() - Send a mailbox command to a device. * @cxlds: The device data for the operation @@ -136,7 +148,7 @@ static struct cxl_mem_command *cxl_mem_find_command(u16 opcode) * @out: Caller allocated buffer for the output. * @out_size: Expected size of output. * - * Context: Any context. Will acquire and release mbox_mutex. + * Context: Any context. * Return: * * %>=0 - Number of bytes returned in @out. * * %-E2BIG - Payload is too large for hardware. @@ -169,17 +181,17 @@ int cxl_mbox_send_cmd(struct cxl_dev_state *cxlds, u16 opcode, void *in, if (rc) return rc; - /* TODO: Map return code to proper kernel style errno */ - if (mbox_cmd.return_code != CXL_MBOX_SUCCESS) - return -ENXIO; + if (mbox_cmd.return_code != CXL_MBOX_CMD_RC_SUCCESS) + return cxl_mbox_cmd_rc2errno(&mbox_cmd); /* * Variable sized commands can't be validated and so it's up to the * caller to do that if they wish. */ - if (cmd->info.size_out >= 0 && mbox_cmd.size_out != out_size) - return -EIO; - + if (cmd->info.size_out != CXL_VARIABLE_PAYLOAD) { + if (mbox_cmd.size_out != out_size) + return -EIO; + } return 0; } EXPORT_SYMBOL_NS_GPL(cxl_mbox_send_cmd, CXL); @@ -208,75 +220,122 @@ static bool cxl_mem_raw_command_allowed(u16 opcode) } /** - * cxl_validate_cmd_from_user() - Check fields for CXL_MEM_SEND_COMMAND. - * @cxlds: The device data for the operation - * @send_cmd: &struct cxl_send_command copied in from userspace. - * @out_cmd: Sanitized and populated &struct cxl_mem_command. + * cxl_payload_from_user_allowed() - Check contents of in_payload. + * @opcode: The mailbox command opcode. + * @payload_in: Pointer to the input payload passed in from user space. * * Return: - * * %0 - @out_cmd is ready to send. - * * %-ENOTTY - Invalid command specified. - * * %-EINVAL - Reserved fields or invalid values were used. - * * %-ENOMEM - Input or output buffer wasn't sized properly. - * * %-EPERM - Attempted to use a protected command. - * * %-EBUSY - Kernel has claimed exclusive access to this opcode + * * true - payload_in passes check for @opcode. + * * false - payload_in contains invalid or unsupported values. * - * The result of this command is a fully validated command in @out_cmd that is - * safe to send to the hardware. + * The driver may inspect payload contents before sending a mailbox + * command from user space to the device. The intent is to reject + * commands with input payloads that are known to be unsafe. This + * check is not intended to replace the users careful selection of + * mailbox command parameters and makes no guarantee that the user + * command will succeed, nor that it is appropriate. * - * See handle_mailbox_cmd_from_user() + * The specific checks are determined by the opcode. */ -static int cxl_validate_cmd_from_user(struct cxl_dev_state *cxlds, - const struct cxl_send_command *send_cmd, - struct cxl_mem_command *out_cmd) +static bool cxl_payload_from_user_allowed(u16 opcode, void *payload_in) { - const struct cxl_command_info *info; - struct cxl_mem_command *c; + switch (opcode) { + case CXL_MBOX_OP_SET_PARTITION_INFO: { + struct cxl_mbox_set_partition_info *pi = payload_in; - if (send_cmd->id == 0 || send_cmd->id >= CXL_MEM_COMMAND_ID_MAX) - return -ENOTTY; + if (pi->flags & CXL_SET_PARTITION_IMMEDIATE_FLAG) + return false; + break; + } + default: + break; + } + return true; +} - /* - * The user can never specify an input payload larger than what hardware - * supports, but output can be arbitrarily large (simply write out as - * much data as the hardware provides). - */ - if (send_cmd->in.size > cxlds->payload_size) +static int cxl_mbox_cmd_ctor(struct cxl_mbox_cmd *mbox, + struct cxl_dev_state *cxlds, u16 opcode, + size_t in_size, size_t out_size, u64 in_payload) +{ + *mbox = (struct cxl_mbox_cmd) { + .opcode = opcode, + .size_in = in_size, + }; + + if (in_size) { + mbox->payload_in = vmemdup_user(u64_to_user_ptr(in_payload), + in_size); + if (IS_ERR(mbox->payload_in)) + return PTR_ERR(mbox->payload_in); + + if (!cxl_payload_from_user_allowed(opcode, mbox->payload_in)) { + dev_dbg(cxlds->dev, "%s: input payload not allowed\n", + cxl_mem_opcode_to_name(opcode)); + kvfree(mbox->payload_in); + return -EBUSY; + } + } + + /* Prepare to handle a full payload for variable sized output */ + if (out_size == CXL_VARIABLE_PAYLOAD) + mbox->size_out = cxlds->payload_size; + else + mbox->size_out = out_size; + + if (mbox->size_out) { + mbox->payload_out = kvzalloc(mbox->size_out, GFP_KERNEL); + if (!mbox->payload_out) { + kvfree(mbox->payload_in); + return -ENOMEM; + } + } + return 0; +} + +static void cxl_mbox_cmd_dtor(struct cxl_mbox_cmd *mbox) +{ + kvfree(mbox->payload_in); + kvfree(mbox->payload_out); +} + +static int cxl_to_mem_cmd_raw(struct cxl_mem_command *mem_cmd, + const struct cxl_send_command *send_cmd, + struct cxl_dev_state *cxlds) +{ + if (send_cmd->raw.rsvd) return -EINVAL; /* - * Checks are bypassed for raw commands but a WARN/taint will occur - * later in the callchain + * Unlike supported commands, the output size of RAW commands + * gets passed along without further checking, so it must be + * validated here. */ - if (send_cmd->id == CXL_MEM_COMMAND_ID_RAW) { - const struct cxl_mem_command temp = { - .info = { - .id = CXL_MEM_COMMAND_ID_RAW, - .flags = 0, - .size_in = send_cmd->in.size, - .size_out = send_cmd->out.size, - }, - .opcode = send_cmd->raw.opcode - }; + if (send_cmd->out.size > cxlds->payload_size) + return -EINVAL; - if (send_cmd->raw.rsvd) - return -EINVAL; + if (!cxl_mem_raw_command_allowed(send_cmd->raw.opcode)) + return -EPERM; - /* - * Unlike supported commands, the output size of RAW commands - * gets passed along without further checking, so it must be - * validated here. - */ - if (send_cmd->out.size > cxlds->payload_size) - return -EINVAL; + dev_WARN_ONCE(cxlds->dev, true, "raw command path used\n"); - if (!cxl_mem_raw_command_allowed(send_cmd->raw.opcode)) - return -EPERM; + *mem_cmd = (struct cxl_mem_command) { + .info = { + .id = CXL_MEM_COMMAND_ID_RAW, + .size_in = send_cmd->in.size, + .size_out = send_cmd->out.size, + }, + .opcode = send_cmd->raw.opcode + }; - memcpy(out_cmd, &temp, sizeof(temp)); + return 0; +} - return 0; - } +static int cxl_to_mem_cmd(struct cxl_mem_command *mem_cmd, + const struct cxl_send_command *send_cmd, + struct cxl_dev_state *cxlds) +{ + struct cxl_mem_command *c = &cxl_mem_commands[send_cmd->id]; + const struct cxl_command_info *info = &c->info; if (send_cmd->flags & ~CXL_MEM_COMMAND_FLAG_MASK) return -EINVAL; @@ -287,10 +346,6 @@ static int cxl_validate_cmd_from_user(struct cxl_dev_state *cxlds, if (send_cmd->in.rsvd || send_cmd->out.rsvd) return -EINVAL; - /* Convert user's command into the internal representation */ - c = &cxl_mem_commands[send_cmd->id]; - info = &c->info; - /* Check that the command is enabled for hardware */ if (!test_bit(info->id, cxlds->enabled_cmds)) return -ENOTTY; @@ -300,22 +355,74 @@ static int cxl_validate_cmd_from_user(struct cxl_dev_state *cxlds, return -EBUSY; /* Check the input buffer is the expected size */ - if (info->size_in >= 0 && info->size_in != send_cmd->in.size) + if (info->size_in != send_cmd->in.size) return -ENOMEM; /* Check the output buffer is at least large enough */ - if (info->size_out >= 0 && send_cmd->out.size < info->size_out) + if (send_cmd->out.size < info->size_out) return -ENOMEM; - memcpy(out_cmd, c, sizeof(*c)); - out_cmd->info.size_in = send_cmd->in.size; + *mem_cmd = (struct cxl_mem_command) { + .info = { + .id = info->id, + .flags = info->flags, + .size_in = send_cmd->in.size, + .size_out = send_cmd->out.size, + }, + .opcode = c->opcode + }; + + return 0; +} + +/** + * cxl_validate_cmd_from_user() - Check fields for CXL_MEM_SEND_COMMAND. + * @mbox_cmd: Sanitized and populated &struct cxl_mbox_cmd. + * @cxlds: The device data for the operation + * @send_cmd: &struct cxl_send_command copied in from userspace. + * + * Return: + * * %0 - @out_cmd is ready to send. + * * %-ENOTTY - Invalid command specified. + * * %-EINVAL - Reserved fields or invalid values were used. + * * %-ENOMEM - Input or output buffer wasn't sized properly. + * * %-EPERM - Attempted to use a protected command. + * * %-EBUSY - Kernel has claimed exclusive access to this opcode + * + * The result of this command is a fully validated command in @mbox_cmd that is + * safe to send to the hardware. + */ +static int cxl_validate_cmd_from_user(struct cxl_mbox_cmd *mbox_cmd, + struct cxl_dev_state *cxlds, + const struct cxl_send_command *send_cmd) +{ + struct cxl_mem_command mem_cmd; + int rc; + + if (send_cmd->id == 0 || send_cmd->id >= CXL_MEM_COMMAND_ID_MAX) + return -ENOTTY; + /* - * XXX: out_cmd->info.size_out will be controlled by the driver, and the - * specified number of bytes @send_cmd->out.size will be copied back out - * to userspace. + * The user can never specify an input payload larger than what hardware + * supports, but output can be arbitrarily large (simply write out as + * much data as the hardware provides). */ + if (send_cmd->in.size > cxlds->payload_size) + return -EINVAL; - return 0; + /* Sanitize and construct a cxl_mem_command */ + if (send_cmd->id == CXL_MEM_COMMAND_ID_RAW) + rc = cxl_to_mem_cmd_raw(&mem_cmd, send_cmd, cxlds); + else + rc = cxl_to_mem_cmd(&mem_cmd, send_cmd, cxlds); + + if (rc) + return rc; + + /* Sanitize and construct a cxl_mbox_cmd */ + return cxl_mbox_cmd_ctor(mbox_cmd, cxlds, mem_cmd.opcode, + mem_cmd.info.size_in, mem_cmd.info.size_out, + send_cmd->in.payload); } int cxl_query_cmd(struct cxl_memdev *cxlmd, @@ -355,8 +462,7 @@ int cxl_query_cmd(struct cxl_memdev *cxlmd, /** * handle_mailbox_cmd_from_user() - Dispatch a mailbox command for userspace. * @cxlds: The device data for the operation - * @cmd: The validated command. - * @in_payload: Pointer to userspace's input payload. + * @mbox_cmd: The validated mailbox command. * @out_payload: Pointer to userspace's output payload. * @size_out: (Input) Max payload size to copy out. * (Output) Payload size hardware generated. @@ -371,51 +477,27 @@ int cxl_query_cmd(struct cxl_memdev *cxlmd, * * %-EINTR - Mailbox acquisition interrupted. * * %-EXXX - Transaction level failures. * - * Creates the appropriate mailbox command and dispatches it on behalf of a - * userspace request. The input and output payloads are copied between - * userspace. + * Dispatches a mailbox command on behalf of a userspace request. + * The output payload is copied to userspace. * * See cxl_send_cmd(). */ static int handle_mailbox_cmd_from_user(struct cxl_dev_state *cxlds, - const struct cxl_mem_command *cmd, - u64 in_payload, u64 out_payload, - s32 *size_out, u32 *retval) + struct cxl_mbox_cmd *mbox_cmd, + u64 out_payload, s32 *size_out, + u32 *retval) { struct device *dev = cxlds->dev; - struct cxl_mbox_cmd mbox_cmd = { - .opcode = cmd->opcode, - .size_in = cmd->info.size_in, - .size_out = cmd->info.size_out, - }; int rc; - if (cmd->info.size_out) { - mbox_cmd.payload_out = kvzalloc(cmd->info.size_out, GFP_KERNEL); - if (!mbox_cmd.payload_out) - return -ENOMEM; - } - - if (cmd->info.size_in) { - mbox_cmd.payload_in = vmemdup_user(u64_to_user_ptr(in_payload), - cmd->info.size_in); - if (IS_ERR(mbox_cmd.payload_in)) { - kvfree(mbox_cmd.payload_out); - return PTR_ERR(mbox_cmd.payload_in); - } - } - dev_dbg(dev, "Submitting %s command for user\n" "\topcode: %x\n" - "\tsize: %ub\n", - cxl_command_names[cmd->info.id].name, mbox_cmd.opcode, - cmd->info.size_in); - - dev_WARN_ONCE(dev, cmd->info.id == CXL_MEM_COMMAND_ID_RAW, - "raw command path used\n"); + "\tsize: %zx\n", + cxl_mem_opcode_to_name(mbox_cmd->opcode), + mbox_cmd->opcode, mbox_cmd->size_in); - rc = cxlds->mbox_send(cxlds, &mbox_cmd); + rc = cxlds->mbox_send(cxlds, mbox_cmd); if (rc) goto out; @@ -424,22 +506,21 @@ static int handle_mailbox_cmd_from_user(struct cxl_dev_state *cxlds, * to userspace. While the payload may have written more output than * this it will have to be ignored. */ - if (mbox_cmd.size_out) { - dev_WARN_ONCE(dev, mbox_cmd.size_out > *size_out, + if (mbox_cmd->size_out) { + dev_WARN_ONCE(dev, mbox_cmd->size_out > *size_out, "Invalid return size\n"); if (copy_to_user(u64_to_user_ptr(out_payload), - mbox_cmd.payload_out, mbox_cmd.size_out)) { + mbox_cmd->payload_out, mbox_cmd->size_out)) { rc = -EFAULT; goto out; } } - *size_out = mbox_cmd.size_out; - *retval = mbox_cmd.return_code; + *size_out = mbox_cmd->size_out; + *retval = mbox_cmd->return_code; out: - kvfree(mbox_cmd.payload_in); - kvfree(mbox_cmd.payload_out); + cxl_mbox_cmd_dtor(mbox_cmd); return rc; } @@ -448,7 +529,7 @@ int cxl_send_cmd(struct cxl_memdev *cxlmd, struct cxl_send_command __user *s) struct cxl_dev_state *cxlds = cxlmd->cxlds; struct device *dev = &cxlmd->dev; struct cxl_send_command send; - struct cxl_mem_command c; + struct cxl_mbox_cmd mbox_cmd; int rc; dev_dbg(dev, "Send IOCTL\n"); @@ -456,17 +537,12 @@ int cxl_send_cmd(struct cxl_memdev *cxlmd, struct cxl_send_command __user *s) if (copy_from_user(&send, s, sizeof(send))) return -EFAULT; - rc = cxl_validate_cmd_from_user(cxlmd->cxlds, &send, &c); + rc = cxl_validate_cmd_from_user(&mbox_cmd, cxlmd->cxlds, &send); if (rc) return rc; - /* Prepare to handle a full payload for variable sized output */ - if (c.info.size_out < 0) - c.info.size_out = cxlds->payload_size; - - rc = handle_mailbox_cmd_from_user(cxlds, &c, send.in.payload, - send.out.payload, &send.out.size, - &send.retval); + rc = handle_mailbox_cmd_from_user(cxlds, &mbox_cmd, send.out.payload, + &send.out.size, &send.retval); if (rc) return rc; diff --git a/drivers/cxl/core/memdev.c b/drivers/cxl/core/memdev.c index 1f76b28f9826..f7cdcd33504a 100644 --- a/drivers/cxl/core/memdev.c +++ b/drivers/cxl/core/memdev.c @@ -228,6 +228,8 @@ static void detach_memdev(struct work_struct *work) put_device(&cxlmd->dev); } +static struct lock_class_key cxl_memdev_key; + static struct cxl_memdev *cxl_memdev_alloc(struct cxl_dev_state *cxlds, const struct file_operations *fops) { @@ -247,6 +249,7 @@ static struct cxl_memdev *cxl_memdev_alloc(struct cxl_dev_state *cxlds, dev = &cxlmd->dev; device_initialize(dev); + lockdep_set_class(&dev->mutex, &cxl_memdev_key); dev->parent = cxlds->dev; dev->bus = &cxl_bus_type; dev->devt = MKDEV(cxl_mem_major, cxlmd->id); diff --git a/drivers/cxl/core/pci.c b/drivers/cxl/core/pci.c index c9a494d6976a..c4c99ff7b55e 100644 --- a/drivers/cxl/core/pci.c +++ b/drivers/cxl/core/pci.c @@ -1,8 +1,11 @@ // SPDX-License-Identifier: GPL-2.0-only /* Copyright(c) 2021 Intel Corporation. All rights reserved. */ +#include <linux/io-64-nonatomic-lo-hi.h> #include <linux/device.h> +#include <linux/delay.h> #include <linux/pci.h> #include <cxlpci.h> +#include <cxlmem.h> #include <cxl.h> #include "core.h" @@ -13,6 +16,10 @@ * a set of helpers for CXL interactions which occur via PCIe. */ +static unsigned short media_ready_timeout = 60; +module_param(media_ready_timeout, ushort, 0644); +MODULE_PARM_DESC(media_ready_timeout, "seconds to wait for media ready"); + struct cxl_walk_context { struct pci_bus *bus; struct cxl_port *port; @@ -94,3 +101,360 @@ int devm_cxl_port_enumerate_dports(struct cxl_port *port) return ctx.count; } EXPORT_SYMBOL_NS_GPL(devm_cxl_port_enumerate_dports, CXL); + +/* + * Wait up to @media_ready_timeout for the device to report memory + * active. + */ +int cxl_await_media_ready(struct cxl_dev_state *cxlds) +{ + struct pci_dev *pdev = to_pci_dev(cxlds->dev); + int d = cxlds->cxl_dvsec; + bool active = false; + u64 md_status; + int rc, i; + + for (i = media_ready_timeout; i; i--) { + u32 temp; + + rc = pci_read_config_dword( + pdev, d + CXL_DVSEC_RANGE_SIZE_LOW(0), &temp); + if (rc) + return rc; + + active = FIELD_GET(CXL_DVSEC_MEM_ACTIVE, temp); + if (active) + break; + msleep(1000); + } + + if (!active) { + dev_err(&pdev->dev, + "timeout awaiting memory active after %d seconds\n", + media_ready_timeout); + return -ETIMEDOUT; + } + + md_status = readq(cxlds->regs.memdev + CXLMDEV_STATUS_OFFSET); + if (!CXLMDEV_READY(md_status)) + return -EIO; + + return 0; +} +EXPORT_SYMBOL_NS_GPL(cxl_await_media_ready, CXL); + +static int wait_for_valid(struct cxl_dev_state *cxlds) +{ + struct pci_dev *pdev = to_pci_dev(cxlds->dev); + int d = cxlds->cxl_dvsec, rc; + u32 val; + + /* + * Memory_Info_Valid: When set, indicates that the CXL Range 1 Size high + * and Size Low registers are valid. Must be set within 1 second of + * deassertion of reset to CXL device. Likely it is already set by the + * time this runs, but otherwise give a 1.5 second timeout in case of + * clock skew. + */ + rc = pci_read_config_dword(pdev, d + CXL_DVSEC_RANGE_SIZE_LOW(0), &val); + if (rc) + return rc; + + if (val & CXL_DVSEC_MEM_INFO_VALID) + return 0; + + msleep(1500); + + rc = pci_read_config_dword(pdev, d + CXL_DVSEC_RANGE_SIZE_LOW(0), &val); + if (rc) + return rc; + + if (val & CXL_DVSEC_MEM_INFO_VALID) + return 0; + + return -ETIMEDOUT; +} + +static int cxl_set_mem_enable(struct cxl_dev_state *cxlds, u16 val) +{ + struct pci_dev *pdev = to_pci_dev(cxlds->dev); + int d = cxlds->cxl_dvsec; + u16 ctrl; + int rc; + + rc = pci_read_config_word(pdev, d + CXL_DVSEC_CTRL_OFFSET, &ctrl); + if (rc < 0) + return rc; + + if ((ctrl & CXL_DVSEC_MEM_ENABLE) == val) + return 1; + ctrl &= ~CXL_DVSEC_MEM_ENABLE; + ctrl |= val; + + rc = pci_write_config_word(pdev, d + CXL_DVSEC_CTRL_OFFSET, ctrl); + if (rc < 0) + return rc; + + return 0; +} + +static void clear_mem_enable(void *cxlds) +{ + cxl_set_mem_enable(cxlds, 0); +} + +static int devm_cxl_enable_mem(struct device *host, struct cxl_dev_state *cxlds) +{ + int rc; + + rc = cxl_set_mem_enable(cxlds, CXL_DVSEC_MEM_ENABLE); + if (rc < 0) + return rc; + if (rc > 0) + return 0; + return devm_add_action_or_reset(host, clear_mem_enable, cxlds); +} + +static bool range_contains(struct range *r1, struct range *r2) +{ + return r1->start <= r2->start && r1->end >= r2->end; +} + +/* require dvsec ranges to be covered by a locked platform window */ +static int dvsec_range_allowed(struct device *dev, void *arg) +{ + struct range *dev_range = arg; + struct cxl_decoder *cxld; + struct range root_range; + + if (!is_root_decoder(dev)) + return 0; + + cxld = to_cxl_decoder(dev); + + if (!(cxld->flags & CXL_DECODER_F_LOCK)) + return 0; + if (!(cxld->flags & CXL_DECODER_F_RAM)) + return 0; + + root_range = (struct range) { + .start = cxld->platform_res.start, + .end = cxld->platform_res.end, + }; + + return range_contains(&root_range, dev_range); +} + +static void disable_hdm(void *_cxlhdm) +{ + u32 global_ctrl; + struct cxl_hdm *cxlhdm = _cxlhdm; + void __iomem *hdm = cxlhdm->regs.hdm_decoder; + + global_ctrl = readl(hdm + CXL_HDM_DECODER_CTRL_OFFSET); + writel(global_ctrl & ~CXL_HDM_DECODER_ENABLE, + hdm + CXL_HDM_DECODER_CTRL_OFFSET); +} + +static int devm_cxl_enable_hdm(struct device *host, struct cxl_hdm *cxlhdm) +{ + void __iomem *hdm = cxlhdm->regs.hdm_decoder; + u32 global_ctrl; + + global_ctrl = readl(hdm + CXL_HDM_DECODER_CTRL_OFFSET); + writel(global_ctrl | CXL_HDM_DECODER_ENABLE, + hdm + CXL_HDM_DECODER_CTRL_OFFSET); + + return devm_add_action_or_reset(host, disable_hdm, cxlhdm); +} + +static bool __cxl_hdm_decode_init(struct cxl_dev_state *cxlds, + struct cxl_hdm *cxlhdm, + struct cxl_endpoint_dvsec_info *info) +{ + void __iomem *hdm = cxlhdm->regs.hdm_decoder; + struct cxl_port *port = cxlhdm->port; + struct device *dev = cxlds->dev; + struct cxl_port *root; + int i, rc, allowed; + u32 global_ctrl; + + global_ctrl = readl(hdm + CXL_HDM_DECODER_CTRL_OFFSET); + + /* + * If the HDM Decoder Capability is already enabled then assume + * that some other agent like platform firmware set it up. + */ + if (global_ctrl & CXL_HDM_DECODER_ENABLE) { + rc = devm_cxl_enable_mem(&port->dev, cxlds); + if (rc) + return false; + return true; + } + + root = to_cxl_port(port->dev.parent); + while (!is_cxl_root(root) && is_cxl_port(root->dev.parent)) + root = to_cxl_port(root->dev.parent); + if (!is_cxl_root(root)) { + dev_err(dev, "Failed to acquire root port for HDM enable\n"); + return false; + } + + for (i = 0, allowed = 0; info->mem_enabled && i < info->ranges; i++) { + struct device *cxld_dev; + + cxld_dev = device_find_child(&root->dev, &info->dvsec_range[i], + dvsec_range_allowed); + if (!cxld_dev) { + dev_dbg(dev, "DVSEC Range%d denied by platform\n", i); + continue; + } + dev_dbg(dev, "DVSEC Range%d allowed by platform\n", i); + put_device(cxld_dev); + allowed++; + } + + if (!allowed) { + cxl_set_mem_enable(cxlds, 0); + info->mem_enabled = 0; + } + + /* + * Per CXL 2.0 Section 8.1.3.8.3 and 8.1.3.8.4 DVSEC CXL Range 1 Base + * [High,Low] when HDM operation is enabled the range register values + * are ignored by the device, but the spec also recommends matching the + * DVSEC Range 1,2 to HDM Decoder Range 0,1. So, non-zero info->ranges + * are expected even though Linux does not require or maintain that + * match. If at least one DVSEC range is enabled and allowed, skip HDM + * Decoder Capability Enable. + */ + if (info->mem_enabled) + return false; + + rc = devm_cxl_enable_hdm(&port->dev, cxlhdm); + if (rc) + return false; + + rc = devm_cxl_enable_mem(&port->dev, cxlds); + if (rc) + return false; + + return true; +} + +/** + * cxl_hdm_decode_init() - Setup HDM decoding for the endpoint + * @cxlds: Device state + * @cxlhdm: Mapped HDM decoder Capability + * + * Try to enable the endpoint's HDM Decoder Capability + */ +int cxl_hdm_decode_init(struct cxl_dev_state *cxlds, struct cxl_hdm *cxlhdm) +{ + struct pci_dev *pdev = to_pci_dev(cxlds->dev); + struct cxl_endpoint_dvsec_info info = { 0 }; + int hdm_count, rc, i, ranges = 0; + struct device *dev = &pdev->dev; + int d = cxlds->cxl_dvsec; + u16 cap, ctrl; + + if (!d) { + dev_dbg(dev, "No DVSEC Capability\n"); + return -ENXIO; + } + + rc = pci_read_config_word(pdev, d + CXL_DVSEC_CAP_OFFSET, &cap); + if (rc) + return rc; + + rc = pci_read_config_word(pdev, d + CXL_DVSEC_CTRL_OFFSET, &ctrl); + if (rc) + return rc; + + if (!(cap & CXL_DVSEC_MEM_CAPABLE)) { + dev_dbg(dev, "Not MEM Capable\n"); + return -ENXIO; + } + + /* + * It is not allowed by spec for MEM.capable to be set and have 0 legacy + * HDM decoders (values > 2 are also undefined as of CXL 2.0). As this + * driver is for a spec defined class code which must be CXL.mem + * capable, there is no point in continuing to enable CXL.mem. + */ + hdm_count = FIELD_GET(CXL_DVSEC_HDM_COUNT_MASK, cap); + if (!hdm_count || hdm_count > 2) + return -EINVAL; + + rc = wait_for_valid(cxlds); + if (rc) { + dev_dbg(dev, "Failure awaiting MEM_INFO_VALID (%d)\n", rc); + return rc; + } + + /* + * The current DVSEC values are moot if the memory capability is + * disabled, and they will remain moot after the HDM Decoder + * capability is enabled. + */ + info.mem_enabled = FIELD_GET(CXL_DVSEC_MEM_ENABLE, ctrl); + if (!info.mem_enabled) + goto hdm_init; + + for (i = 0; i < hdm_count; i++) { + u64 base, size; + u32 temp; + + rc = pci_read_config_dword( + pdev, d + CXL_DVSEC_RANGE_SIZE_HIGH(i), &temp); + if (rc) + return rc; + + size = (u64)temp << 32; + + rc = pci_read_config_dword( + pdev, d + CXL_DVSEC_RANGE_SIZE_LOW(i), &temp); + if (rc) + return rc; + + size |= temp & CXL_DVSEC_MEM_SIZE_LOW_MASK; + + rc = pci_read_config_dword( + pdev, d + CXL_DVSEC_RANGE_BASE_HIGH(i), &temp); + if (rc) + return rc; + + base = (u64)temp << 32; + + rc = pci_read_config_dword( + pdev, d + CXL_DVSEC_RANGE_BASE_LOW(i), &temp); + if (rc) + return rc; + + base |= temp & CXL_DVSEC_MEM_BASE_LOW_MASK; + + info.dvsec_range[i] = (struct range) { + .start = base, + .end = base + size - 1 + }; + + if (size) + ranges++; + } + + info.ranges = ranges; + + /* + * If DVSEC ranges are being used instead of HDM decoder registers there + * is no use in trying to manage those. + */ +hdm_init: + if (!__cxl_hdm_decode_init(cxlds, cxlhdm, &info)) { + dev_err(dev, + "Legacy range registers configuration prevents HDM operation.\n"); + return -EBUSY; + } + + return 0; +} +EXPORT_SYMBOL_NS_GPL(cxl_hdm_decode_init, CXL); diff --git a/drivers/cxl/core/pmem.c b/drivers/cxl/core/pmem.c index 8de240c4d96b..bec7cfb54ebf 100644 --- a/drivers/cxl/core/pmem.c +++ b/drivers/cxl/core/pmem.c @@ -80,6 +80,8 @@ struct cxl_nvdimm_bridge *cxl_find_nvdimm_bridge(struct cxl_nvdimm *cxl_nvd) } EXPORT_SYMBOL_NS_GPL(cxl_find_nvdimm_bridge, CXL); +static struct lock_class_key cxl_nvdimm_bridge_key; + static struct cxl_nvdimm_bridge *cxl_nvdimm_bridge_alloc(struct cxl_port *port) { struct cxl_nvdimm_bridge *cxl_nvb; @@ -99,6 +101,7 @@ static struct cxl_nvdimm_bridge *cxl_nvdimm_bridge_alloc(struct cxl_port *port) cxl_nvb->port = port; cxl_nvb->state = CXL_NVB_NEW; device_initialize(dev); + lockdep_set_class(&dev->mutex, &cxl_nvdimm_bridge_key); device_set_pm_not_required(dev); dev->parent = &port->dev; dev->bus = &cxl_bus_type; @@ -121,10 +124,10 @@ static void unregister_nvb(void *_cxl_nvb) * work to flush. Once the state has been changed to 'dead' then no new * work can be queued by user-triggered bind. */ - cxl_device_lock(&cxl_nvb->dev); + device_lock(&cxl_nvb->dev); flush = cxl_nvb->state != CXL_NVB_NEW; cxl_nvb->state = CXL_NVB_DEAD; - cxl_device_unlock(&cxl_nvb->dev); + device_unlock(&cxl_nvb->dev); /* * Even though the device core will trigger device_release_driver() @@ -214,6 +217,8 @@ struct cxl_nvdimm *to_cxl_nvdimm(struct device *dev) } EXPORT_SYMBOL_NS_GPL(to_cxl_nvdimm, CXL); +static struct lock_class_key cxl_nvdimm_key; + static struct cxl_nvdimm *cxl_nvdimm_alloc(struct cxl_memdev *cxlmd) { struct cxl_nvdimm *cxl_nvd; @@ -226,6 +231,7 @@ static struct cxl_nvdimm *cxl_nvdimm_alloc(struct cxl_memdev *cxlmd) dev = &cxl_nvd->dev; cxl_nvd->cxlmd = cxlmd; device_initialize(dev); + lockdep_set_class(&dev->mutex, &cxl_nvdimm_key); device_set_pm_not_required(dev); dev->parent = &cxlmd->dev; dev->bus = &cxl_bus_type; diff --git a/drivers/cxl/core/port.c b/drivers/cxl/core/port.c index 2ab1ba4499b3..ea60abda6500 100644 --- a/drivers/cxl/core/port.c +++ b/drivers/cxl/core/port.c @@ -312,10 +312,10 @@ static void cxl_port_release(struct device *dev) struct cxl_port *port = to_cxl_port(dev); struct cxl_ep *ep, *_e; - cxl_device_lock(dev); + device_lock(dev); list_for_each_entry_safe(ep, _e, &port->endpoints, list) cxl_ep_release(ep); - cxl_device_unlock(dev); + device_unlock(dev); ida_free(&cxl_port_ida, port->id); kfree(port); } @@ -391,6 +391,8 @@ static int devm_cxl_link_uport(struct device *host, struct cxl_port *port) return devm_add_action_or_reset(host, cxl_unlink_uport, port); } +static struct lock_class_key cxl_port_key; + static struct cxl_port *cxl_port_alloc(struct device *uport, resource_size_t component_reg_phys, struct cxl_port *parent_port) @@ -415,9 +417,10 @@ static struct cxl_port *cxl_port_alloc(struct device *uport, * description. */ dev = &port->dev; - if (parent_port) + if (parent_port) { dev->parent = &parent_port->dev; - else + port->depth = parent_port->depth + 1; + } else dev->parent = uport; port->uport = uport; @@ -427,6 +430,7 @@ static struct cxl_port *cxl_port_alloc(struct device *uport, INIT_LIST_HEAD(&port->endpoints); device_initialize(dev); + lockdep_set_class_and_subclass(&dev->mutex, &cxl_port_key, port->depth); device_set_pm_not_required(dev); dev->bus = &cxl_bus_type; dev->type = &cxl_port_type; @@ -457,8 +461,6 @@ struct cxl_port *devm_cxl_add_port(struct device *host, struct device *uport, if (IS_ERR(port)) return port; - if (parent_port) - port->depth = parent_port->depth + 1; dev = &port->dev; if (is_cxl_memdev(uport)) rc = dev_set_name(dev, "endpoint%d", port->id); @@ -554,7 +556,7 @@ static int match_root_child(struct device *dev, const void *match) return 0; port = to_cxl_port(dev); - cxl_device_lock(dev); + device_lock(dev); list_for_each_entry(dport, &port->dports, list) { iter = match; while (iter) { @@ -564,7 +566,7 @@ static int match_root_child(struct device *dev, const void *match) } } out: - cxl_device_unlock(dev); + device_unlock(dev); return !!iter; } @@ -623,13 +625,13 @@ static int add_dport(struct cxl_port *port, struct cxl_dport *new) static void cond_cxl_root_lock(struct cxl_port *port) { if (is_cxl_root(port)) - cxl_device_lock(&port->dev); + device_lock(&port->dev); } static void cond_cxl_root_unlock(struct cxl_port *port) { if (is_cxl_root(port)) - cxl_device_unlock(&port->dev); + device_unlock(&port->dev); } static void cxl_dport_remove(void *data) @@ -736,15 +738,15 @@ static int add_ep(struct cxl_port *port, struct cxl_ep *new) { struct cxl_ep *dup; - cxl_device_lock(&port->dev); + device_lock(&port->dev); if (port->dead) { - cxl_device_unlock(&port->dev); + device_unlock(&port->dev); return -ENXIO; } dup = find_ep(port, new->ep); if (!dup) list_add_tail(&new->list, &port->endpoints); - cxl_device_unlock(&port->dev); + device_unlock(&port->dev); return dup ? -EEXIST : 0; } @@ -854,12 +856,12 @@ static void delete_endpoint(void *data) goto out; parent = &parent_port->dev; - cxl_device_lock(parent); + device_lock(parent); if (parent->driver && endpoint->uport) { devm_release_action(parent, cxl_unlink_uport, endpoint); devm_release_action(parent, unregister_port, endpoint); } - cxl_device_unlock(parent); + device_unlock(parent); put_device(parent); out: put_device(&endpoint->dev); @@ -920,7 +922,7 @@ static void cxl_detach_ep(void *data) } parent_port = to_cxl_port(port->dev.parent); - cxl_device_lock(&parent_port->dev); + device_lock(&parent_port->dev); if (!parent_port->dev.driver) { /* * The bottom-up race to delete the port lost to a @@ -928,12 +930,12 @@ static void cxl_detach_ep(void *data) * parent_port ->remove() will have cleaned up all * descendants. */ - cxl_device_unlock(&parent_port->dev); + device_unlock(&parent_port->dev); put_device(&port->dev); continue; } - cxl_device_lock(&port->dev); + device_lock(&port->dev); ep = find_ep(port, &cxlmd->dev); dev_dbg(&cxlmd->dev, "disconnect %s from %s\n", ep ? dev_name(ep->ep) : "", dev_name(&port->dev)); @@ -948,7 +950,7 @@ static void cxl_detach_ep(void *data) port->dead = true; list_splice_init(&port->dports, &reap_dports); } - cxl_device_unlock(&port->dev); + device_unlock(&port->dev); if (!list_empty(&reap_dports)) { dev_dbg(&cxlmd->dev, "delete %s\n", @@ -956,7 +958,7 @@ static void cxl_detach_ep(void *data) delete_switch_port(port, &reap_dports); } put_device(&port->dev); - cxl_device_unlock(&parent_port->dev); + device_unlock(&parent_port->dev); } } @@ -1004,7 +1006,7 @@ static int add_port_attach_ep(struct cxl_memdev *cxlmd, return -EAGAIN; } - cxl_device_lock(&parent_port->dev); + device_lock(&parent_port->dev); if (!parent_port->dev.driver) { dev_warn(&cxlmd->dev, "port %s:%s disabled, failed to enumerate CXL.mem\n", @@ -1022,7 +1024,7 @@ static int add_port_attach_ep(struct cxl_memdev *cxlmd, get_device(&port->dev); } out: - cxl_device_unlock(&parent_port->dev); + device_unlock(&parent_port->dev); if (IS_ERR(port)) rc = PTR_ERR(port); @@ -1133,14 +1135,14 @@ struct cxl_dport *cxl_find_dport_by_dev(struct cxl_port *port, { struct cxl_dport *dport; - cxl_device_lock(&port->dev); + device_lock(&port->dev); list_for_each_entry(dport, &port->dports, list) if (dport->dport == dev) { - cxl_device_unlock(&port->dev); + device_unlock(&port->dev); return dport; } - cxl_device_unlock(&port->dev); + device_unlock(&port->dev); return NULL; } EXPORT_SYMBOL_NS_GPL(cxl_find_dport_by_dev, CXL); @@ -1173,6 +1175,8 @@ static int decoder_populate_targets(struct cxl_decoder *cxld, return rc; } +static struct lock_class_key cxl_decoder_key; + /** * cxl_decoder_alloc - Allocate a new CXL decoder * @port: owning port of this decoder @@ -1214,6 +1218,7 @@ static struct cxl_decoder *cxl_decoder_alloc(struct cxl_port *port, seqlock_init(&cxld->target_lock); dev = &cxld->dev; device_initialize(dev); + lockdep_set_class(&dev->mutex, &cxl_decoder_key); device_set_pm_not_required(dev); dev->parent = &port->dev; dev->bus = &cxl_bus_type; @@ -1379,9 +1384,9 @@ int cxl_decoder_add(struct cxl_decoder *cxld, int *target_map) port = to_cxl_port(cxld->dev.parent); - cxl_device_lock(&port->dev); + device_lock(&port->dev); rc = cxl_decoder_add_locked(cxld, target_map); - cxl_device_unlock(&port->dev); + device_unlock(&port->dev); return rc; } @@ -1452,14 +1457,7 @@ static int cxl_bus_probe(struct device *dev) { int rc; - /* - * Take the CXL nested lock since the driver core only holds - * @dev->mutex and not @dev->lockdep_mutex. - */ - cxl_nested_lock(dev); rc = to_cxl_drv(dev->driver)->probe(dev); - cxl_nested_unlock(dev); - dev_dbg(dev, "probe: %d\n", rc); return rc; } @@ -1468,10 +1466,8 @@ static void cxl_bus_remove(struct device *dev) { struct cxl_driver *cxl_drv = to_cxl_drv(dev->driver); - cxl_nested_lock(dev); if (cxl_drv->remove) cxl_drv->remove(dev); - cxl_nested_unlock(dev); } static struct workqueue_struct *cxl_bus_wq; diff --git a/drivers/cxl/core/suspend.c b/drivers/cxl/core/suspend.c new file mode 100644 index 000000000000..a5984d96ea1d --- /dev/null +++ b/drivers/cxl/core/suspend.c @@ -0,0 +1,24 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Copyright(c) 2022 Intel Corporation. All rights reserved. */ +#include <linux/atomic.h> +#include <linux/export.h> +#include "cxlmem.h" + +static atomic_t mem_active; + +bool cxl_mem_active(void) +{ + return atomic_read(&mem_active) != 0; +} + +void cxl_mem_active_inc(void) +{ + atomic_inc(&mem_active); +} +EXPORT_SYMBOL_NS_GPL(cxl_mem_active_inc, CXL); + +void cxl_mem_active_dec(void) +{ + atomic_dec(&mem_active); +} +EXPORT_SYMBOL_NS_GPL(cxl_mem_active_dec, CXL); diff --git a/drivers/cxl/cxl.h b/drivers/cxl/cxl.h index 990b6670222e..140dc3278cde 100644 --- a/drivers/cxl/cxl.h +++ b/drivers/cxl/cxl.h @@ -405,82 +405,4 @@ struct cxl_nvdimm_bridge *cxl_find_nvdimm_bridge(struct cxl_nvdimm *cxl_nvd); #define __mock static #endif -#ifdef CONFIG_PROVE_CXL_LOCKING -enum cxl_lock_class { - CXL_ANON_LOCK, - CXL_NVDIMM_LOCK, - CXL_NVDIMM_BRIDGE_LOCK, - CXL_PORT_LOCK, - /* - * Be careful to add new lock classes here, CXL_PORT_LOCK is - * extended by the port depth, so a maximum CXL port topology - * depth would need to be defined first. - */ -}; - -static inline void cxl_nested_lock(struct device *dev) -{ - if (is_cxl_port(dev)) { - struct cxl_port *port = to_cxl_port(dev); - - mutex_lock_nested(&dev->lockdep_mutex, - CXL_PORT_LOCK + port->depth); - } else if (is_cxl_decoder(dev)) { - struct cxl_port *port = to_cxl_port(dev->parent); - - /* - * A decoder is the immediate child of a port, so set - * its lock class equal to other child device siblings. - */ - mutex_lock_nested(&dev->lockdep_mutex, - CXL_PORT_LOCK + port->depth + 1); - } else if (is_cxl_nvdimm_bridge(dev)) - mutex_lock_nested(&dev->lockdep_mutex, CXL_NVDIMM_BRIDGE_LOCK); - else if (is_cxl_nvdimm(dev)) - mutex_lock_nested(&dev->lockdep_mutex, CXL_NVDIMM_LOCK); - else - mutex_lock_nested(&dev->lockdep_mutex, CXL_ANON_LOCK); -} - -static inline void cxl_nested_unlock(struct device *dev) -{ - mutex_unlock(&dev->lockdep_mutex); -} - -static inline void cxl_device_lock(struct device *dev) -{ - /* - * For double lock errors the lockup will happen before lockdep - * warns at cxl_nested_lock(), so assert explicitly. - */ - lockdep_assert_not_held(&dev->lockdep_mutex); - - device_lock(dev); - cxl_nested_lock(dev); -} - -static inline void cxl_device_unlock(struct device *dev) -{ - cxl_nested_unlock(dev); - device_unlock(dev); -} -#else -static inline void cxl_nested_lock(struct device *dev) -{ -} - -static inline void cxl_nested_unlock(struct device *dev) -{ -} - -static inline void cxl_device_lock(struct device *dev) -{ - device_lock(dev); -} - -static inline void cxl_device_unlock(struct device *dev) -{ - device_unlock(dev); -} -#endif #endif /* __CXL_H__ */ diff --git a/drivers/cxl/cxlmem.h b/drivers/cxl/cxlmem.h index 5d33ce24fe09..60d10ee1e7fc 100644 --- a/drivers/cxl/cxlmem.h +++ b/drivers/cxl/cxlmem.h @@ -85,10 +85,61 @@ struct cxl_mbox_cmd { size_t size_in; size_t size_out; u16 return_code; -#define CXL_MBOX_SUCCESS 0 }; /* + * Per CXL 2.0 Section 8.2.8.4.5.1 + */ +#define CMD_CMD_RC_TABLE \ + C(SUCCESS, 0, NULL), \ + C(BACKGROUND, -ENXIO, "background cmd started successfully"), \ + C(INPUT, -ENXIO, "cmd input was invalid"), \ + C(UNSUPPORTED, -ENXIO, "cmd is not supported"), \ + C(INTERNAL, -ENXIO, "internal device error"), \ + C(RETRY, -ENXIO, "temporary error, retry once"), \ + C(BUSY, -ENXIO, "ongoing background operation"), \ + C(MEDIADISABLED, -ENXIO, "media access is disabled"), \ + C(FWINPROGRESS, -ENXIO, "one FW package can be transferred at a time"), \ + C(FWOOO, -ENXIO, "FW package content was transferred out of order"), \ + C(FWAUTH, -ENXIO, "FW package authentication failed"), \ + C(FWSLOT, -ENXIO, "FW slot is not supported for requested operation"), \ + C(FWROLLBACK, -ENXIO, "rolled back to the previous active FW"), \ + C(FWRESET, -ENXIO, "FW failed to activate, needs cold reset"), \ + C(HANDLE, -ENXIO, "one or more Event Record Handles were invalid"), \ + C(PADDR, -ENXIO, "physical address specified is invalid"), \ + C(POISONLMT, -ENXIO, "poison injection limit has been reached"), \ + C(MEDIAFAILURE, -ENXIO, "permanent issue with the media"), \ + C(ABORT, -ENXIO, "background cmd was aborted by device"), \ + C(SECURITY, -ENXIO, "not valid in the current security state"), \ + C(PASSPHRASE, -ENXIO, "phrase doesn't match current set passphrase"), \ + C(MBUNSUPPORTED, -ENXIO, "unsupported on the mailbox it was issued on"),\ + C(PAYLOADLEN, -ENXIO, "invalid payload length") + +#undef C +#define C(a, b, c) CXL_MBOX_CMD_RC_##a +enum { CMD_CMD_RC_TABLE }; +#undef C +#define C(a, b, c) { b, c } +struct cxl_mbox_cmd_rc { + int err; + const char *desc; +}; + +static const +struct cxl_mbox_cmd_rc cxl_mbox_cmd_rctable[] ={ CMD_CMD_RC_TABLE }; +#undef C + +static inline const char *cxl_mbox_cmd_rc2str(struct cxl_mbox_cmd *mbox_cmd) +{ + return cxl_mbox_cmd_rctable[mbox_cmd->return_code].desc; +} + +static inline int cxl_mbox_cmd_rc2errno(struct cxl_mbox_cmd *mbox_cmd) +{ + return cxl_mbox_cmd_rctable[mbox_cmd->return_code].err; +} + +/* * CXL 2.0 - Memory capacity multiplier * See Section 8.2.9.5 * @@ -141,7 +192,6 @@ struct cxl_endpoint_dvsec_info { * @info: Cached DVSEC information about the device. * @serial: PCIe Device Serial Number * @mbox_send: @dev specific transport for transmitting mailbox commands - * @wait_media_ready: @dev specific method to await media ready * * See section 8.2.9.5.2 Capacity Configuration and Label Storage for * details on capacity parameters. @@ -172,11 +222,9 @@ struct cxl_dev_state { u64 next_persistent_bytes; resource_size_t component_reg_phys; - struct cxl_endpoint_dvsec_info info; u64 serial; int (*mbox_send)(struct cxl_dev_state *cxlds, struct cxl_mbox_cmd *cmd); - int (*wait_media_ready)(struct cxl_dev_state *cxlds); }; enum cxl_opcode { @@ -262,6 +310,13 @@ struct cxl_mbox_set_lsa { u8 data[]; } __packed; +struct cxl_mbox_set_partition_info { + __le64 volatile_capacity; + u8 flags; +} __packed; + +#define CXL_SET_PARTITION_IMMEDIATE_FLAG BIT(0) + /** * struct cxl_mem_command - Driver representation of a memory device command * @info: Command information as it exists for the UAPI @@ -290,11 +345,23 @@ struct cxl_mem_command { int cxl_mbox_send_cmd(struct cxl_dev_state *cxlds, u16 opcode, void *in, size_t in_size, void *out, size_t out_size); int cxl_dev_state_identify(struct cxl_dev_state *cxlds); +int cxl_await_media_ready(struct cxl_dev_state *cxlds); int cxl_enumerate_cmds(struct cxl_dev_state *cxlds); int cxl_mem_create_range_info(struct cxl_dev_state *cxlds); struct cxl_dev_state *cxl_dev_state_create(struct device *dev); void set_exclusive_cxl_commands(struct cxl_dev_state *cxlds, unsigned long *cmds); void clear_exclusive_cxl_commands(struct cxl_dev_state *cxlds, unsigned long *cmds); +#ifdef CONFIG_CXL_SUSPEND +void cxl_mem_active_inc(void); +void cxl_mem_active_dec(void); +#else +static inline void cxl_mem_active_inc(void) +{ +} +static inline void cxl_mem_active_dec(void) +{ +} +#endif struct cxl_hdm { struct cxl_component_regs regs; diff --git a/drivers/cxl/cxlpci.h b/drivers/cxl/cxlpci.h index 329e7ea3f36a..fce1c11729c2 100644 --- a/drivers/cxl/cxlpci.h +++ b/drivers/cxl/cxlpci.h @@ -72,4 +72,6 @@ static inline resource_size_t cxl_regmap_to_base(struct pci_dev *pdev, } int devm_cxl_port_enumerate_dports(struct cxl_port *port); +struct cxl_dev_state; +int cxl_hdm_decode_init(struct cxl_dev_state *cxlds, struct cxl_hdm *cxlhdm); #endif /* __CXL_PCI_H__ */ diff --git a/drivers/cxl/mem.c b/drivers/cxl/mem.c index 49a4b1c47299..c310f1fd3db0 100644 --- a/drivers/cxl/mem.c +++ b/drivers/cxl/mem.c @@ -24,27 +24,6 @@ * in higher level operations. */ -static int wait_for_media(struct cxl_memdev *cxlmd) -{ - struct cxl_dev_state *cxlds = cxlmd->cxlds; - struct cxl_endpoint_dvsec_info *info = &cxlds->info; - int rc; - - if (!info->mem_enabled) - return -EBUSY; - - rc = cxlds->wait_media_ready(cxlds); - if (rc) - return rc; - - /* - * We know the device is active, and enabled, if any ranges are non-zero - * we'll need to check later before adding the port since that owns the - * HDM decoder registers. - */ - return 0; -} - static int create_endpoint(struct cxl_memdev *cxlmd, struct cxl_port *parent_port) { @@ -67,72 +46,14 @@ static int create_endpoint(struct cxl_memdev *cxlmd, return cxl_endpoint_autoremove(cxlmd, endpoint); } -/** - * cxl_dvsec_decode_init() - Setup HDM decoding for the endpoint - * @cxlds: Device state - * - * Additionally, enables global HDM decoding. Warning: don't call this outside - * of probe. Once probe is complete, the port driver owns all access to the HDM - * decoder registers. - * - * Returns: false if DVSEC Ranges are being used instead of HDM - * decoders, or if it can not be determined if DVSEC Ranges are in use. - * Otherwise, returns true. - */ -__mock bool cxl_dvsec_decode_init(struct cxl_dev_state *cxlds) +static void enable_suspend(void *data) { - struct cxl_endpoint_dvsec_info *info = &cxlds->info; - struct cxl_register_map map; - struct cxl_component_reg_map *cmap = &map.component_map; - bool global_enable, do_hdm_init = false; - void __iomem *crb; - u32 global_ctrl; - - /* map hdm decoder */ - crb = ioremap(cxlds->component_reg_phys, CXL_COMPONENT_REG_BLOCK_SIZE); - if (!crb) { - dev_dbg(cxlds->dev, "Failed to map component registers\n"); - return false; - } - - cxl_probe_component_regs(cxlds->dev, crb, cmap); - if (!cmap->hdm_decoder.valid) { - dev_dbg(cxlds->dev, "Invalid HDM decoder registers\n"); - goto out; - } - - global_ctrl = readl(crb + cmap->hdm_decoder.offset + - CXL_HDM_DECODER_CTRL_OFFSET); - global_enable = global_ctrl & CXL_HDM_DECODER_ENABLE; - if (!global_enable && info->ranges) { - dev_dbg(cxlds->dev, - "DVSEC ranges already programmed and HDM decoders not enabled.\n"); - goto out; - } - - do_hdm_init = true; - - /* - * Permanently (for this boot at least) opt the device into HDM - * operation. Individual HDM decoders still need to be enabled after - * this point. - */ - if (!global_enable) { - dev_dbg(cxlds->dev, "Enabling HDM decode\n"); - writel(global_ctrl | CXL_HDM_DECODER_ENABLE, - crb + cmap->hdm_decoder.offset + - CXL_HDM_DECODER_CTRL_OFFSET); - } - -out: - iounmap(crb); - return do_hdm_init; + cxl_mem_active_dec(); } static int cxl_mem_probe(struct device *dev) { struct cxl_memdev *cxlmd = to_cxl_memdev(dev); - struct cxl_dev_state *cxlds = cxlmd->cxlds; struct cxl_port *parent_port; int rc; @@ -147,44 +68,6 @@ static int cxl_mem_probe(struct device *dev) if (work_pending(&cxlmd->detach_work)) return -EBUSY; - rc = wait_for_media(cxlmd); - if (rc) { - dev_err(dev, "Media not active (%d)\n", rc); - return rc; - } - - /* - * If DVSEC ranges are being used instead of HDM decoder registers there - * is no use in trying to manage those. - */ - if (!cxl_dvsec_decode_init(cxlds)) { - struct cxl_endpoint_dvsec_info *info = &cxlds->info; - int i; - - /* */ - for (i = 0; i < 2; i++) { - u64 base, size; - - /* - * Give a nice warning to the user that BIOS has really - * botched things for them if it didn't place DVSEC - * ranges in the memory map. - */ - base = info->dvsec_range[i].start; - size = range_len(&info->dvsec_range[i]); - if (size && !region_intersects(base, size, - IORESOURCE_SYSTEM_RAM, - IORES_DESC_NONE)) { - dev_err(dev, - "DVSEC range %#llx-%#llx must be reserved by BIOS, but isn't\n", - base, base + size - 1); - } - } - dev_err(dev, - "Active DVSEC range registers in use. Will not bind.\n"); - return -EBUSY; - } - rc = devm_cxl_enumerate_ports(cxlmd); if (rc) return rc; @@ -195,19 +78,36 @@ static int cxl_mem_probe(struct device *dev) return -ENXIO; } - cxl_device_lock(&parent_port->dev); + device_lock(&parent_port->dev); if (!parent_port->dev.driver) { dev_err(dev, "CXL port topology %s not enabled\n", dev_name(&parent_port->dev)); rc = -ENXIO; - goto out; + goto unlock; } rc = create_endpoint(cxlmd, parent_port); -out: - cxl_device_unlock(&parent_port->dev); +unlock: + device_unlock(&parent_port->dev); put_device(&parent_port->dev); - return rc; + if (rc) + return rc; + + /* + * The kernel may be operating out of CXL memory on this device, + * there is no spec defined way to determine whether this device + * preserves contents over suspend, and there is no simple way + * to arrange for the suspend image to avoid CXL memory which + * would setup a circular dependency between PCI resume and save + * state restoration. + * + * TODO: support suspend when all the regions this device is + * hosting are locked and covered by the system address map, + * i.e. platform firmware owns restoring the HDM configuration + * that it locked. + */ + cxl_mem_active_inc(); + return devm_add_action_or_reset(dev, enable_suspend, NULL); } static struct cxl_driver cxl_mem_driver = { diff --git a/drivers/cxl/pci.c b/drivers/cxl/pci.c index 3f2182d66829..5a0ae46d4989 100644 --- a/drivers/cxl/pci.c +++ b/drivers/cxl/pci.c @@ -48,8 +48,7 @@ */ static unsigned short mbox_ready_timeout = 60; module_param(mbox_ready_timeout, ushort, 0644); -MODULE_PARM_DESC(mbox_ready_timeout, - "seconds to wait for mailbox ready / memory active status"); +MODULE_PARM_DESC(mbox_ready_timeout, "seconds to wait for mailbox ready"); static int cxl_pci_mbox_wait_for_doorbell(struct cxl_dev_state *cxlds) { @@ -177,9 +176,10 @@ static int __cxl_pci_mbox_send_cmd(struct cxl_dev_state *cxlds, mbox_cmd->return_code = FIELD_GET(CXLDEV_MBOX_STATUS_RET_CODE_MASK, status_reg); - if (mbox_cmd->return_code != 0) { - dev_dbg(dev, "Mailbox operation had an error\n"); - return 0; + if (mbox_cmd->return_code != CXL_MBOX_CMD_RC_SUCCESS) { + dev_dbg(dev, "Mailbox operation had an error: %s\n", + cxl_mbox_cmd_rc2str(mbox_cmd)); + return 0; /* completed but caller must check return_code */ } /* #7 */ @@ -386,164 +386,6 @@ static int cxl_setup_regs(struct pci_dev *pdev, enum cxl_regloc_type type, return rc; } -static int wait_for_valid(struct cxl_dev_state *cxlds) -{ - struct pci_dev *pdev = to_pci_dev(cxlds->dev); - int d = cxlds->cxl_dvsec, rc; - u32 val; - - /* - * Memory_Info_Valid: When set, indicates that the CXL Range 1 Size high - * and Size Low registers are valid. Must be set within 1 second of - * deassertion of reset to CXL device. Likely it is already set by the - * time this runs, but otherwise give a 1.5 second timeout in case of - * clock skew. - */ - rc = pci_read_config_dword(pdev, d + CXL_DVSEC_RANGE_SIZE_LOW(0), &val); - if (rc) - return rc; - - if (val & CXL_DVSEC_MEM_INFO_VALID) - return 0; - - msleep(1500); - - rc = pci_read_config_dword(pdev, d + CXL_DVSEC_RANGE_SIZE_LOW(0), &val); - if (rc) - return rc; - - if (val & CXL_DVSEC_MEM_INFO_VALID) - return 0; - - return -ETIMEDOUT; -} - -/* - * Wait up to @mbox_ready_timeout for the device to report memory - * active. - */ -static int wait_for_media_ready(struct cxl_dev_state *cxlds) -{ - struct pci_dev *pdev = to_pci_dev(cxlds->dev); - int d = cxlds->cxl_dvsec; - bool active = false; - u64 md_status; - int rc, i; - - rc = wait_for_valid(cxlds); - if (rc) - return rc; - - for (i = mbox_ready_timeout; i; i--) { - u32 temp; - - rc = pci_read_config_dword( - pdev, d + CXL_DVSEC_RANGE_SIZE_LOW(0), &temp); - if (rc) - return rc; - - active = FIELD_GET(CXL_DVSEC_MEM_ACTIVE, temp); - if (active) - break; - msleep(1000); - } - - if (!active) { - dev_err(&pdev->dev, - "timeout awaiting memory active after %d seconds\n", - mbox_ready_timeout); - return -ETIMEDOUT; - } - - md_status = readq(cxlds->regs.memdev + CXLMDEV_STATUS_OFFSET); - if (!CXLMDEV_READY(md_status)) - return -EIO; - - return 0; -} - -static int cxl_dvsec_ranges(struct cxl_dev_state *cxlds) -{ - struct cxl_endpoint_dvsec_info *info = &cxlds->info; - struct pci_dev *pdev = to_pci_dev(cxlds->dev); - int d = cxlds->cxl_dvsec; - int hdm_count, rc, i; - u16 cap, ctrl; - - if (!d) - return -ENXIO; - - rc = pci_read_config_word(pdev, d + CXL_DVSEC_CAP_OFFSET, &cap); - if (rc) - return rc; - - rc = pci_read_config_word(pdev, d + CXL_DVSEC_CTRL_OFFSET, &ctrl); - if (rc) - return rc; - - if (!(cap & CXL_DVSEC_MEM_CAPABLE)) - return -ENXIO; - - /* - * It is not allowed by spec for MEM.capable to be set and have 0 legacy - * HDM decoders (values > 2 are also undefined as of CXL 2.0). As this - * driver is for a spec defined class code which must be CXL.mem - * capable, there is no point in continuing to enable CXL.mem. - */ - hdm_count = FIELD_GET(CXL_DVSEC_HDM_COUNT_MASK, cap); - if (!hdm_count || hdm_count > 2) - return -EINVAL; - - rc = wait_for_valid(cxlds); - if (rc) - return rc; - - info->mem_enabled = FIELD_GET(CXL_DVSEC_MEM_ENABLE, ctrl); - - for (i = 0; i < hdm_count; i++) { - u64 base, size; - u32 temp; - - rc = pci_read_config_dword( - pdev, d + CXL_DVSEC_RANGE_SIZE_HIGH(i), &temp); - if (rc) - return rc; - - size = (u64)temp << 32; - - rc = pci_read_config_dword( - pdev, d + CXL_DVSEC_RANGE_SIZE_LOW(i), &temp); - if (rc) - return rc; - - size |= temp & CXL_DVSEC_MEM_SIZE_LOW_MASK; - - rc = pci_read_config_dword( - pdev, d + CXL_DVSEC_RANGE_BASE_HIGH(i), &temp); - if (rc) - return rc; - - base = (u64)temp << 32; - - rc = pci_read_config_dword( - pdev, d + CXL_DVSEC_RANGE_BASE_LOW(i), &temp); - if (rc) - return rc; - - base |= temp & CXL_DVSEC_MEM_BASE_LOW_MASK; - - info->dvsec_range[i] = (struct range) { - .start = base, - .end = base + size - 1 - }; - - if (size) - info->ranges++; - } - - return 0; -} - static int cxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) { struct cxl_register_map map; @@ -573,8 +415,6 @@ static int cxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) dev_warn(&pdev->dev, "Device DVSEC not present, skip CXL.mem init\n"); - cxlds->wait_media_ready = wait_for_media_ready; - rc = cxl_setup_regs(pdev, CXL_REGLOC_RBI_MEMDEV, &map); if (rc) return rc; @@ -610,11 +450,6 @@ static int cxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) if (rc) return rc; - rc = cxl_dvsec_ranges(cxlds); - if (rc) - dev_warn(&pdev->dev, - "Failed to get DVSEC range information (%d)\n", rc); - cxlmd = devm_cxl_add_memdev(cxlds); if (IS_ERR(cxlmd)) return PTR_ERR(cxlmd); diff --git a/drivers/cxl/pmem.c b/drivers/cxl/pmem.c index 15ad666ab03e..bbeef91e637e 100644 --- a/drivers/cxl/pmem.c +++ b/drivers/cxl/pmem.c @@ -43,7 +43,7 @@ static int cxl_nvdimm_probe(struct device *dev) if (!cxl_nvb) return -ENXIO; - cxl_device_lock(&cxl_nvb->dev); + device_lock(&cxl_nvb->dev); if (!cxl_nvb->nvdimm_bus) { rc = -ENXIO; goto out; @@ -68,7 +68,7 @@ static int cxl_nvdimm_probe(struct device *dev) dev_set_drvdata(dev, nvdimm); rc = devm_add_action_or_reset(dev, unregister_nvdimm, nvdimm); out: - cxl_device_unlock(&cxl_nvb->dev); + device_unlock(&cxl_nvb->dev); put_device(&cxl_nvb->dev); return rc; @@ -233,7 +233,7 @@ static void cxl_nvb_update_state(struct work_struct *work) struct nvdimm_bus *victim_bus = NULL; bool release = false, rescan = false; - cxl_device_lock(&cxl_nvb->dev); + device_lock(&cxl_nvb->dev); switch (cxl_nvb->state) { case CXL_NVB_ONLINE: if (!online_nvdimm_bus(cxl_nvb)) { @@ -251,7 +251,7 @@ static void cxl_nvb_update_state(struct work_struct *work) default: break; } - cxl_device_unlock(&cxl_nvb->dev); + device_unlock(&cxl_nvb->dev); if (release) device_release_driver(&cxl_nvb->dev); @@ -327,9 +327,9 @@ static int cxl_nvdimm_bridge_reset(struct device *dev, void *data) return 0; cxl_nvb = to_cxl_nvdimm_bridge(dev); - cxl_device_lock(dev); + device_lock(dev); cxl_nvb->state = CXL_NVB_NEW; - cxl_device_unlock(dev); + device_unlock(dev); return 0; } @@ -344,7 +344,6 @@ static __init int cxl_pmem_init(void) { int rc; - set_bit(CXL_MEM_COMMAND_ID_SET_PARTITION_INFO, exclusive_cmds); set_bit(CXL_MEM_COMMAND_ID_SET_SHUTDOWN_STATE, exclusive_cmds); set_bit(CXL_MEM_COMMAND_ID_SET_LSA, exclusive_cmds); diff --git a/drivers/cxl/port.c b/drivers/cxl/port.c index d420da5fc39c..3cf308f114c4 100644 --- a/drivers/cxl/port.c +++ b/drivers/cxl/port.c @@ -36,14 +36,8 @@ static int cxl_port_probe(struct device *dev) struct cxl_hdm *cxlhdm; int rc; - if (is_cxl_endpoint(port)) { - struct cxl_memdev *cxlmd = to_cxl_memdev(port->uport); - get_device(&cxlmd->dev); - rc = devm_add_action_or_reset(dev, schedule_detach, cxlmd); - if (rc) - return rc; - } else { + if (!is_cxl_endpoint(port)) { rc = devm_cxl_port_enumerate_dports(port); if (rc < 0) return rc; @@ -55,6 +49,26 @@ static int cxl_port_probe(struct device *dev) if (IS_ERR(cxlhdm)) return PTR_ERR(cxlhdm); + if (is_cxl_endpoint(port)) { + struct cxl_memdev *cxlmd = to_cxl_memdev(port->uport); + struct cxl_dev_state *cxlds = cxlmd->cxlds; + + get_device(&cxlmd->dev); + rc = devm_add_action_or_reset(dev, schedule_detach, cxlmd); + if (rc) + return rc; + + rc = cxl_hdm_decode_init(cxlds, cxlhdm); + if (rc) + return rc; + + rc = cxl_await_media_ready(cxlds); + if (rc) { + dev_err(dev, "Media not active (%d)\n", rc); + return rc; + } + } + rc = devm_cxl_enumerate_decoders(cxlhdm); if (rc) { dev_err(dev, "Couldn't enumerate decoders (%d)\n", rc); diff --git a/drivers/nvdimm/btt_devs.c b/drivers/nvdimm/btt_devs.c index e5a58520d398..fabbb31f2c35 100644 --- a/drivers/nvdimm/btt_devs.c +++ b/drivers/nvdimm/btt_devs.c @@ -50,14 +50,14 @@ static ssize_t sector_size_store(struct device *dev, struct nd_btt *nd_btt = to_nd_btt(dev); ssize_t rc; - nd_device_lock(dev); + device_lock(dev); nvdimm_bus_lock(dev); rc = nd_size_select_store(dev, buf, &nd_btt->lbasize, btt_lbasize_supported); dev_dbg(dev, "result: %zd wrote: %s%s", rc, buf, buf[len - 1] == '\n' ? "" : "\n"); nvdimm_bus_unlock(dev); - nd_device_unlock(dev); + device_unlock(dev); return rc ? rc : len; } @@ -79,11 +79,11 @@ static ssize_t uuid_store(struct device *dev, struct nd_btt *nd_btt = to_nd_btt(dev); ssize_t rc; - nd_device_lock(dev); + device_lock(dev); rc = nd_uuid_store(dev, &nd_btt->uuid, buf, len); dev_dbg(dev, "result: %zd wrote: %s%s", rc, buf, buf[len - 1] == '\n' ? "" : "\n"); - nd_device_unlock(dev); + device_unlock(dev); return rc ? rc : len; } @@ -108,13 +108,13 @@ static ssize_t namespace_store(struct device *dev, struct nd_btt *nd_btt = to_nd_btt(dev); ssize_t rc; - nd_device_lock(dev); + device_lock(dev); nvdimm_bus_lock(dev); rc = nd_namespace_store(dev, &nd_btt->ndns, buf, len); dev_dbg(dev, "result: %zd wrote: %s%s", rc, buf, buf[len - 1] == '\n' ? "" : "\n"); nvdimm_bus_unlock(dev); - nd_device_unlock(dev); + device_unlock(dev); return rc; } @@ -126,14 +126,14 @@ static ssize_t size_show(struct device *dev, struct nd_btt *nd_btt = to_nd_btt(dev); ssize_t rc; - nd_device_lock(dev); + device_lock(dev); if (dev->driver) rc = sprintf(buf, "%llu\n", nd_btt->size); else { /* no size to convey if the btt instance is disabled */ rc = -ENXIO; } - nd_device_unlock(dev); + device_unlock(dev); return rc; } @@ -178,6 +178,8 @@ bool is_nd_btt(struct device *dev) } EXPORT_SYMBOL(is_nd_btt); +static struct lock_class_key nvdimm_btt_key; + static struct device *__nd_btt_create(struct nd_region *nd_region, unsigned long lbasize, uuid_t *uuid, struct nd_namespace_common *ndns) @@ -205,6 +207,7 @@ static struct device *__nd_btt_create(struct nd_region *nd_region, dev->parent = &nd_region->dev; dev->type = &nd_btt_device_type; device_initialize(&nd_btt->dev); + lockdep_set_class(&nd_btt->dev.mutex, &nvdimm_btt_key); if (ndns && !__nd_attach_ndns(&nd_btt->dev, ndns, &nd_btt->ndns)) { dev_dbg(&ndns->dev, "failed, already claimed by %s\n", dev_name(ndns->claim)); @@ -225,7 +228,7 @@ struct device *nd_btt_create(struct nd_region *nd_region) { struct device *dev = __nd_btt_create(nd_region, 0, NULL, NULL); - __nd_device_register(dev); + nd_device_register(dev); return dev; } @@ -324,7 +327,7 @@ static int __nd_btt_probe(struct nd_btt *nd_btt, if (!nd_btt->uuid) return -ENOMEM; - __nd_device_register(&nd_btt->dev); + nd_device_register(&nd_btt->dev); return 0; } diff --git a/drivers/nvdimm/bus.c b/drivers/nvdimm/bus.c index 7b0d1443217a..a4fc17db707c 100644 --- a/drivers/nvdimm/bus.c +++ b/drivers/nvdimm/bus.c @@ -88,10 +88,7 @@ static int nvdimm_bus_probe(struct device *dev) dev->driver->name, dev_name(dev)); nvdimm_bus_probe_start(nvdimm_bus); - debug_nvdimm_lock(dev); rc = nd_drv->probe(dev); - debug_nvdimm_unlock(dev); - if ((rc == 0 || rc == -EOPNOTSUPP) && dev->parent && is_nd_region(dev->parent)) nd_region_advance_seeds(to_nd_region(dev->parent), dev); @@ -111,11 +108,8 @@ static void nvdimm_bus_remove(struct device *dev) struct module *provider = to_bus_provider(dev); struct nvdimm_bus *nvdimm_bus = walk_to_nvdimm_bus(dev); - if (nd_drv->remove) { - debug_nvdimm_lock(dev); + if (nd_drv->remove) nd_drv->remove(dev); - debug_nvdimm_unlock(dev); - } dev_dbg(&nvdimm_bus->dev, "%s.remove(%s)\n", dev->driver->name, dev_name(dev)); @@ -139,7 +133,7 @@ static void nvdimm_bus_shutdown(struct device *dev) void nd_device_notify(struct device *dev, enum nvdimm_event event) { - nd_device_lock(dev); + device_lock(dev); if (dev->driver) { struct nd_device_driver *nd_drv; @@ -147,7 +141,7 @@ void nd_device_notify(struct device *dev, enum nvdimm_event event) if (nd_drv->notify) nd_drv->notify(dev, event); } - nd_device_unlock(dev); + device_unlock(dev); } EXPORT_SYMBOL(nd_device_notify); @@ -334,6 +328,8 @@ struct nvdimm_bus *nvdimm_to_bus(struct nvdimm *nvdimm) } EXPORT_SYMBOL_GPL(nvdimm_to_bus); +static struct lock_class_key nvdimm_bus_key; + struct nvdimm_bus *nvdimm_bus_register(struct device *parent, struct nvdimm_bus_descriptor *nd_desc) { @@ -360,6 +356,7 @@ struct nvdimm_bus *nvdimm_bus_register(struct device *parent, nvdimm_bus->dev.bus = &nvdimm_bus_type; nvdimm_bus->dev.of_node = nd_desc->of_node; device_initialize(&nvdimm_bus->dev); + lockdep_set_class(&nvdimm_bus->dev.mutex, &nvdimm_bus_key); device_set_pm_not_required(&nvdimm_bus->dev); rc = dev_set_name(&nvdimm_bus->dev, "ndbus%d", nvdimm_bus->id); if (rc) @@ -511,7 +508,7 @@ static void nd_async_device_unregister(void *d, async_cookie_t cookie) put_device(dev); } -void __nd_device_register(struct device *dev) +void nd_device_register(struct device *dev) { if (!dev) return; @@ -537,12 +534,6 @@ void __nd_device_register(struct device *dev) async_schedule_dev_domain(nd_async_device_register, dev, &nd_async_domain); } - -void nd_device_register(struct device *dev) -{ - device_initialize(dev); - __nd_device_register(dev); -} EXPORT_SYMBOL(nd_device_register); void nd_device_unregister(struct device *dev, enum nd_async_mode mode) @@ -572,9 +563,9 @@ void nd_device_unregister(struct device *dev, enum nd_async_mode mode) * or otherwise let the async path handle it if the * unregistration was already queued. */ - nd_device_lock(dev); + device_lock(dev); killed = kill_device(dev); - nd_device_unlock(dev); + device_unlock(dev); if (!killed) return; @@ -724,6 +715,8 @@ static void ndctl_release(struct device *dev) kfree(dev); } +static struct lock_class_key nvdimm_ndctl_key; + int nvdimm_bus_create_ndctl(struct nvdimm_bus *nvdimm_bus) { dev_t devt = MKDEV(nvdimm_bus_major, nvdimm_bus->id); @@ -734,6 +727,7 @@ int nvdimm_bus_create_ndctl(struct nvdimm_bus *nvdimm_bus) if (!dev) return -ENOMEM; device_initialize(dev); + lockdep_set_class(&dev->mutex, &nvdimm_ndctl_key); device_set_pm_not_required(dev); dev->class = nd_class; dev->parent = &nvdimm_bus->dev; @@ -930,10 +924,10 @@ void wait_nvdimm_bus_probe_idle(struct device *dev) if (nvdimm_bus->probe_active == 0) break; nvdimm_bus_unlock(dev); - nd_device_unlock(dev); + device_unlock(dev); wait_event(nvdimm_bus->wait, nvdimm_bus->probe_active == 0); - nd_device_lock(dev); + device_lock(dev); nvdimm_bus_lock(dev); } while (true); } @@ -1167,7 +1161,7 @@ static int __nd_ioctl(struct nvdimm_bus *nvdimm_bus, struct nvdimm *nvdimm, goto out; } - nd_device_lock(dev); + device_lock(dev); nvdimm_bus_lock(dev); rc = nd_cmd_clear_to_send(nvdimm_bus, nvdimm, func, buf); if (rc) @@ -1189,7 +1183,7 @@ static int __nd_ioctl(struct nvdimm_bus *nvdimm_bus, struct nvdimm *nvdimm, out_unlock: nvdimm_bus_unlock(dev); - nd_device_unlock(dev); + device_unlock(dev); out: kfree(in_env); kfree(out_env); diff --git a/drivers/nvdimm/core.c b/drivers/nvdimm/core.c index 69a03358817f..d91799b71d23 100644 --- a/drivers/nvdimm/core.c +++ b/drivers/nvdimm/core.c @@ -215,7 +215,7 @@ EXPORT_SYMBOL_GPL(to_nvdimm_bus_dev); * * Enforce that uuids can only be changed while the device is disabled * (driver detached) - * LOCKING: expects nd_device_lock() is held on entry + * LOCKING: expects device_lock() is held on entry */ int nd_uuid_store(struct device *dev, uuid_t **uuid_out, const char *buf, size_t len) @@ -316,15 +316,15 @@ static DEVICE_ATTR_RO(provider); static int flush_namespaces(struct device *dev, void *data) { - nd_device_lock(dev); - nd_device_unlock(dev); + device_lock(dev); + device_unlock(dev); return 0; } static int flush_regions_dimms(struct device *dev, void *data) { - nd_device_lock(dev); - nd_device_unlock(dev); + device_lock(dev); + device_unlock(dev); device_for_each_child(dev, NULL, flush_namespaces); return 0; } @@ -368,9 +368,7 @@ static ssize_t capability_show(struct device *dev, if (!nd_desc->fw_ops) return -EOPNOTSUPP; - nvdimm_bus_lock(dev); cap = nd_desc->fw_ops->capability(nd_desc); - nvdimm_bus_unlock(dev); switch (cap) { case NVDIMM_FWA_CAP_QUIESCE: @@ -395,10 +393,8 @@ static ssize_t activate_show(struct device *dev, if (!nd_desc->fw_ops) return -EOPNOTSUPP; - nvdimm_bus_lock(dev); cap = nd_desc->fw_ops->capability(nd_desc); state = nd_desc->fw_ops->activate_state(nd_desc); - nvdimm_bus_unlock(dev); if (cap < NVDIMM_FWA_CAP_QUIESCE) return -EOPNOTSUPP; @@ -443,7 +439,6 @@ static ssize_t activate_store(struct device *dev, else return -EINVAL; - nvdimm_bus_lock(dev); state = nd_desc->fw_ops->activate_state(nd_desc); switch (state) { @@ -461,7 +456,6 @@ static ssize_t activate_store(struct device *dev, default: rc = -ENXIO; } - nvdimm_bus_unlock(dev); if (rc == 0) rc = len; @@ -484,10 +478,7 @@ static umode_t nvdimm_bus_firmware_visible(struct kobject *kobj, struct attribut if (!nd_desc->fw_ops) return 0; - nvdimm_bus_lock(dev); cap = nd_desc->fw_ops->capability(nd_desc); - nvdimm_bus_unlock(dev); - if (cap < NVDIMM_FWA_CAP_QUIESCE) return 0; diff --git a/drivers/nvdimm/dax_devs.c b/drivers/nvdimm/dax_devs.c index 99965077bac4..7f4a9d28b670 100644 --- a/drivers/nvdimm/dax_devs.c +++ b/drivers/nvdimm/dax_devs.c @@ -80,7 +80,7 @@ struct device *nd_dax_create(struct nd_region *nd_region) nd_dax = nd_dax_alloc(nd_region); if (nd_dax) dev = nd_pfn_devinit(&nd_dax->nd_pfn, NULL); - __nd_device_register(dev); + nd_device_register(dev); return dev; } @@ -119,7 +119,7 @@ int nd_dax_probe(struct device *dev, struct nd_namespace_common *ndns) nd_detach_ndns(dax_dev, &nd_pfn->ndns); put_device(dax_dev); } else - __nd_device_register(dax_dev); + nd_device_register(dax_dev); return rc; } diff --git a/drivers/nvdimm/dimm_devs.c b/drivers/nvdimm/dimm_devs.c index ee507eed42b5..c7c980577491 100644 --- a/drivers/nvdimm/dimm_devs.c +++ b/drivers/nvdimm/dimm_devs.c @@ -341,9 +341,9 @@ static ssize_t available_slots_show(struct device *dev, { ssize_t rc; - nd_device_lock(dev); + device_lock(dev); rc = __available_slots_show(dev_get_drvdata(dev), buf); - nd_device_unlock(dev); + device_unlock(dev); return rc; } @@ -386,12 +386,12 @@ static ssize_t security_store(struct device *dev, * done while probing is idle and the DIMM is not in active use * in any region. */ - nd_device_lock(dev); + device_lock(dev); nvdimm_bus_lock(dev); wait_nvdimm_bus_probe_idle(dev); rc = nvdimm_security_store(dev, buf, len); nvdimm_bus_unlock(dev); - nd_device_unlock(dev); + device_unlock(dev); return rc; } @@ -570,6 +570,8 @@ bool is_nvdimm(struct device *dev) return dev->type == &nvdimm_device_type; } +static struct lock_class_key nvdimm_key; + struct nvdimm *__nvdimm_create(struct nvdimm_bus *nvdimm_bus, void *provider_data, const struct attribute_group **groups, unsigned long flags, unsigned long cmd_mask, int num_flush, @@ -613,6 +615,8 @@ struct nvdimm *__nvdimm_create(struct nvdimm_bus *nvdimm_bus, /* get security state and extended (master) state */ nvdimm->sec.flags = nvdimm_security_flags(nvdimm, NVDIMM_USER); nvdimm->sec.ext_flags = nvdimm_security_flags(nvdimm, NVDIMM_MASTER); + device_initialize(dev); + lockdep_set_class(&dev->mutex, &nvdimm_key); nd_device_register(dev); return nvdimm; diff --git a/drivers/nvdimm/namespace_devs.c b/drivers/nvdimm/namespace_devs.c index 62b83b2e26e3..bf4f5c09d9b1 100644 --- a/drivers/nvdimm/namespace_devs.c +++ b/drivers/nvdimm/namespace_devs.c @@ -264,7 +264,7 @@ static ssize_t alt_name_store(struct device *dev, struct nd_region *nd_region = to_nd_region(dev->parent); ssize_t rc; - nd_device_lock(dev); + device_lock(dev); nvdimm_bus_lock(dev); wait_nvdimm_bus_probe_idle(dev); rc = __alt_name_store(dev, buf, len); @@ -272,7 +272,7 @@ static ssize_t alt_name_store(struct device *dev, rc = nd_namespace_label_update(nd_region, dev); dev_dbg(dev, "%s(%zd)\n", rc < 0 ? "fail " : "", rc); nvdimm_bus_unlock(dev); - nd_device_unlock(dev); + device_unlock(dev); return rc < 0 ? rc : len; } @@ -846,7 +846,7 @@ static ssize_t size_store(struct device *dev, if (rc) return rc; - nd_device_lock(dev); + device_lock(dev); nvdimm_bus_lock(dev); wait_nvdimm_bus_probe_idle(dev); rc = __size_store(dev, val); @@ -868,7 +868,7 @@ static ssize_t size_store(struct device *dev, dev_dbg(dev, "%llx %s (%d)\n", val, rc < 0 ? "fail" : "success", rc); nvdimm_bus_unlock(dev); - nd_device_unlock(dev); + device_unlock(dev); return rc < 0 ? rc : len; } @@ -1043,7 +1043,7 @@ static ssize_t uuid_store(struct device *dev, } else return -ENXIO; - nd_device_lock(dev); + device_lock(dev); nvdimm_bus_lock(dev); wait_nvdimm_bus_probe_idle(dev); if (to_ndns(dev)->claim) @@ -1059,7 +1059,7 @@ static ssize_t uuid_store(struct device *dev, dev_dbg(dev, "result: %zd wrote: %s%s", rc, buf, buf[len - 1] == '\n' ? "" : "\n"); nvdimm_bus_unlock(dev); - nd_device_unlock(dev); + device_unlock(dev); return rc < 0 ? rc : len; } @@ -1118,7 +1118,7 @@ static ssize_t sector_size_store(struct device *dev, } else return -ENXIO; - nd_device_lock(dev); + device_lock(dev); nvdimm_bus_lock(dev); if (to_ndns(dev)->claim) rc = -EBUSY; @@ -1129,7 +1129,7 @@ static ssize_t sector_size_store(struct device *dev, dev_dbg(dev, "result: %zd %s: %s%s", rc, rc < 0 ? "tried" : "wrote", buf, buf[len - 1] == '\n' ? "" : "\n"); nvdimm_bus_unlock(dev); - nd_device_unlock(dev); + device_unlock(dev); return rc ? rc : len; } @@ -1239,9 +1239,9 @@ static ssize_t holder_show(struct device *dev, struct nd_namespace_common *ndns = to_ndns(dev); ssize_t rc; - nd_device_lock(dev); + device_lock(dev); rc = sprintf(buf, "%s\n", ndns->claim ? dev_name(ndns->claim) : ""); - nd_device_unlock(dev); + device_unlock(dev); return rc; } @@ -1278,7 +1278,7 @@ static ssize_t holder_class_store(struct device *dev, struct nd_region *nd_region = to_nd_region(dev->parent); int rc; - nd_device_lock(dev); + device_lock(dev); nvdimm_bus_lock(dev); wait_nvdimm_bus_probe_idle(dev); rc = __holder_class_store(dev, buf); @@ -1286,7 +1286,7 @@ static ssize_t holder_class_store(struct device *dev, rc = nd_namespace_label_update(nd_region, dev); dev_dbg(dev, "%s(%d)\n", rc < 0 ? "fail " : "", rc); nvdimm_bus_unlock(dev); - nd_device_unlock(dev); + device_unlock(dev); return rc < 0 ? rc : len; } @@ -1297,7 +1297,7 @@ static ssize_t holder_class_show(struct device *dev, struct nd_namespace_common *ndns = to_ndns(dev); ssize_t rc; - nd_device_lock(dev); + device_lock(dev); if (ndns->claim_class == NVDIMM_CCLASS_NONE) rc = sprintf(buf, "\n"); else if ((ndns->claim_class == NVDIMM_CCLASS_BTT) || @@ -1309,7 +1309,7 @@ static ssize_t holder_class_show(struct device *dev, rc = sprintf(buf, "dax\n"); else rc = sprintf(buf, "<unknown>\n"); - nd_device_unlock(dev); + device_unlock(dev); return rc; } @@ -1323,7 +1323,7 @@ static ssize_t mode_show(struct device *dev, char *mode; ssize_t rc; - nd_device_lock(dev); + device_lock(dev); claim = ndns->claim; if (claim && is_nd_btt(claim)) mode = "safe"; @@ -1336,7 +1336,7 @@ static ssize_t mode_show(struct device *dev, else mode = "raw"; rc = sprintf(buf, "%s\n", mode); - nd_device_unlock(dev); + device_unlock(dev); return rc; } @@ -1456,8 +1456,8 @@ struct nd_namespace_common *nvdimm_namespace_common_probe(struct device *dev) * Flush any in-progess probes / removals in the driver * for the raw personality of this namespace. */ - nd_device_lock(&ndns->dev); - nd_device_unlock(&ndns->dev); + device_lock(&ndns->dev); + device_unlock(&ndns->dev); if (ndns->dev.driver) { dev_dbg(&ndns->dev, "is active, can't bind %s\n", dev_name(dev)); @@ -1830,6 +1830,8 @@ static struct device *nd_namespace_pmem_create(struct nd_region *nd_region) return dev; } +static struct lock_class_key nvdimm_namespace_key; + void nd_region_create_ns_seed(struct nd_region *nd_region) { WARN_ON(!is_nvdimm_bus_locked(&nd_region->dev)); @@ -1845,8 +1847,12 @@ void nd_region_create_ns_seed(struct nd_region *nd_region) */ if (!nd_region->ns_seed) dev_err(&nd_region->dev, "failed to create namespace\n"); - else + else { + device_initialize(nd_region->ns_seed); + lockdep_set_class(&nd_region->ns_seed->mutex, + &nvdimm_namespace_key); nd_device_register(nd_region->ns_seed); + } } void nd_region_create_dax_seed(struct nd_region *nd_region) @@ -2200,6 +2206,8 @@ int nd_region_register_namespaces(struct nd_region *nd_region, int *err) if (id < 0) break; dev_set_name(dev, "namespace%d.%d", nd_region->id, id); + device_initialize(dev); + lockdep_set_class(&dev->mutex, &nvdimm_namespace_key); nd_device_register(dev); } if (i) diff --git a/drivers/nvdimm/nd-core.h b/drivers/nvdimm/nd-core.h index 448f9dcb4bb7..cc86ee09d7c0 100644 --- a/drivers/nvdimm/nd-core.h +++ b/drivers/nvdimm/nd-core.h @@ -106,7 +106,7 @@ void nd_region_create_dax_seed(struct nd_region *nd_region); int nvdimm_bus_create_ndctl(struct nvdimm_bus *nvdimm_bus); void nvdimm_bus_destroy_ndctl(struct nvdimm_bus *nvdimm_bus); void nd_synchronize(void); -void __nd_device_register(struct device *dev); +void nd_device_register(struct device *dev); struct nd_label_id; char *nd_label_gen_id(struct nd_label_id *label_id, const uuid_t *uuid, u32 flags); @@ -161,70 +161,4 @@ static inline void devm_nsio_disable(struct device *dev, { } #endif - -#ifdef CONFIG_PROVE_NVDIMM_LOCKING -extern struct class *nd_class; - -enum { - LOCK_BUS, - LOCK_NDCTL, - LOCK_REGION, - LOCK_DIMM = LOCK_REGION, - LOCK_NAMESPACE, - LOCK_CLAIM, -}; - -static inline void debug_nvdimm_lock(struct device *dev) -{ - if (is_nd_region(dev)) - mutex_lock_nested(&dev->lockdep_mutex, LOCK_REGION); - else if (is_nvdimm(dev)) - mutex_lock_nested(&dev->lockdep_mutex, LOCK_DIMM); - else if (is_nd_btt(dev) || is_nd_pfn(dev) || is_nd_dax(dev)) - mutex_lock_nested(&dev->lockdep_mutex, LOCK_CLAIM); - else if (dev->parent && (is_nd_region(dev->parent))) - mutex_lock_nested(&dev->lockdep_mutex, LOCK_NAMESPACE); - else if (is_nvdimm_bus(dev)) - mutex_lock_nested(&dev->lockdep_mutex, LOCK_BUS); - else if (dev->class && dev->class == nd_class) - mutex_lock_nested(&dev->lockdep_mutex, LOCK_NDCTL); - else - dev_WARN(dev, "unknown lock level\n"); -} - -static inline void debug_nvdimm_unlock(struct device *dev) -{ - mutex_unlock(&dev->lockdep_mutex); -} - -static inline void nd_device_lock(struct device *dev) -{ - device_lock(dev); - debug_nvdimm_lock(dev); -} - -static inline void nd_device_unlock(struct device *dev) -{ - debug_nvdimm_unlock(dev); - device_unlock(dev); -} -#else -static inline void nd_device_lock(struct device *dev) -{ - device_lock(dev); -} - -static inline void nd_device_unlock(struct device *dev) -{ - device_unlock(dev); -} - -static inline void debug_nvdimm_lock(struct device *dev) -{ -} - -static inline void debug_nvdimm_unlock(struct device *dev) -{ -} -#endif #endif /* __ND_CORE_H__ */ diff --git a/drivers/nvdimm/pfn_devs.c b/drivers/nvdimm/pfn_devs.c index c31e184bfa45..0e92ab4b3283 100644 --- a/drivers/nvdimm/pfn_devs.c +++ b/drivers/nvdimm/pfn_devs.c @@ -55,7 +55,7 @@ static ssize_t mode_store(struct device *dev, struct nd_pfn *nd_pfn = to_nd_pfn_safe(dev); ssize_t rc = 0; - nd_device_lock(dev); + device_lock(dev); nvdimm_bus_lock(dev); if (dev->driver) rc = -EBUSY; @@ -77,7 +77,7 @@ static ssize_t mode_store(struct device *dev, dev_dbg(dev, "result: %zd wrote: %s%s", rc, buf, buf[len - 1] == '\n' ? "" : "\n"); nvdimm_bus_unlock(dev); - nd_device_unlock(dev); + device_unlock(dev); return rc ? rc : len; } @@ -123,14 +123,14 @@ static ssize_t align_store(struct device *dev, unsigned long aligns[MAX_NVDIMM_ALIGN] = { [0] = 0, }; ssize_t rc; - nd_device_lock(dev); + device_lock(dev); nvdimm_bus_lock(dev); rc = nd_size_select_store(dev, buf, &nd_pfn->align, nd_pfn_supported_alignments(aligns)); dev_dbg(dev, "result: %zd wrote: %s%s", rc, buf, buf[len - 1] == '\n' ? "" : "\n"); nvdimm_bus_unlock(dev); - nd_device_unlock(dev); + device_unlock(dev); return rc ? rc : len; } @@ -152,11 +152,11 @@ static ssize_t uuid_store(struct device *dev, struct nd_pfn *nd_pfn = to_nd_pfn_safe(dev); ssize_t rc; - nd_device_lock(dev); + device_lock(dev); rc = nd_uuid_store(dev, &nd_pfn->uuid, buf, len); dev_dbg(dev, "result: %zd wrote: %s%s", rc, buf, buf[len - 1] == '\n' ? "" : "\n"); - nd_device_unlock(dev); + device_unlock(dev); return rc ? rc : len; } @@ -181,13 +181,13 @@ static ssize_t namespace_store(struct device *dev, struct nd_pfn *nd_pfn = to_nd_pfn_safe(dev); ssize_t rc; - nd_device_lock(dev); + device_lock(dev); nvdimm_bus_lock(dev); rc = nd_namespace_store(dev, &nd_pfn->ndns, buf, len); dev_dbg(dev, "result: %zd wrote: %s%s", rc, buf, buf[len - 1] == '\n' ? "" : "\n"); nvdimm_bus_unlock(dev); - nd_device_unlock(dev); + device_unlock(dev); return rc; } @@ -199,7 +199,7 @@ static ssize_t resource_show(struct device *dev, struct nd_pfn *nd_pfn = to_nd_pfn_safe(dev); ssize_t rc; - nd_device_lock(dev); + device_lock(dev); if (dev->driver) { struct nd_pfn_sb *pfn_sb = nd_pfn->pfn_sb; u64 offset = __le64_to_cpu(pfn_sb->dataoff); @@ -213,7 +213,7 @@ static ssize_t resource_show(struct device *dev, /* no address to convey if the pfn instance is disabled */ rc = -ENXIO; } - nd_device_unlock(dev); + device_unlock(dev); return rc; } @@ -225,7 +225,7 @@ static ssize_t size_show(struct device *dev, struct nd_pfn *nd_pfn = to_nd_pfn_safe(dev); ssize_t rc; - nd_device_lock(dev); + device_lock(dev); if (dev->driver) { struct nd_pfn_sb *pfn_sb = nd_pfn->pfn_sb; u64 offset = __le64_to_cpu(pfn_sb->dataoff); @@ -241,7 +241,7 @@ static ssize_t size_show(struct device *dev, /* no size to convey if the pfn instance is disabled */ rc = -ENXIO; } - nd_device_unlock(dev); + device_unlock(dev); return rc; } @@ -291,6 +291,8 @@ bool is_nd_pfn(struct device *dev) } EXPORT_SYMBOL(is_nd_pfn); +static struct lock_class_key nvdimm_pfn_key; + struct device *nd_pfn_devinit(struct nd_pfn *nd_pfn, struct nd_namespace_common *ndns) { @@ -303,6 +305,7 @@ struct device *nd_pfn_devinit(struct nd_pfn *nd_pfn, nd_pfn->align = nd_pfn_default_alignment(); dev = &nd_pfn->dev; device_initialize(&nd_pfn->dev); + lockdep_set_class(&nd_pfn->dev.mutex, &nvdimm_pfn_key); if (ndns && !__nd_attach_ndns(&nd_pfn->dev, ndns, &nd_pfn->ndns)) { dev_dbg(&ndns->dev, "failed, already claimed by %s\n", dev_name(ndns->claim)); @@ -346,7 +349,7 @@ struct device *nd_pfn_create(struct nd_region *nd_region) nd_pfn = nd_pfn_alloc(nd_region); dev = nd_pfn_devinit(nd_pfn, NULL); - __nd_device_register(dev); + nd_device_register(dev); return dev; } @@ -643,7 +646,7 @@ int nd_pfn_probe(struct device *dev, struct nd_namespace_common *ndns) nd_detach_ndns(pfn_dev, &nd_pfn->ndns); put_device(pfn_dev); } else - __nd_device_register(pfn_dev); + nd_device_register(pfn_dev); return rc; } diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c index 6b24ecada695..629d10fcf53b 100644 --- a/drivers/nvdimm/pmem.c +++ b/drivers/nvdimm/pmem.c @@ -660,7 +660,7 @@ static void nd_pmem_remove(struct device *dev) nvdimm_namespace_detach_btt(to_nd_btt(dev)); else { /* - * Note, this assumes nd_device_lock() context to not + * Note, this assumes device_lock() context to not * race nd_pmem_notify() */ sysfs_put(pmem->bb_state); diff --git a/drivers/nvdimm/region.c b/drivers/nvdimm/region.c index 188560b1c110..390123d293ea 100644 --- a/drivers/nvdimm/region.c +++ b/drivers/nvdimm/region.c @@ -95,7 +95,7 @@ static void nd_region_remove(struct device *dev) nvdimm_bus_unlock(dev); /* - * Note, this assumes nd_device_lock() context to not race + * Note, this assumes device_lock() context to not race * nd_region_notify() */ sysfs_put(nd_region->bb_state); diff --git a/drivers/nvdimm/region_devs.c b/drivers/nvdimm/region_devs.c index 0cb274c2b508..d976260eca7a 100644 --- a/drivers/nvdimm/region_devs.c +++ b/drivers/nvdimm/region_devs.c @@ -279,7 +279,7 @@ static ssize_t set_cookie_show(struct device *dev, * the v1.1 namespace label cookie definition. To read all this * data we need to wait for probing to settle. */ - nd_device_lock(dev); + device_lock(dev); nvdimm_bus_lock(dev); wait_nvdimm_bus_probe_idle(dev); if (nd_region->ndr_mappings) { @@ -296,7 +296,7 @@ static ssize_t set_cookie_show(struct device *dev, } } nvdimm_bus_unlock(dev); - nd_device_unlock(dev); + device_unlock(dev); if (rc) return rc; @@ -353,12 +353,12 @@ static ssize_t available_size_show(struct device *dev, * memory nvdimm_bus_lock() is dropped, but that's userspace's * problem to not race itself. */ - nd_device_lock(dev); + device_lock(dev); nvdimm_bus_lock(dev); wait_nvdimm_bus_probe_idle(dev); available = nd_region_available_dpa(nd_region); nvdimm_bus_unlock(dev); - nd_device_unlock(dev); + device_unlock(dev); return sprintf(buf, "%llu\n", available); } @@ -370,12 +370,12 @@ static ssize_t max_available_extent_show(struct device *dev, struct nd_region *nd_region = to_nd_region(dev); unsigned long long available = 0; - nd_device_lock(dev); + device_lock(dev); nvdimm_bus_lock(dev); wait_nvdimm_bus_probe_idle(dev); available = nd_region_allocatable_dpa(nd_region); nvdimm_bus_unlock(dev); - nd_device_unlock(dev); + device_unlock(dev); return sprintf(buf, "%llu\n", available); } @@ -549,12 +549,12 @@ static ssize_t region_badblocks_show(struct device *dev, struct nd_region *nd_region = to_nd_region(dev); ssize_t rc; - nd_device_lock(dev); + device_lock(dev); if (dev->driver) rc = badblocks_show(&nd_region->bb, buf, 0); else rc = -ENXIO; - nd_device_unlock(dev); + device_unlock(dev); return rc; } @@ -949,6 +949,8 @@ static unsigned long default_align(struct nd_region *nd_region) return align; } +static struct lock_class_key nvdimm_region_key; + static struct nd_region *nd_region_create(struct nvdimm_bus *nvdimm_bus, struct nd_region_desc *ndr_desc, const struct device_type *dev_type, const char *caller) @@ -1035,6 +1037,8 @@ static struct nd_region *nd_region_create(struct nvdimm_bus *nvdimm_bus, else nd_region->flush = NULL; + device_initialize(dev); + lockdep_set_class(&dev->mutex, &nvdimm_region_key); nd_device_register(dev); return nd_region; |