diff options
author | Johan Hovold <johan+linaro@kernel.org> | 2024-01-30 11:02:43 +0100 |
---|---|---|
committer | Bjorn Helgaas <bhelgaas@google.com> | 2024-01-31 16:03:51 +0100 |
commit | 1e560864159d002b453da42bd2c13a1805515a20 (patch) | |
tree | 3a6f7ecc33a0197caf4c4f4f3ff3bb8461636619 /drivers/pci/bus.c | |
parent | Linux 6.8-rc1 (diff) | |
download | linux-1e560864159d002b453da42bd2c13a1805515a20.tar.xz linux-1e560864159d002b453da42bd2c13a1805515a20.zip |
PCI/ASPM: Fix deadlock when enabling ASPM
A last minute revert in 6.7-final introduced a potential deadlock when
enabling ASPM during probe of Qualcomm PCIe controllers as reported by
lockdep:
============================================
WARNING: possible recursive locking detected
6.7.0 #40 Not tainted
--------------------------------------------
kworker/u16:5/90 is trying to acquire lock:
ffffacfa78ced000 (pci_bus_sem){++++}-{3:3}, at: pcie_aspm_pm_state_change+0x58/0xdc
but task is already holding lock:
ffffacfa78ced000 (pci_bus_sem){++++}-{3:3}, at: pci_walk_bus+0x34/0xbc
other info that might help us debug this:
Possible unsafe locking scenario:
CPU0
----
lock(pci_bus_sem);
lock(pci_bus_sem);
*** DEADLOCK ***
Call trace:
print_deadlock_bug+0x25c/0x348
__lock_acquire+0x10a4/0x2064
lock_acquire+0x1e8/0x318
down_read+0x60/0x184
pcie_aspm_pm_state_change+0x58/0xdc
pci_set_full_power_state+0xa8/0x114
pci_set_power_state+0xc4/0x120
qcom_pcie_enable_aspm+0x1c/0x3c [pcie_qcom]
pci_walk_bus+0x64/0xbc
qcom_pcie_host_post_init_2_7_0+0x28/0x34 [pcie_qcom]
The deadlock can easily be reproduced on machines like the Lenovo ThinkPad
X13s by adding a delay to increase the race window during asynchronous
probe where another thread can take a write lock.
Add a new pci_set_power_state_locked() and associated helper functions that
can be called with the PCI bus semaphore held to avoid taking the read lock
twice.
Link: https://lore.kernel.org/r/ZZu0qx2cmn7IwTyQ@hovoldconsulting.com
Link: https://lore.kernel.org/r/20240130100243.11011-1-johan+linaro@kernel.org
Fixes: f93e71aea6c6 ("Revert "PCI/ASPM: Remove pcie_aspm_pm_state_change()"")
Signed-off-by: Johan Hovold <johan+linaro@kernel.org>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Cc: <stable@vger.kernel.org> # 6.7
Diffstat (limited to 'drivers/pci/bus.c')
-rw-r--r-- | drivers/pci/bus.c | 49 |
1 files changed, 32 insertions, 17 deletions
diff --git a/drivers/pci/bus.c b/drivers/pci/bus.c index 9c2137dae429..826b5016a101 100644 --- a/drivers/pci/bus.c +++ b/drivers/pci/bus.c @@ -386,21 +386,8 @@ void pci_bus_add_devices(const struct pci_bus *bus) } EXPORT_SYMBOL(pci_bus_add_devices); -/** pci_walk_bus - walk devices on/under bus, calling callback. - * @top bus whose devices should be walked - * @cb callback to be called for each device found - * @userdata arbitrary pointer to be passed to callback. - * - * Walk the given bus, including any bridged devices - * on buses under this bus. Call the provided callback - * on each device found. - * - * We check the return of @cb each time. If it returns anything - * other than 0, we break out. - * - */ -void pci_walk_bus(struct pci_bus *top, int (*cb)(struct pci_dev *, void *), - void *userdata) +static void __pci_walk_bus(struct pci_bus *top, int (*cb)(struct pci_dev *, void *), + void *userdata, bool locked) { struct pci_dev *dev; struct pci_bus *bus; @@ -408,7 +395,8 @@ void pci_walk_bus(struct pci_bus *top, int (*cb)(struct pci_dev *, void *), int retval; bus = top; - down_read(&pci_bus_sem); + if (!locked) + down_read(&pci_bus_sem); next = top->devices.next; for (;;) { if (next == &bus->devices) { @@ -431,10 +419,37 @@ void pci_walk_bus(struct pci_bus *top, int (*cb)(struct pci_dev *, void *), if (retval) break; } - up_read(&pci_bus_sem); + if (!locked) + up_read(&pci_bus_sem); +} + +/** + * pci_walk_bus - walk devices on/under bus, calling callback. + * @top: bus whose devices should be walked + * @cb: callback to be called for each device found + * @userdata: arbitrary pointer to be passed to callback + * + * Walk the given bus, including any bridged devices + * on buses under this bus. Call the provided callback + * on each device found. + * + * We check the return of @cb each time. If it returns anything + * other than 0, we break out. + */ +void pci_walk_bus(struct pci_bus *top, int (*cb)(struct pci_dev *, void *), void *userdata) +{ + __pci_walk_bus(top, cb, userdata, false); } EXPORT_SYMBOL_GPL(pci_walk_bus); +void pci_walk_bus_locked(struct pci_bus *top, int (*cb)(struct pci_dev *, void *), void *userdata) +{ + lockdep_assert_held(&pci_bus_sem); + + __pci_walk_bus(top, cb, userdata, true); +} +EXPORT_SYMBOL_GPL(pci_walk_bus_locked); + struct pci_bus *pci_bus_get(struct pci_bus *bus) { if (bus) |