diff options
Diffstat (limited to 'drivers/pci/pcie/bwctrl.c')
-rw-r--r-- | drivers/pci/pcie/bwctrl.c | 366 |
1 files changed, 366 insertions, 0 deletions
diff --git a/drivers/pci/pcie/bwctrl.c b/drivers/pci/pcie/bwctrl.c new file mode 100644 index 000000000000..b59cacc740fa --- /dev/null +++ b/drivers/pci/pcie/bwctrl.c @@ -0,0 +1,366 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* + * PCIe bandwidth controller + * + * Author: Alexandru Gagniuc <mr.nuke.me@gmail.com> + * + * Copyright (C) 2019 Dell Inc + * Copyright (C) 2023-2024 Intel Corporation + * + * The PCIe bandwidth controller provides a way to alter PCIe Link Speeds + * and notify the operating system when the Link Width or Speed changes. The + * notification capability is required for all Root Ports and Downstream + * Ports supporting Link Width wider than x1 and/or multiple Link Speeds. + * + * This service port driver hooks into the Bandwidth Notification interrupt + * watching for changes or links becoming degraded in operation. It updates + * the cached Current Link Speed that is exposed to user space through sysfs. + */ + +#define dev_fmt(fmt) "bwctrl: " fmt + +#include <linux/atomic.h> +#include <linux/bitops.h> +#include <linux/bits.h> +#include <linux/cleanup.h> +#include <linux/errno.h> +#include <linux/interrupt.h> +#include <linux/mutex.h> +#include <linux/pci.h> +#include <linux/pci-bwctrl.h> +#include <linux/rwsem.h> +#include <linux/slab.h> +#include <linux/types.h> + +#include "../pci.h" +#include "portdrv.h" + +/** + * struct pcie_bwctrl_data - PCIe bandwidth controller + * @set_speed_mutex: Serializes link speed changes + * @lbms_count: Count for LBMS (since last reset) + * @cdev: Thermal cooling device associated with the port + */ +struct pcie_bwctrl_data { + struct mutex set_speed_mutex; + atomic_t lbms_count; + struct thermal_cooling_device *cdev; +}; + +/* + * Prevent port removal during LBMS count accessors and Link Speed changes. + * + * These have to be differentiated because pcie_bwctrl_change_speed() calls + * pcie_retrain_link() which uses LBMS count reset accessor on success + * (using just one rwsem triggers "possible recursive locking detected" + * warning). + */ +static DECLARE_RWSEM(pcie_bwctrl_lbms_rwsem); +static DECLARE_RWSEM(pcie_bwctrl_setspeed_rwsem); + +static bool pcie_valid_speed(enum pci_bus_speed speed) +{ + return (speed >= PCIE_SPEED_2_5GT) && (speed <= PCIE_SPEED_64_0GT); +} + +static u16 pci_bus_speed2lnkctl2(enum pci_bus_speed speed) +{ + static const u8 speed_conv[] = { + [PCIE_SPEED_2_5GT] = PCI_EXP_LNKCTL2_TLS_2_5GT, + [PCIE_SPEED_5_0GT] = PCI_EXP_LNKCTL2_TLS_5_0GT, + [PCIE_SPEED_8_0GT] = PCI_EXP_LNKCTL2_TLS_8_0GT, + [PCIE_SPEED_16_0GT] = PCI_EXP_LNKCTL2_TLS_16_0GT, + [PCIE_SPEED_32_0GT] = PCI_EXP_LNKCTL2_TLS_32_0GT, + [PCIE_SPEED_64_0GT] = PCI_EXP_LNKCTL2_TLS_64_0GT, + }; + + if (WARN_ON_ONCE(!pcie_valid_speed(speed))) + return 0; + + return speed_conv[speed]; +} + +static inline u16 pcie_supported_speeds2target_speed(u8 supported_speeds) +{ + return __fls(supported_speeds); +} + +/** + * pcie_bwctrl_select_speed - Select Target Link Speed + * @port: PCIe Port + * @speed_req: Requested PCIe Link Speed + * + * Select Target Link Speed by take into account Supported Link Speeds of + * both the Root Port and the Endpoint. + * + * Return: Target Link Speed (1=2.5GT/s, 2=5GT/s, 3=8GT/s, etc.) + */ +static u16 pcie_bwctrl_select_speed(struct pci_dev *port, enum pci_bus_speed speed_req) +{ + struct pci_bus *bus = port->subordinate; + u8 desired_speeds, supported_speeds; + struct pci_dev *dev; + + desired_speeds = GENMASK(pci_bus_speed2lnkctl2(speed_req), + __fls(PCI_EXP_LNKCAP2_SLS_2_5GB)); + + supported_speeds = port->supported_speeds; + if (bus) { + down_read(&pci_bus_sem); + dev = list_first_entry_or_null(&bus->devices, struct pci_dev, bus_list); + if (dev) + supported_speeds &= dev->supported_speeds; + up_read(&pci_bus_sem); + } + if (!supported_speeds) + return PCI_EXP_LNKCAP2_SLS_2_5GB; + + return pcie_supported_speeds2target_speed(supported_speeds & desired_speeds); +} + +static int pcie_bwctrl_change_speed(struct pci_dev *port, u16 target_speed, bool use_lt) +{ + int ret; + + ret = pcie_capability_clear_and_set_word(port, PCI_EXP_LNKCTL2, + PCI_EXP_LNKCTL2_TLS, target_speed); + if (ret != PCIBIOS_SUCCESSFUL) + return pcibios_err_to_errno(ret); + + ret = pcie_retrain_link(port, use_lt); + if (ret < 0) + return ret; + + /* + * Ensure link speed updates also with platforms that have problems + * with notifications. + */ + if (port->subordinate) + pcie_update_link_speed(port->subordinate); + + return 0; +} + +/** + * pcie_set_target_speed - Set downstream Link Speed for PCIe Port + * @port: PCIe Port + * @speed_req: Requested PCIe Link Speed + * @use_lt: Wait for the LT or DLLLA bit to detect the end of link training + * + * Attempt to set PCIe Port Link Speed to @speed_req. @speed_req may be + * adjusted downwards to the best speed supported by both the Port and PCIe + * Device underneath it. + * + * Return: + * * 0 - on success + * * -EINVAL - @speed_req is not a PCIe Link Speed + * * -ENODEV - @port is not controllable + * * -ETIMEDOUT - changing Link Speed took too long + * * -EAGAIN - Link Speed was changed but @speed_req was not achieved + */ +int pcie_set_target_speed(struct pci_dev *port, enum pci_bus_speed speed_req, + bool use_lt) +{ + struct pci_bus *bus = port->subordinate; + u16 target_speed; + int ret; + + if (WARN_ON_ONCE(!pcie_valid_speed(speed_req))) + return -EINVAL; + + if (bus && bus->cur_bus_speed == speed_req) + return 0; + + target_speed = pcie_bwctrl_select_speed(port, speed_req); + + scoped_guard(rwsem_read, &pcie_bwctrl_setspeed_rwsem) { + struct pcie_bwctrl_data *data = port->link_bwctrl; + + /* + * port->link_bwctrl is NULL during initial scan when called + * e.g. from the Target Speed quirk. + */ + if (data) + mutex_lock(&data->set_speed_mutex); + + ret = pcie_bwctrl_change_speed(port, target_speed, use_lt); + + if (data) + mutex_unlock(&data->set_speed_mutex); + } + + /* + * Despite setting higher speed into the Target Link Speed, empty + * bus won't train to 5GT+ speeds. + */ + if (!ret && bus && bus->cur_bus_speed != speed_req && + !list_empty(&bus->devices)) + ret = -EAGAIN; + + return ret; +} + +static void pcie_bwnotif_enable(struct pcie_device *srv) +{ + struct pcie_bwctrl_data *data = srv->port->link_bwctrl; + struct pci_dev *port = srv->port; + u16 link_status; + int ret; + + /* Count LBMS seen so far as one */ + ret = pcie_capability_read_word(port, PCI_EXP_LNKSTA, &link_status); + if (ret == PCIBIOS_SUCCESSFUL && link_status & PCI_EXP_LNKSTA_LBMS) + atomic_inc(&data->lbms_count); + + pcie_capability_set_word(port, PCI_EXP_LNKCTL, + PCI_EXP_LNKCTL_LBMIE | PCI_EXP_LNKCTL_LABIE); + pcie_capability_write_word(port, PCI_EXP_LNKSTA, + PCI_EXP_LNKSTA_LBMS | PCI_EXP_LNKSTA_LABS); + + /* + * Update after enabling notifications & clearing status bits ensures + * link speed is up to date. + */ + pcie_update_link_speed(port->subordinate); +} + +static void pcie_bwnotif_disable(struct pci_dev *port) +{ + pcie_capability_clear_word(port, PCI_EXP_LNKCTL, + PCI_EXP_LNKCTL_LBMIE | PCI_EXP_LNKCTL_LABIE); +} + +static irqreturn_t pcie_bwnotif_irq(int irq, void *context) +{ + struct pcie_device *srv = context; + struct pcie_bwctrl_data *data = srv->port->link_bwctrl; + struct pci_dev *port = srv->port; + u16 link_status, events; + int ret; + + ret = pcie_capability_read_word(port, PCI_EXP_LNKSTA, &link_status); + if (ret != PCIBIOS_SUCCESSFUL) + return IRQ_NONE; + + events = link_status & (PCI_EXP_LNKSTA_LBMS | PCI_EXP_LNKSTA_LABS); + if (!events) + return IRQ_NONE; + + if (events & PCI_EXP_LNKSTA_LBMS) + atomic_inc(&data->lbms_count); + + pcie_capability_write_word(port, PCI_EXP_LNKSTA, events); + + /* + * Interrupts will not be triggered from any further Link Speed + * change until LBMS is cleared by the write. Therefore, re-read the + * speed (inside pcie_update_link_speed()) after LBMS has been + * cleared to avoid missing link speed changes. + */ + pcie_update_link_speed(port->subordinate); + + return IRQ_HANDLED; +} + +void pcie_reset_lbms_count(struct pci_dev *port) +{ + struct pcie_bwctrl_data *data; + + guard(rwsem_read)(&pcie_bwctrl_lbms_rwsem); + data = port->link_bwctrl; + if (data) + atomic_set(&data->lbms_count, 0); + else + pcie_capability_write_word(port, PCI_EXP_LNKSTA, + PCI_EXP_LNKSTA_LBMS); +} + +int pcie_lbms_count(struct pci_dev *port, unsigned long *val) +{ + struct pcie_bwctrl_data *data; + + guard(rwsem_read)(&pcie_bwctrl_lbms_rwsem); + data = port->link_bwctrl; + if (!data) + return -ENOTTY; + + *val = atomic_read(&data->lbms_count); + + return 0; +} + +static int pcie_bwnotif_probe(struct pcie_device *srv) +{ + struct pci_dev *port = srv->port; + int ret; + + struct pcie_bwctrl_data *data = devm_kzalloc(&srv->device, + sizeof(*data), GFP_KERNEL); + if (!data) + return -ENOMEM; + + ret = devm_mutex_init(&srv->device, &data->set_speed_mutex); + if (ret) + return ret; + + ret = devm_request_irq(&srv->device, srv->irq, pcie_bwnotif_irq, + IRQF_SHARED, "PCIe bwctrl", srv); + if (ret) + return ret; + + scoped_guard(rwsem_write, &pcie_bwctrl_setspeed_rwsem) { + scoped_guard(rwsem_write, &pcie_bwctrl_lbms_rwsem) { + port->link_bwctrl = no_free_ptr(data); + pcie_bwnotif_enable(srv); + } + } + + pci_dbg(port, "enabled with IRQ %d\n", srv->irq); + + /* Don't fail on errors. Don't leave IS_ERR() "pointer" into ->cdev */ + port->link_bwctrl->cdev = pcie_cooling_device_register(port); + if (IS_ERR(port->link_bwctrl->cdev)) + port->link_bwctrl->cdev = NULL; + + return 0; +} + +static void pcie_bwnotif_remove(struct pcie_device *srv) +{ + struct pcie_bwctrl_data *data = srv->port->link_bwctrl; + + pcie_cooling_device_unregister(data->cdev); + + pcie_bwnotif_disable(srv->port); + + scoped_guard(rwsem_write, &pcie_bwctrl_setspeed_rwsem) + scoped_guard(rwsem_write, &pcie_bwctrl_lbms_rwsem) + srv->port->link_bwctrl = NULL; +} + +static int pcie_bwnotif_suspend(struct pcie_device *srv) +{ + pcie_bwnotif_disable(srv->port); + return 0; +} + +static int pcie_bwnotif_resume(struct pcie_device *srv) +{ + pcie_bwnotif_enable(srv); + return 0; +} + +static struct pcie_port_service_driver pcie_bwctrl_driver = { + .name = "pcie_bwctrl", + .port_type = PCIE_ANY_PORT, + .service = PCIE_PORT_SERVICE_BWCTRL, + .probe = pcie_bwnotif_probe, + .suspend = pcie_bwnotif_suspend, + .resume = pcie_bwnotif_resume, + .remove = pcie_bwnotif_remove, +}; + +int __init pcie_bwctrl_init(void) +{ + return pcie_port_service_register(&pcie_bwctrl_driver); +} |