summaryrefslogtreecommitdiffstats
path: root/drivers/cxl/pci.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/cxl/pci.c')
-rw-r--r--drivers/cxl/pci.c425
1 files changed, 302 insertions, 123 deletions
diff --git a/drivers/cxl/pci.c b/drivers/cxl/pci.c
index 258004f34281..60b23624d167 100644
--- a/drivers/cxl/pci.c
+++ b/drivers/cxl/pci.c
@@ -14,8 +14,6 @@
#include "cxlmem.h"
#include "cxlpci.h"
#include "cxl.h"
-#define CREATE_TRACE_POINTS
-#include <trace/events/cxl.h>
/**
* DOC: cxl pci
@@ -162,7 +160,7 @@ static int __cxl_pci_mbox_send_cmd(struct cxl_dev_state *cxlds,
writeq(cmd_reg, cxlds->regs.mbox + CXLDEV_MBOX_CMD_OFFSET);
/* #4 */
- dev_dbg(dev, "Sending command\n");
+ dev_dbg(dev, "Sending command: 0x%04x\n", mbox_cmd->opcode);
writel(CXLDEV_MBOX_CTRL_DOORBELL,
cxlds->regs.mbox + CXLDEV_MBOX_CTRL_OFFSET);
@@ -414,13 +412,295 @@ static bool is_cxl_restricted(struct pci_dev *pdev)
return pci_pcie_type(pdev) == PCI_EXP_TYPE_RC_END;
}
-static void disable_aer(void *pdev)
+/*
+ * CXL v3.0 6.2.3 Table 6-4
+ * The table indicates that if PCIe Flit Mode is set, then CXL is in 256B flits
+ * mode, otherwise it's 68B flits mode.
+ */
+static bool cxl_pci_flit_256(struct pci_dev *pdev)
+{
+ u16 lnksta2;
+
+ pcie_capability_read_word(pdev, PCI_EXP_LNKSTA2, &lnksta2);
+ return lnksta2 & PCI_EXP_LNKSTA2_FLIT;
+}
+
+static int cxl_pci_ras_unmask(struct pci_dev *pdev)
+{
+ struct pci_host_bridge *host_bridge = pci_find_host_bridge(pdev->bus);
+ struct cxl_dev_state *cxlds = pci_get_drvdata(pdev);
+ void __iomem *addr;
+ u32 orig_val, val, mask;
+ u16 cap;
+ int rc;
+
+ if (!cxlds->regs.ras) {
+ dev_dbg(&pdev->dev, "No RAS registers.\n");
+ return 0;
+ }
+
+ /* BIOS has CXL error control */
+ if (!host_bridge->native_cxl_error)
+ return -ENXIO;
+
+ rc = pcie_capability_read_word(pdev, PCI_EXP_DEVCTL, &cap);
+ if (rc)
+ return rc;
+
+ if (cap & PCI_EXP_DEVCTL_URRE) {
+ addr = cxlds->regs.ras + CXL_RAS_UNCORRECTABLE_MASK_OFFSET;
+ orig_val = readl(addr);
+
+ mask = CXL_RAS_UNCORRECTABLE_MASK_MASK;
+ if (!cxl_pci_flit_256(pdev))
+ mask &= ~CXL_RAS_UNCORRECTABLE_MASK_F256B_MASK;
+ val = orig_val & ~mask;
+ writel(val, addr);
+ dev_dbg(&pdev->dev,
+ "Uncorrectable RAS Errors Mask: %#x -> %#x\n",
+ orig_val, val);
+ }
+
+ if (cap & PCI_EXP_DEVCTL_CERE) {
+ addr = cxlds->regs.ras + CXL_RAS_CORRECTABLE_MASK_OFFSET;
+ orig_val = readl(addr);
+ val = orig_val & ~CXL_RAS_CORRECTABLE_MASK_MASK;
+ writel(val, addr);
+ dev_dbg(&pdev->dev, "Correctable RAS Errors Mask: %#x -> %#x\n",
+ orig_val, val);
+ }
+
+ return 0;
+}
+
+static void free_event_buf(void *buf)
+{
+ kvfree(buf);
+}
+
+/*
+ * There is a single buffer for reading event logs from the mailbox. All logs
+ * share this buffer protected by the cxlds->event_log_lock.
+ */
+static int cxl_mem_alloc_event_buf(struct cxl_dev_state *cxlds)
+{
+ struct cxl_get_event_payload *buf;
+
+ buf = kvmalloc(cxlds->payload_size, GFP_KERNEL);
+ if (!buf)
+ return -ENOMEM;
+ cxlds->event.buf = buf;
+
+ return devm_add_action_or_reset(cxlds->dev, free_event_buf, buf);
+}
+
+static int cxl_alloc_irq_vectors(struct pci_dev *pdev)
+{
+ int nvecs;
+
+ /*
+ * Per CXL 3.0 3.1.1 CXL.io Endpoint a function on a CXL device must
+ * not generate INTx messages if that function participates in
+ * CXL.cache or CXL.mem.
+ *
+ * Additionally pci_alloc_irq_vectors() handles calling
+ * pci_free_irq_vectors() automatically despite not being called
+ * pcim_*. See pci_setup_msi_context().
+ */
+ nvecs = pci_alloc_irq_vectors(pdev, 1, CXL_PCI_DEFAULT_MAX_VECTORS,
+ PCI_IRQ_MSIX | PCI_IRQ_MSI);
+ if (nvecs < 1) {
+ dev_dbg(&pdev->dev, "Failed to alloc irq vectors: %d\n", nvecs);
+ return -ENXIO;
+ }
+ return 0;
+}
+
+struct cxl_dev_id {
+ struct cxl_dev_state *cxlds;
+};
+
+static irqreturn_t cxl_event_thread(int irq, void *id)
+{
+ struct cxl_dev_id *dev_id = id;
+ struct cxl_dev_state *cxlds = dev_id->cxlds;
+ u32 status;
+
+ do {
+ /*
+ * CXL 3.0 8.2.8.3.1: The lower 32 bits are the status;
+ * ignore the reserved upper 32 bits
+ */
+ status = readl(cxlds->regs.status + CXLDEV_DEV_EVENT_STATUS_OFFSET);
+ /* Ignore logs unknown to the driver */
+ status &= CXLDEV_EVENT_STATUS_ALL;
+ if (!status)
+ break;
+ cxl_mem_get_event_records(cxlds, status);
+ cond_resched();
+ } while (status);
+
+ return IRQ_HANDLED;
+}
+
+static int cxl_event_req_irq(struct cxl_dev_state *cxlds, u8 setting)
+{
+ struct device *dev = cxlds->dev;
+ struct pci_dev *pdev = to_pci_dev(dev);
+ struct cxl_dev_id *dev_id;
+ int irq;
+
+ if (FIELD_GET(CXLDEV_EVENT_INT_MODE_MASK, setting) != CXL_INT_MSI_MSIX)
+ return -ENXIO;
+
+ /* dev_id must be globally unique and must contain the cxlds */
+ dev_id = devm_kzalloc(dev, sizeof(*dev_id), GFP_KERNEL);
+ if (!dev_id)
+ return -ENOMEM;
+ dev_id->cxlds = cxlds;
+
+ irq = pci_irq_vector(pdev,
+ FIELD_GET(CXLDEV_EVENT_INT_MSGNUM_MASK, setting));
+ if (irq < 0)
+ return irq;
+
+ return devm_request_threaded_irq(dev, irq, NULL, cxl_event_thread,
+ IRQF_SHARED | IRQF_ONESHOT, NULL,
+ dev_id);
+}
+
+static int cxl_event_get_int_policy(struct cxl_dev_state *cxlds,
+ struct cxl_event_interrupt_policy *policy)
+{
+ struct cxl_mbox_cmd mbox_cmd = {
+ .opcode = CXL_MBOX_OP_GET_EVT_INT_POLICY,
+ .payload_out = policy,
+ .size_out = sizeof(*policy),
+ };
+ int rc;
+
+ rc = cxl_internal_send_cmd(cxlds, &mbox_cmd);
+ if (rc < 0)
+ dev_err(cxlds->dev, "Failed to get event interrupt policy : %d",
+ rc);
+
+ return rc;
+}
+
+static int cxl_event_config_msgnums(struct cxl_dev_state *cxlds,
+ struct cxl_event_interrupt_policy *policy)
+{
+ struct cxl_mbox_cmd mbox_cmd;
+ int rc;
+
+ *policy = (struct cxl_event_interrupt_policy) {
+ .info_settings = CXL_INT_MSI_MSIX,
+ .warn_settings = CXL_INT_MSI_MSIX,
+ .failure_settings = CXL_INT_MSI_MSIX,
+ .fatal_settings = CXL_INT_MSI_MSIX,
+ };
+
+ mbox_cmd = (struct cxl_mbox_cmd) {
+ .opcode = CXL_MBOX_OP_SET_EVT_INT_POLICY,
+ .payload_in = policy,
+ .size_in = sizeof(*policy),
+ };
+
+ rc = cxl_internal_send_cmd(cxlds, &mbox_cmd);
+ if (rc < 0) {
+ dev_err(cxlds->dev, "Failed to set event interrupt policy : %d",
+ rc);
+ return rc;
+ }
+
+ /* Retrieve final interrupt settings */
+ return cxl_event_get_int_policy(cxlds, policy);
+}
+
+static int cxl_event_irqsetup(struct cxl_dev_state *cxlds)
+{
+ struct cxl_event_interrupt_policy policy;
+ int rc;
+
+ rc = cxl_event_config_msgnums(cxlds, &policy);
+ if (rc)
+ return rc;
+
+ rc = cxl_event_req_irq(cxlds, policy.info_settings);
+ if (rc) {
+ dev_err(cxlds->dev, "Failed to get interrupt for event Info log\n");
+ return rc;
+ }
+
+ rc = cxl_event_req_irq(cxlds, policy.warn_settings);
+ if (rc) {
+ dev_err(cxlds->dev, "Failed to get interrupt for event Warn log\n");
+ return rc;
+ }
+
+ rc = cxl_event_req_irq(cxlds, policy.failure_settings);
+ if (rc) {
+ dev_err(cxlds->dev, "Failed to get interrupt for event Failure log\n");
+ return rc;
+ }
+
+ rc = cxl_event_req_irq(cxlds, policy.fatal_settings);
+ if (rc) {
+ dev_err(cxlds->dev, "Failed to get interrupt for event Fatal log\n");
+ return rc;
+ }
+
+ return 0;
+}
+
+static bool cxl_event_int_is_fw(u8 setting)
+{
+ u8 mode = FIELD_GET(CXLDEV_EVENT_INT_MODE_MASK, setting);
+
+ return mode == CXL_INT_FW;
+}
+
+static int cxl_event_config(struct pci_host_bridge *host_bridge,
+ struct cxl_dev_state *cxlds)
{
- pci_disable_pcie_error_reporting(pdev);
+ struct cxl_event_interrupt_policy policy;
+ int rc;
+
+ /*
+ * When BIOS maintains CXL error reporting control, it will process
+ * event records. Only one agent can do so.
+ */
+ if (!host_bridge->native_cxl_error)
+ return 0;
+
+ rc = cxl_mem_alloc_event_buf(cxlds);
+ if (rc)
+ return rc;
+
+ rc = cxl_event_get_int_policy(cxlds, &policy);
+ if (rc)
+ return rc;
+
+ if (cxl_event_int_is_fw(policy.info_settings) ||
+ cxl_event_int_is_fw(policy.warn_settings) ||
+ cxl_event_int_is_fw(policy.failure_settings) ||
+ cxl_event_int_is_fw(policy.fatal_settings)) {
+ dev_err(cxlds->dev, "FW still in control of Event Logs despite _OSC settings\n");
+ return -EBUSY;
+ }
+
+ rc = cxl_event_irqsetup(cxlds);
+ if (rc)
+ return rc;
+
+ cxl_mem_get_event_records(cxlds, CXLDEV_EVENT_STATUS_ALL);
+
+ return 0;
}
static int cxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
{
+ struct pci_host_bridge *host_bridge = pci_find_host_bridge(pdev->bus);
struct cxl_register_map map;
struct cxl_memdev *cxlmd;
struct cxl_dev_state *cxlds;
@@ -436,6 +716,7 @@ static int cxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
rc = pcim_enable_device(pdev);
if (rc)
return rc;
+ pci_set_master(pdev);
cxlds = cxl_dev_state_create(&pdev->dev);
if (IS_ERR(cxlds))
@@ -484,6 +765,10 @@ static int cxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
if (rc)
return rc;
+ rc = cxl_set_timestamp(cxlds);
+ if (rc)
+ return rc;
+
rc = cxl_dev_state_identify(cxlds);
if (rc)
return rc;
@@ -492,16 +777,22 @@ static int cxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
if (rc)
return rc;
+ rc = cxl_alloc_irq_vectors(pdev);
+ if (rc)
+ return rc;
+
cxlmd = devm_cxl_add_memdev(cxlds);
if (IS_ERR(cxlmd))
return PTR_ERR(cxlmd);
- if (cxlds->regs.ras) {
- pci_enable_pcie_error_reporting(pdev);
- rc = devm_add_action_or_reset(&pdev->dev, disable_aer, pdev);
- if (rc)
- return rc;
- }
+ rc = cxl_event_config(host_bridge, cxlds);
+ if (rc)
+ return rc;
+
+ rc = cxl_pci_ras_unmask(pdev);
+ if (rc)
+ dev_dbg(&pdev->dev, "No RAS reporting unmasked\n");
+
pci_save_state(pdev);
return rc;
@@ -514,99 +805,6 @@ static const struct pci_device_id cxl_mem_pci_tbl[] = {
};
MODULE_DEVICE_TABLE(pci, cxl_mem_pci_tbl);
-/* CXL spec rev3.0 8.2.4.16.1 */
-static void header_log_copy(struct cxl_dev_state *cxlds, u32 *log)
-{
- void __iomem *addr;
- u32 *log_addr;
- int i, log_u32_size = CXL_HEADERLOG_SIZE / sizeof(u32);
-
- addr = cxlds->regs.ras + CXL_RAS_HEADER_LOG_OFFSET;
- log_addr = log;
-
- for (i = 0; i < log_u32_size; i++) {
- *log_addr = readl(addr);
- log_addr++;
- addr += sizeof(u32);
- }
-}
-
-/*
- * Log the state of the RAS status registers and prepare them to log the
- * next error status. Return 1 if reset needed.
- */
-static bool cxl_report_and_clear(struct cxl_dev_state *cxlds)
-{
- struct cxl_memdev *cxlmd = cxlds->cxlmd;
- struct device *dev = &cxlmd->dev;
- u32 hl[CXL_HEADERLOG_SIZE_U32];
- void __iomem *addr;
- u32 status;
- u32 fe;
-
- if (!cxlds->regs.ras)
- return false;
-
- addr = cxlds->regs.ras + CXL_RAS_UNCORRECTABLE_STATUS_OFFSET;
- status = readl(addr);
- if (!(status & CXL_RAS_UNCORRECTABLE_STATUS_MASK))
- return false;
-
- /* If multiple errors, log header points to first error from ctrl reg */
- if (hweight32(status) > 1) {
- void __iomem *rcc_addr =
- cxlds->regs.ras + CXL_RAS_CAP_CONTROL_OFFSET;
-
- fe = BIT(FIELD_GET(CXL_RAS_CAP_CONTROL_FE_MASK,
- readl(rcc_addr)));
- } else {
- fe = status;
- }
-
- header_log_copy(cxlds, hl);
- trace_cxl_aer_uncorrectable_error(dev, status, fe, hl);
- writel(status & CXL_RAS_UNCORRECTABLE_STATUS_MASK, addr);
-
- return true;
-}
-
-static pci_ers_result_t cxl_error_detected(struct pci_dev *pdev,
- pci_channel_state_t state)
-{
- struct cxl_dev_state *cxlds = pci_get_drvdata(pdev);
- struct cxl_memdev *cxlmd = cxlds->cxlmd;
- struct device *dev = &cxlmd->dev;
- bool ue;
-
- /*
- * A frozen channel indicates an impending reset which is fatal to
- * CXL.mem operation, and will likely crash the system. On the off
- * chance the situation is recoverable dump the status of the RAS
- * capability registers and bounce the active state of the memdev.
- */
- ue = cxl_report_and_clear(cxlds);
-
- switch (state) {
- case pci_channel_io_normal:
- if (ue) {
- device_release_driver(dev);
- return PCI_ERS_RESULT_NEED_RESET;
- }
- return PCI_ERS_RESULT_CAN_RECOVER;
- case pci_channel_io_frozen:
- dev_warn(&pdev->dev,
- "%s: frozen state error detected, disable CXL.mem\n",
- dev_name(dev));
- device_release_driver(dev);
- return PCI_ERS_RESULT_NEED_RESET;
- case pci_channel_io_perm_failure:
- dev_warn(&pdev->dev,
- "failure state error detected, request disconnect\n");
- return PCI_ERS_RESULT_DISCONNECT;
- }
- return PCI_ERS_RESULT_NEED_RESET;
-}
-
static pci_ers_result_t cxl_slot_reset(struct pci_dev *pdev)
{
struct cxl_dev_state *cxlds = pci_get_drvdata(pdev);
@@ -631,25 +829,6 @@ static void cxl_error_resume(struct pci_dev *pdev)
dev->driver ? "successful" : "failed");
}
-static void cxl_cor_error_detected(struct pci_dev *pdev)
-{
- struct cxl_dev_state *cxlds = pci_get_drvdata(pdev);
- struct cxl_memdev *cxlmd = cxlds->cxlmd;
- struct device *dev = &cxlmd->dev;
- void __iomem *addr;
- u32 status;
-
- if (!cxlds->regs.ras)
- return;
-
- addr = cxlds->regs.ras + CXL_RAS_CORRECTABLE_STATUS_OFFSET;
- status = readl(addr);
- if (status & CXL_RAS_CORRECTABLE_STATUS_MASK) {
- writel(status & CXL_RAS_CORRECTABLE_STATUS_MASK, addr);
- trace_cxl_aer_correctable_error(dev, status);
- }
-}
-
static const struct pci_error_handlers cxl_error_handlers = {
.error_detected = cxl_error_detected,
.slot_reset = cxl_slot_reset,