From fbaa38214cd9e150764ccaa82e04ecf42cc1140c Mon Sep 17 00:00:00 2001 From: Lukas Wunner Date: Sat, 11 Mar 2023 15:40:01 +0100 Subject: cxl/pci: Fix CDAT retrieval on big endian The CDAT exposed in sysfs differs between little endian and big endian arches: On big endian, every 4 bytes are byte-swapped. PCI Configuration Space is little endian (PCI r3.0 sec 6.1). Accessors such as pci_read_config_dword() implicitly swap bytes on big endian. That way, the macros in include/uapi/linux/pci_regs.h work regardless of the arch's endianness. For an example of implicit byte-swapping, see ppc4xx_pciex_read_config(), which calls in_le32(), which uses lwbrx (Load Word Byte-Reverse Indexed). DOE Read/Write Data Mailbox Registers are unlike other registers in Configuration Space in that they contain or receive a 4 byte portion of an opaque byte stream (a "Data Object" per PCIe r6.0 sec 7.9.24.5f). They need to be copied to or from the request/response buffer verbatim. So amend pci_doe_send_req() and pci_doe_recv_resp() to undo the implicit byte-swapping. The CXL_DOE_TABLE_ACCESS_* and PCI_DOE_DATA_OBJECT_DISC_* macros assume implicit byte-swapping. Byte-swap requests after constructing them with those macros and byte-swap responses before parsing them. Change the request and response type to __le32 to avoid sparse warnings. Per a request from Jonathan, replace sizeof(u32) with sizeof(__le32) for consistency. Fixes: c97006046c79 ("cxl/port: Read CDAT table") Tested-by: Ira Weiny Signed-off-by: Lukas Wunner Reviewed-by: Dan Williams Cc: stable@vger.kernel.org # v6.0+ Reviewed-by: Jonathan Cameron Link: https://lore.kernel.org/r/3051114102f41d19df3debbee123129118fc5e6d.1678543498.git.lukas@wunner.de Signed-off-by: Dan Williams --- drivers/cxl/core/pci.c | 26 +++++++++++++------------- drivers/pci/doe.c | 25 ++++++++++++++----------- include/linux/pci-doe.h | 8 ++++++-- 3 files changed, 33 insertions(+), 26 deletions(-) diff --git a/drivers/cxl/core/pci.c b/drivers/cxl/core/pci.c index 7328a2552411..49a99a84b6aa 100644 --- a/drivers/cxl/core/pci.c +++ b/drivers/cxl/core/pci.c @@ -462,7 +462,7 @@ static struct pci_doe_mb *find_cdat_doe(struct device *uport) return NULL; } -#define CDAT_DOE_REQ(entry_handle) \ +#define CDAT_DOE_REQ(entry_handle) cpu_to_le32 \ (FIELD_PREP(CXL_DOE_TABLE_ACCESS_REQ_CODE, \ CXL_DOE_TABLE_ACCESS_REQ_CODE_READ) | \ FIELD_PREP(CXL_DOE_TABLE_ACCESS_TABLE_TYPE, \ @@ -475,8 +475,8 @@ static void cxl_doe_task_complete(struct pci_doe_task *task) } struct cdat_doe_task { - u32 request_pl; - u32 response_pl[32]; + __le32 request_pl; + __le32 response_pl[32]; struct completion c; struct pci_doe_task task; }; @@ -510,10 +510,10 @@ static int cxl_cdat_get_length(struct device *dev, return rc; } wait_for_completion(&t.c); - if (t.task.rv < sizeof(u32)) + if (t.task.rv < sizeof(__le32)) return -EIO; - *length = t.response_pl[1]; + *length = le32_to_cpu(t.response_pl[1]); dev_dbg(dev, "CDAT length %zu\n", *length); return 0; @@ -524,13 +524,13 @@ static int cxl_cdat_read_table(struct device *dev, struct cxl_cdat *cdat) { size_t length = cdat->length; - u32 *data = cdat->table; + __le32 *data = cdat->table; int entry_handle = 0; do { DECLARE_CDAT_DOE_TASK(CDAT_DOE_REQ(entry_handle), t); size_t entry_dw; - u32 *entry; + __le32 *entry; int rc; rc = pci_doe_submit_task(cdat_doe, &t.task); @@ -540,21 +540,21 @@ static int cxl_cdat_read_table(struct device *dev, } wait_for_completion(&t.c); /* 1 DW header + 1 DW data min */ - if (t.task.rv < (2 * sizeof(u32))) + if (t.task.rv < (2 * sizeof(__le32))) 
return -EIO; /* Get the CXL table access header entry handle */ entry_handle = FIELD_GET(CXL_DOE_TABLE_ACCESS_ENTRY_HANDLE, - t.response_pl[0]); + le32_to_cpu(t.response_pl[0])); entry = t.response_pl + 1; - entry_dw = t.task.rv / sizeof(u32); + entry_dw = t.task.rv / sizeof(__le32); /* Skip Header */ entry_dw -= 1; - entry_dw = min(length / sizeof(u32), entry_dw); + entry_dw = min(length / sizeof(__le32), entry_dw); /* Prevent length < 1 DW from causing a buffer overflow */ if (entry_dw) { - memcpy(data, entry, entry_dw * sizeof(u32)); - length -= entry_dw * sizeof(u32); + memcpy(data, entry, entry_dw * sizeof(__le32)); + length -= entry_dw * sizeof(__le32); data += entry_dw; } } while (entry_handle != CXL_DOE_TABLE_ACCESS_LAST_ENTRY); diff --git a/drivers/pci/doe.c b/drivers/pci/doe.c index 66d9ab288646..6f097932ccbf 100644 --- a/drivers/pci/doe.c +++ b/drivers/pci/doe.c @@ -128,7 +128,7 @@ static int pci_doe_send_req(struct pci_doe_mb *doe_mb, return -EIO; /* Length is 2 DW of header + length of payload in DW */ - length = 2 + task->request_pl_sz / sizeof(u32); + length = 2 + task->request_pl_sz / sizeof(__le32); if (length > PCI_DOE_MAX_LENGTH) return -EIO; if (length == PCI_DOE_MAX_LENGTH) @@ -141,9 +141,9 @@ static int pci_doe_send_req(struct pci_doe_mb *doe_mb, pci_write_config_dword(pdev, offset + PCI_DOE_WRITE, FIELD_PREP(PCI_DOE_DATA_OBJECT_HEADER_2_LENGTH, length)); - for (i = 0; i < task->request_pl_sz / sizeof(u32); i++) + for (i = 0; i < task->request_pl_sz / sizeof(__le32); i++) pci_write_config_dword(pdev, offset + PCI_DOE_WRITE, - task->request_pl[i]); + le32_to_cpu(task->request_pl[i])); pci_doe_write_ctrl(doe_mb, PCI_DOE_CTRL_GO); @@ -195,11 +195,11 @@ static int pci_doe_recv_resp(struct pci_doe_mb *doe_mb, struct pci_doe_task *tas /* First 2 dwords have already been read */ length -= 2; - payload_length = min(length, task->response_pl_sz / sizeof(u32)); + payload_length = min(length, task->response_pl_sz / sizeof(__le32)); /* Read the rest of the response payload */ for (i = 0; i < payload_length; i++) { - pci_read_config_dword(pdev, offset + PCI_DOE_READ, - &task->response_pl[i]); + pci_read_config_dword(pdev, offset + PCI_DOE_READ, &val); + task->response_pl[i] = cpu_to_le32(val); /* Prior to the last ack, ensure Data Object Ready */ if (i == (payload_length - 1) && !pci_doe_data_obj_ready(doe_mb)) return -EIO; @@ -217,7 +217,7 @@ static int pci_doe_recv_resp(struct pci_doe_mb *doe_mb, struct pci_doe_task *tas if (FIELD_GET(PCI_DOE_STATUS_ERROR, val)) return -EIO; - return min(length, task->response_pl_sz / sizeof(u32)) * sizeof(u32); + return min(length, task->response_pl_sz / sizeof(__le32)) * sizeof(__le32); } static void signal_task_complete(struct pci_doe_task *task, int rv) @@ -317,14 +317,16 @@ static int pci_doe_discovery(struct pci_doe_mb *doe_mb, u8 *index, u16 *vid, { u32 request_pl = FIELD_PREP(PCI_DOE_DATA_OBJECT_DISC_REQ_3_INDEX, *index); + __le32 request_pl_le = cpu_to_le32(request_pl); + __le32 response_pl_le; u32 response_pl; DECLARE_COMPLETION_ONSTACK(c); struct pci_doe_task task = { .prot.vid = PCI_VENDOR_ID_PCI_SIG, .prot.type = PCI_DOE_PROTOCOL_DISCOVERY, - .request_pl = &request_pl, + .request_pl = &request_pl_le, .request_pl_sz = sizeof(request_pl), - .response_pl = &response_pl, + .response_pl = &response_pl_le, .response_pl_sz = sizeof(response_pl), .complete = pci_doe_task_complete, .private = &c, @@ -340,6 +342,7 @@ static int pci_doe_discovery(struct pci_doe_mb *doe_mb, u8 *index, u16 *vid, if (task.rv != sizeof(response_pl)) return -EIO; + 
response_pl = le32_to_cpu(response_pl_le); *vid = FIELD_GET(PCI_DOE_DATA_OBJECT_DISC_RSP_3_VID, response_pl); *protocol = FIELD_GET(PCI_DOE_DATA_OBJECT_DISC_RSP_3_PROTOCOL, response_pl); @@ -533,8 +536,8 @@ int pci_doe_submit_task(struct pci_doe_mb *doe_mb, struct pci_doe_task *task) * DOE requests must be a whole number of DW and the response needs to * be big enough for at least 1 DW */ - if (task->request_pl_sz % sizeof(u32) || - task->response_pl_sz < sizeof(u32)) + if (task->request_pl_sz % sizeof(__le32) || + task->response_pl_sz < sizeof(__le32)) return -EINVAL; if (test_bit(PCI_DOE_FLAG_DEAD, &doe_mb->flags)) diff --git a/include/linux/pci-doe.h b/include/linux/pci-doe.h index ed9b4df792b8..43765eaf2342 100644 --- a/include/linux/pci-doe.h +++ b/include/linux/pci-doe.h @@ -34,6 +34,10 @@ struct pci_doe_mb; * @work: Used internally by the mailbox * @doe_mb: Used internally by the mailbox * + * Payloads are treated as opaque byte streams which are transmitted verbatim, + * without byte-swapping. If payloads contain little-endian register values, + * the caller is responsible for conversion with cpu_to_le32() / le32_to_cpu(). + * * The payload sizes and rv are specified in bytes with the following * restrictions concerning the protocol. * @@ -45,9 +49,9 @@ struct pci_doe_mb; */ struct pci_doe_task { struct pci_doe_protocol prot; - u32 *request_pl; + __le32 *request_pl; size_t request_pl_sz; - u32 *response_pl; + __le32 *response_pl; size_t response_pl_sz; int rv; void (*complete)(struct pci_doe_task *task); -- cgit v1.2.3 From 34bafc747c54fb58c1908ec3116fa6137393e596 Mon Sep 17 00:00:00 2001 From: Lukas Wunner Date: Sat, 11 Mar 2023 15:40:02 +0100 Subject: cxl/pci: Handle truncated CDAT header cxl_cdat_get_length() only checks whether the DOE response size is sufficient for the Table Access response header (1 dword), but not the succeeding CDAT header (1 dword length plus other fields). It thus returns whatever uninitialized memory happens to be on the stack if a truncated DOE response with only 1 dword was received. Fix it. Fixes: c97006046c79 ("cxl/port: Read CDAT table") Reported-by: Ming Li Tested-by: Ira Weiny Signed-off-by: Lukas Wunner Reviewed-by: Ming Li Reviewed-by: Dan Williams Reviewed-by: Jonathan Cameron Cc: stable@vger.kernel.org # v6.0+ Reviewed-by: Kuppuswamy Sathyanarayanan Link: https://lore.kernel.org/r/000e69cd163461c8b1bc2cf4155b6e25402c29c7.1678543498.git.lukas@wunner.de Signed-off-by: Dan Williams --- drivers/cxl/core/pci.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/cxl/core/pci.c b/drivers/cxl/core/pci.c index 49a99a84b6aa..87da8c935185 100644 --- a/drivers/cxl/core/pci.c +++ b/drivers/cxl/core/pci.c @@ -510,7 +510,7 @@ static int cxl_cdat_get_length(struct device *dev, return rc; } wait_for_completion(&t.c); - if (t.task.rv < sizeof(__le32)) + if (t.task.rv < 2 * sizeof(__le32)) return -EIO; *length = le32_to_cpu(t.response_pl[1]); -- cgit v1.2.3 From b56faef2312057db20479b240eb71bd2e51fb51c Mon Sep 17 00:00:00 2001 From: Lukas Wunner Date: Sat, 11 Mar 2023 15:40:03 +0100 Subject: cxl/pci: Handle truncated CDAT entries If truncated CDAT entries are received from a device, the concatenation of those entries constitutes a corrupt CDAT, yet is happily exposed to user space. Avoid by verifying response lengths and erroring out if truncation is detected. The last CDAT entry may still be truncated despite the checks introduced herein if the length in the CDAT header is too small. 
However, that is easily detectable by user space because it reaches EOF prematurely. A subsequent commit which rightsizes the CDAT response allocation closes that remaining loophole. The two lines introduced here which exceed 80 chars are shortened to less than 80 chars by a subsequent commit which migrates to a synchronous DOE API and replaces "t.task.rv" by "rc". The existing acpi_cdat_header and acpi_table_cdat struct definitions provided by ACPICA cannot be used because they do not employ __le16 or __le32 types. I believe that cannot be changed because those types are Linux-specific and ACPI is specified for little endian platforms only, hence doesn't care about endianness. So duplicate the structs. Fixes: c97006046c79 ("cxl/port: Read CDAT table") Tested-by: Ira Weiny Signed-off-by: Lukas Wunner Reviewed-by: Dan Williams Reviewed-by: Jonathan Cameron Cc: stable@vger.kernel.org # v6.0+ Link: https://lore.kernel.org/r/bce3aebc0e8e18a1173425a7a865b232c3912963.1678543498.git.lukas@wunner.de Signed-off-by: Dan Williams --- drivers/cxl/core/pci.c | 13 +++++++++---- drivers/cxl/cxlpci.h | 14 ++++++++++++++ 2 files changed, 23 insertions(+), 4 deletions(-) diff --git a/drivers/cxl/core/pci.c b/drivers/cxl/core/pci.c index 87da8c935185..fb600dfbf5a6 100644 --- a/drivers/cxl/core/pci.c +++ b/drivers/cxl/core/pci.c @@ -529,8 +529,8 @@ static int cxl_cdat_read_table(struct device *dev, do { DECLARE_CDAT_DOE_TASK(CDAT_DOE_REQ(entry_handle), t); + struct cdat_entry_header *entry; size_t entry_dw; - __le32 *entry; int rc; rc = pci_doe_submit_task(cdat_doe, &t.task); @@ -539,14 +539,19 @@ static int cxl_cdat_read_table(struct device *dev, return rc; } wait_for_completion(&t.c); - /* 1 DW header + 1 DW data min */ - if (t.task.rv < (2 * sizeof(__le32))) + + /* 1 DW Table Access Response Header + CDAT entry */ + entry = (struct cdat_entry_header *)(t.response_pl + 1); + if ((entry_handle == 0 && + t.task.rv != sizeof(__le32) + sizeof(struct cdat_header)) || + (entry_handle > 0 && + (t.task.rv < sizeof(__le32) + sizeof(*entry) || + t.task.rv != sizeof(__le32) + le16_to_cpu(entry->length)))) return -EIO; /* Get the CXL table access header entry handle */ entry_handle = FIELD_GET(CXL_DOE_TABLE_ACCESS_ENTRY_HANDLE, le32_to_cpu(t.response_pl[0])); - entry = t.response_pl + 1; entry_dw = t.task.rv / sizeof(__le32); /* Skip Header */ entry_dw -= 1; diff --git a/drivers/cxl/cxlpci.h b/drivers/cxl/cxlpci.h index be6a2ef3cce3..0465ef963cd6 100644 --- a/drivers/cxl/cxlpci.h +++ b/drivers/cxl/cxlpci.h @@ -68,6 +68,20 @@ enum cxl_regloc_type { CXL_REGLOC_RBI_TYPES }; +struct cdat_header { + __le32 length; + u8 revision; + u8 checksum; + u8 reserved[6]; + __le32 sequence; +} __packed; + +struct cdat_entry_header { + u8 type; + u8 reserved; + __le16 length; +} __packed; + int devm_cxl_port_enumerate_dports(struct cxl_port *port); struct cxl_dev_state; int cxl_hdm_decode_init(struct cxl_dev_state *cxlds, struct cxl_hdm *cxlhdm, -- cgit v1.2.3 From 4fe2c13d59d849be3b45371e3913ec5dc77fc0fb Mon Sep 17 00:00:00 2001 From: Lukas Wunner Date: Sat, 11 Mar 2023 15:40:04 +0100 Subject: cxl/pci: Handle excessive CDAT length If the length in the CDAT header is larger than the concatenation of the header and all table entries, then the CDAT exposed to user space contains trailing null bytes. Not every consumer may be able to handle that. Per Postel's robustness principle, "be liberal in what you accept" and silently reduce the cached length to avoid exposing those null bytes. 
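As an aside, the accounting reduces to the following userspace sketch
(hypothetical names, not part of the patch): entries are copied until
the claimed buffer is exhausted, and whatever remains unfilled is the
amount to trim from the cached length.

    #include <stddef.h>
    #include <string.h>

    /*
     * Returns the number of bytes actually received; the caller
     * shrinks its cached length to this value instead of keeping
     * zero padding around.
     */
    static size_t cdat_copy_entries(unsigned char *dst, size_t claimed,
                                    const unsigned char *const *entry,
                                    const size_t *entry_len, size_t nr)
    {
            size_t left = claimed;

            for (size_t i = 0; i < nr && entry_len[i] <= left; i++) {
                    memcpy(dst + (claimed - left), entry[i], entry_len[i]);
                    left -= entry_len[i];
            }
            return claimed - left;
    }

This mirrors the "cdat->length -= length" adjustment in the hunk below,
where "length" is the unfilled remainder.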
Fixes: c97006046c79 ("cxl/port: Read CDAT table") Tested-by: Ira Weiny Signed-off-by: Lukas Wunner Reviewed-by: Dan Williams Reviewed-by: Jonathan Cameron Cc: stable@vger.kernel.org # v6.0+ Link: https://lore.kernel.org/r/6d98b3c7da5343172bd3ccabfabbc1f31c079d74.1678543498.git.lukas@wunner.de Signed-off-by: Dan Williams --- drivers/cxl/core/pci.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/cxl/core/pci.c b/drivers/cxl/core/pci.c index fb600dfbf5a6..523d5b9fd7fc 100644 --- a/drivers/cxl/core/pci.c +++ b/drivers/cxl/core/pci.c @@ -564,6 +564,9 @@ static int cxl_cdat_read_table(struct device *dev, } } while (entry_handle != CXL_DOE_TABLE_ACCESS_LAST_ENTRY); + /* Length in CDAT header may exceed concatenation of CDAT entries */ + cdat->length -= length; + return 0; } -- cgit v1.2.3 From 92dc899c3b4927f3cfa23f55bf759171234b5802 Mon Sep 17 00:00:00 2001 From: Lukas Wunner Date: Sat, 11 Mar 2023 15:40:05 +0100 Subject: PCI/DOE: Silence WARN splat with CONFIG_DEBUG_OBJECTS=y Gregory Price reports a WARN splat with CONFIG_DEBUG_OBJECTS=y upon CXL probing because pci_doe_submit_task() invokes INIT_WORK() instead of INIT_WORK_ONSTACK() for a work_struct that was allocated on the stack. All callers of pci_doe_submit_task() allocate the work_struct on the stack, so replace INIT_WORK() with INIT_WORK_ONSTACK() as a backportable short-term fix. The long-term fix implemented by a subsequent commit is to move to a synchronous API which allocates the work_struct internally in the DOE library. Stacktrace for posterity: WARNING: CPU: 0 PID: 23 at lib/debugobjects.c:545 __debug_object_init.cold+0x18/0x183 CPU: 0 PID: 23 Comm: kworker/u2:1 Not tainted 6.1.0-0.rc1.20221019gitaae703b02f92.17.fc38.x86_64 #1 Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.16.0-0-gd239552ce722-prebuilt.qemu.org 04/01/2014 Call Trace: pci_doe_submit_task+0x5d/0xd0 pci_doe_discovery+0xb4/0x100 pcim_doe_create_mb+0x219/0x290 cxl_pci_probe+0x192/0x430 local_pci_probe+0x41/0x80 pci_device_probe+0xb3/0x220 really_probe+0xde/0x380 __driver_probe_device+0x78/0x170 driver_probe_device+0x1f/0x90 __driver_attach_async_helper+0x5c/0xe0 async_run_entry_fn+0x30/0x130 process_one_work+0x294/0x5b0 Fixes: 9d24322e887b ("PCI/DOE: Add DOE mailbox support functions") Link: https://lore.kernel.org/linux-cxl/Y1bOniJliOFszvIK@memverge.com/ Reported-by: Gregory Price Tested-by: Ira Weiny Tested-by: Gregory Price Signed-off-by: Lukas Wunner Reviewed-by: Ira Weiny Reviewed-by: Dan Williams Reviewed-by: Gregory Price Cc: stable@vger.kernel.org # v6.0+ Reviewed-by: Jonathan Cameron Acked-by: Bjorn Helgaas Link: https://lore.kernel.org/r/67a9117f463ecdb38a2dbca6a20391ce2f1e7a06.1678543498.git.lukas@wunner.de Signed-off-by: Dan Williams --- drivers/pci/doe.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/pci/doe.c b/drivers/pci/doe.c index 6f097932ccbf..c14ffdf23f87 100644 --- a/drivers/pci/doe.c +++ b/drivers/pci/doe.c @@ -523,6 +523,8 @@ EXPORT_SYMBOL_GPL(pci_doe_supports_prot); * task->complete will be called when the state machine is done processing this * task. * + * @task must be allocated on the stack. + * * Excess data will be discarded. 
* * RETURNS: 0 when task has been successfully queued, -ERRNO on error @@ -544,7 +546,7 @@ int pci_doe_submit_task(struct pci_doe_mb *doe_mb, struct pci_doe_task *task) return -EIO; task->doe_mb = doe_mb; - INIT_WORK(&task->work, doe_statemachine_work); + INIT_WORK_ONSTACK(&task->work, doe_statemachine_work); queue_work(doe_mb->work_queue, &task->work); return 0; } -- cgit v1.2.3 From abf04be0e7071f2bcd39bf97ba407e7d4439785e Mon Sep 17 00:00:00 2001 From: Lukas Wunner Date: Sat, 11 Mar 2023 15:40:06 +0100 Subject: PCI/DOE: Fix memory leak with CONFIG_DEBUG_OBJECTS=y After a pci_doe_task completes, its work_struct needs to be destroyed to avoid a memory leak with CONFIG_DEBUG_OBJECTS=y. Fixes: 9d24322e887b ("PCI/DOE: Add DOE mailbox support functions") Tested-by: Ira Weiny Signed-off-by: Lukas Wunner Reviewed-by: Ira Weiny Reviewed-by: Davidlohr Bueso Reviewed-by: Dan Williams Reviewed-by: Jonathan Cameron Cc: stable@vger.kernel.org # v6.0+ Acked-by: Bjorn Helgaas Link: https://lore.kernel.org/r/775768b4912531c3b887d405fc51a50e465e1bf9.1678543498.git.lukas@wunner.de Signed-off-by: Dan Williams --- drivers/pci/doe.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/pci/doe.c b/drivers/pci/doe.c index c14ffdf23f87..e5e9b287b976 100644 --- a/drivers/pci/doe.c +++ b/drivers/pci/doe.c @@ -224,6 +224,7 @@ static void signal_task_complete(struct pci_doe_task *task, int rv) { task->rv = rv; task->complete(task); + destroy_work_on_stack(&task->work); } static void signal_task_abort(struct pci_doe_task *task, int rv) -- cgit v1.2.3 From 82f0832af26a30ae5f21b335c5f68b538e710c29 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Tue, 4 Apr 2023 15:34:12 -0700 Subject: cxl/hdm: Fix double allocation of @cxlhdm devm_cxl_setup_emulated_hdm() reallocates an instance of @cxlhdm that was already allocated at the start of devm_cxl_setup_hdm(). Only one is needed and devm_cxl_setup_emulated_hdm() does not do enough to warrant being an explicit helper. 
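Reduced to a hypothetical userspace sketch (the devm_ lifetime
management is elided), the fix is to allocate the context once and let
the emulated case reuse that allocation instead of calling a helper
that allocates a second instance:

    #include <stdlib.h>

    struct hdm_ctx {
            int decoder_count;
    };

    /* one allocation serves both the register-backed and emulated paths */
    static struct hdm_ctx *setup_hdm(int have_component_regs, int dvsec_ranges)
    {
            struct hdm_ctx *ctx = calloc(1, sizeof(*ctx));

            if (!ctx)
                    return NULL;
            if (!have_component_regs && dvsec_ranges) {
                    /* emulated decoders: reuse ctx, do not reallocate */
                    ctx->decoder_count = dvsec_ranges;
                    return ctx;
            }
            /* ... map registers and read the real decoder count ... */
            return ctx;
    }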
Fixes: 4474ce565ee4 ("cxl/hdm: Create emulated cxl_hdm for devices that do not have HDM decoders") Tested-by: Dave Jiang Reviewed-by: Dave Jiang Reviewed-by: Jonathan Cameron Link: https://lore.kernel.org/r/167703067936.185722.7908921750127154779.stgit@dwillia2-xfh.jf.intel.com Link: https://lore.kernel.org/r/168012574357.221280.5001364964799725366.stgit@dwillia2-xfh.jf.intel.com Signed-off-by: Dan Williams --- drivers/cxl/core/hdm.c | 34 ++++++---------------------------- 1 file changed, 6 insertions(+), 28 deletions(-) diff --git a/drivers/cxl/core/hdm.c b/drivers/cxl/core/hdm.c index 45deda18ed32..038f88eae226 100644 --- a/drivers/cxl/core/hdm.c +++ b/drivers/cxl/core/hdm.c @@ -101,27 +101,6 @@ static int map_hdm_decoder_regs(struct cxl_port *port, void __iomem *crb, BIT(CXL_CM_CAP_CAP_ID_HDM)); } -static struct cxl_hdm *devm_cxl_setup_emulated_hdm(struct cxl_port *port, - struct cxl_endpoint_dvsec_info *info) -{ - struct device *dev = &port->dev; - struct cxl_hdm *cxlhdm; - - if (!info->mem_enabled) - return ERR_PTR(-ENODEV); - - cxlhdm = devm_kzalloc(dev, sizeof(*cxlhdm), GFP_KERNEL); - if (!cxlhdm) - return ERR_PTR(-ENOMEM); - - cxlhdm->port = port; - cxlhdm->decoder_count = info->ranges; - cxlhdm->target_count = info->ranges; - dev_set_drvdata(&port->dev, cxlhdm); - - return cxlhdm; -} - /** * devm_cxl_setup_hdm - map HDM decoder component registers * @port: cxl_port to map @@ -138,13 +117,14 @@ struct cxl_hdm *devm_cxl_setup_hdm(struct cxl_port *port, cxlhdm = devm_kzalloc(dev, sizeof(*cxlhdm), GFP_KERNEL); if (!cxlhdm) return ERR_PTR(-ENOMEM); - cxlhdm->port = port; - crb = ioremap(port->component_reg_phys, CXL_COMPONENT_REG_BLOCK_SIZE); - if (!crb) { - if (info && info->mem_enabled) - return devm_cxl_setup_emulated_hdm(port, info); + dev_set_drvdata(dev, cxlhdm); + crb = ioremap(port->component_reg_phys, CXL_COMPONENT_REG_BLOCK_SIZE); + if (!crb && info && info->mem_enabled) { + cxlhdm->decoder_count = info->ranges; + return cxlhdm; + } else if (!crb) { dev_err(dev, "No component registers mapped\n"); return ERR_PTR(-ENXIO); } @@ -160,8 +140,6 @@ struct cxl_hdm *devm_cxl_setup_hdm(struct cxl_port *port, return ERR_PTR(-ENXIO); } - dev_set_drvdata(dev, cxlhdm); - return cxlhdm; } EXPORT_SYMBOL_NS_GPL(devm_cxl_setup_hdm, CXL); -- cgit v1.2.3 From b70c2cf95ee1ca2806cb7191504920f8f5b4454e Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Mon, 3 Apr 2023 14:33:48 -0700 Subject: cxl/hdm: Skip emulation when driver manages mem_enable If the driver is allowed to enable memory operation itself then it can also turn on HDM decoder support at will. With this the second call to cxl_setup_hdm_decoder_from_dvsec(), when an HDM decoder is not committed, is not needed. 
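The resulting policy can be condensed into an illustrative predicate (a
plain-C restatement of should_emulate_decoders() from the diff below,
not a new kernel interface):

    #include <stdbool.h>

    struct dvsec_info {
            bool mem_enabled;       /* Mem_Enable was set before probe */
    };

    static bool should_emulate(const struct dvsec_info *info,
                               bool have_hdm_regs, int committed_decoders)
    {
            if (!info)              /* not an endpoint: nothing to emulate */
                    return false;
            if (!have_hdm_regs)     /* no HDM capability: must emulate */
                    return true;
            if (!info->mem_enabled) /* driver owns Mem_Enable: skip emulation */
                    return false;
            /* committed decoders mean firmware programmed real HDM decoders */
            return committed_decoders == 0;
    }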
Fixes: b777e9bec960 ("cxl/hdm: Emulate HDM decoder from DVSEC range registers") Link: http://lore.kernel.org/r/20230220113657.000042e1@huawei.com Reported-by: Jonathan Cameron Tested-by: Jonathan Cameron Reviewed-by: Jonathan Cameron Reviewed-by: Fan Ni Reviewed-by: Dave Jiang Link: https://lore.kernel.org/r/167703068474.185722.664126485486344246.stgit@dwillia2-xfh.jf.intel.com Signed-off-by: Dan Williams --- drivers/cxl/core/hdm.c | 31 ++++++++++++++++++------------- drivers/cxl/cxl.h | 4 +++- drivers/cxl/port.c | 2 +- 3 files changed, 22 insertions(+), 15 deletions(-) diff --git a/drivers/cxl/core/hdm.c b/drivers/cxl/core/hdm.c index 038f88eae226..cc123996b1a4 100644 --- a/drivers/cxl/core/hdm.c +++ b/drivers/cxl/core/hdm.c @@ -717,19 +717,29 @@ static int cxl_setup_hdm_decoder_from_dvsec(struct cxl_port *port, return 0; } -static bool should_emulate_decoders(struct cxl_port *port) +static bool should_emulate_decoders(struct cxl_endpoint_dvsec_info *info) { - struct cxl_hdm *cxlhdm = dev_get_drvdata(&port->dev); - void __iomem *hdm = cxlhdm->regs.hdm_decoder; + struct cxl_hdm *cxlhdm; + void __iomem *hdm; u32 ctrl; int i; - if (!is_cxl_endpoint(cxlhdm->port)) + if (!info) return false; + cxlhdm = dev_get_drvdata(&info->port->dev); + hdm = cxlhdm->regs.hdm_decoder; + if (!hdm) return true; + /* + * If HDM decoders are present and the driver is in control of + * Mem_Enable skip DVSEC based emulation + */ + if (!info->mem_enabled) + return false; + /* * If any decoders are committed already, there should not be any * emulated DVSEC decoders. @@ -747,7 +757,7 @@ static int init_hdm_decoder(struct cxl_port *port, struct cxl_decoder *cxld, int *target_map, void __iomem *hdm, int which, u64 *dpa_base, struct cxl_endpoint_dvsec_info *info) { - struct cxl_endpoint_decoder *cxled = NULL; + struct cxl_endpoint_decoder *cxled; u64 size, base, skip, dpa_size; bool committed; u32 remainder; @@ -758,12 +768,9 @@ static int init_hdm_decoder(struct cxl_port *port, struct cxl_decoder *cxld, unsigned char target_id[8]; } target_list; - if (should_emulate_decoders(port)) + if (should_emulate_decoders(info)) return cxl_setup_hdm_decoder_from_dvsec(port, cxld, which, info); - if (is_endpoint_decoder(&cxld->dev)) - cxled = to_cxl_endpoint_decoder(&cxld->dev); - ctrl = readl(hdm + CXL_HDM_DECODER0_CTRL_OFFSET(which)); base = ioread64_hi_lo(hdm + CXL_HDM_DECODER0_BASE_LOW_OFFSET(which)); size = ioread64_hi_lo(hdm + CXL_HDM_DECODER0_SIZE_LOW_OFFSET(which)); @@ -784,9 +791,6 @@ static int init_hdm_decoder(struct cxl_port *port, struct cxl_decoder *cxld, .end = base + size - 1, }; - if (cxled && !committed && range_len(&info->dvsec_range[which])) - return cxl_setup_hdm_decoder_from_dvsec(port, cxld, which, info); - /* decoders are enabled if committed */ if (committed) { cxld->flags |= CXL_DECODER_F_ENABLE; @@ -824,7 +828,7 @@ static int init_hdm_decoder(struct cxl_port *port, struct cxl_decoder *cxld, if (rc) return rc; - if (!cxled) { + if (!info) { target_list.value = ioread64_hi_lo(hdm + CXL_HDM_DECODER0_TL_LOW(which)); for (i = 0; i < cxld->interleave_ways; i++) @@ -844,6 +848,7 @@ static int init_hdm_decoder(struct cxl_port *port, struct cxl_decoder *cxld, return -ENXIO; } skip = ioread64_hi_lo(hdm + CXL_HDM_DECODER0_SKIP_LOW(which)); + cxled = to_cxl_endpoint_decoder(&cxld->dev); rc = devm_cxl_dpa_reserve(cxled, *dpa_base + skip, dpa_size, skip); if (rc) { dev_err(&port->dev, diff --git a/drivers/cxl/cxl.h b/drivers/cxl/cxl.h index f2b0962a552d..aab87d74474d 100644 --- a/drivers/cxl/cxl.h +++ 
b/drivers/cxl/cxl.h @@ -695,13 +695,15 @@ int cxl_endpoint_autoremove(struct cxl_memdev *cxlmd, struct cxl_port *endpoint) /** * struct cxl_endpoint_dvsec_info - Cached DVSEC info - * @mem_enabled: cached value of mem_enabled in the DVSEC, PCIE_DEVICE + * @mem_enabled: cached value of mem_enabled in the DVSEC at init time * @ranges: Number of active HDM ranges this device uses. + * @port: endpoint port associated with this info instance * @dvsec_range: cached attributes of the ranges in the DVSEC, PCIE_DEVICE */ struct cxl_endpoint_dvsec_info { bool mem_enabled; int ranges; + struct cxl_port *port; struct range dvsec_range[2]; }; diff --git a/drivers/cxl/port.c b/drivers/cxl/port.c index 1049bb5ea496..9c8f46ed336b 100644 --- a/drivers/cxl/port.c +++ b/drivers/cxl/port.c @@ -78,8 +78,8 @@ static int cxl_switch_port_probe(struct cxl_port *port) static int cxl_endpoint_port_probe(struct cxl_port *port) { + struct cxl_endpoint_dvsec_info info = { .port = port }; struct cxl_memdev *cxlmd = to_cxl_memdev(port->uport); - struct cxl_endpoint_dvsec_info info = { 0 }; struct cxl_dev_state *cxlds = cxlmd->cxlds; struct cxl_hdm *cxlhdm; struct cxl_port *root; -- cgit v1.2.3 From d35b495ddf92c964eedf2ac86fdbf88dc3e5cbc9 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Mon, 3 Apr 2023 14:39:16 -0700 Subject: cxl/port: Fix find_cxl_root() for RCDs and simplify it The find_cxl_root() helper is used to lookup root decoders and other CXL platform topology information for a given endpoint. It turns out that for RCDs it has never worked. The result of find_cxl_root(&cxlmd->dev) is always NULL for the RCH topology case because it expects to find a cxl_port at the host-bridge. RCH topologies only have the root cxl_port object with the host-bridge as a dport. While there are no reports of this being a problem to date, by inspection region enumeration should crash as a result of this problem, and it does in a local unit test for this scenario. However, an observation that ever since: commit f17b558d6663 ("cxl/pmem: Refactor nvdimm device registration, delete the workqueue") ...all callers of find_cxl_root() occur after the memdev connection to the port topology has been established. That means that find_cxl_root() can be simplified to a walk of the endpoint port topology to the root. Switch to that arrangement which also fixes the RCD bug. 
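Shape-wise, the simplification swaps a bus-wide device search for a
plain parent walk, roughly (illustrative types, not the CXL structs):

    #include <stddef.h>

    struct node {
            struct node *parent;
            int is_root;
    };

    /*
     * Walk from an endpoint toward the root, as the reworked
     * find_cxl_root() does; the kernel version additionally takes a
     * reference on the port it returns.
     */
    static struct node *find_root(struct node *n)
    {
            while (n && !n->is_root)
                    n = n->parent;
            return n;       /* NULL if the chain is broken */
    }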
Fixes: a32320b71f08 ("cxl/region: Add region autodiscovery") Reviewed-by: Jonathan Cameron Reviewed-by: Dave Jiang Link: https://lore.kernel.org/r/168002857715.50647.344876437247313909.stgit@dwillia2-xfh.jf.intel.com Signed-off-by: Dan Williams --- drivers/cxl/core/pmem.c | 6 +++--- drivers/cxl/core/port.c | 38 +++++++------------------------------- drivers/cxl/core/region.c | 2 +- drivers/cxl/cxl.h | 4 ++-- drivers/cxl/port.c | 2 +- 5 files changed, 14 insertions(+), 38 deletions(-) diff --git a/drivers/cxl/core/pmem.c b/drivers/cxl/core/pmem.c index c2e4b1093788..f8c38d997252 100644 --- a/drivers/cxl/core/pmem.c +++ b/drivers/cxl/core/pmem.c @@ -62,9 +62,9 @@ static int match_nvdimm_bridge(struct device *dev, void *data) return is_cxl_nvdimm_bridge(dev); } -struct cxl_nvdimm_bridge *cxl_find_nvdimm_bridge(struct device *start) +struct cxl_nvdimm_bridge *cxl_find_nvdimm_bridge(struct cxl_memdev *cxlmd) { - struct cxl_port *port = find_cxl_root(start); + struct cxl_port *port = find_cxl_root(dev_get_drvdata(&cxlmd->dev)); struct device *dev; if (!port) @@ -253,7 +253,7 @@ int devm_cxl_add_nvdimm(struct cxl_memdev *cxlmd) struct device *dev; int rc; - cxl_nvb = cxl_find_nvdimm_bridge(&cxlmd->dev); + cxl_nvb = cxl_find_nvdimm_bridge(cxlmd); if (!cxl_nvb) return -ENODEV; diff --git a/drivers/cxl/core/port.c b/drivers/cxl/core/port.c index 8ee6b6e2e2a4..4d1f9c5b5029 100644 --- a/drivers/cxl/core/port.c +++ b/drivers/cxl/core/port.c @@ -823,41 +823,17 @@ static bool dev_is_cxl_root_child(struct device *dev) return false; } -/* Find a 2nd level CXL port that has a dport that is an ancestor of @match */ -static int match_root_child(struct device *dev, const void *match) +struct cxl_port *find_cxl_root(struct cxl_port *port) { - const struct device *iter = NULL; - struct cxl_dport *dport; - struct cxl_port *port; - - if (!dev_is_cxl_root_child(dev)) - return 0; - - port = to_cxl_port(dev); - iter = match; - while (iter) { - dport = cxl_find_dport_by_dev(port, iter); - if (dport) - break; - iter = iter->parent; - } - - return !!iter; -} + struct cxl_port *iter = port; -struct cxl_port *find_cxl_root(struct device *dev) -{ - struct device *port_dev; - struct cxl_port *root; + while (iter && !is_cxl_root(iter)) + iter = to_cxl_port(iter->dev.parent); - port_dev = bus_find_device(&cxl_bus_type, NULL, dev, match_root_child); - if (!port_dev) + if (!iter) return NULL; - - root = to_cxl_port(port_dev->parent); - get_device(&root->dev); - put_device(port_dev); - return root; + get_device(&iter->dev); + return iter; } EXPORT_SYMBOL_NS_GPL(find_cxl_root, CXL); diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c index f29028148806..808f23ec4e2b 100644 --- a/drivers/cxl/core/region.c +++ b/drivers/cxl/core/region.c @@ -2251,7 +2251,7 @@ static struct cxl_pmem_region *cxl_pmem_region_alloc(struct cxl_region *cxlr) * bridge for one device is the same for all. 
 */
	if (i == 0) {
-		cxl_nvb = cxl_find_nvdimm_bridge(&cxlmd->dev);
+		cxl_nvb = cxl_find_nvdimm_bridge(cxlmd);
 		if (!cxl_nvb) {
 			cxlr_pmem = ERR_PTR(-ENODEV);
 			goto out;
diff --git a/drivers/cxl/cxl.h b/drivers/cxl/cxl.h
index aab87d74474d..044a92d9813e 100644
--- a/drivers/cxl/cxl.h
+++ b/drivers/cxl/cxl.h
@@ -658,7 +658,7 @@ struct pci_bus *cxl_port_to_pci_bus(struct cxl_port *port);
 struct cxl_port *devm_cxl_add_port(struct device *host, struct device *uport,
 				   resource_size_t component_reg_phys,
 				   struct cxl_dport *parent_dport);
-struct cxl_port *find_cxl_root(struct device *dev);
+struct cxl_port *find_cxl_root(struct cxl_port *port);
 int devm_cxl_enumerate_ports(struct cxl_memdev *cxlmd);
 void cxl_bus_rescan(void);
 void cxl_bus_drain(void);
@@ -760,7 +760,7 @@ struct cxl_nvdimm *to_cxl_nvdimm(struct device *dev);
 bool is_cxl_nvdimm(struct device *dev);
 bool is_cxl_nvdimm_bridge(struct device *dev);
 int devm_cxl_add_nvdimm(struct cxl_memdev *cxlmd);
-struct cxl_nvdimm_bridge *cxl_find_nvdimm_bridge(struct device *dev);
+struct cxl_nvdimm_bridge *cxl_find_nvdimm_bridge(struct cxl_memdev *cxlmd);
 
 #ifdef CONFIG_CXL_REGION
 bool is_cxl_pmem_region(struct device *dev);
diff --git a/drivers/cxl/port.c b/drivers/cxl/port.c
index 9c8f46ed336b..22a7ab2bae7c 100644
--- a/drivers/cxl/port.c
+++ b/drivers/cxl/port.c
@@ -119,7 +119,7 @@ static int cxl_endpoint_port_probe(struct cxl_port *port)
 	 * This can't fail in practice as CXL root exit unregisters all
 	 * descendant ports and that in turn synchronizes with cxl_port_probe()
 	 */
-	root = find_cxl_root(&cxlmd->dev);
+	root = find_cxl_root(port);
 
 	/*
 	 * Now that all endpoint decoders are successfully enumerated, try to
--
cgit v1.2.3

From 030f880342b875c7d714d06d3ca4058ae9f13fee Mon Sep 17 00:00:00 2001
From: Dan Williams
Date: Mon, 3 Apr 2023 14:44:41 -0700
Subject: cxl/region: Fix region setup/teardown for RCDs

RCDs (CXL memory devices that link train without VH capability and show
up as root complex integrated endpoints) hide the presence of the link
between the endpoint and the host-bridge. The CXL region setup/teardown
paths assume that a link hop is present and go looking for at least one
'struct cxl_port' instance between the CXL root port-object and an
endpoint port-object, leading to crashes of the form:

BUG: kernel NULL pointer dereference, address: 0000000000000008
[..]
RIP: 0010:cxl_region_setup_targets+0x3e9/0xae0 [cxl_core]
[..]
Call Trace:
 cxl_region_attach+0x46c/0x7a0 [cxl_core]
 cxl_create_region+0x20b/0x270 [cxl_core]
 cxl_mock_mem_probe+0x641/0x800 [cxl_mock_mem]
 platform_probe+0x5b/0xb0

Detect RCDs explicitly and skip walking the non-existent port hierarchy
between root and endpoint in that case. While this has been a problem
since:

commit 0a19bfc8de93 ("cxl/port: Add RCD endpoint port enumeration")

...it becomes a more reliable crash scenario with the new autodiscovery
implementation.
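The added validation boils down to this sketch (hypothetical names; the
real code walks the port hierarchy for VH targets and skips the walk
for RCDs):

    static int validate_topology(const int is_rcd[], int nr_targets)
    {
            int rch = 0, vh = 0;

            for (int i = 0; i < nr_targets; i++) {
                    if (is_rcd[i])
                            rch++;  /* restricted host: no port hierarchy */
                    else
                            vh++;   /* virtual hierarchy: walk the ports */
            }
            /* all targets must agree on the topology type */
            return (rch && vh) ? -1 : 0;
    }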
Fixes: a32320b71f08 ("cxl/region: Add region autodiscovery") Reviewed-by: Ira Weiny Reviewed-by: Dave Jiang Link: https://lore.kernel.org/r/168002858268.50647.728091521032131326.stgit@dwillia2-xfh.jf.intel.com Signed-off-by: Dan Williams --- drivers/cxl/core/region.c | 28 +++++++++++++++++++++++++++- 1 file changed, 27 insertions(+), 1 deletion(-) diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c index 808f23ec4e2b..52bbf6268d5f 100644 --- a/drivers/cxl/core/region.c +++ b/drivers/cxl/core/region.c @@ -134,9 +134,13 @@ static int cxl_region_decode_reset(struct cxl_region *cxlr, int count) struct cxl_endpoint_decoder *cxled = p->targets[i]; struct cxl_memdev *cxlmd = cxled_to_memdev(cxled); struct cxl_port *iter = cxled_to_port(cxled); + struct cxl_dev_state *cxlds = cxlmd->cxlds; struct cxl_ep *ep; int rc = 0; + if (cxlds->rcd) + goto endpoint_reset; + while (!is_cxl_root(to_cxl_port(iter->dev.parent))) iter = to_cxl_port(iter->dev.parent); @@ -153,6 +157,7 @@ static int cxl_region_decode_reset(struct cxl_region *cxlr, int count) return rc; } +endpoint_reset: rc = cxled->cxld.reset(&cxled->cxld); if (rc) return rc; @@ -1199,6 +1204,7 @@ static void cxl_region_teardown_targets(struct cxl_region *cxlr) { struct cxl_region_params *p = &cxlr->params; struct cxl_endpoint_decoder *cxled; + struct cxl_dev_state *cxlds; struct cxl_memdev *cxlmd; struct cxl_port *iter; struct cxl_ep *ep; @@ -1214,6 +1220,10 @@ static void cxl_region_teardown_targets(struct cxl_region *cxlr) for (i = 0; i < p->nr_targets; i++) { cxled = p->targets[i]; cxlmd = cxled_to_memdev(cxled); + cxlds = cxlmd->cxlds; + + if (cxlds->rcd) + continue; iter = cxled_to_port(cxled); while (!is_cxl_root(to_cxl_port(iter->dev.parent))) @@ -1229,14 +1239,24 @@ static int cxl_region_setup_targets(struct cxl_region *cxlr) { struct cxl_region_params *p = &cxlr->params; struct cxl_endpoint_decoder *cxled; + struct cxl_dev_state *cxlds; + int i, rc, rch = 0, vh = 0; struct cxl_memdev *cxlmd; struct cxl_port *iter; struct cxl_ep *ep; - int i, rc; for (i = 0; i < p->nr_targets; i++) { cxled = p->targets[i]; cxlmd = cxled_to_memdev(cxled); + cxlds = cxlmd->cxlds; + + /* validate that all targets agree on topology */ + if (!cxlds->rcd) { + vh++; + } else { + rch++; + continue; + } iter = cxled_to_port(cxled); while (!is_cxl_root(to_cxl_port(iter->dev.parent))) @@ -1256,6 +1276,12 @@ static int cxl_region_setup_targets(struct cxl_region *cxlr) } } + if (rch && vh) { + dev_err(&cxlr->dev, "mismatched CXL topologies detected\n"); + cxl_region_teardown_targets(cxlr); + return -ENXIO; + } + return 0; } -- cgit v1.2.3 From 9ff3eec958cf365857ae8a630237ece4f83bb337 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Mon, 3 Apr 2023 15:01:20 -0700 Subject: cxl/region: Move coherence tracking into cxl_region_attach() Each time the contents of a given HPA are potentially changed in a cache incoherent manner the CXL core sets CXL_REGION_F_INCOHERENT to invalidate CPU caches before the region is used. Successful invocation of attach_target() indicates that DPA has been newly assigned to a given HPA in the dynamic region creation flow. However, attach_target() is also reused in the autodiscovery flow where the region was activated by platform firmware. In that case there is no need to invalidate caches because that region is already in active use and nothing about the autodiscovery flow modifies the HPA-to-DPA relationship. 
In the autodiscovery case cxl_region_attach() exits early after
determining the endpoint decoder is already correctly attached to the
region.

Fixes: a32320b71f08 ("cxl/region: Add region autodiscovery")
Reviewed-by: Fan Ni
Reviewed-by: Dave Jiang
Link: https://lore.kernel.org/r/168002858817.50647.1217607907088920888.stgit@dwillia2-xfh.jf.intel.com
Signed-off-by: Dan Williams
---
 drivers/cxl/core/region.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c
index 52bbf6268d5f..b2fd67fcebfb 100644
--- a/drivers/cxl/core/region.c
+++ b/drivers/cxl/core/region.c
@@ -1674,6 +1674,7 @@ static int cxl_region_attach(struct cxl_region *cxlr,
 		if (rc)
 			goto err_decrement;
 		p->state = CXL_CONFIG_ACTIVE;
+		set_bit(CXL_REGION_F_INCOHERENT, &cxlr->flags);
 	}
 
 	cxled->cxld.interleave_ways = p->interleave_ways;
@@ -1775,8 +1776,6 @@ static int attach_target(struct cxl_region *cxlr,
 
 	down_read(&cxl_dpa_rwsem);
 	rc = cxl_region_attach(cxlr, cxled, pos);
-	if (rc == 0)
-		set_bit(CXL_REGION_F_INCOHERENT, &cxlr->flags);
 	up_read(&cxl_dpa_rwsem);
 	up_write(&cxl_region_rwsem);
 	return rc;
--
cgit v1.2.3

From 52cc48ad2a76a5fe82d239044d67944bbb928de6 Mon Sep 17 00:00:00 2001
From: Dan Williams
Date: Mon, 3 Apr 2023 15:13:37 -0700
Subject: cxl/hdm: Limit emulation to the number of range registers

Recall that range register emulation seeks to treat the 2 potential
range registers as Linux CXL "decoder" objects. The number of range
registers can be 1 or 2, while HDM decoder ranges can include more
than 2.

Be careful not to confuse DVSEC range count with HDM capability decoder
count. Commit to range register emulation earlier in
devm_cxl_setup_hdm(). Otherwise, a device with more HDM decoders than
range registers can set @cxlhdm->decoder_count to an invalid value.

Avoid introducing a forward declaration by just moving the definition
of should_emulate_decoders() earlier in the file.
should_emulate_decoders() is unchanged.

Tested-by: Dave Jiang
Fixes: d7a2153762c7 ("cxl/hdm: Add emulation when HDM decoders are not committed")
Reviewed-by: Jonathan Cameron
Reviewed-by: Dave Jiang
Link: https://lore.kernel.org/r/168012574932.221280.15944705098679646436.stgit@dwillia2-xfh.jf.intel.com
Signed-off-by: Dan Williams
---
 drivers/cxl/core/hdm.c | 82 ++++++++++++++++++++++++++++----------------------
 1 file changed, 46 insertions(+), 36 deletions(-)

diff --git a/drivers/cxl/core/hdm.c b/drivers/cxl/core/hdm.c
index cc123996b1a4..9884b6d4d930 100644
--- a/drivers/cxl/core/hdm.c
+++ b/drivers/cxl/core/hdm.c
@@ -101,6 +101,42 @@ static int map_hdm_decoder_regs(struct cxl_port *port, void __iomem *crb,
 			      BIT(CXL_CM_CAP_CAP_ID_HDM));
 }
 
+static bool should_emulate_decoders(struct cxl_endpoint_dvsec_info *info)
+{
+	struct cxl_hdm *cxlhdm;
+	void __iomem *hdm;
+	u32 ctrl;
+	int i;
+
+	if (!info)
+		return false;
+
+	cxlhdm = dev_get_drvdata(&info->port->dev);
+	hdm = cxlhdm->regs.hdm_decoder;
+
+	if (!hdm)
+		return true;
+
+	/*
+	 * If HDM decoders are present and the driver is in control of
+	 * Mem_Enable skip DVSEC based emulation
+	 */
+	if (!info->mem_enabled)
+		return false;
+
+	/*
+	 * If any decoders are committed already, there should not be any
+	 * emulated DVSEC decoders.
+	 */
+	for (i = 0; i < cxlhdm->decoder_count; i++) {
+		ctrl = readl(hdm + CXL_HDM_DECODER0_CTRL_OFFSET(i));
+		if (FIELD_GET(CXL_HDM_DECODER0_CTRL_COMMITTED, ctrl))
+			return false;
+	}
+
+	return true;
+}
+
 /**
  * devm_cxl_setup_hdm - map HDM decoder component registers
  * @port: cxl_port to map
@@ -140,6 +176,16 @@ struct cxl_hdm *devm_cxl_setup_hdm(struct cxl_port *port,
 		return ERR_PTR(-ENXIO);
 	}
 
+	/*
+	 * Now that the hdm capability is parsed, decide if range
+	 * register emulation is needed and fixup cxlhdm accordingly.
+	 */
+	if (should_emulate_decoders(info)) {
+		dev_dbg(dev, "Fallback map %d range register%s\n", info->ranges,
+			info->ranges > 1 ? "s" : "");
+		cxlhdm->decoder_count = info->ranges;
+	}
+
 	return cxlhdm;
 }
 EXPORT_SYMBOL_NS_GPL(devm_cxl_setup_hdm, CXL);
@@ -717,42 +763,6 @@ static int cxl_setup_hdm_decoder_from_dvsec(struct cxl_port *port,
 	return 0;
 }
 
-static bool should_emulate_decoders(struct cxl_endpoint_dvsec_info *info)
-{
-	struct cxl_hdm *cxlhdm;
-	void __iomem *hdm;
-	u32 ctrl;
-	int i;
-
-	if (!info)
-		return false;
-
-	cxlhdm = dev_get_drvdata(&info->port->dev);
-	hdm = cxlhdm->regs.hdm_decoder;
-
-	if (!hdm)
-		return true;
-
-	/*
-	 * If HDM decoders are present and the driver is in control of
-	 * Mem_Enable skip DVSEC based emulation
-	 */
-	if (!info->mem_enabled)
-		return false;
-
-	/*
-	 * If any decoders are committed already, there should not be any
-	 * emulated DVSEC decoders.
-	 */
-	for (i = 0; i < cxlhdm->decoder_count; i++) {
-		ctrl = readl(hdm + CXL_HDM_DECODER0_CTRL_OFFSET(i));
-		if (FIELD_GET(CXL_HDM_DECODER0_CTRL_COMMITTED, ctrl))
-			return false;
-	}
-
-	return true;
-}
-
 static int init_hdm_decoder(struct cxl_port *port, struct cxl_decoder *cxld,
 			    int *target_map, void __iomem *hdm, int which,
 			    u64 *dpa_base, struct cxl_endpoint_dvsec_info *info)
--
cgit v1.2.3

From 24b18197184ac39bb8566fb82c0bf788bcd0d45b Mon Sep 17 00:00:00 2001
From: Dan Williams
Date: Mon, 3 Apr 2023 16:01:32 -0700
Subject: cxl/hdm: Extend DVSEC range register emulation for region enumeration

One motivation for mapping range registers to decoder objects is to use
those settings for region autodiscovery. The need to map a region for
devices programmed to use range registers is especially urgent now that
the kernel no longer routes "Soft Reserved" ranges in the memory map to
device-dax by default. The CXL memory range loses all access
mechanisms.

Complete the implementation by marking the DPA reservation and setting
the endpoint-decoder state to signal autodiscovery. Note that the
default settings of ways=1 and granularity=4096 set in
cxl_decoder_init() do not need to be updated.
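The essence of the added bookkeeping, as a short sketch (hypothetical
names, not the patch itself): each emulated decoder claims the next
chunk of device physical address (DPA) space and is flagged for
autodiscovery.

    #include <stdint.h>

    struct emu_decoder {
            uint64_t base, len;
            int auto_state; /* marks the decoder for region autodiscovery */
    };

    static int emulate_from_range(struct emu_decoder *d, uint64_t range_len,
                                  uint64_t *dpa_base)
    {
            if (!range_len)
                    return -1;      /* no active DVSEC range to mirror */
            d->base = *dpa_base;
            d->len = range_len;
            *dpa_base += range_len; /* ranges stack back-to-back in DPA */
            d->auto_state = 1;
            return 0;
    }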
Fixes: 09d09e04d2fc ("cxl/dax: Create dax devices for CXL RAM regions") Tested-by: Dave Jiang Tested-by: Gregory Price Link: https://lore.kernel.org/r/168012575521.221280.14177293493678527326.stgit@dwillia2-xfh.jf.intel.com Reviewed-by: Dave Jiang Signed-off-by: Dan Williams --- drivers/cxl/core/hdm.c | 27 ++++++++++++++++++++++----- 1 file changed, 22 insertions(+), 5 deletions(-) diff --git a/drivers/cxl/core/hdm.c b/drivers/cxl/core/hdm.c index 9884b6d4d930..02cc2c38b44b 100644 --- a/drivers/cxl/core/hdm.c +++ b/drivers/cxl/core/hdm.c @@ -738,14 +738,20 @@ static int cxl_decoder_reset(struct cxl_decoder *cxld) return 0; } -static int cxl_setup_hdm_decoder_from_dvsec(struct cxl_port *port, - struct cxl_decoder *cxld, int which, - struct cxl_endpoint_dvsec_info *info) +static int cxl_setup_hdm_decoder_from_dvsec( + struct cxl_port *port, struct cxl_decoder *cxld, u64 *dpa_base, + int which, struct cxl_endpoint_dvsec_info *info) { + struct cxl_endpoint_decoder *cxled; + u64 len; + int rc; + if (!is_cxl_endpoint(port)) return -EOPNOTSUPP; - if (!range_len(&info->dvsec_range[which])) + cxled = to_cxl_endpoint_decoder(&cxld->dev); + len = range_len(&info->dvsec_range[which]); + if (!len) return -ENOENT; cxld->target_type = CXL_DECODER_EXPANDER; @@ -760,6 +766,16 @@ static int cxl_setup_hdm_decoder_from_dvsec(struct cxl_port *port, cxld->flags |= CXL_DECODER_F_ENABLE | CXL_DECODER_F_LOCK; port->commit_end = cxld->id; + rc = devm_cxl_dpa_reserve(cxled, *dpa_base, len, 0); + if (rc) { + dev_err(&port->dev, + "decoder%d.%d: Failed to reserve DPA range %#llx - %#llx\n (%d)", + port->id, cxld->id, *dpa_base, *dpa_base + len - 1, rc); + return rc; + } + *dpa_base += len; + cxled->state = CXL_DECODER_STATE_AUTO; + return 0; } @@ -779,7 +795,8 @@ static int init_hdm_decoder(struct cxl_port *port, struct cxl_decoder *cxld, } target_list; if (should_emulate_decoders(info)) - return cxl_setup_hdm_decoder_from_dvsec(port, cxld, which, info); + return cxl_setup_hdm_decoder_from_dvsec(port, cxld, dpa_base, + which, info); ctrl = readl(hdm + CXL_HDM_DECODER0_CTRL_OFFSET(which)); base = ioread64_hi_lo(hdm + CXL_HDM_DECODER0_BASE_LOW_OFFSET(which)); -- cgit v1.2.3