diff options
author | Dan Williams <dan.j.williams@intel.com> | 2023-04-03 23:44:41 +0200 |
---|---|---|
committer | Dan Williams <dan.j.williams@intel.com> | 2023-04-05 00:34:34 +0200 |
commit | 030f880342b875c7d714d06d3ca4058ae9f13fee (patch) | |
tree | eac15b70e3c7a86b9fbb6388e0d37ed47d0c071b | |
parent | cxl/port: Fix find_cxl_root() for RCDs and simplify it (diff) | |
download | linux-030f880342b875c7d714d06d3ca4058ae9f13fee.tar.xz linux-030f880342b875c7d714d06d3ca4058ae9f13fee.zip |
cxl/region: Fix region setup/teardown for RCDs
RCDs (CXL memory devices that link train without VH capability and show
up as root complex integrated endpoints) hide the presence of the link
between the endpoint and the host-bridge. The CXL region setup/teardown
paths assume that a link hop is present and go looking for at least one
'struct cxl_port' instance between the CXL root port-object and an
endpoint port-object, leading to crashes of the form:
BUG: kernel NULL pointer dereference, address: 0000000000000008
[..]
RIP: 0010:cxl_region_setup_targets+0x3e9/0xae0 [cxl_core]
[..]
Call Trace:
<TASK>
cxl_region_attach+0x46c/0x7a0 [cxl_core]
cxl_create_region+0x20b/0x270 [cxl_core]
cxl_mock_mem_probe+0x641/0x800 [cxl_mock_mem]
platform_probe+0x5b/0xb0
Detect RCDs explicitly and skip walking the non-existent port hierarchy
between root and endpoint in that case.
While this has been a problem since:
commit 0a19bfc8de93 ("cxl/port: Add RCD endpoint port enumeration")
...it becomes a more reliable crash scenario with the new autodiscovery
implementation.
Fixes: a32320b71f08 ("cxl/region: Add region autodiscovery")
Reviewed-by: Ira Weiny <ira.weiny@intel.com>
Reviewed-by: Dave Jiang <dave.jiang@intel.com>
Link: https://lore.kernel.org/r/168002858268.50647.728091521032131326.stgit@dwillia2-xfh.jf.intel.com
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
-rw-r--r-- | drivers/cxl/core/region.c | 28 |
1 files changed, 27 insertions, 1 deletions
diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c
index 808f23ec4e2b..52bbf6268d5f 100644
--- a/drivers/cxl/core/region.c
+++ b/drivers/cxl/core/region.c
@@ -134,9 +134,13 @@ static int cxl_region_decode_reset(struct cxl_region *cxlr, int count)
 		struct cxl_endpoint_decoder *cxled = p->targets[i];
 		struct cxl_memdev *cxlmd = cxled_to_memdev(cxled);
 		struct cxl_port *iter = cxled_to_port(cxled);
+		struct cxl_dev_state *cxlds = cxlmd->cxlds;
 		struct cxl_ep *ep;
 		int rc = 0;
 
+		if (cxlds->rcd)
+			goto endpoint_reset;
+
 		while (!is_cxl_root(to_cxl_port(iter->dev.parent)))
 			iter = to_cxl_port(iter->dev.parent);
 
@@ -153,6 +157,7 @@ static int cxl_region_decode_reset(struct cxl_region *cxlr, int count)
 				return rc;
 		}
 
+endpoint_reset:
 		rc = cxled->cxld.reset(&cxled->cxld);
 		if (rc)
 			return rc;
@@ -1199,6 +1204,7 @@ static void cxl_region_teardown_targets(struct cxl_region *cxlr)
 {
 	struct cxl_region_params *p = &cxlr->params;
 	struct cxl_endpoint_decoder *cxled;
+	struct cxl_dev_state *cxlds;
 	struct cxl_memdev *cxlmd;
 	struct cxl_port *iter;
 	struct cxl_ep *ep;
@@ -1214,6 +1220,10 @@ static void cxl_region_teardown_targets(struct cxl_region *cxlr)
 	for (i = 0; i < p->nr_targets; i++) {
 		cxled = p->targets[i];
 		cxlmd = cxled_to_memdev(cxled);
+		cxlds = cxlmd->cxlds;
+
+		if (cxlds->rcd)
+			continue;
 
 		iter = cxled_to_port(cxled);
 		while (!is_cxl_root(to_cxl_port(iter->dev.parent)))
@@ -1229,14 +1239,24 @@ static int cxl_region_setup_targets(struct cxl_region *cxlr)
 {
 	struct cxl_region_params *p = &cxlr->params;
 	struct cxl_endpoint_decoder *cxled;
+	struct cxl_dev_state *cxlds;
+	int i, rc, rch = 0, vh = 0;
 	struct cxl_memdev *cxlmd;
 	struct cxl_port *iter;
 	struct cxl_ep *ep;
-	int i, rc;
 
 	for (i = 0; i < p->nr_targets; i++) {
 		cxled = p->targets[i];
 		cxlmd = cxled_to_memdev(cxled);
+		cxlds = cxlmd->cxlds;
+
+		/* validate that all targets agree on topology */
+		if (!cxlds->rcd) {
+			vh++;
+		} else {
+			rch++;
+			continue;
+		}
 
 		iter = cxled_to_port(cxled);
 		while (!is_cxl_root(to_cxl_port(iter->dev.parent)))
@@ -1256,6 +1276,12 @@ static int cxl_region_setup_targets(struct cxl_region *cxlr)
 		}
 	}
 
+	if (rch && vh) {
+		dev_err(&cxlr->dev, "mismatched CXL topologies detected\n");
+		cxl_region_teardown_targets(cxlr);
+		return -ENXIO;
+	}
+
 	return 0;
 }
 