summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorDan Williams <dan.j.williams@intel.com>2023-02-11 03:11:01 +0100
committerDan Williams <dan.j.williams@intel.com>2023-02-11 03:11:01 +0100
commitb8b9ffced017528bcdd262730ab10bc5084c3bb4 (patch)
tree1edb4782acb2d6704f3847181a480f375214a40f
parentMerge branch 'for-6.3/cxl' into cxl/next (diff)
parentcxl/dax: Create dax devices for CXL RAM regions (diff)
downloadlinux-b8b9ffced017528bcdd262730ab10bc5084c3bb4.tar.xz
linux-b8b9ffced017528bcdd262730ab10bc5084c3bb4.zip
Merge branch 'for-6.3/cxl-ram-region' into cxl/next
Include the support for enumerating and provisioning ram regions for v6.3. This also include a default policy change for ram / volatile device-dax instances to assign them to the dax_kmem driver by default.
-rw-r--r--Documentation/ABI/testing/sysfs-bus-cxl64
-rw-r--r--MAINTAINERS1
-rw-r--r--drivers/acpi/numa/hmat.c4
-rw-r--r--drivers/cxl/Kconfig12
-rw-r--r--drivers/cxl/acpi.c3
-rw-r--r--drivers/cxl/core/core.h7
-rw-r--r--drivers/cxl/core/hdm.c25
-rw-r--r--drivers/cxl/core/memdev.c1
-rw-r--r--drivers/cxl/core/pci.c5
-rw-r--r--drivers/cxl/core/port.c92
-rw-r--r--drivers/cxl/core/region.c856
-rw-r--r--drivers/cxl/cxl.h57
-rw-r--r--drivers/cxl/cxlmem.h5
-rw-r--r--drivers/cxl/port.c112
-rw-r--r--drivers/dax/Kconfig17
-rw-r--r--drivers/dax/Makefile2
-rw-r--r--drivers/dax/bus.c53
-rw-r--r--drivers/dax/bus.h12
-rw-r--r--drivers/dax/cxl.c53
-rw-r--r--drivers/dax/device.c3
-rw-r--r--drivers/dax/hmem/Makefile3
-rw-r--r--drivers/dax/hmem/device.c102
-rw-r--r--drivers/dax/hmem/hmem.c148
-rw-r--r--drivers/dax/kmem.c1
-rw-r--r--include/linux/dax.h7
-rw-r--r--include/linux/memregion.h2
-rw-r--r--include/linux/range.h5
-rw-r--r--lib/stackinit_kunit.c6
-rw-r--r--tools/testing/cxl/test/cxl.c147
29 files changed, 1488 insertions, 317 deletions
diff --git a/Documentation/ABI/testing/sysfs-bus-cxl b/Documentation/ABI/testing/sysfs-bus-cxl
index 329a7e46c805..3acf2f17a73f 100644
--- a/Documentation/ABI/testing/sysfs-bus-cxl
+++ b/Documentation/ABI/testing/sysfs-bus-cxl
@@ -198,7 +198,7 @@ Description:
What: /sys/bus/cxl/devices/endpointX/CDAT
Date: July, 2022
-KernelVersion: v5.20
+KernelVersion: v6.0
Contact: linux-cxl@vger.kernel.org
Description:
(RO) If this sysfs entry is not present no DOE mailbox was
@@ -209,7 +209,7 @@ Description:
What: /sys/bus/cxl/devices/decoderX.Y/mode
Date: May, 2022
-KernelVersion: v5.20
+KernelVersion: v6.0
Contact: linux-cxl@vger.kernel.org
Description:
(RW) When a CXL decoder is of devtype "cxl_decoder_endpoint" it
@@ -229,7 +229,7 @@ Description:
What: /sys/bus/cxl/devices/decoderX.Y/dpa_resource
Date: May, 2022
-KernelVersion: v5.20
+KernelVersion: v6.0
Contact: linux-cxl@vger.kernel.org
Description:
(RO) When a CXL decoder is of devtype "cxl_decoder_endpoint",
@@ -240,7 +240,7 @@ Description:
What: /sys/bus/cxl/devices/decoderX.Y/dpa_size
Date: May, 2022
-KernelVersion: v5.20
+KernelVersion: v6.0
Contact: linux-cxl@vger.kernel.org
Description:
(RW) When a CXL decoder is of devtype "cxl_decoder_endpoint" it
@@ -260,7 +260,7 @@ Description:
What: /sys/bus/cxl/devices/decoderX.Y/interleave_ways
Date: May, 2022
-KernelVersion: v5.20
+KernelVersion: v6.0
Contact: linux-cxl@vger.kernel.org
Description:
(RO) The number of targets across which this decoder's host
@@ -275,7 +275,7 @@ Description:
What: /sys/bus/cxl/devices/decoderX.Y/interleave_granularity
Date: May, 2022
-KernelVersion: v5.20
+KernelVersion: v6.0
Contact: linux-cxl@vger.kernel.org
Description:
(RO) The number of consecutive bytes of host physical address
@@ -285,25 +285,25 @@ Description:
interleave_granularity).
-What: /sys/bus/cxl/devices/decoderX.Y/create_pmem_region
-Date: May, 2022
-KernelVersion: v5.20
+What: /sys/bus/cxl/devices/decoderX.Y/create_{pmem,ram}_region
+Date: May, 2022, January, 2023
+KernelVersion: v6.0 (pmem), v6.3 (ram)
Contact: linux-cxl@vger.kernel.org
Description:
(RW) Write a string in the form 'regionZ' to start the process
- of defining a new persistent memory region (interleave-set)
- within the decode range bounded by root decoder 'decoderX.Y'.
- The value written must match the current value returned from
- reading this attribute. An atomic compare exchange operation is
- done on write to assign the requested id to a region and
- allocate the region-id for the next creation attempt. EBUSY is
- returned if the region name written does not match the current
- cached value.
+ of defining a new persistent, or volatile memory region
+ (interleave-set) within the decode range bounded by root decoder
+ 'decoderX.Y'. The value written must match the current value
+ returned from reading this attribute. An atomic compare exchange
+ operation is done on write to assign the requested id to a
+ region and allocate the region-id for the next creation attempt.
+ EBUSY is returned if the region name written does not match the
+ current cached value.
What: /sys/bus/cxl/devices/decoderX.Y/delete_region
Date: May, 2022
-KernelVersion: v5.20
+KernelVersion: v6.0
Contact: linux-cxl@vger.kernel.org
Description:
(WO) Write a string in the form 'regionZ' to delete that region,
@@ -312,17 +312,18 @@ Description:
What: /sys/bus/cxl/devices/regionZ/uuid
Date: May, 2022
-KernelVersion: v5.20
+KernelVersion: v6.0
Contact: linux-cxl@vger.kernel.org
Description:
(RW) Write a unique identifier for the region. This field must
be set for persistent regions and it must not conflict with the
- UUID of another region.
+ UUID of another region. For volatile ram regions this
+ attribute is a read-only empty string.
What: /sys/bus/cxl/devices/regionZ/interleave_granularity
Date: May, 2022
-KernelVersion: v5.20
+KernelVersion: v6.0
Contact: linux-cxl@vger.kernel.org
Description:
(RW) Set the number of consecutive bytes each device in the
@@ -333,7 +334,7 @@ Description:
What: /sys/bus/cxl/devices/regionZ/interleave_ways
Date: May, 2022
-KernelVersion: v5.20
+KernelVersion: v6.0
Contact: linux-cxl@vger.kernel.org
Description:
(RW) Configures the number of devices participating in the
@@ -343,7 +344,7 @@ Description:
What: /sys/bus/cxl/devices/regionZ/size
Date: May, 2022
-KernelVersion: v5.20
+KernelVersion: v6.0
Contact: linux-cxl@vger.kernel.org
Description:
(RW) System physical address space to be consumed by the region.
@@ -358,9 +359,20 @@ Description:
results in the same address being allocated.
+What: /sys/bus/cxl/devices/regionZ/mode
+Date: January, 2023
+KernelVersion: v6.3
+Contact: linux-cxl@vger.kernel.org
+Description:
+ (RO) The mode of a region is established at region creation time
+ and dictates the mode of the endpoint decoder that comprise the
+ region. For more details on the possible modes see
+ /sys/bus/cxl/devices/decoderX.Y/mode
+
+
What: /sys/bus/cxl/devices/regionZ/resource
Date: May, 2022
-KernelVersion: v5.20
+KernelVersion: v6.0
Contact: linux-cxl@vger.kernel.org
Description:
(RO) A region is a contiguous partition of a CXL root decoder
@@ -372,7 +384,7 @@ Description:
What: /sys/bus/cxl/devices/regionZ/target[0..N]
Date: May, 2022
-KernelVersion: v5.20
+KernelVersion: v6.0
Contact: linux-cxl@vger.kernel.org
Description:
(RW) Write an endpoint decoder object name to 'targetX' where X
@@ -391,7 +403,7 @@ Description:
What: /sys/bus/cxl/devices/regionZ/commit
Date: May, 2022
-KernelVersion: v5.20
+KernelVersion: v6.0
Contact: linux-cxl@vger.kernel.org
Description:
(RW) Write a boolean 'true' string value to this attribute to
diff --git a/MAINTAINERS b/MAINTAINERS
index 8a5c25c20d00..b5993d8c3661 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -6034,6 +6034,7 @@ M: Dan Williams <dan.j.williams@intel.com>
M: Vishal Verma <vishal.l.verma@intel.com>
M: Dave Jiang <dave.jiang@intel.com>
L: nvdimm@lists.linux.dev
+L: linux-cxl@vger.kernel.org
S: Supported
F: drivers/dax/
diff --git a/drivers/acpi/numa/hmat.c b/drivers/acpi/numa/hmat.c
index 605a0c7053be..bba268ecd802 100644
--- a/drivers/acpi/numa/hmat.c
+++ b/drivers/acpi/numa/hmat.c
@@ -718,7 +718,7 @@ static void hmat_register_target_devices(struct memory_target *target)
for (res = target->memregions.child; res; res = res->sibling) {
int target_nid = pxm_to_node(target->memory_pxm);
- hmem_register_device(target_nid, res);
+ hmem_register_resource(target_nid, res);
}
}
@@ -869,4 +869,4 @@ out_put:
acpi_put_table(tbl);
return 0;
}
-device_initcall(hmat_init);
+subsys_initcall(hmat_init);
diff --git a/drivers/cxl/Kconfig b/drivers/cxl/Kconfig
index 9e709ecba50f..ff4e78117b31 100644
--- a/drivers/cxl/Kconfig
+++ b/drivers/cxl/Kconfig
@@ -104,12 +104,22 @@ config CXL_SUSPEND
depends on SUSPEND && CXL_MEM
config CXL_REGION
- bool
+ bool "CXL: Region Support"
default CXL_BUS
# For MAX_PHYSMEM_BITS
depends on SPARSEMEM
select MEMREGION
select GET_FREE_REGION
+ help
+ Enable the CXL core to enumerate and provision CXL regions. A CXL
+ region is defined by one or more CXL expanders that decode a given
+ system-physical address range. For CXL regions established by
+ platform-firmware this option enables memory error handling to
+ identify the devices participating in a given interleaved memory
+ range. Otherwise, platform-firmware managed CXL is enabled by being
+ placed in the system address map and does not need a driver.
+
+ If unsure say 'y'
config CXL_REGION_INVALIDATION_TEST
bool "CXL: Region Cache Management Bypass (TEST)"
diff --git a/drivers/cxl/acpi.c b/drivers/cxl/acpi.c
index d7159fb3beef..7e1765b09e04 100644
--- a/drivers/cxl/acpi.c
+++ b/drivers/cxl/acpi.c
@@ -731,7 +731,8 @@ static void __exit cxl_acpi_exit(void)
cxl_bus_drain();
}
-module_init(cxl_acpi_init);
+/* load before dax_hmem sees 'Soft Reserved' CXL ranges */
+subsys_initcall(cxl_acpi_init);
module_exit(cxl_acpi_exit);
MODULE_LICENSE("GPL v2");
MODULE_IMPORT_NS(CXL);
diff --git a/drivers/cxl/core/core.h b/drivers/cxl/core/core.h
index 8c04672dca56..cde475e13216 100644
--- a/drivers/cxl/core/core.h
+++ b/drivers/cxl/core/core.h
@@ -11,15 +11,18 @@ extern struct attribute_group cxl_base_attribute_group;
#ifdef CONFIG_CXL_REGION
extern struct device_attribute dev_attr_create_pmem_region;
+extern struct device_attribute dev_attr_create_ram_region;
extern struct device_attribute dev_attr_delete_region;
extern struct device_attribute dev_attr_region;
extern const struct device_type cxl_pmem_region_type;
+extern const struct device_type cxl_dax_region_type;
extern const struct device_type cxl_region_type;
void cxl_decoder_kill_region(struct cxl_endpoint_decoder *cxled);
#define CXL_REGION_ATTR(x) (&dev_attr_##x.attr)
#define CXL_REGION_TYPE(x) (&cxl_region_type)
#define SET_CXL_REGION_ATTR(x) (&dev_attr_##x.attr),
#define CXL_PMEM_REGION_TYPE(x) (&cxl_pmem_region_type)
+#define CXL_DAX_REGION_TYPE(x) (&cxl_dax_region_type)
int cxl_region_init(void);
void cxl_region_exit(void);
#else
@@ -37,6 +40,7 @@ static inline void cxl_region_exit(void)
#define CXL_REGION_TYPE(x) NULL
#define SET_CXL_REGION_ATTR(x)
#define CXL_PMEM_REGION_TYPE(x) NULL
+#define CXL_DAX_REGION_TYPE(x) NULL
#endif
struct cxl_send_command;
@@ -56,9 +60,6 @@ resource_size_t cxl_dpa_size(struct cxl_endpoint_decoder *cxled);
resource_size_t cxl_dpa_resource_start(struct cxl_endpoint_decoder *cxled);
extern struct rw_semaphore cxl_dpa_rwsem;
-bool is_switch_decoder(struct device *dev);
-struct cxl_switch_decoder *to_cxl_switch_decoder(struct device *dev);
-
int cxl_memdev_init(void);
void cxl_memdev_exit(void);
void cxl_mbox_init(void);
diff --git a/drivers/cxl/core/hdm.c b/drivers/cxl/core/hdm.c
index dcc16d7cb8f3..80eccae6ba9e 100644
--- a/drivers/cxl/core/hdm.c
+++ b/drivers/cxl/core/hdm.c
@@ -279,7 +279,7 @@ success:
return 0;
}
-static int devm_cxl_dpa_reserve(struct cxl_endpoint_decoder *cxled,
+int devm_cxl_dpa_reserve(struct cxl_endpoint_decoder *cxled,
resource_size_t base, resource_size_t len,
resource_size_t skipped)
{
@@ -295,6 +295,7 @@ static int devm_cxl_dpa_reserve(struct cxl_endpoint_decoder *cxled,
return devm_add_action_or_reset(&port->dev, cxl_dpa_release, cxled);
}
+EXPORT_SYMBOL_NS_GPL(devm_cxl_dpa_reserve, CXL);
resource_size_t cxl_dpa_size(struct cxl_endpoint_decoder *cxled)
{
@@ -676,6 +677,14 @@ static int cxl_decoder_reset(struct cxl_decoder *cxld)
port->commit_end--;
cxld->flags &= ~CXL_DECODER_F_ENABLE;
+ /* Userspace is now responsible for reconfiguring this decoder */
+ if (is_endpoint_decoder(&cxld->dev)) {
+ struct cxl_endpoint_decoder *cxled;
+
+ cxled = to_cxl_endpoint_decoder(&cxld->dev);
+ cxled->state = CXL_DECODER_STATE_MANUAL;
+ }
+
return 0;
}
@@ -783,6 +792,9 @@ static int init_hdm_decoder(struct cxl_port *port, struct cxl_decoder *cxld,
return rc;
}
*dpa_base += dpa_size + skip;
+
+ cxled->state = CXL_DECODER_STATE_AUTO;
+
return 0;
}
@@ -826,7 +838,8 @@ int devm_cxl_enumerate_decoders(struct cxl_hdm *cxlhdm)
cxled = cxl_endpoint_decoder_alloc(port);
if (IS_ERR(cxled)) {
dev_warn(&port->dev,
- "Failed to allocate the decoder\n");
+ "Failed to allocate decoder%d.%d\n",
+ port->id, i);
return PTR_ERR(cxled);
}
cxld = &cxled->cxld;
@@ -836,7 +849,8 @@ int devm_cxl_enumerate_decoders(struct cxl_hdm *cxlhdm)
cxlsd = cxl_switch_decoder_alloc(port, target_count);
if (IS_ERR(cxlsd)) {
dev_warn(&port->dev,
- "Failed to allocate the decoder\n");
+ "Failed to allocate decoder%d.%d\n",
+ port->id, i);
return PTR_ERR(cxlsd);
}
cxld = &cxlsd->cxld;
@@ -844,13 +858,16 @@ int devm_cxl_enumerate_decoders(struct cxl_hdm *cxlhdm)
rc = init_hdm_decoder(port, cxld, target_map, hdm, i, &dpa_base);
if (rc) {
+ dev_warn(&port->dev,
+ "Failed to initialize decoder%d.%d\n",
+ port->id, i);
put_device(&cxld->dev);
return rc;
}
rc = add_hdm_decoder(port, cxld, target_map);
if (rc) {
dev_warn(&port->dev,
- "Failed to add decoder to port\n");
+ "Failed to add decoder%d.%d\n", port->id, i);
return rc;
}
}
diff --git a/drivers/cxl/core/memdev.c b/drivers/cxl/core/memdev.c
index 12bd9ddaba22..0af8856936dc 100644
--- a/drivers/cxl/core/memdev.c
+++ b/drivers/cxl/core/memdev.c
@@ -246,6 +246,7 @@ static struct cxl_memdev *cxl_memdev_alloc(struct cxl_dev_state *cxlds,
if (rc < 0)
goto err;
cxlmd->id = rc;
+ cxlmd->depth = -1;
dev = &cxlmd->dev;
device_initialize(dev);
diff --git a/drivers/cxl/core/pci.c b/drivers/cxl/core/pci.c
index 184ead6a2796..2b463f107cb5 100644
--- a/drivers/cxl/core/pci.c
+++ b/drivers/cxl/core/pci.c
@@ -214,11 +214,6 @@ static int devm_cxl_enable_mem(struct device *host, struct cxl_dev_state *cxlds)
return devm_add_action_or_reset(host, clear_mem_enable, cxlds);
}
-static bool range_contains(struct range *r1, struct range *r2)
-{
- return r1->start <= r2->start && r1->end >= r2->end;
-}
-
/* require dvsec ranges to be covered by a locked platform window */
static int dvsec_range_allowed(struct device *dev, void *arg)
{
diff --git a/drivers/cxl/core/port.c b/drivers/cxl/core/port.c
index 609aa6801b14..724190418698 100644
--- a/drivers/cxl/core/port.c
+++ b/drivers/cxl/core/port.c
@@ -46,6 +46,8 @@ static int cxl_device_id(struct device *dev)
return CXL_DEVICE_NVDIMM;
if (dev->type == CXL_PMEM_REGION_TYPE())
return CXL_DEVICE_PMEM_REGION;
+ if (dev->type == CXL_DAX_REGION_TYPE())
+ return CXL_DEVICE_DAX_REGION;
if (is_cxl_port(dev)) {
if (is_cxl_root(to_cxl_port(dev)))
return CXL_DEVICE_ROOT;
@@ -180,17 +182,7 @@ static ssize_t mode_show(struct device *dev, struct device_attribute *attr,
{
struct cxl_endpoint_decoder *cxled = to_cxl_endpoint_decoder(dev);
- switch (cxled->mode) {
- case CXL_DECODER_RAM:
- return sysfs_emit(buf, "ram\n");
- case CXL_DECODER_PMEM:
- return sysfs_emit(buf, "pmem\n");
- case CXL_DECODER_NONE:
- return sysfs_emit(buf, "none\n");
- case CXL_DECODER_MIXED:
- default:
- return sysfs_emit(buf, "mixed\n");
- }
+ return sysfs_emit(buf, "%s\n", cxl_decoder_mode_name(cxled->mode));
}
static ssize_t mode_store(struct device *dev, struct device_attribute *attr,
@@ -304,6 +296,7 @@ static struct attribute *cxl_decoder_root_attrs[] = {
&dev_attr_cap_type3.attr,
&dev_attr_target_list.attr,
SET_CXL_REGION_ATTR(create_pmem_region)
+ SET_CXL_REGION_ATTR(create_ram_region)
SET_CXL_REGION_ATTR(delete_region)
NULL,
};
@@ -315,6 +308,13 @@ static bool can_create_pmem(struct cxl_root_decoder *cxlrd)
return (cxlrd->cxlsd.cxld.flags & flags) == flags;
}
+static bool can_create_ram(struct cxl_root_decoder *cxlrd)
+{
+ unsigned long flags = CXL_DECODER_F_TYPE3 | CXL_DECODER_F_RAM;
+
+ return (cxlrd->cxlsd.cxld.flags & flags) == flags;
+}
+
static umode_t cxl_root_decoder_visible(struct kobject *kobj, struct attribute *a, int n)
{
struct device *dev = kobj_to_dev(kobj);
@@ -323,7 +323,11 @@ static umode_t cxl_root_decoder_visible(struct kobject *kobj, struct attribute *
if (a == CXL_REGION_ATTR(create_pmem_region) && !can_create_pmem(cxlrd))
return 0;
- if (a == CXL_REGION_ATTR(delete_region) && !can_create_pmem(cxlrd))
+ if (a == CXL_REGION_ATTR(create_ram_region) && !can_create_ram(cxlrd))
+ return 0;
+
+ if (a == CXL_REGION_ATTR(delete_region) &&
+ !(can_create_pmem(cxlrd) || can_create_ram(cxlrd)))
return 0;
return a->mode;
@@ -444,6 +448,7 @@ bool is_endpoint_decoder(struct device *dev)
{
return dev->type == &cxl_decoder_endpoint_type;
}
+EXPORT_SYMBOL_NS_GPL(is_endpoint_decoder, CXL);
bool is_root_decoder(struct device *dev)
{
@@ -455,6 +460,7 @@ bool is_switch_decoder(struct device *dev)
{
return is_root_decoder(dev) || dev->type == &cxl_decoder_switch_type;
}
+EXPORT_SYMBOL_NS_GPL(is_switch_decoder, CXL);
struct cxl_decoder *to_cxl_decoder(struct device *dev)
{
@@ -482,6 +488,7 @@ struct cxl_switch_decoder *to_cxl_switch_decoder(struct device *dev)
return NULL;
return container_of(dev, struct cxl_switch_decoder, cxld.dev);
}
+EXPORT_SYMBOL_NS_GPL(to_cxl_switch_decoder, CXL);
static void cxl_ep_release(struct cxl_ep *ep)
{
@@ -1207,6 +1214,7 @@ int cxl_endpoint_autoremove(struct cxl_memdev *cxlmd, struct cxl_port *endpoint)
get_device(&endpoint->dev);
dev_set_drvdata(dev, endpoint);
+ cxlmd->depth = endpoint->depth;
return devm_add_action_or_reset(dev, delete_endpoint, cxlmd);
}
EXPORT_SYMBOL_NS_GPL(cxl_endpoint_autoremove, CXL);
@@ -1241,50 +1249,55 @@ static void reap_dports(struct cxl_port *port)
}
}
+struct detach_ctx {
+ struct cxl_memdev *cxlmd;
+ int depth;
+};
+
+static int port_has_memdev(struct device *dev, const void *data)
+{
+ const struct detach_ctx *ctx = data;
+ struct cxl_port *port;
+
+ if (!is_cxl_port(dev))
+ return 0;
+
+ port = to_cxl_port(dev);
+ if (port->depth != ctx->depth)
+ return 0;
+
+ return !!cxl_ep_load(port, ctx->cxlmd);
+}
+
static void cxl_detach_ep(void *data)
{
struct cxl_memdev *cxlmd = data;
- struct device *iter;
- for (iter = &cxlmd->dev; iter; iter = grandparent(iter)) {
- struct device *dport_dev = grandparent(iter);
+ for (int i = cxlmd->depth - 1; i >= 1; i--) {
struct cxl_port *port, *parent_port;
+ struct detach_ctx ctx = {
+ .cxlmd = cxlmd,
+ .depth = i,
+ };
+ struct device *dev;
struct cxl_ep *ep;
bool died = false;
- if (!dport_dev)
- break;
-
- port = find_cxl_port(dport_dev, NULL);
- if (!port)
- continue;
-
- if (is_cxl_root(port)) {
- put_device(&port->dev);
+ dev = bus_find_device(&cxl_bus_type, NULL, &ctx,
+ port_has_memdev);
+ if (!dev)
continue;
- }
+ port = to_cxl_port(dev);
parent_port = to_cxl_port(port->dev.parent);
device_lock(&parent_port->dev);
- if (!parent_port->dev.driver) {
- /*
- * The bottom-up race to delete the port lost to a
- * top-down port disable, give up here, because the
- * parent_port ->remove() will have cleaned up all
- * descendants.
- */
- device_unlock(&parent_port->dev);
- put_device(&port->dev);
- continue;
- }
-
device_lock(&port->dev);
ep = cxl_ep_load(port, cxlmd);
dev_dbg(&cxlmd->dev, "disconnect %s from %s\n",
ep ? dev_name(ep->ep) : "", dev_name(&port->dev));
cxl_ep_remove(port, ep);
if (ep && !port->dead && xa_empty(&port->endpoints) &&
- !is_cxl_root(parent_port)) {
+ !is_cxl_root(parent_port) && parent_port->dev.driver) {
/*
* This was the last ep attached to a dynamically
* enumerated port. Block new cxl_add_ep() and garbage
@@ -1620,6 +1633,7 @@ struct cxl_root_decoder *cxl_root_decoder_alloc(struct cxl_port *port,
}
cxlrd->calc_hb = calc_hb;
+ mutex_init(&cxlrd->range_lock);
cxld = &cxlsd->cxld;
cxld->dev.type = &cxl_decoder_root_type;
@@ -2003,6 +2017,6 @@ static void cxl_core_exit(void)
debugfs_remove_recursive(cxl_debugfs);
}
-module_init(cxl_core_init);
+subsys_initcall(cxl_core_init);
module_exit(cxl_core_exit);
MODULE_LICENSE("GPL v2");
diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c
index 67e83d961670..fe1d8392870e 100644
--- a/drivers/cxl/core/region.c
+++ b/drivers/cxl/core/region.c
@@ -6,6 +6,7 @@
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/uuid.h>
+#include <linux/sort.h>
#include <linux/idr.h>
#include <cxlmem.h>
#include <cxl.h>
@@ -45,7 +46,10 @@ static ssize_t uuid_show(struct device *dev, struct device_attribute *attr,
rc = down_read_interruptible(&cxl_region_rwsem);
if (rc)
return rc;
- rc = sysfs_emit(buf, "%pUb\n", &p->uuid);
+ if (cxlr->mode != CXL_DECODER_PMEM)
+ rc = sysfs_emit(buf, "\n");
+ else
+ rc = sysfs_emit(buf, "%pUb\n", &p->uuid);
up_read(&cxl_region_rwsem);
return rc;
@@ -302,8 +306,12 @@ static umode_t cxl_region_visible(struct kobject *kobj, struct attribute *a,
struct device *dev = kobj_to_dev(kobj);
struct cxl_region *cxlr = to_cxl_region(dev);
+ /*
+ * Support tooling that expects to find a 'uuid' attribute for all
+ * regions regardless of mode.
+ */
if (a == &dev_attr_uuid.attr && cxlr->mode != CXL_DECODER_PMEM)
- return 0;
+ return 0444;
return a->mode;
}
@@ -460,6 +468,15 @@ static ssize_t resource_show(struct device *dev, struct device_attribute *attr,
}
static DEVICE_ATTR_RO(resource);
+static ssize_t mode_show(struct device *dev, struct device_attribute *attr,
+ char *buf)
+{
+ struct cxl_region *cxlr = to_cxl_region(dev);
+
+ return sysfs_emit(buf, "%s\n", cxl_decoder_mode_name(cxlr->mode));
+}
+static DEVICE_ATTR_RO(mode);
+
static int alloc_hpa(struct cxl_region *cxlr, resource_size_t size)
{
struct cxl_root_decoder *cxlrd = to_cxl_root_decoder(cxlr->dev.parent);
@@ -510,7 +527,12 @@ static void cxl_region_iomem_release(struct cxl_region *cxlr)
if (device_is_registered(&cxlr->dev))
lockdep_assert_held_write(&cxl_region_rwsem);
if (p->res) {
- remove_resource(p->res);
+ /*
+ * Autodiscovered regions may not have been able to insert their
+ * resource.
+ */
+ if (p->res->parent)
+ remove_resource(p->res);
kfree(p->res);
p->res = NULL;
}
@@ -587,6 +609,7 @@ static struct attribute *cxl_region_attrs[] = {
&dev_attr_interleave_granularity.attr,
&dev_attr_resource.attr,
&dev_attr_size.attr,
+ &dev_attr_mode.attr,
NULL,
};
@@ -1090,12 +1113,35 @@ static int cxl_port_setup_targets(struct cxl_port *port,
return rc;
}
- cxld->interleave_ways = iw;
- cxld->interleave_granularity = ig;
- cxld->hpa_range = (struct range) {
- .start = p->res->start,
- .end = p->res->end,
- };
+ if (test_bit(CXL_REGION_F_AUTO, &cxlr->flags)) {
+ if (cxld->interleave_ways != iw ||
+ cxld->interleave_granularity != ig ||
+ cxld->hpa_range.start != p->res->start ||
+ cxld->hpa_range.end != p->res->end ||
+ ((cxld->flags & CXL_DECODER_F_ENABLE) == 0)) {
+ dev_err(&cxlr->dev,
+ "%s:%s %s expected iw: %d ig: %d %pr\n",
+ dev_name(port->uport), dev_name(&port->dev),
+ __func__, iw, ig, p->res);
+ dev_err(&cxlr->dev,
+ "%s:%s %s got iw: %d ig: %d state: %s %#llx:%#llx\n",
+ dev_name(port->uport), dev_name(&port->dev),
+ __func__, cxld->interleave_ways,
+ cxld->interleave_granularity,
+ (cxld->flags & CXL_DECODER_F_ENABLE) ?
+ "enabled" :
+ "disabled",
+ cxld->hpa_range.start, cxld->hpa_range.end);
+ return -ENXIO;
+ }
+ } else {
+ cxld->interleave_ways = iw;
+ cxld->interleave_granularity = ig;
+ cxld->hpa_range = (struct range) {
+ .start = p->res->start,
+ .end = p->res->end,
+ };
+ }
dev_dbg(&cxlr->dev, "%s:%s iw: %d ig: %d\n", dev_name(port->uport),
dev_name(&port->dev), iw, ig);
add_target:
@@ -1106,7 +1152,17 @@ add_target:
dev_name(&cxlmd->dev), dev_name(&cxled->cxld.dev), pos);
return -ENXIO;
}
- cxlsd->target[cxl_rr->nr_targets_set] = ep->dport;
+ if (test_bit(CXL_REGION_F_AUTO, &cxlr->flags)) {
+ if (cxlsd->target[cxl_rr->nr_targets_set] != ep->dport) {
+ dev_dbg(&cxlr->dev, "%s:%s: %s expected %s at %d\n",
+ dev_name(port->uport), dev_name(&port->dev),
+ dev_name(&cxlsd->cxld.dev),
+ dev_name(ep->dport->dport),
+ cxl_rr->nr_targets_set);
+ return -ENXIO;
+ }
+ } else
+ cxlsd->target[cxl_rr->nr_targets_set] = ep->dport;
inc = 1;
out_target_set:
cxl_rr->nr_targets_set += inc;
@@ -1148,6 +1204,13 @@ static void cxl_region_teardown_targets(struct cxl_region *cxlr)
struct cxl_ep *ep;
int i;
+ /*
+ * In the auto-discovery case skip automatic teardown since the
+ * address space is already active
+ */
+ if (test_bit(CXL_REGION_F_AUTO, &cxlr->flags))
+ return;
+
for (i = 0; i < p->nr_targets; i++) {
cxled = p->targets[i];
cxlmd = cxled_to_memdev(cxled);
@@ -1180,8 +1243,8 @@ static int cxl_region_setup_targets(struct cxl_region *cxlr)
iter = to_cxl_port(iter->dev.parent);
/*
- * Descend the topology tree programming targets while
- * looking for conflicts.
+ * Descend the topology tree programming / validating
+ * targets while looking for conflicts.
*/
for (ep = cxl_ep_load(iter, cxlmd); iter;
iter = ep->next, ep = cxl_ep_load(iter, cxlmd)) {
@@ -1196,29 +1259,13 @@ static int cxl_region_setup_targets(struct cxl_region *cxlr)
return 0;
}
-static int cxl_region_attach(struct cxl_region *cxlr,
- struct cxl_endpoint_decoder *cxled, int pos)
+static int cxl_region_validate_position(struct cxl_region *cxlr,
+ struct cxl_endpoint_decoder *cxled,
+ int pos)
{
- struct cxl_root_decoder *cxlrd = to_cxl_root_decoder(cxlr->dev.parent);
struct cxl_memdev *cxlmd = cxled_to_memdev(cxled);
- struct cxl_port *ep_port, *root_port, *iter;
struct cxl_region_params *p = &cxlr->params;
- struct cxl_dport *dport;
- int i, rc = -ENXIO;
-
- if (cxled->mode == CXL_DECODER_DEAD) {
- dev_dbg(&cxlr->dev, "%s dead\n", dev_name(&cxled->cxld.dev));
- return -ENODEV;
- }
-
- /* all full of members, or interleave config not established? */
- if (p->state > CXL_CONFIG_INTERLEAVE_ACTIVE) {
- dev_dbg(&cxlr->dev, "region already active\n");
- return -EBUSY;
- } else if (p->state < CXL_CONFIG_INTERLEAVE_ACTIVE) {
- dev_dbg(&cxlr->dev, "interleave config missing\n");
- return -ENXIO;
- }
+ int i;
if (pos < 0 || pos >= p->interleave_ways) {
dev_dbg(&cxlr->dev, "position %d out of range %d\n", pos,
@@ -1257,6 +1304,256 @@ static int cxl_region_attach(struct cxl_region *cxlr,
}
}
+ return 0;
+}
+
+static int cxl_region_attach_position(struct cxl_region *cxlr,
+ struct cxl_root_decoder *cxlrd,
+ struct cxl_endpoint_decoder *cxled,
+ const struct cxl_dport *dport, int pos)
+{
+ struct cxl_memdev *cxlmd = cxled_to_memdev(cxled);
+ struct cxl_port *iter;
+ int rc;
+
+ if (cxlrd->calc_hb(cxlrd, pos) != dport) {
+ dev_dbg(&cxlr->dev, "%s:%s invalid target position for %s\n",
+ dev_name(&cxlmd->dev), dev_name(&cxled->cxld.dev),
+ dev_name(&cxlrd->cxlsd.cxld.dev));
+ return -ENXIO;
+ }
+
+ for (iter = cxled_to_port(cxled); !is_cxl_root(iter);
+ iter = to_cxl_port(iter->dev.parent)) {
+ rc = cxl_port_attach_region(iter, cxlr, cxled, pos);
+ if (rc)
+ goto err;
+ }
+
+ return 0;
+
+err:
+ for (iter = cxled_to_port(cxled); !is_cxl_root(iter);
+ iter = to_cxl_port(iter->dev.parent))
+ cxl_port_detach_region(iter, cxlr, cxled);
+ return rc;
+}
+
+static int cxl_region_attach_auto(struct cxl_region *cxlr,
+ struct cxl_endpoint_decoder *cxled, int pos)
+{
+ struct cxl_region_params *p = &cxlr->params;
+
+ if (cxled->state != CXL_DECODER_STATE_AUTO) {
+ dev_err(&cxlr->dev,
+ "%s: unable to add decoder to autodetected region\n",
+ dev_name(&cxled->cxld.dev));
+ return -EINVAL;
+ }
+
+ if (pos >= 0) {
+ dev_dbg(&cxlr->dev, "%s: expected auto position, not %d\n",
+ dev_name(&cxled->cxld.dev), pos);
+ return -EINVAL;
+ }
+
+ if (p->nr_targets >= p->interleave_ways) {
+ dev_err(&cxlr->dev, "%s: no more target slots available\n",
+ dev_name(&cxled->cxld.dev));
+ return -ENXIO;
+ }
+
+ /*
+ * Temporarily record the endpoint decoder into the target array. Yes,
+ * this means that userspace can view devices in the wrong position
+ * before the region activates, and must be careful to understand when
+ * it might be racing region autodiscovery.
+ */
+ pos = p->nr_targets;
+ p->targets[pos] = cxled;
+ cxled->pos = pos;
+ p->nr_targets++;
+
+ return 0;
+}
+
+static struct cxl_port *next_port(struct cxl_port *port)
+{
+ if (!port->parent_dport)
+ return NULL;
+ return port->parent_dport->port;
+}
+
+static int decoder_match_range(struct device *dev, void *data)
+{
+ struct cxl_endpoint_decoder *cxled = data;
+ struct cxl_switch_decoder *cxlsd;
+
+ if (!is_switch_decoder(dev))
+ return 0;
+
+ cxlsd = to_cxl_switch_decoder(dev);
+ return range_contains(&cxlsd->cxld.hpa_range, &cxled->cxld.hpa_range);
+}
+
+static void find_positions(const struct cxl_switch_decoder *cxlsd,
+ const struct cxl_port *iter_a,
+ const struct cxl_port *iter_b, int *a_pos,
+ int *b_pos)
+{
+ int i;
+
+ for (i = 0, *a_pos = -1, *b_pos = -1; i < cxlsd->nr_targets; i++) {
+ if (cxlsd->target[i] == iter_a->parent_dport)
+ *a_pos = i;
+ else if (cxlsd->target[i] == iter_b->parent_dport)
+ *b_pos = i;
+ if (*a_pos >= 0 && *b_pos >= 0)
+ break;
+ }
+}
+
+static int cmp_decode_pos(const void *a, const void *b)
+{
+ struct cxl_endpoint_decoder *cxled_a = *(typeof(cxled_a) *)a;
+ struct cxl_endpoint_decoder *cxled_b = *(typeof(cxled_b) *)b;
+ struct cxl_memdev *cxlmd_a = cxled_to_memdev(cxled_a);
+ struct cxl_memdev *cxlmd_b = cxled_to_memdev(cxled_b);
+ struct cxl_port *port_a = cxled_to_port(cxled_a);
+ struct cxl_port *port_b = cxled_to_port(cxled_b);
+ struct cxl_port *iter_a, *iter_b, *port = NULL;
+ struct cxl_switch_decoder *cxlsd;
+ struct device *dev;
+ int a_pos, b_pos;
+ unsigned int seq;
+
+ /* Exit early if any prior sorting failed */
+ if (cxled_a->pos < 0 || cxled_b->pos < 0)
+ return 0;
+
+ /*
+ * Walk up the hierarchy to find a shared port, find the decoder that
+ * maps the range, compare the relative position of those dport
+ * mappings.
+ */
+ for (iter_a = port_a; iter_a; iter_a = next_port(iter_a)) {
+ struct cxl_port *next_a, *next_b;
+
+ next_a = next_port(iter_a);
+ if (!next_a)
+ break;
+
+ for (iter_b = port_b; iter_b; iter_b = next_port(iter_b)) {
+ next_b = next_port(iter_b);
+ if (next_a != next_b)
+ continue;
+ port = next_a;
+ break;
+ }
+
+ if (port)
+ break;
+ }
+
+ if (!port) {
+ dev_err(cxlmd_a->dev.parent,
+ "failed to find shared port with %s\n",
+ dev_name(cxlmd_b->dev.parent));
+ goto err;
+ }
+
+ dev = device_find_child(&port->dev, cxled_a, decoder_match_range);
+ if (!dev) {
+ struct range *range = &cxled_a->cxld.hpa_range;
+
+ dev_err(port->uport,
+ "failed to find decoder that maps %#llx-%#llx\n",
+ range->start, range->end);
+ goto err;
+ }
+
+ cxlsd = to_cxl_switch_decoder(dev);
+ do {
+ seq = read_seqbegin(&cxlsd->target_lock);
+ find_positions(cxlsd, iter_a, iter_b, &a_pos, &b_pos);
+ } while (read_seqretry(&cxlsd->target_lock, seq));
+
+ put_device(dev);
+
+ if (a_pos < 0 || b_pos < 0) {
+ dev_err(port->uport,
+ "failed to find shared decoder for %s and %s\n",
+ dev_name(cxlmd_a->dev.parent),
+ dev_name(cxlmd_b->dev.parent));
+ goto err;
+ }
+
+ dev_dbg(port->uport, "%s comes %s %s\n", dev_name(cxlmd_a->dev.parent),
+ a_pos - b_pos < 0 ? "before" : "after",
+ dev_name(cxlmd_b->dev.parent));
+
+ return a_pos - b_pos;
+err:
+ cxled_a->pos = -1;
+ return 0;
+}
+
+static int cxl_region_sort_targets(struct cxl_region *cxlr)
+{
+ struct cxl_region_params *p = &cxlr->params;
+ int i, rc = 0;
+
+ sort(p->targets, p->nr_targets, sizeof(p->targets[0]), cmp_decode_pos,
+ NULL);
+
+ for (i = 0; i < p->nr_targets; i++) {
+ struct cxl_endpoint_decoder *cxled = p->targets[i];
+
+ /*
+ * Record that sorting failed, but still continue to restore
+ * cxled->pos with its ->targets[] position so that follow-on
+ * code paths can reliably do p->targets[cxled->pos] to
+ * self-reference their entry.
+ */
+ if (cxled->pos < 0)
+ rc = -ENXIO;
+ cxled->pos = i;
+ }
+
+ dev_dbg(&cxlr->dev, "region sort %s\n", rc ? "failed" : "successful");
+ return rc;
+}
+
+static int cxl_region_attach(struct cxl_region *cxlr,
+ struct cxl_endpoint_decoder *cxled, int pos)
+{
+ struct cxl_root_decoder *cxlrd = to_cxl_root_decoder(cxlr->dev.parent);
+ struct cxl_memdev *cxlmd = cxled_to_memdev(cxled);
+ struct cxl_region_params *p = &cxlr->params;
+ struct cxl_port *ep_port, *root_port;
+ struct cxl_dport *dport;
+ int rc = -ENXIO;
+
+ if (cxled->mode != cxlr->mode) {
+ dev_dbg(&cxlr->dev, "%s region mode: %d mismatch: %d\n",
+ dev_name(&cxled->cxld.dev), cxlr->mode, cxled->mode);
+ return -EINVAL;
+ }
+
+ if (cxled->mode == CXL_DECODER_DEAD) {
+ dev_dbg(&cxlr->dev, "%s dead\n", dev_name(&cxled->cxld.dev));
+ return -ENODEV;
+ }
+
+ /* all full of members, or interleave config not established? */
+ if (p->state > CXL_CONFIG_INTERLEAVE_ACTIVE) {
+ dev_dbg(&cxlr->dev, "region already active\n");
+ return -EBUSY;
+ } else if (p->state < CXL_CONFIG_INTERLEAVE_ACTIVE) {
+ dev_dbg(&cxlr->dev, "interleave config missing\n");
+ return -ENXIO;
+ }
+
ep_port = cxled_to_port(cxled);
root_port = cxlrd_to_port(cxlrd);
dport = cxl_find_dport_by_dev(root_port, ep_port->host_bridge);
@@ -1267,13 +1564,6 @@ static int cxl_region_attach(struct cxl_region *cxlr,
return -ENXIO;
}
- if (cxlrd->calc_hb(cxlrd, pos) != dport) {
- dev_dbg(&cxlr->dev, "%s:%s invalid target position for %s\n",
- dev_name(&cxlmd->dev), dev_name(&cxled->cxld.dev),
- dev_name(&cxlrd->cxlsd.cxld.dev));
- return -ENXIO;
- }
-
if (cxled->cxld.target_type != cxlr->type) {
dev_dbg(&cxlr->dev, "%s:%s type mismatch: %d vs %d\n",
dev_name(&cxlmd->dev), dev_name(&cxled->cxld.dev),
@@ -1297,13 +1587,58 @@ static int cxl_region_attach(struct cxl_region *cxlr,
return -EINVAL;
}
- for (iter = ep_port; !is_cxl_root(iter);
- iter = to_cxl_port(iter->dev.parent)) {
- rc = cxl_port_attach_region(iter, cxlr, cxled, pos);
+ if (test_bit(CXL_REGION_F_AUTO, &cxlr->flags)) {
+ int i;
+
+ rc = cxl_region_attach_auto(cxlr, cxled, pos);
if (rc)
- goto err;
+ return rc;
+
+ /* await more targets to arrive... */
+ if (p->nr_targets < p->interleave_ways)
+ return 0;
+
+ /*
+ * All targets are here, which implies all PCI enumeration that
+ * affects this region has been completed. Walk the topology to
+ * sort the devices into their relative region decode position.
+ */
+ rc = cxl_region_sort_targets(cxlr);
+ if (rc)
+ return rc;
+
+ for (i = 0; i < p->nr_targets; i++) {
+ cxled = p->targets[i];
+ ep_port = cxled_to_port(cxled);
+ dport = cxl_find_dport_by_dev(root_port,
+ ep_port->host_bridge);
+ rc = cxl_region_attach_position(cxlr, cxlrd, cxled,
+ dport, i);
+ if (rc)
+ return rc;
+ }
+
+ rc = cxl_region_setup_targets(cxlr);
+ if (rc)
+ return rc;
+
+ /*
+ * If target setup succeeds in the autodiscovery case
+ * then the region is already committed.
+ */
+ p->state = CXL_CONFIG_COMMIT;
+
+ return 0;
}
+ rc = cxl_region_validate_position(cxlr, cxled, pos);
+ if (rc)
+ return rc;
+
+ rc = cxl_region_attach_position(cxlr, cxlrd, cxled, dport, pos);
+ if (rc)
+ return rc;
+
p->targets[pos] = cxled;
cxled->pos = pos;
p->nr_targets++;
@@ -1326,10 +1661,8 @@ static int cxl_region_attach(struct cxl_region *cxlr,
err_decrement:
p->nr_targets--;
-err:
- for (iter = ep_port; !is_cxl_root(iter);
- iter = to_cxl_port(iter->dev.parent))
- cxl_port_detach_region(iter, cxlr, cxled);
+ cxled->pos = -1;
+ p->targets[pos] = NULL;
return rc;
}
@@ -1401,31 +1734,25 @@ void cxl_decoder_kill_region(struct cxl_endpoint_decoder *cxled)
up_write(&cxl_region_rwsem);
}
-static int attach_target(struct cxl_region *cxlr, const char *decoder, int pos)
+static int attach_target(struct cxl_region *cxlr,
+ struct cxl_endpoint_decoder *cxled, int pos,
+ unsigned int state)
{
- struct device *dev;
- int rc;
-
- dev = bus_find_device_by_name(&cxl_bus_type, NULL, decoder);
- if (!dev)
- return -ENODEV;
-
- if (!is_endpoint_decoder(dev)) {
- put_device(dev);
- return -EINVAL;
- }
+ int rc = 0;
- rc = down_write_killable(&cxl_region_rwsem);
+ if (state == TASK_INTERRUPTIBLE)
+ rc = down_write_killable(&cxl_region_rwsem);
+ else
+ down_write(&cxl_region_rwsem);
if (rc)
- goto out;
+ return rc;
+
down_read(&cxl_dpa_rwsem);
- rc = cxl_region_attach(cxlr, to_cxl_endpoint_decoder(dev), pos);
+ rc = cxl_region_attach(cxlr, cxled, pos);
if (rc == 0)
set_bit(CXL_REGION_F_INCOHERENT, &cxlr->flags);
up_read(&cxl_dpa_rwsem);
up_write(&cxl_region_rwsem);
-out:
- put_device(dev);
return rc;
}
@@ -1463,8 +1790,23 @@ static size_t store_targetN(struct cxl_region *cxlr, const char *buf, int pos,
if (sysfs_streq(buf, "\n"))
rc = detach_target(cxlr, pos);
- else
- rc = attach_target(cxlr, buf, pos);
+ else {
+ struct device *dev;
+
+ dev = bus_find_device_by_name(&cxl_bus_type, NULL, buf);
+ if (!dev)
+ return -ENODEV;
+
+ if (!is_endpoint_decoder(dev)) {
+ rc = -EINVAL;
+ goto out;
+ }
+
+ rc = attach_target(cxlr, to_cxl_endpoint_decoder(dev), pos,
+ TASK_INTERRUPTIBLE);
+out:
+ put_device(dev);
+ }
if (rc < 0)
return rc;
@@ -1668,6 +2010,15 @@ static struct cxl_region *devm_cxl_add_region(struct cxl_root_decoder *cxlrd,
struct device *dev;
int rc;
+ switch (mode) {
+ case CXL_DECODER_RAM:
+ case CXL_DECODER_PMEM:
+ break;
+ default:
+ dev_err(&cxlrd->cxlsd.cxld.dev, "unsupported mode %d\n", mode);
+ return ERR_PTR(-EINVAL);
+ }
+
cxlr = cxl_region_alloc(cxlrd, id);
if (IS_ERR(cxlr))
return cxlr;
@@ -1696,12 +2047,38 @@ err:
return ERR_PTR(rc);
}
+static ssize_t __create_region_show(struct cxl_root_decoder *cxlrd, char *buf)
+{
+ return sysfs_emit(buf, "region%u\n", atomic_read(&cxlrd->region_id));
+}
+
static ssize_t create_pmem_region_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
- struct cxl_root_decoder *cxlrd = to_cxl_root_decoder(dev);
+ return __create_region_show(to_cxl_root_decoder(dev), buf);
+}
- return sysfs_emit(buf, "region%u\n", atomic_read(&cxlrd->region_id));
+static ssize_t create_ram_region_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ return __create_region_show(to_cxl_root_decoder(dev), buf);
+}
+
+static struct cxl_region *__create_region(struct cxl_root_decoder *cxlrd,
+ enum cxl_decoder_mode mode, int id)
+{
+ int rc;
+
+ rc = memregion_alloc(GFP_KERNEL);
+ if (rc < 0)
+ return ERR_PTR(rc);
+
+ if (atomic_cmpxchg(&cxlrd->region_id, id, rc) != id) {
+ memregion_free(rc);
+ return ERR_PTR(-EBUSY);
+ }
+
+ return devm_cxl_add_region(cxlrd, id, mode, CXL_DECODER_EXPANDER);
}
static ssize_t create_pmem_region_store(struct device *dev,
@@ -1710,29 +2087,39 @@ static ssize_t create_pmem_region_store(struct device *dev,
{
struct cxl_root_decoder *cxlrd = to_cxl_root_decoder(dev);
struct cxl_region *cxlr;
- int id, rc;
+ int rc, id;
rc = sscanf(buf, "region%d\n", &id);
if (rc != 1)
return -EINVAL;
- rc = memregion_alloc(GFP_KERNEL);
- if (rc < 0)
- return rc;
+ cxlr = __create_region(cxlrd, CXL_DECODER_PMEM, id);
+ if (IS_ERR(cxlr))
+ return PTR_ERR(cxlr);
- if (atomic_cmpxchg(&cxlrd->region_id, id, rc) != id) {
- memregion_free(rc);
- return -EBUSY;
- }
+ return len;
+}
+DEVICE_ATTR_RW(create_pmem_region);
- cxlr = devm_cxl_add_region(cxlrd, id, CXL_DECODER_PMEM,
- CXL_DECODER_EXPANDER);
+static ssize_t create_ram_region_store(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf, size_t len)
+{
+ struct cxl_root_decoder *cxlrd = to_cxl_root_decoder(dev);
+ struct cxl_region *cxlr;
+ int rc, id;
+
+ rc = sscanf(buf, "region%d\n", &id);
+ if (rc != 1)
+ return -EINVAL;
+
+ cxlr = __create_region(cxlrd, CXL_DECODER_RAM, id);
if (IS_ERR(cxlr))
return PTR_ERR(cxlr);
return len;
}
-DEVICE_ATTR_RW(create_pmem_region);
+DEVICE_ATTR_RW(create_ram_region);
static ssize_t region_show(struct device *dev, struct device_attribute *attr,
char *buf)
@@ -1893,6 +2280,75 @@ out:
return cxlr_pmem;
}
+static void cxl_dax_region_release(struct device *dev)
+{
+ struct cxl_dax_region *cxlr_dax = to_cxl_dax_region(dev);
+
+ kfree(cxlr_dax);
+}
+
+static const struct attribute_group *cxl_dax_region_attribute_groups[] = {
+ &cxl_base_attribute_group,
+ NULL,
+};
+
+const struct device_type cxl_dax_region_type = {
+ .name = "cxl_dax_region",
+ .release = cxl_dax_region_release,
+ .groups = cxl_dax_region_attribute_groups,
+};
+
+static bool is_cxl_dax_region(struct device *dev)
+{
+ return dev->type == &cxl_dax_region_type;
+}
+
+struct cxl_dax_region *to_cxl_dax_region(struct device *dev)
+{
+ if (dev_WARN_ONCE(dev, !is_cxl_dax_region(dev),
+ "not a cxl_dax_region device\n"))
+ return NULL;
+ return container_of(dev, struct cxl_dax_region, dev);
+}
+EXPORT_SYMBOL_NS_GPL(to_cxl_dax_region, CXL);
+
+static struct lock_class_key cxl_dax_region_key;
+
+static struct cxl_dax_region *cxl_dax_region_alloc(struct cxl_region *cxlr)
+{
+ struct cxl_region_params *p = &cxlr->params;
+ struct cxl_dax_region *cxlr_dax;
+ struct device *dev;
+
+ down_read(&cxl_region_rwsem);
+ if (p->state != CXL_CONFIG_COMMIT) {
+ cxlr_dax = ERR_PTR(-ENXIO);
+ goto out;
+ }
+
+ cxlr_dax = kzalloc(sizeof(*cxlr_dax), GFP_KERNEL);
+ if (!cxlr_dax) {
+ cxlr_dax = ERR_PTR(-ENOMEM);
+ goto out;
+ }
+
+ cxlr_dax->hpa_range.start = p->res->start;
+ cxlr_dax->hpa_range.end = p->res->end;
+
+ dev = &cxlr_dax->dev;
+ cxlr_dax->cxlr = cxlr;
+ device_initialize(dev);
+ lockdep_set_class(&dev->mutex, &cxl_dax_region_key);
+ device_set_pm_not_required(dev);
+ dev->parent = &cxlr->dev;
+ dev->bus = &cxl_bus_type;
+ dev->type = &cxl_dax_region_type;
+out:
+ up_read(&cxl_region_rwsem);
+
+ return cxlr_dax;
+}
+
static void cxlr_pmem_unregister(void *_cxlr_pmem)
{
struct cxl_pmem_region *cxlr_pmem = _cxlr_pmem;
@@ -1977,6 +2433,228 @@ err_bridge:
return rc;
}
+static void cxlr_dax_unregister(void *_cxlr_dax)
+{
+ struct cxl_dax_region *cxlr_dax = _cxlr_dax;
+
+ device_unregister(&cxlr_dax->dev);
+}
+
+static int devm_cxl_add_dax_region(struct cxl_region *cxlr)
+{
+ struct cxl_dax_region *cxlr_dax;
+ struct device *dev;
+ int rc;
+
+ cxlr_dax = cxl_dax_region_alloc(cxlr);
+ if (IS_ERR(cxlr_dax))
+ return PTR_ERR(cxlr_dax);
+
+ dev = &cxlr_dax->dev;
+ rc = dev_set_name(dev, "dax_region%d", cxlr->id);
+ if (rc)
+ goto err;
+
+ rc = device_add(dev);
+ if (rc)
+ goto err;
+
+ dev_dbg(&cxlr->dev, "%s: register %s\n", dev_name(dev->parent),
+ dev_name(dev));
+
+ return devm_add_action_or_reset(&cxlr->dev, cxlr_dax_unregister,
+ cxlr_dax);
+err:
+ put_device(dev);
+ return rc;
+}
+
+static int match_decoder_by_range(struct device *dev, void *data)
+{
+ struct range *r1, *r2 = data;
+ struct cxl_root_decoder *cxlrd;
+
+ if (!is_root_decoder(dev))
+ return 0;
+
+ cxlrd = to_cxl_root_decoder(dev);
+ r1 = &cxlrd->cxlsd.cxld.hpa_range;
+ return range_contains(r1, r2);
+}
+
+static int match_region_by_range(struct device *dev, void *data)
+{
+ struct cxl_region_params *p;
+ struct cxl_region *cxlr;
+ struct range *r = data;
+ int rc = 0;
+
+ if (!is_cxl_region(dev))
+ return 0;
+
+ cxlr = to_cxl_region(dev);
+ p = &cxlr->params;
+
+ down_read(&cxl_region_rwsem);
+ if (p->res && p->res->start == r->start && p->res->end == r->end)
+ rc = 1;
+ up_read(&cxl_region_rwsem);
+
+ return rc;
+}
+
+/* Establish an empty region covering the given HPA range */
+static struct cxl_region *construct_region(struct cxl_root_decoder *cxlrd,
+ struct cxl_endpoint_decoder *cxled)
+{
+ struct cxl_memdev *cxlmd = cxled_to_memdev(cxled);
+ struct cxl_port *port = cxlrd_to_port(cxlrd);
+ struct range *hpa = &cxled->cxld.hpa_range;
+ struct cxl_region_params *p;
+ struct cxl_region *cxlr;
+ struct resource *res;
+ int rc;
+
+ do {
+ cxlr = __create_region(cxlrd, cxled->mode,
+ atomic_read(&cxlrd->region_id));
+ } while (IS_ERR(cxlr) && PTR_ERR(cxlr) == -EBUSY);
+
+ if (IS_ERR(cxlr)) {
+ dev_err(cxlmd->dev.parent,
+ "%s:%s: %s failed assign region: %ld\n",
+ dev_name(&cxlmd->dev), dev_name(&cxled->cxld.dev),
+ __func__, PTR_ERR(cxlr));
+ return cxlr;
+ }
+
+ down_write(&cxl_region_rwsem);
+ p = &cxlr->params;
+ if (p->state >= CXL_CONFIG_INTERLEAVE_ACTIVE) {
+ dev_err(cxlmd->dev.parent,
+ "%s:%s: %s autodiscovery interrupted\n",
+ dev_name(&cxlmd->dev), dev_name(&cxled->cxld.dev),
+ __func__);
+ rc = -EBUSY;
+ goto err;
+ }
+
+ set_bit(CXL_REGION_F_AUTO, &cxlr->flags);
+
+ res = kmalloc(sizeof(*res), GFP_KERNEL);
+ if (!res) {
+ rc = -ENOMEM;
+ goto err;
+ }
+
+ *res = DEFINE_RES_MEM_NAMED(hpa->start, range_len(hpa),
+ dev_name(&cxlr->dev));
+ rc = insert_resource(cxlrd->res, res);
+ if (rc) {
+ /*
+ * Platform-firmware may not have split resources like "System
+ * RAM" on CXL window boundaries see cxl_region_iomem_release()
+ */
+ dev_warn(cxlmd->dev.parent,
+ "%s:%s: %s %s cannot insert resource\n",
+ dev_name(&cxlmd->dev), dev_name(&cxled->cxld.dev),
+ __func__, dev_name(&cxlr->dev));
+ }
+
+ p->res = res;
+ p->interleave_ways = cxled->cxld.interleave_ways;
+ p->interleave_granularity = cxled->cxld.interleave_granularity;
+ p->state = CXL_CONFIG_INTERLEAVE_ACTIVE;
+
+ rc = sysfs_update_group(&cxlr->dev.kobj, get_cxl_region_target_group());
+ if (rc)
+ goto err;
+
+ dev_dbg(cxlmd->dev.parent, "%s:%s: %s %s res: %pr iw: %d ig: %d\n",
+ dev_name(&cxlmd->dev), dev_name(&cxled->cxld.dev), __func__,
+ dev_name(&cxlr->dev), p->res, p->interleave_ways,
+ p->interleave_granularity);
+
+ /* ...to match put_device() in cxl_add_to_region() */
+ get_device(&cxlr->dev);
+ up_write(&cxl_region_rwsem);
+
+ return cxlr;
+
+err:
+ up_write(&cxl_region_rwsem);
+ devm_release_action(port->uport, unregister_region, cxlr);
+ return ERR_PTR(rc);
+}
+
+int cxl_add_to_region(struct cxl_port *root, struct cxl_endpoint_decoder *cxled)
+{
+ struct cxl_memdev *cxlmd = cxled_to_memdev(cxled);
+ struct range *hpa = &cxled->cxld.hpa_range;
+ struct cxl_decoder *cxld = &cxled->cxld;
+ struct device *cxlrd_dev, *region_dev;
+ struct cxl_root_decoder *cxlrd;
+ struct cxl_region_params *p;
+ struct cxl_region *cxlr;
+ bool attach = false;
+ int rc;
+
+ cxlrd_dev = device_find_child(&root->dev, &cxld->hpa_range,
+ match_decoder_by_range);
+ if (!cxlrd_dev) {
+ dev_err(cxlmd->dev.parent,
+ "%s:%s no CXL window for range %#llx:%#llx\n",
+ dev_name(&cxlmd->dev), dev_name(&cxld->dev),
+ cxld->hpa_range.start, cxld->hpa_range.end);
+ return -ENXIO;
+ }
+
+ cxlrd = to_cxl_root_decoder(cxlrd_dev);
+
+ /*
+ * Ensure that if multiple threads race to construct_region() for @hpa
+ * one does the construction and the others add to that.
+ */
+ mutex_lock(&cxlrd->range_lock);
+ region_dev = device_find_child(&cxlrd->cxlsd.cxld.dev, hpa,
+ match_region_by_range);
+ if (!region_dev) {
+ cxlr = construct_region(cxlrd, cxled);
+ region_dev = &cxlr->dev;
+ } else
+ cxlr = to_cxl_region(region_dev);
+ mutex_unlock(&cxlrd->range_lock);
+
+ if (IS_ERR(cxlr)) {
+ rc = PTR_ERR(cxlr);
+ goto out;
+ }
+
+ attach_target(cxlr, cxled, -1, TASK_UNINTERRUPTIBLE);
+
+ down_read(&cxl_region_rwsem);
+ p = &cxlr->params;
+ attach = p->state == CXL_CONFIG_COMMIT;
+ up_read(&cxl_region_rwsem);
+
+ if (attach) {
+ /*
+ * If device_attach() fails the range may still be active via
+ * the platform-firmware memory map, otherwise the driver for
+ * regions is local to this file, so driver matching can't fail.
+ */
+ if (device_attach(&cxlr->dev) < 0)
+ dev_err(&cxlr->dev, "failed to enable, range: %pr\n",
+ p->res);
+ }
+
+ put_device(region_dev);
+out:
+ put_device(cxlrd_dev);
+ return rc;
+}
+EXPORT_SYMBOL_NS_GPL(cxl_add_to_region, CXL);
+
static int cxl_region_invalidate_memregion(struct cxl_region *cxlr)
{
if (!test_bit(CXL_REGION_F_INCOHERENT, &cxlr->flags))
@@ -2001,6 +2679,15 @@ static int cxl_region_invalidate_memregion(struct cxl_region *cxlr)
return 0;
}
+static int is_system_ram(struct resource *res, void *arg)
+{
+ struct cxl_region *cxlr = arg;
+ struct cxl_region_params *p = &cxlr->params;
+
+ dev_dbg(&cxlr->dev, "%pr has System RAM: %pr\n", p->res, res);
+ return 1;
+}
+
static int cxl_region_probe(struct device *dev)
{
struct cxl_region *cxlr = to_cxl_region(dev);
@@ -2034,6 +2721,17 @@ out:
switch (cxlr->mode) {
case CXL_DECODER_PMEM:
return devm_cxl_add_pmem_region(cxlr);
+ case CXL_DECODER_RAM:
+ /*
+ * The region can not be manged by CXL if any portion of
+ * it is already online as 'System RAM'
+ */
+ if (walk_iomem_res_desc(IORES_DESC_NONE,
+ IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY,
+ p->res->start, p->res->end, cxlr,
+ is_system_ram) > 0)
+ return 0;
+ return devm_cxl_add_dax_region(cxlr);
default:
dev_dbg(&cxlr->dev, "unsupported region mode: %d\n",
cxlr->mode);
diff --git a/drivers/cxl/cxl.h b/drivers/cxl/cxl.h
index b3964149c77b..1289d1ea3540 100644
--- a/drivers/cxl/cxl.h
+++ b/drivers/cxl/cxl.h
@@ -277,6 +277,8 @@ resource_size_t cxl_rcrb_to_component(struct device *dev,
* cxl_decoder flags that define the type of memory / devices this
* decoder supports as well as configuration lock status See "CXL 2.0
* 8.2.5.12.7 CXL HDM Decoder 0 Control Register" for details.
+ * Additionally indicate whether decoder settings were autodetected,
+ * user customized.
*/
#define CXL_DECODER_F_RAM BIT(0)
#define CXL_DECODER_F_PMEM BIT(1)
@@ -336,12 +338,36 @@ enum cxl_decoder_mode {
CXL_DECODER_DEAD,
};
+static inline const char *cxl_decoder_mode_name(enum cxl_decoder_mode mode)
+{
+ static const char * const names[] = {
+ [CXL_DECODER_NONE] = "none",
+ [CXL_DECODER_RAM] = "ram",
+ [CXL_DECODER_PMEM] = "pmem",
+ [CXL_DECODER_MIXED] = "mixed",
+ };
+
+ if (mode >= CXL_DECODER_NONE && mode <= CXL_DECODER_MIXED)
+ return names[mode];
+ return "mixed";
+}
+
+/*
+ * Track whether this decoder is reserved for region autodiscovery, or
+ * free for userspace provisioning.
+ */
+enum cxl_decoder_state {
+ CXL_DECODER_STATE_MANUAL,
+ CXL_DECODER_STATE_AUTO,
+};
+
/**
* struct cxl_endpoint_decoder - Endpoint / SPA to DPA decoder
* @cxld: base cxl_decoder_object
* @dpa_res: actively claimed DPA span of this decoder
* @skip: offset into @dpa_res where @cxld.hpa_range maps
* @mode: which memory type / access-mode-partition this decoder targets
+ * @state: autodiscovery state
* @pos: interleave position in @cxld.region
*/
struct cxl_endpoint_decoder {
@@ -349,6 +375,7 @@ struct cxl_endpoint_decoder {
struct resource *dpa_res;
resource_size_t skip;
enum cxl_decoder_mode mode;
+ enum cxl_decoder_state state;
int pos;
};
@@ -382,6 +409,7 @@ typedef struct cxl_dport *(*cxl_calc_hb_fn)(struct cxl_root_decoder *cxlrd,
* @region_id: region id for next region provisioning event
* @calc_hb: which host bridge covers the n'th position by granularity
* @platform_data: platform specific configuration data
+ * @range_lock: sync region autodiscovery by address range
* @cxlsd: base cxl switch decoder
*/
struct cxl_root_decoder {
@@ -389,6 +417,7 @@ struct cxl_root_decoder {
atomic_t region_id;
cxl_calc_hb_fn calc_hb;
void *platform_data;
+ struct mutex range_lock;
struct cxl_switch_decoder cxlsd;
};
@@ -438,6 +467,13 @@ struct cxl_region_params {
*/
#define CXL_REGION_F_INCOHERENT 0
+/*
+ * Indicate whether this region has been assembled by autodetection or
+ * userspace assembly. Prevent endpoint decoders outside of automatic
+ * detection from being added to the region.
+ */
+#define CXL_REGION_F_AUTO 1
+
/**
* struct cxl_region - CXL region
* @dev: This region's device
@@ -493,6 +529,12 @@ struct cxl_pmem_region {
struct cxl_pmem_region_mapping mapping[];
};
+struct cxl_dax_region {
+ struct device dev;
+ struct cxl_region *cxlr;
+ struct range hpa_range;
+};
+
/**
* struct cxl_port - logical collection of upstream port devices and
* downstream port devices to construct a CXL memory
@@ -633,8 +675,10 @@ struct cxl_dport *devm_cxl_add_rch_dport(struct cxl_port *port,
struct cxl_decoder *to_cxl_decoder(struct device *dev);
struct cxl_root_decoder *to_cxl_root_decoder(struct device *dev);
+struct cxl_switch_decoder *to_cxl_switch_decoder(struct device *dev);
struct cxl_endpoint_decoder *to_cxl_endpoint_decoder(struct device *dev);
bool is_root_decoder(struct device *dev);
+bool is_switch_decoder(struct device *dev);
bool is_endpoint_decoder(struct device *dev);
struct cxl_root_decoder *cxl_root_decoder_alloc(struct cxl_port *port,
unsigned int nr_targets,
@@ -685,6 +729,7 @@ void cxl_driver_unregister(struct cxl_driver *cxl_drv);
#define CXL_DEVICE_MEMORY_EXPANDER 5
#define CXL_DEVICE_REGION 6
#define CXL_DEVICE_PMEM_REGION 7
+#define CXL_DEVICE_DAX_REGION 8
#define MODULE_ALIAS_CXL(type) MODULE_ALIAS("cxl:t" __stringify(type) "*")
#define CXL_MODALIAS_FMT "cxl:t%d"
@@ -701,6 +746,9 @@ struct cxl_nvdimm_bridge *cxl_find_nvdimm_bridge(struct device *dev);
#ifdef CONFIG_CXL_REGION
bool is_cxl_pmem_region(struct device *dev);
struct cxl_pmem_region *to_cxl_pmem_region(struct device *dev);
+int cxl_add_to_region(struct cxl_port *root,
+ struct cxl_endpoint_decoder *cxled);
+struct cxl_dax_region *to_cxl_dax_region(struct device *dev);
#else
static inline bool is_cxl_pmem_region(struct device *dev)
{
@@ -710,6 +758,15 @@ static inline struct cxl_pmem_region *to_cxl_pmem_region(struct device *dev)
{
return NULL;
}
+static inline int cxl_add_to_region(struct cxl_port *root,
+ struct cxl_endpoint_decoder *cxled)
+{
+ return 0;
+}
+static inline struct cxl_dax_region *to_cxl_dax_region(struct device *dev)
+{
+ return NULL;
+}
#endif
/*
diff --git a/drivers/cxl/cxlmem.h b/drivers/cxl/cxlmem.h
index 4b066802e03d..64ede1a06eaf 100644
--- a/drivers/cxl/cxlmem.h
+++ b/drivers/cxl/cxlmem.h
@@ -39,6 +39,7 @@
* @cxl_nvb: coordinate removal of @cxl_nvd if present
* @cxl_nvd: optional bridge to an nvdimm if the device supports pmem
* @id: id number of this memdev instance.
+ * @depth: endpoint port depth
*/
struct cxl_memdev {
struct device dev;
@@ -48,6 +49,7 @@ struct cxl_memdev {
struct cxl_nvdimm_bridge *cxl_nvb;
struct cxl_nvdimm *cxl_nvd;
int id;
+ int depth;
};
static inline struct cxl_memdev *to_cxl_memdev(struct device *dev)
@@ -80,6 +82,9 @@ static inline bool is_cxl_endpoint(struct cxl_port *port)
}
struct cxl_memdev *devm_cxl_add_memdev(struct cxl_dev_state *cxlds);
+int devm_cxl_dpa_reserve(struct cxl_endpoint_decoder *cxled,
+ resource_size_t base, resource_size_t len,
+ resource_size_t skipped);
static inline struct cxl_ep *cxl_ep_load(struct cxl_port *port,
struct cxl_memdev *cxlmd)
diff --git a/drivers/cxl/port.c b/drivers/cxl/port.c
index 5453771bf330..d6c151dabaa7 100644
--- a/drivers/cxl/port.c
+++ b/drivers/cxl/port.c
@@ -30,57 +30,111 @@ static void schedule_detach(void *cxlmd)
schedule_cxl_memdev_detach(cxlmd);
}
-static int cxl_port_probe(struct device *dev)
+static int discover_region(struct device *dev, void *root)
+{
+ struct cxl_endpoint_decoder *cxled;
+ int rc;
+
+ if (!is_endpoint_decoder(dev))
+ return 0;
+
+ cxled = to_cxl_endpoint_decoder(dev);
+ if ((cxled->cxld.flags & CXL_DECODER_F_ENABLE) == 0)
+ return 0;
+
+ if (cxled->state != CXL_DECODER_STATE_AUTO)
+ return 0;
+
+ /*
+ * Region enumeration is opportunistic, if this add-event fails,
+ * continue to the next endpoint decoder.
+ */
+ rc = cxl_add_to_region(root, cxled);
+ if (rc)
+ dev_dbg(dev, "failed to add to region: %#llx-%#llx\n",
+ cxled->cxld.hpa_range.start, cxled->cxld.hpa_range.end);
+
+ return 0;
+}
+
+static int cxl_switch_port_probe(struct cxl_port *port)
{
- struct cxl_port *port = to_cxl_port(dev);
struct cxl_hdm *cxlhdm;
int rc;
+ rc = devm_cxl_port_enumerate_dports(port);
+ if (rc < 0)
+ return rc;
- if (!is_cxl_endpoint(port)) {
- rc = devm_cxl_port_enumerate_dports(port);
- if (rc < 0)
- return rc;
- if (rc == 1)
- return devm_cxl_add_passthrough_decoder(port);
- }
+ if (rc == 1)
+ return devm_cxl_add_passthrough_decoder(port);
cxlhdm = devm_cxl_setup_hdm(port);
if (IS_ERR(cxlhdm))
return PTR_ERR(cxlhdm);
- if (is_cxl_endpoint(port)) {
- struct cxl_memdev *cxlmd = to_cxl_memdev(port->uport);
- struct cxl_dev_state *cxlds = cxlmd->cxlds;
+ return devm_cxl_enumerate_decoders(cxlhdm);
+}
+
+static int cxl_endpoint_port_probe(struct cxl_port *port)
+{
+ struct cxl_memdev *cxlmd = to_cxl_memdev(port->uport);
+ struct cxl_dev_state *cxlds = cxlmd->cxlds;
+ struct cxl_hdm *cxlhdm;
+ struct cxl_port *root;
+ int rc;
+
+ cxlhdm = devm_cxl_setup_hdm(port);
+ if (IS_ERR(cxlhdm))
+ return PTR_ERR(cxlhdm);
- /* Cache the data early to ensure is_visible() works */
- read_cdat_data(port);
+ /* Cache the data early to ensure is_visible() works */
+ read_cdat_data(port);
- get_device(&cxlmd->dev);
- rc = devm_add_action_or_reset(dev, schedule_detach, cxlmd);
- if (rc)
- return rc;
+ get_device(&cxlmd->dev);
+ rc = devm_add_action_or_reset(&port->dev, schedule_detach, cxlmd);
+ if (rc)
+ return rc;
- rc = cxl_hdm_decode_init(cxlds, cxlhdm);
- if (rc)
- return rc;
+ rc = cxl_hdm_decode_init(cxlds, cxlhdm);
+ if (rc)
+ return rc;
- rc = cxl_await_media_ready(cxlds);
- if (rc) {
- dev_err(dev, "Media not active (%d)\n", rc);
- return rc;
- }
+ rc = cxl_await_media_ready(cxlds);
+ if (rc) {
+ dev_err(&port->dev, "Media not active (%d)\n", rc);
+ return rc;
}
rc = devm_cxl_enumerate_decoders(cxlhdm);
- if (rc) {
- dev_err(dev, "Couldn't enumerate decoders (%d)\n", rc);
+ if (rc)
return rc;
- }
+
+ /*
+ * This can't fail in practice as CXL root exit unregisters all
+ * descendant ports and that in turn synchronizes with cxl_port_probe()
+ */
+ root = find_cxl_root(&cxlmd->dev);
+
+ /*
+ * Now that all endpoint decoders are successfully enumerated, try to
+ * assemble regions from committed decoders
+ */
+ device_for_each_child(&port->dev, root, discover_region);
+ put_device(&root->dev);
return 0;
}
+static int cxl_port_probe(struct device *dev)
+{
+ struct cxl_port *port = to_cxl_port(dev);
+
+ if (is_cxl_endpoint(port))
+ return cxl_endpoint_port_probe(port);
+ return cxl_switch_port_probe(port);
+}
+
static ssize_t CDAT_read(struct file *filp, struct kobject *kobj,
struct bin_attribute *bin_attr, char *buf,
loff_t offset, size_t count)
diff --git a/drivers/dax/Kconfig b/drivers/dax/Kconfig
index 5fdf269a822e..bd06e16c7ac8 100644
--- a/drivers/dax/Kconfig
+++ b/drivers/dax/Kconfig
@@ -45,12 +45,25 @@ config DEV_DAX_HMEM
Say M if unsure.
+config DEV_DAX_CXL
+ tristate "CXL DAX: direct access to CXL RAM regions"
+ depends on CXL_REGION && DEV_DAX
+ default CXL_REGION && DEV_DAX
+ help
+ CXL RAM regions are either mapped by platform-firmware
+ and published in the initial system-memory map as "System RAM", mapped
+ by platform-firmware as "Soft Reserved", or dynamically provisioned
+ after boot by the CXL driver. In the latter two cases a device-dax
+ instance is created to access that unmapped-by-default address range.
+ Per usual it can remain as dedicated access via a device interface, or
+ converted to "System RAM" via the dax_kmem facility.
+
config DEV_DAX_HMEM_DEVICES
- depends on DEV_DAX_HMEM && DAX=y
+ depends on DEV_DAX_HMEM && DAX
def_bool y
config DEV_DAX_KMEM
- tristate "KMEM DAX: volatile-use of persistent memory"
+ tristate "KMEM DAX: map dax-devices as System-RAM"
default DEV_DAX
depends on DEV_DAX
depends on MEMORY_HOTPLUG # for add_memory() and friends
diff --git a/drivers/dax/Makefile b/drivers/dax/Makefile
index 90a56ca3b345..5ed5c39857c8 100644
--- a/drivers/dax/Makefile
+++ b/drivers/dax/Makefile
@@ -3,10 +3,12 @@ obj-$(CONFIG_DAX) += dax.o
obj-$(CONFIG_DEV_DAX) += device_dax.o
obj-$(CONFIG_DEV_DAX_KMEM) += kmem.o
obj-$(CONFIG_DEV_DAX_PMEM) += dax_pmem.o
+obj-$(CONFIG_DEV_DAX_CXL) += dax_cxl.o
dax-y := super.o
dax-y += bus.o
device_dax-y := device.o
dax_pmem-y := pmem.o
+dax_cxl-y := cxl.o
obj-y += hmem/
diff --git a/drivers/dax/bus.c b/drivers/dax/bus.c
index 1dad813ee4a6..012d576004e9 100644
--- a/drivers/dax/bus.c
+++ b/drivers/dax/bus.c
@@ -56,6 +56,25 @@ static int dax_match_id(struct dax_device_driver *dax_drv, struct device *dev)
return match;
}
+static int dax_match_type(struct dax_device_driver *dax_drv, struct device *dev)
+{
+ enum dax_driver_type type = DAXDRV_DEVICE_TYPE;
+ struct dev_dax *dev_dax = to_dev_dax(dev);
+
+ if (dev_dax->region->res.flags & IORESOURCE_DAX_KMEM)
+ type = DAXDRV_KMEM_TYPE;
+
+ if (dax_drv->type == type)
+ return 1;
+
+ /* default to device mode if dax_kmem is disabled */
+ if (dax_drv->type == DAXDRV_DEVICE_TYPE &&
+ !IS_ENABLED(CONFIG_DEV_DAX_KMEM))
+ return 1;
+
+ return 0;
+}
+
enum id_action {
ID_REMOVE,
ID_ADD,
@@ -216,14 +235,9 @@ static int dax_bus_match(struct device *dev, struct device_driver *drv)
{
struct dax_device_driver *dax_drv = to_dax_drv(drv);
- /*
- * All but the 'device-dax' driver, which has 'match_always'
- * set, requires an exact id match.
- */
- if (dax_drv->match_always)
+ if (dax_match_id(dax_drv, dev))
return 1;
-
- return dax_match_id(dax_drv, dev);
+ return dax_match_type(dax_drv, dev);
}
/*
@@ -1413,13 +1427,10 @@ err_id:
}
EXPORT_SYMBOL_GPL(devm_create_dev_dax);
-static int match_always_count;
-
int __dax_driver_register(struct dax_device_driver *dax_drv,
struct module *module, const char *mod_name)
{
struct device_driver *drv = &dax_drv->drv;
- int rc = 0;
/*
* dax_bus_probe() calls dax_drv->probe() unconditionally.
@@ -1434,26 +1445,7 @@ int __dax_driver_register(struct dax_device_driver *dax_drv,
drv->mod_name = mod_name;
drv->bus = &dax_bus_type;
- /* there can only be one default driver */
- mutex_lock(&dax_bus_lock);
- match_always_count += dax_drv->match_always;
- if (match_always_count > 1) {
- match_always_count--;
- WARN_ON(1);
- rc = -EINVAL;
- }
- mutex_unlock(&dax_bus_lock);
- if (rc)
- return rc;
-
- rc = driver_register(drv);
- if (rc && dax_drv->match_always) {
- mutex_lock(&dax_bus_lock);
- match_always_count -= dax_drv->match_always;
- mutex_unlock(&dax_bus_lock);
- }
-
- return rc;
+ return driver_register(drv);
}
EXPORT_SYMBOL_GPL(__dax_driver_register);
@@ -1463,7 +1455,6 @@ void dax_driver_unregister(struct dax_device_driver *dax_drv)
struct dax_id *dax_id, *_id;
mutex_lock(&dax_bus_lock);
- match_always_count -= dax_drv->match_always;
list_for_each_entry_safe(dax_id, _id, &dax_drv->ids, list) {
list_del(&dax_id->list);
kfree(dax_id);
diff --git a/drivers/dax/bus.h b/drivers/dax/bus.h
index fbb940293d6d..8cd79ab34292 100644
--- a/drivers/dax/bus.h
+++ b/drivers/dax/bus.h
@@ -11,7 +11,10 @@ struct dax_device;
struct dax_region;
void dax_region_put(struct dax_region *dax_region);
-#define IORESOURCE_DAX_STATIC (1UL << 0)
+/* dax bus specific ioresource flags */
+#define IORESOURCE_DAX_STATIC BIT(0)
+#define IORESOURCE_DAX_KMEM BIT(1)
+
struct dax_region *alloc_dax_region(struct device *parent, int region_id,
struct range *range, int target_node, unsigned int align,
unsigned long flags);
@@ -25,10 +28,15 @@ struct dev_dax_data {
struct dev_dax *devm_create_dev_dax(struct dev_dax_data *data);
+enum dax_driver_type {
+ DAXDRV_KMEM_TYPE,
+ DAXDRV_DEVICE_TYPE,
+};
+
struct dax_device_driver {
struct device_driver drv;
struct list_head ids;
- int match_always;
+ enum dax_driver_type type;
int (*probe)(struct dev_dax *dev);
void (*remove)(struct dev_dax *dev);
};
diff --git a/drivers/dax/cxl.c b/drivers/dax/cxl.c
new file mode 100644
index 000000000000..ccdf8de85bd5
--- /dev/null
+++ b/drivers/dax/cxl.c
@@ -0,0 +1,53 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright(c) 2023 Intel Corporation. All rights reserved. */
+#include <linux/module.h>
+#include <linux/dax.h>
+
+#include "../cxl/cxl.h"
+#include "bus.h"
+
+static int cxl_dax_region_probe(struct device *dev)
+{
+ struct cxl_dax_region *cxlr_dax = to_cxl_dax_region(dev);
+ int nid = phys_to_target_node(cxlr_dax->hpa_range.start);
+ struct cxl_region *cxlr = cxlr_dax->cxlr;
+ struct dax_region *dax_region;
+ struct dev_dax_data data;
+ struct dev_dax *dev_dax;
+
+ if (nid == NUMA_NO_NODE)
+ nid = memory_add_physaddr_to_nid(cxlr_dax->hpa_range.start);
+
+ dax_region = alloc_dax_region(dev, cxlr->id, &cxlr_dax->hpa_range, nid,
+ PMD_SIZE, IORESOURCE_DAX_KMEM);
+ if (!dax_region)
+ return -ENOMEM;
+
+ data = (struct dev_dax_data) {
+ .dax_region = dax_region,
+ .id = -1,
+ .size = range_len(&cxlr_dax->hpa_range),
+ };
+ dev_dax = devm_create_dev_dax(&data);
+ if (IS_ERR(dev_dax))
+ return PTR_ERR(dev_dax);
+
+ /* child dev_dax instances now own the lifetime of the dax_region */
+ dax_region_put(dax_region);
+ return 0;
+}
+
+static struct cxl_driver cxl_dax_region_driver = {
+ .name = "cxl_dax_region",
+ .probe = cxl_dax_region_probe,
+ .id = CXL_DEVICE_DAX_REGION,
+ .drv = {
+ .suppress_bind_attrs = true,
+ },
+};
+
+module_cxl_driver(cxl_dax_region_driver);
+MODULE_ALIAS_CXL(CXL_DEVICE_DAX_REGION);
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Intel Corporation");
+MODULE_IMPORT_NS(CXL);
diff --git a/drivers/dax/device.c b/drivers/dax/device.c
index 5494d745ced5..ecdff79e31f2 100644
--- a/drivers/dax/device.c
+++ b/drivers/dax/device.c
@@ -475,8 +475,7 @@ EXPORT_SYMBOL_GPL(dev_dax_probe);
static struct dax_device_driver device_dax_driver = {
.probe = dev_dax_probe,
- /* all probe actions are unwound by devm, so .remove isn't necessary */
- .match_always = 1,
+ .type = DAXDRV_DEVICE_TYPE,
};
static int __init dax_init(void)
diff --git a/drivers/dax/hmem/Makefile b/drivers/dax/hmem/Makefile
index 57377b4c3d47..d4c4cd6bccd7 100644
--- a/drivers/dax/hmem/Makefile
+++ b/drivers/dax/hmem/Makefile
@@ -1,6 +1,7 @@
# SPDX-License-Identifier: GPL-2.0
-obj-$(CONFIG_DEV_DAX_HMEM) += dax_hmem.o
+# device_hmem.o deliberately precedes dax_hmem.o for initcall ordering
obj-$(CONFIG_DEV_DAX_HMEM_DEVICES) += device_hmem.o
+obj-$(CONFIG_DEV_DAX_HMEM) += dax_hmem.o
device_hmem-y := device.o
dax_hmem-y := hmem.o
diff --git a/drivers/dax/hmem/device.c b/drivers/dax/hmem/device.c
index 903325aac991..f9e1a76a04a9 100644
--- a/drivers/dax/hmem/device.c
+++ b/drivers/dax/hmem/device.c
@@ -8,6 +8,8 @@
static bool nohmem;
module_param_named(disable, nohmem, bool, 0444);
+static bool platform_initialized;
+static DEFINE_MUTEX(hmem_resource_lock);
static struct resource hmem_active = {
.name = "HMEM devices",
.start = 0,
@@ -15,80 +17,66 @@ static struct resource hmem_active = {
.flags = IORESOURCE_MEM,
};
-void hmem_register_device(int target_nid, struct resource *r)
+int walk_hmem_resources(struct device *host, walk_hmem_fn fn)
+{
+ struct resource *res;
+ int rc = 0;
+
+ mutex_lock(&hmem_resource_lock);
+ for (res = hmem_active.child; res; res = res->sibling) {
+ rc = fn(host, (int) res->desc, res);
+ if (rc)
+ break;
+ }
+ mutex_unlock(&hmem_resource_lock);
+ return rc;
+}
+EXPORT_SYMBOL_GPL(walk_hmem_resources);
+
+static void __hmem_register_resource(int target_nid, struct resource *res)
{
- /* define a clean / non-busy resource for the platform device */
- struct resource res = {
- .start = r->start,
- .end = r->end,
- .flags = IORESOURCE_MEM,
- .desc = IORES_DESC_SOFT_RESERVED,
- };
struct platform_device *pdev;
- struct memregion_info info;
- int rc, id;
+ struct resource *new;
+ int rc;
- if (nohmem)
+ new = __request_region(&hmem_active, res->start, resource_size(res), "",
+ 0);
+ if (!new) {
+ pr_debug("hmem range %pr already active\n", res);
return;
+ }
- rc = region_intersects(res.start, resource_size(&res), IORESOURCE_MEM,
- IORES_DESC_SOFT_RESERVED);
- if (rc != REGION_INTERSECTS)
- return;
+ new->desc = target_nid;
- id = memregion_alloc(GFP_KERNEL);
- if (id < 0) {
- pr_err("memregion allocation failure for %pr\n", &res);
+ if (platform_initialized)
return;
- }
- pdev = platform_device_alloc("hmem", id);
+ pdev = platform_device_alloc("hmem_platform", 0);
if (!pdev) {
- pr_err("hmem device allocation failure for %pr\n", &res);
- goto out_pdev;
- }
-
- if (!__request_region(&hmem_active, res.start, resource_size(&res),
- dev_name(&pdev->dev), 0)) {
- dev_dbg(&pdev->dev, "hmem range %pr already active\n", &res);
- goto out_active;
- }
-
- pdev->dev.numa_node = numa_map_to_online_node(target_nid);
- info = (struct memregion_info) {
- .target_node = target_nid,
- };
- rc = platform_device_add_data(pdev, &info, sizeof(info));
- if (rc < 0) {
- pr_err("hmem memregion_info allocation failure for %pr\n", &res);
- goto out_resource;
- }
-
- rc = platform_device_add_resources(pdev, &res, 1);
- if (rc < 0) {
- pr_err("hmem resource allocation failure for %pr\n", &res);
- goto out_resource;
+ pr_err_once("failed to register device-dax hmem_platform device\n");
+ return;
}
rc = platform_device_add(pdev);
- if (rc < 0) {
- dev_err(&pdev->dev, "device add failed for %pr\n", &res);
- goto out_resource;
- }
+ if (rc)
+ platform_device_put(pdev);
+ else
+ platform_initialized = true;
+}
- return;
+void hmem_register_resource(int target_nid, struct resource *res)
+{
+ if (nohmem)
+ return;
-out_resource:
- __release_region(&hmem_active, res.start, resource_size(&res));
-out_active:
- platform_device_put(pdev);
-out_pdev:
- memregion_free(id);
+ mutex_lock(&hmem_resource_lock);
+ __hmem_register_resource(target_nid, res);
+ mutex_unlock(&hmem_resource_lock);
}
static __init int hmem_register_one(struct resource *res, void *data)
{
- hmem_register_device(phys_to_target_node(res->start), res);
+ hmem_register_resource(phys_to_target_node(res->start), res);
return 0;
}
@@ -104,4 +92,4 @@ static __init int hmem_init(void)
* As this is a fallback for address ranges unclaimed by the ACPI HMAT
* parsing it must be at an initcall level greater than hmat_init().
*/
-late_initcall(hmem_init);
+device_initcall(hmem_init);
diff --git a/drivers/dax/hmem/hmem.c b/drivers/dax/hmem/hmem.c
index 1bf040dbc834..e5fe8b39fb94 100644
--- a/drivers/dax/hmem/hmem.c
+++ b/drivers/dax/hmem/hmem.c
@@ -3,6 +3,7 @@
#include <linux/memregion.h>
#include <linux/module.h>
#include <linux/pfn_t.h>
+#include <linux/dax.h>
#include "../bus.h"
static bool region_idle;
@@ -10,30 +11,32 @@ module_param_named(region_idle, region_idle, bool, 0644);
static int dax_hmem_probe(struct platform_device *pdev)
{
+ unsigned long flags = IORESOURCE_DAX_KMEM;
struct device *dev = &pdev->dev;
struct dax_region *dax_region;
struct memregion_info *mri;
struct dev_dax_data data;
struct dev_dax *dev_dax;
- struct resource *res;
- struct range range;
- res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
- if (!res)
- return -ENOMEM;
+ /*
+ * @region_idle == true indicates that an administrative agent
+ * wants to manipulate the range partitioning before the devices
+ * are created, so do not send them to the dax_kmem driver by
+ * default.
+ */
+ if (region_idle)
+ flags = 0;
mri = dev->platform_data;
- range.start = res->start;
- range.end = res->end;
- dax_region = alloc_dax_region(dev, pdev->id, &range, mri->target_node,
- PMD_SIZE, 0);
+ dax_region = alloc_dax_region(dev, pdev->id, &mri->range,
+ mri->target_node, PMD_SIZE, flags);
if (!dax_region)
return -ENOMEM;
data = (struct dev_dax_data) {
.dax_region = dax_region,
.id = -1,
- .size = region_idle ? 0 : resource_size(res),
+ .size = region_idle ? 0 : range_len(&mri->range),
};
dev_dax = devm_create_dev_dax(&data);
if (IS_ERR(dev_dax))
@@ -44,22 +47,131 @@ static int dax_hmem_probe(struct platform_device *pdev)
return 0;
}
-static int dax_hmem_remove(struct platform_device *pdev)
-{
- /* devm handles teardown */
- return 0;
-}
-
static struct platform_driver dax_hmem_driver = {
.probe = dax_hmem_probe,
- .remove = dax_hmem_remove,
.driver = {
.name = "hmem",
},
};
-module_platform_driver(dax_hmem_driver);
+static void release_memregion(void *data)
+{
+ memregion_free((long) data);
+}
+
+static void release_hmem(void *pdev)
+{
+ platform_device_unregister(pdev);
+}
+
+static int hmem_register_device(struct device *host, int target_nid,
+ const struct resource *res)
+{
+ struct platform_device *pdev;
+ struct memregion_info info;
+ long id;
+ int rc;
+
+ if (IS_ENABLED(CONFIG_CXL_REGION) &&
+ region_intersects(res->start, resource_size(res), IORESOURCE_MEM,
+ IORES_DESC_CXL) != REGION_DISJOINT) {
+ dev_dbg(host, "deferring range to CXL: %pr\n", res);
+ return 0;
+ }
+
+ rc = region_intersects(res->start, resource_size(res), IORESOURCE_MEM,
+ IORES_DESC_SOFT_RESERVED);
+ if (rc != REGION_INTERSECTS)
+ return 0;
+
+ id = memregion_alloc(GFP_KERNEL);
+ if (id < 0) {
+ dev_err(host, "memregion allocation failure for %pr\n", res);
+ return -ENOMEM;
+ }
+ rc = devm_add_action_or_reset(host, release_memregion, (void *) id);
+ if (rc)
+ return rc;
+
+ pdev = platform_device_alloc("hmem", id);
+ if (!pdev) {
+ dev_err(host, "device allocation failure for %pr\n", res);
+ return -ENOMEM;
+ }
+
+ pdev->dev.numa_node = numa_map_to_online_node(target_nid);
+ info = (struct memregion_info) {
+ .target_node = target_nid,
+ .range = {
+ .start = res->start,
+ .end = res->end,
+ },
+ };
+ rc = platform_device_add_data(pdev, &info, sizeof(info));
+ if (rc < 0) {
+ dev_err(host, "memregion_info allocation failure for %pr\n",
+ res);
+ goto out_put;
+ }
+
+ rc = platform_device_add(pdev);
+ if (rc < 0) {
+ dev_err(host, "%s add failed for %pr\n", dev_name(&pdev->dev),
+ res);
+ goto out_put;
+ }
+
+ return devm_add_action_or_reset(host, release_hmem, pdev);
+
+out_put:
+ platform_device_put(pdev);
+ return rc;
+}
+
+static int dax_hmem_platform_probe(struct platform_device *pdev)
+{
+ return walk_hmem_resources(&pdev->dev, hmem_register_device);
+}
+
+static struct platform_driver dax_hmem_platform_driver = {
+ .probe = dax_hmem_platform_probe,
+ .driver = {
+ .name = "hmem_platform",
+ },
+};
+
+static __init int dax_hmem_init(void)
+{
+ int rc;
+
+ rc = platform_driver_register(&dax_hmem_platform_driver);
+ if (rc)
+ return rc;
+
+ rc = platform_driver_register(&dax_hmem_driver);
+ if (rc)
+ platform_driver_unregister(&dax_hmem_platform_driver);
+
+ return rc;
+}
+
+static __exit void dax_hmem_exit(void)
+{
+ platform_driver_unregister(&dax_hmem_driver);
+ platform_driver_unregister(&dax_hmem_platform_driver);
+}
+
+module_init(dax_hmem_init);
+module_exit(dax_hmem_exit);
+
+/* Allow for CXL to define its own dax regions */
+#if IS_ENABLED(CONFIG_CXL_REGION)
+#if IS_MODULE(CONFIG_CXL_ACPI)
+MODULE_SOFTDEP("pre: cxl_acpi");
+#endif
+#endif
MODULE_ALIAS("platform:hmem*");
+MODULE_ALIAS("platform:hmem_platform*");
MODULE_LICENSE("GPL v2");
MODULE_AUTHOR("Intel Corporation");
diff --git a/drivers/dax/kmem.c b/drivers/dax/kmem.c
index 4852a2dbdb27..918d01d3fbaa 100644
--- a/drivers/dax/kmem.c
+++ b/drivers/dax/kmem.c
@@ -239,6 +239,7 @@ static void dev_dax_kmem_remove(struct dev_dax *dev_dax)
static struct dax_device_driver device_dax_kmem_driver = {
.probe = dev_dax_kmem_probe,
.remove = dev_dax_kmem_remove,
+ .type = DAXDRV_KMEM_TYPE,
};
static int __init dax_kmem_init(void)
diff --git a/include/linux/dax.h b/include/linux/dax.h
index 2b5ecb591059..bf6258472e49 100644
--- a/include/linux/dax.h
+++ b/include/linux/dax.h
@@ -262,11 +262,14 @@ static inline bool dax_mapping(struct address_space *mapping)
}
#ifdef CONFIG_DEV_DAX_HMEM_DEVICES
-void hmem_register_device(int target_nid, struct resource *r);
+void hmem_register_resource(int target_nid, struct resource *r);
#else
-static inline void hmem_register_device(int target_nid, struct resource *r)
+static inline void hmem_register_resource(int target_nid, struct resource *r)
{
}
#endif
+typedef int (*walk_hmem_fn)(struct device *dev, int target_nid,
+ const struct resource *res);
+int walk_hmem_resources(struct device *dev, walk_hmem_fn fn);
#endif
diff --git a/include/linux/memregion.h b/include/linux/memregion.h
index bf83363807ac..c01321467789 100644
--- a/include/linux/memregion.h
+++ b/include/linux/memregion.h
@@ -3,10 +3,12 @@
#define _MEMREGION_H_
#include <linux/types.h>
#include <linux/errno.h>
+#include <linux/range.h>
#include <linux/bug.h>
struct memregion_info {
int target_node;
+ struct range range;
};
#ifdef CONFIG_MEMREGION
diff --git a/include/linux/range.h b/include/linux/range.h
index 274681cc3154..7efb6a9b069b 100644
--- a/include/linux/range.h
+++ b/include/linux/range.h
@@ -13,6 +13,11 @@ static inline u64 range_len(const struct range *range)
return range->end - range->start + 1;
}
+static inline bool range_contains(struct range *r1, struct range *r2)
+{
+ return r1->start <= r2->start && r1->end >= r2->end;
+}
+
int add_range(struct range *range, int az, int nr_range,
u64 start, u64 end);
diff --git a/lib/stackinit_kunit.c b/lib/stackinit_kunit.c
index 4591d6cf5e01..05947a2feb93 100644
--- a/lib/stackinit_kunit.c
+++ b/lib/stackinit_kunit.c
@@ -31,8 +31,8 @@ static volatile u8 forced_mask = 0xff;
static void *fill_start, *target_start;
static size_t fill_size, target_size;
-static bool range_contains(char *haystack_start, size_t haystack_size,
- char *needle_start, size_t needle_size)
+static bool stackinit_range_contains(char *haystack_start, size_t haystack_size,
+ char *needle_start, size_t needle_size)
{
if (needle_start >= haystack_start &&
needle_start + needle_size <= haystack_start + haystack_size)
@@ -175,7 +175,7 @@ static noinline void test_ ## name (struct kunit *test) \
\
/* Validate that compiler lined up fill and target. */ \
KUNIT_ASSERT_TRUE_MSG(test, \
- range_contains(fill_start, fill_size, \
+ stackinit_range_contains(fill_start, fill_size, \
target_start, target_size), \
"stack fill missed target!? " \
"(fill %zu wide, target offset by %d)\n", \
diff --git a/tools/testing/cxl/test/cxl.c b/tools/testing/cxl/test/cxl.c
index a65305218c90..41c5d735308e 100644
--- a/tools/testing/cxl/test/cxl.c
+++ b/tools/testing/cxl/test/cxl.c
@@ -703,6 +703,142 @@ static int mock_decoder_reset(struct cxl_decoder *cxld)
return 0;
}
+static void default_mock_decoder(struct cxl_decoder *cxld)
+{
+ cxld->hpa_range = (struct range){
+ .start = 0,
+ .end = -1,
+ };
+
+ cxld->interleave_ways = 1;
+ cxld->interleave_granularity = 256;
+ cxld->target_type = CXL_DECODER_EXPANDER;
+ cxld->commit = mock_decoder_commit;
+ cxld->reset = mock_decoder_reset;
+}
+
+static int first_decoder(struct device *dev, void *data)
+{
+ struct cxl_decoder *cxld;
+
+ if (!is_switch_decoder(dev))
+ return 0;
+ cxld = to_cxl_decoder(dev);
+ if (cxld->id == 0)
+ return 1;
+ return 0;
+}
+
+static void mock_init_hdm_decoder(struct cxl_decoder *cxld)
+{
+ struct acpi_cedt_cfmws *window = mock_cfmws[0];
+ struct platform_device *pdev = NULL;
+ struct cxl_endpoint_decoder *cxled;
+ struct cxl_switch_decoder *cxlsd;
+ struct cxl_port *port, *iter;
+ const int size = SZ_512M;
+ struct cxl_memdev *cxlmd;
+ struct cxl_dport *dport;
+ struct device *dev;
+ bool hb0 = false;
+ u64 base;
+ int i;
+
+ if (is_endpoint_decoder(&cxld->dev)) {
+ cxled = to_cxl_endpoint_decoder(&cxld->dev);
+ cxlmd = cxled_to_memdev(cxled);
+ WARN_ON(!dev_is_platform(cxlmd->dev.parent));
+ pdev = to_platform_device(cxlmd->dev.parent);
+
+ /* check is endpoint is attach to host-bridge0 */
+ port = cxled_to_port(cxled);
+ do {
+ if (port->uport == &cxl_host_bridge[0]->dev) {
+ hb0 = true;
+ break;
+ }
+ if (is_cxl_port(port->dev.parent))
+ port = to_cxl_port(port->dev.parent);
+ else
+ port = NULL;
+ } while (port);
+ port = cxled_to_port(cxled);
+ }
+
+ /*
+ * The first decoder on the first 2 devices on the first switch
+ * attached to host-bridge0 mock a fake / static RAM region. All
+ * other decoders are default disabled. Given the round robin
+ * assignment those devices are named cxl_mem.0, and cxl_mem.4.
+ *
+ * See 'cxl list -BMPu -m cxl_mem.0,cxl_mem.4'
+ */
+ if (!hb0 || pdev->id % 4 || pdev->id > 4 || cxld->id > 0) {
+ default_mock_decoder(cxld);
+ return;
+ }
+
+ base = window->base_hpa;
+ cxld->hpa_range = (struct range) {
+ .start = base,
+ .end = base + size - 1,
+ };
+
+ cxld->interleave_ways = 2;
+ eig_to_granularity(window->granularity, &cxld->interleave_granularity);
+ cxld->target_type = CXL_DECODER_EXPANDER;
+ cxld->flags = CXL_DECODER_F_ENABLE;
+ cxled->state = CXL_DECODER_STATE_AUTO;
+ port->commit_end = cxld->id;
+ devm_cxl_dpa_reserve(cxled, 0, size / cxld->interleave_ways, 0);
+ cxld->commit = mock_decoder_commit;
+ cxld->reset = mock_decoder_reset;
+
+ /*
+ * Now that endpoint decoder is set up, walk up the hierarchy
+ * and setup the switch and root port decoders targeting @cxlmd.
+ */
+ iter = port;
+ for (i = 0; i < 2; i++) {
+ dport = iter->parent_dport;
+ iter = dport->port;
+ dev = device_find_child(&iter->dev, NULL, first_decoder);
+ /*
+ * Ancestor ports are guaranteed to be enumerated before
+ * @port, and all ports have at least one decoder.
+ */
+ if (WARN_ON(!dev))
+ continue;
+ cxlsd = to_cxl_switch_decoder(dev);
+ if (i == 0) {
+ /* put cxl_mem.4 second in the decode order */
+ if (pdev->id == 4)
+ cxlsd->target[1] = dport;
+ else
+ cxlsd->target[0] = dport;
+ } else
+ cxlsd->target[0] = dport;
+ cxld = &cxlsd->cxld;
+ cxld->target_type = CXL_DECODER_EXPANDER;
+ cxld->flags = CXL_DECODER_F_ENABLE;
+ iter->commit_end = 0;
+ /*
+ * Switch targets 2 endpoints, while host bridge targets
+ * one root port
+ */
+ if (i == 0)
+ cxld->interleave_ways = 2;
+ else
+ cxld->interleave_ways = 1;
+ cxld->interleave_granularity = 256;
+ cxld->hpa_range = (struct range) {
+ .start = base,
+ .end = base + size - 1,
+ };
+ put_device(dev);
+ }
+}
+
static int mock_cxl_enumerate_decoders(struct cxl_hdm *cxlhdm)
{
struct cxl_port *port = cxlhdm->port;
@@ -748,16 +884,7 @@ static int mock_cxl_enumerate_decoders(struct cxl_hdm *cxlhdm)
cxld = &cxled->cxld;
}
- cxld->hpa_range = (struct range) {
- .start = 0,
- .end = -1,
- };
-
- cxld->interleave_ways = min_not_zero(target_count, 1);
- cxld->interleave_granularity = SZ_4K;
- cxld->target_type = CXL_DECODER_EXPANDER;
- cxld->commit = mock_decoder_commit;
- cxld->reset = mock_decoder_reset;
+ mock_init_hdm_decoder(cxld);
if (target_count) {
rc = device_for_each_child(port->uport, &ctx,