25 files changed, 2552 insertions, 427 deletions
diff --git a/drivers/cxl/Kconfig b/drivers/cxl/Kconfig
index ff4e78117b31..fcbf8295fde3 100644
--- a/drivers/cxl/Kconfig
+++ b/drivers/cxl/Kconfig
@@ -82,6 +82,7 @@ config CXL_PMEM
 config CXL_MEM
 	tristate "CXL: Memory Expansion"
 	depends on CXL_PCI
+	select FW_UPLOAD
 	default CXL_BUS
 	help
 	  The CXL.mem protocol allows a device to act as a provider of "System
@@ -139,4 +140,17 @@ config CXL_REGION_INVALIDATION_TEST
 	  If unsure, or if this kernel is meant for production environments,
 	  say N.
 
+config CXL_PMU
+	tristate "CXL Performance Monitoring Unit"
+	default CXL_BUS
+	depends on PERF_EVENTS
+	help
+	  Support performance monitoring as defined in CXL rev 3.0
+	  section 13.2: Performance Monitoring. CXL components may have
+	  one or more CXL Performance Monitoring Units (CPMUs).
+
+	  Say 'y/m' to enable a driver that will attach to performance
+	  monitoring units and provide standard perf based interfaces.
+
+	  If unsure say 'm'.
 endif
diff --git a/drivers/cxl/acpi.c b/drivers/cxl/acpi.c
index 89ee01323d43..658e6b84a769 100644
--- a/drivers/cxl/acpi.c
+++ b/drivers/cxl/acpi.c
@@ -258,7 +258,7 @@ static int cxl_parse_cfmws(union acpi_subtable_headers *header, void *arg,
 
 	cxld = &cxlrd->cxlsd.cxld;
 	cxld->flags = cfmws_to_decoder_flags(cfmws->restrictions);
-	cxld->target_type = CXL_DECODER_EXPANDER;
+	cxld->target_type = CXL_DECODER_HOSTONLYMEM;
 	cxld->hpa_range = (struct range) {
 		.start = res->start,
 		.end = res->end,
diff --git a/drivers/cxl/core/Makefile b/drivers/cxl/core/Makefile
index ca4ae31d8f57..1f66b5d4d935 100644
--- a/drivers/cxl/core/Makefile
+++ b/drivers/cxl/core/Makefile
@@ -12,5 +12,6 @@ cxl_core-y += memdev.o
 cxl_core-y += mbox.o
 cxl_core-y += pci.o
 cxl_core-y += hdm.o
+cxl_core-y += pmu.o
 cxl_core-$(CONFIG_TRACING) += trace.o
 cxl_core-$(CONFIG_CXL_REGION) += region.o
diff --git a/drivers/cxl/core/core.h b/drivers/cxl/core/core.h
index b001669a5133..45e7e044cf4a 100644
--- a/drivers/cxl/core/core.h
+++ b/drivers/cxl/core/core.h
@@ -6,6 +6,7 @@
 
 extern const struct device_type cxl_nvdimm_bridge_type;
 extern const struct device_type cxl_nvdimm_type;
+extern const struct device_type cxl_pmu_type;
 
 extern struct attribute_group cxl_base_attribute_group;
 
diff --git a/drivers/cxl/core/hdm.c b/drivers/cxl/core/hdm.c
index 5abfa9276dac..4449b34a80cc 100644
--- a/drivers/cxl/core/hdm.c
+++ b/drivers/cxl/core/hdm.c
@@ -570,8 +570,9 @@ static void cxld_set_interleave(struct cxl_decoder *cxld, u32 *ctrl)
 
 static void cxld_set_type(struct cxl_decoder *cxld, u32 *ctrl)
 {
-	u32p_replace_bits(ctrl, !!(cxld->target_type == 3),
-			  CXL_HDM_DECODER0_CTRL_TYPE);
+	u32p_replace_bits(ctrl,
+			  !!(cxld->target_type == CXL_DECODER_HOSTONLYMEM),
+			  CXL_HDM_DECODER0_CTRL_HOSTONLY);
 }
 
 static int cxlsd_set_targets(struct cxl_switch_decoder *cxlsd, u64 *tgt)
@@ -764,7 +765,7 @@ static int cxl_setup_hdm_decoder_from_dvsec(
 	if (!len)
 		return -ENOENT;
 
-	cxld->target_type = CXL_DECODER_EXPANDER;
+	cxld->target_type = CXL_DECODER_HOSTONLYMEM;
 	cxld->commit = NULL;
 	cxld->reset = NULL;
 	cxld->hpa_range = info->dvsec_range[which];
@@ -793,8 +794,8 @@ static int init_hdm_decoder(struct cxl_port *port, struct cxl_decoder *cxld,
 			    int *target_map, void __iomem *hdm, int which,
 			    u64 *dpa_base, struct cxl_endpoint_dvsec_info *info)
 {
+	struct cxl_endpoint_decoder *cxled = NULL;
 	u64 size, base, skip, dpa_size, lo, hi;
-	struct cxl_endpoint_decoder *cxled;
 	bool committed;
 	u32 remainder;
 	int i, rc;
@@ -827,6 +828,8 @@ static int init_hdm_decoder(struct cxl_port *port, struct cxl_decoder *cxld,
 		return -ENXIO;
 	}
 
+	if (info)
+		cxled = to_cxl_endpoint_decoder(&cxld->dev);
 	cxld->hpa_range = (struct range) {
 		.start = base,
 		.end = base + size - 1,
@@ -837,10 +840,10 @@ static int init_hdm_decoder(struct cxl_port *port, struct cxl_decoder *cxld,
 		cxld->flags |= CXL_DECODER_F_ENABLE;
 		if (ctrl & CXL_HDM_DECODER0_CTRL_LOCK)
 			cxld->flags |= CXL_DECODER_F_LOCK;
-		if (FIELD_GET(CXL_HDM_DECODER0_CTRL_TYPE, ctrl))
-			cxld->target_type = CXL_DECODER_EXPANDER;
+		if (FIELD_GET(CXL_HDM_DECODER0_CTRL_HOSTONLY, ctrl))
+			cxld->target_type = CXL_DECODER_HOSTONLYMEM;
 		else
-			cxld->target_type = CXL_DECODER_ACCELERATOR;
+			cxld->target_type = CXL_DECODER_DEVMEM;
 		if (cxld->id != port->commit_end + 1) {
 			dev_warn(&port->dev,
 				 "decoder%d.%d: Committed out of order\n",
@@ -856,12 +859,28 @@ static int init_hdm_decoder(struct cxl_port *port, struct cxl_decoder *cxld,
 		}
 		port->commit_end = cxld->id;
 	} else {
-		/* unless / until type-2 drivers arrive, assume type-3 */
-		if (FIELD_GET(CXL_HDM_DECODER0_CTRL_TYPE, ctrl) == 0) {
-			ctrl |= CXL_HDM_DECODER0_CTRL_TYPE;
+		if (cxled) {
+			struct cxl_memdev *cxlmd = cxled_to_memdev(cxled);
+			struct cxl_dev_state *cxlds = cxlmd->cxlds;
+
+			/*
+			 * Default by devtype until a device arrives that needs
+			 * more precision.
+			 */
+			if (cxlds->type == CXL_DEVTYPE_CLASSMEM)
+				cxld->target_type = CXL_DECODER_HOSTONLYMEM;
+			else
+				cxld->target_type = CXL_DECODER_DEVMEM;
+		} else {
+			/* To be overridden by region type at commit time */
+			cxld->target_type = CXL_DECODER_HOSTONLYMEM;
+		}
+
+		if (!FIELD_GET(CXL_HDM_DECODER0_CTRL_HOSTONLY, ctrl) &&
+		    cxld->target_type == CXL_DECODER_HOSTONLYMEM) {
+			ctrl |= CXL_HDM_DECODER0_CTRL_HOSTONLY;
 			writel(ctrl, hdm + CXL_HDM_DECODER0_CTRL_OFFSET(which));
 		}
-		cxld->target_type = CXL_DECODER_EXPANDER;
 	}
 	rc = eiw_to_ways(FIELD_GET(CXL_HDM_DECODER0_CTRL_IW_MASK, ctrl),
 			  &cxld->interleave_ways);
@@ -880,7 +899,7 @@ static int init_hdm_decoder(struct cxl_port *port, struct cxl_decoder *cxld,
 		port->id, cxld->id, cxld->hpa_range.start, cxld->hpa_range.end,
 		cxld->interleave_ways, cxld->interleave_granularity);
 
-	if (!info) {
+	if (!cxled) {
 		lo = readl(hdm + CXL_HDM_DECODER0_TL_LOW(which));
 		hi = readl(hdm + CXL_HDM_DECODER0_TL_HIGH(which));
 		target_list.value = (hi << 32) + lo;
@@ -903,7 +922,6 @@ static int init_hdm_decoder(struct cxl_port *port, struct cxl_decoder *cxld,
 	lo = readl(hdm + CXL_HDM_DECODER0_SKIP_LOW(which));
 	hi = readl(hdm + CXL_HDM_DECODER0_SKIP_HIGH(which));
 	skip = (hi << 32) + lo;
-	cxled = to_cxl_endpoint_decoder(&cxld->dev);
 	rc = devm_cxl_dpa_reserve(cxled, *dpa_base + skip, dpa_size, skip);
 	if (rc) {
 		dev_err(&port->dev,
diff --git a/drivers/cxl/core/mbox.c b/drivers/cxl/core/mbox.c
index bea9cf31a12d..d6d067fbee97 100644
--- a/drivers/cxl/core/mbox.c
+++ b/drivers/cxl/core/mbox.c
@@ -182,7 +182,7 @@ static const char *cxl_mem_opcode_to_name(u16 opcode)
 
 /**
  * cxl_internal_send_cmd() - Kernel internal interface to send a mailbox command
- * @cxlds: The device data for the operation
+ * @mds: The driver data for the operation
  * @mbox_cmd: initialized command to execute
  *
  * Context: Any context.
@@ -198,19 +198,19 @@ static const char *cxl_mem_opcode_to_name(u16 opcode)
  * error. While this distinction can be useful for commands from userspace, the
  * kernel will only be able to use results when both are successful.
  */
-int cxl_internal_send_cmd(struct cxl_dev_state *cxlds,
+int cxl_internal_send_cmd(struct cxl_memdev_state *mds,
 			  struct cxl_mbox_cmd *mbox_cmd)
 {
 	size_t out_size, min_out;
 	int rc;
 
-	if (mbox_cmd->size_in > cxlds->payload_size ||
-	    mbox_cmd->size_out > cxlds->payload_size)
+	if (mbox_cmd->size_in > mds->payload_size ||
+	    mbox_cmd->size_out > mds->payload_size)
 		return -E2BIG;
 
 	out_size = mbox_cmd->size_out;
 	min_out = mbox_cmd->min_out;
-	rc = cxlds->mbox_send(cxlds, mbox_cmd);
+	rc = mds->mbox_send(mds, mbox_cmd);
 	/*
 	 * EIO is reserved for a payload size mismatch and mbox_send()
 	 * may not return this error.
@@ -220,7 +220,8 @@ int cxl_internal_send_cmd(struct cxl_dev_state *cxlds,
 	if (rc)
 		return rc;
 
-	if (mbox_cmd->return_code != CXL_MBOX_CMD_RC_SUCCESS)
+	if (mbox_cmd->return_code != CXL_MBOX_CMD_RC_SUCCESS &&
+	    mbox_cmd->return_code != CXL_MBOX_CMD_RC_BACKGROUND)
 		return cxl_mbox_cmd_rc2errno(mbox_cmd);
 
 	if (!out_size)
@@ -297,7 +298,7 @@ static bool cxl_payload_from_user_allowed(u16 opcode, void *payload_in)
 }
 
 static int cxl_mbox_cmd_ctor(struct cxl_mbox_cmd *mbox,
-			     struct cxl_dev_state *cxlds, u16 opcode,
+			     struct cxl_memdev_state *mds, u16 opcode,
 			     size_t in_size, size_t out_size, u64 in_payload)
 {
 	*mbox = (struct cxl_mbox_cmd) {
@@ -312,7 +313,7 @@ static int cxl_mbox_cmd_ctor(struct cxl_mbox_cmd *mbox,
 			return PTR_ERR(mbox->payload_in);
 
 		if (!cxl_payload_from_user_allowed(opcode, mbox->payload_in)) {
-			dev_dbg(cxlds->dev, "%s: input payload not allowed\n",
+			dev_dbg(mds->cxlds.dev, "%s: input payload not allowed\n",
 				cxl_mem_opcode_to_name(opcode));
 			kvfree(mbox->payload_in);
 			return -EBUSY;
@@ -321,7 +322,7 @@ static int cxl_mbox_cmd_ctor(struct cxl_mbox_cmd *mbox,
 
 	/* Prepare to handle a full payload for variable sized output */
 	if (out_size == CXL_VARIABLE_PAYLOAD)
-		mbox->size_out = cxlds->payload_size;
+		mbox->size_out = mds->payload_size;
 	else
 		mbox->size_out = out_size;
 
@@ -343,7 +344,7 @@ static void cxl_mbox_cmd_dtor(struct cxl_mbox_cmd *mbox)
 
 static int cxl_to_mem_cmd_raw(struct cxl_mem_command *mem_cmd,
 			      const struct cxl_send_command *send_cmd,
-			      struct cxl_dev_state *cxlds)
+			      struct cxl_memdev_state *mds)
 {
 	if (send_cmd->raw.rsvd)
 		return -EINVAL;
@@ -353,13 +354,13 @@ static int cxl_to_mem_cmd_raw(struct cxl_mem_command *mem_cmd,
 	 * gets passed along without further checking, so it must be
 	 * validated here.
 	 */
-	if (send_cmd->out.size > cxlds->payload_size)
+	if (send_cmd->out.size > mds->payload_size)
 		return -EINVAL;
 
 	if (!cxl_mem_raw_command_allowed(send_cmd->raw.opcode))
 		return -EPERM;
 
-	dev_WARN_ONCE(cxlds->dev, true, "raw command path used\n");
+	dev_WARN_ONCE(mds->cxlds.dev, true, "raw command path used\n");
 
 	*mem_cmd = (struct cxl_mem_command) {
 		.info = {
@@ -375,7 +376,7 @@ static int cxl_to_mem_cmd_raw(struct cxl_mem_command *mem_cmd,
 
 static int cxl_to_mem_cmd(struct cxl_mem_command *mem_cmd,
 			  const struct cxl_send_command *send_cmd,
-			  struct cxl_dev_state *cxlds)
+			  struct cxl_memdev_state *mds)
 {
 	struct cxl_mem_command *c = &cxl_mem_commands[send_cmd->id];
 	const struct cxl_command_info *info = &c->info;
@@ -390,11 +391,11 @@ static int cxl_to_mem_cmd(struct cxl_mem_command *mem_cmd,
 		return -EINVAL;
 
 	/* Check that the command is enabled for hardware */
-	if (!test_bit(info->id, cxlds->enabled_cmds))
+	if (!test_bit(info->id, mds->enabled_cmds))
 		return -ENOTTY;
 
 	/* Check that the command is not claimed for exclusive kernel use */
-	if (test_bit(info->id, cxlds->exclusive_cmds))
+	if (test_bit(info->id, mds->exclusive_cmds))
 		return -EBUSY;
 
 	/* Check the input buffer is the expected size */
@@ -423,7 +424,7 @@ static int cxl_to_mem_cmd(struct cxl_mem_command *mem_cmd,
 /**
  * cxl_validate_cmd_from_user() - Check fields for CXL_MEM_SEND_COMMAND.
  * @mbox_cmd: Sanitized and populated &struct cxl_mbox_cmd.
- * @cxlds: The device data for the operation
+ * @mds: The driver data for the operation
  * @send_cmd: &struct cxl_send_command copied in from userspace.
  *
  * Return:
@@ -438,7 +439,7 @@ static int cxl_to_mem_cmd(struct cxl_mem_command *mem_cmd,
  * safe to send to the hardware.
  */
 static int cxl_validate_cmd_from_user(struct cxl_mbox_cmd *mbox_cmd,
-				      struct cxl_dev_state *cxlds,
+				      struct cxl_memdev_state *mds,
 				      const struct cxl_send_command *send_cmd)
 {
 	struct cxl_mem_command mem_cmd;
@@ -452,20 +453,20 @@ static int cxl_validate_cmd_from_user(struct cxl_mbox_cmd *mbox_cmd,
 	 * supports, but output can be arbitrarily large (simply write out as
 	 * much data as the hardware provides).
 	 */
-	if (send_cmd->in.size > cxlds->payload_size)
+	if (send_cmd->in.size > mds->payload_size)
 		return -EINVAL;
 
 	/* Sanitize and construct a cxl_mem_command */
 	if (send_cmd->id == CXL_MEM_COMMAND_ID_RAW)
-		rc = cxl_to_mem_cmd_raw(&mem_cmd, send_cmd, cxlds);
+		rc = cxl_to_mem_cmd_raw(&mem_cmd, send_cmd, mds);
 	else
-		rc = cxl_to_mem_cmd(&mem_cmd, send_cmd, cxlds);
+		rc = cxl_to_mem_cmd(&mem_cmd, send_cmd, mds);
 
 	if (rc)
 		return rc;
 
 	/* Sanitize and construct a cxl_mbox_cmd */
-	return cxl_mbox_cmd_ctor(mbox_cmd, cxlds, mem_cmd.opcode,
+	return cxl_mbox_cmd_ctor(mbox_cmd, mds, mem_cmd.opcode,
 				 mem_cmd.info.size_in, mem_cmd.info.size_out,
 				 send_cmd->in.payload);
 }
@@ -473,6 +474,7 @@ static int cxl_validate_cmd_from_user(struct cxl_mbox_cmd *mbox_cmd,
 int cxl_query_cmd(struct cxl_memdev *cxlmd,
 		  struct cxl_mem_query_commands __user *q)
 {
+	struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlmd->cxlds);
 	struct device *dev = &cxlmd->dev;
 	struct cxl_mem_command *cmd;
 	u32 n_commands;
@@ -494,9 +496,9 @@ int cxl_query_cmd(struct cxl_memdev *cxlmd,
 	cxl_for_each_cmd(cmd) {
 		struct cxl_command_info info = cmd->info;
 
-		if (test_bit(info.id, cxlmd->cxlds->enabled_cmds))
+		if (test_bit(info.id, mds->enabled_cmds))
 			info.flags |= CXL_MEM_COMMAND_FLAG_ENABLED;
-		if (test_bit(info.id, cxlmd->cxlds->exclusive_cmds))
+		if (test_bit(info.id, mds->exclusive_cmds))
 			info.flags |= CXL_MEM_COMMAND_FLAG_EXCLUSIVE;
 
 		if (copy_to_user(&q->commands[j++], &info, sizeof(info)))
@@ -511,7 +513,7 @@ int cxl_query_cmd(struct cxl_memdev *cxlmd,
 
 /**
  * handle_mailbox_cmd_from_user() - Dispatch a mailbox command for userspace.
- * @cxlds: The device data for the operation
+ * @mds: The driver data for the operation
  * @mbox_cmd: The validated mailbox command.
  * @out_payload: Pointer to userspace's output payload.
  * @size_out: (Input) Max payload size to copy out.
@@ -532,12 +534,12 @@ int cxl_query_cmd(struct cxl_memdev *cxlmd,
  *
  * See cxl_send_cmd().
  */
-static int handle_mailbox_cmd_from_user(struct cxl_dev_state *cxlds,
+static int handle_mailbox_cmd_from_user(struct cxl_memdev_state *mds,
 					struct cxl_mbox_cmd *mbox_cmd,
 					u64 out_payload, s32 *size_out,
 					u32 *retval)
 {
-	struct device *dev = cxlds->dev;
+	struct device *dev = mds->cxlds.dev;
 	int rc;
 
 	dev_dbg(dev,
@@ -547,7 +549,7 @@ static int handle_mailbox_cmd_from_user(struct cxl_dev_state *cxlds,
 		cxl_mem_opcode_to_name(mbox_cmd->opcode),
 		mbox_cmd->opcode, mbox_cmd->size_in);
 
-	rc = cxlds->mbox_send(cxlds, mbox_cmd);
+	rc = mds->mbox_send(mds, mbox_cmd);
 	if (rc)
 		goto out;
 
@@ -576,7 +578,7 @@ out:
 
 int cxl_send_cmd(struct cxl_memdev *cxlmd, struct cxl_send_command __user *s)
 {
-	struct cxl_dev_state *cxlds = cxlmd->cxlds;
+	struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlmd->cxlds);
 	struct device *dev = &cxlmd->dev;
 	struct cxl_send_command send;
 	struct cxl_mbox_cmd mbox_cmd;
@@ -587,11 +589,11 @@ int cxl_send_cmd(struct cxl_memdev *cxlmd, struct cxl_send_command __user *s)
 	if (copy_from_user(&send, s, sizeof(send)))
 		return -EFAULT;
 
-	rc = cxl_validate_cmd_from_user(&mbox_cmd, cxlmd->cxlds, &send);
+	rc = cxl_validate_cmd_from_user(&mbox_cmd, mds, &send);
 	if (rc)
 		return rc;
 
-	rc = handle_mailbox_cmd_from_user(cxlds, &mbox_cmd, send.out.payload,
+	rc = handle_mailbox_cmd_from_user(mds, &mbox_cmd, send.out.payload,
 					  &send.out.size, &send.retval);
 	if (rc)
 		return rc;
@@ -602,13 +604,14 @@ int cxl_send_cmd(struct cxl_memdev *cxlmd, struct cxl_send_command __user *s)
 	return 0;
 }
 
-static int cxl_xfer_log(struct cxl_dev_state *cxlds, uuid_t *uuid, u32 *size, u8 *out)
+static int cxl_xfer_log(struct cxl_memdev_state *mds, uuid_t *uuid,
+			u32 *size, u8 *out)
 {
 	u32 remaining = *size;
 	u32 offset = 0;
 
 	while (remaining) {
-		u32 xfer_size = min_t(u32, remaining, cxlds->payload_size);
+		u32 xfer_size = min_t(u32, remaining, mds->payload_size);
 		struct cxl_mbox_cmd mbox_cmd;
 		struct cxl_mbox_get_log log;
 		int rc;
@@ -627,7 +630,7 @@ static int cxl_xfer_log(struct cxl_dev_state *cxlds, uuid_t *uuid, u32 *size, u8
 			.payload_out = out,
 		};
 
-		rc = cxl_internal_send_cmd(cxlds, &mbox_cmd);
+		rc = cxl_internal_send_cmd(mds, &mbox_cmd);
 
 		/*
 		 * The output payload length that indicates the number
@@ -654,17 +657,18 @@ static int cxl_xfer_log(struct cxl_dev_state *cxlds, uuid_t *uuid, u32 *size, u8
 
 /**
  * cxl_walk_cel() - Walk through the Command Effects Log.
- * @cxlds: The device data for the operation
+ * @mds: The driver data for the operation
  * @size: Length of the Command Effects Log.
  * @cel: CEL
  *
  * Iterate over each entry in the CEL and determine if the driver supports the
  * command. If so, the command is enabled for the device and can be used later.
  */
-static void cxl_walk_cel(struct cxl_dev_state *cxlds, size_t size, u8 *cel)
+static void cxl_walk_cel(struct cxl_memdev_state *mds, size_t size, u8 *cel)
 {
 	struct cxl_cel_entry *cel_entry;
 	const int cel_entries = size / sizeof(*cel_entry);
+	struct device *dev = mds->cxlds.dev;
 	int i;
 
 	cel_entry = (struct cxl_cel_entry *) cel;
@@ -674,39 +678,39 @@ static void cxl_walk_cel(struct cxl_dev_state *cxlds, size_t size, u8 *cel)
 		struct cxl_mem_command *cmd = cxl_mem_find_command(opcode);
 
 		if (!cmd && !cxl_is_poison_command(opcode)) {
-			dev_dbg(cxlds->dev,
+			dev_dbg(dev,
 				"Opcode 0x%04x unsupported by driver\n", opcode);
 			continue;
 		}
 
 		if (cmd)
-			set_bit(cmd->info.id, cxlds->enabled_cmds);
+			set_bit(cmd->info.id, mds->enabled_cmds);
 
 		if (cxl_is_poison_command(opcode))
-			cxl_set_poison_cmd_enabled(&cxlds->poison, opcode);
+			cxl_set_poison_cmd_enabled(&mds->poison, opcode);
 
-		dev_dbg(cxlds->dev, "Opcode 0x%04x enabled\n", opcode);
+		dev_dbg(dev, "Opcode 0x%04x enabled\n", opcode);
 	}
 }
 
-static struct cxl_mbox_get_supported_logs *cxl_get_gsl(struct cxl_dev_state *cxlds)
+static struct cxl_mbox_get_supported_logs *cxl_get_gsl(struct cxl_memdev_state *mds)
 {
 	struct cxl_mbox_get_supported_logs *ret;
 	struct cxl_mbox_cmd mbox_cmd;
 	int rc;
 
-	ret = kvmalloc(cxlds->payload_size, GFP_KERNEL);
+	ret = kvmalloc(mds->payload_size, GFP_KERNEL);
 	if (!ret)
 		return ERR_PTR(-ENOMEM);
 
 	mbox_cmd = (struct cxl_mbox_cmd) {
 		.opcode = CXL_MBOX_OP_GET_SUPPORTED_LOGS,
-		.size_out = cxlds->payload_size,
+		.size_out = mds->payload_size,
 		.payload_out = ret,
 		/* At least the record number field must be valid */
 		.min_out = 2,
 	};
-	rc = cxl_internal_send_cmd(cxlds, &mbox_cmd);
+	rc = cxl_internal_send_cmd(mds, &mbox_cmd);
 	if (rc < 0) {
 		kvfree(ret);
 		return ERR_PTR(rc);
@@ -729,22 +733,22 @@ static const uuid_t log_uuid[] = {
 
 /**
  * cxl_enumerate_cmds() - Enumerate commands for a device.
- * @cxlds: The device data for the operation
+ * @mds: The driver data for the operation
  *
  * Returns 0 if enumerate completed successfully.
  *
  * CXL devices have optional support for certain commands. This function will
  * determine the set of supported commands for the hardware and update the
- * enabled_cmds bitmap in the @cxlds.
+ * enabled_cmds bitmap in the @mds.
  */
-int cxl_enumerate_cmds(struct cxl_dev_state *cxlds)
+int cxl_enumerate_cmds(struct cxl_memdev_state *mds)
 {
 	struct cxl_mbox_get_supported_logs *gsl;
-	struct device *dev = cxlds->dev;
+	struct device *dev = mds->cxlds.dev;
 	struct cxl_mem_command *cmd;
 	int i, rc;
 
-	gsl = cxl_get_gsl(cxlds);
+	gsl = cxl_get_gsl(mds);
 	if (IS_ERR(gsl))
 		return PTR_ERR(gsl);
 
@@ -765,19 +769,19 @@ int cxl_enumerate_cmds(struct cxl_dev_state *cxlds)
 			goto out;
 		}
 
-		rc = cxl_xfer_log(cxlds, &uuid, &size, log);
+		rc = cxl_xfer_log(mds, &uuid, &size, log);
 		if (rc) {
 			kvfree(log);
 			goto out;
 		}
 
-		cxl_walk_cel(cxlds, size, log);
+		cxl_walk_cel(mds, size, log);
 		kvfree(log);
 
 		/* In case CEL was bogus, enable some default commands. */
 		cxl_for_each_cmd(cmd)
 			if (cmd->flags & CXL_CMD_FLAG_FORCE_ENABLE)
-				set_bit(cmd->info.id, cxlds->enabled_cmds);
+				set_bit(cmd->info.id, mds->enabled_cmds);
 
 		/* Found the required CEL */
 		rc = 0;
@@ -838,7 +842,7 @@ static void cxl_event_trace_record(const struct cxl_memdev *cxlmd,
 	}
 }
 
-static int cxl_clear_event_record(struct cxl_dev_state *cxlds,
+static int cxl_clear_event_record(struct cxl_memdev_state *mds,
 				  enum cxl_event_log_type log,
 				  struct cxl_get_event_payload *get_pl)
 {
@@ -852,9 +856,9 @@ static int cxl_clear_event_record(struct cxl_dev_state *cxlds,
 	int i;
 
 	/* Payload size may limit the max handles */
-	if (pl_size > cxlds->payload_size) {
-		max_handles = (cxlds->payload_size - sizeof(*payload)) /
-				sizeof(__le16);
+	if (pl_size > mds->payload_size) {
+		max_handles = (mds->payload_size - sizeof(*payload)) /
+			      sizeof(__le16);
 		pl_size = struct_size(payload, handles, max_handles);
 	}
 
@@ -879,12 +883,12 @@ static int cxl_clear_event_record(struct cxl_dev_state *cxlds,
 	i = 0;
 	for (cnt = 0; cnt < total; cnt++) {
 		payload->handles[i++] = get_pl->records[cnt].hdr.handle;
-		dev_dbg(cxlds->dev, "Event log '%d': Clearing %u\n",
-			log, le16_to_cpu(payload->handles[i]));
+		dev_dbg(mds->cxlds.dev, "Event log '%d': Clearing %u\n", log,
+			le16_to_cpu(payload->handles[i]));
 
 		if (i == max_handles) {
 			payload->nr_recs = i;
-			rc = cxl_internal_send_cmd(cxlds, &mbox_cmd);
+			rc = cxl_internal_send_cmd(mds, &mbox_cmd);
 			if (rc)
 				goto free_pl;
 			i = 0;
@@ -895,7 +899,7 @@ static int cxl_clear_event_record(struct cxl_dev_state *cxlds,
 	if (i) {
 		payload->nr_recs = i;
 		mbox_cmd.size_in = struct_size(payload, handles, i);
-		rc = cxl_internal_send_cmd(cxlds, &mbox_cmd);
+		rc = cxl_internal_send_cmd(mds, &mbox_cmd);
 		if (rc)
 			goto free_pl;
 	}
@@ -905,32 +909,34 @@ free_pl:
 	return rc;
 }
 
-static void cxl_mem_get_records_log(struct cxl_dev_state *cxlds,
+static void cxl_mem_get_records_log(struct cxl_memdev_state *mds,
 				    enum cxl_event_log_type type)
 {
+	struct cxl_memdev *cxlmd = mds->cxlds.cxlmd;
+	struct device *dev = mds->cxlds.dev;
 	struct cxl_get_event_payload *payload;
 	struct cxl_mbox_cmd mbox_cmd;
 	u8 log_type = type;
 	u16 nr_rec;
 
-	mutex_lock(&cxlds->event.log_lock);
-	payload = cxlds->event.buf;
+	mutex_lock(&mds->event.log_lock);
+	payload = mds->event.buf;
 
 	mbox_cmd = (struct cxl_mbox_cmd) {
 		.opcode = CXL_MBOX_OP_GET_EVENT_RECORD,
 		.payload_in = &log_type,
 		.size_in = sizeof(log_type),
 		.payload_out = payload,
-		.size_out = cxlds->payload_size,
+		.size_out = mds->payload_size,
 		.min_out = struct_size(payload, records, 0),
 	};
 
 	do {
 		int rc, i;
 
-		rc = cxl_internal_send_cmd(cxlds, &mbox_cmd);
+		rc = cxl_internal_send_cmd(mds, &mbox_cmd);
 		if (rc) {
-			dev_err_ratelimited(cxlds->dev,
+			dev_err_ratelimited(dev,
 				"Event log '%d': Failed to query event records : %d",
 				type, rc);
 			break;
@@ -941,27 +947,27 @@ static void cxl_mem_get_records_log(struct cxl_dev_state *cxlds,
 			break;
 
 		for (i = 0; i < nr_rec; i++)
-			cxl_event_trace_record(cxlds->cxlmd, type,
+			cxl_event_trace_record(cxlmd, type,
 					       &payload->records[i]);
 
 		if (payload->flags & CXL_GET_EVENT_FLAG_OVERFLOW)
-			trace_cxl_overflow(cxlds->cxlmd, type, payload);
+			trace_cxl_overflow(cxlmd, type, payload);
 
-		rc = cxl_clear_event_record(cxlds, type, payload);
+		rc = cxl_clear_event_record(mds, type, payload);
 		if (rc) {
-			dev_err_ratelimited(cxlds->dev,
+			dev_err_ratelimited(dev,
 				"Event log '%d': Failed to clear events : %d",
 				type, rc);
 			break;
 		}
 	} while (nr_rec);
 
-	mutex_unlock(&cxlds->event.log_lock);
+	mutex_unlock(&mds->event.log_lock);
 }
 
 /**
  * cxl_mem_get_event_records - Get Event Records from the device
- * @cxlds: The device data for the operation
+ * @mds: The driver data for the operation
  * @status: Event Status register value identifying which events are available.
  *
  * Retrieve all event records available on the device, report them as trace
@@ -970,24 +976,24 @@ static void cxl_mem_get_records_log(struct cxl_dev_state *cxlds,
  * See CXL rev 3.0 @8.2.9.2.2 Get Event Records
  * See CXL rev 3.0 @8.2.9.2.3 Clear Event Records
  */
-void cxl_mem_get_event_records(struct cxl_dev_state *cxlds, u32 status)
+void cxl_mem_get_event_records(struct cxl_memdev_state *mds, u32 status)
 {
-	dev_dbg(cxlds->dev, "Reading event logs: %x\n", status);
+	dev_dbg(mds->cxlds.dev, "Reading event logs: %x\n", status);
 
 	if (status & CXLDEV_EVENT_STATUS_FATAL)
-		cxl_mem_get_records_log(cxlds, CXL_EVENT_TYPE_FATAL);
+		cxl_mem_get_records_log(mds, CXL_EVENT_TYPE_FATAL);
 	if (status & CXLDEV_EVENT_STATUS_FAIL)
-		cxl_mem_get_records_log(cxlds, CXL_EVENT_TYPE_FAIL);
+		cxl_mem_get_records_log(mds, CXL_EVENT_TYPE_FAIL);
 	if (status & CXLDEV_EVENT_STATUS_WARN)
-		cxl_mem_get_records_log(cxlds, CXL_EVENT_TYPE_WARN);
+		cxl_mem_get_records_log(mds, CXL_EVENT_TYPE_WARN);
 	if (status & CXLDEV_EVENT_STATUS_INFO)
-		cxl_mem_get_records_log(cxlds, CXL_EVENT_TYPE_INFO);
+		cxl_mem_get_records_log(mds, CXL_EVENT_TYPE_INFO);
 }
 EXPORT_SYMBOL_NS_GPL(cxl_mem_get_event_records, CXL);
 
 /**
  * cxl_mem_get_partition_info - Get partition info
- * @cxlds: The device data for the operation
+ * @mds: The driver data for the operation
  *
  * Retrieve the current partition info for the device specified.  The active
  * values are the current capacity in bytes.  If not 0, the 'next' values are
@@ -997,7 +1003,7 @@ EXPORT_SYMBOL_NS_GPL(cxl_mem_get_event_records, CXL);
  *
  * See CXL @8.2.9.5.2.1 Get Partition Info
  */
-static int cxl_mem_get_partition_info(struct cxl_dev_state *cxlds)
+static int cxl_mem_get_partition_info(struct cxl_memdev_state *mds)
 {
 	struct cxl_mbox_get_partition_info pi;
 	struct cxl_mbox_cmd mbox_cmd;
@@ -1008,17 +1014,17 @@ static int cxl_mem_get_partition_info(struct cxl_dev_state *cxlds)
 		.size_out = sizeof(pi),
 		.payload_out = &pi,
 	};
-	rc = cxl_internal_send_cmd(cxlds, &mbox_cmd);
+	rc = cxl_internal_send_cmd(mds, &mbox_cmd);
 	if (rc)
 		return rc;
 
-	cxlds->active_volatile_bytes =
+	mds->active_volatile_bytes =
 		le64_to_cpu(pi.active_volatile_cap) * CXL_CAPACITY_MULTIPLIER;
-	cxlds->active_persistent_bytes =
+	mds->active_persistent_bytes =
 		le64_to_cpu(pi.active_persistent_cap) * CXL_CAPACITY_MULTIPLIER;
-	cxlds->next_volatile_bytes =
+	mds->next_volatile_bytes =
 		le64_to_cpu(pi.next_volatile_cap) * CXL_CAPACITY_MULTIPLIER;
-	cxlds->next_persistent_bytes =
+	mds->next_persistent_bytes =
 		le64_to_cpu(pi.next_volatile_cap) * CXL_CAPACITY_MULTIPLIER;
 
 	return 0;
@@ -1026,14 +1032,14 @@ static int cxl_mem_get_partition_info(struct cxl_dev_state *cxlds)
 
 /**
  * cxl_dev_state_identify() - Send the IDENTIFY command to the device.
- * @cxlds: The device data for the operation
+ * @mds: The driver data for the operation
  *
  * Return: 0 if identify was executed successfully or media not ready.
  *
  * This will dispatch the identify command to the device and on success populate
  * structures to be exported to sysfs.
  */
-int cxl_dev_state_identify(struct cxl_dev_state *cxlds)
+int cxl_dev_state_identify(struct cxl_memdev_state *mds)
 {
 	/* See CXL 2.0 Table 175 Identify Memory Device Output Payload */
 	struct cxl_mbox_identify id;
@@ -1041,7 +1047,7 @@ int cxl_dev_state_identify(struct cxl_dev_state *cxlds)
 	u32 val;
 	int rc;
 
-	if (!cxlds->media_ready)
+	if (!mds->cxlds.media_ready)
 		return 0;
 
 	mbox_cmd = (struct cxl_mbox_cmd) {
@@ -1049,31 +1055,92 @@ int cxl_dev_state_identify(struct cxl_dev_state *cxlds)
 		.size_out = sizeof(id),
 		.payload_out = &id,
 	};
-	rc = cxl_internal_send_cmd(cxlds, &mbox_cmd);
+	rc = cxl_internal_send_cmd(mds, &mbox_cmd);
 	if (rc < 0)
 		return rc;
 
-	cxlds->total_bytes =
+	mds->total_bytes =
 		le64_to_cpu(id.total_capacity) * CXL_CAPACITY_MULTIPLIER;
-	cxlds->volatile_only_bytes =
+	mds->volatile_only_bytes =
 		le64_to_cpu(id.volatile_capacity) * CXL_CAPACITY_MULTIPLIER;
-	cxlds->persistent_only_bytes =
+	mds->persistent_only_bytes =
 		le64_to_cpu(id.persistent_capacity) * CXL_CAPACITY_MULTIPLIER;
-	cxlds->partition_align_bytes =
+	mds->partition_align_bytes =
 		le64_to_cpu(id.partition_align) * CXL_CAPACITY_MULTIPLIER;
 
-	cxlds->lsa_size = le32_to_cpu(id.lsa_size);
-	memcpy(cxlds->firmware_version, id.fw_revision, sizeof(id.fw_revision));
+	mds->lsa_size = le32_to_cpu(id.lsa_size);
+	memcpy(mds->firmware_version, id.fw_revision,
+	       sizeof(id.fw_revision));
 
-	if (test_bit(CXL_POISON_ENABLED_LIST, cxlds->poison.enabled_cmds)) {
+	if (test_bit(CXL_POISON_ENABLED_LIST, mds->poison.enabled_cmds)) {
 		val = get_unaligned_le24(id.poison_list_max_mer);
-		cxlds->poison.max_errors = min_t(u32, val, CXL_POISON_LIST_MAX);
+		mds->poison.max_errors = min_t(u32, val, CXL_POISON_LIST_MAX);
 	}
 
 	return 0;
 }
 EXPORT_SYMBOL_NS_GPL(cxl_dev_state_identify, CXL);
 
+/**
+ * cxl_mem_sanitize() - Send a sanitization command to the device.
+ * @mds: The device data for the operation
+ * @cmd: The specific sanitization command opcode
+ *
+ * Return: 0 if the command was executed successfully, regardless of
+ * whether or not the actual security operation is done in the background,
+ * such as for the Sanitize case.
+ * Error return values can be the result of the mailbox command, -EINVAL
+ * when security requirements are not met or invalid contexts.
+ *
+ * See CXL 3.0 @8.2.9.8.5.1 Sanitize and @8.2.9.8.5.2 Secure Erase.
+ */
+int cxl_mem_sanitize(struct cxl_memdev_state *mds, u16 cmd)
+{
+	int rc;
+	u32 sec_out = 0;
+	struct cxl_get_security_output {
+		__le32 flags;
+	} out;
+	struct cxl_mbox_cmd sec_cmd = {
+		.opcode = CXL_MBOX_OP_GET_SECURITY_STATE,
+		.payload_out = &out,
+		.size_out = sizeof(out),
+	};
+	struct cxl_mbox_cmd mbox_cmd = { .opcode = cmd };
+	struct cxl_dev_state *cxlds = &mds->cxlds;
+
+	if (cmd != CXL_MBOX_OP_SANITIZE && cmd != CXL_MBOX_OP_SECURE_ERASE)
+		return -EINVAL;
+
+	rc = cxl_internal_send_cmd(mds, &sec_cmd);
+	if (rc < 0) {
+		dev_err(cxlds->dev, "Failed to get security state : %d", rc);
+		return rc;
+	}
+
+	/*
+	 * Prior to using these commands, any security applied to
+	 * the user data areas of the device shall be DISABLED (or
+	 * UNLOCKED for secure erase case).
+	 */
+	sec_out = le32_to_cpu(out.flags);
+	if (sec_out & CXL_PMEM_SEC_STATE_USER_PASS_SET)
+		return -EINVAL;
+
+	if (cmd == CXL_MBOX_OP_SECURE_ERASE &&
+	    sec_out & CXL_PMEM_SEC_STATE_LOCKED)
+		return -EINVAL;
+
+	rc = cxl_internal_send_cmd(mds, &mbox_cmd);
+	if (rc < 0) {
+		dev_err(cxlds->dev, "Failed to sanitize device : %d", rc);
+		return rc;
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL_NS_GPL(cxl_mem_sanitize, CXL);
+
 static int add_dpa_res(struct device *dev, struct resource *parent,
 		       struct resource *res, resource_size_t start,
 		       resource_size_t size, const char *type)
@@ -1100,8 +1167,9 @@ static int add_dpa_res(struct device *dev, struct resource *parent,
 	return 0;
 }
 
-int cxl_mem_create_range_info(struct cxl_dev_state *cxlds)
+int cxl_mem_create_range_info(struct cxl_memdev_state *mds)
 {
+	struct cxl_dev_state *cxlds = &mds->cxlds;
 	struct device *dev = cxlds->dev;
 	int rc;
 
@@ -1113,35 +1181,35 @@ int cxl_mem_create_range_info(struct cxl_dev_state *cxlds)
 	}
 
 	cxlds->dpa_res =
-		(struct resource)DEFINE_RES_MEM(0, cxlds->total_bytes);
+		(struct resource)DEFINE_RES_MEM(0, mds->total_bytes);
 
-	if (cxlds->partition_align_bytes == 0) {
+	if (mds->partition_align_bytes == 0) {
 		rc = add_dpa_res(dev, &cxlds->dpa_res, &cxlds->ram_res, 0,
-				 cxlds->volatile_only_bytes, "ram");
+				 mds->volatile_only_bytes, "ram");
 		if (rc)
 			return rc;
 		return add_dpa_res(dev, &cxlds->dpa_res, &cxlds->pmem_res,
-				   cxlds->volatile_only_bytes,
-				   cxlds->persistent_only_bytes, "pmem");
+				   mds->volatile_only_bytes,
+				   mds->persistent_only_bytes, "pmem");
 	}
 
-	rc = cxl_mem_get_partition_info(cxlds);
+	rc = cxl_mem_get_partition_info(mds);
 	if (rc) {
 		dev_err(dev, "Failed to query partition information\n");
 		return rc;
 	}
 
 	rc = add_dpa_res(dev, &cxlds->dpa_res, &cxlds->ram_res, 0,
-			 cxlds->active_volatile_bytes, "ram");
+			 mds->active_volatile_bytes, "ram");
 	if (rc)
 		return rc;
 	return add_dpa_res(dev, &cxlds->dpa_res, &cxlds->pmem_res,
-			   cxlds->active_volatile_bytes,
-			   cxlds->active_persistent_bytes, "pmem");
+			   mds->active_volatile_bytes,
+			   mds->active_persistent_bytes, "pmem");
 }
 EXPORT_SYMBOL_NS_GPL(cxl_mem_create_range_info, CXL);
 
-int cxl_set_timestamp(struct cxl_dev_state *cxlds)
+int cxl_set_timestamp(struct cxl_memdev_state *mds)
 {
 	struct cxl_mbox_cmd mbox_cmd;
 	struct cxl_mbox_set_timestamp_in pi;
@@ -1154,7 +1222,7 @@ int cxl_set_timestamp(struct cxl_dev_state *cxlds)
 		.payload_in = &pi,
 	};
 
-	rc = cxl_internal_send_cmd(cxlds, &mbox_cmd);
+	rc = cxl_internal_send_cmd(mds, &mbox_cmd);
 	/*
 	 * Command is optional. Devices may have another way of providing
 	 * a timestamp, or may return all 0s in timestamp fields.
@@ -1170,18 +1238,18 @@ EXPORT_SYMBOL_NS_GPL(cxl_set_timestamp, CXL);
 int cxl_mem_get_poison(struct cxl_memdev *cxlmd, u64 offset, u64 len,
 		       struct cxl_region *cxlr)
 {
-	struct cxl_dev_state *cxlds = cxlmd->cxlds;
+	struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlmd->cxlds);
 	struct cxl_mbox_poison_out *po;
 	struct cxl_mbox_poison_in pi;
 	struct cxl_mbox_cmd mbox_cmd;
 	int nr_records = 0;
 	int rc;
 
-	rc = mutex_lock_interruptible(&cxlds->poison.lock);
+	rc = mutex_lock_interruptible(&mds->poison.lock);
 	if (rc)
 		return rc;
 
-	po = cxlds->poison.list_out;
+	po = mds->poison.list_out;
 	pi.offset = cpu_to_le64(offset);
 	pi.length = cpu_to_le64(len / CXL_POISON_LEN_MULT);
 
@@ -1189,13 +1257,13 @@ int cxl_mem_get_poison(struct cxl_memdev *cxlmd, u64 offset, u64 len,
 		.opcode = CXL_MBOX_OP_GET_POISON,
 		.size_in = sizeof(pi),
 		.payload_in = &pi,
-		.size_out = cxlds->payload_size,
+		.size_out = mds->payload_size,
 		.payload_out = po,
 		.min_out = struct_size(po, record, 0),
 	};
 
 	do {
-		rc = cxl_internal_send_cmd(cxlds, &mbox_cmd);
+		rc = cxl_internal_send_cmd(mds, &mbox_cmd);
 		if (rc)
 			break;
 
@@ -1206,14 +1274,14 @@ int cxl_mem_get_poison(struct cxl_memdev *cxlmd, u64 offset, u64 len,
 
 		/* Protect against an uncleared _FLAG_MORE */
 		nr_records = nr_records + le16_to_cpu(po->count);
-		if (nr_records >= cxlds->poison.max_errors) {
+		if (nr_records >= mds->poison.max_errors) {
 			dev_dbg(&cxlmd->dev, "Max Error Records reached: %d\n",
 				nr_records);
 			break;
 		}
 	} while (po->flags & CXL_POISON_FLAG_MORE);
 
-	mutex_unlock(&cxlds->poison.lock);
+	mutex_unlock(&mds->poison.lock);
 	return rc;
 }
 EXPORT_SYMBOL_NS_GPL(cxl_mem_get_poison, CXL);
@@ -1223,52 +1291,53 @@ static void free_poison_buf(void *buf)
 	kvfree(buf);
 }
 
-/* Get Poison List output buffer is protected by cxlds->poison.lock */
-static int cxl_poison_alloc_buf(struct cxl_dev_state *cxlds)
+/* Get Poison List output buffer is protected by mds->poison.lock */
+static int cxl_poison_alloc_buf(struct cxl_memdev_state *mds)
 {
-	cxlds->poison.list_out = kvmalloc(cxlds->payload_size, GFP_KERNEL);
-	if (!cxlds->poison.list_out)
+	mds->poison.list_out = kvmalloc(mds->payload_size, GFP_KERNEL);
+	if (!mds->poison.list_out)
 		return -ENOMEM;
 
-	return devm_add_action_or_reset(cxlds->dev, free_poison_buf,
-					cxlds->poison.list_out);
+	return devm_add_action_or_reset(mds->cxlds.dev, free_poison_buf,
+					mds->poison.list_out);
 }
 
-int cxl_poison_state_init(struct cxl_dev_state *cxlds)
+int cxl_poison_state_init(struct cxl_memdev_state *mds)
 {
 	int rc;
 
-	if (!test_bit(CXL_POISON_ENABLED_LIST, cxlds->poison.enabled_cmds))
+	if (!test_bit(CXL_POISON_ENABLED_LIST, mds->poison.enabled_cmds))
 		return 0;
 
-	rc = cxl_poison_alloc_buf(cxlds);
+	rc = cxl_poison_alloc_buf(mds);
 	if (rc) {
-		clear_bit(CXL_POISON_ENABLED_LIST, cxlds->poison.enabled_cmds);
+		clear_bit(CXL_POISON_ENABLED_LIST, mds->poison.enabled_cmds);
 		return rc;
 	}
 
-	mutex_init(&cxlds->poison.lock);
+	mutex_init(&mds->poison.lock);
 	return 0;
 }
 EXPORT_SYMBOL_NS_GPL(cxl_poison_state_init, CXL);
 
-struct cxl_dev_state *cxl_dev_state_create(struct device *dev)
+struct cxl_memdev_state *cxl_memdev_state_create(struct device *dev)
 {
-	struct cxl_dev_state *cxlds;
+	struct cxl_memdev_state *mds;
 
-	cxlds = devm_kzalloc(dev, sizeof(*cxlds), GFP_KERNEL);
-	if (!cxlds) {
+	mds = devm_kzalloc(dev, sizeof(*mds), GFP_KERNEL);
+	if (!mds) {
 		dev_err(dev, "No memory available\n");
 		return ERR_PTR(-ENOMEM);
 	}
 
-	mutex_init(&cxlds->mbox_mutex);
-	mutex_init(&cxlds->event.log_lock);
-	cxlds->dev = dev;
+	mutex_init(&mds->mbox_mutex);
+	mutex_init(&mds->event.log_lock);
+	mds->cxlds.dev = dev;
+	mds->cxlds.type = CXL_DEVTYPE_CLASSMEM;
 
-	return cxlds;
+	return mds;
 }
-EXPORT_SYMBOL_NS_GPL(cxl_dev_state_create, CXL);
+EXPORT_SYMBOL_NS_GPL(cxl_memdev_state_create, CXL);
 
 void __init cxl_mbox_init(void)
 {
diff --git a/drivers/cxl/core/memdev.c b/drivers/cxl/core/memdev.c
index 057a43267290..90237b9487a7 100644
--- a/drivers/cxl/core/memdev.c
+++ b/drivers/cxl/core/memdev.c
@@ -1,6 +1,8 @@
 // SPDX-License-Identifier: GPL-2.0-only
 /* Copyright(c) 2020 Intel Corporation. */
 
+#include <linux/io-64-nonatomic-lo-hi.h>
+#include <linux/firmware.h>
 #include <linux/device.h>
 #include <linux/slab.h>
 #include <linux/idr.h>
@@ -39,8 +41,11 @@ static ssize_t firmware_version_show(struct device *dev,
 {
 	struct cxl_memdev *cxlmd = to_cxl_memdev(dev);
 	struct cxl_dev_state *cxlds = cxlmd->cxlds;
+	struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlds);
 
-	return sysfs_emit(buf, "%.16s\n", cxlds->firmware_version);
+	if (!mds)
+		return sysfs_emit(buf, "\n");
+	return sysfs_emit(buf, "%.16s\n", mds->firmware_version);
 }
 static DEVICE_ATTR_RO(firmware_version);
 
@@ -49,8 +54,11 @@ static ssize_t payload_max_show(struct device *dev,
 {
 	struct cxl_memdev *cxlmd = to_cxl_memdev(dev);
 	struct cxl_dev_state *cxlds = cxlmd->cxlds;
+	struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlds);
 
-	return sysfs_emit(buf, "%zu\n", cxlds->payload_size);
+	if (!mds)
+		return sysfs_emit(buf, "\n");
+	return sysfs_emit(buf, "%zu\n", mds->payload_size);
 }
 static DEVICE_ATTR_RO(payload_max);
 
@@ -59,8 +67,11 @@ static ssize_t label_storage_size_show(struct device *dev,
 {
 	struct cxl_memdev *cxlmd = to_cxl_memdev(dev);
 	struct cxl_dev_state *cxlds = cxlmd->cxlds;
+	struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlds);
 
-	return sysfs_emit(buf, "%zu\n", cxlds->lsa_size);
+	if (!mds)
+		return sysfs_emit(buf, "\n");
+	return sysfs_emit(buf, "%zu\n", mds->lsa_size);
 }
 static DEVICE_ATTR_RO(label_storage_size);
 
@@ -107,6 +118,89 @@ static ssize_t numa_node_show(struct device *dev, struct device_attribute *attr,
 }
 static DEVICE_ATTR_RO(numa_node);
 
+static ssize_t security_state_show(struct device *dev,
+				   struct device_attribute *attr,
+				   char *buf)
+{
+	struct cxl_memdev *cxlmd = to_cxl_memdev(dev);
+	struct cxl_dev_state *cxlds = cxlmd->cxlds;
+	struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlds);
+	u64 reg = readq(cxlds->regs.mbox + CXLDEV_MBOX_BG_CMD_STATUS_OFFSET);
+	u32 pct = FIELD_GET(CXLDEV_MBOX_BG_CMD_COMMAND_PCT_MASK, reg);
+	u16 cmd = FIELD_GET(CXLDEV_MBOX_BG_CMD_COMMAND_OPCODE_MASK, reg);
+	unsigned long state = mds->security.state;
+
+	if (cmd == CXL_MBOX_OP_SANITIZE && pct != 100)
+		return sysfs_emit(buf, "sanitize\n");
+
+	if (!(state & CXL_PMEM_SEC_STATE_USER_PASS_SET))
+		return sysfs_emit(buf, "disabled\n");
+	if (state & CXL_PMEM_SEC_STATE_FROZEN ||
+	    state & CXL_PMEM_SEC_STATE_MASTER_PLIMIT ||
+	    state & CXL_PMEM_SEC_STATE_USER_PLIMIT)
+		return sysfs_emit(buf, "frozen\n");
+	if (state & CXL_PMEM_SEC_STATE_LOCKED)
+		return sysfs_emit(buf, "locked\n");
+	else
+		return sysfs_emit(buf, "unlocked\n");
+}
+static struct device_attribute dev_attr_security_state =
+	__ATTR(state, 0444, security_state_show, NULL);
+
+static ssize_t security_sanitize_store(struct device *dev,
+				       struct device_attribute *attr,
+				       const char *buf, size_t len)
+{
+	struct cxl_memdev *cxlmd = to_cxl_memdev(dev);
+	struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlmd->cxlds);
+	struct cxl_port *port = cxlmd->endpoint;
+	bool sanitize;
+	ssize_t rc;
+
+	if (kstrtobool(buf, &sanitize) || !sanitize)
+		return -EINVAL;
+
+	if (!port || !is_cxl_endpoint(port))
+		return -EINVAL;
+
+	/* ensure no regions are mapped to this memdev */
+	if (port->commit_end != -1)
+		return -EBUSY;
+
+	rc = cxl_mem_sanitize(mds, CXL_MBOX_OP_SANITIZE);
+
+	return rc ? rc : len;
+}
+static struct device_attribute dev_attr_security_sanitize =
+	__ATTR(sanitize, 0200, NULL, security_sanitize_store);
+
+static ssize_t security_erase_store(struct device *dev,
+				    struct device_attribute *attr,
+				    const char *buf, size_t len)
+{
+	struct cxl_memdev *cxlmd = to_cxl_memdev(dev);
+	struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlmd->cxlds);
+	struct cxl_port *port = cxlmd->endpoint;
+	ssize_t rc;
+	bool erase;
+
+	if (kstrtobool(buf, &erase) || !erase)
+		return -EINVAL;
+
+	if (!port || !is_cxl_endpoint(port))
+		return -EINVAL;
+
+	/* ensure no regions are mapped to this memdev */
+	if (port->commit_end != -1)
+		return -EBUSY;
+
+	rc = cxl_mem_sanitize(mds, CXL_MBOX_OP_SECURE_ERASE);
+
+	return rc ? rc : len;
+}
+static struct device_attribute dev_attr_security_erase =
+	__ATTR(erase, 0200, NULL, security_erase_store);
+
 static int cxl_get_poison_by_memdev(struct cxl_memdev *cxlmd)
 {
 	struct cxl_dev_state *cxlds = cxlmd->cxlds;
@@ -140,7 +234,7 @@ int cxl_trigger_poison_list(struct cxl_memdev *cxlmd)
 	struct cxl_port *port;
 	int rc;
 
-	port = dev_get_drvdata(&cxlmd->dev);
+	port = cxlmd->endpoint;
 	if (!port || !is_cxl_endpoint(port))
 		return -EINVAL;
 
@@ -198,7 +292,7 @@ static struct cxl_region *cxl_dpa_to_region(struct cxl_memdev *cxlmd, u64 dpa)
 	ctx = (struct cxl_dpa_to_region_context) {
 		.dpa = dpa,
 	};
-	port = dev_get_drvdata(&cxlmd->dev);
+	port = cxlmd->endpoint;
 	if (port && is_cxl_endpoint(port) && port->commit_end != -1)
 		device_for_each_child(&port->dev, &ctx, __cxl_dpa_to_region);
 
@@ -231,7 +325,7 @@ static int cxl_validate_poison_dpa(struct cxl_memdev *cxlmd, u64 dpa)
 
 int cxl_inject_poison(struct cxl_memdev *cxlmd, u64 dpa)
 {
-	struct cxl_dev_state *cxlds = cxlmd->cxlds;
+	struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlmd->cxlds);
 	struct cxl_mbox_inject_poison inject;
 	struct cxl_poison_record record;
 	struct cxl_mbox_cmd mbox_cmd;
@@ -255,13 +349,13 @@ int cxl_inject_poison(struct cxl_memdev *cxlmd, u64 dpa)
 		.size_in = sizeof(inject),
 		.payload_in = &inject,
 	};
-	rc = cxl_internal_send_cmd(cxlds, &mbox_cmd);
+	rc = cxl_internal_send_cmd(mds, &mbox_cmd);
 	if (rc)
 		goto out;
 
 	cxlr = cxl_dpa_to_region(cxlmd, dpa);
 	if (cxlr)
-		dev_warn_once(cxlds->dev,
+		dev_warn_once(mds->cxlds.dev,
 			      "poison inject dpa:%#llx region: %s\n", dpa,
 			      dev_name(&cxlr->dev));
 
@@ -279,7 +373,7 @@ EXPORT_SYMBOL_NS_GPL(cxl_inject_poison, CXL);
 
 int cxl_clear_poison(struct cxl_memdev *cxlmd, u64 dpa)
 {
-	struct cxl_dev_state *cxlds = cxlmd->cxlds;
+	struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlmd->cxlds);
 	struct cxl_mbox_clear_poison clear;
 	struct cxl_poison_record record;
 	struct cxl_mbox_cmd mbox_cmd;
@@ -312,14 +406,15 @@ int cxl_clear_poison(struct cxl_memdev *cxlmd, u64 dpa)
 		.payload_in = &clear,
 	};
 
-	rc = cxl_internal_send_cmd(cxlds, &mbox_cmd);
+	rc = cxl_internal_send_cmd(mds, &mbox_cmd);
 	if (rc)
 		goto out;
 
 	cxlr = cxl_dpa_to_region(cxlmd, dpa);
 	if (cxlr)
-		dev_warn_once(cxlds->dev, "poison clear dpa:%#llx region: %s\n",
-			      dpa, dev_name(&cxlr->dev));
+		dev_warn_once(mds->cxlds.dev,
+			      "poison clear dpa:%#llx region: %s\n", dpa,
+			      dev_name(&cxlr->dev));
 
 	record = (struct cxl_poison_record) {
 		.address = cpu_to_le64(dpa),
@@ -352,6 +447,13 @@ static struct attribute *cxl_memdev_ram_attributes[] = {
 	NULL,
 };
 
+static struct attribute *cxl_memdev_security_attributes[] = {
+	&dev_attr_security_state.attr,
+	&dev_attr_security_sanitize.attr,
+	&dev_attr_security_erase.attr,
+	NULL,
+};
+
 static umode_t cxl_memdev_visible(struct kobject *kobj, struct attribute *a,
 				  int n)
 {
@@ -375,10 +477,16 @@ static struct attribute_group cxl_memdev_pmem_attribute_group = {
 	.attrs = cxl_memdev_pmem_attributes,
 };
 
+static struct attribute_group cxl_memdev_security_attribute_group = {
+	.name = "security",
+	.attrs = cxl_memdev_security_attributes,
+};
+
 static const struct attribute_group *cxl_memdev_attribute_groups[] = {
 	&cxl_memdev_attribute_group,
 	&cxl_memdev_ram_attribute_group,
 	&cxl_memdev_pmem_attribute_group,
+	&cxl_memdev_security_attribute_group,
 	NULL,
 };
 
@@ -397,17 +505,18 @@ EXPORT_SYMBOL_NS_GPL(is_cxl_memdev, CXL);
 
 /**
  * set_exclusive_cxl_commands() - atomically disable user cxl commands
- * @cxlds: The device state to operate on
+ * @mds: The device state to operate on
  * @cmds: bitmap of commands to mark exclusive
  *
  * Grab the cxl_memdev_rwsem in write mode to flush in-flight
  * invocations of the ioctl path and then disable future execution of
  * commands with the command ids set in @cmds.
  */
-void set_exclusive_cxl_commands(struct cxl_dev_state *cxlds, unsigned long *cmds)
+void set_exclusive_cxl_commands(struct cxl_memdev_state *mds,
+				unsigned long *cmds)
 {
 	down_write(&cxl_memdev_rwsem);
-	bitmap_or(cxlds->exclusive_cmds, cxlds->exclusive_cmds, cmds,
+	bitmap_or(mds->exclusive_cmds, mds->exclusive_cmds, cmds,
 		  CXL_MEM_COMMAND_ID_MAX);
 	up_write(&cxl_memdev_rwsem);
 }
@@ -415,23 +524,34 @@ EXPORT_SYMBOL_NS_GPL(set_exclusive_cxl_commands, CXL);
 
 /**
  * clear_exclusive_cxl_commands() - atomically enable user cxl commands
- * @cxlds: The device state to modify
+ * @mds: The device state to modify
  * @cmds: bitmap of commands to mark available for userspace
  */
-void clear_exclusive_cxl_commands(struct cxl_dev_state *cxlds, unsigned long *cmds)
+void clear_exclusive_cxl_commands(struct cxl_memdev_state *mds,
+				  unsigned long *cmds)
 {
 	down_write(&cxl_memdev_rwsem);
-	bitmap_andnot(cxlds->exclusive_cmds, cxlds->exclusive_cmds, cmds,
+	bitmap_andnot(mds->exclusive_cmds, mds->exclusive_cmds, cmds,
 		      CXL_MEM_COMMAND_ID_MAX);
 	up_write(&cxl_memdev_rwsem);
 }
 EXPORT_SYMBOL_NS_GPL(clear_exclusive_cxl_commands, CXL);
 
+static void cxl_memdev_security_shutdown(struct device *dev)
+{
+	struct cxl_memdev *cxlmd = to_cxl_memdev(dev);
+	struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlmd->cxlds);
+
+	if (mds->security.poll)
+		cancel_delayed_work_sync(&mds->security.poll_dwork);
+}
+
 static void cxl_memdev_shutdown(struct device *dev)
 {
 	struct cxl_memdev *cxlmd = to_cxl_memdev(dev);
 
 	down_write(&cxl_memdev_rwsem);
+	cxl_memdev_security_shutdown(dev);
 	cxlmd->cxlds = NULL;
 	up_write(&cxl_memdev_rwsem);
 }
@@ -511,10 +631,12 @@ static long cxl_memdev_ioctl(struct file *file, unsigned int cmd,
 			     unsigned long arg)
 {
 	struct cxl_memdev *cxlmd = file->private_data;
+	struct cxl_dev_state *cxlds;
 	int rc = -ENXIO;
 
 	down_read(&cxl_memdev_rwsem);
-	if (cxlmd->cxlds)
+	cxlds = cxlmd->cxlds;
+	if (cxlds && cxlds->type == CXL_DEVTYPE_CLASSMEM)
 		rc = __cxl_memdev_ioctl(cxlmd, cmd, arg);
 	up_read(&cxl_memdev_rwsem);
 
@@ -542,6 +664,316 @@ static int cxl_memdev_release_file(struct inode *inode, struct file *file)
 	return 0;
 }
 
+/**
+ * cxl_mem_get_fw_info - Get Firmware info
+ * @cxlds: The device data for the operation
+ *
+ * Retrieve firmware info for the device specified.
+ *
+ * Return: 0 if no error: or the result of the mailbox command.
+ *
+ * See CXL-3.0 8.2.9.3.1 Get FW Info
+ */
+static int cxl_mem_get_fw_info(struct cxl_memdev_state *mds)
+{
+	struct cxl_mbox_get_fw_info info;
+	struct cxl_mbox_cmd mbox_cmd;
+	int rc;
+
+	mbox_cmd = (struct cxl_mbox_cmd) {
+		.opcode = CXL_MBOX_OP_GET_FW_INFO,
+		.size_out = sizeof(info),
+		.payload_out = &info,
+	};
+
+	rc = cxl_internal_send_cmd(mds, &mbox_cmd);
+	if (rc < 0)
+		return rc;
+
+	mds->fw.num_slots = info.num_slots;
+	mds->fw.cur_slot = FIELD_GET(CXL_FW_INFO_SLOT_INFO_CUR_MASK,
+				       info.slot_info);
+
+	return 0;
+}
+
+/**
+ * cxl_mem_activate_fw - Activate Firmware
+ * @mds: The device data for the operation
+ * @slot: slot number to activate
+ *
+ * Activate firmware in a given slot for the device specified.
+ *
+ * Return: 0 if no error: or the result of the mailbox command.
+ *
+ * See CXL-3.0 8.2.9.3.3 Activate FW
+ */
+static int cxl_mem_activate_fw(struct cxl_memdev_state *mds, int slot)
+{
+	struct cxl_mbox_activate_fw activate;
+	struct cxl_mbox_cmd mbox_cmd;
+
+	if (slot == 0 || slot > mds->fw.num_slots)
+		return -EINVAL;
+
+	mbox_cmd = (struct cxl_mbox_cmd) {
+		.opcode = CXL_MBOX_OP_ACTIVATE_FW,
+		.size_in = sizeof(activate),
+		.payload_in = &activate,
+	};
+
+	/* Only offline activation supported for now */
+	activate.action = CXL_FW_ACTIVATE_OFFLINE;
+	activate.slot = slot;
+
+	return cxl_internal_send_cmd(mds, &mbox_cmd);
+}
+
+/**
+ * cxl_mem_abort_fw_xfer - Abort an in-progress FW transfer
+ * @mds: The device data for the operation
+ *
+ * Abort an in-progress firmware transfer for the device specified.
+ *
+ * Return: 0 if no error: or the result of the mailbox command.
+ *
+ * See CXL-3.0 8.2.9.3.2 Transfer FW
+ */
+static int cxl_mem_abort_fw_xfer(struct cxl_memdev_state *mds)
+{
+	struct cxl_mbox_transfer_fw *transfer;
+	struct cxl_mbox_cmd mbox_cmd;
+	int rc;
+
+	transfer = kzalloc(struct_size(transfer, data, 0), GFP_KERNEL);
+	if (!transfer)
+		return -ENOMEM;
+
+	/* Set a 1s poll interval and a total wait time of 30s */
+	mbox_cmd = (struct cxl_mbox_cmd) {
+		.opcode = CXL_MBOX_OP_TRANSFER_FW,
+		.size_in = sizeof(*transfer),
+		.payload_in = transfer,
+		.poll_interval_ms = 1000,
+		.poll_count = 30,
+	};
+
+	transfer->action = CXL_FW_TRANSFER_ACTION_ABORT;
+
+	rc = cxl_internal_send_cmd(mds, &mbox_cmd);
+	kfree(transfer);
+	return rc;
+}
+
+static void cxl_fw_cleanup(struct fw_upload *fwl)
+{
+	struct cxl_memdev_state *mds = fwl->dd_handle;
+
+	mds->fw.next_slot = 0;
+}
+
+static int cxl_fw_do_cancel(struct fw_upload *fwl)
+{
+	struct cxl_memdev_state *mds = fwl->dd_handle;
+	struct cxl_dev_state *cxlds = &mds->cxlds;
+	struct cxl_memdev *cxlmd = cxlds->cxlmd;
+	int rc;
+
+	rc = cxl_mem_abort_fw_xfer(mds);
+	if (rc < 0)
+		dev_err(&cxlmd->dev, "Error aborting FW transfer: %d\n", rc);
+
+	return FW_UPLOAD_ERR_CANCELED;
+}
+
+static enum fw_upload_err cxl_fw_prepare(struct fw_upload *fwl, const u8 *data,
+					 u32 size)
+{
+	struct cxl_memdev_state *mds = fwl->dd_handle;
+	struct cxl_mbox_transfer_fw *transfer;
+
+	if (!size)
+		return FW_UPLOAD_ERR_INVALID_SIZE;
+
+	mds->fw.oneshot = struct_size(transfer, data, size) <
+			    mds->payload_size;
+
+	if (cxl_mem_get_fw_info(mds))
+		return FW_UPLOAD_ERR_HW_ERROR;
+
+	/*
+	 * So far no state has been changed, hence no other cleanup is
+	 * necessary. Simply return the cancelled status.
+	 */
+	if (test_and_clear_bit(CXL_FW_CANCEL, mds->fw.state))
+		return FW_UPLOAD_ERR_CANCELED;
+
+	return FW_UPLOAD_ERR_NONE;
+}
+
+static enum fw_upload_err cxl_fw_write(struct fw_upload *fwl, const u8 *data,
+				       u32 offset, u32 size, u32 *written)
+{
+	struct cxl_memdev_state *mds = fwl->dd_handle;
+	struct cxl_dev_state *cxlds = &mds->cxlds;
+	struct cxl_memdev *cxlmd = cxlds->cxlmd;
+	struct cxl_mbox_transfer_fw *transfer;
+	struct cxl_mbox_cmd mbox_cmd;
+	u32 cur_size, remaining;
+	size_t size_in;
+	int rc;
+
+	*written = 0;
+
+	/* Offset has to be aligned to 128B (CXL-3.0 8.2.9.3.2 Table 8-57) */
+	if (!IS_ALIGNED(offset, CXL_FW_TRANSFER_ALIGNMENT)) {
+		dev_err(&cxlmd->dev,
+			"misaligned offset for FW transfer slice (%u)\n",
+			offset);
+		return FW_UPLOAD_ERR_RW_ERROR;
+	}
+
+	/*
+	 * Pick transfer size based on mds->payload_size @size must bw 128-byte
+	 * aligned, ->payload_size is a power of 2 starting at 256 bytes, and
+	 * sizeof(*transfer) is 128.  These constraints imply that @cur_size
+	 * will always be 128b aligned.
+	 */
+	cur_size = min_t(size_t, size, mds->payload_size - sizeof(*transfer));
+
+	remaining = size - cur_size;
+	size_in = struct_size(transfer, data, cur_size);
+
+	if (test_and_clear_bit(CXL_FW_CANCEL, mds->fw.state))
+		return cxl_fw_do_cancel(fwl);
+
+	/*
+	 * Slot numbers are 1-indexed
+	 * cur_slot is the 0-indexed next_slot (i.e. 'cur_slot - 1 + 1')
+	 * Check for rollover using modulo, and 1-index it by adding 1
+	 */
+	mds->fw.next_slot = (mds->fw.cur_slot % mds->fw.num_slots) + 1;
+
+	/* Do the transfer via mailbox cmd */
+	transfer = kzalloc(size_in, GFP_KERNEL);
+	if (!transfer)
+		return FW_UPLOAD_ERR_RW_ERROR;
+
+	transfer->offset = cpu_to_le32(offset / CXL_FW_TRANSFER_ALIGNMENT);
+	memcpy(transfer->data, data + offset, cur_size);
+	if (mds->fw.oneshot) {
+		transfer->action = CXL_FW_TRANSFER_ACTION_FULL;
+		transfer->slot = mds->fw.next_slot;
+	} else {
+		if (offset == 0) {
+			transfer->action = CXL_FW_TRANSFER_ACTION_INITIATE;
+		} else if (remaining == 0) {
+			transfer->action = CXL_FW_TRANSFER_ACTION_END;
+			transfer->slot = mds->fw.next_slot;
+		} else {
+			transfer->action = CXL_FW_TRANSFER_ACTION_CONTINUE;
+		}
+	}
+
+	mbox_cmd = (struct cxl_mbox_cmd) {
+		.opcode = CXL_MBOX_OP_TRANSFER_FW,
+		.size_in = size_in,
+		.payload_in = transfer,
+		.poll_interval_ms = 1000,
+		.poll_count = 30,
+	};
+
+	rc = cxl_internal_send_cmd(mds, &mbox_cmd);
+	if (rc < 0) {
+		rc = FW_UPLOAD_ERR_RW_ERROR;
+		goto out_free;
+	}
+
+	*written = cur_size;
+
+	/* Activate FW if oneshot or if the last slice was written */
+	if (mds->fw.oneshot || remaining == 0) {
+		dev_dbg(&cxlmd->dev, "Activating firmware slot: %d\n",
+			mds->fw.next_slot);
+		rc = cxl_mem_activate_fw(mds, mds->fw.next_slot);
+		if (rc < 0) {
+			dev_err(&cxlmd->dev, "Error activating firmware: %d\n",
+				rc);
+			rc = FW_UPLOAD_ERR_HW_ERROR;
+			goto out_free;
+		}
+	}
+
+	rc = FW_UPLOAD_ERR_NONE;
+
+out_free:
+	kfree(transfer);
+	return rc;
+}
+
+static enum fw_upload_err cxl_fw_poll_complete(struct fw_upload *fwl)
+{
+	struct cxl_memdev_state *mds = fwl->dd_handle;
+
+	/*
+	 * cxl_internal_send_cmd() handles background operations synchronously.
+	 * No need to wait for completions here - any errors would've been
+	 * reported and handled during the ->write() call(s).
+	 * Just check if a cancel request was received, and return success.
+	 */
+	if (test_and_clear_bit(CXL_FW_CANCEL, mds->fw.state))
+		return cxl_fw_do_cancel(fwl);
+
+	return FW_UPLOAD_ERR_NONE;
+}
+
+static void cxl_fw_cancel(struct fw_upload *fwl)
+{
+	struct cxl_memdev_state *mds = fwl->dd_handle;
+
+	set_bit(CXL_FW_CANCEL, mds->fw.state);
+}
+
+static const struct fw_upload_ops cxl_memdev_fw_ops = {
+        .prepare = cxl_fw_prepare,
+        .write = cxl_fw_write,
+        .poll_complete = cxl_fw_poll_complete,
+        .cancel = cxl_fw_cancel,
+        .cleanup = cxl_fw_cleanup,
+};
+
+static void devm_cxl_remove_fw_upload(void *fwl)
+{
+	firmware_upload_unregister(fwl);
+}
+
+int cxl_memdev_setup_fw_upload(struct cxl_memdev_state *mds)
+{
+	struct cxl_dev_state *cxlds = &mds->cxlds;
+	struct device *dev = &cxlds->cxlmd->dev;
+	struct fw_upload *fwl;
+	int rc;
+
+	if (!test_bit(CXL_MEM_COMMAND_ID_GET_FW_INFO, mds->enabled_cmds))
+		return 0;
+
+	fwl = firmware_upload_register(THIS_MODULE, dev, dev_name(dev),
+				       &cxl_memdev_fw_ops, mds);
+	if (IS_ERR(fwl))
+		return dev_err_probe(dev, PTR_ERR(fwl),
+				     "Failed to register firmware loader\n");
+
+	rc = devm_add_action_or_reset(cxlds->dev, devm_cxl_remove_fw_upload,
+				      fwl);
+	if (rc)
+		dev_err(dev,
+			"Failed to add firmware loader remove action: %d\n",
+			rc);
+
+	return rc;
+}
+EXPORT_SYMBOL_NS_GPL(cxl_memdev_setup_fw_upload, CXL);
+
 static const struct file_operations cxl_memdev_fops = {
 	.owner = THIS_MODULE,
 	.unlocked_ioctl = cxl_memdev_ioctl,
@@ -551,6 +983,35 @@ static const struct file_operations cxl_memdev_fops = {
 	.llseek = noop_llseek,
 };
 
+static void put_sanitize(void *data)
+{
+	struct cxl_memdev_state *mds = data;
+
+	sysfs_put(mds->security.sanitize_node);
+}
+
+static int cxl_memdev_security_init(struct cxl_memdev *cxlmd)
+{
+	struct cxl_dev_state *cxlds = cxlmd->cxlds;
+	struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlds);
+	struct device *dev = &cxlmd->dev;
+	struct kernfs_node *sec;
+
+	sec = sysfs_get_dirent(dev->kobj.sd, "security");
+	if (!sec) {
+		dev_err(dev, "sysfs_get_dirent 'security' failed\n");
+		return -ENODEV;
+	}
+	mds->security.sanitize_node = sysfs_get_dirent(sec, "state");
+	sysfs_put(sec);
+	if (!mds->security.sanitize_node) {
+		dev_err(dev, "sysfs_get_dirent 'state' failed\n");
+		return -ENODEV;
+	}
+
+	return devm_add_action_or_reset(cxlds->dev, put_sanitize, mds);
+ }
+
 struct cxl_memdev *devm_cxl_add_memdev(struct cxl_dev_state *cxlds)
 {
 	struct cxl_memdev *cxlmd;
@@ -579,6 +1040,10 @@ struct cxl_memdev *devm_cxl_add_memdev(struct cxl_dev_state *cxlds)
 	if (rc)
 		goto err;
 
+	rc = cxl_memdev_security_init(cxlmd);
+	if (rc)
+		goto err;
+
 	rc = devm_add_action_or_reset(cxlds->dev, cxl_memdev_unregister, cxlmd);
 	if (rc)
 		return ERR_PTR(rc);
diff --git a/drivers/cxl/core/pci.c b/drivers/cxl/core/pci.c
index 375f01c6cad6..c7a7887ebdcf 100644
--- a/drivers/cxl/core/pci.c
+++ b/drivers/cxl/core/pci.c
@@ -308,36 +308,17 @@ static void disable_hdm(void *_cxlhdm)
 	       hdm + CXL_HDM_DECODER_CTRL_OFFSET);
 }
 
-int devm_cxl_enable_hdm(struct cxl_port *port, struct cxl_hdm *cxlhdm)
+static int devm_cxl_enable_hdm(struct device *host, struct cxl_hdm *cxlhdm)
 {
-	void __iomem *hdm;
+	void __iomem *hdm = cxlhdm->regs.hdm_decoder;
 	u32 global_ctrl;
 
-	/*
-	 * If the hdm capability was not mapped there is nothing to enable and
-	 * the caller is responsible for what happens next.  For example,
-	 * emulate a passthrough decoder.
-	 */
-	if (IS_ERR(cxlhdm))
-		return 0;
-
-	hdm = cxlhdm->regs.hdm_decoder;
 	global_ctrl = readl(hdm + CXL_HDM_DECODER_CTRL_OFFSET);
-
-	/*
-	 * If the HDM decoder capability was enabled on entry, skip
-	 * registering disable_hdm() since this decode capability may be
-	 * owned by platform firmware.
-	 */
-	if (global_ctrl & CXL_HDM_DECODER_ENABLE)
-		return 0;
-
 	writel(global_ctrl | CXL_HDM_DECODER_ENABLE,
 	       hdm + CXL_HDM_DECODER_CTRL_OFFSET);
 
-	return devm_add_action_or_reset(&port->dev, disable_hdm, cxlhdm);
+	return devm_add_action_or_reset(host, disable_hdm, cxlhdm);
 }
-EXPORT_SYMBOL_NS_GPL(devm_cxl_enable_hdm, CXL);
 
 int cxl_dvsec_rr_decode(struct device *dev, int d,
 			struct cxl_endpoint_dvsec_info *info)
@@ -511,7 +492,7 @@ int cxl_hdm_decode_init(struct cxl_dev_state *cxlds, struct cxl_hdm *cxlhdm,
 	if (info->mem_enabled)
 		return 0;
 
-	rc = devm_cxl_enable_hdm(port, cxlhdm);
+	rc = devm_cxl_enable_hdm(&port->dev, cxlhdm);
 	if (rc)
 		return rc;
 
diff --git a/drivers/cxl/core/pmem.c b/drivers/cxl/core/pmem.c
index f8c38d997252..fc94f5240327 100644
--- a/drivers/cxl/core/pmem.c
+++ b/drivers/cxl/core/pmem.c
@@ -64,7 +64,7 @@ static int match_nvdimm_bridge(struct device *dev, void *data)
 
 struct cxl_nvdimm_bridge *cxl_find_nvdimm_bridge(struct cxl_memdev *cxlmd)
 {
-	struct cxl_port *port = find_cxl_root(dev_get_drvdata(&cxlmd->dev));
+	struct cxl_port *port = find_cxl_root(cxlmd->endpoint);
 	struct device *dev;
 
 	if (!port)
diff --git a/drivers/cxl/core/pmu.c b/drivers/cxl/core/pmu.c
new file mode 100644
index 000000000000..7684c843e5a5
--- /dev/null
+++ b/drivers/cxl/core/pmu.c
@@ -0,0 +1,68 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright(c) 2023 Huawei. All rights reserved. */
+
+#include <linux/device.h>
+#include <linux/slab.h>
+#include <linux/idr.h>
+#include <cxlmem.h>
+#include <pmu.h>
+#include <cxl.h>
+#include "core.h"
+
+static void cxl_pmu_release(struct device *dev)
+{
+	struct cxl_pmu *pmu = to_cxl_pmu(dev);
+
+	kfree(pmu);
+}
+
+const struct device_type cxl_pmu_type = {
+	.name = "cxl_pmu",
+	.release = cxl_pmu_release,
+};
+
+static void remove_dev(void *dev)
+{
+	device_del(dev);
+}
+
+int devm_cxl_pmu_add(struct device *parent, struct cxl_pmu_regs *regs,
+		     int assoc_id, int index, enum cxl_pmu_type type)
+{
+	struct cxl_pmu *pmu;
+	struct device *dev;
+	int rc;
+
+	pmu = kzalloc(sizeof(*pmu), GFP_KERNEL);
+	if (!pmu)
+		return -ENOMEM;
+
+	pmu->assoc_id = assoc_id;
+	pmu->index = index;
+	pmu->type = type;
+	pmu->base = regs->pmu;
+	dev = &pmu->dev;
+	device_initialize(dev);
+	device_set_pm_not_required(dev);
+	dev->parent = parent;
+	dev->bus = &cxl_bus_type;
+	dev->type = &cxl_pmu_type;
+	switch (pmu->type) {
+	case CXL_PMU_MEMDEV:
+		rc = dev_set_name(dev, "pmu_mem%d.%d", assoc_id, index);
+		break;
+	}
+	if (rc)
+		goto err;
+
+	rc = device_add(dev);
+	if (rc)
+		goto err;
+
+	return devm_add_action_or_reset(parent, remove_dev, dev);
+
+err:
+	put_device(&pmu->dev);
+	return rc;
+}
+EXPORT_SYMBOL_NS_GPL(devm_cxl_pmu_add, CXL);
diff --git a/drivers/cxl/core/port.c b/drivers/cxl/core/port.c
index cbd3d17f6410..724be8448eb4 100644
--- a/drivers/cxl/core/port.c
+++ b/drivers/cxl/core/port.c
@@ -56,6 +56,8 @@ static int cxl_device_id(const struct device *dev)
 		return CXL_DEVICE_MEMORY_EXPANDER;
 	if (dev->type == CXL_REGION_TYPE())
 		return CXL_DEVICE_REGION;
+	if (dev->type == &cxl_pmu_type)
+		return CXL_DEVICE_PMU;
 	return 0;
 }
 
@@ -117,9 +119,9 @@ static ssize_t target_type_show(struct device *dev,
 	struct cxl_decoder *cxld = to_cxl_decoder(dev);
 
 	switch (cxld->target_type) {
-	case CXL_DECODER_ACCELERATOR:
+	case CXL_DECODER_DEVMEM:
 		return sysfs_emit(buf, "accelerator\n");
-	case CXL_DECODER_EXPANDER:
+	case CXL_DECODER_HOSTONLYMEM:
 		return sysfs_emit(buf, "expander\n");
 	}
 	return -ENXIO;
@@ -1213,7 +1215,7 @@ static struct device *grandparent(struct device *dev)
 static void delete_endpoint(void *data)
 {
 	struct cxl_memdev *cxlmd = data;
-	struct cxl_port *endpoint = dev_get_drvdata(&cxlmd->dev);
+	struct cxl_port *endpoint = cxlmd->endpoint;
 	struct cxl_port *parent_port;
 	struct device *parent;
 
@@ -1228,6 +1230,7 @@ static void delete_endpoint(void *data)
 		devm_release_action(parent, cxl_unlink_uport, endpoint);
 		devm_release_action(parent, unregister_port, endpoint);
 	}
+	cxlmd->endpoint = NULL;
 	device_unlock(parent);
 	put_device(parent);
 out:
@@ -1239,7 +1242,7 @@ int cxl_endpoint_autoremove(struct cxl_memdev *cxlmd, struct cxl_port *endpoint)
 	struct device *dev = &cxlmd->dev;
 
 	get_device(&endpoint->dev);
-	dev_set_drvdata(dev, endpoint);
+	cxlmd->endpoint = endpoint;
 	cxlmd->depth = endpoint->depth;
 	return devm_add_action_or_reset(dev, delete_endpoint, cxlmd);
 }
@@ -1610,7 +1613,7 @@ static int cxl_decoder_init(struct cxl_port *port, struct cxl_decoder *cxld)
 	/* Pre initialize an "empty" decoder */
 	cxld->interleave_ways = 1;
 	cxld->interleave_granularity = PAGE_SIZE;
-	cxld->target_type = CXL_DECODER_EXPANDER;
+	cxld->target_type = CXL_DECODER_HOSTONLYMEM;
 	cxld->hpa_range = (struct range) {
 		.start = 0,
 		.end = -1,
diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c
index 39825e5301d0..e115ba382e04 100644
--- a/drivers/cxl/core/region.c
+++ b/drivers/cxl/core/region.c
@@ -125,10 +125,38 @@ static struct cxl_region_ref *cxl_rr_load(struct cxl_port *port,
 	return xa_load(&port->regions, (unsigned long)cxlr);
 }
 
+static int cxl_region_invalidate_memregion(struct cxl_region *cxlr)
+{
+	if (!cpu_cache_has_invalidate_memregion()) {
+		if (IS_ENABLED(CONFIG_CXL_REGION_INVALIDATION_TEST)) {
+			dev_warn_once(
+				&cxlr->dev,
+				"Bypassing cpu_cache_invalidate_memregion() for testing!\n");
+			return 0;
+		} else {
+			dev_err(&cxlr->dev,
+				"Failed to synchronize CPU cache state\n");
+			return -ENXIO;
+		}
+	}
+
+	cpu_cache_invalidate_memregion(IORES_DESC_CXL);
+	return 0;
+}
+
 static int cxl_region_decode_reset(struct cxl_region *cxlr, int count)
 {
 	struct cxl_region_params *p = &cxlr->params;
-	int i;
+	int i, rc = 0;
+
+	/*
+	 * Before region teardown attempt to flush, and if the flush
+	 * fails cancel the region teardown for data consistency
+	 * concerns
+	 */
+	rc = cxl_region_invalidate_memregion(cxlr);
+	if (rc)
+		return rc;
 
 	for (i = count - 1; i >= 0; i--) {
 		struct cxl_endpoint_decoder *cxled = p->targets[i];
@@ -136,7 +164,6 @@ static int cxl_region_decode_reset(struct cxl_region *cxlr, int count)
 		struct cxl_port *iter = cxled_to_port(cxled);
 		struct cxl_dev_state *cxlds = cxlmd->cxlds;
 		struct cxl_ep *ep;
-		int rc = 0;
 
 		if (cxlds->rcd)
 			goto endpoint_reset;
@@ -155,14 +182,19 @@ static int cxl_region_decode_reset(struct cxl_region *cxlr, int count)
 				rc = cxld->reset(cxld);
 			if (rc)
 				return rc;
+			set_bit(CXL_REGION_F_NEEDS_RESET, &cxlr->flags);
 		}
 
 endpoint_reset:
 		rc = cxled->cxld.reset(&cxled->cxld);
 		if (rc)
 			return rc;
+		set_bit(CXL_REGION_F_NEEDS_RESET, &cxlr->flags);
 	}
 
+	/* all decoders associated with this region have been torn down */
+	clear_bit(CXL_REGION_F_NEEDS_RESET, &cxlr->flags);
+
 	return 0;
 }
 
@@ -256,9 +288,19 @@ static ssize_t commit_store(struct device *dev, struct device_attribute *attr,
 		goto out;
 	}
 
-	if (commit)
+	/*
+	 * Invalidate caches before region setup to drop any speculative
+	 * consumption of this address space
+	 */
+	rc = cxl_region_invalidate_memregion(cxlr);
+	if (rc)
+		return rc;
+
+	if (commit) {
 		rc = cxl_region_decode_commit(cxlr);
-	else {
+		if (rc == 0)
+			p->state = CXL_CONFIG_COMMIT;
+	} else {
 		p->state = CXL_CONFIG_RESET_PENDING;
 		up_write(&cxl_region_rwsem);
 		device_release_driver(&cxlr->dev);
@@ -268,18 +310,20 @@ static ssize_t commit_store(struct device *dev, struct device_attribute *attr,
 		 * The lock was dropped, so need to revalidate that the reset is
 		 * still pending.
 		 */
-		if (p->state == CXL_CONFIG_RESET_PENDING)
+		if (p->state == CXL_CONFIG_RESET_PENDING) {
 			rc = cxl_region_decode_reset(cxlr, p->interleave_ways);
+			/*
+			 * Revert to committed since there may still be active
+			 * decoders associated with this region, or move forward
+			 * to active to mark the reset successful
+			 */
+			if (rc)
+				p->state = CXL_CONFIG_COMMIT;
+			else
+				p->state = CXL_CONFIG_ACTIVE;
+		}
 	}
 
-	if (rc)
-		goto out;
-
-	if (commit)
-		p->state = CXL_CONFIG_COMMIT;
-	else if (p->state == CXL_CONFIG_RESET_PENDING)
-		p->state = CXL_CONFIG_ACTIVE;
-
 out:
 	up_write(&cxl_region_rwsem);
 
@@ -809,6 +853,18 @@ static int cxl_rr_alloc_decoder(struct cxl_port *port, struct cxl_region *cxlr,
 		return -EBUSY;
 	}
 
+	/*
+	 * Endpoints should already match the region type, but backstop that
+	 * assumption with an assertion. Switch-decoders change mapping-type
+	 * based on what is mapped when they are assigned to a region.
+	 */
+	dev_WARN_ONCE(&cxlr->dev,
+		      port == cxled_to_port(cxled) &&
+			      cxld->target_type != cxlr->type,
+		      "%s:%s mismatch decoder type %d -> %d\n",
+		      dev_name(&cxled_to_memdev(cxled)->dev),
+		      dev_name(&cxld->dev), cxld->target_type, cxlr->type);
+	cxld->target_type = cxlr->type;
 	cxl_rr->decoder = cxld;
 	return 0;
 }
@@ -1675,7 +1731,6 @@ static int cxl_region_attach(struct cxl_region *cxlr,
 		if (rc)
 			goto err_decrement;
 		p->state = CXL_CONFIG_ACTIVE;
-		set_bit(CXL_REGION_F_INCOHERENT, &cxlr->flags);
 	}
 
 	cxled->cxld.interleave_ways = p->interleave_ways;
@@ -2104,7 +2159,7 @@ static struct cxl_region *__create_region(struct cxl_root_decoder *cxlrd,
 		return ERR_PTR(-EBUSY);
 	}
 
-	return devm_cxl_add_region(cxlrd, id, mode, CXL_DECODER_EXPANDER);
+	return devm_cxl_add_region(cxlrd, id, mode, CXL_DECODER_HOSTONLYMEM);
 }
 
 static ssize_t create_pmem_region_store(struct device *dev,
@@ -2805,30 +2860,6 @@ out:
 }
 EXPORT_SYMBOL_NS_GPL(cxl_add_to_region, CXL);
 
-static int cxl_region_invalidate_memregion(struct cxl_region *cxlr)
-{
-	if (!test_bit(CXL_REGION_F_INCOHERENT, &cxlr->flags))
-		return 0;
-
-	if (!cpu_cache_has_invalidate_memregion()) {
-		if (IS_ENABLED(CONFIG_CXL_REGION_INVALIDATION_TEST)) {
-			dev_warn_once(
-				&cxlr->dev,
-				"Bypassing cpu_cache_invalidate_memregion() for testing!\n");
-			clear_bit(CXL_REGION_F_INCOHERENT, &cxlr->flags);
-			return 0;
-		} else {
-			dev_err(&cxlr->dev,
-				"Failed to synchronize CPU cache state\n");
-			return -ENXIO;
-		}
-	}
-
-	cpu_cache_invalidate_memregion(IORES_DESC_CXL);
-	clear_bit(CXL_REGION_F_INCOHERENT, &cxlr->flags);
-	return 0;
-}
-
 static int is_system_ram(struct resource *res, void *arg)
 {
 	struct cxl_region *cxlr = arg;
@@ -2856,7 +2887,12 @@ static int cxl_region_probe(struct device *dev)
 		goto out;
 	}
 
-	rc = cxl_region_invalidate_memregion(cxlr);
+	if (test_bit(CXL_REGION_F_NEEDS_RESET, &cxlr->flags)) {
+		dev_err(&cxlr->dev,
+			"failed to activate, re-commit region and retry\n");
+		rc = -ENXIO;
+		goto out;
+	}
 
 	/*
 	 * From this point on any path that changes the region's state away from
diff --git a/drivers/cxl/core/regs.c b/drivers/cxl/core/regs.c
index e68848075bb6..6281127b3e9d 100644
--- a/drivers/cxl/core/regs.c
+++ b/drivers/cxl/core/regs.c
@@ -6,6 +6,7 @@
 #include <linux/pci.h>
 #include <cxlmem.h>
 #include <cxlpci.h>
+#include <pmu.h>
 
 #include "core.h"
 
@@ -199,13 +200,13 @@ void __iomem *devm_cxl_iomap_block(struct device *dev, resource_size_t addr,
 	return ret_val;
 }
 
-int cxl_map_component_regs(struct cxl_register_map *map,
+int cxl_map_component_regs(const struct cxl_register_map *map,
 			   struct cxl_component_regs *regs,
 			   unsigned long map_mask)
 {
 	struct device *dev = map->dev;
 	struct mapinfo {
-		struct cxl_reg_map *rmap;
+		const struct cxl_reg_map *rmap;
 		void __iomem **addr;
 	} mapinfo[] = {
 		{ &map->component_map.hdm_decoder, &regs->hdm_decoder },
@@ -233,13 +234,13 @@ int cxl_map_component_regs(struct cxl_register_map *map,
 }
 EXPORT_SYMBOL_NS_GPL(cxl_map_component_regs, CXL);
 
-int cxl_map_device_regs(struct cxl_register_map *map,
+int cxl_map_device_regs(const struct cxl_register_map *map,
 			struct cxl_device_regs *regs)
 {
 	struct device *dev = map->dev;
 	resource_size_t phys_addr = map->resource;
 	struct mapinfo {
-		struct cxl_reg_map *rmap;
+		const struct cxl_reg_map *rmap;
 		void __iomem **addr;
 	} mapinfo[] = {
 		{ &map->device_map.status, &regs->status, },
@@ -288,20 +289,23 @@ static bool cxl_decode_regblock(struct pci_dev *pdev, u32 reg_lo, u32 reg_hi,
 }
 
 /**
- * cxl_find_regblock() - Locate register blocks by type
+ * cxl_find_regblock_instance() - Locate a register block by type / index
  * @pdev: The CXL PCI device to enumerate.
  * @type: Register Block Indicator id
  * @map: Enumeration output, clobbered on error
+ * @index: Index into which particular instance of a regblock wanted in the
+ *	   order found in register locator DVSEC.
  *
  * Return: 0 if register block enumerated, negative error code otherwise
  *
  * A CXL DVSEC may point to one or more register blocks, search for them
- * by @type.
+ * by @type and @index.
  */
-int cxl_find_regblock(struct pci_dev *pdev, enum cxl_regloc_type type,
-		      struct cxl_register_map *map)
+int cxl_find_regblock_instance(struct pci_dev *pdev, enum cxl_regloc_type type,
+			       struct cxl_register_map *map, int index)
 {
 	u32 regloc_size, regblocks;
+	int instance = 0;
 	int regloc, i;
 
 	*map = (struct cxl_register_map) {
@@ -329,15 +333,74 @@ int cxl_find_regblock(struct pci_dev *pdev, enum cxl_regloc_type type,
 		if (!cxl_decode_regblock(pdev, reg_lo, reg_hi, map))
 			continue;
 
-		if (map->reg_type == type)
-			return 0;
+		if (map->reg_type == type) {
+			if (index == instance)
+				return 0;
+			instance++;
+		}
 	}
 
 	map->resource = CXL_RESOURCE_NONE;
 	return -ENODEV;
 }
+EXPORT_SYMBOL_NS_GPL(cxl_find_regblock_instance, CXL);
+
+/**
+ * cxl_find_regblock() - Locate register blocks by type
+ * @pdev: The CXL PCI device to enumerate.
+ * @type: Register Block Indicator id
+ * @map: Enumeration output, clobbered on error
+ *
+ * Return: 0 if register block enumerated, negative error code otherwise
+ *
+ * A CXL DVSEC may point to one or more register blocks, search for them
+ * by @type.
+ */
+int cxl_find_regblock(struct pci_dev *pdev, enum cxl_regloc_type type,
+		      struct cxl_register_map *map)
+{
+	return cxl_find_regblock_instance(pdev, type, map, 0);
+}
 EXPORT_SYMBOL_NS_GPL(cxl_find_regblock, CXL);
 
+/**
+ * cxl_count_regblock() - Count instances of a given regblock type.
+ * @pdev: The CXL PCI device to enumerate.
+ * @type: Register Block Indicator id
+ *
+ * Some regblocks may be repeated. Count how many instances.
+ *
+ * Return: count of matching regblocks.
+ */
+int cxl_count_regblock(struct pci_dev *pdev, enum cxl_regloc_type type)
+{
+	struct cxl_register_map map;
+	int rc, count = 0;
+
+	while (1) {
+		rc = cxl_find_regblock_instance(pdev, type, &map, count);
+		if (rc)
+			return count;
+		count++;
+	}
+}
+EXPORT_SYMBOL_NS_GPL(cxl_count_regblock, CXL);
+
+int cxl_map_pmu_regs(struct pci_dev *pdev, struct cxl_pmu_regs *regs,
+		     struct cxl_register_map *map)
+{
+	struct device *dev = &pdev->dev;
+	resource_size_t phys_addr;
+
+	phys_addr = map->resource;
+	regs->pmu = devm_cxl_iomap_block(dev, phys_addr, CXL_PMU_REGMAP_SIZE);
+	if (!regs->pmu)
+		return -ENOMEM;
+
+	return 0;
+}
+EXPORT_SYMBOL_NS_GPL(cxl_map_pmu_regs, CXL);
+
 static int cxl_map_regblock(struct cxl_register_map *map)
 {
 	struct device *dev = map->dev;
diff --git a/drivers/cxl/cxl.h b/drivers/cxl/cxl.h
index b1adca9b27ba..76d92561af29 100644
--- a/drivers/cxl/cxl.h
+++ b/drivers/cxl/cxl.h
@@ -56,7 +56,7 @@
 #define   CXL_HDM_DECODER0_CTRL_COMMIT BIT(9)
 #define   CXL_HDM_DECODER0_CTRL_COMMITTED BIT(10)
 #define   CXL_HDM_DECODER0_CTRL_COMMIT_ERROR BIT(11)
-#define   CXL_HDM_DECODER0_CTRL_TYPE BIT(12)
+#define   CXL_HDM_DECODER0_CTRL_HOSTONLY BIT(12)
 #define CXL_HDM_DECODER0_TL_LOW(i) (0x20 * (i) + 0x24)
 #define CXL_HDM_DECODER0_TL_HIGH(i) (0x20 * (i) + 0x28)
 #define CXL_HDM_DECODER0_SKIP_LOW(i) CXL_HDM_DECODER0_TL_LOW(i)
@@ -176,14 +176,22 @@ static inline int ways_to_eiw(unsigned int ways, u8 *eiw)
 /* CXL 2.0 8.2.8.4 Mailbox Registers */
 #define CXLDEV_MBOX_CAPS_OFFSET 0x00
 #define   CXLDEV_MBOX_CAP_PAYLOAD_SIZE_MASK GENMASK(4, 0)
+#define   CXLDEV_MBOX_CAP_BG_CMD_IRQ BIT(6)
+#define   CXLDEV_MBOX_CAP_IRQ_MSGNUM_MASK GENMASK(10, 7)
 #define CXLDEV_MBOX_CTRL_OFFSET 0x04
 #define   CXLDEV_MBOX_CTRL_DOORBELL BIT(0)
+#define   CXLDEV_MBOX_CTRL_BG_CMD_IRQ BIT(2)
 #define CXLDEV_MBOX_CMD_OFFSET 0x08
 #define   CXLDEV_MBOX_CMD_COMMAND_OPCODE_MASK GENMASK_ULL(15, 0)
 #define   CXLDEV_MBOX_CMD_PAYLOAD_LENGTH_MASK GENMASK_ULL(36, 16)
 #define CXLDEV_MBOX_STATUS_OFFSET 0x10
+#define   CXLDEV_MBOX_STATUS_BG_CMD BIT(0)
 #define   CXLDEV_MBOX_STATUS_RET_CODE_MASK GENMASK_ULL(47, 32)
 #define CXLDEV_MBOX_BG_CMD_STATUS_OFFSET 0x18
+#define   CXLDEV_MBOX_BG_CMD_COMMAND_OPCODE_MASK GENMASK_ULL(15, 0)
+#define   CXLDEV_MBOX_BG_CMD_COMMAND_PCT_MASK GENMASK_ULL(22, 16)
+#define   CXLDEV_MBOX_BG_CMD_COMMAND_RC_MASK GENMASK_ULL(47, 32)
+#define   CXLDEV_MBOX_BG_CMD_COMMAND_VENDOR_MASK GENMASK_ULL(63, 48)
 #define CXLDEV_MBOX_PAYLOAD_OFFSET 0x20
 
 /*
@@ -209,6 +217,10 @@ struct cxl_regs {
 	struct_group_tagged(cxl_device_regs, device_regs,
 		void __iomem *status, *mbox, *memdev;
 	);
+
+	struct_group_tagged(cxl_pmu_regs, pmu_regs,
+		void __iomem *pmu;
+	);
 };
 
 struct cxl_reg_map {
@@ -229,6 +241,10 @@ struct cxl_device_reg_map {
 	struct cxl_reg_map memdev;
 };
 
+struct cxl_pmu_reg_map {
+	struct cxl_reg_map pmu;
+};
+
 /**
  * struct cxl_register_map - DVSEC harvested register block mapping parameters
  * @dev: device for devm operations and logging
@@ -238,6 +254,7 @@ struct cxl_device_reg_map {
  * @reg_type: see enum cxl_regloc_type
  * @component_map: cxl_reg_map for component registers
  * @device_map: cxl_reg_maps for device registers
+ * @pmu_map: cxl_reg_maps for CXL Performance Monitoring Units
  */
 struct cxl_register_map {
 	struct device *dev;
@@ -248,6 +265,7 @@ struct cxl_register_map {
 	union {
 		struct cxl_component_reg_map component_map;
 		struct cxl_device_reg_map device_map;
+		struct cxl_pmu_reg_map pmu_map;
 	};
 };
 
@@ -255,13 +273,18 @@ void cxl_probe_component_regs(struct device *dev, void __iomem *base,
 			      struct cxl_component_reg_map *map);
 void cxl_probe_device_regs(struct device *dev, void __iomem *base,
 			   struct cxl_device_reg_map *map);
-int cxl_map_component_regs(struct cxl_register_map *map,
+int cxl_map_component_regs(const struct cxl_register_map *map,
 			   struct cxl_component_regs *regs,
 			   unsigned long map_mask);
-int cxl_map_device_regs(struct cxl_register_map *map,
+int cxl_map_device_regs(const struct cxl_register_map *map,
 			struct cxl_device_regs *regs);
+int cxl_map_pmu_regs(struct pci_dev *pdev, struct cxl_pmu_regs *regs,
+		     struct cxl_register_map *map);
 
 enum cxl_regloc_type;
+int cxl_count_regblock(struct pci_dev *pdev, enum cxl_regloc_type type);
+int cxl_find_regblock_instance(struct pci_dev *pdev, enum cxl_regloc_type type,
+			       struct cxl_register_map *map, int index);
 int cxl_find_regblock(struct pci_dev *pdev, enum cxl_regloc_type type,
 		      struct cxl_register_map *map);
 int cxl_setup_regs(struct cxl_register_map *map);
@@ -288,8 +311,8 @@ resource_size_t cxl_rcd_component_reg_phys(struct device *dev,
 #define CXL_DECODER_F_MASK  GENMASK(5, 0)
 
 enum cxl_decoder_type {
-       CXL_DECODER_ACCELERATOR = 2,
-       CXL_DECODER_EXPANDER = 3,
+	CXL_DECODER_DEVMEM = 2,
+	CXL_DECODER_HOSTONLYMEM = 3,
 };
 
 /*
@@ -461,17 +484,19 @@ struct cxl_region_params {
 };
 
 /*
- * Flag whether this region needs to have its HPA span synchronized with
- * CPU cache state at region activation time.
- */
-#define CXL_REGION_F_INCOHERENT 0
-
-/*
  * Indicate whether this region has been assembled by autodetection or
  * userspace assembly. Prevent endpoint decoders outside of automatic
  * detection from being added to the region.
  */
-#define CXL_REGION_F_AUTO 1
+#define CXL_REGION_F_AUTO 0
+
+/*
+ * Require that a committed region successfully complete a teardown once
+ * any of its associated decoders have been torn down. This maintains
+ * the commit state for the region since there are committed decoders,
+ * but blocks cxl_region_probe().
+ */
+#define CXL_REGION_F_NEEDS_RESET 1
 
 /**
  * struct cxl_region - CXL region
@@ -717,7 +742,6 @@ struct cxl_endpoint_dvsec_info {
 struct cxl_hdm;
 struct cxl_hdm *devm_cxl_setup_hdm(struct cxl_port *port,
 				   struct cxl_endpoint_dvsec_info *info);
-int devm_cxl_enable_hdm(struct cxl_port *port, struct cxl_hdm *cxlhdm);
 int devm_cxl_enumerate_decoders(struct cxl_hdm *cxlhdm,
 				struct cxl_endpoint_dvsec_info *info);
 int devm_cxl_add_passthrough_decoder(struct cxl_port *port);
@@ -757,6 +781,7 @@ void cxl_driver_unregister(struct cxl_driver *cxl_drv);
 #define CXL_DEVICE_REGION		6
 #define CXL_DEVICE_PMEM_REGION		7
 #define CXL_DEVICE_DAX_REGION		8
+#define CXL_DEVICE_PMU			9
 
 #define MODULE_ALIAS_CXL(type) MODULE_ALIAS("cxl:t" __stringify(type) "*")
 #define CXL_MODALIAS_FMT "cxl:t%d"
diff --git a/drivers/cxl/cxlmem.h b/drivers/cxl/cxlmem.h
index 76743016b64c..79e99c873ca2 100644
--- a/drivers/cxl/cxlmem.h
+++ b/drivers/cxl/cxlmem.h
@@ -5,6 +5,7 @@
 #include <uapi/linux/cxl_mem.h>
 #include <linux/cdev.h>
 #include <linux/uuid.h>
+#include <linux/rcuwait.h>
 #include "cxl.h"
 
 /* CXL 2.0 8.2.8.5.1.1 Memory Device Status Register */
@@ -38,6 +39,7 @@
  * @detach_work: active memdev lost a port in its ancestry
  * @cxl_nvb: coordinate removal of @cxl_nvd if present
  * @cxl_nvd: optional bridge to an nvdimm if the device supports pmem
+ * @endpoint: connection to the CXL port topology for this memory device
  * @id: id number of this memdev instance.
  * @depth: endpoint port depth
  */
@@ -48,6 +50,7 @@ struct cxl_memdev {
 	struct work_struct detach_work;
 	struct cxl_nvdimm_bridge *cxl_nvb;
 	struct cxl_nvdimm *cxl_nvd;
+	struct cxl_port *endpoint;
 	int id;
 	int depth;
 };
@@ -82,6 +85,8 @@ static inline bool is_cxl_endpoint(struct cxl_port *port)
 }
 
 struct cxl_memdev *devm_cxl_add_memdev(struct cxl_dev_state *cxlds);
+struct cxl_memdev_state;
+int cxl_memdev_setup_fw_upload(struct cxl_memdev_state *mds);
 int devm_cxl_dpa_reserve(struct cxl_endpoint_decoder *cxled,
 			 resource_size_t base, resource_size_t len,
 			 resource_size_t skipped);
@@ -108,6 +113,9 @@ static inline struct cxl_ep *cxl_ep_load(struct cxl_port *port,
  *            variable sized output commands, it tells the exact number of bytes
  *            written.
  * @min_out: (input) internal command output payload size validation
+ * @poll_count: (input) Number of timeouts to attempt.
+ * @poll_interval_ms: (input) Time between mailbox background command polling
+ *                    interval timeouts.
  * @return_code: (output) Error code returned from hardware.
  *
  * This is the primary mechanism used to send commands to the hardware.
@@ -123,6 +131,8 @@ struct cxl_mbox_cmd {
 	size_t size_in;
 	size_t size_out;
 	size_t min_out;
+	int poll_count;
+	int poll_interval_ms;
 	u16 return_code;
 };
 
@@ -195,7 +205,7 @@ static inline int cxl_mbox_cmd_rc2errno(struct cxl_mbox_cmd *mbox_cmd)
  */
 #define CXL_CAPACITY_MULTIPLIER SZ_256M
 
-/**
+/*
  * Event Interrupt Policy
  *
  * CXL rev 3.0 section 8.2.9.2.4; Table 8-52
@@ -215,8 +225,8 @@ struct cxl_event_interrupt_policy {
 /**
  * struct cxl_event_state - Event log driver state
  *
- * @event_buf: Buffer to receive event data
- * @event_log_lock: Serialize event_buf and log use
+ * @buf: Buffer to receive event data
+ * @log_lock: Serialize event_buf and log use
  */
 struct cxl_event_state {
 	struct cxl_get_event_payload *buf;
@@ -254,6 +264,115 @@ struct cxl_poison_state {
 	struct mutex lock;  /* Protect reads of poison list */
 };
 
+/*
+ * Get FW Info
+ * CXL rev 3.0 section 8.2.9.3.1; Table 8-56
+ */
+struct cxl_mbox_get_fw_info {
+	u8 num_slots;
+	u8 slot_info;
+	u8 activation_cap;
+	u8 reserved[13];
+	char slot_1_revision[16];
+	char slot_2_revision[16];
+	char slot_3_revision[16];
+	char slot_4_revision[16];
+} __packed;
+
+#define CXL_FW_INFO_SLOT_INFO_CUR_MASK			GENMASK(2, 0)
+#define CXL_FW_INFO_SLOT_INFO_NEXT_MASK			GENMASK(5, 3)
+#define CXL_FW_INFO_SLOT_INFO_NEXT_SHIFT		3
+#define CXL_FW_INFO_ACTIVATION_CAP_HAS_LIVE_ACTIVATE	BIT(0)
+
+/*
+ * Transfer FW Input Payload
+ * CXL rev 3.0 section 8.2.9.3.2; Table 8-57
+ */
+struct cxl_mbox_transfer_fw {
+	u8 action;
+	u8 slot;
+	u8 reserved[2];
+	__le32 offset;
+	u8 reserved2[0x78];
+	u8 data[];
+} __packed;
+
+#define CXL_FW_TRANSFER_ACTION_FULL	0x0
+#define CXL_FW_TRANSFER_ACTION_INITIATE	0x1
+#define CXL_FW_TRANSFER_ACTION_CONTINUE	0x2
+#define CXL_FW_TRANSFER_ACTION_END	0x3
+#define CXL_FW_TRANSFER_ACTION_ABORT	0x4
+
+/*
+ * CXL rev 3.0 section 8.2.9.3.2 mandates 128-byte alignment for FW packages
+ * and for each part transferred in a Transfer FW command.
+ */
+#define CXL_FW_TRANSFER_ALIGNMENT	128
+
+/*
+ * Activate FW Input Payload
+ * CXL rev 3.0 section 8.2.9.3.3; Table 8-58
+ */
+struct cxl_mbox_activate_fw {
+	u8 action;
+	u8 slot;
+} __packed;
+
+#define CXL_FW_ACTIVATE_ONLINE		0x0
+#define CXL_FW_ACTIVATE_OFFLINE		0x1
+
+/* FW state bits */
+#define CXL_FW_STATE_BITS		32
+#define CXL_FW_CANCEL		BIT(0)
+
+/**
+ * struct cxl_fw_state - Firmware upload / activation state
+ *
+ * @state: fw_uploader state bitmask
+ * @oneshot: whether the fw upload fits in a single transfer
+ * @num_slots: Number of FW slots available
+ * @cur_slot: Slot number currently active
+ * @next_slot: Slot number for the new firmware
+ */
+struct cxl_fw_state {
+	DECLARE_BITMAP(state, CXL_FW_STATE_BITS);
+	bool oneshot;
+	int num_slots;
+	int cur_slot;
+	int next_slot;
+};
+
+/**
+ * struct cxl_security_state - Device security state
+ *
+ * @state: state of last security operation
+ * @poll: polling for sanitization is enabled, device has no mbox irq support
+ * @poll_tmo_secs: polling timeout
+ * @poll_dwork: polling work item
+ * @sanitize_node: sanitation sysfs file to notify
+ */
+struct cxl_security_state {
+	unsigned long state;
+	bool poll;
+	int poll_tmo_secs;
+	struct delayed_work poll_dwork;
+	struct kernfs_node *sanitize_node;
+};
+
+/*
+ * enum cxl_devtype - delineate type-2 from a generic type-3 device
+ * @CXL_DEVTYPE_DEVMEM - Vendor specific CXL Type-2 device implementing HDM-D or
+ *			 HDM-DB, no requirement that this device implements a
+ *			 mailbox, or other memory-device-standard manageability
+ *			 flows.
+ * @CXL_DEVTYPE_CLASSMEM - Common class definition of a CXL Type-3 device with
+ *			   HDM-H and class-mandatory memory device registers
+ */
+enum cxl_devtype {
+	CXL_DEVTYPE_DEVMEM,
+	CXL_DEVTYPE_CLASSMEM,
+};
+
 /**
  * struct cxl_dev_state - The driver device state
  *
@@ -267,6 +386,36 @@ struct cxl_poison_state {
  * @cxl_dvsec: Offset to the PCIe device DVSEC
  * @rcd: operating in RCD mode (CXL 3.0 9.11.8 CXL Devices Attached to an RCH)
  * @media_ready: Indicate whether the device media is usable
+ * @dpa_res: Overall DPA resource tree for the device
+ * @pmem_res: Active Persistent memory capacity configuration
+ * @ram_res: Active Volatile memory capacity configuration
+ * @component_reg_phys: register base of component registers
+ * @serial: PCIe Device Serial Number
+ * @type: Generic Memory Class device or Vendor Specific Memory device
+ */
+struct cxl_dev_state {
+	struct device *dev;
+	struct cxl_memdev *cxlmd;
+	struct cxl_regs regs;
+	int cxl_dvsec;
+	bool rcd;
+	bool media_ready;
+	struct resource dpa_res;
+	struct resource pmem_res;
+	struct resource ram_res;
+	resource_size_t component_reg_phys;
+	u64 serial;
+	enum cxl_devtype type;
+};
+
+/**
+ * struct cxl_memdev_state - Generic Type-3 Memory Device Class driver data
+ *
+ * CXL 8.1.12.1 PCI Header - Class Code Register Memory Device defines
+ * common memory device functionality like the presence of a mailbox and
+ * the functionality related to that like Identify Memory Device and Get
+ * Partition Info
+ * @cxlds: Core driver state common across Type-2 and Type-3 devices
  * @payload_size: Size of space for payload
  *                (CXL 2.0 8.2.8.4.3 Mailbox Capabilities Register)
  * @lsa_size: Size of Label Storage Area
@@ -275,9 +424,6 @@ struct cxl_poison_state {
  * @firmware_version: Firmware version for the memory device.
  * @enabled_cmds: Hardware commands found enabled in CEL.
  * @exclusive_cmds: Commands that are kernel-internal only
- * @dpa_res: Overall DPA resource tree for the device
- * @pmem_res: Active Persistent memory capacity configuration
- * @ram_res: Active Volatile memory capacity configuration
  * @total_bytes: sum of all possible capacities
  * @volatile_only_bytes: hard volatile capacity
  * @persistent_only_bytes: hard persistent capacity
@@ -286,54 +432,48 @@ struct cxl_poison_state {
  * @active_persistent_bytes: sum of hard + soft persistent
  * @next_volatile_bytes: volatile capacity change pending device reset
  * @next_persistent_bytes: persistent capacity change pending device reset
- * @component_reg_phys: register base of component registers
- * @info: Cached DVSEC information about the device.
- * @serial: PCIe Device Serial Number
  * @event: event log driver state
  * @poison: poison driver state info
+ * @fw: firmware upload / activation state
  * @mbox_send: @dev specific transport for transmitting mailbox commands
  *
- * See section 8.2.9.5.2 Capacity Configuration and Label Storage for
+ * See CXL 3.0 8.2.9.8.2 Capacity Configuration and Label Storage for
  * details on capacity parameters.
  */
-struct cxl_dev_state {
-	struct device *dev;
-	struct cxl_memdev *cxlmd;
-
-	struct cxl_regs regs;
-	int cxl_dvsec;
-
-	bool rcd;
-	bool media_ready;
+struct cxl_memdev_state {
+	struct cxl_dev_state cxlds;
 	size_t payload_size;
 	size_t lsa_size;
 	struct mutex mbox_mutex; /* Protects device mailbox and firmware */
 	char firmware_version[0x10];
 	DECLARE_BITMAP(enabled_cmds, CXL_MEM_COMMAND_ID_MAX);
 	DECLARE_BITMAP(exclusive_cmds, CXL_MEM_COMMAND_ID_MAX);
-
-	struct resource dpa_res;
-	struct resource pmem_res;
-	struct resource ram_res;
 	u64 total_bytes;
 	u64 volatile_only_bytes;
 	u64 persistent_only_bytes;
 	u64 partition_align_bytes;
-
 	u64 active_volatile_bytes;
 	u64 active_persistent_bytes;
 	u64 next_volatile_bytes;
 	u64 next_persistent_bytes;
-
-	resource_size_t component_reg_phys;
-	u64 serial;
-
 	struct cxl_event_state event;
 	struct cxl_poison_state poison;
+	struct cxl_security_state security;
+	struct cxl_fw_state fw;
 
-	int (*mbox_send)(struct cxl_dev_state *cxlds, struct cxl_mbox_cmd *cmd);
+	struct rcuwait mbox_wait;
+	int (*mbox_send)(struct cxl_memdev_state *mds,
+			 struct cxl_mbox_cmd *cmd);
 };
 
+static inline struct cxl_memdev_state *
+to_cxl_memdev_state(struct cxl_dev_state *cxlds)
+{
+	if (cxlds->type != CXL_DEVTYPE_CLASSMEM)
+		return NULL;
+	return container_of(cxlds, struct cxl_memdev_state, cxlds);
+}
+
 enum cxl_opcode {
 	CXL_MBOX_OP_INVALID		= 0x0000,
 	CXL_MBOX_OP_RAW			= CXL_MBOX_OP_INVALID,
@@ -342,6 +482,7 @@ enum cxl_opcode {
 	CXL_MBOX_OP_GET_EVT_INT_POLICY	= 0x0102,
 	CXL_MBOX_OP_SET_EVT_INT_POLICY	= 0x0103,
 	CXL_MBOX_OP_GET_FW_INFO		= 0x0200,
+	CXL_MBOX_OP_TRANSFER_FW		= 0x0201,
 	CXL_MBOX_OP_ACTIVATE_FW		= 0x0202,
 	CXL_MBOX_OP_SET_TIMESTAMP	= 0x0301,
 	CXL_MBOX_OP_GET_SUPPORTED_LOGS	= 0x0400,
@@ -362,6 +503,8 @@ enum cxl_opcode {
 	CXL_MBOX_OP_GET_SCAN_MEDIA_CAPS	= 0x4303,
 	CXL_MBOX_OP_SCAN_MEDIA		= 0x4304,
 	CXL_MBOX_OP_GET_SCAN_MEDIA	= 0x4305,
+	CXL_MBOX_OP_SANITIZE		= 0x4400,
+	CXL_MBOX_OP_SECURE_ERASE	= 0x4401,
 	CXL_MBOX_OP_GET_SECURITY_STATE	= 0x4500,
 	CXL_MBOX_OP_SET_PASSPHRASE	= 0x4501,
 	CXL_MBOX_OP_DISABLE_PASSPHRASE	= 0x4502,
@@ -692,18 +835,20 @@ enum {
 	CXL_PMEM_SEC_PASS_USER,
 };
 
-int cxl_internal_send_cmd(struct cxl_dev_state *cxlds,
+int cxl_internal_send_cmd(struct cxl_memdev_state *mds,
 			  struct cxl_mbox_cmd *cmd);
-int cxl_dev_state_identify(struct cxl_dev_state *cxlds);
+int cxl_dev_state_identify(struct cxl_memdev_state *mds);
 int cxl_await_media_ready(struct cxl_dev_state *cxlds);
-int cxl_enumerate_cmds(struct cxl_dev_state *cxlds);
-int cxl_mem_create_range_info(struct cxl_dev_state *cxlds);
-struct cxl_dev_state *cxl_dev_state_create(struct device *dev);
-void set_exclusive_cxl_commands(struct cxl_dev_state *cxlds, unsigned long *cmds);
-void clear_exclusive_cxl_commands(struct cxl_dev_state *cxlds, unsigned long *cmds);
-void cxl_mem_get_event_records(struct cxl_dev_state *cxlds, u32 status);
-int cxl_set_timestamp(struct cxl_dev_state *cxlds);
-int cxl_poison_state_init(struct cxl_dev_state *cxlds);
+int cxl_enumerate_cmds(struct cxl_memdev_state *mds);
+int cxl_mem_create_range_info(struct cxl_memdev_state *mds);
+struct cxl_memdev_state *cxl_memdev_state_create(struct device *dev);
+void set_exclusive_cxl_commands(struct cxl_memdev_state *mds,
+				unsigned long *cmds);
+void clear_exclusive_cxl_commands(struct cxl_memdev_state *mds,
+				  unsigned long *cmds);
+void cxl_mem_get_event_records(struct cxl_memdev_state *mds, u32 status);
+int cxl_set_timestamp(struct cxl_memdev_state *mds);
+int cxl_poison_state_init(struct cxl_memdev_state *mds);
 int cxl_mem_get_poison(struct cxl_memdev *cxlmd, u64 offset, u64 len,
 		       struct cxl_region *cxlr);
 int cxl_trigger_poison_list(struct cxl_memdev *cxlmd);
@@ -722,6 +867,8 @@ static inline void cxl_mem_active_dec(void)
 }
 #endif
 
+int cxl_mem_sanitize(struct cxl_memdev_state *mds, u16 cmd);
+
 struct cxl_hdm {
 	struct cxl_component_regs regs;
 	unsigned int decoder_count;
diff --git a/drivers/cxl/cxlpci.h b/drivers/cxl/cxlpci.h
index 7c02e55b8042..0fa4799ea316 100644
--- a/drivers/cxl/cxlpci.h
+++ b/drivers/cxl/cxlpci.h
@@ -67,6 +67,7 @@ enum cxl_regloc_type {
 	CXL_REGLOC_RBI_COMPONENT,
 	CXL_REGLOC_RBI_VIRT,
 	CXL_REGLOC_RBI_MEMDEV,
+	CXL_REGLOC_RBI_PMU,
 	CXL_REGLOC_RBI_TYPES
 };
 
diff --git a/drivers/cxl/mem.c b/drivers/cxl/mem.c
index 205e2e280aed..317c7548e4e9 100644
--- a/drivers/cxl/mem.c
+++ b/drivers/cxl/mem.c
@@ -107,6 +107,7 @@ DEFINE_DEBUGFS_ATTRIBUTE(cxl_poison_clear_fops, NULL,
 static int cxl_mem_probe(struct device *dev)
 {
 	struct cxl_memdev *cxlmd = to_cxl_memdev(dev);
+	struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlmd->cxlds);
 	struct cxl_dev_state *cxlds = cxlmd->cxlds;
 	struct device *endpoint_parent;
 	struct cxl_port *parent_port;
@@ -131,10 +132,10 @@ static int cxl_mem_probe(struct device *dev)
 	dentry = cxl_debugfs_create_dir(dev_name(dev));
 	debugfs_create_devm_seqfile(dev, "dpamem", dentry, cxl_mem_dpa_show);
 
-	if (test_bit(CXL_POISON_ENABLED_INJECT, cxlds->poison.enabled_cmds))
+	if (test_bit(CXL_POISON_ENABLED_INJECT, mds->poison.enabled_cmds))
 		debugfs_create_file("inject_poison", 0200, dentry, cxlmd,
 				    &cxl_poison_inject_fops);
-	if (test_bit(CXL_POISON_ENABLED_CLEAR, cxlds->poison.enabled_cmds))
+	if (test_bit(CXL_POISON_ENABLED_CLEAR, mds->poison.enabled_cmds))
 		debugfs_create_file("clear_poison", 0200, dentry, cxlmd,
 				    &cxl_poison_clear_fops);
 
@@ -217,9 +218,12 @@ static umode_t cxl_mem_visible(struct kobject *kobj, struct attribute *a, int n)
 {
 	if (a == &dev_attr_trigger_poison_list.attr) {
 		struct device *dev = kobj_to_dev(kobj);
+		struct cxl_memdev *cxlmd = to_cxl_memdev(dev);
+		struct cxl_memdev_state *mds =
+			to_cxl_memdev_state(cxlmd->cxlds);
 
 		if (!test_bit(CXL_POISON_ENABLED_LIST,
-			      to_cxl_memdev(dev)->cxlds->poison.enabled_cmds))
+			      mds->poison.enabled_cmds))
 			return 0;
 	}
 	return a->mode;
diff --git a/drivers/cxl/pci.c b/drivers/cxl/pci.c
index 99a75c54ee39..48f88d96029d 100644
--- a/drivers/cxl/pci.c
+++ b/drivers/cxl/pci.c
@@ -13,6 +13,7 @@
 #include "cxlmem.h"
 #include "cxlpci.h"
 #include "cxl.h"
+#include "pmu.h"
 
 /**
  * DOC: cxl pci
@@ -84,9 +85,92 @@ static int cxl_pci_mbox_wait_for_doorbell(struct cxl_dev_state *cxlds)
 			    status & CXLMDEV_DEV_FATAL ? " fatal" : "",        \
 			    status & CXLMDEV_FW_HALT ? " firmware-halt" : "")
 
+struct cxl_dev_id {
+	struct cxl_dev_state *cxlds;
+};
+
+static int cxl_request_irq(struct cxl_dev_state *cxlds, int irq,
+			   irq_handler_t handler, irq_handler_t thread_fn)
+{
+	struct device *dev = cxlds->dev;
+	struct cxl_dev_id *dev_id;
+
+	/* dev_id must be globally unique and must contain the cxlds */
+	dev_id = devm_kzalloc(dev, sizeof(*dev_id), GFP_KERNEL);
+	if (!dev_id)
+		return -ENOMEM;
+	dev_id->cxlds = cxlds;
+
+	return devm_request_threaded_irq(dev, irq, handler, thread_fn,
+					 IRQF_SHARED | IRQF_ONESHOT,
+					 NULL, dev_id);
+}
+
+static bool cxl_mbox_background_complete(struct cxl_dev_state *cxlds)
+{
+	u64 reg;
+
+	reg = readq(cxlds->regs.mbox + CXLDEV_MBOX_BG_CMD_STATUS_OFFSET);
+	return FIELD_GET(CXLDEV_MBOX_BG_CMD_COMMAND_PCT_MASK, reg) == 100;
+}
+
+static irqreturn_t cxl_pci_mbox_irq(int irq, void *id)
+{
+	u64 reg;
+	u16 opcode;
+	struct cxl_dev_id *dev_id = id;
+	struct cxl_dev_state *cxlds = dev_id->cxlds;
+	struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlds);
+
+	if (!cxl_mbox_background_complete(cxlds))
+		return IRQ_NONE;
+
+	reg = readq(cxlds->regs.mbox + CXLDEV_MBOX_BG_CMD_STATUS_OFFSET);
+	opcode = FIELD_GET(CXLDEV_MBOX_BG_CMD_COMMAND_OPCODE_MASK, reg);
+	if (opcode == CXL_MBOX_OP_SANITIZE) {
+		if (mds->security.sanitize_node)
+			sysfs_notify_dirent(mds->security.sanitize_node);
+
+		dev_dbg(cxlds->dev, "Sanitization operation ended\n");
+	} else {
+		/* short-circuit the wait in __cxl_pci_mbox_send_cmd() */
+		rcuwait_wake_up(&mds->mbox_wait);
+	}
+
+	return IRQ_HANDLED;
+}
+
+/*
+ * Sanitization operation polling mode.
+ */
+static void cxl_mbox_sanitize_work(struct work_struct *work)
+{
+	struct cxl_memdev_state *mds =
+		container_of(work, typeof(*mds), security.poll_dwork.work);
+	struct cxl_dev_state *cxlds = &mds->cxlds;
+
+	mutex_lock(&mds->mbox_mutex);
+	if (cxl_mbox_background_complete(cxlds)) {
+		mds->security.poll_tmo_secs = 0;
+		put_device(cxlds->dev);
+
+		if (mds->security.sanitize_node)
+			sysfs_notify_dirent(mds->security.sanitize_node);
+
+		dev_dbg(cxlds->dev, "Sanitization operation ended\n");
+	} else {
+		int timeout = mds->security.poll_tmo_secs + 10;
+
+		mds->security.poll_tmo_secs = min(15 * 60, timeout);
+		queue_delayed_work(system_wq, &mds->security.poll_dwork,
+				   timeout * HZ);
+	}
+	mutex_unlock(&mds->mbox_mutex);
+}
+
 /**
  * __cxl_pci_mbox_send_cmd() - Execute a mailbox command
- * @cxlds: The device state to communicate with.
+ * @mds: The memory device driver data
  * @mbox_cmd: Command to send to the memory device.
  *
  * Context: Any context. Expects mbox_mutex to be held.
@@ -106,16 +190,17 @@ static int cxl_pci_mbox_wait_for_doorbell(struct cxl_dev_state *cxlds)
  * not need to coordinate with each other. The driver only uses the primary
  * mailbox.
  */
-static int __cxl_pci_mbox_send_cmd(struct cxl_dev_state *cxlds,
+static int __cxl_pci_mbox_send_cmd(struct cxl_memdev_state *mds,
 				   struct cxl_mbox_cmd *mbox_cmd)
 {
+	struct cxl_dev_state *cxlds = &mds->cxlds;
 	void __iomem *payload = cxlds->regs.mbox + CXLDEV_MBOX_PAYLOAD_OFFSET;
 	struct device *dev = cxlds->dev;
 	u64 cmd_reg, status_reg;
 	size_t out_len;
 	int rc;
 
-	lockdep_assert_held(&cxlds->mbox_mutex);
+	lockdep_assert_held(&mds->mbox_mutex);
 
 	/*
 	 * Here are the steps from 8.2.8.4 of the CXL 2.0 spec.
@@ -144,6 +229,16 @@ static int __cxl_pci_mbox_send_cmd(struct cxl_dev_state *cxlds,
 		return -EBUSY;
 	}
 
+	/*
+	 * With sanitize polling, hardware might be done and the poller still
+	 * not be in sync. Ensure no new command comes in until so. Keep the
+	 * hardware semantics and only allow device health status.
+	 */
+	if (mds->security.poll_tmo_secs > 0) {
+		if (mbox_cmd->opcode != CXL_MBOX_OP_GET_HEALTH_INFO)
+			return -EBUSY;
+	}
+
 	cmd_reg = FIELD_PREP(CXLDEV_MBOX_CMD_COMMAND_OPCODE_MASK,
 			     mbox_cmd->opcode);
 	if (mbox_cmd->size_in) {
@@ -177,12 +272,80 @@ static int __cxl_pci_mbox_send_cmd(struct cxl_dev_state *cxlds,
 	mbox_cmd->return_code =
 		FIELD_GET(CXLDEV_MBOX_STATUS_RET_CODE_MASK, status_reg);
 
+	/*
+	 * Handle the background command in a synchronous manner.
+	 *
+	 * All other mailbox commands will serialize/queue on the mbox_mutex,
+	 * which we currently hold. Furthermore this also guarantees that
+	 * cxl_mbox_background_complete() checks are safe amongst each other,
+	 * in that no new bg operation can occur in between.
+	 *
+	 * Background operations are timesliced in accordance with the nature
+	 * of the command. In the event of timeout, the mailbox state is
+	 * indeterminate until the next successful command submission and the
+	 * driver can get back in sync with the hardware state.
+	 */
+	if (mbox_cmd->return_code == CXL_MBOX_CMD_RC_BACKGROUND) {
+		u64 bg_status_reg;
+		int i, timeout;
+
+		/*
+		 * Sanitization is a special case which monopolizes the device
+		 * and cannot be timesliced. Handle asynchronously instead,
+		 * and allow userspace to poll(2) for completion.
+		 */
+		if (mbox_cmd->opcode == CXL_MBOX_OP_SANITIZE) {
+			if (mds->security.poll_tmo_secs != -1) {
+				/* hold the device throughout */
+				get_device(cxlds->dev);
+
+				/* give first timeout a second */
+				timeout = 1;
+				mds->security.poll_tmo_secs = timeout;
+				queue_delayed_work(system_wq,
+						   &mds->security.poll_dwork,
+						   timeout * HZ);
+			}
+
+			dev_dbg(dev, "Sanitization operation started\n");
+			goto success;
+		}
+
+		dev_dbg(dev, "Mailbox background operation (0x%04x) started\n",
+			mbox_cmd->opcode);
+
+		timeout = mbox_cmd->poll_interval_ms;
+		for (i = 0; i < mbox_cmd->poll_count; i++) {
+			if (rcuwait_wait_event_timeout(&mds->mbox_wait,
+				       cxl_mbox_background_complete(cxlds),
+				       TASK_UNINTERRUPTIBLE,
+				       msecs_to_jiffies(timeout)) > 0)
+				break;
+		}
+
+		if (!cxl_mbox_background_complete(cxlds)) {
+			dev_err(dev, "timeout waiting for background (%d ms)\n",
+				timeout * mbox_cmd->poll_count);
+			return -ETIMEDOUT;
+		}
+
+		bg_status_reg = readq(cxlds->regs.mbox +
+				      CXLDEV_MBOX_BG_CMD_STATUS_OFFSET);
+		mbox_cmd->return_code =
+			FIELD_GET(CXLDEV_MBOX_BG_CMD_COMMAND_RC_MASK,
+				  bg_status_reg);
+		dev_dbg(dev,
+			"Mailbox background operation (0x%04x) completed\n",
+			mbox_cmd->opcode);
+	}
+
 	if (mbox_cmd->return_code != CXL_MBOX_CMD_RC_SUCCESS) {
 		dev_dbg(dev, "Mailbox operation had an error: %s\n",
 			cxl_mbox_cmd_rc2str(mbox_cmd));
 		return 0; /* completed but caller must check return_code */
 	}
 
+success:
 	/* #7 */
 	cmd_reg = readq(cxlds->regs.mbox + CXLDEV_MBOX_CMD_OFFSET);
 	out_len = FIELD_GET(CXLDEV_MBOX_CMD_PAYLOAD_LENGTH_MASK, cmd_reg);
@@ -196,8 +359,9 @@ static int __cxl_pci_mbox_send_cmd(struct cxl_dev_state *cxlds,
 		 * have requested less data than the hardware supplied even
 		 * within spec.
 		 */
-		size_t n = min3(mbox_cmd->size_out, cxlds->payload_size, out_len);
+		size_t n;
 
+		n = min3(mbox_cmd->size_out, mds->payload_size, out_len);
 		memcpy_fromio(mbox_cmd->payload_out, payload, n);
 		mbox_cmd->size_out = n;
 	} else {
@@ -207,20 +371,23 @@ static int __cxl_pci_mbox_send_cmd(struct cxl_dev_state *cxlds,
 	return 0;
 }
 
-static int cxl_pci_mbox_send(struct cxl_dev_state *cxlds, struct cxl_mbox_cmd *cmd)
+static int cxl_pci_mbox_send(struct cxl_memdev_state *mds,
+			     struct cxl_mbox_cmd *cmd)
 {
 	int rc;
 
-	mutex_lock_io(&cxlds->mbox_mutex);
-	rc = __cxl_pci_mbox_send_cmd(cxlds, cmd);
-	mutex_unlock(&cxlds->mbox_mutex);
+	mutex_lock_io(&mds->mbox_mutex);
+	rc = __cxl_pci_mbox_send_cmd(mds, cmd);
+	mutex_unlock(&mds->mbox_mutex);
 
 	return rc;
 }
 
-static int cxl_pci_setup_mailbox(struct cxl_dev_state *cxlds)
+static int cxl_pci_setup_mailbox(struct cxl_memdev_state *mds)
 {
+	struct cxl_dev_state *cxlds = &mds->cxlds;
 	const int cap = readl(cxlds->regs.mbox + CXLDEV_MBOX_CAPS_OFFSET);
+	struct device *dev = cxlds->dev;
 	unsigned long timeout;
 	u64 md_status;
 
@@ -234,8 +401,7 @@ static int cxl_pci_setup_mailbox(struct cxl_dev_state *cxlds)
 	} while (!time_after(jiffies, timeout));
 
 	if (!(md_status & CXLMDEV_MBOX_IF_READY)) {
-		cxl_err(cxlds->dev, md_status,
-			"timeout awaiting mailbox ready");
+		cxl_err(dev, md_status, "timeout awaiting mailbox ready");
 		return -ETIMEDOUT;
 	}
 
@@ -246,12 +412,12 @@ static int cxl_pci_setup_mailbox(struct cxl_dev_state *cxlds)
 	 * source for future doorbell busy events.
 	 */
 	if (cxl_pci_mbox_wait_for_doorbell(cxlds) != 0) {
-		cxl_err(cxlds->dev, md_status, "timeout awaiting mailbox idle");
+		cxl_err(dev, md_status, "timeout awaiting mailbox idle");
 		return -ETIMEDOUT;
 	}
 
-	cxlds->mbox_send = cxl_pci_mbox_send;
-	cxlds->payload_size =
+	mds->mbox_send = cxl_pci_mbox_send;
+	mds->payload_size =
 		1 << FIELD_GET(CXLDEV_MBOX_CAP_PAYLOAD_SIZE_MASK, cap);
 
 	/*
@@ -261,16 +427,43 @@ static int cxl_pci_setup_mailbox(struct cxl_dev_state *cxlds)
 	 * there's no point in going forward. If the size is too large, there's
 	 * no harm is soft limiting it.
 	 */
-	cxlds->payload_size = min_t(size_t, cxlds->payload_size, SZ_1M);
-	if (cxlds->payload_size < 256) {
-		dev_err(cxlds->dev, "Mailbox is too small (%zub)",
-			cxlds->payload_size);
+	mds->payload_size = min_t(size_t, mds->payload_size, SZ_1M);
+	if (mds->payload_size < 256) {
+		dev_err(dev, "Mailbox is too small (%zub)",
+			mds->payload_size);
 		return -ENXIO;
 	}
 
-	dev_dbg(cxlds->dev, "Mailbox payload sized %zu",
-		cxlds->payload_size);
+	dev_dbg(dev, "Mailbox payload sized %zu", mds->payload_size);
+
+	rcuwait_init(&mds->mbox_wait);
+
+	if (cap & CXLDEV_MBOX_CAP_BG_CMD_IRQ) {
+		u32 ctrl;
+		int irq, msgnum;
+		struct pci_dev *pdev = to_pci_dev(cxlds->dev);
+
+		msgnum = FIELD_GET(CXLDEV_MBOX_CAP_IRQ_MSGNUM_MASK, cap);
+		irq = pci_irq_vector(pdev, msgnum);
+		if (irq < 0)
+			goto mbox_poll;
+
+		if (cxl_request_irq(cxlds, irq, cxl_pci_mbox_irq, NULL))
+			goto mbox_poll;
+
+		/* enable background command mbox irq support */
+		ctrl = readl(cxlds->regs.mbox + CXLDEV_MBOX_CTRL_OFFSET);
+		ctrl |= CXLDEV_MBOX_CTRL_BG_CMD_IRQ;
+		writel(ctrl, cxlds->regs.mbox + CXLDEV_MBOX_CTRL_OFFSET);
+
+		return 0;
+	}
+
+mbox_poll:
+	mds->security.poll = true;
+	INIT_DELAYED_WORK(&mds->security.poll_dwork, cxl_mbox_sanitize_work);
 
+	dev_dbg(cxlds->dev, "Mailbox interrupts are unsupported");
 	return 0;
 }
 
@@ -334,19 +527,6 @@ static int cxl_pci_setup_regs(struct pci_dev *pdev, enum cxl_regloc_type type,
 	return cxl_setup_regs(map);
 }
 
-/*
- * CXL v3.0 6.2.3 Table 6-4
- * The table indicates that if PCIe Flit Mode is set, then CXL is in 256B flits
- * mode, otherwise it's 68B flits mode.
- */
-static bool cxl_pci_flit_256(struct pci_dev *pdev)
-{
-	u16 lnksta2;
-
-	pcie_capability_read_word(pdev, PCI_EXP_LNKSTA2, &lnksta2);
-	return lnksta2 & PCI_EXP_LNKSTA2_FLIT;
-}
-
 static int cxl_pci_ras_unmask(struct pci_dev *pdev)
 {
 	struct pci_host_bridge *host_bridge = pci_find_host_bridge(pdev->bus);
@@ -373,9 +553,8 @@ static int cxl_pci_ras_unmask(struct pci_dev *pdev)
 		addr = cxlds->regs.ras + CXL_RAS_UNCORRECTABLE_MASK_OFFSET;
 		orig_val = readl(addr);
 
-		mask = CXL_RAS_UNCORRECTABLE_MASK_MASK;
-		if (!cxl_pci_flit_256(pdev))
-			mask &= ~CXL_RAS_UNCORRECTABLE_MASK_F256B_MASK;
+		mask = CXL_RAS_UNCORRECTABLE_MASK_MASK |
+		       CXL_RAS_UNCORRECTABLE_MASK_F256B_MASK;
 		val = orig_val & ~mask;
 		writel(val, addr);
 		dev_dbg(&pdev->dev,
@@ -402,18 +581,18 @@ static void free_event_buf(void *buf)
 
 /*
  * There is a single buffer for reading event logs from the mailbox.  All logs
- * share this buffer protected by the cxlds->event_log_lock.
+ * share this buffer protected by the mds->event_log_lock.
  */
-static int cxl_mem_alloc_event_buf(struct cxl_dev_state *cxlds)
+static int cxl_mem_alloc_event_buf(struct cxl_memdev_state *mds)
 {
 	struct cxl_get_event_payload *buf;
 
-	buf = kvmalloc(cxlds->payload_size, GFP_KERNEL);
+	buf = kvmalloc(mds->payload_size, GFP_KERNEL);
 	if (!buf)
 		return -ENOMEM;
-	cxlds->event.buf = buf;
+	mds->event.buf = buf;
 
-	return devm_add_action_or_reset(cxlds->dev, free_event_buf, buf);
+	return devm_add_action_or_reset(mds->cxlds.dev, free_event_buf, buf);
 }
 
 static int cxl_alloc_irq_vectors(struct pci_dev *pdev)
@@ -438,14 +617,11 @@ static int cxl_alloc_irq_vectors(struct pci_dev *pdev)
 	return 0;
 }
 
-struct cxl_dev_id {
-	struct cxl_dev_state *cxlds;
-};
-
 static irqreturn_t cxl_event_thread(int irq, void *id)
 {
 	struct cxl_dev_id *dev_id = id;
 	struct cxl_dev_state *cxlds = dev_id->cxlds;
+	struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlds);
 	u32 status;
 
 	do {
@@ -458,7 +634,7 @@ static irqreturn_t cxl_event_thread(int irq, void *id)
 		status &= CXLDEV_EVENT_STATUS_ALL;
 		if (!status)
 			break;
-		cxl_mem_get_event_records(cxlds, status);
+		cxl_mem_get_event_records(mds, status);
 		cond_resched();
 	} while (status);
 
@@ -467,31 +643,21 @@ static irqreturn_t cxl_event_thread(int irq, void *id)
 
 static int cxl_event_req_irq(struct cxl_dev_state *cxlds, u8 setting)
 {
-	struct device *dev = cxlds->dev;
-	struct pci_dev *pdev = to_pci_dev(dev);
-	struct cxl_dev_id *dev_id;
+	struct pci_dev *pdev = to_pci_dev(cxlds->dev);
 	int irq;
 
 	if (FIELD_GET(CXLDEV_EVENT_INT_MODE_MASK, setting) != CXL_INT_MSI_MSIX)
 		return -ENXIO;
 
-	/* dev_id must be globally unique and must contain the cxlds */
-	dev_id = devm_kzalloc(dev, sizeof(*dev_id), GFP_KERNEL);
-	if (!dev_id)
-		return -ENOMEM;
-	dev_id->cxlds = cxlds;
-
 	irq =  pci_irq_vector(pdev,
 			      FIELD_GET(CXLDEV_EVENT_INT_MSGNUM_MASK, setting));
 	if (irq < 0)
 		return irq;
 
-	return devm_request_threaded_irq(dev, irq, NULL, cxl_event_thread,
-					 IRQF_SHARED | IRQF_ONESHOT, NULL,
-					 dev_id);
+	return cxl_request_irq(cxlds, irq, NULL, cxl_event_thread);
 }
 
-static int cxl_event_get_int_policy(struct cxl_dev_state *cxlds,
+static int cxl_event_get_int_policy(struct cxl_memdev_state *mds,
 				    struct cxl_event_interrupt_policy *policy)
 {
 	struct cxl_mbox_cmd mbox_cmd = {
@@ -501,15 +667,15 @@ static int cxl_event_get_int_policy(struct cxl_dev_state *cxlds,
 	};
 	int rc;
 
-	rc = cxl_internal_send_cmd(cxlds, &mbox_cmd);
+	rc = cxl_internal_send_cmd(mds, &mbox_cmd);
 	if (rc < 0)
-		dev_err(cxlds->dev, "Failed to get event interrupt policy : %d",
-			rc);
+		dev_err(mds->cxlds.dev,
+			"Failed to get event interrupt policy : %d", rc);
 
 	return rc;
 }
 
-static int cxl_event_config_msgnums(struct cxl_dev_state *cxlds,
+static int cxl_event_config_msgnums(struct cxl_memdev_state *mds,
 				    struct cxl_event_interrupt_policy *policy)
 {
 	struct cxl_mbox_cmd mbox_cmd;
@@ -528,23 +694,24 @@ static int cxl_event_config_msgnums(struct cxl_dev_state *cxlds,
 		.size_in = sizeof(*policy),
 	};
 
-	rc = cxl_internal_send_cmd(cxlds, &mbox_cmd);
+	rc = cxl_internal_send_cmd(mds, &mbox_cmd);
 	if (rc < 0) {
-		dev_err(cxlds->dev, "Failed to set event interrupt policy : %d",
+		dev_err(mds->cxlds.dev, "Failed to set event interrupt policy : %d",
 			rc);
 		return rc;
 	}
 
 	/* Retrieve final interrupt settings */
-	return cxl_event_get_int_policy(cxlds, policy);
+	return cxl_event_get_int_policy(mds, policy);
 }
 
-static int cxl_event_irqsetup(struct cxl_dev_state *cxlds)
+static int cxl_event_irqsetup(struct cxl_memdev_state *mds)
 {
+	struct cxl_dev_state *cxlds = &mds->cxlds;
 	struct cxl_event_interrupt_policy policy;
 	int rc;
 
-	rc = cxl_event_config_msgnums(cxlds, &policy);
+	rc = cxl_event_config_msgnums(mds, &policy);
 	if (rc)
 		return rc;
 
@@ -583,7 +750,7 @@ static bool cxl_event_int_is_fw(u8 setting)
 }
 
 static int cxl_event_config(struct pci_host_bridge *host_bridge,
-			    struct cxl_dev_state *cxlds)
+			    struct cxl_memdev_state *mds)
 {
 	struct cxl_event_interrupt_policy policy;
 	int rc;
@@ -595,11 +762,11 @@ static int cxl_event_config(struct pci_host_bridge *host_bridge,
 	if (!host_bridge->native_cxl_error)
 		return 0;
 
-	rc = cxl_mem_alloc_event_buf(cxlds);
+	rc = cxl_mem_alloc_event_buf(mds);
 	if (rc)
 		return rc;
 
-	rc = cxl_event_get_int_policy(cxlds, &policy);
+	rc = cxl_event_get_int_policy(mds, &policy);
 	if (rc)
 		return rc;
 
@@ -607,15 +774,16 @@ static int cxl_event_config(struct pci_host_bridge *host_bridge,
 	    cxl_event_int_is_fw(policy.warn_settings) ||
 	    cxl_event_int_is_fw(policy.failure_settings) ||
 	    cxl_event_int_is_fw(policy.fatal_settings)) {
-		dev_err(cxlds->dev, "FW still in control of Event Logs despite _OSC settings\n");
+		dev_err(mds->cxlds.dev,
+			"FW still in control of Event Logs despite _OSC settings\n");
 		return -EBUSY;
 	}
 
-	rc = cxl_event_irqsetup(cxlds);
+	rc = cxl_event_irqsetup(mds);
 	if (rc)
 		return rc;
 
-	cxl_mem_get_event_records(cxlds, CXLDEV_EVENT_STATUS_ALL);
+	cxl_mem_get_event_records(mds, CXLDEV_EVENT_STATUS_ALL);
 
 	return 0;
 }
@@ -623,10 +791,11 @@ static int cxl_event_config(struct pci_host_bridge *host_bridge,
 static int cxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 {
 	struct pci_host_bridge *host_bridge = pci_find_host_bridge(pdev->bus);
+	struct cxl_memdev_state *mds;
+	struct cxl_dev_state *cxlds;
 	struct cxl_register_map map;
 	struct cxl_memdev *cxlmd;
-	struct cxl_dev_state *cxlds;
-	int rc;
+	int i, rc, pmu_count;
 
 	/*
 	 * Double check the anonymous union trickery in struct cxl_regs
@@ -640,9 +809,10 @@ static int cxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 		return rc;
 	pci_set_master(pdev);
 
-	cxlds = cxl_dev_state_create(&pdev->dev);
-	if (IS_ERR(cxlds))
-		return PTR_ERR(cxlds);
+	mds = cxl_memdev_state_create(&pdev->dev);
+	if (IS_ERR(mds))
+		return PTR_ERR(mds);
+	cxlds = &mds->cxlds;
 	pci_set_drvdata(pdev, cxlds);
 
 	cxlds->rcd = is_cxl_restricted(pdev);
@@ -685,31 +855,31 @@ static int cxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 	else
 		dev_warn(&pdev->dev, "Media not active (%d)\n", rc);
 
-	rc = cxl_pci_setup_mailbox(cxlds);
+	rc = cxl_alloc_irq_vectors(pdev);
 	if (rc)
 		return rc;
 
-	rc = cxl_enumerate_cmds(cxlds);
+	rc = cxl_pci_setup_mailbox(mds);
 	if (rc)
 		return rc;
 
-	rc = cxl_set_timestamp(cxlds);
+	rc = cxl_enumerate_cmds(mds);
 	if (rc)
 		return rc;
 
-	rc = cxl_poison_state_init(cxlds);
+	rc = cxl_set_timestamp(mds);
 	if (rc)
 		return rc;
 
-	rc = cxl_dev_state_identify(cxlds);
+	rc = cxl_poison_state_init(mds);
 	if (rc)
 		return rc;
 
-	rc = cxl_mem_create_range_info(cxlds);
+	rc = cxl_dev_state_identify(mds);
 	if (rc)
 		return rc;
 
-	rc = cxl_alloc_irq_vectors(pdev);
+	rc = cxl_mem_create_range_info(mds);
 	if (rc)
 		return rc;
 
@@ -717,7 +887,34 @@ static int cxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 	if (IS_ERR(cxlmd))
 		return PTR_ERR(cxlmd);
 
-	rc = cxl_event_config(host_bridge, cxlds);
+	rc = cxl_memdev_setup_fw_upload(mds);
+	if (rc)
+		return rc;
+
+	pmu_count = cxl_count_regblock(pdev, CXL_REGLOC_RBI_PMU);
+	for (i = 0; i < pmu_count; i++) {
+		struct cxl_pmu_regs pmu_regs;
+
+		rc = cxl_find_regblock_instance(pdev, CXL_REGLOC_RBI_PMU, &map, i);
+		if (rc) {
+			dev_dbg(&pdev->dev, "Could not find PMU regblock\n");
+			break;
+		}
+
+		rc = cxl_map_pmu_regs(pdev, &pmu_regs, &map);
+		if (rc) {
+			dev_dbg(&pdev->dev, "Could not map PMU regs\n");
+			break;
+		}
+
+		rc = devm_cxl_pmu_add(cxlds->dev, &pmu_regs, cxlmd->id, i, CXL_PMU_MEMDEV);
+		if (rc) {
+			dev_dbg(&pdev->dev, "Could not add PMU instance\n");
+			break;
+		}
+	}
+
+	rc = cxl_event_config(host_bridge, mds);
 	if (rc)
 		return rc;
 
diff --git a/drivers/cxl/pmem.c b/drivers/cxl/pmem.c
index 71cfa1fdf902..7cb8994f8809 100644
--- a/drivers/cxl/pmem.c
+++ b/drivers/cxl/pmem.c
@@ -15,9 +15,9 @@ extern const struct nvdimm_security_ops *cxl_security_ops;
 
 static __read_mostly DECLARE_BITMAP(exclusive_cmds, CXL_MEM_COMMAND_ID_MAX);
 
-static void clear_exclusive(void *cxlds)
+static void clear_exclusive(void *mds)
 {
-	clear_exclusive_cxl_commands(cxlds, exclusive_cmds);
+	clear_exclusive_cxl_commands(mds, exclusive_cmds);
 }
 
 static void unregister_nvdimm(void *nvdimm)
@@ -65,13 +65,13 @@ static int cxl_nvdimm_probe(struct device *dev)
 	struct cxl_nvdimm *cxl_nvd = to_cxl_nvdimm(dev);
 	struct cxl_memdev *cxlmd = cxl_nvd->cxlmd;
 	struct cxl_nvdimm_bridge *cxl_nvb = cxlmd->cxl_nvb;
+	struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlmd->cxlds);
 	unsigned long flags = 0, cmd_mask = 0;
-	struct cxl_dev_state *cxlds = cxlmd->cxlds;
 	struct nvdimm *nvdimm;
 	int rc;
 
-	set_exclusive_cxl_commands(cxlds, exclusive_cmds);
-	rc = devm_add_action_or_reset(dev, clear_exclusive, cxlds);
+	set_exclusive_cxl_commands(mds, exclusive_cmds);
+	rc = devm_add_action_or_reset(dev, clear_exclusive, mds);
 	if (rc)
 		return rc;
 
@@ -100,22 +100,23 @@ static struct cxl_driver cxl_nvdimm_driver = {
 	},
 };
 
-static int cxl_pmem_get_config_size(struct cxl_dev_state *cxlds,
+static int cxl_pmem_get_config_size(struct cxl_memdev_state *mds,
 				    struct nd_cmd_get_config_size *cmd,
 				    unsigned int buf_len)
 {
 	if (sizeof(*cmd) > buf_len)
 		return -EINVAL;
 
-	*cmd = (struct nd_cmd_get_config_size) {
-		 .config_size = cxlds->lsa_size,
-		 .max_xfer = cxlds->payload_size - sizeof(struct cxl_mbox_set_lsa),
+	*cmd = (struct nd_cmd_get_config_size){
+		.config_size = mds->lsa_size,
+		.max_xfer =
+			mds->payload_size - sizeof(struct cxl_mbox_set_lsa),
 	};
 
 	return 0;
 }
 
-static int cxl_pmem_get_config_data(struct cxl_dev_state *cxlds,
+static int cxl_pmem_get_config_data(struct cxl_memdev_state *mds,
 				    struct nd_cmd_get_config_data_hdr *cmd,
 				    unsigned int buf_len)
 {
@@ -140,13 +141,13 @@ static int cxl_pmem_get_config_data(struct cxl_dev_state *cxlds,
 		.payload_out = cmd->out_buf,
 	};
 
-	rc = cxl_internal_send_cmd(cxlds, &mbox_cmd);
+	rc = cxl_internal_send_cmd(mds, &mbox_cmd);
 	cmd->status = 0;
 
 	return rc;
 }
 
-static int cxl_pmem_set_config_data(struct cxl_dev_state *cxlds,
+static int cxl_pmem_set_config_data(struct cxl_memdev_state *mds,
 				    struct nd_cmd_set_config_hdr *cmd,
 				    unsigned int buf_len)
 {
@@ -176,7 +177,7 @@ static int cxl_pmem_set_config_data(struct cxl_dev_state *cxlds,
 		.size_in = struct_size(set_lsa, data, cmd->in_length),
 	};
 
-	rc = cxl_internal_send_cmd(cxlds, &mbox_cmd);
+	rc = cxl_internal_send_cmd(mds, &mbox_cmd);
 
 	/*
 	 * Set "firmware" status (4-packed bytes at the end of the input
@@ -194,18 +195,18 @@ static int cxl_pmem_nvdimm_ctl(struct nvdimm *nvdimm, unsigned int cmd,
 	struct cxl_nvdimm *cxl_nvd = nvdimm_provider_data(nvdimm);
 	unsigned long cmd_mask = nvdimm_cmd_mask(nvdimm);
 	struct cxl_memdev *cxlmd = cxl_nvd->cxlmd;
-	struct cxl_dev_state *cxlds = cxlmd->cxlds;
+	struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlmd->cxlds);
 
 	if (!test_bit(cmd, &cmd_mask))
 		return -ENOTTY;
 
 	switch (cmd) {
 	case ND_CMD_GET_CONFIG_SIZE:
-		return cxl_pmem_get_config_size(cxlds, buf, buf_len);
+		return cxl_pmem_get_config_size(mds, buf, buf_len);
 	case ND_CMD_GET_CONFIG_DATA:
-		return cxl_pmem_get_config_data(cxlds, buf, buf_len);
+		return cxl_pmem_get_config_data(mds, buf, buf_len);
 	case ND_CMD_SET_CONFIG_DATA:
-		return cxl_pmem_set_config_data(cxlds, buf, buf_len);
+		return cxl_pmem_set_config_data(mds, buf, buf_len);
 	default:
 		return -ENOTTY;
 	}
diff --git a/drivers/cxl/pmu.h b/drivers/cxl/pmu.h
new file mode 100644
index 000000000000..b1e9bcd9f28c
--- /dev/null
+++ b/drivers/cxl/pmu.h
@@ -0,0 +1,28 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright(c) 2023 Huawei
+ * CXL Specification rev 3.0 Setion 8.2.7 (CPMU Register Interface)
+ */
+#ifndef CXL_PMU_H
+#define CXL_PMU_H
+#include <linux/device.h>
+
+enum cxl_pmu_type {
+	CXL_PMU_MEMDEV,
+};
+
+#define CXL_PMU_REGMAP_SIZE 0xe00 /* Table 8-32 CXL 3.0 specification */
+struct cxl_pmu {
+	struct device dev;
+	void __iomem *base;
+	int assoc_id;
+	int index;
+	enum cxl_pmu_type type;
+};
+
+#define to_cxl_pmu(dev) container_of(dev, struct cxl_pmu, dev)
+struct cxl_pmu_regs;
+int devm_cxl_pmu_add(struct device *parent, struct cxl_pmu_regs *regs,
+		     int assoc_id, int idx, enum cxl_pmu_type type);
+
+#endif
diff --git a/drivers/cxl/port.c b/drivers/cxl/port.c
index 01e84ea54f56..6240e05b9542 100644
--- a/drivers/cxl/port.c
+++ b/drivers/cxl/port.c
@@ -60,17 +60,13 @@ static int discover_region(struct device *dev, void *root)
 static int cxl_switch_port_probe(struct cxl_port *port)
 {
 	struct cxl_hdm *cxlhdm;
-	int rc, nr_dports;
-
-	nr_dports = devm_cxl_port_enumerate_dports(port);
-	if (nr_dports < 0)
-		return nr_dports;
+	int rc;
 
-	cxlhdm = devm_cxl_setup_hdm(port, NULL);
-	rc = devm_cxl_enable_hdm(port, cxlhdm);
-	if (rc)
+	rc = devm_cxl_port_enumerate_dports(port);
+	if (rc < 0)
 		return rc;
 
+	cxlhdm = devm_cxl_setup_hdm(port, NULL);
 	if (!IS_ERR(cxlhdm))
 		return devm_cxl_enumerate_decoders(cxlhdm, NULL);
 
@@ -79,7 +75,7 @@ static int cxl_switch_port_probe(struct cxl_port *port)
 		return PTR_ERR(cxlhdm);
 	}
 
-	if (nr_dports == 1) {
+	if (rc == 1) {
 		dev_dbg(&port->dev, "Fallback to passthrough decoder\n");
 		return devm_cxl_add_passthrough_decoder(port);
 	}
diff --git a/drivers/cxl/security.c b/drivers/cxl/security.c
index 4ad4bda2d18e..21856a3f408e 100644
--- a/drivers/cxl/security.c
+++ b/drivers/cxl/security.c
@@ -14,7 +14,7 @@ static unsigned long cxl_pmem_get_security_flags(struct nvdimm *nvdimm,
 {
 	struct cxl_nvdimm *cxl_nvd = nvdimm_provider_data(nvdimm);
 	struct cxl_memdev *cxlmd = cxl_nvd->cxlmd;
-	struct cxl_dev_state *cxlds = cxlmd->cxlds;
+	struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlmd->cxlds);
 	unsigned long security_flags = 0;
 	struct cxl_get_security_output {
 		__le32 flags;
@@ -29,11 +29,14 @@ static unsigned long cxl_pmem_get_security_flags(struct nvdimm *nvdimm,
 		.payload_out = &out,
 	};
 
-	rc = cxl_internal_send_cmd(cxlds, &mbox_cmd);
+	rc = cxl_internal_send_cmd(mds, &mbox_cmd);
 	if (rc < 0)
 		return 0;
 
 	sec_out = le32_to_cpu(out.flags);
+	/* cache security state */
+	mds->security.state = sec_out;
+
 	if (ptype == NVDIMM_MASTER) {
 		if (sec_out & CXL_PMEM_SEC_STATE_MASTER_PASS_SET)
 			set_bit(NVDIMM_SECURITY_UNLOCKED, &security_flags);
@@ -67,7 +70,7 @@ static int cxl_pmem_security_change_key(struct nvdimm *nvdimm,
 {
 	struct cxl_nvdimm *cxl_nvd = nvdimm_provider_data(nvdimm);
 	struct cxl_memdev *cxlmd = cxl_nvd->cxlmd;
-	struct cxl_dev_state *cxlds = cxlmd->cxlds;
+	struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlmd->cxlds);
 	struct cxl_mbox_cmd mbox_cmd;
 	struct cxl_set_pass set_pass;
 
@@ -84,7 +87,7 @@ static int cxl_pmem_security_change_key(struct nvdimm *nvdimm,
 		.payload_in = &set_pass,
 	};
 
-	return cxl_internal_send_cmd(cxlds, &mbox_cmd);
+	return cxl_internal_send_cmd(mds, &mbox_cmd);
 }
 
 static int __cxl_pmem_security_disable(struct nvdimm *nvdimm,
@@ -93,7 +96,7 @@ static int __cxl_pmem_security_disable(struct nvdimm *nvdimm,
 {
 	struct cxl_nvdimm *cxl_nvd = nvdimm_provider_data(nvdimm);
 	struct cxl_memdev *cxlmd = cxl_nvd->cxlmd;
-	struct cxl_dev_state *cxlds = cxlmd->cxlds;
+	struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlmd->cxlds);
 	struct cxl_disable_pass dis_pass;
 	struct cxl_mbox_cmd mbox_cmd;
 
@@ -109,7 +112,7 @@ static int __cxl_pmem_security_disable(struct nvdimm *nvdimm,
 		.payload_in = &dis_pass,
 	};
 
-	return cxl_internal_send_cmd(cxlds, &mbox_cmd);
+	return cxl_internal_send_cmd(mds, &mbox_cmd);
 }
 
 static int cxl_pmem_security_disable(struct nvdimm *nvdimm,
@@ -128,12 +131,12 @@ static int cxl_pmem_security_freeze(struct nvdimm *nvdimm)
 {
 	struct cxl_nvdimm *cxl_nvd = nvdimm_provider_data(nvdimm);
 	struct cxl_memdev *cxlmd = cxl_nvd->cxlmd;
-	struct cxl_dev_state *cxlds = cxlmd->cxlds;
+	struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlmd->cxlds);
 	struct cxl_mbox_cmd mbox_cmd = {
 		.opcode = CXL_MBOX_OP_FREEZE_SECURITY,
 	};
 
-	return cxl_internal_send_cmd(cxlds, &mbox_cmd);
+	return cxl_internal_send_cmd(mds, &mbox_cmd);
 }
 
 static int cxl_pmem_security_unlock(struct nvdimm *nvdimm,
@@ -141,7 +144,7 @@ static int cxl_pmem_security_unlock(struct nvdimm *nvdimm,
 {
 	struct cxl_nvdimm *cxl_nvd = nvdimm_provider_data(nvdimm);
 	struct cxl_memdev *cxlmd = cxl_nvd->cxlmd;
-	struct cxl_dev_state *cxlds = cxlmd->cxlds;
+	struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlmd->cxlds);
 	u8 pass[NVDIMM_PASSPHRASE_LEN];
 	struct cxl_mbox_cmd mbox_cmd;
 	int rc;
@@ -153,7 +156,7 @@ static int cxl_pmem_security_unlock(struct nvdimm *nvdimm,
 		.payload_in = pass,
 	};
 
-	rc = cxl_internal_send_cmd(cxlds, &mbox_cmd);
+	rc = cxl_internal_send_cmd(mds, &mbox_cmd);
 	if (rc < 0)
 		return rc;
 
@@ -166,7 +169,7 @@ static int cxl_pmem_security_passphrase_erase(struct nvdimm *nvdimm,
 {
 	struct cxl_nvdimm *cxl_nvd = nvdimm_provider_data(nvdimm);
 	struct cxl_memdev *cxlmd = cxl_nvd->cxlmd;
-	struct cxl_dev_state *cxlds = cxlmd->cxlds;
+	struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlmd->cxlds);
 	struct cxl_mbox_cmd mbox_cmd;
 	struct cxl_pass_erase erase;
 	int rc;
@@ -182,7 +185,7 @@ static int cxl_pmem_security_passphrase_erase(struct nvdimm *nvdimm,
 		.payload_in = &erase,
 	};
 
-	rc = cxl_internal_send_cmd(cxlds, &mbox_cmd);
+	rc = cxl_internal_send_cmd(mds, &mbox_cmd);
 	if (rc < 0)
 		return rc;
 
diff --git a/drivers/perf/Kconfig b/drivers/perf/Kconfig
index 711f82400086..2d2dd400fed2 100644
--- a/drivers/perf/Kconfig
+++ b/drivers/perf/Kconfig
@@ -213,4 +213,17 @@ source "drivers/perf/arm_cspmu/Kconfig"
 
 source "drivers/perf/amlogic/Kconfig"
 
+config CXL_PMU
+	tristate "CXL Performance Monitoring Unit"
+	depends on CXL_BUS
+	help
+	  Support performance monitoring as defined in CXL rev 3.0
+	  section 13.2: Performance Monitoring. CXL components may have
+	  one or more CXL Performance Monitoring Units (CPMUs).
+
+	  Say 'y/m' to enable a driver that will attach to performance
+	  monitoring units and provide standard perf based interfaces.
+
+	  If unsure say 'm'.
+
 endmenu
diff --git a/drivers/perf/Makefile b/drivers/perf/Makefile
index dabc859540ce..f1d7ce9da275 100644
--- a/drivers/perf/Makefile
+++ b/drivers/perf/Makefile
@@ -24,3 +24,4 @@ obj-$(CONFIG_APPLE_M1_CPU_PMU) += apple_m1_cpu_pmu.o
 obj-$(CONFIG_ALIBABA_UNCORE_DRW_PMU) += alibaba_uncore_drw_pmu.o
 obj-$(CONFIG_ARM_CORESIGHT_PMU_ARCH_SYSTEM_PMU) += arm_cspmu/
 obj-$(CONFIG_MESON_DDR_PMU) += amlogic/
+obj-$(CONFIG_CXL_PMU) += cxl_pmu.o
diff --git a/drivers/perf/cxl_pmu.c b/drivers/perf/cxl_pmu.c
new file mode 100644
index 000000000000..0a8f597e695b
--- /dev/null
+++ b/drivers/perf/cxl_pmu.c
@@ -0,0 +1,990 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+/*
+ * Copyright(c) 2023 Huawei
+ *
+ * The CXL 3.0 specification includes a standard Performance Monitoring Unit,
+ * called the CXL PMU, or CPMU. In order to allow a high degree of
+ * implementation flexibility the specification provides a wide range of
+ * options all of which are self describing.
+ *
+ * Details in CXL rev 3.0 section 8.2.7 CPMU Register Interface
+ */
+
+#include <linux/io-64-nonatomic-lo-hi.h>
+#include <linux/perf_event.h>
+#include <linux/bitops.h>
+#include <linux/device.h>
+#include <linux/bits.h>
+#include <linux/list.h>
+#include <linux/bug.h>
+#include <linux/pci.h>
+
+#include "../cxl/cxlpci.h"
+#include "../cxl/cxl.h"
+#include "../cxl/pmu.h"
+
+#define CXL_PMU_CAP_REG			0x0
+#define   CXL_PMU_CAP_NUM_COUNTERS_MSK			GENMASK_ULL(4, 0)
+#define   CXL_PMU_CAP_COUNTER_WIDTH_MSK			GENMASK_ULL(15, 8)
+#define   CXL_PMU_CAP_NUM_EVN_CAP_REG_SUP_MSK		GENMASK_ULL(24, 20)
+#define   CXL_PMU_CAP_FILTERS_SUP_MSK			GENMASK_ULL(39, 32)
+#define     CXL_PMU_FILTER_HDM				BIT(0)
+#define     CXL_PMU_FILTER_CHAN_RANK_BANK		BIT(1)
+#define   CXL_PMU_CAP_MSI_N_MSK				GENMASK_ULL(47, 44)
+#define   CXL_PMU_CAP_WRITEABLE_WHEN_FROZEN		BIT_ULL(48)
+#define   CXL_PMU_CAP_FREEZE				BIT_ULL(49)
+#define   CXL_PMU_CAP_INT				BIT_ULL(50)
+#define   CXL_PMU_CAP_VERSION_MSK			GENMASK_ULL(63, 60)
+
+#define CXL_PMU_OVERFLOW_REG		0x10
+#define CXL_PMU_FREEZE_REG		0x18
+#define CXL_PMU_EVENT_CAP_REG(n)	(0x100 + 8 * (n))
+#define   CXL_PMU_EVENT_CAP_SUPPORTED_EVENTS_MSK	GENMASK_ULL(31, 0)
+#define   CXL_PMU_EVENT_CAP_GROUP_ID_MSK		GENMASK_ULL(47, 32)
+#define   CXL_PMU_EVENT_CAP_VENDOR_ID_MSK		GENMASK_ULL(63, 48)
+
+#define CXL_PMU_COUNTER_CFG_REG(n)	(0x200 + 8 * (n))
+#define   CXL_PMU_COUNTER_CFG_TYPE_MSK			GENMASK_ULL(1, 0)
+#define     CXL_PMU_COUNTER_CFG_TYPE_FREE_RUN		0
+#define     CXL_PMU_COUNTER_CFG_TYPE_FIXED_FUN		1
+#define     CXL_PMU_COUNTER_CFG_TYPE_CONFIGURABLE	2
+#define   CXL_PMU_COUNTER_CFG_ENABLE			BIT_ULL(8)
+#define   CXL_PMU_COUNTER_CFG_INT_ON_OVRFLW		BIT_ULL(9)
+#define   CXL_PMU_COUNTER_CFG_FREEZE_ON_OVRFLW		BIT_ULL(10)
+#define   CXL_PMU_COUNTER_CFG_EDGE			BIT_ULL(11)
+#define   CXL_PMU_COUNTER_CFG_INVERT			BIT_ULL(12)
+#define   CXL_PMU_COUNTER_CFG_THRESHOLD_MSK		GENMASK_ULL(23, 16)
+#define   CXL_PMU_COUNTER_CFG_EVENTS_MSK		GENMASK_ULL(55, 24)
+#define   CXL_PMU_COUNTER_CFG_EVENT_GRP_ID_IDX_MSK	GENMASK_ULL(63, 59)
+
+#define CXL_PMU_FILTER_CFG_REG(n, f)	(0x400 + 4 * ((f) + (n) * 8))
+#define   CXL_PMU_FILTER_CFG_VALUE_MSK			GENMASK(15, 0)
+
+#define CXL_PMU_COUNTER_REG(n)		(0xc00 + 8 * (n))
+
+/* CXL rev 3.0 Table 13-5 Events under CXL Vendor ID */
+#define CXL_PMU_GID_CLOCK_TICKS		0x00
+#define CXL_PMU_GID_D2H_REQ		0x0010
+#define CXL_PMU_GID_D2H_RSP		0x0011
+#define CXL_PMU_GID_H2D_REQ		0x0012
+#define CXL_PMU_GID_H2D_RSP		0x0013
+#define CXL_PMU_GID_CACHE_DATA		0x0014
+#define CXL_PMU_GID_M2S_REQ		0x0020
+#define CXL_PMU_GID_M2S_RWD		0x0021
+#define CXL_PMU_GID_M2S_BIRSP		0x0022
+#define CXL_PMU_GID_S2M_BISNP		0x0023
+#define CXL_PMU_GID_S2M_NDR		0x0024
+#define CXL_PMU_GID_S2M_DRS		0x0025
+#define CXL_PMU_GID_DDR			0x8000
+
+static int cxl_pmu_cpuhp_state_num;
+
+struct cxl_pmu_ev_cap {
+	u16 vid;
+	u16 gid;
+	u32 msk;
+	union {
+		int counter_idx; /* fixed counters */
+		int event_idx; /* configurable counters */
+	};
+	struct list_head node;
+};
+
+#define CXL_PMU_MAX_COUNTERS 64
+struct cxl_pmu_info {
+	struct pmu pmu;
+	void __iomem *base;
+	struct perf_event **hw_events;
+	struct list_head event_caps_configurable;
+	struct list_head event_caps_fixed;
+	DECLARE_BITMAP(used_counter_bm, CXL_PMU_MAX_COUNTERS);
+	DECLARE_BITMAP(conf_counter_bm, CXL_PMU_MAX_COUNTERS);
+	u16 counter_width;
+	u8 num_counters;
+	u8 num_event_capabilities;
+	int on_cpu;
+	struct hlist_node node;
+	bool filter_hdm;
+	int irq;
+};
+
+#define pmu_to_cxl_pmu_info(_pmu) container_of(_pmu, struct cxl_pmu_info, pmu)
+
+/*
+ * All CPMU counters are discoverable via the Event Capabilities Registers.
+ * Each Event Capability register contains a a VID / GroupID.
+ * A counter may then count any combination (by summing) of events in
+ * that group which are in the Supported Events Bitmask.
+ * However, there are some complexities to the scheme.
+ *  - Fixed function counters refer to an Event Capabilities register.
+ *    That event capability register is not then used for Configurable
+ *    counters.
+ */
+static int cxl_pmu_parse_caps(struct device *dev, struct cxl_pmu_info *info)
+{
+	unsigned long fixed_counter_event_cap_bm = 0;
+	void __iomem *base = info->base;
+	bool freeze_for_enable;
+	u64 val, eval;
+	int i;
+
+	val = readq(base + CXL_PMU_CAP_REG);
+	freeze_for_enable = FIELD_GET(CXL_PMU_CAP_WRITEABLE_WHEN_FROZEN, val) &&
+		FIELD_GET(CXL_PMU_CAP_FREEZE, val);
+	if (!freeze_for_enable) {
+		dev_err(dev, "Counters not writable while frozen\n");
+		return -ENODEV;
+	}
+
+	info->num_counters = FIELD_GET(CXL_PMU_CAP_NUM_COUNTERS_MSK, val) + 1;
+	info->counter_width = FIELD_GET(CXL_PMU_CAP_COUNTER_WIDTH_MSK, val);
+	info->num_event_capabilities = FIELD_GET(CXL_PMU_CAP_NUM_EVN_CAP_REG_SUP_MSK, val) + 1;
+
+	info->filter_hdm = FIELD_GET(CXL_PMU_CAP_FILTERS_SUP_MSK, val) & CXL_PMU_FILTER_HDM;
+	if (FIELD_GET(CXL_PMU_CAP_INT, val))
+		info->irq = FIELD_GET(CXL_PMU_CAP_MSI_N_MSK, val);
+	else
+		info->irq = -1;
+
+	/* First handle fixed function counters; note if configurable counters found */
+	for (i = 0; i < info->num_counters; i++) {
+		struct cxl_pmu_ev_cap *pmu_ev;
+		u32 events_msk;
+		u8 group_idx;
+
+		val = readq(base + CXL_PMU_COUNTER_CFG_REG(i));
+
+		if (FIELD_GET(CXL_PMU_COUNTER_CFG_TYPE_MSK, val) ==
+			CXL_PMU_COUNTER_CFG_TYPE_CONFIGURABLE) {
+			set_bit(i, info->conf_counter_bm);
+		}
+
+		if (FIELD_GET(CXL_PMU_COUNTER_CFG_TYPE_MSK, val) !=
+		    CXL_PMU_COUNTER_CFG_TYPE_FIXED_FUN)
+			continue;
+
+		/* In this case we know which fields are const */
+		group_idx = FIELD_GET(CXL_PMU_COUNTER_CFG_EVENT_GRP_ID_IDX_MSK, val);
+		events_msk = FIELD_GET(CXL_PMU_COUNTER_CFG_EVENTS_MSK, val);
+		eval = readq(base + CXL_PMU_EVENT_CAP_REG(group_idx));
+		pmu_ev = devm_kzalloc(dev, sizeof(*pmu_ev), GFP_KERNEL);
+		if (!pmu_ev)
+			return -ENOMEM;
+
+		pmu_ev->vid = FIELD_GET(CXL_PMU_EVENT_CAP_VENDOR_ID_MSK, eval);
+		pmu_ev->gid = FIELD_GET(CXL_PMU_EVENT_CAP_GROUP_ID_MSK, eval);
+		/* For a fixed purpose counter use the events mask from the counter CFG */
+		pmu_ev->msk = events_msk;
+		pmu_ev->counter_idx = i;
+		/* This list add is never unwound as all entries deleted on remove */
+		list_add(&pmu_ev->node, &info->event_caps_fixed);
+		/*
+		 * Configurable counters must not use an Event Capability registers that
+		 * is in use for a Fixed counter
+		 */
+		set_bit(group_idx, &fixed_counter_event_cap_bm);
+	}
+
+	if (!bitmap_empty(info->conf_counter_bm, CXL_PMU_MAX_COUNTERS)) {
+		struct cxl_pmu_ev_cap *pmu_ev;
+		int j;
+		/* Walk event capabilities unused by fixed counters */
+		for_each_clear_bit(j, &fixed_counter_event_cap_bm,
+				   info->num_event_capabilities) {
+			pmu_ev = devm_kzalloc(dev, sizeof(*pmu_ev), GFP_KERNEL);
+			if (!pmu_ev)
+				return -ENOMEM;
+
+			eval = readq(base + CXL_PMU_EVENT_CAP_REG(j));
+			pmu_ev->vid = FIELD_GET(CXL_PMU_EVENT_CAP_VENDOR_ID_MSK, eval);
+			pmu_ev->gid = FIELD_GET(CXL_PMU_EVENT_CAP_GROUP_ID_MSK, eval);
+			pmu_ev->msk = FIELD_GET(CXL_PMU_EVENT_CAP_SUPPORTED_EVENTS_MSK, eval);
+			pmu_ev->event_idx = j;
+			list_add(&pmu_ev->node, &info->event_caps_configurable);
+		}
+	}
+
+	return 0;
+}
+
+static ssize_t cxl_pmu_format_sysfs_show(struct device *dev,
+					 struct device_attribute *attr, char *buf)
+{
+	struct dev_ext_attribute *eattr;
+
+	eattr = container_of(attr, struct dev_ext_attribute, attr);
+
+	return sysfs_emit(buf, "%s\n", (char *)eattr->var);
+}
+
+#define CXL_PMU_FORMAT_ATTR(_name, _format)\
+	(&((struct dev_ext_attribute[]) {					\
+		{								\
+			.attr = __ATTR(_name, 0444,				\
+				       cxl_pmu_format_sysfs_show, NULL),	\
+			.var = (void *)_format					\
+		}								\
+		})[0].attr.attr)
+
+enum {
+	cxl_pmu_mask_attr,
+	cxl_pmu_gid_attr,
+	cxl_pmu_vid_attr,
+	cxl_pmu_threshold_attr,
+	cxl_pmu_invert_attr,
+	cxl_pmu_edge_attr,
+	cxl_pmu_hdm_filter_en_attr,
+	cxl_pmu_hdm_attr,
+};
+
+static struct attribute *cxl_pmu_format_attr[] = {
+	[cxl_pmu_mask_attr] = CXL_PMU_FORMAT_ATTR(mask, "config:0-31"),
+	[cxl_pmu_gid_attr] = CXL_PMU_FORMAT_ATTR(gid, "config:32-47"),
+	[cxl_pmu_vid_attr] = CXL_PMU_FORMAT_ATTR(vid, "config:48-63"),
+	[cxl_pmu_threshold_attr] = CXL_PMU_FORMAT_ATTR(threshold, "config1:0-15"),
+	[cxl_pmu_invert_attr] = CXL_PMU_FORMAT_ATTR(invert, "config1:16"),
+	[cxl_pmu_edge_attr] = CXL_PMU_FORMAT_ATTR(edge, "config1:17"),
+	[cxl_pmu_hdm_filter_en_attr] = CXL_PMU_FORMAT_ATTR(hdm_filter_en, "config1:18"),
+	[cxl_pmu_hdm_attr] = CXL_PMU_FORMAT_ATTR(hdm, "config2:0-15"),
+	NULL
+};
+
+#define CXL_PMU_ATTR_CONFIG_MASK_MSK		GENMASK_ULL(31, 0)
+#define CXL_PMU_ATTR_CONFIG_GID_MSK		GENMASK_ULL(47, 32)
+#define CXL_PMU_ATTR_CONFIG_VID_MSK		GENMASK_ULL(63, 48)
+#define CXL_PMU_ATTR_CONFIG1_THRESHOLD_MSK	GENMASK_ULL(15, 0)
+#define CXL_PMU_ATTR_CONFIG1_INVERT_MSK		BIT(16)
+#define CXL_PMU_ATTR_CONFIG1_EDGE_MSK		BIT(17)
+#define CXL_PMU_ATTR_CONFIG1_FILTER_EN_MSK	BIT(18)
+#define CXL_PMU_ATTR_CONFIG2_HDM_MSK		GENMASK(15, 0)
+
+static umode_t cxl_pmu_format_is_visible(struct kobject *kobj,
+					 struct attribute *attr, int a)
+{
+	struct device *dev = kobj_to_dev(kobj);
+	struct cxl_pmu_info *info = dev_get_drvdata(dev);
+
+	/*
+	 * Filter capability at the CPMU level, so hide the attributes if the particular
+	 * filter is not supported.
+	 */
+	if (!info->filter_hdm &&
+	    (attr == cxl_pmu_format_attr[cxl_pmu_hdm_filter_en_attr] ||
+	     attr == cxl_pmu_format_attr[cxl_pmu_hdm_attr]))
+		return 0;
+
+	return attr->mode;
+}
+
+static const struct attribute_group cxl_pmu_format_group = {
+	.name = "format",
+	.attrs = cxl_pmu_format_attr,
+	.is_visible = cxl_pmu_format_is_visible,
+};
+
+static u32 cxl_pmu_config_get_mask(struct perf_event *event)
+{
+	return FIELD_GET(CXL_PMU_ATTR_CONFIG_MASK_MSK, event->attr.config);
+}
+
+static u16 cxl_pmu_config_get_gid(struct perf_event *event)
+{
+	return FIELD_GET(CXL_PMU_ATTR_CONFIG_GID_MSK, event->attr.config);
+}
+
+static u16 cxl_pmu_config_get_vid(struct perf_event *event)
+{
+	return FIELD_GET(CXL_PMU_ATTR_CONFIG_VID_MSK, event->attr.config);
+}
+
+static u8 cxl_pmu_config1_get_threshold(struct perf_event *event)
+{
+	return FIELD_GET(CXL_PMU_ATTR_CONFIG1_THRESHOLD_MSK, event->attr.config1);
+}
+
+static bool cxl_pmu_config1_get_invert(struct perf_event *event)
+{
+	return FIELD_GET(CXL_PMU_ATTR_CONFIG1_INVERT_MSK, event->attr.config1);
+}
+
+static bool cxl_pmu_config1_get_edge(struct perf_event *event)
+{
+	return FIELD_GET(CXL_PMU_ATTR_CONFIG1_EDGE_MSK, event->attr.config1);
+}
+
+/*
+ * CPMU specification allows for 8 filters, each with a 16 bit value...
+ * So we need to find 8x16bits to store it in.
+ * As the value used for disable is 0xffff, a separate enable switch
+ * is needed.
+ */
+
+static bool cxl_pmu_config1_hdm_filter_en(struct perf_event *event)
+{
+	return FIELD_GET(CXL_PMU_ATTR_CONFIG1_FILTER_EN_MSK, event->attr.config1);
+}
+
+static u16 cxl_pmu_config2_get_hdm_decoder(struct perf_event *event)
+{
+	return FIELD_GET(CXL_PMU_ATTR_CONFIG2_HDM_MSK, event->attr.config2);
+}
+
+static ssize_t cxl_pmu_event_sysfs_show(struct device *dev,
+					struct device_attribute *attr, char *buf)
+{
+	struct perf_pmu_events_attr *pmu_attr =
+		container_of(attr, struct perf_pmu_events_attr, attr);
+
+	return sysfs_emit(buf, "config=%#llx\n", pmu_attr->id);
+}
+
+#define CXL_PMU_EVENT_ATTR(_name, _vid, _gid, _msk)			\
+	PMU_EVENT_ATTR_ID(_name, cxl_pmu_event_sysfs_show,		\
+			  ((u64)(_vid) << 48) | ((u64)(_gid) << 32) | (u64)(_msk))
+
+/* For CXL spec defined events */
+#define CXL_PMU_EVENT_CXL_ATTR(_name, _gid, _msk)			\
+	CXL_PMU_EVENT_ATTR(_name, PCI_DVSEC_VENDOR_ID_CXL, _gid, _msk)
+
+static struct attribute *cxl_pmu_event_attrs[] = {
+	CXL_PMU_EVENT_CXL_ATTR(clock_ticks,			CXL_PMU_GID_CLOCK_TICKS, BIT(0)),
+	/* CXL rev 3.0 Table 3-17 - Device to Host Requests */
+	CXL_PMU_EVENT_CXL_ATTR(d2h_req_rdcurr,			CXL_PMU_GID_D2H_REQ, BIT(1)),
+	CXL_PMU_EVENT_CXL_ATTR(d2h_req_rdown,			CXL_PMU_GID_D2H_REQ, BIT(2)),
+	CXL_PMU_EVENT_CXL_ATTR(d2h_req_rdshared,		CXL_PMU_GID_D2H_REQ, BIT(3)),
+	CXL_PMU_EVENT_CXL_ATTR(d2h_req_rdany,			CXL_PMU_GID_D2H_REQ, BIT(4)),
+	CXL_PMU_EVENT_CXL_ATTR(d2h_req_rdownnodata,		CXL_PMU_GID_D2H_REQ, BIT(5)),
+	CXL_PMU_EVENT_CXL_ATTR(d2h_req_itomwr,			CXL_PMU_GID_D2H_REQ, BIT(6)),
+	CXL_PMU_EVENT_CXL_ATTR(d2h_req_wrcurr,			CXL_PMU_GID_D2H_REQ, BIT(7)),
+	CXL_PMU_EVENT_CXL_ATTR(d2h_req_clflush,			CXL_PMU_GID_D2H_REQ, BIT(8)),
+	CXL_PMU_EVENT_CXL_ATTR(d2h_req_cleanevict,		CXL_PMU_GID_D2H_REQ, BIT(9)),
+	CXL_PMU_EVENT_CXL_ATTR(d2h_req_dirtyevict,		CXL_PMU_GID_D2H_REQ, BIT(10)),
+	CXL_PMU_EVENT_CXL_ATTR(d2h_req_cleanevictnodata,	CXL_PMU_GID_D2H_REQ, BIT(11)),
+	CXL_PMU_EVENT_CXL_ATTR(d2h_req_wowrinv,			CXL_PMU_GID_D2H_REQ, BIT(12)),
+	CXL_PMU_EVENT_CXL_ATTR(d2h_req_wowrinvf,		CXL_PMU_GID_D2H_REQ, BIT(13)),
+	CXL_PMU_EVENT_CXL_ATTR(d2h_req_wrinv,			CXL_PMU_GID_D2H_REQ, BIT(14)),
+	CXL_PMU_EVENT_CXL_ATTR(d2h_req_cacheflushed,		CXL_PMU_GID_D2H_REQ, BIT(16)),
+	/* CXL rev 3.0 Table 3-20 - D2H Repsonse Encodings */
+	CXL_PMU_EVENT_CXL_ATTR(d2h_rsp_rspihiti,		CXL_PMU_GID_D2H_RSP, BIT(4)),
+	CXL_PMU_EVENT_CXL_ATTR(d2h_rsp_rspvhitv,		CXL_PMU_GID_D2H_RSP, BIT(6)),
+	CXL_PMU_EVENT_CXL_ATTR(d2h_rsp_rspihitse,		CXL_PMU_GID_D2H_RSP, BIT(5)),
+	CXL_PMU_EVENT_CXL_ATTR(d2h_rsp_rspshitse,		CXL_PMU_GID_D2H_RSP, BIT(1)),
+	CXL_PMU_EVENT_CXL_ATTR(d2h_rsp_rspsfwdm,		CXL_PMU_GID_D2H_RSP, BIT(7)),
+	CXL_PMU_EVENT_CXL_ATTR(d2h_rsp_rspifwdm,		CXL_PMU_GID_D2H_RSP, BIT(15)),
+	CXL_PMU_EVENT_CXL_ATTR(d2h_rsp_rspvfwdv,		CXL_PMU_GID_D2H_RSP, BIT(22)),
+	/* CXL rev 3.0 Table 3-21 - CXL.cache - Mapping of H2D Requests to D2H Responses */
+	CXL_PMU_EVENT_CXL_ATTR(h2d_req_snpdata,			CXL_PMU_GID_H2D_REQ, BIT(1)),
+	CXL_PMU_EVENT_CXL_ATTR(h2d_req_snpinv,			CXL_PMU_GID_H2D_REQ, BIT(2)),
+	CXL_PMU_EVENT_CXL_ATTR(h2d_req_snpcur,			CXL_PMU_GID_H2D_REQ, BIT(3)),
+	/* CXL rev 3.0 Table 3-22 - H2D Response Opcode Encodings */
+	CXL_PMU_EVENT_CXL_ATTR(h2d_rsp_writepull,		CXL_PMU_GID_H2D_RSP, BIT(1)),
+	CXL_PMU_EVENT_CXL_ATTR(h2d_rsp_go,			CXL_PMU_GID_H2D_RSP, BIT(4)),
+	CXL_PMU_EVENT_CXL_ATTR(h2d_rsp_gowritepull,		CXL_PMU_GID_H2D_RSP, BIT(5)),
+	CXL_PMU_EVENT_CXL_ATTR(h2d_rsp_extcmp,			CXL_PMU_GID_H2D_RSP, BIT(6)),
+	CXL_PMU_EVENT_CXL_ATTR(h2d_rsp_gowritepulldrop,		CXL_PMU_GID_H2D_RSP, BIT(8)),
+	CXL_PMU_EVENT_CXL_ATTR(h2d_rsp_fastgowritepull,		CXL_PMU_GID_H2D_RSP, BIT(13)),
+	CXL_PMU_EVENT_CXL_ATTR(h2d_rsp_goerrwritepull,		CXL_PMU_GID_H2D_RSP, BIT(15)),
+	/* CXL rev 3.0 Table 13-5 directly lists these */
+	CXL_PMU_EVENT_CXL_ATTR(cachedata_d2h_data,		CXL_PMU_GID_CACHE_DATA, BIT(0)),
+	CXL_PMU_EVENT_CXL_ATTR(cachedata_h2d_data,		CXL_PMU_GID_CACHE_DATA, BIT(1)),
+	/* CXL rev 3.0 Table 3-29 M2S Req Memory Opcodes */
+	CXL_PMU_EVENT_CXL_ATTR(m2s_req_meminv,			CXL_PMU_GID_M2S_REQ, BIT(0)),
+	CXL_PMU_EVENT_CXL_ATTR(m2s_req_memrd,			CXL_PMU_GID_M2S_REQ, BIT(1)),
+	CXL_PMU_EVENT_CXL_ATTR(m2s_req_memrddata,		CXL_PMU_GID_M2S_REQ, BIT(2)),
+	CXL_PMU_EVENT_CXL_ATTR(m2s_req_memrdfwd,		CXL_PMU_GID_M2S_REQ, BIT(3)),
+	CXL_PMU_EVENT_CXL_ATTR(m2s_req_memwrfwd,		CXL_PMU_GID_M2S_REQ, BIT(4)),
+	CXL_PMU_EVENT_CXL_ATTR(m2s_req_memspecrd,		CXL_PMU_GID_M2S_REQ, BIT(8)),
+	CXL_PMU_EVENT_CXL_ATTR(m2s_req_meminvnt,		CXL_PMU_GID_M2S_REQ, BIT(9)),
+	CXL_PMU_EVENT_CXL_ATTR(m2s_req_memcleanevict,		CXL_PMU_GID_M2S_REQ, BIT(10)),
+	/* CXL rev 3.0 Table 3-35 M2S RwD Memory Opcodes */
+	CXL_PMU_EVENT_CXL_ATTR(m2s_rwd_memwr,			CXL_PMU_GID_M2S_RWD, BIT(1)),
+	CXL_PMU_EVENT_CXL_ATTR(m2s_rwd_memwrptl,		CXL_PMU_GID_M2S_RWD, BIT(2)),
+	CXL_PMU_EVENT_CXL_ATTR(m2s_rwd_biconflict,		CXL_PMU_GID_M2S_RWD, BIT(4)),
+	/* CXL rev 3.0 Table 3-38 M2S BIRsp Memory Opcodes */
+	CXL_PMU_EVENT_CXL_ATTR(m2s_birsp_i,			CXL_PMU_GID_M2S_BIRSP, BIT(0)),
+	CXL_PMU_EVENT_CXL_ATTR(m2s_birsp_s,			CXL_PMU_GID_M2S_BIRSP, BIT(1)),
+	CXL_PMU_EVENT_CXL_ATTR(m2s_birsp_e,			CXL_PMU_GID_M2S_BIRSP, BIT(2)),
+	CXL_PMU_EVENT_CXL_ATTR(m2s_birsp_iblk,			CXL_PMU_GID_M2S_BIRSP, BIT(4)),
+	CXL_PMU_EVENT_CXL_ATTR(m2s_birsp_sblk,			CXL_PMU_GID_M2S_BIRSP, BIT(5)),
+	CXL_PMU_EVENT_CXL_ATTR(m2s_birsp_eblk,			CXL_PMU_GID_M2S_BIRSP, BIT(6)),
+	/* CXL rev 3.0 Table 3-40 S2M BISnp Opcodes */
+	CXL_PMU_EVENT_CXL_ATTR(s2m_bisnp_cur,			CXL_PMU_GID_S2M_BISNP, BIT(0)),
+	CXL_PMU_EVENT_CXL_ATTR(s2m_bisnp_data,			CXL_PMU_GID_S2M_BISNP, BIT(1)),
+	CXL_PMU_EVENT_CXL_ATTR(s2m_bisnp_inv,			CXL_PMU_GID_S2M_BISNP, BIT(2)),
+	CXL_PMU_EVENT_CXL_ATTR(s2m_bisnp_curblk,		CXL_PMU_GID_S2M_BISNP, BIT(4)),
+	CXL_PMU_EVENT_CXL_ATTR(s2m_bisnp_datblk,		CXL_PMU_GID_S2M_BISNP, BIT(5)),
+	CXL_PMU_EVENT_CXL_ATTR(s2m_bisnp_invblk,		CXL_PMU_GID_S2M_BISNP, BIT(6)),
+	/* CXL rev 3.0 Table 3-43 S2M NDR Opcopdes */
+	CXL_PMU_EVENT_CXL_ATTR(s2m_ndr_cmp,			CXL_PMU_GID_S2M_NDR, BIT(0)),
+	CXL_PMU_EVENT_CXL_ATTR(s2m_ndr_cmps,			CXL_PMU_GID_S2M_NDR, BIT(1)),
+	CXL_PMU_EVENT_CXL_ATTR(s2m_ndr_cmpe,			CXL_PMU_GID_S2M_NDR, BIT(2)),
+	CXL_PMU_EVENT_CXL_ATTR(s2m_ndr_biconflictack,		CXL_PMU_GID_S2M_NDR, BIT(3)),
+	/* CXL rev 3.0 Table 3-46 S2M DRS opcodes */
+	CXL_PMU_EVENT_CXL_ATTR(s2m_drs_memdata,			CXL_PMU_GID_S2M_DRS, BIT(0)),
+	CXL_PMU_EVENT_CXL_ATTR(s2m_drs_memdatanxm,		CXL_PMU_GID_S2M_DRS, BIT(1)),
+	/* CXL rev 3.0 Table 13-5 directly lists these */
+	CXL_PMU_EVENT_CXL_ATTR(ddr_act,				CXL_PMU_GID_DDR, BIT(0)),
+	CXL_PMU_EVENT_CXL_ATTR(ddr_pre,				CXL_PMU_GID_DDR, BIT(1)),
+	CXL_PMU_EVENT_CXL_ATTR(ddr_casrd,			CXL_PMU_GID_DDR, BIT(2)),
+	CXL_PMU_EVENT_CXL_ATTR(ddr_caswr,			CXL_PMU_GID_DDR, BIT(3)),
+	CXL_PMU_EVENT_CXL_ATTR(ddr_refresh,			CXL_PMU_GID_DDR, BIT(4)),
+	CXL_PMU_EVENT_CXL_ATTR(ddr_selfrefreshent,		CXL_PMU_GID_DDR, BIT(5)),
+	CXL_PMU_EVENT_CXL_ATTR(ddr_rfm,				CXL_PMU_GID_DDR, BIT(6)),
+	NULL
+};
+
+static struct cxl_pmu_ev_cap *cxl_pmu_find_fixed_counter_ev_cap(struct cxl_pmu_info *info,
+								int vid, int gid, int msk)
+{
+	struct cxl_pmu_ev_cap *pmu_ev;
+
+	list_for_each_entry(pmu_ev, &info->event_caps_fixed, node) {
+		if (vid != pmu_ev->vid || gid != pmu_ev->gid)
+			continue;
+
+		/* Precise match for fixed counter */
+		if (msk == pmu_ev->msk)
+			return pmu_ev;
+	}
+
+	return ERR_PTR(-EINVAL);
+}
+
+static struct cxl_pmu_ev_cap *cxl_pmu_find_config_counter_ev_cap(struct cxl_pmu_info *info,
+								 int vid, int gid, int msk)
+{
+	struct cxl_pmu_ev_cap *pmu_ev;
+
+	list_for_each_entry(pmu_ev, &info->event_caps_configurable, node) {
+		if (vid != pmu_ev->vid || gid != pmu_ev->gid)
+			continue;
+
+		/* Request mask must be subset of supported */
+		if (msk & ~pmu_ev->msk)
+			continue;
+
+		return pmu_ev;
+	}
+
+	return ERR_PTR(-EINVAL);
+}
+
+static umode_t cxl_pmu_event_is_visible(struct kobject *kobj, struct attribute *attr, int a)
+{
+	struct device_attribute *dev_attr = container_of(attr, struct device_attribute, attr);
+	struct perf_pmu_events_attr *pmu_attr =
+		container_of(dev_attr, struct perf_pmu_events_attr, attr);
+	struct device *dev = kobj_to_dev(kobj);
+	struct cxl_pmu_info *info = dev_get_drvdata(dev);
+	int vid = FIELD_GET(CXL_PMU_ATTR_CONFIG_VID_MSK, pmu_attr->id);
+	int gid = FIELD_GET(CXL_PMU_ATTR_CONFIG_GID_MSK, pmu_attr->id);
+	int msk = FIELD_GET(CXL_PMU_ATTR_CONFIG_MASK_MSK, pmu_attr->id);
+
+	if (!IS_ERR(cxl_pmu_find_fixed_counter_ev_cap(info, vid, gid, msk)))
+		return attr->mode;
+
+	if (!IS_ERR(cxl_pmu_find_config_counter_ev_cap(info, vid, gid, msk)))
+		return attr->mode;
+
+	return 0;
+}
+
+static const struct attribute_group cxl_pmu_events = {
+	.name = "events",
+	.attrs = cxl_pmu_event_attrs,
+	.is_visible = cxl_pmu_event_is_visible,
+};
+
+static ssize_t cpumask_show(struct device *dev, struct device_attribute *attr,
+			    char *buf)
+{
+	struct cxl_pmu_info *info = dev_get_drvdata(dev);
+
+	return cpumap_print_to_pagebuf(true, buf, cpumask_of(info->on_cpu));
+}
+static DEVICE_ATTR_RO(cpumask);
+
+static struct attribute *cxl_pmu_cpumask_attrs[] = {
+	&dev_attr_cpumask.attr,
+	NULL
+};
+
+static const struct attribute_group cxl_pmu_cpumask_group = {
+	.attrs = cxl_pmu_cpumask_attrs,
+};
+
+static const struct attribute_group *cxl_pmu_attr_groups[] = {
+	&cxl_pmu_events,
+	&cxl_pmu_format_group,
+	&cxl_pmu_cpumask_group,
+	NULL
+};
+
+/* If counter_idx == NULL, don't try to allocate a counter. */
+static int cxl_pmu_get_event_idx(struct perf_event *event, int *counter_idx,
+				 int *event_idx)
+{
+	struct cxl_pmu_info *info = pmu_to_cxl_pmu_info(event->pmu);
+	DECLARE_BITMAP(configurable_and_free, CXL_PMU_MAX_COUNTERS);
+	struct cxl_pmu_ev_cap *pmu_ev;
+	u32 mask;
+	u16 gid, vid;
+	int i;
+
+	vid = cxl_pmu_config_get_vid(event);
+	gid = cxl_pmu_config_get_gid(event);
+	mask = cxl_pmu_config_get_mask(event);
+
+	pmu_ev = cxl_pmu_find_fixed_counter_ev_cap(info, vid, gid, mask);
+	if (!IS_ERR(pmu_ev)) {
+		if (!counter_idx)
+			return 0;
+		if (!test_bit(pmu_ev->counter_idx, info->used_counter_bm)) {
+			*counter_idx = pmu_ev->counter_idx;
+			return 0;
+		}
+		/* Fixed counter is in use, but maybe a configurable one? */
+	}
+
+	pmu_ev = cxl_pmu_find_config_counter_ev_cap(info, vid, gid, mask);
+	if (!IS_ERR(pmu_ev)) {
+		if (!counter_idx)
+			return 0;
+
+		bitmap_andnot(configurable_and_free, info->conf_counter_bm,
+			info->used_counter_bm, CXL_PMU_MAX_COUNTERS);
+
+		i = find_first_bit(configurable_and_free, CXL_PMU_MAX_COUNTERS);
+		if (i == CXL_PMU_MAX_COUNTERS)
+			return -EINVAL;
+
+		*counter_idx = i;
+		return 0;
+	}
+
+	return -EINVAL;
+}
+
+static int cxl_pmu_event_init(struct perf_event *event)
+{
+	struct cxl_pmu_info *info = pmu_to_cxl_pmu_info(event->pmu);
+	int rc;
+
+	/* Top level type sanity check - is this a Hardware Event being requested */
+	if (event->attr.type != event->pmu->type)
+		return -ENOENT;
+
+	if (is_sampling_event(event) || event->attach_state & PERF_ATTACH_TASK)
+		return -EOPNOTSUPP;
+	/* TODO: Validation of any filter */
+
+	/*
+	 * Verify that it is possible to count what was requested. Either must
+	 * be a fixed counter that is a precise match or a configurable counter
+	 * where this is a subset.
+	 */
+	rc = cxl_pmu_get_event_idx(event, NULL, NULL);
+	if (rc < 0)
+		return rc;
+
+	event->cpu = info->on_cpu;
+
+	return 0;
+}
+
+static void cxl_pmu_enable(struct pmu *pmu)
+{
+	struct cxl_pmu_info *info = pmu_to_cxl_pmu_info(pmu);
+	void __iomem *base = info->base;
+
+	/* Can assume frozen at this stage */
+	writeq(0, base + CXL_PMU_FREEZE_REG);
+}
+
+static void cxl_pmu_disable(struct pmu *pmu)
+{
+	struct cxl_pmu_info *info = pmu_to_cxl_pmu_info(pmu);
+	void __iomem *base = info->base;
+
+	/*
+	 * Whilst bits above number of counters are RsvdZ
+	 * they are unlikely to be repurposed given
+	 * number of counters is allowed to be 64 leaving
+	 * no reserved bits.  Hence this is only slightly
+	 * naughty.
+	 */
+	writeq(GENMASK_ULL(63, 0), base + CXL_PMU_FREEZE_REG);
+}
+
+static void cxl_pmu_event_start(struct perf_event *event, int flags)
+{
+	struct cxl_pmu_info *info = pmu_to_cxl_pmu_info(event->pmu);
+	struct hw_perf_event *hwc = &event->hw;
+	void __iomem *base = info->base;
+	u64 cfg;
+
+	/*
+	 * All paths to here should either set these flags directly or
+	 * call cxl_pmu_event_stop() which will ensure the correct state.
+	 */
+	if (WARN_ON_ONCE(!(hwc->state & PERF_HES_STOPPED)))
+		return;
+
+	WARN_ON_ONCE(!(hwc->state & PERF_HES_UPTODATE));
+	hwc->state = 0;
+
+	/*
+	 * Currently only hdm filter control is implemnted, this code will
+	 * want generalizing when more filters are added.
+	 */
+	if (info->filter_hdm) {
+		if (cxl_pmu_config1_hdm_filter_en(event))
+			cfg = cxl_pmu_config2_get_hdm_decoder(event);
+		else
+			cfg = GENMASK(15, 0); /* No filtering if 0xFFFF_FFFF */
+		writeq(cfg, base + CXL_PMU_FILTER_CFG_REG(hwc->idx, 0));
+	}
+
+	cfg = readq(base + CXL_PMU_COUNTER_CFG_REG(hwc->idx));
+	cfg |= FIELD_PREP(CXL_PMU_COUNTER_CFG_INT_ON_OVRFLW, 1);
+	cfg |= FIELD_PREP(CXL_PMU_COUNTER_CFG_FREEZE_ON_OVRFLW, 1);
+	cfg |= FIELD_PREP(CXL_PMU_COUNTER_CFG_ENABLE, 1);
+	cfg |= FIELD_PREP(CXL_PMU_COUNTER_CFG_EDGE,
+			  cxl_pmu_config1_get_edge(event) ? 1 : 0);
+	cfg |= FIELD_PREP(CXL_PMU_COUNTER_CFG_INVERT,
+			  cxl_pmu_config1_get_invert(event) ? 1 : 0);
+
+	/* Fixed purpose counters have next two fields RO */
+	if (test_bit(hwc->idx, info->conf_counter_bm)) {
+		cfg |= FIELD_PREP(CXL_PMU_COUNTER_CFG_EVENT_GRP_ID_IDX_MSK,
+				  hwc->event_base);
+		cfg |= FIELD_PREP(CXL_PMU_COUNTER_CFG_EVENTS_MSK,
+				  cxl_pmu_config_get_mask(event));
+	}
+	cfg &= ~CXL_PMU_COUNTER_CFG_THRESHOLD_MSK;
+	/*
+	 * For events that generate only 1 count per clock the CXL 3.0 spec
+	 * states the threshold shall be set to 1 but if set to 0 it will
+	 * count the raw value anwyay?
+	 * There is no definition of what events will count multiple per cycle
+	 * and hence to which non 1 values of threshold can apply.
+	 * (CXL 3.0 8.2.7.2.1 Counter Configuration - threshold field definition)
+	 */
+	cfg |= FIELD_PREP(CXL_PMU_COUNTER_CFG_THRESHOLD_MSK,
+			  cxl_pmu_config1_get_threshold(event));
+	writeq(cfg, base + CXL_PMU_COUNTER_CFG_REG(hwc->idx));
+
+	local64_set(&hwc->prev_count, 0);
+	writeq(0, base + CXL_PMU_COUNTER_REG(hwc->idx));
+
+	perf_event_update_userpage(event);
+}
+
+static u64 cxl_pmu_read_counter(struct perf_event *event)
+{
+	struct cxl_pmu_info *info = pmu_to_cxl_pmu_info(event->pmu);
+	void __iomem *base = info->base;
+
+	return readq(base + CXL_PMU_COUNTER_REG(event->hw.idx));
+}
+
+static void __cxl_pmu_read(struct perf_event *event, bool overflow)
+{
+	struct cxl_pmu_info *info = pmu_to_cxl_pmu_info(event->pmu);
+	struct hw_perf_event *hwc = &event->hw;
+	u64 new_cnt, prev_cnt, delta;
+
+	do {
+		prev_cnt = local64_read(&hwc->prev_count);
+		new_cnt = cxl_pmu_read_counter(event);
+	} while (local64_cmpxchg(&hwc->prev_count, prev_cnt, new_cnt) != prev_cnt);
+
+	/*
+	 * If we know an overflow occur then take that into account.
+	 * Note counter is not reset as that would lose events
+	 */
+	delta = (new_cnt - prev_cnt) & GENMASK_ULL(info->counter_width - 1, 0);
+	if (overflow && delta < GENMASK_ULL(info->counter_width - 1, 0))
+		delta += (1UL << info->counter_width);
+
+	local64_add(delta, &event->count);
+}
+
+static void cxl_pmu_read(struct perf_event *event)
+{
+	__cxl_pmu_read(event, false);
+}
+
+static void cxl_pmu_event_stop(struct perf_event *event, int flags)
+{
+	struct cxl_pmu_info *info = pmu_to_cxl_pmu_info(event->pmu);
+	void __iomem *base = info->base;
+	struct hw_perf_event *hwc = &event->hw;
+	u64 cfg;
+
+	cxl_pmu_read(event);
+	WARN_ON_ONCE(hwc->state & PERF_HES_STOPPED);
+	hwc->state |= PERF_HES_STOPPED;
+
+	cfg = readq(base + CXL_PMU_COUNTER_CFG_REG(hwc->idx));
+	cfg &= ~(FIELD_PREP(CXL_PMU_COUNTER_CFG_INT_ON_OVRFLW, 1) |
+		 FIELD_PREP(CXL_PMU_COUNTER_CFG_ENABLE, 1));
+	writeq(cfg, base + CXL_PMU_COUNTER_CFG_REG(hwc->idx));
+
+	hwc->state |= PERF_HES_UPTODATE;
+}
+
+static int cxl_pmu_event_add(struct perf_event *event, int flags)
+{
+	struct cxl_pmu_info *info = pmu_to_cxl_pmu_info(event->pmu);
+	struct hw_perf_event *hwc = &event->hw;
+	int idx, rc;
+	int event_idx = 0;
+
+	hwc->state = PERF_HES_STOPPED | PERF_HES_UPTODATE;
+
+	rc = cxl_pmu_get_event_idx(event, &idx, &event_idx);
+	if (rc < 0)
+		return rc;
+
+	hwc->idx = idx;
+
+	/* Only set for configurable counters */
+	hwc->event_base = event_idx;
+	info->hw_events[idx] = event;
+	set_bit(idx, info->used_counter_bm);
+
+	if (flags & PERF_EF_START)
+		cxl_pmu_event_start(event, PERF_EF_RELOAD);
+
+	return 0;
+}
+
+static void cxl_pmu_event_del(struct perf_event *event, int flags)
+{
+	struct cxl_pmu_info *info = pmu_to_cxl_pmu_info(event->pmu);
+	struct hw_perf_event *hwc = &event->hw;
+
+	cxl_pmu_event_stop(event, PERF_EF_UPDATE);
+	clear_bit(hwc->idx, info->used_counter_bm);
+	info->hw_events[hwc->idx] = NULL;
+	perf_event_update_userpage(event);
+}
+
+static irqreturn_t cxl_pmu_irq(int irq, void *data)
+{
+	struct cxl_pmu_info *info = data;
+	void __iomem *base = info->base;
+	u64 overflowed;
+	DECLARE_BITMAP(overflowedbm, 64);
+	int i;
+
+	overflowed = readq(base + CXL_PMU_OVERFLOW_REG);
+
+	/* Interrupt may be shared, so maybe it isn't ours */
+	if (!overflowed)
+		return IRQ_NONE;
+
+	bitmap_from_arr64(overflowedbm, &overflowed, 64);
+	for_each_set_bit(i, overflowedbm, info->num_counters) {
+		struct perf_event *event = info->hw_events[i];
+
+		if (!event) {
+			dev_dbg(info->pmu.dev,
+				"overflow but on non enabled counter %d\n", i);
+			continue;
+		}
+
+		__cxl_pmu_read(event, true);
+	}
+
+	writeq(overflowed, base + CXL_PMU_OVERFLOW_REG);
+
+	return IRQ_HANDLED;
+}
+
+static void cxl_pmu_perf_unregister(void *_info)
+{
+	struct cxl_pmu_info *info = _info;
+
+	perf_pmu_unregister(&info->pmu);
+}
+
+static void cxl_pmu_cpuhp_remove(void *_info)
+{
+	struct cxl_pmu_info *info = _info;
+
+	cpuhp_state_remove_instance_nocalls(cxl_pmu_cpuhp_state_num, &info->node);
+}
+
+static int cxl_pmu_probe(struct device *dev)
+{
+	struct cxl_pmu *pmu = to_cxl_pmu(dev);
+	struct pci_dev *pdev = to_pci_dev(dev->parent);
+	struct cxl_pmu_info *info;
+	char *irq_name;
+	char *dev_name;
+	int rc, irq;
+
+	info = devm_kzalloc(dev, sizeof(*info), GFP_KERNEL);
+	if (!info)
+		return -ENOMEM;
+
+	dev_set_drvdata(dev, info);
+	INIT_LIST_HEAD(&info->event_caps_fixed);
+	INIT_LIST_HEAD(&info->event_caps_configurable);
+
+	info->base = pmu->base;
+
+	info->on_cpu = -1;
+	rc = cxl_pmu_parse_caps(dev, info);
+	if (rc)
+		return rc;
+
+	info->hw_events = devm_kcalloc(dev, sizeof(*info->hw_events),
+				       info->num_counters, GFP_KERNEL);
+	if (!info->hw_events)
+		return -ENOMEM;
+
+	switch (pmu->type) {
+	case CXL_PMU_MEMDEV:
+		dev_name = devm_kasprintf(dev, GFP_KERNEL, "cxl_pmu_mem%d.%d",
+					  pmu->assoc_id, pmu->index);
+		break;
+	}
+	if (!dev_name)
+		return -ENOMEM;
+
+	info->pmu = (struct pmu) {
+		.name = dev_name,
+		.parent = dev,
+		.module = THIS_MODULE,
+		.event_init = cxl_pmu_event_init,
+		.pmu_enable = cxl_pmu_enable,
+		.pmu_disable = cxl_pmu_disable,
+		.add = cxl_pmu_event_add,
+		.del = cxl_pmu_event_del,
+		.start = cxl_pmu_event_start,
+		.stop = cxl_pmu_event_stop,
+		.read = cxl_pmu_read,
+		.task_ctx_nr = perf_invalid_context,
+		.attr_groups = cxl_pmu_attr_groups,
+		.capabilities = PERF_PMU_CAP_NO_EXCLUDE,
+	};
+
+	if (info->irq <= 0)
+		return -EINVAL;
+
+	rc = pci_irq_vector(pdev, info->irq);
+	if (rc < 0)
+		return rc;
+	irq = rc;
+
+	irq_name = devm_kasprintf(dev, GFP_KERNEL, "%s_overflow\n", dev_name);
+	if (!irq_name)
+		return -ENOMEM;
+
+	rc = devm_request_irq(dev, irq, cxl_pmu_irq, IRQF_SHARED | IRQF_ONESHOT,
+			      irq_name, info);
+	if (rc)
+		return rc;
+	info->irq = irq;
+
+	rc = cpuhp_state_add_instance(cxl_pmu_cpuhp_state_num, &info->node);
+	if (rc)
+		return rc;
+
+	rc = devm_add_action_or_reset(dev, cxl_pmu_cpuhp_remove, info);
+	if (rc)
+		return rc;
+
+	rc = perf_pmu_register(&info->pmu, info->pmu.name, -1);
+	if (rc)
+		return rc;
+
+	rc = devm_add_action_or_reset(dev, cxl_pmu_perf_unregister, info);
+	if (rc)
+		return rc;
+
+	return 0;
+}
+
+static struct cxl_driver cxl_pmu_driver = {
+	.name = "cxl_pmu",
+	.probe = cxl_pmu_probe,
+	.id = CXL_DEVICE_PMU,
+};
+
+static int cxl_pmu_online_cpu(unsigned int cpu, struct hlist_node *node)
+{
+	struct cxl_pmu_info *info = hlist_entry_safe(node, struct cxl_pmu_info, node);
+
+	if (info->on_cpu != -1)
+		return 0;
+
+	info->on_cpu = cpu;
+	/*
+	 * CPU HP lock is held so we should be guaranteed that the CPU hasn't yet
+	 * gone away again.
+	 */
+	WARN_ON(irq_set_affinity(info->irq, cpumask_of(cpu)));
+
+	return 0;
+}
+
+static int cxl_pmu_offline_cpu(unsigned int cpu, struct hlist_node *node)
+{
+	struct cxl_pmu_info *info = hlist_entry_safe(node, struct cxl_pmu_info, node);
+	unsigned int target;
+
+	if (info->on_cpu != cpu)
+		return 0;
+
+	info->on_cpu = -1;
+	target = cpumask_any_but(cpu_online_mask, cpu);
+	if (target >= nr_cpu_ids) {
+		dev_err(info->pmu.dev, "Unable to find a suitable CPU\n");
+		return 0;
+	}
+
+	perf_pmu_migrate_context(&info->pmu, cpu, target);
+	info->on_cpu = target;
+	/*
+	 * CPU HP lock is held so we should be guaranteed that this CPU hasn't yet
+	 * gone away.
+	 */
+	WARN_ON(irq_set_affinity(info->irq, cpumask_of(target)));
+
+	return 0;
+}
+
+static __init int cxl_pmu_init(void)
+{
+	int rc;
+
+	rc = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN,
+				     "AP_PERF_CXL_PMU_ONLINE",
+				     cxl_pmu_online_cpu, cxl_pmu_offline_cpu);
+	if (rc < 0)
+		return rc;
+	cxl_pmu_cpuhp_state_num = rc;
+
+	rc = cxl_driver_register(&cxl_pmu_driver);
+	if (rc)
+		cpuhp_remove_multi_state(cxl_pmu_cpuhp_state_num);
+
+	return rc;
+}
+
+static __exit void cxl_pmu_exit(void)
+{
+	cxl_driver_unregister(&cxl_pmu_driver);
+	cpuhp_remove_multi_state(cxl_pmu_cpuhp_state_num);
+}
+
+MODULE_LICENSE("GPL");
+MODULE_IMPORT_NS(CXL);
+module_init(cxl_pmu_init);
+module_exit(cxl_pmu_exit);
+MODULE_ALIAS_CXL(CXL_DEVICE_PMU);