diff options
Diffstat (limited to 'drivers')
-rw-r--r-- | drivers/cxl/Kconfig | 14 | ||||
-rw-r--r-- | drivers/cxl/acpi.c | 2 | ||||
-rw-r--r-- | drivers/cxl/core/Makefile | 1 | ||||
-rw-r--r-- | drivers/cxl/core/core.h | 1 | ||||
-rw-r--r-- | drivers/cxl/core/hdm.c | 44 | ||||
-rw-r--r-- | drivers/cxl/core/mbox.c | 339 | ||||
-rw-r--r-- | drivers/cxl/core/memdev.c | 503 | ||||
-rw-r--r-- | drivers/cxl/core/pci.c | 27 | ||||
-rw-r--r-- | drivers/cxl/core/pmem.c | 2 | ||||
-rw-r--r-- | drivers/cxl/core/pmu.c | 68 | ||||
-rw-r--r-- | drivers/cxl/core/port.c | 13 | ||||
-rw-r--r-- | drivers/cxl/core/region.c | 116 | ||||
-rw-r--r-- | drivers/cxl/core/regs.c | 83 | ||||
-rw-r--r-- | drivers/cxl/cxl.h | 51 | ||||
-rw-r--r-- | drivers/cxl/cxlmem.h | 225 | ||||
-rw-r--r-- | drivers/cxl/cxlpci.h | 1 | ||||
-rw-r--r-- | drivers/cxl/mem.c | 10 | ||||
-rw-r--r-- | drivers/cxl/pci.c | 371 | ||||
-rw-r--r-- | drivers/cxl/pmem.c | 35 | ||||
-rw-r--r-- | drivers/cxl/pmu.h | 28 | ||||
-rw-r--r-- | drivers/cxl/port.c | 14 | ||||
-rw-r--r-- | drivers/cxl/security.c | 27 | ||||
-rw-r--r-- | drivers/perf/Kconfig | 13 | ||||
-rw-r--r-- | drivers/perf/Makefile | 1 | ||||
-rw-r--r-- | drivers/perf/cxl_pmu.c | 990 |
25 files changed, 2552 insertions, 427 deletions
diff --git a/drivers/cxl/Kconfig b/drivers/cxl/Kconfig index ff4e78117b31..fcbf8295fde3 100644 --- a/drivers/cxl/Kconfig +++ b/drivers/cxl/Kconfig @@ -82,6 +82,7 @@ config CXL_PMEM config CXL_MEM tristate "CXL: Memory Expansion" depends on CXL_PCI + select FW_UPLOAD default CXL_BUS help The CXL.mem protocol allows a device to act as a provider of "System @@ -139,4 +140,17 @@ config CXL_REGION_INVALIDATION_TEST If unsure, or if this kernel is meant for production environments, say N. +config CXL_PMU + tristate "CXL Performance Monitoring Unit" + default CXL_BUS + depends on PERF_EVENTS + help + Support performance monitoring as defined in CXL rev 3.0 + section 13.2: Performance Monitoring. CXL components may have + one or more CXL Performance Monitoring Units (CPMUs). + + Say 'y/m' to enable a driver that will attach to performance + monitoring units and provide standard perf based interfaces. + + If unsure say 'm'. endif diff --git a/drivers/cxl/acpi.c b/drivers/cxl/acpi.c index 89ee01323d43..658e6b84a769 100644 --- a/drivers/cxl/acpi.c +++ b/drivers/cxl/acpi.c @@ -258,7 +258,7 @@ static int cxl_parse_cfmws(union acpi_subtable_headers *header, void *arg, cxld = &cxlrd->cxlsd.cxld; cxld->flags = cfmws_to_decoder_flags(cfmws->restrictions); - cxld->target_type = CXL_DECODER_EXPANDER; + cxld->target_type = CXL_DECODER_HOSTONLYMEM; cxld->hpa_range = (struct range) { .start = res->start, .end = res->end, diff --git a/drivers/cxl/core/Makefile b/drivers/cxl/core/Makefile index ca4ae31d8f57..1f66b5d4d935 100644 --- a/drivers/cxl/core/Makefile +++ b/drivers/cxl/core/Makefile @@ -12,5 +12,6 @@ cxl_core-y += memdev.o cxl_core-y += mbox.o cxl_core-y += pci.o cxl_core-y += hdm.o +cxl_core-y += pmu.o cxl_core-$(CONFIG_TRACING) += trace.o cxl_core-$(CONFIG_CXL_REGION) += region.o diff --git a/drivers/cxl/core/core.h b/drivers/cxl/core/core.h index b001669a5133..45e7e044cf4a 100644 --- a/drivers/cxl/core/core.h +++ b/drivers/cxl/core/core.h @@ -6,6 +6,7 @@ extern const struct device_type cxl_nvdimm_bridge_type; extern const struct device_type cxl_nvdimm_type; +extern const struct device_type cxl_pmu_type; extern struct attribute_group cxl_base_attribute_group; diff --git a/drivers/cxl/core/hdm.c b/drivers/cxl/core/hdm.c index 5abfa9276dac..4449b34a80cc 100644 --- a/drivers/cxl/core/hdm.c +++ b/drivers/cxl/core/hdm.c @@ -570,8 +570,9 @@ static void cxld_set_interleave(struct cxl_decoder *cxld, u32 *ctrl) static void cxld_set_type(struct cxl_decoder *cxld, u32 *ctrl) { - u32p_replace_bits(ctrl, !!(cxld->target_type == 3), - CXL_HDM_DECODER0_CTRL_TYPE); + u32p_replace_bits(ctrl, + !!(cxld->target_type == CXL_DECODER_HOSTONLYMEM), + CXL_HDM_DECODER0_CTRL_HOSTONLY); } static int cxlsd_set_targets(struct cxl_switch_decoder *cxlsd, u64 *tgt) @@ -764,7 +765,7 @@ static int cxl_setup_hdm_decoder_from_dvsec( if (!len) return -ENOENT; - cxld->target_type = CXL_DECODER_EXPANDER; + cxld->target_type = CXL_DECODER_HOSTONLYMEM; cxld->commit = NULL; cxld->reset = NULL; cxld->hpa_range = info->dvsec_range[which]; @@ -793,8 +794,8 @@ static int init_hdm_decoder(struct cxl_port *port, struct cxl_decoder *cxld, int *target_map, void __iomem *hdm, int which, u64 *dpa_base, struct cxl_endpoint_dvsec_info *info) { + struct cxl_endpoint_decoder *cxled = NULL; u64 size, base, skip, dpa_size, lo, hi; - struct cxl_endpoint_decoder *cxled; bool committed; u32 remainder; int i, rc; @@ -827,6 +828,8 @@ static int init_hdm_decoder(struct cxl_port *port, struct cxl_decoder *cxld, return -ENXIO; } + if (info) + cxled = to_cxl_endpoint_decoder(&cxld->dev); cxld->hpa_range = (struct range) { .start = base, .end = base + size - 1, @@ -837,10 +840,10 @@ static int init_hdm_decoder(struct cxl_port *port, struct cxl_decoder *cxld, cxld->flags |= CXL_DECODER_F_ENABLE; if (ctrl & CXL_HDM_DECODER0_CTRL_LOCK) cxld->flags |= CXL_DECODER_F_LOCK; - if (FIELD_GET(CXL_HDM_DECODER0_CTRL_TYPE, ctrl)) - cxld->target_type = CXL_DECODER_EXPANDER; + if (FIELD_GET(CXL_HDM_DECODER0_CTRL_HOSTONLY, ctrl)) + cxld->target_type = CXL_DECODER_HOSTONLYMEM; else - cxld->target_type = CXL_DECODER_ACCELERATOR; + cxld->target_type = CXL_DECODER_DEVMEM; if (cxld->id != port->commit_end + 1) { dev_warn(&port->dev, "decoder%d.%d: Committed out of order\n", @@ -856,12 +859,28 @@ static int init_hdm_decoder(struct cxl_port *port, struct cxl_decoder *cxld, } port->commit_end = cxld->id; } else { - /* unless / until type-2 drivers arrive, assume type-3 */ - if (FIELD_GET(CXL_HDM_DECODER0_CTRL_TYPE, ctrl) == 0) { - ctrl |= CXL_HDM_DECODER0_CTRL_TYPE; + if (cxled) { + struct cxl_memdev *cxlmd = cxled_to_memdev(cxled); + struct cxl_dev_state *cxlds = cxlmd->cxlds; + + /* + * Default by devtype until a device arrives that needs + * more precision. + */ + if (cxlds->type == CXL_DEVTYPE_CLASSMEM) + cxld->target_type = CXL_DECODER_HOSTONLYMEM; + else + cxld->target_type = CXL_DECODER_DEVMEM; + } else { + /* To be overridden by region type at commit time */ + cxld->target_type = CXL_DECODER_HOSTONLYMEM; + } + + if (!FIELD_GET(CXL_HDM_DECODER0_CTRL_HOSTONLY, ctrl) && + cxld->target_type == CXL_DECODER_HOSTONLYMEM) { + ctrl |= CXL_HDM_DECODER0_CTRL_HOSTONLY; writel(ctrl, hdm + CXL_HDM_DECODER0_CTRL_OFFSET(which)); } - cxld->target_type = CXL_DECODER_EXPANDER; } rc = eiw_to_ways(FIELD_GET(CXL_HDM_DECODER0_CTRL_IW_MASK, ctrl), &cxld->interleave_ways); @@ -880,7 +899,7 @@ static int init_hdm_decoder(struct cxl_port *port, struct cxl_decoder *cxld, port->id, cxld->id, cxld->hpa_range.start, cxld->hpa_range.end, cxld->interleave_ways, cxld->interleave_granularity); - if (!info) { + if (!cxled) { lo = readl(hdm + CXL_HDM_DECODER0_TL_LOW(which)); hi = readl(hdm + CXL_HDM_DECODER0_TL_HIGH(which)); target_list.value = (hi << 32) + lo; @@ -903,7 +922,6 @@ static int init_hdm_decoder(struct cxl_port *port, struct cxl_decoder *cxld, lo = readl(hdm + CXL_HDM_DECODER0_SKIP_LOW(which)); hi = readl(hdm + CXL_HDM_DECODER0_SKIP_HIGH(which)); skip = (hi << 32) + lo; - cxled = to_cxl_endpoint_decoder(&cxld->dev); rc = devm_cxl_dpa_reserve(cxled, *dpa_base + skip, dpa_size, skip); if (rc) { dev_err(&port->dev, diff --git a/drivers/cxl/core/mbox.c b/drivers/cxl/core/mbox.c index bea9cf31a12d..d6d067fbee97 100644 --- a/drivers/cxl/core/mbox.c +++ b/drivers/cxl/core/mbox.c @@ -182,7 +182,7 @@ static const char *cxl_mem_opcode_to_name(u16 opcode) /** * cxl_internal_send_cmd() - Kernel internal interface to send a mailbox command - * @cxlds: The device data for the operation + * @mds: The driver data for the operation * @mbox_cmd: initialized command to execute * * Context: Any context. @@ -198,19 +198,19 @@ static const char *cxl_mem_opcode_to_name(u16 opcode) * error. While this distinction can be useful for commands from userspace, the * kernel will only be able to use results when both are successful. */ -int cxl_internal_send_cmd(struct cxl_dev_state *cxlds, +int cxl_internal_send_cmd(struct cxl_memdev_state *mds, struct cxl_mbox_cmd *mbox_cmd) { size_t out_size, min_out; int rc; - if (mbox_cmd->size_in > cxlds->payload_size || - mbox_cmd->size_out > cxlds->payload_size) + if (mbox_cmd->size_in > mds->payload_size || + mbox_cmd->size_out > mds->payload_size) return -E2BIG; out_size = mbox_cmd->size_out; min_out = mbox_cmd->min_out; - rc = cxlds->mbox_send(cxlds, mbox_cmd); + rc = mds->mbox_send(mds, mbox_cmd); /* * EIO is reserved for a payload size mismatch and mbox_send() * may not return this error. @@ -220,7 +220,8 @@ int cxl_internal_send_cmd(struct cxl_dev_state *cxlds, if (rc) return rc; - if (mbox_cmd->return_code != CXL_MBOX_CMD_RC_SUCCESS) + if (mbox_cmd->return_code != CXL_MBOX_CMD_RC_SUCCESS && + mbox_cmd->return_code != CXL_MBOX_CMD_RC_BACKGROUND) return cxl_mbox_cmd_rc2errno(mbox_cmd); if (!out_size) @@ -297,7 +298,7 @@ static bool cxl_payload_from_user_allowed(u16 opcode, void *payload_in) } static int cxl_mbox_cmd_ctor(struct cxl_mbox_cmd *mbox, - struct cxl_dev_state *cxlds, u16 opcode, + struct cxl_memdev_state *mds, u16 opcode, size_t in_size, size_t out_size, u64 in_payload) { *mbox = (struct cxl_mbox_cmd) { @@ -312,7 +313,7 @@ static int cxl_mbox_cmd_ctor(struct cxl_mbox_cmd *mbox, return PTR_ERR(mbox->payload_in); if (!cxl_payload_from_user_allowed(opcode, mbox->payload_in)) { - dev_dbg(cxlds->dev, "%s: input payload not allowed\n", + dev_dbg(mds->cxlds.dev, "%s: input payload not allowed\n", cxl_mem_opcode_to_name(opcode)); kvfree(mbox->payload_in); return -EBUSY; @@ -321,7 +322,7 @@ static int cxl_mbox_cmd_ctor(struct cxl_mbox_cmd *mbox, /* Prepare to handle a full payload for variable sized output */ if (out_size == CXL_VARIABLE_PAYLOAD) - mbox->size_out = cxlds->payload_size; + mbox->size_out = mds->payload_size; else mbox->size_out = out_size; @@ -343,7 +344,7 @@ static void cxl_mbox_cmd_dtor(struct cxl_mbox_cmd *mbox) static int cxl_to_mem_cmd_raw(struct cxl_mem_command *mem_cmd, const struct cxl_send_command *send_cmd, - struct cxl_dev_state *cxlds) + struct cxl_memdev_state *mds) { if (send_cmd->raw.rsvd) return -EINVAL; @@ -353,13 +354,13 @@ static int cxl_to_mem_cmd_raw(struct cxl_mem_command *mem_cmd, * gets passed along without further checking, so it must be * validated here. */ - if (send_cmd->out.size > cxlds->payload_size) + if (send_cmd->out.size > mds->payload_size) return -EINVAL; if (!cxl_mem_raw_command_allowed(send_cmd->raw.opcode)) return -EPERM; - dev_WARN_ONCE(cxlds->dev, true, "raw command path used\n"); + dev_WARN_ONCE(mds->cxlds.dev, true, "raw command path used\n"); *mem_cmd = (struct cxl_mem_command) { .info = { @@ -375,7 +376,7 @@ static int cxl_to_mem_cmd_raw(struct cxl_mem_command *mem_cmd, static int cxl_to_mem_cmd(struct cxl_mem_command *mem_cmd, const struct cxl_send_command *send_cmd, - struct cxl_dev_state *cxlds) + struct cxl_memdev_state *mds) { struct cxl_mem_command *c = &cxl_mem_commands[send_cmd->id]; const struct cxl_command_info *info = &c->info; @@ -390,11 +391,11 @@ static int cxl_to_mem_cmd(struct cxl_mem_command *mem_cmd, return -EINVAL; /* Check that the command is enabled for hardware */ - if (!test_bit(info->id, cxlds->enabled_cmds)) + if (!test_bit(info->id, mds->enabled_cmds)) return -ENOTTY; /* Check that the command is not claimed for exclusive kernel use */ - if (test_bit(info->id, cxlds->exclusive_cmds)) + if (test_bit(info->id, mds->exclusive_cmds)) return -EBUSY; /* Check the input buffer is the expected size */ @@ -423,7 +424,7 @@ static int cxl_to_mem_cmd(struct cxl_mem_command *mem_cmd, /** * cxl_validate_cmd_from_user() - Check fields for CXL_MEM_SEND_COMMAND. * @mbox_cmd: Sanitized and populated &struct cxl_mbox_cmd. - * @cxlds: The device data for the operation + * @mds: The driver data for the operation * @send_cmd: &struct cxl_send_command copied in from userspace. * * Return: @@ -438,7 +439,7 @@ static int cxl_to_mem_cmd(struct cxl_mem_command *mem_cmd, * safe to send to the hardware. */ static int cxl_validate_cmd_from_user(struct cxl_mbox_cmd *mbox_cmd, - struct cxl_dev_state *cxlds, + struct cxl_memdev_state *mds, const struct cxl_send_command *send_cmd) { struct cxl_mem_command mem_cmd; @@ -452,20 +453,20 @@ static int cxl_validate_cmd_from_user(struct cxl_mbox_cmd *mbox_cmd, * supports, but output can be arbitrarily large (simply write out as * much data as the hardware provides). */ - if (send_cmd->in.size > cxlds->payload_size) + if (send_cmd->in.size > mds->payload_size) return -EINVAL; /* Sanitize and construct a cxl_mem_command */ if (send_cmd->id == CXL_MEM_COMMAND_ID_RAW) - rc = cxl_to_mem_cmd_raw(&mem_cmd, send_cmd, cxlds); + rc = cxl_to_mem_cmd_raw(&mem_cmd, send_cmd, mds); else - rc = cxl_to_mem_cmd(&mem_cmd, send_cmd, cxlds); + rc = cxl_to_mem_cmd(&mem_cmd, send_cmd, mds); if (rc) return rc; /* Sanitize and construct a cxl_mbox_cmd */ - return cxl_mbox_cmd_ctor(mbox_cmd, cxlds, mem_cmd.opcode, + return cxl_mbox_cmd_ctor(mbox_cmd, mds, mem_cmd.opcode, mem_cmd.info.size_in, mem_cmd.info.size_out, send_cmd->in.payload); } @@ -473,6 +474,7 @@ static int cxl_validate_cmd_from_user(struct cxl_mbox_cmd *mbox_cmd, int cxl_query_cmd(struct cxl_memdev *cxlmd, struct cxl_mem_query_commands __user *q) { + struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlmd->cxlds); struct device *dev = &cxlmd->dev; struct cxl_mem_command *cmd; u32 n_commands; @@ -494,9 +496,9 @@ int cxl_query_cmd(struct cxl_memdev *cxlmd, cxl_for_each_cmd(cmd) { struct cxl_command_info info = cmd->info; - if (test_bit(info.id, cxlmd->cxlds->enabled_cmds)) + if (test_bit(info.id, mds->enabled_cmds)) info.flags |= CXL_MEM_COMMAND_FLAG_ENABLED; - if (test_bit(info.id, cxlmd->cxlds->exclusive_cmds)) + if (test_bit(info.id, mds->exclusive_cmds)) info.flags |= CXL_MEM_COMMAND_FLAG_EXCLUSIVE; if (copy_to_user(&q->commands[j++], &info, sizeof(info))) @@ -511,7 +513,7 @@ int cxl_query_cmd(struct cxl_memdev *cxlmd, /** * handle_mailbox_cmd_from_user() - Dispatch a mailbox command for userspace. - * @cxlds: The device data for the operation + * @mds: The driver data for the operation * @mbox_cmd: The validated mailbox command. * @out_payload: Pointer to userspace's output payload. * @size_out: (Input) Max payload size to copy out. @@ -532,12 +534,12 @@ int cxl_query_cmd(struct cxl_memdev *cxlmd, * * See cxl_send_cmd(). */ -static int handle_mailbox_cmd_from_user(struct cxl_dev_state *cxlds, +static int handle_mailbox_cmd_from_user(struct cxl_memdev_state *mds, struct cxl_mbox_cmd *mbox_cmd, u64 out_payload, s32 *size_out, u32 *retval) { - struct device *dev = cxlds->dev; + struct device *dev = mds->cxlds.dev; int rc; dev_dbg(dev, @@ -547,7 +549,7 @@ static int handle_mailbox_cmd_from_user(struct cxl_dev_state *cxlds, cxl_mem_opcode_to_name(mbox_cmd->opcode), mbox_cmd->opcode, mbox_cmd->size_in); - rc = cxlds->mbox_send(cxlds, mbox_cmd); + rc = mds->mbox_send(mds, mbox_cmd); if (rc) goto out; @@ -576,7 +578,7 @@ out: int cxl_send_cmd(struct cxl_memdev *cxlmd, struct cxl_send_command __user *s) { - struct cxl_dev_state *cxlds = cxlmd->cxlds; + struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlmd->cxlds); struct device *dev = &cxlmd->dev; struct cxl_send_command send; struct cxl_mbox_cmd mbox_cmd; @@ -587,11 +589,11 @@ int cxl_send_cmd(struct cxl_memdev *cxlmd, struct cxl_send_command __user *s) if (copy_from_user(&send, s, sizeof(send))) return -EFAULT; - rc = cxl_validate_cmd_from_user(&mbox_cmd, cxlmd->cxlds, &send); + rc = cxl_validate_cmd_from_user(&mbox_cmd, mds, &send); if (rc) return rc; - rc = handle_mailbox_cmd_from_user(cxlds, &mbox_cmd, send.out.payload, + rc = handle_mailbox_cmd_from_user(mds, &mbox_cmd, send.out.payload, &send.out.size, &send.retval); if (rc) return rc; @@ -602,13 +604,14 @@ int cxl_send_cmd(struct cxl_memdev *cxlmd, struct cxl_send_command __user *s) return 0; } -static int cxl_xfer_log(struct cxl_dev_state *cxlds, uuid_t *uuid, u32 *size, u8 *out) +static int cxl_xfer_log(struct cxl_memdev_state *mds, uuid_t *uuid, + u32 *size, u8 *out) { u32 remaining = *size; u32 offset = 0; while (remaining) { - u32 xfer_size = min_t(u32, remaining, cxlds->payload_size); + u32 xfer_size = min_t(u32, remaining, mds->payload_size); struct cxl_mbox_cmd mbox_cmd; struct cxl_mbox_get_log log; int rc; @@ -627,7 +630,7 @@ static int cxl_xfer_log(struct cxl_dev_state *cxlds, uuid_t *uuid, u32 *size, u8 .payload_out = out, }; - rc = cxl_internal_send_cmd(cxlds, &mbox_cmd); + rc = cxl_internal_send_cmd(mds, &mbox_cmd); /* * The output payload length that indicates the number @@ -654,17 +657,18 @@ static int cxl_xfer_log(struct cxl_dev_state *cxlds, uuid_t *uuid, u32 *size, u8 /** * cxl_walk_cel() - Walk through the Command Effects Log. - * @cxlds: The device data for the operation + * @mds: The driver data for the operation * @size: Length of the Command Effects Log. * @cel: CEL * * Iterate over each entry in the CEL and determine if the driver supports the * command. If so, the command is enabled for the device and can be used later. */ -static void cxl_walk_cel(struct cxl_dev_state *cxlds, size_t size, u8 *cel) +static void cxl_walk_cel(struct cxl_memdev_state *mds, size_t size, u8 *cel) { struct cxl_cel_entry *cel_entry; const int cel_entries = size / sizeof(*cel_entry); + struct device *dev = mds->cxlds.dev; int i; cel_entry = (struct cxl_cel_entry *) cel; @@ -674,39 +678,39 @@ static void cxl_walk_cel(struct cxl_dev_state *cxlds, size_t size, u8 *cel) struct cxl_mem_command *cmd = cxl_mem_find_command(opcode); if (!cmd && !cxl_is_poison_command(opcode)) { - dev_dbg(cxlds->dev, + dev_dbg(dev, "Opcode 0x%04x unsupported by driver\n", opcode); continue; } if (cmd) - set_bit(cmd->info.id, cxlds->enabled_cmds); + set_bit(cmd->info.id, mds->enabled_cmds); if (cxl_is_poison_command(opcode)) - cxl_set_poison_cmd_enabled(&cxlds->poison, opcode); + cxl_set_poison_cmd_enabled(&mds->poison, opcode); - dev_dbg(cxlds->dev, "Opcode 0x%04x enabled\n", opcode); + dev_dbg(dev, "Opcode 0x%04x enabled\n", opcode); } } -static struct cxl_mbox_get_supported_logs *cxl_get_gsl(struct cxl_dev_state *cxlds) +static struct cxl_mbox_get_supported_logs *cxl_get_gsl(struct cxl_memdev_state *mds) { struct cxl_mbox_get_supported_logs *ret; struct cxl_mbox_cmd mbox_cmd; int rc; - ret = kvmalloc(cxlds->payload_size, GFP_KERNEL); + ret = kvmalloc(mds->payload_size, GFP_KERNEL); if (!ret) return ERR_PTR(-ENOMEM); mbox_cmd = (struct cxl_mbox_cmd) { .opcode = CXL_MBOX_OP_GET_SUPPORTED_LOGS, - .size_out = cxlds->payload_size, + .size_out = mds->payload_size, .payload_out = ret, /* At least the record number field must be valid */ .min_out = 2, }; - rc = cxl_internal_send_cmd(cxlds, &mbox_cmd); + rc = cxl_internal_send_cmd(mds, &mbox_cmd); if (rc < 0) { kvfree(ret); return ERR_PTR(rc); @@ -729,22 +733,22 @@ static const uuid_t log_uuid[] = { /** * cxl_enumerate_cmds() - Enumerate commands for a device. - * @cxlds: The device data for the operation + * @mds: The driver data for the operation * * Returns 0 if enumerate completed successfully. * * CXL devices have optional support for certain commands. This function will * determine the set of supported commands for the hardware and update the - * enabled_cmds bitmap in the @cxlds. + * enabled_cmds bitmap in the @mds. */ -int cxl_enumerate_cmds(struct cxl_dev_state *cxlds) +int cxl_enumerate_cmds(struct cxl_memdev_state *mds) { struct cxl_mbox_get_supported_logs *gsl; - struct device *dev = cxlds->dev; + struct device *dev = mds->cxlds.dev; struct cxl_mem_command *cmd; int i, rc; - gsl = cxl_get_gsl(cxlds); + gsl = cxl_get_gsl(mds); if (IS_ERR(gsl)) return PTR_ERR(gsl); @@ -765,19 +769,19 @@ int cxl_enumerate_cmds(struct cxl_dev_state *cxlds) goto out; } - rc = cxl_xfer_log(cxlds, &uuid, &size, log); + rc = cxl_xfer_log(mds, &uuid, &size, log); if (rc) { kvfree(log); goto out; } - cxl_walk_cel(cxlds, size, log); + cxl_walk_cel(mds, size, log); kvfree(log); /* In case CEL was bogus, enable some default commands. */ cxl_for_each_cmd(cmd) if (cmd->flags & CXL_CMD_FLAG_FORCE_ENABLE) - set_bit(cmd->info.id, cxlds->enabled_cmds); + set_bit(cmd->info.id, mds->enabled_cmds); /* Found the required CEL */ rc = 0; @@ -838,7 +842,7 @@ static void cxl_event_trace_record(const struct cxl_memdev *cxlmd, } } -static int cxl_clear_event_record(struct cxl_dev_state *cxlds, +static int cxl_clear_event_record(struct cxl_memdev_state *mds, enum cxl_event_log_type log, struct cxl_get_event_payload *get_pl) { @@ -852,9 +856,9 @@ static int cxl_clear_event_record(struct cxl_dev_state *cxlds, int i; /* Payload size may limit the max handles */ - if (pl_size > cxlds->payload_size) { - max_handles = (cxlds->payload_size - sizeof(*payload)) / - sizeof(__le16); + if (pl_size > mds->payload_size) { + max_handles = (mds->payload_size - sizeof(*payload)) / + sizeof(__le16); pl_size = struct_size(payload, handles, max_handles); } @@ -879,12 +883,12 @@ static int cxl_clear_event_record(struct cxl_dev_state *cxlds, i = 0; for (cnt = 0; cnt < total; cnt++) { payload->handles[i++] = get_pl->records[cnt].hdr.handle; - dev_dbg(cxlds->dev, "Event log '%d': Clearing %u\n", - log, le16_to_cpu(payload->handles[i])); + dev_dbg(mds->cxlds.dev, "Event log '%d': Clearing %u\n", log, + le16_to_cpu(payload->handles[i])); if (i == max_handles) { payload->nr_recs = i; - rc = cxl_internal_send_cmd(cxlds, &mbox_cmd); + rc = cxl_internal_send_cmd(mds, &mbox_cmd); if (rc) goto free_pl; i = 0; @@ -895,7 +899,7 @@ static int cxl_clear_event_record(struct cxl_dev_state *cxlds, if (i) { payload->nr_recs = i; mbox_cmd.size_in = struct_size(payload, handles, i); - rc = cxl_internal_send_cmd(cxlds, &mbox_cmd); + rc = cxl_internal_send_cmd(mds, &mbox_cmd); if (rc) goto free_pl; } @@ -905,32 +909,34 @@ free_pl: return rc; } -static void cxl_mem_get_records_log(struct cxl_dev_state *cxlds, +static void cxl_mem_get_records_log(struct cxl_memdev_state *mds, enum cxl_event_log_type type) { + struct cxl_memdev *cxlmd = mds->cxlds.cxlmd; + struct device *dev = mds->cxlds.dev; struct cxl_get_event_payload *payload; struct cxl_mbox_cmd mbox_cmd; u8 log_type = type; u16 nr_rec; - mutex_lock(&cxlds->event.log_lock); - payload = cxlds->event.buf; + mutex_lock(&mds->event.log_lock); + payload = mds->event.buf; mbox_cmd = (struct cxl_mbox_cmd) { .opcode = CXL_MBOX_OP_GET_EVENT_RECORD, .payload_in = &log_type, .size_in = sizeof(log_type), .payload_out = payload, - .size_out = cxlds->payload_size, + .size_out = mds->payload_size, .min_out = struct_size(payload, records, 0), }; do { int rc, i; - rc = cxl_internal_send_cmd(cxlds, &mbox_cmd); + rc = cxl_internal_send_cmd(mds, &mbox_cmd); if (rc) { - dev_err_ratelimited(cxlds->dev, + dev_err_ratelimited(dev, "Event log '%d': Failed to query event records : %d", type, rc); break; @@ -941,27 +947,27 @@ static void cxl_mem_get_records_log(struct cxl_dev_state *cxlds, break; for (i = 0; i < nr_rec; i++) - cxl_event_trace_record(cxlds->cxlmd, type, + cxl_event_trace_record(cxlmd, type, &payload->records[i]); if (payload->flags & CXL_GET_EVENT_FLAG_OVERFLOW) - trace_cxl_overflow(cxlds->cxlmd, type, payload); + trace_cxl_overflow(cxlmd, type, payload); - rc = cxl_clear_event_record(cxlds, type, payload); + rc = cxl_clear_event_record(mds, type, payload); if (rc) { - dev_err_ratelimited(cxlds->dev, + dev_err_ratelimited(dev, "Event log '%d': Failed to clear events : %d", type, rc); break; } } while (nr_rec); - mutex_unlock(&cxlds->event.log_lock); + mutex_unlock(&mds->event.log_lock); } /** * cxl_mem_get_event_records - Get Event Records from the device - * @cxlds: The device data for the operation + * @mds: The driver data for the operation * @status: Event Status register value identifying which events are available. * * Retrieve all event records available on the device, report them as trace @@ -970,24 +976,24 @@ static void cxl_mem_get_records_log(struct cxl_dev_state *cxlds, * See CXL rev 3.0 @8.2.9.2.2 Get Event Records * See CXL rev 3.0 @8.2.9.2.3 Clear Event Records */ -void cxl_mem_get_event_records(struct cxl_dev_state *cxlds, u32 status) +void cxl_mem_get_event_records(struct cxl_memdev_state *mds, u32 status) { - dev_dbg(cxlds->dev, "Reading event logs: %x\n", status); + dev_dbg(mds->cxlds.dev, "Reading event logs: %x\n", status); if (status & CXLDEV_EVENT_STATUS_FATAL) - cxl_mem_get_records_log(cxlds, CXL_EVENT_TYPE_FATAL); + cxl_mem_get_records_log(mds, CXL_EVENT_TYPE_FATAL); if (status & CXLDEV_EVENT_STATUS_FAIL) - cxl_mem_get_records_log(cxlds, CXL_EVENT_TYPE_FAIL); + cxl_mem_get_records_log(mds, CXL_EVENT_TYPE_FAIL); if (status & CXLDEV_EVENT_STATUS_WARN) - cxl_mem_get_records_log(cxlds, CXL_EVENT_TYPE_WARN); + cxl_mem_get_records_log(mds, CXL_EVENT_TYPE_WARN); if (status & CXLDEV_EVENT_STATUS_INFO) - cxl_mem_get_records_log(cxlds, CXL_EVENT_TYPE_INFO); + cxl_mem_get_records_log(mds, CXL_EVENT_TYPE_INFO); } EXPORT_SYMBOL_NS_GPL(cxl_mem_get_event_records, CXL); /** * cxl_mem_get_partition_info - Get partition info - * @cxlds: The device data for the operation + * @mds: The driver data for the operation * * Retrieve the current partition info for the device specified. The active * values are the current capacity in bytes. If not 0, the 'next' values are @@ -997,7 +1003,7 @@ EXPORT_SYMBOL_NS_GPL(cxl_mem_get_event_records, CXL); * * See CXL @8.2.9.5.2.1 Get Partition Info */ -static int cxl_mem_get_partition_info(struct cxl_dev_state *cxlds) +static int cxl_mem_get_partition_info(struct cxl_memdev_state *mds) { struct cxl_mbox_get_partition_info pi; struct cxl_mbox_cmd mbox_cmd; @@ -1008,17 +1014,17 @@ static int cxl_mem_get_partition_info(struct cxl_dev_state *cxlds) .size_out = sizeof(pi), .payload_out = &pi, }; - rc = cxl_internal_send_cmd(cxlds, &mbox_cmd); + rc = cxl_internal_send_cmd(mds, &mbox_cmd); if (rc) return rc; - cxlds->active_volatile_bytes = + mds->active_volatile_bytes = le64_to_cpu(pi.active_volatile_cap) * CXL_CAPACITY_MULTIPLIER; - cxlds->active_persistent_bytes = + mds->active_persistent_bytes = le64_to_cpu(pi.active_persistent_cap) * CXL_CAPACITY_MULTIPLIER; - cxlds->next_volatile_bytes = + mds->next_volatile_bytes = le64_to_cpu(pi.next_volatile_cap) * CXL_CAPACITY_MULTIPLIER; - cxlds->next_persistent_bytes = + mds->next_persistent_bytes = le64_to_cpu(pi.next_volatile_cap) * CXL_CAPACITY_MULTIPLIER; return 0; @@ -1026,14 +1032,14 @@ static int cxl_mem_get_partition_info(struct cxl_dev_state *cxlds) /** * cxl_dev_state_identify() - Send the IDENTIFY command to the device. - * @cxlds: The device data for the operation + * @mds: The driver data for the operation * * Return: 0 if identify was executed successfully or media not ready. * * This will dispatch the identify command to the device and on success populate * structures to be exported to sysfs. */ -int cxl_dev_state_identify(struct cxl_dev_state *cxlds) +int cxl_dev_state_identify(struct cxl_memdev_state *mds) { /* See CXL 2.0 Table 175 Identify Memory Device Output Payload */ struct cxl_mbox_identify id; @@ -1041,7 +1047,7 @@ int cxl_dev_state_identify(struct cxl_dev_state *cxlds) u32 val; int rc; - if (!cxlds->media_ready) + if (!mds->cxlds.media_ready) return 0; mbox_cmd = (struct cxl_mbox_cmd) { @@ -1049,31 +1055,92 @@ int cxl_dev_state_identify(struct cxl_dev_state *cxlds) .size_out = sizeof(id), .payload_out = &id, }; - rc = cxl_internal_send_cmd(cxlds, &mbox_cmd); + rc = cxl_internal_send_cmd(mds, &mbox_cmd); if (rc < 0) return rc; - cxlds->total_bytes = + mds->total_bytes = le64_to_cpu(id.total_capacity) * CXL_CAPACITY_MULTIPLIER; - cxlds->volatile_only_bytes = + mds->volatile_only_bytes = le64_to_cpu(id.volatile_capacity) * CXL_CAPACITY_MULTIPLIER; - cxlds->persistent_only_bytes = + mds->persistent_only_bytes = le64_to_cpu(id.persistent_capacity) * CXL_CAPACITY_MULTIPLIER; - cxlds->partition_align_bytes = + mds->partition_align_bytes = le64_to_cpu(id.partition_align) * CXL_CAPACITY_MULTIPLIER; - cxlds->lsa_size = le32_to_cpu(id.lsa_size); - memcpy(cxlds->firmware_version, id.fw_revision, sizeof(id.fw_revision)); + mds->lsa_size = le32_to_cpu(id.lsa_size); + memcpy(mds->firmware_version, id.fw_revision, + sizeof(id.fw_revision)); - if (test_bit(CXL_POISON_ENABLED_LIST, cxlds->poison.enabled_cmds)) { + if (test_bit(CXL_POISON_ENABLED_LIST, mds->poison.enabled_cmds)) { val = get_unaligned_le24(id.poison_list_max_mer); - cxlds->poison.max_errors = min_t(u32, val, CXL_POISON_LIST_MAX); + mds->poison.max_errors = min_t(u32, val, CXL_POISON_LIST_MAX); } return 0; } EXPORT_SYMBOL_NS_GPL(cxl_dev_state_identify, CXL); +/** + * cxl_mem_sanitize() - Send a sanitization command to the device. + * @mds: The device data for the operation + * @cmd: The specific sanitization command opcode + * + * Return: 0 if the command was executed successfully, regardless of + * whether or not the actual security operation is done in the background, + * such as for the Sanitize case. + * Error return values can be the result of the mailbox command, -EINVAL + * when security requirements are not met or invalid contexts. + * + * See CXL 3.0 @8.2.9.8.5.1 Sanitize and @8.2.9.8.5.2 Secure Erase. + */ +int cxl_mem_sanitize(struct cxl_memdev_state *mds, u16 cmd) +{ + int rc; + u32 sec_out = 0; + struct cxl_get_security_output { + __le32 flags; + } out; + struct cxl_mbox_cmd sec_cmd = { + .opcode = CXL_MBOX_OP_GET_SECURITY_STATE, + .payload_out = &out, + .size_out = sizeof(out), + }; + struct cxl_mbox_cmd mbox_cmd = { .opcode = cmd }; + struct cxl_dev_state *cxlds = &mds->cxlds; + + if (cmd != CXL_MBOX_OP_SANITIZE && cmd != CXL_MBOX_OP_SECURE_ERASE) + return -EINVAL; + + rc = cxl_internal_send_cmd(mds, &sec_cmd); + if (rc < 0) { + dev_err(cxlds->dev, "Failed to get security state : %d", rc); + return rc; + } + + /* + * Prior to using these commands, any security applied to + * the user data areas of the device shall be DISABLED (or + * UNLOCKED for secure erase case). + */ + sec_out = le32_to_cpu(out.flags); + if (sec_out & CXL_PMEM_SEC_STATE_USER_PASS_SET) + return -EINVAL; + + if (cmd == CXL_MBOX_OP_SECURE_ERASE && + sec_out & CXL_PMEM_SEC_STATE_LOCKED) + return -EINVAL; + + rc = cxl_internal_send_cmd(mds, &mbox_cmd); + if (rc < 0) { + dev_err(cxlds->dev, "Failed to sanitize device : %d", rc); + return rc; + } + + return 0; +} +EXPORT_SYMBOL_NS_GPL(cxl_mem_sanitize, CXL); + static int add_dpa_res(struct device *dev, struct resource *parent, struct resource *res, resource_size_t start, resource_size_t size, const char *type) @@ -1100,8 +1167,9 @@ static int add_dpa_res(struct device *dev, struct resource *parent, return 0; } -int cxl_mem_create_range_info(struct cxl_dev_state *cxlds) +int cxl_mem_create_range_info(struct cxl_memdev_state *mds) { + struct cxl_dev_state *cxlds = &mds->cxlds; struct device *dev = cxlds->dev; int rc; @@ -1113,35 +1181,35 @@ int cxl_mem_create_range_info(struct cxl_dev_state *cxlds) } cxlds->dpa_res = - (struct resource)DEFINE_RES_MEM(0, cxlds->total_bytes); + (struct resource)DEFINE_RES_MEM(0, mds->total_bytes); - if (cxlds->partition_align_bytes == 0) { + if (mds->partition_align_bytes == 0) { rc = add_dpa_res(dev, &cxlds->dpa_res, &cxlds->ram_res, 0, - cxlds->volatile_only_bytes, "ram"); + mds->volatile_only_bytes, "ram"); if (rc) return rc; return add_dpa_res(dev, &cxlds->dpa_res, &cxlds->pmem_res, - cxlds->volatile_only_bytes, - cxlds->persistent_only_bytes, "pmem"); + mds->volatile_only_bytes, + mds->persistent_only_bytes, "pmem"); } - rc = cxl_mem_get_partition_info(cxlds); + rc = cxl_mem_get_partition_info(mds); if (rc) { dev_err(dev, "Failed to query partition information\n"); return rc; } rc = add_dpa_res(dev, &cxlds->dpa_res, &cxlds->ram_res, 0, - cxlds->active_volatile_bytes, "ram"); + mds->active_volatile_bytes, "ram"); if (rc) return rc; return add_dpa_res(dev, &cxlds->dpa_res, &cxlds->pmem_res, - cxlds->active_volatile_bytes, - cxlds->active_persistent_bytes, "pmem"); + mds->active_volatile_bytes, + mds->active_persistent_bytes, "pmem"); } EXPORT_SYMBOL_NS_GPL(cxl_mem_create_range_info, CXL); -int cxl_set_timestamp(struct cxl_dev_state *cxlds) +int cxl_set_timestamp(struct cxl_memdev_state *mds) { struct cxl_mbox_cmd mbox_cmd; struct cxl_mbox_set_timestamp_in pi; @@ -1154,7 +1222,7 @@ int cxl_set_timestamp(struct cxl_dev_state *cxlds) .payload_in = &pi, }; - rc = cxl_internal_send_cmd(cxlds, &mbox_cmd); + rc = cxl_internal_send_cmd(mds, &mbox_cmd); /* * Command is optional. Devices may have another way of providing * a timestamp, or may return all 0s in timestamp fields. @@ -1170,18 +1238,18 @@ EXPORT_SYMBOL_NS_GPL(cxl_set_timestamp, CXL); int cxl_mem_get_poison(struct cxl_memdev *cxlmd, u64 offset, u64 len, struct cxl_region *cxlr) { - struct cxl_dev_state *cxlds = cxlmd->cxlds; + struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlmd->cxlds); struct cxl_mbox_poison_out *po; struct cxl_mbox_poison_in pi; struct cxl_mbox_cmd mbox_cmd; int nr_records = 0; int rc; - rc = mutex_lock_interruptible(&cxlds->poison.lock); + rc = mutex_lock_interruptible(&mds->poison.lock); if (rc) return rc; - po = cxlds->poison.list_out; + po = mds->poison.list_out; pi.offset = cpu_to_le64(offset); pi.length = cpu_to_le64(len / CXL_POISON_LEN_MULT); @@ -1189,13 +1257,13 @@ int cxl_mem_get_poison(struct cxl_memdev *cxlmd, u64 offset, u64 len, .opcode = CXL_MBOX_OP_GET_POISON, .size_in = sizeof(pi), .payload_in = &pi, - .size_out = cxlds->payload_size, + .size_out = mds->payload_size, .payload_out = po, .min_out = struct_size(po, record, 0), }; do { - rc = cxl_internal_send_cmd(cxlds, &mbox_cmd); + rc = cxl_internal_send_cmd(mds, &mbox_cmd); if (rc) break; @@ -1206,14 +1274,14 @@ int cxl_mem_get_poison(struct cxl_memdev *cxlmd, u64 offset, u64 len, /* Protect against an uncleared _FLAG_MORE */ nr_records = nr_records + le16_to_cpu(po->count); - if (nr_records >= cxlds->poison.max_errors) { + if (nr_records >= mds->poison.max_errors) { dev_dbg(&cxlmd->dev, "Max Error Records reached: %d\n", nr_records); break; } } while (po->flags & CXL_POISON_FLAG_MORE); - mutex_unlock(&cxlds->poison.lock); + mutex_unlock(&mds->poison.lock); return rc; } EXPORT_SYMBOL_NS_GPL(cxl_mem_get_poison, CXL); @@ -1223,52 +1291,53 @@ static void free_poison_buf(void *buf) kvfree(buf); } -/* Get Poison List output buffer is protected by cxlds->poison.lock */ -static int cxl_poison_alloc_buf(struct cxl_dev_state *cxlds) +/* Get Poison List output buffer is protected by mds->poison.lock */ +static int cxl_poison_alloc_buf(struct cxl_memdev_state *mds) { - cxlds->poison.list_out = kvmalloc(cxlds->payload_size, GFP_KERNEL); - if (!cxlds->poison.list_out) + mds->poison.list_out = kvmalloc(mds->payload_size, GFP_KERNEL); + if (!mds->poison.list_out) return -ENOMEM; - return devm_add_action_or_reset(cxlds->dev, free_poison_buf, - cxlds->poison.list_out); + return devm_add_action_or_reset(mds->cxlds.dev, free_poison_buf, + mds->poison.list_out); } -int cxl_poison_state_init(struct cxl_dev_state *cxlds) +int cxl_poison_state_init(struct cxl_memdev_state *mds) { int rc; - if (!test_bit(CXL_POISON_ENABLED_LIST, cxlds->poison.enabled_cmds)) + if (!test_bit(CXL_POISON_ENABLED_LIST, mds->poison.enabled_cmds)) return 0; - rc = cxl_poison_alloc_buf(cxlds); + rc = cxl_poison_alloc_buf(mds); if (rc) { - clear_bit(CXL_POISON_ENABLED_LIST, cxlds->poison.enabled_cmds); + clear_bit(CXL_POISON_ENABLED_LIST, mds->poison.enabled_cmds); return rc; } - mutex_init(&cxlds->poison.lock); + mutex_init(&mds->poison.lock); return 0; } EXPORT_SYMBOL_NS_GPL(cxl_poison_state_init, CXL); -struct cxl_dev_state *cxl_dev_state_create(struct device *dev) +struct cxl_memdev_state *cxl_memdev_state_create(struct device *dev) { - struct cxl_dev_state *cxlds; + struct cxl_memdev_state *mds; - cxlds = devm_kzalloc(dev, sizeof(*cxlds), GFP_KERNEL); - if (!cxlds) { + mds = devm_kzalloc(dev, sizeof(*mds), GFP_KERNEL); + if (!mds) { dev_err(dev, "No memory available\n"); return ERR_PTR(-ENOMEM); } - mutex_init(&cxlds->mbox_mutex); - mutex_init(&cxlds->event.log_lock); - cxlds->dev = dev; + mutex_init(&mds->mbox_mutex); + mutex_init(&mds->event.log_lock); + mds->cxlds.dev = dev; + mds->cxlds.type = CXL_DEVTYPE_CLASSMEM; - return cxlds; + return mds; } -EXPORT_SYMBOL_NS_GPL(cxl_dev_state_create, CXL); +EXPORT_SYMBOL_NS_GPL(cxl_memdev_state_create, CXL); void __init cxl_mbox_init(void) { diff --git a/drivers/cxl/core/memdev.c b/drivers/cxl/core/memdev.c index 057a43267290..90237b9487a7 100644 --- a/drivers/cxl/core/memdev.c +++ b/drivers/cxl/core/memdev.c @@ -1,6 +1,8 @@ // SPDX-License-Identifier: GPL-2.0-only /* Copyright(c) 2020 Intel Corporation. */ +#include <linux/io-64-nonatomic-lo-hi.h> +#include <linux/firmware.h> #include <linux/device.h> #include <linux/slab.h> #include <linux/idr.h> @@ -39,8 +41,11 @@ static ssize_t firmware_version_show(struct device *dev, { struct cxl_memdev *cxlmd = to_cxl_memdev(dev); struct cxl_dev_state *cxlds = cxlmd->cxlds; + struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlds); - return sysfs_emit(buf, "%.16s\n", cxlds->firmware_version); + if (!mds) + return sysfs_emit(buf, "\n"); + return sysfs_emit(buf, "%.16s\n", mds->firmware_version); } static DEVICE_ATTR_RO(firmware_version); @@ -49,8 +54,11 @@ static ssize_t payload_max_show(struct device *dev, { struct cxl_memdev *cxlmd = to_cxl_memdev(dev); struct cxl_dev_state *cxlds = cxlmd->cxlds; + struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlds); - return sysfs_emit(buf, "%zu\n", cxlds->payload_size); + if (!mds) + return sysfs_emit(buf, "\n"); + return sysfs_emit(buf, "%zu\n", mds->payload_size); } static DEVICE_ATTR_RO(payload_max); @@ -59,8 +67,11 @@ static ssize_t label_storage_size_show(struct device *dev, { struct cxl_memdev *cxlmd = to_cxl_memdev(dev); struct cxl_dev_state *cxlds = cxlmd->cxlds; + struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlds); - return sysfs_emit(buf, "%zu\n", cxlds->lsa_size); + if (!mds) + return sysfs_emit(buf, "\n"); + return sysfs_emit(buf, "%zu\n", mds->lsa_size); } static DEVICE_ATTR_RO(label_storage_size); @@ -107,6 +118,89 @@ static ssize_t numa_node_show(struct device *dev, struct device_attribute *attr, } static DEVICE_ATTR_RO(numa_node); +static ssize_t security_state_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct cxl_memdev *cxlmd = to_cxl_memdev(dev); + struct cxl_dev_state *cxlds = cxlmd->cxlds; + struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlds); + u64 reg = readq(cxlds->regs.mbox + CXLDEV_MBOX_BG_CMD_STATUS_OFFSET); + u32 pct = FIELD_GET(CXLDEV_MBOX_BG_CMD_COMMAND_PCT_MASK, reg); + u16 cmd = FIELD_GET(CXLDEV_MBOX_BG_CMD_COMMAND_OPCODE_MASK, reg); + unsigned long state = mds->security.state; + + if (cmd == CXL_MBOX_OP_SANITIZE && pct != 100) + return sysfs_emit(buf, "sanitize\n"); + + if (!(state & CXL_PMEM_SEC_STATE_USER_PASS_SET)) + return sysfs_emit(buf, "disabled\n"); + if (state & CXL_PMEM_SEC_STATE_FROZEN || + state & CXL_PMEM_SEC_STATE_MASTER_PLIMIT || + state & CXL_PMEM_SEC_STATE_USER_PLIMIT) + return sysfs_emit(buf, "frozen\n"); + if (state & CXL_PMEM_SEC_STATE_LOCKED) + return sysfs_emit(buf, "locked\n"); + else + return sysfs_emit(buf, "unlocked\n"); +} +static struct device_attribute dev_attr_security_state = + __ATTR(state, 0444, security_state_show, NULL); + +static ssize_t security_sanitize_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t len) +{ + struct cxl_memdev *cxlmd = to_cxl_memdev(dev); + struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlmd->cxlds); + struct cxl_port *port = cxlmd->endpoint; + bool sanitize; + ssize_t rc; + + if (kstrtobool(buf, &sanitize) || !sanitize) + return -EINVAL; + + if (!port || !is_cxl_endpoint(port)) + return -EINVAL; + + /* ensure no regions are mapped to this memdev */ + if (port->commit_end != -1) + return -EBUSY; + + rc = cxl_mem_sanitize(mds, CXL_MBOX_OP_SANITIZE); + + return rc ? rc : len; +} +static struct device_attribute dev_attr_security_sanitize = + __ATTR(sanitize, 0200, NULL, security_sanitize_store); + +static ssize_t security_erase_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t len) +{ + struct cxl_memdev *cxlmd = to_cxl_memdev(dev); + struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlmd->cxlds); + struct cxl_port *port = cxlmd->endpoint; + ssize_t rc; + bool erase; + + if (kstrtobool(buf, &erase) || !erase) + return -EINVAL; + + if (!port || !is_cxl_endpoint(port)) + return -EINVAL; + + /* ensure no regions are mapped to this memdev */ + if (port->commit_end != -1) + return -EBUSY; + + rc = cxl_mem_sanitize(mds, CXL_MBOX_OP_SECURE_ERASE); + + return rc ? rc : len; +} +static struct device_attribute dev_attr_security_erase = + __ATTR(erase, 0200, NULL, security_erase_store); + static int cxl_get_poison_by_memdev(struct cxl_memdev *cxlmd) { struct cxl_dev_state *cxlds = cxlmd->cxlds; @@ -140,7 +234,7 @@ int cxl_trigger_poison_list(struct cxl_memdev *cxlmd) struct cxl_port *port; int rc; - port = dev_get_drvdata(&cxlmd->dev); + port = cxlmd->endpoint; if (!port || !is_cxl_endpoint(port)) return -EINVAL; @@ -198,7 +292,7 @@ static struct cxl_region *cxl_dpa_to_region(struct cxl_memdev *cxlmd, u64 dpa) ctx = (struct cxl_dpa_to_region_context) { .dpa = dpa, }; - port = dev_get_drvdata(&cxlmd->dev); + port = cxlmd->endpoint; if (port && is_cxl_endpoint(port) && port->commit_end != -1) device_for_each_child(&port->dev, &ctx, __cxl_dpa_to_region); @@ -231,7 +325,7 @@ static int cxl_validate_poison_dpa(struct cxl_memdev *cxlmd, u64 dpa) int cxl_inject_poison(struct cxl_memdev *cxlmd, u64 dpa) { - struct cxl_dev_state *cxlds = cxlmd->cxlds; + struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlmd->cxlds); struct cxl_mbox_inject_poison inject; struct cxl_poison_record record; struct cxl_mbox_cmd mbox_cmd; @@ -255,13 +349,13 @@ int cxl_inject_poison(struct cxl_memdev *cxlmd, u64 dpa) .size_in = sizeof(inject), .payload_in = &inject, }; - rc = cxl_internal_send_cmd(cxlds, &mbox_cmd); + rc = cxl_internal_send_cmd(mds, &mbox_cmd); if (rc) goto out; cxlr = cxl_dpa_to_region(cxlmd, dpa); if (cxlr) - dev_warn_once(cxlds->dev, + dev_warn_once(mds->cxlds.dev, "poison inject dpa:%#llx region: %s\n", dpa, dev_name(&cxlr->dev)); @@ -279,7 +373,7 @@ EXPORT_SYMBOL_NS_GPL(cxl_inject_poison, CXL); int cxl_clear_poison(struct cxl_memdev *cxlmd, u64 dpa) { - struct cxl_dev_state *cxlds = cxlmd->cxlds; + struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlmd->cxlds); struct cxl_mbox_clear_poison clear; struct cxl_poison_record record; struct cxl_mbox_cmd mbox_cmd; @@ -312,14 +406,15 @@ int cxl_clear_poison(struct cxl_memdev *cxlmd, u64 dpa) .payload_in = &clear, }; - rc = cxl_internal_send_cmd(cxlds, &mbox_cmd); + rc = cxl_internal_send_cmd(mds, &mbox_cmd); if (rc) goto out; cxlr = cxl_dpa_to_region(cxlmd, dpa); if (cxlr) - dev_warn_once(cxlds->dev, "poison clear dpa:%#llx region: %s\n", - dpa, dev_name(&cxlr->dev)); + dev_warn_once(mds->cxlds.dev, + "poison clear dpa:%#llx region: %s\n", dpa, + dev_name(&cxlr->dev)); record = (struct cxl_poison_record) { .address = cpu_to_le64(dpa), @@ -352,6 +447,13 @@ static struct attribute *cxl_memdev_ram_attributes[] = { NULL, }; +static struct attribute *cxl_memdev_security_attributes[] = { + &dev_attr_security_state.attr, + &dev_attr_security_sanitize.attr, + &dev_attr_security_erase.attr, + NULL, +}; + static umode_t cxl_memdev_visible(struct kobject *kobj, struct attribute *a, int n) { @@ -375,10 +477,16 @@ static struct attribute_group cxl_memdev_pmem_attribute_group = { .attrs = cxl_memdev_pmem_attributes, }; +static struct attribute_group cxl_memdev_security_attribute_group = { + .name = "security", + .attrs = cxl_memdev_security_attributes, +}; + static const struct attribute_group *cxl_memdev_attribute_groups[] = { &cxl_memdev_attribute_group, &cxl_memdev_ram_attribute_group, &cxl_memdev_pmem_attribute_group, + &cxl_memdev_security_attribute_group, NULL, }; @@ -397,17 +505,18 @@ EXPORT_SYMBOL_NS_GPL(is_cxl_memdev, CXL); /** * set_exclusive_cxl_commands() - atomically disable user cxl commands - * @cxlds: The device state to operate on + * @mds: The device state to operate on * @cmds: bitmap of commands to mark exclusive * * Grab the cxl_memdev_rwsem in write mode to flush in-flight * invocations of the ioctl path and then disable future execution of * commands with the command ids set in @cmds. */ -void set_exclusive_cxl_commands(struct cxl_dev_state *cxlds, unsigned long *cmds) +void set_exclusive_cxl_commands(struct cxl_memdev_state *mds, + unsigned long *cmds) { down_write(&cxl_memdev_rwsem); - bitmap_or(cxlds->exclusive_cmds, cxlds->exclusive_cmds, cmds, + bitmap_or(mds->exclusive_cmds, mds->exclusive_cmds, cmds, CXL_MEM_COMMAND_ID_MAX); up_write(&cxl_memdev_rwsem); } @@ -415,23 +524,34 @@ EXPORT_SYMBOL_NS_GPL(set_exclusive_cxl_commands, CXL); /** * clear_exclusive_cxl_commands() - atomically enable user cxl commands - * @cxlds: The device state to modify + * @mds: The device state to modify * @cmds: bitmap of commands to mark available for userspace */ -void clear_exclusive_cxl_commands(struct cxl_dev_state *cxlds, unsigned long *cmds) +void clear_exclusive_cxl_commands(struct cxl_memdev_state *mds, + unsigned long *cmds) { down_write(&cxl_memdev_rwsem); - bitmap_andnot(cxlds->exclusive_cmds, cxlds->exclusive_cmds, cmds, + bitmap_andnot(mds->exclusive_cmds, mds->exclusive_cmds, cmds, CXL_MEM_COMMAND_ID_MAX); up_write(&cxl_memdev_rwsem); } EXPORT_SYMBOL_NS_GPL(clear_exclusive_cxl_commands, CXL); +static void cxl_memdev_security_shutdown(struct device *dev) +{ + struct cxl_memdev *cxlmd = to_cxl_memdev(dev); + struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlmd->cxlds); + + if (mds->security.poll) + cancel_delayed_work_sync(&mds->security.poll_dwork); +} + static void cxl_memdev_shutdown(struct device *dev) { struct cxl_memdev *cxlmd = to_cxl_memdev(dev); down_write(&cxl_memdev_rwsem); + cxl_memdev_security_shutdown(dev); cxlmd->cxlds = NULL; up_write(&cxl_memdev_rwsem); } @@ -511,10 +631,12 @@ static long cxl_memdev_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { struct cxl_memdev *cxlmd = file->private_data; + struct cxl_dev_state *cxlds; int rc = -ENXIO; down_read(&cxl_memdev_rwsem); - if (cxlmd->cxlds) + cxlds = cxlmd->cxlds; + if (cxlds && cxlds->type == CXL_DEVTYPE_CLASSMEM) rc = __cxl_memdev_ioctl(cxlmd, cmd, arg); up_read(&cxl_memdev_rwsem); @@ -542,6 +664,316 @@ static int cxl_memdev_release_file(struct inode *inode, struct file *file) return 0; } +/** + * cxl_mem_get_fw_info - Get Firmware info + * @cxlds: The device data for the operation + * + * Retrieve firmware info for the device specified. + * + * Return: 0 if no error: or the result of the mailbox command. + * + * See CXL-3.0 8.2.9.3.1 Get FW Info + */ +static int cxl_mem_get_fw_info(struct cxl_memdev_state *mds) +{ + struct cxl_mbox_get_fw_info info; + struct cxl_mbox_cmd mbox_cmd; + int rc; + + mbox_cmd = (struct cxl_mbox_cmd) { + .opcode = CXL_MBOX_OP_GET_FW_INFO, + .size_out = sizeof(info), + .payload_out = &info, + }; + + rc = cxl_internal_send_cmd(mds, &mbox_cmd); + if (rc < 0) + return rc; + + mds->fw.num_slots = info.num_slots; + mds->fw.cur_slot = FIELD_GET(CXL_FW_INFO_SLOT_INFO_CUR_MASK, + info.slot_info); + + return 0; +} + +/** + * cxl_mem_activate_fw - Activate Firmware + * @mds: The device data for the operation + * @slot: slot number to activate + * + * Activate firmware in a given slot for the device specified. + * + * Return: 0 if no error: or the result of the mailbox command. + * + * See CXL-3.0 8.2.9.3.3 Activate FW + */ +static int cxl_mem_activate_fw(struct cxl_memdev_state *mds, int slot) +{ + struct cxl_mbox_activate_fw activate; + struct cxl_mbox_cmd mbox_cmd; + + if (slot == 0 || slot > mds->fw.num_slots) + return -EINVAL; + + mbox_cmd = (struct cxl_mbox_cmd) { + .opcode = CXL_MBOX_OP_ACTIVATE_FW, + .size_in = sizeof(activate), + .payload_in = &activate, + }; + + /* Only offline activation supported for now */ + activate.action = CXL_FW_ACTIVATE_OFFLINE; + activate.slot = slot; + + return cxl_internal_send_cmd(mds, &mbox_cmd); +} + +/** + * cxl_mem_abort_fw_xfer - Abort an in-progress FW transfer + * @mds: The device data for the operation + * + * Abort an in-progress firmware transfer for the device specified. + * + * Return: 0 if no error: or the result of the mailbox command. + * + * See CXL-3.0 8.2.9.3.2 Transfer FW + */ +static int cxl_mem_abort_fw_xfer(struct cxl_memdev_state *mds) +{ + struct cxl_mbox_transfer_fw *transfer; + struct cxl_mbox_cmd mbox_cmd; + int rc; + + transfer = kzalloc(struct_size(transfer, data, 0), GFP_KERNEL); + if (!transfer) + return -ENOMEM; + + /* Set a 1s poll interval and a total wait time of 30s */ + mbox_cmd = (struct cxl_mbox_cmd) { + .opcode = CXL_MBOX_OP_TRANSFER_FW, + .size_in = sizeof(*transfer), + .payload_in = transfer, + .poll_interval_ms = 1000, + .poll_count = 30, + }; + + transfer->action = CXL_FW_TRANSFER_ACTION_ABORT; + + rc = cxl_internal_send_cmd(mds, &mbox_cmd); + kfree(transfer); + return rc; +} + +static void cxl_fw_cleanup(struct fw_upload *fwl) +{ + struct cxl_memdev_state *mds = fwl->dd_handle; + + mds->fw.next_slot = 0; +} + +static int cxl_fw_do_cancel(struct fw_upload *fwl) +{ + struct cxl_memdev_state *mds = fwl->dd_handle; + struct cxl_dev_state *cxlds = &mds->cxlds; + struct cxl_memdev *cxlmd = cxlds->cxlmd; + int rc; + + rc = cxl_mem_abort_fw_xfer(mds); + if (rc < 0) + dev_err(&cxlmd->dev, "Error aborting FW transfer: %d\n", rc); + + return FW_UPLOAD_ERR_CANCELED; +} + +static enum fw_upload_err cxl_fw_prepare(struct fw_upload *fwl, const u8 *data, + u32 size) +{ + struct cxl_memdev_state *mds = fwl->dd_handle; + struct cxl_mbox_transfer_fw *transfer; + + if (!size) + return FW_UPLOAD_ERR_INVALID_SIZE; + + mds->fw.oneshot = struct_size(transfer, data, size) < + mds->payload_size; + + if (cxl_mem_get_fw_info(mds)) + return FW_UPLOAD_ERR_HW_ERROR; + + /* + * So far no state has been changed, hence no other cleanup is + * necessary. Simply return the cancelled status. + */ + if (test_and_clear_bit(CXL_FW_CANCEL, mds->fw.state)) + return FW_UPLOAD_ERR_CANCELED; + + return FW_UPLOAD_ERR_NONE; +} + +static enum fw_upload_err cxl_fw_write(struct fw_upload *fwl, const u8 *data, + u32 offset, u32 size, u32 *written) +{ + struct cxl_memdev_state *mds = fwl->dd_handle; + struct cxl_dev_state *cxlds = &mds->cxlds; + struct cxl_memdev *cxlmd = cxlds->cxlmd; + struct cxl_mbox_transfer_fw *transfer; + struct cxl_mbox_cmd mbox_cmd; + u32 cur_size, remaining; + size_t size_in; + int rc; + + *written = 0; + + /* Offset has to be aligned to 128B (CXL-3.0 8.2.9.3.2 Table 8-57) */ + if (!IS_ALIGNED(offset, CXL_FW_TRANSFER_ALIGNMENT)) { + dev_err(&cxlmd->dev, + "misaligned offset for FW transfer slice (%u)\n", + offset); + return FW_UPLOAD_ERR_RW_ERROR; + } + + /* + * Pick transfer size based on mds->payload_size @size must bw 128-byte + * aligned, ->payload_size is a power of 2 starting at 256 bytes, and + * sizeof(*transfer) is 128. These constraints imply that @cur_size + * will always be 128b aligned. + */ + cur_size = min_t(size_t, size, mds->payload_size - sizeof(*transfer)); + + remaining = size - cur_size; + size_in = struct_size(transfer, data, cur_size); + + if (test_and_clear_bit(CXL_FW_CANCEL, mds->fw.state)) + return cxl_fw_do_cancel(fwl); + + /* + * Slot numbers are 1-indexed + * cur_slot is the 0-indexed next_slot (i.e. 'cur_slot - 1 + 1') + * Check for rollover using modulo, and 1-index it by adding 1 + */ + mds->fw.next_slot = (mds->fw.cur_slot % mds->fw.num_slots) + 1; + + /* Do the transfer via mailbox cmd */ + transfer = kzalloc(size_in, GFP_KERNEL); + if (!transfer) + return FW_UPLOAD_ERR_RW_ERROR; + + transfer->offset = cpu_to_le32(offset / CXL_FW_TRANSFER_ALIGNMENT); + memcpy(transfer->data, data + offset, cur_size); + if (mds->fw.oneshot) { + transfer->action = CXL_FW_TRANSFER_ACTION_FULL; + transfer->slot = mds->fw.next_slot; + } else { + if (offset == 0) { + transfer->action = CXL_FW_TRANSFER_ACTION_INITIATE; + } else if (remaining == 0) { + transfer->action = CXL_FW_TRANSFER_ACTION_END; + transfer->slot = mds->fw.next_slot; + } else { + transfer->action = CXL_FW_TRANSFER_ACTION_CONTINUE; + } + } + + mbox_cmd = (struct cxl_mbox_cmd) { + .opcode = CXL_MBOX_OP_TRANSFER_FW, + .size_in = size_in, + .payload_in = transfer, + .poll_interval_ms = 1000, + .poll_count = 30, + }; + + rc = cxl_internal_send_cmd(mds, &mbox_cmd); + if (rc < 0) { + rc = FW_UPLOAD_ERR_RW_ERROR; + goto out_free; + } + + *written = cur_size; + + /* Activate FW if oneshot or if the last slice was written */ + if (mds->fw.oneshot || remaining == 0) { + dev_dbg(&cxlmd->dev, "Activating firmware slot: %d\n", + mds->fw.next_slot); + rc = cxl_mem_activate_fw(mds, mds->fw.next_slot); + if (rc < 0) { + dev_err(&cxlmd->dev, "Error activating firmware: %d\n", + rc); + rc = FW_UPLOAD_ERR_HW_ERROR; + goto out_free; + } + } + + rc = FW_UPLOAD_ERR_NONE; + +out_free: + kfree(transfer); + return rc; +} + +static enum fw_upload_err cxl_fw_poll_complete(struct fw_upload *fwl) +{ + struct cxl_memdev_state *mds = fwl->dd_handle; + + /* + * cxl_internal_send_cmd() handles background operations synchronously. + * No need to wait for completions here - any errors would've been + * reported and handled during the ->write() call(s). + * Just check if a cancel request was received, and return success. + */ + if (test_and_clear_bit(CXL_FW_CANCEL, mds->fw.state)) + return cxl_fw_do_cancel(fwl); + + return FW_UPLOAD_ERR_NONE; +} + +static void cxl_fw_cancel(struct fw_upload *fwl) +{ + struct cxl_memdev_state *mds = fwl->dd_handle; + + set_bit(CXL_FW_CANCEL, mds->fw.state); +} + +static const struct fw_upload_ops cxl_memdev_fw_ops = { + .prepare = cxl_fw_prepare, + .write = cxl_fw_write, + .poll_complete = cxl_fw_poll_complete, + .cancel = cxl_fw_cancel, + .cleanup = cxl_fw_cleanup, +}; + +static void devm_cxl_remove_fw_upload(void *fwl) +{ + firmware_upload_unregister(fwl); +} + +int cxl_memdev_setup_fw_upload(struct cxl_memdev_state *mds) +{ + struct cxl_dev_state *cxlds = &mds->cxlds; + struct device *dev = &cxlds->cxlmd->dev; + struct fw_upload *fwl; + int rc; + + if (!test_bit(CXL_MEM_COMMAND_ID_GET_FW_INFO, mds->enabled_cmds)) + return 0; + + fwl = firmware_upload_register(THIS_MODULE, dev, dev_name(dev), + &cxl_memdev_fw_ops, mds); + if (IS_ERR(fwl)) + return dev_err_probe(dev, PTR_ERR(fwl), + "Failed to register firmware loader\n"); + + rc = devm_add_action_or_reset(cxlds->dev, devm_cxl_remove_fw_upload, + fwl); + if (rc) + dev_err(dev, + "Failed to add firmware loader remove action: %d\n", + rc); + + return rc; +} +EXPORT_SYMBOL_NS_GPL(cxl_memdev_setup_fw_upload, CXL); + static const struct file_operations cxl_memdev_fops = { .owner = THIS_MODULE, .unlocked_ioctl = cxl_memdev_ioctl, @@ -551,6 +983,35 @@ static const struct file_operations cxl_memdev_fops = { .llseek = noop_llseek, }; +static void put_sanitize(void *data) +{ + struct cxl_memdev_state *mds = data; + + sysfs_put(mds->security.sanitize_node); +} + +static int cxl_memdev_security_init(struct cxl_memdev *cxlmd) +{ + struct cxl_dev_state *cxlds = cxlmd->cxlds; + struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlds); + struct device *dev = &cxlmd->dev; + struct kernfs_node *sec; + + sec = sysfs_get_dirent(dev->kobj.sd, "security"); + if (!sec) { + dev_err(dev, "sysfs_get_dirent 'security' failed\n"); + return -ENODEV; + } + mds->security.sanitize_node = sysfs_get_dirent(sec, "state"); + sysfs_put(sec); + if (!mds->security.sanitize_node) { + dev_err(dev, "sysfs_get_dirent 'state' failed\n"); + return -ENODEV; + } + + return devm_add_action_or_reset(cxlds->dev, put_sanitize, mds); + } + struct cxl_memdev *devm_cxl_add_memdev(struct cxl_dev_state *cxlds) { struct cxl_memdev *cxlmd; @@ -579,6 +1040,10 @@ struct cxl_memdev *devm_cxl_add_memdev(struct cxl_dev_state *cxlds) if (rc) goto err; + rc = cxl_memdev_security_init(cxlmd); + if (rc) + goto err; + rc = devm_add_action_or_reset(cxlds->dev, cxl_memdev_unregister, cxlmd); if (rc) return ERR_PTR(rc); diff --git a/drivers/cxl/core/pci.c b/drivers/cxl/core/pci.c index 375f01c6cad6..c7a7887ebdcf 100644 --- a/drivers/cxl/core/pci.c +++ b/drivers/cxl/core/pci.c @@ -308,36 +308,17 @@ static void disable_hdm(void *_cxlhdm) hdm + CXL_HDM_DECODER_CTRL_OFFSET); } -int devm_cxl_enable_hdm(struct cxl_port *port, struct cxl_hdm *cxlhdm) +static int devm_cxl_enable_hdm(struct device *host, struct cxl_hdm *cxlhdm) { - void __iomem *hdm; + void __iomem *hdm = cxlhdm->regs.hdm_decoder; u32 global_ctrl; - /* - * If the hdm capability was not mapped there is nothing to enable and - * the caller is responsible for what happens next. For example, - * emulate a passthrough decoder. - */ - if (IS_ERR(cxlhdm)) - return 0; - - hdm = cxlhdm->regs.hdm_decoder; global_ctrl = readl(hdm + CXL_HDM_DECODER_CTRL_OFFSET); - - /* - * If the HDM decoder capability was enabled on entry, skip - * registering disable_hdm() since this decode capability may be - * owned by platform firmware. - */ - if (global_ctrl & CXL_HDM_DECODER_ENABLE) - return 0; - writel(global_ctrl | CXL_HDM_DECODER_ENABLE, hdm + CXL_HDM_DECODER_CTRL_OFFSET); - return devm_add_action_or_reset(&port->dev, disable_hdm, cxlhdm); + return devm_add_action_or_reset(host, disable_hdm, cxlhdm); } -EXPORT_SYMBOL_NS_GPL(devm_cxl_enable_hdm, CXL); int cxl_dvsec_rr_decode(struct device *dev, int d, struct cxl_endpoint_dvsec_info *info) @@ -511,7 +492,7 @@ int cxl_hdm_decode_init(struct cxl_dev_state *cxlds, struct cxl_hdm *cxlhdm, if (info->mem_enabled) return 0; - rc = devm_cxl_enable_hdm(port, cxlhdm); + rc = devm_cxl_enable_hdm(&port->dev, cxlhdm); if (rc) return rc; diff --git a/drivers/cxl/core/pmem.c b/drivers/cxl/core/pmem.c index f8c38d997252..fc94f5240327 100644 --- a/drivers/cxl/core/pmem.c +++ b/drivers/cxl/core/pmem.c @@ -64,7 +64,7 @@ static int match_nvdimm_bridge(struct device *dev, void *data) struct cxl_nvdimm_bridge *cxl_find_nvdimm_bridge(struct cxl_memdev *cxlmd) { - struct cxl_port *port = find_cxl_root(dev_get_drvdata(&cxlmd->dev)); + struct cxl_port *port = find_cxl_root(cxlmd->endpoint); struct device *dev; if (!port) diff --git a/drivers/cxl/core/pmu.c b/drivers/cxl/core/pmu.c new file mode 100644 index 000000000000..7684c843e5a5 --- /dev/null +++ b/drivers/cxl/core/pmu.c @@ -0,0 +1,68 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Copyright(c) 2023 Huawei. All rights reserved. */ + +#include <linux/device.h> +#include <linux/slab.h> +#include <linux/idr.h> +#include <cxlmem.h> +#include <pmu.h> +#include <cxl.h> +#include "core.h" + +static void cxl_pmu_release(struct device *dev) +{ + struct cxl_pmu *pmu = to_cxl_pmu(dev); + + kfree(pmu); +} + +const struct device_type cxl_pmu_type = { + .name = "cxl_pmu", + .release = cxl_pmu_release, +}; + +static void remove_dev(void *dev) +{ + device_del(dev); +} + +int devm_cxl_pmu_add(struct device *parent, struct cxl_pmu_regs *regs, + int assoc_id, int index, enum cxl_pmu_type type) +{ + struct cxl_pmu *pmu; + struct device *dev; + int rc; + + pmu = kzalloc(sizeof(*pmu), GFP_KERNEL); + if (!pmu) + return -ENOMEM; + + pmu->assoc_id = assoc_id; + pmu->index = index; + pmu->type = type; + pmu->base = regs->pmu; + dev = &pmu->dev; + device_initialize(dev); + device_set_pm_not_required(dev); + dev->parent = parent; + dev->bus = &cxl_bus_type; + dev->type = &cxl_pmu_type; + switch (pmu->type) { + case CXL_PMU_MEMDEV: + rc = dev_set_name(dev, "pmu_mem%d.%d", assoc_id, index); + break; + } + if (rc) + goto err; + + rc = device_add(dev); + if (rc) + goto err; + + return devm_add_action_or_reset(parent, remove_dev, dev); + +err: + put_device(&pmu->dev); + return rc; +} +EXPORT_SYMBOL_NS_GPL(devm_cxl_pmu_add, CXL); diff --git a/drivers/cxl/core/port.c b/drivers/cxl/core/port.c index cbd3d17f6410..724be8448eb4 100644 --- a/drivers/cxl/core/port.c +++ b/drivers/cxl/core/port.c @@ -56,6 +56,8 @@ static int cxl_device_id(const struct device *dev) return CXL_DEVICE_MEMORY_EXPANDER; if (dev->type == CXL_REGION_TYPE()) return CXL_DEVICE_REGION; + if (dev->type == &cxl_pmu_type) + return CXL_DEVICE_PMU; return 0; } @@ -117,9 +119,9 @@ static ssize_t target_type_show(struct device *dev, struct cxl_decoder *cxld = to_cxl_decoder(dev); switch (cxld->target_type) { - case CXL_DECODER_ACCELERATOR: + case CXL_DECODER_DEVMEM: return sysfs_emit(buf, "accelerator\n"); - case CXL_DECODER_EXPANDER: + case CXL_DECODER_HOSTONLYMEM: return sysfs_emit(buf, "expander\n"); } return -ENXIO; @@ -1213,7 +1215,7 @@ static struct device *grandparent(struct device *dev) static void delete_endpoint(void *data) { struct cxl_memdev *cxlmd = data; - struct cxl_port *endpoint = dev_get_drvdata(&cxlmd->dev); + struct cxl_port *endpoint = cxlmd->endpoint; struct cxl_port *parent_port; struct device *parent; @@ -1228,6 +1230,7 @@ static void delete_endpoint(void *data) devm_release_action(parent, cxl_unlink_uport, endpoint); devm_release_action(parent, unregister_port, endpoint); } + cxlmd->endpoint = NULL; device_unlock(parent); put_device(parent); out: @@ -1239,7 +1242,7 @@ int cxl_endpoint_autoremove(struct cxl_memdev *cxlmd, struct cxl_port *endpoint) struct device *dev = &cxlmd->dev; get_device(&endpoint->dev); - dev_set_drvdata(dev, endpoint); + cxlmd->endpoint = endpoint; cxlmd->depth = endpoint->depth; return devm_add_action_or_reset(dev, delete_endpoint, cxlmd); } @@ -1610,7 +1613,7 @@ static int cxl_decoder_init(struct cxl_port *port, struct cxl_decoder *cxld) /* Pre initialize an "empty" decoder */ cxld->interleave_ways = 1; cxld->interleave_granularity = PAGE_SIZE; - cxld->target_type = CXL_DECODER_EXPANDER; + cxld->target_type = CXL_DECODER_HOSTONLYMEM; cxld->hpa_range = (struct range) { .start = 0, .end = -1, diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c index 39825e5301d0..e115ba382e04 100644 --- a/drivers/cxl/core/region.c +++ b/drivers/cxl/core/region.c @@ -125,10 +125,38 @@ static struct cxl_region_ref *cxl_rr_load(struct cxl_port *port, return xa_load(&port->regions, (unsigned long)cxlr); } +static int cxl_region_invalidate_memregion(struct cxl_region *cxlr) +{ + if (!cpu_cache_has_invalidate_memregion()) { + if (IS_ENABLED(CONFIG_CXL_REGION_INVALIDATION_TEST)) { + dev_warn_once( + &cxlr->dev, + "Bypassing cpu_cache_invalidate_memregion() for testing!\n"); + return 0; + } else { + dev_err(&cxlr->dev, + "Failed to synchronize CPU cache state\n"); + return -ENXIO; + } + } + + cpu_cache_invalidate_memregion(IORES_DESC_CXL); + return 0; +} + static int cxl_region_decode_reset(struct cxl_region *cxlr, int count) { struct cxl_region_params *p = &cxlr->params; - int i; + int i, rc = 0; + + /* + * Before region teardown attempt to flush, and if the flush + * fails cancel the region teardown for data consistency + * concerns + */ + rc = cxl_region_invalidate_memregion(cxlr); + if (rc) + return rc; for (i = count - 1; i >= 0; i--) { struct cxl_endpoint_decoder *cxled = p->targets[i]; @@ -136,7 +164,6 @@ static int cxl_region_decode_reset(struct cxl_region *cxlr, int count) struct cxl_port *iter = cxled_to_port(cxled); struct cxl_dev_state *cxlds = cxlmd->cxlds; struct cxl_ep *ep; - int rc = 0; if (cxlds->rcd) goto endpoint_reset; @@ -155,14 +182,19 @@ static int cxl_region_decode_reset(struct cxl_region *cxlr, int count) rc = cxld->reset(cxld); if (rc) return rc; + set_bit(CXL_REGION_F_NEEDS_RESET, &cxlr->flags); } endpoint_reset: rc = cxled->cxld.reset(&cxled->cxld); if (rc) return rc; + set_bit(CXL_REGION_F_NEEDS_RESET, &cxlr->flags); } + /* all decoders associated with this region have been torn down */ + clear_bit(CXL_REGION_F_NEEDS_RESET, &cxlr->flags); + return 0; } @@ -256,9 +288,19 @@ static ssize_t commit_store(struct device *dev, struct device_attribute *attr, goto out; } - if (commit) + /* + * Invalidate caches before region setup to drop any speculative + * consumption of this address space + */ + rc = cxl_region_invalidate_memregion(cxlr); + if (rc) + return rc; + + if (commit) { rc = cxl_region_decode_commit(cxlr); - else { + if (rc == 0) + p->state = CXL_CONFIG_COMMIT; + } else { p->state = CXL_CONFIG_RESET_PENDING; up_write(&cxl_region_rwsem); device_release_driver(&cxlr->dev); @@ -268,18 +310,20 @@ static ssize_t commit_store(struct device *dev, struct device_attribute *attr, * The lock was dropped, so need to revalidate that the reset is * still pending. */ - if (p->state == CXL_CONFIG_RESET_PENDING) + if (p->state == CXL_CONFIG_RESET_PENDING) { rc = cxl_region_decode_reset(cxlr, p->interleave_ways); + /* + * Revert to committed since there may still be active + * decoders associated with this region, or move forward + * to active to mark the reset successful + */ + if (rc) + p->state = CXL_CONFIG_COMMIT; + else + p->state = CXL_CONFIG_ACTIVE; + } } - if (rc) - goto out; - - if (commit) - p->state = CXL_CONFIG_COMMIT; - else if (p->state == CXL_CONFIG_RESET_PENDING) - p->state = CXL_CONFIG_ACTIVE; - out: up_write(&cxl_region_rwsem); @@ -809,6 +853,18 @@ static int cxl_rr_alloc_decoder(struct cxl_port *port, struct cxl_region *cxlr, return -EBUSY; } + /* + * Endpoints should already match the region type, but backstop that + * assumption with an assertion. Switch-decoders change mapping-type + * based on what is mapped when they are assigned to a region. + */ + dev_WARN_ONCE(&cxlr->dev, + port == cxled_to_port(cxled) && + cxld->target_type != cxlr->type, + "%s:%s mismatch decoder type %d -> %d\n", + dev_name(&cxled_to_memdev(cxled)->dev), + dev_name(&cxld->dev), cxld->target_type, cxlr->type); + cxld->target_type = cxlr->type; cxl_rr->decoder = cxld; return 0; } @@ -1675,7 +1731,6 @@ static int cxl_region_attach(struct cxl_region *cxlr, if (rc) goto err_decrement; p->state = CXL_CONFIG_ACTIVE; - set_bit(CXL_REGION_F_INCOHERENT, &cxlr->flags); } cxled->cxld.interleave_ways = p->interleave_ways; @@ -2104,7 +2159,7 @@ static struct cxl_region *__create_region(struct cxl_root_decoder *cxlrd, return ERR_PTR(-EBUSY); } - return devm_cxl_add_region(cxlrd, id, mode, CXL_DECODER_EXPANDER); + return devm_cxl_add_region(cxlrd, id, mode, CXL_DECODER_HOSTONLYMEM); } static ssize_t create_pmem_region_store(struct device *dev, @@ -2805,30 +2860,6 @@ out: } EXPORT_SYMBOL_NS_GPL(cxl_add_to_region, CXL); -static int cxl_region_invalidate_memregion(struct cxl_region *cxlr) -{ - if (!test_bit(CXL_REGION_F_INCOHERENT, &cxlr->flags)) - return 0; - - if (!cpu_cache_has_invalidate_memregion()) { - if (IS_ENABLED(CONFIG_CXL_REGION_INVALIDATION_TEST)) { - dev_warn_once( - &cxlr->dev, - "Bypassing cpu_cache_invalidate_memregion() for testing!\n"); - clear_bit(CXL_REGION_F_INCOHERENT, &cxlr->flags); - return 0; - } else { - dev_err(&cxlr->dev, - "Failed to synchronize CPU cache state\n"); - return -ENXIO; - } - } - - cpu_cache_invalidate_memregion(IORES_DESC_CXL); - clear_bit(CXL_REGION_F_INCOHERENT, &cxlr->flags); - return 0; -} - static int is_system_ram(struct resource *res, void *arg) { struct cxl_region *cxlr = arg; @@ -2856,7 +2887,12 @@ static int cxl_region_probe(struct device *dev) goto out; } - rc = cxl_region_invalidate_memregion(cxlr); + if (test_bit(CXL_REGION_F_NEEDS_RESET, &cxlr->flags)) { + dev_err(&cxlr->dev, + "failed to activate, re-commit region and retry\n"); + rc = -ENXIO; + goto out; + } /* * From this point on any path that changes the region's state away from diff --git a/drivers/cxl/core/regs.c b/drivers/cxl/core/regs.c index e68848075bb6..6281127b3e9d 100644 --- a/drivers/cxl/core/regs.c +++ b/drivers/cxl/core/regs.c @@ -6,6 +6,7 @@ #include <linux/pci.h> #include <cxlmem.h> #include <cxlpci.h> +#include <pmu.h> #include "core.h" @@ -199,13 +200,13 @@ void __iomem *devm_cxl_iomap_block(struct device *dev, resource_size_t addr, return ret_val; } -int cxl_map_component_regs(struct cxl_register_map *map, +int cxl_map_component_regs(const struct cxl_register_map *map, struct cxl_component_regs *regs, unsigned long map_mask) { struct device *dev = map->dev; struct mapinfo { - struct cxl_reg_map *rmap; + const struct cxl_reg_map *rmap; void __iomem **addr; } mapinfo[] = { { &map->component_map.hdm_decoder, ®s->hdm_decoder }, @@ -233,13 +234,13 @@ int cxl_map_component_regs(struct cxl_register_map *map, } EXPORT_SYMBOL_NS_GPL(cxl_map_component_regs, CXL); -int cxl_map_device_regs(struct cxl_register_map *map, +int cxl_map_device_regs(const struct cxl_register_map *map, struct cxl_device_regs *regs) { struct device *dev = map->dev; resource_size_t phys_addr = map->resource; struct mapinfo { - struct cxl_reg_map *rmap; + const struct cxl_reg_map *rmap; void __iomem **addr; } mapinfo[] = { { &map->device_map.status, ®s->status, }, @@ -288,20 +289,23 @@ static bool cxl_decode_regblock(struct pci_dev *pdev, u32 reg_lo, u32 reg_hi, } /** - * cxl_find_regblock() - Locate register blocks by type + * cxl_find_regblock_instance() - Locate a register block by type / index * @pdev: The CXL PCI device to enumerate. * @type: Register Block Indicator id * @map: Enumeration output, clobbered on error + * @index: Index into which particular instance of a regblock wanted in the + * order found in register locator DVSEC. * * Return: 0 if register block enumerated, negative error code otherwise * * A CXL DVSEC may point to one or more register blocks, search for them - * by @type. + * by @type and @index. */ -int cxl_find_regblock(struct pci_dev *pdev, enum cxl_regloc_type type, - struct cxl_register_map *map) +int cxl_find_regblock_instance(struct pci_dev *pdev, enum cxl_regloc_type type, + struct cxl_register_map *map, int index) { u32 regloc_size, regblocks; + int instance = 0; int regloc, i; *map = (struct cxl_register_map) { @@ -329,15 +333,74 @@ int cxl_find_regblock(struct pci_dev *pdev, enum cxl_regloc_type type, if (!cxl_decode_regblock(pdev, reg_lo, reg_hi, map)) continue; - if (map->reg_type == type) - return 0; + if (map->reg_type == type) { + if (index == instance) + return 0; + instance++; + } } map->resource = CXL_RESOURCE_NONE; return -ENODEV; } +EXPORT_SYMBOL_NS_GPL(cxl_find_regblock_instance, CXL); + +/** + * cxl_find_regblock() - Locate register blocks by type + * @pdev: The CXL PCI device to enumerate. + * @type: Register Block Indicator id + * @map: Enumeration output, clobbered on error + * + * Return: 0 if register block enumerated, negative error code otherwise + * + * A CXL DVSEC may point to one or more register blocks, search for them + * by @type. + */ +int cxl_find_regblock(struct pci_dev *pdev, enum cxl_regloc_type type, + struct cxl_register_map *map) +{ + return cxl_find_regblock_instance(pdev, type, map, 0); +} EXPORT_SYMBOL_NS_GPL(cxl_find_regblock, CXL); +/** + * cxl_count_regblock() - Count instances of a given regblock type. + * @pdev: The CXL PCI device to enumerate. + * @type: Register Block Indicator id + * + * Some regblocks may be repeated. Count how many instances. + * + * Return: count of matching regblocks. + */ +int cxl_count_regblock(struct pci_dev *pdev, enum cxl_regloc_type type) +{ + struct cxl_register_map map; + int rc, count = 0; + + while (1) { + rc = cxl_find_regblock_instance(pdev, type, &map, count); + if (rc) + return count; + count++; + } +} +EXPORT_SYMBOL_NS_GPL(cxl_count_regblock, CXL); + +int cxl_map_pmu_regs(struct pci_dev *pdev, struct cxl_pmu_regs *regs, + struct cxl_register_map *map) +{ + struct device *dev = &pdev->dev; + resource_size_t phys_addr; + + phys_addr = map->resource; + regs->pmu = devm_cxl_iomap_block(dev, phys_addr, CXL_PMU_REGMAP_SIZE); + if (!regs->pmu) + return -ENOMEM; + + return 0; +} +EXPORT_SYMBOL_NS_GPL(cxl_map_pmu_regs, CXL); + static int cxl_map_regblock(struct cxl_register_map *map) { struct device *dev = map->dev; diff --git a/drivers/cxl/cxl.h b/drivers/cxl/cxl.h index b1adca9b27ba..76d92561af29 100644 --- a/drivers/cxl/cxl.h +++ b/drivers/cxl/cxl.h @@ -56,7 +56,7 @@ #define CXL_HDM_DECODER0_CTRL_COMMIT BIT(9) #define CXL_HDM_DECODER0_CTRL_COMMITTED BIT(10) #define CXL_HDM_DECODER0_CTRL_COMMIT_ERROR BIT(11) -#define CXL_HDM_DECODER0_CTRL_TYPE BIT(12) +#define CXL_HDM_DECODER0_CTRL_HOSTONLY BIT(12) #define CXL_HDM_DECODER0_TL_LOW(i) (0x20 * (i) + 0x24) #define CXL_HDM_DECODER0_TL_HIGH(i) (0x20 * (i) + 0x28) #define CXL_HDM_DECODER0_SKIP_LOW(i) CXL_HDM_DECODER0_TL_LOW(i) @@ -176,14 +176,22 @@ static inline int ways_to_eiw(unsigned int ways, u8 *eiw) /* CXL 2.0 8.2.8.4 Mailbox Registers */ #define CXLDEV_MBOX_CAPS_OFFSET 0x00 #define CXLDEV_MBOX_CAP_PAYLOAD_SIZE_MASK GENMASK(4, 0) +#define CXLDEV_MBOX_CAP_BG_CMD_IRQ BIT(6) +#define CXLDEV_MBOX_CAP_IRQ_MSGNUM_MASK GENMASK(10, 7) #define CXLDEV_MBOX_CTRL_OFFSET 0x04 #define CXLDEV_MBOX_CTRL_DOORBELL BIT(0) +#define CXLDEV_MBOX_CTRL_BG_CMD_IRQ BIT(2) #define CXLDEV_MBOX_CMD_OFFSET 0x08 #define CXLDEV_MBOX_CMD_COMMAND_OPCODE_MASK GENMASK_ULL(15, 0) #define CXLDEV_MBOX_CMD_PAYLOAD_LENGTH_MASK GENMASK_ULL(36, 16) #define CXLDEV_MBOX_STATUS_OFFSET 0x10 +#define CXLDEV_MBOX_STATUS_BG_CMD BIT(0) #define CXLDEV_MBOX_STATUS_RET_CODE_MASK GENMASK_ULL(47, 32) #define CXLDEV_MBOX_BG_CMD_STATUS_OFFSET 0x18 +#define CXLDEV_MBOX_BG_CMD_COMMAND_OPCODE_MASK GENMASK_ULL(15, 0) +#define CXLDEV_MBOX_BG_CMD_COMMAND_PCT_MASK GENMASK_ULL(22, 16) +#define CXLDEV_MBOX_BG_CMD_COMMAND_RC_MASK GENMASK_ULL(47, 32) +#define CXLDEV_MBOX_BG_CMD_COMMAND_VENDOR_MASK GENMASK_ULL(63, 48) #define CXLDEV_MBOX_PAYLOAD_OFFSET 0x20 /* @@ -209,6 +217,10 @@ struct cxl_regs { struct_group_tagged(cxl_device_regs, device_regs, void __iomem *status, *mbox, *memdev; ); + + struct_group_tagged(cxl_pmu_regs, pmu_regs, + void __iomem *pmu; + ); }; struct cxl_reg_map { @@ -229,6 +241,10 @@ struct cxl_device_reg_map { struct cxl_reg_map memdev; }; +struct cxl_pmu_reg_map { + struct cxl_reg_map pmu; +}; + /** * struct cxl_register_map - DVSEC harvested register block mapping parameters * @dev: device for devm operations and logging @@ -238,6 +254,7 @@ struct cxl_device_reg_map { * @reg_type: see enum cxl_regloc_type * @component_map: cxl_reg_map for component registers * @device_map: cxl_reg_maps for device registers + * @pmu_map: cxl_reg_maps for CXL Performance Monitoring Units */ struct cxl_register_map { struct device *dev; @@ -248,6 +265,7 @@ struct cxl_register_map { union { struct cxl_component_reg_map component_map; struct cxl_device_reg_map device_map; + struct cxl_pmu_reg_map pmu_map; }; }; @@ -255,13 +273,18 @@ void cxl_probe_component_regs(struct device *dev, void __iomem *base, struct cxl_component_reg_map *map); void cxl_probe_device_regs(struct device *dev, void __iomem *base, struct cxl_device_reg_map *map); -int cxl_map_component_regs(struct cxl_register_map *map, +int cxl_map_component_regs(const struct cxl_register_map *map, struct cxl_component_regs *regs, unsigned long map_mask); -int cxl_map_device_regs(struct cxl_register_map *map, +int cxl_map_device_regs(const struct cxl_register_map *map, struct cxl_device_regs *regs); +int cxl_map_pmu_regs(struct pci_dev *pdev, struct cxl_pmu_regs *regs, + struct cxl_register_map *map); enum cxl_regloc_type; +int cxl_count_regblock(struct pci_dev *pdev, enum cxl_regloc_type type); +int cxl_find_regblock_instance(struct pci_dev *pdev, enum cxl_regloc_type type, + struct cxl_register_map *map, int index); int cxl_find_regblock(struct pci_dev *pdev, enum cxl_regloc_type type, struct cxl_register_map *map); int cxl_setup_regs(struct cxl_register_map *map); @@ -288,8 +311,8 @@ resource_size_t cxl_rcd_component_reg_phys(struct device *dev, #define CXL_DECODER_F_MASK GENMASK(5, 0) enum cxl_decoder_type { - CXL_DECODER_ACCELERATOR = 2, - CXL_DECODER_EXPANDER = 3, + CXL_DECODER_DEVMEM = 2, + CXL_DECODER_HOSTONLYMEM = 3, }; /* @@ -461,17 +484,19 @@ struct cxl_region_params { }; /* - * Flag whether this region needs to have its HPA span synchronized with - * CPU cache state at region activation time. - */ -#define CXL_REGION_F_INCOHERENT 0 - -/* * Indicate whether this region has been assembled by autodetection or * userspace assembly. Prevent endpoint decoders outside of automatic * detection from being added to the region. */ -#define CXL_REGION_F_AUTO 1 +#define CXL_REGION_F_AUTO 0 + +/* + * Require that a committed region successfully complete a teardown once + * any of its associated decoders have been torn down. This maintains + * the commit state for the region since there are committed decoders, + * but blocks cxl_region_probe(). + */ +#define CXL_REGION_F_NEEDS_RESET 1 /** * struct cxl_region - CXL region @@ -717,7 +742,6 @@ struct cxl_endpoint_dvsec_info { struct cxl_hdm; struct cxl_hdm *devm_cxl_setup_hdm(struct cxl_port *port, struct cxl_endpoint_dvsec_info *info); -int devm_cxl_enable_hdm(struct cxl_port *port, struct cxl_hdm *cxlhdm); int devm_cxl_enumerate_decoders(struct cxl_hdm *cxlhdm, struct cxl_endpoint_dvsec_info *info); int devm_cxl_add_passthrough_decoder(struct cxl_port *port); @@ -757,6 +781,7 @@ void cxl_driver_unregister(struct cxl_driver *cxl_drv); #define CXL_DEVICE_REGION 6 #define CXL_DEVICE_PMEM_REGION 7 #define CXL_DEVICE_DAX_REGION 8 +#define CXL_DEVICE_PMU 9 #define MODULE_ALIAS_CXL(type) MODULE_ALIAS("cxl:t" __stringify(type) "*") #define CXL_MODALIAS_FMT "cxl:t%d" diff --git a/drivers/cxl/cxlmem.h b/drivers/cxl/cxlmem.h index 76743016b64c..79e99c873ca2 100644 --- a/drivers/cxl/cxlmem.h +++ b/drivers/cxl/cxlmem.h @@ -5,6 +5,7 @@ #include <uapi/linux/cxl_mem.h> #include <linux/cdev.h> #include <linux/uuid.h> +#include <linux/rcuwait.h> #include "cxl.h" /* CXL 2.0 8.2.8.5.1.1 Memory Device Status Register */ @@ -38,6 +39,7 @@ * @detach_work: active memdev lost a port in its ancestry * @cxl_nvb: coordinate removal of @cxl_nvd if present * @cxl_nvd: optional bridge to an nvdimm if the device supports pmem + * @endpoint: connection to the CXL port topology for this memory device * @id: id number of this memdev instance. * @depth: endpoint port depth */ @@ -48,6 +50,7 @@ struct cxl_memdev { struct work_struct detach_work; struct cxl_nvdimm_bridge *cxl_nvb; struct cxl_nvdimm *cxl_nvd; + struct cxl_port *endpoint; int id; int depth; }; @@ -82,6 +85,8 @@ static inline bool is_cxl_endpoint(struct cxl_port *port) } struct cxl_memdev *devm_cxl_add_memdev(struct cxl_dev_state *cxlds); +struct cxl_memdev_state; +int cxl_memdev_setup_fw_upload(struct cxl_memdev_state *mds); int devm_cxl_dpa_reserve(struct cxl_endpoint_decoder *cxled, resource_size_t base, resource_size_t len, resource_size_t skipped); @@ -108,6 +113,9 @@ static inline struct cxl_ep *cxl_ep_load(struct cxl_port *port, * variable sized output commands, it tells the exact number of bytes * written. * @min_out: (input) internal command output payload size validation + * @poll_count: (input) Number of timeouts to attempt. + * @poll_interval_ms: (input) Time between mailbox background command polling + * interval timeouts. * @return_code: (output) Error code returned from hardware. * * This is the primary mechanism used to send commands to the hardware. @@ -123,6 +131,8 @@ struct cxl_mbox_cmd { size_t size_in; size_t size_out; size_t min_out; + int poll_count; + int poll_interval_ms; u16 return_code; }; @@ -195,7 +205,7 @@ static inline int cxl_mbox_cmd_rc2errno(struct cxl_mbox_cmd *mbox_cmd) */ #define CXL_CAPACITY_MULTIPLIER SZ_256M -/** +/* * Event Interrupt Policy * * CXL rev 3.0 section 8.2.9.2.4; Table 8-52 @@ -215,8 +225,8 @@ struct cxl_event_interrupt_policy { /** * struct cxl_event_state - Event log driver state * - * @event_buf: Buffer to receive event data - * @event_log_lock: Serialize event_buf and log use + * @buf: Buffer to receive event data + * @log_lock: Serialize event_buf and log use */ struct cxl_event_state { struct cxl_get_event_payload *buf; @@ -254,6 +264,115 @@ struct cxl_poison_state { struct mutex lock; /* Protect reads of poison list */ }; +/* + * Get FW Info + * CXL rev 3.0 section 8.2.9.3.1; Table 8-56 + */ +struct cxl_mbox_get_fw_info { + u8 num_slots; + u8 slot_info; + u8 activation_cap; + u8 reserved[13]; + char slot_1_revision[16]; + char slot_2_revision[16]; + char slot_3_revision[16]; + char slot_4_revision[16]; +} __packed; + +#define CXL_FW_INFO_SLOT_INFO_CUR_MASK GENMASK(2, 0) +#define CXL_FW_INFO_SLOT_INFO_NEXT_MASK GENMASK(5, 3) +#define CXL_FW_INFO_SLOT_INFO_NEXT_SHIFT 3 +#define CXL_FW_INFO_ACTIVATION_CAP_HAS_LIVE_ACTIVATE BIT(0) + +/* + * Transfer FW Input Payload + * CXL rev 3.0 section 8.2.9.3.2; Table 8-57 + */ +struct cxl_mbox_transfer_fw { + u8 action; + u8 slot; + u8 reserved[2]; + __le32 offset; + u8 reserved2[0x78]; + u8 data[]; +} __packed; + +#define CXL_FW_TRANSFER_ACTION_FULL 0x0 +#define CXL_FW_TRANSFER_ACTION_INITIATE 0x1 +#define CXL_FW_TRANSFER_ACTION_CONTINUE 0x2 +#define CXL_FW_TRANSFER_ACTION_END 0x3 +#define CXL_FW_TRANSFER_ACTION_ABORT 0x4 + +/* + * CXL rev 3.0 section 8.2.9.3.2 mandates 128-byte alignment for FW packages + * and for each part transferred in a Transfer FW command. + */ +#define CXL_FW_TRANSFER_ALIGNMENT 128 + +/* + * Activate FW Input Payload + * CXL rev 3.0 section 8.2.9.3.3; Table 8-58 + */ +struct cxl_mbox_activate_fw { + u8 action; + u8 slot; +} __packed; + +#define CXL_FW_ACTIVATE_ONLINE 0x0 +#define CXL_FW_ACTIVATE_OFFLINE 0x1 + +/* FW state bits */ +#define CXL_FW_STATE_BITS 32 +#define CXL_FW_CANCEL BIT(0) + +/** + * struct cxl_fw_state - Firmware upload / activation state + * + * @state: fw_uploader state bitmask + * @oneshot: whether the fw upload fits in a single transfer + * @num_slots: Number of FW slots available + * @cur_slot: Slot number currently active + * @next_slot: Slot number for the new firmware + */ +struct cxl_fw_state { + DECLARE_BITMAP(state, CXL_FW_STATE_BITS); + bool oneshot; + int num_slots; + int cur_slot; + int next_slot; +}; + +/** + * struct cxl_security_state - Device security state + * + * @state: state of last security operation + * @poll: polling for sanitization is enabled, device has no mbox irq support + * @poll_tmo_secs: polling timeout + * @poll_dwork: polling work item + * @sanitize_node: sanitation sysfs file to notify + */ +struct cxl_security_state { + unsigned long state; + bool poll; + int poll_tmo_secs; + struct delayed_work poll_dwork; + struct kernfs_node *sanitize_node; +}; + +/* + * enum cxl_devtype - delineate type-2 from a generic type-3 device + * @CXL_DEVTYPE_DEVMEM - Vendor specific CXL Type-2 device implementing HDM-D or + * HDM-DB, no requirement that this device implements a + * mailbox, or other memory-device-standard manageability + * flows. + * @CXL_DEVTYPE_CLASSMEM - Common class definition of a CXL Type-3 device with + * HDM-H and class-mandatory memory device registers + */ +enum cxl_devtype { + CXL_DEVTYPE_DEVMEM, + CXL_DEVTYPE_CLASSMEM, +}; + /** * struct cxl_dev_state - The driver device state * @@ -267,6 +386,36 @@ struct cxl_poison_state { * @cxl_dvsec: Offset to the PCIe device DVSEC * @rcd: operating in RCD mode (CXL 3.0 9.11.8 CXL Devices Attached to an RCH) * @media_ready: Indicate whether the device media is usable + * @dpa_res: Overall DPA resource tree for the device + * @pmem_res: Active Persistent memory capacity configuration + * @ram_res: Active Volatile memory capacity configuration + * @component_reg_phys: register base of component registers + * @serial: PCIe Device Serial Number + * @type: Generic Memory Class device or Vendor Specific Memory device + */ +struct cxl_dev_state { + struct device *dev; + struct cxl_memdev *cxlmd; + struct cxl_regs regs; + int cxl_dvsec; + bool rcd; + bool media_ready; + struct resource dpa_res; + struct resource pmem_res; + struct resource ram_res; + resource_size_t component_reg_phys; + u64 serial; + enum cxl_devtype type; +}; + +/** + * struct cxl_memdev_state - Generic Type-3 Memory Device Class driver data + * + * CXL 8.1.12.1 PCI Header - Class Code Register Memory Device defines + * common memory device functionality like the presence of a mailbox and + * the functionality related to that like Identify Memory Device and Get + * Partition Info + * @cxlds: Core driver state common across Type-2 and Type-3 devices * @payload_size: Size of space for payload * (CXL 2.0 8.2.8.4.3 Mailbox Capabilities Register) * @lsa_size: Size of Label Storage Area @@ -275,9 +424,6 @@ struct cxl_poison_state { * @firmware_version: Firmware version for the memory device. * @enabled_cmds: Hardware commands found enabled in CEL. * @exclusive_cmds: Commands that are kernel-internal only - * @dpa_res: Overall DPA resource tree for the device - * @pmem_res: Active Persistent memory capacity configuration - * @ram_res: Active Volatile memory capacity configuration * @total_bytes: sum of all possible capacities * @volatile_only_bytes: hard volatile capacity * @persistent_only_bytes: hard persistent capacity @@ -286,54 +432,48 @@ struct cxl_poison_state { * @active_persistent_bytes: sum of hard + soft persistent * @next_volatile_bytes: volatile capacity change pending device reset * @next_persistent_bytes: persistent capacity change pending device reset - * @component_reg_phys: register base of component registers - * @info: Cached DVSEC information about the device. - * @serial: PCIe Device Serial Number * @event: event log driver state * @poison: poison driver state info + * @fw: firmware upload / activation state * @mbox_send: @dev specific transport for transmitting mailbox commands * - * See section 8.2.9.5.2 Capacity Configuration and Label Storage for + * See CXL 3.0 8.2.9.8.2 Capacity Configuration and Label Storage for * details on capacity parameters. */ -struct cxl_dev_state { - struct device *dev; - struct cxl_memdev *cxlmd; - - struct cxl_regs regs; - int cxl_dvsec; - - bool rcd; - bool media_ready; +struct cxl_memdev_state { + struct cxl_dev_state cxlds; size_t payload_size; size_t lsa_size; struct mutex mbox_mutex; /* Protects device mailbox and firmware */ char firmware_version[0x10]; DECLARE_BITMAP(enabled_cmds, CXL_MEM_COMMAND_ID_MAX); DECLARE_BITMAP(exclusive_cmds, CXL_MEM_COMMAND_ID_MAX); - - struct resource dpa_res; - struct resource pmem_res; - struct resource ram_res; u64 total_bytes; u64 volatile_only_bytes; u64 persistent_only_bytes; u64 partition_align_bytes; - u64 active_volatile_bytes; u64 active_persistent_bytes; u64 next_volatile_bytes; u64 next_persistent_bytes; - - resource_size_t component_reg_phys; - u64 serial; - struct cxl_event_state event; struct cxl_poison_state poison; + struct cxl_security_state security; + struct cxl_fw_state fw; - int (*mbox_send)(struct cxl_dev_state *cxlds, struct cxl_mbox_cmd *cmd); + struct rcuwait mbox_wait; + int (*mbox_send)(struct cxl_memdev_state *mds, + struct cxl_mbox_cmd *cmd); }; +static inline struct cxl_memdev_state * +to_cxl_memdev_state(struct cxl_dev_state *cxlds) +{ + if (cxlds->type != CXL_DEVTYPE_CLASSMEM) + return NULL; + return container_of(cxlds, struct cxl_memdev_state, cxlds); +} + enum cxl_opcode { CXL_MBOX_OP_INVALID = 0x0000, CXL_MBOX_OP_RAW = CXL_MBOX_OP_INVALID, @@ -342,6 +482,7 @@ enum cxl_opcode { CXL_MBOX_OP_GET_EVT_INT_POLICY = 0x0102, CXL_MBOX_OP_SET_EVT_INT_POLICY = 0x0103, CXL_MBOX_OP_GET_FW_INFO = 0x0200, + CXL_MBOX_OP_TRANSFER_FW = 0x0201, CXL_MBOX_OP_ACTIVATE_FW = 0x0202, CXL_MBOX_OP_SET_TIMESTAMP = 0x0301, CXL_MBOX_OP_GET_SUPPORTED_LOGS = 0x0400, @@ -362,6 +503,8 @@ enum cxl_opcode { CXL_MBOX_OP_GET_SCAN_MEDIA_CAPS = 0x4303, CXL_MBOX_OP_SCAN_MEDIA = 0x4304, CXL_MBOX_OP_GET_SCAN_MEDIA = 0x4305, + CXL_MBOX_OP_SANITIZE = 0x4400, + CXL_MBOX_OP_SECURE_ERASE = 0x4401, CXL_MBOX_OP_GET_SECURITY_STATE = 0x4500, CXL_MBOX_OP_SET_PASSPHRASE = 0x4501, CXL_MBOX_OP_DISABLE_PASSPHRASE = 0x4502, @@ -692,18 +835,20 @@ enum { CXL_PMEM_SEC_PASS_USER, }; -int cxl_internal_send_cmd(struct cxl_dev_state *cxlds, +int cxl_internal_send_cmd(struct cxl_memdev_state *mds, struct cxl_mbox_cmd *cmd); -int cxl_dev_state_identify(struct cxl_dev_state *cxlds); +int cxl_dev_state_identify(struct cxl_memdev_state *mds); int cxl_await_media_ready(struct cxl_dev_state *cxlds); -int cxl_enumerate_cmds(struct cxl_dev_state *cxlds); -int cxl_mem_create_range_info(struct cxl_dev_state *cxlds); -struct cxl_dev_state *cxl_dev_state_create(struct device *dev); -void set_exclusive_cxl_commands(struct cxl_dev_state *cxlds, unsigned long *cmds); -void clear_exclusive_cxl_commands(struct cxl_dev_state *cxlds, unsigned long *cmds); -void cxl_mem_get_event_records(struct cxl_dev_state *cxlds, u32 status); -int cxl_set_timestamp(struct cxl_dev_state *cxlds); -int cxl_poison_state_init(struct cxl_dev_state *cxlds); +int cxl_enumerate_cmds(struct cxl_memdev_state *mds); +int cxl_mem_create_range_info(struct cxl_memdev_state *mds); +struct cxl_memdev_state *cxl_memdev_state_create(struct device *dev); +void set_exclusive_cxl_commands(struct cxl_memdev_state *mds, + unsigned long *cmds); +void clear_exclusive_cxl_commands(struct cxl_memdev_state *mds, + unsigned long *cmds); +void cxl_mem_get_event_records(struct cxl_memdev_state *mds, u32 status); +int cxl_set_timestamp(struct cxl_memdev_state *mds); +int cxl_poison_state_init(struct cxl_memdev_state *mds); int cxl_mem_get_poison(struct cxl_memdev *cxlmd, u64 offset, u64 len, struct cxl_region *cxlr); int cxl_trigger_poison_list(struct cxl_memdev *cxlmd); @@ -722,6 +867,8 @@ static inline void cxl_mem_active_dec(void) } #endif +int cxl_mem_sanitize(struct cxl_memdev_state *mds, u16 cmd); + struct cxl_hdm { struct cxl_component_regs regs; unsigned int decoder_count; diff --git a/drivers/cxl/cxlpci.h b/drivers/cxl/cxlpci.h index 7c02e55b8042..0fa4799ea316 100644 --- a/drivers/cxl/cxlpci.h +++ b/drivers/cxl/cxlpci.h @@ -67,6 +67,7 @@ enum cxl_regloc_type { CXL_REGLOC_RBI_COMPONENT, CXL_REGLOC_RBI_VIRT, CXL_REGLOC_RBI_MEMDEV, + CXL_REGLOC_RBI_PMU, CXL_REGLOC_RBI_TYPES }; diff --git a/drivers/cxl/mem.c b/drivers/cxl/mem.c index 205e2e280aed..317c7548e4e9 100644 --- a/drivers/cxl/mem.c +++ b/drivers/cxl/mem.c @@ -107,6 +107,7 @@ DEFINE_DEBUGFS_ATTRIBUTE(cxl_poison_clear_fops, NULL, static int cxl_mem_probe(struct device *dev) { struct cxl_memdev *cxlmd = to_cxl_memdev(dev); + struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlmd->cxlds); struct cxl_dev_state *cxlds = cxlmd->cxlds; struct device *endpoint_parent; struct cxl_port *parent_port; @@ -131,10 +132,10 @@ static int cxl_mem_probe(struct device *dev) dentry = cxl_debugfs_create_dir(dev_name(dev)); debugfs_create_devm_seqfile(dev, "dpamem", dentry, cxl_mem_dpa_show); - if (test_bit(CXL_POISON_ENABLED_INJECT, cxlds->poison.enabled_cmds)) + if (test_bit(CXL_POISON_ENABLED_INJECT, mds->poison.enabled_cmds)) debugfs_create_file("inject_poison", 0200, dentry, cxlmd, &cxl_poison_inject_fops); - if (test_bit(CXL_POISON_ENABLED_CLEAR, cxlds->poison.enabled_cmds)) + if (test_bit(CXL_POISON_ENABLED_CLEAR, mds->poison.enabled_cmds)) debugfs_create_file("clear_poison", 0200, dentry, cxlmd, &cxl_poison_clear_fops); @@ -217,9 +218,12 @@ static umode_t cxl_mem_visible(struct kobject *kobj, struct attribute *a, int n) { if (a == &dev_attr_trigger_poison_list.attr) { struct device *dev = kobj_to_dev(kobj); + struct cxl_memdev *cxlmd = to_cxl_memdev(dev); + struct cxl_memdev_state *mds = + to_cxl_memdev_state(cxlmd->cxlds); if (!test_bit(CXL_POISON_ENABLED_LIST, - to_cxl_memdev(dev)->cxlds->poison.enabled_cmds)) + mds->poison.enabled_cmds)) return 0; } return a->mode; diff --git a/drivers/cxl/pci.c b/drivers/cxl/pci.c index 99a75c54ee39..48f88d96029d 100644 --- a/drivers/cxl/pci.c +++ b/drivers/cxl/pci.c @@ -13,6 +13,7 @@ #include "cxlmem.h" #include "cxlpci.h" #include "cxl.h" +#include "pmu.h" /** * DOC: cxl pci @@ -84,9 +85,92 @@ static int cxl_pci_mbox_wait_for_doorbell(struct cxl_dev_state *cxlds) status & CXLMDEV_DEV_FATAL ? " fatal" : "", \ status & CXLMDEV_FW_HALT ? " firmware-halt" : "") +struct cxl_dev_id { + struct cxl_dev_state *cxlds; +}; + +static int cxl_request_irq(struct cxl_dev_state *cxlds, int irq, + irq_handler_t handler, irq_handler_t thread_fn) +{ + struct device *dev = cxlds->dev; + struct cxl_dev_id *dev_id; + + /* dev_id must be globally unique and must contain the cxlds */ + dev_id = devm_kzalloc(dev, sizeof(*dev_id), GFP_KERNEL); + if (!dev_id) + return -ENOMEM; + dev_id->cxlds = cxlds; + + return devm_request_threaded_irq(dev, irq, handler, thread_fn, + IRQF_SHARED | IRQF_ONESHOT, + NULL, dev_id); +} + +static bool cxl_mbox_background_complete(struct cxl_dev_state *cxlds) +{ + u64 reg; + + reg = readq(cxlds->regs.mbox + CXLDEV_MBOX_BG_CMD_STATUS_OFFSET); + return FIELD_GET(CXLDEV_MBOX_BG_CMD_COMMAND_PCT_MASK, reg) == 100; +} + +static irqreturn_t cxl_pci_mbox_irq(int irq, void *id) +{ + u64 reg; + u16 opcode; + struct cxl_dev_id *dev_id = id; + struct cxl_dev_state *cxlds = dev_id->cxlds; + struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlds); + + if (!cxl_mbox_background_complete(cxlds)) + return IRQ_NONE; + + reg = readq(cxlds->regs.mbox + CXLDEV_MBOX_BG_CMD_STATUS_OFFSET); + opcode = FIELD_GET(CXLDEV_MBOX_BG_CMD_COMMAND_OPCODE_MASK, reg); + if (opcode == CXL_MBOX_OP_SANITIZE) { + if (mds->security.sanitize_node) + sysfs_notify_dirent(mds->security.sanitize_node); + + dev_dbg(cxlds->dev, "Sanitization operation ended\n"); + } else { + /* short-circuit the wait in __cxl_pci_mbox_send_cmd() */ + rcuwait_wake_up(&mds->mbox_wait); + } + + return IRQ_HANDLED; +} + +/* + * Sanitization operation polling mode. + */ +static void cxl_mbox_sanitize_work(struct work_struct *work) +{ + struct cxl_memdev_state *mds = + container_of(work, typeof(*mds), security.poll_dwork.work); + struct cxl_dev_state *cxlds = &mds->cxlds; + + mutex_lock(&mds->mbox_mutex); + if (cxl_mbox_background_complete(cxlds)) { + mds->security.poll_tmo_secs = 0; + put_device(cxlds->dev); + + if (mds->security.sanitize_node) + sysfs_notify_dirent(mds->security.sanitize_node); + + dev_dbg(cxlds->dev, "Sanitization operation ended\n"); + } else { + int timeout = mds->security.poll_tmo_secs + 10; + + mds->security.poll_tmo_secs = min(15 * 60, timeout); + queue_delayed_work(system_wq, &mds->security.poll_dwork, + timeout * HZ); + } + mutex_unlock(&mds->mbox_mutex); +} + /** * __cxl_pci_mbox_send_cmd() - Execute a mailbox command - * @cxlds: The device state to communicate with. + * @mds: The memory device driver data * @mbox_cmd: Command to send to the memory device. * * Context: Any context. Expects mbox_mutex to be held. @@ -106,16 +190,17 @@ static int cxl_pci_mbox_wait_for_doorbell(struct cxl_dev_state *cxlds) * not need to coordinate with each other. The driver only uses the primary * mailbox. */ -static int __cxl_pci_mbox_send_cmd(struct cxl_dev_state *cxlds, +static int __cxl_pci_mbox_send_cmd(struct cxl_memdev_state *mds, struct cxl_mbox_cmd *mbox_cmd) { + struct cxl_dev_state *cxlds = &mds->cxlds; void __iomem *payload = cxlds->regs.mbox + CXLDEV_MBOX_PAYLOAD_OFFSET; struct device *dev = cxlds->dev; u64 cmd_reg, status_reg; size_t out_len; int rc; - lockdep_assert_held(&cxlds->mbox_mutex); + lockdep_assert_held(&mds->mbox_mutex); /* * Here are the steps from 8.2.8.4 of the CXL 2.0 spec. @@ -144,6 +229,16 @@ static int __cxl_pci_mbox_send_cmd(struct cxl_dev_state *cxlds, return -EBUSY; } + /* + * With sanitize polling, hardware might be done and the poller still + * not be in sync. Ensure no new command comes in until so. Keep the + * hardware semantics and only allow device health status. + */ + if (mds->security.poll_tmo_secs > 0) { + if (mbox_cmd->opcode != CXL_MBOX_OP_GET_HEALTH_INFO) + return -EBUSY; + } + cmd_reg = FIELD_PREP(CXLDEV_MBOX_CMD_COMMAND_OPCODE_MASK, mbox_cmd->opcode); if (mbox_cmd->size_in) { @@ -177,12 +272,80 @@ static int __cxl_pci_mbox_send_cmd(struct cxl_dev_state *cxlds, mbox_cmd->return_code = FIELD_GET(CXLDEV_MBOX_STATUS_RET_CODE_MASK, status_reg); + /* + * Handle the background command in a synchronous manner. + * + * All other mailbox commands will serialize/queue on the mbox_mutex, + * which we currently hold. Furthermore this also guarantees that + * cxl_mbox_background_complete() checks are safe amongst each other, + * in that no new bg operation can occur in between. + * + * Background operations are timesliced in accordance with the nature + * of the command. In the event of timeout, the mailbox state is + * indeterminate until the next successful command submission and the + * driver can get back in sync with the hardware state. + */ + if (mbox_cmd->return_code == CXL_MBOX_CMD_RC_BACKGROUND) { + u64 bg_status_reg; + int i, timeout; + + /* + * Sanitization is a special case which monopolizes the device + * and cannot be timesliced. Handle asynchronously instead, + * and allow userspace to poll(2) for completion. + */ + if (mbox_cmd->opcode == CXL_MBOX_OP_SANITIZE) { + if (mds->security.poll_tmo_secs != -1) { + /* hold the device throughout */ + get_device(cxlds->dev); + + /* give first timeout a second */ + timeout = 1; + mds->security.poll_tmo_secs = timeout; + queue_delayed_work(system_wq, + &mds->security.poll_dwork, + timeout * HZ); + } + + dev_dbg(dev, "Sanitization operation started\n"); + goto success; + } + + dev_dbg(dev, "Mailbox background operation (0x%04x) started\n", + mbox_cmd->opcode); + + timeout = mbox_cmd->poll_interval_ms; + for (i = 0; i < mbox_cmd->poll_count; i++) { + if (rcuwait_wait_event_timeout(&mds->mbox_wait, + cxl_mbox_background_complete(cxlds), + TASK_UNINTERRUPTIBLE, + msecs_to_jiffies(timeout)) > 0) + break; + } + + if (!cxl_mbox_background_complete(cxlds)) { + dev_err(dev, "timeout waiting for background (%d ms)\n", + timeout * mbox_cmd->poll_count); + return -ETIMEDOUT; + } + + bg_status_reg = readq(cxlds->regs.mbox + + CXLDEV_MBOX_BG_CMD_STATUS_OFFSET); + mbox_cmd->return_code = + FIELD_GET(CXLDEV_MBOX_BG_CMD_COMMAND_RC_MASK, + bg_status_reg); + dev_dbg(dev, + "Mailbox background operation (0x%04x) completed\n", + mbox_cmd->opcode); + } + if (mbox_cmd->return_code != CXL_MBOX_CMD_RC_SUCCESS) { dev_dbg(dev, "Mailbox operation had an error: %s\n", cxl_mbox_cmd_rc2str(mbox_cmd)); return 0; /* completed but caller must check return_code */ } +success: /* #7 */ cmd_reg = readq(cxlds->regs.mbox + CXLDEV_MBOX_CMD_OFFSET); out_len = FIELD_GET(CXLDEV_MBOX_CMD_PAYLOAD_LENGTH_MASK, cmd_reg); @@ -196,8 +359,9 @@ static int __cxl_pci_mbox_send_cmd(struct cxl_dev_state *cxlds, * have requested less data than the hardware supplied even * within spec. */ - size_t n = min3(mbox_cmd->size_out, cxlds->payload_size, out_len); + size_t n; + n = min3(mbox_cmd->size_out, mds->payload_size, out_len); memcpy_fromio(mbox_cmd->payload_out, payload, n); mbox_cmd->size_out = n; } else { @@ -207,20 +371,23 @@ static int __cxl_pci_mbox_send_cmd(struct cxl_dev_state *cxlds, return 0; } -static int cxl_pci_mbox_send(struct cxl_dev_state *cxlds, struct cxl_mbox_cmd *cmd) +static int cxl_pci_mbox_send(struct cxl_memdev_state *mds, + struct cxl_mbox_cmd *cmd) { int rc; - mutex_lock_io(&cxlds->mbox_mutex); - rc = __cxl_pci_mbox_send_cmd(cxlds, cmd); - mutex_unlock(&cxlds->mbox_mutex); + mutex_lock_io(&mds->mbox_mutex); + rc = __cxl_pci_mbox_send_cmd(mds, cmd); + mutex_unlock(&mds->mbox_mutex); return rc; } -static int cxl_pci_setup_mailbox(struct cxl_dev_state *cxlds) +static int cxl_pci_setup_mailbox(struct cxl_memdev_state *mds) { + struct cxl_dev_state *cxlds = &mds->cxlds; const int cap = readl(cxlds->regs.mbox + CXLDEV_MBOX_CAPS_OFFSET); + struct device *dev = cxlds->dev; unsigned long timeout; u64 md_status; @@ -234,8 +401,7 @@ static int cxl_pci_setup_mailbox(struct cxl_dev_state *cxlds) } while (!time_after(jiffies, timeout)); if (!(md_status & CXLMDEV_MBOX_IF_READY)) { - cxl_err(cxlds->dev, md_status, - "timeout awaiting mailbox ready"); + cxl_err(dev, md_status, "timeout awaiting mailbox ready"); return -ETIMEDOUT; } @@ -246,12 +412,12 @@ static int cxl_pci_setup_mailbox(struct cxl_dev_state *cxlds) * source for future doorbell busy events. */ if (cxl_pci_mbox_wait_for_doorbell(cxlds) != 0) { - cxl_err(cxlds->dev, md_status, "timeout awaiting mailbox idle"); + cxl_err(dev, md_status, "timeout awaiting mailbox idle"); return -ETIMEDOUT; } - cxlds->mbox_send = cxl_pci_mbox_send; - cxlds->payload_size = + mds->mbox_send = cxl_pci_mbox_send; + mds->payload_size = 1 << FIELD_GET(CXLDEV_MBOX_CAP_PAYLOAD_SIZE_MASK, cap); /* @@ -261,16 +427,43 @@ static int cxl_pci_setup_mailbox(struct cxl_dev_state *cxlds) * there's no point in going forward. If the size is too large, there's * no harm is soft limiting it. */ - cxlds->payload_size = min_t(size_t, cxlds->payload_size, SZ_1M); - if (cxlds->payload_size < 256) { - dev_err(cxlds->dev, "Mailbox is too small (%zub)", - cxlds->payload_size); + mds->payload_size = min_t(size_t, mds->payload_size, SZ_1M); + if (mds->payload_size < 256) { + dev_err(dev, "Mailbox is too small (%zub)", + mds->payload_size); return -ENXIO; } - dev_dbg(cxlds->dev, "Mailbox payload sized %zu", - cxlds->payload_size); + dev_dbg(dev, "Mailbox payload sized %zu", mds->payload_size); + + rcuwait_init(&mds->mbox_wait); + + if (cap & CXLDEV_MBOX_CAP_BG_CMD_IRQ) { + u32 ctrl; + int irq, msgnum; + struct pci_dev *pdev = to_pci_dev(cxlds->dev); + + msgnum = FIELD_GET(CXLDEV_MBOX_CAP_IRQ_MSGNUM_MASK, cap); + irq = pci_irq_vector(pdev, msgnum); + if (irq < 0) + goto mbox_poll; + + if (cxl_request_irq(cxlds, irq, cxl_pci_mbox_irq, NULL)) + goto mbox_poll; + + /* enable background command mbox irq support */ + ctrl = readl(cxlds->regs.mbox + CXLDEV_MBOX_CTRL_OFFSET); + ctrl |= CXLDEV_MBOX_CTRL_BG_CMD_IRQ; + writel(ctrl, cxlds->regs.mbox + CXLDEV_MBOX_CTRL_OFFSET); + + return 0; + } + +mbox_poll: + mds->security.poll = true; + INIT_DELAYED_WORK(&mds->security.poll_dwork, cxl_mbox_sanitize_work); + dev_dbg(cxlds->dev, "Mailbox interrupts are unsupported"); return 0; } @@ -334,19 +527,6 @@ static int cxl_pci_setup_regs(struct pci_dev *pdev, enum cxl_regloc_type type, return cxl_setup_regs(map); } -/* - * CXL v3.0 6.2.3 Table 6-4 - * The table indicates that if PCIe Flit Mode is set, then CXL is in 256B flits - * mode, otherwise it's 68B flits mode. - */ -static bool cxl_pci_flit_256(struct pci_dev *pdev) -{ - u16 lnksta2; - - pcie_capability_read_word(pdev, PCI_EXP_LNKSTA2, &lnksta2); - return lnksta2 & PCI_EXP_LNKSTA2_FLIT; -} - static int cxl_pci_ras_unmask(struct pci_dev *pdev) { struct pci_host_bridge *host_bridge = pci_find_host_bridge(pdev->bus); @@ -373,9 +553,8 @@ static int cxl_pci_ras_unmask(struct pci_dev *pdev) addr = cxlds->regs.ras + CXL_RAS_UNCORRECTABLE_MASK_OFFSET; orig_val = readl(addr); - mask = CXL_RAS_UNCORRECTABLE_MASK_MASK; - if (!cxl_pci_flit_256(pdev)) - mask &= ~CXL_RAS_UNCORRECTABLE_MASK_F256B_MASK; + mask = CXL_RAS_UNCORRECTABLE_MASK_MASK | + CXL_RAS_UNCORRECTABLE_MASK_F256B_MASK; val = orig_val & ~mask; writel(val, addr); dev_dbg(&pdev->dev, @@ -402,18 +581,18 @@ static void free_event_buf(void *buf) /* * There is a single buffer for reading event logs from the mailbox. All logs - * share this buffer protected by the cxlds->event_log_lock. + * share this buffer protected by the mds->event_log_lock. */ -static int cxl_mem_alloc_event_buf(struct cxl_dev_state *cxlds) +static int cxl_mem_alloc_event_buf(struct cxl_memdev_state *mds) { struct cxl_get_event_payload *buf; - buf = kvmalloc(cxlds->payload_size, GFP_KERNEL); + buf = kvmalloc(mds->payload_size, GFP_KERNEL); if (!buf) return -ENOMEM; - cxlds->event.buf = buf; + mds->event.buf = buf; - return devm_add_action_or_reset(cxlds->dev, free_event_buf, buf); + return devm_add_action_or_reset(mds->cxlds.dev, free_event_buf, buf); } static int cxl_alloc_irq_vectors(struct pci_dev *pdev) @@ -438,14 +617,11 @@ static int cxl_alloc_irq_vectors(struct pci_dev *pdev) return 0; } -struct cxl_dev_id { - struct cxl_dev_state *cxlds; -}; - static irqreturn_t cxl_event_thread(int irq, void *id) { struct cxl_dev_id *dev_id = id; struct cxl_dev_state *cxlds = dev_id->cxlds; + struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlds); u32 status; do { @@ -458,7 +634,7 @@ static irqreturn_t cxl_event_thread(int irq, void *id) status &= CXLDEV_EVENT_STATUS_ALL; if (!status) break; - cxl_mem_get_event_records(cxlds, status); + cxl_mem_get_event_records(mds, status); cond_resched(); } while (status); @@ -467,31 +643,21 @@ static irqreturn_t cxl_event_thread(int irq, void *id) static int cxl_event_req_irq(struct cxl_dev_state *cxlds, u8 setting) { - struct device *dev = cxlds->dev; - struct pci_dev *pdev = to_pci_dev(dev); - struct cxl_dev_id *dev_id; + struct pci_dev *pdev = to_pci_dev(cxlds->dev); int irq; if (FIELD_GET(CXLDEV_EVENT_INT_MODE_MASK, setting) != CXL_INT_MSI_MSIX) return -ENXIO; - /* dev_id must be globally unique and must contain the cxlds */ - dev_id = devm_kzalloc(dev, sizeof(*dev_id), GFP_KERNEL); - if (!dev_id) - return -ENOMEM; - dev_id->cxlds = cxlds; - irq = pci_irq_vector(pdev, FIELD_GET(CXLDEV_EVENT_INT_MSGNUM_MASK, setting)); if (irq < 0) return irq; - return devm_request_threaded_irq(dev, irq, NULL, cxl_event_thread, - IRQF_SHARED | IRQF_ONESHOT, NULL, - dev_id); + return cxl_request_irq(cxlds, irq, NULL, cxl_event_thread); } -static int cxl_event_get_int_policy(struct cxl_dev_state *cxlds, +static int cxl_event_get_int_policy(struct cxl_memdev_state *mds, struct cxl_event_interrupt_policy *policy) { struct cxl_mbox_cmd mbox_cmd = { @@ -501,15 +667,15 @@ static int cxl_event_get_int_policy(struct cxl_dev_state *cxlds, }; int rc; - rc = cxl_internal_send_cmd(cxlds, &mbox_cmd); + rc = cxl_internal_send_cmd(mds, &mbox_cmd); if (rc < 0) - dev_err(cxlds->dev, "Failed to get event interrupt policy : %d", - rc); + dev_err(mds->cxlds.dev, + "Failed to get event interrupt policy : %d", rc); return rc; } -static int cxl_event_config_msgnums(struct cxl_dev_state *cxlds, +static int cxl_event_config_msgnums(struct cxl_memdev_state *mds, struct cxl_event_interrupt_policy *policy) { struct cxl_mbox_cmd mbox_cmd; @@ -528,23 +694,24 @@ static int cxl_event_config_msgnums(struct cxl_dev_state *cxlds, .size_in = sizeof(*policy), }; - rc = cxl_internal_send_cmd(cxlds, &mbox_cmd); + rc = cxl_internal_send_cmd(mds, &mbox_cmd); if (rc < 0) { - dev_err(cxlds->dev, "Failed to set event interrupt policy : %d", + dev_err(mds->cxlds.dev, "Failed to set event interrupt policy : %d", rc); return rc; } /* Retrieve final interrupt settings */ - return cxl_event_get_int_policy(cxlds, policy); + return cxl_event_get_int_policy(mds, policy); } -static int cxl_event_irqsetup(struct cxl_dev_state *cxlds) +static int cxl_event_irqsetup(struct cxl_memdev_state *mds) { + struct cxl_dev_state *cxlds = &mds->cxlds; struct cxl_event_interrupt_policy policy; int rc; - rc = cxl_event_config_msgnums(cxlds, &policy); + rc = cxl_event_config_msgnums(mds, &policy); if (rc) return rc; @@ -583,7 +750,7 @@ static bool cxl_event_int_is_fw(u8 setting) } static int cxl_event_config(struct pci_host_bridge *host_bridge, - struct cxl_dev_state *cxlds) + struct cxl_memdev_state *mds) { struct cxl_event_interrupt_policy policy; int rc; @@ -595,11 +762,11 @@ static int cxl_event_config(struct pci_host_bridge *host_bridge, if (!host_bridge->native_cxl_error) return 0; - rc = cxl_mem_alloc_event_buf(cxlds); + rc = cxl_mem_alloc_event_buf(mds); if (rc) return rc; - rc = cxl_event_get_int_policy(cxlds, &policy); + rc = cxl_event_get_int_policy(mds, &policy); if (rc) return rc; @@ -607,15 +774,16 @@ static int cxl_event_config(struct pci_host_bridge *host_bridge, cxl_event_int_is_fw(policy.warn_settings) || cxl_event_int_is_fw(policy.failure_settings) || cxl_event_int_is_fw(policy.fatal_settings)) { - dev_err(cxlds->dev, "FW still in control of Event Logs despite _OSC settings\n"); + dev_err(mds->cxlds.dev, + "FW still in control of Event Logs despite _OSC settings\n"); return -EBUSY; } - rc = cxl_event_irqsetup(cxlds); + rc = cxl_event_irqsetup(mds); if (rc) return rc; - cxl_mem_get_event_records(cxlds, CXLDEV_EVENT_STATUS_ALL); + cxl_mem_get_event_records(mds, CXLDEV_EVENT_STATUS_ALL); return 0; } @@ -623,10 +791,11 @@ static int cxl_event_config(struct pci_host_bridge *host_bridge, static int cxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) { struct pci_host_bridge *host_bridge = pci_find_host_bridge(pdev->bus); + struct cxl_memdev_state *mds; + struct cxl_dev_state *cxlds; struct cxl_register_map map; struct cxl_memdev *cxlmd; - struct cxl_dev_state *cxlds; - int rc; + int i, rc, pmu_count; /* * Double check the anonymous union trickery in struct cxl_regs @@ -640,9 +809,10 @@ static int cxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) return rc; pci_set_master(pdev); - cxlds = cxl_dev_state_create(&pdev->dev); - if (IS_ERR(cxlds)) - return PTR_ERR(cxlds); + mds = cxl_memdev_state_create(&pdev->dev); + if (IS_ERR(mds)) + return PTR_ERR(mds); + cxlds = &mds->cxlds; pci_set_drvdata(pdev, cxlds); cxlds->rcd = is_cxl_restricted(pdev); @@ -685,31 +855,31 @@ static int cxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) else dev_warn(&pdev->dev, "Media not active (%d)\n", rc); - rc = cxl_pci_setup_mailbox(cxlds); + rc = cxl_alloc_irq_vectors(pdev); if (rc) return rc; - rc = cxl_enumerate_cmds(cxlds); + rc = cxl_pci_setup_mailbox(mds); if (rc) return rc; - rc = cxl_set_timestamp(cxlds); + rc = cxl_enumerate_cmds(mds); if (rc) return rc; - rc = cxl_poison_state_init(cxlds); + rc = cxl_set_timestamp(mds); if (rc) return rc; - rc = cxl_dev_state_identify(cxlds); + rc = cxl_poison_state_init(mds); if (rc) return rc; - rc = cxl_mem_create_range_info(cxlds); + rc = cxl_dev_state_identify(mds); if (rc) return rc; - rc = cxl_alloc_irq_vectors(pdev); + rc = cxl_mem_create_range_info(mds); if (rc) return rc; @@ -717,7 +887,34 @@ static int cxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) if (IS_ERR(cxlmd)) return PTR_ERR(cxlmd); - rc = cxl_event_config(host_bridge, cxlds); + rc = cxl_memdev_setup_fw_upload(mds); + if (rc) + return rc; + + pmu_count = cxl_count_regblock(pdev, CXL_REGLOC_RBI_PMU); + for (i = 0; i < pmu_count; i++) { + struct cxl_pmu_regs pmu_regs; + + rc = cxl_find_regblock_instance(pdev, CXL_REGLOC_RBI_PMU, &map, i); + if (rc) { + dev_dbg(&pdev->dev, "Could not find PMU regblock\n"); + break; + } + + rc = cxl_map_pmu_regs(pdev, &pmu_regs, &map); + if (rc) { + dev_dbg(&pdev->dev, "Could not map PMU regs\n"); + break; + } + + rc = devm_cxl_pmu_add(cxlds->dev, &pmu_regs, cxlmd->id, i, CXL_PMU_MEMDEV); + if (rc) { + dev_dbg(&pdev->dev, "Could not add PMU instance\n"); + break; + } + } + + rc = cxl_event_config(host_bridge, mds); if (rc) return rc; diff --git a/drivers/cxl/pmem.c b/drivers/cxl/pmem.c index 71cfa1fdf902..7cb8994f8809 100644 --- a/drivers/cxl/pmem.c +++ b/drivers/cxl/pmem.c @@ -15,9 +15,9 @@ extern const struct nvdimm_security_ops *cxl_security_ops; static __read_mostly DECLARE_BITMAP(exclusive_cmds, CXL_MEM_COMMAND_ID_MAX); -static void clear_exclusive(void *cxlds) +static void clear_exclusive(void *mds) { - clear_exclusive_cxl_commands(cxlds, exclusive_cmds); + clear_exclusive_cxl_commands(mds, exclusive_cmds); } static void unregister_nvdimm(void *nvdimm) @@ -65,13 +65,13 @@ static int cxl_nvdimm_probe(struct device *dev) struct cxl_nvdimm *cxl_nvd = to_cxl_nvdimm(dev); struct cxl_memdev *cxlmd = cxl_nvd->cxlmd; struct cxl_nvdimm_bridge *cxl_nvb = cxlmd->cxl_nvb; + struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlmd->cxlds); unsigned long flags = 0, cmd_mask = 0; - struct cxl_dev_state *cxlds = cxlmd->cxlds; struct nvdimm *nvdimm; int rc; - set_exclusive_cxl_commands(cxlds, exclusive_cmds); - rc = devm_add_action_or_reset(dev, clear_exclusive, cxlds); + set_exclusive_cxl_commands(mds, exclusive_cmds); + rc = devm_add_action_or_reset(dev, clear_exclusive, mds); if (rc) return rc; @@ -100,22 +100,23 @@ static struct cxl_driver cxl_nvdimm_driver = { }, }; -static int cxl_pmem_get_config_size(struct cxl_dev_state *cxlds, +static int cxl_pmem_get_config_size(struct cxl_memdev_state *mds, struct nd_cmd_get_config_size *cmd, unsigned int buf_len) { if (sizeof(*cmd) > buf_len) return -EINVAL; - *cmd = (struct nd_cmd_get_config_size) { - .config_size = cxlds->lsa_size, - .max_xfer = cxlds->payload_size - sizeof(struct cxl_mbox_set_lsa), + *cmd = (struct nd_cmd_get_config_size){ + .config_size = mds->lsa_size, + .max_xfer = + mds->payload_size - sizeof(struct cxl_mbox_set_lsa), }; return 0; } -static int cxl_pmem_get_config_data(struct cxl_dev_state *cxlds, +static int cxl_pmem_get_config_data(struct cxl_memdev_state *mds, struct nd_cmd_get_config_data_hdr *cmd, unsigned int buf_len) { @@ -140,13 +141,13 @@ static int cxl_pmem_get_config_data(struct cxl_dev_state *cxlds, .payload_out = cmd->out_buf, }; - rc = cxl_internal_send_cmd(cxlds, &mbox_cmd); + rc = cxl_internal_send_cmd(mds, &mbox_cmd); cmd->status = 0; return rc; } -static int cxl_pmem_set_config_data(struct cxl_dev_state *cxlds, +static int cxl_pmem_set_config_data(struct cxl_memdev_state *mds, struct nd_cmd_set_config_hdr *cmd, unsigned int buf_len) { @@ -176,7 +177,7 @@ static int cxl_pmem_set_config_data(struct cxl_dev_state *cxlds, .size_in = struct_size(set_lsa, data, cmd->in_length), }; - rc = cxl_internal_send_cmd(cxlds, &mbox_cmd); + rc = cxl_internal_send_cmd(mds, &mbox_cmd); /* * Set "firmware" status (4-packed bytes at the end of the input @@ -194,18 +195,18 @@ static int cxl_pmem_nvdimm_ctl(struct nvdimm *nvdimm, unsigned int cmd, struct cxl_nvdimm *cxl_nvd = nvdimm_provider_data(nvdimm); unsigned long cmd_mask = nvdimm_cmd_mask(nvdimm); struct cxl_memdev *cxlmd = cxl_nvd->cxlmd; - struct cxl_dev_state *cxlds = cxlmd->cxlds; + struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlmd->cxlds); if (!test_bit(cmd, &cmd_mask)) return -ENOTTY; switch (cmd) { case ND_CMD_GET_CONFIG_SIZE: - return cxl_pmem_get_config_size(cxlds, buf, buf_len); + return cxl_pmem_get_config_size(mds, buf, buf_len); case ND_CMD_GET_CONFIG_DATA: - return cxl_pmem_get_config_data(cxlds, buf, buf_len); + return cxl_pmem_get_config_data(mds, buf, buf_len); case ND_CMD_SET_CONFIG_DATA: - return cxl_pmem_set_config_data(cxlds, buf, buf_len); + return cxl_pmem_set_config_data(mds, buf, buf_len); default: return -ENOTTY; } diff --git a/drivers/cxl/pmu.h b/drivers/cxl/pmu.h new file mode 100644 index 000000000000..b1e9bcd9f28c --- /dev/null +++ b/drivers/cxl/pmu.h @@ -0,0 +1,28 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright(c) 2023 Huawei + * CXL Specification rev 3.0 Setion 8.2.7 (CPMU Register Interface) + */ +#ifndef CXL_PMU_H +#define CXL_PMU_H +#include <linux/device.h> + +enum cxl_pmu_type { + CXL_PMU_MEMDEV, +}; + +#define CXL_PMU_REGMAP_SIZE 0xe00 /* Table 8-32 CXL 3.0 specification */ +struct cxl_pmu { + struct device dev; + void __iomem *base; + int assoc_id; + int index; + enum cxl_pmu_type type; +}; + +#define to_cxl_pmu(dev) container_of(dev, struct cxl_pmu, dev) +struct cxl_pmu_regs; +int devm_cxl_pmu_add(struct device *parent, struct cxl_pmu_regs *regs, + int assoc_id, int idx, enum cxl_pmu_type type); + +#endif diff --git a/drivers/cxl/port.c b/drivers/cxl/port.c index 01e84ea54f56..6240e05b9542 100644 --- a/drivers/cxl/port.c +++ b/drivers/cxl/port.c @@ -60,17 +60,13 @@ static int discover_region(struct device *dev, void *root) static int cxl_switch_port_probe(struct cxl_port *port) { struct cxl_hdm *cxlhdm; - int rc, nr_dports; - - nr_dports = devm_cxl_port_enumerate_dports(port); - if (nr_dports < 0) - return nr_dports; + int rc; - cxlhdm = devm_cxl_setup_hdm(port, NULL); - rc = devm_cxl_enable_hdm(port, cxlhdm); - if (rc) + rc = devm_cxl_port_enumerate_dports(port); + if (rc < 0) return rc; + cxlhdm = devm_cxl_setup_hdm(port, NULL); if (!IS_ERR(cxlhdm)) return devm_cxl_enumerate_decoders(cxlhdm, NULL); @@ -79,7 +75,7 @@ static int cxl_switch_port_probe(struct cxl_port *port) return PTR_ERR(cxlhdm); } - if (nr_dports == 1) { + if (rc == 1) { dev_dbg(&port->dev, "Fallback to passthrough decoder\n"); return devm_cxl_add_passthrough_decoder(port); } diff --git a/drivers/cxl/security.c b/drivers/cxl/security.c index 4ad4bda2d18e..21856a3f408e 100644 --- a/drivers/cxl/security.c +++ b/drivers/cxl/security.c @@ -14,7 +14,7 @@ static unsigned long cxl_pmem_get_security_flags(struct nvdimm *nvdimm, { struct cxl_nvdimm *cxl_nvd = nvdimm_provider_data(nvdimm); struct cxl_memdev *cxlmd = cxl_nvd->cxlmd; - struct cxl_dev_state *cxlds = cxlmd->cxlds; + struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlmd->cxlds); unsigned long security_flags = 0; struct cxl_get_security_output { __le32 flags; @@ -29,11 +29,14 @@ static unsigned long cxl_pmem_get_security_flags(struct nvdimm *nvdimm, .payload_out = &out, }; - rc = cxl_internal_send_cmd(cxlds, &mbox_cmd); + rc = cxl_internal_send_cmd(mds, &mbox_cmd); if (rc < 0) return 0; sec_out = le32_to_cpu(out.flags); + /* cache security state */ + mds->security.state = sec_out; + if (ptype == NVDIMM_MASTER) { if (sec_out & CXL_PMEM_SEC_STATE_MASTER_PASS_SET) set_bit(NVDIMM_SECURITY_UNLOCKED, &security_flags); @@ -67,7 +70,7 @@ static int cxl_pmem_security_change_key(struct nvdimm *nvdimm, { struct cxl_nvdimm *cxl_nvd = nvdimm_provider_data(nvdimm); struct cxl_memdev *cxlmd = cxl_nvd->cxlmd; - struct cxl_dev_state *cxlds = cxlmd->cxlds; + struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlmd->cxlds); struct cxl_mbox_cmd mbox_cmd; struct cxl_set_pass set_pass; @@ -84,7 +87,7 @@ static int cxl_pmem_security_change_key(struct nvdimm *nvdimm, .payload_in = &set_pass, }; - return cxl_internal_send_cmd(cxlds, &mbox_cmd); + return cxl_internal_send_cmd(mds, &mbox_cmd); } static int __cxl_pmem_security_disable(struct nvdimm *nvdimm, @@ -93,7 +96,7 @@ static int __cxl_pmem_security_disable(struct nvdimm *nvdimm, { struct cxl_nvdimm *cxl_nvd = nvdimm_provider_data(nvdimm); struct cxl_memdev *cxlmd = cxl_nvd->cxlmd; - struct cxl_dev_state *cxlds = cxlmd->cxlds; + struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlmd->cxlds); struct cxl_disable_pass dis_pass; struct cxl_mbox_cmd mbox_cmd; @@ -109,7 +112,7 @@ static int __cxl_pmem_security_disable(struct nvdimm *nvdimm, .payload_in = &dis_pass, }; - return cxl_internal_send_cmd(cxlds, &mbox_cmd); + return cxl_internal_send_cmd(mds, &mbox_cmd); } static int cxl_pmem_security_disable(struct nvdimm *nvdimm, @@ -128,12 +131,12 @@ static int cxl_pmem_security_freeze(struct nvdimm *nvdimm) { struct cxl_nvdimm *cxl_nvd = nvdimm_provider_data(nvdimm); struct cxl_memdev *cxlmd = cxl_nvd->cxlmd; - struct cxl_dev_state *cxlds = cxlmd->cxlds; + struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlmd->cxlds); struct cxl_mbox_cmd mbox_cmd = { .opcode = CXL_MBOX_OP_FREEZE_SECURITY, }; - return cxl_internal_send_cmd(cxlds, &mbox_cmd); + return cxl_internal_send_cmd(mds, &mbox_cmd); } static int cxl_pmem_security_unlock(struct nvdimm *nvdimm, @@ -141,7 +144,7 @@ static int cxl_pmem_security_unlock(struct nvdimm *nvdimm, { struct cxl_nvdimm *cxl_nvd = nvdimm_provider_data(nvdimm); struct cxl_memdev *cxlmd = cxl_nvd->cxlmd; - struct cxl_dev_state *cxlds = cxlmd->cxlds; + struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlmd->cxlds); u8 pass[NVDIMM_PASSPHRASE_LEN]; struct cxl_mbox_cmd mbox_cmd; int rc; @@ -153,7 +156,7 @@ static int cxl_pmem_security_unlock(struct nvdimm *nvdimm, .payload_in = pass, }; - rc = cxl_internal_send_cmd(cxlds, &mbox_cmd); + rc = cxl_internal_send_cmd(mds, &mbox_cmd); if (rc < 0) return rc; @@ -166,7 +169,7 @@ static int cxl_pmem_security_passphrase_erase(struct nvdimm *nvdimm, { struct cxl_nvdimm *cxl_nvd = nvdimm_provider_data(nvdimm); struct cxl_memdev *cxlmd = cxl_nvd->cxlmd; - struct cxl_dev_state *cxlds = cxlmd->cxlds; + struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlmd->cxlds); struct cxl_mbox_cmd mbox_cmd; struct cxl_pass_erase erase; int rc; @@ -182,7 +185,7 @@ static int cxl_pmem_security_passphrase_erase(struct nvdimm *nvdimm, .payload_in = &erase, }; - rc = cxl_internal_send_cmd(cxlds, &mbox_cmd); + rc = cxl_internal_send_cmd(mds, &mbox_cmd); if (rc < 0) return rc; diff --git a/drivers/perf/Kconfig b/drivers/perf/Kconfig index 711f82400086..2d2dd400fed2 100644 --- a/drivers/perf/Kconfig +++ b/drivers/perf/Kconfig @@ -213,4 +213,17 @@ source "drivers/perf/arm_cspmu/Kconfig" source "drivers/perf/amlogic/Kconfig" +config CXL_PMU + tristate "CXL Performance Monitoring Unit" + depends on CXL_BUS + help + Support performance monitoring as defined in CXL rev 3.0 + section 13.2: Performance Monitoring. CXL components may have + one or more CXL Performance Monitoring Units (CPMUs). + + Say 'y/m' to enable a driver that will attach to performance + monitoring units and provide standard perf based interfaces. + + If unsure say 'm'. + endmenu diff --git a/drivers/perf/Makefile b/drivers/perf/Makefile index dabc859540ce..f1d7ce9da275 100644 --- a/drivers/perf/Makefile +++ b/drivers/perf/Makefile @@ -24,3 +24,4 @@ obj-$(CONFIG_APPLE_M1_CPU_PMU) += apple_m1_cpu_pmu.o obj-$(CONFIG_ALIBABA_UNCORE_DRW_PMU) += alibaba_uncore_drw_pmu.o obj-$(CONFIG_ARM_CORESIGHT_PMU_ARCH_SYSTEM_PMU) += arm_cspmu/ obj-$(CONFIG_MESON_DDR_PMU) += amlogic/ +obj-$(CONFIG_CXL_PMU) += cxl_pmu.o diff --git a/drivers/perf/cxl_pmu.c b/drivers/perf/cxl_pmu.c new file mode 100644 index 000000000000..0a8f597e695b --- /dev/null +++ b/drivers/perf/cxl_pmu.c @@ -0,0 +1,990 @@ +// SPDX-License-Identifier: GPL-2.0-only + +/* + * Copyright(c) 2023 Huawei + * + * The CXL 3.0 specification includes a standard Performance Monitoring Unit, + * called the CXL PMU, or CPMU. In order to allow a high degree of + * implementation flexibility the specification provides a wide range of + * options all of which are self describing. + * + * Details in CXL rev 3.0 section 8.2.7 CPMU Register Interface + */ + +#include <linux/io-64-nonatomic-lo-hi.h> +#include <linux/perf_event.h> +#include <linux/bitops.h> +#include <linux/device.h> +#include <linux/bits.h> +#include <linux/list.h> +#include <linux/bug.h> +#include <linux/pci.h> + +#include "../cxl/cxlpci.h" +#include "../cxl/cxl.h" +#include "../cxl/pmu.h" + +#define CXL_PMU_CAP_REG 0x0 +#define CXL_PMU_CAP_NUM_COUNTERS_MSK GENMASK_ULL(4, 0) +#define CXL_PMU_CAP_COUNTER_WIDTH_MSK GENMASK_ULL(15, 8) +#define CXL_PMU_CAP_NUM_EVN_CAP_REG_SUP_MSK GENMASK_ULL(24, 20) +#define CXL_PMU_CAP_FILTERS_SUP_MSK GENMASK_ULL(39, 32) +#define CXL_PMU_FILTER_HDM BIT(0) +#define CXL_PMU_FILTER_CHAN_RANK_BANK BIT(1) +#define CXL_PMU_CAP_MSI_N_MSK GENMASK_ULL(47, 44) +#define CXL_PMU_CAP_WRITEABLE_WHEN_FROZEN BIT_ULL(48) +#define CXL_PMU_CAP_FREEZE BIT_ULL(49) +#define CXL_PMU_CAP_INT BIT_ULL(50) +#define CXL_PMU_CAP_VERSION_MSK GENMASK_ULL(63, 60) + +#define CXL_PMU_OVERFLOW_REG 0x10 +#define CXL_PMU_FREEZE_REG 0x18 +#define CXL_PMU_EVENT_CAP_REG(n) (0x100 + 8 * (n)) +#define CXL_PMU_EVENT_CAP_SUPPORTED_EVENTS_MSK GENMASK_ULL(31, 0) +#define CXL_PMU_EVENT_CAP_GROUP_ID_MSK GENMASK_ULL(47, 32) +#define CXL_PMU_EVENT_CAP_VENDOR_ID_MSK GENMASK_ULL(63, 48) + +#define CXL_PMU_COUNTER_CFG_REG(n) (0x200 + 8 * (n)) +#define CXL_PMU_COUNTER_CFG_TYPE_MSK GENMASK_ULL(1, 0) +#define CXL_PMU_COUNTER_CFG_TYPE_FREE_RUN 0 +#define CXL_PMU_COUNTER_CFG_TYPE_FIXED_FUN 1 +#define CXL_PMU_COUNTER_CFG_TYPE_CONFIGURABLE 2 +#define CXL_PMU_COUNTER_CFG_ENABLE BIT_ULL(8) +#define CXL_PMU_COUNTER_CFG_INT_ON_OVRFLW BIT_ULL(9) +#define CXL_PMU_COUNTER_CFG_FREEZE_ON_OVRFLW BIT_ULL(10) +#define CXL_PMU_COUNTER_CFG_EDGE BIT_ULL(11) +#define CXL_PMU_COUNTER_CFG_INVERT BIT_ULL(12) +#define CXL_PMU_COUNTER_CFG_THRESHOLD_MSK GENMASK_ULL(23, 16) +#define CXL_PMU_COUNTER_CFG_EVENTS_MSK GENMASK_ULL(55, 24) +#define CXL_PMU_COUNTER_CFG_EVENT_GRP_ID_IDX_MSK GENMASK_ULL(63, 59) + +#define CXL_PMU_FILTER_CFG_REG(n, f) (0x400 + 4 * ((f) + (n) * 8)) +#define CXL_PMU_FILTER_CFG_VALUE_MSK GENMASK(15, 0) + +#define CXL_PMU_COUNTER_REG(n) (0xc00 + 8 * (n)) + +/* CXL rev 3.0 Table 13-5 Events under CXL Vendor ID */ +#define CXL_PMU_GID_CLOCK_TICKS 0x00 +#define CXL_PMU_GID_D2H_REQ 0x0010 +#define CXL_PMU_GID_D2H_RSP 0x0011 +#define CXL_PMU_GID_H2D_REQ 0x0012 +#define CXL_PMU_GID_H2D_RSP 0x0013 +#define CXL_PMU_GID_CACHE_DATA 0x0014 +#define CXL_PMU_GID_M2S_REQ 0x0020 +#define CXL_PMU_GID_M2S_RWD 0x0021 +#define CXL_PMU_GID_M2S_BIRSP 0x0022 +#define CXL_PMU_GID_S2M_BISNP 0x0023 +#define CXL_PMU_GID_S2M_NDR 0x0024 +#define CXL_PMU_GID_S2M_DRS 0x0025 +#define CXL_PMU_GID_DDR 0x8000 + +static int cxl_pmu_cpuhp_state_num; + +struct cxl_pmu_ev_cap { + u16 vid; + u16 gid; + u32 msk; + union { + int counter_idx; /* fixed counters */ + int event_idx; /* configurable counters */ + }; + struct list_head node; +}; + +#define CXL_PMU_MAX_COUNTERS 64 +struct cxl_pmu_info { + struct pmu pmu; + void __iomem *base; + struct perf_event **hw_events; + struct list_head event_caps_configurable; + struct list_head event_caps_fixed; + DECLARE_BITMAP(used_counter_bm, CXL_PMU_MAX_COUNTERS); + DECLARE_BITMAP(conf_counter_bm, CXL_PMU_MAX_COUNTERS); + u16 counter_width; + u8 num_counters; + u8 num_event_capabilities; + int on_cpu; + struct hlist_node node; + bool filter_hdm; + int irq; +}; + +#define pmu_to_cxl_pmu_info(_pmu) container_of(_pmu, struct cxl_pmu_info, pmu) + +/* + * All CPMU counters are discoverable via the Event Capabilities Registers. + * Each Event Capability register contains a a VID / GroupID. + * A counter may then count any combination (by summing) of events in + * that group which are in the Supported Events Bitmask. + * However, there are some complexities to the scheme. + * - Fixed function counters refer to an Event Capabilities register. + * That event capability register is not then used for Configurable + * counters. + */ +static int cxl_pmu_parse_caps(struct device *dev, struct cxl_pmu_info *info) +{ + unsigned long fixed_counter_event_cap_bm = 0; + void __iomem *base = info->base; + bool freeze_for_enable; + u64 val, eval; + int i; + + val = readq(base + CXL_PMU_CAP_REG); + freeze_for_enable = FIELD_GET(CXL_PMU_CAP_WRITEABLE_WHEN_FROZEN, val) && + FIELD_GET(CXL_PMU_CAP_FREEZE, val); + if (!freeze_for_enable) { + dev_err(dev, "Counters not writable while frozen\n"); + return -ENODEV; + } + + info->num_counters = FIELD_GET(CXL_PMU_CAP_NUM_COUNTERS_MSK, val) + 1; + info->counter_width = FIELD_GET(CXL_PMU_CAP_COUNTER_WIDTH_MSK, val); + info->num_event_capabilities = FIELD_GET(CXL_PMU_CAP_NUM_EVN_CAP_REG_SUP_MSK, val) + 1; + + info->filter_hdm = FIELD_GET(CXL_PMU_CAP_FILTERS_SUP_MSK, val) & CXL_PMU_FILTER_HDM; + if (FIELD_GET(CXL_PMU_CAP_INT, val)) + info->irq = FIELD_GET(CXL_PMU_CAP_MSI_N_MSK, val); + else + info->irq = -1; + + /* First handle fixed function counters; note if configurable counters found */ + for (i = 0; i < info->num_counters; i++) { + struct cxl_pmu_ev_cap *pmu_ev; + u32 events_msk; + u8 group_idx; + + val = readq(base + CXL_PMU_COUNTER_CFG_REG(i)); + + if (FIELD_GET(CXL_PMU_COUNTER_CFG_TYPE_MSK, val) == + CXL_PMU_COUNTER_CFG_TYPE_CONFIGURABLE) { + set_bit(i, info->conf_counter_bm); + } + + if (FIELD_GET(CXL_PMU_COUNTER_CFG_TYPE_MSK, val) != + CXL_PMU_COUNTER_CFG_TYPE_FIXED_FUN) + continue; + + /* In this case we know which fields are const */ + group_idx = FIELD_GET(CXL_PMU_COUNTER_CFG_EVENT_GRP_ID_IDX_MSK, val); + events_msk = FIELD_GET(CXL_PMU_COUNTER_CFG_EVENTS_MSK, val); + eval = readq(base + CXL_PMU_EVENT_CAP_REG(group_idx)); + pmu_ev = devm_kzalloc(dev, sizeof(*pmu_ev), GFP_KERNEL); + if (!pmu_ev) + return -ENOMEM; + + pmu_ev->vid = FIELD_GET(CXL_PMU_EVENT_CAP_VENDOR_ID_MSK, eval); + pmu_ev->gid = FIELD_GET(CXL_PMU_EVENT_CAP_GROUP_ID_MSK, eval); + /* For a fixed purpose counter use the events mask from the counter CFG */ + pmu_ev->msk = events_msk; + pmu_ev->counter_idx = i; + /* This list add is never unwound as all entries deleted on remove */ + list_add(&pmu_ev->node, &info->event_caps_fixed); + /* + * Configurable counters must not use an Event Capability registers that + * is in use for a Fixed counter + */ + set_bit(group_idx, &fixed_counter_event_cap_bm); + } + + if (!bitmap_empty(info->conf_counter_bm, CXL_PMU_MAX_COUNTERS)) { + struct cxl_pmu_ev_cap *pmu_ev; + int j; + /* Walk event capabilities unused by fixed counters */ + for_each_clear_bit(j, &fixed_counter_event_cap_bm, + info->num_event_capabilities) { + pmu_ev = devm_kzalloc(dev, sizeof(*pmu_ev), GFP_KERNEL); + if (!pmu_ev) + return -ENOMEM; + + eval = readq(base + CXL_PMU_EVENT_CAP_REG(j)); + pmu_ev->vid = FIELD_GET(CXL_PMU_EVENT_CAP_VENDOR_ID_MSK, eval); + pmu_ev->gid = FIELD_GET(CXL_PMU_EVENT_CAP_GROUP_ID_MSK, eval); + pmu_ev->msk = FIELD_GET(CXL_PMU_EVENT_CAP_SUPPORTED_EVENTS_MSK, eval); + pmu_ev->event_idx = j; + list_add(&pmu_ev->node, &info->event_caps_configurable); + } + } + + return 0; +} + +static ssize_t cxl_pmu_format_sysfs_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct dev_ext_attribute *eattr; + + eattr = container_of(attr, struct dev_ext_attribute, attr); + + return sysfs_emit(buf, "%s\n", (char *)eattr->var); +} + +#define CXL_PMU_FORMAT_ATTR(_name, _format)\ + (&((struct dev_ext_attribute[]) { \ + { \ + .attr = __ATTR(_name, 0444, \ + cxl_pmu_format_sysfs_show, NULL), \ + .var = (void *)_format \ + } \ + })[0].attr.attr) + +enum { + cxl_pmu_mask_attr, + cxl_pmu_gid_attr, + cxl_pmu_vid_attr, + cxl_pmu_threshold_attr, + cxl_pmu_invert_attr, + cxl_pmu_edge_attr, + cxl_pmu_hdm_filter_en_attr, + cxl_pmu_hdm_attr, +}; + +static struct attribute *cxl_pmu_format_attr[] = { + [cxl_pmu_mask_attr] = CXL_PMU_FORMAT_ATTR(mask, "config:0-31"), + [cxl_pmu_gid_attr] = CXL_PMU_FORMAT_ATTR(gid, "config:32-47"), + [cxl_pmu_vid_attr] = CXL_PMU_FORMAT_ATTR(vid, "config:48-63"), + [cxl_pmu_threshold_attr] = CXL_PMU_FORMAT_ATTR(threshold, "config1:0-15"), + [cxl_pmu_invert_attr] = CXL_PMU_FORMAT_ATTR(invert, "config1:16"), + [cxl_pmu_edge_attr] = CXL_PMU_FORMAT_ATTR(edge, "config1:17"), + [cxl_pmu_hdm_filter_en_attr] = CXL_PMU_FORMAT_ATTR(hdm_filter_en, "config1:18"), + [cxl_pmu_hdm_attr] = CXL_PMU_FORMAT_ATTR(hdm, "config2:0-15"), + NULL +}; + +#define CXL_PMU_ATTR_CONFIG_MASK_MSK GENMASK_ULL(31, 0) +#define CXL_PMU_ATTR_CONFIG_GID_MSK GENMASK_ULL(47, 32) +#define CXL_PMU_ATTR_CONFIG_VID_MSK GENMASK_ULL(63, 48) +#define CXL_PMU_ATTR_CONFIG1_THRESHOLD_MSK GENMASK_ULL(15, 0) +#define CXL_PMU_ATTR_CONFIG1_INVERT_MSK BIT(16) +#define CXL_PMU_ATTR_CONFIG1_EDGE_MSK BIT(17) +#define CXL_PMU_ATTR_CONFIG1_FILTER_EN_MSK BIT(18) +#define CXL_PMU_ATTR_CONFIG2_HDM_MSK GENMASK(15, 0) + +static umode_t cxl_pmu_format_is_visible(struct kobject *kobj, + struct attribute *attr, int a) +{ + struct device *dev = kobj_to_dev(kobj); + struct cxl_pmu_info *info = dev_get_drvdata(dev); + + /* + * Filter capability at the CPMU level, so hide the attributes if the particular + * filter is not supported. + */ + if (!info->filter_hdm && + (attr == cxl_pmu_format_attr[cxl_pmu_hdm_filter_en_attr] || + attr == cxl_pmu_format_attr[cxl_pmu_hdm_attr])) + return 0; + + return attr->mode; +} + +static const struct attribute_group cxl_pmu_format_group = { + .name = "format", + .attrs = cxl_pmu_format_attr, + .is_visible = cxl_pmu_format_is_visible, +}; + +static u32 cxl_pmu_config_get_mask(struct perf_event *event) +{ + return FIELD_GET(CXL_PMU_ATTR_CONFIG_MASK_MSK, event->attr.config); +} + +static u16 cxl_pmu_config_get_gid(struct perf_event *event) +{ + return FIELD_GET(CXL_PMU_ATTR_CONFIG_GID_MSK, event->attr.config); +} + +static u16 cxl_pmu_config_get_vid(struct perf_event *event) +{ + return FIELD_GET(CXL_PMU_ATTR_CONFIG_VID_MSK, event->attr.config); +} + +static u8 cxl_pmu_config1_get_threshold(struct perf_event *event) +{ + return FIELD_GET(CXL_PMU_ATTR_CONFIG1_THRESHOLD_MSK, event->attr.config1); +} + +static bool cxl_pmu_config1_get_invert(struct perf_event *event) +{ + return FIELD_GET(CXL_PMU_ATTR_CONFIG1_INVERT_MSK, event->attr.config1); +} + +static bool cxl_pmu_config1_get_edge(struct perf_event *event) +{ + return FIELD_GET(CXL_PMU_ATTR_CONFIG1_EDGE_MSK, event->attr.config1); +} + +/* + * CPMU specification allows for 8 filters, each with a 16 bit value... + * So we need to find 8x16bits to store it in. + * As the value used for disable is 0xffff, a separate enable switch + * is needed. + */ + +static bool cxl_pmu_config1_hdm_filter_en(struct perf_event *event) +{ + return FIELD_GET(CXL_PMU_ATTR_CONFIG1_FILTER_EN_MSK, event->attr.config1); +} + +static u16 cxl_pmu_config2_get_hdm_decoder(struct perf_event *event) +{ + return FIELD_GET(CXL_PMU_ATTR_CONFIG2_HDM_MSK, event->attr.config2); +} + +static ssize_t cxl_pmu_event_sysfs_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct perf_pmu_events_attr *pmu_attr = + container_of(attr, struct perf_pmu_events_attr, attr); + + return sysfs_emit(buf, "config=%#llx\n", pmu_attr->id); +} + +#define CXL_PMU_EVENT_ATTR(_name, _vid, _gid, _msk) \ + PMU_EVENT_ATTR_ID(_name, cxl_pmu_event_sysfs_show, \ + ((u64)(_vid) << 48) | ((u64)(_gid) << 32) | (u64)(_msk)) + +/* For CXL spec defined events */ +#define CXL_PMU_EVENT_CXL_ATTR(_name, _gid, _msk) \ + CXL_PMU_EVENT_ATTR(_name, PCI_DVSEC_VENDOR_ID_CXL, _gid, _msk) + +static struct attribute *cxl_pmu_event_attrs[] = { + CXL_PMU_EVENT_CXL_ATTR(clock_ticks, CXL_PMU_GID_CLOCK_TICKS, BIT(0)), + /* CXL rev 3.0 Table 3-17 - Device to Host Requests */ + CXL_PMU_EVENT_CXL_ATTR(d2h_req_rdcurr, CXL_PMU_GID_D2H_REQ, BIT(1)), + CXL_PMU_EVENT_CXL_ATTR(d2h_req_rdown, CXL_PMU_GID_D2H_REQ, BIT(2)), + CXL_PMU_EVENT_CXL_ATTR(d2h_req_rdshared, CXL_PMU_GID_D2H_REQ, BIT(3)), + CXL_PMU_EVENT_CXL_ATTR(d2h_req_rdany, CXL_PMU_GID_D2H_REQ, BIT(4)), + CXL_PMU_EVENT_CXL_ATTR(d2h_req_rdownnodata, CXL_PMU_GID_D2H_REQ, BIT(5)), + CXL_PMU_EVENT_CXL_ATTR(d2h_req_itomwr, CXL_PMU_GID_D2H_REQ, BIT(6)), + CXL_PMU_EVENT_CXL_ATTR(d2h_req_wrcurr, CXL_PMU_GID_D2H_REQ, BIT(7)), + CXL_PMU_EVENT_CXL_ATTR(d2h_req_clflush, CXL_PMU_GID_D2H_REQ, BIT(8)), + CXL_PMU_EVENT_CXL_ATTR(d2h_req_cleanevict, CXL_PMU_GID_D2H_REQ, BIT(9)), + CXL_PMU_EVENT_CXL_ATTR(d2h_req_dirtyevict, CXL_PMU_GID_D2H_REQ, BIT(10)), + CXL_PMU_EVENT_CXL_ATTR(d2h_req_cleanevictnodata, CXL_PMU_GID_D2H_REQ, BIT(11)), + CXL_PMU_EVENT_CXL_ATTR(d2h_req_wowrinv, CXL_PMU_GID_D2H_REQ, BIT(12)), + CXL_PMU_EVENT_CXL_ATTR(d2h_req_wowrinvf, CXL_PMU_GID_D2H_REQ, BIT(13)), + CXL_PMU_EVENT_CXL_ATTR(d2h_req_wrinv, CXL_PMU_GID_D2H_REQ, BIT(14)), + CXL_PMU_EVENT_CXL_ATTR(d2h_req_cacheflushed, CXL_PMU_GID_D2H_REQ, BIT(16)), + /* CXL rev 3.0 Table 3-20 - D2H Repsonse Encodings */ + CXL_PMU_EVENT_CXL_ATTR(d2h_rsp_rspihiti, CXL_PMU_GID_D2H_RSP, BIT(4)), + CXL_PMU_EVENT_CXL_ATTR(d2h_rsp_rspvhitv, CXL_PMU_GID_D2H_RSP, BIT(6)), + CXL_PMU_EVENT_CXL_ATTR(d2h_rsp_rspihitse, CXL_PMU_GID_D2H_RSP, BIT(5)), + CXL_PMU_EVENT_CXL_ATTR(d2h_rsp_rspshitse, CXL_PMU_GID_D2H_RSP, BIT(1)), + CXL_PMU_EVENT_CXL_ATTR(d2h_rsp_rspsfwdm, CXL_PMU_GID_D2H_RSP, BIT(7)), + CXL_PMU_EVENT_CXL_ATTR(d2h_rsp_rspifwdm, CXL_PMU_GID_D2H_RSP, BIT(15)), + CXL_PMU_EVENT_CXL_ATTR(d2h_rsp_rspvfwdv, CXL_PMU_GID_D2H_RSP, BIT(22)), + /* CXL rev 3.0 Table 3-21 - CXL.cache - Mapping of H2D Requests to D2H Responses */ + CXL_PMU_EVENT_CXL_ATTR(h2d_req_snpdata, CXL_PMU_GID_H2D_REQ, BIT(1)), + CXL_PMU_EVENT_CXL_ATTR(h2d_req_snpinv, CXL_PMU_GID_H2D_REQ, BIT(2)), + CXL_PMU_EVENT_CXL_ATTR(h2d_req_snpcur, CXL_PMU_GID_H2D_REQ, BIT(3)), + /* CXL rev 3.0 Table 3-22 - H2D Response Opcode Encodings */ + CXL_PMU_EVENT_CXL_ATTR(h2d_rsp_writepull, CXL_PMU_GID_H2D_RSP, BIT(1)), + CXL_PMU_EVENT_CXL_ATTR(h2d_rsp_go, CXL_PMU_GID_H2D_RSP, BIT(4)), + CXL_PMU_EVENT_CXL_ATTR(h2d_rsp_gowritepull, CXL_PMU_GID_H2D_RSP, BIT(5)), + CXL_PMU_EVENT_CXL_ATTR(h2d_rsp_extcmp, CXL_PMU_GID_H2D_RSP, BIT(6)), + CXL_PMU_EVENT_CXL_ATTR(h2d_rsp_gowritepulldrop, CXL_PMU_GID_H2D_RSP, BIT(8)), + CXL_PMU_EVENT_CXL_ATTR(h2d_rsp_fastgowritepull, CXL_PMU_GID_H2D_RSP, BIT(13)), + CXL_PMU_EVENT_CXL_ATTR(h2d_rsp_goerrwritepull, CXL_PMU_GID_H2D_RSP, BIT(15)), + /* CXL rev 3.0 Table 13-5 directly lists these */ + CXL_PMU_EVENT_CXL_ATTR(cachedata_d2h_data, CXL_PMU_GID_CACHE_DATA, BIT(0)), + CXL_PMU_EVENT_CXL_ATTR(cachedata_h2d_data, CXL_PMU_GID_CACHE_DATA, BIT(1)), + /* CXL rev 3.0 Table 3-29 M2S Req Memory Opcodes */ + CXL_PMU_EVENT_CXL_ATTR(m2s_req_meminv, CXL_PMU_GID_M2S_REQ, BIT(0)), + CXL_PMU_EVENT_CXL_ATTR(m2s_req_memrd, CXL_PMU_GID_M2S_REQ, BIT(1)), + CXL_PMU_EVENT_CXL_ATTR(m2s_req_memrddata, CXL_PMU_GID_M2S_REQ, BIT(2)), + CXL_PMU_EVENT_CXL_ATTR(m2s_req_memrdfwd, CXL_PMU_GID_M2S_REQ, BIT(3)), + CXL_PMU_EVENT_CXL_ATTR(m2s_req_memwrfwd, CXL_PMU_GID_M2S_REQ, BIT(4)), + CXL_PMU_EVENT_CXL_ATTR(m2s_req_memspecrd, CXL_PMU_GID_M2S_REQ, BIT(8)), + CXL_PMU_EVENT_CXL_ATTR(m2s_req_meminvnt, CXL_PMU_GID_M2S_REQ, BIT(9)), + CXL_PMU_EVENT_CXL_ATTR(m2s_req_memcleanevict, CXL_PMU_GID_M2S_REQ, BIT(10)), + /* CXL rev 3.0 Table 3-35 M2S RwD Memory Opcodes */ + CXL_PMU_EVENT_CXL_ATTR(m2s_rwd_memwr, CXL_PMU_GID_M2S_RWD, BIT(1)), + CXL_PMU_EVENT_CXL_ATTR(m2s_rwd_memwrptl, CXL_PMU_GID_M2S_RWD, BIT(2)), + CXL_PMU_EVENT_CXL_ATTR(m2s_rwd_biconflict, CXL_PMU_GID_M2S_RWD, BIT(4)), + /* CXL rev 3.0 Table 3-38 M2S BIRsp Memory Opcodes */ + CXL_PMU_EVENT_CXL_ATTR(m2s_birsp_i, CXL_PMU_GID_M2S_BIRSP, BIT(0)), + CXL_PMU_EVENT_CXL_ATTR(m2s_birsp_s, CXL_PMU_GID_M2S_BIRSP, BIT(1)), + CXL_PMU_EVENT_CXL_ATTR(m2s_birsp_e, CXL_PMU_GID_M2S_BIRSP, BIT(2)), + CXL_PMU_EVENT_CXL_ATTR(m2s_birsp_iblk, CXL_PMU_GID_M2S_BIRSP, BIT(4)), + CXL_PMU_EVENT_CXL_ATTR(m2s_birsp_sblk, CXL_PMU_GID_M2S_BIRSP, BIT(5)), + CXL_PMU_EVENT_CXL_ATTR(m2s_birsp_eblk, CXL_PMU_GID_M2S_BIRSP, BIT(6)), + /* CXL rev 3.0 Table 3-40 S2M BISnp Opcodes */ + CXL_PMU_EVENT_CXL_ATTR(s2m_bisnp_cur, CXL_PMU_GID_S2M_BISNP, BIT(0)), + CXL_PMU_EVENT_CXL_ATTR(s2m_bisnp_data, CXL_PMU_GID_S2M_BISNP, BIT(1)), + CXL_PMU_EVENT_CXL_ATTR(s2m_bisnp_inv, CXL_PMU_GID_S2M_BISNP, BIT(2)), + CXL_PMU_EVENT_CXL_ATTR(s2m_bisnp_curblk, CXL_PMU_GID_S2M_BISNP, BIT(4)), + CXL_PMU_EVENT_CXL_ATTR(s2m_bisnp_datblk, CXL_PMU_GID_S2M_BISNP, BIT(5)), + CXL_PMU_EVENT_CXL_ATTR(s2m_bisnp_invblk, CXL_PMU_GID_S2M_BISNP, BIT(6)), + /* CXL rev 3.0 Table 3-43 S2M NDR Opcopdes */ + CXL_PMU_EVENT_CXL_ATTR(s2m_ndr_cmp, CXL_PMU_GID_S2M_NDR, BIT(0)), + CXL_PMU_EVENT_CXL_ATTR(s2m_ndr_cmps, CXL_PMU_GID_S2M_NDR, BIT(1)), + CXL_PMU_EVENT_CXL_ATTR(s2m_ndr_cmpe, CXL_PMU_GID_S2M_NDR, BIT(2)), + CXL_PMU_EVENT_CXL_ATTR(s2m_ndr_biconflictack, CXL_PMU_GID_S2M_NDR, BIT(3)), + /* CXL rev 3.0 Table 3-46 S2M DRS opcodes */ + CXL_PMU_EVENT_CXL_ATTR(s2m_drs_memdata, CXL_PMU_GID_S2M_DRS, BIT(0)), + CXL_PMU_EVENT_CXL_ATTR(s2m_drs_memdatanxm, CXL_PMU_GID_S2M_DRS, BIT(1)), + /* CXL rev 3.0 Table 13-5 directly lists these */ + CXL_PMU_EVENT_CXL_ATTR(ddr_act, CXL_PMU_GID_DDR, BIT(0)), + CXL_PMU_EVENT_CXL_ATTR(ddr_pre, CXL_PMU_GID_DDR, BIT(1)), + CXL_PMU_EVENT_CXL_ATTR(ddr_casrd, CXL_PMU_GID_DDR, BIT(2)), + CXL_PMU_EVENT_CXL_ATTR(ddr_caswr, CXL_PMU_GID_DDR, BIT(3)), + CXL_PMU_EVENT_CXL_ATTR(ddr_refresh, CXL_PMU_GID_DDR, BIT(4)), + CXL_PMU_EVENT_CXL_ATTR(ddr_selfrefreshent, CXL_PMU_GID_DDR, BIT(5)), + CXL_PMU_EVENT_CXL_ATTR(ddr_rfm, CXL_PMU_GID_DDR, BIT(6)), + NULL +}; + +static struct cxl_pmu_ev_cap *cxl_pmu_find_fixed_counter_ev_cap(struct cxl_pmu_info *info, + int vid, int gid, int msk) +{ + struct cxl_pmu_ev_cap *pmu_ev; + + list_for_each_entry(pmu_ev, &info->event_caps_fixed, node) { + if (vid != pmu_ev->vid || gid != pmu_ev->gid) + continue; + + /* Precise match for fixed counter */ + if (msk == pmu_ev->msk) + return pmu_ev; + } + + return ERR_PTR(-EINVAL); +} + +static struct cxl_pmu_ev_cap *cxl_pmu_find_config_counter_ev_cap(struct cxl_pmu_info *info, + int vid, int gid, int msk) +{ + struct cxl_pmu_ev_cap *pmu_ev; + + list_for_each_entry(pmu_ev, &info->event_caps_configurable, node) { + if (vid != pmu_ev->vid || gid != pmu_ev->gid) + continue; + + /* Request mask must be subset of supported */ + if (msk & ~pmu_ev->msk) + continue; + + return pmu_ev; + } + + return ERR_PTR(-EINVAL); +} + +static umode_t cxl_pmu_event_is_visible(struct kobject *kobj, struct attribute *attr, int a) +{ + struct device_attribute *dev_attr = container_of(attr, struct device_attribute, attr); + struct perf_pmu_events_attr *pmu_attr = + container_of(dev_attr, struct perf_pmu_events_attr, attr); + struct device *dev = kobj_to_dev(kobj); + struct cxl_pmu_info *info = dev_get_drvdata(dev); + int vid = FIELD_GET(CXL_PMU_ATTR_CONFIG_VID_MSK, pmu_attr->id); + int gid = FIELD_GET(CXL_PMU_ATTR_CONFIG_GID_MSK, pmu_attr->id); + int msk = FIELD_GET(CXL_PMU_ATTR_CONFIG_MASK_MSK, pmu_attr->id); + + if (!IS_ERR(cxl_pmu_find_fixed_counter_ev_cap(info, vid, gid, msk))) + return attr->mode; + + if (!IS_ERR(cxl_pmu_find_config_counter_ev_cap(info, vid, gid, msk))) + return attr->mode; + + return 0; +} + +static const struct attribute_group cxl_pmu_events = { + .name = "events", + .attrs = cxl_pmu_event_attrs, + .is_visible = cxl_pmu_event_is_visible, +}; + +static ssize_t cpumask_show(struct device *dev, struct device_attribute *attr, + char *buf) +{ + struct cxl_pmu_info *info = dev_get_drvdata(dev); + + return cpumap_print_to_pagebuf(true, buf, cpumask_of(info->on_cpu)); +} +static DEVICE_ATTR_RO(cpumask); + +static struct attribute *cxl_pmu_cpumask_attrs[] = { + &dev_attr_cpumask.attr, + NULL +}; + +static const struct attribute_group cxl_pmu_cpumask_group = { + .attrs = cxl_pmu_cpumask_attrs, +}; + +static const struct attribute_group *cxl_pmu_attr_groups[] = { + &cxl_pmu_events, + &cxl_pmu_format_group, + &cxl_pmu_cpumask_group, + NULL +}; + +/* If counter_idx == NULL, don't try to allocate a counter. */ +static int cxl_pmu_get_event_idx(struct perf_event *event, int *counter_idx, + int *event_idx) +{ + struct cxl_pmu_info *info = pmu_to_cxl_pmu_info(event->pmu); + DECLARE_BITMAP(configurable_and_free, CXL_PMU_MAX_COUNTERS); + struct cxl_pmu_ev_cap *pmu_ev; + u32 mask; + u16 gid, vid; + int i; + + vid = cxl_pmu_config_get_vid(event); + gid = cxl_pmu_config_get_gid(event); + mask = cxl_pmu_config_get_mask(event); + + pmu_ev = cxl_pmu_find_fixed_counter_ev_cap(info, vid, gid, mask); + if (!IS_ERR(pmu_ev)) { + if (!counter_idx) + return 0; + if (!test_bit(pmu_ev->counter_idx, info->used_counter_bm)) { + *counter_idx = pmu_ev->counter_idx; + return 0; + } + /* Fixed counter is in use, but maybe a configurable one? */ + } + + pmu_ev = cxl_pmu_find_config_counter_ev_cap(info, vid, gid, mask); + if (!IS_ERR(pmu_ev)) { + if (!counter_idx) + return 0; + + bitmap_andnot(configurable_and_free, info->conf_counter_bm, + info->used_counter_bm, CXL_PMU_MAX_COUNTERS); + + i = find_first_bit(configurable_and_free, CXL_PMU_MAX_COUNTERS); + if (i == CXL_PMU_MAX_COUNTERS) + return -EINVAL; + + *counter_idx = i; + return 0; + } + + return -EINVAL; +} + +static int cxl_pmu_event_init(struct perf_event *event) +{ + struct cxl_pmu_info *info = pmu_to_cxl_pmu_info(event->pmu); + int rc; + + /* Top level type sanity check - is this a Hardware Event being requested */ + if (event->attr.type != event->pmu->type) + return -ENOENT; + + if (is_sampling_event(event) || event->attach_state & PERF_ATTACH_TASK) + return -EOPNOTSUPP; + /* TODO: Validation of any filter */ + + /* + * Verify that it is possible to count what was requested. Either must + * be a fixed counter that is a precise match or a configurable counter + * where this is a subset. + */ + rc = cxl_pmu_get_event_idx(event, NULL, NULL); + if (rc < 0) + return rc; + + event->cpu = info->on_cpu; + + return 0; +} + +static void cxl_pmu_enable(struct pmu *pmu) +{ + struct cxl_pmu_info *info = pmu_to_cxl_pmu_info(pmu); + void __iomem *base = info->base; + + /* Can assume frozen at this stage */ + writeq(0, base + CXL_PMU_FREEZE_REG); +} + +static void cxl_pmu_disable(struct pmu *pmu) +{ + struct cxl_pmu_info *info = pmu_to_cxl_pmu_info(pmu); + void __iomem *base = info->base; + + /* + * Whilst bits above number of counters are RsvdZ + * they are unlikely to be repurposed given + * number of counters is allowed to be 64 leaving + * no reserved bits. Hence this is only slightly + * naughty. + */ + writeq(GENMASK_ULL(63, 0), base + CXL_PMU_FREEZE_REG); +} + +static void cxl_pmu_event_start(struct perf_event *event, int flags) +{ + struct cxl_pmu_info *info = pmu_to_cxl_pmu_info(event->pmu); + struct hw_perf_event *hwc = &event->hw; + void __iomem *base = info->base; + u64 cfg; + + /* + * All paths to here should either set these flags directly or + * call cxl_pmu_event_stop() which will ensure the correct state. + */ + if (WARN_ON_ONCE(!(hwc->state & PERF_HES_STOPPED))) + return; + + WARN_ON_ONCE(!(hwc->state & PERF_HES_UPTODATE)); + hwc->state = 0; + + /* + * Currently only hdm filter control is implemnted, this code will + * want generalizing when more filters are added. + */ + if (info->filter_hdm) { + if (cxl_pmu_config1_hdm_filter_en(event)) + cfg = cxl_pmu_config2_get_hdm_decoder(event); + else + cfg = GENMASK(15, 0); /* No filtering if 0xFFFF_FFFF */ + writeq(cfg, base + CXL_PMU_FILTER_CFG_REG(hwc->idx, 0)); + } + + cfg = readq(base + CXL_PMU_COUNTER_CFG_REG(hwc->idx)); + cfg |= FIELD_PREP(CXL_PMU_COUNTER_CFG_INT_ON_OVRFLW, 1); + cfg |= FIELD_PREP(CXL_PMU_COUNTER_CFG_FREEZE_ON_OVRFLW, 1); + cfg |= FIELD_PREP(CXL_PMU_COUNTER_CFG_ENABLE, 1); + cfg |= FIELD_PREP(CXL_PMU_COUNTER_CFG_EDGE, + cxl_pmu_config1_get_edge(event) ? 1 : 0); + cfg |= FIELD_PREP(CXL_PMU_COUNTER_CFG_INVERT, + cxl_pmu_config1_get_invert(event) ? 1 : 0); + + /* Fixed purpose counters have next two fields RO */ + if (test_bit(hwc->idx, info->conf_counter_bm)) { + cfg |= FIELD_PREP(CXL_PMU_COUNTER_CFG_EVENT_GRP_ID_IDX_MSK, + hwc->event_base); + cfg |= FIELD_PREP(CXL_PMU_COUNTER_CFG_EVENTS_MSK, + cxl_pmu_config_get_mask(event)); + } + cfg &= ~CXL_PMU_COUNTER_CFG_THRESHOLD_MSK; + /* + * For events that generate only 1 count per clock the CXL 3.0 spec + * states the threshold shall be set to 1 but if set to 0 it will + * count the raw value anwyay? + * There is no definition of what events will count multiple per cycle + * and hence to which non 1 values of threshold can apply. + * (CXL 3.0 8.2.7.2.1 Counter Configuration - threshold field definition) + */ + cfg |= FIELD_PREP(CXL_PMU_COUNTER_CFG_THRESHOLD_MSK, + cxl_pmu_config1_get_threshold(event)); + writeq(cfg, base + CXL_PMU_COUNTER_CFG_REG(hwc->idx)); + + local64_set(&hwc->prev_count, 0); + writeq(0, base + CXL_PMU_COUNTER_REG(hwc->idx)); + + perf_event_update_userpage(event); +} + +static u64 cxl_pmu_read_counter(struct perf_event *event) +{ + struct cxl_pmu_info *info = pmu_to_cxl_pmu_info(event->pmu); + void __iomem *base = info->base; + + return readq(base + CXL_PMU_COUNTER_REG(event->hw.idx)); +} + +static void __cxl_pmu_read(struct perf_event *event, bool overflow) +{ + struct cxl_pmu_info *info = pmu_to_cxl_pmu_info(event->pmu); + struct hw_perf_event *hwc = &event->hw; + u64 new_cnt, prev_cnt, delta; + + do { + prev_cnt = local64_read(&hwc->prev_count); + new_cnt = cxl_pmu_read_counter(event); + } while (local64_cmpxchg(&hwc->prev_count, prev_cnt, new_cnt) != prev_cnt); + + /* + * If we know an overflow occur then take that into account. + * Note counter is not reset as that would lose events + */ + delta = (new_cnt - prev_cnt) & GENMASK_ULL(info->counter_width - 1, 0); + if (overflow && delta < GENMASK_ULL(info->counter_width - 1, 0)) + delta += (1UL << info->counter_width); + + local64_add(delta, &event->count); +} + +static void cxl_pmu_read(struct perf_event *event) +{ + __cxl_pmu_read(event, false); +} + +static void cxl_pmu_event_stop(struct perf_event *event, int flags) +{ + struct cxl_pmu_info *info = pmu_to_cxl_pmu_info(event->pmu); + void __iomem *base = info->base; + struct hw_perf_event *hwc = &event->hw; + u64 cfg; + + cxl_pmu_read(event); + WARN_ON_ONCE(hwc->state & PERF_HES_STOPPED); + hwc->state |= PERF_HES_STOPPED; + + cfg = readq(base + CXL_PMU_COUNTER_CFG_REG(hwc->idx)); + cfg &= ~(FIELD_PREP(CXL_PMU_COUNTER_CFG_INT_ON_OVRFLW, 1) | + FIELD_PREP(CXL_PMU_COUNTER_CFG_ENABLE, 1)); + writeq(cfg, base + CXL_PMU_COUNTER_CFG_REG(hwc->idx)); + + hwc->state |= PERF_HES_UPTODATE; +} + +static int cxl_pmu_event_add(struct perf_event *event, int flags) +{ + struct cxl_pmu_info *info = pmu_to_cxl_pmu_info(event->pmu); + struct hw_perf_event *hwc = &event->hw; + int idx, rc; + int event_idx = 0; + + hwc->state = PERF_HES_STOPPED | PERF_HES_UPTODATE; + + rc = cxl_pmu_get_event_idx(event, &idx, &event_idx); + if (rc < 0) + return rc; + + hwc->idx = idx; + + /* Only set for configurable counters */ + hwc->event_base = event_idx; + info->hw_events[idx] = event; + set_bit(idx, info->used_counter_bm); + + if (flags & PERF_EF_START) + cxl_pmu_event_start(event, PERF_EF_RELOAD); + + return 0; +} + +static void cxl_pmu_event_del(struct perf_event *event, int flags) +{ + struct cxl_pmu_info *info = pmu_to_cxl_pmu_info(event->pmu); + struct hw_perf_event *hwc = &event->hw; + + cxl_pmu_event_stop(event, PERF_EF_UPDATE); + clear_bit(hwc->idx, info->used_counter_bm); + info->hw_events[hwc->idx] = NULL; + perf_event_update_userpage(event); +} + +static irqreturn_t cxl_pmu_irq(int irq, void *data) +{ + struct cxl_pmu_info *info = data; + void __iomem *base = info->base; + u64 overflowed; + DECLARE_BITMAP(overflowedbm, 64); + int i; + + overflowed = readq(base + CXL_PMU_OVERFLOW_REG); + + /* Interrupt may be shared, so maybe it isn't ours */ + if (!overflowed) + return IRQ_NONE; + + bitmap_from_arr64(overflowedbm, &overflowed, 64); + for_each_set_bit(i, overflowedbm, info->num_counters) { + struct perf_event *event = info->hw_events[i]; + + if (!event) { + dev_dbg(info->pmu.dev, + "overflow but on non enabled counter %d\n", i); + continue; + } + + __cxl_pmu_read(event, true); + } + + writeq(overflowed, base + CXL_PMU_OVERFLOW_REG); + + return IRQ_HANDLED; +} + +static void cxl_pmu_perf_unregister(void *_info) +{ + struct cxl_pmu_info *info = _info; + + perf_pmu_unregister(&info->pmu); +} + +static void cxl_pmu_cpuhp_remove(void *_info) +{ + struct cxl_pmu_info *info = _info; + + cpuhp_state_remove_instance_nocalls(cxl_pmu_cpuhp_state_num, &info->node); +} + +static int cxl_pmu_probe(struct device *dev) +{ + struct cxl_pmu *pmu = to_cxl_pmu(dev); + struct pci_dev *pdev = to_pci_dev(dev->parent); + struct cxl_pmu_info *info; + char *irq_name; + char *dev_name; + int rc, irq; + + info = devm_kzalloc(dev, sizeof(*info), GFP_KERNEL); + if (!info) + return -ENOMEM; + + dev_set_drvdata(dev, info); + INIT_LIST_HEAD(&info->event_caps_fixed); + INIT_LIST_HEAD(&info->event_caps_configurable); + + info->base = pmu->base; + + info->on_cpu = -1; + rc = cxl_pmu_parse_caps(dev, info); + if (rc) + return rc; + + info->hw_events = devm_kcalloc(dev, sizeof(*info->hw_events), + info->num_counters, GFP_KERNEL); + if (!info->hw_events) + return -ENOMEM; + + switch (pmu->type) { + case CXL_PMU_MEMDEV: + dev_name = devm_kasprintf(dev, GFP_KERNEL, "cxl_pmu_mem%d.%d", + pmu->assoc_id, pmu->index); + break; + } + if (!dev_name) + return -ENOMEM; + + info->pmu = (struct pmu) { + .name = dev_name, + .parent = dev, + .module = THIS_MODULE, + .event_init = cxl_pmu_event_init, + .pmu_enable = cxl_pmu_enable, + .pmu_disable = cxl_pmu_disable, + .add = cxl_pmu_event_add, + .del = cxl_pmu_event_del, + .start = cxl_pmu_event_start, + .stop = cxl_pmu_event_stop, + .read = cxl_pmu_read, + .task_ctx_nr = perf_invalid_context, + .attr_groups = cxl_pmu_attr_groups, + .capabilities = PERF_PMU_CAP_NO_EXCLUDE, + }; + + if (info->irq <= 0) + return -EINVAL; + + rc = pci_irq_vector(pdev, info->irq); + if (rc < 0) + return rc; + irq = rc; + + irq_name = devm_kasprintf(dev, GFP_KERNEL, "%s_overflow\n", dev_name); + if (!irq_name) + return -ENOMEM; + + rc = devm_request_irq(dev, irq, cxl_pmu_irq, IRQF_SHARED | IRQF_ONESHOT, + irq_name, info); + if (rc) + return rc; + info->irq = irq; + + rc = cpuhp_state_add_instance(cxl_pmu_cpuhp_state_num, &info->node); + if (rc) + return rc; + + rc = devm_add_action_or_reset(dev, cxl_pmu_cpuhp_remove, info); + if (rc) + return rc; + + rc = perf_pmu_register(&info->pmu, info->pmu.name, -1); + if (rc) + return rc; + + rc = devm_add_action_or_reset(dev, cxl_pmu_perf_unregister, info); + if (rc) + return rc; + + return 0; +} + +static struct cxl_driver cxl_pmu_driver = { + .name = "cxl_pmu", + .probe = cxl_pmu_probe, + .id = CXL_DEVICE_PMU, +}; + +static int cxl_pmu_online_cpu(unsigned int cpu, struct hlist_node *node) +{ + struct cxl_pmu_info *info = hlist_entry_safe(node, struct cxl_pmu_info, node); + + if (info->on_cpu != -1) + return 0; + + info->on_cpu = cpu; + /* + * CPU HP lock is held so we should be guaranteed that the CPU hasn't yet + * gone away again. + */ + WARN_ON(irq_set_affinity(info->irq, cpumask_of(cpu))); + + return 0; +} + +static int cxl_pmu_offline_cpu(unsigned int cpu, struct hlist_node *node) +{ + struct cxl_pmu_info *info = hlist_entry_safe(node, struct cxl_pmu_info, node); + unsigned int target; + + if (info->on_cpu != cpu) + return 0; + + info->on_cpu = -1; + target = cpumask_any_but(cpu_online_mask, cpu); + if (target >= nr_cpu_ids) { + dev_err(info->pmu.dev, "Unable to find a suitable CPU\n"); + return 0; + } + + perf_pmu_migrate_context(&info->pmu, cpu, target); + info->on_cpu = target; + /* + * CPU HP lock is held so we should be guaranteed that this CPU hasn't yet + * gone away. + */ + WARN_ON(irq_set_affinity(info->irq, cpumask_of(target))); + + return 0; +} + +static __init int cxl_pmu_init(void) +{ + int rc; + + rc = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN, + "AP_PERF_CXL_PMU_ONLINE", + cxl_pmu_online_cpu, cxl_pmu_offline_cpu); + if (rc < 0) + return rc; + cxl_pmu_cpuhp_state_num = rc; + + rc = cxl_driver_register(&cxl_pmu_driver); + if (rc) + cpuhp_remove_multi_state(cxl_pmu_cpuhp_state_num); + + return rc; +} + +static __exit void cxl_pmu_exit(void) +{ + cxl_driver_unregister(&cxl_pmu_driver); + cpuhp_remove_multi_state(cxl_pmu_cpuhp_state_num); +} + +MODULE_LICENSE("GPL"); +MODULE_IMPORT_NS(CXL); +module_init(cxl_pmu_init); +module_exit(cxl_pmu_exit); +MODULE_ALIAS_CXL(CXL_DEVICE_PMU); |