Diffstat (limited to 'drivers/dma/idxd')
-rw-r--r--   drivers/dma/idxd/Makefile     |   2
-rw-r--r--   drivers/dma/idxd/cdev.c       | 132
-rw-r--r--   drivers/dma/idxd/device.c     | 346
-rw-r--r--   drivers/dma/idxd/dma.c        |  77
-rw-r--r--   drivers/dma/idxd/idxd.h       | 171
-rw-r--r--   drivers/dma/idxd/init.c       | 496
-rw-r--r--   drivers/dma/idxd/irq.c        |  33
-rw-r--r--   drivers/dma/idxd/perfmon.c    | 662
-rw-r--r--   drivers/dma/idxd/perfmon.h    | 119
-rw-r--r--   drivers/dma/idxd/registers.h  | 120
-rw-r--r--   drivers/dma/idxd/submit.c     |  42
-rw-r--r--   drivers/dma/idxd/sysfs.c      | 795
12 files changed, 2204 insertions(+), 791 deletions(-)
diff --git a/drivers/dma/idxd/Makefile b/drivers/dma/idxd/Makefile
index 8978b898d777..6d11558756f8 100644
--- a/drivers/dma/idxd/Makefile
+++ b/drivers/dma/idxd/Makefile
@@ -1,2 +1,4 @@
 obj-$(CONFIG_INTEL_IDXD) += idxd.o
 idxd-y := init.o irq.o device.o sysfs.o submit.o dma.o cdev.o
+
+idxd-$(CONFIG_INTEL_IDXD_PERFMON) += perfmon.o
diff --git a/drivers/dma/idxd/cdev.c b/drivers/dma/idxd/cdev.c
index 0db9b82ed8cf..302cba5ff779 100644
--- a/drivers/dma/idxd/cdev.c
+++ b/drivers/dma/idxd/cdev.c
@@ -39,15 +39,15 @@ struct idxd_user_context {
 	struct iommu_sva *sva;
 };
 
-enum idxd_cdev_cleanup {
-	CDEV_NORMAL = 0,
-	CDEV_FAILED,
-};
-
 static void idxd_cdev_dev_release(struct device *dev)
 {
-	dev_dbg(dev, "releasing cdev device\n");
-	kfree(dev);
+	struct idxd_cdev *idxd_cdev = container_of(dev, struct idxd_cdev, dev);
+	struct idxd_cdev_context *cdev_ctx;
+	struct idxd_wq *wq = idxd_cdev->wq;
+
+	cdev_ctx = &ictx[wq->idxd->data->type];
+	ida_simple_remove(&cdev_ctx->minor_ida, idxd_cdev->minor);
+	kfree(idxd_cdev);
 }
 
 static struct device_type idxd_cdev_device_type = {
@@ -62,14 +62,11 @@ static inline struct idxd_cdev *inode_idxd_cdev(struct inode *inode)
 	return container_of(cdev, struct idxd_cdev, cdev);
 }
 
-static inline struct idxd_wq *idxd_cdev_wq(struct idxd_cdev *idxd_cdev)
-{
-	return container_of(idxd_cdev, struct idxd_wq, idxd_cdev);
-}
-
 static inline struct idxd_wq *inode_wq(struct inode *inode)
 {
-	return idxd_cdev_wq(inode_idxd_cdev(inode));
+	struct idxd_cdev *idxd_cdev = inode_idxd_cdev(inode);
+
+	return idxd_cdev->wq;
 }
 
 static int idxd_cdev_open(struct inode *inode, struct file *filp)
@@ -220,11 +217,10 @@ static __poll_t idxd_cdev_poll(struct file *filp,
 	struct idxd_user_context *ctx = filp->private_data;
 	struct idxd_wq *wq = ctx->wq;
 	struct idxd_device *idxd = wq->idxd;
-	struct idxd_cdev *idxd_cdev = &wq->idxd_cdev;
 	unsigned long flags;
 	__poll_t out = 0;
 
-	poll_wait(filp, &idxd_cdev->err_queue, wait);
+	poll_wait(filp, &wq->err_queue, wait);
 	spin_lock_irqsave(&idxd->dev_lock, flags);
 	if (idxd->sw_err.valid)
 		out = EPOLLIN | EPOLLRDNORM;
@@ -243,101 +239,69 @@ static const struct file_operations idxd_cdev_fops = {
 
 int idxd_cdev_get_major(struct idxd_device *idxd)
 {
-	return MAJOR(ictx[idxd->type].devt);
+	return MAJOR(ictx[idxd->data->type].devt);
 }
 
-static int idxd_wq_cdev_dev_setup(struct idxd_wq *wq)
+int idxd_wq_add_cdev(struct idxd_wq *wq)
 {
 	struct idxd_device *idxd = wq->idxd;
-	struct idxd_cdev *idxd_cdev = &wq->idxd_cdev;
-	struct idxd_cdev_context *cdev_ctx;
+	struct idxd_cdev *idxd_cdev;
+	struct cdev *cdev;
 	struct device *dev;
-	int minor, rc;
+	struct idxd_cdev_context *cdev_ctx;
+	int rc, minor;
 
-	idxd_cdev->dev = kzalloc(sizeof(*idxd_cdev->dev), GFP_KERNEL);
-	if (!idxd_cdev->dev)
+	idxd_cdev = kzalloc(sizeof(*idxd_cdev), GFP_KERNEL);
+	if (!idxd_cdev)
 		return -ENOMEM;
 
-	dev = idxd_cdev->dev;
-	dev->parent = &idxd->pdev->dev;
-	dev_set_name(dev, "%s/wq%u.%u", idxd_get_dev_name(idxd),
-		     idxd->id, wq->id);
-	dev->bus = idxd_get_bus_type(idxd);
-
-	cdev_ctx = &ictx[wq->idxd->type];
+	idxd_cdev->wq = wq;
+	cdev = &idxd_cdev->cdev;
+	dev = &idxd_cdev->dev;
+	cdev_ctx = &ictx[wq->idxd->data->type];
 	minor = ida_simple_get(&cdev_ctx->minor_ida, 0, MINORMASK, GFP_KERNEL);
 	if (minor < 0) {
-		rc = minor;
-		kfree(dev);
-		goto ida_err;
-	}
-
-	dev->devt = MKDEV(MAJOR(cdev_ctx->devt), minor);
-	dev->type = &idxd_cdev_device_type;
-	rc = device_register(dev);
-	if (rc < 0) {
-		dev_err(&idxd->pdev->dev, "device register failed\n");
-		goto dev_reg_err;
+		kfree(idxd_cdev);
+		return minor;
 	}
 	idxd_cdev->minor = minor;
 
-	return 0;
-
- dev_reg_err:
-	ida_simple_remove(&cdev_ctx->minor_ida, MINOR(dev->devt));
-	put_device(dev);
- ida_err:
-	idxd_cdev->dev = NULL;
-	return rc;
-}
-
-static void idxd_wq_cdev_cleanup(struct idxd_wq *wq,
-				 enum idxd_cdev_cleanup cdev_state)
-{
-	struct idxd_cdev *idxd_cdev = &wq->idxd_cdev;
-	struct idxd_cdev_context *cdev_ctx;
-
-	cdev_ctx = &ictx[wq->idxd->type];
-	if (cdev_state == CDEV_NORMAL)
-		cdev_del(&idxd_cdev->cdev);
-	device_unregister(idxd_cdev->dev);
-	/*
-	 * The device_type->release() will be called on the device and free
-	 * the allocated struct device. We can just forget it.
-	 */
-	ida_simple_remove(&cdev_ctx->minor_ida, idxd_cdev->minor);
-	idxd_cdev->dev = NULL;
-	idxd_cdev->minor = -1;
-}
-
-int idxd_wq_add_cdev(struct idxd_wq *wq)
-{
-	struct idxd_cdev *idxd_cdev = &wq->idxd_cdev;
-	struct cdev *cdev = &idxd_cdev->cdev;
-	struct device *dev;
-	int rc;
+	device_initialize(dev);
+	dev->parent = &wq->conf_dev;
+	dev->bus = &dsa_bus_type;
+	dev->type = &idxd_cdev_device_type;
+	dev->devt = MKDEV(MAJOR(cdev_ctx->devt), minor);
 
-	rc = idxd_wq_cdev_dev_setup(wq);
+	rc = dev_set_name(dev, "%s/wq%u.%u", idxd->data->name_prefix, idxd->id, wq->id);
 	if (rc < 0)
-		return rc;
+		goto err;
 
-	dev = idxd_cdev->dev;
+	wq->idxd_cdev = idxd_cdev;
 	cdev_init(cdev, &idxd_cdev_fops);
-	cdev_set_parent(cdev, &dev->kobj);
-	rc = cdev_add(cdev, dev->devt, 1);
+	rc = cdev_device_add(cdev, dev);
 	if (rc) {
 		dev_dbg(&wq->idxd->pdev->dev, "cdev_add failed: %d\n", rc);
-		idxd_wq_cdev_cleanup(wq, CDEV_FAILED);
-		return rc;
+		goto err;
 	}
 
-	init_waitqueue_head(&idxd_cdev->err_queue);
 	return 0;
+
+ err:
+	put_device(dev);
+	wq->idxd_cdev = NULL;
+	return rc;
 }
 
 void idxd_wq_del_cdev(struct idxd_wq *wq)
 {
-	idxd_wq_cdev_cleanup(wq, CDEV_NORMAL);
+	struct idxd_cdev *idxd_cdev;
+	struct idxd_cdev_context *cdev_ctx;
+
+	cdev_ctx = &ictx[wq->idxd->data->type];
+	idxd_cdev = wq->idxd_cdev;
+	wq->idxd_cdev = NULL;
+	cdev_device_del(&idxd_cdev->cdev, &idxd_cdev->dev);
+	put_device(&idxd_cdev->dev);
 }
 
 int idxd_cdev_register(void)
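[Editor's note] The cdev.c rework above adopts the embedded-struct-device lifetime pattern: the struct device lives inside struct idxd_cdev, the containing object is freed only from the device's release() callback, and cdev_device_add()/cdev_device_del() register and unregister the cdev/device pair as a unit. A minimal stand-alone sketch of that pattern follows; the foo_* names are hypothetical and not part of the driver.

#include <linux/cdev.h>
#include <linux/device.h>
#include <linux/slab.h>

struct foo {
	struct cdev cdev;
	struct device dev;	/* embedded, not a pointer */
};

static const struct file_operations foo_fops; /* assumed defined elsewhere */

static void foo_dev_release(struct device *dev)
{
	/* release() is the only safe place to free the containing object */
	kfree(container_of(dev, struct foo, dev));
}

static int foo_add(struct device *parent, dev_t devt)
{
	struct foo *foo = kzalloc(sizeof(*foo), GFP_KERNEL);

	if (!foo)
		return -ENOMEM;
	device_initialize(&foo->dev);	/* refcount 1, not yet visible */
	foo->dev.parent = parent;
	foo->dev.release = foo_dev_release;
	foo->dev.devt = devt;
	cdev_init(&foo->cdev, &foo_fops);
	/* registers cdev and device together; undone by cdev_device_del() */
	return cdev_device_add(&foo->cdev, &foo->dev);
}

If cdev_device_add() fails, the caller drops its reference with put_device(&foo->dev), which runs foo_dev_release() and frees the allocation; this mirrors the err: path in idxd_wq_add_cdev() above.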
diff --git a/drivers/dma/idxd/device.c b/drivers/dma/idxd/device.c
index 84a6ea60ecf0..420b93fe5feb 100644
--- a/drivers/dma/idxd/device.c
+++ b/drivers/dma/idxd/device.c
@@ -19,7 +19,7 @@ static void idxd_cmd_exec(struct idxd_device *idxd, int cmd_code, u32 operand,
 /* Interrupt control bits */
 void idxd_mask_msix_vector(struct idxd_device *idxd, int vec_id)
 {
-	struct irq_data *data = irq_get_irq_data(idxd->msix_entries[vec_id].vector);
+	struct irq_data *data = irq_get_irq_data(idxd->irq_entries[vec_id].vector);
 
 	pci_msi_mask_irq(data);
 }
@@ -36,7 +36,7 @@ void idxd_mask_msix_vectors(struct idxd_device *idxd)
 
 void idxd_unmask_msix_vector(struct idxd_device *idxd, int vec_id)
 {
-	struct irq_data *data = irq_get_irq_data(idxd->msix_entries[vec_id].vector);
+	struct irq_data *data = irq_get_irq_data(idxd->irq_entries[vec_id].vector);
 
 	pci_msi_unmask_irq(data);
 }
@@ -47,6 +47,7 @@ void idxd_unmask_error_interrupts(struct idxd_device *idxd)
 
 	genctrl.bits = ioread32(idxd->reg_base + IDXD_GENCTRL_OFFSET);
 	genctrl.softerr_int_en = 1;
+	genctrl.halt_int_en = 1;
 	iowrite32(genctrl.bits, idxd->reg_base + IDXD_GENCTRL_OFFSET);
 }
 
@@ -56,6 +57,7 @@ void idxd_mask_error_interrupts(struct idxd_device *idxd)
 
 	genctrl.bits = ioread32(idxd->reg_base + IDXD_GENCTRL_OFFSET);
 	genctrl.softerr_int_en = 0;
+	genctrl.halt_int_en = 0;
 	iowrite32(genctrl.bits, idxd->reg_base + IDXD_GENCTRL_OFFSET);
 }
 
@@ -144,14 +146,8 @@ int idxd_wq_alloc_resources(struct idxd_wq *wq)
 	if (rc < 0)
 		return rc;
 
-	if (idxd->type == IDXD_TYPE_DSA)
-		align = 32;
-	else if (idxd->type == IDXD_TYPE_IAX)
-		align = 64;
-	else
-		return -ENODEV;
-
-	wq->compls_size = num_descs * idxd->compl_size + align;
+	align = idxd->data->align;
+	wq->compls_size = num_descs * idxd->data->compl_size + align;
 	wq->compls_raw = dma_alloc_coherent(dev, wq->compls_size,
 					    &wq->compls_addr_raw, GFP_KERNEL);
 	if (!wq->compls_raw) {
@@ -178,16 +174,14 @@ int idxd_wq_alloc_resources(struct idxd_wq *wq)
 		struct idxd_desc *desc = wq->descs[i];
 
 		desc->hw = wq->hw_descs[i];
-		if (idxd->type == IDXD_TYPE_DSA)
+		if (idxd->data->type == IDXD_TYPE_DSA)
 			desc->completion = &wq->compls[i];
-		else if (idxd->type == IDXD_TYPE_IAX)
+		else if (idxd->data->type == IDXD_TYPE_IAX)
 			desc->iax_completion = &wq->iax_compls[i];
-		desc->compl_dma = wq->compls_addr + idxd->compl_size * i;
+		desc->compl_dma = wq->compls_addr + idxd->data->compl_size * i;
 		desc->id = i;
 		desc->wq = wq;
 		desc->cpu = -1;
-		dma_async_tx_descriptor_init(&desc->txd, &wq->dma_chan);
-		desc->txd.tx_submit = idxd_dma_tx_submit;
 	}
 
 	return 0;
@@ -282,6 +276,22 @@ void idxd_wq_drain(struct idxd_wq *wq)
 	idxd_cmd_exec(idxd, IDXD_CMD_DRAIN_WQ, operand, NULL);
 }
 
+void idxd_wq_reset(struct idxd_wq *wq)
+{
+	struct idxd_device *idxd = wq->idxd;
+	struct device *dev = &idxd->pdev->dev;
+	u32 operand;
+
+	if (wq->state != IDXD_WQ_ENABLED) {
+		dev_dbg(dev, "WQ %d in wrong state: %d\n", wq->id, wq->state);
+		return;
+	}
+
+	operand = BIT(wq->id % 16) | ((wq->id / 16) << 16);
+	idxd_cmd_exec(idxd, IDXD_CMD_RESET_WQ, operand, NULL);
+	wq->state = IDXD_WQ_DISABLED;
+}
+
 int idxd_wq_map_portal(struct idxd_wq *wq)
 {
 	struct idxd_device *idxd = wq->idxd;
@@ -304,6 +314,19 @@ void idxd_wq_unmap_portal(struct idxd_wq *wq)
 	struct device *dev = &wq->idxd->pdev->dev;
 
 	devm_iounmap(dev, wq->portal);
+	wq->portal = NULL;
+}
+
+void idxd_wqs_unmap_portal(struct idxd_device *idxd)
+{
+	int i;
+
+	for (i = 0; i < idxd->max_wqs; i++) {
+		struct idxd_wq *wq = idxd->wqs[i];
+
+		if (wq->portal)
+			idxd_wq_unmap_portal(wq);
+	}
 }
 
 int idxd_wq_set_pasid(struct idxd_wq *wq, int pasid)
@@ -363,8 +386,6 @@ int idxd_wq_disable_pasid(struct idxd_wq *wq)
 void idxd_wq_disable_cleanup(struct idxd_wq *wq)
 {
 	struct idxd_device *idxd = wq->idxd;
-	struct device *dev = &idxd->pdev->dev;
-	int i, wq_offset;
 
 	lockdep_assert_held(&idxd->dev_lock);
 	memset(wq->wqcfg, 0, idxd->wqcfg_size);
@@ -376,14 +397,32 @@ void idxd_wq_disable_cleanup(struct idxd_wq *wq)
 	wq->ats_dis = 0;
 	clear_bit(WQ_FLAG_DEDICATED, &wq->flags);
 	memset(wq->name, 0, WQ_NAME_SIZE);
+}
 
-	for (i = 0; i < WQCFG_STRIDES(idxd); i++) {
-		wq_offset = WQCFG_OFFSET(idxd, wq->id, i);
-		iowrite32(0, idxd->reg_base + wq_offset);
-		dev_dbg(dev, "WQ[%d][%d][%#x]: %#x\n",
-			wq->id, i, wq_offset,
-			ioread32(idxd->reg_base + wq_offset));
-	}
+static void idxd_wq_ref_release(struct percpu_ref *ref)
+{
+	struct idxd_wq *wq = container_of(ref, struct idxd_wq, wq_active);
+
+	complete(&wq->wq_dead);
+}
+
+int idxd_wq_init_percpu_ref(struct idxd_wq *wq)
+{
+	int rc;
+
+	memset(&wq->wq_active, 0, sizeof(wq->wq_active));
+	rc = percpu_ref_init(&wq->wq_active, idxd_wq_ref_release, 0, GFP_KERNEL);
+	if (rc < 0)
+		return rc;
+	reinit_completion(&wq->wq_dead);
+	return 0;
+}
+
+void idxd_wq_quiesce(struct idxd_wq *wq)
+{
+	percpu_ref_kill(&wq->wq_active);
+	wait_for_completion(&wq->wq_dead);
+	percpu_ref_exit(&wq->wq_active);
 }
 
 /* Device control bits */
@@ -426,13 +465,13 @@ int idxd_device_init_reset(struct idxd_device *idxd)
 	memset(&cmd, 0, sizeof(cmd));
 	cmd.cmd = IDXD_CMD_RESET_DEVICE;
 	dev_dbg(dev, "%s: sending reset for init.\n", __func__);
-	spin_lock_irqsave(&idxd->dev_lock, flags);
+	spin_lock_irqsave(&idxd->cmd_lock, flags);
 	iowrite32(cmd.bits, idxd->reg_base + IDXD_CMD_OFFSET);
 
 	while (ioread32(idxd->reg_base + IDXD_CMDSTS_OFFSET) &
 	       IDXD_CMDSTS_ACTIVE)
 		cpu_relax();
-	spin_unlock_irqrestore(&idxd->dev_lock, flags);
+	spin_unlock_irqrestore(&idxd->cmd_lock, flags);
 	return 0;
 }
 
@@ -445,7 +484,8 @@ static void idxd_cmd_exec(struct idxd_device *idxd, int cmd_code, u32 operand,
 
 	if (idxd_device_is_halted(idxd)) {
 		dev_warn(&idxd->pdev->dev, "Device is HALTED!\n");
-		*status = IDXD_CMDSTS_HW_ERR;
+		if (status)
+			*status = IDXD_CMDSTS_HW_ERR;
 		return;
 	}
 
@@ -454,10 +494,10 @@ static void idxd_cmd_exec(struct idxd_device *idxd, int cmd_code, u32 operand,
 	cmd.operand = operand;
 	cmd.int_req = 1;
 
-	spin_lock_irqsave(&idxd->dev_lock, flags);
+	spin_lock_irqsave(&idxd->cmd_lock, flags);
 	wait_event_lock_irq(idxd->cmd_waitq,
 			    !test_bit(IDXD_FLAG_CMD_RUNNING, &idxd->flags),
-			    idxd->dev_lock);
+			    idxd->cmd_lock);
 
 	dev_dbg(&idxd->pdev->dev, "%s: sending cmd: %#x op: %#x\n",
 		__func__, cmd_code, operand);
@@ -471,9 +511,9 @@ static void idxd_cmd_exec(struct idxd_device *idxd, int cmd_code, u32 operand,
 	 * After command submitted, release lock and go to sleep until
	 * the command completes via interrupt.
 	 */
-	spin_unlock_irqrestore(&idxd->dev_lock, flags);
+	spin_unlock_irqrestore(&idxd->cmd_lock, flags);
 	wait_for_completion(&done);
-	spin_lock_irqsave(&idxd->dev_lock, flags);
+	spin_lock_irqsave(&idxd->cmd_lock, flags);
 	if (status) {
 		*status = ioread32(idxd->reg_base + IDXD_CMDSTS_OFFSET);
 		idxd->cmd_status = *status & GENMASK(7, 0);
@@ -482,7 +522,7 @@ static void idxd_cmd_exec(struct idxd_device *idxd, int cmd_code, u32 operand,
 	__clear_bit(IDXD_FLAG_CMD_RUNNING, &idxd->flags);
 	/* Wake up other pending commands */
 	wake_up(&idxd->cmd_waitq);
-	spin_unlock_irqrestore(&idxd->dev_lock, flags);
+	spin_unlock_irqrestore(&idxd->cmd_lock, flags);
 }
 
 int idxd_device_enable(struct idxd_device *idxd)
@@ -515,7 +555,7 @@ void idxd_device_wqs_clear_state(struct idxd_device *idxd)
 	lockdep_assert_held(&idxd->dev_lock);
 
 	for (i = 0; i < idxd->max_wqs; i++) {
-		struct idxd_wq *wq = &idxd->wqs[i];
+		struct idxd_wq *wq = idxd->wqs[i];
 
 		if (wq->state == IDXD_WQ_ENABLED) {
 			idxd_wq_disable_cleanup(wq);
@@ -573,7 +613,108 @@ void idxd_device_drain_pasid(struct idxd_device *idxd, int pasid)
 	dev_dbg(dev, "pasid %d drained\n", pasid);
 }
 
+int idxd_device_request_int_handle(struct idxd_device *idxd, int idx, int *handle,
+				   enum idxd_interrupt_type irq_type)
+{
+	struct device *dev = &idxd->pdev->dev;
+	u32 operand, status;
+
+	if (!(idxd->hw.cmd_cap & BIT(IDXD_CMD_REQUEST_INT_HANDLE)))
+		return -EOPNOTSUPP;
+
+	dev_dbg(dev, "get int handle, idx %d\n", idx);
+
+	operand = idx & GENMASK(15, 0);
+	if (irq_type == IDXD_IRQ_IMS)
+		operand |= CMD_INT_HANDLE_IMS;
+
+	dev_dbg(dev, "cmd: %u operand: %#x\n", IDXD_CMD_REQUEST_INT_HANDLE, operand);
+
+	idxd_cmd_exec(idxd, IDXD_CMD_REQUEST_INT_HANDLE, operand, &status);
+
+	if ((status & IDXD_CMDSTS_ERR_MASK) != IDXD_CMDSTS_SUCCESS) {
+		dev_dbg(dev, "request int handle failed: %#x\n", status);
+		return -ENXIO;
+	}
+
+	*handle = (status >> IDXD_CMDSTS_RES_SHIFT) & GENMASK(15, 0);
+
+	dev_dbg(dev, "int handle acquired: %u\n", *handle);
+	return 0;
+}
+
+int idxd_device_release_int_handle(struct idxd_device *idxd, int handle,
+				   enum idxd_interrupt_type irq_type)
+{
+	struct device *dev = &idxd->pdev->dev;
+	u32 operand, status;
+	union idxd_command_reg cmd;
+	unsigned long flags;
+
+	if (!(idxd->hw.cmd_cap & BIT(IDXD_CMD_RELEASE_INT_HANDLE)))
+		return -EOPNOTSUPP;
+
+	dev_dbg(dev, "release int handle, handle %d\n", handle);
+
+	memset(&cmd, 0, sizeof(cmd));
+	operand = handle & GENMASK(15, 0);
+
+	if (irq_type == IDXD_IRQ_IMS)
+		operand |= CMD_INT_HANDLE_IMS;
+
+	cmd.cmd = IDXD_CMD_RELEASE_INT_HANDLE;
+	cmd.operand = operand;
+
+	dev_dbg(dev, "cmd: %u operand: %#x\n", IDXD_CMD_RELEASE_INT_HANDLE, operand);
+
+	spin_lock_irqsave(&idxd->cmd_lock, flags);
+	iowrite32(cmd.bits, idxd->reg_base + IDXD_CMD_OFFSET);
+
+	while (ioread32(idxd->reg_base + IDXD_CMDSTS_OFFSET) & IDXD_CMDSTS_ACTIVE)
+		cpu_relax();
+	status = ioread32(idxd->reg_base + IDXD_CMDSTS_OFFSET);
+	spin_unlock_irqrestore(&idxd->cmd_lock, flags);
+
+	if ((status & IDXD_CMDSTS_ERR_MASK) != IDXD_CMDSTS_SUCCESS) {
+		dev_dbg(dev, "release int handle failed: %#x\n", status);
+		return -ENXIO;
+	}
+
+	dev_dbg(dev, "int handle released.\n");
+	return 0;
+}
+
 /* Device configuration bits */
+void idxd_msix_perm_setup(struct idxd_device *idxd)
+{
+	union msix_perm mperm;
+	int i, msixcnt;
+
+	msixcnt = pci_msix_vec_count(idxd->pdev);
+	if (msixcnt < 0)
+		return;
+
+	mperm.bits = 0;
+	mperm.pasid = idxd->pasid;
+	mperm.pasid_en = device_pasid_enabled(idxd);
+	for (i = 1; i < msixcnt; i++)
+		iowrite32(mperm.bits, idxd->reg_base + idxd->msix_perm_offset + i * 8);
+}
+
+void idxd_msix_perm_clear(struct idxd_device *idxd)
+{
+	union msix_perm mperm;
+	int i, msixcnt;
+
+	msixcnt = pci_msix_vec_count(idxd->pdev);
+	if (msixcnt < 0)
+		return;
+
+	mperm.bits = 0;
+	for (i = 1; i < msixcnt; i++)
+		iowrite32(mperm.bits, idxd->reg_base + idxd->msix_perm_offset + i * 8);
+}
+
 static void idxd_group_config_write(struct idxd_group *group)
 {
 	struct idxd_device *idxd = group->idxd;
@@ -624,7 +765,7 @@ static int idxd_groups_config_write(struct idxd_device *idxd)
 		ioread32(idxd->reg_base + IDXD_GENCFG_OFFSET));
 
 	for (i = 0; i < idxd->max_groups; i++) {
-		struct idxd_group *group = &idxd->groups[i];
+		struct idxd_group *group = idxd->groups[i];
 
 		idxd_group_config_write(group);
 	}
@@ -642,7 +783,14 @@ static int idxd_wq_config_write(struct idxd_wq *wq)
 	if (!wq->group)
 		return 0;
 
-	memset(wq->wqcfg, 0, idxd->wqcfg_size);
+	/*
+	 * Instead of memset the entire shadow copy of WQCFG, copy from the hardware after
+	 * wq reset. This will copy back the sticky values that are present on some devices.
+	 */
+	for (i = 0; i < WQCFG_STRIDES(idxd); i++) {
+		wq_offset = WQCFG_OFFSET(idxd, wq->id, i);
+		wq->wqcfg->bits[i] = ioread32(idxd->reg_base + wq_offset);
+	}
 
 	/* byte 0-3 */
 	wq->wqcfg->wq_size = wq->size;
@@ -696,7 +844,7 @@ static int idxd_wqs_config_write(struct idxd_device *idxd)
 	int i, rc;
 
 	for (i = 0; i < idxd->max_wqs; i++) {
-		struct idxd_wq *wq = &idxd->wqs[i];
+		struct idxd_wq *wq = idxd->wqs[i];
 
 		rc = idxd_wq_config_write(wq);
 		if (rc < 0)
@@ -712,7 +860,7 @@ static void idxd_group_flags_setup(struct idxd_device *idxd)
 
 	/* TC-A 0 and TC-B 1 should be defaults */
 	for (i = 0; i < idxd->max_groups; i++) {
-		struct idxd_group *group = &idxd->groups[i];
+		struct idxd_group *group = idxd->groups[i];
 
 		if (group->tc_a == -1)
 			group->tc_a = group->grpcfg.flags.tc_a = 0;
@@ -739,12 +887,12 @@ static int idxd_engines_setup(struct idxd_device *idxd)
 	struct idxd_group *group;
 
 	for (i = 0; i < idxd->max_groups; i++) {
-		group = &idxd->groups[i];
+		group = idxd->groups[i];
 		group->grpcfg.engines = 0;
 	}
 
 	for (i = 0; i < idxd->max_engines; i++) {
-		eng = &idxd->engines[i];
+		eng = idxd->engines[i];
 		group = eng->group;
 
 		if (!group)
@@ -768,13 +916,13 @@ static int idxd_wqs_setup(struct idxd_device *idxd)
 	struct device *dev = &idxd->pdev->dev;
 
 	for (i = 0; i < idxd->max_groups; i++) {
-		group = &idxd->groups[i];
+		group = idxd->groups[i];
 		for (j = 0; j < 4; j++)
 			group->grpcfg.wqs[j] = 0;
 	}
 
 	for (i = 0; i < idxd->max_wqs; i++) {
-		wq = &idxd->wqs[i];
+		wq = idxd->wqs[i];
 		group = wq->group;
 
 		if (!wq->group)
@@ -822,3 +970,119 @@ int idxd_device_config(struct idxd_device *idxd)
 
 	return 0;
 }
+
+static int idxd_wq_load_config(struct idxd_wq *wq)
+{
+	struct idxd_device *idxd = wq->idxd;
+	struct device *dev = &idxd->pdev->dev;
+	int wqcfg_offset;
+	int i;
+
+	wqcfg_offset = WQCFG_OFFSET(idxd, wq->id, 0);
+	memcpy_fromio(wq->wqcfg, idxd->reg_base + wqcfg_offset, idxd->wqcfg_size);
+
+	wq->size = wq->wqcfg->wq_size;
+	wq->threshold = wq->wqcfg->wq_thresh;
+	if (wq->wqcfg->priv)
+		wq->type = IDXD_WQT_KERNEL;
+
+	/* The driver does not support shared WQ mode in read-only config yet */
+	if (wq->wqcfg->mode == 0 || wq->wqcfg->pasid_en)
+		return -EOPNOTSUPP;
+
+	set_bit(WQ_FLAG_DEDICATED, &wq->flags);
+
+	wq->priority = wq->wqcfg->priority;
+
+	for (i = 0; i < WQCFG_STRIDES(idxd); i++) {
+		wqcfg_offset = WQCFG_OFFSET(idxd, wq->id, i);
+		dev_dbg(dev, "WQ[%d][%d][%#x]: %#x\n", wq->id, i, wqcfg_offset, wq->wqcfg->bits[i]);
+	}
+
+	return 0;
+}
+
+static void idxd_group_load_config(struct idxd_group *group)
+{
+	struct idxd_device *idxd = group->idxd;
+	struct device *dev = &idxd->pdev->dev;
+	int i, j, grpcfg_offset;
+
+	/*
+	 * Load WQS bit fields
+	 * Iterate through all 256 bits 64 bits at a time
	 */
+	for (i = 0; i < GRPWQCFG_STRIDES; i++) {
+		struct idxd_wq *wq;
+
+		grpcfg_offset = GRPWQCFG_OFFSET(idxd, group->id, i);
+		group->grpcfg.wqs[i] = ioread64(idxd->reg_base + grpcfg_offset);
+		dev_dbg(dev, "GRPCFG wq[%d:%d: %#x]: %#llx\n",
+			group->id, i, grpcfg_offset, group->grpcfg.wqs[i]);
+
+		if (i * 64 >= idxd->max_wqs)
+			break;
+
+		/* Iterate through all 64 bits and check for wq set */
+		for (j = 0; j < 64; j++) {
+			int id = i * 64 + j;
+
+			/* No need to check beyond max wqs */
+			if (id >= idxd->max_wqs)
+				break;
+
+			/* Set group assignment for wq if wq bit is set */
+			if (group->grpcfg.wqs[i] & BIT(j)) {
+				wq = idxd->wqs[id];
+				wq->group = group;
+			}
+		}
+	}
+
+	grpcfg_offset = GRPENGCFG_OFFSET(idxd, group->id);
+	group->grpcfg.engines = ioread64(idxd->reg_base + grpcfg_offset);
+	dev_dbg(dev, "GRPCFG engs[%d: %#x]: %#llx\n", group->id,
+		grpcfg_offset, group->grpcfg.engines);
+
+	/* Iterate through all 64 bits to check engines set */
+	for (i = 0; i < 64; i++) {
+		if (i >= idxd->max_engines)
+			break;
+
+		if (group->grpcfg.engines & BIT(i)) {
+			struct idxd_engine *engine = idxd->engines[i];
+
+			engine->group = group;
+		}
+	}
+
+	grpcfg_offset = GRPFLGCFG_OFFSET(idxd, group->id);
+	group->grpcfg.flags.bits = ioread32(idxd->reg_base + grpcfg_offset);
+	dev_dbg(dev, "GRPFLAGS flags[%d: %#x]: %#x\n",
+		group->id, grpcfg_offset, group->grpcfg.flags.bits);
+}
+
+int idxd_device_load_config(struct idxd_device *idxd)
+{
+	union gencfg_reg reg;
+	int i, rc;
+
+	reg.bits = ioread32(idxd->reg_base + IDXD_GENCFG_OFFSET);
+	idxd->token_limit = reg.token_limit;
+
+	for (i = 0; i < idxd->max_groups; i++) {
+		struct idxd_group *group = idxd->groups[i];
+
+		idxd_group_load_config(group);
+	}
+
+	for (i = 0; i < idxd->max_wqs; i++) {
+		struct idxd_wq *wq = idxd->wqs[i];
+
+		rc = idxd_wq_load_config(wq);
+		if (rc < 0)
+			return rc;
+	}
+
+	return 0;
+}
diff --git a/drivers/dma/idxd/dma.c b/drivers/dma/idxd/dma.c
index a15e50126434..77439b645044 100644
--- a/drivers/dma/idxd/dma.c
+++ b/drivers/dma/idxd/dma.c
@@ -14,7 +14,10 @@
 
 static inline struct idxd_wq *to_idxd_wq(struct dma_chan *c)
 {
-	return container_of(c, struct idxd_wq, dma_chan);
+	struct idxd_dma_chan *idxd_chan;
+
+	idxd_chan = container_of(c, struct idxd_dma_chan, chan);
+	return idxd_chan->wq;
 }
 
 void idxd_dma_complete_txd(struct idxd_desc *desc,
@@ -135,7 +138,7 @@ static void idxd_dma_issue_pending(struct dma_chan *dma_chan)
 {
 }
 
-dma_cookie_t idxd_dma_tx_submit(struct dma_async_tx_descriptor *tx)
+static dma_cookie_t idxd_dma_tx_submit(struct dma_async_tx_descriptor *tx)
 {
 	struct dma_chan *c = tx->chan;
 	struct idxd_wq *wq = to_idxd_wq(c);
@@ -156,14 +159,25 @@ dma_cookie_t idxd_dma_tx_submit(struct dma_async_tx_descriptor *tx)
 
 static void idxd_dma_release(struct dma_device *device)
 {
+	struct idxd_dma_dev *idxd_dma = container_of(device, struct idxd_dma_dev, dma);
+
+	kfree(idxd_dma);
 }
 
 int idxd_register_dma_device(struct idxd_device *idxd)
 {
-	struct dma_device *dma = &idxd->dma_dev;
+	struct idxd_dma_dev *idxd_dma;
+	struct dma_device *dma;
+	struct device *dev = &idxd->pdev->dev;
+	int rc;
 
+	idxd_dma = kzalloc_node(sizeof(*idxd_dma), GFP_KERNEL, dev_to_node(dev));
+	if (!idxd_dma)
+		return -ENOMEM;
+
+	dma = &idxd_dma->dma;
 	INIT_LIST_HEAD(&dma->channels);
-	dma->dev = &idxd->pdev->dev;
+	dma->dev = dev;
 
 	dma_cap_set(DMA_PRIVATE, dma->cap_mask);
 	dma_cap_set(DMA_COMPLETION_NO_ORDER, dma->cap_mask);
@@ -179,35 +193,72 @@ int idxd_register_dma_device(struct idxd_device *idxd)
 	dma->device_alloc_chan_resources = idxd_dma_alloc_chan_resources;
 	dma->device_free_chan_resources = idxd_dma_free_chan_resources;
 
-	return dma_async_device_register(&idxd->dma_dev);
+	rc = dma_async_device_register(dma);
+	if (rc < 0) {
+		kfree(idxd_dma);
+		return rc;
+	}
+
+	idxd_dma->idxd = idxd;
+	/*
+	 * This pointer is protected by the refs taken by the dma_chan. It will remain valid
+	 * as long as there are outstanding channels.
+	 */
+	idxd->idxd_dma = idxd_dma;
+	return 0;
 }
 
 void idxd_unregister_dma_device(struct idxd_device *idxd)
 {
-	dma_async_device_unregister(&idxd->dma_dev);
+	dma_async_device_unregister(&idxd->idxd_dma->dma);
 }
 
 int idxd_register_dma_channel(struct idxd_wq *wq)
 {
 	struct idxd_device *idxd = wq->idxd;
-	struct dma_device *dma = &idxd->dma_dev;
-	struct dma_chan *chan = &wq->dma_chan;
-	int rc;
+	struct dma_device *dma = &idxd->idxd_dma->dma;
+	struct device *dev = &idxd->pdev->dev;
+	struct idxd_dma_chan *idxd_chan;
+	struct dma_chan *chan;
+	int rc, i;
+
+	idxd_chan = kzalloc_node(sizeof(*idxd_chan), GFP_KERNEL, dev_to_node(dev));
+	if (!idxd_chan)
+		return -ENOMEM;
 
-	memset(&wq->dma_chan, 0, sizeof(struct dma_chan));
+	chan = &idxd_chan->chan;
 	chan->device = dma;
 	list_add_tail(&chan->device_node, &dma->channels);
+
+	for (i = 0; i < wq->num_descs; i++) {
+		struct idxd_desc *desc = wq->descs[i];
+
+		dma_async_tx_descriptor_init(&desc->txd, chan);
+		desc->txd.tx_submit = idxd_dma_tx_submit;
+	}
+
 	rc = dma_async_device_channel_register(dma, chan);
-	if (rc < 0)
+	if (rc < 0) {
+		kfree(idxd_chan);
 		return rc;
+	}
+
+	wq->idxd_chan = idxd_chan;
+	idxd_chan->wq = wq;
+	get_device(&wq->conf_dev);
 
 	return 0;
 }
 
 void idxd_unregister_dma_channel(struct idxd_wq *wq)
 {
-	struct dma_chan *chan = &wq->dma_chan;
+	struct idxd_dma_chan *idxd_chan = wq->idxd_chan;
+	struct dma_chan *chan = &idxd_chan->chan;
+	struct idxd_dma_dev *idxd_dma = wq->idxd->idxd_dma;
 
-	dma_async_device_channel_unregister(&wq->idxd->dma_dev, chan);
+	dma_async_device_channel_unregister(&idxd_dma->dma, chan);
 	list_del(&chan->device_node);
+	kfree(wq->idxd_chan);
+	wq->idxd_chan = NULL;
+	put_device(&wq->conf_dev);
 }
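[Editor's note] The dmaengine side now heap-allocates a wrapper around struct dma_device and frees it from idxd_dma_release(). A reduced sketch of that ownership shape follows, under the assumption that the release function is wired to dma_device->device_release (the assignment itself is not visible in the hunks above); the my_* names are illustrative.

#include <linux/dmaengine.h>
#include <linux/slab.h>

struct my_dma_dev {
	struct dma_device dma;
	/* driver-private state lives alongside the dma_device */
};

static void my_dma_release(struct dma_device *dma)
{
	/* invoked once the last dmaengine reference is dropped */
	kfree(container_of(dma, struct my_dma_dev, dma));
}

static int my_dma_register(struct device *parent)
{
	struct my_dma_dev *d = kzalloc(sizeof(*d), GFP_KERNEL);

	if (!d)
		return -ENOMEM;
	INIT_LIST_HEAD(&d->dma.channels);
	d->dma.dev = parent;
	d->dma.device_release = my_dma_release;
	return dma_async_device_register(&d->dma);
}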
diff --git a/drivers/dma/idxd/idxd.h b/drivers/dma/idxd/idxd.h
index 81a0e65fd316..26482c7d4c3a 100644
--- a/drivers/dma/idxd/idxd.h
+++ b/drivers/dma/idxd/idxd.h
@@ -8,12 +8,18 @@
 #include <linux/percpu-rwsem.h>
 #include <linux/wait.h>
 #include <linux/cdev.h>
+#include <linux/idr.h>
+#include <linux/pci.h>
+#include <linux/perf_event.h>
 #include "registers.h"
 
 #define IDXD_DRIVER_VERSION	"1.00"
 
 extern struct kmem_cache *idxd_desc_pool;
 
+struct idxd_device;
+struct idxd_wq;
+
 #define IDXD_REG_TIMEOUT	50
 #define IDXD_DRAIN_TIMEOUT	5000
 
@@ -25,6 +31,7 @@ enum idxd_type {
 };
 
 #define IDXD_NAME_SIZE		128
+#define IDXD_PMU_EVENT_MAX	64
 
 struct idxd_device_driver {
 	struct device_driver drv;
@@ -33,6 +40,7 @@ struct idxd_device_driver {
 struct idxd_irq_entry {
 	struct idxd_device *idxd;
 	int id;
+	int vector;
 	struct llist_head pending_llist;
 	struct list_head work_list;
 	/*
@@ -56,6 +64,31 @@ struct idxd_group {
 	int tc_b;
 };
 
+struct idxd_pmu {
+	struct idxd_device *idxd;
+
+	struct perf_event *event_list[IDXD_PMU_EVENT_MAX];
+	int n_events;
+
+	DECLARE_BITMAP(used_mask, IDXD_PMU_EVENT_MAX);
+
+	struct pmu pmu;
+	char name[IDXD_NAME_SIZE];
+	int cpu;
+
+	int n_counters;
+	int counter_width;
+	int n_event_categories;
+
+	bool per_counter_caps_supported;
+	unsigned long supported_event_categories;
+
+	unsigned long supported_filters;
+	int n_filters;
+
+	struct hlist_node cpuhp_node;
+};
+
 #define IDXD_MAX_PRIORITY	0xf
 
 enum idxd_wq_state {
@@ -75,10 +108,10 @@ enum idxd_wq_type {
 };
 
 struct idxd_cdev {
+	struct idxd_wq *wq;
 	struct cdev cdev;
-	struct device *dev;
+	struct device dev;
 	int minor;
-	struct wait_queue_head err_queue;
 };
 
 #define IDXD_ALLOCATED_BATCH_SIZE	128U
@@ -96,10 +129,18 @@ enum idxd_complete_type {
 	IDXD_COMPLETE_DEV_FAIL,
 };
 
+struct idxd_dma_chan {
+	struct dma_chan chan;
+	struct idxd_wq *wq;
+};
+
 struct idxd_wq {
 	void __iomem *portal;
+	struct percpu_ref wq_active;
+	struct completion wq_dead;
 	struct device conf_dev;
-	struct idxd_cdev idxd_cdev;
+	struct idxd_cdev *idxd_cdev;
+	struct wait_queue_head err_queue;
 	struct idxd_device *idxd;
 	int id;
 	enum idxd_wq_type type;
@@ -125,7 +166,7 @@ struct idxd_wq {
 	int compls_size;
 	struct idxd_desc **descs;
 	struct sbitmap_queue sbq;
-	struct dma_chan dma_chan;
+	struct idxd_dma_chan *idxd_chan;
 	char name[WQ_NAME_SIZE + 1];
 	u64 max_xfer_bytes;
 	u32 max_batch_size;
@@ -147,6 +188,7 @@ struct idxd_hw {
 	union group_cap_reg group_cap;
 	union engine_cap_reg engine_cap;
 	struct opcap opcap;
+	u32 cmd_cap;
 };
 
 enum idxd_device_state {
@@ -162,9 +204,22 @@ enum idxd_device_flag {
 	IDXD_FLAG_PASID_ENABLED,
 };
 
-struct idxd_device {
+struct idxd_dma_dev {
+	struct idxd_device *idxd;
+	struct dma_device dma;
+};
+
+struct idxd_driver_data {
+	const char *name_prefix;
 	enum idxd_type type;
+	struct device_type *dev_type;
+	int compl_size;
+	int align;
+};
+
+struct idxd_device {
 	struct device conf_dev;
+	struct idxd_driver_data *data;
 	struct list_head list;
 	struct idxd_hw hw;
 	enum idxd_device_state state;
@@ -177,10 +232,11 @@ struct idxd_device {
 
 	void __iomem *reg_base;
 
 	spinlock_t dev_lock;	/* spinlock for device */
+	spinlock_t cmd_lock;	/* spinlock for device commands */
 	struct completion *cmd_done;
-	struct idxd_group *groups;
-	struct idxd_wq *wqs;
-	struct idxd_engine *engines;
+	struct idxd_group **groups;
+	struct idxd_wq **wqs;
+	struct idxd_engine **engines;
 
 	struct iommu_sva *sva;
 	unsigned int pasid;
@@ -202,17 +258,19 @@ struct idxd_device {
 	int token_limit;
 	int nr_tokens;		/* non-reserved tokens */
 	unsigned int wqcfg_size;
-	int compl_size;
 
 	union sw_err_reg sw_err;
 	wait_queue_head_t cmd_waitq;
-	struct msix_entry *msix_entries;
 	int num_wq_irqs;
 	struct idxd_irq_entry *irq_entries;
 
-	struct dma_device dma_dev;
+	struct idxd_dma_dev *idxd_dma;
 	struct workqueue_struct *wq;
 	struct work_struct work;
+
+	int *int_handles;
+
+	struct idxd_pmu *idxd_pmu;
 };
 
 /* IDXD software descriptor */
@@ -232,6 +290,7 @@ struct idxd_desc {
 	struct list_head list;
 	int id;
 	int cpu;
+	unsigned int vector;
 	struct idxd_wq *wq;
 };
 
@@ -242,6 +301,44 @@ extern struct bus_type dsa_bus_type;
 extern struct bus_type iax_bus_type;
 extern bool support_enqcmd;
+extern struct ida idxd_ida;
+extern struct device_type dsa_device_type;
+extern struct device_type iax_device_type;
+extern struct device_type idxd_wq_device_type;
+extern struct device_type idxd_engine_device_type;
+extern struct device_type idxd_group_device_type;
+
+static inline bool is_dsa_dev(struct device *dev)
+{
+	return dev->type == &dsa_device_type;
+}
+
+static inline bool is_iax_dev(struct device *dev)
+{
+	return dev->type == &iax_device_type;
+}
+
+static inline bool is_idxd_dev(struct device *dev)
+{
+	return is_dsa_dev(dev) || is_iax_dev(dev);
+}
+
+static inline bool is_idxd_wq_dev(struct device *dev)
+{
+	return dev->type == &idxd_wq_device_type;
+}
+
+static inline bool is_idxd_wq_dmaengine(struct idxd_wq *wq)
+{
+	if (wq->type == IDXD_WQT_KERNEL && strcmp(wq->name, "dmaengine") == 0)
+		return true;
+	return false;
+}
+
+static inline bool is_idxd_wq_cdev(struct idxd_wq *wq)
+{
+	return wq->type == IDXD_WQT_USER;
+}
 
 static inline bool wq_dedicated(struct idxd_wq *wq)
 {
@@ -268,6 +365,11 @@ enum idxd_portal_prot {
 	IDXD_PORTAL_LIMITED,
 };
 
+enum idxd_interrupt_type {
+	IDXD_IRQ_MSIX = 0,
+	IDXD_IRQ_IMS,
+};
+
 static inline int idxd_get_wq_portal_offset(enum idxd_portal_prot prot)
 {
 	return prot * 0x1000;
@@ -279,18 +381,6 @@ static inline int idxd_get_wq_portal_full_offset(int wq_id,
 	return ((wq_id * 4) << PAGE_SHIFT) + idxd_get_wq_portal_offset(prot);
 }
 
-static inline void idxd_set_type(struct idxd_device *idxd)
-{
-	struct pci_dev *pdev = idxd->pdev;
-
-	if (pdev->device == PCI_DEVICE_ID_INTEL_DSA_SPR0)
-		idxd->type = IDXD_TYPE_DSA;
-	else if (pdev->device == PCI_DEVICE_ID_INTEL_IAX_SPR0)
-		idxd->type = IDXD_TYPE_IAX;
-	else
-		idxd->type = IDXD_TYPE_UNKNOWN;
-}
-
 static inline void idxd_wq_get(struct idxd_wq *wq)
 {
 	wq->client_count++;
@@ -306,17 +396,17 @@ static inline int idxd_wq_refcount(struct idxd_wq *wq)
 	return wq->client_count;
 };
 
-const char *idxd_get_dev_name(struct idxd_device *idxd);
 int idxd_register_bus_type(void);
 void idxd_unregister_bus_type(void);
-int idxd_setup_sysfs(struct idxd_device *idxd);
-void idxd_cleanup_sysfs(struct idxd_device *idxd);
+int idxd_register_devices(struct idxd_device *idxd);
+void idxd_unregister_devices(struct idxd_device *idxd);
 int idxd_register_driver(void);
 void idxd_unregister_driver(void);
-struct bus_type *idxd_get_bus_type(struct idxd_device *idxd);
+void idxd_wqs_quiesce(struct idxd_device *idxd);
 
 /* device interrupt control */
-irqreturn_t idxd_irq_handler(int vec, void *data);
+void idxd_msix_perm_setup(struct idxd_device *idxd);
+void idxd_msix_perm_clear(struct idxd_device *idxd);
 irqreturn_t idxd_misc_thread(int vec, void *data);
 irqreturn_t idxd_wq_thread(int irq, void *data);
 void idxd_mask_error_interrupts(struct idxd_device *idxd);
@@ -334,18 +424,27 @@ void idxd_device_cleanup(struct idxd_device *idxd);
 int idxd_device_config(struct idxd_device *idxd);
 void idxd_device_wqs_clear_state(struct idxd_device *idxd);
 void idxd_device_drain_pasid(struct idxd_device *idxd, int pasid);
+int idxd_device_load_config(struct idxd_device *idxd);
+int idxd_device_request_int_handle(struct idxd_device *idxd, int idx, int *handle,
+				   enum idxd_interrupt_type irq_type);
+int idxd_device_release_int_handle(struct idxd_device *idxd, int handle,
+				   enum idxd_interrupt_type irq_type);
 
 /* work queue control */
+void idxd_wqs_unmap_portal(struct idxd_device *idxd);
 int idxd_wq_alloc_resources(struct idxd_wq *wq);
 void idxd_wq_free_resources(struct idxd_wq *wq);
 int idxd_wq_enable(struct idxd_wq *wq);
 int idxd_wq_disable(struct idxd_wq *wq);
 void idxd_wq_drain(struct idxd_wq *wq);
+void idxd_wq_reset(struct idxd_wq *wq);
 int idxd_wq_map_portal(struct idxd_wq *wq);
 void idxd_wq_unmap_portal(struct idxd_wq *wq);
 void idxd_wq_disable_cleanup(struct idxd_wq *wq);
 int idxd_wq_set_pasid(struct idxd_wq *wq, int pasid);
 int idxd_wq_disable_pasid(struct idxd_wq *wq);
+void idxd_wq_quiesce(struct idxd_wq *wq);
+int idxd_wq_init_percpu_ref(struct idxd_wq *wq);
 
 /* submission */
 int idxd_submit_desc(struct idxd_wq *wq, struct idxd_desc *desc);
@@ -360,7 +459,6 @@ void idxd_unregister_dma_channel(struct idxd_wq *wq);
 void idxd_parse_completion_status(u8 status, enum dmaengine_tx_result *res);
 void idxd_dma_complete_txd(struct idxd_desc *desc,
 			   enum idxd_complete_type comp_type);
-dma_cookie_t idxd_dma_tx_submit(struct dma_async_tx_descriptor *tx);
 
 /* cdev */
 int idxd_cdev_register(void);
@@ -369,4 +467,19 @@ int idxd_cdev_get_major(struct idxd_device *idxd);
 int idxd_wq_add_cdev(struct idxd_wq *wq);
 void idxd_wq_del_cdev(struct idxd_wq *wq);
 
+/* perfmon */
+#if IS_ENABLED(CONFIG_INTEL_IDXD_PERFMON)
+int perfmon_pmu_init(struct idxd_device *idxd);
+void perfmon_pmu_remove(struct idxd_device *idxd);
+void perfmon_counter_overflow(struct idxd_device *idxd);
+void perfmon_init(void);
+void perfmon_exit(void);
+#else
+static inline int perfmon_pmu_init(struct idxd_device *idxd) { return 0; }
+static inline void perfmon_pmu_remove(struct idxd_device *idxd) {}
+static inline void perfmon_counter_overflow(struct idxd_device *idxd) {}
+static inline void perfmon_init(void) {}
+static inline void perfmon_exit(void) {}
+#endif
+
 #endif
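[Editor's note] The perfmon declarations added at the end of idxd.h use the usual optional-subsystem shape: real prototypes when the config option is enabled, static inline no-ops otherwise, so call sites never need #ifdef guards and the stubs compile away entirely. The generic form, with a hypothetical CONFIG_MY_FEATURE and my_* names:

struct my_dev;	/* forward declaration is enough for the prototypes */

#if IS_ENABLED(CONFIG_MY_FEATURE)
int my_feature_init(struct my_dev *dev);
void my_feature_exit(struct my_dev *dev);
#else
/* stubs: callers stay unconditional, the compiler discards the calls */
static inline int my_feature_init(struct my_dev *dev) { return 0; }
static inline void my_feature_exit(struct my_dev *dev) {}
#endif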
diff --git a/drivers/dma/idxd/init.c b/drivers/dma/idxd/init.c
index 085a0c3b62c6..2a926bef87f2 100644
--- a/drivers/dma/idxd/init.c
+++ b/drivers/dma/idxd/init.c
@@ -21,6 +21,7 @@
 #include "../dmaengine.h"
 #include "registers.h"
 #include "idxd.h"
+#include "perfmon.h"
 
 MODULE_VERSION(IDXD_DRIVER_VERSION);
 MODULE_LICENSE("GPL v2");
@@ -33,60 +34,53 @@ MODULE_PARM_DESC(sva, "Toggle SVA support on/off");
 #define DRV_NAME "idxd"
 
 bool support_enqcmd;
-
-static struct idr idxd_idrs[IDXD_TYPE_MAX];
-static DEFINE_MUTEX(idxd_idr_lock);
+DEFINE_IDA(idxd_ida);
+
+static struct idxd_driver_data idxd_driver_data[] = {
+	[IDXD_TYPE_DSA] = {
+		.name_prefix = "dsa",
+		.type = IDXD_TYPE_DSA,
+		.compl_size = sizeof(struct dsa_completion_record),
+		.align = 32,
+		.dev_type = &dsa_device_type,
+	},
+	[IDXD_TYPE_IAX] = {
+		.name_prefix = "iax",
+		.type = IDXD_TYPE_IAX,
+		.compl_size = sizeof(struct iax_completion_record),
+		.align = 64,
+		.dev_type = &iax_device_type,
+	},
+};
 
 static struct pci_device_id idxd_pci_tbl[] = {
 	/* DSA ver 1.0 platforms */
-	{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_DSA_SPR0) },
+	{ PCI_DEVICE_DATA(INTEL, DSA_SPR0, &idxd_driver_data[IDXD_TYPE_DSA]) },
 
 	/* IAX ver 1.0 platforms */
-	{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IAX_SPR0) },
+	{ PCI_DEVICE_DATA(INTEL, IAX_SPR0, &idxd_driver_data[IDXD_TYPE_IAX]) },
 	{ 0, }
 };
 MODULE_DEVICE_TABLE(pci, idxd_pci_tbl);
 
-static char *idxd_name[] = {
-	"dsa",
-	"iax"
-};
-
-const char *idxd_get_dev_name(struct idxd_device *idxd)
-{
-	return idxd_name[idxd->type];
-}
-
 static int idxd_setup_interrupts(struct idxd_device *idxd)
 {
 	struct pci_dev *pdev = idxd->pdev;
 	struct device *dev = &pdev->dev;
-	struct msix_entry *msix;
 	struct idxd_irq_entry *irq_entry;
 	int i, msixcnt;
 	int rc = 0;
-	union msix_perm mperm;
 
 	msixcnt = pci_msix_vec_count(pdev);
 	if (msixcnt < 0) {
 		dev_err(dev, "Not MSI-X interrupt capable.\n");
-		goto err_no_irq;
+		return -ENOSPC;
 	}
 
-	idxd->msix_entries = devm_kzalloc(dev, sizeof(struct msix_entry) *
-			msixcnt, GFP_KERNEL);
-	if (!idxd->msix_entries) {
-		rc = -ENOMEM;
-		goto err_no_irq;
-	}
-
-	for (i = 0; i < msixcnt; i++)
-		idxd->msix_entries[i].entry = i;
-
-	rc = pci_enable_msix_exact(pdev, idxd->msix_entries, msixcnt);
-	if (rc) {
-		dev_err(dev, "Failed enabling %d MSIX entries.\n", msixcnt);
-		goto err_no_irq;
+	rc = pci_alloc_irq_vectors(pdev, msixcnt, msixcnt, PCI_IRQ_MSIX);
+	if (rc != msixcnt) {
+		dev_err(dev, "Failed enabling %d MSIX entries: %d\n", msixcnt, rc);
+		return -ENOSPC;
 	}
 	dev_dbg(dev, "Enabled %d msix vectors\n", msixcnt);
 
@@ -94,126 +88,266 @@ static int idxd_setup_interrupts(struct idxd_device *idxd)
 	 * We implement 1 completion list per MSI-X entry except for
 	 * entry 0, which is for errors and others.
 	 */
-	idxd->irq_entries = devm_kcalloc(dev, msixcnt,
-					 sizeof(struct idxd_irq_entry),
-					 GFP_KERNEL);
+	idxd->irq_entries = kcalloc_node(msixcnt, sizeof(struct idxd_irq_entry),
+					 GFP_KERNEL, dev_to_node(dev));
 	if (!idxd->irq_entries) {
 		rc = -ENOMEM;
-		goto err_no_irq;
+		goto err_irq_entries;
 	}
 
 	for (i = 0; i < msixcnt; i++) {
 		idxd->irq_entries[i].id = i;
 		idxd->irq_entries[i].idxd = idxd;
+		idxd->irq_entries[i].vector = pci_irq_vector(pdev, i);
 		spin_lock_init(&idxd->irq_entries[i].list_lock);
 	}
 
-	msix = &idxd->msix_entries[0];
 	irq_entry = &idxd->irq_entries[0];
-	rc = devm_request_threaded_irq(dev, msix->vector, idxd_irq_handler,
-				       idxd_misc_thread, 0, "idxd-misc",
-				       irq_entry);
+	rc = request_threaded_irq(irq_entry->vector, NULL, idxd_misc_thread,
				  0, "idxd-misc", irq_entry);
 	if (rc < 0) {
 		dev_err(dev, "Failed to allocate misc interrupt.\n");
-		goto err_no_irq;
+		goto err_misc_irq;
 	}
 
-	dev_dbg(dev, "Allocated idxd-misc handler on msix vector %d\n",
-		msix->vector);
+	dev_dbg(dev, "Allocated idxd-misc handler on msix vector %d\n", irq_entry->vector);
 
 	/* first MSI-X entry is not for wq interrupts */
 	idxd->num_wq_irqs = msixcnt - 1;
 
 	for (i = 1; i < msixcnt; i++) {
-		msix = &idxd->msix_entries[i];
 		irq_entry = &idxd->irq_entries[i];
 
 		init_llist_head(&idxd->irq_entries[i].pending_llist);
 		INIT_LIST_HEAD(&idxd->irq_entries[i].work_list);
-		rc = devm_request_threaded_irq(dev, msix->vector,
-					       idxd_irq_handler,
-					       idxd_wq_thread, 0,
-					       "idxd-portal", irq_entry);
+		rc = request_threaded_irq(irq_entry->vector, NULL,
					  idxd_wq_thread, 0, "idxd-portal", irq_entry);
 		if (rc < 0) {
-			dev_err(dev, "Failed to allocate irq %d.\n",
-				msix->vector);
-			goto err_no_irq;
+			dev_err(dev, "Failed to allocate irq %d.\n", irq_entry->vector);
+			goto err_wq_irqs;
+		}
+
+		dev_dbg(dev, "Allocated idxd-msix %d for vector %d\n", i, irq_entry->vector);
+		if (idxd->hw.cmd_cap & BIT(IDXD_CMD_REQUEST_INT_HANDLE)) {
+			/*
+			 * The MSIX vector enumeration starts at 1 with vector 0 being the
			 * misc interrupt that handles non I/O completion events. The
			 * interrupt handles are for IMS enumeration on guest. The misc
			 * interrupt vector does not require a handle and therefore we start
			 * the int_handles at index 0. Since 'i' starts at 1, the first
			 * int_handles index will be 0.
			 */
+			rc = idxd_device_request_int_handle(idxd, i, &idxd->int_handles[i - 1],
							    IDXD_IRQ_MSIX);
+			if (rc < 0) {
+				free_irq(irq_entry->vector, irq_entry);
+				goto err_wq_irqs;
+			}
+			dev_dbg(dev, "int handle requested: %u\n", idxd->int_handles[i - 1]);
		}
-		dev_dbg(dev, "Allocated idxd-msix %d for vector %d\n",
-			i, msix->vector);
 	}
 
 	idxd_unmask_error_interrupts(idxd);
-
-	/* Setup MSIX permission table */
-	mperm.bits = 0;
-	mperm.pasid = idxd->pasid;
-	mperm.pasid_en = device_pasid_enabled(idxd);
-	for (i = 1; i < msixcnt; i++)
-		iowrite32(mperm.bits, idxd->reg_base + idxd->msix_perm_offset + i * 8);
-
+	idxd_msix_perm_setup(idxd);
 	return 0;
 
- err_no_irq:
+ err_wq_irqs:
+	while (--i >= 0) {
+		irq_entry = &idxd->irq_entries[i];
+		free_irq(irq_entry->vector, irq_entry);
+		if (i != 0)
+			idxd_device_release_int_handle(idxd,
+						       idxd->int_handles[i], IDXD_IRQ_MSIX);
+	}
+ err_misc_irq:
 	/* Disable error interrupt generation */
 	idxd_mask_error_interrupts(idxd);
-	pci_disable_msix(pdev);
+ err_irq_entries:
+	pci_free_irq_vectors(pdev);
 	dev_err(dev, "No usable interrupts\n");
 	return rc;
 }
 
-static int idxd_setup_internals(struct idxd_device *idxd)
+static int idxd_setup_wqs(struct idxd_device *idxd)
 {
 	struct device *dev = &idxd->pdev->dev;
-	int i;
-
-	init_waitqueue_head(&idxd->cmd_waitq);
-	idxd->groups = devm_kcalloc(dev, idxd->max_groups,
-				    sizeof(struct idxd_group), GFP_KERNEL);
-	if (!idxd->groups)
-		return -ENOMEM;
-
-	for (i = 0; i < idxd->max_groups; i++) {
-		idxd->groups[i].idxd = idxd;
-		idxd->groups[i].id = i;
-		idxd->groups[i].tc_a = -1;
-		idxd->groups[i].tc_b = -1;
-	}
-
-	idxd->wqs = devm_kcalloc(dev, idxd->max_wqs, sizeof(struct idxd_wq),
-				 GFP_KERNEL);
+	struct idxd_wq *wq;
+	int i, rc;
+
+	idxd->wqs = kcalloc_node(idxd->max_wqs, sizeof(struct idxd_wq *),
				 GFP_KERNEL, dev_to_node(dev));
 	if (!idxd->wqs)
 		return -ENOMEM;
 
-	idxd->engines = devm_kcalloc(dev, idxd->max_engines,
-				     sizeof(struct idxd_engine), GFP_KERNEL);
-	if (!idxd->engines)
-		return -ENOMEM;
-
 	for (i = 0; i < idxd->max_wqs; i++) {
-		struct idxd_wq *wq = &idxd->wqs[i];
+		wq = kzalloc_node(sizeof(*wq), GFP_KERNEL, dev_to_node(dev));
+		if (!wq) {
+			rc = -ENOMEM;
+			goto err;
+		}
 
 		wq->id = i;
 		wq->idxd = idxd;
+		device_initialize(&wq->conf_dev);
+		wq->conf_dev.parent = &idxd->conf_dev;
+		wq->conf_dev.bus = &dsa_bus_type;
+		wq->conf_dev.type = &idxd_wq_device_type;
+		rc = dev_set_name(&wq->conf_dev, "wq%d.%d", idxd->id, wq->id);
+		if (rc < 0) {
+			put_device(&wq->conf_dev);
+			goto err;
+		}
+
 		mutex_init(&wq->wq_lock);
-		wq->idxd_cdev.minor = -1;
+		init_waitqueue_head(&wq->err_queue);
+		init_completion(&wq->wq_dead);
 		wq->max_xfer_bytes = idxd->max_xfer_bytes;
 		wq->max_batch_size = idxd->max_batch_size;
-		wq->wqcfg = devm_kzalloc(dev, idxd->wqcfg_size, GFP_KERNEL);
-		if (!wq->wqcfg)
-			return -ENOMEM;
+		wq->wqcfg = kzalloc_node(idxd->wqcfg_size, GFP_KERNEL, dev_to_node(dev));
+		if (!wq->wqcfg) {
+			put_device(&wq->conf_dev);
+			rc = -ENOMEM;
+			goto err;
+		}
+		idxd->wqs[i] = wq;
 	}
 
+	return 0;
+
+ err:
+	while (--i >= 0)
+		put_device(&idxd->wqs[i]->conf_dev);
+	return rc;
+}
+
+static int idxd_setup_engines(struct idxd_device *idxd)
+{
+	struct idxd_engine *engine;
+	struct device *dev = &idxd->pdev->dev;
+	int i, rc;
+
+	idxd->engines = kcalloc_node(idxd->max_engines, sizeof(struct idxd_engine *),
				     GFP_KERNEL, dev_to_node(dev));
+	if (!idxd->engines)
+		return -ENOMEM;
+
 	for (i = 0; i < idxd->max_engines; i++) {
-		idxd->engines[i].idxd = idxd;
-		idxd->engines[i].id = i;
+		engine = kzalloc_node(sizeof(*engine), GFP_KERNEL, dev_to_node(dev));
+		if (!engine) {
+			rc = -ENOMEM;
+			goto err;
+		}
+
+		engine->id = i;
+		engine->idxd = idxd;
+		device_initialize(&engine->conf_dev);
+		engine->conf_dev.parent = &idxd->conf_dev;
+		engine->conf_dev.type = &idxd_engine_device_type;
+		rc = dev_set_name(&engine->conf_dev, "engine%d.%d", idxd->id, engine->id);
+		if (rc < 0) {
+			put_device(&engine->conf_dev);
+			goto err;
+		}
+
+		idxd->engines[i] = engine;
 	}
 
-	idxd->wq = create_workqueue(dev_name(dev));
-	if (!idxd->wq)
+	return 0;
+
+ err:
+	while (--i >= 0)
+		put_device(&idxd->engines[i]->conf_dev);
+	return rc;
+}
+
+static int idxd_setup_groups(struct idxd_device *idxd)
+{
+	struct device *dev = &idxd->pdev->dev;
+	struct idxd_group *group;
+	int i, rc;
+
+	idxd->groups = kcalloc_node(idxd->max_groups, sizeof(struct idxd_group *),
				    GFP_KERNEL, dev_to_node(dev));
+	if (!idxd->groups)
 		return -ENOMEM;
 
+	for (i = 0; i < idxd->max_groups; i++) {
+		group = kzalloc_node(sizeof(*group), GFP_KERNEL, dev_to_node(dev));
+		if (!group) {
+			rc = -ENOMEM;
+			goto err;
+		}
+
+		group->id = i;
+		group->idxd = idxd;
+		device_initialize(&group->conf_dev);
+		group->conf_dev.parent = &idxd->conf_dev;
+		group->conf_dev.bus = &dsa_bus_type;
+		group->conf_dev.type = &idxd_group_device_type;
+		rc = dev_set_name(&group->conf_dev, "group%d.%d", idxd->id, group->id);
+		if (rc < 0) {
+			put_device(&group->conf_dev);
+			goto err;
+		}
+
+		idxd->groups[i] = group;
+		group->tc_a = -1;
+		group->tc_b = -1;
+	}
+
+	return 0;
+
+ err:
+	while (--i >= 0)
+		put_device(&idxd->groups[i]->conf_dev);
+	return rc;
+}
+
+static int idxd_setup_internals(struct idxd_device *idxd)
+{
+	struct device *dev = &idxd->pdev->dev;
+	int rc, i;
+
+	init_waitqueue_head(&idxd->cmd_waitq);
+
+	if (idxd->hw.cmd_cap & BIT(IDXD_CMD_REQUEST_INT_HANDLE)) {
+		idxd->int_handles = devm_kcalloc(dev, idxd->max_wqs, sizeof(int), GFP_KERNEL);
+		if (!idxd->int_handles)
+			return -ENOMEM;
+	}
+
+	rc = idxd_setup_wqs(idxd);
+	if (rc < 0)
+		goto err_wqs;
+
+	rc = idxd_setup_engines(idxd);
+	if (rc < 0)
+		goto err_engine;
+
+	rc = idxd_setup_groups(idxd);
+	if (rc < 0)
+		goto err_group;
+
+	idxd->wq = create_workqueue(dev_name(dev));
+	if (!idxd->wq) {
+		rc = -ENOMEM;
+		goto err_wkq_create;
+	}
+
 	return 0;
+
+ err_wkq_create:
+	for (i = 0; i < idxd->max_groups; i++)
+		put_device(&idxd->groups[i]->conf_dev);
+ err_group:
+	for (i = 0; i < idxd->max_engines; i++)
+		put_device(&idxd->engines[i]->conf_dev);
+ err_engine:
+	for (i = 0; i < idxd->max_wqs; i++)
+		put_device(&idxd->wqs[i]->conf_dev);
+ err_wqs:
+	kfree(idxd->int_handles);
+	return rc;
 }
 
 static void idxd_read_table_offsets(struct idxd_device *idxd)
@@ -241,6 +375,12 @@ static void idxd_read_caps(struct idxd_device *idxd)
 	/* reading generic capabilities */
 	idxd->hw.gen_cap.bits = ioread64(idxd->reg_base + IDXD_GENCAP_OFFSET);
 	dev_dbg(dev, "gen_cap: %#llx\n", idxd->hw.gen_cap.bits);
+
+	if (idxd->hw.gen_cap.cmd_cap) {
+		idxd->hw.cmd_cap = ioread32(idxd->reg_base + IDXD_CMDCAP_OFFSET);
+		dev_dbg(dev, "cmd_cap: %#x\n", idxd->hw.cmd_cap);
+	}
+
 	idxd->max_xfer_bytes = 1ULL << idxd->hw.gen_cap.max_xfer_shift;
 	dev_dbg(dev, "max xfer size: %llu bytes\n", idxd->max_xfer_bytes);
 	idxd->max_batch_size = 1U << idxd->hw.gen_cap.max_batch_shift;
@@ -283,17 +423,34 @@ static void idxd_read_caps(struct idxd_device *idxd)
 	}
 }
 
-static struct idxd_device *idxd_alloc(struct pci_dev *pdev)
+static struct idxd_device *idxd_alloc(struct pci_dev *pdev, struct idxd_driver_data *data)
 {
 	struct device *dev = &pdev->dev;
 	struct idxd_device *idxd;
+	int rc;
 
-	idxd = devm_kzalloc(dev, sizeof(struct idxd_device), GFP_KERNEL);
+	idxd = kzalloc_node(sizeof(*idxd), GFP_KERNEL, dev_to_node(dev));
 	if (!idxd)
 		return NULL;
 
 	idxd->pdev = pdev;
+	idxd->data = data;
+	idxd->id = ida_alloc(&idxd_ida, GFP_KERNEL);
+	if (idxd->id < 0)
+		return NULL;
+
+	device_initialize(&idxd->conf_dev);
+	idxd->conf_dev.parent = dev;
+	idxd->conf_dev.bus = &dsa_bus_type;
+	idxd->conf_dev.type = idxd->data->dev_type;
+	rc = dev_set_name(&idxd->conf_dev, "%s%d", idxd->data->name_prefix, idxd->id);
+	if (rc < 0) {
+		put_device(&idxd->conf_dev);
+		return NULL;
+	}
+
 	spin_lock_init(&idxd->dev_lock);
+	spin_lock_init(&idxd->cmd_lock);
 
 	return idxd;
 }
@@ -346,11 +503,18 @@ static int idxd_probe(struct idxd_device *idxd)
 	dev_dbg(dev, "IDXD reset complete\n");
 
 	if (IS_ENABLED(CONFIG_INTEL_IDXD_SVM) && sva) {
-		rc = idxd_enable_system_pasid(idxd);
-		if (rc < 0)
-			dev_warn(dev, "Failed to enable PASID. No SVA support: %d\n", rc);
-		else
-			set_bit(IDXD_FLAG_PASID_ENABLED, &idxd->flags);
+		rc = iommu_dev_enable_feature(dev, IOMMU_DEV_FEAT_SVA);
+		if (rc == 0) {
+			rc = idxd_enable_system_pasid(idxd);
+			if (rc < 0) {
+				iommu_dev_disable_feature(dev, IOMMU_DEV_FEAT_SVA);
+				dev_warn(dev, "Failed to enable PASID. No SVA support: %d\n", rc);
+			} else {
+				set_bit(IDXD_FLAG_PASID_ENABLED, &idxd->flags);
+			}
+		} else {
+			dev_warn(dev, "Unable to turn on SVA feature.\n");
+		}
 	} else if (!sva) {
 		dev_warn(dev, "User forced SVA off via module param.\n");
 	}
@@ -360,80 +524,75 @@ static int idxd_probe(struct idxd_device *idxd)
 
 	rc = idxd_setup_internals(idxd);
 	if (rc)
-		goto err_setup;
+		goto err;
+
+	/* If the configs are readonly, then load them from device */
+	if (!test_bit(IDXD_FLAG_CONFIGURABLE, &idxd->flags)) {
+		dev_dbg(dev, "Loading RO device config\n");
+		rc = idxd_device_load_config(idxd);
+		if (rc < 0)
+			goto err;
+	}
 
 	rc = idxd_setup_interrupts(idxd);
 	if (rc)
-		goto err_setup;
+		goto err;
 
 	dev_dbg(dev, "IDXD interrupt setup complete.\n");
 
-	mutex_lock(&idxd_idr_lock);
-	idxd->id = idr_alloc(&idxd_idrs[idxd->type], idxd, 0, 0, GFP_KERNEL);
-	mutex_unlock(&idxd_idr_lock);
-	if (idxd->id < 0) {
-		rc = -ENOMEM;
-		goto err_idr_fail;
-	}
-
 	idxd->major = idxd_cdev_get_major(idxd);
 
+	rc = perfmon_pmu_init(idxd);
+	if (rc < 0)
+		dev_warn(dev, "Failed to initialize perfmon. No PMU support: %d\n", rc);
+
 	dev_dbg(dev, "IDXD device %d probed successfully\n", idxd->id);
 	return 0;
 
- err_idr_fail:
-	idxd_mask_error_interrupts(idxd);
-	idxd_mask_msix_vectors(idxd);
- err_setup:
+ err:
 	if (device_pasid_enabled(idxd))
 		idxd_disable_system_pasid(idxd);
+	iommu_dev_disable_feature(dev, IOMMU_DEV_FEAT_SVA);
 	return rc;
 }
 
-static void idxd_type_init(struct idxd_device *idxd)
-{
-	if (idxd->type == IDXD_TYPE_DSA)
-		idxd->compl_size = sizeof(struct dsa_completion_record);
-	else if (idxd->type == IDXD_TYPE_IAX)
-		idxd->compl_size = sizeof(struct iax_completion_record);
-}
-
 static int idxd_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 {
 	struct device *dev = &pdev->dev;
 	struct idxd_device *idxd;
+	struct idxd_driver_data *data = (struct idxd_driver_data *)id->driver_data;
 	int rc;
 
-	rc = pcim_enable_device(pdev);
+	rc = pci_enable_device(pdev);
 	if (rc)
 		return rc;
 
 	dev_dbg(dev, "Alloc IDXD context\n");
-	idxd = idxd_alloc(pdev);
-	if (!idxd)
-		return -ENOMEM;
+	idxd = idxd_alloc(pdev, data);
+	if (!idxd) {
+		rc = -ENOMEM;
+		goto err_idxd_alloc;
+	}
 
 	dev_dbg(dev, "Mapping BARs\n");
-	idxd->reg_base = pcim_iomap(pdev, IDXD_MMIO_BAR, 0);
-	if (!idxd->reg_base)
-		return -ENOMEM;
+	idxd->reg_base = pci_iomap(pdev, IDXD_MMIO_BAR, 0);
+	if (!idxd->reg_base) {
+		rc = -ENOMEM;
+		goto err_iomap;
+	}
 
 	dev_dbg(dev, "Set DMA masks\n");
 	rc = pci_set_dma_mask(pdev, DMA_BIT_MASK(64));
 	if (rc)
 		rc = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
 	if (rc)
-		return rc;
+		goto err;
 
 	rc = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64));
 	if (rc)
 		rc = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32));
 	if (rc)
-		return rc;
-
-	idxd_set_type(idxd);
-
-	idxd_type_init(idxd);
+		goto err;
 
 	dev_dbg(dev, "Set PCI master\n");
 	pci_set_master(pdev);
@@ -443,13 +602,13 @@ static int idxd_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 	rc = idxd_probe(idxd);
 	if (rc) {
 		dev_err(dev, "Intel(R) IDXD DMA Engine init failed\n");
-		return -ENODEV;
+		goto err;
 	}
 
-	rc = idxd_setup_sysfs(idxd);
+	rc = idxd_register_devices(idxd);
 	if (rc) {
 		dev_err(dev, "IDXD sysfs setup failed\n");
-		return -ENODEV;
+		goto err;
 	}
 
 	idxd->state = IDXD_DEV_CONF_READY;
@@ -458,6 +617,14 @@ static int idxd_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 		 idxd->hw.version);
 
 	return 0;
+
+ err:
+	pci_iounmap(pdev, idxd->reg_base);
+ err_iomap:
+	put_device(&idxd->conf_dev);
+ err_idxd_alloc:
+	pci_disable_device(pdev);
+	return rc;
 }
 
 static void idxd_flush_pending_llist(struct idxd_irq_entry *ie)
@@ -486,6 +653,36 @@ static void idxd_flush_work_list(struct idxd_irq_entry *ie)
 	}
 }
 
+void idxd_wqs_quiesce(struct idxd_device *idxd)
+{
+	struct idxd_wq *wq;
+	int i;
+
+	for (i = 0; i < idxd->max_wqs; i++) {
+		wq = idxd->wqs[i];
+		if (wq->state == IDXD_WQ_ENABLED && wq->type == IDXD_WQT_KERNEL)
+			idxd_wq_quiesce(wq);
+	}
+}
+
+static void idxd_release_int_handles(struct idxd_device *idxd)
+{
+	struct device *dev = &idxd->pdev->dev;
+	int i, rc;
+
+	for (i = 0; i < idxd->num_wq_irqs; i++) {
+		if (idxd->hw.cmd_cap & BIT(IDXD_CMD_RELEASE_INT_HANDLE)) {
+			rc = idxd_device_release_int_handle(idxd, idxd->int_handles[i],
							    IDXD_IRQ_MSIX);
+			if (rc < 0)
+				dev_warn(dev, "irq handle %d release failed\n",
					 idxd->int_handles[i]);
+			else
+				dev_dbg(dev, "int handle requested: %u\n", idxd->int_handles[i]);
+		}
+	}
+}
+
 static void idxd_shutdown(struct pci_dev *pdev)
 {
 	struct idxd_device *idxd = pci_get_drvdata(pdev);
@@ -503,13 +700,19 @@ static void idxd_shutdown(struct pci_dev *pdev)
 
 	for (i = 0; i < msixcnt; i++) {
 		irq_entry = &idxd->irq_entries[i];
-		synchronize_irq(idxd->msix_entries[i].vector);
+		synchronize_irq(irq_entry->vector);
+		free_irq(irq_entry->vector, irq_entry);
 		if (i == 0)
 			continue;
 		idxd_flush_pending_llist(irq_entry);
 		idxd_flush_work_list(irq_entry);
 	}
 
+	idxd_msix_perm_clear(idxd);
+	idxd_release_int_handles(idxd);
+	pci_free_irq_vectors(pdev);
+	pci_iounmap(pdev, idxd->reg_base);
+	pci_disable_device(pdev);
 	destroy_workqueue(idxd->wq);
 }
 
@@ -518,13 +721,12 @@ static void idxd_remove(struct pci_dev *pdev)
 	struct idxd_device *idxd = pci_get_drvdata(pdev);
 
 	dev_dbg(&pdev->dev, "%s called\n", __func__);
-	idxd_cleanup_sysfs(idxd);
 	idxd_shutdown(pdev);
 	if (device_pasid_enabled(idxd))
 		idxd_disable_system_pasid(idxd);
-	mutex_lock(&idxd_idr_lock);
-	idr_remove(&idxd_idrs[idxd->type], idxd->id);
-	mutex_unlock(&idxd_idr_lock);
+	idxd_unregister_devices(idxd);
+	perfmon_pmu_remove(idxd);
+	iommu_dev_disable_feature(&pdev->dev, IOMMU_DEV_FEAT_SVA);
 }
 
 static struct pci_driver idxd_pci_driver = {
@@ -537,7 +739,7 @@ static struct pci_driver idxd_pci_driver = {
 
 static int __init idxd_init_module(void)
 {
-	int err, i;
+	int err;
 
 	/*
	 * If the CPU does not support MOVDIR64B or ENQCMDS, there's no point in
@@ -553,8 +755,7 @@ static int __init idxd_init_module(void)
 	else
 		support_enqcmd = true;
 
-	for (i = 0; i < IDXD_TYPE_MAX; i++)
-		idr_init(&idxd_idrs[i]);
+	perfmon_init();
 
 	err = idxd_register_bus_type();
 	if (err < 0)
@@ -589,5 +790,6 @@ static void __exit idxd_exit_module(void)
 	pci_unregister_driver(&idxd_pci_driver);
 	idxd_cdev_remove();
 	idxd_unregister_bus_type();
+	perfmon_exit();
}
module_exit(idxd_exit_module);
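[Editor's note] The new idxd_setup_wqs()/idxd_setup_engines()/idxd_setup_groups() and the interrupt setup above all unwind partial failures with the same `while (--i >= 0)` idiom: on error inside the allocation loop, only the iterations that actually succeeded are undone. A generic sketch with hypothetical item_create()/item_destroy() helpers:

static int setup_items(struct item **arr, int n)
{
	int i, rc;

	for (i = 0; i < n; i++) {
		arr[i] = item_create(i);	/* hypothetical constructor */
		if (!arr[i]) {
			rc = -ENOMEM;
			goto err;
		}
	}
	return 0;

 err:
	while (--i >= 0)		/* undo only what succeeded */
		item_destroy(arr[i]);	/* hypothetical destructor */
	return rc;
}

Because `i` indexes the entry that failed, predecrementing skips it and walks back through completed entries only, which is exactly what the err_wq_irqs: and err: labels above rely on.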
diff --git a/drivers/dma/idxd/irq.c b/drivers/dma/idxd/irq.c
index a60ca11a5784..ae68e1e5487a 100644
--- a/drivers/dma/idxd/irq.c
+++ b/drivers/dma/idxd/irq.c
@@ -45,7 +45,7 @@ static void idxd_device_reinit(struct work_struct *work)
 		goto out;
 
 	for (i = 0; i < idxd->max_wqs; i++) {
-		struct idxd_wq *wq = &idxd->wqs[i];
+		struct idxd_wq *wq = idxd->wqs[i];
 
 		if (wq->state == IDXD_WQ_ENABLED) {
 			rc = idxd_wq_enable(wq);
@@ -102,15 +102,6 @@ static int idxd_device_schedule_fault_process(struct idxd_device *idxd,
 	return 0;
 }
 
-irqreturn_t idxd_irq_handler(int vec, void *data)
-{
-	struct idxd_irq_entry *irq_entry = data;
-	struct idxd_device *idxd = irq_entry->idxd;
-
-	idxd_mask_msix_vector(idxd, irq_entry->id);
-	return IRQ_WAKE_THREAD;
-}
-
 static int process_misc_interrupts(struct idxd_device *idxd, u32 cause)
 {
 	struct device *dev = &idxd->pdev->dev;
@@ -124,22 +115,24 @@ static int process_misc_interrupts(struct idxd_device *idxd, u32 cause)
 		for (i = 0; i < 4; i++)
 			idxd->sw_err.bits[i] = ioread64(idxd->reg_base +
					IDXD_SWERR_OFFSET + i * sizeof(u64));
-		iowrite64(IDXD_SWERR_ACK, idxd->reg_base + IDXD_SWERR_OFFSET);
+
+		iowrite64(idxd->sw_err.bits[0] & IDXD_SWERR_ACK,
			  idxd->reg_base + IDXD_SWERR_OFFSET);
 
 		if (idxd->sw_err.valid && idxd->sw_err.wq_idx_valid) {
 			int id = idxd->sw_err.wq_idx;
-			struct idxd_wq *wq = &idxd->wqs[id];
+			struct idxd_wq *wq = idxd->wqs[id];
 
 			if (wq->type == IDXD_WQT_USER)
-				wake_up_interruptible(&wq->idxd_cdev.err_queue);
+				wake_up_interruptible(&wq->err_queue);
 		} else {
 			int i;
 
 			for (i = 0; i < idxd->max_wqs; i++) {
-				struct idxd_wq *wq = &idxd->wqs[i];
+				struct idxd_wq *wq = idxd->wqs[i];
 
 				if (wq->type == IDXD_WQT_USER)
-					wake_up_interruptible(&wq->idxd_cdev.err_queue);
+					wake_up_interruptible(&wq->err_queue);
 			}
 		}
 
@@ -163,11 +156,8 @@ static int process_misc_interrupts(struct idxd_device *idxd, u32 cause)
 	}
 
 	if (cause & IDXD_INTC_PERFMON_OVFL) {
-		/*
-		 * Driver does not utilize perfmon counter overflow interrupt
-		 * yet.
-		 */
 		val |= IDXD_INTC_PERFMON_OVFL;
+		perfmon_counter_overflow(idxd);
 	}
 
 	val ^= cause;
@@ -200,6 +190,8 @@ static int process_misc_interrupts(struct idxd_device *idxd, u32 cause)
 		queue_work(idxd->wq, &idxd->work);
 	} else {
 		spin_lock_bh(&idxd->dev_lock);
+		idxd_wqs_quiesce(idxd);
+		idxd_wqs_unmap_portal(idxd);
 		idxd_device_wqs_clear_state(idxd);
 		dev_err(&idxd->pdev->dev,
			"idxd halted, need %s.\n",
@@ -233,7 +225,6 @@ irqreturn_t idxd_misc_thread(int vec, void *data)
 		iowrite32(cause, idxd->reg_base + IDXD_INTCAUSE_OFFSET);
 	}
 
-	idxd_unmask_msix_vector(idxd, irq_entry->id);
 	return IRQ_HANDLED;
 }
 
@@ -390,8 +381,6 @@ irqreturn_t idxd_wq_thread(int irq, void *data)
 	int processed;
 
 	processed = idxd_desc_process(irq_entry);
-	idxd_unmask_msix_vector(irq_entry->idxd, irq_entry->id);
-
 	if (processed == 0)
 		return IRQ_NONE;
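[Editor's note] The SWERR change above stops writing the blanket IDXD_SWERR_ACK constant and instead acks only the bits just read back from the register. For a write-1-to-clear (W1C) register this matters: blindly writing all ack bits would also clear an error that latched between the read and the write, losing it unseen. A small sketch of the idiom, with a hypothetical register pointer and mask:

#include <linux/io-64-nonatomic-lo-hi.h>

static void ack_sw_err(void __iomem *reg, u64 ack_mask)
{
	u64 swerr = ioread64(reg);	/* snapshot the currently latched bits */

	/*
	 * W1C semantics: write back only the bits we observed, so an error
	 * that latches after the read stays pending for the next pass.
	 */
	iowrite64(swerr & ack_mask, reg);
}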
+ */ +DEFINE_PERFMON_FORMAT_ATTR(filter_wq, "config1:0-31"); +DEFINE_PERFMON_FORMAT_ATTR(filter_tc, "config1:32-39"); +DEFINE_PERFMON_FORMAT_ATTR(filter_pgsz, "config1:40-43"); +DEFINE_PERFMON_FORMAT_ATTR(filter_sz, "config1:44-51"); +DEFINE_PERFMON_FORMAT_ATTR(filter_eng, "config1:52-59"); + +#define PERFMON_FILTERS_START 2 +#define PERFMON_FILTERS_MAX 5 + +static struct attribute *perfmon_format_attrs[] = { + &format_attr_idxd_event_category.attr, + &format_attr_idxd_event.attr, + &format_attr_idxd_filter_wq.attr, + &format_attr_idxd_filter_tc.attr, + &format_attr_idxd_filter_pgsz.attr, + &format_attr_idxd_filter_sz.attr, + &format_attr_idxd_filter_eng.attr, + NULL, +}; + +static struct attribute_group perfmon_format_attr_group = { + .name = "format", + .attrs = perfmon_format_attrs, +}; + +static const struct attribute_group *perfmon_attr_groups[] = { + &perfmon_format_attr_group, + &cpumask_attr_group, + NULL, +}; + +static ssize_t cpumask_show(struct device *dev, struct device_attribute *attr, + char *buf) +{ + return cpumap_print_to_pagebuf(true, buf, &perfmon_dsa_cpu_mask); +} + +static bool is_idxd_event(struct idxd_pmu *idxd_pmu, struct perf_event *event) +{ + return &idxd_pmu->pmu == event->pmu; +} + +static int perfmon_collect_events(struct idxd_pmu *idxd_pmu, + struct perf_event *leader, + bool do_grp) +{ + struct perf_event *event; + int n, max_count; + + max_count = idxd_pmu->n_counters; + n = idxd_pmu->n_events; + + if (n >= max_count) + return -EINVAL; + + if (is_idxd_event(idxd_pmu, leader)) { + idxd_pmu->event_list[n] = leader; + idxd_pmu->event_list[n]->hw.idx = n; + n++; + } + + if (!do_grp) + return n; + + for_each_sibling_event(event, leader) { + if (!is_idxd_event(idxd_pmu, event) || + event->state <= PERF_EVENT_STATE_OFF) + continue; + + if (n >= max_count) + return -EINVAL; + + idxd_pmu->event_list[n] = event; + idxd_pmu->event_list[n]->hw.idx = n; + n++; + } + + return n; +} + +static void perfmon_assign_hw_event(struct idxd_pmu *idxd_pmu, + struct perf_event *event, int idx) +{ + struct idxd_device *idxd = idxd_pmu->idxd; + struct hw_perf_event *hwc = &event->hw; + + hwc->idx = idx; + hwc->config_base = ioread64(CNTRCFG_REG(idxd, idx)); + hwc->event_base = ioread64(CNTRCFG_REG(idxd, idx)); +} + +static int perfmon_assign_event(struct idxd_pmu *idxd_pmu, + struct perf_event *event) +{ + int i; + + for (i = 0; i < IDXD_PMU_EVENT_MAX; i++) + if (!test_and_set_bit(i, idxd_pmu->used_mask)) + return i; + + return -EINVAL; +} + +/* + * Check whether there are enough counters to satisfy that all the + * events in the group can actually be scheduled at the same time. + * + * To do this, create a fake idxd_pmu object so the event collection + * and assignment functions can be used without affecting the internal + * state of the real idxd_pmu object. 
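+ * + * For example, on hardware advertising four counters, validating a + * five-event idxd group fails here with -EINVAL, and only the + * throwaway fake_pmu's event_list and used_mask are ever written.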
+ */ +static int perfmon_validate_group(struct idxd_pmu *pmu, + struct perf_event *event) +{ + struct perf_event *leader = event->group_leader; + struct idxd_pmu *fake_pmu; + int i, ret = 0, n, idx; + + fake_pmu = kzalloc(sizeof(*fake_pmu), GFP_KERNEL); + if (!fake_pmu) + return -ENOMEM; + + fake_pmu->pmu.name = pmu->pmu.name; + fake_pmu->n_counters = pmu->n_counters; + + n = perfmon_collect_events(fake_pmu, leader, true); + if (n < 0) { + ret = n; + goto out; + } + + fake_pmu->n_events = n; + n = perfmon_collect_events(fake_pmu, event, false); + if (n < 0) { + ret = n; + goto out; + } + + fake_pmu->n_events = n; + + for (i = 0; i < n; i++) { + event = fake_pmu->event_list[i]; + + idx = perfmon_assign_event(fake_pmu, event); + if (idx < 0) { + ret = idx; + goto out; + } + } +out: + kfree(fake_pmu); + + return ret; +} + +static int perfmon_pmu_event_init(struct perf_event *event) +{ + struct idxd_device *idxd; + int ret = 0; + + idxd = event_to_idxd(event); + event->hw.idx = -1; + + if (event->attr.type != event->pmu->type) + return -ENOENT; + + /* sampling not supported */ + if (event->attr.sample_period) + return -EINVAL; + + if (event->cpu < 0) + return -EINVAL; + + if (event->pmu != &idxd->idxd_pmu->pmu) + return -EINVAL; + + event->hw.event_base = ioread64(PERFMON_TABLE_OFFSET(idxd)); + event->cpu = idxd->idxd_pmu->cpu; + event->hw.config = event->attr.config; + + if (event->group_leader != event) + /* non-group events have themselves as leader */ + ret = perfmon_validate_group(idxd->idxd_pmu, event); + + return ret; +} + +static inline u64 perfmon_pmu_read_counter(struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + struct idxd_device *idxd; + int cntr = hwc->idx; + + idxd = event_to_idxd(event); + + return ioread64(CNTRDATA_REG(idxd, cntr)); +} + +static void perfmon_pmu_event_update(struct perf_event *event) +{ + struct idxd_device *idxd = event_to_idxd(event); + u64 prev_raw_count, new_raw_count, delta, p, n; + int shift = 64 - idxd->idxd_pmu->counter_width; + struct hw_perf_event *hwc = &event->hw; + + do { + prev_raw_count = local64_read(&hwc->prev_count); + new_raw_count = perfmon_pmu_read_counter(event); + } while (local64_cmpxchg(&hwc->prev_count, prev_raw_count, + new_raw_count) != prev_raw_count); + + n = (new_raw_count << shift); + p = (prev_raw_count << shift); + + delta = ((n - p) >> shift); + + local64_add(delta, &event->count); +} + +void perfmon_counter_overflow(struct idxd_device *idxd) +{ + int i, n_counters, max_loop = OVERFLOW_SIZE; + struct perf_event *event; + unsigned long ovfstatus; + + n_counters = min(idxd->idxd_pmu->n_counters, OVERFLOW_SIZE); + + ovfstatus = ioread32(OVFSTATUS_REG(idxd)); + + /* + * While updating overflowed counters, other counters behind + * them could overflow and be missed in a given pass. + * Normally this could happen at most n_counters times, but in + * theory a tiny counter width could result in continual + * overflows and endless looping. max_loop provides a + * failsafe in that highly unlikely case. + */ + while (ovfstatus && max_loop--) { + /* Figure out which counter(s) overflowed */ + for_each_set_bit(i, &ovfstatus, n_counters) { + unsigned long ovfstatus_clear = 0; + + /* Update event->count for overflowed counter */ + event = idxd->idxd_pmu->event_list[i]; + perfmon_pmu_event_update(event); + /* Writing 1 to OVFSTATUS bit clears it */ + set_bit(i, &ovfstatus_clear); + iowrite32(ovfstatus_clear, OVFSTATUS_REG(idxd)); + } + + ovfstatus = ioread32(OVFSTATUS_REG(idxd)); + } + + /* + * Should never happen. 
If so, it means a counter(s) looped + * around twice while this handler was running. + */ + WARN_ON_ONCE(ovfstatus); +} + +static inline void perfmon_reset_config(struct idxd_device *idxd) +{ + iowrite32(CONFIG_RESET, PERFRST_REG(idxd)); + iowrite32(0, OVFSTATUS_REG(idxd)); + iowrite32(0, PERFFRZ_REG(idxd)); +} + +static inline void perfmon_reset_counters(struct idxd_device *idxd) +{ + iowrite32(CNTR_RESET, PERFRST_REG(idxd)); +} + +static inline void perfmon_reset(struct idxd_device *idxd) +{ + perfmon_reset_config(idxd); + perfmon_reset_counters(idxd); +} + +static void perfmon_pmu_event_start(struct perf_event *event, int mode) +{ + u32 flt_wq, flt_tc, flt_pg_sz, flt_xfer_sz, flt_eng = 0; + u64 cntr_cfg, cntrdata, event_enc, event_cat = 0; + struct hw_perf_event *hwc = &event->hw; + union filter_cfg flt_cfg; + union event_cfg event_cfg; + struct idxd_device *idxd; + int cntr; + + idxd = event_to_idxd(event); + + event->hw.idx = hwc->idx; + cntr = hwc->idx; + + /* Obtain event category and event value from user space */ + event_cfg.val = event->attr.config; + flt_cfg.val = event->attr.config1; + event_cat = event_cfg.event_cat; + event_enc = event_cfg.event_enc; + + /* Obtain filter configuration from user space */ + flt_wq = flt_cfg.wq; + flt_tc = flt_cfg.tc; + flt_pg_sz = flt_cfg.pg_sz; + flt_xfer_sz = flt_cfg.xfer_sz; + flt_eng = flt_cfg.eng; + + if (flt_wq && test_bit(FLT_WQ, &idxd->idxd_pmu->supported_filters)) + iowrite32(flt_wq, FLTCFG_REG(idxd, cntr, FLT_WQ)); + if (flt_tc && test_bit(FLT_TC, &idxd->idxd_pmu->supported_filters)) + iowrite32(flt_tc, FLTCFG_REG(idxd, cntr, FLT_TC)); + if (flt_pg_sz && test_bit(FLT_PG_SZ, &idxd->idxd_pmu->supported_filters)) + iowrite32(flt_pg_sz, FLTCFG_REG(idxd, cntr, FLT_PG_SZ)); + if (flt_xfer_sz && test_bit(FLT_XFER_SZ, &idxd->idxd_pmu->supported_filters)) + iowrite32(flt_xfer_sz, FLTCFG_REG(idxd, cntr, FLT_XFER_SZ)); + if (flt_eng && test_bit(FLT_ENG, &idxd->idxd_pmu->supported_filters)) + iowrite32(flt_eng, FLTCFG_REG(idxd, cntr, FLT_ENG)); + + /* Read the start value */ + cntrdata = ioread64(CNTRDATA_REG(idxd, cntr)); + local64_set(&event->hw.prev_count, cntrdata); + + /* Set counter to event/category */ + cntr_cfg = event_cat << CNTRCFG_CATEGORY_SHIFT; + cntr_cfg |= event_enc << CNTRCFG_EVENT_SHIFT; + /* Set interrupt on overflow and counter enable bits */ + cntr_cfg |= (CNTRCFG_IRQ_OVERFLOW | CNTRCFG_ENABLE); + + iowrite64(cntr_cfg, CNTRCFG_REG(idxd, cntr)); +} + +static void perfmon_pmu_event_stop(struct perf_event *event, int mode) +{ + struct hw_perf_event *hwc = &event->hw; + struct idxd_device *idxd; + int i, cntr = hwc->idx; + u64 cntr_cfg; + + idxd = event_to_idxd(event); + + /* remove this event from event list */ + for (i = 0; i < idxd->idxd_pmu->n_events; i++) { + if (event != idxd->idxd_pmu->event_list[i]) + continue; + + for (++i; i < idxd->idxd_pmu->n_events; i++) + idxd->idxd_pmu->event_list[i - 1] = idxd->idxd_pmu->event_list[i]; + --idxd->idxd_pmu->n_events; + break; + } + + cntr_cfg = ioread64(CNTRCFG_REG(idxd, cntr)); + cntr_cfg &= ~CNTRCFG_ENABLE; + iowrite64(cntr_cfg, CNTRCFG_REG(idxd, cntr)); + + if (mode == PERF_EF_UPDATE) + perfmon_pmu_event_update(event); + + event->hw.idx = -1; + clear_bit(cntr, idxd->idxd_pmu->used_mask); +} + +static void perfmon_pmu_event_del(struct perf_event *event, int mode) +{ + perfmon_pmu_event_stop(event, PERF_EF_UPDATE); +} + +static int perfmon_pmu_event_add(struct perf_event *event, int flags) +{ + struct idxd_device *idxd = event_to_idxd(event); + struct idxd_pmu *idxd_pmu = 
idxd->idxd_pmu; + struct hw_perf_event *hwc = &event->hw; + int idx, n; + + n = perfmon_collect_events(idxd_pmu, event, false); + if (n < 0) + return n; + + hwc->state = PERF_HES_UPTODATE | PERF_HES_STOPPED; + if (!(flags & PERF_EF_START)) + hwc->state |= PERF_HES_ARCH; + + idx = perfmon_assign_event(idxd_pmu, event); + if (idx < 0) + return idx; + + perfmon_assign_hw_event(idxd_pmu, event, idx); + + if (flags & PERF_EF_START) + perfmon_pmu_event_start(event, 0); + + idxd_pmu->n_events = n; + + return 0; +} + +static void enable_perfmon_pmu(struct idxd_device *idxd) +{ + iowrite32(COUNTER_UNFREEZE, PERFFRZ_REG(idxd)); +} + +static void disable_perfmon_pmu(struct idxd_device *idxd) +{ + iowrite32(COUNTER_FREEZE, PERFFRZ_REG(idxd)); +} + +static void perfmon_pmu_enable(struct pmu *pmu) +{ + struct idxd_device *idxd = pmu_to_idxd(pmu); + + enable_perfmon_pmu(idxd); +} + +static void perfmon_pmu_disable(struct pmu *pmu) +{ + struct idxd_device *idxd = pmu_to_idxd(pmu); + + disable_perfmon_pmu(idxd); +} + +static void skip_filter(int i) +{ + int j; + + for (j = i; j < PERFMON_FILTERS_MAX; j++) + perfmon_format_attrs[PERFMON_FILTERS_START + j] = + perfmon_format_attrs[PERFMON_FILTERS_START + j + 1]; +} + +static void idxd_pmu_init(struct idxd_pmu *idxd_pmu) +{ + int i; + + for (i = 0 ; i < PERFMON_FILTERS_MAX; i++) { + if (!test_bit(i, &idxd_pmu->supported_filters)) + skip_filter(i); + } + + idxd_pmu->pmu.name = idxd_pmu->name; + idxd_pmu->pmu.attr_groups = perfmon_attr_groups; + idxd_pmu->pmu.task_ctx_nr = perf_invalid_context; + idxd_pmu->pmu.event_init = perfmon_pmu_event_init; + idxd_pmu->pmu.pmu_enable = perfmon_pmu_enable, + idxd_pmu->pmu.pmu_disable = perfmon_pmu_disable, + idxd_pmu->pmu.add = perfmon_pmu_event_add; + idxd_pmu->pmu.del = perfmon_pmu_event_del; + idxd_pmu->pmu.start = perfmon_pmu_event_start; + idxd_pmu->pmu.stop = perfmon_pmu_event_stop; + idxd_pmu->pmu.read = perfmon_pmu_event_update; + idxd_pmu->pmu.capabilities = PERF_PMU_CAP_NO_EXCLUDE; + idxd_pmu->pmu.module = THIS_MODULE; +} + +void perfmon_pmu_remove(struct idxd_device *idxd) +{ + if (!idxd->idxd_pmu) + return; + + cpuhp_state_remove_instance(cpuhp_slot, &idxd->idxd_pmu->cpuhp_node); + perf_pmu_unregister(&idxd->idxd_pmu->pmu); + kfree(idxd->idxd_pmu); + idxd->idxd_pmu = NULL; +} + +static int perf_event_cpu_online(unsigned int cpu, struct hlist_node *node) +{ + struct idxd_pmu *idxd_pmu; + + idxd_pmu = hlist_entry_safe(node, typeof(*idxd_pmu), cpuhp_node); + + /* select the first online CPU as the designated reader */ + if (cpumask_empty(&perfmon_dsa_cpu_mask)) { + cpumask_set_cpu(cpu, &perfmon_dsa_cpu_mask); + idxd_pmu->cpu = cpu; + } + + return 0; +} + +static int perf_event_cpu_offline(unsigned int cpu, struct hlist_node *node) +{ + struct idxd_pmu *idxd_pmu; + unsigned int target; + + idxd_pmu = hlist_entry_safe(node, typeof(*idxd_pmu), cpuhp_node); + + if (!cpumask_test_and_clear_cpu(cpu, &perfmon_dsa_cpu_mask)) + return 0; + + target = cpumask_any_but(cpu_online_mask, cpu); + + /* migrate events if there is a valid target */ + if (target < nr_cpu_ids) + cpumask_set_cpu(target, &perfmon_dsa_cpu_mask); + else + target = -1; + + perf_pmu_migrate_context(&idxd_pmu->pmu, cpu, target); + + return 0; +} + +int perfmon_pmu_init(struct idxd_device *idxd) +{ + union idxd_perfcap perfcap; + struct idxd_pmu *idxd_pmu; + int rc = -ENODEV; + + /* + * perfmon module initialization failed, nothing to do + */ + if (!cpuhp_set_up) + return -ENODEV; + + /* + * If perfmon_offset or num_counters is 0, it means perfmon is + * not 
supported on this hardware. + */ + if (idxd->perfmon_offset == 0) + return -ENODEV; + + idxd_pmu = kzalloc(sizeof(*idxd_pmu), GFP_KERNEL); + if (!idxd_pmu) + return -ENOMEM; + + idxd_pmu->idxd = idxd; + idxd->idxd_pmu = idxd_pmu; + + if (idxd->data->type == IDXD_TYPE_DSA) { + rc = sprintf(idxd_pmu->name, "dsa%d", idxd->id); + if (rc < 0) + goto free; + } else if (idxd->data->type == IDXD_TYPE_IAX) { + rc = sprintf(idxd_pmu->name, "iax%d", idxd->id); + if (rc < 0) + goto free; + } else { + goto free; + } + + perfmon_reset(idxd); + + perfcap.bits = ioread64(PERFCAP_REG(idxd)); + + /* + * If total perf counter is 0, stop further registration. + * This is necessary in order to support driver running on + * guest which does not have pmon support. + */ + if (perfcap.num_perf_counter == 0) + goto free; + + /* A counter width of 0 means it can't count */ + if (perfcap.counter_width == 0) + goto free; + + /* Overflow interrupt and counter freeze support must be available */ + if (!perfcap.overflow_interrupt || !perfcap.counter_freeze) + goto free; + + /* Number of event categories cannot be 0 */ + if (perfcap.num_event_category == 0) + goto free; + + /* + * We don't support per-counter capabilities for now. + */ + if (perfcap.cap_per_counter) + goto free; + + idxd_pmu->n_event_categories = perfcap.num_event_category; + idxd_pmu->supported_event_categories = perfcap.global_event_category; + idxd_pmu->per_counter_caps_supported = perfcap.cap_per_counter; + + /* check filter capability. If 0, then filters are not supported */ + idxd_pmu->supported_filters = perfcap.filter; + if (perfcap.filter) + idxd_pmu->n_filters = hweight8(perfcap.filter); + + /* Store the total number of counters categories, and counter width */ + idxd_pmu->n_counters = perfcap.num_perf_counter; + idxd_pmu->counter_width = perfcap.counter_width; + + idxd_pmu_init(idxd_pmu); + + rc = perf_pmu_register(&idxd_pmu->pmu, idxd_pmu->name, -1); + if (rc) + goto free; + + rc = cpuhp_state_add_instance(cpuhp_slot, &idxd_pmu->cpuhp_node); + if (rc) { + perf_pmu_unregister(&idxd->idxd_pmu->pmu); + goto free; + } +out: + return rc; +free: + kfree(idxd_pmu); + idxd->idxd_pmu = NULL; + + goto out; +} + +void __init perfmon_init(void) +{ + int rc = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN, + "driver/dma/idxd/perf:online", + perf_event_cpu_online, + perf_event_cpu_offline); + if (WARN_ON(rc < 0)) + return; + + cpuhp_slot = rc; + cpuhp_set_up = true; +} + +void __exit perfmon_exit(void) +{ + if (cpuhp_set_up) + cpuhp_remove_multi_state(cpuhp_slot); +} diff --git a/drivers/dma/idxd/perfmon.h b/drivers/dma/idxd/perfmon.h new file mode 100644 index 000000000000..9a081a1bc605 --- /dev/null +++ b/drivers/dma/idxd/perfmon.h @@ -0,0 +1,119 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright(c) 2020 Intel Corporation. All rights rsvd. 
*/ + +#ifndef _PERFMON_H_ +#define _PERFMON_H_ + +#include <linux/slab.h> +#include <linux/pci.h> +#include <linux/sbitmap.h> +#include <linux/dmaengine.h> +#include <linux/percpu-rwsem.h> +#include <linux/wait.h> +#include <linux/cdev.h> +#include <linux/uuid.h> +#include <linux/idxd.h> +#include <linux/perf_event.h> +#include "registers.h" + +static inline struct idxd_pmu *event_to_pmu(struct perf_event *event) +{ + struct idxd_pmu *idxd_pmu; + struct pmu *pmu; + + pmu = event->pmu; + idxd_pmu = container_of(pmu, struct idxd_pmu, pmu); + + return idxd_pmu; +} + +static inline struct idxd_device *event_to_idxd(struct perf_event *event) +{ + struct idxd_pmu *idxd_pmu; + struct pmu *pmu; + + pmu = event->pmu; + idxd_pmu = container_of(pmu, struct idxd_pmu, pmu); + + return idxd_pmu->idxd; +} + +static inline struct idxd_device *pmu_to_idxd(struct pmu *pmu) +{ + struct idxd_pmu *idxd_pmu; + + idxd_pmu = container_of(pmu, struct idxd_pmu, pmu); + + return idxd_pmu->idxd; +} + +enum dsa_perf_events { + DSA_PERF_EVENT_WQ = 0, + DSA_PERF_EVENT_ENGINE, + DSA_PERF_EVENT_ADDR_TRANS, + DSA_PERF_EVENT_OP, + DSA_PERF_EVENT_COMPL, + DSA_PERF_EVENT_MAX, +}; + +enum filter_enc { + FLT_WQ = 0, + FLT_TC, + FLT_PG_SZ, + FLT_XFER_SZ, + FLT_ENG, + FLT_MAX, +}; + +#define CONFIG_RESET 0x0000000000000001 +#define CNTR_RESET 0x0000000000000002 +#define CNTR_ENABLE 0x0000000000000001 +#define INTR_OVFL 0x0000000000000002 + +#define COUNTER_FREEZE 0x00000000FFFFFFFF +#define COUNTER_UNFREEZE 0x0000000000000000 +#define OVERFLOW_SIZE 32 + +#define CNTRCFG_ENABLE BIT(0) +#define CNTRCFG_IRQ_OVERFLOW BIT(1) +#define CNTRCFG_CATEGORY_SHIFT 8 +#define CNTRCFG_EVENT_SHIFT 32 + +#define PERFMON_TABLE_OFFSET(_idxd) \ +({ \ + typeof(_idxd) __idxd = (_idxd); \ + ((__idxd)->reg_base + (__idxd)->perfmon_offset); \ +}) +#define PERFMON_REG_OFFSET(idxd, offset) \ + (PERFMON_TABLE_OFFSET(idxd) + (offset)) + +#define PERFCAP_REG(idxd) (PERFMON_REG_OFFSET(idxd, IDXD_PERFCAP_OFFSET)) +#define PERFRST_REG(idxd) (PERFMON_REG_OFFSET(idxd, IDXD_PERFRST_OFFSET)) +#define OVFSTATUS_REG(idxd) (PERFMON_REG_OFFSET(idxd, IDXD_OVFSTATUS_OFFSET)) +#define PERFFRZ_REG(idxd) (PERFMON_REG_OFFSET(idxd, IDXD_PERFFRZ_OFFSET)) + +#define FLTCFG_REG(idxd, cntr, flt) \ + (PERFMON_REG_OFFSET(idxd, IDXD_FLTCFG_OFFSET) + ((cntr) * 32) + ((flt) * 4)) + +#define CNTRCFG_REG(idxd, cntr) \ + (PERFMON_REG_OFFSET(idxd, IDXD_CNTRCFG_OFFSET) + ((cntr) * 8)) +#define CNTRDATA_REG(idxd, cntr) \ + (PERFMON_REG_OFFSET(idxd, IDXD_CNTRDATA_OFFSET) + ((cntr) * 8)) +#define CNTRCAP_REG(idxd, cntr) \ + (PERFMON_REG_OFFSET(idxd, IDXD_CNTRCAP_OFFSET) + ((cntr) * 8)) + +#define EVNTCAP_REG(idxd, category) \ + (PERFMON_REG_OFFSET(idxd, IDXD_EVNTCAP_OFFSET) + ((category) * 8)) + +#define DEFINE_PERFMON_FORMAT_ATTR(_name, _format) \ +static ssize_t __perfmon_idxd_##_name##_show(struct kobject *kobj, \ + struct kobj_attribute *attr, \ + char *page) \ +{ \ + BUILD_BUG_ON(sizeof(_format) >= PAGE_SIZE); \ + return sprintf(page, _format "\n"); \ +} \ +static struct kobj_attribute format_attr_idxd_##_name = \ + __ATTR(_name, 0444, __perfmon_idxd_##_name##_show, NULL) + +#endif diff --git a/drivers/dma/idxd/registers.h b/drivers/dma/idxd/registers.h index 751ecb4f9f81..c970c3f025f0 100644 --- a/drivers/dma/idxd/registers.h +++ b/drivers/dma/idxd/registers.h @@ -24,8 +24,8 @@ union gen_cap_reg { u64 overlap_copy:1; u64 cache_control_mem:1; u64 cache_control_cache:1; + u64 cmd_cap:1; u64 rsvd:3; - u64 int_handle_req:1; u64 dest_readback:1; u64 drain_readback:1; u64 rsvd2:6; @@ -120,7 
+120,8 @@ union gencfg_reg { union genctrl_reg { struct { u32 softerr_int_en:1; - u32 rsvd:31; + u32 halt_int_en:1; + u32 rsvd:30; }; u32 bits; } __packed; @@ -180,8 +181,11 @@ enum idxd_cmd { IDXD_CMD_DRAIN_PASID, IDXD_CMD_ABORT_PASID, IDXD_CMD_REQUEST_INT_HANDLE, + IDXD_CMD_RELEASE_INT_HANDLE, }; +#define CMD_INT_HANDLE_IMS 0x10000 + #define IDXD_CMDSTS_OFFSET 0xa8 union cmdsts_reg { struct { @@ -193,6 +197,8 @@ union cmdsts_reg { u32 bits; } __packed; #define IDXD_CMDSTS_ACTIVE 0x80000000 +#define IDXD_CMDSTS_ERR_MASK 0xff +#define IDXD_CMDSTS_RES_SHIFT 8 enum idxd_cmdsts_err { IDXD_CMDSTS_SUCCESS = 0, @@ -228,6 +234,8 @@ enum idxd_cmdsts_err { IDXD_CMDSTS_ERR_NO_HANDLE, }; +#define IDXD_CMDCAP_OFFSET 0xb0 + #define IDXD_SWERR_OFFSET 0xc0 #define IDXD_SWERR_VALID 0x00000001 #define IDXD_SWERR_OVERFLOW 0x00000002 @@ -378,4 +386,112 @@ union wqcfg { #define GRPENGCFG_OFFSET(idxd_dev, n) ((idxd_dev)->grpcfg_offset + (n) * GRPCFG_SIZE + 32) #define GRPFLGCFG_OFFSET(idxd_dev, n) ((idxd_dev)->grpcfg_offset + (n) * GRPCFG_SIZE + 40) +/* Following is performance monitor registers */ +#define IDXD_PERFCAP_OFFSET 0x0 +union idxd_perfcap { + struct { + u64 num_perf_counter:6; + u64 rsvd1:2; + u64 counter_width:8; + u64 num_event_category:4; + u64 global_event_category:16; + u64 filter:8; + u64 rsvd2:8; + u64 cap_per_counter:1; + u64 writeable_counter:1; + u64 counter_freeze:1; + u64 overflow_interrupt:1; + u64 rsvd3:8; + }; + u64 bits; +} __packed; + +#define IDXD_EVNTCAP_OFFSET 0x80 +union idxd_evntcap { + struct { + u64 events:28; + u64 rsvd:36; + }; + u64 bits; +} __packed; + +struct idxd_event { + union { + struct { + u32 event_category:4; + u32 events:28; + }; + u32 val; + }; +} __packed; + +#define IDXD_CNTRCAP_OFFSET 0x800 +struct idxd_cntrcap { + union { + struct { + u32 counter_width:8; + u32 rsvd:20; + u32 num_events:4; + }; + u32 val; + }; + struct idxd_event events[]; +} __packed; + +#define IDXD_PERFRST_OFFSET 0x10 +union idxd_perfrst { + struct { + u32 perfrst_config:1; + u32 perfrst_counter:1; + u32 rsvd:30; + }; + u32 val; +} __packed; + +#define IDXD_OVFSTATUS_OFFSET 0x30 +#define IDXD_PERFFRZ_OFFSET 0x20 +#define IDXD_CNTRCFG_OFFSET 0x100 +union idxd_cntrcfg { + struct { + u64 enable:1; + u64 interrupt_ovf:1; + u64 global_freeze_ovf:1; + u64 rsvd1:5; + u64 event_category:4; + u64 rsvd2:20; + u64 events:28; + u64 rsvd3:4; + }; + u64 val; +} __packed; + +#define IDXD_FLTCFG_OFFSET 0x300 + +#define IDXD_CNTRDATA_OFFSET 0x200 +union idxd_cntrdata { + struct { + u64 event_count_value; + }; + u64 val; +} __packed; + +union event_cfg { + struct { + u64 event_cat:4; + u64 event_enc:28; + }; + u64 val; +} __packed; + +union filter_cfg { + struct { + u64 wq:32; + u64 tc:8; + u64 pg_sz:4; + u64 xfer_sz:8; + u64 eng:8; + }; + u64 val; +} __packed; + #endif diff --git a/drivers/dma/idxd/submit.c b/drivers/dma/idxd/submit.c index a7a61bcc17d5..19afb62abaff 100644 --- a/drivers/dma/idxd/submit.c +++ b/drivers/dma/idxd/submit.c @@ -15,18 +15,30 @@ static struct idxd_desc *__get_desc(struct idxd_wq *wq, int idx, int cpu) desc = wq->descs[idx]; memset(desc->hw, 0, sizeof(struct dsa_hw_desc)); - memset(desc->completion, 0, idxd->compl_size); + memset(desc->completion, 0, idxd->data->compl_size); desc->cpu = cpu; if (device_pasid_enabled(idxd)) desc->hw->pasid = idxd->pasid; /* - * Descriptor completion vectors are 1-8 for MSIX. We will round - * robin through the 8 vectors. + * Descriptor completion vectors are 1...N for MSIX. We will round + * robin through the N vectors. 
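+ * + * For example, with num_wq_irqs == 3, successive descriptors get + * vec_ptr values 1, 2, 3, 1, 2, ... so completion interrupts are + * spread evenly across the descriptor vectors, while vector 0 stays + * reserved for the device's misc interrupt.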
*/ wq->vec_ptr = (wq->vec_ptr % idxd->num_wq_irqs) + 1; - desc->hw->int_handle = wq->vec_ptr; + if (!idxd->int_handles) { + desc->hw->int_handle = wq->vec_ptr; + } else { + desc->vector = wq->vec_ptr; + /* + * int_handles are only for descriptor completion. However for device + * MSIX enumeration, vec 0 is used for misc interrupts. Therefore even + * though we are rotating through 1...N for descriptor interrupts, we + * need to acquire the int_handles from 0..N-1. + */ + desc->hw->int_handle = idxd->int_handles[desc->vector - 1]; + } + return desc; } @@ -79,13 +91,15 @@ void idxd_free_desc(struct idxd_wq *wq, struct idxd_desc *desc) int idxd_submit_desc(struct idxd_wq *wq, struct idxd_desc *desc) { struct idxd_device *idxd = wq->idxd; - int vec = desc->hw->int_handle; void __iomem *portal; int rc; if (idxd->state != IDXD_DEV_ENABLED) return -EIO; + if (!percpu_ref_tryget_live(&wq->wq_active)) + return -ENXIO; + portal = wq->portal; /* @@ -108,13 +122,25 @@ int idxd_submit_desc(struct idxd_wq *wq, struct idxd_desc *desc) return rc; } + percpu_ref_put(&wq->wq_active); + /* * Pending the descriptor to the lockless list for the irq_entry * that we designated the descriptor to. */ - if (desc->hw->flags & IDXD_OP_FLAG_RCI) - llist_add(&desc->llnode, - &idxd->irq_entries[vec].pending_llist); + if (desc->hw->flags & IDXD_OP_FLAG_RCI) { + int vec; + + /* + * If the driver is on the host kernel, it would be the value + * assigned to the interrupt handle, which is the index for the + * MSIX vector. If it's a guest then it can't use the int_handle + * since that is the index into IMS for the entire device. The + * guest device local index will be used. + */ + vec = !idxd->int_handles ? desc->hw->int_handle : desc->vector; + llist_add(&desc->llnode, &idxd->irq_entries[vec].pending_llist); + } return 0; } diff --git a/drivers/dma/idxd/sysfs.c b/drivers/dma/idxd/sysfs.c index 4dbb03c545e4..0460d58e3941 100644 --- a/drivers/dma/idxd/sysfs.c +++ b/drivers/dma/idxd/sysfs.c @@ -16,69 +16,6 @@ static char *idxd_wq_type_names[] = { [IDXD_WQT_USER] = "user", }; -static void idxd_conf_device_release(struct device *dev) -{ - dev_dbg(dev, "%s for %s\n", __func__, dev_name(dev)); -} - -static struct device_type idxd_group_device_type = { - .name = "group", - .release = idxd_conf_device_release, -}; - -static struct device_type idxd_wq_device_type = { - .name = "wq", - .release = idxd_conf_device_release, -}; - -static struct device_type idxd_engine_device_type = { - .name = "engine", - .release = idxd_conf_device_release, -}; - -static struct device_type dsa_device_type = { - .name = "dsa", - .release = idxd_conf_device_release, -}; - -static struct device_type iax_device_type = { - .name = "iax", - .release = idxd_conf_device_release, -}; - -static inline bool is_dsa_dev(struct device *dev) -{ - return dev ? dev->type == &dsa_device_type : false; -} - -static inline bool is_iax_dev(struct device *dev) -{ - return dev ? dev->type == &iax_device_type : false; -} - -static inline bool is_idxd_dev(struct device *dev) -{ - return is_dsa_dev(dev) || is_iax_dev(dev); -} - -static inline bool is_idxd_wq_dev(struct device *dev) -{ - return dev ? 
dev->type == &idxd_wq_device_type : false; -} - -static inline bool is_idxd_wq_dmaengine(struct idxd_wq *wq) -{ - if (wq->type == IDXD_WQT_KERNEL && - strcmp(wq->name, "dmaengine") == 0) - return true; - return false; -} - -static inline bool is_idxd_wq_cdev(struct idxd_wq *wq) -{ - return wq->type == IDXD_WQT_USER; -} - static int idxd_config_bus_match(struct device *dev, struct device_driver *drv) { @@ -110,9 +47,131 @@ static int idxd_config_bus_match(struct device *dev, return matched; } -static int idxd_config_bus_probe(struct device *dev) +static int enable_wq(struct idxd_wq *wq) { + struct idxd_device *idxd = wq->idxd; + struct device *dev = &idxd->pdev->dev; + unsigned long flags; int rc; + + mutex_lock(&wq->wq_lock); + + if (idxd->state != IDXD_DEV_ENABLED) { + mutex_unlock(&wq->wq_lock); + dev_warn(dev, "Enabling while device not enabled.\n"); + return -EPERM; + } + + if (wq->state != IDXD_WQ_DISABLED) { + mutex_unlock(&wq->wq_lock); + dev_warn(dev, "WQ %d already enabled.\n", wq->id); + return -EBUSY; + } + + if (!wq->group) { + mutex_unlock(&wq->wq_lock); + dev_warn(dev, "WQ not attached to group.\n"); + return -EINVAL; + } + + if (strlen(wq->name) == 0) { + mutex_unlock(&wq->wq_lock); + dev_warn(dev, "WQ name not set.\n"); + return -EINVAL; + } + + /* Shared WQ checks */ + if (wq_shared(wq)) { + if (!device_swq_supported(idxd)) { + dev_warn(dev, "PASID not enabled and shared WQ.\n"); + mutex_unlock(&wq->wq_lock); + return -ENXIO; + } + /* + * Shared wq with the threshold set to 0 means the user + * did not set the threshold or transitioned from a + * dedicated wq but did not set threshold. A value + * of 0 would effectively disable the shared wq. The + * driver does not allow a value of 0 to be set for + * threshold via sysfs. + */ + if (wq->threshold == 0) { + dev_warn(dev, "Shared WQ and threshold 0.\n"); + mutex_unlock(&wq->wq_lock); + return -EINVAL; + } + } + + rc = idxd_wq_alloc_resources(wq); + if (rc < 0) { + mutex_unlock(&wq->wq_lock); + dev_warn(dev, "WQ resource alloc failed\n"); + return rc; + } + + spin_lock_irqsave(&idxd->dev_lock, flags); + if (test_bit(IDXD_FLAG_CONFIGURABLE, &idxd->flags)) + rc = idxd_device_config(idxd); + spin_unlock_irqrestore(&idxd->dev_lock, flags); + if (rc < 0) { + mutex_unlock(&wq->wq_lock); + dev_warn(dev, "Writing WQ %d config failed: %d\n", wq->id, rc); + return rc; + } + + rc = idxd_wq_enable(wq); + if (rc < 0) { + mutex_unlock(&wq->wq_lock); + dev_warn(dev, "WQ %d enabling failed: %d\n", wq->id, rc); + return rc; + } + + rc = idxd_wq_map_portal(wq); + if (rc < 0) { + dev_warn(dev, "wq portal mapping failed: %d\n", rc); + rc = idxd_wq_disable(wq); + if (rc < 0) + dev_warn(dev, "IDXD wq disable failed\n"); + mutex_unlock(&wq->wq_lock); + return rc; + } + + wq->client_count = 0; + + if (wq->type == IDXD_WQT_KERNEL) { + rc = idxd_wq_init_percpu_ref(wq); + if (rc < 0) { + dev_dbg(dev, "percpu_ref setup failed\n"); + mutex_unlock(&wq->wq_lock); + return rc; + } + } + + if (is_idxd_wq_dmaengine(wq)) { + rc = idxd_register_dma_channel(wq); + if (rc < 0) { + dev_dbg(dev, "DMA channel register failed\n"); + mutex_unlock(&wq->wq_lock); + return rc; + } + } else if (is_idxd_wq_cdev(wq)) { + rc = idxd_wq_add_cdev(wq); + if (rc < 0) { + dev_dbg(dev, "Cdev creation failed\n"); + mutex_unlock(&wq->wq_lock); + return rc; + } + } + + mutex_unlock(&wq->wq_lock); + dev_info(dev, "wq %s enabled\n", dev_name(&wq->conf_dev)); + + return 0; +} + +static int idxd_config_bus_probe(struct device *dev) +{ + int rc = 0; unsigned long flags; dev_dbg(dev, "%s 
called\n", __func__); @@ -130,7 +189,8 @@ static int idxd_config_bus_probe(struct device *dev) /* Perform IDXD configuration and enabling */ spin_lock_irqsave(&idxd->dev_lock, flags); - rc = idxd_device_config(idxd); + if (test_bit(IDXD_FLAG_CONFIGURABLE, &idxd->flags)) + rc = idxd_device_config(idxd); spin_unlock_irqrestore(&idxd->dev_lock, flags); if (rc < 0) { module_put(THIS_MODULE); @@ -157,115 +217,8 @@ static int idxd_config_bus_probe(struct device *dev) return 0; } else if (is_idxd_wq_dev(dev)) { struct idxd_wq *wq = confdev_to_wq(dev); - struct idxd_device *idxd = wq->idxd; - - mutex_lock(&wq->wq_lock); - - if (idxd->state != IDXD_DEV_ENABLED) { - mutex_unlock(&wq->wq_lock); - dev_warn(dev, "Enabling while device not enabled.\n"); - return -EPERM; - } - - if (wq->state != IDXD_WQ_DISABLED) { - mutex_unlock(&wq->wq_lock); - dev_warn(dev, "WQ %d already enabled.\n", wq->id); - return -EBUSY; - } - - if (!wq->group) { - mutex_unlock(&wq->wq_lock); - dev_warn(dev, "WQ not attached to group.\n"); - return -EINVAL; - } - - if (strlen(wq->name) == 0) { - mutex_unlock(&wq->wq_lock); - dev_warn(dev, "WQ name not set.\n"); - return -EINVAL; - } - - /* Shared WQ checks */ - if (wq_shared(wq)) { - if (!device_swq_supported(idxd)) { - dev_warn(dev, - "PASID not enabled and shared WQ.\n"); - mutex_unlock(&wq->wq_lock); - return -ENXIO; - } - /* - * Shared wq with the threshold set to 0 means the user - * did not set the threshold or transitioned from a - * dedicated wq but did not set threshold. A value - * of 0 would effectively disable the shared wq. The - * driver does not allow a value of 0 to be set for - * threshold via sysfs. - */ - if (wq->threshold == 0) { - dev_warn(dev, - "Shared WQ and threshold 0.\n"); - mutex_unlock(&wq->wq_lock); - return -EINVAL; - } - } - - rc = idxd_wq_alloc_resources(wq); - if (rc < 0) { - mutex_unlock(&wq->wq_lock); - dev_warn(dev, "WQ resource alloc failed\n"); - return rc; - } - - spin_lock_irqsave(&idxd->dev_lock, flags); - rc = idxd_device_config(idxd); - spin_unlock_irqrestore(&idxd->dev_lock, flags); - if (rc < 0) { - mutex_unlock(&wq->wq_lock); - dev_warn(dev, "Writing WQ %d config failed: %d\n", - wq->id, rc); - return rc; - } - - rc = idxd_wq_enable(wq); - if (rc < 0) { - mutex_unlock(&wq->wq_lock); - dev_warn(dev, "WQ %d enabling failed: %d\n", - wq->id, rc); - return rc; - } - - rc = idxd_wq_map_portal(wq); - if (rc < 0) { - dev_warn(dev, "wq portal mapping failed: %d\n", rc); - rc = idxd_wq_disable(wq); - if (rc < 0) - dev_warn(dev, "IDXD wq disable failed\n"); - mutex_unlock(&wq->wq_lock); - return rc; - } - - wq->client_count = 0; - dev_info(dev, "wq %s enabled\n", dev_name(&wq->conf_dev)); - - if (is_idxd_wq_dmaengine(wq)) { - rc = idxd_register_dma_channel(wq); - if (rc < 0) { - dev_dbg(dev, "DMA channel register failed\n"); - mutex_unlock(&wq->wq_lock); - return rc; - } - } else if (is_idxd_wq_cdev(wq)) { - rc = idxd_wq_add_cdev(wq); - if (rc < 0) { - dev_dbg(dev, "Cdev creation failed\n"); - mutex_unlock(&wq->wq_lock); - return rc; - } - } - - mutex_unlock(&wq->wq_lock); - return 0; + return enable_wq(wq); } return -ENODEV; @@ -275,7 +228,6 @@ static void disable_wq(struct idxd_wq *wq) { struct idxd_device *idxd = wq->idxd; struct device *dev = &idxd->pdev->dev; - int rc; mutex_lock(&wq->wq_lock); dev_dbg(dev, "%s removing WQ %s\n", __func__, dev_name(&wq->conf_dev)); @@ -284,6 +236,9 @@ static void disable_wq(struct idxd_wq *wq) return; } + if (wq->type == IDXD_WQT_KERNEL) + idxd_wq_quiesce(wq); + if (is_idxd_wq_dmaengine(wq)) 
idxd_unregister_dma_channel(wq); else if (is_idxd_wq_cdev(wq)) @@ -296,17 +251,13 @@ static void disable_wq(struct idxd_wq *wq) idxd_wq_unmap_portal(wq); idxd_wq_drain(wq); - rc = idxd_wq_disable(wq); + idxd_wq_reset(wq); idxd_wq_free_resources(wq); wq->client_count = 0; mutex_unlock(&wq->wq_lock); - if (rc < 0) - dev_warn(dev, "Failed to disable %s: %d\n", - dev_name(&wq->conf_dev), rc); - else - dev_info(dev, "wq %s disabled\n", dev_name(&wq->conf_dev)); + dev_info(dev, "wq %s disabled\n", dev_name(&wq->conf_dev)); } static int idxd_config_bus_remove(struct device *dev) @@ -327,7 +278,7 @@ static int idxd_config_bus_remove(struct device *dev) dev_dbg(dev, "%s removing dev %s\n", __func__, dev_name(&idxd->conf_dev)); for (i = 0; i < idxd->max_wqs; i++) { - struct idxd_wq *wq = &idxd->wqs[i]; + struct idxd_wq *wq = idxd->wqs[i]; if (wq->state == IDXD_WQ_DISABLED) continue; @@ -338,12 +289,14 @@ static int idxd_config_bus_remove(struct device *dev) idxd_unregister_dma_device(idxd); rc = idxd_device_disable(idxd); - for (i = 0; i < idxd->max_wqs; i++) { - struct idxd_wq *wq = &idxd->wqs[i]; + if (test_bit(IDXD_FLAG_CONFIGURABLE, &idxd->flags)) { + for (i = 0; i < idxd->max_wqs; i++) { + struct idxd_wq *wq = idxd->wqs[i]; - mutex_lock(&wq->wq_lock); - idxd_wq_disable_cleanup(wq); - mutex_unlock(&wq->wq_lock); + mutex_lock(&wq->wq_lock); + idxd_wq_disable_cleanup(wq); + mutex_unlock(&wq->wq_lock); + } } module_put(THIS_MODULE); if (rc < 0) @@ -369,19 +322,6 @@ struct bus_type dsa_bus_type = { .shutdown = idxd_config_bus_shutdown, }; -struct bus_type iax_bus_type = { - .name = "iax", - .match = idxd_config_bus_match, - .probe = idxd_config_bus_probe, - .remove = idxd_config_bus_remove, - .shutdown = idxd_config_bus_shutdown, -}; - -static struct bus_type *idxd_bus_types[] = { - &dsa_bus_type, - &iax_bus_type -}; - static struct idxd_device_driver dsa_drv = { .drv = { .name = "dsa", @@ -391,60 +331,15 @@ static struct idxd_device_driver dsa_drv = { }, }; -static struct idxd_device_driver iax_drv = { - .drv = { - .name = "iax", - .bus = &iax_bus_type, - .owner = THIS_MODULE, - .mod_name = KBUILD_MODNAME, - }, -}; - -static struct idxd_device_driver *idxd_drvs[] = { - &dsa_drv, - &iax_drv -}; - -struct bus_type *idxd_get_bus_type(struct idxd_device *idxd) -{ - return idxd_bus_types[idxd->type]; -} - -static struct device_type *idxd_get_device_type(struct idxd_device *idxd) -{ - if (idxd->type == IDXD_TYPE_DSA) - return &dsa_device_type; - else if (idxd->type == IDXD_TYPE_IAX) - return &iax_device_type; - else - return NULL; -} - /* IDXD generic driver setup */ int idxd_register_driver(void) { - int i, rc; - - for (i = 0; i < IDXD_TYPE_MAX; i++) { - rc = driver_register(&idxd_drvs[i]->drv); - if (rc < 0) - goto drv_fail; - } - - return 0; - -drv_fail: - while (--i >= 0) - driver_unregister(&idxd_drvs[i]->drv); - return rc; + return driver_register(&dsa_drv.drv); } void idxd_unregister_driver(void) { - int i; - - for (i = 0; i < IDXD_TYPE_MAX; i++) - driver_unregister(&idxd_drvs[i]->drv); + driver_unregister(&dsa_drv.drv); } /* IDXD engine attributes */ @@ -455,9 +350,9 @@ static ssize_t engine_group_id_show(struct device *dev, container_of(dev, struct idxd_engine, conf_dev); if (engine->group) - return sprintf(buf, "%d\n", engine->group->id); + return sysfs_emit(buf, "%d\n", engine->group->id); else - return sprintf(buf, "%d\n", -1); + return sysfs_emit(buf, "%d\n", -1); } static ssize_t engine_group_id_store(struct device *dev, @@ -493,7 +388,7 @@ static ssize_t engine_group_id_store(struct device 
*dev, if (prevg) prevg->num_engines--; - engine->group = &idxd->groups[id]; + engine->group = idxd->groups[id]; engine->group->num_engines++; return count; @@ -517,6 +412,19 @@ static const struct attribute_group *idxd_engine_attribute_groups[] = { NULL, }; +static void idxd_conf_engine_release(struct device *dev) +{ + struct idxd_engine *engine = container_of(dev, struct idxd_engine, conf_dev); + + kfree(engine); +} + +struct device_type idxd_engine_device_type = { + .name = "engine", + .release = idxd_conf_engine_release, + .groups = idxd_engine_attribute_groups, +}; + /* Group attributes */ static void idxd_set_free_tokens(struct idxd_device *idxd) @@ -524,7 +432,7 @@ static void idxd_set_free_tokens(struct idxd_device *idxd) int i, tokens; for (i = 0, tokens = 0; i < idxd->max_groups; i++) { - struct idxd_group *g = &idxd->groups[i]; + struct idxd_group *g = idxd->groups[i]; tokens += g->tokens_reserved; } @@ -539,7 +447,7 @@ static ssize_t group_tokens_reserved_show(struct device *dev, struct idxd_group *group = container_of(dev, struct idxd_group, conf_dev); - return sprintf(buf, "%u\n", group->tokens_reserved); + return sysfs_emit(buf, "%u\n", group->tokens_reserved); } static ssize_t group_tokens_reserved_store(struct device *dev, @@ -556,7 +464,7 @@ static ssize_t group_tokens_reserved_store(struct device *dev, if (rc < 0) return -EINVAL; - if (idxd->type == IDXD_TYPE_IAX) + if (idxd->data->type == IDXD_TYPE_IAX) return -EOPNOTSUPP; if (!test_bit(IDXD_FLAG_CONFIGURABLE, &idxd->flags)) @@ -587,7 +495,7 @@ static ssize_t group_tokens_allowed_show(struct device *dev, struct idxd_group *group = container_of(dev, struct idxd_group, conf_dev); - return sprintf(buf, "%u\n", group->tokens_allowed); + return sysfs_emit(buf, "%u\n", group->tokens_allowed); } static ssize_t group_tokens_allowed_store(struct device *dev, @@ -604,7 +512,7 @@ static ssize_t group_tokens_allowed_store(struct device *dev, if (rc < 0) return -EINVAL; - if (idxd->type == IDXD_TYPE_IAX) + if (idxd->data->type == IDXD_TYPE_IAX) return -EOPNOTSUPP; if (!test_bit(IDXD_FLAG_CONFIGURABLE, &idxd->flags)) @@ -632,7 +540,7 @@ static ssize_t group_use_token_limit_show(struct device *dev, struct idxd_group *group = container_of(dev, struct idxd_group, conf_dev); - return sprintf(buf, "%u\n", group->use_token_limit); + return sysfs_emit(buf, "%u\n", group->use_token_limit); } static ssize_t group_use_token_limit_store(struct device *dev, @@ -649,7 +557,7 @@ static ssize_t group_use_token_limit_store(struct device *dev, if (rc < 0) return -EINVAL; - if (idxd->type == IDXD_TYPE_IAX) + if (idxd->data->type == IDXD_TYPE_IAX) return -EOPNOTSUPP; if (!test_bit(IDXD_FLAG_CONFIGURABLE, &idxd->flags)) @@ -675,22 +583,22 @@ static ssize_t group_engines_show(struct device *dev, struct idxd_group *group = container_of(dev, struct idxd_group, conf_dev); int i, rc = 0; - char *tmp = buf; struct idxd_device *idxd = group->idxd; for (i = 0; i < idxd->max_engines; i++) { - struct idxd_engine *engine = &idxd->engines[i]; + struct idxd_engine *engine = idxd->engines[i]; if (!engine->group) continue; if (engine->group->id == group->id) - rc += sprintf(tmp + rc, "engine%d.%d ", - idxd->id, engine->id); + rc += sysfs_emit_at(buf, rc, "engine%d.%d ", idxd->id, engine->id); } + if (!rc) + return 0; rc--; - rc += sprintf(tmp + rc, "\n"); + rc += sysfs_emit_at(buf, rc, "\n"); return rc; } @@ -704,22 +612,22 @@ static ssize_t group_work_queues_show(struct device *dev, struct idxd_group *group = container_of(dev, struct idxd_group, conf_dev); int i, rc = 
0; - char *tmp = buf; struct idxd_device *idxd = group->idxd; for (i = 0; i < idxd->max_wqs; i++) { - struct idxd_wq *wq = &idxd->wqs[i]; + struct idxd_wq *wq = idxd->wqs[i]; if (!wq->group) continue; if (wq->group->id == group->id) - rc += sprintf(tmp + rc, "wq%d.%d ", - idxd->id, wq->id); + rc += sysfs_emit_at(buf, rc, "wq%d.%d ", idxd->id, wq->id); } + if (!rc) + return 0; rc--; - rc += sprintf(tmp + rc, "\n"); + rc += sysfs_emit_at(buf, rc, "\n"); return rc; } @@ -734,7 +642,7 @@ static ssize_t group_traffic_class_a_show(struct device *dev, struct idxd_group *group = container_of(dev, struct idxd_group, conf_dev); - return sprintf(buf, "%d\n", group->tc_a); + return sysfs_emit(buf, "%d\n", group->tc_a); } static ssize_t group_traffic_class_a_store(struct device *dev, @@ -775,7 +683,7 @@ static ssize_t group_traffic_class_b_show(struct device *dev, struct idxd_group *group = container_of(dev, struct idxd_group, conf_dev); - return sprintf(buf, "%d\n", group->tc_b); + return sysfs_emit(buf, "%d\n", group->tc_b); } static ssize_t group_traffic_class_b_store(struct device *dev, @@ -829,13 +737,26 @@ static const struct attribute_group *idxd_group_attribute_groups[] = { NULL, }; +static void idxd_conf_group_release(struct device *dev) +{ + struct idxd_group *group = container_of(dev, struct idxd_group, conf_dev); + + kfree(group); +} + +struct device_type idxd_group_device_type = { + .name = "group", + .release = idxd_conf_group_release, + .groups = idxd_group_attribute_groups, +}; + /* IDXD work queue attribs */ static ssize_t wq_clients_show(struct device *dev, struct device_attribute *attr, char *buf) { struct idxd_wq *wq = container_of(dev, struct idxd_wq, conf_dev); - return sprintf(buf, "%d\n", wq->client_count); + return sysfs_emit(buf, "%d\n", wq->client_count); } static struct device_attribute dev_attr_wq_clients = @@ -848,12 +769,12 @@ static ssize_t wq_state_show(struct device *dev, switch (wq->state) { case IDXD_WQ_DISABLED: - return sprintf(buf, "disabled\n"); + return sysfs_emit(buf, "disabled\n"); case IDXD_WQ_ENABLED: - return sprintf(buf, "enabled\n"); + return sysfs_emit(buf, "enabled\n"); } - return sprintf(buf, "unknown\n"); + return sysfs_emit(buf, "unknown\n"); } static struct device_attribute dev_attr_wq_state = @@ -865,9 +786,9 @@ static ssize_t wq_group_id_show(struct device *dev, struct idxd_wq *wq = container_of(dev, struct idxd_wq, conf_dev); if (wq->group) - return sprintf(buf, "%u\n", wq->group->id); + return sysfs_emit(buf, "%u\n", wq->group->id); else - return sprintf(buf, "-1\n"); + return sysfs_emit(buf, "-1\n"); } static ssize_t wq_group_id_store(struct device *dev, @@ -901,7 +822,7 @@ static ssize_t wq_group_id_store(struct device *dev, return count; } - group = &idxd->groups[id]; + group = idxd->groups[id]; prevg = wq->group; if (prevg) @@ -919,8 +840,7 @@ static ssize_t wq_mode_show(struct device *dev, struct device_attribute *attr, { struct idxd_wq *wq = container_of(dev, struct idxd_wq, conf_dev); - return sprintf(buf, "%s\n", - wq_dedicated(wq) ? "dedicated" : "shared"); + return sysfs_emit(buf, "%s\n", wq_dedicated(wq) ? 
"dedicated" : "shared"); } static ssize_t wq_mode_store(struct device *dev, @@ -956,7 +876,7 @@ static ssize_t wq_size_show(struct device *dev, struct device_attribute *attr, { struct idxd_wq *wq = container_of(dev, struct idxd_wq, conf_dev); - return sprintf(buf, "%u\n", wq->size); + return sysfs_emit(buf, "%u\n", wq->size); } static int total_claimed_wq_size(struct idxd_device *idxd) @@ -965,7 +885,7 @@ static int total_claimed_wq_size(struct idxd_device *idxd) int wq_size = 0; for (i = 0; i < idxd->max_wqs; i++) { - struct idxd_wq *wq = &idxd->wqs[i]; + struct idxd_wq *wq = idxd->wqs[i]; wq_size += wq->size; } @@ -989,7 +909,7 @@ static ssize_t wq_size_store(struct device *dev, if (!test_bit(IDXD_FLAG_CONFIGURABLE, &idxd->flags)) return -EPERM; - if (wq->state != IDXD_WQ_DISABLED) + if (idxd->state == IDXD_DEV_ENABLED) return -EPERM; if (size + total_claimed_wq_size(idxd) - wq->size > idxd->max_wq_size) @@ -1007,7 +927,7 @@ static ssize_t wq_priority_show(struct device *dev, { struct idxd_wq *wq = container_of(dev, struct idxd_wq, conf_dev); - return sprintf(buf, "%u\n", wq->priority); + return sysfs_emit(buf, "%u\n", wq->priority); } static ssize_t wq_priority_store(struct device *dev, @@ -1044,8 +964,7 @@ static ssize_t wq_block_on_fault_show(struct device *dev, { struct idxd_wq *wq = container_of(dev, struct idxd_wq, conf_dev); - return sprintf(buf, "%u\n", - test_bit(WQ_FLAG_BLOCK_ON_FAULT, &wq->flags)); + return sysfs_emit(buf, "%u\n", test_bit(WQ_FLAG_BLOCK_ON_FAULT, &wq->flags)); } static ssize_t wq_block_on_fault_store(struct device *dev, @@ -1084,7 +1003,7 @@ static ssize_t wq_threshold_show(struct device *dev, { struct idxd_wq *wq = container_of(dev, struct idxd_wq, conf_dev); - return sprintf(buf, "%u\n", wq->threshold); + return sysfs_emit(buf, "%u\n", wq->threshold); } static ssize_t wq_threshold_store(struct device *dev, @@ -1127,15 +1046,12 @@ static ssize_t wq_type_show(struct device *dev, switch (wq->type) { case IDXD_WQT_KERNEL: - return sprintf(buf, "%s\n", - idxd_wq_type_names[IDXD_WQT_KERNEL]); + return sysfs_emit(buf, "%s\n", idxd_wq_type_names[IDXD_WQT_KERNEL]); case IDXD_WQT_USER: - return sprintf(buf, "%s\n", - idxd_wq_type_names[IDXD_WQT_USER]); + return sysfs_emit(buf, "%s\n", idxd_wq_type_names[IDXD_WQT_USER]); case IDXD_WQT_NONE: default: - return sprintf(buf, "%s\n", - idxd_wq_type_names[IDXD_WQT_NONE]); + return sysfs_emit(buf, "%s\n", idxd_wq_type_names[IDXD_WQT_NONE]); } return -EINVAL; @@ -1176,7 +1092,7 @@ static ssize_t wq_name_show(struct device *dev, { struct idxd_wq *wq = container_of(dev, struct idxd_wq, conf_dev); - return sprintf(buf, "%s\n", wq->name); + return sysfs_emit(buf, "%s\n", wq->name); } static ssize_t wq_name_store(struct device *dev, @@ -1211,8 +1127,16 @@ static ssize_t wq_cdev_minor_show(struct device *dev, struct device_attribute *attr, char *buf) { struct idxd_wq *wq = container_of(dev, struct idxd_wq, conf_dev); + int minor = -1; + + mutex_lock(&wq->wq_lock); + if (wq->idxd_cdev) + minor = wq->idxd_cdev->minor; + mutex_unlock(&wq->wq_lock); - return sprintf(buf, "%d\n", wq->idxd_cdev.minor); + if (minor == -1) + return -ENXIO; + return sysfs_emit(buf, "%d\n", minor); } static struct device_attribute dev_attr_wq_cdev_minor = @@ -1238,7 +1162,7 @@ static ssize_t wq_max_transfer_size_show(struct device *dev, struct device_attri { struct idxd_wq *wq = container_of(dev, struct idxd_wq, conf_dev); - return sprintf(buf, "%llu\n", wq->max_xfer_bytes); + return sysfs_emit(buf, "%llu\n", wq->max_xfer_bytes); } static ssize_t 
wq_max_transfer_size_store(struct device *dev, struct device_attribute *attr, @@ -1272,7 +1196,7 @@ static ssize_t wq_max_batch_size_show(struct device *dev, struct device_attribut { struct idxd_wq *wq = container_of(dev, struct idxd_wq, conf_dev); - return sprintf(buf, "%u\n", wq->max_batch_size); + return sysfs_emit(buf, "%u\n", wq->max_batch_size); } static ssize_t wq_max_batch_size_store(struct device *dev, struct device_attribute *attr, @@ -1305,7 +1229,7 @@ static ssize_t wq_ats_disable_show(struct device *dev, struct device_attribute * { struct idxd_wq *wq = container_of(dev, struct idxd_wq, conf_dev); - return sprintf(buf, "%u\n", wq->ats_dis); + return sysfs_emit(buf, "%u\n", wq->ats_dis); } static ssize_t wq_ats_disable_store(struct device *dev, struct device_attribute *attr, @@ -1361,6 +1285,20 @@ static const struct attribute_group *idxd_wq_attribute_groups[] = { NULL, }; +static void idxd_conf_wq_release(struct device *dev) +{ + struct idxd_wq *wq = container_of(dev, struct idxd_wq, conf_dev); + + kfree(wq->wqcfg); + kfree(wq); +} + +struct device_type idxd_wq_device_type = { + .name = "wq", + .release = idxd_conf_wq_release, + .groups = idxd_wq_attribute_groups, +}; + /* IDXD device attribs */ static ssize_t version_show(struct device *dev, struct device_attribute *attr, char *buf) @@ -1368,7 +1306,7 @@ static ssize_t version_show(struct device *dev, struct device_attribute *attr, struct idxd_device *idxd = container_of(dev, struct idxd_device, conf_dev); - return sprintf(buf, "%#x\n", idxd->hw.version); + return sysfs_emit(buf, "%#x\n", idxd->hw.version); } static DEVICE_ATTR_RO(version); @@ -1379,7 +1317,7 @@ static ssize_t max_work_queues_size_show(struct device *dev, struct idxd_device *idxd = container_of(dev, struct idxd_device, conf_dev); - return sprintf(buf, "%u\n", idxd->max_wq_size); + return sysfs_emit(buf, "%u\n", idxd->max_wq_size); } static DEVICE_ATTR_RO(max_work_queues_size); @@ -1389,7 +1327,7 @@ static ssize_t max_groups_show(struct device *dev, struct idxd_device *idxd = container_of(dev, struct idxd_device, conf_dev); - return sprintf(buf, "%u\n", idxd->max_groups); + return sysfs_emit(buf, "%u\n", idxd->max_groups); } static DEVICE_ATTR_RO(max_groups); @@ -1399,7 +1337,7 @@ static ssize_t max_work_queues_show(struct device *dev, struct idxd_device *idxd = container_of(dev, struct idxd_device, conf_dev); - return sprintf(buf, "%u\n", idxd->max_wqs); + return sysfs_emit(buf, "%u\n", idxd->max_wqs); } static DEVICE_ATTR_RO(max_work_queues); @@ -1409,7 +1347,7 @@ static ssize_t max_engines_show(struct device *dev, struct idxd_device *idxd = container_of(dev, struct idxd_device, conf_dev); - return sprintf(buf, "%u\n", idxd->max_engines); + return sysfs_emit(buf, "%u\n", idxd->max_engines); } static DEVICE_ATTR_RO(max_engines); @@ -1419,7 +1357,7 @@ static ssize_t numa_node_show(struct device *dev, struct idxd_device *idxd = container_of(dev, struct idxd_device, conf_dev); - return sprintf(buf, "%d\n", dev_to_node(&idxd->pdev->dev)); + return sysfs_emit(buf, "%d\n", dev_to_node(&idxd->pdev->dev)); } static DEVICE_ATTR_RO(numa_node); @@ -1429,7 +1367,7 @@ static ssize_t max_batch_size_show(struct device *dev, struct idxd_device *idxd = container_of(dev, struct idxd_device, conf_dev); - return sprintf(buf, "%u\n", idxd->max_batch_size); + return sysfs_emit(buf, "%u\n", idxd->max_batch_size); } static DEVICE_ATTR_RO(max_batch_size); @@ -1440,7 +1378,7 @@ static ssize_t max_transfer_size_show(struct device *dev, struct idxd_device *idxd = container_of(dev, struct 
idxd_device, conf_dev); - return sprintf(buf, "%llu\n", idxd->max_xfer_bytes); + return sysfs_emit(buf, "%llu\n", idxd->max_xfer_bytes); } static DEVICE_ATTR_RO(max_transfer_size); @@ -1449,8 +1387,14 @@ static ssize_t op_cap_show(struct device *dev, { struct idxd_device *idxd = container_of(dev, struct idxd_device, conf_dev); + int i, rc = 0; + + for (i = 0; i < 4; i++) + rc += sysfs_emit_at(buf, rc, "%#llx ", idxd->hw.opcap.bits[i]); - return sprintf(buf, "%#llx\n", idxd->hw.opcap.bits[0]); + rc--; + rc += sysfs_emit_at(buf, rc, "\n"); + return rc; } static DEVICE_ATTR_RO(op_cap); @@ -1460,7 +1404,7 @@ static ssize_t gen_cap_show(struct device *dev, struct idxd_device *idxd = container_of(dev, struct idxd_device, conf_dev); - return sprintf(buf, "%#llx\n", idxd->hw.gen_cap.bits); + return sysfs_emit(buf, "%#llx\n", idxd->hw.gen_cap.bits); } static DEVICE_ATTR_RO(gen_cap); @@ -1470,8 +1414,7 @@ static ssize_t configurable_show(struct device *dev, struct idxd_device *idxd = container_of(dev, struct idxd_device, conf_dev); - return sprintf(buf, "%u\n", - test_bit(IDXD_FLAG_CONFIGURABLE, &idxd->flags)); + return sysfs_emit(buf, "%u\n", test_bit(IDXD_FLAG_CONFIGURABLE, &idxd->flags)); } static DEVICE_ATTR_RO(configurable); @@ -1485,13 +1428,13 @@ static ssize_t clients_show(struct device *dev, spin_lock_irqsave(&idxd->dev_lock, flags); for (i = 0; i < idxd->max_wqs; i++) { - struct idxd_wq *wq = &idxd->wqs[i]; + struct idxd_wq *wq = idxd->wqs[i]; count += wq->client_count; } spin_unlock_irqrestore(&idxd->dev_lock, flags); - return sprintf(buf, "%d\n", count); + return sysfs_emit(buf, "%d\n", count); } static DEVICE_ATTR_RO(clients); @@ -1501,7 +1444,7 @@ static ssize_t pasid_enabled_show(struct device *dev, struct idxd_device *idxd = container_of(dev, struct idxd_device, conf_dev); - return sprintf(buf, "%u\n", device_pasid_enabled(idxd)); + return sysfs_emit(buf, "%u\n", device_pasid_enabled(idxd)); } static DEVICE_ATTR_RO(pasid_enabled); @@ -1514,14 +1457,14 @@ static ssize_t state_show(struct device *dev, switch (idxd->state) { case IDXD_DEV_DISABLED: case IDXD_DEV_CONF_READY: - return sprintf(buf, "disabled\n"); + return sysfs_emit(buf, "disabled\n"); case IDXD_DEV_ENABLED: - return sprintf(buf, "enabled\n"); + return sysfs_emit(buf, "enabled\n"); case IDXD_DEV_HALTED: - return sprintf(buf, "halted\n"); + return sysfs_emit(buf, "halted\n"); } - return sprintf(buf, "unknown\n"); + return sysfs_emit(buf, "unknown\n"); } static DEVICE_ATTR_RO(state); @@ -1535,10 +1478,10 @@ static ssize_t errors_show(struct device *dev, spin_lock_irqsave(&idxd->dev_lock, flags); for (i = 0; i < 4; i++) - out += sprintf(buf + out, "%#018llx ", idxd->sw_err.bits[i]); + out += sysfs_emit_at(buf, out, "%#018llx ", idxd->sw_err.bits[i]); spin_unlock_irqrestore(&idxd->dev_lock, flags); out--; - out += sprintf(buf + out, "\n"); + out += sysfs_emit_at(buf, out, "\n"); return out; } static DEVICE_ATTR_RO(errors); @@ -1549,7 +1492,7 @@ static ssize_t max_tokens_show(struct device *dev, struct idxd_device *idxd = container_of(dev, struct idxd_device, conf_dev); - return sprintf(buf, "%u\n", idxd->max_tokens); + return sysfs_emit(buf, "%u\n", idxd->max_tokens); } static DEVICE_ATTR_RO(max_tokens); @@ -1559,7 +1502,7 @@ static ssize_t token_limit_show(struct device *dev, struct idxd_device *idxd = container_of(dev, struct idxd_device, conf_dev); - return sprintf(buf, "%u\n", idxd->token_limit); + return sysfs_emit(buf, "%u\n", idxd->token_limit); } static ssize_t token_limit_store(struct device *dev, @@ -1598,7 +1541,7 
@@ static ssize_t cdev_major_show(struct device *dev, struct idxd_device *idxd = container_of(dev, struct idxd_device, conf_dev); - return sprintf(buf, "%u\n", idxd->major); + return sysfs_emit(buf, "%u\n", idxd->major); } static DEVICE_ATTR_RO(cdev_major); @@ -1607,7 +1550,7 @@ static ssize_t cmd_status_show(struct device *dev, { struct idxd_device *idxd = container_of(dev, struct idxd_device, conf_dev); - return sprintf(buf, "%#x\n", idxd->cmd_status); + return sysfs_emit(buf, "%#x\n", idxd->cmd_status); } static DEVICE_ATTR_RO(cmd_status); @@ -1643,183 +1586,161 @@ static const struct attribute_group *idxd_attribute_groups[] = { NULL, }; -static int idxd_setup_engine_sysfs(struct idxd_device *idxd) +static void idxd_conf_device_release(struct device *dev) { - struct device *dev = &idxd->pdev->dev; - int i, rc; + struct idxd_device *idxd = container_of(dev, struct idxd_device, conf_dev); + + kfree(idxd->groups); + kfree(idxd->wqs); + kfree(idxd->engines); + kfree(idxd->irq_entries); + kfree(idxd->int_handles); + ida_free(&idxd_ida, idxd->id); + kfree(idxd); +} + +struct device_type dsa_device_type = { + .name = "dsa", + .release = idxd_conf_device_release, + .groups = idxd_attribute_groups, +}; + +struct device_type iax_device_type = { + .name = "iax", + .release = idxd_conf_device_release, + .groups = idxd_attribute_groups, +}; + +static int idxd_register_engine_devices(struct idxd_device *idxd) +{ + int i, j, rc; for (i = 0; i < idxd->max_engines; i++) { - struct idxd_engine *engine = &idxd->engines[i]; - - engine->conf_dev.parent = &idxd->conf_dev; - dev_set_name(&engine->conf_dev, "engine%d.%d", - idxd->id, engine->id); - engine->conf_dev.bus = idxd_get_bus_type(idxd); - engine->conf_dev.groups = idxd_engine_attribute_groups; - engine->conf_dev.type = &idxd_engine_device_type; - dev_dbg(dev, "Engine device register: %s\n", - dev_name(&engine->conf_dev)); - rc = device_register(&engine->conf_dev); - if (rc < 0) { - put_device(&engine->conf_dev); + struct idxd_engine *engine = idxd->engines[i]; + + rc = device_add(&engine->conf_dev); + if (rc < 0) goto cleanup; - } } return 0; cleanup: - while (i--) { - struct idxd_engine *engine = &idxd->engines[i]; + j = i - 1; + for (; i < idxd->max_engines; i++) + put_device(&idxd->engines[i]->conf_dev); - device_unregister(&engine->conf_dev); - } + while (j--) + device_unregister(&idxd->engines[j]->conf_dev); return rc; } -static int idxd_setup_group_sysfs(struct idxd_device *idxd) +static int idxd_register_group_devices(struct idxd_device *idxd) { - struct device *dev = &idxd->pdev->dev; - int i, rc; + int i, j, rc; for (i = 0; i < idxd->max_groups; i++) { - struct idxd_group *group = &idxd->groups[i]; - - group->conf_dev.parent = &idxd->conf_dev; - dev_set_name(&group->conf_dev, "group%d.%d", - idxd->id, group->id); - group->conf_dev.bus = idxd_get_bus_type(idxd); - group->conf_dev.groups = idxd_group_attribute_groups; - group->conf_dev.type = &idxd_group_device_type; - dev_dbg(dev, "Group device register: %s\n", - dev_name(&group->conf_dev)); - rc = device_register(&group->conf_dev); - if (rc < 0) { - put_device(&group->conf_dev); + struct idxd_group *group = idxd->groups[i]; + + rc = device_add(&group->conf_dev); + if (rc < 0) goto cleanup; - } } return 0; cleanup: - while (i--) { - struct idxd_group *group = &idxd->groups[i]; + j = i - 1; + for (; i < idxd->max_groups; i++) + put_device(&idxd->groups[i]->conf_dev); - device_unregister(&group->conf_dev); - } + while (j--) + device_unregister(&idxd->groups[j]->conf_dev); return rc; } -static 
-static int idxd_setup_wq_sysfs(struct idxd_device *idxd)
+static int idxd_register_wq_devices(struct idxd_device *idxd)
 {
-	struct device *dev = &idxd->pdev->dev;
-	int i, rc;
+	int i, rc, j;
 
 	for (i = 0; i < idxd->max_wqs; i++) {
-		struct idxd_wq *wq = &idxd->wqs[i];
-
-		wq->conf_dev.parent = &idxd->conf_dev;
-		dev_set_name(&wq->conf_dev, "wq%d.%d", idxd->id, wq->id);
-		wq->conf_dev.bus = idxd_get_bus_type(idxd);
-		wq->conf_dev.groups = idxd_wq_attribute_groups;
-		wq->conf_dev.type = &idxd_wq_device_type;
-		dev_dbg(dev, "WQ device register: %s\n",
-			dev_name(&wq->conf_dev));
-		rc = device_register(&wq->conf_dev);
-		if (rc < 0) {
-			put_device(&wq->conf_dev);
+		struct idxd_wq *wq = idxd->wqs[i];
+
+		rc = device_add(&wq->conf_dev);
+		if (rc < 0)
 			goto cleanup;
-		}
 	}
 
 	return 0;
 
 cleanup:
-	while (i--) {
-		struct idxd_wq *wq = &idxd->wqs[i];
+	j = i - 1;
+	for (; i < idxd->max_wqs; i++)
+		put_device(&idxd->wqs[i]->conf_dev);
 
-		device_unregister(&wq->conf_dev);
-	}
+	while (j--)
+		device_unregister(&idxd->wqs[j]->conf_dev);
 	return rc;
 }
 
-static int idxd_setup_device_sysfs(struct idxd_device *idxd)
-{
-	struct device *dev = &idxd->pdev->dev;
-	int rc;
-	char devname[IDXD_NAME_SIZE];
-
-	sprintf(devname, "%s%d", idxd_get_dev_name(idxd), idxd->id);
-	idxd->conf_dev.parent = dev;
-	dev_set_name(&idxd->conf_dev, "%s", devname);
-	idxd->conf_dev.bus = idxd_get_bus_type(idxd);
-	idxd->conf_dev.groups = idxd_attribute_groups;
-	idxd->conf_dev.type = idxd_get_device_type(idxd);
-
-	dev_dbg(dev, "IDXD device register: %s\n", dev_name(&idxd->conf_dev));
-	rc = device_register(&idxd->conf_dev);
-	if (rc < 0) {
-		put_device(&idxd->conf_dev);
-		return rc;
-	}
-
-	return 0;
-}
-
-int idxd_setup_sysfs(struct idxd_device *idxd)
+int idxd_register_devices(struct idxd_device *idxd)
 {
 	struct device *dev = &idxd->pdev->dev;
-	int rc;
+	int rc, i;
 
-	rc = idxd_setup_device_sysfs(idxd);
-	if (rc < 0) {
-		dev_dbg(dev, "Device sysfs registering failed: %d\n", rc);
+	rc = device_add(&idxd->conf_dev);
+	if (rc < 0)
 		return rc;
-	}
 
-	rc = idxd_setup_wq_sysfs(idxd);
+	rc = idxd_register_wq_devices(idxd);
 	if (rc < 0) {
-		/* unregister conf dev */
-		dev_dbg(dev, "Work Queue sysfs registering failed: %d\n", rc);
-		return rc;
+		dev_dbg(dev, "WQ devices registering failed: %d\n", rc);
+		goto err_wq;
 	}
 
-	rc = idxd_setup_group_sysfs(idxd);
+	rc = idxd_register_engine_devices(idxd);
 	if (rc < 0) {
-		/* unregister conf dev */
-		dev_dbg(dev, "Group sysfs registering failed: %d\n", rc);
-		return rc;
+		dev_dbg(dev, "Engine devices registering failed: %d\n", rc);
+		goto err_engine;
 	}
 
-	rc = idxd_setup_engine_sysfs(idxd);
+	rc = idxd_register_group_devices(idxd);
 	if (rc < 0) {
-		/* unregister conf dev */
-		dev_dbg(dev, "Engine sysfs registering failed: %d\n", rc);
-		return rc;
+		dev_dbg(dev, "Group device registering failed: %d\n", rc);
+		goto err_group;
 	}
 
 	return 0;
+
+ err_group:
+	for (i = 0; i < idxd->max_engines; i++)
+		device_unregister(&idxd->engines[i]->conf_dev);
+ err_engine:
+	for (i = 0; i < idxd->max_wqs; i++)
+		device_unregister(&idxd->wqs[i]->conf_dev);
+ err_wq:
+	device_del(&idxd->conf_dev);
+	return rc;
 }
 
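idxd_register_devices() above uses the kernel's goto-ladder unwind: each step that can fail jumps to a label that undoes every previously completed step in reverse order, ending with device_del() on the parent conf_dev. A minimal sketch of the shape with placeholder step functions (none of these names exist in the driver):

static int register_step_a(void);
static int register_step_b(void);
static int register_step_c(void);
static void unregister_step_a(void);
static void unregister_step_b(void);

static int demo_register_all(void)
{
	int rc;

	rc = register_step_a();
	if (rc < 0)
		return rc;		/* nothing to unwind yet */

	rc = register_step_b();
	if (rc < 0)
		goto err_b;

	rc = register_step_c();
	if (rc < 0)
		goto err_c;

	return 0;

err_c:					/* step c failed: undo b, then a */
	unregister_step_b();
err_b:					/* step b failed: undo a */
	unregister_step_a();
	return rc;
}

The ladder keeps teardown the exact mirror of setup order, which is harder to get wrong than index bookkeeping such as the j = i - 1 / while (j--) pattern used in the per-object register helpers above.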
-void idxd_cleanup_sysfs(struct idxd_device *idxd)
+void idxd_unregister_devices(struct idxd_device *idxd)
 {
 	int i;
 
 	for (i = 0; i < idxd->max_wqs; i++) {
-		struct idxd_wq *wq = &idxd->wqs[i];
+		struct idxd_wq *wq = idxd->wqs[i];
 
 		device_unregister(&wq->conf_dev);
 	}
 
 	for (i = 0; i < idxd->max_engines; i++) {
-		struct idxd_engine *engine = &idxd->engines[i];
+		struct idxd_engine *engine = idxd->engines[i];
 
 		device_unregister(&engine->conf_dev);
 	}
 
 	for (i = 0; i < idxd->max_groups; i++) {
-		struct idxd_group *group = &idxd->groups[i];
+		struct idxd_group *group = idxd->groups[i];
 
 		device_unregister(&group->conf_dev);
 	}
@@ -1829,26 +1750,10 @@ void idxd_cleanup_sysfs(struct idxd_device *idxd)
 
 int idxd_register_bus_type(void)
 {
-	int i, rc;
-
-	for (i = 0; i < IDXD_TYPE_MAX; i++) {
-		rc = bus_register(idxd_bus_types[i]);
-		if (rc < 0)
-			goto bus_err;
-	}
-
-	return 0;
-
-bus_err:
-	while (--i >= 0)
-		bus_unregister(idxd_bus_types[i]);
-	return rc;
+	return bus_register(&dsa_bus_type);
 }
 
 void idxd_unregister_bus_type(void)
 {
-	int i;
-
-	for (i = 0; i < IDXD_TYPE_MAX; i++)
-		bus_unregister(idxd_bus_types[i]);
+	bus_unregister(&dsa_bus_type);
 }
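The closing hunks collapse the old per-type bus array (idxd_bus_types[]) into the single shared dsa_bus_type, so module setup and teardown reduce to one bus_register()/bus_unregister() pair. A sketch of that single-bus shape under assumed demo_* names:

#include <linux/device.h>
#include <linux/module.h>

static int demo_bus_match(struct device *dev, struct device_driver *drv)
{
	return 1;	/* a real bus matches on device type or name */
}

static struct bus_type demo_bus_type = {
	.name = "demo_bus",
	.match = demo_bus_match,
};

static int __init demo_init(void)
{
	return bus_register(&demo_bus_type);	/* one bus for every device type */
}

static void __exit demo_exit(void)
{
	bus_unregister(&demo_bus_type);
}

module_init(demo_init);
module_exit(demo_exit);
MODULE_LICENSE("GPL");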