summaryrefslogtreecommitdiffstats
path: root/drivers/dma/idxd
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/dma/idxd')
-rw-r--r--drivers/dma/idxd/Makefile2
-rw-r--r--drivers/dma/idxd/cdev.c132
-rw-r--r--drivers/dma/idxd/device.c346
-rw-r--r--drivers/dma/idxd/dma.c77
-rw-r--r--drivers/dma/idxd/idxd.h171
-rw-r--r--drivers/dma/idxd/init.c496
-rw-r--r--drivers/dma/idxd/irq.c33
-rw-r--r--drivers/dma/idxd/perfmon.c662
-rw-r--r--drivers/dma/idxd/perfmon.h119
-rw-r--r--drivers/dma/idxd/registers.h120
-rw-r--r--drivers/dma/idxd/submit.c42
-rw-r--r--drivers/dma/idxd/sysfs.c795
12 files changed, 2204 insertions, 791 deletions
diff --git a/drivers/dma/idxd/Makefile b/drivers/dma/idxd/Makefile
index 8978b898d777..6d11558756f8 100644
--- a/drivers/dma/idxd/Makefile
+++ b/drivers/dma/idxd/Makefile
@@ -1,2 +1,4 @@
obj-$(CONFIG_INTEL_IDXD) += idxd.o
idxd-y := init.o irq.o device.o sysfs.o submit.o dma.o cdev.o
+
+idxd-$(CONFIG_INTEL_IDXD_PERFMON) += perfmon.o
diff --git a/drivers/dma/idxd/cdev.c b/drivers/dma/idxd/cdev.c
index 0db9b82ed8cf..302cba5ff779 100644
--- a/drivers/dma/idxd/cdev.c
+++ b/drivers/dma/idxd/cdev.c
@@ -39,15 +39,15 @@ struct idxd_user_context {
struct iommu_sva *sva;
};
-enum idxd_cdev_cleanup {
- CDEV_NORMAL = 0,
- CDEV_FAILED,
-};
-
static void idxd_cdev_dev_release(struct device *dev)
{
- dev_dbg(dev, "releasing cdev device\n");
- kfree(dev);
+ struct idxd_cdev *idxd_cdev = container_of(dev, struct idxd_cdev, dev);
+ struct idxd_cdev_context *cdev_ctx;
+ struct idxd_wq *wq = idxd_cdev->wq;
+
+ cdev_ctx = &ictx[wq->idxd->data->type];
+ ida_simple_remove(&cdev_ctx->minor_ida, idxd_cdev->minor);
+ kfree(idxd_cdev);
}
static struct device_type idxd_cdev_device_type = {
@@ -62,14 +62,11 @@ static inline struct idxd_cdev *inode_idxd_cdev(struct inode *inode)
return container_of(cdev, struct idxd_cdev, cdev);
}
-static inline struct idxd_wq *idxd_cdev_wq(struct idxd_cdev *idxd_cdev)
-{
- return container_of(idxd_cdev, struct idxd_wq, idxd_cdev);
-}
-
static inline struct idxd_wq *inode_wq(struct inode *inode)
{
- return idxd_cdev_wq(inode_idxd_cdev(inode));
+ struct idxd_cdev *idxd_cdev = inode_idxd_cdev(inode);
+
+ return idxd_cdev->wq;
}
static int idxd_cdev_open(struct inode *inode, struct file *filp)
@@ -220,11 +217,10 @@ static __poll_t idxd_cdev_poll(struct file *filp,
struct idxd_user_context *ctx = filp->private_data;
struct idxd_wq *wq = ctx->wq;
struct idxd_device *idxd = wq->idxd;
- struct idxd_cdev *idxd_cdev = &wq->idxd_cdev;
unsigned long flags;
__poll_t out = 0;
- poll_wait(filp, &idxd_cdev->err_queue, wait);
+ poll_wait(filp, &wq->err_queue, wait);
spin_lock_irqsave(&idxd->dev_lock, flags);
if (idxd->sw_err.valid)
out = EPOLLIN | EPOLLRDNORM;
@@ -243,101 +239,69 @@ static const struct file_operations idxd_cdev_fops = {
int idxd_cdev_get_major(struct idxd_device *idxd)
{
- return MAJOR(ictx[idxd->type].devt);
+ return MAJOR(ictx[idxd->data->type].devt);
}
-static int idxd_wq_cdev_dev_setup(struct idxd_wq *wq)
+int idxd_wq_add_cdev(struct idxd_wq *wq)
{
struct idxd_device *idxd = wq->idxd;
- struct idxd_cdev *idxd_cdev = &wq->idxd_cdev;
- struct idxd_cdev_context *cdev_ctx;
+ struct idxd_cdev *idxd_cdev;
+ struct cdev *cdev;
struct device *dev;
- int minor, rc;
+ struct idxd_cdev_context *cdev_ctx;
+ int rc, minor;
- idxd_cdev->dev = kzalloc(sizeof(*idxd_cdev->dev), GFP_KERNEL);
- if (!idxd_cdev->dev)
+ idxd_cdev = kzalloc(sizeof(*idxd_cdev), GFP_KERNEL);
+ if (!idxd_cdev)
return -ENOMEM;
- dev = idxd_cdev->dev;
- dev->parent = &idxd->pdev->dev;
- dev_set_name(dev, "%s/wq%u.%u", idxd_get_dev_name(idxd),
- idxd->id, wq->id);
- dev->bus = idxd_get_bus_type(idxd);
-
- cdev_ctx = &ictx[wq->idxd->type];
+ idxd_cdev->wq = wq;
+ cdev = &idxd_cdev->cdev;
+ dev = &idxd_cdev->dev;
+ cdev_ctx = &ictx[wq->idxd->data->type];
minor = ida_simple_get(&cdev_ctx->minor_ida, 0, MINORMASK, GFP_KERNEL);
if (minor < 0) {
- rc = minor;
- kfree(dev);
- goto ida_err;
- }
-
- dev->devt = MKDEV(MAJOR(cdev_ctx->devt), minor);
- dev->type = &idxd_cdev_device_type;
- rc = device_register(dev);
- if (rc < 0) {
- dev_err(&idxd->pdev->dev, "device register failed\n");
- goto dev_reg_err;
+ kfree(idxd_cdev);
+ return minor;
}
idxd_cdev->minor = minor;
- return 0;
-
- dev_reg_err:
- ida_simple_remove(&cdev_ctx->minor_ida, MINOR(dev->devt));
- put_device(dev);
- ida_err:
- idxd_cdev->dev = NULL;
- return rc;
-}
-
-static void idxd_wq_cdev_cleanup(struct idxd_wq *wq,
- enum idxd_cdev_cleanup cdev_state)
-{
- struct idxd_cdev *idxd_cdev = &wq->idxd_cdev;
- struct idxd_cdev_context *cdev_ctx;
-
- cdev_ctx = &ictx[wq->idxd->type];
- if (cdev_state == CDEV_NORMAL)
- cdev_del(&idxd_cdev->cdev);
- device_unregister(idxd_cdev->dev);
- /*
- * The device_type->release() will be called on the device and free
- * the allocated struct device. We can just forget it.
- */
- ida_simple_remove(&cdev_ctx->minor_ida, idxd_cdev->minor);
- idxd_cdev->dev = NULL;
- idxd_cdev->minor = -1;
-}
-
-int idxd_wq_add_cdev(struct idxd_wq *wq)
-{
- struct idxd_cdev *idxd_cdev = &wq->idxd_cdev;
- struct cdev *cdev = &idxd_cdev->cdev;
- struct device *dev;
- int rc;
+ device_initialize(dev);
+ dev->parent = &wq->conf_dev;
+ dev->bus = &dsa_bus_type;
+ dev->type = &idxd_cdev_device_type;
+ dev->devt = MKDEV(MAJOR(cdev_ctx->devt), minor);
- rc = idxd_wq_cdev_dev_setup(wq);
+ rc = dev_set_name(dev, "%s/wq%u.%u", idxd->data->name_prefix, idxd->id, wq->id);
if (rc < 0)
- return rc;
+ goto err;
- dev = idxd_cdev->dev;
+ wq->idxd_cdev = idxd_cdev;
cdev_init(cdev, &idxd_cdev_fops);
- cdev_set_parent(cdev, &dev->kobj);
- rc = cdev_add(cdev, dev->devt, 1);
+ rc = cdev_device_add(cdev, dev);
if (rc) {
dev_dbg(&wq->idxd->pdev->dev, "cdev_add failed: %d\n", rc);
- idxd_wq_cdev_cleanup(wq, CDEV_FAILED);
- return rc;
+ goto err;
}
- init_waitqueue_head(&idxd_cdev->err_queue);
return 0;
+
+ err:
+ put_device(dev);
+ wq->idxd_cdev = NULL;
+ return rc;
}
void idxd_wq_del_cdev(struct idxd_wq *wq)
{
- idxd_wq_cdev_cleanup(wq, CDEV_NORMAL);
+ struct idxd_cdev *idxd_cdev;
+ struct idxd_cdev_context *cdev_ctx;
+
+ cdev_ctx = &ictx[wq->idxd->data->type];
+ idxd_cdev = wq->idxd_cdev;
+ wq->idxd_cdev = NULL;
+ cdev_device_del(&idxd_cdev->cdev, &idxd_cdev->dev);
+ put_device(&idxd_cdev->dev);
}
int idxd_cdev_register(void)
diff --git a/drivers/dma/idxd/device.c b/drivers/dma/idxd/device.c
index 84a6ea60ecf0..420b93fe5feb 100644
--- a/drivers/dma/idxd/device.c
+++ b/drivers/dma/idxd/device.c
@@ -19,7 +19,7 @@ static void idxd_cmd_exec(struct idxd_device *idxd, int cmd_code, u32 operand,
/* Interrupt control bits */
void idxd_mask_msix_vector(struct idxd_device *idxd, int vec_id)
{
- struct irq_data *data = irq_get_irq_data(idxd->msix_entries[vec_id].vector);
+ struct irq_data *data = irq_get_irq_data(idxd->irq_entries[vec_id].vector);
pci_msi_mask_irq(data);
}
@@ -36,7 +36,7 @@ void idxd_mask_msix_vectors(struct idxd_device *idxd)
void idxd_unmask_msix_vector(struct idxd_device *idxd, int vec_id)
{
- struct irq_data *data = irq_get_irq_data(idxd->msix_entries[vec_id].vector);
+ struct irq_data *data = irq_get_irq_data(idxd->irq_entries[vec_id].vector);
pci_msi_unmask_irq(data);
}
@@ -47,6 +47,7 @@ void idxd_unmask_error_interrupts(struct idxd_device *idxd)
genctrl.bits = ioread32(idxd->reg_base + IDXD_GENCTRL_OFFSET);
genctrl.softerr_int_en = 1;
+ genctrl.halt_int_en = 1;
iowrite32(genctrl.bits, idxd->reg_base + IDXD_GENCTRL_OFFSET);
}
@@ -56,6 +57,7 @@ void idxd_mask_error_interrupts(struct idxd_device *idxd)
genctrl.bits = ioread32(idxd->reg_base + IDXD_GENCTRL_OFFSET);
genctrl.softerr_int_en = 0;
+ genctrl.halt_int_en = 0;
iowrite32(genctrl.bits, idxd->reg_base + IDXD_GENCTRL_OFFSET);
}
@@ -144,14 +146,8 @@ int idxd_wq_alloc_resources(struct idxd_wq *wq)
if (rc < 0)
return rc;
- if (idxd->type == IDXD_TYPE_DSA)
- align = 32;
- else if (idxd->type == IDXD_TYPE_IAX)
- align = 64;
- else
- return -ENODEV;
-
- wq->compls_size = num_descs * idxd->compl_size + align;
+ align = idxd->data->align;
+ wq->compls_size = num_descs * idxd->data->compl_size + align;
wq->compls_raw = dma_alloc_coherent(dev, wq->compls_size,
&wq->compls_addr_raw, GFP_KERNEL);
if (!wq->compls_raw) {
@@ -178,16 +174,14 @@ int idxd_wq_alloc_resources(struct idxd_wq *wq)
struct idxd_desc *desc = wq->descs[i];
desc->hw = wq->hw_descs[i];
- if (idxd->type == IDXD_TYPE_DSA)
+ if (idxd->data->type == IDXD_TYPE_DSA)
desc->completion = &wq->compls[i];
- else if (idxd->type == IDXD_TYPE_IAX)
+ else if (idxd->data->type == IDXD_TYPE_IAX)
desc->iax_completion = &wq->iax_compls[i];
- desc->compl_dma = wq->compls_addr + idxd->compl_size * i;
+ desc->compl_dma = wq->compls_addr + idxd->data->compl_size * i;
desc->id = i;
desc->wq = wq;
desc->cpu = -1;
- dma_async_tx_descriptor_init(&desc->txd, &wq->dma_chan);
- desc->txd.tx_submit = idxd_dma_tx_submit;
}
return 0;
@@ -282,6 +276,22 @@ void idxd_wq_drain(struct idxd_wq *wq)
idxd_cmd_exec(idxd, IDXD_CMD_DRAIN_WQ, operand, NULL);
}
+void idxd_wq_reset(struct idxd_wq *wq)
+{
+ struct idxd_device *idxd = wq->idxd;
+ struct device *dev = &idxd->pdev->dev;
+ u32 operand;
+
+ if (wq->state != IDXD_WQ_ENABLED) {
+ dev_dbg(dev, "WQ %d in wrong state: %d\n", wq->id, wq->state);
+ return;
+ }
+
+ operand = BIT(wq->id % 16) | ((wq->id / 16) << 16);
+ idxd_cmd_exec(idxd, IDXD_CMD_RESET_WQ, operand, NULL);
+ wq->state = IDXD_WQ_DISABLED;
+}
+
int idxd_wq_map_portal(struct idxd_wq *wq)
{
struct idxd_device *idxd = wq->idxd;
@@ -304,6 +314,19 @@ void idxd_wq_unmap_portal(struct idxd_wq *wq)
struct device *dev = &wq->idxd->pdev->dev;
devm_iounmap(dev, wq->portal);
+ wq->portal = NULL;
+}
+
+void idxd_wqs_unmap_portal(struct idxd_device *idxd)
+{
+ int i;
+
+ for (i = 0; i < idxd->max_wqs; i++) {
+ struct idxd_wq *wq = idxd->wqs[i];
+
+ if (wq->portal)
+ idxd_wq_unmap_portal(wq);
+ }
}
int idxd_wq_set_pasid(struct idxd_wq *wq, int pasid)
@@ -363,8 +386,6 @@ int idxd_wq_disable_pasid(struct idxd_wq *wq)
void idxd_wq_disable_cleanup(struct idxd_wq *wq)
{
struct idxd_device *idxd = wq->idxd;
- struct device *dev = &idxd->pdev->dev;
- int i, wq_offset;
lockdep_assert_held(&idxd->dev_lock);
memset(wq->wqcfg, 0, idxd->wqcfg_size);
@@ -376,14 +397,32 @@ void idxd_wq_disable_cleanup(struct idxd_wq *wq)
wq->ats_dis = 0;
clear_bit(WQ_FLAG_DEDICATED, &wq->flags);
memset(wq->name, 0, WQ_NAME_SIZE);
+}
- for (i = 0; i < WQCFG_STRIDES(idxd); i++) {
- wq_offset = WQCFG_OFFSET(idxd, wq->id, i);
- iowrite32(0, idxd->reg_base + wq_offset);
- dev_dbg(dev, "WQ[%d][%d][%#x]: %#x\n",
- wq->id, i, wq_offset,
- ioread32(idxd->reg_base + wq_offset));
- }
+static void idxd_wq_ref_release(struct percpu_ref *ref)
+{
+ struct idxd_wq *wq = container_of(ref, struct idxd_wq, wq_active);
+
+ complete(&wq->wq_dead);
+}
+
+int idxd_wq_init_percpu_ref(struct idxd_wq *wq)
+{
+ int rc;
+
+ memset(&wq->wq_active, 0, sizeof(wq->wq_active));
+ rc = percpu_ref_init(&wq->wq_active, idxd_wq_ref_release, 0, GFP_KERNEL);
+ if (rc < 0)
+ return rc;
+ reinit_completion(&wq->wq_dead);
+ return 0;
+}
+
+void idxd_wq_quiesce(struct idxd_wq *wq)
+{
+ percpu_ref_kill(&wq->wq_active);
+ wait_for_completion(&wq->wq_dead);
+ percpu_ref_exit(&wq->wq_active);
}
/* Device control bits */
@@ -426,13 +465,13 @@ int idxd_device_init_reset(struct idxd_device *idxd)
memset(&cmd, 0, sizeof(cmd));
cmd.cmd = IDXD_CMD_RESET_DEVICE;
dev_dbg(dev, "%s: sending reset for init.\n", __func__);
- spin_lock_irqsave(&idxd->dev_lock, flags);
+ spin_lock_irqsave(&idxd->cmd_lock, flags);
iowrite32(cmd.bits, idxd->reg_base + IDXD_CMD_OFFSET);
while (ioread32(idxd->reg_base + IDXD_CMDSTS_OFFSET) &
IDXD_CMDSTS_ACTIVE)
cpu_relax();
- spin_unlock_irqrestore(&idxd->dev_lock, flags);
+ spin_unlock_irqrestore(&idxd->cmd_lock, flags);
return 0;
}
@@ -445,7 +484,8 @@ static void idxd_cmd_exec(struct idxd_device *idxd, int cmd_code, u32 operand,
if (idxd_device_is_halted(idxd)) {
dev_warn(&idxd->pdev->dev, "Device is HALTED!\n");
- *status = IDXD_CMDSTS_HW_ERR;
+ if (status)
+ *status = IDXD_CMDSTS_HW_ERR;
return;
}
@@ -454,10 +494,10 @@ static void idxd_cmd_exec(struct idxd_device *idxd, int cmd_code, u32 operand,
cmd.operand = operand;
cmd.int_req = 1;
- spin_lock_irqsave(&idxd->dev_lock, flags);
+ spin_lock_irqsave(&idxd->cmd_lock, flags);
wait_event_lock_irq(idxd->cmd_waitq,
!test_bit(IDXD_FLAG_CMD_RUNNING, &idxd->flags),
- idxd->dev_lock);
+ idxd->cmd_lock);
dev_dbg(&idxd->pdev->dev, "%s: sending cmd: %#x op: %#x\n",
__func__, cmd_code, operand);
@@ -471,9 +511,9 @@ static void idxd_cmd_exec(struct idxd_device *idxd, int cmd_code, u32 operand,
* After command submitted, release lock and go to sleep until
* the command completes via interrupt.
*/
- spin_unlock_irqrestore(&idxd->dev_lock, flags);
+ spin_unlock_irqrestore(&idxd->cmd_lock, flags);
wait_for_completion(&done);
- spin_lock_irqsave(&idxd->dev_lock, flags);
+ spin_lock_irqsave(&idxd->cmd_lock, flags);
if (status) {
*status = ioread32(idxd->reg_base + IDXD_CMDSTS_OFFSET);
idxd->cmd_status = *status & GENMASK(7, 0);
@@ -482,7 +522,7 @@ static void idxd_cmd_exec(struct idxd_device *idxd, int cmd_code, u32 operand,
__clear_bit(IDXD_FLAG_CMD_RUNNING, &idxd->flags);
/* Wake up other pending commands */
wake_up(&idxd->cmd_waitq);
- spin_unlock_irqrestore(&idxd->dev_lock, flags);
+ spin_unlock_irqrestore(&idxd->cmd_lock, flags);
}
int idxd_device_enable(struct idxd_device *idxd)
@@ -515,7 +555,7 @@ void idxd_device_wqs_clear_state(struct idxd_device *idxd)
lockdep_assert_held(&idxd->dev_lock);
for (i = 0; i < idxd->max_wqs; i++) {
- struct idxd_wq *wq = &idxd->wqs[i];
+ struct idxd_wq *wq = idxd->wqs[i];
if (wq->state == IDXD_WQ_ENABLED) {
idxd_wq_disable_cleanup(wq);
@@ -573,7 +613,108 @@ void idxd_device_drain_pasid(struct idxd_device *idxd, int pasid)
dev_dbg(dev, "pasid %d drained\n", pasid);
}
+int idxd_device_request_int_handle(struct idxd_device *idxd, int idx, int *handle,
+ enum idxd_interrupt_type irq_type)
+{
+ struct device *dev = &idxd->pdev->dev;
+ u32 operand, status;
+
+ if (!(idxd->hw.cmd_cap & BIT(IDXD_CMD_REQUEST_INT_HANDLE)))
+ return -EOPNOTSUPP;
+
+ dev_dbg(dev, "get int handle, idx %d\n", idx);
+
+ operand = idx & GENMASK(15, 0);
+ if (irq_type == IDXD_IRQ_IMS)
+ operand |= CMD_INT_HANDLE_IMS;
+
+ dev_dbg(dev, "cmd: %u operand: %#x\n", IDXD_CMD_REQUEST_INT_HANDLE, operand);
+
+ idxd_cmd_exec(idxd, IDXD_CMD_REQUEST_INT_HANDLE, operand, &status);
+
+ if ((status & IDXD_CMDSTS_ERR_MASK) != IDXD_CMDSTS_SUCCESS) {
+ dev_dbg(dev, "request int handle failed: %#x\n", status);
+ return -ENXIO;
+ }
+
+ *handle = (status >> IDXD_CMDSTS_RES_SHIFT) & GENMASK(15, 0);
+
+ dev_dbg(dev, "int handle acquired: %u\n", *handle);
+ return 0;
+}
+
+int idxd_device_release_int_handle(struct idxd_device *idxd, int handle,
+ enum idxd_interrupt_type irq_type)
+{
+ struct device *dev = &idxd->pdev->dev;
+ u32 operand, status;
+ union idxd_command_reg cmd;
+ unsigned long flags;
+
+ if (!(idxd->hw.cmd_cap & BIT(IDXD_CMD_RELEASE_INT_HANDLE)))
+ return -EOPNOTSUPP;
+
+ dev_dbg(dev, "release int handle, handle %d\n", handle);
+
+ memset(&cmd, 0, sizeof(cmd));
+ operand = handle & GENMASK(15, 0);
+
+ if (irq_type == IDXD_IRQ_IMS)
+ operand |= CMD_INT_HANDLE_IMS;
+
+ cmd.cmd = IDXD_CMD_RELEASE_INT_HANDLE;
+ cmd.operand = operand;
+
+ dev_dbg(dev, "cmd: %u operand: %#x\n", IDXD_CMD_RELEASE_INT_HANDLE, operand);
+
+ spin_lock_irqsave(&idxd->cmd_lock, flags);
+ iowrite32(cmd.bits, idxd->reg_base + IDXD_CMD_OFFSET);
+
+ while (ioread32(idxd->reg_base + IDXD_CMDSTS_OFFSET) & IDXD_CMDSTS_ACTIVE)
+ cpu_relax();
+ status = ioread32(idxd->reg_base + IDXD_CMDSTS_OFFSET);
+ spin_unlock_irqrestore(&idxd->cmd_lock, flags);
+
+ if ((status & IDXD_CMDSTS_ERR_MASK) != IDXD_CMDSTS_SUCCESS) {
+ dev_dbg(dev, "release int handle failed: %#x\n", status);
+ return -ENXIO;
+ }
+
+ dev_dbg(dev, "int handle released.\n");
+ return 0;
+}
+
/* Device configuration bits */
+void idxd_msix_perm_setup(struct idxd_device *idxd)
+{
+ union msix_perm mperm;
+ int i, msixcnt;
+
+ msixcnt = pci_msix_vec_count(idxd->pdev);
+ if (msixcnt < 0)
+ return;
+
+ mperm.bits = 0;
+ mperm.pasid = idxd->pasid;
+ mperm.pasid_en = device_pasid_enabled(idxd);
+ for (i = 1; i < msixcnt; i++)
+ iowrite32(mperm.bits, idxd->reg_base + idxd->msix_perm_offset + i * 8);
+}
+
+void idxd_msix_perm_clear(struct idxd_device *idxd)
+{
+ union msix_perm mperm;
+ int i, msixcnt;
+
+ msixcnt = pci_msix_vec_count(idxd->pdev);
+ if (msixcnt < 0)
+ return;
+
+ mperm.bits = 0;
+ for (i = 1; i < msixcnt; i++)
+ iowrite32(mperm.bits, idxd->reg_base + idxd->msix_perm_offset + i * 8);
+}
+
static void idxd_group_config_write(struct idxd_group *group)
{
struct idxd_device *idxd = group->idxd;
@@ -624,7 +765,7 @@ static int idxd_groups_config_write(struct idxd_device *idxd)
ioread32(idxd->reg_base + IDXD_GENCFG_OFFSET));
for (i = 0; i < idxd->max_groups; i++) {
- struct idxd_group *group = &idxd->groups[i];
+ struct idxd_group *group = idxd->groups[i];
idxd_group_config_write(group);
}
@@ -642,7 +783,14 @@ static int idxd_wq_config_write(struct idxd_wq *wq)
if (!wq->group)
return 0;
- memset(wq->wqcfg, 0, idxd->wqcfg_size);
+ /*
+ * Instead of memset the entire shadow copy of WQCFG, copy from the hardware after
+ * wq reset. This will copy back the sticky values that are present on some devices.
+ */
+ for (i = 0; i < WQCFG_STRIDES(idxd); i++) {
+ wq_offset = WQCFG_OFFSET(idxd, wq->id, i);
+ wq->wqcfg->bits[i] = ioread32(idxd->reg_base + wq_offset);
+ }
/* byte 0-3 */
wq->wqcfg->wq_size = wq->size;
@@ -696,7 +844,7 @@ static int idxd_wqs_config_write(struct idxd_device *idxd)
int i, rc;
for (i = 0; i < idxd->max_wqs; i++) {
- struct idxd_wq *wq = &idxd->wqs[i];
+ struct idxd_wq *wq = idxd->wqs[i];
rc = idxd_wq_config_write(wq);
if (rc < 0)
@@ -712,7 +860,7 @@ static void idxd_group_flags_setup(struct idxd_device *idxd)
/* TC-A 0 and TC-B 1 should be defaults */
for (i = 0; i < idxd->max_groups; i++) {
- struct idxd_group *group = &idxd->groups[i];
+ struct idxd_group *group = idxd->groups[i];
if (group->tc_a == -1)
group->tc_a = group->grpcfg.flags.tc_a = 0;
@@ -739,12 +887,12 @@ static int idxd_engines_setup(struct idxd_device *idxd)
struct idxd_group *group;
for (i = 0; i < idxd->max_groups; i++) {
- group = &idxd->groups[i];
+ group = idxd->groups[i];
group->grpcfg.engines = 0;
}
for (i = 0; i < idxd->max_engines; i++) {
- eng = &idxd->engines[i];
+ eng = idxd->engines[i];
group = eng->group;
if (!group)
@@ -768,13 +916,13 @@ static int idxd_wqs_setup(struct idxd_device *idxd)
struct device *dev = &idxd->pdev->dev;
for (i = 0; i < idxd->max_groups; i++) {
- group = &idxd->groups[i];
+ group = idxd->groups[i];
for (j = 0; j < 4; j++)
group->grpcfg.wqs[j] = 0;
}
for (i = 0; i < idxd->max_wqs; i++) {
- wq = &idxd->wqs[i];
+ wq = idxd->wqs[i];
group = wq->group;
if (!wq->group)
@@ -822,3 +970,119 @@ int idxd_device_config(struct idxd_device *idxd)
return 0;
}
+
+static int idxd_wq_load_config(struct idxd_wq *wq)
+{
+ struct idxd_device *idxd = wq->idxd;
+ struct device *dev = &idxd->pdev->dev;
+ int wqcfg_offset;
+ int i;
+
+ wqcfg_offset = WQCFG_OFFSET(idxd, wq->id, 0);
+ memcpy_fromio(wq->wqcfg, idxd->reg_base + wqcfg_offset, idxd->wqcfg_size);
+
+ wq->size = wq->wqcfg->wq_size;
+ wq->threshold = wq->wqcfg->wq_thresh;
+ if (wq->wqcfg->priv)
+ wq->type = IDXD_WQT_KERNEL;
+
+ /* The driver does not support shared WQ mode in read-only config yet */
+ if (wq->wqcfg->mode == 0 || wq->wqcfg->pasid_en)
+ return -EOPNOTSUPP;
+
+ set_bit(WQ_FLAG_DEDICATED, &wq->flags);
+
+ wq->priority = wq->wqcfg->priority;
+
+ for (i = 0; i < WQCFG_STRIDES(idxd); i++) {
+ wqcfg_offset = WQCFG_OFFSET(idxd, wq->id, i);
+ dev_dbg(dev, "WQ[%d][%d][%#x]: %#x\n", wq->id, i, wqcfg_offset, wq->wqcfg->bits[i]);
+ }
+
+ return 0;
+}
+
+static void idxd_group_load_config(struct idxd_group *group)
+{
+ struct idxd_device *idxd = group->idxd;
+ struct device *dev = &idxd->pdev->dev;
+ int i, j, grpcfg_offset;
+
+ /*
+ * Load WQS bit fields
+ * Iterate through all 256 bits 64 bits at a time
+ */
+ for (i = 0; i < GRPWQCFG_STRIDES; i++) {
+ struct idxd_wq *wq;
+
+ grpcfg_offset = GRPWQCFG_OFFSET(idxd, group->id, i);
+ group->grpcfg.wqs[i] = ioread64(idxd->reg_base + grpcfg_offset);
+ dev_dbg(dev, "GRPCFG wq[%d:%d: %#x]: %#llx\n",
+ group->id, i, grpcfg_offset, group->grpcfg.wqs[i]);
+
+ if (i * 64 >= idxd->max_wqs)
+ break;
+
+ /* Iterate through all 64 bits and check for wq set */
+ for (j = 0; j < 64; j++) {
+ int id = i * 64 + j;
+
+ /* No need to check beyond max wqs */
+ if (id >= idxd->max_wqs)
+ break;
+
+ /* Set group assignment for wq if wq bit is set */
+ if (group->grpcfg.wqs[i] & BIT(j)) {
+ wq = idxd->wqs[id];
+ wq->group = group;
+ }
+ }
+ }
+
+ grpcfg_offset = GRPENGCFG_OFFSET(idxd, group->id);
+ group->grpcfg.engines = ioread64(idxd->reg_base + grpcfg_offset);
+ dev_dbg(dev, "GRPCFG engs[%d: %#x]: %#llx\n", group->id,
+ grpcfg_offset, group->grpcfg.engines);
+
+ /* Iterate through all 64 bits to check engines set */
+ for (i = 0; i < 64; i++) {
+ if (i >= idxd->max_engines)
+ break;
+
+ if (group->grpcfg.engines & BIT(i)) {
+ struct idxd_engine *engine = idxd->engines[i];
+
+ engine->group = group;
+ }
+ }
+
+ grpcfg_offset = GRPFLGCFG_OFFSET(idxd, group->id);
+ group->grpcfg.flags.bits = ioread32(idxd->reg_base + grpcfg_offset);
+ dev_dbg(dev, "GRPFLAGS flags[%d: %#x]: %#x\n",
+ group->id, grpcfg_offset, group->grpcfg.flags.bits);
+}
+
+int idxd_device_load_config(struct idxd_device *idxd)
+{
+ union gencfg_reg reg;
+ int i, rc;
+
+ reg.bits = ioread32(idxd->reg_base + IDXD_GENCFG_OFFSET);
+ idxd->token_limit = reg.token_limit;
+
+ for (i = 0; i < idxd->max_groups; i++) {
+ struct idxd_group *group = idxd->groups[i];
+
+ idxd_group_load_config(group);
+ }
+
+ for (i = 0; i < idxd->max_wqs; i++) {
+ struct idxd_wq *wq = idxd->wqs[i];
+
+ rc = idxd_wq_load_config(wq);
+ if (rc < 0)
+ return rc;
+ }
+
+ return 0;
+}
diff --git a/drivers/dma/idxd/dma.c b/drivers/dma/idxd/dma.c
index a15e50126434..77439b645044 100644
--- a/drivers/dma/idxd/dma.c
+++ b/drivers/dma/idxd/dma.c
@@ -14,7 +14,10 @@
static inline struct idxd_wq *to_idxd_wq(struct dma_chan *c)
{
- return container_of(c, struct idxd_wq, dma_chan);
+ struct idxd_dma_chan *idxd_chan;
+
+ idxd_chan = container_of(c, struct idxd_dma_chan, chan);
+ return idxd_chan->wq;
}
void idxd_dma_complete_txd(struct idxd_desc *desc,
@@ -135,7 +138,7 @@ static void idxd_dma_issue_pending(struct dma_chan *dma_chan)
{
}
-dma_cookie_t idxd_dma_tx_submit(struct dma_async_tx_descriptor *tx)
+static dma_cookie_t idxd_dma_tx_submit(struct dma_async_tx_descriptor *tx)
{
struct dma_chan *c = tx->chan;
struct idxd_wq *wq = to_idxd_wq(c);
@@ -156,14 +159,25 @@ dma_cookie_t idxd_dma_tx_submit(struct dma_async_tx_descriptor *tx)
static void idxd_dma_release(struct dma_device *device)
{
+ struct idxd_dma_dev *idxd_dma = container_of(device, struct idxd_dma_dev, dma);
+
+ kfree(idxd_dma);
}
int idxd_register_dma_device(struct idxd_device *idxd)
{
- struct dma_device *dma = &idxd->dma_dev;
+ struct idxd_dma_dev *idxd_dma;
+ struct dma_device *dma;
+ struct device *dev = &idxd->pdev->dev;
+ int rc;
+ idxd_dma = kzalloc_node(sizeof(*idxd_dma), GFP_KERNEL, dev_to_node(dev));
+ if (!idxd_dma)
+ return -ENOMEM;
+
+ dma = &idxd_dma->dma;
INIT_LIST_HEAD(&dma->channels);
- dma->dev = &idxd->pdev->dev;
+ dma->dev = dev;
dma_cap_set(DMA_PRIVATE, dma->cap_mask);
dma_cap_set(DMA_COMPLETION_NO_ORDER, dma->cap_mask);
@@ -179,35 +193,72 @@ int idxd_register_dma_device(struct idxd_device *idxd)
dma->device_alloc_chan_resources = idxd_dma_alloc_chan_resources;
dma->device_free_chan_resources = idxd_dma_free_chan_resources;
- return dma_async_device_register(&idxd->dma_dev);
+ rc = dma_async_device_register(dma);
+ if (rc < 0) {
+ kfree(idxd_dma);
+ return rc;
+ }
+
+ idxd_dma->idxd = idxd;
+ /*
+ * This pointer is protected by the refs taken by the dma_chan. It will remain valid
+ * as long as there are outstanding channels.
+ */
+ idxd->idxd_dma = idxd_dma;
+ return 0;
}
void idxd_unregister_dma_device(struct idxd_device *idxd)
{
- dma_async_device_unregister(&idxd->dma_dev);
+ dma_async_device_unregister(&idxd->idxd_dma->dma);
}
int idxd_register_dma_channel(struct idxd_wq *wq)
{
struct idxd_device *idxd = wq->idxd;
- struct dma_device *dma = &idxd->dma_dev;
- struct dma_chan *chan = &wq->dma_chan;
- int rc;
+ struct dma_device *dma = &idxd->idxd_dma->dma;
+ struct device *dev = &idxd->pdev->dev;
+ struct idxd_dma_chan *idxd_chan;
+ struct dma_chan *chan;
+ int rc, i;
+
+ idxd_chan = kzalloc_node(sizeof(*idxd_chan), GFP_KERNEL, dev_to_node(dev));
+ if (!idxd_chan)
+ return -ENOMEM;
- memset(&wq->dma_chan, 0, sizeof(struct dma_chan));
+ chan = &idxd_chan->chan;
chan->device = dma;
list_add_tail(&chan->device_node, &dma->channels);
+
+ for (i = 0; i < wq->num_descs; i++) {
+ struct idxd_desc *desc = wq->descs[i];
+
+ dma_async_tx_descriptor_init(&desc->txd, chan);
+ desc->txd.tx_submit = idxd_dma_tx_submit;
+ }
+
rc = dma_async_device_channel_register(dma, chan);
- if (rc < 0)
+ if (rc < 0) {
+ kfree(idxd_chan);
return rc;
+ }
+
+ wq->idxd_chan = idxd_chan;
+ idxd_chan->wq = wq;
+ get_device(&wq->conf_dev);
return 0;
}
void idxd_unregister_dma_channel(struct idxd_wq *wq)
{
- struct dma_chan *chan = &wq->dma_chan;
+ struct idxd_dma_chan *idxd_chan = wq->idxd_chan;
+ struct dma_chan *chan = &idxd_chan->chan;
+ struct idxd_dma_dev *idxd_dma = wq->idxd->idxd_dma;
- dma_async_device_channel_unregister(&wq->idxd->dma_dev, chan);
+ dma_async_device_channel_unregister(&idxd_dma->dma, chan);
list_del(&chan->device_node);
+ kfree(wq->idxd_chan);
+ wq->idxd_chan = NULL;
+ put_device(&wq->conf_dev);
}
diff --git a/drivers/dma/idxd/idxd.h b/drivers/dma/idxd/idxd.h
index 81a0e65fd316..26482c7d4c3a 100644
--- a/drivers/dma/idxd/idxd.h
+++ b/drivers/dma/idxd/idxd.h
@@ -8,12 +8,18 @@
#include <linux/percpu-rwsem.h>
#include <linux/wait.h>
#include <linux/cdev.h>
+#include <linux/idr.h>
+#include <linux/pci.h>
+#include <linux/perf_event.h>
#include "registers.h"
#define IDXD_DRIVER_VERSION "1.00"
extern struct kmem_cache *idxd_desc_pool;
+struct idxd_device;
+struct idxd_wq;
+
#define IDXD_REG_TIMEOUT 50
#define IDXD_DRAIN_TIMEOUT 5000
@@ -25,6 +31,7 @@ enum idxd_type {
};
#define IDXD_NAME_SIZE 128
+#define IDXD_PMU_EVENT_MAX 64
struct idxd_device_driver {
struct device_driver drv;
@@ -33,6 +40,7 @@ struct idxd_device_driver {
struct idxd_irq_entry {
struct idxd_device *idxd;
int id;
+ int vector;
struct llist_head pending_llist;
struct list_head work_list;
/*
@@ -56,6 +64,31 @@ struct idxd_group {
int tc_b;
};
+struct idxd_pmu {
+ struct idxd_device *idxd;
+
+ struct perf_event *event_list[IDXD_PMU_EVENT_MAX];
+ int n_events;
+
+ DECLARE_BITMAP(used_mask, IDXD_PMU_EVENT_MAX);
+
+ struct pmu pmu;
+ char name[IDXD_NAME_SIZE];
+ int cpu;
+
+ int n_counters;
+ int counter_width;
+ int n_event_categories;
+
+ bool per_counter_caps_supported;
+ unsigned long supported_event_categories;
+
+ unsigned long supported_filters;
+ int n_filters;
+
+ struct hlist_node cpuhp_node;
+};
+
#define IDXD_MAX_PRIORITY 0xf
enum idxd_wq_state {
@@ -75,10 +108,10 @@ enum idxd_wq_type {
};
struct idxd_cdev {
+ struct idxd_wq *wq;
struct cdev cdev;
- struct device *dev;
+ struct device dev;
int minor;
- struct wait_queue_head err_queue;
};
#define IDXD_ALLOCATED_BATCH_SIZE 128U
@@ -96,10 +129,18 @@ enum idxd_complete_type {
IDXD_COMPLETE_DEV_FAIL,
};
+struct idxd_dma_chan {
+ struct dma_chan chan;
+ struct idxd_wq *wq;
+};
+
struct idxd_wq {
void __iomem *portal;
+ struct percpu_ref wq_active;
+ struct completion wq_dead;
struct device conf_dev;
- struct idxd_cdev idxd_cdev;
+ struct idxd_cdev *idxd_cdev;
+ struct wait_queue_head err_queue;
struct idxd_device *idxd;
int id;
enum idxd_wq_type type;
@@ -125,7 +166,7 @@ struct idxd_wq {
int compls_size;
struct idxd_desc **descs;
struct sbitmap_queue sbq;
- struct dma_chan dma_chan;
+ struct idxd_dma_chan *idxd_chan;
char name[WQ_NAME_SIZE + 1];
u64 max_xfer_bytes;
u32 max_batch_size;
@@ -147,6 +188,7 @@ struct idxd_hw {
union group_cap_reg group_cap;
union engine_cap_reg engine_cap;
struct opcap opcap;
+ u32 cmd_cap;
};
enum idxd_device_state {
@@ -162,9 +204,22 @@ enum idxd_device_flag {
IDXD_FLAG_PASID_ENABLED,
};
-struct idxd_device {
+struct idxd_dma_dev {
+ struct idxd_device *idxd;
+ struct dma_device dma;
+};
+
+struct idxd_driver_data {
+ const char *name_prefix;
enum idxd_type type;
+ struct device_type *dev_type;
+ int compl_size;
+ int align;
+};
+
+struct idxd_device {
struct device conf_dev;
+ struct idxd_driver_data *data;
struct list_head list;
struct idxd_hw hw;
enum idxd_device_state state;
@@ -177,10 +232,11 @@ struct idxd_device {
void __iomem *reg_base;
spinlock_t dev_lock; /* spinlock for device */
+ spinlock_t cmd_lock; /* spinlock for device commands */
struct completion *cmd_done;
- struct idxd_group *groups;
- struct idxd_wq *wqs;
- struct idxd_engine *engines;
+ struct idxd_group **groups;
+ struct idxd_wq **wqs;
+ struct idxd_engine **engines;
struct iommu_sva *sva;
unsigned int pasid;
@@ -202,17 +258,19 @@ struct idxd_device {
int token_limit;
int nr_tokens; /* non-reserved tokens */
unsigned int wqcfg_size;
- int compl_size;
union sw_err_reg sw_err;
wait_queue_head_t cmd_waitq;
- struct msix_entry *msix_entries;
int num_wq_irqs;
struct idxd_irq_entry *irq_entries;
- struct dma_device dma_dev;
+ struct idxd_dma_dev *idxd_dma;
struct workqueue_struct *wq;
struct work_struct work;
+
+ int *int_handles;
+
+ struct idxd_pmu *idxd_pmu;
};
/* IDXD software descriptor */
@@ -232,6 +290,7 @@ struct idxd_desc {
struct list_head list;
int id;
int cpu;
+ unsigned int vector;
struct idxd_wq *wq;
};
@@ -242,6 +301,44 @@ extern struct bus_type dsa_bus_type;
extern struct bus_type iax_bus_type;
extern bool support_enqcmd;
+extern struct ida idxd_ida;
+extern struct device_type dsa_device_type;
+extern struct device_type iax_device_type;
+extern struct device_type idxd_wq_device_type;
+extern struct device_type idxd_engine_device_type;
+extern struct device_type idxd_group_device_type;
+
+static inline bool is_dsa_dev(struct device *dev)
+{
+ return dev->type == &dsa_device_type;
+}
+
+static inline bool is_iax_dev(struct device *dev)
+{
+ return dev->type == &iax_device_type;
+}
+
+static inline bool is_idxd_dev(struct device *dev)
+{
+ return is_dsa_dev(dev) || is_iax_dev(dev);
+}
+
+static inline bool is_idxd_wq_dev(struct device *dev)
+{
+ return dev->type == &idxd_wq_device_type;
+}
+
+static inline bool is_idxd_wq_dmaengine(struct idxd_wq *wq)
+{
+ if (wq->type == IDXD_WQT_KERNEL && strcmp(wq->name, "dmaengine") == 0)
+ return true;
+ return false;
+}
+
+static inline bool is_idxd_wq_cdev(struct idxd_wq *wq)
+{
+ return wq->type == IDXD_WQT_USER;
+}
static inline bool wq_dedicated(struct idxd_wq *wq)
{
@@ -268,6 +365,11 @@ enum idxd_portal_prot {
IDXD_PORTAL_LIMITED,
};
+enum idxd_interrupt_type {
+ IDXD_IRQ_MSIX = 0,
+ IDXD_IRQ_IMS,
+};
+
static inline int idxd_get_wq_portal_offset(enum idxd_portal_prot prot)
{
return prot * 0x1000;
@@ -279,18 +381,6 @@ static inline int idxd_get_wq_portal_full_offset(int wq_id,
return ((wq_id * 4) << PAGE_SHIFT) + idxd_get_wq_portal_offset(prot);
}
-static inline void idxd_set_type(struct idxd_device *idxd)
-{
- struct pci_dev *pdev = idxd->pdev;
-
- if (pdev->device == PCI_DEVICE_ID_INTEL_DSA_SPR0)
- idxd->type = IDXD_TYPE_DSA;
- else if (pdev->device == PCI_DEVICE_ID_INTEL_IAX_SPR0)
- idxd->type = IDXD_TYPE_IAX;
- else
- idxd->type = IDXD_TYPE_UNKNOWN;
-}
-
static inline void idxd_wq_get(struct idxd_wq *wq)
{
wq->client_count++;
@@ -306,17 +396,17 @@ static inline int idxd_wq_refcount(struct idxd_wq *wq)
return wq->client_count;
};
-const char *idxd_get_dev_name(struct idxd_device *idxd);
int idxd_register_bus_type(void);
void idxd_unregister_bus_type(void);
-int idxd_setup_sysfs(struct idxd_device *idxd);
-void idxd_cleanup_sysfs(struct idxd_device *idxd);
+int idxd_register_devices(struct idxd_device *idxd);
+void idxd_unregister_devices(struct idxd_device *idxd);
int idxd_register_driver(void);
void idxd_unregister_driver(void);
-struct bus_type *idxd_get_bus_type(struct idxd_device *idxd);
+void idxd_wqs_quiesce(struct idxd_device *idxd);
/* device interrupt control */
-irqreturn_t idxd_irq_handler(int vec, void *data);
+void idxd_msix_perm_setup(struct idxd_device *idxd);
+void idxd_msix_perm_clear(struct idxd_device *idxd);
irqreturn_t idxd_misc_thread(int vec, void *data);
irqreturn_t idxd_wq_thread(int irq, void *data);
void idxd_mask_error_interrupts(struct idxd_device *idxd);
@@ -334,18 +424,27 @@ void idxd_device_cleanup(struct idxd_device *idxd);
int idxd_device_config(struct idxd_device *idxd);
void idxd_device_wqs_clear_state(struct idxd_device *idxd);
void idxd_device_drain_pasid(struct idxd_device *idxd, int pasid);
+int idxd_device_load_config(struct idxd_device *idxd);
+int idxd_device_request_int_handle(struct idxd_device *idxd, int idx, int *handle,
+ enum idxd_interrupt_type irq_type);
+int idxd_device_release_int_handle(struct idxd_device *idxd, int handle,
+ enum idxd_interrupt_type irq_type);
/* work queue control */
+void idxd_wqs_unmap_portal(struct idxd_device *idxd);
int idxd_wq_alloc_resources(struct idxd_wq *wq);
void idxd_wq_free_resources(struct idxd_wq *wq);
int idxd_wq_enable(struct idxd_wq *wq);
int idxd_wq_disable(struct idxd_wq *wq);
void idxd_wq_drain(struct idxd_wq *wq);
+void idxd_wq_reset(struct idxd_wq *wq);
int idxd_wq_map_portal(struct idxd_wq *wq);
void idxd_wq_unmap_portal(struct idxd_wq *wq);
void idxd_wq_disable_cleanup(struct idxd_wq *wq);
int idxd_wq_set_pasid(struct idxd_wq *wq, int pasid);
int idxd_wq_disable_pasid(struct idxd_wq *wq);
+void idxd_wq_quiesce(struct idxd_wq *wq);
+int idxd_wq_init_percpu_ref(struct idxd_wq *wq);
/* submission */
int idxd_submit_desc(struct idxd_wq *wq, struct idxd_desc *desc);
@@ -360,7 +459,6 @@ void idxd_unregister_dma_channel(struct idxd_wq *wq);
void idxd_parse_completion_status(u8 status, enum dmaengine_tx_result *res);
void idxd_dma_complete_txd(struct idxd_desc *desc,
enum idxd_complete_type comp_type);
-dma_cookie_t idxd_dma_tx_submit(struct dma_async_tx_descriptor *tx);
/* cdev */
int idxd_cdev_register(void);
@@ -369,4 +467,19 @@ int idxd_cdev_get_major(struct idxd_device *idxd);
int idxd_wq_add_cdev(struct idxd_wq *wq);
void idxd_wq_del_cdev(struct idxd_wq *wq);
+/* perfmon */
+#if IS_ENABLED(CONFIG_INTEL_IDXD_PERFMON)
+int perfmon_pmu_init(struct idxd_device *idxd);
+void perfmon_pmu_remove(struct idxd_device *idxd);
+void perfmon_counter_overflow(struct idxd_device *idxd);
+void perfmon_init(void);
+void perfmon_exit(void);
+#else
+static inline int perfmon_pmu_init(struct idxd_device *idxd) { return 0; }
+static inline void perfmon_pmu_remove(struct idxd_device *idxd) {}
+static inline void perfmon_counter_overflow(struct idxd_device *idxd) {}
+static inline void perfmon_init(void) {}
+static inline void perfmon_exit(void) {}
+#endif
+
#endif
diff --git a/drivers/dma/idxd/init.c b/drivers/dma/idxd/init.c
index 085a0c3b62c6..2a926bef87f2 100644
--- a/drivers/dma/idxd/init.c
+++ b/drivers/dma/idxd/init.c
@@ -21,6 +21,7 @@
#include "../dmaengine.h"
#include "registers.h"
#include "idxd.h"
+#include "perfmon.h"
MODULE_VERSION(IDXD_DRIVER_VERSION);
MODULE_LICENSE("GPL v2");
@@ -33,60 +34,53 @@ MODULE_PARM_DESC(sva, "Toggle SVA support on/off");
#define DRV_NAME "idxd"
bool support_enqcmd;
-
-static struct idr idxd_idrs[IDXD_TYPE_MAX];
-static DEFINE_MUTEX(idxd_idr_lock);
+DEFINE_IDA(idxd_ida);
+
+static struct idxd_driver_data idxd_driver_data[] = {
+ [IDXD_TYPE_DSA] = {
+ .name_prefix = "dsa",
+ .type = IDXD_TYPE_DSA,
+ .compl_size = sizeof(struct dsa_completion_record),
+ .align = 32,
+ .dev_type = &dsa_device_type,
+ },
+ [IDXD_TYPE_IAX] = {
+ .name_prefix = "iax",
+ .type = IDXD_TYPE_IAX,
+ .compl_size = sizeof(struct iax_completion_record),
+ .align = 64,
+ .dev_type = &iax_device_type,
+ },
+};
static struct pci_device_id idxd_pci_tbl[] = {
/* DSA ver 1.0 platforms */
- { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_DSA_SPR0) },
+ { PCI_DEVICE_DATA(INTEL, DSA_SPR0, &idxd_driver_data[IDXD_TYPE_DSA]) },
/* IAX ver 1.0 platforms */
- { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IAX_SPR0) },
+ { PCI_DEVICE_DATA(INTEL, IAX_SPR0, &idxd_driver_data[IDXD_TYPE_IAX]) },
{ 0, }
};
MODULE_DEVICE_TABLE(pci, idxd_pci_tbl);
-static char *idxd_name[] = {
- "dsa",
- "iax"
-};
-
-const char *idxd_get_dev_name(struct idxd_device *idxd)
-{
- return idxd_name[idxd->type];
-}
-
static int idxd_setup_interrupts(struct idxd_device *idxd)
{
struct pci_dev *pdev = idxd->pdev;
struct device *dev = &pdev->dev;
- struct msix_entry *msix;
struct idxd_irq_entry *irq_entry;
int i, msixcnt;
int rc = 0;
- union msix_perm mperm;
msixcnt = pci_msix_vec_count(pdev);
if (msixcnt < 0) {
dev_err(dev, "Not MSI-X interrupt capable.\n");
- goto err_no_irq;
+ return -ENOSPC;
}
- idxd->msix_entries = devm_kzalloc(dev, sizeof(struct msix_entry) *
- msixcnt, GFP_KERNEL);
- if (!idxd->msix_entries) {
- rc = -ENOMEM;
- goto err_no_irq;
- }
-
- for (i = 0; i < msixcnt; i++)
- idxd->msix_entries[i].entry = i;
-
- rc = pci_enable_msix_exact(pdev, idxd->msix_entries, msixcnt);
- if (rc) {
- dev_err(dev, "Failed enabling %d MSIX entries.\n", msixcnt);
- goto err_no_irq;
+ rc = pci_alloc_irq_vectors(pdev, msixcnt, msixcnt, PCI_IRQ_MSIX);
+ if (rc != msixcnt) {
+ dev_err(dev, "Failed enabling %d MSIX entries: %d\n", msixcnt, rc);
+ return -ENOSPC;
}
dev_dbg(dev, "Enabled %d msix vectors\n", msixcnt);
@@ -94,126 +88,266 @@ static int idxd_setup_interrupts(struct idxd_device *idxd)
* We implement 1 completion list per MSI-X entry except for
* entry 0, which is for errors and others.
*/
- idxd->irq_entries = devm_kcalloc(dev, msixcnt,
- sizeof(struct idxd_irq_entry),
- GFP_KERNEL);
+ idxd->irq_entries = kcalloc_node(msixcnt, sizeof(struct idxd_irq_entry),
+ GFP_KERNEL, dev_to_node(dev));
if (!idxd->irq_entries) {
rc = -ENOMEM;
- goto err_no_irq;
+ goto err_irq_entries;
}
for (i = 0; i < msixcnt; i++) {
idxd->irq_entries[i].id = i;
idxd->irq_entries[i].idxd = idxd;
+ idxd->irq_entries[i].vector = pci_irq_vector(pdev, i);
spin_lock_init(&idxd->irq_entries[i].list_lock);
}
- msix = &idxd->msix_entries[0];
irq_entry = &idxd->irq_entries[0];
- rc = devm_request_threaded_irq(dev, msix->vector, idxd_irq_handler,
- idxd_misc_thread, 0, "idxd-misc",
- irq_entry);
+ rc = request_threaded_irq(irq_entry->vector, NULL, idxd_misc_thread,
+ 0, "idxd-misc", irq_entry);
if (rc < 0) {
dev_err(dev, "Failed to allocate misc interrupt.\n");
- goto err_no_irq;
+ goto err_misc_irq;
}
- dev_dbg(dev, "Allocated idxd-misc handler on msix vector %d\n",
- msix->vector);
+ dev_dbg(dev, "Allocated idxd-misc handler on msix vector %d\n", irq_entry->vector);
/* first MSI-X entry is not for wq interrupts */
idxd->num_wq_irqs = msixcnt - 1;
for (i = 1; i < msixcnt; i++) {
- msix = &idxd->msix_entries[i];
irq_entry = &idxd->irq_entries[i];
init_llist_head(&idxd->irq_entries[i].pending_llist);
INIT_LIST_HEAD(&idxd->irq_entries[i].work_list);
- rc = devm_request_threaded_irq(dev, msix->vector,
- idxd_irq_handler,
- idxd_wq_thread, 0,
- "idxd-portal", irq_entry);
+ rc = request_threaded_irq(irq_entry->vector, NULL,
+ idxd_wq_thread, 0, "idxd-portal", irq_entry);
if (rc < 0) {
- dev_err(dev, "Failed to allocate irq %d.\n",
- msix->vector);
- goto err_no_irq;
+ dev_err(dev, "Failed to allocate irq %d.\n", irq_entry->vector);
+ goto err_wq_irqs;
+ }
+
+ dev_dbg(dev, "Allocated idxd-msix %d for vector %d\n", i, irq_entry->vector);
+ if (idxd->hw.cmd_cap & BIT(IDXD_CMD_REQUEST_INT_HANDLE)) {
+ /*
+ * The MSIX vector enumeration starts at 1 with vector 0 being the
+ * misc interrupt that handles non I/O completion events. The
+ * interrupt handles are for IMS enumeration on guest. The misc
+ * interrupt vector does not require a handle and therefore we start
+ * the int_handles at index 0. Since 'i' starts at 1, the first
+ * int_handles index will be 0.
+ */
+ rc = idxd_device_request_int_handle(idxd, i, &idxd->int_handles[i - 1],
+ IDXD_IRQ_MSIX);
+ if (rc < 0) {
+ free_irq(irq_entry->vector, irq_entry);
+ goto err_wq_irqs;
+ }
+ dev_dbg(dev, "int handle requested: %u\n", idxd->int_handles[i - 1]);
}
- dev_dbg(dev, "Allocated idxd-msix %d for vector %d\n",
- i, msix->vector);
}
idxd_unmask_error_interrupts(idxd);
-
- /* Setup MSIX permission table */
- mperm.bits = 0;
- mperm.pasid = idxd->pasid;
- mperm.pasid_en = device_pasid_enabled(idxd);
- for (i = 1; i < msixcnt; i++)
- iowrite32(mperm.bits, idxd->reg_base + idxd->msix_perm_offset + i * 8);
-
+ idxd_msix_perm_setup(idxd);
return 0;
- err_no_irq:
+ err_wq_irqs:
+ while (--i >= 0) {
+ irq_entry = &idxd->irq_entries[i];
+ free_irq(irq_entry->vector, irq_entry);
+ if (i != 0)
+ idxd_device_release_int_handle(idxd,
+ idxd->int_handles[i], IDXD_IRQ_MSIX);
+ }
+ err_misc_irq:
/* Disable error interrupt generation */
idxd_mask_error_interrupts(idxd);
- pci_disable_msix(pdev);
+ err_irq_entries:
+ pci_free_irq_vectors(pdev);
dev_err(dev, "No usable interrupts\n");
return rc;
}
-static int idxd_setup_internals(struct idxd_device *idxd)
+static int idxd_setup_wqs(struct idxd_device *idxd)
{
struct device *dev = &idxd->pdev->dev;
- int i;
-
- init_waitqueue_head(&idxd->cmd_waitq);
- idxd->groups = devm_kcalloc(dev, idxd->max_groups,
- sizeof(struct idxd_group), GFP_KERNEL);
- if (!idxd->groups)
- return -ENOMEM;
+ struct idxd_wq *wq;
+ int i, rc;
- for (i = 0; i < idxd->max_groups; i++) {
- idxd->groups[i].idxd = idxd;
- idxd->groups[i].id = i;
- idxd->groups[i].tc_a = -1;
- idxd->groups[i].tc_b = -1;
- }
-
- idxd->wqs = devm_kcalloc(dev, idxd->max_wqs, sizeof(struct idxd_wq),
- GFP_KERNEL);
+ idxd->wqs = kcalloc_node(idxd->max_wqs, sizeof(struct idxd_wq *),
+ GFP_KERNEL, dev_to_node(dev));
if (!idxd->wqs)
return -ENOMEM;
- idxd->engines = devm_kcalloc(dev, idxd->max_engines,
- sizeof(struct idxd_engine), GFP_KERNEL);
- if (!idxd->engines)
- return -ENOMEM;
-
for (i = 0; i < idxd->max_wqs; i++) {
- struct idxd_wq *wq = &idxd->wqs[i];
+ wq = kzalloc_node(sizeof(*wq), GFP_KERNEL, dev_to_node(dev));
+ if (!wq) {
+ rc = -ENOMEM;
+ goto err;
+ }
wq->id = i;
wq->idxd = idxd;
+ device_initialize(&wq->conf_dev);
+ wq->conf_dev.parent = &idxd->conf_dev;
+ wq->conf_dev.bus = &dsa_bus_type;
+ wq->conf_dev.type = &idxd_wq_device_type;
+ rc = dev_set_name(&wq->conf_dev, "wq%d.%d", idxd->id, wq->id);
+ if (rc < 0) {
+ put_device(&wq->conf_dev);
+ goto err;
+ }
+
mutex_init(&wq->wq_lock);
- wq->idxd_cdev.minor = -1;
+ init_waitqueue_head(&wq->err_queue);
+ init_completion(&wq->wq_dead);
wq->max_xfer_bytes = idxd->max_xfer_bytes;
wq->max_batch_size = idxd->max_batch_size;
- wq->wqcfg = devm_kzalloc(dev, idxd->wqcfg_size, GFP_KERNEL);
- if (!wq->wqcfg)
- return -ENOMEM;
+ wq->wqcfg = kzalloc_node(idxd->wqcfg_size, GFP_KERNEL, dev_to_node(dev));
+ if (!wq->wqcfg) {
+ put_device(&wq->conf_dev);
+ rc = -ENOMEM;
+ goto err;
+ }
+ idxd->wqs[i] = wq;
}
+ return 0;
+
+ err:
+ while (--i >= 0)
+ put_device(&idxd->wqs[i]->conf_dev);
+ return rc;
+}
+
+static int idxd_setup_engines(struct idxd_device *idxd)
+{
+ struct idxd_engine *engine;
+ struct device *dev = &idxd->pdev->dev;
+ int i, rc;
+
+ idxd->engines = kcalloc_node(idxd->max_engines, sizeof(struct idxd_engine *),
+ GFP_KERNEL, dev_to_node(dev));
+ if (!idxd->engines)
+ return -ENOMEM;
+
for (i = 0; i < idxd->max_engines; i++) {
- idxd->engines[i].idxd = idxd;
- idxd->engines[i].id = i;
+ engine = kzalloc_node(sizeof(*engine), GFP_KERNEL, dev_to_node(dev));
+ if (!engine) {
+ rc = -ENOMEM;
+ goto err;
+ }
+
+ engine->id = i;
+ engine->idxd = idxd;
+ device_initialize(&engine->conf_dev);
+ engine->conf_dev.parent = &idxd->conf_dev;
+ engine->conf_dev.type = &idxd_engine_device_type;
+ rc = dev_set_name(&engine->conf_dev, "engine%d.%d", idxd->id, engine->id);
+ if (rc < 0) {
+ put_device(&engine->conf_dev);
+ goto err;
+ }
+
+ idxd->engines[i] = engine;
}
- idxd->wq = create_workqueue(dev_name(dev));
- if (!idxd->wq)
+ return 0;
+
+ err:
+ while (--i >= 0)
+ put_device(&idxd->engines[i]->conf_dev);
+ return rc;
+}
+
+static int idxd_setup_groups(struct idxd_device *idxd)
+{
+ struct device *dev = &idxd->pdev->dev;
+ struct idxd_group *group;
+ int i, rc;
+
+ idxd->groups = kcalloc_node(idxd->max_groups, sizeof(struct idxd_group *),
+ GFP_KERNEL, dev_to_node(dev));
+ if (!idxd->groups)
return -ENOMEM;
+ for (i = 0; i < idxd->max_groups; i++) {
+ group = kzalloc_node(sizeof(*group), GFP_KERNEL, dev_to_node(dev));
+ if (!group) {
+ rc = -ENOMEM;
+ goto err;
+ }
+
+ group->id = i;
+ group->idxd = idxd;
+ device_initialize(&group->conf_dev);
+ group->conf_dev.parent = &idxd->conf_dev;
+ group->conf_dev.bus = &dsa_bus_type;
+ group->conf_dev.type = &idxd_group_device_type;
+ rc = dev_set_name(&group->conf_dev, "group%d.%d", idxd->id, group->id);
+ if (rc < 0) {
+ put_device(&group->conf_dev);
+ goto err;
+ }
+
+ idxd->groups[i] = group;
+ group->tc_a = -1;
+ group->tc_b = -1;
+ }
+
+ return 0;
+
+ err:
+ while (--i >= 0)
+ put_device(&idxd->groups[i]->conf_dev);
+ return rc;
+}
+
+static int idxd_setup_internals(struct idxd_device *idxd)
+{
+ struct device *dev = &idxd->pdev->dev;
+ int rc, i;
+
+ init_waitqueue_head(&idxd->cmd_waitq);
+
+ if (idxd->hw.cmd_cap & BIT(IDXD_CMD_REQUEST_INT_HANDLE)) {
+ idxd->int_handles = devm_kcalloc(dev, idxd->max_wqs, sizeof(int), GFP_KERNEL);
+ if (!idxd->int_handles)
+ return -ENOMEM;
+ }
+
+ rc = idxd_setup_wqs(idxd);
+ if (rc < 0)
+ goto err_wqs;
+
+ rc = idxd_setup_engines(idxd);
+ if (rc < 0)
+ goto err_engine;
+
+ rc = idxd_setup_groups(idxd);
+ if (rc < 0)
+ goto err_group;
+
+ idxd->wq = create_workqueue(dev_name(dev));
+ if (!idxd->wq) {
+ rc = -ENOMEM;
+ goto err_wkq_create;
+ }
+
return 0;
+
+ err_wkq_create:
+ for (i = 0; i < idxd->max_groups; i++)
+ put_device(&idxd->groups[i]->conf_dev);
+ err_group:
+ for (i = 0; i < idxd->max_engines; i++)
+ put_device(&idxd->engines[i]->conf_dev);
+ err_engine:
+ for (i = 0; i < idxd->max_wqs; i++)
+ put_device(&idxd->wqs[i]->conf_dev);
+ err_wqs:
+ kfree(idxd->int_handles);
+ return rc;
}
static void idxd_read_table_offsets(struct idxd_device *idxd)
@@ -241,6 +375,12 @@ static void idxd_read_caps(struct idxd_device *idxd)
/* reading generic capabilities */
idxd->hw.gen_cap.bits = ioread64(idxd->reg_base + IDXD_GENCAP_OFFSET);
dev_dbg(dev, "gen_cap: %#llx\n", idxd->hw.gen_cap.bits);
+
+ if (idxd->hw.gen_cap.cmd_cap) {
+ idxd->hw.cmd_cap = ioread32(idxd->reg_base + IDXD_CMDCAP_OFFSET);
+ dev_dbg(dev, "cmd_cap: %#x\n", idxd->hw.cmd_cap);
+ }
+
idxd->max_xfer_bytes = 1ULL << idxd->hw.gen_cap.max_xfer_shift;
dev_dbg(dev, "max xfer size: %llu bytes\n", idxd->max_xfer_bytes);
idxd->max_batch_size = 1U << idxd->hw.gen_cap.max_batch_shift;
@@ -283,17 +423,34 @@ static void idxd_read_caps(struct idxd_device *idxd)
}
}
-static struct idxd_device *idxd_alloc(struct pci_dev *pdev)
+static struct idxd_device *idxd_alloc(struct pci_dev *pdev, struct idxd_driver_data *data)
{
struct device *dev = &pdev->dev;
struct idxd_device *idxd;
+ int rc;
- idxd = devm_kzalloc(dev, sizeof(struct idxd_device), GFP_KERNEL);
+ idxd = kzalloc_node(sizeof(*idxd), GFP_KERNEL, dev_to_node(dev));
if (!idxd)
return NULL;
idxd->pdev = pdev;
+ idxd->data = data;
+ idxd->id = ida_alloc(&idxd_ida, GFP_KERNEL);
+ if (idxd->id < 0)
+ return NULL;
+
+ device_initialize(&idxd->conf_dev);
+ idxd->conf_dev.parent = dev;
+ idxd->conf_dev.bus = &dsa_bus_type;
+ idxd->conf_dev.type = idxd->data->dev_type;
+ rc = dev_set_name(&idxd->conf_dev, "%s%d", idxd->data->name_prefix, idxd->id);
+ if (rc < 0) {
+ put_device(&idxd->conf_dev);
+ return NULL;
+ }
+
spin_lock_init(&idxd->dev_lock);
+ spin_lock_init(&idxd->cmd_lock);
return idxd;
}
@@ -346,11 +503,18 @@ static int idxd_probe(struct idxd_device *idxd)
dev_dbg(dev, "IDXD reset complete\n");
if (IS_ENABLED(CONFIG_INTEL_IDXD_SVM) && sva) {
- rc = idxd_enable_system_pasid(idxd);
- if (rc < 0)
- dev_warn(dev, "Failed to enable PASID. No SVA support: %d\n", rc);
- else
- set_bit(IDXD_FLAG_PASID_ENABLED, &idxd->flags);
+ rc = iommu_dev_enable_feature(dev, IOMMU_DEV_FEAT_SVA);
+ if (rc == 0) {
+ rc = idxd_enable_system_pasid(idxd);
+ if (rc < 0) {
+ iommu_dev_disable_feature(dev, IOMMU_DEV_FEAT_SVA);
+ dev_warn(dev, "Failed to enable PASID. No SVA support: %d\n", rc);
+ } else {
+ set_bit(IDXD_FLAG_PASID_ENABLED, &idxd->flags);
+ }
+ } else {
+ dev_warn(dev, "Unable to turn on SVA feature.\n");
+ }
} else if (!sva) {
dev_warn(dev, "User forced SVA off via module param.\n");
}
@@ -360,80 +524,75 @@ static int idxd_probe(struct idxd_device *idxd)
rc = idxd_setup_internals(idxd);
if (rc)
- goto err_setup;
+ goto err;
+
+ /* If the configs are readonly, then load them from device */
+ if (!test_bit(IDXD_FLAG_CONFIGURABLE, &idxd->flags)) {
+ dev_dbg(dev, "Loading RO device config\n");
+ rc = idxd_device_load_config(idxd);
+ if (rc < 0)
+ goto err;
+ }
rc = idxd_setup_interrupts(idxd);
if (rc)
- goto err_setup;
+ goto err;
dev_dbg(dev, "IDXD interrupt setup complete.\n");
- mutex_lock(&idxd_idr_lock);
- idxd->id = idr_alloc(&idxd_idrs[idxd->type], idxd, 0, 0, GFP_KERNEL);
- mutex_unlock(&idxd_idr_lock);
- if (idxd->id < 0) {
- rc = -ENOMEM;
- goto err_idr_fail;
- }
-
idxd->major = idxd_cdev_get_major(idxd);
+ rc = perfmon_pmu_init(idxd);
+ if (rc < 0)
+ dev_warn(dev, "Failed to initialize perfmon. No PMU support: %d\n", rc);
+
dev_dbg(dev, "IDXD device %d probed successfully\n", idxd->id);
return 0;
- err_idr_fail:
- idxd_mask_error_interrupts(idxd);
- idxd_mask_msix_vectors(idxd);
- err_setup:
+ err:
if (device_pasid_enabled(idxd))
idxd_disable_system_pasid(idxd);
+ iommu_dev_disable_feature(dev, IOMMU_DEV_FEAT_SVA);
return rc;
}
-static void idxd_type_init(struct idxd_device *idxd)
-{
- if (idxd->type == IDXD_TYPE_DSA)
- idxd->compl_size = sizeof(struct dsa_completion_record);
- else if (idxd->type == IDXD_TYPE_IAX)
- idxd->compl_size = sizeof(struct iax_completion_record);
-}
-
static int idxd_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
{
struct device *dev = &pdev->dev;
struct idxd_device *idxd;
+ struct idxd_driver_data *data = (struct idxd_driver_data *)id->driver_data;
int rc;
- rc = pcim_enable_device(pdev);
+ rc = pci_enable_device(pdev);
if (rc)
return rc;
dev_dbg(dev, "Alloc IDXD context\n");
- idxd = idxd_alloc(pdev);
- if (!idxd)
- return -ENOMEM;
+ idxd = idxd_alloc(pdev, data);
+ if (!idxd) {
+ rc = -ENOMEM;
+ goto err_idxd_alloc;
+ }
dev_dbg(dev, "Mapping BARs\n");
- idxd->reg_base = pcim_iomap(pdev, IDXD_MMIO_BAR, 0);
- if (!idxd->reg_base)
- return -ENOMEM;
+ idxd->reg_base = pci_iomap(pdev, IDXD_MMIO_BAR, 0);
+ if (!idxd->reg_base) {
+ rc = -ENOMEM;
+ goto err_iomap;
+ }
dev_dbg(dev, "Set DMA masks\n");
rc = pci_set_dma_mask(pdev, DMA_BIT_MASK(64));
if (rc)
rc = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
if (rc)
- return rc;
+ goto err;
rc = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64));
if (rc)
rc = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32));
if (rc)
- return rc;
-
- idxd_set_type(idxd);
-
- idxd_type_init(idxd);
+ goto err;
dev_dbg(dev, "Set PCI master\n");
pci_set_master(pdev);
@@ -443,13 +602,13 @@ static int idxd_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
rc = idxd_probe(idxd);
if (rc) {
dev_err(dev, "Intel(R) IDXD DMA Engine init failed\n");
- return -ENODEV;
+ goto err;
}
- rc = idxd_setup_sysfs(idxd);
+ rc = idxd_register_devices(idxd);
if (rc) {
dev_err(dev, "IDXD sysfs setup failed\n");
- return -ENODEV;
+ goto err;
}
idxd->state = IDXD_DEV_CONF_READY;
@@ -458,6 +617,14 @@ static int idxd_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
idxd->hw.version);
return 0;
+
+ err:
+ pci_iounmap(pdev, idxd->reg_base);
+ err_iomap:
+ put_device(&idxd->conf_dev);
+ err_idxd_alloc:
+ pci_disable_device(pdev);
+ return rc;
}
static void idxd_flush_pending_llist(struct idxd_irq_entry *ie)
@@ -486,6 +653,36 @@ static void idxd_flush_work_list(struct idxd_irq_entry *ie)
}
}
+void idxd_wqs_quiesce(struct idxd_device *idxd)
+{
+ struct idxd_wq *wq;
+ int i;
+
+ for (i = 0; i < idxd->max_wqs; i++) {
+ wq = idxd->wqs[i];
+ if (wq->state == IDXD_WQ_ENABLED && wq->type == IDXD_WQT_KERNEL)
+ idxd_wq_quiesce(wq);
+ }
+}
+
+static void idxd_release_int_handles(struct idxd_device *idxd)
+{
+ struct device *dev = &idxd->pdev->dev;
+ int i, rc;
+
+ for (i = 0; i < idxd->num_wq_irqs; i++) {
+ if (idxd->hw.cmd_cap & BIT(IDXD_CMD_RELEASE_INT_HANDLE)) {
+ rc = idxd_device_release_int_handle(idxd, idxd->int_handles[i],
+ IDXD_IRQ_MSIX);
+ if (rc < 0)
+ dev_warn(dev, "irq handle %d release failed\n",
+ idxd->int_handles[i]);
+ else
+ dev_dbg(dev, "int handle requested: %u\n", idxd->int_handles[i]);
+ }
+ }
+}
+
static void idxd_shutdown(struct pci_dev *pdev)
{
struct idxd_device *idxd = pci_get_drvdata(pdev);
@@ -503,13 +700,19 @@ static void idxd_shutdown(struct pci_dev *pdev)
for (i = 0; i < msixcnt; i++) {
irq_entry = &idxd->irq_entries[i];
- synchronize_irq(idxd->msix_entries[i].vector);
+ synchronize_irq(irq_entry->vector);
+ free_irq(irq_entry->vector, irq_entry);
if (i == 0)
continue;
idxd_flush_pending_llist(irq_entry);
idxd_flush_work_list(irq_entry);
}
+ idxd_msix_perm_clear(idxd);
+ idxd_release_int_handles(idxd);
+ pci_free_irq_vectors(pdev);
+ pci_iounmap(pdev, idxd->reg_base);
+ pci_disable_device(pdev);
destroy_workqueue(idxd->wq);
}
@@ -518,13 +721,12 @@ static void idxd_remove(struct pci_dev *pdev)
struct idxd_device *idxd = pci_get_drvdata(pdev);
dev_dbg(&pdev->dev, "%s called\n", __func__);
- idxd_cleanup_sysfs(idxd);
idxd_shutdown(pdev);
if (device_pasid_enabled(idxd))
idxd_disable_system_pasid(idxd);
- mutex_lock(&idxd_idr_lock);
- idr_remove(&idxd_idrs[idxd->type], idxd->id);
- mutex_unlock(&idxd_idr_lock);
+ idxd_unregister_devices(idxd);
+ perfmon_pmu_remove(idxd);
+ iommu_dev_disable_feature(&pdev->dev, IOMMU_DEV_FEAT_SVA);
}
static struct pci_driver idxd_pci_driver = {
@@ -537,7 +739,7 @@ static struct pci_driver idxd_pci_driver = {
static int __init idxd_init_module(void)
{
- int err, i;
+ int err;
/*
* If the CPU does not support MOVDIR64B or ENQCMDS, there's no point in
@@ -553,8 +755,7 @@ static int __init idxd_init_module(void)
else
support_enqcmd = true;
- for (i = 0; i < IDXD_TYPE_MAX; i++)
- idr_init(&idxd_idrs[i]);
+ perfmon_init();
err = idxd_register_bus_type();
if (err < 0)
@@ -589,5 +790,6 @@ static void __exit idxd_exit_module(void)
pci_unregister_driver(&idxd_pci_driver);
idxd_cdev_remove();
idxd_unregister_bus_type();
+ perfmon_exit();
}
module_exit(idxd_exit_module);
diff --git a/drivers/dma/idxd/irq.c b/drivers/dma/idxd/irq.c
index a60ca11a5784..ae68e1e5487a 100644
--- a/drivers/dma/idxd/irq.c
+++ b/drivers/dma/idxd/irq.c
@@ -45,7 +45,7 @@ static void idxd_device_reinit(struct work_struct *work)
goto out;
for (i = 0; i < idxd->max_wqs; i++) {
- struct idxd_wq *wq = &idxd->wqs[i];
+ struct idxd_wq *wq = idxd->wqs[i];
if (wq->state == IDXD_WQ_ENABLED) {
rc = idxd_wq_enable(wq);
@@ -102,15 +102,6 @@ static int idxd_device_schedule_fault_process(struct idxd_device *idxd,
return 0;
}
-irqreturn_t idxd_irq_handler(int vec, void *data)
-{
- struct idxd_irq_entry *irq_entry = data;
- struct idxd_device *idxd = irq_entry->idxd;
-
- idxd_mask_msix_vector(idxd, irq_entry->id);
- return IRQ_WAKE_THREAD;
-}
-
static int process_misc_interrupts(struct idxd_device *idxd, u32 cause)
{
struct device *dev = &idxd->pdev->dev;
@@ -124,22 +115,24 @@ static int process_misc_interrupts(struct idxd_device *idxd, u32 cause)
for (i = 0; i < 4; i++)
idxd->sw_err.bits[i] = ioread64(idxd->reg_base +
IDXD_SWERR_OFFSET + i * sizeof(u64));
- iowrite64(IDXD_SWERR_ACK, idxd->reg_base + IDXD_SWERR_OFFSET);
+
+ iowrite64(idxd->sw_err.bits[0] & IDXD_SWERR_ACK,
+ idxd->reg_base + IDXD_SWERR_OFFSET);
if (idxd->sw_err.valid && idxd->sw_err.wq_idx_valid) {
int id = idxd->sw_err.wq_idx;
- struct idxd_wq *wq = &idxd->wqs[id];
+ struct idxd_wq *wq = idxd->wqs[id];
if (wq->type == IDXD_WQT_USER)
- wake_up_interruptible(&wq->idxd_cdev.err_queue);
+ wake_up_interruptible(&wq->err_queue);
} else {
int i;
for (i = 0; i < idxd->max_wqs; i++) {
- struct idxd_wq *wq = &idxd->wqs[i];
+ struct idxd_wq *wq = idxd->wqs[i];
if (wq->type == IDXD_WQT_USER)
- wake_up_interruptible(&wq->idxd_cdev.err_queue);
+ wake_up_interruptible(&wq->err_queue);
}
}
@@ -163,11 +156,8 @@ static int process_misc_interrupts(struct idxd_device *idxd, u32 cause)
}
if (cause & IDXD_INTC_PERFMON_OVFL) {
- /*
- * Driver does not utilize perfmon counter overflow interrupt
- * yet.
- */
val |= IDXD_INTC_PERFMON_OVFL;
+ perfmon_counter_overflow(idxd);
}
val ^= cause;
@@ -200,6 +190,8 @@ static int process_misc_interrupts(struct idxd_device *idxd, u32 cause)
queue_work(idxd->wq, &idxd->work);
} else {
spin_lock_bh(&idxd->dev_lock);
+ idxd_wqs_quiesce(idxd);
+ idxd_wqs_unmap_portal(idxd);
idxd_device_wqs_clear_state(idxd);
dev_err(&idxd->pdev->dev,
"idxd halted, need %s.\n",
@@ -233,7 +225,6 @@ irqreturn_t idxd_misc_thread(int vec, void *data)
iowrite32(cause, idxd->reg_base + IDXD_INTCAUSE_OFFSET);
}
- idxd_unmask_msix_vector(idxd, irq_entry->id);
return IRQ_HANDLED;
}
@@ -390,8 +381,6 @@ irqreturn_t idxd_wq_thread(int irq, void *data)
int processed;
processed = idxd_desc_process(irq_entry);
- idxd_unmask_msix_vector(irq_entry->idxd, irq_entry->id);
-
if (processed == 0)
return IRQ_NONE;
diff --git a/drivers/dma/idxd/perfmon.c b/drivers/dma/idxd/perfmon.c
new file mode 100644
index 000000000000..d73004f47cf4
--- /dev/null
+++ b/drivers/dma/idxd/perfmon.c
@@ -0,0 +1,662 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright(c) 2020 Intel Corporation. All rights rsvd. */
+
+#include <linux/sched/task.h>
+#include <linux/io-64-nonatomic-lo-hi.h>
+#include "idxd.h"
+#include "perfmon.h"
+
+static ssize_t cpumask_show(struct device *dev, struct device_attribute *attr,
+ char *buf);
+
+static cpumask_t perfmon_dsa_cpu_mask;
+static bool cpuhp_set_up;
+static enum cpuhp_state cpuhp_slot;
+
+/*
+ * perf userspace reads this attribute to determine which cpus to open
+ * counters on. It's connected to perfmon_dsa_cpu_mask, which is
+ * maintained by the cpu hotplug handlers.
+ */
+static DEVICE_ATTR_RO(cpumask);
+
+static struct attribute *perfmon_cpumask_attrs[] = {
+ &dev_attr_cpumask.attr,
+ NULL,
+};
+
+static struct attribute_group cpumask_attr_group = {
+ .attrs = perfmon_cpumask_attrs,
+};
+
+/*
+ * These attributes specify the bits in the config word that the perf
+ * syscall uses to pass the event ids and categories to perfmon.
+ */
+DEFINE_PERFMON_FORMAT_ATTR(event_category, "config:0-3");
+DEFINE_PERFMON_FORMAT_ATTR(event, "config:4-31");
+
+/*
+ * These attributes specify the bits in the config1 word that the perf
+ * syscall uses to pass filter data to perfmon.
+ */
+DEFINE_PERFMON_FORMAT_ATTR(filter_wq, "config1:0-31");
+DEFINE_PERFMON_FORMAT_ATTR(filter_tc, "config1:32-39");
+DEFINE_PERFMON_FORMAT_ATTR(filter_pgsz, "config1:40-43");
+DEFINE_PERFMON_FORMAT_ATTR(filter_sz, "config1:44-51");
+DEFINE_PERFMON_FORMAT_ATTR(filter_eng, "config1:52-59");
+
+#define PERFMON_FILTERS_START 2
+#define PERFMON_FILTERS_MAX 5
+
+static struct attribute *perfmon_format_attrs[] = {
+ &format_attr_idxd_event_category.attr,
+ &format_attr_idxd_event.attr,
+ &format_attr_idxd_filter_wq.attr,
+ &format_attr_idxd_filter_tc.attr,
+ &format_attr_idxd_filter_pgsz.attr,
+ &format_attr_idxd_filter_sz.attr,
+ &format_attr_idxd_filter_eng.attr,
+ NULL,
+};
+
+static struct attribute_group perfmon_format_attr_group = {
+ .name = "format",
+ .attrs = perfmon_format_attrs,
+};
+
+static const struct attribute_group *perfmon_attr_groups[] = {
+ &perfmon_format_attr_group,
+ &cpumask_attr_group,
+ NULL,
+};
+
+static ssize_t cpumask_show(struct device *dev, struct device_attribute *attr,
+ char *buf)
+{
+ return cpumap_print_to_pagebuf(true, buf, &perfmon_dsa_cpu_mask);
+}
+
+static bool is_idxd_event(struct idxd_pmu *idxd_pmu, struct perf_event *event)
+{
+ return &idxd_pmu->pmu == event->pmu;
+}
+
+static int perfmon_collect_events(struct idxd_pmu *idxd_pmu,
+ struct perf_event *leader,
+ bool do_grp)
+{
+ struct perf_event *event;
+ int n, max_count;
+
+ max_count = idxd_pmu->n_counters;
+ n = idxd_pmu->n_events;
+
+ if (n >= max_count)
+ return -EINVAL;
+
+ if (is_idxd_event(idxd_pmu, leader)) {
+ idxd_pmu->event_list[n] = leader;
+ idxd_pmu->event_list[n]->hw.idx = n;
+ n++;
+ }
+
+ if (!do_grp)
+ return n;
+
+ for_each_sibling_event(event, leader) {
+ if (!is_idxd_event(idxd_pmu, event) ||
+ event->state <= PERF_EVENT_STATE_OFF)
+ continue;
+
+ if (n >= max_count)
+ return -EINVAL;
+
+ idxd_pmu->event_list[n] = event;
+ idxd_pmu->event_list[n]->hw.idx = n;
+ n++;
+ }
+
+ return n;
+}
+
+static void perfmon_assign_hw_event(struct idxd_pmu *idxd_pmu,
+ struct perf_event *event, int idx)
+{
+ struct idxd_device *idxd = idxd_pmu->idxd;
+ struct hw_perf_event *hwc = &event->hw;
+
+ hwc->idx = idx;
+ hwc->config_base = ioread64(CNTRCFG_REG(idxd, idx));
+ hwc->event_base = ioread64(CNTRCFG_REG(idxd, idx));
+}
+
+static int perfmon_assign_event(struct idxd_pmu *idxd_pmu,
+ struct perf_event *event)
+{
+ int i;
+
+ for (i = 0; i < IDXD_PMU_EVENT_MAX; i++)
+ if (!test_and_set_bit(i, idxd_pmu->used_mask))
+ return i;
+
+ return -EINVAL;
+}
+
+/*
+ * Check whether there are enough counters to satisfy that all the
+ * events in the group can actually be scheduled at the same time.
+ *
+ * To do this, create a fake idxd_pmu object so the event collection
+ * and assignment functions can be used without affecting the internal
+ * state of the real idxd_pmu object.
+ */
+static int perfmon_validate_group(struct idxd_pmu *pmu,
+ struct perf_event *event)
+{
+ struct perf_event *leader = event->group_leader;
+ struct idxd_pmu *fake_pmu;
+ int i, ret = 0, n, idx;
+
+ fake_pmu = kzalloc(sizeof(*fake_pmu), GFP_KERNEL);
+ if (!fake_pmu)
+ return -ENOMEM;
+
+ fake_pmu->pmu.name = pmu->pmu.name;
+ fake_pmu->n_counters = pmu->n_counters;
+
+ n = perfmon_collect_events(fake_pmu, leader, true);
+ if (n < 0) {
+ ret = n;
+ goto out;
+ }
+
+ fake_pmu->n_events = n;
+ n = perfmon_collect_events(fake_pmu, event, false);
+ if (n < 0) {
+ ret = n;
+ goto out;
+ }
+
+ fake_pmu->n_events = n;
+
+ for (i = 0; i < n; i++) {
+ event = fake_pmu->event_list[i];
+
+ idx = perfmon_assign_event(fake_pmu, event);
+ if (idx < 0) {
+ ret = idx;
+ goto out;
+ }
+ }
+out:
+ kfree(fake_pmu);
+
+ return ret;
+}
+
+static int perfmon_pmu_event_init(struct perf_event *event)
+{
+ struct idxd_device *idxd;
+ int ret = 0;
+
+ idxd = event_to_idxd(event);
+ event->hw.idx = -1;
+
+ if (event->attr.type != event->pmu->type)
+ return -ENOENT;
+
+ /* sampling not supported */
+ if (event->attr.sample_period)
+ return -EINVAL;
+
+ if (event->cpu < 0)
+ return -EINVAL;
+
+ if (event->pmu != &idxd->idxd_pmu->pmu)
+ return -EINVAL;
+
+ event->hw.event_base = ioread64(PERFMON_TABLE_OFFSET(idxd));
+ event->cpu = idxd->idxd_pmu->cpu;
+ event->hw.config = event->attr.config;
+
+ if (event->group_leader != event)
+ /* non-group events have themselves as leader */
+ ret = perfmon_validate_group(idxd->idxd_pmu, event);
+
+ return ret;
+}
+
+static inline u64 perfmon_pmu_read_counter(struct perf_event *event)
+{
+ struct hw_perf_event *hwc = &event->hw;
+ struct idxd_device *idxd;
+ int cntr = hwc->idx;
+
+ idxd = event_to_idxd(event);
+
+ return ioread64(CNTRDATA_REG(idxd, cntr));
+}
+
+static void perfmon_pmu_event_update(struct perf_event *event)
+{
+ struct idxd_device *idxd = event_to_idxd(event);
+ u64 prev_raw_count, new_raw_count, delta, p, n;
+ int shift = 64 - idxd->idxd_pmu->counter_width;
+ struct hw_perf_event *hwc = &event->hw;
+
+ do {
+ prev_raw_count = local64_read(&hwc->prev_count);
+ new_raw_count = perfmon_pmu_read_counter(event);
+ } while (local64_cmpxchg(&hwc->prev_count, prev_raw_count,
+ new_raw_count) != prev_raw_count);
+
+ n = (new_raw_count << shift);
+ p = (prev_raw_count << shift);
+
+ delta = ((n - p) >> shift);
+
+ local64_add(delta, &event->count);
+}
+
+void perfmon_counter_overflow(struct idxd_device *idxd)
+{
+ int i, n_counters, max_loop = OVERFLOW_SIZE;
+ struct perf_event *event;
+ unsigned long ovfstatus;
+
+ n_counters = min(idxd->idxd_pmu->n_counters, OVERFLOW_SIZE);
+
+ ovfstatus = ioread32(OVFSTATUS_REG(idxd));
+
+ /*
+ * While updating overflowed counters, other counters behind
+ * them could overflow and be missed in a given pass.
+ * Normally this could happen at most n_counters times, but in
+ * theory a tiny counter width could result in continual
+ * overflows and endless looping. max_loop provides a
+ * failsafe in that highly unlikely case.
+ */
+ while (ovfstatus && max_loop--) {
+ /* Figure out which counter(s) overflowed */
+ for_each_set_bit(i, &ovfstatus, n_counters) {
+ unsigned long ovfstatus_clear = 0;
+
+ /* Update event->count for overflowed counter */
+ event = idxd->idxd_pmu->event_list[i];
+ perfmon_pmu_event_update(event);
+ /* Writing 1 to OVFSTATUS bit clears it */
+ set_bit(i, &ovfstatus_clear);
+ iowrite32(ovfstatus_clear, OVFSTATUS_REG(idxd));
+ }
+
+ ovfstatus = ioread32(OVFSTATUS_REG(idxd));
+ }
+
+ /*
+ * Should never happen. If so, it means a counter(s) looped
+ * around twice while this handler was running.
+ */
+ WARN_ON_ONCE(ovfstatus);
+}
+
+static inline void perfmon_reset_config(struct idxd_device *idxd)
+{
+ iowrite32(CONFIG_RESET, PERFRST_REG(idxd));
+ iowrite32(0, OVFSTATUS_REG(idxd));
+ iowrite32(0, PERFFRZ_REG(idxd));
+}
+
+static inline void perfmon_reset_counters(struct idxd_device *idxd)
+{
+ iowrite32(CNTR_RESET, PERFRST_REG(idxd));
+}
+
+static inline void perfmon_reset(struct idxd_device *idxd)
+{
+ perfmon_reset_config(idxd);
+ perfmon_reset_counters(idxd);
+}
+
+static void perfmon_pmu_event_start(struct perf_event *event, int mode)
+{
+ u32 flt_wq, flt_tc, flt_pg_sz, flt_xfer_sz, flt_eng = 0;
+ u64 cntr_cfg, cntrdata, event_enc, event_cat = 0;
+ struct hw_perf_event *hwc = &event->hw;
+ union filter_cfg flt_cfg;
+ union event_cfg event_cfg;
+ struct idxd_device *idxd;
+ int cntr;
+
+ idxd = event_to_idxd(event);
+
+ event->hw.idx = hwc->idx;
+ cntr = hwc->idx;
+
+ /* Obtain event category and event value from user space */
+ event_cfg.val = event->attr.config;
+ flt_cfg.val = event->attr.config1;
+ event_cat = event_cfg.event_cat;
+ event_enc = event_cfg.event_enc;
+
+ /* Obtain filter configuration from user space */
+ flt_wq = flt_cfg.wq;
+ flt_tc = flt_cfg.tc;
+ flt_pg_sz = flt_cfg.pg_sz;
+ flt_xfer_sz = flt_cfg.xfer_sz;
+ flt_eng = flt_cfg.eng;
+
+ if (flt_wq && test_bit(FLT_WQ, &idxd->idxd_pmu->supported_filters))
+ iowrite32(flt_wq, FLTCFG_REG(idxd, cntr, FLT_WQ));
+ if (flt_tc && test_bit(FLT_TC, &idxd->idxd_pmu->supported_filters))
+ iowrite32(flt_tc, FLTCFG_REG(idxd, cntr, FLT_TC));
+ if (flt_pg_sz && test_bit(FLT_PG_SZ, &idxd->idxd_pmu->supported_filters))
+ iowrite32(flt_pg_sz, FLTCFG_REG(idxd, cntr, FLT_PG_SZ));
+ if (flt_xfer_sz && test_bit(FLT_XFER_SZ, &idxd->idxd_pmu->supported_filters))
+ iowrite32(flt_xfer_sz, FLTCFG_REG(idxd, cntr, FLT_XFER_SZ));
+ if (flt_eng && test_bit(FLT_ENG, &idxd->idxd_pmu->supported_filters))
+ iowrite32(flt_eng, FLTCFG_REG(idxd, cntr, FLT_ENG));
+
+ /* Read the start value */
+ cntrdata = ioread64(CNTRDATA_REG(idxd, cntr));
+ local64_set(&event->hw.prev_count, cntrdata);
+
+ /* Set counter to event/category */
+ cntr_cfg = event_cat << CNTRCFG_CATEGORY_SHIFT;
+ cntr_cfg |= event_enc << CNTRCFG_EVENT_SHIFT;
+ /* Set interrupt on overflow and counter enable bits */
+ cntr_cfg |= (CNTRCFG_IRQ_OVERFLOW | CNTRCFG_ENABLE);
+
+ iowrite64(cntr_cfg, CNTRCFG_REG(idxd, cntr));
+}
+
+static void perfmon_pmu_event_stop(struct perf_event *event, int mode)
+{
+ struct hw_perf_event *hwc = &event->hw;
+ struct idxd_device *idxd;
+ int i, cntr = hwc->idx;
+ u64 cntr_cfg;
+
+ idxd = event_to_idxd(event);
+
+ /* remove this event from event list */
+ for (i = 0; i < idxd->idxd_pmu->n_events; i++) {
+ if (event != idxd->idxd_pmu->event_list[i])
+ continue;
+
+ for (++i; i < idxd->idxd_pmu->n_events; i++)
+ idxd->idxd_pmu->event_list[i - 1] = idxd->idxd_pmu->event_list[i];
+ --idxd->idxd_pmu->n_events;
+ break;
+ }
+
+ cntr_cfg = ioread64(CNTRCFG_REG(idxd, cntr));
+ cntr_cfg &= ~CNTRCFG_ENABLE;
+ iowrite64(cntr_cfg, CNTRCFG_REG(idxd, cntr));
+
+ if (mode == PERF_EF_UPDATE)
+ perfmon_pmu_event_update(event);
+
+ event->hw.idx = -1;
+ clear_bit(cntr, idxd->idxd_pmu->used_mask);
+}
+
+static void perfmon_pmu_event_del(struct perf_event *event, int mode)
+{
+ perfmon_pmu_event_stop(event, PERF_EF_UPDATE);
+}
+
+static int perfmon_pmu_event_add(struct perf_event *event, int flags)
+{
+ struct idxd_device *idxd = event_to_idxd(event);
+ struct idxd_pmu *idxd_pmu = idxd->idxd_pmu;
+ struct hw_perf_event *hwc = &event->hw;
+ int idx, n;
+
+ n = perfmon_collect_events(idxd_pmu, event, false);
+ if (n < 0)
+ return n;
+
+ hwc->state = PERF_HES_UPTODATE | PERF_HES_STOPPED;
+ if (!(flags & PERF_EF_START))
+ hwc->state |= PERF_HES_ARCH;
+
+ idx = perfmon_assign_event(idxd_pmu, event);
+ if (idx < 0)
+ return idx;
+
+ perfmon_assign_hw_event(idxd_pmu, event, idx);
+
+ if (flags & PERF_EF_START)
+ perfmon_pmu_event_start(event, 0);
+
+ idxd_pmu->n_events = n;
+
+ return 0;
+}
+
+static void enable_perfmon_pmu(struct idxd_device *idxd)
+{
+ iowrite32(COUNTER_UNFREEZE, PERFFRZ_REG(idxd));
+}
+
+static void disable_perfmon_pmu(struct idxd_device *idxd)
+{
+ iowrite32(COUNTER_FREEZE, PERFFRZ_REG(idxd));
+}
+
+static void perfmon_pmu_enable(struct pmu *pmu)
+{
+ struct idxd_device *idxd = pmu_to_idxd(pmu);
+
+ enable_perfmon_pmu(idxd);
+}
+
+static void perfmon_pmu_disable(struct pmu *pmu)
+{
+ struct idxd_device *idxd = pmu_to_idxd(pmu);
+
+ disable_perfmon_pmu(idxd);
+}
+
+static void skip_filter(int i)
+{
+ int j;
+
+ for (j = i; j < PERFMON_FILTERS_MAX; j++)
+ perfmon_format_attrs[PERFMON_FILTERS_START + j] =
+ perfmon_format_attrs[PERFMON_FILTERS_START + j + 1];
+}
+
+static void idxd_pmu_init(struct idxd_pmu *idxd_pmu)
+{
+ int i;
+
+ for (i = 0 ; i < PERFMON_FILTERS_MAX; i++) {
+ if (!test_bit(i, &idxd_pmu->supported_filters))
+ skip_filter(i);
+ }
+
+ idxd_pmu->pmu.name = idxd_pmu->name;
+ idxd_pmu->pmu.attr_groups = perfmon_attr_groups;
+ idxd_pmu->pmu.task_ctx_nr = perf_invalid_context;
+ idxd_pmu->pmu.event_init = perfmon_pmu_event_init;
+ idxd_pmu->pmu.pmu_enable = perfmon_pmu_enable,
+ idxd_pmu->pmu.pmu_disable = perfmon_pmu_disable,
+ idxd_pmu->pmu.add = perfmon_pmu_event_add;
+ idxd_pmu->pmu.del = perfmon_pmu_event_del;
+ idxd_pmu->pmu.start = perfmon_pmu_event_start;
+ idxd_pmu->pmu.stop = perfmon_pmu_event_stop;
+ idxd_pmu->pmu.read = perfmon_pmu_event_update;
+ idxd_pmu->pmu.capabilities = PERF_PMU_CAP_NO_EXCLUDE;
+ idxd_pmu->pmu.module = THIS_MODULE;
+}
+
+void perfmon_pmu_remove(struct idxd_device *idxd)
+{
+ if (!idxd->idxd_pmu)
+ return;
+
+ cpuhp_state_remove_instance(cpuhp_slot, &idxd->idxd_pmu->cpuhp_node);
+ perf_pmu_unregister(&idxd->idxd_pmu->pmu);
+ kfree(idxd->idxd_pmu);
+ idxd->idxd_pmu = NULL;
+}
+
+static int perf_event_cpu_online(unsigned int cpu, struct hlist_node *node)
+{
+ struct idxd_pmu *idxd_pmu;
+
+ idxd_pmu = hlist_entry_safe(node, typeof(*idxd_pmu), cpuhp_node);
+
+ /* select the first online CPU as the designated reader */
+ if (cpumask_empty(&perfmon_dsa_cpu_mask)) {
+ cpumask_set_cpu(cpu, &perfmon_dsa_cpu_mask);
+ idxd_pmu->cpu = cpu;
+ }
+
+ return 0;
+}
+
+static int perf_event_cpu_offline(unsigned int cpu, struct hlist_node *node)
+{
+ struct idxd_pmu *idxd_pmu;
+ unsigned int target;
+
+ idxd_pmu = hlist_entry_safe(node, typeof(*idxd_pmu), cpuhp_node);
+
+ if (!cpumask_test_and_clear_cpu(cpu, &perfmon_dsa_cpu_mask))
+ return 0;
+
+ target = cpumask_any_but(cpu_online_mask, cpu);
+
+ /* migrate events if there is a valid target */
+ if (target < nr_cpu_ids)
+ cpumask_set_cpu(target, &perfmon_dsa_cpu_mask);
+ else
+ target = -1;
+
+ perf_pmu_migrate_context(&idxd_pmu->pmu, cpu, target);
+
+ return 0;
+}
+
+int perfmon_pmu_init(struct idxd_device *idxd)
+{
+ union idxd_perfcap perfcap;
+ struct idxd_pmu *idxd_pmu;
+ int rc = -ENODEV;
+
+ /*
+ * perfmon module initialization failed, nothing to do
+ */
+ if (!cpuhp_set_up)
+ return -ENODEV;
+
+ /*
+ * If perfmon_offset or num_counters is 0, it means perfmon is
+ * not supported on this hardware.
+ */
+ if (idxd->perfmon_offset == 0)
+ return -ENODEV;
+
+ idxd_pmu = kzalloc(sizeof(*idxd_pmu), GFP_KERNEL);
+ if (!idxd_pmu)
+ return -ENOMEM;
+
+ idxd_pmu->idxd = idxd;
+ idxd->idxd_pmu = idxd_pmu;
+
+ if (idxd->data->type == IDXD_TYPE_DSA) {
+ rc = sprintf(idxd_pmu->name, "dsa%d", idxd->id);
+ if (rc < 0)
+ goto free;
+ } else if (idxd->data->type == IDXD_TYPE_IAX) {
+ rc = sprintf(idxd_pmu->name, "iax%d", idxd->id);
+ if (rc < 0)
+ goto free;
+ } else {
+ goto free;
+ }
+
+ perfmon_reset(idxd);
+
+ perfcap.bits = ioread64(PERFCAP_REG(idxd));
+
+ /*
+ * If total perf counter is 0, stop further registration.
+ * This is necessary in order to support driver running on
+ * guest which does not have pmon support.
+ */
+ if (perfcap.num_perf_counter == 0)
+ goto free;
+
+ /* A counter width of 0 means it can't count */
+ if (perfcap.counter_width == 0)
+ goto free;
+
+ /* Overflow interrupt and counter freeze support must be available */
+ if (!perfcap.overflow_interrupt || !perfcap.counter_freeze)
+ goto free;
+
+ /* Number of event categories cannot be 0 */
+ if (perfcap.num_event_category == 0)
+ goto free;
+
+ /*
+ * We don't support per-counter capabilities for now.
+ */
+ if (perfcap.cap_per_counter)
+ goto free;
+
+ idxd_pmu->n_event_categories = perfcap.num_event_category;
+ idxd_pmu->supported_event_categories = perfcap.global_event_category;
+ idxd_pmu->per_counter_caps_supported = perfcap.cap_per_counter;
+
+ /* check filter capability. If 0, then filters are not supported */
+ idxd_pmu->supported_filters = perfcap.filter;
+ if (perfcap.filter)
+ idxd_pmu->n_filters = hweight8(perfcap.filter);
+
+ /* Store the total number of counters categories, and counter width */
+ idxd_pmu->n_counters = perfcap.num_perf_counter;
+ idxd_pmu->counter_width = perfcap.counter_width;
+
+ idxd_pmu_init(idxd_pmu);
+
+ rc = perf_pmu_register(&idxd_pmu->pmu, idxd_pmu->name, -1);
+ if (rc)
+ goto free;
+
+ rc = cpuhp_state_add_instance(cpuhp_slot, &idxd_pmu->cpuhp_node);
+ if (rc) {
+ perf_pmu_unregister(&idxd->idxd_pmu->pmu);
+ goto free;
+ }
+out:
+ return rc;
+free:
+ kfree(idxd_pmu);
+ idxd->idxd_pmu = NULL;
+
+ goto out;
+}
+
+void __init perfmon_init(void)
+{
+ int rc = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN,
+ "driver/dma/idxd/perf:online",
+ perf_event_cpu_online,
+ perf_event_cpu_offline);
+ if (WARN_ON(rc < 0))
+ return;
+
+ cpuhp_slot = rc;
+ cpuhp_set_up = true;
+}
+
+void __exit perfmon_exit(void)
+{
+ if (cpuhp_set_up)
+ cpuhp_remove_multi_state(cpuhp_slot);
+}
diff --git a/drivers/dma/idxd/perfmon.h b/drivers/dma/idxd/perfmon.h
new file mode 100644
index 000000000000..9a081a1bc605
--- /dev/null
+++ b/drivers/dma/idxd/perfmon.h
@@ -0,0 +1,119 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright(c) 2020 Intel Corporation. All rights rsvd. */
+
+#ifndef _PERFMON_H_
+#define _PERFMON_H_
+
+#include <linux/slab.h>
+#include <linux/pci.h>
+#include <linux/sbitmap.h>
+#include <linux/dmaengine.h>
+#include <linux/percpu-rwsem.h>
+#include <linux/wait.h>
+#include <linux/cdev.h>
+#include <linux/uuid.h>
+#include <linux/idxd.h>
+#include <linux/perf_event.h>
+#include "registers.h"
+
+static inline struct idxd_pmu *event_to_pmu(struct perf_event *event)
+{
+ struct idxd_pmu *idxd_pmu;
+ struct pmu *pmu;
+
+ pmu = event->pmu;
+ idxd_pmu = container_of(pmu, struct idxd_pmu, pmu);
+
+ return idxd_pmu;
+}
+
+static inline struct idxd_device *event_to_idxd(struct perf_event *event)
+{
+ struct idxd_pmu *idxd_pmu;
+ struct pmu *pmu;
+
+ pmu = event->pmu;
+ idxd_pmu = container_of(pmu, struct idxd_pmu, pmu);
+
+ return idxd_pmu->idxd;
+}
+
+static inline struct idxd_device *pmu_to_idxd(struct pmu *pmu)
+{
+ struct idxd_pmu *idxd_pmu;
+
+ idxd_pmu = container_of(pmu, struct idxd_pmu, pmu);
+
+ return idxd_pmu->idxd;
+}
+
+enum dsa_perf_events {
+ DSA_PERF_EVENT_WQ = 0,
+ DSA_PERF_EVENT_ENGINE,
+ DSA_PERF_EVENT_ADDR_TRANS,
+ DSA_PERF_EVENT_OP,
+ DSA_PERF_EVENT_COMPL,
+ DSA_PERF_EVENT_MAX,
+};
+
+enum filter_enc {
+ FLT_WQ = 0,
+ FLT_TC,
+ FLT_PG_SZ,
+ FLT_XFER_SZ,
+ FLT_ENG,
+ FLT_MAX,
+};
+
+#define CONFIG_RESET 0x0000000000000001
+#define CNTR_RESET 0x0000000000000002
+#define CNTR_ENABLE 0x0000000000000001
+#define INTR_OVFL 0x0000000000000002
+
+#define COUNTER_FREEZE 0x00000000FFFFFFFF
+#define COUNTER_UNFREEZE 0x0000000000000000
+#define OVERFLOW_SIZE 32
+
+#define CNTRCFG_ENABLE BIT(0)
+#define CNTRCFG_IRQ_OVERFLOW BIT(1)
+#define CNTRCFG_CATEGORY_SHIFT 8
+#define CNTRCFG_EVENT_SHIFT 32
+
+#define PERFMON_TABLE_OFFSET(_idxd) \
+({ \
+ typeof(_idxd) __idxd = (_idxd); \
+ ((__idxd)->reg_base + (__idxd)->perfmon_offset); \
+})
+#define PERFMON_REG_OFFSET(idxd, offset) \
+ (PERFMON_TABLE_OFFSET(idxd) + (offset))
+
+#define PERFCAP_REG(idxd) (PERFMON_REG_OFFSET(idxd, IDXD_PERFCAP_OFFSET))
+#define PERFRST_REG(idxd) (PERFMON_REG_OFFSET(idxd, IDXD_PERFRST_OFFSET))
+#define OVFSTATUS_REG(idxd) (PERFMON_REG_OFFSET(idxd, IDXD_OVFSTATUS_OFFSET))
+#define PERFFRZ_REG(idxd) (PERFMON_REG_OFFSET(idxd, IDXD_PERFFRZ_OFFSET))
+
+#define FLTCFG_REG(idxd, cntr, flt) \
+ (PERFMON_REG_OFFSET(idxd, IDXD_FLTCFG_OFFSET) + ((cntr) * 32) + ((flt) * 4))
+
+#define CNTRCFG_REG(idxd, cntr) \
+ (PERFMON_REG_OFFSET(idxd, IDXD_CNTRCFG_OFFSET) + ((cntr) * 8))
+#define CNTRDATA_REG(idxd, cntr) \
+ (PERFMON_REG_OFFSET(idxd, IDXD_CNTRDATA_OFFSET) + ((cntr) * 8))
+#define CNTRCAP_REG(idxd, cntr) \
+ (PERFMON_REG_OFFSET(idxd, IDXD_CNTRCAP_OFFSET) + ((cntr) * 8))
+
+#define EVNTCAP_REG(idxd, category) \
+ (PERFMON_REG_OFFSET(idxd, IDXD_EVNTCAP_OFFSET) + ((category) * 8))
+
+#define DEFINE_PERFMON_FORMAT_ATTR(_name, _format) \
+static ssize_t __perfmon_idxd_##_name##_show(struct kobject *kobj, \
+ struct kobj_attribute *attr, \
+ char *page) \
+{ \
+ BUILD_BUG_ON(sizeof(_format) >= PAGE_SIZE); \
+ return sprintf(page, _format "\n"); \
+} \
+static struct kobj_attribute format_attr_idxd_##_name = \
+ __ATTR(_name, 0444, __perfmon_idxd_##_name##_show, NULL)
+
+#endif
diff --git a/drivers/dma/idxd/registers.h b/drivers/dma/idxd/registers.h
index 751ecb4f9f81..c970c3f025f0 100644
--- a/drivers/dma/idxd/registers.h
+++ b/drivers/dma/idxd/registers.h
@@ -24,8 +24,8 @@ union gen_cap_reg {
u64 overlap_copy:1;
u64 cache_control_mem:1;
u64 cache_control_cache:1;
+ u64 cmd_cap:1;
u64 rsvd:3;
- u64 int_handle_req:1;
u64 dest_readback:1;
u64 drain_readback:1;
u64 rsvd2:6;
@@ -120,7 +120,8 @@ union gencfg_reg {
union genctrl_reg {
struct {
u32 softerr_int_en:1;
- u32 rsvd:31;
+ u32 halt_int_en:1;
+ u32 rsvd:30;
};
u32 bits;
} __packed;
@@ -180,8 +181,11 @@ enum idxd_cmd {
IDXD_CMD_DRAIN_PASID,
IDXD_CMD_ABORT_PASID,
IDXD_CMD_REQUEST_INT_HANDLE,
+ IDXD_CMD_RELEASE_INT_HANDLE,
};
+#define CMD_INT_HANDLE_IMS 0x10000
+
#define IDXD_CMDSTS_OFFSET 0xa8
union cmdsts_reg {
struct {
@@ -193,6 +197,8 @@ union cmdsts_reg {
u32 bits;
} __packed;
#define IDXD_CMDSTS_ACTIVE 0x80000000
+#define IDXD_CMDSTS_ERR_MASK 0xff
+#define IDXD_CMDSTS_RES_SHIFT 8
enum idxd_cmdsts_err {
IDXD_CMDSTS_SUCCESS = 0,
@@ -228,6 +234,8 @@ enum idxd_cmdsts_err {
IDXD_CMDSTS_ERR_NO_HANDLE,
};
+#define IDXD_CMDCAP_OFFSET 0xb0
+
#define IDXD_SWERR_OFFSET 0xc0
#define IDXD_SWERR_VALID 0x00000001
#define IDXD_SWERR_OVERFLOW 0x00000002
@@ -378,4 +386,112 @@ union wqcfg {
#define GRPENGCFG_OFFSET(idxd_dev, n) ((idxd_dev)->grpcfg_offset + (n) * GRPCFG_SIZE + 32)
#define GRPFLGCFG_OFFSET(idxd_dev, n) ((idxd_dev)->grpcfg_offset + (n) * GRPCFG_SIZE + 40)
+/* Following is performance monitor registers */
+#define IDXD_PERFCAP_OFFSET 0x0
+union idxd_perfcap {
+ struct {
+ u64 num_perf_counter:6;
+ u64 rsvd1:2;
+ u64 counter_width:8;
+ u64 num_event_category:4;
+ u64 global_event_category:16;
+ u64 filter:8;
+ u64 rsvd2:8;
+ u64 cap_per_counter:1;
+ u64 writeable_counter:1;
+ u64 counter_freeze:1;
+ u64 overflow_interrupt:1;
+ u64 rsvd3:8;
+ };
+ u64 bits;
+} __packed;
+
+#define IDXD_EVNTCAP_OFFSET 0x80
+union idxd_evntcap {
+ struct {
+ u64 events:28;
+ u64 rsvd:36;
+ };
+ u64 bits;
+} __packed;
+
+struct idxd_event {
+ union {
+ struct {
+ u32 event_category:4;
+ u32 events:28;
+ };
+ u32 val;
+ };
+} __packed;
+
+#define IDXD_CNTRCAP_OFFSET 0x800
+struct idxd_cntrcap {
+ union {
+ struct {
+ u32 counter_width:8;
+ u32 rsvd:20;
+ u32 num_events:4;
+ };
+ u32 val;
+ };
+ struct idxd_event events[];
+} __packed;
+
+#define IDXD_PERFRST_OFFSET 0x10
+union idxd_perfrst {
+ struct {
+ u32 perfrst_config:1;
+ u32 perfrst_counter:1;
+ u32 rsvd:30;
+ };
+ u32 val;
+} __packed;
+
+#define IDXD_OVFSTATUS_OFFSET 0x30
+#define IDXD_PERFFRZ_OFFSET 0x20
+#define IDXD_CNTRCFG_OFFSET 0x100
+union idxd_cntrcfg {
+ struct {
+ u64 enable:1;
+ u64 interrupt_ovf:1;
+ u64 global_freeze_ovf:1;
+ u64 rsvd1:5;
+ u64 event_category:4;
+ u64 rsvd2:20;
+ u64 events:28;
+ u64 rsvd3:4;
+ };
+ u64 val;
+} __packed;
+
+#define IDXD_FLTCFG_OFFSET 0x300
+
+#define IDXD_CNTRDATA_OFFSET 0x200
+union idxd_cntrdata {
+ struct {
+ u64 event_count_value;
+ };
+ u64 val;
+} __packed;
+
+union event_cfg {
+ struct {
+ u64 event_cat:4;
+ u64 event_enc:28;
+ };
+ u64 val;
+} __packed;
+
+union filter_cfg {
+ struct {
+ u64 wq:32;
+ u64 tc:8;
+ u64 pg_sz:4;
+ u64 xfer_sz:8;
+ u64 eng:8;
+ };
+ u64 val;
+} __packed;
+
#endif
diff --git a/drivers/dma/idxd/submit.c b/drivers/dma/idxd/submit.c
index a7a61bcc17d5..19afb62abaff 100644
--- a/drivers/dma/idxd/submit.c
+++ b/drivers/dma/idxd/submit.c
@@ -15,18 +15,30 @@ static struct idxd_desc *__get_desc(struct idxd_wq *wq, int idx, int cpu)
desc = wq->descs[idx];
memset(desc->hw, 0, sizeof(struct dsa_hw_desc));
- memset(desc->completion, 0, idxd->compl_size);
+ memset(desc->completion, 0, idxd->data->compl_size);
desc->cpu = cpu;
if (device_pasid_enabled(idxd))
desc->hw->pasid = idxd->pasid;
/*
- * Descriptor completion vectors are 1-8 for MSIX. We will round
- * robin through the 8 vectors.
+ * Descriptor completion vectors are 1...N for MSIX. We will round
+ * robin through the N vectors.
*/
wq->vec_ptr = (wq->vec_ptr % idxd->num_wq_irqs) + 1;
- desc->hw->int_handle = wq->vec_ptr;
+ if (!idxd->int_handles) {
+ desc->hw->int_handle = wq->vec_ptr;
+ } else {
+ desc->vector = wq->vec_ptr;
+ /*
+ * int_handles are only for descriptor completion. However for device
+ * MSIX enumeration, vec 0 is used for misc interrupts. Therefore even
+ * though we are rotating through 1...N for descriptor interrupts, we
+ * need to acqurie the int_handles from 0..N-1.
+ */
+ desc->hw->int_handle = idxd->int_handles[desc->vector - 1];
+ }
+
return desc;
}
@@ -79,13 +91,15 @@ void idxd_free_desc(struct idxd_wq *wq, struct idxd_desc *desc)
int idxd_submit_desc(struct idxd_wq *wq, struct idxd_desc *desc)
{
struct idxd_device *idxd = wq->idxd;
- int vec = desc->hw->int_handle;
void __iomem *portal;
int rc;
if (idxd->state != IDXD_DEV_ENABLED)
return -EIO;
+ if (!percpu_ref_tryget_live(&wq->wq_active))
+ return -ENXIO;
+
portal = wq->portal;
/*
@@ -108,13 +122,25 @@ int idxd_submit_desc(struct idxd_wq *wq, struct idxd_desc *desc)
return rc;
}
+ percpu_ref_put(&wq->wq_active);
+
/*
* Pending the descriptor to the lockless list for the irq_entry
* that we designated the descriptor to.
*/
- if (desc->hw->flags & IDXD_OP_FLAG_RCI)
- llist_add(&desc->llnode,
- &idxd->irq_entries[vec].pending_llist);
+ if (desc->hw->flags & IDXD_OP_FLAG_RCI) {
+ int vec;
+
+ /*
+ * If the driver is on host kernel, it would be the value
+ * assigned to interrupt handle, which is index for MSIX
+ * vector. If it's guest then can't use the int_handle since
+ * that is the index to IMS for the entire device. The guest
+ * device local index will be used.
+ */
+ vec = !idxd->int_handles ? desc->hw->int_handle : desc->vector;
+ llist_add(&desc->llnode, &idxd->irq_entries[vec].pending_llist);
+ }
return 0;
}
diff --git a/drivers/dma/idxd/sysfs.c b/drivers/dma/idxd/sysfs.c
index 4dbb03c545e4..0460d58e3941 100644
--- a/drivers/dma/idxd/sysfs.c
+++ b/drivers/dma/idxd/sysfs.c
@@ -16,69 +16,6 @@ static char *idxd_wq_type_names[] = {
[IDXD_WQT_USER] = "user",
};
-static void idxd_conf_device_release(struct device *dev)
-{
- dev_dbg(dev, "%s for %s\n", __func__, dev_name(dev));
-}
-
-static struct device_type idxd_group_device_type = {
- .name = "group",
- .release = idxd_conf_device_release,
-};
-
-static struct device_type idxd_wq_device_type = {
- .name = "wq",
- .release = idxd_conf_device_release,
-};
-
-static struct device_type idxd_engine_device_type = {
- .name = "engine",
- .release = idxd_conf_device_release,
-};
-
-static struct device_type dsa_device_type = {
- .name = "dsa",
- .release = idxd_conf_device_release,
-};
-
-static struct device_type iax_device_type = {
- .name = "iax",
- .release = idxd_conf_device_release,
-};
-
-static inline bool is_dsa_dev(struct device *dev)
-{
- return dev ? dev->type == &dsa_device_type : false;
-}
-
-static inline bool is_iax_dev(struct device *dev)
-{
- return dev ? dev->type == &iax_device_type : false;
-}
-
-static inline bool is_idxd_dev(struct device *dev)
-{
- return is_dsa_dev(dev) || is_iax_dev(dev);
-}
-
-static inline bool is_idxd_wq_dev(struct device *dev)
-{
- return dev ? dev->type == &idxd_wq_device_type : false;
-}
-
-static inline bool is_idxd_wq_dmaengine(struct idxd_wq *wq)
-{
- if (wq->type == IDXD_WQT_KERNEL &&
- strcmp(wq->name, "dmaengine") == 0)
- return true;
- return false;
-}
-
-static inline bool is_idxd_wq_cdev(struct idxd_wq *wq)
-{
- return wq->type == IDXD_WQT_USER;
-}
-
static int idxd_config_bus_match(struct device *dev,
struct device_driver *drv)
{
@@ -110,9 +47,131 @@ static int idxd_config_bus_match(struct device *dev,
return matched;
}
-static int idxd_config_bus_probe(struct device *dev)
+static int enable_wq(struct idxd_wq *wq)
{
+ struct idxd_device *idxd = wq->idxd;
+ struct device *dev = &idxd->pdev->dev;
+ unsigned long flags;
int rc;
+
+ mutex_lock(&wq->wq_lock);
+
+ if (idxd->state != IDXD_DEV_ENABLED) {
+ mutex_unlock(&wq->wq_lock);
+ dev_warn(dev, "Enabling while device not enabled.\n");
+ return -EPERM;
+ }
+
+ if (wq->state != IDXD_WQ_DISABLED) {
+ mutex_unlock(&wq->wq_lock);
+ dev_warn(dev, "WQ %d already enabled.\n", wq->id);
+ return -EBUSY;
+ }
+
+ if (!wq->group) {
+ mutex_unlock(&wq->wq_lock);
+ dev_warn(dev, "WQ not attached to group.\n");
+ return -EINVAL;
+ }
+
+ if (strlen(wq->name) == 0) {
+ mutex_unlock(&wq->wq_lock);
+ dev_warn(dev, "WQ name not set.\n");
+ return -EINVAL;
+ }
+
+ /* Shared WQ checks */
+ if (wq_shared(wq)) {
+ if (!device_swq_supported(idxd)) {
+ dev_warn(dev, "PASID not enabled and shared WQ.\n");
+ mutex_unlock(&wq->wq_lock);
+ return -ENXIO;
+ }
+ /*
+ * Shared wq with the threshold set to 0 means the user
+ * did not set the threshold or transitioned from a
+ * dedicated wq but did not set threshold. A value
+ * of 0 would effectively disable the shared wq. The
+ * driver does not allow a value of 0 to be set for
+ * threshold via sysfs.
+ */
+ if (wq->threshold == 0) {
+ dev_warn(dev, "Shared WQ and threshold 0.\n");
+ mutex_unlock(&wq->wq_lock);
+ return -EINVAL;
+ }
+ }
+
+ rc = idxd_wq_alloc_resources(wq);
+ if (rc < 0) {
+ mutex_unlock(&wq->wq_lock);
+ dev_warn(dev, "WQ resource alloc failed\n");
+ return rc;
+ }
+
+ spin_lock_irqsave(&idxd->dev_lock, flags);
+ if (test_bit(IDXD_FLAG_CONFIGURABLE, &idxd->flags))
+ rc = idxd_device_config(idxd);
+ spin_unlock_irqrestore(&idxd->dev_lock, flags);
+ if (rc < 0) {
+ mutex_unlock(&wq->wq_lock);
+ dev_warn(dev, "Writing WQ %d config failed: %d\n", wq->id, rc);
+ return rc;
+ }
+
+ rc = idxd_wq_enable(wq);
+ if (rc < 0) {
+ mutex_unlock(&wq->wq_lock);
+ dev_warn(dev, "WQ %d enabling failed: %d\n", wq->id, rc);
+ return rc;
+ }
+
+ rc = idxd_wq_map_portal(wq);
+ if (rc < 0) {
+ dev_warn(dev, "wq portal mapping failed: %d\n", rc);
+ rc = idxd_wq_disable(wq);
+ if (rc < 0)
+ dev_warn(dev, "IDXD wq disable failed\n");
+ mutex_unlock(&wq->wq_lock);
+ return rc;
+ }
+
+ wq->client_count = 0;
+
+ if (wq->type == IDXD_WQT_KERNEL) {
+ rc = idxd_wq_init_percpu_ref(wq);
+ if (rc < 0) {
+ dev_dbg(dev, "percpu_ref setup failed\n");
+ mutex_unlock(&wq->wq_lock);
+ return rc;
+ }
+ }
+
+ if (is_idxd_wq_dmaengine(wq)) {
+ rc = idxd_register_dma_channel(wq);
+ if (rc < 0) {
+ dev_dbg(dev, "DMA channel register failed\n");
+ mutex_unlock(&wq->wq_lock);
+ return rc;
+ }
+ } else if (is_idxd_wq_cdev(wq)) {
+ rc = idxd_wq_add_cdev(wq);
+ if (rc < 0) {
+ dev_dbg(dev, "Cdev creation failed\n");
+ mutex_unlock(&wq->wq_lock);
+ return rc;
+ }
+ }
+
+ mutex_unlock(&wq->wq_lock);
+ dev_info(dev, "wq %s enabled\n", dev_name(&wq->conf_dev));
+
+ return 0;
+}
+
+static int idxd_config_bus_probe(struct device *dev)
+{
+ int rc = 0;
unsigned long flags;
dev_dbg(dev, "%s called\n", __func__);
@@ -130,7 +189,8 @@ static int idxd_config_bus_probe(struct device *dev)
/* Perform IDXD configuration and enabling */
spin_lock_irqsave(&idxd->dev_lock, flags);
- rc = idxd_device_config(idxd);
+ if (test_bit(IDXD_FLAG_CONFIGURABLE, &idxd->flags))
+ rc = idxd_device_config(idxd);
spin_unlock_irqrestore(&idxd->dev_lock, flags);
if (rc < 0) {
module_put(THIS_MODULE);
@@ -157,115 +217,8 @@ static int idxd_config_bus_probe(struct device *dev)
return 0;
} else if (is_idxd_wq_dev(dev)) {
struct idxd_wq *wq = confdev_to_wq(dev);
- struct idxd_device *idxd = wq->idxd;
-
- mutex_lock(&wq->wq_lock);
-
- if (idxd->state != IDXD_DEV_ENABLED) {
- mutex_unlock(&wq->wq_lock);
- dev_warn(dev, "Enabling while device not enabled.\n");
- return -EPERM;
- }
-
- if (wq->state != IDXD_WQ_DISABLED) {
- mutex_unlock(&wq->wq_lock);
- dev_warn(dev, "WQ %d already enabled.\n", wq->id);
- return -EBUSY;
- }
-
- if (!wq->group) {
- mutex_unlock(&wq->wq_lock);
- dev_warn(dev, "WQ not attached to group.\n");
- return -EINVAL;
- }
-
- if (strlen(wq->name) == 0) {
- mutex_unlock(&wq->wq_lock);
- dev_warn(dev, "WQ name not set.\n");
- return -EINVAL;
- }
-
- /* Shared WQ checks */
- if (wq_shared(wq)) {
- if (!device_swq_supported(idxd)) {
- dev_warn(dev,
- "PASID not enabled and shared WQ.\n");
- mutex_unlock(&wq->wq_lock);
- return -ENXIO;
- }
- /*
- * Shared wq with the threshold set to 0 means the user
- * did not set the threshold or transitioned from a
- * dedicated wq but did not set threshold. A value
- * of 0 would effectively disable the shared wq. The
- * driver does not allow a value of 0 to be set for
- * threshold via sysfs.
- */
- if (wq->threshold == 0) {
- dev_warn(dev,
- "Shared WQ and threshold 0.\n");
- mutex_unlock(&wq->wq_lock);
- return -EINVAL;
- }
- }
-
- rc = idxd_wq_alloc_resources(wq);
- if (rc < 0) {
- mutex_unlock(&wq->wq_lock);
- dev_warn(dev, "WQ resource alloc failed\n");
- return rc;
- }
-
- spin_lock_irqsave(&idxd->dev_lock, flags);
- rc = idxd_device_config(idxd);
- spin_unlock_irqrestore(&idxd->dev_lock, flags);
- if (rc < 0) {
- mutex_unlock(&wq->wq_lock);
- dev_warn(dev, "Writing WQ %d config failed: %d\n",
- wq->id, rc);
- return rc;
- }
-
- rc = idxd_wq_enable(wq);
- if (rc < 0) {
- mutex_unlock(&wq->wq_lock);
- dev_warn(dev, "WQ %d enabling failed: %d\n",
- wq->id, rc);
- return rc;
- }
-
- rc = idxd_wq_map_portal(wq);
- if (rc < 0) {
- dev_warn(dev, "wq portal mapping failed: %d\n", rc);
- rc = idxd_wq_disable(wq);
- if (rc < 0)
- dev_warn(dev, "IDXD wq disable failed\n");
- mutex_unlock(&wq->wq_lock);
- return rc;
- }
-
- wq->client_count = 0;
- dev_info(dev, "wq %s enabled\n", dev_name(&wq->conf_dev));
-
- if (is_idxd_wq_dmaengine(wq)) {
- rc = idxd_register_dma_channel(wq);
- if (rc < 0) {
- dev_dbg(dev, "DMA channel register failed\n");
- mutex_unlock(&wq->wq_lock);
- return rc;
- }
- } else if (is_idxd_wq_cdev(wq)) {
- rc = idxd_wq_add_cdev(wq);
- if (rc < 0) {
- dev_dbg(dev, "Cdev creation failed\n");
- mutex_unlock(&wq->wq_lock);
- return rc;
- }
- }
-
- mutex_unlock(&wq->wq_lock);
- return 0;
+ return enable_wq(wq);
}
return -ENODEV;
@@ -275,7 +228,6 @@ static void disable_wq(struct idxd_wq *wq)
{
struct idxd_device *idxd = wq->idxd;
struct device *dev = &idxd->pdev->dev;
- int rc;
mutex_lock(&wq->wq_lock);
dev_dbg(dev, "%s removing WQ %s\n", __func__, dev_name(&wq->conf_dev));
@@ -284,6 +236,9 @@ static void disable_wq(struct idxd_wq *wq)
return;
}
+ if (wq->type == IDXD_WQT_KERNEL)
+ idxd_wq_quiesce(wq);
+
if (is_idxd_wq_dmaengine(wq))
idxd_unregister_dma_channel(wq);
else if (is_idxd_wq_cdev(wq))
@@ -296,17 +251,13 @@ static void disable_wq(struct idxd_wq *wq)
idxd_wq_unmap_portal(wq);
idxd_wq_drain(wq);
- rc = idxd_wq_disable(wq);
+ idxd_wq_reset(wq);
idxd_wq_free_resources(wq);
wq->client_count = 0;
mutex_unlock(&wq->wq_lock);
- if (rc < 0)
- dev_warn(dev, "Failed to disable %s: %d\n",
- dev_name(&wq->conf_dev), rc);
- else
- dev_info(dev, "wq %s disabled\n", dev_name(&wq->conf_dev));
+ dev_info(dev, "wq %s disabled\n", dev_name(&wq->conf_dev));
}
static int idxd_config_bus_remove(struct device *dev)
@@ -327,7 +278,7 @@ static int idxd_config_bus_remove(struct device *dev)
dev_dbg(dev, "%s removing dev %s\n", __func__,
dev_name(&idxd->conf_dev));
for (i = 0; i < idxd->max_wqs; i++) {
- struct idxd_wq *wq = &idxd->wqs[i];
+ struct idxd_wq *wq = idxd->wqs[i];
if (wq->state == IDXD_WQ_DISABLED)
continue;
@@ -338,12 +289,14 @@ static int idxd_config_bus_remove(struct device *dev)
idxd_unregister_dma_device(idxd);
rc = idxd_device_disable(idxd);
- for (i = 0; i < idxd->max_wqs; i++) {
- struct idxd_wq *wq = &idxd->wqs[i];
+ if (test_bit(IDXD_FLAG_CONFIGURABLE, &idxd->flags)) {
+ for (i = 0; i < idxd->max_wqs; i++) {
+ struct idxd_wq *wq = idxd->wqs[i];
- mutex_lock(&wq->wq_lock);
- idxd_wq_disable_cleanup(wq);
- mutex_unlock(&wq->wq_lock);
+ mutex_lock(&wq->wq_lock);
+ idxd_wq_disable_cleanup(wq);
+ mutex_unlock(&wq->wq_lock);
+ }
}
module_put(THIS_MODULE);
if (rc < 0)
@@ -369,19 +322,6 @@ struct bus_type dsa_bus_type = {
.shutdown = idxd_config_bus_shutdown,
};
-struct bus_type iax_bus_type = {
- .name = "iax",
- .match = idxd_config_bus_match,
- .probe = idxd_config_bus_probe,
- .remove = idxd_config_bus_remove,
- .shutdown = idxd_config_bus_shutdown,
-};
-
-static struct bus_type *idxd_bus_types[] = {
- &dsa_bus_type,
- &iax_bus_type
-};
-
static struct idxd_device_driver dsa_drv = {
.drv = {
.name = "dsa",
@@ -391,60 +331,15 @@ static struct idxd_device_driver dsa_drv = {
},
};
-static struct idxd_device_driver iax_drv = {
- .drv = {
- .name = "iax",
- .bus = &iax_bus_type,
- .owner = THIS_MODULE,
- .mod_name = KBUILD_MODNAME,
- },
-};
-
-static struct idxd_device_driver *idxd_drvs[] = {
- &dsa_drv,
- &iax_drv
-};
-
-struct bus_type *idxd_get_bus_type(struct idxd_device *idxd)
-{
- return idxd_bus_types[idxd->type];
-}
-
-static struct device_type *idxd_get_device_type(struct idxd_device *idxd)
-{
- if (idxd->type == IDXD_TYPE_DSA)
- return &dsa_device_type;
- else if (idxd->type == IDXD_TYPE_IAX)
- return &iax_device_type;
- else
- return NULL;
-}
-
/* IDXD generic driver setup */
int idxd_register_driver(void)
{
- int i, rc;
-
- for (i = 0; i < IDXD_TYPE_MAX; i++) {
- rc = driver_register(&idxd_drvs[i]->drv);
- if (rc < 0)
- goto drv_fail;
- }
-
- return 0;
-
-drv_fail:
- while (--i >= 0)
- driver_unregister(&idxd_drvs[i]->drv);
- return rc;
+ return driver_register(&dsa_drv.drv);
}
void idxd_unregister_driver(void)
{
- int i;
-
- for (i = 0; i < IDXD_TYPE_MAX; i++)
- driver_unregister(&idxd_drvs[i]->drv);
+ driver_unregister(&dsa_drv.drv);
}
/* IDXD engine attributes */
@@ -455,9 +350,9 @@ static ssize_t engine_group_id_show(struct device *dev,
container_of(dev, struct idxd_engine, conf_dev);
if (engine->group)
- return sprintf(buf, "%d\n", engine->group->id);
+ return sysfs_emit(buf, "%d\n", engine->group->id);
else
- return sprintf(buf, "%d\n", -1);
+ return sysfs_emit(buf, "%d\n", -1);
}
static ssize_t engine_group_id_store(struct device *dev,
@@ -493,7 +388,7 @@ static ssize_t engine_group_id_store(struct device *dev,
if (prevg)
prevg->num_engines--;
- engine->group = &idxd->groups[id];
+ engine->group = idxd->groups[id];
engine->group->num_engines++;
return count;
@@ -517,6 +412,19 @@ static const struct attribute_group *idxd_engine_attribute_groups[] = {
NULL,
};
+static void idxd_conf_engine_release(struct device *dev)
+{
+ struct idxd_engine *engine = container_of(dev, struct idxd_engine, conf_dev);
+
+ kfree(engine);
+}
+
+struct device_type idxd_engine_device_type = {
+ .name = "engine",
+ .release = idxd_conf_engine_release,
+ .groups = idxd_engine_attribute_groups,
+};
+
/* Group attributes */
static void idxd_set_free_tokens(struct idxd_device *idxd)
@@ -524,7 +432,7 @@ static void idxd_set_free_tokens(struct idxd_device *idxd)
int i, tokens;
for (i = 0, tokens = 0; i < idxd->max_groups; i++) {
- struct idxd_group *g = &idxd->groups[i];
+ struct idxd_group *g = idxd->groups[i];
tokens += g->tokens_reserved;
}
@@ -539,7 +447,7 @@ static ssize_t group_tokens_reserved_show(struct device *dev,
struct idxd_group *group =
container_of(dev, struct idxd_group, conf_dev);
- return sprintf(buf, "%u\n", group->tokens_reserved);
+ return sysfs_emit(buf, "%u\n", group->tokens_reserved);
}
static ssize_t group_tokens_reserved_store(struct device *dev,
@@ -556,7 +464,7 @@ static ssize_t group_tokens_reserved_store(struct device *dev,
if (rc < 0)
return -EINVAL;
- if (idxd->type == IDXD_TYPE_IAX)
+ if (idxd->data->type == IDXD_TYPE_IAX)
return -EOPNOTSUPP;
if (!test_bit(IDXD_FLAG_CONFIGURABLE, &idxd->flags))
@@ -587,7 +495,7 @@ static ssize_t group_tokens_allowed_show(struct device *dev,
struct idxd_group *group =
container_of(dev, struct idxd_group, conf_dev);
- return sprintf(buf, "%u\n", group->tokens_allowed);
+ return sysfs_emit(buf, "%u\n", group->tokens_allowed);
}
static ssize_t group_tokens_allowed_store(struct device *dev,
@@ -604,7 +512,7 @@ static ssize_t group_tokens_allowed_store(struct device *dev,
if (rc < 0)
return -EINVAL;
- if (idxd->type == IDXD_TYPE_IAX)
+ if (idxd->data->type == IDXD_TYPE_IAX)
return -EOPNOTSUPP;
if (!test_bit(IDXD_FLAG_CONFIGURABLE, &idxd->flags))
@@ -632,7 +540,7 @@ static ssize_t group_use_token_limit_show(struct device *dev,
struct idxd_group *group =
container_of(dev, struct idxd_group, conf_dev);
- return sprintf(buf, "%u\n", group->use_token_limit);
+ return sysfs_emit(buf, "%u\n", group->use_token_limit);
}
static ssize_t group_use_token_limit_store(struct device *dev,
@@ -649,7 +557,7 @@ static ssize_t group_use_token_limit_store(struct device *dev,
if (rc < 0)
return -EINVAL;
- if (idxd->type == IDXD_TYPE_IAX)
+ if (idxd->data->type == IDXD_TYPE_IAX)
return -EOPNOTSUPP;
if (!test_bit(IDXD_FLAG_CONFIGURABLE, &idxd->flags))
@@ -675,22 +583,22 @@ static ssize_t group_engines_show(struct device *dev,
struct idxd_group *group =
container_of(dev, struct idxd_group, conf_dev);
int i, rc = 0;
- char *tmp = buf;
struct idxd_device *idxd = group->idxd;
for (i = 0; i < idxd->max_engines; i++) {
- struct idxd_engine *engine = &idxd->engines[i];
+ struct idxd_engine *engine = idxd->engines[i];
if (!engine->group)
continue;
if (engine->group->id == group->id)
- rc += sprintf(tmp + rc, "engine%d.%d ",
- idxd->id, engine->id);
+ rc += sysfs_emit_at(buf, rc, "engine%d.%d ", idxd->id, engine->id);
}
+ if (!rc)
+ return 0;
rc--;
- rc += sprintf(tmp + rc, "\n");
+ rc += sysfs_emit_at(buf, rc, "\n");
return rc;
}
@@ -704,22 +612,22 @@ static ssize_t group_work_queues_show(struct device *dev,
struct idxd_group *group =
container_of(dev, struct idxd_group, conf_dev);
int i, rc = 0;
- char *tmp = buf;
struct idxd_device *idxd = group->idxd;
for (i = 0; i < idxd->max_wqs; i++) {
- struct idxd_wq *wq = &idxd->wqs[i];
+ struct idxd_wq *wq = idxd->wqs[i];
if (!wq->group)
continue;
if (wq->group->id == group->id)
- rc += sprintf(tmp + rc, "wq%d.%d ",
- idxd->id, wq->id);
+ rc += sysfs_emit_at(buf, rc, "wq%d.%d ", idxd->id, wq->id);
}
+ if (!rc)
+ return 0;
rc--;
- rc += sprintf(tmp + rc, "\n");
+ rc += sysfs_emit_at(buf, rc, "\n");
return rc;
}
@@ -734,7 +642,7 @@ static ssize_t group_traffic_class_a_show(struct device *dev,
struct idxd_group *group =
container_of(dev, struct idxd_group, conf_dev);
- return sprintf(buf, "%d\n", group->tc_a);
+ return sysfs_emit(buf, "%d\n", group->tc_a);
}
static ssize_t group_traffic_class_a_store(struct device *dev,
@@ -775,7 +683,7 @@ static ssize_t group_traffic_class_b_show(struct device *dev,
struct idxd_group *group =
container_of(dev, struct idxd_group, conf_dev);
- return sprintf(buf, "%d\n", group->tc_b);
+ return sysfs_emit(buf, "%d\n", group->tc_b);
}
static ssize_t group_traffic_class_b_store(struct device *dev,
@@ -829,13 +737,26 @@ static const struct attribute_group *idxd_group_attribute_groups[] = {
NULL,
};
+static void idxd_conf_group_release(struct device *dev)
+{
+ struct idxd_group *group = container_of(dev, struct idxd_group, conf_dev);
+
+ kfree(group);
+}
+
+struct device_type idxd_group_device_type = {
+ .name = "group",
+ .release = idxd_conf_group_release,
+ .groups = idxd_group_attribute_groups,
+};
+
/* IDXD work queue attribs */
static ssize_t wq_clients_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
struct idxd_wq *wq = container_of(dev, struct idxd_wq, conf_dev);
- return sprintf(buf, "%d\n", wq->client_count);
+ return sysfs_emit(buf, "%d\n", wq->client_count);
}
static struct device_attribute dev_attr_wq_clients =
@@ -848,12 +769,12 @@ static ssize_t wq_state_show(struct device *dev,
switch (wq->state) {
case IDXD_WQ_DISABLED:
- return sprintf(buf, "disabled\n");
+ return sysfs_emit(buf, "disabled\n");
case IDXD_WQ_ENABLED:
- return sprintf(buf, "enabled\n");
+ return sysfs_emit(buf, "enabled\n");
}
- return sprintf(buf, "unknown\n");
+ return sysfs_emit(buf, "unknown\n");
}
static struct device_attribute dev_attr_wq_state =
@@ -865,9 +786,9 @@ static ssize_t wq_group_id_show(struct device *dev,
struct idxd_wq *wq = container_of(dev, struct idxd_wq, conf_dev);
if (wq->group)
- return sprintf(buf, "%u\n", wq->group->id);
+ return sysfs_emit(buf, "%u\n", wq->group->id);
else
- return sprintf(buf, "-1\n");
+ return sysfs_emit(buf, "-1\n");
}
static ssize_t wq_group_id_store(struct device *dev,
@@ -901,7 +822,7 @@ static ssize_t wq_group_id_store(struct device *dev,
return count;
}
- group = &idxd->groups[id];
+ group = idxd->groups[id];
prevg = wq->group;
if (prevg)
@@ -919,8 +840,7 @@ static ssize_t wq_mode_show(struct device *dev, struct device_attribute *attr,
{
struct idxd_wq *wq = container_of(dev, struct idxd_wq, conf_dev);
- return sprintf(buf, "%s\n",
- wq_dedicated(wq) ? "dedicated" : "shared");
+ return sysfs_emit(buf, "%s\n", wq_dedicated(wq) ? "dedicated" : "shared");
}
static ssize_t wq_mode_store(struct device *dev,
@@ -956,7 +876,7 @@ static ssize_t wq_size_show(struct device *dev, struct device_attribute *attr,
{
struct idxd_wq *wq = container_of(dev, struct idxd_wq, conf_dev);
- return sprintf(buf, "%u\n", wq->size);
+ return sysfs_emit(buf, "%u\n", wq->size);
}
static int total_claimed_wq_size(struct idxd_device *idxd)
@@ -965,7 +885,7 @@ static int total_claimed_wq_size(struct idxd_device *idxd)
int wq_size = 0;
for (i = 0; i < idxd->max_wqs; i++) {
- struct idxd_wq *wq = &idxd->wqs[i];
+ struct idxd_wq *wq = idxd->wqs[i];
wq_size += wq->size;
}
@@ -989,7 +909,7 @@ static ssize_t wq_size_store(struct device *dev,
if (!test_bit(IDXD_FLAG_CONFIGURABLE, &idxd->flags))
return -EPERM;
- if (wq->state != IDXD_WQ_DISABLED)
+ if (idxd->state == IDXD_DEV_ENABLED)
return -EPERM;
if (size + total_claimed_wq_size(idxd) - wq->size > idxd->max_wq_size)
@@ -1007,7 +927,7 @@ static ssize_t wq_priority_show(struct device *dev,
{
struct idxd_wq *wq = container_of(dev, struct idxd_wq, conf_dev);
- return sprintf(buf, "%u\n", wq->priority);
+ return sysfs_emit(buf, "%u\n", wq->priority);
}
static ssize_t wq_priority_store(struct device *dev,
@@ -1044,8 +964,7 @@ static ssize_t wq_block_on_fault_show(struct device *dev,
{
struct idxd_wq *wq = container_of(dev, struct idxd_wq, conf_dev);
- return sprintf(buf, "%u\n",
- test_bit(WQ_FLAG_BLOCK_ON_FAULT, &wq->flags));
+ return sysfs_emit(buf, "%u\n", test_bit(WQ_FLAG_BLOCK_ON_FAULT, &wq->flags));
}
static ssize_t wq_block_on_fault_store(struct device *dev,
@@ -1084,7 +1003,7 @@ static ssize_t wq_threshold_show(struct device *dev,
{
struct idxd_wq *wq = container_of(dev, struct idxd_wq, conf_dev);
- return sprintf(buf, "%u\n", wq->threshold);
+ return sysfs_emit(buf, "%u\n", wq->threshold);
}
static ssize_t wq_threshold_store(struct device *dev,
@@ -1127,15 +1046,12 @@ static ssize_t wq_type_show(struct device *dev,
switch (wq->type) {
case IDXD_WQT_KERNEL:
- return sprintf(buf, "%s\n",
- idxd_wq_type_names[IDXD_WQT_KERNEL]);
+ return sysfs_emit(buf, "%s\n", idxd_wq_type_names[IDXD_WQT_KERNEL]);
case IDXD_WQT_USER:
- return sprintf(buf, "%s\n",
- idxd_wq_type_names[IDXD_WQT_USER]);
+ return sysfs_emit(buf, "%s\n", idxd_wq_type_names[IDXD_WQT_USER]);
case IDXD_WQT_NONE:
default:
- return sprintf(buf, "%s\n",
- idxd_wq_type_names[IDXD_WQT_NONE]);
+ return sysfs_emit(buf, "%s\n", idxd_wq_type_names[IDXD_WQT_NONE]);
}
return -EINVAL;
@@ -1176,7 +1092,7 @@ static ssize_t wq_name_show(struct device *dev,
{
struct idxd_wq *wq = container_of(dev, struct idxd_wq, conf_dev);
- return sprintf(buf, "%s\n", wq->name);
+ return sysfs_emit(buf, "%s\n", wq->name);
}
static ssize_t wq_name_store(struct device *dev,
@@ -1211,8 +1127,16 @@ static ssize_t wq_cdev_minor_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
struct idxd_wq *wq = container_of(dev, struct idxd_wq, conf_dev);
+ int minor = -1;
+
+ mutex_lock(&wq->wq_lock);
+ if (wq->idxd_cdev)
+ minor = wq->idxd_cdev->minor;
+ mutex_unlock(&wq->wq_lock);
- return sprintf(buf, "%d\n", wq->idxd_cdev.minor);
+ if (minor == -1)
+ return -ENXIO;
+ return sysfs_emit(buf, "%d\n", minor);
}
static struct device_attribute dev_attr_wq_cdev_minor =
@@ -1238,7 +1162,7 @@ static ssize_t wq_max_transfer_size_show(struct device *dev, struct device_attri
{
struct idxd_wq *wq = container_of(dev, struct idxd_wq, conf_dev);
- return sprintf(buf, "%llu\n", wq->max_xfer_bytes);
+ return sysfs_emit(buf, "%llu\n", wq->max_xfer_bytes);
}
static ssize_t wq_max_transfer_size_store(struct device *dev, struct device_attribute *attr,
@@ -1272,7 +1196,7 @@ static ssize_t wq_max_batch_size_show(struct device *dev, struct device_attribut
{
struct idxd_wq *wq = container_of(dev, struct idxd_wq, conf_dev);
- return sprintf(buf, "%u\n", wq->max_batch_size);
+ return sysfs_emit(buf, "%u\n", wq->max_batch_size);
}
static ssize_t wq_max_batch_size_store(struct device *dev, struct device_attribute *attr,
@@ -1305,7 +1229,7 @@ static ssize_t wq_ats_disable_show(struct device *dev, struct device_attribute *
{
struct idxd_wq *wq = container_of(dev, struct idxd_wq, conf_dev);
- return sprintf(buf, "%u\n", wq->ats_dis);
+ return sysfs_emit(buf, "%u\n", wq->ats_dis);
}
static ssize_t wq_ats_disable_store(struct device *dev, struct device_attribute *attr,
@@ -1361,6 +1285,20 @@ static const struct attribute_group *idxd_wq_attribute_groups[] = {
NULL,
};
+static void idxd_conf_wq_release(struct device *dev)
+{
+ struct idxd_wq *wq = container_of(dev, struct idxd_wq, conf_dev);
+
+ kfree(wq->wqcfg);
+ kfree(wq);
+}
+
+struct device_type idxd_wq_device_type = {
+ .name = "wq",
+ .release = idxd_conf_wq_release,
+ .groups = idxd_wq_attribute_groups,
+};
+
/* IDXD device attribs */
static ssize_t version_show(struct device *dev, struct device_attribute *attr,
char *buf)
@@ -1368,7 +1306,7 @@ static ssize_t version_show(struct device *dev, struct device_attribute *attr,
struct idxd_device *idxd =
container_of(dev, struct idxd_device, conf_dev);
- return sprintf(buf, "%#x\n", idxd->hw.version);
+ return sysfs_emit(buf, "%#x\n", idxd->hw.version);
}
static DEVICE_ATTR_RO(version);
@@ -1379,7 +1317,7 @@ static ssize_t max_work_queues_size_show(struct device *dev,
struct idxd_device *idxd =
container_of(dev, struct idxd_device, conf_dev);
- return sprintf(buf, "%u\n", idxd->max_wq_size);
+ return sysfs_emit(buf, "%u\n", idxd->max_wq_size);
}
static DEVICE_ATTR_RO(max_work_queues_size);
@@ -1389,7 +1327,7 @@ static ssize_t max_groups_show(struct device *dev,
struct idxd_device *idxd =
container_of(dev, struct idxd_device, conf_dev);
- return sprintf(buf, "%u\n", idxd->max_groups);
+ return sysfs_emit(buf, "%u\n", idxd->max_groups);
}
static DEVICE_ATTR_RO(max_groups);
@@ -1399,7 +1337,7 @@ static ssize_t max_work_queues_show(struct device *dev,
struct idxd_device *idxd =
container_of(dev, struct idxd_device, conf_dev);
- return sprintf(buf, "%u\n", idxd->max_wqs);
+ return sysfs_emit(buf, "%u\n", idxd->max_wqs);
}
static DEVICE_ATTR_RO(max_work_queues);
@@ -1409,7 +1347,7 @@ static ssize_t max_engines_show(struct device *dev,
struct idxd_device *idxd =
container_of(dev, struct idxd_device, conf_dev);
- return sprintf(buf, "%u\n", idxd->max_engines);
+ return sysfs_emit(buf, "%u\n", idxd->max_engines);
}
static DEVICE_ATTR_RO(max_engines);
@@ -1419,7 +1357,7 @@ static ssize_t numa_node_show(struct device *dev,
struct idxd_device *idxd =
container_of(dev, struct idxd_device, conf_dev);
- return sprintf(buf, "%d\n", dev_to_node(&idxd->pdev->dev));
+ return sysfs_emit(buf, "%d\n", dev_to_node(&idxd->pdev->dev));
}
static DEVICE_ATTR_RO(numa_node);
@@ -1429,7 +1367,7 @@ static ssize_t max_batch_size_show(struct device *dev,
struct idxd_device *idxd =
container_of(dev, struct idxd_device, conf_dev);
- return sprintf(buf, "%u\n", idxd->max_batch_size);
+ return sysfs_emit(buf, "%u\n", idxd->max_batch_size);
}
static DEVICE_ATTR_RO(max_batch_size);
@@ -1440,7 +1378,7 @@ static ssize_t max_transfer_size_show(struct device *dev,
struct idxd_device *idxd =
container_of(dev, struct idxd_device, conf_dev);
- return sprintf(buf, "%llu\n", idxd->max_xfer_bytes);
+ return sysfs_emit(buf, "%llu\n", idxd->max_xfer_bytes);
}
static DEVICE_ATTR_RO(max_transfer_size);
@@ -1449,8 +1387,14 @@ static ssize_t op_cap_show(struct device *dev,
{
struct idxd_device *idxd =
container_of(dev, struct idxd_device, conf_dev);
+ int i, rc = 0;
+
+ for (i = 0; i < 4; i++)
+ rc += sysfs_emit_at(buf, rc, "%#llx ", idxd->hw.opcap.bits[i]);
- return sprintf(buf, "%#llx\n", idxd->hw.opcap.bits[0]);
+ rc--;
+ rc += sysfs_emit_at(buf, rc, "\n");
+ return rc;
}
static DEVICE_ATTR_RO(op_cap);
@@ -1460,7 +1404,7 @@ static ssize_t gen_cap_show(struct device *dev,
struct idxd_device *idxd =
container_of(dev, struct idxd_device, conf_dev);
- return sprintf(buf, "%#llx\n", idxd->hw.gen_cap.bits);
+ return sysfs_emit(buf, "%#llx\n", idxd->hw.gen_cap.bits);
}
static DEVICE_ATTR_RO(gen_cap);
@@ -1470,8 +1414,7 @@ static ssize_t configurable_show(struct device *dev,
struct idxd_device *idxd =
container_of(dev, struct idxd_device, conf_dev);
- return sprintf(buf, "%u\n",
- test_bit(IDXD_FLAG_CONFIGURABLE, &idxd->flags));
+ return sysfs_emit(buf, "%u\n", test_bit(IDXD_FLAG_CONFIGURABLE, &idxd->flags));
}
static DEVICE_ATTR_RO(configurable);
@@ -1485,13 +1428,13 @@ static ssize_t clients_show(struct device *dev,
spin_lock_irqsave(&idxd->dev_lock, flags);
for (i = 0; i < idxd->max_wqs; i++) {
- struct idxd_wq *wq = &idxd->wqs[i];
+ struct idxd_wq *wq = idxd->wqs[i];
count += wq->client_count;
}
spin_unlock_irqrestore(&idxd->dev_lock, flags);
- return sprintf(buf, "%d\n", count);
+ return sysfs_emit(buf, "%d\n", count);
}
static DEVICE_ATTR_RO(clients);
@@ -1501,7 +1444,7 @@ static ssize_t pasid_enabled_show(struct device *dev,
struct idxd_device *idxd =
container_of(dev, struct idxd_device, conf_dev);
- return sprintf(buf, "%u\n", device_pasid_enabled(idxd));
+ return sysfs_emit(buf, "%u\n", device_pasid_enabled(idxd));
}
static DEVICE_ATTR_RO(pasid_enabled);
@@ -1514,14 +1457,14 @@ static ssize_t state_show(struct device *dev,
switch (idxd->state) {
case IDXD_DEV_DISABLED:
case IDXD_DEV_CONF_READY:
- return sprintf(buf, "disabled\n");
+ return sysfs_emit(buf, "disabled\n");
case IDXD_DEV_ENABLED:
- return sprintf(buf, "enabled\n");
+ return sysfs_emit(buf, "enabled\n");
case IDXD_DEV_HALTED:
- return sprintf(buf, "halted\n");
+ return sysfs_emit(buf, "halted\n");
}
- return sprintf(buf, "unknown\n");
+ return sysfs_emit(buf, "unknown\n");
}
static DEVICE_ATTR_RO(state);
@@ -1535,10 +1478,10 @@ static ssize_t errors_show(struct device *dev,
spin_lock_irqsave(&idxd->dev_lock, flags);
for (i = 0; i < 4; i++)
- out += sprintf(buf + out, "%#018llx ", idxd->sw_err.bits[i]);
+ out += sysfs_emit_at(buf, out, "%#018llx ", idxd->sw_err.bits[i]);
spin_unlock_irqrestore(&idxd->dev_lock, flags);
out--;
- out += sprintf(buf + out, "\n");
+ out += sysfs_emit_at(buf, out, "\n");
return out;
}
static DEVICE_ATTR_RO(errors);
@@ -1549,7 +1492,7 @@ static ssize_t max_tokens_show(struct device *dev,
struct idxd_device *idxd =
container_of(dev, struct idxd_device, conf_dev);
- return sprintf(buf, "%u\n", idxd->max_tokens);
+ return sysfs_emit(buf, "%u\n", idxd->max_tokens);
}
static DEVICE_ATTR_RO(max_tokens);
@@ -1559,7 +1502,7 @@ static ssize_t token_limit_show(struct device *dev,
struct idxd_device *idxd =
container_of(dev, struct idxd_device, conf_dev);
- return sprintf(buf, "%u\n", idxd->token_limit);
+ return sysfs_emit(buf, "%u\n", idxd->token_limit);
}
static ssize_t token_limit_store(struct device *dev,
@@ -1598,7 +1541,7 @@ static ssize_t cdev_major_show(struct device *dev,
struct idxd_device *idxd =
container_of(dev, struct idxd_device, conf_dev);
- return sprintf(buf, "%u\n", idxd->major);
+ return sysfs_emit(buf, "%u\n", idxd->major);
}
static DEVICE_ATTR_RO(cdev_major);
@@ -1607,7 +1550,7 @@ static ssize_t cmd_status_show(struct device *dev,
{
struct idxd_device *idxd = container_of(dev, struct idxd_device, conf_dev);
- return sprintf(buf, "%#x\n", idxd->cmd_status);
+ return sysfs_emit(buf, "%#x\n", idxd->cmd_status);
}
static DEVICE_ATTR_RO(cmd_status);
@@ -1643,183 +1586,161 @@ static const struct attribute_group *idxd_attribute_groups[] = {
NULL,
};
-static int idxd_setup_engine_sysfs(struct idxd_device *idxd)
+static void idxd_conf_device_release(struct device *dev)
{
- struct device *dev = &idxd->pdev->dev;
- int i, rc;
+ struct idxd_device *idxd = container_of(dev, struct idxd_device, conf_dev);
+
+ kfree(idxd->groups);
+ kfree(idxd->wqs);
+ kfree(idxd->engines);
+ kfree(idxd->irq_entries);
+ kfree(idxd->int_handles);
+ ida_free(&idxd_ida, idxd->id);
+ kfree(idxd);
+}
+
+struct device_type dsa_device_type = {
+ .name = "dsa",
+ .release = idxd_conf_device_release,
+ .groups = idxd_attribute_groups,
+};
+
+struct device_type iax_device_type = {
+ .name = "iax",
+ .release = idxd_conf_device_release,
+ .groups = idxd_attribute_groups,
+};
+
+static int idxd_register_engine_devices(struct idxd_device *idxd)
+{
+ int i, j, rc;
for (i = 0; i < idxd->max_engines; i++) {
- struct idxd_engine *engine = &idxd->engines[i];
-
- engine->conf_dev.parent = &idxd->conf_dev;
- dev_set_name(&engine->conf_dev, "engine%d.%d",
- idxd->id, engine->id);
- engine->conf_dev.bus = idxd_get_bus_type(idxd);
- engine->conf_dev.groups = idxd_engine_attribute_groups;
- engine->conf_dev.type = &idxd_engine_device_type;
- dev_dbg(dev, "Engine device register: %s\n",
- dev_name(&engine->conf_dev));
- rc = device_register(&engine->conf_dev);
- if (rc < 0) {
- put_device(&engine->conf_dev);
+ struct idxd_engine *engine = idxd->engines[i];
+
+ rc = device_add(&engine->conf_dev);
+ if (rc < 0)
goto cleanup;
- }
}
return 0;
cleanup:
- while (i--) {
- struct idxd_engine *engine = &idxd->engines[i];
+ j = i - 1;
+ for (; i < idxd->max_engines; i++)
+ put_device(&idxd->engines[i]->conf_dev);
- device_unregister(&engine->conf_dev);
- }
+ while (j--)
+ device_unregister(&idxd->engines[j]->conf_dev);
return rc;
}
-static int idxd_setup_group_sysfs(struct idxd_device *idxd)
+static int idxd_register_group_devices(struct idxd_device *idxd)
{
- struct device *dev = &idxd->pdev->dev;
- int i, rc;
+ int i, j, rc;
for (i = 0; i < idxd->max_groups; i++) {
- struct idxd_group *group = &idxd->groups[i];
-
- group->conf_dev.parent = &idxd->conf_dev;
- dev_set_name(&group->conf_dev, "group%d.%d",
- idxd->id, group->id);
- group->conf_dev.bus = idxd_get_bus_type(idxd);
- group->conf_dev.groups = idxd_group_attribute_groups;
- group->conf_dev.type = &idxd_group_device_type;
- dev_dbg(dev, "Group device register: %s\n",
- dev_name(&group->conf_dev));
- rc = device_register(&group->conf_dev);
- if (rc < 0) {
- put_device(&group->conf_dev);
+ struct idxd_group *group = idxd->groups[i];
+
+ rc = device_add(&group->conf_dev);
+ if (rc < 0)
goto cleanup;
- }
}
return 0;
cleanup:
- while (i--) {
- struct idxd_group *group = &idxd->groups[i];
+ j = i - 1;
+ for (; i < idxd->max_groups; i++)
+ put_device(&idxd->groups[i]->conf_dev);
- device_unregister(&group->conf_dev);
- }
+ while (j--)
+ device_unregister(&idxd->groups[j]->conf_dev);
return rc;
}
-static int idxd_setup_wq_sysfs(struct idxd_device *idxd)
+static int idxd_register_wq_devices(struct idxd_device *idxd)
{
- struct device *dev = &idxd->pdev->dev;
- int i, rc;
+ int i, rc, j;
for (i = 0; i < idxd->max_wqs; i++) {
- struct idxd_wq *wq = &idxd->wqs[i];
-
- wq->conf_dev.parent = &idxd->conf_dev;
- dev_set_name(&wq->conf_dev, "wq%d.%d", idxd->id, wq->id);
- wq->conf_dev.bus = idxd_get_bus_type(idxd);
- wq->conf_dev.groups = idxd_wq_attribute_groups;
- wq->conf_dev.type = &idxd_wq_device_type;
- dev_dbg(dev, "WQ device register: %s\n",
- dev_name(&wq->conf_dev));
- rc = device_register(&wq->conf_dev);
- if (rc < 0) {
- put_device(&wq->conf_dev);
+ struct idxd_wq *wq = idxd->wqs[i];
+
+ rc = device_add(&wq->conf_dev);
+ if (rc < 0)
goto cleanup;
- }
}
return 0;
cleanup:
- while (i--) {
- struct idxd_wq *wq = &idxd->wqs[i];
+ j = i - 1;
+ for (; i < idxd->max_wqs; i++)
+ put_device(&idxd->wqs[i]->conf_dev);
- device_unregister(&wq->conf_dev);
- }
+ while (j--)
+ device_unregister(&idxd->wqs[j]->conf_dev);
return rc;
}
-static int idxd_setup_device_sysfs(struct idxd_device *idxd)
-{
- struct device *dev = &idxd->pdev->dev;
- int rc;
- char devname[IDXD_NAME_SIZE];
-
- sprintf(devname, "%s%d", idxd_get_dev_name(idxd), idxd->id);
- idxd->conf_dev.parent = dev;
- dev_set_name(&idxd->conf_dev, "%s", devname);
- idxd->conf_dev.bus = idxd_get_bus_type(idxd);
- idxd->conf_dev.groups = idxd_attribute_groups;
- idxd->conf_dev.type = idxd_get_device_type(idxd);
-
- dev_dbg(dev, "IDXD device register: %s\n", dev_name(&idxd->conf_dev));
- rc = device_register(&idxd->conf_dev);
- if (rc < 0) {
- put_device(&idxd->conf_dev);
- return rc;
- }
-
- return 0;
-}
-
-int idxd_setup_sysfs(struct idxd_device *idxd)
+int idxd_register_devices(struct idxd_device *idxd)
{
struct device *dev = &idxd->pdev->dev;
- int rc;
+ int rc, i;
- rc = idxd_setup_device_sysfs(idxd);
- if (rc < 0) {
- dev_dbg(dev, "Device sysfs registering failed: %d\n", rc);
+ rc = device_add(&idxd->conf_dev);
+ if (rc < 0)
return rc;
- }
- rc = idxd_setup_wq_sysfs(idxd);
+ rc = idxd_register_wq_devices(idxd);
if (rc < 0) {
- /* unregister conf dev */
- dev_dbg(dev, "Work Queue sysfs registering failed: %d\n", rc);
- return rc;
+ dev_dbg(dev, "WQ devices registering failed: %d\n", rc);
+ goto err_wq;
}
- rc = idxd_setup_group_sysfs(idxd);
+ rc = idxd_register_engine_devices(idxd);
if (rc < 0) {
- /* unregister conf dev */
- dev_dbg(dev, "Group sysfs registering failed: %d\n", rc);
- return rc;
+ dev_dbg(dev, "Engine devices registering failed: %d\n", rc);
+ goto err_engine;
}
- rc = idxd_setup_engine_sysfs(idxd);
+ rc = idxd_register_group_devices(idxd);
if (rc < 0) {
- /* unregister conf dev */
- dev_dbg(dev, "Engine sysfs registering failed: %d\n", rc);
- return rc;
+ dev_dbg(dev, "Group device registering failed: %d\n", rc);
+ goto err_group;
}
return 0;
+
+ err_group:
+ for (i = 0; i < idxd->max_engines; i++)
+ device_unregister(&idxd->engines[i]->conf_dev);
+ err_engine:
+ for (i = 0; i < idxd->max_wqs; i++)
+ device_unregister(&idxd->wqs[i]->conf_dev);
+ err_wq:
+ device_del(&idxd->conf_dev);
+ return rc;
}
-void idxd_cleanup_sysfs(struct idxd_device *idxd)
+void idxd_unregister_devices(struct idxd_device *idxd)
{
int i;
for (i = 0; i < idxd->max_wqs; i++) {
- struct idxd_wq *wq = &idxd->wqs[i];
+ struct idxd_wq *wq = idxd->wqs[i];
device_unregister(&wq->conf_dev);
}
for (i = 0; i < idxd->max_engines; i++) {
- struct idxd_engine *engine = &idxd->engines[i];
+ struct idxd_engine *engine = idxd->engines[i];
device_unregister(&engine->conf_dev);
}
for (i = 0; i < idxd->max_groups; i++) {
- struct idxd_group *group = &idxd->groups[i];
+ struct idxd_group *group = idxd->groups[i];
device_unregister(&group->conf_dev);
}
@@ -1829,26 +1750,10 @@ void idxd_cleanup_sysfs(struct idxd_device *idxd)
int idxd_register_bus_type(void)
{
- int i, rc;
-
- for (i = 0; i < IDXD_TYPE_MAX; i++) {
- rc = bus_register(idxd_bus_types[i]);
- if (rc < 0)
- goto bus_err;
- }
-
- return 0;
-
-bus_err:
- while (--i >= 0)
- bus_unregister(idxd_bus_types[i]);
- return rc;
+ return bus_register(&dsa_bus_type);
}
void idxd_unregister_bus_type(void)
{
- int i;
-
- for (i = 0; i < IDXD_TYPE_MAX; i++)
- bus_unregister(idxd_bus_types[i]);
+ bus_unregister(&dsa_bus_type);
}