Diffstat (limited to 'drivers/dma')
-rw-r--r-- | drivers/dma/at_xdmac.c          |   8
-rw-r--r-- | drivers/dma/dma-jz4780.c        | 118
-rw-r--r-- | drivers/dma/dmaengine.c         |   7
-rw-r--r-- | drivers/dma/idxd/device.c       |  38
-rw-r--r-- | drivers/dma/idxd/dma.c          |  28
-rw-r--r-- | drivers/dma/idxd/idxd.h         |  30
-rw-r--r-- | drivers/dma/idxd/init.c         | 101
-rw-r--r-- | drivers/dma/idxd/irq.c          | 234
-rw-r--r-- | drivers/dma/idxd/registers.h    |   1
-rw-r--r-- | drivers/dma/idxd/submit.c       |  63
-rw-r--r-- | drivers/dma/idxd/sysfs.c        |  37
-rw-r--r-- | drivers/dma/ppc4xx/adma.c       |   3
-rw-r--r-- | drivers/dma/qcom/gpi.c          |   4
-rw-r--r-- | drivers/dma/sh/shdma-base.c     |   6
-rw-r--r-- | drivers/dma/stm32-mdma.c        |  76
-rw-r--r-- | drivers/dma/ti/Makefile         |   3
-rw-r--r-- | drivers/dma/ti/edma.c           |   3
-rw-r--r-- | drivers/dma/ti/k3-psil-j721s2.c | 167
-rw-r--r-- | drivers/dma/ti/k3-psil-priv.h   |   1
-rw-r--r-- | drivers/dma/ti/k3-psil.c        |   1
-rw-r--r-- | drivers/dma/ti/k3-udma.c        |   1
-rw-r--r-- | drivers/dma/xilinx/xilinx_dma.c | 133
22 files changed, 852 insertions(+), 211 deletions(-)
diff --git a/drivers/dma/at_xdmac.c b/drivers/dma/at_xdmac.c index 275a76f188ae..e42dede5b243 100644 --- a/drivers/dma/at_xdmac.c +++ b/drivers/dma/at_xdmac.c @@ -2031,7 +2031,7 @@ static int __maybe_unused atmel_xdmac_resume(struct device *dev) static int at_xdmac_probe(struct platform_device *pdev) { struct at_xdmac *atxdmac; - int irq, size, nr_channels, i, ret; + int irq, nr_channels, i, ret; void __iomem *base; u32 reg; @@ -2056,9 +2056,9 @@ static int at_xdmac_probe(struct platform_device *pdev) return -EINVAL; } - size = sizeof(*atxdmac); - size += nr_channels * sizeof(struct at_xdmac_chan); - atxdmac = devm_kzalloc(&pdev->dev, size, GFP_KERNEL); + atxdmac = devm_kzalloc(&pdev->dev, + struct_size(atxdmac, chan, nr_channels), + GFP_KERNEL); if (!atxdmac) { dev_err(&pdev->dev, "can't allocate at_xdmac structure\n"); return -ENOMEM; diff --git a/drivers/dma/dma-jz4780.c b/drivers/dma/dma-jz4780.c index 96701dedcac8..fc513eb2b289 100644 --- a/drivers/dma/dma-jz4780.c +++ b/drivers/dma/dma-jz4780.c @@ -104,10 +104,10 @@ * descriptor base address in the upper 8 bits. */ struct jz4780_dma_hwdesc { - uint32_t dcm; - uint32_t dsa; - uint32_t dta; - uint32_t dtc; + u32 dcm; + u32 dsa; + u32 dta; + u32 dtc; }; /* Size of allocations for hardware descriptor blocks. */ @@ -122,7 +122,8 @@ struct jz4780_dma_desc { dma_addr_t desc_phys; unsigned int count; enum dma_transaction_type type; - uint32_t status; + u32 transfer_type; + u32 status; }; struct jz4780_dma_chan { @@ -130,8 +131,8 @@ struct jz4780_dma_chan { unsigned int id; struct dma_pool *desc_pool; - uint32_t transfer_type; - uint32_t transfer_shift; + u32 transfer_type_tx, transfer_type_rx; + u32 transfer_shift; struct dma_slave_config config; struct jz4780_dma_desc *desc; @@ -152,12 +153,12 @@ struct jz4780_dma_dev { unsigned int irq; const struct jz4780_dma_soc_data *soc_data; - uint32_t chan_reserved; + u32 chan_reserved; struct jz4780_dma_chan chan[]; }; struct jz4780_dma_filter_data { - uint32_t transfer_type; + u32 transfer_type_tx, transfer_type_rx; int channel; }; @@ -179,26 +180,26 @@ static inline struct jz4780_dma_dev *jz4780_dma_chan_parent( dma_device); } -static inline uint32_t jz4780_dma_chn_readl(struct jz4780_dma_dev *jzdma, +static inline u32 jz4780_dma_chn_readl(struct jz4780_dma_dev *jzdma, unsigned int chn, unsigned int reg) { return readl(jzdma->chn_base + reg + JZ_DMA_REG_CHAN(chn)); } static inline void jz4780_dma_chn_writel(struct jz4780_dma_dev *jzdma, - unsigned int chn, unsigned int reg, uint32_t val) + unsigned int chn, unsigned int reg, u32 val) { writel(val, jzdma->chn_base + reg + JZ_DMA_REG_CHAN(chn)); } -static inline uint32_t jz4780_dma_ctrl_readl(struct jz4780_dma_dev *jzdma, +static inline u32 jz4780_dma_ctrl_readl(struct jz4780_dma_dev *jzdma, unsigned int reg) { return readl(jzdma->ctrl_base + reg); } static inline void jz4780_dma_ctrl_writel(struct jz4780_dma_dev *jzdma, - unsigned int reg, uint32_t val) + unsigned int reg, u32 val) { writel(val, jzdma->ctrl_base + reg); } @@ -226,9 +227,10 @@ static inline void jz4780_dma_chan_disable(struct jz4780_dma_dev *jzdma, jz4780_dma_ctrl_writel(jzdma, JZ_DMA_REG_DCKEC, BIT(chn)); } -static struct jz4780_dma_desc *jz4780_dma_desc_alloc( - struct jz4780_dma_chan *jzchan, unsigned int count, - enum dma_transaction_type type) +static struct jz4780_dma_desc * +jz4780_dma_desc_alloc(struct jz4780_dma_chan *jzchan, unsigned int count, + enum dma_transaction_type type, + enum dma_transfer_direction direction) { struct jz4780_dma_desc *desc; @@ -248,6 +250,12 @@ 
static struct jz4780_dma_desc *jz4780_dma_desc_alloc( desc->count = count; desc->type = type; + + if (direction == DMA_DEV_TO_MEM) + desc->transfer_type = jzchan->transfer_type_rx; + else + desc->transfer_type = jzchan->transfer_type_tx; + return desc; } @@ -260,8 +268,8 @@ static void jz4780_dma_desc_free(struct virt_dma_desc *vdesc) kfree(desc); } -static uint32_t jz4780_dma_transfer_size(struct jz4780_dma_chan *jzchan, - unsigned long val, uint32_t *shift) +static u32 jz4780_dma_transfer_size(struct jz4780_dma_chan *jzchan, + unsigned long val, u32 *shift) { struct jz4780_dma_dev *jzdma = jz4780_dma_chan_parent(jzchan); int ord = ffs(val) - 1; @@ -303,7 +311,7 @@ static int jz4780_dma_setup_hwdesc(struct jz4780_dma_chan *jzchan, enum dma_transfer_direction direction) { struct dma_slave_config *config = &jzchan->config; - uint32_t width, maxburst, tsz; + u32 width, maxburst, tsz; if (direction == DMA_MEM_TO_DEV) { desc->dcm = JZ_DMA_DCM_SAI; @@ -361,7 +369,7 @@ static struct dma_async_tx_descriptor *jz4780_dma_prep_slave_sg( unsigned int i; int err; - desc = jz4780_dma_desc_alloc(jzchan, sg_len, DMA_SLAVE); + desc = jz4780_dma_desc_alloc(jzchan, sg_len, DMA_SLAVE, direction); if (!desc) return NULL; @@ -410,7 +418,7 @@ static struct dma_async_tx_descriptor *jz4780_dma_prep_dma_cyclic( periods = buf_len / period_len; - desc = jz4780_dma_desc_alloc(jzchan, periods, DMA_CYCLIC); + desc = jz4780_dma_desc_alloc(jzchan, periods, DMA_CYCLIC, direction); if (!desc) return NULL; @@ -453,16 +461,16 @@ static struct dma_async_tx_descriptor *jz4780_dma_prep_dma_memcpy( { struct jz4780_dma_chan *jzchan = to_jz4780_dma_chan(chan); struct jz4780_dma_desc *desc; - uint32_t tsz; + u32 tsz; - desc = jz4780_dma_desc_alloc(jzchan, 1, DMA_MEMCPY); + desc = jz4780_dma_desc_alloc(jzchan, 1, DMA_MEMCPY, 0); if (!desc) return NULL; tsz = jz4780_dma_transfer_size(jzchan, dest | src | len, &jzchan->transfer_shift); - jzchan->transfer_type = JZ_DMA_DRT_AUTO; + desc->transfer_type = JZ_DMA_DRT_AUTO; desc->desc[0].dsa = src; desc->desc[0].dta = dest; @@ -528,7 +536,7 @@ static void jz4780_dma_begin(struct jz4780_dma_chan *jzchan) /* Set transfer type. */ jz4780_dma_chn_writel(jzdma, jzchan->id, JZ_DMA_REG_DRT, - jzchan->transfer_type); + jzchan->desc->transfer_type); /* * Set the transfer count. 
This is redundant for a descriptor-driven @@ -670,7 +678,7 @@ static bool jz4780_dma_chan_irq(struct jz4780_dma_dev *jzdma, { const unsigned int soc_flags = jzdma->soc_data->flags; struct jz4780_dma_desc *desc = jzchan->desc; - uint32_t dcs; + u32 dcs; bool ack = true; spin_lock(&jzchan->vchan.lock); @@ -727,7 +735,7 @@ static irqreturn_t jz4780_dma_irq_handler(int irq, void *data) struct jz4780_dma_dev *jzdma = data; unsigned int nb_channels = jzdma->soc_data->nb_channels; unsigned long pending; - uint32_t dmac; + u32 dmac; int i; pending = jz4780_dma_ctrl_readl(jzdma, JZ_DMA_REG_DIRQP); @@ -788,7 +796,8 @@ static bool jz4780_dma_filter_fn(struct dma_chan *chan, void *param) return false; } - jzchan->transfer_type = data->transfer_type; + jzchan->transfer_type_tx = data->transfer_type_tx; + jzchan->transfer_type_rx = data->transfer_type_rx; return true; } @@ -800,11 +809,17 @@ static struct dma_chan *jz4780_of_dma_xlate(struct of_phandle_args *dma_spec, dma_cap_mask_t mask = jzdma->dma_device.cap_mask; struct jz4780_dma_filter_data data; - if (dma_spec->args_count != 2) + if (dma_spec->args_count == 2) { + data.transfer_type_tx = dma_spec->args[0]; + data.transfer_type_rx = dma_spec->args[0]; + data.channel = dma_spec->args[1]; + } else if (dma_spec->args_count == 3) { + data.transfer_type_tx = dma_spec->args[0]; + data.transfer_type_rx = dma_spec->args[1]; + data.channel = dma_spec->args[2]; + } else { return NULL; - - data.transfer_type = dma_spec->args[0]; - data.channel = dma_spec->args[1]; + } if (data.channel > -1) { if (data.channel >= jzdma->soc_data->nb_channels) { @@ -822,7 +837,8 @@ static struct dma_chan *jz4780_of_dma_xlate(struct of_phandle_args *dma_spec, return NULL; } - jzdma->chan[data.channel].transfer_type = data.transfer_type; + jzdma->chan[data.channel].transfer_type_tx = data.transfer_type_tx; + jzdma->chan[data.channel].transfer_type_rx = data.transfer_type_rx; return dma_get_slave_channel( &jzdma->chan[data.channel].vchan.chan); @@ -938,6 +954,14 @@ static int jz4780_dma_probe(struct platform_device *pdev) jzchan->vchan.desc_free = jz4780_dma_desc_free; } + /* + * On JZ4760, chan0 won't enable properly the first time. + * Enabling then disabling chan1 will magically make chan0 work + * correctly. 
+ */ + jz4780_dma_chan_enable(jzdma, 1); + jz4780_dma_chan_disable(jzdma, 1); + ret = platform_get_irq(pdev, 0); if (ret < 0) goto err_disable_clk; @@ -1011,12 +1035,36 @@ static const struct jz4780_dma_soc_data jz4760_dma_soc_data = { .flags = JZ_SOC_DATA_PER_CHAN_PM | JZ_SOC_DATA_NO_DCKES_DCKEC, }; +static const struct jz4780_dma_soc_data jz4760_mdma_soc_data = { + .nb_channels = 2, + .transfer_ord_max = 6, + .flags = JZ_SOC_DATA_PER_CHAN_PM | JZ_SOC_DATA_NO_DCKES_DCKEC, +}; + +static const struct jz4780_dma_soc_data jz4760_bdma_soc_data = { + .nb_channels = 3, + .transfer_ord_max = 6, + .flags = JZ_SOC_DATA_PER_CHAN_PM | JZ_SOC_DATA_NO_DCKES_DCKEC, +}; + static const struct jz4780_dma_soc_data jz4760b_dma_soc_data = { .nb_channels = 5, .transfer_ord_max = 6, .flags = JZ_SOC_DATA_PER_CHAN_PM, }; +static const struct jz4780_dma_soc_data jz4760b_mdma_soc_data = { + .nb_channels = 2, + .transfer_ord_max = 6, + .flags = JZ_SOC_DATA_PER_CHAN_PM, +}; + +static const struct jz4780_dma_soc_data jz4760b_bdma_soc_data = { + .nb_channels = 3, + .transfer_ord_max = 6, + .flags = JZ_SOC_DATA_PER_CHAN_PM, +}; + static const struct jz4780_dma_soc_data jz4770_dma_soc_data = { .nb_channels = 6, .transfer_ord_max = 6, @@ -1045,7 +1093,11 @@ static const struct of_device_id jz4780_dma_dt_match[] = { { .compatible = "ingenic,jz4740-dma", .data = &jz4740_dma_soc_data }, { .compatible = "ingenic,jz4725b-dma", .data = &jz4725b_dma_soc_data }, { .compatible = "ingenic,jz4760-dma", .data = &jz4760_dma_soc_data }, + { .compatible = "ingenic,jz4760-mdma", .data = &jz4760_mdma_soc_data }, + { .compatible = "ingenic,jz4760-bdma", .data = &jz4760_bdma_soc_data }, { .compatible = "ingenic,jz4760b-dma", .data = &jz4760b_dma_soc_data }, + { .compatible = "ingenic,jz4760b-mdma", .data = &jz4760b_mdma_soc_data }, + { .compatible = "ingenic,jz4760b-bdma", .data = &jz4760b_bdma_soc_data }, { .compatible = "ingenic,jz4770-dma", .data = &jz4770_dma_soc_data }, { .compatible = "ingenic,jz4780-dma", .data = &jz4780_dma_soc_data }, { .compatible = "ingenic,x1000-dma", .data = &x1000_dma_soc_data }, diff --git a/drivers/dma/dmaengine.c b/drivers/dma/dmaengine.c index d9f7c097cfd6..2cfa8458b51b 100644 --- a/drivers/dma/dmaengine.c +++ b/drivers/dma/dmaengine.c @@ -1159,6 +1159,13 @@ int dma_async_device_register(struct dma_device *device) return -EIO; } + if (dma_has_cap(DMA_MEMCPY_SG, device->cap_mask) && !device->device_prep_dma_memcpy_sg) { + dev_err(device->dev, + "Device claims capability %s, but op is not defined\n", + "DMA_MEMCPY_SG"); + return -EIO; + } + if (dma_has_cap(DMA_XOR, device->cap_mask) && !device->device_prep_dma_xor) { dev_err(device->dev, "Device claims capability %s, but op is not defined\n", diff --git a/drivers/dma/idxd/device.c b/drivers/dma/idxd/device.c index fab412349f7f..5a50ee6f6881 100644 --- a/drivers/dma/idxd/device.c +++ b/drivers/dma/idxd/device.c @@ -387,9 +387,12 @@ static void idxd_wq_disable_cleanup(struct idxd_wq *wq) wq->threshold = 0; wq->priority = 0; wq->ats_dis = 0; + wq->enqcmds_retries = IDXD_ENQCMDS_RETRIES; clear_bit(WQ_FLAG_DEDICATED, &wq->flags); clear_bit(WQ_FLAG_BLOCK_ON_FAULT, &wq->flags); memset(wq->name, 0, WQ_NAME_SIZE); + wq->max_xfer_bytes = WQ_DEFAULT_MAX_XFER; + wq->max_batch_size = WQ_DEFAULT_MAX_BATCH; } static void idxd_wq_ref_release(struct percpu_ref *ref) @@ -404,19 +407,31 @@ int idxd_wq_init_percpu_ref(struct idxd_wq *wq) int rc; memset(&wq->wq_active, 0, sizeof(wq->wq_active)); - rc = percpu_ref_init(&wq->wq_active, idxd_wq_ref_release, 0, GFP_KERNEL); + rc = 
percpu_ref_init(&wq->wq_active, idxd_wq_ref_release, + PERCPU_REF_ALLOW_REINIT, GFP_KERNEL); if (rc < 0) return rc; reinit_completion(&wq->wq_dead); + reinit_completion(&wq->wq_resurrect); return 0; } -void idxd_wq_quiesce(struct idxd_wq *wq) +void __idxd_wq_quiesce(struct idxd_wq *wq) { + lockdep_assert_held(&wq->wq_lock); + reinit_completion(&wq->wq_resurrect); percpu_ref_kill(&wq->wq_active); + complete_all(&wq->wq_resurrect); wait_for_completion(&wq->wq_dead); } +void idxd_wq_quiesce(struct idxd_wq *wq) +{ + mutex_lock(&wq->wq_lock); + __idxd_wq_quiesce(wq); + mutex_unlock(&wq->wq_lock); +} + /* Device control bits */ static inline bool idxd_is_enabled(struct idxd_device *idxd) { @@ -827,15 +842,12 @@ static int idxd_wq_config_write(struct idxd_wq *wq) wq->wqcfg->bits[i] = ioread32(idxd->reg_base + wq_offset); } + if (wq->size == 0 && wq->type != IDXD_WQT_NONE) + wq->size = WQ_DEFAULT_QUEUE_DEPTH; + /* byte 0-3 */ wq->wqcfg->wq_size = wq->size; - if (wq->size == 0) { - idxd->cmd_status = IDXD_SCMD_WQ_NO_SIZE; - dev_warn(dev, "Incorrect work queue size: 0\n"); - return -EINVAL; - } - /* bytes 4-7 */ wq->wqcfg->wq_thresh = wq->threshold; @@ -981,8 +993,6 @@ static int idxd_wqs_setup(struct idxd_device *idxd) if (!wq->group) continue; - if (!wq->size) - continue; if (wq_shared(wq) && !device_swq_supported(idxd)) { idxd->cmd_status = IDXD_SCMD_WQ_NO_SWQ_SUPPORT; @@ -1206,6 +1216,13 @@ int __drv_enable_wq(struct idxd_wq *wq) goto err; } + /* + * Device has 1 misc interrupt and N interrupts for descriptor completion. To + * assign WQ to interrupt, we will take the N+1 interrupt since vector 0 is + * for the misc interrupt. + */ + wq->ie = &idxd->irq_entries[wq->id + 1]; + rc = idxd_wq_enable(wq); if (rc < 0) { dev_dbg(dev, "wq %d enabling failed: %d\n", wq->id, rc); @@ -1256,6 +1273,7 @@ void __drv_disable_wq(struct idxd_wq *wq) idxd_wq_drain(wq); idxd_wq_reset(wq); + wq->ie = NULL; wq->client_count = 0; } diff --git a/drivers/dma/idxd/dma.c b/drivers/dma/idxd/dma.c index c39e9483206a..2ce873994e33 100644 --- a/drivers/dma/idxd/dma.c +++ b/drivers/dma/idxd/dma.c @@ -21,20 +21,27 @@ static inline struct idxd_wq *to_idxd_wq(struct dma_chan *c) } void idxd_dma_complete_txd(struct idxd_desc *desc, - enum idxd_complete_type comp_type) + enum idxd_complete_type comp_type, + bool free_desc) { + struct idxd_device *idxd = desc->wq->idxd; struct dma_async_tx_descriptor *tx; struct dmaengine_result res; int complete = 1; - if (desc->completion->status == DSA_COMP_SUCCESS) + if (desc->completion->status == DSA_COMP_SUCCESS) { res.result = DMA_TRANS_NOERROR; - else if (desc->completion->status) + } else if (desc->completion->status) { + if (idxd->request_int_handles && comp_type != IDXD_COMPLETE_ABORT && + desc->completion->status == DSA_COMP_INT_HANDLE_INVAL && + idxd_queue_int_handle_resubmit(desc)) + return; res.result = DMA_TRANS_WRITE_FAILED; - else if (comp_type == IDXD_COMPLETE_ABORT) + } else if (comp_type == IDXD_COMPLETE_ABORT) { res.result = DMA_TRANS_ABORTED; - else + } else { complete = 0; + } tx = &desc->txd; if (complete && tx->cookie) { @@ -44,6 +51,9 @@ void idxd_dma_complete_txd(struct idxd_desc *desc, tx->callback = NULL; tx->callback_result = NULL; } + + if (free_desc) + idxd_free_desc(desc->wq, desc); } static void op_flag_setup(unsigned long flags, u32 *desc_flags) @@ -153,8 +163,10 @@ static dma_cookie_t idxd_dma_tx_submit(struct dma_async_tx_descriptor *tx) cookie = dma_cookie_assign(tx); rc = idxd_submit_desc(wq, desc); - if (rc < 0) + if (rc < 0) { + idxd_free_desc(wq, desc); 
return rc; + } return cookie; } @@ -310,7 +322,7 @@ static int idxd_dmaengine_drv_probe(struct idxd_dev *idxd_dev) return 0; err_dma: - idxd_wq_quiesce(wq); + __idxd_wq_quiesce(wq); percpu_ref_exit(&wq->wq_active); err_ref: idxd_wq_free_resources(wq); @@ -327,7 +339,7 @@ static void idxd_dmaengine_drv_remove(struct idxd_dev *idxd_dev) struct idxd_wq *wq = idxd_dev_to_wq(idxd_dev); mutex_lock(&wq->wq_lock); - idxd_wq_quiesce(wq); + __idxd_wq_quiesce(wq); idxd_unregister_dma_channel(wq); idxd_wq_free_resources(wq); __drv_disable_wq(wq); diff --git a/drivers/dma/idxd/idxd.h b/drivers/dma/idxd/idxd.h index 0cf8d3145870..6b9bfdc557fe 100644 --- a/drivers/dma/idxd/idxd.h +++ b/drivers/dma/idxd/idxd.h @@ -10,6 +10,7 @@ #include <linux/cdev.h> #include <linux/idr.h> #include <linux/pci.h> +#include <linux/ioasid.h> #include <linux/perf_event.h> #include <uapi/linux/idxd.h> #include "registers.h" @@ -51,6 +52,9 @@ enum idxd_type { #define IDXD_NAME_SIZE 128 #define IDXD_PMU_EVENT_MAX 64 +#define IDXD_ENQCMDS_RETRIES 32 +#define IDXD_ENQCMDS_MAX_RETRIES 64 + struct idxd_device_driver { const char *name; enum idxd_dev_type *type; @@ -64,6 +68,7 @@ extern struct idxd_device_driver idxd_drv; extern struct idxd_device_driver idxd_dmaengine_drv; extern struct idxd_device_driver idxd_user_drv; +#define INVALID_INT_HANDLE -1 struct idxd_irq_entry { struct idxd_device *idxd; int id; @@ -75,6 +80,9 @@ struct idxd_irq_entry { * and irq thread processing error descriptor. */ spinlock_t list_lock; + int int_handle; + struct idxd_wq *wq; + ioasid_t pasid; }; struct idxd_group { @@ -145,6 +153,10 @@ struct idxd_cdev { #define WQ_NAME_SIZE 1024 #define WQ_TYPE_SIZE 10 +#define WQ_DEFAULT_QUEUE_DEPTH 16 +#define WQ_DEFAULT_MAX_XFER SZ_2M +#define WQ_DEFAULT_MAX_BATCH 32 + enum idxd_op_type { IDXD_OP_BLOCK = 0, IDXD_OP_NONBLOCK = 1, @@ -164,13 +176,16 @@ struct idxd_dma_chan { struct idxd_wq { void __iomem *portal; u32 portal_offset; + unsigned int enqcmds_retries; struct percpu_ref wq_active; struct completion wq_dead; + struct completion wq_resurrect; struct idxd_dev idxd_dev; struct idxd_cdev *idxd_cdev; struct wait_queue_head err_queue; struct idxd_device *idxd; int id; + struct idxd_irq_entry *ie; enum idxd_wq_type type; struct idxd_group *group; int client_count; @@ -266,6 +281,8 @@ struct idxd_device { unsigned int pasid; int num_groups; + int irq_cnt; + bool request_int_handles; u32 msix_perm_offset; u32 wqcfg_offset; @@ -292,8 +309,6 @@ struct idxd_device { struct workqueue_struct *wq; struct work_struct work; - int *int_handles; - struct idxd_pmu *idxd_pmu; }; @@ -518,6 +533,7 @@ void idxd_unregister_devices(struct idxd_device *idxd); int idxd_register_driver(void); void idxd_unregister_driver(void); void idxd_wqs_quiesce(struct idxd_device *idxd); +bool idxd_queue_int_handle_resubmit(struct idxd_desc *desc); /* device interrupt control */ void idxd_msix_perm_setup(struct idxd_device *idxd); @@ -564,6 +580,7 @@ int idxd_wq_map_portal(struct idxd_wq *wq); void idxd_wq_unmap_portal(struct idxd_wq *wq); int idxd_wq_set_pasid(struct idxd_wq *wq, int pasid); int idxd_wq_disable_pasid(struct idxd_wq *wq); +void __idxd_wq_quiesce(struct idxd_wq *wq); void idxd_wq_quiesce(struct idxd_wq *wq); int idxd_wq_init_percpu_ref(struct idxd_wq *wq); @@ -571,6 +588,7 @@ int idxd_wq_init_percpu_ref(struct idxd_wq *wq); int idxd_submit_desc(struct idxd_wq *wq, struct idxd_desc *desc); struct idxd_desc *idxd_alloc_desc(struct idxd_wq *wq, enum idxd_op_type optype); void idxd_free_desc(struct idxd_wq *wq, struct idxd_desc 
*desc); +int idxd_enqcmds(struct idxd_wq *wq, void __iomem *portal, const void *desc); /* dmaengine */ int idxd_register_dma_device(struct idxd_device *idxd); @@ -579,7 +597,7 @@ int idxd_register_dma_channel(struct idxd_wq *wq); void idxd_unregister_dma_channel(struct idxd_wq *wq); void idxd_parse_completion_status(u8 status, enum dmaengine_tx_result *res); void idxd_dma_complete_txd(struct idxd_desc *desc, - enum idxd_complete_type comp_type); + enum idxd_complete_type comp_type, bool free_desc); /* cdev */ int idxd_cdev_register(void); @@ -603,10 +621,4 @@ static inline void perfmon_init(void) {} static inline void perfmon_exit(void) {} #endif -static inline void complete_desc(struct idxd_desc *desc, enum idxd_complete_type reason) -{ - idxd_dma_complete_txd(desc, reason); - idxd_free_desc(desc->wq, desc); -} - #endif diff --git a/drivers/dma/idxd/init.c b/drivers/dma/idxd/init.c index 7bf03f371ce1..8b3afce9ea67 100644 --- a/drivers/dma/idxd/init.c +++ b/drivers/dma/idxd/init.c @@ -81,6 +81,7 @@ static int idxd_setup_interrupts(struct idxd_device *idxd) dev_err(dev, "Not MSI-X interrupt capable.\n"); return -ENOSPC; } + idxd->irq_cnt = msixcnt; rc = pci_alloc_irq_vectors(pdev, msixcnt, msixcnt, PCI_IRQ_MSIX); if (rc != msixcnt) { @@ -103,7 +104,18 @@ static int idxd_setup_interrupts(struct idxd_device *idxd) for (i = 0; i < msixcnt; i++) { idxd->irq_entries[i].id = i; idxd->irq_entries[i].idxd = idxd; + /* + * Association of WQ should be assigned starting with irq_entry 1. + * irq_entry 0 is for misc interrupts and has no wq association + */ + if (i > 0) + idxd->irq_entries[i].wq = idxd->wqs[i - 1]; idxd->irq_entries[i].vector = pci_irq_vector(pdev, i); + idxd->irq_entries[i].int_handle = INVALID_INT_HANDLE; + if (device_pasid_enabled(idxd) && i > 0) + idxd->irq_entries[i].pasid = idxd->pasid; + else + idxd->irq_entries[i].pasid = INVALID_IOASID; spin_lock_init(&idxd->irq_entries[i].list_lock); } @@ -135,22 +147,14 @@ static int idxd_setup_interrupts(struct idxd_device *idxd) } dev_dbg(dev, "Allocated idxd-msix %d for vector %d\n", i, irq_entry->vector); - if (idxd->hw.cmd_cap & BIT(IDXD_CMD_REQUEST_INT_HANDLE)) { - /* - * The MSIX vector enumeration starts at 1 with vector 0 being the - * misc interrupt that handles non I/O completion events. The - * interrupt handles are for IMS enumeration on guest. The misc - * interrupt vector does not require a handle and therefore we start - * the int_handles at index 0. Since 'i' starts at 1, the first - * int_handles index will be 0. 
- */ - rc = idxd_device_request_int_handle(idxd, i, &idxd->int_handles[i - 1], + if (idxd->request_int_handles) { + rc = idxd_device_request_int_handle(idxd, i, &irq_entry->int_handle, IDXD_IRQ_MSIX); if (rc < 0) { free_irq(irq_entry->vector, irq_entry); goto err_wq_irqs; } - dev_dbg(dev, "int handle requested: %u\n", idxd->int_handles[i - 1]); + dev_dbg(dev, "int handle requested: %u\n", irq_entry->int_handle); } } @@ -161,9 +165,15 @@ static int idxd_setup_interrupts(struct idxd_device *idxd) while (--i >= 0) { irq_entry = &idxd->irq_entries[i]; free_irq(irq_entry->vector, irq_entry); - if (i != 0) - idxd_device_release_int_handle(idxd, - idxd->int_handles[i], IDXD_IRQ_MSIX); + if (irq_entry->int_handle != INVALID_INT_HANDLE) { + idxd_device_release_int_handle(idxd, irq_entry->int_handle, + IDXD_IRQ_MSIX); + irq_entry->int_handle = INVALID_INT_HANDLE; + irq_entry->pasid = INVALID_IOASID; + } + irq_entry->vector = -1; + irq_entry->wq = NULL; + irq_entry->idxd = NULL; } err_misc_irq: /* Disable error interrupt generation */ @@ -179,21 +189,19 @@ static void idxd_cleanup_interrupts(struct idxd_device *idxd) { struct pci_dev *pdev = idxd->pdev; struct idxd_irq_entry *irq_entry; - int i, msixcnt; - - msixcnt = pci_msix_vec_count(pdev); - if (msixcnt <= 0) - return; - - irq_entry = &idxd->irq_entries[0]; - free_irq(irq_entry->vector, irq_entry); - - for (i = 1; i < msixcnt; i++) { + int i; + for (i = 0; i < idxd->irq_cnt; i++) { irq_entry = &idxd->irq_entries[i]; - if (idxd->hw.cmd_cap & BIT(IDXD_CMD_RELEASE_INT_HANDLE)) - idxd_device_release_int_handle(idxd, idxd->int_handles[i], + if (irq_entry->int_handle != INVALID_INT_HANDLE) { + idxd_device_release_int_handle(idxd, irq_entry->int_handle, IDXD_IRQ_MSIX); + irq_entry->int_handle = INVALID_INT_HANDLE; + irq_entry->pasid = INVALID_IOASID; + } + irq_entry->vector = -1; + irq_entry->wq = NULL; + irq_entry->idxd = NULL; free_irq(irq_entry->vector, irq_entry); } @@ -237,8 +245,10 @@ static int idxd_setup_wqs(struct idxd_device *idxd) mutex_init(&wq->wq_lock); init_waitqueue_head(&wq->err_queue); init_completion(&wq->wq_dead); - wq->max_xfer_bytes = idxd->max_xfer_bytes; - wq->max_batch_size = idxd->max_batch_size; + init_completion(&wq->wq_resurrect); + wq->max_xfer_bytes = WQ_DEFAULT_MAX_XFER; + wq->max_batch_size = WQ_DEFAULT_MAX_BATCH; + wq->enqcmds_retries = IDXD_ENQCMDS_RETRIES; wq->wqcfg = kzalloc_node(idxd->wqcfg_size, GFP_KERNEL, dev_to_node(dev)); if (!wq->wqcfg) { put_device(conf_dev); @@ -379,13 +389,6 @@ static int idxd_setup_internals(struct idxd_device *idxd) init_waitqueue_head(&idxd->cmd_waitq); - if (idxd->hw.cmd_cap & BIT(IDXD_CMD_REQUEST_INT_HANDLE)) { - idxd->int_handles = kcalloc_node(idxd->max_wqs, sizeof(int), GFP_KERNEL, - dev_to_node(dev)); - if (!idxd->int_handles) - return -ENOMEM; - } - rc = idxd_setup_wqs(idxd); if (rc < 0) goto err_wqs; @@ -416,7 +419,6 @@ static int idxd_setup_internals(struct idxd_device *idxd) for (i = 0; i < idxd->max_wqs; i++) put_device(wq_confdev(idxd->wqs[i])); err_wqs: - kfree(idxd->int_handles); return rc; } @@ -451,6 +453,10 @@ static void idxd_read_caps(struct idxd_device *idxd) dev_dbg(dev, "cmd_cap: %#x\n", idxd->hw.cmd_cap); } + /* reading command capabilities */ + if (idxd->hw.cmd_cap & BIT(IDXD_CMD_REQUEST_INT_HANDLE)) + idxd->request_int_handles = true; + idxd->max_xfer_bytes = 1ULL << idxd->hw.gen_cap.max_xfer_shift; dev_dbg(dev, "max xfer size: %llu bytes\n", idxd->max_xfer_bytes); idxd->max_batch_size = 1U << idxd->hw.gen_cap.max_batch_shift; @@ -717,10 +723,8 @@ static void 
idxd_flush_pending_llist(struct idxd_irq_entry *ie) if (!head) return; - llist_for_each_entry_safe(desc, itr, head, llnode) { - idxd_dma_complete_txd(desc, IDXD_COMPLETE_ABORT); - idxd_free_desc(desc->wq, desc); - } + llist_for_each_entry_safe(desc, itr, head, llnode) + idxd_dma_complete_txd(desc, IDXD_COMPLETE_ABORT, true); } static void idxd_flush_work_list(struct idxd_irq_entry *ie) @@ -729,8 +733,7 @@ static void idxd_flush_work_list(struct idxd_irq_entry *ie) list_for_each_entry_safe(desc, iter, &ie->work_list, list) { list_del(&desc->list); - idxd_dma_complete_txd(desc, IDXD_COMPLETE_ABORT); - idxd_free_desc(desc->wq, desc); + idxd_dma_complete_txd(desc, IDXD_COMPLETE_ABORT, true); } } @@ -751,15 +754,15 @@ static void idxd_release_int_handles(struct idxd_device *idxd) struct device *dev = &idxd->pdev->dev; int i, rc; - for (i = 0; i < idxd->num_wq_irqs; i++) { - if (idxd->hw.cmd_cap & BIT(IDXD_CMD_RELEASE_INT_HANDLE)) { - rc = idxd_device_release_int_handle(idxd, idxd->int_handles[i], - IDXD_IRQ_MSIX); + for (i = 1; i < idxd->irq_cnt; i++) { + struct idxd_irq_entry *ie = &idxd->irq_entries[i]; + + if (ie->int_handle != INVALID_INT_HANDLE) { + rc = idxd_device_release_int_handle(idxd, ie->int_handle, IDXD_IRQ_MSIX); if (rc < 0) - dev_warn(dev, "irq handle %d release failed\n", - idxd->int_handles[i]); + dev_warn(dev, "irq handle %d release failed\n", ie->int_handle); else - dev_dbg(dev, "int handle requested: %u\n", idxd->int_handles[i]); + dev_dbg(dev, "int handle released: %u\n", ie->int_handle); } } } diff --git a/drivers/dma/idxd/irq.c b/drivers/dma/idxd/irq.c index 17f2f8a31b63..0b0055a0ad2a 100644 --- a/drivers/dma/idxd/irq.c +++ b/drivers/dma/idxd/irq.c @@ -6,6 +6,7 @@ #include <linux/pci.h> #include <linux/io-64-nonatomic-lo-hi.h> #include <linux/dmaengine.h> +#include <linux/delay.h> #include <uapi/linux/idxd.h> #include "../dmaengine.h" #include "idxd.h" @@ -22,6 +23,16 @@ struct idxd_fault { struct idxd_device *idxd; }; +struct idxd_resubmit { + struct work_struct work; + struct idxd_desc *desc; +}; + +struct idxd_int_handle_revoke { + struct work_struct work; + struct idxd_device *idxd; +}; + static void idxd_device_reinit(struct work_struct *work) { struct idxd_device *idxd = container_of(work, struct idxd_device, work); @@ -55,6 +66,159 @@ static void idxd_device_reinit(struct work_struct *work) idxd_device_clear_state(idxd); } +/* + * The function sends a drain descriptor for the interrupt handle. The drain ensures + * all descriptors with this interrupt handle is flushed and the interrupt + * will allow the cleanup of the outstanding descriptors. + */ +static void idxd_int_handle_revoke_drain(struct idxd_irq_entry *ie) +{ + struct idxd_wq *wq = ie->wq; + struct idxd_device *idxd = ie->idxd; + struct device *dev = &idxd->pdev->dev; + struct dsa_hw_desc desc = {}; + void __iomem *portal; + int rc; + + /* Issue a simple drain operation with interrupt but no completion record */ + desc.flags = IDXD_OP_FLAG_RCI; + desc.opcode = DSA_OPCODE_DRAIN; + desc.priv = 1; + + if (ie->pasid != INVALID_IOASID) + desc.pasid = ie->pasid; + desc.int_handle = ie->int_handle; + portal = idxd_wq_portal_addr(wq); + + /* + * The wmb() makes sure that the descriptor is all there before we + * issue. + */ + wmb(); + if (wq_dedicated(wq)) { + iosubmit_cmds512(portal, &desc, 1); + } else { + rc = idxd_enqcmds(wq, portal, &desc); + /* This should not fail unless hardware failed. 
*/ + if (rc < 0) + dev_warn(dev, "Failed to submit drain desc on wq %d\n", wq->id); + } +} + +static void idxd_abort_invalid_int_handle_descs(struct idxd_irq_entry *ie) +{ + LIST_HEAD(flist); + struct idxd_desc *d, *t; + struct llist_node *head; + + spin_lock(&ie->list_lock); + head = llist_del_all(&ie->pending_llist); + if (head) { + llist_for_each_entry_safe(d, t, head, llnode) + list_add_tail(&d->list, &ie->work_list); + } + + list_for_each_entry_safe(d, t, &ie->work_list, list) { + if (d->completion->status == DSA_COMP_INT_HANDLE_INVAL) + list_move_tail(&d->list, &flist); + } + spin_unlock(&ie->list_lock); + + list_for_each_entry_safe(d, t, &flist, list) { + list_del(&d->list); + idxd_dma_complete_txd(d, IDXD_COMPLETE_ABORT, true); + } +} + +static void idxd_int_handle_revoke(struct work_struct *work) +{ + struct idxd_int_handle_revoke *revoke = + container_of(work, struct idxd_int_handle_revoke, work); + struct idxd_device *idxd = revoke->idxd; + struct pci_dev *pdev = idxd->pdev; + struct device *dev = &pdev->dev; + int i, new_handle, rc; + + if (!idxd->request_int_handles) { + kfree(revoke); + dev_warn(dev, "Unexpected int handle refresh interrupt.\n"); + return; + } + + /* + * The loop attempts to acquire new interrupt handle for all interrupt + * vectors that supports a handle. If a new interrupt handle is acquired and the + * wq is kernel type, the driver will kill the percpu_ref to pause all + * ongoing descriptor submissions. The interrupt handle is then changed. + * After change, the percpu_ref is revived and all the pending submissions + * are woken to try again. A drain is sent to for the interrupt handle + * at the end to make sure all invalid int handle descriptors are processed. + */ + for (i = 1; i < idxd->irq_cnt; i++) { + struct idxd_irq_entry *ie = &idxd->irq_entries[i]; + struct idxd_wq *wq = ie->wq; + + rc = idxd_device_request_int_handle(idxd, i, &new_handle, IDXD_IRQ_MSIX); + if (rc < 0) { + dev_warn(dev, "get int handle %d failed: %d\n", i, rc); + /* + * Failed to acquire new interrupt handle. Kill the WQ + * and release all the pending submitters. The submitters will + * get error return code and handle appropriately. + */ + ie->int_handle = INVALID_INT_HANDLE; + idxd_wq_quiesce(wq); + idxd_abort_invalid_int_handle_descs(ie); + continue; + } + + /* No change in interrupt handle, nothing needs to be done */ + if (ie->int_handle == new_handle) + continue; + + if (wq->state != IDXD_WQ_ENABLED || wq->type != IDXD_WQT_KERNEL) { + /* + * All the MSIX interrupts are allocated at once during probe. + * Therefore we need to update all interrupts even if the WQ + * isn't supporting interrupt operations. + */ + ie->int_handle = new_handle; + continue; + } + + mutex_lock(&wq->wq_lock); + reinit_completion(&wq->wq_resurrect); + + /* Kill percpu_ref to pause additional descriptor submissions */ + percpu_ref_kill(&wq->wq_active); + + /* Wait for all submitters quiesce before we change interrupt handle */ + wait_for_completion(&wq->wq_dead); + + ie->int_handle = new_handle; + + /* Revive percpu ref and wake up all the waiting submitters */ + percpu_ref_reinit(&wq->wq_active); + complete_all(&wq->wq_resurrect); + mutex_unlock(&wq->wq_lock); + + /* + * The delay here is to wait for all possible MOVDIR64B that + * are issued before percpu_ref_kill() has happened to have + * reached the PCIe domain before the drain is issued. 
The driver + * needs to ensure that the drain descriptor issued does not pass + * all the other issued descriptors that contain the invalid + * interrupt handle in order to ensure that the drain descriptor + * interrupt will allow the cleanup of all the descriptors with + * invalid interrupt handle. + */ + if (wq_dedicated(wq)) + udelay(100); + idxd_int_handle_revoke_drain(ie); + } + kfree(revoke); +} + static int process_misc_interrupts(struct idxd_device *idxd, u32 cause) { struct device *dev = &idxd->pdev->dev; @@ -101,6 +265,23 @@ static int process_misc_interrupts(struct idxd_device *idxd, u32 cause) err = true; } + if (cause & IDXD_INTC_INT_HANDLE_REVOKED) { + struct idxd_int_handle_revoke *revoke; + + val |= IDXD_INTC_INT_HANDLE_REVOKED; + + revoke = kzalloc(sizeof(*revoke), GFP_ATOMIC); + if (revoke) { + revoke->idxd = idxd; + INIT_WORK(&revoke->work, idxd_int_handle_revoke); + queue_work(idxd->wq, &revoke->work); + + } else { + dev_err(dev, "Failed to allocate work for int handle revoke\n"); + idxd_wqs_quiesce(idxd); + } + } + if (cause & IDXD_INTC_CMD) { val |= IDXD_INTC_CMD; complete(idxd->cmd_done); @@ -177,6 +358,51 @@ irqreturn_t idxd_misc_thread(int vec, void *data) return IRQ_HANDLED; } +static void idxd_int_handle_resubmit_work(struct work_struct *work) +{ + struct idxd_resubmit *irw = container_of(work, struct idxd_resubmit, work); + struct idxd_desc *desc = irw->desc; + struct idxd_wq *wq = desc->wq; + int rc; + + desc->completion->status = 0; + rc = idxd_submit_desc(wq, desc); + if (rc < 0) { + dev_dbg(&wq->idxd->pdev->dev, "Failed to resubmit desc %d to wq %d.\n", + desc->id, wq->id); + /* + * If the error is not -EAGAIN, it means the submission failed due to wq + * has been killed instead of ENQCMDS failure. Here the driver needs to + * notify the submitter of the failure by reporting abort status. + * + * -EAGAIN comes from ENQCMDS failure. idxd_submit_desc() will handle the + * abort. + */ + if (rc != -EAGAIN) { + desc->completion->status = IDXD_COMP_DESC_ABORT; + idxd_dma_complete_txd(desc, IDXD_COMPLETE_ABORT, false); + } + idxd_free_desc(wq, desc); + } + kfree(irw); +} + +bool idxd_queue_int_handle_resubmit(struct idxd_desc *desc) +{ + struct idxd_wq *wq = desc->wq; + struct idxd_device *idxd = wq->idxd; + struct idxd_resubmit *irw; + + irw = kzalloc(sizeof(*irw), GFP_KERNEL); + if (!irw) + return false; + + irw->desc = desc; + INIT_WORK(&irw->work, idxd_int_handle_resubmit_work); + queue_work(idxd->wq, &irw->work); + return true; +} + static void irq_process_pending_llist(struct idxd_irq_entry *irq_entry) { struct idxd_desc *desc, *t; @@ -195,11 +421,11 @@ static void irq_process_pending_llist(struct idxd_irq_entry *irq_entry) * and 0xff, which DSA_COMP_STATUS_MASK can mask out. */ if (unlikely(desc->completion->status == IDXD_COMP_DESC_ABORT)) { - complete_desc(desc, IDXD_COMPLETE_ABORT); + idxd_dma_complete_txd(desc, IDXD_COMPLETE_ABORT, true); continue; } - complete_desc(desc, IDXD_COMPLETE_NORMAL); + idxd_dma_complete_txd(desc, IDXD_COMPLETE_NORMAL, true); } else { spin_lock(&irq_entry->list_lock); list_add_tail(&desc->list, @@ -238,11 +464,11 @@ static void irq_process_work_list(struct idxd_irq_entry *irq_entry) * and 0xff, which DSA_COMP_STATUS_MASK can mask out. 
*/ if (unlikely(desc->completion->status == IDXD_COMP_DESC_ABORT)) { - complete_desc(desc, IDXD_COMPLETE_ABORT); + idxd_dma_complete_txd(desc, IDXD_COMPLETE_ABORT, true); continue; } - complete_desc(desc, IDXD_COMPLETE_NORMAL); + idxd_dma_complete_txd(desc, IDXD_COMPLETE_NORMAL, true); } } diff --git a/drivers/dma/idxd/registers.h b/drivers/dma/idxd/registers.h index 262c8220adbd..8e396698c22b 100644 --- a/drivers/dma/idxd/registers.h +++ b/drivers/dma/idxd/registers.h @@ -158,6 +158,7 @@ enum idxd_device_reset_type { #define IDXD_INTC_OCCUPY 0x04 #define IDXD_INTC_PERFMON_OVFL 0x08 #define IDXD_INTC_HALT_STATE 0x10 +#define IDXD_INTC_INT_HANDLE_REVOKED 0x80000000 #define IDXD_CMD_OFFSET 0xa0 union idxd_command_reg { diff --git a/drivers/dma/idxd/submit.c b/drivers/dma/idxd/submit.c index de76fb4abac2..569815a84e95 100644 --- a/drivers/dma/idxd/submit.c +++ b/drivers/dma/idxd/submit.c @@ -21,15 +21,6 @@ static struct idxd_desc *__get_desc(struct idxd_wq *wq, int idx, int cpu) if (device_pasid_enabled(idxd)) desc->hw->pasid = idxd->pasid; - /* - * On host, MSIX vecotr 0 is used for misc interrupt. Therefore when we match - * vector 1:1 to the WQ id, we need to add 1 - */ - if (!idxd->int_handles) - desc->hw->int_handle = wq->id + 1; - else - desc->hw->int_handle = idxd->int_handles[wq->id]; - return desc; } @@ -129,24 +120,47 @@ static void llist_abort_desc(struct idxd_wq *wq, struct idxd_irq_entry *ie, spin_unlock(&ie->list_lock); if (found) - complete_desc(found, IDXD_COMPLETE_ABORT); + idxd_dma_complete_txd(found, IDXD_COMPLETE_ABORT, false); +} + +/* + * ENQCMDS typically fail when the WQ is inactive or busy. On host submission, the driver + * has better control of number of descriptors being submitted to a shared wq by limiting + * the number of driver allocated descriptors to the wq size. However, when the swq is + * exported to a guest kernel, it may be shared with multiple guest kernels. This means + * the likelihood of getting busy returned on the swq when submitting goes significantly up. + * Having a tunable retry mechanism allows the driver to keep trying for a bit before giving + * up. The sysfs knob can be tuned by the system administrator. + */ +int idxd_enqcmds(struct idxd_wq *wq, void __iomem *portal, const void *desc) +{ + int rc, retries = 0; + + do { + rc = enqcmds(portal, desc); + if (rc == 0) + break; + cpu_relax(); + } while (retries++ < wq->enqcmds_retries); + + return rc; } int idxd_submit_desc(struct idxd_wq *wq, struct idxd_desc *desc) { struct idxd_device *idxd = wq->idxd; struct idxd_irq_entry *ie = NULL; + u32 desc_flags = desc->hw->flags; void __iomem *portal; int rc; - if (idxd->state != IDXD_DEV_ENABLED) { - idxd_free_desc(wq, desc); + if (idxd->state != IDXD_DEV_ENABLED) return -EIO; - } if (!percpu_ref_tryget_live(&wq->wq_active)) { - idxd_free_desc(wq, desc); - return -ENXIO; + wait_for_completion(&wq->wq_resurrect); + if (!percpu_ref_tryget_live(&wq->wq_active)) + return -ENXIO; } portal = idxd_wq_portal_addr(wq); @@ -162,28 +176,25 @@ int idxd_submit_desc(struct idxd_wq *wq, struct idxd_desc *desc) * Pending the descriptor to the lockless list for the irq_entry * that we designated the descriptor to. 
*/ - if (desc->hw->flags & IDXD_OP_FLAG_RCI) { - ie = &idxd->irq_entries[wq->id + 1]; + if (desc_flags & IDXD_OP_FLAG_RCI) { + ie = wq->ie; + if (ie->int_handle == INVALID_INT_HANDLE) + desc->hw->int_handle = ie->id; + else + desc->hw->int_handle = ie->int_handle; + llist_add(&desc->llnode, &ie->pending_llist); } if (wq_dedicated(wq)) { iosubmit_cmds512(portal, desc->hw, 1); } else { - /* - * It's not likely that we would receive queue full rejection - * since the descriptor allocation gates at wq size. If we - * receive a -EAGAIN, that means something went wrong such as the - * device is not accepting descriptor at all. - */ - rc = enqcmds(portal, desc->hw); + rc = idxd_enqcmds(wq, portal, desc->hw); if (rc < 0) { percpu_ref_put(&wq->wq_active); /* abort operation frees the descriptor */ if (ie) llist_abort_desc(wq, ie, desc); - else - idxd_free_desc(wq, desc); return rc; } } diff --git a/drivers/dma/idxd/sysfs.c b/drivers/dma/idxd/sysfs.c index a9025be940db..c0fec88ff6c1 100644 --- a/drivers/dma/idxd/sysfs.c +++ b/drivers/dma/idxd/sysfs.c @@ -945,6 +945,41 @@ static ssize_t wq_occupancy_show(struct device *dev, struct device_attribute *at static struct device_attribute dev_attr_wq_occupancy = __ATTR(occupancy, 0444, wq_occupancy_show, NULL); +static ssize_t wq_enqcmds_retries_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct idxd_wq *wq = confdev_to_wq(dev); + + if (wq_dedicated(wq)) + return -EOPNOTSUPP; + + return sysfs_emit(buf, "%u\n", wq->enqcmds_retries); +} + +static ssize_t wq_enqcmds_retries_store(struct device *dev, struct device_attribute *attr, + const char *buf, size_t count) +{ + struct idxd_wq *wq = confdev_to_wq(dev); + int rc; + unsigned int retries; + + if (wq_dedicated(wq)) + return -EOPNOTSUPP; + + rc = kstrtouint(buf, 10, &retries); + if (rc < 0) + return rc; + + if (retries > IDXD_ENQCMDS_MAX_RETRIES) + retries = IDXD_ENQCMDS_MAX_RETRIES; + + wq->enqcmds_retries = retries; + return count; +} + +static struct device_attribute dev_attr_wq_enqcmds_retries = + __ATTR(enqcmds_retries, 0644, wq_enqcmds_retries_show, wq_enqcmds_retries_store); + static struct attribute *idxd_wq_attributes[] = { &dev_attr_wq_clients.attr, &dev_attr_wq_state.attr, @@ -961,6 +996,7 @@ static struct attribute *idxd_wq_attributes[] = { &dev_attr_wq_max_batch_size.attr, &dev_attr_wq_ats_disable.attr, &dev_attr_wq_occupancy.attr, + &dev_attr_wq_enqcmds_retries.attr, NULL, }; @@ -1269,7 +1305,6 @@ static void idxd_conf_device_release(struct device *dev) kfree(idxd->wqs); kfree(idxd->engines); kfree(idxd->irq_entries); - kfree(idxd->int_handles); ida_free(&idxd_ida, idxd->id); kfree(idxd); } diff --git a/drivers/dma/ppc4xx/adma.c b/drivers/dma/ppc4xx/adma.c index e2b5129c5f84..5e46e347e28b 100644 --- a/drivers/dma/ppc4xx/adma.c +++ b/drivers/dma/ppc4xx/adma.c @@ -3240,7 +3240,6 @@ static int ppc440spe_adma_dma2rxor_prep_src( struct ppc440spe_rxor *cursor, int index, int src_cnt, u32 addr) { - int rval = 0; u32 sign; struct ppc440spe_adma_desc_slot *desc = hdesc; int i; @@ -3348,7 +3347,7 @@ static int ppc440spe_adma_dma2rxor_prep_src( break; } - return rval; + return 0; } /** diff --git a/drivers/dma/qcom/gpi.c b/drivers/dma/qcom/gpi.c index 1a1b7d8458c9..94f3648f7483 100644 --- a/drivers/dma/qcom/gpi.c +++ b/drivers/dma/qcom/gpi.c @@ -2206,10 +2206,8 @@ static int gpi_probe(struct platform_device *pdev) /* set up irq */ ret = platform_get_irq(pdev, i); - if (ret < 0) { - dev_err(gpi_dev->dev, "platform_get_irq failed for %d:%d\n", i, ret); + if (ret < 0) return ret; 
- } gpii->irq = ret; /* set up channel specific register info */ diff --git a/drivers/dma/sh/shdma-base.c b/drivers/dma/sh/shdma-base.c index 41c6bc650fa3..158e5e7defae 100644 --- a/drivers/dma/sh/shdma-base.c +++ b/drivers/dma/sh/shdma-base.c @@ -1034,9 +1034,7 @@ EXPORT_SYMBOL(shdma_cleanup); static int __init shdma_enter(void) { - shdma_slave_used = kcalloc(DIV_ROUND_UP(slave_num, BITS_PER_LONG), - sizeof(long), - GFP_KERNEL); + shdma_slave_used = bitmap_zalloc(slave_num, GFP_KERNEL); if (!shdma_slave_used) return -ENOMEM; return 0; @@ -1045,7 +1043,7 @@ module_init(shdma_enter); static void __exit shdma_exit(void) { - kfree(shdma_slave_used); + bitmap_free(shdma_slave_used); } module_exit(shdma_exit); diff --git a/drivers/dma/stm32-mdma.c b/drivers/dma/stm32-mdma.c index d30a4a28d3bf..76cf2e333e63 100644 --- a/drivers/dma/stm32-mdma.c +++ b/drivers/dma/stm32-mdma.c @@ -10,6 +10,7 @@ * Inspired by stm32-dma.c and dma-jz4780.c */ +#include <linux/bitfield.h> #include <linux/clk.h> #include <linux/delay.h> #include <linux/dmaengine.h> @@ -32,13 +33,6 @@ #include "virt-dma.h" -/* MDMA Generic getter/setter */ -#define STM32_MDMA_SHIFT(n) (ffs(n) - 1) -#define STM32_MDMA_SET(n, mask) (((n) << STM32_MDMA_SHIFT(mask)) & \ - (mask)) -#define STM32_MDMA_GET(n, mask) (((n) & (mask)) >> \ - STM32_MDMA_SHIFT(mask)) - #define STM32_MDMA_GISR0 0x0000 /* MDMA Int Status Reg 1 */ #define STM32_MDMA_GISR1 0x0004 /* MDMA Int Status Reg 2 */ @@ -80,8 +74,7 @@ #define STM32_MDMA_CCR_HEX BIT(13) #define STM32_MDMA_CCR_BEX BIT(12) #define STM32_MDMA_CCR_PL_MASK GENMASK(7, 6) -#define STM32_MDMA_CCR_PL(n) STM32_MDMA_SET(n, \ - STM32_MDMA_CCR_PL_MASK) +#define STM32_MDMA_CCR_PL(n) FIELD_PREP(STM32_MDMA_CCR_PL_MASK, (n)) #define STM32_MDMA_CCR_TCIE BIT(5) #define STM32_MDMA_CCR_BTIE BIT(4) #define STM32_MDMA_CCR_BRTIE BIT(3) @@ -99,48 +92,33 @@ #define STM32_MDMA_CTCR_BWM BIT(31) #define STM32_MDMA_CTCR_SWRM BIT(30) #define STM32_MDMA_CTCR_TRGM_MSK GENMASK(29, 28) -#define STM32_MDMA_CTCR_TRGM(n) STM32_MDMA_SET((n), \ - STM32_MDMA_CTCR_TRGM_MSK) -#define STM32_MDMA_CTCR_TRGM_GET(n) STM32_MDMA_GET((n), \ - STM32_MDMA_CTCR_TRGM_MSK) +#define STM32_MDMA_CTCR_TRGM(n) FIELD_PREP(STM32_MDMA_CTCR_TRGM_MSK, (n)) +#define STM32_MDMA_CTCR_TRGM_GET(n) FIELD_GET(STM32_MDMA_CTCR_TRGM_MSK, (n)) #define STM32_MDMA_CTCR_PAM_MASK GENMASK(27, 26) -#define STM32_MDMA_CTCR_PAM(n) STM32_MDMA_SET(n, \ - STM32_MDMA_CTCR_PAM_MASK) +#define STM32_MDMA_CTCR_PAM(n) FIELD_PREP(STM32_MDMA_CTCR_PAM_MASK, (n)) #define STM32_MDMA_CTCR_PKE BIT(25) #define STM32_MDMA_CTCR_TLEN_MSK GENMASK(24, 18) -#define STM32_MDMA_CTCR_TLEN(n) STM32_MDMA_SET((n), \ - STM32_MDMA_CTCR_TLEN_MSK) -#define STM32_MDMA_CTCR_TLEN_GET(n) STM32_MDMA_GET((n), \ - STM32_MDMA_CTCR_TLEN_MSK) +#define STM32_MDMA_CTCR_TLEN(n) FIELD_PREP(STM32_MDMA_CTCR_TLEN_MSK, (n)) +#define STM32_MDMA_CTCR_TLEN_GET(n) FIELD_GET(STM32_MDMA_CTCR_TLEN_MSK, (n)) #define STM32_MDMA_CTCR_LEN2_MSK GENMASK(25, 18) -#define STM32_MDMA_CTCR_LEN2(n) STM32_MDMA_SET((n), \ - STM32_MDMA_CTCR_LEN2_MSK) -#define STM32_MDMA_CTCR_LEN2_GET(n) STM32_MDMA_GET((n), \ - STM32_MDMA_CTCR_LEN2_MSK) +#define STM32_MDMA_CTCR_LEN2(n) FIELD_PREP(STM32_MDMA_CTCR_LEN2_MSK, (n)) +#define STM32_MDMA_CTCR_LEN2_GET(n) FIELD_GET(STM32_MDMA_CTCR_LEN2_MSK, (n)) #define STM32_MDMA_CTCR_DBURST_MASK GENMASK(17, 15) -#define STM32_MDMA_CTCR_DBURST(n) STM32_MDMA_SET(n, \ - STM32_MDMA_CTCR_DBURST_MASK) +#define STM32_MDMA_CTCR_DBURST(n) FIELD_PREP(STM32_MDMA_CTCR_DBURST_MASK, (n)) #define STM32_MDMA_CTCR_SBURST_MASK GENMASK(14, 12) 
-#define STM32_MDMA_CTCR_SBURST(n) STM32_MDMA_SET(n, \ - STM32_MDMA_CTCR_SBURST_MASK) +#define STM32_MDMA_CTCR_SBURST(n) FIELD_PREP(STM32_MDMA_CTCR_SBURST_MASK, (n)) #define STM32_MDMA_CTCR_DINCOS_MASK GENMASK(11, 10) -#define STM32_MDMA_CTCR_DINCOS(n) STM32_MDMA_SET((n), \ - STM32_MDMA_CTCR_DINCOS_MASK) +#define STM32_MDMA_CTCR_DINCOS(n) FIELD_PREP(STM32_MDMA_CTCR_DINCOS_MASK, (n)) #define STM32_MDMA_CTCR_SINCOS_MASK GENMASK(9, 8) -#define STM32_MDMA_CTCR_SINCOS(n) STM32_MDMA_SET((n), \ - STM32_MDMA_CTCR_SINCOS_MASK) +#define STM32_MDMA_CTCR_SINCOS(n) FIELD_PREP(STM32_MDMA_CTCR_SINCOS_MASK, (n)) #define STM32_MDMA_CTCR_DSIZE_MASK GENMASK(7, 6) -#define STM32_MDMA_CTCR_DSIZE(n) STM32_MDMA_SET(n, \ - STM32_MDMA_CTCR_DSIZE_MASK) +#define STM32_MDMA_CTCR_DSIZE(n) FIELD_PREP(STM32_MDMA_CTCR_DSIZE_MASK, (n)) #define STM32_MDMA_CTCR_SSIZE_MASK GENMASK(5, 4) -#define STM32_MDMA_CTCR_SSIZE(n) STM32_MDMA_SET(n, \ - STM32_MDMA_CTCR_SSIZE_MASK) +#define STM32_MDMA_CTCR_SSIZE(n) FIELD_PREP(STM32_MDMA_CTCR_SSIZE_MASK, (n)) #define STM32_MDMA_CTCR_DINC_MASK GENMASK(3, 2) -#define STM32_MDMA_CTCR_DINC(n) STM32_MDMA_SET((n), \ - STM32_MDMA_CTCR_DINC_MASK) +#define STM32_MDMA_CTCR_DINC(n) FIELD_PREP(STM32_MDMA_CTCR_DINC_MASK, (n)) #define STM32_MDMA_CTCR_SINC_MASK GENMASK(1, 0) -#define STM32_MDMA_CTCR_SINC(n) STM32_MDMA_SET((n), \ - STM32_MDMA_CTCR_SINC_MASK) +#define STM32_MDMA_CTCR_SINC(n) FIELD_PREP(STM32_MDMA_CTCR_SINC_MASK, (n)) #define STM32_MDMA_CTCR_CFG_MASK (STM32_MDMA_CTCR_SINC_MASK \ | STM32_MDMA_CTCR_DINC_MASK \ | STM32_MDMA_CTCR_SINCOS_MASK \ @@ -151,16 +129,13 @@ /* MDMA Channel x block number of data register */ #define STM32_MDMA_CBNDTR(x) (0x54 + 0x40 * (x)) #define STM32_MDMA_CBNDTR_BRC_MK GENMASK(31, 20) -#define STM32_MDMA_CBNDTR_BRC(n) STM32_MDMA_SET(n, \ - STM32_MDMA_CBNDTR_BRC_MK) -#define STM32_MDMA_CBNDTR_BRC_GET(n) STM32_MDMA_GET((n), \ - STM32_MDMA_CBNDTR_BRC_MK) +#define STM32_MDMA_CBNDTR_BRC(n) FIELD_PREP(STM32_MDMA_CBNDTR_BRC_MK, (n)) +#define STM32_MDMA_CBNDTR_BRC_GET(n) FIELD_GET(STM32_MDMA_CBNDTR_BRC_MK, (n)) #define STM32_MDMA_CBNDTR_BRDUM BIT(19) #define STM32_MDMA_CBNDTR_BRSUM BIT(18) #define STM32_MDMA_CBNDTR_BNDT_MASK GENMASK(16, 0) -#define STM32_MDMA_CBNDTR_BNDT(n) STM32_MDMA_SET(n, \ - STM32_MDMA_CBNDTR_BNDT_MASK) +#define STM32_MDMA_CBNDTR_BNDT(n) FIELD_PREP(STM32_MDMA_CBNDTR_BNDT_MASK, (n)) /* MDMA Channel x source address register */ #define STM32_MDMA_CSAR(x) (0x58 + 0x40 * (x)) @@ -171,11 +146,9 @@ /* MDMA Channel x block repeat address update register */ #define STM32_MDMA_CBRUR(x) (0x60 + 0x40 * (x)) #define STM32_MDMA_CBRUR_DUV_MASK GENMASK(31, 16) -#define STM32_MDMA_CBRUR_DUV(n) STM32_MDMA_SET(n, \ - STM32_MDMA_CBRUR_DUV_MASK) +#define STM32_MDMA_CBRUR_DUV(n) FIELD_PREP(STM32_MDMA_CBRUR_DUV_MASK, (n)) #define STM32_MDMA_CBRUR_SUV_MASK GENMASK(15, 0) -#define STM32_MDMA_CBRUR_SUV(n) STM32_MDMA_SET(n, \ - STM32_MDMA_CBRUR_SUV_MASK) +#define STM32_MDMA_CBRUR_SUV(n) FIELD_PREP(STM32_MDMA_CBRUR_SUV_MASK, (n)) /* MDMA Channel x link address register */ #define STM32_MDMA_CLAR(x) (0x64 + 0x40 * (x)) @@ -185,8 +158,7 @@ #define STM32_MDMA_CTBR_DBUS BIT(17) #define STM32_MDMA_CTBR_SBUS BIT(16) #define STM32_MDMA_CTBR_TSEL_MASK GENMASK(7, 0) -#define STM32_MDMA_CTBR_TSEL(n) STM32_MDMA_SET(n, \ - STM32_MDMA_CTBR_TSEL_MASK) +#define STM32_MDMA_CTBR_TSEL(n) FIELD_PREP(STM32_MDMA_CTBR_TSEL_MASK, (n)) /* MDMA Channel x mask address register */ #define STM32_MDMA_CMAR(x) (0x70 + 0x40 * (x)) @@ -1279,7 +1251,7 @@ static size_t stm32_mdma_desc_residue(struct 
stm32_mdma_chan *chan, u32 curr_hwdesc) { struct stm32_mdma_device *dmadev = stm32_mdma_get_dev(chan); - struct stm32_mdma_hwdesc *hwdesc = desc->node[0].hwdesc; + struct stm32_mdma_hwdesc *hwdesc; u32 cbndtr, residue, modulo, burst_size; int i; diff --git a/drivers/dma/ti/Makefile b/drivers/dma/ti/Makefile index bd496efadff7..1d4081a049b7 100644 --- a/drivers/dma/ti/Makefile +++ b/drivers/dma/ti/Makefile @@ -8,5 +8,6 @@ obj-$(CONFIG_TI_K3_PSIL) += k3-psil.o \ k3-psil-am654.o \ k3-psil-j721e.o \ k3-psil-j7200.o \ - k3-psil-am64.o + k3-psil-am64.o \ + k3-psil-j721s2.o obj-$(CONFIG_TI_DMA_CROSSBAR) += dma-crossbar.o diff --git a/drivers/dma/ti/edma.c b/drivers/dma/ti/edma.c index 35d81bd857f1..08e47f44d325 100644 --- a/drivers/dma/ti/edma.c +++ b/drivers/dma/ti/edma.c @@ -1681,8 +1681,7 @@ static irqreturn_t dma_ccerr_handler(int irq, void *data) dev_dbg(ecc->dev, "EMR%d 0x%08x\n", j, val); emr = val; - for (i = find_next_bit(&emr, 32, 0); i < 32; - i = find_next_bit(&emr, 32, i + 1)) { + for_each_set_bit(i, &emr, 32) { int k = (j << 5) + i; /* Clear the corresponding EMR bits */ diff --git a/drivers/dma/ti/k3-psil-j721s2.c b/drivers/dma/ti/k3-psil-j721s2.c new file mode 100644 index 000000000000..4c4172a4d271 --- /dev/null +++ b/drivers/dma/ti/k3-psil-j721s2.c @@ -0,0 +1,167 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2021 Texas Instruments Incorporated - https://www.ti.com + */ + +#include <linux/kernel.h> + +#include "k3-psil-priv.h" + +#define PSIL_PDMA_XY_TR(x) \ + { \ + .thread_id = x, \ + .ep_config = { \ + .ep_type = PSIL_EP_PDMA_XY, \ + }, \ + } + +#define PSIL_PDMA_XY_PKT(x) \ + { \ + .thread_id = x, \ + .ep_config = { \ + .ep_type = PSIL_EP_PDMA_XY, \ + .pkt_mode = 1, \ + }, \ + } + +#define PSIL_PDMA_MCASP(x) \ + { \ + .thread_id = x, \ + .ep_config = { \ + .ep_type = PSIL_EP_PDMA_XY, \ + .pdma_acc32 = 1, \ + .pdma_burst = 1, \ + }, \ + } + +#define PSIL_ETHERNET(x) \ + { \ + .thread_id = x, \ + .ep_config = { \ + .ep_type = PSIL_EP_NATIVE, \ + .pkt_mode = 1, \ + .needs_epib = 1, \ + .psd_size = 16, \ + }, \ + } + +#define PSIL_SA2UL(x, tx) \ + { \ + .thread_id = x, \ + .ep_config = { \ + .ep_type = PSIL_EP_NATIVE, \ + .pkt_mode = 1, \ + .needs_epib = 1, \ + .psd_size = 64, \ + .notdpkt = tx, \ + }, \ + } + +/* PSI-L source thread IDs, used for RX (DMA_DEV_TO_MEM) */ +static struct psil_ep j721s2_src_ep_map[] = { + /* PDMA_MCASP - McASP0-4 */ + PSIL_PDMA_MCASP(0x4400), + PSIL_PDMA_MCASP(0x4401), + PSIL_PDMA_MCASP(0x4402), + PSIL_PDMA_MCASP(0x4403), + PSIL_PDMA_MCASP(0x4404), + /* PDMA_SPI_G0 - SPI0-3 */ + PSIL_PDMA_XY_PKT(0x4600), + PSIL_PDMA_XY_PKT(0x4601), + PSIL_PDMA_XY_PKT(0x4602), + PSIL_PDMA_XY_PKT(0x4603), + PSIL_PDMA_XY_PKT(0x4604), + PSIL_PDMA_XY_PKT(0x4605), + PSIL_PDMA_XY_PKT(0x4606), + PSIL_PDMA_XY_PKT(0x4607), + PSIL_PDMA_XY_PKT(0x4608), + PSIL_PDMA_XY_PKT(0x4609), + PSIL_PDMA_XY_PKT(0x460a), + PSIL_PDMA_XY_PKT(0x460b), + PSIL_PDMA_XY_PKT(0x460c), + PSIL_PDMA_XY_PKT(0x460d), + PSIL_PDMA_XY_PKT(0x460e), + PSIL_PDMA_XY_PKT(0x460f), + /* PDMA_SPI_G1 - SPI4-7 */ + PSIL_PDMA_XY_PKT(0x4610), + PSIL_PDMA_XY_PKT(0x4611), + PSIL_PDMA_XY_PKT(0x4612), + PSIL_PDMA_XY_PKT(0x4613), + PSIL_PDMA_XY_PKT(0x4614), + PSIL_PDMA_XY_PKT(0x4615), + PSIL_PDMA_XY_PKT(0x4616), + PSIL_PDMA_XY_PKT(0x4617), + PSIL_PDMA_XY_PKT(0x4618), + PSIL_PDMA_XY_PKT(0x4619), + PSIL_PDMA_XY_PKT(0x461a), + PSIL_PDMA_XY_PKT(0x461b), + PSIL_PDMA_XY_PKT(0x461c), + PSIL_PDMA_XY_PKT(0x461d), + PSIL_PDMA_XY_PKT(0x461e), + PSIL_PDMA_XY_PKT(0x461f), + /* PDMA_USART_G0 - UART0-1 */ + 
PSIL_PDMA_XY_PKT(0x4700), + PSIL_PDMA_XY_PKT(0x4701), + /* PDMA_USART_G1 - UART2-3 */ + PSIL_PDMA_XY_PKT(0x4702), + PSIL_PDMA_XY_PKT(0x4703), + /* PDMA_USART_G2 - UART4-9 */ + PSIL_PDMA_XY_PKT(0x4704), + PSIL_PDMA_XY_PKT(0x4705), + PSIL_PDMA_XY_PKT(0x4706), + PSIL_PDMA_XY_PKT(0x4707), + PSIL_PDMA_XY_PKT(0x4708), + PSIL_PDMA_XY_PKT(0x4709), + /* CPSW0 */ + PSIL_ETHERNET(0x7000), + /* MCU_PDMA0 (MCU_PDMA_MISC_G0) - SPI0 */ + PSIL_PDMA_XY_PKT(0x7100), + PSIL_PDMA_XY_PKT(0x7101), + PSIL_PDMA_XY_PKT(0x7102), + PSIL_PDMA_XY_PKT(0x7103), + /* MCU_PDMA1 (MCU_PDMA_MISC_G1) - SPI1-2 */ + PSIL_PDMA_XY_PKT(0x7200), + PSIL_PDMA_XY_PKT(0x7201), + PSIL_PDMA_XY_PKT(0x7202), + PSIL_PDMA_XY_PKT(0x7203), + PSIL_PDMA_XY_PKT(0x7204), + PSIL_PDMA_XY_PKT(0x7205), + PSIL_PDMA_XY_PKT(0x7206), + PSIL_PDMA_XY_PKT(0x7207), + /* MCU_PDMA2 (MCU_PDMA_MISC_G2) - UART0 */ + PSIL_PDMA_XY_PKT(0x7300), + /* MCU_PDMA_ADC - ADC0-1 */ + PSIL_PDMA_XY_TR(0x7400), + PSIL_PDMA_XY_TR(0x7401), + PSIL_PDMA_XY_TR(0x7402), + PSIL_PDMA_XY_TR(0x7403), + /* SA2UL */ + PSIL_SA2UL(0x7500, 0), + PSIL_SA2UL(0x7501, 0), + PSIL_SA2UL(0x7502, 0), + PSIL_SA2UL(0x7503, 0), +}; + +/* PSI-L destination thread IDs, used for TX (DMA_MEM_TO_DEV) */ +static struct psil_ep j721s2_dst_ep_map[] = { + /* CPSW0 */ + PSIL_ETHERNET(0xf000), + PSIL_ETHERNET(0xf001), + PSIL_ETHERNET(0xf002), + PSIL_ETHERNET(0xf003), + PSIL_ETHERNET(0xf004), + PSIL_ETHERNET(0xf005), + PSIL_ETHERNET(0xf006), + PSIL_ETHERNET(0xf007), + /* SA2UL */ + PSIL_SA2UL(0xf500, 1), + PSIL_SA2UL(0xf501, 1), +}; + +struct psil_ep_map j721s2_ep_map = { + .name = "j721s2", + .src = j721s2_src_ep_map, + .src_count = ARRAY_SIZE(j721s2_src_ep_map), + .dst = j721s2_dst_ep_map, + .dst_count = ARRAY_SIZE(j721s2_dst_ep_map), +}; diff --git a/drivers/dma/ti/k3-psil-priv.h b/drivers/dma/ti/k3-psil-priv.h index b74e192e3c2d..e51e179cdb56 100644 --- a/drivers/dma/ti/k3-psil-priv.h +++ b/drivers/dma/ti/k3-psil-priv.h @@ -41,5 +41,6 @@ extern struct psil_ep_map am654_ep_map; extern struct psil_ep_map j721e_ep_map; extern struct psil_ep_map j7200_ep_map; extern struct psil_ep_map am64_ep_map; +extern struct psil_ep_map j721s2_ep_map; #endif /* K3_PSIL_PRIV_H_ */ diff --git a/drivers/dma/ti/k3-psil.c b/drivers/dma/ti/k3-psil.c index 13ce7367d870..8867b4bd0c51 100644 --- a/drivers/dma/ti/k3-psil.c +++ b/drivers/dma/ti/k3-psil.c @@ -21,6 +21,7 @@ static const struct soc_device_attribute k3_soc_devices[] = { { .family = "J721E", .data = &j721e_ep_map }, { .family = "J7200", .data = &j7200_ep_map }, { .family = "AM64X", .data = &am64_ep_map }, + { .family = "J721S2", .data = &j721s2_ep_map }, { /* sentinel */ } }; diff --git a/drivers/dma/ti/k3-udma.c b/drivers/dma/ti/k3-udma.c index 041d8e32d630..895dcd0e8b60 100644 --- a/drivers/dma/ti/k3-udma.c +++ b/drivers/dma/ti/k3-udma.c @@ -4376,6 +4376,7 @@ static const struct soc_device_attribute k3_soc_devices[] = { { .family = "J721E", .data = &j721e_soc_data }, { .family = "J7200", .data = &j7200_soc_data }, { .family = "AM64X", .data = &am64_soc_data }, + { .family = "J721S2", .data = &j721e_soc_data}, { /* sentinel */ } }; diff --git a/drivers/dma/xilinx/xilinx_dma.c b/drivers/dma/xilinx/xilinx_dma.c index 4677ce08ed40..cd62bbb50e8b 100644 --- a/drivers/dma/xilinx/xilinx_dma.c +++ b/drivers/dma/xilinx/xilinx_dma.c @@ -2128,6 +2128,126 @@ error: } /** + * xilinx_cdma_prep_memcpy_sg - prepare descriptors for a memcpy_sg transaction + * @dchan: DMA channel + * @dst_sg: Destination scatter list + * @dst_sg_len: Number of entries in destination scatter list + * @src_sg: 
Source scatter list + * @src_sg_len: Number of entries in source scatter list + * @flags: transfer ack flags + * + * Return: Async transaction descriptor on success and NULL on failure + */ +static struct dma_async_tx_descriptor *xilinx_cdma_prep_memcpy_sg( + struct dma_chan *dchan, struct scatterlist *dst_sg, + unsigned int dst_sg_len, struct scatterlist *src_sg, + unsigned int src_sg_len, unsigned long flags) +{ + struct xilinx_dma_chan *chan = to_xilinx_chan(dchan); + struct xilinx_dma_tx_descriptor *desc; + struct xilinx_cdma_tx_segment *segment, *prev = NULL; + struct xilinx_cdma_desc_hw *hw; + size_t len, dst_avail, src_avail; + dma_addr_t dma_dst, dma_src; + + if (unlikely(dst_sg_len == 0 || src_sg_len == 0)) + return NULL; + + if (unlikely(!dst_sg || !src_sg)) + return NULL; + + desc = xilinx_dma_alloc_tx_descriptor(chan); + if (!desc) + return NULL; + + dma_async_tx_descriptor_init(&desc->async_tx, &chan->common); + desc->async_tx.tx_submit = xilinx_dma_tx_submit; + + dst_avail = sg_dma_len(dst_sg); + src_avail = sg_dma_len(src_sg); + /* + * loop until there is either no more source or no more destination + * scatterlist entry + */ + while (true) { + len = min_t(size_t, src_avail, dst_avail); + len = min_t(size_t, len, chan->xdev->max_buffer_len); + if (len == 0) + goto fetch; + + /* Allocate the link descriptor from DMA pool */ + segment = xilinx_cdma_alloc_tx_segment(chan); + if (!segment) + goto error; + + dma_dst = sg_dma_address(dst_sg) + sg_dma_len(dst_sg) - + dst_avail; + dma_src = sg_dma_address(src_sg) + sg_dma_len(src_sg) - + src_avail; + hw = &segment->hw; + hw->control = len; + hw->src_addr = dma_src; + hw->dest_addr = dma_dst; + if (chan->ext_addr) { + hw->src_addr_msb = upper_32_bits(dma_src); + hw->dest_addr_msb = upper_32_bits(dma_dst); + } + + if (prev) { + prev->hw.next_desc = segment->phys; + if (chan->ext_addr) + prev->hw.next_desc_msb = + upper_32_bits(segment->phys); + } + + prev = segment; + dst_avail -= len; + src_avail -= len; + list_add_tail(&segment->node, &desc->segments); + +fetch: + /* Fetch the next dst scatterlist entry */ + if (dst_avail == 0) { + if (dst_sg_len == 0) + break; + dst_sg = sg_next(dst_sg); + if (dst_sg == NULL) + break; + dst_sg_len--; + dst_avail = sg_dma_len(dst_sg); + } + /* Fetch the next src scatterlist entry */ + if (src_avail == 0) { + if (src_sg_len == 0) + break; + src_sg = sg_next(src_sg); + if (src_sg == NULL) + break; + src_sg_len--; + src_avail = sg_dma_len(src_sg); + } + } + + if (list_empty(&desc->segments)) { + dev_err(chan->xdev->dev, + "%s: Zero-size SG transfer requested\n", __func__); + goto error; + } + + /* Link the last hardware descriptor with the first. 
*/ + segment = list_first_entry(&desc->segments, + struct xilinx_cdma_tx_segment, node); + desc->async_tx.phys = segment->phys; + prev->hw.next_desc = segment->phys; + + return &desc->async_tx; + +error: + xilinx_dma_free_tx_descriptor(chan, desc); + return NULL; +} + +/** * xilinx_dma_prep_slave_sg - prepare descriptors for a DMA_SLAVE transaction * @dchan: DMA channel * @sgl: scatterlist to transfer to/from @@ -2860,7 +2980,9 @@ static int xilinx_dma_chan_probe(struct xilinx_dma_device *xdev, } /* Request the interrupt */ - chan->irq = irq_of_parse_and_map(node, chan->tdest); + chan->irq = of_irq_get(node, chan->tdest); + if (chan->irq < 0) + return dev_err_probe(xdev->dev, chan->irq, "failed to get irq\n"); err = request_irq(chan->irq, xdev->dma_config->irq_handler, IRQF_SHARED, "xilinx-dma-controller", chan); if (err) { @@ -2934,8 +3056,11 @@ static int xilinx_dma_child_probe(struct xilinx_dma_device *xdev, if (xdev->dma_config->dmatype == XDMA_TYPE_AXIMCDMA && ret < 0) dev_warn(xdev->dev, "missing dma-channels property\n"); - for (i = 0; i < nr_channels; i++) - xilinx_dma_chan_probe(xdev, node); + for (i = 0; i < nr_channels; i++) { + ret = xilinx_dma_chan_probe(xdev, node); + if (ret) + return ret; + } return 0; } @@ -3115,7 +3240,9 @@ static int xilinx_dma_probe(struct platform_device *pdev) DMA_RESIDUE_GRANULARITY_SEGMENT; } else if (xdev->dma_config->dmatype == XDMA_TYPE_CDMA) { dma_cap_set(DMA_MEMCPY, xdev->common.cap_mask); + dma_cap_set(DMA_MEMCPY_SG, xdev->common.cap_mask); xdev->common.device_prep_dma_memcpy = xilinx_cdma_prep_memcpy; + xdev->common.device_prep_dma_memcpy_sg = xilinx_cdma_prep_memcpy_sg; /* Residue calculation is supported by only AXI DMA and CDMA */ xdev->common.residue_granularity = DMA_RESIDUE_GRANULARITY_SEGMENT; |
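
Note on the at_xdmac hunk above: it replaces the open-coded "sizeof(*atxdmac) + nr_channels * sizeof(struct at_xdmac_chan)" arithmetic with struct_size(). A minimal, self-contained sketch of that allocation pattern follows; the demo_* names are made up for illustration and are not the driver's types.

#include <linux/device.h>
#include <linux/overflow.h>
#include <linux/slab.h>

/* Hypothetical structures, for illustration only. */
struct demo_chan {
	int id;
};

struct demo_ctrl {
	int nr_chan;
	struct demo_chan chan[];	/* flexible array member */
};

static struct demo_ctrl *demo_alloc(struct device *dev, int nr_chan)
{
	struct demo_ctrl *ctrl;

	/*
	 * struct_size() computes sizeof(*ctrl) + nr_chan * sizeof(ctrl->chan[0])
	 * with overflow checking, replacing the open-coded arithmetic.
	 */
	ctrl = devm_kzalloc(dev, struct_size(ctrl, chan, nr_chan), GFP_KERNEL);
	if (!ctrl)
		return NULL;

	ctrl->nr_chan = nr_chan;
	return ctrl;
}

On overflow, struct_size() saturates to SIZE_MAX, so the allocation fails instead of being silently undersized.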
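
The idxd changes to device.c and irq.c pause descriptor submission by killing the work queue's percpu_ref and revive it once the new interrupt handle is installed. Below is a stripped-down sketch of that pause/resume pattern, assuming the ref was created with PERCPU_REF_ALLOW_REINIT as the updated idxd_wq_init_percpu_ref() now does; the demo_* names are illustrative, not the driver's helpers.

#include <linux/completion.h>
#include <linux/percpu-refcount.h>

struct demo_wq {
	struct percpu_ref active;	/* its release callback completes 'dead' */
	struct completion dead;
	struct completion resurrect;
};

static void demo_pause_submissions(struct demo_wq *wq)
{
	reinit_completion(&wq->resurrect);
	/* New percpu_ref_tryget_live() callers fail from here on. */
	percpu_ref_kill(&wq->active);
	/* Wait until in-flight submitters have dropped their references. */
	wait_for_completion(&wq->dead);
}

static void demo_resume_submissions(struct demo_wq *wq)
{
	/* Only legal because the ref was created with PERCPU_REF_ALLOW_REINIT. */
	percpu_ref_reinit(&wq->active);
	/* Wake submitters blocked waiting on 'resurrect'. */
	complete_all(&wq->resurrect);
}

This is also why the submit path in submit.c now waits on wq_resurrect and retries percpu_ref_tryget_live() instead of failing immediately when the ref is dead.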
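
The stm32-mdma hunk drops the driver-private STM32_MDMA_SET()/STM32_MDMA_GET() helpers in favour of the generic FIELD_PREP()/FIELD_GET() macros from <linux/bitfield.h>. A small equivalence sketch with a made-up two-bit field:

#include <linux/bitfield.h>
#include <linux/bits.h>
#include <linux/types.h>

#define DEMO_CCR_PL_MASK	GENMASK(7, 6)	/* hypothetical priority field */

static u32 demo_set_priority(u32 ccr, u32 prio)
{
	ccr &= ~DEMO_CCR_PL_MASK;
	/* FIELD_PREP() shifts 'prio' up into bits 7:6 and masks it. */
	ccr |= FIELD_PREP(DEMO_CCR_PL_MASK, prio);
	return ccr;
}

static u32 demo_get_priority(u32 ccr)
{
	/* FIELD_GET() extracts bits 7:6 and shifts them back down. */
	return FIELD_GET(DEMO_CCR_PL_MASK, ccr);
}

Both macros derive the shift from the mask at compile time, which is what the removed STM32_MDMA_SHIFT()-based helpers did by hand.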