From 30bf2df65b0d18f53ee9c92131239f66c8f55d63 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Mon, 15 May 2017 16:25:03 +0200 Subject: iommu/amd: Ratelimit io-page-faults per device Misbehaving devices can cause an endless chain of io-page-faults, flooding dmesg and making the system-log unusable or even prevent the system from booting. So ratelimit the error messages about io-page-faults on a per-device basis. Signed-off-by: Joerg Roedel --- drivers/iommu/amd_iommu.c | 40 +++++++++++++++++++++++++++++++++------- 1 file changed, 33 insertions(+), 7 deletions(-) (limited to 'drivers/iommu/amd_iommu.c') diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c index 63cacf5d6cf2..d3ff766a8ca6 100644 --- a/drivers/iommu/amd_iommu.c +++ b/drivers/iommu/amd_iommu.c @@ -138,6 +138,8 @@ struct iommu_dev_data { PPR completions */ u32 errata; /* Bitmap for errata to apply */ bool use_vapic; /* Enable device to use vapic mode */ + + struct ratelimit_state rs; /* Ratelimit IOPF messages */ }; /* @@ -253,6 +255,8 @@ static struct iommu_dev_data *alloc_dev_data(u16 devid) list_add_tail(&dev_data->dev_data_list, &dev_data_list); spin_unlock_irqrestore(&dev_data_list_lock, flags); + ratelimit_default_init(&dev_data->rs); + return dev_data; } @@ -551,6 +555,29 @@ static void dump_command(unsigned long phys_addr) pr_err("AMD-Vi: CMD[%d]: %08x\n", i, cmd->data[i]); } +static void amd_iommu_report_page_fault(u16 devid, u16 domain_id, + u64 address, int flags) +{ + struct iommu_dev_data *dev_data = NULL; + struct pci_dev *pdev; + + pdev = pci_get_bus_and_slot(PCI_BUS_NUM(devid), devid & 0xff); + if (pdev) + dev_data = get_dev_data(&pdev->dev); + + if (dev_data && __ratelimit(&dev_data->rs)) { + dev_err(&pdev->dev, "AMD-Vi: Event logged [IO_PAGE_FAULT domain=0x%04x address=0x%016llx flags=0x%04x]\n", + domain_id, address, flags); + } else if (printk_ratelimit()) { + pr_err("AMD-Vi: Event logged [IO_PAGE_FAULT device=%02x:%02x.%x domain=0x%04x address=0x%016llx flags=0x%04x]\n", + PCI_BUS_NUM(devid), PCI_SLOT(devid), PCI_FUNC(devid), + domain_id, address, flags); + } + + if (pdev) + pci_dev_put(pdev); +} + static void iommu_print_event(struct amd_iommu *iommu, void *__evt) { int type, devid, domid, flags; @@ -575,7 +602,12 @@ retry: goto retry; } - printk(KERN_ERR "AMD-Vi: Event logged ["); + if (type == EVENT_TYPE_IO_FAULT) { + amd_iommu_report_page_fault(devid, domid, address, flags); + return; + } else { + printk(KERN_ERR "AMD-Vi: Event logged ["); + } switch (type) { case EVENT_TYPE_ILL_DEV: @@ -585,12 +617,6 @@ retry: address, flags); dump_dte_entry(devid); break; - case EVENT_TYPE_IO_FAULT: - printk("IO_PAGE_FAULT device=%02x:%02x.%x " - "domain=0x%04x address=0x%016llx flags=0x%04x]\n", - PCI_BUS_NUM(devid), PCI_SLOT(devid), PCI_FUNC(devid), - domid, address, flags); - break; case EVENT_TYPE_DEV_TAB_ERR: printk("DEV_TAB_HARDWARE_ERROR device=%02x:%02x.%x " "address=0x%016llx flags=0x%04x]\n", -- cgit v1.2.3 From e2f9d45fb452c60c9aaab55ce6f40601f9ec6516 Mon Sep 17 00:00:00 2001 From: Tobias Klauser Date: Wed, 24 May 2017 16:31:16 +0200 Subject: iommu/amd: Constify irq_domain_ops struct irq_domain_ops is not modified, so it can be made const. 
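Returning briefly to the rate-limit change in the first patch above: a struct ratelimit_state initialized with ratelimit_default_init() and checked with __ratelimit() enforces a fixed time window with a burst allowance (by default roughly 10 messages per 5 seconds), here one state per device. The following is a minimal userspace model of that policy, not the kernel implementation; the names (rs_model, rs_allow, now_ms) are illustrative, and the real __ratelimit() additionally accounts for how many messages it suppressed.

#include <stdbool.h>
#include <stdio.h>
#include <time.h>

struct rs_model {
        long interval_ms;       /* length of the window               */
        int burst;              /* messages allowed per window        */
        int printed;            /* messages emitted in current window */
        long window_start_ms;   /* start of the current window        */
};

static long now_ms(void)
{
        struct timespec ts;

        clock_gettime(CLOCK_MONOTONIC, &ts);
        return ts.tv_sec * 1000L + ts.tv_nsec / 1000000L;
}

/* Return true if the caller may print, false if the message is dropped. */
static bool rs_allow(struct rs_model *rs)
{
        long now = now_ms();

        if (now - rs->window_start_ms > rs->interval_ms) {
                rs->window_start_ms = now;
                rs->printed = 0;
        }

        if (rs->printed < rs->burst) {
                rs->printed++;
                return true;
        }

        return false;
}

int main(void)
{
        /* One state per device, analogous to iommu_dev_data::rs. */
        struct rs_model dev_rs = { .interval_ms = 5000, .burst = 10,
                                   .window_start_ms = now_ms() };

        for (int i = 0; i < 100; i++)
                if (rs_allow(&dev_rs))
                        printf("IO_PAGE_FAULT on device X (event %d)\n", i);

        return 0;
}

Of the 100 simulated faults only the first 10 are reported; the rest are dropped until the window rolls over, which is the per-device behaviour the patch is after.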
Signed-off-by: Tobias Klauser Signed-off-by: Joerg Roedel --- drivers/iommu/amd_iommu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/iommu/amd_iommu.c') diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c index d3ff766a8ca6..d7748955184b 100644 --- a/drivers/iommu/amd_iommu.c +++ b/drivers/iommu/amd_iommu.c @@ -4299,7 +4299,7 @@ static void irq_remapping_deactivate(struct irq_domain *domain, irte_info->index); } -static struct irq_domain_ops amd_ir_domain_ops = { +static const struct irq_domain_ops amd_ir_domain_ops = { .alloc = irq_remapping_alloc, .free = irq_remapping_free, .activate = irq_remapping_activate, -- cgit v1.2.3 From d334a5637dfb53f7d07017afc1e491903b482ef8 Mon Sep 17 00:00:00 2001 From: Tom Lendacky Date: Mon, 5 Jun 2017 14:52:12 -0500 Subject: iommu/amd: Reduce amount of MMIO when submitting commands As newer, higher speed devices are developed, perf data shows that the amount of MMIO that is performed when submitting commands to the IOMMU causes performance issues. Currently, the command submission path reads the command buffer head and tail pointers and then writes the tail pointer once the command is ready. The tail pointer is only ever updated by the driver so it can be tracked by the driver without having to read it from the hardware. The head pointer is updated by the hardware, but can be read opportunistically. Reading the head pointer only when it appears that there might not be room in the command buffer and then re-checking the available space reduces the number of times the head pointer has to be read. Signed-off-by: Tom Lendacky Signed-off-by: Joerg Roedel --- drivers/iommu/amd_iommu.c | 35 ++++++++++++++++++++++------------- drivers/iommu/amd_iommu_init.c | 2 ++ drivers/iommu/amd_iommu_types.h | 2 ++ 3 files changed, 26 insertions(+), 13 deletions(-) (limited to 'drivers/iommu/amd_iommu.c') diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c index d7748955184b..d81c895ff4f4 100644 --- a/drivers/iommu/amd_iommu.c +++ b/drivers/iommu/amd_iommu.c @@ -874,19 +874,20 @@ static int wait_on_sem(volatile u64 *sem) } static void copy_cmd_to_buffer(struct amd_iommu *iommu, - struct iommu_cmd *cmd, - u32 tail) + struct iommu_cmd *cmd) { u8 *target; - target = iommu->cmd_buf + tail; - tail = (tail + sizeof(*cmd)) % CMD_BUFFER_SIZE; + target = iommu->cmd_buf + iommu->cmd_buf_tail; + + iommu->cmd_buf_tail += sizeof(*cmd); + iommu->cmd_buf_tail %= CMD_BUFFER_SIZE; /* Copy command to buffer */ memcpy(target, cmd, sizeof(*cmd)); /* Tell the IOMMU about it */ - writel(tail, iommu->mmio_base + MMIO_CMD_TAIL_OFFSET); + writel(iommu->cmd_buf_tail, iommu->mmio_base + MMIO_CMD_TAIL_OFFSET); } static void build_completion_wait(struct iommu_cmd *cmd, u64 address) @@ -1044,23 +1045,31 @@ static int __iommu_queue_command_sync(struct amd_iommu *iommu, struct iommu_cmd *cmd, bool sync) { - u32 left, tail, head, next_tail; + bool read_head = true; + u32 left, next_tail; + next_tail = (iommu->cmd_buf_tail + sizeof(*cmd)) % CMD_BUFFER_SIZE; again: - - head = readl(iommu->mmio_base + MMIO_CMD_HEAD_OFFSET); - tail = readl(iommu->mmio_base + MMIO_CMD_TAIL_OFFSET); - next_tail = (tail + sizeof(*cmd)) % CMD_BUFFER_SIZE; - left = (head - next_tail) % CMD_BUFFER_SIZE; + left = (iommu->cmd_buf_head - next_tail) % CMD_BUFFER_SIZE; if (left <= 0x20) { struct iommu_cmd sync_cmd; int ret; + if (read_head) { + /* Update head and recheck remaining space */ + iommu->cmd_buf_head = readl(iommu->mmio_base + + MMIO_CMD_HEAD_OFFSET); + read_head = 
false; + goto again; + } + + read_head = true; + iommu->cmd_sem = 0; build_completion_wait(&sync_cmd, (u64)&iommu->cmd_sem); - copy_cmd_to_buffer(iommu, &sync_cmd, tail); + copy_cmd_to_buffer(iommu, &sync_cmd); if ((ret = wait_on_sem(&iommu->cmd_sem)) != 0) return ret; @@ -1068,7 +1077,7 @@ again: goto again; } - copy_cmd_to_buffer(iommu, cmd, tail); + copy_cmd_to_buffer(iommu, cmd); /* We need to sync now to make sure all commands are processed */ iommu->need_sync = sync; diff --git a/drivers/iommu/amd_iommu_init.c b/drivers/iommu/amd_iommu_init.c index 5a11328f4d98..3fa7e3b35507 100644 --- a/drivers/iommu/amd_iommu_init.c +++ b/drivers/iommu/amd_iommu_init.c @@ -588,6 +588,8 @@ void amd_iommu_reset_cmd_buffer(struct amd_iommu *iommu) writel(0x00, iommu->mmio_base + MMIO_CMD_HEAD_OFFSET); writel(0x00, iommu->mmio_base + MMIO_CMD_TAIL_OFFSET); + iommu->cmd_buf_head = 0; + iommu->cmd_buf_tail = 0; iommu_feature_enable(iommu, CONTROL_CMDBUF_EN); } diff --git a/drivers/iommu/amd_iommu_types.h b/drivers/iommu/amd_iommu_types.h index 4de8f4160bb8..6960d7db2fab 100644 --- a/drivers/iommu/amd_iommu_types.h +++ b/drivers/iommu/amd_iommu_types.h @@ -516,6 +516,8 @@ struct amd_iommu { /* command buffer virtual address */ u8 *cmd_buf; + u32 cmd_buf_head; + u32 cmd_buf_tail; /* event buffer virtual address */ u8 *evt_buf; -- cgit v1.2.3 From 23e967e17c58779b38f69f8d41d727f59440d36a Mon Sep 17 00:00:00 2001 From: Tom Lendacky Date: Mon, 5 Jun 2017 14:52:26 -0500 Subject: iommu/amd: Reduce delay waiting for command buffer space Currently if there is no room to add a command to the command buffer, the driver performs a "completion wait" which only returns when all commands on the queue have been processed. There is no need to wait for the entire command queue to be executed before adding the next command. Update the driver to perform the same udelay() loop that the "completion wait" performs, but instead re-read the head pointer to determine if sufficient space is available. The very first time it is found that there is no space available, the udelay() will be skipped to immediately perform the opportunistic read of the head pointer. If it is still found that there is not sufficient space, then the udelay() will be performed. 
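Taken together, the MMIO-reduction patch above and the retry change just described give the submission path a specific shape: the tail index lives purely in software and is written to the hardware once per command, the cached head is trusted until the ring looks full, and only then is the head register re-read, with the first recheck skipping the delay. A rough userspace model of that loop is sketched below; plain variables and stub functions stand in for the MMIO registers, and CMD_SIZE, RING_SIZE, LOOP_TIMEOUT and the function names are illustrative rather than the driver's.

#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define CMD_SIZE        16u
#define RING_SIZE       4096u
#define LOOP_TIMEOUT    100000u

/* Toy "hardware": the head register, advanced as commands are consumed. */
static uint32_t hw_head;
static uint32_t read_hw_head(void) { return hw_head; }
static void write_hw_tail(uint32_t tail) { hw_head = tail; /* model: consumed at once */ }
static void delay_us(unsigned int us) { (void)us; }

struct cmd_ring {
        uint8_t buf[RING_SIZE];
        uint32_t cached_head;   /* last head value read from "hardware" */
        uint32_t tail;          /* software-owned producer index        */
};

static int ring_submit(struct cmd_ring *r, const uint8_t cmd[CMD_SIZE])
{
        uint32_t next_tail = (r->tail + CMD_SIZE) % RING_SIZE;
        unsigned int count = 0;
        uint32_t left;

again:
        left = (r->cached_head - next_tail) % RING_SIZE;
        if (left <= 2 * CMD_SIZE) {
                /* Skip the delay on the very first recheck. */
                if (count++) {
                        if (count == LOOP_TIMEOUT)
                                return -1;      /* ring never drained */
                        delay_us(1);
                }

                /* Opportunistic re-read of the consumer index, then recheck. */
                r->cached_head = read_hw_head();
                goto again;
        }

        memcpy(r->buf + r->tail, cmd, CMD_SIZE);
        r->tail = next_tail;
        write_hw_tail(r->tail);         /* the only MMIO write per command */
        return 0;
}

int main(void)
{
        struct cmd_ring ring = { .cached_head = 0, .tail = 0 };
        uint8_t cmd[CMD_SIZE] = { 0 };
        int failed = 0;

        for (int i = 0; i < 10000; i++)
                failed += ring_submit(&ring, cmd) < 0;

        printf("submitted 10000 commands, %d failures\n", failed);
        return 0;
}

In the real driver the head only advances as the IOMMU consumes commands, so the opportunistic re-read usually finds enough space on the first pass and the delay/timeout path stays cold.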
Signed-off-by: Leo Duran Signed-off-by: Tom Lendacky Signed-off-by: Joerg Roedel --- drivers/iommu/amd_iommu.c | 33 +++++++++++++-------------------- 1 file changed, 13 insertions(+), 20 deletions(-) (limited to 'drivers/iommu/amd_iommu.c') diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c index d81c895ff4f4..1efbef7f3b61 100644 --- a/drivers/iommu/amd_iommu.c +++ b/drivers/iommu/amd_iommu.c @@ -1045,7 +1045,7 @@ static int __iommu_queue_command_sync(struct amd_iommu *iommu, struct iommu_cmd *cmd, bool sync) { - bool read_head = true; + unsigned int count = 0; u32 left, next_tail; next_tail = (iommu->cmd_buf_tail + sizeof(*cmd)) % CMD_BUFFER_SIZE; @@ -1053,33 +1053,26 @@ again: left = (iommu->cmd_buf_head - next_tail) % CMD_BUFFER_SIZE; if (left <= 0x20) { - struct iommu_cmd sync_cmd; - int ret; - - if (read_head) { - /* Update head and recheck remaining space */ - iommu->cmd_buf_head = readl(iommu->mmio_base + - MMIO_CMD_HEAD_OFFSET); - read_head = false; - goto again; - } - - read_head = true; - - iommu->cmd_sem = 0; + /* Skip udelay() the first time around */ + if (count++) { + if (count == LOOP_TIMEOUT) { + pr_err("AMD-Vi: Command buffer timeout\n"); + return -EIO; + } - build_completion_wait(&sync_cmd, (u64)&iommu->cmd_sem); - copy_cmd_to_buffer(iommu, &sync_cmd); + udelay(1); + } - if ((ret = wait_on_sem(&iommu->cmd_sem)) != 0) - return ret; + /* Update head and recheck remaining space */ + iommu->cmd_buf_head = readl(iommu->mmio_base + + MMIO_CMD_HEAD_OFFSET); goto again; } copy_cmd_to_buffer(iommu, cmd); - /* We need to sync now to make sure all commands are processed */ + /* Do we need to make sure all commands are processed? */ iommu->need_sync = sync; return 0; -- cgit v1.2.3 From 460c26d05bf7357a4d5b41b3d7a97727f5bdffe1 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Fri, 2 Jun 2017 14:28:01 +0200 Subject: iommu/amd: Rip out old queue flushing code The queue flushing is pretty inefficient when it flushes the queues for all cpus at once. Further it flushes all domains from all IOMMUs for all CPUs, which is overkill as well. Rip it out to make room for something more efficient. Signed-off-by: Joerg Roedel --- drivers/iommu/amd_iommu.c | 143 ++-------------------------------------------- 1 file changed, 6 insertions(+), 137 deletions(-) (limited to 'drivers/iommu/amd_iommu.c') diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c index 1efbef7f3b61..ec9da25c06a1 100644 --- a/drivers/iommu/amd_iommu.c +++ b/drivers/iommu/amd_iommu.c @@ -89,25 +89,6 @@ LIST_HEAD(ioapic_map); LIST_HEAD(hpet_map); LIST_HEAD(acpihid_map); -#define FLUSH_QUEUE_SIZE 256 - -struct flush_queue_entry { - unsigned long iova_pfn; - unsigned long pages; - struct dma_ops_domain *dma_dom; -}; - -struct flush_queue { - spinlock_t lock; - unsigned next; - struct flush_queue_entry *entries; -}; - -static DEFINE_PER_CPU(struct flush_queue, flush_queue); - -static atomic_t queue_timer_on; -static struct timer_list queue_timer; - /* * Domain for untranslated devices - only allocated * if iommu=pt passed on kernel cmd line. 
@@ -2253,92 +2234,6 @@ static struct iommu_group *amd_iommu_device_group(struct device *dev) * *****************************************************************************/ -static void __queue_flush(struct flush_queue *queue) -{ - struct protection_domain *domain; - unsigned long flags; - int idx; - - /* First flush TLB of all known domains */ - spin_lock_irqsave(&amd_iommu_pd_lock, flags); - list_for_each_entry(domain, &amd_iommu_pd_list, list) - domain_flush_tlb(domain); - spin_unlock_irqrestore(&amd_iommu_pd_lock, flags); - - /* Wait until flushes have completed */ - domain_flush_complete(NULL); - - for (idx = 0; idx < queue->next; ++idx) { - struct flush_queue_entry *entry; - - entry = queue->entries + idx; - - free_iova_fast(&entry->dma_dom->iovad, - entry->iova_pfn, - entry->pages); - - /* Not really necessary, just to make sure we catch any bugs */ - entry->dma_dom = NULL; - } - - queue->next = 0; -} - -static void queue_flush_all(void) -{ - int cpu; - - for_each_possible_cpu(cpu) { - struct flush_queue *queue; - unsigned long flags; - - queue = per_cpu_ptr(&flush_queue, cpu); - spin_lock_irqsave(&queue->lock, flags); - if (queue->next > 0) - __queue_flush(queue); - spin_unlock_irqrestore(&queue->lock, flags); - } -} - -static void queue_flush_timeout(unsigned long unsused) -{ - atomic_set(&queue_timer_on, 0); - queue_flush_all(); -} - -static void queue_add(struct dma_ops_domain *dma_dom, - unsigned long address, unsigned long pages) -{ - struct flush_queue_entry *entry; - struct flush_queue *queue; - unsigned long flags; - int idx; - - pages = __roundup_pow_of_two(pages); - address >>= PAGE_SHIFT; - - queue = get_cpu_ptr(&flush_queue); - spin_lock_irqsave(&queue->lock, flags); - - if (queue->next == FLUSH_QUEUE_SIZE) - __queue_flush(queue); - - idx = queue->next++; - entry = queue->entries + idx; - - entry->iova_pfn = address; - entry->pages = pages; - entry->dma_dom = dma_dom; - - spin_unlock_irqrestore(&queue->lock, flags); - - if (atomic_cmpxchg(&queue_timer_on, 0, 1) == 0) - mod_timer(&queue_timer, jiffies + msecs_to_jiffies(10)); - - put_cpu_ptr(&flush_queue); -} - - /* * In the dma_ops path we only have the struct device. 
This function * finds the corresponding IOMMU, the protection domain and the @@ -2490,7 +2385,10 @@ static void __unmap_single(struct dma_ops_domain *dma_dom, domain_flush_tlb(&dma_dom->domain); domain_flush_complete(&dma_dom->domain); } else { - queue_add(dma_dom, dma_addr, pages); + /* Keep the if() around, we need it later again */ + dma_ops_free_iova(dma_dom, dma_addr, pages); + domain_flush_tlb(&dma_dom->domain); + domain_flush_complete(&dma_dom->domain); } } @@ -2825,7 +2723,7 @@ static int init_reserved_iova_ranges(void) int __init amd_iommu_init_api(void) { - int ret, cpu, err = 0; + int ret, err = 0; ret = iova_cache_get(); if (ret) @@ -2835,18 +2733,6 @@ int __init amd_iommu_init_api(void) if (ret) return ret; - for_each_possible_cpu(cpu) { - struct flush_queue *queue = per_cpu_ptr(&flush_queue, cpu); - - queue->entries = kzalloc(FLUSH_QUEUE_SIZE * - sizeof(*queue->entries), - GFP_KERNEL); - if (!queue->entries) - goto out_put_iova; - - spin_lock_init(&queue->lock); - } - err = bus_set_iommu(&pci_bus_type, &amd_iommu_ops); if (err) return err; @@ -2858,23 +2744,12 @@ int __init amd_iommu_init_api(void) err = bus_set_iommu(&platform_bus_type, &amd_iommu_ops); if (err) return err; - return 0; - -out_put_iova: - for_each_possible_cpu(cpu) { - struct flush_queue *queue = per_cpu_ptr(&flush_queue, cpu); - kfree(queue->entries); - } - - return -ENOMEM; + return 0; } int __init amd_iommu_init_dma_ops(void) { - setup_timer(&queue_timer, queue_flush_timeout, 0); - atomic_set(&queue_timer_on, 0); - swiotlb = iommu_pass_through ? 1 : 0; iommu_detected = 1; @@ -3030,12 +2905,6 @@ static void amd_iommu_domain_free(struct iommu_domain *dom) switch (dom->type) { case IOMMU_DOMAIN_DMA: - /* - * First make sure the domain is no longer referenced from the - * flush queue - */ - queue_flush_all(); - /* Now release the domain */ dma_dom = to_dma_ops_domain(domain); dma_ops_domain_free(dma_dom); -- cgit v1.2.3 From d4241a276119bf404e6c0e23f06f84b84c4ecfc0 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Fri, 2 Jun 2017 14:55:56 +0200 Subject: iommu/amd: Add per-domain flush-queue data structures Make the flush-queue per dma-ops domain and add code allocate and free the flush-queues; Signed-off-by: Joerg Roedel --- drivers/iommu/amd_iommu.c | 69 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 69 insertions(+) (limited to 'drivers/iommu/amd_iommu.c') diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c index ec9da25c06a1..2418fcc28fbe 100644 --- a/drivers/iommu/amd_iommu.c +++ b/drivers/iommu/amd_iommu.c @@ -136,6 +136,18 @@ static void update_domain(struct protection_domain *domain); static int protection_domain_init(struct protection_domain *domain); static void detach_device(struct device *dev); +#define FLUSH_QUEUE_SIZE 256 + +struct flush_queue_entry { + unsigned long iova_pfn; + unsigned long pages; +}; + +struct flush_queue { + struct flush_queue_entry *entries; + unsigned head, tail; +}; + /* * Data container for a dma_ops specific protection domain */ @@ -145,6 +157,8 @@ struct dma_ops_domain { /* IOVA RB-Tree */ struct iova_domain iovad; + + struct flush_queue __percpu *flush_queue; }; static struct iova_domain reserved_iova_ranges; @@ -1742,6 +1756,56 @@ static void free_gcr3_table(struct protection_domain *domain) free_page((unsigned long)domain->gcr3_tbl); } +static void dma_ops_domain_free_flush_queue(struct dma_ops_domain *dom) +{ + int cpu; + + for_each_possible_cpu(cpu) { + struct flush_queue *queue; + + queue = per_cpu_ptr(dom->flush_queue, cpu); + 
kfree(queue->entries); + } + + free_percpu(dom->flush_queue); + + dom->flush_queue = NULL; +} + +static int dma_ops_domain_alloc_flush_queue(struct dma_ops_domain *dom) +{ + int cpu; + + dom->flush_queue = alloc_percpu(struct flush_queue); + if (!dom->flush_queue) + return -ENOMEM; + + /* First make sure everything is cleared */ + for_each_possible_cpu(cpu) { + struct flush_queue *queue; + + queue = per_cpu_ptr(dom->flush_queue, cpu); + queue->head = 0; + queue->tail = 0; + queue->entries = NULL; + } + + /* Now start doing the allocation */ + for_each_possible_cpu(cpu) { + struct flush_queue *queue; + + queue = per_cpu_ptr(dom->flush_queue, cpu); + queue->entries = kzalloc(FLUSH_QUEUE_SIZE * sizeof(*queue->entries), + GFP_KERNEL); + if (!queue->entries) { + dma_ops_domain_free_flush_queue(dom); + return -ENOMEM; + } + } + + return 0; +} + /* * Free a domain, only used if something went wrong in the * allocation path and we need to free an already allocated page table @@ -1753,6 +1817,8 @@ static void dma_ops_domain_free(struct dma_ops_domain *dom) del_domain_from_list(&dom->domain); + dma_ops_domain_free_flush_queue(dom); + put_iova_domain(&dom->iovad); free_pagetable(&dom->domain); @@ -1791,6 +1857,9 @@ static struct dma_ops_domain *dma_ops_domain_alloc(void) /* Initialize reserved ranges */ copy_reserved_iova(&reserved_iova_ranges, &dma_dom->iovad); + if (dma_ops_domain_alloc_flush_queue(dma_dom)) + goto free_dma_dom; + add_domain_to_list(&dma_dom->domain); return dma_dom; -- cgit v1.2.3 From fd62190a67d6bdf9b93dea056adfcd7fd29b0f92 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Fri, 2 Jun 2017 15:37:26 +0200 Subject: iommu/amd: Make use of the per-domain flush queue Fill the flush-queue on unmap and only flush the IOMMU and device TLBs when a per-cpu queue gets full. 
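Before the locking and counter refinements in the following patches, the per-CPU flush queue just described is essentially a plain ring buffer: unmapped ranges are parked in the ring, and only a full ring forces the expensive domain-wide IOTLB flush, after which every parked range can be returned to the IOVA allocator. The userspace sketch below models that behaviour; flush_domain_tlb() and free_iova() are illustrative stubs standing in for domain_flush_tlb()/domain_flush_complete() and free_iova_fast().

#include <stdbool.h>
#include <stdio.h>

#define FLUSH_QUEUE_SIZE 256

struct flush_queue_entry {
        unsigned long iova_pfn;
        unsigned long pages;
};

struct flush_queue {
        struct flush_queue_entry entries[FLUSH_QUEUE_SIZE];
        unsigned int head, tail;        /* ring indices */
};

/* Stubs for domain_flush_tlb()+domain_flush_complete() and free_iova_fast(). */
static void flush_domain_tlb(void) { puts("domain TLB flushed"); }
static void free_iova(unsigned long pfn, unsigned long pages)
{
        (void)pfn; (void)pages;         /* would hand the range back to the allocator */
}

static bool queue_full(const struct flush_queue *q)
{
        return (q->tail + 1) % FLUSH_QUEUE_SIZE == q->head;
}

static void queue_release(struct flush_queue *q)
{
        for (unsigned int i = q->head; i != q->tail;
             i = (i + 1) % FLUSH_QUEUE_SIZE)
                free_iova(q->entries[i].iova_pfn, q->entries[i].pages);

        q->head = q->tail = 0;
}

static void queue_add(struct flush_queue *q,
                      unsigned long iova_pfn, unsigned long pages)
{
        if (queue_full(q)) {
                /* Ring is full: flush once, then everything is safe to free. */
                flush_domain_tlb();
                queue_release(q);
        }

        q->entries[q->tail].iova_pfn = iova_pfn;
        q->entries[q->tail].pages    = pages;
        q->tail = (q->tail + 1) % FLUSH_QUEUE_SIZE;
}

int main(void)
{
        static struct flush_queue cpu_queue;    /* one of these per CPU */

        for (unsigned long i = 0; i < 600; i++)
                queue_add(&cpu_queue, 0x1000 + i, 1);

        return 0;
}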
Signed-off-by: Joerg Roedel --- drivers/iommu/amd_iommu.c | 60 +++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 56 insertions(+), 4 deletions(-) (limited to 'drivers/iommu/amd_iommu.c') diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c index 2418fcc28fbe..9fafc3026865 100644 --- a/drivers/iommu/amd_iommu.c +++ b/drivers/iommu/amd_iommu.c @@ -1806,6 +1806,61 @@ static int dma_ops_domain_alloc_flush_queue(struct dma_ops_domain *dom) return 0; } +static inline bool queue_ring_full(struct flush_queue *queue) +{ + return (((queue->tail + 1) % FLUSH_QUEUE_SIZE) == queue->head); +} + +#define queue_ring_for_each(i, q) \ + for (i = (q)->head; i != (q)->tail; i = (i + 1) % FLUSH_QUEUE_SIZE) + +static void queue_release(struct dma_ops_domain *dom, + struct flush_queue *queue) +{ + unsigned i; + + queue_ring_for_each(i, queue) + free_iova_fast(&dom->iovad, + queue->entries[i].iova_pfn, + queue->entries[i].pages); + + queue->head = queue->tail = 0; +} + +static inline unsigned queue_ring_add(struct flush_queue *queue) +{ + unsigned idx = queue->tail; + + queue->tail = (idx + 1) % FLUSH_QUEUE_SIZE; + + return idx; +} + +static void queue_add(struct dma_ops_domain *dom, + unsigned long address, unsigned long pages) +{ + struct flush_queue *queue; + int idx; + + pages = __roundup_pow_of_two(pages); + address >>= PAGE_SHIFT; + + queue = get_cpu_ptr(dom->flush_queue); + + if (queue_ring_full(queue)) { + domain_flush_tlb(&dom->domain); + domain_flush_complete(&dom->domain); + queue_release(dom, queue); + } + + idx = queue_ring_add(queue); + + queue->entries[idx].iova_pfn = address; + queue->entries[idx].pages = pages; + + put_cpu_ptr(dom->flush_queue); +} + /* * Free a domain, only used if something went wrong in the * allocation path and we need to free an already allocated page table @@ -2454,10 +2509,7 @@ static void __unmap_single(struct dma_ops_domain *dma_dom, domain_flush_tlb(&dma_dom->domain); domain_flush_complete(&dma_dom->domain); } else { - /* Keep the if() around, we need it later again */ - dma_ops_free_iova(dma_dom, dma_addr, pages); - domain_flush_tlb(&dma_dom->domain); - domain_flush_complete(&dma_dom->domain); + queue_add(dma_dom, dma_addr, pages); } } -- cgit v1.2.3 From e241f8e76c152e000d481fc8334d41d22c013fe8 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Fri, 2 Jun 2017 15:44:57 +0200 Subject: iommu/amd: Add locking to per-domain flush-queue With locking we can safely access the flush-queues of other cpus. 
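The reason each per-CPU queue needs its own lock is that it stops being private to its owning CPU: the per-domain flush timer added later in the series drains all queues from whichever CPU the timer happens to fire on. A small pthreads stand-in for that producer/reaper split is sketched below, with mutexes in place of spinlocks and purely illustrative names.

#include <pthread.h>
#include <stdio.h>

#define NR_CPUS         4
#define QUEUE_SIZE      256

struct cpu_queue {
        pthread_mutex_t lock;
        unsigned int head, tail;
        unsigned long pfn[QUEUE_SIZE];
};

static struct cpu_queue queues[NR_CPUS];

/* Producer side: runs on its own CPU, but still takes the lock. */
static void queue_add(int cpu, unsigned long pfn)
{
        struct cpu_queue *q = &queues[cpu];

        pthread_mutex_lock(&q->lock);
        q->pfn[q->tail] = pfn;
        q->tail = (q->tail + 1) % QUEUE_SIZE;
        pthread_mutex_unlock(&q->lock);
}

/* Reaper side (models the flush timer): drains every CPU's queue. */
static void drain_all_queues(void)
{
        for (int cpu = 0; cpu < NR_CPUS; cpu++) {
                struct cpu_queue *q = &queues[cpu];

                pthread_mutex_lock(&q->lock);
                q->head = q->tail;      /* entries handed back to the allocator here */
                pthread_mutex_unlock(&q->lock);
        }
}

int main(void)
{
        for (int cpu = 0; cpu < NR_CPUS; cpu++)
                pthread_mutex_init(&queues[cpu].lock, NULL);

        queue_add(0, 0x1000);
        queue_add(1, 0x2000);
        drain_all_queues();
        puts("all per-CPU queues drained");
        return 0;
}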
Signed-off-by: Joerg Roedel --- drivers/iommu/amd_iommu.c | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'drivers/iommu/amd_iommu.c') diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c index 9fafc3026865..9a06acc8cc9d 100644 --- a/drivers/iommu/amd_iommu.c +++ b/drivers/iommu/amd_iommu.c @@ -146,6 +146,7 @@ struct flush_queue_entry { struct flush_queue { struct flush_queue_entry *entries; unsigned head, tail; + spinlock_t lock; }; /* @@ -1801,6 +1802,8 @@ static int dma_ops_domain_alloc_flush_queue(struct dma_ops_domain *dom) dma_ops_domain_free_flush_queue(dom); return -ENOMEM; } + + spin_lock_init(&queue->lock); } return 0; @@ -1808,6 +1811,8 @@ static int dma_ops_domain_alloc_flush_queue(struct dma_ops_domain *dom) static inline bool queue_ring_full(struct flush_queue *queue) { + assert_spin_locked(&queue->lock); + return (((queue->tail + 1) % FLUSH_QUEUE_SIZE) == queue->head); } @@ -1819,6 +1824,8 @@ static void queue_release(struct dma_ops_domain *dom, { unsigned i; + assert_spin_locked(&queue->lock); + queue_ring_for_each(i, queue) free_iova_fast(&dom->iovad, queue->entries[i].iova_pfn, @@ -1831,6 +1838,7 @@ static inline unsigned queue_ring_add(struct flush_queue *queue) { unsigned idx = queue->tail; + assert_spin_locked(&queue->lock); queue->tail = (idx + 1) % FLUSH_QUEUE_SIZE; return idx; @@ -1840,12 +1848,14 @@ static void queue_add(struct dma_ops_domain *dom, unsigned long address, unsigned long pages) { struct flush_queue *queue; + unsigned long flags; int idx; pages = __roundup_pow_of_two(pages); address >>= PAGE_SHIFT; queue = get_cpu_ptr(dom->flush_queue); + spin_lock_irqsave(&queue->lock, flags); if (queue_ring_full(queue)) { domain_flush_tlb(&dom->domain); @@ -1858,6 +1868,7 @@ static void queue_add(struct dma_ops_domain *dom, queue->entries[idx].iova_pfn = address; queue->entries[idx].pages = pages; + spin_unlock_irqrestore(&queue->lock, flags); put_cpu_ptr(dom->flush_queue); } -- cgit v1.2.3 From a6e3f6f030396c0576c729fd8ca4bfb654d35bfe Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Fri, 2 Jun 2017 16:01:53 +0200 Subject: iommu/amd: Add flush counters to struct dma_ops_domain The counters are increased every time the TLB for a given domain is flushed. We also store the current value of that counter into newly added entries of the flush-queue, so that we can tell whether this entry is already flushed. Signed-off-by: Joerg Roedel --- drivers/iommu/amd_iommu.c | 52 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 52 insertions(+) (limited to 'drivers/iommu/amd_iommu.c') diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c index 9a06acc8cc9d..795208bd39bd 100644 --- a/drivers/iommu/amd_iommu.c +++ b/drivers/iommu/amd_iommu.c @@ -141,6 +141,7 @@ static void detach_device(struct device *dev); struct flush_queue_entry { unsigned long iova_pfn; unsigned long pages; + u64 counter; /* Flush counter when this entry was added to the queue */ }; struct flush_queue { @@ -160,6 +161,27 @@ struct dma_ops_domain { struct iova_domain iovad; struct flush_queue __percpu *flush_queue; + + /* + * We need two counter here to be race-free wrt. IOTLB flushing and + * adding entries to the flush queue. + * + * The flush_start_cnt is incremented _before_ the IOTLB flush starts. + * New entries added to the flush ring-buffer get their 'counter' value + * from here. 
This way we can make sure that entries added to the queue + * (or other per-cpu queues of the same domain) while the TLB is about + * to be flushed are not considered to be flushed already. + */ + atomic64_t flush_start_cnt; + + /* + * The flush_finish_cnt is incremented when an IOTLB flush is complete. + * This value is always smaller than flush_start_cnt. The queue_add + * function frees all IOVAs that have a counter value smaller than + * flush_finish_cnt. This makes sure that we only free IOVAs that are + * flushed out of the IOTLB of the domain. + */ + atomic64_t flush_finish_cnt; }; static struct iova_domain reserved_iova_ranges; @@ -1777,6 +1799,9 @@ static int dma_ops_domain_alloc_flush_queue(struct dma_ops_domain *dom) { int cpu; + atomic64_set(&dom->flush_start_cnt, 0); + atomic64_set(&dom->flush_finish_cnt, 0); + dom->flush_queue = alloc_percpu(struct flush_queue); if (!dom->flush_queue) return -ENOMEM; @@ -1844,22 +1869,48 @@ static inline unsigned queue_ring_add(struct flush_queue *queue) return idx; } +static inline void queue_ring_remove_head(struct flush_queue *queue) +{ + assert_spin_locked(&queue->lock); + queue->head = (queue->head + 1) % FLUSH_QUEUE_SIZE; +} + static void queue_add(struct dma_ops_domain *dom, unsigned long address, unsigned long pages) { struct flush_queue *queue; unsigned long flags; + u64 counter; int idx; pages = __roundup_pow_of_two(pages); address >>= PAGE_SHIFT; + counter = atomic64_read(&dom->flush_finish_cnt); + queue = get_cpu_ptr(dom->flush_queue); spin_lock_irqsave(&queue->lock, flags); + queue_ring_for_each(idx, queue) { + /* + * This assumes that counter values in the ring-buffer are + * monotonously rising. + */ + if (queue->entries[idx].counter >= counter) + break; + + free_iova_fast(&dom->iovad, + queue->entries[idx].iova_pfn, + queue->entries[idx].pages); + + queue_ring_remove_head(queue); + } + if (queue_ring_full(queue)) { + atomic64_inc(&dom->flush_start_cnt); domain_flush_tlb(&dom->domain); domain_flush_complete(&dom->domain); + atomic64_inc(&dom->flush_finish_cnt); queue_release(dom, queue); } @@ -1867,6 +1918,7 @@ static void queue_add(struct dma_ops_domain *dom, queue->entries[idx].iova_pfn = address; queue->entries[idx].pages = pages; + queue->entries[idx].counter = atomic64_read(&dom->flush_start_cnt); spin_unlock_irqrestore(&queue->lock, flags); put_cpu_ptr(dom->flush_queue); -- cgit v1.2.3 From fca6af6a5976dfa00182232f666b4f789c98bd0c Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Fri, 2 Jun 2017 18:13:37 +0200 Subject: iommu/amd: Add per-domain timer to flush per-cpu queues Add a timer to each dma_ops domain so that we flush unused IOTLB entries regularily, even if the queues don't get full all the time. Signed-off-by: Joerg Roedel --- drivers/iommu/amd_iommu.c | 84 +++++++++++++++++++++++++++++++++++++---------- 1 file changed, 67 insertions(+), 17 deletions(-) (limited to 'drivers/iommu/amd_iommu.c') diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c index 795208bd39bd..00c1796e07bf 100644 --- a/drivers/iommu/amd_iommu.c +++ b/drivers/iommu/amd_iommu.c @@ -182,6 +182,13 @@ struct dma_ops_domain { * flushed out of the IOTLB of the domain. 
*/ atomic64_t flush_finish_cnt; + + /* + * Timer to make sure we don't keep IOVAs around unflushed + * for too long + */ + struct timer_list flush_timer; + atomic_t flush_timer_on; }; static struct iova_domain reserved_iova_ranges; @@ -1834,6 +1841,14 @@ static int dma_ops_domain_alloc_flush_queue(struct dma_ops_domain *dom) return 0; } +static void dma_ops_domain_flush_tlb(struct dma_ops_domain *dom) +{ + atomic64_inc(&dom->flush_start_cnt); + domain_flush_tlb(&dom->domain); + domain_flush_complete(&dom->domain); + atomic64_inc(&dom->flush_finish_cnt); +} + static inline bool queue_ring_full(struct flush_queue *queue) { assert_spin_locked(&queue->lock); @@ -1875,22 +1890,12 @@ static inline void queue_ring_remove_head(struct flush_queue *queue) queue->head = (queue->head + 1) % FLUSH_QUEUE_SIZE; } -static void queue_add(struct dma_ops_domain *dom, - unsigned long address, unsigned long pages) +static void queue_ring_free_flushed(struct dma_ops_domain *dom, + struct flush_queue *queue) { - struct flush_queue *queue; - unsigned long flags; - u64 counter; + u64 counter = atomic64_read(&dom->flush_finish_cnt); int idx; - pages = __roundup_pow_of_two(pages); - address >>= PAGE_SHIFT; - - counter = atomic64_read(&dom->flush_finish_cnt); - - queue = get_cpu_ptr(dom->flush_queue); - spin_lock_irqsave(&queue->lock, flags); - queue_ring_for_each(idx, queue) { /* * This assumes that counter values in the ring-buffer are @@ -1905,12 +1910,25 @@ static void queue_add(struct dma_ops_domain *dom, queue_ring_remove_head(queue); } +} + +static void queue_add(struct dma_ops_domain *dom, + unsigned long address, unsigned long pages) +{ + struct flush_queue *queue; + unsigned long flags; + int idx; + + pages = __roundup_pow_of_two(pages); + address >>= PAGE_SHIFT; + + queue = get_cpu_ptr(dom->flush_queue); + spin_lock_irqsave(&queue->lock, flags); + + queue_ring_free_flushed(dom, queue); if (queue_ring_full(queue)) { - atomic64_inc(&dom->flush_start_cnt); - domain_flush_tlb(&dom->domain); - domain_flush_complete(&dom->domain); - atomic64_inc(&dom->flush_finish_cnt); + dma_ops_domain_flush_tlb(dom); queue_release(dom, queue); } @@ -1921,9 +1939,33 @@ static void queue_add(struct dma_ops_domain *dom, queue->entries[idx].counter = atomic64_read(&dom->flush_start_cnt); spin_unlock_irqrestore(&queue->lock, flags); + + if (atomic_cmpxchg(&dom->flush_timer_on, 0, 1) == 0) + mod_timer(&dom->flush_timer, jiffies + msecs_to_jiffies(10)); + put_cpu_ptr(dom->flush_queue); } +static void queue_flush_timeout(unsigned long data) +{ + struct dma_ops_domain *dom = (struct dma_ops_domain *)data; + int cpu; + + atomic_set(&dom->flush_timer_on, 0); + + dma_ops_domain_flush_tlb(dom); + + for_each_possible_cpu(cpu) { + struct flush_queue *queue; + unsigned long flags; + + queue = per_cpu_ptr(dom->flush_queue, cpu); + spin_lock_irqsave(&queue->lock, flags); + queue_ring_free_flushed(dom, queue); + spin_unlock_irqrestore(&queue->lock, flags); + } +} + /* * Free a domain, only used if something went wrong in the * allocation path and we need to free an already allocated page table @@ -1935,6 +1977,9 @@ static void dma_ops_domain_free(struct dma_ops_domain *dom) del_domain_from_list(&dom->domain); + if (timer_pending(&dom->flush_timer)) + del_timer(&dom->flush_timer); + dma_ops_domain_free_flush_queue(dom); put_iova_domain(&dom->iovad); @@ -1978,6 +2023,11 @@ static struct dma_ops_domain *dma_ops_domain_alloc(void) if (dma_ops_domain_alloc_flush_queue(dma_dom)) goto free_dma_dom; + setup_timer(&dma_dom->flush_timer, 
queue_flush_timeout, + (unsigned long)dma_dom); + + atomic_set(&dma_dom->flush_timer_on, 0); + add_domain_to_list(&dma_dom->domain); return dma_dom; -- cgit v1.2.3 From ac3b708ad49f77b44faa104057783f161491e9e4 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Wed, 7 Jun 2017 14:38:15 +0200 Subject: iommu/amd: Remove queue_release() function We can use queue_ring_free_flushed() instead, so remove this redundancy. Signed-off-by: Joerg Roedel --- drivers/iommu/amd_iommu.c | 28 ++++++++-------------------- 1 file changed, 8 insertions(+), 20 deletions(-) (limited to 'drivers/iommu/amd_iommu.c') diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c index 00c1796e07bf..6498f5baa7ea 100644 --- a/drivers/iommu/amd_iommu.c +++ b/drivers/iommu/amd_iommu.c @@ -1859,21 +1859,6 @@ static inline bool queue_ring_full(struct flush_queue *queue) #define queue_ring_for_each(i, q) \ for (i = (q)->head; i != (q)->tail; i = (i + 1) % FLUSH_QUEUE_SIZE) -static void queue_release(struct dma_ops_domain *dom, - struct flush_queue *queue) -{ - unsigned i; - - assert_spin_locked(&queue->lock); - - queue_ring_for_each(i, queue) - free_iova_fast(&dom->iovad, - queue->entries[i].iova_pfn, - queue->entries[i].pages); - - queue->head = queue->tail = 0; -} - static inline unsigned queue_ring_add(struct flush_queue *queue) { unsigned idx = queue->tail; @@ -1925,12 +1910,15 @@ static void queue_add(struct dma_ops_domain *dom, queue = get_cpu_ptr(dom->flush_queue); spin_lock_irqsave(&queue->lock, flags); - queue_ring_free_flushed(dom, queue); - - if (queue_ring_full(queue)) { + /* + * When ring-queue is full, flush the entries from the IOTLB so + * that we can free all entries with queue_ring_free_flushed() + * below. + */ + if (queue_ring_full(queue)) dma_ops_domain_flush_tlb(dom); - queue_release(dom, queue); - } + + queue_ring_free_flushed(dom, queue); idx = queue_ring_add(queue); -- cgit v1.2.3 From 54bd63570484167cb13edf81e31fff107b879981 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Thu, 15 Jun 2017 10:36:22 +0200 Subject: iommu/amd: Suppress IO_PAGE_FAULTs in kdump kernel When booting into a kdump kernel, suppress IO_PAGE_FAULTs by default for all devices. But allow the faults again when a domain is assigned to a device. Signed-off-by: Joerg Roedel --- drivers/iommu/amd_iommu.c | 3 ++- drivers/iommu/amd_iommu_init.c | 9 +++++++++ drivers/iommu/amd_iommu_types.h | 1 + 3 files changed, 12 insertions(+), 1 deletion(-) (limited to 'drivers/iommu/amd_iommu.c') diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c index 6498f5baa7ea..95ee360a5199 100644 --- a/drivers/iommu/amd_iommu.c +++ b/drivers/iommu/amd_iommu.c @@ -2078,7 +2078,8 @@ static void set_dte_entry(u16 devid, struct protection_domain *domain, bool ats) flags |= tmp; } - flags &= ~(0xffffUL); + + flags &= ~(DTE_FLAG_SA | 0xffffULL); flags |= domain->id; amd_iommu_dev_table[devid].data[1] = flags; diff --git a/drivers/iommu/amd_iommu_init.c b/drivers/iommu/amd_iommu_init.c index 3fa7e3b35507..cf7896550e75 100644 --- a/drivers/iommu/amd_iommu_init.c +++ b/drivers/iommu/amd_iommu_init.c @@ -29,6 +29,7 @@ #include #include #include +#include #include #include #include @@ -1900,6 +1901,14 @@ static void init_device_table_dma(void) for (devid = 0; devid <= amd_iommu_last_bdf; ++devid) { set_dev_entry_bit(devid, DEV_ENTRY_VALID); set_dev_entry_bit(devid, DEV_ENTRY_TRANSLATION); + /* + * In kdump kernels in-flight DMA from the old kernel might + * cause IO_PAGE_FAULTs. 
There are no reports that a kdump + * actually failed because of that, so just disable fault + * reporting in the hardware to get rid of the messages + */ + if (is_kdump_kernel()) + set_dev_entry_bit(devid, DEV_ENTRY_NO_PAGE_FAULT); } } diff --git a/drivers/iommu/amd_iommu_types.h b/drivers/iommu/amd_iommu_types.h index 6960d7db2fab..294a409e283b 100644 --- a/drivers/iommu/amd_iommu_types.h +++ b/drivers/iommu/amd_iommu_types.h @@ -322,6 +322,7 @@ #define IOMMU_PTE_IW (1ULL << 62) #define DTE_FLAG_IOTLB (1ULL << 32) +#define DTE_FLAG_SA (1ULL << 34) #define DTE_FLAG_GV (1ULL << 55) #define DTE_FLAG_MASK (0x3ffULL << 32) #define DTE_GLX_SHIFT (56) -- cgit v1.2.3 From 9ce3a72cd7f7e0b9ba1c5952e4461b363824bca9 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Thu, 22 Jun 2017 12:16:33 +0200 Subject: iommu/amd: Free already flushed ring-buffer entries before full-check To benefit from IOTLB flushes on other CPUs we have to free the already flushed IOVAs from the ring-buffer before we do the queue_ring_full() check. Signed-off-by: Joerg Roedel --- drivers/iommu/amd_iommu.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) (limited to 'drivers/iommu/amd_iommu.c') diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c index 95ee360a5199..c618c26a3b33 100644 --- a/drivers/iommu/amd_iommu.c +++ b/drivers/iommu/amd_iommu.c @@ -1910,15 +1910,21 @@ static void queue_add(struct dma_ops_domain *dom, queue = get_cpu_ptr(dom->flush_queue); spin_lock_irqsave(&queue->lock, flags); + /* + * First remove the enries from the ring-buffer that are already + * flushed to make the below queue_ring_full() check less likely + */ + queue_ring_free_flushed(dom, queue); + /* * When ring-queue is full, flush the entries from the IOTLB so * that we can free all entries with queue_ring_free_flushed() * below. */ - if (queue_ring_full(queue)) + if (queue_ring_full(queue)) { dma_ops_domain_flush_tlb(dom); - - queue_ring_free_flushed(dom, queue); + queue_ring_free_flushed(dom, queue); + } idx = queue_ring_add(queue); -- cgit v1.2.3
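Taken together, the last few patches leave queue_add() with a deliberate ordering: first reclaim entries already covered by a completed flush (possibly triggered by another CPU or by the timer), only then check whether the ring is still full, and tag each new entry with the flush_start_cnt value current at queueing time so a later pass can prove it has been flushed. The userspace sketch below models that end state; it drops the per-CPU split, the spinlock and the atomics for brevity, and all names are illustrative rather than the driver's.

#include <stdint.h>
#include <stdio.h>

#define FLUSH_QUEUE_SIZE 256

struct entry {
        unsigned long iova_pfn;
        unsigned long pages;
        uint64_t counter;       /* flush_start_cnt when the entry was queued */
};

struct domain {
        uint64_t flush_start_cnt;
        uint64_t flush_finish_cnt;
        struct entry ring[FLUSH_QUEUE_SIZE];
        unsigned int head, tail;
};

/* Stub for free_iova_fast(). */
static void free_iova(unsigned long pfn, unsigned long pages) { (void)pfn; (void)pages; }

static void domain_flush_tlb(struct domain *d)
{
        d->flush_start_cnt++;
        /* ... IOTLB flush and completion wait would happen here ... */
        d->flush_finish_cnt++;
}

/* Free every entry that is provably covered by a completed flush. */
static void free_flushed(struct domain *d)
{
        uint64_t done = d->flush_finish_cnt;

        while (d->head != d->tail && d->ring[d->head].counter < done) {
                free_iova(d->ring[d->head].iova_pfn, d->ring[d->head].pages);
                d->head = (d->head + 1) % FLUSH_QUEUE_SIZE;
        }
}

static void queue_add(struct domain *d, unsigned long pfn, unsigned long pages)
{
        /* 1. Reclaim entries already flushed, possibly by someone else. */
        free_flushed(d);

        /* 2. Only if that did not help, pay for a flush right now. */
        if ((d->tail + 1) % FLUSH_QUEUE_SIZE == d->head) {
                domain_flush_tlb(d);
                free_flushed(d);
        }

        /* 3. Queue the new range, tagged with the current start counter. */
        d->ring[d->tail] = (struct entry){ pfn, pages, d->flush_start_cnt };
        d->tail = (d->tail + 1) % FLUSH_QUEUE_SIZE;
}

int main(void)
{
        static struct domain dom;

        for (unsigned long i = 0; i < 1000; i++)
                queue_add(&dom, 0x1000 + i, 1);

        printf("flushes issued: %llu\n", (unsigned long long)dom.flush_finish_cnt);
        return 0;
}

The counter comparison in free_flushed() is what lets one CPU profit from a flush issued elsewhere, which is precisely the point of the final patch's reordering.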