summaryrefslogtreecommitdiffstats
path: root/drivers/xen
diff options
context:
space:
mode:
authorMichael Kelley <mhklinux@outlook.com>2024-07-08 21:41:00 +0200
committerChristoph Hellwig <hch@lst.de>2024-07-10 07:59:03 +0200
commit7296f2301a057493e97b07739213c6e864f76891 (patch)
treeed08a5d890b1c2530b9b881d2732df7f94a72389 /drivers/xen
parentdma-mapping: benchmark: Don't starve others when doing the test (diff)
downloadlinux-7296f2301a057493e97b07739213c6e864f76891.tar.xz
linux-7296f2301a057493e97b07739213c6e864f76891.zip
swiotlb: reduce swiotlb pool lookups
With CONFIG_SWIOTLB_DYNAMIC enabled, each round-trip map/unmap pair in the swiotlb results in 6 calls to swiotlb_find_pool(). In multiple places, the pool is found and used in one function, and then must be found again in the next function that is called because only the tlb_addr is passed as an argument. These are the six call sites: dma_direct_map_page: 1. swiotlb_map -> swiotlb_tbl_map_single -> swiotlb_bounce dma_direct_unmap_page: 2. dma_direct_sync_single_for_cpu -> is_swiotlb_buffer 3. dma_direct_sync_single_for_cpu -> swiotlb_sync_single_for_cpu -> swiotlb_bounce 4. is_swiotlb_buffer 5. swiotlb_tbl_unmap_single -> swiotlb_del_transient 6. swiotlb_tbl_unmap_single -> swiotlb_release_slots Reduce the number of calls by finding the pool at a higher level, and passing it as an argument instead of searching again. A key change is for is_swiotlb_buffer() to return a pool pointer instead of a boolean, and then pass this pool pointer to subsequent swiotlb functions. There are 9 occurrences of is_swiotlb_buffer() used to test if a buffer is a swiotlb buffer before calling a swiotlb function. To reduce code duplication in getting the pool pointer and passing it as an argument, introduce inline wrappers for this pattern. The generated code is essentially unchanged. Since is_swiotlb_buffer() no longer returns a boolean, rename some functions to reflect the change: * swiotlb_find_pool() becomes __swiotlb_find_pool() * is_swiotlb_buffer() becomes swiotlb_find_pool() * is_xen_swiotlb_buffer() becomes xen_swiotlb_find_pool() With these changes, a round-trip map/unmap pair requires only 2 pool lookups (listed using the new names and wrappers): dma_direct_unmap_page: 1. dma_direct_sync_single_for_cpu -> swiotlb_find_pool 2. swiotlb_tbl_unmap_single -> swiotlb_find_pool These changes come from noticing the inefficiencies in a code review, not from performance measurements. With CONFIG_SWIOTLB_DYNAMIC, __swiotlb_find_pool() is not trivial, and it uses an RCU read lock, so avoiding the redundant calls helps performance in a hot path. When CONFIG_SWIOTLB_DYNAMIC is *not* set, the code size reduction is minimal and the perf benefits are likely negligible, but no harm is done. No functional change is intended. Signed-off-by: Michael Kelley <mhklinux@outlook.com> Reviewed-by: Petr Tesarik <petr@tesarici.cz> Signed-off-by: Christoph Hellwig <hch@lst.de>
Diffstat (limited to 'drivers/xen')
-rw-r--r--drivers/xen/swiotlb-xen.c31
1 files changed, 20 insertions, 11 deletions
diff --git a/drivers/xen/swiotlb-xen.c b/drivers/xen/swiotlb-xen.c
index 6579ae3f6dac..35155258a7e2 100644
--- a/drivers/xen/swiotlb-xen.c
+++ b/drivers/xen/swiotlb-xen.c
@@ -88,7 +88,8 @@ static inline int range_straddles_page_boundary(phys_addr_t p, size_t size)
return 0;
}
-static int is_xen_swiotlb_buffer(struct device *dev, dma_addr_t dma_addr)
+static struct io_tlb_pool *xen_swiotlb_find_pool(struct device *dev,
+ dma_addr_t dma_addr)
{
unsigned long bfn = XEN_PFN_DOWN(dma_to_phys(dev, dma_addr));
unsigned long xen_pfn = bfn_to_local_pfn(bfn);
@@ -99,8 +100,8 @@ static int is_xen_swiotlb_buffer(struct device *dev, dma_addr_t dma_addr)
* in our domain. Therefore _only_ check address within our domain.
*/
if (pfn_valid(PFN_DOWN(paddr)))
- return is_swiotlb_buffer(dev, paddr);
- return 0;
+ return swiotlb_find_pool(dev, paddr);
+ return NULL;
}
#ifdef CONFIG_X86
@@ -227,8 +228,9 @@ static dma_addr_t xen_swiotlb_map_page(struct device *dev, struct page *page,
* Ensure that the address returned is DMA'ble
*/
if (unlikely(!dma_capable(dev, dev_addr, size, true))) {
- swiotlb_tbl_unmap_single(dev, map, size, dir,
- attrs | DMA_ATTR_SKIP_CPU_SYNC);
+ __swiotlb_tbl_unmap_single(dev, map, size, dir,
+ attrs | DMA_ATTR_SKIP_CPU_SYNC,
+ swiotlb_find_pool(dev, map));
return DMA_MAPPING_ERROR;
}
@@ -254,6 +256,7 @@ static void xen_swiotlb_unmap_page(struct device *hwdev, dma_addr_t dev_addr,
size_t size, enum dma_data_direction dir, unsigned long attrs)
{
phys_addr_t paddr = xen_dma_to_phys(hwdev, dev_addr);
+ struct io_tlb_pool *pool;
BUG_ON(dir == DMA_NONE);
@@ -265,8 +268,10 @@ static void xen_swiotlb_unmap_page(struct device *hwdev, dma_addr_t dev_addr,
}
/* NOTE: We use dev_addr here, not paddr! */
- if (is_xen_swiotlb_buffer(hwdev, dev_addr))
- swiotlb_tbl_unmap_single(hwdev, paddr, size, dir, attrs);
+ pool = xen_swiotlb_find_pool(hwdev, dev_addr);
+ if (pool)
+ __swiotlb_tbl_unmap_single(hwdev, paddr, size, dir,
+ attrs, pool);
}
static void
@@ -274,6 +279,7 @@ xen_swiotlb_sync_single_for_cpu(struct device *dev, dma_addr_t dma_addr,
size_t size, enum dma_data_direction dir)
{
phys_addr_t paddr = xen_dma_to_phys(dev, dma_addr);
+ struct io_tlb_pool *pool;
if (!dev_is_dma_coherent(dev)) {
if (pfn_valid(PFN_DOWN(dma_to_phys(dev, dma_addr))))
@@ -282,8 +288,9 @@ xen_swiotlb_sync_single_for_cpu(struct device *dev, dma_addr_t dma_addr,
xen_dma_sync_for_cpu(dev, dma_addr, size, dir);
}
- if (is_xen_swiotlb_buffer(dev, dma_addr))
- swiotlb_sync_single_for_cpu(dev, paddr, size, dir);
+ pool = xen_swiotlb_find_pool(dev, dma_addr);
+ if (pool)
+ __swiotlb_sync_single_for_cpu(dev, paddr, size, dir, pool);
}
static void
@@ -291,9 +298,11 @@ xen_swiotlb_sync_single_for_device(struct device *dev, dma_addr_t dma_addr,
size_t size, enum dma_data_direction dir)
{
phys_addr_t paddr = xen_dma_to_phys(dev, dma_addr);
+ struct io_tlb_pool *pool;
- if (is_xen_swiotlb_buffer(dev, dma_addr))
- swiotlb_sync_single_for_device(dev, paddr, size, dir);
+ pool = xen_swiotlb_find_pool(dev, dma_addr);
+ if (pool)
+ __swiotlb_sync_single_for_device(dev, paddr, size, dir, pool);
if (!dev_is_dma_coherent(dev)) {
if (pfn_valid(PFN_DOWN(dma_to_phys(dev, dma_addr))))