author     Petr Tesarik <petr.tesarik.ext@huawei.com>	2023-08-03 13:59:41 +0200
committer  Christoph Hellwig <hch@lst.de>	2023-08-08 19:29:21 +0200
commit     d069ed288ac74c24e2b1c294aa9445c80ed6c518
tree       ab5552e5c52f44a0997c635cca9d30ea01cb88d4 /kernel
parent     swiotlb: move slot allocation explanation comment where it belongs
swiotlb: optimize get_max_slots()
Use a simple logical shift and increment to calculate the number of slots
taken by the DMA segment boundary.
At least GCC 13 cannot optimize the original expression, producing this
horrible assembly code on x86:
cmpq $-1, %rcx
je .L364
addq $2048, %rcx
shrq $11, %rcx
movq %rcx, %r13
.L331:
// rest of the function here...
// after function epilogue and return:
.L364:
movabsq $9007199254740992, %r13
jmp .L331
After the optimization, the code looks more reasonable:
shrq $11, %r11
leaq 1(%r11), %rbx
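
To see why the special case can be dropped entirely, note that segment
boundary masks are in practice of the form 2^k - 1, and that shifting the
mask down by IO_TLB_SHIFT before adding 1 means the increment can no
longer overflow, not even for ~0UL. The standalone userspace sketch below
(added here for illustration, it is not part of the patch) compares the
old and new expressions for all such masks; it assumes IO_TLB_SHIFT is 11
(2 KiB slots), matching the shrq $11 in the assembly above, and inlines
nr_slots() from swiotlb.c as DIV_ROUND_UP(v, IO_TLB_SIZE):

	/*
	 * Illustrative sketch, not kernel code: check that the branch-free
	 * expression matches the old get_max_slots() for every boundary
	 * mask of the form 2^k - 1, including ~0UL, which the old code
	 * special-cased because boundary_mask + 1 wraps to 0.
	 */
	#include <assert.h>
	#include <stdio.h>

	#define BITS_PER_LONG	(8 * sizeof(unsigned long))
	#define IO_TLB_SHIFT	11	/* 2 KiB slots, hence "shrq $11" */
	#define IO_TLB_SIZE	(1UL << IO_TLB_SHIFT)

	/* Old version; nr_slots(v) is DIV_ROUND_UP(v, IO_TLB_SIZE). */
	static unsigned long get_max_slots_old(unsigned long boundary_mask)
	{
		if (boundary_mask == ~0UL)
			return 1UL << (BITS_PER_LONG - IO_TLB_SHIFT);
		return (boundary_mask + 1 + IO_TLB_SIZE - 1) / IO_TLB_SIZE;
	}

	/* New version: one shift and one increment, no branch. */
	static unsigned long get_max_slots_new(unsigned long boundary_mask)
	{
		return (boundary_mask >> IO_TLB_SHIFT) + 1;
	}

	int main(void)
	{
		unsigned int k;

		for (k = 0; k < BITS_PER_LONG; k++) {
			unsigned long mask = (1UL << k) - 1;

			assert(get_max_slots_old(mask) ==
			       get_max_slots_new(mask));
		}
		/* The all-ones mask the old code branched on. */
		assert(get_max_slots_old(~0UL) == get_max_slots_new(~0UL));
		printf("old and new get_max_slots() agree\n");
		return 0;
	}

For the all-ones mask both versions return 2^(BITS_PER_LONG -
IO_TLB_SHIFT) = 2^53, which is exactly the movabsq $9007199254740992
constant the compiler had to materialize in the bad code above; shifting
first makes that branch, and the constant load, unnecessary.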
Signed-off-by: Petr Tesarik <petr.tesarik.ext@huawei.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
Diffstat (limited to 'kernel')
 kernel/dma/swiotlb.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)
diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c
index 50a0e9c45c39..394494a6b1f3 100644
--- a/kernel/dma/swiotlb.c
+++ b/kernel/dma/swiotlb.c
@@ -903,9 +903,7 @@ static inline phys_addr_t slot_addr(phys_addr_t start, phys_addr_t idx)
  */
 static inline unsigned long get_max_slots(unsigned long boundary_mask)
 {
-	if (boundary_mask == ~0UL)
-		return 1UL << (BITS_PER_LONG - IO_TLB_SHIFT);
-	return nr_slots(boundary_mask + 1);
+	return (boundary_mask >> IO_TLB_SHIFT) + 1;
 }
 
 static unsigned int wrap_area_index(struct io_tlb_pool *mem, unsigned int index)