diff options
author | Jason Gunthorpe <jgg@nvidia.com> | 2024-04-11 18:46:14 +0200 |
---|---|---|
committer | Jason Gunthorpe <jgg@nvidia.com> | 2024-04-22 22:11:19 +0200 |
commit | 20516d6e51dd9994afda8d556507cfbe7853384b (patch) | |
tree | 15e4bdf535cbe812c0c232481c3a5cefbb529c4b | |
parent | RDMA/rxe: Let destroy qp succeed with stuck packet (diff) | |
download | linux-20516d6e51dd9994afda8d556507cfbe7853384b.tar.xz linux-20516d6e51dd9994afda8d556507cfbe7853384b.zip |
x86: Stop using weak symbols for __iowrite32_copy()
Start switching the iomap_copy routines over to use #define and arch-provided
inline/macro functions instead of weak symbols.
Inline functions allow more compiler optimization and this is often a
driver hot path.
x86 has the only arch override of the weak generic __iowrite32_copy(), so
replace it with a static inline containing the same single-instruction inline
assembly. The compiler will generate the "mov edx,ecx" in a more optimal
way.
Remove iomap_copy_64.S
Link: https://lore.kernel.org/r/1-v3-1893cd8b9369+1925-mlx5_arm_wc_jgg@nvidia.com
Acked-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
-rw-r--r-- | arch/x86/include/asm/io.h | 17 | ||||
-rw-r--r-- | arch/x86/lib/Makefile | 1 | ||||
-rw-r--r-- | arch/x86/lib/iomap_copy_64.S | 15 | ||||
-rw-r--r-- | include/linux/io.h | 5 | ||||
-rw-r--r-- | lib/iomap_copy.c | 6 |
5 files changed, 24 insertions, 20 deletions
```diff
diff --git a/arch/x86/include/asm/io.h b/arch/x86/include/asm/io.h
index 294cd2a40818..4b99ed326b17 100644
--- a/arch/x86/include/asm/io.h
+++ b/arch/x86/include/asm/io.h
@@ -209,6 +209,23 @@ void memset_io(volatile void __iomem *, int, size_t);
 #define memcpy_toio memcpy_toio
 #define memset_io memset_io
 
+#ifdef CONFIG_X86_64
+/*
+ * Commit 0f07496144c2 ("[PATCH] Add faster __iowrite32_copy routine for
+ * x86_64") says that circa 2006 rep movsl is noticeably faster than a copy
+ * loop.
+ */
+static inline void __iowrite32_copy(void __iomem *to, const void *from,
+				    size_t count)
+{
+	asm volatile("rep ; movsl"
+		     : "=&c"(count), "=&D"(to), "=&S"(from)
+		     : "0"(count), "1"(to), "2"(from)
+		     : "memory");
+}
+#define __iowrite32_copy __iowrite32_copy
+#endif
+
 /*
  * ISA space is 'always mapped' on a typical x86 system, no need to
  * explicitly ioremap() it. The fact that the ISA IO space is mapped
diff --git a/arch/x86/lib/Makefile b/arch/x86/lib/Makefile
index 6da73513f026..98583a9dbab3 100644
--- a/arch/x86/lib/Makefile
+++ b/arch/x86/lib/Makefile
@@ -53,7 +53,6 @@ ifneq ($(CONFIG_X86_CMPXCHG64),y)
         lib-y += atomic64_386_32.o
 endif
 else
-        obj-y += iomap_copy_64.o
 ifneq ($(CONFIG_GENERIC_CSUM),y)
         lib-y += csum-partial_64.o csum-copy_64.o csum-wrappers_64.o
 endif
diff --git a/arch/x86/lib/iomap_copy_64.S b/arch/x86/lib/iomap_copy_64.S
deleted file mode 100644
index 6ff2f56cb0f7..000000000000
--- a/arch/x86/lib/iomap_copy_64.S
+++ /dev/null
@@ -1,15 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * Copyright 2006 PathScale, Inc. All Rights Reserved.
- */
-
-#include <linux/linkage.h>
-
-/*
- * override generic version in lib/iomap_copy.c
- */
-SYM_FUNC_START(__iowrite32_copy)
-	movl %edx,%ecx
-	rep movsl
-	RET
-SYM_FUNC_END(__iowrite32_copy)
diff --git a/include/linux/io.h b/include/linux/io.h
index 235ba7d80a8f..ce86120ce9d5 100644
--- a/include/linux/io.h
+++ b/include/linux/io.h
@@ -16,7 +16,10 @@
 struct device;
 struct resource;
 
-__visible void __iowrite32_copy(void __iomem *to, const void *from, size_t count);
+#ifndef __iowrite32_copy
+void __iowrite32_copy(void __iomem *to, const void *from, size_t count);
+#endif
+
 void __ioread32_copy(void *to, const void __iomem *from, size_t count);
 void __iowrite64_copy(void __iomem *to, const void *from, size_t count);
 
diff --git a/lib/iomap_copy.c b/lib/iomap_copy.c
index 5de7c04e05ef..8ddcbb53507d 100644
--- a/lib/iomap_copy.c
+++ b/lib/iomap_copy.c
@@ -16,9 +16,8 @@
  * time. Order of access is not guaranteed, nor is a memory barrier
  * performed afterwards.
  */
-void __attribute__((weak)) __iowrite32_copy(void __iomem *to,
-					    const void *from,
-					    size_t count)
+#ifndef __iowrite32_copy
+void __iowrite32_copy(void __iomem *to, const void *from, size_t count)
 {
 	u32 __iomem *dst = to;
 	const u32 *src = from;
@@ -28,6 +27,7 @@ void __attribute__((weak)) __iowrite32_copy(void __iomem *to,
 		__raw_writel(*src++, dst++);
 }
 EXPORT_SYMBOL_GPL(__iowrite32_copy);
+#endif
 
 /**
  * __ioread32_copy - copy data from MMIO space, in 32-bit units
```