path: root/arch/arc/lib/memcpy-archs-unaligned.S
author    Eugeniy Paltsev <eugeniy.paltsev@synopsys.com>  2019-01-30 17:32:43 +0100
committer Vineet Gupta <vgupta@synopsys.com>  2019-02-25 17:52:16 +0100
commit    4d1e7918aae59ef504f5170a4f0c7ae82339fcb2 (patch)
tree      358a3cf4544a01622172ecbd2d85f933b9de9995 /arch/arc/lib/memcpy-archs-unaligned.S
parent    ARC: [plat-hsdk]: Enable AXI DW DMAC support (diff)
ARCv2: lib: introduce memcpy optimized for unaligned access
Optimise the code to use the efficient unaligned memory accesses available on ARCv2. This allows us to greatly simplify the memcpy code and speeds it up by roughly 1.5x when the source or destination is unaligned.

Don't wire it up yet!

Signed-off-by: Eugeniy Paltsev <Eugeniy.Paltsev@synopsys.com>
Signed-off-by: Vineet Gupta <vgupta@synopsys.com>
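For readers following the patch, here is a minimal C model (not part of the patch; names are illustrative) of the copy strategy the assembly below implements: the main loop moves four register-wide unaligned chunks per iteration (32 bytes with CONFIG_ARC_HAS_LL64, 16 bytes otherwise), and a byte loop handles the tail.

	/* Illustrative C model of the copy strategy; not the kernel implementation. */
	#include <stddef.h>
	#include <string.h>

	#define CHUNK 8				/* 8 with CONFIG_ARC_HAS_LL64, else 4 */

	static void *model_memcpy(void *dst, const void *src, size_t n)
	{
		unsigned char *d = dst;
		const unsigned char *s = src;
		const size_t step = 4 * CHUNK;	/* bytes moved per main-loop pass */

		/* Main loop: four chunk-sized unaligned load/store pairs per pass
		 * (the LOADX/STOREX sequence inside the zero-overhead loop). */
		for (size_t i = 0; i < n / step; i++) {
			memcpy(d, s, step);	/* stands in for unaligned ldd/std */
			d += step;
			s += step;
		}

		/* Tail: the remaining n % step bytes, copied one at a time
		 * (the ldb.ab/stb.ab loop). */
		for (size_t i = n % step; i; i--)
			*d++ = *s++;

		return dst;			/* memcpy returns the original dst */
	}

The real routine preserves the return value by copying through r3 instead of advancing r0; the model mirrors that by returning dst unchanged.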
Diffstat (limited to 'arch/arc/lib/memcpy-archs-unaligned.S')
-rw-r--r--  arch/arc/lib/memcpy-archs-unaligned.S  47
1 file changed, 47 insertions, 0 deletions
diff --git a/arch/arc/lib/memcpy-archs-unaligned.S b/arch/arc/lib/memcpy-archs-unaligned.S
new file mode 100644
index 000000000000..28993a73fdde
--- /dev/null
+++ b/arch/arc/lib/memcpy-archs-unaligned.S
@@ -0,0 +1,47 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
+/*
+ * ARCv2 memcpy implementation optimized for unaligned memory access.
+ *
+ * Copyright (C) 2019 Synopsys
+ * Author: Eugeniy Paltsev <Eugeniy.Paltsev@synopsys.com>
+ */
+
+#include <linux/linkage.h>
+
+#ifdef CONFIG_ARC_HAS_LL64
+# define LOADX(DST,RX) ldd.ab DST, [RX, 8]
+# define STOREX(SRC,RX) std.ab SRC, [RX, 8]
+# define ZOLSHFT 5
+# define ZOLAND 0x1F
+#else
+# define LOADX(DST,RX) ld.ab DST, [RX, 4]
+# define STOREX(SRC,RX) st.ab SRC, [RX, 4]
+# define ZOLSHFT 4
+# define ZOLAND 0xF
+#endif
+
+ENTRY_CFI(memcpy)
+ mov r3, r0 ; don't clobber ret val
+
+ lsr.f lp_count, r2, ZOLSHFT
+ lpnz @.Lcopy32_64bytes
+ ;; LOOP START
+ LOADX (r6, r1)
+ LOADX (r8, r1)
+ LOADX (r10, r1)
+ LOADX (r4, r1)
+ STOREX (r6, r3)
+ STOREX (r8, r3)
+ STOREX (r10, r3)
+ STOREX (r4, r3)
+.Lcopy32_64bytes:
+
+ and.f lp_count, r2, ZOLAND ; copy the last remaining bytes (at most ZOLAND)
+ lpnz @.Lcopyremainingbytes
+ ;; LOOP START
+ ldb.ab r5, [r1, 1]
+ stb.ab r5, [r3, 1]
+.Lcopyremainingbytes:
+
+ j [blink]
+END_CFI(memcpy)
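As a worked example of the loop-count arithmetic above (numbers are illustrative, not from the patch): with CONFIG_ARC_HAS_LL64, a 100-byte request gives 100 >> 5 = 3 passes of the 32-byte main loop (96 bytes) and 100 & 0x1F = 4 bytes for the byte-copy tail; lpnz sets up ARC's zero-overhead loop and skips past the end label when the computed count is zero. A small C check of that arithmetic, assuming the LL64 values of ZOLSHFT/ZOLAND:

	#include <assert.h>

	int main(void)
	{
		unsigned int n = 100;			/* hypothetical copy length */
		unsigned int zolshft = 5, zoland = 0x1F; /* CONFIG_ARC_HAS_LL64 values */

		unsigned int main_iters = n >> zolshft;	/* lsr.f lp_count, r2, ZOLSHFT */
		unsigned int tail_bytes = n & zoland;	/* and.f lp_count, r2, ZOLAND */

		assert(main_iters == 3);	/* 3 x 32 bytes = 96 bytes in the main loop */
		assert(tail_bytes == 4);	/* 4 single-byte copies in the tail loop */
		return 0;
	}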