summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--arch/xtensa/include/asm/asmmacro.h33
-rw-r--r--arch/xtensa/kernel/align.S5
-rw-r--r--arch/xtensa/lib/memcopy.S49
-rw-r--r--arch/xtensa/lib/usercopy.S24
4 files changed, 59 insertions, 52 deletions
diff --git a/arch/xtensa/include/asm/asmmacro.h b/arch/xtensa/include/asm/asmmacro.h
index d2a4415a4c08..7f2ae5872151 100644
--- a/arch/xtensa/include/asm/asmmacro.h
+++ b/arch/xtensa/include/asm/asmmacro.h
@@ -158,4 +158,37 @@
.previous \
97:
+
+/*
+ * Extract unaligned word that is split between two registers w0 and w1
+ * into r regardless of machine endianness. SAR must be loaded with the
+ * starting bit of the word (see __ssa8).
+ */
+
+ .macro __src_b r, w0, w1
+#ifdef __XTENSA_EB__
+ src \r, \w0, \w1
+#else
+ src \r, \w1, \w0
+#endif
+ .endm
+
+/*
+ * Load 2 lowest address bits of r into SAR for __src_b to extract unaligned
+ * word starting at r from two registers loaded from consecutive aligned
+ * addresses covering r regardless of machine endianness.
+ *
+ * r 0 1 2 3
+ * LE SAR 0 8 16 24
+ * BE SAR 32 24 16 8
+ */
+
+ .macro __ssa8 r
+#ifdef __XTENSA_EB__
+ ssa8b \r
+#else
+ ssa8l \r
+#endif
+ .endm
+
#endif /* _XTENSA_ASMMACRO_H */
diff --git a/arch/xtensa/kernel/align.S b/arch/xtensa/kernel/align.S
index 24b3189d7841..9301452e521e 100644
--- a/arch/xtensa/kernel/align.S
+++ b/arch/xtensa/kernel/align.S
@@ -19,6 +19,7 @@
#include <linux/linkage.h>
#include <asm/current.h>
#include <asm/asm-offsets.h>
+#include <asm/asmmacro.h>
#include <asm/processor.h>
#if XCHAL_UNALIGNED_LOAD_EXCEPTION || XCHAL_UNALIGNED_STORE_EXCEPTION
@@ -66,8 +67,6 @@
#define INSN_T 24
#define INSN_OP1 16
-.macro __src_b r, w0, w1; src \r, \w0, \w1; .endm
-.macro __ssa8 r; ssa8b \r; .endm
.macro __ssa8r r; ssa8l \r; .endm
.macro __sh r, s; srl \r, \s; .endm
.macro __sl r, s; sll \r, \s; .endm
@@ -81,8 +80,6 @@
#define INSN_T 4
#define INSN_OP1 12
-.macro __src_b r, w0, w1; src \r, \w1, \w0; .endm
-.macro __ssa8 r; ssa8l \r; .endm
.macro __ssa8r r; ssa8b \r; .endm
.macro __sh r, s; sll \r, \s; .endm
.macro __sl r, s; srl \r, \s; .endm
diff --git a/arch/xtensa/lib/memcopy.S b/arch/xtensa/lib/memcopy.S
index b1c219acabe7..9bda748a1e3e 100644
--- a/arch/xtensa/lib/memcopy.S
+++ b/arch/xtensa/lib/memcopy.S
@@ -10,22 +10,7 @@
*/
#include <variant/core.h>
-
- .macro src_b r, w0, w1
-#ifdef __XTENSA_EB__
- src \r, \w0, \w1
-#else
- src \r, \w1, \w0
-#endif
- .endm
-
- .macro ssa8 r
-#ifdef __XTENSA_EB__
- ssa8b \r
-#else
- ssa8l \r
-#endif
- .endm
+#include <asm/asmmacro.h>
/*
* void *memcpy(void *dst, const void *src, size_t len);
@@ -209,7 +194,7 @@ memcpy:
.Lsrcunaligned:
_beqz a4, .Ldone # avoid loading anything for zero-length copies
# copy 16 bytes per iteration for word-aligned dst and unaligned src
- ssa8 a3 # set shift amount from byte offset
+ __ssa8 a3 # set shift amount from byte offset
/* set to 1 when running on ISS (simulator) with the
lint or ferret client, or 0 to save a few cycles */
@@ -229,16 +214,16 @@ memcpy:
.Loop2:
l32i a7, a3, 4
l32i a8, a3, 8
- src_b a6, a6, a7
+ __src_b a6, a6, a7
s32i a6, a5, 0
l32i a9, a3, 12
- src_b a7, a7, a8
+ __src_b a7, a7, a8
s32i a7, a5, 4
l32i a6, a3, 16
- src_b a8, a8, a9
+ __src_b a8, a8, a9
s32i a8, a5, 8
addi a3, a3, 16
- src_b a9, a9, a6
+ __src_b a9, a9, a6
s32i a9, a5, 12
addi a5, a5, 16
#if !XCHAL_HAVE_LOOPS
@@ -249,10 +234,10 @@ memcpy:
# copy 8 bytes
l32i a7, a3, 4
l32i a8, a3, 8
- src_b a6, a6, a7
+ __src_b a6, a6, a7
s32i a6, a5, 0
addi a3, a3, 8
- src_b a7, a7, a8
+ __src_b a7, a7, a8
s32i a7, a5, 4
addi a5, a5, 8
mov a6, a8
@@ -261,7 +246,7 @@ memcpy:
# copy 4 bytes
l32i a7, a3, 4
addi a3, a3, 4
- src_b a6, a6, a7
+ __src_b a6, a6, a7
s32i a6, a5, 0
addi a5, a5, 4
mov a6, a7
@@ -485,7 +470,7 @@ memmove:
.Lbacksrcunaligned:
_beqz a4, .Lbackdone # avoid loading anything for zero-length copies
# copy 16 bytes per iteration for word-aligned dst and unaligned src
- ssa8 a3 # set shift amount from byte offset
+ __ssa8 a3 # set shift amount from byte offset
#define SIM_CHECKS_ALIGNMENT 1 /* set to 1 when running on ISS with
* the lint or ferret client, or 0
* to save a few cycles */
@@ -506,15 +491,15 @@ memmove:
l32i a7, a3, 12
l32i a8, a3, 8
addi a5, a5, -16
- src_b a6, a7, a6
+ __src_b a6, a7, a6
s32i a6, a5, 12
l32i a9, a3, 4
- src_b a7, a8, a7
+ __src_b a7, a8, a7
s32i a7, a5, 8
l32i a6, a3, 0
- src_b a8, a9, a8
+ __src_b a8, a9, a8
s32i a8, a5, 4
- src_b a9, a6, a9
+ __src_b a9, a6, a9
s32i a9, a5, 0
#if !XCHAL_HAVE_LOOPS
bne a3, a10, .backLoop2 # continue loop if a3:src != a10:src_start
@@ -526,9 +511,9 @@ memmove:
l32i a7, a3, 4
l32i a8, a3, 0
addi a5, a5, -8
- src_b a6, a7, a6
+ __src_b a6, a7, a6
s32i a6, a5, 4
- src_b a7, a8, a7
+ __src_b a7, a8, a7
s32i a7, a5, 0
mov a6, a8
.Lback12:
@@ -537,7 +522,7 @@ memmove:
addi a3, a3, -4
l32i a7, a3, 0
addi a5, a5, -4
- src_b a6, a7, a6
+ __src_b a6, a7, a6
s32i a6, a5, 0
mov a6, a7
.Lback13:
diff --git a/arch/xtensa/lib/usercopy.S b/arch/xtensa/lib/usercopy.S
index 4172b73b0364..0959b6e71f11 100644
--- a/arch/xtensa/lib/usercopy.S
+++ b/arch/xtensa/lib/usercopy.S
@@ -56,14 +56,6 @@
#include <variant/core.h>
#include <asm/asmmacro.h>
-#ifdef __XTENSA_EB__
-#define ALIGN(R, W0, W1) src R, W0, W1
-#define SSA8(R) ssa8b R
-#else
-#define ALIGN(R, W0, W1) src R, W1, W0
-#define SSA8(R) ssa8l R
-#endif
-
.text
.align 4
.global __xtensa_copy_user
@@ -81,7 +73,7 @@ __xtensa_copy_user:
# per iteration
movi a8, 3 # if source is also aligned,
bnone a3, a8, .Laligned # then use word copy
- SSA8( a3) # set shift amount from byte offset
+ __ssa8 a3 # set shift amount from byte offset
bnez a4, .Lsrcunaligned
movi a2, 0 # return success for len==0
retw
@@ -220,16 +212,16 @@ EX(10f) l32i a6, a3, 0 # load first word
.Loop2:
EX(10f) l32i a7, a3, 4
EX(10f) l32i a8, a3, 8
- ALIGN( a6, a6, a7)
+ __src_b a6, a6, a7
EX(10f) s32i a6, a5, 0
EX(10f) l32i a9, a3, 12
- ALIGN( a7, a7, a8)
+ __src_b a7, a7, a8
EX(10f) s32i a7, a5, 4
EX(10f) l32i a6, a3, 16
- ALIGN( a8, a8, a9)
+ __src_b a8, a8, a9
EX(10f) s32i a8, a5, 8
addi a3, a3, 16
- ALIGN( a9, a9, a6)
+ __src_b a9, a9, a6
EX(10f) s32i a9, a5, 12
addi a5, a5, 16
#if !XCHAL_HAVE_LOOPS
@@ -240,10 +232,10 @@ EX(10f) s32i a9, a5, 12
# copy 8 bytes
EX(10f) l32i a7, a3, 4
EX(10f) l32i a8, a3, 8
- ALIGN( a6, a6, a7)
+ __src_b a6, a6, a7
EX(10f) s32i a6, a5, 0
addi a3, a3, 8
- ALIGN( a7, a7, a8)
+ __src_b a7, a7, a8
EX(10f) s32i a7, a5, 4
addi a5, a5, 8
mov a6, a8
@@ -252,7 +244,7 @@ EX(10f) s32i a7, a5, 4
# copy 4 bytes
EX(10f) l32i a7, a3, 4
addi a3, a3, 4
- ALIGN( a6, a6, a7)
+ __src_b a6, a6, a7
EX(10f) s32i a6, a5, 0
addi a5, a5, 4
mov a6, a7