author    Ard Biesheuvel <ardb@kernel.org>  2022-04-20 10:41:31 +0200
committer Russell King (Oracle) <rmk+kernel@armlinux.org.uk>  2022-05-20 13:32:32 +0200
commit    508074607c7b95b24f0adf633fdf606761bb7824 (patch)
tree      6bb3ce001060faa2c5da3c985e219c10e557330c /arch/arm
parent    ARM: 9194/1: assembler: simplify ldr_this_cpu for !SMP builds (diff)
ARM: 9195/1: entry: avoid explicit literal loads
ARMv7 has MOVW/MOVT instruction pairs to load symbol addresses into registers without having to rely on literal loads that go via the D-cache. For older cores, we now support a similar arrangement, based on PC-relative group relocations.

This means we can elide most literal loads entirely from the entry path, by switching to the ldr_va macro to emit the appropriate sequence depending on the target architecture revision.

While at it, switch to the bl_r macro for invoking the right PABT/DABT helpers instead of setting the LR register explicitly, which does not play well with cores that speculate across function returns.

Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
Reviewed-by: Linus Walleij <linus.walleij@linaro.org>
Signed-off-by: Russell King (Oracle) <rmk+kernel@armlinux.org.uk>
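For context, a minimal sketch of the three address-materialization strategies in play, using cr_alignment as the example symbol (illustrative only; the real sequences are generated by the mov_l/ldr_va macros in assembler.h):

	@ 1) Classic pattern removed by this patch: fetch the address
	@    from a literal pool through the D-cache, then dereference
	@    it -- two data accesses.
	ldr	r0, .LCcr		@ r0 = &cr_alignment
	ldr	r0, [r0]		@ r0 = cr_alignment
	...
.LCcr:	.word	cr_alignment

	@ 2) ARMv7 and later: MOVW/MOVT build the 32-bit address from
	@    immediates, so only the final dereference touches memory.
	movw	r0, #:lower16:cr_alignment
	movt	r0, #:upper16:cr_alignment
	ldr	r0, [r0]

	@ 3) Pre-v7: a PC-relative SUB/SUB/LDR sequence whose immediates
	@    are fixed up at link time via the group relocations
	@    R_ARM_ALU_PC_G0_NC, R_ARM_ALU_PC_G1_NC and R_ARM_LDR_PC_G2,
	@    again with no literal pool entry.
	sub	r0, pc, #8
	sub	r0, r0, #4
	ldr	r0, [r0, #0]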
Diffstat (limited to 'arch/arm')
-rw-r--r--  arch/arm/include/asm/assembler.h  | 18
-rw-r--r--  arch/arm/kernel/entry-armv.S      | 37
-rw-r--r--  arch/arm/kernel/entry-common.S    | 10
-rw-r--r--  arch/arm/kernel/entry-header.S    |  3
4 files changed, 18 insertions(+), 50 deletions(-)
diff --git a/arch/arm/include/asm/assembler.h b/arch/arm/include/asm/assembler.h
index 8e59d748358d..90fbe4a3f9c8 100644
--- a/arch/arm/include/asm/assembler.h
+++ b/arch/arm/include/asm/assembler.h
@@ -666,12 +666,11 @@ THUMB( orr \reg , \reg , #PSR_T_BIT )
__adldst_l str, \src, \sym, \tmp, \cond
.endm
- .macro __ldst_va, op, reg, tmp, sym, cond
+ .macro __ldst_va, op, reg, tmp, sym, cond, offset
#if __LINUX_ARM_ARCH__ >= 7 || \
!defined(CONFIG_ARM_HAS_GROUP_RELOCS) || \
(defined(MODULE) && defined(CONFIG_ARM_MODULE_PLTS))
mov_l \tmp, \sym, \cond
- \op\cond \reg, [\tmp]
#else
/*
* Avoid a literal load, by emitting a sequence of ADD/LDR instructions
@@ -683,20 +682,21 @@ THUMB( orr \reg , \reg , #PSR_T_BIT )
.reloc .L0_\@, R_ARM_ALU_PC_G0_NC, \sym
.reloc .L1_\@, R_ARM_ALU_PC_G1_NC, \sym
.reloc .L2_\@, R_ARM_LDR_PC_G2, \sym
-.L0_\@: sub\cond \tmp, pc, #8
-.L1_\@: sub\cond \tmp, \tmp, #4
-.L2_\@: \op\cond \reg, [\tmp, #0]
+.L0_\@: sub\cond \tmp, pc, #8 - \offset
+.L1_\@: sub\cond \tmp, \tmp, #4 - \offset
+.L2_\@:
#endif
+ \op\cond \reg, [\tmp, #\offset]
.endm
/*
* ldr_va - load a 32-bit word from the virtual address of \sym
*/
- .macro ldr_va, rd:req, sym:req, cond, tmp
+ .macro ldr_va, rd:req, sym:req, cond, tmp, offset=0
.ifnb \tmp
- __ldst_va ldr, \rd, \tmp, \sym, \cond
+ __ldst_va ldr, \rd, \tmp, \sym, \cond, \offset
.else
- __ldst_va ldr, \rd, \rd, \sym, \cond
+ __ldst_va ldr, \rd, \rd, \sym, \cond, \offset
.endif
.endm
@@ -704,7 +704,7 @@ THUMB( orr \reg , \reg , #PSR_T_BIT )
* str_va - store a 32-bit word to the virtual address of \sym
*/
.macro str_va, rn:req, sym:req, tmp:req, cond
- __ldst_va str, \rn, \tmp, \sym, \cond
+ __ldst_va str, \rn, \tmp, \sym, \cond, 0
.endm
/*
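The new offset parameter folds a constant displacement into the final access instead of adding it separately. Substituting the arguments of the ldr_va call added to entry-armv.S below into the macro body above, the pre-v7 group-relocation branch expands roughly as follows (a sketch; labels simplified from the .L0_\@ form, and the pre-existing immediates act as addends for the REL-type relocations, so the final load fetches the word at processor + PROCESSOR_PABT_FUNC):

	.reloc	.L0, R_ARM_ALU_PC_G0_NC, processor
	.reloc	.L1, R_ARM_ALU_PC_G1_NC, processor
	.reloc	.L2, R_ARM_LDR_PC_G2, processor
.L0:	sub	ip, pc, #8 - PROCESSOR_PABT_FUNC
.L1:	sub	ip, ip, #4 - PROCESSOR_PABT_FUNC
.L2:	ldr	ip, [ip, #PROCESSOR_PABT_FUNC]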
diff --git a/arch/arm/kernel/entry-armv.S b/arch/arm/kernel/entry-armv.S
index 7a8682468a84..6e7dfb4786e3 100644
--- a/arch/arm/kernel/entry-armv.S
+++ b/arch/arm/kernel/entry-armv.S
@@ -61,9 +61,8 @@
.macro pabt_helper
@ PABORT handler takes pt_regs in r2, fault address in r4 and psr in r5
#ifdef MULTI_PABORT
- ldr ip, .LCprocfns
- mov lr, pc
- ldr pc, [ip, #PROCESSOR_PABT_FUNC]
+ ldr_va ip, processor, offset=PROCESSOR_PABT_FUNC
+ bl_r ip
#else
bl CPU_PABORT_HANDLER
#endif
@@ -82,9 +81,8 @@
@ the fault status register in r1. r9 must be preserved.
@
#ifdef MULTI_DABORT
- ldr ip, .LCprocfns
- mov lr, pc
- ldr pc, [ip, #PROCESSOR_DABT_FUNC]
+ ldr_va ip, processor, offset=PROCESSOR_DABT_FUNC
+ bl_r ip
#else
bl CPU_DABORT_HANDLER
#endif
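Why bl_r matters here: the old sequence faked a call by writing the return address into LR by hand, so a return-stack predictor never sees a matching call for the helper's eventual function return. A before/after sketch (the bl_r expansion noted in the comment is an assumption based on the helper's definition in assembler.h, roughly BLX on ARMv6+ and the MOV pair only on older cores):

	@ old: manual LR setup, invisible to the return-address predictor
	mov	lr, pc
	ldr	pc, [ip, #PROCESSOR_DABT_FUNC]

	@ new: a real indirect call, keeping call/return pairing intact
	ldr_va	ip, processor, offset=PROCESSOR_DABT_FUNC
	bl_r	ip		@ blx ip on v6+, mov lr,pc / mov pc,ip before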
@@ -302,16 +300,6 @@ __fiq_svc:
UNWIND(.fnend )
ENDPROC(__fiq_svc)
- .align 5
-.LCcralign:
- .word cr_alignment
-#ifdef MULTI_DABORT
-.LCprocfns:
- .word processor
-#endif
-.LCfp:
- .word fp_enter
-
/*
* Abort mode handlers
*/
@@ -370,7 +358,7 @@ ENDPROC(__fiq_abt)
THUMB( stmia sp, {r0 - r12} )
ATRAP( mrc p15, 0, r7, c1, c0, 0)
- ATRAP( ldr r8, .LCcralign)
+ ATRAP( ldr_va r8, cr_alignment)
ldmia r0, {r3 - r5}
add r0, sp, #S_PC @ here for interlock avoidance
@@ -379,8 +367,6 @@ ENDPROC(__fiq_abt)
str r3, [sp] @ save the "real" r0 copied
@ from the exception stack
- ATRAP( ldr r8, [r8, #0])
-
@
@ We are now ready to fill in the remaining blanks on the stack:
@
@@ -505,9 +491,7 @@ __und_usr_thumb:
*/
#if __LINUX_ARM_ARCH__ < 7
/* If the target CPU may not be Thumb-2-capable, a run-time check is needed: */
-#define NEED_CPU_ARCHITECTURE
- ldr r5, .LCcpu_architecture
- ldr r5, [r5]
+ ldr_va r5, cpu_architecture
cmp r5, #CPU_ARCH_ARMv7
blo __und_usr_fault_16 @ 16bit undefined instruction
/*
@@ -654,12 +638,6 @@ call_fpe:
ret.w lr @ CP#14 (Debug)
ret.w lr @ CP#15 (Control)
-#ifdef NEED_CPU_ARCHITECTURE
- .align 2
-.LCcpu_architecture:
- .word __cpu_architecture
-#endif
-
#ifdef CONFIG_NEON
.align 6
@@ -685,9 +663,8 @@ call_fpe:
#endif
do_fpe:
- ldr r4, .LCfp
add r10, r10, #TI_FPSTATE @ r10 = workspace
- ldr pc, [r4] @ Call FP module USR entry point
+ ldr_va pc, fp_enter, tmp=r4 @ Call FP module USR entry point
/*
* The FP module is called with these registers set:
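A note on the tmp=r4 argument above: because the destination register is pc, ldr_va needs a scratch register to build the address of fp_enter; the final load then writes pc, performing the indirect jump. On a v7 build this reduces to roughly (a sketch, assuming mov_l's MOVW/MOVT expansion):

	movw	r4, #:lower16:fp_enter
	movt	r4, #:upper16:fp_enter
	ldr	pc, [r4]	@ load the FP entry point and branch to it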
diff --git a/arch/arm/kernel/entry-common.S b/arch/arm/kernel/entry-common.S
index 90d40f4d56cf..ad3210e5cb69 100644
--- a/arch/arm/kernel/entry-common.S
+++ b/arch/arm/kernel/entry-common.S
@@ -198,7 +198,7 @@ ENTRY(vector_swi)
#endif
reload_current r10, ip
zero_fp
- alignment_trap r10, ip, __cr_alignment
+ alignment_trap r10, ip, cr_alignment
asm_trace_hardirqs_on save=0
enable_irq_notrace
ct_user_exit save=0
@@ -328,14 +328,6 @@ __sys_trace_return:
bl syscall_trace_exit
b ret_slow_syscall
- .align 5
-#ifdef CONFIG_ALIGNMENT_TRAP
- .type __cr_alignment, #object
-__cr_alignment:
- .word cr_alignment
-#endif
- .ltorg
-
.macro syscall_table_start, sym
.equ __sys_nr, 0
.type \sym, #object
diff --git a/arch/arm/kernel/entry-header.S b/arch/arm/kernel/entry-header.S
index 9a1dc142f782..5865621bf691 100644
--- a/arch/arm/kernel/entry-header.S
+++ b/arch/arm/kernel/entry-header.S
@@ -48,8 +48,7 @@
.macro alignment_trap, rtmp1, rtmp2, label
#ifdef CONFIG_ALIGNMENT_TRAP
mrc p15, 0, \rtmp2, c1, c0, 0
- ldr \rtmp1, \label
- ldr \rtmp1, [\rtmp1]
+ ldr_va \rtmp1, \label
teq \rtmp1, \rtmp2
mcrne p15, 0, \rtmp1, c1, c0, 0
#endif
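With the label argument now naming the variable itself (cr_alignment) rather than a literal-pool slot holding its address, the macro body reduces to a single ldr_va. A sketch of the effective sequence for the vector_swi call site alignment_trap r10, ip, cr_alignment on an ARMv7 build (assuming mov_l's MOVW/MOVT expansion):

	mrc	p15, 0, ip, c1, c0, 0		@ ip = current SCTLR
	movw	r10, #:lower16:cr_alignment
	movt	r10, #:upper16:cr_alignment
	ldr	r10, [r10]			@ r10 = saved SCTLR value
	teq	r10, ip
	mcrne	p15, 0, r10, c1, c0, 0		@ restore if they differ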