author     Russell King <rmk+kernel@armlinux.org.uk>    2020-12-21 12:19:26 +0100
committer  Russell King <rmk+kernel@armlinux.org.uk>    2020-12-21 12:19:26 +0100
commit     ecbbb88727aee7880527d4b320b4d06dde75d46d (patch)
tree       67216fc12f1b8220039eed892403bfbde884a1a7 /arch/arm
parent     Merge branches 'fixes' and 'misc' into for-next (diff)
parent     Merge tag 'arm-adrl-replacement-for-v5.11' of git://git.kernel.org/pub/scm/li... (diff)
Merge branch 'devel-stable' into for-next
Diffstat (limited to 'arch/arm')
-rw-r--r--  arch/arm/Kconfig                  |   2
-rw-r--r--  arch/arm/boot/compressed/head.S   |  18
-rw-r--r--  arch/arm/include/asm/assembler.h  |  88
-rw-r--r--  arch/arm/include/asm/elf.h        |   5
-rw-r--r--  arch/arm/include/asm/memory.h     |  57
-rw-r--r--  arch/arm/include/asm/processor.h  |   2
-rw-r--r--  arch/arm/kernel/Makefile          |   1
-rw-r--r--  arch/arm/kernel/head-common.S     |  22
-rw-r--r--  arch/arm/kernel/head.S            | 205
-rw-r--r--  arch/arm/kernel/hyp-stub.S        |  27
-rw-r--r--  arch/arm/kernel/module.c          |  20
-rw-r--r--  arch/arm/kernel/phys2virt.S       | 238
-rw-r--r--  arch/arm/kernel/sleep.S           |  19
13 files changed, 431 insertions, 273 deletions
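The common thread in the diff below is that absolute literal-pool records (".long sym") become place-relative ones (".long sym - .", or the new adr_l/ldr_l/str_l/mov_l pseudo-ops), so addresses can be materialized relative to the PC and remain correct wherever the code happens to execute, MMU on or off. A minimal C sketch of how such a place-relative record is turned back into an absolute address; the helper name is hypothetical, the kernel does the equivalent in assembly:

/*
 * Sketch only: resolving a record emitted as ".long sym - .".
 * The helper name is hypothetical and not part of the patch.
 */
#include <stdint.h>

static inline void *resolve_prel32(const int32_t *entry)
{
	/* absolute address = address of the record + stored offset */
	return (char *)entry + *entry;
}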
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index 2c5eb5c2b310..b2bf019dcefa 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -244,7 +244,7 @@ config ARM_PATCH_PHYS_VIRT kernel in system memory. This can only be used with non-XIP MMU kernels where the base - of physical memory is at a 16MB boundary. + of physical memory is at a 2 MiB boundary. Only disable this option if you know that you do not require this feature (eg, building a kernel for a single machine) and diff --git a/arch/arm/boot/compressed/head.S b/arch/arm/boot/compressed/head.S index 8d546cb10921..804d66668019 100644 --- a/arch/arm/boot/compressed/head.S +++ b/arch/arm/boot/compressed/head.S @@ -468,15 +468,10 @@ dtb_check_done: /* * Compute the address of the hyp vectors after relocation. - * This requires some arithmetic since we cannot directly - * reference __hyp_stub_vectors in a PC-relative way. * Call __hyp_set_vectors with the new address so that we * can HVC again after the copy. */ -0: adr r0, 0b - movw r1, #:lower16:__hyp_stub_vectors - 0b - movt r1, #:upper16:__hyp_stub_vectors - 0b - add r0, r0, r1 + adr_l r0, __hyp_stub_vectors sub r0, r0, r5 add r0, r0, r10 bl __hyp_set_vectors @@ -627,17 +622,11 @@ not_relocated: mov r0, #0 cmp r0, #HYP_MODE @ if not booted in HYP mode... bne __enter_kernel @ boot kernel directly - adr r12, .L__hyp_reentry_vectors_offset - ldr r0, [r12] - add r0, r0, r12 - + adr_l r0, __hyp_reentry_vectors bl __hyp_set_vectors __HVC(0) @ otherwise bounce to hyp mode b . @ should never be reached - - .align 2 -.L__hyp_reentry_vectors_offset: .long __hyp_reentry_vectors - . #else b __enter_kernel #endif @@ -1440,8 +1429,7 @@ ENTRY(efi_enter_kernel) mov r4, r0 @ preserve image base mov r8, r1 @ preserve DT pointer - ARM( adrl r0, call_cache_fn ) - THUMB( adr r0, call_cache_fn ) + adr_l r0, call_cache_fn adr r1, 0f @ clean the region of code we bl cache_clean_flush @ may run with the MMU off diff --git a/arch/arm/include/asm/assembler.h b/arch/arm/include/asm/assembler.h index feac2c8b86f2..6ed30421f697 100644 --- a/arch/arm/include/asm/assembler.h +++ b/arch/arm/include/asm/assembler.h @@ -259,7 +259,7 @@ */ #define ALT_UP(instr...) \ .pushsection ".alt.smp.init", "a" ;\ - .long 9998b ;\ + .long 9998b - . ;\ 9997: instr ;\ .if . - 9997b == 2 ;\ nop ;\ @@ -270,7 +270,7 @@ .popsection #define ALT_UP_B(label) \ .pushsection ".alt.smp.init", "a" ;\ - .long 9998b ;\ + .long 9998b - . ;\ W(b) . + (label - 9998b) ;\ .popsection #else @@ -494,4 +494,88 @@ THUMB( orr \reg , \reg , #PSR_T_BIT ) #define _ASM_NOKPROBE(entry) #endif + .macro __adldst_l, op, reg, sym, tmp, c + .if __LINUX_ARM_ARCH__ < 7 + ldr\c \tmp, .La\@ + .subsection 1 + .align 2 +.La\@: .long \sym - .Lpc\@ + .previous + .else + .ifnb \c + THUMB( ittt \c ) + .endif + movw\c \tmp, #:lower16:\sym - .Lpc\@ + movt\c \tmp, #:upper16:\sym - .Lpc\@ + .endif + +#ifndef CONFIG_THUMB2_KERNEL + .set .Lpc\@, . + 8 // PC bias + .ifc \op, add + add\c \reg, \tmp, pc + .else + \op\c \reg, [pc, \tmp] + .endif +#else +.Lb\@: add\c \tmp, \tmp, pc + /* + * In Thumb-2 builds, the PC bias depends on whether we are currently + * emitting into a .arm or a .thumb section. The size of the add opcode + * above will be 2 bytes when emitting in Thumb mode and 4 bytes when + * emitting in ARM mode, so let's use this to account for the bias. + */ + .set .Lpc\@, . + (. 
- .Lb\@) + + .ifnc \op, add + \op\c \reg, [\tmp] + .endif +#endif + .endm + + /* + * mov_l - move a constant value or [relocated] address into a register + */ + .macro mov_l, dst:req, imm:req + .if __LINUX_ARM_ARCH__ < 7 + ldr \dst, =\imm + .else + movw \dst, #:lower16:\imm + movt \dst, #:upper16:\imm + .endif + .endm + + /* + * adr_l - adr pseudo-op with unlimited range + * + * @dst: destination register + * @sym: name of the symbol + * @cond: conditional opcode suffix + */ + .macro adr_l, dst:req, sym:req, cond + __adldst_l add, \dst, \sym, \dst, \cond + .endm + + /* + * ldr_l - ldr <literal> pseudo-op with unlimited range + * + * @dst: destination register + * @sym: name of the symbol + * @cond: conditional opcode suffix + */ + .macro ldr_l, dst:req, sym:req, cond + __adldst_l ldr, \dst, \sym, \dst, \cond + .endm + + /* + * str_l - str <literal> pseudo-op with unlimited range + * + * @src: source register + * @sym: name of the symbol + * @tmp: mandatory scratch register + * @cond: conditional opcode suffix + */ + .macro str_l, src:req, sym:req, tmp:req, cond + __adldst_l str, \src, \sym, \tmp, \cond + .endm + #endif /* __ASM_ASSEMBLER_H__ */ diff --git a/arch/arm/include/asm/elf.h b/arch/arm/include/asm/elf.h index b078d992414b..0ac62a54b73c 100644 --- a/arch/arm/include/asm/elf.h +++ b/arch/arm/include/asm/elf.h @@ -51,6 +51,7 @@ typedef struct user_fp elf_fpregset_t; #define R_ARM_NONE 0 #define R_ARM_PC24 1 #define R_ARM_ABS32 2 +#define R_ARM_REL32 3 #define R_ARM_CALL 28 #define R_ARM_JUMP24 29 #define R_ARM_TARGET1 38 @@ -58,11 +59,15 @@ typedef struct user_fp elf_fpregset_t; #define R_ARM_PREL31 42 #define R_ARM_MOVW_ABS_NC 43 #define R_ARM_MOVT_ABS 44 +#define R_ARM_MOVW_PREL_NC 45 +#define R_ARM_MOVT_PREL 46 #define R_ARM_THM_CALL 10 #define R_ARM_THM_JUMP24 30 #define R_ARM_THM_MOVW_ABS_NC 47 #define R_ARM_THM_MOVT_ABS 48 +#define R_ARM_THM_MOVW_PREL_NC 49 +#define R_ARM_THM_MOVT_PREL 50 /* * These are used to set parameters in the core dumps. diff --git a/arch/arm/include/asm/memory.h b/arch/arm/include/asm/memory.h index 38a163f50130..2f841cb65c30 100644 --- a/arch/arm/include/asm/memory.h +++ b/arch/arm/include/asm/memory.h @@ -183,6 +183,7 @@ extern unsigned long vectors_base; * so that all we need to do is modify the 8-bit constant field. 
*/ #define __PV_BITS_31_24 0x81000000 +#define __PV_BITS_23_16 0x810000 #define __PV_BITS_7_0 0x81 extern unsigned long __pv_phys_pfn_offset; @@ -193,43 +194,65 @@ extern const void *__pv_table_begin, *__pv_table_end; #define PHYS_OFFSET ((phys_addr_t)__pv_phys_pfn_offset << PAGE_SHIFT) #define PHYS_PFN_OFFSET (__pv_phys_pfn_offset) -#define __pv_stub(from,to,instr,type) \ +#ifndef CONFIG_THUMB2_KERNEL +#define __pv_stub(from,to,instr) \ __asm__("@ __pv_stub\n" \ "1: " instr " %0, %1, %2\n" \ + "2: " instr " %0, %0, %3\n" \ " .pushsection .pv_table,\"a\"\n" \ - " .long 1b\n" \ + " .long 1b - ., 2b - .\n" \ " .popsection\n" \ : "=r" (to) \ - : "r" (from), "I" (type)) + : "r" (from), "I" (__PV_BITS_31_24), \ + "I"(__PV_BITS_23_16)) + +#define __pv_add_carry_stub(x, y) \ + __asm__("@ __pv_add_carry_stub\n" \ + "0: movw %R0, #0\n" \ + " adds %Q0, %1, %R0, lsl #20\n" \ + "1: mov %R0, %2\n" \ + " adc %R0, %R0, #0\n" \ + " .pushsection .pv_table,\"a\"\n" \ + " .long 0b - ., 1b - .\n" \ + " .popsection\n" \ + : "=&r" (y) \ + : "r" (x), "I" (__PV_BITS_7_0) \ + : "cc") -#define __pv_stub_mov_hi(t) \ - __asm__ volatile("@ __pv_stub_mov\n" \ - "1: mov %R0, %1\n" \ +#else +#define __pv_stub(from,to,instr) \ + __asm__("@ __pv_stub\n" \ + "0: movw %0, #0\n" \ + " lsl %0, #21\n" \ + " " instr " %0, %1, %0\n" \ " .pushsection .pv_table,\"a\"\n" \ - " .long 1b\n" \ + " .long 0b - .\n" \ " .popsection\n" \ - : "=r" (t) \ - : "I" (__PV_BITS_7_0)) + : "=&r" (to) \ + : "r" (from)) #define __pv_add_carry_stub(x, y) \ - __asm__ volatile("@ __pv_add_carry_stub\n" \ - "1: adds %Q0, %1, %2\n" \ + __asm__("@ __pv_add_carry_stub\n" \ + "0: movw %R0, #0\n" \ + " lsls %R0, #21\n" \ + " adds %Q0, %1, %R0\n" \ + "1: mvn %R0, #0\n" \ " adc %R0, %R0, #0\n" \ " .pushsection .pv_table,\"a\"\n" \ - " .long 1b\n" \ + " .long 0b - ., 1b - .\n" \ " .popsection\n" \ - : "+r" (y) \ - : "r" (x), "I" (__PV_BITS_31_24) \ + : "=&r" (y) \ + : "r" (x) \ : "cc") +#endif static inline phys_addr_t __virt_to_phys_nodebug(unsigned long x) { phys_addr_t t; if (sizeof(phys_addr_t) == 4) { - __pv_stub(x, t, "add", __PV_BITS_31_24); + __pv_stub(x, t, "add"); } else { - __pv_stub_mov_hi(t); __pv_add_carry_stub(x, t); } return t; @@ -245,7 +268,7 @@ static inline unsigned long __phys_to_virt(phys_addr_t x) * assembler expression receives 32 bit argument * in place where 'r' 32 bit operand is expected. */ - __pv_stub((unsigned long) x, t, "sub", __PV_BITS_31_24); + __pv_stub((unsigned long) x, t, "sub"); return t; } diff --git a/arch/arm/include/asm/processor.h b/arch/arm/include/asm/processor.h index b9241051e5cb..9e6b97286307 100644 --- a/arch/arm/include/asm/processor.h +++ b/arch/arm/include/asm/processor.h @@ -96,7 +96,7 @@ unsigned long get_wchan(struct task_struct *p); #define __ALT_SMP_ASM(smp, up) \ "9998: " smp "\n" \ " .pushsection \".alt.smp.init\", \"a\"\n" \ - " .long 9998b\n" \ + " .long 9998b - .\n" \ " " up "\n" \ " .popsection\n" #else diff --git a/arch/arm/kernel/Makefile b/arch/arm/kernel/Makefile index f6431b46866e..c38c8b019d24 100644 --- a/arch/arm/kernel/Makefile +++ b/arch/arm/kernel/Makefile @@ -95,6 +95,7 @@ obj-$(CONFIG_PARAVIRT) += paravirt.o head-y := head$(MMUEXT).o obj-$(CONFIG_DEBUG_LL) += debug.o obj-$(CONFIG_EARLY_PRINTK) += early_printk.o +obj-$(CONFIG_ARM_PATCH_PHYS_VIRT) += phys2virt.o # This is executed very early using a temporary stack when no memory allocator # nor global data is available. Everything has to be allocated on the stack. 
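For LPAE kernels, the reworked virt-to-phys stubs in <asm/memory.h> above split the phys-to-virt offset into a low word (2 MiB aligned, applied with adds) and a high word holding physical address bits 39:32 (applied with adc). A rough C model of that arithmetic, with hypothetical names and assuming both offset words have already been patched in:

/*
 * Rough model only, not the kernel's code: the carry out of the
 * 32-bit add propagates into the high word of the physical address.
 */
#include <stdint.h>

static uint64_t model_virt_to_phys(uint32_t va, uint32_t pv_offset_lo,
				   uint32_t pv_offset_hi)
{
	uint32_t lo = va + pv_offset_lo;	/* adds %Q0, %1, ...  */
	uint32_t hi = pv_offset_hi + (lo < va);	/* adc  %R0, %R0, #0  */

	return ((uint64_t)hi << 32) | lo;
}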
diff --git a/arch/arm/kernel/head-common.S b/arch/arm/kernel/head-common.S index 89c80154b9ef..29b2eda136bb 100644 --- a/arch/arm/kernel/head-common.S +++ b/arch/arm/kernel/head-common.S @@ -173,11 +173,12 @@ ENDPROC(lookup_processor_type) * r9 = cpuid (preserved) */ __lookup_processor_type: - adr r3, __lookup_processor_type_data - ldmia r3, {r4 - r6} - sub r3, r3, r4 @ get offset between virt&phys - add r5, r5, r3 @ convert virt addresses to - add r6, r6, r3 @ physical address space + /* + * Look in <asm/procinfo.h> for information about the __proc_info + * structure. + */ + adr_l r5, __proc_info_begin + adr_l r6, __proc_info_end 1: ldmia r5, {r3, r4} @ value, mask and r4, r4, r9 @ mask wanted bits teq r3, r4 @@ -189,17 +190,6 @@ __lookup_processor_type: 2: ret lr ENDPROC(__lookup_processor_type) -/* - * Look in <asm/procinfo.h> for information about the __proc_info structure. - */ - .align 2 - .type __lookup_processor_type_data, %object -__lookup_processor_type_data: - .long . - .long __proc_info_begin - .long __proc_info_end - .size __lookup_processor_type_data, . - __lookup_processor_type_data - __error_lpae: #ifdef CONFIG_DEBUG_LL adr r0, str_lpae diff --git a/arch/arm/kernel/head.S b/arch/arm/kernel/head.S index 26ffd462a975..7f62c5eccdf3 100644 --- a/arch/arm/kernel/head.S +++ b/arch/arm/kernel/head.S @@ -103,10 +103,8 @@ ENTRY(stext) #endif #ifndef CONFIG_XIP_KERNEL - adr r3, 2f - ldmia r3, {r4, r8} - sub r4, r3, r4 @ (PHYS_OFFSET - PAGE_OFFSET) - add r8, r8, r4 @ PHYS_OFFSET + adr_l r8, _text @ __pa(_text) + sub r8, r8, #TEXT_OFFSET @ PHYS_OFFSET #else ldr r8, =PLAT_PHYS_OFFSET @ always constant in this case #endif @@ -158,10 +156,6 @@ ENTRY(stext) 1: b __enable_mmu ENDPROC(stext) .ltorg -#ifndef CONFIG_XIP_KERNEL -2: .long . - .long PAGE_OFFSET -#endif /* * Setup the initial page tables. We only setup the barest @@ -224,11 +218,8 @@ __create_page_tables: * Create identity mapping to cater for __enable_mmu. * This identity mapping will be removed by paging_init(). */ - adr r0, __turn_mmu_on_loc - ldmia r0, {r3, r5, r6} - sub r0, r0, r3 @ virt->phys offset - add r5, r5, r0 @ phys __turn_mmu_on - add r6, r6, r0 @ phys __turn_mmu_on_end + adr_l r5, __turn_mmu_on @ _pa(__turn_mmu_on) + adr_l r6, __turn_mmu_on_end @ _pa(__turn_mmu_on_end) mov r5, r5, lsr #SECTION_SHIFT mov r6, r6, lsr #SECTION_SHIFT @@ -350,11 +341,6 @@ __create_page_tables: ret lr ENDPROC(__create_page_tables) .ltorg - .align -__turn_mmu_on_loc: - .long . - .long __turn_mmu_on - .long __turn_mmu_on_end #if defined(CONFIG_SMP) .text @@ -390,10 +376,8 @@ ENTRY(secondary_startup) /* * Use the page tables supplied from __cpu_up. */ - adr r4, __secondary_data - ldmia r4, {r5, r7, r12} @ address to jump to after - sub lr, r4, r5 @ mmu has been enabled - add r3, r7, lr + adr_l r3, secondary_data + mov_l r12, __secondary_switched ldrd r4, r5, [r3, #0] @ get secondary_data.pgdir ARM_BE8(eor r4, r4, r5) @ Swap r5 and r4 in BE: ARM_BE8(eor r5, r4, r5) @ it can be done in 3 steps @@ -408,22 +392,13 @@ ARM_BE8(eor r4, r4, r5) @ without using a temp reg. ENDPROC(secondary_startup) ENDPROC(secondary_startup_arm) - /* - * r6 = &secondary_data - */ ENTRY(__secondary_switched) - ldr sp, [r7, #12] @ get secondary_data.stack + ldr_l r7, secondary_data + 12 @ get secondary_data.stack + mov sp, r7 mov fp, #0 b secondary_start_kernel ENDPROC(__secondary_switched) - .align - - .type __secondary_data, %object -__secondary_data: - .long . 
- .long secondary_data - .long __secondary_switched #endif /* defined(CONFIG_SMP) */ @@ -538,19 +513,11 @@ ARM_BE8(rev r0, r0) @ byteswap if big endian retne lr __fixup_smp_on_up: - adr r0, 1f - ldmia r0, {r3 - r5} - sub r3, r0, r3 - add r4, r4, r3 - add r5, r5, r3 + adr_l r4, __smpalt_begin + adr_l r5, __smpalt_end b __do_fixup_smp_on_up ENDPROC(__fixup_smp) - .align -1: .word . - .word __smpalt_begin - .word __smpalt_end - .pushsection .data .align 2 .globl smp_on_up @@ -564,14 +531,15 @@ smp_on_up: __do_fixup_smp_on_up: cmp r4, r5 reths lr - ldmia r4!, {r0, r6} - ARM( str r6, [r0, r3] ) - THUMB( add r0, r0, r3 ) + ldmia r4, {r0, r6} + ARM( str r6, [r0, r4] ) + THUMB( add r0, r0, r4 ) + add r4, r4, #8 #ifdef __ARMEB__ THUMB( mov r6, r6, ror #16 ) @ Convert word order for big-endian. #endif THUMB( strh r6, [r0], #2 ) @ For Thumb-2, store as two halfwords - THUMB( mov r6, r6, lsr #16 ) @ to be robust against misaligned r3. + THUMB( mov r6, r6, lsr #16 ) @ to be robust against misaligned r0. THUMB( strh r6, [r0] ) b __do_fixup_smp_on_up ENDPROC(__do_fixup_smp_on_up) @@ -580,151 +548,8 @@ ENTRY(fixup_smp) stmfd sp!, {r4 - r6, lr} mov r4, r0 add r5, r0, r1 - mov r3, #0 bl __do_fixup_smp_on_up ldmfd sp!, {r4 - r6, pc} ENDPROC(fixup_smp) -#ifdef __ARMEB__ -#define LOW_OFFSET 0x4 -#define HIGH_OFFSET 0x0 -#else -#define LOW_OFFSET 0x0 -#define HIGH_OFFSET 0x4 -#endif - -#ifdef CONFIG_ARM_PATCH_PHYS_VIRT - -/* __fixup_pv_table - patch the stub instructions with the delta between - * PHYS_OFFSET and PAGE_OFFSET, which is assumed to be 16MiB aligned and - * can be expressed by an immediate shifter operand. The stub instruction - * has a form of '(add|sub) rd, rn, #imm'. - */ - __HEAD -__fixup_pv_table: - adr r0, 1f - ldmia r0, {r3-r7} - mvn ip, #0 - subs r3, r0, r3 @ PHYS_OFFSET - PAGE_OFFSET - add r4, r4, r3 @ adjust table start address - add r5, r5, r3 @ adjust table end address - add r6, r6, r3 @ adjust __pv_phys_pfn_offset address - add r7, r7, r3 @ adjust __pv_offset address - mov r0, r8, lsr #PAGE_SHIFT @ convert to PFN - str r0, [r6] @ save computed PHYS_OFFSET to __pv_phys_pfn_offset - strcc ip, [r7, #HIGH_OFFSET] @ save to __pv_offset high bits - mov r6, r3, lsr #24 @ constant for add/sub instructions - teq r3, r6, lsl #24 @ must be 16MiB aligned -THUMB( it ne @ cross section branch ) - bne __error - str r3, [r7, #LOW_OFFSET] @ save to __pv_offset low bits - b __fixup_a_pv_table -ENDPROC(__fixup_pv_table) - - .align -1: .long . 
- .long __pv_table_begin - .long __pv_table_end -2: .long __pv_phys_pfn_offset - .long __pv_offset - - .text -__fixup_a_pv_table: - adr r0, 3f - ldr r6, [r0] - add r6, r6, r3 - ldr r0, [r6, #HIGH_OFFSET] @ pv_offset high word - ldr r6, [r6, #LOW_OFFSET] @ pv_offset low word - mov r6, r6, lsr #24 - cmn r0, #1 -#ifdef CONFIG_THUMB2_KERNEL - moveq r0, #0x200000 @ set bit 21, mov to mvn instruction - lsls r6, #24 - beq 2f - clz r7, r6 - lsr r6, #24 - lsl r6, r7 - bic r6, #0x0080 - lsrs r7, #1 - orrcs r6, #0x0080 - orr r6, r6, r7, lsl #12 - orr r6, #0x4000 - b 2f -1: add r7, r3 - ldrh ip, [r7, #2] -ARM_BE8(rev16 ip, ip) - tst ip, #0x4000 - and ip, #0x8f00 - orrne ip, r6 @ mask in offset bits 31-24 - orreq ip, r0 @ mask in offset bits 7-0 -ARM_BE8(rev16 ip, ip) - strh ip, [r7, #2] - bne 2f - ldrh ip, [r7] -ARM_BE8(rev16 ip, ip) - bic ip, #0x20 - orr ip, ip, r0, lsr #16 -ARM_BE8(rev16 ip, ip) - strh ip, [r7] -2: cmp r4, r5 - ldrcc r7, [r4], #4 @ use branch for delay slot - bcc 1b - bx lr -#else -#ifdef CONFIG_CPU_ENDIAN_BE8 - moveq r0, #0x00004000 @ set bit 22, mov to mvn instruction -#else - moveq r0, #0x400000 @ set bit 22, mov to mvn instruction -#endif - b 2f -1: ldr ip, [r7, r3] -#ifdef CONFIG_CPU_ENDIAN_BE8 - @ in BE8, we load data in BE, but instructions still in LE - bic ip, ip, #0xff000000 - tst ip, #0x000f0000 @ check the rotation field - orrne ip, ip, r6, lsl #24 @ mask in offset bits 31-24 - biceq ip, ip, #0x00004000 @ clear bit 22 - orreq ip, ip, r0 @ mask in offset bits 7-0 -#else - bic ip, ip, #0x000000ff - tst ip, #0xf00 @ check the rotation field - orrne ip, ip, r6 @ mask in offset bits 31-24 - biceq ip, ip, #0x400000 @ clear bit 22 - orreq ip, ip, r0 @ mask in offset bits 7-0 -#endif - str ip, [r7, r3] -2: cmp r4, r5 - ldrcc r7, [r4], #4 @ use branch for delay slot - bcc 1b - ret lr -#endif -ENDPROC(__fixup_a_pv_table) - - .align -3: .long __pv_offset - -ENTRY(fixup_pv_table) - stmfd sp!, {r4 - r7, lr} - mov r3, #0 @ no offset - mov r4, r0 @ r0 = table start - add r5, r0, r1 @ r1 = table size - bl __fixup_a_pv_table - ldmfd sp!, {r4 - r7, pc} -ENDPROC(fixup_pv_table) - - .data - .align 2 - .globl __pv_phys_pfn_offset - .type __pv_phys_pfn_offset, %object -__pv_phys_pfn_offset: - .word 0 - .size __pv_phys_pfn_offset, . -__pv_phys_pfn_offset - - .globl __pv_offset - .type __pv_offset, %object -__pv_offset: - .quad 0 - .size __pv_offset, . -__pv_offset -#endif - #include "head-common.S" diff --git a/arch/arm/kernel/hyp-stub.S b/arch/arm/kernel/hyp-stub.S index 327f63ec42c6..b699b22a4db1 100644 --- a/arch/arm/kernel/hyp-stub.S +++ b/arch/arm/kernel/hyp-stub.S @@ -24,41 +24,38 @@ ENTRY(__boot_cpu_mode) .text /* - * Save the primary CPU boot mode. Requires 3 scratch registers. + * Save the primary CPU boot mode. Requires 2 scratch registers. */ - .macro store_primary_cpu_mode reg1, reg2, reg3 + .macro store_primary_cpu_mode reg1, reg2 mrs \reg1, cpsr and \reg1, \reg1, #MODE_MASK - adr \reg2, .L__boot_cpu_mode_offset - ldr \reg3, [\reg2] - str \reg1, [\reg2, \reg3] + str_l \reg1, __boot_cpu_mode, \reg2 .endm /* * Compare the current mode with the one saved on the primary CPU. * If they don't match, record that fact. The Z bit indicates * if there's a match or not. - * Requires 3 additionnal scratch registers. + * Requires 2 additional scratch registers. 
*/ - .macro compare_cpu_mode_with_primary mode, reg1, reg2, reg3 - adr \reg2, .L__boot_cpu_mode_offset - ldr \reg3, [\reg2] - ldr \reg1, [\reg2, \reg3] + .macro compare_cpu_mode_with_primary mode, reg1, reg2 + adr_l \reg2, __boot_cpu_mode + ldr \reg1, [\reg2] cmp \mode, \reg1 @ matches primary CPU boot mode? orrne \reg1, \reg1, #BOOT_CPU_MODE_MISMATCH - strne \reg1, [\reg2, \reg3] @ record what happened and give up + strne \reg1, [\reg2] @ record what happened and give up .endm #else /* ZIMAGE */ - .macro store_primary_cpu_mode reg1:req, reg2:req, reg3:req + .macro store_primary_cpu_mode reg1:req, reg2:req .endm /* * The zImage loader only runs on one CPU, so we don't bother with mult-CPU * consistency checking: */ - .macro compare_cpu_mode_with_primary mode, reg1, reg2, reg3 + .macro compare_cpu_mode_with_primary mode, reg1, reg2 cmp \mode, \mode .endm @@ -73,7 +70,7 @@ ENTRY(__boot_cpu_mode) */ @ Call this from the primary CPU ENTRY(__hyp_stub_install) - store_primary_cpu_mode r4, r5, r6 + store_primary_cpu_mode r4, r5 ENDPROC(__hyp_stub_install) @ fall through... @@ -87,7 +84,7 @@ ENTRY(__hyp_stub_install_secondary) * If the secondary has booted with a different mode, give up * immediately. */ - compare_cpu_mode_with_primary r4, r5, r6, r7 + compare_cpu_mode_with_primary r4, r5, r6 retne lr /* diff --git a/arch/arm/kernel/module.c b/arch/arm/kernel/module.c index e15444b25ca0..beac45e89ba6 100644 --- a/arch/arm/kernel/module.c +++ b/arch/arm/kernel/module.c @@ -185,14 +185,24 @@ apply_relocate(Elf32_Shdr *sechdrs, const char *strtab, unsigned int symindex, *(u32 *)loc |= offset & 0x7fffffff; break; + case R_ARM_REL32: + *(u32 *)loc += sym->st_value - loc; + break; + case R_ARM_MOVW_ABS_NC: case R_ARM_MOVT_ABS: + case R_ARM_MOVW_PREL_NC: + case R_ARM_MOVT_PREL: offset = tmp = __mem_to_opcode_arm(*(u32 *)loc); offset = ((offset & 0xf0000) >> 4) | (offset & 0xfff); offset = (offset ^ 0x8000) - 0x8000; offset += sym->st_value; - if (ELF32_R_TYPE(rel->r_info) == R_ARM_MOVT_ABS) + if (ELF32_R_TYPE(rel->r_info) == R_ARM_MOVT_PREL || + ELF32_R_TYPE(rel->r_info) == R_ARM_MOVW_PREL_NC) + offset -= loc; + if (ELF32_R_TYPE(rel->r_info) == R_ARM_MOVT_ABS || + ELF32_R_TYPE(rel->r_info) == R_ARM_MOVT_PREL) offset >>= 16; tmp &= 0xfff0f000; @@ -283,6 +293,8 @@ apply_relocate(Elf32_Shdr *sechdrs, const char *strtab, unsigned int symindex, case R_ARM_THM_MOVW_ABS_NC: case R_ARM_THM_MOVT_ABS: + case R_ARM_THM_MOVW_PREL_NC: + case R_ARM_THM_MOVT_PREL: upper = __mem_to_opcode_thumb16(*(u16 *)loc); lower = __mem_to_opcode_thumb16(*(u16 *)(loc + 2)); @@ -302,7 +314,11 @@ apply_relocate(Elf32_Shdr *sechdrs, const char *strtab, unsigned int symindex, offset = (offset ^ 0x8000) - 0x8000; offset += sym->st_value; - if (ELF32_R_TYPE(rel->r_info) == R_ARM_THM_MOVT_ABS) + if (ELF32_R_TYPE(rel->r_info) == R_ARM_THM_MOVT_PREL || + ELF32_R_TYPE(rel->r_info) == R_ARM_THM_MOVW_PREL_NC) + offset -= loc; + if (ELF32_R_TYPE(rel->r_info) == R_ARM_THM_MOVT_ABS || + ELF32_R_TYPE(rel->r_info) == R_ARM_THM_MOVT_PREL) offset >>= 16; upper = (u16)((upper & 0xfbf0) | diff --git a/arch/arm/kernel/phys2virt.S b/arch/arm/kernel/phys2virt.S new file mode 100644 index 000000000000..fb53db78fe78 --- /dev/null +++ b/arch/arm/kernel/phys2virt.S @@ -0,0 +1,238 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 1994-2002 Russell King + * Copyright (c) 2003, 2020 ARM Limited + * All Rights Reserved + */ + +#include <linux/init.h> +#include <linux/linkage.h> +#include <asm/assembler.h> +#include <asm/page.h> + +#ifdef 
__ARMEB__ +#define LOW_OFFSET 0x4 +#define HIGH_OFFSET 0x0 +#else +#define LOW_OFFSET 0x0 +#define HIGH_OFFSET 0x4 +#endif + +/* + * __fixup_pv_table - patch the stub instructions with the delta between + * PHYS_OFFSET and PAGE_OFFSET, which is assumed to be + * 2 MiB aligned. + * + * Called from head.S, which expects the following registers to be preserved: + * r1 = machine no, r2 = atags or dtb, + * r8 = phys_offset, r9 = cpuid, r10 = procinfo + */ + __HEAD +ENTRY(__fixup_pv_table) + mov r0, r8, lsr #PAGE_SHIFT @ convert to PFN + str_l r0, __pv_phys_pfn_offset, r3 + + adr_l r0, __pv_offset + subs r3, r8, #PAGE_OFFSET @ PHYS_OFFSET - PAGE_OFFSET + mvn ip, #0 + strcc ip, [r0, #HIGH_OFFSET] @ save to __pv_offset high bits + str r3, [r0, #LOW_OFFSET] @ save to __pv_offset low bits + + mov r0, r3, lsr #21 @ constant for add/sub instructions + teq r3, r0, lsl #21 @ must be 2 MiB aligned + bne 0f + + adr_l r4, __pv_table_begin + adr_l r5, __pv_table_end + b __fixup_a_pv_table + +0: mov r0, r0 @ deadloop on error + b 0b +ENDPROC(__fixup_pv_table) + + .text +__fixup_a_pv_table: + adr_l r6, __pv_offset + ldr r0, [r6, #HIGH_OFFSET] @ pv_offset high word + ldr r6, [r6, #LOW_OFFSET] @ pv_offset low word + cmn r0, #1 +#ifdef CONFIG_THUMB2_KERNEL + @ + @ The Thumb-2 versions of the patchable sequences are + @ + @ phys-to-virt: movw <reg>, #offset<31:21> + @ lsl <reg>, #21 + @ sub <VA>, <PA>, <reg> + @ + @ virt-to-phys (non-LPAE): movw <reg>, #offset<31:21> + @ lsl <reg>, #21 + @ add <PA>, <VA>, <reg> + @ + @ virt-to-phys (LPAE): movw <reg>, #offset<31:21> + @ lsl <reg>, #21 + @ adds <PAlo>, <VA>, <reg> + @ mov <PAhi>, #offset<39:32> + @ adc <PAhi>, <PAhi>, #0 + @ + @ In the non-LPAE case, all patchable instructions are MOVW + @ instructions, where we need to patch in the offset into the + @ second halfword of the opcode (the 16-bit immediate is encoded + @ as imm4:i:imm3:imm8) + @ + @ 15 11 10 9 4 3 0 15 14 12 11 8 7 0 + @ +-----------+---+-------------+------++---+------+----+------+ + @ MOVW | 1 1 1 1 0 | i | 1 0 0 1 0 0 | imm4 || 0 | imm3 | Rd | imm8 | + @ +-----------+---+-------------+------++---+------+----+------+ + @ + @ In the LPAE case, we also need to patch in the high word of the + @ offset into the immediate field of the MOV instruction, or patch it + @ to a MVN instruction if the offset is negative. In this case, we + @ need to inspect the first halfword of the opcode, to check whether + @ it is MOVW or MOV/MVN, and to perform the MOV to MVN patching if + @ needed. 
The encoding of the immediate is rather complex for values + @ of i:imm3 != 0b0000, but fortunately, we never need more than 8 lower + @ order bits, which can be patched into imm8 directly (and i:imm3 + @ cleared) + @ + @ 15 11 10 9 5 0 15 14 12 11 8 7 0 + @ +-----------+---+---------------------++---+------+----+------+ + @ MOV | 1 1 1 1 0 | i | 0 0 0 1 0 0 1 1 1 1 || 0 | imm3 | Rd | imm8 | + @ MVN | 1 1 1 1 0 | i | 0 0 0 1 1 0 1 1 1 1 || 0 | imm3 | Rd | imm8 | + @ +-----------+---+---------------------++---+------+----+------+ + @ + moveq r0, #0x200000 @ set bit 21, mov to mvn instruction + lsrs r3, r6, #29 @ isolate top 3 bits of displacement + ubfx r6, r6, #21, #8 @ put bits 28:21 into the MOVW imm8 field + bfi r6, r3, #12, #3 @ put bits 31:29 into the MOVW imm3 field + b .Lnext +.Lloop: add r7, r4 + adds r4, #4 @ clears Z flag +#ifdef CONFIG_ARM_LPAE + ldrh ip, [r7] +ARM_BE8(rev16 ip, ip) + tst ip, #0x200 @ MOVW has bit 9 set, MVN has it clear + bne 0f @ skip to MOVW handling (Z flag is clear) + bic ip, #0x20 @ clear bit 5 (MVN -> MOV) + orr ip, ip, r0, lsr #16 @ MOV -> MVN if offset < 0 +ARM_BE8(rev16 ip, ip) + strh ip, [r7] + @ Z flag is set +0: +#endif + ldrh ip, [r7, #2] +ARM_BE8(rev16 ip, ip) + and ip, #0xf00 @ clear everything except Rd field + orreq ip, r0 @ Z flag set -> MOV/MVN -> patch in high bits + orrne ip, r6 @ Z flag clear -> MOVW -> patch in low bits +ARM_BE8(rev16 ip, ip) + strh ip, [r7, #2] +#else +#ifdef CONFIG_CPU_ENDIAN_BE8 +@ in BE8, we load data in BE, but instructions still in LE +#define PV_BIT24 0x00000001 +#define PV_IMM8_MASK 0xff000000 +#define PV_IMMR_MSB 0x00080000 +#else +#define PV_BIT24 0x01000000 +#define PV_IMM8_MASK 0x000000ff +#define PV_IMMR_MSB 0x00000800 +#endif + + @ + @ The ARM versions of the patchable sequences are + @ + @ phys-to-virt: sub <VA>, <PA>, #offset<31:24>, lsl #24 + @ sub <VA>, <PA>, #offset<23:16>, lsl #16 + @ + @ virt-to-phys (non-LPAE): add <PA>, <VA>, #offset<31:24>, lsl #24 + @ add <PA>, <VA>, #offset<23:16>, lsl #16 + @ + @ virt-to-phys (LPAE): movw <reg>, #offset<31:20> + @ adds <PAlo>, <VA>, <reg>, lsl #20 + @ mov <PAhi>, #offset<39:32> + @ adc <PAhi>, <PAhi>, #0 + @ + @ In the non-LPAE case, all patchable instructions are ADD or SUB + @ instructions, where we need to patch in the offset into the + @ immediate field of the opcode, which is emitted with the correct + @ rotation value. (The effective value of the immediate is imm12<7:0> + @ rotated right by [2 * imm12<11:8>] bits) + @ + @ 31 28 27 23 22 20 19 16 15 12 11 0 + @ +------+-----------------+------+------+-------+ + @ ADD | cond | 0 0 1 0 1 0 0 0 | Rn | Rd | imm12 | + @ SUB | cond | 0 0 1 0 0 1 0 0 | Rn | Rd | imm12 | + @ MOV | cond | 0 0 1 1 1 0 1 0 | Rn | Rd | imm12 | + @ MVN | cond | 0 0 1 1 1 1 1 0 | Rn | Rd | imm12 | + @ +------+-----------------+------+------+-------+ + @ + @ In the LPAE case, we use a MOVW instruction to carry the low offset + @ word, and patch in the high word of the offset into the immediate + @ field of the subsequent MOV instruction, or patch it to a MVN + @ instruction if the offset is negative. We can distinguish MOVW + @ instructions based on bits 23:22 of the opcode, and ADD/SUB can be + @ distinguished from MOV/MVN (all using the encodings above) using + @ bit 24. 
+ @ + @ 31 28 27 23 22 20 19 16 15 12 11 0 + @ +------+-----------------+------+------+-------+ + @ MOVW | cond | 0 0 1 1 0 0 0 0 | imm4 | Rd | imm12 | + @ +------+-----------------+------+------+-------+ + @ + moveq r0, #0x400000 @ set bit 22, mov to mvn instruction + mov r3, r6, lsr #16 @ put offset bits 31-16 into r3 + mov r6, r6, lsr #24 @ put offset bits 31-24 into r6 + and r3, r3, #0xf0 @ only keep offset bits 23-20 in r3 + b .Lnext +.Lloop: ldr ip, [r7, r4] +#ifdef CONFIG_ARM_LPAE + tst ip, #PV_BIT24 @ ADD/SUB have bit 24 clear + beq 1f +ARM_BE8(rev ip, ip) + tst ip, #0xc00000 @ MOVW has bits 23:22 clear + bic ip, ip, #0x400000 @ clear bit 22 + bfc ip, #0, #12 @ clear imm12 field of MOV[W] instruction + orreq ip, ip, r6, lsl #4 @ MOVW -> mask in offset bits 31-24 + orreq ip, ip, r3, lsr #4 @ MOVW -> mask in offset bits 23-20 + orrne ip, ip, r0 @ MOV -> mask in offset bits 7-0 (or bit 22) +ARM_BE8(rev ip, ip) + b 2f +1: +#endif + tst ip, #PV_IMMR_MSB @ rotation value >= 16 ? + bic ip, ip, #PV_IMM8_MASK + orreq ip, ip, r6 ARM_BE8(, lsl #24) @ mask in offset bits 31-24 + orrne ip, ip, r3 ARM_BE8(, lsl #24) @ mask in offset bits 23-20 +2: + str ip, [r7, r4] + add r4, r4, #4 +#endif + +.Lnext: + cmp r4, r5 + ldrcc r7, [r4] @ use branch for delay slot + bcc .Lloop + ret lr +ENDPROC(__fixup_a_pv_table) + +ENTRY(fixup_pv_table) + stmfd sp!, {r4 - r7, lr} + mov r4, r0 @ r0 = table start + add r5, r0, r1 @ r1 = table size + bl __fixup_a_pv_table + ldmfd sp!, {r4 - r7, pc} +ENDPROC(fixup_pv_table) + + .data + .align 2 + .globl __pv_phys_pfn_offset + .type __pv_phys_pfn_offset, %object +__pv_phys_pfn_offset: + .word 0 + .size __pv_phys_pfn_offset, . -__pv_phys_pfn_offset + + .globl __pv_offset + .type __pv_offset, %object +__pv_offset: + .quad 0 + .size __pv_offset, . -__pv_offset diff --git a/arch/arm/kernel/sleep.S b/arch/arm/kernel/sleep.S index 5dc8b80bb693..43077e11dafd 100644 --- a/arch/arm/kernel/sleep.S +++ b/arch/arm/kernel/sleep.S @@ -72,8 +72,9 @@ ENTRY(__cpu_suspend) ldr r3, =sleep_save_sp stmfd sp!, {r0, r1} @ save suspend func arg and pointer ldr r3, [r3, #SLEEP_SAVE_SP_VIRT] - ALT_SMP(ldr r0, =mpidr_hash) + ALT_SMP(W(nop)) @ don't use adr_l inside ALT_SMP() ALT_UP_B(1f) + adr_l r0, mpidr_hash /* This ldmia relies on the memory layout of the mpidr_hash struct */ ldmia r0, {r1, r6-r8} @ r1 = mpidr mask (r6,r7,r8) = l[0,1,2] shifts compute_mpidr_hash r0, r6, r7, r8, r2, r1 @@ -147,9 +148,8 @@ no_hyp: mov r1, #0 ALT_SMP(mrc p15, 0, r0, c0, c0, 5) ALT_UP_B(1f) - adr r2, mpidr_hash_ptr - ldr r3, [r2] - add r2, r2, r3 @ r2 = struct mpidr_hash phys address + adr_l r2, mpidr_hash @ r2 = struct mpidr_hash phys address + /* * This ldmia relies on the memory layout of the mpidr_hash * struct mpidr_hash. @@ -157,10 +157,7 @@ no_hyp: ldmia r2, { r3-r6 } @ r3 = mpidr mask (r4,r5,r6) = l[0,1,2] shifts compute_mpidr_hash r1, r4, r5, r6, r0, r3 1: - adr r0, _sleep_save_sp - ldr r2, [r0] - add r0, r0, r2 - ldr r0, [r0, #SLEEP_SAVE_SP_PHYS] + ldr_l r0, sleep_save_sp + SLEEP_SAVE_SP_PHYS ldr r0, [r0, r1, lsl #2] @ load phys pgd, stack, resume fn @@ -177,12 +174,6 @@ ENDPROC(cpu_resume_arm) ENDPROC(cpu_resume_no_hyp) #endif - .align 2 -_sleep_save_sp: - .long sleep_save_sp - . -mpidr_hash_ptr: - .long mpidr_hash - . @ mpidr_hash struct offset - .data .align 2 .type sleep_save_sp, #object |
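The place-relative sequences also appear in modules, which is why arch/arm/kernel/module.c above gains R_ARM_REL32 and the MOVW/MOVT _PREL relocations. A simplified sketch (not the kernel's exact code) of how a MOVW/MOVT _PREL pair is resolved: the value S + A - P is computed once, and its low or high 16 bits become the immediate to re-encode into the instruction:

/*
 * Simplified model of R_ARM_MOVW_PREL_NC / R_ARM_MOVT_PREL resolution;
 * instruction re-encoding of the returned immediate is omitted.
 */
#include <stdint.h>

static uint32_t movw_movt_prel(uint32_t sym, int32_t addend,
			       uint32_t loc, int is_movt)
{
	int32_t offset = (int32_t)sym + addend - (int32_t)loc;

	if (is_movt)
		offset >>= 16;		/* MOVT carries bits 31:16 */

	return (uint32_t)offset & 0xffff;
}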