From 8fcd6c45f5a65621ec809b7866a3623e9a01d4ed Mon Sep 17 00:00:00 2001 From: Nicolas Pitre Date: Fri, 28 Jul 2017 15:35:35 -0400 Subject: ARM: implement get_tls syscall When there is no dedicated register to hold the tp value and no MMU to provide a fixed address kuser helper entry point, all that is left as fallback is a syscall. Signed-off-by: Nicolas Pitre Acked-by: Mickael GUENE Tested-by: Vincent Abriou Tested-by: Andras Szemzo --- arch/arm/include/uapi/asm/unistd.h | 1 + arch/arm/kernel/traps.c | 3 +++ 2 files changed, 4 insertions(+) diff --git a/arch/arm/include/uapi/asm/unistd.h b/arch/arm/include/uapi/asm/unistd.h index 28bd456494a3..575b25fc29c6 100644 --- a/arch/arm/include/uapi/asm/unistd.h +++ b/arch/arm/include/uapi/asm/unistd.h @@ -35,5 +35,6 @@ #define __ARM_NR_usr26 (__ARM_NR_BASE+3) #define __ARM_NR_usr32 (__ARM_NR_BASE+4) #define __ARM_NR_set_tls (__ARM_NR_BASE+5) +#define __ARM_NR_get_tls (__ARM_NR_BASE+6) #endif /* _UAPI__ASM_ARM_UNISTD_H */ diff --git a/arch/arm/kernel/traps.c b/arch/arm/kernel/traps.c index 948c648fea00..43c0560f7b2d 100644 --- a/arch/arm/kernel/traps.c +++ b/arch/arm/kernel/traps.c @@ -647,6 +647,9 @@ asmlinkage int arm_syscall(int no, struct pt_regs *regs) set_tls(regs->ARM_r0); return 0; + case NR(get_tls): + return current_thread_info()->tp_value[0]; + default: /* Calls 9f00xx..9f07ff are defined to return -ENOSYS if not implemented, rather than raising SIGILL. This -- cgit v1.2.3 From e71fd63127c5a1d4fb7660f5d4cbc257374a2af9 Mon Sep 17 00:00:00 2001 From: Nicolas Pitre Date: Sat, 22 Jul 2017 00:48:09 -0400 Subject: arm_elf_read_implies_exec(): remove unused argument The first argument to elf_read_implies_exec() is either the actual header structure or a pointer to that structure whether one looks at fs/binfmt_elf.c or fs/binfmt_elf_fdpic.c. This ought to be fixed of course, but in the mean time let's sidestep the issue by removing that first argument from arm_elf_read_implies_exec() as it is unused anyway. Signed-off-by: Nicolas Pitre Acked-by: Mickael GUENE Tested-by: Vincent Abriou Tested-by: Andras Szemzo --- arch/arm/include/asm/elf.h | 4 ++-- arch/arm/kernel/elf.c | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/arm/include/asm/elf.h b/arch/arm/include/asm/elf.h index f13ae153fb24..e2786acf8202 100644 --- a/arch/arm/include/asm/elf.h +++ b/arch/arm/include/asm/elf.h @@ -102,8 +102,8 @@ extern int elf_check_arch(const struct elf32_hdr *); #define vmcore_elf64_check_arch(x) (0) -extern int arm_elf_read_implies_exec(const struct elf32_hdr *, int); -#define elf_read_implies_exec(ex,stk) arm_elf_read_implies_exec(&(ex), stk) +extern int arm_elf_read_implies_exec(int); +#define elf_read_implies_exec(ex,stk) arm_elf_read_implies_exec(stk) struct task_struct; int dump_task_regs(struct task_struct *t, elf_gregset_t *elfregs); diff --git a/arch/arm/kernel/elf.c b/arch/arm/kernel/elf.c index d0d1e83150c9..52fb98358d9d 100644 --- a/arch/arm/kernel/elf.c +++ b/arch/arm/kernel/elf.c @@ -80,7 +80,7 @@ EXPORT_SYMBOL(elf_set_personality); * - the binary requires an executable stack * - we're running on a CPU which doesn't support NX. */ -int arm_elf_read_implies_exec(const struct elf32_hdr *x, int executable_stack) +int arm_elf_read_implies_exec(int executable_stack) { if (executable_stack != EXSTACK_DISABLE_X) return 1; -- cgit v1.2.3 From 5e5881143297c97207486349ec1cf9fd833b1510 Mon Sep 17 00:00:00 2001 From: Nicolas Pitre Date: Fri, 28 Jul 2017 18:48:48 -0400 Subject: ARM: start_thread(): don't always clear all regs The elf_fdpic binary format driver has to initialize extra registers beside the stack and program counter as required by the corresponding ABI. So reinstate them after the regs structure has been cleared. While at it let's get rid of start_thread_nommu(). Signed-off-by: Nicolas Pitre Acked-by: Mickael GUENE Tested-by: Vincent Abriou Tested-by: Andras Szemzo --- arch/arm/include/asm/processor.h | 22 +++++++++++++++------- 1 file changed, 15 insertions(+), 7 deletions(-) diff --git a/arch/arm/include/asm/processor.h b/arch/arm/include/asm/processor.h index c3d5fc124a05..338cbe0a18ef 100644 --- a/arch/arm/include/asm/processor.h +++ b/arch/arm/include/asm/processor.h @@ -47,15 +47,24 @@ struct thread_struct { #define INIT_THREAD { } -#ifdef CONFIG_MMU -#define nommu_start_thread(regs) do { } while (0) -#else -#define nommu_start_thread(regs) regs->ARM_r10 = current->mm->start_data -#endif - #define start_thread(regs,pc,sp) \ ({ \ + unsigned long r7, r8, r9; \ + \ + if (IS_ENABLED(CONFIG_BINFMT_ELF_FDPIC)) { \ + r7 = regs->ARM_r7; \ + r8 = regs->ARM_r8; \ + r9 = regs->ARM_r9; \ + } \ memset(regs->uregs, 0, sizeof(regs->uregs)); \ + if (IS_ENABLED(CONFIG_BINFMT_ELF_FDPIC) && \ + current->personality & FDPIC_FUNCPTRS) { \ + regs->ARM_r7 = r7; \ + regs->ARM_r8 = r8; \ + regs->ARM_r9 = r9; \ + regs->ARM_r10 = current->mm->start_data; \ + } else if (!IS_ENABLED(CONFIG_MMU)) \ + regs->ARM_r10 = current->mm->start_data; \ if (current->personality & ADDR_LIMIT_32BIT) \ regs->ARM_cpsr = USR_MODE; \ else \ @@ -65,7 +74,6 @@ struct thread_struct { regs->ARM_cpsr |= PSR_ENDSTATE; \ regs->ARM_pc = pc & ~1; /* pc */ \ regs->ARM_sp = sp; /* sp */ \ - nommu_start_thread(regs); \ }) /* Forward declaration, a strange C thing */ -- cgit v1.2.3 From 5c16595353e0743af99294db48549c3145e3a5ad Mon Sep 17 00:00:00 2001 From: Nicolas Pitre Date: Wed, 9 Aug 2017 23:42:51 -0400 Subject: ARM: signal handling support for FDPIC_FUNCPTRS functions Signal handlers are not direct function pointers but pointers to function descriptor in that case. Therefore we must retrieve the actual function address and load the GOT value into r9 from the descriptor before branching to the actual handler. If a restorer is provided, we also have to load its address and GOT from its descriptor. That descriptor address and the code to load it is pushed onto the stack to be executed as soon as the signal handler returns. However, to be compatible with NX stacks, the FDPIC bounce code is also copied to the signal page along with the other code stubs. Therefore this code must get at the descriptor address whether it executes from the stack or the signal page. To do so we use the stack pointer which points at the signal stack frame where the descriptor address was stored. Because the rt signal frame is different from the simpler frame, two versions of the bounce code are needed, and two variants (ARM and Thumb) as well. The asm-offsets facility is used to determine the actual offset in the signal frame for each version, meaning that struct sigframe and rt_sigframe had to be moved to a separate file. Signed-off-by: Nicolas Pitre Acked-by: Mickael GUENE Tested-by: Vincent Abriou Tested-by: Andras Szemzo --- arch/arm/include/asm/ucontext.h | 1 + arch/arm/kernel/asm-offsets.c | 4 +++ arch/arm/kernel/signal.c | 53 +++++++++++++++++++++++++----------- arch/arm/kernel/signal.h | 11 ++++++++ arch/arm/kernel/sigreturn_codes.S | 56 +++++++++++++++++++++++++++++++++++---- 5 files changed, 105 insertions(+), 20 deletions(-) create mode 100644 arch/arm/kernel/signal.h diff --git a/arch/arm/include/asm/ucontext.h b/arch/arm/include/asm/ucontext.h index 921d8274855c..b42c75ae0d19 100644 --- a/arch/arm/include/asm/ucontext.h +++ b/arch/arm/include/asm/ucontext.h @@ -2,6 +2,7 @@ #define _ASMARM_UCONTEXT_H #include +#include /* * struct sigcontext only has room for the basic registers, but struct diff --git a/arch/arm/kernel/asm-offsets.c b/arch/arm/kernel/asm-offsets.c index 608008229c7d..13c155850822 100644 --- a/arch/arm/kernel/asm-offsets.c +++ b/arch/arm/kernel/asm-offsets.c @@ -28,6 +28,7 @@ #include #include #include +#include "signal.h" /* * Make sure that the compiler and target are compatible. @@ -112,6 +113,9 @@ int main(void) DEFINE(SVC_ADDR_LIMIT, offsetof(struct svc_pt_regs, addr_limit)); DEFINE(SVC_REGS_SIZE, sizeof(struct svc_pt_regs)); BLANK(); + DEFINE(SIGFRAME_RC3_OFFSET, offsetof(struct sigframe, retcode[3])); + DEFINE(RT_SIGFRAME_RC3_OFFSET, offsetof(struct rt_sigframe, sig.retcode[3])); + BLANK(); #ifdef CONFIG_CACHE_L2X0 DEFINE(L2X0_R_PHY_BASE, offsetof(struct l2x0_regs, phy_base)); DEFINE(L2X0_R_AUX_CTRL, offsetof(struct l2x0_regs, aux_ctrl)); diff --git a/arch/arm/kernel/signal.c b/arch/arm/kernel/signal.c index 5814298ef0b7..1f3574ec4fad 100644 --- a/arch/arm/kernel/signal.c +++ b/arch/arm/kernel/signal.c @@ -18,11 +18,12 @@ #include #include #include -#include #include #include -extern const unsigned long sigreturn_codes[7]; +#include "signal.h" + +extern const unsigned long sigreturn_codes[17]; static unsigned long signal_return_offset; @@ -171,15 +172,6 @@ static int restore_vfp_context(char __user **auxp) /* * Do a signal return; undo the signal stack. These are aligned to 64-bit. */ -struct sigframe { - struct ucontext uc; - unsigned long retcode[2]; -}; - -struct rt_sigframe { - struct siginfo info; - struct sigframe sig; -}; static int restore_sigframe(struct pt_regs *regs, struct sigframe __user *sf) { @@ -365,9 +357,20 @@ setup_return(struct pt_regs *regs, struct ksignal *ksig, unsigned long __user *rc, void __user *frame) { unsigned long handler = (unsigned long)ksig->ka.sa.sa_handler; + unsigned long handler_fdpic_GOT = 0; unsigned long retcode; - int thumb = 0; + unsigned int idx, thumb = 0; unsigned long cpsr = regs->ARM_cpsr & ~(PSR_f | PSR_E_BIT); + bool fdpic = IS_ENABLED(CONFIG_BINFMT_ELF_FDPIC) && + (current->personality & FDPIC_FUNCPTRS); + + if (fdpic) { + unsigned long __user *fdpic_func_desc = + (unsigned long __user *)handler; + if (__get_user(handler, &fdpic_func_desc[0]) || + __get_user(handler_fdpic_GOT, &fdpic_func_desc[1])) + return 1; + } cpsr |= PSR_ENDSTATE; @@ -407,9 +410,26 @@ setup_return(struct pt_regs *regs, struct ksignal *ksig, if (ksig->ka.sa.sa_flags & SA_RESTORER) { retcode = (unsigned long)ksig->ka.sa.sa_restorer; + if (fdpic) { + /* + * We need code to load the function descriptor. + * That code follows the standard sigreturn code + * (6 words), and is made of 3 + 2 words for each + * variant. The 4th copied word is the actual FD + * address that the assembly code expects. + */ + idx = 6 + thumb * 3; + if (ksig->ka.sa.sa_flags & SA_SIGINFO) + idx += 5; + if (__put_user(sigreturn_codes[idx], rc ) || + __put_user(sigreturn_codes[idx+1], rc+1) || + __put_user(sigreturn_codes[idx+2], rc+2) || + __put_user(retcode, rc+3)) + return 1; + goto rc_finish; + } } else { - unsigned int idx = thumb << 1; - + idx = thumb << 1; if (ksig->ka.sa.sa_flags & SA_SIGINFO) idx += 3; @@ -421,6 +441,7 @@ setup_return(struct pt_regs *regs, struct ksignal *ksig, __put_user(sigreturn_codes[idx+1], rc+1)) return 1; +rc_finish: #ifdef CONFIG_MMU if (cpsr & MODE32_BIT) { struct mm_struct *mm = current->mm; @@ -440,7 +461,7 @@ setup_return(struct pt_regs *regs, struct ksignal *ksig, * the return code written onto the stack. */ flush_icache_range((unsigned long)rc, - (unsigned long)(rc + 2)); + (unsigned long)(rc + 3)); retcode = ((unsigned long)rc) + thumb; } @@ -450,6 +471,8 @@ setup_return(struct pt_regs *regs, struct ksignal *ksig, regs->ARM_sp = (unsigned long)frame; regs->ARM_lr = retcode; regs->ARM_pc = handler; + if (fdpic) + regs->ARM_r9 = handler_fdpic_GOT; regs->ARM_cpsr = cpsr; return 0; diff --git a/arch/arm/kernel/signal.h b/arch/arm/kernel/signal.h new file mode 100644 index 000000000000..b7b838b05229 --- /dev/null +++ b/arch/arm/kernel/signal.h @@ -0,0 +1,11 @@ +#include + +struct sigframe { + struct ucontext uc; + unsigned long retcode[4]; +}; + +struct rt_sigframe { + struct siginfo info; + struct sigframe sig; +}; diff --git a/arch/arm/kernel/sigreturn_codes.S b/arch/arm/kernel/sigreturn_codes.S index b84d0cb13682..2c7b22e32152 100644 --- a/arch/arm/kernel/sigreturn_codes.S +++ b/arch/arm/kernel/sigreturn_codes.S @@ -14,6 +14,8 @@ * GNU General Public License for more details. */ +#include +#include #include /* @@ -51,6 +53,17 @@ ARM_OK( .arm ) .thumb .endm + .macro arm_fdpic_slot n + .org sigreturn_codes + 24 + 20 * (\n) +ARM_OK( .arm ) + .endm + + .macro thumb_fdpic_slot n + .org sigreturn_codes + 24 + 20 * (\n) + 12 + .thumb + .endm + + #if __LINUX_ARM_ARCH__ <= 4 /* * Note we manually set minimally required arch that supports @@ -90,13 +103,46 @@ ARM_OK( swi #(__NR_rt_sigreturn)|(__NR_OABI_SYSCALL_BASE) ) movs r7, #(__NR_rt_sigreturn - __NR_SYSCALL_BASE) swi #0 + /* ARM sigreturn restorer FDPIC bounce code snippet */ + arm_fdpic_slot 0 +ARM_OK( ldr r3, [sp, #SIGFRAME_RC3_OFFSET] ) +ARM_OK( ldmia r3, {r3, r9} ) +#ifdef CONFIG_ARM_THUMB +ARM_OK( bx r3 ) +#else +ARM_OK( ret r3 ) +#endif + + /* Thumb sigreturn restorer FDPIC bounce code snippet */ + thumb_fdpic_slot 0 + ldr r3, [sp, #SIGFRAME_RC3_OFFSET] + ldmia r3, {r2, r3} + mov r9, r3 + bx r2 + + /* ARM sigreturn_rt restorer FDPIC bounce code snippet */ + arm_fdpic_slot 1 +ARM_OK( ldr r3, [sp, #RT_SIGFRAME_RC3_OFFSET] ) +ARM_OK( ldmia r3, {r3, r9} ) +#ifdef CONFIG_ARM_THUMB +ARM_OK( bx r3 ) +#else +ARM_OK( ret r3 ) +#endif + + /* Thumb sigreturn_rt restorer FDPIC bounce code snippet */ + thumb_fdpic_slot 1 + ldr r3, [sp, #RT_SIGFRAME_RC3_OFFSET] + ldmia r3, {r2, r3} + mov r9, r3 + bx r2 + /* - * Note on addtional space: setup_return in signal.c - * algorithm uses two words copy regardless whether - * it is thumb case or not, so we need additional - * word after real last entry. + * Note on additional space: setup_return in signal.c + * always copies the same number of words regardless whether + * it is thumb case or not, so we need one additional padding + * word after the last entry. */ - arm_slot 2 .space 4 .size sigreturn_codes, . - sigreturn_codes -- cgit v1.2.3 From 50b2b2e691cd4ff30331ba9a6156b29a07b60f90 Mon Sep 17 00:00:00 2001 From: Nicolas Pitre Date: Sat, 22 Jul 2017 01:02:40 -0400 Subject: ARM: add ELF_FDPIC support This includes the necessary code to recognise the FDPIC format on ARM and the ptrace command definitions used by the common ptrace code. Based on patches originally from Mickael Guene . Signed-off-by: Nicolas Pitre Acked-by: Mickael GUENE Tested-by: Vincent Abriou Tested-by: Andras Szemzo --- arch/arm/include/asm/elf.h | 12 ++++++++++++ arch/arm/include/asm/mmu.h | 4 ++++ arch/arm/include/uapi/asm/ptrace.h | 4 ++++ fs/Kconfig.binfmt | 2 +- 4 files changed, 21 insertions(+), 1 deletion(-) diff --git a/arch/arm/include/asm/elf.h b/arch/arm/include/asm/elf.h index e2786acf8202..ad0ca4f2ba13 100644 --- a/arch/arm/include/asm/elf.h +++ b/arch/arm/include/asm/elf.h @@ -100,6 +100,11 @@ struct elf32_hdr; extern int elf_check_arch(const struct elf32_hdr *); #define elf_check_arch elf_check_arch +#define ELFOSABI_ARM_FDPIC 65 /* ARM FDPIC platform */ +#define elf_check_fdpic(x) ((x)->e_ident[EI_OSABI] == ELFOSABI_ARM_FDPIC) +#define elf_check_const_displacement(x) ((x)->e_flags & EF_ARM_PIC) +#define ELF_FDPIC_CORE_EFLAGS 0 + #define vmcore_elf64_check_arch(x) (0) extern int arm_elf_read_implies_exec(int); @@ -120,6 +125,13 @@ int dump_task_regs(struct task_struct *t, elf_gregset_t *elfregs); have no such handler. */ #define ELF_PLAT_INIT(_r, load_addr) (_r)->ARM_r0 = 0 +#define ELF_FDPIC_PLAT_INIT(_r, _exec_map_addr, _interp_map_addr, dynamic_addr) \ + do { \ + (_r)->ARM_r7 = _exec_map_addr; \ + (_r)->ARM_r8 = _interp_map_addr; \ + (_r)->ARM_r9 = dynamic_addr; \ + } while(0) + extern void elf_set_personality(const struct elf32_hdr *); #define SET_PERSONALITY(ex) elf_set_personality(&(ex)) diff --git a/arch/arm/include/asm/mmu.h b/arch/arm/include/asm/mmu.h index a5b47421059d..e0eb16680a5b 100644 --- a/arch/arm/include/asm/mmu.h +++ b/arch/arm/include/asm/mmu.h @@ -33,6 +33,10 @@ typedef struct { */ typedef struct { unsigned long end_brk; +#ifdef CONFIG_BINFMT_ELF_FDPIC + unsigned long exec_fdpic_loadmap; + unsigned long interp_fdpic_loadmap; +#endif } mm_context_t; #endif diff --git a/arch/arm/include/uapi/asm/ptrace.h b/arch/arm/include/uapi/asm/ptrace.h index 5af0ed1b825a..3173eb9751fd 100644 --- a/arch/arm/include/uapi/asm/ptrace.h +++ b/arch/arm/include/uapi/asm/ptrace.h @@ -31,6 +31,10 @@ #define PTRACE_SETVFPREGS 28 #define PTRACE_GETHBPREGS 29 #define PTRACE_SETHBPREGS 30 +#define PTRACE_GETFDPIC 31 + +#define PTRACE_GETFDPIC_EXEC 0 +#define PTRACE_GETFDPIC_INTERP 1 /* * PSR bits diff --git a/fs/Kconfig.binfmt b/fs/Kconfig.binfmt index b2f82cf6bf86..6ef70ce8e976 100644 --- a/fs/Kconfig.binfmt +++ b/fs/Kconfig.binfmt @@ -35,7 +35,7 @@ config ARCH_BINFMT_ELF_STATE config BINFMT_ELF_FDPIC bool "Kernel support for FDPIC ELF binaries" default y - depends on (FRV || BLACKFIN || (SUPERH32 && !MMU) || C6X) + depends on ((ARM && !MMU) || FRV || BLACKFIN || (SUPERH32 && !MMU) || C6X) select ELFCORE help ELF FDPIC binaries are based on ELF, but allow the individual load -- cgit v1.2.3 From 382e67aec6a7eea8ed4403e86950b468a191c468 Mon Sep 17 00:00:00 2001 From: Nicolas Pitre Date: Fri, 11 Aug 2017 00:53:39 -0400 Subject: ARM: enable elf_fdpic on systems with an MMU Provide the necessary changes to be able to execute ELF-FDPIC binaries on ARM systems with an MMU. The default for CONFIG_BINFMT_ELF_FDPIC is also set to n if the regular ELF loader is already configured so not to force FDPIC support on everyone. Given that CONFIG_BINFMT_ELF depends on CONFIG_MMU, this means CONFIG_BINFMT_ELF_FDPIC will still default to y when !MMU. Signed-off-by: Nicolas Pitre Acked-by: Mickael GUENE Tested-by: Vincent Abriou Tested-by: Andras Szemzo --- arch/arm/include/asm/mmu.h | 4 ++++ arch/arm/kernel/elf.c | 22 ++++++++++++++++++++++ fs/Kconfig.binfmt | 4 ++-- fs/binfmt_elf_fdpic.c | 5 +++++ 4 files changed, 33 insertions(+), 2 deletions(-) diff --git a/arch/arm/include/asm/mmu.h b/arch/arm/include/asm/mmu.h index e0eb16680a5b..bdec37c6ac35 100644 --- a/arch/arm/include/asm/mmu.h +++ b/arch/arm/include/asm/mmu.h @@ -14,6 +14,10 @@ typedef struct { #ifdef CONFIG_VDSO unsigned long vdso; #endif +#ifdef CONFIG_BINFMT_ELF_FDPIC + unsigned long exec_fdpic_loadmap; + unsigned long interp_fdpic_loadmap; +#endif } mm_context_t; #ifdef CONFIG_CPU_HAS_ASID diff --git a/arch/arm/kernel/elf.c b/arch/arm/kernel/elf.c index 52fb98358d9d..569e69ece5ca 100644 --- a/arch/arm/kernel/elf.c +++ b/arch/arm/kernel/elf.c @@ -3,6 +3,7 @@ #include #include #include +#include #include int elf_check_arch(const struct elf32_hdr *x) @@ -89,3 +90,24 @@ int arm_elf_read_implies_exec(int executable_stack) return 0; } EXPORT_SYMBOL(arm_elf_read_implies_exec); + +#if defined(CONFIG_MMU) && defined(CONFIG_BINFMT_ELF_FDPIC) + +void elf_fdpic_arch_lay_out_mm(struct elf_fdpic_params *exec_params, + struct elf_fdpic_params *interp_params, + unsigned long *start_stack, + unsigned long *start_brk) +{ + elf_set_personality(&exec_params->hdr); + + exec_params->load_addr = 0x8000; + interp_params->load_addr = ELF_ET_DYN_BASE; + *start_stack = TASK_SIZE - SZ_16M; + + if ((exec_params->flags & ELF_FDPIC_FLAG_ARRANGEMENT) == ELF_FDPIC_FLAG_INDEPENDENT) { + exec_params->flags &= ~ELF_FDPIC_FLAG_ARRANGEMENT; + exec_params->flags |= ELF_FDPIC_FLAG_CONSTDISP; + } +} + +#endif diff --git a/fs/Kconfig.binfmt b/fs/Kconfig.binfmt index 6ef70ce8e976..58c2bbd385ad 100644 --- a/fs/Kconfig.binfmt +++ b/fs/Kconfig.binfmt @@ -34,8 +34,8 @@ config ARCH_BINFMT_ELF_STATE config BINFMT_ELF_FDPIC bool "Kernel support for FDPIC ELF binaries" - default y - depends on ((ARM && !MMU) || FRV || BLACKFIN || (SUPERH32 && !MMU) || C6X) + default y if !BINFMT_ELF + depends on (ARM || FRV || BLACKFIN || (SUPERH32 && !MMU) || C6X) select ELFCORE help ELF FDPIC binaries are based on ELF, but allow the individual load diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c index cf93a4fad012..692e2a1fd2bb 100644 --- a/fs/binfmt_elf_fdpic.c +++ b/fs/binfmt_elf_fdpic.c @@ -377,6 +377,11 @@ static int load_elf_fdpic_binary(struct linux_binprm *bprm) executable_stack); if (retval < 0) goto error; +#ifdef ARCH_HAS_SETUP_ADDITIONAL_PAGES + retval = arch_setup_additional_pages(bprm, !!interpreter_name); + if (retval < 0) + goto error; +#endif #endif /* load the executable and interpreter into memory */ -- cgit v1.2.3 From 4755200b6b116dbf6d5545427e8a2cf58194ba6b Mon Sep 17 00:00:00 2001 From: Nicolas Pitre Date: Wed, 16 Aug 2017 16:05:13 -0400 Subject: binfmt_elf: don't attempt to load FDPIC binaries On platforms where both ELF and ELF-FDPIC variants are available, the regular ELF loader will happily identify FDPIC binaries as proper ELF and load them without the necessary FDPIC fixups, resulting in an immediate user space crash. Let's prevent binflt_elf from loading those binaries so binfmt_elf_fdpic has a chance to pick them up. For those architectures that don't define elf_check_fdpic(), a default version returning false is provided. Signed-off-by: Nicolas Pitre Acked-by: Mickael GUENE Tested-by: Vincent Abriou Tested-by: Andras Szemzo --- fs/binfmt_elf.c | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index 6466153f2bf0..89536811d8f8 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c @@ -51,6 +51,11 @@ #define user_siginfo_t siginfo_t #endif +/* That's for binfmt_elf_fdpic to deal with */ +#ifndef elf_check_fdpic +#define elf_check_fdpic(ex) false +#endif + static int load_elf_binary(struct linux_binprm *bprm); static unsigned long elf_map(struct file *, unsigned long, struct elf_phdr *, int, int, unsigned long); @@ -541,7 +546,8 @@ static unsigned long load_elf_interp(struct elfhdr *interp_elf_ex, if (interp_elf_ex->e_type != ET_EXEC && interp_elf_ex->e_type != ET_DYN) goto out; - if (!elf_check_arch(interp_elf_ex)) + if (!elf_check_arch(interp_elf_ex) || + elf_check_fdpic(interp_elf_ex)) goto out; if (!interpreter->f_op->mmap) goto out; @@ -717,6 +723,8 @@ static int load_elf_binary(struct linux_binprm *bprm) goto out; if (!elf_check_arch(&loc->elf_ex)) goto out; + if (elf_check_fdpic(&loc->elf_ex)) + goto out; if (!bprm->file->f_op->mmap) goto out; @@ -816,7 +824,8 @@ static int load_elf_binary(struct linux_binprm *bprm) if (memcmp(loc->interp_elf_ex.e_ident, ELFMAG, SELFMAG) != 0) goto out_free_dentry; /* Verify the interpreter has a valid arch */ - if (!elf_check_arch(&loc->interp_elf_ex)) + if (!elf_check_arch(&loc->interp_elf_ex) || + elf_check_fdpic(&loc->interp_elf_ex)) goto out_free_dentry; /* Load the interpreter program headers */ @@ -1188,6 +1197,8 @@ static int load_elf_library(struct file *file) if (elf_ex.e_type != ET_EXEC || elf_ex.e_phnum > 2 || !elf_check_arch(&elf_ex) || !file->f_op->mmap) goto out; + if (elf_check_fdpic(&elf_ex)) + goto out; /* Now read in all of the header information */ -- cgit v1.2.3 From cdf38888ed30523b1a6ee199449a86dbd1d864ce Mon Sep 17 00:00:00 2001 From: Nicolas Pitre Date: Wed, 16 Aug 2017 18:56:14 -0400 Subject: binfmt_elf_fdpic: fix crash on MMU system with dynamic binaries In elf_fdpic_map_file() there is a test to ensure the dynamic section in user space is properly terminated. However it does so by dereferencing a user address directly. Add proper user space accessor. Signed-off-by: Nicolas Pitre Acked-by: Mickael GUENE Tested-by: Vincent Abriou Tested-by: Andras Szemzo --- fs/binfmt_elf_fdpic.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c index 692e2a1fd2bb..6a56dea13876 100644 --- a/fs/binfmt_elf_fdpic.c +++ b/fs/binfmt_elf_fdpic.c @@ -835,6 +835,9 @@ static int elf_fdpic_map_file(struct elf_fdpic_params *params, if (phdr->p_vaddr >= seg->p_vaddr && phdr->p_vaddr + phdr->p_memsz <= seg->p_vaddr + seg->p_memsz) { + Elf32_Dyn __user *dyn; + Elf32_Sword d_tag; + params->dynamic_addr = (phdr->p_vaddr - seg->p_vaddr) + seg->addr; @@ -847,8 +850,9 @@ static int elf_fdpic_map_file(struct elf_fdpic_params *params, goto dynamic_error; tmp = phdr->p_memsz / sizeof(Elf32_Dyn); - if (((Elf32_Dyn *) - params->dynamic_addr)[tmp - 1].d_tag != 0) + dyn = (Elf32_Dyn __user *)params->dynamic_addr; + __get_user(d_tag, &dyn[tmp - 1].d_tag); + if (d_tag != 0) goto dynamic_error; break; } -- cgit v1.2.3 From 9520b1a1b5f7a34888e14de3cf2ee0ee5344e9fe Mon Sep 17 00:00:00 2001 From: Nicolas Pitre Date: Thu, 24 Aug 2017 15:54:47 -0400 Subject: ARM: head-common.S: speed up startup code Let's use optimized routines such as memcpy to copy .data and memzero to clear .bss in the startup code instead of doing it one word at a time. Those routines don't use any global data so they're safe to use even if .data and .bss segments are not initialized. In the .data copy case a temporary stack is installed in the .bss area as the actual kernel stack is located within the copied data area. The XIP kernel linker script ensures a 8 byte alignment for that purpose. Finally, make the .data copy and related pointers surrounded by CONFIG_XIP_KERNEL to make it obvious what it is all about. This will allow for further cleanups in the non-XIP linker script. Signed-off-by: Nicolas Pitre Reviewed-by: Ard Biesheuvel Tested-by: Chris Brandt --- arch/arm/kernel/head-common.S | 76 ++++++++++++++++++++++----------------- arch/arm/kernel/vmlinux-xip.lds.S | 2 +- 2 files changed, 45 insertions(+), 33 deletions(-) diff --git a/arch/arm/kernel/head-common.S b/arch/arm/kernel/head-common.S index 8733012d231f..bf9c4e38eced 100644 --- a/arch/arm/kernel/head-common.S +++ b/arch/arm/kernel/head-common.S @@ -79,47 +79,59 @@ ENDPROC(__vet_atags) */ __INIT __mmap_switched: - adr r3, __mmap_switched_data - - ldmia r3!, {r4, r5, r6, r7} - cmp r4, r5 @ Copy data segment if needed -1: cmpne r5, r6 - ldrne fp, [r4], #4 - strne fp, [r5], #4 - bne 1b - - mov fp, #0 @ Clear BSS (and zero fp) -1: cmp r6, r7 - strcc fp, [r6],#4 - bcc 1b - - ARM( ldmia r3, {r4, r5, r6, r7, sp}) - THUMB( ldmia r3, {r4, r5, r6, r7} ) - THUMB( ldr sp, [r3, #16] ) - str r9, [r4] @ Save processor ID - str r1, [r5] @ Save machine type - str r2, [r6] @ Save atags pointer - cmp r7, #0 - strne r0, [r7] @ Save control register values + + mov r7, r1 + mov r8, r2 + mov r10, r0 + + adr r4, __mmap_switched_data + mov fp, #0 + +#ifdef CONFIG_XIP_KERNEL + ARM( ldmia r4!, {r0, r1, r2, sp} ) + THUMB( ldmia r4!, {r0, r1, r2, r3} ) + THUMB( mov sp, r3 ) + sub r2, r2, r1 + bl memcpy @ copy .data to RAM +#endif + + ARM( ldmia r4!, {r0, r1, sp} ) + THUMB( ldmia r4!, {r0, r1, r3} ) + THUMB( mov sp, r3 ) + sub r1, r1, r0 + bl __memzero @ clear .bss + + ldmia r4, {r0, r1, r2, r3} + str r9, [r0] @ Save processor ID + str r7, [r1] @ Save machine type + str r8, [r2] @ Save atags pointer + cmp r3, #0 + strne r10, [r3] @ Save control register values b start_kernel ENDPROC(__mmap_switched) .align 2 .type __mmap_switched_data, %object __mmap_switched_data: - .long __data_loc @ r4 - .long _sdata @ r5 - .long __bss_start @ r6 - .long _end @ r7 - .long processor_id @ r4 - .long __machine_arch_type @ r5 - .long __atags_pointer @ r6 +#ifdef CONFIG_XIP_KERNEL + .long _sdata @ r0 + .long __data_loc @ r1 + .long _edata_loc @ r2 + .long __bss_stop @ sp (temporary stack in .bss) +#endif + + .long __bss_start @ r0 + .long __bss_stop @ r1 + .long init_thread_union + THREAD_START_SP @ sp + + .long processor_id @ r0 + .long __machine_arch_type @ r1 + .long __atags_pointer @ r2 #ifdef CONFIG_CPU_CP15 - .long cr_alignment @ r7 + .long cr_alignment @ r3 #else - .long 0 @ r7 + .long 0 @ r3 #endif - .long init_thread_union + THREAD_START_SP @ sp .size __mmap_switched_data, . - __mmap_switched_data /* diff --git a/arch/arm/kernel/vmlinux-xip.lds.S b/arch/arm/kernel/vmlinux-xip.lds.S index 8265b116218d..1598caada3bb 100644 --- a/arch/arm/kernel/vmlinux-xip.lds.S +++ b/arch/arm/kernel/vmlinux-xip.lds.S @@ -301,7 +301,7 @@ SECTIONS } #endif - BSS_SECTION(0, 0, 0) + BSS_SECTION(0, 0, 8) _end = .; STABS_DEBUG -- cgit v1.2.3 From 88cac29f42b107fe340a4b8c255bcc3a0cc2f858 Mon Sep 17 00:00:00 2001 From: Nicolas Pitre Date: Thu, 24 Aug 2017 16:35:36 -0400 Subject: ARM: vmlinux*.lds.S: some decruftification Remove stuff from vmlinux.lds.S that is relevant only to the XIP build, and stuff from vmlinux-xip.lds.S related to self-modifying code that makes no sense in the XIP case. Signed-off-by: Nicolas Pitre Reviewed-by: Ard Biesheuvel Tested-by: Chris Brandt --- arch/arm/kernel/vmlinux-xip.lds.S | 14 -------------- arch/arm/kernel/vmlinux.lds.S | 4 +--- 2 files changed, 1 insertion(+), 17 deletions(-) diff --git a/arch/arm/kernel/vmlinux-xip.lds.S b/arch/arm/kernel/vmlinux-xip.lds.S index 1598caada3bb..88e8db3979da 100644 --- a/arch/arm/kernel/vmlinux-xip.lds.S +++ b/arch/arm/kernel/vmlinux-xip.lds.S @@ -77,9 +77,7 @@ SECTIONS *(.text.fixup) *(__ex_table) #endif -#ifndef CONFIG_SMP_ON_UP *(.alt.smp.init) -#endif *(.discard) *(.discard.*) } @@ -181,18 +179,6 @@ SECTIONS *(.taglist.init) __tagtable_end = .; } -#ifdef CONFIG_SMP_ON_UP - .init.smpalt : { - __smpalt_begin = .; - *(.alt.smp.init) - __smpalt_end = .; - } -#endif - .init.pv_table : { - __pv_table_begin = .; - *(.pv_table) - __pv_table_end = .; - } .init.data : { INIT_SETUP(16) INIT_CALLS diff --git a/arch/arm/kernel/vmlinux.lds.S b/arch/arm/kernel/vmlinux.lds.S index c83a7ba737d6..4f86b4b7bdcc 100644 --- a/arch/arm/kernel/vmlinux.lds.S +++ b/arch/arm/kernel/vmlinux.lds.S @@ -236,9 +236,8 @@ SECTIONS . = ALIGN(THREAD_SIZE); #endif __init_end = .; - __data_loc = .; - .data : AT(__data_loc) { + .data : { _data = .; /* address in memory */ _sdata = .; @@ -260,7 +259,6 @@ SECTIONS _edata = .; } - _edata_loc = __data_loc + SIZEOF(.data); BUG_TABLE -- cgit v1.2.3 From 861e37da6a06c867b51528066cad84c0fb90cc0c Mon Sep 17 00:00:00 2001 From: Nicolas Pitre Date: Tue, 29 Aug 2017 16:33:57 -0400 Subject: ARM: vmlinux.lds.S: replace open coded .data sections with generic macros Our .data section is missing PAGE_ALIGNED_DATA() which contains, amongst other things, the vdso page. This creates a System.map that looks like this: c15769a8 D _edata c1577000 d vdso_data_store c1578000 D __start___bug_table c1580544 D __stop___bug_table c1580544 B __bss_start By using RW_DATA_SECTION() we pick whatever generic sections might be added in the future and have page-aligned data next to other strongly aligned data. Furthermore we now include the entire thing, including the bug table, in the data accounting surrounded by _sdata/_edata. While at it let's also remplace the open coded .init.data by its equivalent INIT_DATA_SECTION(). Signed-off-by: Nicolas Pitre Acked-by: Ard Biesheuvel Tested-by: Chris Brandt --- arch/arm/kernel/vmlinux.lds.S | 38 ++++++-------------------------------- 1 file changed, 6 insertions(+), 32 deletions(-) diff --git a/arch/arm/kernel/vmlinux.lds.S b/arch/arm/kernel/vmlinux.lds.S index 4f86b4b7bdcc..f73ba564b5e5 100644 --- a/arch/arm/kernel/vmlinux.lds.S +++ b/arch/arm/kernel/vmlinux.lds.S @@ -214,14 +214,9 @@ SECTIONS *(.pv_table) __pv_table_end = .; } - .init.data : { - INIT_DATA - INIT_SETUP(16) - INIT_CALLS - CON_INITCALL - SECURITY_INITCALL - INIT_RAM_FS - } + + INIT_DATA_SECTION(16) + .exit.data : { ARM_EXIT_KEEP(EXIT_DATA) } @@ -237,30 +232,9 @@ SECTIONS #endif __init_end = .; - .data : { - _data = .; /* address in memory */ - _sdata = .; - - /* - * first, the init task union, aligned - * to an 8192 byte boundary. - */ - INIT_TASK_DATA(THREAD_SIZE) - - NOSAVE_DATA - CACHELINE_ALIGNED_DATA(L1_CACHE_BYTES) - READ_MOSTLY_DATA(L1_CACHE_BYTES) - - /* - * and the usual data section - */ - DATA_DATA - CONSTRUCTORS - - _edata = .; - } - - BUG_TABLE + _sdata = .; + RW_DATA_SECTION(L1_CACHE_BYTES, PAGE_SIZE, THREAD_SIZE) + _edata = .; #ifdef CONFIG_HAVE_TCM /* -- cgit v1.2.3 From 0d302c710bf04149b6de7cd9a7064d0ca6cd4bea Mon Sep 17 00:00:00 2001 From: Nicolas Pitre Date: Tue, 29 Aug 2017 17:58:41 -0400 Subject: ARM: vmlinux-xip.lds.S: fix multiple issues The XIP linker script has several problems: - PAGE_ALIGNED_DATA is missing and is likely to end up somewhere with the wrong LMA. - BUG_TABLE definitely has the wrong LMA, it is not copied to RAM, and its VMA is unaccounted for and likely to clash with dynamic memory usage. - TCM usage is similarly broken. - PERCPU_SECTION is left in ROM despite being written to. Let's use generic macros for those things and locate them appropriately. Incidentally, those macros are usable with a LMA != VMA already by properly defining LOAD_OFFSET. TCM is not fixed here. It never worked in a XIP configuration anyway, so that can wait until another round of cleanups. Signed-off-by: Nicolas Pitre Acked-by: Ard Biesheuvel Tested-by: Chris Brandt --- arch/arm/kernel/vmlinux-xip.lds.S | 70 +++++++++++++++++++-------------------- 1 file changed, 34 insertions(+), 36 deletions(-) diff --git a/arch/arm/kernel/vmlinux-xip.lds.S b/arch/arm/kernel/vmlinux-xip.lds.S index 88e8db3979da..39b1fb470a0a 100644 --- a/arch/arm/kernel/vmlinux-xip.lds.S +++ b/arch/arm/kernel/vmlinux-xip.lds.S @@ -179,7 +179,7 @@ SECTIONS *(.taglist.init) __tagtable_end = .; } - .init.data : { + .init.rodata : { INIT_SETUP(16) INIT_CALLS CON_INITCALL @@ -187,48 +187,46 @@ SECTIONS INIT_RAM_FS } -#ifdef CONFIG_SMP - PERCPU_SECTION(L1_CACHE_BYTES) -#endif - _exiprom = .; /* End of XIP ROM area */ - __data_loc = ALIGN(4); /* location in binary */ - . = PAGE_OFFSET + TEXT_OFFSET; - - .data : AT(__data_loc) { - _data = .; /* address in memory */ - _sdata = .; - /* - * first, the init task union, aligned - * to an 8192 byte boundary. - */ - INIT_TASK_DATA(THREAD_SIZE) +/* + * From this point, stuff is considered writable and will be copied to RAM + */ + __data_loc = ALIGN(4); /* location in file */ + . = PAGE_OFFSET + TEXT_OFFSET; /* location in memory */ +#undef LOAD_OFFSET +#define LOAD_OFFSET (PAGE_OFFSET + TEXT_OFFSET - __data_loc) + + . = ALIGN(THREAD_SIZE); + _sdata = .; + RW_DATA_SECTION(L1_CACHE_BYTES, PAGE_SIZE, THREAD_SIZE) + .data.ro_after_init : AT(ADDR(.data.ro_after_init) - LOAD_OFFSET) { + *(.data..ro_after_init) + } + _edata = .; - . = ALIGN(PAGE_SIZE); - __init_begin = .; + . = ALIGN(PAGE_SIZE); + __init_begin = .; + .init.data : AT(ADDR(.init.data) - LOAD_OFFSET) { INIT_DATA + } + .exit.data : AT(ADDR(.exit.data) - LOAD_OFFSET) { ARM_EXIT_KEEP(EXIT_DATA) - . = ALIGN(PAGE_SIZE); - __init_end = .; - - *(.data..ro_after_init) - - NOSAVE_DATA - CACHELINE_ALIGNED_DATA(L1_CACHE_BYTES) - READ_MOSTLY_DATA(L1_CACHE_BYTES) - - /* - * and the usual data section - */ - DATA_DATA - CONSTRUCTORS - - _edata = .; } - _edata_loc = __data_loc + SIZEOF(.data); +#ifdef CONFIG_SMP + PERCPU_SECTION(L1_CACHE_BYTES) +#endif + + /* + * End of copied data. We need a dummy section to get its LMA. + * Also located before final ALIGN() as trailing padding is not stored + * in the resulting binary file and useless to copy. + */ + .data.endmark : AT(ADDR(.data.endmark) - LOAD_OFFSET) { } + _edata_loc = LOADADDR(.data.endmark); - BUG_TABLE + . = ALIGN(PAGE_SIZE); + __init_end = .; #ifdef CONFIG_HAVE_TCM /* -- cgit v1.2.3 From ca8b5d97d6bfd2d24cec053bbbe35cf356bec4e3 Mon Sep 17 00:00:00 2001 From: Nicolas Pitre Date: Fri, 25 Aug 2017 00:54:18 -0400 Subject: ARM: XIP kernel: store .data compressed in ROM The .data segment stored in ROM is only copied to RAM once at boot time and never referenced afterwards. This is arguably a suboptimal usage of ROM resources. This patch allows for compressing the .data segment before storing it into ROM and decompressing it to RAM rather than simply copying it, saving on precious ROM space. Because global data is not available yet (obviously) we must allocate decompressor workspace memory on the stack. The .bss area is used as a stack area for that purpose before it is cleared. The required stack frame is 9568 bytes for __inflate_kernel_data() alone, so make sure the .bss is large enough to cope with that plus extra room for called functions or fail the build. Those numbers were picked arbitrarily based on the above 9568 byte stack frame: 10240 (2.5 * PAGE_SIZE): used to override -Wframe-larger-than whose default value is 1024. 12288 (3 * PAGE_SIZE): minimum .bss size to contain the stack. Signed-off-by: Nicolas Pitre Reviewed-by: Ard Biesheuvel Tested-by: Chris Brandt --- arch/arm/Kconfig | 11 +++++++ arch/arm/boot/Makefile | 13 +++++++- arch/arm/boot/deflate_xip_data.sh | 64 +++++++++++++++++++++++++++++++++++++ arch/arm/kernel/Makefile | 5 +++ arch/arm/kernel/head-common.S | 11 ++++++- arch/arm/kernel/head-inflate-data.c | 62 +++++++++++++++++++++++++++++++++++ arch/arm/kernel/vmlinux-xip.lds.S | 8 +++++ 7 files changed, 172 insertions(+), 2 deletions(-) create mode 100755 arch/arm/boot/deflate_xip_data.sh create mode 100644 arch/arm/kernel/head-inflate-data.c diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index 61a0cb15067e..bf79c461bd2c 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -2005,6 +2005,17 @@ config XIP_PHYS_ADDR be linked for and stored to. This address is dependent on your own flash usage. +config XIP_DEFLATED_DATA + bool "Store kernel .data section compressed in ROM" + depends on XIP_KERNEL + select ZLIB_INFLATE + help + Before the kernel is actually executed, its .data section has to be + copied to RAM from ROM. This option allows for storing that data + in compressed form and decompressed to RAM rather than merely being + copied, saving some precious ROM space. A possible drawback is a + slightly longer boot delay. + config KEXEC bool "Kexec system call (EXPERIMENTAL)" depends on (!SMP || PM_SLEEP_SMP) diff --git a/arch/arm/boot/Makefile b/arch/arm/boot/Makefile index 50f8d1be7fcb..a3af4dc08c3e 100644 --- a/arch/arm/boot/Makefile +++ b/arch/arm/boot/Makefile @@ -31,8 +31,19 @@ targets := Image zImage xipImage bootpImage uImage ifeq ($(CONFIG_XIP_KERNEL),y) +cmd_deflate_xip_data = $(CONFIG_SHELL) -c \ + '$(srctree)/$(src)/deflate_xip_data.sh $< $@ || { rm -f $@; false; }' + +ifeq ($(CONFIG_XIP_DEFLATED_DATA),y) +quiet_cmd_mkxip = XIPZ $@ +cmd_mkxip = $(cmd_objcopy) && $(cmd_deflate_xip_data) +else +quiet_cmd_mkxip = $(quiet_cmd_objcopy) +cmd_mkxip = $(cmd_objcopy) +endif + $(obj)/xipImage: vmlinux FORCE - $(call if_changed,objcopy) + $(call if_changed,mkxip) @$(kecho) ' Physical Address of xipImage: $(CONFIG_XIP_PHYS_ADDR)' $(obj)/Image $(obj)/zImage: FORCE diff --git a/arch/arm/boot/deflate_xip_data.sh b/arch/arm/boot/deflate_xip_data.sh new file mode 100755 index 000000000000..1189598a25eb --- /dev/null +++ b/arch/arm/boot/deflate_xip_data.sh @@ -0,0 +1,64 @@ +#!/bin/sh + +# XIP kernel .data segment compressor +# +# Created by: Nicolas Pitre, August 2017 +# Copyright: (C) 2017 Linaro Limited +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License version 2 as +# published by the Free Software Foundation. + +# This script locates the start of the .data section in xipImage and +# substitutes it with a compressed version. The needed offsets are obtained +# from symbol addresses in vmlinux. It is expected that .data extends to +# the end of xipImage. + +set -e + +VMLINUX="$1" +XIPIMAGE="$2" + +DD="dd status=none" + +# Use "make V=1" to debug this script. +case "$KBUILD_VERBOSE" in +*1*) + set -x + ;; +esac + +sym_val() { + # extract hex value for symbol in $1 + local val=$($NM "$VMLINUX" | sed -n "/ $1$/{s/ .*$//p;q}") + [ "$val" ] || { echo "can't find $1 in $VMLINUX" 1>&2; exit 1; } + # convert from hex to decimal + echo $((0x$val)) +} + +__data_loc=$(sym_val __data_loc) +_edata_loc=$(sym_val _edata_loc) +base_offset=$(sym_val _xiprom) + +# convert to file based offsets +data_start=$(($__data_loc - $base_offset)) +data_end=$(($_edata_loc - $base_offset)) + +# Make sure data occupies the last part of the file. +file_end=$(stat -c "%s" "$XIPIMAGE") +if [ "$file_end" != "$data_end" ]; then + printf "end of xipImage doesn't match with _edata_loc (%#x vs %#x)\n" \ + $(($file_end + $base_offset)) $_edata_loc 2>&1 + exit 1; +fi + +# be ready to clean up +trap 'rm -f "$XIPIMAGE.tmp"' 0 1 2 3 + +# substitute the data section by a compressed version +$DD if="$XIPIMAGE" count=$data_start iflag=count_bytes of="$XIPIMAGE.tmp" +$DD if="$XIPIMAGE" skip=$data_start iflag=skip_bytes | +gzip -9 >> "$XIPIMAGE.tmp" + +# replace kernel binary +mv -f "$XIPIMAGE.tmp" "$XIPIMAGE" diff --git a/arch/arm/kernel/Makefile b/arch/arm/kernel/Makefile index ad325a8c7e1e..52f437997cc6 100644 --- a/arch/arm/kernel/Makefile +++ b/arch/arm/kernel/Makefile @@ -87,6 +87,11 @@ head-y := head$(MMUEXT).o obj-$(CONFIG_DEBUG_LL) += debug.o obj-$(CONFIG_EARLY_PRINTK) += early_printk.o +# This is executed very early using a temporary stack when no memory allocator +# nor global data is available. Everything has to be allocated on the stack. +CFLAGS_head-inflate-data.o := $(call cc-option,-Wframe-larger-than=10240) +obj-$(CONFIG_XIP_DEFLATED_DATA) += head-inflate-data.o + obj-$(CONFIG_ARM_VIRT_EXT) += hyp-stub.o AFLAGS_hyp-stub.o :=-Wa,-march=armv7-a ifeq ($(CONFIG_ARM_PSCI),y) diff --git a/arch/arm/kernel/head-common.S b/arch/arm/kernel/head-common.S index bf9c4e38eced..a25027b87a60 100644 --- a/arch/arm/kernel/head-common.S +++ b/arch/arm/kernel/head-common.S @@ -87,7 +87,14 @@ __mmap_switched: adr r4, __mmap_switched_data mov fp, #0 -#ifdef CONFIG_XIP_KERNEL +#if defined(CONFIG_XIP_DEFLATED_DATA) + ARM( ldr sp, [r4], #4 ) + THUMB( ldr sp, [r4] ) + THUMB( add r4, #4 ) + bl __inflate_kernel_data @ decompress .data to RAM + teq r0, #0 + bne __error +#elif defined(CONFIG_XIP_KERNEL) ARM( ldmia r4!, {r0, r1, r2, sp} ) THUMB( ldmia r4!, {r0, r1, r2, r3} ) THUMB( mov sp, r3 ) @@ -114,9 +121,11 @@ ENDPROC(__mmap_switched) .type __mmap_switched_data, %object __mmap_switched_data: #ifdef CONFIG_XIP_KERNEL +#ifndef CONFIG_XIP_DEFLATED_DATA .long _sdata @ r0 .long __data_loc @ r1 .long _edata_loc @ r2 +#endif .long __bss_stop @ sp (temporary stack in .bss) #endif diff --git a/arch/arm/kernel/head-inflate-data.c b/arch/arm/kernel/head-inflate-data.c new file mode 100644 index 000000000000..6dd0ce5e6058 --- /dev/null +++ b/arch/arm/kernel/head-inflate-data.c @@ -0,0 +1,62 @@ +/* + * XIP kernel .data segment decompressor + * + * Created by: Nicolas Pitre, August 2017 + * Copyright: (C) 2017 Linaro Limited + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include +#include + +/* for struct inflate_state */ +#include "../../../lib/zlib_inflate/inftrees.h" +#include "../../../lib/zlib_inflate/inflate.h" +#include "../../../lib/zlib_inflate/infutil.h" + +extern char __data_loc[]; +extern char _edata_loc[]; +extern char _sdata[]; + +/* + * This code is called very early during the boot process to decompress + * the .data segment stored compressed in ROM. Therefore none of the global + * variables are valid yet, hence no kernel services such as memory + * allocation is available. Everything must be allocated on the stack and + * we must avoid any global data access. We use a temporary stack located + * in the .bss area. The linker script makes sure the .bss is big enough + * to hold our stack frame plus some room for called functions. + * + * We mimic the code in lib/decompress_inflate.c to use the smallest work + * area possible. And because everything is statically allocated on the + * stack then there is no need to clean up before returning. + */ + +int __init __inflate_kernel_data(void) +{ + struct z_stream_s stream, *strm = &stream; + struct inflate_state state; + char *in = __data_loc; + int rc; + + /* Check and skip gzip header (assume no filename) */ + if (in[0] != 0x1f || in[1] != 0x8b || in[2] != 0x08 || in[3] & ~3) + return -1; + in += 10; + + strm->workspace = &state; + strm->next_in = in; + strm->avail_in = _edata_loc - __data_loc; /* upper bound */ + strm->next_out = _sdata; + strm->avail_out = _edata_loc - __data_loc; + zlib_inflateInit2(strm, -MAX_WBITS); + WS(strm)->inflate_state.wsize = 0; + WS(strm)->inflate_state.window = NULL; + rc = zlib_inflate(strm, Z_FINISH); + if (rc == Z_OK || rc == Z_STREAM_END) + rc = strm->avail_out; /* should be 0 */ + return rc; +} diff --git a/arch/arm/kernel/vmlinux-xip.lds.S b/arch/arm/kernel/vmlinux-xip.lds.S index 39b1fb470a0a..7a844310085e 100644 --- a/arch/arm/kernel/vmlinux-xip.lds.S +++ b/arch/arm/kernel/vmlinux-xip.lds.S @@ -306,3 +306,11 @@ ASSERT((__arch_info_end - __arch_info_begin), "no machine record defined") */ ASSERT(__hyp_idmap_text_end - (__hyp_idmap_text_start & PAGE_MASK) <= PAGE_SIZE, "HYP init code too big or misaligned") + +#ifdef CONFIG_XIP_DEFLATED_DATA +/* + * The .bss is used as a stack area for __inflate_kernel_data() whose stack + * frame is 9568 bytes. Make sure it has extra room left. + */ +ASSERT((_end - __bss_start) >= 12288, ".bss too small for CONFIG_XIP_DEFLATED_DATA") +#endif -- cgit v1.2.3 From cf1b09908a231f3bb5e8ca15b76f05c4df91e292 Mon Sep 17 00:00:00 2001 From: Nicolas Pitre Date: Fri, 1 Sep 2017 03:39:46 +0100 Subject: ARM: 8693/1: discard memblock arrays when possible On ARM the generic pfn_valid() version is used with some configurations such as SA1100 based devices. In that case the memblock arrays are no longer used after boot and can be discarded. Signed-off-by: Nicolas Pitre Signed-off-by: Russell King --- arch/arm/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index 7888c9803eb0..979aac3e2fbf 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -2,6 +2,7 @@ config ARM bool default y select ARCH_CLOCKSOURCE_DATA + select ARCH_DISCARD_MEMBLOCK if !HAVE_ARCH_PFN_VALID select ARCH_HAS_DEBUG_VIRTUAL select ARCH_HAS_DEVMEM_IS_ALLOWED select ARCH_HAS_ELF_RANDOMIZE -- cgit v1.2.3 From 94058906acfbe3474e26d665924260e44a30e1bc Mon Sep 17 00:00:00 2001 From: Vladimir Murzin Date: Mon, 4 Sep 2017 09:17:09 +0100 Subject: ARM: 8694/1: Remove leftover of fixmap_page_table fixmap_page_table was removed by commit 836a24183273 (ARM: expand fixmap region to 3MB), but some traces are still there - get rid of them. Signed-off-by: Vladimir Murzin Signed-off-by: Russell King --- arch/arm/include/asm/highmem.h | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/arm/include/asm/highmem.h b/arch/arm/include/asm/highmem.h index 0a0e2d1784c0..61736fdccc79 100644 --- a/arch/arm/include/asm/highmem.h +++ b/arch/arm/include/asm/highmem.h @@ -18,7 +18,6 @@ } while (0) extern pte_t *pkmap_page_table; -extern pte_t *fixmap_page_table; extern void *kmap_high(struct page *page); extern void kunmap_high(struct page *page); -- cgit v1.2.3 From 6022f80da0d9d42e02e78f3fb42930ee86f3e7af Mon Sep 17 00:00:00 2001 From: Vladimir Murzin Date: Mon, 4 Sep 2017 09:17:32 +0100 Subject: ARM: 8695/1: entry: Remove dead code in sys_mmap2 We support page size of 4K only, remove dead code. Signed-off-by: Vladimir Murzin Signed-off-by: Russell King --- arch/arm/kernel/entry-common.S | 9 --------- 1 file changed, 9 deletions(-) diff --git a/arch/arm/kernel/entry-common.S b/arch/arm/kernel/entry-common.S index ca3614dc6938..cc70bc5650a5 100644 --- a/arch/arm/kernel/entry-common.S +++ b/arch/arm/kernel/entry-common.S @@ -394,17 +394,8 @@ ENDPROC(sys_fstatfs64_wrapper) * offset, we return EINVAL. */ sys_mmap2: -#if PAGE_SHIFT > 12 - tst r5, #PGOFF_MASK - moveq r5, r5, lsr #PAGE_SHIFT - 12 - streq r5, [sp, #4] - beq sys_mmap_pgoff - mov r0, #-EINVAL - ret lr -#else str r5, [sp, #4] b sys_mmap_pgoff -#endif ENDPROC(sys_mmap2) #ifdef CONFIG_OABI_COMPAT -- cgit v1.2.3 From 0d9ac1625ac1041742c72c3b680198f0304539b2 Mon Sep 17 00:00:00 2001 From: Vladimir Murzin Date: Mon, 4 Sep 2017 09:17:48 +0100 Subject: ARM: 8696/1: mm: Remove dead code in mem_init() The code in question checks memory constrains to set default policy for overcommit; however we support page size of 4K only thus condition is always evaluated to false. Remove that dead code. Signed-off-by: Vladimir Murzin Signed-off-by: Russell King --- arch/arm/mm/init.c | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/arch/arm/mm/init.c b/arch/arm/mm/init.c index ad80548325fe..81d4482b6861 100644 --- a/arch/arm/mm/init.c +++ b/arch/arm/mm/init.c @@ -580,16 +580,6 @@ void __init mem_init(void) BUILD_BUG_ON(PKMAP_BASE + LAST_PKMAP * PAGE_SIZE > PAGE_OFFSET); BUG_ON(PKMAP_BASE + LAST_PKMAP * PAGE_SIZE > PAGE_OFFSET); #endif - - if (PAGE_SIZE >= 16384 && get_num_physpages() <= 128) { - extern int sysctl_overcommit_memory; - /* - * On a machine this small we won't get - * anywhere without overcommit, so turn - * it on by default. - */ - sysctl_overcommit_memory = OVERCOMMIT_ALWAYS; - } } #ifdef CONFIG_STRICT_KERNEL_RWX -- cgit v1.2.3 From acb624488df3073be4e6f3c0a86b155c40d63ab2 Mon Sep 17 00:00:00 2001 From: Vladimir Murzin Date: Mon, 25 Sep 2017 10:25:53 +0100 Subject: ARM: 8697/1: dma-mapping: Do not pass data to gen_pool_set_algo() gen_pool_first_fit_order_align() does not make use of additional data, so pass plain NULL there. Signed-off-by: Vladimir Murzin Signed-off-by: Russell King --- arch/arm/mm/dma-mapping.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c index fcf1473d6fed..785606e3783e 100644 --- a/arch/arm/mm/dma-mapping.c +++ b/arch/arm/mm/dma-mapping.c @@ -443,7 +443,7 @@ static int __init atomic_pool_init(void) gen_pool_set_algo(atomic_pool, gen_pool_first_fit_order_align, - (void *)PAGE_SHIFT); + NULL); pr_info("DMA: preallocated %zu KiB pool for atomic coherent allocations\n", atomic_pool_size / 1024); return 0; -- cgit v1.2.3 From b337e1c40d3ea2d6f09228c4ff203f006abc9095 Mon Sep 17 00:00:00 2001 From: Vladimir Murzin Date: Mon, 25 Sep 2017 10:29:07 +0100 Subject: ARM: 8698/1: dma-mapping: Mark atomic_pool as __ro_after_init atomic_pool is setup once while init stage and never changed after that, so it is good candidate for __ro_after_init. Since we are here mark atomic_pool_size with __init_data. Signed-off-by: Vladimir Murzin Signed-off-by: Russell King --- arch/arm/mm/dma-mapping.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c index 785606e3783e..24519f4508b9 100644 --- a/arch/arm/mm/dma-mapping.c +++ b/arch/arm/mm/dma-mapping.c @@ -382,9 +382,9 @@ static void __dma_free_remap(void *cpu_addr, size_t size) } #define DEFAULT_DMA_COHERENT_POOL_SIZE SZ_256K -static struct gen_pool *atomic_pool; +static struct gen_pool *atomic_pool __ro_after_init; -static size_t atomic_pool_size = DEFAULT_DMA_COHERENT_POOL_SIZE; +static size_t atomic_pool_size __initdata = DEFAULT_DMA_COHERENT_POOL_SIZE; static int __init early_coherent_pool(char *p) { -- cgit v1.2.3 From 0f7c4c15a30ffc5f5fd563ad1159cb899e121407 Mon Sep 17 00:00:00 2001 From: Vladimir Murzin Date: Mon, 25 Sep 2017 10:31:15 +0100 Subject: ARM: 8699/1: dma-mapping: Remove init_dma_coherent_pool_size() There are no users of init_dma_coherent_pool_size() left due to 387870f ("mm: dmapool: use provided gfp flags for all dma_alloc_coherent() calls"), so remove it. Signed-off-by: Vladimir Murzin Signed-off-by: Russell King --- arch/arm/include/asm/dma-mapping.h | 7 ------- arch/arm/mm/dma-mapping.c | 15 --------------- 2 files changed, 22 deletions(-) diff --git a/arch/arm/include/asm/dma-mapping.h b/arch/arm/include/asm/dma-mapping.h index 4e0285a66ef8..462803f3a6a0 100644 --- a/arch/arm/include/asm/dma-mapping.h +++ b/arch/arm/include/asm/dma-mapping.h @@ -189,13 +189,6 @@ extern int arm_dma_mmap(struct device *dev, struct vm_area_struct *vma, void *cpu_addr, dma_addr_t dma_addr, size_t size, unsigned long attrs); -/* - * This can be called during early boot to increase the size of the atomic - * coherent DMA pool above the default value of 256KiB. It must be called - * before postcore_initcall. - */ -extern void __init init_dma_coherent_pool_size(unsigned long size); - /* * For SA-1111, IXP425, and ADI systems the dma-mapping functions are "magic" * and utilize bounce buffers as needed to work around limited DMA windows. diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c index 24519f4508b9..ada8eb206a90 100644 --- a/arch/arm/mm/dma-mapping.c +++ b/arch/arm/mm/dma-mapping.c @@ -393,21 +393,6 @@ static int __init early_coherent_pool(char *p) } early_param("coherent_pool", early_coherent_pool); -void __init init_dma_coherent_pool_size(unsigned long size) -{ - /* - * Catch any attempt to set the pool size too late. - */ - BUG_ON(atomic_pool); - - /* - * Set architecture specific coherent pool size only if - * it has not been changed by kernel command line parameter. - */ - if (atomic_pool_size == DEFAULT_DMA_COHERENT_POOL_SIZE) - atomic_pool_size = size; -} - /* * Initialise the coherent pool for atomic allocations. */ -- cgit v1.2.3 From 99cf8f903148347e3d2ac86ffe98bb04bebc6983 Mon Sep 17 00:00:00 2001 From: Russell King Date: Thu, 21 Sep 2017 12:06:20 +0100 Subject: ARM: better diagnostics with missing/corrupt dtb With a kernel containing both DT and atag support, the diagnostics output when the dtb is missing or corrupt assume that we're trying to boot using atags and the machine ID, and only print the machine ID. This is not useful for diagnosing a missing or corrupt dtb. Move the message into arch/arm/kernel/setup.c, and print the address of the dtb/atag list, and the first 16 bytes of memory of the dtb or atag list. This allows us to see whether the dtb was corrupted in some way, causing the fallback to the machine ID / atag list. Tested-by: Keerthy Signed-off-by: Russell King --- arch/arm/kernel/atags_parse.c | 7 ++----- arch/arm/kernel/setup.c | 10 ++++++++++ 2 files changed, 12 insertions(+), 5 deletions(-) diff --git a/arch/arm/kernel/atags_parse.c b/arch/arm/kernel/atags_parse.c index 98fbfd235ac8..c10a3e8ee998 100644 --- a/arch/arm/kernel/atags_parse.c +++ b/arch/arm/kernel/atags_parse.c @@ -196,11 +196,8 @@ setup_machine_tags(phys_addr_t __atags_pointer, unsigned int machine_nr) break; } - if (!mdesc) { - early_print("\nError: unrecognized/unsupported machine ID" - " (r1 = 0x%08x).\n\n", machine_nr); - dump_machine_table(); /* does not return */ - } + if (!mdesc) + return NULL; if (__atags_pointer) tags = phys_to_virt(__atags_pointer); diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c index 8e9a3e40d949..fc40a2b40595 100644 --- a/arch/arm/kernel/setup.c +++ b/arch/arm/kernel/setup.c @@ -1069,6 +1069,16 @@ void __init setup_arch(char **cmdline_p) mdesc = setup_machine_fdt(__atags_pointer); if (!mdesc) mdesc = setup_machine_tags(__atags_pointer, __machine_arch_type); + if (!mdesc) { + early_print("\nError: invalid dtb and unrecognized/unsupported machine ID\n"); + early_print(" r1=0x%08x, r2=0x%08x\n", __machine_arch_type, + __atags_pointer); + if (__atags_pointer) + early_print(" r2[]=%*ph\n", 16, + phys_to_virt(__atags_pointer)); + dump_machine_table(); + } + machine_desc = mdesc; machine_name = mdesc->name; dump_stack_set_arch_desc("%s", mdesc->name); -- cgit v1.2.3 From 7170a3124ccbc929d0716a77b76b994bb082a3cd Mon Sep 17 00:00:00 2001 From: Russell King Date: Wed, 7 Sep 2016 13:45:11 +0100 Subject: pcmcia: sa1111: use sa1111_get_irq() to obtain IRQ resources Use the newly provided sa1111_get_irq() to fetch the IRQ resources for the SA1111 PCMCIA driver. Signed-off-by: Russell King --- drivers/pcmcia/sa1111_generic.c | 21 ++++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) diff --git a/drivers/pcmcia/sa1111_generic.c b/drivers/pcmcia/sa1111_generic.c index 3d95dffcff7a..da3ddf7769f0 100644 --- a/drivers/pcmcia/sa1111_generic.c +++ b/drivers/pcmcia/sa1111_generic.c @@ -63,6 +63,7 @@ #define IDX_IRQ_S1_READY_NINT (3) #define IDX_IRQ_S1_CD_VALID (4) #define IDX_IRQ_S1_BVD1_STSCHG (5) +#define NUM_IRQS (6) void sa1111_pcmcia_socket_state(struct soc_pcmcia_socket *skt, struct pcmcia_state *state) { @@ -137,12 +138,18 @@ int sa1111_pcmcia_add(struct sa1111_dev *dev, struct pcmcia_low_level *ops, { struct sa1111_pcmcia_socket *s; struct clk *clk; - int i, ret = 0; + int i, ret = 0, irqs[NUM_IRQS]; clk = devm_clk_get(&dev->dev, NULL); if (IS_ERR(clk)) return PTR_ERR(clk); + for (i = 0; i < NUM_IRQS; i++) { + irqs[i] = sa1111_get_irq(dev, i); + if (irqs[i] <= 0) + return irqs[i] ? : -ENXIO; + } + ops->socket_state = sa1111_pcmcia_socket_state; for (i = 0; i < ops->nr; i++) { @@ -156,16 +163,16 @@ int sa1111_pcmcia_add(struct sa1111_dev *dev, struct pcmcia_low_level *ops, soc_pcmcia_init_one(&s->soc, ops, &dev->dev); s->dev = dev; if (s->soc.nr) { - s->soc.socket.pci_irq = dev->irq[IDX_IRQ_S1_READY_NINT]; - s->soc.stat[SOC_STAT_CD].irq = dev->irq[IDX_IRQ_S1_CD_VALID]; + s->soc.socket.pci_irq = irqs[IDX_IRQ_S1_READY_NINT]; + s->soc.stat[SOC_STAT_CD].irq = irqs[IDX_IRQ_S1_CD_VALID]; s->soc.stat[SOC_STAT_CD].name = "SA1111 CF card detect"; - s->soc.stat[SOC_STAT_BVD1].irq = dev->irq[IDX_IRQ_S1_BVD1_STSCHG]; + s->soc.stat[SOC_STAT_BVD1].irq = irqs[IDX_IRQ_S1_BVD1_STSCHG]; s->soc.stat[SOC_STAT_BVD1].name = "SA1111 CF BVD1"; } else { - s->soc.socket.pci_irq = dev->irq[IDX_IRQ_S0_READY_NINT]; - s->soc.stat[SOC_STAT_CD].irq = dev->irq[IDX_IRQ_S0_CD_VALID]; + s->soc.socket.pci_irq = irqs[IDX_IRQ_S0_READY_NINT]; + s->soc.stat[SOC_STAT_CD].irq = irqs[IDX_IRQ_S0_CD_VALID]; s->soc.stat[SOC_STAT_CD].name = "SA1111 PCMCIA card detect"; - s->soc.stat[SOC_STAT_BVD1].irq = dev->irq[IDX_IRQ_S0_BVD1_STSCHG]; + s->soc.stat[SOC_STAT_BVD1].irq = irqs[IDX_IRQ_S0_BVD1_STSCHG]; s->soc.stat[SOC_STAT_BVD1].name = "SA1111 PCMCIA BVD1"; } -- cgit v1.2.3 From de854b336d209095b64b3b6c5c1c6a13938ef2d6 Mon Sep 17 00:00:00 2001 From: Russell King Date: Wed, 7 Sep 2016 15:19:52 +0100 Subject: pcmcia: sa1111: remove special sa1111 mmio accessors Remove the special SA1111 MMIO accessors from the SA1111 PCMCIA driver as their definition will be removed shortly. The SA1111 accessors are barrierless, so use the _relaxed variants. Signed-off-by: Russell King --- drivers/pcmcia/sa1111_generic.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/pcmcia/sa1111_generic.c b/drivers/pcmcia/sa1111_generic.c index da3ddf7769f0..5ef351f87bfe 100644 --- a/drivers/pcmcia/sa1111_generic.c +++ b/drivers/pcmcia/sa1111_generic.c @@ -68,7 +68,7 @@ void sa1111_pcmcia_socket_state(struct soc_pcmcia_socket *skt, struct pcmcia_state *state) { struct sa1111_pcmcia_socket *s = to_skt(skt); - unsigned long status = sa1111_readl(s->dev->mapbase + PCSR); + u32 status = readl_relaxed(s->dev->mapbase + PCSR); switch (skt->nr) { case 0: @@ -96,7 +96,7 @@ void sa1111_pcmcia_socket_state(struct soc_pcmcia_socket *skt, struct pcmcia_sta int sa1111_pcmcia_configure_socket(struct soc_pcmcia_socket *skt, const socket_state_t *state) { struct sa1111_pcmcia_socket *s = to_skt(skt); - unsigned int pccr_skt_mask, pccr_set_mask, val; + u32 pccr_skt_mask, pccr_set_mask, val; unsigned long flags; switch (skt->nr) { @@ -124,10 +124,10 @@ int sa1111_pcmcia_configure_socket(struct soc_pcmcia_socket *skt, const socket_s pccr_set_mask |= PCCR_S0_FLT|PCCR_S1_FLT; local_irq_save(flags); - val = sa1111_readl(s->dev->mapbase + PCCR); + val = readl_relaxed(s->dev->mapbase + PCCR); val &= ~pccr_skt_mask; val |= pccr_set_mask & pccr_skt_mask; - sa1111_writel(val, s->dev->mapbase + PCCR); + writel_relaxed(val, s->dev->mapbase + PCCR); local_irq_restore(flags); return 0; @@ -208,8 +208,8 @@ static int pcmcia_probe(struct sa1111_dev *dev) /* * Initialise the suspend state. */ - sa1111_writel(PCSSR_S0_SLEEP | PCSSR_S1_SLEEP, base + PCSSR); - sa1111_writel(PCCR_S0_FLT | PCCR_S1_FLT, base + PCCR); + writel_relaxed(PCSSR_S0_SLEEP | PCSSR_S1_SLEEP, base + PCSSR); + writel_relaxed(PCCR_S0_FLT | PCCR_S1_FLT, base + PCCR); ret = -ENODEV; #ifdef CONFIG_SA1100_BADGE4 -- cgit v1.2.3 From 429f7a062e3b5cf6fcf01eb00600cee5fe4d751f Mon Sep 17 00:00:00 2001 From: Russell King Date: Thu, 21 Sep 2017 17:15:23 +0100 Subject: ARM: decompressor: fix BSS size calculation Assuming size(1) gives the size of the BSS is a mistake - it reports the size of the .bss section in the ELF image, which may not be the same as the region we mark with the __bss_start..__bss_stop symbols. We use the size of the BSS in the decompressor to know whether the kernel will overwrite the appended dtb, by adding the BSS size to the size of the Image (stored at the end of the compressed data) and adding the desired address of the decompressed image. If the BSS size is smaller than it really is, the decompressor can incorrectly assume that the BSS clearance will not overwrite the DTB. Here is an illustration: $ arm-linux-size vmlinux text data bss dec hex filename 8136972 3098076 10240348 21475396 147b044 vmlinux $ arm-linux-nm vmlinux | grep __bss_ c0ac0e34 B __bss_start c1484f9c B __bss_stop $ stat -c %s arch/arm/boot/Image 11243060 In the above case, we are 12 bytes short. This is caused by the BSS section being aligned by one of its input sections: Idx Name Size VMA LMA File off Algn 23 __bug_table 00005d3c c0abb0f8 c0abb0f8 00acb0f8 2**2 CONTENTS, ALLOC, LOAD, DATA 24 .bss 009c415c c0ac0e40 c0ac0e40 00ad0e34 2**6 ALLOC Note that there's an additional 12 bytes difference between the file offset and LMA compared with the bug table - this occurs because one of the input sections for the .bss section requires a 64 byte alignment. Fix this by using 'nm' and perl to obtain the address of the __bss_start and __bss_stop symbols, using their difference for the size of the BSS. Tested-by: Tony Lindgren Tested-by: Keerthy Signed-off-by: Russell King --- arch/arm/boot/compressed/Makefile | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/arch/arm/boot/compressed/Makefile b/arch/arm/boot/compressed/Makefile index d50430c40045..9519ce3d2e1c 100644 --- a/arch/arm/boot/compressed/Makefile +++ b/arch/arm/boot/compressed/Makefile @@ -116,8 +116,11 @@ ccflags-y := -fpic -mno-single-pic-base -fno-builtin -I$(obj) asflags-y := -DZIMAGE # Supply kernel BSS size to the decompressor via a linker symbol. -KBSS_SZ = $(shell $(CROSS_COMPILE)size $(obj)/../../../../vmlinux | \ - awk 'END{print $$3}') +KBSS_SZ = $(shell $(CROSS_COMPILE)nm $(obj)/../../../../vmlinux | \ + perl -e 'while (<>) { \ + $$bss_start=hex($$1) if /^([[:xdigit:]]+) B __bss_start$$/; \ + $$bss_end=hex($$1) if /^([[:xdigit:]]+) B __bss_stop$$/; \ + }; printf "%d\n", $$bss_end - $$bss_start;') LDFLAGS_vmlinux = --defsym _kernel_bss_size=$(KBSS_SZ) # Supply ZRELADDR to the decompressor via a linker symbol. ifneq ($(CONFIG_AUTO_ZRELADDR),y) -- cgit v1.2.3 From c772568788b5f0cbaac7c8d4111d7173bfc90673 Mon Sep 17 00:00:00 2001 From: Russell King Date: Thu, 21 Sep 2017 18:10:19 +0100 Subject: ARM: add additional table to compressed kernel Add an additional extendable table to the compressed kernel so that we can provide further information to boot loaders regarding the properties of the image contained within. This is necessary for correct behaviour of kexec. Tested-by: Tony Lindgren Tested-by: Keerthy Signed-off-by: Russell King --- arch/arm/boot/compressed/head.S | 2 ++ arch/arm/boot/compressed/vmlinux.lds.S | 11 +++++++++++ 2 files changed, 13 insertions(+) diff --git a/arch/arm/boot/compressed/head.S b/arch/arm/boot/compressed/head.S index 8a756870c238..45c8823c3750 100644 --- a/arch/arm/boot/compressed/head.S +++ b/arch/arm/boot/compressed/head.S @@ -143,6 +143,8 @@ start: .word _magic_start @ absolute load/run zImage address .word _magic_end @ zImage end address .word 0x04030201 @ endianness flag + .word 0x45454545 @ another magic number to indicate + .word _magic_table @ additional data table __EFI_HEADER 1: diff --git a/arch/arm/boot/compressed/vmlinux.lds.S b/arch/arm/boot/compressed/vmlinux.lds.S index 7a4c59154361..b38dcef90756 100644 --- a/arch/arm/boot/compressed/vmlinux.lds.S +++ b/arch/arm/boot/compressed/vmlinux.lds.S @@ -44,12 +44,22 @@ SECTIONS *(.glue_7t) *(.glue_7) } + .table : ALIGN(4) { + _table_start = .; + LONG(ZIMAGE_MAGIC(2)) + LONG(ZIMAGE_MAGIC(0x5a534c4b)) + LONG(ZIMAGE_MAGIC(__piggy_size_addr - _start)) + LONG(ZIMAGE_MAGIC(_kernel_bss_size)) + LONG(0) + _table_end = .; + } .rodata : { *(.rodata) *(.rodata.*) } .piggydata : { *(.piggydata) + __piggy_size_addr = . - 4; } . = ALIGN(4); @@ -88,6 +98,7 @@ SECTIONS _magic_sig = ZIMAGE_MAGIC(0x016f2818); _magic_start = ZIMAGE_MAGIC(_start); _magic_end = ZIMAGE_MAGIC(_edata); + _magic_table = ZIMAGE_MAGIC(_table_start - _start); . = BSS_START; __bss_start = .; -- cgit v1.2.3 From e11d1314d83ec7f562fa01df29878d0b1138cf00 Mon Sep 17 00:00:00 2001 From: Nicolas Pitre Date: Fri, 6 Oct 2017 19:36:58 +0100 Subject: ARM: 8703/1: debug.S: move hexbuf to a writable section This was located in .text which is meant to be read-only. And in the XIP case this shortcut simply doesn't work and may trigger a Flash controller mode switch and crash the kernel. Move it to the .bss area. Signed-off-by: Nicolas Pitre Signed-off-by: Russell King --- arch/arm/kernel/debug.S | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/arch/arm/kernel/debug.S b/arch/arm/kernel/debug.S index ea9646cc2a0e..423f4432586d 100644 --- a/arch/arm/kernel/debug.S +++ b/arch/arm/kernel/debug.S @@ -55,7 +55,9 @@ ENDPROC(printhex4) ENTRY(printhex2) mov r1, #2 -printhex: adr r2, hexbuf +printhex: adr r2, hexbuf_rel + ldr r3, [r2] + add r2, r2, r3 add r3, r2, r1 mov r1, #0 strb r1, [r3] @@ -71,7 +73,11 @@ printhex: adr r2, hexbuf b printascii ENDPROC(printhex2) -hexbuf: .space 16 + .pushsection .bss +hexbuf_addr: .space 16 + .popsection + .align +hexbuf_rel: .long hexbuf_addr - . .ltorg @@ -120,7 +126,9 @@ ENTRY(printascii) ENDPROC(printascii) ENTRY(printch) - adr r1, hexbuf + adr r1, hexbuf_rel + ldr r2, [r1] + add r1, r1, r2 strb r0, [r1] mov r0, #0x03 @ SYS_WRITEC ARM( svc #0x123456 ) -- cgit v1.2.3 From de880632fc02a889c0c3d2247fa08dc9e1afa228 Mon Sep 17 00:00:00 2001 From: Nicolas Pitre Date: Fri, 6 Oct 2017 19:49:19 +0100 Subject: ARM: 8705/1: early_printk: use printascii() rather than printch() With printch() the console messages are sent out one character at a time which is agonizingly slow especially with semihosting as the whole trap intercept, remote byte access, and system resume danse is performed for every single character across a relatively slow remote debug connection. Let's use printascii() to send a whole string at once. This is also going to be more efficient, albeit to a quite lesser extent, with serial ports as well. Signed-off-by: Nicolas Pitre Signed-off-by: Russell King --- arch/arm/kernel/early_printk.c | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/arch/arm/kernel/early_printk.c b/arch/arm/kernel/early_printk.c index 43076536965c..9257736ec9fa 100644 --- a/arch/arm/kernel/early_printk.c +++ b/arch/arm/kernel/early_printk.c @@ -11,16 +11,20 @@ #include #include #include +#include -extern void printch(int); +extern void printascii(const char *); static void early_write(const char *s, unsigned n) { - while (n-- > 0) { - if (*s == '\n') - printch('\r'); - printch(*s); - s++; + char buf[128]; + while (n) { + unsigned l = min(n, sizeof(buf)-1); + memcpy(buf, s, l); + buf[l] = 0; + s += l; + n -= l; + printascii(buf); } } -- cgit v1.2.3 From 59b6359dd92d18f5dc04b14a4c926fa08ab66f7c Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Tue, 3 Oct 2017 19:14:38 +0100 Subject: ARM: 8702/1: head-common.S: Clear lr before jumping to start_kernel() If CONFIG_DEBUG_LOCK_ALLOC=y, the kernel log is spammed with a few hundred identical messages: unwind: Unknown symbol address c0800300 unwind: Index not found c0800300 c0800300 is the return address from the last subroutine call (to __memzero()) in __mmap_switched(). Apparently having this address in the link register confuses the unwinder. To fix this, reset the link register to zero before jumping to start_kernel(). Fixes: 9520b1a1b5f7a348 ("ARM: head-common.S: speed up startup code") Suggested-by: Ard Biesheuvel Signed-off-by: Geert Uytterhoeven Acked-by: Nicolas Pitre Signed-off-by: Russell King --- arch/arm/kernel/head-common.S | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm/kernel/head-common.S b/arch/arm/kernel/head-common.S index a25027b87a60..21dde771a7dd 100644 --- a/arch/arm/kernel/head-common.S +++ b/arch/arm/kernel/head-common.S @@ -114,6 +114,7 @@ __mmap_switched: str r8, [r2] @ Save atags pointer cmp r3, #0 strne r10, [r3] @ Save control register values + mov lr, #0 b start_kernel ENDPROC(__mmap_switched) -- cgit v1.2.3 From 877ec119dbbf9576953efc457ede5243621ad6eb Mon Sep 17 00:00:00 2001 From: Vladimir Murzin Date: Mon, 16 Oct 2017 12:52:35 +0100 Subject: ARM: 8706/1: NOMMU: Move out MPU setup in separate module MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Having MPU handling code in dedicated module makes it easier to enhance/maintain it. Tested-by: Szemző András Tested-by: Alexandre TORGUE Tested-by: Benjamin Gaignard Signed-off-by: Vladimir Murzin Signed-off-by: Russell King --- arch/arm/include/asm/mpu.h | 16 ++- arch/arm/mm/Makefile | 1 + arch/arm/mm/nommu.c | 254 +------------------------------------------ arch/arm/mm/pmsa-v7.c | 262 +++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 276 insertions(+), 257 deletions(-) create mode 100644 arch/arm/mm/pmsa-v7.c diff --git a/arch/arm/include/asm/mpu.h b/arch/arm/include/asm/mpu.h index c3247cc2fe08..edec5cf48471 100644 --- a/arch/arm/include/asm/mpu.h +++ b/arch/arm/include/asm/mpu.h @@ -1,8 +1,6 @@ #ifndef __ARM_MPU_H #define __ARM_MPU_H -#ifdef CONFIG_ARM_MPU - /* MPUIR layout */ #define MPUIR_nU 1 #define MPUIR_DREGION 8 @@ -69,8 +67,18 @@ struct mpu_rgn_info { }; extern struct mpu_rgn_info mpu_rgn_info; -#endif /* __ASSEMBLY__ */ +#ifdef CONFIG_ARM_MPU + +extern void __init adjust_lowmem_bounds_mpu(void); +extern void __init mpu_setup(void); -#endif /* CONFIG_ARM_MPU */ +#else + +static inline void adjust_lowmem_bounds_mpu(void) {} +static inline void mpu_setup(void) {} + +#endif /* !CONFIG_ARM_MPU */ + +#endif /* __ASSEMBLY__ */ #endif diff --git a/arch/arm/mm/Makefile b/arch/arm/mm/Makefile index 950d19babb5f..bdb2ec11a2e3 100644 --- a/arch/arm/mm/Makefile +++ b/arch/arm/mm/Makefile @@ -9,6 +9,7 @@ obj-$(CONFIG_MMU) += fault-armv.o flush.o idmap.o ioremap.o \ ifneq ($(CONFIG_MMU),y) obj-y += nommu.o +obj-$(CONFIG_ARM_MPU) += pmsa-v7.o endif obj-$(CONFIG_ARM_PTDUMP) += dump.o diff --git a/arch/arm/mm/nommu.c b/arch/arm/mm/nommu.c index 3b8e728cc944..4c56b567cb47 100644 --- a/arch/arm/mm/nommu.c +++ b/arch/arm/mm/nommu.c @@ -27,259 +27,7 @@ unsigned long vectors_base; #ifdef CONFIG_ARM_MPU struct mpu_rgn_info mpu_rgn_info; - -/* Region number */ -static void rgnr_write(u32 v) -{ - asm("mcr p15, 0, %0, c6, c2, 0" : : "r" (v)); -} - -/* Data-side / unified region attributes */ - -/* Region access control register */ -static void dracr_write(u32 v) -{ - asm("mcr p15, 0, %0, c6, c1, 4" : : "r" (v)); -} - -/* Region size register */ -static void drsr_write(u32 v) -{ - asm("mcr p15, 0, %0, c6, c1, 2" : : "r" (v)); -} - -/* Region base address register */ -static void drbar_write(u32 v) -{ - asm("mcr p15, 0, %0, c6, c1, 0" : : "r" (v)); -} - -static u32 drbar_read(void) -{ - u32 v; - asm("mrc p15, 0, %0, c6, c1, 0" : "=r" (v)); - return v; -} -/* Optional instruction-side region attributes */ - -/* I-side Region access control register */ -static void iracr_write(u32 v) -{ - asm("mcr p15, 0, %0, c6, c1, 5" : : "r" (v)); -} - -/* I-side Region size register */ -static void irsr_write(u32 v) -{ - asm("mcr p15, 0, %0, c6, c1, 3" : : "r" (v)); -} - -/* I-side Region base address register */ -static void irbar_write(u32 v) -{ - asm("mcr p15, 0, %0, c6, c1, 1" : : "r" (v)); -} - -static unsigned long irbar_read(void) -{ - unsigned long v; - asm("mrc p15, 0, %0, c6, c1, 1" : "=r" (v)); - return v; -} - -/* MPU initialisation functions */ -void __init adjust_lowmem_bounds_mpu(void) -{ - phys_addr_t phys_offset = PHYS_OFFSET; - phys_addr_t aligned_region_size, specified_mem_size, rounded_mem_size; - struct memblock_region *reg; - bool first = true; - phys_addr_t mem_start; - phys_addr_t mem_end; - - for_each_memblock(memory, reg) { - if (first) { - /* - * Initially only use memory continuous from - * PHYS_OFFSET */ - if (reg->base != phys_offset) - panic("First memory bank must be contiguous from PHYS_OFFSET"); - - mem_start = reg->base; - mem_end = reg->base + reg->size; - specified_mem_size = reg->size; - first = false; - } else { - /* - * memblock auto merges contiguous blocks, remove - * all blocks afterwards in one go (we can't remove - * blocks separately while iterating) - */ - pr_notice("Ignoring RAM after %pa, memory at %pa ignored\n", - &mem_end, ®->base); - memblock_remove(reg->base, 0 - reg->base); - break; - } - } - - /* - * MPU has curious alignment requirements: Size must be power of 2, and - * region start must be aligned to the region size - */ - if (phys_offset != 0) - pr_info("PHYS_OFFSET != 0 => MPU Region size constrained by alignment requirements\n"); - - /* - * Maximum aligned region might overflow phys_addr_t if phys_offset is - * 0. Hence we keep everything below 4G until we take the smaller of - * the aligned_region_size and rounded_mem_size, one of which is - * guaranteed to be smaller than the maximum physical address. - */ - aligned_region_size = (phys_offset - 1) ^ (phys_offset); - /* Find the max power-of-two sized region that fits inside our bank */ - rounded_mem_size = (1 << __fls(specified_mem_size)) - 1; - - /* The actual region size is the smaller of the two */ - aligned_region_size = aligned_region_size < rounded_mem_size - ? aligned_region_size + 1 - : rounded_mem_size + 1; - - if (aligned_region_size != specified_mem_size) { - pr_warn("Truncating memory from %pa to %pa (MPU region constraints)", - &specified_mem_size, &aligned_region_size); - memblock_remove(mem_start + aligned_region_size, - specified_mem_size - aligned_region_size); - - mem_end = mem_start + aligned_region_size; - } - - pr_debug("MPU Region from %pa size %pa (end %pa))\n", - &phys_offset, &aligned_region_size, &mem_end); - -} - -static int mpu_present(void) -{ - return ((read_cpuid_ext(CPUID_EXT_MMFR0) & MMFR0_PMSA) == MMFR0_PMSAv7); -} - -static int mpu_max_regions(void) -{ - /* - * We don't support a different number of I/D side regions so if we - * have separate instruction and data memory maps then return - * whichever side has a smaller number of supported regions. - */ - u32 dregions, iregions, mpuir; - mpuir = read_cpuid(CPUID_MPUIR); - - dregions = iregions = (mpuir & MPUIR_DREGION_SZMASK) >> MPUIR_DREGION; - - /* Check for separate d-side and i-side memory maps */ - if (mpuir & MPUIR_nU) - iregions = (mpuir & MPUIR_IREGION_SZMASK) >> MPUIR_IREGION; - - /* Use the smallest of the two maxima */ - return min(dregions, iregions); -} - -static int mpu_iside_independent(void) -{ - /* MPUIR.nU specifies whether there is *not* a unified memory map */ - return read_cpuid(CPUID_MPUIR) & MPUIR_nU; -} - -static int mpu_min_region_order(void) -{ - u32 drbar_result, irbar_result; - /* We've kept a region free for this probing */ - rgnr_write(MPU_PROBE_REGION); - isb(); - /* - * As per ARM ARM, write 0xFFFFFFFC to DRBAR to find the minimum - * region order - */ - drbar_write(0xFFFFFFFC); - drbar_result = irbar_result = drbar_read(); - drbar_write(0x0); - /* If the MPU is non-unified, we use the larger of the two minima*/ - if (mpu_iside_independent()) { - irbar_write(0xFFFFFFFC); - irbar_result = irbar_read(); - irbar_write(0x0); - } - isb(); /* Ensure that MPU region operations have completed */ - /* Return whichever result is larger */ - return __ffs(max(drbar_result, irbar_result)); -} - -static int mpu_setup_region(unsigned int number, phys_addr_t start, - unsigned int size_order, unsigned int properties) -{ - u32 size_data; - - /* We kept a region free for probing resolution of MPU regions*/ - if (number > mpu_max_regions() || number == MPU_PROBE_REGION) - return -ENOENT; - - if (size_order > 32) - return -ENOMEM; - - if (size_order < mpu_min_region_order()) - return -ENOMEM; - - /* Writing N to bits 5:1 (RSR_SZ) specifies region size 2^N+1 */ - size_data = ((size_order - 1) << MPU_RSR_SZ) | 1 << MPU_RSR_EN; - - dsb(); /* Ensure all previous data accesses occur with old mappings */ - rgnr_write(number); - isb(); - drbar_write(start); - dracr_write(properties); - isb(); /* Propagate properties before enabling region */ - drsr_write(size_data); - - /* Check for independent I-side registers */ - if (mpu_iside_independent()) { - irbar_write(start); - iracr_write(properties); - isb(); - irsr_write(size_data); - } - isb(); - - /* Store region info (we treat i/d side the same, so only store d) */ - mpu_rgn_info.rgns[number].dracr = properties; - mpu_rgn_info.rgns[number].drbar = start; - mpu_rgn_info.rgns[number].drsr = size_data; - return 0; -} - -/* -* Set up default MPU regions, doing nothing if there is no MPU -*/ -void __init mpu_setup(void) -{ - int region_err; - if (!mpu_present()) - return; - - region_err = mpu_setup_region(MPU_RAM_REGION, PHYS_OFFSET, - ilog2(memblock.memory.regions[0].size), - MPU_AP_PL1RW_PL0RW | MPU_RGN_NORMAL); - if (region_err) { - panic("MPU region initialization failure! %d", region_err); - } else { - pr_info("Using ARMv7 PMSA Compliant MPU. " - "Region independence: %s, Max regions: %d\n", - mpu_iside_independent() ? "Yes" : "No", - mpu_max_regions()); - } -} -#else -static void adjust_lowmem_bounds_mpu(void) {} -static void __init mpu_setup(void) {} -#endif /* CONFIG_ARM_MPU */ +#endif #ifdef CONFIG_CPU_CP15 #ifdef CONFIG_CPU_HIGH_VECTOR diff --git a/arch/arm/mm/pmsa-v7.c b/arch/arm/mm/pmsa-v7.c new file mode 100644 index 000000000000..cc987715457d --- /dev/null +++ b/arch/arm/mm/pmsa-v7.c @@ -0,0 +1,262 @@ +/* + * Based on linux/arch/arm/mm/nommu.c + * + * ARM PMSAv7 supporting functions. + */ + +#include + +#include +#include +#include + +#include "mm.h" + +/* Region number */ +static void rgnr_write(u32 v) +{ + asm("mcr p15, 0, %0, c6, c2, 0" : : "r" (v)); +} + +/* Data-side / unified region attributes */ + +/* Region access control register */ +static void dracr_write(u32 v) +{ + asm("mcr p15, 0, %0, c6, c1, 4" : : "r" (v)); +} + +/* Region size register */ +static void drsr_write(u32 v) +{ + asm("mcr p15, 0, %0, c6, c1, 2" : : "r" (v)); +} + +/* Region base address register */ +static void drbar_write(u32 v) +{ + asm("mcr p15, 0, %0, c6, c1, 0" : : "r" (v)); +} + +static u32 drbar_read(void) +{ + u32 v; + asm("mrc p15, 0, %0, c6, c1, 0" : "=r" (v)); + return v; +} +/* Optional instruction-side region attributes */ + +/* I-side Region access control register */ +static void iracr_write(u32 v) +{ + asm("mcr p15, 0, %0, c6, c1, 5" : : "r" (v)); +} + +/* I-side Region size register */ +static void irsr_write(u32 v) +{ + asm("mcr p15, 0, %0, c6, c1, 3" : : "r" (v)); +} + +/* I-side Region base address register */ +static void irbar_write(u32 v) +{ + asm("mcr p15, 0, %0, c6, c1, 1" : : "r" (v)); +} + +static unsigned long irbar_read(void) +{ + unsigned long v; + asm("mrc p15, 0, %0, c6, c1, 1" : "=r" (v)); + return v; +} + +/* MPU initialisation functions */ +void __init adjust_lowmem_bounds_mpu(void) +{ + phys_addr_t phys_offset = PHYS_OFFSET; + phys_addr_t aligned_region_size, specified_mem_size, rounded_mem_size; + struct memblock_region *reg; + bool first = true; + phys_addr_t mem_start; + phys_addr_t mem_end; + + for_each_memblock(memory, reg) { + if (first) { + /* + * Initially only use memory continuous from + * PHYS_OFFSET */ + if (reg->base != phys_offset) + panic("First memory bank must be contiguous from PHYS_OFFSET"); + + mem_start = reg->base; + mem_end = reg->base + reg->size; + specified_mem_size = reg->size; + first = false; + } else { + /* + * memblock auto merges contiguous blocks, remove + * all blocks afterwards in one go (we can't remove + * blocks separately while iterating) + */ + pr_notice("Ignoring RAM after %pa, memory at %pa ignored\n", + &mem_end, ®->base); + memblock_remove(reg->base, 0 - reg->base); + break; + } + } + + /* + * MPU has curious alignment requirements: Size must be power of 2, and + * region start must be aligned to the region size + */ + if (phys_offset != 0) + pr_info("PHYS_OFFSET != 0 => MPU Region size constrained by alignment requirements\n"); + + /* + * Maximum aligned region might overflow phys_addr_t if phys_offset is + * 0. Hence we keep everything below 4G until we take the smaller of + * the aligned_region_size and rounded_mem_size, one of which is + * guaranteed to be smaller than the maximum physical address. + */ + aligned_region_size = (phys_offset - 1) ^ (phys_offset); + /* Find the max power-of-two sized region that fits inside our bank */ + rounded_mem_size = (1 << __fls(specified_mem_size)) - 1; + + /* The actual region size is the smaller of the two */ + aligned_region_size = aligned_region_size < rounded_mem_size + ? aligned_region_size + 1 + : rounded_mem_size + 1; + + if (aligned_region_size != specified_mem_size) { + pr_warn("Truncating memory from %pa to %pa (MPU region constraints)", + &specified_mem_size, &aligned_region_size); + memblock_remove(mem_start + aligned_region_size, + specified_mem_size - aligned_region_size); + + mem_end = mem_start + aligned_region_size; + } + + pr_debug("MPU Region from %pa size %pa (end %pa))\n", + &phys_offset, &aligned_region_size, &mem_end); + +} + +static int mpu_present(void) +{ + return ((read_cpuid_ext(CPUID_EXT_MMFR0) & MMFR0_PMSA) == MMFR0_PMSAv7); +} + +static int mpu_max_regions(void) +{ + /* + * We don't support a different number of I/D side regions so if we + * have separate instruction and data memory maps then return + * whichever side has a smaller number of supported regions. + */ + u32 dregions, iregions, mpuir; + mpuir = read_cpuid(CPUID_MPUIR); + + dregions = iregions = (mpuir & MPUIR_DREGION_SZMASK) >> MPUIR_DREGION; + + /* Check for separate d-side and i-side memory maps */ + if (mpuir & MPUIR_nU) + iregions = (mpuir & MPUIR_IREGION_SZMASK) >> MPUIR_IREGION; + + /* Use the smallest of the two maxima */ + return min(dregions, iregions); +} + +static int mpu_iside_independent(void) +{ + /* MPUIR.nU specifies whether there is *not* a unified memory map */ + return read_cpuid(CPUID_MPUIR) & MPUIR_nU; +} + +static int mpu_min_region_order(void) +{ + u32 drbar_result, irbar_result; + /* We've kept a region free for this probing */ + rgnr_write(MPU_PROBE_REGION); + isb(); + /* + * As per ARM ARM, write 0xFFFFFFFC to DRBAR to find the minimum + * region order + */ + drbar_write(0xFFFFFFFC); + drbar_result = irbar_result = drbar_read(); + drbar_write(0x0); + /* If the MPU is non-unified, we use the larger of the two minima*/ + if (mpu_iside_independent()) { + irbar_write(0xFFFFFFFC); + irbar_result = irbar_read(); + irbar_write(0x0); + } + isb(); /* Ensure that MPU region operations have completed */ + /* Return whichever result is larger */ + return __ffs(max(drbar_result, irbar_result)); +} + +static int mpu_setup_region(unsigned int number, phys_addr_t start, + unsigned int size_order, unsigned int properties) +{ + u32 size_data; + + /* We kept a region free for probing resolution of MPU regions*/ + if (number > mpu_max_regions() || number == MPU_PROBE_REGION) + return -ENOENT; + + if (size_order > 32) + return -ENOMEM; + + if (size_order < mpu_min_region_order()) + return -ENOMEM; + + /* Writing N to bits 5:1 (RSR_SZ) specifies region size 2^N+1 */ + size_data = ((size_order - 1) << MPU_RSR_SZ) | 1 << MPU_RSR_EN; + + dsb(); /* Ensure all previous data accesses occur with old mappings */ + rgnr_write(number); + isb(); + drbar_write(start); + dracr_write(properties); + isb(); /* Propagate properties before enabling region */ + drsr_write(size_data); + + /* Check for independent I-side registers */ + if (mpu_iside_independent()) { + irbar_write(start); + iracr_write(properties); + isb(); + irsr_write(size_data); + } + isb(); + + /* Store region info (we treat i/d side the same, so only store d) */ + mpu_rgn_info.rgns[number].dracr = properties; + mpu_rgn_info.rgns[number].drbar = start; + mpu_rgn_info.rgns[number].drsr = size_data; + return 0; +} + +/* +* Set up default MPU regions, doing nothing if there is no MPU +*/ +void __init mpu_setup(void) +{ + int region_err; + if (!mpu_present()) + return; + + region_err = mpu_setup_region(MPU_RAM_REGION, PHYS_OFFSET, + ilog2(memblock.memory.regions[0].size), + MPU_AP_PL1RW_PL0RW | MPU_RGN_NORMAL); + if (region_err) { + panic("MPU region initialization failure! %d", region_err); + } else { + pr_info("Using ARMv7 PMSA Compliant MPU. " + "Region independence: %s, Max regions: %d\n", + mpu_iside_independent() ? "Yes" : "No", + mpu_max_regions()); + } +} -- cgit v1.2.3 From e8b47e12d6c72f26a8ce85974f98a4050ac7ca24 Mon Sep 17 00:00:00 2001 From: Vladimir Murzin Date: Mon, 16 Oct 2017 12:53:18 +0100 Subject: ARM: 8707/1: NOMMU: Update MPU accessors to use cp15 helpers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently, inline assembly for accessing to MPU's cp15 lacks volatile keyword which opens possibility to compiler to optimise such accesses as soon as we start using them more intensively. Rather than fixing inline asm, lets move MPU accessors to use cp15 helpers which do the right thing. Tested-by: Szemző András Tested-by: Alexandre TORGUE Tested-by: Benjamin Gaignard Signed-off-by: Vladimir Murzin Signed-off-by: Russell King --- arch/arm/mm/pmsa-v7.c | 48 ++++++++++++++++++++++++++---------------------- 1 file changed, 26 insertions(+), 22 deletions(-) diff --git a/arch/arm/mm/pmsa-v7.c b/arch/arm/mm/pmsa-v7.c index cc987715457d..484f5aa51090 100644 --- a/arch/arm/mm/pmsa-v7.c +++ b/arch/arm/mm/pmsa-v7.c @@ -12,63 +12,67 @@ #include "mm.h" +#define DRBAR __ACCESS_CP15(c6, 0, c1, 0) +#define IRBAR __ACCESS_CP15(c6, 0, c1, 1) +#define DRSR __ACCESS_CP15(c6, 0, c1, 2) +#define IRSR __ACCESS_CP15(c6, 0, c1, 3) +#define DRACR __ACCESS_CP15(c6, 0, c1, 4) +#define IRACR __ACCESS_CP15(c6, 0, c1, 5) +#define RNGNR __ACCESS_CP15(c6, 0, c2, 0) + /* Region number */ -static void rgnr_write(u32 v) +static inline void rgnr_write(u32 v) { - asm("mcr p15, 0, %0, c6, c2, 0" : : "r" (v)); + write_sysreg(v, RNGNR); } /* Data-side / unified region attributes */ /* Region access control register */ -static void dracr_write(u32 v) +static inline void dracr_write(u32 v) { - asm("mcr p15, 0, %0, c6, c1, 4" : : "r" (v)); + write_sysreg(v, DRACR); } /* Region size register */ -static void drsr_write(u32 v) +static inline void drsr_write(u32 v) { - asm("mcr p15, 0, %0, c6, c1, 2" : : "r" (v)); + write_sysreg(v, DRSR); } /* Region base address register */ -static void drbar_write(u32 v) +static inline void drbar_write(u32 v) { - asm("mcr p15, 0, %0, c6, c1, 0" : : "r" (v)); + write_sysreg(v, DRBAR); } -static u32 drbar_read(void) +static inline u32 drbar_read(void) { - u32 v; - asm("mrc p15, 0, %0, c6, c1, 0" : "=r" (v)); - return v; + return read_sysreg(DRBAR); } /* Optional instruction-side region attributes */ /* I-side Region access control register */ -static void iracr_write(u32 v) +static inline void iracr_write(u32 v) { - asm("mcr p15, 0, %0, c6, c1, 5" : : "r" (v)); + write_sysreg(v, IRACR); } /* I-side Region size register */ -static void irsr_write(u32 v) +static inline void irsr_write(u32 v) { - asm("mcr p15, 0, %0, c6, c1, 3" : : "r" (v)); + write_sysreg(v, IRSR); } /* I-side Region base address register */ -static void irbar_write(u32 v) +static inline void irbar_write(u32 v) { - asm("mcr p15, 0, %0, c6, c1, 1" : : "r" (v)); + write_sysreg(v, IRBAR); } -static unsigned long irbar_read(void) +static inline u32 irbar_read(void) { - unsigned long v; - asm("mrc p15, 0, %0, c6, c1, 1" : "=r" (v)); - return v; + return read_sysreg(IRBAR); } /* MPU initialisation functions */ -- cgit v1.2.3 From a0995c0805b63c930b99970f2c9d5e4f167ca65b Mon Sep 17 00:00:00 2001 From: Vladimir Murzin Date: Mon, 16 Oct 2017 12:54:05 +0100 Subject: ARM: 8708/1: NOMMU: Rework MPU to be mostly done in C MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently, there are several issues with how MPU is setup: 1. We won't boot if MPU is missing 2. We won't boot if use XIP 3. Further extension of MPU setup requires asm skills The 1st point can be relaxed, so we can continue with boot CPU even if MPU is missed and fail boot for secondaries only. To address the 2nd point we could create region covering CONFIG_XIP_PHYS_ADDR - _end and that might work for the first stage of MPU enable, but due to MPU's alignment requirement we could cover too much, IOW we need more flexibility in how we're partitioning memory regions... and it'd be hardly possible to archive because of the 3rd point. This patch is trying to address 1st and 3rd issues and paves the path for 2nd and further improvements. The most visible change introduced with this patch is that we start using mpu_rgn_info array (as it was supposed?), so change in MPU setup done by boot CPU is recorded there and feed to secondaries. It allows us to keep minimal region setup for boot CPU and do the rest in C. Since we start programming MPU regions in C evaluation of MPU constrains (number of regions supported and minimal region order) can be done once, which in turn open possibility to free-up "probe" region early. Tested-by: Szemző András Tested-by: Alexandre TORGUE Tested-by: Benjamin Gaignard Signed-off-by: Vladimir Murzin Signed-off-by: Russell King --- arch/arm/include/asm/mpu.h | 2 +- arch/arm/include/asm/smp.h | 2 +- arch/arm/kernel/asm-offsets.c | 11 ++++++ arch/arm/kernel/head-nommu.S | 80 +++++++++++++++++++++++++++++++++---------- arch/arm/kernel/smp.c | 2 +- arch/arm/mm/pmsa-v7.c | 70 +++++++++++++++++++++++++++---------- 6 files changed, 127 insertions(+), 40 deletions(-) diff --git a/arch/arm/include/asm/mpu.h b/arch/arm/include/asm/mpu.h index edec5cf48471..403462e1df9d 100644 --- a/arch/arm/include/asm/mpu.h +++ b/arch/arm/include/asm/mpu.h @@ -62,7 +62,7 @@ struct mpu_rgn { }; struct mpu_rgn_info { - u32 mpuir; + unsigned int used; struct mpu_rgn rgns[MPU_MAX_REGIONS]; }; extern struct mpu_rgn_info mpu_rgn_info; diff --git a/arch/arm/include/asm/smp.h b/arch/arm/include/asm/smp.h index 3d6dc8b460e4..709a55989cb0 100644 --- a/arch/arm/include/asm/smp.h +++ b/arch/arm/include/asm/smp.h @@ -60,7 +60,7 @@ asmlinkage void secondary_start_kernel(void); */ struct secondary_data { union { - unsigned long mpu_rgn_szr; + struct mpu_rgn_info *mpu_rgn_info; u64 pgdir; }; unsigned long swapper_pg_dir; diff --git a/arch/arm/kernel/asm-offsets.c b/arch/arm/kernel/asm-offsets.c index 13c155850822..f369ece99958 100644 --- a/arch/arm/kernel/asm-offsets.c +++ b/arch/arm/kernel/asm-offsets.c @@ -23,6 +23,7 @@ #include #include #include +#include #include #include #include @@ -186,6 +187,16 @@ int main(void) BLANK(); #ifdef CONFIG_VDSO DEFINE(VDSO_DATA_SIZE, sizeof(union vdso_data_store)); +#endif + BLANK(); +#ifdef CONFIG_ARM_MPU + DEFINE(MPU_RNG_INFO_RNGS, offsetof(struct mpu_rgn_info, rgns)); + DEFINE(MPU_RNG_INFO_USED, offsetof(struct mpu_rgn_info, used)); + + DEFINE(MPU_RNG_SIZE, sizeof(struct mpu_rgn)); + DEFINE(MPU_RGN_DRBAR, offsetof(struct mpu_rgn, drbar)); + DEFINE(MPU_RGN_DRSR, offsetof(struct mpu_rgn, drsr)); + DEFINE(MPU_RGN_DRACR, offsetof(struct mpu_rgn, dracr)); #endif return 0; } diff --git a/arch/arm/kernel/head-nommu.S b/arch/arm/kernel/head-nommu.S index 2e21e08de747..5f90a5fb7022 100644 --- a/arch/arm/kernel/head-nommu.S +++ b/arch/arm/kernel/head-nommu.S @@ -13,6 +13,7 @@ */ #include #include +#include #include #include @@ -110,8 +111,8 @@ ENTRY(secondary_startup) #ifdef CONFIG_ARM_MPU /* Use MPU region info supplied by __cpu_up */ - ldr r6, [r7] @ get secondary_data.mpu_szr - bl __setup_mpu @ Initialize the MPU + ldr r6, [r7] @ get secondary_data.mpu_rgn_info + bl __secondary_setup_mpu @ Initialize the MPU #endif badr lr, 1f @ return (PIC) address @@ -204,13 +205,13 @@ ENTRY(__setup_mpu) mrc p15, 0, r0, c0, c1, 4 @ Read ID_MMFR0 and r0, r0, #(MMFR0_PMSA) @ PMSA field teq r0, #(MMFR0_PMSAv7) @ PMSA v7 - bne __error_p @ Fail: ARM_MPU on NOT v7 PMSA + bxne lr /* Determine whether the D/I-side memory map is unified. We set the * flags here and continue to use them for the rest of this function */ mrc p15, 0, r0, c0, c0, 4 @ MPUIR ands r5, r0, #MPUIR_DREGION_SZMASK @ 0 size d region => No MPU - beq __error_p @ Fail: ARM_MPU and no MPU + bxeq lr tst r0, #MPUIR_nU @ MPUIR_nU = 0 for unified /* Setup second region first to free up r6 */ @@ -238,27 +239,70 @@ ENTRY(__setup_mpu) setup_region r0, r5, r6, MPU_INSTR_SIDE @ 0x0, BG region, enabled 2: isb - /* Vectors region */ - set_region_nr r0, #MPU_VECTORS_REGION + /* Enable the MPU */ + mrc p15, 0, r0, c1, c0, 0 @ Read SCTLR + bic r0, r0, #CR_BR @ Disable the 'default mem-map' + orr r0, r0, #CR_M @ Set SCTRL.M (MPU on) + mcr p15, 0, r0, c1, c0, 0 @ Enable MPU + isb + + ret lr +ENDPROC(__setup_mpu) + +#ifdef CONFIG_SMP +/* + * r6: pointer at mpu_rgn_info + */ + +ENTRY(__secondary_setup_mpu) + /* Probe for v7 PMSA compliance */ + mrc p15, 0, r0, c0, c1, 4 @ Read ID_MMFR0 + and r0, r0, #(MMFR0_PMSA) @ PMSA field + teq r0, #(MMFR0_PMSAv7) @ PMSA v7 + bne __error_p + + /* Determine whether the D/I-side memory map is unified. We set the + * flags here and continue to use them for the rest of this function */ + mrc p15, 0, r0, c0, c0, 4 @ MPUIR + ands r5, r0, #MPUIR_DREGION_SZMASK @ 0 size d region => No MPU + beq __error_p + + ldr r4, [r6, #MPU_RNG_INFO_USED] + mov r5, #MPU_RNG_SIZE + add r3, r6, #MPU_RNG_INFO_RNGS + mla r3, r4, r5, r3 + +1: + tst r0, #MPUIR_nU @ MPUIR_nU = 0 for unified + sub r3, r3, #MPU_RNG_SIZE + sub r4, r4, #1 + + set_region_nr r0, r4 isb - /* Shared, inaccessible to PL0, rw PL1 */ - mov r0, #CONFIG_VECTORS_BASE @ Cover from VECTORS_BASE - ldr r5,=(MPU_AP_PL1RW_PL0NA | MPU_RGN_NORMAL) - /* Writing N to bits 5:1 (RSR_SZ) --> region size 2^N+1 */ - mov r6, #(((2 * PAGE_SHIFT - 1) << MPU_RSR_SZ) | 1 << MPU_RSR_EN) - setup_region r0, r5, r6, MPU_DATA_SIDE @ VECTORS_BASE, PL0 NA, enabled - beq 3f @ Memory-map not unified - setup_region r0, r5, r6, MPU_INSTR_SIDE @ VECTORS_BASE, PL0 NA, enabled -3: isb + ldr r0, [r3, #MPU_RGN_DRBAR] + ldr r6, [r3, #MPU_RGN_DRSR] + ldr r5, [r3, #MPU_RGN_DRACR] + + setup_region r0, r5, r6, MPU_DATA_SIDE + beq 2f + setup_region r0, r5, r6, MPU_INSTR_SIDE +2: isb + + mrc p15, 0, r0, c0, c0, 4 @ Reevaluate the MPUIR + cmp r4, #0 + bgt 1b /* Enable the MPU */ mrc p15, 0, r0, c1, c0, 0 @ Read SCTLR - bic r0, r0, #CR_BR @ Disable the 'default mem-map' + bic r0, r0, #CR_BR @ Disable the 'default mem-map' orr r0, r0, #CR_M @ Set SCTRL.M (MPU on) mcr p15, 0, r0, c1, c0, 0 @ Enable MPU isb + ret lr -ENDPROC(__setup_mpu) -#endif +ENDPROC(__secondary_setup_mpu) + +#endif /* CONFIG_SMP */ +#endif /* CONFIG_ARM_MPU */ #include "head-common.S" diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c index c9a0a5299827..b4fbf00ee4ad 100644 --- a/arch/arm/kernel/smp.c +++ b/arch/arm/kernel/smp.c @@ -114,7 +114,7 @@ int __cpu_up(unsigned int cpu, struct task_struct *idle) */ secondary_data.stack = task_stack_page(idle) + THREAD_START_SP; #ifdef CONFIG_ARM_MPU - secondary_data.mpu_rgn_szr = mpu_rgn_info.rgns[MPU_RAM_REGION].drsr; + secondary_data.mpu_rgn_info = &mpu_rgn_info; #endif #ifdef CONFIG_MMU diff --git a/arch/arm/mm/pmsa-v7.c b/arch/arm/mm/pmsa-v7.c index 484f5aa51090..cd798271a78d 100644 --- a/arch/arm/mm/pmsa-v7.c +++ b/arch/arm/mm/pmsa-v7.c @@ -12,6 +12,9 @@ #include "mm.h" +static unsigned int __initdata mpu_min_region_order; +static unsigned int __initdata mpu_max_regions; + #define DRBAR __ACCESS_CP15(c6, 0, c1, 0) #define IRBAR __ACCESS_CP15(c6, 0, c1, 1) #define DRSR __ACCESS_CP15(c6, 0, c1, 2) @@ -75,6 +78,11 @@ static inline u32 irbar_read(void) return read_sysreg(IRBAR); } +static int __init mpu_present(void) +{ + return ((read_cpuid_ext(CPUID_EXT_MMFR0) & MMFR0_PMSA) == MMFR0_PMSAv7); +} + /* MPU initialisation functions */ void __init adjust_lowmem_bounds_mpu(void) { @@ -85,6 +93,9 @@ void __init adjust_lowmem_bounds_mpu(void) phys_addr_t mem_start; phys_addr_t mem_end; + if (!mpu_present()) + return; + for_each_memblock(memory, reg) { if (first) { /* @@ -146,12 +157,7 @@ void __init adjust_lowmem_bounds_mpu(void) } -static int mpu_present(void) -{ - return ((read_cpuid_ext(CPUID_EXT_MMFR0) & MMFR0_PMSA) == MMFR0_PMSAv7); -} - -static int mpu_max_regions(void) +static int __init __mpu_max_regions(void) { /* * We don't support a different number of I/D side regions so if we @@ -159,6 +165,7 @@ static int mpu_max_regions(void) * whichever side has a smaller number of supported regions. */ u32 dregions, iregions, mpuir; + mpuir = read_cpuid(CPUID_MPUIR); dregions = iregions = (mpuir & MPUIR_DREGION_SZMASK) >> MPUIR_DREGION; @@ -171,15 +178,16 @@ static int mpu_max_regions(void) return min(dregions, iregions); } -static int mpu_iside_independent(void) +static int __init mpu_iside_independent(void) { /* MPUIR.nU specifies whether there is *not* a unified memory map */ return read_cpuid(CPUID_MPUIR) & MPUIR_nU; } -static int mpu_min_region_order(void) +static int __init __mpu_min_region_order(void) { u32 drbar_result, irbar_result; + /* We've kept a region free for this probing */ rgnr_write(MPU_PROBE_REGION); isb(); @@ -198,22 +206,24 @@ static int mpu_min_region_order(void) } isb(); /* Ensure that MPU region operations have completed */ /* Return whichever result is larger */ + return __ffs(max(drbar_result, irbar_result)); } -static int mpu_setup_region(unsigned int number, phys_addr_t start, +static int __init mpu_setup_region(unsigned int number, phys_addr_t start, unsigned int size_order, unsigned int properties) { u32 size_data; /* We kept a region free for probing resolution of MPU regions*/ - if (number > mpu_max_regions() || number == MPU_PROBE_REGION) + if (number > mpu_max_regions + || number >= MPU_MAX_REGIONS) return -ENOENT; if (size_order > 32) return -ENOMEM; - if (size_order < mpu_min_region_order()) + if (size_order < mpu_min_region_order) return -ENOMEM; /* Writing N to bits 5:1 (RSR_SZ) specifies region size 2^N+1 */ @@ -240,6 +250,9 @@ static int mpu_setup_region(unsigned int number, phys_addr_t start, mpu_rgn_info.rgns[number].dracr = properties; mpu_rgn_info.rgns[number].drbar = start; mpu_rgn_info.rgns[number].drsr = size_data; + + mpu_rgn_info.used++; + return 0; } @@ -248,19 +261,38 @@ static int mpu_setup_region(unsigned int number, phys_addr_t start, */ void __init mpu_setup(void) { - int region_err; + int region = 0, err = 0; + if (!mpu_present()) return; - region_err = mpu_setup_region(MPU_RAM_REGION, PHYS_OFFSET, - ilog2(memblock.memory.regions[0].size), - MPU_AP_PL1RW_PL0RW | MPU_RGN_NORMAL); - if (region_err) { - panic("MPU region initialization failure! %d", region_err); + /* Free-up MPU_PROBE_REGION */ + mpu_min_region_order = __mpu_min_region_order(); + + /* How many regions are supported */ + mpu_max_regions = __mpu_max_regions(); + + /* Now setup MPU (order is important) */ + + /* Background */ + err |= mpu_setup_region(region++, 0, 32, + MPU_ACR_XN | MPU_RGN_STRONGLY_ORDERED | MPU_AP_PL1RW_PL0NA); + + /* RAM */ + err |= mpu_setup_region(region++, PHYS_OFFSET, + ilog2(memblock.memory.regions[0].size), + MPU_AP_PL1RW_PL0RW | MPU_RGN_NORMAL); + + /* Vectors */ + err |= mpu_setup_region(region++, vectors_base, + ilog2(2 * PAGE_SIZE), + MPU_AP_PL1RW_PL0NA | MPU_RGN_NORMAL); + if (err) { + panic("MPU region initialization failure! %d", err); } else { pr_info("Using ARMv7 PMSA Compliant MPU. " - "Region independence: %s, Max regions: %d\n", + "Region independence: %s, Used %d of %d regions\n", mpu_iside_independent() ? "Yes" : "No", - mpu_max_regions()); + mpu_rgn_info.used, mpu_max_regions); } } -- cgit v1.2.3 From df8089e7f343e3a6744df9d05f3e4c225deddadb Mon Sep 17 00:00:00 2001 From: Vladimir Murzin Date: Mon, 16 Oct 2017 12:55:06 +0100 Subject: ARM: 8709/1: NOMMU: Disallow MPU for XIP MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It seems that MPU never worked with XIP, so we just disallow such combination. Tested-by: Szemző András Tested-by: Alexandre TORGUE Tested-by: Benjamin Gaignard Signed-off-by: Vladimir Murzin Signed-off-by: Russell King --- arch/arm/Kconfig-nommu | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/Kconfig-nommu b/arch/arm/Kconfig-nommu index b7576349528c..6d18395da24d 100644 --- a/arch/arm/Kconfig-nommu +++ b/arch/arm/Kconfig-nommu @@ -52,7 +52,7 @@ config REMAP_VECTORS_TO_RAM config ARM_MPU bool 'Use the ARM v7 PMSA Compliant MPU' - depends on CPU_V7 + depends on !XIP_KERNEL && CPU_V7 default y help Some ARM systems without an MMU have instead a Memory Protection -- cgit v1.2.3 From 89a6dafe136952b3409d5e4ed04cb891e806e924 Mon Sep 17 00:00:00 2001 From: Vladimir Murzin Date: Mon, 16 Oct 2017 12:56:18 +0100 Subject: ARM: 8710/1: Kconfig: Kill CONFIG_VECTORS_BASE The last user of CONFIG_VECTORS_BASE has gone, so kill it. Tested-by: Benjamin Gaignard Reported-by: Afzal Mohammed Signed-off-by: Vladimir Murzin Signed-off-by: Russell King --- arch/arm/Kconfig | 9 --------- 1 file changed, 9 deletions(-) diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index 1dba9c0d63d5..bf91aa38051d 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -239,15 +239,6 @@ config NEED_RET_TO_USER config ARCH_MTD_XIP bool -config VECTORS_BASE - hex - default 0xffff0000 if MMU || CPU_HIGH_VECTOR - default DRAM_BASE if REMAP_VECTORS_TO_RAM - default 0x00000000 - help - The base address of exception vectors. This must be two pages - in size. - config ARM_PATCH_PHYS_VIRT bool "Patch physical to virtual translations at runtime" if EMBEDDED default y -- cgit v1.2.3 From 9fcb01a9f54c28062a73a545c29137a4cc104c72 Mon Sep 17 00:00:00 2001 From: Vladimir Murzin Date: Mon, 16 Oct 2017 12:57:48 +0100 Subject: ARM: 8711/1: V7M: Add support for MPU to M-class MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch makes it possible to use MPU with v7M cores. Tested-by: Szemző András Tested-by: Alexandre TORGUE Tested-by: Benjamin Gaignard Signed-off-by: Vladimir Murzin Signed-off-by: Russell King --- arch/arm/Kconfig-nommu | 4 +-- arch/arm/include/asm/cputype.h | 10 ++++++++ arch/arm/include/asm/v7m.h | 10 ++++++++ arch/arm/kernel/head-nommu.S | 56 ++++++++++++++++++++++++++++++------------ arch/arm/mm/pmsa-v7.c | 53 +++++++++++++++++++++++++++++++++++++-- 5 files changed, 113 insertions(+), 20 deletions(-) diff --git a/arch/arm/Kconfig-nommu b/arch/arm/Kconfig-nommu index 6d18395da24d..930e000489a8 100644 --- a/arch/arm/Kconfig-nommu +++ b/arch/arm/Kconfig-nommu @@ -52,8 +52,8 @@ config REMAP_VECTORS_TO_RAM config ARM_MPU bool 'Use the ARM v7 PMSA Compliant MPU' - depends on !XIP_KERNEL && CPU_V7 - default y + depends on !XIP_KERNEL && (CPU_V7 || CPU_V7M) + default y if CPU_V7 help Some ARM systems without an MMU have instead a Memory Protection Unit (MPU) that defines the type and permissions for regions of diff --git a/arch/arm/include/asm/cputype.h b/arch/arm/include/asm/cputype.h index b62eaeb147aa..abaac5e07b80 100644 --- a/arch/arm/include/asm/cputype.h +++ b/arch/arm/include/asm/cputype.h @@ -173,6 +173,11 @@ static inline unsigned int __attribute_const__ read_cpuid_cachetype(void) return read_cpuid(CPUID_CACHETYPE); } +static inline unsigned int __attribute_const__ read_cpuid_mputype(void) +{ + return read_cpuid(CPUID_MPUIR); +} + #elif defined(CONFIG_CPU_V7M) static inline unsigned int __attribute_const__ read_cpuid_id(void) @@ -185,6 +190,11 @@ static inline unsigned int __attribute_const__ read_cpuid_cachetype(void) return readl(BASEADDR_V7M_SCB + V7M_SCB_CTR); } +static inline unsigned int __attribute_const__ read_cpuid_mputype(void) +{ + return readl(BASEADDR_V7M_SCB + MPU_TYPE); +} + #else /* ifdef CONFIG_CPU_CP15 / elif defined(CONFIG_CPU_V7M) */ static inline unsigned int __attribute_const__ read_cpuid_id(void) diff --git a/arch/arm/include/asm/v7m.h b/arch/arm/include/asm/v7m.h index 1fd775c1bc5d..5de776c81382 100644 --- a/arch/arm/include/asm/v7m.h +++ b/arch/arm/include/asm/v7m.h @@ -57,6 +57,16 @@ #define V7M_SCB_CCSIDR 0x80 /* Cache size ID register */ #define V7M_SCB_CSSELR 0x84 /* Cache size selection register */ +/* Memory-mapped MPU registers for M-class */ +#define MPU_TYPE 0x90 +#define MPU_CTRL 0x94 +#define MPU_CTRL_ENABLE 1 +#define MPU_CTRL_PRIVDEFENA (1 << 2) + +#define MPU_RNR 0x98 +#define MPU_RBAR 0x9c +#define MPU_RASR 0xa0 + /* Cache opeartions */ #define V7M_SCB_ICIALLU 0x250 /* I-cache invalidate all to PoU */ #define V7M_SCB_ICIMVAU 0x258 /* I-cache invalidate by MVA to PoU */ diff --git a/arch/arm/kernel/head-nommu.S b/arch/arm/kernel/head-nommu.S index 5f90a5fb7022..0d64b8ba7e9c 100644 --- a/arch/arm/kernel/head-nommu.S +++ b/arch/arm/kernel/head-nommu.S @@ -176,19 +176,33 @@ ENDPROC(__after_proc_init) #ifdef CONFIG_ARM_MPU +#ifndef CONFIG_CPU_V7M /* Set which MPU region should be programmed */ -.macro set_region_nr tmp, rgnr +.macro set_region_nr tmp, rgnr, unused mov \tmp, \rgnr @ Use static region numbers mcr p15, 0, \tmp, c6, c2, 0 @ Write RGNR .endm /* Setup a single MPU region, either D or I side (D-side for unified) */ -.macro setup_region bar, acr, sr, side = MPU_DATA_SIDE +.macro setup_region bar, acr, sr, side = MPU_DATA_SIDE, unused mcr p15, 0, \bar, c6, c1, (0 + \side) @ I/DRBAR mcr p15, 0, \acr, c6, c1, (4 + \side) @ I/DRACR mcr p15, 0, \sr, c6, c1, (2 + \side) @ I/DRSR .endm +#else +.macro set_region_nr tmp, rgnr, base + mov \tmp, \rgnr + str \tmp, [\base, #MPU_RNR] +.endm + +.macro setup_region bar, acr, sr, unused, base + lsl \acr, \acr, #16 + orr \acr, \acr, \sr + str \bar, [\base, #MPU_RBAR] + str \acr, [\base, #MPU_RASR] +.endm +#endif /* * Setup the MPU and initial MPU Regions. We create the following regions: * Region 0: Use this for probing the MPU details, so leave disabled. @@ -202,48 +216,58 @@ ENDPROC(__after_proc_init) ENTRY(__setup_mpu) /* Probe for v7 PMSA compliance */ - mrc p15, 0, r0, c0, c1, 4 @ Read ID_MMFR0 +M_CLASS(movw r12, #:lower16:BASEADDR_V7M_SCB) +M_CLASS(movt r12, #:upper16:BASEADDR_V7M_SCB) + +AR_CLASS(mrc p15, 0, r0, c0, c1, 4) @ Read ID_MMFR0 +M_CLASS(ldr r0, [r12, 0x50]) and r0, r0, #(MMFR0_PMSA) @ PMSA field teq r0, #(MMFR0_PMSAv7) @ PMSA v7 bxne lr /* Determine whether the D/I-side memory map is unified. We set the * flags here and continue to use them for the rest of this function */ - mrc p15, 0, r0, c0, c0, 4 @ MPUIR +AR_CLASS(mrc p15, 0, r0, c0, c0, 4) @ MPUIR +M_CLASS(ldr r0, [r12, #MPU_TYPE]) ands r5, r0, #MPUIR_DREGION_SZMASK @ 0 size d region => No MPU bxeq lr tst r0, #MPUIR_nU @ MPUIR_nU = 0 for unified /* Setup second region first to free up r6 */ - set_region_nr r0, #MPU_RAM_REGION + set_region_nr r0, #MPU_RAM_REGION, r12 isb /* Full access from PL0, PL1, shared for CONFIG_SMP, cacheable */ ldr r0, =PLAT_PHYS_OFFSET @ RAM starts at PHYS_OFFSET ldr r5,=(MPU_AP_PL1RW_PL0RW | MPU_RGN_NORMAL) - setup_region r0, r5, r6, MPU_DATA_SIDE @ PHYS_OFFSET, shared, enabled - beq 1f @ Memory-map not unified - setup_region r0, r5, r6, MPU_INSTR_SIDE @ PHYS_OFFSET, shared, enabled + setup_region r0, r5, r6, MPU_DATA_SIDE, r12 @ PHYS_OFFSET, shared, enabled + beq 1f @ Memory-map not unified + setup_region r0, r5, r6, MPU_INSTR_SIDE, r12 @ PHYS_OFFSET, shared, enabled 1: isb /* First/background region */ - set_region_nr r0, #MPU_BG_REGION + set_region_nr r0, #MPU_BG_REGION, r12 isb /* Execute Never, strongly ordered, inaccessible to PL0, rw PL1 */ mov r0, #0 @ BG region starts at 0x0 ldr r5,=(MPU_ACR_XN | MPU_RGN_STRONGLY_ORDERED | MPU_AP_PL1RW_PL0NA) mov r6, #MPU_RSR_ALL_MEM @ 4GB region, enabled - setup_region r0, r5, r6, MPU_DATA_SIDE @ 0x0, BG region, enabled - beq 2f @ Memory-map not unified - setup_region r0, r5, r6, MPU_INSTR_SIDE @ 0x0, BG region, enabled + setup_region r0, r5, r6, MPU_DATA_SIDE, r12 @ 0x0, BG region, enabled + beq 2f @ Memory-map not unified + setup_region r0, r5, r6, MPU_INSTR_SIDE r12 @ 0x0, BG region, enabled 2: isb /* Enable the MPU */ - mrc p15, 0, r0, c1, c0, 0 @ Read SCTLR - bic r0, r0, #CR_BR @ Disable the 'default mem-map' - orr r0, r0, #CR_M @ Set SCTRL.M (MPU on) - mcr p15, 0, r0, c1, c0, 0 @ Enable MPU +AR_CLASS(mrc p15, 0, r0, c1, c0, 0) @ Read SCTLR +AR_CLASS(bic r0, r0, #CR_BR) @ Disable the 'default mem-map' +AR_CLASS(orr r0, r0, #CR_M) @ Set SCTRL.M (MPU on) +AR_CLASS(mcr p15, 0, r0, c1, c0, 0) @ Enable MPU + +M_CLASS(ldr r0, [r12, #MPU_CTRL]) +M_CLASS(bic r0, #MPU_CTRL_PRIVDEFENA) +M_CLASS(orr r0, #MPU_CTRL_ENABLE) +M_CLASS(str r0, [r12, #MPU_CTRL]) isb ret lr diff --git a/arch/arm/mm/pmsa-v7.c b/arch/arm/mm/pmsa-v7.c index cd798271a78d..06e2a853cab9 100644 --- a/arch/arm/mm/pmsa-v7.c +++ b/arch/arm/mm/pmsa-v7.c @@ -15,6 +15,8 @@ static unsigned int __initdata mpu_min_region_order; static unsigned int __initdata mpu_max_regions; +#ifndef CONFIG_CPU_V7M + #define DRBAR __ACCESS_CP15(c6, 0, c1, 0) #define IRBAR __ACCESS_CP15(c6, 0, c1, 1) #define DRSR __ACCESS_CP15(c6, 0, c1, 2) @@ -78,6 +80,51 @@ static inline u32 irbar_read(void) return read_sysreg(IRBAR); } +#else + +static inline void rgnr_write(u32 v) +{ + writel_relaxed(v, BASEADDR_V7M_SCB + MPU_RNR); +} + +/* Data-side / unified region attributes */ + +/* Region access control register */ +static inline void dracr_write(u32 v) +{ + u32 rsr = readl_relaxed(BASEADDR_V7M_SCB + MPU_RASR) & GENMASK(15, 0); + + writel_relaxed((v << 16) | rsr, BASEADDR_V7M_SCB + MPU_RASR); +} + +/* Region size register */ +static inline void drsr_write(u32 v) +{ + u32 racr = readl_relaxed(BASEADDR_V7M_SCB + MPU_RASR) & GENMASK(31, 16); + + writel_relaxed(v | racr, BASEADDR_V7M_SCB + MPU_RASR); +} + +/* Region base address register */ +static inline void drbar_write(u32 v) +{ + writel_relaxed(v, BASEADDR_V7M_SCB + MPU_RBAR); +} + +static inline u32 drbar_read(void) +{ + return readl_relaxed(BASEADDR_V7M_SCB + MPU_RBAR); +} + +/* ARMv7-M only supports a unified MPU, so I-side operations are nop */ + +static inline void iracr_write(u32 v) {} +static inline void irsr_write(u32 v) {} +static inline void irbar_write(u32 v) {} +static inline unsigned long irbar_read(void) {return 0;} + +#endif + static int __init mpu_present(void) { return ((read_cpuid_ext(CPUID_EXT_MMFR0) & MMFR0_PMSA) == MMFR0_PMSAv7); @@ -166,7 +213,7 @@ static int __init __mpu_max_regions(void) */ u32 dregions, iregions, mpuir; - mpuir = read_cpuid(CPUID_MPUIR); + mpuir = read_cpuid_mputype(); dregions = iregions = (mpuir & MPUIR_DREGION_SZMASK) >> MPUIR_DREGION; @@ -181,7 +228,7 @@ static int __init __mpu_max_regions(void) static int __init mpu_iside_independent(void) { /* MPUIR.nU specifies whether there is *not* a unified memory map */ - return read_cpuid(CPUID_MPUIR) & MPUIR_nU; + return read_cpuid_mputype() & MPUIR_nU; } static int __init __mpu_min_region_order(void) @@ -284,9 +331,11 @@ void __init mpu_setup(void) MPU_AP_PL1RW_PL0RW | MPU_RGN_NORMAL); /* Vectors */ +#ifndef CONFIG_CPU_V7M err |= mpu_setup_region(region++, vectors_base, ilog2(2 * PAGE_SIZE), MPU_AP_PL1RW_PL0NA | MPU_RGN_NORMAL); +#endif if (err) { panic("MPU region initialization failure! %d", err); } else { -- cgit v1.2.3 From 5c9d9a1b3a540779ba3f2d5e81150b2d92dcb74a Mon Sep 17 00:00:00 2001 From: Vladimir Murzin Date: Mon, 16 Oct 2017 12:59:15 +0100 Subject: ARM: 8712/1: NOMMU: Use more MPU regions to cover memory MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit PMSAv7 defines curious alignment requirements to the regions: - size must be power of 2, and - region start must be aligned to the region size Because of that we currently adjust lowmem bounds plus we assign only one MPU region to cover memory all these lead to significant amount of memory could be wasted. As an example, consider 64Mb of memory at 0x70000000 - it fits alignment requirements nicely; now, imagine that 2Mb of memory is reserved for coherent DMA allocation, so now Linux is expected to see 62Mb of memory... and here annoying thing happens - memory gets truncated to 32Mb (we've lost 30Mb!), i.e. MPU layout looks like: 0: base 0x70000000, size 0x2000000 This patch tries to allocate as much as possible MPU slots to minimise amount of truncated memory. Moreover, with this patch MPU subregions starting to get used. MPU subregions allow us reduce the number of MPU slots used. For example given above, MPU layout looks like: 0: base 0x70000000, size 0x2000000 1: base 0x72000000, size 0x1000000 2: base 0x73000000, size 0x1000000, disable subreg 7 (0x73e00000 - 0x73ffffff) Where without subregions we'd get: 0: base 0x70000000, size 0x2000000 1: base 0x72000000, size 0x1000000 2: base 0x73000000, size 0x800000 3: base 0x73800000, size 0x400000 4: base 0x73c00000, size 0x200000 To achieve better layout we fist try to cover specified memory as is (maybe with help of subregions) and if we failed, we truncate memory to fit alignment requirements (so it occupies one MPU slot) and perform one more attempt with the reminder, and so on till we either cover all memory or run out of MPU slots. Tested-by: Szemző András Tested-by: Alexandre TORGUE Tested-by: Benjamin Gaignard Signed-off-by: Vladimir Murzin Signed-off-by: Russell King --- arch/arm/include/asm/mpu.h | 5 ++ arch/arm/mm/pmsa-v7.c | 190 ++++++++++++++++++++++++++++++++++----------- 2 files changed, 149 insertions(+), 46 deletions(-) diff --git a/arch/arm/include/asm/mpu.h b/arch/arm/include/asm/mpu.h index 403462e1df9d..5db37a6ef3cb 100644 --- a/arch/arm/include/asm/mpu.h +++ b/arch/arm/include/asm/mpu.h @@ -15,6 +15,11 @@ /* MPU D/I Size Register fields */ #define MPU_RSR_SZ 1 #define MPU_RSR_EN 0 +#define MPU_RSR_SD 8 + +/* Number of subregions (SD) */ +#define MPU_NR_SUBREGS 8 +#define MPU_MIN_SUBREG_SIZE 256 /* The D/I RSR value for an enabled region spanning the whole of memory */ #define MPU_RSR_ALL_MEM 63 diff --git a/arch/arm/mm/pmsa-v7.c b/arch/arm/mm/pmsa-v7.c index 06e2a853cab9..ef204634a16e 100644 --- a/arch/arm/mm/pmsa-v7.c +++ b/arch/arm/mm/pmsa-v7.c @@ -4,6 +4,7 @@ * ARM PMSAv7 supporting functions. */ +#include #include #include @@ -12,9 +13,20 @@ #include "mm.h" +struct region { + phys_addr_t base; + phys_addr_t size; + unsigned long subreg; +}; + +static struct region __initdata mem[MPU_MAX_REGIONS]; + static unsigned int __initdata mpu_min_region_order; static unsigned int __initdata mpu_max_regions; +static int __init __mpu_min_region_order(void); +static int __init __mpu_max_regions(void); + #ifndef CONFIG_CPU_V7M #define DRBAR __ACCESS_CP15(c6, 0, c1, 0) @@ -130,19 +142,120 @@ static int __init mpu_present(void) return ((read_cpuid_ext(CPUID_EXT_MMFR0) & MMFR0_PMSA) == MMFR0_PMSAv7); } +static bool __init try_split_region(phys_addr_t base, phys_addr_t size, struct region *region) +{ + unsigned long subreg, bslots, sslots; + phys_addr_t abase = base & ~(size - 1); + phys_addr_t asize = base + size - abase; + phys_addr_t p2size = 1 << __fls(asize); + phys_addr_t bdiff, sdiff; + + if (p2size != asize) + p2size *= 2; + + bdiff = base - abase; + sdiff = p2size - asize; + subreg = p2size / MPU_NR_SUBREGS; + + if ((bdiff % subreg) || (sdiff % subreg)) + return false; + + bslots = bdiff / subreg; + sslots = sdiff / subreg; + + if (bslots || sslots) { + int i; + + if (subreg < MPU_MIN_SUBREG_SIZE) + return false; + + if (bslots + sslots > MPU_NR_SUBREGS) + return false; + + for (i = 0; i < bslots; i++) + _set_bit(i, ®ion->subreg); + + for (i = 1; i <= sslots; i++) + _set_bit(MPU_NR_SUBREGS - i, ®ion->subreg); + } + + region->base = abase; + region->size = p2size; + + return true; +} + +static int __init allocate_region(phys_addr_t base, phys_addr_t size, + unsigned int limit, struct region *regions) +{ + int count = 0; + phys_addr_t diff = size; + int attempts = MPU_MAX_REGIONS; + + while (diff) { + /* Try cover region as is (maybe with help of subregions) */ + if (try_split_region(base, size, ®ions[count])) { + count++; + base += size; + diff -= size; + size = diff; + } else { + /* + * Maximum aligned region might overflow phys_addr_t + * if "base" is 0. Hence we keep everything below 4G + * until we take the smaller of the aligned region + * size ("asize") and rounded region size ("p2size"), + * one of which is guaranteed to be smaller than the + * maximum physical address. + */ + phys_addr_t asize = (base - 1) ^ base; + phys_addr_t p2size = (1 << __fls(diff)) - 1; + + size = asize < p2size ? asize + 1 : p2size + 1; + } + + if (count > limit) + break; + + if (!attempts) + break; + + attempts--; + } + + return count; +} + /* MPU initialisation functions */ void __init adjust_lowmem_bounds_mpu(void) { phys_addr_t phys_offset = PHYS_OFFSET; - phys_addr_t aligned_region_size, specified_mem_size, rounded_mem_size; + phys_addr_t specified_mem_size, total_mem_size = 0; struct memblock_region *reg; bool first = true; phys_addr_t mem_start; phys_addr_t mem_end; + unsigned int mem_max_regions; + int num, i; if (!mpu_present()) return; + /* Free-up MPU_PROBE_REGION */ + mpu_min_region_order = __mpu_min_region_order(); + + /* How many regions are supported */ + mpu_max_regions = __mpu_max_regions(); + + mem_max_regions = min((unsigned int)MPU_MAX_REGIONS, mpu_max_regions); + + /* We need to keep one slot for background region */ + mem_max_regions--; + +#ifndef CONFIG_CPU_V7M + /* ... and one for vectors */ + mem_max_regions--; +#endif for_each_memblock(memory, reg) { if (first) { /* @@ -168,40 +281,23 @@ void __init adjust_lowmem_bounds_mpu(void) } } - /* - * MPU has curious alignment requirements: Size must be power of 2, and - * region start must be aligned to the region size - */ - if (phys_offset != 0) - pr_info("PHYS_OFFSET != 0 => MPU Region size constrained by alignment requirements\n"); - - /* - * Maximum aligned region might overflow phys_addr_t if phys_offset is - * 0. Hence we keep everything below 4G until we take the smaller of - * the aligned_region_size and rounded_mem_size, one of which is - * guaranteed to be smaller than the maximum physical address. - */ - aligned_region_size = (phys_offset - 1) ^ (phys_offset); - /* Find the max power-of-two sized region that fits inside our bank */ - rounded_mem_size = (1 << __fls(specified_mem_size)) - 1; + num = allocate_region(mem_start, specified_mem_size, mem_max_regions, mem); - /* The actual region size is the smaller of the two */ - aligned_region_size = aligned_region_size < rounded_mem_size - ? aligned_region_size + 1 - : rounded_mem_size + 1; + for (i = 0; i < num; i++) { + unsigned long subreg = mem[i].size / MPU_NR_SUBREGS; - if (aligned_region_size != specified_mem_size) { - pr_warn("Truncating memory from %pa to %pa (MPU region constraints)", - &specified_mem_size, &aligned_region_size); - memblock_remove(mem_start + aligned_region_size, - specified_mem_size - aligned_region_size); + total_mem_size += mem[i].size - subreg * hweight_long(mem[i].subreg); - mem_end = mem_start + aligned_region_size; + pr_debug("MPU: base %pa size %pa disable subregions: %*pbl\n", + &mem[i].base, &mem[i].size, MPU_NR_SUBREGS, &mem[i].subreg); } - pr_debug("MPU Region from %pa size %pa (end %pa))\n", - &phys_offset, &aligned_region_size, &mem_end); - + if (total_mem_size != specified_mem_size) { + pr_warn("Truncating memory from %pa to %pa (MPU region constraints)", + &specified_mem_size, &total_mem_size); + memblock_remove(mem_start + total_mem_size, + specified_mem_size - total_mem_size); + } } static int __init __mpu_max_regions(void) @@ -258,7 +354,8 @@ static int __init __mpu_min_region_order(void) } static int __init mpu_setup_region(unsigned int number, phys_addr_t start, - unsigned int size_order, unsigned int properties) + unsigned int size_order, unsigned int properties, + unsigned int subregions) { u32 size_data; @@ -275,6 +372,7 @@ static int __init mpu_setup_region(unsigned int number, phys_addr_t start, /* Writing N to bits 5:1 (RSR_SZ) specifies region size 2^N+1 */ size_data = ((size_order - 1) << MPU_RSR_SZ) | 1 << MPU_RSR_EN; + size_data |= subregions << MPU_RSR_SD; dsb(); /* Ensure all previous data accesses occur with old mappings */ rgnr_write(number); @@ -308,33 +406,33 @@ static int __init mpu_setup_region(unsigned int number, phys_addr_t start, */ void __init mpu_setup(void) { - int region = 0, err = 0; + int i, region = 0, err = 0; if (!mpu_present()) return; - /* Free-up MPU_PROBE_REGION */ - mpu_min_region_order = __mpu_min_region_order(); - - /* How many regions are supported */ - mpu_max_regions = __mpu_max_regions(); - - /* Now setup MPU (order is important) */ + /* Setup MPU (order is important) */ /* Background */ err |= mpu_setup_region(region++, 0, 32, - MPU_ACR_XN | MPU_RGN_STRONGLY_ORDERED | MPU_AP_PL1RW_PL0NA); + MPU_ACR_XN | MPU_RGN_STRONGLY_ORDERED | MPU_AP_PL1RW_PL0NA, + 0); /* RAM */ - err |= mpu_setup_region(region++, PHYS_OFFSET, - ilog2(memblock.memory.regions[0].size), - MPU_AP_PL1RW_PL0RW | MPU_RGN_NORMAL); + for (i = 0; i < ARRAY_SIZE(mem); i++) { + if (!mem[i].size) + continue; + + err |= mpu_setup_region(region++, mem[i].base, ilog2(mem[i].size), + MPU_AP_PL1RW_PL0RW | MPU_RGN_NORMAL, + mem[i].subreg); + } /* Vectors */ #ifndef CONFIG_CPU_V7M - err |= mpu_setup_region(region++, vectors_base, - ilog2(2 * PAGE_SIZE), - MPU_AP_PL1RW_PL0NA | MPU_RGN_NORMAL); + err |= mpu_setup_region(region++, vectors_base, ilog2(2 * PAGE_SIZE), + MPU_AP_PL1RW_PL0NA | MPU_RGN_NORMAL, + 0); #endif if (err) { panic("MPU region initialization failure! %d", err); -- cgit v1.2.3 From 216218308cfb0939aeecb246b34faf6e179c8d57 Mon Sep 17 00:00:00 2001 From: Vladimir Murzin Date: Mon, 16 Oct 2017 13:00:45 +0100 Subject: ARM: 8713/1: NOMMU: Support MPU in XIP configuration Currently, there is assumption in early MPU setup code that kernel image is located in RAM, which is obviously not true for XIP. To run code from ROM we need to make sure that it is covered by MPU. However, due to we allocate regions (semi-)dynamically we can run into issue of trimming region we are running from in case ROM spawns several MPU regions. To help deal with that we enforce minimum alignments for start end end of XIP address space as 1MB and 128Kb correspondingly. Tested-by: Alexandre TORGUE Tested-by: Benjamin Gaignard Signed-off-by: Vladimir Murzin Signed-off-by: Russell King --- arch/arm/Kconfig-nommu | 2 +- arch/arm/include/asm/mpu.h | 3 ++- arch/arm/kernel/head-nommu.S | 20 ++++++++++++++++ arch/arm/kernel/vmlinux-xip.lds.S | 23 ++++++++++++++++++ arch/arm/mm/pmsa-v7.c | 49 +++++++++++++++++++++++++++++++++++---- 5 files changed, 90 insertions(+), 7 deletions(-) diff --git a/arch/arm/Kconfig-nommu b/arch/arm/Kconfig-nommu index 930e000489a8..0fad7d943630 100644 --- a/arch/arm/Kconfig-nommu +++ b/arch/arm/Kconfig-nommu @@ -52,7 +52,7 @@ config REMAP_VECTORS_TO_RAM config ARM_MPU bool 'Use the ARM v7 PMSA Compliant MPU' - depends on !XIP_KERNEL && (CPU_V7 || CPU_V7M) + depends on CPU_V7 || CPU_V7M default y if CPU_V7 help Some ARM systems without an MMU have instead a Memory Protection diff --git a/arch/arm/include/asm/mpu.h b/arch/arm/include/asm/mpu.h index 5db37a6ef3cb..56ec02617f58 100644 --- a/arch/arm/include/asm/mpu.h +++ b/arch/arm/include/asm/mpu.h @@ -41,6 +41,7 @@ #endif /* Access permission bits of ACR (only define those that we use)*/ +#define MPU_AP_PL1RO_PL0NA (0x5 << 8) #define MPU_AP_PL1RW_PL0RW (0x3 << 8) #define MPU_AP_PL1RW_PL0R0 (0x2 << 8) #define MPU_AP_PL1RW_PL0NA (0x1 << 8) @@ -49,7 +50,7 @@ #define MPU_PROBE_REGION 0 #define MPU_BG_REGION 1 #define MPU_RAM_REGION 2 -#define MPU_VECTORS_REGION 3 +#define MPU_ROM_REGION 3 /* Maximum number of regions Linux is interested in */ #define MPU_MAX_REGIONS 16 diff --git a/arch/arm/kernel/head-nommu.S b/arch/arm/kernel/head-nommu.S index 0d64b8ba7e9c..2e38f85b757a 100644 --- a/arch/arm/kernel/head-nommu.S +++ b/arch/arm/kernel/head-nommu.S @@ -258,6 +258,26 @@ M_CLASS(ldr r0, [r12, #MPU_TYPE]) setup_region r0, r5, r6, MPU_INSTR_SIDE r12 @ 0x0, BG region, enabled 2: isb +#ifdef CONFIG_XIP_KERNEL + set_region_nr r0, #MPU_ROM_REGION, r12 + isb + + ldr r5,=(MPU_AP_PL1RO_PL0NA | MPU_RGN_NORMAL) + + ldr r0, =CONFIG_XIP_PHYS_ADDR @ ROM start + ldr r6, =(_exiprom) @ ROM end + sub r6, r6, r0 @ Minimum size of region to map + clz r6, r6 @ Region size must be 2^N... + rsb r6, r6, #31 @ ...so round up region size + lsl r6, r6, #MPU_RSR_SZ @ Put size in right field + orr r6, r6, #(1 << MPU_RSR_EN) @ Set region enabled bit + + setup_region r0, r5, r6, MPU_DATA_SIDE, r12 @ XIP_PHYS_ADDR, shared, enabled + beq 3f @ Memory-map not unified + setup_region r0, r5, r6, MPU_INSTR_SIDE, r12 @ XIP_PHYS_ADDR, shared, enabled +3: isb +#endif + /* Enable the MPU */ AR_CLASS(mrc p15, 0, r0, c1, c0, 0) @ Read SCTLR AR_CLASS(bic r0, r0, #CR_BR) @ Disable the 'default mem-map' diff --git a/arch/arm/kernel/vmlinux-xip.lds.S b/arch/arm/kernel/vmlinux-xip.lds.S index 7a844310085e..74c93879532a 100644 --- a/arch/arm/kernel/vmlinux-xip.lds.S +++ b/arch/arm/kernel/vmlinux-xip.lds.S @@ -6,6 +6,8 @@ /* No __ro_after_init data in the .rodata section - which will always be ro */ #define RO_AFTER_INIT_DATA +#include + #include #include #include @@ -187,6 +189,9 @@ SECTIONS INIT_RAM_FS } +#ifdef CONFIG_ARM_MPU + . = ALIGN(SZ_128K); +#endif _exiprom = .; /* End of XIP ROM area */ /* @@ -314,3 +319,21 @@ ASSERT(__hyp_idmap_text_end - (__hyp_idmap_text_start & PAGE_MASK) <= PAGE_SIZE, */ ASSERT((_end - __bss_start) >= 12288, ".bss too small for CONFIG_XIP_DEFLATED_DATA") #endif + +#ifdef CONFIG_ARM_MPU +/* + * Due to PMSAv7 restriction on base address and size we have to + * enforce minimal alignment restrictions. It was seen that weaker + * alignment restriction on _xiprom will likely force XIP address + * space spawns multiple MPU regions thus it is likely we run in + * situation when we are reprogramming MPU region we run on with + * something which doesn't cover reprogramming code itself, so as soon + * as we update MPU settings we'd immediately try to execute straight + * from background region which is XN. + * It seem that alignment in 1M should suit most users. + * _exiprom is aligned as 1/8 of 1M so can be covered by subregion + * disable + */ +ASSERT(!(_xiprom & (SZ_1M - 1)), "XIP start address may cause MPU programming issues") +ASSERT(!(_exiprom & (SZ_128K - 1)), "XIP end address may cause MPU programming issues") +#endif diff --git a/arch/arm/mm/pmsa-v7.c b/arch/arm/mm/pmsa-v7.c index ef204634a16e..106ae1c435a3 100644 --- a/arch/arm/mm/pmsa-v7.c +++ b/arch/arm/mm/pmsa-v7.c @@ -7,9 +7,11 @@ #include #include +#include #include #include #include +#include #include "mm.h" @@ -20,6 +22,9 @@ struct region { }; static struct region __initdata mem[MPU_MAX_REGIONS]; +#ifdef CONFIG_XIP_KERNEL +static struct region __initdata xip[MPU_MAX_REGIONS]; +#endif static unsigned int __initdata mpu_min_region_order; static unsigned int __initdata mpu_max_regions; @@ -229,7 +234,6 @@ static int __init allocate_region(phys_addr_t base, phys_addr_t size, /* MPU initialisation functions */ void __init adjust_lowmem_bounds_mpu(void) { - phys_addr_t phys_offset = PHYS_OFFSET; phys_addr_t specified_mem_size, total_mem_size = 0; struct memblock_region *reg; bool first = true; @@ -256,8 +260,19 @@ void __init adjust_lowmem_bounds_mpu(void) /* ... and one for vectors */ mem_max_regions--; #endif + +#ifdef CONFIG_XIP_KERNEL + /* plus some regions to cover XIP ROM */ + num = allocate_region(CONFIG_XIP_PHYS_ADDR, __pa(_exiprom) - CONFIG_XIP_PHYS_ADDR, + mem_max_regions, xip); + + mem_max_regions -= num; +#endif + for_each_memblock(memory, reg) { if (first) { + phys_addr_t phys_offset = PHYS_OFFSET; + /* * Initially only use memory continuous from * PHYS_OFFSET */ @@ -355,7 +370,7 @@ static int __init __mpu_min_region_order(void) static int __init mpu_setup_region(unsigned int number, phys_addr_t start, unsigned int size_order, unsigned int properties, - unsigned int subregions) + unsigned int subregions, bool need_flush) { u32 size_data; @@ -374,6 +389,9 @@ static int __init mpu_setup_region(unsigned int number, phys_addr_t start, size_data = ((size_order - 1) << MPU_RSR_SZ) | 1 << MPU_RSR_EN; size_data |= subregions << MPU_RSR_SD; + if (need_flush) + flush_cache_all(); + dsb(); /* Ensure all previous data accesses occur with old mappings */ rgnr_write(number); isb(); @@ -416,7 +434,28 @@ void __init mpu_setup(void) /* Background */ err |= mpu_setup_region(region++, 0, 32, MPU_ACR_XN | MPU_RGN_STRONGLY_ORDERED | MPU_AP_PL1RW_PL0NA, - 0); + 0, false); + +#ifdef CONFIG_XIP_KERNEL + /* ROM */ + for (i = 0; i < ARRAY_SIZE(xip); i++) { + /* + * In case we overwrite RAM region we set earlier in + * head-nommu.S (which is cachable) all subsequent + * data access till we setup RAM bellow would be done + * with BG region (which is uncachable), thus we need + * to clean and invalidate cache. + */ + bool need_flush = region == MPU_RAM_REGION; + + if (!xip[i].size) + continue; + + err |= mpu_setup_region(region++, xip[i].base, ilog2(xip[i].size), + MPU_AP_PL1RO_PL0NA | MPU_RGN_NORMAL, + xip[i].subreg, need_flush); + } +#endif /* RAM */ for (i = 0; i < ARRAY_SIZE(mem); i++) { @@ -425,14 +464,14 @@ void __init mpu_setup(void) err |= mpu_setup_region(region++, mem[i].base, ilog2(mem[i].size), MPU_AP_PL1RW_PL0RW | MPU_RGN_NORMAL, - mem[i].subreg); + mem[i].subreg, false); } /* Vectors */ #ifndef CONFIG_CPU_V7M err |= mpu_setup_region(region++, vectors_base, ilog2(2 * PAGE_SIZE), MPU_AP_PL1RW_PL0NA | MPU_RGN_NORMAL, - 0); + 0, false); #endif if (err) { panic("MPU region initialization failure! %d", err); -- cgit v1.2.3 From 2a14b80cb03bee9a4973e5a91e0ed48df25b57ce Mon Sep 17 00:00:00 2001 From: Nicolas Pitre Date: Thu, 2 Nov 2017 21:58:41 +0100 Subject: ARM: 8717/2: debug printch/printascii: translate '\n' to "\r\n" not "\n\r" Some terminals apparently have issues with "\n\r" and mess up the display. Let's use the traditional "\r\n" ordering. Signed-off-by: Nicolas Pitre Reported-by: Chris Brandt Tested-by: Chris Brandt Signed-off-by: Russell King --- arch/arm/kernel/debug.S | 25 ++++++++++++++----------- 1 file changed, 14 insertions(+), 11 deletions(-) diff --git a/arch/arm/kernel/debug.S b/arch/arm/kernel/debug.S index 423f4432586d..611ded664d61 100644 --- a/arch/arm/kernel/debug.S +++ b/arch/arm/kernel/debug.S @@ -85,25 +85,28 @@ hexbuf_rel: .long hexbuf_addr - . ENTRY(printascii) addruart_current r3, r1, r2 - b 2f -1: waituart r2, r3 - senduart r1, r3 - busyuart r2, r3 - teq r1, #'\n' - moveq r1, #'\r' - beq 1b -2: teq r0, #0 +1: teq r0, #0 ldrneb r1, [r0], #1 teqne r1, #0 - bne 1b - ret lr + reteq lr +2: teq r1, #'\n' + bne 3f + mov r1, #'\r' + waituart r2, r3 + senduart r1, r3 + busyuart r2, r3 + mov r1, #'\n' +3: waituart r2, r3 + senduart r1, r3 + busyuart r2, r3 + b 1b ENDPROC(printascii) ENTRY(printch) addruart_current r3, r1, r2 mov r1, r0 mov r0, #0 - b 1b + b 2b ENDPROC(printch) #ifdef CONFIG_MMU -- cgit v1.2.3 From fe9c0589eeef4b3edbaad9f7500679a2eeafe951 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 2 Nov 2017 13:20:31 +0100 Subject: ARM: 8719/1: NOMMU: work around maybe-uninitialized warning The reworked MPU code produces a new warning in some configurations, presumably starting with the code move after the compiler now makes different inlining decisions: arch/arm/mm/pmsa-v7.c: In function 'adjust_lowmem_bounds_mpu': arch/arm/mm/pmsa-v7.c:310:5: error: 'specified_mem_size' may be used uninitialized in this function [-Werror=maybe-uninitialized] This appears to be harmless, as we know that there is always at least one memblock, and the only way this could get triggered is if the for_each_memblock() loop was never entered. I could not come up with a better workaround than initializing the specified_mem_size to zero, but at least that is the value that the variable would have in the hypothetical case of no memblocks. Fixes: 877ec119dbbf ("ARM: 8706/1: NOMMU: Move out MPU setup in separate module") Signed-off-by: Arnd Bergmann Reviewed-by: Vladimir Murzin Signed-off-by: Russell King --- arch/arm/mm/pmsa-v7.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/mm/pmsa-v7.c b/arch/arm/mm/pmsa-v7.c index 106ae1c435a3..976df60ac426 100644 --- a/arch/arm/mm/pmsa-v7.c +++ b/arch/arm/mm/pmsa-v7.c @@ -234,7 +234,7 @@ static int __init allocate_region(phys_addr_t base, phys_addr_t size, /* MPU initialisation functions */ void __init adjust_lowmem_bounds_mpu(void) { - phys_addr_t specified_mem_size, total_mem_size = 0; + phys_addr_t specified_mem_size = 0, total_mem_size = 0; struct memblock_region *reg; bool first = true; phys_addr_t mem_start; -- cgit v1.2.3