Diffstat (limited to 'arch/arm/kernel')
40 files changed, 2195 insertions, 963 deletions
diff --git a/arch/arm/kernel/Makefile b/arch/arm/kernel/Makefile index 902397dd1000..e69f7a19735d 100644 --- a/arch/arm/kernel/Makefile +++ b/arch/arm/kernel/Makefile @@ -16,7 +16,7 @@ CFLAGS_REMOVE_return_address.o = -pg # Object file lists. obj-y := elf.o entry-common.o irq.o opcodes.o \ - process.o ptrace.o return_address.o \ + process.o ptrace.o reboot.o return_address.o \ setup.o signal.o sigreturn_codes.o \ stacktrace.o sys_arm.o time.o traps.o @@ -34,7 +34,7 @@ obj-$(CONFIG_CPU_IDLE) += cpuidle.o obj-$(CONFIG_ISA_DMA_API) += dma.o obj-$(CONFIG_FIQ) += fiq.o fiqasm.o obj-$(CONFIG_MODULES) += armksyms.o module.o -obj-$(CONFIG_ARTHUR) += arthur.o +obj-$(CONFIG_ARM_MODULE_PLTS) += module-plts.o obj-$(CONFIG_ISA_DMA) += dma-isa.o obj-$(CONFIG_PCI) += bios32.o isa.o obj-$(CONFIG_ARM_CPU_SUSPEND) += sleep.o suspend.o @@ -71,10 +71,13 @@ obj-$(CONFIG_CPU_PJ4) += pj4-cp0.o obj-$(CONFIG_CPU_PJ4B) += pj4-cp0.o obj-$(CONFIG_IWMMXT) += iwmmxt.o obj-$(CONFIG_PERF_EVENTS) += perf_regs.o perf_callchain.o -obj-$(CONFIG_HW_PERF_EVENTS) += perf_event.o perf_event_cpu.o +obj-$(CONFIG_HW_PERF_EVENTS) += perf_event.o \ + perf_event_xscale.o perf_event_v6.o \ + perf_event_v7.o CFLAGS_pj4-cp0.o := -marm AFLAGS_iwmmxt.o := -Wa,-mcpu=iwmmxt obj-$(CONFIG_ARM_CPU_TOPOLOGY) += topology.o +obj-$(CONFIG_VDSO) += vdso.o ifneq ($(CONFIG_ARCH_EBSA110),y) obj-y += io.o @@ -86,7 +89,7 @@ obj-$(CONFIG_EARLY_PRINTK) += early_printk.o obj-$(CONFIG_ARM_VIRT_EXT) += hyp-stub.o ifeq ($(CONFIG_ARM_PSCI),y) -obj-y += psci.o +obj-y += psci.o psci-call.o obj-$(CONFIG_SMP) += psci_smp.o endif diff --git a/arch/arm/kernel/armksyms.c b/arch/arm/kernel/armksyms.c index a88671cfe1ff..5e5a51a99e68 100644 --- a/arch/arm/kernel/armksyms.c +++ b/arch/arm/kernel/armksyms.c @@ -50,6 +50,9 @@ extern void __aeabi_ulcmp(void); extern void fpundefinstr(void); +void mmioset(void *, unsigned int, size_t); +void mmiocpy(void *, const void *, size_t); + /* platform dependent support */ EXPORT_SYMBOL(arm_delay_ops); @@ -88,6 +91,9 @@ EXPORT_SYMBOL(memmove); EXPORT_SYMBOL(memchr); EXPORT_SYMBOL(__memzero); +EXPORT_SYMBOL(mmioset); +EXPORT_SYMBOL(mmiocpy); + #ifdef CONFIG_MMU EXPORT_SYMBOL(copy_page); diff --git a/arch/arm/kernel/arthur.c b/arch/arm/kernel/arthur.c deleted file mode 100644 index 321c5291d05f..000000000000 --- a/arch/arm/kernel/arthur.c +++ /dev/null @@ -1,94 +0,0 @@ -/* - * linux/arch/arm/kernel/arthur.c - * - * Copyright (C) 1998, 1999, 2000, 2001 Philip Blundell - * - * Arthur personality - */ - -/* - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - */ - -#include <linux/module.h> -#include <linux/personality.h> -#include <linux/stddef.h> -#include <linux/signal.h> -#include <linux/init.h> -#include <linux/sched.h> - -#include <asm/ptrace.h> - -/* Arthur doesn't have many signals, and a lot of those that it does - have don't map easily to any Linux equivalent. Never mind. 
*/ - -#define ARTHUR_SIGABRT 1 -#define ARTHUR_SIGFPE 2 -#define ARTHUR_SIGILL 3 -#define ARTHUR_SIGINT 4 -#define ARTHUR_SIGSEGV 5 -#define ARTHUR_SIGTERM 6 -#define ARTHUR_SIGSTAK 7 -#define ARTHUR_SIGUSR1 8 -#define ARTHUR_SIGUSR2 9 -#define ARTHUR_SIGOSERROR 10 - -static unsigned long arthur_to_linux_signals[32] = { - 0, 1, 2, 3, 4, 5, 6, 7, - 8, 9, 10, 11, 12, 13, 14, 15, - 16, 17, 18, 19, 20, 21, 22, 23, - 24, 25, 26, 27, 28, 29, 30, 31 -}; - -static unsigned long linux_to_arthur_signals[32] = { - 0, -1, ARTHUR_SIGINT, -1, - ARTHUR_SIGILL, 5, ARTHUR_SIGABRT, 7, - ARTHUR_SIGFPE, 9, ARTHUR_SIGUSR1, ARTHUR_SIGSEGV, - ARTHUR_SIGUSR2, 13, 14, ARTHUR_SIGTERM, - 16, 17, 18, 19, - 20, 21, 22, 23, - 24, 25, 26, 27, - 28, 29, 30, 31 -}; - -static void arthur_lcall7(int nr, struct pt_regs *regs) -{ - struct siginfo info; - info.si_signo = SIGSWI; - info.si_errno = nr; - /* Bounce it to the emulator */ - send_sig_info(SIGSWI, &info, current); -} - -static struct exec_domain arthur_exec_domain = { - .name = "Arthur", - .handler = arthur_lcall7, - .pers_low = PER_RISCOS, - .pers_high = PER_RISCOS, - .signal_map = arthur_to_linux_signals, - .signal_invmap = linux_to_arthur_signals, - .module = THIS_MODULE, -}; - -/* - * We could do with some locking to stop Arthur being removed while - * processes are using it. - */ - -static int __init arthur_init(void) -{ - return register_exec_domain(&arthur_exec_domain); -} - -static void __exit arthur_exit(void) -{ - unregister_exec_domain(&arthur_exec_domain); -} - -module_init(arthur_init); -module_exit(arthur_exit); - -MODULE_LICENSE("GPL"); diff --git a/arch/arm/kernel/asm-offsets.c b/arch/arm/kernel/asm-offsets.c index 2d2d6087b9b1..871b8267d211 100644 --- a/arch/arm/kernel/asm-offsets.c +++ b/arch/arm/kernel/asm-offsets.c @@ -25,6 +25,7 @@ #include <asm/memory.h> #include <asm/procinfo.h> #include <asm/suspend.h> +#include <asm/vdso_datapage.h> #include <asm/hardware/cache-l2x0.h> #include <linux/kbuild.h> @@ -66,7 +67,6 @@ int main(void) DEFINE(TI_PREEMPT, offsetof(struct thread_info, preempt_count)); DEFINE(TI_ADDR_LIMIT, offsetof(struct thread_info, addr_limit)); DEFINE(TI_TASK, offsetof(struct thread_info, task)); - DEFINE(TI_EXEC_DOMAIN, offsetof(struct thread_info, exec_domain)); DEFINE(TI_CPU, offsetof(struct thread_info, cpu)); DEFINE(TI_CPU_DOMAIN, offsetof(struct thread_info, cpu_domain)); DEFINE(TI_CPU_SAVE, offsetof(struct thread_info, cpu_context)); @@ -190,7 +190,6 @@ int main(void) DEFINE(VCPU_HxFAR, offsetof(struct kvm_vcpu, arch.fault.hxfar)); DEFINE(VCPU_HPFAR, offsetof(struct kvm_vcpu, arch.fault.hpfar)); DEFINE(VCPU_HYP_PC, offsetof(struct kvm_vcpu, arch.fault.hyp_pc)); -#ifdef CONFIG_KVM_ARM_VGIC DEFINE(VCPU_VGIC_CPU, offsetof(struct kvm_vcpu, arch.vgic_cpu)); DEFINE(VGIC_V2_CPU_HCR, offsetof(struct vgic_cpu, vgic_v2.vgic_hcr)); DEFINE(VGIC_V2_CPU_VMCR, offsetof(struct vgic_cpu, vgic_v2.vgic_vmcr)); @@ -200,15 +199,16 @@ int main(void) DEFINE(VGIC_V2_CPU_APR, offsetof(struct vgic_cpu, vgic_v2.vgic_apr)); DEFINE(VGIC_V2_CPU_LR, offsetof(struct vgic_cpu, vgic_v2.vgic_lr)); DEFINE(VGIC_CPU_NR_LR, offsetof(struct vgic_cpu, nr_lr)); -#ifdef CONFIG_KVM_ARM_TIMER DEFINE(VCPU_TIMER_CNTV_CTL, offsetof(struct kvm_vcpu, arch.timer_cpu.cntv_ctl)); DEFINE(VCPU_TIMER_CNTV_CVAL, offsetof(struct kvm_vcpu, arch.timer_cpu.cntv_cval)); DEFINE(KVM_TIMER_CNTVOFF, offsetof(struct kvm, arch.timer.cntvoff)); DEFINE(KVM_TIMER_ENABLED, offsetof(struct kvm, arch.timer.enabled)); -#endif DEFINE(KVM_VGIC_VCTRL, offsetof(struct kvm, arch.vgic.vctrl_base)); 
-#endif DEFINE(KVM_VTTBR, offsetof(struct kvm, arch.vttbr)); #endif + BLANK(); +#ifdef CONFIG_VDSO + DEFINE(VDSO_DATA_SIZE, sizeof(union vdso_data_store)); +#endif return 0; } diff --git a/arch/arm/kernel/bios32.c b/arch/arm/kernel/bios32.c index ab19b7c03423..fcbbbb1b9e95 100644 --- a/arch/arm/kernel/bios32.c +++ b/arch/arm/kernel/bios32.c @@ -618,21 +618,15 @@ int pcibios_enable_device(struct pci_dev *dev, int mask) int pci_mmap_page_range(struct pci_dev *dev, struct vm_area_struct *vma, enum pci_mmap_state mmap_state, int write_combine) { - struct pci_sys_data *root = dev->sysdata; - unsigned long phys; - - if (mmap_state == pci_mmap_io) { + if (mmap_state == pci_mmap_io) return -EINVAL; - } else { - phys = vma->vm_pgoff + (root->mem_offset >> PAGE_SHIFT); - } /* * Mark this as IO */ vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); - if (remap_pfn_range(vma, vma->vm_start, phys, + if (remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff, vma->vm_end - vma->vm_start, vma->vm_page_prot)) return -EAGAIN; diff --git a/arch/arm/kernel/cpuidle.c b/arch/arm/kernel/cpuidle.c index 89545f6c8403..318da33465f4 100644 --- a/arch/arm/kernel/cpuidle.c +++ b/arch/arm/kernel/cpuidle.c @@ -10,8 +10,28 @@ */ #include <linux/cpuidle.h> -#include <asm/proc-fns.h> +#include <linux/of.h> +#include <linux/of_device.h> +#include <asm/cpuidle.h> +extern struct of_cpuidle_method __cpuidle_method_of_table[]; + +static const struct of_cpuidle_method __cpuidle_method_of_table_sentinel + __used __section(__cpuidle_method_of_table_end); + +static struct cpuidle_ops cpuidle_ops[NR_CPUS]; + +/** + * arm_cpuidle_simple_enter() - a wrapper to cpu_do_idle() + * @dev: not used + * @drv: not used + * @index: not used + * + * A trivial wrapper to allow the cpu_do_idle function to be assigned as a + * cpuidle callback by matching the function signature. + * + * Returns the index passed as parameter + */ int arm_cpuidle_simple_enter(struct cpuidle_device *dev, struct cpuidle_driver *drv, int index) { @@ -19,3 +39,114 @@ int arm_cpuidle_simple_enter(struct cpuidle_device *dev, return index; } + +/** + * arm_cpuidle_suspend() - function to enter low power idle states + * @index: an integer used as an identifier for the low level PM callbacks + * + * This function calls the underlying arch specific low level PM code as + * registered at the init time. + * + * Returns -EOPNOTSUPP if no suspend callback is defined, the result of the + * callback otherwise. + */ +int arm_cpuidle_suspend(int index) +{ + int ret = -EOPNOTSUPP; + int cpu = smp_processor_id(); + + if (cpuidle_ops[cpu].suspend) + ret = cpuidle_ops[cpu].suspend(cpu, index); + + return ret; +} + +/** + * arm_cpuidle_get_ops() - find a registered cpuidle_ops by name + * @method: the method name + * + * Search in the __cpuidle_method_of_table array the cpuidle ops matching the + * method name. + * + * Returns a struct cpuidle_ops pointer, NULL if not found. + */ +static struct cpuidle_ops *__init arm_cpuidle_get_ops(const char *method) +{ + struct of_cpuidle_method *m = __cpuidle_method_of_table; + + for (; m->method; m++) + if (!strcmp(m->method, method)) + return m->ops; + + return NULL; +} + +/** + * arm_cpuidle_read_ops() - Initialize the cpuidle ops with the device tree + * @dn: a pointer to a struct device node corresponding to a cpu node + * @cpu: the cpu identifier + * + * Get the method name defined in the 'enable-method' property, retrieve the + * associated cpuidle_ops and do a struct copy. 
This copy is needed because all + * cpuidle_ops are tagged __initdata and will be unloaded after the init + * process. + * + * Return 0 on sucess, -ENOENT if no 'enable-method' is defined, -EOPNOTSUPP if + * no cpuidle_ops is registered for the 'enable-method'. + */ +static int __init arm_cpuidle_read_ops(struct device_node *dn, int cpu) +{ + const char *enable_method; + struct cpuidle_ops *ops; + + enable_method = of_get_property(dn, "enable-method", NULL); + if (!enable_method) + return -ENOENT; + + ops = arm_cpuidle_get_ops(enable_method); + if (!ops) { + pr_warn("%s: unsupported enable-method property: %s\n", + dn->full_name, enable_method); + return -EOPNOTSUPP; + } + + cpuidle_ops[cpu] = *ops; /* structure copy */ + + pr_notice("cpuidle: enable-method property '%s'" + " found operations\n", enable_method); + + return 0; +} + +/** + * arm_cpuidle_init() - Initialize cpuidle_ops for a specific cpu + * @cpu: the cpu to be initialized + * + * Initialize the cpuidle ops with the device for the cpu and then call + * the cpu's idle initialization callback. This may fail if the underlying HW + * is not operational. + * + * Returns: + * 0 on success, + * -ENODEV if it fails to find the cpu node in the device tree, + * -EOPNOTSUPP if it does not find a registered cpuidle_ops for this cpu, + * -ENOENT if it fails to find an 'enable-method' property, + * -ENXIO if the HW reports a failure or a misconfiguration, + * -ENOMEM if the HW report an memory allocation failure + */ +int __init arm_cpuidle_init(int cpu) +{ + struct device_node *cpu_node = of_cpu_device_node_get(cpu); + int ret; + + if (!cpu_node) + return -ENODEV; + + ret = arm_cpuidle_read_ops(cpu_node, cpu); + if (!ret && cpuidle_ops[cpu].init) + ret = cpuidle_ops[cpu].init(cpu_node, cpu); + + of_node_put(cpu_node); + + return ret; +} diff --git a/arch/arm/kernel/debug.S b/arch/arm/kernel/debug.S index 78c91b5f97d4..ea9646cc2a0e 100644 --- a/arch/arm/kernel/debug.S +++ b/arch/arm/kernel/debug.S @@ -35,7 +35,7 @@ #else /* !CONFIG_MMU */ .macro addruart_current, rx, tmp1, tmp2 - addruart \rx, \tmp1 + addruart \rx, \tmp1, \tmp2 .endm #endif /* CONFIG_MMU */ diff --git a/arch/arm/kernel/entry-armv.S b/arch/arm/kernel/entry-armv.S index 672b21942fff..cb4fb1e69778 100644 --- a/arch/arm/kernel/entry-armv.S +++ b/arch/arm/kernel/entry-armv.S @@ -15,6 +15,8 @@ * that causes it to save wrong values... Be aware! 
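A minimal sketch of how a platform back-end would plug into the cpuidle_ops table scanned by arm_cpuidle_get_ops() above, assuming the CPUIDLE_METHOD_OF_DECLARE() registration helper from <asm/cpuidle.h> (not shown in this diff); the "vendor,foo" enable-method string and the callbacks are hypothetical, with prototypes written to match the calls made by arm_cpuidle_suspend() and arm_cpuidle_init():

#include <linux/of.h>
#include <asm/cpuidle.h>

static int foo_cpuidle_init(struct device_node *cpu_node, int cpu)
{
	/* Probe the power controller for this CPU; return -ENXIO or
	 * -ENOMEM on failure, as documented for arm_cpuidle_init(). */
	return 0;
}

static int foo_cpuidle_suspend(int cpu, int index)
{
	/* Program the hardware to enter the state identified by 'index'. */
	return 0;
}

static struct cpuidle_ops foo_cpuidle_ops __initdata = {
	.init		= foo_cpuidle_init,
	.suspend	= foo_cpuidle_suspend,
};

/* Matched against the DT "enable-method" string by arm_cpuidle_read_ops(). */
CPUIDLE_METHOD_OF_DECLARE(foo_idle, "vendor,foo", &foo_cpuidle_ops);

Because arm_cpuidle_read_ops() copies the structure into the per-cpu cpuidle_ops[] array, tagging the registered ops __initdata is safe.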
*/ +#include <linux/init.h> + #include <asm/assembler.h> #include <asm/memory.h> #include <asm/glue-df.h> @@ -40,7 +42,7 @@ #ifdef CONFIG_MULTI_IRQ_HANDLER ldr r1, =handle_arch_irq mov r0, sp - adr lr, BSYM(9997f) + badr lr, 9997f ldr pc, [r1] #else arch_irq_handler_default @@ -273,7 +275,7 @@ __und_svc: str r4, [sp, #S_PC] orr r0, r9, r0, lsl #16 #endif - adr r9, BSYM(__und_svc_finish) + badr r9, __und_svc_finish mov r2, r4 bl call_fpe @@ -408,7 +410,7 @@ ENDPROC(__fiq_abt) zero_fp .if \trace -#ifdef CONFIG_IRQSOFF_TRACER +#ifdef CONFIG_TRACE_IRQFLAGS bl trace_hardirqs_off #endif ct_user_exit save = 0 @@ -469,7 +471,7 @@ __und_usr: @ instruction, or the more conventional lr if we are to treat @ this as a real undefined instruction @ - adr r9, BSYM(ret_from_exception) + badr r9, ret_from_exception @ IRQs must be enabled before attempting to read the instruction from @ user space since that could cause a page/translation fault if the @@ -486,7 +488,7 @@ __und_usr: @ r2 = PC value for the following instruction (:= regs->ARM_pc) @ r4 = PC value for the faulting instruction @ lr = 32-bit undefined instruction function - adr lr, BSYM(__und_usr_fault_32) + badr lr, __und_usr_fault_32 b call_fpe __und_usr_thumb: @@ -522,7 +524,7 @@ ARM_BE8(rev16 r0, r0) @ little endian instruction add r2, r2, #2 @ r2 is PC + 2, make it PC + 4 str r2, [sp, #S_PC] @ it's a 2x16bit instr, update orr r0, r0, r5, lsl #16 - adr lr, BSYM(__und_usr_fault_32) + badr lr, __und_usr_fault_32 @ r0 = the two 16-bit Thumb instructions which caused the exception @ r2 = PC value for the following Thumb instruction (:= regs->ARM_pc) @ r4 = PC value for the first 16-bit Thumb instruction @@ -545,7 +547,7 @@ ENDPROC(__und_usr) /* * The out of line fixup for the ldrt instructions above. */ - .pushsection .fixup, "ax" + .pushsection .text.fixup, "ax" .align 2 4: str r4, [sp, #S_PC] @ retry current instruction ret r9 @@ -716,7 +718,7 @@ __und_usr_fault_32: __und_usr_fault_16: mov r1, #2 1: mov r0, sp - adr lr, BSYM(ret_from_exception) + badr lr, ret_from_exception b __und_fault ENDPROC(__und_usr_fault_32) ENDPROC(__und_usr_fault_16) diff --git a/arch/arm/kernel/entry-common.S b/arch/arm/kernel/entry-common.S index f8ccc21fa032..92828a1dec80 100644 --- a/arch/arm/kernel/entry-common.S +++ b/arch/arm/kernel/entry-common.S @@ -33,7 +33,9 @@ ret_fast_syscall: UNWIND(.fnstart ) UNWIND(.cantunwind ) disable_irq @ disable interrupts - ldr r1, [tsk, #TI_FLAGS] + ldr r1, [tsk, #TI_FLAGS] @ re-check for syscall tracing + tst r1, #_TIF_SYSCALL_WORK + bne __sys_trace_return tst r1, #_TIF_WORK_MASK bne fast_work_pending asm_trace_hardirqs_on @@ -88,7 +90,7 @@ ENTRY(ret_from_fork) bl schedule_tail cmp r5, #0 movne r0, r4 - adrne lr, BSYM(1f) + badrne lr, 1f retne r5 1: get_thread_info tsk b ret_slow_syscall @@ -196,7 +198,7 @@ local_restart: bne __sys_trace cmp scno, #NR_syscalls @ check upper syscall limit - adr lr, BSYM(ret_fast_syscall) @ return address + badr lr, ret_fast_syscall @ return address ldrcc pc, [tbl, scno, lsl #2] @ call sys_* routine add r1, sp, #S_OFF @@ -231,7 +233,7 @@ __sys_trace: add r0, sp, #S_OFF bl syscall_trace_enter - adr lr, BSYM(__sys_trace_return) @ return address + badr lr, __sys_trace_return @ return address mov scno, r0 @ syscall number (possibly new) add r1, sp, #S_R0 + S_OFF @ pointer to regs cmp scno, #NR_syscalls @ check upper syscall limit diff --git a/arch/arm/kernel/entry-ftrace.S b/arch/arm/kernel/entry-ftrace.S index fe57c73e70a4..c73c4030ca5d 100644 --- a/arch/arm/kernel/entry-ftrace.S +++ 
b/arch/arm/kernel/entry-ftrace.S @@ -87,7 +87,7 @@ 1: mcount_get_lr r1 @ lr of instrumented func mcount_adjust_addr r0, lr @ instrumented function - adr lr, BSYM(2f) + badr lr, 2f mov pc, r2 2: mcount_exit .endm diff --git a/arch/arm/kernel/entry-v7m.S b/arch/arm/kernel/entry-v7m.S index 8944f4991c3c..b6c8bb9315e7 100644 --- a/arch/arm/kernel/entry-v7m.S +++ b/arch/arm/kernel/entry-v7m.S @@ -117,9 +117,14 @@ ENTRY(__switch_to) ENDPROC(__switch_to) .data - .align 8 +#if CONFIG_CPU_V7M_NUM_IRQ <= 112 + .align 9 +#else + .align 10 +#endif + /* - * Vector table (64 words => 256 bytes natural alignment) + * Vector table (Natural alignment need to be ensured) */ ENTRY(vector_table) .long 0 @ 0 - Reset stack pointer @@ -138,6 +143,6 @@ ENTRY(vector_table) .long __invalid_entry @ 13 - Reserved .long __pendsv_entry @ 14 - PendSV .long __invalid_entry @ 15 - SysTick - .rept 64 - 16 - .long __irq_entry @ 16..64 - External Interrupts + .rept CONFIG_CPU_V7M_NUM_IRQ + .long __irq_entry @ External Interrupts .endr diff --git a/arch/arm/kernel/head-nommu.S b/arch/arm/kernel/head-nommu.S index cc176b67c134..9b8c5a113434 100644 --- a/arch/arm/kernel/head-nommu.S +++ b/arch/arm/kernel/head-nommu.S @@ -46,7 +46,7 @@ ENTRY(stext) .arm ENTRY(stext) - THUMB( adr r9, BSYM(1f) ) @ Kernel is always entered in ARM. + THUMB( badr r9, 1f ) @ Kernel is always entered in ARM. THUMB( bx r9 ) @ If this is a Thumb-2 kernel, THUMB( .thumb ) @ switch to Thumb now. THUMB(1: ) @@ -77,13 +77,13 @@ ENTRY(stext) orr r6, r6, #(1 << MPU_RSR_EN) @ Set region enabled bit bl __setup_mpu #endif - ldr r13, =__mmap_switched @ address to jump to after - @ initialising sctlr - adr lr, BSYM(1f) @ return (PIC) address - ARM( add pc, r10, #PROCINFO_INITFUNC ) - THUMB( add r12, r10, #PROCINFO_INITFUNC ) - THUMB( ret r12 ) - 1: b __after_proc_init + + badr lr, 1f @ return (PIC) address + ldr r12, [r10, #PROCINFO_INITFUNC] + add r12, r12, r10 + ret r12 +1: bl __after_proc_init + b __mmap_switched ENDPROC(stext) #ifdef CONFIG_SMP @@ -106,8 +106,7 @@ ENTRY(secondary_startup) movs r10, r5 @ invalid processor? beq __error_p @ yes, error 'p' - adr r4, __secondary_data - ldmia r4, {r7, r12} + ldr r7, __secondary_data #ifdef CONFIG_ARM_MPU /* Use MPU region info supplied by __cpu_up */ @@ -115,23 +114,19 @@ ENTRY(secondary_startup) bl __setup_mpu @ Initialize the MPU #endif - adr lr, BSYM(__after_proc_init) @ return address - mov r13, r12 @ __secondary_switched address - ARM( add pc, r10, #PROCINFO_INITFUNC ) - THUMB( add r12, r10, #PROCINFO_INITFUNC ) - THUMB( ret r12 ) -ENDPROC(secondary_startup) - -ENTRY(__secondary_switched) - ldr sp, [r7, #8] @ set up the stack pointer + badr lr, 1f @ return (PIC) address + ldr r12, [r10, #PROCINFO_INITFUNC] + add r12, r12, r10 + ret r12 +1: bl __after_proc_init + ldr sp, [r7, #12] @ set up the stack pointer mov fp, #0 b secondary_start_kernel -ENDPROC(__secondary_switched) +ENDPROC(secondary_startup) .type __secondary_data, %object __secondary_data: .long secondary_data - .long __secondary_switched #endif /* CONFIG_SMP */ /* @@ -164,7 +159,7 @@ __after_proc_init: #endif mcr p15, 0, r0, c1, c0, 0 @ write control reg #endif /* CONFIG_CPU_CP15 */ - ret r13 + ret lr ENDPROC(__after_proc_init) .ltorg diff --git a/arch/arm/kernel/head.S b/arch/arm/kernel/head.S index 01963273c07a..bd755d97e459 100644 --- a/arch/arm/kernel/head.S +++ b/arch/arm/kernel/head.S @@ -80,7 +80,7 @@ ENTRY(stext) ARM_BE8(setend be ) @ ensure we are in BE8 mode - THUMB( adr r9, BSYM(1f) ) @ Kernel is always entered in ARM. 
+ THUMB( badr r9, 1f ) @ Kernel is always entered in ARM. THUMB( bx r9 ) @ If this is a Thumb-2 kernel, THUMB( .thumb ) @ switch to Thumb now. THUMB(1: ) @@ -131,16 +131,33 @@ ENTRY(stext) * The following calls CPU specific code in a position independent * manner. See arch/arm/mm/proc-*.S for details. r10 = base of * xxx_proc_info structure selected by __lookup_processor_type - * above. On return, the CPU will be ready for the MMU to be - * turned on, and r0 will hold the CPU control register value. + * above. + * + * The processor init function will be called with: + * r1 - machine type + * r2 - boot data (atags/dt) pointer + * r4 - translation table base (low word) + * r5 - translation table base (high word, if LPAE) + * r8 - translation table base 1 (pfn if LPAE) + * r9 - cpuid + * r13 - virtual address for __enable_mmu -> __turn_mmu_on + * + * On return, the CPU will be ready for the MMU to be turned on, + * r0 will hold the CPU control register value, r1, r2, r4, and + * r9 will be preserved. r5 will also be preserved if LPAE. */ ldr r13, =__mmap_switched @ address to jump to after @ mmu has been enabled - adr lr, BSYM(1f) @ return (PIC) address + badr lr, 1f @ return (PIC) address +#ifdef CONFIG_ARM_LPAE + mov r5, #0 @ high TTBR0 + mov r8, r4, lsr #12 @ TTBR1 is swapper_pg_dir pfn +#else mov r8, r4 @ set TTBR1 to swapper_pg_dir - ARM( add pc, r10, #PROCINFO_INITFUNC ) - THUMB( add r12, r10, #PROCINFO_INITFUNC ) - THUMB( ret r12 ) +#endif + ldr r12, [r10, #PROCINFO_INITFUNC] + add r12, r12, r10 + ret r12 1: b __enable_mmu ENDPROC(stext) .ltorg @@ -158,7 +175,7 @@ ENDPROC(stext) * * Returns: * r0, r3, r5-r7 corrupted - * r4 = page table (see ARCH_PGD_SHIFT in asm/memory.h) + * r4 = physical page table address */ __create_page_tables: pgtbl r4, r8 @ page table address @@ -333,7 +350,6 @@ __create_page_tables: #endif #ifdef CONFIG_ARM_LPAE sub r4, r4, #0x1000 @ point to the PGD table - mov r4, r4, lsr #ARCH_PGD_SHIFT #endif ret lr ENDPROC(__create_page_tables) @@ -346,9 +362,9 @@ __turn_mmu_on_loc: #if defined(CONFIG_SMP) .text -ENTRY(secondary_startup_arm) .arm - THUMB( adr r9, BSYM(1f) ) @ Kernel is entered in ARM. +ENTRY(secondary_startup_arm) + THUMB( badr r9, 1f ) @ Kernel is entered in ARM. THUMB( bx r9 ) @ If this is a Thumb-2 kernel, THUMB( .thumb ) @ switch to Thumb now. THUMB(1: ) @@ -381,15 +397,15 @@ ENTRY(secondary_startup) adr r4, __secondary_data ldmia r4, {r5, r7, r12} @ address to jump to after sub lr, r4, r5 @ mmu has been enabled - ldr r4, [r7, lr] @ get secondary_data.pgdir - add r7, r7, #4 - ldr r8, [r7, lr] @ get secondary_data.swapper_pg_dir - adr lr, BSYM(__enable_mmu) @ return address + add r3, r7, lr + ldrd r4, [r3, #0] @ get secondary_data.pgdir + ldr r8, [r3, #8] @ get secondary_data.swapper_pg_dir + badr lr, __enable_mmu @ return address mov r13, r12 @ __secondary_switched address - ARM( add pc, r10, #PROCINFO_INITFUNC ) @ initialise processor - @ (return control reg) - THUMB( add r12, r10, #PROCINFO_INITFUNC ) - THUMB( ret r12 ) + ldr r12, [r10, #PROCINFO_INITFUNC] + add r12, r12, r10 @ initialise processor + @ (return control reg) + ret r12 ENDPROC(secondary_startup) ENDPROC(secondary_startup_arm) @@ -397,7 +413,7 @@ ENDPROC(secondary_startup_arm) * r6 = &secondary_data */ ENTRY(__secondary_switched) - ldr sp, [r7, #4] @ get secondary_data.stack + ldr sp, [r7, #12] @ get secondary_data.stack mov fp, #0 b secondary_start_kernel ENDPROC(__secondary_switched) @@ -416,12 +432,14 @@ __secondary_data: /* * Setup common bits before finally enabling the MMU. 
Essentially * this is just loading the page table pointer and domain access - * registers. + * registers. All these registers need to be preserved by the + * processor setup function (or set in the case of r0) * * r0 = cp#15 control register * r1 = machine ID * r2 = atags or dtb pointer - * r4 = page table (see ARCH_PGD_SHIFT in asm/memory.h) + * r4 = TTBR pointer (low word) + * r5 = TTBR pointer (high word if LPAE) * r9 = processor ID * r13 = *virtual* address to jump to upon completion */ @@ -440,7 +458,9 @@ __enable_mmu: #ifdef CONFIG_CPU_ICACHE_DISABLE bic r0, r0, #CR_I #endif -#ifndef CONFIG_ARM_LPAE +#ifdef CONFIG_ARM_LPAE + mcrr p15, 0, r4, r5, c2 @ load TTBR0 +#else mov r5, #(domain_val(DOMAIN_USER, DOMAIN_MANAGER) | \ domain_val(DOMAIN_KERNEL, DOMAIN_MANAGER) | \ domain_val(DOMAIN_TABLE, DOMAIN_MANAGER) | \ diff --git a/arch/arm/kernel/hibernate.c b/arch/arm/kernel/hibernate.c index c4cc50e58c13..a71501ff6f18 100644 --- a/arch/arm/kernel/hibernate.c +++ b/arch/arm/kernel/hibernate.c @@ -22,6 +22,7 @@ #include <asm/suspend.h> #include <asm/memory.h> #include <asm/sections.h> +#include "reboot.h" int pfn_is_nosave(unsigned long pfn) { @@ -61,7 +62,7 @@ static int notrace arch_save_image(unsigned long unused) ret = swsusp_save(); if (ret == 0) - soft_restart(virt_to_phys(cpu_resume)); + _soft_restart(virt_to_phys(cpu_resume), false); return ret; } @@ -86,7 +87,7 @@ static void notrace arch_restore_image(void *unused) for (pbe = restore_pblist; pbe; pbe = pbe->next) copy_page(pbe->orig_address, pbe->address); - soft_restart(virt_to_phys(cpu_resume)); + _soft_restart(virt_to_phys(cpu_resume), false); } static u64 resume_stack[PAGE_SIZE/2/sizeof(u64)] __nosavedata; @@ -99,7 +100,6 @@ static u64 resume_stack[PAGE_SIZE/2/sizeof(u64)] __nosavedata; */ int swsusp_arch_resume(void) { - extern void call_with_stack(void (*fn)(void *), void *arg, void *sp); call_with_stack(arch_restore_image, 0, resume_stack + ARRAY_SIZE(resume_stack)); return 0; diff --git a/arch/arm/kernel/hw_breakpoint.c b/arch/arm/kernel/hw_breakpoint.c index 7fc70ae21185..dc7d0a95bd36 100644 --- a/arch/arm/kernel/hw_breakpoint.c +++ b/arch/arm/kernel/hw_breakpoint.c @@ -648,7 +648,7 @@ int arch_validate_hwbkpt_settings(struct perf_event *bp) * Per-cpu breakpoints are not supported by our stepping * mechanism. */ - if (!bp->hw.bp_target) + if (!bp->hw.target) return -EINVAL; /* diff --git a/arch/arm/kernel/machine_kexec.c b/arch/arm/kernel/machine_kexec.c index de2b085ad753..8bf3b7c09888 100644 --- a/arch/arm/kernel/machine_kexec.c +++ b/arch/arm/kernel/machine_kexec.c @@ -46,7 +46,8 @@ int machine_kexec_prepare(struct kimage *image) * and implements CPU hotplug for the current HW. If not, we won't be * able to kexec reliably, so fail the prepare operation. */ - if (num_possible_cpus() > 1 && !platform_can_cpu_hotplug()) + if (num_possible_cpus() > 1 && platform_can_secondary_boot() && + !platform_can_cpu_hotplug()) return -EINVAL; /* diff --git a/arch/arm/kernel/module-plts.c b/arch/arm/kernel/module-plts.c new file mode 100644 index 000000000000..097e2e201b9f --- /dev/null +++ b/arch/arm/kernel/module-plts.c @@ -0,0 +1,183 @@ +/* + * Copyright (C) 2014 Linaro Ltd. <ard.biesheuvel@linaro.org> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ */ + +#include <linux/elf.h> +#include <linux/kernel.h> +#include <linux/module.h> + +#include <asm/cache.h> +#include <asm/opcodes.h> + +#define PLT_ENT_STRIDE L1_CACHE_BYTES +#define PLT_ENT_COUNT (PLT_ENT_STRIDE / sizeof(u32)) +#define PLT_ENT_SIZE (sizeof(struct plt_entries) / PLT_ENT_COUNT) + +#ifdef CONFIG_THUMB2_KERNEL +#define PLT_ENT_LDR __opcode_to_mem_thumb32(0xf8dff000 | \ + (PLT_ENT_STRIDE - 4)) +#else +#define PLT_ENT_LDR __opcode_to_mem_arm(0xe59ff000 | \ + (PLT_ENT_STRIDE - 8)) +#endif + +struct plt_entries { + u32 ldr[PLT_ENT_COUNT]; + u32 lit[PLT_ENT_COUNT]; +}; + +static bool in_init(const struct module *mod, u32 addr) +{ + return addr - (u32)mod->module_init < mod->init_size; +} + +u32 get_module_plt(struct module *mod, unsigned long loc, Elf32_Addr val) +{ + struct plt_entries *plt, *plt_end; + int c, *count; + + if (in_init(mod, loc)) { + plt = (void *)mod->arch.init_plt->sh_addr; + plt_end = (void *)plt + mod->arch.init_plt->sh_size; + count = &mod->arch.init_plt_count; + } else { + plt = (void *)mod->arch.core_plt->sh_addr; + plt_end = (void *)plt + mod->arch.core_plt->sh_size; + count = &mod->arch.core_plt_count; + } + + /* Look for an existing entry pointing to 'val' */ + for (c = *count; plt < plt_end; c -= PLT_ENT_COUNT, plt++) { + int i; + + if (!c) { + /* Populate a new set of entries */ + *plt = (struct plt_entries){ + { [0 ... PLT_ENT_COUNT - 1] = PLT_ENT_LDR, }, + { val, } + }; + ++*count; + return (u32)plt->ldr; + } + for (i = 0; i < PLT_ENT_COUNT; i++) { + if (!plt->lit[i]) { + plt->lit[i] = val; + ++*count; + } + if (plt->lit[i] == val) + return (u32)&plt->ldr[i]; + } + } + BUG(); +} + +static int duplicate_rel(Elf32_Addr base, const Elf32_Rel *rel, int num, + u32 mask) +{ + u32 *loc1, *loc2; + int i; + + for (i = 0; i < num; i++) { + if (rel[i].r_info != rel[num].r_info) + continue; + + /* + * Identical relocation types against identical symbols can + * still result in different PLT entries if the addend in the + * place is different. So resolve the target of the relocation + * to compare the values. + */ + loc1 = (u32 *)(base + rel[i].r_offset); + loc2 = (u32 *)(base + rel[num].r_offset); + if (((*loc1 ^ *loc2) & mask) == 0) + return 1; + } + return 0; +} + +/* Count how many PLT entries we may need */ +static unsigned int count_plts(Elf32_Addr base, const Elf32_Rel *rel, int num) +{ + unsigned int ret = 0; + int i; + + /* + * Sure, this is order(n^2), but it's usually short, and not + * time critical + */ + for (i = 0; i < num; i++) + switch (ELF32_R_TYPE(rel[i].r_info)) { + case R_ARM_CALL: + case R_ARM_PC24: + case R_ARM_JUMP24: + if (!duplicate_rel(base, rel, i, + __opcode_to_mem_arm(0x00ffffff))) + ret++; + break; +#ifdef CONFIG_THUMB2_KERNEL + case R_ARM_THM_CALL: + case R_ARM_THM_JUMP24: + if (!duplicate_rel(base, rel, i, + __opcode_to_mem_thumb32(0x07ff2fff))) + ret++; +#endif + } + return ret; +} + +int module_frob_arch_sections(Elf_Ehdr *ehdr, Elf_Shdr *sechdrs, + char *secstrings, struct module *mod) +{ + unsigned long core_plts = 0, init_plts = 0; + Elf32_Shdr *s, *sechdrs_end = sechdrs + ehdr->e_shnum; + + /* + * To store the PLTs, we expand the .text section for core module code + * and the .init.text section for initialization code. 
+ */ + for (s = sechdrs; s < sechdrs_end; ++s) + if (strcmp(".core.plt", secstrings + s->sh_name) == 0) + mod->arch.core_plt = s; + else if (strcmp(".init.plt", secstrings + s->sh_name) == 0) + mod->arch.init_plt = s; + + if (!mod->arch.core_plt || !mod->arch.init_plt) { + pr_err("%s: sections missing\n", mod->name); + return -ENOEXEC; + } + + for (s = sechdrs + 1; s < sechdrs_end; ++s) { + const Elf32_Rel *rels = (void *)ehdr + s->sh_offset; + int numrels = s->sh_size / sizeof(Elf32_Rel); + Elf32_Shdr *dstsec = sechdrs + s->sh_info; + + if (s->sh_type != SHT_REL) + continue; + + if (strstr(secstrings + s->sh_name, ".init")) + init_plts += count_plts(dstsec->sh_addr, rels, numrels); + else + core_plts += count_plts(dstsec->sh_addr, rels, numrels); + } + + mod->arch.core_plt->sh_type = SHT_NOBITS; + mod->arch.core_plt->sh_flags = SHF_EXECINSTR | SHF_ALLOC; + mod->arch.core_plt->sh_addralign = L1_CACHE_BYTES; + mod->arch.core_plt->sh_size = round_up(core_plts * PLT_ENT_SIZE, + sizeof(struct plt_entries)); + mod->arch.core_plt_count = 0; + + mod->arch.init_plt->sh_type = SHT_NOBITS; + mod->arch.init_plt->sh_flags = SHF_EXECINSTR | SHF_ALLOC; + mod->arch.init_plt->sh_addralign = L1_CACHE_BYTES; + mod->arch.init_plt->sh_size = round_up(init_plts * PLT_ENT_SIZE, + sizeof(struct plt_entries)); + mod->arch.init_plt_count = 0; + pr_debug("%s: core.plt=%x, init.plt=%x\n", __func__, + mod->arch.core_plt->sh_size, mod->arch.init_plt->sh_size); + return 0; +} diff --git a/arch/arm/kernel/module.c b/arch/arm/kernel/module.c index 2e11961f65ae..efdddcb97dd1 100644 --- a/arch/arm/kernel/module.c +++ b/arch/arm/kernel/module.c @@ -40,7 +40,12 @@ #ifdef CONFIG_MMU void *module_alloc(unsigned long size) { - return __vmalloc_node_range(size, 1, MODULES_VADDR, MODULES_END, + void *p = __vmalloc_node_range(size, 1, MODULES_VADDR, MODULES_END, + GFP_KERNEL, PAGE_KERNEL_EXEC, 0, NUMA_NO_NODE, + __builtin_return_address(0)); + if (!IS_ENABLED(CONFIG_ARM_MODULE_PLTS) || p) + return p; + return __vmalloc_node_range(size, 1, VMALLOC_START, VMALLOC_END, GFP_KERNEL, PAGE_KERNEL_EXEC, 0, NUMA_NO_NODE, __builtin_return_address(0)); } @@ -98,14 +103,33 @@ apply_relocate(Elf32_Shdr *sechdrs, const char *strtab, unsigned int symindex, case R_ARM_PC24: case R_ARM_CALL: case R_ARM_JUMP24: + if (sym->st_value & 3) { + pr_err("%s: section %u reloc %u sym '%s': unsupported interworking call (ARM -> Thumb)\n", + module->name, relindex, i, symname); + return -ENOEXEC; + } + offset = __mem_to_opcode_arm(*(u32 *)loc); offset = (offset & 0x00ffffff) << 2; if (offset & 0x02000000) offset -= 0x04000000; offset += sym->st_value - loc; - if (offset & 3 || - offset <= (s32)0xfe000000 || + + /* + * Route through a PLT entry if 'offset' exceeds the + * supported range. Note that 'offset + loc + 8' + * contains the absolute jump target, i.e., + * @sym + addend, corrected for the +8 PC bias. + */ + if (IS_ENABLED(CONFIG_ARM_MODULE_PLTS) && + (offset <= (s32)0xfe000000 || + offset >= (s32)0x02000000)) + offset = get_module_plt(module, loc, + offset + loc + 8) + - loc - 8; + + if (offset <= (s32)0xfe000000 || offset >= (s32)0x02000000) { pr_err("%s: section %u reloc %u sym '%s': relocation %u out of range (%#lx -> %#x)\n", module->name, relindex, i, symname, @@ -155,6 +179,22 @@ apply_relocate(Elf32_Shdr *sechdrs, const char *strtab, unsigned int symindex, #ifdef CONFIG_THUMB2_KERNEL case R_ARM_THM_CALL: case R_ARM_THM_JUMP24: + /* + * For function symbols, only Thumb addresses are + * allowed (no interworking). 
+ * + * For non-function symbols, the destination + * has no specific ARM/Thumb disposition, so + * the branch is resolved under the assumption + * that interworking is not required. + */ + if (ELF32_ST_TYPE(sym->st_info) == STT_FUNC && + !(sym->st_value & 1)) { + pr_err("%s: section %u reloc %u sym '%s': unsupported interworking call (Thumb -> ARM)\n", + module->name, relindex, i, symname); + return -ENOEXEC; + } + upper = __mem_to_opcode_thumb16(*(u16 *)loc); lower = __mem_to_opcode_thumb16(*(u16 *)(loc + 2)); @@ -183,17 +223,17 @@ apply_relocate(Elf32_Shdr *sechdrs, const char *strtab, unsigned int symindex, offset += sym->st_value - loc; /* - * For function symbols, only Thumb addresses are - * allowed (no interworking). - * - * For non-function symbols, the destination - * has no specific ARM/Thumb disposition, so - * the branch is resolved under the assumption - * that interworking is not required. + * Route through a PLT entry if 'offset' exceeds the + * supported range. */ - if ((ELF32_ST_TYPE(sym->st_info) == STT_FUNC && - !(offset & 1)) || - offset <= (s32)0xff000000 || + if (IS_ENABLED(CONFIG_ARM_MODULE_PLTS) && + (offset <= (s32)0xff000000 || + offset >= (s32)0x01000000)) + offset = get_module_plt(module, loc, + offset + loc + 4) + - loc - 4; + + if (offset <= (s32)0xff000000 || offset >= (s32)0x01000000) { pr_err("%s: section %u reloc %u sym '%s': relocation %u out of range (%#lx -> %#x)\n", module->name, relindex, i, symname, diff --git a/arch/arm/kernel/module.lds b/arch/arm/kernel/module.lds new file mode 100644 index 000000000000..3682fa107918 --- /dev/null +++ b/arch/arm/kernel/module.lds @@ -0,0 +1,4 @@ +SECTIONS { + .core.plt : { BYTE(0) } + .init.plt : { BYTE(0) } +} diff --git a/arch/arm/kernel/perf_event.c b/arch/arm/kernel/perf_event.c index 557e128e4df0..54272e0be713 100644 --- a/arch/arm/kernel/perf_event.c +++ b/arch/arm/kernel/perf_event.c @@ -11,12 +11,18 @@ */ #define pr_fmt(fmt) "hw perfevents: " fmt +#include <linux/bitmap.h> +#include <linux/cpumask.h> +#include <linux/export.h> #include <linux/kernel.h> +#include <linux/of.h> #include <linux/platform_device.h> -#include <linux/pm_runtime.h> +#include <linux/slab.h> +#include <linux/spinlock.h> #include <linux/irq.h> #include <linux/irqdesc.h> +#include <asm/cputype.h> #include <asm/irq_regs.h> #include <asm/pmu.h> @@ -229,6 +235,10 @@ armpmu_add(struct perf_event *event, int flags) int idx; int err = 0; + /* An event following a process won't be stopped earlier */ + if (!cpumask_test_cpu(smp_processor_id(), &armpmu->supported_cpus)) + return -ENOENT; + perf_pmu_disable(event->pmu); /* If we don't have a space for the counter then finish early. */ @@ -259,20 +269,29 @@ out: } static int -validate_event(struct pmu_hw_events *hw_events, - struct perf_event *event) +validate_event(struct pmu *pmu, struct pmu_hw_events *hw_events, + struct perf_event *event) { - struct arm_pmu *armpmu = to_arm_pmu(event->pmu); + struct arm_pmu *armpmu; if (is_software_event(event)) return 1; + /* + * Reject groups spanning multiple HW PMUs (e.g. CPU + CCI). The + * core perf code won't check that the pmu->ctx == leader->ctx + * until after pmu->event_init(event). 
+ */ + if (event->pmu != pmu) + return 0; + if (event->state < PERF_EVENT_STATE_OFF) return 1; if (event->state == PERF_EVENT_STATE_OFF && !event->attr.enable_on_exec) return 1; + armpmu = to_arm_pmu(event->pmu); return armpmu->get_event_idx(hw_events, event) >= 0; } @@ -288,15 +307,15 @@ validate_group(struct perf_event *event) */ memset(&fake_pmu.used_mask, 0, sizeof(fake_pmu.used_mask)); - if (!validate_event(&fake_pmu, leader)) + if (!validate_event(event->pmu, &fake_pmu, leader)) return -EINVAL; list_for_each_entry(sibling, &leader->sibling_list, group_entry) { - if (!validate_event(&fake_pmu, sibling)) + if (!validate_event(event->pmu, &fake_pmu, sibling)) return -EINVAL; } - if (!validate_event(&fake_pmu, event)) + if (!validate_event(event->pmu, &fake_pmu, event)) return -EINVAL; return 0; @@ -335,20 +354,12 @@ static void armpmu_release_hardware(struct arm_pmu *armpmu) { armpmu->free_irq(armpmu); - pm_runtime_put_sync(&armpmu->plat_device->dev); } static int armpmu_reserve_hardware(struct arm_pmu *armpmu) { - int err; - struct platform_device *pmu_device = armpmu->plat_device; - - if (!pmu_device) - return -ENODEV; - - pm_runtime_get_sync(&pmu_device->dev); - err = armpmu->request_irq(armpmu, armpmu_dispatch_irq); + int err = armpmu->request_irq(armpmu, armpmu_dispatch_irq); if (err) { armpmu_release_hardware(armpmu); return err; @@ -445,6 +456,17 @@ static int armpmu_event_init(struct perf_event *event) int err = 0; atomic_t *active_events = &armpmu->active_events; + /* + * Reject CPU-affine events for CPUs that are of a different class to + * that which this PMU handles. Process-following events (where + * event->cpu == -1) can be migrated between CPUs, and thus we have to + * reject them later (in armpmu_add) if they're scheduled on a + * different class of CPU. + */ + if (event->cpu != -1 && + !cpumask_test_cpu(event->cpu, &armpmu->supported_cpus)) + return -ENOENT; + /* does not support taken branch sampling */ if (has_branch_stack(event)) return -EOPNOTSUPP; @@ -480,6 +502,10 @@ static void armpmu_enable(struct pmu *pmu) struct pmu_hw_events *hw_events = this_cpu_ptr(armpmu->hw_events); int enabled = bitmap_weight(hw_events->used_mask, armpmu->num_events); + /* For task-bound events we may be called on other CPUs */ + if (!cpumask_test_cpu(smp_processor_id(), &armpmu->supported_cpus)) + return; + if (enabled) armpmu->start(armpmu); } @@ -487,34 +513,25 @@ static void armpmu_enable(struct pmu *pmu) static void armpmu_disable(struct pmu *pmu) { struct arm_pmu *armpmu = to_arm_pmu(pmu); - armpmu->stop(armpmu); -} - -#ifdef CONFIG_PM -static int armpmu_runtime_resume(struct device *dev) -{ - struct arm_pmu_platdata *plat = dev_get_platdata(dev); - if (plat && plat->runtime_resume) - return plat->runtime_resume(dev); + /* For task-bound events we may be called on other CPUs */ + if (!cpumask_test_cpu(smp_processor_id(), &armpmu->supported_cpus)) + return; - return 0; + armpmu->stop(armpmu); } -static int armpmu_runtime_suspend(struct device *dev) +/* + * In heterogeneous systems, events are specific to a particular + * microarchitecture, and aren't suitable for another. Thus, only match CPUs of + * the same microarchitecture. 
+ */ +static int armpmu_filter_match(struct perf_event *event) { - struct arm_pmu_platdata *plat = dev_get_platdata(dev); - - if (plat && plat->runtime_suspend) - return plat->runtime_suspend(dev); - - return 0; + struct arm_pmu *armpmu = to_arm_pmu(event->pmu); + unsigned int cpu = smp_processor_id(); + return cpumask_test_cpu(cpu, &armpmu->supported_cpus); } -#endif - -const struct dev_pm_ops armpmu_dev_pm_ops = { - SET_RUNTIME_PM_OPS(armpmu_runtime_suspend, armpmu_runtime_resume, NULL) -}; static void armpmu_init(struct arm_pmu *armpmu) { @@ -530,15 +547,350 @@ static void armpmu_init(struct arm_pmu *armpmu) .start = armpmu_start, .stop = armpmu_stop, .read = armpmu_read, + .filter_match = armpmu_filter_match, }; } int armpmu_register(struct arm_pmu *armpmu, int type) { armpmu_init(armpmu); - pm_runtime_enable(&armpmu->plat_device->dev); pr_info("enabled with %s PMU driver, %d counters available\n", armpmu->name, armpmu->num_events); return perf_pmu_register(&armpmu->pmu, armpmu->name, type); } +/* Set at runtime when we know what CPU type we are. */ +static struct arm_pmu *__oprofile_cpu_pmu; + +/* + * Despite the names, these two functions are CPU-specific and are used + * by the OProfile/perf code. + */ +const char *perf_pmu_name(void) +{ + if (!__oprofile_cpu_pmu) + return NULL; + + return __oprofile_cpu_pmu->name; +} +EXPORT_SYMBOL_GPL(perf_pmu_name); + +int perf_num_counters(void) +{ + int max_events = 0; + + if (__oprofile_cpu_pmu != NULL) + max_events = __oprofile_cpu_pmu->num_events; + + return max_events; +} +EXPORT_SYMBOL_GPL(perf_num_counters); + +static void cpu_pmu_enable_percpu_irq(void *data) +{ + int irq = *(int *)data; + + enable_percpu_irq(irq, IRQ_TYPE_NONE); +} + +static void cpu_pmu_disable_percpu_irq(void *data) +{ + int irq = *(int *)data; + + disable_percpu_irq(irq); +} + +static void cpu_pmu_free_irq(struct arm_pmu *cpu_pmu) +{ + int i, irq, irqs; + struct platform_device *pmu_device = cpu_pmu->plat_device; + struct pmu_hw_events __percpu *hw_events = cpu_pmu->hw_events; + + irqs = min(pmu_device->num_resources, num_possible_cpus()); + + irq = platform_get_irq(pmu_device, 0); + if (irq >= 0 && irq_is_percpu(irq)) { + on_each_cpu(cpu_pmu_disable_percpu_irq, &irq, 1); + free_percpu_irq(irq, &hw_events->percpu_pmu); + } else { + for (i = 0; i < irqs; ++i) { + int cpu = i; + + if (cpu_pmu->irq_affinity) + cpu = cpu_pmu->irq_affinity[i]; + + if (!cpumask_test_and_clear_cpu(cpu, &cpu_pmu->active_irqs)) + continue; + irq = platform_get_irq(pmu_device, i); + if (irq >= 0) + free_irq(irq, per_cpu_ptr(&hw_events->percpu_pmu, cpu)); + } + } +} + +static int cpu_pmu_request_irq(struct arm_pmu *cpu_pmu, irq_handler_t handler) +{ + int i, err, irq, irqs; + struct platform_device *pmu_device = cpu_pmu->plat_device; + struct pmu_hw_events __percpu *hw_events = cpu_pmu->hw_events; + + if (!pmu_device) + return -ENODEV; + + irqs = min(pmu_device->num_resources, num_possible_cpus()); + if (irqs < 1) { + pr_warn_once("perf/ARM: No irqs for PMU defined, sampling events not supported\n"); + return 0; + } + + irq = platform_get_irq(pmu_device, 0); + if (irq >= 0 && irq_is_percpu(irq)) { + err = request_percpu_irq(irq, handler, "arm-pmu", + &hw_events->percpu_pmu); + if (err) { + pr_err("unable to request IRQ%d for ARM PMU counters\n", + irq); + return err; + } + on_each_cpu(cpu_pmu_enable_percpu_irq, &irq, 1); + } else { + for (i = 0; i < irqs; ++i) { + int cpu = i; + + err = 0; + irq = platform_get_irq(pmu_device, i); + if (irq < 0) + continue; + + if (cpu_pmu->irq_affinity) + cpu = 
cpu_pmu->irq_affinity[i]; + + /* + * If we have a single PMU interrupt that we can't shift, + * assume that we're running on a uniprocessor machine and + * continue. Otherwise, continue without this interrupt. + */ + if (irq_set_affinity(irq, cpumask_of(cpu)) && irqs > 1) { + pr_warn("unable to set irq affinity (irq=%d, cpu=%u)\n", + irq, cpu); + continue; + } + + err = request_irq(irq, handler, + IRQF_NOBALANCING | IRQF_NO_THREAD, "arm-pmu", + per_cpu_ptr(&hw_events->percpu_pmu, cpu)); + if (err) { + pr_err("unable to request IRQ%d for ARM PMU counters\n", + irq); + return err; + } + + cpumask_set_cpu(cpu, &cpu_pmu->active_irqs); + } + } + + return 0; +} + +/* + * PMU hardware loses all context when a CPU goes offline. + * When a CPU is hotplugged back in, since some hardware registers are + * UNKNOWN at reset, the PMU must be explicitly reset to avoid reading + * junk values out of them. + */ +static int cpu_pmu_notify(struct notifier_block *b, unsigned long action, + void *hcpu) +{ + int cpu = (unsigned long)hcpu; + struct arm_pmu *pmu = container_of(b, struct arm_pmu, hotplug_nb); + + if ((action & ~CPU_TASKS_FROZEN) != CPU_STARTING) + return NOTIFY_DONE; + + if (!cpumask_test_cpu(cpu, &pmu->supported_cpus)) + return NOTIFY_DONE; + + if (pmu->reset) + pmu->reset(pmu); + else + return NOTIFY_DONE; + + return NOTIFY_OK; +} + +static int cpu_pmu_init(struct arm_pmu *cpu_pmu) +{ + int err; + int cpu; + struct pmu_hw_events __percpu *cpu_hw_events; + + cpu_hw_events = alloc_percpu(struct pmu_hw_events); + if (!cpu_hw_events) + return -ENOMEM; + + cpu_pmu->hotplug_nb.notifier_call = cpu_pmu_notify; + err = register_cpu_notifier(&cpu_pmu->hotplug_nb); + if (err) + goto out_hw_events; + + for_each_possible_cpu(cpu) { + struct pmu_hw_events *events = per_cpu_ptr(cpu_hw_events, cpu); + raw_spin_lock_init(&events->pmu_lock); + events->percpu_pmu = cpu_pmu; + } + + cpu_pmu->hw_events = cpu_hw_events; + cpu_pmu->request_irq = cpu_pmu_request_irq; + cpu_pmu->free_irq = cpu_pmu_free_irq; + + /* Ensure the PMU has sane values out of reset. */ + if (cpu_pmu->reset) + on_each_cpu_mask(&cpu_pmu->supported_cpus, cpu_pmu->reset, + cpu_pmu, 1); + + /* If no interrupts available, set the corresponding capability flag */ + if (!platform_get_irq(cpu_pmu->plat_device, 0)) + cpu_pmu->pmu.capabilities |= PERF_PMU_CAP_NO_INTERRUPT; + + return 0; + +out_hw_events: + free_percpu(cpu_hw_events); + return err; +} + +static void cpu_pmu_destroy(struct arm_pmu *cpu_pmu) +{ + unregister_cpu_notifier(&cpu_pmu->hotplug_nb); + free_percpu(cpu_pmu->hw_events); +} + +/* + * CPU PMU identification and probing. 
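For context, a minimal sketch of a PMU platform driver built on the shared arm_pmu_device_probe() entry point added just below; the new ARMv6 driver later in this diff follows the same shape. The "vendor,foo-pmu" compatible string, foo_pmu_init() and the driver name are hypothetical:

#include <linux/of.h>
#include <linux/platform_device.h>
#include <asm/pmu.h>

static int foo_pmu_init(struct arm_pmu *pmu)
{
	/* Fill in pmu->name, pmu->map_event, the counter accessors, etc. */
	return 0;
}

static const struct of_device_id foo_pmu_of_device_ids[] = {
	{ .compatible = "vendor,foo-pmu", .data = foo_pmu_init },
	{ /* sentinel value */ }
};

static const struct pmu_probe_info foo_pmu_probe_table[] = {
	/* ARM_PMU_PROBE() entries would go here for non-DT probing */
	{ /* sentinel value */ }
};

static int foo_pmu_device_probe(struct platform_device *pdev)
{
	return arm_pmu_device_probe(pdev, foo_pmu_of_device_ids,
				    foo_pmu_probe_table);
}

static struct platform_driver foo_pmu_driver = {
	.driver = {
		.name = "foo-pmu",
		.of_match_table = foo_pmu_of_device_ids,
	},
	.probe = foo_pmu_device_probe,
};

static int __init register_foo_pmu_driver(void)
{
	return platform_driver_register(&foo_pmu_driver);
}
device_initcall(register_foo_pmu_driver);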
+ */ +static int probe_current_pmu(struct arm_pmu *pmu, + const struct pmu_probe_info *info) +{ + int cpu = get_cpu(); + unsigned int cpuid = read_cpuid_id(); + int ret = -ENODEV; + + pr_info("probing PMU on CPU %d\n", cpu); + + for (; info->init != NULL; info++) { + if ((cpuid & info->mask) != info->cpuid) + continue; + ret = info->init(pmu); + break; + } + + put_cpu(); + return ret; +} + +static int of_pmu_irq_cfg(struct arm_pmu *pmu) +{ + int i, irq, *irqs; + struct platform_device *pdev = pmu->plat_device; + + /* Don't bother with PPIs; they're already affine */ + irq = platform_get_irq(pdev, 0); + if (irq >= 0 && irq_is_percpu(irq)) + return 0; + + irqs = kcalloc(pdev->num_resources, sizeof(*irqs), GFP_KERNEL); + if (!irqs) + return -ENOMEM; + + for (i = 0; i < pdev->num_resources; ++i) { + struct device_node *dn; + int cpu; + + dn = of_parse_phandle(pdev->dev.of_node, "interrupt-affinity", + i); + if (!dn) { + pr_warn("Failed to parse %s/interrupt-affinity[%d]\n", + of_node_full_name(pdev->dev.of_node), i); + break; + } + + for_each_possible_cpu(cpu) + if (arch_find_n_match_cpu_physical_id(dn, cpu, NULL)) + break; + + if (cpu >= nr_cpu_ids) { + pr_warn("Failed to find logical CPU for %s\n", + dn->name); + of_node_put(dn); + break; + } + of_node_put(dn); + + irqs[i] = cpu; + cpumask_set_cpu(cpu, &pmu->supported_cpus); + } + + if (i == pdev->num_resources) { + pmu->irq_affinity = irqs; + } else { + kfree(irqs); + cpumask_setall(&pmu->supported_cpus); + } + + return 0; +} + +int arm_pmu_device_probe(struct platform_device *pdev, + const struct of_device_id *of_table, + const struct pmu_probe_info *probe_table) +{ + const struct of_device_id *of_id; + const int (*init_fn)(struct arm_pmu *); + struct device_node *node = pdev->dev.of_node; + struct arm_pmu *pmu; + int ret = -ENODEV; + + pmu = kzalloc(sizeof(struct arm_pmu), GFP_KERNEL); + if (!pmu) { + pr_info("failed to allocate PMU device!\n"); + return -ENOMEM; + } + + if (!__oprofile_cpu_pmu) + __oprofile_cpu_pmu = pmu; + + pmu->plat_device = pdev; + + if (node && (of_id = of_match_node(of_table, pdev->dev.of_node))) { + init_fn = of_id->data; + + ret = of_pmu_irq_cfg(pmu); + if (!ret) + ret = init_fn(pmu); + } else { + ret = probe_current_pmu(pmu, probe_table); + cpumask_setall(&pmu->supported_cpus); + } + + if (ret) { + pr_info("failed to probe PMU!\n"); + goto out_free; + } + + ret = cpu_pmu_init(pmu); + if (ret) + goto out_free; + + ret = armpmu_register(pmu, -1); + if (ret) + goto out_destroy; + + return 0; + +out_destroy: + cpu_pmu_destroy(pmu); +out_free: + pr_info("failed to register PMU devices!\n"); + kfree(pmu); + return ret; +} diff --git a/arch/arm/kernel/perf_event_cpu.c b/arch/arm/kernel/perf_event_cpu.c deleted file mode 100644 index 61b53c46edfa..000000000000 --- a/arch/arm/kernel/perf_event_cpu.c +++ /dev/null @@ -1,358 +0,0 @@ -/* - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
- * - * Copyright (C) 2012 ARM Limited - * - * Author: Will Deacon <will.deacon@arm.com> - */ -#define pr_fmt(fmt) "CPU PMU: " fmt - -#include <linux/bitmap.h> -#include <linux/export.h> -#include <linux/kernel.h> -#include <linux/of.h> -#include <linux/platform_device.h> -#include <linux/slab.h> -#include <linux/spinlock.h> -#include <linux/irq.h> -#include <linux/irqdesc.h> - -#include <asm/cputype.h> -#include <asm/irq_regs.h> -#include <asm/pmu.h> - -/* Set at runtime when we know what CPU type we are. */ -static struct arm_pmu *cpu_pmu; - -/* - * Despite the names, these two functions are CPU-specific and are used - * by the OProfile/perf code. - */ -const char *perf_pmu_name(void) -{ - if (!cpu_pmu) - return NULL; - - return cpu_pmu->name; -} -EXPORT_SYMBOL_GPL(perf_pmu_name); - -int perf_num_counters(void) -{ - int max_events = 0; - - if (cpu_pmu != NULL) - max_events = cpu_pmu->num_events; - - return max_events; -} -EXPORT_SYMBOL_GPL(perf_num_counters); - -/* Include the PMU-specific implementations. */ -#include "perf_event_xscale.c" -#include "perf_event_v6.c" -#include "perf_event_v7.c" - -static void cpu_pmu_enable_percpu_irq(void *data) -{ - int irq = *(int *)data; - - enable_percpu_irq(irq, IRQ_TYPE_NONE); -} - -static void cpu_pmu_disable_percpu_irq(void *data) -{ - int irq = *(int *)data; - - disable_percpu_irq(irq); -} - -static void cpu_pmu_free_irq(struct arm_pmu *cpu_pmu) -{ - int i, irq, irqs; - struct platform_device *pmu_device = cpu_pmu->plat_device; - struct pmu_hw_events __percpu *hw_events = cpu_pmu->hw_events; - - irqs = min(pmu_device->num_resources, num_possible_cpus()); - - irq = platform_get_irq(pmu_device, 0); - if (irq >= 0 && irq_is_percpu(irq)) { - on_each_cpu(cpu_pmu_disable_percpu_irq, &irq, 1); - free_percpu_irq(irq, &hw_events->percpu_pmu); - } else { - for (i = 0; i < irqs; ++i) { - if (!cpumask_test_and_clear_cpu(i, &cpu_pmu->active_irqs)) - continue; - irq = platform_get_irq(pmu_device, i); - if (irq >= 0) - free_irq(irq, per_cpu_ptr(&hw_events->percpu_pmu, i)); - } - } -} - -static int cpu_pmu_request_irq(struct arm_pmu *cpu_pmu, irq_handler_t handler) -{ - int i, err, irq, irqs; - struct platform_device *pmu_device = cpu_pmu->plat_device; - struct pmu_hw_events __percpu *hw_events = cpu_pmu->hw_events; - - if (!pmu_device) - return -ENODEV; - - irqs = min(pmu_device->num_resources, num_possible_cpus()); - if (irqs < 1) { - pr_warn_once("perf/ARM: No irqs for PMU defined, sampling events not supported\n"); - return 0; - } - - irq = platform_get_irq(pmu_device, 0); - if (irq >= 0 && irq_is_percpu(irq)) { - err = request_percpu_irq(irq, handler, "arm-pmu", - &hw_events->percpu_pmu); - if (err) { - pr_err("unable to request IRQ%d for ARM PMU counters\n", - irq); - return err; - } - on_each_cpu(cpu_pmu_enable_percpu_irq, &irq, 1); - } else { - for (i = 0; i < irqs; ++i) { - err = 0; - irq = platform_get_irq(pmu_device, i); - if (irq < 0) - continue; - - /* - * If we have a single PMU interrupt that we can't shift, - * assume that we're running on a uniprocessor machine and - * continue. Otherwise, continue without this interrupt. 
- */ - if (irq_set_affinity(irq, cpumask_of(i)) && irqs > 1) { - pr_warn("unable to set irq affinity (irq=%d, cpu=%u)\n", - irq, i); - continue; - } - - err = request_irq(irq, handler, - IRQF_NOBALANCING | IRQF_NO_THREAD, "arm-pmu", - per_cpu_ptr(&hw_events->percpu_pmu, i)); - if (err) { - pr_err("unable to request IRQ%d for ARM PMU counters\n", - irq); - return err; - } - - cpumask_set_cpu(i, &cpu_pmu->active_irqs); - } - } - - return 0; -} - -/* - * PMU hardware loses all context when a CPU goes offline. - * When a CPU is hotplugged back in, since some hardware registers are - * UNKNOWN at reset, the PMU must be explicitly reset to avoid reading - * junk values out of them. - */ -static int cpu_pmu_notify(struct notifier_block *b, unsigned long action, - void *hcpu) -{ - struct arm_pmu *pmu = container_of(b, struct arm_pmu, hotplug_nb); - - if ((action & ~CPU_TASKS_FROZEN) != CPU_STARTING) - return NOTIFY_DONE; - - if (pmu->reset) - pmu->reset(pmu); - else - return NOTIFY_DONE; - - return NOTIFY_OK; -} - -static int cpu_pmu_init(struct arm_pmu *cpu_pmu) -{ - int err; - int cpu; - struct pmu_hw_events __percpu *cpu_hw_events; - - cpu_hw_events = alloc_percpu(struct pmu_hw_events); - if (!cpu_hw_events) - return -ENOMEM; - - cpu_pmu->hotplug_nb.notifier_call = cpu_pmu_notify; - err = register_cpu_notifier(&cpu_pmu->hotplug_nb); - if (err) - goto out_hw_events; - - for_each_possible_cpu(cpu) { - struct pmu_hw_events *events = per_cpu_ptr(cpu_hw_events, cpu); - raw_spin_lock_init(&events->pmu_lock); - events->percpu_pmu = cpu_pmu; - } - - cpu_pmu->hw_events = cpu_hw_events; - cpu_pmu->request_irq = cpu_pmu_request_irq; - cpu_pmu->free_irq = cpu_pmu_free_irq; - - /* Ensure the PMU has sane values out of reset. */ - if (cpu_pmu->reset) - on_each_cpu(cpu_pmu->reset, cpu_pmu, 1); - - /* If no interrupts available, set the corresponding capability flag */ - if (!platform_get_irq(cpu_pmu->plat_device, 0)) - cpu_pmu->pmu.capabilities |= PERF_PMU_CAP_NO_INTERRUPT; - - return 0; - -out_hw_events: - free_percpu(cpu_hw_events); - return err; -} - -static void cpu_pmu_destroy(struct arm_pmu *cpu_pmu) -{ - unregister_cpu_notifier(&cpu_pmu->hotplug_nb); - free_percpu(cpu_pmu->hw_events); -} - -/* - * PMU platform driver and devicetree bindings. 
- */ -static const struct of_device_id cpu_pmu_of_device_ids[] = { - {.compatible = "arm,cortex-a17-pmu", .data = armv7_a17_pmu_init}, - {.compatible = "arm,cortex-a15-pmu", .data = armv7_a15_pmu_init}, - {.compatible = "arm,cortex-a12-pmu", .data = armv7_a12_pmu_init}, - {.compatible = "arm,cortex-a9-pmu", .data = armv7_a9_pmu_init}, - {.compatible = "arm,cortex-a8-pmu", .data = armv7_a8_pmu_init}, - {.compatible = "arm,cortex-a7-pmu", .data = armv7_a7_pmu_init}, - {.compatible = "arm,cortex-a5-pmu", .data = armv7_a5_pmu_init}, - {.compatible = "arm,arm11mpcore-pmu", .data = armv6mpcore_pmu_init}, - {.compatible = "arm,arm1176-pmu", .data = armv6_1176_pmu_init}, - {.compatible = "arm,arm1136-pmu", .data = armv6_1136_pmu_init}, - {.compatible = "qcom,krait-pmu", .data = krait_pmu_init}, - {}, -}; - -static struct platform_device_id cpu_pmu_plat_device_ids[] = { - {.name = "arm-pmu"}, - {.name = "armv6-pmu"}, - {.name = "armv7-pmu"}, - {.name = "xscale-pmu"}, - {}, -}; - -static const struct pmu_probe_info pmu_probe_table[] = { - ARM_PMU_PROBE(ARM_CPU_PART_ARM1136, armv6_1136_pmu_init), - ARM_PMU_PROBE(ARM_CPU_PART_ARM1156, armv6_1156_pmu_init), - ARM_PMU_PROBE(ARM_CPU_PART_ARM1176, armv6_1176_pmu_init), - ARM_PMU_PROBE(ARM_CPU_PART_ARM11MPCORE, armv6mpcore_pmu_init), - ARM_PMU_PROBE(ARM_CPU_PART_CORTEX_A8, armv7_a8_pmu_init), - ARM_PMU_PROBE(ARM_CPU_PART_CORTEX_A9, armv7_a9_pmu_init), - XSCALE_PMU_PROBE(ARM_CPU_XSCALE_ARCH_V1, xscale1pmu_init), - XSCALE_PMU_PROBE(ARM_CPU_XSCALE_ARCH_V2, xscale2pmu_init), - { /* sentinel value */ } -}; - -/* - * CPU PMU identification and probing. - */ -static int probe_current_pmu(struct arm_pmu *pmu) -{ - int cpu = get_cpu(); - unsigned int cpuid = read_cpuid_id(); - int ret = -ENODEV; - const struct pmu_probe_info *info; - - pr_info("probing PMU on CPU %d\n", cpu); - - for (info = pmu_probe_table; info->init != NULL; info++) { - if ((cpuid & info->mask) != info->cpuid) - continue; - ret = info->init(pmu); - break; - } - - put_cpu(); - return ret; -} - -static int cpu_pmu_device_probe(struct platform_device *pdev) -{ - const struct of_device_id *of_id; - const int (*init_fn)(struct arm_pmu *); - struct device_node *node = pdev->dev.of_node; - struct arm_pmu *pmu; - int ret = -ENODEV; - - if (cpu_pmu) { - pr_info("attempt to register multiple PMU devices!\n"); - return -ENOSPC; - } - - pmu = kzalloc(sizeof(struct arm_pmu), GFP_KERNEL); - if (!pmu) { - pr_info("failed to allocate PMU device!\n"); - return -ENOMEM; - } - - cpu_pmu = pmu; - cpu_pmu->plat_device = pdev; - - if (node && (of_id = of_match_node(cpu_pmu_of_device_ids, pdev->dev.of_node))) { - init_fn = of_id->data; - ret = init_fn(pmu); - } else { - ret = probe_current_pmu(pmu); - } - - if (ret) { - pr_info("failed to probe PMU!\n"); - goto out_free; - } - - ret = cpu_pmu_init(cpu_pmu); - if (ret) - goto out_free; - - ret = armpmu_register(cpu_pmu, -1); - if (ret) - goto out_destroy; - - return 0; - -out_destroy: - cpu_pmu_destroy(cpu_pmu); -out_free: - pr_info("failed to register PMU devices!\n"); - kfree(pmu); - return ret; -} - -static struct platform_driver cpu_pmu_driver = { - .driver = { - .name = "arm-pmu", - .pm = &armpmu_dev_pm_ops, - .of_match_table = cpu_pmu_of_device_ids, - }, - .probe = cpu_pmu_device_probe, - .id_table = cpu_pmu_plat_device_ids, -}; - -static int __init register_pmu_driver(void) -{ - return platform_driver_register(&cpu_pmu_driver); -} -device_initcall(register_pmu_driver); diff --git a/arch/arm/kernel/perf_event_v6.c b/arch/arm/kernel/perf_event_v6.c index 
f2ffd5c542ed..09f83e414a72 100644 --- a/arch/arm/kernel/perf_event_v6.c +++ b/arch/arm/kernel/perf_event_v6.c @@ -31,6 +31,14 @@ */ #if defined(CONFIG_CPU_V6) || defined(CONFIG_CPU_V6K) + +#include <asm/cputype.h> +#include <asm/irq_regs.h> +#include <asm/pmu.h> + +#include <linux/of.h> +#include <linux/platform_device.h> + enum armv6_perf_types { ARMV6_PERFCTR_ICACHE_MISS = 0x0, ARMV6_PERFCTR_IBUF_STALL = 0x1, @@ -543,24 +551,39 @@ static int armv6mpcore_pmu_init(struct arm_pmu *cpu_pmu) return 0; } -#else -static int armv6_1136_pmu_init(struct arm_pmu *cpu_pmu) -{ - return -ENODEV; -} -static int armv6_1156_pmu_init(struct arm_pmu *cpu_pmu) -{ - return -ENODEV; -} +static struct of_device_id armv6_pmu_of_device_ids[] = { + {.compatible = "arm,arm11mpcore-pmu", .data = armv6mpcore_pmu_init}, + {.compatible = "arm,arm1176-pmu", .data = armv6_1176_pmu_init}, + {.compatible = "arm,arm1136-pmu", .data = armv6_1136_pmu_init}, + { /* sentinel value */ } +}; -static int armv6_1176_pmu_init(struct arm_pmu *cpu_pmu) +static const struct pmu_probe_info armv6_pmu_probe_table[] = { + ARM_PMU_PROBE(ARM_CPU_PART_ARM1136, armv6_1136_pmu_init), + ARM_PMU_PROBE(ARM_CPU_PART_ARM1156, armv6_1156_pmu_init), + ARM_PMU_PROBE(ARM_CPU_PART_ARM1176, armv6_1176_pmu_init), + ARM_PMU_PROBE(ARM_CPU_PART_ARM11MPCORE, armv6mpcore_pmu_init), + { /* sentinel value */ } +}; + +static int armv6_pmu_device_probe(struct platform_device *pdev) { - return -ENODEV; + return arm_pmu_device_probe(pdev, armv6_pmu_of_device_ids, + armv6_pmu_probe_table); } -static int armv6mpcore_pmu_init(struct arm_pmu *cpu_pmu) +static struct platform_driver armv6_pmu_driver = { + .driver = { + .name = "armv6-pmu", + .of_match_table = armv6_pmu_of_device_ids, + }, + .probe = armv6_pmu_device_probe, +}; + +static int __init register_armv6_pmu_driver(void) { - return -ENODEV; + return platform_driver_register(&armv6_pmu_driver); } +device_initcall(register_armv6_pmu_driver); #endif /* CONFIG_CPU_V6 || CONFIG_CPU_V6K */ diff --git a/arch/arm/kernel/perf_event_v7.c b/arch/arm/kernel/perf_event_v7.c index 8993770c47de..f9b37f876e20 100644 --- a/arch/arm/kernel/perf_event_v7.c +++ b/arch/arm/kernel/perf_event_v7.c @@ -19,9 +19,15 @@ #ifdef CONFIG_CPU_V7 #include <asm/cp15.h> +#include <asm/cputype.h> +#include <asm/irq_regs.h> +#include <asm/pmu.h> #include <asm/vfp.h> #include "../vfp/vfpinstr.h" +#include <linux/of.h> +#include <linux/platform_device.h> + /* * Common ARMv7 event types * @@ -140,6 +146,23 @@ enum krait_perf_types { KRAIT_PERFCTR_L1_DTLB_ACCESS = 0x12210, }; +/* ARMv7 Scorpion specific event types */ +enum scorpion_perf_types { + SCORPION_LPM0_GROUP0 = 0x4c, + SCORPION_LPM1_GROUP0 = 0x50, + SCORPION_LPM2_GROUP0 = 0x54, + SCORPION_L2LPM_GROUP0 = 0x58, + SCORPION_VLPM_GROUP0 = 0x5c, + + SCORPION_ICACHE_ACCESS = 0x10053, + SCORPION_ICACHE_MISS = 0x10052, + + SCORPION_DTLB_ACCESS = 0x12013, + SCORPION_DTLB_MISS = 0x12012, + + SCORPION_ITLB_MISS = 0x12021, +}; + /* * Cortex-A8 HW events mapping * @@ -482,6 +505,49 @@ static const unsigned krait_perf_cache_map[PERF_COUNT_HW_CACHE_MAX] }; /* + * Scorpion HW events mapping + */ +static const unsigned scorpion_perf_map[PERF_COUNT_HW_MAX] = { + PERF_MAP_ALL_UNSUPPORTED, + [PERF_COUNT_HW_CPU_CYCLES] = ARMV7_PERFCTR_CPU_CYCLES, + [PERF_COUNT_HW_INSTRUCTIONS] = ARMV7_PERFCTR_INSTR_EXECUTED, + [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = ARMV7_PERFCTR_PC_WRITE, + [PERF_COUNT_HW_BRANCH_MISSES] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED, + [PERF_COUNT_HW_BUS_CYCLES] = ARMV7_PERFCTR_CLOCK_CYCLES, +}; + +static const 
unsigned scorpion_perf_cache_map[PERF_COUNT_HW_CACHE_MAX] + [PERF_COUNT_HW_CACHE_OP_MAX] + [PERF_COUNT_HW_CACHE_RESULT_MAX] = { + PERF_CACHE_MAP_ALL_UNSUPPORTED, + /* + * The performance counters don't differentiate between read and write + * accesses/misses so this isn't strictly correct, but it's the best we + * can do. Writes and reads get combined. + */ + [C(L1D)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_L1_DCACHE_ACCESS, + [C(L1D)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_L1_DCACHE_REFILL, + [C(L1D)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_L1_DCACHE_ACCESS, + [C(L1D)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_PERFCTR_L1_DCACHE_REFILL, + [C(L1I)][C(OP_READ)][C(RESULT_ACCESS)] = SCORPION_ICACHE_ACCESS, + [C(L1I)][C(OP_READ)][C(RESULT_MISS)] = SCORPION_ICACHE_MISS, + /* + * Only ITLB misses and DTLB refills are supported. If users want the + * DTLB refills misses a raw counter must be used. + */ + [C(DTLB)][C(OP_READ)][C(RESULT_ACCESS)] = SCORPION_DTLB_ACCESS, + [C(DTLB)][C(OP_READ)][C(RESULT_MISS)] = SCORPION_DTLB_MISS, + [C(DTLB)][C(OP_WRITE)][C(RESULT_ACCESS)] = SCORPION_DTLB_ACCESS, + [C(DTLB)][C(OP_WRITE)][C(RESULT_MISS)] = SCORPION_DTLB_MISS, + [C(ITLB)][C(OP_READ)][C(RESULT_MISS)] = SCORPION_ITLB_MISS, + [C(ITLB)][C(OP_WRITE)][C(RESULT_MISS)] = SCORPION_ITLB_MISS, + [C(BPU)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_PC_BRANCH_PRED, + [C(BPU)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED, + [C(BPU)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_PC_BRANCH_PRED, + [C(BPU)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED, +}; + +/* * Perf Events' indices */ #define ARMV7_IDX_CYCLE_COUNTER 0 @@ -976,6 +1042,12 @@ static int krait_map_event_no_branch(struct perf_event *event) &krait_perf_cache_map, 0xFFFFF); } +static int scorpion_map_event(struct perf_event *event) +{ + return armpmu_map_event(event, &scorpion_perf_map, + &scorpion_perf_cache_map, 0xFFFFF); +} + static void armv7pmu_init(struct arm_pmu *cpu_pmu) { cpu_pmu->handle_irq = armv7pmu_handle_irq; @@ -990,15 +1062,22 @@ static void armv7pmu_init(struct arm_pmu *cpu_pmu) cpu_pmu->max_period = (1LLU << 32) - 1; }; -static u32 armv7_read_num_pmnc_events(void) +static void armv7_read_num_pmnc_events(void *info) { - u32 nb_cnt; + int *nb_cnt = info; /* Read the nb of CNTx counters supported from PMNC */ - nb_cnt = (armv7_pmnc_read() >> ARMV7_PMNC_N_SHIFT) & ARMV7_PMNC_N_MASK; + *nb_cnt = (armv7_pmnc_read() >> ARMV7_PMNC_N_SHIFT) & ARMV7_PMNC_N_MASK; + + /* Add the CPU cycles counter */ + *nb_cnt += 1; +} - /* Add the CPU cycles counter and return */ - return nb_cnt + 1; +static int armv7_probe_num_events(struct arm_pmu *arm_pmu) +{ + return smp_call_function_any(&arm_pmu->supported_cpus, + armv7_read_num_pmnc_events, + &arm_pmu->num_events, 1); } static int armv7_a8_pmu_init(struct arm_pmu *cpu_pmu) @@ -1006,8 +1085,7 @@ static int armv7_a8_pmu_init(struct arm_pmu *cpu_pmu) armv7pmu_init(cpu_pmu); cpu_pmu->name = "armv7_cortex_a8"; cpu_pmu->map_event = armv7_a8_map_event; - cpu_pmu->num_events = armv7_read_num_pmnc_events(); - return 0; + return armv7_probe_num_events(cpu_pmu); } static int armv7_a9_pmu_init(struct arm_pmu *cpu_pmu) @@ -1015,8 +1093,7 @@ static int armv7_a9_pmu_init(struct arm_pmu *cpu_pmu) armv7pmu_init(cpu_pmu); cpu_pmu->name = "armv7_cortex_a9"; cpu_pmu->map_event = armv7_a9_map_event; - cpu_pmu->num_events = armv7_read_num_pmnc_events(); - return 0; + return armv7_probe_num_events(cpu_pmu); } static int armv7_a5_pmu_init(struct arm_pmu *cpu_pmu) @@ -1024,8 
+1101,7 @@ static int armv7_a5_pmu_init(struct arm_pmu *cpu_pmu) armv7pmu_init(cpu_pmu); cpu_pmu->name = "armv7_cortex_a5"; cpu_pmu->map_event = armv7_a5_map_event; - cpu_pmu->num_events = armv7_read_num_pmnc_events(); - return 0; + return armv7_probe_num_events(cpu_pmu); } static int armv7_a15_pmu_init(struct arm_pmu *cpu_pmu) @@ -1033,9 +1109,8 @@ static int armv7_a15_pmu_init(struct arm_pmu *cpu_pmu) armv7pmu_init(cpu_pmu); cpu_pmu->name = "armv7_cortex_a15"; cpu_pmu->map_event = armv7_a15_map_event; - cpu_pmu->num_events = armv7_read_num_pmnc_events(); cpu_pmu->set_event_filter = armv7pmu_set_event_filter; - return 0; + return armv7_probe_num_events(cpu_pmu); } static int armv7_a7_pmu_init(struct arm_pmu *cpu_pmu) @@ -1043,9 +1118,8 @@ static int armv7_a7_pmu_init(struct arm_pmu *cpu_pmu) armv7pmu_init(cpu_pmu); cpu_pmu->name = "armv7_cortex_a7"; cpu_pmu->map_event = armv7_a7_map_event; - cpu_pmu->num_events = armv7_read_num_pmnc_events(); cpu_pmu->set_event_filter = armv7pmu_set_event_filter; - return 0; + return armv7_probe_num_events(cpu_pmu); } static int armv7_a12_pmu_init(struct arm_pmu *cpu_pmu) @@ -1053,16 +1127,15 @@ static int armv7_a12_pmu_init(struct arm_pmu *cpu_pmu) armv7pmu_init(cpu_pmu); cpu_pmu->name = "armv7_cortex_a12"; cpu_pmu->map_event = armv7_a12_map_event; - cpu_pmu->num_events = armv7_read_num_pmnc_events(); cpu_pmu->set_event_filter = armv7pmu_set_event_filter; - return 0; + return armv7_probe_num_events(cpu_pmu); } static int armv7_a17_pmu_init(struct arm_pmu *cpu_pmu) { - armv7_a12_pmu_init(cpu_pmu); + int ret = armv7_a12_pmu_init(cpu_pmu); cpu_pmu->name = "armv7_cortex_a17"; - return 0; + return ret; } /* @@ -1103,6 +1176,12 @@ static int armv7_a17_pmu_init(struct arm_pmu *cpu_pmu) #define KRAIT_EVENT_MASK (KRAIT_EVENT | VENUM_EVENT) #define PMRESRn_EN BIT(31) +#define EVENT_REGION(event) (((event) >> 12) & 0xf) /* R */ +#define EVENT_GROUP(event) ((event) & 0xf) /* G */ +#define EVENT_CODE(event) (((event) >> 4) & 0xff) /* CC */ +#define EVENT_VENUM(event) (!!(event & VENUM_EVENT)) /* N=2 */ +#define EVENT_CPU(event) (!!(event & KRAIT_EVENT)) /* N=1 */ + static u32 krait_read_pmresrn(int n) { u32 val; @@ -1141,19 +1220,19 @@ static void krait_write_pmresrn(int n, u32 val) } } -static u32 krait_read_vpmresr0(void) +static u32 venum_read_pmresr(void) { u32 val; asm volatile("mrc p10, 7, %0, c11, c0, 0" : "=r" (val)); return val; } -static void krait_write_vpmresr0(u32 val) +static void venum_write_pmresr(u32 val) { asm volatile("mcr p10, 7, %0, c11, c0, 0" : : "r" (val)); } -static void krait_pre_vpmresr0(u32 *venum_orig_val, u32 *fp_orig_val) +static void venum_pre_pmresr(u32 *venum_orig_val, u32 *fp_orig_val) { u32 venum_new_val; u32 fp_new_val; @@ -1170,7 +1249,7 @@ static void krait_pre_vpmresr0(u32 *venum_orig_val, u32 *fp_orig_val) fmxr(FPEXC, fp_new_val); } -static void krait_post_vpmresr0(u32 venum_orig_val, u32 fp_orig_val) +static void venum_post_pmresr(u32 venum_orig_val, u32 fp_orig_val) { BUG_ON(preemptible()); /* Restore FPEXC */ @@ -1193,16 +1272,11 @@ static void krait_evt_setup(int idx, u32 config_base) u32 val; u32 mask; u32 vval, fval; - unsigned int region; - unsigned int group; - unsigned int code; + unsigned int region = EVENT_REGION(config_base); + unsigned int group = EVENT_GROUP(config_base); + unsigned int code = EVENT_CODE(config_base); unsigned int group_shift; - bool venum_event; - - venum_event = !!(config_base & VENUM_EVENT); - region = (config_base >> 12) & 0xf; - code = (config_base >> 4) & 0xff; - group = (config_base >> 0) 
& 0xf; + bool venum_event = EVENT_VENUM(config_base); group_shift = group * 8; mask = 0xff << group_shift; @@ -1217,16 +1291,14 @@ static void krait_evt_setup(int idx, u32 config_base) val |= config_base & (ARMV7_EXCLUDE_USER | ARMV7_EXCLUDE_PL1); armv7_pmnc_write_evtsel(idx, val); - asm volatile("mcr p15, 0, %0, c9, c15, 0" : : "r" (0)); - if (venum_event) { - krait_pre_vpmresr0(&vval, &fval); - val = krait_read_vpmresr0(); + venum_pre_pmresr(&vval, &fval); + val = venum_read_pmresr(); val &= ~mask; val |= code << group_shift; val |= PMRESRn_EN; - krait_write_vpmresr0(val); - krait_post_vpmresr0(vval, fval); + venum_write_pmresr(val); + venum_post_pmresr(vval, fval); } else { val = krait_read_pmresrn(region); val &= ~mask; @@ -1236,7 +1308,7 @@ static void krait_evt_setup(int idx, u32 config_base) } } -static u32 krait_clear_pmresrn_group(u32 val, int group) +static u32 clear_pmresrn_group(u32 val, int group) { u32 mask; int group_shift; @@ -1256,23 +1328,19 @@ static void krait_clearpmu(u32 config_base) { u32 val; u32 vval, fval; - unsigned int region; - unsigned int group; - bool venum_event; - - venum_event = !!(config_base & VENUM_EVENT); - region = (config_base >> 12) & 0xf; - group = (config_base >> 0) & 0xf; + unsigned int region = EVENT_REGION(config_base); + unsigned int group = EVENT_GROUP(config_base); + bool venum_event = EVENT_VENUM(config_base); if (venum_event) { - krait_pre_vpmresr0(&vval, &fval); - val = krait_read_vpmresr0(); - val = krait_clear_pmresrn_group(val, group); - krait_write_vpmresr0(val); - krait_post_vpmresr0(vval, fval); + venum_pre_pmresr(&vval, &fval); + val = venum_read_pmresr(); + val = clear_pmresrn_group(val, group); + venum_write_pmresr(val); + venum_post_pmresr(vval, fval); } else { val = krait_read_pmresrn(region); - val = krait_clear_pmresrn_group(val, group); + val = clear_pmresrn_group(val, group); krait_write_pmresrn(region, val); } } @@ -1342,6 +1410,8 @@ static void krait_pmu_enable_event(struct perf_event *event) static void krait_pmu_reset(void *info) { u32 vval, fval; + struct arm_pmu *cpu_pmu = info; + u32 idx, nb_cnt = cpu_pmu->num_events; armv7pmu_reset(info); @@ -1350,9 +1420,16 @@ static void krait_pmu_reset(void *info) krait_write_pmresrn(1, 0); krait_write_pmresrn(2, 0); - krait_pre_vpmresr0(&vval, &fval); - krait_write_vpmresr0(0); - krait_post_vpmresr0(vval, fval); + venum_pre_pmresr(&vval, &fval); + venum_write_pmresr(0); + venum_post_pmresr(vval, fval); + + /* Reset PMxEVNCTCR to sane default */ + for (idx = ARMV7_IDX_CYCLE_COUNTER; idx < nb_cnt; ++idx) { + armv7_pmnc_select_counter(idx); + asm volatile("mcr p15, 0, %0, c9, c15, 0" : : "r" (0)); + } + } static int krait_event_to_bit(struct perf_event *event, unsigned int region, @@ -1386,26 +1463,18 @@ static int krait_pmu_get_event_idx(struct pmu_hw_events *cpuc, { int idx; int bit = -1; - unsigned int prefix; - unsigned int region; - unsigned int code; - unsigned int group; - bool krait_event; struct hw_perf_event *hwc = &event->hw; + unsigned int region = EVENT_REGION(hwc->config_base); + unsigned int code = EVENT_CODE(hwc->config_base); + unsigned int group = EVENT_GROUP(hwc->config_base); + bool venum_event = EVENT_VENUM(hwc->config_base); + bool krait_event = EVENT_CPU(hwc->config_base); - region = (hwc->config_base >> 12) & 0xf; - code = (hwc->config_base >> 4) & 0xff; - group = (hwc->config_base >> 0) & 0xf; - krait_event = !!(hwc->config_base & KRAIT_EVENT_MASK); - - if (krait_event) { + if (venum_event || krait_event) { /* Ignore invalid events */ if (group > 3 || 
region > 2) return -EINVAL; - prefix = hwc->config_base & KRAIT_EVENT_MASK; - if (prefix != KRAIT_EVENT && prefix != VENUM_EVENT) - return -EINVAL; - if (prefix == VENUM_EVENT && (code & 0xe0)) + if (venum_event && (code & 0xe0)) return -EINVAL; bit = krait_event_to_bit(event, region, group); @@ -1425,15 +1494,12 @@ static void krait_pmu_clear_event_idx(struct pmu_hw_events *cpuc, { int bit; struct hw_perf_event *hwc = &event->hw; - unsigned int region; - unsigned int group; - bool krait_event; - - region = (hwc->config_base >> 12) & 0xf; - group = (hwc->config_base >> 0) & 0xf; - krait_event = !!(hwc->config_base & KRAIT_EVENT_MASK); + unsigned int region = EVENT_REGION(hwc->config_base); + unsigned int group = EVENT_GROUP(hwc->config_base); + bool venum_event = EVENT_VENUM(hwc->config_base); + bool krait_event = EVENT_CPU(hwc->config_base); - if (krait_event) { + if (venum_event || krait_event) { bit = krait_event_to_bit(event, region, group); clear_bit(bit, cpuc->used_mask); } @@ -1449,53 +1515,389 @@ static int krait_pmu_init(struct arm_pmu *cpu_pmu) cpu_pmu->map_event = krait_map_event_no_branch; else cpu_pmu->map_event = krait_map_event; - cpu_pmu->num_events = armv7_read_num_pmnc_events(); cpu_pmu->set_event_filter = armv7pmu_set_event_filter; cpu_pmu->reset = krait_pmu_reset; cpu_pmu->enable = krait_pmu_enable_event; cpu_pmu->disable = krait_pmu_disable_event; cpu_pmu->get_event_idx = krait_pmu_get_event_idx; cpu_pmu->clear_event_idx = krait_pmu_clear_event_idx; - return 0; + return armv7_probe_num_events(cpu_pmu); } -#else -static inline int armv7_a8_pmu_init(struct arm_pmu *cpu_pmu) + +/* + * Scorpion Local Performance Monitor Register (LPMn) + * + * 31 30 24 16 8 0 + * +--------------------------------+ + * LPM0 | EN | CC | CC | CC | CC | N = 1, R = 0 + * +--------------------------------+ + * LPM1 | EN | CC | CC | CC | CC | N = 1, R = 1 + * +--------------------------------+ + * LPM2 | EN | CC | CC | CC | CC | N = 1, R = 2 + * +--------------------------------+ + * L2LPM | EN | CC | CC | CC | CC | N = 1, R = 3 + * +--------------------------------+ + * VLPM | EN | CC | CC | CC | CC | N = 2, R = ? + * +--------------------------------+ + * EN | G=3 | G=2 | G=1 | G=0 + * + * + * Event Encoding: + * + * hwc->config_base = 0xNRCCG + * + * N = prefix, 1 for Scorpion CPU (LPMn/L2LPM), 2 for Venum VFP (VLPM) + * R = region register + * CC = class of events the group G is choosing from + * G = group or particular event + * + * Example: 0x12021 is a Scorpion CPU event in LPM2's group 1 with code 2 + * + * A region (R) corresponds to a piece of the CPU (execution unit, instruction + * unit, etc.) while the event code (CC) corresponds to a particular class of + * events (interrupts for example). An event code is broken down into + * groups (G) that can be mapped into the PMU (irq, fiqs, and irq+fiqs for + * example). 
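As a rough standalone illustration of the 0xNRCCG encoding described above (this sketch is not part of the patch; the shifts simply mirror the EVENT_REGION()/EVENT_CODE()/EVENT_GROUP() helpers defined earlier in this file), decoding the example event 0x12021 yields N=1, R=2, CC=0x02, G=1, i.e. a Scorpion CPU event in LPM2's group 1 with code 2:

#include <stdio.h>

int main(void)
{
	unsigned int config_base = 0x12021;		/* example from the comment above */
	unsigned int n  = (config_base >> 16) & 0xf;	/* prefix: 1 = CPU, 2 = Venum VFP */
	unsigned int r  = (config_base >> 12) & 0xf;	/* region register (LPM0..L2LPM) */
	unsigned int cc = (config_base >> 4)  & 0xff;	/* class of events */
	unsigned int g  = config_base & 0xf;		/* group within the region */

	printf("N=%u R=%u CC=0x%02x G=%u\n", n, r, cc, g);	/* prints N=1 R=2 CC=0x02 G=1 */
	return 0;
}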
+ */ + +static u32 scorpion_read_pmresrn(int n) { - return -ENODEV; + u32 val; + + switch (n) { + case 0: + asm volatile("mrc p15, 0, %0, c15, c0, 0" : "=r" (val)); + break; + case 1: + asm volatile("mrc p15, 1, %0, c15, c0, 0" : "=r" (val)); + break; + case 2: + asm volatile("mrc p15, 2, %0, c15, c0, 0" : "=r" (val)); + break; + case 3: + asm volatile("mrc p15, 3, %0, c15, c2, 0" : "=r" (val)); + break; + default: + BUG(); /* Should be validated in scorpion_pmu_get_event_idx() */ + } + + return val; } -static inline int armv7_a9_pmu_init(struct arm_pmu *cpu_pmu) +static void scorpion_write_pmresrn(int n, u32 val) { - return -ENODEV; + switch (n) { + case 0: + asm volatile("mcr p15, 0, %0, c15, c0, 0" : : "r" (val)); + break; + case 1: + asm volatile("mcr p15, 1, %0, c15, c0, 0" : : "r" (val)); + break; + case 2: + asm volatile("mcr p15, 2, %0, c15, c0, 0" : : "r" (val)); + break; + case 3: + asm volatile("mcr p15, 3, %0, c15, c2, 0" : : "r" (val)); + break; + default: + BUG(); /* Should be validated in scorpion_pmu_get_event_idx() */ + } } -static inline int armv7_a5_pmu_init(struct arm_pmu *cpu_pmu) +static u32 scorpion_get_pmresrn_event(unsigned int region) { - return -ENODEV; + static const u32 pmresrn_table[] = { SCORPION_LPM0_GROUP0, + SCORPION_LPM1_GROUP0, + SCORPION_LPM2_GROUP0, + SCORPION_L2LPM_GROUP0 }; + return pmresrn_table[region]; } -static inline int armv7_a15_pmu_init(struct arm_pmu *cpu_pmu) +static void scorpion_evt_setup(int idx, u32 config_base) { - return -ENODEV; + u32 val; + u32 mask; + u32 vval, fval; + unsigned int region = EVENT_REGION(config_base); + unsigned int group = EVENT_GROUP(config_base); + unsigned int code = EVENT_CODE(config_base); + unsigned int group_shift; + bool venum_event = EVENT_VENUM(config_base); + + group_shift = group * 8; + mask = 0xff << group_shift; + + /* Configure evtsel for the region and group */ + if (venum_event) + val = SCORPION_VLPM_GROUP0; + else + val = scorpion_get_pmresrn_event(region); + val += group; + /* Mix in mode-exclusion bits */ + val |= config_base & (ARMV7_EXCLUDE_USER | ARMV7_EXCLUDE_PL1); + armv7_pmnc_write_evtsel(idx, val); + + asm volatile("mcr p15, 0, %0, c9, c15, 0" : : "r" (0)); + + if (venum_event) { + venum_pre_pmresr(&vval, &fval); + val = venum_read_pmresr(); + val &= ~mask; + val |= code << group_shift; + val |= PMRESRn_EN; + venum_write_pmresr(val); + venum_post_pmresr(vval, fval); + } else { + val = scorpion_read_pmresrn(region); + val &= ~mask; + val |= code << group_shift; + val |= PMRESRn_EN; + scorpion_write_pmresrn(region, val); + } } -static inline int armv7_a7_pmu_init(struct arm_pmu *cpu_pmu) +static void scorpion_clearpmu(u32 config_base) { - return -ENODEV; + u32 val; + u32 vval, fval; + unsigned int region = EVENT_REGION(config_base); + unsigned int group = EVENT_GROUP(config_base); + bool venum_event = EVENT_VENUM(config_base); + + if (venum_event) { + venum_pre_pmresr(&vval, &fval); + val = venum_read_pmresr(); + val = clear_pmresrn_group(val, group); + venum_write_pmresr(val); + venum_post_pmresr(vval, fval); + } else { + val = scorpion_read_pmresrn(region); + val = clear_pmresrn_group(val, group); + scorpion_write_pmresrn(region, val); + } +} + +static void scorpion_pmu_disable_event(struct perf_event *event) +{ + unsigned long flags; + struct hw_perf_event *hwc = &event->hw; + int idx = hwc->idx; + struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu); + struct pmu_hw_events *events = this_cpu_ptr(cpu_pmu->hw_events); + + /* Disable counter and interrupt */ + 
raw_spin_lock_irqsave(&events->pmu_lock, flags); + + /* Disable counter */ + armv7_pmnc_disable_counter(idx); + + /* + * Clear pmresr code (if destined for PMNx counters) + */ + if (hwc->config_base & KRAIT_EVENT_MASK) + scorpion_clearpmu(hwc->config_base); + + /* Disable interrupt for this counter */ + armv7_pmnc_disable_intens(idx); + + raw_spin_unlock_irqrestore(&events->pmu_lock, flags); } -static inline int armv7_a12_pmu_init(struct arm_pmu *cpu_pmu) +static void scorpion_pmu_enable_event(struct perf_event *event) { - return -ENODEV; + unsigned long flags; + struct hw_perf_event *hwc = &event->hw; + int idx = hwc->idx; + struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu); + struct pmu_hw_events *events = this_cpu_ptr(cpu_pmu->hw_events); + + /* + * Enable counter and interrupt, and set the counter to count + * the event that we're interested in. + */ + raw_spin_lock_irqsave(&events->pmu_lock, flags); + + /* Disable counter */ + armv7_pmnc_disable_counter(idx); + + /* + * Set event (if destined for PMNx counters) + * We don't set the event for the cycle counter because we + * don't have the ability to perform event filtering. + */ + if (hwc->config_base & KRAIT_EVENT_MASK) + scorpion_evt_setup(idx, hwc->config_base); + else if (idx != ARMV7_IDX_CYCLE_COUNTER) + armv7_pmnc_write_evtsel(idx, hwc->config_base); + + /* Enable interrupt for this counter */ + armv7_pmnc_enable_intens(idx); + + /* Enable counter */ + armv7_pmnc_enable_counter(idx); + + raw_spin_unlock_irqrestore(&events->pmu_lock, flags); } -static inline int armv7_a17_pmu_init(struct arm_pmu *cpu_pmu) +static void scorpion_pmu_reset(void *info) { - return -ENODEV; + u32 vval, fval; + struct arm_pmu *cpu_pmu = info; + u32 idx, nb_cnt = cpu_pmu->num_events; + + armv7pmu_reset(info); + + /* Clear all pmresrs */ + scorpion_write_pmresrn(0, 0); + scorpion_write_pmresrn(1, 0); + scorpion_write_pmresrn(2, 0); + scorpion_write_pmresrn(3, 0); + + venum_pre_pmresr(&vval, &fval); + venum_write_pmresr(0); + venum_post_pmresr(vval, fval); + + /* Reset PMxEVNCTCR to sane default */ + for (idx = ARMV7_IDX_CYCLE_COUNTER; idx < nb_cnt; ++idx) { + armv7_pmnc_select_counter(idx); + asm volatile("mcr p15, 0, %0, c9, c15, 0" : : "r" (0)); + } } -static inline int krait_pmu_init(struct arm_pmu *cpu_pmu) +static int scorpion_event_to_bit(struct perf_event *event, unsigned int region, + unsigned int group) +{ + int bit; + struct hw_perf_event *hwc = &event->hw; + struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu); + + if (hwc->config_base & VENUM_EVENT) + bit = SCORPION_VLPM_GROUP0; + else + bit = scorpion_get_pmresrn_event(region); + bit -= scorpion_get_pmresrn_event(0); + bit += group; + /* + * Lower bits are reserved for use by the counters (see + * armv7pmu_get_event_idx() for more info) + */ + bit += ARMV7_IDX_COUNTER_LAST(cpu_pmu) + 1; + + return bit; +} + +/* + * We check for column exclusion constraints here. + * Two events cant use the same group within a pmresr register. 
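To make the column-exclusion rule above concrete, here is a simplified, self-contained sketch (not part of the patch): the bit reserved in used_mask depends only on the region and group, so a second event naming the same region/group pair collides, while one in a different group does not. The flat region * 4 term and the fixed offset of 8 are stand-ins for the scorpion_get_pmresrn_event() arithmetic and for ARMV7_IDX_COUNTER_LAST(cpu_pmu) + 1.

#include <stdio.h>

static int event_to_bit(unsigned int region, unsigned int group)
{
	/* LPM0/LPM1/LPM2/L2LPM group-0 codes are 4 apart (0x4c..0x58),
	 * so each region contributes region * 4; 8 stands in for the
	 * counter indices reserved below these bits. */
	return region * 4 + group + 8;
}

int main(void)
{
	unsigned long used_mask = 0;
	int first  = event_to_bit(2, 1);	/* e.g. event 0x12021 */
	int second = event_to_bit(2, 1);	/* same region and group */
	int third  = event_to_bit(2, 2);	/* same region, different group */

	used_mask |= 1UL << first;
	printf("second event: %s\n", (used_mask >> second) & 1 ? "-EAGAIN" : "ok");
	printf("third event:  %s\n", (used_mask >> third) & 1 ? "-EAGAIN" : "ok");
	return 0;
}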
+ */ +static int scorpion_pmu_get_event_idx(struct pmu_hw_events *cpuc, + struct perf_event *event) +{ + int idx; + int bit = -1; + struct hw_perf_event *hwc = &event->hw; + unsigned int region = EVENT_REGION(hwc->config_base); + unsigned int group = EVENT_GROUP(hwc->config_base); + bool venum_event = EVENT_VENUM(hwc->config_base); + bool scorpion_event = EVENT_CPU(hwc->config_base); + + if (venum_event || scorpion_event) { + /* Ignore invalid events */ + if (group > 3 || region > 3) + return -EINVAL; + + bit = scorpion_event_to_bit(event, region, group); + if (test_and_set_bit(bit, cpuc->used_mask)) + return -EAGAIN; + } + + idx = armv7pmu_get_event_idx(cpuc, event); + if (idx < 0 && bit >= 0) + clear_bit(bit, cpuc->used_mask); + + return idx; +} + +static void scorpion_pmu_clear_event_idx(struct pmu_hw_events *cpuc, + struct perf_event *event) +{ + int bit; + struct hw_perf_event *hwc = &event->hw; + unsigned int region = EVENT_REGION(hwc->config_base); + unsigned int group = EVENT_GROUP(hwc->config_base); + bool venum_event = EVENT_VENUM(hwc->config_base); + bool scorpion_event = EVENT_CPU(hwc->config_base); + + if (venum_event || scorpion_event) { + bit = scorpion_event_to_bit(event, region, group); + clear_bit(bit, cpuc->used_mask); + } +} + +static int scorpion_pmu_init(struct arm_pmu *cpu_pmu) +{ + armv7pmu_init(cpu_pmu); + cpu_pmu->name = "armv7_scorpion"; + cpu_pmu->map_event = scorpion_map_event; + cpu_pmu->reset = scorpion_pmu_reset; + cpu_pmu->enable = scorpion_pmu_enable_event; + cpu_pmu->disable = scorpion_pmu_disable_event; + cpu_pmu->get_event_idx = scorpion_pmu_get_event_idx; + cpu_pmu->clear_event_idx = scorpion_pmu_clear_event_idx; + return armv7_probe_num_events(cpu_pmu); +} + +static int scorpion_mp_pmu_init(struct arm_pmu *cpu_pmu) +{ + armv7pmu_init(cpu_pmu); + cpu_pmu->name = "armv7_scorpion_mp"; + cpu_pmu->map_event = scorpion_map_event; + cpu_pmu->reset = scorpion_pmu_reset; + cpu_pmu->enable = scorpion_pmu_enable_event; + cpu_pmu->disable = scorpion_pmu_disable_event; + cpu_pmu->get_event_idx = scorpion_pmu_get_event_idx; + cpu_pmu->clear_event_idx = scorpion_pmu_clear_event_idx; + return armv7_probe_num_events(cpu_pmu); +} + +static const struct of_device_id armv7_pmu_of_device_ids[] = { + {.compatible = "arm,cortex-a17-pmu", .data = armv7_a17_pmu_init}, + {.compatible = "arm,cortex-a15-pmu", .data = armv7_a15_pmu_init}, + {.compatible = "arm,cortex-a12-pmu", .data = armv7_a12_pmu_init}, + {.compatible = "arm,cortex-a9-pmu", .data = armv7_a9_pmu_init}, + {.compatible = "arm,cortex-a8-pmu", .data = armv7_a8_pmu_init}, + {.compatible = "arm,cortex-a7-pmu", .data = armv7_a7_pmu_init}, + {.compatible = "arm,cortex-a5-pmu", .data = armv7_a5_pmu_init}, + {.compatible = "qcom,krait-pmu", .data = krait_pmu_init}, + {.compatible = "qcom,scorpion-pmu", .data = scorpion_pmu_init}, + {.compatible = "qcom,scorpion-mp-pmu", .data = scorpion_mp_pmu_init}, + {}, +}; + +static const struct pmu_probe_info armv7_pmu_probe_table[] = { + ARM_PMU_PROBE(ARM_CPU_PART_CORTEX_A8, armv7_a8_pmu_init), + ARM_PMU_PROBE(ARM_CPU_PART_CORTEX_A9, armv7_a9_pmu_init), + { /* sentinel value */ } +}; + + +static int armv7_pmu_device_probe(struct platform_device *pdev) +{ + return arm_pmu_device_probe(pdev, armv7_pmu_of_device_ids, + armv7_pmu_probe_table); +} + +static struct platform_driver armv7_pmu_driver = { + .driver = { + .name = "armv7-pmu", + .of_match_table = armv7_pmu_of_device_ids, + }, + .probe = armv7_pmu_device_probe, +}; + +static int __init register_armv7_pmu_driver(void) { - return 
-ENODEV; + return platform_driver_register(&armv7_pmu_driver); } +device_initcall(register_armv7_pmu_driver); #endif /* CONFIG_CPU_V7 */ diff --git a/arch/arm/kernel/perf_event_xscale.c b/arch/arm/kernel/perf_event_xscale.c index 8af9f1f82c68..304d056d5b25 100644 --- a/arch/arm/kernel/perf_event_xscale.c +++ b/arch/arm/kernel/perf_event_xscale.c @@ -13,6 +13,14 @@ */ #ifdef CONFIG_CPU_XSCALE + +#include <asm/cputype.h> +#include <asm/irq_regs.h> +#include <asm/pmu.h> + +#include <linux/of.h> +#include <linux/platform_device.h> + enum xscale_perf_types { XSCALE_PERFCTR_ICACHE_MISS = 0x00, XSCALE_PERFCTR_ICACHE_NO_DELIVER = 0x01, @@ -740,14 +748,28 @@ static int xscale2pmu_init(struct arm_pmu *cpu_pmu) return 0; } -#else -static inline int xscale1pmu_init(struct arm_pmu *cpu_pmu) + +static const struct pmu_probe_info xscale_pmu_probe_table[] = { + XSCALE_PMU_PROBE(ARM_CPU_XSCALE_ARCH_V1, xscale1pmu_init), + XSCALE_PMU_PROBE(ARM_CPU_XSCALE_ARCH_V2, xscale2pmu_init), + { /* sentinel value */ } +}; + +static int xscale_pmu_device_probe(struct platform_device *pdev) { - return -ENODEV; + return arm_pmu_device_probe(pdev, NULL, xscale_pmu_probe_table); } -static inline int xscale2pmu_init(struct arm_pmu *cpu_pmu) +static struct platform_driver xscale_pmu_driver = { + .driver = { + .name = "xscale-pmu", + }, + .probe = xscale_pmu_device_probe, +}; + +static int __init register_xscale_pmu_driver(void) { - return -ENODEV; + return platform_driver_register(&xscale_pmu_driver); } +device_initcall(register_xscale_pmu_driver); #endif /* CONFIG_CPU_XSCALE */ diff --git a/arch/arm/kernel/process.c b/arch/arm/kernel/process.c index fdfa3a78ec8c..f192a2a41719 100644 --- a/arch/arm/kernel/process.c +++ b/arch/arm/kernel/process.c @@ -17,12 +17,9 @@ #include <linux/stddef.h> #include <linux/unistd.h> #include <linux/user.h> -#include <linux/delay.h> -#include <linux/reboot.h> #include <linux/interrupt.h> #include <linux/kallsyms.h> #include <linux/init.h> -#include <linux/cpu.h> #include <linux/elfcore.h> #include <linux/pm.h> #include <linux/tick.h> @@ -31,16 +28,14 @@ #include <linux/random.h> #include <linux/hw_breakpoint.h> #include <linux/leds.h> -#include <linux/reboot.h> -#include <asm/cacheflush.h> -#include <asm/idmap.h> #include <asm/processor.h> #include <asm/thread_notify.h> #include <asm/stacktrace.h> #include <asm/system_misc.h> #include <asm/mach/time.h> #include <asm/tls.h> +#include <asm/vdso.h> #ifdef CONFIG_CC_STACKPROTECTOR #include <linux/stackprotector.h> @@ -59,69 +54,6 @@ static const char *isa_modes[] __maybe_unused = { "ARM" , "Thumb" , "Jazelle", "ThumbEE" }; -extern void call_with_stack(void (*fn)(void *), void *arg, void *sp); -typedef void (*phys_reset_t)(unsigned long); - -/* - * A temporary stack to use for CPU reset. This is static so that we - * don't clobber it with the identity mapping. When running with this - * stack, any references to the current task *will not work* so you - * should really do as little as possible before jumping to your reset - * code. - */ -static u64 soft_restart_stack[16]; - -static void __soft_restart(void *addr) -{ - phys_reset_t phys_reset; - - /* Take out a flat memory mapping. */ - setup_mm_for_reboot(); - - /* Clean and invalidate caches */ - flush_cache_all(); - - /* Turn off caching */ - cpu_proc_fin(); - - /* Push out any further dirty data, and ensure cache is empty */ - flush_cache_all(); - - /* Switch to the identity mapping. 
*/ - phys_reset = (phys_reset_t)(unsigned long)virt_to_phys(cpu_reset); - phys_reset((unsigned long)addr); - - /* Should never get here. */ - BUG(); -} - -void soft_restart(unsigned long addr) -{ - u64 *stack = soft_restart_stack + ARRAY_SIZE(soft_restart_stack); - - /* Disable interrupts first */ - raw_local_irq_disable(); - local_fiq_disable(); - - /* Disable the L2 if we're the last man standing. */ - if (num_online_cpus() == 1) - outer_disable(); - - /* Change to the new stack and continue with the reset. */ - call_with_stack(__soft_restart, (void *)addr, (void *)stack); - - /* Should never get here. */ - BUG(); -} - -/* - * Function pointers to optional machine specific functions - */ -void (*pm_power_off)(void); -EXPORT_SYMBOL(pm_power_off); - -void (*arm_pm_restart)(enum reboot_mode reboot_mode, const char *cmd); - /* * This is our default idle handler. */ @@ -166,79 +98,6 @@ void arch_cpu_idle_dead(void) } #endif -/* - * Called by kexec, immediately prior to machine_kexec(). - * - * This must completely disable all secondary CPUs; simply causing those CPUs - * to execute e.g. a RAM-based pin loop is not sufficient. This allows the - * kexec'd kernel to use any and all RAM as it sees fit, without having to - * avoid any code or data used by any SW CPU pin loop. The CPU hotplug - * functionality embodied in disable_nonboot_cpus() to achieve this. - */ -void machine_shutdown(void) -{ - disable_nonboot_cpus(); -} - -/* - * Halting simply requires that the secondary CPUs stop performing any - * activity (executing tasks, handling interrupts). smp_send_stop() - * achieves this. - */ -void machine_halt(void) -{ - local_irq_disable(); - smp_send_stop(); - - local_irq_disable(); - while (1); -} - -/* - * Power-off simply requires that the secondary CPUs stop performing any - * activity (executing tasks, handling interrupts). smp_send_stop() - * achieves this. When the system power is turned off, it will take all CPUs - * with it. - */ -void machine_power_off(void) -{ - local_irq_disable(); - smp_send_stop(); - - if (pm_power_off) - pm_power_off(); -} - -/* - * Restart requires that the secondary CPUs stop performing any activity - * while the primary CPU resets the system. Systems with a single CPU can - * use soft_restart() as their machine descriptor's .restart hook, since that - * will cause the only available CPU to reset. Systems with multiple CPUs must - * provide a HW restart implementation, to ensure that all CPUs reset at once. - * This is required so that any code running after reset on the primary CPU - * doesn't have to co-ordinate with other CPUs to ensure they aren't still - * executing pre-reset code, and using RAM that the primary CPU's code wishes - * to use. Implementing such co-ordination would be essentially impossible. - */ -void machine_restart(char *cmd) -{ - local_irq_disable(); - smp_send_stop(); - - if (arm_pm_restart) - arm_pm_restart(reboot_mode, cmd); - else - do_kernel_restart(cmd); - - /* Give a grace period for failure to restart of 1s */ - mdelay(1000); - - /* Whoops - the platform was unable to reboot. Tell the user! */ - printk("Reboot failed -- System halted\n"); - local_irq_disable(); - while (1); -} - void __show_regs(struct pt_regs *regs) { unsigned long flags; @@ -475,7 +334,7 @@ const char *arch_vma_name(struct vm_area_struct *vma) } /* If possible, provide a placement hint at a random offset from the - * stack for the signal page. + * stack for the sigpage and vdso pages. 
*/ static unsigned long sigpage_addr(const struct mm_struct *mm, unsigned int npages) @@ -519,6 +378,7 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp) { struct mm_struct *mm = current->mm; struct vm_area_struct *vma; + unsigned long npages; unsigned long addr; unsigned long hint; int ret = 0; @@ -528,9 +388,12 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp) if (!signal_page) return -ENOMEM; + npages = 1; /* for sigpage */ + npages += vdso_total_pages; + down_write(&mm->mmap_sem); - hint = sigpage_addr(mm, 1); - addr = get_unmapped_area(NULL, hint, PAGE_SIZE, 0, 0); + hint = sigpage_addr(mm, npages); + addr = get_unmapped_area(NULL, hint, npages << PAGE_SHIFT, 0, 0); if (IS_ERR_VALUE(addr)) { ret = addr; goto up_fail; @@ -547,6 +410,12 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp) mm->context.sigpage = addr; + /* Unlike the sigpage, failure to install the vdso is unlikely + * to be fatal to the process, so no error check needed + * here. + */ + arm_install_vdso(mm, addr + PAGE_SIZE); + up_fail: up_write(&mm->mmap_sem); return ret; diff --git a/arch/arm/kernel/psci-call.S b/arch/arm/kernel/psci-call.S new file mode 100644 index 000000000000..a78e9e1e206d --- /dev/null +++ b/arch/arm/kernel/psci-call.S @@ -0,0 +1,31 @@ +/* + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * Copyright (C) 2015 ARM Limited + * + * Author: Mark Rutland <mark.rutland@arm.com> + */ + +#include <linux/linkage.h> + +#include <asm/opcodes-sec.h> +#include <asm/opcodes-virt.h> + +/* int __invoke_psci_fn_hvc(u32 function_id, u32 arg0, u32 arg1, u32 arg2) */ +ENTRY(__invoke_psci_fn_hvc) + __HVC(0) + bx lr +ENDPROC(__invoke_psci_fn_hvc) + +/* int __invoke_psci_fn_smc(u32 function_id, u32 arg0, u32 arg1, u32 arg2) */ +ENTRY(__invoke_psci_fn_smc) + __SMC(0) + bx lr +ENDPROC(__invoke_psci_fn_smc) diff --git a/arch/arm/kernel/psci.c b/arch/arm/kernel/psci.c index f73891b6b730..f90fdf4ce7c7 100644 --- a/arch/arm/kernel/psci.c +++ b/arch/arm/kernel/psci.c @@ -23,8 +23,6 @@ #include <asm/compiler.h> #include <asm/errno.h> -#include <asm/opcodes-sec.h> -#include <asm/opcodes-virt.h> #include <asm/psci.h> #include <asm/system_misc.h> @@ -33,6 +31,9 @@ struct psci_operations psci_ops; static int (*invoke_psci_fn)(u32, u32, u32, u32); typedef int (*psci_initcall_t)(const struct device_node *); +asmlinkage int __invoke_psci_fn_hvc(u32, u32, u32, u32); +asmlinkage int __invoke_psci_fn_smc(u32, u32, u32, u32); + enum psci_function { PSCI_FN_CPU_SUSPEND, PSCI_FN_CPU_ON, @@ -71,40 +72,6 @@ static u32 psci_power_state_pack(struct psci_power_state state) & PSCI_0_2_POWER_STATE_AFFL_MASK); } -/* - * The following two functions are invoked via the invoke_psci_fn pointer - * and will not be inlined, allowing us to piggyback on the AAPCS. 
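The AAPCS piggybacking mentioned above is also what lets the new psci-call.S veneers stay so small: an out-of-line call already places function_id, arg0, arg1 and arg2 in r0-r3, which is exactly the register layout PSCI expects, and the firmware's result comes back in r0, the AAPCS return register, so the veneer only needs the SMC/HVC and a bx lr. A minimal caller sketch, not part of the patch (psci_cpu_on_sketch and the hard-coded 0x84000003, i.e. PSCI_0_2_FN_CPU_ON from <uapi/linux/psci.h>, are for illustration only):

/* Declaration as added to psci.c by this patch. */
asmlinkage int __invoke_psci_fn_smc(u32 function_id, u32 arg0, u32 arg1, u32 arg2);

static int (*invoke_psci_fn)(u32, u32, u32, u32) = __invoke_psci_fn_smc;

static int psci_cpu_on_sketch(u32 cpuid, u32 entry_point)
{
	/* All four arguments land in r0-r3; the SMC result returns in r0. */
	return invoke_psci_fn(0x84000003 /* PSCI_0_2_FN_CPU_ON */,
			      cpuid, entry_point, 0);
}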
- */ -static noinline int __invoke_psci_fn_hvc(u32 function_id, u32 arg0, u32 arg1, - u32 arg2) -{ - asm volatile( - __asmeq("%0", "r0") - __asmeq("%1", "r1") - __asmeq("%2", "r2") - __asmeq("%3", "r3") - __HVC(0) - : "+r" (function_id) - : "r" (arg0), "r" (arg1), "r" (arg2)); - - return function_id; -} - -static noinline int __invoke_psci_fn_smc(u32 function_id, u32 arg0, u32 arg1, - u32 arg2) -{ - asm volatile( - __asmeq("%0", "r0") - __asmeq("%1", "r1") - __asmeq("%2", "r2") - __asmeq("%3", "r3") - __SMC(0) - : "+r" (function_id) - : "r" (arg0), "r" (arg1), "r" (arg2)); - - return function_id; -} - static int psci_get_version(void) { int err; diff --git a/arch/arm/kernel/reboot.c b/arch/arm/kernel/reboot.c new file mode 100644 index 000000000000..38269358fd25 --- /dev/null +++ b/arch/arm/kernel/reboot.c @@ -0,0 +1,155 @@ +/* + * Copyright (C) 1996-2000 Russell King - Converted to ARM. + * Original Copyright (C) 1995 Linus Torvalds + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#include <linux/cpu.h> +#include <linux/delay.h> +#include <linux/reboot.h> + +#include <asm/cacheflush.h> +#include <asm/idmap.h> + +#include "reboot.h" + +typedef void (*phys_reset_t)(unsigned long); + +/* + * Function pointers to optional machine specific functions + */ +void (*arm_pm_restart)(enum reboot_mode reboot_mode, const char *cmd); +void (*pm_power_off)(void); +EXPORT_SYMBOL(pm_power_off); + +/* + * A temporary stack to use for CPU reset. This is static so that we + * don't clobber it with the identity mapping. When running with this + * stack, any references to the current task *will not work* so you + * should really do as little as possible before jumping to your reset + * code. + */ +static u64 soft_restart_stack[16]; + +static void __soft_restart(void *addr) +{ + phys_reset_t phys_reset; + + /* Take out a flat memory mapping. */ + setup_mm_for_reboot(); + + /* Clean and invalidate caches */ + flush_cache_all(); + + /* Turn off caching */ + cpu_proc_fin(); + + /* Push out any further dirty data, and ensure cache is empty */ + flush_cache_all(); + + /* Switch to the identity mapping. */ + phys_reset = (phys_reset_t)(unsigned long)virt_to_idmap(cpu_reset); + phys_reset((unsigned long)addr); + + /* Should never get here. */ + BUG(); +} + +void _soft_restart(unsigned long addr, bool disable_l2) +{ + u64 *stack = soft_restart_stack + ARRAY_SIZE(soft_restart_stack); + + /* Disable interrupts first */ + raw_local_irq_disable(); + local_fiq_disable(); + + /* Disable the L2 if we're the last man standing. */ + if (disable_l2) + outer_disable(); + + /* Change to the new stack and continue with the reset. */ + call_with_stack(__soft_restart, (void *)addr, (void *)stack); + + /* Should never get here. */ + BUG(); +} + +void soft_restart(unsigned long addr) +{ + _soft_restart(addr, num_online_cpus() == 1); +} + +/* + * Called by kexec, immediately prior to machine_kexec(). + * + * This must completely disable all secondary CPUs; simply causing those CPUs + * to execute e.g. a RAM-based pin loop is not sufficient. This allows the + * kexec'd kernel to use any and all RAM as it sees fit, without having to + * avoid any code or data used by any SW CPU pin loop. The CPU hotplug + * functionality embodied in disable_nonboot_cpus() to achieve this. 
+ */ +void machine_shutdown(void) +{ + disable_nonboot_cpus(); +} + +/* + * Halting simply requires that the secondary CPUs stop performing any + * activity (executing tasks, handling interrupts). smp_send_stop() + * achieves this. + */ +void machine_halt(void) +{ + local_irq_disable(); + smp_send_stop(); + + local_irq_disable(); + while (1); +} + +/* + * Power-off simply requires that the secondary CPUs stop performing any + * activity (executing tasks, handling interrupts). smp_send_stop() + * achieves this. When the system power is turned off, it will take all CPUs + * with it. + */ +void machine_power_off(void) +{ + local_irq_disable(); + smp_send_stop(); + + if (pm_power_off) + pm_power_off(); +} + +/* + * Restart requires that the secondary CPUs stop performing any activity + * while the primary CPU resets the system. Systems with a single CPU can + * use soft_restart() as their machine descriptor's .restart hook, since that + * will cause the only available CPU to reset. Systems with multiple CPUs must + * provide a HW restart implementation, to ensure that all CPUs reset at once. + * This is required so that any code running after reset on the primary CPU + * doesn't have to co-ordinate with other CPUs to ensure they aren't still + * executing pre-reset code, and using RAM that the primary CPU's code wishes + * to use. Implementing such co-ordination would be essentially impossible. + */ +void machine_restart(char *cmd) +{ + local_irq_disable(); + smp_send_stop(); + + if (arm_pm_restart) + arm_pm_restart(reboot_mode, cmd); + else + do_kernel_restart(cmd); + + /* Give a grace period for failure to restart of 1s */ + mdelay(1000); + + /* Whoops - the platform was unable to reboot. Tell the user! */ + printk("Reboot failed -- System halted\n"); + local_irq_disable(); + while (1); +} diff --git a/arch/arm/kernel/reboot.h b/arch/arm/kernel/reboot.h new file mode 100644 index 000000000000..bf7a0b1f076e --- /dev/null +++ b/arch/arm/kernel/reboot.h @@ -0,0 +1,7 @@ +#ifndef REBOOT_H +#define REBOOT_H + +extern void call_with_stack(void (*fn)(void *), void *arg, void *sp); +extern void _soft_restart(unsigned long addr, bool disable_l2); + +#endif diff --git a/arch/arm/kernel/return_address.c b/arch/arm/kernel/return_address.c index 24b4a04846eb..36ed35073289 100644 --- a/arch/arm/kernel/return_address.c +++ b/arch/arm/kernel/return_address.c @@ -56,8 +56,6 @@ void *return_address(unsigned int level) return NULL; } -#else /* if defined(CONFIG_FRAME_POINTER) && !defined(CONFIG_ARM_UNWIND) */ - -#endif /* if defined(CONFIG_FRAME_POINTER) && !defined(CONFIG_ARM_UNWIND) / else */ +#endif /* if defined(CONFIG_FRAME_POINTER) && !defined(CONFIG_ARM_UNWIND) */ EXPORT_SYMBOL_GPL(return_address); diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c index 1d60bebea4b8..36c18b73c1f4 100644 --- a/arch/arm/kernel/setup.c +++ b/arch/arm/kernel/setup.c @@ -46,6 +46,7 @@ #include <asm/cacheflush.h> #include <asm/cachetype.h> #include <asm/tlbflush.h> +#include <asm/xen/hypervisor.h> #include <asm/prom.h> #include <asm/mach/arch.h> @@ -75,8 +76,7 @@ __setup("fpe=", fpe_setup); extern void init_default_cache_policy(unsigned long); extern void paging_init(const struct machine_desc *desc); -extern void early_paging_init(const struct machine_desc *, - struct proc_info_list *); +extern void early_paging_init(const struct machine_desc *); extern void sanity_check_meminfo(void); extern enum reboot_mode reboot_mode; extern void setup_dma_zone(const struct machine_desc *desc); @@ -93,6 +93,9 @@ unsigned int 
__atags_pointer __initdata; unsigned int system_rev; EXPORT_SYMBOL(system_rev); +const char *system_serial; +EXPORT_SYMBOL(system_serial); + unsigned int system_serial_low; EXPORT_SYMBOL(system_serial_low); @@ -372,30 +375,48 @@ void __init early_print(const char *str, ...) static void __init cpuid_init_hwcaps(void) { - unsigned int divide_instrs, vmsa; + int block; + u32 isar5; if (cpu_architecture() < CPU_ARCH_ARMv7) return; - divide_instrs = (read_cpuid_ext(CPUID_EXT_ISAR0) & 0x0f000000) >> 24; - - switch (divide_instrs) { - case 2: + block = cpuid_feature_extract(CPUID_EXT_ISAR0, 24); + if (block >= 2) elf_hwcap |= HWCAP_IDIVA; - case 1: + if (block >= 1) elf_hwcap |= HWCAP_IDIVT; - } /* LPAE implies atomic ldrd/strd instructions */ - vmsa = (read_cpuid_ext(CPUID_EXT_MMFR0) & 0xf) >> 0; - if (vmsa >= 5) + block = cpuid_feature_extract(CPUID_EXT_MMFR0, 0); + if (block >= 5) elf_hwcap |= HWCAP_LPAE; + + /* check for supported v8 Crypto instructions */ + isar5 = read_cpuid_ext(CPUID_EXT_ISAR5); + + block = cpuid_feature_extract_field(isar5, 4); + if (block >= 2) + elf_hwcap2 |= HWCAP2_PMULL; + if (block >= 1) + elf_hwcap2 |= HWCAP2_AES; + + block = cpuid_feature_extract_field(isar5, 8); + if (block >= 1) + elf_hwcap2 |= HWCAP2_SHA1; + + block = cpuid_feature_extract_field(isar5, 12); + if (block >= 1) + elf_hwcap2 |= HWCAP2_SHA2; + + block = cpuid_feature_extract_field(isar5, 16); + if (block >= 1) + elf_hwcap2 |= HWCAP2_CRC32; } static void __init elf_hwcap_fixup(void) { unsigned id = read_cpuid_id(); - unsigned sync_prim; /* * HWCAP_TLS is available only on 1136 r1p0 and later, @@ -416,9 +437,9 @@ static void __init elf_hwcap_fixup(void) * avoid advertising SWP; it may not be atomic with * multiprocessing cores. */ - sync_prim = ((read_cpuid_ext(CPUID_EXT_ISAR3) >> 8) & 0xf0) | - ((read_cpuid_ext(CPUID_EXT_ISAR4) >> 20) & 0x0f); - if (sync_prim >= 0x13) + if (cpuid_feature_extract(CPUID_EXT_ISAR3, 12) > 1 || + (cpuid_feature_extract(CPUID_EXT_ISAR3, 12) == 1 && + cpuid_feature_extract(CPUID_EXT_ISAR3, 20) >= 3)) elf_hwcap &= ~HWCAP_SWP; } @@ -821,8 +842,25 @@ arch_initcall(customize_machine); static int __init init_machine_late(void) { + struct device_node *root; + int ret; + if (machine_desc->init_late) machine_desc->init_late(); + + root = of_find_node_by_path("/"); + if (root) { + ret = of_property_read_string(root, "serial-number", + &system_serial); + if (ret) + system_serial = NULL; + } + + if (!system_serial) + system_serial = kasprintf(GFP_KERNEL, "%08x%08x", + system_serial_high, + system_serial_low); + return 0; } late_initcall(init_machine_late); @@ -918,7 +956,9 @@ void __init setup_arch(char **cmdline_p) parse_early_param(); - early_paging_init(mdesc, lookup_processor_type(read_cpuid_id())); +#ifdef CONFIG_MMU + early_paging_init(mdesc); +#endif setup_dma_zone(mdesc); sanity_check_meminfo(); arm_memblock_init(mdesc); @@ -933,6 +973,7 @@ void __init setup_arch(char **cmdline_p) arm_dt_init_cpu_maps(); psci_init(); + xen_early_init(); #ifdef CONFIG_SMP if (is_smp()) { if (!mdesc->smp_init || !mdesc->smp_init()) { @@ -1091,8 +1132,7 @@ static int c_show(struct seq_file *m, void *v) seq_printf(m, "Hardware\t: %s\n", machine_name); seq_printf(m, "Revision\t: %04x\n", system_rev); - seq_printf(m, "Serial\t\t: %08x%08x\n", - system_serial_high, system_serial_low); + seq_printf(m, "Serial\t\t: %s\n", system_serial); return 0; } diff --git a/arch/arm/kernel/signal.c b/arch/arm/kernel/signal.c index 023ac905e4c3..423663e23791 100644 --- a/arch/arm/kernel/signal.c +++ 
b/arch/arm/kernel/signal.c @@ -318,17 +318,6 @@ get_sigframe(struct ksignal *ksig, struct pt_regs *regs, int framesize) return frame; } -/* - * translate the signal - */ -static inline int map_sig(int sig) -{ - struct thread_info *thread = current_thread_info(); - if (sig < 32 && thread->exec_domain && thread->exec_domain->signal_invmap) - sig = thread->exec_domain->signal_invmap[sig]; - return sig; -} - static int setup_return(struct pt_regs *regs, struct ksignal *ksig, unsigned long __user *rc, void __user *frame) @@ -412,7 +401,7 @@ setup_return(struct pt_regs *regs, struct ksignal *ksig, } } - regs->ARM_r0 = map_sig(ksig->sig); + regs->ARM_r0 = ksig->sig; regs->ARM_sp = (unsigned long)frame; regs->ARM_lr = retcode; regs->ARM_pc = handler; diff --git a/arch/arm/kernel/sleep.S b/arch/arm/kernel/sleep.S index e1e60e5a7a27..0f6c1000582c 100644 --- a/arch/arm/kernel/sleep.S +++ b/arch/arm/kernel/sleep.S @@ -81,7 +81,7 @@ ENTRY(__cpu_suspend) mov r1, r4 @ size of save block add r0, sp, #8 @ pointer to save block bl __cpu_suspend_save - adr lr, BSYM(cpu_suspend_abort) + badr lr, cpu_suspend_abort ldmfd sp!, {r0, pc} @ call suspend fn ENDPROC(__cpu_suspend) .ltorg @@ -116,15 +116,18 @@ cpu_resume_after_mmu: ldmfd sp!, {r4 - r11, pc} ENDPROC(cpu_resume_after_mmu) -/* - * Note: Yes, part of the following code is located into the .data section. - * This is to allow sleep_save_sp to be accessed with a relative load - * while we can't rely on any MMU translation. We could have put - * sleep_save_sp in the .text section as well, but some setups might - * insist on it to be truly read-only. - */ - .data + .text .align + +#ifdef CONFIG_MMU + .arm +ENTRY(cpu_resume_arm) + THUMB( badr r9, 1f ) @ Kernel is entered in ARM. + THUMB( bx r9 ) @ If this is a Thumb-2 kernel, + THUMB( .thumb ) @ switch to Thumb now. + THUMB(1: ) +#endif + ENTRY(cpu_resume) ARM_BE8(setend be) @ ensure we are in BE mode #ifdef CONFIG_ARM_VIRT_EXT @@ -145,6 +148,8 @@ ARM_BE8(setend be) @ ensure we are in BE mode compute_mpidr_hash r1, r4, r5, r6, r0, r3 1: adr r0, _sleep_save_sp + ldr r2, [r0] + add r0, r0, r2 ldr r0, [r0, #SLEEP_SAVE_SP_PHYS] ldr r0, [r0, r1, lsl #2] @@ -155,11 +160,17 @@ THUMB( mov sp, r2 ) THUMB( bx r3 ) ENDPROC(cpu_resume) +#ifdef CONFIG_MMU +ENDPROC(cpu_resume_arm) +#endif + .align 2 +_sleep_save_sp: + .long sleep_save_sp - . mpidr_hash_ptr: .long mpidr_hash - . 
@ mpidr_hash struct offset + .data .type sleep_save_sp, #object ENTRY(sleep_save_sp) -_sleep_save_sp: .space SLEEP_SAVE_SP_SZ @ struct sleep_save_sp diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c index 86ef244c5a24..3d6b7821cff8 100644 --- a/arch/arm/kernel/smp.c +++ b/arch/arm/kernel/smp.c @@ -86,9 +86,11 @@ void __init smp_set_ops(struct smp_operations *ops) static unsigned long get_arch_pgd(pgd_t *pgd) { - phys_addr_t pgdir = virt_to_idmap(pgd); - BUG_ON(pgdir & ARCH_PGD_MASK); - return pgdir >> ARCH_PGD_SHIFT; +#ifdef CONFIG_ARM_LPAE + return __phys_to_pfn(virt_to_phys(pgd)); +#else + return virt_to_phys(pgd); +#endif } int __cpu_up(unsigned int cpu, struct task_struct *idle) @@ -108,7 +110,7 @@ int __cpu_up(unsigned int cpu, struct task_struct *idle) #endif #ifdef CONFIG_MMU - secondary_data.pgdir = get_arch_pgd(idmap_pgd); + secondary_data.pgdir = virt_to_phys(idmap_pgd); secondary_data.swapper_pg_dir = get_arch_pgd(swapper_pg_dir); #endif sync_cache_w(&secondary_data); @@ -145,6 +147,11 @@ void __init smp_init_cpus(void) smp_ops.smp_init_cpus(); } +int platform_can_secondary_boot(void) +{ + return !!smp_ops.smp_boot_secondary; +} + int platform_can_cpu_hotplug(void) { #ifdef CONFIG_HOTPLUG_CPU @@ -571,7 +578,7 @@ void handle_IPI(int ipinr, struct pt_regs *regs) struct pt_regs *old_regs = set_irq_regs(regs); if ((unsigned)ipinr < NR_IPI) { - trace_ipi_entry(ipi_types[ipinr]); + trace_ipi_entry_rcuidle(ipi_types[ipinr]); __inc_irq_stat(cpu, ipi_irqs[ipinr]); } @@ -630,7 +637,7 @@ void handle_IPI(int ipinr, struct pt_regs *regs) } if ((unsigned)ipinr < NR_IPI) - trace_ipi_exit(ipi_types[ipinr]); + trace_ipi_exit_rcuidle(ipi_types[ipinr]); set_irq_regs(old_regs); } diff --git a/arch/arm/kernel/swp_emulate.c b/arch/arm/kernel/swp_emulate.c index afdd51e30bec..1361756782c7 100644 --- a/arch/arm/kernel/swp_emulate.c +++ b/arch/arm/kernel/swp_emulate.c @@ -42,7 +42,7 @@ " cmp %0, #0\n" \ " movne %0, %4\n" \ "2:\n" \ - " .section .fixup,\"ax\"\n" \ + " .section .text.fixup,\"ax\"\n" \ " .align 2\n" \ "3: mov %0, %5\n" \ " b 2b\n" \ diff --git a/arch/arm/kernel/tcm.c b/arch/arm/kernel/tcm.c index 7a3be1d4d0b1..b10e1360762e 100644 --- a/arch/arm/kernel/tcm.c +++ b/arch/arm/kernel/tcm.c @@ -17,6 +17,9 @@ #include <asm/mach/map.h> #include <asm/memory.h> #include <asm/system_info.h> +#include <asm/traps.h> + +#define TCMTR_FORMAT_MASK 0xe0000000U static struct gen_pool *tcm_pool; static bool dtcm_present; @@ -176,6 +179,77 @@ static int __init setup_tcm_bank(u8 type, u8 bank, u8 banks, } /* + * When we are running in the non-secure world and the secure world + * has not explicitly given us access to the TCM we will get an + * undefined error when reading the TCM region register in the + * setup_tcm_bank function (above). + * + * There are two variants of this register read that we need to trap, + * the read for the data TCM and the read for the instruction TCM: + * c0370628: ee196f11 mrc 15, 0, r6, cr9, cr1, {0} + * c0370674: ee196f31 mrc 15, 0, r6, cr9, cr1, {1} + * + * Our undef hook mask explicitly matches all fields of the encoded + * instruction other than the destination register. The mask also + * only allows operand 2 to have the values 0 or 1. + * + * The undefined hook is defined as __init and __initdata, and therefore + * must be removed before tcm_init returns. + * + * In this particular case (MRC with ARM condition code ALways) the + * Thumb-2 and ARM instruction encoding are identical, so this hook + * will work on a Thumb-2 kernel. 
+ * + * See A8.8.107, DDI0406C_C ARM Architecture Reference Manual, Encoding + * T1/A1 for the bit-by-bit details. + * + * mrc p15, 0, XX, c9, c1, 0 + * mrc p15, 0, XX, c9, c1, 1 + * | | | | | | | +---- opc2 0|1 = 000|001 + * | | | | | | +------- CRm 0 = 0001 + * | | | | | +----------- CRn 0 = 1001 + * | | | | +--------------- Rt ? = ???? + * | | | +------------------- opc1 0 = 000 + * | | +----------------------- coproc 15 = 1111 + * | +-------------------------- condition ALways = 1110 + * +----------------------------- instruction MRC = 1110 + * + * Encoding this as per A8.8.107 of DDI0406C, Encoding T1/A1, yields: + * 1111 1111 1111 1111 0000 1111 1101 1111 Required Mask + * 1110 1110 0001 1001 ???? 1111 0001 0001 mrc p15, 0, XX, c9, c1, 0 + * 1110 1110 0001 1001 ???? 1111 0011 0001 mrc p15, 0, XX, c9, c1, 1 + * [ ] [ ] [ ]| [ ] [ ] [ ] [ ]| +--- CRm + * | | | | | | | | +----- SBO + * | | | | | | | +------- opc2 + * | | | | | | +----------- coproc + * | | | | | +---------------- Rt + * | | | | +--------------------- CRn + * | | | +------------------------- SBO + * | | +--------------------------- opc1 + * | +------------------------------- instruction + * +------------------------------------ condition + */ +#define TCM_REGION_READ_MASK 0xffff0fdf +#define TCM_REGION_READ_INSTR 0xee190f11 +#define DEST_REG_SHIFT 12 +#define DEST_REG_MASK 0xf + +static int __init tcm_handler(struct pt_regs *regs, unsigned int instr) +{ + regs->uregs[(instr >> DEST_REG_SHIFT) & DEST_REG_MASK] = 0; + regs->ARM_pc += 4; + return 0; +} + +static struct undef_hook tcm_hook __initdata = { + .instr_mask = TCM_REGION_READ_MASK, + .instr_val = TCM_REGION_READ_INSTR, + .cpsr_mask = MODE_MASK, + .cpsr_val = SVC_MODE, + .fn = tcm_handler +}; + +/* * This initializes the TCM memory */ void __init tcm_init(void) @@ -204,9 +278,18 @@ void __init tcm_init(void) } tcm_status = read_cpuid_tcmstatus(); + + /* + * This code only supports v6-compatible TCMTR implementations. + */ + if (tcm_status & TCMTR_FORMAT_MASK) + return; + dtcm_banks = (tcm_status >> 16) & 0x03; itcm_banks = (tcm_status & 0x03); + register_undef_hook(&tcm_hook); + /* Values greater than 2 for D/ITCM banks are "reserved" */ if (dtcm_banks > 2) dtcm_banks = 0; @@ -218,7 +301,7 @@ void __init tcm_init(void) for (i = 0; i < dtcm_banks; i++) { ret = setup_tcm_bank(0, i, dtcm_banks, &dtcm_end); if (ret) - return; + goto unregister; } /* This means you compiled more code than fits into DTCM */ if (dtcm_code_sz > (dtcm_end - DTCM_OFFSET)) { @@ -227,6 +310,12 @@ void __init tcm_init(void) dtcm_code_sz, (dtcm_end - DTCM_OFFSET)); goto no_dtcm; } + /* + * This means that the DTCM sizes were 0 or the DTCM banks + * were inaccessible due to TrustZone configuration. + */ + if (!(dtcm_end - DTCM_OFFSET)) + goto no_dtcm; dtcm_res.end = dtcm_end - 1; request_resource(&iomem_resource, &dtcm_res); dtcm_iomap[0].length = dtcm_end - DTCM_OFFSET; @@ -250,15 +339,21 @@ no_dtcm: for (i = 0; i < itcm_banks; i++) { ret = setup_tcm_bank(1, i, itcm_banks, &itcm_end); if (ret) - return; + goto unregister; } /* This means you compiled more code than fits into ITCM */ if (itcm_code_sz > (itcm_end - ITCM_OFFSET)) { pr_info("CPU ITCM: %u bytes of code compiled to " "ITCM but only %lu bytes of ITCM present\n", itcm_code_sz, (itcm_end - ITCM_OFFSET)); - return; + goto unregister; } + /* + * This means that the ITCM sizes were 0 or the ITCM banks + * were inaccessible due to TrustZone configuration. 
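A small self-contained check (illustrative only, not part of the patch) that the mask/value pair above matches both MRC encodings quoted in the comment, and that the destination-register extraction used by tcm_handler() yields r6 for those example instruction words:

#include <stdio.h>

#define TCM_REGION_READ_MASK	0xffff0fdf
#define TCM_REGION_READ_INSTR	0xee190f11
#define DEST_REG_SHIFT		12
#define DEST_REG_MASK		0xf

int main(void)
{
	unsigned int mrc_dtcm = 0xee196f11;	/* mrc p15, 0, r6, c9, c1, 0 */
	unsigned int mrc_itcm = 0xee196f31;	/* mrc p15, 0, r6, c9, c1, 1 */

	printf("DTCM read hooked: %d\n",
	       (mrc_dtcm & TCM_REGION_READ_MASK) == TCM_REGION_READ_INSTR);
	printf("ITCM read hooked: %d\n",
	       (mrc_itcm & TCM_REGION_READ_MASK) == TCM_REGION_READ_INSTR);
	printf("register zeroed by the handler: r%u\n",
	       (mrc_dtcm >> DEST_REG_SHIFT) & DEST_REG_MASK);
	return 0;
}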
+ */ + if (!(itcm_end - ITCM_OFFSET)) + goto unregister; itcm_res.end = itcm_end - 1; request_resource(&iomem_resource, &itcm_res); itcm_iomap[0].length = itcm_end - ITCM_OFFSET; @@ -275,6 +370,9 @@ no_dtcm: pr_info("CPU ITCM: %u bytes of code compiled to ITCM but no " "ITCM banks present in CPU\n", itcm_code_sz); } + +unregister: + unregister_undef_hook(&tcm_hook); } /* diff --git a/arch/arm/kernel/time.c b/arch/arm/kernel/time.c index 0cc7e58c47cc..a66e37e211a9 100644 --- a/arch/arm/kernel/time.c +++ b/arch/arm/kernel/time.c @@ -76,7 +76,7 @@ void timer_tick(void) } #endif -static void dummy_clock_access(struct timespec *ts) +static void dummy_clock_access(struct timespec64 *ts) { ts->tv_sec = 0; ts->tv_nsec = 0; @@ -85,12 +85,12 @@ static void dummy_clock_access(struct timespec *ts) static clock_access_fn __read_persistent_clock = dummy_clock_access; static clock_access_fn __read_boot_clock = dummy_clock_access;; -void read_persistent_clock(struct timespec *ts) +void read_persistent_clock64(struct timespec64 *ts) { __read_persistent_clock(ts); } -void read_boot_clock(struct timespec *ts) +void read_boot_clock64(struct timespec64 *ts) { __read_boot_clock(ts); } diff --git a/arch/arm/kernel/traps.c b/arch/arm/kernel/traps.c index 788e23fe64d8..d358226236f2 100644 --- a/arch/arm/kernel/traps.c +++ b/arch/arm/kernel/traps.c @@ -505,12 +505,10 @@ asmlinkage void bad_mode(struct pt_regs *regs, int reason) static int bad_syscall(int n, struct pt_regs *regs) { - struct thread_info *thread = current_thread_info(); siginfo_t info; - if ((current->personality & PER_MASK) != PER_LINUX && - thread->exec_domain->handler) { - thread->exec_domain->handler(n, regs); + if ((current->personality & PER_MASK) != PER_LINUX) { + send_sig(SIGSEGV, current, 1); return regs->ARM_r0; } @@ -751,14 +749,6 @@ late_initcall(arm_mrc_hook_init); #endif -void __bad_xchg(volatile void *ptr, int size) -{ - pr_err("xchg: bad data size: pc 0x%p, ptr 0x%p, size %d\n", - __builtin_return_address(0), ptr, size); - BUG(); -} -EXPORT_SYMBOL(__bad_xchg); - /* * A data abort trap was taken, but we did not handle the instruction. * Try to abort the user program, or panic if it was the kernel. diff --git a/arch/arm/kernel/vdso.c b/arch/arm/kernel/vdso.c new file mode 100644 index 000000000000..efe17dd9b921 --- /dev/null +++ b/arch/arm/kernel/vdso.c @@ -0,0 +1,337 @@ +/* + * Adapted from arm64 version. + * + * Copyright (C) 2012 ARM Limited + * Copyright (C) 2015 Mentor Graphics Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. 
+ */ + +#include <linux/elf.h> +#include <linux/err.h> +#include <linux/kernel.h> +#include <linux/mm.h> +#include <linux/of.h> +#include <linux/printk.h> +#include <linux/slab.h> +#include <linux/timekeeper_internal.h> +#include <linux/vmalloc.h> +#include <asm/arch_timer.h> +#include <asm/barrier.h> +#include <asm/cacheflush.h> +#include <asm/page.h> +#include <asm/vdso.h> +#include <asm/vdso_datapage.h> +#include <clocksource/arm_arch_timer.h> + +#define MAX_SYMNAME 64 + +static struct page **vdso_text_pagelist; + +/* Total number of pages needed for the data and text portions of the VDSO. */ +unsigned int vdso_total_pages __read_mostly; + +/* + * The VDSO data page. + */ +static union vdso_data_store vdso_data_store __page_aligned_data; +static struct vdso_data *vdso_data = &vdso_data_store.data; + +static struct page *vdso_data_page; +static struct vm_special_mapping vdso_data_mapping = { + .name = "[vvar]", + .pages = &vdso_data_page, +}; + +static struct vm_special_mapping vdso_text_mapping = { + .name = "[vdso]", +}; + +struct elfinfo { + Elf32_Ehdr *hdr; /* ptr to ELF */ + Elf32_Sym *dynsym; /* ptr to .dynsym section */ + unsigned long dynsymsize; /* size of .dynsym section */ + char *dynstr; /* ptr to .dynstr section */ +}; + +/* Cached result of boot-time check for whether the arch timer exists, + * and if so, whether the virtual counter is useable. + */ +static bool cntvct_ok __read_mostly; + +static bool __init cntvct_functional(void) +{ + struct device_node *np; + bool ret = false; + + if (!IS_ENABLED(CONFIG_ARM_ARCH_TIMER)) + goto out; + + /* The arm_arch_timer core should export + * arch_timer_use_virtual or similar so we don't have to do + * this. + */ + np = of_find_compatible_node(NULL, NULL, "arm,armv7-timer"); + if (!np) + goto out_put; + + if (of_property_read_bool(np, "arm,cpu-registers-not-fw-configured")) + goto out_put; + + ret = true; + +out_put: + of_node_put(np); +out: + return ret; +} + +static void * __init find_section(Elf32_Ehdr *ehdr, const char *name, + unsigned long *size) +{ + Elf32_Shdr *sechdrs; + unsigned int i; + char *secnames; + + /* Grab section headers and strings so we can tell who is who */ + sechdrs = (void *)ehdr + ehdr->e_shoff; + secnames = (void *)ehdr + sechdrs[ehdr->e_shstrndx].sh_offset; + + /* Find the section they want */ + for (i = 1; i < ehdr->e_shnum; i++) { + if (strcmp(secnames + sechdrs[i].sh_name, name) == 0) { + if (size) + *size = sechdrs[i].sh_size; + return (void *)ehdr + sechdrs[i].sh_offset; + } + } + + if (size) + *size = 0; + return NULL; +} + +static Elf32_Sym * __init find_symbol(struct elfinfo *lib, const char *symname) +{ + unsigned int i; + + for (i = 0; i < (lib->dynsymsize / sizeof(Elf32_Sym)); i++) { + char name[MAX_SYMNAME], *c; + + if (lib->dynsym[i].st_name == 0) + continue; + strlcpy(name, lib->dynstr + lib->dynsym[i].st_name, + MAX_SYMNAME); + c = strchr(name, '@'); + if (c) + *c = 0; + if (strcmp(symname, name) == 0) + return &lib->dynsym[i]; + } + return NULL; +} + +static void __init vdso_nullpatch_one(struct elfinfo *lib, const char *symname) +{ + Elf32_Sym *sym; + + sym = find_symbol(lib, symname); + if (!sym) + return; + + sym->st_name = 0; +} + +static void __init patch_vdso(void *ehdr) +{ + struct elfinfo einfo; + + einfo = (struct elfinfo) { + .hdr = ehdr, + }; + + einfo.dynsym = find_section(einfo.hdr, ".dynsym", &einfo.dynsymsize); + einfo.dynstr = find_section(einfo.hdr, ".dynstr", NULL); + + /* If the virtual counter is absent or non-functional we don't + * want programs to incur the slight 
additional overhead of + * dispatching through the VDSO only to fall back to syscalls. + */ + if (!cntvct_ok) { + vdso_nullpatch_one(&einfo, "__vdso_gettimeofday"); + vdso_nullpatch_one(&einfo, "__vdso_clock_gettime"); + } +} + +static int __init vdso_init(void) +{ + unsigned int text_pages; + int i; + + if (memcmp(&vdso_start, "\177ELF", 4)) { + pr_err("VDSO is not a valid ELF object!\n"); + return -ENOEXEC; + } + + text_pages = (&vdso_end - &vdso_start) >> PAGE_SHIFT; + pr_debug("vdso: %i text pages at base %p\n", text_pages, &vdso_start); + + /* Allocate the VDSO text pagelist */ + vdso_text_pagelist = kcalloc(text_pages, sizeof(struct page *), + GFP_KERNEL); + if (vdso_text_pagelist == NULL) + return -ENOMEM; + + /* Grab the VDSO data page. */ + vdso_data_page = virt_to_page(vdso_data); + + /* Grab the VDSO text pages. */ + for (i = 0; i < text_pages; i++) { + struct page *page; + + page = virt_to_page(&vdso_start + i * PAGE_SIZE); + vdso_text_pagelist[i] = page; + } + + vdso_text_mapping.pages = vdso_text_pagelist; + + vdso_total_pages = 1; /* for the data/vvar page */ + vdso_total_pages += text_pages; + + cntvct_ok = cntvct_functional(); + + patch_vdso(&vdso_start); + + return 0; +} +arch_initcall(vdso_init); + +static int install_vvar(struct mm_struct *mm, unsigned long addr) +{ + struct vm_area_struct *vma; + + vma = _install_special_mapping(mm, addr, PAGE_SIZE, + VM_READ | VM_MAYREAD, + &vdso_data_mapping); + + return IS_ERR(vma) ? PTR_ERR(vma) : 0; +} + +/* assumes mmap_sem is write-locked */ +void arm_install_vdso(struct mm_struct *mm, unsigned long addr) +{ + struct vm_area_struct *vma; + unsigned long len; + + mm->context.vdso = 0; + + if (vdso_text_pagelist == NULL) + return; + + if (install_vvar(mm, addr)) + return; + + /* Account for vvar page. */ + addr += PAGE_SIZE; + len = (vdso_total_pages - 1) << PAGE_SHIFT; + + vma = _install_special_mapping(mm, addr, len, + VM_READ | VM_EXEC | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC, + &vdso_text_mapping); + + if (!IS_ERR(vma)) + mm->context.vdso = addr; +} + +static void vdso_write_begin(struct vdso_data *vdata) +{ + ++vdso_data->seq_count; + smp_wmb(); /* Pairs with smp_rmb in vdso_read_retry */ +} + +static void vdso_write_end(struct vdso_data *vdata) +{ + smp_wmb(); /* Pairs with smp_rmb in vdso_read_begin */ + ++vdso_data->seq_count; +} + +static bool tk_is_cntvct(const struct timekeeper *tk) +{ + if (!IS_ENABLED(CONFIG_ARM_ARCH_TIMER)) + return false; + + if (strcmp(tk->tkr_mono.clock->name, "arch_sys_counter") != 0) + return false; + + return true; +} + +/** + * update_vsyscall - update the vdso data page + * + * Increment the sequence counter, making it odd, indicating to + * userspace that an update is in progress. Update the fields used + * for coarse clocks and, if the architected system timer is in use, + * the fields used for high precision clocks. Increment the sequence + * counter again, making it even, indicating to userspace that the + * update is finished. + * + * Userspace is expected to sample seq_count before reading any other + * fields from the data page. If seq_count is odd, userspace is + * expected to wait until it becomes even. After copying data from + * the page, userspace must sample seq_count again; if it has changed + * from its previous value, userspace must retry the whole sequence. + * + * Calls to update_vsyscall are serialized by the timekeeping core. 
+ */ +void update_vsyscall(struct timekeeper *tk) +{ + struct timespec xtime_coarse; + struct timespec64 *wtm = &tk->wall_to_monotonic; + + if (!cntvct_ok) { + /* The entry points have been zeroed, so there is no + * point in updating the data page. + */ + return; + } + + vdso_write_begin(vdso_data); + + xtime_coarse = __current_kernel_time(); + vdso_data->tk_is_cntvct = tk_is_cntvct(tk); + vdso_data->xtime_coarse_sec = xtime_coarse.tv_sec; + vdso_data->xtime_coarse_nsec = xtime_coarse.tv_nsec; + vdso_data->wtm_clock_sec = wtm->tv_sec; + vdso_data->wtm_clock_nsec = wtm->tv_nsec; + + if (vdso_data->tk_is_cntvct) { + vdso_data->cs_cycle_last = tk->tkr_mono.cycle_last; + vdso_data->xtime_clock_sec = tk->xtime_sec; + vdso_data->xtime_clock_snsec = tk->tkr_mono.xtime_nsec; + vdso_data->cs_mult = tk->tkr_mono.mult; + vdso_data->cs_shift = tk->tkr_mono.shift; + vdso_data->cs_mask = tk->tkr_mono.mask; + } + + vdso_write_end(vdso_data); + + flush_dcache_page(virt_to_page(vdso_data)); +} + +void update_vsyscall_tz(void) +{ + vdso_data->tz_minuteswest = sys_tz.tz_minuteswest; + vdso_data->tz_dsttime = sys_tz.tz_dsttime; + flush_dcache_page(virt_to_page(vdso_data)); +} diff --git a/arch/arm/kernel/vmlinux.lds.S b/arch/arm/kernel/vmlinux.lds.S index b31aa73e8076..8b60fde5ce48 100644 --- a/arch/arm/kernel/vmlinux.lds.S +++ b/arch/arm/kernel/vmlinux.lds.S @@ -11,7 +11,7 @@ #ifdef CONFIG_ARM_KERNMEM_PERMS #include <asm/pgtable.h> #endif - + #define PROC_INFO \ . = ALIGN(4); \ VMLINUX_SYMBOL(__proc_info_begin) = .; \ @@ -23,7 +23,7 @@ VMLINUX_SYMBOL(__idmap_text_start) = .; \ *(.idmap.text) \ VMLINUX_SYMBOL(__idmap_text_end) = .; \ - . = ALIGN(32); \ + . = ALIGN(PAGE_SIZE); \ VMLINUX_SYMBOL(__hyp_idmap_text_start) = .; \ *(.hyp.idmap.text) \ VMLINUX_SYMBOL(__hyp_idmap_text_end) = .; @@ -74,7 +74,7 @@ SECTIONS ARM_EXIT_DISCARD(EXIT_DATA) EXIT_CALL #ifndef CONFIG_MMU - *(.fixup) + *(.text.fixup) *(__ex_table) #endif #ifndef CONFIG_SMP_ON_UP @@ -100,6 +100,7 @@ SECTIONS .text : { /* Real text segment */ _stext = .; /* Text and read-only data */ + IDMAP_TEXT __exception_text_start = .; *(.exception.text) __exception_text_end = .; @@ -108,10 +109,6 @@ SECTIONS SCHED_TEXT LOCK_TEXT KPROBES_TEXT - IDMAP_TEXT -#ifdef CONFIG_MMU - *(.fixup) -#endif *(.gnu.warning) *(.glue_7) *(.glue_7t) @@ -346,8 +343,11 @@ SECTIONS */ ASSERT((__proc_info_end - __proc_info_begin), "missing CPU support") ASSERT((__arch_info_end - __arch_info_begin), "no machine record defined") + /* - * The HYP init code can't be more than a page long. + * The HYP init code can't be more than a page long, + * and should not cross a page boundary. * The above comment applies as well. */ -ASSERT(((__hyp_idmap_text_end - __hyp_idmap_text_start) <= PAGE_SIZE), "HYP init code too big") +ASSERT(__hyp_idmap_text_end - (__hyp_idmap_text_start & PAGE_MASK) <= PAGE_SIZE, + "HYP init code too big or misaligned") |
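
A quick way to double-check the instruction mask chosen for the TCM undef hook in tcm.c is to mask the two MRC encodings quoted in its comment (there shown with r6 as the destination register) and confirm that both reduce to TCM_REGION_READ_INSTR. The snippet below is an illustrative, host-side check only, not part of the patch: the two #define constants are copied from the patch, while main(), the sample encodings and the loop over Rt are just scaffolding for the demonstration.

#include <assert.h>
#include <stdint.h>

#define TCM_REGION_READ_MASK	0xffff0fdfU
#define TCM_REGION_READ_INSTR	0xee190f11U

int main(void)
{
	/* mrc p15, 0, r6, c9, c1, {0|1} -- the two reads quoted in the comment */
	uint32_t dtcm_read = 0xee196f11;
	uint32_t itcm_read = 0xee196f31;

	assert((dtcm_read & TCM_REGION_READ_MASK) == TCM_REGION_READ_INSTR);
	assert((itcm_read & TCM_REGION_READ_MASK) == TCM_REGION_READ_INSTR);

	/* The Rt field (bits 15:12) is masked out, so any destination matches. */
	for (uint32_t rt = 0; rt < 16; rt++) {
		uint32_t insn = 0xee190f11 | (rt << 12);
		assert((insn & TCM_REGION_READ_MASK) == TCM_REGION_READ_INSTR);
	}
	return 0;
}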
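
The update_vsyscall() comment in vdso.c spells out the sequence-count protocol userspace is expected to follow against the [vvar] page. Below is a minimal reader sketch under stated assumptions: struct vdso_data_view and read_coarse_time() are hypothetical stand-ins covering only the coarse-clock fields touched here (the real layout lives in asm/vdso_datapage.h), and smp_rmb() is assumed to be an ARMv7 "dmb ish".

#include <stdint.h>

struct vdso_data_view {			/* hypothetical subset of struct vdso_data */
	uint32_t seq_count;		/* odd while an update is in progress */
	uint32_t xtime_coarse_sec;
	uint32_t xtime_coarse_nsec;
};

#define smp_rmb()	__asm__ __volatile__("dmb ish" : : : "memory")

static void read_coarse_time(const volatile struct vdso_data_view *vd,
			     uint32_t *sec, uint32_t *nsec)
{
	uint32_t seq;

	do {
		/* Wait for an even sequence count: no update in flight. */
		do {
			seq = vd->seq_count;
		} while (seq & 1);
		smp_rmb();		/* pairs with smp_wmb() in vdso_write_begin() */

		*sec  = vd->xtime_coarse_sec;
		*nsec = vd->xtime_coarse_nsec;

		smp_rmb();		/* pairs with smp_wmb() in vdso_write_end() */
		/* Retry if the writer ran while we were reading. */
	} while (vd->seq_count != seq);
}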
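
The reworked ASSERT at the end of vmlinux.lds.S measures the HYP init code from the start of its page rather than from its own start address, which is what catches a section that fits within a page in size but still straddles a page boundary. A small worked example with hypothetical addresses (illustration only, 4 KiB pages assumed):

#include <assert.h>
#include <stdint.h>

#define PAGE_SIZE	0x1000UL
#define PAGE_MASK	(~(PAGE_SIZE - 1))

int main(void)
{
	/* 0x200 bytes of HYP idmap text starting 0x100 before a page boundary */
	uint32_t start = 0xc0100f00, end = 0xc0101100;

	assert(end - start <= PAGE_SIZE);			/* old check: passes */
	assert(!(end - (start & PAGE_MASK) <= PAGE_SIZE));	/* new check: trips, as intended */
	return 0;
}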