Diffstat (limited to 'arch/arm/kernel')
40 files changed, 2195 insertions, 963 deletions
diff --git a/arch/arm/kernel/Makefile b/arch/arm/kernel/Makefile index 902397dd1000..e69f7a19735d 100644 --- a/arch/arm/kernel/Makefile +++ b/arch/arm/kernel/Makefile @@ -16,7 +16,7 @@ CFLAGS_REMOVE_return_address.o = -pg # Object file lists. obj-y := elf.o entry-common.o irq.o opcodes.o \ - process.o ptrace.o return_address.o \ + process.o ptrace.o reboot.o return_address.o \ setup.o signal.o sigreturn_codes.o \ stacktrace.o sys_arm.o time.o traps.o @@ -34,7 +34,7 @@ obj-$(CONFIG_CPU_IDLE) += cpuidle.o obj-$(CONFIG_ISA_DMA_API) += dma.o obj-$(CONFIG_FIQ) += fiq.o fiqasm.o obj-$(CONFIG_MODULES) += armksyms.o module.o -obj-$(CONFIG_ARTHUR) += arthur.o +obj-$(CONFIG_ARM_MODULE_PLTS) += module-plts.o obj-$(CONFIG_ISA_DMA) += dma-isa.o obj-$(CONFIG_PCI) += bios32.o isa.o obj-$(CONFIG_ARM_CPU_SUSPEND) += sleep.o suspend.o @@ -71,10 +71,13 @@ obj-$(CONFIG_CPU_PJ4) += pj4-cp0.o obj-$(CONFIG_CPU_PJ4B) += pj4-cp0.o obj-$(CONFIG_IWMMXT) += iwmmxt.o obj-$(CONFIG_PERF_EVENTS) += perf_regs.o perf_callchain.o -obj-$(CONFIG_HW_PERF_EVENTS) += perf_event.o perf_event_cpu.o +obj-$(CONFIG_HW_PERF_EVENTS) += perf_event.o \ + perf_event_xscale.o perf_event_v6.o \ + perf_event_v7.o CFLAGS_pj4-cp0.o := -marm AFLAGS_iwmmxt.o := -Wa,-mcpu=iwmmxt obj-$(CONFIG_ARM_CPU_TOPOLOGY) += topology.o +obj-$(CONFIG_VDSO) += vdso.o ifneq ($(CONFIG_ARCH_EBSA110),y) obj-y += io.o @@ -86,7 +89,7 @@ obj-$(CONFIG_EARLY_PRINTK) += early_printk.o obj-$(CONFIG_ARM_VIRT_EXT) += hyp-stub.o ifeq ($(CONFIG_ARM_PSCI),y) -obj-y += psci.o +obj-y += psci.o psci-call.o obj-$(CONFIG_SMP) += psci_smp.o endif diff --git a/arch/arm/kernel/armksyms.c b/arch/arm/kernel/armksyms.c index a88671cfe1ff..5e5a51a99e68 100644 --- a/arch/arm/kernel/armksyms.c +++ b/arch/arm/kernel/armksyms.c @@ -50,6 +50,9 @@ extern void __aeabi_ulcmp(void); extern void fpundefinstr(void); +void mmioset(void *, unsigned int, size_t); +void mmiocpy(void *, const void *, size_t); + /* platform dependent support */ EXPORT_SYMBOL(arm_delay_ops); @@ -88,6 +91,9 @@ EXPORT_SYMBOL(memmove); EXPORT_SYMBOL(memchr); EXPORT_SYMBOL(__memzero); +EXPORT_SYMBOL(mmioset); +EXPORT_SYMBOL(mmiocpy); + #ifdef CONFIG_MMU EXPORT_SYMBOL(copy_page); diff --git a/arch/arm/kernel/arthur.c b/arch/arm/kernel/arthur.c deleted file mode 100644 index 321c5291d05f..000000000000 --- a/arch/arm/kernel/arthur.c +++ /dev/null @@ -1,94 +0,0 @@ -/* - * linux/arch/arm/kernel/arthur.c - * - * Copyright (C) 1998, 1999, 2000, 2001 Philip Blundell - * - * Arthur personality - */ - -/* - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - */ - -#include <linux/module.h> -#include <linux/personality.h> -#include <linux/stddef.h> -#include <linux/signal.h> -#include <linux/init.h> -#include <linux/sched.h> - -#include <asm/ptrace.h> - -/* Arthur doesn't have many signals, and a lot of those that it does - have don't map easily to any Linux equivalent. Never mind. 
*/ - -#define ARTHUR_SIGABRT 1 -#define ARTHUR_SIGFPE 2 -#define ARTHUR_SIGILL 3 -#define ARTHUR_SIGINT 4 -#define ARTHUR_SIGSEGV 5 -#define ARTHUR_SIGTERM 6 -#define ARTHUR_SIGSTAK 7 -#define ARTHUR_SIGUSR1 8 -#define ARTHUR_SIGUSR2 9 -#define ARTHUR_SIGOSERROR 10 - -static unsigned long arthur_to_linux_signals[32] = { - 0, 1, 2, 3, 4, 5, 6, 7, - 8, 9, 10, 11, 12, 13, 14, 15, - 16, 17, 18, 19, 20, 21, 22, 23, - 24, 25, 26, 27, 28, 29, 30, 31 -}; - -static unsigned long linux_to_arthur_signals[32] = { - 0, -1, ARTHUR_SIGINT, -1, - ARTHUR_SIGILL, 5, ARTHUR_SIGABRT, 7, - ARTHUR_SIGFPE, 9, ARTHUR_SIGUSR1, ARTHUR_SIGSEGV, - ARTHUR_SIGUSR2, 13, 14, ARTHUR_SIGTERM, - 16, 17, 18, 19, - 20, 21, 22, 23, - 24, 25, 26, 27, - 28, 29, 30, 31 -}; - -static void arthur_lcall7(int nr, struct pt_regs *regs) -{ - struct siginfo info; - info.si_signo = SIGSWI; - info.si_errno = nr; - /* Bounce it to the emulator */ - send_sig_info(SIGSWI, &info, current); -} - -static struct exec_domain arthur_exec_domain = { - .name = "Arthur", - .handler = arthur_lcall7, - .pers_low = PER_RISCOS, - .pers_high = PER_RISCOS, - .signal_map = arthur_to_linux_signals, - .signal_invmap = linux_to_arthur_signals, - .module = THIS_MODULE, -}; - -/* - * We could do with some locking to stop Arthur being removed while - * processes are using it. - */ - -static int __init arthur_init(void) -{ - return register_exec_domain(&arthur_exec_domain); -} - -static void __exit arthur_exit(void) -{ - unregister_exec_domain(&arthur_exec_domain); -} - -module_init(arthur_init); -module_exit(arthur_exit); - -MODULE_LICENSE("GPL"); diff --git a/arch/arm/kernel/asm-offsets.c b/arch/arm/kernel/asm-offsets.c index 2d2d6087b9b1..871b8267d211 100644 --- a/arch/arm/kernel/asm-offsets.c +++ b/arch/arm/kernel/asm-offsets.c @@ -25,6 +25,7 @@ #include <asm/memory.h> #include <asm/procinfo.h> #include <asm/suspend.h> +#include <asm/vdso_datapage.h> #include <asm/hardware/cache-l2x0.h> #include <linux/kbuild.h> @@ -66,7 +67,6 @@ int main(void) DEFINE(TI_PREEMPT, offsetof(struct thread_info, preempt_count)); DEFINE(TI_ADDR_LIMIT, offsetof(struct thread_info, addr_limit)); DEFINE(TI_TASK, offsetof(struct thread_info, task)); - DEFINE(TI_EXEC_DOMAIN, offsetof(struct thread_info, exec_domain)); DEFINE(TI_CPU, offsetof(struct thread_info, cpu)); DEFINE(TI_CPU_DOMAIN, offsetof(struct thread_info, cpu_domain)); DEFINE(TI_CPU_SAVE, offsetof(struct thread_info, cpu_context)); @@ -190,7 +190,6 @@ int main(void) DEFINE(VCPU_HxFAR, offsetof(struct kvm_vcpu, arch.fault.hxfar)); DEFINE(VCPU_HPFAR, offsetof(struct kvm_vcpu, arch.fault.hpfar)); DEFINE(VCPU_HYP_PC, offsetof(struct kvm_vcpu, arch.fault.hyp_pc)); -#ifdef CONFIG_KVM_ARM_VGIC DEFINE(VCPU_VGIC_CPU, offsetof(struct kvm_vcpu, arch.vgic_cpu)); DEFINE(VGIC_V2_CPU_HCR, offsetof(struct vgic_cpu, vgic_v2.vgic_hcr)); DEFINE(VGIC_V2_CPU_VMCR, offsetof(struct vgic_cpu, vgic_v2.vgic_vmcr)); @@ -200,15 +199,16 @@ int main(void) DEFINE(VGIC_V2_CPU_APR, offsetof(struct vgic_cpu, vgic_v2.vgic_apr)); DEFINE(VGIC_V2_CPU_LR, offsetof(struct vgic_cpu, vgic_v2.vgic_lr)); DEFINE(VGIC_CPU_NR_LR, offsetof(struct vgic_cpu, nr_lr)); -#ifdef CONFIG_KVM_ARM_TIMER DEFINE(VCPU_TIMER_CNTV_CTL, offsetof(struct kvm_vcpu, arch.timer_cpu.cntv_ctl)); DEFINE(VCPU_TIMER_CNTV_CVAL, offsetof(struct kvm_vcpu, arch.timer_cpu.cntv_cval)); DEFINE(KVM_TIMER_CNTVOFF, offsetof(struct kvm, arch.timer.cntvoff)); DEFINE(KVM_TIMER_ENABLED, offsetof(struct kvm, arch.timer.enabled)); -#endif DEFINE(KVM_VGIC_VCTRL, offsetof(struct kvm, arch.vgic.vctrl_base)); 
-#endif DEFINE(KVM_VTTBR, offsetof(struct kvm, arch.vttbr)); #endif + BLANK(); +#ifdef CONFIG_VDSO + DEFINE(VDSO_DATA_SIZE, sizeof(union vdso_data_store)); +#endif return 0; } diff --git a/arch/arm/kernel/bios32.c b/arch/arm/kernel/bios32.c index ab19b7c03423..fcbbbb1b9e95 100644 --- a/arch/arm/kernel/bios32.c +++ b/arch/arm/kernel/bios32.c @@ -618,21 +618,15 @@ int pcibios_enable_device(struct pci_dev *dev, int mask) int pci_mmap_page_range(struct pci_dev *dev, struct vm_area_struct *vma, enum pci_mmap_state mmap_state, int write_combine) { - struct pci_sys_data *root = dev->sysdata; - unsigned long phys; - - if (mmap_state == pci_mmap_io) { + if (mmap_state == pci_mmap_io) return -EINVAL; - } else { - phys = vma->vm_pgoff + (root->mem_offset >> PAGE_SHIFT); - } /* * Mark this as IO */ vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); - if (remap_pfn_range(vma, vma->vm_start, phys, + if (remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff, vma->vm_end - vma->vm_start, vma->vm_page_prot)) return -EAGAIN; diff --git a/arch/arm/kernel/cpuidle.c b/arch/arm/kernel/cpuidle.c index 89545f6c8403..318da33465f4 100644 --- a/arch/arm/kernel/cpuidle.c +++ b/arch/arm/kernel/cpuidle.c @@ -10,8 +10,28 @@ */ #include <linux/cpuidle.h> -#include <asm/proc-fns.h> +#include <linux/of.h> +#include <linux/of_device.h> +#include <asm/cpuidle.h> +extern struct of_cpuidle_method __cpuidle_method_of_table[]; + +static const struct of_cpuidle_method __cpuidle_method_of_table_sentinel + __used __section(__cpuidle_method_of_table_end); + +static struct cpuidle_ops cpuidle_ops[NR_CPUS]; + +/** + * arm_cpuidle_simple_enter() - a wrapper to cpu_do_idle() + * @dev: not used + * @drv: not used + * @index: not used + * + * A trivial wrapper to allow the cpu_do_idle function to be assigned as a + * cpuidle callback by matching the function signature. + * + * Returns the index passed as parameter + */ int arm_cpuidle_simple_enter(struct cpuidle_device *dev, struct cpuidle_driver *drv, int index) { @@ -19,3 +39,114 @@ int arm_cpuidle_simple_enter(struct cpuidle_device *dev, return index; } + +/** + * arm_cpuidle_suspend() - function to enter low power idle states + * @index: an integer used as an identifier for the low level PM callbacks + * + * This function calls the underlying arch specific low level PM code as + * registered at the init time. + * + * Returns -EOPNOTSUPP if no suspend callback is defined, the result of the + * callback otherwise. + */ +int arm_cpuidle_suspend(int index) +{ + int ret = -EOPNOTSUPP; + int cpu = smp_processor_id(); + + if (cpuidle_ops[cpu].suspend) + ret = cpuidle_ops[cpu].suspend(cpu, index); + + return ret; +} + +/** + * arm_cpuidle_get_ops() - find a registered cpuidle_ops by name + * @method: the method name + * + * Search in the __cpuidle_method_of_table array the cpuidle ops matching the + * method name. + * + * Returns a struct cpuidle_ops pointer, NULL if not found. + */ +static struct cpuidle_ops *__init arm_cpuidle_get_ops(const char *method) +{ + struct of_cpuidle_method *m = __cpuidle_method_of_table; + + for (; m->method; m++) + if (!strcmp(m->method, method)) + return m->ops; + + return NULL; +} + +/** + * arm_cpuidle_read_ops() - Initialize the cpuidle ops with the device tree + * @dn: a pointer to a struct device node corresponding to a cpu node + * @cpu: the cpu identifier + * + * Get the method name defined in the 'enable-method' property, retrieve the + * associated cpuidle_ops and do a struct copy. 
This copy is needed because all + * cpuidle_ops are tagged __initdata and will be unloaded after the init + * process. + * + * Return 0 on sucess, -ENOENT if no 'enable-method' is defined, -EOPNOTSUPP if + * no cpuidle_ops is registered for the 'enable-method'. + */ +static int __init arm_cpuidle_read_ops(struct device_node *dn, int cpu) +{ + const char *enable_method; + struct cpuidle_ops *ops; + + enable_method = of_get_property(dn, "enable-method", NULL); + if (!enable_method) + return -ENOENT; + + ops = arm_cpuidle_get_ops(enable_method); + if (!ops) { + pr_warn("%s: unsupported enable-method property: %s\n", + dn->full_name, enable_method); + return -EOPNOTSUPP; + } + + cpuidle_ops[cpu] = *ops; /* structure copy */ + + pr_notice("cpuidle: enable-method property '%s'" + " found operations\n", enable_method); + + return 0; +} + +/** + * arm_cpuidle_init() - Initialize cpuidle_ops for a specific cpu + * @cpu: the cpu to be initialized + * + * Initialize the cpuidle ops with the device for the cpu and then call + * the cpu's idle initialization callback. This may fail if the underlying HW + * is not operational. + * + * Returns: + * 0 on success, + * -ENODEV if it fails to find the cpu node in the device tree, + * -EOPNOTSUPP if it does not find a registered cpuidle_ops for this cpu, + * -ENOENT if it fails to find an 'enable-method' property, + * -ENXIO if the HW reports a failure or a misconfiguration, + * -ENOMEM if the HW report an memory allocation failure + */ +int __init arm_cpuidle_init(int cpu) +{ + struct device_node *cpu_node = of_cpu_device_node_get(cpu); + int ret; + + if (!cpu_node) + return -ENODEV; + + ret = arm_cpuidle_read_ops(cpu_node, cpu); + if (!ret && cpuidle_ops[cpu].init) + ret = cpuidle_ops[cpu].init(cpu_node, cpu); + + of_node_put(cpu_node); + + return ret; +} diff --git a/arch/arm/kernel/debug.S b/arch/arm/kernel/debug.S index 78c91b5f97d4..ea9646cc2a0e 100644 --- a/arch/arm/kernel/debug.S +++ b/arch/arm/kernel/debug.S @@ -35,7 +35,7 @@ #else /* !CONFIG_MMU */ .macro addruart_current, rx, tmp1, tmp2 - addruart \rx, \tmp1 + addruart \rx, \tmp1, \tmp2 .endm #endif /* CONFIG_MMU */ diff --git a/arch/arm/kernel/entry-armv.S b/arch/arm/kernel/entry-armv.S index 672b21942fff..cb4fb1e69778 100644 --- a/arch/arm/kernel/entry-armv.S +++ b/arch/arm/kernel/entry-armv.S @@ -15,6 +15,8 @@ * that causes it to save wrong values... Be aware! 
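A minimal sketch of how a platform back-end would plug into the cpuidle_ops table scanned by arm_cpuidle_get_ops() above, assuming the CPUIDLE_METHOD_OF_DECLARE() registration helper from <asm/cpuidle.h> (not shown in this diff); the "vendor,foo" enable-method string and the callbacks are hypothetical, with prototypes written to match the calls made by arm_cpuidle_suspend() and arm_cpuidle_init():

#include <linux/of.h>
#include <asm/cpuidle.h>

static int foo_cpuidle_init(struct device_node *cpu_node, int cpu)
{
	/* Probe the power controller for this CPU; return -ENXIO or
	 * -ENOMEM on failure, as documented for arm_cpuidle_init(). */
	return 0;
}

static int foo_cpuidle_suspend(int cpu, int index)
{
	/* Program the hardware to enter the state identified by 'index'. */
	return 0;
}

static struct cpuidle_ops foo_cpuidle_ops __initdata = {
	.init		= foo_cpuidle_init,
	.suspend	= foo_cpuidle_suspend,
};

/* Matched against the DT "enable-method" string by arm_cpuidle_read_ops(). */
CPUIDLE_METHOD_OF_DECLARE(foo_idle, "vendor,foo", &foo_cpuidle_ops);

Because arm_cpuidle_read_ops() copies the structure into the per-cpu cpuidle_ops[] array, tagging the registered ops __initdata is safe.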
*/ +#include <linux/init.h> + #include <asm/assembler.h> #include <asm/memory.h> #include <asm/glue-df.h> @@ -40,7 +42,7 @@ #ifdef CONFIG_MULTI_IRQ_HANDLER ldr r1, =handle_arch_irq mov r0, sp - adr lr, BSYM(9997f) + badr lr, 9997f ldr pc, [r1] #else arch_irq_handler_default @@ -273,7 +275,7 @@ __und_svc: str r4, [sp, #S_PC] orr r0, r9, r0, lsl #16 #endif - adr r9, BSYM(__und_svc_finish) + badr r9, __und_svc_finish mov r2, r4 bl call_fpe @@ -408,7 +410,7 @@ ENDPROC(__fiq_abt) zero_fp .if \trace -#ifdef CONFIG_IRQSOFF_TRACER +#ifdef CONFIG_TRACE_IRQFLAGS bl trace_hardirqs_off #endif ct_user_exit save = 0 @@ -469,7 +471,7 @@ __und_usr: @ instruction, or the more conventional lr if we are to treat @ this as a real undefined instruction @ - adr r9, BSYM(ret_from_exception) + badr r9, ret_from_exception @ IRQs must be enabled before attempting to read the instruction from @ user space since that could cause a page/translation fault if the @@ -486,7 +488,7 @@ __und_usr: @ r2 = PC value for the following instruction (:= regs->ARM_pc) @ r4 = PC value for the faulting instruction @ lr = 32-bit undefined instruction function - adr lr, BSYM(__und_usr_fault_32) + badr lr, __und_usr_fault_32 b call_fpe __und_usr_thumb: @@ -522,7 +524,7 @@ ARM_BE8(rev16 r0, r0) @ little endian instruction add r2, r2, #2 @ r2 is PC + 2, make it PC + 4 str r2, [sp, #S_PC] @ it's a 2x16bit instr, update orr r0, r0, r5, lsl #16 - adr lr, BSYM(__und_usr_fault_32) + badr lr, __und_usr_fault_32 @ r0 = the two 16-bit Thumb instructions which caused the exception @ r2 = PC value for the following Thumb instruction (:= regs->ARM_pc) @ r4 = PC value for the first 16-bit Thumb instruction @@ -545,7 +547,7 @@ ENDPROC(__und_usr) /* * The out of line fixup for the ldrt instructions above. */ - .pushsection .fixup, "ax" + .pushsection .text.fixup, "ax" .align 2 4: str r4, [sp, #S_PC] @ retry current instruction ret r9 @@ -716,7 +718,7 @@ __und_usr_fault_32: __und_usr_fault_16: mov r1, #2 1: mov r0, sp - adr lr, BSYM(ret_from_exception) + badr lr, ret_from_exception b __und_fault ENDPROC(__und_usr_fault_32) ENDPROC(__und_usr_fault_16) diff --git a/arch/arm/kernel/entry-common.S b/arch/arm/kernel/entry-common.S index f8ccc21fa032..92828a1dec80 100644 --- a/arch/arm/kernel/entry-common.S +++ b/arch/arm/kernel/entry-common.S @@ -33,7 +33,9 @@ ret_fast_syscall: UNWIND(.fnstart ) UNWIND(.cantunwind ) disable_irq @ disable interrupts - ldr r1, [tsk, #TI_FLAGS] + ldr r1, [tsk, #TI_FLAGS] @ re-check for syscall tracing + tst r1, #_TIF_SYSCALL_WORK + bne __sys_trace_return tst r1, #_TIF_WORK_MASK bne fast_work_pending asm_trace_hardirqs_on @@ -88,7 +90,7 @@ ENTRY(ret_from_fork) bl schedule_tail cmp r5, #0 movne r0, r4 - adrne lr, BSYM(1f) + badrne lr, 1f retne r5 1: get_thread_info tsk b ret_slow_syscall @@ -196,7 +198,7 @@ local_restart: bne __sys_trace cmp scno, #NR_syscalls @ check upper syscall limit - adr lr, BSYM(ret_fast_syscall) @ return address + badr lr, ret_fast_syscall @ return address ldrcc pc, [tbl, scno, lsl #2] @ call sys_* routine add r1, sp, #S_OFF @@ -231,7 +233,7 @@ __sys_trace: add r0, sp, #S_OFF bl syscall_trace_enter - adr lr, BSYM(__sys_trace_return) @ return address + badr lr, __sys_trace_return @ return address mov scno, r0 @ syscall number (possibly new) add r1, sp, #S_R0 + S_OFF @ pointer to regs cmp scno, #NR_syscalls @ check upper syscall limit diff --git a/arch/arm/kernel/entry-ftrace.S b/arch/arm/kernel/entry-ftrace.S index fe57c73e70a4..c73c4030ca5d 100644 --- a/arch/arm/kernel/entry-ftrace.S +++ 
b/arch/arm/kernel/entry-ftrace.S @@ -87,7 +87,7 @@ 1: mcount_get_lr r1 @ lr of instrumented func mcount_adjust_addr r0, lr @ instrumented function - adr lr, BSYM(2f) + badr lr, 2f mov pc, r2 2: mcount_exit .endm diff --git a/arch/arm/kernel/entry-v7m.S b/arch/arm/kernel/entry-v7m.S index 8944f4991c3c..b6c8bb9315e7 100644 --- a/arch/arm/kernel/entry-v7m.S +++ b/arch/arm/kernel/entry-v7m.S @@ -117,9 +117,14 @@ ENTRY(__switch_to) ENDPROC(__switch_to) .data - .align 8 +#if CONFIG_CPU_V7M_NUM_IRQ <= 112 + .align 9 +#else + .align 10 +#endif + /* - * Vector table (64 words => 256 bytes natural alignment) + * Vector table (Natural alignment need to be ensured) */ ENTRY(vector_table) .long 0 @ 0 - Reset stack pointer @@ -138,6 +143,6 @@ ENTRY(vector_table) .long __invalid_entry @ 13 - Reserved .long __pendsv_entry @ 14 - PendSV .long __invalid_entry @ 15 - SysTick - .rept 64 - 16 - .long __irq_entry @ 16..64 - External Interrupts + .rept CONFIG_CPU_V7M_NUM_IRQ + .long __irq_entry @ External Interrupts .endr diff --git a/arch/arm/kernel/head-nommu.S b/arch/arm/kernel/head-nommu.S index cc176b67c134..9b8c5a113434 100644 --- a/arch/arm/kernel/head-nommu.S +++ b/arch/arm/kernel/head-nommu.S @@ -46,7 +46,7 @@ ENTRY(stext) .arm ENTRY(stext) - THUMB( adr r9, BSYM(1f) ) @ Kernel is always entered in ARM. + THUMB( badr r9, 1f ) @ Kernel is always entered in ARM. THUMB( bx r9 ) @ If this is a Thumb-2 kernel, THUMB( .thumb ) @ switch to Thumb now. THUMB(1: ) @@ -77,13 +77,13 @@ ENTRY(stext) orr r6, r6, #(1 << MPU_RSR_EN) @ Set region enabled bit bl __setup_mpu #endif - ldr r13, =__mmap_switched @ address to jump to after - @ initialising sctlr - adr lr, BSYM(1f) @ return (PIC) address - ARM( add pc, r10, #PROCINFO_INITFUNC ) - THUMB( add r12, r10, #PROCINFO_INITFUNC ) - THUMB( ret r12 ) - 1: b __after_proc_init + + badr lr, 1f @ return (PIC) address + ldr r12, [r10, #PROCINFO_INITFUNC] + add r12, r12, r10 + ret r12 +1: bl __after_proc_init + b __mmap_switched ENDPROC(stext) #ifdef CONFIG_SMP @@ -106,8 +106,7 @@ ENTRY(secondary_startup) movs r10, r5 @ invalid processor? beq __error_p @ yes, error 'p' - adr r4, __secondary_data - ldmia r4, {r7, r12} + ldr r7, __secondary_data #ifdef CONFIG_ARM_MPU /* Use MPU region info supplied by __cpu_up */ @@ -115,23 +114,19 @@ ENTRY(secondary_startup) bl __setup_mpu @ Initialize the MPU #endif - adr lr, BSYM(__after_proc_init) @ return address - mov r13, r12 @ __secondary_switched address - ARM( add pc, r10, #PROCINFO_INITFUNC ) - THUMB( add r12, r10, #PROCINFO_INITFUNC ) - THUMB( ret r12 ) -ENDPROC(secondary_startup) - -ENTRY(__secondary_switched) - ldr sp, [r7, #8] @ set up the stack pointer + badr lr, 1f @ return (PIC) address + ldr r12, [r10, #PROCINFO_INITFUNC] + add r12, r12, r10 + ret r12 +1: bl __after_proc_init + ldr sp, [r7, #12] @ set up the stack pointer mov fp, #0 b secondary_start_kernel -ENDPROC(__secondary_switched) +ENDPROC(secondary_startup) .type __secondary_data, %object __secondary_data: .long secondary_data - .long __secondary_switched #endif /* CONFIG_SMP */ /* @@ -164,7 +159,7 @@ __after_proc_init: #endif mcr p15, 0, r0, c1, c0, 0 @ write control reg #endif /* CONFIG_CPU_CP15 */ - ret r13 + ret lr ENDPROC(__after_proc_init) .ltorg diff --git a/arch/arm/kernel/head.S b/arch/arm/kernel/head.S index 01963273c07a..bd755d97e459 100644 --- a/arch/arm/kernel/head.S +++ b/arch/arm/kernel/head.S @@ -80,7 +80,7 @@ ENTRY(stext) ARM_BE8(setend be ) @ ensure we are in BE8 mode - THUMB( adr r9, BSYM(1f) ) @ Kernel is always entered in ARM. 
+ THUMB( badr r9, 1f ) @ Kernel is always entered in ARM. THUMB( bx r9 ) @ If this is a Thumb-2 kernel, THUMB( .thumb ) @ switch to Thumb now. THUMB(1: ) @@ -131,16 +131,33 @@ ENTRY(stext) * The following calls CPU specific code in a position independent * manner. See arch/arm/mm/proc-*.S for details. r10 = base of * xxx_proc_info structure selected by __lookup_processor_type - * above. On return, the CPU will be ready for the MMU to be - * turned on, and r0 will hold the CPU control register value. + * above. + * + * The processor init function will be called with: + * r1 - machine type + * r2 - boot data (atags/dt) pointer + * r4 - translation table base (low word) + * r5 - translation table base (high word, if LPAE) + * r8 - translation table base 1 (pfn if LPAE) + * r9 - cpuid + * r13 - virtual address for __enable_mmu -> __turn_mmu_on + * + * On return, the CPU will be ready for the MMU to be turned on, + * r0 will hold the CPU control register value, r1, r2, r4, and + * r9 will be preserved. r5 will also be preserved if LPAE. */ ldr r13, =__mmap_switched @ address to jump to after @ mmu has been enabled - adr lr, BSYM(1f) @ return (PIC) address + badr lr, 1f @ return (PIC) address +#ifdef CONFIG_ARM_LPAE + mov r5, #0 @ high TTBR0 + mov r8, r4, lsr #12 @ TTBR1 is swapper_pg_dir pfn +#else mov r8, r4 @ set TTBR1 to swapper_pg_dir - ARM( add pc, r10, #PROCINFO_INITFUNC ) - THUMB( add r12, r10, #PROCINFO_INITFUNC ) - THUMB( ret r12 ) +#endif + ldr r12, [r10, #PROCINFO_INITFUNC] + add r12, r12, r10 + ret r12 1: b __enable_mmu ENDPROC(stext) .ltorg @@ -158,7 +175,7 @@ ENDPROC(stext) * * Returns: * r0, r3, r5-r7 corrupted - * r4 = page table (see ARCH_PGD_SHIFT in asm/memory.h) + * r4 = physical page table address */ __create_page_tables: pgtbl r4, r8 @ page table address @@ -333,7 +350,6 @@ __create_page_tables: #endif #ifdef CONFIG_ARM_LPAE sub r4, r4, #0x1000 @ point to the PGD table - mov r4, r4, lsr #ARCH_PGD_SHIFT #endif ret lr ENDPROC(__create_page_tables) @@ -346,9 +362,9 @@ __turn_mmu_on_loc: #if defined(CONFIG_SMP) .text -ENTRY(secondary_startup_arm) .arm - THUMB( adr r9, BSYM(1f) ) @ Kernel is entered in ARM. +ENTRY(secondary_startup_arm) + THUMB( badr r9, 1f ) @ Kernel is entered in ARM. THUMB( bx r9 ) @ If this is a Thumb-2 kernel, THUMB( .thumb ) @ switch to Thumb now. THUMB(1: ) @@ -381,15 +397,15 @@ ENTRY(secondary_startup) adr r4, __secondary_data ldmia r4, {r5, r7, r12} @ address to jump to after sub lr, r4, r5 @ mmu has been enabled - ldr r4, [r7, lr] @ get secondary_data.pgdir - add r7, r7, #4 - ldr r8, [r7, lr] @ get secondary_data.swapper_pg_dir - adr lr, BSYM(__enable_mmu) @ return address + add r3, r7, lr + ldrd r4, [r3, #0] @ get secondary_data.pgdir + ldr r8, [r3, #8] @ get secondary_data.swapper_pg_dir + badr lr, __enable_mmu @ return address mov r13, r12 @ __secondary_switched address - ARM( add pc, r10, #PROCINFO_INITFUNC ) @ initialise processor - @ (return control reg) - THUMB( add r12, r10, #PROCINFO_INITFUNC ) - THUMB( ret r12 ) + ldr r12, [r10, #PROCINFO_INITFUNC] + add r12, r12, r10 @ initialise processor + @ (return control reg) + ret r12 ENDPROC(secondary_startup) ENDPROC(secondary_startup_arm) @@ -397,7 +413,7 @@ ENDPROC(secondary_startup_arm) * r6 = &secondary_data */ ENTRY(__secondary_switched) - ldr sp, [r7, #4] @ get secondary_data.stack + ldr sp, [r7, #12] @ get secondary_data.stack mov fp, #0 b secondary_start_kernel ENDPROC(__secondary_switched) @@ -416,12 +432,14 @@ __secondary_data: /* * Setup common bits before finally enabling the MMU. 
Essentially * this is just loading the page table pointer and domain access - * registers. + * registers. All these registers need to be preserved by the + * processor setup function (or set in the case of r0) * * r0 = cp#15 control register * r1 = machine ID * r2 = atags or dtb pointer - * r4 = page table (see ARCH_PGD_SHIFT in asm/memory.h) + * r4 = TTBR pointer (low word) + * r5 = TTBR pointer (high word if LPAE) * r9 = processor ID * r13 = *virtual* address to jump to upon completion */ @@ -440,7 +458,9 @@ __enable_mmu: #ifdef CONFIG_CPU_ICACHE_DISABLE bic r0, r0, #CR_I #endif -#ifndef CONFIG_ARM_LPAE +#ifdef CONFIG_ARM_LPAE + mcrr p15, 0, r4, r5, c2 @ load TTBR0 +#else mov r5, #(domain_val(DOMAIN_USER, DOMAIN_MANAGER) | \ domain_val(DOMAIN_KERNEL, DOMAIN_MANAGER) | \ domain_val(DOMAIN_TABLE, DOMAIN_MANAGER) | \ diff --git a/arch/arm/kernel/hibernate.c b/arch/arm/kernel/hibernate.c index c4cc50e58c13..a71501ff6f18 100644 --- a/arch/arm/kernel/hibernate.c +++ b/arch/arm/kernel/hibernate.c @@ -22,6 +22,7 @@ #include <asm/suspend.h> #include <asm/memory.h> #include <asm/sections.h> +#include "reboot.h" int pfn_is_nosave(unsigned long pfn) { @@ -61,7 +62,7 @@ static int notrace arch_save_image(unsigned long unused) ret = swsusp_save(); if (ret == 0) - soft_restart(virt_to_phys(cpu_resume)); + _soft_restart(virt_to_phys(cpu_resume), false); return ret; } @@ -86,7 +87,7 @@ static void notrace arch_restore_image(void *unused) for (pbe = restore_pblist; pbe; pbe = pbe->next) copy_page(pbe->orig_address, pbe->address); - soft_restart(virt_to_phys(cpu_resume)); + _soft_restart(virt_to_phys(cpu_resume), false); } static u64 resume_stack[PAGE_SIZE/2/sizeof(u64)] __nosavedata; @@ -99,7 +100,6 @@ static u64 resume_stack[PAGE_SIZE/2/sizeof(u64)] __nosavedata; */ int swsusp_arch_resume(void) { - extern void call_with_stack(void (*fn)(void *), void *arg, void *sp); call_with_stack(arch_restore_image, 0, resume_stack + ARRAY_SIZE(resume_stack)); return 0; diff --git a/arch/arm/kernel/hw_breakpoint.c b/arch/arm/kernel/hw_breakpoint.c index 7fc70ae21185..dc7d0a95bd36 100644 --- a/arch/arm/kernel/hw_breakpoint.c +++ b/arch/arm/kernel/hw_breakpoint.c @@ -648,7 +648,7 @@ int arch_validate_hwbkpt_settings(struct perf_event *bp) * Per-cpu breakpoints are not supported by our stepping * mechanism. */ - if (!bp->hw.bp_target) + if (!bp->hw.target) return -EINVAL; /* diff --git a/arch/arm/kernel/machine_kexec.c b/arch/arm/kernel/machine_kexec.c index de2b085ad753..8bf3b7c09888 100644 --- a/arch/arm/kernel/machine_kexec.c +++ b/arch/arm/kernel/machine_kexec.c @@ -46,7 +46,8 @@ int machine_kexec_prepare(struct kimage *image) * and implements CPU hotplug for the current HW. If not, we won't be * able to kexec reliably, so fail the prepare operation. */ - if (num_possible_cpus() > 1 && !platform_can_cpu_hotplug()) + if (num_possible_cpus() > 1 && platform_can_secondary_boot() && + !platform_can_cpu_hotplug()) return -EINVAL; /* diff --git a/arch/arm/kernel/module-plts.c b/arch/arm/kernel/module-plts.c new file mode 100644 index 000000000000..097e2e201b9f --- /dev/null +++ b/arch/arm/kernel/module-plts.c @@ -0,0 +1,183 @@ +/* + * Copyright (C) 2014 Linaro Ltd. <ard.biesheuvel@linaro.org> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ */ + +#include <linux/elf.h> +#include <linux/kernel.h> +#include <linux/module.h> + +#include <asm/cache.h> +#include <asm/opcodes.h> + +#define PLT_ENT_STRIDE L1_CACHE_BYTES +#define PLT_ENT_COUNT (PLT_ENT_STRIDE / sizeof(u32)) +#define PLT_ENT_SIZE (sizeof(struct plt_entries) / PLT_ENT_COUNT) + +#ifdef CONFIG_THUMB2_KERNEL +#define PLT_ENT_LDR __opcode_to_mem_thumb32(0xf8dff000 | \ + (PLT_ENT_STRIDE - 4)) +#else +#define PLT_ENT_LDR __opcode_to_mem_arm(0xe59ff000 | \ + (PLT_ENT_STRIDE - 8)) +#endif + +struct plt_entries { + u32 ldr[PLT_ENT_COUNT]; + u32 lit[PLT_ENT_COUNT]; +}; + +static bool in_init(const struct module *mod, u32 addr) +{ + return addr - (u32)mod->module_init < mod->init_size; +} + +u32 get_module_plt(struct module *mod, unsigned long loc, Elf32_Addr val) +{ + struct plt_entries *plt, *plt_end; + int c, *count; + + if (in_init(mod, loc)) { + plt = (void *)mod->arch.init_plt->sh_addr; + plt_end = (void *)plt + mod->arch.init_plt->sh_size; + count = &mod->arch.init_plt_count; + } else { + plt = (void *)mod->arch.core_plt->sh_addr; + plt_end = (void *)plt + mod->arch.core_plt->sh_size; + count = &mod->arch.core_plt_count; + } + + /* Look for an existing entry pointing to 'val' */ + for (c = *count; plt < plt_end; c -= PLT_ENT_COUNT, plt++) { + int i; + + if (!c) { + /* Populate a new set of entries */ + *plt = (struct plt_entries){ + { [0 ... PLT_ENT_COUNT - 1] = PLT_ENT_LDR, }, + { val, } + }; + ++*count; + return (u32)plt->ldr; + } + for (i = 0; i < PLT_ENT_COUNT; i++) { + if (!plt->lit[i]) { + plt->lit[i] = val; + ++*count; + } + if (plt->lit[i] == val) + return (u32)&plt->ldr[i]; + } + } + BUG(); +} + +static int duplicate_rel(Elf32_Addr base, const Elf32_Rel *rel, int num, + u32 mask) +{ + u32 *loc1, *loc2; + int i; + + for (i = 0; i < num; i++) { + if (rel[i].r_info != rel[num].r_info) + continue; + + /* + * Identical relocation types against identical symbols can + * still result in different PLT entries if the addend in the + * place is different. So resolve the target of the relocation + * to compare the values. + */ + loc1 = (u32 *)(base + rel[i].r_offset); + loc2 = (u32 *)(base + rel[num].r_offset); + if (((*loc1 ^ *loc2) & mask) == 0) + return 1; + } + return 0; +} + +/* Count how many PLT entries we may need */ +static unsigned int count_plts(Elf32_Addr base, const Elf32_Rel *rel, int num) +{ + unsigned int ret = 0; + int i; + + /* + * Sure, this is order(n^2), but it's usually short, and not + * time critical + */ + for (i = 0; i < num; i++) + switch (ELF32_R_TYPE(rel[i].r_info)) { + case R_ARM_CALL: + case R_ARM_PC24: + case R_ARM_JUMP24: + if (!duplicate_rel(base, rel, i, + __opcode_to_mem_arm(0x00ffffff))) + ret++; + break; +#ifdef CONFIG_THUMB2_KERNEL + case R_ARM_THM_CALL: + case R_ARM_THM_JUMP24: + if (!duplicate_rel(base, rel, i, + __opcode_to_mem_thumb32(0x07ff2fff))) + ret++; +#endif + } + return ret; +} + +int module_frob_arch_sections(Elf_Ehdr *ehdr, Elf_Shdr *sechdrs, + char *secstrings, struct module *mod) +{ + unsigned long core_plts = 0, init_plts = 0; + Elf32_Shdr *s, *sechdrs_end = sechdrs + ehdr->e_shnum; + + /* + * To store the PLTs, we expand the .text section for core module code + * and the .init.text section for initialization code. 
+ */ + for (s = sechdrs; s < sechdrs_end; ++s) + if (strcmp(".core.plt", secstrings + s->sh_name) == 0) + mod->arch.core_plt = s; + else if (strcmp(".init.plt", secstrings + s->sh_name) == 0) + mod->arch.init_plt = s; + + if (!mod->arch.core_plt || !mod->arch.init_plt) { + pr_err("%s: sections missing\n", mod->name); + return -ENOEXEC; + } + + for (s = sechdrs + 1; s < sechdrs_end; ++s) { + const Elf32_Rel *rels = (void *)ehdr + s->sh_offset; + int numrels = s->sh_size / sizeof(Elf32_Rel); + Elf32_Shdr *dstsec = sechdrs + s->sh_info; + + if (s->sh_type != SHT_REL) + continue; + + if (strstr(secstrings + s->sh_name, ".init")) + init_plts += count_plts(dstsec->sh_addr, rels, numrels); + else + core_plts += count_plts(dstsec->sh_addr, rels, numrels); + } + + mod->arch.core_plt->sh_type = SHT_NOBITS; + mod->arch.core_plt->sh_flags = SHF_EXECINSTR | SHF_ALLOC; + mod->arch.core_plt->sh_addralign = L1_CACHE_BYTES; + mod->arch.core_plt->sh_size = round_up(core_plts * PLT_ENT_SIZE, + sizeof(struct plt_entries)); + mod->arch.core_plt_count = 0; + + mod->arch.init_plt->sh_type = SHT_NOBITS; + mod->arch.init_plt->sh_flags = SHF_EXECINSTR | SHF_ALLOC; + mod->arch.init_plt->sh_addralign = L1_CACHE_BYTES; + mod->arch.init_plt->sh_size = round_up(init_plts * PLT_ENT_SIZE, + sizeof(struct plt_entries)); + mod->arch.init_plt_count = 0; + pr_debug("%s: core.plt=%x, init.plt=%x\n", __func__, + mod->arch.core_plt->sh_size, mod->arch.init_plt->sh_size); + return 0; +} diff --git a/arch/arm/kernel/module.c b/arch/arm/kernel/module.c index 2e11961f65ae..efdddcb97dd1 100644 --- a/arch/arm/kernel/module.c +++ b/arch/arm/kernel/module.c @@ -40,7 +40,12 @@ #ifdef CONFIG_MMU void *module_alloc(unsigned long size) { - return __vmalloc_node_range(size, 1, MODULES_VADDR, MODULES_END, + void *p = __vmalloc_node_range(size, 1, MODULES_VADDR, MODULES_END, + GFP_KERNEL, PAGE_KERNEL_EXEC, 0, NUMA_NO_NODE, + __builtin_return_address(0)); + if (!IS_ENABLED(CONFIG_ARM_MODULE_PLTS) || p) + return p; + return __vmalloc_node_range(size, 1, VMALLOC_START, VMALLOC_END, GFP_KERNEL, PAGE_KERNEL_EXEC, 0, NUMA_NO_NODE, __builtin_return_address(0)); } @@ -98,14 +103,33 @@ apply_relocate(Elf32_Shdr *sechdrs, const char *strtab, unsigned int symindex, case R_ARM_PC24: case R_ARM_CALL: case R_ARM_JUMP24: + if (sym->st_value & 3) { + pr_err("%s: section %u reloc %u sym '%s': unsupported interworking call (ARM -> Thumb)\n", + module->name, relindex, i, symname); + return -ENOEXEC; + } + offset = __mem_to_opcode_arm(*(u32 *)loc); offset = (offset & 0x00ffffff) << 2; if (offset & 0x02000000) offset -= 0x04000000; offset += sym->st_value - loc; - if (offset & 3 || - offset <= (s32)0xfe000000 || + + /* + * Route through a PLT entry if 'offset' exceeds the + * supported range. Note that 'offset + loc + 8' + * contains the absolute jump target, i.e., + * @sym + addend, corrected for the +8 PC bias. + */ + if (IS_ENABLED(CONFIG_ARM_MODULE_PLTS) && + (offset <= (s32)0xfe000000 || + offset >= (s32)0x02000000)) + offset = get_module_plt(module, loc, + offset + loc + 8) + - loc - 8; + + if (offset <= (s32)0xfe000000 || offset >= (s32)0x02000000) { pr_err("%s: section %u reloc %u sym '%s': relocation %u out of range (%#lx -> %#x)\n", module->name, relindex, i, symname, @@ -155,6 +179,22 @@ apply_relocate(Elf32_Shdr *sechdrs, const char *strtab, unsigned int symindex, #ifdef CONFIG_THUMB2_KERNEL case R_ARM_THM_CALL: case R_ARM_THM_JUMP24: + /* + * For function symbols, only Thumb addresses are + * allowed (no interworking). 
+ * + * For non-function symbols, the destination + * has no specific ARM/Thumb disposition, so + * the branch is resolved under the assumption + * that interworking is not required. + */ + if (ELF32_ST_TYPE(sym->st_info) == STT_FUNC && + !(sym->st_value & 1)) { + pr_err("%s: section %u reloc %u sym '%s': unsupported interworking call (Thumb -> ARM)\n", + module->name, relindex, i, symname); + return -ENOEXEC; + } + upper = __mem_to_opcode_thumb16(*(u16 *)loc); lower = __mem_to_opcode_thumb16(*(u16 *)(loc + 2)); @@ -183,17 +223,17 @@ apply_relocate(Elf32_Shdr *sechdrs, const char *strtab, unsigned int symindex, offset += sym->st_value - loc; /* - * For function symbols, only Thumb addresses are - * allowed (no interworking). - * - * For non-function symbols, the destination - * has no specific ARM/Thumb disposition, so - * the branch is resolved under the assumption - * that interworking is not required. + * Route through a PLT entry if 'offset' exceeds the + * supported range. */ - if ((ELF32_ST_TYPE(sym->st_info) == STT_FUNC && - !(offset & 1)) || - offset <= (s32)0xff000000 || + if (IS_ENABLED(CONFIG_ARM_MODULE_PLTS) && + (offset <= (s32)0xff000000 || + offset >= (s32)0x01000000)) + offset = get_module_plt(module, loc, + offset + loc + 4) + - loc - 4; + + if (offset <= (s32)0xff000000 || offset >= (s32)0x01000000) { pr_err("%s: section %u reloc %u sym '%s': relocation %u out of range (%#lx -> %#x)\n", module->name, relindex, i, symname, diff --git a/arch/arm/kernel/module.lds b/arch/arm/kernel/module.lds new file mode 100644 index 000000000000..3682fa107918 --- /dev/null +++ b/arch/arm/kernel/module.lds @@ -0,0 +1,4 @@ +SECTIONS { + .core.plt : { BYTE(0) } + .init.plt : { BYTE(0) } +} diff --git a/arch/arm/kernel/perf_event.c b/arch/arm/kernel/perf_event.c index 557e128e4df0..54272e0be713 100644 --- a/arch/arm/kernel/perf_event.c +++ b/arch/arm/kernel/perf_event.c @@ -11,12 +11,18 @@ */ #define pr_fmt(fmt) "hw perfevents: " fmt +#include <linux/bitmap.h> +#include <linux/cpumask.h> +#include <linux/export.h> #include <linux/kernel.h> +#include <linux/of.h> #include <linux/platform_device.h> -#include <linux/pm_runtime.h> +#include <linux/slab.h> +#include <linux/spinlock.h> #include <linux/irq.h> #include <linux/irqdesc.h> +#include <asm/cputype.h> #include <asm/irq_regs.h> #include <asm/pmu.h> @@ -229,6 +235,10 @@ armpmu_add(struct perf_event *event, int flags) int idx; int err = 0; + /* An event following a process won't be stopped earlier */ + if (!cpumask_test_cpu(smp_processor_id(), &armpmu->supported_cpus)) + return -ENOENT; + perf_pmu_disable(event->pmu); /* If we don't have a space for the counter then finish early. */ @@ -259,20 +269,29 @@ out: } static int -validate_event(struct pmu_hw_events *hw_events, - struct perf_event *event) +validate_event(struct pmu *pmu, struct pmu_hw_events *hw_events, + struct perf_event *event) { - struct arm_pmu *armpmu = to_arm_pmu(event->pmu); + struct arm_pmu *armpmu; if (is_software_event(event)) return 1; + /* + * Reject groups spanning multiple HW PMUs (e.g. CPU + CCI). The + * core perf code won't check that the pmu->ctx == leader->ctx + * until after pmu->event_init(event). 
+ */ + if (event->pmu != pmu) + return 0; + if (event->state < PERF_EVENT_STATE_OFF) return 1; if (event->state == PERF_EVENT_STATE_OFF && !event->attr.enable_on_exec) return 1; + armpmu = to_arm_pmu(event->pmu); return armpmu->get_event_idx(hw_events, event) >= 0; } @@ -288,15 +307,15 @@ validate_group(struct perf_event *event) */ memset(&fake_pmu.used_mask, 0, sizeof(fake_pmu.used_mask)); - if (!validate_event(&fake_pmu, leader)) + if (!validate_event(event->pmu, &fake_pmu, leader)) return -EINVAL; list_for_each_entry(sibling, &leader->sibling_list, group_entry) { - if (!validate_event(&fake_pmu, sibling)) + if (!validate_event(event->pmu, &fake_pmu, sibling)) return -EINVAL; } - if (!validate_event(&fake_pmu, event)) + if (!validate_event(event->pmu, &fake_pmu, event)) return -EINVAL; return 0; @@ -335,20 +354,12 @@ static void armpmu_release_hardware(struct arm_pmu *armpmu) { armpmu->free_irq(armpmu); - pm_runtime_put_sync(&armpmu->plat_device->dev); } static int armpmu_reserve_hardware(struct arm_pmu *armpmu) { - int err; - struct platform_device *pmu_device = armpmu->plat_device; - - if (!pmu_device) - return -ENODEV; - - pm_runtime_get_sync(&pmu_device->dev); - err = armpmu->request_irq(armpmu, armpmu_dispatch_irq); + int err = armpmu->request_irq(armpmu, armpmu_dispatch_irq); if (err) { armpmu_release_hardware(armpmu); return err; @@ -445,6 +456,17 @@ static int armpmu_event_init(struct perf_event *event) int err = 0; atomic_t *active_events = &armpmu->active_events; + /* + * Reject CPU-affine events for CPUs that are of a different class to + * that which this PMU handles. Process-following events (where + * event->cpu == -1) can be migrated between CPUs, and thus we have to + * reject them later (in armpmu_add) if they're scheduled on a + * different class of CPU. + */ + if (event->cpu != -1 && + !cpumask_test_cpu(event->cpu, &armpmu->supported_cpus)) + return -ENOENT; + /* does not support taken branch sampling */ if (has_branch_stack(event)) return -EOPNOTSUPP; @@ -480,6 +502,10 @@ static void armpmu_enable(struct pmu *pmu) struct pmu_hw_events *hw_events = this_cpu_ptr(armpmu->hw_events); int enabled = bitmap_weight(hw_events->used_mask, armpmu->num_events); + /* For task-bound events we may be called on other CPUs */ + if (!cpumask_test_cpu(smp_processor_id(), &armpmu->supported_cpus)) + return; + if (enabled) armpmu->start(armpmu); } @@ -487,34 +513,25 @@ static void armpmu_enable(struct pmu *pmu) static void armpmu_disable(struct pmu *pmu) { struct arm_pmu *armpmu = to_arm_pmu(pmu); - armpmu->stop(armpmu); -} - -#ifdef CONFIG_PM -static int armpmu_runtime_resume(struct device *dev) -{ - struct arm_pmu_platdata *plat = dev_get_platdata(dev); - if (plat && plat->runtime_resume) - return plat->runtime_resume(dev); + /* For task-bound events we may be called on other CPUs */ + if (!cpumask_test_cpu(smp_processor_id(), &armpmu->supported_cpus)) + return; - return 0; + armpmu->stop(armpmu); } -static int armpmu_runtime_suspend(struct device *dev) +/* + * In heterogeneous systems, events are specific to a particular + * microarchitecture, and aren't suitable for another. Thus, only match CPUs of + * the same microarchitecture. 
+ */ +static int armpmu_filter_match(struct perf_event *event) { - struct arm_pmu_platdata *plat = dev_get_platdata(dev); - - if (plat && plat->runtime_suspend) - return plat->runtime_suspend(dev); - - return 0; + struct arm_pmu *armpmu = to_arm_pmu(event->pmu); + unsigned int cpu = smp_processor_id(); + return cpumask_test_cpu(cpu, &armpmu->supported_cpus); } -#endif - -const struct dev_pm_ops armpmu_dev_pm_ops = { - SET_RUNTIME_PM_OPS(armpmu_runtime_suspend, armpmu_runtime_resume, NULL) -}; static void armpmu_init(struct arm_pmu *armpmu) { @@ -530,15 +547,350 @@ static void armpmu_init(struct arm_pmu *armpmu) .start = armpmu_start, .stop = armpmu_stop, .read = armpmu_read, + .filter_match = armpmu_filter_match, }; } int armpmu_register(struct arm_pmu *armpmu, int type) { armpmu_init(armpmu); - pm_runtime_enable(&armpmu->plat_device->dev); pr_info("enabled with %s PMU driver, %d counters available\n", armpmu->name, armpmu->num_events); return perf_pmu_register(&armpmu->pmu, armpmu->name, type); } +/* Set at runtime when we know what CPU type we are. */ +static struct arm_pmu *__oprofile_cpu_pmu; + +/* + * Despite the names, these two functions are CPU-specific and are used + * by the OProfile/perf code. + */ +const char *perf_pmu_name(void) +{ + if (!__oprofile_cpu_pmu) + return NULL; + + return __oprofile_cpu_pmu->name; +} +EXPORT_SYMBOL_GPL(perf_pmu_name); + +int perf_num_counters(void) +{ + int max_events = 0; + + if (__oprofile_cpu_pmu != NULL) + max_events = __oprofile_cpu_pmu->num_events; + + return max_events; +} +EXPORT_SYMBOL_GPL(perf_num_counters); + +static void cpu_pmu_enable_percpu_irq(void *data) +{ + int irq = *(int *)data; + + enable_percpu_irq(irq, IRQ_TYPE_NONE); +} + +static void cpu_pmu_disable_percpu_irq(void *data) +{ + int irq = *(int *)data; + + disable_percpu_irq(irq); +} + +static void cpu_pmu_free_irq(struct arm_pmu *cpu_pmu) +{ + int i, irq, irqs; + struct platform_device *pmu_device = cpu_pmu->plat_device; + struct pmu_hw_events __percpu *hw_events = cpu_pmu->hw_events; + + irqs = min(pmu_device->num_resources, num_possible_cpus()); + + irq = platform_get_irq(pmu_device, 0); + if (irq >= 0 && irq_is_percpu(irq)) { + on_each_cpu(cpu_pmu_disable_percpu_irq, &irq, 1); + free_percpu_irq(irq, &hw_events->percpu_pmu); + } else { + for (i = 0; i < irqs; ++i) { + int cpu = i; + + if (cpu_pmu->irq_affinity) + cpu = cpu_pmu->irq_affinity[i]; + + if (!cpumask_test_and_clear_cpu(cpu, &cpu_pmu->active_irqs)) + continue; + irq = platform_get_irq(pmu_device, i); + if (irq >= 0) + free_irq(irq, per_cpu_ptr(&hw_events->percpu_pmu, cpu)); + } + } +} + +static int cpu_pmu_request_irq(struct arm_pmu *cpu_pmu, irq_handler_t handler) +{ + int i, err, irq, irqs; + struct platform_device *pmu_device = cpu_pmu->plat_device; + struct pmu_hw_events __percpu *hw_events = cpu_pmu->hw_events; + + if (!pmu_device) + return -ENODEV; + + irqs = min(pmu_device->num_resources, num_possible_cpus()); + if (irqs < 1) { + pr_warn_once("perf/ARM: No irqs for PMU defined, sampling events not supported\n"); + return 0; + } + + irq = platform_get_irq(pmu_device, 0); + if (irq >= 0 && irq_is_percpu(irq)) { + err = request_percpu_irq(irq, handler, "arm-pmu", + &hw_events->percpu_pmu); + if (err) { + pr_err("unable to request IRQ%d for ARM PMU counters\n", + irq); + return err; + } + on_each_cpu(cpu_pmu_enable_percpu_irq, &irq, 1); + } else { + for (i = 0; i < irqs; ++i) { + int cpu = i; + + err = 0; + irq = platform_get_irq(pmu_device, i); + if (irq < 0) + continue; + + if (cpu_pmu->irq_affinity) + cpu = 
cpu_pmu->irq_affinity[i]; + + /* + * If we have a single PMU interrupt that we can't shift, + * assume that we're running on a uniprocessor machine and + * continue. Otherwise, continue without this interrupt. + */ + if (irq_set_affinity(irq, cpumask_of(cpu)) && irqs > 1) { + pr_warn("unable to set irq affinity (irq=%d, cpu=%u)\n", + irq, cpu); + continue; + } + + err = request_irq(irq, handler, + IRQF_NOBALANCING | IRQF_NO_THREAD, "arm-pmu", + per_cpu_ptr(&hw_events->percpu_pmu, cpu)); + if (err) { + pr_err("unable to request IRQ%d for ARM PMU counters\n", + irq); + return err; + } + + cpumask_set_cpu(cpu, &cpu_pmu->active_irqs); + } + } + + return 0; +} + +/* + * PMU hardware loses all context when a CPU goes offline. + * When a CPU is hotplugged back in, since some hardware registers are + * UNKNOWN at reset, the PMU must be explicitly reset to avoid reading + * junk values out of them. + */ +static int cpu_pmu_notify(struct notifier_block *b, unsigned long action, + void *hcpu) +{ + int cpu = (unsigned long)hcpu; + struct arm_pmu *pmu = container_of(b, struct arm_pmu, hotplug_nb); + + if ((action & ~CPU_TASKS_FROZEN) != CPU_STARTING) + return NOTIFY_DONE; + + if (!cpumask_test_cpu(cpu, &pmu->supported_cpus)) + return NOTIFY_DONE; + + if (pmu->reset) + pmu->reset(pmu); + else + return NOTIFY_DONE; + + return NOTIFY_OK; +} + +static int cpu_pmu_init(struct arm_pmu *cpu_pmu) +{ + int err; + int cpu; + struct pmu_hw_events __percpu *cpu_hw_events; + + cpu_hw_events = alloc_percpu(struct pmu_hw_events); + if (!cpu_hw_events) + return -ENOMEM; + + cpu_pmu->hotplug_nb.notifier_call = cpu_pmu_notify; + err = register_cpu_notifier(&cpu_pmu->hotplug_nb); + if (err) + goto out_hw_events; + + for_each_possible_cpu(cpu) { + struct pmu_hw_events *events = per_cpu_ptr(cpu_hw_events, cpu); + raw_spin_lock_init(&events->pmu_lock); + events->percpu_pmu = cpu_pmu; + } + + cpu_pmu->hw_events = cpu_hw_events; + cpu_pmu->request_irq = cpu_pmu_request_irq; + cpu_pmu->free_irq = cpu_pmu_free_irq; + + /* Ensure the PMU has sane values out of reset. */ + if (cpu_pmu->reset) + on_each_cpu_mask(&cpu_pmu->supported_cpus, cpu_pmu->reset, + cpu_pmu, 1); + + /* If no interrupts available, set the corresponding capability flag */ + if (!platform_get_irq(cpu_pmu->plat_device, 0)) + cpu_pmu->pmu.capabilities |= PERF_PMU_CAP_NO_INTERRUPT; + + return 0; + +out_hw_events: + free_percpu(cpu_hw_events); + return err; +} + +static void cpu_pmu_destroy(struct arm_pmu *cpu_pmu) +{ + unregister_cpu_notifier(&cpu_pmu->hotplug_nb); + free_percpu(cpu_pmu->hw_events); +} + +/* + * CPU PMU identification and probing. 
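For context, a minimal sketch of a PMU platform driver built on the shared arm_pmu_device_probe() entry point added just below; the new ARMv6 driver later in this diff follows the same shape. The "vendor,foo-pmu" compatible string, foo_pmu_init() and the driver name are hypothetical:

#include <linux/of.h>
#include <linux/platform_device.h>
#include <asm/pmu.h>

static int foo_pmu_init(struct arm_pmu *pmu)
{
	/* Fill in pmu->name, pmu->map_event, the counter accessors, etc. */
	return 0;
}

static const struct of_device_id foo_pmu_of_device_ids[] = {
	{ .compatible = "vendor,foo-pmu", .data = foo_pmu_init },
	{ /* sentinel value */ }
};

static const struct pmu_probe_info foo_pmu_probe_table[] = {
	/* ARM_PMU_PROBE() entries would go here for non-DT probing */
	{ /* sentinel value */ }
};

static int foo_pmu_device_probe(struct platform_device *pdev)
{
	return arm_pmu_device_probe(pdev, foo_pmu_of_device_ids,
				    foo_pmu_probe_table);
}

static struct platform_driver foo_pmu_driver = {
	.driver = {
		.name = "foo-pmu",
		.of_match_table = foo_pmu_of_device_ids,
	},
	.probe = foo_pmu_device_probe,
};

static int __init register_foo_pmu_driver(void)
{
	return platform_driver_register(&foo_pmu_driver);
}
device_initcall(register_foo_pmu_driver);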
+ */ +static int probe_current_pmu(struct arm_pmu *pmu, + const struct pmu_probe_info *info) +{ + int cpu = get_cpu(); + unsigned int cpuid = read_cpuid_id(); + int ret = -ENODEV; + + pr_info("probing PMU on CPU %d\n", cpu); + + for (; info->init != NULL; info++) { + if ((cpuid & info->mask) != info->cpuid) + continue; + ret = info->init(pmu); + break; + } + + put_cpu(); + return ret; +} + +static int of_pmu_irq_cfg(struct arm_pmu *pmu) +{ + int i, irq, *irqs; + struct platform_device *pdev = pmu->plat_device; + + /* Don't bother with PPIs; they're already affine */ + irq = platform_get_irq(pdev, 0); + if (irq >= 0 && irq_is_percpu(irq)) + return 0; + + irqs = kcalloc(pdev->num_resources, sizeof(*irqs), GFP_KERNEL); + if (!irqs) + return -ENOMEM; + + for (i = 0; i < pdev->num_resources; ++i) { + struct device_node *dn; + int cpu; + + dn = of_parse_phandle(pdev->dev.of_node, "interrupt-affinity", + i); + if (!dn) { + pr_warn("Failed to parse %s/interrupt-affinity[%d]\n", + of_node_full_name(pdev->dev.of_node), i); + break; + } + + for_each_possible_cpu(cpu) + if (arch_find_n_match_cpu_physical_id(dn, cpu, NULL)) + break; + + if (cpu >= nr_cpu_ids) { + pr_warn("Failed to find logical CPU for %s\n", + dn->name); + of_node_put(dn); + break; + } + of_node_put(dn); + + irqs[i] = cpu; + cpumask_set_cpu(cpu, &pmu->supported_cpus); + } + + if (i == pdev->num_resources) { + pmu->irq_affinity = irqs; + } else { + kfree(irqs); + cpumask_setall(&pmu->supported_cpus); + } + + return 0; +} + +int arm_pmu_device_probe(struct platform_device *pdev, + const struct of_device_id *of_table, + const struct pmu_probe_info *probe_table) +{ + const struct of_device_id *of_id; + const int (*init_fn)(struct arm_pmu *); + struct device_node *node = pdev->dev.of_node; + struct arm_pmu *pmu; + int ret = -ENODEV; + + pmu = kzalloc(sizeof(struct arm_pmu), GFP_KERNEL); + if (!pmu) { + pr_info("failed to allocate PMU device!\n"); + return -ENOMEM; + } + + if (!__oprofile_cpu_pmu) + __oprofile_cpu_pmu = pmu; + + pmu->plat_device = pdev; + + if (node && (of_id = of_match_node(of_table, pdev->dev.of_node))) { + init_fn = of_id->data; + + ret = of_pmu_irq_cfg(pmu); + if (!ret) + ret = init_fn(pmu); + } else { + ret = probe_current_pmu(pmu, probe_table); + cpumask_setall(&pmu->supported_cpus); + } + + if (ret) { + pr_info("failed to probe PMU!\n"); + goto out_free; + } + + ret = cpu_pmu_init(pmu); + if (ret) + goto out_free; + + ret = armpmu_register(pmu, -1); + if (ret) + goto out_destroy; + + return 0; + +out_destroy: + cpu_pmu_destroy(pmu); +out_free: + pr_info("failed to register PMU devices!\n"); + kfree(pmu); + return ret; +} diff --git a/arch/arm/kernel/perf_event_cpu.c b/arch/arm/kernel/perf_event_cpu.c deleted file mode 100644 index 61b53c46edfa..000000000000 --- a/arch/arm/kernel/perf_event_cpu.c +++ /dev/null @@ -1,358 +0,0 @@ -/* - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
- * - * Copyright (C) 2012 ARM Limited - * - * Author: Will Deacon <will.deacon@arm.com> - */ -#define pr_fmt(fmt) "CPU PMU: " fmt - -#include <linux/bitmap.h> -#include <linux/export.h> -#include <linux/kernel.h> -#include <linux/of.h> -#include <linux/platform_device.h> -#include <linux/slab.h> -#include <linux/spinlock.h> -#include <linux/irq.h> -#include <linux/irqdesc.h> - -#include <asm/cputype.h> -#include <asm/irq_regs.h> -#include <asm/pmu.h> - -/* Set at runtime when we know what CPU type we are. */ -static struct arm_pmu *cpu_pmu; - -/* - * Despite the names, these two functions are CPU-specific and are used - * by the OProfile/perf code. - */ -const char *perf_pmu_name(void) -{ - if (!cpu_pmu) - return NULL; - - return cpu_pmu->name; -} -EXPORT_SYMBOL_GPL(perf_pmu_name); - -int perf_num_counters(void) -{ - int max_events = 0; - - if (cpu_pmu != NULL) - max_events = cpu_pmu->num_events; - - return max_events; -} -EXPORT_SYMBOL_GPL(perf_num_counters); - -/* Include the PMU-specific implementations. */ -#include "perf_event_xscale.c" -#include "perf_event_v6.c" -#include "perf_event_v7.c" - -static void cpu_pmu_enable_percpu_irq(void *data) -{ - int irq = *(int *)data; - - enable_percpu_irq(irq, IRQ_TYPE_NONE); -} - -static void cpu_pmu_disable_percpu_irq(void *data) -{ - int irq = *(int *)data; - - disable_percpu_irq(irq); -} - -static void cpu_pmu_free_irq(struct arm_pmu *cpu_pmu) -{ - int i, irq, irqs; - struct platform_device *pmu_device = cpu_pmu->plat_device; - struct pmu_hw_events __percpu *hw_events = cpu_pmu->hw_events; - - irqs = min(pmu_device->num_resources, num_possible_cpus()); - - irq = platform_get_irq(pmu_device, 0); - if (irq >= 0 && irq_is_percpu(irq)) { - on_each_cpu(cpu_pmu_disable_percpu_irq, &irq, 1); - free_percpu_irq(irq, &hw_events->percpu_pmu); - } else { - for (i = 0; i < irqs; ++i) { - if (!cpumask_test_and_clear_cpu(i, &cpu_pmu->active_irqs)) - continue; - irq = platform_get_irq(pmu_device, i); - if (irq >= 0) - free_irq(irq, per_cpu_ptr(&hw_events->percpu_pmu, i)); - } - } -} - -static int cpu_pmu_request_irq(struct arm_pmu *cpu_pmu, irq_handler_t handler) -{ - int i, err, irq, irqs; - struct platform_device *pmu_device = cpu_pmu->plat_device; - struct pmu_hw_events __percpu *hw_events = cpu_pmu->hw_events; - - if (!pmu_device) - return -ENODEV; - - irqs = min(pmu_device->num_resources, num_possible_cpus()); - if (irqs < 1) { - pr_warn_once("perf/ARM: No irqs for PMU defined, sampling events not supported\n"); - return 0; - } - - irq = platform_get_irq(pmu_device, 0); - if (irq >= 0 && irq_is_percpu(irq)) { - err = request_percpu_irq(irq, handler, "arm-pmu", - &hw_events->percpu_pmu); - if (err) { - pr_err("unable to request IRQ%d for ARM PMU counters\n", - irq); - return err; - } - on_each_cpu(cpu_pmu_enable_percpu_irq, &irq, 1); - } else { - for (i = 0; i < irqs; ++i) { - err = 0; - irq = platform_get_irq(pmu_device, i); - if (irq < 0) - continue; - - /* - * If we have a single PMU interrupt that we can't shift, - * assume that we're running on a uniprocessor machine and - * continue. Otherwise, continue without this interrupt. 
- */ - if (irq_set_affinity(irq, cpumask_of(i)) && irqs > 1) { - pr_warn("unable to set irq affinity (irq=%d, cpu=%u)\n", - irq, i); - continue; - } - - err = request_irq(irq, handler, - IRQF_NOBALANCING | IRQF_NO_THREAD, "arm-pmu", - per_cpu_ptr(&hw_events->percpu_pmu, i)); - if (err) { - pr_err("unable to request IRQ%d for ARM PMU counters\n", - irq); - return err; - } - - cpumask_set_cpu(i, &cpu_pmu->active_irqs); - } - } - - return 0; -} - -/* - * PMU hardware loses all context when a CPU goes offline. - * When a CPU is hotplugged back in, since some hardware registers are - * UNKNOWN at reset, the PMU must be explicitly reset to avoid reading - * junk values out of them. - */ -static int cpu_pmu_notify(struct notifier_block *b, unsigned long action, - void *hcpu) -{ - struct arm_pmu *pmu = container_of(b, struct arm_pmu, hotplug_nb); - - if ((action & ~CPU_TASKS_FROZEN) != CPU_STARTING) - return NOTIFY_DONE; - - if (pmu->reset) - pmu->reset(pmu); - else - return NOTIFY_DONE; - - return NOTIFY_OK; -} - -static int cpu_pmu_init(struct arm_pmu *cpu_pmu) -{ - int err; - int cpu; - struct pmu_hw_events __percpu *cpu_hw_events; - - cpu_hw_events = alloc_percpu(struct pmu_hw_events); - if (!cpu_hw_events) - return -ENOMEM; - - cpu_pmu->hotplug_nb.notifier_call = cpu_pmu_notify; - err = register_cpu_notifier(&cpu_pmu->hotplug_nb); - if (err) - goto out_hw_events; - - for_each_possible_cpu(cpu) { - struct pmu_hw_events *events = per_cpu_ptr(cpu_hw_events, cpu); - raw_spin_lock_init(&events->pmu_lock); - events->percpu_pmu = cpu_pmu; - } - - cpu_pmu->hw_events = cpu_hw_events; - cpu_pmu->request_irq = cpu_pmu_request_irq; - cpu_pmu->free_irq = cpu_pmu_free_irq; - - /* Ensure the PMU has sane values out of reset. */ - if (cpu_pmu->reset) - on_each_cpu(cpu_pmu->reset, cpu_pmu, 1); - - /* If no interrupts available, set the corresponding capability flag */ - if (!platform_get_irq(cpu_pmu->plat_device, 0)) - cpu_pmu->pmu.capabilities |= PERF_PMU_CAP_NO_INTERRUPT; - - return 0; - -out_hw_events: - free_percpu(cpu_hw_events); - return err; -} - -static void cpu_pmu_destroy(struct arm_pmu *cpu_pmu) -{ - unregister_cpu_notifier(&cpu_pmu->hotplug_nb); - free_percpu(cpu_pmu->hw_events); -} - -/* - * PMU platform driver and devicetree bindings. 
- */ -static const struct of_device_id cpu_pmu_of_device_ids[] = { - {.compatible = "arm,cortex-a17-pmu", .data = armv7_a17_pmu_init}, - {.compatible = "arm,cortex-a15-pmu", .data = armv7_a15_pmu_init}, - {.compatible = "arm,cortex-a12-pmu", .data = armv7_a12_pmu_init}, - {.compatible = "arm,cortex-a9-pmu", .data = armv7_a9_pmu_init}, - {.compatible = "arm,cortex-a8-pmu", .data = armv7_a8_pmu_init}, - {.compatible = "arm,cortex-a7-pmu", .data = armv7_a7_pmu_init}, - {.compatible = "arm,cortex-a5-pmu", .data = armv7_a5_pmu_init}, - {.compatible = "arm,arm11mpcore-pmu", .data = armv6mpcore_pmu_init}, - {.compatible = "arm,arm1176-pmu", .data = armv6_1176_pmu_init}, - {.compatible = "arm,arm1136-pmu", .data = armv6_1136_pmu_init}, - {.compatible = "qcom,krait-pmu", .data = krait_pmu_init}, - {}, -}; - -static struct platform_device_id cpu_pmu_plat_device_ids[] = { - {.name = "arm-pmu"}, - {.name = "armv6-pmu"}, - {.name = "armv7-pmu"}, - {.name = "xscale-pmu"}, - {}, -}; - -static const struct pmu_probe_info pmu_probe_table[] = { - ARM_PMU_PROBE(ARM_CPU_PART_ARM1136, armv6_1136_pmu_init), - ARM_PMU_PROBE(ARM_CPU_PART_ARM1156, armv6_1156_pmu_init), - ARM_PMU_PROBE(ARM_CPU_PART_ARM1176, armv6_1176_pmu_init), - ARM_PMU_PROBE(ARM_CPU_PART_ARM11MPCORE, armv6mpcore_pmu_init), - ARM_PMU_PROBE(ARM_CPU_PART_CORTEX_A8, armv7_a8_pmu_init), - ARM_PMU_PROBE(ARM_CPU_PART_CORTEX_A9, armv7_a9_pmu_init), - XSCALE_PMU_PROBE(ARM_CPU_XSCALE_ARCH_V1, xscale1pmu_init), - XSCALE_PMU_PROBE(ARM_CPU_XSCALE_ARCH_V2, xscale2pmu_init), - { /* sentinel value */ } -}; - -/* - * CPU PMU identification and probing. - */ -static int probe_current_pmu(struct arm_pmu *pmu) -{ - int cpu = get_cpu(); - unsigned int cpuid = read_cpuid_id(); - int ret = -ENODEV; - const struct pmu_probe_info *info; - - pr_info("probing PMU on CPU %d\n", cpu); - - for (info = pmu_probe_table; info->init != NULL; info++) { - if ((cpuid & info->mask) != info->cpuid) - continue; - ret = info->init(pmu); - break; - } - - put_cpu(); - return ret; -} - -static int cpu_pmu_device_probe(struct platform_device *pdev) -{ - const struct of_device_id *of_id; - const int (*init_fn)(struct arm_pmu *); - struct device_node *node = pdev->dev.of_node; - struct arm_pmu *pmu; - int ret = -ENODEV; - - if (cpu_pmu) { - pr_info("attempt to register multiple PMU devices!\n"); - return -ENOSPC; - } - - pmu = kzalloc(sizeof(struct arm_pmu), GFP_KERNEL); - if (!pmu) { - pr_info("failed to allocate PMU device!\n"); - return -ENOMEM; - } - - cpu_pmu = pmu; - cpu_pmu->plat_device = pdev; - - if (node && (of_id = of_match_node(cpu_pmu_of_device_ids, pdev->dev.of_node))) { - init_fn = of_id->data; - ret = init_fn(pmu); - } else { - ret = probe_current_pmu(pmu); - } - - if (ret) { - pr_info("failed to probe PMU!\n"); - goto out_free; - } - - ret = cpu_pmu_init(cpu_pmu); - if (ret) - goto out_free; - - ret = armpmu_register(cpu_pmu, -1); - if (ret) - goto out_destroy; - - return 0; - -out_destroy: - cpu_pmu_destroy(cpu_pmu); -out_free: - pr_info("failed to register PMU devices!\n"); - kfree(pmu); - return ret; -} - -static struct platform_driver cpu_pmu_driver = { - .driver = { - .name = "arm-pmu", - .pm = &armpmu_dev_pm_ops, - .of_match_table = cpu_pmu_of_device_ids, - }, - .probe = cpu_pmu_device_probe, - .id_table = cpu_pmu_plat_device_ids, -}; - -static int __init register_pmu_driver(void) -{ - return platform_driver_register(&cpu_pmu_driver); -} -device_initcall(register_pmu_driver); diff --git a/arch/arm/kernel/perf_event_v6.c b/arch/arm/kernel/perf_event_v6.c index 
f2ffd5c542ed..09f83e414a72 100644 --- a/arch/arm/kernel/perf_event_v6.c +++ b/arch/arm/kernel/perf_event_v6.c @@ -31,6 +31,14 @@ */ #if defined(CONFIG_CPU_V6) || defined(CONFIG_CPU_V6K) + +#include <asm/cputype.h> +#include <asm/irq_regs.h> +#include <asm/pmu.h> + +#include <linux/of.h> +#include <linux/platform_device.h> + enum armv6_perf_types { ARMV6_PERFCTR_ICACHE_MISS = 0x0, ARMV6_PERFCTR_IBUF_STALL = 0x1, @@ -543,24 +551,39 @@ static int armv6mpcore_pmu_init(struct arm_pmu *cpu_pmu) return 0; } -#else -static int armv6_1136_pmu_init(struct arm_pmu *cpu_pmu) -{ - return -ENODEV; -} -static int armv6_1156_pmu_init(struct arm_pmu *cpu_pmu) -{ - return -ENODEV; -} +static struct of_device_id armv6_pmu_of_device_ids[] = { + {.compatible = "arm,arm11mpcore-pmu", .data = armv6mpcore_pmu_init}, + {.compatible = "arm,arm1176-pmu", .data = armv6_1176_pmu_init}, + {.compatible = "arm,arm1136-pmu", .data = armv6_1136_pmu_init}, + { /* sentinel value */ } +}; -static int armv6_1176_pmu_init(struct arm_pmu *cpu_pmu) +static const struct pmu_probe_info armv6_pmu_probe_table[] = { + ARM_PMU_PROBE(ARM_CPU_PART_ARM1136, armv6_1136_pmu_init), + ARM_PMU_PROBE(ARM_CPU_PART_ARM1156, armv6_1156_pmu_init), + ARM_PMU_PROBE(ARM_CPU_PART_ARM1176, armv6_1176_pmu_init), + ARM_PMU_PROBE(ARM_CPU_PART_ARM11MPCORE, armv6mpcore_pmu_init), + { /* sentinel value */ } +}; + +static int armv6_pmu_device_probe(struct platform_device *pdev) { - return -ENODEV; + return arm_pmu_device_probe(pdev, armv6_pmu_of_device_ids, + armv6_pmu_probe_table); } -static int armv6mpcore_pmu_init(struct arm_pmu *cpu_pmu) +static struct platform_driver armv6_pmu_driver = { + .driver = { + .name = "armv6-pmu", + .of_match_table = armv6_pmu_of_device_ids, + }, + .probe = armv6_pmu_device_probe, +}; + +static int __init register_armv6_pmu_driver(void) { - return -ENODEV; + return platform_driver_register(&armv6_pmu_driver); } +device_initcall(register_armv6_pmu_driver); #endif /* CONFIG_CPU_V6 || CONFIG_CPU_V6K */ diff --git a/arch/arm/kernel/perf_event_v7.c b/arch/arm/kernel/perf_event_v7.c index 8993770c47de..f9b37f876e20 100644 --- a/arch/arm/kernel/perf_event_v7.c +++ b/arch/arm/kernel/perf_event_v7.c @@ -19,9 +19,15 @@ #ifdef CONFIG_CPU_V7 #include <asm/cp15.h> +#include <asm/cputype.h> +#include <asm/irq_regs.h> +#include <asm/pmu.h> #include <asm/vfp.h> #include "../vfp/vfpinstr.h" +#include <linux/of.h> +#include <linux/platform_device.h> + /* * Common ARMv7 event types * @@ -140,6 +146,23 @@ enum krait_perf_types { KRAIT_PERFCTR_L1_DTLB_ACCESS = 0x12210, }; +/* ARMv7 Scorpion specific event types */ +enum scorpion_perf_types { + SCORPION_LPM0_GROUP0 = 0x4c, + SCORPION_LPM1_GROUP0 = 0x50, + SCORPION_LPM2_GROUP0 = 0x54, + SCORPION_L2LPM_GROUP0 = 0x58, + SCORPION_VLPM_GROUP0 = 0x5c, + + SCORPION_ICACHE_ACCESS = 0x10053, + SCORPION_ICACHE_MISS = 0x10052, + + SCORPION_DTLB_ACCESS = 0x12013, + SCORPION_DTLB_MISS = 0x12012, + + SCORPION_ITLB_MISS = 0x12021, +}; + /* * Cortex-A8 HW events mapping * @@ -482,6 +505,49 @@ static const unsigned krait_perf_cache_map[PERF_COUNT_HW_CACHE_MAX] }; /* + * Scorpion HW events mapping + */ +static const unsigned scorpion_perf_map[PERF_COUNT_HW_MAX] = { + PERF_MAP_ALL_UNSUPPORTED, + [PERF_COUNT_HW_CPU_CYCLES] = ARMV7_PERFCTR_CPU_CYCLES, + [PERF_COUNT_HW_INSTRUCTIONS] = ARMV7_PERFCTR_INSTR_EXECUTED, + [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = ARMV7_PERFCTR_PC_WRITE, + [PERF_COUNT_HW_BRANCH_MISSES] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED, + [PERF_COUNT_HW_BUS_CYCLES] = ARMV7_PERFCTR_CLOCK_CYCLES, +}; + +static const 
unsigned scorpion_perf_cache_map[PERF_COUNT_HW_CACHE_MAX] + [PERF_COUNT_HW_CACHE_OP_MAX] + [PERF_COUNT_HW_CACHE_RESULT_MAX] = { + PERF_CACHE_MAP_ALL_UNSUPPORTED, + /* + * The performance counters don't differentiate between read and write + * accesses/misses so this isn't strictly correct, but it's the best we + * can do. Writes and reads get combined. + */ + [C(L1D)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_L1_DCACHE_ACCESS, + [C(L1D)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_L1_DCACHE_REFILL, + [C(L1D)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_L1_DCACHE_ACCESS, + [C(L1D)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_PERFCTR_L1_DCACHE_REFILL, + [C(L1I)][C(OP_READ)][C(RESULT_ACCESS)] = SCORPION_ICACHE_ACCESS, + [C(L1I)][C(OP_READ)][C(RESULT_MISS)] = SCORPION_ICACHE_MISS, + /* + * Only ITLB misses and DTLB refills are supported. If users want the + * DTLB refills misses a raw counter must be used. + */ + [C(DTLB)][C(OP_READ)][C(RESULT_ACCESS)] = SCORPION_DTLB_ACCESS, + [C(DTLB)][C(OP_READ)][C(RESULT_MISS)] = SCORPION_DTLB_MISS, + [C(DTLB)][C(OP_WRITE)][C(RESULT_ACCESS)] = SCORPION_DTLB_ACCESS, + [C(DTLB)][C(OP_WRITE)][C(RESULT_MISS)] = SCORPION_DTLB_MISS, + [C(ITLB)][C(OP_READ)][C(RESULT_MISS)] = SCORPION_ITLB_MISS, + [C(ITLB)][C(OP_WRITE)][C(RESULT_MISS)] = SCORPION_ITLB_MISS, + [C(BPU)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_PC_BRANCH_PRED, + [C(BPU)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED, + [C(BPU)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_PC_BRANCH_PRED, + [C(BPU)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED, +}; + +/* * Perf Events' indices */ #define ARMV7_IDX_CYCLE_COUNTER 0 @@ -976,6 +1042,12 @@ static int krait_map_event_no_branch(struct perf_event *event) &krait_perf_cache_map, 0xFFFFF); } +static int scorpion_map_event(struct perf_event *event) +{ + return armpmu_map_event(event, &scorpion_perf_map, + &scorpion_perf_cache_map, 0xFFFFF); +} + static void armv7pmu_init(struct arm_pmu *cpu_pmu) { cpu_pmu->handle_irq = armv7pmu_handle_irq; @@ -990,15 +1062,22 @@ static void armv7pmu_init(struct arm_pmu *cpu_pmu) cpu_pmu->max_period = (1LLU << 32) - 1; }; -static u32 armv7_read_num_pmnc_events(void) +static void armv7_read_num_pmnc_events(void *info) { - u32 nb_cnt; + int *nb_cnt = info; /* Read the nb of CNTx counters supported from PMNC */ - nb_cnt = (armv7_pmnc_read() >> ARMV7_PMNC_N_SHIFT) & ARMV7_PMNC_N_MASK; + *nb_cnt = (armv7_pmnc_read() >> ARMV7_PMNC_N_SHIFT) & ARMV7_PMNC_N_MASK; + + /* Add the CPU cycles counter */ + *nb_cnt += 1; +} - /* Add the CPU cycles counter and return */ - return nb_cnt + 1; +static int armv7_probe_num_events(struct arm_pmu *arm_pmu) +{ + return smp_call_function_any(&arm_pmu->supported_cpus, + armv7_read_num_pmnc_events, + &arm_pmu->num_events, 1); } static int armv7_a8_pmu_init(struct arm_pmu *cpu_pmu) @@ -1006,8 +1085,7 @@ static int armv7_a8_pmu_init(struct arm_pmu *cpu_pmu) armv7pmu_init(cpu_pmu); cpu_pmu->name = "armv7_cortex_a8"; cpu_pmu->map_event = armv7_a8_map_event; - cpu_pmu->num_events = armv7_read_num_pmnc_events(); - return 0; + return armv7_probe_num_events(cpu_pmu); } static int armv7_a9_pmu_init(struct arm_pmu *cpu_pmu) @@ -1015,8 +1093,7 @@ static int armv7_a9_pmu_init(struct arm_pmu *cpu_pmu) armv7pmu_init(cpu_pmu); cpu_pmu->name = "armv7_cortex_a9"; cpu_pmu->map_event = armv7_a9_map_event; - cpu_pmu->num_events = armv7_read_num_pmnc_events(); - return 0; + return armv7_probe_num_events(cpu_pmu); } static int armv7_a5_pmu_init(struct arm_pmu *cpu_pmu) @@ -1024,8 
+1101,7 @@ static int armv7_a5_pmu_init(struct arm_pmu *cpu_pmu) armv7pmu_init(cpu_pmu); cpu_pmu->name = "armv7_cortex_a5"; cpu_pmu->map_event = armv7_a5_map_event; - cpu_pmu->num_events = armv7_read_num_pmnc_events(); - return 0; + return armv7_probe_num_events(cpu_pmu); } static int armv7_a15_pmu_init(struct arm_pmu *cpu_pmu) @@ -1033,9 +1109,8 @@ static int armv7_a15_pmu_init(struct arm_pmu *cpu_pmu) armv7pmu_init(cpu_pmu); cpu_pmu->name = "armv7_cortex_a15"; cpu_pmu->map_event = armv7_a15_map_event; - cpu_pmu->num_events = armv7_read_num_pmnc_events(); cpu_pmu->set_event_filter = armv7pmu_set_event_filter; - return 0; + return armv7_probe_num_events(cpu_pmu); } static int armv7_a7_pmu_init(struct arm_pmu *cpu_pmu) @@ -1043,9 +1118,8 @@ static int armv7_a7_pmu_init(struct arm_pmu *cpu_pmu) armv7pmu_init(cpu_pmu); cpu_pmu->name = "armv7_cortex_a7"; cpu_pmu->map_event = armv7_a7_map_event; - cpu_pmu->num_events = armv7_read_num_pmnc_events(); cpu_pmu->set_event_filter = armv7pmu_set_event_filter; - return 0; + return armv7_probe_num_events(cpu_pmu); } static int armv7_a12_pmu_init(struct arm_pmu *cpu_pmu) @@ -1053,16 +1127,15 @@ static int armv7_a12_pmu_init(struct arm_pmu *cpu_pmu) armv7pmu_init(cpu_pmu); cpu_pmu->name = "armv7_cortex_a12"; cpu_pmu->map_event = armv7_a12_map_event; - cpu_pmu->num_events = armv7_read_num_pmnc_events(); cpu_pmu->set_event_filter = armv7pmu_set_event_filter; - return 0; + return armv7_probe_num_events(cpu_pmu); } static int armv7_a17_pmu_init(struct arm_pmu *cpu_pmu) { - armv7_a12_pmu_init(cpu_pmu); + int ret = armv7_a12_pmu_init(cpu_pmu); cpu_pmu->name = "armv7_cortex_a17"; - return 0; + return ret; } /* @@ -1103,6 +1176,12 @@ static int armv7_a17_pmu_init(struct arm_pmu *cpu_pmu) #define KRAIT_EVENT_MASK (KRAIT_EVENT | VENUM_EVENT) #define PMRESRn_EN BIT(31) +#define EVENT_REGION(event) (((event) >> 12) & 0xf) /* R */ +#define EVENT_GROUP(event) ((event) & 0xf) /* G */ +#define EVENT_CODE(event) (((event) >> 4) & 0xff) /* CC */ +#define EVENT_VENUM(event) (!!(event & VENUM_EVENT)) /* N=2 */ +#define EVENT_CPU(event) (!!(event & KRAIT_EVENT)) /* N=1 */ + static u32 krait_read_pmresrn(int n) { u32 val; @@ -1141,19 +1220,19 @@ static void krait_write_pmresrn(int n, u32 val) } } -static u32 krait_read_vpmresr0(void) +static u32 venum_read_pmresr(void) { u32 val; asm volatile("mrc p10, 7, %0, c11, c0, 0" : "=r" (val)); return val; } -static void krait_write_vpmresr0(u32 val) +static void venum_write_pmresr(u32 val) { asm volatile("mcr p10, 7, %0, c11, c0, 0" : : "r" (val)); } -static void krait_pre_vpmresr0(u32 *venum_orig_val, u32 *fp_orig_val) +static void venum_pre_pmresr(u32 *venum_orig_val, u32 *fp_orig_val) { u32 venum_new_val; u32 fp_new_val; @@ -1170,7 +1249,7 @@ static void krait_pre_vpmresr0(u32 *venum_orig_val, u32 *fp_orig_val) fmxr(FPEXC, fp_new_val); } -static void krait_post_vpmresr0(u32 venum_orig_val, u32 fp_orig_val) +static void venum_post_pmresr(u32 venum_orig_val, u32 fp_orig_val) { BUG_ON(preemptible()); /* Restore FPEXC */ @@ -1193,16 +1272,11 @@ static void krait_evt_setup(int idx, u32 config_base) u32 val; u32 mask; u32 vval, fval; - unsigned int region; - unsigned int group; - unsigned int code; + unsigned int region = EVENT_REGION(config_base); + unsigned int group = EVENT_GROUP(config_base); + unsigned int code = EVENT_CODE(config_base); unsigned int group_shift; - bool venum_event; - - venum_event = !!(config_base & VENUM_EVENT); - region = (config_base >> 12) & 0xf; - code = (config_base >> 4) & 0xff; - group = (config_base >> 0) 
& 0xf; + bool venum_event = EVENT_VENUM(config_base); group_shift = group * 8; mask = 0xff << group_shift; @@ -1217,16 +1291,14 @@ static void krait_evt_setup(int idx, u32 config_base) val |= config_base & (ARMV7_EXCLUDE_USER | ARMV7_EXCLUDE_PL1); armv7_pmnc_write_evtsel(idx, val); - asm volatile("mcr p15, 0, %0, c9, c15, 0" : : "r" (0)); - if (venum_event) { - krait_pre_vpmresr0(&vval, &fval); - val = krait_read_vpmresr0(); + venum_pre_pmresr(&vval, &fval); + val = venum_read_pmresr(); val &= ~mask; val |= code << group_shift; val |= PMRESRn_EN; - krait_write_vpmresr0(val); - krait_post_vpmresr0(vval, fval); + venum_write_pmresr(val); + venum_post_pmresr(vval, fval); } else { val = krait_read_pmresrn(region); val &= ~mask; @@ -1236,7 +1308,7 @@ static void krait_evt_setup(int idx, u32 config_base) } } -static u32 krait_clear_pmresrn_group(u32 val, int group) +static u32 clear_pmresrn_group(u32 val, int group) { u32 mask; int group_shift; @@ -1256,23 +1328,19 @@ static void krait_clearpmu(u32 config_base) { u32 val; u32 vval, fval; - unsigned int region; - unsigned int group; - bool venum_event; - - venum_event = !!(config_base & VENUM_EVENT); - region = (config_base >> 12) & 0xf; - group = (config_base >> 0) & 0xf; + unsigned int region = EVENT_REGION(config_base); + unsigned int group = EVENT_GROUP(config_base); + bool venum_event = EVENT_VENUM(config_base); if (venum_event) { - krait_pre_vpmresr0(&vval, &fval); - val = krait_read_vpmresr0(); - val = krait_clear_pmresrn_group(val, group); - krait_write_vpmresr0(val); - krait_post_vpmresr0(vval, fval); + venum_pre_pmresr(&vval, &fval); + val = venum_read_pmresr(); + val = clear_pmresrn_group(val, group); + venum_write_pmresr(val); + venum_post_pmresr(vval, fval); } else { val = krait_read_pmresrn(region); - val = krait_clear_pmresrn_group(val, group); + val = clear_pmresrn_group(val, group); krait_write_pmresrn(region, val); } } @@ -1342,6 +1410,8 @@ static void krait_pmu_enable_event(struct perf_event *event) static void krait_pmu_reset(void *info) { u32 vval, fval; + struct arm_pmu *cpu_pmu = info; + u32 idx, nb_cnt = cpu_pmu->num_events; armv7pmu_reset(info); @@ -1350,9 +1420,16 @@ static void krait_pmu_reset(void *info) krait_write_pmresrn(1, 0); krait_write_pmresrn(2, 0); - krait_pre_vpmresr0(&vval, &fval); - krait_write_vpmresr0(0); - krait_post_vpmresr0(vval, fval); + venum_pre_pmresr(&vval, &fval); + venum_write_pmresr(0); + venum_post_pmresr(vval, fval); + + /* Reset PMxEVNCTCR to sane default */ + for (idx = ARMV7_IDX_CYCLE_COUNTER; idx < nb_cnt; ++idx) { + armv7_pmnc_select_counter(idx); + asm volatile("mcr p15, 0, %0, c9, c15, 0" : : "r" (0)); + } + } static int krait_event_to_bit(struct perf_event *event, unsigned int region, @@ -1386,26 +1463,18 @@ static int krait_pmu_get_event_idx(struct pmu_hw_events *cpuc, { int idx; int bit = -1; - unsigned int prefix; - unsigned int region; - unsigned int code; - unsigned int group; - bool krait_event; struct hw_perf_event *hwc = &event->hw; + unsigned int region = EVENT_REGION(hwc->config_base); + unsigned int code = EVENT_CODE(hwc->config_base); + unsigned int group = EVENT_GROUP(hwc->config_base); + bool venum_event = EVENT_VENUM(hwc->config_base); + bool krait_event = EVENT_CPU(hwc->config_base); - region = (hwc->config_base >> 12) & 0xf; - code = (hwc->config_base >> 4) & 0xff; - group = (hwc->config_base >> 0) & 0xf; - krait_event = !!(hwc->config_base & KRAIT_EVENT_MASK); - - if (krait_event) { + if (venum_event || krait_event) { /* Ignore invalid events */ if (group > 3 || 
region > 2) return -EINVAL; - prefix = hwc->config_base & KRAIT_EVENT_MASK; - if (prefix != KRAIT_EVENT && prefix != VENUM_EVENT) - return -EINVAL; - if (prefix == VENUM_EVENT && (code & 0xe0)) + if (venum_event && (code & 0xe0)) return -EINVAL; bit = krait_event_to_bit(event, region, group); @@ -1425,15 +1494,12 @@ static void krait_pmu_clear_event_idx(struct pmu_hw_events *cpuc, { int bit; struct hw_perf_event *hwc = &event->hw; - unsigned int region; - unsigned int group; - bool krait_event; - - region = (hwc->config_base >> 12) & 0xf; - group = (hwc->config_base >> 0) & 0xf; - krait_event = !!(hwc->config_base & KRAIT_EVENT_MASK); + unsigned int region = EVENT_REGION(hwc->config_base); + unsigned int group = EVENT_GROUP(hwc->config_base); + bool venum_event = EVENT_VENUM(hwc->config_base); + bool krait_event = EVENT_CPU(hwc->config_base); - if (krait_event) { + if (venum_event || krait_event) { bit = krait_event_to_bit(event, region, group); clear_bit(bit, cpuc->used_mask); } @@ -1449,53 +1515,389 @@ static int krait_pmu_init(struct arm_pmu *cpu_pmu) cpu_pmu->map_event = krait_map_event_no_branch; else cpu_pmu->map_event = krait_map_event; - cpu_pmu->num_events = armv7_read_num_pmnc_events(); cpu_pmu->set_event_filter = armv7pmu_set_event_filter; cpu_pmu->reset = krait_pmu_reset; cpu_pmu->enable = krait_pmu_enable_event; cpu_pmu->disable = krait_pmu_disable_event; cpu_pmu->get_event_idx = krait_pmu_get_event_idx; cpu_pmu->clear_event_idx = krait_pmu_clear_event_idx; - return 0; + return armv7_probe_num_events(cpu_pmu); } -#else -static inline int armv7_a8_pmu_init(struct arm_pmu *cpu_pmu) + +/* + * Scorpion Local Performance Monitor Register (LPMn) + * + * 31 30 24 16 8 0 + * +--------------------------------+ + * LPM0 | EN | CC | CC | CC | CC | N = 1, R = 0 + * +--------------------------------+ + * LPM1 | EN | CC | CC | CC | CC | N = 1, R = 1 + * +--------------------------------+ + * LPM2 | EN | CC | CC | CC | CC | N = 1, R = 2 + * +--------------------------------+ + * L2LPM | EN | CC | CC | CC | CC | N = 1, R = 3 + * +--------------------------------+ + * VLPM | EN | CC | CC | CC | CC | N = 2, R = ? + * +--------------------------------+ + * EN | G=3 | G=2 | G=1 | G=0 + * + * + * Event Encoding: + * + * hwc->config_base = 0xNRCCG + * + * N = prefix, 1 for Scorpion CPU (LPMn/L2LPM), 2 for Venum VFP (VLPM) + * R = region register + * CC = class of events the group G is choosing from + * G = group or particular event + * + * Example: 0x12021 is a Scorpion CPU event in LPM2's group 1 with code 2 + * + * A region (R) corresponds to a piece of the CPU (execution unit, instruction + * unit, etc.) while the event code (CC) corresponds to a particular class of + * events (interrupts for example). An event code is broken down into + * groups (G) that can be mapped into the PMU (irq, fiqs, and irq+fiqs for + * example). 
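As a rough standalone illustration of the 0xNRCCG encoding described above (this sketch is not part of the patch; the shifts simply mirror the EVENT_REGION()/EVENT_CODE()/EVENT_GROUP() helpers defined earlier in this file), decoding the example event 0x12021 yields N=1, R=2, CC=0x02, G=1, i.e. a Scorpion CPU event in LPM2's group 1 with code 2:

#include <stdio.h>

int main(void)
{
	unsigned int config_base = 0x12021;		/* example from the comment above */
	unsigned int n  = (config_base >> 16) & 0xf;	/* prefix: 1 = CPU, 2 = Venum VFP */
	unsigned int r  = (config_base >> 12) & 0xf;	/* region register (LPM0..L2LPM) */
	unsigned int cc = (config_base >> 4)  & 0xff;	/* class of events */
	unsigned int g  = config_base & 0xf;		/* group within the region */

	printf("N=%u R=%u CC=0x%02x G=%u\n", n, r, cc, g);	/* prints N=1 R=2 CC=0x02 G=1 */
	return 0;
}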
+ */ + +static u32 scorpion_read_pmresrn(int n) { - return -ENODEV; + u32 val; + + switch (n) { + case 0: + asm volatile("mrc p15, 0, %0, c15, c0, 0" : "=r" (val)); + break; + case 1: + asm volatile("mrc p15, 1, %0, c15, c0, 0" : "=r" (val)); + break; + case 2: + asm volatile("mrc p15, 2, %0, c15, c0, 0" : "=r" (val)); + break; + case 3: + asm volatile("mrc p15, 3, %0, c15, c2, 0" : "=r" (val)); + break; + default: + BUG(); /* Should be validated in scorpion_pmu_get_event_idx() */ + } + + return val; } -static inline int armv7_a9_pmu_init(struct arm_pmu *cpu_pmu) +static void scorpion_write_pmresrn(int n, u32 val) { - return -ENODEV; + switch (n) { + case 0: + asm volatile("mcr p15, 0, %0, c15, c0, 0" : : "r" (val)); + break; + case 1: + asm volatile("mcr p15, 1, %0, c15, c0, 0" : : "r" (val)); + break; + case 2: + asm volatile("mcr p15, 2, %0, c15, c0, 0" : : "r" (val)); + break; + case 3: + asm volatile("mcr p15, 3, %0, c15, c2, 0" : : "r" (val)); + break; + default: + BUG(); /* Should be validated in scorpion_pmu_get_event_idx() */ + } } -static inline int armv7_a5_pmu_init(struct arm_pmu *cpu_pmu) +static u32 scorpion_get_pmresrn_event(unsigned int region) { - return -ENODEV; + static const u32 pmresrn_table[] = { SCORPION_LPM0_GROUP0, + SCORPION_LPM1_GROUP0, + SCORPION_LPM2_GROUP0, + SCORPION_L2LPM_GROUP0 }; + return pmresrn_table[region]; } -static inline int armv7_a15_pmu_init(struct arm_pmu *cpu_pmu) +static void scorpion_evt_setup(int idx, u32 config_base) { - return -ENODEV; + u32 val; + u32 mask; + u32 vval, fval; + unsigned int region = EVENT_REGION(config_base); + unsigned int group = EVENT_GROUP(config_base); + unsigned int code = EVENT_CODE(config_base); + unsigned int group_shift; + bool venum_event = EVENT_VENUM(config_base); + + group_shift = group * 8; + mask = 0xff << group_shift; + + /* Configure evtsel for the region and group */ + if (venum_event) + val = SCORPION_VLPM_GROUP0; + else + val = scorpion_get_pmresrn_event(region); + val += group; + /* Mix in mode-exclusion bits */ + val |= config_base & (ARMV7_EXCLUDE_USER | ARMV7_EXCLUDE_PL1); + armv7_pmnc_write_evtsel(idx, val); + + asm volatile("mcr p15, 0, %0, c9, c15, 0" : : "r" (0)); + + if (venum_event) { + venum_pre_pmresr(&vval, &fval); + val = venum_read_pmresr(); + val &= ~mask; + val |= code << group_shift; + val |= PMRESRn_EN; + venum_write_pmresr(val); + venum_post_pmresr(vval, fval); + } else { + val = scorpion_read_pmresrn(region); + val &= ~mask; + val |= code << group_shift; + val |= PMRESRn_EN; + scorpion_write_pmresrn(region, val); + } } -static inline int armv7_a7_pmu_init(struct arm_pmu *cpu_pmu) +static void scorpion_clearpmu(u32 config_base) { - return -ENODEV; + u32 val; + u32 vval, fval; + unsigned int region = EVENT_REGION(config_base); + unsigned int group = EVENT_GROUP(config_base); + bool venum_event = EVENT_VENUM(config_base); + + if (venum_event) { + venum_pre_pmresr(&vval, &fval); + val = venum_read_pmresr(); + val = clear_pmresrn_group(val, group); + venum_write_pmresr(val); + venum_post_pmresr(vval, fval); + } else { + val = scorpion_read_pmresrn(region); + val = clear_pmresrn_group(val, group); + scorpion_write_pmresrn(region, val); + } +} + +static void scorpion_pmu_disable_event(struct perf_event *event) +{ + unsigned long flags; + struct hw_perf_event *hwc = &event->hw; + int idx = hwc->idx; + struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu); + struct pmu_hw_events *events = this_cpu_ptr(cpu_pmu->hw_events); + + /* Disable counter and interrupt */ + 
raw_spin_lock_irqsave(&events->pmu_lock, flags); + + /* Disable counter */ + armv7_pmnc_disable_counter(idx); + + /* + * Clear pmresr code (if destined for PMNx counters) + */ + if (hwc->config_base & KRAIT_EVENT_MASK) + scorpion_clearpmu(hwc->config_base); + + /* Disable interrupt for this counter */ + armv7_pmnc_disable_intens(idx); + + raw_spin_unlock_irqrestore(&events->pmu_lock, flags); } -static inline int armv7_a12_pmu_init(struct arm_pmu *cpu_pmu) +static void scorpion_pmu_enable_event(struct perf_event *event) { - return -ENODEV; + unsigned long flags; + struct hw_perf_event *hwc = &event->hw; + int idx = hwc->idx; + struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu); + struct pmu_hw_events *events = this_cpu_ptr(cpu_pmu->hw_events); + + /* + * Enable counter and interrupt, and set the counter to count + * the event that we're interested in. + */ + raw_spin_lock_irqsave(&events->pmu_lock, flags); + + /* Disable counter */ + armv7_pmnc_disable_counter(idx); + + /* + * Set event (if destined for PMNx counters) + * We don't set the event for the cycle counter because we + * don't have the ability to perform event filtering. + */ + if (hwc->config_base & KRAIT_EVENT_MASK) + scorpion_evt_setup(idx, hwc->config_base); + else if (idx != ARMV7_IDX_CYCLE_COUNTER) + armv7_pmnc_write_evtsel(idx, hwc->config_base); + + /* Enable interrupt for this counter */ + armv7_pmnc_enable_intens(idx); + + /* Enable counter */ + armv7_pmnc_enable_counter(idx); + + raw_spin_unlock_irqrestore(&events->pmu_lock, flags); } -static inline int armv7_a17_pmu_init(struct arm_pmu *cpu_pmu) +static void scorpion_pmu_reset(void *info) { - return -ENODEV; + u32 vval, fval; + struct arm_pmu *cpu_pmu = info; + u32 idx, nb_cnt = cpu_pmu->num_events; + + armv7pmu_reset(info); + + /* Clear all pmresrs */ + scorpion_write_pmresrn(0, 0); + scorpion_write_pmresrn(1, 0); + scorpion_write_pmresrn(2, 0); + scorpion_write_pmresrn(3, 0); + + venum_pre_pmresr(&vval, &fval); + venum_write_pmresr(0); + venum_post_pmresr(vval, fval); + + /* Reset PMxEVNCTCR to sane default */ + for (idx = ARMV7_IDX_CYCLE_COUNTER; idx < nb_cnt; ++idx) { + armv7_pmnc_select_counter(idx); + asm volatile("mcr p15, 0, %0, c9, c15, 0" : : "r" (0)); + } } -static inline int krait_pmu_init(struct arm_pmu *cpu_pmu) +static int scorpion_event_to_bit(struct perf_event *event, unsigned int region, + unsigned int group) +{ + int bit; + struct hw_perf_event *hwc = &event->hw; + struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu); + + if (hwc->config_base & VENUM_EVENT) + bit = SCORPION_VLPM_GROUP0; + else + bit = scorpion_get_pmresrn_event(region); + bit -= scorpion_get_pmresrn_event(0); + bit += group; + /* + * Lower bits are reserved for use by the counters (see + * armv7pmu_get_event_idx() for more info) + */ + bit += ARMV7_IDX_COUNTER_LAST(cpu_pmu) + 1; + + return bit; +} + +/* + * We check for column exclusion constraints here. + * Two events cant use the same group within a pmresr register. 
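To make the column-exclusion rule above concrete, here is a simplified, self-contained sketch (not part of the patch): the bit reserved in used_mask depends only on the region and group, so a second event naming the same region/group pair collides, while one in a different group does not. The flat region * 4 term and the fixed offset of 8 are stand-ins for the scorpion_get_pmresrn_event() arithmetic and for ARMV7_IDX_COUNTER_LAST(cpu_pmu) + 1.

#include <stdio.h>

static int event_to_bit(unsigned int region, unsigned int group)
{
	/* LPM0/LPM1/LPM2/L2LPM group-0 codes are 4 apart (0x4c..0x58),
	 * so each region contributes region * 4; 8 stands in for the
	 * counter indices reserved below these bits. */
	return region * 4 + group + 8;
}

int main(void)
{
	unsigned long used_mask = 0;
	int first  = event_to_bit(2, 1);	/* e.g. event 0x12021 */
	int second = event_to_bit(2, 1);	/* same region and group */
	int third  = event_to_bit(2, 2);	/* same region, different group */

	used_mask |= 1UL << first;
	printf("second event: %s\n", (used_mask >> second) & 1 ? "-EAGAIN" : "ok");
	printf("third event:  %s\n", (used_mask >> third) & 1 ? "-EAGAIN" : "ok");
	return 0;
}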
+ */ +static int scorpion_pmu_get_event_idx(struct pmu_hw_events *cpuc, + struct perf_event *event) +{ + int idx; + int bit = -1; + struct hw_perf_event *hwc = &event->hw; + unsigned int region = EVENT_REGION(hwc->config_base); + unsigned int group = EVENT_GROUP(hwc->config_base); + bool venum_event = EVENT_VENUM(hwc->config_base); + bool scorpion_event = EVENT_CPU(hwc->config_base); + + if (venum_event || scorpion_event) { + /* Ignore invalid events */ + if (group > 3 || region > 3) + return -EINVAL; + + bit = scorpion_event_to_bit(event, region, group); + if (test_and_set_bit(bit, cpuc->used_mask)) + return -EAGAIN; + } + + idx = armv7pmu_get_event_idx(cpuc, event); + if (idx < 0 && bit >= 0) + clear_bit(bit, cpuc->used_mask); + + return idx; +} + +static void scorpion_pmu_clear_event_idx(struct pmu_hw_events *cpuc, + struct perf_event *event) +{ + int bit; + struct hw_perf_event *hwc = &event->hw; + unsigned int region = EVENT_REGION(hwc->config_base); + unsigned int group = EVENT_GROUP(hwc->config_base); + bool venum_event = EVENT_VENUM(hwc->config_base); + bool scorpion_event = EVENT_CPU(hwc->config_base); + + if (venum_event || scorpion_event) { + bit = scorpion_event_to_bit(event, region, group); + clear_bit(bit, cpuc->used_mask); + } +} + +static int scorpion_pmu_init(struct arm_pmu *cpu_pmu) +{ + armv7pmu_init(cpu_pmu); + cpu_pmu->name = "armv7_scorpion"; + cpu_pmu->map_event = scorpion_map_event; + cpu_pmu->reset = scorpion_pmu_reset; + cpu_pmu->enable = scorpion_pmu_enable_event; + cpu_pmu->disable = scorpion_pmu_disable_event; + cpu_pmu->get_event_idx = scorpion_pmu_get_event_idx; + cpu_pmu->clear_event_idx = scorpion_pmu_clear_event_idx; + return armv7_probe_num_events(cpu_pmu); +} + +static int scorpion_mp_pmu_init(struct arm_pmu *cpu_pmu) +{ + armv7pmu_init(cpu_pmu); + cpu_pmu->name = "armv7_scorpion_mp"; + cpu_pmu->map_event = scorpion_map_event; + cpu_pmu->reset = scorpion_pmu_reset; + cpu_pmu->enable = scorpion_pmu_enable_event; + cpu_pmu->disable = scorpion_pmu_disable_event; + cpu_pmu->get_event_idx = scorpion_pmu_get_event_idx; + cpu_pmu->clear_event_idx = scorpion_pmu_clear_event_idx; + return armv7_probe_num_events(cpu_pmu); +} + +static const struct of_device_id armv7_pmu_of_device_ids[] = { + {.compatible = "arm,cortex-a17-pmu", .data = armv7_a17_pmu_init}, + {.compatible = "arm,cortex-a15-pmu", .data = armv7_a15_pmu_init}, + {.compatible = "arm,cortex-a12-pmu", .data = armv7_a12_pmu_init}, + {.compatible = "arm,cortex-a9-pmu", .data = armv7_a9_pmu_init}, + {.compatible = "arm,cortex-a8-pmu", .data = armv7_a8_pmu_init}, + {.compatible = "arm,cortex-a7-pmu", .data = armv7_a7_pmu_init}, + {.compatible = "arm,cortex-a5-pmu", .data = armv7_a5_pmu_init}, + {.compatible = "qcom,krait-pmu", .data = krait_pmu_init}, + {.compatible = "qcom,scorpion-pmu", .data = scorpion_pmu_init}, + {.compatible = "qcom,scorpion-mp-pmu", .data = scorpion_mp_pmu_init}, + {}, +}; + +static const struct pmu_probe_info armv7_pmu_probe_table[] = { + ARM_PMU_PROBE(ARM_CPU_PART_CORTEX_A8, armv7_a8_pmu_init), + ARM_PMU_PROBE(ARM_CPU_PART_CORTEX_A9, armv7_a9_pmu_init), + { /* sentinel value */ } +}; + + +static int armv7_pmu_device_probe(struct platform_device *pdev) +{ + return arm_pmu_device_probe(pdev, armv7_pmu_of_device_ids, + armv7_pmu_probe_table); +} + +static struct platform_driver armv7_pmu_driver = { + .driver = { + .name = "armv7-pmu", + .of_match_table = armv7_pmu_of_device_ids, + }, + .probe = armv7_pmu_device_probe, +}; + +static int __init register_armv7_pmu_driver(void) { - return 
-ENODEV; + return platform_driver_register(&armv7_pmu_driver); } +device_initcall(register_armv7_pmu_driver); #endif /* CONFIG_CPU_V7 */ diff --git a/arch/arm/kernel/perf_event_xscale.c b/arch/arm/kernel/perf_event_xscale.c index 8af9f1f82c68..304d056d5b25 100644 --- a/arch/arm/kernel/perf_event_xscale.c +++ b/arch/arm/kernel/perf_event_xscale.c @@ -13,6 +13,14 @@ */ #ifdef CONFIG_CPU_XSCALE + +#include <asm/cputype.h> +#include <asm/irq_regs.h> +#include <asm/pmu.h> + +#include <linux/of.h> +#include <linux/platform_device.h> + enum xscale_perf_types { XSCALE_PERFCTR_ICACHE_MISS = 0x00, XSCALE_PERFCTR_ICACHE_NO_DELIVER = 0x01, @@ -740,14 +748,28 @@ static int xscale2pmu_init(struct arm_pmu *cpu_pmu) return 0; } -#else -static inline int xscale1pmu_init(struct arm_pmu *cpu_pmu) + +static const struct pmu_probe_info xscale_pmu_probe_table[] = { + XSCALE_PMU_PROBE(ARM_CPU_XSCALE_ARCH_V1, xscale1pmu_init), + XSCALE_PMU_PROBE(ARM_CPU_XSCALE_ARCH_V2, xscale2pmu_init), + { /* sentinel value */ } +}; + +static int xscale_pmu_device_probe(struct platform_device *pdev) { - return -ENODEV; + return arm_pmu_device_probe(pdev, NULL, xscale_pmu_probe_table); } -static inline int xscale2pmu_init(struct arm_pmu *cpu_pmu) +static struct platform_driver xscale_pmu_driver = { + .driver = { + .name = "xscale-pmu", + }, + .probe = xscale_pmu_device_probe, +}; + +static int __init register_xscale_pmu_driver(void) { - return -ENODEV; + return platform_driver_register(&xscale_pmu_driver); } +device_initcall(register_xscale_pmu_driver); #endif /* CONFIG_CPU_XSCALE */ diff --git a/arch/arm/kernel/process.c b/arch/arm/kernel/process.c index fdfa3a78ec8c..f192a2a41719 100644 --- a/arch/arm/kernel/process.c +++ b/arch/arm/kernel/process.c @@ -17,12 +17,9 @@ #include <linux/stddef.h> #include <linux/unistd.h> #include <linux/user.h> -#include <linux/delay.h> -#include <linux/reboot.h> #include <linux/interrupt.h> #include <linux/kallsyms.h> #include <linux/init.h> -#include <linux/cpu.h> #include <linux/elfcore.h> #include <linux/pm.h> #include <linux/tick.h> @@ -31,16 +28,14 @@ #include <linux/random.h> #include <linux/hw_breakpoint.h> #include <linux/leds.h> -#include <linux/reboot.h> -#include <asm/cacheflush.h> -#include <asm/idmap.h> #include <asm/processor.h> #include <asm/thread_notify.h> #include <asm/stacktrace.h> #include <asm/system_misc.h> #include <asm/mach/time.h> #include <asm/tls.h> +#include <asm/vdso.h> #ifdef CONFIG_CC_STACKPROTECTOR #include <linux/stackprotector.h> @@ -59,69 +54,6 @@ static const char *isa_modes[] __maybe_unused = { "ARM" , "Thumb" , "Jazelle", "ThumbEE" }; -extern void call_with_stack(void (*fn)(void *), void *arg, void *sp); -typedef void (*phys_reset_t)(unsigned long); - -/* - * A temporary stack to use for CPU reset. This is static so that we - * don't clobber it with the identity mapping. When running with this - * stack, any references to the current task *will not work* so you - * should really do as little as possible before jumping to your reset - * code. - */ -static u64 soft_restart_stack[16]; - -static void __soft_restart(void *addr) -{ - phys_reset_t phys_reset; - - /* Take out a flat memory mapping. */ - setup_mm_for_reboot(); - - /* Clean and invalidate caches */ - flush_cache_all(); - - /* Turn off caching */ - cpu_proc_fin(); - - /* Push out any further dirty data, and ensure cache is empty */ - flush_cache_all(); - - /* Switch to the identity mapping. 
*/ - phys_reset = (phys_reset_t)(unsigned long)virt_to_phys(cpu_reset); - phys_reset((unsigned long)addr); - - /* Should never get here. */ - BUG(); -} - -void soft_restart(unsigned long addr) -{ - u64 *stack = soft_restart_stack + ARRAY_SIZE(soft_restart_stack); - - /* Disable interrupts first */ - raw_local_irq_disable(); - local_fiq_disable(); - - /* Disable the L2 if we're the last man standing. */ - if (num_online_cpus() == 1) - outer_disable(); - - /* Change to the new stack and continue with the reset. */ - call_with_stack(__soft_restart, (void *)addr, (void *)stack); - - /* Should never get here. */ - BUG(); -} - -/* - * Function pointers to optional machine specific functions - */ -void (*pm_power_off)(void); -EXPORT_SYMBOL(pm_power_off); - -void (*arm_pm_restart)(enum reboot_mode reboot_mode, const char *cmd); - /* * This is our default idle handler. */ @@ -166,79 +98,6 @@ void arch_cpu_idle_dead(void) } #endif -/* - * Called by kexec, immediately prior to machine_kexec(). - * - * This must completely disable all secondary CPUs; simply causing those CPUs - * to execute e.g. a RAM-based pin loop is not sufficient. This allows the - * kexec'd kernel to use any and all RAM as it sees fit, without having to - * avoid any code or data used by any SW CPU pin loop. The CPU hotplug - * functionality embodied in disable_nonboot_cpus() to achieve this. - */ -void machine_shutdown(void) -{ - disable_nonboot_cpus(); -} - -/* - * Halting simply requires that the secondary CPUs stop performing any - * activity (executing tasks, handling interrupts). smp_send_stop() - * achieves this. - */ -void machine_halt(void) -{ - local_irq_disable(); - smp_send_stop(); - - local_irq_disable(); - while (1); -} - -/* - * Power-off simply requires that the secondary CPUs stop performing any - * activity (executing tasks, handling interrupts). smp_send_stop() - * achieves this. When the system power is turned off, it will take all CPUs - * with it. - */ -void machine_power_off(void) -{ - local_irq_disable(); - smp_send_stop(); - - if (pm_power_off) - pm_power_off(); -} - -/* - * Restart requires that the secondary CPUs stop performing any activity - * while the primary CPU resets the system. Systems with a single CPU can - * use soft_restart() as their machine descriptor's .restart hook, since that - * will cause the only available CPU to reset. Systems with multiple CPUs must - * provide a HW restart implementation, to ensure that all CPUs reset at once. - * This is required so that any code running after reset on the primary CPU - * doesn't have to co-ordinate with other CPUs to ensure they aren't still - * executing pre-reset code, and using RAM that the primary CPU's code wishes - * to use. Implementing such co-ordination would be essentially impossible. - */ -void machine_restart(char *cmd) -{ - local_irq_disable(); - smp_send_stop(); - - if (arm_pm_restart) - arm_pm_restart(reboot_mode, cmd); - else - do_kernel_restart(cmd); - - /* Give a grace period for failure to restart of 1s */ - mdelay(1000); - - /* Whoops - the platform was unable to reboot. Tell the user! */ - printk("Reboot failed -- System halted\n"); - local_irq_disable(); - while (1); -} - void __show_regs(struct pt_regs *regs) { unsigned long flags; @@ -475,7 +334,7 @@ const char *arch_vma_name(struct vm_area_struct *vma) } /* If possible, provide a placement hint at a random offset from the - * stack for the signal page. + * stack for the sigpage and vdso pages. 
*/ static unsigned long sigpage_addr(const struct mm_struct *mm, unsigned int npages) @@ -519,6 +378,7 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp) { struct mm_struct *mm = current->mm; struct vm_area_struct *vma; + unsigned long npages; unsigned long addr; unsigned long hint; int ret = 0; @@ -528,9 +388,12 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp) if (!signal_page) return -ENOMEM; + npages = 1; /* for sigpage */ + npages += vdso_total_pages; + down_write(&mm->mmap_sem); - hint = sigpage_addr(mm, 1); - addr = get_unmapped_area(NULL, hint, PAGE_SIZE, 0, 0); + hint = sigpage_addr(mm, npages); + addr = get_unmapped_area(NULL, hint, npages << PAGE_SHIFT, 0, 0); if (IS_ERR_VALUE(addr)) { ret = addr; goto up_fail; @@ -547,6 +410,12 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp) mm->context.sigpage = addr; + /* Unlike the sigpage, failure to install the vdso is unlikely + * to be fatal to the process, so no error check needed + * here. + */ + arm_install_vdso(mm, addr + PAGE_SIZE); + up_fail: up_write(&mm->mmap_sem); return ret; diff --git a/arch/arm/kernel/psci-call.S b/arch/arm/kernel/psci-call.S new file mode 100644 index 000000000000..a78e9e1e206d --- /dev/null +++ b/arch/arm/kernel/psci-call.S @@ -0,0 +1,31 @@ +/* + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * Copyright (C) 2015 ARM Limited + * + * Author: Mark Rutland <mark.rutland@arm.com> + */ + +#include <linux/linkage.h> + +#include <asm/opcodes-sec.h> +#include <asm/opcodes-virt.h> + +/* int __invoke_psci_fn_hvc(u32 function_id, u32 arg0, u32 arg1, u32 arg2) */ +ENTRY(__invoke_psci_fn_hvc) + __HVC(0) + bx lr +ENDPROC(__invoke_psci_fn_hvc) + +/* int __invoke_psci_fn_smc(u32 function_id, u32 arg0, u32 arg1, u32 arg2) */ +ENTRY(__invoke_psci_fn_smc) + __SMC(0) + bx lr +ENDPROC(__invoke_psci_fn_smc) diff --git a/arch/arm/kernel/psci.c b/arch/arm/kernel/psci.c index f73891b6b730..f90fdf4ce7c7 100644 --- a/arch/arm/kernel/psci.c +++ b/arch/arm/kernel/psci.c @@ -23,8 +23,6 @@ #include <asm/compiler.h> #include <asm/errno.h> -#include <asm/opcodes-sec.h> -#include <asm/opcodes-virt.h> #include <asm/psci.h> #include <asm/system_misc.h> @@ -33,6 +31,9 @@ struct psci_operations psci_ops; static int (*invoke_psci_fn)(u32, u32, u32, u32); typedef int (*psci_initcall_t)(const struct device_node *); +asmlinkage int __invoke_psci_fn_hvc(u32, u32, u32, u32); +asmlinkage int __invoke_psci_fn_smc(u32, u32, u32, u32); + enum psci_function { PSCI_FN_CPU_SUSPEND, PSCI_FN_CPU_ON, @@ -71,40 +72,6 @@ static u32 psci_power_state_pack(struct psci_power_state state) & PSCI_0_2_POWER_STATE_AFFL_MASK); } -/* - * The following two functions are invoked via the invoke_psci_fn pointer - * and will not be inlined, allowing us to piggyback on the AAPCS. 
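The AAPCS piggybacking mentioned above is also what lets the new psci-call.S veneers stay so small: an out-of-line call already places function_id, arg0, arg1 and arg2 in r0-r3, which is exactly the register layout PSCI expects, and the firmware's result comes back in r0, the AAPCS return register, so the veneer only needs the SMC/HVC and a bx lr. A minimal caller sketch, not part of the patch (psci_cpu_on_sketch and the hard-coded 0x84000003, i.e. PSCI_0_2_FN_CPU_ON from <uapi/linux/psci.h>, are for illustration only):

/* Declaration as added to psci.c by this patch. */
asmlinkage int __invoke_psci_fn_smc(u32 function_id, u32 arg0, u32 arg1, u32 arg2);

static int (*invoke_psci_fn)(u32, u32, u32, u32) = __invoke_psci_fn_smc;

static int psci_cpu_on_sketch(u32 cpuid, u32 entry_point)
{
	/* All four arguments land in r0-r3; the SMC result returns in r0. */
	return invoke_psci_fn(0x84000003 /* PSCI_0_2_FN_CPU_ON */,
			      cpuid, entry_point, 0);
}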
- */ -static noinline int __invoke_psci_fn_hvc(u32 function_id, u32 arg0, u32 arg1, - u32 arg2) -{ - asm volatile( - __asmeq("%0", "r0") - __asmeq("%1", "r1") - __asmeq("%2", "r2") - __asmeq("%3", "r3") - __HVC(0) - : "+r" (function_id) - : "r" (arg0), "r" (arg1), "r" (arg2)); - - return function_id; -} - -static noinline int __invoke_psci_fn_smc(u32 function_id, u32 arg0, u32 arg1, - u32 arg2) -{ - asm volatile( - __asmeq("%0", "r0") - __asmeq("%1", "r1") - __asmeq("%2", "r2") - __asmeq("%3", "r3") - __SMC(0) - : "+r" (function_id) - : "r" (arg0), "r" (arg1), "r" (arg2)); - - return function_id; -} - static int psci_get_version(void) { int err; diff --git a/arch/arm/kernel/reboot.c b/arch/arm/kernel/reboot.c new file mode 100644 index 000000000000..38269358fd25 --- /dev/null +++ b/arch/arm/kernel/reboot.c @@ -0,0 +1,155 @@ +/* + * Copyright (C) 1996-2000 Russell King - Converted to ARM. + * Original Copyright (C) 1995 Linus Torvalds + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#include <linux/cpu.h> +#include <linux/delay.h> +#include <linux/reboot.h> + +#include <asm/cacheflush.h> +#include <asm/idmap.h> + +#include "reboot.h" + +typedef void (*phys_reset_t)(unsigned long); + +/* + * Function pointers to optional machine specific functions + */ +void (*arm_pm_restart)(enum reboot_mode reboot_mode, const char *cmd); +void (*pm_power_off)(void); +EXPORT_SYMBOL(pm_power_off); + +/* + * A temporary stack to use for CPU reset. This is static so that we + * don't clobber it with the identity mapping. When running with this + * stack, any references to the current task *will not work* so you + * should really do as little as possible before jumping to your reset + * code. + */ +static u64 soft_restart_stack[16]; + +static void __soft_restart(void *addr) +{ + phys_reset_t phys_reset; + + /* Take out a flat memory mapping. */ + setup_mm_for_reboot(); + + /* Clean and invalidate caches */ + flush_cache_all(); + + /* Turn off caching */ + cpu_proc_fin(); + + /* Push out any further dirty data, and ensure cache is empty */ + flush_cache_all(); + + /* Switch to the identity mapping. */ + phys_reset = (phys_reset_t)(unsigned long)virt_to_idmap(cpu_reset); + phys_reset((unsigned long)addr); + + /* Should never get here. */ + BUG(); +} + +void _soft_restart(unsigned long addr, bool disable_l2) +{ + u64 *stack = soft_restart_stack + ARRAY_SIZE(soft_restart_stack); + + /* Disable interrupts first */ + raw_local_irq_disable(); + local_fiq_disable(); + + /* Disable the L2 if we're the last man standing. */ + if (disable_l2) + outer_disable(); + + /* Change to the new stack and continue with the reset. */ + call_with_stack(__soft_restart, (void *)addr, (void *)stack); + + /* Should never get here. */ + BUG(); +} + +void soft_restart(unsigned long addr) +{ + _soft_restart(addr, num_online_cpus() == 1); +} + +/* + * Called by kexec, immediately prior to machine_kexec(). + * + * This must completely disable all secondary CPUs; simply causing those CPUs + * to execute e.g. a RAM-based pin loop is not sufficient. This allows the + * kexec'd kernel to use any and all RAM as it sees fit, without having to + * avoid any code or data used by any SW CPU pin loop. The CPU hotplug + * functionality embodied in disable_nonboot_cpus() to achieve this. 
+ */ +void machine_shutdown(void) +{ + disable_nonboot_cpus(); +} + +/* + * Halting simply requires that the secondary CPUs stop performing any + * activity (executing tasks, handling interrupts). smp_send_stop() + * achieves this. + */ +void machine_halt(void) +{ + local_irq_disable(); + smp_send_stop(); + + local_irq_disable(); + while (1); +} + +/* + * Power-off simply requires that the secondary CPUs stop performing any + * activity (executing tasks, handling interrupts). smp_send_stop() + * achieves this. When the system power is turned off, it will take all CPUs + * with it. + */ +void machine_power_off(void) +{ + local_irq_disable(); + smp_send_stop(); + + if (pm_power_off) + pm_power_off(); +} + +/* + * Restart requires that the secondary CPUs stop performing any activity + * while the primary CPU resets the system. Systems with a single CPU can + * use soft_restart() as their machine descriptor's .restart hook, since that + * will cause the only available CPU to reset. Systems with multiple CPUs must + * provide a HW restart implementation, to ensure that all CPUs reset at once. + * This is required so that any code running after reset on the primary CPU + * doesn't have to co-ordinate with other CPUs to ensure they aren't still + * executing pre-reset code, and using RAM that the primary CPU's code wishes + * to use. Implementing such co-ordination would be essentially impossible. + */ +void machine_restart(char *cmd) +{ + local_irq_disable(); + smp_send_stop(); + + if (arm_pm_restart) + arm_pm_restart(reboot_mode, cmd); + else + do_kernel_restart(cmd); + + /* Give a grace period for failure to restart of 1s */ + mdelay(1000); + + /* Whoops - the platform was unable to reboot. Tell the user! */ + printk("Reboot failed -- System halted\n"); + local_irq_disable(); + while (1); +} diff --git a/arch/arm/kernel/reboot.h b/arch/arm/kernel/reboot.h new file mode 100644 index 000000000000..bf7a0b1f076e --- /dev/null +++ b/arch/arm/kernel/reboot.h @@ -0,0 +1,7 @@ +#ifndef REBOOT_H +#define REBOOT_H + +extern void call_with_stack(void (*fn)(void *), void *arg, void *sp); +extern void _soft_restart(unsigned long addr, bool disable_l2); + +#endif diff --git a/arch/arm/kernel/return_address.c b/arch/arm/kernel/return_address.c index 24b4a04846eb..36ed35073289 100644 --- a/arch/arm/kernel/return_address.c +++ b/arch/arm/kernel/return_address.c @@ -56,8 +56,6 @@ void *return_address(unsigned int level) return NULL; } -#else /* if defined(CONFIG_FRAME_POINTER) && !defined(CONFIG_ARM_UNWIND) */ - -#endif /* if defined(CONFIG_FRAME_POINTER) && !defined(CONFIG_ARM_UNWIND) / else */ +#endif /* if defined(CONFIG_FRAME_POINTER) && !defined(CONFIG_ARM_UNWIND) */ EXPORT_SYMBOL_GPL(return_address); diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c index 1d60bebea4b8..36c18b73c1f4 100644 --- a/arch/arm/kernel/setup.c +++ b/arch/arm/kernel/setup.c @@ -46,6 +46,7 @@ #include <asm/cacheflush.h> #include <asm/cachetype.h> #include <asm/tlbflush.h> +#include <asm/xen/hypervisor.h> #include <asm/prom.h> #include <asm/mach/arch.h> @@ -75,8 +76,7 @@ __setup("fpe=", fpe_setup); extern void init_default_cache_policy(unsigned long); extern void paging_init(const struct machine_desc *desc); -extern void early_paging_init(const struct machine_desc *, - struct proc_info_list *); +extern void early_paging_init(const struct machine_desc *); extern void sanity_check_meminfo(void); extern enum reboot_mode reboot_mode; extern void setup_dma_zone(const struct machine_desc *desc); @@ -93,6 +93,9 @@ unsigned int 
__atags_pointer __initdata; unsigned int system_rev; EXPORT_SYMBOL(system_rev); +const char *system_serial; +EXPORT_SYMBOL(system_serial); + unsigned int system_serial_low; EXPORT_SYMBOL(system_serial_low); @@ -372,30 +375,48 @@ void __init early_print(const char *str, ...) static void __init cpuid_init_hwcaps(void) { - unsigned int divide_instrs, vmsa; + int block; + u32 isar5; if (cpu_architecture() < CPU_ARCH_ARMv7) return; - divide_instrs = (read_cpuid_ext(CPUID_EXT_ISAR0) & 0x0f000000) >> 24; - - switch (divide_instrs) { - case 2: + block = cpuid_feature_extract(CPUID_EXT_ISAR0, 24); + if (block >= 2) elf_hwcap |= HWCAP_IDIVA; - case 1: + if (block >= 1) elf_hwcap |= HWCAP_IDIVT; - } /* LPAE implies atomic ldrd/strd instructions */ - vmsa = (read_cpuid_ext(CPUID_EXT_MMFR0) & 0xf) >> 0; - if (vmsa >= 5) + block = cpuid_feature_extract(CPUID_EXT_MMFR0, 0); + if (block >= 5) elf_hwcap |= HWCAP_LPAE; + + /* check for supported v8 Crypto instructions */ + isar5 = read_cpuid_ext(CPUID_EXT_ISAR5); + + block = cpuid_feature_extract_field(isar5, 4); + if (block >= 2) + elf_hwcap2 |= HWCAP2_PMULL; + if (block >= 1) + elf_hwcap2 |= HWCAP2_AES; + + block = cpuid_feature_extract_field(isar5, 8); + if (block >= 1) + elf_hwcap2 |= HWCAP2_SHA1; + + block = cpuid_feature_extract_field(isar5, 12); + if (block >= 1) + elf_hwcap2 |= HWCAP2_SHA2; + + block = cpuid_feature_extract_field(isar5, 16); + if (block >= 1) + elf_hwcap2 |= HWCAP2_CRC32; } static void __init elf_hwcap_fixup(void) { unsigned id = read_cpuid_id(); - unsigned sync_prim; /* * HWCAP_TLS is available only on 1136 r1p0 and later, @@ -416,9 +437,9 @@ static void __init elf_hwcap_fixup(void) * avoid advertising SWP; it may not be atomic with * multiprocessing cores. */ - sync_prim = ((read_cpuid_ext(CPUID_EXT_ISAR3) >> 8) & 0xf0) | - ((read_cpuid_ext(CPUID_EXT_ISAR4) >> 20) & 0x0f); - if (sync_prim >= 0x13) + if (cpuid_feature_extract(CPUID_EXT_ISAR3, 12) > 1 || + (cpuid_feature_extract(CPUID_EXT_ISAR3, 12) == 1 && + cpuid_feature_extract(CPUID_EXT_ISAR3, 20) >= 3)) elf_hwcap &= ~HWCAP_SWP; } @@ -821,8 +842,25 @@ arch_initcall(customize_machine); static int __init init_machine_late(void) { + struct device_node *root; + int ret; + if (machine_desc->init_late) machine_desc->init_late(); + + root = of_find_node_by_path("/"); + if (root) { + ret = of_property_read_string(root, "serial-number", + &system_serial); + if (ret) + system_serial = NULL; + } + + if (!system_serial) + system_serial = kasprintf(GFP_KERNEL, "%08x%08x", + system_serial_high, + system_serial_low); + return 0; } late_initcall(init_machine_late); @@ -918,7 +956,9 @@ void __init setup_arch(char **cmdline_p) parse_early_param(); - early_paging_init(mdesc, lookup_processor_type(read_cpuid_id())); +#ifdef CONFIG_MMU + early_paging_init(mdesc); +#endif setup_dma_zone(mdesc); sanity_check_meminfo(); arm_memblock_init(mdesc); @@ -933,6 +973,7 @@ void __init setup_arch(char **cmdline_p) arm_dt_init_cpu_maps(); psci_init(); + xen_early_init(); #ifdef CONFIG_SMP if (is_smp()) { if (!mdesc->smp_init || !mdesc->smp_init()) { @@ -1091,8 +1132,7 @@ static int c_show(struct seq_file *m, void *v) seq_printf(m, "Hardware\t: %s\n", machine_name); seq_printf(m, "Revision\t: %04x\n", system_rev); - seq_printf(m, "Serial\t\t: %08x%08x\n", - system_serial_high, system_serial_low); + seq_printf(m, "Serial\t\t: %s\n", system_serial); return 0; } diff --git a/arch/arm/kernel/signal.c b/arch/arm/kernel/signal.c index 023ac905e4c3..423663e23791 100644 --- a/arch/arm/kernel/signal.c +++ 
b/arch/arm/kernel/signal.c @@ -318,17 +318,6 @@ get_sigframe(struct ksignal *ksig, struct pt_regs *regs, int framesize) return frame; } -/* - * translate the signal - */ -static inline int map_sig(int sig) -{ - struct thread_info *thread = current_thread_info(); - if (sig < 32 && thread->exec_domain && thread->exec_domain->signal_invmap) - sig = thread->exec_domain->signal_invmap[sig]; - return sig; -} - static int setup_return(struct pt_regs *regs, struct ksignal *ksig, unsigned long __user *rc, void __user *frame) @@ -412,7 +401,7 @@ setup_return(struct pt_regs *regs, struct ksignal *ksig, } } - regs->ARM_r0 = map_sig(ksig->sig); + regs->ARM_r0 = ksig->sig; regs->ARM_sp = (unsigned long)frame; regs->ARM_lr = retcode; regs->ARM_pc = handler; diff --git a/arch/arm/kernel/sleep.S b/arch/arm/kernel/sleep.S index e1e60e5a7a27..0f6c1000582c 100644 --- a/arch/arm/kernel/sleep.S +++ b/arch/arm/kernel/sleep.S @@ -81,7 +81,7 @@ ENTRY(__cpu_suspend) mov r1, r4 @ size of save block add r0, sp, #8 @ pointer to save block bl __cpu_suspend_save - adr lr, BSYM(cpu_suspend_abort) + badr lr, cpu_suspend_abort ldmfd sp!, {r0, pc} @ call suspend fn ENDPROC(__cpu_suspend) .ltorg @@ -116,15 +116,18 @@ cpu_resume_after_mmu: ldmfd sp!, {r4 - r11, pc} ENDPROC(cpu_resume_after_mmu) -/* - * Note: Yes, part of the following code is located into the .data section. - * This is to allow sleep_save_sp to be accessed with a relative load - * while we can't rely on any MMU translation. We could have put - * sleep_save_sp in the .text section as well, but some setups might - * insist on it to be truly read-only. - */ - .data + .text .align + +#ifdef CONFIG_MMU + .arm +ENTRY(cpu_resume_arm) + THUMB( badr r9, 1f ) @ Kernel is entered in ARM. + THUMB( bx r9 ) @ If this is a Thumb-2 kernel, + THUMB( .thumb ) @ switch to Thumb now. + THUMB(1: ) +#endif + ENTRY(cpu_resume) ARM_BE8(setend be) @ ensure we are in BE mode #ifdef CONFIG_ARM_VIRT_EXT @@ -145,6 +148,8 @@ ARM_BE8(setend be) @ ensure we are in BE mode compute_mpidr_hash r1, r4, r5, r6, r0, r3 1: adr r0, _sleep_save_sp + ldr r2, [r0] + add r0, r0, r2 ldr r0, [r0, #SLEEP_SAVE_SP_PHYS] ldr r0, [r0, r1, lsl #2] @@ -155,11 +160,17 @@ THUMB( mov sp, r2 ) THUMB( bx r3 ) ENDPROC(cpu_resume) +#ifdef CONFIG_MMU +ENDPROC(cpu_resume_arm) +#endif + .align 2 +_sleep_save_sp: + .long sleep_save_sp - . mpidr_hash_ptr: .long mpidr_hash - . 
@ mpidr_hash struct offset + .data .type sleep_save_sp, #object ENTRY(sleep_save_sp) -_sleep_save_sp: .space SLEEP_SAVE_SP_SZ @ struct sleep_save_sp diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c index 86ef244c5a24..3d6b7821cff8 100644 --- a/arch/arm/kernel/smp.c +++ b/arch/arm/kernel/smp.c @@ -86,9 +86,11 @@ void __init smp_set_ops(struct smp_operations *ops) static unsigned long get_arch_pgd(pgd_t *pgd) { - phys_addr_t pgdir = virt_to_idmap(pgd); - BUG_ON(pgdir & ARCH_PGD_MASK); - return pgdir >> ARCH_PGD_SHIFT; +#ifdef CONFIG_ARM_LPAE + return __phys_to_pfn(virt_to_phys(pgd)); +#else + return virt_to_phys(pgd); +#endif } int __cpu_up(unsigned int cpu, struct task_struct *idle) @@ -108,7 +110,7 @@ int __cpu_up(unsigned int cpu, struct task_struct *idle) #endif #ifdef CONFIG_MMU - secondary_data.pgdir = get_arch_pgd(idmap_pgd); + secondary_data.pgdir = virt_to_phys(idmap_pgd); secondary_data.swapper_pg_dir = get_arch_pgd(swapper_pg_dir); #endif sync_cache_w(&secondary_data); @@ -145,6 +147,11 @@ void __init smp_init_cpus(void) smp_ops.smp_init_cpus(); } +int platform_can_secondary_boot(void) +{ + return !!smp_ops.smp_boot_secondary; +} + int platform_can_cpu_hotplug(void) { #ifdef CONFIG_HOTPLUG_CPU @@ -571,7 +578,7 @@ void handle_IPI(int ipinr, struct pt_regs *regs) struct pt_regs *old_regs = set_irq_regs(regs); if ((unsigned)ipinr < NR_IPI) { - trace_ipi_entry(ipi_types[ipinr]); + trace_ipi_entry_rcuidle(ipi_types[ipinr]); __inc_irq_stat(cpu, ipi_irqs[ipinr]); } @@ -630,7 +637,7 @@ void handle_IPI(int ipinr, struct pt_regs *regs) } if ((unsigned)ipinr < NR_IPI) - trace_ipi_exit(ipi_types[ipinr]); + trace_ipi_exit_rcuidle(ipi_types[ipinr]); set_irq_regs(old_regs); } diff --git a/arch/arm/kernel/swp_emulate.c b/arch/arm/kernel/swp_emulate.c index afdd51e30bec..1361756782c7 100644 --- a/arch/arm/kernel/swp_emulate.c +++ b/arch/arm/kernel/swp_emulate.c @@ -42,7 +42,7 @@ " cmp %0, #0\n" \ " movne %0, %4\n" \ "2:\n" \ - " .section .fixup,\"ax\"\n" \ + " .section .text.fixup,\"ax\"\n" \ " .align 2\n" \ "3: mov %0, %5\n" \ " b 2b\n" \ diff --git a/arch/arm/kernel/tcm.c b/arch/arm/kernel/tcm.c index 7a3be1d4d0b1..b10e1360762e 100644 --- a/arch/arm/kernel/tcm.c +++ b/arch/arm/kernel/tcm.c @@ -17,6 +17,9 @@ #include <asm/mach/map.h> #include <asm/memory.h> #include <asm/system_info.h> +#include <asm/traps.h> + +#define TCMTR_FORMAT_MASK 0xe0000000U static struct gen_pool *tcm_pool; static bool dtcm_present; @@ -176,6 +179,77 @@ static int __init setup_tcm_bank(u8 type, u8 bank, u8 banks, } /* + * When we are running in the non-secure world and the secure world + * has not explicitly given us access to the TCM we will get an + * undefined error when reading the TCM region register in the + * setup_tcm_bank function (above). + * + * There are two variants of this register read that we need to trap, + * the read for the data TCM and the read for the instruction TCM: + * c0370628: ee196f11 mrc 15, 0, r6, cr9, cr1, {0} + * c0370674: ee196f31 mrc 15, 0, r6, cr9, cr1, {1} + * + * Our undef hook mask explicitly matches all fields of the encoded + * instruction other than the destination register. The mask also + * only allows operand 2 to have the values 0 or 1. + * + * The undefined hook is defined as __init and __initdata, and therefore + * must be removed before tcm_init returns. + * + * In this particular case (MRC with ARM condition code ALways) the + * Thumb-2 and ARM instruction encoding are identical, so this hook + * will work on a Thumb-2 kernel. 
+ * + * See A8.8.107, DDI0406C_C ARM Architecture Reference Manual, Encoding + * T1/A1 for the bit-by-bit details. + * + * mrc p15, 0, XX, c9, c1, 0 + * mrc p15, 0, XX, c9, c1, 1 + * | | | | | | | +---- opc2 0|1 = 000|001 + * | | | | | | +------- CRm 0 = 0001 + * | | | | | +----------- CRn 0 = 1001 + * | | | | +--------------- Rt ? = ???? + * | | | +------------------- opc1 0 = 000 + * | | +----------------------- coproc 15 = 1111 + * | +-------------------------- condition ALways = 1110 + * +----------------------------- instruction MRC = 1110 + * + * Encoding this as per A8.8.107 of DDI0406C, Encoding T1/A1, yields: + * 1111 1111 1111 1111 0000 1111 1101 1111 Required Mask + * 1110 1110 0001 1001 ???? 1111 0001 0001 mrc p15, 0, XX, c9, c1, 0 + * 1110 1110 0001 1001 ???? 1111 0011 0001 mrc p15, 0, XX, c9, c1, 1 + * [ ] [ ] [ ]| [ ] [ ] [ ] [ ]| +--- CRm + * | | | | | | | | +----- SBO + * | | | | | | | +------- opc2 + * | | | | | | +----------- coproc + * | | | | | +---------------- Rt + * | | | | +--------------------- CRn + * | | | +------------------------- SBO + * | | +--------------------------- opc1 + * | +------------------------------- instruction + * +------------------------------------ condition + */ +#define TCM_REGION_READ_MASK 0xffff0fdf +#define TCM_REGION_READ_INSTR 0xee190f11 +#define DEST_REG_SHIFT 12 +#define DEST_REG_MASK 0xf + +static int __init tcm_handler(struct pt_regs *regs, unsigned int instr) +{ + regs->uregs[(instr >> DEST_REG_SHIFT) & DEST_REG_MASK] = 0; + regs->ARM_pc += 4; + return 0; +} + +static struct undef_hook tcm_hook __initdata = { + .instr_mask = TCM_REGION_READ_MASK, + .instr_val = TCM_REGION_READ_INSTR, + .cpsr_mask = MODE_MASK, + .cpsr_val = SVC_MODE, + .fn = tcm_handler +}; + +/* * This initializes the TCM memory */ void __init tcm_init(void) @@ -204,9 +278,18 @@ void __init tcm_init(void) } tcm_status = read_cpuid_tcmstatus(); + + /* + * This code only supports v6-compatible TCMTR implementations. + */ + if (tcm_status & TCMTR_FORMAT_MASK) + return; + dtcm_banks = (tcm_status >> 16) & 0x03; itcm_banks = (tcm_status & 0x03); + register_undef_hook(&tcm_hook); + /* Values greater than 2 for D/ITCM banks are "reserved" */ if (dtcm_banks > 2) dtcm_banks = 0; @@ -218,7 +301,7 @@ void __init tcm_init(void) for (i = 0; i < dtcm_banks; i++) { ret = setup_tcm_bank(0, i, dtcm_banks, &dtcm_end); if (ret) - return; + goto unregister; } /* This means you compiled more code than fits into DTCM */ if (dtcm_code_sz > (dtcm_end - DTCM_OFFSET)) { @@ -227,6 +310,12 @@ void __init tcm_init(void) dtcm_code_sz, (dtcm_end - DTCM_OFFSET)); goto no_dtcm; } + /* + * This means that the DTCM sizes were 0 or the DTCM banks + * were inaccessible due to TrustZone configuration. + */ + if (!(dtcm_end - DTCM_OFFSET)) + goto no_dtcm; dtcm_res.end = dtcm_end - 1; request_resource(&iomem_resource, &dtcm_res); dtcm_iomap[0].length = dtcm_end - DTCM_OFFSET; @@ -250,15 +339,21 @@ no_dtcm: for (i = 0; i < itcm_banks; i++) { ret = setup_tcm_bank(1, i, itcm_banks, &itcm_end); if (ret) - return; + goto unregister; } /* This means you compiled more code than fits into ITCM */ if (itcm_code_sz > (itcm_end - ITCM_OFFSET)) { pr_info("CPU ITCM: %u bytes of code compiled to " "ITCM but only %lu bytes of ITCM present\n", itcm_code_sz, (itcm_end - ITCM_OFFSET)); - return; + goto unregister; } + /* + * This means that the ITCM sizes were 0 or the ITCM banks + * were inaccessible due to TrustZone configuration. 
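A small self-contained check (illustrative only, not part of the patch) that the mask/value pair above matches both MRC encodings quoted in the comment, and that the destination-register extraction used by tcm_handler() yields r6 for those example instruction words:

#include <stdio.h>

#define TCM_REGION_READ_MASK	0xffff0fdf
#define TCM_REGION_READ_INSTR	0xee190f11
#define DEST_REG_SHIFT		12
#define DEST_REG_MASK		0xf

int main(void)
{
	unsigned int mrc_dtcm = 0xee196f11;	/* mrc p15, 0, r6, c9, c1, 0 */
	unsigned int mrc_itcm = 0xee196f31;	/* mrc p15, 0, r6, c9, c1, 1 */

	printf("DTCM read hooked: %d\n",
	       (mrc_dtcm & TCM_REGION_READ_MASK) == TCM_REGION_READ_INSTR);
	printf("ITCM read hooked: %d\n",
	       (mrc_itcm & TCM_REGION_READ_MASK) == TCM_REGION_READ_INSTR);
	printf("register zeroed by the handler: r%u\n",
	       (mrc_dtcm >> DEST_REG_SHIFT) & DEST_REG_MASK);
	return 0;
}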
+ */ + if (!(itcm_end - ITCM_OFFSET)) + goto unregister; itcm_res.end = itcm_end - 1; request_resource(&iomem_resource, &itcm_res); itcm_iomap[0].length = itcm_end - ITCM_OFFSET; @@ -275,6 +370,9 @@ no_dtcm: pr_info("CPU ITCM: %u bytes of code compiled to ITCM but no " "ITCM banks present in CPU\n", itcm_code_sz); } + +unregister: + unregister_undef_hook(&tcm_hook); } /* diff --git a/arch/arm/kernel/time.c b/arch/arm/kernel/time.c index 0cc7e58c47cc..a66e37e211a9 100644 --- a/arch/arm/kernel/time.c +++ b/arch/arm/kernel/time.c @@ -76,7 +76,7 @@ void timer_tick(void) } #endif -static void dummy_clock_access(struct timespec *ts) +static void dummy_clock_access(struct timespec64 *ts) { ts->tv_sec = 0; ts->tv_nsec = 0; @@ -85,12 +85,12 @@ static void dummy_clock_access(struct timespec *ts) static clock_access_fn __read_persistent_clock = dummy_clock_access; static clock_access_fn __read_boot_clock = dummy_clock_access;; -void read_persistent_clock(struct timespec *ts) +void read_persistent_clock64(struct timespec64 *ts) { __read_persistent_clock(ts); } -void read_boot_clock(struct timespec *ts) +void read_boot_clock64(struct timespec64 *ts) { __read_boot_clock(ts); } diff --git a/arch/arm/kernel/traps.c b/arch/arm/kernel/traps.c index 788e23fe64d8..d358226236f2 100644 --- a/arch/arm/kernel/traps.c +++ b/arch/arm/kernel/traps.c @@ -505,12 +505,10 @@ asmlinkage void bad_mode(struct pt_regs *regs, int reason) static int bad_syscall(int n, struct pt_regs *regs) { - struct thread_info *thread = current_thread_info(); siginfo_t info; - if ((current->personality & PER_MASK) != PER_LINUX && - thread->exec_domain->handler) { - thread->exec_domain->handler(n, regs); + if ((current->personality & PER_MASK) != PER_LINUX) { + send_sig(SIGSEGV, current, 1); return regs->ARM_r0; } @@ -751,14 +749,6 @@ late_initcall(arm_mrc_hook_init); #endif -void __bad_xchg(volatile void *ptr, int size) -{ - pr_err("xchg: bad data size: pc 0x%p, ptr 0x%p, size %d\n", - __builtin_return_address(0), ptr, size); - BUG(); -} -EXPORT_SYMBOL(__bad_xchg); - /* * A data abort trap was taken, but we did not handle the instruction. * Try to abort the user program, or panic if it was the kernel. diff --git a/arch/arm/kernel/vdso.c b/arch/arm/kernel/vdso.c new file mode 100644 index 000000000000..efe17dd9b921 --- /dev/null +++ b/arch/arm/kernel/vdso.c @@ -0,0 +1,337 @@ +/* + * Adapted from arm64 version. + * + * Copyright (C) 2012 ARM Limited + * Copyright (C) 2015 Mentor Graphics Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. 
+ */ + +#include <linux/elf.h> +#include <linux/err.h> +#include <linux/kernel.h> +#include <linux/mm.h> +#include <linux/of.h> +#include <linux/printk.h> +#include <linux/slab.h> +#include <linux/timekeeper_internal.h> +#include <linux/vmalloc.h> +#include <asm/arch_timer.h> +#include <asm/barrier.h> +#include <asm/cacheflush.h> +#include <asm/page.h> +#include <asm/vdso.h> +#include <asm/vdso_datapage.h> +#include <clocksource/arm_arch_timer.h> + +#define MAX_SYMNAME 64 + +static struct page **vdso_text_pagelist; + +/* Total number of pages needed for the data and text portions of the VDSO. */ +unsigned int vdso_total_pages __read_mostly; + +/* + * The VDSO data page. + */ +static union vdso_data_store vdso_data_store __page_aligned_data; +static struct vdso_data *vdso_data = &vdso_data_store.data; + +static struct page *vdso_data_page; +static struct vm_special_mapping vdso_data_mapping = { + .name = "[vvar]", + .pages = &vdso_data_page, +}; + +static struct vm_special_mapping vdso_text_mapping = { + .name = "[vdso]", +}; + +struct elfinfo { + Elf32_Ehdr *hdr; /* ptr to ELF */ + Elf32_Sym *dynsym; /* ptr to .dynsym section */ + unsigned long dynsymsize; /* size of .dynsym section */ + char *dynstr; /* ptr to .dynstr section */ +}; + +/* Cached result of boot-time check for whether the arch timer exists, + * and if so, whether the virtual counter is useable. + */ +static bool cntvct_ok __read_mostly; + +static bool __init cntvct_functional(void) +{ + struct device_node *np; + bool ret = false; + + if (!IS_ENABLED(CONFIG_ARM_ARCH_TIMER)) + goto out; + + /* The arm_arch_timer core should export + * arch_timer_use_virtual or similar so we don't have to do + * this. + */ + np = of_find_compatible_node(NULL, NULL, "arm,armv7-timer"); + if (!np) + goto out_put; + + if (of_property_read_bool(np, "arm,cpu-registers-not-fw-configured")) + goto out_put; + + ret = true; + +out_put: + of_node_put(np); +out: + return ret; +} + +static void * __init find_section(Elf32_Ehdr *ehdr, const char *name, + unsigned long *size) +{ + Elf32_Shdr *sechdrs; + unsigned int i; + char *secnames; + + /* Grab section headers and strings so we can tell who is who */ + sechdrs = (void *)ehdr + ehdr->e_shoff; + secnames = (void *)ehdr + sechdrs[ehdr->e_shstrndx].sh_offset; + + /* Find the section they want */ + for (i = 1; i < ehdr->e_shnum; i++) { + if (strcmp(secnames + sechdrs[i].sh_name, name) == 0) { + if (size) + *size = sechdrs[i].sh_size; + return (void *)ehdr + sechdrs[i].sh_offset; + } + } + + if (size) + *size = 0; + return NULL; +} + +static Elf32_Sym * __init find_symbol(struct elfinfo *lib, const char *symname) +{ + unsigned int i; + + for (i = 0; i < (lib->dynsymsize / sizeof(Elf32_Sym)); i++) { + char name[MAX_SYMNAME], *c; + + if (lib->dynsym[i].st_name == 0) + continue; + strlcpy(name, lib->dynstr + lib->dynsym[i].st_name, + MAX_SYMNAME); + c = strchr(name, '@'); + if (c) + *c = 0; + if (strcmp(symname, name) == 0) + return &lib->dynsym[i]; + } + return NULL; +} + +static void __init vdso_nullpatch_one(struct elfinfo *lib, const char *symname) +{ + Elf32_Sym *sym; + + sym = find_symbol(lib, symname); + if (!sym) + return; + + sym->st_name = 0; +} + +static void __init patch_vdso(void *ehdr) +{ + struct elfinfo einfo; + + einfo = (struct elfinfo) { + .hdr = ehdr, + }; + + einfo.dynsym = find_section(einfo.hdr, ".dynsym", &einfo.dynsymsize); + einfo.dynstr = find_section(einfo.hdr, ".dynstr", NULL); + + /* If the virtual counter is absent or non-functional we don't + * want programs to incur the slight 
additional overhead of + * dispatching through the VDSO only to fall back to syscalls. + */ + if (!cntvct_ok) { + vdso_nullpatch_one(&einfo, "__vdso_gettimeofday"); + vdso_nullpatch_one(&einfo, "__vdso_clock_gettime"); + } +} + +static int __init vdso_init(void) +{ + unsigned int text_pages; + int i; + + if (memcmp(&vdso_start, "\177ELF", 4)) { + pr_err("VDSO is not a valid ELF object!\n"); + return -ENOEXEC; + } + + text_pages = (&vdso_end - &vdso_start) >> PAGE_SHIFT; + pr_debug("vdso: %i text pages at base %p\n", text_pages, &vdso_start); + + /* Allocate the VDSO text pagelist */ + vdso_text_pagelist = kcalloc(text_pages, sizeof(struct page *), + GFP_KERNEL); + if (vdso_text_pagelist == NULL) + return -ENOMEM; + + /* Grab the VDSO data page. */ + vdso_data_page = virt_to_page(vdso_data); + + /* Grab the VDSO text pages. */ + for (i = 0; i < text_pages; i++) { + struct page *page; + + page = virt_to_page(&vdso_start + i * PAGE_SIZE); + vdso_text_pagelist[i] = page; + } + + vdso_text_mapping.pages = vdso_text_pagelist; + + vdso_total_pages = 1; /* for the data/vvar page */ + vdso_total_pages += text_pages; + + cntvct_ok = cntvct_functional(); + + patch_vdso(&vdso_start); + + return 0; +} +arch_initcall(vdso_init); + +static int install_vvar(struct mm_struct *mm, unsigned long addr) +{ + struct vm_area_struct *vma; + + vma = _install_special_mapping(mm, addr, PAGE_SIZE, + VM_READ | VM_MAYREAD, + &vdso_data_mapping); + + return IS_ERR(vma) ? PTR_ERR(vma) : 0; +} + +/* assumes mmap_sem is write-locked */ +void arm_install_vdso(struct mm_struct *mm, unsigned long addr) +{ + struct vm_area_struct *vma; + unsigned long len; + + mm->context.vdso = 0; + + if (vdso_text_pagelist == NULL) + return; + + if (install_vvar(mm, addr)) + return; + + /* Account for vvar page. */ + addr += PAGE_SIZE; + len = (vdso_total_pages - 1) << PAGE_SHIFT; + + vma = _install_special_mapping(mm, addr, len, + VM_READ | VM_EXEC | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC, + &vdso_text_mapping); + + if (!IS_ERR(vma)) + mm->context.vdso = addr; +} + +static void vdso_write_begin(struct vdso_data *vdata) +{ + ++vdso_data->seq_count; + smp_wmb(); /* Pairs with smp_rmb in vdso_read_retry */ +} + +static void vdso_write_end(struct vdso_data *vdata) +{ + smp_wmb(); /* Pairs with smp_rmb in vdso_read_begin */ + ++vdso_data->seq_count; +} + +static bool tk_is_cntvct(const struct timekeeper *tk) +{ + if (!IS_ENABLED(CONFIG_ARM_ARCH_TIMER)) + return false; + + if (strcmp(tk->tkr_mono.clock->name, "arch_sys_counter") != 0) + return false; + + return true; +} + +/** + * update_vsyscall - update the vdso data page + * + * Increment the sequence counter, making it odd, indicating to + * userspace that an update is in progress. Update the fields used + * for coarse clocks and, if the architected system timer is in use, + * the fields used for high precision clocks. Increment the sequence + * counter again, making it even, indicating to userspace that the + * update is finished. + * + * Userspace is expected to sample seq_count before reading any other + * fields from the data page. If seq_count is odd, userspace is + * expected to wait until it becomes even. After copying data from + * the page, userspace must sample seq_count again; if it has changed + * from its previous value, userspace must retry the whole sequence. + * + * Calls to update_vsyscall are serialized by the timekeeping core. 
+ */ +void update_vsyscall(struct timekeeper *tk) +{ + struct timespec xtime_coarse; + struct timespec64 *wtm = &tk->wall_to_monotonic; + + if (!cntvct_ok) { + /* The entry points have been zeroed, so there is no + * point in updating the data page. + */ + return; + } + + vdso_write_begin(vdso_data); + + xtime_coarse = __current_kernel_time(); + vdso_data->tk_is_cntvct = tk_is_cntvct(tk); + vdso_data->xtime_coarse_sec = xtime_coarse.tv_sec; + vdso_data->xtime_coarse_nsec = xtime_coarse.tv_nsec; + vdso_data->wtm_clock_sec = wtm->tv_sec; + vdso_data->wtm_clock_nsec = wtm->tv_nsec; + + if (vdso_data->tk_is_cntvct) { + vdso_data->cs_cycle_last = tk->tkr_mono.cycle_last; + vdso_data->xtime_clock_sec = tk->xtime_sec; + vdso_data->xtime_clock_snsec = tk->tkr_mono.xtime_nsec; + vdso_data->cs_mult = tk->tkr_mono.mult; + vdso_data->cs_shift = tk->tkr_mono.shift; + vdso_data->cs_mask = tk->tkr_mono.mask; + } + + vdso_write_end(vdso_data); + + flush_dcache_page(virt_to_page(vdso_data)); +} + +void update_vsyscall_tz(void) +{ + vdso_data->tz_minuteswest = sys_tz.tz_minuteswest; + vdso_data->tz_dsttime = sys_tz.tz_dsttime; + flush_dcache_page(virt_to_page(vdso_data)); +} diff --git a/arch/arm/kernel/vmlinux.lds.S b/arch/arm/kernel/vmlinux.lds.S index b31aa73e8076..8b60fde5ce48 100644 --- a/arch/arm/kernel/vmlinux.lds.S +++ b/arch/arm/kernel/vmlinux.lds.S @@ -11,7 +11,7 @@ #ifdef CONFIG_ARM_KERNMEM_PERMS #include <asm/pgtable.h> #endif - + #define PROC_INFO \ . = ALIGN(4); \ VMLINUX_SYMBOL(__proc_info_begin) = .; \ @@ -23,7 +23,7 @@ VMLINUX_SYMBOL(__idmap_text_start) = .; \ *(.idmap.text) \ VMLINUX_SYMBOL(__idmap_text_end) = .; \ - . = ALIGN(32); \ + . = ALIGN(PAGE_SIZE); \ VMLINUX_SYMBOL(__hyp_idmap_text_start) = .; \ *(.hyp.idmap.text) \ VMLINUX_SYMBOL(__hyp_idmap_text_end) = .; @@ -74,7 +74,7 @@ SECTIONS ARM_EXIT_DISCARD(EXIT_DATA) EXIT_CALL #ifndef CONFIG_MMU - *(.fixup) + *(.text.fixup) *(__ex_table) #endif #ifndef CONFIG_SMP_ON_UP @@ -100,6 +100,7 @@ SECTIONS .text : { /* Real text segment */ _stext = .; /* Text and read-only data */ + IDMAP_TEXT __exception_text_start = .; *(.exception.text) __exception_text_end = .; @@ -108,10 +109,6 @@ SECTIONS SCHED_TEXT LOCK_TEXT KPROBES_TEXT - IDMAP_TEXT -#ifdef CONFIG_MMU - *(.fixup) -#endif *(.gnu.warning) *(.glue_7) *(.glue_7t) @@ -346,8 +343,11 @@ SECTIONS */ ASSERT((__proc_info_end - __proc_info_begin), "missing CPU support") ASSERT((__arch_info_end - __arch_info_begin), "no machine record defined") + /* - * The HYP init code can't be more than a page long. + * The HYP init code can't be more than a page long, + * and should not cross a page boundary. * The above comment applies as well. */ -ASSERT(((__hyp_idmap_text_end - __hyp_idmap_text_start) <= PAGE_SIZE), "HYP init code too big") +ASSERT(__hyp_idmap_text_end - (__hyp_idmap_text_start & PAGE_MASK) <= PAGE_SIZE, + "HYP init code too big or misaligned") |
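
A quick way to double-check the instruction mask chosen for the TCM undef hook in tcm.c is to mask the two MRC encodings quoted in its comment (there shown with r6 as the destination register) and confirm that both reduce to TCM_REGION_READ_INSTR. The snippet below is an illustrative, host-side check only, not part of the patch: the two #define constants are copied from the patch, while main(), the sample encodings and the loop over Rt are just scaffolding for the demonstration.

#include <assert.h>
#include <stdint.h>

#define TCM_REGION_READ_MASK	0xffff0fdfU
#define TCM_REGION_READ_INSTR	0xee190f11U

int main(void)
{
	/* mrc p15, 0, r6, c9, c1, {0|1} -- the two reads quoted in the comment */
	uint32_t dtcm_read = 0xee196f11;
	uint32_t itcm_read = 0xee196f31;

	assert((dtcm_read & TCM_REGION_READ_MASK) == TCM_REGION_READ_INSTR);
	assert((itcm_read & TCM_REGION_READ_MASK) == TCM_REGION_READ_INSTR);

	/* The Rt field (bits 15:12) is masked out, so any destination matches. */
	for (uint32_t rt = 0; rt < 16; rt++) {
		uint32_t insn = 0xee190f11 | (rt << 12);
		assert((insn & TCM_REGION_READ_MASK) == TCM_REGION_READ_INSTR);
	}
	return 0;
}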
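
The update_vsyscall() comment in vdso.c spells out the sequence-count protocol userspace is expected to follow against the [vvar] page. Below is a minimal reader sketch under stated assumptions: struct vdso_data_view and read_coarse_time() are hypothetical stand-ins covering only the coarse-clock fields touched here (the real layout lives in asm/vdso_datapage.h), and smp_rmb() is assumed to be an ARMv7 "dmb ish".

#include <stdint.h>

struct vdso_data_view {			/* hypothetical subset of struct vdso_data */
	uint32_t seq_count;		/* odd while an update is in progress */
	uint32_t xtime_coarse_sec;
	uint32_t xtime_coarse_nsec;
};

#define smp_rmb()	__asm__ __volatile__("dmb ish" : : : "memory")

static void read_coarse_time(const volatile struct vdso_data_view *vd,
			     uint32_t *sec, uint32_t *nsec)
{
	uint32_t seq;

	do {
		/* Wait for an even sequence count: no update in flight. */
		do {
			seq = vd->seq_count;
		} while (seq & 1);
		smp_rmb();		/* pairs with smp_wmb() in vdso_write_begin() */

		*sec  = vd->xtime_coarse_sec;
		*nsec = vd->xtime_coarse_nsec;

		smp_rmb();		/* pairs with smp_wmb() in vdso_write_end() */
		/* Retry if the writer ran while we were reading. */
	} while (vd->seq_count != seq);
}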
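
The reworked ASSERT at the end of vmlinux.lds.S measures the HYP init code from the start of its page rather than from its own start address, which is what catches a section that fits within a page in size but still straddles a page boundary. A small worked example with hypothetical addresses (illustration only, 4 KiB pages assumed):

#include <assert.h>
#include <stdint.h>

#define PAGE_SIZE	0x1000UL
#define PAGE_MASK	(~(PAGE_SIZE - 1))

int main(void)
{
	/* 0x200 bytes of HYP idmap text starting 0x100 before a page boundary */
	uint32_t start = 0xc0100f00, end = 0xc0101100;

	assert(end - start <= PAGE_SIZE);			/* old check: passes */
	assert(!(end - (start & PAGE_MASK) <= PAGE_SIZE));	/* new check: trips, as intended */
	return 0;
}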