diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2014-08-05 19:05:29 +0200 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2014-08-05 19:05:29 +0200 |
commit | c489d98c8c81a898cfed6bec193cca2006f956aa (patch) | |
tree | 4cc9b571c9bb2380e6b11828cc843f3ceeb5dcf4 | |
parent | Merge branch 'for-next' of git://git.kernel.org/pub/scm/linux/kernel/git/gerg... (diff) | |
parent | Merge branch 'devel-stable' into for-next (diff) | |
download | linux-c489d98c8c81a898cfed6bec193cca2006f956aa.tar.xz linux-c489d98c8c81a898cfed6bec193cca2006f956aa.zip |
Merge branch 'for-linus' of git://ftp.arm.linux.org.uk/~rmk/linux-arm
Pull ARM updates from Russell King:
"Included in this update:
- perf updates from Will Deacon:
The main changes are callchain stability fixes from Jean Pihet and
event mapping and PMU name rework from Mark Rutland
The latter is preparatory work for enabling some code re-use with
arm64 in the future.
- updates for nommu from Uwe Kleine-König:
Two different fixes for the same problem making some ARM nommu
configurations not boot since 3.6-rc1. The problem is that
user_addr_max returned the biggest available RAM address which
makes some copy_from_user variants fail to read from XIP memory.
- deprecate legacy OMAP DMA API, in preparation for it's removal.
The popular drivers have been converted over, leaving a very small
number of rarely used drivers, which hopefully can be converted
during the next cycle with a bit more visibility (and hopefully
people popping out of the woodwork to help test)
- more tweaks for BE systems, particularly with the kernel image
format. In connection with this, I've cleaned up the way we
generate the linker script for the decompressor.
- removal of hard-coded assumptions of the kernel stack size, making
everywhere depend on the value of THREAD_SIZE_ORDER.
- MCPM updates from Nicolas Pitre.
- Make it easier for proper CPU part number checks (which should
always include the vendor field).
- Assembly code optimisation - use the "bx" instruction when
returning from a function on ARMv6+ rather than "mov pc, reg".
- Save the last kernel misaligned fault location and report it via
the procfs alignment file.
- Clean up the way we create the initial stack frame, which is a
repeated pattern in several different locations.
- Support for 8-byte get_user(), needed for some DRM implementations.
- mcs locking from Will Deacon.
- Save and restore a few more Cortex-A9 registers (for errata
workarounds)
- Fix various aspects of the SWP emulation, and the ELF hwcap for the
SWP instruction.
- Update LPAE logic for pte_write and pmd_write to make it more
correct.
- Support for Broadcom Brahma15 CPU cores.
- ARM assembly crypto updates from Ard Biesheuvel"
* 'for-linus' of git://ftp.arm.linux.org.uk/~rmk/linux-arm: (53 commits)
ARM: add comments to the early page table remap code
ARM: 8122/1: smp_scu: enable SCU standby support
ARM: 8121/1: smp_scu: use macro for SCU enable bit
ARM: 8120/1: crypto: sha512: add ARM NEON implementation
ARM: 8119/1: crypto: sha1: add ARM NEON implementation
ARM: 8118/1: crypto: sha1/make use of common SHA-1 structures
ARM: 8113/1: remove remaining definitions of PLAT_PHYS_OFFSET from <mach/memory.h>
ARM: 8111/1: Enable erratum 798181 for Broadcom Brahma-B15
ARM: 8110/1: do CPU-specific init for Broadcom Brahma15 cores
ARM: 8109/1: mm: Modify pte_write and pmd_write logic for LPAE
ARM: 8108/1: mm: Introduce {pte,pmd}_isset and {pte,pmd}_isclear
ARM: hwcap: disable HWCAP_SWP if the CPU advertises it has exclusives
ARM: SWP emulation: only initialise on ARMv7 CPUs
ARM: SWP emulation: always enable when SMP is enabled
ARM: 8103/1: save/restore Cortex-A9 CP15 registers on suspend/resume
ARM: 8098/1: mcs lock: implement wfe-based polling for MCS locking
ARM: 8091/2: add get_user() support for 8 byte types
ARM: 8097/1: unistd.h: relocate comments back to place
ARM: 8096/1: Describe required sort order for textofs-y (TEXT_OFFSET)
ARM: 8090/1: add revision info for PL310 errata 588369 and 727915
...
177 files changed, 3213 insertions, 2006 deletions
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index 290f02ee0157..1e14b9068a39 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -263,8 +263,22 @@ config NEED_MACH_MEMORY_H config PHYS_OFFSET hex "Physical address of main memory" if MMU - depends on !ARM_PATCH_PHYS_VIRT && !NEED_MACH_MEMORY_H + depends on !ARM_PATCH_PHYS_VIRT default DRAM_BASE if !MMU + default 0x00000000 if ARCH_EBSA110 || \ + EP93XX_SDCE3_SYNC_PHYS_OFFSET || \ + ARCH_FOOTBRIDGE || \ + ARCH_INTEGRATOR || \ + ARCH_IOP13XX || \ + ARCH_KS8695 || \ + (ARCH_REALVIEW && !REALVIEW_HIGH_PHYS_OFFSET) + default 0x10000000 if ARCH_OMAP1 || ARCH_RPC + default 0x20000000 if ARCH_S5PV210 + default 0x70000000 if REALVIEW_HIGH_PHYS_OFFSET + default 0xc0000000 if EP93XX_SDCE0_PHYS_OFFSET || ARCH_SA1100 + default 0xd0000000 if EP93XX_SDCE1_PHYS_OFFSET + default 0xe0000000 if EP93XX_SDCE2_PHYS_OFFSET + default 0xf0000000 if EP93XX_SDCE3_ASYNC_PHYS_OFFSET help Please provide the physical address corresponding to the location of main memory in your system. @@ -436,7 +450,6 @@ config ARCH_EP93XX select ARM_VIC select CLKDEV_LOOKUP select CPU_ARM920T - select NEED_MACH_MEMORY_H help This enables support for the Cirrus EP93xx series of CPUs. diff --git a/arch/arm/Makefile b/arch/arm/Makefile index 6721fab13734..718913dfe815 100644 --- a/arch/arm/Makefile +++ b/arch/arm/Makefile @@ -127,6 +127,9 @@ CHECKFLAGS += -D__arm__ #Default value head-y := arch/arm/kernel/head$(MMUEXT).o + +# Text offset. This list is sorted numerically by address in order to +# provide a means to avoid/resolve conflicts in multi-arch kernels. textofs-y := 0x00008000 textofs-$(CONFIG_ARCH_CLPS711X) := 0x00028000 # We don't want the htc bootloader to corrupt kernel during resume diff --git a/arch/arm/boot/compressed/Makefile b/arch/arm/boot/compressed/Makefile index 68c918362b79..76a50ecae1c3 100644 --- a/arch/arm/boot/compressed/Makefile +++ b/arch/arm/boot/compressed/Makefile @@ -81,7 +81,7 @@ ZTEXTADDR := 0 ZBSSADDR := ALIGN(8) endif -SEDFLAGS = s/TEXT_START/$(ZTEXTADDR)/;s/BSS_START/$(ZBSSADDR)/ +CPPFLAGS_vmlinux.lds := -DTEXT_START="$(ZTEXTADDR)" -DBSS_START="$(ZBSSADDR)" suffix_$(CONFIG_KERNEL_GZIP) = gzip suffix_$(CONFIG_KERNEL_LZO) = lzo @@ -199,8 +199,5 @@ CFLAGS_font.o := -Dstatic= $(obj)/font.c: $(FONTC) $(call cmd,shipped) -$(obj)/vmlinux.lds: $(obj)/vmlinux.lds.in arch/arm/boot/Makefile $(KCONFIG_CONFIG) - @sed "$(SEDFLAGS)" < $< > $@ - $(obj)/hyp-stub.S: $(srctree)/arch/$(SRCARCH)/kernel/hyp-stub.S $(call cmd,shipped) diff --git a/arch/arm/boot/compressed/head.S b/arch/arm/boot/compressed/head.S index 3a8b32df6b31..413fd94b5301 100644 --- a/arch/arm/boot/compressed/head.S +++ b/arch/arm/boot/compressed/head.S @@ -125,9 +125,11 @@ start: THUMB( adr r12, BSYM(1f) ) THUMB( bx r12 ) - .word 0x016f2818 @ Magic numbers to help the loader - .word start @ absolute load/run zImage address - .word _edata @ zImage end address + .word _magic_sig @ Magic numbers to help the loader + .word _magic_start @ absolute load/run zImage address + .word _magic_end @ zImage end address + .word 0x04030201 @ endianness flag + THUMB( .thumb ) 1: ARM_BE8( setend be ) @ go BE8 if compiled for BE8 diff --git a/arch/arm/boot/compressed/vmlinux.lds.in b/arch/arm/boot/compressed/vmlinux.lds.S index 4919f2ac8b89..2b60b843ac5e 100644 --- a/arch/arm/boot/compressed/vmlinux.lds.in +++ b/arch/arm/boot/compressed/vmlinux.lds.S @@ -1,12 +1,20 @@ /* - * linux/arch/arm/boot/compressed/vmlinux.lds.in - * * Copyright (C) 2000 Russell King * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as * published by the Free Software Foundation. */ + +#ifdef CONFIG_CPU_ENDIAN_BE8 +#define ZIMAGE_MAGIC(x) ( (((x) >> 24) & 0x000000ff) | \ + (((x) >> 8) & 0x0000ff00) | \ + (((x) << 8) & 0x00ff0000) | \ + (((x) << 24) & 0xff000000) ) +#else +#define ZIMAGE_MAGIC(x) (x) +#endif + OUTPUT_ARCH(arm) ENTRY(_start) SECTIONS @@ -57,6 +65,10 @@ SECTIONS .pad : { BYTE(0); . = ALIGN(8); } _edata = .; + _magic_sig = ZIMAGE_MAGIC(0x016f2818); + _magic_start = ZIMAGE_MAGIC(_start); + _magic_end = ZIMAGE_MAGIC(_edata); + . = BSS_START; __bss_start = .; .bss : { *(.bss) } @@ -73,4 +85,3 @@ SECTIONS .stab.indexstr 0 : { *(.stab.indexstr) } .comment 0 : { *(.comment) } } - diff --git a/arch/arm/common/mcpm_entry.c b/arch/arm/common/mcpm_entry.c index f91136ab447e..3c165fc2dce2 100644 --- a/arch/arm/common/mcpm_entry.c +++ b/arch/arm/common/mcpm_entry.c @@ -12,11 +12,13 @@ #include <linux/kernel.h> #include <linux/init.h> #include <linux/irqflags.h> +#include <linux/cpu_pm.h> #include <asm/mcpm.h> #include <asm/cacheflush.h> #include <asm/idmap.h> #include <asm/cputype.h> +#include <asm/suspend.h> extern unsigned long mcpm_entry_vectors[MAX_NR_CLUSTERS][MAX_CPUS_PER_CLUSTER]; @@ -146,6 +148,56 @@ int mcpm_cpu_powered_up(void) return 0; } +#ifdef CONFIG_ARM_CPU_SUSPEND + +static int __init nocache_trampoline(unsigned long _arg) +{ + void (*cache_disable)(void) = (void *)_arg; + unsigned int mpidr = read_cpuid_mpidr(); + unsigned int cpu = MPIDR_AFFINITY_LEVEL(mpidr, 0); + unsigned int cluster = MPIDR_AFFINITY_LEVEL(mpidr, 1); + phys_reset_t phys_reset; + + mcpm_set_entry_vector(cpu, cluster, cpu_resume); + setup_mm_for_reboot(); + + __mcpm_cpu_going_down(cpu, cluster); + BUG_ON(!__mcpm_outbound_enter_critical(cpu, cluster)); + cache_disable(); + __mcpm_outbound_leave_critical(cluster, CLUSTER_DOWN); + __mcpm_cpu_down(cpu, cluster); + + phys_reset = (phys_reset_t)(unsigned long)virt_to_phys(cpu_reset); + phys_reset(virt_to_phys(mcpm_entry_point)); + BUG(); +} + +int __init mcpm_loopback(void (*cache_disable)(void)) +{ + int ret; + + /* + * We're going to soft-restart the current CPU through the + * low-level MCPM code by leveraging the suspend/resume + * infrastructure. Let's play it safe by using cpu_pm_enter() + * in case the CPU init code path resets the VFP or similar. + */ + local_irq_disable(); + local_fiq_disable(); + ret = cpu_pm_enter(); + if (!ret) { + ret = cpu_suspend((unsigned long)cache_disable, nocache_trampoline); + cpu_pm_exit(); + } + local_fiq_enable(); + local_irq_enable(); + if (ret) + pr_err("%s returned %d\n", __func__, ret); + return ret; +} + +#endif + struct sync_struct mcpm_sync; /* diff --git a/arch/arm/crypto/Makefile b/arch/arm/crypto/Makefile index 81cda39860c5..b48fa341648d 100644 --- a/arch/arm/crypto/Makefile +++ b/arch/arm/crypto/Makefile @@ -5,10 +5,14 @@ obj-$(CONFIG_CRYPTO_AES_ARM) += aes-arm.o obj-$(CONFIG_CRYPTO_AES_ARM_BS) += aes-arm-bs.o obj-$(CONFIG_CRYPTO_SHA1_ARM) += sha1-arm.o +obj-$(CONFIG_CRYPTO_SHA1_ARM_NEON) += sha1-arm-neon.o +obj-$(CONFIG_CRYPTO_SHA512_ARM_NEON) += sha512-arm-neon.o aes-arm-y := aes-armv4.o aes_glue.o aes-arm-bs-y := aesbs-core.o aesbs-glue.o sha1-arm-y := sha1-armv4-large.o sha1_glue.o +sha1-arm-neon-y := sha1-armv7-neon.o sha1_neon_glue.o +sha512-arm-neon-y := sha512-armv7-neon.o sha512_neon_glue.o quiet_cmd_perl = PERL $@ cmd_perl = $(PERL) $(<) > $(@) diff --git a/arch/arm/crypto/aes-armv4.S b/arch/arm/crypto/aes-armv4.S index 3a14ea8fe97e..ebb9761fb572 100644 --- a/arch/arm/crypto/aes-armv4.S +++ b/arch/arm/crypto/aes-armv4.S @@ -35,6 +35,7 @@ @ that is being targetted. #include <linux/linkage.h> +#include <asm/assembler.h> .text @@ -648,7 +649,7 @@ _armv4_AES_set_encrypt_key: .Ldone: mov r0,#0 ldmia sp!,{r4-r12,lr} -.Labrt: mov pc,lr +.Labrt: ret lr ENDPROC(private_AES_set_encrypt_key) .align 5 diff --git a/arch/arm/crypto/sha1-armv7-neon.S b/arch/arm/crypto/sha1-armv7-neon.S new file mode 100644 index 000000000000..50013c0e2864 --- /dev/null +++ b/arch/arm/crypto/sha1-armv7-neon.S @@ -0,0 +1,634 @@ +/* sha1-armv7-neon.S - ARM/NEON accelerated SHA-1 transform function + * + * Copyright © 2013-2014 Jussi Kivilinna <jussi.kivilinna@iki.fi> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + */ + +#include <linux/linkage.h> + + +.syntax unified +.code 32 +.fpu neon + +.text + + +/* Context structure */ + +#define state_h0 0 +#define state_h1 4 +#define state_h2 8 +#define state_h3 12 +#define state_h4 16 + + +/* Constants */ + +#define K1 0x5A827999 +#define K2 0x6ED9EBA1 +#define K3 0x8F1BBCDC +#define K4 0xCA62C1D6 +.align 4 +.LK_VEC: +.LK1: .long K1, K1, K1, K1 +.LK2: .long K2, K2, K2, K2 +.LK3: .long K3, K3, K3, K3 +.LK4: .long K4, K4, K4, K4 + + +/* Register macros */ + +#define RSTATE r0 +#define RDATA r1 +#define RNBLKS r2 +#define ROLDSTACK r3 +#define RWK lr + +#define _a r4 +#define _b r5 +#define _c r6 +#define _d r7 +#define _e r8 + +#define RT0 r9 +#define RT1 r10 +#define RT2 r11 +#define RT3 r12 + +#define W0 q0 +#define W1 q1 +#define W2 q2 +#define W3 q3 +#define W4 q4 +#define W5 q5 +#define W6 q6 +#define W7 q7 + +#define tmp0 q8 +#define tmp1 q9 +#define tmp2 q10 +#define tmp3 q11 + +#define qK1 q12 +#define qK2 q13 +#define qK3 q14 +#define qK4 q15 + + +/* Round function macros. */ + +#define WK_offs(i) (((i) & 15) * 4) + +#define _R_F1(a,b,c,d,e,i,pre1,pre2,pre3,i16,\ + W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ + ldr RT3, [sp, WK_offs(i)]; \ + pre1(i16,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28); \ + bic RT0, d, b; \ + add e, e, a, ror #(32 - 5); \ + and RT1, c, b; \ + pre2(i16,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28); \ + add RT0, RT0, RT3; \ + add e, e, RT1; \ + ror b, #(32 - 30); \ + pre3(i16,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28); \ + add e, e, RT0; + +#define _R_F2(a,b,c,d,e,i,pre1,pre2,pre3,i16,\ + W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ + ldr RT3, [sp, WK_offs(i)]; \ + pre1(i16,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28); \ + eor RT0, d, b; \ + add e, e, a, ror #(32 - 5); \ + eor RT0, RT0, c; \ + pre2(i16,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28); \ + add e, e, RT3; \ + ror b, #(32 - 30); \ + pre3(i16,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28); \ + add e, e, RT0; \ + +#define _R_F3(a,b,c,d,e,i,pre1,pre2,pre3,i16,\ + W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ + ldr RT3, [sp, WK_offs(i)]; \ + pre1(i16,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28); \ + eor RT0, b, c; \ + and RT1, b, c; \ + add e, e, a, ror #(32 - 5); \ + pre2(i16,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28); \ + and RT0, RT0, d; \ + add RT1, RT1, RT3; \ + add e, e, RT0; \ + ror b, #(32 - 30); \ + pre3(i16,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28); \ + add e, e, RT1; + +#define _R_F4(a,b,c,d,e,i,pre1,pre2,pre3,i16,\ + W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ + _R_F2(a,b,c,d,e,i,pre1,pre2,pre3,i16,\ + W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) + +#define _R(a,b,c,d,e,f,i,pre1,pre2,pre3,i16,\ + W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ + _R_##f(a,b,c,d,e,i,pre1,pre2,pre3,i16,\ + W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) + +#define R(a,b,c,d,e,f,i) \ + _R_##f(a,b,c,d,e,i,dummy,dummy,dummy,i16,\ + W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) + +#define dummy(...) + + +/* Input expansion macros. */ + +/********* Precalc macros for rounds 0-15 *************************************/ + +#define W_PRECALC_00_15() \ + add RWK, sp, #(WK_offs(0)); \ + \ + vld1.32 {tmp0, tmp1}, [RDATA]!; \ + vrev32.8 W0, tmp0; /* big => little */ \ + vld1.32 {tmp2, tmp3}, [RDATA]!; \ + vadd.u32 tmp0, W0, curK; \ + vrev32.8 W7, tmp1; /* big => little */ \ + vrev32.8 W6, tmp2; /* big => little */ \ + vadd.u32 tmp1, W7, curK; \ + vrev32.8 W5, tmp3; /* big => little */ \ + vadd.u32 tmp2, W6, curK; \ + vst1.32 {tmp0, tmp1}, [RWK]!; \ + vadd.u32 tmp3, W5, curK; \ + vst1.32 {tmp2, tmp3}, [RWK]; \ + +#define WPRECALC_00_15_0(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ + vld1.32 {tmp0, tmp1}, [RDATA]!; \ + +#define WPRECALC_00_15_1(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ + add RWK, sp, #(WK_offs(0)); \ + +#define WPRECALC_00_15_2(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ + vrev32.8 W0, tmp0; /* big => little */ \ + +#define WPRECALC_00_15_3(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ + vld1.32 {tmp2, tmp3}, [RDATA]!; \ + +#define WPRECALC_00_15_4(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ + vadd.u32 tmp0, W0, curK; \ + +#define WPRECALC_00_15_5(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ + vrev32.8 W7, tmp1; /* big => little */ \ + +#define WPRECALC_00_15_6(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ + vrev32.8 W6, tmp2; /* big => little */ \ + +#define WPRECALC_00_15_7(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ + vadd.u32 tmp1, W7, curK; \ + +#define WPRECALC_00_15_8(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ + vrev32.8 W5, tmp3; /* big => little */ \ + +#define WPRECALC_00_15_9(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ + vadd.u32 tmp2, W6, curK; \ + +#define WPRECALC_00_15_10(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ + vst1.32 {tmp0, tmp1}, [RWK]!; \ + +#define WPRECALC_00_15_11(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ + vadd.u32 tmp3, W5, curK; \ + +#define WPRECALC_00_15_12(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ + vst1.32 {tmp2, tmp3}, [RWK]; \ + + +/********* Precalc macros for rounds 16-31 ************************************/ + +#define WPRECALC_16_31_0(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ + veor tmp0, tmp0; \ + vext.8 W, W_m16, W_m12, #8; \ + +#define WPRECALC_16_31_1(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ + add RWK, sp, #(WK_offs(i)); \ + vext.8 tmp0, W_m04, tmp0, #4; \ + +#define WPRECALC_16_31_2(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ + veor tmp0, tmp0, W_m16; \ + veor.32 W, W, W_m08; \ + +#define WPRECALC_16_31_3(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ + veor tmp1, tmp1; \ + veor W, W, tmp0; \ + +#define WPRECALC_16_31_4(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ + vshl.u32 tmp0, W, #1; \ + +#define WPRECALC_16_31_5(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ + vext.8 tmp1, tmp1, W, #(16-12); \ + vshr.u32 W, W, #31; \ + +#define WPRECALC_16_31_6(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ + vorr tmp0, tmp0, W; \ + vshr.u32 W, tmp1, #30; \ + +#define WPRECALC_16_31_7(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ + vshl.u32 tmp1, tmp1, #2; \ + +#define WPRECALC_16_31_8(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ + veor tmp0, tmp0, W; \ + +#define WPRECALC_16_31_9(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ + veor W, tmp0, tmp1; \ + +#define WPRECALC_16_31_10(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ + vadd.u32 tmp0, W, curK; \ + +#define WPRECALC_16_31_11(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ + vst1.32 {tmp0}, [RWK]; + + +/********* Precalc macros for rounds 32-79 ************************************/ + +#define WPRECALC_32_79_0(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ + veor W, W_m28; \ + +#define WPRECALC_32_79_1(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ + vext.8 tmp0, W_m08, W_m04, #8; \ + +#define WPRECALC_32_79_2(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ + veor W, W_m16; \ + +#define WPRECALC_32_79_3(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ + veor W, tmp0; \ + +#define WPRECALC_32_79_4(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ + add RWK, sp, #(WK_offs(i&~3)); \ + +#define WPRECALC_32_79_5(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ + vshl.u32 tmp1, W, #2; \ + +#define WPRECALC_32_79_6(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ + vshr.u32 tmp0, W, #30; \ + +#define WPRECALC_32_79_7(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ + vorr W, tmp0, tmp1; \ + +#define WPRECALC_32_79_8(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ + vadd.u32 tmp0, W, curK; \ + +#define WPRECALC_32_79_9(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ + vst1.32 {tmp0}, [RWK]; + + +/* + * Transform nblks*64 bytes (nblks*16 32-bit words) at DATA. + * + * unsigned int + * sha1_transform_neon (void *ctx, const unsigned char *data, + * unsigned int nblks) + */ +.align 3 +ENTRY(sha1_transform_neon) + /* input: + * r0: ctx, CTX + * r1: data (64*nblks bytes) + * r2: nblks + */ + + cmp RNBLKS, #0; + beq .Ldo_nothing; + + push {r4-r12, lr}; + /*vpush {q4-q7};*/ + + adr RT3, .LK_VEC; + + mov ROLDSTACK, sp; + + /* Align stack. */ + sub RT0, sp, #(16*4); + and RT0, #(~(16-1)); + mov sp, RT0; + + vld1.32 {qK1-qK2}, [RT3]!; /* Load K1,K2 */ + + /* Get the values of the chaining variables. */ + ldm RSTATE, {_a-_e}; + + vld1.32 {qK3-qK4}, [RT3]; /* Load K3,K4 */ + +#undef curK +#define curK qK1 + /* Precalc 0-15. */ + W_PRECALC_00_15(); + +.Loop: + /* Transform 0-15 + Precalc 16-31. */ + _R( _a, _b, _c, _d, _e, F1, 0, + WPRECALC_16_31_0, WPRECALC_16_31_1, WPRECALC_16_31_2, 16, + W4, W5, W6, W7, W0, _, _, _ ); + _R( _e, _a, _b, _c, _d, F1, 1, + WPRECALC_16_31_3, WPRECALC_16_31_4, WPRECALC_16_31_5, 16, + W4, W5, W6, W7, W0, _, _, _ ); + _R( _d, _e, _a, _b, _c, F1, 2, + WPRECALC_16_31_6, WPRECALC_16_31_7, WPRECALC_16_31_8, 16, + W4, W5, W6, W7, W0, _, _, _ ); + _R( _c, _d, _e, _a, _b, F1, 3, + WPRECALC_16_31_9, WPRECALC_16_31_10,WPRECALC_16_31_11,16, + W4, W5, W6, W7, W0, _, _, _ ); + +#undef curK +#define curK qK2 + _R( _b, _c, _d, _e, _a, F1, 4, + WPRECALC_16_31_0, WPRECALC_16_31_1, WPRECALC_16_31_2, 20, + W3, W4, W5, W6, W7, _, _, _ ); + _R( _a, _b, _c, _d, _e, F1, 5, + WPRECALC_16_31_3, WPRECALC_16_31_4, WPRECALC_16_31_5, 20, + W3, W4, W5, W6, W7, _, _, _ ); + _R( _e, _a, _b, _c, _d, F1, 6, + WPRECALC_16_31_6, WPRECALC_16_31_7, WPRECALC_16_31_8, 20, + W3, W4, W5, W6, W7, _, _, _ ); + _R( _d, _e, _a, _b, _c, F1, 7, + WPRECALC_16_31_9, WPRECALC_16_31_10,WPRECALC_16_31_11,20, + W3, W4, W5, W6, W7, _, _, _ ); + + _R( _c, _d, _e, _a, _b, F1, 8, + WPRECALC_16_31_0, WPRECALC_16_31_1, WPRECALC_16_31_2, 24, + W2, W3, W4, W5, W6, _, _, _ ); + _R( _b, _c, _d, _e, _a, F1, 9, + WPRECALC_16_31_3, WPRECALC_16_31_4, WPRECALC_16_31_5, 24, + W2, W3, W4, W5, W6, _, _, _ ); + _R( _a, _b, _c, _d, _e, F1, 10, + WPRECALC_16_31_6, WPRECALC_16_31_7, WPRECALC_16_31_8, 24, + W2, W3, W4, W5, W6, _, _, _ ); + _R( _e, _a, _b, _c, _d, F1, 11, + WPRECALC_16_31_9, WPRECALC_16_31_10,WPRECALC_16_31_11,24, + W2, W3, W4, W5, W6, _, _, _ ); + + _R( _d, _e, _a, _b, _c, F1, 12, + WPRECALC_16_31_0, WPRECALC_16_31_1, WPRECALC_16_31_2, 28, + W1, W2, W3, W4, W5, _, _, _ ); + _R( _c, _d, _e, _a, _b, F1, 13, + WPRECALC_16_31_3, WPRECALC_16_31_4, WPRECALC_16_31_5, 28, + W1, W2, W3, W4, W5, _, _, _ ); + _R( _b, _c, _d, _e, _a, F1, 14, + WPRECALC_16_31_6, WPRECALC_16_31_7, WPRECALC_16_31_8, 28, + W1, W2, W3, W4, W5, _, _, _ ); + _R( _a, _b, _c, _d, _e, F1, 15, + WPRECALC_16_31_9, WPRECALC_16_31_10,WPRECALC_16_31_11,28, + W1, W2, W3, W4, W5, _, _, _ ); + + /* Transform 16-63 + Precalc 32-79. */ + _R( _e, _a, _b, _c, _d, F1, 16, + WPRECALC_32_79_0, WPRECALC_32_79_1, WPRECALC_32_79_2, 32, + W0, W1, W2, W3, W4, W5, W6, W7); + _R( _d, _e, _a, _b, _c, F1, 17, + WPRECALC_32_79_3, WPRECALC_32_79_4, WPRECALC_32_79_5, 32, + W0, W1, W2, W3, W4, W5, W6, W7); + _R( _c, _d, _e, _a, _b, F1, 18, + WPRECALC_32_79_6, dummy, WPRECALC_32_79_7, 32, + W0, W1, W2, W3, W4, W5, W6, W7); + _R( _b, _c, _d, _e, _a, F1, 19, + WPRECALC_32_79_8, dummy, WPRECALC_32_79_9, 32, + W0, W1, W2, W3, W4, W5, W6, W7); + + _R( _a, _b, _c, _d, _e, F2, 20, + WPRECALC_32_79_0, WPRECALC_32_79_1, WPRECALC_32_79_2, 36, + W7, W0, W1, W2, W3, W4, W5, W6); + _R( _e, _a, _b, _c, _d, F2, 21, + WPRECALC_32_79_3, WPRECALC_32_79_4, WPRECALC_32_79_5, 36, + W7, W0, W1, W2, W3, W4, W5, W6); + _R( _d, _e, _a, _b, _c, F2, 22, + WPRECALC_32_79_6, dummy, WPRECALC_32_79_7, 36, + W7, W0, W1, W2, W3, W4, W5, W6); + _R( _c, _d, _e, _a, _b, F2, 23, + WPRECALC_32_79_8, dummy, WPRECALC_32_79_9, 36, + W7, W0, W1, W2, W3, W4, W5, W6); + +#undef curK +#define curK qK3 + _R( _b, _c, _d, _e, _a, F2, 24, + WPRECALC_32_79_0, WPRECALC_32_79_1, WPRECALC_32_79_2, 40, + W6, W7, W0, W1, W2, W3, W4, W5); + _R( _a, _b, _c, _d, _e, F2, 25, + WPRECALC_32_79_3, WPRECALC_32_79_4, WPRECALC_32_79_5, 40, + W6, W7, W0, W1, W2, W3, W4, W5); + _R( _e, _a, _b, _c, _d, F2, 26, + WPRECALC_32_79_6, dummy, WPRECALC_32_79_7, 40, + W6, W7, W0, W1, W2, W3, W4, W5); + _R( _d, _e, _a, _b, _c, F2, 27, + WPRECALC_32_79_8, dummy, WPRECALC_32_79_9, 40, + W6, W7, W0, W1, W2, W3, W4, W5); + + _R( _c, _d, _e, _a, _b, F2, 28, + WPRECALC_32_79_0, WPRECALC_32_79_1, WPRECALC_32_79_2, 44, + W5, W6, W7, W0, W1, W2, W3, W4); + _R( _b, _c, _d, _e, _a, F2, 29, + WPRECALC_32_79_3, WPRECALC_32_79_4, WPRECALC_32_79_5, 44, + W5, W6, W7, W0, W1, W2, W3, W4); + _R( _a, _b, _c, _d, _e, F2, 30, + WPRECALC_32_79_6, dummy, WPRECALC_32_79_7, 44, + W5, W6, W7, W0, W1, W2, W3, W4); + _R( _e, _a, _b, _c, _d, F2, 31, + WPRECALC_32_79_8, dummy, WPRECALC_32_79_9, 44, + W5, W6, W7, W0, W1, W2, W3, W4); + + _R( _d, _e, _a, _b, _c, F2, 32, + WPRECALC_32_79_0, WPRECALC_32_79_1, WPRECALC_32_79_2, 48, + W4, W5, W6, W7, W0, W1, W2, W3); + _R( _c, _d, _e, _a, _b, F2, 33, + WPRECALC_32_79_3, WPRECALC_32_79_4, WPRECALC_32_79_5, 48, + W4, W5, W6, W7, W0, W1, W2, W3); + _R( _b, _c, _d, _e, _a, F2, 34, + WPRECALC_32_79_6, dummy, WPRECALC_32_79_7, 48, + W4, W5, W6, W7, W0, W1, W2, W3); + _R( _a, _b, _c, _d, _e, F2, 35, + WPRECALC_32_79_8, dummy, WPRECALC_32_79_9, 48, + W4, W5, W6, W7, W0, W1, W2, W3); + + _R( _e, _a, _b, _c, _d, F2, 36, + WPRECALC_32_79_0, WPRECALC_32_79_1, WPRECALC_32_79_2, 52, + W3, W4, W5, W6, W7, W0, W1, W2); + _R( _d, _e, _a, _b, _c, F2, 37, + WPRECALC_32_79_3, WPRECALC_32_79_4, WPRECALC_32_79_5, 52, + W3, W4, W5, W6, W7, W0, W1, W2); + _R( _c, _d, _e, _a, _b, F2, 38, + WPRECALC_32_79_6, dummy, WPRECALC_32_79_7, 52, + W3, W4, W5, W6, W7, W0, W1, W2); + _R( _b, _c, _d, _e, _a, F2, 39, + WPRECALC_32_79_8, dummy, WPRECALC_32_79_9, 52, + W3, W4, W5, W6, W7, W0, W1, W2); + + _R( _a, _b, _c, _d, _e, F3, 40, + WPRECALC_32_79_0, WPRECALC_32_79_1, WPRECALC_32_79_2, 56, + W2, W3, W4, W5, W6, W7, W0, W1); + _R( _e, _a, _b, _c, _d, F3, 41, + WPRECALC_32_79_3, WPRECALC_32_79_4, WPRECALC_32_79_5, 56, + W2, W3, W4, W5, W6, W7, W0, W1); + _R( _d, _e, _a, _b, _c, F3, 42, + WPRECALC_32_79_6, dummy, WPRECALC_32_79_7, 56, + W2, W3, W4, W5, W6, W7, W0, W1); + _R( _c, _d, _e, _a, _b, F3, 43, + WPRECALC_32_79_8, dummy, WPRECALC_32_79_9, 56, + W2, W3, W4, W5, W6, W7, W0, W1); + +#undef curK +#define curK qK4 + _R( _b, _c, _d, _e, _a, F3, 44, + WPRECALC_32_79_0, WPRECALC_32_79_1, WPRECALC_32_79_2, 60, + W1, W2, W3, W4, W5, W6, W7, W0); + _R( _a, _b, _c, _d, _e, F3, 45, + WPRECALC_32_79_3, WPRECALC_32_79_4, WPRECALC_32_79_5, 60, + W1, W2, W3, W4, W5, W6, W7, W0); + _R( _e, _a, _b, _c, _d, F3, 46, + WPRECALC_32_79_6, dummy, WPRECALC_32_79_7, 60, + W1, W2, W3, W4, W5, W6, W7, W0); + _R( _d, _e, _a, _b, _c, F3, 47, + WPRECALC_32_79_8, dummy, WPRECALC_32_79_9, 60, + W1, W2, W3, W4, W5, W6, W7, W0); + + _R( _c, _d, _e, _a, _b, F3, 48, + WPRECALC_32_79_0, WPRECALC_32_79_1, WPRECALC_32_79_2, 64, + W0, W1, W2, W3, W4, W5, W6, W7); + _R( _b, _c, _d, _e, _a, F3, 49, + WPRECALC_32_79_3, WPRECALC_32_79_4, WPRECALC_32_79_5, 64, + W0, W1, W2, W3, W4, W5, W6, W7); + _R( _a, _b, _c, _d, _e, F3, 50, + WPRECALC_32_79_6, dummy, WPRECALC_32_79_7, 64, + W0, W1, W2, W3, W4, W5, W6, W7); + _R( _e, _a, _b, _c, _d, F3, 51, + WPRECALC_32_79_8, dummy, WPRECALC_32_79_9, 64, + W0, W1, W2, W3, W4, W5, W6, W7); + + _R( _d, _e, _a, _b, _c, F3, 52, + WPRECALC_32_79_0, WPRECALC_32_79_1, WPRECALC_32_79_2, 68, + W7, W0, W1, W2, W3, W4, W5, W6); + _R( _c, _d, _e, _a, _b, F3, 53, + WPRECALC_32_79_3, WPRECALC_32_79_4, WPRECALC_32_79_5, 68, + W7, W0, W1, W2, W3, W4, W5, W6); + _R( _b, _c, _d, _e, _a, F3, 54, + WPRECALC_32_79_6, dummy, WPRECALC_32_79_7, 68, + W7, W0, W1, W2, W3, W4, W5, W6); + _R( _a, _b, _c, _d, _e, F3, 55, + WPRECALC_32_79_8, dummy, WPRECALC_32_79_9, 68, + W7, W0, W1, W2, W3, W4, W5, W6); + + _R( _e, _a, _b, _c, _d, F3, 56, + WPRECALC_32_79_0, WPRECALC_32_79_1, WPRECALC_32_79_2, 72, + W6, W7, W0, W1, W2, W3, W4, W5); + _R( _d, _e, _a, _b, _c, F3, 57, + WPRECALC_32_79_3, WPRECALC_32_79_4, WPRECALC_32_79_5, 72, + W6, W7, W0, W1, W2, W3, W4, W5); + _R( _c, _d, _e, _a, _b, F3, 58, + WPRECALC_32_79_6, dummy, WPRECALC_32_79_7, 72, + W6, W7, W0, W1, W2, W3, W4, W5); + _R( _b, _c, _d, _e, _a, F3, 59, + WPRECALC_32_79_8, dummy, WPRECALC_32_79_9, 72, + W6, W7, W0, W1, W2, W3, W4, W5); + + subs RNBLKS, #1; + + _R( _a, _b, _c, _d, _e, F4, 60, + WPRECALC_32_79_0, WPRECALC_32_79_1, WPRECALC_32_79_2, 76, + W5, W6, W7, W0, W1, W2, W3, W4); + _R( _e, _a, _b, _c, _d, F4, 61, + WPRECALC_32_79_3, WPRECALC_32_79_4, WPRECALC_32_79_5, 76, + W5, W6, W7, W0, W1, W2, W3, W4); + _R( _d, _e, _a, _b, _c, F4, 62, + WPRECALC_32_79_6, dummy, WPRECALC_32_79_7, 76, + W5, W6, W7, W0, W1, W2, W3, W4); + _R( _c, _d, _e, _a, _b, F4, 63, + WPRECALC_32_79_8, dummy, WPRECALC_32_79_9, 76, + W5, W6, W7, W0, W1, W2, W3, W4); + + beq .Lend; + + /* Transform 64-79 + Precalc 0-15 of next block. */ +#undef curK +#define curK qK1 + _R( _b, _c, _d, _e, _a, F4, 64, + WPRECALC_00_15_0, dummy, dummy, _, _, _, _, _, _, _, _, _ ); + _R( _a, _b, _c, _d, _e, F4, 65, + WPRECALC_00_15_1, dummy, dummy, _, _, _, _, _, _, _, _, _ ); + _R( _e, _a, _b, _c, _d, F4, 66, + WPRECALC_00_15_2, dummy, dummy, _, _, _, _, _, _, _, _, _ ); + _R( _d, _e, _a, _b, _c, F4, 67, + WPRECALC_00_15_3, dummy, dummy, _, _, _, _, _, _, _, _, _ ); + + _R( _c, _d, _e, _a, _b, F4, 68, + dummy, dummy, dummy, _, _, _, _, _, _, _, _, _ ); + _R( _b, _c, _d, _e, _a, F4, 69, + dummy, dummy, dummy, _, _, _, _, _, _, _, _, _ ); + _R( _a, _b, _c, _d, _e, F4, 70, + WPRECALC_00_15_4, dummy, dummy, _, _, _, _, _, _, _, _, _ ); + _R( _e, _a, _b, _c, _d, F4, 71, + WPRECALC_00_15_5, dummy, dummy, _, _, _, _, _, _, _, _, _ ); + + _R( _d, _e, _a, _b, _c, F4, 72, + dummy, dummy, dummy, _, _, _, _, _, _, _, _, _ ); + _R( _c, _d, _e, _a, _b, F4, 73, + dummy, dummy, dummy, _, _, _, _, _, _, _, _, _ ); + _R( _b, _c, _d, _e, _a, F4, 74, + WPRECALC_00_15_6, dummy, dummy, _, _, _, _, _, _, _, _, _ ); + _R( _a, _b, _c, _d, _e, F4, 75, + WPRECALC_00_15_7, dummy, dummy, _, _, _, _, _, _, _, _, _ ); + + _R( _e, _a, _b, _c, _d, F4, 76, + WPRECALC_00_15_8, dummy, dummy, _, _, _, _, _, _, _, _, _ ); + _R( _d, _e, _a, _b, _c, F4, 77, + WPRECALC_00_15_9, dummy, dummy, _, _, _, _, _, _, _, _, _ ); + _R( _c, _d, _e, _a, _b, F4, 78, + WPRECALC_00_15_10, dummy, dummy, _, _, _, _, _, _, _, _, _ ); + _R( _b, _c, _d, _e, _a, F4, 79, + WPRECALC_00_15_11, dummy, WPRECALC_00_15_12, _, _, _, _, _, _, _, _, _ ); + + /* Update the chaining variables. */ + ldm RSTATE, {RT0-RT3}; + add _a, RT0; + ldr RT0, [RSTATE, #state_h4]; + add _b, RT1; + add _c, RT2; + add _d, RT3; + add _e, RT0; + stm RSTATE, {_a-_e}; + + b .Loop; + +.Lend: + /* Transform 64-79 */ + R( _b, _c, _d, _e, _a, F4, 64 ); + R( _a, _b, _c, _d, _e, F4, 65 ); + R( _e, _a, _b, _c, _d, F4, 66 ); + R( _d, _e, _a, _b, _c, F4, 67 ); + R( _c, _d, _e, _a, _b, F4, 68 ); + R( _b, _c, _d, _e, _a, F4, 69 ); + R( _a, _b, _c, _d, _e, F4, 70 ); + R( _e, _a, _b, _c, _d, F4, 71 ); + R( _d, _e, _a, _b, _c, F4, 72 ); + R( _c, _d, _e, _a, _b, F4, 73 ); + R( _b, _c, _d, _e, _a, F4, 74 ); + R( _a, _b, _c, _d, _e, F4, 75 ); + R( _e, _a, _b, _c, _d, F4, 76 ); + R( _d, _e, _a, _b, _c, F4, 77 ); + R( _c, _d, _e, _a, _b, F4, 78 ); + R( _b, _c, _d, _e, _a, F4, 79 ); + + mov sp, ROLDSTACK; + + /* Update the chaining variables. */ + ldm RSTATE, {RT0-RT3}; + add _a, RT0; + ldr RT0, [RSTATE, #state_h4]; + add _b, RT1; + add _c, RT2; + add _d, RT3; + /*vpop {q4-q7};*/ + add _e, RT0; + stm RSTATE, {_a-_e}; + + pop {r4-r12, pc}; + +.Ldo_nothing: + bx lr +ENDPROC(sha1_transform_neon) diff --git a/arch/arm/crypto/sha1_glue.c b/arch/arm/crypto/sha1_glue.c index 76cd976230bc..84f2a756588b 100644 --- a/arch/arm/crypto/sha1_glue.c +++ b/arch/arm/crypto/sha1_glue.c @@ -23,32 +23,27 @@ #include <linux/types.h> #include <crypto/sha.h> #include <asm/byteorder.h> +#include <asm/crypto/sha1.h> -struct SHA1_CTX { - uint32_t h0,h1,h2,h3,h4; - u64 count; - u8 data[SHA1_BLOCK_SIZE]; -}; -asmlinkage void sha1_block_data_order(struct SHA1_CTX *digest, +asmlinkage void sha1_block_data_order(u32 *digest, const unsigned char *data, unsigned int rounds); static int sha1_init(struct shash_desc *desc) { - struct SHA1_CTX *sctx = shash_desc_ctx(desc); - memset(sctx, 0, sizeof(*sctx)); - sctx->h0 = SHA1_H0; - sctx->h1 = SHA1_H1; - sctx->h2 = SHA1_H2; - sctx->h3 = SHA1_H3; - sctx->h4 = SHA1_H4; + struct sha1_state *sctx = shash_desc_ctx(desc); + + *sctx = (struct sha1_state){ + .state = { SHA1_H0, SHA1_H1, SHA1_H2, SHA1_H3, SHA1_H4 }, + }; + return 0; } -static int __sha1_update(struct SHA1_CTX *sctx, const u8 *data, - unsigned int len, unsigned int partial) +static int __sha1_update(struct sha1_state *sctx, const u8 *data, + unsigned int len, unsigned int partial) { unsigned int done = 0; @@ -56,43 +51,44 @@ static int __sha1_update(struct SHA1_CTX *sctx, const u8 *data, if (partial) { done = SHA1_BLOCK_SIZE - partial; - memcpy(sctx->data + partial, data, done); - sha1_block_data_order(sctx, sctx->data, 1); + memcpy(sctx->buffer + partial, data, done); + sha1_block_data_order(sctx->state, sctx->buffer, 1); } if (len - done >= SHA1_BLOCK_SIZE) { const unsigned int rounds = (len - done) / SHA1_BLOCK_SIZE; - sha1_block_data_order(sctx, data + done, rounds); + sha1_block_data_order(sctx->state, data + done, rounds); done += rounds * SHA1_BLOCK_SIZE; } - memcpy(sctx->data, data + done, len - done); + memcpy(sctx->buffer, data + done, len - done); return 0; } -static int sha1_update(struct shash_desc *desc, const u8 *data, - unsigned int len) +int sha1_update_arm(struct shash_desc *desc, const u8 *data, + unsigned int len) { - struct SHA1_CTX *sctx = shash_desc_ctx(desc); + struct sha1_state *sctx = shash_desc_ctx(desc); unsigned int partial = sctx->count % SHA1_BLOCK_SIZE; int res; /* Handle the fast case right here */ if (partial + len < SHA1_BLOCK_SIZE) { sctx->count += len; - memcpy(sctx->data + partial, data, len); + memcpy(sctx->buffer + partial, data, len); return 0; } res = __sha1_update(sctx, data, len, partial); return res; } +EXPORT_SYMBOL_GPL(sha1_update_arm); /* Add padding and return the message digest. */ static int sha1_final(struct shash_desc *desc, u8 *out) { - struct SHA1_CTX *sctx = shash_desc_ctx(desc); + struct sha1_state *sctx = shash_desc_ctx(desc); unsigned int i, index, padlen; __be32 *dst = (__be32 *)out; __be64 bits; @@ -106,7 +102,7 @@ static int sha1_final(struct shash_desc *desc, u8 *out) /* We need to fill a whole block for __sha1_update() */ if (padlen <= 56) { sctx->count += padlen; - memcpy(sctx->data + index, padding, padlen); + memcpy(sctx->buffer + index, padding, padlen); } else { __sha1_update(sctx, padding, padlen, index); } @@ -114,7 +110,7 @@ static int sha1_final(struct shash_desc *desc, u8 *out) /* Store state in digest */ for (i = 0; i < 5; i++) - dst[i] = cpu_to_be32(((u32 *)sctx)[i]); + dst[i] = cpu_to_be32(sctx->state[i]); /* Wipe context */ memset(sctx, 0, sizeof(*sctx)); @@ -124,7 +120,7 @@ static int sha1_final(struct shash_desc *desc, u8 *out) static int sha1_export(struct shash_desc *desc, void *out) { - struct SHA1_CTX *sctx = shash_desc_ctx(desc); + struct sha1_state *sctx = shash_desc_ctx(desc); memcpy(out, sctx, sizeof(*sctx)); return 0; } @@ -132,7 +128,7 @@ static int sha1_export(struct shash_desc *desc, void *out) static int sha1_import(struct shash_desc *desc, const void *in) { - struct SHA1_CTX *sctx = shash_desc_ctx(desc); + struct sha1_state *sctx = shash_desc_ctx(desc); memcpy(sctx, in, sizeof(*sctx)); return 0; } @@ -141,12 +137,12 @@ static int sha1_import(struct shash_desc *desc, const void *in) static struct shash_alg alg = { .digestsize = SHA1_DIGEST_SIZE, .init = sha1_init, - .update = sha1_update, + .update = sha1_update_arm, .final = sha1_final, .export = sha1_export, .import = sha1_import, - .descsize = sizeof(struct SHA1_CTX), - .statesize = sizeof(struct SHA1_CTX), + .descsize = sizeof(struct sha1_state), + .statesize = sizeof(struct sha1_state), .base = { .cra_name = "sha1", .cra_driver_name= "sha1-asm", diff --git a/arch/arm/crypto/sha1_neon_glue.c b/arch/arm/crypto/sha1_neon_glue.c new file mode 100644 index 000000000000..6f1b411b1d55 --- /dev/null +++ b/arch/arm/crypto/sha1_neon_glue.c @@ -0,0 +1,197 @@ +/* + * Glue code for the SHA1 Secure Hash Algorithm assembler implementation using + * ARM NEON instructions. + * + * Copyright © 2014 Jussi Kivilinna <jussi.kivilinna@iki.fi> + * + * This file is based on sha1_generic.c and sha1_ssse3_glue.c: + * Copyright (c) Alan Smithee. + * Copyright (c) Andrew McDonald <andrew@mcdonald.org.uk> + * Copyright (c) Jean-Francois Dive <jef@linuxbe.org> + * Copyright (c) Mathias Krause <minipli@googlemail.com> + * Copyright (c) Chandramouli Narayanan <mouli@linux.intel.com> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + * + */ + +#include <crypto/internal/hash.h> +#include <linux/init.h> +#include <linux/module.h> +#include <linux/mm.h> +#include <linux/cryptohash.h> +#include <linux/types.h> +#include <crypto/sha.h> +#include <asm/byteorder.h> +#include <asm/neon.h> +#include <asm/simd.h> +#include <asm/crypto/sha1.h> + + +asmlinkage void sha1_transform_neon(void *state_h, const char *data, + unsigned int rounds); + + +static int sha1_neon_init(struct shash_desc *desc) +{ + struct sha1_state *sctx = shash_desc_ctx(desc); + + *sctx = (struct sha1_state){ + .state = { SHA1_H0, SHA1_H1, SHA1_H2, SHA1_H3, SHA1_H4 }, + }; + + return 0; +} + +static int __sha1_neon_update(struct shash_desc *desc, const u8 *data, + unsigned int len, unsigned int partial) +{ + struct sha1_state *sctx = shash_desc_ctx(desc); + unsigned int done = 0; + + sctx->count += len; + + if (partial) { + done = SHA1_BLOCK_SIZE - partial; + memcpy(sctx->buffer + partial, data, done); + sha1_transform_neon(sctx->state, sctx->buffer, 1); + } + + if (len - done >= SHA1_BLOCK_SIZE) { + const unsigned int rounds = (len - done) / SHA1_BLOCK_SIZE; + + sha1_transform_neon(sctx->state, data + done, rounds); + done += rounds * SHA1_BLOCK_SIZE; + } + + memcpy(sctx->buffer, data + done, len - done); + + return 0; +} + +static int sha1_neon_update(struct shash_desc *desc, const u8 *data, + unsigned int len) +{ + struct sha1_state *sctx = shash_desc_ctx(desc); + unsigned int partial = sctx->count % SHA1_BLOCK_SIZE; + int res; + + /* Handle the fast case right here */ + if (partial + len < SHA1_BLOCK_SIZE) { + sctx->count += len; + memcpy(sctx->buffer + partial, data, len); + + return 0; + } + + if (!may_use_simd()) { + res = sha1_update_arm(desc, data, len); + } else { + kernel_neon_begin(); + res = __sha1_neon_update(desc, data, len, partial); + kernel_neon_end(); + } + + return res; +} + + +/* Add padding and return the message digest. */ +static int sha1_neon_final(struct shash_desc *desc, u8 *out) +{ + struct sha1_state *sctx = shash_desc_ctx(desc); + unsigned int i, index, padlen; + __be32 *dst = (__be32 *)out; + __be64 bits; + static const u8 padding[SHA1_BLOCK_SIZE] = { 0x80, }; + + bits = cpu_to_be64(sctx->count << 3); + + /* Pad out to 56 mod 64 and append length */ + index = sctx->count % SHA1_BLOCK_SIZE; + padlen = (index < 56) ? (56 - index) : ((SHA1_BLOCK_SIZE+56) - index); + if (!may_use_simd()) { + sha1_update_arm(desc, padding, padlen); + sha1_update_arm(desc, (const u8 *)&bits, sizeof(bits)); + } else { + kernel_neon_begin(); + /* We need to fill a whole block for __sha1_neon_update() */ + if (padlen <= 56) { + sctx->count += padlen; + memcpy(sctx->buffer + index, padding, padlen); + } else { + __sha1_neon_update(desc, padding, padlen, index); + } + __sha1_neon_update(desc, (const u8 *)&bits, sizeof(bits), 56); + kernel_neon_end(); + } + + /* Store state in digest */ + for (i = 0; i < 5; i++) + dst[i] = cpu_to_be32(sctx->state[i]); + + /* Wipe context */ + memset(sctx, 0, sizeof(*sctx)); + + return 0; +} + +static int sha1_neon_export(struct shash_desc *desc, void *out) +{ + struct sha1_state *sctx = shash_desc_ctx(desc); + + memcpy(out, sctx, sizeof(*sctx)); + + return 0; +} + +static int sha1_neon_import(struct shash_desc *desc, const void *in) +{ + struct sha1_state *sctx = shash_desc_ctx(desc); + + memcpy(sctx, in, sizeof(*sctx)); + + return 0; +} + +static struct shash_alg alg = { + .digestsize = SHA1_DIGEST_SIZE, + .init = sha1_neon_init, + .update = sha1_neon_update, + .final = sha1_neon_final, + .export = sha1_neon_export, + .import = sha1_neon_import, + .descsize = sizeof(struct sha1_state), + .statesize = sizeof(struct sha1_state), + .base = { + .cra_name = "sha1", + .cra_driver_name = "sha1-neon", + .cra_priority = 250, + .cra_flags = CRYPTO_ALG_TYPE_SHASH, + .cra_blocksize = SHA1_BLOCK_SIZE, + .cra_module = THIS_MODULE, + } +}; + +static int __init sha1_neon_mod_init(void) +{ + if (!cpu_has_neon()) + return -ENODEV; + + return crypto_register_shash(&alg); +} + +static void __exit sha1_neon_mod_fini(void) +{ + crypto_unregister_shash(&alg); +} + +module_init(sha1_neon_mod_init); +module_exit(sha1_neon_mod_fini); + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("SHA1 Secure Hash Algorithm, NEON accelerated"); +MODULE_ALIAS("sha1"); diff --git a/arch/arm/crypto/sha512-armv7-neon.S b/arch/arm/crypto/sha512-armv7-neon.S new file mode 100644 index 000000000000..fe99472e507c --- /dev/null +++ b/arch/arm/crypto/sha512-armv7-neon.S @@ -0,0 +1,455 @@ +/* sha512-armv7-neon.S - ARM/NEON assembly implementation of SHA-512 transform + * + * Copyright © 2013-2014 Jussi Kivilinna <jussi.kivilinna@iki.fi> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + */ + +#include <linux/linkage.h> + + +.syntax unified +.code 32 +.fpu neon + +.text + +/* structure of SHA512_CONTEXT */ +#define hd_a 0 +#define hd_b ((hd_a) + 8) +#define hd_c ((hd_b) + 8) +#define hd_d ((hd_c) + 8) +#define hd_e ((hd_d) + 8) +#define hd_f ((hd_e) + 8) +#define hd_g ((hd_f) + 8) + +/* register macros */ +#define RK %r2 + +#define RA d0 +#define RB d1 +#define RC d2 +#define RD d3 +#define RE d4 +#define RF d5 +#define RG d6 +#define RH d7 + +#define RT0 d8 +#define RT1 d9 +#define RT2 d10 +#define RT3 d11 +#define RT4 d12 +#define RT5 d13 +#define RT6 d14 +#define RT7 d15 + +#define RT01q q4 +#define RT23q q5 +#define RT45q q6 +#define RT67q q7 + +#define RW0 d16 +#define RW1 d17 +#define RW2 d18 +#define RW3 d19 +#define RW4 d20 +#define RW5 d21 +#define RW6 d22 +#define RW7 d23 +#define RW8 d24 +#define RW9 d25 +#define RW10 d26 +#define RW11 d27 +#define RW12 d28 +#define RW13 d29 +#define RW14 d30 +#define RW15 d31 + +#define RW01q q8 +#define RW23q q9 +#define RW45q q10 +#define RW67q q11 +#define RW89q q12 +#define RW1011q q13 +#define RW1213q q14 +#define RW1415q q15 + +/*********************************************************************** + * ARM assembly implementation of sha512 transform + ***********************************************************************/ +#define rounds2_0_63(ra, rb, rc, rd, re, rf, rg, rh, rw0, rw1, rw01q, rw2, \ + rw23q, rw1415q, rw9, rw10, interleave_op, arg1) \ + /* t1 = h + Sum1 (e) + Ch (e, f, g) + k[t] + w[t]; */ \ + vshr.u64 RT2, re, #14; \ + vshl.u64 RT3, re, #64 - 14; \ + interleave_op(arg1); \ + vshr.u64 RT4, re, #18; \ + vshl.u64 RT5, re, #64 - 18; \ + vld1.64 {RT0}, [RK]!; \ + veor.64 RT23q, RT23q, RT45q; \ + vshr.u64 RT4, re, #41; \ + vshl.u64 RT5, re, #64 - 41; \ + vadd.u64 RT0, RT0, rw0; \ + veor.64 RT23q, RT23q, RT45q; \ + vmov.64 RT7, re; \ + veor.64 RT1, RT2, RT3; \ + vbsl.64 RT7, rf, rg; \ + \ + vadd.u64 RT1, RT1, rh; \ + vshr.u64 RT2, ra, #28; \ + vshl.u64 RT3, ra, #64 - 28; \ + vadd.u64 RT1, RT1, RT0; \ + vshr.u64 RT4, ra, #34; \ + vshl.u64 RT5, ra, #64 - 34; \ + vadd.u64 RT1, RT1, RT7; \ + \ + /* h = Sum0 (a) + Maj (a, b, c); */ \ + veor.64 RT23q, RT23q, RT45q; \ + vshr.u64 RT4, ra, #39; \ + vshl.u64 RT5, ra, #64 - 39; \ + veor.64 RT0, ra, rb; \ + veor.64 RT23q, RT23q, RT45q; \ + vbsl.64 RT0, rc, rb; \ + vadd.u64 rd, rd, RT1; /* d+=t1; */ \ + veor.64 rh, RT2, RT3; \ + \ + /* t1 = g + Sum1 (d) + Ch (d, e, f) + k[t] + w[t]; */ \ + vshr.u64 RT2, rd, #14; \ + vshl.u64 RT3, rd, #64 - 14; \ + vadd.u64 rh, rh, RT0; \ + vshr.u64 RT4, rd, #18; \ + vshl.u64 RT5, rd, #64 - 18; \ + vadd.u64 rh, rh, RT1; /* h+=t1; */ \ + vld1.64 {RT0}, [RK]!; \ + veor.64 RT23q, RT23q, RT45q; \ + vshr.u64 RT4, rd, #41; \ + vshl.u64 RT5, rd, #64 - 41; \ + vadd.u64 RT0, RT0, rw1; \ + veor.64 RT23q, RT23q, RT45q; \ + vmov.64 RT7, rd; \ + veor.64 RT1, RT2, RT3; \ + vbsl.64 RT7, re, rf; \ + \ + vadd.u64 RT1, RT1, rg; \ + vshr.u64 RT2, rh, #28; \ + vshl.u64 RT3, rh, #64 - 28; \ + vadd.u64 RT1, RT1, RT0; \ + vshr.u64 RT4, rh, #34; \ + vshl.u64 RT5, rh, #64 - 34; \ + vadd.u64 RT1, RT1, RT7; \ + \ + /* g = Sum0 (h) + Maj (h, a, b); */ \ + veor.64 RT23q, RT23q, RT45q; \ + vshr.u64 RT4, rh, #39; \ + vshl.u64 RT5, rh, #64 - 39; \ + veor.64 RT0, rh, ra; \ + veor.64 RT23q, RT23q, RT45q; \ + vbsl.64 RT0, rb, ra; \ + vadd.u64 rc, rc, RT1; /* c+=t1; */ \ + veor.64 rg, RT2, RT3; \ + \ + /* w[0] += S1 (w[14]) + w[9] + S0 (w[1]); */ \ + /* w[1] += S1 (w[15]) + w[10] + S0 (w[2]); */ \ + \ + /**** S0(w[1:2]) */ \ + \ + /* w[0:1] += w[9:10] */ \ + /* RT23q = rw1:rw2 */ \ + vext.u64 RT23q, rw01q, rw23q, #1; \ + vadd.u64 rw0, rw9; \ + vadd.u64 rg, rg, RT0; \ + vadd.u64 rw1, rw10;\ + vadd.u64 rg, rg, RT1; /* g+=t1; */ \ + \ + vshr.u64 RT45q, RT23q, #1; \ + vshl.u64 RT67q, RT23q, #64 - 1; \ + vshr.u64 RT01q, RT23q, #8; \ + veor.u64 RT45q, RT45q, RT67q; \ + vshl.u64 RT67q, RT23q, #64 - 8; \ + veor.u64 RT45q, RT45q, RT01q; \ + vshr.u64 RT01q, RT23q, #7; \ + veor.u64 RT45q, RT45q, RT67q; \ + \ + /**** S1(w[14:15]) */ \ + vshr.u64 RT23q, rw1415q, #6; \ + veor.u64 RT01q, RT01q, RT45q; \ + vshr.u64 RT45q, rw1415q, #19; \ + vshl.u64 RT67q, rw1415q, #64 - 19; \ + veor.u64 RT23q, RT23q, RT45q; \ + vshr.u64 RT45q, rw1415q, #61; \ + veor.u64 RT23q, RT23q, RT67q; \ + vshl.u64 RT67q, rw1415q, #64 - 61; \ + veor.u64 RT23q, RT23q, RT45q; \ + vadd.u64 rw01q, RT01q; /* w[0:1] += S(w[1:2]) */ \ + veor.u64 RT01q, RT23q, RT67q; +#define vadd_RT01q(rw01q) \ + /* w[0:1] += S(w[14:15]) */ \ + vadd.u64 rw01q, RT01q; + +#define dummy(_) /*_*/ + +#define rounds2_64_79(ra, rb, rc, rd, re, rf, rg, rh, rw0, rw1, \ + interleave_op1, arg1, interleave_op2, arg2) \ + /* t1 = h + Sum1 (e) + Ch (e, f, g) + k[t] + w[t]; */ \ + vshr.u64 RT2, re, #14; \ + vshl.u64 RT3, re, #64 - 14; \ + interleave_op1(arg1); \ + vshr.u64 RT4, re, #18; \ + vshl.u64 RT5, re, #64 - 18; \ + interleave_op2(arg2); \ + vld1.64 {RT0}, [RK]!; \ + veor.64 RT23q, RT23q, RT45q; \ + vshr.u64 RT4, re, #41; \ + vshl.u64 RT5, re, #64 - 41; \ + vadd.u64 RT0, RT0, rw0; \ + veor.64 RT23q, RT23q, RT45q; \ + vmov.64 RT7, re; \ + veor.64 RT1, RT2, RT3; \ + vbsl.64 RT7, rf, rg; \ + \ + vadd.u64 RT1, RT1, rh; \ + vshr.u64 RT2, ra, #28; \ + vshl.u64 RT3, ra, #64 - 28; \ + vadd.u64 RT1, RT1, RT0; \ + vshr.u64 RT4, ra, #34; \ + vshl.u64 RT5, ra, #64 - 34; \ + vadd.u64 RT1, RT1, RT7; \ + \ + /* h = Sum0 (a) + Maj (a, b, c); */ \ + veor.64 RT23q, RT23q, RT45q; \ + vshr.u64 RT4, ra, #39; \ + vshl.u64 RT5, ra, #64 - 39; \ + veor.64 RT0, ra, rb; \ + veor.64 RT23q, RT23q, RT45q; \ + vbsl.64 RT0, rc, rb; \ + vadd.u64 rd, rd, RT1; /* d+=t1; */ \ + veor.64 rh, RT2, RT3; \ + \ + /* t1 = g + Sum1 (d) + Ch (d, e, f) + k[t] + w[t]; */ \ + vshr.u64 RT2, rd, #14; \ + vshl.u64 RT3, rd, #64 - 14; \ + vadd.u64 rh, rh, RT0; \ + vshr.u64 RT4, rd, #18; \ + vshl.u64 RT5, rd, #64 - 18; \ + vadd.u64 rh, rh, RT1; /* h+=t1; */ \ + vld1.64 {RT0}, [RK]!; \ + veor.64 RT23q, RT23q, RT45q; \ + vshr.u64 RT4, rd, #41; \ + vshl.u64 RT5, rd, #64 - 41; \ + vadd.u64 RT0, RT0, rw1; \ + veor.64 RT23q, RT23q, RT45q; \ + vmov.64 RT7, rd; \ + veor.64 RT1, RT2, RT3; \ + vbsl.64 RT7, re, rf; \ + \ + vadd.u64 RT1, RT1, rg; \ + vshr.u64 RT2, rh, #28; \ + vshl.u64 RT3, rh, #64 - 28; \ + vadd.u64 RT1, RT1, RT0; \ + vshr.u64 RT4, rh, #34; \ + vshl.u64 RT5, rh, #64 - 34; \ + vadd.u64 RT1, RT1, RT7; \ + \ + /* g = Sum0 (h) + Maj (h, a, b); */ \ + veor.64 RT23q, RT23q, RT45q; \ + vshr.u64 RT4, rh, #39; \ + vshl.u64 RT5, rh, #64 - 39; \ + veor.64 RT0, rh, ra; \ + veor.64 RT23q, RT23q, RT45q; \ + vbsl.64 RT0, rb, ra; \ + vadd.u64 rc, rc, RT1; /* c+=t1; */ \ + veor.64 rg, RT2, RT3; +#define vadd_rg_RT0(rg) \ + vadd.u64 rg, rg, RT0; +#define vadd_rg_RT1(rg) \ + vadd.u64 rg, rg, RT1; /* g+=t1; */ + +.align 3 +ENTRY(sha512_transform_neon) + /* Input: + * %r0: SHA512_CONTEXT + * %r1: data + * %r2: u64 k[] constants + * %r3: nblks + */ + push {%lr}; + + mov %lr, #0; + + /* Load context to d0-d7 */ + vld1.64 {RA-RD}, [%r0]!; + vld1.64 {RE-RH}, [%r0]; + sub %r0, #(4*8); + + /* Load input to w[16], d16-d31 */ + /* NOTE: Assumes that on ARMv7 unaligned accesses are always allowed. */ + vld1.64 {RW0-RW3}, [%r1]!; + vld1.64 {RW4-RW7}, [%r1]!; + vld1.64 {RW8-RW11}, [%r1]!; + vld1.64 {RW12-RW15}, [%r1]!; +#ifdef __ARMEL__ + /* byteswap */ + vrev64.8 RW01q, RW01q; + vrev64.8 RW23q, RW23q; + vrev64.8 RW45q, RW45q; + vrev64.8 RW67q, RW67q; + vrev64.8 RW89q, RW89q; + vrev64.8 RW1011q, RW1011q; + vrev64.8 RW1213q, RW1213q; + vrev64.8 RW1415q, RW1415q; +#endif + + /* EABI says that d8-d15 must be preserved by callee. */ + /*vpush {RT0-RT7};*/ + +.Loop: + rounds2_0_63(RA, RB, RC, RD, RE, RF, RG, RH, RW0, RW1, RW01q, RW2, + RW23q, RW1415q, RW9, RW10, dummy, _); + b .Lenter_rounds; + +.Loop_rounds: + rounds2_0_63(RA, RB, RC, RD, RE, RF, RG, RH, RW0, RW1, RW01q, RW2, + RW23q, RW1415q, RW9, RW10, vadd_RT01q, RW1415q); +.Lenter_rounds: + rounds2_0_63(RG, RH, RA, RB, RC, RD, RE, RF, RW2, RW3, RW23q, RW4, + RW45q, RW01q, RW11, RW12, vadd_RT01q, RW01q); + rounds2_0_63(RE, RF, RG, RH, RA, RB, RC, RD, RW4, RW5, RW45q, RW6, + RW67q, RW23q, RW13, RW14, vadd_RT01q, RW23q); + rounds2_0_63(RC, RD, RE, RF, RG, RH, RA, RB, RW6, RW7, RW67q, RW8, + RW89q, RW45q, RW15, RW0, vadd_RT01q, RW45q); + rounds2_0_63(RA, RB, RC, RD, RE, RF, RG, RH, RW8, RW9, RW89q, RW10, + RW1011q, RW67q, RW1, RW2, vadd_RT01q, RW67q); + rounds2_0_63(RG, RH, RA, RB, RC, RD, RE, RF, RW10, RW11, RW1011q, RW12, + RW1213q, RW89q, RW3, RW4, vadd_RT01q, RW89q); + add %lr, #16; + rounds2_0_63(RE, RF, RG, RH, RA, RB, RC, RD, RW12, RW13, RW1213q, RW14, + RW1415q, RW1011q, RW5, RW6, vadd_RT01q, RW1011q); + cmp %lr, #64; + rounds2_0_63(RC, RD, RE, RF, RG, RH, RA, RB, RW14, RW15, RW1415q, RW0, + RW01q, RW1213q, RW7, RW8, vadd_RT01q, RW1213q); + bne .Loop_rounds; + + subs %r3, #1; + + rounds2_64_79(RA, RB, RC, RD, RE, RF, RG, RH, RW0, RW1, + vadd_RT01q, RW1415q, dummy, _); + rounds2_64_79(RG, RH, RA, RB, RC, RD, RE, RF, RW2, RW3, + vadd_rg_RT0, RG, vadd_rg_RT1, RG); + beq .Lhandle_tail; + vld1.64 {RW0-RW3}, [%r1]!; + rounds2_64_79(RE, RF, RG, RH, RA, RB, RC, RD, RW4, RW5, + vadd_rg_RT0, RE, vadd_rg_RT1, RE); + rounds2_64_79(RC, RD, RE, RF, RG, RH, RA, RB, RW6, RW7, + vadd_rg_RT0, RC, vadd_rg_RT1, RC); +#ifdef __ARMEL__ + vrev64.8 RW01q, RW01q; + vrev64.8 RW23q, RW23q; +#endif + vld1.64 {RW4-RW7}, [%r1]!; + rounds2_64_79(RA, RB, RC, RD, RE, RF, RG, RH, RW8, RW9, + vadd_rg_RT0, RA, vadd_rg_RT1, RA); + rounds2_64_79(RG, RH, RA, RB, RC, RD, RE, RF, RW10, RW11, + vadd_rg_RT0, RG, vadd_rg_RT1, RG); +#ifdef __ARMEL__ + vrev64.8 RW45q, RW45q; + vrev64.8 RW67q, RW67q; +#endif + vld1.64 {RW8-RW11}, [%r1]!; + rounds2_64_79(RE, RF, RG, RH, RA, RB, RC, RD, RW12, RW13, + vadd_rg_RT0, RE, vadd_rg_RT1, RE); + rounds2_64_79(RC, RD, RE, RF, RG, RH, RA, RB, RW14, RW15, + vadd_rg_RT0, RC, vadd_rg_RT1, RC); +#ifdef __ARMEL__ + vrev64.8 RW89q, RW89q; + vrev64.8 RW1011q, RW1011q; +#endif + vld1.64 {RW12-RW15}, [%r1]!; + vadd_rg_RT0(RA); + vadd_rg_RT1(RA); + + /* Load context */ + vld1.64 {RT0-RT3}, [%r0]!; + vld1.64 {RT4-RT7}, [%r0]; + sub %r0, #(4*8); + +#ifdef __ARMEL__ + vrev64.8 RW1213q, RW1213q; + vrev64.8 RW1415q, RW1415q; +#endif + + vadd.u64 RA, RT0; + vadd.u64 RB, RT1; + vadd.u64 RC, RT2; + vadd.u64 RD, RT3; + vadd.u64 RE, RT4; + vadd.u64 RF, RT5; + vadd.u64 RG, RT6; + vadd.u64 RH, RT7; + + /* Store the first half of context */ + vst1.64 {RA-RD}, [%r0]!; + sub RK, $(8*80); + vst1.64 {RE-RH}, [%r0]; /* Store the last half of context */ + mov %lr, #0; + sub %r0, #(4*8); + + b .Loop; + +.Lhandle_tail: + rounds2_64_79(RE, RF, RG, RH, RA, RB, RC, RD, RW4, RW5, + vadd_rg_RT0, RE, vadd_rg_RT1, RE); + rounds2_64_79(RC, RD, RE, RF, RG, RH, RA, RB, RW6, RW7, + vadd_rg_RT0, RC, vadd_rg_RT1, RC); + rounds2_64_79(RA, RB, RC, RD, RE, RF, RG, RH, RW8, RW9, + vadd_rg_RT0, RA, vadd_rg_RT1, RA); + rounds2_64_79(RG, RH, RA, RB, RC, RD, RE, RF, RW10, RW11, + vadd_rg_RT0, RG, vadd_rg_RT1, RG); + rounds2_64_79(RE, RF, RG, RH, RA, RB, RC, RD, RW12, RW13, + vadd_rg_RT0, RE, vadd_rg_RT1, RE); + rounds2_64_79(RC, RD, RE, RF, RG, RH, RA, RB, RW14, RW15, + vadd_rg_RT0, RC, vadd_rg_RT1, RC); + + /* Load context to d16-d23 */ + vld1.64 {RW0-RW3}, [%r0]!; + vadd_rg_RT0(RA); + vld1.64 {RW4-RW7}, [%r0]; + vadd_rg_RT1(RA); + sub %r0, #(4*8); + + vadd.u64 RA, RW0; + vadd.u64 RB, RW1; + vadd.u64 RC, RW2; + vadd.u64 RD, RW3; + vadd.u64 RE, RW4; + vadd.u64 RF, RW5; + vadd.u64 RG, RW6; + vadd.u64 RH, RW7; + + /* Store the first half of context */ + vst1.64 {RA-RD}, [%r0]!; + + /* Clear used registers */ + /* d16-d31 */ + veor.u64 RW01q, RW01q; + veor.u64 RW23q, RW23q; + veor.u64 RW45q, RW45q; + veor.u64 RW67q, RW67q; + vst1.64 {RE-RH}, [%r0]; /* Store the last half of context */ + veor.u64 RW89q, RW89q; + veor.u64 RW1011q, RW1011q; + veor.u64 RW1213q, RW1213q; + veor.u64 RW1415q, RW1415q; + /* d8-d15 */ + /*vpop {RT0-RT7};*/ + /* d0-d7 (q0-q3) */ + veor.u64 %q0, %q0; + veor.u64 %q1, %q1; + veor.u64 %q2, %q2; + veor.u64 %q3, %q3; + + pop {%pc}; +ENDPROC(sha512_transform_neon) diff --git a/arch/arm/crypto/sha512_neon_glue.c b/arch/arm/crypto/sha512_neon_glue.c new file mode 100644 index 000000000000..0d2758ff5e12 --- /dev/null +++ b/arch/arm/crypto/sha512_neon_glue.c @@ -0,0 +1,305 @@ +/* + * Glue code for the SHA512 Secure Hash Algorithm assembly implementation + * using NEON instructions. + * + * Copyright © 2014 Jussi Kivilinna <jussi.kivilinna@iki.fi> + * + * This file is based on sha512_ssse3_glue.c: + * Copyright (C) 2013 Intel Corporation + * Author: Tim Chen <tim.c.chen@linux.intel.com> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + * + */ + +#include <crypto/internal/hash.h> +#include <linux/init.h> +#include <linux/module.h> +#include <linux/mm.h> +#include <linux/cryptohash.h> +#include <linux/types.h> +#include <linux/string.h> +#include <crypto/sha.h> +#include <asm/byteorder.h> +#include <asm/simd.h> +#include <asm/neon.h> + + +static const u64 sha512_k[] = { + 0x428a2f98d728ae22ULL, 0x7137449123ef65cdULL, + 0xb5c0fbcfec4d3b2fULL, 0xe9b5dba58189dbbcULL, + 0x3956c25bf348b538ULL, 0x59f111f1b605d019ULL, + 0x923f82a4af194f9bULL, 0xab1c5ed5da6d8118ULL, + 0xd807aa98a3030242ULL, 0x12835b0145706fbeULL, + 0x243185be4ee4b28cULL, 0x550c7dc3d5ffb4e2ULL, + 0x72be5d74f27b896fULL, 0x80deb1fe3b1696b1ULL, + 0x9bdc06a725c71235ULL, 0xc19bf174cf692694ULL, + 0xe49b69c19ef14ad2ULL, 0xefbe4786384f25e3ULL, + 0x0fc19dc68b8cd5b5ULL, 0x240ca1cc77ac9c65ULL, + 0x2de92c6f592b0275ULL, 0x4a7484aa6ea6e483ULL, + 0x5cb0a9dcbd41fbd4ULL, 0x76f988da831153b5ULL, + 0x983e5152ee66dfabULL, 0xa831c66d2db43210ULL, + 0xb00327c898fb213fULL, 0xbf597fc7beef0ee4ULL, + 0xc6e00bf33da88fc2ULL, 0xd5a79147930aa725ULL, + 0x06ca6351e003826fULL, 0x142929670a0e6e70ULL, + 0x27b70a8546d22ffcULL, 0x2e1b21385c26c926ULL, + 0x4d2c6dfc5ac42aedULL, 0x53380d139d95b3dfULL, + 0x650a73548baf63deULL, 0x766a0abb3c77b2a8ULL, + 0x81c2c92e47edaee6ULL, 0x92722c851482353bULL, + 0xa2bfe8a14cf10364ULL, 0xa81a664bbc423001ULL, + 0xc24b8b70d0f89791ULL, 0xc76c51a30654be30ULL, + 0xd192e819d6ef5218ULL, 0xd69906245565a910ULL, + 0xf40e35855771202aULL, 0x106aa07032bbd1b8ULL, + 0x19a4c116b8d2d0c8ULL, 0x1e376c085141ab53ULL, + 0x2748774cdf8eeb99ULL, 0x34b0bcb5e19b48a8ULL, + 0x391c0cb3c5c95a63ULL, 0x4ed8aa4ae3418acbULL, + 0x5b9cca4f7763e373ULL, 0x682e6ff3d6b2b8a3ULL, + 0x748f82ee5defb2fcULL, 0x78a5636f43172f60ULL, + 0x84c87814a1f0ab72ULL, 0x8cc702081a6439ecULL, + 0x90befffa23631e28ULL, 0xa4506cebde82bde9ULL, + 0xbef9a3f7b2c67915ULL, 0xc67178f2e372532bULL, + 0xca273eceea26619cULL, 0xd186b8c721c0c207ULL, + 0xeada7dd6cde0eb1eULL, 0xf57d4f7fee6ed178ULL, + 0x06f067aa72176fbaULL, 0x0a637dc5a2c898a6ULL, + 0x113f9804bef90daeULL, 0x1b710b35131c471bULL, + 0x28db77f523047d84ULL, 0x32caab7b40c72493ULL, + 0x3c9ebe0a15c9bebcULL, 0x431d67c49c100d4cULL, + 0x4cc5d4becb3e42b6ULL, 0x597f299cfc657e2aULL, + 0x5fcb6fab3ad6faecULL, 0x6c44198c4a475817ULL +}; + + +asmlinkage void sha512_transform_neon(u64 *digest, const void *data, + const u64 k[], unsigned int num_blks); + + +static int sha512_neon_init(struct shash_desc *desc) +{ + struct sha512_state *sctx = shash_desc_ctx(desc); + + sctx->state[0] = SHA512_H0; + sctx->state[1] = SHA512_H1; + sctx->state[2] = SHA512_H2; + sctx->state[3] = SHA512_H3; + sctx->state[4] = SHA512_H4; + sctx->state[5] = SHA512_H5; + sctx->state[6] = SHA512_H6; + sctx->state[7] = SHA512_H7; + sctx->count[0] = sctx->count[1] = 0; + + return 0; +} + +static int __sha512_neon_update(struct shash_desc *desc, const u8 *data, + unsigned int len, unsigned int partial) +{ + struct sha512_state *sctx = shash_desc_ctx(desc); + unsigned int done = 0; + + sctx->count[0] += len; + if (sctx->count[0] < len) + sctx->count[1]++; + + if (partial) { + done = SHA512_BLOCK_SIZE - partial; + memcpy(sctx->buf + partial, data, done); + sha512_transform_neon(sctx->state, sctx->buf, sha512_k, 1); + } + + if (len - done >= SHA512_BLOCK_SIZE) { + const unsigned int rounds = (len - done) / SHA512_BLOCK_SIZE; + + sha512_transform_neon(sctx->state, data + done, sha512_k, + rounds); + + done += rounds * SHA512_BLOCK_SIZE; + } + + memcpy(sctx->buf, data + done, len - done); + + return 0; +} + +static int sha512_neon_update(struct shash_desc *desc, const u8 *data, + unsigned int len) +{ + struct sha512_state *sctx = shash_desc_ctx(desc); + unsigned int partial = sctx->count[0] % SHA512_BLOCK_SIZE; + int res; + + /* Handle the fast case right here */ + if (partial + len < SHA512_BLOCK_SIZE) { + sctx->count[0] += len; + if (sctx->count[0] < len) + sctx->count[1]++; + memcpy(sctx->buf + partial, data, len); + + return 0; + } + + if (!may_use_simd()) { + res = crypto_sha512_update(desc, data, len); + } else { + kernel_neon_begin(); + res = __sha512_neon_update(desc, data, len, partial); + kernel_neon_end(); + } + + return res; +} + + +/* Add padding and return the message digest. */ +static int sha512_neon_final(struct shash_desc *desc, u8 *out) +{ + struct sha512_state *sctx = shash_desc_ctx(desc); + unsigned int i, index, padlen; + __be64 *dst = (__be64 *)out; + __be64 bits[2]; + static const u8 padding[SHA512_BLOCK_SIZE] = { 0x80, }; + + /* save number of bits */ + bits[1] = cpu_to_be64(sctx->count[0] << 3); + bits[0] = cpu_to_be64(sctx->count[1] << 3 | sctx->count[0] >> 61); + + /* Pad out to 112 mod 128 and append length */ + index = sctx->count[0] & 0x7f; + padlen = (index < 112) ? (112 - index) : ((128+112) - index); + + if (!may_use_simd()) { + crypto_sha512_update(desc, padding, padlen); + crypto_sha512_update(desc, (const u8 *)&bits, sizeof(bits)); + } else { + kernel_neon_begin(); + /* We need to fill a whole block for __sha512_neon_update() */ + if (padlen <= 112) { + sctx->count[0] += padlen; + if (sctx->count[0] < padlen) + sctx->count[1]++; + memcpy(sctx->buf + index, padding, padlen); + } else { + __sha512_neon_update(desc, padding, padlen, index); + } + __sha512_neon_update(desc, (const u8 *)&bits, + sizeof(bits), 112); + kernel_neon_end(); + } + + /* Store state in digest */ + for (i = 0; i < 8; i++) + dst[i] = cpu_to_be64(sctx->state[i]); + + /* Wipe context */ + memset(sctx, 0, sizeof(*sctx)); + + return 0; +} + +static int sha512_neon_export(struct shash_desc *desc, void *out) +{ + struct sha512_state *sctx = shash_desc_ctx(desc); + + memcpy(out, sctx, sizeof(*sctx)); + + return 0; +} + +static int sha512_neon_import(struct shash_desc *desc, const void *in) +{ + struct sha512_state *sctx = shash_desc_ctx(desc); + + memcpy(sctx, in, sizeof(*sctx)); + + return 0; +} + +static int sha384_neon_init(struct shash_desc *desc) +{ + struct sha512_state *sctx = shash_desc_ctx(desc); + + sctx->state[0] = SHA384_H0; + sctx->state[1] = SHA384_H1; + sctx->state[2] = SHA384_H2; + sctx->state[3] = SHA384_H3; + sctx->state[4] = SHA384_H4; + sctx->state[5] = SHA384_H5; + sctx->state[6] = SHA384_H6; + sctx->state[7] = SHA384_H7; + + sctx->count[0] = sctx->count[1] = 0; + + return 0; +} + +static int sha384_neon_final(struct shash_desc *desc, u8 *hash) +{ + u8 D[SHA512_DIGEST_SIZE]; + + sha512_neon_final(desc, D); + + memcpy(hash, D, SHA384_DIGEST_SIZE); + memset(D, 0, SHA512_DIGEST_SIZE); + + return 0; +} + +static struct shash_alg algs[] = { { + .digestsize = SHA512_DIGEST_SIZE, + .init = sha512_neon_init, + .update = sha512_neon_update, + .final = sha512_neon_final, + .export = sha512_neon_export, + .import = sha512_neon_import, + .descsize = sizeof(struct sha512_state), + .statesize = sizeof(struct sha512_state), + .base = { + .cra_name = "sha512", + .cra_driver_name = "sha512-neon", + .cra_priority = 250, + .cra_flags = CRYPTO_ALG_TYPE_SHASH, + .cra_blocksize = SHA512_BLOCK_SIZE, + .cra_module = THIS_MODULE, + } +}, { + .digestsize = SHA384_DIGEST_SIZE, + .init = sha384_neon_init, + .update = sha512_neon_update, + .final = sha384_neon_final, + .export = sha512_neon_export, + .import = sha512_neon_import, + .descsize = sizeof(struct sha512_state), + .statesize = sizeof(struct sha512_state), + .base = { + .cra_name = "sha384", + .cra_driver_name = "sha384-neon", + .cra_priority = 250, + .cra_flags = CRYPTO_ALG_TYPE_SHASH, + .cra_blocksize = SHA384_BLOCK_SIZE, + .cra_module = THIS_MODULE, + } +} }; + +static int __init sha512_neon_mod_init(void) +{ + if (!cpu_has_neon()) + return -ENODEV; + + return crypto_register_shashes(algs, ARRAY_SIZE(algs)); +} + +static void __exit sha512_neon_mod_fini(void) +{ + crypto_unregister_shashes(algs, ARRAY_SIZE(algs)); +} + +module_init(sha512_neon_mod_init); +module_exit(sha512_neon_mod_fini); + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("SHA512 Secure Hash Algorithm, NEON accelerated"); + +MODULE_ALIAS("sha512"); +MODULE_ALIAS("sha384"); diff --git a/arch/arm/include/asm/assembler.h b/arch/arm/include/asm/assembler.h index 57f0584e8d97..f67fd3afebdf 100644 --- a/arch/arm/include/asm/assembler.h +++ b/arch/arm/include/asm/assembler.h @@ -24,6 +24,8 @@ #include <asm/domain.h> #include <asm/opcodes-virt.h> #include <asm/asm-offsets.h> +#include <asm/page.h> +#include <asm/thread_info.h> #define IOMEM(x) (x) @@ -179,10 +181,10 @@ * Get current thread_info. */ .macro get_thread_info, rd - ARM( mov \rd, sp, lsr #13 ) + ARM( mov \rd, sp, lsr #THREAD_SIZE_ORDER + PAGE_SHIFT ) THUMB( mov \rd, sp ) - THUMB( lsr \rd, \rd, #13 ) - mov \rd, \rd, lsl #13 + THUMB( lsr \rd, \rd, #THREAD_SIZE_ORDER + PAGE_SHIFT ) + mov \rd, \rd, lsl #THREAD_SIZE_ORDER + PAGE_SHIFT .endm /* @@ -425,4 +427,25 @@ THUMB( orr \reg , \reg , #PSR_T_BIT ) #endif .endm + .irp c,,eq,ne,cs,cc,mi,pl,vs,vc,hi,ls,ge,lt,gt,le,hs,lo + .macro ret\c, reg +#if __LINUX_ARM_ARCH__ < 6 + mov\c pc, \reg +#else + .ifeqs "\reg", "lr" + bx\c \reg + .else + mov\c pc, \reg + .endif +#endif + .endm + .endr + + .macro ret.w, reg + ret \reg +#ifdef CONFIG_THUMB2_KERNEL + nop +#endif + .endm + #endif /* __ASM_ASSEMBLER_H__ */ diff --git a/arch/arm/include/asm/cputype.h b/arch/arm/include/asm/cputype.h index 8c2b7321a478..963a2515906d 100644 --- a/arch/arm/include/asm/cputype.h +++ b/arch/arm/include/asm/cputype.h @@ -62,17 +62,18 @@ #define ARM_CPU_IMP_ARM 0x41 #define ARM_CPU_IMP_INTEL 0x69 -#define ARM_CPU_PART_ARM1136 0xB360 -#define ARM_CPU_PART_ARM1156 0xB560 -#define ARM_CPU_PART_ARM1176 0xB760 -#define ARM_CPU_PART_ARM11MPCORE 0xB020 -#define ARM_CPU_PART_CORTEX_A8 0xC080 -#define ARM_CPU_PART_CORTEX_A9 0xC090 -#define ARM_CPU_PART_CORTEX_A5 0xC050 -#define ARM_CPU_PART_CORTEX_A15 0xC0F0 -#define ARM_CPU_PART_CORTEX_A7 0xC070 -#define ARM_CPU_PART_CORTEX_A12 0xC0D0 -#define ARM_CPU_PART_CORTEX_A17 0xC0E0 +/* ARM implemented processors */ +#define ARM_CPU_PART_ARM1136 0x4100b360 +#define ARM_CPU_PART_ARM1156 0x4100b560 +#define ARM_CPU_PART_ARM1176 0x4100b760 +#define ARM_CPU_PART_ARM11MPCORE 0x4100b020 +#define ARM_CPU_PART_CORTEX_A8 0x4100c080 +#define ARM_CPU_PART_CORTEX_A9 0x4100c090 +#define ARM_CPU_PART_CORTEX_A5 0x4100c050 +#define ARM_CPU_PART_CORTEX_A7 0x4100c070 +#define ARM_CPU_PART_CORTEX_A12 0x4100c0d0 +#define ARM_CPU_PART_CORTEX_A17 0x4100c0e0 +#define ARM_CPU_PART_CORTEX_A15 0x4100c0f0 #define ARM_CPU_XSCALE_ARCH_MASK 0xe000 #define ARM_CPU_XSCALE_ARCH_V1 0x2000 @@ -171,14 +172,24 @@ static inline unsigned int __attribute_const__ read_cpuid_implementor(void) return (read_cpuid_id() & 0xFF000000) >> 24; } -static inline unsigned int __attribute_const__ read_cpuid_part_number(void) +/* + * The CPU part number is meaningless without referring to the CPU + * implementer: implementers are free to define their own part numbers + * which are permitted to clash with other implementer part numbers. + */ +static inline unsigned int __attribute_const__ read_cpuid_part(void) +{ + return read_cpuid_id() & 0xff00fff0; +} + +static inline unsigned int __attribute_const__ __deprecated read_cpuid_part_number(void) { return read_cpuid_id() & 0xFFF0; } static inline unsigned int __attribute_const__ xscale_cpu_arch_version(void) { - return read_cpuid_part_number() & ARM_CPU_XSCALE_ARCH_MASK; + return read_cpuid_id() & ARM_CPU_XSCALE_ARCH_MASK; } static inline unsigned int __attribute_const__ read_cpuid_cachetype(void) diff --git a/arch/arm/include/asm/crypto/sha1.h b/arch/arm/include/asm/crypto/sha1.h new file mode 100644 index 000000000000..75e6a417416b --- /dev/null +++ b/arch/arm/include/asm/crypto/sha1.h @@ -0,0 +1,10 @@ +#ifndef ASM_ARM_CRYPTO_SHA1_H +#define ASM_ARM_CRYPTO_SHA1_H + +#include <linux/crypto.h> +#include <crypto/sha.h> + +extern int sha1_update_arm(struct shash_desc *desc, const u8 *data, + unsigned int len); + +#endif diff --git a/arch/arm/include/asm/entry-macro-multi.S b/arch/arm/include/asm/entry-macro-multi.S index 88d61815f0c0..469a2b30fa27 100644 --- a/arch/arm/include/asm/entry-macro-multi.S +++ b/arch/arm/include/asm/entry-macro-multi.S @@ -35,5 +35,5 @@ \symbol_name: mov r8, lr arch_irq_handler_default - mov pc, r8 + ret r8 .endm diff --git a/arch/arm/include/asm/glue-proc.h b/arch/arm/include/asm/glue-proc.h index 74a8b84f3cb1..74be7c22035a 100644 --- a/arch/arm/include/asm/glue-proc.h +++ b/arch/arm/include/asm/glue-proc.h @@ -221,15 +221,6 @@ # endif #endif -#ifdef CONFIG_CPU_V7 -# ifdef CPU_NAME -# undef MULTI_CPU -# define MULTI_CPU -# else -# define CPU_NAME cpu_v7 -# endif -#endif - #ifdef CONFIG_CPU_V7M # ifdef CPU_NAME # undef MULTI_CPU @@ -248,6 +239,15 @@ # endif #endif +#ifdef CONFIG_CPU_V7 +/* + * Cortex-A9 needs a different suspend/resume function, so we need + * multiple CPU support for ARMv7 anyway. + */ +# undef MULTI_CPU +# define MULTI_CPU +#endif + #ifndef MULTI_CPU #define cpu_proc_init __glue(CPU_NAME,_proc_init) #define cpu_proc_fin __glue(CPU_NAME,_proc_fin) diff --git a/arch/arm/include/asm/mcpm.h b/arch/arm/include/asm/mcpm.h index 94060adba174..57ff7f2a3084 100644 --- a/arch/arm/include/asm/mcpm.h +++ b/arch/arm/include/asm/mcpm.h @@ -217,6 +217,22 @@ int __mcpm_cluster_state(unsigned int cluster); int __init mcpm_sync_init( void (*power_up_setup)(unsigned int affinity_level)); +/** + * mcpm_loopback - make a run through the MCPM low-level code + * + * @cache_disable: pointer to function performing cache disabling + * + * This exercises the MCPM machinery by soft resetting the CPU and branching + * to the MCPM low-level entry code before returning to the caller. + * The @cache_disable function must do the necessary cache disabling to + * let the regular kernel init code turn it back on as if the CPU was + * hotplugged in. The MCPM state machine is set as if the cluster was + * initialized meaning the power_up_setup callback passed to mcpm_sync_init() + * will be invoked for all affinity levels. This may be useful to initialize + * some resources such as enabling the CCI that requires the cache to be off, or simply for testing purposes. + */ +int __init mcpm_loopback(void (*cache_disable)(void)); + void __init mcpm_smp_set_ops(void); #else diff --git a/arch/arm/include/asm/mcs_spinlock.h b/arch/arm/include/asm/mcs_spinlock.h new file mode 100644 index 000000000000..f652ad65840a --- /dev/null +++ b/arch/arm/include/asm/mcs_spinlock.h @@ -0,0 +1,23 @@ +#ifndef __ASM_MCS_LOCK_H +#define __ASM_MCS_LOCK_H + +#ifdef CONFIG_SMP +#include <asm/spinlock.h> + +/* MCS spin-locking. */ +#define arch_mcs_spin_lock_contended(lock) \ +do { \ + /* Ensure prior stores are observed before we enter wfe. */ \ + smp_mb(); \ + while (!(smp_load_acquire(lock))) \ + wfe(); \ +} while (0) \ + +#define arch_mcs_spin_unlock_contended(lock) \ +do { \ + smp_store_release(lock, 1); \ + dsb_sev(); \ +} while (0) + +#endif /* CONFIG_SMP */ +#endif /* __ASM_MCS_LOCK_H */ diff --git a/arch/arm/include/asm/memory.h b/arch/arm/include/asm/memory.h index 2b751464d6ff..e731018869a7 100644 --- a/arch/arm/include/asm/memory.h +++ b/arch/arm/include/asm/memory.h @@ -91,9 +91,7 @@ * of this define that was meant to. * Fortunately, there is no reference for this in noMMU mode, for now. */ -#ifndef TASK_SIZE -#define TASK_SIZE (CONFIG_DRAM_SIZE) -#endif +#define TASK_SIZE UL(0xffffffff) #ifndef TASK_UNMAPPED_BASE #define TASK_UNMAPPED_BASE UL(0x00000000) @@ -150,13 +148,11 @@ /* * PLAT_PHYS_OFFSET is the offset (from zero) of the start of physical - * memory. This is used for XIP and NoMMU kernels, or by kernels which - * have their own mach/memory.h. Assembly code must always use + * memory. This is used for XIP and NoMMU kernels, and on platforms that don't + * have CONFIG_ARM_PATCH_PHYS_VIRT. Assembly code must always use * PLAT_PHYS_OFFSET and not PHYS_OFFSET. */ -#ifndef PLAT_PHYS_OFFSET #define PLAT_PHYS_OFFSET UL(CONFIG_PHYS_OFFSET) -#endif #ifndef __ASSEMBLY__ diff --git a/arch/arm/include/asm/perf_event.h b/arch/arm/include/asm/perf_event.h index 755877527cf9..c3a83691af8e 100644 --- a/arch/arm/include/asm/perf_event.h +++ b/arch/arm/include/asm/perf_event.h @@ -12,15 +12,6 @@ #ifndef __ARM_PERF_EVENT_H__ #define __ARM_PERF_EVENT_H__ -/* - * The ARMv7 CPU PMU supports up to 32 event counters. - */ -#define ARMPMU_MAX_HWEVENTS 32 - -#define HW_OP_UNSUPPORTED 0xFFFF -#define C(_x) PERF_COUNT_HW_CACHE_##_x -#define CACHE_OP_UNSUPPORTED 0xFFFF - #ifdef CONFIG_HW_PERF_EVENTS struct pt_regs; extern unsigned long perf_instruction_pointer(struct pt_regs *regs); diff --git a/arch/arm/include/asm/pgtable-3level-hwdef.h b/arch/arm/include/asm/pgtable-3level-hwdef.h index 626989fec4d3..9fd61c72a33a 100644 --- a/arch/arm/include/asm/pgtable-3level-hwdef.h +++ b/arch/arm/include/asm/pgtable-3level-hwdef.h @@ -43,7 +43,7 @@ #define PMD_SECT_BUFFERABLE (_AT(pmdval_t, 1) << 2) #define PMD_SECT_CACHEABLE (_AT(pmdval_t, 1) << 3) #define PMD_SECT_USER (_AT(pmdval_t, 1) << 6) /* AP[1] */ -#define PMD_SECT_RDONLY (_AT(pmdval_t, 1) << 7) /* AP[2] */ +#define PMD_SECT_AP2 (_AT(pmdval_t, 1) << 7) /* read only */ #define PMD_SECT_S (_AT(pmdval_t, 3) << 8) #define PMD_SECT_AF (_AT(pmdval_t, 1) << 10) #define PMD_SECT_nG (_AT(pmdval_t, 1) << 11) @@ -72,6 +72,7 @@ #define PTE_TABLE_BIT (_AT(pteval_t, 1) << 1) #define PTE_BUFFERABLE (_AT(pteval_t, 1) << 2) /* AttrIndx[0] */ #define PTE_CACHEABLE (_AT(pteval_t, 1) << 3) /* AttrIndx[1] */ +#define PTE_AP2 (_AT(pteval_t, 1) << 7) /* AP[2] */ #define PTE_EXT_SHARED (_AT(pteval_t, 3) << 8) /* SH[1:0], inner shareable */ #define PTE_EXT_AF (_AT(pteval_t, 1) << 10) /* Access Flag */ #define PTE_EXT_NG (_AT(pteval_t, 1) << 11) /* nG */ diff --git a/arch/arm/include/asm/pgtable-3level.h b/arch/arm/include/asm/pgtable-3level.h index 85c60adc8b60..06e0bc0f8b00 100644 --- a/arch/arm/include/asm/pgtable-3level.h +++ b/arch/arm/include/asm/pgtable-3level.h @@ -79,18 +79,19 @@ #define L_PTE_PRESENT (_AT(pteval_t, 3) << 0) /* Present */ #define L_PTE_FILE (_AT(pteval_t, 1) << 2) /* only when !PRESENT */ #define L_PTE_USER (_AT(pteval_t, 1) << 6) /* AP[1] */ -#define L_PTE_RDONLY (_AT(pteval_t, 1) << 7) /* AP[2] */ #define L_PTE_SHARED (_AT(pteval_t, 3) << 8) /* SH[1:0], inner shareable */ #define L_PTE_YOUNG (_AT(pteval_t, 1) << 10) /* AF */ #define L_PTE_XN (_AT(pteval_t, 1) << 54) /* XN */ -#define L_PTE_DIRTY (_AT(pteval_t, 1) << 55) /* unused */ -#define L_PTE_SPECIAL (_AT(pteval_t, 1) << 56) /* unused */ +#define L_PTE_DIRTY (_AT(pteval_t, 1) << 55) +#define L_PTE_SPECIAL (_AT(pteval_t, 1) << 56) #define L_PTE_NONE (_AT(pteval_t, 1) << 57) /* PROT_NONE */ +#define L_PTE_RDONLY (_AT(pteval_t, 1) << 58) /* READ ONLY */ -#define PMD_SECT_VALID (_AT(pmdval_t, 1) << 0) -#define PMD_SECT_DIRTY (_AT(pmdval_t, 1) << 55) -#define PMD_SECT_SPLITTING (_AT(pmdval_t, 1) << 56) -#define PMD_SECT_NONE (_AT(pmdval_t, 1) << 57) +#define L_PMD_SECT_VALID (_AT(pmdval_t, 1) << 0) +#define L_PMD_SECT_DIRTY (_AT(pmdval_t, 1) << 55) +#define L_PMD_SECT_SPLITTING (_AT(pmdval_t, 1) << 56) +#define L_PMD_SECT_NONE (_AT(pmdval_t, 1) << 57) +#define L_PMD_SECT_RDONLY (_AT(pteval_t, 1) << 58) /* * To be used in assembly code with the upper page attributes. @@ -207,27 +208,32 @@ static inline pmd_t *pmd_offset(pud_t *pud, unsigned long addr) #define pte_huge(pte) (pte_val(pte) && !(pte_val(pte) & PTE_TABLE_BIT)) #define pte_mkhuge(pte) (__pte(pte_val(pte) & ~PTE_TABLE_BIT)) -#define pmd_young(pmd) (pmd_val(pmd) & PMD_SECT_AF) +#define pmd_isset(pmd, val) ((u32)(val) == (val) ? pmd_val(pmd) & (val) \ + : !!(pmd_val(pmd) & (val))) +#define pmd_isclear(pmd, val) (!(pmd_val(pmd) & (val))) + +#define pmd_young(pmd) (pmd_isset((pmd), PMD_SECT_AF)) #define __HAVE_ARCH_PMD_WRITE -#define pmd_write(pmd) (!(pmd_val(pmd) & PMD_SECT_RDONLY)) +#define pmd_write(pmd) (pmd_isclear((pmd), L_PMD_SECT_RDONLY)) +#define pmd_dirty(pmd) (pmd_isset((pmd), L_PMD_SECT_DIRTY)) #define pmd_hugewillfault(pmd) (!pmd_young(pmd) || !pmd_write(pmd)) #define pmd_thp_or_huge(pmd) (pmd_huge(pmd) || pmd_trans_huge(pmd)) #ifdef CONFIG_TRANSPARENT_HUGEPAGE -#define pmd_trans_huge(pmd) (pmd_val(pmd) && !(pmd_val(pmd) & PMD_TABLE_BIT)) -#define pmd_trans_splitting(pmd) (pmd_val(pmd) & PMD_SECT_SPLITTING) +#define pmd_trans_huge(pmd) (pmd_val(pmd) && !pmd_table(pmd)) +#define pmd_trans_splitting(pmd) (pmd_isset((pmd), L_PMD_SECT_SPLITTING)) #endif #define PMD_BIT_FUNC(fn,op) \ static inline pmd_t pmd_##fn(pmd_t pmd) { pmd_val(pmd) op; return pmd; } -PMD_BIT_FUNC(wrprotect, |= PMD_SECT_RDONLY); +PMD_BIT_FUNC(wrprotect, |= L_PMD_SECT_RDONLY); PMD_BIT_FUNC(mkold, &= ~PMD_SECT_AF); -PMD_BIT_FUNC(mksplitting, |= PMD_SECT_SPLITTING); -PMD_BIT_FUNC(mkwrite, &= ~PMD_SECT_RDONLY); -PMD_BIT_FUNC(mkdirty, |= PMD_SECT_DIRTY); +PMD_BIT_FUNC(mksplitting, |= L_PMD_SECT_SPLITTING); +PMD_BIT_FUNC(mkwrite, &= ~L_PMD_SECT_RDONLY); +PMD_BIT_FUNC(mkdirty, |= L_PMD_SECT_DIRTY); PMD_BIT_FUNC(mkyoung, |= PMD_SECT_AF); #define pmd_mkhuge(pmd) (__pmd(pmd_val(pmd) & ~PMD_TABLE_BIT)) @@ -241,8 +247,8 @@ PMD_BIT_FUNC(mkyoung, |= PMD_SECT_AF); static inline pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot) { - const pmdval_t mask = PMD_SECT_USER | PMD_SECT_XN | PMD_SECT_RDONLY | - PMD_SECT_VALID | PMD_SECT_NONE; + const pmdval_t mask = PMD_SECT_USER | PMD_SECT_XN | L_PMD_SECT_RDONLY | + L_PMD_SECT_VALID | L_PMD_SECT_NONE; pmd_val(pmd) = (pmd_val(pmd) & ~mask) | (pgprot_val(newprot) & mask); return pmd; } @@ -253,8 +259,13 @@ static inline void set_pmd_at(struct mm_struct *mm, unsigned long addr, BUG_ON(addr >= TASK_SIZE); /* create a faulting entry if PROT_NONE protected */ - if (pmd_val(pmd) & PMD_SECT_NONE) - pmd_val(pmd) &= ~PMD_SECT_VALID; + if (pmd_val(pmd) & L_PMD_SECT_NONE) + pmd_val(pmd) &= ~L_PMD_SECT_VALID; + + if (pmd_write(pmd) && pmd_dirty(pmd)) + pmd_val(pmd) &= ~PMD_SECT_AP2; + else + pmd_val(pmd) |= PMD_SECT_AP2; *pmdp = __pmd(pmd_val(pmd) | PMD_SECT_nG); flush_pmd_entry(pmdp); diff --git a/arch/arm/include/asm/pgtable.h b/arch/arm/include/asm/pgtable.h index 5478e5d6ad89..01baef07cd0c 100644 --- a/arch/arm/include/asm/pgtable.h +++ b/arch/arm/include/asm/pgtable.h @@ -214,18 +214,22 @@ static inline pte_t *pmd_page_vaddr(pmd_t pmd) #define pte_clear(mm,addr,ptep) set_pte_ext(ptep, __pte(0), 0) +#define pte_isset(pte, val) ((u32)(val) == (val) ? pte_val(pte) & (val) \ + : !!(pte_val(pte) & (val))) +#define pte_isclear(pte, val) (!(pte_val(pte) & (val))) + #define pte_none(pte) (!pte_val(pte)) -#define pte_present(pte) (pte_val(pte) & L_PTE_PRESENT) -#define pte_valid(pte) (pte_val(pte) & L_PTE_VALID) +#define pte_present(pte) (pte_isset((pte), L_PTE_PRESENT)) +#define pte_valid(pte) (pte_isset((pte), L_PTE_VALID)) #define pte_accessible(mm, pte) (mm_tlb_flush_pending(mm) ? pte_present(pte) : pte_valid(pte)) -#define pte_write(pte) (!(pte_val(pte) & L_PTE_RDONLY)) -#define pte_dirty(pte) (pte_val(pte) & L_PTE_DIRTY) -#define pte_young(pte) (pte_val(pte) & L_PTE_YOUNG) -#define pte_exec(pte) (!(pte_val(pte) & L_PTE_XN)) +#define pte_write(pte) (pte_isclear((pte), L_PTE_RDONLY)) +#define pte_dirty(pte) (pte_isset((pte), L_PTE_DIRTY)) +#define pte_young(pte) (pte_isset((pte), L_PTE_YOUNG)) +#define pte_exec(pte) (pte_isclear((pte), L_PTE_XN)) #define pte_special(pte) (0) #define pte_valid_user(pte) \ - (pte_valid(pte) && (pte_val(pte) & L_PTE_USER) && pte_young(pte)) + (pte_valid(pte) && pte_isset((pte), L_PTE_USER) && pte_young(pte)) #if __LINUX_ARM_ARCH__ < 6 static inline void __sync_icache_dcache(pte_t pteval) diff --git a/arch/arm/include/asm/pmu.h b/arch/arm/include/asm/pmu.h index ae1919be8f98..0b648c541293 100644 --- a/arch/arm/include/asm/pmu.h +++ b/arch/arm/include/asm/pmu.h @@ -42,6 +42,25 @@ struct arm_pmu_platdata { #ifdef CONFIG_HW_PERF_EVENTS +/* + * The ARMv7 CPU PMU supports up to 32 event counters. + */ +#define ARMPMU_MAX_HWEVENTS 32 + +#define HW_OP_UNSUPPORTED 0xFFFF +#define C(_x) PERF_COUNT_HW_CACHE_##_x +#define CACHE_OP_UNSUPPORTED 0xFFFF + +#define PERF_MAP_ALL_UNSUPPORTED \ + [0 ... PERF_COUNT_HW_MAX - 1] = HW_OP_UNSUPPORTED + +#define PERF_CACHE_MAP_ALL_UNSUPPORTED \ +[0 ... C(MAX) - 1] = { \ + [0 ... C(OP_MAX) - 1] = { \ + [0 ... C(RESULT_MAX) - 1] = CACHE_OP_UNSUPPORTED, \ + }, \ +} + /* The events for a given PMU register set. */ struct pmu_hw_events { /* diff --git a/arch/arm/include/asm/ptrace.h b/arch/arm/include/asm/ptrace.h index c877654fe3bf..601264d983fa 100644 --- a/arch/arm/include/asm/ptrace.h +++ b/arch/arm/include/asm/ptrace.h @@ -84,6 +84,12 @@ static inline long regs_return_value(struct pt_regs *regs) #define instruction_pointer(regs) (regs)->ARM_pc +#ifdef CONFIG_THUMB2_KERNEL +#define frame_pointer(regs) (regs)->ARM_r7 +#else +#define frame_pointer(regs) (regs)->ARM_fp +#endif + static inline void instruction_pointer_set(struct pt_regs *regs, unsigned long val) { diff --git a/arch/arm/include/asm/smp_scu.h b/arch/arm/include/asm/smp_scu.h index 0393fbab8dd5..bfe163c40024 100644 --- a/arch/arm/include/asm/smp_scu.h +++ b/arch/arm/include/asm/smp_scu.h @@ -11,7 +11,7 @@ static inline bool scu_a9_has_base(void) { - return read_cpuid_part_number() == ARM_CPU_PART_CORTEX_A9; + return read_cpuid_part() == ARM_CPU_PART_CORTEX_A9; } static inline unsigned long scu_a9_get_base(void) diff --git a/arch/arm/include/asm/stacktrace.h b/arch/arm/include/asm/stacktrace.h index 4d0a16441b29..7722201ead19 100644 --- a/arch/arm/include/asm/stacktrace.h +++ b/arch/arm/include/asm/stacktrace.h @@ -1,13 +1,28 @@ #ifndef __ASM_STACKTRACE_H #define __ASM_STACKTRACE_H +#include <asm/ptrace.h> + struct stackframe { + /* + * FP member should hold R7 when CONFIG_THUMB2_KERNEL is enabled + * and R11 otherwise. + */ unsigned long fp; unsigned long sp; unsigned long lr; unsigned long pc; }; +static __always_inline +void arm_get_current_stackframe(struct pt_regs *regs, struct stackframe *frame) +{ + frame->fp = frame_pointer(regs); + frame->sp = regs->ARM_sp; + frame->lr = regs->ARM_lr; + frame->pc = regs->ARM_pc; +} + extern int unwind_frame(struct stackframe *frame); extern void walk_stackframe(struct stackframe *frame, int (*fn)(struct stackframe *, void *), void *data); diff --git a/arch/arm/include/asm/thread_info.h b/arch/arm/include/asm/thread_info.h index e4e4208a9130..fc44d3761f9e 100644 --- a/arch/arm/include/asm/thread_info.h +++ b/arch/arm/include/asm/thread_info.h @@ -14,9 +14,10 @@ #include <linux/compiler.h> #include <asm/fpstate.h> +#include <asm/page.h> #define THREAD_SIZE_ORDER 1 -#define THREAD_SIZE 8192 +#define THREAD_SIZE (PAGE_SIZE << THREAD_SIZE_ORDER) #define THREAD_START_SP (THREAD_SIZE - 8) #ifndef __ASSEMBLY__ diff --git a/arch/arm/include/asm/uaccess.h b/arch/arm/include/asm/uaccess.h index 75d95799b6e6..a4cd7af475e9 100644 --- a/arch/arm/include/asm/uaccess.h +++ b/arch/arm/include/asm/uaccess.h @@ -107,6 +107,8 @@ static inline void set_fs(mm_segment_t fs) extern int __get_user_1(void *); extern int __get_user_2(void *); extern int __get_user_4(void *); +extern int __get_user_lo8(void *); +extern int __get_user_8(void *); #define __GUP_CLOBBER_1 "lr", "cc" #ifdef CONFIG_CPU_USE_DOMAINS @@ -115,6 +117,8 @@ extern int __get_user_4(void *); #define __GUP_CLOBBER_2 "lr", "cc" #endif #define __GUP_CLOBBER_4 "lr", "cc" +#define __GUP_CLOBBER_lo8 "lr", "cc" +#define __GUP_CLOBBER_8 "lr", "cc" #define __get_user_x(__r2,__p,__e,__l,__s) \ __asm__ __volatile__ ( \ @@ -125,11 +129,19 @@ extern int __get_user_4(void *); : "0" (__p), "r" (__l) \ : __GUP_CLOBBER_##__s) +/* narrowing a double-word get into a single 32bit word register: */ +#ifdef __ARMEB__ +#define __get_user_xb(__r2, __p, __e, __l, __s) \ + __get_user_x(__r2, __p, __e, __l, lo8) +#else +#define __get_user_xb __get_user_x +#endif + #define __get_user_check(x,p) \ ({ \ unsigned long __limit = current_thread_info()->addr_limit - 1; \ register const typeof(*(p)) __user *__p asm("r0") = (p);\ - register unsigned long __r2 asm("r2"); \ + register typeof(x) __r2 asm("r2"); \ register unsigned long __l asm("r1") = __limit; \ register int __e asm("r0"); \ switch (sizeof(*(__p))) { \ @@ -142,6 +154,12 @@ extern int __get_user_4(void *); case 4: \ __get_user_x(__r2, __p, __e, __l, 4); \ break; \ + case 8: \ + if (sizeof((x)) < 8) \ + __get_user_xb(__r2, __p, __e, __l, 4); \ + else \ + __get_user_x(__r2, __p, __e, __l, 8); \ + break; \ default: __e = __get_user_bad(); break; \ } \ x = (typeof(*(p))) __r2; \ @@ -224,7 +242,7 @@ static inline void set_fs(mm_segment_t fs) #define access_ok(type,addr,size) (__range_ok(addr,size) == 0) #define user_addr_max() \ - (segment_eq(get_fs(), USER_DS) ? TASK_SIZE : ~0UL) + (segment_eq(get_fs(), KERNEL_DS) ? ~0UL : get_fs()) /* * The "__xxx" versions of the user access functions do not verify the diff --git a/arch/arm/include/asm/unistd.h b/arch/arm/include/asm/unistd.h index 43876245fc57..21ca0cebcab0 100644 --- a/arch/arm/include/asm/unistd.h +++ b/arch/arm/include/asm/unistd.h @@ -15,7 +15,17 @@ #include <uapi/asm/unistd.h> +/* + * This may need to be greater than __NR_last_syscall+1 in order to + * account for the padding in the syscall table + */ #define __NR_syscalls (384) + +/* + * *NOTE*: This is a ghost syscall private to the kernel. Only the + * __kuser_cmpxchg code in entry-armv.S should be aware of its + * existence. Don't ever use this from user code. + */ #define __ARM_NR_cmpxchg (__ARM_NR_BASE+0x00fff0) #define __ARCH_WANT_STAT64 diff --git a/arch/arm/include/uapi/asm/unistd.h b/arch/arm/include/uapi/asm/unistd.h index ba94446c72d9..acd5b66ea3aa 100644 --- a/arch/arm/include/uapi/asm/unistd.h +++ b/arch/arm/include/uapi/asm/unistd.h @@ -411,11 +411,6 @@ #define __NR_renameat2 (__NR_SYSCALL_BASE+382) /* - * This may need to be greater than __NR_last_syscall+1 in order to - * account for the padding in the syscall table - */ - -/* * The following SWIs are ARM private. */ #define __ARM_NR_BASE (__NR_SYSCALL_BASE+0x0f0000) @@ -426,12 +421,6 @@ #define __ARM_NR_set_tls (__ARM_NR_BASE+5) /* - * *NOTE*: This is a ghost syscall private to the kernel. Only the - * __kuser_cmpxchg code in entry-armv.S should be aware of its - * existence. Don't ever use this from user code. - */ - -/* * The following syscalls are obsolete and no longer available for EABI. */ #if !defined(__KERNEL__) diff --git a/arch/arm/kernel/debug.S b/arch/arm/kernel/debug.S index 14f7c3b14632..78c91b5f97d4 100644 --- a/arch/arm/kernel/debug.S +++ b/arch/arm/kernel/debug.S @@ -90,7 +90,7 @@ ENTRY(printascii) ldrneb r1, [r0], #1 teqne r1, #0 bne 1b - mov pc, lr + ret lr ENDPROC(printascii) ENTRY(printch) @@ -105,7 +105,7 @@ ENTRY(debug_ll_addr) addruart r2, r3, ip str r2, [r0] str r3, [r1] - mov pc, lr + ret lr ENDPROC(debug_ll_addr) #endif @@ -116,7 +116,7 @@ ENTRY(printascii) mov r0, #0x04 @ SYS_WRITE0 ARM( svc #0x123456 ) THUMB( svc #0xab ) - mov pc, lr + ret lr ENDPROC(printascii) ENTRY(printch) @@ -125,14 +125,14 @@ ENTRY(printch) mov r0, #0x03 @ SYS_WRITEC ARM( svc #0x123456 ) THUMB( svc #0xab ) - mov pc, lr + ret lr ENDPROC(printch) ENTRY(debug_ll_addr) mov r2, #0 str r2, [r0] str r2, [r1] - mov pc, lr + ret lr ENDPROC(debug_ll_addr) #endif diff --git a/arch/arm/kernel/entry-armv.S b/arch/arm/kernel/entry-armv.S index 52a949a8077d..36276cdccfbc 100644 --- a/arch/arm/kernel/entry-armv.S +++ b/arch/arm/kernel/entry-armv.S @@ -224,7 +224,7 @@ svc_preempt: 1: bl preempt_schedule_irq @ irq en/disable is done inside ldr r0, [tsk, #TI_FLAGS] @ get new tasks TI_FLAGS tst r0, #_TIF_NEED_RESCHED - moveq pc, r8 @ go again + reteq r8 @ go again b 1b #endif @@ -490,7 +490,7 @@ ENDPROC(__und_usr) .pushsection .fixup, "ax" .align 2 4: str r4, [sp, #S_PC] @ retry current instruction - mov pc, r9 + ret r9 .popsection .pushsection __ex_table,"a" .long 1b, 4b @@ -552,7 +552,7 @@ call_fpe: #endif tst r0, #0x08000000 @ only CDP/CPRT/LDC/STC have bit 27 tstne r0, #0x04000000 @ bit 26 set on both ARM and Thumb-2 - moveq pc, lr + reteq lr and r8, r0, #0x00000f00 @ mask out CP number THUMB( lsr r8, r8, #8 ) mov r7, #1 @@ -571,33 +571,33 @@ call_fpe: THUMB( add pc, r8 ) nop - movw_pc lr @ CP#0 + ret.w lr @ CP#0 W(b) do_fpe @ CP#1 (FPE) W(b) do_fpe @ CP#2 (FPE) - movw_pc lr @ CP#3 + ret.w lr @ CP#3 #ifdef CONFIG_CRUNCH b crunch_task_enable @ CP#4 (MaverickCrunch) b crunch_task_enable @ CP#5 (MaverickCrunch) b crunch_task_enable @ CP#6 (MaverickCrunch) #else - movw_pc lr @ CP#4 - movw_pc lr @ CP#5 - movw_pc lr @ CP#6 + ret.w lr @ CP#4 + ret.w lr @ CP#5 + ret.w lr @ CP#6 #endif - movw_pc lr @ CP#7 - movw_pc lr @ CP#8 - movw_pc lr @ CP#9 + ret.w lr @ CP#7 + ret.w lr @ CP#8 + ret.w lr @ CP#9 #ifdef CONFIG_VFP W(b) do_vfp @ CP#10 (VFP) W(b) do_vfp @ CP#11 (VFP) #else - movw_pc lr @ CP#10 (VFP) - movw_pc lr @ CP#11 (VFP) + ret.w lr @ CP#10 (VFP) + ret.w lr @ CP#11 (VFP) #endif - movw_pc lr @ CP#12 - movw_pc lr @ CP#13 - movw_pc lr @ CP#14 (Debug) - movw_pc lr @ CP#15 (Control) + ret.w lr @ CP#12 + ret.w lr @ CP#13 + ret.w lr @ CP#14 (Debug) + ret.w lr @ CP#15 (Control) #ifdef NEED_CPU_ARCHITECTURE .align 2 @@ -649,7 +649,7 @@ ENTRY(fp_enter) .popsection ENTRY(no_fp) - mov pc, lr + ret lr ENDPROC(no_fp) __und_usr_fault_32: @@ -745,7 +745,7 @@ ENDPROC(__switch_to) #ifdef CONFIG_ARM_THUMB bx \reg #else - mov pc, \reg + ret \reg #endif .endm @@ -837,7 +837,7 @@ kuser_cmpxchg64_fixup: #if __LINUX_ARM_ARCH__ < 6 bcc kuser_cmpxchg32_fixup #endif - mov pc, lr + ret lr .previous #else @@ -905,7 +905,7 @@ kuser_cmpxchg32_fixup: subs r8, r4, r7 rsbcss r8, r8, #(2b - 1b) strcs r7, [sp, #S_PC] - mov pc, lr + ret lr .previous #else diff --git a/arch/arm/kernel/entry-common.S b/arch/arm/kernel/entry-common.S index 7139d4a7dea7..e52fe5a2d843 100644 --- a/arch/arm/kernel/entry-common.S +++ b/arch/arm/kernel/entry-common.S @@ -8,6 +8,7 @@ * published by the Free Software Foundation. */ +#include <asm/assembler.h> #include <asm/unistd.h> #include <asm/ftrace.h> #include <asm/unwind.h> @@ -88,7 +89,7 @@ ENTRY(ret_from_fork) cmp r5, #0 movne r0, r4 adrne lr, BSYM(1f) - movne pc, r5 + retne r5 1: get_thread_info tsk b ret_slow_syscall ENDPROC(ret_from_fork) @@ -290,7 +291,7 @@ ENDPROC(ftrace_graph_caller_old) .macro mcount_exit ldmia sp!, {r0-r3, ip, lr} - mov pc, ip + ret ip .endm ENTRY(__gnu_mcount_nc) @@ -298,7 +299,7 @@ UNWIND(.fnstart) #ifdef CONFIG_DYNAMIC_FTRACE mov ip, lr ldmia sp!, {lr} - mov pc, ip + ret ip #else __mcount #endif @@ -333,12 +334,12 @@ return_to_handler: bl ftrace_return_to_handler mov lr, r0 @ r0 has real ret addr ldmia sp!, {r0-r3} - mov pc, lr + ret lr #endif ENTRY(ftrace_stub) .Lftrace_stub: - mov pc, lr + ret lr ENDPROC(ftrace_stub) #endif /* CONFIG_FUNCTION_TRACER */ @@ -561,7 +562,7 @@ sys_mmap2: streq r5, [sp, #4] beq sys_mmap_pgoff mov r0, #-EINVAL - mov pc, lr + ret lr #else str r5, [sp, #4] b sys_mmap_pgoff diff --git a/arch/arm/kernel/entry-header.S b/arch/arm/kernel/entry-header.S index 5d702f8900b1..8db307d0954b 100644 --- a/arch/arm/kernel/entry-header.S +++ b/arch/arm/kernel/entry-header.S @@ -240,12 +240,6 @@ movs pc, lr @ return & move spsr_svc into cpsr .endm - @ - @ 32-bit wide "mov pc, reg" - @ - .macro movw_pc, reg - mov pc, \reg - .endm #else /* CONFIG_THUMB2_KERNEL */ .macro svc_exit, rpsr, irq = 0 .if \irq != 0 @@ -304,14 +298,6 @@ movs pc, lr @ return & move spsr_svc into cpsr .endm #endif /* ifdef CONFIG_CPU_V7M / else */ - - @ - @ 32-bit wide "mov pc, reg" - @ - .macro movw_pc, reg - mov pc, \reg - nop - .endm #endif /* !CONFIG_THUMB2_KERNEL */ /* diff --git a/arch/arm/kernel/fiqasm.S b/arch/arm/kernel/fiqasm.S index 207f9d652010..8dd26e1a9bd6 100644 --- a/arch/arm/kernel/fiqasm.S +++ b/arch/arm/kernel/fiqasm.S @@ -32,7 +32,7 @@ ENTRY(__set_fiq_regs) ldr lr, [r0] msr cpsr_c, r1 @ return to SVC mode mov r0, r0 @ avoid hazard prior to ARMv4 - mov pc, lr + ret lr ENDPROC(__set_fiq_regs) ENTRY(__get_fiq_regs) @@ -45,5 +45,5 @@ ENTRY(__get_fiq_regs) str lr, [r0] msr cpsr_c, r1 @ return to SVC mode mov r0, r0 @ avoid hazard prior to ARMv4 - mov pc, lr + ret lr ENDPROC(__get_fiq_regs) diff --git a/arch/arm/kernel/head-common.S b/arch/arm/kernel/head-common.S index 572a38335c96..8733012d231f 100644 --- a/arch/arm/kernel/head-common.S +++ b/arch/arm/kernel/head-common.S @@ -10,6 +10,7 @@ * published by the Free Software Foundation. * */ +#include <asm/assembler.h> #define ATAG_CORE 0x54410001 #define ATAG_CORE_SIZE ((2*4 + 3*4) >> 2) @@ -61,10 +62,10 @@ __vet_atags: cmp r5, r6 bne 1f -2: mov pc, lr @ atag/dtb pointer is ok +2: ret lr @ atag/dtb pointer is ok 1: mov r2, #0 - mov pc, lr + ret lr ENDPROC(__vet_atags) /* @@ -162,7 +163,7 @@ __lookup_processor_type: cmp r5, r6 blo 1b mov r5, #0 @ unknown processor -2: mov pc, lr +2: ret lr ENDPROC(__lookup_processor_type) /* diff --git a/arch/arm/kernel/head-nommu.S b/arch/arm/kernel/head-nommu.S index 716249cc2ee1..cc176b67c134 100644 --- a/arch/arm/kernel/head-nommu.S +++ b/arch/arm/kernel/head-nommu.S @@ -82,7 +82,7 @@ ENTRY(stext) adr lr, BSYM(1f) @ return (PIC) address ARM( add pc, r10, #PROCINFO_INITFUNC ) THUMB( add r12, r10, #PROCINFO_INITFUNC ) - THUMB( mov pc, r12 ) + THUMB( ret r12 ) 1: b __after_proc_init ENDPROC(stext) @@ -119,7 +119,7 @@ ENTRY(secondary_startup) mov r13, r12 @ __secondary_switched address ARM( add pc, r10, #PROCINFO_INITFUNC ) THUMB( add r12, r10, #PROCINFO_INITFUNC ) - THUMB( mov pc, r12 ) + THUMB( ret r12 ) ENDPROC(secondary_startup) ENTRY(__secondary_switched) @@ -164,7 +164,7 @@ __after_proc_init: #endif mcr p15, 0, r0, c1, c0, 0 @ write control reg #endif /* CONFIG_CPU_CP15 */ - mov pc, r13 + ret r13 ENDPROC(__after_proc_init) .ltorg @@ -254,7 +254,7 @@ ENTRY(__setup_mpu) orr r0, r0, #CR_M @ Set SCTRL.M (MPU on) mcr p15, 0, r0, c1, c0, 0 @ Enable MPU isb - mov pc,lr + ret lr ENDPROC(__setup_mpu) #endif #include "head-common.S" diff --git a/arch/arm/kernel/head.S b/arch/arm/kernel/head.S index 2c35f0ff2fdc..664eee8c4a26 100644 --- a/arch/arm/kernel/head.S +++ b/arch/arm/kernel/head.S @@ -140,7 +140,7 @@ ENTRY(stext) mov r8, r4 @ set TTBR1 to swapper_pg_dir ARM( add pc, r10, #PROCINFO_INITFUNC ) THUMB( add r12, r10, #PROCINFO_INITFUNC ) - THUMB( mov pc, r12 ) + THUMB( ret r12 ) 1: b __enable_mmu ENDPROC(stext) .ltorg @@ -335,7 +335,7 @@ __create_page_tables: sub r4, r4, #0x1000 @ point to the PGD table mov r4, r4, lsr #ARCH_PGD_SHIFT #endif - mov pc, lr + ret lr ENDPROC(__create_page_tables) .ltorg .align @@ -383,7 +383,7 @@ ENTRY(secondary_startup) ARM( add pc, r10, #PROCINFO_INITFUNC ) @ initialise processor @ (return control reg) THUMB( add r12, r10, #PROCINFO_INITFUNC ) - THUMB( mov pc, r12 ) + THUMB( ret r12 ) ENDPROC(secondary_startup) /* @@ -468,7 +468,7 @@ ENTRY(__turn_mmu_on) instr_sync mov r3, r3 mov r3, r13 - mov pc, r3 + ret r3 __turn_mmu_on_end: ENDPROC(__turn_mmu_on) .popsection @@ -487,7 +487,7 @@ __fixup_smp: orr r4, r4, #0x0000b000 orr r4, r4, #0x00000020 @ val 0x4100b020 teq r3, r4 @ ARM 11MPCore? - moveq pc, lr @ yes, assume SMP + reteq lr @ yes, assume SMP mrc p15, 0, r0, c0, c0, 5 @ read MPIDR and r0, r0, #0xc0000000 @ multiprocessing extensions and @@ -500,7 +500,7 @@ __fixup_smp: orr r4, r4, #0x0000c000 orr r4, r4, #0x00000090 teq r3, r4 @ Check for ARM Cortex-A9 - movne pc, lr @ Not ARM Cortex-A9, + retne lr @ Not ARM Cortex-A9, @ If a future SoC *does* use 0x0 as the PERIPH_BASE, then the @ below address check will need to be #ifdef'd or equivalent @@ -512,7 +512,7 @@ __fixup_smp: ARM_BE8(rev r0, r0) @ byteswap if big endian and r0, r0, #0x3 @ number of CPUs teq r0, #0x0 @ is 1? - movne pc, lr + retne lr __fixup_smp_on_up: adr r0, 1f @@ -539,7 +539,7 @@ smp_on_up: .text __do_fixup_smp_on_up: cmp r4, r5 - movhs pc, lr + reths lr ldmia r4!, {r0, r6} ARM( str r6, [r0, r3] ) THUMB( add r0, r0, r3 ) @@ -672,7 +672,7 @@ ARM_BE8(rev16 ip, ip) 2: cmp r4, r5 ldrcc r7, [r4], #4 @ use branch for delay slot bcc 1b - mov pc, lr + ret lr #endif ENDPROC(__fixup_a_pv_table) diff --git a/arch/arm/kernel/hyp-stub.S b/arch/arm/kernel/hyp-stub.S index 797b1a6a4906..56ce6290c831 100644 --- a/arch/arm/kernel/hyp-stub.S +++ b/arch/arm/kernel/hyp-stub.S @@ -99,7 +99,7 @@ ENTRY(__hyp_stub_install_secondary) * immediately. */ compare_cpu_mode_with_primary r4, r5, r6, r7 - movne pc, lr + retne lr /* * Once we have given up on one CPU, we do not try to install the @@ -111,7 +111,7 @@ ENTRY(__hyp_stub_install_secondary) */ cmp r4, #HYP_MODE - movne pc, lr @ give up if the CPU is not in HYP mode + retne lr @ give up if the CPU is not in HYP mode /* * Configure HSCTLR to set correct exception endianness/instruction set @@ -201,7 +201,7 @@ ENDPROC(__hyp_get_vectors) @ fall through ENTRY(__hyp_set_vectors) __HVC(0) - mov pc, lr + ret lr ENDPROC(__hyp_set_vectors) #ifndef ZIMAGE diff --git a/arch/arm/kernel/iwmmxt.S b/arch/arm/kernel/iwmmxt.S index 2b32978ae905..ad58e565fe98 100644 --- a/arch/arm/kernel/iwmmxt.S +++ b/arch/arm/kernel/iwmmxt.S @@ -100,7 +100,7 @@ ENTRY(iwmmxt_task_enable) get_thread_info r10 #endif 4: dec_preempt_count r10, r3 - mov pc, r9 @ normal exit from exception + ret r9 @ normal exit from exception concan_save: @@ -144,7 +144,7 @@ concan_dump: wstrd wR15, [r1, #MMX_WR15] 2: teq r0, #0 @ anything to load? - moveq pc, lr @ if not, return + reteq lr @ if not, return concan_load: @@ -177,10 +177,10 @@ concan_load: @ clear CUP/MUP (only if r1 != 0) teq r1, #0 mov r2, #0 - moveq pc, lr + reteq lr tmcr wCon, r2 - mov pc, lr + ret lr /* * Back up Concan regs to save area and disable access to them @@ -266,7 +266,7 @@ ENTRY(iwmmxt_task_copy) mov r3, lr @ preserve return address bl concan_dump msr cpsr_c, ip @ restore interrupt mode - mov pc, r3 + ret r3 /* * Restore Concan state from given memory address @@ -302,7 +302,7 @@ ENTRY(iwmmxt_task_restore) mov r3, lr @ preserve return address bl concan_load msr cpsr_c, ip @ restore interrupt mode - mov pc, r3 + ret r3 /* * Concan handling on task switch @@ -324,7 +324,7 @@ ENTRY(iwmmxt_task_switch) add r3, r0, #TI_IWMMXT_STATE @ get next task Concan save area ldr r2, [r2] @ get current Concan owner teq r2, r3 @ next task owns it? - movne pc, lr @ no: leave Concan disabled + retne lr @ no: leave Concan disabled 1: @ flip Concan access XSC(eor r1, r1, #0x3) @@ -351,7 +351,7 @@ ENTRY(iwmmxt_task_release) eors r0, r0, r1 @ if equal... streq r0, [r3] @ then clear ownership msr cpsr_c, r2 @ restore interrupts - mov pc, lr + ret lr .data concan_owner: diff --git a/arch/arm/kernel/perf_event.c b/arch/arm/kernel/perf_event.c index 4238bcba9d60..266cba46db3e 100644 --- a/arch/arm/kernel/perf_event.c +++ b/arch/arm/kernel/perf_event.c @@ -560,11 +560,16 @@ user_backtrace(struct frame_tail __user *tail, struct perf_callchain_entry *entry) { struct frame_tail buftail; + unsigned long err; - /* Also check accessibility of one struct frame_tail beyond */ if (!access_ok(VERIFY_READ, tail, sizeof(buftail))) return NULL; - if (__copy_from_user_inatomic(&buftail, tail, sizeof(buftail))) + + pagefault_disable(); + err = __copy_from_user_inatomic(&buftail, tail, sizeof(buftail)); + pagefault_enable(); + + if (err) return NULL; perf_callchain_store(entry, buftail.lr); @@ -590,6 +595,10 @@ perf_callchain_user(struct perf_callchain_entry *entry, struct pt_regs *regs) } perf_callchain_store(entry, regs->ARM_pc); + + if (!current->mm) + return; + tail = (struct frame_tail __user *)regs->ARM_fp - 1; while ((entry->nr < PERF_MAX_STACK_DEPTH) && @@ -621,10 +630,7 @@ perf_callchain_kernel(struct perf_callchain_entry *entry, struct pt_regs *regs) return; } - fr.fp = regs->ARM_fp; - fr.sp = regs->ARM_sp; - fr.lr = regs->ARM_lr; - fr.pc = regs->ARM_pc; + arm_get_current_stackframe(regs, &fr); walk_stackframe(&fr, callchain_trace, entry); } diff --git a/arch/arm/kernel/perf_event_cpu.c b/arch/arm/kernel/perf_event_cpu.c index af9e35e8836f..e6a6edbec613 100644 --- a/arch/arm/kernel/perf_event_cpu.c +++ b/arch/arm/kernel/perf_event_cpu.c @@ -233,14 +233,17 @@ static struct of_device_id cpu_pmu_of_device_ids[] = { {.compatible = "arm,cortex-a7-pmu", .data = armv7_a7_pmu_init}, {.compatible = "arm,cortex-a5-pmu", .data = armv7_a5_pmu_init}, {.compatible = "arm,arm11mpcore-pmu", .data = armv6mpcore_pmu_init}, - {.compatible = "arm,arm1176-pmu", .data = armv6pmu_init}, - {.compatible = "arm,arm1136-pmu", .data = armv6pmu_init}, + {.compatible = "arm,arm1176-pmu", .data = armv6_1176_pmu_init}, + {.compatible = "arm,arm1136-pmu", .data = armv6_1136_pmu_init}, {.compatible = "qcom,krait-pmu", .data = krait_pmu_init}, {}, }; static struct platform_device_id cpu_pmu_plat_device_ids[] = { {.name = "arm-pmu"}, + {.name = "armv6-pmu"}, + {.name = "armv7-pmu"}, + {.name = "xscale-pmu"}, {}, }; @@ -250,40 +253,43 @@ static struct platform_device_id cpu_pmu_plat_device_ids[] = { static int probe_current_pmu(struct arm_pmu *pmu) { int cpu = get_cpu(); - unsigned long implementor = read_cpuid_implementor(); - unsigned long part_number = read_cpuid_part_number(); int ret = -ENODEV; pr_info("probing PMU on CPU %d\n", cpu); + switch (read_cpuid_part()) { /* ARM Ltd CPUs. */ - if (implementor == ARM_CPU_IMP_ARM) { - switch (part_number) { - case ARM_CPU_PART_ARM1136: - case ARM_CPU_PART_ARM1156: - case ARM_CPU_PART_ARM1176: - ret = armv6pmu_init(pmu); - break; - case ARM_CPU_PART_ARM11MPCORE: - ret = armv6mpcore_pmu_init(pmu); - break; - case ARM_CPU_PART_CORTEX_A8: - ret = armv7_a8_pmu_init(pmu); - break; - case ARM_CPU_PART_CORTEX_A9: - ret = armv7_a9_pmu_init(pmu); - break; - } - /* Intel CPUs [xscale]. */ - } else if (implementor == ARM_CPU_IMP_INTEL) { - switch (xscale_cpu_arch_version()) { - case ARM_CPU_XSCALE_ARCH_V1: - ret = xscale1pmu_init(pmu); - break; - case ARM_CPU_XSCALE_ARCH_V2: - ret = xscale2pmu_init(pmu); - break; + case ARM_CPU_PART_ARM1136: + ret = armv6_1136_pmu_init(pmu); + break; + case ARM_CPU_PART_ARM1156: + ret = armv6_1156_pmu_init(pmu); + break; + case ARM_CPU_PART_ARM1176: + ret = armv6_1176_pmu_init(pmu); + break; + case ARM_CPU_PART_ARM11MPCORE: + ret = armv6mpcore_pmu_init(pmu); + break; + case ARM_CPU_PART_CORTEX_A8: + ret = armv7_a8_pmu_init(pmu); + break; + case ARM_CPU_PART_CORTEX_A9: + ret = armv7_a9_pmu_init(pmu); + break; + + default: + if (read_cpuid_implementor() == ARM_CPU_IMP_INTEL) { + switch (xscale_cpu_arch_version()) { + case ARM_CPU_XSCALE_ARCH_V1: + ret = xscale1pmu_init(pmu); + break; + case ARM_CPU_XSCALE_ARCH_V2: + ret = xscale2pmu_init(pmu); + break; + } } + break; } put_cpu(); diff --git a/arch/arm/kernel/perf_event_v6.c b/arch/arm/kernel/perf_event_v6.c index 03664b0e8fa4..abfeb04f3213 100644 --- a/arch/arm/kernel/perf_event_v6.c +++ b/arch/arm/kernel/perf_event_v6.c @@ -65,13 +65,11 @@ enum armv6_counters { * accesses/misses in hardware. */ static const unsigned armv6_perf_map[PERF_COUNT_HW_MAX] = { + PERF_MAP_ALL_UNSUPPORTED, [PERF_COUNT_HW_CPU_CYCLES] = ARMV6_PERFCTR_CPU_CYCLES, [PERF_COUNT_HW_INSTRUCTIONS] = ARMV6_PERFCTR_INSTR_EXEC, - [PERF_COUNT_HW_CACHE_REFERENCES] = HW_OP_UNSUPPORTED, - [PERF_COUNT_HW_CACHE_MISSES] = HW_OP_UNSUPPORTED, [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = ARMV6_PERFCTR_BR_EXEC, [PERF_COUNT_HW_BRANCH_MISSES] = ARMV6_PERFCTR_BR_MISPREDICT, - [PERF_COUNT_HW_BUS_CYCLES] = HW_OP_UNSUPPORTED, [PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = ARMV6_PERFCTR_IBUF_STALL, [PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = ARMV6_PERFCTR_LSU_FULL_STALL, }; @@ -79,116 +77,31 @@ static const unsigned armv6_perf_map[PERF_COUNT_HW_MAX] = { static const unsigned armv6_perf_cache_map[PERF_COUNT_HW_CACHE_MAX] [PERF_COUNT_HW_CACHE_OP_MAX] [PERF_COUNT_HW_CACHE_RESULT_MAX] = { - [C(L1D)] = { - /* - * The performance counters don't differentiate between read - * and write accesses/misses so this isn't strictly correct, - * but it's the best we can do. Writes and reads get - * combined. - */ - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = ARMV6_PERFCTR_DCACHE_ACCESS, - [C(RESULT_MISS)] = ARMV6_PERFCTR_DCACHE_MISS, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = ARMV6_PERFCTR_DCACHE_ACCESS, - [C(RESULT_MISS)] = ARMV6_PERFCTR_DCACHE_MISS, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - }, - [C(L1I)] = { - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = ARMV6_PERFCTR_ICACHE_MISS, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - }, - [C(LL)] = { - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - }, - [C(DTLB)] = { - /* - * The ARM performance counters can count micro DTLB misses, - * micro ITLB misses and main TLB misses. There isn't an event - * for TLB misses, so use the micro misses here and if users - * want the main TLB misses they can use a raw counter. - */ - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = ARMV6_PERFCTR_DTLB_MISS, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = ARMV6_PERFCTR_DTLB_MISS, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - }, - [C(ITLB)] = { - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = ARMV6_PERFCTR_ITLB_MISS, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = ARMV6_PERFCTR_ITLB_MISS, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - }, - [C(BPU)] = { - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - }, - [C(NODE)] = { - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - }, + PERF_CACHE_MAP_ALL_UNSUPPORTED, + + /* + * The performance counters don't differentiate between read and write + * accesses/misses so this isn't strictly correct, but it's the best we + * can do. Writes and reads get combined. + */ + [C(L1D)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV6_PERFCTR_DCACHE_ACCESS, + [C(L1D)][C(OP_READ)][C(RESULT_MISS)] = ARMV6_PERFCTR_DCACHE_MISS, + [C(L1D)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV6_PERFCTR_DCACHE_ACCESS, + [C(L1D)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV6_PERFCTR_DCACHE_MISS, + + [C(L1I)][C(OP_READ)][C(RESULT_MISS)] = ARMV6_PERFCTR_ICACHE_MISS, + + /* + * The ARM performance counters can count micro DTLB misses, micro ITLB + * misses and main TLB misses. There isn't an event for TLB misses, so + * use the micro misses here and if users want the main TLB misses they + * can use a raw counter. + */ + [C(DTLB)][C(OP_READ)][C(RESULT_MISS)] = ARMV6_PERFCTR_DTLB_MISS, + [C(DTLB)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV6_PERFCTR_DTLB_MISS, + + [C(ITLB)][C(OP_READ)][C(RESULT_MISS)] = ARMV6_PERFCTR_ITLB_MISS, + [C(ITLB)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV6_PERFCTR_ITLB_MISS, }; enum armv6mpcore_perf_types { @@ -220,13 +133,11 @@ enum armv6mpcore_perf_types { * accesses/misses in hardware. */ static const unsigned armv6mpcore_perf_map[PERF_COUNT_HW_MAX] = { + PERF_MAP_ALL_UNSUPPORTED, [PERF_COUNT_HW_CPU_CYCLES] = ARMV6MPCORE_PERFCTR_CPU_CYCLES, [PERF_COUNT_HW_INSTRUCTIONS] = ARMV6MPCORE_PERFCTR_INSTR_EXEC, - [PERF_COUNT_HW_CACHE_REFERENCES] = HW_OP_UNSUPPORTED, - [PERF_COUNT_HW_CACHE_MISSES] = HW_OP_UNSUPPORTED, [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = ARMV6MPCORE_PERFCTR_BR_EXEC, [PERF_COUNT_HW_BRANCH_MISSES] = ARMV6MPCORE_PERFCTR_BR_MISPREDICT, - [PERF_COUNT_HW_BUS_CYCLES] = HW_OP_UNSUPPORTED, [PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = ARMV6MPCORE_PERFCTR_IBUF_STALL, [PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = ARMV6MPCORE_PERFCTR_LSU_FULL_STALL, }; @@ -234,114 +145,26 @@ static const unsigned armv6mpcore_perf_map[PERF_COUNT_HW_MAX] = { static const unsigned armv6mpcore_perf_cache_map[PERF_COUNT_HW_CACHE_MAX] [PERF_COUNT_HW_CACHE_OP_MAX] [PERF_COUNT_HW_CACHE_RESULT_MAX] = { - [C(L1D)] = { - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = - ARMV6MPCORE_PERFCTR_DCACHE_RDACCESS, - [C(RESULT_MISS)] = - ARMV6MPCORE_PERFCTR_DCACHE_RDMISS, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = - ARMV6MPCORE_PERFCTR_DCACHE_WRACCESS, - [C(RESULT_MISS)] = - ARMV6MPCORE_PERFCTR_DCACHE_WRMISS, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - }, - [C(L1I)] = { - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = ARMV6MPCORE_PERFCTR_ICACHE_MISS, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - }, - [C(LL)] = { - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - }, - [C(DTLB)] = { - /* - * The ARM performance counters can count micro DTLB misses, - * micro ITLB misses and main TLB misses. There isn't an event - * for TLB misses, so use the micro misses here and if users - * want the main TLB misses they can use a raw counter. - */ - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = ARMV6MPCORE_PERFCTR_DTLB_MISS, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = ARMV6MPCORE_PERFCTR_DTLB_MISS, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - }, - [C(ITLB)] = { - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = ARMV6MPCORE_PERFCTR_ITLB_MISS, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = ARMV6MPCORE_PERFCTR_ITLB_MISS, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - }, - [C(BPU)] = { - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - }, - [C(NODE)] = { - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - }, + PERF_CACHE_MAP_ALL_UNSUPPORTED, + + [C(L1D)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV6MPCORE_PERFCTR_DCACHE_RDACCESS, + [C(L1D)][C(OP_READ)][C(RESULT_MISS)] = ARMV6MPCORE_PERFCTR_DCACHE_RDMISS, + [C(L1D)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV6MPCORE_PERFCTR_DCACHE_WRACCESS, + [C(L1D)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV6MPCORE_PERFCTR_DCACHE_WRMISS, + + [C(L1I)][C(OP_READ)][C(RESULT_MISS)] = ARMV6MPCORE_PERFCTR_ICACHE_MISS, + + /* + * The ARM performance counters can count micro DTLB misses, micro ITLB + * misses and main TLB misses. There isn't an event for TLB misses, so + * use the micro misses here and if users want the main TLB misses they + * can use a raw counter. + */ + [C(DTLB)][C(OP_READ)][C(RESULT_MISS)] = ARMV6MPCORE_PERFCTR_DTLB_MISS, + [C(DTLB)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV6MPCORE_PERFCTR_DTLB_MISS, + + [C(ITLB)][C(OP_READ)][C(RESULT_MISS)] = ARMV6MPCORE_PERFCTR_ITLB_MISS, + [C(ITLB)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV6MPCORE_PERFCTR_ITLB_MISS, }; static inline unsigned long @@ -653,9 +476,8 @@ static int armv6_map_event(struct perf_event *event) &armv6_perf_cache_map, 0xFF); } -static int armv6pmu_init(struct arm_pmu *cpu_pmu) +static void armv6pmu_init(struct arm_pmu *cpu_pmu) { - cpu_pmu->name = "v6"; cpu_pmu->handle_irq = armv6pmu_handle_irq; cpu_pmu->enable = armv6pmu_enable_event; cpu_pmu->disable = armv6pmu_disable_event; @@ -667,7 +489,26 @@ static int armv6pmu_init(struct arm_pmu *cpu_pmu) cpu_pmu->map_event = armv6_map_event; cpu_pmu->num_events = 3; cpu_pmu->max_period = (1LLU << 32) - 1; +} + +static int armv6_1136_pmu_init(struct arm_pmu *cpu_pmu) +{ + armv6pmu_init(cpu_pmu); + cpu_pmu->name = "armv6_1136"; + return 0; +} +static int armv6_1156_pmu_init(struct arm_pmu *cpu_pmu) +{ + armv6pmu_init(cpu_pmu); + cpu_pmu->name = "armv6_1156"; + return 0; +} + +static int armv6_1176_pmu_init(struct arm_pmu *cpu_pmu) +{ + armv6pmu_init(cpu_pmu); + cpu_pmu->name = "armv6_1176"; return 0; } @@ -687,7 +528,7 @@ static int armv6mpcore_map_event(struct perf_event *event) static int armv6mpcore_pmu_init(struct arm_pmu *cpu_pmu) { - cpu_pmu->name = "v6mpcore"; + cpu_pmu->name = "armv6_11mpcore"; cpu_pmu->handle_irq = armv6pmu_handle_irq; cpu_pmu->enable = armv6pmu_enable_event; cpu_pmu->disable = armv6mpcore_pmu_disable_event; @@ -703,7 +544,17 @@ static int armv6mpcore_pmu_init(struct arm_pmu *cpu_pmu) return 0; } #else -static int armv6pmu_init(struct arm_pmu *cpu_pmu) +static int armv6_1136_pmu_init(struct arm_pmu *cpu_pmu) +{ + return -ENODEV; +} + +static int armv6_1156_pmu_init(struct arm_pmu *cpu_pmu) +{ + return -ENODEV; +} + +static int armv6_1176_pmu_init(struct arm_pmu *cpu_pmu) { return -ENODEV; } diff --git a/arch/arm/kernel/perf_event_v7.c b/arch/arm/kernel/perf_event_v7.c index 1d37568c547a..116758b77f93 100644 --- a/arch/arm/kernel/perf_event_v7.c +++ b/arch/arm/kernel/perf_event_v7.c @@ -148,137 +148,62 @@ enum krait_perf_types { * accesses/misses in hardware. */ static const unsigned armv7_a8_perf_map[PERF_COUNT_HW_MAX] = { + PERF_MAP_ALL_UNSUPPORTED, [PERF_COUNT_HW_CPU_CYCLES] = ARMV7_PERFCTR_CPU_CYCLES, [PERF_COUNT_HW_INSTRUCTIONS] = ARMV7_PERFCTR_INSTR_EXECUTED, [PERF_COUNT_HW_CACHE_REFERENCES] = ARMV7_PERFCTR_L1_DCACHE_ACCESS, [PERF_COUNT_HW_CACHE_MISSES] = ARMV7_PERFCTR_L1_DCACHE_REFILL, [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = ARMV7_PERFCTR_PC_WRITE, [PERF_COUNT_HW_BRANCH_MISSES] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED, - [PERF_COUNT_HW_BUS_CYCLES] = HW_OP_UNSUPPORTED, [PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = ARMV7_A8_PERFCTR_STALL_ISIDE, - [PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = HW_OP_UNSUPPORTED, }; static const unsigned armv7_a8_perf_cache_map[PERF_COUNT_HW_CACHE_MAX] [PERF_COUNT_HW_CACHE_OP_MAX] [PERF_COUNT_HW_CACHE_RESULT_MAX] = { - [C(L1D)] = { - /* - * The performance counters don't differentiate between read - * and write accesses/misses so this isn't strictly correct, - * but it's the best we can do. Writes and reads get - * combined. - */ - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = ARMV7_PERFCTR_L1_DCACHE_ACCESS, - [C(RESULT_MISS)] = ARMV7_PERFCTR_L1_DCACHE_REFILL, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = ARMV7_PERFCTR_L1_DCACHE_ACCESS, - [C(RESULT_MISS)] = ARMV7_PERFCTR_L1_DCACHE_REFILL, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - }, - [C(L1I)] = { - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = ARMV7_A8_PERFCTR_L1_ICACHE_ACCESS, - [C(RESULT_MISS)] = ARMV7_PERFCTR_L1_ICACHE_REFILL, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - }, - [C(LL)] = { - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = ARMV7_A8_PERFCTR_L2_CACHE_ACCESS, - [C(RESULT_MISS)] = ARMV7_A8_PERFCTR_L2_CACHE_REFILL, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = ARMV7_A8_PERFCTR_L2_CACHE_ACCESS, - [C(RESULT_MISS)] = ARMV7_A8_PERFCTR_L2_CACHE_REFILL, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - }, - [C(DTLB)] = { - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = ARMV7_PERFCTR_DTLB_REFILL, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = ARMV7_PERFCTR_DTLB_REFILL, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - }, - [C(ITLB)] = { - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = ARMV7_PERFCTR_ITLB_REFILL, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = ARMV7_PERFCTR_ITLB_REFILL, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - }, - [C(BPU)] = { - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = ARMV7_PERFCTR_PC_BRANCH_PRED, - [C(RESULT_MISS)] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = ARMV7_PERFCTR_PC_BRANCH_PRED, - [C(RESULT_MISS)] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - }, - [C(NODE)] = { - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - }, + PERF_CACHE_MAP_ALL_UNSUPPORTED, + + /* + * The performance counters don't differentiate between read and write + * accesses/misses so this isn't strictly correct, but it's the best we + * can do. Writes and reads get combined. + */ + [C(L1D)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_L1_DCACHE_ACCESS, + [C(L1D)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_L1_DCACHE_REFILL, + [C(L1D)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_L1_DCACHE_ACCESS, + [C(L1D)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_PERFCTR_L1_DCACHE_REFILL, + + [C(L1I)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV7_A8_PERFCTR_L1_ICACHE_ACCESS, + [C(L1I)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_L1_ICACHE_REFILL, + + [C(LL)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV7_A8_PERFCTR_L2_CACHE_ACCESS, + [C(LL)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_A8_PERFCTR_L2_CACHE_REFILL, + [C(LL)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV7_A8_PERFCTR_L2_CACHE_ACCESS, + [C(LL)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_A8_PERFCTR_L2_CACHE_REFILL, + + [C(DTLB)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_DTLB_REFILL, + [C(DTLB)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_PERFCTR_DTLB_REFILL, + + [C(ITLB)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_ITLB_REFILL, + [C(ITLB)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_PERFCTR_ITLB_REFILL, + + [C(BPU)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_PC_BRANCH_PRED, + [C(BPU)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED, + [C(BPU)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_PC_BRANCH_PRED, + [C(BPU)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED, }; /* * Cortex-A9 HW events mapping */ static const unsigned armv7_a9_perf_map[PERF_COUNT_HW_MAX] = { + PERF_MAP_ALL_UNSUPPORTED, [PERF_COUNT_HW_CPU_CYCLES] = ARMV7_PERFCTR_CPU_CYCLES, [PERF_COUNT_HW_INSTRUCTIONS] = ARMV7_A9_PERFCTR_INSTR_CORE_RENAME, [PERF_COUNT_HW_CACHE_REFERENCES] = ARMV7_PERFCTR_L1_DCACHE_ACCESS, [PERF_COUNT_HW_CACHE_MISSES] = ARMV7_PERFCTR_L1_DCACHE_REFILL, [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = ARMV7_PERFCTR_PC_WRITE, [PERF_COUNT_HW_BRANCH_MISSES] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED, - [PERF_COUNT_HW_BUS_CYCLES] = HW_OP_UNSUPPORTED, [PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = ARMV7_A9_PERFCTR_STALL_ICACHE, [PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = ARMV7_A9_PERFCTR_STALL_DISPATCH, }; @@ -286,238 +211,83 @@ static const unsigned armv7_a9_perf_map[PERF_COUNT_HW_MAX] = { static const unsigned armv7_a9_perf_cache_map[PERF_COUNT_HW_CACHE_MAX] [PERF_COUNT_HW_CACHE_OP_MAX] [PERF_COUNT_HW_CACHE_RESULT_MAX] = { - [C(L1D)] = { - /* - * The performance counters don't differentiate between read - * and write accesses/misses so this isn't strictly correct, - * but it's the best we can do. Writes and reads get - * combined. - */ - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = ARMV7_PERFCTR_L1_DCACHE_ACCESS, - [C(RESULT_MISS)] = ARMV7_PERFCTR_L1_DCACHE_REFILL, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = ARMV7_PERFCTR_L1_DCACHE_ACCESS, - [C(RESULT_MISS)] = ARMV7_PERFCTR_L1_DCACHE_REFILL, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - }, - [C(L1I)] = { - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = ARMV7_PERFCTR_L1_ICACHE_REFILL, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - }, - [C(LL)] = { - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - }, - [C(DTLB)] = { - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = ARMV7_PERFCTR_DTLB_REFILL, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = ARMV7_PERFCTR_DTLB_REFILL, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - }, - [C(ITLB)] = { - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = ARMV7_PERFCTR_ITLB_REFILL, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = ARMV7_PERFCTR_ITLB_REFILL, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - }, - [C(BPU)] = { - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = ARMV7_PERFCTR_PC_BRANCH_PRED, - [C(RESULT_MISS)] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = ARMV7_PERFCTR_PC_BRANCH_PRED, - [C(RESULT_MISS)] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - }, - [C(NODE)] = { - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - }, + PERF_CACHE_MAP_ALL_UNSUPPORTED, + + /* + * The performance counters don't differentiate between read and write + * accesses/misses so this isn't strictly correct, but it's the best we + * can do. Writes and reads get combined. + */ + [C(L1D)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_L1_DCACHE_ACCESS, + [C(L1D)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_L1_DCACHE_REFILL, + [C(L1D)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_L1_DCACHE_ACCESS, + [C(L1D)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_PERFCTR_L1_DCACHE_REFILL, + + [C(L1I)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_L1_ICACHE_REFILL, + + [C(DTLB)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_DTLB_REFILL, + [C(DTLB)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_PERFCTR_DTLB_REFILL, + + [C(ITLB)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_ITLB_REFILL, + [C(ITLB)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_PERFCTR_ITLB_REFILL, + + [C(BPU)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_PC_BRANCH_PRED, + [C(BPU)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED, + [C(BPU)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_PC_BRANCH_PRED, + [C(BPU)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED, }; /* * Cortex-A5 HW events mapping */ static const unsigned armv7_a5_perf_map[PERF_COUNT_HW_MAX] = { + PERF_MAP_ALL_UNSUPPORTED, [PERF_COUNT_HW_CPU_CYCLES] = ARMV7_PERFCTR_CPU_CYCLES, [PERF_COUNT_HW_INSTRUCTIONS] = ARMV7_PERFCTR_INSTR_EXECUTED, [PERF_COUNT_HW_CACHE_REFERENCES] = ARMV7_PERFCTR_L1_DCACHE_ACCESS, [PERF_COUNT_HW_CACHE_MISSES] = ARMV7_PERFCTR_L1_DCACHE_REFILL, [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = ARMV7_PERFCTR_PC_WRITE, [PERF_COUNT_HW_BRANCH_MISSES] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED, - [PERF_COUNT_HW_BUS_CYCLES] = HW_OP_UNSUPPORTED, - [PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = HW_OP_UNSUPPORTED, - [PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = HW_OP_UNSUPPORTED, }; static const unsigned armv7_a5_perf_cache_map[PERF_COUNT_HW_CACHE_MAX] [PERF_COUNT_HW_CACHE_OP_MAX] [PERF_COUNT_HW_CACHE_RESULT_MAX] = { - [C(L1D)] = { - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = ARMV7_PERFCTR_L1_DCACHE_ACCESS, - [C(RESULT_MISS)] = ARMV7_PERFCTR_L1_DCACHE_REFILL, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = ARMV7_PERFCTR_L1_DCACHE_ACCESS, - [C(RESULT_MISS)] = ARMV7_PERFCTR_L1_DCACHE_REFILL, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = ARMV7_A5_PERFCTR_PREFETCH_LINEFILL, - [C(RESULT_MISS)] = ARMV7_A5_PERFCTR_PREFETCH_LINEFILL_DROP, - }, - }, - [C(L1I)] = { - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = ARMV7_PERFCTR_L1_ICACHE_ACCESS, - [C(RESULT_MISS)] = ARMV7_PERFCTR_L1_ICACHE_REFILL, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - /* - * The prefetch counters don't differentiate between the I - * side and the D side. - */ - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = ARMV7_A5_PERFCTR_PREFETCH_LINEFILL, - [C(RESULT_MISS)] = ARMV7_A5_PERFCTR_PREFETCH_LINEFILL_DROP, - }, - }, - [C(LL)] = { - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - }, - [C(DTLB)] = { - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = ARMV7_PERFCTR_DTLB_REFILL, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = ARMV7_PERFCTR_DTLB_REFILL, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - }, - [C(ITLB)] = { - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = ARMV7_PERFCTR_ITLB_REFILL, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = ARMV7_PERFCTR_ITLB_REFILL, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - }, - [C(BPU)] = { - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = ARMV7_PERFCTR_PC_BRANCH_PRED, - [C(RESULT_MISS)] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = ARMV7_PERFCTR_PC_BRANCH_PRED, - [C(RESULT_MISS)] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - }, - [C(NODE)] = { - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - }, + PERF_CACHE_MAP_ALL_UNSUPPORTED, + + [C(L1D)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_L1_DCACHE_ACCESS, + [C(L1D)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_L1_DCACHE_REFILL, + [C(L1D)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_L1_DCACHE_ACCESS, + [C(L1D)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_PERFCTR_L1_DCACHE_REFILL, + [C(L1D)][C(OP_PREFETCH)][C(RESULT_ACCESS)] = ARMV7_A5_PERFCTR_PREFETCH_LINEFILL, + [C(L1D)][C(OP_PREFETCH)][C(RESULT_MISS)] = ARMV7_A5_PERFCTR_PREFETCH_LINEFILL_DROP, + + [C(L1I)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_L1_ICACHE_ACCESS, + [C(L1I)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_L1_ICACHE_REFILL, + /* + * The prefetch counters don't differentiate between the I side and the + * D side. + */ + [C(L1I)][C(OP_PREFETCH)][C(RESULT_ACCESS)] = ARMV7_A5_PERFCTR_PREFETCH_LINEFILL, + [C(L1I)][C(OP_PREFETCH)][C(RESULT_MISS)] = ARMV7_A5_PERFCTR_PREFETCH_LINEFILL_DROP, + + [C(DTLB)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_DTLB_REFILL, + [C(DTLB)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_PERFCTR_DTLB_REFILL, + + [C(ITLB)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_ITLB_REFILL, + [C(ITLB)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_PERFCTR_ITLB_REFILL, + + [C(BPU)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_PC_BRANCH_PRED, + [C(BPU)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED, + [C(BPU)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_PC_BRANCH_PRED, + [C(BPU)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED, }; /* * Cortex-A15 HW events mapping */ static const unsigned armv7_a15_perf_map[PERF_COUNT_HW_MAX] = { + PERF_MAP_ALL_UNSUPPORTED, [PERF_COUNT_HW_CPU_CYCLES] = ARMV7_PERFCTR_CPU_CYCLES, [PERF_COUNT_HW_INSTRUCTIONS] = ARMV7_PERFCTR_INSTR_EXECUTED, [PERF_COUNT_HW_CACHE_REFERENCES] = ARMV7_PERFCTR_L1_DCACHE_ACCESS, @@ -525,123 +295,48 @@ static const unsigned armv7_a15_perf_map[PERF_COUNT_HW_MAX] = { [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = ARMV7_A15_PERFCTR_PC_WRITE_SPEC, [PERF_COUNT_HW_BRANCH_MISSES] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED, [PERF_COUNT_HW_BUS_CYCLES] = ARMV7_PERFCTR_BUS_CYCLES, - [PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = HW_OP_UNSUPPORTED, - [PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = HW_OP_UNSUPPORTED, }; static const unsigned armv7_a15_perf_cache_map[PERF_COUNT_HW_CACHE_MAX] [PERF_COUNT_HW_CACHE_OP_MAX] [PERF_COUNT_HW_CACHE_RESULT_MAX] = { - [C(L1D)] = { - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = ARMV7_A15_PERFCTR_L1_DCACHE_ACCESS_READ, - [C(RESULT_MISS)] = ARMV7_A15_PERFCTR_L1_DCACHE_REFILL_READ, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = ARMV7_A15_PERFCTR_L1_DCACHE_ACCESS_WRITE, - [C(RESULT_MISS)] = ARMV7_A15_PERFCTR_L1_DCACHE_REFILL_WRITE, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - }, - [C(L1I)] = { - /* - * Not all performance counters differentiate between read - * and write accesses/misses so we're not always strictly - * correct, but it's the best we can do. Writes and reads get - * combined in these cases. - */ - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = ARMV7_PERFCTR_L1_ICACHE_ACCESS, - [C(RESULT_MISS)] = ARMV7_PERFCTR_L1_ICACHE_REFILL, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - }, - [C(LL)] = { - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = ARMV7_A15_PERFCTR_L2_CACHE_ACCESS_READ, - [C(RESULT_MISS)] = ARMV7_A15_PERFCTR_L2_CACHE_REFILL_READ, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = ARMV7_A15_PERFCTR_L2_CACHE_ACCESS_WRITE, - [C(RESULT_MISS)] = ARMV7_A15_PERFCTR_L2_CACHE_REFILL_WRITE, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - }, - [C(DTLB)] = { - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = ARMV7_A15_PERFCTR_DTLB_REFILL_L1_READ, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = ARMV7_A15_PERFCTR_DTLB_REFILL_L1_WRITE, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - }, - [C(ITLB)] = { - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = ARMV7_PERFCTR_ITLB_REFILL, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = ARMV7_PERFCTR_ITLB_REFILL, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - }, - [C(BPU)] = { - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = ARMV7_PERFCTR_PC_BRANCH_PRED, - [C(RESULT_MISS)] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = ARMV7_PERFCTR_PC_BRANCH_PRED, - [C(RESULT_MISS)] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - }, - [C(NODE)] = { - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - }, + PERF_CACHE_MAP_ALL_UNSUPPORTED, + + [C(L1D)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV7_A15_PERFCTR_L1_DCACHE_ACCESS_READ, + [C(L1D)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_A15_PERFCTR_L1_DCACHE_REFILL_READ, + [C(L1D)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV7_A15_PERFCTR_L1_DCACHE_ACCESS_WRITE, + [C(L1D)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_A15_PERFCTR_L1_DCACHE_REFILL_WRITE, + + /* + * Not all performance counters differentiate between read and write + * accesses/misses so we're not always strictly correct, but it's the + * best we can do. Writes and reads get combined in these cases. + */ + [C(L1I)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_L1_ICACHE_ACCESS, + [C(L1I)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_L1_ICACHE_REFILL, + + [C(LL)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV7_A15_PERFCTR_L2_CACHE_ACCESS_READ, + [C(LL)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_A15_PERFCTR_L2_CACHE_REFILL_READ, + [C(LL)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV7_A15_PERFCTR_L2_CACHE_ACCESS_WRITE, + [C(LL)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_A15_PERFCTR_L2_CACHE_REFILL_WRITE, + + [C(DTLB)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_A15_PERFCTR_DTLB_REFILL_L1_READ, + [C(DTLB)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_A15_PERFCTR_DTLB_REFILL_L1_WRITE, + + [C(ITLB)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_ITLB_REFILL, + [C(ITLB)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_PERFCTR_ITLB_REFILL, + + [C(BPU)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_PC_BRANCH_PRED, + [C(BPU)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED, + [C(BPU)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_PC_BRANCH_PRED, + [C(BPU)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED, }; /* * Cortex-A7 HW events mapping */ static const unsigned armv7_a7_perf_map[PERF_COUNT_HW_MAX] = { + PERF_MAP_ALL_UNSUPPORTED, [PERF_COUNT_HW_CPU_CYCLES] = ARMV7_PERFCTR_CPU_CYCLES, [PERF_COUNT_HW_INSTRUCTIONS] = ARMV7_PERFCTR_INSTR_EXECUTED, [PERF_COUNT_HW_CACHE_REFERENCES] = ARMV7_PERFCTR_L1_DCACHE_ACCESS, @@ -649,123 +344,48 @@ static const unsigned armv7_a7_perf_map[PERF_COUNT_HW_MAX] = { [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = ARMV7_PERFCTR_PC_WRITE, [PERF_COUNT_HW_BRANCH_MISSES] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED, [PERF_COUNT_HW_BUS_CYCLES] = ARMV7_PERFCTR_BUS_CYCLES, - [PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = HW_OP_UNSUPPORTED, - [PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = HW_OP_UNSUPPORTED, }; static const unsigned armv7_a7_perf_cache_map[PERF_COUNT_HW_CACHE_MAX] [PERF_COUNT_HW_CACHE_OP_MAX] [PERF_COUNT_HW_CACHE_RESULT_MAX] = { - [C(L1D)] = { - /* - * The performance counters don't differentiate between read - * and write accesses/misses so this isn't strictly correct, - * but it's the best we can do. Writes and reads get - * combined. - */ - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = ARMV7_PERFCTR_L1_DCACHE_ACCESS, - [C(RESULT_MISS)] = ARMV7_PERFCTR_L1_DCACHE_REFILL, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = ARMV7_PERFCTR_L1_DCACHE_ACCESS, - [C(RESULT_MISS)] = ARMV7_PERFCTR_L1_DCACHE_REFILL, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - }, - [C(L1I)] = { - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = ARMV7_PERFCTR_L1_ICACHE_ACCESS, - [C(RESULT_MISS)] = ARMV7_PERFCTR_L1_ICACHE_REFILL, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - }, - [C(LL)] = { - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = ARMV7_PERFCTR_L2_CACHE_ACCESS, - [C(RESULT_MISS)] = ARMV7_PERFCTR_L2_CACHE_REFILL, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = ARMV7_PERFCTR_L2_CACHE_ACCESS, - [C(RESULT_MISS)] = ARMV7_PERFCTR_L2_CACHE_REFILL, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - }, - [C(DTLB)] = { - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = ARMV7_PERFCTR_DTLB_REFILL, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = ARMV7_PERFCTR_DTLB_REFILL, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - }, - [C(ITLB)] = { - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = ARMV7_PERFCTR_ITLB_REFILL, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = ARMV7_PERFCTR_ITLB_REFILL, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - }, - [C(BPU)] = { - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = ARMV7_PERFCTR_PC_BRANCH_PRED, - [C(RESULT_MISS)] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = ARMV7_PERFCTR_PC_BRANCH_PRED, - [C(RESULT_MISS)] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - }, - [C(NODE)] = { - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - }, + PERF_CACHE_MAP_ALL_UNSUPPORTED, + + /* + * The performance counters don't differentiate between read and write + * accesses/misses so this isn't strictly correct, but it's the best we + * can do. Writes and reads get combined. + */ + [C(L1D)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_L1_DCACHE_ACCESS, + [C(L1D)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_L1_DCACHE_REFILL, + [C(L1D)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_L1_DCACHE_ACCESS, + [C(L1D)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_PERFCTR_L1_DCACHE_REFILL, + + [C(L1I)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_L1_ICACHE_ACCESS, + [C(L1I)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_L1_ICACHE_REFILL, + + [C(LL)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_L2_CACHE_ACCESS, + [C(LL)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_L2_CACHE_REFILL, + [C(LL)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_L2_CACHE_ACCESS, + [C(LL)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_PERFCTR_L2_CACHE_REFILL, + + [C(DTLB)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_DTLB_REFILL, + [C(DTLB)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_PERFCTR_DTLB_REFILL, + + [C(ITLB)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_ITLB_REFILL, + [C(ITLB)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_PERFCTR_ITLB_REFILL, + + [C(BPU)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_PC_BRANCH_PRED, + [C(BPU)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED, + [C(BPU)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_PC_BRANCH_PRED, + [C(BPU)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED, }; /* * Cortex-A12 HW events mapping */ static const unsigned armv7_a12_perf_map[PERF_COUNT_HW_MAX] = { + PERF_MAP_ALL_UNSUPPORTED, [PERF_COUNT_HW_CPU_CYCLES] = ARMV7_PERFCTR_CPU_CYCLES, [PERF_COUNT_HW_INSTRUCTIONS] = ARMV7_PERFCTR_INSTR_EXECUTED, [PERF_COUNT_HW_CACHE_REFERENCES] = ARMV7_PERFCTR_L1_DCACHE_ACCESS, @@ -773,138 +393,60 @@ static const unsigned armv7_a12_perf_map[PERF_COUNT_HW_MAX] = { [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = ARMV7_A12_PERFCTR_PC_WRITE_SPEC, [PERF_COUNT_HW_BRANCH_MISSES] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED, [PERF_COUNT_HW_BUS_CYCLES] = ARMV7_PERFCTR_BUS_CYCLES, - [PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = HW_OP_UNSUPPORTED, - [PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = HW_OP_UNSUPPORTED, }; static const unsigned armv7_a12_perf_cache_map[PERF_COUNT_HW_CACHE_MAX] [PERF_COUNT_HW_CACHE_OP_MAX] [PERF_COUNT_HW_CACHE_RESULT_MAX] = { - [C(L1D)] = { - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = ARMV7_A12_PERFCTR_L1_DCACHE_ACCESS_READ, - [C(RESULT_MISS)] = ARMV7_PERFCTR_L1_DCACHE_REFILL, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = ARMV7_A12_PERFCTR_L1_DCACHE_ACCESS_WRITE, - [C(RESULT_MISS)] = ARMV7_PERFCTR_L1_DCACHE_REFILL, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - }, - [C(L1I)] = { - /* - * Not all performance counters differentiate between read - * and write accesses/misses so we're not always strictly - * correct, but it's the best we can do. Writes and reads get - * combined in these cases. - */ - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = ARMV7_PERFCTR_L1_ICACHE_ACCESS, - [C(RESULT_MISS)] = ARMV7_PERFCTR_L1_ICACHE_REFILL, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - }, - [C(LL)] = { - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = ARMV7_A12_PERFCTR_L2_CACHE_ACCESS_READ, - [C(RESULT_MISS)] = ARMV7_PERFCTR_L2_CACHE_REFILL, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = ARMV7_A12_PERFCTR_L2_CACHE_ACCESS_WRITE, - [C(RESULT_MISS)] = ARMV7_PERFCTR_L2_CACHE_REFILL, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - }, - [C(DTLB)] = { - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = ARMV7_PERFCTR_DTLB_REFILL, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = ARMV7_PERFCTR_DTLB_REFILL, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = ARMV7_A12_PERFCTR_PF_TLB_REFILL, - }, - }, - [C(ITLB)] = { - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = ARMV7_PERFCTR_ITLB_REFILL, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = ARMV7_PERFCTR_ITLB_REFILL, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - }, - [C(BPU)] = { - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = ARMV7_PERFCTR_PC_BRANCH_PRED, - [C(RESULT_MISS)] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = ARMV7_PERFCTR_PC_BRANCH_PRED, - [C(RESULT_MISS)] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - }, - [C(NODE)] = { - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - }, + PERF_CACHE_MAP_ALL_UNSUPPORTED, + + [C(L1D)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV7_A12_PERFCTR_L1_DCACHE_ACCESS_READ, + [C(L1D)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_L1_DCACHE_REFILL, + [C(L1D)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV7_A12_PERFCTR_L1_DCACHE_ACCESS_WRITE, + [C(L1D)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_PERFCTR_L1_DCACHE_REFILL, + + /* + * Not all performance counters differentiate between read and write + * accesses/misses so we're not always strictly correct, but it's the + * best we can do. Writes and reads get combined in these cases. + */ + [C(L1I)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_L1_ICACHE_ACCESS, + [C(L1I)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_L1_ICACHE_REFILL, + + [C(LL)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV7_A12_PERFCTR_L2_CACHE_ACCESS_READ, + [C(LL)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_L2_CACHE_REFILL, + [C(LL)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV7_A12_PERFCTR_L2_CACHE_ACCESS_WRITE, + [C(LL)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_PERFCTR_L2_CACHE_REFILL, + + [C(DTLB)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_DTLB_REFILL, + [C(DTLB)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_PERFCTR_DTLB_REFILL, + [C(DTLB)][C(OP_PREFETCH)][C(RESULT_MISS)] = ARMV7_A12_PERFCTR_PF_TLB_REFILL, + + [C(ITLB)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_ITLB_REFILL, + [C(ITLB)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_PERFCTR_ITLB_REFILL, + + [C(BPU)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_PC_BRANCH_PRED, + [C(BPU)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED, + [C(BPU)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_PC_BRANCH_PRED, + [C(BPU)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED, }; /* * Krait HW events mapping */ static const unsigned krait_perf_map[PERF_COUNT_HW_MAX] = { + PERF_MAP_ALL_UNSUPPORTED, [PERF_COUNT_HW_CPU_CYCLES] = ARMV7_PERFCTR_CPU_CYCLES, [PERF_COUNT_HW_INSTRUCTIONS] = ARMV7_PERFCTR_INSTR_EXECUTED, - [PERF_COUNT_HW_CACHE_REFERENCES] = HW_OP_UNSUPPORTED, - [PERF_COUNT_HW_CACHE_MISSES] = HW_OP_UNSUPPORTED, [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = ARMV7_PERFCTR_PC_WRITE, [PERF_COUNT_HW_BRANCH_MISSES] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED, [PERF_COUNT_HW_BUS_CYCLES] = ARMV7_PERFCTR_CLOCK_CYCLES, }; static const unsigned krait_perf_map_no_branch[PERF_COUNT_HW_MAX] = { + PERF_MAP_ALL_UNSUPPORTED, [PERF_COUNT_HW_CPU_CYCLES] = ARMV7_PERFCTR_CPU_CYCLES, [PERF_COUNT_HW_INSTRUCTIONS] = ARMV7_PERFCTR_INSTR_EXECUTED, - [PERF_COUNT_HW_CACHE_REFERENCES] = HW_OP_UNSUPPORTED, - [PERF_COUNT_HW_CACHE_MISSES] = HW_OP_UNSUPPORTED, - [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = HW_OP_UNSUPPORTED, [PERF_COUNT_HW_BRANCH_MISSES] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED, [PERF_COUNT_HW_BUS_CYCLES] = ARMV7_PERFCTR_CLOCK_CYCLES, }; @@ -912,110 +454,31 @@ static const unsigned krait_perf_map_no_branch[PERF_COUNT_HW_MAX] = { static const unsigned krait_perf_cache_map[PERF_COUNT_HW_CACHE_MAX] [PERF_COUNT_HW_CACHE_OP_MAX] [PERF_COUNT_HW_CACHE_RESULT_MAX] = { - [C(L1D)] = { - /* - * The performance counters don't differentiate between read - * and write accesses/misses so this isn't strictly correct, - * but it's the best we can do. Writes and reads get - * combined. - */ - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = ARMV7_PERFCTR_L1_DCACHE_ACCESS, - [C(RESULT_MISS)] = ARMV7_PERFCTR_L1_DCACHE_REFILL, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = ARMV7_PERFCTR_L1_DCACHE_ACCESS, - [C(RESULT_MISS)] = ARMV7_PERFCTR_L1_DCACHE_REFILL, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - }, - [C(L1I)] = { - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = KRAIT_PERFCTR_L1_ICACHE_ACCESS, - [C(RESULT_MISS)] = KRAIT_PERFCTR_L1_ICACHE_MISS, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - }, - [C(LL)] = { - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - }, - [C(DTLB)] = { - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = KRAIT_PERFCTR_L1_DTLB_ACCESS, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = KRAIT_PERFCTR_L1_DTLB_ACCESS, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - }, - [C(ITLB)] = { - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = KRAIT_PERFCTR_L1_ITLB_ACCESS, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = KRAIT_PERFCTR_L1_ITLB_ACCESS, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - }, - [C(BPU)] = { - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = ARMV7_PERFCTR_PC_BRANCH_PRED, - [C(RESULT_MISS)] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = ARMV7_PERFCTR_PC_BRANCH_PRED, - [C(RESULT_MISS)] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - }, - [C(NODE)] = { - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - }, + PERF_CACHE_MAP_ALL_UNSUPPORTED, + + /* + * The performance counters don't differentiate between read and write + * accesses/misses so this isn't strictly correct, but it's the best we + * can do. Writes and reads get combined. + */ + [C(L1D)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_L1_DCACHE_ACCESS, + [C(L1D)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_L1_DCACHE_REFILL, + [C(L1D)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_L1_DCACHE_ACCESS, + [C(L1D)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_PERFCTR_L1_DCACHE_REFILL, + + [C(L1I)][C(OP_READ)][C(RESULT_ACCESS)] = KRAIT_PERFCTR_L1_ICACHE_ACCESS, + [C(L1I)][C(OP_READ)][C(RESULT_MISS)] = KRAIT_PERFCTR_L1_ICACHE_MISS, + + [C(DTLB)][C(OP_READ)][C(RESULT_ACCESS)] = KRAIT_PERFCTR_L1_DTLB_ACCESS, + [C(DTLB)][C(OP_WRITE)][C(RESULT_ACCESS)] = KRAIT_PERFCTR_L1_DTLB_ACCESS, + + [C(ITLB)][C(OP_READ)][C(RESULT_ACCESS)] = KRAIT_PERFCTR_L1_ITLB_ACCESS, + [C(ITLB)][C(OP_WRITE)][C(RESULT_ACCESS)] = KRAIT_PERFCTR_L1_ITLB_ACCESS, + + [C(BPU)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_PC_BRANCH_PRED, + [C(BPU)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED, + [C(BPU)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_PC_BRANCH_PRED, + [C(BPU)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED, }; /* @@ -1545,7 +1008,7 @@ static u32 armv7_read_num_pmnc_events(void) static int armv7_a8_pmu_init(struct arm_pmu *cpu_pmu) { armv7pmu_init(cpu_pmu); - cpu_pmu->name = "ARMv7 Cortex-A8"; + cpu_pmu->name = "armv7_cortex_a8"; cpu_pmu->map_event = armv7_a8_map_event; cpu_pmu->num_events = armv7_read_num_pmnc_events(); return 0; @@ -1554,7 +1017,7 @@ static int armv7_a8_pmu_init(struct arm_pmu *cpu_pmu) static int armv7_a9_pmu_init(struct arm_pmu *cpu_pmu) { armv7pmu_init(cpu_pmu); - cpu_pmu->name = "ARMv7 Cortex-A9"; + cpu_pmu->name = "armv7_cortex_a9"; cpu_pmu->map_event = armv7_a9_map_event; cpu_pmu->num_events = armv7_read_num_pmnc_events(); return 0; @@ -1563,7 +1026,7 @@ static int armv7_a9_pmu_init(struct arm_pmu *cpu_pmu) static int armv7_a5_pmu_init(struct arm_pmu *cpu_pmu) { armv7pmu_init(cpu_pmu); - cpu_pmu->name = "ARMv7 Cortex-A5"; + cpu_pmu->name = "armv7_cortex_a5"; cpu_pmu->map_event = armv7_a5_map_event; cpu_pmu->num_events = armv7_read_num_pmnc_events(); return 0; @@ -1572,7 +1035,7 @@ static int armv7_a5_pmu_init(struct arm_pmu *cpu_pmu) static int armv7_a15_pmu_init(struct arm_pmu *cpu_pmu) { armv7pmu_init(cpu_pmu); - cpu_pmu->name = "ARMv7 Cortex-A15"; + cpu_pmu->name = "armv7_cortex_a15"; cpu_pmu->map_event = armv7_a15_map_event; cpu_pmu->num_events = armv7_read_num_pmnc_events(); cpu_pmu->set_event_filter = armv7pmu_set_event_filter; @@ -1582,7 +1045,7 @@ static int armv7_a15_pmu_init(struct arm_pmu *cpu_pmu) static int armv7_a7_pmu_init(struct arm_pmu *cpu_pmu) { armv7pmu_init(cpu_pmu); - cpu_pmu->name = "ARMv7 Cortex-A7"; + cpu_pmu->name = "armv7_cortex_a7"; cpu_pmu->map_event = armv7_a7_map_event; cpu_pmu->num_events = armv7_read_num_pmnc_events(); cpu_pmu->set_event_filter = armv7pmu_set_event_filter; @@ -1592,7 +1055,7 @@ static int armv7_a7_pmu_init(struct arm_pmu *cpu_pmu) static int armv7_a12_pmu_init(struct arm_pmu *cpu_pmu) { armv7pmu_init(cpu_pmu); - cpu_pmu->name = "ARMv7 Cortex-A12"; + cpu_pmu->name = "armv7_cortex_a12"; cpu_pmu->map_event = armv7_a12_map_event; cpu_pmu->num_events = armv7_read_num_pmnc_events(); cpu_pmu->set_event_filter = armv7pmu_set_event_filter; @@ -1602,7 +1065,7 @@ static int armv7_a12_pmu_init(struct arm_pmu *cpu_pmu) static int armv7_a17_pmu_init(struct arm_pmu *cpu_pmu) { armv7_a12_pmu_init(cpu_pmu); - cpu_pmu->name = "ARMv7 Cortex-A17"; + cpu_pmu->name = "armv7_cortex_a17"; return 0; } @@ -1823,6 +1286,7 @@ static void krait_pmu_disable_event(struct perf_event *event) unsigned long flags; struct hw_perf_event *hwc = &event->hw; int idx = hwc->idx; + struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu); struct pmu_hw_events *events = cpu_pmu->get_hw_events(); /* Disable counter and interrupt */ @@ -1848,6 +1312,7 @@ static void krait_pmu_enable_event(struct perf_event *event) unsigned long flags; struct hw_perf_event *hwc = &event->hw; int idx = hwc->idx; + struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu); struct pmu_hw_events *events = cpu_pmu->get_hw_events(); /* @@ -1981,7 +1446,7 @@ static void krait_pmu_clear_event_idx(struct pmu_hw_events *cpuc, static int krait_pmu_init(struct arm_pmu *cpu_pmu) { armv7pmu_init(cpu_pmu); - cpu_pmu->name = "ARMv7 Krait"; + cpu_pmu->name = "armv7_krait"; /* Some early versions of Krait don't support PC write events */ if (of_property_read_bool(cpu_pmu->plat_device->dev.of_node, "qcom,no-pc-write")) diff --git a/arch/arm/kernel/perf_event_xscale.c b/arch/arm/kernel/perf_event_xscale.c index 63990c42fac9..08da0af550b7 100644 --- a/arch/arm/kernel/perf_event_xscale.c +++ b/arch/arm/kernel/perf_event_xscale.c @@ -48,118 +48,31 @@ enum xscale_counters { }; static const unsigned xscale_perf_map[PERF_COUNT_HW_MAX] = { + PERF_MAP_ALL_UNSUPPORTED, [PERF_COUNT_HW_CPU_CYCLES] = XSCALE_PERFCTR_CCNT, [PERF_COUNT_HW_INSTRUCTIONS] = XSCALE_PERFCTR_INSTRUCTION, - [PERF_COUNT_HW_CACHE_REFERENCES] = HW_OP_UNSUPPORTED, - [PERF_COUNT_HW_CACHE_MISSES] = HW_OP_UNSUPPORTED, [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = XSCALE_PERFCTR_BRANCH, [PERF_COUNT_HW_BRANCH_MISSES] = XSCALE_PERFCTR_BRANCH_MISS, - [PERF_COUNT_HW_BUS_CYCLES] = HW_OP_UNSUPPORTED, [PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = XSCALE_PERFCTR_ICACHE_NO_DELIVER, - [PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = HW_OP_UNSUPPORTED, }; static const unsigned xscale_perf_cache_map[PERF_COUNT_HW_CACHE_MAX] [PERF_COUNT_HW_CACHE_OP_MAX] [PERF_COUNT_HW_CACHE_RESULT_MAX] = { - [C(L1D)] = { - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = XSCALE_PERFCTR_DCACHE_ACCESS, - [C(RESULT_MISS)] = XSCALE_PERFCTR_DCACHE_MISS, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = XSCALE_PERFCTR_DCACHE_ACCESS, - [C(RESULT_MISS)] = XSCALE_PERFCTR_DCACHE_MISS, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - }, - [C(L1I)] = { - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = XSCALE_PERFCTR_ICACHE_MISS, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - }, - [C(LL)] = { - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - }, - [C(DTLB)] = { - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = XSCALE_PERFCTR_DTLB_MISS, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = XSCALE_PERFCTR_DTLB_MISS, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - }, - [C(ITLB)] = { - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = XSCALE_PERFCTR_ITLB_MISS, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = XSCALE_PERFCTR_ITLB_MISS, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - }, - [C(BPU)] = { - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - }, - [C(NODE)] = { - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - }, + PERF_CACHE_MAP_ALL_UNSUPPORTED, + + [C(L1D)][C(OP_READ)][C(RESULT_ACCESS)] = XSCALE_PERFCTR_DCACHE_ACCESS, + [C(L1D)][C(OP_READ)][C(RESULT_MISS)] = XSCALE_PERFCTR_DCACHE_MISS, + [C(L1D)][C(OP_WRITE)][C(RESULT_ACCESS)] = XSCALE_PERFCTR_DCACHE_ACCESS, + [C(L1D)][C(OP_WRITE)][C(RESULT_MISS)] = XSCALE_PERFCTR_DCACHE_MISS, + + [C(L1I)][C(OP_READ)][C(RESULT_MISS)] = XSCALE_PERFCTR_ICACHE_MISS, + + [C(DTLB)][C(OP_READ)][C(RESULT_MISS)] = XSCALE_PERFCTR_DTLB_MISS, + [C(DTLB)][C(OP_WRITE)][C(RESULT_MISS)] = XSCALE_PERFCTR_DTLB_MISS, + + [C(ITLB)][C(OP_READ)][C(RESULT_MISS)] = XSCALE_PERFCTR_ITLB_MISS, + [C(ITLB)][C(OP_WRITE)][C(RESULT_MISS)] = XSCALE_PERFCTR_ITLB_MISS, }; #define XSCALE_PMU_ENABLE 0x001 @@ -442,7 +355,7 @@ static int xscale_map_event(struct perf_event *event) static int xscale1pmu_init(struct arm_pmu *cpu_pmu) { - cpu_pmu->name = "xscale1"; + cpu_pmu->name = "armv5_xscale1"; cpu_pmu->handle_irq = xscale1pmu_handle_irq; cpu_pmu->enable = xscale1pmu_enable_event; cpu_pmu->disable = xscale1pmu_disable_event; @@ -812,7 +725,7 @@ static inline void xscale2pmu_write_counter(struct perf_event *event, u32 val) static int xscale2pmu_init(struct arm_pmu *cpu_pmu) { - cpu_pmu->name = "xscale2"; + cpu_pmu->name = "armv5_xscale2"; cpu_pmu->handle_irq = xscale2pmu_handle_irq; cpu_pmu->enable = xscale2pmu_enable_event; cpu_pmu->disable = xscale2pmu_disable_event; diff --git a/arch/arm/kernel/relocate_kernel.S b/arch/arm/kernel/relocate_kernel.S index 95858966d84e..35e72585ec1d 100644 --- a/arch/arm/kernel/relocate_kernel.S +++ b/arch/arm/kernel/relocate_kernel.S @@ -3,6 +3,7 @@ */ #include <linux/linkage.h> +#include <asm/assembler.h> #include <asm/kexec.h> .align 3 /* not needed for this code, but keeps fncpy() happy */ @@ -59,7 +60,7 @@ ENTRY(relocate_new_kernel) mov r0,#0 ldr r1,kexec_mach_type ldr r2,kexec_boot_atags - ARM( mov pc, lr ) + ARM( ret lr ) THUMB( bx lr ) .align diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c index 8a16ee5d8a95..84db893dedc2 100644 --- a/arch/arm/kernel/setup.c +++ b/arch/arm/kernel/setup.c @@ -393,19 +393,34 @@ static void __init cpuid_init_hwcaps(void) elf_hwcap |= HWCAP_LPAE; } -static void __init feat_v6_fixup(void) +static void __init elf_hwcap_fixup(void) { - int id = read_cpuid_id(); - - if ((id & 0xff0f0000) != 0x41070000) - return; + unsigned id = read_cpuid_id(); + unsigned sync_prim; /* * HWCAP_TLS is available only on 1136 r1p0 and later, * see also kuser_get_tls_init. */ - if ((((id >> 4) & 0xfff) == 0xb36) && (((id >> 20) & 3) == 0)) + if (read_cpuid_part() == ARM_CPU_PART_ARM1136 && + ((id >> 20) & 3) == 0) { elf_hwcap &= ~HWCAP_TLS; + return; + } + + /* Verify if CPUID scheme is implemented */ + if ((id & 0x000f0000) != 0x000f0000) + return; + + /* + * If the CPU supports LDREX/STREX and LDREXB/STREXB, + * avoid advertising SWP; it may not be atomic with + * multiprocessing cores. + */ + sync_prim = ((read_cpuid_ext(CPUID_EXT_ISAR3) >> 8) & 0xf0) | + ((read_cpuid_ext(CPUID_EXT_ISAR4) >> 20) & 0x0f); + if (sync_prim >= 0x13) + elf_hwcap &= ~HWCAP_SWP; } /* @@ -609,7 +624,7 @@ static void __init setup_processor(void) #endif erratum_a15_798181_init(); - feat_v6_fixup(); + elf_hwcap_fixup(); cacheid_init(); cpu_init(); diff --git a/arch/arm/kernel/sleep.S b/arch/arm/kernel/sleep.S index 1b880db2a033..e1e60e5a7a27 100644 --- a/arch/arm/kernel/sleep.S +++ b/arch/arm/kernel/sleep.S @@ -107,7 +107,7 @@ ENTRY(cpu_resume_mmu) instr_sync mov r0, r0 mov r0, r0 - mov pc, r3 @ jump to virtual address + ret r3 @ jump to virtual address ENDPROC(cpu_resume_mmu) .popsection cpu_resume_after_mmu: diff --git a/arch/arm/kernel/smp_scu.c b/arch/arm/kernel/smp_scu.c index 1aafa0d785eb..72f9241ad5db 100644 --- a/arch/arm/kernel/smp_scu.c +++ b/arch/arm/kernel/smp_scu.c @@ -17,6 +17,8 @@ #include <asm/cputype.h> #define SCU_CTRL 0x00 +#define SCU_ENABLE (1 << 0) +#define SCU_STANDBY_ENABLE (1 << 5) #define SCU_CONFIG 0x04 #define SCU_CPU_STATUS 0x08 #define SCU_INVALIDATE 0x0c @@ -50,10 +52,16 @@ void scu_enable(void __iomem *scu_base) scu_ctrl = readl_relaxed(scu_base + SCU_CTRL); /* already enabled? */ - if (scu_ctrl & 1) + if (scu_ctrl & SCU_ENABLE) return; - scu_ctrl |= 1; + scu_ctrl |= SCU_ENABLE; + + /* Cortex-A9 earlier than r2p0 has no standby bit in SCU */ + if ((read_cpuid_id() & 0xff0ffff0) == 0x410fc090 && + (read_cpuid_id() & 0x00f0000f) >= 0x00200000) + scu_ctrl |= SCU_STANDBY_ENABLE; + writel_relaxed(scu_ctrl, scu_base + SCU_CTRL); /* diff --git a/arch/arm/kernel/smp_tlb.c b/arch/arm/kernel/smp_tlb.c index 95d063620b76..2e72be4f623e 100644 --- a/arch/arm/kernel/smp_tlb.c +++ b/arch/arm/kernel/smp_tlb.c @@ -92,15 +92,19 @@ void erratum_a15_798181_init(void) unsigned int midr = read_cpuid_id(); unsigned int revidr = read_cpuid(CPUID_REVIDR); - /* Cortex-A15 r0p0..r3p2 w/o ECO fix affected */ - if ((midr & 0xff0ffff0) != 0x410fc0f0 || midr > 0x413fc0f2 || - (revidr & 0x210) == 0x210) { - return; - } - if (revidr & 0x10) - erratum_a15_798181_handler = erratum_a15_798181_partial; - else + /* Brahma-B15 r0p0..r0p2 affected + * Cortex-A15 r0p0..r3p2 w/o ECO fix affected */ + if ((midr & 0xff0ffff0) == 0x420f00f0 && midr <= 0x420f00f2) erratum_a15_798181_handler = erratum_a15_798181_broadcast; + else if ((midr & 0xff0ffff0) == 0x410fc0f0 && midr <= 0x413fc0f2 && + (revidr & 0x210) != 0x210) { + if (revidr & 0x10) + erratum_a15_798181_handler = + erratum_a15_798181_partial; + else + erratum_a15_798181_handler = + erratum_a15_798181_broadcast; + } } #endif diff --git a/arch/arm/kernel/swp_emulate.c b/arch/arm/kernel/swp_emulate.c index b1b89882b113..67ca8578c6d8 100644 --- a/arch/arm/kernel/swp_emulate.c +++ b/arch/arm/kernel/swp_emulate.c @@ -27,6 +27,7 @@ #include <linux/perf_event.h> #include <asm/opcodes.h> +#include <asm/system_info.h> #include <asm/traps.h> #include <asm/uaccess.h> @@ -266,6 +267,9 @@ static struct undef_hook swp_hook = { */ static int __init swp_emulation_init(void) { + if (cpu_architecture() < CPU_ARCH_ARMv7) + return 0; + #ifdef CONFIG_PROC_FS if (!proc_create("cpu/swp_emulation", S_IRUGO, NULL, &proc_status_fops)) return -ENOMEM; diff --git a/arch/arm/kernel/time.c b/arch/arm/kernel/time.c index 829a96d4a179..0cc7e58c47cc 100644 --- a/arch/arm/kernel/time.c +++ b/arch/arm/kernel/time.c @@ -50,10 +50,7 @@ unsigned long profile_pc(struct pt_regs *regs) if (!in_lock_functions(regs->ARM_pc)) return regs->ARM_pc; - frame.fp = regs->ARM_fp; - frame.sp = regs->ARM_sp; - frame.lr = regs->ARM_lr; - frame.pc = regs->ARM_pc; + arm_get_current_stackframe(regs, &frame); do { int ret = unwind_frame(&frame); if (ret < 0) diff --git a/arch/arm/kernel/traps.c b/arch/arm/kernel/traps.c index abd2fc067736..c8e4bb714944 100644 --- a/arch/arm/kernel/traps.c +++ b/arch/arm/kernel/traps.c @@ -31,11 +31,13 @@ #include <asm/exception.h> #include <asm/unistd.h> #include <asm/traps.h> +#include <asm/ptrace.h> #include <asm/unwind.h> #include <asm/tls.h> #include <asm/system_misc.h> #include <asm/opcodes.h> + static const char *handler[]= { "prefetch abort", "data abort", @@ -184,7 +186,7 @@ static void dump_backtrace(struct pt_regs *regs, struct task_struct *tsk) tsk = current; if (regs) { - fp = regs->ARM_fp; + fp = frame_pointer(regs); mode = processor_mode(regs); } else if (tsk != current) { fp = thread_saved_fp(tsk); @@ -719,7 +721,7 @@ asmlinkage int arm_syscall(int no, struct pt_regs *regs) dump_instr("", regs); if (user_mode(regs)) { __show_regs(regs); - c_backtrace(regs->ARM_fp, processor_mode(regs)); + c_backtrace(frame_pointer(regs), processor_mode(regs)); } } #endif diff --git a/arch/arm/kernel/unwind.c b/arch/arm/kernel/unwind.c index e67682f02cb2..a61a1dfbb0db 100644 --- a/arch/arm/kernel/unwind.c +++ b/arch/arm/kernel/unwind.c @@ -479,12 +479,10 @@ void unwind_backtrace(struct pt_regs *regs, struct task_struct *tsk) tsk = current; if (regs) { - frame.fp = regs->ARM_fp; - frame.sp = regs->ARM_sp; - frame.lr = regs->ARM_lr; + arm_get_current_stackframe(regs, &frame); /* PC might be corrupted, use LR in that case. */ - frame.pc = kernel_text_address(regs->ARM_pc) - ? regs->ARM_pc : regs->ARM_lr; + if (!kernel_text_address(regs->ARM_pc)) + frame.pc = regs->ARM_lr; } else if (tsk == current) { frame.fp = (unsigned long)__builtin_frame_address(0); frame.sp = current_sp; diff --git a/arch/arm/kernel/vmlinux.lds.S b/arch/arm/kernel/vmlinux.lds.S index 7bcee5c9b604..6f57cb94367f 100644 --- a/arch/arm/kernel/vmlinux.lds.S +++ b/arch/arm/kernel/vmlinux.lds.S @@ -318,7 +318,6 @@ SECTIONS _end = .; STABS_DEBUG - .comment 0 : { *(.comment) } } /* diff --git a/arch/arm/kvm/guest.c b/arch/arm/kvm/guest.c index b23a59c1c522..70bf49b8b244 100644 --- a/arch/arm/kvm/guest.c +++ b/arch/arm/kvm/guest.c @@ -274,13 +274,7 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, int __attribute_const__ kvm_target_cpu(void) { - unsigned long implementor = read_cpuid_implementor(); - unsigned long part_number = read_cpuid_part_number(); - - if (implementor != ARM_CPU_IMP_ARM) - return -EINVAL; - - switch (part_number) { + switch (read_cpuid_part()) { case ARM_CPU_PART_CORTEX_A7: return KVM_ARM_TARGET_CORTEX_A7; case ARM_CPU_PART_CORTEX_A15: diff --git a/arch/arm/kvm/init.S b/arch/arm/kvm/init.S index 1b9844d369cc..b2d229f09c07 100644 --- a/arch/arm/kvm/init.S +++ b/arch/arm/kvm/init.S @@ -17,6 +17,7 @@ */ #include <linux/linkage.h> +#include <asm/assembler.h> #include <asm/unified.h> #include <asm/asm-offsets.h> #include <asm/kvm_asm.h> @@ -134,7 +135,7 @@ phase2: ldr r0, =TRAMPOLINE_VA adr r1, target bfi r0, r1, #0, #PAGE_SHIFT - mov pc, r0 + ret r0 target: @ We're now in the trampoline code, switch page tables mcrr p15, 4, r2, r3, c2 diff --git a/arch/arm/lib/ashldi3.S b/arch/arm/lib/ashldi3.S index 638deb13da1c..b05e95840651 100644 --- a/arch/arm/lib/ashldi3.S +++ b/arch/arm/lib/ashldi3.S @@ -27,6 +27,7 @@ Boston, MA 02110-1301, USA. */ #include <linux/linkage.h> +#include <asm/assembler.h> #ifdef __ARMEB__ #define al r1 @@ -47,7 +48,7 @@ ENTRY(__aeabi_llsl) THUMB( lsrmi r3, al, ip ) THUMB( orrmi ah, ah, r3 ) mov al, al, lsl r2 - mov pc, lr + ret lr ENDPROC(__ashldi3) ENDPROC(__aeabi_llsl) diff --git a/arch/arm/lib/ashrdi3.S b/arch/arm/lib/ashrdi3.S index 015e8aa5a1d1..275d7d2341a4 100644 --- a/arch/arm/lib/ashrdi3.S +++ b/arch/arm/lib/ashrdi3.S @@ -27,6 +27,7 @@ Boston, MA 02110-1301, USA. */ #include <linux/linkage.h> +#include <asm/assembler.h> #ifdef __ARMEB__ #define al r1 @@ -47,7 +48,7 @@ ENTRY(__aeabi_lasr) THUMB( lslmi r3, ah, ip ) THUMB( orrmi al, al, r3 ) mov ah, ah, asr r2 - mov pc, lr + ret lr ENDPROC(__ashrdi3) ENDPROC(__aeabi_lasr) diff --git a/arch/arm/lib/backtrace.S b/arch/arm/lib/backtrace.S index 4102be617fce..fab5a50503ae 100644 --- a/arch/arm/lib/backtrace.S +++ b/arch/arm/lib/backtrace.S @@ -25,7 +25,7 @@ ENTRY(c_backtrace) #if !defined(CONFIG_FRAME_POINTER) || !defined(CONFIG_PRINTK) - mov pc, lr + ret lr ENDPROC(c_backtrace) #else stmfd sp!, {r4 - r8, lr} @ Save an extra register so we have a location... diff --git a/arch/arm/lib/bitops.h b/arch/arm/lib/bitops.h index 9f12ed1eea86..7d807cfd8ef5 100644 --- a/arch/arm/lib/bitops.h +++ b/arch/arm/lib/bitops.h @@ -1,3 +1,4 @@ +#include <asm/assembler.h> #include <asm/unwind.h> #if __LINUX_ARM_ARCH__ >= 6 @@ -70,7 +71,7 @@ UNWIND( .fnstart ) \instr r2, r2, r3 str r2, [r1, r0, lsl #2] restore_irqs ip - mov pc, lr + ret lr UNWIND( .fnend ) ENDPROC(\name ) .endm @@ -98,7 +99,7 @@ UNWIND( .fnstart ) \store r2, [r1] moveq r0, #0 restore_irqs ip - mov pc, lr + ret lr UNWIND( .fnend ) ENDPROC(\name ) .endm diff --git a/arch/arm/lib/bswapsdi2.S b/arch/arm/lib/bswapsdi2.S index 9fcdd154eff9..07cda737bb11 100644 --- a/arch/arm/lib/bswapsdi2.S +++ b/arch/arm/lib/bswapsdi2.S @@ -1,4 +1,5 @@ #include <linux/linkage.h> +#include <asm/assembler.h> #if __LINUX_ARM_ARCH__ >= 6 ENTRY(__bswapsi2) @@ -18,7 +19,7 @@ ENTRY(__bswapsi2) mov r3, r3, lsr #8 bic r3, r3, #0xff00 eor r0, r3, r0, ror #8 - mov pc, lr + ret lr ENDPROC(__bswapsi2) ENTRY(__bswapdi2) @@ -31,6 +32,6 @@ ENTRY(__bswapdi2) bic r1, r1, #0xff00 eor r1, r1, r0, ror #8 eor r0, r3, ip, ror #8 - mov pc, lr + ret lr ENDPROC(__bswapdi2) #endif diff --git a/arch/arm/lib/call_with_stack.S b/arch/arm/lib/call_with_stack.S index 916c80f13ae7..ed1a421813cb 100644 --- a/arch/arm/lib/call_with_stack.S +++ b/arch/arm/lib/call_with_stack.S @@ -36,9 +36,9 @@ ENTRY(call_with_stack) mov r0, r1 adr lr, BSYM(1f) - mov pc, r2 + ret r2 1: ldr lr, [sp] ldr sp, [sp, #4] - mov pc, lr + ret lr ENDPROC(call_with_stack) diff --git a/arch/arm/lib/csumpartial.S b/arch/arm/lib/csumpartial.S index 31d3cb34740d..984e0f29d548 100644 --- a/arch/arm/lib/csumpartial.S +++ b/arch/arm/lib/csumpartial.S @@ -97,7 +97,7 @@ td3 .req lr #endif #endif adcnes sum, sum, td0 @ update checksum - mov pc, lr + ret lr ENTRY(csum_partial) stmfd sp!, {buf, lr} diff --git a/arch/arm/lib/csumpartialcopygeneric.S b/arch/arm/lib/csumpartialcopygeneric.S index d6e742d24007..10b45909610c 100644 --- a/arch/arm/lib/csumpartialcopygeneric.S +++ b/arch/arm/lib/csumpartialcopygeneric.S @@ -7,6 +7,7 @@ * it under the terms of the GNU General Public License version 2 as * published by the Free Software Foundation. */ +#include <asm/assembler.h> /* * unsigned int @@ -40,7 +41,7 @@ sum .req r3 adcs sum, sum, ip, put_byte_1 @ update checksum strb ip, [dst], #1 tst dst, #2 - moveq pc, lr @ dst is now 32bit aligned + reteq lr @ dst is now 32bit aligned .Ldst_16bit: load2b r8, ip sub len, len, #2 @@ -48,7 +49,7 @@ sum .req r3 strb r8, [dst], #1 adcs sum, sum, ip, put_byte_1 strb ip, [dst], #1 - mov pc, lr @ dst is now 32bit aligned + ret lr @ dst is now 32bit aligned /* * Handle 0 to 7 bytes, with any alignment of source and diff --git a/arch/arm/lib/delay-loop.S b/arch/arm/lib/delay-loop.S index bc1033b897b4..518bf6e93f78 100644 --- a/arch/arm/lib/delay-loop.S +++ b/arch/arm/lib/delay-loop.S @@ -35,7 +35,7 @@ ENTRY(__loop_const_udelay) @ 0 <= r0 <= 0x7fffff06 mul r0, r2, r0 @ max = 2^32-1 add r0, r0, r1, lsr #32-6 movs r0, r0, lsr #6 - moveq pc, lr + reteq lr /* * loops = r0 * HZ * loops_per_jiffy / 1000000 @@ -46,23 +46,23 @@ ENTRY(__loop_const_udelay) @ 0 <= r0 <= 0x7fffff06 ENTRY(__loop_delay) subs r0, r0, #1 #if 0 - movls pc, lr + retls lr subs r0, r0, #1 - movls pc, lr + retls lr subs r0, r0, #1 - movls pc, lr + retls lr subs r0, r0, #1 - movls pc, lr + retls lr subs r0, r0, #1 - movls pc, lr + retls lr subs r0, r0, #1 - movls pc, lr + retls lr subs r0, r0, #1 - movls pc, lr + retls lr subs r0, r0, #1 #endif bhi __loop_delay - mov pc, lr + ret lr ENDPROC(__loop_udelay) ENDPROC(__loop_const_udelay) ENDPROC(__loop_delay) diff --git a/arch/arm/lib/div64.S b/arch/arm/lib/div64.S index e55c4842c290..a9eafe4981eb 100644 --- a/arch/arm/lib/div64.S +++ b/arch/arm/lib/div64.S @@ -13,6 +13,7 @@ */ #include <linux/linkage.h> +#include <asm/assembler.h> #include <asm/unwind.h> #ifdef __ARMEB__ @@ -97,7 +98,7 @@ UNWIND(.fnstart) mov yl, #0 cmpeq xl, r4 movlo xh, xl - movlo pc, lr + retlo lr @ The division loop for lower bit positions. @ Here we shift remainer bits leftwards rather than moving the @@ -111,14 +112,14 @@ UNWIND(.fnstart) subcs xh, xh, r4 movs ip, ip, lsr #1 bne 4b - mov pc, lr + ret lr @ The top part of remainder became zero. If carry is set @ (the 33th bit) this is a false positive so resume the loop. @ Otherwise, if lower part is also null then we are done. 6: bcs 5b cmp xl, #0 - moveq pc, lr + reteq lr @ We still have remainer bits in the low part. Bring them up. @@ -144,7 +145,7 @@ UNWIND(.fnstart) movs ip, ip, lsr #1 mov xh, #1 bne 4b - mov pc, lr + ret lr 8: @ Division by a power of 2: determine what that divisor order is @ then simply shift values around @@ -184,13 +185,13 @@ UNWIND(.fnstart) THUMB( orr yl, yl, xh ) mov xh, xl, lsl ip mov xh, xh, lsr ip - mov pc, lr + ret lr @ eq -> division by 1: obvious enough... 9: moveq yl, xl moveq yh, xh moveq xh, #0 - moveq pc, lr + reteq lr UNWIND(.fnend) UNWIND(.fnstart) diff --git a/arch/arm/lib/findbit.S b/arch/arm/lib/findbit.S index 64f6bc1a9132..7848780e8834 100644 --- a/arch/arm/lib/findbit.S +++ b/arch/arm/lib/findbit.S @@ -35,7 +35,7 @@ ENTRY(_find_first_zero_bit_le) 2: cmp r2, r1 @ any more? blo 1b 3: mov r0, r1 @ no free bits - mov pc, lr + ret lr ENDPROC(_find_first_zero_bit_le) /* @@ -76,7 +76,7 @@ ENTRY(_find_first_bit_le) 2: cmp r2, r1 @ any more? blo 1b 3: mov r0, r1 @ no free bits - mov pc, lr + ret lr ENDPROC(_find_first_bit_le) /* @@ -114,7 +114,7 @@ ENTRY(_find_first_zero_bit_be) 2: cmp r2, r1 @ any more? blo 1b 3: mov r0, r1 @ no free bits - mov pc, lr + ret lr ENDPROC(_find_first_zero_bit_be) ENTRY(_find_next_zero_bit_be) @@ -148,7 +148,7 @@ ENTRY(_find_first_bit_be) 2: cmp r2, r1 @ any more? blo 1b 3: mov r0, r1 @ no free bits - mov pc, lr + ret lr ENDPROC(_find_first_bit_be) ENTRY(_find_next_bit_be) @@ -192,5 +192,5 @@ ENDPROC(_find_next_bit_be) #endif cmp r1, r0 @ Clamp to maxbit movlo r0, r1 - mov pc, lr + ret lr diff --git a/arch/arm/lib/getuser.S b/arch/arm/lib/getuser.S index 9b06bb41fca6..938600098b88 100644 --- a/arch/arm/lib/getuser.S +++ b/arch/arm/lib/getuser.S @@ -18,7 +18,7 @@ * Inputs: r0 contains the address * r1 contains the address limit, which must be preserved * Outputs: r0 is the error code - * r2 contains the zero-extended value + * r2, r3 contains the zero-extended value * lr corrupted * * No other registers must be altered. (see <asm/uaccess.h> @@ -36,7 +36,7 @@ ENTRY(__get_user_1) check_uaccess r0, 1, r1, r2, __get_user_bad 1: TUSER(ldrb) r2, [r0] mov r0, #0 - mov pc, lr + ret lr ENDPROC(__get_user_1) ENTRY(__get_user_2) @@ -56,25 +56,60 @@ rb .req r0 orr r2, rb, r2, lsl #8 #endif mov r0, #0 - mov pc, lr + ret lr ENDPROC(__get_user_2) ENTRY(__get_user_4) check_uaccess r0, 4, r1, r2, __get_user_bad 4: TUSER(ldr) r2, [r0] mov r0, #0 - mov pc, lr + ret lr ENDPROC(__get_user_4) +ENTRY(__get_user_8) + check_uaccess r0, 8, r1, r2, __get_user_bad +#ifdef CONFIG_THUMB2_KERNEL +5: TUSER(ldr) r2, [r0] +6: TUSER(ldr) r3, [r0, #4] +#else +5: TUSER(ldr) r2, [r0], #4 +6: TUSER(ldr) r3, [r0] +#endif + mov r0, #0 + ret lr +ENDPROC(__get_user_8) + +#ifdef __ARMEB__ +ENTRY(__get_user_lo8) + check_uaccess r0, 8, r1, r2, __get_user_bad +#ifdef CONFIG_CPU_USE_DOMAINS + add r0, r0, #4 +7: ldrt r2, [r0] +#else +7: ldr r2, [r0, #4] +#endif + mov r0, #0 + ret lr +ENDPROC(__get_user_lo8) +#endif + +__get_user_bad8: + mov r3, #0 __get_user_bad: mov r2, #0 mov r0, #-EFAULT - mov pc, lr + ret lr ENDPROC(__get_user_bad) +ENDPROC(__get_user_bad8) .pushsection __ex_table, "a" .long 1b, __get_user_bad .long 2b, __get_user_bad .long 3b, __get_user_bad .long 4b, __get_user_bad + .long 5b, __get_user_bad8 + .long 6b, __get_user_bad8 +#ifdef __ARMEB__ + .long 7b, __get_user_bad +#endif .popsection diff --git a/arch/arm/lib/io-readsb.S b/arch/arm/lib/io-readsb.S index 9f4238987fe9..c31b2f3153f1 100644 --- a/arch/arm/lib/io-readsb.S +++ b/arch/arm/lib/io-readsb.S @@ -25,7 +25,7 @@ ENTRY(__raw_readsb) teq r2, #0 @ do we have to check for the zero len? - moveq pc, lr + reteq lr ands ip, r1, #3 bne .Linsb_align diff --git a/arch/arm/lib/io-readsl.S b/arch/arm/lib/io-readsl.S index 7a7430950c79..2ed86fa5465f 100644 --- a/arch/arm/lib/io-readsl.S +++ b/arch/arm/lib/io-readsl.S @@ -12,7 +12,7 @@ ENTRY(__raw_readsl) teq r2, #0 @ do we have to check for the zero len? - moveq pc, lr + reteq lr ands ip, r1, #3 bne 3f @@ -33,7 +33,7 @@ ENTRY(__raw_readsl) stmcsia r1!, {r3, ip} ldrne r3, [r0, #0] strne r3, [r1, #0] - mov pc, lr + ret lr 3: ldr r3, [r0] cmp ip, #2 @@ -75,5 +75,5 @@ ENTRY(__raw_readsl) strb r3, [r1, #1] 8: mov r3, ip, get_byte_0 strb r3, [r1, #0] - mov pc, lr + ret lr ENDPROC(__raw_readsl) diff --git a/arch/arm/lib/io-readsw-armv3.S b/arch/arm/lib/io-readsw-armv3.S index 88487c8c4f23..413da9914529 100644 --- a/arch/arm/lib/io-readsw-armv3.S +++ b/arch/arm/lib/io-readsw-armv3.S @@ -27,11 +27,11 @@ strb r3, [r1], #1 subs r2, r2, #1 - moveq pc, lr + reteq lr ENTRY(__raw_readsw) teq r2, #0 @ do we have to check for the zero len? - moveq pc, lr + reteq lr tst r1, #3 bne .Linsw_align diff --git a/arch/arm/lib/io-readsw-armv4.S b/arch/arm/lib/io-readsw-armv4.S index 1f393d42593d..d9a45e9692ae 100644 --- a/arch/arm/lib/io-readsw-armv4.S +++ b/arch/arm/lib/io-readsw-armv4.S @@ -26,7 +26,7 @@ ENTRY(__raw_readsw) teq r2, #0 - moveq pc, lr + reteq lr tst r1, #3 bne .Linsw_align diff --git a/arch/arm/lib/io-writesb.S b/arch/arm/lib/io-writesb.S index 68b92f4acaeb..a46bbc9b168b 100644 --- a/arch/arm/lib/io-writesb.S +++ b/arch/arm/lib/io-writesb.S @@ -45,7 +45,7 @@ ENTRY(__raw_writesb) teq r2, #0 @ do we have to check for the zero len? - moveq pc, lr + reteq lr ands ip, r1, #3 bne .Loutsb_align diff --git a/arch/arm/lib/io-writesl.S b/arch/arm/lib/io-writesl.S index d0d104a0dd11..4ea2435988c1 100644 --- a/arch/arm/lib/io-writesl.S +++ b/arch/arm/lib/io-writesl.S @@ -12,7 +12,7 @@ ENTRY(__raw_writesl) teq r2, #0 @ do we have to check for the zero len? - moveq pc, lr + reteq lr ands ip, r1, #3 bne 3f @@ -33,7 +33,7 @@ ENTRY(__raw_writesl) ldrne r3, [r1, #0] strcs ip, [r0, #0] strne r3, [r0, #0] - mov pc, lr + ret lr 3: bic r1, r1, #3 ldr r3, [r1], #4 @@ -47,7 +47,7 @@ ENTRY(__raw_writesl) orr ip, ip, r3, lspush #16 str ip, [r0] bne 4b - mov pc, lr + ret lr 5: mov ip, r3, lspull #8 ldr r3, [r1], #4 @@ -55,7 +55,7 @@ ENTRY(__raw_writesl) orr ip, ip, r3, lspush #24 str ip, [r0] bne 5b - mov pc, lr + ret lr 6: mov ip, r3, lspull #24 ldr r3, [r1], #4 @@ -63,5 +63,5 @@ ENTRY(__raw_writesl) orr ip, ip, r3, lspush #8 str ip, [r0] bne 6b - mov pc, lr + ret lr ENDPROC(__raw_writesl) diff --git a/arch/arm/lib/io-writesw-armv3.S b/arch/arm/lib/io-writesw-armv3.S index 49b800419e32..121789eb6802 100644 --- a/arch/arm/lib/io-writesw-armv3.S +++ b/arch/arm/lib/io-writesw-armv3.S @@ -28,11 +28,11 @@ orr r3, r3, r3, lsl #16 str r3, [r0] subs r2, r2, #1 - moveq pc, lr + reteq lr ENTRY(__raw_writesw) teq r2, #0 @ do we have to check for the zero len? - moveq pc, lr + reteq lr tst r1, #3 bne .Loutsw_align diff --git a/arch/arm/lib/io-writesw-armv4.S b/arch/arm/lib/io-writesw-armv4.S index ff4f71b579ee..269f90c51ad2 100644 --- a/arch/arm/lib/io-writesw-armv4.S +++ b/arch/arm/lib/io-writesw-armv4.S @@ -31,7 +31,7 @@ ENTRY(__raw_writesw) teq r2, #0 - moveq pc, lr + reteq lr ands r3, r1, #3 bne .Loutsw_align @@ -96,5 +96,5 @@ ENTRY(__raw_writesw) tst r2, #1 3: movne ip, r3, lsr #8 strneh ip, [r0] - mov pc, lr + ret lr ENDPROC(__raw_writesw) diff --git a/arch/arm/lib/lib1funcs.S b/arch/arm/lib/lib1funcs.S index c562f649734c..947567ff67f9 100644 --- a/arch/arm/lib/lib1funcs.S +++ b/arch/arm/lib/lib1funcs.S @@ -210,7 +210,7 @@ ENTRY(__aeabi_uidiv) UNWIND(.fnstart) subs r2, r1, #1 - moveq pc, lr + reteq lr bcc Ldiv0 cmp r0, r1 bls 11f @@ -220,16 +220,16 @@ UNWIND(.fnstart) ARM_DIV_BODY r0, r1, r2, r3 mov r0, r2 - mov pc, lr + ret lr 11: moveq r0, #1 movne r0, #0 - mov pc, lr + ret lr 12: ARM_DIV2_ORDER r1, r2 mov r0, r0, lsr r2 - mov pc, lr + ret lr UNWIND(.fnend) ENDPROC(__udivsi3) @@ -244,11 +244,11 @@ UNWIND(.fnstart) moveq r0, #0 tsthi r1, r2 @ see if divisor is power of 2 andeq r0, r0, r2 - movls pc, lr + retls lr ARM_MOD_BODY r0, r1, r2, r3 - mov pc, lr + ret lr UNWIND(.fnend) ENDPROC(__umodsi3) @@ -274,23 +274,23 @@ UNWIND(.fnstart) cmp ip, #0 rsbmi r0, r0, #0 - mov pc, lr + ret lr 10: teq ip, r0 @ same sign ? rsbmi r0, r0, #0 - mov pc, lr + ret lr 11: movlo r0, #0 moveq r0, ip, asr #31 orreq r0, r0, #1 - mov pc, lr + ret lr 12: ARM_DIV2_ORDER r1, r2 cmp ip, #0 mov r0, r3, lsr r2 rsbmi r0, r0, #0 - mov pc, lr + ret lr UNWIND(.fnend) ENDPROC(__divsi3) @@ -315,7 +315,7 @@ UNWIND(.fnstart) 10: cmp ip, #0 rsbmi r0, r0, #0 - mov pc, lr + ret lr UNWIND(.fnend) ENDPROC(__modsi3) @@ -331,7 +331,7 @@ UNWIND(.save {r0, r1, ip, lr} ) ldmfd sp!, {r1, r2, ip, lr} mul r3, r0, r2 sub r1, r1, r3 - mov pc, lr + ret lr UNWIND(.fnend) ENDPROC(__aeabi_uidivmod) @@ -344,7 +344,7 @@ UNWIND(.save {r0, r1, ip, lr} ) ldmfd sp!, {r1, r2, ip, lr} mul r3, r0, r2 sub r1, r1, r3 - mov pc, lr + ret lr UNWIND(.fnend) ENDPROC(__aeabi_idivmod) diff --git a/arch/arm/lib/lshrdi3.S b/arch/arm/lib/lshrdi3.S index f83d449141f7..922dcd88b02b 100644 --- a/arch/arm/lib/lshrdi3.S +++ b/arch/arm/lib/lshrdi3.S @@ -27,6 +27,7 @@ Boston, MA 02110-1301, USA. */ #include <linux/linkage.h> +#include <asm/assembler.h> #ifdef __ARMEB__ #define al r1 @@ -47,7 +48,7 @@ ENTRY(__aeabi_llsr) THUMB( lslmi r3, ah, ip ) THUMB( orrmi al, al, r3 ) mov ah, ah, lsr r2 - mov pc, lr + ret lr ENDPROC(__lshrdi3) ENDPROC(__aeabi_llsr) diff --git a/arch/arm/lib/memchr.S b/arch/arm/lib/memchr.S index 1da86991d700..74a5bed6d999 100644 --- a/arch/arm/lib/memchr.S +++ b/arch/arm/lib/memchr.S @@ -22,5 +22,5 @@ ENTRY(memchr) bne 1b sub r0, r0, #1 2: movne r0, #0 - mov pc, lr + ret lr ENDPROC(memchr) diff --git a/arch/arm/lib/memset.S b/arch/arm/lib/memset.S index 94b0650ea98f..671455c854fa 100644 --- a/arch/arm/lib/memset.S +++ b/arch/arm/lib/memset.S @@ -110,7 +110,7 @@ ENTRY(memset) strneb r1, [ip], #1 tst r2, #1 strneb r1, [ip], #1 - mov pc, lr + ret lr 6: subs r2, r2, #4 @ 1 do we have enough blt 5b @ 1 bytes to align with? diff --git a/arch/arm/lib/memzero.S b/arch/arm/lib/memzero.S index 3fbdef5f802a..385ccb306fa2 100644 --- a/arch/arm/lib/memzero.S +++ b/arch/arm/lib/memzero.S @@ -121,5 +121,5 @@ ENTRY(__memzero) strneb r2, [r0], #1 @ 1 tst r1, #1 @ 1 a byte left over strneb r2, [r0], #1 @ 1 - mov pc, lr @ 1 + ret lr @ 1 ENDPROC(__memzero) diff --git a/arch/arm/lib/muldi3.S b/arch/arm/lib/muldi3.S index 36c91b4957e2..204305956925 100644 --- a/arch/arm/lib/muldi3.S +++ b/arch/arm/lib/muldi3.S @@ -11,6 +11,7 @@ */ #include <linux/linkage.h> +#include <asm/assembler.h> #ifdef __ARMEB__ #define xh r0 @@ -41,7 +42,7 @@ ENTRY(__aeabi_lmul) adc xh, xh, yh, lsr #16 adds xl, xl, ip, lsl #16 adc xh, xh, ip, lsr #16 - mov pc, lr + ret lr ENDPROC(__muldi3) ENDPROC(__aeabi_lmul) diff --git a/arch/arm/lib/putuser.S b/arch/arm/lib/putuser.S index 3d73dcb959b0..38d660d3705f 100644 --- a/arch/arm/lib/putuser.S +++ b/arch/arm/lib/putuser.S @@ -36,7 +36,7 @@ ENTRY(__put_user_1) check_uaccess r0, 1, r1, ip, __put_user_bad 1: TUSER(strb) r2, [r0] mov r0, #0 - mov pc, lr + ret lr ENDPROC(__put_user_1) ENTRY(__put_user_2) @@ -60,14 +60,14 @@ ENTRY(__put_user_2) #endif #endif /* CONFIG_THUMB2_KERNEL */ mov r0, #0 - mov pc, lr + ret lr ENDPROC(__put_user_2) ENTRY(__put_user_4) check_uaccess r0, 4, r1, ip, __put_user_bad 4: TUSER(str) r2, [r0] mov r0, #0 - mov pc, lr + ret lr ENDPROC(__put_user_4) ENTRY(__put_user_8) @@ -80,12 +80,12 @@ ENTRY(__put_user_8) 6: TUSER(str) r3, [r0] #endif mov r0, #0 - mov pc, lr + ret lr ENDPROC(__put_user_8) __put_user_bad: mov r0, #-EFAULT - mov pc, lr + ret lr ENDPROC(__put_user_bad) .pushsection __ex_table, "a" diff --git a/arch/arm/lib/strchr.S b/arch/arm/lib/strchr.S index d8f2a1c1aea4..013d64c71e8d 100644 --- a/arch/arm/lib/strchr.S +++ b/arch/arm/lib/strchr.S @@ -23,5 +23,5 @@ ENTRY(strchr) teq r2, r1 movne r0, #0 subeq r0, r0, #1 - mov pc, lr + ret lr ENDPROC(strchr) diff --git a/arch/arm/lib/strrchr.S b/arch/arm/lib/strrchr.S index 302f20cd2423..3cec1c7482c4 100644 --- a/arch/arm/lib/strrchr.S +++ b/arch/arm/lib/strrchr.S @@ -22,5 +22,5 @@ ENTRY(strrchr) teq r2, #0 bne 1b mov r0, r3 - mov pc, lr + ret lr ENDPROC(strrchr) diff --git a/arch/arm/lib/ucmpdi2.S b/arch/arm/lib/ucmpdi2.S index f0df6a91db04..ad4a6309141a 100644 --- a/arch/arm/lib/ucmpdi2.S +++ b/arch/arm/lib/ucmpdi2.S @@ -11,6 +11,7 @@ */ #include <linux/linkage.h> +#include <asm/assembler.h> #ifdef __ARMEB__ #define xh r0 @@ -31,7 +32,7 @@ ENTRY(__ucmpdi2) movlo r0, #0 moveq r0, #1 movhi r0, #2 - mov pc, lr + ret lr ENDPROC(__ucmpdi2) @@ -44,7 +45,7 @@ ENTRY(__aeabi_ulcmp) movlo r0, #-1 moveq r0, #0 movhi r0, #1 - mov pc, lr + ret lr ENDPROC(__aeabi_ulcmp) diff --git a/arch/arm/mach-davinci/sleep.S b/arch/arm/mach-davinci/sleep.S index d4e9316ecacb..a5336a5e2739 100644 --- a/arch/arm/mach-davinci/sleep.S +++ b/arch/arm/mach-davinci/sleep.S @@ -213,7 +213,7 @@ ddr2clk_stop_done: cmp ip, r0 bne ddr2clk_stop_done - mov pc, lr + ret lr ENDPROC(davinci_ddr_psc_config) CACHE_FLUSH: diff --git a/arch/arm/mach-ebsa110/include/mach/memory.h b/arch/arm/mach-ebsa110/include/mach/memory.h index 8e49066ad850..866f8a1c6ff7 100644 --- a/arch/arm/mach-ebsa110/include/mach/memory.h +++ b/arch/arm/mach-ebsa110/include/mach/memory.h @@ -17,11 +17,6 @@ #define __ASM_ARCH_MEMORY_H /* - * Physical DRAM offset. - */ -#define PLAT_PHYS_OFFSET UL(0x00000000) - -/* * Cache flushing area - SRAM */ #define FLUSH_BASE_PHYS 0x40000000 diff --git a/arch/arm/mach-ep93xx/crunch-bits.S b/arch/arm/mach-ep93xx/crunch-bits.S index e96923a3017b..ee0be2af5c61 100644 --- a/arch/arm/mach-ep93xx/crunch-bits.S +++ b/arch/arm/mach-ep93xx/crunch-bits.S @@ -198,7 +198,7 @@ crunch_load: get_thread_info r10 #endif 2: dec_preempt_count r10, r3 - mov pc, lr + ret lr /* * Back up crunch regs to save area and disable access to them @@ -277,7 +277,7 @@ ENTRY(crunch_task_copy) mov r3, lr @ preserve return address bl crunch_save msr cpsr_c, ip @ restore interrupt mode - mov pc, r3 + ret r3 /* * Restore crunch state from given memory address @@ -310,4 +310,4 @@ ENTRY(crunch_task_restore) mov r3, lr @ preserve return address bl crunch_load msr cpsr_c, ip @ restore interrupt mode - mov pc, r3 + ret r3 diff --git a/arch/arm/mach-ep93xx/include/mach/memory.h b/arch/arm/mach-ep93xx/include/mach/memory.h deleted file mode 100644 index c9400cf0051c..000000000000 --- a/arch/arm/mach-ep93xx/include/mach/memory.h +++ /dev/null @@ -1,22 +0,0 @@ -/* - * arch/arm/mach-ep93xx/include/mach/memory.h - */ - -#ifndef __ASM_ARCH_MEMORY_H -#define __ASM_ARCH_MEMORY_H - -#if defined(CONFIG_EP93XX_SDCE3_SYNC_PHYS_OFFSET) -#define PLAT_PHYS_OFFSET UL(0x00000000) -#elif defined(CONFIG_EP93XX_SDCE0_PHYS_OFFSET) -#define PLAT_PHYS_OFFSET UL(0xc0000000) -#elif defined(CONFIG_EP93XX_SDCE1_PHYS_OFFSET) -#define PLAT_PHYS_OFFSET UL(0xd0000000) -#elif defined(CONFIG_EP93XX_SDCE2_PHYS_OFFSET) -#define PLAT_PHYS_OFFSET UL(0xe0000000) -#elif defined(CONFIG_EP93XX_SDCE3_ASYNC_PHYS_OFFSET) -#define PLAT_PHYS_OFFSET UL(0xf0000000) -#else -#error "Kconfig bug: No EP93xx PHYS_OFFSET set" -#endif - -#endif diff --git a/arch/arm/mach-exynos/Kconfig b/arch/arm/mach-exynos/Kconfig index 8f9b66c4ac78..5d4ff6571dcd 100644 --- a/arch/arm/mach-exynos/Kconfig +++ b/arch/arm/mach-exynos/Kconfig @@ -119,6 +119,7 @@ config EXYNOS5420_MCPM bool "Exynos5420 Multi-Cluster PM support" depends on MCPM && SOC_EXYNOS5420 select ARM_CCI + select ARM_CPU_SUSPEND help This is needed to provide CPU and cluster power management on Exynos5420 implementing big.LITTLE. diff --git a/arch/arm/mach-exynos/mcpm-exynos.c b/arch/arm/mach-exynos/mcpm-exynos.c index ace0ed617476..a96b78f93f2b 100644 --- a/arch/arm/mach-exynos/mcpm-exynos.c +++ b/arch/arm/mach-exynos/mcpm-exynos.c @@ -196,7 +196,7 @@ static void exynos_power_down(void) if (last_man && __mcpm_outbound_enter_critical(cpu, cluster)) { arch_spin_unlock(&exynos_mcpm_lock); - if (read_cpuid_part_number() == ARM_CPU_PART_CORTEX_A15) { + if (read_cpuid_part() == ARM_CPU_PART_CORTEX_A15) { /* * On the Cortex-A15 we need to disable * L2 prefetching before flushing the cache. @@ -289,6 +289,19 @@ static void __naked exynos_pm_power_up_setup(unsigned int affinity_level) "b cci_enable_port_for_self"); } +static void __init exynos_cache_off(void) +{ + if (read_cpuid_part() == ARM_CPU_PART_CORTEX_A15) { + /* disable L2 prefetching on the Cortex-A15 */ + asm volatile( + "mcr p15, 1, %0, c15, c0, 3\n\t" + "isb\n\t" + "dsb" + : : "r" (0x400)); + } + exynos_v7_exit_coherency_flush(all); +} + static const struct of_device_id exynos_dt_mcpm_match[] = { { .compatible = "samsung,exynos5420" }, { .compatible = "samsung,exynos5800" }, @@ -332,6 +345,8 @@ static int __init exynos_mcpm_init(void) ret = mcpm_platform_register(&exynos_power_ops); if (!ret) ret = mcpm_sync_init(exynos_pm_power_up_setup); + if (!ret) + ret = mcpm_loopback(exynos_cache_off); /* turn on the CCI */ if (ret) { iounmap(ns_sram_base_addr); return ret; diff --git a/arch/arm/mach-exynos/platsmp.c b/arch/arm/mach-exynos/platsmp.c index 50b9aad5e27b..70d1e65a51d8 100644 --- a/arch/arm/mach-exynos/platsmp.c +++ b/arch/arm/mach-exynos/platsmp.c @@ -190,7 +190,7 @@ static void __init exynos_smp_init_cpus(void) void __iomem *scu_base = scu_base_addr(); unsigned int i, ncores; - if (read_cpuid_part_number() == ARM_CPU_PART_CORTEX_A9) + if (read_cpuid_part() == ARM_CPU_PART_CORTEX_A9) ncores = scu_base ? scu_get_core_count(scu_base) : 1; else /* @@ -216,7 +216,7 @@ static void __init exynos_smp_prepare_cpus(unsigned int max_cpus) exynos_sysram_init(); - if (read_cpuid_part_number() == ARM_CPU_PART_CORTEX_A9) + if (read_cpuid_part() == ARM_CPU_PART_CORTEX_A9) scu_enable(scu_base_addr()); /* diff --git a/arch/arm/mach-exynos/pm.c b/arch/arm/mach-exynos/pm.c index 202ca73e49c4..67d383de614f 100644 --- a/arch/arm/mach-exynos/pm.c +++ b/arch/arm/mach-exynos/pm.c @@ -300,7 +300,7 @@ static int exynos_pm_suspend(void) tmp = (S5P_USE_STANDBY_WFI0 | S5P_USE_STANDBY_WFE0); __raw_writel(tmp, S5P_CENTRAL_SEQ_OPTION); - if (read_cpuid_part_number() == ARM_CPU_PART_CORTEX_A9) + if (read_cpuid_part() == ARM_CPU_PART_CORTEX_A9) exynos_cpu_save_register(); return 0; @@ -334,7 +334,7 @@ static void exynos_pm_resume(void) if (exynos_pm_central_resume()) goto early_wakeup; - if (read_cpuid_part_number() == ARM_CPU_PART_CORTEX_A9) + if (read_cpuid_part() == ARM_CPU_PART_CORTEX_A9) exynos_cpu_restore_register(); /* For release retention */ @@ -353,7 +353,7 @@ static void exynos_pm_resume(void) s3c_pm_do_restore_core(exynos_core_save, ARRAY_SIZE(exynos_core_save)); - if (read_cpuid_part_number() == ARM_CPU_PART_CORTEX_A9) + if (read_cpuid_part() == ARM_CPU_PART_CORTEX_A9) scu_enable(S5P_VA_SCU); early_wakeup: @@ -440,15 +440,14 @@ static int exynos_cpu_pm_notifier(struct notifier_block *self, case CPU_PM_ENTER: if (cpu == 0) { exynos_pm_central_suspend(); - if (read_cpuid_part_number() == ARM_CPU_PART_CORTEX_A9) + if (read_cpuid_part() == ARM_CPU_PART_CORTEX_A9) exynos_cpu_save_register(); } break; case CPU_PM_EXIT: if (cpu == 0) { - if (read_cpuid_part_number() == - ARM_CPU_PART_CORTEX_A9) { + if (read_cpuid_part() == ARM_CPU_PART_CORTEX_A9) { scu_enable(S5P_VA_SCU); exynos_cpu_restore_register(); } diff --git a/arch/arm/mach-footbridge/include/mach/memory.h b/arch/arm/mach-footbridge/include/mach/memory.h index 5c6df377f969..6f2ecccdf323 100644 --- a/arch/arm/mach-footbridge/include/mach/memory.h +++ b/arch/arm/mach-footbridge/include/mach/memory.h @@ -59,11 +59,6 @@ extern unsigned long __bus_to_pfn(unsigned long); */ #define FLUSH_BASE 0xf9000000 -/* - * Physical DRAM offset. - */ -#define PLAT_PHYS_OFFSET UL(0x00000000) - #define FLUSH_BASE_PHYS 0x50000000 #endif diff --git a/arch/arm/mach-imx/suspend-imx6.S b/arch/arm/mach-imx/suspend-imx6.S index fe123b079c05..74b50f1982db 100644 --- a/arch/arm/mach-imx/suspend-imx6.S +++ b/arch/arm/mach-imx/suspend-imx6.S @@ -10,6 +10,7 @@ */ #include <linux/linkage.h> +#include <asm/assembler.h> #include <asm/asm-offsets.h> #include <asm/hardware/cache-l2x0.h> #include "hardware.h" @@ -301,7 +302,7 @@ rbc_loop: resume_mmdc /* return to suspend finish */ - mov pc, lr + ret lr resume: /* invalidate L1 I-cache first */ @@ -325,7 +326,7 @@ resume: mov r5, #0x1 resume_mmdc - mov pc, lr + ret lr ENDPROC(imx6_suspend) /* diff --git a/arch/arm/mach-integrator/include/mach/memory.h b/arch/arm/mach-integrator/include/mach/memory.h index 334d5e271889..7268cb50ded0 100644 --- a/arch/arm/mach-integrator/include/mach/memory.h +++ b/arch/arm/mach-integrator/include/mach/memory.h @@ -20,11 +20,6 @@ #ifndef __ASM_ARCH_MEMORY_H #define __ASM_ARCH_MEMORY_H -/* - * Physical DRAM offset. - */ -#define PLAT_PHYS_OFFSET UL(0x00000000) - #define BUS_OFFSET UL(0x80000000) #define __virt_to_bus(x) ((x) - PAGE_OFFSET + BUS_OFFSET) #define __bus_to_virt(x) ((x) - BUS_OFFSET + PAGE_OFFSET) diff --git a/arch/arm/mach-iop13xx/include/mach/iop13xx.h b/arch/arm/mach-iop13xx/include/mach/iop13xx.h index 17b40279e0a4..9311ee2126d6 100644 --- a/arch/arm/mach-iop13xx/include/mach/iop13xx.h +++ b/arch/arm/mach-iop13xx/include/mach/iop13xx.h @@ -3,7 +3,7 @@ #ifndef __ASSEMBLY__ -#include <linux/reboot.h> +enum reboot_mode; /* The ATU offsets can change based on the strapping */ extern u32 iop13xx_atux_pmmr_offset; diff --git a/arch/arm/mach-iop13xx/include/mach/memory.h b/arch/arm/mach-iop13xx/include/mach/memory.h index 7c032d0ab24a..59307e787588 100644 --- a/arch/arm/mach-iop13xx/include/mach/memory.h +++ b/arch/arm/mach-iop13xx/include/mach/memory.h @@ -3,11 +3,6 @@ #include <mach/hardware.h> -/* - * Physical DRAM offset. - */ -#define PLAT_PHYS_OFFSET UL(0x00000000) - #ifndef __ASSEMBLY__ #if defined(CONFIG_ARCH_IOP13XX) diff --git a/arch/arm/mach-iop13xx/setup.c b/arch/arm/mach-iop13xx/setup.c index bca96f433495..53c316f7301e 100644 --- a/arch/arm/mach-iop13xx/setup.c +++ b/arch/arm/mach-iop13xx/setup.c @@ -20,6 +20,7 @@ #include <linux/dma-mapping.h> #include <linux/serial_8250.h> #include <linux/io.h> +#include <linux/reboot.h> #ifdef CONFIG_MTD_PHYSMAP #include <linux/mtd/physmap.h> #endif diff --git a/arch/arm/mach-ks8695/include/mach/memory.h b/arch/arm/mach-ks8695/include/mach/memory.h index 95e731a7ed6a..ab0d27fa8969 100644 --- a/arch/arm/mach-ks8695/include/mach/memory.h +++ b/arch/arm/mach-ks8695/include/mach/memory.h @@ -15,11 +15,6 @@ #include <mach/hardware.h> -/* - * Physical SRAM offset. - */ -#define PLAT_PHYS_OFFSET KS8695_SDRAM_PA - #ifndef __ASSEMBLY__ #ifdef CONFIG_PCI diff --git a/arch/arm/mach-mvebu/coherency_ll.S b/arch/arm/mach-mvebu/coherency_ll.S index 510c29e079ca..f5d881b5d0f7 100644 --- a/arch/arm/mach-mvebu/coherency_ll.S +++ b/arch/arm/mach-mvebu/coherency_ll.S @@ -46,7 +46,7 @@ ENTRY(ll_get_coherency_base) ldr r1, =coherency_base ldr r1, [r1] 2: - mov pc, lr + ret lr ENDPROC(ll_get_coherency_base) /* @@ -63,7 +63,7 @@ ENTRY(ll_get_coherency_cpumask) mov r2, #(1 << 24) lsl r3, r2, r3 ARM_BE8(rev r3, r3) - mov pc, lr + ret lr ENDPROC(ll_get_coherency_cpumask) /* @@ -94,7 +94,7 @@ ENTRY(ll_add_cpu_to_smp_group) strex r1, r2, [r0] cmp r1, #0 bne 1b - mov pc, lr + ret lr ENDPROC(ll_add_cpu_to_smp_group) ENTRY(ll_enable_coherency) @@ -118,7 +118,7 @@ ENTRY(ll_enable_coherency) bne 1b dsb mov r0, #0 - mov pc, lr + ret lr ENDPROC(ll_enable_coherency) ENTRY(ll_disable_coherency) @@ -141,7 +141,7 @@ ENTRY(ll_disable_coherency) cmp r1, #0 bne 1b dsb - mov pc, lr + ret lr ENDPROC(ll_disable_coherency) .align 2 diff --git a/arch/arm/mach-mvebu/headsmp-a9.S b/arch/arm/mach-mvebu/headsmp-a9.S index da5bb292b91c..2c3c7fc65e28 100644 --- a/arch/arm/mach-mvebu/headsmp-a9.S +++ b/arch/arm/mach-mvebu/headsmp-a9.S @@ -29,7 +29,7 @@ ARM_BE8(setend be) ldr r0, [r0] ldr r1, [r0] ARM_BE8(rev r1, r1) - mov pc, r1 + ret r1 1: .word CPU_RESUME_ADDR_REG armada_375_smp_cpu1_enable_code_end: diff --git a/arch/arm/mach-omap1/include/mach/memory.h b/arch/arm/mach-omap1/include/mach/memory.h index 3c2530523111..058a4f7d44c5 100644 --- a/arch/arm/mach-omap1/include/mach/memory.h +++ b/arch/arm/mach-omap1/include/mach/memory.h @@ -6,11 +6,6 @@ #define __ASM_ARCH_MEMORY_H /* - * Physical DRAM offset. - */ -#define PLAT_PHYS_OFFSET UL(0x10000000) - -/* * Bus address is physical address, except for OMAP-1510 Local Bus. * OMAP-1510 bus address is translated into a Local Bus address if the * OMAP bus type is lbus. We do the address translation based on the diff --git a/arch/arm/mach-omap2/sleep44xx.S b/arch/arm/mach-omap2/sleep44xx.S index 9086ce03ae12..b84a0122d823 100644 --- a/arch/arm/mach-omap2/sleep44xx.S +++ b/arch/arm/mach-omap2/sleep44xx.S @@ -10,6 +10,7 @@ */ #include <linux/linkage.h> +#include <asm/assembler.h> #include <asm/smp_scu.h> #include <asm/memory.h> #include <asm/hardware/cache-l2x0.h> @@ -334,7 +335,7 @@ ENDPROC(omap4_cpu_resume) #ifndef CONFIG_OMAP4_ERRATA_I688 ENTRY(omap_bus_sync) - mov pc, lr + ret lr ENDPROC(omap_bus_sync) #endif diff --git a/arch/arm/mach-omap2/sram242x.S b/arch/arm/mach-omap2/sram242x.S index 680a7c56cc3e..2c88ff2d0236 100644 --- a/arch/arm/mach-omap2/sram242x.S +++ b/arch/arm/mach-omap2/sram242x.S @@ -101,7 +101,7 @@ i_dll_wait: i_dll_delay: subs r4, r4, #0x1 bne i_dll_delay - mov pc, lr + ret lr /* * shift up or down voltage, use R9 as input to tell level. @@ -125,7 +125,7 @@ volt_delay: ldr r7, [r3] @ get timer value cmp r5, r7 @ time up? bhi volt_delay @ not yet->branch - mov pc, lr @ back to caller. + ret lr @ back to caller. omap242x_sdi_cm_clksel2_pll: .word OMAP2420_CM_REGADDR(PLL_MOD, CM_CLKSEL2) @@ -220,7 +220,7 @@ volt_delay_c: ldr r7, [r10] @ get timer value cmp r8, r7 @ time up? bhi volt_delay_c @ not yet->branch - mov pc, lr @ back to caller + ret lr @ back to caller omap242x_srs_cm_clksel2_pll: .word OMAP2420_CM_REGADDR(PLL_MOD, CM_CLKSEL2) diff --git a/arch/arm/mach-omap2/sram243x.S b/arch/arm/mach-omap2/sram243x.S index a1e9edd673f4..d5deb9761fc7 100644 --- a/arch/arm/mach-omap2/sram243x.S +++ b/arch/arm/mach-omap2/sram243x.S @@ -101,7 +101,7 @@ i_dll_wait: i_dll_delay: subs r4, r4, #0x1 bne i_dll_delay - mov pc, lr + ret lr /* * shift up or down voltage, use R9 as input to tell level. @@ -125,7 +125,7 @@ volt_delay: ldr r7, [r3] @ get timer value cmp r5, r7 @ time up? bhi volt_delay @ not yet->branch - mov pc, lr @ back to caller. + ret lr @ back to caller. omap243x_sdi_cm_clksel2_pll: .word OMAP2430_CM_REGADDR(PLL_MOD, CM_CLKSEL2) @@ -220,7 +220,7 @@ volt_delay_c: ldr r7, [r10] @ get timer value cmp r8, r7 @ time up? bhi volt_delay_c @ not yet->branch - mov pc, lr @ back to caller + ret lr @ back to caller omap243x_srs_cm_clksel2_pll: .word OMAP2430_CM_REGADDR(PLL_MOD, CM_CLKSEL2) diff --git a/arch/arm/mach-pxa/mioa701_bootresume.S b/arch/arm/mach-pxa/mioa701_bootresume.S index 324d25a48c85..81591491ab94 100644 --- a/arch/arm/mach-pxa/mioa701_bootresume.S +++ b/arch/arm/mach-pxa/mioa701_bootresume.S @@ -29,7 +29,7 @@ ENTRY(mioa701_jumpaddr) str r1, [r0] @ Early disable resume for next boot ldr r0, mioa701_jumpaddr @ (Murphy's Law) ldr r0, [r0] - mov pc, r0 + ret r0 2: ENTRY(mioa701_bootstrap_lg) diff --git a/arch/arm/mach-pxa/standby.S b/arch/arm/mach-pxa/standby.S index 29f5f5c180b7..eab1645bb4ad 100644 --- a/arch/arm/mach-pxa/standby.S +++ b/arch/arm/mach-pxa/standby.S @@ -29,7 +29,7 @@ ENTRY(pxa_cpu_standby) .align 5 1: mcr p14, 0, r2, c7, c0, 0 @ put the system into Standby str r1, [r0] @ make sure PSSR_PH/STS are clear - mov pc, lr + ret lr #endif @@ -108,7 +108,7 @@ ENTRY(pm_enter_standby_start) bic r0, r0, #0x20000000 str r0, [r1, #PXA3_DMCIER] - mov pc, lr + ret lr ENTRY(pm_enter_standby_end) #endif diff --git a/arch/arm/mach-realview/include/mach/memory.h b/arch/arm/mach-realview/include/mach/memory.h index db09170e3832..23e7a313f75d 100644 --- a/arch/arm/mach-realview/include/mach/memory.h +++ b/arch/arm/mach-realview/include/mach/memory.h @@ -20,15 +20,6 @@ #ifndef __ASM_ARCH_MEMORY_H #define __ASM_ARCH_MEMORY_H -/* - * Physical DRAM offset. - */ -#ifdef CONFIG_REALVIEW_HIGH_PHYS_OFFSET -#define PLAT_PHYS_OFFSET UL(0x70000000) -#else -#define PLAT_PHYS_OFFSET UL(0x00000000) -#endif - #ifdef CONFIG_SPARSEMEM /* diff --git a/arch/arm/mach-rpc/include/mach/memory.h b/arch/arm/mach-rpc/include/mach/memory.h index 18a221093bf5..b7e49571417d 100644 --- a/arch/arm/mach-rpc/include/mach/memory.h +++ b/arch/arm/mach-rpc/include/mach/memory.h @@ -19,11 +19,6 @@ #define __ASM_ARCH_MEMORY_H /* - * Physical DRAM offset. - */ -#define PLAT_PHYS_OFFSET UL(0x10000000) - -/* * Cache flushing area - ROM */ #define FLUSH_BASE_PHYS 0x00000000 diff --git a/arch/arm/mach-s3c24xx/sleep-s3c2410.S b/arch/arm/mach-s3c24xx/sleep-s3c2410.S index c9b91223697c..875ba8911127 100644 --- a/arch/arm/mach-s3c24xx/sleep-s3c2410.S +++ b/arch/arm/mach-s3c24xx/sleep-s3c2410.S @@ -66,4 +66,4 @@ s3c2410_do_sleep: streq r8, [r5] @ SDRAM power-down config streq r9, [r6] @ CPU sleep 1: beq 1b - mov pc, r14 + ret lr diff --git a/arch/arm/mach-s3c24xx/sleep-s3c2412.S b/arch/arm/mach-s3c24xx/sleep-s3c2412.S index 5adaceb7da13..6bf5b4d8743c 100644 --- a/arch/arm/mach-s3c24xx/sleep-s3c2412.S +++ b/arch/arm/mach-s3c24xx/sleep-s3c2412.S @@ -65,4 +65,4 @@ s3c2412_sleep_enter1: strne r9, [r3] bne s3c2412_sleep_enter1 - mov pc, r14 + ret lr diff --git a/arch/arm/mach-s5pv210/include/mach/memory.h b/arch/arm/mach-s5pv210/include/mach/memory.h index 2d3cfa221d5f..d584fac9156b 100644 --- a/arch/arm/mach-s5pv210/include/mach/memory.h +++ b/arch/arm/mach-s5pv210/include/mach/memory.h @@ -13,8 +13,6 @@ #ifndef __ASM_ARCH_MEMORY_H #define __ASM_ARCH_MEMORY_H -#define PLAT_PHYS_OFFSET UL(0x20000000) - /* * Sparsemem support * Physical memory can be located from 0x20000000 to 0x7fffffff, diff --git a/arch/arm/mach-sa1100/include/mach/memory.h b/arch/arm/mach-sa1100/include/mach/memory.h index 12d376795abc..2054051eb797 100644 --- a/arch/arm/mach-sa1100/include/mach/memory.h +++ b/arch/arm/mach-sa1100/include/mach/memory.h @@ -10,11 +10,6 @@ #include <asm/sizes.h> /* - * Physical DRAM offset is 0xc0000000 on the SA1100 - */ -#define PLAT_PHYS_OFFSET UL(0xc0000000) - -/* * Because of the wide memory address space between physical RAM banks on the * SA1100, it's much convenient to use Linux's SparseMEM support to implement * our memory map representation. Assuming all memory nodes have equal access diff --git a/arch/arm/mach-shmobile/headsmp.S b/arch/arm/mach-shmobile/headsmp.S index e5be5c88644b..293007579b8e 100644 --- a/arch/arm/mach-shmobile/headsmp.S +++ b/arch/arm/mach-shmobile/headsmp.S @@ -12,6 +12,7 @@ */ #include <linux/linkage.h> #include <linux/init.h> +#include <asm/assembler.h> #include <asm/memory.h> ENTRY(shmobile_invalidate_start) @@ -75,7 +76,7 @@ shmobile_smp_boot_next: shmobile_smp_boot_found: ldr r0, [r7, r1, lsl #2] - mov pc, r9 + ret r9 ENDPROC(shmobile_smp_boot) ENTRY(shmobile_smp_sleep) diff --git a/arch/arm/mach-tegra/sleep-tegra20.S b/arch/arm/mach-tegra/sleep-tegra20.S index aaaf3abd2688..be4bc5f853f5 100644 --- a/arch/arm/mach-tegra/sleep-tegra20.S +++ b/arch/arm/mach-tegra/sleep-tegra20.S @@ -78,7 +78,7 @@ ENTRY(tegra20_hotplug_shutdown) /* Put this CPU down */ cpu_id r0 bl tegra20_cpu_shutdown - mov pc, lr @ should never get here + ret lr @ should never get here ENDPROC(tegra20_hotplug_shutdown) /* @@ -96,7 +96,7 @@ ENDPROC(tegra20_hotplug_shutdown) */ ENTRY(tegra20_cpu_shutdown) cmp r0, #0 - moveq pc, lr @ must not be called for CPU 0 + reteq lr @ must not be called for CPU 0 mov32 r1, TEGRA_PMC_VIRT + PMC_SCRATCH41 mov r12, #CPU_RESETTABLE str r12, [r1] @@ -117,7 +117,7 @@ ENTRY(tegra20_cpu_shutdown) cpu_id r3 cmp r3, r0 beq . - mov pc, lr + ret lr ENDPROC(tegra20_cpu_shutdown) #endif @@ -164,7 +164,7 @@ ENTRY(tegra_pen_lock) cmpeq r12, r0 @ !turn == cpu? beq 1b @ while !turn == cpu && flag[!cpu] == 1 - mov pc, lr @ locked + ret lr @ locked ENDPROC(tegra_pen_lock) ENTRY(tegra_pen_unlock) @@ -176,7 +176,7 @@ ENTRY(tegra_pen_unlock) addne r2, r3, #PMC_SCRATCH39 mov r12, #0 str r12, [r2] - mov pc, lr + ret lr ENDPROC(tegra_pen_unlock) /* @@ -189,7 +189,7 @@ ENTRY(tegra20_cpu_clear_resettable) mov32 r1, TEGRA_PMC_VIRT + PMC_SCRATCH41 mov r12, #CPU_NOT_RESETTABLE str r12, [r1] - mov pc, lr + ret lr ENDPROC(tegra20_cpu_clear_resettable) /* @@ -202,7 +202,7 @@ ENTRY(tegra20_cpu_set_resettable_soon) mov32 r1, TEGRA_PMC_VIRT + PMC_SCRATCH41 mov r12, #CPU_RESETTABLE_SOON str r12, [r1] - mov pc, lr + ret lr ENDPROC(tegra20_cpu_set_resettable_soon) /* @@ -217,7 +217,7 @@ ENTRY(tegra20_cpu_is_resettable_soon) cmp r12, #CPU_RESETTABLE_SOON moveq r0, #1 movne r0, #0 - mov pc, lr + ret lr ENDPROC(tegra20_cpu_is_resettable_soon) /* @@ -239,7 +239,7 @@ ENTRY(tegra20_sleep_core_finish) mov32 r1, TEGRA_IRAM_LPx_RESUME_AREA add r0, r0, r1 - mov pc, r3 + ret r3 ENDPROC(tegra20_sleep_core_finish) /* @@ -402,7 +402,7 @@ exit_selfrefresh_loop: mov32 r0, TEGRA_PMC_BASE ldr r0, [r0, #PMC_SCRATCH41] - mov pc, r0 @ jump to tegra_resume + ret r0 @ jump to tegra_resume ENDPROC(tegra20_lp1_reset) /* @@ -455,7 +455,7 @@ tegra20_switch_cpu_to_clk32k: mov r0, #0 /* brust policy = 32KHz */ str r0, [r5, #CLK_RESET_SCLK_BURST] - mov pc, lr + ret lr /* * tegra20_enter_sleep @@ -535,7 +535,7 @@ padsave_done: adr r2, tegra20_sclk_save str r0, [r2] dsb - mov pc, lr + ret lr tegra20_sdram_pad_address: .word TEGRA_APB_MISC_BASE + APB_MISC_XM2CFGCPADCTRL diff --git a/arch/arm/mach-tegra/sleep-tegra30.S b/arch/arm/mach-tegra/sleep-tegra30.S index b16d4a57fa59..09cad9b071de 100644 --- a/arch/arm/mach-tegra/sleep-tegra30.S +++ b/arch/arm/mach-tegra/sleep-tegra30.S @@ -142,7 +142,7 @@ ENTRY(tegra30_hotplug_shutdown) /* Powergate this CPU */ mov r0, #TEGRA30_POWER_HOTPLUG_SHUTDOWN bl tegra30_cpu_shutdown - mov pc, lr @ should never get here + ret lr @ should never get here ENDPROC(tegra30_hotplug_shutdown) /* @@ -161,7 +161,7 @@ ENTRY(tegra30_cpu_shutdown) bne _no_cpu0_chk @ It's not Tegra30 cmp r3, #0 - moveq pc, lr @ Must never be called for CPU 0 + reteq lr @ Must never be called for CPU 0 _no_cpu0_chk: ldr r12, =TEGRA_FLOW_CTRL_VIRT @@ -266,7 +266,7 @@ ENTRY(tegra30_sleep_core_finish) mov32 r1, TEGRA_IRAM_LPx_RESUME_AREA add r0, r0, r1 - mov pc, r3 + ret r3 ENDPROC(tegra30_sleep_core_finish) /* @@ -285,7 +285,7 @@ ENTRY(tegra30_sleep_cpu_secondary_finish) mov r0, #0 @ power mode flags (!hotplug) bl tegra30_cpu_shutdown mov r0, #1 @ never return here - mov pc, r7 + ret r7 ENDPROC(tegra30_sleep_cpu_secondary_finish) /* @@ -529,7 +529,7 @@ __no_dual_emc_chanl: mov32 r0, TEGRA_PMC_BASE ldr r0, [r0, #PMC_SCRATCH41] - mov pc, r0 @ jump to tegra_resume + ret r0 @ jump to tegra_resume ENDPROC(tegra30_lp1_reset) .align L1_CACHE_SHIFT @@ -659,7 +659,7 @@ _no_pll_in_iddq: mov r0, #0 /* brust policy = 32KHz */ str r0, [r5, #CLK_RESET_SCLK_BURST] - mov pc, lr + ret lr /* * tegra30_enter_sleep @@ -819,7 +819,7 @@ pmc_io_dpd_skip: dsb - mov pc, lr + ret lr .ltorg /* dummy symbol for end of IRAM */ diff --git a/arch/arm/mach-tegra/sleep.S b/arch/arm/mach-tegra/sleep.S index 8d06213fbc47..f024a5109e8e 100644 --- a/arch/arm/mach-tegra/sleep.S +++ b/arch/arm/mach-tegra/sleep.S @@ -87,7 +87,7 @@ ENTRY(tegra_init_l2_for_a15) mcrne p15, 0x1, r0, c9, c0, 2 _exit_init_l2_a15: - mov pc, lr + ret lr ENDPROC(tegra_init_l2_for_a15) /* @@ -111,7 +111,7 @@ ENTRY(tegra_sleep_cpu_finish) add r3, r3, r0 mov r0, r1 - mov pc, r3 + ret r3 ENDPROC(tegra_sleep_cpu_finish) /* @@ -139,7 +139,7 @@ ENTRY(tegra_shut_off_mmu) moveq r3, #0 streq r3, [r2, #L2X0_CTRL] #endif - mov pc, r0 + ret r0 ENDPROC(tegra_shut_off_mmu) .popsection @@ -156,6 +156,6 @@ ENTRY(tegra_switch_cpu_to_pllp) str r0, [r5, #CLK_RESET_CCLK_BURST] mov r0, #0 str r0, [r5, #CLK_RESET_CCLK_DIVIDER] - mov pc, lr + ret lr ENDPROC(tegra_switch_cpu_to_pllp) #endif diff --git a/arch/arm/mach-vexpress/tc2_pm.c b/arch/arm/mach-vexpress/tc2_pm.c index b743a0ae02ce..2fb78b4648cb 100644 --- a/arch/arm/mach-vexpress/tc2_pm.c +++ b/arch/arm/mach-vexpress/tc2_pm.c @@ -152,7 +152,7 @@ static void tc2_pm_down(u64 residency) if (last_man && __mcpm_outbound_enter_critical(cpu, cluster)) { arch_spin_unlock(&tc2_pm_lock); - if (read_cpuid_part_number() == ARM_CPU_PART_CORTEX_A15) { + if (read_cpuid_part() == ARM_CPU_PART_CORTEX_A15) { /* * On the Cortex-A15 we need to disable * L2 prefetching before flushing the cache. @@ -323,6 +323,21 @@ static void __naked tc2_pm_power_up_setup(unsigned int affinity_level) " b cci_enable_port_for_self "); } +static void __init tc2_cache_off(void) +{ + pr_info("TC2: disabling cache during MCPM loopback test\n"); + if (read_cpuid_part() == ARM_CPU_PART_CORTEX_A15) { + /* disable L2 prefetching on the Cortex-A15 */ + asm volatile( + "mcr p15, 1, %0, c15, c0, 3 \n\t" + "isb \n\t" + "dsb " + : : "r" (0x400) ); + } + v7_exit_coherency_flush(all); + cci_disable_port_by_cpu(read_cpuid_mpidr()); +} + static int __init tc2_pm_init(void) { int ret, irq; @@ -370,6 +385,8 @@ static int __init tc2_pm_init(void) ret = mcpm_platform_register(&tc2_pm_power_ops); if (!ret) { mcpm_sync_init(tc2_pm_power_up_setup); + /* test if we can (re)enable the CCI on our own */ + BUG_ON(mcpm_loopback(tc2_cache_off) != 0); pr_info("TC2 power management initialized\n"); } return ret; diff --git a/arch/arm/mm/Kconfig b/arch/arm/mm/Kconfig index c348eaee7ee2..577039a3f6e5 100644 --- a/arch/arm/mm/Kconfig +++ b/arch/arm/mm/Kconfig @@ -669,7 +669,7 @@ config ARM_VIRT_EXT details. config SWP_EMULATE - bool "Emulate SWP/SWPB instructions" + bool "Emulate SWP/SWPB instructions" if !SMP depends on CPU_V7 default y if SMP select HAVE_PROC_CPU if PROC_FS @@ -907,8 +907,8 @@ config PL310_ERRATA_588369 They are architecturally defined to behave as the execution of a clean operation followed immediately by an invalidate operation, both performing to the same memory location. This functionality - is not correctly implemented in PL310 as clean lines are not - invalidated as a result of these operations. + is not correctly implemented in PL310 prior to r2p0 (fixed in r2p0) + as clean lines are not invalidated as a result of these operations. config PL310_ERRATA_727915 bool "PL310 errata: Background Clean & Invalidate by Way operation can cause data corruption" @@ -918,7 +918,8 @@ config PL310_ERRATA_727915 PL310 can handle normal accesses while it is in progress. Under very rare circumstances, due to this erratum, write data can be lost when PL310 treats a cacheable write transaction during a Clean & - Invalidate by Way operation. + Invalidate by Way operation. Revisions prior to r3p1 are affected by + this errata (fixed in r3p1). config PL310_ERRATA_753970 bool "PL310 errata: cache sync operation may be faulty" diff --git a/arch/arm/mm/alignment.c b/arch/arm/mm/alignment.c index b8cb1a2688a0..0c1ab49e5f7b 100644 --- a/arch/arm/mm/alignment.c +++ b/arch/arm/mm/alignment.c @@ -76,6 +76,7 @@ static unsigned long ai_user; static unsigned long ai_sys; +static void *ai_sys_last_pc; static unsigned long ai_skipped; static unsigned long ai_half; static unsigned long ai_word; @@ -130,7 +131,7 @@ static const char *usermode_action[] = { static int alignment_proc_show(struct seq_file *m, void *v) { seq_printf(m, "User:\t\t%lu\n", ai_user); - seq_printf(m, "System:\t\t%lu\n", ai_sys); + seq_printf(m, "System:\t\t%lu (%pF)\n", ai_sys, ai_sys_last_pc); seq_printf(m, "Skipped:\t%lu\n", ai_skipped); seq_printf(m, "Half:\t\t%lu\n", ai_half); seq_printf(m, "Word:\t\t%lu\n", ai_word); @@ -794,6 +795,7 @@ do_alignment(unsigned long addr, unsigned int fsr, struct pt_regs *regs) goto user; ai_sys += 1; + ai_sys_last_pc = (void *)instruction_pointer(regs); fixup: diff --git a/arch/arm/mm/cache-fa.S b/arch/arm/mm/cache-fa.S index e505befe51b5..2f0c58836ae7 100644 --- a/arch/arm/mm/cache-fa.S +++ b/arch/arm/mm/cache-fa.S @@ -15,6 +15,7 @@ */ #include <linux/linkage.h> #include <linux/init.h> +#include <asm/assembler.h> #include <asm/memory.h> #include <asm/page.h> @@ -45,7 +46,7 @@ ENTRY(fa_flush_icache_all) mov r0, #0 mcr p15, 0, r0, c7, c5, 0 @ invalidate I cache - mov pc, lr + ret lr ENDPROC(fa_flush_icache_all) /* @@ -71,7 +72,7 @@ __flush_whole_cache: mcrne p15, 0, ip, c7, c5, 6 @ invalidate BTB mcrne p15, 0, ip, c7, c10, 4 @ drain write buffer mcrne p15, 0, ip, c7, c5, 4 @ prefetch flush - mov pc, lr + ret lr /* * flush_user_cache_range(start, end, flags) @@ -99,7 +100,7 @@ ENTRY(fa_flush_user_cache_range) mcrne p15, 0, ip, c7, c5, 6 @ invalidate BTB mcrne p15, 0, ip, c7, c10, 4 @ data write barrier mcrne p15, 0, ip, c7, c5, 4 @ prefetch flush - mov pc, lr + ret lr /* * coherent_kern_range(start, end) @@ -135,7 +136,7 @@ ENTRY(fa_coherent_user_range) mcr p15, 0, r0, c7, c5, 6 @ invalidate BTB mcr p15, 0, r0, c7, c10, 4 @ drain write buffer mcr p15, 0, r0, c7, c5, 4 @ prefetch flush - mov pc, lr + ret lr /* * flush_kern_dcache_area(void *addr, size_t size) @@ -155,7 +156,7 @@ ENTRY(fa_flush_kern_dcache_area) mov r0, #0 mcr p15, 0, r0, c7, c5, 0 @ invalidate I cache mcr p15, 0, r0, c7, c10, 4 @ drain write buffer - mov pc, lr + ret lr /* * dma_inv_range(start, end) @@ -181,7 +182,7 @@ fa_dma_inv_range: blo 1b mov r0, #0 mcr p15, 0, r0, c7, c10, 4 @ drain write buffer - mov pc, lr + ret lr /* * dma_clean_range(start, end) @@ -199,7 +200,7 @@ fa_dma_clean_range: blo 1b mov r0, #0 mcr p15, 0, r0, c7, c10, 4 @ drain write buffer - mov pc, lr + ret lr /* * dma_flush_range(start,end) @@ -214,7 +215,7 @@ ENTRY(fa_dma_flush_range) blo 1b mov r0, #0 mcr p15, 0, r0, c7, c10, 4 @ drain write buffer - mov pc, lr + ret lr /* * dma_map_area(start, size, dir) @@ -237,7 +238,7 @@ ENDPROC(fa_dma_map_area) * - dir - DMA direction */ ENTRY(fa_dma_unmap_area) - mov pc, lr + ret lr ENDPROC(fa_dma_unmap_area) .globl fa_flush_kern_cache_louis diff --git a/arch/arm/mm/cache-l2x0.c b/arch/arm/mm/cache-l2x0.c index 7c3fb41a462e..5f2c988a06ac 100644 --- a/arch/arm/mm/cache-l2x0.c +++ b/arch/arm/mm/cache-l2x0.c @@ -665,7 +665,7 @@ static int l2c310_cpu_enable_flz(struct notifier_block *nb, unsigned long act, v static void __init l2c310_enable(void __iomem *base, u32 aux, unsigned num_lock) { unsigned rev = readl_relaxed(base + L2X0_CACHE_ID) & L2X0_CACHE_ID_RTL_MASK; - bool cortex_a9 = read_cpuid_part_number() == ARM_CPU_PART_CORTEX_A9; + bool cortex_a9 = read_cpuid_part() == ARM_CPU_PART_CORTEX_A9; if (rev >= L310_CACHE_ID_RTL_R2P0) { if (cortex_a9) { diff --git a/arch/arm/mm/cache-nop.S b/arch/arm/mm/cache-nop.S index 8e12ddca0031..f1cc9861031f 100644 --- a/arch/arm/mm/cache-nop.S +++ b/arch/arm/mm/cache-nop.S @@ -5,11 +5,12 @@ */ #include <linux/linkage.h> #include <linux/init.h> +#include <asm/assembler.h> #include "proc-macros.S" ENTRY(nop_flush_icache_all) - mov pc, lr + ret lr ENDPROC(nop_flush_icache_all) .globl nop_flush_kern_cache_all @@ -29,7 +30,7 @@ ENDPROC(nop_flush_icache_all) ENTRY(nop_coherent_user_range) mov r0, 0 - mov pc, lr + ret lr ENDPROC(nop_coherent_user_range) .globl nop_flush_kern_dcache_area diff --git a/arch/arm/mm/cache-v4.S b/arch/arm/mm/cache-v4.S index a7ba68f59f0c..91e3adf155cb 100644 --- a/arch/arm/mm/cache-v4.S +++ b/arch/arm/mm/cache-v4.S @@ -9,6 +9,7 @@ */ #include <linux/linkage.h> #include <linux/init.h> +#include <asm/assembler.h> #include <asm/page.h> #include "proc-macros.S" @@ -18,7 +19,7 @@ * Unconditionally clean and invalidate the entire icache. */ ENTRY(v4_flush_icache_all) - mov pc, lr + ret lr ENDPROC(v4_flush_icache_all) /* @@ -40,7 +41,7 @@ ENTRY(v4_flush_kern_cache_all) #ifdef CONFIG_CPU_CP15 mov r0, #0 mcr p15, 0, r0, c7, c7, 0 @ flush ID cache - mov pc, lr + ret lr #else /* FALLTHROUGH */ #endif @@ -59,7 +60,7 @@ ENTRY(v4_flush_user_cache_range) #ifdef CONFIG_CPU_CP15 mov ip, #0 mcr p15, 0, ip, c7, c7, 0 @ flush ID cache - mov pc, lr + ret lr #else /* FALLTHROUGH */ #endif @@ -89,7 +90,7 @@ ENTRY(v4_coherent_kern_range) */ ENTRY(v4_coherent_user_range) mov r0, #0 - mov pc, lr + ret lr /* * flush_kern_dcache_area(void *addr, size_t size) @@ -116,7 +117,7 @@ ENTRY(v4_dma_flush_range) mov r0, #0 mcr p15, 0, r0, c7, c7, 0 @ flush ID cache #endif - mov pc, lr + ret lr /* * dma_unmap_area(start, size, dir) @@ -136,7 +137,7 @@ ENTRY(v4_dma_unmap_area) * - dir - DMA direction */ ENTRY(v4_dma_map_area) - mov pc, lr + ret lr ENDPROC(v4_dma_unmap_area) ENDPROC(v4_dma_map_area) diff --git a/arch/arm/mm/cache-v4wb.S b/arch/arm/mm/cache-v4wb.S index cd4945321407..2522f8c8fbb1 100644 --- a/arch/arm/mm/cache-v4wb.S +++ b/arch/arm/mm/cache-v4wb.S @@ -9,6 +9,7 @@ */ #include <linux/linkage.h> #include <linux/init.h> +#include <asm/assembler.h> #include <asm/memory.h> #include <asm/page.h> #include "proc-macros.S" @@ -58,7 +59,7 @@ flush_base: ENTRY(v4wb_flush_icache_all) mov r0, #0 mcr p15, 0, r0, c7, c5, 0 @ invalidate I cache - mov pc, lr + ret lr ENDPROC(v4wb_flush_icache_all) /* @@ -94,7 +95,7 @@ __flush_whole_cache: blo 1b #endif mcr p15, 0, ip, c7, c10, 4 @ drain write buffer - mov pc, lr + ret lr /* * flush_user_cache_range(start, end, flags) @@ -122,7 +123,7 @@ ENTRY(v4wb_flush_user_cache_range) blo 1b tst r2, #VM_EXEC mcrne p15, 0, ip, c7, c10, 4 @ drain write buffer - mov pc, lr + ret lr /* * flush_kern_dcache_area(void *addr, size_t size) @@ -170,7 +171,7 @@ ENTRY(v4wb_coherent_user_range) mov r0, #0 mcr p15, 0, r0, c7, c5, 0 @ invalidate I cache mcr p15, 0, r0, c7, c10, 4 @ drain WB - mov pc, lr + ret lr /* @@ -195,7 +196,7 @@ v4wb_dma_inv_range: cmp r0, r1 blo 1b mcr p15, 0, r0, c7, c10, 4 @ drain write buffer - mov pc, lr + ret lr /* * dma_clean_range(start, end) @@ -212,7 +213,7 @@ v4wb_dma_clean_range: cmp r0, r1 blo 1b mcr p15, 0, r0, c7, c10, 4 @ drain write buffer - mov pc, lr + ret lr /* * dma_flush_range(start, end) @@ -248,7 +249,7 @@ ENDPROC(v4wb_dma_map_area) * - dir - DMA direction */ ENTRY(v4wb_dma_unmap_area) - mov pc, lr + ret lr ENDPROC(v4wb_dma_unmap_area) .globl v4wb_flush_kern_cache_louis diff --git a/arch/arm/mm/cache-v4wt.S b/arch/arm/mm/cache-v4wt.S index 11e5e5838bc5..a0982ce49007 100644 --- a/arch/arm/mm/cache-v4wt.S +++ b/arch/arm/mm/cache-v4wt.S @@ -13,6 +13,7 @@ */ #include <linux/linkage.h> #include <linux/init.h> +#include <asm/assembler.h> #include <asm/page.h> #include "proc-macros.S" @@ -48,7 +49,7 @@ ENTRY(v4wt_flush_icache_all) mov r0, #0 mcr p15, 0, r0, c7, c5, 0 @ invalidate I cache - mov pc, lr + ret lr ENDPROC(v4wt_flush_icache_all) /* @@ -71,7 +72,7 @@ __flush_whole_cache: tst r2, #VM_EXEC mcrne p15, 0, ip, c7, c5, 0 @ invalidate I cache mcr p15, 0, ip, c7, c6, 0 @ invalidate D cache - mov pc, lr + ret lr /* * flush_user_cache_range(start, end, flags) @@ -94,7 +95,7 @@ ENTRY(v4wt_flush_user_cache_range) add r0, r0, #CACHE_DLINESIZE cmp r0, r1 blo 1b - mov pc, lr + ret lr /* * coherent_kern_range(start, end) @@ -126,7 +127,7 @@ ENTRY(v4wt_coherent_user_range) cmp r0, r1 blo 1b mov r0, #0 - mov pc, lr + ret lr /* * flush_kern_dcache_area(void *addr, size_t size) @@ -160,7 +161,7 @@ v4wt_dma_inv_range: add r0, r0, #CACHE_DLINESIZE cmp r0, r1 blo 1b - mov pc, lr + ret lr /* * dma_flush_range(start, end) @@ -192,7 +193,7 @@ ENTRY(v4wt_dma_unmap_area) * - dir - DMA direction */ ENTRY(v4wt_dma_map_area) - mov pc, lr + ret lr ENDPROC(v4wt_dma_unmap_area) ENDPROC(v4wt_dma_map_area) diff --git a/arch/arm/mm/cache-v6.S b/arch/arm/mm/cache-v6.S index d8fd4d4bd3d4..24659952c278 100644 --- a/arch/arm/mm/cache-v6.S +++ b/arch/arm/mm/cache-v6.S @@ -51,7 +51,7 @@ ENTRY(v6_flush_icache_all) #else mcr p15, 0, r0, c7, c5, 0 @ invalidate I-cache #endif - mov pc, lr + ret lr ENDPROC(v6_flush_icache_all) /* @@ -73,7 +73,7 @@ ENTRY(v6_flush_kern_cache_all) #else mcr p15, 0, r0, c7, c15, 0 @ Cache clean+invalidate #endif - mov pc, lr + ret lr /* * v6_flush_cache_all() @@ -98,7 +98,7 @@ ENTRY(v6_flush_user_cache_all) * - we have a VIPT cache. */ ENTRY(v6_flush_user_cache_range) - mov pc, lr + ret lr /* * v6_coherent_kern_range(start,end) @@ -150,7 +150,7 @@ ENTRY(v6_coherent_user_range) #else mcr p15, 0, r0, c7, c5, 6 @ invalidate BTB #endif - mov pc, lr + ret lr /* * Fault handling for the cache operation above. If the virtual address in r0 @@ -158,7 +158,7 @@ ENTRY(v6_coherent_user_range) */ 9001: mov r0, #-EFAULT - mov pc, lr + ret lr UNWIND(.fnend ) ENDPROC(v6_coherent_user_range) ENDPROC(v6_coherent_kern_range) @@ -188,7 +188,7 @@ ENTRY(v6_flush_kern_dcache_area) mov r0, #0 mcr p15, 0, r0, c7, c10, 4 #endif - mov pc, lr + ret lr /* @@ -239,7 +239,7 @@ v6_dma_inv_range: blo 1b mov r0, #0 mcr p15, 0, r0, c7, c10, 4 @ drain write buffer - mov pc, lr + ret lr /* * v6_dma_clean_range(start,end) @@ -262,7 +262,7 @@ v6_dma_clean_range: blo 1b mov r0, #0 mcr p15, 0, r0, c7, c10, 4 @ drain write buffer - mov pc, lr + ret lr /* * v6_dma_flush_range(start,end) @@ -290,7 +290,7 @@ ENTRY(v6_dma_flush_range) blo 1b mov r0, #0 mcr p15, 0, r0, c7, c10, 4 @ drain write buffer - mov pc, lr + ret lr /* * dma_map_area(start, size, dir) @@ -323,7 +323,7 @@ ENTRY(v6_dma_unmap_area) teq r2, #DMA_TO_DEVICE bne v6_dma_inv_range #endif - mov pc, lr + ret lr ENDPROC(v6_dma_unmap_area) .globl v6_flush_kern_cache_louis diff --git a/arch/arm/mm/cache-v7.S b/arch/arm/mm/cache-v7.S index 615c99e38ba1..b966656d2c2d 100644 --- a/arch/arm/mm/cache-v7.S +++ b/arch/arm/mm/cache-v7.S @@ -61,7 +61,7 @@ ENTRY(v7_invalidate_l1) bgt 1b dsb st isb - mov pc, lr + ret lr ENDPROC(v7_invalidate_l1) /* @@ -76,7 +76,7 @@ ENTRY(v7_flush_icache_all) mov r0, #0 ALT_SMP(mcr p15, 0, r0, c7, c1, 0) @ invalidate I-cache inner shareable ALT_UP(mcr p15, 0, r0, c7, c5, 0) @ I+BTB cache invalidate - mov pc, lr + ret lr ENDPROC(v7_flush_icache_all) /* @@ -94,7 +94,7 @@ ENTRY(v7_flush_dcache_louis) ALT_UP(ands r3, r0, #(7 << 27)) @ extract LoUU from clidr #ifdef CONFIG_ARM_ERRATA_643719 ALT_SMP(mrceq p15, 0, r2, c0, c0, 0) @ read main ID register - ALT_UP(moveq pc, lr) @ LoUU is zero, so nothing to do + ALT_UP(reteq lr) @ LoUU is zero, so nothing to do ldreq r1, =0x410fc090 @ ID of ARM Cortex A9 r0p? biceq r2, r2, #0x0000000f @ clear minor revision number teqeq r2, r1 @ test for errata affected core and if so... @@ -102,7 +102,7 @@ ENTRY(v7_flush_dcache_louis) #endif ALT_SMP(mov r3, r3, lsr #20) @ r3 = LoUIS * 2 ALT_UP(mov r3, r3, lsr #26) @ r3 = LoUU * 2 - moveq pc, lr @ return if level == 0 + reteq lr @ return if level == 0 mov r10, #0 @ r10 (starting level) = 0 b flush_levels @ start flushing cache levels ENDPROC(v7_flush_dcache_louis) @@ -168,7 +168,7 @@ finished: mcr p15, 2, r10, c0, c0, 0 @ select current cache level in cssr dsb st isb - mov pc, lr + ret lr ENDPROC(v7_flush_dcache_all) /* @@ -191,7 +191,7 @@ ENTRY(v7_flush_kern_cache_all) ALT_UP(mcr p15, 0, r0, c7, c5, 0) @ I+BTB cache invalidate ARM( ldmfd sp!, {r4-r5, r7, r9-r11, lr} ) THUMB( ldmfd sp!, {r4-r7, r9-r11, lr} ) - mov pc, lr + ret lr ENDPROC(v7_flush_kern_cache_all) /* @@ -209,7 +209,7 @@ ENTRY(v7_flush_kern_cache_louis) ALT_UP(mcr p15, 0, r0, c7, c5, 0) @ I+BTB cache invalidate ARM( ldmfd sp!, {r4-r5, r7, r9-r11, lr} ) THUMB( ldmfd sp!, {r4-r7, r9-r11, lr} ) - mov pc, lr + ret lr ENDPROC(v7_flush_kern_cache_louis) /* @@ -235,7 +235,7 @@ ENTRY(v7_flush_user_cache_all) * - we have a VIPT cache. */ ENTRY(v7_flush_user_cache_range) - mov pc, lr + ret lr ENDPROC(v7_flush_user_cache_all) ENDPROC(v7_flush_user_cache_range) @@ -296,7 +296,7 @@ ENTRY(v7_coherent_user_range) ALT_UP(mcr p15, 0, r0, c7, c5, 6) @ invalidate BTB dsb ishst isb - mov pc, lr + ret lr /* * Fault handling for the cache operation above. If the virtual address in r0 @@ -307,7 +307,7 @@ ENTRY(v7_coherent_user_range) dsb #endif mov r0, #-EFAULT - mov pc, lr + ret lr UNWIND(.fnend ) ENDPROC(v7_coherent_kern_range) ENDPROC(v7_coherent_user_range) @@ -336,7 +336,7 @@ ENTRY(v7_flush_kern_dcache_area) cmp r0, r1 blo 1b dsb st - mov pc, lr + ret lr ENDPROC(v7_flush_kern_dcache_area) /* @@ -369,7 +369,7 @@ v7_dma_inv_range: cmp r0, r1 blo 1b dsb st - mov pc, lr + ret lr ENDPROC(v7_dma_inv_range) /* @@ -391,7 +391,7 @@ v7_dma_clean_range: cmp r0, r1 blo 1b dsb st - mov pc, lr + ret lr ENDPROC(v7_dma_clean_range) /* @@ -413,7 +413,7 @@ ENTRY(v7_dma_flush_range) cmp r0, r1 blo 1b dsb st - mov pc, lr + ret lr ENDPROC(v7_dma_flush_range) /* @@ -439,7 +439,7 @@ ENTRY(v7_dma_unmap_area) add r1, r1, r0 teq r2, #DMA_TO_DEVICE bne v7_dma_inv_range - mov pc, lr + ret lr ENDPROC(v7_dma_unmap_area) __INITDATA diff --git a/arch/arm/mm/dump.c b/arch/arm/mm/dump.c index c508f41a43bc..59424937e52b 100644 --- a/arch/arm/mm/dump.c +++ b/arch/arm/mm/dump.c @@ -126,8 +126,8 @@ static const struct prot_bits section_bits[] = { .val = PMD_SECT_USER, .set = "USR", }, { - .mask = PMD_SECT_RDONLY, - .val = PMD_SECT_RDONLY, + .mask = L_PMD_SECT_RDONLY, + .val = L_PMD_SECT_RDONLY, .set = "ro", .clear = "RW", #elif __LINUX_ARM_ARCH__ >= 6 diff --git a/arch/arm/mm/l2c-l2x0-resume.S b/arch/arm/mm/l2c-l2x0-resume.S index 99b05f21a59a..fda415e4ca8f 100644 --- a/arch/arm/mm/l2c-l2x0-resume.S +++ b/arch/arm/mm/l2c-l2x0-resume.S @@ -6,6 +6,7 @@ * This code can only be used to if you are running in the secure world. */ #include <linux/linkage.h> +#include <asm/assembler.h> #include <asm/hardware/cache-l2x0.h> .text @@ -27,7 +28,7 @@ ENTRY(l2c310_early_resume) @ Check that the address has been initialised teq r1, #0 - moveq pc, lr + reteq lr @ The prefetch and power control registers are revision dependent @ and can be written whether or not the L2 cache is enabled @@ -41,7 +42,7 @@ ENTRY(l2c310_early_resume) @ Don't setup the L2 cache if it is already enabled ldr r0, [r1, #L2X0_CTRL] tst r0, #L2X0_CTRL_EN - movne pc, lr + retne lr str r3, [r1, #L310_TAG_LATENCY_CTRL] str r4, [r1, #L310_DATA_LATENCY_CTRL] @@ -51,7 +52,7 @@ ENTRY(l2c310_early_resume) str r2, [r1, #L2X0_AUX_CTRL] mov r9, #L2X0_CTRL_EN str r9, [r1, #L2X0_CTRL] - mov pc, lr + ret lr ENDPROC(l2c310_early_resume) .align diff --git a/arch/arm/mm/mmu.c b/arch/arm/mm/mmu.c index 6e3ba8d112a2..8348ed6b2efe 100644 --- a/arch/arm/mm/mmu.c +++ b/arch/arm/mm/mmu.c @@ -1434,23 +1434,64 @@ void __init early_paging_init(const struct machine_desc *mdesc, dsb(ishst); isb(); - /* remap level 1 table */ + /* + * FIXME: This code is not architecturally compliant: we modify + * the mappings in-place, indeed while they are in use by this + * very same code. This may lead to unpredictable behaviour of + * the CPU. + * + * Even modifying the mappings in a separate page table does + * not resolve this. + * + * The architecture strongly recommends that when a mapping is + * changed, that it is changed by first going via an invalid + * mapping and back to the new mapping. This is to ensure that + * no TLB conflicts (caused by the TLB having more than one TLB + * entry match a translation) can occur. However, doing that + * here will result in unmapping the code we are running. + */ + pr_warn("WARNING: unsafe modification of in-place page tables - tainting kernel\n"); + add_taint(TAINT_CPU_OUT_OF_SPEC, LOCKDEP_STILL_OK); + + /* + * Remap level 1 table. This changes the physical addresses + * used to refer to the level 2 page tables to the high + * physical address alias, leaving everything else the same. + */ for (i = 0; i < PTRS_PER_PGD; pud0++, i++) { set_pud(pud0, __pud(__pa(pmd0) | PMD_TYPE_TABLE | L_PGD_SWAPPER)); pmd0 += PTRS_PER_PMD; } - /* remap pmds for kernel mapping */ + /* + * Remap the level 2 table, pointing the mappings at the high + * physical address alias of these pages. + */ phys = __pa(map_start); do { *pmdk++ = __pmd(phys | pmdprot); phys += PMD_SIZE; } while (phys < map_end); + /* + * Ensure that the above updates are flushed out of the cache. + * This is not strictly correct; on a system where the caches + * are coherent with each other, but the MMU page table walks + * may not be coherent, flush_cache_all() may be a no-op, and + * this will fail. + */ flush_cache_all(); + + /* + * Re-write the TTBR values to point them at the high physical + * alias of the page tables. We expect __va() will work on + * cpu_get_pgd(), which returns the value of TTBR0. + */ cpu_switch_mm(pgd0, &init_mm); cpu_set_ttbr(1, __pa(pgd0) + TTBR1_OFFSET); + + /* Finally flush any stale TLB values. */ local_flush_bp_all(); local_flush_tlb_all(); } diff --git a/arch/arm/mm/proc-arm1020.S b/arch/arm/mm/proc-arm1020.S index d1a2d05971e0..86ee5d47ce3c 100644 --- a/arch/arm/mm/proc-arm1020.S +++ b/arch/arm/mm/proc-arm1020.S @@ -73,7 +73,7 @@ * cpu_arm1020_proc_init() */ ENTRY(cpu_arm1020_proc_init) - mov pc, lr + ret lr /* * cpu_arm1020_proc_fin() @@ -83,7 +83,7 @@ ENTRY(cpu_arm1020_proc_fin) bic r0, r0, #0x1000 @ ...i............ bic r0, r0, #0x000e @ ............wca. mcr p15, 0, r0, c1, c0, 0 @ disable caches - mov pc, lr + ret lr /* * cpu_arm1020_reset(loc) @@ -107,7 +107,7 @@ ENTRY(cpu_arm1020_reset) bic ip, ip, #0x000f @ ............wcam bic ip, ip, #0x1100 @ ...i...s........ mcr p15, 0, ip, c1, c0, 0 @ ctrl register - mov pc, r0 + ret r0 ENDPROC(cpu_arm1020_reset) .popsection @@ -117,7 +117,7 @@ ENDPROC(cpu_arm1020_reset) .align 5 ENTRY(cpu_arm1020_do_idle) mcr p15, 0, r0, c7, c0, 4 @ Wait for interrupt - mov pc, lr + ret lr /* ================================= CACHE ================================ */ @@ -133,7 +133,7 @@ ENTRY(arm1020_flush_icache_all) mov r0, #0 mcr p15, 0, r0, c7, c5, 0 @ invalidate I cache #endif - mov pc, lr + ret lr ENDPROC(arm1020_flush_icache_all) /* @@ -169,7 +169,7 @@ __flush_whole_cache: mcrne p15, 0, ip, c7, c5, 0 @ invalidate I cache #endif mcrne p15, 0, ip, c7, c10, 4 @ drain WB - mov pc, lr + ret lr /* * flush_user_cache_range(start, end, flags) @@ -200,7 +200,7 @@ ENTRY(arm1020_flush_user_cache_range) mcrne p15, 0, ip, c7, c5, 0 @ invalidate I cache #endif mcrne p15, 0, ip, c7, c10, 4 @ drain WB - mov pc, lr + ret lr /* * coherent_kern_range(start, end) @@ -242,7 +242,7 @@ ENTRY(arm1020_coherent_user_range) blo 1b mcr p15, 0, ip, c7, c10, 4 @ drain WB mov r0, #0 - mov pc, lr + ret lr /* * flush_kern_dcache_area(void *addr, size_t size) @@ -264,7 +264,7 @@ ENTRY(arm1020_flush_kern_dcache_area) blo 1b #endif mcr p15, 0, ip, c7, c10, 4 @ drain WB - mov pc, lr + ret lr /* * dma_inv_range(start, end) @@ -297,7 +297,7 @@ arm1020_dma_inv_range: blo 1b #endif mcr p15, 0, ip, c7, c10, 4 @ drain WB - mov pc, lr + ret lr /* * dma_clean_range(start, end) @@ -320,7 +320,7 @@ arm1020_dma_clean_range: blo 1b #endif mcr p15, 0, ip, c7, c10, 4 @ drain WB - mov pc, lr + ret lr /* * dma_flush_range(start, end) @@ -342,7 +342,7 @@ ENTRY(arm1020_dma_flush_range) blo 1b #endif mcr p15, 0, ip, c7, c10, 4 @ drain WB - mov pc, lr + ret lr /* * dma_map_area(start, size, dir) @@ -365,7 +365,7 @@ ENDPROC(arm1020_dma_map_area) * - dir - DMA direction */ ENTRY(arm1020_dma_unmap_area) - mov pc, lr + ret lr ENDPROC(arm1020_dma_unmap_area) .globl arm1020_flush_kern_cache_louis @@ -384,7 +384,7 @@ ENTRY(cpu_arm1020_dcache_clean_area) subs r1, r1, #CACHE_DLINESIZE bhi 1b #endif - mov pc, lr + ret lr /* =============================== PageTable ============================== */ @@ -423,7 +423,7 @@ ENTRY(cpu_arm1020_switch_mm) mcr p15, 0, r0, c2, c0, 0 @ load page table pointer mcr p15, 0, r1, c8, c7, 0 @ invalidate I & D TLBs #endif /* CONFIG_MMU */ - mov pc, lr + ret lr /* * cpu_arm1020_set_pte(ptep, pte) @@ -441,7 +441,7 @@ ENTRY(cpu_arm1020_set_pte_ext) #endif mcr p15, 0, r0, c7, c10, 4 @ drain WB #endif /* CONFIG_MMU */ - mov pc, lr + ret lr .type __arm1020_setup, #function __arm1020_setup: @@ -460,7 +460,7 @@ __arm1020_setup: #ifdef CONFIG_CPU_CACHE_ROUND_ROBIN orr r0, r0, #0x4000 @ .R.. .... .... .... #endif - mov pc, lr + ret lr .size __arm1020_setup, . - __arm1020_setup /* diff --git a/arch/arm/mm/proc-arm1020e.S b/arch/arm/mm/proc-arm1020e.S index 9d89405c3d03..a6331d78601f 100644 --- a/arch/arm/mm/proc-arm1020e.S +++ b/arch/arm/mm/proc-arm1020e.S @@ -73,7 +73,7 @@ * cpu_arm1020e_proc_init() */ ENTRY(cpu_arm1020e_proc_init) - mov pc, lr + ret lr /* * cpu_arm1020e_proc_fin() @@ -83,7 +83,7 @@ ENTRY(cpu_arm1020e_proc_fin) bic r0, r0, #0x1000 @ ...i............ bic r0, r0, #0x000e @ ............wca. mcr p15, 0, r0, c1, c0, 0 @ disable caches - mov pc, lr + ret lr /* * cpu_arm1020e_reset(loc) @@ -107,7 +107,7 @@ ENTRY(cpu_arm1020e_reset) bic ip, ip, #0x000f @ ............wcam bic ip, ip, #0x1100 @ ...i...s........ mcr p15, 0, ip, c1, c0, 0 @ ctrl register - mov pc, r0 + ret r0 ENDPROC(cpu_arm1020e_reset) .popsection @@ -117,7 +117,7 @@ ENDPROC(cpu_arm1020e_reset) .align 5 ENTRY(cpu_arm1020e_do_idle) mcr p15, 0, r0, c7, c0, 4 @ Wait for interrupt - mov pc, lr + ret lr /* ================================= CACHE ================================ */ @@ -133,7 +133,7 @@ ENTRY(arm1020e_flush_icache_all) mov r0, #0 mcr p15, 0, r0, c7, c5, 0 @ invalidate I cache #endif - mov pc, lr + ret lr ENDPROC(arm1020e_flush_icache_all) /* @@ -168,7 +168,7 @@ __flush_whole_cache: mcrne p15, 0, ip, c7, c5, 0 @ invalidate I cache #endif mcrne p15, 0, ip, c7, c10, 4 @ drain WB - mov pc, lr + ret lr /* * flush_user_cache_range(start, end, flags) @@ -197,7 +197,7 @@ ENTRY(arm1020e_flush_user_cache_range) mcrne p15, 0, ip, c7, c5, 0 @ invalidate I cache #endif mcrne p15, 0, ip, c7, c10, 4 @ drain WB - mov pc, lr + ret lr /* * coherent_kern_range(start, end) @@ -236,7 +236,7 @@ ENTRY(arm1020e_coherent_user_range) blo 1b mcr p15, 0, ip, c7, c10, 4 @ drain WB mov r0, #0 - mov pc, lr + ret lr /* * flush_kern_dcache_area(void *addr, size_t size) @@ -257,7 +257,7 @@ ENTRY(arm1020e_flush_kern_dcache_area) blo 1b #endif mcr p15, 0, ip, c7, c10, 4 @ drain WB - mov pc, lr + ret lr /* * dma_inv_range(start, end) @@ -286,7 +286,7 @@ arm1020e_dma_inv_range: blo 1b #endif mcr p15, 0, ip, c7, c10, 4 @ drain WB - mov pc, lr + ret lr /* * dma_clean_range(start, end) @@ -308,7 +308,7 @@ arm1020e_dma_clean_range: blo 1b #endif mcr p15, 0, ip, c7, c10, 4 @ drain WB - mov pc, lr + ret lr /* * dma_flush_range(start, end) @@ -328,7 +328,7 @@ ENTRY(arm1020e_dma_flush_range) blo 1b #endif mcr p15, 0, ip, c7, c10, 4 @ drain WB - mov pc, lr + ret lr /* * dma_map_area(start, size, dir) @@ -351,7 +351,7 @@ ENDPROC(arm1020e_dma_map_area) * - dir - DMA direction */ ENTRY(arm1020e_dma_unmap_area) - mov pc, lr + ret lr ENDPROC(arm1020e_dma_unmap_area) .globl arm1020e_flush_kern_cache_louis @@ -369,7 +369,7 @@ ENTRY(cpu_arm1020e_dcache_clean_area) subs r1, r1, #CACHE_DLINESIZE bhi 1b #endif - mov pc, lr + ret lr /* =============================== PageTable ============================== */ @@ -407,7 +407,7 @@ ENTRY(cpu_arm1020e_switch_mm) mcr p15, 0, r0, c2, c0, 0 @ load page table pointer mcr p15, 0, r1, c8, c7, 0 @ invalidate I & D TLBs #endif - mov pc, lr + ret lr /* * cpu_arm1020e_set_pte(ptep, pte) @@ -423,7 +423,7 @@ ENTRY(cpu_arm1020e_set_pte_ext) mcr p15, 0, r0, c7, c10, 1 @ clean D entry #endif #endif /* CONFIG_MMU */ - mov pc, lr + ret lr .type __arm1020e_setup, #function __arm1020e_setup: @@ -441,7 +441,7 @@ __arm1020e_setup: #ifdef CONFIG_CPU_CACHE_ROUND_ROBIN orr r0, r0, #0x4000 @ .R.. .... .... .... #endif - mov pc, lr + ret lr .size __arm1020e_setup, . - __arm1020e_setup /* diff --git a/arch/arm/mm/proc-arm1022.S b/arch/arm/mm/proc-arm1022.S index 6f01a0ae3b30..a126b7a59928 100644 --- a/arch/arm/mm/proc-arm1022.S +++ b/arch/arm/mm/proc-arm1022.S @@ -62,7 +62,7 @@ * cpu_arm1022_proc_init() */ ENTRY(cpu_arm1022_proc_init) - mov pc, lr + ret lr /* * cpu_arm1022_proc_fin() @@ -72,7 +72,7 @@ ENTRY(cpu_arm1022_proc_fin) bic r0, r0, #0x1000 @ ...i............ bic r0, r0, #0x000e @ ............wca. mcr p15, 0, r0, c1, c0, 0 @ disable caches - mov pc, lr + ret lr /* * cpu_arm1022_reset(loc) @@ -96,7 +96,7 @@ ENTRY(cpu_arm1022_reset) bic ip, ip, #0x000f @ ............wcam bic ip, ip, #0x1100 @ ...i...s........ mcr p15, 0, ip, c1, c0, 0 @ ctrl register - mov pc, r0 + ret r0 ENDPROC(cpu_arm1022_reset) .popsection @@ -106,7 +106,7 @@ ENDPROC(cpu_arm1022_reset) .align 5 ENTRY(cpu_arm1022_do_idle) mcr p15, 0, r0, c7, c0, 4 @ Wait for interrupt - mov pc, lr + ret lr /* ================================= CACHE ================================ */ @@ -122,7 +122,7 @@ ENTRY(arm1022_flush_icache_all) mov r0, #0 mcr p15, 0, r0, c7, c5, 0 @ invalidate I cache #endif - mov pc, lr + ret lr ENDPROC(arm1022_flush_icache_all) /* @@ -156,7 +156,7 @@ __flush_whole_cache: mcrne p15, 0, ip, c7, c5, 0 @ invalidate I cache #endif mcrne p15, 0, ip, c7, c10, 4 @ drain WB - mov pc, lr + ret lr /* * flush_user_cache_range(start, end, flags) @@ -185,7 +185,7 @@ ENTRY(arm1022_flush_user_cache_range) mcrne p15, 0, ip, c7, c5, 0 @ invalidate I cache #endif mcrne p15, 0, ip, c7, c10, 4 @ drain WB - mov pc, lr + ret lr /* * coherent_kern_range(start, end) @@ -225,7 +225,7 @@ ENTRY(arm1022_coherent_user_range) blo 1b mcr p15, 0, ip, c7, c10, 4 @ drain WB mov r0, #0 - mov pc, lr + ret lr /* * flush_kern_dcache_area(void *addr, size_t size) @@ -246,7 +246,7 @@ ENTRY(arm1022_flush_kern_dcache_area) blo 1b #endif mcr p15, 0, ip, c7, c10, 4 @ drain WB - mov pc, lr + ret lr /* * dma_inv_range(start, end) @@ -275,7 +275,7 @@ arm1022_dma_inv_range: blo 1b #endif mcr p15, 0, ip, c7, c10, 4 @ drain WB - mov pc, lr + ret lr /* * dma_clean_range(start, end) @@ -297,7 +297,7 @@ arm1022_dma_clean_range: blo 1b #endif mcr p15, 0, ip, c7, c10, 4 @ drain WB - mov pc, lr + ret lr /* * dma_flush_range(start, end) @@ -317,7 +317,7 @@ ENTRY(arm1022_dma_flush_range) blo 1b #endif mcr p15, 0, ip, c7, c10, 4 @ drain WB - mov pc, lr + ret lr /* * dma_map_area(start, size, dir) @@ -340,7 +340,7 @@ ENDPROC(arm1022_dma_map_area) * - dir - DMA direction */ ENTRY(arm1022_dma_unmap_area) - mov pc, lr + ret lr ENDPROC(arm1022_dma_unmap_area) .globl arm1022_flush_kern_cache_louis @@ -358,7 +358,7 @@ ENTRY(cpu_arm1022_dcache_clean_area) subs r1, r1, #CACHE_DLINESIZE bhi 1b #endif - mov pc, lr + ret lr /* =============================== PageTable ============================== */ @@ -389,7 +389,7 @@ ENTRY(cpu_arm1022_switch_mm) mcr p15, 0, r0, c2, c0, 0 @ load page table pointer mcr p15, 0, r1, c8, c7, 0 @ invalidate I & D TLBs #endif - mov pc, lr + ret lr /* * cpu_arm1022_set_pte_ext(ptep, pte, ext) @@ -405,7 +405,7 @@ ENTRY(cpu_arm1022_set_pte_ext) mcr p15, 0, r0, c7, c10, 1 @ clean D entry #endif #endif /* CONFIG_MMU */ - mov pc, lr + ret lr .type __arm1022_setup, #function __arm1022_setup: @@ -423,7 +423,7 @@ __arm1022_setup: #ifdef CONFIG_CPU_CACHE_ROUND_ROBIN orr r0, r0, #0x4000 @ .R.............. #endif - mov pc, lr + ret lr .size __arm1022_setup, . - __arm1022_setup /* diff --git a/arch/arm/mm/proc-arm1026.S b/arch/arm/mm/proc-arm1026.S index 4799a24b43e6..fc294067e977 100644 --- a/arch/arm/mm/proc-arm1026.S +++ b/arch/arm/mm/proc-arm1026.S @@ -62,7 +62,7 @@ * cpu_arm1026_proc_init() */ ENTRY(cpu_arm1026_proc_init) - mov pc, lr + ret lr /* * cpu_arm1026_proc_fin() @@ -72,7 +72,7 @@ ENTRY(cpu_arm1026_proc_fin) bic r0, r0, #0x1000 @ ...i............ bic r0, r0, #0x000e @ ............wca. mcr p15, 0, r0, c1, c0, 0 @ disable caches - mov pc, lr + ret lr /* * cpu_arm1026_reset(loc) @@ -96,7 +96,7 @@ ENTRY(cpu_arm1026_reset) bic ip, ip, #0x000f @ ............wcam bic ip, ip, #0x1100 @ ...i...s........ mcr p15, 0, ip, c1, c0, 0 @ ctrl register - mov pc, r0 + ret r0 ENDPROC(cpu_arm1026_reset) .popsection @@ -106,7 +106,7 @@ ENDPROC(cpu_arm1026_reset) .align 5 ENTRY(cpu_arm1026_do_idle) mcr p15, 0, r0, c7, c0, 4 @ Wait for interrupt - mov pc, lr + ret lr /* ================================= CACHE ================================ */ @@ -122,7 +122,7 @@ ENTRY(arm1026_flush_icache_all) mov r0, #0 mcr p15, 0, r0, c7, c5, 0 @ invalidate I cache #endif - mov pc, lr + ret lr ENDPROC(arm1026_flush_icache_all) /* @@ -151,7 +151,7 @@ __flush_whole_cache: mcrne p15, 0, ip, c7, c5, 0 @ invalidate I cache #endif mcrne p15, 0, ip, c7, c10, 4 @ drain WB - mov pc, lr + ret lr /* * flush_user_cache_range(start, end, flags) @@ -180,7 +180,7 @@ ENTRY(arm1026_flush_user_cache_range) mcrne p15, 0, ip, c7, c5, 0 @ invalidate I cache #endif mcrne p15, 0, ip, c7, c10, 4 @ drain WB - mov pc, lr + ret lr /* * coherent_kern_range(start, end) @@ -219,7 +219,7 @@ ENTRY(arm1026_coherent_user_range) blo 1b mcr p15, 0, ip, c7, c10, 4 @ drain WB mov r0, #0 - mov pc, lr + ret lr /* * flush_kern_dcache_area(void *addr, size_t size) @@ -240,7 +240,7 @@ ENTRY(arm1026_flush_kern_dcache_area) blo 1b #endif mcr p15, 0, ip, c7, c10, 4 @ drain WB - mov pc, lr + ret lr /* * dma_inv_range(start, end) @@ -269,7 +269,7 @@ arm1026_dma_inv_range: blo 1b #endif mcr p15, 0, ip, c7, c10, 4 @ drain WB - mov pc, lr + ret lr /* * dma_clean_range(start, end) @@ -291,7 +291,7 @@ arm1026_dma_clean_range: blo 1b #endif mcr p15, 0, ip, c7, c10, 4 @ drain WB - mov pc, lr + ret lr /* * dma_flush_range(start, end) @@ -311,7 +311,7 @@ ENTRY(arm1026_dma_flush_range) blo 1b #endif mcr p15, 0, ip, c7, c10, 4 @ drain WB - mov pc, lr + ret lr /* * dma_map_area(start, size, dir) @@ -334,7 +334,7 @@ ENDPROC(arm1026_dma_map_area) * - dir - DMA direction */ ENTRY(arm1026_dma_unmap_area) - mov pc, lr + ret lr ENDPROC(arm1026_dma_unmap_area) .globl arm1026_flush_kern_cache_louis @@ -352,7 +352,7 @@ ENTRY(cpu_arm1026_dcache_clean_area) subs r1, r1, #CACHE_DLINESIZE bhi 1b #endif - mov pc, lr + ret lr /* =============================== PageTable ============================== */ @@ -378,7 +378,7 @@ ENTRY(cpu_arm1026_switch_mm) mcr p15, 0, r0, c2, c0, 0 @ load page table pointer mcr p15, 0, r1, c8, c7, 0 @ invalidate I & D TLBs #endif - mov pc, lr + ret lr /* * cpu_arm1026_set_pte_ext(ptep, pte, ext) @@ -394,7 +394,7 @@ ENTRY(cpu_arm1026_set_pte_ext) mcr p15, 0, r0, c7, c10, 1 @ clean D entry #endif #endif /* CONFIG_MMU */ - mov pc, lr + ret lr .type __arm1026_setup, #function __arm1026_setup: @@ -417,7 +417,7 @@ __arm1026_setup: #ifdef CONFIG_CPU_CACHE_ROUND_ROBIN orr r0, r0, #0x4000 @ .R.. .... .... .... #endif - mov pc, lr + ret lr .size __arm1026_setup, . - __arm1026_setup /* diff --git a/arch/arm/mm/proc-arm720.S b/arch/arm/mm/proc-arm720.S index d42c37f9f5bc..2baa66b3ac9b 100644 --- a/arch/arm/mm/proc-arm720.S +++ b/arch/arm/mm/proc-arm720.S @@ -51,14 +51,14 @@ */ ENTRY(cpu_arm720_dcache_clean_area) ENTRY(cpu_arm720_proc_init) - mov pc, lr + ret lr ENTRY(cpu_arm720_proc_fin) mrc p15, 0, r0, c1, c0, 0 bic r0, r0, #0x1000 @ ...i............ bic r0, r0, #0x000e @ ............wca. mcr p15, 0, r0, c1, c0, 0 @ disable caches - mov pc, lr + ret lr /* * Function: arm720_proc_do_idle(void) @@ -66,7 +66,7 @@ ENTRY(cpu_arm720_proc_fin) * Purpose : put the processor in proper idle mode */ ENTRY(cpu_arm720_do_idle) - mov pc, lr + ret lr /* * Function: arm720_switch_mm(unsigned long pgd_phys) @@ -81,7 +81,7 @@ ENTRY(cpu_arm720_switch_mm) mcr p15, 0, r0, c2, c0, 0 @ update page table ptr mcr p15, 0, r1, c8, c7, 0 @ flush TLB (v4) #endif - mov pc, lr + ret lr /* * Function: arm720_set_pte_ext(pte_t *ptep, pte_t pte, unsigned int ext) @@ -94,7 +94,7 @@ ENTRY(cpu_arm720_set_pte_ext) #ifdef CONFIG_MMU armv3_set_pte_ext wc_disable=0 #endif - mov pc, lr + ret lr /* * Function: arm720_reset @@ -112,7 +112,7 @@ ENTRY(cpu_arm720_reset) bic ip, ip, #0x000f @ ............wcam bic ip, ip, #0x2100 @ ..v....s........ mcr p15, 0, ip, c1, c0, 0 @ ctrl register - mov pc, r0 + ret r0 ENDPROC(cpu_arm720_reset) .popsection @@ -128,7 +128,7 @@ __arm710_setup: bic r0, r0, r5 ldr r5, arm710_cr1_set orr r0, r0, r5 - mov pc, lr @ __ret (head.S) + ret lr @ __ret (head.S) .size __arm710_setup, . - __arm710_setup /* @@ -156,7 +156,7 @@ __arm720_setup: mrc p15, 0, r0, c1, c0 @ get control register bic r0, r0, r5 orr r0, r0, r6 - mov pc, lr @ __ret (head.S) + ret lr @ __ret (head.S) .size __arm720_setup, . - __arm720_setup /* diff --git a/arch/arm/mm/proc-arm740.S b/arch/arm/mm/proc-arm740.S index 9b0ae90cbf17..ac1ea6b3bce4 100644 --- a/arch/arm/mm/proc-arm740.S +++ b/arch/arm/mm/proc-arm740.S @@ -32,7 +32,7 @@ ENTRY(cpu_arm740_proc_init) ENTRY(cpu_arm740_do_idle) ENTRY(cpu_arm740_dcache_clean_area) ENTRY(cpu_arm740_switch_mm) - mov pc, lr + ret lr /* * cpu_arm740_proc_fin() @@ -42,7 +42,7 @@ ENTRY(cpu_arm740_proc_fin) bic r0, r0, #0x3f000000 @ bank/f/lock/s bic r0, r0, #0x0000000c @ w-buffer/cache mcr p15, 0, r0, c1, c0, 0 @ disable caches - mov pc, lr + ret lr /* * cpu_arm740_reset(loc) @@ -56,7 +56,7 @@ ENTRY(cpu_arm740_reset) mrc p15, 0, ip, c1, c0, 0 @ get ctrl register bic ip, ip, #0x0000000c @ ............wc.. mcr p15, 0, ip, c1, c0, 0 @ ctrl register - mov pc, r0 + ret r0 ENDPROC(cpu_arm740_reset) .popsection @@ -115,7 +115,7 @@ __arm740_setup: @ need some benchmark orr r0, r0, #0x0000000d @ MPU/Cache/WB - mov pc, lr + ret lr .size __arm740_setup, . - __arm740_setup diff --git a/arch/arm/mm/proc-arm7tdmi.S b/arch/arm/mm/proc-arm7tdmi.S index f6cc3f63ce39..bf6ba4bc30ff 100644 --- a/arch/arm/mm/proc-arm7tdmi.S +++ b/arch/arm/mm/proc-arm7tdmi.S @@ -32,13 +32,13 @@ ENTRY(cpu_arm7tdmi_proc_init) ENTRY(cpu_arm7tdmi_do_idle) ENTRY(cpu_arm7tdmi_dcache_clean_area) ENTRY(cpu_arm7tdmi_switch_mm) - mov pc, lr + ret lr /* * cpu_arm7tdmi_proc_fin() */ ENTRY(cpu_arm7tdmi_proc_fin) - mov pc, lr + ret lr /* * Function: cpu_arm7tdmi_reset(loc) @@ -47,13 +47,13 @@ ENTRY(cpu_arm7tdmi_proc_fin) */ .pushsection .idmap.text, "ax" ENTRY(cpu_arm7tdmi_reset) - mov pc, r0 + ret r0 ENDPROC(cpu_arm7tdmi_reset) .popsection .type __arm7tdmi_setup, #function __arm7tdmi_setup: - mov pc, lr + ret lr .size __arm7tdmi_setup, . - __arm7tdmi_setup __INITDATA diff --git a/arch/arm/mm/proc-arm920.S b/arch/arm/mm/proc-arm920.S index 549557df6d57..22bf8dde4f84 100644 --- a/arch/arm/mm/proc-arm920.S +++ b/arch/arm/mm/proc-arm920.S @@ -63,7 +63,7 @@ * cpu_arm920_proc_init() */ ENTRY(cpu_arm920_proc_init) - mov pc, lr + ret lr /* * cpu_arm920_proc_fin() @@ -73,7 +73,7 @@ ENTRY(cpu_arm920_proc_fin) bic r0, r0, #0x1000 @ ...i............ bic r0, r0, #0x000e @ ............wca. mcr p15, 0, r0, c1, c0, 0 @ disable caches - mov pc, lr + ret lr /* * cpu_arm920_reset(loc) @@ -97,7 +97,7 @@ ENTRY(cpu_arm920_reset) bic ip, ip, #0x000f @ ............wcam bic ip, ip, #0x1100 @ ...i...s........ mcr p15, 0, ip, c1, c0, 0 @ ctrl register - mov pc, r0 + ret r0 ENDPROC(cpu_arm920_reset) .popsection @@ -107,7 +107,7 @@ ENDPROC(cpu_arm920_reset) .align 5 ENTRY(cpu_arm920_do_idle) mcr p15, 0, r0, c7, c0, 4 @ Wait for interrupt - mov pc, lr + ret lr #ifndef CONFIG_CPU_DCACHE_WRITETHROUGH @@ -120,7 +120,7 @@ ENTRY(cpu_arm920_do_idle) ENTRY(arm920_flush_icache_all) mov r0, #0 mcr p15, 0, r0, c7, c5, 0 @ invalidate I cache - mov pc, lr + ret lr ENDPROC(arm920_flush_icache_all) /* @@ -151,7 +151,7 @@ __flush_whole_cache: tst r2, #VM_EXEC mcrne p15, 0, ip, c7, c5, 0 @ invalidate I cache mcrne p15, 0, ip, c7, c10, 4 @ drain WB - mov pc, lr + ret lr /* * flush_user_cache_range(start, end, flags) @@ -177,7 +177,7 @@ ENTRY(arm920_flush_user_cache_range) blo 1b tst r2, #VM_EXEC mcrne p15, 0, ip, c7, c10, 4 @ drain WB - mov pc, lr + ret lr /* * coherent_kern_range(start, end) @@ -211,7 +211,7 @@ ENTRY(arm920_coherent_user_range) blo 1b mcr p15, 0, r0, c7, c10, 4 @ drain WB mov r0, #0 - mov pc, lr + ret lr /* * flush_kern_dcache_area(void *addr, size_t size) @@ -231,7 +231,7 @@ ENTRY(arm920_flush_kern_dcache_area) mov r0, #0 mcr p15, 0, r0, c7, c5, 0 @ invalidate I cache mcr p15, 0, r0, c7, c10, 4 @ drain WB - mov pc, lr + ret lr /* * dma_inv_range(start, end) @@ -257,7 +257,7 @@ arm920_dma_inv_range: cmp r0, r1 blo 1b mcr p15, 0, r0, c7, c10, 4 @ drain WB - mov pc, lr + ret lr /* * dma_clean_range(start, end) @@ -276,7 +276,7 @@ arm920_dma_clean_range: cmp r0, r1 blo 1b mcr p15, 0, r0, c7, c10, 4 @ drain WB - mov pc, lr + ret lr /* * dma_flush_range(start, end) @@ -293,7 +293,7 @@ ENTRY(arm920_dma_flush_range) cmp r0, r1 blo 1b mcr p15, 0, r0, c7, c10, 4 @ drain WB - mov pc, lr + ret lr /* * dma_map_area(start, size, dir) @@ -316,7 +316,7 @@ ENDPROC(arm920_dma_map_area) * - dir - DMA direction */ ENTRY(arm920_dma_unmap_area) - mov pc, lr + ret lr ENDPROC(arm920_dma_unmap_area) .globl arm920_flush_kern_cache_louis @@ -332,7 +332,7 @@ ENTRY(cpu_arm920_dcache_clean_area) add r0, r0, #CACHE_DLINESIZE subs r1, r1, #CACHE_DLINESIZE bhi 1b - mov pc, lr + ret lr /* =============================== PageTable ============================== */ @@ -367,7 +367,7 @@ ENTRY(cpu_arm920_switch_mm) mcr p15, 0, r0, c2, c0, 0 @ load page table pointer mcr p15, 0, ip, c8, c7, 0 @ invalidate I & D TLBs #endif - mov pc, lr + ret lr /* * cpu_arm920_set_pte(ptep, pte, ext) @@ -382,7 +382,7 @@ ENTRY(cpu_arm920_set_pte_ext) mcr p15, 0, r0, c7, c10, 1 @ clean D entry mcr p15, 0, r0, c7, c10, 4 @ drain WB #endif - mov pc, lr + ret lr /* Suspend/resume support: taken from arch/arm/plat-s3c24xx/sleep.S */ .globl cpu_arm920_suspend_size @@ -423,7 +423,7 @@ __arm920_setup: mrc p15, 0, r0, c1, c0 @ get control register v4 bic r0, r0, r5 orr r0, r0, r6 - mov pc, lr + ret lr .size __arm920_setup, . - __arm920_setup /* diff --git a/arch/arm/mm/proc-arm922.S b/arch/arm/mm/proc-arm922.S index 2a758b06c6f6..0c6d5ac5a6d4 100644 --- a/arch/arm/mm/proc-arm922.S +++ b/arch/arm/mm/proc-arm922.S @@ -65,7 +65,7 @@ * cpu_arm922_proc_init() */ ENTRY(cpu_arm922_proc_init) - mov pc, lr + ret lr /* * cpu_arm922_proc_fin() @@ -75,7 +75,7 @@ ENTRY(cpu_arm922_proc_fin) bic r0, r0, #0x1000 @ ...i............ bic r0, r0, #0x000e @ ............wca. mcr p15, 0, r0, c1, c0, 0 @ disable caches - mov pc, lr + ret lr /* * cpu_arm922_reset(loc) @@ -99,7 +99,7 @@ ENTRY(cpu_arm922_reset) bic ip, ip, #0x000f @ ............wcam bic ip, ip, #0x1100 @ ...i...s........ mcr p15, 0, ip, c1, c0, 0 @ ctrl register - mov pc, r0 + ret r0 ENDPROC(cpu_arm922_reset) .popsection @@ -109,7 +109,7 @@ ENDPROC(cpu_arm922_reset) .align 5 ENTRY(cpu_arm922_do_idle) mcr p15, 0, r0, c7, c0, 4 @ Wait for interrupt - mov pc, lr + ret lr #ifndef CONFIG_CPU_DCACHE_WRITETHROUGH @@ -122,7 +122,7 @@ ENTRY(cpu_arm922_do_idle) ENTRY(arm922_flush_icache_all) mov r0, #0 mcr p15, 0, r0, c7, c5, 0 @ invalidate I cache - mov pc, lr + ret lr ENDPROC(arm922_flush_icache_all) /* @@ -153,7 +153,7 @@ __flush_whole_cache: tst r2, #VM_EXEC mcrne p15, 0, ip, c7, c5, 0 @ invalidate I cache mcrne p15, 0, ip, c7, c10, 4 @ drain WB - mov pc, lr + ret lr /* * flush_user_cache_range(start, end, flags) @@ -179,7 +179,7 @@ ENTRY(arm922_flush_user_cache_range) blo 1b tst r2, #VM_EXEC mcrne p15, 0, ip, c7, c10, 4 @ drain WB - mov pc, lr + ret lr /* * coherent_kern_range(start, end) @@ -213,7 +213,7 @@ ENTRY(arm922_coherent_user_range) blo 1b mcr p15, 0, r0, c7, c10, 4 @ drain WB mov r0, #0 - mov pc, lr + ret lr /* * flush_kern_dcache_area(void *addr, size_t size) @@ -233,7 +233,7 @@ ENTRY(arm922_flush_kern_dcache_area) mov r0, #0 mcr p15, 0, r0, c7, c5, 0 @ invalidate I cache mcr p15, 0, r0, c7, c10, 4 @ drain WB - mov pc, lr + ret lr /* * dma_inv_range(start, end) @@ -259,7 +259,7 @@ arm922_dma_inv_range: cmp r0, r1 blo 1b mcr p15, 0, r0, c7, c10, 4 @ drain WB - mov pc, lr + ret lr /* * dma_clean_range(start, end) @@ -278,7 +278,7 @@ arm922_dma_clean_range: cmp r0, r1 blo 1b mcr p15, 0, r0, c7, c10, 4 @ drain WB - mov pc, lr + ret lr /* * dma_flush_range(start, end) @@ -295,7 +295,7 @@ ENTRY(arm922_dma_flush_range) cmp r0, r1 blo 1b mcr p15, 0, r0, c7, c10, 4 @ drain WB - mov pc, lr + ret lr /* * dma_map_area(start, size, dir) @@ -318,7 +318,7 @@ ENDPROC(arm922_dma_map_area) * - dir - DMA direction */ ENTRY(arm922_dma_unmap_area) - mov pc, lr + ret lr ENDPROC(arm922_dma_unmap_area) .globl arm922_flush_kern_cache_louis @@ -336,7 +336,7 @@ ENTRY(cpu_arm922_dcache_clean_area) subs r1, r1, #CACHE_DLINESIZE bhi 1b #endif - mov pc, lr + ret lr /* =============================== PageTable ============================== */ @@ -371,7 +371,7 @@ ENTRY(cpu_arm922_switch_mm) mcr p15, 0, r0, c2, c0, 0 @ load page table pointer mcr p15, 0, ip, c8, c7, 0 @ invalidate I & D TLBs #endif - mov pc, lr + ret lr /* * cpu_arm922_set_pte_ext(ptep, pte, ext) @@ -386,7 +386,7 @@ ENTRY(cpu_arm922_set_pte_ext) mcr p15, 0, r0, c7, c10, 1 @ clean D entry mcr p15, 0, r0, c7, c10, 4 @ drain WB #endif /* CONFIG_MMU */ - mov pc, lr + ret lr .type __arm922_setup, #function __arm922_setup: @@ -401,7 +401,7 @@ __arm922_setup: mrc p15, 0, r0, c1, c0 @ get control register v4 bic r0, r0, r5 orr r0, r0, r6 - mov pc, lr + ret lr .size __arm922_setup, . - __arm922_setup /* diff --git a/arch/arm/mm/proc-arm925.S b/arch/arm/mm/proc-arm925.S index ba0d58e1a2a2..c32d073282ea 100644 --- a/arch/arm/mm/proc-arm925.S +++ b/arch/arm/mm/proc-arm925.S @@ -86,7 +86,7 @@ * cpu_arm925_proc_init() */ ENTRY(cpu_arm925_proc_init) - mov pc, lr + ret lr /* * cpu_arm925_proc_fin() @@ -96,7 +96,7 @@ ENTRY(cpu_arm925_proc_fin) bic r0, r0, #0x1000 @ ...i............ bic r0, r0, #0x000e @ ............wca. mcr p15, 0, r0, c1, c0, 0 @ disable caches - mov pc, lr + ret lr /* * cpu_arm925_reset(loc) @@ -129,7 +129,7 @@ ENDPROC(cpu_arm925_reset) bic ip, ip, #0x000f @ ............wcam bic ip, ip, #0x1100 @ ...i...s........ mcr p15, 0, ip, c1, c0, 0 @ ctrl register - mov pc, r0 + ret r0 /* * cpu_arm925_do_idle() @@ -145,7 +145,7 @@ ENTRY(cpu_arm925_do_idle) mcr p15, 0, r2, c1, c0, 0 @ Disable I cache mcr p15, 0, r0, c7, c0, 4 @ Wait for interrupt mcr p15, 0, r1, c1, c0, 0 @ Restore ICache enable - mov pc, lr + ret lr /* * flush_icache_all() @@ -155,7 +155,7 @@ ENTRY(cpu_arm925_do_idle) ENTRY(arm925_flush_icache_all) mov r0, #0 mcr p15, 0, r0, c7, c5, 0 @ invalidate I cache - mov pc, lr + ret lr ENDPROC(arm925_flush_icache_all) /* @@ -188,7 +188,7 @@ __flush_whole_cache: tst r2, #VM_EXEC mcrne p15, 0, ip, c7, c5, 0 @ invalidate I cache mcrne p15, 0, ip, c7, c10, 4 @ drain WB - mov pc, lr + ret lr /* * flush_user_cache_range(start, end, flags) @@ -225,7 +225,7 @@ ENTRY(arm925_flush_user_cache_range) blo 1b tst r2, #VM_EXEC mcrne p15, 0, ip, c7, c10, 4 @ drain WB - mov pc, lr + ret lr /* * coherent_kern_range(start, end) @@ -259,7 +259,7 @@ ENTRY(arm925_coherent_user_range) blo 1b mcr p15, 0, r0, c7, c10, 4 @ drain WB mov r0, #0 - mov pc, lr + ret lr /* * flush_kern_dcache_area(void *addr, size_t size) @@ -279,7 +279,7 @@ ENTRY(arm925_flush_kern_dcache_area) mov r0, #0 mcr p15, 0, r0, c7, c5, 0 @ invalidate I cache mcr p15, 0, r0, c7, c10, 4 @ drain WB - mov pc, lr + ret lr /* * dma_inv_range(start, end) @@ -307,7 +307,7 @@ arm925_dma_inv_range: cmp r0, r1 blo 1b mcr p15, 0, r0, c7, c10, 4 @ drain WB - mov pc, lr + ret lr /* * dma_clean_range(start, end) @@ -328,7 +328,7 @@ arm925_dma_clean_range: blo 1b #endif mcr p15, 0, r0, c7, c10, 4 @ drain WB - mov pc, lr + ret lr /* * dma_flush_range(start, end) @@ -350,7 +350,7 @@ ENTRY(arm925_dma_flush_range) cmp r0, r1 blo 1b mcr p15, 0, r0, c7, c10, 4 @ drain WB - mov pc, lr + ret lr /* * dma_map_area(start, size, dir) @@ -373,7 +373,7 @@ ENDPROC(arm925_dma_map_area) * - dir - DMA direction */ ENTRY(arm925_dma_unmap_area) - mov pc, lr + ret lr ENDPROC(arm925_dma_unmap_area) .globl arm925_flush_kern_cache_louis @@ -390,7 +390,7 @@ ENTRY(cpu_arm925_dcache_clean_area) bhi 1b #endif mcr p15, 0, r0, c7, c10, 4 @ drain WB - mov pc, lr + ret lr /* =============================== PageTable ============================== */ @@ -419,7 +419,7 @@ ENTRY(cpu_arm925_switch_mm) mcr p15, 0, r0, c2, c0, 0 @ load page table pointer mcr p15, 0, ip, c8, c7, 0 @ invalidate I & D TLBs #endif - mov pc, lr + ret lr /* * cpu_arm925_set_pte_ext(ptep, pte, ext) @@ -436,7 +436,7 @@ ENTRY(cpu_arm925_set_pte_ext) #endif mcr p15, 0, r0, c7, c10, 4 @ drain WB #endif /* CONFIG_MMU */ - mov pc, lr + ret lr .type __arm925_setup, #function __arm925_setup: @@ -469,7 +469,7 @@ __arm925_setup: #ifdef CONFIG_CPU_CACHE_ROUND_ROBIN orr r0, r0, #0x4000 @ .1.. .... .... .... #endif - mov pc, lr + ret lr .size __arm925_setup, . - __arm925_setup /* diff --git a/arch/arm/mm/proc-arm926.S b/arch/arm/mm/proc-arm926.S index 0f098f407c9f..252b2503038d 100644 --- a/arch/arm/mm/proc-arm926.S +++ b/arch/arm/mm/proc-arm926.S @@ -55,7 +55,7 @@ * cpu_arm926_proc_init() */ ENTRY(cpu_arm926_proc_init) - mov pc, lr + ret lr /* * cpu_arm926_proc_fin() @@ -65,7 +65,7 @@ ENTRY(cpu_arm926_proc_fin) bic r0, r0, #0x1000 @ ...i............ bic r0, r0, #0x000e @ ............wca. mcr p15, 0, r0, c1, c0, 0 @ disable caches - mov pc, lr + ret lr /* * cpu_arm926_reset(loc) @@ -89,7 +89,7 @@ ENTRY(cpu_arm926_reset) bic ip, ip, #0x000f @ ............wcam bic ip, ip, #0x1100 @ ...i...s........ mcr p15, 0, ip, c1, c0, 0 @ ctrl register - mov pc, r0 + ret r0 ENDPROC(cpu_arm926_reset) .popsection @@ -111,7 +111,7 @@ ENTRY(cpu_arm926_do_idle) mcr p15, 0, r0, c7, c0, 4 @ Wait for interrupt mcr p15, 0, r1, c1, c0, 0 @ Restore ICache enable msr cpsr_c, r3 @ Restore FIQ state - mov pc, lr + ret lr /* * flush_icache_all() @@ -121,7 +121,7 @@ ENTRY(cpu_arm926_do_idle) ENTRY(arm926_flush_icache_all) mov r0, #0 mcr p15, 0, r0, c7, c5, 0 @ invalidate I cache - mov pc, lr + ret lr ENDPROC(arm926_flush_icache_all) /* @@ -151,7 +151,7 @@ __flush_whole_cache: tst r2, #VM_EXEC mcrne p15, 0, ip, c7, c5, 0 @ invalidate I cache mcrne p15, 0, ip, c7, c10, 4 @ drain WB - mov pc, lr + ret lr /* * flush_user_cache_range(start, end, flags) @@ -188,7 +188,7 @@ ENTRY(arm926_flush_user_cache_range) blo 1b tst r2, #VM_EXEC mcrne p15, 0, ip, c7, c10, 4 @ drain WB - mov pc, lr + ret lr /* * coherent_kern_range(start, end) @@ -222,7 +222,7 @@ ENTRY(arm926_coherent_user_range) blo 1b mcr p15, 0, r0, c7, c10, 4 @ drain WB mov r0, #0 - mov pc, lr + ret lr /* * flush_kern_dcache_area(void *addr, size_t size) @@ -242,7 +242,7 @@ ENTRY(arm926_flush_kern_dcache_area) mov r0, #0 mcr p15, 0, r0, c7, c5, 0 @ invalidate I cache mcr p15, 0, r0, c7, c10, 4 @ drain WB - mov pc, lr + ret lr /* * dma_inv_range(start, end) @@ -270,7 +270,7 @@ arm926_dma_inv_range: cmp r0, r1 blo 1b mcr p15, 0, r0, c7, c10, 4 @ drain WB - mov pc, lr + ret lr /* * dma_clean_range(start, end) @@ -291,7 +291,7 @@ arm926_dma_clean_range: blo 1b #endif mcr p15, 0, r0, c7, c10, 4 @ drain WB - mov pc, lr + ret lr /* * dma_flush_range(start, end) @@ -313,7 +313,7 @@ ENTRY(arm926_dma_flush_range) cmp r0, r1 blo 1b mcr p15, 0, r0, c7, c10, 4 @ drain WB - mov pc, lr + ret lr /* * dma_map_area(start, size, dir) @@ -336,7 +336,7 @@ ENDPROC(arm926_dma_map_area) * - dir - DMA direction */ ENTRY(arm926_dma_unmap_area) - mov pc, lr + ret lr ENDPROC(arm926_dma_unmap_area) .globl arm926_flush_kern_cache_louis @@ -353,7 +353,7 @@ ENTRY(cpu_arm926_dcache_clean_area) bhi 1b #endif mcr p15, 0, r0, c7, c10, 4 @ drain WB - mov pc, lr + ret lr /* =============================== PageTable ============================== */ @@ -380,7 +380,7 @@ ENTRY(cpu_arm926_switch_mm) mcr p15, 0, r0, c2, c0, 0 @ load page table pointer mcr p15, 0, ip, c8, c7, 0 @ invalidate I & D TLBs #endif - mov pc, lr + ret lr /* * cpu_arm926_set_pte_ext(ptep, pte, ext) @@ -397,7 +397,7 @@ ENTRY(cpu_arm926_set_pte_ext) #endif mcr p15, 0, r0, c7, c10, 4 @ drain WB #endif - mov pc, lr + ret lr /* Suspend/resume support: taken from arch/arm/plat-s3c24xx/sleep.S */ .globl cpu_arm926_suspend_size @@ -448,7 +448,7 @@ __arm926_setup: #ifdef CONFIG_CPU_CACHE_ROUND_ROBIN orr r0, r0, #0x4000 @ .1.. .... .... .... #endif - mov pc, lr + ret lr .size __arm926_setup, . - __arm926_setup /* diff --git a/arch/arm/mm/proc-arm940.S b/arch/arm/mm/proc-arm940.S index 1c39a704ff6e..e5212d489377 100644 --- a/arch/arm/mm/proc-arm940.S +++ b/arch/arm/mm/proc-arm940.S @@ -31,7 +31,7 @@ */ ENTRY(cpu_arm940_proc_init) ENTRY(cpu_arm940_switch_mm) - mov pc, lr + ret lr /* * cpu_arm940_proc_fin() @@ -41,7 +41,7 @@ ENTRY(cpu_arm940_proc_fin) bic r0, r0, #0x00001000 @ i-cache bic r0, r0, #0x00000004 @ d-cache mcr p15, 0, r0, c1, c0, 0 @ disable caches - mov pc, lr + ret lr /* * cpu_arm940_reset(loc) @@ -58,7 +58,7 @@ ENTRY(cpu_arm940_reset) bic ip, ip, #0x00000005 @ .............c.p bic ip, ip, #0x00001000 @ i-cache mcr p15, 0, ip, c1, c0, 0 @ ctrl register - mov pc, r0 + ret r0 ENDPROC(cpu_arm940_reset) .popsection @@ -68,7 +68,7 @@ ENDPROC(cpu_arm940_reset) .align 5 ENTRY(cpu_arm940_do_idle) mcr p15, 0, r0, c7, c0, 4 @ Wait for interrupt - mov pc, lr + ret lr /* * flush_icache_all() @@ -78,7 +78,7 @@ ENTRY(cpu_arm940_do_idle) ENTRY(arm940_flush_icache_all) mov r0, #0 mcr p15, 0, r0, c7, c5, 0 @ invalidate I cache - mov pc, lr + ret lr ENDPROC(arm940_flush_icache_all) /* @@ -122,7 +122,7 @@ ENTRY(arm940_flush_user_cache_range) tst r2, #VM_EXEC mcrne p15, 0, ip, c7, c5, 0 @ invalidate I cache mcrne p15, 0, ip, c7, c10, 4 @ drain WB - mov pc, lr + ret lr /* * coherent_kern_range(start, end) @@ -170,7 +170,7 @@ ENTRY(arm940_flush_kern_dcache_area) bcs 1b @ segments 7 to 0 mcr p15, 0, r0, c7, c5, 0 @ invalidate I cache mcr p15, 0, r0, c7, c10, 4 @ drain WB - mov pc, lr + ret lr /* * dma_inv_range(start, end) @@ -191,7 +191,7 @@ arm940_dma_inv_range: subs r1, r1, #1 << 4 bcs 1b @ segments 7 to 0 mcr p15, 0, ip, c7, c10, 4 @ drain WB - mov pc, lr + ret lr /* * dma_clean_range(start, end) @@ -215,7 +215,7 @@ ENTRY(cpu_arm940_dcache_clean_area) bcs 1b @ segments 7 to 0 #endif mcr p15, 0, ip, c7, c10, 4 @ drain WB - mov pc, lr + ret lr /* * dma_flush_range(start, end) @@ -241,7 +241,7 @@ ENTRY(arm940_dma_flush_range) subs r1, r1, #1 << 4 bcs 1b @ segments 7 to 0 mcr p15, 0, ip, c7, c10, 4 @ drain WB - mov pc, lr + ret lr /* * dma_map_area(start, size, dir) @@ -264,7 +264,7 @@ ENDPROC(arm940_dma_map_area) * - dir - DMA direction */ ENTRY(arm940_dma_unmap_area) - mov pc, lr + ret lr ENDPROC(arm940_dma_unmap_area) .globl arm940_flush_kern_cache_louis @@ -337,7 +337,7 @@ __arm940_setup: orr r0, r0, #0x00001000 @ I-cache orr r0, r0, #0x00000005 @ MPU/D-cache - mov pc, lr + ret lr .size __arm940_setup, . - __arm940_setup diff --git a/arch/arm/mm/proc-arm946.S b/arch/arm/mm/proc-arm946.S index 0289cd905e73..b3dd9b2d0b8e 100644 --- a/arch/arm/mm/proc-arm946.S +++ b/arch/arm/mm/proc-arm946.S @@ -38,7 +38,7 @@ */ ENTRY(cpu_arm946_proc_init) ENTRY(cpu_arm946_switch_mm) - mov pc, lr + ret lr /* * cpu_arm946_proc_fin() @@ -48,7 +48,7 @@ ENTRY(cpu_arm946_proc_fin) bic r0, r0, #0x00001000 @ i-cache bic r0, r0, #0x00000004 @ d-cache mcr p15, 0, r0, c1, c0, 0 @ disable caches - mov pc, lr + ret lr /* * cpu_arm946_reset(loc) @@ -65,7 +65,7 @@ ENTRY(cpu_arm946_reset) bic ip, ip, #0x00000005 @ .............c.p bic ip, ip, #0x00001000 @ i-cache mcr p15, 0, ip, c1, c0, 0 @ ctrl register - mov pc, r0 + ret r0 ENDPROC(cpu_arm946_reset) .popsection @@ -75,7 +75,7 @@ ENDPROC(cpu_arm946_reset) .align 5 ENTRY(cpu_arm946_do_idle) mcr p15, 0, r0, c7, c0, 4 @ Wait for interrupt - mov pc, lr + ret lr /* * flush_icache_all() @@ -85,7 +85,7 @@ ENTRY(cpu_arm946_do_idle) ENTRY(arm946_flush_icache_all) mov r0, #0 mcr p15, 0, r0, c7, c5, 0 @ invalidate I cache - mov pc, lr + ret lr ENDPROC(arm946_flush_icache_all) /* @@ -117,7 +117,7 @@ __flush_whole_cache: tst r2, #VM_EXEC mcrne p15, 0, ip, c7, c5, 0 @ flush I cache mcrne p15, 0, ip, c7, c10, 4 @ drain WB - mov pc, lr + ret lr /* * flush_user_cache_range(start, end, flags) @@ -156,7 +156,7 @@ ENTRY(arm946_flush_user_cache_range) blo 1b tst r2, #VM_EXEC mcrne p15, 0, ip, c7, c10, 4 @ drain WB - mov pc, lr + ret lr /* * coherent_kern_range(start, end) @@ -191,7 +191,7 @@ ENTRY(arm946_coherent_user_range) blo 1b mcr p15, 0, r0, c7, c10, 4 @ drain WB mov r0, #0 - mov pc, lr + ret lr /* * flush_kern_dcache_area(void *addr, size_t size) @@ -212,7 +212,7 @@ ENTRY(arm946_flush_kern_dcache_area) mov r0, #0 mcr p15, 0, r0, c7, c5, 0 @ invalidate I cache mcr p15, 0, r0, c7, c10, 4 @ drain WB - mov pc, lr + ret lr /* * dma_inv_range(start, end) @@ -239,7 +239,7 @@ arm946_dma_inv_range: cmp r0, r1 blo 1b mcr p15, 0, r0, c7, c10, 4 @ drain WB - mov pc, lr + ret lr /* * dma_clean_range(start, end) @@ -260,7 +260,7 @@ arm946_dma_clean_range: blo 1b #endif mcr p15, 0, r0, c7, c10, 4 @ drain WB - mov pc, lr + ret lr /* * dma_flush_range(start, end) @@ -284,7 +284,7 @@ ENTRY(arm946_dma_flush_range) cmp r0, r1 blo 1b mcr p15, 0, r0, c7, c10, 4 @ drain WB - mov pc, lr + ret lr /* * dma_map_area(start, size, dir) @@ -307,7 +307,7 @@ ENDPROC(arm946_dma_map_area) * - dir - DMA direction */ ENTRY(arm946_dma_unmap_area) - mov pc, lr + ret lr ENDPROC(arm946_dma_unmap_area) .globl arm946_flush_kern_cache_louis @@ -324,7 +324,7 @@ ENTRY(cpu_arm946_dcache_clean_area) bhi 1b #endif mcr p15, 0, r0, c7, c10, 4 @ drain WB - mov pc, lr + ret lr .type __arm946_setup, #function __arm946_setup: @@ -392,7 +392,7 @@ __arm946_setup: #ifdef CONFIG_CPU_CACHE_ROUND_ROBIN orr r0, r0, #0x00004000 @ .1.. .... .... .... #endif - mov pc, lr + ret lr .size __arm946_setup, . - __arm946_setup diff --git a/arch/arm/mm/proc-arm9tdmi.S b/arch/arm/mm/proc-arm9tdmi.S index f51197ba754a..8227322bbb8f 100644 --- a/arch/arm/mm/proc-arm9tdmi.S +++ b/arch/arm/mm/proc-arm9tdmi.S @@ -32,13 +32,13 @@ ENTRY(cpu_arm9tdmi_proc_init) ENTRY(cpu_arm9tdmi_do_idle) ENTRY(cpu_arm9tdmi_dcache_clean_area) ENTRY(cpu_arm9tdmi_switch_mm) - mov pc, lr + ret lr /* * cpu_arm9tdmi_proc_fin() */ ENTRY(cpu_arm9tdmi_proc_fin) - mov pc, lr + ret lr /* * Function: cpu_arm9tdmi_reset(loc) @@ -47,13 +47,13 @@ ENTRY(cpu_arm9tdmi_proc_fin) */ .pushsection .idmap.text, "ax" ENTRY(cpu_arm9tdmi_reset) - mov pc, r0 + ret r0 ENDPROC(cpu_arm9tdmi_reset) .popsection .type __arm9tdmi_setup, #function __arm9tdmi_setup: - mov pc, lr + ret lr .size __arm9tdmi_setup, . - __arm9tdmi_setup __INITDATA diff --git a/arch/arm/mm/proc-fa526.S b/arch/arm/mm/proc-fa526.S index 2dfc0f1d3bfd..c494886892ba 100644 --- a/arch/arm/mm/proc-fa526.S +++ b/arch/arm/mm/proc-fa526.S @@ -32,7 +32,7 @@ * cpu_fa526_proc_init() */ ENTRY(cpu_fa526_proc_init) - mov pc, lr + ret lr /* * cpu_fa526_proc_fin() @@ -44,7 +44,7 @@ ENTRY(cpu_fa526_proc_fin) mcr p15, 0, r0, c1, c0, 0 @ disable caches nop nop - mov pc, lr + ret lr /* * cpu_fa526_reset(loc) @@ -72,7 +72,7 @@ ENTRY(cpu_fa526_reset) mcr p15, 0, ip, c1, c0, 0 @ ctrl register nop nop - mov pc, r0 + ret r0 ENDPROC(cpu_fa526_reset) .popsection @@ -81,7 +81,7 @@ ENDPROC(cpu_fa526_reset) */ .align 4 ENTRY(cpu_fa526_do_idle) - mov pc, lr + ret lr ENTRY(cpu_fa526_dcache_clean_area) @@ -90,7 +90,7 @@ ENTRY(cpu_fa526_dcache_clean_area) subs r1, r1, #CACHE_DLINESIZE bhi 1b mcr p15, 0, r0, c7, c10, 4 @ drain WB - mov pc, lr + ret lr /* =============================== PageTable ============================== */ @@ -117,7 +117,7 @@ ENTRY(cpu_fa526_switch_mm) mcr p15, 0, r0, c2, c0, 0 @ load page table pointer mcr p15, 0, ip, c8, c7, 0 @ invalidate UTLB #endif - mov pc, lr + ret lr /* * cpu_fa526_set_pte_ext(ptep, pte, ext) @@ -133,7 +133,7 @@ ENTRY(cpu_fa526_set_pte_ext) mov r0, #0 mcr p15, 0, r0, c7, c10, 4 @ drain WB #endif - mov pc, lr + ret lr .type __fa526_setup, #function __fa526_setup: @@ -162,7 +162,7 @@ __fa526_setup: bic r0, r0, r5 ldr r5, fa526_cr1_set orr r0, r0, r5 - mov pc, lr + ret lr .size __fa526_setup, . - __fa526_setup /* diff --git a/arch/arm/mm/proc-feroceon.S b/arch/arm/mm/proc-feroceon.S index db79b62c92fb..03a1b75f2e16 100644 --- a/arch/arm/mm/proc-feroceon.S +++ b/arch/arm/mm/proc-feroceon.S @@ -69,7 +69,7 @@ ENTRY(cpu_feroceon_proc_init) movne r2, r2, lsr #2 @ turned into # of sets sub r2, r2, #(1 << 5) stmia r1, {r2, r3} - mov pc, lr + ret lr /* * cpu_feroceon_proc_fin() @@ -86,7 +86,7 @@ ENTRY(cpu_feroceon_proc_fin) bic r0, r0, #0x1000 @ ...i............ bic r0, r0, #0x000e @ ............wca. mcr p15, 0, r0, c1, c0, 0 @ disable caches - mov pc, lr + ret lr /* * cpu_feroceon_reset(loc) @@ -110,7 +110,7 @@ ENTRY(cpu_feroceon_reset) bic ip, ip, #0x000f @ ............wcam bic ip, ip, #0x1100 @ ...i...s........ mcr p15, 0, ip, c1, c0, 0 @ ctrl register - mov pc, r0 + ret r0 ENDPROC(cpu_feroceon_reset) .popsection @@ -124,7 +124,7 @@ ENTRY(cpu_feroceon_do_idle) mov r0, #0 mcr p15, 0, r0, c7, c10, 4 @ Drain write buffer mcr p15, 0, r0, c7, c0, 4 @ Wait for interrupt - mov pc, lr + ret lr /* * flush_icache_all() @@ -134,7 +134,7 @@ ENTRY(cpu_feroceon_do_idle) ENTRY(feroceon_flush_icache_all) mov r0, #0 mcr p15, 0, r0, c7, c5, 0 @ invalidate I cache - mov pc, lr + ret lr ENDPROC(feroceon_flush_icache_all) /* @@ -169,7 +169,7 @@ __flush_whole_cache: mov ip, #0 mcrne p15, 0, ip, c7, c5, 0 @ invalidate I cache mcrne p15, 0, ip, c7, c10, 4 @ drain WB - mov pc, lr + ret lr /* * flush_user_cache_range(start, end, flags) @@ -198,7 +198,7 @@ ENTRY(feroceon_flush_user_cache_range) tst r2, #VM_EXEC mov ip, #0 mcrne p15, 0, ip, c7, c10, 4 @ drain WB - mov pc, lr + ret lr /* * coherent_kern_range(start, end) @@ -233,7 +233,7 @@ ENTRY(feroceon_coherent_user_range) blo 1b mcr p15, 0, r0, c7, c10, 4 @ drain WB mov r0, #0 - mov pc, lr + ret lr /* * flush_kern_dcache_area(void *addr, size_t size) @@ -254,7 +254,7 @@ ENTRY(feroceon_flush_kern_dcache_area) mov r0, #0 mcr p15, 0, r0, c7, c5, 0 @ invalidate I cache mcr p15, 0, r0, c7, c10, 4 @ drain WB - mov pc, lr + ret lr .align 5 ENTRY(feroceon_range_flush_kern_dcache_area) @@ -268,7 +268,7 @@ ENTRY(feroceon_range_flush_kern_dcache_area) mov r0, #0 mcr p15, 0, r0, c7, c5, 0 @ invalidate I cache mcr p15, 0, r0, c7, c10, 4 @ drain WB - mov pc, lr + ret lr /* * dma_inv_range(start, end) @@ -295,7 +295,7 @@ feroceon_dma_inv_range: cmp r0, r1 blo 1b mcr p15, 0, r0, c7, c10, 4 @ drain WB - mov pc, lr + ret lr .align 5 feroceon_range_dma_inv_range: @@ -311,7 +311,7 @@ feroceon_range_dma_inv_range: mcr p15, 5, r0, c15, c14, 0 @ D inv range start mcr p15, 5, r1, c15, c14, 1 @ D inv range top msr cpsr_c, r2 @ restore interrupts - mov pc, lr + ret lr /* * dma_clean_range(start, end) @@ -331,7 +331,7 @@ feroceon_dma_clean_range: cmp r0, r1 blo 1b mcr p15, 0, r0, c7, c10, 4 @ drain WB - mov pc, lr + ret lr .align 5 feroceon_range_dma_clean_range: @@ -344,7 +344,7 @@ feroceon_range_dma_clean_range: mcr p15, 5, r1, c15, c13, 1 @ D clean range top msr cpsr_c, r2 @ restore interrupts mcr p15, 0, r0, c7, c10, 4 @ drain WB - mov pc, lr + ret lr /* * dma_flush_range(start, end) @@ -362,7 +362,7 @@ ENTRY(feroceon_dma_flush_range) cmp r0, r1 blo 1b mcr p15, 0, r0, c7, c10, 4 @ drain WB - mov pc, lr + ret lr .align 5 ENTRY(feroceon_range_dma_flush_range) @@ -375,7 +375,7 @@ ENTRY(feroceon_range_dma_flush_range) mcr p15, 5, r1, c15, c15, 1 @ D clean/inv range top msr cpsr_c, r2 @ restore interrupts mcr p15, 0, r0, c7, c10, 4 @ drain WB - mov pc, lr + ret lr /* * dma_map_area(start, size, dir) @@ -412,7 +412,7 @@ ENDPROC(feroceon_range_dma_map_area) * - dir - DMA direction */ ENTRY(feroceon_dma_unmap_area) - mov pc, lr + ret lr ENDPROC(feroceon_dma_unmap_area) .globl feroceon_flush_kern_cache_louis @@ -461,7 +461,7 @@ ENTRY(cpu_feroceon_dcache_clean_area) bhi 1b #endif mcr p15, 0, r0, c7, c10, 4 @ drain WB - mov pc, lr + ret lr /* =============================== PageTable ============================== */ @@ -490,9 +490,9 @@ ENTRY(cpu_feroceon_switch_mm) mcr p15, 0, r0, c2, c0, 0 @ load page table pointer mcr p15, 0, ip, c8, c7, 0 @ invalidate I & D TLBs - mov pc, r2 + ret r2 #else - mov pc, lr + ret lr #endif /* @@ -512,7 +512,7 @@ ENTRY(cpu_feroceon_set_pte_ext) #endif mcr p15, 0, r0, c7, c10, 4 @ drain WB #endif - mov pc, lr + ret lr /* Suspend/resume support: taken from arch/arm/mm/proc-arm926.S */ .globl cpu_feroceon_suspend_size @@ -554,7 +554,7 @@ __feroceon_setup: mrc p15, 0, r0, c1, c0 @ get control register v4 bic r0, r0, r5 orr r0, r0, r6 - mov pc, lr + ret lr .size __feroceon_setup, . - __feroceon_setup /* diff --git a/arch/arm/mm/proc-mohawk.S b/arch/arm/mm/proc-mohawk.S index 40acba595731..53d393455f13 100644 --- a/arch/arm/mm/proc-mohawk.S +++ b/arch/arm/mm/proc-mohawk.S @@ -45,7 +45,7 @@ * cpu_mohawk_proc_init() */ ENTRY(cpu_mohawk_proc_init) - mov pc, lr + ret lr /* * cpu_mohawk_proc_fin() @@ -55,7 +55,7 @@ ENTRY(cpu_mohawk_proc_fin) bic r0, r0, #0x1800 @ ...iz........... bic r0, r0, #0x0006 @ .............ca. mcr p15, 0, r0, c1, c0, 0 @ disable caches - mov pc, lr + ret lr /* * cpu_mohawk_reset(loc) @@ -79,7 +79,7 @@ ENTRY(cpu_mohawk_reset) bic ip, ip, #0x0007 @ .............cam bic ip, ip, #0x1100 @ ...i...s........ mcr p15, 0, ip, c1, c0, 0 @ ctrl register - mov pc, r0 + ret r0 ENDPROC(cpu_mohawk_reset) .popsection @@ -93,7 +93,7 @@ ENTRY(cpu_mohawk_do_idle) mov r0, #0 mcr p15, 0, r0, c7, c10, 4 @ drain write buffer mcr p15, 0, r0, c7, c0, 4 @ wait for interrupt - mov pc, lr + ret lr /* * flush_icache_all() @@ -103,7 +103,7 @@ ENTRY(cpu_mohawk_do_idle) ENTRY(mohawk_flush_icache_all) mov r0, #0 mcr p15, 0, r0, c7, c5, 0 @ invalidate I cache - mov pc, lr + ret lr ENDPROC(mohawk_flush_icache_all) /* @@ -128,7 +128,7 @@ __flush_whole_cache: tst r2, #VM_EXEC mcrne p15, 0, ip, c7, c5, 0 @ invalidate I cache mcrne p15, 0, ip, c7, c10, 0 @ drain write buffer - mov pc, lr + ret lr /* * flush_user_cache_range(start, end, flags) @@ -158,7 +158,7 @@ ENTRY(mohawk_flush_user_cache_range) blo 1b tst r2, #VM_EXEC mcrne p15, 0, ip, c7, c10, 4 @ drain WB - mov pc, lr + ret lr /* * coherent_kern_range(start, end) @@ -194,7 +194,7 @@ ENTRY(mohawk_coherent_user_range) blo 1b mcr p15, 0, r0, c7, c10, 4 @ drain WB mov r0, #0 - mov pc, lr + ret lr /* * flush_kern_dcache_area(void *addr, size_t size) @@ -214,7 +214,7 @@ ENTRY(mohawk_flush_kern_dcache_area) mov r0, #0 mcr p15, 0, r0, c7, c5, 0 @ invalidate I cache mcr p15, 0, r0, c7, c10, 4 @ drain WB - mov pc, lr + ret lr /* * dma_inv_range(start, end) @@ -240,7 +240,7 @@ mohawk_dma_inv_range: cmp r0, r1 blo 1b mcr p15, 0, r0, c7, c10, 4 @ drain WB - mov pc, lr + ret lr /* * dma_clean_range(start, end) @@ -259,7 +259,7 @@ mohawk_dma_clean_range: cmp r0, r1 blo 1b mcr p15, 0, r0, c7, c10, 4 @ drain WB - mov pc, lr + ret lr /* * dma_flush_range(start, end) @@ -277,7 +277,7 @@ ENTRY(mohawk_dma_flush_range) cmp r0, r1 blo 1b mcr p15, 0, r0, c7, c10, 4 @ drain WB - mov pc, lr + ret lr /* * dma_map_area(start, size, dir) @@ -300,7 +300,7 @@ ENDPROC(mohawk_dma_map_area) * - dir - DMA direction */ ENTRY(mohawk_dma_unmap_area) - mov pc, lr + ret lr ENDPROC(mohawk_dma_unmap_area) .globl mohawk_flush_kern_cache_louis @@ -315,7 +315,7 @@ ENTRY(cpu_mohawk_dcache_clean_area) subs r1, r1, #CACHE_DLINESIZE bhi 1b mcr p15, 0, r0, c7, c10, 4 @ drain WB - mov pc, lr + ret lr /* * cpu_mohawk_switch_mm(pgd) @@ -333,7 +333,7 @@ ENTRY(cpu_mohawk_switch_mm) orr r0, r0, #0x18 @ cache the page table in L2 mcr p15, 0, r0, c2, c0, 0 @ load page table pointer mcr p15, 0, ip, c8, c7, 0 @ invalidate I & D TLBs - mov pc, lr + ret lr /* * cpu_mohawk_set_pte_ext(ptep, pte, ext) @@ -346,7 +346,7 @@ ENTRY(cpu_mohawk_set_pte_ext) mov r0, r0 mcr p15, 0, r0, c7, c10, 1 @ clean D entry mcr p15, 0, r0, c7, c10, 4 @ drain WB - mov pc, lr + ret lr .globl cpu_mohawk_suspend_size .equ cpu_mohawk_suspend_size, 4 * 6 @@ -400,7 +400,7 @@ __mohawk_setup: mrc p15, 0, r0, c1, c0 @ get control register bic r0, r0, r5 orr r0, r0, r6 - mov pc, lr + ret lr .size __mohawk_setup, . - __mohawk_setup diff --git a/arch/arm/mm/proc-sa110.S b/arch/arm/mm/proc-sa110.S index c45319c8f1d9..8008a0461cf5 100644 --- a/arch/arm/mm/proc-sa110.S +++ b/arch/arm/mm/proc-sa110.S @@ -38,7 +38,7 @@ ENTRY(cpu_sa110_proc_init) mov r0, #0 mcr p15, 0, r0, c15, c1, 2 @ Enable clock switching - mov pc, lr + ret lr /* * cpu_sa110_proc_fin() @@ -50,7 +50,7 @@ ENTRY(cpu_sa110_proc_fin) bic r0, r0, #0x1000 @ ...i............ bic r0, r0, #0x000e @ ............wca. mcr p15, 0, r0, c1, c0, 0 @ disable caches - mov pc, lr + ret lr /* * cpu_sa110_reset(loc) @@ -74,7 +74,7 @@ ENTRY(cpu_sa110_reset) bic ip, ip, #0x000f @ ............wcam bic ip, ip, #0x1100 @ ...i...s........ mcr p15, 0, ip, c1, c0, 0 @ ctrl register - mov pc, r0 + ret r0 ENDPROC(cpu_sa110_reset) .popsection @@ -103,7 +103,7 @@ ENTRY(cpu_sa110_do_idle) mov r0, r0 @ safety mov r0, r0 @ safety mcr p15, 0, r0, c15, c1, 2 @ enable clock switching - mov pc, lr + ret lr /* ================================= CACHE ================================ */ @@ -121,7 +121,7 @@ ENTRY(cpu_sa110_dcache_clean_area) add r0, r0, #DCACHELINESIZE subs r1, r1, #DCACHELINESIZE bhi 1b - mov pc, lr + ret lr /* =============================== PageTable ============================== */ @@ -141,7 +141,7 @@ ENTRY(cpu_sa110_switch_mm) mcr p15, 0, ip, c8, c7, 0 @ invalidate I & D TLBs ldr pc, [sp], #4 #else - mov pc, lr + ret lr #endif /* @@ -157,7 +157,7 @@ ENTRY(cpu_sa110_set_pte_ext) mcr p15, 0, r0, c7, c10, 1 @ clean D entry mcr p15, 0, r0, c7, c10, 4 @ drain WB #endif - mov pc, lr + ret lr .type __sa110_setup, #function __sa110_setup: @@ -173,7 +173,7 @@ __sa110_setup: mrc p15, 0, r0, c1, c0 @ get control register v4 bic r0, r0, r5 orr r0, r0, r6 - mov pc, lr + ret lr .size __sa110_setup, . - __sa110_setup /* diff --git a/arch/arm/mm/proc-sa1100.S b/arch/arm/mm/proc-sa1100.S index 09d241ae2dbe..89f97ac648a9 100644 --- a/arch/arm/mm/proc-sa1100.S +++ b/arch/arm/mm/proc-sa1100.S @@ -43,7 +43,7 @@ ENTRY(cpu_sa1100_proc_init) mov r0, #0 mcr p15, 0, r0, c15, c1, 2 @ Enable clock switching mcr p15, 0, r0, c9, c0, 5 @ Allow read-buffer operations from userland - mov pc, lr + ret lr /* * cpu_sa1100_proc_fin() @@ -58,7 +58,7 @@ ENTRY(cpu_sa1100_proc_fin) bic r0, r0, #0x1000 @ ...i............ bic r0, r0, #0x000e @ ............wca. mcr p15, 0, r0, c1, c0, 0 @ disable caches - mov pc, lr + ret lr /* * cpu_sa1100_reset(loc) @@ -82,7 +82,7 @@ ENTRY(cpu_sa1100_reset) bic ip, ip, #0x000f @ ............wcam bic ip, ip, #0x1100 @ ...i...s........ mcr p15, 0, ip, c1, c0, 0 @ ctrl register - mov pc, r0 + ret r0 ENDPROC(cpu_sa1100_reset) .popsection @@ -113,7 +113,7 @@ ENTRY(cpu_sa1100_do_idle) mcr p15, 0, r0, c15, c8, 2 @ wait for interrupt mov r0, r0 @ safety mcr p15, 0, r0, c15, c1, 2 @ enable clock switching - mov pc, lr + ret lr /* ================================= CACHE ================================ */ @@ -131,7 +131,7 @@ ENTRY(cpu_sa1100_dcache_clean_area) add r0, r0, #DCACHELINESIZE subs r1, r1, #DCACHELINESIZE bhi 1b - mov pc, lr + ret lr /* =============================== PageTable ============================== */ @@ -152,7 +152,7 @@ ENTRY(cpu_sa1100_switch_mm) mcr p15, 0, ip, c8, c7, 0 @ invalidate I & D TLBs ldr pc, [sp], #4 #else - mov pc, lr + ret lr #endif /* @@ -168,7 +168,7 @@ ENTRY(cpu_sa1100_set_pte_ext) mcr p15, 0, r0, c7, c10, 1 @ clean D entry mcr p15, 0, r0, c7, c10, 4 @ drain WB #endif - mov pc, lr + ret lr .globl cpu_sa1100_suspend_size .equ cpu_sa1100_suspend_size, 4 * 3 @@ -211,7 +211,7 @@ __sa1100_setup: mrc p15, 0, r0, c1, c0 @ get control register v4 bic r0, r0, r5 orr r0, r0, r6 - mov pc, lr + ret lr .size __sa1100_setup, . - __sa1100_setup /* diff --git a/arch/arm/mm/proc-v6.S b/arch/arm/mm/proc-v6.S index 32b3558321c4..d0390f4b3f18 100644 --- a/arch/arm/mm/proc-v6.S +++ b/arch/arm/mm/proc-v6.S @@ -36,14 +36,14 @@ #define PMD_FLAGS_SMP PMD_SECT_WBWA|PMD_SECT_S ENTRY(cpu_v6_proc_init) - mov pc, lr + ret lr ENTRY(cpu_v6_proc_fin) mrc p15, 0, r0, c1, c0, 0 @ ctrl register bic r0, r0, #0x1000 @ ...i............ bic r0, r0, #0x0006 @ .............ca. mcr p15, 0, r0, c1, c0, 0 @ disable caches - mov pc, lr + ret lr /* * cpu_v6_reset(loc) @@ -62,7 +62,7 @@ ENTRY(cpu_v6_reset) mcr p15, 0, r1, c1, c0, 0 @ disable MMU mov r1, #0 mcr p15, 0, r1, c7, c5, 4 @ ISB - mov pc, r0 + ret r0 ENDPROC(cpu_v6_reset) .popsection @@ -77,14 +77,14 @@ ENTRY(cpu_v6_do_idle) mov r1, #0 mcr p15, 0, r1, c7, c10, 4 @ DWB - WFI may enter a low-power mode mcr p15, 0, r1, c7, c0, 4 @ wait for interrupt - mov pc, lr + ret lr ENTRY(cpu_v6_dcache_clean_area) 1: mcr p15, 0, r0, c7, c10, 1 @ clean D entry add r0, r0, #D_CACHE_LINE_SIZE subs r1, r1, #D_CACHE_LINE_SIZE bhi 1b - mov pc, lr + ret lr /* * cpu_v6_switch_mm(pgd_phys, tsk) @@ -113,7 +113,7 @@ ENTRY(cpu_v6_switch_mm) #endif mcr p15, 0, r1, c13, c0, 1 @ set context ID #endif - mov pc, lr + ret lr /* * cpu_v6_set_pte_ext(ptep, pte, ext) @@ -131,7 +131,7 @@ ENTRY(cpu_v6_set_pte_ext) #ifdef CONFIG_MMU armv6_set_pte_ext cpu_v6 #endif - mov pc, lr + ret lr /* Suspend/resume support: taken from arch/arm/mach-s3c64xx/sleep.S */ .globl cpu_v6_suspend_size @@ -241,7 +241,7 @@ __v6_setup: mcreq p15, 0, r5, c1, c0, 1 @ write aux control reg orreq r0, r0, #(1 << 21) @ low interrupt latency configuration #endif - mov pc, lr @ return to head.S:__ret + ret lr @ return to head.S:__ret /* * V X F I D LR diff --git a/arch/arm/mm/proc-v7-2level.S b/arch/arm/mm/proc-v7-2level.S index 1f52915f2b28..ed448d8a596b 100644 --- a/arch/arm/mm/proc-v7-2level.S +++ b/arch/arm/mm/proc-v7-2level.S @@ -59,7 +59,7 @@ ENTRY(cpu_v7_switch_mm) mcr p15, 0, r0, c2, c0, 0 @ set TTB 0 isb #endif - mov pc, lr + bx lr ENDPROC(cpu_v7_switch_mm) /* @@ -106,7 +106,7 @@ ENTRY(cpu_v7_set_pte_ext) ALT_SMP(W(nop)) ALT_UP (mcr p15, 0, r0, c7, c10, 1) @ flush_pte #endif - mov pc, lr + bx lr ENDPROC(cpu_v7_set_pte_ext) /* diff --git a/arch/arm/mm/proc-v7-3level.S b/arch/arm/mm/proc-v7-3level.S index 22e3ad63500c..e4c8acfc1323 100644 --- a/arch/arm/mm/proc-v7-3level.S +++ b/arch/arm/mm/proc-v7-3level.S @@ -19,6 +19,7 @@ * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ +#include <asm/assembler.h> #define TTB_IRGN_NC (0 << 8) #define TTB_IRGN_WBWA (1 << 8) @@ -61,7 +62,7 @@ ENTRY(cpu_v7_switch_mm) mcrr p15, 0, rpgdl, rpgdh, c2 @ set TTB 0 isb #endif - mov pc, lr + ret lr ENDPROC(cpu_v7_switch_mm) #ifdef __ARMEB__ @@ -86,13 +87,18 @@ ENTRY(cpu_v7_set_pte_ext) tst rh, #1 << (57 - 32) @ L_PTE_NONE bicne rl, #L_PTE_VALID bne 1f - tst rh, #1 << (55 - 32) @ L_PTE_DIRTY - orreq rl, #L_PTE_RDONLY + + eor ip, rh, #1 << (55 - 32) @ toggle L_PTE_DIRTY in temp reg to + @ test for !L_PTE_DIRTY || L_PTE_RDONLY + tst ip, #1 << (55 - 32) | 1 << (58 - 32) + orrne rl, #PTE_AP2 + biceq rl, #PTE_AP2 + 1: strd r2, r3, [r0] ALT_SMP(W(nop)) ALT_UP (mcr p15, 0, r0, c7, c10, 1) @ flush_pte #endif - mov pc, lr + ret lr ENDPROC(cpu_v7_set_pte_ext) /* diff --git a/arch/arm/mm/proc-v7.S b/arch/arm/mm/proc-v7.S index 3db2c2f04a30..b5d67db20897 100644 --- a/arch/arm/mm/proc-v7.S +++ b/arch/arm/mm/proc-v7.S @@ -26,7 +26,7 @@ #endif ENTRY(cpu_v7_proc_init) - mov pc, lr + ret lr ENDPROC(cpu_v7_proc_init) ENTRY(cpu_v7_proc_fin) @@ -34,7 +34,7 @@ ENTRY(cpu_v7_proc_fin) bic r0, r0, #0x1000 @ ...i............ bic r0, r0, #0x0006 @ .............ca. mcr p15, 0, r0, c1, c0, 0 @ disable caches - mov pc, lr + ret lr ENDPROC(cpu_v7_proc_fin) /* @@ -71,20 +71,20 @@ ENDPROC(cpu_v7_reset) ENTRY(cpu_v7_do_idle) dsb @ WFI may enter a low-power mode wfi - mov pc, lr + ret lr ENDPROC(cpu_v7_do_idle) ENTRY(cpu_v7_dcache_clean_area) ALT_SMP(W(nop)) @ MP extensions imply L1 PTW ALT_UP_B(1f) - mov pc, lr + ret lr 1: dcache_line_size r2, r3 2: mcr p15, 0, r0, c7, c10, 1 @ clean D entry add r0, r0, r2 subs r1, r1, r2 bhi 2b dsb ishst - mov pc, lr + ret lr ENDPROC(cpu_v7_dcache_clean_area) string cpu_v7_name, "ARMv7 Processor" @@ -152,6 +152,40 @@ ENTRY(cpu_v7_do_resume) ENDPROC(cpu_v7_do_resume) #endif +/* + * Cortex-A9 processor functions + */ + globl_equ cpu_ca9mp_proc_init, cpu_v7_proc_init + globl_equ cpu_ca9mp_proc_fin, cpu_v7_proc_fin + globl_equ cpu_ca9mp_reset, cpu_v7_reset + globl_equ cpu_ca9mp_do_idle, cpu_v7_do_idle + globl_equ cpu_ca9mp_dcache_clean_area, cpu_v7_dcache_clean_area + globl_equ cpu_ca9mp_switch_mm, cpu_v7_switch_mm + globl_equ cpu_ca9mp_set_pte_ext, cpu_v7_set_pte_ext +.globl cpu_ca9mp_suspend_size +.equ cpu_ca9mp_suspend_size, cpu_v7_suspend_size + 4 * 2 +#ifdef CONFIG_ARM_CPU_SUSPEND +ENTRY(cpu_ca9mp_do_suspend) + stmfd sp!, {r4 - r5} + mrc p15, 0, r4, c15, c0, 1 @ Diagnostic register + mrc p15, 0, r5, c15, c0, 0 @ Power register + stmia r0!, {r4 - r5} + ldmfd sp!, {r4 - r5} + b cpu_v7_do_suspend +ENDPROC(cpu_ca9mp_do_suspend) + +ENTRY(cpu_ca9mp_do_resume) + ldmia r0!, {r4 - r5} + mrc p15, 0, r10, c15, c0, 1 @ Read Diagnostic register + teq r4, r10 @ Already restored? + mcrne p15, 0, r4, c15, c0, 1 @ No, so restore it + mrc p15, 0, r10, c15, c0, 0 @ Read Power register + teq r5, r10 @ Already restored? + mcrne p15, 0, r5, c15, c0, 0 @ No, so restore it + b cpu_v7_do_resume +ENDPROC(cpu_ca9mp_do_resume) +#endif + #ifdef CONFIG_CPU_PJ4B globl_equ cpu_pj4b_switch_mm, cpu_v7_switch_mm globl_equ cpu_pj4b_set_pte_ext, cpu_v7_set_pte_ext @@ -163,7 +197,7 @@ ENTRY(cpu_pj4b_do_idle) dsb @ WFI may enter a low-power mode wfi dsb @barrier - mov pc, lr + ret lr ENDPROC(cpu_pj4b_do_idle) #else globl_equ cpu_pj4b_do_idle, cpu_v7_do_idle @@ -184,16 +218,16 @@ ENDPROC(cpu_pj4b_do_suspend) ENTRY(cpu_pj4b_do_resume) ldmia r0!, {r6 - r10} - mcr p15, 1, r6, c15, c1, 0 @ save CP15 - extra features - mcr p15, 1, r7, c15, c2, 0 @ save CP15 - Aux Func Modes Ctrl 0 - mcr p15, 1, r8, c15, c1, 2 @ save CP15 - Aux Debug Modes Ctrl 2 - mcr p15, 1, r9, c15, c1, 1 @ save CP15 - Aux Debug Modes Ctrl 1 - mcr p15, 0, r10, c9, c14, 0 @ save CP15 - PMC + mcr p15, 1, r6, c15, c1, 0 @ restore CP15 - extra features + mcr p15, 1, r7, c15, c2, 0 @ restore CP15 - Aux Func Modes Ctrl 0 + mcr p15, 1, r8, c15, c1, 2 @ restore CP15 - Aux Debug Modes Ctrl 2 + mcr p15, 1, r9, c15, c1, 1 @ restore CP15 - Aux Debug Modes Ctrl 1 + mcr p15, 0, r10, c9, c14, 0 @ restore CP15 - PMC b cpu_v7_do_resume ENDPROC(cpu_pj4b_do_resume) #endif .globl cpu_pj4b_suspend_size -.equ cpu_pj4b_suspend_size, 4 * 14 +.equ cpu_pj4b_suspend_size, cpu_v7_suspend_size + 4 * 5 #endif @@ -216,6 +250,7 @@ __v7_cr7mp_setup: __v7_ca7mp_setup: __v7_ca12mp_setup: __v7_ca15mp_setup: +__v7_b15mp_setup: __v7_ca17mp_setup: mov r10, #0 1: @@ -407,7 +442,7 @@ __v7_setup: bic r0, r0, r5 @ clear bits them orr r0, r0, r6 @ set them THUMB( orr r0, r0, #1 << 30 ) @ Thumb exceptions - mov pc, lr @ return to head.S:__ret + ret lr @ return to head.S:__ret ENDPROC(__v7_setup) .align 2 @@ -418,6 +453,7 @@ __v7_setup_stack: @ define struct processor (see <asm/proc-fns.h> and proc-macros.S) define_processor_functions v7, dabort=v7_early_abort, pabort=v7_pabort, suspend=1 + define_processor_functions ca9mp, dabort=v7_early_abort, pabort=v7_pabort, suspend=1 #ifdef CONFIG_CPU_PJ4B define_processor_functions pj4b, dabort=v7_early_abort, pabort=v7_pabort, suspend=1 #endif @@ -470,7 +506,7 @@ __v7_ca5mp_proc_info: __v7_ca9mp_proc_info: .long 0x410fc090 .long 0xff0ffff0 - __v7_proc __v7_ca9mp_setup + __v7_proc __v7_ca9mp_setup, proc_fns = ca9mp_processor_functions .size __v7_ca9mp_proc_info, . - __v7_ca9mp_proc_info #endif /* CONFIG_ARM_LPAE */ @@ -528,6 +564,16 @@ __v7_ca15mp_proc_info: .size __v7_ca15mp_proc_info, . - __v7_ca15mp_proc_info /* + * Broadcom Corporation Brahma-B15 processor. + */ + .type __v7_b15mp_proc_info, #object +__v7_b15mp_proc_info: + .long 0x420f00f0 + .long 0xff0ffff0 + __v7_proc __v7_b15mp_setup, hwcaps = HWCAP_IDIV + .size __v7_b15mp_proc_info, . - __v7_b15mp_proc_info + + /* * ARM Ltd. Cortex A17 processor. */ .type __v7_ca17mp_proc_info, #object diff --git a/arch/arm/mm/proc-v7m.S b/arch/arm/mm/proc-v7m.S index 1ca37c72f12f..d1e68b553d3b 100644 --- a/arch/arm/mm/proc-v7m.S +++ b/arch/arm/mm/proc-v7m.S @@ -16,11 +16,11 @@ #include "proc-macros.S" ENTRY(cpu_v7m_proc_init) - mov pc, lr + ret lr ENDPROC(cpu_v7m_proc_init) ENTRY(cpu_v7m_proc_fin) - mov pc, lr + ret lr ENDPROC(cpu_v7m_proc_fin) /* @@ -34,7 +34,7 @@ ENDPROC(cpu_v7m_proc_fin) */ .align 5 ENTRY(cpu_v7m_reset) - mov pc, r0 + ret r0 ENDPROC(cpu_v7m_reset) /* @@ -46,18 +46,18 @@ ENDPROC(cpu_v7m_reset) */ ENTRY(cpu_v7m_do_idle) wfi - mov pc, lr + ret lr ENDPROC(cpu_v7m_do_idle) ENTRY(cpu_v7m_dcache_clean_area) - mov pc, lr + ret lr ENDPROC(cpu_v7m_dcache_clean_area) /* * There is no MMU, so here is nothing to do. */ ENTRY(cpu_v7m_switch_mm) - mov pc, lr + ret lr ENDPROC(cpu_v7m_switch_mm) .globl cpu_v7m_suspend_size @@ -65,11 +65,11 @@ ENDPROC(cpu_v7m_switch_mm) #ifdef CONFIG_ARM_CPU_SUSPEND ENTRY(cpu_v7m_do_suspend) - mov pc, lr + ret lr ENDPROC(cpu_v7m_do_suspend) ENTRY(cpu_v7m_do_resume) - mov pc, lr + ret lr ENDPROC(cpu_v7m_do_resume) #endif @@ -120,7 +120,7 @@ __v7m_setup: ldr r12, [r0, V7M_SCB_CCR] @ system control register orr r12, #V7M_SCB_CCR_STKALIGN str r12, [r0, V7M_SCB_CCR] - mov pc, lr + ret lr ENDPROC(__v7m_setup) .align 2 diff --git a/arch/arm/mm/proc-xsc3.S b/arch/arm/mm/proc-xsc3.S index dc1645890042..f8acdfece036 100644 --- a/arch/arm/mm/proc-xsc3.S +++ b/arch/arm/mm/proc-xsc3.S @@ -83,7 +83,7 @@ * Nothing too exciting at the moment */ ENTRY(cpu_xsc3_proc_init) - mov pc, lr + ret lr /* * cpu_xsc3_proc_fin() @@ -93,7 +93,7 @@ ENTRY(cpu_xsc3_proc_fin) bic r0, r0, #0x1800 @ ...IZ........... bic r0, r0, #0x0006 @ .............CA. mcr p15, 0, r0, c1, c0, 0 @ disable caches - mov pc, lr + ret lr /* * cpu_xsc3_reset(loc) @@ -119,7 +119,7 @@ ENTRY(cpu_xsc3_reset) @ CAUTION: MMU turned off from this point. We count on the pipeline @ already containing those two last instructions to survive. mcr p15, 0, ip, c8, c7, 0 @ invalidate I and D TLBs - mov pc, r0 + ret r0 ENDPROC(cpu_xsc3_reset) .popsection @@ -138,7 +138,7 @@ ENDPROC(cpu_xsc3_reset) ENTRY(cpu_xsc3_do_idle) mov r0, #1 mcr p14, 0, r0, c7, c0, 0 @ go to idle - mov pc, lr + ret lr /* ================================= CACHE ================================ */ @@ -150,7 +150,7 @@ ENTRY(cpu_xsc3_do_idle) ENTRY(xsc3_flush_icache_all) mov r0, #0 mcr p15, 0, r0, c7, c5, 0 @ invalidate I cache - mov pc, lr + ret lr ENDPROC(xsc3_flush_icache_all) /* @@ -176,7 +176,7 @@ __flush_whole_cache: mcrne p15, 0, ip, c7, c5, 0 @ invalidate L1 I cache and BTB mcrne p15, 0, ip, c7, c10, 4 @ data write barrier mcrne p15, 0, ip, c7, c5, 4 @ prefetch flush - mov pc, lr + ret lr /* * flush_user_cache_range(start, end, vm_flags) @@ -205,7 +205,7 @@ ENTRY(xsc3_flush_user_cache_range) mcrne p15, 0, ip, c7, c5, 6 @ invalidate BTB mcrne p15, 0, ip, c7, c10, 4 @ data write barrier mcrne p15, 0, ip, c7, c5, 4 @ prefetch flush - mov pc, lr + ret lr /* * coherent_kern_range(start, end) @@ -232,7 +232,7 @@ ENTRY(xsc3_coherent_user_range) mcr p15, 0, r0, c7, c5, 0 @ invalidate L1 I cache and BTB mcr p15, 0, r0, c7, c10, 4 @ data write barrier mcr p15, 0, r0, c7, c5, 4 @ prefetch flush - mov pc, lr + ret lr /* * flush_kern_dcache_area(void *addr, size_t size) @@ -253,7 +253,7 @@ ENTRY(xsc3_flush_kern_dcache_area) mcr p15, 0, r0, c7, c5, 0 @ invalidate L1 I cache and BTB mcr p15, 0, r0, c7, c10, 4 @ data write barrier mcr p15, 0, r0, c7, c5, 4 @ prefetch flush - mov pc, lr + ret lr /* * dma_inv_range(start, end) @@ -277,7 +277,7 @@ xsc3_dma_inv_range: cmp r0, r1 blo 1b mcr p15, 0, r0, c7, c10, 4 @ data write barrier - mov pc, lr + ret lr /* * dma_clean_range(start, end) @@ -294,7 +294,7 @@ xsc3_dma_clean_range: cmp r0, r1 blo 1b mcr p15, 0, r0, c7, c10, 4 @ data write barrier - mov pc, lr + ret lr /* * dma_flush_range(start, end) @@ -311,7 +311,7 @@ ENTRY(xsc3_dma_flush_range) cmp r0, r1 blo 1b mcr p15, 0, r0, c7, c10, 4 @ data write barrier - mov pc, lr + ret lr /* * dma_map_area(start, size, dir) @@ -334,7 +334,7 @@ ENDPROC(xsc3_dma_map_area) * - dir - DMA direction */ ENTRY(xsc3_dma_unmap_area) - mov pc, lr + ret lr ENDPROC(xsc3_dma_unmap_area) .globl xsc3_flush_kern_cache_louis @@ -348,7 +348,7 @@ ENTRY(cpu_xsc3_dcache_clean_area) add r0, r0, #CACHELINESIZE subs r1, r1, #CACHELINESIZE bhi 1b - mov pc, lr + ret lr /* =============================== PageTable ============================== */ @@ -406,7 +406,7 @@ ENTRY(cpu_xsc3_set_pte_ext) orr r2, r2, ip xscale_set_pte_ext_epilogue - mov pc, lr + ret lr .ltorg .align @@ -478,7 +478,7 @@ __xsc3_setup: bic r0, r0, r5 @ ..V. ..R. .... ..A. orr r0, r0, r6 @ ..VI Z..S .... .C.M (mmu) @ ...I Z..S .... .... (uc) - mov pc, lr + ret lr .size __xsc3_setup, . - __xsc3_setup diff --git a/arch/arm/mm/proc-xscale.S b/arch/arm/mm/proc-xscale.S index d19b1cfcad91..23259f104c66 100644 --- a/arch/arm/mm/proc-xscale.S +++ b/arch/arm/mm/proc-xscale.S @@ -118,7 +118,7 @@ ENTRY(cpu_xscale_proc_init) mrc p15, 0, r1, c1, c0, 1 bic r1, r1, #1 mcr p15, 0, r1, c1, c0, 1 - mov pc, lr + ret lr /* * cpu_xscale_proc_fin() @@ -128,7 +128,7 @@ ENTRY(cpu_xscale_proc_fin) bic r0, r0, #0x1800 @ ...IZ........... bic r0, r0, #0x0006 @ .............CA. mcr p15, 0, r0, c1, c0, 0 @ disable caches - mov pc, lr + ret lr /* * cpu_xscale_reset(loc) @@ -160,7 +160,7 @@ ENTRY(cpu_xscale_reset) @ CAUTION: MMU turned off from this point. We count on the pipeline @ already containing those two last instructions to survive. mcr p15, 0, ip, c8, c7, 0 @ invalidate I & D TLBs - mov pc, r0 + ret r0 ENDPROC(cpu_xscale_reset) .popsection @@ -179,7 +179,7 @@ ENDPROC(cpu_xscale_reset) ENTRY(cpu_xscale_do_idle) mov r0, #1 mcr p14, 0, r0, c7, c0, 0 @ Go to IDLE - mov pc, lr + ret lr /* ================================= CACHE ================================ */ @@ -191,7 +191,7 @@ ENTRY(cpu_xscale_do_idle) ENTRY(xscale_flush_icache_all) mov r0, #0 mcr p15, 0, r0, c7, c5, 0 @ invalidate I cache - mov pc, lr + ret lr ENDPROC(xscale_flush_icache_all) /* @@ -216,7 +216,7 @@ __flush_whole_cache: tst r2, #VM_EXEC mcrne p15, 0, ip, c7, c5, 0 @ Invalidate I cache & BTB mcrne p15, 0, ip, c7, c10, 4 @ Drain Write (& Fill) Buffer - mov pc, lr + ret lr /* * flush_user_cache_range(start, end, vm_flags) @@ -245,7 +245,7 @@ ENTRY(xscale_flush_user_cache_range) tst r2, #VM_EXEC mcrne p15, 0, ip, c7, c5, 6 @ Invalidate BTB mcrne p15, 0, ip, c7, c10, 4 @ Drain Write (& Fill) Buffer - mov pc, lr + ret lr /* * coherent_kern_range(start, end) @@ -269,7 +269,7 @@ ENTRY(xscale_coherent_kern_range) mov r0, #0 mcr p15, 0, r0, c7, c5, 0 @ Invalidate I cache & BTB mcr p15, 0, r0, c7, c10, 4 @ Drain Write (& Fill) Buffer - mov pc, lr + ret lr /* * coherent_user_range(start, end) @@ -291,7 +291,7 @@ ENTRY(xscale_coherent_user_range) mov r0, #0 mcr p15, 0, r0, c7, c5, 6 @ Invalidate BTB mcr p15, 0, r0, c7, c10, 4 @ Drain Write (& Fill) Buffer - mov pc, lr + ret lr /* * flush_kern_dcache_area(void *addr, size_t size) @@ -312,7 +312,7 @@ ENTRY(xscale_flush_kern_dcache_area) mov r0, #0 mcr p15, 0, r0, c7, c5, 0 @ Invalidate I cache & BTB mcr p15, 0, r0, c7, c10, 4 @ Drain Write (& Fill) Buffer - mov pc, lr + ret lr /* * dma_inv_range(start, end) @@ -336,7 +336,7 @@ xscale_dma_inv_range: cmp r0, r1 blo 1b mcr p15, 0, r0, c7, c10, 4 @ Drain Write (& Fill) Buffer - mov pc, lr + ret lr /* * dma_clean_range(start, end) @@ -353,7 +353,7 @@ xscale_dma_clean_range: cmp r0, r1 blo 1b mcr p15, 0, r0, c7, c10, 4 @ Drain Write (& Fill) Buffer - mov pc, lr + ret lr /* * dma_flush_range(start, end) @@ -371,7 +371,7 @@ ENTRY(xscale_dma_flush_range) cmp r0, r1 blo 1b mcr p15, 0, r0, c7, c10, 4 @ Drain Write (& Fill) Buffer - mov pc, lr + ret lr /* * dma_map_area(start, size, dir) @@ -407,7 +407,7 @@ ENDPROC(xscale_80200_A0_A1_dma_map_area) * - dir - DMA direction */ ENTRY(xscale_dma_unmap_area) - mov pc, lr + ret lr ENDPROC(xscale_dma_unmap_area) .globl xscale_flush_kern_cache_louis @@ -458,7 +458,7 @@ ENTRY(cpu_xscale_dcache_clean_area) add r0, r0, #CACHELINESIZE subs r1, r1, #CACHELINESIZE bhi 1b - mov pc, lr + ret lr /* =============================== PageTable ============================== */ @@ -521,7 +521,7 @@ ENTRY(cpu_xscale_set_pte_ext) orr r2, r2, ip xscale_set_pte_ext_epilogue - mov pc, lr + ret lr .ltorg .align @@ -572,7 +572,7 @@ __xscale_setup: mrc p15, 0, r0, c1, c0, 0 @ get control register bic r0, r0, r5 orr r0, r0, r6 - mov pc, lr + ret lr .size __xscale_setup, . - __xscale_setup /* diff --git a/arch/arm/mm/tlb-fa.S b/arch/arm/mm/tlb-fa.S index d3ddcf9a76ca..d2d9ecbe0aac 100644 --- a/arch/arm/mm/tlb-fa.S +++ b/arch/arm/mm/tlb-fa.S @@ -18,6 +18,7 @@ */ #include <linux/linkage.h> #include <linux/init.h> +#include <asm/assembler.h> #include <asm/asm-offsets.h> #include <asm/tlbflush.h> #include "proc-macros.S" @@ -37,7 +38,7 @@ ENTRY(fa_flush_user_tlb_range) vma_vm_mm ip, r2 act_mm r3 @ get current->active_mm eors r3, ip, r3 @ == mm ? - movne pc, lr @ no, we dont do anything + retne lr @ no, we dont do anything mov r3, #0 mcr p15, 0, r3, c7, c10, 4 @ drain WB bic r0, r0, #0x0ff @@ -47,7 +48,7 @@ ENTRY(fa_flush_user_tlb_range) cmp r0, r1 blo 1b mcr p15, 0, r3, c7, c10, 4 @ data write barrier - mov pc, lr + ret lr ENTRY(fa_flush_kern_tlb_range) @@ -61,7 +62,7 @@ ENTRY(fa_flush_kern_tlb_range) blo 1b mcr p15, 0, r3, c7, c10, 4 @ data write barrier mcr p15, 0, r3, c7, c5, 4 @ prefetch flush (isb) - mov pc, lr + ret lr __INITDATA diff --git a/arch/arm/mm/tlb-v4.S b/arch/arm/mm/tlb-v4.S index 17a025ade573..a2b5dca42048 100644 --- a/arch/arm/mm/tlb-v4.S +++ b/arch/arm/mm/tlb-v4.S @@ -14,6 +14,7 @@ */ #include <linux/linkage.h> #include <linux/init.h> +#include <asm/assembler.h> #include <asm/asm-offsets.h> #include <asm/tlbflush.h> #include "proc-macros.S" @@ -33,7 +34,7 @@ ENTRY(v4_flush_user_tlb_range) vma_vm_mm ip, r2 act_mm r3 @ get current->active_mm eors r3, ip, r3 @ == mm ? - movne pc, lr @ no, we dont do anything + retne lr @ no, we dont do anything .v4_flush_kern_tlb_range: bic r0, r0, #0x0ff bic r0, r0, #0xf00 @@ -41,7 +42,7 @@ ENTRY(v4_flush_user_tlb_range) add r0, r0, #PAGE_SZ cmp r0, r1 blo 1b - mov pc, lr + ret lr /* * v4_flush_kern_tlb_range(start, end) diff --git a/arch/arm/mm/tlb-v4wb.S b/arch/arm/mm/tlb-v4wb.S index c04598fa4d4a..5a093b458dbc 100644 --- a/arch/arm/mm/tlb-v4wb.S +++ b/arch/arm/mm/tlb-v4wb.S @@ -14,6 +14,7 @@ */ #include <linux/linkage.h> #include <linux/init.h> +#include <asm/assembler.h> #include <asm/asm-offsets.h> #include <asm/tlbflush.h> #include "proc-macros.S" @@ -33,7 +34,7 @@ ENTRY(v4wb_flush_user_tlb_range) vma_vm_mm ip, r2 act_mm r3 @ get current->active_mm eors r3, ip, r3 @ == mm ? - movne pc, lr @ no, we dont do anything + retne lr @ no, we dont do anything vma_vm_flags r2, r2 mcr p15, 0, r3, c7, c10, 4 @ drain WB tst r2, #VM_EXEC @@ -44,7 +45,7 @@ ENTRY(v4wb_flush_user_tlb_range) add r0, r0, #PAGE_SZ cmp r0, r1 blo 1b - mov pc, lr + ret lr /* * v4_flush_kern_tlb_range(start, end) @@ -65,7 +66,7 @@ ENTRY(v4wb_flush_kern_tlb_range) add r0, r0, #PAGE_SZ cmp r0, r1 blo 1b - mov pc, lr + ret lr __INITDATA diff --git a/arch/arm/mm/tlb-v4wbi.S b/arch/arm/mm/tlb-v4wbi.S index 1f6062b6c1c1..058861548f68 100644 --- a/arch/arm/mm/tlb-v4wbi.S +++ b/arch/arm/mm/tlb-v4wbi.S @@ -14,6 +14,7 @@ */ #include <linux/linkage.h> #include <linux/init.h> +#include <asm/assembler.h> #include <asm/asm-offsets.h> #include <asm/tlbflush.h> #include "proc-macros.S" @@ -32,7 +33,7 @@ ENTRY(v4wbi_flush_user_tlb_range) vma_vm_mm ip, r2 act_mm r3 @ get current->active_mm eors r3, ip, r3 @ == mm ? - movne pc, lr @ no, we dont do anything + retne lr @ no, we dont do anything mov r3, #0 mcr p15, 0, r3, c7, c10, 4 @ drain WB vma_vm_flags r2, r2 @@ -44,7 +45,7 @@ ENTRY(v4wbi_flush_user_tlb_range) add r0, r0, #PAGE_SZ cmp r0, r1 blo 1b - mov pc, lr + ret lr ENTRY(v4wbi_flush_kern_tlb_range) mov r3, #0 @@ -56,7 +57,7 @@ ENTRY(v4wbi_flush_kern_tlb_range) add r0, r0, #PAGE_SZ cmp r0, r1 blo 1b - mov pc, lr + ret lr __INITDATA diff --git a/arch/arm/mm/tlb-v6.S b/arch/arm/mm/tlb-v6.S index eca07f550a0b..6f689be638bd 100644 --- a/arch/arm/mm/tlb-v6.S +++ b/arch/arm/mm/tlb-v6.S @@ -13,6 +13,7 @@ #include <linux/init.h> #include <linux/linkage.h> #include <asm/asm-offsets.h> +#include <asm/assembler.h> #include <asm/page.h> #include <asm/tlbflush.h> #include "proc-macros.S" @@ -55,7 +56,7 @@ ENTRY(v6wbi_flush_user_tlb_range) cmp r0, r1 blo 1b mcr p15, 0, ip, c7, c10, 4 @ data synchronization barrier - mov pc, lr + ret lr /* * v6wbi_flush_kern_tlb_range(start,end) @@ -84,7 +85,7 @@ ENTRY(v6wbi_flush_kern_tlb_range) blo 1b mcr p15, 0, r2, c7, c10, 4 @ data synchronization barrier mcr p15, 0, r2, c7, c5, 4 @ prefetch flush (isb) - mov pc, lr + ret lr __INIT diff --git a/arch/arm/mm/tlb-v7.S b/arch/arm/mm/tlb-v7.S index 355308767bae..e5101a3bc57c 100644 --- a/arch/arm/mm/tlb-v7.S +++ b/arch/arm/mm/tlb-v7.S @@ -57,7 +57,7 @@ ENTRY(v7wbi_flush_user_tlb_range) cmp r0, r1 blo 1b dsb ish - mov pc, lr + ret lr ENDPROC(v7wbi_flush_user_tlb_range) /* @@ -86,7 +86,7 @@ ENTRY(v7wbi_flush_kern_tlb_range) blo 1b dsb ish isb - mov pc, lr + ret lr ENDPROC(v7wbi_flush_kern_tlb_range) __INIT diff --git a/arch/arm/nwfpe/entry.S b/arch/arm/nwfpe/entry.S index d18dde95b8aa..5d65be1f1e8a 100644 --- a/arch/arm/nwfpe/entry.S +++ b/arch/arm/nwfpe/entry.S @@ -19,7 +19,7 @@ along with this program; if not, write to the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ - +#include <asm/assembler.h> #include <asm/opcodes.h> /* This is the kernel's entry point into the floating point emulator. @@ -92,7 +92,7 @@ emulate: mov r0, r6 @ prepare for EmulateAll() bl EmulateAll @ emulate the instruction cmp r0, #0 @ was emulation successful - moveq pc, r4 @ no, return failure + reteq r4 @ no, return failure next: .Lx1: ldrt r6, [r5], #4 @ get the next instruction and @@ -102,7 +102,7 @@ next: teq r2, #0x0C000000 teqne r2, #0x0D000000 teqne r2, #0x0E000000 - movne pc, r9 @ return ok if not a fp insn + retne r9 @ return ok if not a fp insn str r5, [sp, #S_PC] @ update PC copy in regs @@ -115,7 +115,7 @@ next: @ plain LDR instruction. Weird, but it seems harmless. .pushsection .fixup,"ax" .align 2 -.Lfix: mov pc, r9 @ let the user eat segfaults +.Lfix: ret r9 @ let the user eat segfaults .popsection .pushsection __ex_table,"a" diff --git a/arch/arm/oprofile/common.c b/arch/arm/oprofile/common.c index 99c63d4b6af8..cc649a1e46da 100644 --- a/arch/arm/oprofile/common.c +++ b/arch/arm/oprofile/common.c @@ -33,12 +33,14 @@ static struct op_perf_name { char *perf_name; char *op_name; } op_perf_name_map[] = { - { "xscale1", "arm/xscale1" }, - { "xscale1", "arm/xscale2" }, - { "v6", "arm/armv6" }, - { "v6mpcore", "arm/mpcore" }, - { "ARMv7 Cortex-A8", "arm/armv7" }, - { "ARMv7 Cortex-A9", "arm/armv7-ca9" }, + { "armv5_xscale1", "arm/xscale1" }, + { "armv5_xscale2", "arm/xscale2" }, + { "armv6_1136", "arm/armv6" }, + { "armv6_1156", "arm/armv6" }, + { "armv6_1176", "arm/armv6" }, + { "armv6_11mpcore", "arm/mpcore" }, + { "armv7_cortex_a8", "arm/armv7" }, + { "armv7_cortex_a9", "arm/armv7-ca9" }, }; char *op_name_from_perf_id(void) @@ -107,10 +109,7 @@ static void arm_backtrace(struct pt_regs * const regs, unsigned int depth) if (!user_mode(regs)) { struct stackframe frame; - frame.fp = regs->ARM_fp; - frame.sp = regs->ARM_sp; - frame.lr = regs->ARM_lr; - frame.pc = regs->ARM_pc; + arm_get_current_stackframe(regs, &frame); walk_stackframe(&frame, report_trace, &depth); return; } diff --git a/arch/arm/plat-omap/dma.c b/arch/arm/plat-omap/dma.c index b5608b1f9fbd..1c98659bbf89 100644 --- a/arch/arm/plat-omap/dma.c +++ b/arch/arm/plat-omap/dma.c @@ -698,6 +698,8 @@ int omap_request_dma(int dev_id, const char *dev_name, unsigned long flags; struct omap_dma_lch *chan; + WARN(strcmp(dev_name, "DMA engine"), "Using deprecated platform DMA API - please update to DMA engine"); + spin_lock_irqsave(&dma_chan_lock, flags); for (ch = 0; ch < dma_chan_count; ch++) { if (free_ch == -1 && dma_chan[ch].dev_id == -1) { diff --git a/arch/arm/vfp/entry.S b/arch/arm/vfp/entry.S index fe6ca574d093..2e78760f3495 100644 --- a/arch/arm/vfp/entry.S +++ b/arch/arm/vfp/entry.S @@ -34,7 +34,7 @@ ENDPROC(do_vfp) ENTRY(vfp_null_entry) dec_preempt_count_ti r10, r4 - mov pc, lr + ret lr ENDPROC(vfp_null_entry) .align 2 @@ -49,7 +49,7 @@ ENTRY(vfp_testing_entry) dec_preempt_count_ti r10, r4 ldr r0, VFP_arch_address str r0, [r0] @ set to non-zero value - mov pc, r9 @ we have handled the fault + ret r9 @ we have handled the fault ENDPROC(vfp_testing_entry) .align 2 diff --git a/arch/arm/vfp/vfphw.S b/arch/arm/vfp/vfphw.S index be807625ed8c..cda654cbf2c2 100644 --- a/arch/arm/vfp/vfphw.S +++ b/arch/arm/vfp/vfphw.S @@ -183,7 +183,7 @@ vfp_hw_state_valid: @ always subtract 4 from the following @ instruction address. dec_preempt_count_ti r10, r4 - mov pc, r9 @ we think we have handled things + ret r9 @ we think we have handled things look_for_VFP_exceptions: @@ -202,7 +202,7 @@ look_for_VFP_exceptions: DBGSTR "not VFP" dec_preempt_count_ti r10, r4 - mov pc, lr + ret lr process_exception: DBGSTR "bounce" @@ -234,7 +234,7 @@ ENTRY(vfp_save_state) VFPFMRX r12, FPINST2 @ FPINST2 if needed (and present) 1: stmia r0, {r1, r2, r3, r12} @ save FPEXC, FPSCR, FPINST, FPINST2 - mov pc, lr + ret lr ENDPROC(vfp_save_state) .align @@ -245,7 +245,7 @@ vfp_current_hw_state_address: #ifdef CONFIG_THUMB2_KERNEL adr \tmp, 1f add \tmp, \tmp, \base, lsl \shift - mov pc, \tmp + ret \tmp #else add pc, pc, \base, lsl \shift mov r0, r0 @@ -257,10 +257,10 @@ ENTRY(vfp_get_float) tbl_branch r0, r3, #3 .irp dr,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15 1: mrc p10, 0, r0, c\dr, c0, 0 @ fmrs r0, s0 - mov pc, lr + ret lr .org 1b + 8 1: mrc p10, 0, r0, c\dr, c0, 4 @ fmrs r0, s1 - mov pc, lr + ret lr .org 1b + 8 .endr ENDPROC(vfp_get_float) @@ -269,10 +269,10 @@ ENTRY(vfp_put_float) tbl_branch r1, r3, #3 .irp dr,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15 1: mcr p10, 0, r0, c\dr, c0, 0 @ fmsr r0, s0 - mov pc, lr + ret lr .org 1b + 8 1: mcr p10, 0, r0, c\dr, c0, 4 @ fmsr r0, s1 - mov pc, lr + ret lr .org 1b + 8 .endr ENDPROC(vfp_put_float) @@ -281,14 +281,14 @@ ENTRY(vfp_get_double) tbl_branch r0, r3, #3 .irp dr,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15 1: fmrrd r0, r1, d\dr - mov pc, lr + ret lr .org 1b + 8 .endr #ifdef CONFIG_VFPv3 @ d16 - d31 registers .irp dr,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15 1: mrrc p11, 3, r0, r1, c\dr @ fmrrd r0, r1, d\dr - mov pc, lr + ret lr .org 1b + 8 .endr #endif @@ -296,21 +296,21 @@ ENTRY(vfp_get_double) @ virtual register 16 (or 32 if VFPv3) for compare with zero mov r0, #0 mov r1, #0 - mov pc, lr + ret lr ENDPROC(vfp_get_double) ENTRY(vfp_put_double) tbl_branch r2, r3, #3 .irp dr,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15 1: fmdrr d\dr, r0, r1 - mov pc, lr + ret lr .org 1b + 8 .endr #ifdef CONFIG_VFPv3 @ d16 - d31 registers .irp dr,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15 1: mcrr p11, 3, r0, r1, c\dr @ fmdrr r0, r1, d\dr - mov pc, lr + ret lr .org 1b + 8 .endr #endif diff --git a/arch/arm/xen/hypercall.S b/arch/arm/xen/hypercall.S index 44e3a5f10c4c..f00e08075938 100644 --- a/arch/arm/xen/hypercall.S +++ b/arch/arm/xen/hypercall.S @@ -58,7 +58,7 @@ ENTRY(HYPERVISOR_##hypercall) \ mov r12, #__HYPERVISOR_##hypercall; \ __HVC(XEN_IMM); \ - mov pc, lr; \ + ret lr; \ ENDPROC(HYPERVISOR_##hypercall) #define HYPERCALL0 HYPERCALL_SIMPLE @@ -74,7 +74,7 @@ ENTRY(HYPERVISOR_##hypercall) \ mov r12, #__HYPERVISOR_##hypercall; \ __HVC(XEN_IMM); \ ldm sp!, {r4} \ - mov pc, lr \ + ret lr \ ENDPROC(HYPERVISOR_##hypercall) .text @@ -101,5 +101,5 @@ ENTRY(privcmd_call) ldr r4, [sp, #4] __HVC(XEN_IMM) ldm sp!, {r4} - mov pc, lr + ret lr ENDPROC(privcmd_call); diff --git a/crypto/Kconfig b/crypto/Kconfig index 6345c470650d..00b5906f57b7 100644 --- a/crypto/Kconfig +++ b/crypto/Kconfig @@ -541,6 +541,17 @@ config CRYPTO_SHA1_ARM SHA-1 secure hash standard (FIPS 180-1/DFIPS 180-2) implemented using optimized ARM assembler. +config CRYPTO_SHA1_ARM_NEON + tristate "SHA1 digest algorithm (ARM NEON)" + depends on ARM && KERNEL_MODE_NEON && !CPU_BIG_ENDIAN + select CRYPTO_SHA1_ARM + select CRYPTO_SHA1 + select CRYPTO_HASH + help + SHA-1 secure hash standard (FIPS 180-1/DFIPS 180-2) implemented + using optimized ARM NEON assembly, when NEON instructions are + available. + config CRYPTO_SHA1_PPC tristate "SHA1 digest algorithm (powerpc)" depends on PPC @@ -590,6 +601,21 @@ config CRYPTO_SHA512_SPARC64 SHA-512 secure hash standard (DFIPS 180-2) implemented using sparc64 crypto instructions, when available. +config CRYPTO_SHA512_ARM_NEON + tristate "SHA384 and SHA512 digest algorithm (ARM NEON)" + depends on ARM && KERNEL_MODE_NEON && !CPU_BIG_ENDIAN + select CRYPTO_SHA512 + select CRYPTO_HASH + help + SHA-512 secure hash standard (DFIPS 180-2) implemented + using ARM NEON instructions, when available. + + This version of SHA implements a 512 bit hash with 256 bits of + security against collision attacks. + + This code also includes SHA-384, a 384 bit hash with 192 bits + of security against collision attacks. + config CRYPTO_TGR192 tristate "Tiger digest algorithms" select CRYPTO_HASH diff --git a/drivers/clocksource/arm_global_timer.c b/drivers/clocksource/arm_global_timer.c index 60e5a170c4d2..e6833771a716 100644 --- a/drivers/clocksource/arm_global_timer.c +++ b/drivers/clocksource/arm_global_timer.c @@ -250,7 +250,7 @@ static void __init global_timer_of_register(struct device_node *np) * fire when the timer value is greater than or equal to. In previous * revisions the comparators fired when the timer value was equal to. */ - if (read_cpuid_part_number() == ARM_CPU_PART_CORTEX_A9 + if (read_cpuid_part() == ARM_CPU_PART_CORTEX_A9 && (read_cpuid_id() & 0xf0000f) < 0x200000) { pr_warn("global-timer: non support for this cpu version.\n"); return; |